aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig145
-rw-r--r--arch/x86/Kconfig.debug14
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/include/asm/alternative.h11
-rw-r--r--arch/x86/include/asm/amd_iommu.h6
-rw-r--r--arch/x86/include/asm/amd_iommu_proto.h2
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h23
-rw-r--r--arch/x86/include/asm/amd_nb.h (renamed from arch/x86/include/asm/k8.h)21
-rw-r--r--arch/x86/include/asm/apb_timer.h1
-rw-r--r--arch/x86/include/asm/apic.h4
-rw-r--r--arch/x86/include/asm/apicdef.h1
-rw-r--r--arch/x86/include/asm/bitops.h2
-rw-r--r--arch/x86/include/asm/calgary.h4
-rw-r--r--arch/x86/include/asm/cpu.h1
-rw-r--r--arch/x86/include/asm/cpufeature.h13
-rw-r--r--arch/x86/include/asm/dwarf2.h20
-rw-r--r--arch/x86/include/asm/e820.h20
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/entry_arch.h4
-rw-r--r--arch/x86/include/asm/fixmap.h15
-rw-r--r--arch/x86/include/asm/gart.h20
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/hpet.h10
-rw-r--r--arch/x86/include/asm/hw_irq.h19
-rw-r--r--arch/x86/include/asm/i387.h185
-rw-r--r--arch/x86/include/asm/i8259.h2
-rw-r--r--arch/x86/include/asm/io.h2
-rw-r--r--arch/x86/include/asm/io_apic.h6
-rw-r--r--arch/x86/include/asm/iommu_table.h100
-rw-r--r--arch/x86/include/asm/irq.h12
-rw-r--r--arch/x86/include/asm/irq_remapping.h35
-rw-r--r--arch/x86/include/asm/irq_vectors.h4
-rw-r--r--arch/x86/include/asm/irqflags.h32
-rw-r--r--arch/x86/include/asm/jump_label.h37
-rw-r--r--arch/x86/include/asm/memblock.h23
-rw-r--r--arch/x86/include/asm/module.h7
-rw-r--r--arch/x86/include/asm/mrst.h10
-rw-r--r--arch/x86/include/asm/mwait.h15
-rw-r--r--arch/x86/include/asm/olpc_ofw.h4
-rw-r--r--arch/x86/include/asm/page_32_types.h4
-rw-r--r--arch/x86/include/asm/page_types.h2
-rw-r--r--arch/x86/include/asm/paravirt.h21
-rw-r--r--arch/x86/include/asm/paravirt_types.h1
-rw-r--r--arch/x86/include/asm/percpu.h14
-rw-r--r--arch/x86/include/asm/perf_event_p4.h52
-rw-r--r--arch/x86/include/asm/pgtable.h4
-rw-r--r--arch/x86/include/asm/pgtable_64.h2
-rw-r--r--arch/x86/include/asm/processor.h29
-rw-r--r--arch/x86/include/asm/setup.h5
-rw-r--r--arch/x86/include/asm/swiotlb.h13
-rw-r--r--arch/x86/include/asm/vmi.h269
-rw-r--r--arch/x86/include/asm/vmi_time.h98
-rw-r--r--arch/x86/kernel/Makefile10
-rw-r--r--arch/x86/kernel/acpi/cstate.c11
-rw-r--r--arch/x86/kernel/acpi/sleep.c9
-rw-r--r--arch/x86/kernel/alternative.c71
-rw-r--r--arch/x86/kernel/amd_iommu.c2
-rw-r--r--arch/x86/kernel/amd_iommu_init.c139
-rw-r--r--arch/x86/kernel/amd_nb.c (renamed from arch/x86/kernel/k8.c)56
-rw-r--r--arch/x86/kernel/apb_timer.c60
-rw-r--r--arch/x86/kernel/aperture_64.c31
-rw-r--r--arch/x86/kernel/apic/apic.c91
-rw-r--r--arch/x86/kernel/apic/io_apic.c882
-rw-r--r--arch/x86/kernel/apic/nmi.c2
-rw-r--r--arch/x86/kernel/apic/numaq_32.c3
-rw-r--r--arch/x86/kernel/apic/probe_64.c3
-rw-r--r--arch/x86/kernel/apm_32.c1
-rw-r--r--arch/x86/kernel/check.c16
-rw-r--r--arch/x86/kernel/cpu/amd.c77
-rw-r--r--arch/x86/kernel/cpu/common.c24
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/intel.c2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c14
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c27
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c128
-rw-r--r--arch/x86/kernel/cpu/perf_event.c280
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c13
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c292
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c9
-rw-r--r--arch/x86/kernel/cpu/scattered.c6
-rw-r--r--arch/x86/kernel/crash_dump_64.c3
-rw-r--r--arch/x86/kernel/e820.c191
-rw-r--r--arch/x86/kernel/early-quirks.c2
-rw-r--r--arch/x86/kernel/early_printk.c13
-rw-r--r--arch/x86/kernel/early_printk_mrst.c319
-rw-r--r--arch/x86/kernel/efi.c5
-rw-r--r--arch/x86/kernel/entry_32.S310
-rw-r--r--arch/x86/kernel/entry_64.S114
-rw-r--r--arch/x86/kernel/ftrace.c63
-rw-r--r--arch/x86/kernel/head.c3
-rw-r--r--arch/x86/kernel/head32.c10
-rw-r--r--arch/x86/kernel/head64.c9
-rw-r--r--arch/x86/kernel/hpet.c67
-rw-r--r--arch/x86/kernel/i387.c58
-rw-r--r--arch/x86/kernel/i8259.c63
-rw-r--r--arch/x86/kernel/irq.c32
-rw-r--r--arch/x86/kernel/irq_32.c12
-rw-r--r--arch/x86/kernel/irq_work.c30
-rw-r--r--arch/x86/kernel/irqinit.c23
-rw-r--r--arch/x86/kernel/jump_label.c50
-rw-r--r--arch/x86/kernel/kdebugfs.c1
-rw-r--r--arch/x86/kernel/kprobes.c14
-rw-r--r--arch/x86/kernel/machine_kexec_64.c4
-rw-r--r--arch/x86/kernel/microcode_core.c1
-rw-r--r--arch/x86/kernel/module.c3
-rw-r--r--arch/x86/kernel/mpparse.c5
-rw-r--r--arch/x86/kernel/olpc-xo1.c140
-rw-r--r--arch/x86/kernel/olpc.c89
-rw-r--r--arch/x86/kernel/olpc_ofw.c6
-rw-r--r--arch/x86/kernel/paravirt.c1
-rw-r--r--arch/x86/kernel/pci-calgary_64.c18
-rw-r--r--arch/x86/kernel/pci-dma.c44
-rw-r--r--arch/x86/kernel/pci-gart_64.c33
-rw-r--r--arch/x86/kernel/pci-iommu_table.c89
-rw-r--r--arch/x86/kernel/pci-swiotlb.c44
-rw-r--r--arch/x86/kernel/pmtimer_64.c69
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/setup.c215
-rw-r--r--arch/x86/kernel/setup_percpu.c8
-rw-r--r--arch/x86/kernel/sfi.c4
-rw-r--r--arch/x86/kernel/smpboot.c118
-rw-r--r--arch/x86/kernel/sys_i386_32.c4
-rw-r--r--arch/x86/kernel/tlb_uv.c1
-rw-r--r--arch/x86/kernel/trampoline.c10
-rw-r--r--arch/x86/kernel/traps.c35
-rw-r--r--arch/x86/kernel/tsc.c66
-rw-r--r--arch/x86/kernel/uv_irq.c55
-rw-r--r--arch/x86/kernel/visws_quirks.c140
-rw-r--r--arch/x86/kernel/vmi_32.c893
-rw-r--r--arch/x86/kernel/vmiclock_32.c317
-rw-r--r--arch/x86/kernel/vmlinux.lds.S30
-rw-r--r--arch/x86/kvm/lapic.c3
-rw-r--r--arch/x86/kvm/x86.c7
-rw-r--r--arch/x86/lguest/boot.c18
-rw-r--r--arch/x86/lib/memcpy_32.c199
-rw-r--r--arch/x86/lib/memcpy_64.S158
-rw-r--r--arch/x86/lib/memmove_64.c189
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/fault.c47
-rw-r--r--arch/x86/mm/init.c10
-rw-r--r--arch/x86/mm/init_32.c123
-rw-r--r--arch/x86/mm/init_64.c116
-rw-r--r--arch/x86/mm/ioremap.c5
-rw-r--r--arch/x86/mm/k8topology_64.c12
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c2
-rw-r--r--arch/x86/mm/kmemcheck/opcode.c2
-rw-r--r--arch/x86/mm/memblock.c348
-rw-r--r--arch/x86/mm/memtest.c7
-rw-r--r--arch/x86/mm/numa_32.c30
-rw-r--r--arch/x86/mm/numa_64.c86
-rw-r--r--arch/x86/mm/pgtable.c24
-rw-r--r--arch/x86/mm/srat_32.c3
-rw-r--r--arch/x86/mm/srat_64.c11
-rw-r--r--arch/x86/mm/tlb.c48
-rw-r--r--arch/x86/oprofile/backtrace.c70
-rw-r--r--arch/x86/oprofile/nmi_int.c9
-rw-r--r--arch/x86/oprofile/op_model_amd.c145
-rw-r--r--arch/x86/pci/olpc.c2
-rw-r--r--arch/x86/xen/debugfs.c1
-rw-r--r--arch/x86/xen/enlighten.c3
-rw-r--r--arch/x86/xen/mmu.c32
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c5
-rw-r--r--arch/x86/xen/setup.c3
-rw-r--r--arch/x86/xen/spinlock.c2
171 files changed, 4637 insertions, 4557 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cea0cd9a316f..dfabfefc21c4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -25,14 +25,17 @@ config X86
25 select HAVE_IDE 25 select HAVE_IDE
26 select HAVE_OPROFILE 26 select HAVE_OPROFILE
27 select HAVE_PERF_EVENTS if (!M386 && !M486) 27 select HAVE_PERF_EVENTS if (!M386 && !M486)
28 select HAVE_IRQ_WORK
28 select HAVE_IOREMAP_PROT 29 select HAVE_IOREMAP_PROT
29 select HAVE_KPROBES 30 select HAVE_KPROBES
31 select HAVE_MEMBLOCK
30 select ARCH_WANT_OPTIONAL_GPIOLIB 32 select ARCH_WANT_OPTIONAL_GPIOLIB
31 select ARCH_WANT_FRAME_POINTERS 33 select ARCH_WANT_FRAME_POINTERS
32 select HAVE_DMA_ATTRS 34 select HAVE_DMA_ATTRS
33 select HAVE_KRETPROBES 35 select HAVE_KRETPROBES
34 select HAVE_OPTPROBES 36 select HAVE_OPTPROBES
35 select HAVE_FTRACE_MCOUNT_RECORD 37 select HAVE_FTRACE_MCOUNT_RECORD
38 select HAVE_C_RECORDMCOUNT
36 select HAVE_DYNAMIC_FTRACE 39 select HAVE_DYNAMIC_FTRACE
37 select HAVE_FUNCTION_TRACER 40 select HAVE_FUNCTION_TRACER
38 select HAVE_FUNCTION_GRAPH_TRACER 41 select HAVE_FUNCTION_GRAPH_TRACER
@@ -59,6 +62,12 @@ config X86
59 select ANON_INODES 62 select ANON_INODES
60 select HAVE_ARCH_KMEMCHECK 63 select HAVE_ARCH_KMEMCHECK
61 select HAVE_USER_RETURN_NOTIFIER 64 select HAVE_USER_RETURN_NOTIFIER
65 select HAVE_ARCH_JUMP_LABEL
66 select HAVE_TEXT_POKE_SMP
67 select HAVE_GENERIC_HARDIRQS
68 select HAVE_SPARSE_IRQ
69 select GENERIC_IRQ_PROBE
70 select GENERIC_PENDING_IRQ if SMP
62 71
63config INSTRUCTION_DECODER 72config INSTRUCTION_DECODER
64 def_bool (KPROBES || PERF_EVENTS) 73 def_bool (KPROBES || PERF_EVENTS)
@@ -193,27 +202,10 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
193config ARCH_SUPPORTS_DEBUG_PAGEALLOC 202config ARCH_SUPPORTS_DEBUG_PAGEALLOC
194 def_bool y 203 def_bool y
195 204
196config HAVE_EARLY_RES
197 def_bool y
198
199config HAVE_INTEL_TXT 205config HAVE_INTEL_TXT
200 def_bool y 206 def_bool y
201 depends on EXPERIMENTAL && DMAR && ACPI 207 depends on EXPERIMENTAL && DMAR && ACPI
202 208
203# Use the generic interrupt handling code in kernel/irq/:
204config GENERIC_HARDIRQS
205 def_bool y
206
207config GENERIC_HARDIRQS_NO__DO_IRQ
208 def_bool y
209
210config GENERIC_IRQ_PROBE
211 def_bool y
212
213config GENERIC_PENDING_IRQ
214 def_bool y
215 depends on GENERIC_HARDIRQS && SMP
216
217config USE_GENERIC_SMP_HELPERS 209config USE_GENERIC_SMP_HELPERS
218 def_bool y 210 def_bool y
219 depends on SMP 211 depends on SMP
@@ -296,23 +288,6 @@ config X86_X2APIC
296 288
297 If you don't know what to do here, say N. 289 If you don't know what to do here, say N.
298 290
299config SPARSE_IRQ
300 bool "Support sparse irq numbering"
301 depends on PCI_MSI || HT_IRQ
302 ---help---
303 This enables support for sparse irqs. This is useful for distro
304 kernels that want to define a high CONFIG_NR_CPUS value but still
305 want to have low kernel memory footprint on smaller machines.
306
307 ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread
308 out the irq_desc[] array in a more NUMA-friendly way. )
309
310 If you don't know what to do here, say N.
311
312config NUMA_IRQ_DESC
313 def_bool y
314 depends on SPARSE_IRQ && NUMA
315
316config X86_MPPARSE 291config X86_MPPARSE
317 bool "Enable MPS table" if ACPI 292 bool "Enable MPS table" if ACPI
318 default y 293 default y
@@ -517,25 +492,6 @@ if PARAVIRT_GUEST
517 492
518source "arch/x86/xen/Kconfig" 493source "arch/x86/xen/Kconfig"
519 494
520config VMI
521 bool "VMI Guest support (DEPRECATED)"
522 select PARAVIRT
523 depends on X86_32
524 ---help---
525 VMI provides a paravirtualized interface to the VMware ESX server
526 (it could be used by other hypervisors in theory too, but is not
527 at the moment), by linking the kernel to a GPL-ed ROM module
528 provided by the hypervisor.
529
530 As of September 2009, VMware has started a phased retirement
531 of this feature from VMware's products. Please see
532 feature-removal-schedule.txt for details. If you are
533 planning to enable this option, please note that you cannot
534 live migrate a VMI enabled VM to a future VMware product,
535 which doesn't support VMI. So if you expect your kernel to
536 seamlessly migrate to newer VMware products, keep this
537 disabled.
538
539config KVM_CLOCK 495config KVM_CLOCK
540 bool "KVM paravirtualized clock" 496 bool "KVM paravirtualized clock"
541 select PARAVIRT 497 select PARAVIRT
@@ -590,16 +546,7 @@ config PARAVIRT_DEBUG
590 a paravirt_op is missing when it is called. 546 a paravirt_op is missing when it is called.
591 547
592config NO_BOOTMEM 548config NO_BOOTMEM
593 default y 549 def_bool y
594 bool "Disable Bootmem code"
595 ---help---
596 Use early_res directly instead of bootmem before slab is ready.
597 - allocator (buddy) [generic]
598 - early allocator (bootmem) [generic]
599 - very early allocator (reserve_early*()) [x86]
600 - very very early allocator (early brk model) [x86]
601 So reduce one layer between early allocator to final allocator
602
603 550
604config MEMTEST 551config MEMTEST
605 bool "Memtest" 552 bool "Memtest"
@@ -670,7 +617,7 @@ config GART_IOMMU
670 bool "GART IOMMU support" if EMBEDDED 617 bool "GART IOMMU support" if EMBEDDED
671 default y 618 default y
672 select SWIOTLB 619 select SWIOTLB
673 depends on X86_64 && PCI && K8_NB 620 depends on X86_64 && PCI && AMD_NB
674 ---help--- 621 ---help---
675 Support for full DMA access of devices with 32bit memory access only 622 Support for full DMA access of devices with 32bit memory access only
676 on systems with more than 3GB. This is usually needed for USB, 623 on systems with more than 3GB. This is usually needed for USB,
@@ -795,6 +742,17 @@ config SCHED_MC
795 making when dealing with multi-core CPU chips at a cost of slightly 742 making when dealing with multi-core CPU chips at a cost of slightly
796 increased overhead in some places. If unsure say N here. 743 increased overhead in some places. If unsure say N here.
797 744
745config IRQ_TIME_ACCOUNTING
746 bool "Fine granularity task level IRQ time accounting"
747 default n
748 ---help---
749 Select this option to enable fine granularity task irq time
750 accounting. This is done by reading a timestamp on each
751 transitions between softirq and hardirq state, so there can be a
752 small performance impact.
753
754 If in doubt, say N here.
755
798source "kernel/Kconfig.preempt" 756source "kernel/Kconfig.preempt"
799 757
800config X86_UP_APIC 758config X86_UP_APIC
@@ -1148,6 +1106,9 @@ config X86_PAE
1148config ARCH_PHYS_ADDR_T_64BIT 1106config ARCH_PHYS_ADDR_T_64BIT
1149 def_bool X86_64 || X86_PAE 1107 def_bool X86_64 || X86_PAE
1150 1108
1109config ARCH_DMA_ADDR_T_64BIT
1110 def_bool X86_64 || HIGHMEM64G
1111
1151config DIRECT_GBPAGES 1112config DIRECT_GBPAGES
1152 bool "Enable 1GB pages for kernel pagetables" if EMBEDDED 1113 bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
1153 default y 1114 default y
@@ -1326,25 +1287,34 @@ config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
1326 Set whether the default state of memory_corruption_check is 1287 Set whether the default state of memory_corruption_check is
1327 on or off. 1288 on or off.
1328 1289
1329config X86_RESERVE_LOW_64K 1290config X86_RESERVE_LOW
1330 bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" 1291 int "Amount of low memory, in kilobytes, to reserve for the BIOS"
1331 default y 1292 default 64
1293 range 4 640
1332 ---help--- 1294 ---help---
1333 Reserve the first 64K of physical RAM on BIOSes that are known 1295 Specify the amount of low memory to reserve for the BIOS.
1334 to potentially corrupt that memory range. A numbers of BIOSes are 1296
1335 known to utilize this area during suspend/resume, so it must not 1297 The first page contains BIOS data structures that the kernel
1336 be used by the kernel. 1298 must not use, so that page must always be reserved.
1299
1300 By default we reserve the first 64K of physical RAM, as a
1301 number of BIOSes are known to corrupt that memory range
1302 during events such as suspend/resume or monitor cable
1303 insertion, so it must not be used by the kernel.
1337 1304
1338 Set this to N if you are absolutely sure that you trust the BIOS 1305 You can set this to 4 if you are absolutely sure that you
1339 to get all its memory reservations and usages right. 1306 trust the BIOS to get all its memory reservations and usages
1307 right. If you know your BIOS have problems beyond the
1308 default 64K area, you can set this to 640 to avoid using the
1309 entire low memory range.
1340 1310
1341 If you have doubts about the BIOS (e.g. suspend/resume does not 1311 If you have doubts about the BIOS (e.g. suspend/resume does
1342 work or there's kernel crashes after certain hardware hotplug 1312 not work or there's kernel crashes after certain hardware
1343 events) and it's not AMI or Phoenix, then you might want to enable 1313 hotplug events) then you might want to enable
1344 X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical 1314 X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check
1345 corruption patterns. 1315 typical corruption patterns.
1346 1316
1347 Say Y if unsure. 1317 Leave this to the default value of 64 if you are unsure.
1348 1318
1349config MATH_EMULATION 1319config MATH_EMULATION
1350 bool 1320 bool
@@ -1900,7 +1870,7 @@ config PCI_GODIRECT
1900 bool "Direct" 1870 bool "Direct"
1901 1871
1902config PCI_GOOLPC 1872config PCI_GOOLPC
1903 bool "OLPC" 1873 bool "OLPC XO-1"
1904 depends on OLPC 1874 depends on OLPC
1905 1875
1906config PCI_GOANY 1876config PCI_GOANY
@@ -2061,14 +2031,21 @@ config SCx200HR_TIMER
2061config OLPC 2031config OLPC
2062 bool "One Laptop Per Child support" 2032 bool "One Laptop Per Child support"
2063 select GPIOLIB 2033 select GPIOLIB
2034 select OLPC_OPENFIRMWARE
2064 ---help--- 2035 ---help---
2065 Add support for detecting the unique features of the OLPC 2036 Add support for detecting the unique features of the OLPC
2066 XO hardware. 2037 XO hardware.
2067 2038
2039config OLPC_XO1
2040 tristate "OLPC XO-1 support"
2041 depends on OLPC && PCI
2042 ---help---
2043 Add support for non-essential features of the OLPC XO-1 laptop.
2044
2068config OLPC_OPENFIRMWARE 2045config OLPC_OPENFIRMWARE
2069 bool "Support for OLPC's Open Firmware" 2046 bool "Support for OLPC's Open Firmware"
2070 depends on !X86_64 && !X86_PAE 2047 depends on !X86_64 && !X86_PAE
2071 default y if OLPC 2048 default n
2072 help 2049 help
2073 This option adds support for the implementation of Open Firmware 2050 This option adds support for the implementation of Open Firmware
2074 that is used on the OLPC XO-1 Children's Machine. 2051 that is used on the OLPC XO-1 Children's Machine.
@@ -2076,7 +2053,7 @@ config OLPC_OPENFIRMWARE
2076 2053
2077endif # X86_32 2054endif # X86_32
2078 2055
2079config K8_NB 2056config AMD_NB
2080 def_bool y 2057 def_bool y
2081 depends on CPU_SUP_AMD && PCI 2058 depends on CPU_SUP_AMD && PCI
2082 2059
@@ -2125,6 +2102,10 @@ config HAVE_ATOMIC_IOMAP
2125 def_bool y 2102 def_bool y
2126 depends on X86_32 2103 depends on X86_32
2127 2104
2105config HAVE_TEXT_POKE_SMP
2106 bool
2107 select STOP_MACHINE if SMP
2108
2128source "net/Kconfig" 2109source "net/Kconfig"
2129 2110
2130source "drivers/Kconfig" 2111source "drivers/Kconfig"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 75085080b63e..b59ee765414e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -43,6 +43,10 @@ config EARLY_PRINTK
43 with klogd/syslogd or the X server. You should normally N here, 43 with klogd/syslogd or the X server. You should normally N here,
44 unless you want to debug such a crash. 44 unless you want to debug such a crash.
45 45
46config EARLY_PRINTK_MRST
47 bool "Early printk for MRST platform support"
48 depends on EARLY_PRINTK && X86_MRST
49
46config EARLY_PRINTK_DBGP 50config EARLY_PRINTK_DBGP
47 bool "Early printk via EHCI debug port" 51 bool "Early printk via EHCI debug port"
48 depends on EARLY_PRINTK && PCI 52 depends on EARLY_PRINTK && PCI
@@ -121,16 +125,6 @@ config DEBUG_NX_TEST
121 and the software setup of this feature. 125 and the software setup of this feature.
122 If in doubt, say "N" 126 If in doubt, say "N"
123 127
124config 4KSTACKS
125 bool "Use 4Kb for kernel stacks instead of 8Kb"
126 depends on X86_32
127 ---help---
128 If you say Y here the kernel will use a 4Kb stacksize for the
129 kernel stack attached to each process/thread. This facilitates
130 running more threads on a system and also reduces the pressure
131 on the VM subsystem for higher order allocations. This option
132 will also use IRQ stacks to compensate for the reduced stackspace.
133
134config DOUBLEFAULT 128config DOUBLEFAULT
135 default y 129 default y
136 bool "Enable doublefault exception handler" if EMBEDDED 130 bool "Enable doublefault exception handler" if EMBEDDED
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e8c8881351b3..b02e509072a7 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -96,8 +96,12 @@ cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_en
96# is .cfi_signal_frame supported too? 96# is .cfi_signal_frame supported too?
97cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) 97cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
98cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) 98cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
99KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) 99
100KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) 100# does binutils support specific instructions?
101asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
102
103KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
104KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
101 105
102LDFLAGS := -m elf_$(UTS_MACHINE) 106LDFLAGS := -m elf_$(UTS_MACHINE)
103 107
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index bc6abb7bc7ee..76561d20ea2f 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,6 +4,7 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/stddef.h> 5#include <linux/stddef.h>
6#include <linux/stringify.h> 6#include <linux/stringify.h>
7#include <linux/jump_label.h>
7#include <asm/asm.h> 8#include <asm/asm.h>
8 9
9/* 10/*
@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
160#define __parainstructions_end NULL 161#define __parainstructions_end NULL
161#endif 162#endif
162 163
164extern void *text_poke_early(void *addr, const void *opcode, size_t len);
165
163/* 166/*
164 * Clear and restore the kernel write-protection flag on the local CPU. 167 * Clear and restore the kernel write-protection flag on the local CPU.
165 * Allows the kernel to edit read-only pages. 168 * Allows the kernel to edit read-only pages.
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
180extern void *text_poke(void *addr, const void *opcode, size_t len); 183extern void *text_poke(void *addr, const void *opcode, size_t len);
181extern void *text_poke_smp(void *addr, const void *opcode, size_t len); 184extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
182 185
186#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
187#define IDEAL_NOP_SIZE_5 5
188extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
189extern void arch_init_ideal_nop5(void);
190#else
191static inline void arch_init_ideal_nop5(void) {}
192#endif
193
183#endif /* _ASM_X86_ALTERNATIVE_H */ 194#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index 5af2982133b5..a6863a2dec1f 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -24,11 +24,11 @@
24 24
25#ifdef CONFIG_AMD_IOMMU 25#ifdef CONFIG_AMD_IOMMU
26 26
27extern void amd_iommu_detect(void); 27extern int amd_iommu_detect(void);
28 28
29#else 29#else
30 30
31static inline void amd_iommu_detect(void) { } 31static inline int amd_iommu_detect(void) { return -ENODEV; }
32 32
33#endif 33#endif
34 34
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index cb030374b90a..916bc8111a01 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify it 5 * This program is free software; you can redistribute it and/or modify it
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 08616180deaf..e3509fc303bf 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -416,13 +416,22 @@ struct amd_iommu {
416 struct dma_ops_domain *default_dom; 416 struct dma_ops_domain *default_dom;
417 417
418 /* 418 /*
419 * This array is required to work around a potential BIOS bug. 419 * We can't rely on the BIOS to restore all values on reinit, so we
420 * The BIOS may miss to restore parts of the PCI configuration 420 * need to stash them
421 * space when the system resumes from S3. The result is that the
422 * IOMMU does not execute commands anymore which leads to system
423 * failure.
424 */ 421 */
425 u32 cache_cfg[4]; 422
423 /* The iommu BAR */
424 u32 stored_addr_lo;
425 u32 stored_addr_hi;
426
427 /*
428 * Each iommu has 6 l1s, each of which is documented as having 0x12
429 * registers
430 */
431 u32 stored_l1[6][0x12];
432
433 /* The l2 indirect registers */
434 u32 stored_l2[0x83];
426}; 435};
427 436
428/* 437/*
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/amd_nb.h
index af00bd1d2089..c8517f81b21e 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -1,5 +1,5 @@
1#ifndef _ASM_X86_K8_H 1#ifndef _ASM_X86_AMD_NB_H
2#define _ASM_X86_K8_H 2#define _ASM_X86_AMD_NB_H
3 3
4#include <linux/pci.h> 4#include <linux/pci.h>
5 5
@@ -7,24 +7,27 @@ extern struct pci_device_id k8_nb_ids[];
7struct bootnode; 7struct bootnode;
8 8
9extern int early_is_k8_nb(u32 value); 9extern int early_is_k8_nb(u32 value);
10extern struct pci_dev **k8_northbridges;
11extern int num_k8_northbridges;
12extern int cache_k8_northbridges(void); 10extern int cache_k8_northbridges(void);
13extern void k8_flush_garts(void); 11extern void k8_flush_garts(void);
14extern int k8_get_nodes(struct bootnode *nodes); 12extern int k8_get_nodes(struct bootnode *nodes);
15extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); 13extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
16extern int k8_scan_nodes(void); 14extern int k8_scan_nodes(void);
17 15
18#ifdef CONFIG_K8_NB 16struct k8_northbridge_info {
19extern int num_k8_northbridges; 17 u16 num;
18 u8 gart_supported;
19 struct pci_dev **nb_misc;
20};
21extern struct k8_northbridge_info k8_northbridges;
22
23#ifdef CONFIG_AMD_NB
20 24
21static inline struct pci_dev *node_to_k8_nb_misc(int node) 25static inline struct pci_dev *node_to_k8_nb_misc(int node)
22{ 26{
23 return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; 27 return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
24} 28}
25 29
26#else 30#else
27#define num_k8_northbridges 0
28 31
29static inline struct pci_dev *node_to_k8_nb_misc(int node) 32static inline struct pci_dev *node_to_k8_nb_misc(int node)
30{ 33{
@@ -33,4 +36,4 @@ static inline struct pci_dev *node_to_k8_nb_misc(int node)
33#endif 36#endif
34 37
35 38
36#endif /* _ASM_X86_K8_H */ 39#endif /* _ASM_X86_AMD_NB_H */
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
index a69b1ac9eaf8..2fefa501d3ba 100644
--- a/arch/x86/include/asm/apb_timer.h
+++ b/arch/x86/include/asm/apb_timer.h
@@ -54,7 +54,6 @@ extern struct clock_event_device *global_clock_event;
54extern unsigned long apbt_quick_calibrate(void); 54extern unsigned long apbt_quick_calibrate(void);
55extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); 55extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
56extern void apbt_setup_secondary_clock(void); 56extern void apbt_setup_secondary_clock(void);
57extern unsigned int boot_cpu_id;
58 57
59extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); 58extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
60extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); 59extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1fa03e04ae44..286de34b0ed6 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -252,9 +252,7 @@ static inline int apic_is_clustered_box(void)
252} 252}
253#endif 253#endif
254 254
255extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask); 255extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
256extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
257
258 256
259#else /* !CONFIG_X86_LOCAL_APIC */ 257#else /* !CONFIG_X86_LOCAL_APIC */
260static inline void lapic_shutdown(void) { } 258static inline void lapic_shutdown(void) { }
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 7fe3b3060f08..a859ca461fb0 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -131,6 +131,7 @@
131#define APIC_EILVTn(n) (0x500 + 0x10 * n) 131#define APIC_EILVTn(n) (0x500 + 0x10 * n)
132#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ 132#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
133#define APIC_EILVT_NR_AMD_10H 4 133#define APIC_EILVT_NR_AMD_10H 4
134#define APIC_EILVT_NR_MAX APIC_EILVT_NR_AMD_10H
134#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) 135#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF)
135#define APIC_EILVT_MSG_FIX 0x0 136#define APIC_EILVT_MSG_FIX 0x0
136#define APIC_EILVT_MSG_SMI 0x2 137#define APIC_EILVT_MSG_SMI 0x2
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index bafd80defa43..903683b07e42 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -440,6 +440,8 @@ static inline int fls(int x)
440 440
441#ifdef __KERNEL__ 441#ifdef __KERNEL__
442 442
443#include <asm-generic/bitops/find.h>
444
443#include <asm-generic/bitops/sched.h> 445#include <asm-generic/bitops/sched.h>
444 446
445#define ARCH_HAS_FAST_MULTIPLIER 1 447#define ARCH_HAS_FAST_MULTIPLIER 1
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
index 0918654305af..0d467b338835 100644
--- a/arch/x86/include/asm/calgary.h
+++ b/arch/x86/include/asm/calgary.h
@@ -62,9 +62,9 @@ struct cal_chipset_ops {
62extern int use_calgary; 62extern int use_calgary;
63 63
64#ifdef CONFIG_CALGARY_IOMMU 64#ifdef CONFIG_CALGARY_IOMMU
65extern void detect_calgary(void); 65extern int detect_calgary(void);
66#else 66#else
67static inline void detect_calgary(void) { return; } 67static inline int detect_calgary(void) { return -ENODEV; }
68#endif 68#endif
69 69
70#endif /* _ASM_X86_CALGARY_H */ 70#endif /* _ASM_X86_CALGARY_H */
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index b185091bf19c..4fab24de26b1 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -32,6 +32,5 @@ extern void arch_unregister_cpu(int);
32 32
33DECLARE_PER_CPU(int, cpu_state); 33DECLARE_PER_CPU(int, cpu_state);
34 34
35extern unsigned int boot_cpu_id;
36 35
37#endif /* _ASM_X86_CPU_H */ 36#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3f76523589af..220e2ea08e80 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -152,10 +152,14 @@
152#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ 152#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
153#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ 153#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
154#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ 154#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
155#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ 155#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */
156#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ 156#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
157#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ 157#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
158#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
159#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
158#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ 160#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
161#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
162#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
159 163
160/* 164/*
161 * Auxiliary flags: Linux defined - For features scattered in various 165 * Auxiliary flags: Linux defined - For features scattered in various
@@ -180,6 +184,13 @@
180#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */ 184#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */
181#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ 185#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */
182#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ 186#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */
187#define X86_FEATURE_TSCRATEMSR (8*32+ 9) /* "tsc_scale" AMD TSC scaling support */
188#define X86_FEATURE_VMCBCLEAN (8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */
189#define X86_FEATURE_FLUSHBYASID (8*32+11) /* AMD flush-by-ASID support */
190#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */
191#define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */
192#define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */
193
183 194
184/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ 195/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
185#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ 196#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 733f7e91e7a9..326099199318 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -89,6 +89,16 @@
89 CFI_ADJUST_CFA_OFFSET -8 89 CFI_ADJUST_CFA_OFFSET -8
90 .endm 90 .endm
91 91
92 .macro pushfq_cfi
93 pushfq
94 CFI_ADJUST_CFA_OFFSET 8
95 .endm
96
97 .macro popfq_cfi
98 popfq
99 CFI_ADJUST_CFA_OFFSET -8
100 .endm
101
92 .macro movq_cfi reg offset=0 102 .macro movq_cfi reg offset=0
93 movq %\reg, \offset(%rsp) 103 movq %\reg, \offset(%rsp)
94 CFI_REL_OFFSET \reg, \offset 104 CFI_REL_OFFSET \reg, \offset
@@ -109,6 +119,16 @@
109 CFI_ADJUST_CFA_OFFSET -4 119 CFI_ADJUST_CFA_OFFSET -4
110 .endm 120 .endm
111 121
122 .macro pushfl_cfi
123 pushfl
124 CFI_ADJUST_CFA_OFFSET 4
125 .endm
126
127 .macro popfl_cfi
128 popfl
129 CFI_ADJUST_CFA_OFFSET -4
130 .endm
131
112 .macro movl_cfi reg offset=0 132 .macro movl_cfi reg offset=0
113 movl %\reg, \offset(%esp) 133 movl %\reg, \offset(%esp)
114 CFI_REL_OFFSET \reg, \offset 134 CFI_REL_OFFSET \reg, \offset
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index ec8a52d14ab1..5be1542fbfaf 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -112,23 +112,13 @@ static inline void early_memtest(unsigned long start, unsigned long end)
112} 112}
113#endif 113#endif
114 114
115extern unsigned long end_user_pfn;
116
117extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
118extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
119extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
120#include <linux/early_res.h>
121
122extern unsigned long e820_end_of_ram_pfn(void); 115extern unsigned long e820_end_of_ram_pfn(void);
123extern unsigned long e820_end_of_low_ram_pfn(void); 116extern unsigned long e820_end_of_low_ram_pfn(void);
124extern int e820_find_active_region(const struct e820entry *ei, 117extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
125 unsigned long start_pfn, 118
126 unsigned long last_pfn, 119void memblock_x86_fill(void);
127 unsigned long *ei_startpfn, 120void memblock_find_dma_reserve(void);
128 unsigned long *ei_endpfn); 121
129extern void e820_register_active_regions(int nid, unsigned long start_pfn,
130 unsigned long end_pfn);
131extern u64 e820_hole_size(u64 start, u64 end);
132extern void finish_e820_parsing(void); 122extern void finish_e820_parsing(void);
133extern void e820_reserve_resources(void); 123extern void e820_reserve_resources(void);
134extern void e820_reserve_resources_late(void); 124extern void e820_reserve_resources_late(void);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8406ed7f9926..8e4a16508d4e 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,7 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
90#endif /* CONFIG_X86_32 */ 90#endif /* CONFIG_X86_32 */
91 91
92extern int add_efi_memmap; 92extern int add_efi_memmap;
93extern void efi_reserve_early(void); 93extern void efi_memblock_x86_reserve_range(void);
94extern void efi_call_phys_prelog(void); 94extern void efi_call_phys_prelog(void);
95extern void efi_call_phys_epilog(void); 95extern void efi_call_phys_epilog(void);
96 96
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 8e8ec663a98f..b8e96a18676b 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
49BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) 49BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
50BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) 50BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
51 51
52#ifdef CONFIG_PERF_EVENTS 52#ifdef CONFIG_IRQ_WORK
53BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) 53BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
54#endif 54#endif
55 55
56#ifdef CONFIG_X86_THERMAL_VECTOR 56#ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index d07b44f7d1dc..4d293dced62f 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -214,5 +214,20 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
214 BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); 214 BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
215 return __virt_to_fix(vaddr); 215 return __virt_to_fix(vaddr);
216} 216}
217
218/* Return an pointer with offset calculated */
219static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx,
220 phys_addr_t phys, pgprot_t flags)
221{
222 __set_fixmap(idx, phys, flags);
223 return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
224}
225
226#define set_fixmap_offset(idx, phys) \
227 __set_fixmap_offset(idx, phys, PAGE_KERNEL)
228
229#define set_fixmap_offset_nocache(idx, phys) \
230 __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE)
231
217#endif /* !__ASSEMBLY__ */ 232#endif /* !__ASSEMBLY__ */
218#endif /* _ASM_X86_FIXMAP_H */ 233#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 4ac5b0f33fc1..43085bfc99c3 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -17,6 +17,7 @@ extern int fix_aperture;
17#define GARTEN (1<<0) 17#define GARTEN (1<<0)
18#define DISGARTCPU (1<<4) 18#define DISGARTCPU (1<<4)
19#define DISGARTIO (1<<5) 19#define DISGARTIO (1<<5)
20#define DISTLBWALKPRB (1<<6)
20 21
21/* GART cache control register bits. */ 22/* GART cache control register bits. */
22#define INVGART (1<<0) 23#define INVGART (1<<0)
@@ -27,7 +28,6 @@ extern int fix_aperture;
27#define AMD64_GARTAPERTUREBASE 0x94 28#define AMD64_GARTAPERTUREBASE 0x94
28#define AMD64_GARTTABLEBASE 0x98 29#define AMD64_GARTTABLEBASE 0x98
29#define AMD64_GARTCACHECTL 0x9c 30#define AMD64_GARTCACHECTL 0x9c
30#define AMD64_GARTEN (1<<0)
31 31
32#ifdef CONFIG_GART_IOMMU 32#ifdef CONFIG_GART_IOMMU
33extern int gart_iommu_aperture; 33extern int gart_iommu_aperture;
@@ -37,7 +37,7 @@ extern int gart_iommu_aperture_disabled;
37extern void early_gart_iommu_check(void); 37extern void early_gart_iommu_check(void);
38extern int gart_iommu_init(void); 38extern int gart_iommu_init(void);
39extern void __init gart_parse_options(char *); 39extern void __init gart_parse_options(char *);
40extern void gart_iommu_hole_init(void); 40extern int gart_iommu_hole_init(void);
41 41
42#else 42#else
43#define gart_iommu_aperture 0 43#define gart_iommu_aperture 0
@@ -50,13 +50,27 @@ static inline void early_gart_iommu_check(void)
50static inline void gart_parse_options(char *options) 50static inline void gart_parse_options(char *options)
51{ 51{
52} 52}
53static inline void gart_iommu_hole_init(void) 53static inline int gart_iommu_hole_init(void)
54{ 54{
55 return -ENODEV;
55} 56}
56#endif 57#endif
57 58
58extern int agp_amd64_init(void); 59extern int agp_amd64_init(void);
59 60
61static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order)
62{
63 u32 ctl;
64
65 /*
66 * Don't enable translation but enable GART IO and CPU accesses.
67 * Also, set DISTLBWALKPRB since GART tables memory is UC.
68 */
69 ctl = DISTLBWALKPRB | order << 1;
70
71 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
72}
73
60static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) 74static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
61{ 75{
62 u32 tmp, ctl; 76 u32 tmp, ctl;
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index aeab29aee617..55e4de613f0e 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,7 +14,7 @@ typedef struct {
14#endif 14#endif
15 unsigned int x86_platform_ipis; /* arch dependent */ 15 unsigned int x86_platform_ipis; /* arch dependent */
16 unsigned int apic_perf_irqs; 16 unsigned int apic_perf_irqs;
17 unsigned int apic_pending_irqs; 17 unsigned int apic_irq_work_irqs;
18#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
19 unsigned int irq_resched_count; 19 unsigned int irq_resched_count;
20 unsigned int irq_call_count; 20 unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 1d5c08a1bdfd..2c392d663dce 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -74,10 +74,12 @@ extern void hpet_disable(void);
74extern unsigned int hpet_readl(unsigned int a); 74extern unsigned int hpet_readl(unsigned int a);
75extern void force_hpet_resume(void); 75extern void force_hpet_resume(void);
76 76
77extern void hpet_msi_unmask(unsigned int irq); 77struct irq_data;
78extern void hpet_msi_mask(unsigned int irq); 78extern void hpet_msi_unmask(struct irq_data *data);
79extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg); 79extern void hpet_msi_mask(struct irq_data *data);
80extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg); 80struct hpet_dev;
81extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
82extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
81 83
82#ifdef CONFIG_PCI_MSI 84#ifdef CONFIG_PCI_MSI
83extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); 85extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id);
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 46c0fe05f230..0274ec5a7e62 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,7 +29,7 @@
29extern void apic_timer_interrupt(void); 29extern void apic_timer_interrupt(void);
30extern void x86_platform_ipi(void); 30extern void x86_platform_ipi(void);
31extern void error_interrupt(void); 31extern void error_interrupt(void);
32extern void perf_pending_interrupt(void); 32extern void irq_work_interrupt(void);
33 33
34extern void spurious_interrupt(void); 34extern void spurious_interrupt(void);
35extern void thermal_interrupt(void); 35extern void thermal_interrupt(void);
@@ -78,6 +78,13 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
78 irq_attr->polarity = polarity; 78 irq_attr->polarity = polarity;
79} 79}
80 80
81struct irq_2_iommu {
82 struct intel_iommu *iommu;
83 u16 irte_index;
84 u16 sub_handle;
85 u8 irte_mask;
86};
87
81/* 88/*
82 * This is performance-critical, we want to do it O(1) 89 * This is performance-critical, we want to do it O(1)
83 * 90 *
@@ -89,15 +96,17 @@ struct irq_cfg {
89 cpumask_var_t old_domain; 96 cpumask_var_t old_domain;
90 u8 vector; 97 u8 vector;
91 u8 move_in_progress : 1; 98 u8 move_in_progress : 1;
99#ifdef CONFIG_INTR_REMAP
100 struct irq_2_iommu irq_2_iommu;
101#endif
92}; 102};
93 103
94extern struct irq_cfg *irq_cfg(unsigned int);
95extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); 104extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
96extern void send_cleanup_vector(struct irq_cfg *); 105extern void send_cleanup_vector(struct irq_cfg *);
97 106
98struct irq_desc; 107struct irq_data;
99extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *, 108int __ioapic_set_affinity(struct irq_data *, const struct cpumask *,
100 unsigned int *dest_id); 109 unsigned int *dest_id);
101extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); 110extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
102extern void setup_ioapic_dest(void); 111extern void setup_ioapic_dest(void);
103 112
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index a73a8d5a5e69..4aa2bb3b242a 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -55,6 +55,12 @@ extern int save_i387_xstate_ia32(void __user *buf);
55extern int restore_i387_xstate_ia32(void __user *buf); 55extern int restore_i387_xstate_ia32(void __user *buf);
56#endif 56#endif
57 57
58#ifdef CONFIG_MATH_EMULATION
59extern void finit_soft_fpu(struct i387_soft_struct *soft);
60#else
61static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
62#endif
63
58#define X87_FSW_ES (1 << 7) /* Exception Summary */ 64#define X87_FSW_ES (1 << 7) /* Exception Summary */
59 65
60static __always_inline __pure bool use_xsaveopt(void) 66static __always_inline __pure bool use_xsaveopt(void)
@@ -67,6 +73,11 @@ static __always_inline __pure bool use_xsave(void)
67 return static_cpu_has(X86_FEATURE_XSAVE); 73 return static_cpu_has(X86_FEATURE_XSAVE);
68} 74}
69 75
76static __always_inline __pure bool use_fxsr(void)
77{
78 return static_cpu_has(X86_FEATURE_FXSR);
79}
80
70extern void __sanitize_i387_state(struct task_struct *); 81extern void __sanitize_i387_state(struct task_struct *);
71 82
72static inline void sanitize_i387_state(struct task_struct *tsk) 83static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -77,19 +88,11 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
77} 88}
78 89
79#ifdef CONFIG_X86_64 90#ifdef CONFIG_X86_64
80
81/* Ignore delayed exceptions from user space */
82static inline void tolerant_fwait(void)
83{
84 asm volatile("1: fwait\n"
85 "2:\n"
86 _ASM_EXTABLE(1b, 2b));
87}
88
89static inline int fxrstor_checking(struct i387_fxsave_struct *fx) 91static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
90{ 92{
91 int err; 93 int err;
92 94
95 /* See comment in fxsave() below. */
93 asm volatile("1: rex64/fxrstor (%[fx])\n\t" 96 asm volatile("1: rex64/fxrstor (%[fx])\n\t"
94 "2:\n" 97 "2:\n"
95 ".section .fixup,\"ax\"\n" 98 ".section .fixup,\"ax\"\n"
@@ -98,44 +101,10 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
98 ".previous\n" 101 ".previous\n"
99 _ASM_EXTABLE(1b, 3b) 102 _ASM_EXTABLE(1b, 3b)
100 : [err] "=r" (err) 103 : [err] "=r" (err)
101#if 0 /* See comment in fxsave() below. */ 104 : [fx] "R" (fx), "m" (*fx), "0" (0));
102 : [fx] "r" (fx), "m" (*fx), "0" (0));
103#else
104 : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
105#endif
106 return err; 105 return err;
107} 106}
108 107
109/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
110 is pending. Clear the x87 state here by setting it to fixed
111 values. The kernel data segment can be sometimes 0 and sometimes
112 new user value. Both should be ok.
113 Use the PDA as safe address because it should be already in L1. */
114static inline void fpu_clear(struct fpu *fpu)
115{
116 struct xsave_struct *xstate = &fpu->state->xsave;
117 struct i387_fxsave_struct *fx = &fpu->state->fxsave;
118
119 /*
120 * xsave header may indicate the init state of the FP.
121 */
122 if (use_xsave() &&
123 !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
124 return;
125
126 if (unlikely(fx->swd & X87_FSW_ES))
127 asm volatile("fnclex");
128 alternative_input(ASM_NOP8 ASM_NOP2,
129 " emms\n" /* clear stack tags */
130 " fildl %%gs:0", /* load to clear state */
131 X86_FEATURE_FXSAVE_LEAK);
132}
133
134static inline void clear_fpu_state(struct task_struct *tsk)
135{
136 fpu_clear(&tsk->thread.fpu);
137}
138
139static inline int fxsave_user(struct i387_fxsave_struct __user *fx) 108static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
140{ 109{
141 int err; 110 int err;
@@ -149,6 +118,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
149 if (unlikely(err)) 118 if (unlikely(err))
150 return -EFAULT; 119 return -EFAULT;
151 120
121 /* See comment in fxsave() below. */
152 asm volatile("1: rex64/fxsave (%[fx])\n\t" 122 asm volatile("1: rex64/fxsave (%[fx])\n\t"
153 "2:\n" 123 "2:\n"
154 ".section .fixup,\"ax\"\n" 124 ".section .fixup,\"ax\"\n"
@@ -157,11 +127,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
157 ".previous\n" 127 ".previous\n"
158 _ASM_EXTABLE(1b, 3b) 128 _ASM_EXTABLE(1b, 3b)
159 : [err] "=r" (err), "=m" (*fx) 129 : [err] "=r" (err), "=m" (*fx)
160#if 0 /* See comment in fxsave() below. */ 130 : [fx] "R" (fx), "0" (0));
161 : [fx] "r" (fx), "0" (0));
162#else
163 : [fx] "cdaSDb" (fx), "0" (0));
164#endif
165 if (unlikely(err) && 131 if (unlikely(err) &&
166 __clear_user(fx, sizeof(struct i387_fxsave_struct))) 132 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
167 err = -EFAULT; 133 err = -EFAULT;
@@ -175,56 +141,29 @@ static inline void fpu_fxsave(struct fpu *fpu)
175 uses any extended registers for addressing, a second REX prefix 141 uses any extended registers for addressing, a second REX prefix
176 will be generated (to the assembler, rex64 followed by semicolon 142 will be generated (to the assembler, rex64 followed by semicolon
177 is a separate instruction), and hence the 64-bitness is lost. */ 143 is a separate instruction), and hence the 64-bitness is lost. */
178#if 0 144
145#ifdef CONFIG_AS_FXSAVEQ
179 /* Using "fxsaveq %0" would be the ideal choice, but is only supported 146 /* Using "fxsaveq %0" would be the ideal choice, but is only supported
180 starting with gas 2.16. */ 147 starting with gas 2.16. */
181 __asm__ __volatile__("fxsaveq %0" 148 __asm__ __volatile__("fxsaveq %0"
182 : "=m" (fpu->state->fxsave)); 149 : "=m" (fpu->state->fxsave));
183#elif 0 150#else
184 /* Using, as a workaround, the properly prefixed form below isn't 151 /* Using, as a workaround, the properly prefixed form below isn't
185 accepted by any binutils version so far released, complaining that 152 accepted by any binutils version so far released, complaining that
186 the same type of prefix is used twice if an extended register is 153 the same type of prefix is used twice if an extended register is
187 needed for addressing (fix submitted to mainline 2005-11-21). */ 154 needed for addressing (fix submitted to mainline 2005-11-21).
188 __asm__ __volatile__("rex64/fxsave %0" 155 asm volatile("rex64/fxsave %0"
189 : "=m" (fpu->state->fxsave)); 156 : "=m" (fpu->state->fxsave));
190#else 157 This, however, we can work around by forcing the compiler to select
191 /* This, however, we can work around by forcing the compiler to select
192 an addressing mode that doesn't require extended registers. */ 158 an addressing mode that doesn't require extended registers. */
193 __asm__ __volatile__("rex64/fxsave (%1)" 159 asm volatile("rex64/fxsave (%[fx])"
194 : "=m" (fpu->state->fxsave) 160 : "=m" (fpu->state->fxsave)
195 : "cdaSDb" (&fpu->state->fxsave)); 161 : [fx] "R" (&fpu->state->fxsave));
196#endif 162#endif
197} 163}
198 164
199static inline void fpu_save_init(struct fpu *fpu)
200{
201 if (use_xsave())
202 fpu_xsave(fpu);
203 else
204 fpu_fxsave(fpu);
205
206 fpu_clear(fpu);
207}
208
209static inline void __save_init_fpu(struct task_struct *tsk)
210{
211 fpu_save_init(&tsk->thread.fpu);
212 task_thread_info(tsk)->status &= ~TS_USEDFPU;
213}
214
215#else /* CONFIG_X86_32 */ 165#else /* CONFIG_X86_32 */
216 166
217#ifdef CONFIG_MATH_EMULATION
218extern void finit_soft_fpu(struct i387_soft_struct *soft);
219#else
220static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
221#endif
222
223static inline void tolerant_fwait(void)
224{
225 asm volatile("fnclex ; fwait");
226}
227
228/* perform fxrstor iff the processor has extended states, otherwise frstor */ 167/* perform fxrstor iff the processor has extended states, otherwise frstor */
229static inline int fxrstor_checking(struct i387_fxsave_struct *fx) 168static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
230{ 169{
@@ -241,6 +180,14 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
241 return 0; 180 return 0;
242} 181}
243 182
183static inline void fpu_fxsave(struct fpu *fpu)
184{
185 asm volatile("fxsave %[fx]"
186 : [fx] "=m" (fpu->state->fxsave));
187}
188
189#endif /* CONFIG_X86_64 */
190
244/* We need a safe address that is cheap to find and that is already 191/* We need a safe address that is cheap to find and that is already
245 in L1 during context switch. The best choices are unfortunately 192 in L1 during context switch. The best choices are unfortunately
246 different for UP and SMP */ 193 different for UP and SMP */
@@ -256,47 +203,33 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
256static inline void fpu_save_init(struct fpu *fpu) 203static inline void fpu_save_init(struct fpu *fpu)
257{ 204{
258 if (use_xsave()) { 205 if (use_xsave()) {
259 struct xsave_struct *xstate = &fpu->state->xsave;
260 struct i387_fxsave_struct *fx = &fpu->state->fxsave;
261
262 fpu_xsave(fpu); 206 fpu_xsave(fpu);
263 207
264 /* 208 /*
265 * xsave header may indicate the init state of the FP. 209 * xsave header may indicate the init state of the FP.
266 */ 210 */
267 if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) 211 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
268 goto end; 212 return;
269 213 } else if (use_fxsr()) {
270 if (unlikely(fx->swd & X87_FSW_ES)) 214 fpu_fxsave(fpu);
271 asm volatile("fnclex"); 215 } else {
272 216 asm volatile("fsave %[fx]; fwait"
273 /* 217 : [fx] "=m" (fpu->state->fsave));
274 * we can do a simple return here or be paranoid :) 218 return;
275 */
276 goto clear_state;
277 } 219 }
278 220
279 /* Use more nops than strictly needed in case the compiler 221 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
280 varies code */ 222 asm volatile("fnclex");
281 alternative_input( 223
282 "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
283 "fxsave %[fx]\n"
284 "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
285 X86_FEATURE_FXSR,
286 [fx] "m" (fpu->state->fxsave),
287 [fsw] "m" (fpu->state->fxsave.swd) : "memory");
288clear_state:
289 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 224 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
290 is pending. Clear the x87 state here by setting it to fixed 225 is pending. Clear the x87 state here by setting it to fixed
291 values. safe_address is a random variable that should be in L1 */ 226 values. safe_address is a random variable that should be in L1 */
292 alternative_input( 227 alternative_input(
293 GENERIC_NOP8 GENERIC_NOP2, 228 ASM_NOP8 ASM_NOP2,
294 "emms\n\t" /* clear stack tags */ 229 "emms\n\t" /* clear stack tags */
295 "fildl %[addr]", /* set F?P to defined value */ 230 "fildl %P[addr]", /* set F?P to defined value */
296 X86_FEATURE_FXSAVE_LEAK, 231 X86_FEATURE_FXSAVE_LEAK,
297 [addr] "m" (safe_address)); 232 [addr] "m" (safe_address));
298end:
299 ;
300} 233}
301 234
302static inline void __save_init_fpu(struct task_struct *tsk) 235static inline void __save_init_fpu(struct task_struct *tsk)
@@ -305,9 +238,6 @@ static inline void __save_init_fpu(struct task_struct *tsk)
305 task_thread_info(tsk)->status &= ~TS_USEDFPU; 238 task_thread_info(tsk)->status &= ~TS_USEDFPU;
306} 239}
307 240
308
309#endif /* CONFIG_X86_64 */
310
311static inline int fpu_fxrstor_checking(struct fpu *fpu) 241static inline int fpu_fxrstor_checking(struct fpu *fpu)
312{ 242{
313 return fxrstor_checking(&fpu->state->fxsave); 243 return fxrstor_checking(&fpu->state->fxsave);
@@ -344,7 +274,10 @@ static inline void __unlazy_fpu(struct task_struct *tsk)
344static inline void __clear_fpu(struct task_struct *tsk) 274static inline void __clear_fpu(struct task_struct *tsk)
345{ 275{
346 if (task_thread_info(tsk)->status & TS_USEDFPU) { 276 if (task_thread_info(tsk)->status & TS_USEDFPU) {
347 tolerant_fwait(); 277 /* Ignore delayed exceptions from user space */
278 asm volatile("1: fwait\n"
279 "2:\n"
280 _ASM_EXTABLE(1b, 2b));
348 task_thread_info(tsk)->status &= ~TS_USEDFPU; 281 task_thread_info(tsk)->status &= ~TS_USEDFPU;
349 stts(); 282 stts();
350 } 283 }
@@ -405,19 +338,6 @@ static inline void irq_ts_restore(int TS_state)
405 stts(); 338 stts();
406} 339}
407 340
408#ifdef CONFIG_X86_64
409
410static inline void save_init_fpu(struct task_struct *tsk)
411{
412 __save_init_fpu(tsk);
413 stts();
414}
415
416#define unlazy_fpu __unlazy_fpu
417#define clear_fpu __clear_fpu
418
419#else /* CONFIG_X86_32 */
420
421/* 341/*
422 * These disable preemption on their own and are safe 342 * These disable preemption on their own and are safe
423 */ 343 */
@@ -443,8 +363,6 @@ static inline void clear_fpu(struct task_struct *tsk)
443 preempt_enable(); 363 preempt_enable();
444} 364}
445 365
446#endif /* CONFIG_X86_64 */
447
448/* 366/*
449 * i387 state interaction 367 * i387 state interaction
450 */ 368 */
@@ -508,7 +426,4 @@ extern void fpu_finit(struct fpu *fpu);
508 426
509#endif /* __ASSEMBLY__ */ 427#endif /* __ASSEMBLY__ */
510 428
511#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
512#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5
513
514#endif /* _ASM_X86_I387_H */ 429#endif /* _ASM_X86_I387_H */
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 1655147646aa..a20365953bf8 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -55,6 +55,8 @@ extern struct irq_chip i8259A_chip;
55struct legacy_pic { 55struct legacy_pic {
56 int nr_legacy_irqs; 56 int nr_legacy_irqs;
57 struct irq_chip *chip; 57 struct irq_chip *chip;
58 void (*mask)(unsigned int irq);
59 void (*unmask)(unsigned int irq);
58 void (*mask_all)(void); 60 void (*mask_all)(void);
59 void (*restore_mask)(void); 61 void (*restore_mask)(void);
60 void (*init)(int auto_eoi); 62 void (*init)(int auto_eoi);
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 30a3e9776123..f0203f4791a8 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -206,6 +206,7 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
206 206
207extern void iounmap(volatile void __iomem *addr); 207extern void iounmap(volatile void __iomem *addr);
208 208
209extern void set_iounmap_nonlazy(void);
209 210
210#ifdef __KERNEL__ 211#ifdef __KERNEL__
211 212
@@ -348,6 +349,7 @@ extern void __iomem *early_memremap(resource_size_t phys_addr,
348 unsigned long size); 349 unsigned long size);
349extern void early_iounmap(void __iomem *addr, unsigned long size); 350extern void early_iounmap(void __iomem *addr, unsigned long size);
350extern void fixup_early_ioremap(void); 351extern void fixup_early_ioremap(void);
352extern bool is_early_ioremap_ptep(pte_t *ptep);
351 353
352#define IO_SPACE_LIMIT 0xffff 354#define IO_SPACE_LIMIT 0xffff
353 355
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 9cb2edb87c2f..c8be4566c3d2 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -170,12 +170,6 @@ extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
170 170
171extern void probe_nr_irqs_gsi(void); 171extern void probe_nr_irqs_gsi(void);
172 172
173extern int setup_ioapic_entry(int apic, int irq,
174 struct IO_APIC_route_entry *entry,
175 unsigned int destination, int trigger,
176 int polarity, int vector, int pin);
177extern void ioapic_write_entry(int apic, int pin,
178 struct IO_APIC_route_entry e);
179extern void setup_ioapic_ids_from_mpc(void); 173extern void setup_ioapic_ids_from_mpc(void);
180 174
181struct mp_ioapic_gsi{ 175struct mp_ioapic_gsi{
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
new file mode 100644
index 000000000000..f229b13a5f30
--- /dev/null
+++ b/arch/x86/include/asm/iommu_table.h
@@ -0,0 +1,100 @@
1#ifndef _ASM_X86_IOMMU_TABLE_H
2#define _ASM_X86_IOMMU_TABLE_H
3
4#include <asm/swiotlb.h>
5
6/*
7 * History lesson:
8 * The execution chain of IOMMUs in 2.6.36 looks as so:
9 *
10 * [xen-swiotlb]
11 * |
12 * +----[swiotlb *]--+
13 * / | \
14 * / | \
15 * [GART] [Calgary] [Intel VT-d]
16 * /
17 * /
18 * [AMD-Vi]
19 *
20 * *: if SWIOTLB detected 'iommu=soft'/'swiotlb=force' it would skip
21 * over the rest of IOMMUs and unconditionally initialize the SWIOTLB.
22 * Also it would surreptitiously initialize set the swiotlb=1 if there were
23 * more than 4GB and if the user did not pass in 'iommu=off'. The swiotlb
24 * flag would be turned off by all IOMMUs except the Calgary one.
25 *
26 * The IOMMU_INIT* macros allow a similar tree (or more complex if desired)
27 * to be built by defining who we depend on.
28 *
29 * And all that needs to be done is to use one of the macros in the IOMMU
30 * and the pci-dma.c will take care of the rest.
31 */
32
33struct iommu_table_entry {
34 initcall_t detect;
35 initcall_t depend;
36 void (*early_init)(void); /* No memory allocate available. */
37 void (*late_init)(void); /* Yes, can allocate memory. */
38#define IOMMU_FINISH_IF_DETECTED (1<<0)
39#define IOMMU_DETECTED (1<<1)
40 int flags;
41};
42/*
43 * Macro fills out an entry in the .iommu_table that is equivalent
44 * to the fields that 'struct iommu_table_entry' has. The entries
45 * that are put in the .iommu_table section are not put in any order
46 * hence during boot-time we will have to resort them based on
47 * dependency. */
48
49
50#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\
51 static const struct iommu_table_entry const \
52 __iommu_entry_##_detect __used \
53 __attribute__ ((unused, __section__(".iommu_table"), \
54 aligned((sizeof(void *))))) \
55 = {_detect, _depend, _early_init, _late_init, \
56 _finish ? IOMMU_FINISH_IF_DETECTED : 0}
57/*
58 * The simplest IOMMU definition. Provide the detection routine
59 * and it will be run after the SWIOTLB and the other IOMMUs
60 * that utilize this macro. If the IOMMU is detected (ie, the
61 * detect routine returns a positive value), the other IOMMUs
62 * are also checked. You can use IOMMU_INIT_POST_FINISH if you prefer
63 * to stop detecting the other IOMMUs after yours has been detected.
64 */
65#define IOMMU_INIT_POST(_detect) \
66 __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0)
67
68#define IOMMU_INIT_POST_FINISH(detect) \
69 __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1)
70
71/*
72 * A more sophisticated version of IOMMU_INIT. This variant requires:
73 * a). A detection routine function.
74 * b). The name of the detection routine we depend on to get called
75 * before us.
76 * c). The init routine which gets called if the detection routine
77 * returns a positive value from the pci_iommu_alloc. This means
78 * no presence of a memory allocator.
79 * d). Similar to the 'init', except that this gets called from pci_iommu_init
80 * where we do have a memory allocator.
81 *
82 * The standard vs the _FINISH differs in that the _FINISH variant will
83 * continue detecting other IOMMUs in the call list after the
84 * the detection routine returns a positive number. The _FINISH will
85 * stop the execution chain. Both will still call the 'init' and
86 * 'late_init' functions if they are set.
87 */
88#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \
89 __IOMMU_INIT(_detect, _depend, _init, _late_init, 1)
90
91#define IOMMU_INIT(_detect, _depend, _init, _late_init) \
92 __IOMMU_INIT(_detect, _depend, _init, _late_init, 0)
93
94void sort_iommu_table(struct iommu_table_entry *start,
95 struct iommu_table_entry *finish);
96
97void check_iommu_entries(struct iommu_table_entry *start,
98 struct iommu_table_entry *finish);
99
100#endif /* _ASM_X86_IOMMU_TABLE_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 5458380b6ef8..0bf5b0083650 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -19,18 +19,16 @@ static inline int irq_canonicalize(int irq)
19# define ARCH_HAS_NMI_WATCHDOG 19# define ARCH_HAS_NMI_WATCHDOG
20#endif 20#endif
21 21
22#ifdef CONFIG_4KSTACKS 22#ifdef CONFIG_X86_32
23 extern void irq_ctx_init(int cpu); 23extern void irq_ctx_init(int cpu);
24 extern void irq_ctx_exit(int cpu); 24extern void irq_ctx_exit(int cpu);
25# define __ARCH_HAS_DO_SOFTIRQ
26#else 25#else
27# define irq_ctx_init(cpu) do { } while (0) 26# define irq_ctx_init(cpu) do { } while (0)
28# define irq_ctx_exit(cpu) do { } while (0) 27# define irq_ctx_exit(cpu) do { } while (0)
29# ifdef CONFIG_X86_64
30# define __ARCH_HAS_DO_SOFTIRQ
31# endif
32#endif 28#endif
33 29
30#define __ARCH_HAS_DO_SOFTIRQ
31
34#ifdef CONFIG_HOTPLUG_CPU 32#ifdef CONFIG_HOTPLUG_CPU
35#include <linux/cpumask.h> 33#include <linux/cpumask.h>
36extern void fixup_irqs(void); 34extern void fixup_irqs(void);
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index f275e2244505..1c23360fb2d8 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -3,4 +3,39 @@
3 3
4#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) 4#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
5 5
6#ifdef CONFIG_INTR_REMAP
7static inline void prepare_irte(struct irte *irte, int vector,
8 unsigned int dest)
9{
10 memset(irte, 0, sizeof(*irte));
11
12 irte->present = 1;
13 irte->dst_mode = apic->irq_dest_mode;
14 /*
15 * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
16 * actual level or edge trigger will be setup in the IO-APIC
17 * RTE. This will help simplify level triggered irq migration.
18 * For more details, see the comments (in io_apic.c) explainig IO-APIC
19 * irq migration in the presence of interrupt-remapping.
20 */
21 irte->trigger_mode = 0;
22 irte->dlvry_mode = apic->irq_delivery_mode;
23 irte->vector = vector;
24 irte->dest_id = IRTE_DEST(dest);
25 irte->redir_hint = 1;
26}
27static inline bool irq_remapped(struct irq_cfg *cfg)
28{
29 return cfg->irq_2_iommu.iommu != NULL;
30}
31#else
32static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
33{
34}
35static inline bool irq_remapped(struct irq_cfg *cfg)
36{
37 return false;
38}
39#endif
40
6#endif /* _ASM_X86_IRQ_REMAPPING_H */ 41#endif /* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index e2ca30092557..6af0894dafb4 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -114,9 +114,9 @@
114#define X86_PLATFORM_IPI_VECTOR 0xed 114#define X86_PLATFORM_IPI_VECTOR 0xed
115 115
116/* 116/*
117 * Performance monitoring pending work vector: 117 * IRQ work vector:
118 */ 118 */
119#define LOCAL_PENDING_VECTOR 0xec 119#define IRQ_WORK_VECTOR 0xec
120 120
121#define UV_BAU_MESSAGE 0xea 121#define UV_BAU_MESSAGE 0xea
122 122
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 9e2b952f810a..5745ce8bf108 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -61,22 +61,22 @@ static inline void native_halt(void)
61#else 61#else
62#ifndef __ASSEMBLY__ 62#ifndef __ASSEMBLY__
63 63
64static inline unsigned long __raw_local_save_flags(void) 64static inline unsigned long arch_local_save_flags(void)
65{ 65{
66 return native_save_fl(); 66 return native_save_fl();
67} 67}
68 68
69static inline void raw_local_irq_restore(unsigned long flags) 69static inline void arch_local_irq_restore(unsigned long flags)
70{ 70{
71 native_restore_fl(flags); 71 native_restore_fl(flags);
72} 72}
73 73
74static inline void raw_local_irq_disable(void) 74static inline void arch_local_irq_disable(void)
75{ 75{
76 native_irq_disable(); 76 native_irq_disable();
77} 77}
78 78
79static inline void raw_local_irq_enable(void) 79static inline void arch_local_irq_enable(void)
80{ 80{
81 native_irq_enable(); 81 native_irq_enable();
82} 82}
@@ -85,7 +85,7 @@ static inline void raw_local_irq_enable(void)
85 * Used in the idle loop; sti takes one instruction cycle 85 * Used in the idle loop; sti takes one instruction cycle
86 * to complete: 86 * to complete:
87 */ 87 */
88static inline void raw_safe_halt(void) 88static inline void arch_safe_halt(void)
89{ 89{
90 native_safe_halt(); 90 native_safe_halt();
91} 91}
@@ -102,12 +102,10 @@ static inline void halt(void)
102/* 102/*
103 * For spinlocks, etc: 103 * For spinlocks, etc:
104 */ 104 */
105static inline unsigned long __raw_local_irq_save(void) 105static inline unsigned long arch_local_irq_save(void)
106{ 106{
107 unsigned long flags = __raw_local_save_flags(); 107 unsigned long flags = arch_local_save_flags();
108 108 arch_local_irq_disable();
109 raw_local_irq_disable();
110
111 return flags; 109 return flags;
112} 110}
113#else 111#else
@@ -153,22 +151,16 @@ static inline unsigned long __raw_local_irq_save(void)
153#endif /* CONFIG_PARAVIRT */ 151#endif /* CONFIG_PARAVIRT */
154 152
155#ifndef __ASSEMBLY__ 153#ifndef __ASSEMBLY__
156#define raw_local_save_flags(flags) \ 154static inline int arch_irqs_disabled_flags(unsigned long flags)
157 do { (flags) = __raw_local_save_flags(); } while (0)
158
159#define raw_local_irq_save(flags) \
160 do { (flags) = __raw_local_irq_save(); } while (0)
161
162static inline int raw_irqs_disabled_flags(unsigned long flags)
163{ 155{
164 return !(flags & X86_EFLAGS_IF); 156 return !(flags & X86_EFLAGS_IF);
165} 157}
166 158
167static inline int raw_irqs_disabled(void) 159static inline int arch_irqs_disabled(void)
168{ 160{
169 unsigned long flags = __raw_local_save_flags(); 161 unsigned long flags = arch_local_save_flags();
170 162
171 return raw_irqs_disabled_flags(flags); 163 return arch_irqs_disabled_flags(flags);
172} 164}
173 165
174#else 166#else
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
new file mode 100644
index 000000000000..f52d42e80585
--- /dev/null
+++ b/arch/x86/include/asm/jump_label.h
@@ -0,0 +1,37 @@
1#ifndef _ASM_X86_JUMP_LABEL_H
2#define _ASM_X86_JUMP_LABEL_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7#include <asm/nops.h>
8
9#define JUMP_LABEL_NOP_SIZE 5
10
11# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
12
13# define JUMP_LABEL(key, label) \
14 do { \
15 asm goto("1:" \
16 JUMP_LABEL_INITIAL_NOP \
17 ".pushsection __jump_table, \"a\" \n\t"\
18 _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
19 ".popsection \n\t" \
20 : : "i" (key) : : label); \
21 } while (0)
22
23#endif /* __KERNEL__ */
24
25#ifdef CONFIG_X86_64
26typedef u64 jump_label_t;
27#else
28typedef u32 jump_label_t;
29#endif
30
31struct jump_entry {
32 jump_label_t code;
33 jump_label_t target;
34 jump_label_t key;
35};
36
37#endif
diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
new file mode 100644
index 000000000000..19ae14ba6978
--- /dev/null
+++ b/arch/x86/include/asm/memblock.h
@@ -0,0 +1,23 @@
1#ifndef _X86_MEMBLOCK_H
2#define _X86_MEMBLOCK_H
3
4#define ARCH_DISCARD_MEMBLOCK
5
6u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align);
7void memblock_x86_to_bootmem(u64 start, u64 end);
8
9void memblock_x86_reserve_range(u64 start, u64 end, char *name);
10void memblock_x86_free_range(u64 start, u64 end);
11struct range;
12int __get_free_all_memory_range(struct range **range, int nodeid,
13 unsigned long start_pfn, unsigned long end_pfn);
14int get_free_all_memory_range(struct range **rangep, int nodeid);
15
16void memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
17 unsigned long last_pfn);
18u64 memblock_x86_hole_size(u64 start, u64 end);
19u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
20u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
21u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
22
23#endif
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 3e2ce58a31a3..67763c5d8b4e 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -60,12 +60,7 @@
60#endif 60#endif
61 61
62#ifdef CONFIG_X86_32 62#ifdef CONFIG_X86_32
63# ifdef CONFIG_4KSTACKS 63# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
64# define MODULE_STACKSIZE "4KSTACKS "
65# else
66# define MODULE_STACKSIZE ""
67# endif
68# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
69#endif 64#endif
70 65
71#endif /* _ASM_X86_MODULE_H */ 66#endif /* _ASM_X86_MODULE_H */
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 16350740edf6..4a711a684b17 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -10,6 +10,9 @@
10 */ 10 */
11#ifndef _ASM_X86_MRST_H 11#ifndef _ASM_X86_MRST_H
12#define _ASM_X86_MRST_H 12#define _ASM_X86_MRST_H
13
14#include <linux/sfi.h>
15
13extern int pci_mrst_init(void); 16extern int pci_mrst_init(void);
14int __init sfi_parse_mrtc(struct sfi_table_header *table); 17int __init sfi_parse_mrtc(struct sfi_table_header *table);
15 18
@@ -26,7 +29,7 @@ enum mrst_cpu_type {
26}; 29};
27 30
28extern enum mrst_cpu_type __mrst_cpu_chip; 31extern enum mrst_cpu_type __mrst_cpu_chip;
29static enum mrst_cpu_type mrst_identify_cpu(void) 32static inline enum mrst_cpu_type mrst_identify_cpu(void)
30{ 33{
31 return __mrst_cpu_chip; 34 return __mrst_cpu_chip;
32} 35}
@@ -42,4 +45,9 @@ extern enum mrst_timer_options mrst_timer_options;
42#define SFI_MTMR_MAX_NUM 8 45#define SFI_MTMR_MAX_NUM 8
43#define SFI_MRTC_MAX 8 46#define SFI_MRTC_MAX 8
44 47
48extern struct console early_mrst_console;
49extern void mrst_early_console_init(void);
50
51extern struct console early_hsu_console;
52extern void hsu_early_console_init(void);
45#endif /* _ASM_X86_MRST_H */ 53#endif /* _ASM_X86_MRST_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
new file mode 100644
index 000000000000..bcdff997668c
--- /dev/null
+++ b/arch/x86/include/asm/mwait.h
@@ -0,0 +1,15 @@
1#ifndef _ASM_X86_MWAIT_H
2#define _ASM_X86_MWAIT_H
3
4#define MWAIT_SUBSTATE_MASK 0xf
5#define MWAIT_CSTATE_MASK 0xf
6#define MWAIT_SUBSTATE_SIZE 4
7#define MWAIT_MAX_NUM_CSTATES 8
8
9#define CPUID_MWAIT_LEAF 5
10#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
11#define CPUID5_ECX_INTERRUPT_BREAK 0x2
12
13#define MWAIT_ECX_INTERRUPT_BREAK 0x1
14
15#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h
index 08fde475cb3b..2a8478140bb3 100644
--- a/arch/x86/include/asm/olpc_ofw.h
+++ b/arch/x86/include/asm/olpc_ofw.h
@@ -21,10 +21,14 @@ extern void olpc_ofw_detect(void);
21/* install OFW's pde permanently into the kernel's pgtable */ 21/* install OFW's pde permanently into the kernel's pgtable */
22extern void setup_olpc_ofw_pgd(void); 22extern void setup_olpc_ofw_pgd(void);
23 23
24/* check if OFW was detected during boot */
25extern bool olpc_ofw_present(void);
26
24#else /* !CONFIG_OLPC_OPENFIRMWARE */ 27#else /* !CONFIG_OLPC_OPENFIRMWARE */
25 28
26static inline void olpc_ofw_detect(void) { } 29static inline void olpc_ofw_detect(void) { }
27static inline void setup_olpc_ofw_pgd(void) { } 30static inline void setup_olpc_ofw_pgd(void) { }
31static inline bool olpc_ofw_present(void) { return false; }
28 32
29#endif /* !CONFIG_OLPC_OPENFIRMWARE */ 33#endif /* !CONFIG_OLPC_OPENFIRMWARE */
30 34
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 6f1b7331313f..ade619ff9e2a 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -15,11 +15,7 @@
15 */ 15 */
16#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) 16#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
17 17
18#ifdef CONFIG_4KSTACKS
19#define THREAD_ORDER 0
20#else
21#define THREAD_ORDER 1 18#define THREAD_ORDER 1
22#endif
23#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) 19#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
24 20
25#define STACKFAULT_STACK 0 21#define STACKFAULT_STACK 0
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index a667f24c7254..1df66211fd1b 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -8,7 +8,7 @@
8#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) 8#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
9#define PAGE_MASK (~(PAGE_SIZE-1)) 9#define PAGE_MASK (~(PAGE_SIZE-1))
10 10
11#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1) 11#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
12#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) 12#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
13 13
14/* Cast PAGE_MASK to a signed type so that it is sign-extended if 14/* Cast PAGE_MASK to a signed type so that it is sign-extended if
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 5653f43d90e5..18e3b8a8709f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -105,7 +105,7 @@ static inline void write_cr8(unsigned long x)
105} 105}
106#endif 106#endif
107 107
108static inline void raw_safe_halt(void) 108static inline void arch_safe_halt(void)
109{ 109{
110 PVOP_VCALL0(pv_irq_ops.safe_halt); 110 PVOP_VCALL0(pv_irq_ops.safe_halt);
111} 111}
@@ -416,11 +416,6 @@ static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
416 PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); 416 PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
417} 417}
418 418
419static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
420 unsigned long start, unsigned long count)
421{
422 PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
423}
424static inline void paravirt_release_pmd(unsigned long pfn) 419static inline void paravirt_release_pmd(unsigned long pfn)
425{ 420{
426 PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); 421 PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
@@ -829,32 +824,32 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
829#define __PV_IS_CALLEE_SAVE(func) \ 824#define __PV_IS_CALLEE_SAVE(func) \
830 ((struct paravirt_callee_save) { func }) 825 ((struct paravirt_callee_save) { func })
831 826
832static inline unsigned long __raw_local_save_flags(void) 827static inline unsigned long arch_local_save_flags(void)
833{ 828{
834 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); 829 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
835} 830}
836 831
837static inline void raw_local_irq_restore(unsigned long f) 832static inline void arch_local_irq_restore(unsigned long f)
838{ 833{
839 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); 834 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
840} 835}
841 836
842static inline void raw_local_irq_disable(void) 837static inline void arch_local_irq_disable(void)
843{ 838{
844 PVOP_VCALLEE0(pv_irq_ops.irq_disable); 839 PVOP_VCALLEE0(pv_irq_ops.irq_disable);
845} 840}
846 841
847static inline void raw_local_irq_enable(void) 842static inline void arch_local_irq_enable(void)
848{ 843{
849 PVOP_VCALLEE0(pv_irq_ops.irq_enable); 844 PVOP_VCALLEE0(pv_irq_ops.irq_enable);
850} 845}
851 846
852static inline unsigned long __raw_local_irq_save(void) 847static inline unsigned long arch_local_irq_save(void)
853{ 848{
854 unsigned long f; 849 unsigned long f;
855 850
856 f = __raw_local_save_flags(); 851 f = arch_local_save_flags();
857 raw_local_irq_disable(); 852 arch_local_irq_disable();
858 return f; 853 return f;
859} 854}
860 855
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index db9ef5532341..b82bac975250 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -255,7 +255,6 @@ struct pv_mmu_ops {
255 */ 255 */
256 void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); 256 void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
257 void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); 257 void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
258 void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
259 void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); 258 void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
260 void (*release_pte)(unsigned long pfn); 259 void (*release_pte)(unsigned long pfn);
261 void (*release_pmd)(unsigned long pfn); 260 void (*release_pmd)(unsigned long pfn);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index cd28f9ad910d..f899e01a8ac9 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -47,6 +47,20 @@
47#ifdef CONFIG_SMP 47#ifdef CONFIG_SMP
48#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x 48#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
49#define __my_cpu_offset percpu_read(this_cpu_off) 49#define __my_cpu_offset percpu_read(this_cpu_off)
50
51/*
52 * Compared to the generic __my_cpu_offset version, the following
53 * saves one instruction and avoids clobbering a temp register.
54 */
55#define __this_cpu_ptr(ptr) \
56({ \
57 unsigned long tcp_ptr__; \
58 __verify_pcpu_ptr(ptr); \
59 asm volatile("add " __percpu_arg(1) ", %0" \
60 : "=r" (tcp_ptr__) \
61 : "m" (this_cpu_off), "0" (ptr)); \
62 (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
63})
50#else 64#else
51#define __percpu_arg(x) "%P" #x 65#define __percpu_arg(x) "%P" #x
52#endif 66#endif
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index def500776b16..a70cd216be5d 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -36,19 +36,6 @@
36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) 36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) 37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
38 38
39/* Non HT mask */
40#define P4_ESCR_MASK \
41 (P4_ESCR_EVENT_MASK | \
42 P4_ESCR_EVENTMASK_MASK | \
43 P4_ESCR_TAG_MASK | \
44 P4_ESCR_TAG_ENABLE | \
45 P4_ESCR_T0_OS | \
46 P4_ESCR_T0_USR)
47
48/* HT mask */
49#define P4_ESCR_MASK_HT \
50 (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
51
52#define P4_CCCR_OVF 0x80000000U 39#define P4_CCCR_OVF 0x80000000U
53#define P4_CCCR_CASCADE 0x40000000U 40#define P4_CCCR_CASCADE 0x40000000U
54#define P4_CCCR_OVF_PMI_T0 0x04000000U 41#define P4_CCCR_OVF_PMI_T0 0x04000000U
@@ -70,23 +57,6 @@
70#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) 57#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
71#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) 58#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
72 59
73/* Non HT mask */
74#define P4_CCCR_MASK \
75 (P4_CCCR_OVF | \
76 P4_CCCR_CASCADE | \
77 P4_CCCR_OVF_PMI_T0 | \
78 P4_CCCR_FORCE_OVF | \
79 P4_CCCR_EDGE | \
80 P4_CCCR_THRESHOLD_MASK | \
81 P4_CCCR_COMPLEMENT | \
82 P4_CCCR_COMPARE | \
83 P4_CCCR_ESCR_SELECT_MASK | \
84 P4_CCCR_ENABLE)
85
86/* HT mask */
87#define P4_CCCR_MASK_HT \
88 (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
89
90#define P4_GEN_ESCR_EMASK(class, name, bit) \ 60#define P4_GEN_ESCR_EMASK(class, name, bit) \
91 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) 61 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
92#define P4_ESCR_EMASK_BIT(class, name) class##__##name 62#define P4_ESCR_EMASK_BIT(class, name) class##__##name
@@ -127,6 +97,28 @@
127#define P4_CONFIG_HT_SHIFT 63 97#define P4_CONFIG_HT_SHIFT 63
128#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 98#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
129 99
100/*
101 * The bits we allow to pass for RAW events
102 */
103#define P4_CONFIG_MASK_ESCR \
104 P4_ESCR_EVENT_MASK | \
105 P4_ESCR_EVENTMASK_MASK | \
106 P4_ESCR_TAG_MASK | \
107 P4_ESCR_TAG_ENABLE
108
109#define P4_CONFIG_MASK_CCCR \
110 P4_CCCR_EDGE | \
111 P4_CCCR_THRESHOLD_MASK | \
112 P4_CCCR_COMPLEMENT | \
113 P4_CCCR_COMPARE | \
114 P4_CCCR_THREAD_ANY | \
115 P4_CCCR_RESERVED
116
117/* some dangerous bits are reserved for kernel internals */
118#define P4_CONFIG_MASK \
119 (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
120 (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
121
130static inline bool p4_is_event_cascaded(u64 config) 122static inline bool p4_is_event_cascaded(u64 config)
131{ 123{
132 u32 cccr = p4_config_unpack_cccr(config); 124 u32 cccr = p4_config_unpack_cccr(config);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a34c785c5a63..ada823a13c7c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
28extern spinlock_t pgd_lock; 28extern spinlock_t pgd_lock;
29extern struct list_head pgd_list; 29extern struct list_head pgd_list;
30 30
31extern struct mm_struct *pgd_page_get_mm(struct page *page);
32
31#ifdef CONFIG_PARAVIRT 33#ifdef CONFIG_PARAVIRT
32#include <asm/paravirt.h> 34#include <asm/paravirt.h>
33#else /* !CONFIG_PARAVIRT */ 35#else /* !CONFIG_PARAVIRT */
@@ -603,6 +605,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
603 pte_update(mm, addr, ptep); 605 pte_update(mm, addr, ptep);
604} 606}
605 607
608#define flush_tlb_fix_spurious_fault(vma, address)
609
606/* 610/*
607 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); 611 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
608 * 612 *
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 076052cd62be..f96ac9bedf75 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -102,6 +102,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
102 native_set_pgd(pgd, native_make_pgd(0)); 102 native_set_pgd(pgd, native_make_pgd(0));
103} 103}
104 104
105extern void sync_global_pgds(unsigned long start, unsigned long end);
106
105/* 107/*
106 * Conversion functions: convert a page and protection to a page entry, 108 * Conversion functions: convert a page and protection to a page entry,
107 * and a page entry and page directory to the page they refer to. 109 * and a page entry and page directory to the page they refer to.
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 325b7bdbebaa..cae9c3cb95cf 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -110,6 +110,8 @@ struct cpuinfo_x86 {
110 u16 phys_proc_id; 110 u16 phys_proc_id;
111 /* Core id: */ 111 /* Core id: */
112 u16 cpu_core_id; 112 u16 cpu_core_id;
113 /* Compute unit id */
114 u8 compute_unit_id;
113 /* Index into per_cpu list: */ 115 /* Index into per_cpu list: */
114 u16 cpu_index; 116 u16 cpu_index;
115#endif 117#endif
@@ -602,7 +604,7 @@ extern unsigned long mmu_cr4_features;
602 604
603static inline void set_in_cr4(unsigned long mask) 605static inline void set_in_cr4(unsigned long mask)
604{ 606{
605 unsigned cr4; 607 unsigned long cr4;
606 608
607 mmu_cr4_features |= mask; 609 mmu_cr4_features |= mask;
608 cr4 = read_cr4(); 610 cr4 = read_cr4();
@@ -612,7 +614,7 @@ static inline void set_in_cr4(unsigned long mask)
612 614
613static inline void clear_in_cr4(unsigned long mask) 615static inline void clear_in_cr4(unsigned long mask)
614{ 616{
615 unsigned cr4; 617 unsigned long cr4;
616 618
617 mmu_cr4_features &= ~mask; 619 mmu_cr4_features &= ~mask;
618 cr4 = read_cr4(); 620 cr4 = read_cr4();
@@ -764,29 +766,6 @@ extern unsigned long idle_halt;
764extern unsigned long idle_nomwait; 766extern unsigned long idle_nomwait;
765extern bool c1e_detected; 767extern bool c1e_detected;
766 768
767/*
768 * on systems with caches, caches must be flashed as the absolute
769 * last instruction before going into a suspended halt. Otherwise,
770 * dirty data can linger in the cache and become stale on resume,
771 * leading to strange errors.
772 *
773 * perform a variety of operations to guarantee that the compiler
774 * will not reorder instructions. wbinvd itself is serializing
775 * so the processor will not reorder.
776 *
777 * Systems without cache can just go into halt.
778 */
779static inline void wbinvd_halt(void)
780{
781 mb();
782 /* check for clflush to determine if wbinvd is legal */
783 if (cpu_has_clflush)
784 asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
785 else
786 while (1)
787 halt();
788}
789
790extern void enable_sep_cpu(void); 769extern void enable_sep_cpu(void);
791extern int sysenter_setup(void); 770extern int sysenter_setup(void);
792 771
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ef292c792d74..d6763b139a84 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -93,6 +93,11 @@ void *extend_brk(size_t size, size_t align);
93 : : "i" (sz)); \ 93 : : "i" (sz)); \
94 } 94 }
95 95
96/* Helper for reserving space for arrays of things */
97#define RESERVE_BRK_ARRAY(type, name, entries) \
98 type *name; \
99 RESERVE_BRK(name, sizeof(type) * entries)
100
96#ifdef __i386__ 101#ifdef __i386__
97 102
98void __init i386_start_kernel(void); 103void __init i386_start_kernel(void);
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 8085277e1b8b..977f1761a25d 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -5,17 +5,26 @@
5 5
6#ifdef CONFIG_SWIOTLB 6#ifdef CONFIG_SWIOTLB
7extern int swiotlb; 7extern int swiotlb;
8extern int __init pci_swiotlb_detect(void); 8extern int __init pci_swiotlb_detect_override(void);
9extern int __init pci_swiotlb_detect_4gb(void);
9extern void __init pci_swiotlb_init(void); 10extern void __init pci_swiotlb_init(void);
11extern void __init pci_swiotlb_late_init(void);
10#else 12#else
11#define swiotlb 0 13#define swiotlb 0
12static inline int pci_swiotlb_detect(void) 14static inline int pci_swiotlb_detect_override(void)
15{
16 return 0;
17}
18static inline int pci_swiotlb_detect_4gb(void)
13{ 19{
14 return 0; 20 return 0;
15} 21}
16static inline void pci_swiotlb_init(void) 22static inline void pci_swiotlb_init(void)
17{ 23{
18} 24}
25static inline void pci_swiotlb_late_init(void)
26{
27}
19#endif 28#endif
20 29
21static inline void dma_mark_clean(void *addr, size_t size) {} 30static inline void dma_mark_clean(void *addr, size_t size) {}
diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h
deleted file mode 100644
index 61e08c0a2907..000000000000
--- a/arch/x86/include/asm/vmi.h
+++ /dev/null
@@ -1,269 +0,0 @@
1/*
2 * VMI interface definition
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Maintained by: Zachary Amsden zach@vmware.com
22 *
23 */
24#include <linux/types.h>
25
26/*
27 *---------------------------------------------------------------------
28 *
29 * VMI Option ROM API
30 *
31 *---------------------------------------------------------------------
32 */
33#define VMI_SIGNATURE 0x696d5663 /* "cVmi" */
34
35#define PCI_VENDOR_ID_VMWARE 0x15AD
36#define PCI_DEVICE_ID_VMWARE_VMI 0x0801
37
38/*
39 * We use two version numbers for compatibility, with the major
40 * number signifying interface breakages, and the minor number
41 * interface extensions.
42 */
43#define VMI_API_REV_MAJOR 3
44#define VMI_API_REV_MINOR 0
45
46#define VMI_CALL_CPUID 0
47#define VMI_CALL_WRMSR 1
48#define VMI_CALL_RDMSR 2
49#define VMI_CALL_SetGDT 3
50#define VMI_CALL_SetLDT 4
51#define VMI_CALL_SetIDT 5
52#define VMI_CALL_SetTR 6
53#define VMI_CALL_GetGDT 7
54#define VMI_CALL_GetLDT 8
55#define VMI_CALL_GetIDT 9
56#define VMI_CALL_GetTR 10
57#define VMI_CALL_WriteGDTEntry 11
58#define VMI_CALL_WriteLDTEntry 12
59#define VMI_CALL_WriteIDTEntry 13
60#define VMI_CALL_UpdateKernelStack 14
61#define VMI_CALL_SetCR0 15
62#define VMI_CALL_SetCR2 16
63#define VMI_CALL_SetCR3 17
64#define VMI_CALL_SetCR4 18
65#define VMI_CALL_GetCR0 19
66#define VMI_CALL_GetCR2 20
67#define VMI_CALL_GetCR3 21
68#define VMI_CALL_GetCR4 22
69#define VMI_CALL_WBINVD 23
70#define VMI_CALL_SetDR 24
71#define VMI_CALL_GetDR 25
72#define VMI_CALL_RDPMC 26
73#define VMI_CALL_RDTSC 27
74#define VMI_CALL_CLTS 28
75#define VMI_CALL_EnableInterrupts 29
76#define VMI_CALL_DisableInterrupts 30
77#define VMI_CALL_GetInterruptMask 31
78#define VMI_CALL_SetInterruptMask 32
79#define VMI_CALL_IRET 33
80#define VMI_CALL_SYSEXIT 34
81#define VMI_CALL_Halt 35
82#define VMI_CALL_Reboot 36
83#define VMI_CALL_Shutdown 37
84#define VMI_CALL_SetPxE 38
85#define VMI_CALL_SetPxELong 39
86#define VMI_CALL_UpdatePxE 40
87#define VMI_CALL_UpdatePxELong 41
88#define VMI_CALL_MachineToPhysical 42
89#define VMI_CALL_PhysicalToMachine 43
90#define VMI_CALL_AllocatePage 44
91#define VMI_CALL_ReleasePage 45
92#define VMI_CALL_InvalPage 46
93#define VMI_CALL_FlushTLB 47
94#define VMI_CALL_SetLinearMapping 48
95
96#define VMI_CALL_SetIOPLMask 61
97#define VMI_CALL_SetInitialAPState 62
98#define VMI_CALL_APICWrite 63
99#define VMI_CALL_APICRead 64
100#define VMI_CALL_IODelay 65
101#define VMI_CALL_SetLazyMode 73
102
103/*
104 *---------------------------------------------------------------------
105 *
106 * MMU operation flags
107 *
108 *---------------------------------------------------------------------
109 */
110
111/* Flags used by VMI_{Allocate|Release}Page call */
112#define VMI_PAGE_PAE 0x10 /* Allocate PAE shadow */
113#define VMI_PAGE_CLONE 0x20 /* Clone from another shadow */
114#define VMI_PAGE_ZEROED 0x40 /* Page is pre-zeroed */
115
116
117/* Flags shared by Allocate|Release Page and PTE updates */
118#define VMI_PAGE_PT 0x01
119#define VMI_PAGE_PD 0x02
120#define VMI_PAGE_PDP 0x04
121#define VMI_PAGE_PML4 0x08
122
123#define VMI_PAGE_NORMAL 0x00 /* for debugging */
124
125/* Flags used by PTE updates */
126#define VMI_PAGE_CURRENT_AS 0x10 /* implies VMI_PAGE_VA_MASK is valid */
127#define VMI_PAGE_DEFER 0x20 /* may queue update until TLB inval */
128#define VMI_PAGE_VA_MASK 0xfffff000
129
130#ifdef CONFIG_X86_PAE
131#define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_PAE | VMI_PAGE_ZEROED)
132#define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_PAE | VMI_PAGE_ZEROED)
133#else
134#define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_ZEROED)
135#define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_ZEROED)
136#endif
137
138/* Flags used by VMI_FlushTLB call */
139#define VMI_FLUSH_TLB 0x01
140#define VMI_FLUSH_GLOBAL 0x02
141
142/*
143 *---------------------------------------------------------------------
144 *
145 * VMI relocation definitions for ROM call get_reloc
146 *
147 *---------------------------------------------------------------------
148 */
149
150/* VMI Relocation types */
151#define VMI_RELOCATION_NONE 0
152#define VMI_RELOCATION_CALL_REL 1
153#define VMI_RELOCATION_JUMP_REL 2
154#define VMI_RELOCATION_NOP 3
155
156#ifndef __ASSEMBLY__
157struct vmi_relocation_info {
158 unsigned char *eip;
159 unsigned char type;
160 unsigned char reserved[3];
161};
162#endif
163
164
165/*
166 *---------------------------------------------------------------------
167 *
168 * Generic ROM structures and definitions
169 *
170 *---------------------------------------------------------------------
171 */
172
173#ifndef __ASSEMBLY__
174
175struct vrom_header {
176 u16 rom_signature; /* option ROM signature */
177 u8 rom_length; /* ROM length in 512 byte chunks */
178 u8 rom_entry[4]; /* 16-bit code entry point */
179 u8 rom_pad0; /* 4-byte align pad */
180 u32 vrom_signature; /* VROM identification signature */
181 u8 api_version_min;/* Minor version of API */
182 u8 api_version_maj;/* Major version of API */
183 u8 jump_slots; /* Number of jump slots */
184 u8 reserved1; /* Reserved for expansion */
185 u32 virtual_top; /* Hypervisor virtual address start */
186 u16 reserved2; /* Reserved for expansion */
187 u16 license_offs; /* Offset to License string */
188 u16 pci_header_offs;/* Offset to PCI OPROM header */
189 u16 pnp_header_offs;/* Offset to PnP OPROM header */
190 u32 rom_pad3; /* PnP reserverd / VMI reserved */
191 u8 reserved[96]; /* Reserved for headers */
192 char vmi_init[8]; /* VMI_Init jump point */
193 char get_reloc[8]; /* VMI_GetRelocationInfo jump point */
194} __attribute__((packed));
195
196struct pnp_header {
197 char sig[4];
198 char rev;
199 char size;
200 short next;
201 short res;
202 long devID;
203 unsigned short manufacturer_offset;
204 unsigned short product_offset;
205} __attribute__((packed));
206
207struct pci_header {
208 char sig[4];
209 short vendorID;
210 short deviceID;
211 short vpdData;
212 short size;
213 char rev;
214 char class;
215 char subclass;
216 char interface;
217 short chunks;
218 char rom_version_min;
219 char rom_version_maj;
220 char codetype;
221 char lastRom;
222 short reserved;
223} __attribute__((packed));
224
225/* Function prototypes for bootstrapping */
226#ifdef CONFIG_VMI
227extern void vmi_init(void);
228extern void vmi_activate(void);
229extern void vmi_bringup(void);
230#else
231static inline void vmi_init(void) {}
232static inline void vmi_activate(void) {}
233static inline void vmi_bringup(void) {}
234#endif
235
236/* State needed to start an application processor in an SMP system. */
237struct vmi_ap_state {
238 u32 cr0;
239 u32 cr2;
240 u32 cr3;
241 u32 cr4;
242
243 u64 efer;
244
245 u32 eip;
246 u32 eflags;
247 u32 eax;
248 u32 ebx;
249 u32 ecx;
250 u32 edx;
251 u32 esp;
252 u32 ebp;
253 u32 esi;
254 u32 edi;
255 u16 cs;
256 u16 ss;
257 u16 ds;
258 u16 es;
259 u16 fs;
260 u16 gs;
261 u16 ldtr;
262
263 u16 gdtr_limit;
264 u32 gdtr_base;
265 u32 idtr_base;
266 u16 idtr_limit;
267};
268
269#endif
diff --git a/arch/x86/include/asm/vmi_time.h b/arch/x86/include/asm/vmi_time.h
deleted file mode 100644
index c6e0bee93e3c..000000000000
--- a/arch/x86/include/asm/vmi_time.h
+++ /dev/null
@@ -1,98 +0,0 @@
1/*
2 * VMI Time wrappers
3 *
4 * Copyright (C) 2006, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to dhecht@vmware.com
22 *
23 */
24
25#ifndef _ASM_X86_VMI_TIME_H
26#define _ASM_X86_VMI_TIME_H
27
28/*
29 * Raw VMI call indices for timer functions
30 */
31#define VMI_CALL_GetCycleFrequency 66
32#define VMI_CALL_GetCycleCounter 67
33#define VMI_CALL_SetAlarm 68
34#define VMI_CALL_CancelAlarm 69
35#define VMI_CALL_GetWallclockTime 70
36#define VMI_CALL_WallclockUpdated 71
37
38/* Cached VMI timer operations */
39extern struct vmi_timer_ops {
40 u64 (*get_cycle_frequency)(void);
41 u64 (*get_cycle_counter)(int);
42 u64 (*get_wallclock)(void);
43 int (*wallclock_updated)(void);
44 void (*set_alarm)(u32 flags, u64 expiry, u64 period);
45 void (*cancel_alarm)(u32 flags);
46} vmi_timer_ops;
47
48/* Prototypes */
49extern void __init vmi_time_init(void);
50extern unsigned long vmi_get_wallclock(void);
51extern int vmi_set_wallclock(unsigned long now);
52extern unsigned long long vmi_sched_clock(void);
53extern unsigned long vmi_tsc_khz(void);
54
55#ifdef CONFIG_X86_LOCAL_APIC
56extern void __devinit vmi_time_bsp_init(void);
57extern void __devinit vmi_time_ap_init(void);
58#endif
59
60/*
61 * When run under a hypervisor, a vcpu is always in one of three states:
62 * running, halted, or ready. The vcpu is in the 'running' state if it
63 * is executing. When the vcpu executes the halt interface, the vcpu
64 * enters the 'halted' state and remains halted until there is some work
65 * pending for the vcpu (e.g. an alarm expires, host I/O completes on
66 * behalf of virtual I/O). At this point, the vcpu enters the 'ready'
67 * state (waiting for the hypervisor to reschedule it). Finally, at any
68 * time when the vcpu is not in the 'running' state nor the 'halted'
69 * state, it is in the 'ready' state.
70 *
71 * Real time is advances while the vcpu is 'running', 'ready', or
72 * 'halted'. Stolen time is the time in which the vcpu is in the
73 * 'ready' state. Available time is the remaining time -- the vcpu is
74 * either 'running' or 'halted'.
75 *
76 * All three views of time are accessible through the VMI cycle
77 * counters.
78 */
79
80/* The cycle counters. */
81#define VMI_CYCLES_REAL 0
82#define VMI_CYCLES_AVAILABLE 1
83#define VMI_CYCLES_STOLEN 2
84
85/* The alarm interface 'flags' bits */
86#define VMI_ALARM_COUNTERS 2
87
88#define VMI_ALARM_COUNTER_MASK 0x000000ff
89
90#define VMI_ALARM_WIRED_IRQ0 0x00000000
91#define VMI_ALARM_WIRED_LVTT 0x00010000
92
93#define VMI_ALARM_IS_ONESHOT 0x00000000
94#define VMI_ALARM_IS_PERIODIC 0x00000100
95
96#define CONFIG_VMI_ALARM_HZ 100
97
98#endif /* _ASM_X86_VMI_TIME_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index fedf32a8c3ec..2c833d8c4141 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n
34obj-y := process_$(BITS).o signal.o entry_$(BITS).o 34obj-y := process_$(BITS).o signal.o entry_$(BITS).o
35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
36obj-y += time.o ioport.o ldt.o dumpstack.o 36obj-y += time.o ioport.o ldt.o dumpstack.o
37obj-y += setup.o x86_init.o i8259.o irqinit.o 37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
38obj-$(CONFIG_IRQ_WORK) += irq_work.o
38obj-$(CONFIG_X86_VISWS) += visws_quirks.o 39obj-$(CONFIG_X86_VISWS) += visws_quirks.o
39obj-$(CONFIG_X86_32) += probe_roms_32.o 40obj-$(CONFIG_X86_32) += probe_roms_32.o
40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 41obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -44,6 +45,7 @@ obj-y += bootflag.o e820.o
44obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o 45obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o 46obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
46obj-y += tsc.o io_delay.o rtc.o 47obj-y += tsc.o io_delay.o rtc.o
48obj-y += pci-iommu_table.o
47 49
48obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 50obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
49obj-y += process.o 51obj-y += process.o
@@ -85,15 +87,15 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
85obj-$(CONFIG_KGDB) += kgdb.o 87obj-$(CONFIG_KGDB) += kgdb.o
86obj-$(CONFIG_VM86) += vm86_32.o 88obj-$(CONFIG_VM86) += vm86_32.o
87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 89obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
90obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
88 91
89obj-$(CONFIG_HPET_TIMER) += hpet.o 92obj-$(CONFIG_HPET_TIMER) += hpet.o
90obj-$(CONFIG_APB_TIMER) += apb_timer.o 93obj-$(CONFIG_APB_TIMER) += apb_timer.o
91 94
92obj-$(CONFIG_K8_NB) += k8.o 95obj-$(CONFIG_AMD_NB) += amd_nb.o
93obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o 96obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
94obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o 97obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
95 98
96obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
97obj-$(CONFIG_KVM_GUEST) += kvm.o 99obj-$(CONFIG_KVM_GUEST) += kvm.o
98obj-$(CONFIG_KVM_CLOCK) += kvmclock.o 100obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
99obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 101obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
@@ -106,6 +108,7 @@ obj-$(CONFIG_SCx200) += scx200.o
106scx200-y += scx200_32.o 108scx200-y += scx200_32.o
107 109
108obj-$(CONFIG_OLPC) += olpc.o 110obj-$(CONFIG_OLPC) += olpc.o
111obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
109obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o 112obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
110obj-$(CONFIG_X86_MRST) += mrst.o 113obj-$(CONFIG_X86_MRST) += mrst.o
111 114
@@ -122,7 +125,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
122# 64 bit specific files 125# 64 bit specific files
123ifeq ($(CONFIG_X86_64),y) 126ifeq ($(CONFIG_X86_64),y)
124 obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o 127 obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
125 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
126 obj-$(CONFIG_AUDIT) += audit_64.o 128 obj-$(CONFIG_AUDIT) += audit_64.o
127 129
128 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o 130 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index fb16f17e59be..5812404a0d4c 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -13,6 +13,7 @@
13 13
14#include <acpi/processor.h> 14#include <acpi/processor.h>
15#include <asm/acpi.h> 15#include <asm/acpi.h>
16#include <asm/mwait.h>
16 17
17/* 18/*
18 * Initialize bm_flags based on the CPU cache properties 19 * Initialize bm_flags based on the CPU cache properties
@@ -65,16 +66,6 @@ static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */
65 66
66static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; 67static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
67 68
68#define MWAIT_SUBSTATE_MASK (0xf)
69#define MWAIT_CSTATE_MASK (0xf)
70#define MWAIT_SUBSTATE_SIZE (4)
71
72#define CPUID_MWAIT_LEAF (5)
73#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
74#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
75
76#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
77
78#define NATIVE_CSTATE_BEYOND_HALT (2) 69#define NATIVE_CSTATE_BEYOND_HALT (2)
79 70
80static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) 71static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 33cec152070d..e1252074ea40 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/acpi.h> 8#include <linux/acpi.h>
9#include <linux/bootmem.h> 9#include <linux/bootmem.h>
10#include <linux/memblock.h>
10#include <linux/dmi.h> 11#include <linux/dmi.h>
11#include <linux/cpumask.h> 12#include <linux/cpumask.h>
12#include <asm/segment.h> 13#include <asm/segment.h>
@@ -125,7 +126,7 @@ void acpi_restore_state_mem(void)
125 */ 126 */
126void __init acpi_reserve_wakeup_memory(void) 127void __init acpi_reserve_wakeup_memory(void)
127{ 128{
128 unsigned long mem; 129 phys_addr_t mem;
129 130
130 if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { 131 if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
131 printk(KERN_ERR 132 printk(KERN_ERR
@@ -133,15 +134,15 @@ void __init acpi_reserve_wakeup_memory(void)
133 return; 134 return;
134 } 135 }
135 136
136 mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); 137 mem = memblock_find_in_range(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
137 138
138 if (mem == -1L) { 139 if (mem == MEMBLOCK_ERROR) {
139 printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); 140 printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
140 return; 141 return;
141 } 142 }
142 acpi_realmode = (unsigned long) phys_to_virt(mem); 143 acpi_realmode = (unsigned long) phys_to_virt(mem);
143 acpi_wakeup_address = mem; 144 acpi_wakeup_address = mem;
144 reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); 145 memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
145} 146}
146 147
147 148
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f65ab8b014c4..a36bb90aef53 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
195 195
196extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 196extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
197extern s32 __smp_locks[], __smp_locks_end[]; 197extern s32 __smp_locks[], __smp_locks_end[];
198static void *text_poke_early(void *addr, const void *opcode, size_t len); 198void *text_poke_early(void *addr, const void *opcode, size_t len);
199 199
200/* Replace instructions with better alternatives for this CPU type. 200/* Replace instructions with better alternatives for this CPU type.
201 This runs before SMP is initialized to avoid SMP problems with 201 This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
522 * instructions. And on the local CPU you need to be protected again NMI or MCE 522 * instructions. And on the local CPU you need to be protected again NMI or MCE
523 * handlers seeing an inconsistent instruction while you patch. 523 * handlers seeing an inconsistent instruction while you patch.
524 */ 524 */
525static void *__init_or_module text_poke_early(void *addr, const void *opcode, 525void *__init_or_module text_poke_early(void *addr, const void *opcode,
526 size_t len) 526 size_t len)
527{ 527{
528 unsigned long flags; 528 unsigned long flags;
@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
637 tpp.len = len; 637 tpp.len = len;
638 atomic_set(&stop_machine_first, 1); 638 atomic_set(&stop_machine_first, 1);
639 wrote_text = 0; 639 wrote_text = 0;
640 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); 640 /* Use __stop_machine() because the caller already got online_cpus. */
641 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
641 return addr; 642 return addr;
642} 643}
643 644
645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
646
647unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
648
649void __init arch_init_ideal_nop5(void)
650{
651 extern const unsigned char ftrace_test_p6nop[];
652 extern const unsigned char ftrace_test_nop5[];
653 extern const unsigned char ftrace_test_jmp[];
654 int faulted = 0;
655
656 /*
657 * There is no good nop for all x86 archs.
658 * We will default to using the P6_NOP5, but first we
659 * will test to make sure that the nop will actually
660 * work on this CPU. If it faults, we will then
661 * go to a lesser efficient 5 byte nop. If that fails
662 * we then just use a jmp as our nop. This isn't the most
663 * efficient nop, but we can not use a multi part nop
664 * since we would then risk being preempted in the middle
665 * of that nop, and if we enabled tracing then, it might
666 * cause a system crash.
667 *
668 * TODO: check the cpuid to determine the best nop.
669 */
670 asm volatile (
671 "ftrace_test_jmp:"
672 "jmp ftrace_test_p6nop\n"
673 "nop\n"
674 "nop\n"
675 "nop\n" /* 2 byte jmp + 3 bytes */
676 "ftrace_test_p6nop:"
677 P6_NOP5
678 "jmp 1f\n"
679 "ftrace_test_nop5:"
680 ".byte 0x66,0x66,0x66,0x66,0x90\n"
681 "1:"
682 ".section .fixup, \"ax\"\n"
683 "2: movl $1, %0\n"
684 " jmp ftrace_test_nop5\n"
685 "3: movl $2, %0\n"
686 " jmp 1b\n"
687 ".previous\n"
688 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
689 _ASM_EXTABLE(ftrace_test_nop5, 3b)
690 : "=r"(faulted) : "0" (faulted));
691
692 switch (faulted) {
693 case 0:
694 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
695 memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
696 break;
697 case 1:
698 pr_info("converting mcount calls to 66 66 66 66 90\n");
699 memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
700 break;
701 case 2:
702 pr_info("converting mcount calls to jmp . + 5\n");
703 memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
704 break;
705 }
706
707}
708#endif
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 679b6450382b..d2fdb0826df2 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 5a170cbbbed8..6e11c8134158 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -31,7 +31,7 @@
31#include <asm/iommu.h> 31#include <asm/iommu.h>
32#include <asm/gart.h> 32#include <asm/gart.h>
33#include <asm/x86_init.h> 33#include <asm/x86_init.h>
34 34#include <asm/iommu_table.h>
35/* 35/*
36 * definitions for the ACPI scanning code 36 * definitions for the ACPI scanning code
37 */ 37 */
@@ -194,6 +194,39 @@ static inline unsigned long tbl_size(int entry_size)
194 return 1UL << shift; 194 return 1UL << shift;
195} 195}
196 196
197/* Access to l1 and l2 indexed register spaces */
198
199static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
200{
201 u32 val;
202
203 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
204 pci_read_config_dword(iommu->dev, 0xfc, &val);
205 return val;
206}
207
208static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
209{
210 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
211 pci_write_config_dword(iommu->dev, 0xfc, val);
212 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
213}
214
215static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
216{
217 u32 val;
218
219 pci_write_config_dword(iommu->dev, 0xf0, address);
220 pci_read_config_dword(iommu->dev, 0xf4, &val);
221 return val;
222}
223
224static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
225{
226 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
227 pci_write_config_dword(iommu->dev, 0xf4, val);
228}
229
197/**************************************************************************** 230/****************************************************************************
198 * 231 *
199 * AMD IOMMU MMIO register space handling functions 232 * AMD IOMMU MMIO register space handling functions
@@ -619,6 +652,7 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
619{ 652{
620 int cap_ptr = iommu->cap_ptr; 653 int cap_ptr = iommu->cap_ptr;
621 u32 range, misc; 654 u32 range, misc;
655 int i, j;
622 656
623 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 657 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
624 &iommu->cap); 658 &iommu->cap);
@@ -633,12 +667,29 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
633 MMIO_GET_LD(range)); 667 MMIO_GET_LD(range));
634 iommu->evt_msi_num = MMIO_MSI_NUM(misc); 668 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
635 669
636 if (is_rd890_iommu(iommu->dev)) { 670 if (!is_rd890_iommu(iommu->dev))
637 pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); 671 return;
638 pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); 672
639 pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); 673 /*
640 pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); 674 * Some rd890 systems may not be fully reconfigured by the BIOS, so
641 } 675 * it's necessary for us to store this information so it can be
676 * reprogrammed on resume
677 */
678
679 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
680 &iommu->stored_addr_lo);
681 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
682 &iommu->stored_addr_hi);
683
684 /* Low bit locks writes to configuration space */
685 iommu->stored_addr_lo &= ~1;
686
687 for (i = 0; i < 6; i++)
688 for (j = 0; j < 0x12; j++)
689 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
690
691 for (i = 0; i < 0x83; i++)
692 iommu->stored_l2[i] = iommu_read_l2(iommu, i);
642} 693}
643 694
644/* 695/*
@@ -1127,14 +1178,53 @@ static void iommu_init_flags(struct amd_iommu *iommu)
1127 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 1178 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
1128} 1179}
1129 1180
1130static void iommu_apply_quirks(struct amd_iommu *iommu) 1181static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
1131{ 1182{
1132 if (is_rd890_iommu(iommu->dev)) { 1183 int i, j;
1133 pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); 1184 u32 ioc_feature_control;
1134 pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); 1185 struct pci_dev *pdev = NULL;
1135 pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); 1186
1136 pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); 1187 /* RD890 BIOSes may not have completely reconfigured the iommu */
1137 } 1188 if (!is_rd890_iommu(iommu->dev))
1189 return;
1190
1191 /*
1192 * First, we need to ensure that the iommu is enabled. This is
1193 * controlled by a register in the northbridge
1194 */
1195 pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
1196
1197 if (!pdev)
1198 return;
1199
1200 /* Select Northbridge indirect register 0x75 and enable writing */
1201 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
1202 pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
1203
1204 /* Enable the iommu */
1205 if (!(ioc_feature_control & 0x1))
1206 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
1207
1208 pci_dev_put(pdev);
1209
1210 /* Restore the iommu BAR */
1211 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1212 iommu->stored_addr_lo);
1213 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
1214 iommu->stored_addr_hi);
1215
1216 /* Restore the l1 indirect regs for each of the 6 l1s */
1217 for (i = 0; i < 6; i++)
1218 for (j = 0; j < 0x12; j++)
1219 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
1220
1221 /* Restore the l2 indirect regs */
1222 for (i = 0; i < 0x83; i++)
1223 iommu_write_l2(iommu, i, iommu->stored_l2[i]);
1224
1225 /* Lock PCI setup registers */
1226 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1227 iommu->stored_addr_lo | 1);
1138} 1228}
1139 1229
1140/* 1230/*
@@ -1147,7 +1237,6 @@ static void enable_iommus(void)
1147 1237
1148 for_each_iommu(iommu) { 1238 for_each_iommu(iommu) {
1149 iommu_disable(iommu); 1239 iommu_disable(iommu);
1150 iommu_apply_quirks(iommu);
1151 iommu_init_flags(iommu); 1240 iommu_init_flags(iommu);
1152 iommu_set_device_table(iommu); 1241 iommu_set_device_table(iommu);
1153 iommu_enable_command_buffer(iommu); 1242 iommu_enable_command_buffer(iommu);
@@ -1173,6 +1262,11 @@ static void disable_iommus(void)
1173 1262
1174static int amd_iommu_resume(struct sys_device *dev) 1263static int amd_iommu_resume(struct sys_device *dev)
1175{ 1264{
1265 struct amd_iommu *iommu;
1266
1267 for_each_iommu(iommu)
1268 iommu_apply_resume_quirks(iommu);
1269
1176 /* re-load the hardware */ 1270 /* re-load the hardware */
1177 enable_iommus(); 1271 enable_iommus();
1178 1272
@@ -1405,13 +1499,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table)
1405 return 0; 1499 return 0;
1406} 1500}
1407 1501
1408void __init amd_iommu_detect(void) 1502int __init amd_iommu_detect(void)
1409{ 1503{
1410 if (no_iommu || (iommu_detected && !gart_iommu_aperture)) 1504 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
1411 return; 1505 return -ENODEV;
1412 1506
1413 if (amd_iommu_disabled) 1507 if (amd_iommu_disabled)
1414 return; 1508 return -ENODEV;
1415 1509
1416 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { 1510 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
1417 iommu_detected = 1; 1511 iommu_detected = 1;
@@ -1420,7 +1514,9 @@ void __init amd_iommu_detect(void)
1420 1514
1421 /* Make sure ACS will be enabled */ 1515 /* Make sure ACS will be enabled */
1422 pci_request_acs(); 1516 pci_request_acs();
1517 return 1;
1423 } 1518 }
1519 return -ENODEV;
1424} 1520}
1425 1521
1426/**************************************************************************** 1522/****************************************************************************
@@ -1451,3 +1547,8 @@ static int __init parse_amd_iommu_options(char *str)
1451 1547
1452__setup("amd_iommu_dump", parse_amd_iommu_dump); 1548__setup("amd_iommu_dump", parse_amd_iommu_dump);
1453__setup("amd_iommu=", parse_amd_iommu_options); 1549__setup("amd_iommu=", parse_amd_iommu_options);
1550
1551IOMMU_INIT_FINISH(amd_iommu_detect,
1552 gart_iommu_hole_init,
1553 0,
1554 0);
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/amd_nb.c
index 0f7bc20cfcde..8f6463d8ed0d 100644
--- a/arch/x86/kernel/k8.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -8,21 +8,19 @@
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <asm/k8.h> 11#include <asm/amd_nb.h>
12
13int num_k8_northbridges;
14EXPORT_SYMBOL(num_k8_northbridges);
15 12
16static u32 *flush_words; 13static u32 *flush_words;
17 14
18struct pci_device_id k8_nb_ids[] = { 15struct pci_device_id k8_nb_ids[] = {
19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
21 {} 19 {}
22}; 20};
23EXPORT_SYMBOL(k8_nb_ids); 21EXPORT_SYMBOL(k8_nb_ids);
24 22
25struct pci_dev **k8_northbridges; 23struct k8_northbridge_info k8_northbridges;
26EXPORT_SYMBOL(k8_northbridges); 24EXPORT_SYMBOL(k8_northbridges);
27 25
28static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) 26static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
@@ -40,36 +38,45 @@ int cache_k8_northbridges(void)
40 int i; 38 int i;
41 struct pci_dev *dev; 39 struct pci_dev *dev;
42 40
43 if (num_k8_northbridges) 41 if (k8_northbridges.num)
44 return 0; 42 return 0;
45 43
46 dev = NULL; 44 dev = NULL;
47 while ((dev = next_k8_northbridge(dev)) != NULL) 45 while ((dev = next_k8_northbridge(dev)) != NULL)
48 num_k8_northbridges++; 46 k8_northbridges.num++;
47
48 /* some CPU families (e.g. family 0x11) do not support GART */
49 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
50 boot_cpu_data.x86 == 0x15)
51 k8_northbridges.gart_supported = 1;
49 52
50 k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *), 53 k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
51 GFP_KERNEL); 54 sizeof(void *), GFP_KERNEL);
52 if (!k8_northbridges) 55 if (!k8_northbridges.nb_misc)
53 return -ENOMEM; 56 return -ENOMEM;
54 57
55 if (!num_k8_northbridges) { 58 if (!k8_northbridges.num) {
56 k8_northbridges[0] = NULL; 59 k8_northbridges.nb_misc[0] = NULL;
57 return 0; 60 return 0;
58 } 61 }
59 62
60 flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL); 63 if (k8_northbridges.gart_supported) {
61 if (!flush_words) { 64 flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
62 kfree(k8_northbridges); 65 GFP_KERNEL);
63 return -ENOMEM; 66 if (!flush_words) {
67 kfree(k8_northbridges.nb_misc);
68 return -ENOMEM;
69 }
64 } 70 }
65 71
66 dev = NULL; 72 dev = NULL;
67 i = 0; 73 i = 0;
68 while ((dev = next_k8_northbridge(dev)) != NULL) { 74 while ((dev = next_k8_northbridge(dev)) != NULL) {
69 k8_northbridges[i] = dev; 75 k8_northbridges.nb_misc[i] = dev;
70 pci_read_config_dword(dev, 0x9c, &flush_words[i++]); 76 if (k8_northbridges.gart_supported)
77 pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
71 } 78 }
72 k8_northbridges[i] = NULL; 79 k8_northbridges.nb_misc[i] = NULL;
73 return 0; 80 return 0;
74} 81}
75EXPORT_SYMBOL_GPL(cache_k8_northbridges); 82EXPORT_SYMBOL_GPL(cache_k8_northbridges);
@@ -93,22 +100,25 @@ void k8_flush_garts(void)
93 unsigned long flags; 100 unsigned long flags;
94 static DEFINE_SPINLOCK(gart_lock); 101 static DEFINE_SPINLOCK(gart_lock);
95 102
103 if (!k8_northbridges.gart_supported)
104 return;
105
96 /* Avoid races between AGP and IOMMU. In theory it's not needed 106 /* Avoid races between AGP and IOMMU. In theory it's not needed
97 but I'm not sure if the hardware won't lose flush requests 107 but I'm not sure if the hardware won't lose flush requests
98 when another is pending. This whole thing is so expensive anyways 108 when another is pending. This whole thing is so expensive anyways
99 that it doesn't matter to serialize more. -AK */ 109 that it doesn't matter to serialize more. -AK */
100 spin_lock_irqsave(&gart_lock, flags); 110 spin_lock_irqsave(&gart_lock, flags);
101 flushed = 0; 111 flushed = 0;
102 for (i = 0; i < num_k8_northbridges; i++) { 112 for (i = 0; i < k8_northbridges.num; i++) {
103 pci_write_config_dword(k8_northbridges[i], 0x9c, 113 pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
104 flush_words[i]|1); 114 flush_words[i]|1);
105 flushed++; 115 flushed++;
106 } 116 }
107 for (i = 0; i < num_k8_northbridges; i++) { 117 for (i = 0; i < k8_northbridges.num; i++) {
108 u32 w; 118 u32 w;
109 /* Make sure the hardware actually executed the flush*/ 119 /* Make sure the hardware actually executed the flush*/
110 for (;;) { 120 for (;;) {
111 pci_read_config_dword(k8_northbridges[i], 121 pci_read_config_dword(k8_northbridges.nb_misc[i],
112 0x9c, &w); 122 0x9c, &w);
113 if (!(w & 1)) 123 if (!(w & 1))
114 break; 124 break;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 8dd77800ff5d..92543c73cf8e 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -231,34 +231,6 @@ static void apbt_restart_clocksource(struct clocksource *cs)
231 apbt_start_counter(phy_cs_timer_id); 231 apbt_start_counter(phy_cs_timer_id);
232} 232}
233 233
234/* Setup IRQ routing via IOAPIC */
235#ifdef CONFIG_SMP
236static void apbt_setup_irq(struct apbt_dev *adev)
237{
238 struct irq_chip *chip;
239 struct irq_desc *desc;
240
241 /* timer0 irq has been setup early */
242 if (adev->irq == 0)
243 return;
244 desc = irq_to_desc(adev->irq);
245 chip = get_irq_chip(adev->irq);
246 disable_irq(adev->irq);
247 desc->status |= IRQ_MOVE_PCNTXT;
248 irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
249 /* APB timer irqs are set up as mp_irqs, timer is edge triggerred */
250 set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge");
251 enable_irq(adev->irq);
252 if (system_state == SYSTEM_BOOTING)
253 if (request_irq(adev->irq, apbt_interrupt_handler,
254 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
255 adev->name, adev)) {
256 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
257 adev->num);
258 }
259}
260#endif
261
262static void apbt_enable_int(int n) 234static void apbt_enable_int(int n)
263{ 235{
264 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); 236 unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL);
@@ -334,6 +306,27 @@ static int __init apbt_clockevent_register(void)
334} 306}
335 307
336#ifdef CONFIG_SMP 308#ifdef CONFIG_SMP
309
310static void apbt_setup_irq(struct apbt_dev *adev)
311{
312 /* timer0 irq has been setup early */
313 if (adev->irq == 0)
314 return;
315
316 if (system_state == SYSTEM_BOOTING) {
317 irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
318 /* APB timer irqs are set up as mp_irqs, timer is edge type */
319 __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
320 if (request_irq(adev->irq, apbt_interrupt_handler,
321 IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING,
322 adev->name, adev)) {
323 printk(KERN_ERR "Failed request IRQ for APBT%d\n",
324 adev->num);
325 }
326 } else
327 enable_irq(adev->irq);
328}
329
337/* Should be called with per cpu */ 330/* Should be called with per cpu */
338void apbt_setup_secondary_clock(void) 331void apbt_setup_secondary_clock(void)
339{ 332{
@@ -343,7 +336,7 @@ void apbt_setup_secondary_clock(void)
343 336
344 /* Don't register boot CPU clockevent */ 337 /* Don't register boot CPU clockevent */
345 cpu = smp_processor_id(); 338 cpu = smp_processor_id();
346 if (cpu == boot_cpu_id) 339 if (!cpu)
347 return; 340 return;
348 /* 341 /*
349 * We need to calculate the scaled math multiplication factor for 342 * We need to calculate the scaled math multiplication factor for
@@ -389,16 +382,17 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
389 382
390 switch (action & 0xf) { 383 switch (action & 0xf) {
391 case CPU_DEAD: 384 case CPU_DEAD:
385 disable_irq(adev->irq);
392 apbt_disable_int(cpu); 386 apbt_disable_int(cpu);
393 if (system_state == SYSTEM_RUNNING) 387 if (system_state == SYSTEM_RUNNING) {
394 pr_debug("skipping APBT CPU %lu offline\n", cpu); 388 pr_debug("skipping APBT CPU %lu offline\n", cpu);
395 else if (adev) { 389 } else if (adev) {
396 pr_debug("APBT clockevent for cpu %lu offline\n", cpu); 390 pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
397 free_irq(adev->irq, adev); 391 free_irq(adev->irq, adev);
398 } 392 }
399 break; 393 break;
400 default: 394 default:
401 pr_debug(KERN_INFO "APBT notified %lu, no action\n", action); 395 pr_debug("APBT notified %lu, no action\n", action);
402 } 396 }
403 return NOTIFY_OK; 397 return NOTIFY_OK;
404} 398}
@@ -552,7 +546,7 @@ bad_count:
552 pr_debug("APB CS going back %lx:%lx:%lx ", 546 pr_debug("APB CS going back %lx:%lx:%lx ",
553 t2, last_read, t2 - last_read); 547 t2, last_read, t2 - last_read);
554bad_count_x3: 548bad_count_x3:
555 pr_debug(KERN_INFO "tripple check enforced\n"); 549 pr_debug("triple check enforced\n");
556 t0 = apbt_readl(phy_cs_timer_id, 550 t0 = apbt_readl(phy_cs_timer_id,
557 APBTMR_N_CURRENT_VALUE); 551 APBTMR_N_CURRENT_VALUE);
558 udelay(1); 552 udelay(1);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index a2e0caf26e17..b3a16e8f0703 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -27,7 +27,7 @@
27#include <asm/gart.h> 27#include <asm/gart.h>
28#include <asm/pci-direct.h> 28#include <asm/pci-direct.h>
29#include <asm/dma.h> 29#include <asm/dma.h>
30#include <asm/k8.h> 30#include <asm/amd_nb.h>
31#include <asm/x86_init.h> 31#include <asm/x86_init.h>
32 32
33int gart_iommu_aperture; 33int gart_iommu_aperture;
@@ -307,7 +307,7 @@ void __init early_gart_iommu_check(void)
307 continue; 307 continue;
308 308
309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
310 aper_enabled = ctl & AMD64_GARTEN; 310 aper_enabled = ctl & GARTEN;
311 aper_order = (ctl >> 1) & 7; 311 aper_order = (ctl >> 1) & 7;
312 aper_size = (32 * 1024 * 1024) << aper_order; 312 aper_size = (32 * 1024 * 1024) << aper_order;
313 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; 313 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
@@ -362,7 +362,7 @@ void __init early_gart_iommu_check(void)
362 continue; 362 continue;
363 363
364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
365 ctl &= ~AMD64_GARTEN; 365 ctl &= ~GARTEN;
366 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); 366 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
367 } 367 }
368 } 368 }
@@ -371,7 +371,7 @@ void __init early_gart_iommu_check(void)
371 371
372static int __initdata printed_gart_size_msg; 372static int __initdata printed_gart_size_msg;
373 373
374void __init gart_iommu_hole_init(void) 374int __init gart_iommu_hole_init(void)
375{ 375{
376 u32 agp_aper_base = 0, agp_aper_order = 0; 376 u32 agp_aper_base = 0, agp_aper_order = 0;
377 u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; 377 u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
@@ -381,7 +381,7 @@ void __init gart_iommu_hole_init(void)
381 381
382 if (gart_iommu_aperture_disabled || !fix_aperture || 382 if (gart_iommu_aperture_disabled || !fix_aperture ||
383 !early_pci_allowed()) 383 !early_pci_allowed())
384 return; 384 return -ENODEV;
385 385
386 printk(KERN_INFO "Checking aperture...\n"); 386 printk(KERN_INFO "Checking aperture...\n");
387 387
@@ -463,8 +463,9 @@ out:
463 unsigned long n = (32 * 1024 * 1024) << last_aper_order; 463 unsigned long n = (32 * 1024 * 1024) << last_aper_order;
464 464
465 insert_aperture_resource((u32)last_aper_base, n); 465 insert_aperture_resource((u32)last_aper_base, n);
466 return 1;
466 } 467 }
467 return; 468 return 0;
468 } 469 }
469 470
470 if (!fallback_aper_force) { 471 if (!fallback_aper_force) {
@@ -500,13 +501,18 @@ out:
500 panic("Not enough memory for aperture"); 501 panic("Not enough memory for aperture");
501 } 502 }
502 } else { 503 } else {
503 return; 504 return 0;
504 } 505 }
505 506
506 /* Fix up the north bridges */ 507 /* Fix up the north bridges */
507 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 508 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
508 int bus; 509 int bus, dev_base, dev_limit;
509 int dev_base, dev_limit; 510
511 /*
512 * Don't enable translation yet but enable GART IO and CPU
513 * accesses and set DISTLBWALKPRB since GART table memory is UC.
514 */
515 u32 ctl = DISTLBWALKPRB | aper_order << 1;
510 516
511 bus = bus_dev_ranges[i].bus; 517 bus = bus_dev_ranges[i].bus;
512 dev_base = bus_dev_ranges[i].dev_base; 518 dev_base = bus_dev_ranges[i].dev_base;
@@ -515,13 +521,12 @@ out:
515 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 521 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
516 continue; 522 continue;
517 523
518 /* Don't enable translation yet. That is done later. 524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
519 Assume this BIOS didn't initialise the GART so
520 just overwrite all previous bits */
521 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
522 write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); 525 write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
523 } 526 }
524 } 527 }
525 528
526 set_up_gart_resume(aper_order, aper_alloc); 529 set_up_gart_resume(aper_order, aper_alloc);
530
531 return 1;
527} 532}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e3b534cda49a..850657d1b0ed 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -52,6 +52,7 @@
52#include <asm/mce.h> 52#include <asm/mce.h>
53#include <asm/kvm_para.h> 53#include <asm/kvm_para.h>
54#include <asm/tsc.h> 54#include <asm/tsc.h>
55#include <asm/atomic.h>
55 56
56unsigned int num_processors; 57unsigned int num_processors;
57 58
@@ -370,38 +371,87 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
370} 371}
371 372
372/* 373/*
373 * Setup extended LVT, AMD specific (K8, family 10h) 374 * Setup extended LVT, AMD specific
374 * 375 *
375 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and 376 * Software should use the LVT offsets the BIOS provides. The offsets
376 * MCE interrupts are supported. Thus MCE offset must be set to 0. 377 * are determined by the subsystems using it like those for MCE
378 * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts
379 * are supported. Beginning with family 10h at least 4 offsets are
380 * available.
377 * 381 *
378 * If mask=1, the LVT entry does not generate interrupts while mask=0 382 * Since the offsets must be consistent for all cores, we keep track
379 * enables the vector. See also the BKDGs. 383 * of the LVT offsets in software and reserve the offset for the same
384 * vector also to be used on other cores. An offset is freed by
385 * setting the entry to APIC_EILVT_MASKED.
386 *
387 * If the BIOS is right, there should be no conflicts. Otherwise a
388 * "[Firmware Bug]: ..." error message is generated. However, if
389 * software does not properly determines the offsets, it is not
390 * necessarily a BIOS bug.
380 */ 391 */
381 392
382#define APIC_EILVT_LVTOFF_MCE 0 393static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
383#define APIC_EILVT_LVTOFF_IBS 1
384 394
385static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) 395static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
386{ 396{
387 unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0); 397 return (old & APIC_EILVT_MASKED)
388 unsigned int v = (mask << 16) | (msg_type << 8) | vector; 398 || (new == APIC_EILVT_MASKED)
389 399 || ((new & ~APIC_EILVT_MASKED) == old);
390 apic_write(reg, v);
391} 400}
392 401
393u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) 402static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
394{ 403{
395 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); 404 unsigned int rsvd; /* 0: uninitialized */
396 return APIC_EILVT_LVTOFF_MCE; 405
406 if (offset >= APIC_EILVT_NR_MAX)
407 return ~0;
408
409 rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED;
410 do {
411 if (rsvd &&
412 !eilvt_entry_is_changeable(rsvd, new))
413 /* may not change if vectors are different */
414 return rsvd;
415 rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
416 } while (rsvd != new);
417
418 return new;
397} 419}
398 420
399u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) 421/*
422 * If mask=1, the LVT entry does not generate interrupts while mask=0
423 * enables the vector. See also the BKDGs.
424 */
425
426int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
400{ 427{
401 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); 428 unsigned long reg = APIC_EILVTn(offset);
402 return APIC_EILVT_LVTOFF_IBS; 429 unsigned int new, old, reserved;
430
431 new = (mask << 16) | (msg_type << 8) | vector;
432 old = apic_read(reg);
433 reserved = reserve_eilvt_offset(offset, new);
434
435 if (reserved != new) {
436 pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but "
437 "vector 0x%x was already reserved by another core, "
438 "APIC%lX=0x%x\n",
439 smp_processor_id(), new, reserved, reg, old);
440 return -EINVAL;
441 }
442
443 if (!eilvt_entry_is_changeable(old, new)) {
444 pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but "
445 "register already in use, APIC%lX=0x%x\n",
446 smp_processor_id(), new, reg, old);
447 return -EBUSY;
448 }
449
450 apic_write(reg, new);
451
452 return 0;
403} 453}
404EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); 454EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
405 455
406/* 456/*
407 * Program the next event, relative to now 457 * Program the next event, relative to now
@@ -1665,10 +1715,7 @@ int __init APIC_init_uniprocessor(void)
1665 } 1715 }
1666#endif 1716#endif
1667 1717
1668#ifndef CONFIG_SMP
1669 enable_IR_x2apic();
1670 default_setup_apic_routing(); 1718 default_setup_apic_routing();
1671#endif
1672 1719
1673 verify_local_APIC(); 1720 verify_local_APIC();
1674 connect_bsp_APIC(); 1721 connect_bsp_APIC();
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5c5b8f3dddb5..8ae808d110f4 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -131,13 +131,9 @@ struct irq_pin_list {
131 struct irq_pin_list *next; 131 struct irq_pin_list *next;
132}; 132};
133 133
134static struct irq_pin_list *get_one_free_irq_2_pin(int node) 134static struct irq_pin_list *alloc_irq_pin_list(int node)
135{ 135{
136 struct irq_pin_list *pin; 136 return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
137
138 pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
139
140 return pin;
141} 137}
142 138
143/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 139/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -150,10 +146,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS];
150int __init arch_early_irq_init(void) 146int __init arch_early_irq_init(void)
151{ 147{
152 struct irq_cfg *cfg; 148 struct irq_cfg *cfg;
153 struct irq_desc *desc; 149 int count, node, i;
154 int count;
155 int node;
156 int i;
157 150
158 if (!legacy_pic->nr_legacy_irqs) { 151 if (!legacy_pic->nr_legacy_irqs) {
159 nr_irqs_gsi = 0; 152 nr_irqs_gsi = 0;
@@ -162,13 +155,15 @@ int __init arch_early_irq_init(void)
162 155
163 cfg = irq_cfgx; 156 cfg = irq_cfgx;
164 count = ARRAY_SIZE(irq_cfgx); 157 count = ARRAY_SIZE(irq_cfgx);
165 node= cpu_to_node(boot_cpu_id); 158 node = cpu_to_node(0);
159
160 /* Make sure the legacy interrupts are marked in the bitmap */
161 irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
166 162
167 for (i = 0; i < count; i++) { 163 for (i = 0; i < count; i++) {
168 desc = irq_to_desc(i); 164 set_irq_chip_data(i, &cfg[i]);
169 desc->chip_data = &cfg[i]; 165 zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
170 zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); 166 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
171 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
172 /* 167 /*
173 * For legacy IRQ's, start with assigning irq0 to irq15 to 168 * For legacy IRQ's, start with assigning irq0 to irq15 to
174 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. 169 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
@@ -183,170 +178,88 @@ int __init arch_early_irq_init(void)
183} 178}
184 179
185#ifdef CONFIG_SPARSE_IRQ 180#ifdef CONFIG_SPARSE_IRQ
186struct irq_cfg *irq_cfg(unsigned int irq) 181static struct irq_cfg *irq_cfg(unsigned int irq)
187{ 182{
188 struct irq_cfg *cfg = NULL; 183 return get_irq_chip_data(irq);
189 struct irq_desc *desc;
190
191 desc = irq_to_desc(irq);
192 if (desc)
193 cfg = desc->chip_data;
194
195 return cfg;
196} 184}
197 185
198static struct irq_cfg *get_one_free_irq_cfg(int node) 186static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
199{ 187{
200 struct irq_cfg *cfg; 188 struct irq_cfg *cfg;
201 189
202 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); 190 cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
203 if (cfg) { 191 if (!cfg)
204 if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { 192 return NULL;
205 kfree(cfg); 193 if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
206 cfg = NULL; 194 goto out_cfg;
207 } else if (!zalloc_cpumask_var_node(&cfg->old_domain, 195 if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
208 GFP_ATOMIC, node)) { 196 goto out_domain;
209 free_cpumask_var(cfg->domain);
210 kfree(cfg);
211 cfg = NULL;
212 }
213 }
214
215 return cfg; 197 return cfg;
198out_domain:
199 free_cpumask_var(cfg->domain);
200out_cfg:
201 kfree(cfg);
202 return NULL;
216} 203}
217 204
218int arch_init_chip_data(struct irq_desc *desc, int node) 205static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
219{
220 struct irq_cfg *cfg;
221
222 cfg = desc->chip_data;
223 if (!cfg) {
224 desc->chip_data = get_one_free_irq_cfg(node);
225 if (!desc->chip_data) {
226 printk(KERN_ERR "can not alloc irq_cfg\n");
227 BUG_ON(1);
228 }
229 }
230
231 return 0;
232}
233
234/* for move_irq_desc */
235static void
236init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
237{ 206{
238 struct irq_pin_list *old_entry, *head, *tail, *entry; 207 if (!cfg)
239
240 cfg->irq_2_pin = NULL;
241 old_entry = old_cfg->irq_2_pin;
242 if (!old_entry)
243 return;
244
245 entry = get_one_free_irq_2_pin(node);
246 if (!entry)
247 return; 208 return;
209 set_irq_chip_data(at, NULL);
210 free_cpumask_var(cfg->domain);
211 free_cpumask_var(cfg->old_domain);
212 kfree(cfg);
213}
248 214
249 entry->apic = old_entry->apic; 215#else
250 entry->pin = old_entry->pin;
251 head = entry;
252 tail = entry;
253 old_entry = old_entry->next;
254 while (old_entry) {
255 entry = get_one_free_irq_2_pin(node);
256 if (!entry) {
257 entry = head;
258 while (entry) {
259 head = entry->next;
260 kfree(entry);
261 entry = head;
262 }
263 /* still use the old one */
264 return;
265 }
266 entry->apic = old_entry->apic;
267 entry->pin = old_entry->pin;
268 tail->next = entry;
269 tail = entry;
270 old_entry = old_entry->next;
271 }
272 216
273 tail->next = NULL; 217struct irq_cfg *irq_cfg(unsigned int irq)
274 cfg->irq_2_pin = head; 218{
219 return irq < nr_irqs ? irq_cfgx + irq : NULL;
275} 220}
276 221
277static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) 222static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
278{ 223{
279 struct irq_pin_list *entry, *next; 224 return irq_cfgx + irq;
280 225}
281 if (old_cfg->irq_2_pin == cfg->irq_2_pin)
282 return;
283 226
284 entry = old_cfg->irq_2_pin; 227static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { }
285 228
286 while (entry) { 229#endif
287 next = entry->next;
288 kfree(entry);
289 entry = next;
290 }
291 old_cfg->irq_2_pin = NULL;
292}
293 230
294void arch_init_copy_chip_data(struct irq_desc *old_desc, 231static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
295 struct irq_desc *desc, int node)
296{ 232{
233 int res = irq_alloc_desc_at(at, node);
297 struct irq_cfg *cfg; 234 struct irq_cfg *cfg;
298 struct irq_cfg *old_cfg;
299
300 cfg = get_one_free_irq_cfg(node);
301 235
302 if (!cfg) 236 if (res < 0) {
303 return; 237 if (res != -EEXIST)
304 238 return NULL;
305 desc->chip_data = cfg; 239 cfg = get_irq_chip_data(at);
306 240 if (cfg)
307 old_cfg = old_desc->chip_data; 241 return cfg;
308 242 }
309 cfg->vector = old_cfg->vector;
310 cfg->move_in_progress = old_cfg->move_in_progress;
311 cpumask_copy(cfg->domain, old_cfg->domain);
312 cpumask_copy(cfg->old_domain, old_cfg->old_domain);
313
314 init_copy_irq_2_pin(old_cfg, cfg, node);
315}
316 243
317static void free_irq_cfg(struct irq_cfg *cfg) 244 cfg = alloc_irq_cfg(at, node);
318{ 245 if (cfg)
319 free_cpumask_var(cfg->domain); 246 set_irq_chip_data(at, cfg);
320 free_cpumask_var(cfg->old_domain); 247 else
321 kfree(cfg); 248 irq_free_desc(at);
249 return cfg;
322} 250}
323 251
324void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) 252static int alloc_irq_from(unsigned int from, int node)
325{ 253{
326 struct irq_cfg *old_cfg, *cfg; 254 return irq_alloc_desc_from(from, node);
327
328 old_cfg = old_desc->chip_data;
329 cfg = desc->chip_data;
330
331 if (old_cfg == cfg)
332 return;
333
334 if (old_cfg) {
335 free_irq_2_pin(old_cfg, cfg);
336 free_irq_cfg(old_cfg);
337 old_desc->chip_data = NULL;
338 }
339} 255}
340/* end for move_irq_desc */
341 256
342#else 257static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
343struct irq_cfg *irq_cfg(unsigned int irq)
344{ 258{
345 return irq < nr_irqs ? irq_cfgx + irq : NULL; 259 free_irq_cfg(at, cfg);
260 irq_free_desc(at);
346} 261}
347 262
348#endif
349
350struct io_apic { 263struct io_apic {
351 unsigned int index; 264 unsigned int index;
352 unsigned int unused[3]; 265 unsigned int unused[3];
@@ -451,7 +364,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
451 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 364 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
452} 365}
453 366
454void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 367static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
455{ 368{
456 unsigned long flags; 369 unsigned long flags;
457 raw_spin_lock_irqsave(&ioapic_lock, flags); 370 raw_spin_lock_irqsave(&ioapic_lock, flags);
@@ -481,7 +394,7 @@ static void ioapic_mask_entry(int apic, int pin)
481 * fast in the common case, and fast for shared ISA-space IRQs. 394 * fast in the common case, and fast for shared ISA-space IRQs.
482 */ 395 */
483static int 396static int
484add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) 397__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
485{ 398{
486 struct irq_pin_list **last, *entry; 399 struct irq_pin_list **last, *entry;
487 400
@@ -493,7 +406,7 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
493 last = &entry->next; 406 last = &entry->next;
494 } 407 }
495 408
496 entry = get_one_free_irq_2_pin(node); 409 entry = alloc_irq_pin_list(node);
497 if (!entry) { 410 if (!entry) {
498 printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", 411 printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
499 node, apic, pin); 412 node, apic, pin);
@@ -508,7 +421,7 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
508 421
509static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) 422static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
510{ 423{
511 if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) 424 if (__add_pin_to_irq_node(cfg, node, apic, pin))
512 panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); 425 panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
513} 426}
514 427
@@ -571,11 +484,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
571 IO_APIC_REDIR_LEVEL_TRIGGER, NULL); 484 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
572} 485}
573 486
574static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
575{
576 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
577}
578
579static void io_apic_sync(struct irq_pin_list *entry) 487static void io_apic_sync(struct irq_pin_list *entry)
580{ 488{
581 /* 489 /*
@@ -587,44 +495,37 @@ static void io_apic_sync(struct irq_pin_list *entry)
587 readl(&io_apic->data); 495 readl(&io_apic->data);
588} 496}
589 497
590static void __mask_IO_APIC_irq(struct irq_cfg *cfg) 498static void mask_ioapic(struct irq_cfg *cfg)
591{ 499{
500 unsigned long flags;
501
502 raw_spin_lock_irqsave(&ioapic_lock, flags);
592 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); 503 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
504 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
593} 505}
594 506
595static void mask_IO_APIC_irq_desc(struct irq_desc *desc) 507static void mask_ioapic_irq(struct irq_data *data)
596{ 508{
597 struct irq_cfg *cfg = desc->chip_data; 509 mask_ioapic(data->chip_data);
598 unsigned long flags; 510}
599
600 BUG_ON(!cfg);
601 511
602 raw_spin_lock_irqsave(&ioapic_lock, flags); 512static void __unmask_ioapic(struct irq_cfg *cfg)
603 __mask_IO_APIC_irq(cfg); 513{
604 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 514 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
605} 515}
606 516
607static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) 517static void unmask_ioapic(struct irq_cfg *cfg)
608{ 518{
609 struct irq_cfg *cfg = desc->chip_data;
610 unsigned long flags; 519 unsigned long flags;
611 520
612 raw_spin_lock_irqsave(&ioapic_lock, flags); 521 raw_spin_lock_irqsave(&ioapic_lock, flags);
613 __unmask_IO_APIC_irq(cfg); 522 __unmask_ioapic(cfg);
614 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 523 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
615} 524}
616 525
617static void mask_IO_APIC_irq(unsigned int irq) 526static void unmask_ioapic_irq(struct irq_data *data)
618{ 527{
619 struct irq_desc *desc = irq_to_desc(irq); 528 unmask_ioapic(data->chip_data);
620
621 mask_IO_APIC_irq_desc(desc);
622}
623static void unmask_IO_APIC_irq(unsigned int irq)
624{
625 struct irq_desc *desc = irq_to_desc(irq);
626
627 unmask_IO_APIC_irq_desc(desc);
628} 529}
629 530
630static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 531static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
@@ -694,14 +595,14 @@ struct IO_APIC_route_entry **alloc_ioapic_entries(void)
694 struct IO_APIC_route_entry **ioapic_entries; 595 struct IO_APIC_route_entry **ioapic_entries;
695 596
696 ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, 597 ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
697 GFP_ATOMIC); 598 GFP_KERNEL);
698 if (!ioapic_entries) 599 if (!ioapic_entries)
699 return 0; 600 return 0;
700 601
701 for (apic = 0; apic < nr_ioapics; apic++) { 602 for (apic = 0; apic < nr_ioapics; apic++) {
702 ioapic_entries[apic] = 603 ioapic_entries[apic] =
703 kzalloc(sizeof(struct IO_APIC_route_entry) * 604 kzalloc(sizeof(struct IO_APIC_route_entry) *
704 nr_ioapic_registers[apic], GFP_ATOMIC); 605 nr_ioapic_registers[apic], GFP_KERNEL);
705 if (!ioapic_entries[apic]) 606 if (!ioapic_entries[apic])
706 goto nomem; 607 goto nomem;
707 } 608 }
@@ -1259,7 +1160,6 @@ void __setup_vector_irq(int cpu)
1259 /* Initialize vector_irq on a new cpu */ 1160 /* Initialize vector_irq on a new cpu */
1260 int irq, vector; 1161 int irq, vector;
1261 struct irq_cfg *cfg; 1162 struct irq_cfg *cfg;
1262 struct irq_desc *desc;
1263 1163
1264 /* 1164 /*
1265 * vector_lock will make sure that we don't run into irq vector 1165 * vector_lock will make sure that we don't run into irq vector
@@ -1268,9 +1168,10 @@ void __setup_vector_irq(int cpu)
1268 */ 1168 */
1269 raw_spin_lock(&vector_lock); 1169 raw_spin_lock(&vector_lock);
1270 /* Mark the inuse vectors */ 1170 /* Mark the inuse vectors */
1271 for_each_irq_desc(irq, desc) { 1171 for_each_active_irq(irq) {
1272 cfg = desc->chip_data; 1172 cfg = get_irq_chip_data(irq);
1273 1173 if (!cfg)
1174 continue;
1274 /* 1175 /*
1275 * If it is a legacy IRQ handled by the legacy PIC, this cpu 1176 * If it is a legacy IRQ handled by the legacy PIC, this cpu
1276 * will be part of the irq_cfg's domain. 1177 * will be part of the irq_cfg's domain.
@@ -1327,17 +1228,17 @@ static inline int IO_APIC_irq_trigger(int irq)
1327} 1228}
1328#endif 1229#endif
1329 1230
1330static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) 1231static void ioapic_register_intr(unsigned int irq, unsigned long trigger)
1331{ 1232{
1332 1233
1333 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1234 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1334 trigger == IOAPIC_LEVEL) 1235 trigger == IOAPIC_LEVEL)
1335 desc->status |= IRQ_LEVEL; 1236 irq_set_status_flags(irq, IRQ_LEVEL);
1336 else 1237 else
1337 desc->status &= ~IRQ_LEVEL; 1238 irq_clear_status_flags(irq, IRQ_LEVEL);
1338 1239
1339 if (irq_remapped(irq)) { 1240 if (irq_remapped(get_irq_chip_data(irq))) {
1340 desc->status |= IRQ_MOVE_PCNTXT; 1241 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
1341 if (trigger) 1242 if (trigger)
1342 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, 1243 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1343 handle_fasteoi_irq, 1244 handle_fasteoi_irq,
@@ -1358,10 +1259,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1358 handle_edge_irq, "edge"); 1259 handle_edge_irq, "edge");
1359} 1260}
1360 1261
1361int setup_ioapic_entry(int apic_id, int irq, 1262static int setup_ioapic_entry(int apic_id, int irq,
1362 struct IO_APIC_route_entry *entry, 1263 struct IO_APIC_route_entry *entry,
1363 unsigned int destination, int trigger, 1264 unsigned int destination, int trigger,
1364 int polarity, int vector, int pin) 1265 int polarity, int vector, int pin)
1365{ 1266{
1366 /* 1267 /*
1367 * add it to the IO-APIC irq-routing table: 1268 * add it to the IO-APIC irq-routing table:
@@ -1382,21 +1283,7 @@ int setup_ioapic_entry(int apic_id, int irq,
1382 if (index < 0) 1283 if (index < 0)
1383 panic("Failed to allocate IRTE for ioapic %d\n", apic_id); 1284 panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
1384 1285
1385 memset(&irte, 0, sizeof(irte)); 1286 prepare_irte(&irte, vector, destination);
1386
1387 irte.present = 1;
1388 irte.dst_mode = apic->irq_dest_mode;
1389 /*
1390 * Trigger mode in the IRTE will always be edge, and the
1391 * actual level or edge trigger will be setup in the IO-APIC
1392 * RTE. This will help simplify level triggered irq migration.
1393 * For more details, see the comments above explainig IO-APIC
1394 * irq migration in the presence of interrupt-remapping.
1395 */
1396 irte.trigger_mode = 0;
1397 irte.dlvry_mode = apic->irq_delivery_mode;
1398 irte.vector = vector;
1399 irte.dest_id = IRTE_DEST(destination);
1400 1287
1401 /* Set source-id of interrupt request */ 1288 /* Set source-id of interrupt request */
1402 set_ioapic_sid(&irte, apic_id); 1289 set_ioapic_sid(&irte, apic_id);
@@ -1431,18 +1318,14 @@ int setup_ioapic_entry(int apic_id, int irq,
1431 return 0; 1318 return 0;
1432} 1319}
1433 1320
1434static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, 1321static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
1435 int trigger, int polarity) 1322 struct irq_cfg *cfg, int trigger, int polarity)
1436{ 1323{
1437 struct irq_cfg *cfg;
1438 struct IO_APIC_route_entry entry; 1324 struct IO_APIC_route_entry entry;
1439 unsigned int dest; 1325 unsigned int dest;
1440 1326
1441 if (!IO_APIC_IRQ(irq)) 1327 if (!IO_APIC_IRQ(irq))
1442 return; 1328 return;
1443
1444 cfg = desc->chip_data;
1445
1446 /* 1329 /*
1447 * For legacy irqs, cfg->domain starts with cpu 0 for legacy 1330 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
1448 * controllers like 8259. Now that IO-APIC can handle this irq, update 1331 * controllers like 8259. Now that IO-APIC can handle this irq, update
@@ -1471,9 +1354,9 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1471 return; 1354 return;
1472 } 1355 }
1473 1356
1474 ioapic_register_intr(irq, desc, trigger); 1357 ioapic_register_intr(irq, trigger);
1475 if (irq < legacy_pic->nr_legacy_irqs) 1358 if (irq < legacy_pic->nr_legacy_irqs)
1476 legacy_pic->chip->mask(irq); 1359 legacy_pic->mask(irq);
1477 1360
1478 ioapic_write_entry(apic_id, pin, entry); 1361 ioapic_write_entry(apic_id, pin, entry);
1479} 1362}
@@ -1484,11 +1367,9 @@ static struct {
1484 1367
1485static void __init setup_IO_APIC_irqs(void) 1368static void __init setup_IO_APIC_irqs(void)
1486{ 1369{
1487 int apic_id, pin, idx, irq; 1370 int apic_id, pin, idx, irq, notcon = 0;
1488 int notcon = 0; 1371 int node = cpu_to_node(0);
1489 struct irq_desc *desc;
1490 struct irq_cfg *cfg; 1372 struct irq_cfg *cfg;
1491 int node = cpu_to_node(boot_cpu_id);
1492 1373
1493 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1374 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1494 1375
@@ -1525,19 +1406,17 @@ static void __init setup_IO_APIC_irqs(void)
1525 apic->multi_timer_check(apic_id, irq)) 1406 apic->multi_timer_check(apic_id, irq))
1526 continue; 1407 continue;
1527 1408
1528 desc = irq_to_desc_alloc_node(irq, node); 1409 cfg = alloc_irq_and_cfg_at(irq, node);
1529 if (!desc) { 1410 if (!cfg)
1530 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1531 continue; 1411 continue;
1532 } 1412
1533 cfg = desc->chip_data;
1534 add_pin_to_irq_node(cfg, node, apic_id, pin); 1413 add_pin_to_irq_node(cfg, node, apic_id, pin);
1535 /* 1414 /*
1536 * don't mark it in pin_programmed, so later acpi could 1415 * don't mark it in pin_programmed, so later acpi could
1537 * set it correctly when irq < 16 1416 * set it correctly when irq < 16
1538 */ 1417 */
1539 setup_IO_APIC_irq(apic_id, pin, irq, desc, 1418 setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
1540 irq_trigger(idx), irq_polarity(idx)); 1419 irq_polarity(idx));
1541 } 1420 }
1542 1421
1543 if (notcon) 1422 if (notcon)
@@ -1552,9 +1431,7 @@ static void __init setup_IO_APIC_irqs(void)
1552 */ 1431 */
1553void setup_IO_APIC_irq_extra(u32 gsi) 1432void setup_IO_APIC_irq_extra(u32 gsi)
1554{ 1433{
1555 int apic_id = 0, pin, idx, irq; 1434 int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
1556 int node = cpu_to_node(boot_cpu_id);
1557 struct irq_desc *desc;
1558 struct irq_cfg *cfg; 1435 struct irq_cfg *cfg;
1559 1436
1560 /* 1437 /*
@@ -1570,18 +1447,15 @@ void setup_IO_APIC_irq_extra(u32 gsi)
1570 return; 1447 return;
1571 1448
1572 irq = pin_2_irq(idx, apic_id, pin); 1449 irq = pin_2_irq(idx, apic_id, pin);
1573#ifdef CONFIG_SPARSE_IRQ 1450
1574 desc = irq_to_desc(irq); 1451 /* Only handle the non legacy irqs on secondary ioapics */
1575 if (desc) 1452 if (apic_id == 0 || irq < NR_IRQS_LEGACY)
1576 return; 1453 return;
1577#endif 1454
1578 desc = irq_to_desc_alloc_node(irq, node); 1455 cfg = alloc_irq_and_cfg_at(irq, node);
1579 if (!desc) { 1456 if (!cfg)
1580 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1581 return; 1457 return;
1582 }
1583 1458
1584 cfg = desc->chip_data;
1585 add_pin_to_irq_node(cfg, node, apic_id, pin); 1459 add_pin_to_irq_node(cfg, node, apic_id, pin);
1586 1460
1587 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { 1461 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
@@ -1591,7 +1465,7 @@ void setup_IO_APIC_irq_extra(u32 gsi)
1591 } 1465 }
1592 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); 1466 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
1593 1467
1594 setup_IO_APIC_irq(apic_id, pin, irq, desc, 1468 setup_ioapic_irq(apic_id, pin, irq, cfg,
1595 irq_trigger(idx), irq_polarity(idx)); 1469 irq_trigger(idx), irq_polarity(idx));
1596} 1470}
1597 1471
@@ -1642,7 +1516,6 @@ __apicdebuginit(void) print_IO_APIC(void)
1642 union IO_APIC_reg_03 reg_03; 1516 union IO_APIC_reg_03 reg_03;
1643 unsigned long flags; 1517 unsigned long flags;
1644 struct irq_cfg *cfg; 1518 struct irq_cfg *cfg;
1645 struct irq_desc *desc;
1646 unsigned int irq; 1519 unsigned int irq;
1647 1520
1648 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1521 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
@@ -1729,10 +1602,10 @@ __apicdebuginit(void) print_IO_APIC(void)
1729 } 1602 }
1730 } 1603 }
1731 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1604 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1732 for_each_irq_desc(irq, desc) { 1605 for_each_active_irq(irq) {
1733 struct irq_pin_list *entry; 1606 struct irq_pin_list *entry;
1734 1607
1735 cfg = desc->chip_data; 1608 cfg = get_irq_chip_data(irq);
1736 if (!cfg) 1609 if (!cfg)
1737 continue; 1610 continue;
1738 entry = cfg->irq_2_pin; 1611 entry = cfg->irq_2_pin;
@@ -2239,29 +2112,26 @@ static int __init timer_irq_works(void)
2239 * an edge even if it isn't on the 8259A... 2112 * an edge even if it isn't on the 8259A...
2240 */ 2113 */
2241 2114
2242static unsigned int startup_ioapic_irq(unsigned int irq) 2115static unsigned int startup_ioapic_irq(struct irq_data *data)
2243{ 2116{
2244 int was_pending = 0; 2117 int was_pending = 0, irq = data->irq;
2245 unsigned long flags; 2118 unsigned long flags;
2246 struct irq_cfg *cfg;
2247 2119
2248 raw_spin_lock_irqsave(&ioapic_lock, flags); 2120 raw_spin_lock_irqsave(&ioapic_lock, flags);
2249 if (irq < legacy_pic->nr_legacy_irqs) { 2121 if (irq < legacy_pic->nr_legacy_irqs) {
2250 legacy_pic->chip->mask(irq); 2122 legacy_pic->mask(irq);
2251 if (legacy_pic->irq_pending(irq)) 2123 if (legacy_pic->irq_pending(irq))
2252 was_pending = 1; 2124 was_pending = 1;
2253 } 2125 }
2254 cfg = irq_cfg(irq); 2126 __unmask_ioapic(data->chip_data);
2255 __unmask_IO_APIC_irq(cfg);
2256 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2127 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2257 2128
2258 return was_pending; 2129 return was_pending;
2259} 2130}
2260 2131
2261static int ioapic_retrigger_irq(unsigned int irq) 2132static int ioapic_retrigger_irq(struct irq_data *data)
2262{ 2133{
2263 2134 struct irq_cfg *cfg = data->chip_data;
2264 struct irq_cfg *cfg = irq_cfg(irq);
2265 unsigned long flags; 2135 unsigned long flags;
2266 2136
2267 raw_spin_lock_irqsave(&vector_lock, flags); 2137 raw_spin_lock_irqsave(&vector_lock, flags);
@@ -2312,7 +2182,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2312 * With interrupt-remapping, destination information comes 2182 * With interrupt-remapping, destination information comes
2313 * from interrupt-remapping table entry. 2183 * from interrupt-remapping table entry.
2314 */ 2184 */
2315 if (!irq_remapped(irq)) 2185 if (!irq_remapped(cfg))
2316 io_apic_write(apic, 0x11 + pin*2, dest); 2186 io_apic_write(apic, 0x11 + pin*2, dest);
2317 reg = io_apic_read(apic, 0x10 + pin*2); 2187 reg = io_apic_read(apic, 0x10 + pin*2);
2318 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 2188 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
@@ -2322,65 +2192,46 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2322} 2192}
2323 2193
2324/* 2194/*
2325 * Either sets desc->affinity to a valid value, and returns 2195 * Either sets data->affinity to a valid value, and returns
2326 * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and 2196 * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
2327 * leaves desc->affinity untouched. 2197 * leaves data->affinity untouched.
2328 */ 2198 */
2329unsigned int 2199int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2330set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, 2200 unsigned int *dest_id)
2331 unsigned int *dest_id)
2332{ 2201{
2333 struct irq_cfg *cfg; 2202 struct irq_cfg *cfg = data->chip_data;
2334 unsigned int irq;
2335 2203
2336 if (!cpumask_intersects(mask, cpu_online_mask)) 2204 if (!cpumask_intersects(mask, cpu_online_mask))
2337 return -1; 2205 return -1;
2338 2206
2339 irq = desc->irq; 2207 if (assign_irq_vector(data->irq, data->chip_data, mask))
2340 cfg = desc->chip_data;
2341 if (assign_irq_vector(irq, cfg, mask))
2342 return -1; 2208 return -1;
2343 2209
2344 cpumask_copy(desc->affinity, mask); 2210 cpumask_copy(data->affinity, mask);
2345 2211
2346 *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); 2212 *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
2347 return 0; 2213 return 0;
2348} 2214}
2349 2215
2350static int 2216static int
2351set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2217ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2218 bool force)
2352{ 2219{
2353 struct irq_cfg *cfg; 2220 unsigned int dest, irq = data->irq;
2354 unsigned long flags; 2221 unsigned long flags;
2355 unsigned int dest; 2222 int ret;
2356 unsigned int irq;
2357 int ret = -1;
2358
2359 irq = desc->irq;
2360 cfg = desc->chip_data;
2361 2223
2362 raw_spin_lock_irqsave(&ioapic_lock, flags); 2224 raw_spin_lock_irqsave(&ioapic_lock, flags);
2363 ret = set_desc_affinity(desc, mask, &dest); 2225 ret = __ioapic_set_affinity(data, mask, &dest);
2364 if (!ret) { 2226 if (!ret) {
2365 /* Only the high 8 bits are valid. */ 2227 /* Only the high 8 bits are valid. */
2366 dest = SET_APIC_LOGICAL_ID(dest); 2228 dest = SET_APIC_LOGICAL_ID(dest);
2367 __target_IO_APIC_irq(irq, dest, cfg); 2229 __target_IO_APIC_irq(irq, dest, data->chip_data);
2368 } 2230 }
2369 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2231 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2370
2371 return ret; 2232 return ret;
2372} 2233}
2373 2234
2374static int
2375set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
2376{
2377 struct irq_desc *desc;
2378
2379 desc = irq_to_desc(irq);
2380
2381 return set_ioapic_affinity_irq_desc(desc, mask);
2382}
2383
2384#ifdef CONFIG_INTR_REMAP 2235#ifdef CONFIG_INTR_REMAP
2385 2236
2386/* 2237/*
@@ -2395,24 +2246,21 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
2395 * the interrupt-remapping table entry. 2246 * the interrupt-remapping table entry.
2396 */ 2247 */
2397static int 2248static int
2398migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2249ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2250 bool force)
2399{ 2251{
2400 struct irq_cfg *cfg; 2252 struct irq_cfg *cfg = data->chip_data;
2253 unsigned int dest, irq = data->irq;
2401 struct irte irte; 2254 struct irte irte;
2402 unsigned int dest;
2403 unsigned int irq;
2404 int ret = -1;
2405 2255
2406 if (!cpumask_intersects(mask, cpu_online_mask)) 2256 if (!cpumask_intersects(mask, cpu_online_mask))
2407 return ret; 2257 return -EINVAL;
2408 2258
2409 irq = desc->irq;
2410 if (get_irte(irq, &irte)) 2259 if (get_irte(irq, &irte))
2411 return ret; 2260 return -EBUSY;
2412 2261
2413 cfg = desc->chip_data;
2414 if (assign_irq_vector(irq, cfg, mask)) 2262 if (assign_irq_vector(irq, cfg, mask))
2415 return ret; 2263 return -EBUSY;
2416 2264
2417 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); 2265 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2418 2266
@@ -2427,29 +2275,14 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2427 if (cfg->move_in_progress) 2275 if (cfg->move_in_progress)
2428 send_cleanup_vector(cfg); 2276 send_cleanup_vector(cfg);
2429 2277
2430 cpumask_copy(desc->affinity, mask); 2278 cpumask_copy(data->affinity, mask);
2431
2432 return 0; 2279 return 0;
2433} 2280}
2434 2281
2435/*
2436 * Migrates the IRQ destination in the process context.
2437 */
2438static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2439 const struct cpumask *mask)
2440{
2441 return migrate_ioapic_irq_desc(desc, mask);
2442}
2443static int set_ir_ioapic_affinity_irq(unsigned int irq,
2444 const struct cpumask *mask)
2445{
2446 struct irq_desc *desc = irq_to_desc(irq);
2447
2448 return set_ir_ioapic_affinity_irq_desc(desc, mask);
2449}
2450#else 2282#else
2451static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, 2283static inline int
2452 const struct cpumask *mask) 2284ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2285 bool force)
2453{ 2286{
2454 return 0; 2287 return 0;
2455} 2288}
@@ -2511,10 +2344,8 @@ unlock:
2511 irq_exit(); 2344 irq_exit();
2512} 2345}
2513 2346
2514static void __irq_complete_move(struct irq_desc **descp, unsigned vector) 2347static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
2515{ 2348{
2516 struct irq_desc *desc = *descp;
2517 struct irq_cfg *cfg = desc->chip_data;
2518 unsigned me; 2349 unsigned me;
2519 2350
2520 if (likely(!cfg->move_in_progress)) 2351 if (likely(!cfg->move_in_progress))
@@ -2526,31 +2357,28 @@ static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
2526 send_cleanup_vector(cfg); 2357 send_cleanup_vector(cfg);
2527} 2358}
2528 2359
2529static void irq_complete_move(struct irq_desc **descp) 2360static void irq_complete_move(struct irq_cfg *cfg)
2530{ 2361{
2531 __irq_complete_move(descp, ~get_irq_regs()->orig_ax); 2362 __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
2532} 2363}
2533 2364
2534void irq_force_complete_move(int irq) 2365void irq_force_complete_move(int irq)
2535{ 2366{
2536 struct irq_desc *desc = irq_to_desc(irq); 2367 struct irq_cfg *cfg = get_irq_chip_data(irq);
2537 struct irq_cfg *cfg = desc->chip_data;
2538 2368
2539 if (!cfg) 2369 if (!cfg)
2540 return; 2370 return;
2541 2371
2542 __irq_complete_move(&desc, cfg->vector); 2372 __irq_complete_move(cfg, cfg->vector);
2543} 2373}
2544#else 2374#else
2545static inline void irq_complete_move(struct irq_desc **descp) {} 2375static inline void irq_complete_move(struct irq_cfg *cfg) { }
2546#endif 2376#endif
2547 2377
2548static void ack_apic_edge(unsigned int irq) 2378static void ack_apic_edge(struct irq_data *data)
2549{ 2379{
2550 struct irq_desc *desc = irq_to_desc(irq); 2380 irq_complete_move(data->chip_data);
2551 2381 move_native_irq(data->irq);
2552 irq_complete_move(&desc);
2553 move_native_irq(irq);
2554 ack_APIC_irq(); 2382 ack_APIC_irq();
2555} 2383}
2556 2384
@@ -2572,10 +2400,12 @@ atomic_t irq_mis_count;
2572 * Otherwise, we simulate the EOI message manually by changing the trigger 2400 * Otherwise, we simulate the EOI message manually by changing the trigger
2573 * mode to edge and then back to level, with RTE being masked during this. 2401 * mode to edge and then back to level, with RTE being masked during this.
2574*/ 2402*/
2575static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) 2403static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2576{ 2404{
2577 struct irq_pin_list *entry; 2405 struct irq_pin_list *entry;
2406 unsigned long flags;
2578 2407
2408 raw_spin_lock_irqsave(&ioapic_lock, flags);
2579 for_each_irq_pin(entry, cfg->irq_2_pin) { 2409 for_each_irq_pin(entry, cfg->irq_2_pin) {
2580 if (mp_ioapics[entry->apic].apicver >= 0x20) { 2410 if (mp_ioapics[entry->apic].apicver >= 0x20) {
2581 /* 2411 /*
@@ -2584,7 +2414,7 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2584 * intr-remapping table entry. Hence for the io-apic 2414 * intr-remapping table entry. Hence for the io-apic
2585 * EOI we use the pin number. 2415 * EOI we use the pin number.
2586 */ 2416 */
2587 if (irq_remapped(irq)) 2417 if (irq_remapped(cfg))
2588 io_apic_eoi(entry->apic, entry->pin); 2418 io_apic_eoi(entry->apic, entry->pin);
2589 else 2419 else
2590 io_apic_eoi(entry->apic, cfg->vector); 2420 io_apic_eoi(entry->apic, cfg->vector);
@@ -2593,36 +2423,22 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2593 __unmask_and_level_IO_APIC_irq(entry); 2423 __unmask_and_level_IO_APIC_irq(entry);
2594 } 2424 }
2595 } 2425 }
2596}
2597
2598static void eoi_ioapic_irq(struct irq_desc *desc)
2599{
2600 struct irq_cfg *cfg;
2601 unsigned long flags;
2602 unsigned int irq;
2603
2604 irq = desc->irq;
2605 cfg = desc->chip_data;
2606
2607 raw_spin_lock_irqsave(&ioapic_lock, flags);
2608 __eoi_ioapic_irq(irq, cfg);
2609 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2426 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2610} 2427}
2611 2428
2612static void ack_apic_level(unsigned int irq) 2429static void ack_apic_level(struct irq_data *data)
2613{ 2430{
2431 struct irq_cfg *cfg = data->chip_data;
2432 int i, do_unmask_irq = 0, irq = data->irq;
2614 struct irq_desc *desc = irq_to_desc(irq); 2433 struct irq_desc *desc = irq_to_desc(irq);
2615 unsigned long v; 2434 unsigned long v;
2616 int i;
2617 struct irq_cfg *cfg;
2618 int do_unmask_irq = 0;
2619 2435
2620 irq_complete_move(&desc); 2436 irq_complete_move(cfg);
2621#ifdef CONFIG_GENERIC_PENDING_IRQ 2437#ifdef CONFIG_GENERIC_PENDING_IRQ
2622 /* If we are moving the irq we need to mask it */ 2438 /* If we are moving the irq we need to mask it */
2623 if (unlikely(desc->status & IRQ_MOVE_PENDING)) { 2439 if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
2624 do_unmask_irq = 1; 2440 do_unmask_irq = 1;
2625 mask_IO_APIC_irq_desc(desc); 2441 mask_ioapic(cfg);
2626 } 2442 }
2627#endif 2443#endif
2628 2444
@@ -2658,7 +2474,6 @@ static void ack_apic_level(unsigned int irq)
2658 * we use the above logic (mask+edge followed by unmask+level) from 2474 * we use the above logic (mask+edge followed by unmask+level) from
2659 * Manfred Spraul to clear the remote IRR. 2475 * Manfred Spraul to clear the remote IRR.
2660 */ 2476 */
2661 cfg = desc->chip_data;
2662 i = cfg->vector; 2477 i = cfg->vector;
2663 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 2478 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2664 2479
@@ -2678,7 +2493,7 @@ static void ack_apic_level(unsigned int irq)
2678 if (!(v & (1 << (i & 0x1f)))) { 2493 if (!(v & (1 << (i & 0x1f)))) {
2679 atomic_inc(&irq_mis_count); 2494 atomic_inc(&irq_mis_count);
2680 2495
2681 eoi_ioapic_irq(desc); 2496 eoi_ioapic_irq(irq, cfg);
2682 } 2497 }
2683 2498
2684 /* Now we can move and renable the irq */ 2499 /* Now we can move and renable the irq */
@@ -2709,61 +2524,57 @@ static void ack_apic_level(unsigned int irq)
2709 * accurate and is causing problems then it is a hardware bug 2524 * accurate and is causing problems then it is a hardware bug
2710 * and you can go talk to the chipset vendor about it. 2525 * and you can go talk to the chipset vendor about it.
2711 */ 2526 */
2712 cfg = desc->chip_data;
2713 if (!io_apic_level_ack_pending(cfg)) 2527 if (!io_apic_level_ack_pending(cfg))
2714 move_masked_irq(irq); 2528 move_masked_irq(irq);
2715 unmask_IO_APIC_irq_desc(desc); 2529 unmask_ioapic(cfg);
2716 } 2530 }
2717} 2531}
2718 2532
2719#ifdef CONFIG_INTR_REMAP 2533#ifdef CONFIG_INTR_REMAP
2720static void ir_ack_apic_edge(unsigned int irq) 2534static void ir_ack_apic_edge(struct irq_data *data)
2721{ 2535{
2722 ack_APIC_irq(); 2536 ack_APIC_irq();
2723} 2537}
2724 2538
2725static void ir_ack_apic_level(unsigned int irq) 2539static void ir_ack_apic_level(struct irq_data *data)
2726{ 2540{
2727 struct irq_desc *desc = irq_to_desc(irq);
2728
2729 ack_APIC_irq(); 2541 ack_APIC_irq();
2730 eoi_ioapic_irq(desc); 2542 eoi_ioapic_irq(data->irq, data->chip_data);
2731} 2543}
2732#endif /* CONFIG_INTR_REMAP */ 2544#endif /* CONFIG_INTR_REMAP */
2733 2545
2734static struct irq_chip ioapic_chip __read_mostly = { 2546static struct irq_chip ioapic_chip __read_mostly = {
2735 .name = "IO-APIC", 2547 .name = "IO-APIC",
2736 .startup = startup_ioapic_irq, 2548 .irq_startup = startup_ioapic_irq,
2737 .mask = mask_IO_APIC_irq, 2549 .irq_mask = mask_ioapic_irq,
2738 .unmask = unmask_IO_APIC_irq, 2550 .irq_unmask = unmask_ioapic_irq,
2739 .ack = ack_apic_edge, 2551 .irq_ack = ack_apic_edge,
2740 .eoi = ack_apic_level, 2552 .irq_eoi = ack_apic_level,
2741#ifdef CONFIG_SMP 2553#ifdef CONFIG_SMP
2742 .set_affinity = set_ioapic_affinity_irq, 2554 .irq_set_affinity = ioapic_set_affinity,
2743#endif 2555#endif
2744 .retrigger = ioapic_retrigger_irq, 2556 .irq_retrigger = ioapic_retrigger_irq,
2745}; 2557};
2746 2558
2747static struct irq_chip ir_ioapic_chip __read_mostly = { 2559static struct irq_chip ir_ioapic_chip __read_mostly = {
2748 .name = "IR-IO-APIC", 2560 .name = "IR-IO-APIC",
2749 .startup = startup_ioapic_irq, 2561 .irq_startup = startup_ioapic_irq,
2750 .mask = mask_IO_APIC_irq, 2562 .irq_mask = mask_ioapic_irq,
2751 .unmask = unmask_IO_APIC_irq, 2563 .irq_unmask = unmask_ioapic_irq,
2752#ifdef CONFIG_INTR_REMAP 2564#ifdef CONFIG_INTR_REMAP
2753 .ack = ir_ack_apic_edge, 2565 .irq_ack = ir_ack_apic_edge,
2754 .eoi = ir_ack_apic_level, 2566 .irq_eoi = ir_ack_apic_level,
2755#ifdef CONFIG_SMP 2567#ifdef CONFIG_SMP
2756 .set_affinity = set_ir_ioapic_affinity_irq, 2568 .irq_set_affinity = ir_ioapic_set_affinity,
2757#endif 2569#endif
2758#endif 2570#endif
2759 .retrigger = ioapic_retrigger_irq, 2571 .irq_retrigger = ioapic_retrigger_irq,
2760}; 2572};
2761 2573
2762static inline void init_IO_APIC_traps(void) 2574static inline void init_IO_APIC_traps(void)
2763{ 2575{
2764 int irq;
2765 struct irq_desc *desc;
2766 struct irq_cfg *cfg; 2576 struct irq_cfg *cfg;
2577 unsigned int irq;
2767 2578
2768 /* 2579 /*
2769 * NOTE! The local APIC isn't very good at handling 2580 * NOTE! The local APIC isn't very good at handling
@@ -2776,8 +2587,8 @@ static inline void init_IO_APIC_traps(void)
2776 * Also, we've got to be careful not to trash gate 2587 * Also, we've got to be careful not to trash gate
2777 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2588 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2778 */ 2589 */
2779 for_each_irq_desc(irq, desc) { 2590 for_each_active_irq(irq) {
2780 cfg = desc->chip_data; 2591 cfg = get_irq_chip_data(irq);
2781 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { 2592 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2782 /* 2593 /*
2783 * Hmm.. We don't have an entry for this, 2594 * Hmm.. We don't have an entry for this,
@@ -2788,7 +2599,7 @@ static inline void init_IO_APIC_traps(void)
2788 legacy_pic->make_irq(irq); 2599 legacy_pic->make_irq(irq);
2789 else 2600 else
2790 /* Strange. Oh, well.. */ 2601 /* Strange. Oh, well.. */
2791 desc->chip = &no_irq_chip; 2602 set_irq_chip(irq, &no_irq_chip);
2792 } 2603 }
2793 } 2604 }
2794} 2605}
@@ -2797,7 +2608,7 @@ static inline void init_IO_APIC_traps(void)
2797 * The local APIC irq-chip implementation: 2608 * The local APIC irq-chip implementation:
2798 */ 2609 */
2799 2610
2800static void mask_lapic_irq(unsigned int irq) 2611static void mask_lapic_irq(struct irq_data *data)
2801{ 2612{
2802 unsigned long v; 2613 unsigned long v;
2803 2614
@@ -2805,7 +2616,7 @@ static void mask_lapic_irq(unsigned int irq)
2805 apic_write(APIC_LVT0, v | APIC_LVT_MASKED); 2616 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2806} 2617}
2807 2618
2808static void unmask_lapic_irq(unsigned int irq) 2619static void unmask_lapic_irq(struct irq_data *data)
2809{ 2620{
2810 unsigned long v; 2621 unsigned long v;
2811 2622
@@ -2813,21 +2624,21 @@ static void unmask_lapic_irq(unsigned int irq)
2813 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2624 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2814} 2625}
2815 2626
2816static void ack_lapic_irq(unsigned int irq) 2627static void ack_lapic_irq(struct irq_data *data)
2817{ 2628{
2818 ack_APIC_irq(); 2629 ack_APIC_irq();
2819} 2630}
2820 2631
2821static struct irq_chip lapic_chip __read_mostly = { 2632static struct irq_chip lapic_chip __read_mostly = {
2822 .name = "local-APIC", 2633 .name = "local-APIC",
2823 .mask = mask_lapic_irq, 2634 .irq_mask = mask_lapic_irq,
2824 .unmask = unmask_lapic_irq, 2635 .irq_unmask = unmask_lapic_irq,
2825 .ack = ack_lapic_irq, 2636 .irq_ack = ack_lapic_irq,
2826}; 2637};
2827 2638
2828static void lapic_register_intr(int irq, struct irq_desc *desc) 2639static void lapic_register_intr(int irq)
2829{ 2640{
2830 desc->status &= ~IRQ_LEVEL; 2641 irq_clear_status_flags(irq, IRQ_LEVEL);
2831 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2642 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2832 "edge"); 2643 "edge");
2833} 2644}
@@ -2930,9 +2741,8 @@ int timer_through_8259 __initdata;
2930 */ 2741 */
2931static inline void __init check_timer(void) 2742static inline void __init check_timer(void)
2932{ 2743{
2933 struct irq_desc *desc = irq_to_desc(0); 2744 struct irq_cfg *cfg = get_irq_chip_data(0);
2934 struct irq_cfg *cfg = desc->chip_data; 2745 int node = cpu_to_node(0);
2935 int node = cpu_to_node(boot_cpu_id);
2936 int apic1, pin1, apic2, pin2; 2746 int apic1, pin1, apic2, pin2;
2937 unsigned long flags; 2747 unsigned long flags;
2938 int no_pin1 = 0; 2748 int no_pin1 = 0;
@@ -2942,7 +2752,7 @@ static inline void __init check_timer(void)
2942 /* 2752 /*
2943 * get/set the timer IRQ vector: 2753 * get/set the timer IRQ vector:
2944 */ 2754 */
2945 legacy_pic->chip->mask(0); 2755 legacy_pic->mask(0);
2946 assign_irq_vector(0, cfg, apic->target_cpus()); 2756 assign_irq_vector(0, cfg, apic->target_cpus());
2947 2757
2948 /* 2758 /*
@@ -3001,7 +2811,7 @@ static inline void __init check_timer(void)
3001 add_pin_to_irq_node(cfg, node, apic1, pin1); 2811 add_pin_to_irq_node(cfg, node, apic1, pin1);
3002 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2812 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
3003 } else { 2813 } else {
3004 /* for edge trigger, setup_IO_APIC_irq already 2814 /* for edge trigger, setup_ioapic_irq already
3005 * leave it unmasked. 2815 * leave it unmasked.
3006 * so only need to unmask if it is level-trigger 2816 * so only need to unmask if it is level-trigger
3007 * do we really have level trigger timer? 2817 * do we really have level trigger timer?
@@ -3009,12 +2819,12 @@ static inline void __init check_timer(void)
3009 int idx; 2819 int idx;
3010 idx = find_irq_entry(apic1, pin1, mp_INT); 2820 idx = find_irq_entry(apic1, pin1, mp_INT);
3011 if (idx != -1 && irq_trigger(idx)) 2821 if (idx != -1 && irq_trigger(idx))
3012 unmask_IO_APIC_irq_desc(desc); 2822 unmask_ioapic(cfg);
3013 } 2823 }
3014 if (timer_irq_works()) { 2824 if (timer_irq_works()) {
3015 if (nmi_watchdog == NMI_IO_APIC) { 2825 if (nmi_watchdog == NMI_IO_APIC) {
3016 setup_nmi(); 2826 setup_nmi();
3017 legacy_pic->chip->unmask(0); 2827 legacy_pic->unmask(0);
3018 } 2828 }
3019 if (disable_timer_pin_1 > 0) 2829 if (disable_timer_pin_1 > 0)
3020 clear_IO_APIC_pin(0, pin1); 2830 clear_IO_APIC_pin(0, pin1);
@@ -3037,14 +2847,14 @@ static inline void __init check_timer(void)
3037 */ 2847 */
3038 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); 2848 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
3039 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2849 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
3040 legacy_pic->chip->unmask(0); 2850 legacy_pic->unmask(0);
3041 if (timer_irq_works()) { 2851 if (timer_irq_works()) {
3042 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2852 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
3043 timer_through_8259 = 1; 2853 timer_through_8259 = 1;
3044 if (nmi_watchdog == NMI_IO_APIC) { 2854 if (nmi_watchdog == NMI_IO_APIC) {
3045 legacy_pic->chip->mask(0); 2855 legacy_pic->mask(0);
3046 setup_nmi(); 2856 setup_nmi();
3047 legacy_pic->chip->unmask(0); 2857 legacy_pic->unmask(0);
3048 } 2858 }
3049 goto out; 2859 goto out;
3050 } 2860 }
@@ -3052,7 +2862,7 @@ static inline void __init check_timer(void)
3052 * Cleanup, just in case ... 2862 * Cleanup, just in case ...
3053 */ 2863 */
3054 local_irq_disable(); 2864 local_irq_disable();
3055 legacy_pic->chip->mask(0); 2865 legacy_pic->mask(0);
3056 clear_IO_APIC_pin(apic2, pin2); 2866 clear_IO_APIC_pin(apic2, pin2);
3057 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 2867 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
3058 } 2868 }
@@ -3069,16 +2879,16 @@ static inline void __init check_timer(void)
3069 apic_printk(APIC_QUIET, KERN_INFO 2879 apic_printk(APIC_QUIET, KERN_INFO
3070 "...trying to set up timer as Virtual Wire IRQ...\n"); 2880 "...trying to set up timer as Virtual Wire IRQ...\n");
3071 2881
3072 lapic_register_intr(0, desc); 2882 lapic_register_intr(0);
3073 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 2883 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
3074 legacy_pic->chip->unmask(0); 2884 legacy_pic->unmask(0);
3075 2885
3076 if (timer_irq_works()) { 2886 if (timer_irq_works()) {
3077 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 2887 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3078 goto out; 2888 goto out;
3079 } 2889 }
3080 local_irq_disable(); 2890 local_irq_disable();
3081 legacy_pic->chip->mask(0); 2891 legacy_pic->mask(0);
3082 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 2892 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3083 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 2893 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
3084 2894
@@ -3244,49 +3054,42 @@ device_initcall(ioapic_init_sysfs);
3244/* 3054/*
3245 * Dynamic irq allocate and deallocation 3055 * Dynamic irq allocate and deallocation
3246 */ 3056 */
3247unsigned int create_irq_nr(unsigned int irq_want, int node) 3057unsigned int create_irq_nr(unsigned int from, int node)
3248{ 3058{
3249 /* Allocate an unused irq */ 3059 struct irq_cfg *cfg;
3250 unsigned int irq;
3251 unsigned int new;
3252 unsigned long flags; 3060 unsigned long flags;
3253 struct irq_cfg *cfg_new = NULL; 3061 unsigned int ret = 0;
3254 struct irq_desc *desc_new = NULL; 3062 int irq;
3255
3256 irq = 0;
3257 if (irq_want < nr_irqs_gsi)
3258 irq_want = nr_irqs_gsi;
3259
3260 raw_spin_lock_irqsave(&vector_lock, flags);
3261 for (new = irq_want; new < nr_irqs; new++) {
3262 desc_new = irq_to_desc_alloc_node(new, node);
3263 if (!desc_new) {
3264 printk(KERN_INFO "can not get irq_desc for %d\n", new);
3265 continue;
3266 }
3267 cfg_new = desc_new->chip_data;
3268
3269 if (cfg_new->vector != 0)
3270 continue;
3271 3063
3272 desc_new = move_irq_desc(desc_new, node); 3064 if (from < nr_irqs_gsi)
3273 cfg_new = desc_new->chip_data; 3065 from = nr_irqs_gsi;
3274 3066
3275 if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) 3067 irq = alloc_irq_from(from, node);
3276 irq = new; 3068 if (irq < 0)
3277 break; 3069 return 0;
3070 cfg = alloc_irq_cfg(irq, node);
3071 if (!cfg) {
3072 free_irq_at(irq, NULL);
3073 return 0;
3278 } 3074 }
3279 raw_spin_unlock_irqrestore(&vector_lock, flags);
3280 3075
3281 if (irq > 0) 3076 raw_spin_lock_irqsave(&vector_lock, flags);
3282 dynamic_irq_init_keep_chip_data(irq); 3077 if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
3078 ret = irq;
3079 raw_spin_unlock_irqrestore(&vector_lock, flags);
3283 3080
3284 return irq; 3081 if (ret) {
3082 set_irq_chip_data(irq, cfg);
3083 irq_clear_status_flags(irq, IRQ_NOREQUEST);
3084 } else {
3085 free_irq_at(irq, cfg);
3086 }
3087 return ret;
3285} 3088}
3286 3089
3287int create_irq(void) 3090int create_irq(void)
3288{ 3091{
3289 int node = cpu_to_node(boot_cpu_id); 3092 int node = cpu_to_node(0);
3290 unsigned int irq_want; 3093 unsigned int irq_want;
3291 int irq; 3094 int irq;
3292 3095
@@ -3301,14 +3104,17 @@ int create_irq(void)
3301 3104
3302void destroy_irq(unsigned int irq) 3105void destroy_irq(unsigned int irq)
3303{ 3106{
3107 struct irq_cfg *cfg = get_irq_chip_data(irq);
3304 unsigned long flags; 3108 unsigned long flags;
3305 3109
3306 dynamic_irq_cleanup_keep_chip_data(irq); 3110 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3307 3111
3308 free_irte(irq); 3112 if (intr_remapping_enabled)
3113 free_irte(irq);
3309 raw_spin_lock_irqsave(&vector_lock, flags); 3114 raw_spin_lock_irqsave(&vector_lock, flags);
3310 __clear_irq_vector(irq, get_irq_chip_data(irq)); 3115 __clear_irq_vector(irq, cfg);
3311 raw_spin_unlock_irqrestore(&vector_lock, flags); 3116 raw_spin_unlock_irqrestore(&vector_lock, flags);
3117 free_irq_at(irq, cfg);
3312} 3118}
3313 3119
3314/* 3120/*
@@ -3332,7 +3138,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3332 3138
3333 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 3139 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3334 3140
3335 if (irq_remapped(irq)) { 3141 if (irq_remapped(get_irq_chip_data(irq))) {
3336 struct irte irte; 3142 struct irte irte;
3337 int ir_index; 3143 int ir_index;
3338 u16 sub_handle; 3144 u16 sub_handle;
@@ -3340,14 +3146,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3340 ir_index = map_irq_to_irte_handle(irq, &sub_handle); 3146 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
3341 BUG_ON(ir_index == -1); 3147 BUG_ON(ir_index == -1);
3342 3148
3343 memset (&irte, 0, sizeof(irte)); 3149 prepare_irte(&irte, cfg->vector, dest);
3344
3345 irte.present = 1;
3346 irte.dst_mode = apic->irq_dest_mode;
3347 irte.trigger_mode = 0; /* edge */
3348 irte.dlvry_mode = apic->irq_delivery_mode;
3349 irte.vector = cfg->vector;
3350 irte.dest_id = IRTE_DEST(dest);
3351 3150
3352 /* Set source-id of interrupt request */ 3151 /* Set source-id of interrupt request */
3353 if (pdev) 3152 if (pdev)
@@ -3392,26 +3191,24 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3392} 3191}
3393 3192
3394#ifdef CONFIG_SMP 3193#ifdef CONFIG_SMP
3395static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3194static int
3195msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
3396{ 3196{
3397 struct irq_desc *desc = irq_to_desc(irq); 3197 struct irq_cfg *cfg = data->chip_data;
3398 struct irq_cfg *cfg;
3399 struct msi_msg msg; 3198 struct msi_msg msg;
3400 unsigned int dest; 3199 unsigned int dest;
3401 3200
3402 if (set_desc_affinity(desc, mask, &dest)) 3201 if (__ioapic_set_affinity(data, mask, &dest))
3403 return -1; 3202 return -1;
3404 3203
3405 cfg = desc->chip_data; 3204 __get_cached_msi_msg(data->msi_desc, &msg);
3406
3407 get_cached_msi_msg_desc(desc, &msg);
3408 3205
3409 msg.data &= ~MSI_DATA_VECTOR_MASK; 3206 msg.data &= ~MSI_DATA_VECTOR_MASK;
3410 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3207 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3411 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3208 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3412 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3209 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3413 3210
3414 write_msi_msg_desc(desc, &msg); 3211 __write_msi_msg(data->msi_desc, &msg);
3415 3212
3416 return 0; 3213 return 0;
3417} 3214}
@@ -3421,17 +3218,17 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3421 * done in the process context using interrupt-remapping hardware. 3218 * done in the process context using interrupt-remapping hardware.
3422 */ 3219 */
3423static int 3220static int
3424ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3221ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3222 bool force)
3425{ 3223{
3426 struct irq_desc *desc = irq_to_desc(irq); 3224 struct irq_cfg *cfg = data->chip_data;
3427 struct irq_cfg *cfg = desc->chip_data; 3225 unsigned int dest, irq = data->irq;
3428 unsigned int dest;
3429 struct irte irte; 3226 struct irte irte;
3430 3227
3431 if (get_irte(irq, &irte)) 3228 if (get_irte(irq, &irte))
3432 return -1; 3229 return -1;
3433 3230
3434 if (set_desc_affinity(desc, mask, &dest)) 3231 if (__ioapic_set_affinity(data, mask, &dest))
3435 return -1; 3232 return -1;
3436 3233
3437 irte.vector = cfg->vector; 3234 irte.vector = cfg->vector;
@@ -3461,27 +3258,27 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3461 * which implement the MSI or MSI-X Capability Structure. 3258 * which implement the MSI or MSI-X Capability Structure.
3462 */ 3259 */
3463static struct irq_chip msi_chip = { 3260static struct irq_chip msi_chip = {
3464 .name = "PCI-MSI", 3261 .name = "PCI-MSI",
3465 .unmask = unmask_msi_irq, 3262 .irq_unmask = unmask_msi_irq,
3466 .mask = mask_msi_irq, 3263 .irq_mask = mask_msi_irq,
3467 .ack = ack_apic_edge, 3264 .irq_ack = ack_apic_edge,
3468#ifdef CONFIG_SMP 3265#ifdef CONFIG_SMP
3469 .set_affinity = set_msi_irq_affinity, 3266 .irq_set_affinity = msi_set_affinity,
3470#endif 3267#endif
3471 .retrigger = ioapic_retrigger_irq, 3268 .irq_retrigger = ioapic_retrigger_irq,
3472}; 3269};
3473 3270
3474static struct irq_chip msi_ir_chip = { 3271static struct irq_chip msi_ir_chip = {
3475 .name = "IR-PCI-MSI", 3272 .name = "IR-PCI-MSI",
3476 .unmask = unmask_msi_irq, 3273 .irq_unmask = unmask_msi_irq,
3477 .mask = mask_msi_irq, 3274 .irq_mask = mask_msi_irq,
3478#ifdef CONFIG_INTR_REMAP 3275#ifdef CONFIG_INTR_REMAP
3479 .ack = ir_ack_apic_edge, 3276 .irq_ack = ir_ack_apic_edge,
3480#ifdef CONFIG_SMP 3277#ifdef CONFIG_SMP
3481 .set_affinity = ir_set_msi_irq_affinity, 3278 .irq_set_affinity = ir_msi_set_affinity,
3482#endif 3279#endif
3483#endif 3280#endif
3484 .retrigger = ioapic_retrigger_irq, 3281 .irq_retrigger = ioapic_retrigger_irq,
3485}; 3282};
3486 3283
3487/* 3284/*
@@ -3513,8 +3310,8 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3513 3310
3514static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3311static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3515{ 3312{
3516 int ret;
3517 struct msi_msg msg; 3313 struct msi_msg msg;
3314 int ret;
3518 3315
3519 ret = msi_compose_msg(dev, irq, &msg, -1); 3316 ret = msi_compose_msg(dev, irq, &msg, -1);
3520 if (ret < 0) 3317 if (ret < 0)
@@ -3523,12 +3320,8 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3523 set_irq_msi(irq, msidesc); 3320 set_irq_msi(irq, msidesc);
3524 write_msi_msg(irq, &msg); 3321 write_msi_msg(irq, &msg);
3525 3322
3526 if (irq_remapped(irq)) { 3323 if (irq_remapped(get_irq_chip_data(irq))) {
3527 struct irq_desc *desc = irq_to_desc(irq); 3324 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3528 /*
3529 * irq migration in process context
3530 */
3531 desc->status |= IRQ_MOVE_PCNTXT;
3532 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); 3325 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
3533 } else 3326 } else
3534 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 3327 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
@@ -3540,13 +3333,10 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3540 3333
3541int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3334int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3542{ 3335{
3543 unsigned int irq; 3336 int node, ret, sub_handle, index = 0;
3544 int ret, sub_handle; 3337 unsigned int irq, irq_want;
3545 struct msi_desc *msidesc; 3338 struct msi_desc *msidesc;
3546 unsigned int irq_want;
3547 struct intel_iommu *iommu = NULL; 3339 struct intel_iommu *iommu = NULL;
3548 int index = 0;
3549 int node;
3550 3340
3551 /* x86 doesn't support multiple MSI yet */ 3341 /* x86 doesn't support multiple MSI yet */
3552 if (type == PCI_CAP_ID_MSI && nvec > 1) 3342 if (type == PCI_CAP_ID_MSI && nvec > 1)
@@ -3606,18 +3396,17 @@ void arch_teardown_msi_irq(unsigned int irq)
3606 3396
3607#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) 3397#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
3608#ifdef CONFIG_SMP 3398#ifdef CONFIG_SMP
3609static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3399static int
3400dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3401 bool force)
3610{ 3402{
3611 struct irq_desc *desc = irq_to_desc(irq); 3403 struct irq_cfg *cfg = data->chip_data;
3612 struct irq_cfg *cfg; 3404 unsigned int dest, irq = data->irq;
3613 struct msi_msg msg; 3405 struct msi_msg msg;
3614 unsigned int dest;
3615 3406
3616 if (set_desc_affinity(desc, mask, &dest)) 3407 if (__ioapic_set_affinity(data, mask, &dest))
3617 return -1; 3408 return -1;
3618 3409
3619 cfg = desc->chip_data;
3620
3621 dmar_msi_read(irq, &msg); 3410 dmar_msi_read(irq, &msg);
3622 3411
3623 msg.data &= ~MSI_DATA_VECTOR_MASK; 3412 msg.data &= ~MSI_DATA_VECTOR_MASK;
@@ -3633,14 +3422,14 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3633#endif /* CONFIG_SMP */ 3422#endif /* CONFIG_SMP */
3634 3423
3635static struct irq_chip dmar_msi_type = { 3424static struct irq_chip dmar_msi_type = {
3636 .name = "DMAR_MSI", 3425 .name = "DMAR_MSI",
3637 .unmask = dmar_msi_unmask, 3426 .irq_unmask = dmar_msi_unmask,
3638 .mask = dmar_msi_mask, 3427 .irq_mask = dmar_msi_mask,
3639 .ack = ack_apic_edge, 3428 .irq_ack = ack_apic_edge,
3640#ifdef CONFIG_SMP 3429#ifdef CONFIG_SMP
3641 .set_affinity = dmar_msi_set_affinity, 3430 .irq_set_affinity = dmar_msi_set_affinity,
3642#endif 3431#endif
3643 .retrigger = ioapic_retrigger_irq, 3432 .irq_retrigger = ioapic_retrigger_irq,
3644}; 3433};
3645 3434
3646int arch_setup_dmar_msi(unsigned int irq) 3435int arch_setup_dmar_msi(unsigned int irq)
@@ -3661,26 +3450,24 @@ int arch_setup_dmar_msi(unsigned int irq)
3661#ifdef CONFIG_HPET_TIMER 3450#ifdef CONFIG_HPET_TIMER
3662 3451
3663#ifdef CONFIG_SMP 3452#ifdef CONFIG_SMP
3664static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3453static int hpet_msi_set_affinity(struct irq_data *data,
3454 const struct cpumask *mask, bool force)
3665{ 3455{
3666 struct irq_desc *desc = irq_to_desc(irq); 3456 struct irq_cfg *cfg = data->chip_data;
3667 struct irq_cfg *cfg;
3668 struct msi_msg msg; 3457 struct msi_msg msg;
3669 unsigned int dest; 3458 unsigned int dest;
3670 3459
3671 if (set_desc_affinity(desc, mask, &dest)) 3460 if (__ioapic_set_affinity(data, mask, &dest))
3672 return -1; 3461 return -1;
3673 3462
3674 cfg = desc->chip_data; 3463 hpet_msi_read(data->handler_data, &msg);
3675
3676 hpet_msi_read(irq, &msg);
3677 3464
3678 msg.data &= ~MSI_DATA_VECTOR_MASK; 3465 msg.data &= ~MSI_DATA_VECTOR_MASK;
3679 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3466 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3680 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3467 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3681 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3468 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3682 3469
3683 hpet_msi_write(irq, &msg); 3470 hpet_msi_write(data->handler_data, &msg);
3684 3471
3685 return 0; 3472 return 0;
3686} 3473}
@@ -3688,34 +3475,33 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3688#endif /* CONFIG_SMP */ 3475#endif /* CONFIG_SMP */
3689 3476
3690static struct irq_chip ir_hpet_msi_type = { 3477static struct irq_chip ir_hpet_msi_type = {
3691 .name = "IR-HPET_MSI", 3478 .name = "IR-HPET_MSI",
3692 .unmask = hpet_msi_unmask, 3479 .irq_unmask = hpet_msi_unmask,
3693 .mask = hpet_msi_mask, 3480 .irq_mask = hpet_msi_mask,
3694#ifdef CONFIG_INTR_REMAP 3481#ifdef CONFIG_INTR_REMAP
3695 .ack = ir_ack_apic_edge, 3482 .irq_ack = ir_ack_apic_edge,
3696#ifdef CONFIG_SMP 3483#ifdef CONFIG_SMP
3697 .set_affinity = ir_set_msi_irq_affinity, 3484 .irq_set_affinity = ir_msi_set_affinity,
3698#endif 3485#endif
3699#endif 3486#endif
3700 .retrigger = ioapic_retrigger_irq, 3487 .irq_retrigger = ioapic_retrigger_irq,
3701}; 3488};
3702 3489
3703static struct irq_chip hpet_msi_type = { 3490static struct irq_chip hpet_msi_type = {
3704 .name = "HPET_MSI", 3491 .name = "HPET_MSI",
3705 .unmask = hpet_msi_unmask, 3492 .irq_unmask = hpet_msi_unmask,
3706 .mask = hpet_msi_mask, 3493 .irq_mask = hpet_msi_mask,
3707 .ack = ack_apic_edge, 3494 .irq_ack = ack_apic_edge,
3708#ifdef CONFIG_SMP 3495#ifdef CONFIG_SMP
3709 .set_affinity = hpet_msi_set_affinity, 3496 .irq_set_affinity = hpet_msi_set_affinity,
3710#endif 3497#endif
3711 .retrigger = ioapic_retrigger_irq, 3498 .irq_retrigger = ioapic_retrigger_irq,
3712}; 3499};
3713 3500
3714int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 3501int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3715{ 3502{
3716 int ret;
3717 struct msi_msg msg; 3503 struct msi_msg msg;
3718 struct irq_desc *desc = irq_to_desc(irq); 3504 int ret;
3719 3505
3720 if (intr_remapping_enabled) { 3506 if (intr_remapping_enabled) {
3721 struct intel_iommu *iommu = map_hpet_to_ir(id); 3507 struct intel_iommu *iommu = map_hpet_to_ir(id);
@@ -3733,9 +3519,9 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3733 if (ret < 0) 3519 if (ret < 0)
3734 return ret; 3520 return ret;
3735 3521
3736 hpet_msi_write(irq, &msg); 3522 hpet_msi_write(get_irq_data(irq), &msg);
3737 desc->status |= IRQ_MOVE_PCNTXT; 3523 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3738 if (irq_remapped(irq)) 3524 if (irq_remapped(get_irq_chip_data(irq)))
3739 set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, 3525 set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
3740 handle_edge_irq, "edge"); 3526 handle_edge_irq, "edge");
3741 else 3527 else
@@ -3768,33 +3554,30 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3768 write_ht_irq_msg(irq, &msg); 3554 write_ht_irq_msg(irq, &msg);
3769} 3555}
3770 3556
3771static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) 3557static int
3558ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
3772{ 3559{
3773 struct irq_desc *desc = irq_to_desc(irq); 3560 struct irq_cfg *cfg = data->chip_data;
3774 struct irq_cfg *cfg;
3775 unsigned int dest; 3561 unsigned int dest;
3776 3562
3777 if (set_desc_affinity(desc, mask, &dest)) 3563 if (__ioapic_set_affinity(data, mask, &dest))
3778 return -1; 3564 return -1;
3779 3565
3780 cfg = desc->chip_data; 3566 target_ht_irq(data->irq, dest, cfg->vector);
3781
3782 target_ht_irq(irq, dest, cfg->vector);
3783
3784 return 0; 3567 return 0;
3785} 3568}
3786 3569
3787#endif 3570#endif
3788 3571
3789static struct irq_chip ht_irq_chip = { 3572static struct irq_chip ht_irq_chip = {
3790 .name = "PCI-HT", 3573 .name = "PCI-HT",
3791 .mask = mask_ht_irq, 3574 .irq_mask = mask_ht_irq,
3792 .unmask = unmask_ht_irq, 3575 .irq_unmask = unmask_ht_irq,
3793 .ack = ack_apic_edge, 3576 .irq_ack = ack_apic_edge,
3794#ifdef CONFIG_SMP 3577#ifdef CONFIG_SMP
3795 .set_affinity = set_ht_irq_affinity, 3578 .irq_set_affinity = ht_set_affinity,
3796#endif 3579#endif
3797 .retrigger = ioapic_retrigger_irq, 3580 .irq_retrigger = ioapic_retrigger_irq,
3798}; 3581};
3799 3582
3800int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) 3583int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
@@ -3885,14 +3668,13 @@ int __init arch_probe_nr_irqs(void)
3885 if (nr < nr_irqs) 3668 if (nr < nr_irqs)
3886 nr_irqs = nr; 3669 nr_irqs = nr;
3887 3670
3888 return 0; 3671 return NR_IRQS_LEGACY;
3889} 3672}
3890#endif 3673#endif
3891 3674
3892static int __io_apic_set_pci_routing(struct device *dev, int irq, 3675static int __io_apic_set_pci_routing(struct device *dev, int irq,
3893 struct io_apic_irq_attr *irq_attr) 3676 struct io_apic_irq_attr *irq_attr)
3894{ 3677{
3895 struct irq_desc *desc;
3896 struct irq_cfg *cfg; 3678 struct irq_cfg *cfg;
3897 int node; 3679 int node;
3898 int ioapic, pin; 3680 int ioapic, pin;
@@ -3908,13 +3690,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3908 if (dev) 3690 if (dev)
3909 node = dev_to_node(dev); 3691 node = dev_to_node(dev);
3910 else 3692 else
3911 node = cpu_to_node(boot_cpu_id); 3693 node = cpu_to_node(0);
3912 3694
3913 desc = irq_to_desc_alloc_node(irq, node); 3695 cfg = alloc_irq_and_cfg_at(irq, node);
3914 if (!desc) { 3696 if (!cfg)
3915 printk(KERN_INFO "can not get irq_desc %d\n", irq);
3916 return 0; 3697 return 0;
3917 }
3918 3698
3919 pin = irq_attr->ioapic_pin; 3699 pin = irq_attr->ioapic_pin;
3920 trigger = irq_attr->trigger; 3700 trigger = irq_attr->trigger;
@@ -3924,15 +3704,14 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3924 * IRQs < 16 are already in the irq_2_pin[] map 3704 * IRQs < 16 are already in the irq_2_pin[] map
3925 */ 3705 */
3926 if (irq >= legacy_pic->nr_legacy_irqs) { 3706 if (irq >= legacy_pic->nr_legacy_irqs) {
3927 cfg = desc->chip_data; 3707 if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
3928 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
3929 printk(KERN_INFO "can not add pin %d for irq %d\n", 3708 printk(KERN_INFO "can not add pin %d for irq %d\n",
3930 pin, irq); 3709 pin, irq);
3931 return 0; 3710 return 0;
3932 } 3711 }
3933 } 3712 }
3934 3713
3935 setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); 3714 setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
3936 3715
3937 return 0; 3716 return 0;
3938} 3717}
@@ -4125,14 +3904,14 @@ void __init setup_ioapic_dest(void)
4125 */ 3904 */
4126 if (desc->status & 3905 if (desc->status &
4127 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 3906 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4128 mask = desc->affinity; 3907 mask = desc->irq_data.affinity;
4129 else 3908 else
4130 mask = apic->target_cpus(); 3909 mask = apic->target_cpus();
4131 3910
4132 if (intr_remapping_enabled) 3911 if (intr_remapping_enabled)
4133 set_ir_ioapic_affinity_irq_desc(desc, mask); 3912 ir_ioapic_set_affinity(&desc->irq_data, mask, false);
4134 else 3913 else
4135 set_ioapic_affinity_irq_desc(desc, mask); 3914 ioapic_set_affinity(&desc->irq_data, mask, false);
4136 } 3915 }
4137 3916
4138} 3917}
@@ -4316,19 +4095,18 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4316void __init pre_init_apic_IRQ0(void) 4095void __init pre_init_apic_IRQ0(void)
4317{ 4096{
4318 struct irq_cfg *cfg; 4097 struct irq_cfg *cfg;
4319 struct irq_desc *desc;
4320 4098
4321 printk(KERN_INFO "Early APIC setup for system timer0\n"); 4099 printk(KERN_INFO "Early APIC setup for system timer0\n");
4322#ifndef CONFIG_SMP 4100#ifndef CONFIG_SMP
4323 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 4101 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
4324#endif 4102#endif
4325 desc = irq_to_desc_alloc_node(0, 0); 4103 /* Make sure the irq descriptor is set up */
4104 cfg = alloc_irq_and_cfg_at(0, 0);
4326 4105
4327 setup_local_APIC(); 4106 setup_local_APIC();
4328 4107
4329 cfg = irq_cfg(0);
4330 add_pin_to_irq_node(cfg, 0, 0, 0); 4108 add_pin_to_irq_node(cfg, 0, 0, 0);
4331 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 4109 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
4332 4110
4333 setup_IO_APIC_irq(0, 0, 0, desc, 0, 0); 4111 setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
4334} 4112}
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index a43f71cb30f8..c90041ccb742 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -178,7 +178,7 @@ int __init check_nmi_watchdog(void)
178error: 178error:
179 if (nmi_watchdog == NMI_IO_APIC) { 179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259) 180 if (!timer_through_8259)
181 legacy_pic->chip->mask(0); 181 legacy_pic->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1); 182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 } 183 }
184 184
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 3e28401f161c..960f26ab5c9f 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -26,6 +26,7 @@
26#include <linux/nodemask.h> 26#include <linux/nodemask.h>
27#include <linux/topology.h> 27#include <linux/topology.h>
28#include <linux/bootmem.h> 28#include <linux/bootmem.h>
29#include <linux/memblock.h>
29#include <linux/threads.h> 30#include <linux/threads.h>
30#include <linux/cpumask.h> 31#include <linux/cpumask.h>
31#include <linux/kernel.h> 32#include <linux/kernel.h>
@@ -88,7 +89,7 @@ static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
88 node_end_pfn[node] = 89 node_end_pfn[node] =
89 MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); 90 MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
90 91
91 e820_register_active_regions(node, node_start_pfn[node], 92 memblock_x86_register_active_regions(node, node_start_pfn[node],
92 node_end_pfn[node]); 93 node_end_pfn[node]);
93 94
94 memory_present(node, node_start_pfn[node], node_end_pfn[node]); 95 memory_present(node, node_start_pfn[node], node_end_pfn[node]);
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 83e9be4778e2..f9e4e6a54073 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -54,6 +54,9 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
54 */ 54 */
55void __init default_setup_apic_routing(void) 55void __init default_setup_apic_routing(void)
56{ 56{
57
58 enable_IR_x2apic();
59
57#ifdef CONFIG_X86_X2APIC 60#ifdef CONFIG_X86_X2APIC
58 if (x2apic_mode 61 if (x2apic_mode
59#ifdef CONFIG_X86_UV 62#ifdef CONFIG_X86_UV
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 4c9c67bf09b7..fbbc4dadecc4 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1926,6 +1926,7 @@ static const struct file_operations apm_bios_fops = {
1926 .unlocked_ioctl = do_ioctl, 1926 .unlocked_ioctl = do_ioctl,
1927 .open = do_open, 1927 .open = do_open,
1928 .release = do_release, 1928 .release = do_release,
1929 .llseek = noop_llseek,
1929}; 1930};
1930 1931
1931static struct miscdevice apm_device = { 1932static struct miscdevice apm_device = {
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index fc999e6fc46a..13a389179514 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -2,7 +2,8 @@
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/kthread.h> 3#include <linux/kthread.h>
4#include <linux/workqueue.h> 4#include <linux/workqueue.h>
5#include <asm/e820.h> 5#include <linux/memblock.h>
6
6#include <asm/proto.h> 7#include <asm/proto.h>
7 8
8/* 9/*
@@ -18,10 +19,12 @@ static int __read_mostly memory_corruption_check = -1;
18static unsigned __read_mostly corruption_check_size = 64*1024; 19static unsigned __read_mostly corruption_check_size = 64*1024;
19static unsigned __read_mostly corruption_check_period = 60; /* seconds */ 20static unsigned __read_mostly corruption_check_period = 60; /* seconds */
20 21
21static struct e820entry scan_areas[MAX_SCAN_AREAS]; 22static struct scan_area {
23 u64 addr;
24 u64 size;
25} scan_areas[MAX_SCAN_AREAS];
22static int num_scan_areas; 26static int num_scan_areas;
23 27
24
25static __init int set_corruption_check(char *arg) 28static __init int set_corruption_check(char *arg)
26{ 29{
27 char *end; 30 char *end;
@@ -81,9 +84,9 @@ void __init setup_bios_corruption_check(void)
81 84
82 while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { 85 while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
83 u64 size; 86 u64 size;
84 addr = find_e820_area_size(addr, &size, PAGE_SIZE); 87 addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE);
85 88
86 if (!(addr + 1)) 89 if (addr == MEMBLOCK_ERROR)
87 break; 90 break;
88 91
89 if (addr >= corruption_check_size) 92 if (addr >= corruption_check_size)
@@ -92,7 +95,7 @@ void __init setup_bios_corruption_check(void)
92 if ((addr + size) > corruption_check_size) 95 if ((addr + size) > corruption_check_size)
93 size = corruption_check_size - addr; 96 size = corruption_check_size - addr;
94 97
95 e820_update_range(addr, size, E820_RAM, E820_RESERVED); 98 memblock_x86_reserve_range(addr, addr + size, "SCAN RAM");
96 scan_areas[num_scan_areas].addr = addr; 99 scan_areas[num_scan_areas].addr = addr;
97 scan_areas[num_scan_areas].size = size; 100 scan_areas[num_scan_areas].size = size;
98 num_scan_areas++; 101 num_scan_areas++;
@@ -105,7 +108,6 @@ void __init setup_bios_corruption_check(void)
105 108
106 printk(KERN_INFO "Scanning %d areas for low memory corruption\n", 109 printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
107 num_scan_areas); 110 num_scan_areas);
108 update_e820();
109} 111}
110 112
111 113
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ba5f62f45f01..9e093f8fe78c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -148,7 +148,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
148{ 148{
149#ifdef CONFIG_SMP 149#ifdef CONFIG_SMP
150 /* calling is from identify_secondary_cpu() ? */ 150 /* calling is from identify_secondary_cpu() ? */
151 if (c->cpu_index == boot_cpu_id) 151 if (!c->cpu_index)
152 return; 152 return;
153 153
154 /* 154 /*
@@ -253,37 +253,51 @@ static int __cpuinit nearby_node(int apicid)
253#endif 253#endif
254 254
255/* 255/*
256 * Fixup core topology information for AMD multi-node processors. 256 * Fixup core topology information for
257 * Assumption: Number of cores in each internal node is the same. 257 * (1) AMD multi-node processors
258 * Assumption: Number of cores in each internal node is the same.
259 * (2) AMD processors supporting compute units
258 */ 260 */
259#ifdef CONFIG_X86_HT 261#ifdef CONFIG_X86_HT
260static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) 262static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
261{ 263{
262 unsigned long long value; 264 u32 nodes;
263 u32 nodes, cores_per_node; 265 u8 node_id;
264 int cpu = smp_processor_id(); 266 int cpu = smp_processor_id();
265 267
266 if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) 268 /* get information required for multi-node processors */
267 return; 269 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
270 u32 eax, ebx, ecx, edx;
268 271
269 /* fixup topology information only once for a core */ 272 cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
270 if (cpu_has(c, X86_FEATURE_AMD_DCM)) 273 nodes = ((ecx >> 8) & 7) + 1;
271 return; 274 node_id = ecx & 7;
272 275
273 rdmsrl(MSR_FAM10H_NODE_ID, value); 276 /* get compute unit information */
277 smp_num_siblings = ((ebx >> 8) & 3) + 1;
278 c->compute_unit_id = ebx & 0xff;
279 } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
280 u64 value;
274 281
275 nodes = ((value >> 3) & 7) + 1; 282 rdmsrl(MSR_FAM10H_NODE_ID, value);
276 if (nodes == 1) 283 nodes = ((value >> 3) & 7) + 1;
284 node_id = value & 7;
285 } else
277 return; 286 return;
278 287
279 set_cpu_cap(c, X86_FEATURE_AMD_DCM); 288 /* fixup multi-node processor information */
280 cores_per_node = c->x86_max_cores / nodes; 289 if (nodes > 1) {
290 u32 cores_per_node;
291
292 set_cpu_cap(c, X86_FEATURE_AMD_DCM);
293 cores_per_node = c->x86_max_cores / nodes;
281 294
282 /* store NodeID, use llc_shared_map to store sibling info */ 295 /* store NodeID, use llc_shared_map to store sibling info */
283 per_cpu(cpu_llc_id, cpu) = value & 7; 296 per_cpu(cpu_llc_id, cpu) = node_id;
284 297
285 /* fixup core id to be in range from 0 to (cores_per_node - 1) */ 298 /* core id to be in range from 0 to (cores_per_node - 1) */
286 c->cpu_core_id = c->cpu_core_id % cores_per_node; 299 c->cpu_core_id = c->cpu_core_id % cores_per_node;
300 }
287} 301}
288#endif 302#endif
289 303
@@ -304,9 +318,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
304 c->phys_proc_id = c->initial_apicid >> bits; 318 c->phys_proc_id = c->initial_apicid >> bits;
305 /* use socket ID also for last level cache */ 319 /* use socket ID also for last level cache */
306 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; 320 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
307 /* fixup topology information on multi-node processors */ 321 amd_get_topology(c);
308 if ((c->x86 == 0x10) && (c->x86_model == 9))
309 amd_fixup_dcm(c);
310#endif 322#endif
311} 323}
312 324
@@ -412,6 +424,23 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
412 set_cpu_cap(c, X86_FEATURE_EXTD_APICID); 424 set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
413 } 425 }
414#endif 426#endif
427
428 /* We need to do the following only once */
429 if (c != &boot_cpu_data)
430 return;
431
432 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
433
434 if (c->x86 > 0x10 ||
435 (c->x86 == 0x10 && c->x86_model >= 0x2)) {
436 u64 val;
437
438 rdmsrl(MSR_K7_HWCR, val);
439 if (!(val & BIT(24)))
440 printk(KERN_WARNING FW_BUG "TSC doesn't count "
441 "with P0 frequency!\n");
442 }
443 }
415} 444}
416 445
417static void __cpuinit init_amd(struct cpuinfo_x86 *c) 446static void __cpuinit init_amd(struct cpuinfo_x86 *c)
@@ -523,7 +552,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
523#endif 552#endif
524 553
525 if (c->extended_cpuid_level >= 0x80000006) { 554 if (c->extended_cpuid_level >= 0x80000006) {
526 if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) 555 if (cpuid_edx(0x80000006) & 0xf000)
527 num_cache_leaves = 4; 556 num_cache_leaves = 4;
528 else 557 else
529 num_cache_leaves = 3; 558 num_cache_leaves = 3;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f2f9ac7da25c..4b68bda30938 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -665,7 +665,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
665 this_cpu->c_early_init(c); 665 this_cpu->c_early_init(c);
666 666
667#ifdef CONFIG_SMP 667#ifdef CONFIG_SMP
668 c->cpu_index = boot_cpu_id; 668 c->cpu_index = 0;
669#endif 669#endif
670 filter_cpuid_features(c, false); 670 filter_cpuid_features(c, false);
671} 671}
@@ -704,16 +704,21 @@ void __init early_cpu_init(void)
704} 704}
705 705
706/* 706/*
707 * The NOPL instruction is supposed to exist on all CPUs with 707 * The NOPL instruction is supposed to exist on all CPUs of family >= 6;
708 * family >= 6; unfortunately, that's not true in practice because 708 * unfortunately, that's not true in practice because of early VIA
709 * of early VIA chips and (more importantly) broken virtualizers that 709 * chips and (more importantly) broken virtualizers that are not easy
710 * are not easy to detect. In the latter case it doesn't even *fail* 710 * to detect. In the latter case it doesn't even *fail* reliably, so
711 * reliably, so probing for it doesn't even work. Disable it completely 711 * probing for it doesn't even work. Disable it completely on 32-bit
712 * unless we can find a reliable way to detect all the broken cases. 712 * unless we can find a reliable way to detect all the broken cases.
713 * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
713 */ 714 */
714static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) 715static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
715{ 716{
717#ifdef CONFIG_X86_32
716 clear_cpu_cap(c, X86_FEATURE_NOPL); 718 clear_cpu_cap(c, X86_FEATURE_NOPL);
719#else
720 set_cpu_cap(c, X86_FEATURE_NOPL);
721#endif
717} 722}
718 723
719static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 724static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@@ -1264,13 +1269,6 @@ void __cpuinit cpu_init(void)
1264 clear_all_debug_regs(); 1269 clear_all_debug_regs();
1265 dbg_restore_debug_regs(); 1270 dbg_restore_debug_regs();
1266 1271
1267 /*
1268 * Force FPU initialization:
1269 */
1270 current_thread_info()->status = 0;
1271 clear_used_math();
1272 mxcsr_feature_mask_init();
1273
1274 fpu_init(); 1272 fpu_init();
1275 xsave_init(); 1273 xsave_init();
1276} 1274}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index f668bb1f7d43..e765633f210e 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -32,6 +32,7 @@ struct cpu_dev {
32extern const struct cpu_dev *const __x86_cpu_dev_start[], 32extern const struct cpu_dev *const __x86_cpu_dev_start[],
33 *const __x86_cpu_dev_end[]; 33 *const __x86_cpu_dev_end[];
34 34
35extern void get_cpu_cap(struct cpuinfo_x86 *c);
35extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); 36extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
36extern void get_cpu_cap(struct cpuinfo_x86 *c); 37extern void get_cpu_cap(struct cpuinfo_x86 *c);
37 38
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b4389441efbb..695f17731e23 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -170,7 +170,7 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
170{ 170{
171#ifdef CONFIG_SMP 171#ifdef CONFIG_SMP
172 /* calling is from identify_secondary_cpu() ? */ 172 /* calling is from identify_secondary_cpu() ? */
173 if (c->cpu_index == boot_cpu_id) 173 if (!c->cpu_index)
174 return; 174 return;
175 175
176 /* 176 /*
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 898c2f4eab88..12cd823c8d03 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -17,7 +17,7 @@
17 17
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <asm/k8.h> 20#include <asm/amd_nb.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22 22
23#define LVL_1_INST 1 23#define LVL_1_INST 1
@@ -306,7 +306,7 @@ struct _cache_attr {
306 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); 306 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
307}; 307};
308 308
309#ifdef CONFIG_CPU_SUP_AMD 309#ifdef CONFIG_AMD_NB
310 310
311/* 311/*
312 * L3 cache descriptors 312 * L3 cache descriptors
@@ -369,7 +369,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
369 return; 369 return;
370 370
371 /* not in virtualized environments */ 371 /* not in virtualized environments */
372 if (num_k8_northbridges == 0) 372 if (k8_northbridges.num == 0)
373 return; 373 return;
374 374
375 /* 375 /*
@@ -377,7 +377,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
377 * never freed but this is done only on shutdown so it doesn't matter. 377 * never freed but this is done only on shutdown so it doesn't matter.
378 */ 378 */
379 if (!l3_caches) { 379 if (!l3_caches) {
380 int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); 380 int size = k8_northbridges.num * sizeof(struct amd_l3_cache *);
381 381
382 l3_caches = kzalloc(size, GFP_ATOMIC); 382 l3_caches = kzalloc(size, GFP_ATOMIC);
383 if (!l3_caches) 383 if (!l3_caches)
@@ -556,12 +556,12 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
556static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 556static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
557 show_cache_disable_1, store_cache_disable_1); 557 show_cache_disable_1, store_cache_disable_1);
558 558
559#else /* CONFIG_CPU_SUP_AMD */ 559#else /* CONFIG_AMD_NB */
560static void __cpuinit 560static void __cpuinit
561amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) 561amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
562{ 562{
563}; 563};
564#endif /* CONFIG_CPU_SUP_AMD */ 564#endif /* CONFIG_AMD_NB */
565 565
566static int 566static int
567__cpuinit cpuid4_cache_lookup_regs(int index, 567__cpuinit cpuid4_cache_lookup_regs(int index,
@@ -1000,7 +1000,7 @@ static struct attribute *default_attrs[] = {
1000 1000
1001static struct attribute *default_l3_attrs[] = { 1001static struct attribute *default_l3_attrs[] = {
1002 DEFAULT_SYSFS_CACHE_ATTRS, 1002 DEFAULT_SYSFS_CACHE_ATTRS,
1003#ifdef CONFIG_CPU_SUP_AMD 1003#ifdef CONFIG_AMD_NB
1004 &cache_disable_0.attr, 1004 &cache_disable_0.attr,
1005 &cache_disable_1.attr, 1005 &cache_disable_1.attr,
1006#endif 1006#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 8a85dd1b1aa1..1e8d66c1336a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -192,6 +192,7 @@ static const struct file_operations severities_coverage_fops = {
192 .release = seq_release, 192 .release = seq_release,
193 .read = seq_read, 193 .read = seq_read,
194 .write = severities_coverage_write, 194 .write = severities_coverage_write,
195 .llseek = seq_lseek,
195}; 196};
196 197
197static int __init severities_debugfs_init(void) 198static int __init severities_debugfs_init(void)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ed41562909fe..7a35b72d7c03 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1665,6 +1665,7 @@ struct file_operations mce_chrdev_ops = {
1665 .read = mce_read, 1665 .read = mce_read,
1666 .poll = mce_poll, 1666 .poll = mce_poll,
1667 .unlocked_ioctl = mce_ioctl, 1667 .unlocked_ioctl = mce_ioctl,
1668 .llseek = no_llseek,
1668}; 1669};
1669EXPORT_SYMBOL_GPL(mce_chrdev_ops); 1670EXPORT_SYMBOL_GPL(mce_chrdev_ops);
1670 1671
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 39aaee5c1ab2..80c482382d5c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -131,7 +131,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
131 u32 low = 0, high = 0, address = 0; 131 u32 low = 0, high = 0, address = 0;
132 unsigned int bank, block; 132 unsigned int bank, block;
133 struct thresh_restart tr; 133 struct thresh_restart tr;
134 u8 lvt_off; 134 int lvt_off = -1;
135 u8 offset;
135 136
136 for (bank = 0; bank < NR_BANKS; ++bank) { 137 for (bank = 0; bank < NR_BANKS; ++bank) {
137 for (block = 0; block < NR_BLOCKS; ++block) { 138 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,8 +163,28 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
162 if (shared_bank[bank] && c->cpu_core_id) 163 if (shared_bank[bank] && c->cpu_core_id)
163 break; 164 break;
164#endif 165#endif
165 lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR, 166 offset = (high & MASK_LVTOFF_HI) >> 20;
166 APIC_EILVT_MSG_FIX, 0); 167 if (lvt_off < 0) {
168 if (setup_APIC_eilvt(offset,
169 THRESHOLD_APIC_VECTOR,
170 APIC_EILVT_MSG_FIX, 0)) {
171 pr_err(FW_BUG "cpu %d, failed to "
172 "setup threshold interrupt "
173 "for bank %d, block %d "
174 "(MSR%08X=0x%x%08x)",
175 smp_processor_id(), bank, block,
176 address, high, low);
177 continue;
178 }
179 lvt_off = offset;
180 } else if (lvt_off != offset) {
181 pr_err(FW_BUG "cpu %d, invalid threshold "
182 "interrupt offset %d for bank %d,"
183 "block %d (MSR%08X=0x%x%08x)",
184 smp_processor_id(), lvt_off, bank,
185 block, address, high, low);
186 continue;
187 }
167 188
168 high &= ~MASK_LVTOFF_HI; 189 high &= ~MASK_LVTOFF_HI;
169 high |= lvt_off << 20; 190 high |= lvt_off << 20;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 169d8804a9f8..4b683267eca5 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -350,7 +350,7 @@ static void intel_thermal_interrupt(void)
350 350
351static void unexpected_thermal_interrupt(void) 351static void unexpected_thermal_interrupt(void)
352{ 352{
353 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", 353 printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
354 smp_processor_id()); 354 smp_processor_id());
355 add_taint(TAINT_MACHINE_CHECK); 355 add_taint(TAINT_MACHINE_CHECK);
356} 356}
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index c5f59d071425..ac140c7be396 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -827,7 +827,7 @@ int __init amd_special_default_mtrr(void)
827 827
828 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 828 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
829 return 0; 829 return 0;
830 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) 830 if (boot_cpu_data.x86 < 0xf)
831 return 0; 831 return 0;
832 /* In case some hypervisor doesn't pass SYSCFG through: */ 832 /* In case some hypervisor doesn't pass SYSCFG through: */
833 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) 833 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 7d28d7d03885..9f27228ceffd 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -64,18 +64,59 @@ static inline void k8_check_syscfg_dram_mod_en(void)
64 } 64 }
65} 65}
66 66
67/* Get the size of contiguous MTRR range */
68static u64 get_mtrr_size(u64 mask)
69{
70 u64 size;
71
72 mask >>= PAGE_SHIFT;
73 mask |= size_or_mask;
74 size = -mask;
75 size <<= PAGE_SHIFT;
76 return size;
77}
78
67/* 79/*
68 * Returns the effective MTRR type for the region 80 * Check and return the effective type for MTRR-MTRR type overlap.
69 * Error returns: 81 * Returns 1 if the effective type is UNCACHEABLE, else returns 0
70 * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR
71 * - 0xFF - when MTRR is not enabled
72 */ 82 */
73u8 mtrr_type_lookup(u64 start, u64 end) 83static int check_type_overlap(u8 *prev, u8 *curr)
84{
85 if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) {
86 *prev = MTRR_TYPE_UNCACHABLE;
87 *curr = MTRR_TYPE_UNCACHABLE;
88 return 1;
89 }
90
91 if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) ||
92 (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) {
93 *prev = MTRR_TYPE_WRTHROUGH;
94 *curr = MTRR_TYPE_WRTHROUGH;
95 }
96
97 if (*prev != *curr) {
98 *prev = MTRR_TYPE_UNCACHABLE;
99 *curr = MTRR_TYPE_UNCACHABLE;
100 return 1;
101 }
102
103 return 0;
104}
105
106/*
107 * Error/Semi-error returns:
108 * 0xFF - when MTRR is not enabled
109 * *repeat == 1 implies [start:end] spanned across MTRR range and type returned
110 * corresponds only to [start:*partial_end].
111 * Caller has to lookup again for [*partial_end:end].
112 */
113static u8 __mtrr_type_lookup(u64 start, u64 end, u64 *partial_end, int *repeat)
74{ 114{
75 int i; 115 int i;
76 u64 base, mask; 116 u64 base, mask;
77 u8 prev_match, curr_match; 117 u8 prev_match, curr_match;
78 118
119 *repeat = 0;
79 if (!mtrr_state_set) 120 if (!mtrr_state_set)
80 return 0xFF; 121 return 0xFF;
81 122
@@ -126,8 +167,34 @@ u8 mtrr_type_lookup(u64 start, u64 end)
126 167
127 start_state = ((start & mask) == (base & mask)); 168 start_state = ((start & mask) == (base & mask));
128 end_state = ((end & mask) == (base & mask)); 169 end_state = ((end & mask) == (base & mask));
129 if (start_state != end_state) 170
130 return 0xFE; 171 if (start_state != end_state) {
172 /*
173 * We have start:end spanning across an MTRR.
174 * We split the region into
175 * either
176 * (start:mtrr_end) (mtrr_end:end)
177 * or
178 * (start:mtrr_start) (mtrr_start:end)
179 * depending on kind of overlap.
180 * Return the type for first region and a pointer to
181 * the start of second region so that caller will
182 * lookup again on the second region.
183 * Note: This way we handle multiple overlaps as well.
184 */
185 if (start_state)
186 *partial_end = base + get_mtrr_size(mask);
187 else
188 *partial_end = base;
189
190 if (unlikely(*partial_end <= start)) {
191 WARN_ON(1);
192 *partial_end = start + PAGE_SIZE;
193 }
194
195 end = *partial_end - 1; /* end is inclusive */
196 *repeat = 1;
197 }
131 198
132 if ((start & mask) != (base & mask)) 199 if ((start & mask) != (base & mask))
133 continue; 200 continue;
@@ -138,21 +205,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
138 continue; 205 continue;
139 } 206 }
140 207
141 if (prev_match == MTRR_TYPE_UNCACHABLE || 208 if (check_type_overlap(&prev_match, &curr_match))
142 curr_match == MTRR_TYPE_UNCACHABLE) { 209 return curr_match;
143 return MTRR_TYPE_UNCACHABLE;
144 }
145
146 if ((prev_match == MTRR_TYPE_WRBACK &&
147 curr_match == MTRR_TYPE_WRTHROUGH) ||
148 (prev_match == MTRR_TYPE_WRTHROUGH &&
149 curr_match == MTRR_TYPE_WRBACK)) {
150 prev_match = MTRR_TYPE_WRTHROUGH;
151 curr_match = MTRR_TYPE_WRTHROUGH;
152 }
153
154 if (prev_match != curr_match)
155 return MTRR_TYPE_UNCACHABLE;
156 } 210 }
157 211
158 if (mtrr_tom2) { 212 if (mtrr_tom2) {
@@ -166,6 +220,36 @@ u8 mtrr_type_lookup(u64 start, u64 end)
166 return mtrr_state.def_type; 220 return mtrr_state.def_type;
167} 221}
168 222
223/*
224 * Returns the effective MTRR type for the region
225 * Error return:
226 * 0xFF - when MTRR is not enabled
227 */
228u8 mtrr_type_lookup(u64 start, u64 end)
229{
230 u8 type, prev_type;
231 int repeat;
232 u64 partial_end;
233
234 type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
235
236 /*
237 * Common path is with repeat = 0.
238 * However, we can have cases where [start:end] spans across some
239 * MTRR range. Do repeated lookups for that case here.
240 */
241 while (repeat) {
242 prev_type = type;
243 start = partial_end;
244 type = __mtrr_type_lookup(start, end, &partial_end, &repeat);
245
246 if (check_type_overlap(&prev_type, &type))
247 return type;
248 }
249
250 return type;
251}
252
169/* Get the MSR pair relating to a var range */ 253/* Get the MSR pair relating to a var range */
170static void 254static void
171get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) 255get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 03a5b0385ad6..fe73c1844a9a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
531/* 531/*
532 * Setup the hardware configuration for a given attr_type 532 * Setup the hardware configuration for a given attr_type
533 */ 533 */
534static int __hw_perf_event_init(struct perf_event *event) 534static int __x86_pmu_event_init(struct perf_event *event)
535{ 535{
536 int err; 536 int err;
537 537
@@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void)
584 } 584 }
585} 585}
586 586
587void hw_perf_disable(void) 587static void x86_pmu_disable(struct pmu *pmu)
588{ 588{
589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
590 590
@@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added)
619 } 619 }
620} 620}
621 621
622static const struct pmu pmu; 622static struct pmu pmu;
623 623
624static inline int is_x86_event(struct perf_event *event) 624static inline int is_x86_event(struct perf_event *event)
625{ 625{
@@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
801 hwc->last_tag == cpuc->tags[i]; 801 hwc->last_tag == cpuc->tags[i];
802} 802}
803 803
804static int x86_pmu_start(struct perf_event *event); 804static void x86_pmu_start(struct perf_event *event, int flags);
805static void x86_pmu_stop(struct perf_event *event); 805static void x86_pmu_stop(struct perf_event *event, int flags);
806 806
807void hw_perf_enable(void) 807static void x86_pmu_enable(struct pmu *pmu)
808{ 808{
809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
810 struct perf_event *event; 810 struct perf_event *event;
@@ -840,7 +840,14 @@ void hw_perf_enable(void)
840 match_prev_assignment(hwc, cpuc, i)) 840 match_prev_assignment(hwc, cpuc, i))
841 continue; 841 continue;
842 842
843 x86_pmu_stop(event); 843 /*
844 * Ensure we don't accidentally enable a stopped
845 * counter simply because we rescheduled.
846 */
847 if (hwc->state & PERF_HES_STOPPED)
848 hwc->state |= PERF_HES_ARCH;
849
850 x86_pmu_stop(event, PERF_EF_UPDATE);
844 } 851 }
845 852
846 for (i = 0; i < cpuc->n_events; i++) { 853 for (i = 0; i < cpuc->n_events; i++) {
@@ -852,7 +859,10 @@ void hw_perf_enable(void)
852 else if (i < n_running) 859 else if (i < n_running)
853 continue; 860 continue;
854 861
855 x86_pmu_start(event); 862 if (hwc->state & PERF_HES_ARCH)
863 continue;
864
865 x86_pmu_start(event, PERF_EF_RELOAD);
856 } 866 }
857 cpuc->n_added = 0; 867 cpuc->n_added = 0;
858 perf_events_lapic_init(); 868 perf_events_lapic_init();
@@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
953} 963}
954 964
955/* 965/*
956 * activate a single event 966 * Add a single event to the PMU.
957 * 967 *
958 * The event is added to the group of enabled events 968 * The event is added to the group of enabled events
959 * but only if it can be scehduled with existing events. 969 * but only if it can be scehduled with existing events.
960 *
961 * Called with PMU disabled. If successful and return value 1,
962 * then guaranteed to call perf_enable() and hw_perf_enable()
963 */ 970 */
964static int x86_pmu_enable(struct perf_event *event) 971static int x86_pmu_add(struct perf_event *event, int flags)
965{ 972{
966 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 973 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
967 struct hw_perf_event *hwc; 974 struct hw_perf_event *hwc;
@@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event)
970 977
971 hwc = &event->hw; 978 hwc = &event->hw;
972 979
980 perf_pmu_disable(event->pmu);
973 n0 = cpuc->n_events; 981 n0 = cpuc->n_events;
974 n = collect_events(cpuc, event, false); 982 ret = n = collect_events(cpuc, event, false);
975 if (n < 0) 983 if (ret < 0)
976 return n; 984 goto out;
985
986 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
987 if (!(flags & PERF_EF_START))
988 hwc->state |= PERF_HES_ARCH;
977 989
978 /* 990 /*
979 * If group events scheduling transaction was started, 991 * If group events scheduling transaction was started,
980 * skip the schedulability test here, it will be peformed 992 * skip the schedulability test here, it will be peformed
981 * at commit time(->commit_txn) as a whole 993 * at commit time (->commit_txn) as a whole
982 */ 994 */
983 if (cpuc->group_flag & PERF_EVENT_TXN) 995 if (cpuc->group_flag & PERF_EVENT_TXN)
984 goto out; 996 goto done_collect;
985 997
986 ret = x86_pmu.schedule_events(cpuc, n, assign); 998 ret = x86_pmu.schedule_events(cpuc, n, assign);
987 if (ret) 999 if (ret)
988 return ret; 1000 goto out;
989 /* 1001 /*
990 * copy new assignment, now we know it is possible 1002 * copy new assignment, now we know it is possible
991 * will be used by hw_perf_enable() 1003 * will be used by hw_perf_enable()
992 */ 1004 */
993 memcpy(cpuc->assign, assign, n*sizeof(int)); 1005 memcpy(cpuc->assign, assign, n*sizeof(int));
994 1006
995out: 1007done_collect:
996 cpuc->n_events = n; 1008 cpuc->n_events = n;
997 cpuc->n_added += n - n0; 1009 cpuc->n_added += n - n0;
998 cpuc->n_txn += n - n0; 1010 cpuc->n_txn += n - n0;
999 1011
1000 return 0; 1012 ret = 0;
1013out:
1014 perf_pmu_enable(event->pmu);
1015 return ret;
1001} 1016}
1002 1017
1003static int x86_pmu_start(struct perf_event *event) 1018static void x86_pmu_start(struct perf_event *event, int flags)
1004{ 1019{
1005 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1020 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1006 int idx = event->hw.idx; 1021 int idx = event->hw.idx;
1007 1022
1008 if (idx == -1) 1023 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1009 return -EAGAIN; 1024 return;
1025
1026 if (WARN_ON_ONCE(idx == -1))
1027 return;
1028
1029 if (flags & PERF_EF_RELOAD) {
1030 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1031 x86_perf_event_set_period(event);
1032 }
1033
1034 event->hw.state = 0;
1010 1035
1011 x86_perf_event_set_period(event);
1012 cpuc->events[idx] = event; 1036 cpuc->events[idx] = event;
1013 __set_bit(idx, cpuc->active_mask); 1037 __set_bit(idx, cpuc->active_mask);
1014 __set_bit(idx, cpuc->running); 1038 __set_bit(idx, cpuc->running);
1015 x86_pmu.enable(event); 1039 x86_pmu.enable(event);
1016 perf_event_update_userpage(event); 1040 perf_event_update_userpage(event);
1017
1018 return 0;
1019}
1020
1021static void x86_pmu_unthrottle(struct perf_event *event)
1022{
1023 int ret = x86_pmu_start(event);
1024 WARN_ON_ONCE(ret);
1025} 1041}
1026 1042
1027void perf_event_print_debug(void) 1043void perf_event_print_debug(void)
@@ -1078,27 +1094,29 @@ void perf_event_print_debug(void)
1078 local_irq_restore(flags); 1094 local_irq_restore(flags);
1079} 1095}
1080 1096
1081static void x86_pmu_stop(struct perf_event *event) 1097static void x86_pmu_stop(struct perf_event *event, int flags)
1082{ 1098{
1083 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1099 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1084 struct hw_perf_event *hwc = &event->hw; 1100 struct hw_perf_event *hwc = &event->hw;
1085 int idx = hwc->idx;
1086 1101
1087 if (!__test_and_clear_bit(idx, cpuc->active_mask)) 1102 if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
1088 return; 1103 x86_pmu.disable(event);
1089 1104 cpuc->events[hwc->idx] = NULL;
1090 x86_pmu.disable(event); 1105 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1091 1106 hwc->state |= PERF_HES_STOPPED;
1092 /* 1107 }
1093 * Drain the remaining delta count out of a event
1094 * that we are disabling:
1095 */
1096 x86_perf_event_update(event);
1097 1108
1098 cpuc->events[idx] = NULL; 1109 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1110 /*
1111 * Drain the remaining delta count out of a event
1112 * that we are disabling:
1113 */
1114 x86_perf_event_update(event);
1115 hwc->state |= PERF_HES_UPTODATE;
1116 }
1099} 1117}
1100 1118
1101static void x86_pmu_disable(struct perf_event *event) 1119static void x86_pmu_del(struct perf_event *event, int flags)
1102{ 1120{
1103 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1121 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1104 int i; 1122 int i;
@@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event)
1111 if (cpuc->group_flag & PERF_EVENT_TXN) 1129 if (cpuc->group_flag & PERF_EVENT_TXN)
1112 return; 1130 return;
1113 1131
1114 x86_pmu_stop(event); 1132 x86_pmu_stop(event, PERF_EF_UPDATE);
1115 1133
1116 for (i = 0; i < cpuc->n_events; i++) { 1134 for (i = 0; i < cpuc->n_events; i++) {
1117 if (event == cpuc->event_list[i]) { 1135 if (event == cpuc->event_list[i]) {
@@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1134 struct perf_sample_data data; 1152 struct perf_sample_data data;
1135 struct cpu_hw_events *cpuc; 1153 struct cpu_hw_events *cpuc;
1136 struct perf_event *event; 1154 struct perf_event *event;
1137 struct hw_perf_event *hwc;
1138 int idx, handled = 0; 1155 int idx, handled = 0;
1139 u64 val; 1156 u64 val;
1140 1157
@@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1155 } 1172 }
1156 1173
1157 event = cpuc->events[idx]; 1174 event = cpuc->events[idx];
1158 hwc = &event->hw;
1159 1175
1160 val = x86_perf_event_update(event); 1176 val = x86_perf_event_update(event);
1161 if (val & (1ULL << (x86_pmu.cntval_bits - 1))) 1177 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1171 continue; 1187 continue;
1172 1188
1173 if (perf_event_overflow(event, 1, &data, regs)) 1189 if (perf_event_overflow(event, 1, &data, regs))
1174 x86_pmu_stop(event); 1190 x86_pmu_stop(event, 0);
1175 } 1191 }
1176 1192
1177 if (handled) 1193 if (handled)
@@ -1180,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1180 return handled; 1196 return handled;
1181} 1197}
1182 1198
1183void smp_perf_pending_interrupt(struct pt_regs *regs)
1184{
1185 irq_enter();
1186 ack_APIC_irq();
1187 inc_irq_stat(apic_pending_irqs);
1188 perf_event_do_pending();
1189 irq_exit();
1190}
1191
1192void set_perf_event_pending(void)
1193{
1194#ifdef CONFIG_X86_LOCAL_APIC
1195 if (!x86_pmu.apic || !x86_pmu_initialized())
1196 return;
1197
1198 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1199#endif
1200}
1201
1202void perf_events_lapic_init(void) 1199void perf_events_lapic_init(void)
1203{ 1200{
1204 if (!x86_pmu.apic || !x86_pmu_initialized()) 1201 if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1388,7 +1385,6 @@ void __init init_hw_perf_events(void)
1388 x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 1385 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1389 } 1386 }
1390 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; 1387 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1391 perf_max_events = x86_pmu.num_counters;
1392 1388
1393 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1389 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1394 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", 1390 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1424,6 +1420,7 @@ void __init init_hw_perf_events(void)
1424 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1420 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1425 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1421 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1426 1422
1423 perf_pmu_register(&pmu);
1427 perf_cpu_notifier(x86_pmu_notifier); 1424 perf_cpu_notifier(x86_pmu_notifier);
1428} 1425}
1429 1426
@@ -1437,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event)
1437 * Set the flag to make pmu::enable() not perform the 1434 * Set the flag to make pmu::enable() not perform the
1438 * schedulability test, it will be performed at commit time 1435 * schedulability test, it will be performed at commit time
1439 */ 1436 */
1440static void x86_pmu_start_txn(const struct pmu *pmu) 1437static void x86_pmu_start_txn(struct pmu *pmu)
1441{ 1438{
1442 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1439 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1443 1440
1441 perf_pmu_disable(pmu);
1444 cpuc->group_flag |= PERF_EVENT_TXN; 1442 cpuc->group_flag |= PERF_EVENT_TXN;
1445 cpuc->n_txn = 0; 1443 cpuc->n_txn = 0;
1446} 1444}
@@ -1450,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1450 * Clear the flag and pmu::enable() will perform the 1448 * Clear the flag and pmu::enable() will perform the
1451 * schedulability test. 1449 * schedulability test.
1452 */ 1450 */
1453static void x86_pmu_cancel_txn(const struct pmu *pmu) 1451static void x86_pmu_cancel_txn(struct pmu *pmu)
1454{ 1452{
1455 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1453 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1456 1454
@@ -1460,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1460 */ 1458 */
1461 cpuc->n_added -= cpuc->n_txn; 1459 cpuc->n_added -= cpuc->n_txn;
1462 cpuc->n_events -= cpuc->n_txn; 1460 cpuc->n_events -= cpuc->n_txn;
1461 perf_pmu_enable(pmu);
1463} 1462}
1464 1463
1465/* 1464/*
@@ -1467,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1467 * Perform the group schedulability test as a whole 1466 * Perform the group schedulability test as a whole
1468 * Return 0 if success 1467 * Return 0 if success
1469 */ 1468 */
1470static int x86_pmu_commit_txn(const struct pmu *pmu) 1469static int x86_pmu_commit_txn(struct pmu *pmu)
1471{ 1470{
1472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1471 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1473 int assign[X86_PMC_IDX_MAX]; 1472 int assign[X86_PMC_IDX_MAX];
@@ -1489,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1489 memcpy(cpuc->assign, assign, n*sizeof(int)); 1488 memcpy(cpuc->assign, assign, n*sizeof(int));
1490 1489
1491 cpuc->group_flag &= ~PERF_EVENT_TXN; 1490 cpuc->group_flag &= ~PERF_EVENT_TXN;
1492 1491 perf_pmu_enable(pmu);
1493 return 0; 1492 return 0;
1494} 1493}
1495 1494
1496static const struct pmu pmu = {
1497 .enable = x86_pmu_enable,
1498 .disable = x86_pmu_disable,
1499 .start = x86_pmu_start,
1500 .stop = x86_pmu_stop,
1501 .read = x86_pmu_read,
1502 .unthrottle = x86_pmu_unthrottle,
1503 .start_txn = x86_pmu_start_txn,
1504 .cancel_txn = x86_pmu_cancel_txn,
1505 .commit_txn = x86_pmu_commit_txn,
1506};
1507
1508/* 1495/*
1509 * validate that we can schedule this event 1496 * validate that we can schedule this event
1510 */ 1497 */
@@ -1579,12 +1566,22 @@ out:
1579 return ret; 1566 return ret;
1580} 1567}
1581 1568
1582const struct pmu *hw_perf_event_init(struct perf_event *event) 1569int x86_pmu_event_init(struct perf_event *event)
1583{ 1570{
1584 const struct pmu *tmp; 1571 struct pmu *tmp;
1585 int err; 1572 int err;
1586 1573
1587 err = __hw_perf_event_init(event); 1574 switch (event->attr.type) {
1575 case PERF_TYPE_RAW:
1576 case PERF_TYPE_HARDWARE:
1577 case PERF_TYPE_HW_CACHE:
1578 break;
1579
1580 default:
1581 return -ENOENT;
1582 }
1583
1584 err = __x86_pmu_event_init(event);
1588 if (!err) { 1585 if (!err) {
1589 /* 1586 /*
1590 * we temporarily connect event to its pmu 1587 * we temporarily connect event to its pmu
@@ -1604,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1604 if (err) { 1601 if (err) {
1605 if (event->destroy) 1602 if (event->destroy)
1606 event->destroy(event); 1603 event->destroy(event);
1607 return ERR_PTR(err);
1608 } 1604 }
1609 1605
1610 return &pmu; 1606 return err;
1611} 1607}
1612 1608
1613/* 1609static struct pmu pmu = {
1614 * callchain support 1610 .pmu_enable = x86_pmu_enable,
1615 */ 1611 .pmu_disable = x86_pmu_disable,
1616 1612
1617static inline 1613 .event_init = x86_pmu_event_init,
1618void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1619{
1620 if (entry->nr < PERF_MAX_STACK_DEPTH)
1621 entry->ip[entry->nr++] = ip;
1622}
1623 1614
1624static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1615 .add = x86_pmu_add,
1625static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1616 .del = x86_pmu_del,
1617 .start = x86_pmu_start,
1618 .stop = x86_pmu_stop,
1619 .read = x86_pmu_read,
1626 1620
1621 .start_txn = x86_pmu_start_txn,
1622 .cancel_txn = x86_pmu_cancel_txn,
1623 .commit_txn = x86_pmu_commit_txn,
1624};
1625
1626/*
1627 * callchain support
1628 */
1627 1629
1628static void 1630static void
1629backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) 1631backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
@@ -1645,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1645{ 1647{
1646 struct perf_callchain_entry *entry = data; 1648 struct perf_callchain_entry *entry = data;
1647 1649
1648 callchain_store(entry, addr); 1650 perf_callchain_store(entry, addr);
1649} 1651}
1650 1652
1651static const struct stacktrace_ops backtrace_ops = { 1653static const struct stacktrace_ops backtrace_ops = {
@@ -1656,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = {
1656 .walk_stack = print_context_stack_bp, 1658 .walk_stack = print_context_stack_bp,
1657}; 1659};
1658 1660
1659static void 1661void
1660perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1662perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1661{ 1663{
1662 callchain_store(entry, PERF_CONTEXT_KERNEL); 1664 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1663 callchain_store(entry, regs->ip); 1665 /* TODO: We don't support guest os callchain now */
1666 return;
1667 }
1668
1669 perf_callchain_store(entry, regs->ip);
1664 1670
1665 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1671 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1666} 1672}
@@ -1689,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1689 if (fp < compat_ptr(regs->sp)) 1695 if (fp < compat_ptr(regs->sp))
1690 break; 1696 break;
1691 1697
1692 callchain_store(entry, frame.return_address); 1698 perf_callchain_store(entry, frame.return_address);
1693 fp = compat_ptr(frame.next_frame); 1699 fp = compat_ptr(frame.next_frame);
1694 } 1700 }
1695 return 1; 1701 return 1;
@@ -1702,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1702} 1708}
1703#endif 1709#endif
1704 1710
1705static void 1711void
1706perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) 1712perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1707{ 1713{
1708 struct stack_frame frame; 1714 struct stack_frame frame;
1709 const void __user *fp; 1715 const void __user *fp;
1710 1716
1711 if (!user_mode(regs)) 1717 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1712 regs = task_pt_regs(current); 1718 /* TODO: We don't support guest os callchain now */
1719 return;
1720 }
1713 1721
1714 fp = (void __user *)regs->bp; 1722 fp = (void __user *)regs->bp;
1715 1723
1716 callchain_store(entry, PERF_CONTEXT_USER); 1724 perf_callchain_store(entry, regs->ip);
1717 callchain_store(entry, regs->ip);
1718 1725
1719 if (perf_callchain_user32(regs, entry)) 1726 if (perf_callchain_user32(regs, entry))
1720 return; 1727 return;
@@ -1731,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1731 if ((unsigned long)fp < regs->sp) 1738 if ((unsigned long)fp < regs->sp)
1732 break; 1739 break;
1733 1740
1734 callchain_store(entry, frame.return_address); 1741 perf_callchain_store(entry, frame.return_address);
1735 fp = frame.next_frame; 1742 fp = frame.next_frame;
1736 } 1743 }
1737} 1744}
1738 1745
1739static void
1740perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1741{
1742 int is_user;
1743
1744 if (!regs)
1745 return;
1746
1747 is_user = user_mode(regs);
1748
1749 if (is_user && current->state != TASK_RUNNING)
1750 return;
1751
1752 if (!is_user)
1753 perf_callchain_kernel(regs, entry);
1754
1755 if (current->mm)
1756 perf_callchain_user(regs, entry);
1757}
1758
1759struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1760{
1761 struct perf_callchain_entry *entry;
1762
1763 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1764 /* TODO: We don't support guest os callchain now */
1765 return NULL;
1766 }
1767
1768 if (in_nmi())
1769 entry = &__get_cpu_var(pmc_nmi_entry);
1770 else
1771 entry = &__get_cpu_var(pmc_irq_entry);
1772
1773 entry->nr = 0;
1774
1775 perf_do_callchain(regs, entry);
1776
1777 return entry;
1778}
1779
1780unsigned long perf_instruction_pointer(struct pt_regs *regs) 1746unsigned long perf_instruction_pointer(struct pt_regs *regs)
1781{ 1747{
1782 unsigned long ip; 1748 unsigned long ip;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c2897b7b4a3b..46d58448c3af 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids
52 [ C(DTLB) ] = { 52 [ C(DTLB) ] = {
53 [ C(OP_READ) ] = { 53 [ C(OP_READ) ] = {
54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ 54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
55 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ 55 [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
56 }, 56 },
57 [ C(OP_WRITE) ] = { 57 [ C(OP_WRITE) ] = {
58 [ C(RESULT_ACCESS) ] = 0, 58 [ C(RESULT_ACCESS) ] = 0,
@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids
66 [ C(ITLB) ] = { 66 [ C(ITLB) ] = {
67 [ C(OP_READ) ] = { 67 [ C(OP_READ) ] = {
68 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ 68 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
69 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ 69 [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
70 }, 70 },
71 [ C(OP_WRITE) ] = { 71 [ C(OP_WRITE) ] = {
72 [ C(RESULT_ACCESS) ] = -1, 72 [ C(RESULT_ACCESS) ] = -1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ee05c90012d2..c8f5c088cad1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
713 struct cpu_hw_events *cpuc; 713 struct cpu_hw_events *cpuc;
714 int bit, loops; 714 int bit, loops;
715 u64 status; 715 u64 status;
716 int handled = 0; 716 int handled;
717 717
718 perf_sample_data_init(&data, 0); 718 perf_sample_data_init(&data, 0);
719 719
720 cpuc = &__get_cpu_var(cpu_hw_events); 720 cpuc = &__get_cpu_var(cpu_hw_events);
721 721
722 intel_pmu_disable_all(); 722 intel_pmu_disable_all();
723 intel_pmu_drain_bts_buffer(); 723 handled = intel_pmu_drain_bts_buffer();
724 status = intel_pmu_get_status(); 724 status = intel_pmu_get_status();
725 if (!status) { 725 if (!status) {
726 intel_pmu_enable_all(0); 726 intel_pmu_enable_all(0);
727 return 0; 727 return handled;
728 } 728 }
729 729
730 loops = 0; 730 loops = 0;
@@ -763,7 +763,7 @@ again:
763 data.period = event->hw.last_period; 763 data.period = event->hw.last_period;
764 764
765 if (perf_event_overflow(event, 1, &data, regs)) 765 if (perf_event_overflow(event, 1, &data, regs))
766 x86_pmu_stop(event); 766 x86_pmu_stop(event, 0);
767 } 767 }
768 768
769 /* 769 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 18018d1311cd..4977f9c400e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
214 update_debugctlmsr(debugctlmsr); 214 update_debugctlmsr(debugctlmsr);
215} 215}
216 216
217static void intel_pmu_drain_bts_buffer(void) 217static int intel_pmu_drain_bts_buffer(void)
218{ 218{
219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
220 struct debug_store *ds = cpuc->ds; 220 struct debug_store *ds = cpuc->ds;
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
231 struct pt_regs regs; 231 struct pt_regs regs;
232 232
233 if (!event) 233 if (!event)
234 return; 234 return 0;
235 235
236 if (!ds) 236 if (!ds)
237 return; 237 return 0;
238 238
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
240 top = (struct bts_record *)(unsigned long)ds->bts_index; 240 top = (struct bts_record *)(unsigned long)ds->bts_index;
241 241
242 if (top <= at) 242 if (top <= at)
243 return; 243 return 0;
244 244
245 ds->bts_index = ds->bts_buffer_base; 245 ds->bts_index = ds->bts_buffer_base;
246 246
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
256 perf_prepare_sample(&header, &data, event, &regs); 256 perf_prepare_sample(&header, &data, event, &regs);
257 257
258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) 258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
259 return; 259 return 1;
260 260
261 for (; at < top; at++) { 261 for (; at < top; at++) {
262 data.ip = at->from; 262 data.ip = at->from;
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
270 /* There's new data available. */ 270 /* There's new data available. */
271 event->hw.interrupts++; 271 event->hw.interrupts++;
272 event->pending_kill = POLL_IN; 272 event->pending_kill = POLL_IN;
273 return 1;
273} 274}
274 275
275/* 276/*
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
491 regs.flags &= ~PERF_EFLAGS_EXACT; 492 regs.flags &= ~PERF_EFLAGS_EXACT;
492 493
493 if (perf_event_overflow(event, 1, &data, &regs)) 494 if (perf_event_overflow(event, 1, &data, &regs))
494 x86_pmu_stop(event); 495 x86_pmu_stop(event, 0);
495} 496}
496 497
497static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) 498static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 249015173992..81400b93e694 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -18,6 +18,8 @@
18struct p4_event_bind { 18struct p4_event_bind {
19 unsigned int opcode; /* Event code and ESCR selector */ 19 unsigned int opcode; /* Event code and ESCR selector */
20 unsigned int escr_msr[2]; /* ESCR MSR for this event */ 20 unsigned int escr_msr[2]; /* ESCR MSR for this event */
21 unsigned int escr_emask; /* valid ESCR EventMask bits */
22 unsigned int shared; /* event is shared across threads */
21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ 23 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
22}; 24};
23 25
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = {
66 [P4_EVENT_TC_DELIVER_MODE] = { 68 [P4_EVENT_TC_DELIVER_MODE] = {
67 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), 69 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
68 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 70 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
71 .escr_emask =
72 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
73 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
74 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
75 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
76 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
77 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
78 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
79 .shared = 1,
69 .cntr = { {4, 5, -1}, {6, 7, -1} }, 80 .cntr = { {4, 5, -1}, {6, 7, -1} },
70 }, 81 },
71 [P4_EVENT_BPU_FETCH_REQUEST] = { 82 [P4_EVENT_BPU_FETCH_REQUEST] = {
72 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), 83 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
73 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, 84 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
85 .escr_emask =
86 P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
74 .cntr = { {0, -1, -1}, {2, -1, -1} }, 87 .cntr = { {0, -1, -1}, {2, -1, -1} },
75 }, 88 },
76 [P4_EVENT_ITLB_REFERENCE] = { 89 [P4_EVENT_ITLB_REFERENCE] = {
77 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), 90 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
78 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, 91 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
92 .escr_emask =
93 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
94 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
95 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
79 .cntr = { {0, -1, -1}, {2, -1, -1} }, 96 .cntr = { {0, -1, -1}, {2, -1, -1} },
80 }, 97 },
81 [P4_EVENT_MEMORY_CANCEL] = { 98 [P4_EVENT_MEMORY_CANCEL] = {
82 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), 99 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
83 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 100 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
101 .escr_emask =
102 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
103 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
84 .cntr = { {8, 9, -1}, {10, 11, -1} }, 104 .cntr = { {8, 9, -1}, {10, 11, -1} },
85 }, 105 },
86 [P4_EVENT_MEMORY_COMPLETE] = { 106 [P4_EVENT_MEMORY_COMPLETE] = {
87 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), 107 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
88 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 108 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
109 .escr_emask =
110 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
111 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
89 .cntr = { {8, 9, -1}, {10, 11, -1} }, 112 .cntr = { {8, 9, -1}, {10, 11, -1} },
90 }, 113 },
91 [P4_EVENT_LOAD_PORT_REPLAY] = { 114 [P4_EVENT_LOAD_PORT_REPLAY] = {
92 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), 115 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
93 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, 116 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
117 .escr_emask =
118 P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
94 .cntr = { {8, 9, -1}, {10, 11, -1} }, 119 .cntr = { {8, 9, -1}, {10, 11, -1} },
95 }, 120 },
96 [P4_EVENT_STORE_PORT_REPLAY] = { 121 [P4_EVENT_STORE_PORT_REPLAY] = {
97 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), 122 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
98 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 123 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
124 .escr_emask =
125 P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
99 .cntr = { {8, 9, -1}, {10, 11, -1} }, 126 .cntr = { {8, 9, -1}, {10, 11, -1} },
100 }, 127 },
101 [P4_EVENT_MOB_LOAD_REPLAY] = { 128 [P4_EVENT_MOB_LOAD_REPLAY] = {
102 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), 129 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
103 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, 130 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
131 .escr_emask =
132 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
133 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
134 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
135 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
104 .cntr = { {0, -1, -1}, {2, -1, -1} }, 136 .cntr = { {0, -1, -1}, {2, -1, -1} },
105 }, 137 },
106 [P4_EVENT_PAGE_WALK_TYPE] = { 138 [P4_EVENT_PAGE_WALK_TYPE] = {
107 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), 139 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
108 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, 140 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
141 .escr_emask =
142 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
143 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
144 .shared = 1,
109 .cntr = { {0, -1, -1}, {2, -1, -1} }, 145 .cntr = { {0, -1, -1}, {2, -1, -1} },
110 }, 146 },
111 [P4_EVENT_BSQ_CACHE_REFERENCE] = { 147 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
112 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), 148 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
113 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, 149 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
150 .escr_emask =
151 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
152 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
153 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
154 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
155 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
156 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
157 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
158 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
159 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
114 .cntr = { {0, -1, -1}, {2, -1, -1} }, 160 .cntr = { {0, -1, -1}, {2, -1, -1} },
115 }, 161 },
116 [P4_EVENT_IOQ_ALLOCATION] = { 162 [P4_EVENT_IOQ_ALLOCATION] = {
117 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), 163 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
118 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 164 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
165 .escr_emask =
166 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
167 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
168 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
169 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
170 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
171 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
172 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
173 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
174 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
175 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
176 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
119 .cntr = { {0, -1, -1}, {2, -1, -1} }, 177 .cntr = { {0, -1, -1}, {2, -1, -1} },
120 }, 178 },
121 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 179 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
122 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), 180 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
123 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, 181 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
182 .escr_emask =
183 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
184 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
185 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
186 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
187 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
188 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
189 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
190 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
191 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
192 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
193 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
124 .cntr = { {2, -1, -1}, {3, -1, -1} }, 194 .cntr = { {2, -1, -1}, {3, -1, -1} },
125 }, 195 },
126 [P4_EVENT_FSB_DATA_ACTIVITY] = { 196 [P4_EVENT_FSB_DATA_ACTIVITY] = {
127 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), 197 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
128 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 198 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
199 .escr_emask =
200 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
201 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
202 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
203 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
204 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
205 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
206 .shared = 1,
129 .cntr = { {0, -1, -1}, {2, -1, -1} }, 207 .cntr = { {0, -1, -1}, {2, -1, -1} },
130 }, 208 },
131 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ 209 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
132 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), 210 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
133 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, 211 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
212 .escr_emask =
213 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
214 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
215 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
216 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
217 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
218 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
219 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
220 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
221 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
222 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
223 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
224 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
225 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
134 .cntr = { {0, -1, -1}, {1, -1, -1} }, 226 .cntr = { {0, -1, -1}, {1, -1, -1} },
135 }, 227 },
136 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 228 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
137 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), 229 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
138 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, 230 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
231 .escr_emask =
232 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
233 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
234 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
235 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
236 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
237 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
238 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
239 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
240 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
241 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
242 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
243 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
244 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
139 .cntr = { {2, -1, -1}, {3, -1, -1} }, 245 .cntr = { {2, -1, -1}, {3, -1, -1} },
140 }, 246 },
141 [P4_EVENT_SSE_INPUT_ASSIST] = { 247 [P4_EVENT_SSE_INPUT_ASSIST] = {
142 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), 248 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
143 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 249 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
250 .escr_emask =
251 P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
252 .shared = 1,
144 .cntr = { {8, 9, -1}, {10, 11, -1} }, 253 .cntr = { {8, 9, -1}, {10, 11, -1} },
145 }, 254 },
146 [P4_EVENT_PACKED_SP_UOP] = { 255 [P4_EVENT_PACKED_SP_UOP] = {
147 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), 256 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
148 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 257 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
258 .escr_emask =
259 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
260 .shared = 1,
149 .cntr = { {8, 9, -1}, {10, 11, -1} }, 261 .cntr = { {8, 9, -1}, {10, 11, -1} },
150 }, 262 },
151 [P4_EVENT_PACKED_DP_UOP] = { 263 [P4_EVENT_PACKED_DP_UOP] = {
152 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), 264 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
153 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 265 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
266 .escr_emask =
267 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
268 .shared = 1,
154 .cntr = { {8, 9, -1}, {10, 11, -1} }, 269 .cntr = { {8, 9, -1}, {10, 11, -1} },
155 }, 270 },
156 [P4_EVENT_SCALAR_SP_UOP] = { 271 [P4_EVENT_SCALAR_SP_UOP] = {
157 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), 272 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
158 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 273 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
274 .escr_emask =
275 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
276 .shared = 1,
159 .cntr = { {8, 9, -1}, {10, 11, -1} }, 277 .cntr = { {8, 9, -1}, {10, 11, -1} },
160 }, 278 },
161 [P4_EVENT_SCALAR_DP_UOP] = { 279 [P4_EVENT_SCALAR_DP_UOP] = {
162 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), 280 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
163 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 281 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
282 .escr_emask =
283 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
284 .shared = 1,
164 .cntr = { {8, 9, -1}, {10, 11, -1} }, 285 .cntr = { {8, 9, -1}, {10, 11, -1} },
165 }, 286 },
166 [P4_EVENT_64BIT_MMX_UOP] = { 287 [P4_EVENT_64BIT_MMX_UOP] = {
167 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), 288 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
168 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 289 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
290 .escr_emask =
291 P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
292 .shared = 1,
169 .cntr = { {8, 9, -1}, {10, 11, -1} }, 293 .cntr = { {8, 9, -1}, {10, 11, -1} },
170 }, 294 },
171 [P4_EVENT_128BIT_MMX_UOP] = { 295 [P4_EVENT_128BIT_MMX_UOP] = {
172 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), 296 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
173 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 297 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
298 .escr_emask =
299 P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
300 .shared = 1,
174 .cntr = { {8, 9, -1}, {10, 11, -1} }, 301 .cntr = { {8, 9, -1}, {10, 11, -1} },
175 }, 302 },
176 [P4_EVENT_X87_FP_UOP] = { 303 [P4_EVENT_X87_FP_UOP] = {
177 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), 304 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
178 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 305 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
306 .escr_emask =
307 P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
308 .shared = 1,
179 .cntr = { {8, 9, -1}, {10, 11, -1} }, 309 .cntr = { {8, 9, -1}, {10, 11, -1} },
180 }, 310 },
181 [P4_EVENT_TC_MISC] = { 311 [P4_EVENT_TC_MISC] = {
182 .opcode = P4_OPCODE(P4_EVENT_TC_MISC), 312 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
183 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 313 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
314 .escr_emask =
315 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
184 .cntr = { {4, 5, -1}, {6, 7, -1} }, 316 .cntr = { {4, 5, -1}, {6, 7, -1} },
185 }, 317 },
186 [P4_EVENT_GLOBAL_POWER_EVENTS] = { 318 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
187 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), 319 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
188 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 320 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
321 .escr_emask =
322 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
189 .cntr = { {0, -1, -1}, {2, -1, -1} }, 323 .cntr = { {0, -1, -1}, {2, -1, -1} },
190 }, 324 },
191 [P4_EVENT_TC_MS_XFER] = { 325 [P4_EVENT_TC_MS_XFER] = {
192 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), 326 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
193 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 327 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
328 .escr_emask =
329 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
194 .cntr = { {4, 5, -1}, {6, 7, -1} }, 330 .cntr = { {4, 5, -1}, {6, 7, -1} },
195 }, 331 },
196 [P4_EVENT_UOP_QUEUE_WRITES] = { 332 [P4_EVENT_UOP_QUEUE_WRITES] = {
197 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), 333 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
198 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 334 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
335 .escr_emask =
336 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
337 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
338 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
199 .cntr = { {4, 5, -1}, {6, 7, -1} }, 339 .cntr = { {4, 5, -1}, {6, 7, -1} },
200 }, 340 },
201 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { 341 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
202 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), 342 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
203 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, 343 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
344 .escr_emask =
345 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
346 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
347 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
348 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
204 .cntr = { {4, 5, -1}, {6, 7, -1} }, 349 .cntr = { {4, 5, -1}, {6, 7, -1} },
205 }, 350 },
206 [P4_EVENT_RETIRED_BRANCH_TYPE] = { 351 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
207 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), 352 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
208 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, 353 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
354 .escr_emask =
355 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
356 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
209 .cntr = { {4, 5, -1}, {6, 7, -1} }, 359 .cntr = { {4, 5, -1}, {6, 7, -1} },
210 }, 360 },
211 [P4_EVENT_RESOURCE_STALL] = { 361 [P4_EVENT_RESOURCE_STALL] = {
212 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), 362 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
213 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, 363 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
364 .escr_emask =
365 P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
214 .cntr = { {12, 13, 16}, {14, 15, 17} }, 366 .cntr = { {12, 13, 16}, {14, 15, 17} },
215 }, 367 },
216 [P4_EVENT_WC_BUFFER] = { 368 [P4_EVENT_WC_BUFFER] = {
217 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), 369 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
218 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 370 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
371 .escr_emask =
372 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
373 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
374 .shared = 1,
219 .cntr = { {8, 9, -1}, {10, 11, -1} }, 375 .cntr = { {8, 9, -1}, {10, 11, -1} },
220 }, 376 },
221 [P4_EVENT_B2B_CYCLES] = { 377 [P4_EVENT_B2B_CYCLES] = {
222 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), 378 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
223 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 379 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
380 .escr_emask = 0,
224 .cntr = { {0, -1, -1}, {2, -1, -1} }, 381 .cntr = { {0, -1, -1}, {2, -1, -1} },
225 }, 382 },
226 [P4_EVENT_BNR] = { 383 [P4_EVENT_BNR] = {
227 .opcode = P4_OPCODE(P4_EVENT_BNR), 384 .opcode = P4_OPCODE(P4_EVENT_BNR),
228 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 385 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
386 .escr_emask = 0,
229 .cntr = { {0, -1, -1}, {2, -1, -1} }, 387 .cntr = { {0, -1, -1}, {2, -1, -1} },
230 }, 388 },
231 [P4_EVENT_SNOOP] = { 389 [P4_EVENT_SNOOP] = {
232 .opcode = P4_OPCODE(P4_EVENT_SNOOP), 390 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
233 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 391 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
392 .escr_emask = 0,
234 .cntr = { {0, -1, -1}, {2, -1, -1} }, 393 .cntr = { {0, -1, -1}, {2, -1, -1} },
235 }, 394 },
236 [P4_EVENT_RESPONSE] = { 395 [P4_EVENT_RESPONSE] = {
237 .opcode = P4_OPCODE(P4_EVENT_RESPONSE), 396 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
238 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 397 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
398 .escr_emask = 0,
239 .cntr = { {0, -1, -1}, {2, -1, -1} }, 399 .cntr = { {0, -1, -1}, {2, -1, -1} },
240 }, 400 },
241 [P4_EVENT_FRONT_END_EVENT] = { 401 [P4_EVENT_FRONT_END_EVENT] = {
242 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), 402 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
243 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 403 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
404 .escr_emask =
405 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
406 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
244 .cntr = { {12, 13, 16}, {14, 15, 17} }, 407 .cntr = { {12, 13, 16}, {14, 15, 17} },
245 }, 408 },
246 [P4_EVENT_EXECUTION_EVENT] = { 409 [P4_EVENT_EXECUTION_EVENT] = {
247 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), 410 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
248 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 411 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
412 .escr_emask =
413 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
414 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
415 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
416 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
417 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
418 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
419 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
420 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
249 .cntr = { {12, 13, 16}, {14, 15, 17} }, 421 .cntr = { {12, 13, 16}, {14, 15, 17} },
250 }, 422 },
251 [P4_EVENT_REPLAY_EVENT] = { 423 [P4_EVENT_REPLAY_EVENT] = {
252 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), 424 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
253 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 425 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
426 .escr_emask =
427 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
428 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
254 .cntr = { {12, 13, 16}, {14, 15, 17} }, 429 .cntr = { {12, 13, 16}, {14, 15, 17} },
255 }, 430 },
256 [P4_EVENT_INSTR_RETIRED] = { 431 [P4_EVENT_INSTR_RETIRED] = {
257 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), 432 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
258 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 433 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
434 .escr_emask =
435 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
436 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
437 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
438 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
259 .cntr = { {12, 13, 16}, {14, 15, 17} }, 439 .cntr = { {12, 13, 16}, {14, 15, 17} },
260 }, 440 },
261 [P4_EVENT_UOPS_RETIRED] = { 441 [P4_EVENT_UOPS_RETIRED] = {
262 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), 442 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
263 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 443 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
444 .escr_emask =
445 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
446 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
264 .cntr = { {12, 13, 16}, {14, 15, 17} }, 447 .cntr = { {12, 13, 16}, {14, 15, 17} },
265 }, 448 },
266 [P4_EVENT_UOP_TYPE] = { 449 [P4_EVENT_UOP_TYPE] = {
267 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), 450 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
268 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, 451 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
452 .escr_emask =
453 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
454 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
269 .cntr = { {12, 13, 16}, {14, 15, 17} }, 455 .cntr = { {12, 13, 16}, {14, 15, 17} },
270 }, 456 },
271 [P4_EVENT_BRANCH_RETIRED] = { 457 [P4_EVENT_BRANCH_RETIRED] = {
272 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), 458 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
273 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 459 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
460 .escr_emask =
461 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
462 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
463 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
464 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
274 .cntr = { {12, 13, 16}, {14, 15, 17} }, 465 .cntr = { {12, 13, 16}, {14, 15, 17} },
275 }, 466 },
276 [P4_EVENT_MISPRED_BRANCH_RETIRED] = { 467 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
277 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), 468 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
278 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 469 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
470 .escr_emask =
471 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
279 .cntr = { {12, 13, 16}, {14, 15, 17} }, 472 .cntr = { {12, 13, 16}, {14, 15, 17} },
280 }, 473 },
281 [P4_EVENT_X87_ASSIST] = { 474 [P4_EVENT_X87_ASSIST] = {
282 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), 475 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
283 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 476 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
477 .escr_emask =
478 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
479 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
480 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
481 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
482 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
284 .cntr = { {12, 13, 16}, {14, 15, 17} }, 483 .cntr = { {12, 13, 16}, {14, 15, 17} },
285 }, 484 },
286 [P4_EVENT_MACHINE_CLEAR] = { 485 [P4_EVENT_MACHINE_CLEAR] = {
287 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), 486 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
288 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 487 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
488 .escr_emask =
489 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
490 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
491 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
289 .cntr = { {12, 13, 16}, {14, 15, 17} }, 492 .cntr = { {12, 13, 16}, {14, 15, 17} },
290 }, 493 },
291 [P4_EVENT_INSTR_COMPLETED] = { 494 [P4_EVENT_INSTR_COMPLETED] = {
292 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), 495 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
293 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 496 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
497 .escr_emask =
498 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
499 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
294 .cntr = { {12, 13, 16}, {14, 15, 17} }, 500 .cntr = { {12, 13, 16}, {14, 15, 17} },
295 }, 501 },
296}; 502};
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event)
428 return config; 634 return config;
429} 635}
430 636
637/* check cpu model specifics */
638static bool p4_event_match_cpu_model(unsigned int event_idx)
639{
640 /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
641 if (event_idx == P4_EVENT_INSTR_COMPLETED) {
642 if (boot_cpu_data.x86_model != 3 &&
643 boot_cpu_data.x86_model != 4 &&
644 boot_cpu_data.x86_model != 6)
645 return false;
646 }
647
648 /*
649 * For info
650 * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
651 */
652
653 return true;
654}
655
431static int p4_validate_raw_event(struct perf_event *event) 656static int p4_validate_raw_event(struct perf_event *event)
432{ 657{
433 unsigned int v; 658 unsigned int v, emask;
434 659
435 /* user data may have out-of-bound event index */ 660 /* User data may have out-of-bound event index */
436 v = p4_config_unpack_event(event->attr.config); 661 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) { 662 if (v >= ARRAY_SIZE(p4_event_bind_map))
438 pr_warning("P4 PMU: Unknown event code: %d\n", v); 663 return -EINVAL;
664
665 /* It may be unsupported: */
666 if (!p4_event_match_cpu_model(v))
439 return -EINVAL; 667 return -EINVAL;
668
669 /*
670 * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as
671 * in Architectural Performance Monitoring, it means not
672 * on _which_ logical cpu to count but rather _when_, ie it
673 * depends on logical cpu state -- count event if one cpu active,
674 * none, both or any, so we just allow user to pass any value
675 * desired.
676 *
677 * In turn we always set Tx_OS/Tx_USR bits bound to logical
678 * cpu without their propagation to another cpu
679 */
680
681 /*
682 * if an event is shared accross the logical threads
683 * the user needs special permissions to be able to use it
684 */
685 if (p4_event_bind_map[v].shared) {
686 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
687 return -EACCES;
440 } 688 }
441 689
690 /* ESCR EventMask bits may be invalid */
691 emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
692 if (emask & ~p4_event_bind_map[v].escr_emask)
693 return -EINVAL;
694
442 /* 695 /*
443 * it may have some screwed PEBS bits 696 * it may have some invalid PEBS bits
444 */ 697 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { 698 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL; 699 return -EINVAL;
448 } 700
449 v = p4_config_unpack_metric(event->attr.config); 701 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { 702 if (v >= ARRAY_SIZE(p4_pebs_bind_map))
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL; 703 return -EINVAL;
453 }
454 704
455 return 0; 705 return 0;
456} 706}
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event)
478 728
479 if (event->attr.type == PERF_TYPE_RAW) { 729 if (event->attr.type == PERF_TYPE_RAW) {
480 730
731 /*
732 * Clear bits we reserve to be managed by kernel itself
733 * and never allowed from a user space
734 */
735 event->attr.config &= P4_CONFIG_MASK;
736
481 rc = p4_validate_raw_event(event); 737 rc = p4_validate_raw_event(event);
482 if (rc) 738 if (rc)
483 goto out; 739 goto out;
484 740
485 /* 741 /*
486 * We don't control raw events so it's up to the caller
487 * to pass sane values (and we don't count the thread number
488 * on HT machine but allow HT-compatible specifics to be
489 * passed on)
490 *
491 * Note that for RAW events we allow user to use P4_CCCR_RESERVED 742 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
492 * bits since we keep additional info here (for cache events and etc) 743 * bits since we keep additional info here (for cache events and etc)
493 *
494 * XXX: HT wide things should check perf_paranoid_cpu() &&
495 * CAP_SYS_ADMIN
496 */ 744 */
497 event->hw.config |= event->attr.config & 745 event->hw.config |= event->attr.config;
498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
500
501 event->hw.config &= ~P4_CCCR_FORCE_OVF;
502 } 746 }
503 747
504 rc = x86_setup_perfctr(event); 748 rc = x86_setup_perfctr(event);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index fb329e9f8494..d9f4ff8fcd69 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -700,11 +700,10 @@ static void probe_nmi_watchdog(void)
700{ 700{
701 switch (boot_cpu_data.x86_vendor) { 701 switch (boot_cpu_data.x86_vendor) {
702 case X86_VENDOR_AMD: 702 case X86_VENDOR_AMD:
703 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && 703 if (boot_cpu_data.x86 == 6 ||
704 boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) 704 (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
705 return; 705 wd_ops = &k7_wd_ops;
706 wd_ops = &k7_wd_ops; 706 return;
707 break;
708 case X86_VENDOR_INTEL: 707 case X86_VENDOR_INTEL:
709 /* Work around where perfctr1 doesn't have a working enable 708 /* Work around where perfctr1 doesn't have a working enable
710 * bit as described in the following errata: 709 * bit as described in the following errata:
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d49079515122..c7f64e6f537a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -44,6 +44,12 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
44 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, 44 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
45 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, 45 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
46 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, 46 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 },
47 { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 },
48 { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 },
49 { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 },
50 { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 },
51 { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 },
52 { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 },
47 { 0, 0, 0, 0, 0 } 53 { 0, 0, 0, 0, 0 }
48 }; 54 };
49 55
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 045b36cada65..994828899e09 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
34 if (!csize) 34 if (!csize)
35 return 0; 35 return 0;
36 36
37 vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); 37 vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
38 if (!vaddr) 38 if (!vaddr)
39 return -ENOMEM; 39 return -ENOMEM;
40 40
@@ -46,6 +46,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
46 } else 46 } else
47 memcpy(buf, vaddr + offset, csize); 47 memcpy(buf, vaddr + offset, csize);
48 48
49 set_iounmap_nonlazy();
49 iounmap(vaddr); 50 iounmap(vaddr);
50 return csize; 51 return csize;
51} 52}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 0d6fc71bedb1..0c2b7ef7a34d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -15,6 +15,7 @@
15#include <linux/pfn.h> 15#include <linux/pfn.h>
16#include <linux/suspend.h> 16#include <linux/suspend.h>
17#include <linux/firmware-map.h> 17#include <linux/firmware-map.h>
18#include <linux/memblock.h>
18 19
19#include <asm/e820.h> 20#include <asm/e820.h>
20#include <asm/proto.h> 21#include <asm/proto.h>
@@ -738,73 +739,7 @@ core_initcall(e820_mark_nvs_memory);
738#endif 739#endif
739 740
740/* 741/*
741 * Find a free area with specified alignment in a specific range. 742 * pre allocated 4k and reserved it in memblock and e820_saved
742 */
743u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
744{
745 int i;
746
747 for (i = 0; i < e820.nr_map; i++) {
748 struct e820entry *ei = &e820.map[i];
749 u64 addr;
750 u64 ei_start, ei_last;
751
752 if (ei->type != E820_RAM)
753 continue;
754
755 ei_last = ei->addr + ei->size;
756 ei_start = ei->addr;
757 addr = find_early_area(ei_start, ei_last, start, end,
758 size, align);
759
760 if (addr != -1ULL)
761 return addr;
762 }
763 return -1ULL;
764}
765
766u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
767{
768 return find_e820_area(start, end, size, align);
769}
770
771u64 __init get_max_mapped(void)
772{
773 u64 end = max_pfn_mapped;
774
775 end <<= PAGE_SHIFT;
776
777 return end;
778}
779/*
780 * Find next free range after *start
781 */
782u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
783{
784 int i;
785
786 for (i = 0; i < e820.nr_map; i++) {
787 struct e820entry *ei = &e820.map[i];
788 u64 addr;
789 u64 ei_start, ei_last;
790
791 if (ei->type != E820_RAM)
792 continue;
793
794 ei_last = ei->addr + ei->size;
795 ei_start = ei->addr;
796 addr = find_early_area_size(ei_start, ei_last, start,
797 sizep, align);
798
799 if (addr != -1ULL)
800 return addr;
801 }
802
803 return -1ULL;
804}
805
806/*
807 * pre allocated 4k and reserved it in e820
808 */ 743 */
809u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) 744u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
810{ 745{
@@ -813,8 +748,8 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
813 u64 start; 748 u64 start;
814 749
815 for (start = startt; ; start += size) { 750 for (start = startt; ; start += size) {
816 start = find_e820_area_size(start, &size, align); 751 start = memblock_x86_find_in_range_size(start, &size, align);
817 if (!(start + 1)) 752 if (start == MEMBLOCK_ERROR)
818 return 0; 753 return 0;
819 if (size >= sizet) 754 if (size >= sizet)
820 break; 755 break;
@@ -830,10 +765,9 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
830 addr = round_down(start + size - sizet, align); 765 addr = round_down(start + size - sizet, align);
831 if (addr < start) 766 if (addr < start)
832 return 0; 767 return 0;
833 e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); 768 memblock_x86_reserve_range(addr, addr + sizet, "new next");
834 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); 769 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
835 printk(KERN_INFO "update e820 for early_reserve_e820\n"); 770 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
836 update_e820();
837 update_e820_saved(); 771 update_e820_saved();
838 772
839 return addr; 773 return addr;
@@ -895,74 +829,6 @@ unsigned long __init e820_end_of_low_ram_pfn(void)
895{ 829{
896 return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); 830 return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
897} 831}
898/*
899 * Finds an active region in the address range from start_pfn to last_pfn and
900 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
901 */
902int __init e820_find_active_region(const struct e820entry *ei,
903 unsigned long start_pfn,
904 unsigned long last_pfn,
905 unsigned long *ei_startpfn,
906 unsigned long *ei_endpfn)
907{
908 u64 align = PAGE_SIZE;
909
910 *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
911 *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
912
913 /* Skip map entries smaller than a page */
914 if (*ei_startpfn >= *ei_endpfn)
915 return 0;
916
917 /* Skip if map is outside the node */
918 if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
919 *ei_startpfn >= last_pfn)
920 return 0;
921
922 /* Check for overlaps */
923 if (*ei_startpfn < start_pfn)
924 *ei_startpfn = start_pfn;
925 if (*ei_endpfn > last_pfn)
926 *ei_endpfn = last_pfn;
927
928 return 1;
929}
930
931/* Walk the e820 map and register active regions within a node */
932void __init e820_register_active_regions(int nid, unsigned long start_pfn,
933 unsigned long last_pfn)
934{
935 unsigned long ei_startpfn;
936 unsigned long ei_endpfn;
937 int i;
938
939 for (i = 0; i < e820.nr_map; i++)
940 if (e820_find_active_region(&e820.map[i],
941 start_pfn, last_pfn,
942 &ei_startpfn, &ei_endpfn))
943 add_active_range(nid, ei_startpfn, ei_endpfn);
944}
945
946/*
947 * Find the hole size (in bytes) in the memory range.
948 * @start: starting address of the memory range to scan
949 * @end: ending address of the memory range to scan
950 */
951u64 __init e820_hole_size(u64 start, u64 end)
952{
953 unsigned long start_pfn = start >> PAGE_SHIFT;
954 unsigned long last_pfn = end >> PAGE_SHIFT;
955 unsigned long ei_startpfn, ei_endpfn, ram = 0;
956 int i;
957
958 for (i = 0; i < e820.nr_map; i++) {
959 if (e820_find_active_region(&e820.map[i],
960 start_pfn, last_pfn,
961 &ei_startpfn, &ei_endpfn))
962 ram += ei_endpfn - ei_startpfn;
963 }
964 return end - start - ((u64)ram << PAGE_SHIFT);
965}
966 832
967static void early_panic(char *msg) 833static void early_panic(char *msg)
968{ 834{
@@ -1210,3 +1076,48 @@ void __init setup_memory_map(void)
1210 printk(KERN_INFO "BIOS-provided physical RAM map:\n"); 1076 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1211 e820_print_map(who); 1077 e820_print_map(who);
1212} 1078}
1079
1080void __init memblock_x86_fill(void)
1081{
1082 int i;
1083 u64 end;
1084
1085 /*
1086 * EFI may have more than 128 entries
1087 * We are safe to enable resizing, beause memblock_x86_fill()
1088 * is rather later for x86
1089 */
1090 memblock_can_resize = 1;
1091
1092 for (i = 0; i < e820.nr_map; i++) {
1093 struct e820entry *ei = &e820.map[i];
1094
1095 end = ei->addr + ei->size;
1096 if (end != (resource_size_t)end)
1097 continue;
1098
1099 if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
1100 continue;
1101
1102 memblock_add(ei->addr, ei->size);
1103 }
1104
1105 memblock_analyze();
1106 memblock_dump_all();
1107}
1108
1109void __init memblock_find_dma_reserve(void)
1110{
1111#ifdef CONFIG_X86_64
1112 u64 free_size_pfn;
1113 u64 mem_size_pfn;
1114 /*
1115 * need to find out used area below MAX_DMA_PFN
1116 * need to use memblock to get free size in [0, MAX_DMA_PFN]
1117 * at first, and assume boot_mem will not take below MAX_DMA_PFN
1118 */
1119 mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
1120 free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
1121 set_dma_reserve(mem_size_pfn - free_size_pfn);
1122#endif
1123}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index ebdb85cf2686..76b8cd953dee 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -97,7 +97,6 @@ static void __init nvidia_bugs(int num, int slot, int func)
97} 97}
98 98
99#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) 99#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
100#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
101static u32 __init ati_ixp4x0_rev(int num, int slot, int func) 100static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
102{ 101{
103 u32 d; 102 u32 d;
@@ -115,7 +114,6 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
115 d &= 0xff; 114 d &= 0xff;
116 return d; 115 return d;
117} 116}
118#endif
119 117
120static void __init ati_bugs(int num, int slot, int func) 118static void __init ati_bugs(int num, int slot, int func)
121{ 119{
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index fa99bae75ace..4572f25f9325 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -14,6 +14,7 @@
14#include <xen/hvc-console.h> 14#include <xen/hvc-console.h>
15#include <asm/pci-direct.h> 15#include <asm/pci-direct.h>
16#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <asm/mrst.h>
17#include <asm/pgtable.h> 18#include <asm/pgtable.h>
18#include <linux/usb/ehci_def.h> 19#include <linux/usb/ehci_def.h>
19 20
@@ -239,6 +240,18 @@ static int __init setup_early_printk(char *buf)
239 if (!strncmp(buf, "xen", 3)) 240 if (!strncmp(buf, "xen", 3))
240 early_console_register(&xenboot_console, keep); 241 early_console_register(&xenboot_console, keep);
241#endif 242#endif
243#ifdef CONFIG_X86_MRST_EARLY_PRINTK
244 if (!strncmp(buf, "mrst", 4)) {
245 mrst_early_console_init();
246 early_console_register(&early_mrst_console, keep);
247 }
248
249 if (!strncmp(buf, "hsu", 3)) {
250 hsu_early_console_init();
251 early_console_register(&early_hsu_console, keep);
252 }
253
254#endif
242 buf++; 255 buf++;
243 } 256 }
244 return 0; 257 return 0;
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/kernel/early_printk_mrst.c
new file mode 100644
index 000000000000..65df603622b2
--- /dev/null
+++ b/arch/x86/kernel/early_printk_mrst.c
@@ -0,0 +1,319 @@
1/*
2 * early_printk_mrst.c - early consoles for Intel MID platforms
3 *
4 * Copyright (c) 2008-2010, Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11
12/*
13 * This file implements two early consoles named mrst and hsu.
14 * mrst is based on Maxim3110 spi-uart device, it exists in both
15 * Moorestown and Medfield platforms, while hsu is based on a High
16 * Speed UART device which only exists in the Medfield platform
17 */
18
19#include <linux/serial_reg.h>
20#include <linux/serial_mfd.h>
21#include <linux/kmsg_dump.h>
22#include <linux/console.h>
23#include <linux/kernel.h>
24#include <linux/delay.h>
25#include <linux/init.h>
26#include <linux/io.h>
27
28#include <asm/fixmap.h>
29#include <asm/pgtable.h>
30#include <asm/mrst.h>
31
32#define MRST_SPI_TIMEOUT 0x200000
33#define MRST_REGBASE_SPI0 0xff128000
34#define MRST_REGBASE_SPI1 0xff128400
35#define MRST_CLK_SPI0_REG 0xff11d86c
36
37/* Bit fields in CTRLR0 */
38#define SPI_DFS_OFFSET 0
39
40#define SPI_FRF_OFFSET 4
41#define SPI_FRF_SPI 0x0
42#define SPI_FRF_SSP 0x1
43#define SPI_FRF_MICROWIRE 0x2
44#define SPI_FRF_RESV 0x3
45
46#define SPI_MODE_OFFSET 6
47#define SPI_SCPH_OFFSET 6
48#define SPI_SCOL_OFFSET 7
49#define SPI_TMOD_OFFSET 8
50#define SPI_TMOD_TR 0x0 /* xmit & recv */
51#define SPI_TMOD_TO 0x1 /* xmit only */
52#define SPI_TMOD_RO 0x2 /* recv only */
53#define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */
54
55#define SPI_SLVOE_OFFSET 10
56#define SPI_SRL_OFFSET 11
57#define SPI_CFS_OFFSET 12
58
59/* Bit fields in SR, 7 bits */
60#define SR_MASK 0x7f /* cover 7 bits */
61#define SR_BUSY (1 << 0)
62#define SR_TF_NOT_FULL (1 << 1)
63#define SR_TF_EMPT (1 << 2)
64#define SR_RF_NOT_EMPT (1 << 3)
65#define SR_RF_FULL (1 << 4)
66#define SR_TX_ERR (1 << 5)
67#define SR_DCOL (1 << 6)
68
69struct dw_spi_reg {
70 u32 ctrl0;
71 u32 ctrl1;
72 u32 ssienr;
73 u32 mwcr;
74 u32 ser;
75 u32 baudr;
76 u32 txfltr;
77 u32 rxfltr;
78 u32 txflr;
79 u32 rxflr;
80 u32 sr;
81 u32 imr;
82 u32 isr;
83 u32 risr;
84 u32 txoicr;
85 u32 rxoicr;
86 u32 rxuicr;
87 u32 msticr;
88 u32 icr;
89 u32 dmacr;
90 u32 dmatdlr;
91 u32 dmardlr;
92 u32 idr;
93 u32 version;
94
95 /* Currently operates as 32 bits, though only the low 16 bits matter */
96 u32 dr;
97} __packed;
98
99#define dw_readl(dw, name) __raw_readl(&(dw)->name)
100#define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name)
101
102/* Default use SPI0 register for mrst, we will detect Penwell and use SPI1 */
103static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0;
104
105static u32 *pclk_spi0;
106/* Always contains an accessable address, start with 0 */
107static struct dw_spi_reg *pspi;
108
109static struct kmsg_dumper dw_dumper;
110static int dumper_registered;
111
112static void dw_kmsg_dump(struct kmsg_dumper *dumper,
113 enum kmsg_dump_reason reason,
114 const char *s1, unsigned long l1,
115 const char *s2, unsigned long l2)
116{
117 int i;
118
119 /* When run to this, we'd better re-init the HW */
120 mrst_early_console_init();
121
122 for (i = 0; i < l1; i++)
123 early_mrst_console.write(&early_mrst_console, s1 + i, 1);
124 for (i = 0; i < l2; i++)
125 early_mrst_console.write(&early_mrst_console, s2 + i, 1);
126}
127
128/* Set the ratio rate to 115200, 8n1, IRQ disabled */
129static void max3110_write_config(void)
130{
131 u16 config;
132
133 config = 0xc001;
134 dw_writel(pspi, dr, config);
135}
136
137/* Translate char to a eligible word and send to max3110 */
138static void max3110_write_data(char c)
139{
140 u16 data;
141
142 data = 0x8000 | c;
143 dw_writel(pspi, dr, data);
144}
145
146void mrst_early_console_init(void)
147{
148 u32 ctrlr0 = 0;
149 u32 spi0_cdiv;
150 u32 freq; /* Freqency info only need be searched once */
151
152 /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */
153 pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
154 MRST_CLK_SPI0_REG);
155 spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9;
156 freq = 100000000 / (spi0_cdiv + 1);
157
158 if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL)
159 mrst_spi_paddr = MRST_REGBASE_SPI1;
160
161 pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
162 mrst_spi_paddr);
163
164 /* Disable SPI controller */
165 dw_writel(pspi, ssienr, 0);
166
167 /* Set control param, 8 bits, transmit only mode */
168 ctrlr0 = dw_readl(pspi, ctrl0);
169
170 ctrlr0 &= 0xfcc0;
171 ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET)
172 | (SPI_TMOD_TO << SPI_TMOD_OFFSET);
173 dw_writel(pspi, ctrl0, ctrlr0);
174
175 /*
176 * Change the spi0 clk to comply with 115200 bps, use 100000 to
177 * calculate the clk dividor to make the clock a little slower
178 * than real baud rate.
179 */
180 dw_writel(pspi, baudr, freq/100000);
181
182 /* Disable all INT for early phase */
183 dw_writel(pspi, imr, 0x0);
184
185 /* Set the cs to spi-uart */
186 dw_writel(pspi, ser, 0x2);
187
188 /* Enable the HW, the last step for HW init */
189 dw_writel(pspi, ssienr, 0x1);
190
191 /* Set the default configuration */
192 max3110_write_config();
193
194 /* Register the kmsg dumper */
195 if (!dumper_registered) {
196 dw_dumper.dump = dw_kmsg_dump;
197 kmsg_dump_register(&dw_dumper);
198 dumper_registered = 1;
199 }
200}
201
202/* Slave select should be called in the read/write function */
203static void early_mrst_spi_putc(char c)
204{
205 unsigned int timeout;
206 u32 sr;
207
208 timeout = MRST_SPI_TIMEOUT;
209 /* Early putc needs to make sure the TX FIFO is not full */
210 while (--timeout) {
211 sr = dw_readl(pspi, sr);
212 if (!(sr & SR_TF_NOT_FULL))
213 cpu_relax();
214 else
215 break;
216 }
217
218 if (!timeout)
219 pr_warning("MRST earlycon: timed out\n");
220 else
221 max3110_write_data(c);
222}
223
224/* Early SPI only uses polling mode */
225static void early_mrst_spi_write(struct console *con, const char *str, unsigned n)
226{
227 int i;
228
229 for (i = 0; i < n && *str; i++) {
230 if (*str == '\n')
231 early_mrst_spi_putc('\r');
232 early_mrst_spi_putc(*str);
233 str++;
234 }
235}
236
237struct console early_mrst_console = {
238 .name = "earlymrst",
239 .write = early_mrst_spi_write,
240 .flags = CON_PRINTBUFFER,
241 .index = -1,
242};
243
244/*
245 * Following is the early console based on Medfield HSU (High
246 * Speed UART) device.
247 */
248#define HSU_PORT2_PADDR 0xffa28180
249
250static void __iomem *phsu;
251
252void hsu_early_console_init(void)
253{
254 u8 lcr;
255
256 phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
257 HSU_PORT2_PADDR);
258
259 /* Disable FIFO */
260 writeb(0x0, phsu + UART_FCR);
261
262 /* Set to default 115200 bps, 8n1 */
263 lcr = readb(phsu + UART_LCR);
264 writeb((0x80 | lcr), phsu + UART_LCR);
265 writeb(0x18, phsu + UART_DLL);
266 writeb(lcr, phsu + UART_LCR);
267 writel(0x3600, phsu + UART_MUL*4);
268
269 writeb(0x8, phsu + UART_MCR);
270 writeb(0x7, phsu + UART_FCR);
271 writeb(0x3, phsu + UART_LCR);
272
273 /* Clear IRQ status */
274 readb(phsu + UART_LSR);
275 readb(phsu + UART_RX);
276 readb(phsu + UART_IIR);
277 readb(phsu + UART_MSR);
278
279 /* Enable FIFO */
280 writeb(0x7, phsu + UART_FCR);
281}
282
283#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
284
285static void early_hsu_putc(char ch)
286{
287 unsigned int timeout = 10000; /* 10ms */
288 u8 status;
289
290 while (--timeout) {
291 status = readb(phsu + UART_LSR);
292 if (status & BOTH_EMPTY)
293 break;
294 udelay(1);
295 }
296
297 /* Only write the char when there was no timeout */
298 if (timeout)
299 writeb(ch, phsu + UART_TX);
300}
301
302static void early_hsu_write(struct console *con, const char *str, unsigned n)
303{
304 int i;
305
306 for (i = 0; i < n && *str; i++) {
307 if (*str == '\n')
308 early_hsu_putc('\r');
309 early_hsu_putc(*str);
310 str++;
311 }
312}
313
314struct console early_hsu_console = {
315 .name = "earlyhsu",
316 .write = early_hsu_write,
317 .flags = CON_PRINTBUFFER,
318 .index = -1,
319};
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index c2fa9b8b497e..0fe27d7c6258 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -30,6 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <linux/efi.h> 31#include <linux/efi.h>
32#include <linux/bootmem.h> 32#include <linux/bootmem.h>
33#include <linux/memblock.h>
33#include <linux/spinlock.h> 34#include <linux/spinlock.h>
34#include <linux/uaccess.h> 35#include <linux/uaccess.h>
35#include <linux/time.h> 36#include <linux/time.h>
@@ -275,7 +276,7 @@ static void __init do_add_efi_memmap(void)
275 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 276 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
276} 277}
277 278
278void __init efi_reserve_early(void) 279void __init efi_memblock_x86_reserve_range(void)
279{ 280{
280 unsigned long pmap; 281 unsigned long pmap;
281 282
@@ -290,7 +291,7 @@ void __init efi_reserve_early(void)
290 boot_params.efi_info.efi_memdesc_size; 291 boot_params.efi_info.efi_memdesc_size;
291 memmap.desc_version = boot_params.efi_info.efi_memdesc_version; 292 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
292 memmap.desc_size = boot_params.efi_info.efi_memdesc_size; 293 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
293 reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size, 294 memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size,
294 "EFI memmap"); 295 "EFI memmap");
295} 296}
296 297
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 227d00920d2f..9fb188d7bc76 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -115,8 +115,7 @@
115 115
116 /* unfortunately push/pop can't be no-op */ 116 /* unfortunately push/pop can't be no-op */
117.macro PUSH_GS 117.macro PUSH_GS
118 pushl $0 118 pushl_cfi $0
119 CFI_ADJUST_CFA_OFFSET 4
120.endm 119.endm
121.macro POP_GS pop=0 120.macro POP_GS pop=0
122 addl $(4 + \pop), %esp 121 addl $(4 + \pop), %esp
@@ -140,14 +139,12 @@
140#else /* CONFIG_X86_32_LAZY_GS */ 139#else /* CONFIG_X86_32_LAZY_GS */
141 140
142.macro PUSH_GS 141.macro PUSH_GS
143 pushl %gs 142 pushl_cfi %gs
144 CFI_ADJUST_CFA_OFFSET 4
145 /*CFI_REL_OFFSET gs, 0*/ 143 /*CFI_REL_OFFSET gs, 0*/
146.endm 144.endm
147 145
148.macro POP_GS pop=0 146.macro POP_GS pop=0
14998: popl %gs 14798: popl_cfi %gs
150 CFI_ADJUST_CFA_OFFSET -4
151 /*CFI_RESTORE gs*/ 148 /*CFI_RESTORE gs*/
152 .if \pop <> 0 149 .if \pop <> 0
153 add $\pop, %esp 150 add $\pop, %esp
@@ -195,35 +192,25 @@
195.macro SAVE_ALL 192.macro SAVE_ALL
196 cld 193 cld
197 PUSH_GS 194 PUSH_GS
198 pushl %fs 195 pushl_cfi %fs
199 CFI_ADJUST_CFA_OFFSET 4
200 /*CFI_REL_OFFSET fs, 0;*/ 196 /*CFI_REL_OFFSET fs, 0;*/
201 pushl %es 197 pushl_cfi %es
202 CFI_ADJUST_CFA_OFFSET 4
203 /*CFI_REL_OFFSET es, 0;*/ 198 /*CFI_REL_OFFSET es, 0;*/
204 pushl %ds 199 pushl_cfi %ds
205 CFI_ADJUST_CFA_OFFSET 4
206 /*CFI_REL_OFFSET ds, 0;*/ 200 /*CFI_REL_OFFSET ds, 0;*/
207 pushl %eax 201 pushl_cfi %eax
208 CFI_ADJUST_CFA_OFFSET 4
209 CFI_REL_OFFSET eax, 0 202 CFI_REL_OFFSET eax, 0
210 pushl %ebp 203 pushl_cfi %ebp
211 CFI_ADJUST_CFA_OFFSET 4
212 CFI_REL_OFFSET ebp, 0 204 CFI_REL_OFFSET ebp, 0
213 pushl %edi 205 pushl_cfi %edi
214 CFI_ADJUST_CFA_OFFSET 4
215 CFI_REL_OFFSET edi, 0 206 CFI_REL_OFFSET edi, 0
216 pushl %esi 207 pushl_cfi %esi
217 CFI_ADJUST_CFA_OFFSET 4
218 CFI_REL_OFFSET esi, 0 208 CFI_REL_OFFSET esi, 0
219 pushl %edx 209 pushl_cfi %edx
220 CFI_ADJUST_CFA_OFFSET 4
221 CFI_REL_OFFSET edx, 0 210 CFI_REL_OFFSET edx, 0
222 pushl %ecx 211 pushl_cfi %ecx
223 CFI_ADJUST_CFA_OFFSET 4
224 CFI_REL_OFFSET ecx, 0 212 CFI_REL_OFFSET ecx, 0
225 pushl %ebx 213 pushl_cfi %ebx
226 CFI_ADJUST_CFA_OFFSET 4
227 CFI_REL_OFFSET ebx, 0 214 CFI_REL_OFFSET ebx, 0
228 movl $(__USER_DS), %edx 215 movl $(__USER_DS), %edx
229 movl %edx, %ds 216 movl %edx, %ds
@@ -234,39 +221,29 @@
234.endm 221.endm
235 222
236.macro RESTORE_INT_REGS 223.macro RESTORE_INT_REGS
237 popl %ebx 224 popl_cfi %ebx
238 CFI_ADJUST_CFA_OFFSET -4
239 CFI_RESTORE ebx 225 CFI_RESTORE ebx
240 popl %ecx 226 popl_cfi %ecx
241 CFI_ADJUST_CFA_OFFSET -4
242 CFI_RESTORE ecx 227 CFI_RESTORE ecx
243 popl %edx 228 popl_cfi %edx
244 CFI_ADJUST_CFA_OFFSET -4
245 CFI_RESTORE edx 229 CFI_RESTORE edx
246 popl %esi 230 popl_cfi %esi
247 CFI_ADJUST_CFA_OFFSET -4
248 CFI_RESTORE esi 231 CFI_RESTORE esi
249 popl %edi 232 popl_cfi %edi
250 CFI_ADJUST_CFA_OFFSET -4
251 CFI_RESTORE edi 233 CFI_RESTORE edi
252 popl %ebp 234 popl_cfi %ebp
253 CFI_ADJUST_CFA_OFFSET -4
254 CFI_RESTORE ebp 235 CFI_RESTORE ebp
255 popl %eax 236 popl_cfi %eax
256 CFI_ADJUST_CFA_OFFSET -4
257 CFI_RESTORE eax 237 CFI_RESTORE eax
258.endm 238.endm
259 239
260.macro RESTORE_REGS pop=0 240.macro RESTORE_REGS pop=0
261 RESTORE_INT_REGS 241 RESTORE_INT_REGS
2621: popl %ds 2421: popl_cfi %ds
263 CFI_ADJUST_CFA_OFFSET -4
264 /*CFI_RESTORE ds;*/ 243 /*CFI_RESTORE ds;*/
2652: popl %es 2442: popl_cfi %es
266 CFI_ADJUST_CFA_OFFSET -4
267 /*CFI_RESTORE es;*/ 245 /*CFI_RESTORE es;*/
2683: popl %fs 2463: popl_cfi %fs
269 CFI_ADJUST_CFA_OFFSET -4
270 /*CFI_RESTORE fs;*/ 247 /*CFI_RESTORE fs;*/
271 POP_GS \pop 248 POP_GS \pop
272.pushsection .fixup, "ax" 249.pushsection .fixup, "ax"
@@ -320,16 +297,12 @@
320 297
321ENTRY(ret_from_fork) 298ENTRY(ret_from_fork)
322 CFI_STARTPROC 299 CFI_STARTPROC
323 pushl %eax 300 pushl_cfi %eax
324 CFI_ADJUST_CFA_OFFSET 4
325 call schedule_tail 301 call schedule_tail
326 GET_THREAD_INFO(%ebp) 302 GET_THREAD_INFO(%ebp)
327 popl %eax 303 popl_cfi %eax
328 CFI_ADJUST_CFA_OFFSET -4 304 pushl_cfi $0x0202 # Reset kernel eflags
329 pushl $0x0202 # Reset kernel eflags 305 popfl_cfi
330 CFI_ADJUST_CFA_OFFSET 4
331 popfl
332 CFI_ADJUST_CFA_OFFSET -4
333 jmp syscall_exit 306 jmp syscall_exit
334 CFI_ENDPROC 307 CFI_ENDPROC
335END(ret_from_fork) 308END(ret_from_fork)
@@ -409,29 +382,23 @@ sysenter_past_esp:
409 * enough kernel state to call TRACE_IRQS_OFF can be called - but 382 * enough kernel state to call TRACE_IRQS_OFF can be called - but
410 * we immediately enable interrupts at that point anyway. 383 * we immediately enable interrupts at that point anyway.
411 */ 384 */
412 pushl $(__USER_DS) 385 pushl_cfi $(__USER_DS)
413 CFI_ADJUST_CFA_OFFSET 4
414 /*CFI_REL_OFFSET ss, 0*/ 386 /*CFI_REL_OFFSET ss, 0*/
415 pushl %ebp 387 pushl_cfi %ebp
416 CFI_ADJUST_CFA_OFFSET 4
417 CFI_REL_OFFSET esp, 0 388 CFI_REL_OFFSET esp, 0
418 pushfl 389 pushfl_cfi
419 orl $X86_EFLAGS_IF, (%esp) 390 orl $X86_EFLAGS_IF, (%esp)
420 CFI_ADJUST_CFA_OFFSET 4 391 pushl_cfi $(__USER_CS)
421 pushl $(__USER_CS)
422 CFI_ADJUST_CFA_OFFSET 4
423 /*CFI_REL_OFFSET cs, 0*/ 392 /*CFI_REL_OFFSET cs, 0*/
424 /* 393 /*
425 * Push current_thread_info()->sysenter_return to the stack. 394 * Push current_thread_info()->sysenter_return to the stack.
426 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
427 * pushed above; +8 corresponds to copy_thread's esp0 setting. 396 * pushed above; +8 corresponds to copy_thread's esp0 setting.
428 */ 397 */
429 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) 398 pushl_cfi (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
430 CFI_ADJUST_CFA_OFFSET 4
431 CFI_REL_OFFSET eip, 0 399 CFI_REL_OFFSET eip, 0
432 400
433 pushl %eax 401 pushl_cfi %eax
434 CFI_ADJUST_CFA_OFFSET 4
435 SAVE_ALL 402 SAVE_ALL
436 ENABLE_INTERRUPTS(CLBR_NONE) 403 ENABLE_INTERRUPTS(CLBR_NONE)
437 404
@@ -486,8 +453,7 @@ sysenter_audit:
486 movl %eax,%edx /* 2nd arg: syscall number */ 453 movl %eax,%edx /* 2nd arg: syscall number */
487 movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ 454 movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
488 call audit_syscall_entry 455 call audit_syscall_entry
489 pushl %ebx 456 pushl_cfi %ebx
490 CFI_ADJUST_CFA_OFFSET 4
491 movl PT_EAX(%esp),%eax /* reload syscall number */ 457 movl PT_EAX(%esp),%eax /* reload syscall number */
492 jmp sysenter_do_call 458 jmp sysenter_do_call
493 459
@@ -529,8 +495,7 @@ ENDPROC(ia32_sysenter_target)
529 # system call handler stub 495 # system call handler stub
530ENTRY(system_call) 496ENTRY(system_call)
531 RING0_INT_FRAME # can't unwind into user space anyway 497 RING0_INT_FRAME # can't unwind into user space anyway
532 pushl %eax # save orig_eax 498 pushl_cfi %eax # save orig_eax
533 CFI_ADJUST_CFA_OFFSET 4
534 SAVE_ALL 499 SAVE_ALL
535 GET_THREAD_INFO(%ebp) 500 GET_THREAD_INFO(%ebp)
536 # system call tracing in operation / emulation 501 # system call tracing in operation / emulation
@@ -566,7 +531,6 @@ restore_all_notrace:
566 je ldt_ss # returning to user-space with LDT SS 531 je ldt_ss # returning to user-space with LDT SS
567restore_nocheck: 532restore_nocheck:
568 RESTORE_REGS 4 # skip orig_eax/error_code 533 RESTORE_REGS 4 # skip orig_eax/error_code
569 CFI_ADJUST_CFA_OFFSET -4
570irq_return: 534irq_return:
571 INTERRUPT_RETURN 535 INTERRUPT_RETURN
572.section .fixup,"ax" 536.section .fixup,"ax"
@@ -619,10 +583,8 @@ ldt_ss:
619 shr $16, %edx 583 shr $16, %edx
620 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ 584 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
621 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ 585 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
622 pushl $__ESPFIX_SS 586 pushl_cfi $__ESPFIX_SS
623 CFI_ADJUST_CFA_OFFSET 4 587 pushl_cfi %eax /* new kernel esp */
624 push %eax /* new kernel esp */
625 CFI_ADJUST_CFA_OFFSET 4
626 /* Disable interrupts, but do not irqtrace this section: we 588 /* Disable interrupts, but do not irqtrace this section: we
627 * will soon execute iret and the tracer was already set to 589 * will soon execute iret and the tracer was already set to
628 * the irqstate after the iret */ 590 * the irqstate after the iret */
@@ -666,11 +628,9 @@ work_notifysig: # deal with pending signals and
666 628
667 ALIGN 629 ALIGN
668work_notifysig_v86: 630work_notifysig_v86:
669 pushl %ecx # save ti_flags for do_notify_resume 631 pushl_cfi %ecx # save ti_flags for do_notify_resume
670 CFI_ADJUST_CFA_OFFSET 4
671 call save_v86_state # %eax contains pt_regs pointer 632 call save_v86_state # %eax contains pt_regs pointer
672 popl %ecx 633 popl_cfi %ecx
673 CFI_ADJUST_CFA_OFFSET -4
674 movl %eax, %esp 634 movl %eax, %esp
675#else 635#else
676 movl %esp, %eax 636 movl %esp, %eax
@@ -750,14 +710,18 @@ ptregs_##name: \
750#define PTREGSCALL3(name) \ 710#define PTREGSCALL3(name) \
751 ALIGN; \ 711 ALIGN; \
752ptregs_##name: \ 712ptregs_##name: \
713 CFI_STARTPROC; \
753 leal 4(%esp),%eax; \ 714 leal 4(%esp),%eax; \
754 pushl %eax; \ 715 pushl_cfi %eax; \
755 movl PT_EDX(%eax),%ecx; \ 716 movl PT_EDX(%eax),%ecx; \
756 movl PT_ECX(%eax),%edx; \ 717 movl PT_ECX(%eax),%edx; \
757 movl PT_EBX(%eax),%eax; \ 718 movl PT_EBX(%eax),%eax; \
758 call sys_##name; \ 719 call sys_##name; \
759 addl $4,%esp; \ 720 addl $4,%esp; \
760 ret 721 CFI_ADJUST_CFA_OFFSET -4; \
722 ret; \
723 CFI_ENDPROC; \
724ENDPROC(ptregs_##name)
761 725
762PTREGSCALL1(iopl) 726PTREGSCALL1(iopl)
763PTREGSCALL0(fork) 727PTREGSCALL0(fork)
@@ -772,15 +736,19 @@ PTREGSCALL1(vm86old)
772/* Clone is an oddball. The 4th arg is in %edi */ 736/* Clone is an oddball. The 4th arg is in %edi */
773 ALIGN; 737 ALIGN;
774ptregs_clone: 738ptregs_clone:
739 CFI_STARTPROC
775 leal 4(%esp),%eax 740 leal 4(%esp),%eax
776 pushl %eax 741 pushl_cfi %eax
777 pushl PT_EDI(%eax) 742 pushl_cfi PT_EDI(%eax)
778 movl PT_EDX(%eax),%ecx 743 movl PT_EDX(%eax),%ecx
779 movl PT_ECX(%eax),%edx 744 movl PT_ECX(%eax),%edx
780 movl PT_EBX(%eax),%eax 745 movl PT_EBX(%eax),%eax
781 call sys_clone 746 call sys_clone
782 addl $8,%esp 747 addl $8,%esp
748 CFI_ADJUST_CFA_OFFSET -8
783 ret 749 ret
750 CFI_ENDPROC
751ENDPROC(ptregs_clone)
784 752
785.macro FIXUP_ESPFIX_STACK 753.macro FIXUP_ESPFIX_STACK
786/* 754/*
@@ -795,10 +763,8 @@ ptregs_clone:
795 mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ 763 mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
796 shl $16, %eax 764 shl $16, %eax
797 addl %esp, %eax /* the adjusted stack pointer */ 765 addl %esp, %eax /* the adjusted stack pointer */
798 pushl $__KERNEL_DS 766 pushl_cfi $__KERNEL_DS
799 CFI_ADJUST_CFA_OFFSET 4 767 pushl_cfi %eax
800 pushl %eax
801 CFI_ADJUST_CFA_OFFSET 4
802 lss (%esp), %esp /* switch to the normal stack segment */ 768 lss (%esp), %esp /* switch to the normal stack segment */
803 CFI_ADJUST_CFA_OFFSET -8 769 CFI_ADJUST_CFA_OFFSET -8
804.endm 770.endm
@@ -835,8 +801,7 @@ vector=FIRST_EXTERNAL_VECTOR
835 .if vector <> FIRST_EXTERNAL_VECTOR 801 .if vector <> FIRST_EXTERNAL_VECTOR
836 CFI_ADJUST_CFA_OFFSET -4 802 CFI_ADJUST_CFA_OFFSET -4
837 .endif 803 .endif
8381: pushl $(~vector+0x80) /* Note: always in signed byte range */ 8041: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
839 CFI_ADJUST_CFA_OFFSET 4
840 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 805 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
841 jmp 2f 806 jmp 2f
842 .endif 807 .endif
@@ -876,8 +841,7 @@ ENDPROC(common_interrupt)
876#define BUILD_INTERRUPT3(name, nr, fn) \ 841#define BUILD_INTERRUPT3(name, nr, fn) \
877ENTRY(name) \ 842ENTRY(name) \
878 RING0_INT_FRAME; \ 843 RING0_INT_FRAME; \
879 pushl $~(nr); \ 844 pushl_cfi $~(nr); \
880 CFI_ADJUST_CFA_OFFSET 4; \
881 SAVE_ALL; \ 845 SAVE_ALL; \
882 TRACE_IRQS_OFF \ 846 TRACE_IRQS_OFF \
883 movl %esp,%eax; \ 847 movl %esp,%eax; \
@@ -893,21 +857,18 @@ ENDPROC(name)
893 857
894ENTRY(coprocessor_error) 858ENTRY(coprocessor_error)
895 RING0_INT_FRAME 859 RING0_INT_FRAME
896 pushl $0 860 pushl_cfi $0
897 CFI_ADJUST_CFA_OFFSET 4 861 pushl_cfi $do_coprocessor_error
898 pushl $do_coprocessor_error
899 CFI_ADJUST_CFA_OFFSET 4
900 jmp error_code 862 jmp error_code
901 CFI_ENDPROC 863 CFI_ENDPROC
902END(coprocessor_error) 864END(coprocessor_error)
903 865
904ENTRY(simd_coprocessor_error) 866ENTRY(simd_coprocessor_error)
905 RING0_INT_FRAME 867 RING0_INT_FRAME
906 pushl $0 868 pushl_cfi $0
907 CFI_ADJUST_CFA_OFFSET 4
908#ifdef CONFIG_X86_INVD_BUG 869#ifdef CONFIG_X86_INVD_BUG
909 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ 870 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
910661: pushl $do_general_protection 871661: pushl_cfi $do_general_protection
911662: 872662:
912.section .altinstructions,"a" 873.section .altinstructions,"a"
913 .balign 4 874 .balign 4
@@ -922,19 +883,16 @@ ENTRY(simd_coprocessor_error)
922664: 883664:
923.previous 884.previous
924#else 885#else
925 pushl $do_simd_coprocessor_error 886 pushl_cfi $do_simd_coprocessor_error
926#endif 887#endif
927 CFI_ADJUST_CFA_OFFSET 4
928 jmp error_code 888 jmp error_code
929 CFI_ENDPROC 889 CFI_ENDPROC
930END(simd_coprocessor_error) 890END(simd_coprocessor_error)
931 891
932ENTRY(device_not_available) 892ENTRY(device_not_available)
933 RING0_INT_FRAME 893 RING0_INT_FRAME
934 pushl $-1 # mark this as an int 894 pushl_cfi $-1 # mark this as an int
935 CFI_ADJUST_CFA_OFFSET 4 895 pushl_cfi $do_device_not_available
936 pushl $do_device_not_available
937 CFI_ADJUST_CFA_OFFSET 4
938 jmp error_code 896 jmp error_code
939 CFI_ENDPROC 897 CFI_ENDPROC
940END(device_not_available) 898END(device_not_available)
@@ -956,82 +914,68 @@ END(native_irq_enable_sysexit)
956 914
957ENTRY(overflow) 915ENTRY(overflow)
958 RING0_INT_FRAME 916 RING0_INT_FRAME
959 pushl $0 917 pushl_cfi $0
960 CFI_ADJUST_CFA_OFFSET 4 918 pushl_cfi $do_overflow
961 pushl $do_overflow
962 CFI_ADJUST_CFA_OFFSET 4
963 jmp error_code 919 jmp error_code
964 CFI_ENDPROC 920 CFI_ENDPROC
965END(overflow) 921END(overflow)
966 922
967ENTRY(bounds) 923ENTRY(bounds)
968 RING0_INT_FRAME 924 RING0_INT_FRAME
969 pushl $0 925 pushl_cfi $0
970 CFI_ADJUST_CFA_OFFSET 4 926 pushl_cfi $do_bounds
971 pushl $do_bounds
972 CFI_ADJUST_CFA_OFFSET 4
973 jmp error_code 927 jmp error_code
974 CFI_ENDPROC 928 CFI_ENDPROC
975END(bounds) 929END(bounds)
976 930
977ENTRY(invalid_op) 931ENTRY(invalid_op)
978 RING0_INT_FRAME 932 RING0_INT_FRAME
979 pushl $0 933 pushl_cfi $0
980 CFI_ADJUST_CFA_OFFSET 4 934 pushl_cfi $do_invalid_op
981 pushl $do_invalid_op
982 CFI_ADJUST_CFA_OFFSET 4
983 jmp error_code 935 jmp error_code
984 CFI_ENDPROC 936 CFI_ENDPROC
985END(invalid_op) 937END(invalid_op)
986 938
987ENTRY(coprocessor_segment_overrun) 939ENTRY(coprocessor_segment_overrun)
988 RING0_INT_FRAME 940 RING0_INT_FRAME
989 pushl $0 941 pushl_cfi $0
990 CFI_ADJUST_CFA_OFFSET 4 942 pushl_cfi $do_coprocessor_segment_overrun
991 pushl $do_coprocessor_segment_overrun
992 CFI_ADJUST_CFA_OFFSET 4
993 jmp error_code 943 jmp error_code
994 CFI_ENDPROC 944 CFI_ENDPROC
995END(coprocessor_segment_overrun) 945END(coprocessor_segment_overrun)
996 946
997ENTRY(invalid_TSS) 947ENTRY(invalid_TSS)
998 RING0_EC_FRAME 948 RING0_EC_FRAME
999 pushl $do_invalid_TSS 949 pushl_cfi $do_invalid_TSS
1000 CFI_ADJUST_CFA_OFFSET 4
1001 jmp error_code 950 jmp error_code
1002 CFI_ENDPROC 951 CFI_ENDPROC
1003END(invalid_TSS) 952END(invalid_TSS)
1004 953
1005ENTRY(segment_not_present) 954ENTRY(segment_not_present)
1006 RING0_EC_FRAME 955 RING0_EC_FRAME
1007 pushl $do_segment_not_present 956 pushl_cfi $do_segment_not_present
1008 CFI_ADJUST_CFA_OFFSET 4
1009 jmp error_code 957 jmp error_code
1010 CFI_ENDPROC 958 CFI_ENDPROC
1011END(segment_not_present) 959END(segment_not_present)
1012 960
1013ENTRY(stack_segment) 961ENTRY(stack_segment)
1014 RING0_EC_FRAME 962 RING0_EC_FRAME
1015 pushl $do_stack_segment 963 pushl_cfi $do_stack_segment
1016 CFI_ADJUST_CFA_OFFSET 4
1017 jmp error_code 964 jmp error_code
1018 CFI_ENDPROC 965 CFI_ENDPROC
1019END(stack_segment) 966END(stack_segment)
1020 967
1021ENTRY(alignment_check) 968ENTRY(alignment_check)
1022 RING0_EC_FRAME 969 RING0_EC_FRAME
1023 pushl $do_alignment_check 970 pushl_cfi $do_alignment_check
1024 CFI_ADJUST_CFA_OFFSET 4
1025 jmp error_code 971 jmp error_code
1026 CFI_ENDPROC 972 CFI_ENDPROC
1027END(alignment_check) 973END(alignment_check)
1028 974
1029ENTRY(divide_error) 975ENTRY(divide_error)
1030 RING0_INT_FRAME 976 RING0_INT_FRAME
1031 pushl $0 # no error code 977 pushl_cfi $0 # no error code
1032 CFI_ADJUST_CFA_OFFSET 4 978 pushl_cfi $do_divide_error
1033 pushl $do_divide_error
1034 CFI_ADJUST_CFA_OFFSET 4
1035 jmp error_code 979 jmp error_code
1036 CFI_ENDPROC 980 CFI_ENDPROC
1037END(divide_error) 981END(divide_error)
@@ -1039,10 +983,8 @@ END(divide_error)
1039#ifdef CONFIG_X86_MCE 983#ifdef CONFIG_X86_MCE
1040ENTRY(machine_check) 984ENTRY(machine_check)
1041 RING0_INT_FRAME 985 RING0_INT_FRAME
1042 pushl $0 986 pushl_cfi $0
1043 CFI_ADJUST_CFA_OFFSET 4 987 pushl_cfi machine_check_vector
1044 pushl machine_check_vector
1045 CFI_ADJUST_CFA_OFFSET 4
1046 jmp error_code 988 jmp error_code
1047 CFI_ENDPROC 989 CFI_ENDPROC
1048END(machine_check) 990END(machine_check)
@@ -1050,10 +992,8 @@ END(machine_check)
1050 992
1051ENTRY(spurious_interrupt_bug) 993ENTRY(spurious_interrupt_bug)
1052 RING0_INT_FRAME 994 RING0_INT_FRAME
1053 pushl $0 995 pushl_cfi $0
1054 CFI_ADJUST_CFA_OFFSET 4 996 pushl_cfi $do_spurious_interrupt_bug
1055 pushl $do_spurious_interrupt_bug
1056 CFI_ADJUST_CFA_OFFSET 4
1057 jmp error_code 997 jmp error_code
1058 CFI_ENDPROC 998 CFI_ENDPROC
1059END(spurious_interrupt_bug) 999END(spurious_interrupt_bug)
@@ -1084,8 +1024,7 @@ ENTRY(xen_sysenter_target)
1084 1024
1085ENTRY(xen_hypervisor_callback) 1025ENTRY(xen_hypervisor_callback)
1086 CFI_STARTPROC 1026 CFI_STARTPROC
1087 pushl $0 1027 pushl_cfi $0
1088 CFI_ADJUST_CFA_OFFSET 4
1089 SAVE_ALL 1028 SAVE_ALL
1090 TRACE_IRQS_OFF 1029 TRACE_IRQS_OFF
1091 1030
@@ -1121,23 +1060,20 @@ ENDPROC(xen_hypervisor_callback)
1121# We distinguish between categories by maintaining a status value in EAX. 1060# We distinguish between categories by maintaining a status value in EAX.
1122ENTRY(xen_failsafe_callback) 1061ENTRY(xen_failsafe_callback)
1123 CFI_STARTPROC 1062 CFI_STARTPROC
1124 pushl %eax 1063 pushl_cfi %eax
1125 CFI_ADJUST_CFA_OFFSET 4
1126 movl $1,%eax 1064 movl $1,%eax
11271: mov 4(%esp),%ds 10651: mov 4(%esp),%ds
11282: mov 8(%esp),%es 10662: mov 8(%esp),%es
11293: mov 12(%esp),%fs 10673: mov 12(%esp),%fs
11304: mov 16(%esp),%gs 10684: mov 16(%esp),%gs
1131 testl %eax,%eax 1069 testl %eax,%eax
1132 popl %eax 1070 popl_cfi %eax
1133 CFI_ADJUST_CFA_OFFSET -4
1134 lea 16(%esp),%esp 1071 lea 16(%esp),%esp
1135 CFI_ADJUST_CFA_OFFSET -16 1072 CFI_ADJUST_CFA_OFFSET -16
1136 jz 5f 1073 jz 5f
1137 addl $16,%esp 1074 addl $16,%esp
1138 jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) 1075 jmp iret_exc # EAX != 0 => Category 2 (Bad IRET)
11395: pushl $0 # EAX == 0 => Category 1 (Bad segment) 10765: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment)
1140 CFI_ADJUST_CFA_OFFSET 4
1141 SAVE_ALL 1077 SAVE_ALL
1142 jmp ret_from_exception 1078 jmp ret_from_exception
1143 CFI_ENDPROC 1079 CFI_ENDPROC
@@ -1287,40 +1223,29 @@ syscall_table_size=(.-sys_call_table)
1287 1223
1288ENTRY(page_fault) 1224ENTRY(page_fault)
1289 RING0_EC_FRAME 1225 RING0_EC_FRAME
1290 pushl $do_page_fault 1226 pushl_cfi $do_page_fault
1291 CFI_ADJUST_CFA_OFFSET 4
1292 ALIGN 1227 ALIGN
1293error_code: 1228error_code:
1294 /* the function address is in %gs's slot on the stack */ 1229 /* the function address is in %gs's slot on the stack */
1295 pushl %fs 1230 pushl_cfi %fs
1296 CFI_ADJUST_CFA_OFFSET 4
1297 /*CFI_REL_OFFSET fs, 0*/ 1231 /*CFI_REL_OFFSET fs, 0*/
1298 pushl %es 1232 pushl_cfi %es
1299 CFI_ADJUST_CFA_OFFSET 4
1300 /*CFI_REL_OFFSET es, 0*/ 1233 /*CFI_REL_OFFSET es, 0*/
1301 pushl %ds 1234 pushl_cfi %ds
1302 CFI_ADJUST_CFA_OFFSET 4
1303 /*CFI_REL_OFFSET ds, 0*/ 1235 /*CFI_REL_OFFSET ds, 0*/
1304 pushl %eax 1236 pushl_cfi %eax
1305 CFI_ADJUST_CFA_OFFSET 4
1306 CFI_REL_OFFSET eax, 0 1237 CFI_REL_OFFSET eax, 0
1307 pushl %ebp 1238 pushl_cfi %ebp
1308 CFI_ADJUST_CFA_OFFSET 4
1309 CFI_REL_OFFSET ebp, 0 1239 CFI_REL_OFFSET ebp, 0
1310 pushl %edi 1240 pushl_cfi %edi
1311 CFI_ADJUST_CFA_OFFSET 4
1312 CFI_REL_OFFSET edi, 0 1241 CFI_REL_OFFSET edi, 0
1313 pushl %esi 1242 pushl_cfi %esi
1314 CFI_ADJUST_CFA_OFFSET 4
1315 CFI_REL_OFFSET esi, 0 1243 CFI_REL_OFFSET esi, 0
1316 pushl %edx 1244 pushl_cfi %edx
1317 CFI_ADJUST_CFA_OFFSET 4
1318 CFI_REL_OFFSET edx, 0 1245 CFI_REL_OFFSET edx, 0
1319 pushl %ecx 1246 pushl_cfi %ecx
1320 CFI_ADJUST_CFA_OFFSET 4
1321 CFI_REL_OFFSET ecx, 0 1247 CFI_REL_OFFSET ecx, 0
1322 pushl %ebx 1248 pushl_cfi %ebx
1323 CFI_ADJUST_CFA_OFFSET 4
1324 CFI_REL_OFFSET ebx, 0 1249 CFI_REL_OFFSET ebx, 0
1325 cld 1250 cld
1326 movl $(__KERNEL_PERCPU), %ecx 1251 movl $(__KERNEL_PERCPU), %ecx
@@ -1362,12 +1287,9 @@ END(page_fault)
1362 movl TSS_sysenter_sp0 + \offset(%esp), %esp 1287 movl TSS_sysenter_sp0 + \offset(%esp), %esp
1363 CFI_DEF_CFA esp, 0 1288 CFI_DEF_CFA esp, 0
1364 CFI_UNDEFINED eip 1289 CFI_UNDEFINED eip
1365 pushfl 1290 pushfl_cfi
1366 CFI_ADJUST_CFA_OFFSET 4 1291 pushl_cfi $__KERNEL_CS
1367 pushl $__KERNEL_CS 1292 pushl_cfi $sysenter_past_esp
1368 CFI_ADJUST_CFA_OFFSET 4
1369 pushl $sysenter_past_esp
1370 CFI_ADJUST_CFA_OFFSET 4
1371 CFI_REL_OFFSET eip, 0 1293 CFI_REL_OFFSET eip, 0
1372.endm 1294.endm
1373 1295
@@ -1377,8 +1299,7 @@ ENTRY(debug)
1377 jne debug_stack_correct 1299 jne debug_stack_correct
1378 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn 1300 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1379debug_stack_correct: 1301debug_stack_correct:
1380 pushl $-1 # mark this as an int 1302 pushl_cfi $-1 # mark this as an int
1381 CFI_ADJUST_CFA_OFFSET 4
1382 SAVE_ALL 1303 SAVE_ALL
1383 TRACE_IRQS_OFF 1304 TRACE_IRQS_OFF
1384 xorl %edx,%edx # error code 0 1305 xorl %edx,%edx # error code 0
@@ -1398,32 +1319,27 @@ END(debug)
1398 */ 1319 */
1399ENTRY(nmi) 1320ENTRY(nmi)
1400 RING0_INT_FRAME 1321 RING0_INT_FRAME
1401 pushl %eax 1322 pushl_cfi %eax
1402 CFI_ADJUST_CFA_OFFSET 4
1403 movl %ss, %eax 1323 movl %ss, %eax
1404 cmpw $__ESPFIX_SS, %ax 1324 cmpw $__ESPFIX_SS, %ax
1405 popl %eax 1325 popl_cfi %eax
1406 CFI_ADJUST_CFA_OFFSET -4
1407 je nmi_espfix_stack 1326 je nmi_espfix_stack
1408 cmpl $ia32_sysenter_target,(%esp) 1327 cmpl $ia32_sysenter_target,(%esp)
1409 je nmi_stack_fixup 1328 je nmi_stack_fixup
1410 pushl %eax 1329 pushl_cfi %eax
1411 CFI_ADJUST_CFA_OFFSET 4
1412 movl %esp,%eax 1330 movl %esp,%eax
1413 /* Do not access memory above the end of our stack page, 1331 /* Do not access memory above the end of our stack page,
1414 * it might not exist. 1332 * it might not exist.
1415 */ 1333 */
1416 andl $(THREAD_SIZE-1),%eax 1334 andl $(THREAD_SIZE-1),%eax
1417 cmpl $(THREAD_SIZE-20),%eax 1335 cmpl $(THREAD_SIZE-20),%eax
1418 popl %eax 1336 popl_cfi %eax
1419 CFI_ADJUST_CFA_OFFSET -4
1420 jae nmi_stack_correct 1337 jae nmi_stack_correct
1421 cmpl $ia32_sysenter_target,12(%esp) 1338 cmpl $ia32_sysenter_target,12(%esp)
1422 je nmi_debug_stack_check 1339 je nmi_debug_stack_check
1423nmi_stack_correct: 1340nmi_stack_correct:
1424 /* We have a RING0_INT_FRAME here */ 1341 /* We have a RING0_INT_FRAME here */
1425 pushl %eax 1342 pushl_cfi %eax
1426 CFI_ADJUST_CFA_OFFSET 4
1427 SAVE_ALL 1343 SAVE_ALL
1428 xorl %edx,%edx # zero error code 1344 xorl %edx,%edx # zero error code
1429 movl %esp,%eax # pt_regs pointer 1345 movl %esp,%eax # pt_regs pointer
@@ -1452,18 +1368,14 @@ nmi_espfix_stack:
1452 * 1368 *
1453 * create the pointer to lss back 1369 * create the pointer to lss back
1454 */ 1370 */
1455 pushl %ss 1371 pushl_cfi %ss
1456 CFI_ADJUST_CFA_OFFSET 4 1372 pushl_cfi %esp
1457 pushl %esp
1458 CFI_ADJUST_CFA_OFFSET 4
1459 addl $4, (%esp) 1373 addl $4, (%esp)
1460 /* copy the iret frame of 12 bytes */ 1374 /* copy the iret frame of 12 bytes */
1461 .rept 3 1375 .rept 3
1462 pushl 16(%esp) 1376 pushl_cfi 16(%esp)
1463 CFI_ADJUST_CFA_OFFSET 4
1464 .endr 1377 .endr
1465 pushl %eax 1378 pushl_cfi %eax
1466 CFI_ADJUST_CFA_OFFSET 4
1467 SAVE_ALL 1379 SAVE_ALL
1468 FIXUP_ESPFIX_STACK # %eax == %esp 1380 FIXUP_ESPFIX_STACK # %eax == %esp
1469 xorl %edx,%edx # zero error code 1381 xorl %edx,%edx # zero error code
@@ -1477,8 +1389,7 @@ END(nmi)
1477 1389
1478ENTRY(int3) 1390ENTRY(int3)
1479 RING0_INT_FRAME 1391 RING0_INT_FRAME
1480 pushl $-1 # mark this as an int 1392 pushl_cfi $-1 # mark this as an int
1481 CFI_ADJUST_CFA_OFFSET 4
1482 SAVE_ALL 1393 SAVE_ALL
1483 TRACE_IRQS_OFF 1394 TRACE_IRQS_OFF
1484 xorl %edx,%edx # zero error code 1395 xorl %edx,%edx # zero error code
@@ -1490,8 +1401,7 @@ END(int3)
1490 1401
1491ENTRY(general_protection) 1402ENTRY(general_protection)
1492 RING0_EC_FRAME 1403 RING0_EC_FRAME
1493 pushl $do_general_protection 1404 pushl_cfi $do_general_protection
1494 CFI_ADJUST_CFA_OFFSET 4
1495 jmp error_code 1405 jmp error_code
1496 CFI_ENDPROC 1406 CFI_ENDPROC
1497END(general_protection) 1407END(general_protection)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 17be5ec7cbba..a7ae7fd1010f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -213,23 +213,17 @@ ENDPROC(native_usergs_sysret64)
213 .macro FAKE_STACK_FRAME child_rip 213 .macro FAKE_STACK_FRAME child_rip
214 /* push in order ss, rsp, eflags, cs, rip */ 214 /* push in order ss, rsp, eflags, cs, rip */
215 xorl %eax, %eax 215 xorl %eax, %eax
216 pushq $__KERNEL_DS /* ss */ 216 pushq_cfi $__KERNEL_DS /* ss */
217 CFI_ADJUST_CFA_OFFSET 8
218 /*CFI_REL_OFFSET ss,0*/ 217 /*CFI_REL_OFFSET ss,0*/
219 pushq %rax /* rsp */ 218 pushq_cfi %rax /* rsp */
220 CFI_ADJUST_CFA_OFFSET 8
221 CFI_REL_OFFSET rsp,0 219 CFI_REL_OFFSET rsp,0
222 pushq $X86_EFLAGS_IF /* eflags - interrupts on */ 220 pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
223 CFI_ADJUST_CFA_OFFSET 8
224 /*CFI_REL_OFFSET rflags,0*/ 221 /*CFI_REL_OFFSET rflags,0*/
225 pushq $__KERNEL_CS /* cs */ 222 pushq_cfi $__KERNEL_CS /* cs */
226 CFI_ADJUST_CFA_OFFSET 8
227 /*CFI_REL_OFFSET cs,0*/ 223 /*CFI_REL_OFFSET cs,0*/
228 pushq \child_rip /* rip */ 224 pushq_cfi \child_rip /* rip */
229 CFI_ADJUST_CFA_OFFSET 8
230 CFI_REL_OFFSET rip,0 225 CFI_REL_OFFSET rip,0
231 pushq %rax /* orig rax */ 226 pushq_cfi %rax /* orig rax */
232 CFI_ADJUST_CFA_OFFSET 8
233 .endm 227 .endm
234 228
235 .macro UNFAKE_STACK_FRAME 229 .macro UNFAKE_STACK_FRAME
@@ -398,10 +392,8 @@ ENTRY(ret_from_fork)
398 392
399 LOCK ; btr $TIF_FORK,TI_flags(%r8) 393 LOCK ; btr $TIF_FORK,TI_flags(%r8)
400 394
401 push kernel_eflags(%rip) 395 pushq_cfi kernel_eflags(%rip)
402 CFI_ADJUST_CFA_OFFSET 8 396 popfq_cfi # reset kernel eflags
403 popf # reset kernel eflags
404 CFI_ADJUST_CFA_OFFSET -8
405 397
406 call schedule_tail # rdi: 'prev' task parameter 398 call schedule_tail # rdi: 'prev' task parameter
407 399
@@ -521,11 +513,9 @@ sysret_careful:
521 jnc sysret_signal 513 jnc sysret_signal
522 TRACE_IRQS_ON 514 TRACE_IRQS_ON
523 ENABLE_INTERRUPTS(CLBR_NONE) 515 ENABLE_INTERRUPTS(CLBR_NONE)
524 pushq %rdi 516 pushq_cfi %rdi
525 CFI_ADJUST_CFA_OFFSET 8
526 call schedule 517 call schedule
527 popq %rdi 518 popq_cfi %rdi
528 CFI_ADJUST_CFA_OFFSET -8
529 jmp sysret_check 519 jmp sysret_check
530 520
531 /* Handle a signal */ 521 /* Handle a signal */
@@ -634,11 +624,9 @@ int_careful:
634 jnc int_very_careful 624 jnc int_very_careful
635 TRACE_IRQS_ON 625 TRACE_IRQS_ON
636 ENABLE_INTERRUPTS(CLBR_NONE) 626 ENABLE_INTERRUPTS(CLBR_NONE)
637 pushq %rdi 627 pushq_cfi %rdi
638 CFI_ADJUST_CFA_OFFSET 8
639 call schedule 628 call schedule
640 popq %rdi 629 popq_cfi %rdi
641 CFI_ADJUST_CFA_OFFSET -8
642 DISABLE_INTERRUPTS(CLBR_NONE) 630 DISABLE_INTERRUPTS(CLBR_NONE)
643 TRACE_IRQS_OFF 631 TRACE_IRQS_OFF
644 jmp int_with_check 632 jmp int_with_check
@@ -652,12 +640,10 @@ int_check_syscall_exit_work:
652 /* Check for syscall exit trace */ 640 /* Check for syscall exit trace */
653 testl $_TIF_WORK_SYSCALL_EXIT,%edx 641 testl $_TIF_WORK_SYSCALL_EXIT,%edx
654 jz int_signal 642 jz int_signal
655 pushq %rdi 643 pushq_cfi %rdi
656 CFI_ADJUST_CFA_OFFSET 8
657 leaq 8(%rsp),%rdi # &ptregs -> arg1 644 leaq 8(%rsp),%rdi # &ptregs -> arg1
658 call syscall_trace_leave 645 call syscall_trace_leave
659 popq %rdi 646 popq_cfi %rdi
660 CFI_ADJUST_CFA_OFFSET -8
661 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi 647 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
662 jmp int_restore_rest 648 jmp int_restore_rest
663 649
@@ -714,9 +700,8 @@ END(ptregscall_common)
714 700
715ENTRY(stub_execve) 701ENTRY(stub_execve)
716 CFI_STARTPROC 702 CFI_STARTPROC
717 popq %r11 703 addq $8, %rsp
718 CFI_ADJUST_CFA_OFFSET -8 704 PARTIAL_FRAME 0
719 CFI_REGISTER rip, r11
720 SAVE_REST 705 SAVE_REST
721 FIXUP_TOP_OF_STACK %r11 706 FIXUP_TOP_OF_STACK %r11
722 movq %rsp, %rcx 707 movq %rsp, %rcx
@@ -735,7 +720,7 @@ END(stub_execve)
735ENTRY(stub_rt_sigreturn) 720ENTRY(stub_rt_sigreturn)
736 CFI_STARTPROC 721 CFI_STARTPROC
737 addq $8, %rsp 722 addq $8, %rsp
738 CFI_ADJUST_CFA_OFFSET -8 723 PARTIAL_FRAME 0
739 SAVE_REST 724 SAVE_REST
740 movq %rsp,%rdi 725 movq %rsp,%rdi
741 FIXUP_TOP_OF_STACK %r11 726 FIXUP_TOP_OF_STACK %r11
@@ -766,8 +751,7 @@ vector=FIRST_EXTERNAL_VECTOR
766 .if vector <> FIRST_EXTERNAL_VECTOR 751 .if vector <> FIRST_EXTERNAL_VECTOR
767 CFI_ADJUST_CFA_OFFSET -8 752 CFI_ADJUST_CFA_OFFSET -8
768 .endif 753 .endif
7691: pushq $(~vector+0x80) /* Note: always in signed byte range */ 7541: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */
770 CFI_ADJUST_CFA_OFFSET 8
771 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 755 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
772 jmp 2f 756 jmp 2f
773 .endif 757 .endif
@@ -796,8 +780,8 @@ END(interrupt)
796 780
797/* 0(%rsp): ~(interrupt number) */ 781/* 0(%rsp): ~(interrupt number) */
798 .macro interrupt func 782 .macro interrupt func
799 subq $10*8, %rsp 783 subq $ORIG_RAX-ARGOFFSET+8, %rsp
800 CFI_ADJUST_CFA_OFFSET 10*8 784 CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8
801 call save_args 785 call save_args
802 PARTIAL_FRAME 0 786 PARTIAL_FRAME 0
803 call \func 787 call \func
@@ -822,6 +806,7 @@ ret_from_intr:
822 TRACE_IRQS_OFF 806 TRACE_IRQS_OFF
823 decl PER_CPU_VAR(irq_count) 807 decl PER_CPU_VAR(irq_count)
824 leaveq 808 leaveq
809 CFI_RESTORE rbp
825 CFI_DEF_CFA_REGISTER rsp 810 CFI_DEF_CFA_REGISTER rsp
826 CFI_ADJUST_CFA_OFFSET -8 811 CFI_ADJUST_CFA_OFFSET -8
827exit_intr: 812exit_intr:
@@ -903,11 +888,9 @@ retint_careful:
903 jnc retint_signal 888 jnc retint_signal
904 TRACE_IRQS_ON 889 TRACE_IRQS_ON
905 ENABLE_INTERRUPTS(CLBR_NONE) 890 ENABLE_INTERRUPTS(CLBR_NONE)
906 pushq %rdi 891 pushq_cfi %rdi
907 CFI_ADJUST_CFA_OFFSET 8
908 call schedule 892 call schedule
909 popq %rdi 893 popq_cfi %rdi
910 CFI_ADJUST_CFA_OFFSET -8
911 GET_THREAD_INFO(%rcx) 894 GET_THREAD_INFO(%rcx)
912 DISABLE_INTERRUPTS(CLBR_NONE) 895 DISABLE_INTERRUPTS(CLBR_NONE)
913 TRACE_IRQS_OFF 896 TRACE_IRQS_OFF
@@ -956,8 +939,7 @@ END(common_interrupt)
956.macro apicinterrupt num sym do_sym 939.macro apicinterrupt num sym do_sym
957ENTRY(\sym) 940ENTRY(\sym)
958 INTR_FRAME 941 INTR_FRAME
959 pushq $~(\num) 942 pushq_cfi $~(\num)
960 CFI_ADJUST_CFA_OFFSET 8
961 interrupt \do_sym 943 interrupt \do_sym
962 jmp ret_from_intr 944 jmp ret_from_intr
963 CFI_ENDPROC 945 CFI_ENDPROC
@@ -1023,9 +1005,9 @@ apicinterrupt ERROR_APIC_VECTOR \
1023apicinterrupt SPURIOUS_APIC_VECTOR \ 1005apicinterrupt SPURIOUS_APIC_VECTOR \
1024 spurious_interrupt smp_spurious_interrupt 1006 spurious_interrupt smp_spurious_interrupt
1025 1007
1026#ifdef CONFIG_PERF_EVENTS 1008#ifdef CONFIG_IRQ_WORK
1027apicinterrupt LOCAL_PENDING_VECTOR \ 1009apicinterrupt IRQ_WORK_VECTOR \
1028 perf_pending_interrupt smp_perf_pending_interrupt 1010 irq_work_interrupt smp_irq_work_interrupt
1029#endif 1011#endif
1030 1012
1031/* 1013/*
@@ -1036,8 +1018,8 @@ ENTRY(\sym)
1036 INTR_FRAME 1018 INTR_FRAME
1037 PARAVIRT_ADJUST_EXCEPTION_FRAME 1019 PARAVIRT_ADJUST_EXCEPTION_FRAME
1038 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1020 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1039 subq $15*8,%rsp 1021 subq $ORIG_RAX-R15, %rsp
1040 CFI_ADJUST_CFA_OFFSET 15*8 1022 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1041 call error_entry 1023 call error_entry
1042 DEFAULT_FRAME 0 1024 DEFAULT_FRAME 0
1043 movq %rsp,%rdi /* pt_regs pointer */ 1025 movq %rsp,%rdi /* pt_regs pointer */
@@ -1052,9 +1034,9 @@ END(\sym)
1052ENTRY(\sym) 1034ENTRY(\sym)
1053 INTR_FRAME 1035 INTR_FRAME
1054 PARAVIRT_ADJUST_EXCEPTION_FRAME 1036 PARAVIRT_ADJUST_EXCEPTION_FRAME
1055 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1037 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1056 CFI_ADJUST_CFA_OFFSET 8 1038 subq $ORIG_RAX-R15, %rsp
1057 subq $15*8, %rsp 1039 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1058 call save_paranoid 1040 call save_paranoid
1059 TRACE_IRQS_OFF 1041 TRACE_IRQS_OFF
1060 movq %rsp,%rdi /* pt_regs pointer */ 1042 movq %rsp,%rdi /* pt_regs pointer */
@@ -1070,9 +1052,9 @@ END(\sym)
1070ENTRY(\sym) 1052ENTRY(\sym)
1071 INTR_FRAME 1053 INTR_FRAME
1072 PARAVIRT_ADJUST_EXCEPTION_FRAME 1054 PARAVIRT_ADJUST_EXCEPTION_FRAME
1073 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1055 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1074 CFI_ADJUST_CFA_OFFSET 8 1056 subq $ORIG_RAX-R15, %rsp
1075 subq $15*8, %rsp 1057 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1076 call save_paranoid 1058 call save_paranoid
1077 TRACE_IRQS_OFF 1059 TRACE_IRQS_OFF
1078 movq %rsp,%rdi /* pt_regs pointer */ 1060 movq %rsp,%rdi /* pt_regs pointer */
@@ -1089,8 +1071,8 @@ END(\sym)
1089ENTRY(\sym) 1071ENTRY(\sym)
1090 XCPT_FRAME 1072 XCPT_FRAME
1091 PARAVIRT_ADJUST_EXCEPTION_FRAME 1073 PARAVIRT_ADJUST_EXCEPTION_FRAME
1092 subq $15*8,%rsp 1074 subq $ORIG_RAX-R15, %rsp
1093 CFI_ADJUST_CFA_OFFSET 15*8 1075 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1094 call error_entry 1076 call error_entry
1095 DEFAULT_FRAME 0 1077 DEFAULT_FRAME 0
1096 movq %rsp,%rdi /* pt_regs pointer */ 1078 movq %rsp,%rdi /* pt_regs pointer */
@@ -1107,8 +1089,8 @@ END(\sym)
1107ENTRY(\sym) 1089ENTRY(\sym)
1108 XCPT_FRAME 1090 XCPT_FRAME
1109 PARAVIRT_ADJUST_EXCEPTION_FRAME 1091 PARAVIRT_ADJUST_EXCEPTION_FRAME
1110 subq $15*8,%rsp 1092 subq $ORIG_RAX-R15, %rsp
1111 CFI_ADJUST_CFA_OFFSET 15*8 1093 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1112 call save_paranoid 1094 call save_paranoid
1113 DEFAULT_FRAME 0 1095 DEFAULT_FRAME 0
1114 TRACE_IRQS_OFF 1096 TRACE_IRQS_OFF
@@ -1139,16 +1121,14 @@ zeroentry simd_coprocessor_error do_simd_coprocessor_error
1139 /* edi: new selector */ 1121 /* edi: new selector */
1140ENTRY(native_load_gs_index) 1122ENTRY(native_load_gs_index)
1141 CFI_STARTPROC 1123 CFI_STARTPROC
1142 pushf 1124 pushfq_cfi
1143 CFI_ADJUST_CFA_OFFSET 8
1144 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) 1125 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
1145 SWAPGS 1126 SWAPGS
1146gs_change: 1127gs_change:
1147 movl %edi,%gs 1128 movl %edi,%gs
11482: mfence /* workaround */ 11292: mfence /* workaround */
1149 SWAPGS 1130 SWAPGS
1150 popf 1131 popfq_cfi
1151 CFI_ADJUST_CFA_OFFSET -8
1152 ret 1132 ret
1153 CFI_ENDPROC 1133 CFI_ENDPROC
1154END(native_load_gs_index) 1134END(native_load_gs_index)
@@ -1215,8 +1195,7 @@ END(kernel_execve)
1215/* Call softirq on interrupt stack. Interrupts are off. */ 1195/* Call softirq on interrupt stack. Interrupts are off. */
1216ENTRY(call_softirq) 1196ENTRY(call_softirq)
1217 CFI_STARTPROC 1197 CFI_STARTPROC
1218 push %rbp 1198 pushq_cfi %rbp
1219 CFI_ADJUST_CFA_OFFSET 8
1220 CFI_REL_OFFSET rbp,0 1199 CFI_REL_OFFSET rbp,0
1221 mov %rsp,%rbp 1200 mov %rsp,%rbp
1222 CFI_DEF_CFA_REGISTER rbp 1201 CFI_DEF_CFA_REGISTER rbp
@@ -1225,6 +1204,7 @@ ENTRY(call_softirq)
1225 push %rbp # backlink for old unwinder 1204 push %rbp # backlink for old unwinder
1226 call __do_softirq 1205 call __do_softirq
1227 leaveq 1206 leaveq
1207 CFI_RESTORE rbp
1228 CFI_DEF_CFA_REGISTER rsp 1208 CFI_DEF_CFA_REGISTER rsp
1229 CFI_ADJUST_CFA_OFFSET -8 1209 CFI_ADJUST_CFA_OFFSET -8
1230 decl PER_CPU_VAR(irq_count) 1210 decl PER_CPU_VAR(irq_count)
@@ -1368,7 +1348,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
1368 1348
1369 /* ebx: no swapgs flag */ 1349 /* ebx: no swapgs flag */
1370ENTRY(paranoid_exit) 1350ENTRY(paranoid_exit)
1371 INTR_FRAME 1351 DEFAULT_FRAME
1372 DISABLE_INTERRUPTS(CLBR_NONE) 1352 DISABLE_INTERRUPTS(CLBR_NONE)
1373 TRACE_IRQS_OFF 1353 TRACE_IRQS_OFF
1374 testl %ebx,%ebx /* swapgs needed? */ 1354 testl %ebx,%ebx /* swapgs needed? */
@@ -1445,7 +1425,6 @@ error_swapgs:
1445error_sti: 1425error_sti:
1446 TRACE_IRQS_OFF 1426 TRACE_IRQS_OFF
1447 ret 1427 ret
1448 CFI_ENDPROC
1449 1428
1450/* 1429/*
1451 * There are two places in the kernel that can potentially fault with 1430 * There are two places in the kernel that can potentially fault with
@@ -1470,6 +1449,7 @@ bstep_iret:
1470 /* Fix truncated RIP */ 1449 /* Fix truncated RIP */
1471 movq %rcx,RIP+8(%rsp) 1450 movq %rcx,RIP+8(%rsp)
1472 jmp error_swapgs 1451 jmp error_swapgs
1452 CFI_ENDPROC
1473END(error_entry) 1453END(error_entry)
1474 1454
1475 1455
@@ -1498,8 +1478,8 @@ ENTRY(nmi)
1498 INTR_FRAME 1478 INTR_FRAME
1499 PARAVIRT_ADJUST_EXCEPTION_FRAME 1479 PARAVIRT_ADJUST_EXCEPTION_FRAME
1500 pushq_cfi $-1 1480 pushq_cfi $-1
1501 subq $15*8, %rsp 1481 subq $ORIG_RAX-R15, %rsp
1502 CFI_ADJUST_CFA_OFFSET 15*8 1482 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1503 call save_paranoid 1483 call save_paranoid
1504 DEFAULT_FRAME 0 1484 DEFAULT_FRAME 0
1505 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ 1485 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index cd37469b54ee..3afb33f14d2d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
257 return mod_code_status; 257 return mod_code_status;
258} 258}
259 259
260
261
262
263static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
264
265static unsigned char *ftrace_nop_replace(void) 260static unsigned char *ftrace_nop_replace(void)
266{ 261{
267 return ftrace_nop; 262 return ideal_nop5;
268} 263}
269 264
270static int 265static int
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
338 333
339int __init ftrace_dyn_arch_init(void *data) 334int __init ftrace_dyn_arch_init(void *data)
340{ 335{
341 extern const unsigned char ftrace_test_p6nop[];
342 extern const unsigned char ftrace_test_nop5[];
343 extern const unsigned char ftrace_test_jmp[];
344 int faulted = 0;
345
346 /*
347 * There is no good nop for all x86 archs.
348 * We will default to using the P6_NOP5, but first we
349 * will test to make sure that the nop will actually
350 * work on this CPU. If it faults, we will then
351 * go to a lesser efficient 5 byte nop. If that fails
352 * we then just use a jmp as our nop. This isn't the most
353 * efficient nop, but we can not use a multi part nop
354 * since we would then risk being preempted in the middle
355 * of that nop, and if we enabled tracing then, it might
356 * cause a system crash.
357 *
358 * TODO: check the cpuid to determine the best nop.
359 */
360 asm volatile (
361 "ftrace_test_jmp:"
362 "jmp ftrace_test_p6nop\n"
363 "nop\n"
364 "nop\n"
365 "nop\n" /* 2 byte jmp + 3 bytes */
366 "ftrace_test_p6nop:"
367 P6_NOP5
368 "jmp 1f\n"
369 "ftrace_test_nop5:"
370 ".byte 0x66,0x66,0x66,0x66,0x90\n"
371 "1:"
372 ".section .fixup, \"ax\"\n"
373 "2: movl $1, %0\n"
374 " jmp ftrace_test_nop5\n"
375 "3: movl $2, %0\n"
376 " jmp 1b\n"
377 ".previous\n"
378 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
379 _ASM_EXTABLE(ftrace_test_nop5, 3b)
380 : "=r"(faulted) : "0" (faulted));
381
382 switch (faulted) {
383 case 0:
384 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
385 memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
386 break;
387 case 1:
388 pr_info("converting mcount calls to 66 66 66 66 90\n");
389 memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
390 break;
391 case 2:
392 pr_info("converting mcount calls to jmp . + 5\n");
393 memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
394 break;
395 }
396
397 /* The return code is retured via data */ 336 /* The return code is retured via data */
398 *(unsigned long *)data = 0; 337 *(unsigned long *)data = 0;
399 338
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 3e66bd364a9d..af0699ba48cf 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -1,5 +1,6 @@
1#include <linux/kernel.h> 1#include <linux/kernel.h>
2#include <linux/init.h> 2#include <linux/init.h>
3#include <linux/memblock.h>
3 4
4#include <asm/setup.h> 5#include <asm/setup.h>
5#include <asm/bios_ebda.h> 6#include <asm/bios_ebda.h>
@@ -51,5 +52,5 @@ void __init reserve_ebda_region(void)
51 lowmem = 0x9f000; 52 lowmem = 0x9f000;
52 53
53 /* reserve all memory between lowmem and the 1MB mark */ 54 /* reserve all memory between lowmem and the 1MB mark */
54 reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved"); 55 memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
55} 56}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 784360c0625c..9a6ca2392170 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -8,6 +8,7 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/start_kernel.h> 9#include <linux/start_kernel.h>
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/memblock.h>
11 12
12#include <asm/setup.h> 13#include <asm/setup.h>
13#include <asm/sections.h> 14#include <asm/sections.h>
@@ -30,17 +31,18 @@ static void __init i386_default_early_setup(void)
30 31
31void __init i386_start_kernel(void) 32void __init i386_start_kernel(void)
32{ 33{
34 memblock_init();
35
33#ifdef CONFIG_X86_TRAMPOLINE 36#ifdef CONFIG_X86_TRAMPOLINE
34 /* 37 /*
35 * But first pinch a few for the stack/trampoline stuff 38 * But first pinch a few for the stack/trampoline stuff
36 * FIXME: Don't need the extra page at 4K, but need to fix 39 * FIXME: Don't need the extra page at 4K, but need to fix
37 * trampoline before removing it. (see the GDT stuff) 40 * trampoline before removing it. (see the GDT stuff)
38 */ 41 */
39 reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, 42 memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
40 "EX TRAMPOLINE");
41#endif 43#endif
42 44
43 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); 45 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
44 46
45#ifdef CONFIG_BLK_DEV_INITRD 47#ifdef CONFIG_BLK_DEV_INITRD
46 /* Reserve INITRD */ 48 /* Reserve INITRD */
@@ -49,7 +51,7 @@ void __init i386_start_kernel(void)
49 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 51 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
50 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 52 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
51 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 53 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
52 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); 54 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
53 } 55 }
54#endif 56#endif
55 57
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7147143fd614..2d2673c28aff 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -12,6 +12,7 @@
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/start_kernel.h> 13#include <linux/start_kernel.h>
14#include <linux/io.h> 14#include <linux/io.h>
15#include <linux/memblock.h>
15 16
16#include <asm/processor.h> 17#include <asm/processor.h>
17#include <asm/proto.h> 18#include <asm/proto.h>
@@ -79,6 +80,8 @@ void __init x86_64_start_kernel(char * real_mode_data)
79 /* Cleanup the over mapped high alias */ 80 /* Cleanup the over mapped high alias */
80 cleanup_highmap(); 81 cleanup_highmap();
81 82
83 max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
84
82 for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { 85 for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
83#ifdef CONFIG_EARLY_PRINTK 86#ifdef CONFIG_EARLY_PRINTK
84 set_intr_gate(i, &early_idt_handlers[i]); 87 set_intr_gate(i, &early_idt_handlers[i]);
@@ -98,7 +101,9 @@ void __init x86_64_start_reservations(char *real_mode_data)
98{ 101{
99 copy_bootdata(__va(real_mode_data)); 102 copy_bootdata(__va(real_mode_data));
100 103
101 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); 104 memblock_init();
105
106 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
102 107
103#ifdef CONFIG_BLK_DEV_INITRD 108#ifdef CONFIG_BLK_DEV_INITRD
104 /* Reserve INITRD */ 109 /* Reserve INITRD */
@@ -107,7 +112,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
107 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 112 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
108 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; 113 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
109 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 114 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
110 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); 115 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
111 } 116 }
112#endif 117#endif
113 118
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 7494999141b3..aff0b3c27509 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -380,44 +380,35 @@ static int hpet_next_event(unsigned long delta,
380 struct clock_event_device *evt, int timer) 380 struct clock_event_device *evt, int timer)
381{ 381{
382 u32 cnt; 382 u32 cnt;
383 s32 res;
383 384
384 cnt = hpet_readl(HPET_COUNTER); 385 cnt = hpet_readl(HPET_COUNTER);
385 cnt += (u32) delta; 386 cnt += (u32) delta;
386 hpet_writel(cnt, HPET_Tn_CMP(timer)); 387 hpet_writel(cnt, HPET_Tn_CMP(timer));
387 388
388 /* 389 /*
389 * We need to read back the CMP register on certain HPET 390 * HPETs are a complete disaster. The compare register is
390 * implementations (ATI chipsets) which seem to delay the 391 * based on a equal comparison and neither provides a less
391 * transfer of the compare register into the internal compare 392 * than or equal functionality (which would require to take
392 * logic. With small deltas this might actually be too late as 393 * the wraparound into account) nor a simple count down event
393 * the counter could already be higher than the compare value 394 * mode. Further the write to the comparator register is
394 * at that point and we would wait for the next hpet interrupt 395 * delayed internally up to two HPET clock cycles in certain
395 * forever. We found out that reading the CMP register back 396 * chipsets (ATI, ICH9,10). We worked around that by reading
396 * forces the transfer so we can rely on the comparison with 397 * back the compare register, but that required another
397 * the counter register below. If the read back from the 398 * workaround for ICH9,10 chips where the first readout after
398 * compare register does not match the value we programmed 399 * write can return the old stale value. We already have a
399 * then we might have a real hardware problem. We can not do 400 * minimum delta of 5us enforced, but a NMI or SMI hitting
400 * much about it here, but at least alert the user/admin with 401 * between the counter readout and the comparator write can
401 * a prominent warning. 402 * move us behind that point easily. Now instead of reading
402 * 403 * the compare register back several times, we make the ETIME
403 * An erratum on some chipsets (ICH9,..), results in 404 * decision based on the following: Return ETIME if the
404 * comparator read immediately following a write returning old 405 * counter value after the write is less than 8 HPET cycles
405 * value. Workaround for this is to read this value second 406 * away from the event or if the counter is already ahead of
406 * time, when first read returns old value. 407 * the event.
407 *
408 * In fact the write to the comparator register is delayed up
409 * to two HPET cycles so the workaround we tried to restrict
410 * the readback to those known to be borked ATI chipsets
411 * failed miserably. So we give up on optimizations forever
412 * and penalize all HPET incarnations unconditionally.
413 */ 408 */
414 if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { 409 res = (s32)(cnt - hpet_readl(HPET_COUNTER));
415 if (hpet_readl(HPET_Tn_CMP(timer)) != cnt)
416 printk_once(KERN_WARNING
417 "hpet: compare register read back failed.\n");
418 }
419 410
420 return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; 411 return res < 8 ? -ETIME : 0;
421} 412}
422 413
423static void hpet_legacy_set_mode(enum clock_event_mode mode, 414static void hpet_legacy_set_mode(enum clock_event_mode mode,
@@ -440,9 +431,9 @@ static int hpet_legacy_next_event(unsigned long delta,
440static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); 431static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
441static struct hpet_dev *hpet_devs; 432static struct hpet_dev *hpet_devs;
442 433
443void hpet_msi_unmask(unsigned int irq) 434void hpet_msi_unmask(struct irq_data *data)
444{ 435{
445 struct hpet_dev *hdev = get_irq_data(irq); 436 struct hpet_dev *hdev = data->handler_data;
446 unsigned int cfg; 437 unsigned int cfg;
447 438
448 /* unmask it */ 439 /* unmask it */
@@ -451,10 +442,10 @@ void hpet_msi_unmask(unsigned int irq)
451 hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); 442 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
452} 443}
453 444
454void hpet_msi_mask(unsigned int irq) 445void hpet_msi_mask(struct irq_data *data)
455{ 446{
447 struct hpet_dev *hdev = data->handler_data;
456 unsigned int cfg; 448 unsigned int cfg;
457 struct hpet_dev *hdev = get_irq_data(irq);
458 449
459 /* mask it */ 450 /* mask it */
460 cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); 451 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
@@ -462,18 +453,14 @@ void hpet_msi_mask(unsigned int irq)
462 hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); 453 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
463} 454}
464 455
465void hpet_msi_write(unsigned int irq, struct msi_msg *msg) 456void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg)
466{ 457{
467 struct hpet_dev *hdev = get_irq_data(irq);
468
469 hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); 458 hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
470 hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); 459 hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
471} 460}
472 461
473void hpet_msi_read(unsigned int irq, struct msi_msg *msg) 462void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg)
474{ 463{
475 struct hpet_dev *hdev = get_irq_data(irq);
476
477 msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); 464 msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
478 msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); 465 msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
479 msg->address_hi = 0; 466 msg->address_hi = 0;
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index a46cb3522c0c..58bb239a2fd7 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -68,19 +68,22 @@ static void __cpuinit init_thread_xstate(void)
68 */ 68 */
69 69
70 if (!HAVE_HWFP) { 70 if (!HAVE_HWFP) {
71 /*
72 * Disable xsave as we do not support it if i387
73 * emulation is enabled.
74 */
75 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
76 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
71 xstate_size = sizeof(struct i387_soft_struct); 77 xstate_size = sizeof(struct i387_soft_struct);
72 return; 78 return;
73 } 79 }
74 80
75 if (cpu_has_fxsr) 81 if (cpu_has_fxsr)
76 xstate_size = sizeof(struct i387_fxsave_struct); 82 xstate_size = sizeof(struct i387_fxsave_struct);
77#ifdef CONFIG_X86_32
78 else 83 else
79 xstate_size = sizeof(struct i387_fsave_struct); 84 xstate_size = sizeof(struct i387_fsave_struct);
80#endif
81} 85}
82 86
83#ifdef CONFIG_X86_64
84/* 87/*
85 * Called at bootup to set up the initial FPU state that is later cloned 88 * Called at bootup to set up the initial FPU state that is later cloned
86 * into all processes. 89 * into all processes.
@@ -88,12 +91,21 @@ static void __cpuinit init_thread_xstate(void)
88 91
89void __cpuinit fpu_init(void) 92void __cpuinit fpu_init(void)
90{ 93{
91 unsigned long oldcr0 = read_cr0(); 94 unsigned long cr0;
92 95 unsigned long cr4_mask = 0;
93 set_in_cr4(X86_CR4_OSFXSR);
94 set_in_cr4(X86_CR4_OSXMMEXCPT);
95 96
96 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ 97 if (cpu_has_fxsr)
98 cr4_mask |= X86_CR4_OSFXSR;
99 if (cpu_has_xmm)
100 cr4_mask |= X86_CR4_OSXMMEXCPT;
101 if (cr4_mask)
102 set_in_cr4(cr4_mask);
103
104 cr0 = read_cr0();
105 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
106 if (!HAVE_HWFP)
107 cr0 |= X86_CR0_EM;
108 write_cr0(cr0);
97 109
98 if (!smp_processor_id()) 110 if (!smp_processor_id())
99 init_thread_xstate(); 111 init_thread_xstate();
@@ -104,24 +116,12 @@ void __cpuinit fpu_init(void)
104 clear_used_math(); 116 clear_used_math();
105} 117}
106 118
107#else /* CONFIG_X86_64 */
108
109void __cpuinit fpu_init(void)
110{
111 if (!smp_processor_id())
112 init_thread_xstate();
113}
114
115#endif /* CONFIG_X86_32 */
116
117void fpu_finit(struct fpu *fpu) 119void fpu_finit(struct fpu *fpu)
118{ 120{
119#ifdef CONFIG_X86_32
120 if (!HAVE_HWFP) { 121 if (!HAVE_HWFP) {
121 finit_soft_fpu(&fpu->state->soft); 122 finit_soft_fpu(&fpu->state->soft);
122 return; 123 return;
123 } 124 }
124#endif
125 125
126 if (cpu_has_fxsr) { 126 if (cpu_has_fxsr) {
127 struct i387_fxsave_struct *fx = &fpu->state->fxsave; 127 struct i387_fxsave_struct *fx = &fpu->state->fxsave;
@@ -386,19 +386,17 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
386#ifdef CONFIG_X86_64 386#ifdef CONFIG_X86_64
387 env->fip = fxsave->rip; 387 env->fip = fxsave->rip;
388 env->foo = fxsave->rdp; 388 env->foo = fxsave->rdp;
389 /*
390 * should be actually ds/cs at fpu exception time, but
391 * that information is not available in 64bit mode.
392 */
393 env->fcs = task_pt_regs(tsk)->cs;
389 if (tsk == current) { 394 if (tsk == current) {
390 /* 395 savesegment(ds, env->fos);
391 * should be actually ds/cs at fpu exception time, but
392 * that information is not available in 64bit mode.
393 */
394 asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos));
395 asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs));
396 } else { 396 } else {
397 struct pt_regs *regs = task_pt_regs(tsk); 397 env->fos = tsk->thread.ds;
398
399 env->fos = 0xffff0000 | tsk->thread.ds;
400 env->fcs = regs->cs;
401 } 398 }
399 env->fos |= 0xffff0000;
402#else 400#else
403 env->fip = fxsave->fip; 401 env->fip = fxsave->fip;
404 env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); 402 env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index cafa7c80ac95..20757cb2efa3 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -29,24 +29,10 @@
29 * plus some generic x86 specific things if generic specifics makes 29 * plus some generic x86 specific things if generic specifics makes
30 * any sense at all. 30 * any sense at all.
31 */ 31 */
32static void init_8259A(int auto_eoi);
32 33
33static int i8259A_auto_eoi; 34static int i8259A_auto_eoi;
34DEFINE_RAW_SPINLOCK(i8259A_lock); 35DEFINE_RAW_SPINLOCK(i8259A_lock);
35static void mask_and_ack_8259A(unsigned int);
36static void mask_8259A(void);
37static void unmask_8259A(void);
38static void disable_8259A_irq(unsigned int irq);
39static void enable_8259A_irq(unsigned int irq);
40static void init_8259A(int auto_eoi);
41static int i8259A_irq_pending(unsigned int irq);
42
43struct irq_chip i8259A_chip = {
44 .name = "XT-PIC",
45 .mask = disable_8259A_irq,
46 .disable = disable_8259A_irq,
47 .unmask = enable_8259A_irq,
48 .mask_ack = mask_and_ack_8259A,
49};
50 36
51/* 37/*
52 * 8259A PIC functions to handle ISA devices: 38 * 8259A PIC functions to handle ISA devices:
@@ -68,7 +54,7 @@ unsigned int cached_irq_mask = 0xffff;
68 */ 54 */
69unsigned long io_apic_irqs; 55unsigned long io_apic_irqs;
70 56
71static void disable_8259A_irq(unsigned int irq) 57static void mask_8259A_irq(unsigned int irq)
72{ 58{
73 unsigned int mask = 1 << irq; 59 unsigned int mask = 1 << irq;
74 unsigned long flags; 60 unsigned long flags;
@@ -82,7 +68,12 @@ static void disable_8259A_irq(unsigned int irq)
82 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 68 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
83} 69}
84 70
85static void enable_8259A_irq(unsigned int irq) 71static void disable_8259A_irq(struct irq_data *data)
72{
73 mask_8259A_irq(data->irq);
74}
75
76static void unmask_8259A_irq(unsigned int irq)
86{ 77{
87 unsigned int mask = ~(1 << irq); 78 unsigned int mask = ~(1 << irq);
88 unsigned long flags; 79 unsigned long flags;
@@ -96,6 +87,11 @@ static void enable_8259A_irq(unsigned int irq)
96 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 87 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
97} 88}
98 89
90static void enable_8259A_irq(struct irq_data *data)
91{
92 unmask_8259A_irq(data->irq);
93}
94
99static int i8259A_irq_pending(unsigned int irq) 95static int i8259A_irq_pending(unsigned int irq)
100{ 96{
101 unsigned int mask = 1<<irq; 97 unsigned int mask = 1<<irq;
@@ -117,7 +113,7 @@ static void make_8259A_irq(unsigned int irq)
117 disable_irq_nosync(irq); 113 disable_irq_nosync(irq);
118 io_apic_irqs &= ~(1<<irq); 114 io_apic_irqs &= ~(1<<irq);
119 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, 115 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
120 "XT"); 116 i8259A_chip.name);
121 enable_irq(irq); 117 enable_irq(irq);
122} 118}
123 119
@@ -150,8 +146,9 @@ static inline int i8259A_irq_real(unsigned int irq)
150 * first, _then_ send the EOI, and the order of EOI 146 * first, _then_ send the EOI, and the order of EOI
151 * to the two 8259s is important! 147 * to the two 8259s is important!
152 */ 148 */
153static void mask_and_ack_8259A(unsigned int irq) 149static void mask_and_ack_8259A(struct irq_data *data)
154{ 150{
151 unsigned int irq = data->irq;
155 unsigned int irqmask = 1 << irq; 152 unsigned int irqmask = 1 << irq;
156 unsigned long flags; 153 unsigned long flags;
157 154
@@ -223,6 +220,14 @@ spurious_8259A_irq:
223 } 220 }
224} 221}
225 222
223struct irq_chip i8259A_chip = {
224 .name = "XT-PIC",
225 .irq_mask = disable_8259A_irq,
226 .irq_disable = disable_8259A_irq,
227 .irq_unmask = enable_8259A_irq,
228 .irq_mask_ack = mask_and_ack_8259A,
229};
230
226static char irq_trigger[2]; 231static char irq_trigger[2];
227/** 232/**
228 * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ 233 * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
@@ -342,9 +347,9 @@ static void init_8259A(int auto_eoi)
342 * In AEOI mode we just have to mask the interrupt 347 * In AEOI mode we just have to mask the interrupt
343 * when acking. 348 * when acking.
344 */ 349 */
345 i8259A_chip.mask_ack = disable_8259A_irq; 350 i8259A_chip.irq_mask_ack = disable_8259A_irq;
346 else 351 else
347 i8259A_chip.mask_ack = mask_and_ack_8259A; 352 i8259A_chip.irq_mask_ack = mask_and_ack_8259A;
348 353
349 udelay(100); /* wait for 8259A to initialize */ 354 udelay(100); /* wait for 8259A to initialize */
350 355
@@ -363,14 +368,6 @@ static void init_8259A(int auto_eoi)
363static void legacy_pic_noop(void) { }; 368static void legacy_pic_noop(void) { };
364static void legacy_pic_uint_noop(unsigned int unused) { }; 369static void legacy_pic_uint_noop(unsigned int unused) { };
365static void legacy_pic_int_noop(int unused) { }; 370static void legacy_pic_int_noop(int unused) { };
366
367static struct irq_chip dummy_pic_chip = {
368 .name = "dummy pic",
369 .mask = legacy_pic_uint_noop,
370 .unmask = legacy_pic_uint_noop,
371 .disable = legacy_pic_uint_noop,
372 .mask_ack = legacy_pic_uint_noop,
373};
374static int legacy_pic_irq_pending_noop(unsigned int irq) 371static int legacy_pic_irq_pending_noop(unsigned int irq)
375{ 372{
376 return 0; 373 return 0;
@@ -378,7 +375,9 @@ static int legacy_pic_irq_pending_noop(unsigned int irq)
378 375
379struct legacy_pic null_legacy_pic = { 376struct legacy_pic null_legacy_pic = {
380 .nr_legacy_irqs = 0, 377 .nr_legacy_irqs = 0,
381 .chip = &dummy_pic_chip, 378 .chip = &dummy_irq_chip,
379 .mask = legacy_pic_uint_noop,
380 .unmask = legacy_pic_uint_noop,
382 .mask_all = legacy_pic_noop, 381 .mask_all = legacy_pic_noop,
383 .restore_mask = legacy_pic_noop, 382 .restore_mask = legacy_pic_noop,
384 .init = legacy_pic_int_noop, 383 .init = legacy_pic_int_noop,
@@ -389,7 +388,9 @@ struct legacy_pic null_legacy_pic = {
389struct legacy_pic default_legacy_pic = { 388struct legacy_pic default_legacy_pic = {
390 .nr_legacy_irqs = NR_IRQS_LEGACY, 389 .nr_legacy_irqs = NR_IRQS_LEGACY,
391 .chip = &i8259A_chip, 390 .chip = &i8259A_chip,
392 .mask_all = mask_8259A, 391 .mask = mask_8259A_irq,
392 .unmask = unmask_8259A_irq,
393 .mask_all = mask_8259A,
393 .restore_mask = unmask_8259A, 394 .restore_mask = unmask_8259A,
394 .init = init_8259A, 395 .init = init_8259A,
395 .irq_pending = i8259A_irq_pending, 396 .irq_pending = i8259A_irq_pending,
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 91fd0c70a18a..83ec0175f986 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
67 for_each_online_cpu(j) 67 for_each_online_cpu(j)
68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
69 seq_printf(p, " Performance monitoring interrupts\n"); 69 seq_printf(p, " Performance monitoring interrupts\n");
70 seq_printf(p, "%*s: ", prec, "PND"); 70 seq_printf(p, "%*s: ", prec, "IWI");
71 for_each_online_cpu(j) 71 for_each_online_cpu(j)
72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); 72 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
73 seq_printf(p, " Performance pending work\n"); 73 seq_printf(p, " IRQ work interrupts\n");
74#endif 74#endif
75 if (x86_platform_ipi_callback) { 75 if (x86_platform_ipi_callback) {
76 seq_printf(p, "%*s: ", prec, "PLT"); 76 seq_printf(p, "%*s: ", prec, "PLT");
@@ -159,7 +159,7 @@ int show_interrupts(struct seq_file *p, void *v)
159 seq_printf(p, "%*d: ", prec, i); 159 seq_printf(p, "%*d: ", prec, i);
160 for_each_online_cpu(j) 160 for_each_online_cpu(j)
161 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 161 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
162 seq_printf(p, " %8s", desc->chip->name); 162 seq_printf(p, " %8s", desc->irq_data.chip->name);
163 seq_printf(p, "-%-8s", desc->name); 163 seq_printf(p, "-%-8s", desc->name);
164 164
165 if (action) { 165 if (action) {
@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
185 sum += irq_stats(cpu)->apic_timer_irqs; 185 sum += irq_stats(cpu)->apic_timer_irqs;
186 sum += irq_stats(cpu)->irq_spurious_count; 186 sum += irq_stats(cpu)->irq_spurious_count;
187 sum += irq_stats(cpu)->apic_perf_irqs; 187 sum += irq_stats(cpu)->apic_perf_irqs;
188 sum += irq_stats(cpu)->apic_pending_irqs; 188 sum += irq_stats(cpu)->apic_irq_work_irqs;
189#endif 189#endif
190 if (x86_platform_ipi_callback) 190 if (x86_platform_ipi_callback)
191 sum += irq_stats(cpu)->x86_platform_ipis; 191 sum += irq_stats(cpu)->x86_platform_ipis;
@@ -282,6 +282,7 @@ void fixup_irqs(void)
282 unsigned int irq, vector; 282 unsigned int irq, vector;
283 static int warned; 283 static int warned;
284 struct irq_desc *desc; 284 struct irq_desc *desc;
285 struct irq_data *data;
285 286
286 for_each_irq_desc(irq, desc) { 287 for_each_irq_desc(irq, desc) {
287 int break_affinity = 0; 288 int break_affinity = 0;
@@ -296,7 +297,8 @@ void fixup_irqs(void)
296 /* interrupt's are disabled at this point */ 297 /* interrupt's are disabled at this point */
297 raw_spin_lock(&desc->lock); 298 raw_spin_lock(&desc->lock);
298 299
299 affinity = desc->affinity; 300 data = &desc->irq_data;
301 affinity = data->affinity;
300 if (!irq_has_action(irq) || 302 if (!irq_has_action(irq) ||
301 cpumask_equal(affinity, cpu_online_mask)) { 303 cpumask_equal(affinity, cpu_online_mask)) {
302 raw_spin_unlock(&desc->lock); 304 raw_spin_unlock(&desc->lock);
@@ -315,16 +317,16 @@ void fixup_irqs(void)
315 affinity = cpu_all_mask; 317 affinity = cpu_all_mask;
316 } 318 }
317 319
318 if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) 320 if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask)
319 desc->chip->mask(irq); 321 data->chip->irq_mask(data);
320 322
321 if (desc->chip->set_affinity) 323 if (data->chip->irq_set_affinity)
322 desc->chip->set_affinity(irq, affinity); 324 data->chip->irq_set_affinity(data, affinity, true);
323 else if (!(warned++)) 325 else if (!(warned++))
324 set_affinity = 0; 326 set_affinity = 0;
325 327
326 if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) 328 if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask)
327 desc->chip->unmask(irq); 329 data->chip->irq_unmask(data);
328 330
329 raw_spin_unlock(&desc->lock); 331 raw_spin_unlock(&desc->lock);
330 332
@@ -355,10 +357,10 @@ void fixup_irqs(void)
355 if (irr & (1 << (vector % 32))) { 357 if (irr & (1 << (vector % 32))) {
356 irq = __get_cpu_var(vector_irq)[vector]; 358 irq = __get_cpu_var(vector_irq)[vector];
357 359
358 desc = irq_to_desc(irq); 360 data = irq_get_irq_data(irq);
359 raw_spin_lock(&desc->lock); 361 raw_spin_lock(&desc->lock);
360 if (desc->chip->retrigger) 362 if (data->chip->irq_retrigger)
361 desc->chip->retrigger(irq); 363 data->chip->irq_retrigger(data);
362 raw_spin_unlock(&desc->lock); 364 raw_spin_unlock(&desc->lock);
363 } 365 }
364 } 366 }
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 10709f29d166..50fbbe60e507 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -49,20 +49,19 @@ static inline int check_stack_overflow(void) { return 0; }
49static inline void print_stack_overflow(void) { } 49static inline void print_stack_overflow(void) { }
50#endif 50#endif
51 51
52#ifdef CONFIG_4KSTACKS
53/* 52/*
54 * per-CPU IRQ handling contexts (thread information and stack) 53 * per-CPU IRQ handling contexts (thread information and stack)
55 */ 54 */
56union irq_ctx { 55union irq_ctx {
57 struct thread_info tinfo; 56 struct thread_info tinfo;
58 u32 stack[THREAD_SIZE/sizeof(u32)]; 57 u32 stack[THREAD_SIZE/sizeof(u32)];
59} __attribute__((aligned(PAGE_SIZE))); 58} __attribute__((aligned(THREAD_SIZE)));
60 59
61static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); 60static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
62static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); 61static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
63 62
64static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack); 63static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, hardirq_stack, THREAD_SIZE);
65static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack); 64static DEFINE_PER_CPU_MULTIPAGE_ALIGNED(union irq_ctx, softirq_stack, THREAD_SIZE);
66 65
67static void call_on_stack(void *func, void *stack) 66static void call_on_stack(void *func, void *stack)
68{ 67{
@@ -187,11 +186,6 @@ asmlinkage void do_softirq(void)
187 local_irq_restore(flags); 186 local_irq_restore(flags);
188} 187}
189 188
190#else
191static inline int
192execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
193#endif
194
195bool handle_irq(unsigned irq, struct pt_regs *regs) 189bool handle_irq(unsigned irq, struct pt_regs *regs)
196{ 190{
197 struct irq_desc *desc; 191 struct irq_desc *desc;
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
new file mode 100644
index 000000000000..ca8f703a1e70
--- /dev/null
+++ b/arch/x86/kernel/irq_work.c
@@ -0,0 +1,30 @@
1/*
2 * x86 specific code for irq_work
3 *
4 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
5 */
6
7#include <linux/kernel.h>
8#include <linux/irq_work.h>
9#include <linux/hardirq.h>
10#include <asm/apic.h>
11
12void smp_irq_work_interrupt(struct pt_regs *regs)
13{
14 irq_enter();
15 ack_APIC_irq();
16 inc_irq_stat(apic_irq_work_irqs);
17 irq_work_run();
18 irq_exit();
19}
20
21void arch_irq_work_raise(void)
22{
23#ifdef CONFIG_X86_LOCAL_APIC
24 if (!cpu_has_apic)
25 return;
26
27 apic->send_IPI_self(IRQ_WORK_VECTOR);
28 apic_wait_icr_idle();
29#endif
30}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 990ae7cfc578..c752e973958d 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -100,6 +100,8 @@ int vector_used_by_percpu_irq(unsigned int vector)
100 100
101void __init init_ISA_irqs(void) 101void __init init_ISA_irqs(void)
102{ 102{
103 struct irq_chip *chip = legacy_pic->chip;
104 const char *name = chip->name;
103 int i; 105 int i;
104 106
105#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) 107#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
@@ -107,19 +109,8 @@ void __init init_ISA_irqs(void)
107#endif 109#endif
108 legacy_pic->init(0); 110 legacy_pic->init(0);
109 111
110 /* 112 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
111 * 16 old-style INTA-cycle interrupts: 113 set_irq_chip_and_handler_name(i, chip, handle_level_irq, name);
112 */
113 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) {
114 struct irq_desc *desc = irq_to_desc(i);
115
116 desc->status = IRQ_DISABLED;
117 desc->action = NULL;
118 desc->depth = 1;
119
120 set_irq_chip_and_handler_name(i, &i8259A_chip,
121 handle_level_irq, "XT");
122 }
123} 114}
124 115
125void __init init_IRQ(void) 116void __init init_IRQ(void)
@@ -224,9 +215,9 @@ static void __init apic_intr_init(void)
224 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 215 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
225 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 216 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
226 217
227 /* Performance monitoring interrupts: */ 218 /* IRQ work interrupts: */
228# ifdef CONFIG_PERF_EVENTS 219# ifdef CONFIG_IRQ_WORK
229 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); 220 alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
230# endif 221# endif
231 222
232#endif 223#endif
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
new file mode 100644
index 000000000000..961b6b30ba90
--- /dev/null
+++ b/arch/x86/kernel/jump_label.c
@@ -0,0 +1,50 @@
1/*
2 * jump label x86 support
3 *
4 * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
5 *
6 */
7#include <linux/jump_label.h>
8#include <linux/memory.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/list.h>
12#include <linux/jhash.h>
13#include <linux/cpu.h>
14#include <asm/kprobes.h>
15#include <asm/alternative.h>
16
17#ifdef HAVE_JUMP_LABEL
18
19union jump_code_union {
20 char code[JUMP_LABEL_NOP_SIZE];
21 struct {
22 char jump;
23 int offset;
24 } __attribute__((packed));
25};
26
27void arch_jump_label_transform(struct jump_entry *entry,
28 enum jump_label_type type)
29{
30 union jump_code_union code;
31
32 if (type == JUMP_LABEL_ENABLE) {
33 code.jump = 0xe9;
34 code.offset = entry->target -
35 (entry->code + JUMP_LABEL_NOP_SIZE);
36 } else
37 memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
38 get_online_cpus();
39 mutex_lock(&text_mutex);
40 text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
41 mutex_unlock(&text_mutex);
42 put_online_cpus();
43}
44
45void arch_jump_label_text_poke_early(jump_label_t addr)
46{
47 text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
48}
49
50#endif
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 8afd9f321f10..90fcf62854bb 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -78,6 +78,7 @@ static int setup_data_open(struct inode *inode, struct file *file)
78static const struct file_operations fops_setup_data = { 78static const struct file_operations fops_setup_data = {
79 .read = setup_data_read, 79 .read = setup_data_read,
80 .open = setup_data_open, 80 .open = setup_data_open,
81 .llseek = default_llseek,
81}; 82};
82 83
83static int __init 84static int __init
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 770ebfb349e9..1cbd54c0df99 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 return 0; 230 return 0;
231} 231}
232 232
233/* Dummy buffers for kallsyms_lookup */
234static char __dummy_buf[KSYM_NAME_LEN];
235
236/* Check if paddr is at an instruction boundary */ 233/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 234static int __kprobes can_probe(unsigned long paddr)
238{ 235{
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr)
241 struct insn insn; 238 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 239 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 240
244 if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) 241 if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
245 return 0; 242 return 0;
246 243
247 /* Decode instructions */ 244 /* Decode instructions */
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1129 *(unsigned long *)addr = val; 1126 *(unsigned long *)addr = val;
1130} 1127}
1131 1128
1132void __kprobes kprobes_optinsn_template_holder(void) 1129static void __used __kprobes kprobes_optinsn_template_holder(void)
1133{ 1130{
1134 asm volatile ( 1131 asm volatile (
1135 ".global optprobe_template_entry\n" 1132 ".global optprobe_template_entry\n"
@@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1221 } 1218 }
1222 /* Check whether the address range is reserved */ 1219 /* Check whether the address range is reserved */
1223 if (ftrace_text_reserved(src, src + len - 1) || 1220 if (ftrace_text_reserved(src, src + len - 1) ||
1224 alternatives_text_reserved(src, src + len - 1)) 1221 alternatives_text_reserved(src, src + len - 1) ||
1222 jump_label_text_reserved(src, src + len - 1))
1225 return -EBUSY; 1223 return -EBUSY;
1226 1224
1227 return len; 1225 return len;
@@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr)
1269 unsigned long addr, size = 0, offset = 0; 1267 unsigned long addr, size = 0, offset = 0;
1270 struct insn insn; 1268 struct insn insn;
1271 kprobe_opcode_t buf[MAX_INSN_SIZE]; 1269 kprobe_opcode_t buf[MAX_INSN_SIZE];
1272 /* Dummy buffers for lookup_symbol_attrs */
1273 static char __dummy_buf[KSYM_NAME_LEN];
1274 1270
1275 /* Lookup symbol including addr */ 1271 /* Lookup symbol including addr */
1276 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) 1272 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1277 return 0; 1273 return 0;
1278 1274
1279 /* Check there is enough space for a relative jump. */ 1275 /* Check there is enough space for a relative jump. */
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 035c8c529181..b3ea9db39db6 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -36,7 +36,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
36 if (!page) 36 if (!page)
37 goto out; 37 goto out;
38 pud = (pud_t *)page_address(page); 38 pud = (pud_t *)page_address(page);
39 memset(pud, 0, PAGE_SIZE); 39 clear_page(pud);
40 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); 40 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
41 } 41 }
42 pud = pud_offset(pgd, addr); 42 pud = pud_offset(pgd, addr);
@@ -45,7 +45,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
45 if (!page) 45 if (!page)
46 goto out; 46 goto out;
47 pmd = (pmd_t *)page_address(page); 47 pmd = (pmd_t *)page_address(page);
48 memset(pmd, 0, PAGE_SIZE); 48 clear_page(pmd);
49 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); 49 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
50 } 50 }
51 pmd = pmd_offset(pud, addr); 51 pmd = pmd_offset(pud, addr);
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fa6551d36c10..0b3d37e83606 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -232,6 +232,7 @@ static const struct file_operations microcode_fops = {
232 .owner = THIS_MODULE, 232 .owner = THIS_MODULE,
233 .write = microcode_write, 233 .write = microcode_write,
234 .open = microcode_open, 234 .open = microcode_open,
235 .llseek = no_llseek,
235}; 236};
236 237
237static struct miscdevice microcode_dev = { 238static struct miscdevice microcode_dev = {
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 1c355c550960..8f2956091735 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr,
239 apply_paravirt(pseg, pseg + para->sh_size); 239 apply_paravirt(pseg, pseg + para->sh_size);
240 } 240 }
241 241
242 /* make jump label nops */
243 jump_label_apply_nops(me);
244
242 return 0; 245 return 0;
243} 246}
244 247
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index d7b6f7fb4fec..9af64d9c4b67 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -11,6 +11,7 @@
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/delay.h> 12#include <linux/delay.h>
13#include <linux/bootmem.h> 13#include <linux/bootmem.h>
14#include <linux/memblock.h>
14#include <linux/kernel_stat.h> 15#include <linux/kernel_stat.h>
15#include <linux/mc146818rtc.h> 16#include <linux/mc146818rtc.h>
16#include <linux/bitops.h> 17#include <linux/bitops.h>
@@ -657,7 +658,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf)
657{ 658{
658 unsigned long size = get_mpc_size(mpf->physptr); 659 unsigned long size = get_mpc_size(mpf->physptr);
659 660
660 reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc"); 661 memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
661} 662}
662 663
663static int __init smp_scan_config(unsigned long base, unsigned long length) 664static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -686,7 +687,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
686 mpf, (u64)virt_to_phys(mpf)); 687 mpf, (u64)virt_to_phys(mpf));
687 688
688 mem = virt_to_phys(mpf); 689 mem = virt_to_phys(mpf);
689 reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf"); 690 memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf");
690 if (mpf->physptr) 691 if (mpf->physptr)
691 smp_reserve_memory(mpf); 692 smp_reserve_memory(mpf);
692 693
diff --git a/arch/x86/kernel/olpc-xo1.c b/arch/x86/kernel/olpc-xo1.c
new file mode 100644
index 000000000000..f5442c03abc3
--- /dev/null
+++ b/arch/x86/kernel/olpc-xo1.c
@@ -0,0 +1,140 @@
1/*
2 * Support for features of the OLPC XO-1 laptop
3 *
4 * Copyright (C) 2010 One Laptop per Child
5 * Copyright (C) 2006 Red Hat, Inc.
6 * Copyright (C) 2006 Advanced Micro Devices, Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/module.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/platform_device.h>
18#include <linux/pm.h>
19
20#include <asm/io.h>
21#include <asm/olpc.h>
22
23#define DRV_NAME "olpc-xo1"
24
25#define PMS_BAR 4
26#define ACPI_BAR 5
27
28/* PMC registers (PMS block) */
29#define PM_SCLK 0x10
30#define PM_IN_SLPCTL 0x20
31#define PM_WKXD 0x34
32#define PM_WKD 0x30
33#define PM_SSC 0x54
34
35/* PM registers (ACPI block) */
36#define PM1_CNT 0x08
37#define PM_GPE0_STS 0x18
38
39static unsigned long acpi_base;
40static unsigned long pms_base;
41
42static void xo1_power_off(void)
43{
44 printk(KERN_INFO "OLPC XO-1 power off sequence...\n");
45
46 /* Enable all of these controls with 0 delay */
47 outl(0x40000000, pms_base + PM_SCLK);
48 outl(0x40000000, pms_base + PM_IN_SLPCTL);
49 outl(0x40000000, pms_base + PM_WKXD);
50 outl(0x40000000, pms_base + PM_WKD);
51
52 /* Clear status bits (possibly unnecessary) */
53 outl(0x0002ffff, pms_base + PM_SSC);
54 outl(0xffffffff, acpi_base + PM_GPE0_STS);
55
56 /* Write SLP_EN bit to start the machinery */
57 outl(0x00002000, acpi_base + PM1_CNT);
58}
59
60/* Read the base addresses from the PCI BAR info */
61static int __devinit setup_bases(struct pci_dev *pdev)
62{
63 int r;
64
65 r = pci_enable_device_io(pdev);
66 if (r) {
67 dev_err(&pdev->dev, "can't enable device IO\n");
68 return r;
69 }
70
71 r = pci_request_region(pdev, ACPI_BAR, DRV_NAME);
72 if (r) {
73 dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR);
74 return r;
75 }
76
77 r = pci_request_region(pdev, PMS_BAR, DRV_NAME);
78 if (r) {
79 dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR);
80 pci_release_region(pdev, ACPI_BAR);
81 return r;
82 }
83
84 acpi_base = pci_resource_start(pdev, ACPI_BAR);
85 pms_base = pci_resource_start(pdev, PMS_BAR);
86
87 return 0;
88}
89
90static int __devinit olpc_xo1_probe(struct platform_device *pdev)
91{
92 struct pci_dev *pcidev;
93 int r;
94
95 pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA,
96 NULL);
97 if (!pdev)
98 return -ENODEV;
99
100 r = setup_bases(pcidev);
101 if (r)
102 return r;
103
104 pm_power_off = xo1_power_off;
105
106 printk(KERN_INFO "OLPC XO-1 support registered\n");
107 return 0;
108}
109
110static int __devexit olpc_xo1_remove(struct platform_device *pdev)
111{
112 pm_power_off = NULL;
113 return 0;
114}
115
116static struct platform_driver olpc_xo1_driver = {
117 .driver = {
118 .name = DRV_NAME,
119 .owner = THIS_MODULE,
120 },
121 .probe = olpc_xo1_probe,
122 .remove = __devexit_p(olpc_xo1_remove),
123};
124
125static int __init olpc_xo1_init(void)
126{
127 return platform_driver_register(&olpc_xo1_driver);
128}
129
130static void __exit olpc_xo1_exit(void)
131{
132 platform_driver_unregister(&olpc_xo1_driver);
133}
134
135MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>");
136MODULE_LICENSE("GPL");
137MODULE_ALIAS("platform:olpc-xo1");
138
139module_init(olpc_xo1_init);
140module_exit(olpc_xo1_exit);
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 0e0cdde519be..edaf3fe8dc5e 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -17,6 +17,7 @@
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/io.h> 18#include <linux/io.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/platform_device.h>
20 21
21#include <asm/geode.h> 22#include <asm/geode.h>
22#include <asm/setup.h> 23#include <asm/setup.h>
@@ -114,6 +115,7 @@ int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
114 unsigned long flags; 115 unsigned long flags;
115 int ret = -EIO; 116 int ret = -EIO;
116 int i; 117 int i;
118 int restarts = 0;
117 119
118 spin_lock_irqsave(&ec_lock, flags); 120 spin_lock_irqsave(&ec_lock, flags);
119 121
@@ -169,7 +171,9 @@ restart:
169 if (wait_on_obf(0x6c, 1)) { 171 if (wait_on_obf(0x6c, 1)) {
170 printk(KERN_ERR "olpc-ec: timeout waiting for" 172 printk(KERN_ERR "olpc-ec: timeout waiting for"
171 " EC to provide data!\n"); 173 " EC to provide data!\n");
172 goto restart; 174 if (restarts++ < 10)
175 goto restart;
176 goto err;
173 } 177 }
174 outbuf[i] = inb(0x68); 178 outbuf[i] = inb(0x68);
175 pr_devel("olpc-ec: received 0x%x\n", outbuf[i]); 179 pr_devel("olpc-ec: received 0x%x\n", outbuf[i]);
@@ -183,8 +187,21 @@ err:
183} 187}
184EXPORT_SYMBOL_GPL(olpc_ec_cmd); 188EXPORT_SYMBOL_GPL(olpc_ec_cmd);
185 189
186#ifdef CONFIG_OLPC_OPENFIRMWARE 190static bool __init check_ofw_architecture(void)
187static void __init platform_detect(void) 191{
192 size_t propsize;
193 char olpc_arch[5];
194 const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 };
195 void *res[] = { &propsize };
196
197 if (olpc_ofw("getprop", args, res)) {
198 printk(KERN_ERR "ofw: getprop call failed!\n");
199 return false;
200 }
201 return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0;
202}
203
204static u32 __init get_board_revision(void)
188{ 205{
189 size_t propsize; 206 size_t propsize;
190 __be32 rev; 207 __be32 rev;
@@ -193,45 +210,43 @@ static void __init platform_detect(void)
193 210
194 if (olpc_ofw("getprop", args, res) || propsize != 4) { 211 if (olpc_ofw("getprop", args, res) || propsize != 4) {
195 printk(KERN_ERR "ofw: getprop call failed!\n"); 212 printk(KERN_ERR "ofw: getprop call failed!\n");
196 rev = cpu_to_be32(0); 213 return cpu_to_be32(0);
197 } 214 }
198 olpc_platform_info.boardrev = be32_to_cpu(rev); 215 return be32_to_cpu(rev);
199} 216}
200#else 217
201static void __init platform_detect(void) 218static bool __init platform_detect(void)
202{ 219{
203 /* stopgap until OFW support is added to the kernel */ 220 if (!check_ofw_architecture())
204 olpc_platform_info.boardrev = olpc_board(0xc2); 221 return false;
222 olpc_platform_info.flags |= OLPC_F_PRESENT;
223 olpc_platform_info.boardrev = get_board_revision();
224 return true;
205} 225}
206#endif
207 226
208static int __init olpc_init(void) 227static int __init add_xo1_platform_devices(void)
209{ 228{
210 unsigned char *romsig; 229 struct platform_device *pdev;
211 230
212 /* The ioremap check is dangerous; limit what we run it on */ 231 pdev = platform_device_register_simple("xo1-rfkill", -1, NULL, 0);
213 if (!is_geode() || cs5535_has_vsa2()) 232 if (IS_ERR(pdev))
214 return 0; 233 return PTR_ERR(pdev);
215 234
216 spin_lock_init(&ec_lock); 235 pdev = platform_device_register_simple("olpc-xo1", -1, NULL, 0);
236 if (IS_ERR(pdev))
237 return PTR_ERR(pdev);
217 238
218 romsig = ioremap(0xffffffc0, 16); 239 return 0;
219 if (!romsig) 240}
220 return 0;
221 241
222 if (strncmp(romsig, "CL1 Q", 7)) 242static int __init olpc_init(void)
223 goto unmap; 243{
224 if (strncmp(romsig+6, romsig+13, 3)) { 244 int r = 0;
225 printk(KERN_INFO "OLPC BIOS signature looks invalid. "
226 "Assuming not OLPC\n");
227 goto unmap;
228 }
229 245
230 printk(KERN_INFO "OLPC board with OpenFirmware %.16s\n", romsig); 246 if (!olpc_ofw_present() || !platform_detect())
231 olpc_platform_info.flags |= OLPC_F_PRESENT; 247 return 0;
232 248
233 /* get the platform revision */ 249 spin_lock_init(&ec_lock);
234 platform_detect();
235 250
236 /* assume B1 and above models always have a DCON */ 251 /* assume B1 and above models always have a DCON */
237 if (olpc_board_at_least(olpc_board(0xb1))) 252 if (olpc_board_at_least(olpc_board(0xb1)))
@@ -242,8 +257,10 @@ static int __init olpc_init(void)
242 (unsigned char *) &olpc_platform_info.ecver, 1); 257 (unsigned char *) &olpc_platform_info.ecver, 1);
243 258
244#ifdef CONFIG_PCI_OLPC 259#ifdef CONFIG_PCI_OLPC
245 /* If the VSA exists let it emulate PCI, if not emulate in kernel */ 260 /* If the VSA exists let it emulate PCI, if not emulate in kernel.
246 if (!cs5535_has_vsa2()) 261 * XO-1 only. */
262 if (olpc_platform_info.boardrev < olpc_board_pre(0xd0) &&
263 !cs5535_has_vsa2())
247 x86_init.pci.arch_init = pci_olpc_init; 264 x86_init.pci.arch_init = pci_olpc_init;
248#endif 265#endif
249 266
@@ -252,8 +269,12 @@ static int __init olpc_init(void)
252 olpc_platform_info.boardrev >> 4, 269 olpc_platform_info.boardrev >> 4,
253 olpc_platform_info.ecver); 270 olpc_platform_info.ecver);
254 271
255unmap: 272 if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) { /* XO-1 */
256 iounmap(romsig); 273 r = add_xo1_platform_devices();
274 if (r)
275 return r;
276 }
277
257 return 0; 278 return 0;
258} 279}
259 280
diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c
index 3218aa71ab5e..787320464379 100644
--- a/arch/x86/kernel/olpc_ofw.c
+++ b/arch/x86/kernel/olpc_ofw.c
@@ -74,6 +74,12 @@ int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res,
74} 74}
75EXPORT_SYMBOL_GPL(__olpc_ofw); 75EXPORT_SYMBOL_GPL(__olpc_ofw);
76 76
77bool olpc_ofw_present(void)
78{
79 return olpc_ofw_cif != NULL;
80}
81EXPORT_SYMBOL_GPL(olpc_ofw_present);
82
77/* OFW cif _should_ be above this address */ 83/* OFW cif _should_ be above this address */
78#define OFW_MIN 0xff000000 84#define OFW_MIN 0xff000000
79 85
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1db183ed7c01..c5b250011fd4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -413,7 +413,6 @@ struct pv_mmu_ops pv_mmu_ops = {
413 413
414 .alloc_pte = paravirt_nop, 414 .alloc_pte = paravirt_nop,
415 .alloc_pmd = paravirt_nop, 415 .alloc_pmd = paravirt_nop,
416 .alloc_pmd_clone = paravirt_nop,
417 .alloc_pud = paravirt_nop, 416 .alloc_pud = paravirt_nop,
418 .release_pte = paravirt_nop, 417 .release_pte = paravirt_nop,
419 .release_pmd = paravirt_nop, 418 .release_pmd = paravirt_nop,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 078d4ec1a9d9..f56a117cef68 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -47,6 +47,7 @@
47#include <asm/rio.h> 47#include <asm/rio.h>
48#include <asm/bios_ebda.h> 48#include <asm/bios_ebda.h>
49#include <asm/x86_init.h> 49#include <asm/x86_init.h>
50#include <asm/iommu_table.h>
50 51
51#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT 52#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
52int use_calgary __read_mostly = 1; 53int use_calgary __read_mostly = 1;
@@ -1364,7 +1365,7 @@ static int __init calgary_iommu_init(void)
1364 return 0; 1365 return 0;
1365} 1366}
1366 1367
1367void __init detect_calgary(void) 1368int __init detect_calgary(void)
1368{ 1369{
1369 int bus; 1370 int bus;
1370 void *tbl; 1371 void *tbl;
@@ -1378,13 +1379,13 @@ void __init detect_calgary(void)
1378 * another HW IOMMU already, bail out. 1379 * another HW IOMMU already, bail out.
1379 */ 1380 */
1380 if (no_iommu || iommu_detected) 1381 if (no_iommu || iommu_detected)
1381 return; 1382 return -ENODEV;
1382 1383
1383 if (!use_calgary) 1384 if (!use_calgary)
1384 return; 1385 return -ENODEV;
1385 1386
1386 if (!early_pci_allowed()) 1387 if (!early_pci_allowed())
1387 return; 1388 return -ENODEV;
1388 1389
1389 printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); 1390 printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n");
1390 1391
@@ -1410,13 +1411,13 @@ void __init detect_calgary(void)
1410 if (!rio_table_hdr) { 1411 if (!rio_table_hdr) {
1411 printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " 1412 printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table "
1412 "in EBDA - bailing!\n"); 1413 "in EBDA - bailing!\n");
1413 return; 1414 return -ENODEV;
1414 } 1415 }
1415 1416
1416 ret = build_detail_arrays(); 1417 ret = build_detail_arrays();
1417 if (ret) { 1418 if (ret) {
1418 printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); 1419 printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret);
1419 return; 1420 return -ENOMEM;
1420 } 1421 }
1421 1422
1422 specified_table_size = determine_tce_table_size((is_kdump_kernel() ? 1423 specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
@@ -1464,7 +1465,7 @@ void __init detect_calgary(void)
1464 1465
1465 x86_init.iommu.iommu_init = calgary_iommu_init; 1466 x86_init.iommu.iommu_init = calgary_iommu_init;
1466 } 1467 }
1467 return; 1468 return calgary_found;
1468 1469
1469cleanup: 1470cleanup:
1470 for (--bus; bus >= 0; --bus) { 1471 for (--bus; bus >= 0; --bus) {
@@ -1473,6 +1474,7 @@ cleanup:
1473 if (info->tce_space) 1474 if (info->tce_space)
1474 free_tce_table(info->tce_space); 1475 free_tce_table(info->tce_space);
1475 } 1476 }
1477 return -ENOMEM;
1476} 1478}
1477 1479
1478static int __init calgary_parse_options(char *p) 1480static int __init calgary_parse_options(char *p)
@@ -1594,3 +1596,5 @@ static int __init calgary_fixup_tce_spaces(void)
1594 * and before device_initcall. 1596 * and before device_initcall.
1595 */ 1597 */
1596rootfs_initcall(calgary_fixup_tce_spaces); 1598rootfs_initcall(calgary_fixup_tce_spaces);
1599
1600IOMMU_INIT_POST(detect_calgary);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 9f07cfcbd3a5..9ea999a4dcc1 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -11,9 +11,8 @@
11#include <asm/iommu.h> 11#include <asm/iommu.h>
12#include <asm/gart.h> 12#include <asm/gart.h>
13#include <asm/calgary.h> 13#include <asm/calgary.h>
14#include <asm/amd_iommu.h>
15#include <asm/x86_init.h> 14#include <asm/x86_init.h>
16#include <asm/xen/swiotlb-xen.h> 15#include <asm/iommu_table.h>
17 16
18static int forbid_dac __read_mostly; 17static int forbid_dac __read_mostly;
19 18
@@ -45,6 +44,8 @@ int iommu_detected __read_mostly = 0;
45 */ 44 */
46int iommu_pass_through __read_mostly; 45int iommu_pass_through __read_mostly;
47 46
47extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
48
48/* Dummy device used for NULL arguments (normally ISA). */ 49/* Dummy device used for NULL arguments (normally ISA). */
49struct device x86_dma_fallback_dev = { 50struct device x86_dma_fallback_dev = {
50 .init_name = "fallback device", 51 .init_name = "fallback device",
@@ -130,26 +131,24 @@ static void __init dma32_free_bootmem(void)
130 131
131void __init pci_iommu_alloc(void) 132void __init pci_iommu_alloc(void)
132{ 133{
134 struct iommu_table_entry *p;
135
133 /* free the range so iommu could get some range less than 4G */ 136 /* free the range so iommu could get some range less than 4G */
134 dma32_free_bootmem(); 137 dma32_free_bootmem();
135 138
136 if (pci_xen_swiotlb_detect() || pci_swiotlb_detect()) 139 sort_iommu_table(__iommu_table, __iommu_table_end);
137 goto out; 140 check_iommu_entries(__iommu_table, __iommu_table_end);
138
139 gart_iommu_hole_init();
140
141 detect_calgary();
142
143 detect_intel_iommu();
144 141
145 /* needs to be called after gart_iommu_hole_init */ 142 for (p = __iommu_table; p < __iommu_table_end; p++) {
146 amd_iommu_detect(); 143 if (p && p->detect && p->detect() > 0) {
147out: 144 p->flags |= IOMMU_DETECTED;
148 pci_xen_swiotlb_init(); 145 if (p->early_init)
149 146 p->early_init();
150 pci_swiotlb_init(); 147 if (p->flags & IOMMU_FINISH_IF_DETECTED)
148 break;
149 }
150 }
151} 151}
152
153void *dma_generic_alloc_coherent(struct device *dev, size_t size, 152void *dma_generic_alloc_coherent(struct device *dev, size_t size,
154 dma_addr_t *dma_addr, gfp_t flag) 153 dma_addr_t *dma_addr, gfp_t flag)
155{ 154{
@@ -292,6 +291,7 @@ EXPORT_SYMBOL(dma_supported);
292 291
293static int __init pci_iommu_init(void) 292static int __init pci_iommu_init(void)
294{ 293{
294 struct iommu_table_entry *p;
295 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); 295 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
296 296
297#ifdef CONFIG_PCI 297#ifdef CONFIG_PCI
@@ -299,12 +299,10 @@ static int __init pci_iommu_init(void)
299#endif 299#endif
300 x86_init.iommu.iommu_init(); 300 x86_init.iommu.iommu_init();
301 301
302 if (swiotlb || xen_swiotlb) { 302 for (p = __iommu_table; p < __iommu_table_end; p++) {
303 printk(KERN_INFO "PCI-DMA: " 303 if (p && (p->flags & IOMMU_DETECTED) && p->late_init)
304 "Using software bounce buffering for IO (SWIOTLB)\n"); 304 p->late_init();
305 swiotlb_print_info(); 305 }
306 } else
307 swiotlb_free();
308 306
309 return 0; 307 return 0;
310} 308}
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 0f7f130caa67..ba0f0ca9f280 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -39,8 +39,9 @@
39#include <asm/cacheflush.h> 39#include <asm/cacheflush.h>
40#include <asm/swiotlb.h> 40#include <asm/swiotlb.h>
41#include <asm/dma.h> 41#include <asm/dma.h>
42#include <asm/k8.h> 42#include <asm/amd_nb.h>
43#include <asm/x86_init.h> 43#include <asm/x86_init.h>
44#include <asm/iommu_table.h>
44 45
45static unsigned long iommu_bus_base; /* GART remapping area (physical) */ 46static unsigned long iommu_bus_base; /* GART remapping area (physical) */
46static unsigned long iommu_size; /* size of remapping area bytes */ 47static unsigned long iommu_size; /* size of remapping area bytes */
@@ -560,8 +561,11 @@ static void enable_gart_translations(void)
560{ 561{
561 int i; 562 int i;
562 563
563 for (i = 0; i < num_k8_northbridges; i++) { 564 if (!k8_northbridges.gart_supported)
564 struct pci_dev *dev = k8_northbridges[i]; 565 return;
566
567 for (i = 0; i < k8_northbridges.num; i++) {
568 struct pci_dev *dev = k8_northbridges.nb_misc[i];
565 569
566 enable_gart_translation(dev, __pa(agp_gatt_table)); 570 enable_gart_translation(dev, __pa(agp_gatt_table));
567 } 571 }
@@ -592,16 +596,19 @@ static void gart_fixup_northbridges(struct sys_device *dev)
592 if (!fix_up_north_bridges) 596 if (!fix_up_north_bridges)
593 return; 597 return;
594 598
599 if (!k8_northbridges.gart_supported)
600 return;
601
595 pr_info("PCI-DMA: Restoring GART aperture settings\n"); 602 pr_info("PCI-DMA: Restoring GART aperture settings\n");
596 603
597 for (i = 0; i < num_k8_northbridges; i++) { 604 for (i = 0; i < k8_northbridges.num; i++) {
598 struct pci_dev *dev = k8_northbridges[i]; 605 struct pci_dev *dev = k8_northbridges.nb_misc[i];
599 606
600 /* 607 /*
601 * Don't enable translations just yet. That is the next 608 * Don't enable translations just yet. That is the next
602 * step. Restore the pre-suspend aperture settings. 609 * step. Restore the pre-suspend aperture settings.
603 */ 610 */
604 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); 611 gart_set_size_and_enable(dev, aperture_order);
605 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); 612 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
606 } 613 }
607} 614}
@@ -649,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
649 656
650 aper_size = aper_base = info->aper_size = 0; 657 aper_size = aper_base = info->aper_size = 0;
651 dev = NULL; 658 dev = NULL;
652 for (i = 0; i < num_k8_northbridges; i++) { 659 for (i = 0; i < k8_northbridges.num; i++) {
653 dev = k8_northbridges[i]; 660 dev = k8_northbridges.nb_misc[i];
654 new_aper_base = read_aperture(dev, &new_aper_size); 661 new_aper_base = read_aperture(dev, &new_aper_size);
655 if (!new_aper_base) 662 if (!new_aper_base)
656 goto nommu; 663 goto nommu;
@@ -718,10 +725,13 @@ static void gart_iommu_shutdown(void)
718 if (!no_agp) 725 if (!no_agp)
719 return; 726 return;
720 727
721 for (i = 0; i < num_k8_northbridges; i++) { 728 if (!k8_northbridges.gart_supported)
729 return;
730
731 for (i = 0; i < k8_northbridges.num; i++) {
722 u32 ctl; 732 u32 ctl;
723 733
724 dev = k8_northbridges[i]; 734 dev = k8_northbridges.nb_misc[i];
725 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); 735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
726 736
727 ctl &= ~GARTEN; 737 ctl &= ~GARTEN;
@@ -739,7 +749,7 @@ int __init gart_iommu_init(void)
739 unsigned long scratch; 749 unsigned long scratch;
740 long i; 750 long i;
741 751
742 if (num_k8_northbridges == 0) 752 if (!k8_northbridges.gart_supported)
743 return 0; 753 return 0;
744 754
745#ifndef CONFIG_AGP_AMD64 755#ifndef CONFIG_AGP_AMD64
@@ -896,3 +906,4 @@ void __init gart_parse_options(char *p)
896 } 906 }
897 } 907 }
898} 908}
909IOMMU_INIT_POST(gart_iommu_hole_init);
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
new file mode 100644
index 000000000000..55d745ec1181
--- /dev/null
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -0,0 +1,89 @@
1#include <linux/dma-mapping.h>
2#include <asm/iommu_table.h>
3#include <linux/string.h>
4#include <linux/kallsyms.h>
5
6
7#define DEBUG 1
8
9static struct iommu_table_entry * __init
10find_dependents_of(struct iommu_table_entry *start,
11 struct iommu_table_entry *finish,
12 struct iommu_table_entry *q)
13{
14 struct iommu_table_entry *p;
15
16 if (!q)
17 return NULL;
18
19 for (p = start; p < finish; p++)
20 if (p->detect == q->depend)
21 return p;
22
23 return NULL;
24}
25
26
27void __init sort_iommu_table(struct iommu_table_entry *start,
28 struct iommu_table_entry *finish) {
29
30 struct iommu_table_entry *p, *q, tmp;
31
32 for (p = start; p < finish; p++) {
33again:
34 q = find_dependents_of(start, finish, p);
35 /* We are bit sneaky here. We use the memory address to figure
36 * out if the node we depend on is past our point, if so, swap.
37 */
38 if (q > p) {
39 tmp = *p;
40 memmove(p, q, sizeof(*p));
41 *q = tmp;
42 goto again;
43 }
44 }
45
46}
47
48#ifdef DEBUG
49void __init check_iommu_entries(struct iommu_table_entry *start,
50 struct iommu_table_entry *finish)
51{
52 struct iommu_table_entry *p, *q, *x;
53 char sym_p[KSYM_SYMBOL_LEN];
54 char sym_q[KSYM_SYMBOL_LEN];
55
56 /* Simple cyclic dependency checker. */
57 for (p = start; p < finish; p++) {
58 q = find_dependents_of(start, finish, p);
59 x = find_dependents_of(start, finish, q);
60 if (p == x) {
61 sprint_symbol(sym_p, (unsigned long)p->detect);
62 sprint_symbol(sym_q, (unsigned long)q->detect);
63
64 printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \
65 " on %s and vice-versa. BREAKING IT.\n",
66 sym_p, sym_q);
67 /* Heavy handed way..*/
68 x->depend = 0;
69 }
70 }
71
72 for (p = start; p < finish; p++) {
73 q = find_dependents_of(p, finish, p);
74 if (q && q > p) {
75 sprint_symbol(sym_p, (unsigned long)p->detect);
76 sprint_symbol(sym_q, (unsigned long)q->detect);
77
78 printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\
79 "should be called before %s!\n",
80 sym_p, sym_q);
81 }
82 }
83}
84#else
85inline void check_iommu_entries(struct iommu_table_entry *start,
86 struct iommu_table_entry *finish)
87{
88}
89#endif
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index a5bc528d4328..8f972cbddef0 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -10,7 +10,8 @@
10#include <asm/iommu.h> 10#include <asm/iommu.h>
11#include <asm/swiotlb.h> 11#include <asm/swiotlb.h>
12#include <asm/dma.h> 12#include <asm/dma.h>
13 13#include <asm/xen/swiotlb-xen.h>
14#include <asm/iommu_table.h>
14int swiotlb __read_mostly; 15int swiotlb __read_mostly;
15 16
16static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, 17static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -41,25 +42,42 @@ static struct dma_map_ops swiotlb_dma_ops = {
41}; 42};
42 43
43/* 44/*
44 * pci_swiotlb_detect - set swiotlb to 1 if necessary 45 * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
45 * 46 *
46 * This returns non-zero if we are forced to use swiotlb (by the boot 47 * This returns non-zero if we are forced to use swiotlb (by the boot
47 * option). 48 * option).
48 */ 49 */
49int __init pci_swiotlb_detect(void) 50int __init pci_swiotlb_detect_override(void)
50{ 51{
51 int use_swiotlb = swiotlb | swiotlb_force; 52 int use_swiotlb = swiotlb | swiotlb_force;
52 53
54 if (swiotlb_force)
55 swiotlb = 1;
56
57 return use_swiotlb;
58}
59IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
60 pci_xen_swiotlb_detect,
61 pci_swiotlb_init,
62 pci_swiotlb_late_init);
63
64/*
65 * if 4GB or more detected (and iommu=off not set) return 1
66 * and set swiotlb to 1.
67 */
68int __init pci_swiotlb_detect_4gb(void)
69{
53 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 70 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
54#ifdef CONFIG_X86_64 71#ifdef CONFIG_X86_64
55 if (!no_iommu && max_pfn > MAX_DMA32_PFN) 72 if (!no_iommu && max_pfn > MAX_DMA32_PFN)
56 swiotlb = 1; 73 swiotlb = 1;
57#endif 74#endif
58 if (swiotlb_force) 75 return swiotlb;
59 swiotlb = 1;
60
61 return use_swiotlb;
62} 76}
77IOMMU_INIT(pci_swiotlb_detect_4gb,
78 pci_swiotlb_detect_override,
79 pci_swiotlb_init,
80 pci_swiotlb_late_init);
63 81
64void __init pci_swiotlb_init(void) 82void __init pci_swiotlb_init(void)
65{ 83{
@@ -68,3 +86,15 @@ void __init pci_swiotlb_init(void)
68 dma_ops = &swiotlb_dma_ops; 86 dma_ops = &swiotlb_dma_ops;
69 } 87 }
70} 88}
89
90void __init pci_swiotlb_late_init(void)
91{
92 /* An IOMMU turned us off. */
93 if (!swiotlb)
94 swiotlb_free();
95 else {
96 printk(KERN_INFO "PCI-DMA: "
97 "Using software bounce buffering for IO (SWIOTLB)\n");
98 swiotlb_print_info();
99 }
100}
diff --git a/arch/x86/kernel/pmtimer_64.c b/arch/x86/kernel/pmtimer_64.c
deleted file mode 100644
index b112406f1996..000000000000
--- a/arch/x86/kernel/pmtimer_64.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/* Ported over from i386 by AK, original copyright was:
2 *
3 * (C) Dominik Brodowski <linux@brodo.de> 2003
4 *
5 * Driver to use the Power Management Timer (PMTMR) available in some
6 * southbridges as primary timing source for the Linux kernel.
7 *
8 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
9 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
10 *
11 * This file is licensed under the GPL v2.
12 *
13 * Dropped all the hardware bug workarounds for now. Hopefully they
14 * are not needed on 64bit chipsets.
15 */
16
17#include <linux/jiffies.h>
18#include <linux/kernel.h>
19#include <linux/time.h>
20#include <linux/init.h>
21#include <linux/cpumask.h>
22#include <linux/acpi_pmtmr.h>
23
24#include <asm/io.h>
25#include <asm/proto.h>
26#include <asm/msr.h>
27#include <asm/vsyscall.h>
28
29static inline u32 cyc2us(u32 cycles)
30{
31 /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
32 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
33 *
34 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
35 * easily be multiplied with 286 (=0x11E) without having to fear
36 * u32 overflows.
37 */
38 cycles *= 286;
39 return (cycles >> 10);
40}
41
42static unsigned pmtimer_wait_tick(void)
43{
44 u32 a, b;
45 for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK;
46 a == b;
47 b = inl(pmtmr_ioport) & ACPI_PM_MASK)
48 cpu_relax();
49 return b;
50}
51
52/* note: wait time is rounded up to one tick */
53void pmtimer_wait(unsigned us)
54{
55 u32 a, b;
56 a = pmtimer_wait_tick();
57 do {
58 b = inl(pmtmr_ioport);
59 cpu_relax();
60 } while (cyc2us(b - a) < us);
61}
62
63static int __init nopmtimer_setup(char *s)
64{
65 pmtmr_ioport = 0;
66 return 1;
67}
68
69__setup("nopmtimer", nopmtimer_setup);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3d9ea531ddd1..b3d7a3a04f38 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -424,7 +424,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
424 load_TLS(next, cpu); 424 load_TLS(next, cpu);
425 425
426 /* Must be after DS reload */ 426 /* Must be after DS reload */
427 unlazy_fpu(prev_p); 427 __unlazy_fpu(prev_p);
428 428
429 /* Make sure cpu is ready for new context */ 429 /* Make sure cpu is ready for new context */
430 if (preload_fpu) 430 if (preload_fpu)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e3af342fe83a..7a4cf14223ba 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -84,7 +84,7 @@ static int __init reboot_setup(char *str)
84 } 84 }
85 /* we will leave sorting out the final value 85 /* we will leave sorting out the final value
86 when we are ready to reboot, since we might not 86 when we are ready to reboot, since we might not
87 have set up boot_cpu_id or smp_num_cpu */ 87 have detected BSP APIC ID or smp_num_cpu */
88 break; 88 break;
89#endif /* CONFIG_SMP */ 89#endif /* CONFIG_SMP */
90 90
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c3a4fbb2b996..420e64197850 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -31,6 +31,7 @@
31#include <linux/apm_bios.h> 31#include <linux/apm_bios.h>
32#include <linux/initrd.h> 32#include <linux/initrd.h>
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/memblock.h>
34#include <linux/seq_file.h> 35#include <linux/seq_file.h>
35#include <linux/console.h> 36#include <linux/console.h>
36#include <linux/mca.h> 37#include <linux/mca.h>
@@ -83,7 +84,6 @@
83#include <asm/dmi.h> 84#include <asm/dmi.h>
84#include <asm/io_apic.h> 85#include <asm/io_apic.h>
85#include <asm/ist.h> 86#include <asm/ist.h>
86#include <asm/vmi.h>
87#include <asm/setup_arch.h> 87#include <asm/setup_arch.h>
88#include <asm/bios_ebda.h> 88#include <asm/bios_ebda.h>
89#include <asm/cacheflush.h> 89#include <asm/cacheflush.h>
@@ -107,11 +107,12 @@
107#include <asm/percpu.h> 107#include <asm/percpu.h>
108#include <asm/topology.h> 108#include <asm/topology.h>
109#include <asm/apicdef.h> 109#include <asm/apicdef.h>
110#include <asm/k8.h> 110#include <asm/amd_nb.h>
111#ifdef CONFIG_X86_64 111#ifdef CONFIG_X86_64
112#include <asm/numa_64.h> 112#include <asm/numa_64.h>
113#endif 113#endif
114#include <asm/mce.h> 114#include <asm/mce.h>
115#include <asm/alternative.h>
115 116
116/* 117/*
117 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 118 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -125,7 +126,6 @@ unsigned long max_pfn_mapped;
125RESERVE_BRK(dmi_alloc, 65536); 126RESERVE_BRK(dmi_alloc, 65536);
126#endif 127#endif
127 128
128unsigned int boot_cpu_id __read_mostly;
129 129
130static __initdata unsigned long _brk_start = (unsigned long)__brk_base; 130static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
131unsigned long _brk_end = (unsigned long)__brk_base; 131unsigned long _brk_end = (unsigned long)__brk_base;
@@ -302,7 +302,7 @@ static inline void init_gbpages(void)
302static void __init reserve_brk(void) 302static void __init reserve_brk(void)
303{ 303{
304 if (_brk_end > _brk_start) 304 if (_brk_end > _brk_start)
305 reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); 305 memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK");
306 306
307 /* Mark brk area as locked down and no longer taking any 307 /* Mark brk area as locked down and no longer taking any
308 new allocations */ 308 new allocations */
@@ -324,17 +324,16 @@ static void __init relocate_initrd(void)
324 char *p, *q; 324 char *p, *q;
325 325
326 /* We need to move the initrd down into lowmem */ 326 /* We need to move the initrd down into lowmem */
327 ramdisk_here = find_e820_area(0, end_of_lowmem, area_size, 327 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
328 PAGE_SIZE); 328 PAGE_SIZE);
329 329
330 if (ramdisk_here == -1ULL) 330 if (ramdisk_here == MEMBLOCK_ERROR)
331 panic("Cannot find place for new RAMDISK of size %lld\n", 331 panic("Cannot find place for new RAMDISK of size %lld\n",
332 ramdisk_size); 332 ramdisk_size);
333 333
334 /* Note: this includes all the lowmem currently occupied by 334 /* Note: this includes all the lowmem currently occupied by
335 the initrd, we rely on that fact to keep the data intact. */ 335 the initrd, we rely on that fact to keep the data intact. */
336 reserve_early(ramdisk_here, ramdisk_here + area_size, 336 memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK");
337 "NEW RAMDISK");
338 initrd_start = ramdisk_here + PAGE_OFFSET; 337 initrd_start = ramdisk_here + PAGE_OFFSET;
339 initrd_end = initrd_start + ramdisk_size; 338 initrd_end = initrd_start + ramdisk_size;
340 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", 339 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -390,7 +389,7 @@ static void __init reserve_initrd(void)
390 initrd_start = 0; 389 initrd_start = 0;
391 390
392 if (ramdisk_size >= (end_of_lowmem>>1)) { 391 if (ramdisk_size >= (end_of_lowmem>>1)) {
393 free_early(ramdisk_image, ramdisk_end); 392 memblock_x86_free_range(ramdisk_image, ramdisk_end);
394 printk(KERN_ERR "initrd too large to handle, " 393 printk(KERN_ERR "initrd too large to handle, "
395 "disabling initrd\n"); 394 "disabling initrd\n");
396 return; 395 return;
@@ -413,7 +412,7 @@ static void __init reserve_initrd(void)
413 412
414 relocate_initrd(); 413 relocate_initrd();
415 414
416 free_early(ramdisk_image, ramdisk_end); 415 memblock_x86_free_range(ramdisk_image, ramdisk_end);
417} 416}
418#else 417#else
419static void __init reserve_initrd(void) 418static void __init reserve_initrd(void)
@@ -469,7 +468,7 @@ static void __init e820_reserve_setup_data(void)
469 e820_print_map("reserve setup_data"); 468 e820_print_map("reserve setup_data");
470} 469}
471 470
472static void __init reserve_early_setup_data(void) 471static void __init memblock_x86_reserve_range_setup_data(void)
473{ 472{
474 struct setup_data *data; 473 struct setup_data *data;
475 u64 pa_data; 474 u64 pa_data;
@@ -481,7 +480,7 @@ static void __init reserve_early_setup_data(void)
481 while (pa_data) { 480 while (pa_data) {
482 data = early_memremap(pa_data, sizeof(*data)); 481 data = early_memremap(pa_data, sizeof(*data));
483 sprintf(buf, "setup data %x", data->type); 482 sprintf(buf, "setup data %x", data->type);
484 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); 483 memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
485 pa_data = data->next; 484 pa_data = data->next;
486 early_iounmap(data, sizeof(*data)); 485 early_iounmap(data, sizeof(*data));
487 } 486 }
@@ -502,6 +501,7 @@ static inline unsigned long long get_total_mem(void)
502 return total << PAGE_SHIFT; 501 return total << PAGE_SHIFT;
503} 502}
504 503
504#define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF
505static void __init reserve_crashkernel(void) 505static void __init reserve_crashkernel(void)
506{ 506{
507 unsigned long long total_mem; 507 unsigned long long total_mem;
@@ -519,23 +519,27 @@ static void __init reserve_crashkernel(void)
519 if (crash_base <= 0) { 519 if (crash_base <= 0) {
520 const unsigned long long alignment = 16<<20; /* 16M */ 520 const unsigned long long alignment = 16<<20; /* 16M */
521 521
522 crash_base = find_e820_area(alignment, ULONG_MAX, crash_size, 522 /*
523 alignment); 523 * kexec want bzImage is below DEFAULT_BZIMAGE_ADDR_MAX
524 if (crash_base == -1ULL) { 524 */
525 crash_base = memblock_find_in_range(alignment,
526 DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment);
527
528 if (crash_base == MEMBLOCK_ERROR) {
525 pr_info("crashkernel reservation failed - No suitable area found.\n"); 529 pr_info("crashkernel reservation failed - No suitable area found.\n");
526 return; 530 return;
527 } 531 }
528 } else { 532 } else {
529 unsigned long long start; 533 unsigned long long start;
530 534
531 start = find_e820_area(crash_base, ULONG_MAX, crash_size, 535 start = memblock_find_in_range(crash_base,
532 1<<20); 536 crash_base + crash_size, crash_size, 1<<20);
533 if (start != crash_base) { 537 if (start != crash_base) {
534 pr_info("crashkernel reservation failed - memory is in use.\n"); 538 pr_info("crashkernel reservation failed - memory is in use.\n");
535 return; 539 return;
536 } 540 }
537 } 541 }
538 reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL"); 542 memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL");
539 543
540 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " 544 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
541 "for crashkernel (System RAM: %ldMB)\n", 545 "for crashkernel (System RAM: %ldMB)\n",
@@ -615,82 +619,10 @@ static __init void reserve_ibft_region(void)
615 addr = find_ibft_region(&size); 619 addr = find_ibft_region(&size);
616 620
617 if (size) 621 if (size)
618 reserve_early_overlap_ok(addr, addr + size, "ibft"); 622 memblock_x86_reserve_range(addr, addr + size, "* ibft");
619}
620
621#ifdef CONFIG_X86_RESERVE_LOW_64K
622static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
623{
624 printk(KERN_NOTICE
625 "%s detected: BIOS may corrupt low RAM, working around it.\n",
626 d->ident);
627
628 e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
629 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
630
631 return 0;
632} 623}
633#endif
634 624
635/* List of systems that have known low memory corruption BIOS problems */ 625static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
636static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
637#ifdef CONFIG_X86_RESERVE_LOW_64K
638 {
639 .callback = dmi_low_memory_corruption,
640 .ident = "AMI BIOS",
641 .matches = {
642 DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
643 },
644 },
645 {
646 .callback = dmi_low_memory_corruption,
647 .ident = "Phoenix BIOS",
648 .matches = {
649 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
650 },
651 },
652 {
653 .callback = dmi_low_memory_corruption,
654 .ident = "Phoenix/MSC BIOS",
655 .matches = {
656 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
657 },
658 },
659 /*
660 * AMI BIOS with low memory corruption was found on Intel DG45ID and
661 * DG45FC boards.
662 * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
663 * match only DMI_BOARD_NAME and see if there is more bad products
664 * with this vendor.
665 */
666 {
667 .callback = dmi_low_memory_corruption,
668 .ident = "AMI BIOS",
669 .matches = {
670 DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
671 },
672 },
673 {
674 .callback = dmi_low_memory_corruption,
675 .ident = "AMI BIOS",
676 .matches = {
677 DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
678 },
679 },
680 /*
681 * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so
682 * match on the product name.
683 */
684 {
685 .callback = dmi_low_memory_corruption,
686 .ident = "Phoenix BIOS",
687 .matches = {
688 DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"),
689 },
690 },
691#endif
692 {}
693};
694 626
695static void __init trim_bios_range(void) 627static void __init trim_bios_range(void)
696{ 628{
@@ -698,8 +630,14 @@ static void __init trim_bios_range(void)
698 * A special case is the first 4Kb of memory; 630 * A special case is the first 4Kb of memory;
699 * This is a BIOS owned area, not kernel ram, but generally 631 * This is a BIOS owned area, not kernel ram, but generally
700 * not listed as such in the E820 table. 632 * not listed as such in the E820 table.
633 *
634 * This typically reserves additional memory (64KiB by default)
635 * since some BIOSes are known to corrupt low memory. See the
636 * Kconfig help text for X86_RESERVE_LOW.
701 */ 637 */
702 e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); 638 e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE),
639 E820_RAM, E820_RESERVED);
640
703 /* 641 /*
704 * special case: Some BIOSen report the PC BIOS 642 * special case: Some BIOSen report the PC BIOS
705 * area (640->1Mb) as ram even though it is not. 643 * area (640->1Mb) as ram even though it is not.
@@ -709,6 +647,37 @@ static void __init trim_bios_range(void)
709 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 647 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
710} 648}
711 649
650static int __init parse_reservelow(char *p)
651{
652 unsigned long long size;
653
654 if (!p)
655 return -EINVAL;
656
657 size = memparse(p, &p);
658
659 if (size < 4096)
660 size = 4096;
661
662 if (size > 640*1024)
663 size = 640*1024;
664
665 reserve_low = size;
666
667 return 0;
668}
669
670early_param("reservelow", parse_reservelow);
671
672static u64 __init get_max_mapped(void)
673{
674 u64 end = max_pfn_mapped;
675
676 end <<= PAGE_SHIFT;
677
678 return end;
679}
680
712/* 681/*
713 * Determine if we were loaded by an EFI loader. If so, then we have also been 682 * Determine if we were loaded by an EFI loader. If so, then we have also been
714 * passed the efi memmap, systab, etc., so we should use these data structures 683 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -726,6 +695,7 @@ void __init setup_arch(char **cmdline_p)
726{ 695{
727 int acpi = 0; 696 int acpi = 0;
728 int k8 = 0; 697 int k8 = 0;
698 unsigned long flags;
729 699
730#ifdef CONFIG_X86_32 700#ifdef CONFIG_X86_32
731 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 701 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -734,10 +704,10 @@ void __init setup_arch(char **cmdline_p)
734 printk(KERN_INFO "Command line: %s\n", boot_command_line); 704 printk(KERN_INFO "Command line: %s\n", boot_command_line);
735#endif 705#endif
736 706
737 /* VMI may relocate the fixmap; do this before touching ioremap area */ 707 /*
738 vmi_init(); 708 * If we have OLPC OFW, we might end up relocating the fixmap due to
739 709 * reserve_top(), so do this before touching the ioremap area.
740 /* OFW also may relocate the fixmap */ 710 */
741 olpc_ofw_detect(); 711 olpc_ofw_detect();
742 712
743 early_trap_init(); 713 early_trap_init();
@@ -782,7 +752,7 @@ void __init setup_arch(char **cmdline_p)
782#endif 752#endif
783 4)) { 753 4)) {
784 efi_enabled = 1; 754 efi_enabled = 1;
785 efi_reserve_early(); 755 efi_memblock_x86_reserve_range();
786 } 756 }
787#endif 757#endif
788 758
@@ -838,11 +808,8 @@ void __init setup_arch(char **cmdline_p)
838 808
839 x86_report_nx(); 809 x86_report_nx();
840 810
841 /* Must be before kernel pagetables are setup */
842 vmi_activate();
843
844 /* after early param, so could get panic from serial */ 811 /* after early param, so could get panic from serial */
845 reserve_early_setup_data(); 812 memblock_x86_reserve_range_setup_data();
846 813
847 if (acpi_mps_check()) { 814 if (acpi_mps_check()) {
848#ifdef CONFIG_X86_LOCAL_APIC 815#ifdef CONFIG_X86_LOCAL_APIC
@@ -863,8 +830,6 @@ void __init setup_arch(char **cmdline_p)
863 830
864 dmi_scan_machine(); 831 dmi_scan_machine();
865 832
866 dmi_check_system(bad_bios_dmi_table);
867
868 /* 833 /*
869 * VMware detection requires dmi to be available, so this 834 * VMware detection requires dmi to be available, so this
870 * needs to be done after dmi_scan_machine, for the BP. 835 * needs to be done after dmi_scan_machine, for the BP.
@@ -897,8 +862,6 @@ void __init setup_arch(char **cmdline_p)
897 */ 862 */
898 max_pfn = e820_end_of_ram_pfn(); 863 max_pfn = e820_end_of_ram_pfn();
899 864
900 /* preallocate 4k for mptable mpc */
901 early_reserve_e820_mpc_new();
902 /* update e820 for memory not covered by WB MTRRs */ 865 /* update e820 for memory not covered by WB MTRRs */
903 mtrr_bp_init(); 866 mtrr_bp_init();
904 if (mtrr_trim_uncached_memory(max_pfn)) 867 if (mtrr_trim_uncached_memory(max_pfn))
@@ -920,18 +883,8 @@ void __init setup_arch(char **cmdline_p)
920 max_low_pfn = max_pfn; 883 max_low_pfn = max_pfn;
921 884
922 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; 885 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
923 max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
924#endif 886#endif
925 887
926#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
927 setup_bios_corruption_check();
928#endif
929
930 printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
931 max_pfn_mapped<<PAGE_SHIFT);
932
933 reserve_brk();
934
935 /* 888 /*
936 * Find and reserve possible boot-time SMP configuration: 889 * Find and reserve possible boot-time SMP configuration:
937 */ 890 */
@@ -939,6 +892,26 @@ void __init setup_arch(char **cmdline_p)
939 892
940 reserve_ibft_region(); 893 reserve_ibft_region();
941 894
895 /*
896 * Need to conclude brk, before memblock_x86_fill()
897 * it could use memblock_find_in_range, could overlap with
898 * brk area.
899 */
900 reserve_brk();
901
902 memblock.current_limit = get_max_mapped();
903 memblock_x86_fill();
904
905 /* preallocate 4k for mptable mpc */
906 early_reserve_e820_mpc_new();
907
908#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
909 setup_bios_corruption_check();
910#endif
911
912 printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
913 max_pfn_mapped<<PAGE_SHIFT);
914
942 reserve_trampoline_memory(); 915 reserve_trampoline_memory();
943 916
944#ifdef CONFIG_ACPI_SLEEP 917#ifdef CONFIG_ACPI_SLEEP
@@ -962,6 +935,7 @@ void __init setup_arch(char **cmdline_p)
962 max_low_pfn = max_pfn; 935 max_low_pfn = max_pfn;
963 } 936 }
964#endif 937#endif
938 memblock.current_limit = get_max_mapped();
965 939
966 /* 940 /*
967 * NOTE: On x86-32, only from this point on, fixmaps are ready for use. 941 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -1000,10 +974,7 @@ void __init setup_arch(char **cmdline_p)
1000#endif 974#endif
1001 975
1002 initmem_init(0, max_pfn, acpi, k8); 976 initmem_init(0, max_pfn, acpi, k8);
1003#ifndef CONFIG_NO_BOOTMEM 977 memblock_find_dma_reserve();
1004 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
1005#endif
1006
1007 dma32_reserve_bootmem(); 978 dma32_reserve_bootmem();
1008 979
1009#ifdef CONFIG_KVM_CLOCK 980#ifdef CONFIG_KVM_CLOCK
@@ -1071,6 +1042,10 @@ void __init setup_arch(char **cmdline_p)
1071 x86_init.oem.banner(); 1042 x86_init.oem.banner();
1072 1043
1073 mcheck_init(); 1044 mcheck_init();
1045
1046 local_irq_save(flags);
1047 arch_init_ideal_nop5();
1048 local_irq_restore(flags);
1074} 1049}
1075 1050
1076#ifdef CONFIG_X86_32 1051#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index a60df9ae6454..002b79685f73 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -131,13 +131,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
131 131
132static void __init pcpu_fc_free(void *ptr, size_t size) 132static void __init pcpu_fc_free(void *ptr, size_t size)
133{ 133{
134#ifdef CONFIG_NO_BOOTMEM
135 u64 start = __pa(ptr);
136 u64 end = start + size;
137 free_early_partial(start, end);
138#else
139 free_bootmem(__pa(ptr), size); 134 free_bootmem(__pa(ptr), size);
140#endif
141} 135}
142 136
143static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) 137static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
@@ -253,7 +247,7 @@ void __init setup_per_cpu_areas(void)
253 * Up to this point, the boot CPU has been using .init.data 247 * Up to this point, the boot CPU has been using .init.data
254 * area. Reload any changed state for the boot CPU. 248 * area. Reload any changed state for the boot CPU.
255 */ 249 */
256 if (cpu == boot_cpu_id) 250 if (!cpu)
257 switch_to_new_gdt(cpu); 251 switch_to_new_gdt(cpu);
258 } 252 }
259 253
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
index cb22acf3ed09..dd4c281ffe57 100644
--- a/arch/x86/kernel/sfi.c
+++ b/arch/x86/kernel/sfi.c
@@ -34,7 +34,7 @@
34#ifdef CONFIG_X86_LOCAL_APIC 34#ifdef CONFIG_X86_LOCAL_APIC
35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
36 36
37void __init mp_sfi_register_lapic_address(unsigned long address) 37static void __init mp_sfi_register_lapic_address(unsigned long address)
38{ 38{
39 mp_lapic_addr = address; 39 mp_lapic_addr = address;
40 40
@@ -46,7 +46,7 @@ void __init mp_sfi_register_lapic_address(unsigned long address)
46} 46}
47 47
48/* All CPUs enumerated by SFI must be present and enabled */ 48/* All CPUs enumerated by SFI must be present and enabled */
49void __cpuinit mp_sfi_register_lapic(u8 id) 49static void __cpuinit mp_sfi_register_lapic(u8 id)
50{ 50{
51 if (MAX_APICS - id <= 0) { 51 if (MAX_APICS - id <= 0) {
52 pr_warning("Processor #%d invalid (max %d)\n", 52 pr_warning("Processor #%d invalid (max %d)\n",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8b3bfc4dd708..dfb50890b5b7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,7 +62,7 @@
62#include <asm/pgtable.h> 62#include <asm/pgtable.h>
63#include <asm/tlbflush.h> 63#include <asm/tlbflush.h>
64#include <asm/mtrr.h> 64#include <asm/mtrr.h>
65#include <asm/vmi.h> 65#include <asm/mwait.h>
66#include <asm/apic.h> 66#include <asm/apic.h>
67#include <asm/setup.h> 67#include <asm/setup.h>
68#include <asm/uv/uv.h> 68#include <asm/uv/uv.h>
@@ -311,7 +311,6 @@ notrace static void __cpuinit start_secondary(void *unused)
311 __flush_tlb_all(); 311 __flush_tlb_all();
312#endif 312#endif
313 313
314 vmi_bringup();
315 cpu_init(); 314 cpu_init();
316 preempt_disable(); 315 preempt_disable();
317 smp_callin(); 316 smp_callin();
@@ -324,9 +323,9 @@ notrace static void __cpuinit start_secondary(void *unused)
324 check_tsc_sync_target(); 323 check_tsc_sync_target();
325 324
326 if (nmi_watchdog == NMI_IO_APIC) { 325 if (nmi_watchdog == NMI_IO_APIC) {
327 legacy_pic->chip->mask(0); 326 legacy_pic->mask(0);
328 enable_NMI_through_LVT0(); 327 enable_NMI_through_LVT0();
329 legacy_pic->chip->unmask(0); 328 legacy_pic->unmask(0);
330 } 329 }
331 330
332 /* This must be done before setting cpu_online_mask */ 331 /* This must be done before setting cpu_online_mask */
@@ -397,6 +396,19 @@ void __cpuinit smp_store_cpu_info(int id)
397 identify_secondary_cpu(c); 396 identify_secondary_cpu(c);
398} 397}
399 398
399static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
400{
401 struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
402 struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
403
404 cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
405 cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
406 cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
407 cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
408 cpumask_set_cpu(cpu1, c2->llc_shared_map);
409 cpumask_set_cpu(cpu2, c1->llc_shared_map);
410}
411
400 412
401void __cpuinit set_cpu_sibling_map(int cpu) 413void __cpuinit set_cpu_sibling_map(int cpu)
402{ 414{
@@ -409,14 +421,13 @@ void __cpuinit set_cpu_sibling_map(int cpu)
409 for_each_cpu(i, cpu_sibling_setup_mask) { 421 for_each_cpu(i, cpu_sibling_setup_mask) {
410 struct cpuinfo_x86 *o = &cpu_data(i); 422 struct cpuinfo_x86 *o = &cpu_data(i);
411 423
412 if (c->phys_proc_id == o->phys_proc_id && 424 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
413 c->cpu_core_id == o->cpu_core_id) { 425 if (c->phys_proc_id == o->phys_proc_id &&
414 cpumask_set_cpu(i, cpu_sibling_mask(cpu)); 426 c->compute_unit_id == o->compute_unit_id)
415 cpumask_set_cpu(cpu, cpu_sibling_mask(i)); 427 link_thread_siblings(cpu, i);
416 cpumask_set_cpu(i, cpu_core_mask(cpu)); 428 } else if (c->phys_proc_id == o->phys_proc_id &&
417 cpumask_set_cpu(cpu, cpu_core_mask(i)); 429 c->cpu_core_id == o->cpu_core_id) {
418 cpumask_set_cpu(i, c->llc_shared_map); 430 link_thread_siblings(cpu, i);
419 cpumask_set_cpu(cpu, o->llc_shared_map);
420 } 431 }
421 } 432 }
422 } else { 433 } else {
@@ -1109,8 +1120,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1109 } 1120 }
1110 set_cpu_sibling_map(0); 1121 set_cpu_sibling_map(0);
1111 1122
1112 enable_IR_x2apic();
1113 default_setup_apic_routing();
1114 1123
1115 if (smp_sanity_check(max_cpus) < 0) { 1124 if (smp_sanity_check(max_cpus) < 0) {
1116 printk(KERN_INFO "SMP disabled\n"); 1125 printk(KERN_INFO "SMP disabled\n");
@@ -1118,6 +1127,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1118 goto out; 1127 goto out;
1119 } 1128 }
1120 1129
1130 default_setup_apic_routing();
1131
1121 preempt_disable(); 1132 preempt_disable();
1122 if (read_apic_id() != boot_cpu_physical_apicid) { 1133 if (read_apic_id() != boot_cpu_physical_apicid) {
1123 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1134 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
@@ -1383,11 +1394,88 @@ void play_dead_common(void)
1383 local_irq_disable(); 1394 local_irq_disable();
1384} 1395}
1385 1396
1397/*
1398 * We need to flush the caches before going to sleep, lest we have
1399 * dirty data in our caches when we come back up.
1400 */
1401static inline void mwait_play_dead(void)
1402{
1403 unsigned int eax, ebx, ecx, edx;
1404 unsigned int highest_cstate = 0;
1405 unsigned int highest_subcstate = 0;
1406 int i;
1407 void *mwait_ptr;
1408
1409 if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
1410 return;
1411 if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
1412 return;
1413 if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1414 return;
1415
1416 eax = CPUID_MWAIT_LEAF;
1417 ecx = 0;
1418 native_cpuid(&eax, &ebx, &ecx, &edx);
1419
1420 /*
1421 * eax will be 0 if EDX enumeration is not valid.
1422 * Initialized below to cstate, sub_cstate value when EDX is valid.
1423 */
1424 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
1425 eax = 0;
1426 } else {
1427 edx >>= MWAIT_SUBSTATE_SIZE;
1428 for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
1429 if (edx & MWAIT_SUBSTATE_MASK) {
1430 highest_cstate = i;
1431 highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
1432 }
1433 }
1434 eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
1435 (highest_subcstate - 1);
1436 }
1437
1438 /*
1439 * This should be a memory location in a cache line which is
1440 * unlikely to be touched by other processors. The actual
1441 * content is immaterial as it is not actually modified in any way.
1442 */
1443 mwait_ptr = &current_thread_info()->flags;
1444
1445 wbinvd();
1446
1447 while (1) {
1448 /*
1449 * The CLFLUSH is a workaround for erratum AAI65 for
1450 * the Xeon 7400 series. It's not clear it is actually
1451 * needed, but it should be harmless in either case.
1452 * The WBINVD is insufficient due to the spurious-wakeup
1453 * case where we return around the loop.
1454 */
1455 clflush(mwait_ptr);
1456 __monitor(mwait_ptr, 0, 0);
1457 mb();
1458 __mwait(eax, 0);
1459 }
1460}
1461
1462static inline void hlt_play_dead(void)
1463{
1464 if (current_cpu_data.x86 >= 4)
1465 wbinvd();
1466
1467 while (1) {
1468 native_halt();
1469 }
1470}
1471
1386void native_play_dead(void) 1472void native_play_dead(void)
1387{ 1473{
1388 play_dead_common(); 1474 play_dead_common();
1389 tboot_shutdown(TB_SHUTDOWN_WFS); 1475 tboot_shutdown(TB_SHUTDOWN_WFS);
1390 wbinvd_halt(); 1476
1477 mwait_play_dead(); /* Only returns on failure */
1478 hlt_play_dead();
1391} 1479}
1392 1480
1393#else /* ... !CONFIG_HOTPLUG_CPU */ 1481#else /* ... !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index d5e06624e34a..0b0cb5fede19 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -33,8 +33,8 @@ int kernel_execve(const char *filename,
33 const char *const envp[]) 33 const char *const envp[])
34{ 34{
35 long __res; 35 long __res;
36 asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" 36 asm volatile ("int $0x80"
37 : "=a" (__res) 37 : "=a" (__res)
38 : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory"); 38 : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
39 return __res; 39 return __res;
40} 40}
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 312ef0292815..50ac949c7f1c 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -1285,6 +1285,7 @@ static const struct file_operations tunables_fops = {
1285 .open = tunables_open, 1285 .open = tunables_open,
1286 .read = tunables_read, 1286 .read = tunables_read,
1287 .write = tunables_write, 1287 .write = tunables_write,
1288 .llseek = default_llseek,
1288}; 1289};
1289 1290
1290static int __init uv_ptc_init(void) 1291static int __init uv_ptc_init(void)
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index e2a595257390..4c3da5674e67 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -1,8 +1,8 @@
1#include <linux/io.h> 1#include <linux/io.h>
2#include <linux/memblock.h>
2 3
3#include <asm/trampoline.h> 4#include <asm/trampoline.h>
4#include <asm/pgtable.h> 5#include <asm/pgtable.h>
5#include <asm/e820.h>
6 6
7#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) 7#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
8#define __trampinit 8#define __trampinit
@@ -17,15 +17,15 @@ unsigned char *__trampinitdata trampoline_base;
17 17
18void __init reserve_trampoline_memory(void) 18void __init reserve_trampoline_memory(void)
19{ 19{
20 unsigned long mem; 20 phys_addr_t mem;
21 21
22 /* Has to be in very low memory so we can execute real-mode AP code. */ 22 /* Has to be in very low memory so we can execute real-mode AP code. */
23 mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); 23 mem = memblock_find_in_range(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE);
24 if (mem == -1L) 24 if (mem == MEMBLOCK_ERROR)
25 panic("Cannot allocate trampoline\n"); 25 panic("Cannot allocate trampoline\n");
26 26
27 trampoline_base = __va(mem); 27 trampoline_base = __va(mem);
28 reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); 28 memblock_x86_reserve_range(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE");
29} 29}
30 30
31/* 31/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 60788dee0f8a..d43968503dd2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -776,21 +776,10 @@ asmlinkage void math_state_restore(void)
776} 776}
777EXPORT_SYMBOL_GPL(math_state_restore); 777EXPORT_SYMBOL_GPL(math_state_restore);
778 778
779#ifndef CONFIG_MATH_EMULATION
780void math_emulate(struct math_emu_info *info)
781{
782 printk(KERN_EMERG
783 "math-emulation not enabled and no coprocessor found.\n");
784 printk(KERN_EMERG "killing %s.\n", current->comm);
785 force_sig(SIGFPE, current);
786 schedule();
787}
788#endif /* CONFIG_MATH_EMULATION */
789
790dotraplinkage void __kprobes 779dotraplinkage void __kprobes
791do_device_not_available(struct pt_regs *regs, long error_code) 780do_device_not_available(struct pt_regs *regs, long error_code)
792{ 781{
793#ifdef CONFIG_X86_32 782#ifdef CONFIG_MATH_EMULATION
794 if (read_cr0() & X86_CR0_EM) { 783 if (read_cr0() & X86_CR0_EM) {
795 struct math_emu_info info = { }; 784 struct math_emu_info info = { };
796 785
@@ -798,12 +787,12 @@ do_device_not_available(struct pt_regs *regs, long error_code)
798 787
799 info.regs = regs; 788 info.regs = regs;
800 math_emulate(&info); 789 math_emulate(&info);
801 } else { 790 return;
802 math_state_restore(); /* interrupts still off */
803 conditional_sti(regs);
804 } 791 }
805#else 792#endif
806 math_state_restore(); 793 math_state_restore(); /* interrupts still off */
794#ifdef CONFIG_X86_32
795 conditional_sti(regs);
807#endif 796#endif
808} 797}
809 798
@@ -881,18 +870,6 @@ void __init trap_init(void)
881#endif 870#endif
882 871
883#ifdef CONFIG_X86_32 872#ifdef CONFIG_X86_32
884 if (cpu_has_fxsr) {
885 printk(KERN_INFO "Enabling fast FPU save and restore... ");
886 set_in_cr4(X86_CR4_OSFXSR);
887 printk("done.\n");
888 }
889 if (cpu_has_xmm) {
890 printk(KERN_INFO
891 "Enabling unmasked SIMD FPU exception support... ");
892 set_in_cr4(X86_CR4_OSXMMEXCPT);
893 printk("done.\n");
894 }
895
896 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 873 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
897 set_bit(SYSCALL_VECTOR, used_vectors); 874 set_bit(SYSCALL_VECTOR, used_vectors);
898#endif 875#endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 26a863a9c2a8..0c40d8b72416 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,10 +104,14 @@ int __init notsc_setup(char *str)
104 104
105__setup("notsc", notsc_setup); 105__setup("notsc", notsc_setup);
106 106
107static int no_sched_irq_time;
108
107static int __init tsc_setup(char *str) 109static int __init tsc_setup(char *str)
108{ 110{
109 if (!strcmp(str, "reliable")) 111 if (!strcmp(str, "reliable"))
110 tsc_clocksource_reliable = 1; 112 tsc_clocksource_reliable = 1;
113 if (!strncmp(str, "noirqtime", 9))
114 no_sched_irq_time = 1;
111 return 1; 115 return 1;
112} 116}
113 117
@@ -801,6 +805,7 @@ void mark_tsc_unstable(char *reason)
801 if (!tsc_unstable) { 805 if (!tsc_unstable) {
802 tsc_unstable = 1; 806 tsc_unstable = 1;
803 sched_clock_stable = 0; 807 sched_clock_stable = 0;
808 disable_sched_clock_irqtime();
804 printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); 809 printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
805 /* Change only the rating, when not registered */ 810 /* Change only the rating, when not registered */
806 if (clocksource_tsc.mult) 811 if (clocksource_tsc.mult)
@@ -892,60 +897,6 @@ static void __init init_tsc_clocksource(void)
892 clocksource_register_khz(&clocksource_tsc, tsc_khz); 897 clocksource_register_khz(&clocksource_tsc, tsc_khz);
893} 898}
894 899
895#ifdef CONFIG_X86_64
896/*
897 * calibrate_cpu is used on systems with fixed rate TSCs to determine
898 * processor frequency
899 */
900#define TICK_COUNT 100000000
901static unsigned long __init calibrate_cpu(void)
902{
903 int tsc_start, tsc_now;
904 int i, no_ctr_free;
905 unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
906 unsigned long flags;
907
908 for (i = 0; i < 4; i++)
909 if (avail_to_resrv_perfctr_nmi_bit(i))
910 break;
911 no_ctr_free = (i == 4);
912 if (no_ctr_free) {
913 WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
914 "cpu_khz value may be incorrect.\n");
915 i = 3;
916 rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
917 wrmsrl(MSR_K7_EVNTSEL3, 0);
918 rdmsrl(MSR_K7_PERFCTR3, pmc3);
919 } else {
920 reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
921 reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
922 }
923 local_irq_save(flags);
924 /* start measuring cycles, incrementing from 0 */
925 wrmsrl(MSR_K7_PERFCTR0 + i, 0);
926 wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
927 rdtscl(tsc_start);
928 do {
929 rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
930 tsc_now = get_cycles();
931 } while ((tsc_now - tsc_start) < TICK_COUNT);
932
933 local_irq_restore(flags);
934 if (no_ctr_free) {
935 wrmsrl(MSR_K7_EVNTSEL3, 0);
936 wrmsrl(MSR_K7_PERFCTR3, pmc3);
937 wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
938 } else {
939 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
940 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
941 }
942
943 return pmc_now * tsc_khz / (tsc_now - tsc_start);
944}
945#else
946static inline unsigned long calibrate_cpu(void) { return cpu_khz; }
947#endif
948
949void __init tsc_init(void) 900void __init tsc_init(void)
950{ 901{
951 u64 lpj; 902 u64 lpj;
@@ -964,10 +915,6 @@ void __init tsc_init(void)
964 return; 915 return;
965 } 916 }
966 917
967 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
968 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
969 cpu_khz = calibrate_cpu();
970
971 printk("Detected %lu.%03lu MHz processor.\n", 918 printk("Detected %lu.%03lu MHz processor.\n",
972 (unsigned long)cpu_khz / 1000, 919 (unsigned long)cpu_khz / 1000,
973 (unsigned long)cpu_khz % 1000); 920 (unsigned long)cpu_khz % 1000);
@@ -987,6 +934,9 @@ void __init tsc_init(void)
987 /* now allow native_sched_clock() to use rdtsc */ 934 /* now allow native_sched_clock() to use rdtsc */
988 tsc_disabled = 0; 935 tsc_disabled = 0;
989 936
937 if (!no_sched_irq_time)
938 enable_sched_clock_irqtime();
939
990 lpj = ((u64)tsc_khz * 1000); 940 lpj = ((u64)tsc_khz * 1000);
991 do_div(lpj, HZ); 941 do_div(lpj, HZ);
992 lpj_fine = lpj; 942 lpj_fine = lpj;
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 1132129db792..7b24460917d5 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -28,34 +28,21 @@ struct uv_irq_2_mmr_pnode{
28static spinlock_t uv_irq_lock; 28static spinlock_t uv_irq_lock;
29static struct rb_root uv_irq_root; 29static struct rb_root uv_irq_root;
30 30
31static int uv_set_irq_affinity(unsigned int, const struct cpumask *); 31static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool);
32 32
33static void uv_noop(unsigned int irq) 33static void uv_noop(struct irq_data *data) { }
34{
35}
36
37static unsigned int uv_noop_ret(unsigned int irq)
38{
39 return 0;
40}
41 34
42static void uv_ack_apic(unsigned int irq) 35static void uv_ack_apic(struct irq_data *data)
43{ 36{
44 ack_APIC_irq(); 37 ack_APIC_irq();
45} 38}
46 39
47static struct irq_chip uv_irq_chip = { 40static struct irq_chip uv_irq_chip = {
48 .name = "UV-CORE", 41 .name = "UV-CORE",
49 .startup = uv_noop_ret, 42 .irq_mask = uv_noop,
50 .shutdown = uv_noop, 43 .irq_unmask = uv_noop,
51 .enable = uv_noop, 44 .irq_eoi = uv_ack_apic,
52 .disable = uv_noop, 45 .irq_set_affinity = uv_set_irq_affinity,
53 .ack = uv_noop,
54 .mask = uv_noop,
55 .unmask = uv_noop,
56 .eoi = uv_ack_apic,
57 .end = uv_noop,
58 .set_affinity = uv_set_irq_affinity,
59}; 46};
60 47
61/* 48/*
@@ -144,26 +131,22 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
144 unsigned long mmr_offset, int limit) 131 unsigned long mmr_offset, int limit)
145{ 132{
146 const struct cpumask *eligible_cpu = cpumask_of(cpu); 133 const struct cpumask *eligible_cpu = cpumask_of(cpu);
147 struct irq_desc *desc = irq_to_desc(irq); 134 struct irq_cfg *cfg = get_irq_chip_data(irq);
148 struct irq_cfg *cfg;
149 int mmr_pnode;
150 unsigned long mmr_value; 135 unsigned long mmr_value;
151 struct uv_IO_APIC_route_entry *entry; 136 struct uv_IO_APIC_route_entry *entry;
152 int err; 137 int mmr_pnode, err;
153 138
154 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != 139 BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
155 sizeof(unsigned long)); 140 sizeof(unsigned long));
156 141
157 cfg = irq_cfg(irq);
158
159 err = assign_irq_vector(irq, cfg, eligible_cpu); 142 err = assign_irq_vector(irq, cfg, eligible_cpu);
160 if (err != 0) 143 if (err != 0)
161 return err; 144 return err;
162 145
163 if (limit == UV_AFFINITY_CPU) 146 if (limit == UV_AFFINITY_CPU)
164 desc->status |= IRQ_NO_BALANCING; 147 irq_set_status_flags(irq, IRQ_NO_BALANCING);
165 else 148 else
166 desc->status |= IRQ_MOVE_PCNTXT; 149 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
167 150
168 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, 151 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
169 irq_name); 152 irq_name);
@@ -206,17 +189,17 @@ static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
206 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 189 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
207} 190}
208 191
209static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) 192static int
193uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
194 bool force)
210{ 195{
211 struct irq_desc *desc = irq_to_desc(irq); 196 struct irq_cfg *cfg = data->chip_data;
212 struct irq_cfg *cfg = desc->chip_data;
213 unsigned int dest; 197 unsigned int dest;
214 unsigned long mmr_value; 198 unsigned long mmr_value, mmr_offset;
215 struct uv_IO_APIC_route_entry *entry; 199 struct uv_IO_APIC_route_entry *entry;
216 unsigned long mmr_offset;
217 int mmr_pnode; 200 int mmr_pnode;
218 201
219 if (set_desc_affinity(desc, mask, &dest)) 202 if (__ioapic_set_affinity(data, mask, &dest))
220 return -1; 203 return -1;
221 204
222 mmr_value = 0; 205 mmr_value = 0;
@@ -231,7 +214,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
231 entry->dest = dest; 214 entry->dest = dest;
232 215
233 /* Get previously stored MMR and pnode of hub sourcing interrupts */ 216 /* Get previously stored MMR and pnode of hub sourcing interrupts */
234 if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) 217 if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode))
235 return -1; 218 return -1;
236 219
237 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 220 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index e680ea52db9b..3371bd053b89 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -66,10 +66,7 @@ static void __init visws_time_init(void)
66} 66}
67 67
68/* Replaces the default init_ISA_irqs in the generic setup */ 68/* Replaces the default init_ISA_irqs in the generic setup */
69static void __init visws_pre_intr_init(void) 69static void __init visws_pre_intr_init(void);
70{
71 init_VISWS_APIC_irqs();
72}
73 70
74/* Quirk for machine specific memory setup. */ 71/* Quirk for machine specific memory setup. */
75 72
@@ -429,67 +426,34 @@ static int is_co_apic(unsigned int irq)
429/* 426/*
430 * This is the SGI Cobalt (IO-)APIC: 427 * This is the SGI Cobalt (IO-)APIC:
431 */ 428 */
432 429static void enable_cobalt_irq(struct irq_data *data)
433static void enable_cobalt_irq(unsigned int irq)
434{ 430{
435 co_apic_set(is_co_apic(irq), irq); 431 co_apic_set(is_co_apic(data->irq), data->irq);
436} 432}
437 433
438static void disable_cobalt_irq(unsigned int irq) 434static void disable_cobalt_irq(struct irq_data *data)
439{ 435{
440 int entry = is_co_apic(irq); 436 int entry = is_co_apic(data->irq);
441 437
442 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); 438 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
443 co_apic_read(CO_APIC_LO(entry)); 439 co_apic_read(CO_APIC_LO(entry));
444} 440}
445 441
446/* 442static void ack_cobalt_irq(struct irq_data *data)
447 * "irq" really just serves to identify the device. Here is where we
448 * map this to the Cobalt APIC entry where it's physically wired.
449 * This is called via request_irq -> setup_irq -> irq_desc->startup()
450 */
451static unsigned int startup_cobalt_irq(unsigned int irq)
452{ 443{
453 unsigned long flags; 444 unsigned long flags;
454 struct irq_desc *desc = irq_to_desc(irq);
455 445
456 spin_lock_irqsave(&cobalt_lock, flags); 446 spin_lock_irqsave(&cobalt_lock, flags);
457 if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) 447 disable_cobalt_irq(data);
458 desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
459 enable_cobalt_irq(irq);
460 spin_unlock_irqrestore(&cobalt_lock, flags);
461 return 0;
462}
463
464static void ack_cobalt_irq(unsigned int irq)
465{
466 unsigned long flags;
467
468 spin_lock_irqsave(&cobalt_lock, flags);
469 disable_cobalt_irq(irq);
470 apic_write(APIC_EOI, APIC_EIO_ACK); 448 apic_write(APIC_EOI, APIC_EIO_ACK);
471 spin_unlock_irqrestore(&cobalt_lock, flags); 449 spin_unlock_irqrestore(&cobalt_lock, flags);
472} 450}
473 451
474static void end_cobalt_irq(unsigned int irq)
475{
476 unsigned long flags;
477 struct irq_desc *desc = irq_to_desc(irq);
478
479 spin_lock_irqsave(&cobalt_lock, flags);
480 if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)))
481 enable_cobalt_irq(irq);
482 spin_unlock_irqrestore(&cobalt_lock, flags);
483}
484
485static struct irq_chip cobalt_irq_type = { 452static struct irq_chip cobalt_irq_type = {
486 .name = "Cobalt-APIC", 453 .name = "Cobalt-APIC",
487 .startup = startup_cobalt_irq, 454 .irq_enable = enable_cobalt_irq,
488 .shutdown = disable_cobalt_irq, 455 .irq_disable = disable_cobalt_irq,
489 .enable = enable_cobalt_irq, 456 .irq_ack = ack_cobalt_irq,
490 .disable = disable_cobalt_irq,
491 .ack = ack_cobalt_irq,
492 .end = end_cobalt_irq,
493}; 457};
494 458
495 459
@@ -503,35 +467,34 @@ static struct irq_chip cobalt_irq_type = {
503 * interrupt controller type, and through a special virtual interrupt- 467 * interrupt controller type, and through a special virtual interrupt-
504 * controller. Device drivers only see the virtual interrupt sources. 468 * controller. Device drivers only see the virtual interrupt sources.
505 */ 469 */
506static unsigned int startup_piix4_master_irq(unsigned int irq) 470static unsigned int startup_piix4_master_irq(struct irq_data *data)
507{ 471{
508 legacy_pic->init(0); 472 legacy_pic->init(0);
509 473 enable_cobalt_irq(data);
510 return startup_cobalt_irq(irq);
511} 474}
512 475
513static void end_piix4_master_irq(unsigned int irq) 476static void end_piix4_master_irq(struct irq_data *data)
514{ 477{
515 unsigned long flags; 478 unsigned long flags;
516 479
517 spin_lock_irqsave(&cobalt_lock, flags); 480 spin_lock_irqsave(&cobalt_lock, flags);
518 enable_cobalt_irq(irq); 481 enable_cobalt_irq(data);
519 spin_unlock_irqrestore(&cobalt_lock, flags); 482 spin_unlock_irqrestore(&cobalt_lock, flags);
520} 483}
521 484
522static struct irq_chip piix4_master_irq_type = { 485static struct irq_chip piix4_master_irq_type = {
523 .name = "PIIX4-master", 486 .name = "PIIX4-master",
524 .startup = startup_piix4_master_irq, 487 .irq_startup = startup_piix4_master_irq,
525 .ack = ack_cobalt_irq, 488 .irq_ack = ack_cobalt_irq,
526 .end = end_piix4_master_irq,
527}; 489};
528 490
491static void pii4_mask(struct irq_data *data) { }
529 492
530static struct irq_chip piix4_virtual_irq_type = { 493static struct irq_chip piix4_virtual_irq_type = {
531 .name = "PIIX4-virtual", 494 .name = "PIIX4-virtual",
495 .mask = pii4_mask,
532}; 496};
533 497
534
535/* 498/*
536 * PIIX4-8259 master/virtual functions to handle interrupt requests 499 * PIIX4-8259 master/virtual functions to handle interrupt requests
537 * from legacy devices: floppy, parallel, serial, rtc. 500 * from legacy devices: floppy, parallel, serial, rtc.
@@ -549,9 +512,8 @@ static struct irq_chip piix4_virtual_irq_type = {
549 */ 512 */
550static irqreturn_t piix4_master_intr(int irq, void *dev_id) 513static irqreturn_t piix4_master_intr(int irq, void *dev_id)
551{ 514{
552 int realirq;
553 struct irq_desc *desc;
554 unsigned long flags; 515 unsigned long flags;
516 int realirq;
555 517
556 raw_spin_lock_irqsave(&i8259A_lock, flags); 518 raw_spin_lock_irqsave(&i8259A_lock, flags);
557 519
@@ -592,18 +554,10 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
592 554
593 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 555 raw_spin_unlock_irqrestore(&i8259A_lock, flags);
594 556
595 desc = irq_to_desc(realirq);
596
597 /* 557 /*
598 * handle this 'virtual interrupt' as a Cobalt one now. 558 * handle this 'virtual interrupt' as a Cobalt one now.
599 */ 559 */
600 kstat_incr_irqs_this_cpu(realirq, desc); 560 generic_handle_irq(realirq);
601
602 if (likely(desc->action != NULL))
603 handle_IRQ_event(realirq, desc->action);
604
605 if (!(desc->status & IRQ_DISABLED))
606 legacy_pic->chip->unmask(realirq);
607 561
608 return IRQ_HANDLED; 562 return IRQ_HANDLED;
609 563
@@ -624,41 +578,35 @@ static struct irqaction cascade_action = {
624 578
625static inline void set_piix4_virtual_irq_type(void) 579static inline void set_piix4_virtual_irq_type(void)
626{ 580{
627 piix4_virtual_irq_type.shutdown = i8259A_chip.mask;
628 piix4_virtual_irq_type.enable = i8259A_chip.unmask; 581 piix4_virtual_irq_type.enable = i8259A_chip.unmask;
629 piix4_virtual_irq_type.disable = i8259A_chip.mask; 582 piix4_virtual_irq_type.disable = i8259A_chip.mask;
583 piix4_virtual_irq_type.unmask = i8259A_chip.unmask;
630} 584}
631 585
632void init_VISWS_APIC_irqs(void) 586static void __init visws_pre_intr_init(void)
633{ 587{
634 int i; 588 int i;
635 589
636 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { 590 set_piix4_virtual_irq_type();
637 struct irq_desc *desc = irq_to_desc(i);
638
639 desc->status = IRQ_DISABLED;
640 desc->action = 0;
641 desc->depth = 1;
642 591
643 if (i == 0) { 592 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
644 desc->chip = &cobalt_irq_type; 593 struct irq_chip *chip = NULL;
645 } 594
646 else if (i == CO_IRQ_IDE0) { 595 if (i == 0)
647 desc->chip = &cobalt_irq_type; 596 chip = &cobalt_irq_type;
648 } 597 else if (i == CO_IRQ_IDE0)
649 else if (i == CO_IRQ_IDE1) { 598 chip = &cobalt_irq_type;
650 desc->chip = &cobalt_irq_type; 599 else if (i == CO_IRQ_IDE1)
651 } 600 >chip = &cobalt_irq_type;
652 else if (i == CO_IRQ_8259) { 601 else if (i == CO_IRQ_8259)
653 desc->chip = &piix4_master_irq_type; 602 chip = &piix4_master_irq_type;
654 } 603 else if (i < CO_IRQ_APIC0)
655 else if (i < CO_IRQ_APIC0) { 604 chip = &piix4_virtual_irq_type;
656 set_piix4_virtual_irq_type(); 605 else if (IS_CO_APIC(i))
657 desc->chip = &piix4_virtual_irq_type; 606 chip = &cobalt_irq_type;
658 } 607
659 else if (IS_CO_APIC(i)) { 608 if (chip)
660 desc->chip = &cobalt_irq_type; 609 set_irq_chip(i, chip);
661 }
662 } 610 }
663 611
664 setup_irq(CO_IRQ_8259, &master_action); 612 setup_irq(CO_IRQ_8259, &master_action);
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
deleted file mode 100644
index ce9fbacb7526..000000000000
--- a/arch/x86/kernel/vmi_32.c
+++ /dev/null
@@ -1,893 +0,0 @@
1/*
2 * VMI specific paravirt-ops implementation
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to zach@vmware.com
22 *
23 */
24
25#include <linux/module.h>
26#include <linux/cpu.h>
27#include <linux/bootmem.h>
28#include <linux/mm.h>
29#include <linux/highmem.h>
30#include <linux/sched.h>
31#include <linux/gfp.h>
32#include <asm/vmi.h>
33#include <asm/io.h>
34#include <asm/fixmap.h>
35#include <asm/apicdef.h>
36#include <asm/apic.h>
37#include <asm/pgalloc.h>
38#include <asm/processor.h>
39#include <asm/timer.h>
40#include <asm/vmi_time.h>
41#include <asm/kmap_types.h>
42#include <asm/setup.h>
43
44/* Convenient for calling VMI functions indirectly in the ROM */
45typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
46typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int);
47
48#define call_vrom_func(rom,func) \
49 (((VROMFUNC *)(rom->func))())
50
51#define call_vrom_long_func(rom,func,arg) \
52 (((VROMLONGFUNC *)(rom->func)) (arg))
53
54static struct vrom_header *vmi_rom;
55static int disable_pge;
56static int disable_pse;
57static int disable_sep;
58static int disable_tsc;
59static int disable_mtrr;
60static int disable_noidle;
61static int disable_vmi_timer;
62
63/* Cached VMI operations */
64static struct {
65 void (*cpuid)(void /* non-c */);
66 void (*_set_ldt)(u32 selector);
67 void (*set_tr)(u32 selector);
68 void (*write_idt_entry)(struct desc_struct *, int, u32, u32);
69 void (*write_gdt_entry)(struct desc_struct *, int, u32, u32);
70 void (*write_ldt_entry)(struct desc_struct *, int, u32, u32);
71 void (*set_kernel_stack)(u32 selector, u32 sp0);
72 void (*allocate_page)(u32, u32, u32, u32, u32);
73 void (*release_page)(u32, u32);
74 void (*set_pte)(pte_t, pte_t *, unsigned);
75 void (*update_pte)(pte_t *, unsigned);
76 void (*set_linear_mapping)(int, void *, u32, u32);
77 void (*_flush_tlb)(int);
78 void (*set_initial_ap_state)(int, int);
79 void (*halt)(void);
80 void (*set_lazy_mode)(int mode);
81} vmi_ops;
82
83/* Cached VMI operations */
84struct vmi_timer_ops vmi_timer_ops;
85
86/*
87 * VMI patching routines.
88 */
89#define MNEM_CALL 0xe8
90#define MNEM_JMP 0xe9
91#define MNEM_RET 0xc3
92
93#define IRQ_PATCH_INT_MASK 0
94#define IRQ_PATCH_DISABLE 5
95
96static inline void patch_offset(void *insnbuf,
97 unsigned long ip, unsigned long dest)
98{
99 *(unsigned long *)(insnbuf+1) = dest-ip-5;
100}
101
102static unsigned patch_internal(int call, unsigned len, void *insnbuf,
103 unsigned long ip)
104{
105 u64 reloc;
106 struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc;
107 reloc = call_vrom_long_func(vmi_rom, get_reloc, call);
108 switch(rel->type) {
109 case VMI_RELOCATION_CALL_REL:
110 BUG_ON(len < 5);
111 *(char *)insnbuf = MNEM_CALL;
112 patch_offset(insnbuf, ip, (unsigned long)rel->eip);
113 return 5;
114
115 case VMI_RELOCATION_JUMP_REL:
116 BUG_ON(len < 5);
117 *(char *)insnbuf = MNEM_JMP;
118 patch_offset(insnbuf, ip, (unsigned long)rel->eip);
119 return 5;
120
121 case VMI_RELOCATION_NOP:
122 /* obliterate the whole thing */
123 return 0;
124
125 case VMI_RELOCATION_NONE:
126 /* leave native code in place */
127 break;
128
129 default:
130 BUG();
131 }
132 return len;
133}
134
135/*
136 * Apply patch if appropriate, return length of new instruction
137 * sequence. The callee does nop padding for us.
138 */
139static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
140 unsigned long ip, unsigned len)
141{
142 switch (type) {
143 case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
144 return patch_internal(VMI_CALL_DisableInterrupts, len,
145 insns, ip);
146 case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
147 return patch_internal(VMI_CALL_EnableInterrupts, len,
148 insns, ip);
149 case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
150 return patch_internal(VMI_CALL_SetInterruptMask, len,
151 insns, ip);
152 case PARAVIRT_PATCH(pv_irq_ops.save_fl):
153 return patch_internal(VMI_CALL_GetInterruptMask, len,
154 insns, ip);
155 case PARAVIRT_PATCH(pv_cpu_ops.iret):
156 return patch_internal(VMI_CALL_IRET, len, insns, ip);
157 case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
158 return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
159 default:
160 break;
161 }
162 return len;
163}
164
165/* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */
166static void vmi_cpuid(unsigned int *ax, unsigned int *bx,
167 unsigned int *cx, unsigned int *dx)
168{
169 int override = 0;
170 if (*ax == 1)
171 override = 1;
172 asm volatile ("call *%6"
173 : "=a" (*ax),
174 "=b" (*bx),
175 "=c" (*cx),
176 "=d" (*dx)
177 : "0" (*ax), "2" (*cx), "r" (vmi_ops.cpuid));
178 if (override) {
179 if (disable_pse)
180 *dx &= ~X86_FEATURE_PSE;
181 if (disable_pge)
182 *dx &= ~X86_FEATURE_PGE;
183 if (disable_sep)
184 *dx &= ~X86_FEATURE_SEP;
185 if (disable_tsc)
186 *dx &= ~X86_FEATURE_TSC;
187 if (disable_mtrr)
188 *dx &= ~X86_FEATURE_MTRR;
189 }
190}
191
192static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new)
193{
194 if (gdt[nr].a != new->a || gdt[nr].b != new->b)
195 write_gdt_entry(gdt, nr, new, 0);
196}
197
198static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
199{
200 struct desc_struct *gdt = get_cpu_gdt_table(cpu);
201 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]);
202 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]);
203 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]);
204}
205
206static void vmi_set_ldt(const void *addr, unsigned entries)
207{
208 unsigned cpu = smp_processor_id();
209 struct desc_struct desc;
210
211 pack_descriptor(&desc, (unsigned long)addr,
212 entries * sizeof(struct desc_struct) - 1,
213 DESC_LDT, 0);
214 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, &desc, DESC_LDT);
215 vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0);
216}
217
218static void vmi_set_tr(void)
219{
220 vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct));
221}
222
223static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
224{
225 u32 *idt_entry = (u32 *)g;
226 vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]);
227}
228
229static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
230 const void *desc, int type)
231{
232 u32 *gdt_entry = (u32 *)desc;
233 vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]);
234}
235
236static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
237 const void *desc)
238{
239 u32 *ldt_entry = (u32 *)desc;
240 vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
241}
242
243static void vmi_load_sp0(struct tss_struct *tss,
244 struct thread_struct *thread)
245{
246 tss->x86_tss.sp0 = thread->sp0;
247
248 /* This can only happen when SEP is enabled, no need to test "SEP"arately */
249 if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
250 tss->x86_tss.ss1 = thread->sysenter_cs;
251 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
252 }
253 vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.sp0);
254}
255
256static void vmi_flush_tlb_user(void)
257{
258 vmi_ops._flush_tlb(VMI_FLUSH_TLB);
259}
260
261static void vmi_flush_tlb_kernel(void)
262{
263 vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL);
264}
265
266/* Stub to do nothing at all; used for delays and unimplemented calls */
267static void vmi_nop(void)
268{
269}
270
271static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
272{
273 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
274}
275
276static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
277{
278 /*
279 * This call comes in very early, before mem_map is setup.
280 * It is called only for swapper_pg_dir, which already has
281 * data on it.
282 */
283 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
284}
285
286static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
287{
288 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
289}
290
291static void vmi_release_pte(unsigned long pfn)
292{
293 vmi_ops.release_page(pfn, VMI_PAGE_L1);
294}
295
296static void vmi_release_pmd(unsigned long pfn)
297{
298 vmi_ops.release_page(pfn, VMI_PAGE_L2);
299}
300
301/*
302 * We use the pgd_free hook for releasing the pgd page:
303 */
304static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd)
305{
306 unsigned long pfn = __pa(pgd) >> PAGE_SHIFT;
307
308 vmi_ops.release_page(pfn, VMI_PAGE_L2);
309}
310
311/*
312 * Helper macros for MMU update flags. We can defer updates until a flush
313 * or page invalidation only if the update is to the current address space
314 * (otherwise, there is no flush). We must check against init_mm, since
315 * this could be a kernel update, which usually passes init_mm, although
316 * sometimes this check can be skipped if we know the particular function
317 * is only called on user mode PTEs. We could change the kernel to pass
318 * current->active_mm here, but in particular, I was unsure if changing
319 * mm/highmem.c to do this would still be correct on other architectures.
320 */
321#define is_current_as(mm, mustbeuser) ((mm) == current->active_mm || \
322 (!mustbeuser && (mm) == &init_mm))
323#define vmi_flags_addr(mm, addr, level, user) \
324 ((level) | (is_current_as(mm, user) ? \
325 (VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
326#define vmi_flags_addr_defer(mm, addr, level, user) \
327 ((level) | (is_current_as(mm, user) ? \
328 (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
329
330static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
331{
332 vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
333}
334
335static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
336{
337 vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
338}
339
340static void vmi_set_pte(pte_t *ptep, pte_t pte)
341{
342 /* XXX because of set_pmd_pte, this can be called on PT or PD layers */
343 vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
344}
345
346static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
347{
348 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
349}
350
351static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
352{
353#ifdef CONFIG_X86_PAE
354 const pte_t pte = { .pte = pmdval.pmd };
355#else
356 const pte_t pte = { pmdval.pud.pgd.pgd };
357#endif
358 vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD);
359}
360
361#ifdef CONFIG_X86_PAE
362
363static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
364{
365 /*
366 * XXX This is called from set_pmd_pte, but at both PT
367 * and PD layers so the VMI_PAGE_PT flag is wrong. But
368 * it is only called for large page mapping changes,
369 * the Xen backend, doesn't support large pages, and the
370 * ESX backend doesn't depend on the flag.
371 */
372 set_64bit((unsigned long long *)ptep,pte_val(pteval));
373 vmi_ops.update_pte(ptep, VMI_PAGE_PT);
374}
375
376static void vmi_set_pud(pud_t *pudp, pud_t pudval)
377{
378 /* Um, eww */
379 const pte_t pte = { .pte = pudval.pgd.pgd };
380 vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
381}
382
383static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
384{
385 const pte_t pte = { .pte = 0 };
386 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
387}
388
389static void vmi_pmd_clear(pmd_t *pmd)
390{
391 const pte_t pte = { .pte = 0 };
392 vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
393}
394#endif
395
396#ifdef CONFIG_SMP
397static void __devinit
398vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
399 unsigned long start_esp)
400{
401 struct vmi_ap_state ap;
402
403 /* Default everything to zero. This is fine for most GPRs. */
404 memset(&ap, 0, sizeof(struct vmi_ap_state));
405
406 ap.gdtr_limit = GDT_SIZE - 1;
407 ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid);
408
409 ap.idtr_limit = IDT_ENTRIES * 8 - 1;
410 ap.idtr_base = (unsigned long) idt_table;
411
412 ap.ldtr = 0;
413
414 ap.cs = __KERNEL_CS;
415 ap.eip = (unsigned long) start_eip;
416 ap.ss = __KERNEL_DS;
417 ap.esp = (unsigned long) start_esp;
418
419 ap.ds = __USER_DS;
420 ap.es = __USER_DS;
421 ap.fs = __KERNEL_PERCPU;
422 ap.gs = __KERNEL_STACK_CANARY;
423
424 ap.eflags = 0;
425
426#ifdef CONFIG_X86_PAE
427 /* efer should match BSP efer. */
428 if (cpu_has_nx) {
429 unsigned l, h;
430 rdmsr(MSR_EFER, l, h);
431 ap.efer = (unsigned long long) h << 32 | l;
432 }
433#endif
434
435 ap.cr3 = __pa(swapper_pg_dir);
436 /* Protected mode, paging, AM, WP, NE, MP. */
437 ap.cr0 = 0x80050023;
438 ap.cr4 = mmu_cr4_features;
439 vmi_ops.set_initial_ap_state((u32)&ap, phys_apicid);
440}
441#endif
442
443static void vmi_start_context_switch(struct task_struct *prev)
444{
445 paravirt_start_context_switch(prev);
446 vmi_ops.set_lazy_mode(2);
447}
448
449static void vmi_end_context_switch(struct task_struct *next)
450{
451 vmi_ops.set_lazy_mode(0);
452 paravirt_end_context_switch(next);
453}
454
455static void vmi_enter_lazy_mmu(void)
456{
457 paravirt_enter_lazy_mmu();
458 vmi_ops.set_lazy_mode(1);
459}
460
461static void vmi_leave_lazy_mmu(void)
462{
463 vmi_ops.set_lazy_mode(0);
464 paravirt_leave_lazy_mmu();
465}
466
467static inline int __init check_vmi_rom(struct vrom_header *rom)
468{
469 struct pci_header *pci;
470 struct pnp_header *pnp;
471 const char *manufacturer = "UNKNOWN";
472 const char *product = "UNKNOWN";
473 const char *license = "unspecified";
474
475 if (rom->rom_signature != 0xaa55)
476 return 0;
477 if (rom->vrom_signature != VMI_SIGNATURE)
478 return 0;
479 if (rom->api_version_maj != VMI_API_REV_MAJOR ||
480 rom->api_version_min+1 < VMI_API_REV_MINOR+1) {
481 printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n",
482 rom->api_version_maj,
483 rom->api_version_min);
484 return 0;
485 }
486
487 /*
488 * Relying on the VMI_SIGNATURE field is not 100% safe, so check
489 * the PCI header and device type to make sure this is really a
490 * VMI device.
491 */
492 if (!rom->pci_header_offs) {
493 printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n");
494 return 0;
495 }
496
497 pci = (struct pci_header *)((char *)rom+rom->pci_header_offs);
498 if (pci->vendorID != PCI_VENDOR_ID_VMWARE ||
499 pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) {
500 /* Allow it to run... anyways, but warn */
501 printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n");
502 }
503
504 if (rom->pnp_header_offs) {
505 pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs);
506 if (pnp->manufacturer_offset)
507 manufacturer = (const char *)rom+pnp->manufacturer_offset;
508 if (pnp->product_offset)
509 product = (const char *)rom+pnp->product_offset;
510 }
511
512 if (rom->license_offs)
513 license = (char *)rom+rom->license_offs;
514
515 printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n",
516 manufacturer, product,
517 rom->api_version_maj, rom->api_version_min,
518 pci->rom_version_maj, pci->rom_version_min);
519
520 /* Don't allow BSD/MIT here for now because we don't want to end up
521 with any binary only shim layers */
522 if (strcmp(license, "GPL") && strcmp(license, "GPL v2")) {
523 printk(KERN_WARNING "VMI: Non GPL license `%s' found for ROM. Not used.\n",
524 license);
525 return 0;
526 }
527
528 return 1;
529}
530
531/*
532 * Probe for the VMI option ROM
533 */
534static inline int __init probe_vmi_rom(void)
535{
536 unsigned long base;
537
538 /* VMI ROM is in option ROM area, check signature */
539 for (base = 0xC0000; base < 0xE0000; base += 2048) {
540 struct vrom_header *romstart;
541 romstart = (struct vrom_header *)isa_bus_to_virt(base);
542 if (check_vmi_rom(romstart)) {
543 vmi_rom = romstart;
544 return 1;
545 }
546 }
547 return 0;
548}
549
550/*
551 * VMI setup common to all processors
552 */
553void vmi_bringup(void)
554{
555 /* We must establish the lowmem mapping for MMU ops to work */
556 if (vmi_ops.set_linear_mapping)
557 vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
558}
559
560/*
561 * Return a pointer to a VMI function or NULL if unimplemented
562 */
563static void *vmi_get_function(int vmicall)
564{
565 u64 reloc;
566 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
567 reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall);
568 BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL);
569 if (rel->type == VMI_RELOCATION_CALL_REL)
570 return (void *)rel->eip;
571 else
572 return NULL;
573}
574
575/*
576 * Helper macro for making the VMI paravirt-ops fill code readable.
577 * For unimplemented operations, fall back to default, unless nop
578 * is returned by the ROM.
579 */
580#define para_fill(opname, vmicall) \
581do { \
582 reloc = call_vrom_long_func(vmi_rom, get_reloc, \
583 VMI_CALL_##vmicall); \
584 if (rel->type == VMI_RELOCATION_CALL_REL) \
585 opname = (void *)rel->eip; \
586 else if (rel->type == VMI_RELOCATION_NOP) \
587 opname = (void *)vmi_nop; \
588 else if (rel->type != VMI_RELOCATION_NONE) \
589 printk(KERN_WARNING "VMI: Unknown relocation " \
590 "type %d for " #vmicall"\n",\
591 rel->type); \
592} while (0)
593
594/*
595 * Helper macro for making the VMI paravirt-ops fill code readable.
596 * For cached operations which do not match the VMI ROM ABI and must
597 * go through a tranlation stub. Ignore NOPs, since it is not clear
598 * a NOP * VMI function corresponds to a NOP paravirt-op when the
599 * functions are not in 1-1 correspondence.
600 */
601#define para_wrap(opname, wrapper, cache, vmicall) \
602do { \
603 reloc = call_vrom_long_func(vmi_rom, get_reloc, \
604 VMI_CALL_##vmicall); \
605 BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \
606 if (rel->type == VMI_RELOCATION_CALL_REL) { \
607 opname = wrapper; \
608 vmi_ops.cache = (void *)rel->eip; \
609 } \
610} while (0)
611
612/*
613 * Activate the VMI interface and switch into paravirtualized mode
614 */
615static inline int __init activate_vmi(void)
616{
617 short kernel_cs;
618 u64 reloc;
619 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
620
621 /*
622 * Prevent page tables from being allocated in highmem, even if
623 * CONFIG_HIGHPTE is enabled.
624 */
625 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
626
627 if (call_vrom_func(vmi_rom, vmi_init) != 0) {
628 printk(KERN_ERR "VMI ROM failed to initialize!");
629 return 0;
630 }
631 savesegment(cs, kernel_cs);
632
633 pv_info.paravirt_enabled = 1;
634 pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
635 pv_info.name = "vmi [deprecated]";
636
637 pv_init_ops.patch = vmi_patch;
638
639 /*
640 * Many of these operations are ABI compatible with VMI.
641 * This means we can fill in the paravirt-ops with direct
642 * pointers into the VMI ROM. If the calling convention for
643 * these operations changes, this code needs to be updated.
644 *
645 * Exceptions
646 * CPUID paravirt-op uses pointers, not the native ISA
647 * halt has no VMI equivalent; all VMI halts are "safe"
648 * no MSR support yet - just trap and emulate. VMI uses the
649 * same ABI as the native ISA, but Linux wants exceptions
650 * from bogus MSR read / write handled
651 * rdpmc is not yet used in Linux
652 */
653
654 /* CPUID is special, so very special it gets wrapped like a present */
655 para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID);
656
657 para_fill(pv_cpu_ops.clts, CLTS);
658 para_fill(pv_cpu_ops.get_debugreg, GetDR);
659 para_fill(pv_cpu_ops.set_debugreg, SetDR);
660 para_fill(pv_cpu_ops.read_cr0, GetCR0);
661 para_fill(pv_mmu_ops.read_cr2, GetCR2);
662 para_fill(pv_mmu_ops.read_cr3, GetCR3);
663 para_fill(pv_cpu_ops.read_cr4, GetCR4);
664 para_fill(pv_cpu_ops.write_cr0, SetCR0);
665 para_fill(pv_mmu_ops.write_cr2, SetCR2);
666 para_fill(pv_mmu_ops.write_cr3, SetCR3);
667 para_fill(pv_cpu_ops.write_cr4, SetCR4);
668
669 para_fill(pv_irq_ops.save_fl.func, GetInterruptMask);
670 para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask);
671 para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts);
672 para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts);
673
674 para_fill(pv_cpu_ops.wbinvd, WBINVD);
675 para_fill(pv_cpu_ops.read_tsc, RDTSC);
676
677 /* The following we emulate with trap and emulate for now */
678 /* paravirt_ops.read_msr = vmi_rdmsr */
679 /* paravirt_ops.write_msr = vmi_wrmsr */
680 /* paravirt_ops.rdpmc = vmi_rdpmc */
681
682 /* TR interface doesn't pass TR value, wrap */
683 para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR);
684
685 /* LDT is special, too */
686 para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
687
688 para_fill(pv_cpu_ops.load_gdt, SetGDT);
689 para_fill(pv_cpu_ops.load_idt, SetIDT);
690 para_fill(pv_cpu_ops.store_gdt, GetGDT);
691 para_fill(pv_cpu_ops.store_idt, GetIDT);
692 para_fill(pv_cpu_ops.store_tr, GetTR);
693 pv_cpu_ops.load_tls = vmi_load_tls;
694 para_wrap(pv_cpu_ops.write_ldt_entry, vmi_write_ldt_entry,
695 write_ldt_entry, WriteLDTEntry);
696 para_wrap(pv_cpu_ops.write_gdt_entry, vmi_write_gdt_entry,
697 write_gdt_entry, WriteGDTEntry);
698 para_wrap(pv_cpu_ops.write_idt_entry, vmi_write_idt_entry,
699 write_idt_entry, WriteIDTEntry);
700 para_wrap(pv_cpu_ops.load_sp0, vmi_load_sp0, set_kernel_stack, UpdateKernelStack);
701 para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
702 para_fill(pv_cpu_ops.io_delay, IODelay);
703
704 para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
705 set_lazy_mode, SetLazyMode);
706 para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
707 set_lazy_mode, SetLazyMode);
708
709 para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
710 set_lazy_mode, SetLazyMode);
711 para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
712 set_lazy_mode, SetLazyMode);
713
714 /* user and kernel flush are just handled with different flags to FlushTLB */
715 para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
716 para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
717 para_fill(pv_mmu_ops.flush_tlb_single, InvalPage);
718
719 /*
720 * Until a standard flag format can be agreed on, we need to
721 * implement these as wrappers in Linux. Get the VMI ROM
722 * function pointers for the two backend calls.
723 */
724#ifdef CONFIG_X86_PAE
725 vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong);
726 vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong);
727#else
728 vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
729 vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
730#endif
731
732 if (vmi_ops.set_pte) {
733 pv_mmu_ops.set_pte = vmi_set_pte;
734 pv_mmu_ops.set_pte_at = vmi_set_pte_at;
735 pv_mmu_ops.set_pmd = vmi_set_pmd;
736#ifdef CONFIG_X86_PAE
737 pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic;
738 pv_mmu_ops.set_pud = vmi_set_pud;
739 pv_mmu_ops.pte_clear = vmi_pte_clear;
740 pv_mmu_ops.pmd_clear = vmi_pmd_clear;
741#endif
742 }
743
744 if (vmi_ops.update_pte) {
745 pv_mmu_ops.pte_update = vmi_update_pte;
746 pv_mmu_ops.pte_update_defer = vmi_update_pte_defer;
747 }
748
749 vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
750 if (vmi_ops.allocate_page) {
751 pv_mmu_ops.alloc_pte = vmi_allocate_pte;
752 pv_mmu_ops.alloc_pmd = vmi_allocate_pmd;
753 pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone;
754 }
755
756 vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
757 if (vmi_ops.release_page) {
758 pv_mmu_ops.release_pte = vmi_release_pte;
759 pv_mmu_ops.release_pmd = vmi_release_pmd;
760 pv_mmu_ops.pgd_free = vmi_pgd_free;
761 }
762
763 /* Set linear is needed in all cases */
764 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
765
766 /*
767 * These MUST always be patched. Don't support indirect jumps
768 * through these operations, as the VMI interface may use either
769 * a jump or a call to get to these operations, depending on
770 * the backend. They are performance critical anyway, so requiring
771 * a patch is not a big problem.
772 */
773 pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
774 pv_cpu_ops.iret = (void *)0xbadbab0;
775
776#ifdef CONFIG_SMP
777 para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
778#endif
779
780#ifdef CONFIG_X86_LOCAL_APIC
781 para_fill(apic->read, APICRead);
782 para_fill(apic->write, APICWrite);
783#endif
784
785 /*
786 * Check for VMI timer functionality by probing for a cycle frequency method
787 */
788 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
789 if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) {
790 vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
791 vmi_timer_ops.get_cycle_counter =
792 vmi_get_function(VMI_CALL_GetCycleCounter);
793 vmi_timer_ops.get_wallclock =
794 vmi_get_function(VMI_CALL_GetWallclockTime);
795 vmi_timer_ops.wallclock_updated =
796 vmi_get_function(VMI_CALL_WallclockUpdated);
797 vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
798 vmi_timer_ops.cancel_alarm =
799 vmi_get_function(VMI_CALL_CancelAlarm);
800 x86_init.timers.timer_init = vmi_time_init;
801#ifdef CONFIG_X86_LOCAL_APIC
802 x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init;
803 x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init;
804#endif
805 pv_time_ops.sched_clock = vmi_sched_clock;
806 x86_platform.calibrate_tsc = vmi_tsc_khz;
807 x86_platform.get_wallclock = vmi_get_wallclock;
808 x86_platform.set_wallclock = vmi_set_wallclock;
809
810 /* We have true wallclock functions; disable CMOS clock sync */
811 no_sync_cmos_clock = 1;
812 } else {
813 disable_noidle = 1;
814 disable_vmi_timer = 1;
815 }
816
817 para_fill(pv_irq_ops.safe_halt, Halt);
818
819 /*
820 * Alternative instruction rewriting doesn't happen soon enough
821 * to convert VMI_IRET to a call instead of a jump; so we have
822 * to do this before IRQs get reenabled. Fortunately, it is
823 * idempotent.
824 */
825 apply_paravirt(__parainstructions, __parainstructions_end);
826
827 vmi_bringup();
828
829 return 1;
830}
831
832#undef para_fill
833
834void __init vmi_init(void)
835{
836 if (!vmi_rom)
837 probe_vmi_rom();
838 else
839 check_vmi_rom(vmi_rom);
840
841 /* In case probing for or validating the ROM failed, basil */
842 if (!vmi_rom)
843 return;
844
845 reserve_top_address(-vmi_rom->virtual_top);
846
847#ifdef CONFIG_X86_IO_APIC
848 /* This is virtual hardware; timer routing is wired correctly */
849 no_timer_check = 1;
850#endif
851}
852
853void __init vmi_activate(void)
854{
855 unsigned long flags;
856
857 if (!vmi_rom)
858 return;
859
860 local_irq_save(flags);
861 activate_vmi();
862 local_irq_restore(flags & X86_EFLAGS_IF);
863}
864
865static int __init parse_vmi(char *arg)
866{
867 if (!arg)
868 return -EINVAL;
869
870 if (!strcmp(arg, "disable_pge")) {
871 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
872 disable_pge = 1;
873 } else if (!strcmp(arg, "disable_pse")) {
874 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PSE);
875 disable_pse = 1;
876 } else if (!strcmp(arg, "disable_sep")) {
877 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
878 disable_sep = 1;
879 } else if (!strcmp(arg, "disable_tsc")) {
880 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC);
881 disable_tsc = 1;
882 } else if (!strcmp(arg, "disable_mtrr")) {
883 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_MTRR);
884 disable_mtrr = 1;
885 } else if (!strcmp(arg, "disable_timer")) {
886 disable_vmi_timer = 1;
887 disable_noidle = 1;
888 } else if (!strcmp(arg, "disable_noidle"))
889 disable_noidle = 1;
890 return 0;
891}
892
893early_param("vmi", parse_vmi);
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
deleted file mode 100644
index 5e1ff66ecd73..000000000000
--- a/arch/x86/kernel/vmiclock_32.c
+++ /dev/null
@@ -1,317 +0,0 @@
1/*
2 * VMI paravirtual timer support routines.
3 *
4 * Copyright (C) 2007, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 */
22
23#include <linux/smp.h>
24#include <linux/interrupt.h>
25#include <linux/cpumask.h>
26#include <linux/clocksource.h>
27#include <linux/clockchips.h>
28
29#include <asm/vmi.h>
30#include <asm/vmi_time.h>
31#include <asm/apicdef.h>
32#include <asm/apic.h>
33#include <asm/timer.h>
34#include <asm/i8253.h>
35#include <asm/irq_vectors.h>
36
37#define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
38#define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
39
40static DEFINE_PER_CPU(struct clock_event_device, local_events);
41
42static inline u32 vmi_counter(u32 flags)
43{
44 /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding
45 * cycle counter. */
46 return flags & VMI_ALARM_COUNTER_MASK;
47}
48
49/* paravirt_ops.get_wallclock = vmi_get_wallclock */
50unsigned long vmi_get_wallclock(void)
51{
52 unsigned long long wallclock;
53 wallclock = vmi_timer_ops.get_wallclock(); // nsec
54 (void)do_div(wallclock, 1000000000); // sec
55
56 return wallclock;
57}
58
59/* paravirt_ops.set_wallclock = vmi_set_wallclock */
60int vmi_set_wallclock(unsigned long now)
61{
62 return 0;
63}
64
65/* paravirt_ops.sched_clock = vmi_sched_clock */
66unsigned long long vmi_sched_clock(void)
67{
68 return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
69}
70
71/* x86_platform.calibrate_tsc = vmi_tsc_khz */
72unsigned long vmi_tsc_khz(void)
73{
74 unsigned long long khz;
75 khz = vmi_timer_ops.get_cycle_frequency();
76 (void)do_div(khz, 1000);
77 return khz;
78}
79
80static inline unsigned int vmi_get_timer_vector(void)
81{
82 return IRQ0_VECTOR;
83}
84
85/** vmi clockchip */
86#ifdef CONFIG_X86_LOCAL_APIC
87static unsigned int startup_timer_irq(unsigned int irq)
88{
89 unsigned long val = apic_read(APIC_LVTT);
90 apic_write(APIC_LVTT, vmi_get_timer_vector());
91
92 return (val & APIC_SEND_PENDING);
93}
94
95static void mask_timer_irq(unsigned int irq)
96{
97 unsigned long val = apic_read(APIC_LVTT);
98 apic_write(APIC_LVTT, val | APIC_LVT_MASKED);
99}
100
101static void unmask_timer_irq(unsigned int irq)
102{
103 unsigned long val = apic_read(APIC_LVTT);
104 apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED);
105}
106
107static void ack_timer_irq(unsigned int irq)
108{
109 ack_APIC_irq();
110}
111
112static struct irq_chip vmi_chip __read_mostly = {
113 .name = "VMI-LOCAL",
114 .startup = startup_timer_irq,
115 .mask = mask_timer_irq,
116 .unmask = unmask_timer_irq,
117 .ack = ack_timer_irq
118};
119#endif
120
121/** vmi clockevent */
122#define VMI_ALARM_WIRED_IRQ0 0x00000000
123#define VMI_ALARM_WIRED_LVTT 0x00010000
124static int vmi_wiring = VMI_ALARM_WIRED_IRQ0;
125
126static inline int vmi_get_alarm_wiring(void)
127{
128 return vmi_wiring;
129}
130
131static void vmi_timer_set_mode(enum clock_event_mode mode,
132 struct clock_event_device *evt)
133{
134 cycle_t now, cycles_per_hz;
135 BUG_ON(!irqs_disabled());
136
137 switch (mode) {
138 case CLOCK_EVT_MODE_ONESHOT:
139 case CLOCK_EVT_MODE_RESUME:
140 break;
141 case CLOCK_EVT_MODE_PERIODIC:
142 cycles_per_hz = vmi_timer_ops.get_cycle_frequency();
143 (void)do_div(cycles_per_hz, HZ);
144 now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC));
145 vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz);
146 break;
147 case CLOCK_EVT_MODE_UNUSED:
148 case CLOCK_EVT_MODE_SHUTDOWN:
149 switch (evt->mode) {
150 case CLOCK_EVT_MODE_ONESHOT:
151 vmi_timer_ops.cancel_alarm(VMI_ONESHOT);
152 break;
153 case CLOCK_EVT_MODE_PERIODIC:
154 vmi_timer_ops.cancel_alarm(VMI_PERIODIC);
155 break;
156 default:
157 break;
158 }
159 break;
160 default:
161 break;
162 }
163}
164
165static int vmi_timer_next_event(unsigned long delta,
166 struct clock_event_device *evt)
167{
168 /* Unfortunately, set_next_event interface only passes relative
169 * expiry, but we want absolute expiry. It'd be better if were
170 * were passed an absolute expiry, since a bunch of time may
171 * have been stolen between the time the delta is computed and
172 * when we set the alarm below. */
173 cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT));
174
175 BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
176 vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0);
177 return 0;
178}
179
180static struct clock_event_device vmi_clockevent = {
181 .name = "vmi-timer",
182 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
183 .shift = 22,
184 .set_mode = vmi_timer_set_mode,
185 .set_next_event = vmi_timer_next_event,
186 .rating = 1000,
187 .irq = 0,
188};
189
190static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
191{
192 struct clock_event_device *evt = &__get_cpu_var(local_events);
193 evt->event_handler(evt);
194 return IRQ_HANDLED;
195}
196
197static struct irqaction vmi_clock_action = {
198 .name = "vmi-timer",
199 .handler = vmi_timer_interrupt,
200 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
201};
202
203static void __devinit vmi_time_init_clockevent(void)
204{
205 cycle_t cycles_per_msec;
206 struct clock_event_device *evt;
207
208 int cpu = smp_processor_id();
209 evt = &__get_cpu_var(local_events);
210
211 /* Use cycles_per_msec since div_sc params are 32-bits. */
212 cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
213 (void)do_div(cycles_per_msec, 1000);
214
215 memcpy(evt, &vmi_clockevent, sizeof(*evt));
216 /* Must pick .shift such that .mult fits in 32-bits. Choosing
217 * .shift to be 22 allows 2^(32-22) cycles per nano-seconds
218 * before overflow. */
219 evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift);
220 /* Upper bound is clockevent's use of ulong for cycle deltas. */
221 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
222 evt->min_delta_ns = clockevent_delta2ns(1, evt);
223 evt->cpumask = cpumask_of(cpu);
224
225 printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n",
226 evt->name, evt->mult, evt->shift);
227 clockevents_register_device(evt);
228}
229
230void __init vmi_time_init(void)
231{
232 unsigned int cpu;
233 /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */
234 outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
235
236 vmi_time_init_clockevent();
237 setup_irq(0, &vmi_clock_action);
238 for_each_possible_cpu(cpu)
239 per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0;
240}
241
242#ifdef CONFIG_X86_LOCAL_APIC
243void __devinit vmi_time_bsp_init(void)
244{
245 /*
246 * On APIC systems, we want local timers to fire on each cpu. We do
247 * this by programming LVTT to deliver timer events to the IRQ handler
248 * for IRQ-0, since we can't re-use the APIC local timer handler
249 * without interfering with that code.
250 */
251 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
252 local_irq_disable();
253#ifdef CONFIG_SMP
254 /*
255 * XXX handle_percpu_irq only defined for SMP; we need to switch over
256 * to using it, since this is a local interrupt, which each CPU must
257 * handle individually without locking out or dropping simultaneous
258 * local timers on other CPUs. We also don't want to trigger the
259 * quirk workaround code for interrupts which gets invoked from
260 * handle_percpu_irq via eoi, so we use our own IRQ chip.
261 */
262 set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt");
263#else
264 set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt");
265#endif
266 vmi_wiring = VMI_ALARM_WIRED_LVTT;
267 apic_write(APIC_LVTT, vmi_get_timer_vector());
268 local_irq_enable();
269 clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
270}
271
272void __devinit vmi_time_ap_init(void)
273{
274 vmi_time_init_clockevent();
275 apic_write(APIC_LVTT, vmi_get_timer_vector());
276}
277#endif
278
279/** vmi clocksource */
280static struct clocksource clocksource_vmi;
281
282static cycle_t read_real_cycles(struct clocksource *cs)
283{
284 cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
285 return max(ret, clocksource_vmi.cycle_last);
286}
287
288static struct clocksource clocksource_vmi = {
289 .name = "vmi-timer",
290 .rating = 450,
291 .read = read_real_cycles,
292 .mask = CLOCKSOURCE_MASK(64),
293 .mult = 0, /* to be set */
294 .shift = 22,
295 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
296};
297
298static int __init init_vmi_clocksource(void)
299{
300 cycle_t cycles_per_msec;
301
302 if (!vmi_timer_ops.get_cycle_frequency)
303 return 0;
304 /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */
305 cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
306 (void)do_div(cycles_per_msec, 1000);
307
308 /* Note that clocksource.{mult, shift} converts in the opposite direction
309 * as clockevents. */
310 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
311 clocksource_vmi.shift);
312
313 printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec);
314 return clocksource_register(&clocksource_vmi);
315
316}
317module_init(init_vmi_clocksource);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index d0bb52296fa3..e03530aebfd0 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -242,6 +242,12 @@ SECTIONS
242 __x86_cpu_dev_end = .; 242 __x86_cpu_dev_end = .;
243 } 243 }
244 244
245 /*
246 * start address and size of operations which during runtime
247 * can be patched with virtualization friendly instructions or
248 * baremetal native ones. Think page table operations.
249 * Details in paravirt_types.h
250 */
245 . = ALIGN(8); 251 . = ALIGN(8);
246 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { 252 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
247 __parainstructions = .; 253 __parainstructions = .;
@@ -249,6 +255,11 @@ SECTIONS
249 __parainstructions_end = .; 255 __parainstructions_end = .;
250 } 256 }
251 257
258 /*
259 * struct alt_inst entries. From the header (alternative.h):
260 * "Alternative instructions for different CPU types or capabilities"
261 * Think locking instructions on spinlocks.
262 */
252 . = ALIGN(8); 263 . = ALIGN(8);
253 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { 264 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
254 __alt_instructions = .; 265 __alt_instructions = .;
@@ -256,11 +267,28 @@ SECTIONS
256 __alt_instructions_end = .; 267 __alt_instructions_end = .;
257 } 268 }
258 269
270 /*
271 * And here are the replacement instructions. The linker sticks
272 * them as binary blobs. The .altinstructions has enough data to
273 * get the address and the length of them to patch the kernel safely.
274 */
259 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { 275 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
260 *(.altinstr_replacement) 276 *(.altinstr_replacement)
261 } 277 }
262 278
263 /* 279 /*
280 * struct iommu_table_entry entries are injected in this section.
281 * It is an array of IOMMUs which during run time gets sorted depending
282 * on its dependency order. After rootfs_initcall is complete
283 * this section can be safely removed.
284 */
285 .iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) {
286 __iommu_table = .;
287 *(.iommu_table)
288 __iommu_table_end = .;
289 }
290 . = ALIGN(8);
291 /*
264 * .exit.text is discard at runtime, not link time, to deal with 292 * .exit.text is discard at runtime, not link time, to deal with
265 * references from .altinstructions and .eh_frame 293 * references from .altinstructions and .eh_frame
266 */ 294 */
@@ -273,7 +301,7 @@ SECTIONS
273 } 301 }
274 302
275#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) 303#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
276 PERCPU(PAGE_SIZE) 304 PERCPU(THREAD_SIZE)
277#endif 305#endif
278 306
279 . = ALIGN(PAGE_SIZE); 307 . = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 77d8c0f4817d..22b06f7660f4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1056,14 +1056,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
1056 1056
1057 vcpu->arch.apic = apic; 1057 vcpu->arch.apic = apic;
1058 1058
1059 apic->regs_page = alloc_page(GFP_KERNEL); 1059 apic->regs_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
1060 if (apic->regs_page == NULL) { 1060 if (apic->regs_page == NULL) {
1061 printk(KERN_ERR "malloc apic regs error for vcpu %x\n", 1061 printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
1062 vcpu->vcpu_id); 1062 vcpu->vcpu_id);
1063 goto nomem_free_apic; 1063 goto nomem_free_apic;
1064 } 1064 }
1065 apic->regs = page_address(apic->regs_page); 1065 apic->regs = page_address(apic->regs_page);
1066 memset(apic->regs, 0, PAGE_SIZE);
1067 apic->vcpu = vcpu; 1066 apic->vcpu = vcpu;
1068 1067
1069 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 1068 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3a09c625d526..6c2ecf0a806d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1991,13 +1991,14 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1991 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 1991 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
1992 0 /* Reserved, DCA */ | F(XMM4_1) | 1992 0 /* Reserved, DCA */ | F(XMM4_1) |
1993 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 1993 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
1994 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); 1994 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
1995 F(F16C);
1995 /* cpuid 0x80000001.ecx */ 1996 /* cpuid 0x80000001.ecx */
1996 const u32 kvm_supported_word6_x86_features = 1997 const u32 kvm_supported_word6_x86_features =
1997 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | 1998 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
1998 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | 1999 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
1999 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | 2000 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
2000 0 /* SKINIT */ | 0 /* WDT */; 2001 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
2001 2002
2002 /* all calls to cpuid_count() should be made on the same cpu */ 2003 /* all calls to cpuid_count() should be made on the same cpu */
2003 get_cpu(); 2004 get_cpu();
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 9d5f55848455..73b1e1a1f489 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -791,22 +791,22 @@ static void lguest_flush_tlb_kernel(void)
791 * simple as setting a bit. We don't actually "ack" interrupts as such, we 791 * simple as setting a bit. We don't actually "ack" interrupts as such, we
792 * just mask and unmask them. I wonder if we should be cleverer? 792 * just mask and unmask them. I wonder if we should be cleverer?
793 */ 793 */
794static void disable_lguest_irq(unsigned int irq) 794static void disable_lguest_irq(struct irq_data *data)
795{ 795{
796 set_bit(irq, lguest_data.blocked_interrupts); 796 set_bit(data->irq, lguest_data.blocked_interrupts);
797} 797}
798 798
799static void enable_lguest_irq(unsigned int irq) 799static void enable_lguest_irq(struct irq_data *data)
800{ 800{
801 clear_bit(irq, lguest_data.blocked_interrupts); 801 clear_bit(data->irq, lguest_data.blocked_interrupts);
802} 802}
803 803
804/* This structure describes the lguest IRQ controller. */ 804/* This structure describes the lguest IRQ controller. */
805static struct irq_chip lguest_irq_controller = { 805static struct irq_chip lguest_irq_controller = {
806 .name = "lguest", 806 .name = "lguest",
807 .mask = disable_lguest_irq, 807 .irq_mask = disable_lguest_irq,
808 .mask_ack = disable_lguest_irq, 808 .irq_mask_ack = disable_lguest_irq,
809 .unmask = enable_lguest_irq, 809 .irq_unmask = enable_lguest_irq,
810}; 810};
811 811
812/* 812/*
@@ -838,12 +838,12 @@ static void __init lguest_init_IRQ(void)
838 * rather than set them in lguest_init_IRQ we are called here every time an 838 * rather than set them in lguest_init_IRQ we are called here every time an
839 * lguest device needs an interrupt. 839 * lguest device needs an interrupt.
840 * 840 *
841 * FIXME: irq_to_desc_alloc_node() can fail due to lack of memory, we should 841 * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should
842 * pass that up! 842 * pass that up!
843 */ 843 */
844void lguest_setup_irq(unsigned int irq) 844void lguest_setup_irq(unsigned int irq)
845{ 845{
846 irq_to_desc_alloc_node(irq, 0); 846 irq_alloc_desc_at(irq, 0);
847 set_irq_chip_and_handler_name(irq, &lguest_irq_controller, 847 set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
848 handle_level_irq, "level"); 848 handle_level_irq, "level");
849} 849}
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index 5415a9d06f53..b908a59eccf5 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -22,22 +22,187 @@ EXPORT_SYMBOL(memset);
22 22
23void *memmove(void *dest, const void *src, size_t n) 23void *memmove(void *dest, const void *src, size_t n)
24{ 24{
25 int d0, d1, d2; 25 int d0,d1,d2,d3,d4,d5;
26 26 char *ret = dest;
27 if (dest < src) { 27
28 memcpy(dest, src, n); 28 __asm__ __volatile__(
29 } else { 29 /* Handle more 16bytes in loop */
30 __asm__ __volatile__( 30 "cmp $0x10, %0\n\t"
31 "std\n\t" 31 "jb 1f\n\t"
32 "rep\n\t" 32
33 "movsb\n\t" 33 /* Decide forward/backward copy mode */
34 "cld" 34 "cmp %2, %1\n\t"
35 : "=&c" (d0), "=&S" (d1), "=&D" (d2) 35 "jb 2f\n\t"
36 :"0" (n), 36
37 "1" (n-1+src), 37 /*
38 "2" (n-1+dest) 38 * movs instruction have many startup latency
39 :"memory"); 39 * so we handle small size by general register.
40 } 40 */
41 return dest; 41 "cmp $680, %0\n\t"
42 "jb 3f\n\t"
43 /*
44 * movs instruction is only good for aligned case.
45 */
46 "mov %1, %3\n\t"
47 "xor %2, %3\n\t"
48 "and $0xff, %3\n\t"
49 "jz 4f\n\t"
50 "3:\n\t"
51 "sub $0x10, %0\n\t"
52
53 /*
54 * We gobble 16byts forward in each loop.
55 */
56 "3:\n\t"
57 "sub $0x10, %0\n\t"
58 "mov 0*4(%1), %3\n\t"
59 "mov 1*4(%1), %4\n\t"
60 "mov %3, 0*4(%2)\n\t"
61 "mov %4, 1*4(%2)\n\t"
62 "mov 2*4(%1), %3\n\t"
63 "mov 3*4(%1), %4\n\t"
64 "mov %3, 2*4(%2)\n\t"
65 "mov %4, 3*4(%2)\n\t"
66 "lea 0x10(%1), %1\n\t"
67 "lea 0x10(%2), %2\n\t"
68 "jae 3b\n\t"
69 "add $0x10, %0\n\t"
70 "jmp 1f\n\t"
71
72 /*
73 * Handle data forward by movs.
74 */
75 ".p2align 4\n\t"
76 "4:\n\t"
77 "mov -4(%1, %0), %3\n\t"
78 "lea -4(%2, %0), %4\n\t"
79 "shr $2, %0\n\t"
80 "rep movsl\n\t"
81 "mov %3, (%4)\n\t"
82 "jmp 11f\n\t"
83 /*
84 * Handle data backward by movs.
85 */
86 ".p2align 4\n\t"
87 "6:\n\t"
88 "mov (%1), %3\n\t"
89 "mov %2, %4\n\t"
90 "lea -4(%1, %0), %1\n\t"
91 "lea -4(%2, %0), %2\n\t"
92 "shr $2, %0\n\t"
93 "std\n\t"
94 "rep movsl\n\t"
95 "mov %3,(%4)\n\t"
96 "cld\n\t"
97 "jmp 11f\n\t"
98
99 /*
100 * Start to prepare for backward copy.
101 */
102 ".p2align 4\n\t"
103 "2:\n\t"
104 "cmp $680, %0\n\t"
105 "jb 5f\n\t"
106 "mov %1, %3\n\t"
107 "xor %2, %3\n\t"
108 "and $0xff, %3\n\t"
109 "jz 6b\n\t"
110
111 /*
112 * Calculate copy position to tail.
113 */
114 "5:\n\t"
115 "add %0, %1\n\t"
116 "add %0, %2\n\t"
117 "sub $0x10, %0\n\t"
118
119 /*
120 * We gobble 16byts backward in each loop.
121 */
122 "7:\n\t"
123 "sub $0x10, %0\n\t"
124
125 "mov -1*4(%1), %3\n\t"
126 "mov -2*4(%1), %4\n\t"
127 "mov %3, -1*4(%2)\n\t"
128 "mov %4, -2*4(%2)\n\t"
129 "mov -3*4(%1), %3\n\t"
130 "mov -4*4(%1), %4\n\t"
131 "mov %3, -3*4(%2)\n\t"
132 "mov %4, -4*4(%2)\n\t"
133 "lea -0x10(%1), %1\n\t"
134 "lea -0x10(%2), %2\n\t"
135 "jae 7b\n\t"
136 /*
137 * Calculate copy position to head.
138 */
139 "add $0x10, %0\n\t"
140 "sub %0, %1\n\t"
141 "sub %0, %2\n\t"
142
143 /*
144 * Move data from 8 bytes to 15 bytes.
145 */
146 ".p2align 4\n\t"
147 "1:\n\t"
148 "cmp $8, %0\n\t"
149 "jb 8f\n\t"
150 "mov 0*4(%1), %3\n\t"
151 "mov 1*4(%1), %4\n\t"
152 "mov -2*4(%1, %0), %5\n\t"
153 "mov -1*4(%1, %0), %1\n\t"
154
155 "mov %3, 0*4(%2)\n\t"
156 "mov %4, 1*4(%2)\n\t"
157 "mov %5, -2*4(%2, %0)\n\t"
158 "mov %1, -1*4(%2, %0)\n\t"
159 "jmp 11f\n\t"
160
161 /*
162 * Move data from 4 bytes to 7 bytes.
163 */
164 ".p2align 4\n\t"
165 "8:\n\t"
166 "cmp $4, %0\n\t"
167 "jb 9f\n\t"
168 "mov 0*4(%1), %3\n\t"
169 "mov -1*4(%1, %0), %4\n\t"
170 "mov %3, 0*4(%2)\n\t"
171 "mov %4, -1*4(%2, %0)\n\t"
172 "jmp 11f\n\t"
173
174 /*
175 * Move data from 2 bytes to 3 bytes.
176 */
177 ".p2align 4\n\t"
178 "9:\n\t"
179 "cmp $2, %0\n\t"
180 "jb 10f\n\t"
181 "movw 0*2(%1), %%dx\n\t"
182 "movw -1*2(%1, %0), %%bx\n\t"
183 "movw %%dx, 0*2(%2)\n\t"
184 "movw %%bx, -1*2(%2, %0)\n\t"
185 "jmp 11f\n\t"
186
187 /*
188 * Move data for 1 byte.
189 */
190 ".p2align 4\n\t"
191 "10:\n\t"
192 "cmp $1, %0\n\t"
193 "jb 11f\n\t"
194 "movb (%1), %%cl\n\t"
195 "movb %%cl, (%2)\n\t"
196 ".p2align 4\n\t"
197 "11:"
198 : "=&c" (d0), "=&S" (d1), "=&D" (d2),
199 "=r" (d3),"=r" (d4), "=r"(d5)
200 :"0" (n),
201 "1" (src),
202 "2" (dest)
203 :"memory");
204
205 return ret;
206
42} 207}
43EXPORT_SYMBOL(memmove); 208EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index bcbcd1e0f7d5..75ef61e35e38 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -40,84 +40,132 @@
40ENTRY(__memcpy) 40ENTRY(__memcpy)
41ENTRY(memcpy) 41ENTRY(memcpy)
42 CFI_STARTPROC 42 CFI_STARTPROC
43 movq %rdi, %rax
43 44
44 /* 45 /*
45 * Put the number of full 64-byte blocks into %ecx. 46 * Use 32bit CMP here to avoid long NOP padding.
46 * Tail portion is handled at the end:
47 */ 47 */
48 movq %rdi, %rax 48 cmp $0x20, %edx
49 movl %edx, %ecx 49 jb .Lhandle_tail
50 shrl $6, %ecx
51 jz .Lhandle_tail
52 50
53 .p2align 4
54.Lloop_64:
55 /* 51 /*
56 * We decrement the loop index here - and the zero-flag is 52 * We check whether memory false dependece could occur,
57 * checked at the end of the loop (instructions inbetween do 53 * then jump to corresponding copy mode.
58 * not change the zero flag):
59 */ 54 */
60 decl %ecx 55 cmp %dil, %sil
56 jl .Lcopy_backward
57 subl $0x20, %edx
58.Lcopy_forward_loop:
59 subq $0x20, %rdx
61 60
62 /* 61 /*
63 * Move in blocks of 4x16 bytes: 62 * Move in blocks of 4x8 bytes:
64 */ 63 */
65 movq 0*8(%rsi), %r11 64 movq 0*8(%rsi), %r8
66 movq 1*8(%rsi), %r8 65 movq 1*8(%rsi), %r9
67 movq %r11, 0*8(%rdi) 66 movq 2*8(%rsi), %r10
68 movq %r8, 1*8(%rdi) 67 movq 3*8(%rsi), %r11
69 68 leaq 4*8(%rsi), %rsi
70 movq 2*8(%rsi), %r9 69
71 movq 3*8(%rsi), %r10 70 movq %r8, 0*8(%rdi)
72 movq %r9, 2*8(%rdi) 71 movq %r9, 1*8(%rdi)
73 movq %r10, 3*8(%rdi) 72 movq %r10, 2*8(%rdi)
74 73 movq %r11, 3*8(%rdi)
75 movq 4*8(%rsi), %r11 74 leaq 4*8(%rdi), %rdi
76 movq 5*8(%rsi), %r8 75 jae .Lcopy_forward_loop
77 movq %r11, 4*8(%rdi) 76 addq $0x20, %rdx
78 movq %r8, 5*8(%rdi) 77 jmp .Lhandle_tail
79 78
80 movq 6*8(%rsi), %r9 79.Lcopy_backward:
81 movq 7*8(%rsi), %r10 80 /*
82 movq %r9, 6*8(%rdi) 81 * Calculate copy position to tail.
83 movq %r10, 7*8(%rdi) 82 */
84 83 addq %rdx, %rsi
85 leaq 64(%rsi), %rsi 84 addq %rdx, %rdi
86 leaq 64(%rdi), %rdi 85 subq $0x20, %rdx
87 86 /*
88 jnz .Lloop_64 87 * At most 3 ALU operations in one cycle,
88 * so append NOPS in the same 16bytes trunk.
89 */
90 .p2align 4
91.Lcopy_backward_loop:
92 subq $0x20, %rdx
93 movq -1*8(%rsi), %r8
94 movq -2*8(%rsi), %r9
95 movq -3*8(%rsi), %r10
96 movq -4*8(%rsi), %r11
97 leaq -4*8(%rsi), %rsi
98 movq %r8, -1*8(%rdi)
99 movq %r9, -2*8(%rdi)
100 movq %r10, -3*8(%rdi)
101 movq %r11, -4*8(%rdi)
102 leaq -4*8(%rdi), %rdi
103 jae .Lcopy_backward_loop
89 104
105 /*
106 * Calculate copy position to head.
107 */
108 addq $0x20, %rdx
109 subq %rdx, %rsi
110 subq %rdx, %rdi
90.Lhandle_tail: 111.Lhandle_tail:
91 movl %edx, %ecx 112 cmpq $16, %rdx
92 andl $63, %ecx 113 jb .Lless_16bytes
93 shrl $3, %ecx
94 jz .Lhandle_7
95 114
115 /*
116 * Move data from 16 bytes to 31 bytes.
117 */
118 movq 0*8(%rsi), %r8
119 movq 1*8(%rsi), %r9
120 movq -2*8(%rsi, %rdx), %r10
121 movq -1*8(%rsi, %rdx), %r11
122 movq %r8, 0*8(%rdi)
123 movq %r9, 1*8(%rdi)
124 movq %r10, -2*8(%rdi, %rdx)
125 movq %r11, -1*8(%rdi, %rdx)
126 retq
96 .p2align 4 127 .p2align 4
97.Lloop_8: 128.Lless_16bytes:
98 decl %ecx 129 cmpq $8, %rdx
99 movq (%rsi), %r8 130 jb .Lless_8bytes
100 movq %r8, (%rdi) 131 /*
101 leaq 8(%rdi), %rdi 132 * Move data from 8 bytes to 15 bytes.
102 leaq 8(%rsi), %rsi 133 */
103 jnz .Lloop_8 134 movq 0*8(%rsi), %r8
104 135 movq -1*8(%rsi, %rdx), %r9
105.Lhandle_7: 136 movq %r8, 0*8(%rdi)
106 movl %edx, %ecx 137 movq %r9, -1*8(%rdi, %rdx)
107 andl $7, %ecx 138 retq
108 jz .Lend 139 .p2align 4
140.Lless_8bytes:
141 cmpq $4, %rdx
142 jb .Lless_3bytes
109 143
144 /*
145 * Move data from 4 bytes to 7 bytes.
146 */
147 movl (%rsi), %ecx
148 movl -4(%rsi, %rdx), %r8d
149 movl %ecx, (%rdi)
150 movl %r8d, -4(%rdi, %rdx)
151 retq
110 .p2align 4 152 .p2align 4
153.Lless_3bytes:
154 cmpl $0, %edx
155 je .Lend
156 /*
157 * Move data from 1 bytes to 3 bytes.
158 */
111.Lloop_1: 159.Lloop_1:
112 movb (%rsi), %r8b 160 movb (%rsi), %r8b
113 movb %r8b, (%rdi) 161 movb %r8b, (%rdi)
114 incq %rdi 162 incq %rdi
115 incq %rsi 163 incq %rsi
116 decl %ecx 164 decl %edx
117 jnz .Lloop_1 165 jnz .Lloop_1
118 166
119.Lend: 167.Lend:
120 ret 168 retq
121 CFI_ENDPROC 169 CFI_ENDPROC
122ENDPROC(memcpy) 170ENDPROC(memcpy)
123ENDPROC(__memcpy) 171ENDPROC(__memcpy)
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
index 0a33909bf122..6d0f0ec41b34 100644
--- a/arch/x86/lib/memmove_64.c
+++ b/arch/x86/lib/memmove_64.c
@@ -8,14 +8,185 @@
8#undef memmove 8#undef memmove
9void *memmove(void *dest, const void *src, size_t count) 9void *memmove(void *dest, const void *src, size_t count)
10{ 10{
11 if (dest < src) { 11 unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
12 return memcpy(dest, src, count); 12 char *ret;
13 } else { 13
14 char *p = dest + count; 14 __asm__ __volatile__(
15 const char *s = src + count; 15 /* Handle more 32bytes in loop */
16 while (count--) 16 "mov %2, %3\n\t"
17 *--p = *--s; 17 "cmp $0x20, %0\n\t"
18 } 18 "jb 1f\n\t"
19 return dest; 19
20 /* Decide forward/backward copy mode */
21 "cmp %2, %1\n\t"
22 "jb 2f\n\t"
23
24 /*
25 * movsq instruction have many startup latency
26 * so we handle small size by general register.
27 */
28 "cmp $680, %0\n\t"
29 "jb 3f\n\t"
30 /*
31 * movsq instruction is only good for aligned case.
32 */
33 "cmpb %%dil, %%sil\n\t"
34 "je 4f\n\t"
35 "3:\n\t"
36 "sub $0x20, %0\n\t"
37 /*
38 * We gobble 32byts forward in each loop.
39 */
40 "5:\n\t"
41 "sub $0x20, %0\n\t"
42 "movq 0*8(%1), %4\n\t"
43 "movq 1*8(%1), %5\n\t"
44 "movq 2*8(%1), %6\n\t"
45 "movq 3*8(%1), %7\n\t"
46 "leaq 4*8(%1), %1\n\t"
47
48 "movq %4, 0*8(%2)\n\t"
49 "movq %5, 1*8(%2)\n\t"
50 "movq %6, 2*8(%2)\n\t"
51 "movq %7, 3*8(%2)\n\t"
52 "leaq 4*8(%2), %2\n\t"
53 "jae 5b\n\t"
54 "addq $0x20, %0\n\t"
55 "jmp 1f\n\t"
56 /*
57 * Handle data forward by movsq.
58 */
59 ".p2align 4\n\t"
60 "4:\n\t"
61 "movq %0, %8\n\t"
62 "movq -8(%1, %0), %4\n\t"
63 "lea -8(%2, %0), %5\n\t"
64 "shrq $3, %8\n\t"
65 "rep movsq\n\t"
66 "movq %4, (%5)\n\t"
67 "jmp 13f\n\t"
68 /*
69 * Handle data backward by movsq.
70 */
71 ".p2align 4\n\t"
72 "7:\n\t"
73 "movq %0, %8\n\t"
74 "movq (%1), %4\n\t"
75 "movq %2, %5\n\t"
76 "leaq -8(%1, %0), %1\n\t"
77 "leaq -8(%2, %0), %2\n\t"
78 "shrq $3, %8\n\t"
79 "std\n\t"
80 "rep movsq\n\t"
81 "cld\n\t"
82 "movq %4, (%5)\n\t"
83 "jmp 13f\n\t"
84
85 /*
86 * Start to prepare for backward copy.
87 */
88 ".p2align 4\n\t"
89 "2:\n\t"
90 "cmp $680, %0\n\t"
91 "jb 6f \n\t"
92 "cmp %%dil, %%sil\n\t"
93 "je 7b \n\t"
94 "6:\n\t"
95 /*
96 * Calculate copy position to tail.
97 */
98 "addq %0, %1\n\t"
99 "addq %0, %2\n\t"
100 "subq $0x20, %0\n\t"
101 /*
102 * We gobble 32byts backward in each loop.
103 */
104 "8:\n\t"
105 "subq $0x20, %0\n\t"
106 "movq -1*8(%1), %4\n\t"
107 "movq -2*8(%1), %5\n\t"
108 "movq -3*8(%1), %6\n\t"
109 "movq -4*8(%1), %7\n\t"
110 "leaq -4*8(%1), %1\n\t"
111
112 "movq %4, -1*8(%2)\n\t"
113 "movq %5, -2*8(%2)\n\t"
114 "movq %6, -3*8(%2)\n\t"
115 "movq %7, -4*8(%2)\n\t"
116 "leaq -4*8(%2), %2\n\t"
117 "jae 8b\n\t"
118 /*
119 * Calculate copy position to head.
120 */
121 "addq $0x20, %0\n\t"
122 "subq %0, %1\n\t"
123 "subq %0, %2\n\t"
124 "1:\n\t"
125 "cmpq $16, %0\n\t"
126 "jb 9f\n\t"
127 /*
128 * Move data from 16 bytes to 31 bytes.
129 */
130 "movq 0*8(%1), %4\n\t"
131 "movq 1*8(%1), %5\n\t"
132 "movq -2*8(%1, %0), %6\n\t"
133 "movq -1*8(%1, %0), %7\n\t"
134 "movq %4, 0*8(%2)\n\t"
135 "movq %5, 1*8(%2)\n\t"
136 "movq %6, -2*8(%2, %0)\n\t"
137 "movq %7, -1*8(%2, %0)\n\t"
138 "jmp 13f\n\t"
139 ".p2align 4\n\t"
140 "9:\n\t"
141 "cmpq $8, %0\n\t"
142 "jb 10f\n\t"
143 /*
144 * Move data from 8 bytes to 15 bytes.
145 */
146 "movq 0*8(%1), %4\n\t"
147 "movq -1*8(%1, %0), %5\n\t"
148 "movq %4, 0*8(%2)\n\t"
149 "movq %5, -1*8(%2, %0)\n\t"
150 "jmp 13f\n\t"
151 "10:\n\t"
152 "cmpq $4, %0\n\t"
153 "jb 11f\n\t"
154 /*
155 * Move data from 4 bytes to 7 bytes.
156 */
157 "movl (%1), %4d\n\t"
158 "movl -4(%1, %0), %5d\n\t"
159 "movl %4d, (%2)\n\t"
160 "movl %5d, -4(%2, %0)\n\t"
161 "jmp 13f\n\t"
162 "11:\n\t"
163 "cmp $2, %0\n\t"
164 "jb 12f\n\t"
165 /*
166 * Move data from 2 bytes to 3 bytes.
167 */
168 "movw (%1), %4w\n\t"
169 "movw -2(%1, %0), %5w\n\t"
170 "movw %4w, (%2)\n\t"
171 "movw %5w, -2(%2, %0)\n\t"
172 "jmp 13f\n\t"
173 "12:\n\t"
174 "cmp $1, %0\n\t"
175 "jb 13f\n\t"
176 /*
177 * Move data for 1 byte.
178 */
179 "movb (%1), %4b\n\t"
180 "movb %4b, (%2)\n\t"
181 "13:\n\t"
182 : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
183 "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
184 :"0" (count),
185 "1" (src),
186 "2" (dest)
187 :"memory");
188
189 return ret;
190
20} 191}
21EXPORT_SYMBOL(memmove); 192EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index a4c768397baa..55543397a8a7 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -26,4 +26,6 @@ obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
26obj-$(CONFIG_K8_NUMA) += k8topology_64.o 26obj-$(CONFIG_K8_NUMA) += k8topology_64.o
27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o 27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
28 28
29obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
30
29obj-$(CONFIG_MEMTEST) += memtest.o 31obj-$(CONFIG_MEMTEST) += memtest.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4c4508e8a204..79b0b372d2d0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,7 +229,16 @@ void vmalloc_sync_all(void)
229 229
230 spin_lock_irqsave(&pgd_lock, flags); 230 spin_lock_irqsave(&pgd_lock, flags);
231 list_for_each_entry(page, &pgd_list, lru) { 231 list_for_each_entry(page, &pgd_list, lru) {
232 if (!vmalloc_sync_one(page_address(page), address)) 232 spinlock_t *pgt_lock;
233 pmd_t *ret;
234
235 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
236
237 spin_lock(pgt_lock);
238 ret = vmalloc_sync_one(page_address(page), address);
239 spin_unlock(pgt_lock);
240
241 if (!ret)
233 break; 242 break;
234 } 243 }
235 spin_unlock_irqrestore(&pgd_lock, flags); 244 spin_unlock_irqrestore(&pgd_lock, flags);
@@ -251,6 +260,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
251 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 260 if (!(address >= VMALLOC_START && address < VMALLOC_END))
252 return -1; 261 return -1;
253 262
263 WARN_ON_ONCE(in_nmi());
264
254 /* 265 /*
255 * Synchronize this task's top level page-table 266 * Synchronize this task's top level page-table
256 * with the 'reference' page table. 267 * with the 'reference' page table.
@@ -326,29 +337,7 @@ out:
326 337
327void vmalloc_sync_all(void) 338void vmalloc_sync_all(void)
328{ 339{
329 unsigned long address; 340 sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
330
331 for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
332 address += PGDIR_SIZE) {
333
334 const pgd_t *pgd_ref = pgd_offset_k(address);
335 unsigned long flags;
336 struct page *page;
337
338 if (pgd_none(*pgd_ref))
339 continue;
340
341 spin_lock_irqsave(&pgd_lock, flags);
342 list_for_each_entry(page, &pgd_list, lru) {
343 pgd_t *pgd;
344 pgd = (pgd_t *)page_address(page) + pgd_index(address);
345 if (pgd_none(*pgd))
346 set_pgd(pgd, *pgd_ref);
347 else
348 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
349 }
350 spin_unlock_irqrestore(&pgd_lock, flags);
351 }
352} 341}
353 342
354/* 343/*
@@ -369,6 +358,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
369 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 358 if (!(address >= VMALLOC_START && address < VMALLOC_END))
370 return -1; 359 return -1;
371 360
361 WARN_ON_ONCE(in_nmi());
362
372 /* 363 /*
373 * Copy kernel mappings over when needed. This can also 364 * Copy kernel mappings over when needed. This can also
374 * happen within a race in page table update. In the later 365 * happen within a race in page table update. In the later
@@ -894,8 +885,14 @@ spurious_fault(unsigned long error_code, unsigned long address)
894 if (pmd_large(*pmd)) 885 if (pmd_large(*pmd))
895 return spurious_fault_check(error_code, (pte_t *) pmd); 886 return spurious_fault_check(error_code, (pte_t *) pmd);
896 887
888 /*
889 * Note: don't use pte_present() here, since it returns true
890 * if the _PAGE_PROTNONE bit is set. However, this aliases the
891 * _PAGE_GLOBAL bit, which for kernel pages give false positives
892 * when CONFIG_DEBUG_PAGEALLOC is used.
893 */
897 pte = pte_offset_kernel(pmd, address); 894 pte = pte_offset_kernel(pmd, address);
898 if (!pte_present(*pte)) 895 if (!(pte_flags(*pte) & _PAGE_PRESENT))
899 return 0; 896 return 0;
900 897
901 ret = spurious_fault_check(error_code, pte); 898 ret = spurious_fault_check(error_code, pte);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index b278535b14aa..c0e28a13de7d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -2,6 +2,7 @@
2#include <linux/initrd.h> 2#include <linux/initrd.h>
3#include <linux/ioport.h> 3#include <linux/ioport.h>
4#include <linux/swap.h> 4#include <linux/swap.h>
5#include <linux/memblock.h>
5 6
6#include <asm/cacheflush.h> 7#include <asm/cacheflush.h>
7#include <asm/e820.h> 8#include <asm/e820.h>
@@ -33,6 +34,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
33 int use_gbpages) 34 int use_gbpages)
34{ 35{
35 unsigned long puds, pmds, ptes, tables, start; 36 unsigned long puds, pmds, ptes, tables, start;
37 phys_addr_t base;
36 38
37 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 39 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
38 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); 40 tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
@@ -75,12 +77,12 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
75#else 77#else
76 start = 0x8000; 78 start = 0x8000;
77#endif 79#endif
78 e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, 80 base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT,
79 tables, PAGE_SIZE); 81 tables, PAGE_SIZE);
80 if (e820_table_start == -1UL) 82 if (base == MEMBLOCK_ERROR)
81 panic("Cannot find space for the kernel page tables"); 83 panic("Cannot find space for the kernel page tables");
82 84
83 e820_table_start >>= PAGE_SHIFT; 85 e820_table_start = base >> PAGE_SHIFT;
84 e820_table_end = e820_table_start; 86 e820_table_end = e820_table_start;
85 e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); 87 e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
86 88
@@ -299,7 +301,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
299 __flush_tlb_all(); 301 __flush_tlb_all();
300 302
301 if (!after_bootmem && e820_table_end > e820_table_start) 303 if (!after_bootmem && e820_table_end > e820_table_start)
302 reserve_early(e820_table_start << PAGE_SHIFT, 304 memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
303 e820_table_end << PAGE_SHIFT, "PGTABLE"); 305 e820_table_end << PAGE_SHIFT, "PGTABLE");
304 306
305 if (!after_bootmem) 307 if (!after_bootmem)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index bca79091b9d6..5d0a6711c282 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -25,6 +25,7 @@
25#include <linux/pfn.h> 25#include <linux/pfn.h>
26#include <linux/poison.h> 26#include <linux/poison.h>
27#include <linux/bootmem.h> 27#include <linux/bootmem.h>
28#include <linux/memblock.h>
28#include <linux/proc_fs.h> 29#include <linux/proc_fs.h>
29#include <linux/memory_hotplug.h> 30#include <linux/memory_hotplug.h>
30#include <linux/initrd.h> 31#include <linux/initrd.h>
@@ -67,7 +68,7 @@ static __init void *alloc_low_page(void)
67 panic("alloc_low_page: ran out of memory"); 68 panic("alloc_low_page: ran out of memory");
68 69
69 adr = __va(pfn * PAGE_SIZE); 70 adr = __va(pfn * PAGE_SIZE);
70 memset(adr, 0, PAGE_SIZE); 71 clear_page(adr);
71 return adr; 72 return adr;
72} 73}
73 74
@@ -422,49 +423,28 @@ static void __init add_one_highpage_init(struct page *page)
422 totalhigh_pages++; 423 totalhigh_pages++;
423} 424}
424 425
425struct add_highpages_data { 426void __init add_highpages_with_active_regions(int nid,
426 unsigned long start_pfn; 427 unsigned long start_pfn, unsigned long end_pfn)
427 unsigned long end_pfn;
428};
429
430static int __init add_highpages_work_fn(unsigned long start_pfn,
431 unsigned long end_pfn, void *datax)
432{ 428{
433 int node_pfn; 429 struct range *range;
434 struct page *page; 430 int nr_range;
435 unsigned long final_start_pfn, final_end_pfn; 431 int i;
436 struct add_highpages_data *data;
437 432
438 data = (struct add_highpages_data *)datax; 433 nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn);
439 434
440 final_start_pfn = max(start_pfn, data->start_pfn); 435 for (i = 0; i < nr_range; i++) {
441 final_end_pfn = min(end_pfn, data->end_pfn); 436 struct page *page;
442 if (final_start_pfn >= final_end_pfn) 437 int node_pfn;
443 return 0;
444 438
445 for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; 439 for (node_pfn = range[i].start; node_pfn < range[i].end;
446 node_pfn++) { 440 node_pfn++) {
447 if (!pfn_valid(node_pfn)) 441 if (!pfn_valid(node_pfn))
448 continue; 442 continue;
449 page = pfn_to_page(node_pfn); 443 page = pfn_to_page(node_pfn);
450 add_one_highpage_init(page); 444 add_one_highpage_init(page);
445 }
451 } 446 }
452
453 return 0;
454
455} 447}
456
457void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
458 unsigned long end_pfn)
459{
460 struct add_highpages_data data;
461
462 data.start_pfn = start_pfn;
463 data.end_pfn = end_pfn;
464
465 work_with_active_regions(nid, add_highpages_work_fn, &data);
466}
467
468#else 448#else
469static inline void permanent_kmaps_init(pgd_t *pgd_base) 449static inline void permanent_kmaps_init(pgd_t *pgd_base)
470{ 450{
@@ -558,7 +538,7 @@ char swsusp_pg_dir[PAGE_SIZE]
558 538
559static inline void save_pg_dir(void) 539static inline void save_pg_dir(void)
560{ 540{
561 memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); 541 copy_page(swsusp_pg_dir, swapper_pg_dir);
562} 542}
563#else /* !CONFIG_ACPI_SLEEP */ 543#else /* !CONFIG_ACPI_SLEEP */
564static inline void save_pg_dir(void) 544static inline void save_pg_dir(void)
@@ -712,14 +692,14 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
712 highstart_pfn = highend_pfn = max_pfn; 692 highstart_pfn = highend_pfn = max_pfn;
713 if (max_pfn > max_low_pfn) 693 if (max_pfn > max_low_pfn)
714 highstart_pfn = max_low_pfn; 694 highstart_pfn = max_low_pfn;
715 e820_register_active_regions(0, 0, highend_pfn); 695 memblock_x86_register_active_regions(0, 0, highend_pfn);
716 sparse_memory_present_with_active_regions(0); 696 sparse_memory_present_with_active_regions(0);
717 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 697 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
718 pages_to_mb(highend_pfn - highstart_pfn)); 698 pages_to_mb(highend_pfn - highstart_pfn));
719 num_physpages = highend_pfn; 699 num_physpages = highend_pfn;
720 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 700 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
721#else 701#else
722 e820_register_active_regions(0, 0, max_low_pfn); 702 memblock_x86_register_active_regions(0, 0, max_low_pfn);
723 sparse_memory_present_with_active_regions(0); 703 sparse_memory_present_with_active_regions(0);
724 num_physpages = max_low_pfn; 704 num_physpages = max_low_pfn;
725 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 705 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
@@ -750,68 +730,12 @@ static void __init zone_sizes_init(void)
750 free_area_init_nodes(max_zone_pfns); 730 free_area_init_nodes(max_zone_pfns);
751} 731}
752 732
753#ifndef CONFIG_NO_BOOTMEM
754static unsigned long __init setup_node_bootmem(int nodeid,
755 unsigned long start_pfn,
756 unsigned long end_pfn,
757 unsigned long bootmap)
758{
759 unsigned long bootmap_size;
760
761 /* don't touch min_low_pfn */
762 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
763 bootmap >> PAGE_SHIFT,
764 start_pfn, end_pfn);
765 printk(KERN_INFO " node %d low ram: %08lx - %08lx\n",
766 nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
767 printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
768 nodeid, bootmap, bootmap + bootmap_size);
769 free_bootmem_with_active_regions(nodeid, end_pfn);
770
771 return bootmap + bootmap_size;
772}
773#endif
774
775void __init setup_bootmem_allocator(void) 733void __init setup_bootmem_allocator(void)
776{ 734{
777#ifndef CONFIG_NO_BOOTMEM
778 int nodeid;
779 unsigned long bootmap_size, bootmap;
780 /*
781 * Initialize the boot-time allocator (with low memory only):
782 */
783 bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
784 bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
785 PAGE_SIZE);
786 if (bootmap == -1L)
787 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
788 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
789#endif
790
791 printk(KERN_INFO " mapped low ram: 0 - %08lx\n", 735 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
792 max_pfn_mapped<<PAGE_SHIFT); 736 max_pfn_mapped<<PAGE_SHIFT);
793 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); 737 printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
794 738
795#ifndef CONFIG_NO_BOOTMEM
796 for_each_online_node(nodeid) {
797 unsigned long start_pfn, end_pfn;
798
799#ifdef CONFIG_NEED_MULTIPLE_NODES
800 start_pfn = node_start_pfn[nodeid];
801 end_pfn = node_end_pfn[nodeid];
802 if (start_pfn > max_low_pfn)
803 continue;
804 if (end_pfn > max_low_pfn)
805 end_pfn = max_low_pfn;
806#else
807 start_pfn = 0;
808 end_pfn = max_low_pfn;
809#endif
810 bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
811 bootmap);
812 }
813#endif
814
815 after_bootmem = 1; 739 after_bootmem = 1;
816} 740}
817 741
@@ -1070,8 +994,3 @@ void mark_rodata_ro(void)
1070} 994}
1071#endif 995#endif
1072 996
1073int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
1074 int flags)
1075{
1076 return reserve_bootmem(phys, len, flags);
1077}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9a6674689a20..84346200e783 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -21,6 +21,7 @@
21#include <linux/initrd.h> 21#include <linux/initrd.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/bootmem.h> 23#include <linux/bootmem.h>
24#include <linux/memblock.h>
24#include <linux/proc_fs.h> 25#include <linux/proc_fs.h>
25#include <linux/pci.h> 26#include <linux/pci.h>
26#include <linux/pfn.h> 27#include <linux/pfn.h>
@@ -52,8 +53,6 @@
52#include <asm/init.h> 53#include <asm/init.h>
53#include <linux/bootmem.h> 54#include <linux/bootmem.h>
54 55
55static unsigned long dma_reserve __initdata;
56
57static int __init parse_direct_gbpages_off(char *arg) 56static int __init parse_direct_gbpages_off(char *arg)
58{ 57{
59 direct_gbpages = 0; 58 direct_gbpages = 0;
@@ -98,6 +97,43 @@ static int __init nonx32_setup(char *str)
98__setup("noexec32=", nonx32_setup); 97__setup("noexec32=", nonx32_setup);
99 98
100/* 99/*
100 * When memory was added/removed make sure all the processes MM have
101 * suitable PGD entries in the local PGD level page.
102 */
103void sync_global_pgds(unsigned long start, unsigned long end)
104{
105 unsigned long address;
106
107 for (address = start; address <= end; address += PGDIR_SIZE) {
108 const pgd_t *pgd_ref = pgd_offset_k(address);
109 unsigned long flags;
110 struct page *page;
111
112 if (pgd_none(*pgd_ref))
113 continue;
114
115 spin_lock_irqsave(&pgd_lock, flags);
116 list_for_each_entry(page, &pgd_list, lru) {
117 pgd_t *pgd;
118 spinlock_t *pgt_lock;
119
120 pgd = (pgd_t *)page_address(page) + pgd_index(address);
121 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
122 spin_lock(pgt_lock);
123
124 if (pgd_none(*pgd))
125 set_pgd(pgd, *pgd_ref);
126 else
127 BUG_ON(pgd_page_vaddr(*pgd)
128 != pgd_page_vaddr(*pgd_ref));
129
130 spin_unlock(pgt_lock);
131 }
132 spin_unlock_irqrestore(&pgd_lock, flags);
133 }
134}
135
136/*
101 * NOTE: This function is marked __ref because it calls __init function 137 * NOTE: This function is marked __ref because it calls __init function
102 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. 138 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
103 */ 139 */
@@ -293,7 +329,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
293 panic("alloc_low_page: ran out of memory"); 329 panic("alloc_low_page: ran out of memory");
294 330
295 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); 331 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
296 memset(adr, 0, PAGE_SIZE); 332 clear_page(adr);
297 *phys = pfn * PAGE_SIZE; 333 *phys = pfn * PAGE_SIZE;
298 return adr; 334 return adr;
299} 335}
@@ -534,11 +570,13 @@ kernel_physical_mapping_init(unsigned long start,
534 unsigned long end, 570 unsigned long end,
535 unsigned long page_size_mask) 571 unsigned long page_size_mask)
536{ 572{
537 573 bool pgd_changed = false;
538 unsigned long next, last_map_addr = end; 574 unsigned long next, last_map_addr = end;
575 unsigned long addr;
539 576
540 start = (unsigned long)__va(start); 577 start = (unsigned long)__va(start);
541 end = (unsigned long)__va(end); 578 end = (unsigned long)__va(end);
579 addr = start;
542 580
543 for (; start < end; start = next) { 581 for (; start < end; start = next) {
544 pgd_t *pgd = pgd_offset_k(start); 582 pgd_t *pgd = pgd_offset_k(start);
@@ -563,7 +601,12 @@ kernel_physical_mapping_init(unsigned long start,
563 spin_lock(&init_mm.page_table_lock); 601 spin_lock(&init_mm.page_table_lock);
564 pgd_populate(&init_mm, pgd, __va(pud_phys)); 602 pgd_populate(&init_mm, pgd, __va(pud_phys));
565 spin_unlock(&init_mm.page_table_lock); 603 spin_unlock(&init_mm.page_table_lock);
604 pgd_changed = true;
566 } 605 }
606
607 if (pgd_changed)
608 sync_global_pgds(addr, end);
609
567 __flush_tlb_all(); 610 __flush_tlb_all();
568 611
569 return last_map_addr; 612 return last_map_addr;
@@ -573,23 +616,7 @@ kernel_physical_mapping_init(unsigned long start,
573void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, 616void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
574 int acpi, int k8) 617 int acpi, int k8)
575{ 618{
576#ifndef CONFIG_NO_BOOTMEM 619 memblock_x86_register_active_regions(0, start_pfn, end_pfn);
577 unsigned long bootmap_size, bootmap;
578
579 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
580 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
581 PAGE_SIZE);
582 if (bootmap == -1L)
583 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
584 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
585 /* don't touch min_low_pfn */
586 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
587 0, end_pfn);
588 e820_register_active_regions(0, start_pfn, end_pfn);
589 free_bootmem_with_active_regions(0, end_pfn);
590#else
591 e820_register_active_regions(0, start_pfn, end_pfn);
592#endif
593} 620}
594#endif 621#endif
595 622
@@ -799,52 +826,6 @@ void mark_rodata_ro(void)
799 826
800#endif 827#endif
801 828
802int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
803 int flags)
804{
805#ifdef CONFIG_NUMA
806 int nid, next_nid;
807 int ret;
808#endif
809 unsigned long pfn = phys >> PAGE_SHIFT;
810
811 if (pfn >= max_pfn) {
812 /*
813 * This can happen with kdump kernels when accessing
814 * firmware tables:
815 */
816 if (pfn < max_pfn_mapped)
817 return -EFAULT;
818
819 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n",
820 phys, len);
821 return -EFAULT;
822 }
823
824 /* Should check here against the e820 map to avoid double free */
825#ifdef CONFIG_NUMA
826 nid = phys_to_nid(phys);
827 next_nid = phys_to_nid(phys + len - 1);
828 if (nid == next_nid)
829 ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
830 else
831 ret = reserve_bootmem(phys, len, flags);
832
833 if (ret != 0)
834 return ret;
835
836#else
837 reserve_bootmem(phys, len, flags);
838#endif
839
840 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
841 dma_reserve += len / PAGE_SIZE;
842 set_dma_reserve(dma_reserve);
843 }
844
845 return 0;
846}
847
848int kern_addr_valid(unsigned long addr) 829int kern_addr_valid(unsigned long addr)
849{ 830{
850 unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; 831 unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
@@ -1003,6 +984,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
1003 } 984 }
1004 985
1005 } 986 }
987 sync_global_pgds((unsigned long)start_page, end);
1006 return 0; 988 return 0;
1007} 989}
1008 990
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 3ba6e0608c55..0369843511dc 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -362,6 +362,11 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr)
362 return &bm_pte[pte_index(addr)]; 362 return &bm_pte[pte_index(addr)];
363} 363}
364 364
365bool __init is_early_ioremap_ptep(pte_t *ptep)
366{
367 return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
368}
369
365static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; 370static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
366 371
367void __init early_ioremap_init(void) 372void __init early_ioremap_init(void)
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 970ed579d4e4..804a3b6c6e14 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -11,6 +11,8 @@
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/nodemask.h> 13#include <linux/nodemask.h>
14#include <linux/memblock.h>
15
14#include <asm/io.h> 16#include <asm/io.h>
15#include <linux/pci_ids.h> 17#include <linux/pci_ids.h>
16#include <linux/acpi.h> 18#include <linux/acpi.h>
@@ -22,7 +24,7 @@
22#include <asm/numa.h> 24#include <asm/numa.h>
23#include <asm/mpspec.h> 25#include <asm/mpspec.h>
24#include <asm/apic.h> 26#include <asm/apic.h>
25#include <asm/k8.h> 27#include <asm/amd_nb.h>
26 28
27static struct bootnode __initdata nodes[8]; 29static struct bootnode __initdata nodes[8];
28static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; 30static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
@@ -54,8 +56,8 @@ static __init int find_northbridge(void)
54static __init void early_get_boot_cpu_id(void) 56static __init void early_get_boot_cpu_id(void)
55{ 57{
56 /* 58 /*
57 * need to get boot_cpu_id so can use that to create apicid_to_node 59 * need to get the APIC ID of the BSP so can use that to
58 * in k8_scan_nodes() 60 * create apicid_to_node in k8_scan_nodes()
59 */ 61 */
60#ifdef CONFIG_X86_MPPARSE 62#ifdef CONFIG_X86_MPPARSE
61 /* 63 /*
@@ -212,7 +214,7 @@ int __init k8_scan_nodes(void)
212 bits = boot_cpu_data.x86_coreid_bits; 214 bits = boot_cpu_data.x86_coreid_bits;
213 cores = (1<<bits); 215 cores = (1<<bits);
214 apicid_base = 0; 216 apicid_base = 0;
215 /* need to get boot_cpu_id early for system with apicid lifting */ 217 /* get the APIC ID of the BSP early for systems with apicid lifting */
216 early_get_boot_cpu_id(); 218 early_get_boot_cpu_id();
217 if (boot_cpu_physical_apicid > 0) { 219 if (boot_cpu_physical_apicid > 0) {
218 pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); 220 pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
@@ -222,7 +224,7 @@ int __init k8_scan_nodes(void)
222 for_each_node_mask(i, node_possible_map) { 224 for_each_node_mask(i, node_possible_map) {
223 int j; 225 int j;
224 226
225 e820_register_active_regions(i, 227 memblock_x86_register_active_regions(i,
226 nodes[i].start >> PAGE_SHIFT, 228 nodes[i].start >> PAGE_SHIFT,
227 nodes[i].end >> PAGE_SHIFT); 229 nodes[i].end >> PAGE_SHIFT);
228 for (j = apicid_base; j < cores + apicid_base; j++) 230 for (j = apicid_base; j < cores + apicid_base; j++)
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index b3b531a4f8e5..d87dd6d042d6 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
631 if (!pte) 631 if (!pte)
632 return false; 632 return false;
633 633
634 WARN_ON_ONCE(in_nmi());
635
634 if (error_code & 2) 636 if (error_code & 2)
635 kmemcheck_access(regs, address, KMEMCHECK_WRITE); 637 kmemcheck_access(regs, address, KMEMCHECK_WRITE);
636 else 638 else
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
index 63c19e27aa6f..324aa3f07237 100644
--- a/arch/x86/mm/kmemcheck/opcode.c
+++ b/arch/x86/mm/kmemcheck/opcode.c
@@ -9,7 +9,7 @@ static bool opcode_is_prefix(uint8_t b)
9 b == 0xf0 || b == 0xf2 || b == 0xf3 9 b == 0xf0 || b == 0xf2 || b == 0xf3
10 /* Group 2 */ 10 /* Group 2 */
11 || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 11 || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26
12 || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e 12 || b == 0x64 || b == 0x65
13 /* Group 3 */ 13 /* Group 3 */
14 || b == 0x66 14 || b == 0x66
15 /* Group 4 */ 15 /* Group 4 */
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
new file mode 100644
index 000000000000..aa1169392b83
--- /dev/null
+++ b/arch/x86/mm/memblock.c
@@ -0,0 +1,348 @@
1#include <linux/kernel.h>
2#include <linux/types.h>
3#include <linux/init.h>
4#include <linux/bitops.h>
5#include <linux/memblock.h>
6#include <linux/bootmem.h>
7#include <linux/mm.h>
8#include <linux/range.h>
9
10/* Check for already reserved areas */
11static bool __init check_with_memblock_reserved_size(u64 *addrp, u64 *sizep, u64 align)
12{
13 struct memblock_region *r;
14 u64 addr = *addrp, last;
15 u64 size = *sizep;
16 bool changed = false;
17
18again:
19 last = addr + size;
20 for_each_memblock(reserved, r) {
21 if (last > r->base && addr < r->base) {
22 size = r->base - addr;
23 changed = true;
24 goto again;
25 }
26 if (last > (r->base + r->size) && addr < (r->base + r->size)) {
27 addr = round_up(r->base + r->size, align);
28 size = last - addr;
29 changed = true;
30 goto again;
31 }
32 if (last <= (r->base + r->size) && addr >= r->base) {
33 *sizep = 0;
34 return false;
35 }
36 }
37 if (changed) {
38 *addrp = addr;
39 *sizep = size;
40 }
41 return changed;
42}
43
44/*
45 * Find next free range after start, and size is returned in *sizep
46 */
47u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align)
48{
49 struct memblock_region *r;
50
51 for_each_memblock(memory, r) {
52 u64 ei_start = r->base;
53 u64 ei_last = ei_start + r->size;
54 u64 addr;
55
56 addr = round_up(ei_start, align);
57 if (addr < start)
58 addr = round_up(start, align);
59 if (addr >= ei_last)
60 continue;
61 *sizep = ei_last - addr;
62 while (check_with_memblock_reserved_size(&addr, sizep, align))
63 ;
64
65 if (*sizep)
66 return addr;
67 }
68
69 return MEMBLOCK_ERROR;
70}
71
72static __init struct range *find_range_array(int count)
73{
74 u64 end, size, mem;
75 struct range *range;
76
77 size = sizeof(struct range) * count;
78 end = memblock.current_limit;
79
80 mem = memblock_find_in_range(0, end, size, sizeof(struct range));
81 if (mem == MEMBLOCK_ERROR)
82 panic("can not find more space for range array");
83
84 /*
85 * This range is tempoaray, so don't reserve it, it will not be
86 * overlapped because We will not alloccate new buffer before
87 * We discard this one
88 */
89 range = __va(mem);
90 memset(range, 0, size);
91
92 return range;
93}
94
95static void __init memblock_x86_subtract_reserved(struct range *range, int az)
96{
97 u64 final_start, final_end;
98 struct memblock_region *r;
99
100 /* Take out region array itself at first*/
101 memblock_free_reserved_regions();
102
103 memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt);
104
105 for_each_memblock(reserved, r) {
106 memblock_dbg(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1);
107 final_start = PFN_DOWN(r->base);
108 final_end = PFN_UP(r->base + r->size);
109 if (final_start >= final_end)
110 continue;
111 subtract_range(range, az, final_start, final_end);
112 }
113
114 /* Put region array back ? */
115 memblock_reserve_reserved_regions();
116}
117
118struct count_data {
119 int nr;
120};
121
122static int __init count_work_fn(unsigned long start_pfn,
123 unsigned long end_pfn, void *datax)
124{
125 struct count_data *data = datax;
126
127 data->nr++;
128
129 return 0;
130}
131
132static int __init count_early_node_map(int nodeid)
133{
134 struct count_data data;
135
136 data.nr = 0;
137 work_with_active_regions(nodeid, count_work_fn, &data);
138
139 return data.nr;
140}
141
142int __init __get_free_all_memory_range(struct range **rangep, int nodeid,
143 unsigned long start_pfn, unsigned long end_pfn)
144{
145 int count;
146 struct range *range;
147 int nr_range;
148
149 count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2;
150
151 range = find_range_array(count);
152 nr_range = 0;
153
154 /*
155 * Use early_node_map[] and memblock.reserved.region to get range array
156 * at first
157 */
158 nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
159 subtract_range(range, count, 0, start_pfn);
160 subtract_range(range, count, end_pfn, -1ULL);
161
162 memblock_x86_subtract_reserved(range, count);
163 nr_range = clean_sort_range(range, count);
164
165 *rangep = range;
166 return nr_range;
167}
168
169int __init get_free_all_memory_range(struct range **rangep, int nodeid)
170{
171 unsigned long end_pfn = -1UL;
172
173#ifdef CONFIG_X86_32
174 end_pfn = max_low_pfn;
175#endif
176 return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn);
177}
178
179static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free)
180{
181 int i, count;
182 struct range *range;
183 int nr_range;
184 u64 final_start, final_end;
185 u64 free_size;
186 struct memblock_region *r;
187
188 count = (memblock.reserved.cnt + memblock.memory.cnt) * 2;
189
190 range = find_range_array(count);
191 nr_range = 0;
192
193 addr = PFN_UP(addr);
194 limit = PFN_DOWN(limit);
195
196 for_each_memblock(memory, r) {
197 final_start = PFN_UP(r->base);
198 final_end = PFN_DOWN(r->base + r->size);
199 if (final_start >= final_end)
200 continue;
201 if (final_start >= limit || final_end <= addr)
202 continue;
203
204 nr_range = add_range(range, count, nr_range, final_start, final_end);
205 }
206 subtract_range(range, count, 0, addr);
207 subtract_range(range, count, limit, -1ULL);
208
209 /* Subtract memblock.reserved.region in range ? */
210 if (!get_free)
211 goto sort_and_count_them;
212 for_each_memblock(reserved, r) {
213 final_start = PFN_DOWN(r->base);
214 final_end = PFN_UP(r->base + r->size);
215 if (final_start >= final_end)
216 continue;
217 if (final_start >= limit || final_end <= addr)
218 continue;
219
220 subtract_range(range, count, final_start, final_end);
221 }
222
223sort_and_count_them:
224 nr_range = clean_sort_range(range, count);
225
226 free_size = 0;
227 for (i = 0; i < nr_range; i++)
228 free_size += range[i].end - range[i].start;
229
230 return free_size << PAGE_SHIFT;
231}
232
233u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit)
234{
235 return __memblock_x86_memory_in_range(addr, limit, true);
236}
237
238u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit)
239{
240 return __memblock_x86_memory_in_range(addr, limit, false);
241}
242
243void __init memblock_x86_reserve_range(u64 start, u64 end, char *name)
244{
245 if (start == end)
246 return;
247
248 if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end))
249 return;
250
251 memblock_dbg(" memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name);
252
253 memblock_reserve(start, end - start);
254}
255
256void __init memblock_x86_free_range(u64 start, u64 end)
257{
258 if (start == end)
259 return;
260
261 if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end))
262 return;
263
264 memblock_dbg(" memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1);
265
266 memblock_free(start, end - start);
267}
268
269/*
270 * Need to call this function after memblock_x86_register_active_regions,
271 * so early_node_map[] is filled already.
272 */
273u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align)
274{
275 u64 addr;
276 addr = find_memory_core_early(nid, size, align, start, end);
277 if (addr != MEMBLOCK_ERROR)
278 return addr;
279
280 /* Fallback, should already have start end within node range */
281 return memblock_find_in_range(start, end, size, align);
282}
283
284/*
285 * Finds an active region in the address range from start_pfn to last_pfn and
286 * returns its range in ei_startpfn and ei_endpfn for the memblock entry.
287 */
288static int __init memblock_x86_find_active_region(const struct memblock_region *ei,
289 unsigned long start_pfn,
290 unsigned long last_pfn,
291 unsigned long *ei_startpfn,
292 unsigned long *ei_endpfn)
293{
294 u64 align = PAGE_SIZE;
295
296 *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT;
297 *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT;
298
299 /* Skip map entries smaller than a page */
300 if (*ei_startpfn >= *ei_endpfn)
301 return 0;
302
303 /* Skip if map is outside the node */
304 if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn)
305 return 0;
306
307 /* Check for overlaps */
308 if (*ei_startpfn < start_pfn)
309 *ei_startpfn = start_pfn;
310 if (*ei_endpfn > last_pfn)
311 *ei_endpfn = last_pfn;
312
313 return 1;
314}
315
316/* Walk the memblock.memory map and register active regions within a node */
317void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
318 unsigned long last_pfn)
319{
320 unsigned long ei_startpfn;
321 unsigned long ei_endpfn;
322 struct memblock_region *r;
323
324 for_each_memblock(memory, r)
325 if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
326 &ei_startpfn, &ei_endpfn))
327 add_active_range(nid, ei_startpfn, ei_endpfn);
328}
329
330/*
331 * Find the hole size (in bytes) in the memory range.
332 * @start: starting address of the memory range to scan
333 * @end: ending address of the memory range to scan
334 */
335u64 __init memblock_x86_hole_size(u64 start, u64 end)
336{
337 unsigned long start_pfn = start >> PAGE_SHIFT;
338 unsigned long last_pfn = end >> PAGE_SHIFT;
339 unsigned long ei_startpfn, ei_endpfn, ram = 0;
340 struct memblock_region *r;
341
342 for_each_memblock(memory, r)
343 if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
344 &ei_startpfn, &ei_endpfn))
345 ram += ei_endpfn - ei_startpfn;
346
347 return end - start - ((u64)ram << PAGE_SHIFT);
348}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 18d244f70205..92faf3a1c53e 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -6,8 +6,7 @@
6#include <linux/smp.h> 6#include <linux/smp.h>
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/pfn.h> 8#include <linux/pfn.h>
9 9#include <linux/memblock.h>
10#include <asm/e820.h>
11 10
12static u64 patterns[] __initdata = { 11static u64 patterns[] __initdata = {
13 0, 12 0,
@@ -35,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
35 (unsigned long long) pattern, 34 (unsigned long long) pattern,
36 (unsigned long long) start_bad, 35 (unsigned long long) start_bad,
37 (unsigned long long) end_bad); 36 (unsigned long long) end_bad);
38 reserve_early(start_bad, end_bad, "BAD RAM"); 37 memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM");
39} 38}
40 39
41static void __init memtest(u64 pattern, u64 start_phys, u64 size) 40static void __init memtest(u64 pattern, u64 start_phys, u64 size)
@@ -74,7 +73,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end)
74 u64 size = 0; 73 u64 size = 0;
75 74
76 while (start < end) { 75 while (start < end) {
77 start = find_e820_area_size(start, &size, 1); 76 start = memblock_x86_find_in_range_size(start, &size, 1);
78 77
79 /* done ? */ 78 /* done ? */
80 if (start >= end) 79 if (start >= end)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 809baaaf48b1..84a3e4c9f277 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -24,6 +24,7 @@
24 24
25#include <linux/mm.h> 25#include <linux/mm.h>
26#include <linux/bootmem.h> 26#include <linux/bootmem.h>
27#include <linux/memblock.h>
27#include <linux/mmzone.h> 28#include <linux/mmzone.h>
28#include <linux/highmem.h> 29#include <linux/highmem.h>
29#include <linux/initrd.h> 30#include <linux/initrd.h>
@@ -120,7 +121,7 @@ int __init get_memcfg_numa_flat(void)
120 121
121 node_start_pfn[0] = 0; 122 node_start_pfn[0] = 0;
122 node_end_pfn[0] = max_pfn; 123 node_end_pfn[0] = max_pfn;
123 e820_register_active_regions(0, 0, max_pfn); 124 memblock_x86_register_active_regions(0, 0, max_pfn);
124 memory_present(0, 0, max_pfn); 125 memory_present(0, 0, max_pfn);
125 node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); 126 node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
126 127
@@ -161,14 +162,14 @@ static void __init allocate_pgdat(int nid)
161 NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; 162 NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
162 else { 163 else {
163 unsigned long pgdat_phys; 164 unsigned long pgdat_phys;
164 pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT, 165 pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT,
165 max_pfn_mapped<<PAGE_SHIFT, 166 max_pfn_mapped<<PAGE_SHIFT,
166 sizeof(pg_data_t), 167 sizeof(pg_data_t),
167 PAGE_SIZE); 168 PAGE_SIZE);
168 NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); 169 NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
169 memset(buf, 0, sizeof(buf)); 170 memset(buf, 0, sizeof(buf));
170 sprintf(buf, "NODE_DATA %d", nid); 171 sprintf(buf, "NODE_DATA %d", nid);
171 reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); 172 memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf);
172 } 173 }
173 printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", 174 printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
174 nid, (unsigned long)NODE_DATA(nid)); 175 nid, (unsigned long)NODE_DATA(nid));
@@ -291,15 +292,15 @@ static __init unsigned long calculate_numa_remap_pages(void)
291 PTRS_PER_PTE); 292 PTRS_PER_PTE);
292 node_kva_target <<= PAGE_SHIFT; 293 node_kva_target <<= PAGE_SHIFT;
293 do { 294 do {
294 node_kva_final = find_e820_area(node_kva_target, 295 node_kva_final = memblock_find_in_range(node_kva_target,
295 ((u64)node_end_pfn[nid])<<PAGE_SHIFT, 296 ((u64)node_end_pfn[nid])<<PAGE_SHIFT,
296 ((u64)size)<<PAGE_SHIFT, 297 ((u64)size)<<PAGE_SHIFT,
297 LARGE_PAGE_BYTES); 298 LARGE_PAGE_BYTES);
298 node_kva_target -= LARGE_PAGE_BYTES; 299 node_kva_target -= LARGE_PAGE_BYTES;
299 } while (node_kva_final == -1ULL && 300 } while (node_kva_final == MEMBLOCK_ERROR &&
300 (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); 301 (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
301 302
302 if (node_kva_final == -1ULL) 303 if (node_kva_final == MEMBLOCK_ERROR)
303 panic("Can not get kva ram\n"); 304 panic("Can not get kva ram\n");
304 305
305 node_remap_size[nid] = size; 306 node_remap_size[nid] = size;
@@ -318,15 +319,13 @@ static __init unsigned long calculate_numa_remap_pages(void)
318 * but we could have some hole in high memory, and it will only 319 * but we could have some hole in high memory, and it will only
319 * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide 320 * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
320 * to use it as free. 321 * to use it as free.
321 * So reserve_early here, hope we don't run out of that array 322 * So memblock_x86_reserve_range here, hope we don't run out of that array
322 */ 323 */
323 reserve_early(node_kva_final, 324 memblock_x86_reserve_range(node_kva_final,
324 node_kva_final+(((u64)size)<<PAGE_SHIFT), 325 node_kva_final+(((u64)size)<<PAGE_SHIFT),
325 "KVA RAM"); 326 "KVA RAM");
326 327
327 node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; 328 node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
328 remove_active_range(nid, node_remap_start_pfn[nid],
329 node_remap_start_pfn[nid] + size);
330 } 329 }
331 printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", 330 printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
332 reserve_pages); 331 reserve_pages);
@@ -367,14 +366,14 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
367 366
368 kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); 367 kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
369 do { 368 do {
370 kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT, 369 kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT,
371 max_low_pfn<<PAGE_SHIFT, 370 max_low_pfn<<PAGE_SHIFT,
372 kva_pages<<PAGE_SHIFT, 371 kva_pages<<PAGE_SHIFT,
373 PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; 372 PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
374 kva_target_pfn -= PTRS_PER_PTE; 373 kva_target_pfn -= PTRS_PER_PTE;
375 } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); 374 } while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn);
376 375
377 if (kva_start_pfn == -1UL) 376 if (kva_start_pfn == MEMBLOCK_ERROR)
378 panic("Can not get kva space\n"); 377 panic("Can not get kva space\n");
379 378
380 printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", 379 printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
@@ -382,7 +381,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
382 printk(KERN_INFO "max_pfn = %lx\n", max_pfn); 381 printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
383 382
384 /* avoid clash with initrd */ 383 /* avoid clash with initrd */
385 reserve_early(kva_start_pfn<<PAGE_SHIFT, 384 memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT,
386 (kva_start_pfn + kva_pages)<<PAGE_SHIFT, 385 (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
387 "KVA PG"); 386 "KVA PG");
388#ifdef CONFIG_HIGHMEM 387#ifdef CONFIG_HIGHMEM
@@ -419,9 +418,6 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
419 for_each_online_node(nid) { 418 for_each_online_node(nid) {
420 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 419 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
421 NODE_DATA(nid)->node_id = nid; 420 NODE_DATA(nid)->node_id = nid;
422#ifndef CONFIG_NO_BOOTMEM
423 NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
424#endif
425 } 421 }
426 422
427 setup_bootmem_allocator(); 423 setup_bootmem_allocator();
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a7bcc23ef96c..60f498511dd6 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -7,6 +7,7 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/bootmem.h> 9#include <linux/bootmem.h>
10#include <linux/memblock.h>
10#include <linux/mmzone.h> 11#include <linux/mmzone.h>
11#include <linux/ctype.h> 12#include <linux/ctype.h>
12#include <linux/module.h> 13#include <linux/module.h>
@@ -18,7 +19,7 @@
18#include <asm/dma.h> 19#include <asm/dma.h>
19#include <asm/numa.h> 20#include <asm/numa.h>
20#include <asm/acpi.h> 21#include <asm/acpi.h>
21#include <asm/k8.h> 22#include <asm/amd_nb.h>
22 23
23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 24struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
24EXPORT_SYMBOL(node_data); 25EXPORT_SYMBOL(node_data);
@@ -86,16 +87,16 @@ static int __init allocate_cachealigned_memnodemap(void)
86 87
87 addr = 0x8000; 88 addr = 0x8000;
88 nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); 89 nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
89 nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT, 90 nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT,
90 nodemap_size, L1_CACHE_BYTES); 91 nodemap_size, L1_CACHE_BYTES);
91 if (nodemap_addr == -1UL) { 92 if (nodemap_addr == MEMBLOCK_ERROR) {
92 printk(KERN_ERR 93 printk(KERN_ERR
93 "NUMA: Unable to allocate Memory to Node hash map\n"); 94 "NUMA: Unable to allocate Memory to Node hash map\n");
94 nodemap_addr = nodemap_size = 0; 95 nodemap_addr = nodemap_size = 0;
95 return -1; 96 return -1;
96 } 97 }
97 memnodemap = phys_to_virt(nodemap_addr); 98 memnodemap = phys_to_virt(nodemap_addr);
98 reserve_early(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP"); 99 memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP");
99 100
100 printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", 101 printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
101 nodemap_addr, nodemap_addr + nodemap_size); 102 nodemap_addr, nodemap_addr + nodemap_size);
@@ -171,8 +172,8 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
171 if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && 172 if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
172 end > (MAX_DMA32_PFN<<PAGE_SHIFT)) 173 end > (MAX_DMA32_PFN<<PAGE_SHIFT))
173 start = MAX_DMA32_PFN<<PAGE_SHIFT; 174 start = MAX_DMA32_PFN<<PAGE_SHIFT;
174 mem = find_e820_area(start, end, size, align); 175 mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align);
175 if (mem != -1L) 176 if (mem != MEMBLOCK_ERROR)
176 return __va(mem); 177 return __va(mem);
177 178
178 /* extend the search scope */ 179 /* extend the search scope */
@@ -181,8 +182,8 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
181 start = MAX_DMA32_PFN<<PAGE_SHIFT; 182 start = MAX_DMA32_PFN<<PAGE_SHIFT;
182 else 183 else
183 start = MAX_DMA_PFN<<PAGE_SHIFT; 184 start = MAX_DMA_PFN<<PAGE_SHIFT;
184 mem = find_e820_area(start, end, size, align); 185 mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align);
185 if (mem != -1L) 186 if (mem != MEMBLOCK_ERROR)
186 return __va(mem); 187 return __va(mem);
187 188
188 printk(KERN_ERR "Cannot find %lu bytes in node %d\n", 189 printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
@@ -198,10 +199,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
198 unsigned long start_pfn, last_pfn, nodedata_phys; 199 unsigned long start_pfn, last_pfn, nodedata_phys;
199 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 200 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
200 int nid; 201 int nid;
201#ifndef CONFIG_NO_BOOTMEM
202 unsigned long bootmap_start, bootmap_pages, bootmap_size;
203 void *bootmap;
204#endif
205 202
206 if (!end) 203 if (!end)
207 return; 204 return;
@@ -226,7 +223,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
226 if (node_data[nodeid] == NULL) 223 if (node_data[nodeid] == NULL)
227 return; 224 return;
228 nodedata_phys = __pa(node_data[nodeid]); 225 nodedata_phys = __pa(node_data[nodeid]);
229 reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); 226 memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
230 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, 227 printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
231 nodedata_phys + pgdat_size - 1); 228 nodedata_phys + pgdat_size - 1);
232 nid = phys_to_nid(nodedata_phys); 229 nid = phys_to_nid(nodedata_phys);
@@ -238,47 +235,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
238 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 235 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
239 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; 236 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
240 237
241#ifndef CONFIG_NO_BOOTMEM
242 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
243
244 /*
245 * Find a place for the bootmem map
246 * nodedata_phys could be on other nodes by alloc_bootmem,
247 * so need to sure bootmap_start not to be small, otherwise
248 * early_node_mem will get that with find_e820_area instead
249 * of alloc_bootmem, that could clash with reserved range
250 */
251 bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
252 bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
253 /*
254 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like
255 * to use that to align to PAGE_SIZE
256 */
257 bootmap = early_node_mem(nodeid, bootmap_start, end,
258 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
259 if (bootmap == NULL) {
260 free_early(nodedata_phys, nodedata_phys + pgdat_size);
261 node_data[nodeid] = NULL;
262 return;
263 }
264 bootmap_start = __pa(bootmap);
265 reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT),
266 "BOOTMAP");
267
268 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
269 bootmap_start >> PAGE_SHIFT,
270 start_pfn, last_pfn);
271
272 printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
273 bootmap_start, bootmap_start + bootmap_size - 1,
274 bootmap_pages);
275 nid = phys_to_nid(bootmap_start);
276 if (nid != nodeid)
277 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
278
279 free_bootmem_with_active_regions(nodeid, end);
280#endif
281
282 node_set_online(nodeid); 238 node_set_online(nodeid);
283} 239}
284 240
@@ -416,7 +372,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
416 nr_nodes = MAX_NUMNODES; 372 nr_nodes = MAX_NUMNODES;
417 } 373 }
418 374
419 size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes; 375 size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
420 /* 376 /*
421 * Calculate the number of big nodes that can be allocated as a result 377 * Calculate the number of big nodes that can be allocated as a result
422 * of consolidating the remainder. 378 * of consolidating the remainder.
@@ -452,7 +408,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
452 * non-reserved memory is less than the per-node size. 408 * non-reserved memory is less than the per-node size.
453 */ 409 */
454 while (end - physnodes[i].start - 410 while (end - physnodes[i].start -
455 e820_hole_size(physnodes[i].start, end) < size) { 411 memblock_x86_hole_size(physnodes[i].start, end) < size) {
456 end += FAKE_NODE_MIN_SIZE; 412 end += FAKE_NODE_MIN_SIZE;
457 if (end > physnodes[i].end) { 413 if (end > physnodes[i].end) {
458 end = physnodes[i].end; 414 end = physnodes[i].end;
@@ -466,7 +422,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
466 * this one must extend to the boundary. 422 * this one must extend to the boundary.
467 */ 423 */
468 if (end < dma32_end && dma32_end - end - 424 if (end < dma32_end && dma32_end - end -
469 e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 425 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
470 end = dma32_end; 426 end = dma32_end;
471 427
472 /* 428 /*
@@ -475,7 +431,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
475 * physical node. 431 * physical node.
476 */ 432 */
477 if (physnodes[i].end - end - 433 if (physnodes[i].end - end -
478 e820_hole_size(end, physnodes[i].end) < size) 434 memblock_x86_hole_size(end, physnodes[i].end) < size)
479 end = physnodes[i].end; 435 end = physnodes[i].end;
480 436
481 /* 437 /*
@@ -503,7 +459,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
503{ 459{
504 u64 end = start + size; 460 u64 end = start + size;
505 461
506 while (end - start - e820_hole_size(start, end) < size) { 462 while (end - start - memblock_x86_hole_size(start, end) < size) {
507 end += FAKE_NODE_MIN_SIZE; 463 end += FAKE_NODE_MIN_SIZE;
508 if (end > max_addr) { 464 if (end > max_addr) {
509 end = max_addr; 465 end = max_addr;
@@ -532,7 +488,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
532 * creates a uniform distribution of node sizes across the entire 488 * creates a uniform distribution of node sizes across the entire
533 * machine (but not necessarily over physical nodes). 489 * machine (but not necessarily over physical nodes).
534 */ 490 */
535 min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) / 491 min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
536 MAX_NUMNODES; 492 MAX_NUMNODES;
537 min_size = max(min_size, FAKE_NODE_MIN_SIZE); 493 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
538 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) 494 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
@@ -565,7 +521,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
565 * this one must extend to the boundary. 521 * this one must extend to the boundary.
566 */ 522 */
567 if (end < dma32_end && dma32_end - end - 523 if (end < dma32_end && dma32_end - end -
568 e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 524 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
569 end = dma32_end; 525 end = dma32_end;
570 526
571 /* 527 /*
@@ -574,7 +530,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
574 * physical node. 530 * physical node.
575 */ 531 */
576 if (physnodes[i].end - end - 532 if (physnodes[i].end - end -
577 e820_hole_size(end, physnodes[i].end) < size) 533 memblock_x86_hole_size(end, physnodes[i].end) < size)
578 end = physnodes[i].end; 534 end = physnodes[i].end;
579 535
580 /* 536 /*
@@ -638,7 +594,7 @@ static int __init numa_emulation(unsigned long start_pfn,
638 */ 594 */
639 remove_all_active_ranges(); 595 remove_all_active_ranges();
640 for_each_node_mask(i, node_possible_map) { 596 for_each_node_mask(i, node_possible_map) {
641 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, 597 memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
642 nodes[i].end >> PAGE_SHIFT); 598 nodes[i].end >> PAGE_SHIFT);
643 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 599 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
644 } 600 }
@@ -691,7 +647,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
691 node_set(0, node_possible_map); 647 node_set(0, node_possible_map);
692 for (i = 0; i < nr_cpu_ids; i++) 648 for (i = 0; i < nr_cpu_ids; i++)
693 numa_set_node(i, 0); 649 numa_set_node(i, 0);
694 e820_register_active_regions(0, start_pfn, last_pfn); 650 memblock_x86_register_active_regions(0, start_pfn, last_pfn);
695 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); 651 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
696} 652}
697 653
@@ -703,9 +659,7 @@ unsigned long __init numa_free_all_bootmem(void)
703 for_each_online_node(i) 659 for_each_online_node(i)
704 pages += free_all_bootmem_node(NODE_DATA(i)); 660 pages += free_all_bootmem_node(NODE_DATA(i));
705 661
706#ifdef CONFIG_NO_BOOTMEM
707 pages += free_all_memory_core_early(MAX_NUMNODES); 662 pages += free_all_memory_core_early(MAX_NUMNODES);
708#endif
709 663
710 return pages; 664 return pages;
711} 665}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5c4ee422590e..8be8c7d7bc89 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
87#define UNSHARED_PTRS_PER_PGD \ 87#define UNSHARED_PTRS_PER_PGD \
88 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) 88 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
89 89
90static void pgd_ctor(pgd_t *pgd) 90
91static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
92{
93 BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
94 virt_to_page(pgd)->index = (pgoff_t)mm;
95}
96
97struct mm_struct *pgd_page_get_mm(struct page *page)
98{
99 return (struct mm_struct *)page->index;
100}
101
102static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
91{ 103{
92 /* If the pgd points to a shared pagetable level (either the 104 /* If the pgd points to a shared pagetable level (either the
93 ptes in non-PAE, or shared PMD in PAE), then just copy the 105 ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -98,15 +110,13 @@ static void pgd_ctor(pgd_t *pgd)
98 clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, 110 clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
99 swapper_pg_dir + KERNEL_PGD_BOUNDARY, 111 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
100 KERNEL_PGD_PTRS); 112 KERNEL_PGD_PTRS);
101 paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
102 __pa(swapper_pg_dir) >> PAGE_SHIFT,
103 KERNEL_PGD_BOUNDARY,
104 KERNEL_PGD_PTRS);
105 } 113 }
106 114
107 /* list required to sync kernel mapping updates */ 115 /* list required to sync kernel mapping updates */
108 if (!SHARED_KERNEL_PMD) 116 if (!SHARED_KERNEL_PMD) {
117 pgd_set_mm(pgd, mm);
109 pgd_list_add(pgd); 118 pgd_list_add(pgd);
119 }
110} 120}
111 121
112static void pgd_dtor(pgd_t *pgd) 122static void pgd_dtor(pgd_t *pgd)
@@ -272,7 +282,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
272 */ 282 */
273 spin_lock_irqsave(&pgd_lock, flags); 283 spin_lock_irqsave(&pgd_lock, flags);
274 284
275 pgd_ctor(pgd); 285 pgd_ctor(mm, pgd);
276 pgd_prepopulate_pmd(mm, pgd, pmds); 286 pgd_prepopulate_pmd(mm, pgd, pmds);
277 287
278 spin_unlock_irqrestore(&pgd_lock, flags); 288 spin_unlock_irqrestore(&pgd_lock, flags);
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 9324f13492d5..a17dffd136c1 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -25,6 +25,7 @@
25 */ 25 */
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/bootmem.h> 27#include <linux/bootmem.h>
28#include <linux/memblock.h>
28#include <linux/mmzone.h> 29#include <linux/mmzone.h>
29#include <linux/acpi.h> 30#include <linux/acpi.h>
30#include <linux/nodemask.h> 31#include <linux/nodemask.h>
@@ -264,7 +265,7 @@ int __init get_memcfg_from_srat(void)
264 if (node_read_chunk(chunk->nid, chunk)) 265 if (node_read_chunk(chunk->nid, chunk))
265 continue; 266 continue;
266 267
267 e820_register_active_regions(chunk->nid, chunk->start_pfn, 268 memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn,
268 min(chunk->end_pfn, max_pfn)); 269 min(chunk->end_pfn, max_pfn));
269 } 270 }
270 /* for out of order entries in SRAT */ 271 /* for out of order entries in SRAT */
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 9c0d0d399c30..a35cb9d8b060 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -16,6 +16,7 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/topology.h> 17#include <linux/topology.h>
18#include <linux/bootmem.h> 18#include <linux/bootmem.h>
19#include <linux/memblock.h>
19#include <linux/mm.h> 20#include <linux/mm.h>
20#include <asm/proto.h> 21#include <asm/proto.h>
21#include <asm/numa.h> 22#include <asm/numa.h>
@@ -98,15 +99,15 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
98 unsigned long phys; 99 unsigned long phys;
99 100
100 length = slit->header.length; 101 length = slit->header.length;
101 phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length, 102 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
102 PAGE_SIZE); 103 PAGE_SIZE);
103 104
104 if (phys == -1L) 105 if (phys == MEMBLOCK_ERROR)
105 panic(" Can not save slit!\n"); 106 panic(" Can not save slit!\n");
106 107
107 acpi_slit = __va(phys); 108 acpi_slit = __va(phys);
108 memcpy(acpi_slit, slit, length); 109 memcpy(acpi_slit, slit, length);
109 reserve_early(phys, phys + length, "ACPI SLIT"); 110 memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
110} 111}
111 112
112/* Callback for Proximity Domain -> x2APIC mapping */ 113/* Callback for Proximity Domain -> x2APIC mapping */
@@ -324,7 +325,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
324 pxmram = 0; 325 pxmram = 0;
325 } 326 }
326 327
327 e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT); 328 e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
328 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ 329 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
329 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { 330 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
330 printk(KERN_ERR 331 printk(KERN_ERR
@@ -421,7 +422,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
421 } 422 }
422 423
423 for (i = 0; i < num_node_memblks; i++) 424 for (i = 0; i < num_node_memblks; i++)
424 e820_register_active_regions(memblk_nodeid[i], 425 memblock_x86_register_active_regions(memblk_nodeid[i],
425 node_memblk_range[i].start >> PAGE_SHIFT, 426 node_memblk_range[i].start >> PAGE_SHIFT,
426 node_memblk_range[i].end >> PAGE_SHIFT); 427 node_memblk_range[i].end >> PAGE_SHIFT);
427 428
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c03f14ab6667..49358481c733 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -5,6 +5,7 @@
5#include <linux/smp.h> 5#include <linux/smp.h>
6#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/cpu.h>
8 9
9#include <asm/tlbflush.h> 10#include <asm/tlbflush.h>
10#include <asm/mmu_context.h> 11#include <asm/mmu_context.h>
@@ -52,6 +53,8 @@ union smp_flush_state {
52 want false sharing in the per cpu data segment. */ 53 want false sharing in the per cpu data segment. */
53static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; 54static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
54 55
56static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
57
55/* 58/*
56 * We cannot call mmdrop() because we are in interrupt context, 59 * We cannot call mmdrop() because we are in interrupt context,
57 * instead update mm->cpu_vm_mask. 60 * instead update mm->cpu_vm_mask.
@@ -173,7 +176,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
173 union smp_flush_state *f; 176 union smp_flush_state *f;
174 177
175 /* Caller has disabled preemption */ 178 /* Caller has disabled preemption */
176 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; 179 sender = this_cpu_read(tlb_vector_offset);
177 f = &flush_state[sender]; 180 f = &flush_state[sender];
178 181
179 /* 182 /*
@@ -218,6 +221,47 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
218 flush_tlb_others_ipi(cpumask, mm, va); 221 flush_tlb_others_ipi(cpumask, mm, va);
219} 222}
220 223
224static void __cpuinit calculate_tlb_offset(void)
225{
226 int cpu, node, nr_node_vecs;
227 /*
228 * we are changing tlb_vector_offset for each CPU in runtime, but this
229 * will not cause inconsistency, as the write is atomic under X86. we
230 * might see more lock contentions in a short time, but after all CPU's
231 * tlb_vector_offset are changed, everything should go normal
232 *
233 * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
234 * waste some vectors.
235 **/
236 if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
237 nr_node_vecs = 1;
238 else
239 nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
240
241 for_each_online_node(node) {
242 int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
243 nr_node_vecs;
244 int cpu_offset = 0;
245 for_each_cpu(cpu, cpumask_of_node(node)) {
246 per_cpu(tlb_vector_offset, cpu) = node_offset +
247 cpu_offset;
248 cpu_offset++;
249 cpu_offset = cpu_offset % nr_node_vecs;
250 }
251 }
252}
253
254static int tlb_cpuhp_notify(struct notifier_block *n,
255 unsigned long action, void *hcpu)
256{
257 switch (action & 0xf) {
258 case CPU_ONLINE:
259 case CPU_DEAD:
260 calculate_tlb_offset();
261 }
262 return NOTIFY_OK;
263}
264
221static int __cpuinit init_smp_flush(void) 265static int __cpuinit init_smp_flush(void)
222{ 266{
223 int i; 267 int i;
@@ -225,6 +269,8 @@ static int __cpuinit init_smp_flush(void)
225 for (i = 0; i < ARRAY_SIZE(flush_state); i++) 269 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
226 raw_spin_lock_init(&flush_state[i].tlbstate_lock); 270 raw_spin_lock_init(&flush_state[i].tlbstate_lock);
227 271
272 calculate_tlb_offset();
273 hotcpu_notifier(tlb_cpuhp_notify, 0);
228 return 0; 274 return 0;
229} 275}
230core_initcall(init_smp_flush); 276core_initcall(init_smp_flush);
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 3855096c59b8..2d49d4e19a36 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -14,6 +14,7 @@
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/stacktrace.h> 16#include <asm/stacktrace.h>
17#include <linux/compat.h>
17 18
18static void backtrace_warning_symbol(void *data, char *msg, 19static void backtrace_warning_symbol(void *data, char *msg,
19 unsigned long symbol) 20 unsigned long symbol)
@@ -48,14 +49,12 @@ static struct stacktrace_ops backtrace_ops = {
48 .walk_stack = print_context_stack, 49 .walk_stack = print_context_stack,
49}; 50};
50 51
51struct frame_head { 52#ifdef CONFIG_COMPAT
52 struct frame_head *bp; 53static struct stack_frame_ia32 *
53 unsigned long ret; 54dump_user_backtrace_32(struct stack_frame_ia32 *head)
54} __attribute__((packed));
55
56static struct frame_head *dump_user_backtrace(struct frame_head *head)
57{ 55{
58 struct frame_head bufhead[2]; 56 struct stack_frame_ia32 bufhead[2];
57 struct stack_frame_ia32 *fp;
59 58
60 /* Also check accessibility of one struct frame_head beyond */ 59 /* Also check accessibility of one struct frame_head beyond */
61 if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) 60 if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
@@ -63,20 +62,66 @@ static struct frame_head *dump_user_backtrace(struct frame_head *head)
63 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) 62 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
64 return NULL; 63 return NULL;
65 64
66 oprofile_add_trace(bufhead[0].ret); 65 fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
66
67 oprofile_add_trace(bufhead[0].return_address);
68
69 /* frame pointers should strictly progress back up the stack
70 * (towards higher addresses) */
71 if (head >= fp)
72 return NULL;
73
74 return fp;
75}
76
77static inline int
78x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
79{
80 struct stack_frame_ia32 *head;
81
82 /* User process is 32-bit */
83 if (!current || !test_thread_flag(TIF_IA32))
84 return 0;
85
86 head = (struct stack_frame_ia32 *) regs->bp;
87 while (depth-- && head)
88 head = dump_user_backtrace_32(head);
89
90 return 1;
91}
92
93#else
94static inline int
95x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
96{
97 return 0;
98}
99#endif /* CONFIG_COMPAT */
100
101static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
102{
103 struct stack_frame bufhead[2];
104
105 /* Also check accessibility of one struct stack_frame beyond */
106 if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
107 return NULL;
108 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
109 return NULL;
110
111 oprofile_add_trace(bufhead[0].return_address);
67 112
68 /* frame pointers should strictly progress back up the stack 113 /* frame pointers should strictly progress back up the stack
69 * (towards higher addresses) */ 114 * (towards higher addresses) */
70 if (head >= bufhead[0].bp) 115 if (head >= bufhead[0].next_frame)
71 return NULL; 116 return NULL;
72 117
73 return bufhead[0].bp; 118 return bufhead[0].next_frame;
74} 119}
75 120
76void 121void
77x86_backtrace(struct pt_regs * const regs, unsigned int depth) 122x86_backtrace(struct pt_regs * const regs, unsigned int depth)
78{ 123{
79 struct frame_head *head = (struct frame_head *)frame_pointer(regs); 124 struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
80 125
81 if (!user_mode_vm(regs)) { 126 if (!user_mode_vm(regs)) {
82 unsigned long stack = kernel_stack_pointer(regs); 127 unsigned long stack = kernel_stack_pointer(regs);
@@ -86,6 +131,9 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
86 return; 131 return;
87 } 132 }
88 133
134 if (x86_backtrace_32(regs, depth))
135 return;
136
89 while (depth-- && head) 137 while (depth-- && head)
90 head = dump_user_backtrace(head); 138 head = dump_user_backtrace(head);
91} 139}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f1575c9a2572..bd1489c3ce09 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -695,9 +695,6 @@ static int __init ppro_init(char **cpu_type)
695 return 1; 695 return 1;
696} 696}
697 697
698/* in order to get sysfs right */
699static int using_nmi;
700
701int __init op_nmi_init(struct oprofile_operations *ops) 698int __init op_nmi_init(struct oprofile_operations *ops)
702{ 699{
703 __u8 vendor = boot_cpu_data.x86_vendor; 700 __u8 vendor = boot_cpu_data.x86_vendor;
@@ -705,8 +702,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
705 char *cpu_type = NULL; 702 char *cpu_type = NULL;
706 int ret = 0; 703 int ret = 0;
707 704
708 using_nmi = 0;
709
710 if (!cpu_has_apic) 705 if (!cpu_has_apic)
711 return -ENODEV; 706 return -ENODEV;
712 707
@@ -790,13 +785,11 @@ int __init op_nmi_init(struct oprofile_operations *ops)
790 if (ret) 785 if (ret)
791 return ret; 786 return ret;
792 787
793 using_nmi = 1;
794 printk(KERN_INFO "oprofile: using NMI interrupt.\n"); 788 printk(KERN_INFO "oprofile: using NMI interrupt.\n");
795 return 0; 789 return 0;
796} 790}
797 791
798void op_nmi_exit(void) 792void op_nmi_exit(void)
799{ 793{
800 if (using_nmi) 794 exit_sysfs();
801 exit_sysfs();
802} 795}
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index b67a6b5aa8d4..42fb46f83883 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -64,15 +64,22 @@ static u64 ibs_op_ctl;
64 * IBS cpuid feature detection 64 * IBS cpuid feature detection
65 */ 65 */
66 66
67#define IBS_CPUID_FEATURES 0x8000001b 67#define IBS_CPUID_FEATURES 0x8000001b
68 68
69/* 69/*
70 * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but 70 * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
71 * bit 0 is used to indicate the existence of IBS. 71 * bit 0 is used to indicate the existence of IBS.
72 */ 72 */
73#define IBS_CAPS_AVAIL (1LL<<0) 73#define IBS_CAPS_AVAIL (1U<<0)
74#define IBS_CAPS_RDWROPCNT (1LL<<3) 74#define IBS_CAPS_RDWROPCNT (1U<<3)
75#define IBS_CAPS_OPCNT (1LL<<4) 75#define IBS_CAPS_OPCNT (1U<<4)
76
77/*
78 * IBS APIC setup
79 */
80#define IBSCTL 0x1cc
81#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
82#define IBSCTL_LVT_OFFSET_MASK 0x0F
76 83
77/* 84/*
78 * IBS randomization macros 85 * IBS randomization macros
@@ -266,6 +273,74 @@ static void op_amd_stop_ibs(void)
266 wrmsrl(MSR_AMD64_IBSOPCTL, 0); 273 wrmsrl(MSR_AMD64_IBSOPCTL, 0);
267} 274}
268 275
276static inline int eilvt_is_available(int offset)
277{
278 /* check if we may assign a vector */
279 return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
280}
281
282static inline int ibs_eilvt_valid(void)
283{
284 u64 val;
285 int offset;
286
287 rdmsrl(MSR_AMD64_IBSCTL, val);
288 if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
289 pr_err(FW_BUG "cpu %d, invalid IBS "
290 "interrupt offset %d (MSR%08X=0x%016llx)",
291 smp_processor_id(), offset,
292 MSR_AMD64_IBSCTL, val);
293 return 0;
294 }
295
296 offset = val & IBSCTL_LVT_OFFSET_MASK;
297
298 if (eilvt_is_available(offset))
299 return !0;
300
301 pr_err(FW_BUG "cpu %d, IBS interrupt offset %d "
302 "not available (MSR%08X=0x%016llx)",
303 smp_processor_id(), offset,
304 MSR_AMD64_IBSCTL, val);
305
306 return 0;
307}
308
309static inline int get_ibs_offset(void)
310{
311 u64 val;
312
313 rdmsrl(MSR_AMD64_IBSCTL, val);
314 if (!(val & IBSCTL_LVT_OFFSET_VALID))
315 return -EINVAL;
316
317 return val & IBSCTL_LVT_OFFSET_MASK;
318}
319
320static void setup_APIC_ibs(void)
321{
322 int offset;
323
324 offset = get_ibs_offset();
325 if (offset < 0)
326 goto failed;
327
328 if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
329 return;
330failed:
331 pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n",
332 smp_processor_id());
333}
334
335static void clear_APIC_ibs(void)
336{
337 int offset;
338
339 offset = get_ibs_offset();
340 if (offset >= 0)
341 setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
342}
343
269#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 344#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
270 345
271static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, 346static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
@@ -376,13 +451,13 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
376 } 451 }
377 452
378 if (ibs_caps) 453 if (ibs_caps)
379 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); 454 setup_APIC_ibs();
380} 455}
381 456
382static void op_amd_cpu_shutdown(void) 457static void op_amd_cpu_shutdown(void)
383{ 458{
384 if (ibs_caps) 459 if (ibs_caps)
385 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); 460 clear_APIC_ibs();
386} 461}
387 462
388static int op_amd_check_ctrs(struct pt_regs * const regs, 463static int op_amd_check_ctrs(struct pt_regs * const regs,
@@ -445,16 +520,11 @@ static void op_amd_stop(struct op_msrs const * const msrs)
445 op_amd_stop_ibs(); 520 op_amd_stop_ibs();
446} 521}
447 522
448static int __init_ibs_nmi(void) 523static int setup_ibs_ctl(int ibs_eilvt_off)
449{ 524{
450#define IBSCTL_LVTOFFSETVAL (1 << 8)
451#define IBSCTL 0x1cc
452 struct pci_dev *cpu_cfg; 525 struct pci_dev *cpu_cfg;
453 int nodes; 526 int nodes;
454 u32 value = 0; 527 u32 value = 0;
455 u8 ibs_eilvt_off;
456
457 ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
458 528
459 nodes = 0; 529 nodes = 0;
460 cpu_cfg = NULL; 530 cpu_cfg = NULL;
@@ -466,21 +536,60 @@ static int __init_ibs_nmi(void)
466 break; 536 break;
467 ++nodes; 537 ++nodes;
468 pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off 538 pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
469 | IBSCTL_LVTOFFSETVAL); 539 | IBSCTL_LVT_OFFSET_VALID);
470 pci_read_config_dword(cpu_cfg, IBSCTL, &value); 540 pci_read_config_dword(cpu_cfg, IBSCTL, &value);
471 if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { 541 if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
472 pci_dev_put(cpu_cfg); 542 pci_dev_put(cpu_cfg);
473 printk(KERN_DEBUG "Failed to setup IBS LVT offset, " 543 printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
474 "IBSCTL = 0x%08x", value); 544 "IBSCTL = 0x%08x\n", value);
475 return 1; 545 return -EINVAL;
476 } 546 }
477 } while (1); 547 } while (1);
478 548
479 if (!nodes) { 549 if (!nodes) {
480 printk(KERN_DEBUG "No CPU node configured for IBS"); 550 printk(KERN_DEBUG "No CPU node configured for IBS\n");
481 return 1; 551 return -ENODEV;
552 }
553
554 return 0;
555}
556
557static int force_ibs_eilvt_setup(void)
558{
559 int i;
560 int ret;
561
562 /* find the next free available EILVT entry */
563 for (i = 1; i < 4; i++) {
564 if (!eilvt_is_available(i))
565 continue;
566 ret = setup_ibs_ctl(i);
567 if (ret)
568 return ret;
569 return 0;
482 } 570 }
483 571
572 printk(KERN_DEBUG "No EILVT entry available\n");
573
574 return -EBUSY;
575}
576
577static int __init_ibs_nmi(void)
578{
579 int ret;
580
581 if (ibs_eilvt_valid())
582 return 0;
583
584 ret = force_ibs_eilvt_setup();
585 if (ret)
586 return ret;
587
588 if (!ibs_eilvt_valid())
589 return -EFAULT;
590
591 pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
592
484 return 0; 593 return 0;
485} 594}
486 595
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index b34815408f58..13700ec8e2e4 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -304,7 +304,7 @@ static struct pci_raw_ops pci_olpc_conf = {
304 304
305int __init pci_olpc_init(void) 305int __init pci_olpc_init(void)
306{ 306{
307 printk(KERN_INFO "PCI: Using configuration type OLPC\n"); 307 printk(KERN_INFO "PCI: Using configuration type OLPC XO-1\n");
308 raw_pci_ops = &pci_olpc_conf; 308 raw_pci_ops = &pci_olpc_conf;
309 is_lx = is_geode_lx(); 309 is_lx = is_geode_lx();
310 return 0; 310 return 0;
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index 1304bcec8ee5..7c0fedd98ea0 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -106,6 +106,7 @@ static const struct file_operations u32_array_fops = {
106 .open = u32_array_open, 106 .open = u32_array_open,
107 .release= xen_array_release, 107 .release= xen_array_release,
108 .read = u32_array_read, 108 .read = u32_array_read,
109 .llseek = no_llseek,
109}; 110};
110 111
111struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode, 112struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 7d46c8441418..63b83ceebd1a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -30,6 +30,7 @@
30#include <linux/console.h> 30#include <linux/console.h>
31#include <linux/pci.h> 31#include <linux/pci.h>
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/memblock.h>
33 34
34#include <xen/xen.h> 35#include <xen/xen.h>
35#include <xen/interface/xen.h> 36#include <xen/interface/xen.h>
@@ -1183,6 +1184,8 @@ asmlinkage void __init xen_start_kernel(void)
1183 local_irq_disable(); 1184 local_irq_disable();
1184 early_boot_irqs_off(); 1185 early_boot_irqs_off();
1185 1186
1187 memblock_init();
1188
1186 xen_raw_console_write("mapping kernel into physical memory\n"); 1189 xen_raw_console_write("mapping kernel into physical memory\n");
1187 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 1190 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
1188 1191
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 42086ac406af..f72d18c69221 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -45,6 +45,7 @@
45#include <linux/vmalloc.h> 45#include <linux/vmalloc.h>
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/gfp.h> 47#include <linux/gfp.h>
48#include <linux/memblock.h>
48 49
49#include <asm/pgtable.h> 50#include <asm/pgtable.h>
50#include <asm/tlbflush.h> 51#include <asm/tlbflush.h>
@@ -55,6 +56,7 @@
55#include <asm/e820.h> 56#include <asm/e820.h>
56#include <asm/linkage.h> 57#include <asm/linkage.h>
57#include <asm/page.h> 58#include <asm/page.h>
59#include <asm/init.h>
58 60
59#include <asm/xen/hypercall.h> 61#include <asm/xen/hypercall.h>
60#include <asm/xen/hypervisor.h> 62#include <asm/xen/hypervisor.h>
@@ -359,7 +361,8 @@ void make_lowmem_page_readonly(void *vaddr)
359 unsigned int level; 361 unsigned int level;
360 362
361 pte = lookup_address(address, &level); 363 pte = lookup_address(address, &level);
362 BUG_ON(pte == NULL); 364 if (pte == NULL)
365 return; /* vaddr missing */
363 366
364 ptev = pte_wrprotect(*pte); 367 ptev = pte_wrprotect(*pte);
365 368
@@ -374,7 +377,8 @@ void make_lowmem_page_readwrite(void *vaddr)
374 unsigned int level; 377 unsigned int level;
375 378
376 pte = lookup_address(address, &level); 379 pte = lookup_address(address, &level);
377 BUG_ON(pte == NULL); 380 if (pte == NULL)
381 return; /* vaddr missing */
378 382
379 ptev = pte_mkwrite(*pte); 383 ptev = pte_mkwrite(*pte);
380 384
@@ -1508,13 +1512,25 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1508#endif 1512#endif
1509} 1513}
1510 1514
1511#ifdef CONFIG_X86_32
1512static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) 1515static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1513{ 1516{
1517 unsigned long pfn = pte_pfn(pte);
1518
1519#ifdef CONFIG_X86_32
1514 /* If there's an existing pte, then don't allow _PAGE_RW to be set */ 1520 /* If there's an existing pte, then don't allow _PAGE_RW to be set */
1515 if (pte_val_ma(*ptep) & _PAGE_PRESENT) 1521 if (pte_val_ma(*ptep) & _PAGE_PRESENT)
1516 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & 1522 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
1517 pte_val_ma(pte)); 1523 pte_val_ma(pte));
1524#endif
1525
1526 /*
1527 * If the new pfn is within the range of the newly allocated
1528 * kernel pagetable, and it isn't being mapped into an
1529 * early_ioremap fixmap slot, make sure it is RO.
1530 */
1531 if (!is_early_ioremap_ptep(ptep) &&
1532 pfn >= e820_table_start && pfn < e820_table_end)
1533 pte = pte_wrprotect(pte);
1518 1534
1519 return pte; 1535 return pte;
1520} 1536}
@@ -1527,7 +1543,6 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
1527 1543
1528 xen_set_pte(ptep, pte); 1544 xen_set_pte(ptep, pte);
1529} 1545}
1530#endif
1531 1546
1532static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) 1547static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1533{ 1548{
@@ -1814,7 +1829,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1814 __xen_write_cr3(true, __pa(pgd)); 1829 __xen_write_cr3(true, __pa(pgd));
1815 xen_mc_issue(PARAVIRT_LAZY_CPU); 1830 xen_mc_issue(PARAVIRT_LAZY_CPU);
1816 1831
1817 reserve_early(__pa(xen_start_info->pt_base), 1832 memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
1818 __pa(xen_start_info->pt_base + 1833 __pa(xen_start_info->pt_base +
1819 xen_start_info->nr_pt_frames * PAGE_SIZE), 1834 xen_start_info->nr_pt_frames * PAGE_SIZE),
1820 "XEN PAGETABLES"); 1835 "XEN PAGETABLES");
@@ -1852,7 +1867,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1852 1867
1853 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); 1868 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
1854 1869
1855 reserve_early(__pa(xen_start_info->pt_base), 1870 memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
1856 __pa(xen_start_info->pt_base + 1871 __pa(xen_start_info->pt_base +
1857 xen_start_info->nr_pt_frames * PAGE_SIZE), 1872 xen_start_info->nr_pt_frames * PAGE_SIZE),
1858 "XEN PAGETABLES"); 1873 "XEN PAGETABLES");
@@ -1969,14 +1984,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1969 .alloc_pte = xen_alloc_pte_init, 1984 .alloc_pte = xen_alloc_pte_init,
1970 .release_pte = xen_release_pte_init, 1985 .release_pte = xen_release_pte_init,
1971 .alloc_pmd = xen_alloc_pmd_init, 1986 .alloc_pmd = xen_alloc_pmd_init,
1972 .alloc_pmd_clone = paravirt_nop,
1973 .release_pmd = xen_release_pmd_init, 1987 .release_pmd = xen_release_pmd_init,
1974 1988
1975#ifdef CONFIG_X86_64
1976 .set_pte = xen_set_pte,
1977#else
1978 .set_pte = xen_set_pte_init, 1989 .set_pte = xen_set_pte_init,
1979#endif
1980 .set_pte_at = xen_set_pte_at, 1990 .set_pte_at = xen_set_pte_at,
1981 .set_pmd = xen_set_pmd_hyper, 1991 .set_pmd = xen_set_pmd_hyper,
1982 1992
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index a013ec9d0c54..22471001b74c 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -5,6 +5,7 @@
5 5
6#include <asm/xen/hypervisor.h> 6#include <asm/xen/hypervisor.h>
7#include <xen/xen.h> 7#include <xen/xen.h>
8#include <asm/iommu_table.h>
8 9
9int xen_swiotlb __read_mostly; 10int xen_swiotlb __read_mostly;
10 11
@@ -56,3 +57,7 @@ void __init pci_xen_swiotlb_init(void)
56 dma_ops = &xen_swiotlb_dma_ops; 57 dma_ops = &xen_swiotlb_dma_ops;
57 } 58 }
58} 59}
60IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
61 0,
62 pci_xen_swiotlb_init,
63 0);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 328b00305426..9729c903404b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -8,6 +8,7 @@
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/pm.h> 10#include <linux/pm.h>
11#include <linux/memblock.h>
11 12
12#include <asm/elf.h> 13#include <asm/elf.h>
13#include <asm/vdso.h> 14#include <asm/vdso.h>
@@ -129,7 +130,7 @@ char * __init xen_memory_setup(void)
129 * - xen_start_info 130 * - xen_start_info
130 * See comment above "struct start_info" in <xen/interface/xen.h> 131 * See comment above "struct start_info" in <xen/interface/xen.h>
131 */ 132 */
132 reserve_early(__pa(xen_start_info->mfn_list), 133 memblock_x86_reserve_range(__pa(xen_start_info->mfn_list),
133 __pa(xen_start_info->pt_base), 134 __pa(xen_start_info->pt_base),
134 "XEN START INFO"); 135 "XEN START INFO");
135 136
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index e0500646585d..23e061b9327b 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -224,7 +224,7 @@ static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enab
224 goto out; 224 goto out;
225 } 225 }
226 226
227 flags = __raw_local_save_flags(); 227 flags = arch_local_save_flags();
228 if (irq_enable) { 228 if (irq_enable) {
229 ADD_STATS(taken_slow_irqenable, 1); 229 ADD_STATS(taken_slow_irqenable, 1);
230 raw_local_irq_enable(); 230 raw_local_irq_enable();