aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/feature-removal-schedule.txt12
-rw-r--r--Documentation/filesystems/proc.txt9
-rw-r--r--Documentation/kernel-parameters.txt33
-rw-r--r--Documentation/nmi_watchdog.txt5
-rw-r--r--Documentation/x86/boot.txt6
-rw-r--r--Documentation/x86/pat.txt24
-rw-r--r--Documentation/x86/x86_64/boot-options.txt11
-rw-r--r--Documentation/x86/x86_64/mm.txt2
-rw-r--r--arch/x86/Kconfig84
-rw-r--r--arch/x86/Kconfig.debug20
-rw-r--r--arch/x86/boot/video-vga.c4
-rw-r--r--arch/x86/boot/video.c2
-rw-r--r--arch/x86/configs/i386_defconfig4
-rw-r--r--arch/x86/configs/x86_64_defconfig4
-rw-r--r--arch/x86/ia32/ia32_signal.c109
-rw-r--r--arch/x86/include/asm/apic.h1
-rw-r--r--arch/x86/include/asm/bigsmp/apic.h2
-rw-r--r--arch/x86/include/asm/bitops.h10
-rw-r--r--arch/x86/include/asm/bug.h2
-rw-r--r--arch/x86/include/asm/byteorder.h74
-rw-r--r--arch/x86/include/asm/cpufeature.h5
-rw-r--r--arch/x86/include/asm/dma-mapping.h2
-rw-r--r--arch/x86/include/asm/dwarf2.h97
-rw-r--r--arch/x86/include/asm/emergency-restart.h4
-rw-r--r--arch/x86/include/asm/es7000/apic.h79
-rw-r--r--arch/x86/include/asm/es7000/wakecpu.h41
-rw-r--r--arch/x86/include/asm/gart.h33
-rw-r--r--arch/x86/include/asm/genapic_32.h19
-rw-r--r--arch/x86/include/asm/genapic_64.h2
-rw-r--r--arch/x86/include/asm/hardirq_32.h2
-rw-r--r--arch/x86/include/asm/hardirq_64.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h4
-rw-r--r--arch/x86/include/asm/hypervisor.h26
-rw-r--r--arch/x86/include/asm/ia32.h18
-rw-r--r--arch/x86/include/asm/idle.h5
-rw-r--r--arch/x86/include/asm/io.h37
-rw-r--r--arch/x86/include/asm/io_64.h2
-rw-r--r--arch/x86/include/asm/io_apic.h10
-rw-r--r--arch/x86/include/asm/iommu.h33
-rw-r--r--arch/x86/include/asm/irq.h4
-rw-r--r--arch/x86/include/asm/irq_regs_32.h2
-rw-r--r--arch/x86/include/asm/kexec.h31
-rw-r--r--arch/x86/include/asm/linkage.h60
-rw-r--r--arch/x86/include/asm/mach-default/mach_apic.h2
-rw-r--r--arch/x86/include/asm/mach-default/mach_wakecpu.h24
-rw-r--r--arch/x86/include/asm/mach-default/smpboot_hooks.h8
-rw-r--r--arch/x86/include/asm/mach-generic/mach_apic.h1
-rw-r--r--arch/x86/include/asm/mach-generic/mach_wakecpu.h12
-rw-r--r--arch/x86/include/asm/mmu_context_32.h13
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/msr.h12
-rw-r--r--arch/x86/include/asm/numaq/wakecpu.h24
-rw-r--r--arch/x86/include/asm/pci.h2
-rw-r--r--arch/x86/include/asm/pgtable-2level.h50
-rw-r--r--arch/x86/include/asm/pgtable-3level.h1
-rw-r--r--arch/x86/include/asm/pgtable.h28
-rw-r--r--arch/x86/include/asm/pgtable_32.h9
-rw-r--r--arch/x86/include/asm/pgtable_64.h28
-rw-r--r--arch/x86/include/asm/prctl.h3
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/reboot.h5
-rw-r--r--arch/x86/include/asm/setup.h7
-rw-r--r--arch/x86/include/asm/sigframe.h70
-rw-r--r--arch/x86/include/asm/signal.h6
-rw-r--r--arch/x86/include/asm/sparsemem.h2
-rw-r--r--arch/x86/include/asm/syscalls.h16
-rw-r--r--arch/x86/include/asm/system.h6
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/trampoline.h7
-rw-r--r--arch/x86/include/asm/traps.h11
-rw-r--r--arch/x86/include/asm/tsc.h8
-rw-r--r--arch/x86/include/asm/uaccess.h4
-rw-r--r--arch/x86/include/asm/uv/bios.h34
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h103
-rw-r--r--arch/x86/include/asm/vmware.h27
-rw-r--r--arch/x86/include/asm/xen/hypercall.h6
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h39
-rw-r--r--arch/x86/include/asm/xen/page.h5
-rw-r--r--arch/x86/kernel/Makefile7
-rw-r--r--arch/x86/kernel/acpi/boot.c11
-rw-r--r--arch/x86/kernel/amd_iommu.c1
-rw-r--r--arch/x86/kernel/amd_iommu_init.c1
-rw-r--r--arch/x86/kernel/aperture_64.c5
-rw-r--r--arch/x86/kernel/apic.c140
-rw-r--r--arch/x86/kernel/apm_32.c4
-rw-r--r--arch/x86/kernel/asm-offsets_32.c2
-rw-r--r--arch/x86/kernel/asm-offsets_64.c4
-rw-r--r--arch/x86/kernel/bios_uv.c58
-rw-r--r--arch/x86/kernel/check.c161
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c9
-rw-r--r--arch/x86/kernel/cpu/common.c8
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c58
-rw-r--r--arch/x86/kernel/cpu/intel.c21
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c17
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c346
-rw-r--r--arch/x86/kernel/cpu/vmware.c112
-rw-r--r--arch/x86/kernel/crash.c70
-rw-r--r--arch/x86/kernel/ds.c9
-rw-r--r--arch/x86/kernel/dumpstack.c319
-rw-r--r--arch/x86/kernel/dumpstack.h39
-rw-r--r--arch/x86/kernel/dumpstack_32.c302
-rw-r--r--arch/x86/kernel/dumpstack_64.c282
-rw-r--r--arch/x86/kernel/e820.c16
-rw-r--r--arch/x86/kernel/early-quirks.c1
-rw-r--r--arch/x86/kernel/early_printk.c47
-rw-r--r--arch/x86/kernel/entry_32.S477
-rw-r--r--arch/x86/kernel/entry_64.S1360
-rw-r--r--arch/x86/kernel/es7000_32.c62
-rw-r--r--arch/x86/kernel/genapic_64.c4
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c111
-rw-r--r--arch/x86/kernel/head.c1
-rw-r--r--arch/x86/kernel/head32.c3
-rw-r--r--arch/x86/kernel/head64.c3
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/init_task.c1
-rw-r--r--arch/x86/kernel/io_apic.c3
-rw-r--r--arch/x86/kernel/irq_64.c24
-rw-r--r--arch/x86/kernel/irqinit_32.c2
-rw-r--r--arch/x86/kernel/irqinit_64.c66
-rw-r--r--arch/x86/kernel/machine_kexec_32.c104
-rw-r--r--arch/x86/kernel/microcode_amd.c232
-rw-r--r--arch/x86/kernel/microcode_core.c6
-rw-r--r--arch/x86/kernel/microcode_intel.c2
-rw-r--r--arch/x86/kernel/mpparse.c25
-rw-r--r--arch/x86/kernel/nmi.c58
-rw-r--r--arch/x86/kernel/numaq_32.c10
-rw-r--r--arch/x86/kernel/pci-dma.c11
-rw-r--r--arch/x86/kernel/process.c19
-rw-r--r--arch/x86/kernel/ptrace.c9
-rw-r--r--arch/x86/kernel/reboot.c126
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S115
-rw-r--r--arch/x86/kernel/setup.c169
-rw-r--r--arch/x86/kernel/sigframe.h42
-rw-r--r--arch/x86/kernel/signal.c (renamed from arch/x86/kernel/signal_32.c)567
-rw-r--r--arch/x86/kernel/signal_64.c516
-rw-r--r--arch/x86/kernel/smp.c31
-rw-r--r--arch/x86/kernel/smpboot.c23
-rw-r--r--arch/x86/kernel/time_32.c2
-rw-r--r--arch/x86/kernel/time_64.c6
-rw-r--r--arch/x86/kernel/tlb_32.c13
-rw-r--r--arch/x86/kernel/tlb_64.c2
-rw-r--r--arch/x86/kernel/tlb_uv.c4
-rw-r--r--arch/x86/kernel/trampoline.c19
-rw-r--r--arch/x86/kernel/traps.c38
-rw-r--r--arch/x86/kernel/tsc.c42
-rw-r--r--arch/x86/kernel/tsc_sync.c8
-rw-r--r--arch/x86/kernel/vmi_32.c119
-rw-r--r--arch/x86/kernel/vsyscall_64.c9
-rw-r--r--arch/x86/lguest/boot.c3
-rw-r--r--arch/x86/mach-generic/bigsmp.c1
-rw-r--r--arch/x86/mach-generic/default.c1
-rw-r--r--arch/x86/mach-generic/es7000.c14
-rw-r--r--arch/x86/mach-generic/probe.c16
-rw-r--r--arch/x86/mach-generic/summit.c1
-rw-r--r--arch/x86/mm/fault.c11
-rw-r--r--arch/x86/mm/init_32.c32
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/ioremap.c3
-rw-r--r--arch/x86/mm/pat.c236
-rw-r--r--arch/x86/pci/common.c17
-rw-r--r--arch/x86/pci/direct.c4
-rw-r--r--arch/x86/pci/pci.h1
-rw-r--r--arch/x86/xen/enlighten.c17
-rw-r--r--arch/x86/xen/mmu.c17
-rw-r--r--arch/x86/xen/multicalls.c2
-rw-r--r--arch/x86/xen/setup.c9
-rw-r--r--drivers/acpi/pci_irq.c56
-rw-r--r--drivers/acpi/processor_idle.c6
-rw-r--r--drivers/firmware/dmi_scan.c11
-rw-r--r--drivers/misc/sgi-gru/gruprocfs.c1
-rw-r--r--drivers/misc/sgi-xp/xp.h7
-rw-r--r--drivers/misc/sgi-xp/xp_main.c7
-rw-r--r--drivers/misc/sgi-xp/xp_sn2.c34
-rw-r--r--drivers/misc/sgi-xp/xp_uv.c70
-rw-r--r--drivers/misc/sgi-xp/xpc.h12
-rw-r--r--drivers/misc/sgi-xp/xpc_sn2.c15
-rw-r--r--drivers/misc/sgi-xp/xpc_uv.c290
-rw-r--r--drivers/pci/quirks.c170
-rw-r--r--drivers/xen/balloon.c4
-rw-r--r--drivers/xen/features.c6
-rw-r--r--drivers/xen/grant-table.c1
-rw-r--r--include/asm-generic/bug.h8
-rw-r--r--include/asm-generic/pgtable.h50
-rw-r--r--include/linux/dmi.h2
-rw-r--r--include/linux/kexec.h4
-rw-r--r--include/linux/linkage.h8
-rw-r--r--include/linux/mm.h19
-rw-r--r--include/linux/pci.h6
-rw-r--r--include/linux/pci_ids.h5
-rw-r--r--include/xen/interface/event_channel.h2
-rw-r--r--lib/bug.c19
-rw-r--r--mm/memory.c70
-rw-r--r--mm/swapfile.c9
197 files changed, 5182 insertions, 4125 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index c28a2ac88f9d..1a8af7354e79 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -244,18 +244,6 @@ Who: Michael Buesch <mb@bu3sch.de>
244 244
245--------------------------- 245---------------------------
246 246
247What: init_mm export
248When: 2.6.26
249Why: Not used in-tree. The current out-of-tree users used it to
250 work around problems in the CPA code which should be resolved
251 by now. One usecase was described to provide verification code
252 of the CPA operation. That's a good idea in general, but such
253 code / infrastructure should be in the kernel and not in some
254 out-of-tree driver.
255Who: Thomas Gleixner <tglx@linutronix.de>
256
257----------------------------
258
259What: usedac i386 kernel parameter 247What: usedac i386 kernel parameter
260When: 2.6.27 248When: 2.6.27
261Why: replaced by allowdac and no dac combination 249Why: replaced by allowdac and no dac combination
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index bb1b0dd3bfcb..71df353e367c 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1339,10 +1339,13 @@ nmi_watchdog
1339 1339
1340Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero 1340Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
1341the NMI watchdog is enabled and will continuously test all online cpus to 1341the NMI watchdog is enabled and will continuously test all online cpus to
1342determine whether or not they are still functioning properly. 1342determine whether or not they are still functioning properly. Currently,
1343passing "nmi_watchdog=" parameter at boot time is required for this function
1344to work.
1343 1345
1344Because the NMI watchdog shares registers with oprofile, by disabling the NMI 1346If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the
1345watchdog, oprofile may have more registers to utilize. 1347NMI watchdog shares registers with oprofile. By disabling the NMI watchdog,
1348oprofile may have more registers to utilize.
1346 1349
1347msgmni 1350msgmni
1348------ 1351------
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index bffffa4e8ee9..2c95cae8302b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1396,7 +1396,20 @@ and is between 256 and 4096 characters. It is defined in the file
1396 when a NMI is triggered. 1396 when a NMI is triggered.
1397 Format: [state][,regs][,debounce][,die] 1397 Format: [state][,regs][,debounce][,die]
1398 1398
1399 nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels 1399 nmi_watchdog= [KNL,BUGS=X86-32,X86-64] Debugging features for SMP kernels
1400 Format: [panic,][num]
1401 Valid num: 0,1,2
1402 0 - turn nmi_watchdog off
1403 1 - use the IO-APIC timer for the NMI watchdog
1404 2 - use the local APIC for the NMI watchdog using
1405 a performance counter. Note: This will use one performance
1406 counter and the local APIC's performance vector.
1407 When panic is specified panic when an NMI watchdog timeout occurs.
1408 This is useful when you use a panic=... timeout and need the box
1409 quickly up again.
1410 Instead of 1 and 2 it is possible to use the following
1411 symbolic names: lapic and ioapic
1412 Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
1400 1413
1401 no387 [BUGS=X86-32] Tells the kernel to use the 387 maths 1414 no387 [BUGS=X86-32] Tells the kernel to use the 387 maths
1402 emulation library even if a 387 maths coprocessor 1415 emulation library even if a 387 maths coprocessor
@@ -1633,6 +1646,17 @@ and is between 256 and 4096 characters. It is defined in the file
1633 nomsi [MSI] If the PCI_MSI kernel config parameter is 1646 nomsi [MSI] If the PCI_MSI kernel config parameter is
1634 enabled, this kernel boot option can be used to 1647 enabled, this kernel boot option can be used to
1635 disable the use of MSI interrupts system-wide. 1648 disable the use of MSI interrupts system-wide.
1649 noioapicquirk [APIC] Disable all boot interrupt quirks.
1650 Safety option to keep boot IRQs enabled. This
1651 should never be necessary.
1652 ioapicreroute [APIC] Enable rerouting of boot IRQs to the
1653 primary IO-APIC for bridges that cannot disable
1654 boot IRQs. This fixes a source of spurious IRQs
1655 when the system masks IRQs.
1656 noioapicreroute [APIC] Disable workaround that uses the
1657 boot IRQ equivalent of an IRQ that connects to
1658 a chipset where boot IRQs cannot be disabled.
1659 The opposite of ioapicreroute.
1636 biosirq [X86-32] Use PCI BIOS calls to get the interrupt 1660 biosirq [X86-32] Use PCI BIOS calls to get the interrupt
1637 routing table. These calls are known to be buggy 1661 routing table. These calls are known to be buggy
1638 on several machines and they hang the machine 1662 on several machines and they hang the machine
@@ -2262,6 +2286,13 @@ and is between 256 and 4096 characters. It is defined in the file
2262 Format: 2286 Format:
2263 <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq> 2287 <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
2264 2288
2289 tsc= Disable clocksource-must-verify flag for TSC.
2290 Format: <string>
2291 [x86] reliable: mark tsc clocksource as reliable, this
2292 disables clocksource verification at runtime.
2293 Used to enable high-resolution timer mode on older
2294 hardware, and in virtualized environment.
2295
2265 turbografx.map[2|3]= [HW,JOY] 2296 turbografx.map[2|3]= [HW,JOY]
2266 TurboGraFX parallel port interface 2297 TurboGraFX parallel port interface
2267 Format: 2298 Format:
diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt
index 90aa4531cb67..bf9f80a98282 100644
--- a/Documentation/nmi_watchdog.txt
+++ b/Documentation/nmi_watchdog.txt
@@ -69,6 +69,11 @@ to the overall system performance.
69On x86 nmi_watchdog is disabled by default so you have to enable it with 69On x86 nmi_watchdog is disabled by default so you have to enable it with
70a boot time parameter. 70a boot time parameter.
71 71
72It's possible to disable the NMI watchdog in run-time by writing "0" to
73/proc/sys/kernel/nmi_watchdog. Writing "1" to the same file will re-enable
74the NMI watchdog. Notice that you still need to use "nmi_watchdog=" parameter
75at boot time.
76
72NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally 77NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally
73on x86 SMP boxes. 78on x86 SMP boxes.
74 79
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 83c0033ee9e0..fcdc62b3c3d8 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -349,7 +349,7 @@ Protocol: 2.00+
349 3 SYSLINUX 349 3 SYSLINUX
350 4 EtherBoot 350 4 EtherBoot
351 5 ELILO 351 5 ELILO
352 7 GRuB 352 7 GRUB
353 8 U-BOOT 353 8 U-BOOT
354 9 Xen 354 9 Xen
355 A Gujin 355 A Gujin
@@ -537,8 +537,8 @@ Type: read
537Offset/size: 0x248/4 537Offset/size: 0x248/4
538Protocol: 2.08+ 538Protocol: 2.08+
539 539
540 If non-zero then this field contains the offset from the end of the 540 If non-zero then this field contains the offset from the beginning
541 real-mode code to the payload. 541 of the protected-mode code to the payload.
542 542
543 The payload may be compressed. The format of both the compressed and 543 The payload may be compressed. The format of both the compressed and
544 uncompressed data should be determined using the standard magic 544 uncompressed data should be determined using the standard magic
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
index c93ff5f4c0dd..cf08c9fff3cd 100644
--- a/Documentation/x86/pat.txt
+++ b/Documentation/x86/pat.txt
@@ -80,6 +80,30 @@ pci proc | -- | -- | WC |
80 | | | | 80 | | | |
81------------------------------------------------------------------- 81-------------------------------------------------------------------
82 82
83Advanced APIs for drivers
84-------------------------
85A. Exporting pages to users with remap_pfn_range, io_remap_pfn_range,
86vm_insert_pfn
87
88Drivers wanting to export some pages to userspace do it by using mmap
89interface and a combination of
901) pgprot_noncached()
912) io_remap_pfn_range() or remap_pfn_range() or vm_insert_pfn()
92
93With PAT support, a new API pgprot_writecombine is being added. So, drivers can
94continue to use the above sequence, with either pgprot_noncached() or
95pgprot_writecombine() in step 1, followed by step 2.
96
97In addition, step 2 internally tracks the region as UC or WC in memtype
98list in order to ensure no conflicting mapping.
99
100Note that this set of APIs only works with IO (non RAM) regions. If driver
101wants to export a RAM region, it has to do set_memory_uc() or set_memory_wc()
102as step 0 above and also track the usage of those pages and use set_memory_wb()
103before the page is freed to free pool.
104
105
106
83Notes: 107Notes:
84 108
85-- in the above table mean "Not suggested usage for the API". Some of the --'s 109-- in the above table mean "Not suggested usage for the API". Some of the --'s
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index f6d561a1a9b2..34c13040a718 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -79,17 +79,6 @@ Timing
79 Report when timer interrupts are lost because some code turned off 79 Report when timer interrupts are lost because some code turned off
80 interrupts for too long. 80 interrupts for too long.
81 81
82 nmi_watchdog=NUMBER[,panic]
83 NUMBER can be:
84 0 don't use an NMI watchdog
85 1 use the IO-APIC timer for the NMI watchdog
86 2 use the local APIC for the NMI watchdog using a performance counter. Note
87 This will use one performance counter and the local APIC's performance
88 vector.
89 When panic is specified panic when an NMI watchdog timeout occurs.
90 This is useful when you use a panic=... timeout and need the box
91 quickly up again.
92
93 nohpet 82 nohpet
94 Don't use the HPET timer. 83 Don't use the HPET timer.
95 84
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index efce75097369..29b52b14d0b4 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -6,7 +6,7 @@ Virtual memory map with 4 level page tables:
60000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm 60000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
7hole caused by [48:63] sign extension 7hole caused by [48:63] sign extension
8ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole 8ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole
9ffff810000000000 - ffffc0ffffffffff (=46 bits) direct mapping of all phys. memory 9ffff880000000000 - ffffc0ffffffffff (=57 TB) direct mapping of all phys. memory
10ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole 10ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole
11ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space 11ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space
12ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB) 12ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..a2ae4c05f46f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -19,6 +19,8 @@ config X86_64
19config X86 19config X86
20 def_bool y 20 def_bool y
21 select HAVE_AOUT if X86_32 21 select HAVE_AOUT if X86_32
22 select HAVE_READQ
23 select HAVE_WRITEQ
22 select HAVE_UNSTABLE_SCHED_CLOCK 24 select HAVE_UNSTABLE_SCHED_CLOCK
23 select HAVE_IDE 25 select HAVE_IDE
24 select HAVE_OPROFILE 26 select HAVE_OPROFILE
@@ -87,6 +89,10 @@ config GENERIC_IOMAP
87config GENERIC_BUG 89config GENERIC_BUG
88 def_bool y 90 def_bool y
89 depends on BUG 91 depends on BUG
92 select GENERIC_BUG_RELATIVE_POINTERS if X86_64
93
94config GENERIC_BUG_RELATIVE_POINTERS
95 bool
90 96
91config GENERIC_HWEIGHT 97config GENERIC_HWEIGHT
92 def_bool y 98 def_bool y
@@ -242,21 +248,13 @@ config X86_FIND_SMP_CONFIG
242 def_bool y 248 def_bool y
243 depends on X86_MPPARSE || X86_VOYAGER 249 depends on X86_MPPARSE || X86_VOYAGER
244 250
245if ACPI
246config X86_MPPARSE 251config X86_MPPARSE
247 def_bool y 252 bool "Enable MPS table" if ACPI
248 bool "Enable MPS table" 253 default y
249 depends on X86_LOCAL_APIC 254 depends on X86_LOCAL_APIC
250 help 255 help
251 For old smp systems that do not have proper acpi support. Newer systems 256 For old smp systems that do not have proper acpi support. Newer systems
252 (esp with 64bit cpus) with acpi support, MADT and DSDT will override it 257 (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
253endif
254
255if !ACPI
256config X86_MPPARSE
257 def_bool y
258 depends on X86_LOCAL_APIC
259endif
260 258
261choice 259choice
262 prompt "Subarchitecture Type" 260 prompt "Subarchitecture Type"
@@ -465,10 +463,6 @@ config X86_CYCLONE_TIMER
465 def_bool y 463 def_bool y
466 depends on X86_GENERICARCH 464 depends on X86_GENERICARCH
467 465
468config ES7000_CLUSTERED_APIC
469 def_bool y
470 depends on SMP && X86_ES7000 && MPENTIUMIII
471
472source "arch/x86/Kconfig.cpu" 466source "arch/x86/Kconfig.cpu"
473 467
474config HPET_TIMER 468config HPET_TIMER
@@ -569,7 +563,7 @@ config AMD_IOMMU
569 563
570# need this always selected by IOMMU for the VIA workaround 564# need this always selected by IOMMU for the VIA workaround
571config SWIOTLB 565config SWIOTLB
572 bool 566 def_bool y if X86_64
573 help 567 help
574 Support for software bounce buffers used on x86-64 systems 568 Support for software bounce buffers used on x86-64 systems
575 which don't have a hardware IOMMU (e.g. the current generation 569 which don't have a hardware IOMMU (e.g. the current generation
@@ -660,6 +654,30 @@ config X86_VISWS_APIC
660 def_bool y 654 def_bool y
661 depends on X86_32 && X86_VISWS 655 depends on X86_32 && X86_VISWS
662 656
657config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
658 bool "Reroute for broken boot IRQs"
659 default n
660 depends on X86_IO_APIC
661 help
662 This option enables a workaround that fixes a source of
663 spurious interrupts. This is recommended when threaded
664 interrupt handling is used on systems where the generation of
665 superfluous "boot interrupts" cannot be disabled.
666
667 Some chipsets generate a legacy INTx "boot IRQ" when the IRQ
668 entry in the chipset's IO-APIC is masked (as, e.g. the RT
669 kernel does during interrupt handling). On chipsets where this
670 boot IRQ generation cannot be disabled, this workaround keeps
671 the original IRQ line masked so that only the equivalent "boot
672 IRQ" is delivered to the CPUs. The workaround also tells the
673 kernel to set up the IRQ handler on the boot IRQ line. In this
674 way only one interrupt is delivered to the kernel. Otherwise
675 the spurious second interrupt may cause the kernel to bring
676 down (vital) interrupt lines.
677
678 Only affects "broken" chipsets. Interrupt sharing may be
679 increased on these systems.
680
663config X86_MCE 681config X86_MCE
664 bool "Machine Check Exception" 682 bool "Machine Check Exception"
665 depends on !X86_VOYAGER 683 depends on !X86_VOYAGER
@@ -956,24 +974,37 @@ config X86_PAE
956config ARCH_PHYS_ADDR_T_64BIT 974config ARCH_PHYS_ADDR_T_64BIT
957 def_bool X86_64 || X86_PAE 975 def_bool X86_64 || X86_PAE
958 976
977config DIRECT_GBPAGES
978 bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
979 default y
980 depends on X86_64
981 help
982 Allow the kernel linear mapping to use 1GB pages on CPUs that
983 support it. This can improve the kernel's performance a tiny bit by
984 reducing TLB pressure. If in doubt, say "Y".
985
959# Common NUMA Features 986# Common NUMA Features
960config NUMA 987config NUMA
961 bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" 988 bool "Numa Memory Allocation and Scheduler Support"
962 depends on SMP 989 depends on SMP
963 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) 990 depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
964 default n if X86_PC 991 default n if X86_PC
965 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) 992 default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
966 help 993 help
967 Enable NUMA (Non Uniform Memory Access) support. 994 Enable NUMA (Non Uniform Memory Access) support.
995
968 The kernel will try to allocate memory used by a CPU on the 996 The kernel will try to allocate memory used by a CPU on the
969 local memory controller of the CPU and add some more 997 local memory controller of the CPU and add some more
970 NUMA awareness to the kernel. 998 NUMA awareness to the kernel.
971 999
972 For 32-bit this is currently highly experimental and should be only 1000 For 64-bit this is recommended if the system is Intel Core i7
973 used for kernel development. It might also cause boot failures. 1001 (or later), AMD Opteron, or EM64T NUMA.
974 For 64-bit this is recommended on all multiprocessor Opteron systems. 1002
975 If the system is EM64T, you should say N unless your system is 1003 For 32-bit this is only needed on (rare) 32-bit-only platforms
976 EM64T NUMA. 1004 that support NUMA topologies, such as NUMAQ / Summit, or if you
1005 boot a 32-bit kernel on a 64-bit NUMA platform.
1006
1007 Otherwise, you should say N.
977 1008
978comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" 1009comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
979 depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) 1010 depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
@@ -1493,6 +1524,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
1493 def_bool y 1524 def_bool y
1494 depends on X86_64 || (X86_32 && HIGHMEM) 1525 depends on X86_64 || (X86_32 && HIGHMEM)
1495 1526
1527config ARCH_ENABLE_MEMORY_HOTREMOVE
1528 def_bool y
1529 depends on MEMORY_HOTPLUG
1530
1496config HAVE_ARCH_EARLY_PFN_TO_NID 1531config HAVE_ARCH_EARLY_PFN_TO_NID
1497 def_bool X86_64 1532 def_bool X86_64
1498 depends on NUMA 1533 depends on NUMA
@@ -1632,13 +1667,6 @@ config APM_ALLOW_INTS
1632 many of the newer IBM Thinkpads. If you experience hangs when you 1667 many of the newer IBM Thinkpads. If you experience hangs when you
1633 suspend, try setting this to Y. Otherwise, say N. 1668 suspend, try setting this to Y. Otherwise, say N.
1634 1669
1635config APM_REAL_MODE_POWER_OFF
1636 bool "Use real mode APM BIOS call to power off"
1637 help
1638 Use real mode APM BIOS calls to switch off the computer. This is
1639 a work-around for a number of buggy BIOSes. Switch this option on if
1640 your computer crashes instead of powering off properly.
1641
1642endif # APM 1670endif # APM
1643 1671
1644source "arch/x86/kernel/cpu/cpufreq/Kconfig" 1672source "arch/x86/kernel/cpu/cpufreq/Kconfig"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..4ee768660f75 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -114,18 +114,6 @@ config DEBUG_RODATA
114 data. This is recommended so that we can catch kernel bugs sooner. 114 data. This is recommended so that we can catch kernel bugs sooner.
115 If in doubt, say "Y". 115 If in doubt, say "Y".
116 116
117config DIRECT_GBPAGES
118 bool "Enable gbpages-mapped kernel pagetables"
119 depends on DEBUG_KERNEL && EXPERIMENTAL && X86_64
120 help
121 Enable gigabyte pages support (if the CPU supports it). This can
122 improve the kernel's performance a tiny bit by reducing TLB
123 pressure.
124
125 This is experimental code.
126
127 If in doubt, say "N".
128
129config DEBUG_RODATA_TEST 117config DEBUG_RODATA_TEST
130 bool "Testcase for the DEBUG_RODATA feature" 118 bool "Testcase for the DEBUG_RODATA feature"
131 depends on DEBUG_RODATA 119 depends on DEBUG_RODATA
@@ -307,10 +295,10 @@ config OPTIMIZE_INLINING
307 developers have marked 'inline'. Doing so takes away freedom from gcc to 295 developers have marked 'inline'. Doing so takes away freedom from gcc to
308 do what it thinks is best, which is desirable for the gcc 3.x series of 296 do what it thinks is best, which is desirable for the gcc 3.x series of
309 compilers. The gcc 4.x series have a rewritten inlining algorithm and 297 compilers. The gcc 4.x series have a rewritten inlining algorithm and
310 disabling this option will generate a smaller kernel there. Hopefully 298 enabling this option will generate a smaller kernel there. Hopefully
311 this algorithm is so good that allowing gcc4 to make the decision can 299 this algorithm is so good that allowing gcc 4.x and above to make the
312 become the default in the future, until then this option is there to 300 decision will become the default in the future. Until then this option
313 test gcc for this. 301 is there to test gcc for this.
314 302
315 If unsure, say N. 303 If unsure, say N.
316 304
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index b939cb476dec..5d4742ed4aa2 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -34,7 +34,7 @@ static struct mode_info cga_modes[] = {
34 { VIDEO_80x25, 80, 25, 0 }, 34 { VIDEO_80x25, 80, 25, 0 },
35}; 35};
36 36
37__videocard video_vga; 37static __videocard video_vga;
38 38
39/* Set basic 80x25 mode */ 39/* Set basic 80x25 mode */
40static u8 vga_set_basic_mode(void) 40static u8 vga_set_basic_mode(void)
@@ -259,7 +259,7 @@ static int vga_probe(void)
259 return mode_count[adapter]; 259 return mode_count[adapter];
260} 260}
261 261
262__videocard video_vga = { 262static __videocard video_vga = {
263 .card_name = "VGA", 263 .card_name = "VGA",
264 .probe = vga_probe, 264 .probe = vga_probe,
265 .set_mode = vga_set_mode, 265 .set_mode = vga_set_mode,
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 83598b23093a..3bef2c1febe9 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -226,7 +226,7 @@ static unsigned int mode_menu(void)
226 226
227#ifdef CONFIG_VIDEO_RETAIN 227#ifdef CONFIG_VIDEO_RETAIN
228/* Save screen content to the heap */ 228/* Save screen content to the heap */
229struct saved_screen { 229static struct saved_screen {
230 int x, y; 230 int x, y;
231 int curx, cury; 231 int curx, cury;
232 u16 *data; 232 u16 *data;
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 13b8c86ae985..b30a08ed8eb4 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -77,7 +77,7 @@ CONFIG_AUDIT=y
77CONFIG_AUDITSYSCALL=y 77CONFIG_AUDITSYSCALL=y
78CONFIG_AUDIT_TREE=y 78CONFIG_AUDIT_TREE=y
79# CONFIG_IKCONFIG is not set 79# CONFIG_IKCONFIG is not set
80CONFIG_LOG_BUF_SHIFT=17 80CONFIG_LOG_BUF_SHIFT=18
81CONFIG_CGROUPS=y 81CONFIG_CGROUPS=y
82# CONFIG_CGROUP_DEBUG is not set 82# CONFIG_CGROUP_DEBUG is not set
83CONFIG_CGROUP_NS=y 83CONFIG_CGROUP_NS=y
@@ -298,7 +298,7 @@ CONFIG_KEXEC=y
298CONFIG_CRASH_DUMP=y 298CONFIG_CRASH_DUMP=y
299# CONFIG_KEXEC_JUMP is not set 299# CONFIG_KEXEC_JUMP is not set
300CONFIG_PHYSICAL_START=0x1000000 300CONFIG_PHYSICAL_START=0x1000000
301CONFIG_RELOCATABLE=y 301# CONFIG_RELOCATABLE is not set
302CONFIG_PHYSICAL_ALIGN=0x200000 302CONFIG_PHYSICAL_ALIGN=0x200000
303CONFIG_HOTPLUG_CPU=y 303CONFIG_HOTPLUG_CPU=y
304# CONFIG_COMPAT_VDSO is not set 304# CONFIG_COMPAT_VDSO is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index f0a03d7a7d63..0e7dbc0a3e46 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -77,7 +77,7 @@ CONFIG_AUDIT=y
77CONFIG_AUDITSYSCALL=y 77CONFIG_AUDITSYSCALL=y
78CONFIG_AUDIT_TREE=y 78CONFIG_AUDIT_TREE=y
79# CONFIG_IKCONFIG is not set 79# CONFIG_IKCONFIG is not set
80CONFIG_LOG_BUF_SHIFT=17 80CONFIG_LOG_BUF_SHIFT=18
81CONFIG_CGROUPS=y 81CONFIG_CGROUPS=y
82# CONFIG_CGROUP_DEBUG is not set 82# CONFIG_CGROUP_DEBUG is not set
83CONFIG_CGROUP_NS=y 83CONFIG_CGROUP_NS=y
@@ -298,7 +298,7 @@ CONFIG_SCHED_HRTICK=y
298CONFIG_KEXEC=y 298CONFIG_KEXEC=y
299CONFIG_CRASH_DUMP=y 299CONFIG_CRASH_DUMP=y
300CONFIG_PHYSICAL_START=0x1000000 300CONFIG_PHYSICAL_START=0x1000000
301CONFIG_RELOCATABLE=y 301# CONFIG_RELOCATABLE is not set
302CONFIG_PHYSICAL_ALIGN=0x200000 302CONFIG_PHYSICAL_ALIGN=0x200000
303CONFIG_HOTPLUG_CPU=y 303CONFIG_HOTPLUG_CPU=y
304# CONFIG_COMPAT_VDSO is not set 304# CONFIG_COMPAT_VDSO is not set
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 4bc02b23674b..b195f85526e3 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -32,6 +32,8 @@
32#include <asm/proto.h> 32#include <asm/proto.h>
33#include <asm/vdso.h> 33#include <asm/vdso.h>
34 34
35#include <asm/sigframe.h>
36
35#define DEBUG_SIG 0 37#define DEBUG_SIG 0
36 38
37#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) 39#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -41,7 +43,6 @@
41 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ 43 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
42 X86_EFLAGS_CF) 44 X86_EFLAGS_CF)
43 45
44asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
45void signal_fault(struct pt_regs *regs, void __user *frame, char *where); 46void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
46 47
47int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) 48int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
@@ -173,47 +174,28 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
173/* 174/*
174 * Do a signal return; undo the signal stack. 175 * Do a signal return; undo the signal stack.
175 */ 176 */
177#define COPY(x) { \
178 err |= __get_user(regs->x, &sc->x); \
179}
176 180
177struct sigframe 181#define COPY_SEG_CPL3(seg) { \
178{ 182 unsigned short tmp; \
179 u32 pretcode; 183 err |= __get_user(tmp, &sc->seg); \
180 int sig; 184 regs->seg = tmp | 3; \
181 struct sigcontext_ia32 sc;
182 struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */
183 unsigned int extramask[_COMPAT_NSIG_WORDS-1];
184 char retcode[8];
185 /* fp state follows here */
186};
187
188struct rt_sigframe
189{
190 u32 pretcode;
191 int sig;
192 u32 pinfo;
193 u32 puc;
194 compat_siginfo_t info;
195 struct ucontext_ia32 uc;
196 char retcode[8];
197 /* fp state follows here */
198};
199
200#define COPY(x) { \
201 unsigned int reg; \
202 err |= __get_user(reg, &sc->x); \
203 regs->x = reg; \
204} 185}
205 186
206#define RELOAD_SEG(seg,mask) \ 187#define RELOAD_SEG(seg) { \
207 { unsigned int cur; \ 188 unsigned int cur, pre; \
208 unsigned short pre; \ 189 err |= __get_user(pre, &sc->seg); \
209 err |= __get_user(pre, &sc->seg); \ 190 savesegment(seg, cur); \
210 savesegment(seg, cur); \ 191 pre |= 3; \
211 pre |= mask; \ 192 if (pre != cur) \
212 if (pre != cur) loadsegment(seg, pre); } 193 loadsegment(seg, pre); \
194}
213 195
214static int ia32_restore_sigcontext(struct pt_regs *regs, 196static int ia32_restore_sigcontext(struct pt_regs *regs,
215 struct sigcontext_ia32 __user *sc, 197 struct sigcontext_ia32 __user *sc,
216 unsigned int *peax) 198 unsigned int *pax)
217{ 199{
218 unsigned int tmpflags, gs, oldgs, err = 0; 200 unsigned int tmpflags, gs, oldgs, err = 0;
219 void __user *buf; 201 void __user *buf;
@@ -240,18 +222,16 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
240 if (gs != oldgs) 222 if (gs != oldgs)
241 load_gs_index(gs); 223 load_gs_index(gs);
242 224
243 RELOAD_SEG(fs, 3); 225 RELOAD_SEG(fs);
244 RELOAD_SEG(ds, 3); 226 RELOAD_SEG(ds);
245 RELOAD_SEG(es, 3); 227 RELOAD_SEG(es);
246 228
247 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); 229 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
248 COPY(dx); COPY(cx); COPY(ip); 230 COPY(dx); COPY(cx); COPY(ip);
249 /* Don't touch extended registers */ 231 /* Don't touch extended registers */
250 232
251 err |= __get_user(regs->cs, &sc->cs); 233 COPY_SEG_CPL3(cs);
252 regs->cs |= 3; 234 COPY_SEG_CPL3(ss);
253 err |= __get_user(regs->ss, &sc->ss);
254 regs->ss |= 3;
255 235
256 err |= __get_user(tmpflags, &sc->flags); 236 err |= __get_user(tmpflags, &sc->flags);
257 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); 237 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -262,15 +242,13 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
262 buf = compat_ptr(tmp); 242 buf = compat_ptr(tmp);
263 err |= restore_i387_xstate_ia32(buf); 243 err |= restore_i387_xstate_ia32(buf);
264 244
265 err |= __get_user(tmp, &sc->ax); 245 err |= __get_user(*pax, &sc->ax);
266 *peax = tmp;
267
268 return err; 246 return err;
269} 247}
270 248
271asmlinkage long sys32_sigreturn(struct pt_regs *regs) 249asmlinkage long sys32_sigreturn(struct pt_regs *regs)
272{ 250{
273 struct sigframe __user *frame = (struct sigframe __user *)(regs->sp-8); 251 struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
274 sigset_t set; 252 sigset_t set;
275 unsigned int ax; 253 unsigned int ax;
276 254
@@ -300,12 +278,12 @@ badframe:
300 278
301asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) 279asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
302{ 280{
303 struct rt_sigframe __user *frame; 281 struct rt_sigframe_ia32 __user *frame;
304 sigset_t set; 282 sigset_t set;
305 unsigned int ax; 283 unsigned int ax;
306 struct pt_regs tregs; 284 struct pt_regs tregs;
307 285
308 frame = (struct rt_sigframe __user *)(regs->sp - 4); 286 frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
309 287
310 if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) 288 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
311 goto badframe; 289 goto badframe;
@@ -359,20 +337,15 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
359 err |= __put_user(regs->dx, &sc->dx); 337 err |= __put_user(regs->dx, &sc->dx);
360 err |= __put_user(regs->cx, &sc->cx); 338 err |= __put_user(regs->cx, &sc->cx);
361 err |= __put_user(regs->ax, &sc->ax); 339 err |= __put_user(regs->ax, &sc->ax);
362 err |= __put_user(regs->cs, &sc->cs);
363 err |= __put_user(regs->ss, &sc->ss);
364 err |= __put_user(current->thread.trap_no, &sc->trapno); 340 err |= __put_user(current->thread.trap_no, &sc->trapno);
365 err |= __put_user(current->thread.error_code, &sc->err); 341 err |= __put_user(current->thread.error_code, &sc->err);
366 err |= __put_user(regs->ip, &sc->ip); 342 err |= __put_user(regs->ip, &sc->ip);
343 err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
367 err |= __put_user(regs->flags, &sc->flags); 344 err |= __put_user(regs->flags, &sc->flags);
368 err |= __put_user(regs->sp, &sc->sp_at_signal); 345 err |= __put_user(regs->sp, &sc->sp_at_signal);
346 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
369 347
370 tmp = save_i387_xstate_ia32(fpstate); 348 err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate);
371 if (tmp < 0)
372 err = -EFAULT;
373 else
374 err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
375 &sc->fpstate);
376 349
377 /* non-iBCS2 extensions.. */ 350 /* non-iBCS2 extensions.. */
378 err |= __put_user(mask, &sc->oldmask); 351 err |= __put_user(mask, &sc->oldmask);
@@ -400,7 +373,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
400 } 373 }
401 374
402 /* This is the legacy signal stack switching. */ 375 /* This is the legacy signal stack switching. */
403 else if ((regs->ss & 0xffff) != __USER_DS && 376 else if ((regs->ss & 0xffff) != __USER32_DS &&
404 !(ka->sa.sa_flags & SA_RESTORER) && 377 !(ka->sa.sa_flags & SA_RESTORER) &&
405 ka->sa.sa_restorer) 378 ka->sa.sa_restorer)
406 sp = (unsigned long) ka->sa.sa_restorer; 379 sp = (unsigned long) ka->sa.sa_restorer;
@@ -408,6 +381,8 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
408 if (used_math()) { 381 if (used_math()) {
409 sp = sp - sig_xstate_ia32_size; 382 sp = sp - sig_xstate_ia32_size;
410 *fpstate = (struct _fpstate_ia32 *) sp; 383 *fpstate = (struct _fpstate_ia32 *) sp;
384 if (save_i387_xstate_ia32(*fpstate) < 0)
385 return (void __user *) -1L;
411 } 386 }
412 387
413 sp -= frame_size; 388 sp -= frame_size;
@@ -420,7 +395,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
420int ia32_setup_frame(int sig, struct k_sigaction *ka, 395int ia32_setup_frame(int sig, struct k_sigaction *ka,
421 compat_sigset_t *set, struct pt_regs *regs) 396 compat_sigset_t *set, struct pt_regs *regs)
422{ 397{
423 struct sigframe __user *frame; 398 struct sigframe_ia32 __user *frame;
424 void __user *restorer; 399 void __user *restorer;
425 int err = 0; 400 int err = 0;
426 void __user *fpstate = NULL; 401 void __user *fpstate = NULL;
@@ -430,12 +405,10 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
430 u16 poplmovl; 405 u16 poplmovl;
431 u32 val; 406 u32 val;
432 u16 int80; 407 u16 int80;
433 u16 pad;
434 } __attribute__((packed)) code = { 408 } __attribute__((packed)) code = {
435 0xb858, /* popl %eax ; movl $...,%eax */ 409 0xb858, /* popl %eax ; movl $...,%eax */
436 __NR_ia32_sigreturn, 410 __NR_ia32_sigreturn,
437 0x80cd, /* int $0x80 */ 411 0x80cd, /* int $0x80 */
438 0,
439 }; 412 };
440 413
441 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); 414 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
@@ -471,7 +444,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
471 * These are actually not used anymore, but left because some 444 * These are actually not used anymore, but left because some
472 * gdb versions depend on them as a marker. 445 * gdb versions depend on them as a marker.
473 */ 446 */
474 err |= __copy_to_user(frame->retcode, &code, 8); 447 err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode);
475 if (err) 448 if (err)
476 return -EFAULT; 449 return -EFAULT;
477 450
@@ -501,7 +474,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
501int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 474int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
502 compat_sigset_t *set, struct pt_regs *regs) 475 compat_sigset_t *set, struct pt_regs *regs)
503{ 476{
504 struct rt_sigframe __user *frame; 477 struct rt_sigframe_ia32 __user *frame;
505 void __user *restorer; 478 void __user *restorer;
506 int err = 0; 479 int err = 0;
507 void __user *fpstate = NULL; 480 void __user *fpstate = NULL;
@@ -511,8 +484,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
511 u8 movl; 484 u8 movl;
512 u32 val; 485 u32 val;
513 u16 int80; 486 u16 int80;
514 u16 pad; 487 u8 pad;
515 u8 pad2;
516 } __attribute__((packed)) code = { 488 } __attribute__((packed)) code = {
517 0xb8, 489 0xb8,
518 __NR_ia32_rt_sigreturn, 490 __NR_ia32_rt_sigreturn,
@@ -559,7 +531,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
559 * Not actually used anymore, but left because some gdb 531 * Not actually used anymore, but left because some gdb
560 * versions need it. 532 * versions need it.
561 */ 533 */
562 err |= __copy_to_user(frame->retcode, &code, 8); 534 err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode);
563 if (err) 535 if (err)
564 return -EFAULT; 536 return -EFAULT;
565 537
@@ -572,11 +544,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
572 regs->dx = (unsigned long) &frame->info; 544 regs->dx = (unsigned long) &frame->info;
573 regs->cx = (unsigned long) &frame->uc; 545 regs->cx = (unsigned long) &frame->uc;
574 546
575 /* Make -mregparm=3 work */
576 regs->ax = sig;
577 regs->dx = (unsigned long) &frame->info;
578 regs->cx = (unsigned long) &frame->uc;
579
580 loadsegment(ds, __USER32_DS); 547 loadsegment(ds, __USER32_DS);
581 loadsegment(es, __USER32_DS); 548 loadsegment(es, __USER32_DS);
582 549
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3b1510b4fc57..25caa0738af5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -193,6 +193,7 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
193static inline void lapic_shutdown(void) { } 193static inline void lapic_shutdown(void) { }
194#define local_apic_timer_c2_ok 1 194#define local_apic_timer_c2_ok 1
195static inline void init_apic_mappings(void) { } 195static inline void init_apic_mappings(void) { }
196static inline void disable_local_APIC(void) { }
196 197
197#endif /* !CONFIG_X86_LOCAL_APIC */ 198#endif /* !CONFIG_X86_LOCAL_APIC */
198 199
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 1d9543b9d358..ce547f24a1cd 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void)
24#define INT_DELIVERY_MODE (dest_Fixed) 24#define INT_DELIVERY_MODE (dest_Fixed)
25#define INT_DEST_MODE (0) /* phys delivery to target proc */ 25#define INT_DEST_MODE (0) /* phys delivery to target proc */
26#define NO_BALANCE_IRQ (0) 26#define NO_BALANCE_IRQ (0)
27#define WAKE_SECONDARY_VIA_INIT
28
29 27
30static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 28static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
31{ 29{
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 360010322711..9fa9dcdf344b 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -168,7 +168,15 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
168 */ 168 */
169static inline void change_bit(int nr, volatile unsigned long *addr) 169static inline void change_bit(int nr, volatile unsigned long *addr)
170{ 170{
171 asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); 171 if (IS_IMMEDIATE(nr)) {
172 asm volatile(LOCK_PREFIX "xorb %1,%0"
173 : CONST_MASK_ADDR(nr, addr)
174 : "iq" ((u8)CONST_MASK(nr)));
175 } else {
176 asm volatile(LOCK_PREFIX "btc %1,%0"
177 : BITOP_ADDR(addr)
178 : "Ir" (nr));
179 }
172} 180}
173 181
174/** 182/**
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 3def2065fcea..d9cf1cd156d2 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -9,7 +9,7 @@
9#ifdef CONFIG_X86_32 9#ifdef CONFIG_X86_32
10# define __BUG_C0 "2:\t.long 1b, %c0\n" 10# define __BUG_C0 "2:\t.long 1b, %c0\n"
11#else 11#else
12# define __BUG_C0 "2:\t.quad 1b, %c0\n" 12# define __BUG_C0 "2:\t.long 1b - 2b, %c0 - 2b\n"
13#endif 13#endif
14 14
15#define BUG() \ 15#define BUG() \
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h
index e02ae2d89acf..f110ad417df3 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/asm/byteorder.h
@@ -4,26 +4,33 @@
4#include <asm/types.h> 4#include <asm/types.h>
5#include <linux/compiler.h> 5#include <linux/compiler.h>
6 6
7#ifdef __GNUC__ 7#define __LITTLE_ENDIAN
8 8
9#ifdef __i386__ 9static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
10
11static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
12{ 10{
13#ifdef CONFIG_X86_BSWAP 11#ifdef __i386__
14 asm("bswap %0" : "=r" (x) : "0" (x)); 12# ifdef CONFIG_X86_BSWAP
15#else 13 asm("bswap %0" : "=r" (val) : "0" (val));
14# else
16 asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ 15 asm("xchgb %b0,%h0\n\t" /* swap lower bytes */
17 "rorl $16,%0\n\t" /* swap words */ 16 "rorl $16,%0\n\t" /* swap words */
18 "xchgb %b0,%h0" /* swap higher bytes */ 17 "xchgb %b0,%h0" /* swap higher bytes */
19 : "=q" (x) 18 : "=q" (val)
20 : "0" (x)); 19 : "0" (val));
20# endif
21
22#else /* __i386__ */
23 asm("bswapl %0"
24 : "=r" (val)
25 : "0" (val));
21#endif 26#endif
22 return x; 27 return val;
23} 28}
29#define __arch_swab32 __arch_swab32
24 30
25static inline __attribute_const__ __u64 ___arch__swab64(__u64 val) 31static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
26{ 32{
33#ifdef __i386__
27 union { 34 union {
28 struct { 35 struct {
29 __u32 a; 36 __u32 a;
@@ -32,50 +39,27 @@ static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
32 __u64 u; 39 __u64 u;
33 } v; 40 } v;
34 v.u = val; 41 v.u = val;
35#ifdef CONFIG_X86_BSWAP 42# ifdef CONFIG_X86_BSWAP
36 asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" 43 asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
37 : "=r" (v.s.a), "=r" (v.s.b) 44 : "=r" (v.s.a), "=r" (v.s.b)
38 : "0" (v.s.a), "1" (v.s.b)); 45 : "0" (v.s.a), "1" (v.s.b));
39#else 46# else
40 v.s.a = ___arch__swab32(v.s.a); 47 v.s.a = __arch_swab32(v.s.a);
41 v.s.b = ___arch__swab32(v.s.b); 48 v.s.b = __arch_swab32(v.s.b);
42 asm("xchgl %0,%1" 49 asm("xchgl %0,%1"
43 : "=r" (v.s.a), "=r" (v.s.b) 50 : "=r" (v.s.a), "=r" (v.s.b)
44 : "0" (v.s.a), "1" (v.s.b)); 51 : "0" (v.s.a), "1" (v.s.b));
45#endif 52# endif
46 return v.u; 53 return v.u;
47}
48
49#else /* __i386__ */ 54#else /* __i386__ */
50
51static inline __attribute_const__ __u64 ___arch__swab64(__u64 x)
52{
53 asm("bswapq %0" 55 asm("bswapq %0"
54 : "=r" (x) 56 : "=r" (val)
55 : "0" (x)); 57 : "0" (val));
56 return x; 58 return val;
57}
58
59static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
60{
61 asm("bswapl %0"
62 : "=r" (x)
63 : "0" (x));
64 return x;
65}
66
67#endif 59#endif
60}
61#define __arch_swab64 __arch_swab64
68 62
69/* Do not define swab16. Gcc is smart enough to recognize "C" version and 63#include <linux/byteorder.h>
70 convert it into rotation or exhange. */
71
72#define __arch__swab64(x) ___arch__swab64(x)
73#define __arch__swab32(x) ___arch__swab32(x)
74
75#define __BYTEORDER_HAS_U64__
76
77#endif /* __GNUC__ */
78
79#include <linux/byteorder/little_endian.h>
80 64
81#endif /* _ASM_X86_BYTEORDER_H */ 65#endif /* _ASM_X86_BYTEORDER_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index cfdf8c2c5c31..ea408dcba513 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -80,7 +80,6 @@
80#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ 80#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */
81#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ 81#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
82#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ 82#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
83#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
84#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ 83#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
85#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ 84#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
86#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ 85#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */
@@ -92,6 +91,8 @@
92#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ 91#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
93#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ 92#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */
94#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ 93#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */
94#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
95#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
95 96
96/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 97/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
97#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ 98#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
@@ -117,6 +118,7 @@
117#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ 118#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
118#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ 119#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
119#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ 120#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
121#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */
120 122
121/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ 123/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
122#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ 124#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */
@@ -237,6 +239,7 @@ extern const char * const x86_power_flags[32];
237#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) 239#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
238#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) 240#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
239#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) 241#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
242#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
240 243
241#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 244#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
242# define cpu_has_invlpg 1 245# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 097794ff6b79..dc22c0733282 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -71,12 +71,10 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
71/* Make sure we keep the same behaviour */ 71/* Make sure we keep the same behaviour */
72static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 72static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
73{ 73{
74#ifdef CONFIG_X86_64
75 struct dma_mapping_ops *ops = get_dma_ops(dev); 74 struct dma_mapping_ops *ops = get_dma_ops(dev);
76 if (ops->mapping_error) 75 if (ops->mapping_error)
77 return ops->mapping_error(dev, dma_addr); 76 return ops->mapping_error(dev, dma_addr);
78 77
79#endif
80 return (dma_addr == bad_dma_address); 78 return (dma_addr == bad_dma_address);
81} 79}
82 80
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 804b6e6be929..3afc5e87cfdd 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -6,56 +6,91 @@
6#endif 6#endif
7 7
8/* 8/*
9 Macros for dwarf2 CFI unwind table entries. 9 * Macros for dwarf2 CFI unwind table entries.
10 See "as.info" for details on these pseudo ops. Unfortunately 10 * See "as.info" for details on these pseudo ops. Unfortunately
11 they are only supported in very new binutils, so define them 11 * they are only supported in very new binutils, so define them
12 away for older version. 12 * away for older version.
13 */ 13 */
14 14
15#ifdef CONFIG_AS_CFI 15#ifdef CONFIG_AS_CFI
16 16
17#define CFI_STARTPROC .cfi_startproc 17#define CFI_STARTPROC .cfi_startproc
18#define CFI_ENDPROC .cfi_endproc 18#define CFI_ENDPROC .cfi_endproc
19#define CFI_DEF_CFA .cfi_def_cfa 19#define CFI_DEF_CFA .cfi_def_cfa
20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register 20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset 21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset 22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
23#define CFI_OFFSET .cfi_offset 23#define CFI_OFFSET .cfi_offset
24#define CFI_REL_OFFSET .cfi_rel_offset 24#define CFI_REL_OFFSET .cfi_rel_offset
25#define CFI_REGISTER .cfi_register 25#define CFI_REGISTER .cfi_register
26#define CFI_RESTORE .cfi_restore 26#define CFI_RESTORE .cfi_restore
27#define CFI_REMEMBER_STATE .cfi_remember_state 27#define CFI_REMEMBER_STATE .cfi_remember_state
28#define CFI_RESTORE_STATE .cfi_restore_state 28#define CFI_RESTORE_STATE .cfi_restore_state
29#define CFI_UNDEFINED .cfi_undefined 29#define CFI_UNDEFINED .cfi_undefined
30 30
31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME 31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
32#define CFI_SIGNAL_FRAME .cfi_signal_frame 32#define CFI_SIGNAL_FRAME .cfi_signal_frame
33#else 33#else
34#define CFI_SIGNAL_FRAME 34#define CFI_SIGNAL_FRAME
35#endif 35#endif
36 36
37#else 37#else
38 38
39/* Due to the structure of pre-exisiting code, don't use assembler line 39/*
40 comment character # to ignore the arguments. Instead, use a dummy macro. */ 40 * Due to the structure of pre-exisiting code, don't use assembler line
41 * comment character # to ignore the arguments. Instead, use a dummy macro.
42 */
41.macro cfi_ignore a=0, b=0, c=0, d=0 43.macro cfi_ignore a=0, b=0, c=0, d=0
42.endm 44.endm
43 45
44#define CFI_STARTPROC cfi_ignore 46#define CFI_STARTPROC cfi_ignore
45#define CFI_ENDPROC cfi_ignore 47#define CFI_ENDPROC cfi_ignore
46#define CFI_DEF_CFA cfi_ignore 48#define CFI_DEF_CFA cfi_ignore
47#define CFI_DEF_CFA_REGISTER cfi_ignore 49#define CFI_DEF_CFA_REGISTER cfi_ignore
48#define CFI_DEF_CFA_OFFSET cfi_ignore 50#define CFI_DEF_CFA_OFFSET cfi_ignore
49#define CFI_ADJUST_CFA_OFFSET cfi_ignore 51#define CFI_ADJUST_CFA_OFFSET cfi_ignore
50#define CFI_OFFSET cfi_ignore 52#define CFI_OFFSET cfi_ignore
51#define CFI_REL_OFFSET cfi_ignore 53#define CFI_REL_OFFSET cfi_ignore
52#define CFI_REGISTER cfi_ignore 54#define CFI_REGISTER cfi_ignore
53#define CFI_RESTORE cfi_ignore 55#define CFI_RESTORE cfi_ignore
54#define CFI_REMEMBER_STATE cfi_ignore 56#define CFI_REMEMBER_STATE cfi_ignore
55#define CFI_RESTORE_STATE cfi_ignore 57#define CFI_RESTORE_STATE cfi_ignore
56#define CFI_UNDEFINED cfi_ignore 58#define CFI_UNDEFINED cfi_ignore
57#define CFI_SIGNAL_FRAME cfi_ignore 59#define CFI_SIGNAL_FRAME cfi_ignore
58 60
59#endif 61#endif
60 62
63/*
64 * An attempt to make CFI annotations more or less
65 * correct and shorter. It is implied that you know
66 * what you're doing if you use them.
67 */
68#ifdef __ASSEMBLY__
69#ifdef CONFIG_X86_64
70 .macro pushq_cfi reg
71 pushq \reg
72 CFI_ADJUST_CFA_OFFSET 8
73 .endm
74
75 .macro popq_cfi reg
76 popq \reg
77 CFI_ADJUST_CFA_OFFSET -8
78 .endm
79
80 .macro movq_cfi reg offset=0
81 movq %\reg, \offset(%rsp)
82 CFI_REL_OFFSET \reg, \offset
83 .endm
84
85 .macro movq_cfi_restore offset reg
86 movq \offset(%rsp), %\reg
87 CFI_RESTORE \reg
88 .endm
89#else /*!CONFIG_X86_64*/
90
91 /* 32bit defenitions are missed yet */
92
93#endif /*!CONFIG_X86_64*/
94#endif /*__ASSEMBLY__*/
95
61#endif /* _ASM_X86_DWARF2_H */ 96#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index 94826cf87455..cc70c1c78ca4 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -8,7 +8,9 @@ enum reboot_type {
8 BOOT_BIOS = 'b', 8 BOOT_BIOS = 'b',
9#endif 9#endif
10 BOOT_ACPI = 'a', 10 BOOT_ACPI = 'a',
11 BOOT_EFI = 'e' 11 BOOT_EFI = 'e',
12 BOOT_CF9 = 'p',
13 BOOT_CF9_COND = 'q',
12}; 14};
13 15
14extern enum reboot_type reboot_type; 16extern enum reboot_type reboot_type;
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 380f0b4f17ed..e24ef876915f 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,31 +9,27 @@ static inline int apic_id_registered(void)
9 return (1); 9 return (1);
10} 10}
11 11
12static inline cpumask_t target_cpus(void) 12static inline cpumask_t target_cpus_cluster(void)
13{ 13{
14#if defined CONFIG_ES7000_CLUSTERED_APIC
15 return CPU_MASK_ALL; 14 return CPU_MASK_ALL;
16#else 15}
16
17static inline cpumask_t target_cpus(void)
18{
17 return cpumask_of_cpu(smp_processor_id()); 19 return cpumask_of_cpu(smp_processor_id());
18#endif
19} 20}
20 21
21#if defined CONFIG_ES7000_CLUSTERED_APIC 22#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
22#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 23#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio)
23#define INT_DELIVERY_MODE (dest_LowestPrio) 24#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */
24#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ 25#define NO_BALANCE_IRQ_CLUSTER (1)
25#define NO_BALANCE_IRQ (1) 26
26#undef WAKE_SECONDARY_VIA_INIT
27#define WAKE_SECONDARY_VIA_MIP
28#else
29#define APIC_DFR_VALUE (APIC_DFR_FLAT) 27#define APIC_DFR_VALUE (APIC_DFR_FLAT)
30#define INT_DELIVERY_MODE (dest_Fixed) 28#define INT_DELIVERY_MODE (dest_Fixed)
31#define INT_DEST_MODE (0) /* phys delivery to target procs */ 29#define INT_DEST_MODE (0) /* phys delivery to target procs */
32#define NO_BALANCE_IRQ (0) 30#define NO_BALANCE_IRQ (0)
33#undef APIC_DEST_LOGICAL 31#undef APIC_DEST_LOGICAL
34#define APIC_DEST_LOGICAL 0x0 32#define APIC_DEST_LOGICAL 0x0
35#define WAKE_SECONDARY_VIA_INIT
36#endif
37 33
38static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 34static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
39{ 35{
@@ -60,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu)
60 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 56 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
61 * document number 292116). So here it goes... 57 * document number 292116). So here it goes...
62 */ 58 */
59static inline void init_apic_ldr_cluster(void)
60{
61 unsigned long val;
62 int cpu = smp_processor_id();
63
64 apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
65 val = calculate_ldr(cpu);
66 apic_write(APIC_LDR, val);
67}
68
63static inline void init_apic_ldr(void) 69static inline void init_apic_ldr(void)
64{ 70{
65 unsigned long val; 71 unsigned long val;
@@ -70,10 +76,6 @@ static inline void init_apic_ldr(void)
70 apic_write(APIC_LDR, val); 76 apic_write(APIC_LDR, val);
71} 77}
72 78
73#ifndef CONFIG_X86_GENERICARCH
74extern void enable_apic_mode(void);
75#endif
76
77extern int apic_version [MAX_APICS]; 79extern int apic_version [MAX_APICS];
78static inline void setup_apic_routing(void) 80static inline void setup_apic_routing(void)
79{ 81{
@@ -144,7 +146,7 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
144 return (1); 146 return (1);
145} 147}
146 148
147static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 149static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
148{ 150{
149 int num_bits_set; 151 int num_bits_set;
150 int cpus_found = 0; 152 int cpus_found = 0;
@@ -154,11 +156,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
154 num_bits_set = cpus_weight(cpumask); 156 num_bits_set = cpus_weight(cpumask);
155 /* Return id to all */ 157 /* Return id to all */
156 if (num_bits_set == NR_CPUS) 158 if (num_bits_set == NR_CPUS)
157#if defined CONFIG_ES7000_CLUSTERED_APIC
158 return 0xFF; 159 return 0xFF;
159#else
160 return cpu_to_logical_apicid(0);
161#endif
162 /* 160 /*
163 * The cpus in the mask must all be on the apic cluster. If are not 161 * The cpus in the mask must all be on the apic cluster. If are not
164 * on the same apicid cluster return default value of TARGET_CPUS. 162 * on the same apicid cluster return default value of TARGET_CPUS.
@@ -171,11 +169,40 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
171 if (apicid_cluster(apicid) != 169 if (apicid_cluster(apicid) !=
172 apicid_cluster(new_apicid)){ 170 apicid_cluster(new_apicid)){
173 printk ("%s: Not a valid mask!\n", __func__); 171 printk ("%s: Not a valid mask!\n", __func__);
174#if defined CONFIG_ES7000_CLUSTERED_APIC
175 return 0xFF; 172 return 0xFF;
176#else 173 }
174 apicid = new_apicid;
175 cpus_found++;
176 }
177 cpu++;
178 }
179 return apicid;
180}
181
182static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
183{
184 int num_bits_set;
185 int cpus_found = 0;
186 int cpu;
187 int apicid;
188
189 num_bits_set = cpus_weight(cpumask);
190 /* Return id to all */
191 if (num_bits_set == NR_CPUS)
192 return cpu_to_logical_apicid(0);
193 /*
194 * The cpus in the mask must all be on the apic cluster. If are not
195 * on the same apicid cluster return default value of TARGET_CPUS.
196 */
197 cpu = first_cpu(cpumask);
198 apicid = cpu_to_logical_apicid(cpu);
199 while (cpus_found < num_bits_set) {
200 if (cpu_isset(cpu, cpumask)) {
201 int new_apicid = cpu_to_logical_apicid(cpu);
202 if (apicid_cluster(apicid) !=
203 apicid_cluster(new_apicid)){
204 printk ("%s: Not a valid mask!\n", __func__);
177 return cpu_to_logical_apicid(0); 205 return cpu_to_logical_apicid(0);
178#endif
179 } 206 }
180 apicid = new_apicid; 207 apicid = new_apicid;
181 cpus_found++; 208 cpus_found++;
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h
index 398493461913..78f0daaee436 100644
--- a/arch/x86/include/asm/es7000/wakecpu.h
+++ b/arch/x86/include/asm/es7000/wakecpu.h
@@ -1,36 +1,12 @@
1#ifndef __ASM_ES7000_WAKECPU_H 1#ifndef __ASM_ES7000_WAKECPU_H
2#define __ASM_ES7000_WAKECPU_H 2#define __ASM_ES7000_WAKECPU_H
3 3
4/* 4#define TRAMPOLINE_PHYS_LOW 0x467
5 * This file copes with machines that wakeup secondary CPUs by the 5#define TRAMPOLINE_PHYS_HIGH 0x469
6 * INIT, INIT, STARTUP sequence.
7 */
8
9#ifdef CONFIG_ES7000_CLUSTERED_APIC
10#define WAKE_SECONDARY_VIA_MIP
11#else
12#define WAKE_SECONDARY_VIA_INIT
13#endif
14
15#ifdef WAKE_SECONDARY_VIA_MIP
16extern int es7000_start_cpu(int cpu, unsigned long eip);
17static inline int
18wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
19{
20 int boot_error = 0;
21 boot_error = es7000_start_cpu(phys_apicid, start_eip);
22 return boot_error;
23}
24#endif
25
26#define TRAMPOLINE_LOW phys_to_virt(0x467)
27#define TRAMPOLINE_HIGH phys_to_virt(0x469)
28
29#define boot_cpu_apicid boot_cpu_physical_apicid
30 6
31static inline void wait_for_init_deassert(atomic_t *deassert) 7static inline void wait_for_init_deassert(atomic_t *deassert)
32{ 8{
33#ifdef WAKE_SECONDARY_VIA_INIT 9#ifndef CONFIG_ES7000_CLUSTERED_APIC
34 while (!atomic_read(deassert)) 10 while (!atomic_read(deassert))
35 cpu_relax(); 11 cpu_relax();
36#endif 12#endif
@@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
50{ 26{
51} 27}
52 28
53#define inquire_remote_apic(apicid) do { \ 29extern void __inquire_remote_apic(int apicid);
54 if (apic_verbosity >= APIC_DEBUG) \ 30
55 __inquire_remote_apic(apicid); \ 31static inline void inquire_remote_apic(int apicid)
56 } while (0) 32{
33 if (apic_verbosity >= APIC_DEBUG)
34 __inquire_remote_apic(apicid);
35}
57 36
58#endif /* __ASM_MACH_WAKECPU_H */ 37#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 74252264433d..6cfdafa409d8 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -29,6 +29,39 @@ extern int fix_aperture;
29#define AMD64_GARTCACHECTL 0x9c 29#define AMD64_GARTCACHECTL 0x9c
30#define AMD64_GARTEN (1<<0) 30#define AMD64_GARTEN (1<<0)
31 31
32#ifdef CONFIG_GART_IOMMU
33extern int gart_iommu_aperture;
34extern int gart_iommu_aperture_allowed;
35extern int gart_iommu_aperture_disabled;
36
37extern void early_gart_iommu_check(void);
38extern void gart_iommu_init(void);
39extern void gart_iommu_shutdown(void);
40extern void __init gart_parse_options(char *);
41extern void gart_iommu_hole_init(void);
42
43#else
44#define gart_iommu_aperture 0
45#define gart_iommu_aperture_allowed 0
46#define gart_iommu_aperture_disabled 1
47
48static inline void early_gart_iommu_check(void)
49{
50}
51static inline void gart_iommu_init(void)
52{
53}
54static inline void gart_iommu_shutdown(void)
55{
56}
57static inline void gart_parse_options(char *options)
58{
59}
60static inline void gart_iommu_hole_init(void)
61{
62}
63#endif
64
32extern int agp_amd64_init(void); 65extern int agp_amd64_init(void);
33 66
34static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) 67static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 5cbd4fcc06fd..0ac17d33a8c7 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_GENAPIC_32_H 2#define _ASM_X86_GENAPIC_32_H
3 3
4#include <asm/mpspec.h> 4#include <asm/mpspec.h>
5#include <asm/atomic.h>
5 6
6/* 7/*
7 * Generic APIC driver interface. 8 * Generic APIC driver interface.
@@ -65,6 +66,14 @@ struct genapic {
65 void (*send_IPI_allbutself)(int vector); 66 void (*send_IPI_allbutself)(int vector);
66 void (*send_IPI_all)(int vector); 67 void (*send_IPI_all)(int vector);
67#endif 68#endif
69 int (*wakeup_cpu)(int apicid, unsigned long start_eip);
70 int trampoline_phys_low;
71 int trampoline_phys_high;
72 void (*wait_for_init_deassert)(atomic_t *deassert);
73 void (*smp_callin_clear_local_apic)(void);
74 void (*store_NMI_vector)(unsigned short *high, unsigned short *low);
75 void (*restore_NMI_vector)(unsigned short *high, unsigned short *low);
76 void (*inquire_remote_apic)(int apicid);
68}; 77};
69 78
70#define APICFUNC(x) .x = x, 79#define APICFUNC(x) .x = x,
@@ -105,16 +114,24 @@ struct genapic {
105 APICFUNC(get_apic_id) \ 114 APICFUNC(get_apic_id) \
106 .apic_id_mask = APIC_ID_MASK, \ 115 .apic_id_mask = APIC_ID_MASK, \
107 APICFUNC(cpu_mask_to_apicid) \ 116 APICFUNC(cpu_mask_to_apicid) \
108 APICFUNC(vector_allocation_domain) \ 117 APICFUNC(vector_allocation_domain) \
109 APICFUNC(acpi_madt_oem_check) \ 118 APICFUNC(acpi_madt_oem_check) \
110 IPIFUNC(send_IPI_mask) \ 119 IPIFUNC(send_IPI_mask) \
111 IPIFUNC(send_IPI_allbutself) \ 120 IPIFUNC(send_IPI_allbutself) \
112 IPIFUNC(send_IPI_all) \ 121 IPIFUNC(send_IPI_all) \
113 APICFUNC(enable_apic_mode) \ 122 APICFUNC(enable_apic_mode) \
114 APICFUNC(phys_pkg_id) \ 123 APICFUNC(phys_pkg_id) \
124 .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \
125 .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \
126 APICFUNC(wait_for_init_deassert) \
127 APICFUNC(smp_callin_clear_local_apic) \
128 APICFUNC(store_NMI_vector) \
129 APICFUNC(restore_NMI_vector) \
130 APICFUNC(inquire_remote_apic) \
115} 131}
116 132
117extern struct genapic *genapic; 133extern struct genapic *genapic;
134extern void es7000_update_genapic_to_cluster(void);
118 135
119enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 136enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
120#define get_uv_system_type() UV_NONE 137#define get_uv_system_type() UV_NONE
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 13c4e96199ea..2cae011668b7 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -32,6 +32,8 @@ struct genapic {
32 unsigned int (*get_apic_id)(unsigned long x); 32 unsigned int (*get_apic_id)(unsigned long x);
33 unsigned long (*set_apic_id)(unsigned int id); 33 unsigned long (*set_apic_id)(unsigned int id);
34 unsigned long apic_id_mask; 34 unsigned long apic_id_mask;
35 /* wakeup_secondary_cpu */
36 int (*wakeup_cpu)(int apicid, unsigned long start_eip);
35}; 37};
36 38
37extern struct genapic *genapic; 39extern struct genapic *genapic;
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
index 5ca135e72f2b..cf7954d1405f 100644
--- a/arch/x86/include/asm/hardirq_32.h
+++ b/arch/x86/include/asm/hardirq_32.h
@@ -22,6 +22,8 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
22#define __ARCH_IRQ_STAT 22#define __ARCH_IRQ_STAT
23#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) 23#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
24 24
25#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++)
26
25void ack_bad_irq(unsigned int irq); 27void ack_bad_irq(unsigned int irq);
26#include <linux/irq_cpustat.h> 28#include <linux/irq_cpustat.h>
27 29
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
index 1ba381fc51d3..b5a6b5d56704 100644
--- a/arch/x86/include/asm/hardirq_64.h
+++ b/arch/x86/include/asm/hardirq_64.h
@@ -11,6 +11,8 @@
11 11
12#define __ARCH_IRQ_STAT 1 12#define __ARCH_IRQ_STAT 1
13 13
14#define inc_irq_stat(member) add_pda(member, 1)
15
14#define local_softirq_pending() read_pda(__softirq_pending) 16#define local_softirq_pending() read_pda(__softirq_pending)
15 17
16#define __ARCH_SET_SOFTIRQ_PENDING 1 18#define __ARCH_SET_SOFTIRQ_PENDING 1
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b97aecb0b61d..8de644b6b959 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
109#endif 109#endif
110#endif 110#endif
111 111
112#ifdef CONFIG_X86_32 112extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
113extern void (*const interrupt[NR_VECTORS])(void);
114#endif
115 113
116typedef int vector_irq_t[NR_VECTORS]; 114typedef int vector_irq_t[NR_VECTORS];
117DECLARE_PER_CPU(vector_irq_t, vector_irq); 115DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
new file mode 100644
index 000000000000..369f5c5d09a1
--- /dev/null
+++ b/arch/x86/include/asm/hypervisor.h
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) 2008, VMware, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
12 * NON INFRINGEMENT. See the GNU General Public License for more
13 * details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 */
20#ifndef ASM_X86__HYPERVISOR_H
21#define ASM_X86__HYPERVISOR_H
22
23extern unsigned long get_hypervisor_tsc_freq(void);
24extern void init_hypervisor(struct cpuinfo_x86 *c);
25
26#endif
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 97989c0e534c..50ca486fd88c 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -129,24 +129,6 @@ typedef struct compat_siginfo {
129 } _sifields; 129 } _sifields;
130} compat_siginfo_t; 130} compat_siginfo_t;
131 131
132struct sigframe32 {
133 u32 pretcode;
134 int sig;
135 struct sigcontext_ia32 sc;
136 struct _fpstate_ia32 fpstate;
137 unsigned int extramask[_COMPAT_NSIG_WORDS-1];
138};
139
140struct rt_sigframe32 {
141 u32 pretcode;
142 int sig;
143 u32 pinfo;
144 u32 puc;
145 compat_siginfo_t info;
146 struct ucontext_ia32 uc;
147 struct _fpstate_ia32 fpstate;
148};
149
150struct ustat32 { 132struct ustat32 {
151 __u32 f_tfree; 133 __u32 f_tfree;
152 compat_ino_t f_tinode; 134 compat_ino_t f_tinode;
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
index 44c89c3a23e9..38d87379e270 100644
--- a/arch/x86/include/asm/idle.h
+++ b/arch/x86/include/asm/idle.h
@@ -8,8 +8,13 @@ struct notifier_block;
8void idle_notifier_register(struct notifier_block *n); 8void idle_notifier_register(struct notifier_block *n);
9void idle_notifier_unregister(struct notifier_block *n); 9void idle_notifier_unregister(struct notifier_block *n);
10 10
11#ifdef CONFIG_X86_64
11void enter_idle(void); 12void enter_idle(void);
12void exit_idle(void); 13void exit_idle(void);
14#else /* !CONFIG_X86_64 */
15static inline void enter_idle(void) { }
16static inline void exit_idle(void) { }
17#endif /* CONFIG_X86_64 */
13 18
14void c1e_remove_cpu(int cpu); 19void c1e_remove_cpu(int cpu);
15 20
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index ac2abc88cd95..05cfed4485fa 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -4,6 +4,7 @@
4#define ARCH_HAS_IOREMAP_WC 4#define ARCH_HAS_IOREMAP_WC
5 5
6#include <linux/compiler.h> 6#include <linux/compiler.h>
7#include <asm-generic/int-ll64.h>
7 8
8#define build_mmio_read(name, size, type, reg, barrier) \ 9#define build_mmio_read(name, size, type, reg, barrier) \
9static inline type name(const volatile void __iomem *addr) \ 10static inline type name(const volatile void __iomem *addr) \
@@ -45,21 +46,39 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
45#define mmiowb() barrier() 46#define mmiowb() barrier()
46 47
47#ifdef CONFIG_X86_64 48#ifdef CONFIG_X86_64
49
48build_mmio_read(readq, "q", unsigned long, "=r", :"memory") 50build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
49build_mmio_read(__readq, "q", unsigned long, "=r", )
50build_mmio_write(writeq, "q", unsigned long, "r", :"memory") 51build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
51build_mmio_write(__writeq, "q", unsigned long, "r", )
52 52
53#define readq_relaxed(a) __readq(a) 53#else
54#define __raw_readq __readq 54
55#define __raw_writeq writeq 55static inline __u64 readq(const volatile void __iomem *addr)
56{
57 const volatile u32 __iomem *p = addr;
58 u32 low, high;
59
60 low = readl(p);
61 high = readl(p + 1);
62
63 return low + ((u64)high << 32);
64}
65
66static inline void writeq(__u64 val, volatile void __iomem *addr)
67{
68 writel(val, addr);
69 writel(val >> 32, addr+4);
70}
56 71
57/* Let people know we have them */
58#define readq readq
59#define writeq writeq
60#endif 72#endif
61 73
62extern int iommu_bio_merge; 74#define readq_relaxed(a) readq(a)
75
76#define __raw_readq(a) readq(a)
77#define __raw_writeq(val, addr) writeq(val, addr)
78
79/* Let people know that we have them */
80#define readq readq
81#define writeq writeq
63 82
64#ifdef CONFIG_X86_32 83#ifdef CONFIG_X86_32
65# include "io_32.h" 84# include "io_32.h"
diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h
index fea325a1122f..563c16270ba6 100644
--- a/arch/x86/include/asm/io_64.h
+++ b/arch/x86/include/asm/io_64.h
@@ -232,8 +232,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c);
232 232
233#define flush_write_buffers() 233#define flush_write_buffers()
234 234
235#define BIO_VMERGE_BOUNDARY iommu_bio_merge
236
237/* 235/*
238 * Convert a virtual cached pointer to an uncached pointer 236 * Convert a virtual cached pointer to an uncached pointer
239 */ 237 */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 6afd9933a7dd..e475e009ae5d 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -156,11 +156,21 @@ extern int sis_apic_bug;
156/* 1 if "noapic" boot option passed */ 156/* 1 if "noapic" boot option passed */
157extern int skip_ioapic_setup; 157extern int skip_ioapic_setup;
158 158
159/* 1 if "noapic" boot option passed */
160extern int noioapicquirk;
161
162/* -1 if "noapic" boot option passed */
163extern int noioapicreroute;
164
159/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ 165/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
160extern int timer_through_8259; 166extern int timer_through_8259;
161 167
162static inline void disable_ioapic_setup(void) 168static inline void disable_ioapic_setup(void)
163{ 169{
170#ifdef CONFIG_PCI
171 noioapicquirk = 1;
172 noioapicreroute = -1;
173#endif
164 skip_ioapic_setup = 1; 174 skip_ioapic_setup = 1;
165} 175}
166 176
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 0b500c5b6446..295b13193f4d 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -12,37 +12,4 @@ extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
12/* 10 seconds */ 12/* 10 seconds */
13#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) 13#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
14 14
15#ifdef CONFIG_GART_IOMMU
16extern int gart_iommu_aperture;
17extern int gart_iommu_aperture_allowed;
18extern int gart_iommu_aperture_disabled;
19
20extern void early_gart_iommu_check(void);
21extern void gart_iommu_init(void);
22extern void gart_iommu_shutdown(void);
23extern void __init gart_parse_options(char *);
24extern void gart_iommu_hole_init(void);
25
26#else
27#define gart_iommu_aperture 0
28#define gart_iommu_aperture_allowed 0
29#define gart_iommu_aperture_disabled 1
30
31static inline void early_gart_iommu_check(void)
32{
33}
34static inline void gart_iommu_init(void)
35{
36}
37static inline void gart_iommu_shutdown(void)
38{
39}
40static inline void gart_parse_options(char *options)
41{
42}
43static inline void gart_iommu_hole_init(void)
44{
45}
46#endif
47
48#endif /* _ASM_X86_IOMMU_H */ 15#endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index bae0eda95486..28e409fc73f3 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -31,10 +31,6 @@ static inline int irq_canonicalize(int irq)
31# endif 31# endif
32#endif 32#endif
33 33
34#ifdef CONFIG_IRQBALANCE
35extern int irqbalance_disable(char *str);
36#endif
37
38#ifdef CONFIG_HOTPLUG_CPU 34#ifdef CONFIG_HOTPLUG_CPU
39#include <linux/cpumask.h> 35#include <linux/cpumask.h>
40extern void fixup_irqs(cpumask_t map); 36extern void fixup_irqs(cpumask_t map);
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index af2f02d27fc7..86afd7473457 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -9,6 +9,8 @@
9 9
10#include <asm/percpu.h> 10#include <asm/percpu.h>
11 11
12#define ARCH_HAS_OWN_IRQ_REGS
13
12DECLARE_PER_CPU(struct pt_regs *, irq_regs); 14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
13 15
14static inline struct pt_regs *get_irq_regs(void) 16static inline struct pt_regs *get_irq_regs(void)
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index a1f22771a15a..c61d8b2ab8b9 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -5,21 +5,8 @@
5# define PA_CONTROL_PAGE 0 5# define PA_CONTROL_PAGE 0
6# define VA_CONTROL_PAGE 1 6# define VA_CONTROL_PAGE 1
7# define PA_PGD 2 7# define PA_PGD 2
8# define VA_PGD 3 8# define PA_SWAP_PAGE 3
9# define PA_PTE_0 4 9# define PAGES_NR 4
10# define VA_PTE_0 5
11# define PA_PTE_1 6
12# define VA_PTE_1 7
13# define PA_SWAP_PAGE 8
14# ifdef CONFIG_X86_PAE
15# define PA_PMD_0 9
16# define VA_PMD_0 10
17# define PA_PMD_1 11
18# define VA_PMD_1 12
19# define PAGES_NR 13
20# else
21# define PAGES_NR 9
22# endif
23#else 10#else
24# define PA_CONTROL_PAGE 0 11# define PA_CONTROL_PAGE 0
25# define VA_CONTROL_PAGE 1 12# define VA_CONTROL_PAGE 1
@@ -170,6 +157,20 @@ relocate_kernel(unsigned long indirection_page,
170 unsigned long start_address) ATTRIB_NORET; 157 unsigned long start_address) ATTRIB_NORET;
171#endif 158#endif
172 159
160#ifdef CONFIG_X86_32
161#define ARCH_HAS_KIMAGE_ARCH
162
163struct kimage_arch {
164 pgd_t *pgd;
165#ifdef CONFIG_X86_PAE
166 pmd_t *pmd0;
167 pmd_t *pmd1;
168#endif
169 pte_t *pte0;
170 pte_t *pte1;
171};
172#endif
173
173#endif /* __ASSEMBLY__ */ 174#endif /* __ASSEMBLY__ */
174 175
175#endif /* _ASM_X86_KEXEC_H */ 176#endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index f61ee8f937e4..5d98d0b68ffc 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -57,5 +57,65 @@
57#define __ALIGN_STR ".align 16,0x90" 57#define __ALIGN_STR ".align 16,0x90"
58#endif 58#endif
59 59
60/*
61 * to check ENTRY_X86/END_X86 and
62 * KPROBE_ENTRY_X86/KPROBE_END_X86
63 * unbalanced-missed-mixed appearance
64 */
65#define __set_entry_x86 .set ENTRY_X86_IN, 0
66#define __unset_entry_x86 .set ENTRY_X86_IN, 1
67#define __set_kprobe_x86 .set KPROBE_X86_IN, 0
68#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1
69
70#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
71
72#define __check_entry_x86 \
73 .ifdef ENTRY_X86_IN; \
74 .ifeq ENTRY_X86_IN; \
75 __macro_err_x86; \
76 .abort; \
77 .endif; \
78 .endif
79
80#define __check_kprobe_x86 \
81 .ifdef KPROBE_X86_IN; \
82 .ifeq KPROBE_X86_IN; \
83 __macro_err_x86; \
84 .abort; \
85 .endif; \
86 .endif
87
88#define __check_entry_kprobe_x86 \
89 __check_entry_x86; \
90 __check_kprobe_x86
91
92#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
93
94#define ENTRY_X86(name) \
95 __check_entry_kprobe_x86; \
96 __set_entry_x86; \
97 .globl name; \
98 __ALIGN; \
99 name:
100
101#define END_X86(name) \
102 __unset_entry_x86; \
103 __check_entry_kprobe_x86; \
104 .size name, .-name
105
106#define KPROBE_ENTRY_X86(name) \
107 __check_entry_kprobe_x86; \
108 __set_kprobe_x86; \
109 .pushsection .kprobes.text, "ax"; \
110 .globl name; \
111 __ALIGN; \
112 name:
113
114#define KPROBE_END_X86(name) \
115 __unset_kprobe_x86; \
116 __check_entry_kprobe_x86; \
117 .size name, .-name; \
118 .popsection
119
60#endif /* _ASM_X86_LINKAGE_H */ 120#endif /* _ASM_X86_LINKAGE_H */
61 121
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index ff3a6c236c00..6cb3a467e067 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -32,11 +32,13 @@ static inline cpumask_t target_cpus(void)
32#define vector_allocation_domain (genapic->vector_allocation_domain) 32#define vector_allocation_domain (genapic->vector_allocation_domain)
33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) 33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
34#define send_IPI_self (genapic->send_IPI_self) 34#define send_IPI_self (genapic->send_IPI_self)
35#define wakeup_secondary_cpu (genapic->wakeup_cpu)
35extern void setup_apic_routing(void); 36extern void setup_apic_routing(void);
36#else 37#else
37#define INT_DELIVERY_MODE dest_LowestPrio 38#define INT_DELIVERY_MODE dest_LowestPrio
38#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ 39#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
39#define TARGET_CPUS (target_cpus()) 40#define TARGET_CPUS (target_cpus())
41#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init
40/* 42/*
41 * Set up the logical destination ID. 43 * Set up the logical destination ID.
42 * 44 *
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index 9d80db91e992..ceb013660146 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -1,17 +1,8 @@
1#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H 1#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
2#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H 2#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
3 3
4/* 4#define TRAMPOLINE_PHYS_LOW (0x467)
5 * This file copes with machines that wakeup secondary CPUs by the 5#define TRAMPOLINE_PHYS_HIGH (0x469)
6 * INIT, INIT, STARTUP sequence.
7 */
8
9#define WAKE_SECONDARY_VIA_INIT
10
11#define TRAMPOLINE_LOW phys_to_virt(0x467)
12#define TRAMPOLINE_HIGH phys_to_virt(0x469)
13
14#define boot_cpu_apicid boot_cpu_physical_apicid
15 6
16static inline void wait_for_init_deassert(atomic_t *deassert) 7static inline void wait_for_init_deassert(atomic_t *deassert)
17{ 8{
@@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
33{ 24{
34} 25}
35 26
36#define inquire_remote_apic(apicid) do { \ 27extern void __inquire_remote_apic(int apicid);
37 if (apic_verbosity >= APIC_DEBUG) \ 28
38 __inquire_remote_apic(apicid); \ 29static inline void inquire_remote_apic(int apicid)
39 } while (0) 30{
31 if (apic_verbosity >= APIC_DEBUG)
32 __inquire_remote_apic(apicid);
33}
40 34
41#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ 35#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h
index dbab36d64d48..23bf52103b89 100644
--- a/arch/x86/include/asm/mach-default/smpboot_hooks.h
+++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h
@@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
13 CMOS_WRITE(0xa, 0xf); 13 CMOS_WRITE(0xa, 0xf);
14 local_flush_tlb(); 14 local_flush_tlb();
15 pr_debug("1.\n"); 15 pr_debug("1.\n");
16 *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; 16 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
17 start_eip >> 4;
17 pr_debug("2.\n"); 18 pr_debug("2.\n");
18 *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; 19 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
20 start_eip & 0xf;
19 pr_debug("3.\n"); 21 pr_debug("3.\n");
20} 22}
21 23
@@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
32 */ 34 */
33 CMOS_WRITE(0, 0xf); 35 CMOS_WRITE(0, 0xf);
34 36
35 *((volatile long *) phys_to_virt(0x467)) = 0; 37 *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
36} 38}
37 39
38static inline void __init smpboot_setup_io_apic(void) 40static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index 5180bd7478fb..e430f47df667 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -27,6 +27,7 @@
27#define vector_allocation_domain (genapic->vector_allocation_domain) 27#define vector_allocation_domain (genapic->vector_allocation_domain)
28#define enable_apic_mode (genapic->enable_apic_mode) 28#define enable_apic_mode (genapic->enable_apic_mode)
29#define phys_pkg_id (genapic->phys_pkg_id) 29#define phys_pkg_id (genapic->phys_pkg_id)
30#define wakeup_secondary_cpu (genapic->wakeup_cpu)
30 31
31extern void generic_bigsmp_probe(void); 32extern void generic_bigsmp_probe(void);
32 33
diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
new file mode 100644
index 000000000000..1ab16b168c8a
--- /dev/null
+++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
@@ -0,0 +1,12 @@
1#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
2#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
3
4#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low)
5#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high)
6#define wait_for_init_deassert (genapic->wait_for_init_deassert)
7#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic)
8#define store_NMI_vector (genapic->store_NMI_vector)
9#define restore_NMI_vector (genapic->restore_NMI_vector)
10#define inquire_remote_apic (genapic->inquire_remote_apic)
11
12#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
index 8e10015781fb..7e98ce1d2c0e 100644
--- a/arch/x86/include/asm/mmu_context_32.h
+++ b/arch/x86/include/asm/mmu_context_32.h
@@ -4,9 +4,8 @@
4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
5{ 5{
6#ifdef CONFIG_SMP 6#ifdef CONFIG_SMP
7 unsigned cpu = smp_processor_id(); 7 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
8 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) 8 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
9 per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY;
10#endif 9#endif
11} 10}
12 11
@@ -20,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev,
20 /* stop flush ipis for the previous mm */ 19 /* stop flush ipis for the previous mm */
21 cpu_clear(cpu, prev->cpu_vm_mask); 20 cpu_clear(cpu, prev->cpu_vm_mask);
22#ifdef CONFIG_SMP 21#ifdef CONFIG_SMP
23 per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; 22 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
24 per_cpu(cpu_tlbstate, cpu).active_mm = next; 23 x86_write_percpu(cpu_tlbstate.active_mm, next);
25#endif 24#endif
26 cpu_set(cpu, next->cpu_vm_mask); 25 cpu_set(cpu, next->cpu_vm_mask);
27 26
@@ -36,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev,
36 } 35 }
37#ifdef CONFIG_SMP 36#ifdef CONFIG_SMP
38 else { 37 else {
39 per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; 38 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
40 BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next); 39 BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
41 40
42 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { 41 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
43 /* We were in lazy tlb mode and leave_mm disabled 42 /* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e38859d577a1..cb58643947b9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -85,7 +85,9 @@
85/* AMD64 MSRs. Not complete. See the architecture manual for a more 85/* AMD64 MSRs. Not complete. See the architecture manual for a more
86 complete list. */ 86 complete list. */
87 87
88#define MSR_AMD64_PATCH_LEVEL 0x0000008b
88#define MSR_AMD64_NB_CFG 0xc001001f 89#define MSR_AMD64_NB_CFG 0xc001001f
90#define MSR_AMD64_PATCH_LOADER 0xc0010020
89#define MSR_AMD64_IBSFETCHCTL 0xc0011030 91#define MSR_AMD64_IBSFETCHCTL 0xc0011030
90#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 92#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
91#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 93#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index c2a812ebde89..4640ddd58fb9 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -22,10 +22,10 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
22} 22}
23 23
24/* 24/*
25 * i386 calling convention returns 64-bit value in edx:eax, while 25 * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A"
26 * x86_64 returns at rax. Also, the "A" constraint does not really 26 * constraint has different meanings. For i386, "A" means exactly
27 * mean rdx:rax in x86_64, so we need specialized behaviour for each 27 * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead,
28 * architecture 28 * it means rax *or* rdx.
29 */ 29 */
30#ifdef CONFIG_X86_64 30#ifdef CONFIG_X86_64
31#define DECLARE_ARGS(val, low, high) unsigned low, high 31#define DECLARE_ARGS(val, low, high) unsigned low, high
@@ -181,10 +181,10 @@ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
181} 181}
182 182
183#define rdtscl(low) \ 183#define rdtscl(low) \
184 ((low) = (u32)native_read_tsc()) 184 ((low) = (u32)__native_read_tsc())
185 185
186#define rdtscll(val) \ 186#define rdtscll(val) \
187 ((val) = native_read_tsc()) 187 ((val) = __native_read_tsc())
188 188
189#define rdpmc(counter, low, high) \ 189#define rdpmc(counter, low, high) \
190do { \ 190do { \
diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h
index c577bda5b1c5..6f499df8eddb 100644
--- a/arch/x86/include/asm/numaq/wakecpu.h
+++ b/arch/x86/include/asm/numaq/wakecpu.h
@@ -3,12 +3,8 @@
3 3
4/* This file copes with machines that wakeup secondary CPUs by NMIs */ 4/* This file copes with machines that wakeup secondary CPUs by NMIs */
5 5
6#define WAKE_SECONDARY_VIA_NMI 6#define TRAMPOLINE_PHYS_LOW (0x8)
7 7#define TRAMPOLINE_PHYS_HIGH (0xa)
8#define TRAMPOLINE_LOW phys_to_virt(0x8)
9#define TRAMPOLINE_HIGH phys_to_virt(0xa)
10
11#define boot_cpu_apicid boot_cpu_logical_apicid
12 8
13/* We don't do anything here because we use NMI's to boot instead */ 9/* We don't do anything here because we use NMI's to boot instead */
14static inline void wait_for_init_deassert(atomic_t *deassert) 10static inline void wait_for_init_deassert(atomic_t *deassert)
@@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void)
27static inline void store_NMI_vector(unsigned short *high, unsigned short *low) 23static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
28{ 24{
29 printk("Storing NMI vector\n"); 25 printk("Storing NMI vector\n");
30 *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); 26 *high =
31 *low = *((volatile unsigned short *) TRAMPOLINE_LOW); 27 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH));
28 *low =
29 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW));
32} 30}
33 31
34static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) 32static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
35{ 33{
36 printk("Restoring NMI vector\n"); 34 printk("Restoring NMI vector\n");
37 *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; 35 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
38 *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; 36 *high;
37 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
38 *low;
39} 39}
40 40
41#define inquire_remote_apic(apicid) {} 41static inline void inquire_remote_apic(int apicid)
42{
43}
42 44
43#endif /* __ASM_NUMAQ_WAKECPU_H */ 45#endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 875b38edf193..647781298e7e 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -19,6 +19,8 @@ struct pci_sysdata {
19}; 19};
20 20
21extern int pci_routeirq; 21extern int pci_routeirq;
22extern int noioapicquirk;
23extern int noioapicreroute;
22 24
23/* scan a bus after allocating a pci_sysdata for it */ 25/* scan a bus after allocating a pci_sysdata for it */
24extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, 26extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index b17edfd23628..e0d199fe1d83 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -56,23 +56,55 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
56#define pte_none(x) (!(x).pte_low) 56#define pte_none(x) (!(x).pte_low)
57 57
58/* 58/*
59 * Bits 0, 6 and 7 are taken, split up the 29 bits of offset 59 * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
60 * into this range: 60 * split up the 29 bits of offset into this range:
61 */ 61 */
62#define PTE_FILE_MAX_BITS 29 62#define PTE_FILE_MAX_BITS 29
63#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
64#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
65#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
66#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
67#else
68#define PTE_FILE_SHIFT2 (_PAGE_BIT_PROTNONE + 1)
69#define PTE_FILE_SHIFT3 (_PAGE_BIT_FILE + 1)
70#endif
71#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
72#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
63 73
64#define pte_to_pgoff(pte) \ 74#define pte_to_pgoff(pte) \
65 ((((pte).pte_low >> 1) & 0x1f) + (((pte).pte_low >> 8) << 5)) 75 ((((pte).pte_low >> PTE_FILE_SHIFT1) \
76 & ((1U << PTE_FILE_BITS1) - 1)) \
77 + ((((pte).pte_low >> PTE_FILE_SHIFT2) \
78 & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1) \
79 + (((pte).pte_low >> PTE_FILE_SHIFT3) \
80 << (PTE_FILE_BITS1 + PTE_FILE_BITS2)))
66 81
67#define pgoff_to_pte(off) \ 82#define pgoff_to_pte(off) \
68 ((pte_t) { .pte_low = (((off) & 0x1f) << 1) + \ 83 ((pte_t) { .pte_low = \
69 (((off) >> 5) << 8) + _PAGE_FILE }) 84 (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \
85 + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1)) \
86 << PTE_FILE_SHIFT2) \
87 + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \
88 << PTE_FILE_SHIFT3) \
89 + _PAGE_FILE })
70 90
71/* Encode and de-code a swap entry */ 91/* Encode and de-code a swap entry */
72#define __swp_type(x) (((x).val >> 1) & 0x1f) 92#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
73#define __swp_offset(x) ((x).val >> 8) 93#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
74#define __swp_entry(type, offset) \ 94#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
75 ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) 95#else
96#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
97#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
98#endif
99
100#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
101
102#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
103 & ((1U << SWP_TYPE_BITS) - 1))
104#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
105#define __swp_entry(type, offset) ((swp_entry_t) { \
106 ((type) << (_PAGE_BIT_PRESENT + 1)) \
107 | ((offset) << SWP_OFFSET_SHIFT) })
76#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) 108#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
77#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) 109#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
78 110
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 52597aeadfff..447da43cddb3 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -166,6 +166,7 @@ static inline int pte_none(pte_t pte)
166#define PTE_FILE_MAX_BITS 32 166#define PTE_FILE_MAX_BITS 32
167 167
168/* Encode and de-code a swap entry */ 168/* Encode and de-code a swap entry */
169#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
169#define __swp_type(x) (((x).val) & 0x1f) 170#define __swp_type(x) (((x).val) & 0x1f)
170#define __swp_offset(x) ((x).val >> 5) 171#define __swp_offset(x) ((x).val >> 5)
171#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) 172#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index c012f3b11671..83e69f4a37f0 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -10,7 +10,6 @@
10#define _PAGE_BIT_PCD 4 /* page cache disabled */ 10#define _PAGE_BIT_PCD 4 /* page cache disabled */
11#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ 11#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
12#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ 12#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
13#define _PAGE_BIT_FILE 6
14#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ 13#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
15#define _PAGE_BIT_PAT 7 /* on 4KB pages */ 14#define _PAGE_BIT_PAT 7 /* on 4KB pages */
16#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ 15#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
@@ -22,6 +21,12 @@
22#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 21#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
23#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ 22#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
24 23
24/* If _PAGE_BIT_PRESENT is clear, we use these: */
25/* - if the user mapped it with PROT_NONE; pte_present gives true */
26#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
27/* - set: nonlinear file mapping, saved PTE; unset:swap */
28#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
29
25#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) 30#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
26#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) 31#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
27#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) 32#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
@@ -46,11 +51,8 @@
46#define _PAGE_NX (_AT(pteval_t, 0)) 51#define _PAGE_NX (_AT(pteval_t, 0))
47#endif 52#endif
48 53
49/* If _PAGE_PRESENT is clear, we use these: */ 54#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
50#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, 55#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
51 * saved PTE; unset:swap */
52#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE;
53 pte_present gives true */
54 56
55#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ 57#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
56 _PAGE_ACCESSED | _PAGE_DIRTY) 58 _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -158,8 +160,19 @@
158#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ 160#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
159#endif 161#endif
160 162
163/*
164 * Macro to mark a page protection value as UC-
165 */
166#define pgprot_noncached(prot) \
167 ((boot_cpu_data.x86 > 3) \
168 ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS)) \
169 : (prot))
170
161#ifndef __ASSEMBLY__ 171#ifndef __ASSEMBLY__
162 172
173#define pgprot_writecombine pgprot_writecombine
174extern pgprot_t pgprot_writecombine(pgprot_t prot);
175
163/* 176/*
164 * ZERO_PAGE is a global shared page that is always zero: used 177 * ZERO_PAGE is a global shared page that is always zero: used
165 * for zero-mapped memory areas etc.. 178 * for zero-mapped memory areas etc..
@@ -329,6 +342,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
329#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) 342#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
330 343
331#ifndef __ASSEMBLY__ 344#ifndef __ASSEMBLY__
345/* Indicate that x86 has its own track and untrack pfn vma functions */
346#define __HAVE_PFNMAP_TRACKING
347
332#define __HAVE_PHYS_MEM_ACCESS_PROT 348#define __HAVE_PHYS_MEM_ACCESS_PROT
333struct file; 349struct file;
334pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, 350pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index f9d5889b336b..72b020deb46b 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -101,15 +101,6 @@ extern unsigned long pg0[];
101#endif 101#endif
102 102
103/* 103/*
104 * Macro to mark a page protection value as "uncacheable".
105 * On processors which do not support it, this is a no-op.
106 */
107#define pgprot_noncached(prot) \
108 ((boot_cpu_data.x86 > 3) \
109 ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \
110 : (prot))
111
112/*
113 * Conversion functions: convert a page and protection to a page entry, 104 * Conversion functions: convert a page and protection to a page entry,
114 * and a page entry and page directory to the page they refer to. 105 * and a page entry and page directory to the page they refer to.
115 */ 106 */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 545a0e042bb2..ba09289accaa 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -146,7 +146,7 @@ static inline void native_pgd_clear(pgd_t *pgd)
146#define PGDIR_MASK (~(PGDIR_SIZE - 1)) 146#define PGDIR_MASK (~(PGDIR_SIZE - 1))
147 147
148 148
149#define MAXMEM _AC(0x00003fffffffffff, UL) 149#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
150#define VMALLOC_START _AC(0xffffc20000000000, UL) 150#define VMALLOC_START _AC(0xffffc20000000000, UL)
151#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) 151#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
152#define VMEMMAP_START _AC(0xffffe20000000000, UL) 152#define VMEMMAP_START _AC(0xffffe20000000000, UL)
@@ -177,12 +177,6 @@ static inline int pmd_bad(pmd_t pmd)
177#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ 177#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */
178 178
179/* 179/*
180 * Macro to mark a page protection value as "uncacheable".
181 */
182#define pgprot_noncached(prot) \
183 (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT))
184
185/*
186 * Conversion functions: convert a page and protection to a page entry, 180 * Conversion functions: convert a page and protection to a page entry,
187 * and a page entry and page directory to the page they refer to. 181 * and a page entry and page directory to the page they refer to.
188 */ 182 */
@@ -250,10 +244,22 @@ static inline int pud_large(pud_t pte)
250extern int direct_gbpages; 244extern int direct_gbpages;
251 245
252/* Encode and de-code a swap entry */ 246/* Encode and de-code a swap entry */
253#define __swp_type(x) (((x).val >> 1) & 0x3f) 247#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
254#define __swp_offset(x) ((x).val >> 8) 248#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
255#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | \ 249#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
256 ((offset) << 8) }) 250#else
251#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
252#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
253#endif
254
255#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
256
257#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
258 & ((1U << SWP_TYPE_BITS) - 1))
259#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
260#define __swp_entry(type, offset) ((swp_entry_t) { \
261 ((type) << (_PAGE_BIT_PRESENT + 1)) \
262 | ((offset) << SWP_OFFSET_SHIFT) })
257#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) 263#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
258#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) 264#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
259 265
diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h
index fe681147a4f7..a8894647dd9a 100644
--- a/arch/x86/include/asm/prctl.h
+++ b/arch/x86/include/asm/prctl.h
@@ -6,5 +6,8 @@
6#define ARCH_GET_FS 0x1003 6#define ARCH_GET_FS 0x1003
7#define ARCH_GET_GS 0x1004 7#define ARCH_GET_GS 0x1004
8 8
9#ifdef CONFIG_X86_64
10extern long sys_arch_prctl(int, unsigned long);
11#endif /* CONFIG_X86_64 */
9 12
10#endif /* _ASM_X86_PRCTL_H */ 13#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ca01e383269..a570eafa4755 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -110,6 +110,7 @@ struct cpuinfo_x86 {
110 /* Index into per_cpu list: */ 110 /* Index into per_cpu list: */
111 u16 cpu_index; 111 u16 cpu_index;
112#endif 112#endif
113 unsigned int x86_hyper_vendor;
113} __attribute__((__aligned__(SMP_CACHE_BYTES))); 114} __attribute__((__aligned__(SMP_CACHE_BYTES)));
114 115
115#define X86_VENDOR_INTEL 0 116#define X86_VENDOR_INTEL 0
@@ -123,6 +124,9 @@ struct cpuinfo_x86 {
123 124
124#define X86_VENDOR_UNKNOWN 0xff 125#define X86_VENDOR_UNKNOWN 0xff
125 126
127#define X86_HYPER_VENDOR_NONE 0
128#define X86_HYPER_VENDOR_VMWARE 1
129
126/* 130/*
127 * capabilities of CPUs 131 * capabilities of CPUs
128 */ 132 */
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index df7710354f85..562d4fd31ba8 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_REBOOT_H 1#ifndef _ASM_X86_REBOOT_H
2#define _ASM_X86_REBOOT_H 2#define _ASM_X86_REBOOT_H
3 3
4#include <linux/kdebug.h>
5
4struct pt_regs; 6struct pt_regs;
5 7
6struct machine_ops { 8struct machine_ops {
@@ -18,4 +20,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs);
18void native_machine_shutdown(void); 20void native_machine_shutdown(void);
19void machine_real_restart(const unsigned char *code, int length); 21void machine_real_restart(const unsigned char *code, int length);
20 22
23typedef void (*nmi_shootdown_cb)(int, struct die_args*);
24void nmi_shootdown_cpus(nmi_shootdown_cb callback);
25
21#endif /* _ASM_X86_REBOOT_H */ 26#endif /* _ASM_X86_REBOOT_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index f12d37237465..4fcd53fd5f43 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -8,6 +8,10 @@
8/* Interrupt control for vSMPowered x86_64 systems */ 8/* Interrupt control for vSMPowered x86_64 systems */
9void vsmp_init(void); 9void vsmp_init(void);
10 10
11
12void setup_bios_corruption_check(void);
13
14
11#ifdef CONFIG_X86_VISWS 15#ifdef CONFIG_X86_VISWS
12extern void visws_early_detect(void); 16extern void visws_early_detect(void);
13extern int is_visws_box(void); 17extern int is_visws_box(void);
@@ -16,6 +20,8 @@ static inline void visws_early_detect(void) { }
16static inline int is_visws_box(void) { return 0; } 20static inline int is_visws_box(void) { return 0; }
17#endif 21#endif
18 22
23extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
24extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
19/* 25/*
20 * Any setup quirks to be performed? 26 * Any setup quirks to be performed?
21 */ 27 */
@@ -39,6 +45,7 @@ struct x86_quirks {
39 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, 45 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
40 unsigned short oemsize); 46 unsigned short oemsize);
41 int (*setup_ioapic_ids)(void); 47 int (*setup_ioapic_ids)(void);
48 int (*update_genapic)(void);
42}; 49};
43 50
44extern struct x86_quirks *x86_quirks; 51extern struct x86_quirks *x86_quirks;
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
new file mode 100644
index 000000000000..4e0fe26d27d3
--- /dev/null
+++ b/arch/x86/include/asm/sigframe.h
@@ -0,0 +1,70 @@
1#ifndef _ASM_X86_SIGFRAME_H
2#define _ASM_X86_SIGFRAME_H
3
4#include <asm/sigcontext.h>
5#include <asm/siginfo.h>
6#include <asm/ucontext.h>
7
8#ifdef CONFIG_X86_32
9#define sigframe_ia32 sigframe
10#define rt_sigframe_ia32 rt_sigframe
11#define sigcontext_ia32 sigcontext
12#define _fpstate_ia32 _fpstate
13#define ucontext_ia32 ucontext
14#else /* !CONFIG_X86_32 */
15
16#ifdef CONFIG_IA32_EMULATION
17#include <asm/ia32.h>
18#endif /* CONFIG_IA32_EMULATION */
19
20#endif /* CONFIG_X86_32 */
21
22#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
23struct sigframe_ia32 {
24 u32 pretcode;
25 int sig;
26 struct sigcontext_ia32 sc;
27 /*
28 * fpstate is unused. fpstate is moved/allocated after
29 * retcode[] below. This movement allows to have the FP state and the
30 * future state extensions (xsave) stay together.
31 * And at the same time retaining the unused fpstate, prevents changing
32 * the offset of extramask[] in the sigframe and thus prevent any
33 * legacy application accessing/modifying it.
34 */
35 struct _fpstate_ia32 fpstate_unused;
36#ifdef CONFIG_IA32_EMULATION
37 unsigned int extramask[_COMPAT_NSIG_WORDS-1];
38#else /* !CONFIG_IA32_EMULATION */
39 unsigned long extramask[_NSIG_WORDS-1];
40#endif /* CONFIG_IA32_EMULATION */
41 char retcode[8];
42 /* fp state follows here */
43};
44
45struct rt_sigframe_ia32 {
46 u32 pretcode;
47 int sig;
48 u32 pinfo;
49 u32 puc;
50#ifdef CONFIG_IA32_EMULATION
51 compat_siginfo_t info;
52#else /* !CONFIG_IA32_EMULATION */
53 struct siginfo info;
54#endif /* CONFIG_IA32_EMULATION */
55 struct ucontext_ia32 uc;
56 char retcode[8];
57 /* fp state follows here */
58};
59#endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */
60
61#ifdef CONFIG_X86_64
62struct rt_sigframe {
63 char __user *pretcode;
64 struct ucontext uc;
65 struct siginfo info;
66 /* fp state follows here */
67};
68#endif /* CONFIG_X86_64 */
69
70#endif /* _ASM_X86_SIGFRAME_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 96ac44f275da..7761a5d554bb 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -121,6 +121,10 @@ typedef unsigned long sigset_t;
121 121
122#ifndef __ASSEMBLY__ 122#ifndef __ASSEMBLY__
123 123
124# ifdef __KERNEL__
125extern void do_notify_resume(struct pt_regs *, void *, __u32);
126# endif /* __KERNEL__ */
127
124#ifdef __i386__ 128#ifdef __i386__
125# ifdef __KERNEL__ 129# ifdef __KERNEL__
126struct old_sigaction { 130struct old_sigaction {
@@ -141,8 +145,6 @@ struct k_sigaction {
141 struct sigaction sa; 145 struct sigaction sa;
142}; 146};
143 147
144extern void do_notify_resume(struct pt_regs *, void *, __u32);
145
146# else /* __KERNEL__ */ 148# else /* __KERNEL__ */
147/* Here we must cater to libcs that poke about in kernel headers. */ 149/* Here we must cater to libcs that poke about in kernel headers. */
148 150
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index be44f7dab395..e3cc3c063ec5 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,7 +27,7 @@
27#else /* CONFIG_X86_32 */ 27#else /* CONFIG_X86_32 */
28# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ 28# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
29# define MAX_PHYSADDR_BITS 44 29# define MAX_PHYSADDR_BITS 44
30# define MAX_PHYSMEM_BITS 44 30# define MAX_PHYSMEM_BITS 44 /* Can be max 45 bits */
31#endif 31#endif
32 32
33#endif /* CONFIG_SPARSEMEM */ 33#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 87803da44010..9c6797c3e56c 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -19,6 +19,13 @@
19/* kernel/ioport.c */ 19/* kernel/ioport.c */
20asmlinkage long sys_ioperm(unsigned long, unsigned long, int); 20asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
21 21
22/* kernel/ldt.c */
23asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
24
25/* kernel/tls.c */
26asmlinkage int sys_set_thread_area(struct user_desc __user *);
27asmlinkage int sys_get_thread_area(struct user_desc __user *);
28
22/* X86_32 only */ 29/* X86_32 only */
23#ifdef CONFIG_X86_32 30#ifdef CONFIG_X86_32
24/* kernel/process_32.c */ 31/* kernel/process_32.c */
@@ -33,14 +40,11 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
33 struct old_sigaction __user *); 40 struct old_sigaction __user *);
34asmlinkage int sys_sigaltstack(unsigned long); 41asmlinkage int sys_sigaltstack(unsigned long);
35asmlinkage unsigned long sys_sigreturn(unsigned long); 42asmlinkage unsigned long sys_sigreturn(unsigned long);
36asmlinkage int sys_rt_sigreturn(unsigned long); 43asmlinkage int sys_rt_sigreturn(struct pt_regs);
37 44
38/* kernel/ioport.c */ 45/* kernel/ioport.c */
39asmlinkage long sys_iopl(unsigned long); 46asmlinkage long sys_iopl(unsigned long);
40 47
41/* kernel/ldt.c */
42asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
43
44/* kernel/sys_i386_32.c */ 48/* kernel/sys_i386_32.c */
45asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, 49asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
46 unsigned long, unsigned long, unsigned long); 50 unsigned long, unsigned long, unsigned long);
@@ -54,10 +58,6 @@ asmlinkage int sys_uname(struct old_utsname __user *);
54struct oldold_utsname; 58struct oldold_utsname;
55asmlinkage int sys_olduname(struct oldold_utsname __user *); 59asmlinkage int sys_olduname(struct oldold_utsname __user *);
56 60
57/* kernel/tls.c */
58asmlinkage int sys_set_thread_area(struct user_desc __user *);
59asmlinkage int sys_get_thread_area(struct user_desc __user *);
60
61/* kernel/vm86_32.c */ 61/* kernel/vm86_32.c */
62asmlinkage int sys_vm86old(struct pt_regs); 62asmlinkage int sys_vm86old(struct pt_regs);
63asmlinkage int sys_vm86(struct pt_regs); 63asmlinkage int sys_vm86(struct pt_regs);
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 2ed3f0f44ff7..8e626ea33a1a 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -17,12 +17,12 @@
17# define AT_VECTOR_SIZE_ARCH 1 17# define AT_VECTOR_SIZE_ARCH 1
18#endif 18#endif
19 19
20#ifdef CONFIG_X86_32
21
22struct task_struct; /* one of the stranger aspects of C forward declarations */ 20struct task_struct; /* one of the stranger aspects of C forward declarations */
23struct task_struct *__switch_to(struct task_struct *prev, 21struct task_struct *__switch_to(struct task_struct *prev,
24 struct task_struct *next); 22 struct task_struct *next);
25 23
24#ifdef CONFIG_X86_32
25
26/* 26/*
27 * Saving eflags is important. It switches not only IOPL between tasks, 27 * Saving eflags is important. It switches not only IOPL between tasks,
28 * it also protects other tasks from NT leaking through sysenter etc. 28 * it also protects other tasks from NT leaking through sysenter etc.
@@ -314,6 +314,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
314 314
315void default_idle(void); 315void default_idle(void);
316 316
317void stop_this_cpu(void *dummy);
318
317/* 319/*
318 * Force strict CPU ordering. 320 * Force strict CPU ordering.
319 * And yes, this is required on UP too when we're talking 321 * And yes, this is required on UP too when we're talking
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad2..8dbc57390d25 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -24,7 +24,7 @@ struct exec_domain;
24struct thread_info { 24struct thread_info {
25 struct task_struct *task; /* main task structure */ 25 struct task_struct *task; /* main task structure */
26 struct exec_domain *exec_domain; /* execution domain */ 26 struct exec_domain *exec_domain; /* execution domain */
27 unsigned long flags; /* low level flags */ 27 __u32 flags; /* low level flags */
28 __u32 status; /* thread synchronous flags */ 28 __u32 status; /* thread synchronous flags */
29 __u32 cpu; /* current CPU */ 29 __u32 cpu; /* current CPU */
30 int preempt_count; /* 0 => preemptable, 30 int preempt_count; /* 0 => preemptable,
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index fa0d79facdbc..780ba0ab94f9 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -3,6 +3,7 @@
3 3
4#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
5 5
6#ifdef CONFIG_X86_TRAMPOLINE
6/* 7/*
7 * Trampoline 80x86 program as an array. 8 * Trampoline 80x86 program as an array.
8 */ 9 */
@@ -13,8 +14,14 @@ extern unsigned char *trampoline_base;
13extern unsigned long init_rsp; 14extern unsigned long init_rsp;
14extern unsigned long initial_code; 15extern unsigned long initial_code;
15 16
17#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
16#define TRAMPOLINE_BASE 0x6000 18#define TRAMPOLINE_BASE 0x6000
19
17extern unsigned long setup_trampoline(void); 20extern unsigned long setup_trampoline(void);
21extern void __init reserve_trampoline_memory(void);
22#else
23static inline void reserve_trampoline_memory(void) {};
24#endif /* CONFIG_X86_TRAMPOLINE */
18 25
19#endif /* __ASSEMBLY__ */ 26#endif /* __ASSEMBLY__ */
20 27
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 45dee286e45c..2ee0a3bceedf 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -46,6 +46,10 @@ dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long);
46dotraplinkage void do_invalid_TSS(struct pt_regs *, long); 46dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
47dotraplinkage void do_segment_not_present(struct pt_regs *, long); 47dotraplinkage void do_segment_not_present(struct pt_regs *, long);
48dotraplinkage void do_stack_segment(struct pt_regs *, long); 48dotraplinkage void do_stack_segment(struct pt_regs *, long);
49#ifdef CONFIG_X86_64
50dotraplinkage void do_double_fault(struct pt_regs *, long);
51asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *);
52#endif
49dotraplinkage void do_general_protection(struct pt_regs *, long); 53dotraplinkage void do_general_protection(struct pt_regs *, long);
50dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); 54dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
51dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); 55dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
@@ -72,10 +76,13 @@ static inline int get_si_code(unsigned long condition)
72extern int panic_on_unrecovered_nmi; 76extern int panic_on_unrecovered_nmi;
73extern int kstack_depth_to_print; 77extern int kstack_depth_to_print;
74 78
75#ifdef CONFIG_X86_32
76void math_error(void __user *); 79void math_error(void __user *);
77unsigned long patch_espfix_desc(unsigned long, unsigned long);
78asmlinkage void math_emulate(long); 80asmlinkage void math_emulate(long);
81#ifdef CONFIG_X86_32
82unsigned long patch_espfix_desc(unsigned long, unsigned long);
83#else
84asmlinkage void smp_thermal_interrupt(void);
85asmlinkage void mce_threshold_interrupt(void);
79#endif 86#endif
80 87
81#endif /* _ASM_X86_TRAPS_H */ 88#endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 9cd83a8e40d5..38ae163cc91b 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -34,8 +34,6 @@ static inline cycles_t get_cycles(void)
34 34
35static __always_inline cycles_t vget_cycles(void) 35static __always_inline cycles_t vget_cycles(void)
36{ 36{
37 cycles_t cycles;
38
39 /* 37 /*
40 * We only do VDSOs on TSC capable CPUs, so this shouldnt 38 * We only do VDSOs on TSC capable CPUs, so this shouldnt
41 * access boot_cpu_data (which is not VDSO-safe): 39 * access boot_cpu_data (which is not VDSO-safe):
@@ -44,11 +42,7 @@ static __always_inline cycles_t vget_cycles(void)
44 if (!cpu_has_tsc) 42 if (!cpu_has_tsc)
45 return 0; 43 return 0;
46#endif 44#endif
47 rdtsc_barrier(); 45 return (cycles_t)__native_read_tsc();
48 cycles = (cycles_t)__native_read_tsc();
49 rdtsc_barrier();
50
51 return cycles;
52} 46}
53 47
54extern void tsc_init(void); 48extern void tsc_init(void);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c54921b2e4..580c3ee6c58c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -350,14 +350,14 @@ do { \
350 350
351#define __put_user_nocheck(x, ptr, size) \ 351#define __put_user_nocheck(x, ptr, size) \
352({ \ 352({ \
353 long __pu_err; \ 353 int __pu_err; \
354 __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ 354 __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
355 __pu_err; \ 355 __pu_err; \
356}) 356})
357 357
358#define __get_user_nocheck(x, ptr, size) \ 358#define __get_user_nocheck(x, ptr, size) \
359({ \ 359({ \
360 long __gu_err; \ 360 int __gu_err; \
361 unsigned long __gu_val; \ 361 unsigned long __gu_val; \
362 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ 362 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
363 (x) = (__force __typeof__(*(ptr)))__gu_val; \ 363 (x) = (__force __typeof__(*(ptr)))__gu_val; \
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index d931d3b7e6f7..7ed17ff502b9 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -32,13 +32,18 @@
32enum uv_bios_cmd { 32enum uv_bios_cmd {
33 UV_BIOS_COMMON, 33 UV_BIOS_COMMON,
34 UV_BIOS_GET_SN_INFO, 34 UV_BIOS_GET_SN_INFO,
35 UV_BIOS_FREQ_BASE 35 UV_BIOS_FREQ_BASE,
36 UV_BIOS_WATCHLIST_ALLOC,
37 UV_BIOS_WATCHLIST_FREE,
38 UV_BIOS_MEMPROTECT,
39 UV_BIOS_GET_PARTITION_ADDR
36}; 40};
37 41
38/* 42/*
39 * Status values returned from a BIOS call. 43 * Status values returned from a BIOS call.
40 */ 44 */
41enum { 45enum {
46 BIOS_STATUS_MORE_PASSES = 1,
42 BIOS_STATUS_SUCCESS = 0, 47 BIOS_STATUS_SUCCESS = 0,
43 BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, 48 BIOS_STATUS_UNIMPLEMENTED = -ENOSYS,
44 BIOS_STATUS_EINVAL = -EINVAL, 49 BIOS_STATUS_EINVAL = -EINVAL,
@@ -71,6 +76,21 @@ union partition_info_u {
71 }; 76 };
72}; 77};
73 78
79union uv_watchlist_u {
80 u64 val;
81 struct {
82 u64 blade : 16,
83 size : 32,
84 filler : 16;
85 };
86};
87
88enum uv_memprotect {
89 UV_MEMPROT_RESTRICT_ACCESS,
90 UV_MEMPROT_ALLOW_AMO,
91 UV_MEMPROT_ALLOW_RW
92};
93
74/* 94/*
75 * bios calls have 6 parameters 95 * bios calls have 6 parameters
76 */ 96 */
@@ -80,14 +100,20 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
80 100
81extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); 101extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
82extern s64 uv_bios_freq_base(u64, u64 *); 102extern s64 uv_bios_freq_base(u64, u64 *);
103extern int uv_bios_mq_watchlist_alloc(int, unsigned long, unsigned int,
104 unsigned long *);
105extern int uv_bios_mq_watchlist_free(int, int);
106extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
107extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
83 108
84extern void uv_bios_init(void); 109extern void uv_bios_init(void);
85 110
111extern unsigned long sn_rtc_cycles_per_second;
86extern int uv_type; 112extern int uv_type;
87extern long sn_partition_id; 113extern long sn_partition_id;
88extern long uv_coherency_id; 114extern long sn_coherency_id;
89extern long uv_region_size; 115extern long sn_region_size;
90#define partition_coherence_id() (uv_coherency_id) 116#define partition_coherence_id() (sn_coherency_id)
91 117
92extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ 118extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
93 119
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 7a5782610b2b..777327ef05c1 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -113,25 +113,37 @@
113 */ 113 */
114#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) 114#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2)
115 115
116struct uv_scir_s {
117 struct timer_list timer;
118 unsigned long offset;
119 unsigned long last;
120 unsigned long idle_on;
121 unsigned long idle_off;
122 unsigned char state;
123 unsigned char enabled;
124};
125
116/* 126/*
117 * The following defines attributes of the HUB chip. These attributes are 127 * The following defines attributes of the HUB chip. These attributes are
118 * frequently referenced and are kept in the per-cpu data areas of each cpu. 128 * frequently referenced and are kept in the per-cpu data areas of each cpu.
119 * They are kept together in a struct to minimize cache misses. 129 * They are kept together in a struct to minimize cache misses.
120 */ 130 */
121struct uv_hub_info_s { 131struct uv_hub_info_s {
122 unsigned long global_mmr_base; 132 unsigned long global_mmr_base;
123 unsigned long gpa_mask; 133 unsigned long gpa_mask;
124 unsigned long gnode_upper; 134 unsigned long gnode_upper;
125 unsigned long lowmem_remap_top; 135 unsigned long lowmem_remap_top;
126 unsigned long lowmem_remap_base; 136 unsigned long lowmem_remap_base;
127 unsigned short pnode; 137 unsigned short pnode;
128 unsigned short pnode_mask; 138 unsigned short pnode_mask;
129 unsigned short coherency_domain_number; 139 unsigned short coherency_domain_number;
130 unsigned short numa_blade_id; 140 unsigned short numa_blade_id;
131 unsigned char blade_processor_id; 141 unsigned char blade_processor_id;
132 unsigned char m_val; 142 unsigned char m_val;
133 unsigned char n_val; 143 unsigned char n_val;
144 struct uv_scir_s scir;
134}; 145};
146
135DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 147DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
136#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) 148#define uv_hub_info (&__get_cpu_var(__uv_hub_info))
137#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) 149#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu))
@@ -163,6 +175,30 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
163 175
164#define UV_APIC_PNODE_SHIFT 6 176#define UV_APIC_PNODE_SHIFT 6
165 177
178/* Local Bus from cpu's perspective */
179#define LOCAL_BUS_BASE 0x1c00000
180#define LOCAL_BUS_SIZE (4 * 1024 * 1024)
181
182/*
183 * System Controller Interface Reg
184 *
185 * Note there are NO leds on a UV system. This register is only
186 * used by the system controller to monitor system-wide operation.
187 * There are 64 regs per node. With Nahelem cpus (2 cores per node,
188 * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
189 * a node.
190 *
191 * The window is located at top of ACPI MMR space
192 */
193#define SCIR_WINDOW_COUNT 64
194#define SCIR_LOCAL_MMR_BASE (LOCAL_BUS_BASE + \
195 LOCAL_BUS_SIZE - \
196 SCIR_WINDOW_COUNT)
197
198#define SCIR_CPU_HEARTBEAT 0x01 /* timer interrupt */
199#define SCIR_CPU_ACTIVITY 0x02 /* not idle */
200#define SCIR_CPU_HB_INTERVAL (HZ) /* once per second */
201
166/* 202/*
167 * Macros for converting between kernel virtual addresses, socket local physical 203 * Macros for converting between kernel virtual addresses, socket local physical
168 * addresses, and UV global physical addresses. 204 * addresses, and UV global physical addresses.
@@ -174,7 +210,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
174static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) 210static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
175{ 211{
176 if (paddr < uv_hub_info->lowmem_remap_top) 212 if (paddr < uv_hub_info->lowmem_remap_top)
177 paddr += uv_hub_info->lowmem_remap_base; 213 paddr |= uv_hub_info->lowmem_remap_base;
178 return paddr | uv_hub_info->gnode_upper; 214 return paddr | uv_hub_info->gnode_upper;
179} 215}
180 216
@@ -182,19 +218,7 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
182/* socket virtual --> UV global physical address */ 218/* socket virtual --> UV global physical address */
183static inline unsigned long uv_gpa(void *v) 219static inline unsigned long uv_gpa(void *v)
184{ 220{
185 return __pa(v) | uv_hub_info->gnode_upper; 221 return uv_soc_phys_ram_to_gpa(__pa(v));
186}
187
188/* socket virtual --> UV global physical address */
189static inline void *uv_vgpa(void *v)
190{
191 return (void *)uv_gpa(v);
192}
193
194/* UV global physical address --> socket virtual */
195static inline void *uv_va(unsigned long gpa)
196{
197 return __va(gpa & uv_hub_info->gpa_mask);
198} 222}
199 223
200/* pnode, offset --> socket virtual */ 224/* pnode, offset --> socket virtual */
@@ -277,6 +301,16 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
277 *uv_local_mmr_address(offset) = val; 301 *uv_local_mmr_address(offset) = val;
278} 302}
279 303
304static inline unsigned char uv_read_local_mmr8(unsigned long offset)
305{
306 return *((unsigned char *)uv_local_mmr_address(offset));
307}
308
309static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val)
310{
311 *((unsigned char *)uv_local_mmr_address(offset)) = val;
312}
313
280/* 314/*
281 * Structures and definitions for converting between cpu, node, pnode, and blade 315 * Structures and definitions for converting between cpu, node, pnode, and blade
282 * numbers. 316 * numbers.
@@ -351,5 +385,20 @@ static inline int uv_num_possible_blades(void)
351 return uv_possible_blades; 385 return uv_possible_blades;
352} 386}
353 387
354#endif /* _ASM_X86_UV_UV_HUB_H */ 388/* Update SCIR state */
389static inline void uv_set_scir_bits(unsigned char value)
390{
391 if (uv_hub_info->scir.state != value) {
392 uv_hub_info->scir.state = value;
393 uv_write_local_mmr8(uv_hub_info->scir.offset, value);
394 }
395}
396static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
397{
398 if (uv_cpu_hub_info(cpu)->scir.state != value) {
399 uv_cpu_hub_info(cpu)->scir.state = value;
400 uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value);
401 }
402}
355 403
404#endif /* _ASM_X86_UV_UV_HUB_H */
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
new file mode 100644
index 000000000000..c11b7e100d83
--- /dev/null
+++ b/arch/x86/include/asm/vmware.h
@@ -0,0 +1,27 @@
1/*
2 * Copyright (C) 2008, VMware, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
12 * NON INFRINGEMENT. See the GNU General Public License for more
13 * details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 */
20#ifndef ASM_X86__VMWARE_H
21#define ASM_X86__VMWARE_H
22
23extern unsigned long vmware_get_tsc_khz(void);
24extern int vmware_platform(void);
25extern void vmware_set_feature_bits(struct cpuinfo_x86 *c);
26
27#endif
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3f6000d95fe2..5e79ca694326 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -33,8 +33,14 @@
33#ifndef _ASM_X86_XEN_HYPERCALL_H 33#ifndef _ASM_X86_XEN_HYPERCALL_H
34#define _ASM_X86_XEN_HYPERCALL_H 34#define _ASM_X86_XEN_HYPERCALL_H
35 35
36#include <linux/kernel.h>
37#include <linux/spinlock.h>
36#include <linux/errno.h> 38#include <linux/errno.h>
37#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/types.h>
41
42#include <asm/page.h>
43#include <asm/pgtable.h>
38 44
39#include <xen/interface/xen.h> 45#include <xen/interface/xen.h>
40#include <xen/interface/sched.h> 46#include <xen/interface/sched.h>
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index a38d25ac87d2..81fbd735aec4 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -33,39 +33,10 @@
33#ifndef _ASM_X86_XEN_HYPERVISOR_H 33#ifndef _ASM_X86_XEN_HYPERVISOR_H
34#define _ASM_X86_XEN_HYPERVISOR_H 34#define _ASM_X86_XEN_HYPERVISOR_H
35 35
36#include <linux/types.h>
37#include <linux/kernel.h>
38
39#include <xen/interface/xen.h>
40#include <xen/interface/version.h>
41
42#include <asm/ptrace.h>
43#include <asm/page.h>
44#include <asm/desc.h>
45#if defined(__i386__)
46# ifdef CONFIG_X86_PAE
47# include <asm-generic/pgtable-nopud.h>
48# else
49# include <asm-generic/pgtable-nopmd.h>
50# endif
51#endif
52#include <asm/xen/hypercall.h>
53
54/* arch/i386/kernel/setup.c */ 36/* arch/i386/kernel/setup.c */
55extern struct shared_info *HYPERVISOR_shared_info; 37extern struct shared_info *HYPERVISOR_shared_info;
56extern struct start_info *xen_start_info; 38extern struct start_info *xen_start_info;
57 39
58/* arch/i386/mach-xen/evtchn.c */
59/* Force a proper event-channel callback from Xen. */
60extern void force_evtchn_callback(void);
61
62/* Turn jiffies into Xen system time. */
63u64 jiffies_to_st(unsigned long jiffies);
64
65
66#define MULTI_UVMFLAGS_INDEX 3
67#define MULTI_UVMDOMID_INDEX 4
68
69enum xen_domain_type { 40enum xen_domain_type {
70 XEN_NATIVE, 41 XEN_NATIVE,
71 XEN_PV_DOMAIN, 42 XEN_PV_DOMAIN,
@@ -74,9 +45,15 @@ enum xen_domain_type {
74 45
75extern enum xen_domain_type xen_domain_type; 46extern enum xen_domain_type xen_domain_type;
76 47
48#ifdef CONFIG_XEN
77#define xen_domain() (xen_domain_type != XEN_NATIVE) 49#define xen_domain() (xen_domain_type != XEN_NATIVE)
78#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) 50#else
51#define xen_domain() (0)
52#endif
53
54#define xen_pv_domain() (xen_domain() && xen_domain_type == XEN_PV_DOMAIN)
55#define xen_hvm_domain() (xen_domain() && xen_domain_type == XEN_HVM_DOMAIN)
56
79#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) 57#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN)
80#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN)
81 58
82#endif /* _ASM_X86_XEN_HYPERVISOR_H */ 59#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index bc628998a1b9..7ef617ef1df3 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -1,11 +1,16 @@
1#ifndef _ASM_X86_XEN_PAGE_H 1#ifndef _ASM_X86_XEN_PAGE_H
2#define _ASM_X86_XEN_PAGE_H 2#define _ASM_X86_XEN_PAGE_H
3 3
4#include <linux/kernel.h>
5#include <linux/types.h>
6#include <linux/spinlock.h>
4#include <linux/pfn.h> 7#include <linux/pfn.h>
5 8
6#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10#include <asm/page.h>
7#include <asm/pgtable.h> 11#include <asm/pgtable.h>
8 12
13#include <xen/interface/xen.h>
9#include <xen/features.h> 14#include <xen/features.h>
10 15
11/* Xen machine address */ 16/* Xen machine address */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b62a7667828e..1f208aaee780 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -12,6 +12,7 @@ CFLAGS_REMOVE_tsc.o = -pg
12CFLAGS_REMOVE_rtc.o = -pg 12CFLAGS_REMOVE_rtc.o = -pg
13CFLAGS_REMOVE_paravirt-spinlocks.o = -pg 13CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
14CFLAGS_REMOVE_ftrace.o = -pg 14CFLAGS_REMOVE_ftrace.o = -pg
15CFLAGS_REMOVE_early_printk.o = -pg
15endif 16endif
16 17
17# 18#
@@ -23,9 +24,9 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
23CFLAGS_hpet.o := $(nostackp) 24CFLAGS_hpet.o := $(nostackp)
24CFLAGS_tsc.o := $(nostackp) 25CFLAGS_tsc.o := $(nostackp)
25 26
26obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o 27obj-y := process_$(BITS).o signal.o entry_$(BITS).o
27obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 28obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
28obj-y += time_$(BITS).o ioport.o ldt.o 29obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
29obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 30obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
30obj-$(CONFIG_X86_VISWS) += visws_quirks.o 31obj-$(CONFIG_X86_VISWS) += visws_quirks.o
31obj-$(CONFIG_X86_32) += probe_roms_32.o 32obj-$(CONFIG_X86_32) += probe_roms_32.o
@@ -105,6 +106,8 @@ microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
105microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o 106microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
106obj-$(CONFIG_MICROCODE) += microcode.o 107obj-$(CONFIG_MICROCODE) += microcode.o
107 108
109obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
110
108### 111###
109# 64 bit specific files 112# 64 bit specific files
110ifeq ($(CONFIG_X86_64),y) 113ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4c51a2f8fd31..65d0b72777ea 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1360,6 +1360,17 @@ static void __init acpi_process_madt(void)
1360 disable_acpi(); 1360 disable_acpi();
1361 } 1361 }
1362 } 1362 }
1363
1364 /*
1365 * ACPI supports both logical (e.g. Hyper-Threading) and physical
1366 * processors, where MPS only supports physical.
1367 */
1368 if (acpi_lapic && acpi_ioapic)
1369 printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
1370 "information\n");
1371 else if (acpi_lapic)
1372 printk(KERN_INFO "Using ACPI for processor (LAPIC) "
1373 "configuration information\n");
1363#endif 1374#endif
1364 return; 1375 return;
1365} 1376}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0a60d60ed036..2e2da717b350 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -24,6 +24,7 @@
24#include <linux/iommu-helper.h> 24#include <linux/iommu-helper.h>
25#include <asm/proto.h> 25#include <asm/proto.h>
26#include <asm/iommu.h> 26#include <asm/iommu.h>
27#include <asm/gart.h>
27#include <asm/amd_iommu_types.h> 28#include <asm/amd_iommu_types.h>
28#include <asm/amd_iommu.h> 29#include <asm/amd_iommu.h>
29 30
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c6cc22815d35..c625800c55ca 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -28,6 +28,7 @@
28#include <asm/amd_iommu_types.h> 28#include <asm/amd_iommu_types.h>
29#include <asm/amd_iommu.h> 29#include <asm/amd_iommu.h>
30#include <asm/iommu.h> 30#include <asm/iommu.h>
31#include <asm/gart.h>
31 32
32/* 33/*
33 * definitions for the ACPI scanning code 34 * definitions for the ACPI scanning code
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 9a32b37ee2ee..676debfc1702 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -1,8 +1,9 @@
1/* 1/*
2 * Firmware replacement code. 2 * Firmware replacement code.
3 * 3 *
4 * Work around broken BIOSes that don't set an aperture or only set the 4 * Work around broken BIOSes that don't set an aperture, only set the
5 * aperture in the AGP bridge. 5 * aperture in the AGP bridge, or set too small aperture.
6 *
6 * If all fails map the aperture over some low memory. This is cheaper than 7 * If all fails map the aperture over some low memory. This is cheaper than
7 * doing bounce buffering. The memory is lost. This is done at early boot 8 * doing bounce buffering. The memory is lost. This is done at early boot
8 * because only the bootmem allocator can allocate 32+MB. 9 * because only the bootmem allocator can allocate 32+MB.
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 16f94879b525..7397911f8478 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -441,6 +441,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
441 v = apic_read(APIC_LVTT); 441 v = apic_read(APIC_LVTT);
442 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); 442 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
443 apic_write(APIC_LVTT, v); 443 apic_write(APIC_LVTT, v);
444 apic_write(APIC_TMICT, 0xffffffff);
444 break; 445 break;
445 case CLOCK_EVT_MODE_RESUME: 446 case CLOCK_EVT_MODE_RESUME:
446 /* Nothing to do here */ 447 /* Nothing to do here */
@@ -559,13 +560,13 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
559 } else { 560 } else {
560 res = (((u64)deltapm) * mult) >> 22; 561 res = (((u64)deltapm) * mult) >> 22;
561 do_div(res, 1000000); 562 do_div(res, 1000000);
562 printk(KERN_WARNING "APIC calibration not consistent " 563 pr_warning("APIC calibration not consistent "
563 "with PM Timer: %ldms instead of 100ms\n", 564 "with PM Timer: %ldms instead of 100ms\n",
564 (long)res); 565 (long)res);
565 /* Correct the lapic counter value */ 566 /* Correct the lapic counter value */
566 res = (((u64)(*delta)) * pm_100ms); 567 res = (((u64)(*delta)) * pm_100ms);
567 do_div(res, deltapm); 568 do_div(res, deltapm);
568 printk(KERN_INFO "APIC delta adjusted to PM-Timer: " 569 pr_info("APIC delta adjusted to PM-Timer: "
569 "%lu (%ld)\n", (unsigned long)res, *delta); 570 "%lu (%ld)\n", (unsigned long)res, *delta);
570 *delta = (long)res; 571 *delta = (long)res;
571 } 572 }
@@ -645,8 +646,7 @@ static int __init calibrate_APIC_clock(void)
645 */ 646 */
646 if (calibration_result < (1000000 / HZ)) { 647 if (calibration_result < (1000000 / HZ)) {
647 local_irq_enable(); 648 local_irq_enable();
648 printk(KERN_WARNING 649 pr_warning("APIC frequency too slow, disabling apic timer\n");
649 "APIC frequency too slow, disabling apic timer\n");
650 return -1; 650 return -1;
651 } 651 }
652 652
@@ -672,13 +672,9 @@ static int __init calibrate_APIC_clock(void)
672 while (lapic_cal_loops <= LAPIC_CAL_LOOPS) 672 while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
673 cpu_relax(); 673 cpu_relax();
674 674
675 local_irq_disable();
676
677 /* Stop the lapic timer */ 675 /* Stop the lapic timer */
678 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); 676 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
679 677
680 local_irq_enable();
681
682 /* Jiffies delta */ 678 /* Jiffies delta */
683 deltaj = lapic_cal_j2 - lapic_cal_j1; 679 deltaj = lapic_cal_j2 - lapic_cal_j1;
684 apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); 680 apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
@@ -692,8 +688,7 @@ static int __init calibrate_APIC_clock(void)
692 local_irq_enable(); 688 local_irq_enable();
693 689
694 if (levt->features & CLOCK_EVT_FEAT_DUMMY) { 690 if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
695 printk(KERN_WARNING 691 pr_warning("APIC timer disabled due to verification failure.\n");
696 "APIC timer disabled due to verification failure.\n");
697 return -1; 692 return -1;
698 } 693 }
699 694
@@ -714,7 +709,7 @@ void __init setup_boot_APIC_clock(void)
714 * broadcast mechanism is used. On UP systems simply ignore it. 709 * broadcast mechanism is used. On UP systems simply ignore it.
715 */ 710 */
716 if (disable_apic_timer) { 711 if (disable_apic_timer) {
717 printk(KERN_INFO "Disabling APIC timer\n"); 712 pr_info("Disabling APIC timer\n");
718 /* No broadcast on UP ! */ 713 /* No broadcast on UP ! */
719 if (num_possible_cpus() > 1) { 714 if (num_possible_cpus() > 1) {
720 lapic_clockevent.mult = 1; 715 lapic_clockevent.mult = 1;
@@ -741,7 +736,7 @@ void __init setup_boot_APIC_clock(void)
741 if (nmi_watchdog != NMI_IO_APIC) 736 if (nmi_watchdog != NMI_IO_APIC)
742 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; 737 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
743 else 738 else
744 printk(KERN_WARNING "APIC timer registered as dummy," 739 pr_warning("APIC timer registered as dummy,"
745 " due to nmi_watchdog=%d!\n", nmi_watchdog); 740 " due to nmi_watchdog=%d!\n", nmi_watchdog);
746 741
747 /* Setup the lapic or request the broadcast */ 742 /* Setup the lapic or request the broadcast */
@@ -773,8 +768,7 @@ static void local_apic_timer_interrupt(void)
773 * spurious. 768 * spurious.
774 */ 769 */
775 if (!evt->event_handler) { 770 if (!evt->event_handler) {
776 printk(KERN_WARNING 771 pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
777 "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
778 /* Switch it off */ 772 /* Switch it off */
779 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); 773 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
780 return; 774 return;
@@ -783,11 +777,7 @@ static void local_apic_timer_interrupt(void)
783 /* 777 /*
784 * the NMI deadlock-detector uses this. 778 * the NMI deadlock-detector uses this.
785 */ 779 */
786#ifdef CONFIG_X86_64 780 inc_irq_stat(apic_timer_irqs);
787 add_pda(apic_timer_irqs, 1);
788#else
789 per_cpu(irq_stat, cpu).apic_timer_irqs++;
790#endif
791 781
792 evt->event_handler(evt); 782 evt->event_handler(evt);
793} 783}
@@ -814,9 +804,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
814 * Besides, if we don't timer interrupts ignore the global 804 * Besides, if we don't timer interrupts ignore the global
815 * interrupt lock, which is the WrongThing (tm) to do. 805 * interrupt lock, which is the WrongThing (tm) to do.
816 */ 806 */
817#ifdef CONFIG_X86_64
818 exit_idle(); 807 exit_idle();
819#endif
820 irq_enter(); 808 irq_enter();
821 local_apic_timer_interrupt(); 809 local_apic_timer_interrupt();
822 irq_exit(); 810 irq_exit();
@@ -1093,7 +1081,7 @@ static void __cpuinit lapic_setup_esr(void)
1093 unsigned int oldvalue, value, maxlvt; 1081 unsigned int oldvalue, value, maxlvt;
1094 1082
1095 if (!lapic_is_integrated()) { 1083 if (!lapic_is_integrated()) {
1096 printk(KERN_INFO "No ESR for 82489DX.\n"); 1084 pr_info("No ESR for 82489DX.\n");
1097 return; 1085 return;
1098 } 1086 }
1099 1087
@@ -1104,7 +1092,7 @@ static void __cpuinit lapic_setup_esr(void)
1104 * ESR disabled - we can't do anything useful with the 1092 * ESR disabled - we can't do anything useful with the
1105 * errors anyway - mbligh 1093 * errors anyway - mbligh
1106 */ 1094 */
1107 printk(KERN_INFO "Leaving ESR disabled.\n"); 1095 pr_info("Leaving ESR disabled.\n");
1108 return; 1096 return;
1109 } 1097 }
1110 1098
@@ -1298,7 +1286,7 @@ void check_x2apic(void)
1298 rdmsr(MSR_IA32_APICBASE, msr, msr2); 1286 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1299 1287
1300 if (msr & X2APIC_ENABLE) { 1288 if (msr & X2APIC_ENABLE) {
1301 printk("x2apic enabled by BIOS, switching to x2apic ops\n"); 1289 pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
1302 x2apic_preenabled = x2apic = 1; 1290 x2apic_preenabled = x2apic = 1;
1303 apic_ops = &x2apic_ops; 1291 apic_ops = &x2apic_ops;
1304 } 1292 }
@@ -1310,7 +1298,7 @@ void enable_x2apic(void)
1310 1298
1311 rdmsr(MSR_IA32_APICBASE, msr, msr2); 1299 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1312 if (!(msr & X2APIC_ENABLE)) { 1300 if (!(msr & X2APIC_ENABLE)) {
1313 printk("Enabling x2apic\n"); 1301 pr_info("Enabling x2apic\n");
1314 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); 1302 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1315 } 1303 }
1316} 1304}
@@ -1325,9 +1313,8 @@ void __init enable_IR_x2apic(void)
1325 return; 1313 return;
1326 1314
1327 if (!x2apic_preenabled && disable_x2apic) { 1315 if (!x2apic_preenabled && disable_x2apic) {
1328 printk(KERN_INFO 1316 pr_info("Skipped enabling x2apic and Interrupt-remapping "
1329 "Skipped enabling x2apic and Interrupt-remapping " 1317 "because of nox2apic\n");
1330 "because of nox2apic\n");
1331 return; 1318 return;
1332 } 1319 }
1333 1320
@@ -1335,22 +1322,19 @@ void __init enable_IR_x2apic(void)
1335 panic("Bios already enabled x2apic, can't enforce nox2apic"); 1322 panic("Bios already enabled x2apic, can't enforce nox2apic");
1336 1323
1337 if (!x2apic_preenabled && skip_ioapic_setup) { 1324 if (!x2apic_preenabled && skip_ioapic_setup) {
1338 printk(KERN_INFO 1325 pr_info("Skipped enabling x2apic and Interrupt-remapping "
1339 "Skipped enabling x2apic and Interrupt-remapping " 1326 "because of skipping io-apic setup\n");
1340 "because of skipping io-apic setup\n");
1341 return; 1327 return;
1342 } 1328 }
1343 1329
1344 ret = dmar_table_init(); 1330 ret = dmar_table_init();
1345 if (ret) { 1331 if (ret) {
1346 printk(KERN_INFO 1332 pr_info("dmar_table_init() failed with %d:\n", ret);
1347 "dmar_table_init() failed with %d:\n", ret);
1348 1333
1349 if (x2apic_preenabled) 1334 if (x2apic_preenabled)
1350 panic("x2apic enabled by bios. But IR enabling failed"); 1335 panic("x2apic enabled by bios. But IR enabling failed");
1351 else 1336 else
1352 printk(KERN_INFO 1337 pr_info("Not enabling x2apic,Intr-remapping\n");
1353 "Not enabling x2apic,Intr-remapping\n");
1354 return; 1338 return;
1355 } 1339 }
1356 1340
@@ -1359,7 +1343,7 @@ void __init enable_IR_x2apic(void)
1359 1343
1360 ret = save_mask_IO_APIC_setup(); 1344 ret = save_mask_IO_APIC_setup();
1361 if (ret) { 1345 if (ret) {
1362 printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); 1346 pr_info("Saving IO-APIC state failed: %d\n", ret);
1363 goto end; 1347 goto end;
1364 } 1348 }
1365 1349
@@ -1394,14 +1378,11 @@ end:
1394 1378
1395 if (!ret) { 1379 if (!ret) {
1396 if (!x2apic_preenabled) 1380 if (!x2apic_preenabled)
1397 printk(KERN_INFO 1381 pr_info("Enabled x2apic and interrupt-remapping\n");
1398 "Enabled x2apic and interrupt-remapping\n");
1399 else 1382 else
1400 printk(KERN_INFO 1383 pr_info("Enabled Interrupt-remapping\n");
1401 "Enabled Interrupt-remapping\n");
1402 } else 1384 } else
1403 printk(KERN_ERR 1385 pr_err("Failed to enable Interrupt-remapping and x2apic\n");
1404 "Failed to enable Interrupt-remapping and x2apic\n");
1405#else 1386#else
1406 if (!cpu_has_x2apic) 1387 if (!cpu_has_x2apic)
1407 return; 1388 return;
@@ -1410,8 +1391,8 @@ end:
1410 panic("x2apic enabled prior OS handover," 1391 panic("x2apic enabled prior OS handover,"
1411 " enable CONFIG_INTR_REMAP"); 1392 " enable CONFIG_INTR_REMAP");
1412 1393
1413 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " 1394 pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1414 " and x2apic\n"); 1395 " and x2apic\n");
1415#endif 1396#endif
1416 1397
1417 return; 1398 return;
@@ -1428,7 +1409,7 @@ end:
1428static int __init detect_init_APIC(void) 1409static int __init detect_init_APIC(void)
1429{ 1410{
1430 if (!cpu_has_apic) { 1411 if (!cpu_has_apic) {
1431 printk(KERN_INFO "No local APIC present\n"); 1412 pr_info("No local APIC present\n");
1432 return -1; 1413 return -1;
1433 } 1414 }
1434 1415
@@ -1469,8 +1450,8 @@ static int __init detect_init_APIC(void)
1469 * "lapic" specified. 1450 * "lapic" specified.
1470 */ 1451 */
1471 if (!force_enable_local_apic) { 1452 if (!force_enable_local_apic) {
1472 printk(KERN_INFO "Local APIC disabled by BIOS -- " 1453 pr_info("Local APIC disabled by BIOS -- "
1473 "you can enable it with \"lapic\"\n"); 1454 "you can enable it with \"lapic\"\n");
1474 return -1; 1455 return -1;
1475 } 1456 }
1476 /* 1457 /*
@@ -1480,8 +1461,7 @@ static int __init detect_init_APIC(void)
1480 */ 1461 */
1481 rdmsr(MSR_IA32_APICBASE, l, h); 1462 rdmsr(MSR_IA32_APICBASE, l, h);
1482 if (!(l & MSR_IA32_APICBASE_ENABLE)) { 1463 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1483 printk(KERN_INFO 1464 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1484 "Local APIC disabled by BIOS -- reenabling.\n");
1485 l &= ~MSR_IA32_APICBASE_BASE; 1465 l &= ~MSR_IA32_APICBASE_BASE;
1486 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; 1466 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
1487 wrmsr(MSR_IA32_APICBASE, l, h); 1467 wrmsr(MSR_IA32_APICBASE, l, h);
@@ -1494,7 +1474,7 @@ static int __init detect_init_APIC(void)
1494 */ 1474 */
1495 features = cpuid_edx(1); 1475 features = cpuid_edx(1);
1496 if (!(features & (1 << X86_FEATURE_APIC))) { 1476 if (!(features & (1 << X86_FEATURE_APIC))) {
1497 printk(KERN_WARNING "Could not enable APIC!\n"); 1477 pr_warning("Could not enable APIC!\n");
1498 return -1; 1478 return -1;
1499 } 1479 }
1500 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 1480 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
@@ -1505,14 +1485,14 @@ static int __init detect_init_APIC(void)
1505 if (l & MSR_IA32_APICBASE_ENABLE) 1485 if (l & MSR_IA32_APICBASE_ENABLE)
1506 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; 1486 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1507 1487
1508 printk(KERN_INFO "Found and enabled local APIC!\n"); 1488 pr_info("Found and enabled local APIC!\n");
1509 1489
1510 apic_pm_activate(); 1490 apic_pm_activate();
1511 1491
1512 return 0; 1492 return 0;
1513 1493
1514no_apic: 1494no_apic:
1515 printk(KERN_INFO "No local APIC present or hardware disabled\n"); 1495 pr_info("No local APIC present or hardware disabled\n");
1516 return -1; 1496 return -1;
1517} 1497}
1518#endif 1498#endif
@@ -1588,12 +1568,12 @@ int __init APIC_init_uniprocessor(void)
1588{ 1568{
1589#ifdef CONFIG_X86_64 1569#ifdef CONFIG_X86_64
1590 if (disable_apic) { 1570 if (disable_apic) {
1591 printk(KERN_INFO "Apic disabled\n"); 1571 pr_info("Apic disabled\n");
1592 return -1; 1572 return -1;
1593 } 1573 }
1594 if (!cpu_has_apic) { 1574 if (!cpu_has_apic) {
1595 disable_apic = 1; 1575 disable_apic = 1;
1596 printk(KERN_INFO "Apic disabled by BIOS\n"); 1576 pr_info("Apic disabled by BIOS\n");
1597 return -1; 1577 return -1;
1598 } 1578 }
1599#else 1579#else
@@ -1605,8 +1585,8 @@ int __init APIC_init_uniprocessor(void)
1605 */ 1585 */
1606 if (!cpu_has_apic && 1586 if (!cpu_has_apic &&
1607 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 1587 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
1608 printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", 1588 pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
1609 boot_cpu_physical_apicid); 1589 boot_cpu_physical_apicid);
1610 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 1590 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1611 return -1; 1591 return -1;
1612 } 1592 }
@@ -1682,9 +1662,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1682{ 1662{
1683 u32 v; 1663 u32 v;
1684 1664
1685#ifdef CONFIG_X86_64
1686 exit_idle(); 1665 exit_idle();
1687#endif
1688 irq_enter(); 1666 irq_enter();
1689 /* 1667 /*
1690 * Check if this really is a spurious interrupt and ACK it 1668 * Check if this really is a spurious interrupt and ACK it
@@ -1695,14 +1673,11 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1695 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) 1673 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1696 ack_APIC_irq(); 1674 ack_APIC_irq();
1697 1675
1698#ifdef CONFIG_X86_64 1676 inc_irq_stat(irq_spurious_count);
1699 add_pda(irq_spurious_count, 1); 1677
1700#else
1701 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ 1678 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1702 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " 1679 pr_info("spurious APIC interrupt on CPU#%d, "
1703 "should never happen.\n", smp_processor_id()); 1680 "should never happen.\n", smp_processor_id());
1704 __get_cpu_var(irq_stat).irq_spurious_count++;
1705#endif
1706 irq_exit(); 1681 irq_exit();
1707} 1682}
1708 1683
@@ -1713,9 +1688,7 @@ void smp_error_interrupt(struct pt_regs *regs)
1713{ 1688{
1714 u32 v, v1; 1689 u32 v, v1;
1715 1690
1716#ifdef CONFIG_X86_64
1717 exit_idle(); 1691 exit_idle();
1718#endif
1719 irq_enter(); 1692 irq_enter();
1720 /* First tickle the hardware, only then report what went on. -- REW */ 1693 /* First tickle the hardware, only then report what went on. -- REW */
1721 v = apic_read(APIC_ESR); 1694 v = apic_read(APIC_ESR);
@@ -1724,17 +1697,18 @@ void smp_error_interrupt(struct pt_regs *regs)
1724 ack_APIC_irq(); 1697 ack_APIC_irq();
1725 atomic_inc(&irq_err_count); 1698 atomic_inc(&irq_err_count);
1726 1699
1727 /* Here is what the APIC error bits mean: 1700 /*
1728 0: Send CS error 1701 * Here is what the APIC error bits mean:
1729 1: Receive CS error 1702 * 0: Send CS error
1730 2: Send accept error 1703 * 1: Receive CS error
1731 3: Receive accept error 1704 * 2: Send accept error
1732 4: Reserved 1705 * 3: Receive accept error
1733 5: Send illegal vector 1706 * 4: Reserved
1734 6: Received illegal vector 1707 * 5: Send illegal vector
1735 7: Illegal register address 1708 * 6: Received illegal vector
1736 */ 1709 * 7: Illegal register address
1737 printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", 1710 */
1711 pr_debug("APIC error on CPU%d: %02x(%02x)\n",
1738 smp_processor_id(), v , v1); 1712 smp_processor_id(), v , v1);
1739 irq_exit(); 1713 irq_exit();
1740} 1714}
@@ -1838,15 +1812,15 @@ void __cpuinit generic_processor_info(int apicid, int version)
1838 * Validate version 1812 * Validate version
1839 */ 1813 */
1840 if (version == 0x0) { 1814 if (version == 0x0) {
1841 printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " 1815 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
1842 "fixing up to 0x10. (tell your hw vendor)\n", 1816 "fixing up to 0x10. (tell your hw vendor)\n",
1843 version); 1817 version);
1844 version = 0x10; 1818 version = 0x10;
1845 } 1819 }
1846 apic_version[apicid] = version; 1820 apic_version[apicid] = version;
1847 1821
1848 if (num_processors >= NR_CPUS) { 1822 if (num_processors >= NR_CPUS) {
1849 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 1823 pr_warning("WARNING: NR_CPUS limit of %i reached."
1850 " Processor ignored.\n", NR_CPUS); 1824 " Processor ignored.\n", NR_CPUS);
1851 return; 1825 return;
1852 } 1826 }
@@ -2209,7 +2183,7 @@ static int __init apic_set_verbosity(char *arg)
2209 else if (strcmp("verbose", arg) == 0) 2183 else if (strcmp("verbose", arg) == 0)
2210 apic_verbosity = APIC_VERBOSE; 2184 apic_verbosity = APIC_VERBOSE;
2211 else { 2185 else {
2212 printk(KERN_WARNING "APIC Verbosity level %s not recognised" 2186 pr_warning("APIC Verbosity level %s not recognised"
2213 " use apic=verbose or apic=debug\n", arg); 2187 " use apic=verbose or apic=debug\n", arg);
2214 return -EINVAL; 2188 return -EINVAL;
2215 } 2189 }
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5145a6e72bbb..3a26525a3f31 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -391,11 +391,7 @@ static int power_off;
391#else 391#else
392static int power_off = 1; 392static int power_off = 1;
393#endif 393#endif
394#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
395static int realmode_power_off = 1;
396#else
397static int realmode_power_off; 394static int realmode_power_off;
398#endif
399#ifdef CONFIG_APM_ALLOW_INTS 395#ifdef CONFIG_APM_ALLOW_INTS
400static int allow_ints = 1; 396static int allow_ints = 1;
401#else 397#else
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6649d09ad88f..ee4df08feee6 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -11,7 +11,7 @@
11#include <linux/suspend.h> 11#include <linux/suspend.h>
12#include <linux/kbuild.h> 12#include <linux/kbuild.h>
13#include <asm/ucontext.h> 13#include <asm/ucontext.h>
14#include "sigframe.h" 14#include <asm/sigframe.h>
15#include <asm/pgtable.h> 15#include <asm/pgtable.h>
16#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <asm/processor.h> 17#include <asm/processor.h>
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 7fcf63d22f8b..1d41d3f1edbc 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,6 +20,8 @@
20 20
21#include <xen/interface/xen.h> 21#include <xen/interface/xen.h>
22 22
23#include <asm/sigframe.h>
24
23#define __NO_STUBS 1 25#define __NO_STUBS 1
24#undef __SYSCALL 26#undef __SYSCALL
25#undef _ASM_X86_UNISTD_64_H 27#undef _ASM_X86_UNISTD_64_H
@@ -87,7 +89,7 @@ int main(void)
87 BLANK(); 89 BLANK();
88#undef ENTRY 90#undef ENTRY
89 DEFINE(IA32_RT_SIGFRAME_sigcontext, 91 DEFINE(IA32_RT_SIGFRAME_sigcontext,
90 offsetof (struct rt_sigframe32, uc.uc_mcontext)); 92 offsetof (struct rt_sigframe_ia32, uc.uc_mcontext));
91 BLANK(); 93 BLANK();
92#endif 94#endif
93 DEFINE(pbe_address, offsetof(struct pbe, address)); 95 DEFINE(pbe_address, offsetof(struct pbe, address));
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index f0dfe6f17e7e..2a0a2a3cac26 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -69,10 +69,10 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
69 69
70long sn_partition_id; 70long sn_partition_id;
71EXPORT_SYMBOL_GPL(sn_partition_id); 71EXPORT_SYMBOL_GPL(sn_partition_id);
72long uv_coherency_id; 72long sn_coherency_id;
73EXPORT_SYMBOL_GPL(uv_coherency_id); 73EXPORT_SYMBOL_GPL(sn_coherency_id);
74long uv_region_size; 74long sn_region_size;
75EXPORT_SYMBOL_GPL(uv_region_size); 75EXPORT_SYMBOL_GPL(sn_region_size);
76int uv_type; 76int uv_type;
77 77
78 78
@@ -100,6 +100,56 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
100 return ret; 100 return ret;
101} 101}
102 102
103int
104uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size,
105 unsigned long *intr_mmr_offset)
106{
107 union uv_watchlist_u size_blade;
108 u64 watchlist;
109 s64 ret;
110
111 size_blade.size = mq_size;
112 size_blade.blade = blade;
113
114 /*
115 * bios returns watchlist number or negative error number.
116 */
117 ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
118 size_blade.val, (u64)intr_mmr_offset,
119 (u64)&watchlist, 0);
120 if (ret < BIOS_STATUS_SUCCESS)
121 return ret;
122
123 return watchlist;
124}
125EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc);
126
127int
128uv_bios_mq_watchlist_free(int blade, int watchlist_num)
129{
130 return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE,
131 blade, watchlist_num, 0, 0, 0);
132}
133EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free);
134
135s64
136uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms)
137{
138 return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len,
139 perms, 0, 0);
140}
141EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
142
143s64
144uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
145{
146 s64 ret;
147
148 ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
149 (u64)addr, buf, (u64)len, 0);
150 return ret;
151}
152EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
103 153
104s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) 154s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
105{ 155{
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
new file mode 100644
index 000000000000..2ac0ab71412a
--- /dev/null
+++ b/arch/x86/kernel/check.c
@@ -0,0 +1,161 @@
1#include <linux/module.h>
2#include <linux/sched.h>
3#include <linux/kthread.h>
4#include <linux/workqueue.h>
5#include <asm/e820.h>
6#include <asm/proto.h>
7
8/*
9 * Some BIOSes seem to corrupt the low 64k of memory during events
10 * like suspend/resume and unplugging an HDMI cable. Reserve all
11 * remaining free memory in that area and fill it with a distinct
12 * pattern.
13 */
14#define MAX_SCAN_AREAS 8
15
16static int __read_mostly memory_corruption_check = -1;
17
18static unsigned __read_mostly corruption_check_size = 64*1024;
19static unsigned __read_mostly corruption_check_period = 60; /* seconds */
20
21static struct e820entry scan_areas[MAX_SCAN_AREAS];
22static int num_scan_areas;
23
24
25static __init int set_corruption_check(char *arg)
26{
27 char *end;
28
29 memory_corruption_check = simple_strtol(arg, &end, 10);
30
31 return (*end == 0) ? 0 : -EINVAL;
32}
33early_param("memory_corruption_check", set_corruption_check);
34
35static __init int set_corruption_check_period(char *arg)
36{
37 char *end;
38
39 corruption_check_period = simple_strtoul(arg, &end, 10);
40
41 return (*end == 0) ? 0 : -EINVAL;
42}
43early_param("memory_corruption_check_period", set_corruption_check_period);
44
45static __init int set_corruption_check_size(char *arg)
46{
47 char *end;
48 unsigned size;
49
50 size = memparse(arg, &end);
51
52 if (*end == '\0')
53 corruption_check_size = size;
54
55 return (size == corruption_check_size) ? 0 : -EINVAL;
56}
57early_param("memory_corruption_check_size", set_corruption_check_size);
58
59
60void __init setup_bios_corruption_check(void)
61{
62 u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
63
64 if (memory_corruption_check == -1) {
65 memory_corruption_check =
66#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
67 1
68#else
69 0
70#endif
71 ;
72 }
73
74 if (corruption_check_size == 0)
75 memory_corruption_check = 0;
76
77 if (!memory_corruption_check)
78 return;
79
80 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
81
82 while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
83 u64 size;
84 addr = find_e820_area_size(addr, &size, PAGE_SIZE);
85
86 if (addr == 0)
87 break;
88
89 if ((addr + size) > corruption_check_size)
90 size = corruption_check_size - addr;
91
92 if (size == 0)
93 break;
94
95 e820_update_range(addr, size, E820_RAM, E820_RESERVED);
96 scan_areas[num_scan_areas].addr = addr;
97 scan_areas[num_scan_areas].size = size;
98 num_scan_areas++;
99
100 /* Assume we've already mapped this early memory */
101 memset(__va(addr), 0, size);
102
103 addr += size;
104 }
105
106 printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
107 num_scan_areas);
108 update_e820();
109}
110
111
112void check_for_bios_corruption(void)
113{
114 int i;
115 int corruption = 0;
116
117 if (!memory_corruption_check)
118 return;
119
120 for (i = 0; i < num_scan_areas; i++) {
121 unsigned long *addr = __va(scan_areas[i].addr);
122 unsigned long size = scan_areas[i].size;
123
124 for (; size; addr++, size -= sizeof(unsigned long)) {
125 if (!*addr)
126 continue;
127 printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
128 addr, __pa(addr), *addr);
129 corruption = 1;
130 *addr = 0;
131 }
132 }
133
134 WARN_ONCE(corruption, KERN_ERR "Memory corruption detected in low memory\n");
135}
136
137static void check_corruption(struct work_struct *dummy);
138static DECLARE_DELAYED_WORK(bios_check_work, check_corruption);
139
140static void check_corruption(struct work_struct *dummy)
141{
142 check_for_bios_corruption();
143 schedule_delayed_work(&bios_check_work,
144 round_jiffies_relative(corruption_check_period*HZ));
145}
146
147static int start_periodic_check_for_corruption(void)
148{
149 if (!memory_corruption_check || corruption_check_period == 0)
150 return 0;
151
152 printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
153 corruption_check_period);
154
155 /* First time we run the checks right away */
156 schedule_delayed_work(&bios_check_work, 0);
157 return 0;
158}
159
160module_init(start_periodic_check_for_corruption);
161
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82ec6075c057..a5c04e88777e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -4,6 +4,7 @@
4 4
5obj-y := intel_cacheinfo.o addon_cpuid_features.o 5obj-y := intel_cacheinfo.o addon_cpuid_features.o
6obj-y += proc.o capflags.o powerflags.o common.o 6obj-y += proc.o capflags.o powerflags.o common.o
7obj-y += vmware.o hypervisor.o
7 8
8obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o 9obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
9obj-$(CONFIG_X86_64) += bugs_64.o 10obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index ef8f831af823..2cf23634b6d9 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -120,9 +120,17 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) 120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
121 & core_select_mask; 121 & core_select_mask;
122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); 122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
123 /*
124 * Reinit the apicid, now that we have extended initial_apicid.
125 */
126 c->apicid = phys_pkg_id(c->initial_apicid, 0);
123#else 127#else
124 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; 128 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
125 c->phys_proc_id = phys_pkg_id(core_plus_mask_width); 129 c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
130 /*
131 * Reinit the apicid, now that we have extended initial_apicid.
132 */
133 c->apicid = phys_pkg_id(0);
126#endif 134#endif
127 c->x86_max_cores = (core_level_siblings / smp_num_siblings); 135 c->x86_max_cores = (core_level_siblings / smp_num_siblings);
128 136
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 8f1e31db2ad5..7c878f6aa919 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -283,9 +283,14 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
283{ 283{
284 early_init_amd_mc(c); 284 early_init_amd_mc(c);
285 285
286 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ 286 /*
287 if (c->x86_power & (1<<8)) 287 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
288 * with P/T states and does not stop in deep C-states
289 */
290 if (c->x86_power & (1 << 8)) {
288 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 291 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
292 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
293 }
289 294
290#ifdef CONFIG_X86_64 295#ifdef CONFIG_X86_64
291 set_cpu_cap(c, X86_FEATURE_SYSCALL32); 296 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b9c9ea0217a9..42e0853030cb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -36,6 +36,7 @@
36#include <asm/proto.h> 36#include <asm/proto.h>
37#include <asm/sections.h> 37#include <asm/sections.h>
38#include <asm/setup.h> 38#include <asm/setup.h>
39#include <asm/hypervisor.h>
39 40
40#include "cpu.h" 41#include "cpu.h"
41 42
@@ -703,6 +704,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
703 detect_ht(c); 704 detect_ht(c);
704#endif 705#endif
705 706
707 init_hypervisor(c);
706 /* 708 /*
707 * On SMP, boot_cpu_data holds the common feature set between 709 * On SMP, boot_cpu_data holds the common feature set between
708 * all CPUs; so make sure that we indicate which features are 710 * all CPUs; so make sure that we indicate which features are
@@ -862,7 +864,7 @@ EXPORT_SYMBOL(_cpu_pda);
862 864
863struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 865struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
864 866
865char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 867static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
866 868
867void __cpuinit pda_init(int cpu) 869void __cpuinit pda_init(int cpu)
868{ 870{
@@ -903,8 +905,8 @@ void __cpuinit pda_init(int cpu)
903 } 905 }
904} 906}
905 907
906char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 908static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
907 DEBUG_STKSZ] __page_aligned_bss; 909 DEBUG_STKSZ] __page_aligned_bss;
908 910
909extern asmlinkage void ignore_sysret(void); 911extern asmlinkage void ignore_sysret(void);
910 912
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
new file mode 100644
index 000000000000..fb5b86af0b01
--- /dev/null
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -0,0 +1,58 @@
1/*
2 * Common hypervisor code
3 *
4 * Copyright (C) 2008, VMware, Inc.
5 * Author : Alok N Kataria <akataria@vmware.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
15 * NON INFRINGEMENT. See the GNU General Public License for more
16 * details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 */
23
24#include <asm/processor.h>
25#include <asm/vmware.h>
26#include <asm/hypervisor.h>
27
28static inline void __cpuinit
29detect_hypervisor_vendor(struct cpuinfo_x86 *c)
30{
31 if (vmware_platform()) {
32 c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
33 } else {
34 c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
35 }
36}
37
38unsigned long get_hypervisor_tsc_freq(void)
39{
40 if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
41 return vmware_get_tsc_khz();
42 return 0;
43}
44
45static inline void __cpuinit
46hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
47{
48 if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) {
49 vmware_set_feature_bits(c);
50 return;
51 }
52}
53
54void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
55{
56 detect_hypervisor_vendor(c);
57 hypervisor_set_feature_bits(c);
58}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..ccfd2047630c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -41,6 +41,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
41 if (c->x86 == 15 && c->x86_cache_alignment == 64) 41 if (c->x86 == 15 && c->x86_cache_alignment == 64)
42 c->x86_cache_alignment = 128; 42 c->x86_cache_alignment = 128;
43#endif 43#endif
44
45 /*
46 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
47 * with P/T states and does not stop in deep C-states
48 */
49 if (c->x86_power & (1 << 8)) {
50 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
51 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
52 }
53
44} 54}
45 55
46#ifdef CONFIG_X86_32 56#ifdef CONFIG_X86_32
@@ -242,6 +252,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
242 252
243 intel_workarounds(c); 253 intel_workarounds(c);
244 254
255 /*
256 * Detect the extended topology information if available. This
257 * will reinitialise the initial_apicid which will be used
258 * in init_intel_cacheinfo()
259 */
260 detect_extended_topology(c);
261
245 l2 = init_intel_cacheinfo(c); 262 l2 = init_intel_cacheinfo(c);
246 if (c->cpuid_level > 9) { 263 if (c->cpuid_level > 9) {
247 unsigned eax = cpuid_eax(10); 264 unsigned eax = cpuid_eax(10);
@@ -307,13 +324,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
307 set_cpu_cap(c, X86_FEATURE_P4); 324 set_cpu_cap(c, X86_FEATURE_P4);
308 if (c->x86 == 6) 325 if (c->x86 == 6)
309 set_cpu_cap(c, X86_FEATURE_P3); 326 set_cpu_cap(c, X86_FEATURE_P3);
327#endif
310 328
311 if (cpu_has_bts) 329 if (cpu_has_bts)
312 ptrace_bts_init_intel(c); 330 ptrace_bts_init_intel(c);
313 331
314#endif
315
316 detect_extended_topology(c);
317 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { 332 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
318 /* 333 /*
319 * let's use the legacy cpuid vector 0x1 and 0x4 for topology 334 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 3f46afbb1cf1..68b5d8681cbb 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -644,20 +644,17 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
644 return show_shared_cpu_map_func(leaf, 1, buf); 644 return show_shared_cpu_map_func(leaf, 1, buf);
645} 645}
646 646
647static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { 647static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
648 switch(this_leaf->eax.split.type) { 648{
649 case CACHE_TYPE_DATA: 649 switch (this_leaf->eax.split.type) {
650 case CACHE_TYPE_DATA:
650 return sprintf(buf, "Data\n"); 651 return sprintf(buf, "Data\n");
651 break; 652 case CACHE_TYPE_INST:
652 case CACHE_TYPE_INST:
653 return sprintf(buf, "Instruction\n"); 653 return sprintf(buf, "Instruction\n");
654 break; 654 case CACHE_TYPE_UNIFIED:
655 case CACHE_TYPE_UNIFIED:
656 return sprintf(buf, "Unified\n"); 655 return sprintf(buf, "Unified\n");
657 break; 656 default:
658 default:
659 return sprintf(buf, "Unknown\n"); 657 return sprintf(buf, "Unknown\n");
660 break;
661 } 658 }
662} 659}
663 660
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 5eb390a4b2e9..748c8f9e7a05 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -237,7 +237,7 @@ asmlinkage void mce_threshold_interrupt(void)
237 } 237 }
238 } 238 }
239out: 239out:
240 add_pda(irq_threshold_count, 1); 240 inc_irq_stat(irq_threshold_count);
241 irq_exit(); 241 irq_exit();
242} 242}
243 243
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index c17eaf5dd6dd..4b48f251fd39 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(void)
26 if (therm_throt_process(msr_val & 1)) 26 if (therm_throt_process(msr_val & 1))
27 mce_log_therm_throt_event(smp_processor_id(), msr_val); 27 mce_log_therm_throt_event(smp_processor_id(), msr_val);
28 28
29 add_pda(irq_thermal_count, 1); 29 inc_irq_stat(irq_thermal_count);
30 irq_exit(); 30 irq_exit();
31} 31}
32 32
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c78c04821ea1..1159e269e596 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -803,6 +803,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
803} 803}
804 804
805static struct res_range __initdata range[RANGE_NUM]; 805static struct res_range __initdata range[RANGE_NUM];
806static int __initdata nr_range;
806 807
807#ifdef CONFIG_MTRR_SANITIZER 808#ifdef CONFIG_MTRR_SANITIZER
808 809
@@ -1206,39 +1207,43 @@ struct mtrr_cleanup_result {
1206#define PSHIFT (PAGE_SHIFT - 10) 1207#define PSHIFT (PAGE_SHIFT - 10)
1207 1208
1208static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; 1209static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
1209static struct res_range __initdata range_new[RANGE_NUM];
1210static unsigned long __initdata min_loss_pfn[RANGE_NUM]; 1210static unsigned long __initdata min_loss_pfn[RANGE_NUM];
1211 1211
1212static int __init mtrr_cleanup(unsigned address_bits) 1212static void __init print_out_mtrr_range_state(void)
1213{ 1213{
1214 unsigned long extra_remove_base, extra_remove_size;
1215 unsigned long base, size, def, dummy;
1216 mtrr_type type;
1217 int nr_range, nr_range_new;
1218 u64 chunk_size, gran_size;
1219 unsigned long range_sums, range_sums_new;
1220 int index_good;
1221 int num_reg_good;
1222 int i; 1214 int i;
1215 char start_factor = 'K', size_factor = 'K';
1216 unsigned long start_base, size_base;
1217 mtrr_type type;
1223 1218
1224 /* extra one for all 0 */ 1219 for (i = 0; i < num_var_ranges; i++) {
1225 int num[MTRR_NUM_TYPES + 1];
1226 1220
1227 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) 1221 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
1228 return 0; 1222 if (!size_base)
1229 rdmsr(MTRRdefType_MSR, def, dummy); 1223 continue;
1230 def &= 0xff;
1231 if (def != MTRR_TYPE_UNCACHABLE)
1232 return 0;
1233 1224
1234 /* get it and store it aside */ 1225 size_base = to_size_factor(size_base, &size_factor),
1235 memset(range_state, 0, sizeof(range_state)); 1226 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
1236 for (i = 0; i < num_var_ranges; i++) { 1227 start_base = to_size_factor(start_base, &start_factor),
1237 mtrr_if->get(i, &base, &size, &type); 1228 type = range_state[i].type;
1238 range_state[i].base_pfn = base; 1229
1239 range_state[i].size_pfn = size; 1230 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
1240 range_state[i].type = type; 1231 i, start_base, start_factor,
1232 size_base, size_factor,
1233 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
1234 ((type == MTRR_TYPE_WRPROT) ? "WP" :
1235 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
1236 );
1241 } 1237 }
1238}
1239
1240static int __init mtrr_need_cleanup(void)
1241{
1242 int i;
1243 mtrr_type type;
1244 unsigned long size;
1245 /* extra one for all 0 */
1246 int num[MTRR_NUM_TYPES + 1];
1242 1247
1243 /* check entries number */ 1248 /* check entries number */
1244 memset(num, 0, sizeof(num)); 1249 memset(num, 0, sizeof(num));
@@ -1263,29 +1268,133 @@ static int __init mtrr_cleanup(unsigned address_bits)
1263 num_var_ranges - num[MTRR_NUM_TYPES]) 1268 num_var_ranges - num[MTRR_NUM_TYPES])
1264 return 0; 1269 return 0;
1265 1270
1266 /* print original var MTRRs at first, for debugging: */ 1271 return 1;
1267 printk(KERN_DEBUG "original variable MTRRs\n"); 1272}
1268 for (i = 0; i < num_var_ranges; i++) {
1269 char start_factor = 'K', size_factor = 'K';
1270 unsigned long start_base, size_base;
1271 1273
1272 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); 1274static unsigned long __initdata range_sums;
1273 if (!size_base) 1275static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
1274 continue; 1276 unsigned long extra_remove_base,
1277 unsigned long extra_remove_size,
1278 int i)
1279{
1280 int num_reg;
1281 static struct res_range range_new[RANGE_NUM];
1282 static int nr_range_new;
1283 unsigned long range_sums_new;
1284
1285 /* convert ranges to var ranges state */
1286 num_reg = x86_setup_var_mtrrs(range, nr_range,
1287 chunk_size, gran_size);
1288
1289 /* we got new setting in range_state, check it */
1290 memset(range_new, 0, sizeof(range_new));
1291 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1292 extra_remove_base, extra_remove_size);
1293 range_sums_new = sum_ranges(range_new, nr_range_new);
1294
1295 result[i].chunk_sizek = chunk_size >> 10;
1296 result[i].gran_sizek = gran_size >> 10;
1297 result[i].num_reg = num_reg;
1298 if (range_sums < range_sums_new) {
1299 result[i].lose_cover_sizek =
1300 (range_sums_new - range_sums) << PSHIFT;
1301 result[i].bad = 1;
1302 } else
1303 result[i].lose_cover_sizek =
1304 (range_sums - range_sums_new) << PSHIFT;
1275 1305
1276 size_base = to_size_factor(size_base, &size_factor), 1306 /* double check it */
1277 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); 1307 if (!result[i].bad && !result[i].lose_cover_sizek) {
1278 start_base = to_size_factor(start_base, &start_factor), 1308 if (nr_range_new != nr_range ||
1279 type = range_state[i].type; 1309 memcmp(range, range_new, sizeof(range)))
1310 result[i].bad = 1;
1311 }
1280 1312
1281 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", 1313 if (!result[i].bad && (range_sums - range_sums_new <
1282 i, start_base, start_factor, 1314 min_loss_pfn[num_reg])) {
1283 size_base, size_factor, 1315 min_loss_pfn[num_reg] =
1284 (type == MTRR_TYPE_UNCACHABLE) ? "UC" : 1316 range_sums - range_sums_new;
1285 ((type == MTRR_TYPE_WRPROT) ? "WP" :
1286 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
1287 );
1288 } 1317 }
1318}
1319
1320static void __init mtrr_print_out_one_result(int i)
1321{
1322 char gran_factor, chunk_factor, lose_factor;
1323 unsigned long gran_base, chunk_base, lose_base;
1324
1325 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1326 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1327 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1328 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1329 result[i].bad ? "*BAD*" : " ",
1330 gran_base, gran_factor, chunk_base, chunk_factor);
1331 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
1332 result[i].num_reg, result[i].bad ? "-" : "",
1333 lose_base, lose_factor);
1334}
1335
1336static int __init mtrr_search_optimal_index(void)
1337{
1338 int i;
1339 int num_reg_good;
1340 int index_good;
1341
1342 if (nr_mtrr_spare_reg >= num_var_ranges)
1343 nr_mtrr_spare_reg = num_var_ranges - 1;
1344 num_reg_good = -1;
1345 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1346 if (!min_loss_pfn[i])
1347 num_reg_good = i;
1348 }
1349
1350 index_good = -1;
1351 if (num_reg_good != -1) {
1352 for (i = 0; i < NUM_RESULT; i++) {
1353 if (!result[i].bad &&
1354 result[i].num_reg == num_reg_good &&
1355 !result[i].lose_cover_sizek) {
1356 index_good = i;
1357 break;
1358 }
1359 }
1360 }
1361
1362 return index_good;
1363}
1364
1365
1366static int __init mtrr_cleanup(unsigned address_bits)
1367{
1368 unsigned long extra_remove_base, extra_remove_size;
1369 unsigned long base, size, def, dummy;
1370 mtrr_type type;
1371 u64 chunk_size, gran_size;
1372 int index_good;
1373 int i;
1374
1375 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
1376 return 0;
1377 rdmsr(MTRRdefType_MSR, def, dummy);
1378 def &= 0xff;
1379 if (def != MTRR_TYPE_UNCACHABLE)
1380 return 0;
1381
1382 /* get it and store it aside */
1383 memset(range_state, 0, sizeof(range_state));
1384 for (i = 0; i < num_var_ranges; i++) {
1385 mtrr_if->get(i, &base, &size, &type);
1386 range_state[i].base_pfn = base;
1387 range_state[i].size_pfn = size;
1388 range_state[i].type = type;
1389 }
1390
1391 /* check if we need handle it and can handle it */
1392 if (!mtrr_need_cleanup())
1393 return 0;
1394
1395 /* print original var MTRRs at first, for debugging: */
1396 printk(KERN_DEBUG "original variable MTRRs\n");
1397 print_out_mtrr_range_state();
1289 1398
1290 memset(range, 0, sizeof(range)); 1399 memset(range, 0, sizeof(range));
1291 extra_remove_size = 0; 1400 extra_remove_size = 0;
@@ -1309,176 +1418,64 @@ static int __init mtrr_cleanup(unsigned address_bits)
1309 range_sums >> (20 - PAGE_SHIFT)); 1418 range_sums >> (20 - PAGE_SHIFT));
1310 1419
1311 if (mtrr_chunk_size && mtrr_gran_size) { 1420 if (mtrr_chunk_size && mtrr_gran_size) {
1312 int num_reg; 1421 i = 0;
1313 char gran_factor, chunk_factor, lose_factor; 1422 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
1314 unsigned long gran_base, chunk_base, lose_base; 1423 extra_remove_base, extra_remove_size, i);
1315
1316 debug_print++;
1317 /* convert ranges to var ranges state */
1318 num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
1319 mtrr_gran_size);
1320 1424
1321 /* we got new setting in range_state, check it */ 1425 mtrr_print_out_one_result(i);
1322 memset(range_new, 0, sizeof(range_new));
1323 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1324 extra_remove_base,
1325 extra_remove_size);
1326 range_sums_new = sum_ranges(range_new, nr_range_new);
1327 1426
1328 i = 0;
1329 result[i].chunk_sizek = mtrr_chunk_size >> 10;
1330 result[i].gran_sizek = mtrr_gran_size >> 10;
1331 result[i].num_reg = num_reg;
1332 if (range_sums < range_sums_new) {
1333 result[i].lose_cover_sizek =
1334 (range_sums_new - range_sums) << PSHIFT;
1335 result[i].bad = 1;
1336 } else
1337 result[i].lose_cover_sizek =
1338 (range_sums - range_sums_new) << PSHIFT;
1339
1340 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1341 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1342 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1343 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1344 result[i].bad?"*BAD*":" ",
1345 gran_base, gran_factor, chunk_base, chunk_factor);
1346 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
1347 result[i].num_reg, result[i].bad?"-":"",
1348 lose_base, lose_factor);
1349 if (!result[i].bad) { 1427 if (!result[i].bad) {
1350 set_var_mtrr_all(address_bits); 1428 set_var_mtrr_all(address_bits);
1351 return 1; 1429 return 1;
1352 } 1430 }
1353 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " 1431 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
1354 "will find optimal one\n"); 1432 "will find optimal one\n");
1355 debug_print--;
1356 memset(result, 0, sizeof(result[0]));
1357 } 1433 }
1358 1434
1359 i = 0; 1435 i = 0;
1360 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); 1436 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
1361 memset(result, 0, sizeof(result)); 1437 memset(result, 0, sizeof(result));
1362 for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { 1438 for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
1363 char gran_factor;
1364 unsigned long gran_base;
1365
1366 if (debug_print)
1367 gran_base = to_size_factor(gran_size >> 10, &gran_factor);
1368 1439
1369 for (chunk_size = gran_size; chunk_size < (1ULL<<32); 1440 for (chunk_size = gran_size; chunk_size < (1ULL<<32);
1370 chunk_size <<= 1) { 1441 chunk_size <<= 1) {
1371 int num_reg;
1372 1442
1373 if (debug_print) {
1374 char chunk_factor;
1375 unsigned long chunk_base;
1376
1377 chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
1378 printk(KERN_INFO "\n");
1379 printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n",
1380 gran_base, gran_factor, chunk_base, chunk_factor);
1381 }
1382 if (i >= NUM_RESULT) 1443 if (i >= NUM_RESULT)
1383 continue; 1444 continue;
1384 1445
1385 /* convert ranges to var ranges state */ 1446 mtrr_calc_range_state(chunk_size, gran_size,
1386 num_reg = x86_setup_var_mtrrs(range, nr_range, 1447 extra_remove_base, extra_remove_size, i);
1387 chunk_size, gran_size); 1448 if (debug_print) {
1388 1449 mtrr_print_out_one_result(i);
1389 /* we got new setting in range_state, check it */ 1450 printk(KERN_INFO "\n");
1390 memset(range_new, 0, sizeof(range_new));
1391 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1392 extra_remove_base, extra_remove_size);
1393 range_sums_new = sum_ranges(range_new, nr_range_new);
1394
1395 result[i].chunk_sizek = chunk_size >> 10;
1396 result[i].gran_sizek = gran_size >> 10;
1397 result[i].num_reg = num_reg;
1398 if (range_sums < range_sums_new) {
1399 result[i].lose_cover_sizek =
1400 (range_sums_new - range_sums) << PSHIFT;
1401 result[i].bad = 1;
1402 } else
1403 result[i].lose_cover_sizek =
1404 (range_sums - range_sums_new) << PSHIFT;
1405
1406 /* double check it */
1407 if (!result[i].bad && !result[i].lose_cover_sizek) {
1408 if (nr_range_new != nr_range ||
1409 memcmp(range, range_new, sizeof(range)))
1410 result[i].bad = 1;
1411 } 1451 }
1412 1452
1413 if (!result[i].bad && (range_sums - range_sums_new <
1414 min_loss_pfn[num_reg])) {
1415 min_loss_pfn[num_reg] =
1416 range_sums - range_sums_new;
1417 }
1418 i++; 1453 i++;
1419 } 1454 }
1420 } 1455 }
1421 1456
1422 /* print out all */
1423 for (i = 0; i < NUM_RESULT; i++) {
1424 char gran_factor, chunk_factor, lose_factor;
1425 unsigned long gran_base, chunk_base, lose_base;
1426
1427 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1428 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1429 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1430 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1431 result[i].bad?"*BAD*":" ",
1432 gran_base, gran_factor, chunk_base, chunk_factor);
1433 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
1434 result[i].num_reg, result[i].bad?"-":"",
1435 lose_base, lose_factor);
1436 }
1437
1438 /* try to find the optimal index */ 1457 /* try to find the optimal index */
1439 if (nr_mtrr_spare_reg >= num_var_ranges) 1458 index_good = mtrr_search_optimal_index();
1440 nr_mtrr_spare_reg = num_var_ranges - 1;
1441 num_reg_good = -1;
1442 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1443 if (!min_loss_pfn[i])
1444 num_reg_good = i;
1445 }
1446
1447 index_good = -1;
1448 if (num_reg_good != -1) {
1449 for (i = 0; i < NUM_RESULT; i++) {
1450 if (!result[i].bad &&
1451 result[i].num_reg == num_reg_good &&
1452 !result[i].lose_cover_sizek) {
1453 index_good = i;
1454 break;
1455 }
1456 }
1457 }
1458 1459
1459 if (index_good != -1) { 1460 if (index_good != -1) {
1460 char gran_factor, chunk_factor, lose_factor;
1461 unsigned long gran_base, chunk_base, lose_base;
1462
1463 printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); 1461 printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
1464 i = index_good; 1462 i = index_good;
1465 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), 1463 mtrr_print_out_one_result(i);
1466 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), 1464
1467 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1468 printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
1469 gran_base, gran_factor, chunk_base, chunk_factor);
1470 printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n",
1471 result[i].num_reg, lose_base, lose_factor);
1472 /* convert ranges to var ranges state */ 1465 /* convert ranges to var ranges state */
1473 chunk_size = result[i].chunk_sizek; 1466 chunk_size = result[i].chunk_sizek;
1474 chunk_size <<= 10; 1467 chunk_size <<= 10;
1475 gran_size = result[i].gran_sizek; 1468 gran_size = result[i].gran_sizek;
1476 gran_size <<= 10; 1469 gran_size <<= 10;
1477 debug_print++;
1478 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); 1470 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
1479 debug_print--;
1480 set_var_mtrr_all(address_bits); 1471 set_var_mtrr_all(address_bits);
1472 printk(KERN_DEBUG "New variable MTRRs\n");
1473 print_out_mtrr_range_state();
1481 return 1; 1474 return 1;
1475 } else {
1476 /* print out all */
1477 for (i = 0; i < NUM_RESULT; i++)
1478 mtrr_print_out_one_result(i);
1482 } 1479 }
1483 1480
1484 printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); 1481 printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
@@ -1562,7 +1559,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1562{ 1559{
1563 unsigned long i, base, size, highest_pfn = 0, def, dummy; 1560 unsigned long i, base, size, highest_pfn = 0, def, dummy;
1564 mtrr_type type; 1561 mtrr_type type;
1565 int nr_range;
1566 u64 total_trim_size; 1562 u64 total_trim_size;
1567 1563
1568 /* extra one for all 0 */ 1564 /* extra one for all 0 */
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
new file mode 100644
index 000000000000..284c399e3234
--- /dev/null
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -0,0 +1,112 @@
1/*
2 * VMware Detection code.
3 *
4 * Copyright (C) 2008, VMware, Inc.
5 * Author : Alok N Kataria <akataria@vmware.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
15 * NON INFRINGEMENT. See the GNU General Public License for more
16 * details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 */
23
24#include <linux/dmi.h>
25#include <asm/div64.h>
26#include <asm/vmware.h>
27
28#define CPUID_VMWARE_INFO_LEAF 0x40000000
29#define VMWARE_HYPERVISOR_MAGIC 0x564D5868
30#define VMWARE_HYPERVISOR_PORT 0x5658
31
32#define VMWARE_PORT_CMD_GETVERSION 10
33#define VMWARE_PORT_CMD_GETHZ 45
34
35#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
36 __asm__("inl (%%dx)" : \
37 "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
38 "0"(VMWARE_HYPERVISOR_MAGIC), \
39 "1"(VMWARE_PORT_CMD_##cmd), \
40 "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \
41 "memory");
42
43static inline int __vmware_platform(void)
44{
45 uint32_t eax, ebx, ecx, edx;
46 VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx);
47 return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
48}
49
50static unsigned long __vmware_get_tsc_khz(void)
51{
52 uint64_t tsc_hz;
53 uint32_t eax, ebx, ecx, edx;
54
55 VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
56
57 if (ebx == UINT_MAX)
58 return 0;
59 tsc_hz = eax | (((uint64_t)ebx) << 32);
60 do_div(tsc_hz, 1000);
61 BUG_ON(tsc_hz >> 32);
62 return tsc_hz;
63}
64
65/*
66 * While checking the dmi string infomation, just checking the product
67 * serial key should be enough, as this will always have a VMware
68 * specific string when running under VMware hypervisor.
69 */
70int vmware_platform(void)
71{
72 if (cpu_has_hypervisor) {
73 unsigned int eax, ebx, ecx, edx;
74 char hyper_vendor_id[13];
75
76 cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx);
77 memcpy(hyper_vendor_id + 0, &ebx, 4);
78 memcpy(hyper_vendor_id + 4, &ecx, 4);
79 memcpy(hyper_vendor_id + 8, &edx, 4);
80 hyper_vendor_id[12] = '\0';
81 if (!strcmp(hyper_vendor_id, "VMwareVMware"))
82 return 1;
83 } else if (dmi_available && dmi_name_in_serial("VMware") &&
84 __vmware_platform())
85 return 1;
86
87 return 0;
88}
89
90unsigned long vmware_get_tsc_khz(void)
91{
92 BUG_ON(!vmware_platform());
93 return __vmware_get_tsc_khz();
94}
95
96/*
97 * VMware hypervisor takes care of exporting a reliable TSC to the guest.
98 * Still, due to timing difference when running on virtual cpus, the TSC can
99 * be marked as unstable in some cases. For example, the TSC sync check at
100 * bootup can fail due to a marginal offset between vcpus' TSCs (though the
101 * TSCs do not drift from each other). Also, the ACPI PM timer clocksource
102 * is not suitable as a watchdog when running on a hypervisor because the
103 * kernel may miss a wrap of the counter if the vcpu is descheduled for a
104 * long time. To skip these checks at runtime we set these capability bits,
105 * so that the kernel could just trust the hypervisor with providing a
106 * reliable virtual TSC that is suitable for timekeeping.
107 */
108void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c)
109{
110 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
111 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
112}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 268553817909..d84a852e4cd7 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -29,34 +29,17 @@
29 29
30#include <mach_ipi.h> 30#include <mach_ipi.h>
31 31
32/* This keeps a track of which one is crashing cpu. */
33static int crashing_cpu;
34 32
35#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 33#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
36static atomic_t waiting_for_crash_ipi;
37 34
38static int crash_nmi_callback(struct notifier_block *self, 35static void kdump_nmi_callback(int cpu, struct die_args *args)
39 unsigned long val, void *data)
40{ 36{
41 struct pt_regs *regs; 37 struct pt_regs *regs;
42#ifdef CONFIG_X86_32 38#ifdef CONFIG_X86_32
43 struct pt_regs fixed_regs; 39 struct pt_regs fixed_regs;
44#endif 40#endif
45 int cpu;
46 41
47 if (val != DIE_NMI_IPI) 42 regs = args->regs;
48 return NOTIFY_OK;
49
50 regs = ((struct die_args *)data)->regs;
51 cpu = raw_smp_processor_id();
52
53 /* Don't do anything if this handler is invoked on crashing cpu.
54 * Otherwise, system will completely hang. Crashing cpu can get
55 * an NMI if system was initially booted with nmi_watchdog parameter.
56 */
57 if (cpu == crashing_cpu)
58 return NOTIFY_STOP;
59 local_irq_disable();
60 43
61#ifdef CONFIG_X86_32 44#ifdef CONFIG_X86_32
62 if (!user_mode_vm(regs)) { 45 if (!user_mode_vm(regs)) {
@@ -65,54 +48,19 @@ static int crash_nmi_callback(struct notifier_block *self,
65 } 48 }
66#endif 49#endif
67 crash_save_cpu(regs, cpu); 50 crash_save_cpu(regs, cpu);
68 disable_local_APIC();
69 atomic_dec(&waiting_for_crash_ipi);
70 /* Assume hlt works */
71 halt();
72 for (;;)
73 cpu_relax();
74
75 return 1;
76}
77 51
78static void smp_send_nmi_allbutself(void) 52 disable_local_APIC();
79{
80 cpumask_t mask = cpu_online_map;
81 cpu_clear(safe_smp_processor_id(), mask);
82 if (!cpus_empty(mask))
83 send_IPI_mask(mask, NMI_VECTOR);
84} 53}
85 54
86static struct notifier_block crash_nmi_nb = { 55static void kdump_nmi_shootdown_cpus(void)
87 .notifier_call = crash_nmi_callback,
88};
89
90static void nmi_shootdown_cpus(void)
91{ 56{
92 unsigned long msecs; 57 nmi_shootdown_cpus(kdump_nmi_callback);
93
94 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
95 /* Would it be better to replace the trap vector here? */
96 if (register_die_notifier(&crash_nmi_nb))
97 return; /* return what? */
98 /* Ensure the new callback function is set before sending
99 * out the NMI
100 */
101 wmb();
102 58
103 smp_send_nmi_allbutself();
104
105 msecs = 1000; /* Wait at most a second for the other cpus to stop */
106 while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
107 mdelay(1);
108 msecs--;
109 }
110
111 /* Leave the nmi callback set */
112 disable_local_APIC(); 59 disable_local_APIC();
113} 60}
61
114#else 62#else
115static void nmi_shootdown_cpus(void) 63static void kdump_nmi_shootdown_cpus(void)
116{ 64{
117 /* There are no cpus to shootdown */ 65 /* There are no cpus to shootdown */
118} 66}
@@ -131,9 +79,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
131 /* The kernel is broken so disable interrupts */ 79 /* The kernel is broken so disable interrupts */
132 local_irq_disable(); 80 local_irq_disable();
133 81
134 /* Make a note of crashing cpu. Will be used in NMI callback.*/ 82 kdump_nmi_shootdown_cpus();
135 crashing_cpu = safe_smp_processor_id();
136 nmi_shootdown_cpus();
137 lapic_shutdown(); 83 lapic_shutdown();
138#if defined(CONFIG_X86_IO_APIC) 84#if defined(CONFIG_X86_IO_APIC)
139 disable_IO_APIC(); 85 disable_IO_APIC();
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index a2d1176c38ee..d6938d9351cf 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -847,17 +847,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
847 switch (c->x86) { 847 switch (c->x86) {
848 case 0x6: 848 case 0x6:
849 switch (c->x86_model) { 849 switch (c->x86_model) {
850 case 0 ... 0xC:
851 /* sorry, don't know about them */
852 break;
850 case 0xD: 853 case 0xD:
851 case 0xE: /* Pentium M */ 854 case 0xE: /* Pentium M */
852 ds_configure(&ds_cfg_var); 855 ds_configure(&ds_cfg_var);
853 break; 856 break;
854 case 0xF: /* Core2 */ 857 default: /* Core2, Atom, ... */
855 case 0x1C: /* Atom */
856 ds_configure(&ds_cfg_64); 858 ds_configure(&ds_cfg_64);
857 break; 859 break;
858 default:
859 /* sorry, don't know about them */
860 break;
861 } 860 }
862 break; 861 break;
863 case 0xF: 862 case 0xF:
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
new file mode 100644
index 000000000000..5962176dfabb
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.c
@@ -0,0 +1,319 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5#include <linux/kallsyms.h>
6#include <linux/kprobes.h>
7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h>
10#include <linux/kdebug.h>
11#include <linux/module.h>
12#include <linux/ptrace.h>
13#include <linux/kexec.h>
14#include <linux/bug.h>
15#include <linux/nmi.h>
16#include <linux/sysfs.h>
17
18#include <asm/stacktrace.h>
19
20#include "dumpstack.h"
21
22int panic_on_unrecovered_nmi;
23unsigned int code_bytes = 64;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static int die_counter;
26
27void printk_address(unsigned long address, int reliable)
28{
29 printk(" [<%p>] %s%pS\n", (void *) address,
30 reliable ? "" : "? ", (void *) address);
31}
32
33/*
34 * x86-64 can have up to three kernel stacks:
35 * process stack
36 * interrupt stack
37 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
38 */
39
40static inline int valid_stack_ptr(struct thread_info *tinfo,
41 void *p, unsigned int size, void *end)
42{
43 void *t = tinfo;
44 if (end) {
45 if (p < end && p >= (end-THREAD_SIZE))
46 return 1;
47 else
48 return 0;
49 }
50 return p > t && p < t + THREAD_SIZE - size;
51}
52
53unsigned long
54print_context_stack(struct thread_info *tinfo,
55 unsigned long *stack, unsigned long bp,
56 const struct stacktrace_ops *ops, void *data,
57 unsigned long *end)
58{
59 struct stack_frame *frame = (struct stack_frame *)bp;
60
61 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
62 unsigned long addr;
63
64 addr = *stack;
65 if (__kernel_text_address(addr)) {
66 if ((unsigned long) stack == bp + sizeof(long)) {
67 ops->address(data, addr, 1);
68 frame = frame->next_frame;
69 bp = (unsigned long) frame;
70 } else {
71 ops->address(data, addr, bp == 0);
72 }
73 }
74 stack++;
75 }
76 return bp;
77}
78
79
80static void
81print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
82{
83 printk(data);
84 print_symbol(msg, symbol);
85 printk("\n");
86}
87
88static void print_trace_warning(void *data, char *msg)
89{
90 printk("%s%s\n", (char *)data, msg);
91}
92
93static int print_trace_stack(void *data, char *name)
94{
95 printk("%s <%s> ", (char *)data, name);
96 return 0;
97}
98
99/*
100 * Print one address/symbol entries per line.
101 */
102static void print_trace_address(void *data, unsigned long addr, int reliable)
103{
104 touch_nmi_watchdog();
105 printk(data);
106 printk_address(addr, reliable);
107}
108
109static const struct stacktrace_ops print_trace_ops = {
110 .warning = print_trace_warning,
111 .warning_symbol = print_trace_warning_symbol,
112 .stack = print_trace_stack,
113 .address = print_trace_address,
114};
115
116void
117show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
118 unsigned long *stack, unsigned long bp, char *log_lvl)
119{
120 printk("%sCall Trace:\n", log_lvl);
121 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
122}
123
124void show_trace(struct task_struct *task, struct pt_regs *regs,
125 unsigned long *stack, unsigned long bp)
126{
127 show_trace_log_lvl(task, regs, stack, bp, "");
128}
129
130void show_stack(struct task_struct *task, unsigned long *sp)
131{
132 show_stack_log_lvl(task, NULL, sp, 0, "");
133}
134
135/*
136 * The architecture-independent dump_stack generator
137 */
138void dump_stack(void)
139{
140 unsigned long bp = 0;
141 unsigned long stack;
142
143#ifdef CONFIG_FRAME_POINTER
144 if (!bp)
145 get_bp(bp);
146#endif
147
148 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
149 current->pid, current->comm, print_tainted(),
150 init_utsname()->release,
151 (int)strcspn(init_utsname()->version, " "),
152 init_utsname()->version);
153 show_trace(NULL, NULL, &stack, bp);
154}
155EXPORT_SYMBOL(dump_stack);
156
157static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
158static int die_owner = -1;
159static unsigned int die_nest_count;
160
161unsigned __kprobes long oops_begin(void)
162{
163 int cpu;
164 unsigned long flags;
165
166 oops_enter();
167
168 /* racy, but better than risking deadlock. */
169 raw_local_irq_save(flags);
170 cpu = smp_processor_id();
171 if (!__raw_spin_trylock(&die_lock)) {
172 if (cpu == die_owner)
173 /* nested oops. should stop eventually */;
174 else
175 __raw_spin_lock(&die_lock);
176 }
177 die_nest_count++;
178 die_owner = cpu;
179 console_verbose();
180 bust_spinlocks(1);
181 return flags;
182}
183
184void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
185{
186 if (regs && kexec_should_crash(current))
187 crash_kexec(regs);
188
189 bust_spinlocks(0);
190 die_owner = -1;
191 add_taint(TAINT_DIE);
192 die_nest_count--;
193 if (!die_nest_count)
194 /* Nest count reaches zero, release the lock. */
195 __raw_spin_unlock(&die_lock);
196 raw_local_irq_restore(flags);
197 oops_exit();
198
199 if (!signr)
200 return;
201 if (in_interrupt())
202 panic("Fatal exception in interrupt");
203 if (panic_on_oops)
204 panic("Fatal exception");
205 do_exit(signr);
206}
207
208int __kprobes __die(const char *str, struct pt_regs *regs, long err)
209{
210#ifdef CONFIG_X86_32
211 unsigned short ss;
212 unsigned long sp;
213#endif
214 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
215#ifdef CONFIG_PREEMPT
216 printk("PREEMPT ");
217#endif
218#ifdef CONFIG_SMP
219 printk("SMP ");
220#endif
221#ifdef CONFIG_DEBUG_PAGEALLOC
222 printk("DEBUG_PAGEALLOC");
223#endif
224 printk("\n");
225 sysfs_printk_last_file();
226 if (notify_die(DIE_OOPS, str, regs, err,
227 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
228 return 1;
229
230 show_registers(regs);
231#ifdef CONFIG_X86_32
232 sp = (unsigned long) (&regs->sp);
233 savesegment(ss, ss);
234 if (user_mode(regs)) {
235 sp = regs->sp;
236 ss = regs->ss & 0xffff;
237 }
238 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
239 print_symbol("%s", regs->ip);
240 printk(" SS:ESP %04x:%08lx\n", ss, sp);
241#else
242 /* Executive summary in case the oops scrolled away */
243 printk(KERN_ALERT "RIP ");
244 printk_address(regs->ip, 1);
245 printk(" RSP <%016lx>\n", regs->sp);
246#endif
247 return 0;
248}
249
250/*
251 * This is gone through when something in the kernel has done something bad
252 * and is about to be terminated:
253 */
254void die(const char *str, struct pt_regs *regs, long err)
255{
256 unsigned long flags = oops_begin();
257 int sig = SIGSEGV;
258
259 if (!user_mode_vm(regs))
260 report_bug(regs->ip, regs);
261
262 if (__die(str, regs, err))
263 sig = 0;
264 oops_end(flags, regs, sig);
265}
266
267void notrace __kprobes
268die_nmi(char *str, struct pt_regs *regs, int do_panic)
269{
270 unsigned long flags;
271
272 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
273 return;
274
275 /*
276 * We are in trouble anyway, lets at least try
277 * to get a message out.
278 */
279 flags = oops_begin();
280 printk(KERN_EMERG "%s", str);
281 printk(" on CPU%d, ip %08lx, registers:\n",
282 smp_processor_id(), regs->ip);
283 show_registers(regs);
284 oops_end(flags, regs, 0);
285 if (do_panic || panic_on_oops)
286 panic("Non maskable interrupt");
287 nmi_exit();
288 local_irq_enable();
289 do_exit(SIGBUS);
290}
291
292static int __init oops_setup(char *s)
293{
294 if (!s)
295 return -EINVAL;
296 if (!strcmp(s, "panic"))
297 panic_on_oops = 1;
298 return 0;
299}
300early_param("oops", oops_setup);
301
302static int __init kstack_setup(char *s)
303{
304 if (!s)
305 return -EINVAL;
306 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
307 return 0;
308}
309early_param("kstack", kstack_setup);
310
311static int __init code_bytes_setup(char *s)
312{
313 code_bytes = simple_strtoul(s, NULL, 0);
314 if (code_bytes > 8192)
315 code_bytes = 8192;
316
317 return 1;
318}
319__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
new file mode 100644
index 000000000000..3119a801c32b
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
6#ifndef DUMPSTACK_H
7#define DUMPSTACK_H
8
9#ifdef CONFIG_X86_32
10#define STACKSLOTS_PER_LINE 8
11#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
12#else
13#define STACKSLOTS_PER_LINE 4
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif
16
17extern unsigned long
18print_context_stack(struct thread_info *tinfo,
19 unsigned long *stack, unsigned long bp,
20 const struct stacktrace_ops *ops, void *data,
21 unsigned long *end);
22
23extern void
24show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
25 unsigned long *stack, unsigned long bp, char *log_lvl);
26
27extern void
28show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
29 unsigned long *sp, unsigned long bp, char *log_lvl);
30
31extern unsigned int code_bytes;
32extern int kstack_depth_to_print;
33
34/* The form of the top of the frame on the stack */
35struct stack_frame {
36 struct stack_frame *next_frame;
37 unsigned long return_address;
38};
39#endif
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index b3614752197b..7b031b106ec8 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,64 +17,7 @@
17 17
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19
20#define STACKSLOTS_PER_LINE 8 20#include "dumpstack.h"
21#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
22
23int panic_on_unrecovered_nmi;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static unsigned int code_bytes = 64;
26static int die_counter;
27
28void printk_address(unsigned long address, int reliable)
29{
30 printk(" [<%p>] %s%pS\n", (void *) address,
31 reliable ? "" : "? ", (void *) address);
32}
33
34static inline int valid_stack_ptr(struct thread_info *tinfo,
35 void *p, unsigned int size, void *end)
36{
37 void *t = tinfo;
38 if (end) {
39 if (p < end && p >= (end-THREAD_SIZE))
40 return 1;
41 else
42 return 0;
43 }
44 return p > t && p < t + THREAD_SIZE - size;
45}
46
47/* The form of the top of the frame on the stack */
48struct stack_frame {
49 struct stack_frame *next_frame;
50 unsigned long return_address;
51};
52
53static inline unsigned long
54print_context_stack(struct thread_info *tinfo,
55 unsigned long *stack, unsigned long bp,
56 const struct stacktrace_ops *ops, void *data,
57 unsigned long *end)
58{
59 struct stack_frame *frame = (struct stack_frame *)bp;
60
61 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
62 unsigned long addr;
63
64 addr = *stack;
65 if (__kernel_text_address(addr)) {
66 if ((unsigned long) stack == bp + sizeof(long)) {
67 ops->address(data, addr, 1);
68 frame = frame->next_frame;
69 bp = (unsigned long) frame;
70 } else {
71 ops->address(data, addr, bp == 0);
72 }
73 }
74 stack++;
75 }
76 return bp;
77}
78 21
79void dump_trace(struct task_struct *task, struct pt_regs *regs, 22void dump_trace(struct task_struct *task, struct pt_regs *regs,
80 unsigned long *stack, unsigned long bp, 23 unsigned long *stack, unsigned long bp,
@@ -119,57 +62,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
119} 62}
120EXPORT_SYMBOL(dump_trace); 63EXPORT_SYMBOL(dump_trace);
121 64
122static void 65void
123print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
124{
125 printk(data);
126 print_symbol(msg, symbol);
127 printk("\n");
128}
129
130static void print_trace_warning(void *data, char *msg)
131{
132 printk("%s%s\n", (char *)data, msg);
133}
134
135static int print_trace_stack(void *data, char *name)
136{
137 printk("%s <%s> ", (char *)data, name);
138 return 0;
139}
140
141/*
142 * Print one address/symbol entries per line.
143 */
144static void print_trace_address(void *data, unsigned long addr, int reliable)
145{
146 touch_nmi_watchdog();
147 printk(data);
148 printk_address(addr, reliable);
149}
150
151static const struct stacktrace_ops print_trace_ops = {
152 .warning = print_trace_warning,
153 .warning_symbol = print_trace_warning_symbol,
154 .stack = print_trace_stack,
155 .address = print_trace_address,
156};
157
158static void
159show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
160 unsigned long *stack, unsigned long bp, char *log_lvl)
161{
162 printk("%sCall Trace:\n", log_lvl);
163 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
164}
165
166void show_trace(struct task_struct *task, struct pt_regs *regs,
167 unsigned long *stack, unsigned long bp)
168{
169 show_trace_log_lvl(task, regs, stack, bp, "");
170}
171
172static void
173show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 66show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
174 unsigned long *sp, unsigned long bp, char *log_lvl) 67 unsigned long *sp, unsigned long bp, char *log_lvl)
175{ 68{
@@ -196,33 +89,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
196 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 89 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
197} 90}
198 91
199void show_stack(struct task_struct *task, unsigned long *sp)
200{
201 show_stack_log_lvl(task, NULL, sp, 0, "");
202}
203
204/*
205 * The architecture-independent dump_stack generator
206 */
207void dump_stack(void)
208{
209 unsigned long bp = 0;
210 unsigned long stack;
211
212#ifdef CONFIG_FRAME_POINTER
213 if (!bp)
214 get_bp(bp);
215#endif
216
217 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
218 current->pid, current->comm, print_tainted(),
219 init_utsname()->release,
220 (int)strcspn(init_utsname()->version, " "),
221 init_utsname()->version);
222 show_trace(NULL, NULL, &stack, bp);
223}
224
225EXPORT_SYMBOL(dump_stack);
226 92
227void show_registers(struct pt_regs *regs) 93void show_registers(struct pt_regs *regs)
228{ 94{
@@ -283,167 +149,3 @@ int is_valid_bugaddr(unsigned long ip)
283 return ud2 == 0x0b0f; 149 return ud2 == 0x0b0f;
284} 150}
285 151
286static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
287static int die_owner = -1;
288static unsigned int die_nest_count;
289
290unsigned __kprobes long oops_begin(void)
291{
292 unsigned long flags;
293
294 oops_enter();
295
296 if (die_owner != raw_smp_processor_id()) {
297 console_verbose();
298 raw_local_irq_save(flags);
299 __raw_spin_lock(&die_lock);
300 die_owner = smp_processor_id();
301 die_nest_count = 0;
302 bust_spinlocks(1);
303 } else {
304 raw_local_irq_save(flags);
305 }
306 die_nest_count++;
307 return flags;
308}
309
310void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
311{
312 bust_spinlocks(0);
313 die_owner = -1;
314 add_taint(TAINT_DIE);
315 __raw_spin_unlock(&die_lock);
316 raw_local_irq_restore(flags);
317
318 if (!regs)
319 return;
320
321 if (kexec_should_crash(current))
322 crash_kexec(regs);
323 if (in_interrupt())
324 panic("Fatal exception in interrupt");
325 if (panic_on_oops)
326 panic("Fatal exception");
327 oops_exit();
328 do_exit(signr);
329}
330
331int __kprobes __die(const char *str, struct pt_regs *regs, long err)
332{
333 unsigned short ss;
334 unsigned long sp;
335
336 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
337#ifdef CONFIG_PREEMPT
338 printk("PREEMPT ");
339#endif
340#ifdef CONFIG_SMP
341 printk("SMP ");
342#endif
343#ifdef CONFIG_DEBUG_PAGEALLOC
344 printk("DEBUG_PAGEALLOC");
345#endif
346 printk("\n");
347 sysfs_printk_last_file();
348 if (notify_die(DIE_OOPS, str, regs, err,
349 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
350 return 1;
351
352 show_registers(regs);
353 /* Executive summary in case the oops scrolled away */
354 sp = (unsigned long) (&regs->sp);
355 savesegment(ss, ss);
356 if (user_mode(regs)) {
357 sp = regs->sp;
358 ss = regs->ss & 0xffff;
359 }
360 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
361 print_symbol("%s", regs->ip);
362 printk(" SS:ESP %04x:%08lx\n", ss, sp);
363 return 0;
364}
365
366/*
367 * This is gone through when something in the kernel has done something bad
368 * and is about to be terminated:
369 */
370void die(const char *str, struct pt_regs *regs, long err)
371{
372 unsigned long flags = oops_begin();
373
374 if (die_nest_count < 3) {
375 report_bug(regs->ip, regs);
376
377 if (__die(str, regs, err))
378 regs = NULL;
379 } else {
380 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
381 }
382
383 oops_end(flags, regs, SIGSEGV);
384}
385
386static DEFINE_SPINLOCK(nmi_print_lock);
387
388void notrace __kprobes
389die_nmi(char *str, struct pt_regs *regs, int do_panic)
390{
391 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
392 return;
393
394 spin_lock(&nmi_print_lock);
395 /*
396 * We are in trouble anyway, lets at least try
397 * to get a message out:
398 */
399 bust_spinlocks(1);
400 printk(KERN_EMERG "%s", str);
401 printk(" on CPU%d, ip %08lx, registers:\n",
402 smp_processor_id(), regs->ip);
403 show_registers(regs);
404 if (do_panic)
405 panic("Non maskable interrupt");
406 console_silent();
407 spin_unlock(&nmi_print_lock);
408
409 /*
410 * If we are in kernel we are probably nested up pretty bad
411 * and might aswell get out now while we still can:
412 */
413 if (!user_mode_vm(regs)) {
414 current->thread.trap_no = 2;
415 crash_kexec(regs);
416 }
417
418 bust_spinlocks(0);
419 do_exit(SIGSEGV);
420}
421
422static int __init oops_setup(char *s)
423{
424 if (!s)
425 return -EINVAL;
426 if (!strcmp(s, "panic"))
427 panic_on_oops = 1;
428 return 0;
429}
430early_param("oops", oops_setup);
431
432static int __init kstack_setup(char *s)
433{
434 if (!s)
435 return -EINVAL;
436 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
437 return 0;
438}
439early_param("kstack", kstack_setup);
440
441static int __init code_bytes_setup(char *s)
442{
443 code_bytes = simple_strtoul(s, NULL, 0);
444 if (code_bytes > 8192)
445 code_bytes = 8192;
446
447 return 1;
448}
449__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 96a5db7da8a7..33ff10287a5d 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -17,19 +17,7 @@
17 17
18#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
19 19
20#define STACKSLOTS_PER_LINE 4 20#include "dumpstack.h"
21#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
22
23int panic_on_unrecovered_nmi;
24int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
25static unsigned int code_bytes = 64;
26static int die_counter;
27
28void printk_address(unsigned long address, int reliable)
29{
30 printk(" [<%p>] %s%pS\n", (void *) address,
31 reliable ? "" : "? ", (void *) address);
32}
33 21
34static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 22static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
35 unsigned *usedp, char **idp) 23 unsigned *usedp, char **idp)
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
113 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 101 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
114 */ 102 */
115 103
116static inline int valid_stack_ptr(struct thread_info *tinfo,
117 void *p, unsigned int size, void *end)
118{
119 void *t = tinfo;
120 if (end) {
121 if (p < end && p >= (end-THREAD_SIZE))
122 return 1;
123 else
124 return 0;
125 }
126 return p > t && p < t + THREAD_SIZE - size;
127}
128
129/* The form of the top of the frame on the stack */
130struct stack_frame {
131 struct stack_frame *next_frame;
132 unsigned long return_address;
133};
134
135static inline unsigned long
136print_context_stack(struct thread_info *tinfo,
137 unsigned long *stack, unsigned long bp,
138 const struct stacktrace_ops *ops, void *data,
139 unsigned long *end)
140{
141 struct stack_frame *frame = (struct stack_frame *)bp;
142
143 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
144 unsigned long addr;
145
146 addr = *stack;
147 if (__kernel_text_address(addr)) {
148 if ((unsigned long) stack == bp + sizeof(long)) {
149 ops->address(data, addr, 1);
150 frame = frame->next_frame;
151 bp = (unsigned long) frame;
152 } else {
153 ops->address(data, addr, bp == 0);
154 }
155 }
156 stack++;
157 }
158 return bp;
159}
160
161void dump_trace(struct task_struct *task, struct pt_regs *regs, 104void dump_trace(struct task_struct *task, struct pt_regs *regs,
162 unsigned long *stack, unsigned long bp, 105 unsigned long *stack, unsigned long bp,
163 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
@@ -248,57 +191,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
248} 191}
249EXPORT_SYMBOL(dump_trace); 192EXPORT_SYMBOL(dump_trace);
250 193
251static void 194void
252print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
253{
254 printk(data);
255 print_symbol(msg, symbol);
256 printk("\n");
257}
258
259static void print_trace_warning(void *data, char *msg)
260{
261 printk("%s%s\n", (char *)data, msg);
262}
263
264static int print_trace_stack(void *data, char *name)
265{
266 printk("%s <%s> ", (char *)data, name);
267 return 0;
268}
269
270/*
271 * Print one address/symbol entries per line.
272 */
273static void print_trace_address(void *data, unsigned long addr, int reliable)
274{
275 touch_nmi_watchdog();
276 printk(data);
277 printk_address(addr, reliable);
278}
279
280static const struct stacktrace_ops print_trace_ops = {
281 .warning = print_trace_warning,
282 .warning_symbol = print_trace_warning_symbol,
283 .stack = print_trace_stack,
284 .address = print_trace_address,
285};
286
287static void
288show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
289 unsigned long *stack, unsigned long bp, char *log_lvl)
290{
291 printk("%sCall Trace:\n", log_lvl);
292 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
293}
294
295void show_trace(struct task_struct *task, struct pt_regs *regs,
296 unsigned long *stack, unsigned long bp)
297{
298 show_trace_log_lvl(task, regs, stack, bp, "");
299}
300
301static void
302show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 195show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
303 unsigned long *sp, unsigned long bp, char *log_lvl) 196 unsigned long *sp, unsigned long bp, char *log_lvl)
304{ 197{
@@ -342,33 +235,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
342 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 235 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
343} 236}
344 237
345void show_stack(struct task_struct *task, unsigned long *sp)
346{
347 show_stack_log_lvl(task, NULL, sp, 0, "");
348}
349
350/*
351 * The architecture-independent dump_stack generator
352 */
353void dump_stack(void)
354{
355 unsigned long bp = 0;
356 unsigned long stack;
357
358#ifdef CONFIG_FRAME_POINTER
359 if (!bp)
360 get_bp(bp);
361#endif
362
363 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
364 current->pid, current->comm, print_tainted(),
365 init_utsname()->release,
366 (int)strcspn(init_utsname()->version, " "),
367 init_utsname()->version);
368 show_trace(NULL, NULL, &stack, bp);
369}
370EXPORT_SYMBOL(dump_stack);
371
372void show_registers(struct pt_regs *regs) 238void show_registers(struct pt_regs *regs)
373{ 239{
374 int i; 240 int i;
@@ -429,147 +295,3 @@ int is_valid_bugaddr(unsigned long ip)
429 return ud2 == 0x0b0f; 295 return ud2 == 0x0b0f;
430} 296}
431 297
432static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
433static int die_owner = -1;
434static unsigned int die_nest_count;
435
436unsigned __kprobes long oops_begin(void)
437{
438 int cpu;
439 unsigned long flags;
440
441 oops_enter();
442
443 /* racy, but better than risking deadlock. */
444 raw_local_irq_save(flags);
445 cpu = smp_processor_id();
446 if (!__raw_spin_trylock(&die_lock)) {
447 if (cpu == die_owner)
448 /* nested oops. should stop eventually */;
449 else
450 __raw_spin_lock(&die_lock);
451 }
452 die_nest_count++;
453 die_owner = cpu;
454 console_verbose();
455 bust_spinlocks(1);
456 return flags;
457}
458
459void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
460{
461 die_owner = -1;
462 bust_spinlocks(0);
463 die_nest_count--;
464 if (!die_nest_count)
465 /* Nest count reaches zero, release the lock. */
466 __raw_spin_unlock(&die_lock);
467 raw_local_irq_restore(flags);
468 if (!regs) {
469 oops_exit();
470 return;
471 }
472 if (in_interrupt())
473 panic("Fatal exception in interrupt");
474 if (panic_on_oops)
475 panic("Fatal exception");
476 oops_exit();
477 do_exit(signr);
478}
479
480int __kprobes __die(const char *str, struct pt_regs *regs, long err)
481{
482 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
483#ifdef CONFIG_PREEMPT
484 printk("PREEMPT ");
485#endif
486#ifdef CONFIG_SMP
487 printk("SMP ");
488#endif
489#ifdef CONFIG_DEBUG_PAGEALLOC
490 printk("DEBUG_PAGEALLOC");
491#endif
492 printk("\n");
493 sysfs_printk_last_file();
494 if (notify_die(DIE_OOPS, str, regs, err,
495 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
496 return 1;
497
498 show_registers(regs);
499 add_taint(TAINT_DIE);
500 /* Executive summary in case the oops scrolled away */
501 printk(KERN_ALERT "RIP ");
502 printk_address(regs->ip, 1);
503 printk(" RSP <%016lx>\n", regs->sp);
504 if (kexec_should_crash(current))
505 crash_kexec(regs);
506 return 0;
507}
508
509void die(const char *str, struct pt_regs *regs, long err)
510{
511 unsigned long flags = oops_begin();
512
513 if (!user_mode(regs))
514 report_bug(regs->ip, regs);
515
516 if (__die(str, regs, err))
517 regs = NULL;
518 oops_end(flags, regs, SIGSEGV);
519}
520
521notrace __kprobes void
522die_nmi(char *str, struct pt_regs *regs, int do_panic)
523{
524 unsigned long flags;
525
526 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
527 return;
528
529 flags = oops_begin();
530 /*
531 * We are in trouble anyway, lets at least try
532 * to get a message out.
533 */
534 printk(KERN_EMERG "%s", str);
535 printk(" on CPU%d, ip %08lx, registers:\n",
536 smp_processor_id(), regs->ip);
537 show_registers(regs);
538 if (kexec_should_crash(current))
539 crash_kexec(regs);
540 if (do_panic || panic_on_oops)
541 panic("Non maskable interrupt");
542 oops_end(flags, NULL, SIGBUS);
543 nmi_exit();
544 local_irq_enable();
545 do_exit(SIGBUS);
546}
547
548static int __init oops_setup(char *s)
549{
550 if (!s)
551 return -EINVAL;
552 if (!strcmp(s, "panic"))
553 panic_on_oops = 1;
554 return 0;
555}
556early_param("oops", oops_setup);
557
558static int __init kstack_setup(char *s)
559{
560 if (!s)
561 return -EINVAL;
562 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
563 return 0;
564}
565early_param("kstack", kstack_setup);
566
567static int __init code_bytes_setup(char *s)
568{
569 code_bytes = simple_strtoul(s, NULL, 0);
570 if (code_bytes > 8192)
571 code_bytes = 8192;
572
573 return 1;
574}
575__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7aafeb5263ef..65a13943e098 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -677,22 +677,6 @@ struct early_res {
677}; 677};
678static struct early_res early_res[MAX_EARLY_RES] __initdata = { 678static struct early_res early_res[MAX_EARLY_RES] __initdata = {
679 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ 679 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
680#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
681 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
682#endif
683#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
684 /*
685 * But first pinch a few for the stack/trampoline stuff
686 * FIXME: Don't need the extra page at 4K, but need to fix
687 * trampoline before removing it. (see the GDT stuff)
688 */
689 { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
690 /*
691 * Has to be in very low memory so we can execute
692 * real-mode AP code.
693 */
694 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
695#endif
696 {} 680 {}
697}; 681};
698 682
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 1b894b72c0f5..744aa7fc49d5 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -17,6 +17,7 @@
17#include <asm/io_apic.h> 17#include <asm/io_apic.h>
18#include <asm/apic.h> 18#include <asm/apic.h>
19#include <asm/iommu.h> 19#include <asm/iommu.h>
20#include <asm/gart.h>
20 21
21static void __init fix_hypertransport_config(int num, int slot, int func) 22static void __init fix_hypertransport_config(int num, int slot, int func)
22{ 23{
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 34ad997d3834..23b138e31e9c 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -875,49 +875,6 @@ static struct console early_dbgp_console = {
875}; 875};
876#endif 876#endif
877 877
878/* Console interface to a host file on AMD's SimNow! */
879
880static int simnow_fd;
881
882enum {
883 MAGIC1 = 0xBACCD00A,
884 MAGIC2 = 0xCA110000,
885 XOPEN = 5,
886 XWRITE = 4,
887};
888
889static noinline long simnow(long cmd, long a, long b, long c)
890{
891 long ret;
892
893 asm volatile("cpuid" :
894 "=a" (ret) :
895 "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
896 return ret;
897}
898
899static void __init simnow_init(char *str)
900{
901 char *fn = "klog";
902
903 if (*str == '=')
904 fn = ++str;
905 /* error ignored */
906 simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644);
907}
908
909static void simnow_write(struct console *con, const char *s, unsigned n)
910{
911 simnow(XWRITE, simnow_fd, (unsigned long)s, n);
912}
913
914static struct console simnow_console = {
915 .name = "simnow",
916 .write = simnow_write,
917 .flags = CON_PRINTBUFFER,
918 .index = -1,
919};
920
921/* Direct interface for emergencies */ 878/* Direct interface for emergencies */
922static struct console *early_console = &early_vga_console; 879static struct console *early_console = &early_vga_console;
923static int __initdata early_console_initialized; 880static int __initdata early_console_initialized;
@@ -960,10 +917,6 @@ static int __init setup_early_printk(char *buf)
960 max_ypos = boot_params.screen_info.orig_video_lines; 917 max_ypos = boot_params.screen_info.orig_video_lines;
961 current_ypos = boot_params.screen_info.orig_y; 918 current_ypos = boot_params.screen_info.orig_y;
962 early_console = &early_vga_console; 919 early_console = &early_vga_console;
963 } else if (!strncmp(buf, "simnow", 6)) {
964 simnow_init(buf + 6);
965 early_console = &simnow_console;
966 keep_early = 1;
967#ifdef CONFIG_EARLY_PRINTK_DBGP 920#ifdef CONFIG_EARLY_PRINTK_DBGP
968 } else if (!strncmp(buf, "dbgp", 4)) { 921 } else if (!strncmp(buf, "dbgp", 4)) {
969 if (early_dbgp_init(buf+4) < 0) 922 if (early_dbgp_init(buf+4) < 0)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..fe7014176eb0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -619,28 +619,37 @@ END(syscall_badsys)
61927:; 61927:;
620 620
621/* 621/*
622 * Build the entry stubs and pointer table with 622 * Build the entry stubs and pointer table with some assembler magic.
623 * some assembler magic. 623 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
624 * single cache line on all modern x86 implementations.
624 */ 625 */
625.section .rodata,"a" 626.section .init.rodata,"a"
626ENTRY(interrupt) 627ENTRY(interrupt)
627.text 628.text
628 629 .p2align 5
630 .p2align CONFIG_X86_L1_CACHE_SHIFT
629ENTRY(irq_entries_start) 631ENTRY(irq_entries_start)
630 RING0_INT_FRAME 632 RING0_INT_FRAME
631vector=0 633vector=FIRST_EXTERNAL_VECTOR
632.rept NR_VECTORS 634.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
633 ALIGN 635 .balign 32
634 .if vector 636 .rept 7
637 .if vector < NR_VECTORS
638 .if vector <> FIRST_EXTERNAL_VECTOR
635 CFI_ADJUST_CFA_OFFSET -4 639 CFI_ADJUST_CFA_OFFSET -4
636 .endif 640 .endif
6371: pushl $~(vector) 6411: pushl $(~vector+0x80) /* Note: always in signed byte range */
638 CFI_ADJUST_CFA_OFFSET 4 642 CFI_ADJUST_CFA_OFFSET 4
639 jmp common_interrupt 643 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
640 .previous 644 jmp 2f
645 .endif
646 .previous
641 .long 1b 647 .long 1b
642 .text 648 .text
643vector=vector+1 649vector=vector+1
650 .endif
651 .endr
6522: jmp common_interrupt
644.endr 653.endr
645END(irq_entries_start) 654END(irq_entries_start)
646 655
@@ -652,8 +661,9 @@ END(interrupt)
652 * the CPU automatically disables interrupts when executing an IRQ vector, 661 * the CPU automatically disables interrupts when executing an IRQ vector,
653 * so IRQ-flags tracing has to follow that: 662 * so IRQ-flags tracing has to follow that:
654 */ 663 */
655 ALIGN 664 .p2align CONFIG_X86_L1_CACHE_SHIFT
656common_interrupt: 665common_interrupt:
666 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
657 SAVE_ALL 667 SAVE_ALL
658 TRACE_IRQS_OFF 668 TRACE_IRQS_OFF
659 movl %esp,%eax 669 movl %esp,%eax
@@ -678,65 +688,6 @@ ENDPROC(name)
678/* The include is where all of the SMP etc. interrupts come from */ 688/* The include is where all of the SMP etc. interrupts come from */
679#include "entry_arch.h" 689#include "entry_arch.h"
680 690
681KPROBE_ENTRY(page_fault)
682 RING0_EC_FRAME
683 pushl $do_page_fault
684 CFI_ADJUST_CFA_OFFSET 4
685 ALIGN
686error_code:
687 /* the function address is in %fs's slot on the stack */
688 pushl %es
689 CFI_ADJUST_CFA_OFFSET 4
690 /*CFI_REL_OFFSET es, 0*/
691 pushl %ds
692 CFI_ADJUST_CFA_OFFSET 4
693 /*CFI_REL_OFFSET ds, 0*/
694 pushl %eax
695 CFI_ADJUST_CFA_OFFSET 4
696 CFI_REL_OFFSET eax, 0
697 pushl %ebp
698 CFI_ADJUST_CFA_OFFSET 4
699 CFI_REL_OFFSET ebp, 0
700 pushl %edi
701 CFI_ADJUST_CFA_OFFSET 4
702 CFI_REL_OFFSET edi, 0
703 pushl %esi
704 CFI_ADJUST_CFA_OFFSET 4
705 CFI_REL_OFFSET esi, 0
706 pushl %edx
707 CFI_ADJUST_CFA_OFFSET 4
708 CFI_REL_OFFSET edx, 0
709 pushl %ecx
710 CFI_ADJUST_CFA_OFFSET 4
711 CFI_REL_OFFSET ecx, 0
712 pushl %ebx
713 CFI_ADJUST_CFA_OFFSET 4
714 CFI_REL_OFFSET ebx, 0
715 cld
716 pushl %fs
717 CFI_ADJUST_CFA_OFFSET 4
718 /*CFI_REL_OFFSET fs, 0*/
719 movl $(__KERNEL_PERCPU), %ecx
720 movl %ecx, %fs
721 UNWIND_ESPFIX_STACK
722 popl %ecx
723 CFI_ADJUST_CFA_OFFSET -4
724 /*CFI_REGISTER es, ecx*/
725 movl PT_FS(%esp), %edi # get the function address
726 movl PT_ORIG_EAX(%esp), %edx # get the error code
727 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
728 mov %ecx, PT_FS(%esp)
729 /*CFI_REL_OFFSET fs, ES*/
730 movl $(__USER_DS), %ecx
731 movl %ecx, %ds
732 movl %ecx, %es
733 TRACE_IRQS_OFF
734 movl %esp,%eax # pt_regs pointer
735 call *%edi
736 jmp ret_from_exception
737 CFI_ENDPROC
738KPROBE_END(page_fault)
739
740ENTRY(coprocessor_error) 691ENTRY(coprocessor_error)
741 RING0_INT_FRAME 692 RING0_INT_FRAME
742 pushl $0 693 pushl $0
@@ -767,140 +718,6 @@ ENTRY(device_not_available)
767 CFI_ENDPROC 718 CFI_ENDPROC
768END(device_not_available) 719END(device_not_available)
769 720
770/*
771 * Debug traps and NMI can happen at the one SYSENTER instruction
772 * that sets up the real kernel stack. Check here, since we can't
773 * allow the wrong stack to be used.
774 *
775 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
776 * already pushed 3 words if it hits on the sysenter instruction:
777 * eflags, cs and eip.
778 *
779 * We just load the right stack, and push the three (known) values
780 * by hand onto the new stack - while updating the return eip past
781 * the instruction that would have done it for sysenter.
782 */
783#define FIX_STACK(offset, ok, label) \
784 cmpw $__KERNEL_CS,4(%esp); \
785 jne ok; \
786label: \
787 movl TSS_sysenter_sp0+offset(%esp),%esp; \
788 CFI_DEF_CFA esp, 0; \
789 CFI_UNDEFINED eip; \
790 pushfl; \
791 CFI_ADJUST_CFA_OFFSET 4; \
792 pushl $__KERNEL_CS; \
793 CFI_ADJUST_CFA_OFFSET 4; \
794 pushl $sysenter_past_esp; \
795 CFI_ADJUST_CFA_OFFSET 4; \
796 CFI_REL_OFFSET eip, 0
797
798KPROBE_ENTRY(debug)
799 RING0_INT_FRAME
800 cmpl $ia32_sysenter_target,(%esp)
801 jne debug_stack_correct
802 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
803debug_stack_correct:
804 pushl $-1 # mark this as an int
805 CFI_ADJUST_CFA_OFFSET 4
806 SAVE_ALL
807 TRACE_IRQS_OFF
808 xorl %edx,%edx # error code 0
809 movl %esp,%eax # pt_regs pointer
810 call do_debug
811 jmp ret_from_exception
812 CFI_ENDPROC
813KPROBE_END(debug)
814
815/*
816 * NMI is doubly nasty. It can happen _while_ we're handling
817 * a debug fault, and the debug fault hasn't yet been able to
818 * clear up the stack. So we first check whether we got an
819 * NMI on the sysenter entry path, but after that we need to
820 * check whether we got an NMI on the debug path where the debug
821 * fault happened on the sysenter path.
822 */
823KPROBE_ENTRY(nmi)
824 RING0_INT_FRAME
825 pushl %eax
826 CFI_ADJUST_CFA_OFFSET 4
827 movl %ss, %eax
828 cmpw $__ESPFIX_SS, %ax
829 popl %eax
830 CFI_ADJUST_CFA_OFFSET -4
831 je nmi_espfix_stack
832 cmpl $ia32_sysenter_target,(%esp)
833 je nmi_stack_fixup
834 pushl %eax
835 CFI_ADJUST_CFA_OFFSET 4
836 movl %esp,%eax
837 /* Do not access memory above the end of our stack page,
838 * it might not exist.
839 */
840 andl $(THREAD_SIZE-1),%eax
841 cmpl $(THREAD_SIZE-20),%eax
842 popl %eax
843 CFI_ADJUST_CFA_OFFSET -4
844 jae nmi_stack_correct
845 cmpl $ia32_sysenter_target,12(%esp)
846 je nmi_debug_stack_check
847nmi_stack_correct:
848 /* We have a RING0_INT_FRAME here */
849 pushl %eax
850 CFI_ADJUST_CFA_OFFSET 4
851 SAVE_ALL
852 TRACE_IRQS_OFF
853 xorl %edx,%edx # zero error code
854 movl %esp,%eax # pt_regs pointer
855 call do_nmi
856 jmp restore_nocheck_notrace
857 CFI_ENDPROC
858
859nmi_stack_fixup:
860 RING0_INT_FRAME
861 FIX_STACK(12,nmi_stack_correct, 1)
862 jmp nmi_stack_correct
863
864nmi_debug_stack_check:
865 /* We have a RING0_INT_FRAME here */
866 cmpw $__KERNEL_CS,16(%esp)
867 jne nmi_stack_correct
868 cmpl $debug,(%esp)
869 jb nmi_stack_correct
870 cmpl $debug_esp_fix_insn,(%esp)
871 ja nmi_stack_correct
872 FIX_STACK(24,nmi_stack_correct, 1)
873 jmp nmi_stack_correct
874
875nmi_espfix_stack:
876 /* We have a RING0_INT_FRAME here.
877 *
878 * create the pointer to lss back
879 */
880 pushl %ss
881 CFI_ADJUST_CFA_OFFSET 4
882 pushl %esp
883 CFI_ADJUST_CFA_OFFSET 4
884 addw $4, (%esp)
885 /* copy the iret frame of 12 bytes */
886 .rept 3
887 pushl 16(%esp)
888 CFI_ADJUST_CFA_OFFSET 4
889 .endr
890 pushl %eax
891 CFI_ADJUST_CFA_OFFSET 4
892 SAVE_ALL
893 TRACE_IRQS_OFF
894 FIXUP_ESPFIX_STACK # %eax == %esp
895 xorl %edx,%edx # zero error code
896 call do_nmi
897 RESTORE_REGS
898 lss 12+4(%esp), %esp # back to espfix stack
899 CFI_ADJUST_CFA_OFFSET -24
900 jmp irq_return
901 CFI_ENDPROC
902KPROBE_END(nmi)
903
904#ifdef CONFIG_PARAVIRT 721#ifdef CONFIG_PARAVIRT
905ENTRY(native_iret) 722ENTRY(native_iret)
906 iret 723 iret
@@ -916,19 +733,6 @@ ENTRY(native_irq_enable_sysexit)
916END(native_irq_enable_sysexit) 733END(native_irq_enable_sysexit)
917#endif 734#endif
918 735
919KPROBE_ENTRY(int3)
920 RING0_INT_FRAME
921 pushl $-1 # mark this as an int
922 CFI_ADJUST_CFA_OFFSET 4
923 SAVE_ALL
924 TRACE_IRQS_OFF
925 xorl %edx,%edx # zero error code
926 movl %esp,%eax # pt_regs pointer
927 call do_int3
928 jmp ret_from_exception
929 CFI_ENDPROC
930KPROBE_END(int3)
931
932ENTRY(overflow) 736ENTRY(overflow)
933 RING0_INT_FRAME 737 RING0_INT_FRAME
934 pushl $0 738 pushl $0
@@ -993,14 +797,6 @@ ENTRY(stack_segment)
993 CFI_ENDPROC 797 CFI_ENDPROC
994END(stack_segment) 798END(stack_segment)
995 799
996KPROBE_ENTRY(general_protection)
997 RING0_EC_FRAME
998 pushl $do_general_protection
999 CFI_ADJUST_CFA_OFFSET 4
1000 jmp error_code
1001 CFI_ENDPROC
1002KPROBE_END(general_protection)
1003
1004ENTRY(alignment_check) 800ENTRY(alignment_check)
1005 RING0_EC_FRAME 801 RING0_EC_FRAME
1006 pushl $do_alignment_check 802 pushl $do_alignment_check
@@ -1051,6 +847,7 @@ ENTRY(kernel_thread_helper)
1051 push %eax 847 push %eax
1052 CFI_ADJUST_CFA_OFFSET 4 848 CFI_ADJUST_CFA_OFFSET 4
1053 call do_exit 849 call do_exit
850 ud2 # padding for call trace
1054 CFI_ENDPROC 851 CFI_ENDPROC
1055ENDPROC(kernel_thread_helper) 852ENDPROC(kernel_thread_helper)
1056 853
@@ -1210,3 +1007,227 @@ END(mcount)
1210#include "syscall_table_32.S" 1007#include "syscall_table_32.S"
1211 1008
1212syscall_table_size=(.-sys_call_table) 1009syscall_table_size=(.-sys_call_table)
1010
1011/*
1012 * Some functions should be protected against kprobes
1013 */
1014 .pushsection .kprobes.text, "ax"
1015
1016ENTRY(page_fault)
1017 RING0_EC_FRAME
1018 pushl $do_page_fault
1019 CFI_ADJUST_CFA_OFFSET 4
1020 ALIGN
1021error_code:
1022 /* the function address is in %fs's slot on the stack */
1023 pushl %es
1024 CFI_ADJUST_CFA_OFFSET 4
1025 /*CFI_REL_OFFSET es, 0*/
1026 pushl %ds
1027 CFI_ADJUST_CFA_OFFSET 4
1028 /*CFI_REL_OFFSET ds, 0*/
1029 pushl %eax
1030 CFI_ADJUST_CFA_OFFSET 4
1031 CFI_REL_OFFSET eax, 0
1032 pushl %ebp
1033 CFI_ADJUST_CFA_OFFSET 4
1034 CFI_REL_OFFSET ebp, 0
1035 pushl %edi
1036 CFI_ADJUST_CFA_OFFSET 4
1037 CFI_REL_OFFSET edi, 0
1038 pushl %esi
1039 CFI_ADJUST_CFA_OFFSET 4
1040 CFI_REL_OFFSET esi, 0
1041 pushl %edx
1042 CFI_ADJUST_CFA_OFFSET 4
1043 CFI_REL_OFFSET edx, 0
1044 pushl %ecx
1045 CFI_ADJUST_CFA_OFFSET 4
1046 CFI_REL_OFFSET ecx, 0
1047 pushl %ebx
1048 CFI_ADJUST_CFA_OFFSET 4
1049 CFI_REL_OFFSET ebx, 0
1050 cld
1051 pushl %fs
1052 CFI_ADJUST_CFA_OFFSET 4
1053 /*CFI_REL_OFFSET fs, 0*/
1054 movl $(__KERNEL_PERCPU), %ecx
1055 movl %ecx, %fs
1056 UNWIND_ESPFIX_STACK
1057 popl %ecx
1058 CFI_ADJUST_CFA_OFFSET -4
1059 /*CFI_REGISTER es, ecx*/
1060 movl PT_FS(%esp), %edi # get the function address
1061 movl PT_ORIG_EAX(%esp), %edx # get the error code
1062 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1063 mov %ecx, PT_FS(%esp)
1064 /*CFI_REL_OFFSET fs, ES*/
1065 movl $(__USER_DS), %ecx
1066 movl %ecx, %ds
1067 movl %ecx, %es
1068 TRACE_IRQS_OFF
1069 movl %esp,%eax # pt_regs pointer
1070 call *%edi
1071 jmp ret_from_exception
1072 CFI_ENDPROC
1073END(page_fault)
1074
1075/*
1076 * Debug traps and NMI can happen at the one SYSENTER instruction
1077 * that sets up the real kernel stack. Check here, since we can't
1078 * allow the wrong stack to be used.
1079 *
1080 * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
1081 * already pushed 3 words if it hits on the sysenter instruction:
1082 * eflags, cs and eip.
1083 *
1084 * We just load the right stack, and push the three (known) values
1085 * by hand onto the new stack - while updating the return eip past
1086 * the instruction that would have done it for sysenter.
1087 */
1088#define FIX_STACK(offset, ok, label) \
1089 cmpw $__KERNEL_CS,4(%esp); \
1090 jne ok; \
1091label: \
1092 movl TSS_sysenter_sp0+offset(%esp),%esp; \
1093 CFI_DEF_CFA esp, 0; \
1094 CFI_UNDEFINED eip; \
1095 pushfl; \
1096 CFI_ADJUST_CFA_OFFSET 4; \
1097 pushl $__KERNEL_CS; \
1098 CFI_ADJUST_CFA_OFFSET 4; \
1099 pushl $sysenter_past_esp; \
1100 CFI_ADJUST_CFA_OFFSET 4; \
1101 CFI_REL_OFFSET eip, 0
1102
1103ENTRY(debug)
1104 RING0_INT_FRAME
1105 cmpl $ia32_sysenter_target,(%esp)
1106 jne debug_stack_correct
1107 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
1108debug_stack_correct:
1109 pushl $-1 # mark this as an int
1110 CFI_ADJUST_CFA_OFFSET 4
1111 SAVE_ALL
1112 TRACE_IRQS_OFF
1113 xorl %edx,%edx # error code 0
1114 movl %esp,%eax # pt_regs pointer
1115 call do_debug
1116 jmp ret_from_exception
1117 CFI_ENDPROC
1118END(debug)
1119
1120/*
1121 * NMI is doubly nasty. It can happen _while_ we're handling
1122 * a debug fault, and the debug fault hasn't yet been able to
1123 * clear up the stack. So we first check whether we got an
1124 * NMI on the sysenter entry path, but after that we need to
1125 * check whether we got an NMI on the debug path where the debug
1126 * fault happened on the sysenter path.
1127 */
1128ENTRY(nmi)
1129 RING0_INT_FRAME
1130 pushl %eax
1131 CFI_ADJUST_CFA_OFFSET 4
1132 movl %ss, %eax
1133 cmpw $__ESPFIX_SS, %ax
1134 popl %eax
1135 CFI_ADJUST_CFA_OFFSET -4
1136 je nmi_espfix_stack
1137 cmpl $ia32_sysenter_target,(%esp)
1138 je nmi_stack_fixup
1139 pushl %eax
1140 CFI_ADJUST_CFA_OFFSET 4
1141 movl %esp,%eax
1142 /* Do not access memory above the end of our stack page,
1143 * it might not exist.
1144 */
1145 andl $(THREAD_SIZE-1),%eax
1146 cmpl $(THREAD_SIZE-20),%eax
1147 popl %eax
1148 CFI_ADJUST_CFA_OFFSET -4
1149 jae nmi_stack_correct
1150 cmpl $ia32_sysenter_target,12(%esp)
1151 je nmi_debug_stack_check
1152nmi_stack_correct:
1153 /* We have a RING0_INT_FRAME here */
1154 pushl %eax
1155 CFI_ADJUST_CFA_OFFSET 4
1156 SAVE_ALL
1157 TRACE_IRQS_OFF
1158 xorl %edx,%edx # zero error code
1159 movl %esp,%eax # pt_regs pointer
1160 call do_nmi
1161 jmp restore_nocheck_notrace
1162 CFI_ENDPROC
1163
1164nmi_stack_fixup:
1165 RING0_INT_FRAME
1166 FIX_STACK(12,nmi_stack_correct, 1)
1167 jmp nmi_stack_correct
1168
1169nmi_debug_stack_check:
1170 /* We have a RING0_INT_FRAME here */
1171 cmpw $__KERNEL_CS,16(%esp)
1172 jne nmi_stack_correct
1173 cmpl $debug,(%esp)
1174 jb nmi_stack_correct
1175 cmpl $debug_esp_fix_insn,(%esp)
1176 ja nmi_stack_correct
1177 FIX_STACK(24,nmi_stack_correct, 1)
1178 jmp nmi_stack_correct
1179
1180nmi_espfix_stack:
1181 /* We have a RING0_INT_FRAME here.
1182 *
1183 * create the pointer to lss back
1184 */
1185 pushl %ss
1186 CFI_ADJUST_CFA_OFFSET 4
1187 pushl %esp
1188 CFI_ADJUST_CFA_OFFSET 4
1189 addw $4, (%esp)
1190 /* copy the iret frame of 12 bytes */
1191 .rept 3
1192 pushl 16(%esp)
1193 CFI_ADJUST_CFA_OFFSET 4
1194 .endr
1195 pushl %eax
1196 CFI_ADJUST_CFA_OFFSET 4
1197 SAVE_ALL
1198 TRACE_IRQS_OFF
1199 FIXUP_ESPFIX_STACK # %eax == %esp
1200 xorl %edx,%edx # zero error code
1201 call do_nmi
1202 RESTORE_REGS
1203 lss 12+4(%esp), %esp # back to espfix stack
1204 CFI_ADJUST_CFA_OFFSET -24
1205 jmp irq_return
1206 CFI_ENDPROC
1207END(nmi)
1208
1209ENTRY(int3)
1210 RING0_INT_FRAME
1211 pushl $-1 # mark this as an int
1212 CFI_ADJUST_CFA_OFFSET 4
1213 SAVE_ALL
1214 TRACE_IRQS_OFF
1215 xorl %edx,%edx # zero error code
1216 movl %esp,%eax # pt_regs pointer
1217 call do_int3
1218 jmp ret_from_exception
1219 CFI_ENDPROC
1220END(int3)
1221
1222ENTRY(general_protection)
1223 RING0_EC_FRAME
1224 pushl $do_general_protection
1225 CFI_ADJUST_CFA_OFFSET 4
1226 jmp error_code
1227 CFI_ENDPROC
1228END(general_protection)
1229
1230/*
1231 * End of kprobes section
1232 */
1233 .popsection
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..3194636a4293 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -11,15 +11,15 @@
11 * 11 *
12 * NOTE: This code handles signal-recognition, which happens every time 12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call. 13 * after an interrupt and after each system call.
14 * 14 *
15 * Normal syscalls and interrupts don't save a full stack frame, this is 15 * Normal syscalls and interrupts don't save a full stack frame, this is
16 * only done for syscall tracing, signals or fork/exec et.al. 16 * only done for syscall tracing, signals or fork/exec et.al.
17 * 17 *
18 * A note on terminology: 18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP 19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack. 20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers upto R11. 21 * - partial stack frame: partially saved registers upto R11.
22 * - full stack frame: Like partial stack frame, but all register saved. 22 * - full stack frame: Like partial stack frame, but all register saved.
23 * 23 *
24 * Some macro usage: 24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better 25 * - CFI macros are used to generate dwarf2 unwind information for better
@@ -60,7 +60,6 @@
60#define __AUDIT_ARCH_LE 0x40000000 60#define __AUDIT_ARCH_LE 0x40000000
61 61
62 .code64 62 .code64
63
64#ifdef CONFIG_FUNCTION_TRACER 63#ifdef CONFIG_FUNCTION_TRACER
65#ifdef CONFIG_DYNAMIC_FTRACE 64#ifdef CONFIG_DYNAMIC_FTRACE
66ENTRY(mcount) 65ENTRY(mcount)
@@ -142,7 +141,7 @@ END(mcount)
142 141
143#ifndef CONFIG_PREEMPT 142#ifndef CONFIG_PREEMPT
144#define retint_kernel retint_restore_args 143#define retint_kernel retint_restore_args
145#endif 144#endif
146 145
147#ifdef CONFIG_PARAVIRT 146#ifdef CONFIG_PARAVIRT
148ENTRY(native_usergs_sysret64) 147ENTRY(native_usergs_sysret64)
@@ -161,29 +160,29 @@ ENTRY(native_usergs_sysret64)
161.endm 160.endm
162 161
163/* 162/*
164 * C code is not supposed to know about undefined top of stack. Every time 163 * C code is not supposed to know about undefined top of stack. Every time
165 * a C function with an pt_regs argument is called from the SYSCALL based 164 * a C function with an pt_regs argument is called from the SYSCALL based
166 * fast path FIXUP_TOP_OF_STACK is needed. 165 * fast path FIXUP_TOP_OF_STACK is needed.
167 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs 166 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
168 * manipulation. 167 * manipulation.
169 */ 168 */
170 169
171 /* %rsp:at FRAMEEND */ 170 /* %rsp:at FRAMEEND */
172 .macro FIXUP_TOP_OF_STACK tmp 171 .macro FIXUP_TOP_OF_STACK tmp offset=0
173 movq %gs:pda_oldrsp,\tmp 172 movq %gs:pda_oldrsp,\tmp
174 movq \tmp,RSP(%rsp) 173 movq \tmp,RSP+\offset(%rsp)
175 movq $__USER_DS,SS(%rsp) 174 movq $__USER_DS,SS+\offset(%rsp)
176 movq $__USER_CS,CS(%rsp) 175 movq $__USER_CS,CS+\offset(%rsp)
177 movq $-1,RCX(%rsp) 176 movq $-1,RCX+\offset(%rsp)
178 movq R11(%rsp),\tmp /* get eflags */ 177 movq R11+\offset(%rsp),\tmp /* get eflags */
179 movq \tmp,EFLAGS(%rsp) 178 movq \tmp,EFLAGS+\offset(%rsp)
180 .endm 179 .endm
181 180
182 .macro RESTORE_TOP_OF_STACK tmp,offset=0 181 .macro RESTORE_TOP_OF_STACK tmp offset=0
183 movq RSP-\offset(%rsp),\tmp 182 movq RSP+\offset(%rsp),\tmp
184 movq \tmp,%gs:pda_oldrsp 183 movq \tmp,%gs:pda_oldrsp
185 movq EFLAGS-\offset(%rsp),\tmp 184 movq EFLAGS+\offset(%rsp),\tmp
186 movq \tmp,R11-\offset(%rsp) 185 movq \tmp,R11+\offset(%rsp)
187 .endm 186 .endm
188 187
189 .macro FAKE_STACK_FRAME child_rip 188 .macro FAKE_STACK_FRAME child_rip
@@ -195,7 +194,7 @@ ENTRY(native_usergs_sysret64)
195 pushq %rax /* rsp */ 194 pushq %rax /* rsp */
196 CFI_ADJUST_CFA_OFFSET 8 195 CFI_ADJUST_CFA_OFFSET 8
197 CFI_REL_OFFSET rsp,0 196 CFI_REL_OFFSET rsp,0
198 pushq $(1<<9) /* eflags - interrupts on */ 197 pushq $X86_EFLAGS_IF /* eflags - interrupts on */
199 CFI_ADJUST_CFA_OFFSET 8 198 CFI_ADJUST_CFA_OFFSET 8
200 /*CFI_REL_OFFSET rflags,0*/ 199 /*CFI_REL_OFFSET rflags,0*/
201 pushq $__KERNEL_CS /* cs */ 200 pushq $__KERNEL_CS /* cs */
@@ -213,62 +212,184 @@ ENTRY(native_usergs_sysret64)
213 CFI_ADJUST_CFA_OFFSET -(6*8) 212 CFI_ADJUST_CFA_OFFSET -(6*8)
214 .endm 213 .endm
215 214
216 .macro CFI_DEFAULT_STACK start=1 215/*
216 * initial frame state for interrupts (and exceptions without error code)
217 */
218 .macro EMPTY_FRAME start=1 offset=0
217 .if \start 219 .if \start
218 CFI_STARTPROC simple 220 CFI_STARTPROC simple
219 CFI_SIGNAL_FRAME 221 CFI_SIGNAL_FRAME
220 CFI_DEF_CFA rsp,SS+8 222 CFI_DEF_CFA rsp,8+\offset
221 .else 223 .else
222 CFI_DEF_CFA_OFFSET SS+8 224 CFI_DEF_CFA_OFFSET 8+\offset
223 .endif 225 .endif
224 CFI_REL_OFFSET r15,R15
225 CFI_REL_OFFSET r14,R14
226 CFI_REL_OFFSET r13,R13
227 CFI_REL_OFFSET r12,R12
228 CFI_REL_OFFSET rbp,RBP
229 CFI_REL_OFFSET rbx,RBX
230 CFI_REL_OFFSET r11,R11
231 CFI_REL_OFFSET r10,R10
232 CFI_REL_OFFSET r9,R9
233 CFI_REL_OFFSET r8,R8
234 CFI_REL_OFFSET rax,RAX
235 CFI_REL_OFFSET rcx,RCX
236 CFI_REL_OFFSET rdx,RDX
237 CFI_REL_OFFSET rsi,RSI
238 CFI_REL_OFFSET rdi,RDI
239 CFI_REL_OFFSET rip,RIP
240 /*CFI_REL_OFFSET cs,CS*/
241 /*CFI_REL_OFFSET rflags,EFLAGS*/
242 CFI_REL_OFFSET rsp,RSP
243 /*CFI_REL_OFFSET ss,SS*/
244 .endm 226 .endm
227
228/*
229 * initial frame state for interrupts (and exceptions without error code)
230 */
231 .macro INTR_FRAME start=1 offset=0
232 EMPTY_FRAME \start, SS+8+\offset-RIP
233 /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
234 CFI_REL_OFFSET rsp, RSP+\offset-RIP
235 /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
236 /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
237 CFI_REL_OFFSET rip, RIP+\offset-RIP
238 .endm
239
240/*
241 * initial frame state for exceptions with error code (and interrupts
242 * with vector already pushed)
243 */
244 .macro XCPT_FRAME start=1 offset=0
245 INTR_FRAME \start, RIP+\offset-ORIG_RAX
246 /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
247 .endm
248
249/*
250 * frame that enables calling into C.
251 */
252 .macro PARTIAL_FRAME start=1 offset=0
253 XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
254 CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
255 CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
256 CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
257 CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
258 CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
259 CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
260 CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
261 CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
262 CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
263 .endm
264
265/*
266 * frame that enables passing a complete pt_regs to a C function.
267 */
268 .macro DEFAULT_FRAME start=1 offset=0
269 PARTIAL_FRAME \start, R11+\offset-R15
270 CFI_REL_OFFSET rbx, RBX+\offset
271 CFI_REL_OFFSET rbp, RBP+\offset
272 CFI_REL_OFFSET r12, R12+\offset
273 CFI_REL_OFFSET r13, R13+\offset
274 CFI_REL_OFFSET r14, R14+\offset
275 CFI_REL_OFFSET r15, R15+\offset
276 .endm
277
278/* save partial stack frame */
279ENTRY(save_args)
280 XCPT_FRAME
281 cld
282 movq_cfi rdi, RDI+16-ARGOFFSET
283 movq_cfi rsi, RSI+16-ARGOFFSET
284 movq_cfi rdx, RDX+16-ARGOFFSET
285 movq_cfi rcx, RCX+16-ARGOFFSET
286 movq_cfi rax, RAX+16-ARGOFFSET
287 movq_cfi r8, R8+16-ARGOFFSET
288 movq_cfi r9, R9+16-ARGOFFSET
289 movq_cfi r10, R10+16-ARGOFFSET
290 movq_cfi r11, R11+16-ARGOFFSET
291
292 leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
293 movq_cfi rbp, 8 /* push %rbp */
294 leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
295 testl $3, CS(%rdi)
296 je 1f
297 SWAPGS
298 /*
299 * irqcount is used to check if a CPU is already on an interrupt stack
300 * or not. While this is essentially redundant with preempt_count it is
301 * a little cheaper to use a separate counter in the PDA (short of
302 * moving irq_enter into assembly, which would be too much work)
303 */
3041: incl %gs:pda_irqcount
305 jne 2f
306 popq_cfi %rax /* move return address... */
307 mov %gs:pda_irqstackptr,%rsp
308 EMPTY_FRAME 0
309 pushq_cfi %rax /* ... to the new stack */
310 /*
311 * We entered an interrupt context - irqs are off:
312 */
3132: TRACE_IRQS_OFF
314 ret
315 CFI_ENDPROC
316END(save_args)
317
318ENTRY(save_rest)
319 PARTIAL_FRAME 1 REST_SKIP+8
320 movq 5*8+16(%rsp), %r11 /* save return address */
321 movq_cfi rbx, RBX+16
322 movq_cfi rbp, RBP+16
323 movq_cfi r12, R12+16
324 movq_cfi r13, R13+16
325 movq_cfi r14, R14+16
326 movq_cfi r15, R15+16
327 movq %r11, 8(%rsp) /* return address */
328 FIXUP_TOP_OF_STACK %r11, 16
329 ret
330 CFI_ENDPROC
331END(save_rest)
332
333/* save complete stack frame */
334ENTRY(save_paranoid)
335 XCPT_FRAME 1 RDI+8
336 cld
337 movq_cfi rdi, RDI+8
338 movq_cfi rsi, RSI+8
339 movq_cfi rdx, RDX+8
340 movq_cfi rcx, RCX+8
341 movq_cfi rax, RAX+8
342 movq_cfi r8, R8+8
343 movq_cfi r9, R9+8
344 movq_cfi r10, R10+8
345 movq_cfi r11, R11+8
346 movq_cfi rbx, RBX+8
347 movq_cfi rbp, RBP+8
348 movq_cfi r12, R12+8
349 movq_cfi r13, R13+8
350 movq_cfi r14, R14+8
351 movq_cfi r15, R15+8
352 movl $1,%ebx
353 movl $MSR_GS_BASE,%ecx
354 rdmsr
355 testl %edx,%edx
356 js 1f /* negative -> in kernel */
357 SWAPGS
358 xorl %ebx,%ebx
3591: ret
360 CFI_ENDPROC
361END(save_paranoid)
362
245/* 363/*
246 * A newly forked process directly context switches into this. 364 * A newly forked process directly context switches into this address.
247 */ 365 *
248/* rdi: prev */ 366 * rdi: prev task we switched from
367 */
249ENTRY(ret_from_fork) 368ENTRY(ret_from_fork)
250 CFI_DEFAULT_STACK 369 DEFAULT_FRAME
370
251 push kernel_eflags(%rip) 371 push kernel_eflags(%rip)
252 CFI_ADJUST_CFA_OFFSET 8 372 CFI_ADJUST_CFA_OFFSET 8
253 popf # reset kernel eflags 373 popf # reset kernel eflags
254 CFI_ADJUST_CFA_OFFSET -8 374 CFI_ADJUST_CFA_OFFSET -8
255 call schedule_tail 375
376 call schedule_tail # rdi: 'prev' task parameter
377
256 GET_THREAD_INFO(%rcx) 378 GET_THREAD_INFO(%rcx)
257 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 379
258 jnz rff_trace 380 CFI_REMEMBER_STATE
259rff_action:
260 RESTORE_REST 381 RESTORE_REST
261 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? 382
383 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
262 je int_ret_from_sys_call 384 je int_ret_from_sys_call
263 testl $_TIF_IA32,TI_flags(%rcx) 385
386 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
264 jnz int_ret_from_sys_call 387 jnz int_ret_from_sys_call
265 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET 388
266 jmp ret_from_sys_call 389 RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
267rff_trace: 390 jmp ret_from_sys_call # go to the SYSRET fastpath
268 movq %rsp,%rdi 391
269 call syscall_trace_leave 392 CFI_RESTORE_STATE
270 GET_THREAD_INFO(%rcx)
271 jmp rff_action
272 CFI_ENDPROC 393 CFI_ENDPROC
273END(ret_from_fork) 394END(ret_from_fork)
274 395
@@ -278,20 +399,20 @@ END(ret_from_fork)
278 * SYSCALL does not save anything on the stack and does not change the 399 * SYSCALL does not save anything on the stack and does not change the
279 * stack pointer. 400 * stack pointer.
280 */ 401 */
281 402
282/* 403/*
283 * Register setup: 404 * Register setup:
284 * rax system call number 405 * rax system call number
285 * rdi arg0 406 * rdi arg0
286 * rcx return address for syscall/sysret, C arg3 407 * rcx return address for syscall/sysret, C arg3
287 * rsi arg1 408 * rsi arg1
288 * rdx arg2 409 * rdx arg2
289 * r10 arg3 (--> moved to rcx for C) 410 * r10 arg3 (--> moved to rcx for C)
290 * r8 arg4 411 * r8 arg4
291 * r9 arg5 412 * r9 arg5
292 * r11 eflags for syscall/sysret, temporary for C 413 * r11 eflags for syscall/sysret, temporary for C
293 * r12-r15,rbp,rbx saved by C code, not touched. 414 * r12-r15,rbp,rbx saved by C code, not touched.
294 * 415 *
295 * Interrupts are off on entry. 416 * Interrupts are off on entry.
296 * Only called from user space. 417 * Only called from user space.
297 * 418 *
@@ -301,7 +422,7 @@ END(ret_from_fork)
301 * When user can change the frames always force IRET. That is because 422 * When user can change the frames always force IRET. That is because
302 * it deals with uncanonical addresses better. SYSRET has trouble 423 * it deals with uncanonical addresses better. SYSRET has trouble
303 * with them due to bugs in both AMD and Intel CPUs. 424 * with them due to bugs in both AMD and Intel CPUs.
304 */ 425 */
305 426
306ENTRY(system_call) 427ENTRY(system_call)
307 CFI_STARTPROC simple 428 CFI_STARTPROC simple
@@ -317,7 +438,7 @@ ENTRY(system_call)
317 */ 438 */
318ENTRY(system_call_after_swapgs) 439ENTRY(system_call_after_swapgs)
319 440
320 movq %rsp,%gs:pda_oldrsp 441 movq %rsp,%gs:pda_oldrsp
321 movq %gs:pda_kernelstack,%rsp 442 movq %gs:pda_kernelstack,%rsp
322 /* 443 /*
323 * No need to follow this irqs off/on section - it's straight 444 * No need to follow this irqs off/on section - it's straight
@@ -325,7 +446,7 @@ ENTRY(system_call_after_swapgs)
325 */ 446 */
326 ENABLE_INTERRUPTS(CLBR_NONE) 447 ENABLE_INTERRUPTS(CLBR_NONE)
327 SAVE_ARGS 8,1 448 SAVE_ARGS 8,1
328 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 449 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
329 movq %rcx,RIP-ARGOFFSET(%rsp) 450 movq %rcx,RIP-ARGOFFSET(%rsp)
330 CFI_REL_OFFSET rip,RIP-ARGOFFSET 451 CFI_REL_OFFSET rip,RIP-ARGOFFSET
331 GET_THREAD_INFO(%rcx) 452 GET_THREAD_INFO(%rcx)
@@ -339,19 +460,19 @@ system_call_fastpath:
339 movq %rax,RAX-ARGOFFSET(%rsp) 460 movq %rax,RAX-ARGOFFSET(%rsp)
340/* 461/*
341 * Syscall return path ending with SYSRET (fast path) 462 * Syscall return path ending with SYSRET (fast path)
342 * Has incomplete stack frame and undefined top of stack. 463 * Has incomplete stack frame and undefined top of stack.
343 */ 464 */
344ret_from_sys_call: 465ret_from_sys_call:
345 movl $_TIF_ALLWORK_MASK,%edi 466 movl $_TIF_ALLWORK_MASK,%edi
346 /* edi: flagmask */ 467 /* edi: flagmask */
347sysret_check: 468sysret_check:
348 LOCKDEP_SYS_EXIT 469 LOCKDEP_SYS_EXIT
349 GET_THREAD_INFO(%rcx) 470 GET_THREAD_INFO(%rcx)
350 DISABLE_INTERRUPTS(CLBR_NONE) 471 DISABLE_INTERRUPTS(CLBR_NONE)
351 TRACE_IRQS_OFF 472 TRACE_IRQS_OFF
352 movl TI_flags(%rcx),%edx 473 movl TI_flags(%rcx),%edx
353 andl %edi,%edx 474 andl %edi,%edx
354 jnz sysret_careful 475 jnz sysret_careful
355 CFI_REMEMBER_STATE 476 CFI_REMEMBER_STATE
356 /* 477 /*
357 * sysretq will re-enable interrupts: 478 * sysretq will re-enable interrupts:
@@ -366,7 +487,7 @@ sysret_check:
366 487
367 CFI_RESTORE_STATE 488 CFI_RESTORE_STATE
368 /* Handle reschedules */ 489 /* Handle reschedules */
369 /* edx: work, edi: workmask */ 490 /* edx: work, edi: workmask */
370sysret_careful: 491sysret_careful:
371 bt $TIF_NEED_RESCHED,%edx 492 bt $TIF_NEED_RESCHED,%edx
372 jnc sysret_signal 493 jnc sysret_signal
@@ -379,7 +500,7 @@ sysret_careful:
379 CFI_ADJUST_CFA_OFFSET -8 500 CFI_ADJUST_CFA_OFFSET -8
380 jmp sysret_check 501 jmp sysret_check
381 502
382 /* Handle a signal */ 503 /* Handle a signal */
383sysret_signal: 504sysret_signal:
384 TRACE_IRQS_ON 505 TRACE_IRQS_ON
385 ENABLE_INTERRUPTS(CLBR_NONE) 506 ENABLE_INTERRUPTS(CLBR_NONE)
@@ -388,17 +509,20 @@ sysret_signal:
388 jc sysret_audit 509 jc sysret_audit
389#endif 510#endif
390 /* edx: work flags (arg3) */ 511 /* edx: work flags (arg3) */
391 leaq do_notify_resume(%rip),%rax
392 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 512 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
393 xorl %esi,%esi # oldset -> arg2 513 xorl %esi,%esi # oldset -> arg2
394 call ptregscall_common 514 SAVE_REST
515 FIXUP_TOP_OF_STACK %r11
516 call do_notify_resume
517 RESTORE_TOP_OF_STACK %r11
518 RESTORE_REST
395 movl $_TIF_WORK_MASK,%edi 519 movl $_TIF_WORK_MASK,%edi
396 /* Use IRET because user could have changed frame. This 520 /* Use IRET because user could have changed frame. This
397 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 521 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
398 DISABLE_INTERRUPTS(CLBR_NONE) 522 DISABLE_INTERRUPTS(CLBR_NONE)
399 TRACE_IRQS_OFF 523 TRACE_IRQS_OFF
400 jmp int_with_check 524 jmp int_with_check
401 525
402badsys: 526badsys:
403 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) 527 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
404 jmp ret_from_sys_call 528 jmp ret_from_sys_call
@@ -437,7 +561,7 @@ sysret_audit:
437#endif /* CONFIG_AUDITSYSCALL */ 561#endif /* CONFIG_AUDITSYSCALL */
438 562
439 /* Do syscall tracing */ 563 /* Do syscall tracing */
440tracesys: 564tracesys:
441#ifdef CONFIG_AUDITSYSCALL 565#ifdef CONFIG_AUDITSYSCALL
442 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 566 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
443 jz auditsys 567 jz auditsys
@@ -460,8 +584,8 @@ tracesys:
460 call *sys_call_table(,%rax,8) 584 call *sys_call_table(,%rax,8)
461 movq %rax,RAX-ARGOFFSET(%rsp) 585 movq %rax,RAX-ARGOFFSET(%rsp)
462 /* Use IRET because user could have changed frame */ 586 /* Use IRET because user could have changed frame */
463 587
464/* 588/*
465 * Syscall return path ending with IRET. 589 * Syscall return path ending with IRET.
466 * Has correct top of stack, but partial stack frame. 590 * Has correct top of stack, but partial stack frame.
467 */ 591 */
@@ -505,18 +629,18 @@ int_very_careful:
505 TRACE_IRQS_ON 629 TRACE_IRQS_ON
506 ENABLE_INTERRUPTS(CLBR_NONE) 630 ENABLE_INTERRUPTS(CLBR_NONE)
507 SAVE_REST 631 SAVE_REST
508 /* Check for syscall exit trace */ 632 /* Check for syscall exit trace */
509 testl $_TIF_WORK_SYSCALL_EXIT,%edx 633 testl $_TIF_WORK_SYSCALL_EXIT,%edx
510 jz int_signal 634 jz int_signal
511 pushq %rdi 635 pushq %rdi
512 CFI_ADJUST_CFA_OFFSET 8 636 CFI_ADJUST_CFA_OFFSET 8
513 leaq 8(%rsp),%rdi # &ptregs -> arg1 637 leaq 8(%rsp),%rdi # &ptregs -> arg1
514 call syscall_trace_leave 638 call syscall_trace_leave
515 popq %rdi 639 popq %rdi
516 CFI_ADJUST_CFA_OFFSET -8 640 CFI_ADJUST_CFA_OFFSET -8
517 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi 641 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
518 jmp int_restore_rest 642 jmp int_restore_rest
519 643
520int_signal: 644int_signal:
521 testl $_TIF_DO_NOTIFY_MASK,%edx 645 testl $_TIF_DO_NOTIFY_MASK,%edx
522 jz 1f 646 jz 1f
@@ -531,22 +655,24 @@ int_restore_rest:
531 jmp int_with_check 655 jmp int_with_check
532 CFI_ENDPROC 656 CFI_ENDPROC
533END(system_call) 657END(system_call)
534 658
535/* 659/*
536 * Certain special system calls that need to save a complete full stack frame. 660 * Certain special system calls that need to save a complete full stack frame.
537 */ 661 */
538
539 .macro PTREGSCALL label,func,arg 662 .macro PTREGSCALL label,func,arg
540 .globl \label 663ENTRY(\label)
541\label: 664 PARTIAL_FRAME 1 8 /* offset 8: return address */
542 leaq \func(%rip),%rax 665 subq $REST_SKIP, %rsp
543 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ 666 CFI_ADJUST_CFA_OFFSET REST_SKIP
544 jmp ptregscall_common 667 call save_rest
668 DEFAULT_FRAME 0 8 /* offset 8: return address */
669 leaq 8(%rsp), \arg /* pt_regs pointer */
670 call \func
671 jmp ptregscall_common
672 CFI_ENDPROC
545END(\label) 673END(\label)
546 .endm 674 .endm
547 675
548 CFI_STARTPROC
549
550 PTREGSCALL stub_clone, sys_clone, %r8 676 PTREGSCALL stub_clone, sys_clone, %r8
551 PTREGSCALL stub_fork, sys_fork, %rdi 677 PTREGSCALL stub_fork, sys_fork, %rdi
552 PTREGSCALL stub_vfork, sys_vfork, %rdi 678 PTREGSCALL stub_vfork, sys_vfork, %rdi
@@ -554,25 +680,18 @@ END(\label)
554 PTREGSCALL stub_iopl, sys_iopl, %rsi 680 PTREGSCALL stub_iopl, sys_iopl, %rsi
555 681
556ENTRY(ptregscall_common) 682ENTRY(ptregscall_common)
557 popq %r11 683 DEFAULT_FRAME 1 8 /* offset 8: return address */
558 CFI_ADJUST_CFA_OFFSET -8 684 RESTORE_TOP_OF_STACK %r11, 8
559 CFI_REGISTER rip, r11 685 movq_cfi_restore R15+8, r15
560 SAVE_REST 686 movq_cfi_restore R14+8, r14
561 movq %r11, %r15 687 movq_cfi_restore R13+8, r13
562 CFI_REGISTER rip, r15 688 movq_cfi_restore R12+8, r12
563 FIXUP_TOP_OF_STACK %r11 689 movq_cfi_restore RBP+8, rbp
564 call *%rax 690 movq_cfi_restore RBX+8, rbx
565 RESTORE_TOP_OF_STACK %r11 691 ret $REST_SKIP /* pop extended registers */
566 movq %r15, %r11
567 CFI_REGISTER rip, r11
568 RESTORE_REST
569 pushq %r11
570 CFI_ADJUST_CFA_OFFSET 8
571 CFI_REL_OFFSET rip, 0
572 ret
573 CFI_ENDPROC 692 CFI_ENDPROC
574END(ptregscall_common) 693END(ptregscall_common)
575 694
576ENTRY(stub_execve) 695ENTRY(stub_execve)
577 CFI_STARTPROC 696 CFI_STARTPROC
578 popq %r11 697 popq %r11
@@ -588,11 +707,11 @@ ENTRY(stub_execve)
588 jmp int_ret_from_sys_call 707 jmp int_ret_from_sys_call
589 CFI_ENDPROC 708 CFI_ENDPROC
590END(stub_execve) 709END(stub_execve)
591 710
592/* 711/*
593 * sigreturn is special because it needs to restore all registers on return. 712 * sigreturn is special because it needs to restore all registers on return.
594 * This cannot be done with SYSRET, so use the IRET return path instead. 713 * This cannot be done with SYSRET, so use the IRET return path instead.
595 */ 714 */
596ENTRY(stub_rt_sigreturn) 715ENTRY(stub_rt_sigreturn)
597 CFI_STARTPROC 716 CFI_STARTPROC
598 addq $8, %rsp 717 addq $8, %rsp
@@ -608,70 +727,70 @@ ENTRY(stub_rt_sigreturn)
608END(stub_rt_sigreturn) 727END(stub_rt_sigreturn)
609 728
610/* 729/*
611 * initial frame state for interrupts and exceptions 730 * Build the entry stubs and pointer table with some assembler magic.
731 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
732 * single cache line on all modern x86 implementations.
612 */ 733 */
613 .macro _frame ref 734 .section .init.rodata,"a"
614 CFI_STARTPROC simple 735ENTRY(interrupt)
615 CFI_SIGNAL_FRAME 736 .text
616 CFI_DEF_CFA rsp,SS+8-\ref 737 .p2align 5
617 /*CFI_REL_OFFSET ss,SS-\ref*/ 738 .p2align CONFIG_X86_L1_CACHE_SHIFT
618 CFI_REL_OFFSET rsp,RSP-\ref 739ENTRY(irq_entries_start)
619 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ 740 INTR_FRAME
620 /*CFI_REL_OFFSET cs,CS-\ref*/ 741vector=FIRST_EXTERNAL_VECTOR
621 CFI_REL_OFFSET rip,RIP-\ref 742.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
622 .endm 743 .balign 32
744 .rept 7
745 .if vector < NR_VECTORS
746 .if vector <> FIRST_EXTERNAL_VECTOR
747 CFI_ADJUST_CFA_OFFSET -8
748 .endif
7491: pushq $(~vector+0x80) /* Note: always in signed byte range */
750 CFI_ADJUST_CFA_OFFSET 8
751 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
752 jmp 2f
753 .endif
754 .previous
755 .quad 1b
756 .text
757vector=vector+1
758 .endif
759 .endr
7602: jmp common_interrupt
761.endr
762 CFI_ENDPROC
763END(irq_entries_start)
623 764
624/* initial frame state for interrupts (and exceptions without error code) */ 765.previous
625#define INTR_FRAME _frame RIP 766END(interrupt)
626/* initial frame state for exceptions with error code (and interrupts with 767.previous
627 vector already pushed) */
628#define XCPT_FRAME _frame ORIG_RAX
629 768
630/* 769/*
631 * Interrupt entry/exit. 770 * Interrupt entry/exit.
632 * 771 *
633 * Interrupt entry points save only callee clobbered registers in fast path. 772 * Interrupt entry points save only callee clobbered registers in fast path.
634 * 773 *
635 * Entry runs with interrupts off. 774 * Entry runs with interrupts off.
636 */ 775 */
637 776
638/* 0(%rsp): interrupt number */ 777/* 0(%rsp): ~(interrupt number) */
639 .macro interrupt func 778 .macro interrupt func
640 cld 779 subq $10*8, %rsp
641 SAVE_ARGS 780 CFI_ADJUST_CFA_OFFSET 10*8
642 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 781 call save_args
643 pushq %rbp 782 PARTIAL_FRAME 0
644 /*
645 * Save rbp twice: One is for marking the stack frame, as usual, and the
646 * other, to fill pt_regs properly. This is because bx comes right
647 * before the last saved register in that structure, and not bp. If the
648 * base pointer were in the place bx is today, this would not be needed.
649 */
650 movq %rbp, -8(%rsp)
651 CFI_ADJUST_CFA_OFFSET 8
652 CFI_REL_OFFSET rbp, 0
653 movq %rsp,%rbp
654 CFI_DEF_CFA_REGISTER rbp
655 testl $3,CS(%rdi)
656 je 1f
657 SWAPGS
658 /* irqcount is used to check if a CPU is already on an interrupt
659 stack or not. While this is essentially redundant with preempt_count
660 it is a little cheaper to use a separate counter in the PDA
661 (short of moving irq_enter into assembly, which would be too
662 much work) */
6631: incl %gs:pda_irqcount
664 cmoveq %gs:pda_irqstackptr,%rsp
665 push %rbp # backlink for old unwinder
666 /*
667 * We entered an interrupt context - irqs are off:
668 */
669 TRACE_IRQS_OFF
670 call \func 783 call \func
671 .endm 784 .endm
672 785
673ENTRY(common_interrupt) 786 /*
787 * The interrupt stubs push (~vector+0x80) onto the stack and
788 * then jump to common_interrupt.
789 */
790 .p2align CONFIG_X86_L1_CACHE_SHIFT
791common_interrupt:
674 XCPT_FRAME 792 XCPT_FRAME
793 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
675 interrupt do_IRQ 794 interrupt do_IRQ
676 /* 0(%rsp): oldrsp-ARGOFFSET */ 795 /* 0(%rsp): oldrsp-ARGOFFSET */
677ret_from_intr: 796ret_from_intr:
@@ -685,12 +804,12 @@ exit_intr:
685 GET_THREAD_INFO(%rcx) 804 GET_THREAD_INFO(%rcx)
686 testl $3,CS-ARGOFFSET(%rsp) 805 testl $3,CS-ARGOFFSET(%rsp)
687 je retint_kernel 806 je retint_kernel
688 807
689 /* Interrupt came from user space */ 808 /* Interrupt came from user space */
690 /* 809 /*
691 * Has a correct top of stack, but a partial stack frame 810 * Has a correct top of stack, but a partial stack frame
692 * %rcx: thread info. Interrupts off. 811 * %rcx: thread info. Interrupts off.
693 */ 812 */
694retint_with_reschedule: 813retint_with_reschedule:
695 movl $_TIF_WORK_MASK,%edi 814 movl $_TIF_WORK_MASK,%edi
696retint_check: 815retint_check:
@@ -763,20 +882,20 @@ retint_careful:
763 pushq %rdi 882 pushq %rdi
764 CFI_ADJUST_CFA_OFFSET 8 883 CFI_ADJUST_CFA_OFFSET 8
765 call schedule 884 call schedule
766 popq %rdi 885 popq %rdi
767 CFI_ADJUST_CFA_OFFSET -8 886 CFI_ADJUST_CFA_OFFSET -8
768 GET_THREAD_INFO(%rcx) 887 GET_THREAD_INFO(%rcx)
769 DISABLE_INTERRUPTS(CLBR_NONE) 888 DISABLE_INTERRUPTS(CLBR_NONE)
770 TRACE_IRQS_OFF 889 TRACE_IRQS_OFF
771 jmp retint_check 890 jmp retint_check
772 891
773retint_signal: 892retint_signal:
774 testl $_TIF_DO_NOTIFY_MASK,%edx 893 testl $_TIF_DO_NOTIFY_MASK,%edx
775 jz retint_swapgs 894 jz retint_swapgs
776 TRACE_IRQS_ON 895 TRACE_IRQS_ON
777 ENABLE_INTERRUPTS(CLBR_NONE) 896 ENABLE_INTERRUPTS(CLBR_NONE)
778 SAVE_REST 897 SAVE_REST
779 movq $-1,ORIG_RAX(%rsp) 898 movq $-1,ORIG_RAX(%rsp)
780 xorl %esi,%esi # oldset 899 xorl %esi,%esi # oldset
781 movq %rsp,%rdi # &pt_regs 900 movq %rsp,%rdi # &pt_regs
782 call do_notify_resume 901 call do_notify_resume
@@ -798,324 +917,211 @@ ENTRY(retint_kernel)
798 jnc retint_restore_args 917 jnc retint_restore_args
799 call preempt_schedule_irq 918 call preempt_schedule_irq
800 jmp exit_intr 919 jmp exit_intr
801#endif 920#endif
802 921
803 CFI_ENDPROC 922 CFI_ENDPROC
804END(common_interrupt) 923END(common_interrupt)
805 924
806/* 925/*
807 * APIC interrupts. 926 * APIC interrupts.
808 */ 927 */
809 .macro apicinterrupt num,func 928.macro apicinterrupt num sym do_sym
929ENTRY(\sym)
810 INTR_FRAME 930 INTR_FRAME
811 pushq $~(\num) 931 pushq $~(\num)
812 CFI_ADJUST_CFA_OFFSET 8 932 CFI_ADJUST_CFA_OFFSET 8
813 interrupt \func 933 interrupt \do_sym
814 jmp ret_from_intr 934 jmp ret_from_intr
815 CFI_ENDPROC 935 CFI_ENDPROC
816 .endm 936END(\sym)
817 937.endm
818ENTRY(thermal_interrupt)
819 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
820END(thermal_interrupt)
821
822ENTRY(threshold_interrupt)
823 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
824END(threshold_interrupt)
825
826#ifdef CONFIG_SMP
827ENTRY(reschedule_interrupt)
828 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
829END(reschedule_interrupt)
830
831 .macro INVALIDATE_ENTRY num
832ENTRY(invalidate_interrupt\num)
833 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
834END(invalidate_interrupt\num)
835 .endm
836 938
837 INVALIDATE_ENTRY 0 939#ifdef CONFIG_SMP
838 INVALIDATE_ENTRY 1 940apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
839 INVALIDATE_ENTRY 2 941 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
840 INVALIDATE_ENTRY 3
841 INVALIDATE_ENTRY 4
842 INVALIDATE_ENTRY 5
843 INVALIDATE_ENTRY 6
844 INVALIDATE_ENTRY 7
845
846ENTRY(call_function_interrupt)
847 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
848END(call_function_interrupt)
849ENTRY(call_function_single_interrupt)
850 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
851END(call_function_single_interrupt)
852ENTRY(irq_move_cleanup_interrupt)
853 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
854END(irq_move_cleanup_interrupt)
855#endif 942#endif
856 943
857ENTRY(apic_timer_interrupt) 944apicinterrupt UV_BAU_MESSAGE \
858 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 945 uv_bau_message_intr1 uv_bau_message_interrupt
859END(apic_timer_interrupt) 946apicinterrupt LOCAL_TIMER_VECTOR \
947 apic_timer_interrupt smp_apic_timer_interrupt
948
949#ifdef CONFIG_SMP
950apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
951 invalidate_interrupt0 smp_invalidate_interrupt
952apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
953 invalidate_interrupt1 smp_invalidate_interrupt
954apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
955 invalidate_interrupt2 smp_invalidate_interrupt
956apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
957 invalidate_interrupt3 smp_invalidate_interrupt
958apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
959 invalidate_interrupt4 smp_invalidate_interrupt
960apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
961 invalidate_interrupt5 smp_invalidate_interrupt
962apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
963 invalidate_interrupt6 smp_invalidate_interrupt
964apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
965 invalidate_interrupt7 smp_invalidate_interrupt
966#endif
860 967
861ENTRY(uv_bau_message_intr1) 968apicinterrupt THRESHOLD_APIC_VECTOR \
862 apicinterrupt 220,uv_bau_message_interrupt 969 threshold_interrupt mce_threshold_interrupt
863END(uv_bau_message_intr1) 970apicinterrupt THERMAL_APIC_VECTOR \
971 thermal_interrupt smp_thermal_interrupt
972
973#ifdef CONFIG_SMP
974apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
975 call_function_single_interrupt smp_call_function_single_interrupt
976apicinterrupt CALL_FUNCTION_VECTOR \
977 call_function_interrupt smp_call_function_interrupt
978apicinterrupt RESCHEDULE_VECTOR \
979 reschedule_interrupt smp_reschedule_interrupt
980#endif
864 981
865ENTRY(error_interrupt) 982apicinterrupt ERROR_APIC_VECTOR \
866 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt 983 error_interrupt smp_error_interrupt
867END(error_interrupt) 984apicinterrupt SPURIOUS_APIC_VECTOR \
985 spurious_interrupt smp_spurious_interrupt
868 986
869ENTRY(spurious_interrupt)
870 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
871END(spurious_interrupt)
872
873/* 987/*
874 * Exception entry points. 988 * Exception entry points.
875 */ 989 */
876 .macro zeroentry sym 990.macro zeroentry sym do_sym
991ENTRY(\sym)
877 INTR_FRAME 992 INTR_FRAME
878 PARAVIRT_ADJUST_EXCEPTION_FRAME 993 PARAVIRT_ADJUST_EXCEPTION_FRAME
879 pushq $0 /* push error code/oldrax */ 994 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
880 CFI_ADJUST_CFA_OFFSET 8 995 subq $15*8,%rsp
881 pushq %rax /* push real oldrax to the rdi slot */ 996 CFI_ADJUST_CFA_OFFSET 15*8
882 CFI_ADJUST_CFA_OFFSET 8 997 call error_entry
883 CFI_REL_OFFSET rax,0 998 DEFAULT_FRAME 0
884 leaq \sym(%rip),%rax 999 movq %rsp,%rdi /* pt_regs pointer */
885 jmp error_entry 1000 xorl %esi,%esi /* no error code */
1001 call \do_sym
1002 jmp error_exit /* %ebx: no swapgs flag */
886 CFI_ENDPROC 1003 CFI_ENDPROC
887 .endm 1004END(\sym)
1005.endm
888 1006
889 .macro errorentry sym 1007.macro paranoidzeroentry sym do_sym
890 XCPT_FRAME 1008ENTRY(\sym)
1009 INTR_FRAME
891 PARAVIRT_ADJUST_EXCEPTION_FRAME 1010 PARAVIRT_ADJUST_EXCEPTION_FRAME
892 pushq %rax 1011 pushq $-1 /* ORIG_RAX: no syscall to restart */
893 CFI_ADJUST_CFA_OFFSET 8 1012 CFI_ADJUST_CFA_OFFSET 8
894 CFI_REL_OFFSET rax,0 1013 subq $15*8, %rsp
895 leaq \sym(%rip),%rax 1014 call save_paranoid
896 jmp error_entry 1015 TRACE_IRQS_OFF
1016 movq %rsp,%rdi /* pt_regs pointer */
1017 xorl %esi,%esi /* no error code */
1018 call \do_sym
1019 jmp paranoid_exit /* %ebx: no swapgs flag */
897 CFI_ENDPROC 1020 CFI_ENDPROC
898 .endm 1021END(\sym)
1022.endm
899 1023
900 /* error code is on the stack already */ 1024.macro paranoidzeroentry_ist sym do_sym ist
901 /* handle NMI like exceptions that can happen everywhere */ 1025ENTRY(\sym)
902 .macro paranoidentry sym, ist=0, irqtrace=1 1026 INTR_FRAME
903 SAVE_ALL 1027 PARAVIRT_ADJUST_EXCEPTION_FRAME
904 cld 1028 pushq $-1 /* ORIG_RAX: no syscall to restart */
905 movl $1,%ebx 1029 CFI_ADJUST_CFA_OFFSET 8
906 movl $MSR_GS_BASE,%ecx 1030 subq $15*8, %rsp
907 rdmsr 1031 call save_paranoid
908 testl %edx,%edx
909 js 1f
910 SWAPGS
911 xorl %ebx,%ebx
9121:
913 .if \ist
914 movq %gs:pda_data_offset, %rbp
915 .endif
916 .if \irqtrace
917 TRACE_IRQS_OFF
918 .endif
919 movq %rsp,%rdi
920 movq ORIG_RAX(%rsp),%rsi
921 movq $-1,ORIG_RAX(%rsp)
922 .if \ist
923 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
924 .endif
925 call \sym
926 .if \ist
927 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
928 .endif
929 DISABLE_INTERRUPTS(CLBR_NONE)
930 .if \irqtrace
931 TRACE_IRQS_OFF 1032 TRACE_IRQS_OFF
932 .endif 1033 movq %rsp,%rdi /* pt_regs pointer */
933 .endm 1034 xorl %esi,%esi /* no error code */
1035 movq %gs:pda_data_offset, %rbp
1036 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
1037 call \do_sym
1038 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
1039 jmp paranoid_exit /* %ebx: no swapgs flag */
1040 CFI_ENDPROC
1041END(\sym)
1042.endm
934 1043
935 /* 1044.macro errorentry sym do_sym
936 * "Paranoid" exit path from exception stack. 1045ENTRY(\sym)
937 * Paranoid because this is used by NMIs and cannot take 1046 XCPT_FRAME
938 * any kernel state for granted. 1047 PARAVIRT_ADJUST_EXCEPTION_FRAME
939 * We don't do kernel preemption checks here, because only 1048 subq $15*8,%rsp
940 * NMI should be common and it does not enable IRQs and 1049 CFI_ADJUST_CFA_OFFSET 15*8
941 * cannot get reschedule ticks. 1050 call error_entry
942 * 1051 DEFAULT_FRAME 0
943 * "trace" is 0 for the NMI handler only, because irq-tracing 1052 movq %rsp,%rdi /* pt_regs pointer */
944 * is fundamentally NMI-unsafe. (we cannot change the soft and 1053 movq ORIG_RAX(%rsp),%rsi /* get error code */
945 * hard flags at once, atomically) 1054 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
946 */ 1055 call \do_sym
947 .macro paranoidexit trace=1 1056 jmp error_exit /* %ebx: no swapgs flag */
948 /* ebx: no swapgs flag */
949paranoid_exit\trace:
950 testl %ebx,%ebx /* swapgs needed? */
951 jnz paranoid_restore\trace
952 testl $3,CS(%rsp)
953 jnz paranoid_userspace\trace
954paranoid_swapgs\trace:
955 .if \trace
956 TRACE_IRQS_IRETQ 0
957 .endif
958 SWAPGS_UNSAFE_STACK
959paranoid_restore\trace:
960 RESTORE_ALL 8
961 jmp irq_return
962paranoid_userspace\trace:
963 GET_THREAD_INFO(%rcx)
964 movl TI_flags(%rcx),%ebx
965 andl $_TIF_WORK_MASK,%ebx
966 jz paranoid_swapgs\trace
967 movq %rsp,%rdi /* &pt_regs */
968 call sync_regs
969 movq %rax,%rsp /* switch stack for scheduling */
970 testl $_TIF_NEED_RESCHED,%ebx
971 jnz paranoid_schedule\trace
972 movl %ebx,%edx /* arg3: thread flags */
973 .if \trace
974 TRACE_IRQS_ON
975 .endif
976 ENABLE_INTERRUPTS(CLBR_NONE)
977 xorl %esi,%esi /* arg2: oldset */
978 movq %rsp,%rdi /* arg1: &pt_regs */
979 call do_notify_resume
980 DISABLE_INTERRUPTS(CLBR_NONE)
981 .if \trace
982 TRACE_IRQS_OFF
983 .endif
984 jmp paranoid_userspace\trace
985paranoid_schedule\trace:
986 .if \trace
987 TRACE_IRQS_ON
988 .endif
989 ENABLE_INTERRUPTS(CLBR_ANY)
990 call schedule
991 DISABLE_INTERRUPTS(CLBR_ANY)
992 .if \trace
993 TRACE_IRQS_OFF
994 .endif
995 jmp paranoid_userspace\trace
996 CFI_ENDPROC 1057 CFI_ENDPROC
997 .endm 1058END(\sym)
1059.endm
998 1060
999/* 1061 /* error code is on the stack already */
1000 * Exception entry point. This expects an error code/orig_rax on the stack 1062.macro paranoiderrorentry sym do_sym
1001 * and the exception handler in %rax. 1063ENTRY(\sym)
1002 */ 1064 XCPT_FRAME
1003KPROBE_ENTRY(error_entry) 1065 PARAVIRT_ADJUST_EXCEPTION_FRAME
1004 _frame RDI 1066 subq $15*8,%rsp
1005 CFI_REL_OFFSET rax,0 1067 CFI_ADJUST_CFA_OFFSET 15*8
1006 /* rdi slot contains rax, oldrax contains error code */ 1068 call save_paranoid
1007 cld 1069 DEFAULT_FRAME 0
1008 subq $14*8,%rsp
1009 CFI_ADJUST_CFA_OFFSET (14*8)
1010 movq %rsi,13*8(%rsp)
1011 CFI_REL_OFFSET rsi,RSI
1012 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
1013 CFI_REGISTER rax,rsi
1014 movq %rdx,12*8(%rsp)
1015 CFI_REL_OFFSET rdx,RDX
1016 movq %rcx,11*8(%rsp)
1017 CFI_REL_OFFSET rcx,RCX
1018 movq %rsi,10*8(%rsp) /* store rax */
1019 CFI_REL_OFFSET rax,RAX
1020 movq %r8, 9*8(%rsp)
1021 CFI_REL_OFFSET r8,R8
1022 movq %r9, 8*8(%rsp)
1023 CFI_REL_OFFSET r9,R9
1024 movq %r10,7*8(%rsp)
1025 CFI_REL_OFFSET r10,R10
1026 movq %r11,6*8(%rsp)
1027 CFI_REL_OFFSET r11,R11
1028 movq %rbx,5*8(%rsp)
1029 CFI_REL_OFFSET rbx,RBX
1030 movq %rbp,4*8(%rsp)
1031 CFI_REL_OFFSET rbp,RBP
1032 movq %r12,3*8(%rsp)
1033 CFI_REL_OFFSET r12,R12
1034 movq %r13,2*8(%rsp)
1035 CFI_REL_OFFSET r13,R13
1036 movq %r14,1*8(%rsp)
1037 CFI_REL_OFFSET r14,R14
1038 movq %r15,(%rsp)
1039 CFI_REL_OFFSET r15,R15
1040 xorl %ebx,%ebx
1041 testl $3,CS(%rsp)
1042 je error_kernelspace
1043error_swapgs:
1044 SWAPGS
1045error_sti:
1046 TRACE_IRQS_OFF
1047 movq %rdi,RDI(%rsp)
1048 CFI_REL_OFFSET rdi,RDI
1049 movq %rsp,%rdi
1050 movq ORIG_RAX(%rsp),%rsi /* get error code */
1051 movq $-1,ORIG_RAX(%rsp)
1052 call *%rax
1053 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1054error_exit:
1055 movl %ebx,%eax
1056 RESTORE_REST
1057 DISABLE_INTERRUPTS(CLBR_NONE)
1058 TRACE_IRQS_OFF 1070 TRACE_IRQS_OFF
1059 GET_THREAD_INFO(%rcx) 1071 movq %rsp,%rdi /* pt_regs pointer */
1060 testl %eax,%eax 1072 movq ORIG_RAX(%rsp),%rsi /* get error code */
1061 jne retint_kernel 1073 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1062 LOCKDEP_SYS_EXIT_IRQ 1074 call \do_sym
1063 movl TI_flags(%rcx),%edx 1075 jmp paranoid_exit /* %ebx: no swapgs flag */
1064 movl $_TIF_WORK_MASK,%edi
1065 andl %edi,%edx
1066 jnz retint_careful
1067 jmp retint_swapgs
1068 CFI_ENDPROC 1076 CFI_ENDPROC
1077END(\sym)
1078.endm
1069 1079
1070error_kernelspace: 1080zeroentry divide_error do_divide_error
1071 incl %ebx 1081zeroentry overflow do_overflow
1072 /* There are two places in the kernel that can potentially fault with 1082zeroentry bounds do_bounds
1073 usergs. Handle them here. The exception handlers after 1083zeroentry invalid_op do_invalid_op
1074 iret run with kernel gs again, so don't set the user space flag. 1084zeroentry device_not_available do_device_not_available
1075 B stepping K8s sometimes report an truncated RIP for IRET 1085paranoiderrorentry double_fault do_double_fault
1076 exceptions returning to compat mode. Check for these here too. */ 1086zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
1077 leaq irq_return(%rip),%rcx 1087errorentry invalid_TSS do_invalid_TSS
1078 cmpq %rcx,RIP(%rsp) 1088errorentry segment_not_present do_segment_not_present
1079 je error_swapgs 1089zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1080 movl %ecx,%ecx /* zero extend */ 1090zeroentry coprocessor_error do_coprocessor_error
1081 cmpq %rcx,RIP(%rsp) 1091errorentry alignment_check do_alignment_check
1082 je error_swapgs 1092zeroentry simd_coprocessor_error do_simd_coprocessor_error
1083 cmpq $gs_change,RIP(%rsp) 1093
1084 je error_swapgs 1094 /* Reload gs selector with exception handling */
1085 jmp error_sti 1095 /* edi: new selector */
1086KPROBE_END(error_entry)
1087
1088 /* Reload gs selector with exception handling */
1089 /* edi: new selector */
1090ENTRY(native_load_gs_index) 1096ENTRY(native_load_gs_index)
1091 CFI_STARTPROC 1097 CFI_STARTPROC
1092 pushf 1098 pushf
1093 CFI_ADJUST_CFA_OFFSET 8 1099 CFI_ADJUST_CFA_OFFSET 8
1094 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) 1100 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1095 SWAPGS 1101 SWAPGS
1096gs_change: 1102gs_change:
1097 movl %edi,%gs 1103 movl %edi,%gs
10982: mfence /* workaround */ 11042: mfence /* workaround */
1099 SWAPGS 1105 SWAPGS
1100 popf 1106 popf
1101 CFI_ADJUST_CFA_OFFSET -8 1107 CFI_ADJUST_CFA_OFFSET -8
1102 ret 1108 ret
1103 CFI_ENDPROC 1109 CFI_ENDPROC
1104ENDPROC(native_load_gs_index) 1110END(native_load_gs_index)
1105 1111
1106 .section __ex_table,"a" 1112 .section __ex_table,"a"
1107 .align 8 1113 .align 8
1108 .quad gs_change,bad_gs 1114 .quad gs_change,bad_gs
1109 .previous 1115 .previous
1110 .section .fixup,"ax" 1116 .section .fixup,"ax"
1111 /* running with kernelgs */ 1117 /* running with kernelgs */
1112bad_gs: 1118bad_gs:
1113 SWAPGS /* switch back to user gs */ 1119 SWAPGS /* switch back to user gs */
1114 xorl %eax,%eax 1120 xorl %eax,%eax
1115 movl %eax,%gs 1121 movl %eax,%gs
1116 jmp 2b 1122 jmp 2b
1117 .previous 1123 .previous
1118 1124
1119/* 1125/*
1120 * Create a kernel thread. 1126 * Create a kernel thread.
1121 * 1127 *
@@ -1138,7 +1144,7 @@ ENTRY(kernel_thread)
1138 1144
1139 xorl %r8d,%r8d 1145 xorl %r8d,%r8d
1140 xorl %r9d,%r9d 1146 xorl %r9d,%r9d
1141 1147
1142 # clone now 1148 # clone now
1143 call do_fork 1149 call do_fork
1144 movq %rax,RAX(%rsp) 1150 movq %rax,RAX(%rsp)
@@ -1149,15 +1155,15 @@ ENTRY(kernel_thread)
1149 * so internally to the x86_64 port you can rely on kernel_thread() 1155 * so internally to the x86_64 port you can rely on kernel_thread()
1150 * not to reschedule the child before returning, this avoids the need 1156 * not to reschedule the child before returning, this avoids the need
1151 * of hacks for example to fork off the per-CPU idle tasks. 1157 * of hacks for example to fork off the per-CPU idle tasks.
1152 * [Hopefully no generic code relies on the reschedule -AK] 1158 * [Hopefully no generic code relies on the reschedule -AK]
1153 */ 1159 */
1154 RESTORE_ALL 1160 RESTORE_ALL
1155 UNFAKE_STACK_FRAME 1161 UNFAKE_STACK_FRAME
1156 ret 1162 ret
1157 CFI_ENDPROC 1163 CFI_ENDPROC
1158ENDPROC(kernel_thread) 1164END(kernel_thread)
1159 1165
1160child_rip: 1166ENTRY(child_rip)
1161 pushq $0 # fake return address 1167 pushq $0 # fake return address
1162 CFI_STARTPROC 1168 CFI_STARTPROC
1163 /* 1169 /*
@@ -1170,8 +1176,9 @@ child_rip:
1170 # exit 1176 # exit
1171 mov %eax, %edi 1177 mov %eax, %edi
1172 call do_exit 1178 call do_exit
1179 ud2 # padding for call trace
1173 CFI_ENDPROC 1180 CFI_ENDPROC
1174ENDPROC(child_rip) 1181END(child_rip)
1175 1182
1176/* 1183/*
1177 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. 1184 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1191,10 +1198,10 @@ ENDPROC(child_rip)
1191ENTRY(kernel_execve) 1198ENTRY(kernel_execve)
1192 CFI_STARTPROC 1199 CFI_STARTPROC
1193 FAKE_STACK_FRAME $0 1200 FAKE_STACK_FRAME $0
1194 SAVE_ALL 1201 SAVE_ALL
1195 movq %rsp,%rcx 1202 movq %rsp,%rcx
1196 call sys_execve 1203 call sys_execve
1197 movq %rax, RAX(%rsp) 1204 movq %rax, RAX(%rsp)
1198 RESTORE_REST 1205 RESTORE_REST
1199 testq %rax,%rax 1206 testq %rax,%rax
1200 je int_ret_from_sys_call 1207 je int_ret_from_sys_call
@@ -1202,129 +1209,7 @@ ENTRY(kernel_execve)
1202 UNFAKE_STACK_FRAME 1209 UNFAKE_STACK_FRAME
1203 ret 1210 ret
1204 CFI_ENDPROC 1211 CFI_ENDPROC
1205ENDPROC(kernel_execve) 1212END(kernel_execve)
1206
1207KPROBE_ENTRY(page_fault)
1208 errorentry do_page_fault
1209KPROBE_END(page_fault)
1210
1211ENTRY(coprocessor_error)
1212 zeroentry do_coprocessor_error
1213END(coprocessor_error)
1214
1215ENTRY(simd_coprocessor_error)
1216 zeroentry do_simd_coprocessor_error
1217END(simd_coprocessor_error)
1218
1219ENTRY(device_not_available)
1220 zeroentry do_device_not_available
1221END(device_not_available)
1222
1223 /* runs on exception stack */
1224KPROBE_ENTRY(debug)
1225 INTR_FRAME
1226 PARAVIRT_ADJUST_EXCEPTION_FRAME
1227 pushq $0
1228 CFI_ADJUST_CFA_OFFSET 8
1229 paranoidentry do_debug, DEBUG_STACK
1230 paranoidexit
1231KPROBE_END(debug)
1232
1233 /* runs on exception stack */
1234KPROBE_ENTRY(nmi)
1235 INTR_FRAME
1236 PARAVIRT_ADJUST_EXCEPTION_FRAME
1237 pushq $-1
1238 CFI_ADJUST_CFA_OFFSET 8
1239 paranoidentry do_nmi, 0, 0
1240#ifdef CONFIG_TRACE_IRQFLAGS
1241 paranoidexit 0
1242#else
1243 jmp paranoid_exit1
1244 CFI_ENDPROC
1245#endif
1246KPROBE_END(nmi)
1247
1248KPROBE_ENTRY(int3)
1249 INTR_FRAME
1250 PARAVIRT_ADJUST_EXCEPTION_FRAME
1251 pushq $0
1252 CFI_ADJUST_CFA_OFFSET 8
1253 paranoidentry do_int3, DEBUG_STACK
1254 jmp paranoid_exit1
1255 CFI_ENDPROC
1256KPROBE_END(int3)
1257
1258ENTRY(overflow)
1259 zeroentry do_overflow
1260END(overflow)
1261
1262ENTRY(bounds)
1263 zeroentry do_bounds
1264END(bounds)
1265
1266ENTRY(invalid_op)
1267 zeroentry do_invalid_op
1268END(invalid_op)
1269
1270ENTRY(coprocessor_segment_overrun)
1271 zeroentry do_coprocessor_segment_overrun
1272END(coprocessor_segment_overrun)
1273
1274 /* runs on exception stack */
1275ENTRY(double_fault)
1276 XCPT_FRAME
1277 PARAVIRT_ADJUST_EXCEPTION_FRAME
1278 paranoidentry do_double_fault
1279 jmp paranoid_exit1
1280 CFI_ENDPROC
1281END(double_fault)
1282
1283ENTRY(invalid_TSS)
1284 errorentry do_invalid_TSS
1285END(invalid_TSS)
1286
1287ENTRY(segment_not_present)
1288 errorentry do_segment_not_present
1289END(segment_not_present)
1290
1291 /* runs on exception stack */
1292ENTRY(stack_segment)
1293 XCPT_FRAME
1294 PARAVIRT_ADJUST_EXCEPTION_FRAME
1295 paranoidentry do_stack_segment
1296 jmp paranoid_exit1
1297 CFI_ENDPROC
1298END(stack_segment)
1299
1300KPROBE_ENTRY(general_protection)
1301 errorentry do_general_protection
1302KPROBE_END(general_protection)
1303
1304ENTRY(alignment_check)
1305 errorentry do_alignment_check
1306END(alignment_check)
1307
1308ENTRY(divide_error)
1309 zeroentry do_divide_error
1310END(divide_error)
1311
1312ENTRY(spurious_interrupt_bug)
1313 zeroentry do_spurious_interrupt_bug
1314END(spurious_interrupt_bug)
1315
1316#ifdef CONFIG_X86_MCE
1317 /* runs on exception stack */
1318ENTRY(machine_check)
1319 INTR_FRAME
1320 PARAVIRT_ADJUST_EXCEPTION_FRAME
1321 pushq $0
1322 CFI_ADJUST_CFA_OFFSET 8
1323 paranoidentry do_machine_check
1324 jmp paranoid_exit1
1325 CFI_ENDPROC
1326END(machine_check)
1327#endif
1328 1213
1329/* Call softirq on interrupt stack. Interrupts are off. */ 1214/* Call softirq on interrupt stack. Interrupts are off. */
1330ENTRY(call_softirq) 1215ENTRY(call_softirq)
@@ -1344,40 +1229,33 @@ ENTRY(call_softirq)
1344 decl %gs:pda_irqcount 1229 decl %gs:pda_irqcount
1345 ret 1230 ret
1346 CFI_ENDPROC 1231 CFI_ENDPROC
1347ENDPROC(call_softirq) 1232END(call_softirq)
1348
1349KPROBE_ENTRY(ignore_sysret)
1350 CFI_STARTPROC
1351 mov $-ENOSYS,%eax
1352 sysret
1353 CFI_ENDPROC
1354ENDPROC(ignore_sysret)
1355 1233
1356#ifdef CONFIG_XEN 1234#ifdef CONFIG_XEN
1357ENTRY(xen_hypervisor_callback) 1235zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
1358 zeroentry xen_do_hypervisor_callback
1359END(xen_hypervisor_callback)
1360 1236
1361/* 1237/*
1362# A note on the "critical region" in our callback handler. 1238 * A note on the "critical region" in our callback handler.
1363# We want to avoid stacking callback handlers due to events occurring 1239 * We want to avoid stacking callback handlers due to events occurring
1364# during handling of the last event. To do this, we keep events disabled 1240 * during handling of the last event. To do this, we keep events disabled
1365# until we've done all processing. HOWEVER, we must enable events before 1241 * until we've done all processing. HOWEVER, we must enable events before
1366# popping the stack frame (can't be done atomically) and so it would still 1242 * popping the stack frame (can't be done atomically) and so it would still
1367# be possible to get enough handler activations to overflow the stack. 1243 * be possible to get enough handler activations to overflow the stack.
1368# Although unlikely, bugs of that kind are hard to track down, so we'd 1244 * Although unlikely, bugs of that kind are hard to track down, so we'd
1369# like to avoid the possibility. 1245 * like to avoid the possibility.
1370# So, on entry to the handler we detect whether we interrupted an 1246 * So, on entry to the handler we detect whether we interrupted an
1371# existing activation in its critical region -- if so, we pop the current 1247 * existing activation in its critical region -- if so, we pop the current
1372# activation and restart the handler using the previous one. 1248 * activation and restart the handler using the previous one.
1373*/ 1249 */
1374ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) 1250ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1375 CFI_STARTPROC 1251 CFI_STARTPROC
1376/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will 1252/*
1377 see the correct pointer to the pt_regs */ 1253 * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
1254 * see the correct pointer to the pt_regs
1255 */
1378 movq %rdi, %rsp # we don't return, adjust the stack frame 1256 movq %rdi, %rsp # we don't return, adjust the stack frame
1379 CFI_ENDPROC 1257 CFI_ENDPROC
1380 CFI_DEFAULT_STACK 1258 DEFAULT_FRAME
138111: incl %gs:pda_irqcount 125911: incl %gs:pda_irqcount
1382 movq %rsp,%rbp 1260 movq %rsp,%rbp
1383 CFI_DEF_CFA_REGISTER rbp 1261 CFI_DEF_CFA_REGISTER rbp
@@ -1392,23 +1270,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1392END(do_hypervisor_callback) 1270END(do_hypervisor_callback)
1393 1271
1394/* 1272/*
1395# Hypervisor uses this for application faults while it executes. 1273 * Hypervisor uses this for application faults while it executes.
1396# We get here for two reasons: 1274 * We get here for two reasons:
1397# 1. Fault while reloading DS, ES, FS or GS 1275 * 1. Fault while reloading DS, ES, FS or GS
1398# 2. Fault while executing IRET 1276 * 2. Fault while executing IRET
1399# Category 1 we do not need to fix up as Xen has already reloaded all segment 1277 * Category 1 we do not need to fix up as Xen has already reloaded all segment
1400# registers that could be reloaded and zeroed the others. 1278 * registers that could be reloaded and zeroed the others.
1401# Category 2 we fix up by killing the current process. We cannot use the 1279 * Category 2 we fix up by killing the current process. We cannot use the
1402# normal Linux return path in this case because if we use the IRET hypercall 1280 * normal Linux return path in this case because if we use the IRET hypercall
1403# to pop the stack frame we end up in an infinite loop of failsafe callbacks. 1281 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1404# We distinguish between categories by comparing each saved segment register 1282 * We distinguish between categories by comparing each saved segment register
1405# with its current contents: any discrepancy means we in category 1. 1283 * with its current contents: any discrepancy means we in category 1.
1406*/ 1284 */
1407ENTRY(xen_failsafe_callback) 1285ENTRY(xen_failsafe_callback)
1408 framesz = (RIP-0x30) /* workaround buggy gas */ 1286 INTR_FRAME 1 (6*8)
1409 _frame framesz 1287 /*CFI_REL_OFFSET gs,GS*/
1410 CFI_REL_OFFSET rcx, 0 1288 /*CFI_REL_OFFSET fs,FS*/
1411 CFI_REL_OFFSET r11, 8 1289 /*CFI_REL_OFFSET es,ES*/
1290 /*CFI_REL_OFFSET ds,DS*/
1291 CFI_REL_OFFSET r11,8
1292 CFI_REL_OFFSET rcx,0
1412 movw %ds,%cx 1293 movw %ds,%cx
1413 cmpw %cx,0x10(%rsp) 1294 cmpw %cx,0x10(%rsp)
1414 CFI_REMEMBER_STATE 1295 CFI_REMEMBER_STATE
@@ -1429,12 +1310,9 @@ ENTRY(xen_failsafe_callback)
1429 CFI_RESTORE r11 1310 CFI_RESTORE r11
1430 addq $0x30,%rsp 1311 addq $0x30,%rsp
1431 CFI_ADJUST_CFA_OFFSET -0x30 1312 CFI_ADJUST_CFA_OFFSET -0x30
1432 pushq $0 1313 pushq_cfi $0 /* RIP */
1433 CFI_ADJUST_CFA_OFFSET 8 1314 pushq_cfi %r11
1434 pushq %r11 1315 pushq_cfi %rcx
1435 CFI_ADJUST_CFA_OFFSET 8
1436 pushq %rcx
1437 CFI_ADJUST_CFA_OFFSET 8
1438 jmp general_protection 1316 jmp general_protection
1439 CFI_RESTORE_STATE 1317 CFI_RESTORE_STATE
14401: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ 13181: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1444,11 +1322,223 @@ ENTRY(xen_failsafe_callback)
1444 CFI_RESTORE r11 1322 CFI_RESTORE r11
1445 addq $0x30,%rsp 1323 addq $0x30,%rsp
1446 CFI_ADJUST_CFA_OFFSET -0x30 1324 CFI_ADJUST_CFA_OFFSET -0x30
1447 pushq $0 1325 pushq_cfi $0
1448 CFI_ADJUST_CFA_OFFSET 8
1449 SAVE_ALL 1326 SAVE_ALL
1450 jmp error_exit 1327 jmp error_exit
1451 CFI_ENDPROC 1328 CFI_ENDPROC
1452END(xen_failsafe_callback) 1329END(xen_failsafe_callback)
1453 1330
1454#endif /* CONFIG_XEN */ 1331#endif /* CONFIG_XEN */
1332
1333/*
1334 * Some functions should be protected against kprobes
1335 */
1336 .pushsection .kprobes.text, "ax"
1337
1338paranoidzeroentry_ist debug do_debug DEBUG_STACK
1339paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
1340paranoiderrorentry stack_segment do_stack_segment
1341errorentry general_protection do_general_protection
1342errorentry page_fault do_page_fault
1343#ifdef CONFIG_X86_MCE
1344paranoidzeroentry machine_check do_machine_check
1345#endif
1346
1347 /*
1348 * "Paranoid" exit path from exception stack.
1349 * Paranoid because this is used by NMIs and cannot take
1350 * any kernel state for granted.
1351 * We don't do kernel preemption checks here, because only
1352 * NMI should be common and it does not enable IRQs and
1353 * cannot get reschedule ticks.
1354 *
1355 * "trace" is 0 for the NMI handler only, because irq-tracing
1356 * is fundamentally NMI-unsafe. (we cannot change the soft and
1357 * hard flags at once, atomically)
1358 */
1359
1360 /* ebx: no swapgs flag */
1361ENTRY(paranoid_exit)
1362 INTR_FRAME
1363 DISABLE_INTERRUPTS(CLBR_NONE)
1364 TRACE_IRQS_OFF
1365 testl %ebx,%ebx /* swapgs needed? */
1366 jnz paranoid_restore
1367 testl $3,CS(%rsp)
1368 jnz paranoid_userspace
1369paranoid_swapgs:
1370 TRACE_IRQS_IRETQ 0
1371 SWAPGS_UNSAFE_STACK
1372paranoid_restore:
1373 RESTORE_ALL 8
1374 jmp irq_return
1375paranoid_userspace:
1376 GET_THREAD_INFO(%rcx)
1377 movl TI_flags(%rcx),%ebx
1378 andl $_TIF_WORK_MASK,%ebx
1379 jz paranoid_swapgs
1380 movq %rsp,%rdi /* &pt_regs */
1381 call sync_regs
1382 movq %rax,%rsp /* switch stack for scheduling */
1383 testl $_TIF_NEED_RESCHED,%ebx
1384 jnz paranoid_schedule
1385 movl %ebx,%edx /* arg3: thread flags */
1386 TRACE_IRQS_ON
1387 ENABLE_INTERRUPTS(CLBR_NONE)
1388 xorl %esi,%esi /* arg2: oldset */
1389 movq %rsp,%rdi /* arg1: &pt_regs */
1390 call do_notify_resume
1391 DISABLE_INTERRUPTS(CLBR_NONE)
1392 TRACE_IRQS_OFF
1393 jmp paranoid_userspace
1394paranoid_schedule:
1395 TRACE_IRQS_ON
1396 ENABLE_INTERRUPTS(CLBR_ANY)
1397 call schedule
1398 DISABLE_INTERRUPTS(CLBR_ANY)
1399 TRACE_IRQS_OFF
1400 jmp paranoid_userspace
1401 CFI_ENDPROC
1402END(paranoid_exit)
1403
1404/*
1405 * Exception entry point. This expects an error code/orig_rax on the stack.
1406 * returns in "no swapgs flag" in %ebx.
1407 */
1408ENTRY(error_entry)
1409 XCPT_FRAME
1410 CFI_ADJUST_CFA_OFFSET 15*8
1411 /* oldrax contains error code */
1412 cld
1413 movq_cfi rdi, RDI+8
1414 movq_cfi rsi, RSI+8
1415 movq_cfi rdx, RDX+8
1416 movq_cfi rcx, RCX+8
1417 movq_cfi rax, RAX+8
1418 movq_cfi r8, R8+8
1419 movq_cfi r9, R9+8
1420 movq_cfi r10, R10+8
1421 movq_cfi r11, R11+8
1422 movq_cfi rbx, RBX+8
1423 movq_cfi rbp, RBP+8
1424 movq_cfi r12, R12+8
1425 movq_cfi r13, R13+8
1426 movq_cfi r14, R14+8
1427 movq_cfi r15, R15+8
1428 xorl %ebx,%ebx
1429 testl $3,CS+8(%rsp)
1430 je error_kernelspace
1431error_swapgs:
1432 SWAPGS
1433error_sti:
1434 TRACE_IRQS_OFF
1435 ret
1436 CFI_ENDPROC
1437
1438/*
1439 * There are two places in the kernel that can potentially fault with
1440 * usergs. Handle them here. The exception handlers after iret run with
1441 * kernel gs again, so don't set the user space flag. B stepping K8s
1442 * sometimes report an truncated RIP for IRET exceptions returning to
1443 * compat mode. Check for these here too.
1444 */
1445error_kernelspace:
1446 incl %ebx
1447 leaq irq_return(%rip),%rcx
1448 cmpq %rcx,RIP+8(%rsp)
1449 je error_swapgs
1450 movl %ecx,%ecx /* zero extend */
1451 cmpq %rcx,RIP+8(%rsp)
1452 je error_swapgs
1453 cmpq $gs_change,RIP+8(%rsp)
1454 je error_swapgs
1455 jmp error_sti
1456END(error_entry)
1457
1458
1459/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1460ENTRY(error_exit)
1461 DEFAULT_FRAME
1462 movl %ebx,%eax
1463 RESTORE_REST
1464 DISABLE_INTERRUPTS(CLBR_NONE)
1465 TRACE_IRQS_OFF
1466 GET_THREAD_INFO(%rcx)
1467 testl %eax,%eax
1468 jne retint_kernel
1469 LOCKDEP_SYS_EXIT_IRQ
1470 movl TI_flags(%rcx),%edx
1471 movl $_TIF_WORK_MASK,%edi
1472 andl %edi,%edx
1473 jnz retint_careful
1474 jmp retint_swapgs
1475 CFI_ENDPROC
1476END(error_exit)
1477
1478
1479 /* runs on exception stack */
1480ENTRY(nmi)
1481 INTR_FRAME
1482 PARAVIRT_ADJUST_EXCEPTION_FRAME
1483 pushq_cfi $-1
1484 subq $15*8, %rsp
1485 CFI_ADJUST_CFA_OFFSET 15*8
1486 call save_paranoid
1487 DEFAULT_FRAME 0
1488 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1489 movq %rsp,%rdi
1490 movq $-1,%rsi
1491 call do_nmi
1492#ifdef CONFIG_TRACE_IRQFLAGS
1493 /* paranoidexit; without TRACE_IRQS_OFF */
1494 /* ebx: no swapgs flag */
1495 DISABLE_INTERRUPTS(CLBR_NONE)
1496 testl %ebx,%ebx /* swapgs needed? */
1497 jnz nmi_restore
1498 testl $3,CS(%rsp)
1499 jnz nmi_userspace
1500nmi_swapgs:
1501 SWAPGS_UNSAFE_STACK
1502nmi_restore:
1503 RESTORE_ALL 8
1504 jmp irq_return
1505nmi_userspace:
1506 GET_THREAD_INFO(%rcx)
1507 movl TI_flags(%rcx),%ebx
1508 andl $_TIF_WORK_MASK,%ebx
1509 jz nmi_swapgs
1510 movq %rsp,%rdi /* &pt_regs */
1511 call sync_regs
1512 movq %rax,%rsp /* switch stack for scheduling */
1513 testl $_TIF_NEED_RESCHED,%ebx
1514 jnz nmi_schedule
1515 movl %ebx,%edx /* arg3: thread flags */
1516 ENABLE_INTERRUPTS(CLBR_NONE)
1517 xorl %esi,%esi /* arg2: oldset */
1518 movq %rsp,%rdi /* arg1: &pt_regs */
1519 call do_notify_resume
1520 DISABLE_INTERRUPTS(CLBR_NONE)
1521 jmp nmi_userspace
1522nmi_schedule:
1523 ENABLE_INTERRUPTS(CLBR_ANY)
1524 call schedule
1525 DISABLE_INTERRUPTS(CLBR_ANY)
1526 jmp nmi_userspace
1527 CFI_ENDPROC
1528#else
1529 jmp paranoid_exit
1530 CFI_ENDPROC
1531#endif
1532END(nmi)
1533
1534ENTRY(ignore_sysret)
1535 CFI_STARTPROC
1536 mov $-ENOSYS,%eax
1537 sysret
1538 CFI_ENDPROC
1539END(ignore_sysret)
1540
1541/*
1542 * End of kprobes section
1543 */
1544 .popsection
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index 0aa2c443d600..53699c931ad4 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -38,8 +38,11 @@
38#include <asm/io.h> 38#include <asm/io.h>
39#include <asm/nmi.h> 39#include <asm/nmi.h>
40#include <asm/smp.h> 40#include <asm/smp.h>
41#include <asm/atomic.h>
41#include <asm/apicdef.h> 42#include <asm/apicdef.h>
42#include <mach_mpparse.h> 43#include <mach_mpparse.h>
44#include <asm/genapic.h>
45#include <asm/setup.h>
43 46
44/* 47/*
45 * ES7000 chipsets 48 * ES7000 chipsets
@@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi)
161 return gsi; 164 return gsi;
162} 165}
163 166
167static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
168{
169 unsigned long vect = 0, psaival = 0;
170
171 if (psai == NULL)
172 return -1;
173
174 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
175 psaival = (0x1000000 | vect | cpu);
176
177 while (*psai & 0x1000000)
178 ;
179
180 *psai = psaival;
181
182 return 0;
183}
184
185static void noop_wait_for_deassert(atomic_t *deassert_not_used)
186{
187}
188
189static int __init es7000_update_genapic(void)
190{
191 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
192
193 /* MPENTIUMIII */
194 if (boot_cpu_data.x86 == 6 &&
195 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
196 es7000_update_genapic_to_cluster();
197 genapic->wait_for_init_deassert = noop_wait_for_deassert;
198 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
199 }
200
201 return 0;
202}
203
164void __init 204void __init
165setup_unisys(void) 205setup_unisys(void)
166{ 206{
@@ -176,6 +216,8 @@ setup_unisys(void)
176 else 216 else
177 es7000_plat = ES7000_CLASSIC; 217 es7000_plat = ES7000_CLASSIC;
178 ioapic_renumber_irq = es7000_rename_gsi; 218 ioapic_renumber_irq = es7000_rename_gsi;
219
220 x86_quirks->update_genapic = es7000_update_genapic;
179} 221}
180 222
181/* 223/*
@@ -317,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg)
317 return status; 359 return status;
318} 360}
319 361
320int
321es7000_start_cpu(int cpu, unsigned long eip)
322{
323 unsigned long vect = 0, psaival = 0;
324
325 if (psai == NULL)
326 return -1;
327
328 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
329 psaival = (0x1000000 | vect | cpu);
330
331 while (*psai & 0x1000000)
332 ;
333
334 *psai = psaival;
335
336 return 0;
337
338}
339
340void __init 362void __init
341es7000_sw_apic(void) 363es7000_sw_apic(void)
342{ 364{
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 6c9bfc9e1e95..2bced78b0b8e 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -21,6 +21,7 @@
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
24#include <asm/setup.h>
24 25
25extern struct genapic apic_flat; 26extern struct genapic apic_flat;
26extern struct genapic apic_physflat; 27extern struct genapic apic_physflat;
@@ -53,6 +54,9 @@ void __init setup_apic_routing(void)
53 genapic = &apic_physflat; 54 genapic = &apic_physflat;
54 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 55 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
55 } 56 }
57
58 if (x86_quirks->update_genapic)
59 x86_quirks->update_genapic();
56} 60}
57 61
58/* Same for both flat and physical. */ 62/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2c7dbdb98278..dece17289731 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/cpu.h>
13#include <linux/cpumask.h> 14#include <linux/cpumask.h>
14#include <linux/string.h> 15#include <linux/string.h>
15#include <linux/ctype.h> 16#include <linux/ctype.h>
@@ -17,6 +18,9 @@
17#include <linux/sched.h> 18#include <linux/sched.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/hardirq.h> 20#include <linux/hardirq.h>
21#include <linux/timer.h>
22#include <linux/proc_fs.h>
23#include <asm/current.h>
20#include <asm/smp.h> 24#include <asm/smp.h>
21#include <asm/ipi.h> 25#include <asm/ipi.h>
22#include <asm/genapic.h> 26#include <asm/genapic.h>
@@ -356,6 +360,103 @@ static __init void uv_rtc_init(void)
356} 360}
357 361
358/* 362/*
363 * percpu heartbeat timer
364 */
365static void uv_heartbeat(unsigned long ignored)
366{
367 struct timer_list *timer = &uv_hub_info->scir.timer;
368 unsigned char bits = uv_hub_info->scir.state;
369
370 /* flip heartbeat bit */
371 bits ^= SCIR_CPU_HEARTBEAT;
372
373 /* is this cpu idle? */
374 if (idle_cpu(raw_smp_processor_id()))
375 bits &= ~SCIR_CPU_ACTIVITY;
376 else
377 bits |= SCIR_CPU_ACTIVITY;
378
379 /* update system controller interface reg */
380 uv_set_scir_bits(bits);
381
382 /* enable next timer period */
383 mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
384}
385
386static void __cpuinit uv_heartbeat_enable(int cpu)
387{
388 if (!uv_cpu_hub_info(cpu)->scir.enabled) {
389 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
390
391 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
392 setup_timer(timer, uv_heartbeat, cpu);
393 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
394 add_timer_on(timer, cpu);
395 uv_cpu_hub_info(cpu)->scir.enabled = 1;
396 }
397
398 /* check boot cpu */
399 if (!uv_cpu_hub_info(0)->scir.enabled)
400 uv_heartbeat_enable(0);
401}
402
403#ifdef CONFIG_HOTPLUG_CPU
404static void __cpuinit uv_heartbeat_disable(int cpu)
405{
406 if (uv_cpu_hub_info(cpu)->scir.enabled) {
407 uv_cpu_hub_info(cpu)->scir.enabled = 0;
408 del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
409 }
410 uv_set_cpu_scir_bits(cpu, 0xff);
411}
412
413/*
414 * cpu hotplug notifier
415 */
416static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
417 unsigned long action, void *hcpu)
418{
419 long cpu = (long)hcpu;
420
421 switch (action) {
422 case CPU_ONLINE:
423 uv_heartbeat_enable(cpu);
424 break;
425 case CPU_DOWN_PREPARE:
426 uv_heartbeat_disable(cpu);
427 break;
428 default:
429 break;
430 }
431 return NOTIFY_OK;
432}
433
434static __init void uv_scir_register_cpu_notifier(void)
435{
436 hotcpu_notifier(uv_scir_cpu_notify, 0);
437}
438
439#else /* !CONFIG_HOTPLUG_CPU */
440
441static __init void uv_scir_register_cpu_notifier(void)
442{
443}
444
445static __init int uv_init_heartbeat(void)
446{
447 int cpu;
448
449 if (is_uv_system())
450 for_each_online_cpu(cpu)
451 uv_heartbeat_enable(cpu);
452 return 0;
453}
454
455late_initcall(uv_init_heartbeat);
456
457#endif /* !CONFIG_HOTPLUG_CPU */
458
459/*
359 * Called on each cpu to initialize the per_cpu UV data area. 460 * Called on each cpu to initialize the per_cpu UV data area.
360 * ZZZ hotplug not supported yet 461 * ZZZ hotplug not supported yet
361 */ 462 */
@@ -428,7 +529,7 @@ void __init uv_system_init(void)
428 529
429 uv_bios_init(); 530 uv_bios_init();
430 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, 531 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
431 &uv_coherency_id, &uv_region_size); 532 &sn_coherency_id, &sn_region_size);
432 uv_rtc_init(); 533 uv_rtc_init();
433 534
434 for_each_present_cpu(cpu) { 535 for_each_present_cpu(cpu) {
@@ -439,8 +540,7 @@ void __init uv_system_init(void)
439 uv_blade_info[blade].nr_possible_cpus++; 540 uv_blade_info[blade].nr_possible_cpus++;
440 541
441 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; 542 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
442 uv_cpu_hub_info(cpu)->lowmem_remap_top = 543 uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
443 lowmem_redir_base + lowmem_redir_size;
444 uv_cpu_hub_info(cpu)->m_val = m_val; 544 uv_cpu_hub_info(cpu)->m_val = m_val;
445 uv_cpu_hub_info(cpu)->n_val = m_val; 545 uv_cpu_hub_info(cpu)->n_val = m_val;
446 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 546 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
@@ -450,7 +550,8 @@ void __init uv_system_init(void)
450 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; 550 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
451 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 551 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
452 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 552 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
453 uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; 553 uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
554 uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
454 uv_node_to_blade[nid] = blade; 555 uv_node_to_blade[nid] = blade;
455 uv_cpu_to_blade[cpu] = blade; 556 uv_cpu_to_blade[cpu] = blade;
456 max_pnode = max(pnode, max_pnode); 557 max_pnode = max(pnode, max_pnode);
@@ -467,4 +568,6 @@ void __init uv_system_init(void)
467 map_mmioh_high(max_pnode); 568 map_mmioh_high(max_pnode);
468 569
469 uv_cpu_init(); 570 uv_cpu_init();
571 uv_scir_register_cpu_notifier();
572 proc_mkdir("sgi_uv", NULL);
470} 573}
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 1dcb0f13897e..3e66bd364a9d 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -35,7 +35,6 @@ void __init reserve_ebda_region(void)
35 35
36 /* start of EBDA area */ 36 /* start of EBDA area */
37 ebda_addr = get_bios_ebda(); 37 ebda_addr = get_bios_ebda();
38 printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem);
39 38
40 /* Fixup: bios puts an EBDA in the top 64K segment */ 39 /* Fixup: bios puts an EBDA in the top 64K segment */
41 /* of conventional memory, but does not adjust lowmem. */ 40 /* of conventional memory, but does not adjust lowmem. */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index fa1d25dd83e3..ac108d1fe182 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -12,9 +12,12 @@
12#include <asm/sections.h> 12#include <asm/sections.h>
13#include <asm/e820.h> 13#include <asm/e820.h>
14#include <asm/bios_ebda.h> 14#include <asm/bios_ebda.h>
15#include <asm/trampoline.h>
15 16
16void __init i386_start_kernel(void) 17void __init i386_start_kernel(void)
17{ 18{
19 reserve_trampoline_memory();
20
18 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); 21 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
19 22
20#ifdef CONFIG_BLK_DEV_INITRD 23#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index d16084f90649..388e05a5fc17 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -24,6 +24,7 @@
24#include <asm/kdebug.h> 24#include <asm/kdebug.h>
25#include <asm/e820.h> 25#include <asm/e820.h>
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27#include <asm/trampoline.h>
27 28
28/* boot cpu pda */ 29/* boot cpu pda */
29static struct x8664_pda _boot_cpu_pda __read_mostly; 30static struct x8664_pda _boot_cpu_pda __read_mostly;
@@ -120,6 +121,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
120{ 121{
121 copy_bootdata(__va(real_mode_data)); 122 copy_bootdata(__va(real_mode_data));
122 123
124 reserve_trampoline_memory();
125
123 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); 126 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
124 127
125#ifdef CONFIG_BLK_DEV_INITRD 128#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de913f6..3f0a3edf0a57 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -33,7 +33,9 @@
33 * HPET address is set in acpi/boot.c, when an ACPI entry exists 33 * HPET address is set in acpi/boot.c, when an ACPI entry exists
34 */ 34 */
35unsigned long hpet_address; 35unsigned long hpet_address;
36unsigned long hpet_num_timers; 36#ifdef CONFIG_PCI_MSI
37static unsigned long hpet_num_timers;
38#endif
37static void __iomem *hpet_virt_address; 39static void __iomem *hpet_virt_address;
38 40
39struct hpet_dev { 41struct hpet_dev {
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index a4f93b4120c1..d39918076bb4 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -14,7 +14,6 @@ static struct fs_struct init_fs = INIT_FS;
14static struct signal_struct init_signals = INIT_SIGNALS(init_signals); 14static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); 15static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
16struct mm_struct init_mm = INIT_MM(init_mm); 16struct mm_struct init_mm = INIT_MM(init_mm);
17EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */
18 17
19/* 18/*
20 * Initial thread structure. 19 * Initial thread structure.
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251210fb..679e7bbbbcd6 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -2216,10 +2216,9 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
2216asmlinkage void smp_irq_move_cleanup_interrupt(void) 2216asmlinkage void smp_irq_move_cleanup_interrupt(void)
2217{ 2217{
2218 unsigned vector, me; 2218 unsigned vector, me;
2219
2219 ack_APIC_irq(); 2220 ack_APIC_irq();
2220#ifdef CONFIG_X86_64
2221 exit_idle(); 2221 exit_idle();
2222#endif
2223 irq_enter(); 2222 irq_enter();
2224 2223
2225 me = smp_processor_id(); 2224 me = smp_processor_id();
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..1d3d0e71b044 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,7 +18,6 @@
18#include <asm/idle.h> 18#include <asm/idle.h>
19#include <asm/smp.h> 19#include <asm/smp.h>
20 20
21#ifdef CONFIG_DEBUG_STACKOVERFLOW
22/* 21/*
23 * Probabilistic stack overflow check: 22 * Probabilistic stack overflow check:
24 * 23 *
@@ -28,19 +27,18 @@
28 */ 27 */
29static inline void stack_overflow_check(struct pt_regs *regs) 28static inline void stack_overflow_check(struct pt_regs *regs)
30{ 29{
30#ifdef CONFIG_DEBUG_STACKOVERFLOW
31 u64 curbase = (u64)task_stack_page(current); 31 u64 curbase = (u64)task_stack_page(current);
32 static unsigned long warned = -60*HZ; 32
33 33 WARN_ONCE(regs->sp >= curbase &&
34 if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && 34 regs->sp <= curbase + THREAD_SIZE &&
35 regs->sp < curbase + sizeof(struct thread_info) + 128 && 35 regs->sp < curbase + sizeof(struct thread_info) +
36 time_after(jiffies, warned + 60*HZ)) { 36 sizeof(struct pt_regs) + 128,
37 printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", 37
38 current->comm, curbase, regs->sp); 38 "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
39 show_stack(NULL,NULL); 39 current->comm, curbase, regs->sp);
40 warned = jiffies;
41 }
42}
43#endif 40#endif
41}
44 42
45/* 43/*
46 * do_IRQ handles all normal device IRQ's (the special 44 * do_IRQ handles all normal device IRQ's (the special
@@ -60,9 +58,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
60 irq_enter(); 58 irq_enter();
61 irq = __get_cpu_var(vector_irq)[vector]; 59 irq = __get_cpu_var(vector_irq)[vector];
62 60
63#ifdef CONFIG_DEBUG_STACKOVERFLOW
64 stack_overflow_check(regs); 61 stack_overflow_check(regs);
65#endif
66 62
67 desc = irq_to_desc(irq); 63 desc = irq_to_desc(irq);
68 if (likely(desc)) 64 if (likely(desc))
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..607db63044a5 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -129,7 +129,7 @@ void __init native_init_IRQ(void)
129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { 129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
130 /* SYSCALL_VECTOR was reserved in trap_init. */ 130 /* SYSCALL_VECTOR was reserved in trap_init. */
131 if (i != SYSCALL_VECTOR) 131 if (i != SYSCALL_VECTOR)
132 set_intr_gate(i, interrupt[i]); 132 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
133 } 133 }
134 134
135 135
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..8670b3ce626e 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -24,41 +24,6 @@
24#include <asm/i8259.h> 24#include <asm/i8259.h>
25 25
26/* 26/*
27 * Common place to define all x86 IRQ vectors
28 *
29 * This builds up the IRQ handler stubs using some ugly macros in irq.h
30 *
31 * These macros create the low-level assembly IRQ routines that save
32 * register context and call do_IRQ(). do_IRQ() then does all the
33 * operations that are needed to keep the AT (or SMP IOAPIC)
34 * interrupt-controller happy.
35 */
36
37#define IRQ_NAME2(nr) nr##_interrupt(void)
38#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
39
40/*
41 * SMP has a few special interrupts for IPI messages
42 */
43
44#define BUILD_IRQ(nr) \
45 asmlinkage void IRQ_NAME(nr); \
46 asm("\n.text\n.p2align\n" \
47 "IRQ" #nr "_interrupt:\n\t" \
48 "push $~(" #nr ") ; " \
49 "jmp common_interrupt\n" \
50 ".previous");
51
52#define BI(x,y) \
53 BUILD_IRQ(x##y)
54
55#define BUILD_16_IRQS(x) \
56 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
57 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
58 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
59 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
60
61/*
62 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: 27 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
63 * (these are usually mapped to vectors 0x30-0x3f) 28 * (these are usually mapped to vectors 0x30-0x3f)
64 */ 29 */
@@ -73,37 +38,6 @@
73 * 38 *
74 * (these are usually mapped into the 0x30-0xff vector range) 39 * (these are usually mapped into the 0x30-0xff vector range)
75 */ 40 */
76 BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
77BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
78BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
79BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
80
81#undef BUILD_16_IRQS
82#undef BI
83
84
85#define IRQ(x,y) \
86 IRQ##x##y##_interrupt
87
88#define IRQLIST_16(x) \
89 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
90 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
91 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
92 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
93
94/* for the irq vectors */
95static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
96 IRQLIST_16(0x2), IRQLIST_16(0x3),
97 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
98 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
99 IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
100};
101
102#undef IRQ
103#undef IRQLIST_16
104
105
106
107 41
108/* 42/*
109 * IRQ2 is cascade interrupt to second interrupt controller 43 * IRQ2 is cascade interrupt to second interrupt controller
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 7a385746509a..37f420018a41 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -13,6 +13,7 @@
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15#include <linux/suspend.h> 15#include <linux/suspend.h>
16#include <linux/gfp.h>
16 17
17#include <asm/pgtable.h> 18#include <asm/pgtable.h>
18#include <asm/pgalloc.h> 19#include <asm/pgalloc.h>
@@ -25,15 +26,6 @@
25#include <asm/system.h> 26#include <asm/system.h>
26#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
27 28
28#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
29static u32 kexec_pgd[1024] PAGE_ALIGNED;
30#ifdef CONFIG_X86_PAE
31static u32 kexec_pmd0[1024] PAGE_ALIGNED;
32static u32 kexec_pmd1[1024] PAGE_ALIGNED;
33#endif
34static u32 kexec_pte0[1024] PAGE_ALIGNED;
35static u32 kexec_pte1[1024] PAGE_ALIGNED;
36
37static void set_idt(void *newidt, __u16 limit) 29static void set_idt(void *newidt, __u16 limit)
38{ 30{
39 struct desc_ptr curidt; 31 struct desc_ptr curidt;
@@ -76,6 +68,76 @@ static void load_segments(void)
76#undef __STR 68#undef __STR
77} 69}
78 70
71static void machine_kexec_free_page_tables(struct kimage *image)
72{
73 free_page((unsigned long)image->arch.pgd);
74#ifdef CONFIG_X86_PAE
75 free_page((unsigned long)image->arch.pmd0);
76 free_page((unsigned long)image->arch.pmd1);
77#endif
78 free_page((unsigned long)image->arch.pte0);
79 free_page((unsigned long)image->arch.pte1);
80}
81
82static int machine_kexec_alloc_page_tables(struct kimage *image)
83{
84 image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
85#ifdef CONFIG_X86_PAE
86 image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
87 image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
88#endif
89 image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL);
90 image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL);
91 if (!image->arch.pgd ||
92#ifdef CONFIG_X86_PAE
93 !image->arch.pmd0 || !image->arch.pmd1 ||
94#endif
95 !image->arch.pte0 || !image->arch.pte1) {
96 machine_kexec_free_page_tables(image);
97 return -ENOMEM;
98 }
99 return 0;
100}
101
102static void machine_kexec_page_table_set_one(
103 pgd_t *pgd, pmd_t *pmd, pte_t *pte,
104 unsigned long vaddr, unsigned long paddr)
105{
106 pud_t *pud;
107
108 pgd += pgd_index(vaddr);
109#ifdef CONFIG_X86_PAE
110 if (!(pgd_val(*pgd) & _PAGE_PRESENT))
111 set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
112#endif
113 pud = pud_offset(pgd, vaddr);
114 pmd = pmd_offset(pud, vaddr);
115 if (!(pmd_val(*pmd) & _PAGE_PRESENT))
116 set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
117 pte = pte_offset_kernel(pmd, vaddr);
118 set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
119}
120
121static void machine_kexec_prepare_page_tables(struct kimage *image)
122{
123 void *control_page;
124 pmd_t *pmd = 0;
125
126 control_page = page_address(image->control_code_page);
127#ifdef CONFIG_X86_PAE
128 pmd = image->arch.pmd0;
129#endif
130 machine_kexec_page_table_set_one(
131 image->arch.pgd, pmd, image->arch.pte0,
132 (unsigned long)control_page, __pa(control_page));
133#ifdef CONFIG_X86_PAE
134 pmd = image->arch.pmd1;
135#endif
136 machine_kexec_page_table_set_one(
137 image->arch.pgd, pmd, image->arch.pte1,
138 __pa(control_page), __pa(control_page));
139}
140
79/* 141/*
80 * A architecture hook called to validate the 142 * A architecture hook called to validate the
81 * proposed image and prepare the control pages 143 * proposed image and prepare the control pages
@@ -87,12 +149,20 @@ static void load_segments(void)
87 * reboot code buffer to allow us to avoid allocations 149 * reboot code buffer to allow us to avoid allocations
88 * later. 150 * later.
89 * 151 *
90 * Make control page executable. 152 * - Make control page executable.
153 * - Allocate page tables
154 * - Setup page tables
91 */ 155 */
92int machine_kexec_prepare(struct kimage *image) 156int machine_kexec_prepare(struct kimage *image)
93{ 157{
158 int error;
159
94 if (nx_enabled) 160 if (nx_enabled)
95 set_pages_x(image->control_code_page, 1); 161 set_pages_x(image->control_code_page, 1);
162 error = machine_kexec_alloc_page_tables(image);
163 if (error)
164 return error;
165 machine_kexec_prepare_page_tables(image);
96 return 0; 166 return 0;
97} 167}
98 168
@@ -104,6 +174,7 @@ void machine_kexec_cleanup(struct kimage *image)
104{ 174{
105 if (nx_enabled) 175 if (nx_enabled)
106 set_pages_nx(image->control_code_page, 1); 176 set_pages_nx(image->control_code_page, 1);
177 machine_kexec_free_page_tables(image);
107} 178}
108 179
109/* 180/*
@@ -150,18 +221,7 @@ void machine_kexec(struct kimage *image)
150 relocate_kernel_ptr = control_page; 221 relocate_kernel_ptr = control_page;
151 page_list[PA_CONTROL_PAGE] = __pa(control_page); 222 page_list[PA_CONTROL_PAGE] = __pa(control_page);
152 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; 223 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
153 page_list[PA_PGD] = __pa(kexec_pgd); 224 page_list[PA_PGD] = __pa(image->arch.pgd);
154 page_list[VA_PGD] = (unsigned long)kexec_pgd;
155#ifdef CONFIG_X86_PAE
156 page_list[PA_PMD_0] = __pa(kexec_pmd0);
157 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
158 page_list[PA_PMD_1] = __pa(kexec_pmd1);
159 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
160#endif
161 page_list[PA_PTE_0] = __pa(kexec_pte0);
162 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
163 page_list[PA_PTE_1] = __pa(kexec_pte1);
164 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
165 225
166 if (image->type == KEXEC_TYPE_DEFAULT) 226 if (image->type == KEXEC_TYPE_DEFAULT)
167 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) 227 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 5f8e5d75a254..c25fdb382292 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -10,7 +10,7 @@
10 * This driver allows to upgrade microcode on AMD 10 * This driver allows to upgrade microcode on AMD
11 * family 0x10 and 0x11 processors. 11 * family 0x10 and 0x11 processors.
12 * 12 *
13 * Licensed unter the terms of the GNU General Public 13 * Licensed under the terms of the GNU General Public
14 * License version 2. See file COPYING for details. 14 * License version 2. See file COPYING for details.
15*/ 15*/
16 16
@@ -32,9 +32,9 @@
32#include <linux/platform_device.h> 32#include <linux/platform_device.h>
33#include <linux/pci.h> 33#include <linux/pci.h>
34#include <linux/pci_ids.h> 34#include <linux/pci_ids.h>
35#include <linux/uaccess.h>
35 36
36#include <asm/msr.h> 37#include <asm/msr.h>
37#include <asm/uaccess.h>
38#include <asm/processor.h> 38#include <asm/processor.h>
39#include <asm/microcode.h> 39#include <asm/microcode.h>
40 40
@@ -47,43 +47,38 @@ MODULE_LICENSE("GPL v2");
47#define UCODE_UCODE_TYPE 0x00000001 47#define UCODE_UCODE_TYPE 0x00000001
48 48
49struct equiv_cpu_entry { 49struct equiv_cpu_entry {
50 unsigned int installed_cpu; 50 u32 installed_cpu;
51 unsigned int fixed_errata_mask; 51 u32 fixed_errata_mask;
52 unsigned int fixed_errata_compare; 52 u32 fixed_errata_compare;
53 unsigned int equiv_cpu; 53 u16 equiv_cpu;
54}; 54 u16 res;
55} __attribute__((packed));
55 56
56struct microcode_header_amd { 57struct microcode_header_amd {
57 unsigned int data_code; 58 u32 data_code;
58 unsigned int patch_id; 59 u32 patch_id;
59 unsigned char mc_patch_data_id[2]; 60 u16 mc_patch_data_id;
60 unsigned char mc_patch_data_len; 61 u8 mc_patch_data_len;
61 unsigned char init_flag; 62 u8 init_flag;
62 unsigned int mc_patch_data_checksum; 63 u32 mc_patch_data_checksum;
63 unsigned int nb_dev_id; 64 u32 nb_dev_id;
64 unsigned int sb_dev_id; 65 u32 sb_dev_id;
65 unsigned char processor_rev_id[2]; 66 u16 processor_rev_id;
66 unsigned char nb_rev_id; 67 u8 nb_rev_id;
67 unsigned char sb_rev_id; 68 u8 sb_rev_id;
68 unsigned char bios_api_rev; 69 u8 bios_api_rev;
69 unsigned char reserved1[3]; 70 u8 reserved1[3];
70 unsigned int match_reg[8]; 71 u32 match_reg[8];
71}; 72} __attribute__((packed));
72 73
73struct microcode_amd { 74struct microcode_amd {
74 struct microcode_header_amd hdr; 75 struct microcode_header_amd hdr;
75 unsigned int mpb[0]; 76 unsigned int mpb[0];
76}; 77};
77 78
78#define UCODE_MAX_SIZE (2048) 79#define UCODE_MAX_SIZE 2048
79#define DEFAULT_UCODE_DATASIZE (896) 80#define UCODE_CONTAINER_SECTION_HDR 8
80#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) 81#define UCODE_CONTAINER_HEADER_SIZE 12
81#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
82#define DWSIZE (sizeof(u32))
83/* For now we support a fixed ucode total size only */
84#define get_totalsize(mc) \
85 ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
86 + MC_HEADER_SIZE)
87 82
88/* serialize access to the physical write */ 83/* serialize access to the physical write */
89static DEFINE_SPINLOCK(microcode_update_lock); 84static DEFINE_SPINLOCK(microcode_update_lock);
@@ -93,31 +88,24 @@ static struct equiv_cpu_entry *equiv_cpu_table;
93static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) 88static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
94{ 89{
95 struct cpuinfo_x86 *c = &cpu_data(cpu); 90 struct cpuinfo_x86 *c = &cpu_data(cpu);
91 u32 dummy;
96 92
97 memset(csig, 0, sizeof(*csig)); 93 memset(csig, 0, sizeof(*csig));
98
99 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { 94 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
100 printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", 95 printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not "
101 cpu); 96 "supported\n", cpu, c->x86);
102 return -1; 97 return -1;
103 } 98 }
104 99 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
105 asm volatile("movl %1, %%ecx; rdmsr" 100 printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev);
106 : "=a" (csig->rev)
107 : "i" (0x0000008B) : "ecx");
108
109 printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n",
110 csig->rev);
111
112 return 0; 101 return 0;
113} 102}
114 103
115static int get_matching_microcode(int cpu, void *mc, int rev) 104static int get_matching_microcode(int cpu, void *mc, int rev)
116{ 105{
117 struct microcode_header_amd *mc_header = mc; 106 struct microcode_header_amd *mc_header = mc;
118 struct pci_dev *nb_pci_dev, *sb_pci_dev;
119 unsigned int current_cpu_id; 107 unsigned int current_cpu_id;
120 unsigned int equiv_cpu_id = 0x00; 108 u16 equiv_cpu_id = 0;
121 unsigned int i = 0; 109 unsigned int i = 0;
122 110
123 BUG_ON(equiv_cpu_table == NULL); 111 BUG_ON(equiv_cpu_table == NULL);
@@ -132,57 +120,25 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
132 } 120 }
133 121
134 if (!equiv_cpu_id) { 122 if (!equiv_cpu_id) {
135 printk(KERN_ERR "microcode: CPU%d cpu_id " 123 printk(KERN_WARNING "microcode: CPU%d: cpu revision "
136 "not found in equivalent cpu table \n", cpu); 124 "not listed in equivalent cpu table\n", cpu);
137 return 0; 125 return 0;
138 } 126 }
139 127
140 if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) { 128 if (mc_header->processor_rev_id != equiv_cpu_id) {
141 printk(KERN_ERR 129 printk(KERN_ERR "microcode: CPU%d: patch mismatch "
142 "microcode: CPU%d patch does not match " 130 "(processor_rev_id: %x, equiv_cpu_id: %x)\n",
143 "(patch is %x, cpu extended is %x) \n", 131 cpu, mc_header->processor_rev_id, equiv_cpu_id);
144 cpu, mc_header->processor_rev_id[0],
145 (equiv_cpu_id & 0xff));
146 return 0; 132 return 0;
147 } 133 }
148 134
149 if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) { 135 /* ucode might be chipset specific -- currently we don't support this */
150 printk(KERN_ERR "microcode: CPU%d patch does not match " 136 if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
151 "(patch is %x, cpu base id is %x) \n", 137 printk(KERN_ERR "microcode: CPU%d: loading of chipset "
152 cpu, mc_header->processor_rev_id[1], 138 "specific code not yet supported\n", cpu);
153 ((equiv_cpu_id >> 16) & 0xff));
154
155 return 0; 139 return 0;
156 } 140 }
157 141
158 /* ucode may be northbridge specific */
159 if (mc_header->nb_dev_id) {
160 nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
161 (mc_header->nb_dev_id & 0xff),
162 NULL);
163 if ((!nb_pci_dev) ||
164 (mc_header->nb_rev_id != nb_pci_dev->revision)) {
165 printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu);
166 pci_dev_put(nb_pci_dev);
167 return 0;
168 }
169 pci_dev_put(nb_pci_dev);
170 }
171
172 /* ucode may be southbridge specific */
173 if (mc_header->sb_dev_id) {
174 sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
175 (mc_header->sb_dev_id & 0xff),
176 NULL);
177 if ((!sb_pci_dev) ||
178 (mc_header->sb_rev_id != sb_pci_dev->revision)) {
179 printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu);
180 pci_dev_put(sb_pci_dev);
181 return 0;
182 }
183 pci_dev_put(sb_pci_dev);
184 }
185
186 if (mc_header->patch_id <= rev) 142 if (mc_header->patch_id <= rev)
187 return 0; 143 return 0;
188 144
@@ -192,12 +148,10 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
192static void apply_microcode_amd(int cpu) 148static void apply_microcode_amd(int cpu)
193{ 149{
194 unsigned long flags; 150 unsigned long flags;
195 unsigned int eax, edx; 151 u32 rev, dummy;
196 unsigned int rev;
197 int cpu_num = raw_smp_processor_id(); 152 int cpu_num = raw_smp_processor_id();
198 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; 153 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
199 struct microcode_amd *mc_amd = uci->mc; 154 struct microcode_amd *mc_amd = uci->mc;
200 unsigned long addr;
201 155
202 /* We should bind the task to the CPU */ 156 /* We should bind the task to the CPU */
203 BUG_ON(cpu_num != cpu); 157 BUG_ON(cpu_num != cpu);
@@ -206,42 +160,34 @@ static void apply_microcode_amd(int cpu)
206 return; 160 return;
207 161
208 spin_lock_irqsave(&microcode_update_lock, flags); 162 spin_lock_irqsave(&microcode_update_lock, flags);
209 163 wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
210 addr = (unsigned long)&mc_amd->hdr.data_code;
211 edx = (unsigned int)(((unsigned long)upper_32_bits(addr)));
212 eax = (unsigned int)(((unsigned long)lower_32_bits(addr)));
213
214 asm volatile("movl %0, %%ecx; wrmsr" :
215 : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx");
216
217 /* get patch id after patching */ 164 /* get patch id after patching */
218 asm volatile("movl %1, %%ecx; rdmsr" 165 rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
219 : "=a" (rev)
220 : "i" (0x0000008B) : "ecx");
221
222 spin_unlock_irqrestore(&microcode_update_lock, flags); 166 spin_unlock_irqrestore(&microcode_update_lock, flags);
223 167
224 /* check current patch id and patch's id for match */ 168 /* check current patch id and patch's id for match */
225 if (rev != mc_amd->hdr.patch_id) { 169 if (rev != mc_amd->hdr.patch_id) {
226 printk(KERN_ERR "microcode: CPU%d update from revision " 170 printk(KERN_ERR "microcode: CPU%d: update failed "
227 "0x%x to 0x%x failed\n", cpu_num, 171 "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id);
228 mc_amd->hdr.patch_id, rev);
229 return; 172 return;
230 } 173 }
231 174
232 printk(KERN_INFO "microcode: CPU%d updated from revision " 175 printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n",
233 "0x%x to 0x%x \n", 176 cpu, rev);
234 cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
235 177
236 uci->cpu_sig.rev = rev; 178 uci->cpu_sig.rev = rev;
237} 179}
238 180
239static void * get_next_ucode(u8 *buf, unsigned int size, 181static int get_ucode_data(void *to, const u8 *from, size_t n)
240 int (*get_ucode_data)(void *, const void *, size_t), 182{
241 unsigned int *mc_size) 183 memcpy(to, from, n);
184 return 0;
185}
186
187static void *get_next_ucode(const u8 *buf, unsigned int size,
188 unsigned int *mc_size)
242{ 189{
243 unsigned int total_size; 190 unsigned int total_size;
244#define UCODE_CONTAINER_SECTION_HDR 8
245 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; 191 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
246 void *mc; 192 void *mc;
247 193
@@ -249,39 +195,37 @@ static void * get_next_ucode(u8 *buf, unsigned int size,
249 return NULL; 195 return NULL;
250 196
251 if (section_hdr[0] != UCODE_UCODE_TYPE) { 197 if (section_hdr[0] != UCODE_UCODE_TYPE) {
252 printk(KERN_ERR "microcode: error! " 198 printk(KERN_ERR "microcode: error: invalid type field in "
253 "Wrong microcode payload type field\n"); 199 "container file section header\n");
254 return NULL; 200 return NULL;
255 } 201 }
256 202
257 total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); 203 total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
258 204
259 printk(KERN_INFO "microcode: size %u, total_size %u\n", 205 printk(KERN_DEBUG "microcode: size %u, total_size %u\n",
260 size, total_size); 206 size, total_size);
261 207
262 if (total_size > size || total_size > UCODE_MAX_SIZE) { 208 if (total_size > size || total_size > UCODE_MAX_SIZE) {
263 printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); 209 printk(KERN_ERR "microcode: error: size mismatch\n");
264 return NULL; 210 return NULL;
265 } 211 }
266 212
267 mc = vmalloc(UCODE_MAX_SIZE); 213 mc = vmalloc(UCODE_MAX_SIZE);
268 if (mc) { 214 if (mc) {
269 memset(mc, 0, UCODE_MAX_SIZE); 215 memset(mc, 0, UCODE_MAX_SIZE);
270 if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { 216 if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR,
217 total_size)) {
271 vfree(mc); 218 vfree(mc);
272 mc = NULL; 219 mc = NULL;
273 } else 220 } else
274 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; 221 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
275 } 222 }
276#undef UCODE_CONTAINER_SECTION_HDR
277 return mc; 223 return mc;
278} 224}
279 225
280 226
281static int install_equiv_cpu_table(u8 *buf, 227static int install_equiv_cpu_table(const u8 *buf)
282 int (*get_ucode_data)(void *, const void *, size_t))
283{ 228{
284#define UCODE_CONTAINER_HEADER_SIZE 12
285 u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; 229 u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
286 unsigned int *buf_pos = (unsigned int *)container_hdr; 230 unsigned int *buf_pos = (unsigned int *)container_hdr;
287 unsigned long size; 231 unsigned long size;
@@ -292,14 +236,15 @@ static int install_equiv_cpu_table(u8 *buf,
292 size = buf_pos[2]; 236 size = buf_pos[2];
293 237
294 if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { 238 if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
295 printk(KERN_ERR "microcode: error! " 239 printk(KERN_ERR "microcode: error: invalid type field in "
296 "Wrong microcode equivalnet cpu table\n"); 240 "container file section header\n");
297 return 0; 241 return 0;
298 } 242 }
299 243
300 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); 244 equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
301 if (!equiv_cpu_table) { 245 if (!equiv_cpu_table) {
302 printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n"); 246 printk(KERN_ERR "microcode: failed to allocate "
247 "equivalent CPU table\n");
303 return 0; 248 return 0;
304 } 249 }
305 250
@@ -310,7 +255,6 @@ static int install_equiv_cpu_table(u8 *buf,
310 } 255 }
311 256
312 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ 257 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
313#undef UCODE_CONTAINER_HEADER_SIZE
314} 258}
315 259
316static void free_equiv_cpu_table(void) 260static void free_equiv_cpu_table(void)
@@ -321,18 +265,20 @@ static void free_equiv_cpu_table(void)
321 } 265 }
322} 266}
323 267
324static int generic_load_microcode(int cpu, void *data, size_t size, 268static int generic_load_microcode(int cpu, const u8 *data, size_t size)
325 int (*get_ucode_data)(void *, const void *, size_t))
326{ 269{
327 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 270 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
328 u8 *ucode_ptr = data, *new_mc = NULL, *mc; 271 const u8 *ucode_ptr = data;
272 void *new_mc = NULL;
273 void *mc;
329 int new_rev = uci->cpu_sig.rev; 274 int new_rev = uci->cpu_sig.rev;
330 unsigned int leftover; 275 unsigned int leftover;
331 unsigned long offset; 276 unsigned long offset;
332 277
333 offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data); 278 offset = install_equiv_cpu_table(ucode_ptr);
334 if (!offset) { 279 if (!offset) {
335 printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); 280 printk(KERN_ERR "microcode: failed to create "
281 "equivalent cpu table\n");
336 return -EINVAL; 282 return -EINVAL;
337 } 283 }
338 284
@@ -343,7 +289,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
343 unsigned int uninitialized_var(mc_size); 289 unsigned int uninitialized_var(mc_size);
344 struct microcode_header_amd *mc_header; 290 struct microcode_header_amd *mc_header;
345 291
346 mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); 292 mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
347 if (!mc) 293 if (!mc)
348 break; 294 break;
349 295
@@ -353,7 +299,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
353 vfree(new_mc); 299 vfree(new_mc);
354 new_rev = mc_header->patch_id; 300 new_rev = mc_header->patch_id;
355 new_mc = mc; 301 new_mc = mc;
356 } else 302 } else
357 vfree(mc); 303 vfree(mc);
358 304
359 ucode_ptr += mc_size; 305 ucode_ptr += mc_size;
@@ -365,9 +311,9 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
365 if (uci->mc) 311 if (uci->mc)
366 vfree(uci->mc); 312 vfree(uci->mc);
367 uci->mc = new_mc; 313 uci->mc = new_mc;
368 pr_debug("microcode: CPU%d found a matching microcode update with" 314 pr_debug("microcode: CPU%d found a matching microcode "
369 " version 0x%x (current=0x%x)\n", 315 "update with version 0x%x (current=0x%x)\n",
370 cpu, new_rev, uci->cpu_sig.rev); 316 cpu, new_rev, uci->cpu_sig.rev);
371 } else 317 } else
372 vfree(new_mc); 318 vfree(new_mc);
373 } 319 }
@@ -377,12 +323,6 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
377 return (int)leftover; 323 return (int)leftover;
378} 324}
379 325
380static int get_ucode_fw(void *to, const void *from, size_t n)
381{
382 memcpy(to, from, n);
383 return 0;
384}
385
386static int request_microcode_fw(int cpu, struct device *device) 326static int request_microcode_fw(int cpu, struct device *device)
387{ 327{
388 const char *fw_name = "amd-ucode/microcode_amd.bin"; 328 const char *fw_name = "amd-ucode/microcode_amd.bin";
@@ -394,12 +334,11 @@ static int request_microcode_fw(int cpu, struct device *device)
394 334
395 ret = request_firmware(&firmware, fw_name, device); 335 ret = request_firmware(&firmware, fw_name, device);
396 if (ret) { 336 if (ret) {
397 printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name); 337 printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
398 return ret; 338 return ret;
399 } 339 }
400 340
401 ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, 341 ret = generic_load_microcode(cpu, firmware->data, firmware->size);
402 &get_ucode_fw);
403 342
404 release_firmware(firmware); 343 release_firmware(firmware);
405 344
@@ -408,8 +347,8 @@ static int request_microcode_fw(int cpu, struct device *device)
408 347
409static int request_microcode_user(int cpu, const void __user *buf, size_t size) 348static int request_microcode_user(int cpu, const void __user *buf, size_t size)
410{ 349{
411 printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode" 350 printk(KERN_INFO "microcode: AMD microcode update via "
412 "is not supported\n"); 351 "/dev/cpu/microcode not supported\n");
413 return -1; 352 return -1;
414} 353}
415 354
@@ -433,3 +372,4 @@ struct microcode_ops * __init init_amd_microcode(void)
433{ 372{
434 return &microcode_amd_ops; 373 return &microcode_amd_ops;
435} 374}
375
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index c4b5b24e0217..c9b721ba968c 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -99,7 +99,7 @@ MODULE_LICENSE("GPL");
99 99
100#define MICROCODE_VERSION "2.00" 100#define MICROCODE_VERSION "2.00"
101 101
102struct microcode_ops *microcode_ops; 102static struct microcode_ops *microcode_ops;
103 103
104/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ 104/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
105static DEFINE_MUTEX(microcode_mutex); 105static DEFINE_MUTEX(microcode_mutex);
@@ -203,7 +203,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
203#endif 203#endif
204 204
205/* fake device for request_firmware */ 205/* fake device for request_firmware */
206struct platform_device *microcode_pdev; 206static struct platform_device *microcode_pdev;
207 207
208static ssize_t reload_store(struct sys_device *dev, 208static ssize_t reload_store(struct sys_device *dev,
209 struct sysdev_attribute *attr, 209 struct sysdev_attribute *attr,
@@ -328,7 +328,7 @@ static int microcode_resume_cpu(int cpu)
328 return 0; 328 return 0;
329} 329}
330 330
331void microcode_update_cpu(int cpu) 331static void microcode_update_cpu(int cpu)
332{ 332{
333 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 333 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
334 int err = 0; 334 int err = 0;
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index a8e62792d171..b7f4c929e615 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -471,7 +471,7 @@ static void microcode_fini_cpu(int cpu)
471 uci->mc = NULL; 471 uci->mc = NULL;
472} 472}
473 473
474struct microcode_ops microcode_intel_ops = { 474static struct microcode_ops microcode_intel_ops = {
475 .request_microcode_user = request_microcode_user, 475 .request_microcode_user = request_microcode_user,
476 .request_microcode_fw = request_microcode_fw, 476 .request_microcode_fw = request_microcode_fw,
477 .collect_cpu_info = collect_cpu_info, 477 .collect_cpu_info = collect_cpu_info,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0f4c1fd5a1f4..45e3b69808ba 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -586,26 +586,23 @@ static void __init __get_smp_config(unsigned int early)
586{ 586{
587 struct intel_mp_floating *mpf = mpf_found; 587 struct intel_mp_floating *mpf = mpf_found;
588 588
589 if (x86_quirks->mach_get_smp_config) { 589 if (!mpf)
590 if (x86_quirks->mach_get_smp_config(early)) 590 return;
591 return; 591
592 }
593 if (acpi_lapic && early) 592 if (acpi_lapic && early)
594 return; 593 return;
594
595 /* 595 /*
596 * ACPI supports both logical (e.g. Hyper-Threading) and physical 596 * MPS doesn't support hyperthreading, aka only have
597 * processors, where MPS only supports physical. 597 * thread 0 apic id in MPS table
598 */ 598 */
599 if (acpi_lapic && acpi_ioapic) { 599 if (acpi_lapic && acpi_ioapic)
600 printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
601 "information\n");
602 return; 600 return;
603 } else if (acpi_lapic)
604 printk(KERN_INFO "Using ACPI for processor (LAPIC) "
605 "configuration information\n");
606 601
607 if (!mpf) 602 if (x86_quirks->mach_get_smp_config) {
608 return; 603 if (x86_quirks->mach_get_smp_config(early))
604 return;
605 }
609 606
610 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 607 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
611 mpf->mpf_specification); 608 mpf->mpf_specification);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2c97f07f1c2c..8bd1bf9622a7 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -131,6 +131,11 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
131 atomic_dec(&nmi_active); 131 atomic_dec(&nmi_active);
132} 132}
133 133
134static void __acpi_nmi_disable(void *__unused)
135{
136 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
137}
138
134int __init check_nmi_watchdog(void) 139int __init check_nmi_watchdog(void)
135{ 140{
136 unsigned int *prev_nmi_count; 141 unsigned int *prev_nmi_count;
@@ -179,8 +184,12 @@ int __init check_nmi_watchdog(void)
179 kfree(prev_nmi_count); 184 kfree(prev_nmi_count);
180 return 0; 185 return 0;
181error: 186error:
182 if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) 187 if (nmi_watchdog == NMI_IO_APIC) {
183 disable_8259A_irq(0); 188 if (!timer_through_8259)
189 disable_8259A_irq(0);
190 on_each_cpu(__acpi_nmi_disable, NULL, 1);
191 }
192
184#ifdef CONFIG_X86_32 193#ifdef CONFIG_X86_32
185 timer_ack = 0; 194 timer_ack = 0;
186#endif 195#endif
@@ -199,12 +208,17 @@ static int __init setup_nmi_watchdog(char *str)
199 ++str; 208 ++str;
200 } 209 }
201 210
202 get_option(&str, &nmi); 211 if (!strncmp(str, "lapic", 5))
203 212 nmi_watchdog = NMI_LOCAL_APIC;
204 if (nmi >= NMI_INVALID) 213 else if (!strncmp(str, "ioapic", 6))
205 return 0; 214 nmi_watchdog = NMI_IO_APIC;
215 else {
216 get_option(&str, &nmi);
217 if (nmi >= NMI_INVALID)
218 return 0;
219 nmi_watchdog = nmi;
220 }
206 221
207 nmi_watchdog = nmi;
208 return 1; 222 return 1;
209} 223}
210__setup("nmi_watchdog=", setup_nmi_watchdog); 224__setup("nmi_watchdog=", setup_nmi_watchdog);
@@ -285,11 +299,6 @@ void acpi_nmi_enable(void)
285 on_each_cpu(__acpi_nmi_enable, NULL, 1); 299 on_each_cpu(__acpi_nmi_enable, NULL, 1);
286} 300}
287 301
288static void __acpi_nmi_disable(void *__unused)
289{
290 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
291}
292
293/* 302/*
294 * Disable timer based NMIs on all CPUs: 303 * Disable timer based NMIs on all CPUs:
295 */ 304 */
@@ -340,6 +349,8 @@ void stop_apic_nmi_watchdog(void *unused)
340 return; 349 return;
341 if (nmi_watchdog == NMI_LOCAL_APIC) 350 if (nmi_watchdog == NMI_LOCAL_APIC)
342 lapic_watchdog_stop(); 351 lapic_watchdog_stop();
352 else
353 __acpi_nmi_disable(NULL);
343 __get_cpu_var(wd_enabled) = 0; 354 __get_cpu_var(wd_enabled) = 0;
344 atomic_dec(&nmi_active); 355 atomic_dec(&nmi_active);
345} 356}
@@ -465,6 +476,24 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
465 476
466#ifdef CONFIG_SYSCTL 477#ifdef CONFIG_SYSCTL
467 478
479static void enable_ioapic_nmi_watchdog_single(void *unused)
480{
481 __get_cpu_var(wd_enabled) = 1;
482 atomic_inc(&nmi_active);
483 __acpi_nmi_enable(NULL);
484}
485
486static void enable_ioapic_nmi_watchdog(void)
487{
488 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
489 touch_nmi_watchdog();
490}
491
492static void disable_ioapic_nmi_watchdog(void)
493{
494 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
495}
496
468static int __init setup_unknown_nmi_panic(char *str) 497static int __init setup_unknown_nmi_panic(char *str)
469{ 498{
470 unknown_nmi_panic = 1; 499 unknown_nmi_panic = 1;
@@ -507,6 +536,11 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
507 enable_lapic_nmi_watchdog(); 536 enable_lapic_nmi_watchdog();
508 else 537 else
509 disable_lapic_nmi_watchdog(); 538 disable_lapic_nmi_watchdog();
539 } else if (nmi_watchdog == NMI_IO_APIC) {
540 if (nmi_watchdog_enabled)
541 enable_ioapic_nmi_watchdog();
542 else
543 disable_ioapic_nmi_watchdog();
510 } else { 544 } else {
511 printk(KERN_WARNING 545 printk(KERN_WARNING
512 "NMI watchdog doesn't know what hardware to touch\n"); 546 "NMI watchdog doesn't know what hardware to touch\n");
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 4caff39078e0..0deea37a53cf 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,7 +31,7 @@
31#include <asm/numaq.h> 31#include <asm/numaq.h>
32#include <asm/topology.h> 32#include <asm/topology.h>
33#include <asm/processor.h> 33#include <asm/processor.h>
34#include <asm/mpspec.h> 34#include <asm/genapic.h>
35#include <asm/e820.h> 35#include <asm/e820.h>
36#include <asm/setup.h> 36#include <asm/setup.h>
37 37
@@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void)
235 return 1; 235 return 1;
236} 236}
237 237
238static int __init numaq_update_genapic(void)
239{
240 genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
241
242 return 0;
243}
244
238static struct x86_quirks numaq_x86_quirks __initdata = { 245static struct x86_quirks numaq_x86_quirks __initdata = {
239 .arch_pre_time_init = numaq_pre_time_init, 246 .arch_pre_time_init = numaq_pre_time_init,
240 .arch_time_init = NULL, 247 .arch_time_init = NULL,
@@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
250 .mpc_oem_pci_bus = mpc_oem_pci_bus, 257 .mpc_oem_pci_bus = mpc_oem_pci_bus,
251 .smp_read_mpc_oem = smp_read_mpc_oem, 258 .smp_read_mpc_oem = smp_read_mpc_oem,
252 .setup_ioapic_ids = numaq_setup_ioapic_ids, 259 .setup_ioapic_ids = numaq_setup_ioapic_ids,
260 .update_genapic = numaq_update_genapic,
253}; 261};
254 262
255void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, 263void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 192624820217..7a3dfceb90e4 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -6,6 +6,7 @@
6#include <asm/proto.h> 6#include <asm/proto.h>
7#include <asm/dma.h> 7#include <asm/dma.h>
8#include <asm/iommu.h> 8#include <asm/iommu.h>
9#include <asm/gart.h>
9#include <asm/calgary.h> 10#include <asm/calgary.h>
10#include <asm/amd_iommu.h> 11#include <asm/amd_iommu.h>
11 12
@@ -30,11 +31,6 @@ int no_iommu __read_mostly;
30/* Set this to 1 if there is a HW IOMMU in the system */ 31/* Set this to 1 if there is a HW IOMMU in the system */
31int iommu_detected __read_mostly = 0; 32int iommu_detected __read_mostly = 0;
32 33
33/* This tells the BIO block layer to assume merging. Default to off
34 because we cannot guarantee merging later. */
35int iommu_bio_merge __read_mostly = 0;
36EXPORT_SYMBOL(iommu_bio_merge);
37
38dma_addr_t bad_dma_address __read_mostly = 0; 34dma_addr_t bad_dma_address __read_mostly = 0;
39EXPORT_SYMBOL(bad_dma_address); 35EXPORT_SYMBOL(bad_dma_address);
40 36
@@ -188,7 +184,6 @@ static __init int iommu_setup(char *p)
188 } 184 }
189 185
190 if (!strncmp(p, "biomerge", 8)) { 186 if (!strncmp(p, "biomerge", 8)) {
191 iommu_bio_merge = 4096;
192 iommu_merge = 1; 187 iommu_merge = 1;
193 force_iommu = 1; 188 force_iommu = 1;
194 } 189 }
@@ -300,8 +295,8 @@ fs_initcall(pci_iommu_init);
300static __devinit void via_no_dac(struct pci_dev *dev) 295static __devinit void via_no_dac(struct pci_dev *dev)
301{ 296{
302 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { 297 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
303 printk(KERN_INFO "PCI: VIA PCI bridge detected." 298 printk(KERN_INFO
304 "Disabling DAC.\n"); 299 "PCI: VIA PCI bridge detected. Disabling DAC.\n");
305 forbid_dac = 1; 300 forbid_dac = 1;
306 } 301 }
307} 302}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..b8f3e9dbabd7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1,6 +1,7 @@
1#include <linux/errno.h> 1#include <linux/errno.h>
2#include <linux/kernel.h> 2#include <linux/kernel.h>
3#include <linux/mm.h> 3#include <linux/mm.h>
4#include <asm/idle.h>
4#include <linux/smp.h> 5#include <linux/smp.h>
5#include <linux/slab.h> 6#include <linux/slab.h>
6#include <linux/sched.h> 7#include <linux/sched.h>
@@ -8,6 +9,7 @@
8#include <linux/pm.h> 9#include <linux/pm.h>
9#include <linux/clockchips.h> 10#include <linux/clockchips.h>
10#include <asm/system.h> 11#include <asm/system.h>
12#include <asm/apic.h>
11 13
12unsigned long idle_halt; 14unsigned long idle_halt;
13EXPORT_SYMBOL(idle_halt); 15EXPORT_SYMBOL(idle_halt);
@@ -122,6 +124,21 @@ void default_idle(void)
122EXPORT_SYMBOL(default_idle); 124EXPORT_SYMBOL(default_idle);
123#endif 125#endif
124 126
127void stop_this_cpu(void *dummy)
128{
129 local_irq_disable();
130 /*
131 * Remove this CPU:
132 */
133 cpu_clear(smp_processor_id(), cpu_online_map);
134 disable_local_APIC();
135
136 for (;;) {
137 if (hlt_works(smp_processor_id()))
138 halt();
139 }
140}
141
125static void do_nothing(void *unused) 142static void do_nothing(void *unused)
126{ 143{
127} 144}
@@ -270,7 +287,7 @@ static void c1e_idle(void)
270 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); 287 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
271 if (lo & K8_INTP_C1E_ACTIVE_MASK) { 288 if (lo & K8_INTP_C1E_ACTIVE_MASK) {
272 c1e_detected = 1; 289 c1e_detected = 1;
273 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 290 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
274 mark_tsc_unstable("TSC halt in AMD C1E"); 291 mark_tsc_unstable("TSC halt in AMD C1E");
275 printk(KERN_INFO "System has AMD C1E enabled\n"); 292 printk(KERN_INFO "System has AMD C1E enabled\n");
276 set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); 293 set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10d..06180dff5b2e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -929,17 +929,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
929 switch (c->x86) { 929 switch (c->x86) {
930 case 0x6: 930 case 0x6:
931 switch (c->x86_model) { 931 switch (c->x86_model) {
932 case 0 ... 0xC:
933 /* sorry, don't know about them */
934 break;
932 case 0xD: 935 case 0xD:
933 case 0xE: /* Pentium M */ 936 case 0xE: /* Pentium M */
934 bts_configure(&bts_cfg_pentium_m); 937 bts_configure(&bts_cfg_pentium_m);
935 break; 938 break;
936 case 0xF: /* Core2 */ 939 default: /* Core2, Atom, ... */
937 case 0x1C: /* Atom */
938 bts_configure(&bts_cfg_core2); 940 bts_configure(&bts_cfg_core2);
939 break; 941 break;
940 default:
941 /* sorry, don't know about them */
942 break;
943 } 942 }
944 break; 943 break;
945 case 0xF: 944 case 0xF:
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index cc5a2545dd41..61f718df6eec 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -21,6 +21,9 @@
21# include <asm/iommu.h> 21# include <asm/iommu.h>
22#endif 22#endif
23 23
24#include <mach_ipi.h>
25
26
24/* 27/*
25 * Power off function, if any 28 * Power off function, if any
26 */ 29 */
@@ -36,7 +39,10 @@ int reboot_force;
36static int reboot_cpu = -1; 39static int reboot_cpu = -1;
37#endif 40#endif
38 41
39/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] 42/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
43bool port_cf9_safe = false;
44
45/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
40 warm Don't set the cold reboot flag 46 warm Don't set the cold reboot flag
41 cold Set the cold reboot flag 47 cold Set the cold reboot flag
42 bios Reboot by jumping through the BIOS (only for X86_32) 48 bios Reboot by jumping through the BIOS (only for X86_32)
@@ -45,6 +51,7 @@ static int reboot_cpu = -1;
45 kbd Use the keyboard controller. cold reset (default) 51 kbd Use the keyboard controller. cold reset (default)
46 acpi Use the RESET_REG in the FADT 52 acpi Use the RESET_REG in the FADT
47 efi Use efi reset_system runtime service 53 efi Use efi reset_system runtime service
54 pci Use the so-called "PCI reset register", CF9
48 force Avoid anything that could hang. 55 force Avoid anything that could hang.
49 */ 56 */
50static int __init reboot_setup(char *str) 57static int __init reboot_setup(char *str)
@@ -79,6 +86,7 @@ static int __init reboot_setup(char *str)
79 case 'k': 86 case 'k':
80 case 't': 87 case 't':
81 case 'e': 88 case 'e':
89 case 'p':
82 reboot_type = *str; 90 reboot_type = *str;
83 break; 91 break;
84 92
@@ -404,12 +412,27 @@ static void native_machine_emergency_restart(void)
404 reboot_type = BOOT_KBD; 412 reboot_type = BOOT_KBD;
405 break; 413 break;
406 414
407
408 case BOOT_EFI: 415 case BOOT_EFI:
409 if (efi_enabled) 416 if (efi_enabled)
410 efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, 417 efi.reset_system(reboot_mode ?
418 EFI_RESET_WARM :
419 EFI_RESET_COLD,
411 EFI_SUCCESS, 0, NULL); 420 EFI_SUCCESS, 0, NULL);
421 reboot_type = BOOT_KBD;
422 break;
412 423
424 case BOOT_CF9:
425 port_cf9_safe = true;
426 /* fall through */
427
428 case BOOT_CF9_COND:
429 if (port_cf9_safe) {
430 u8 cf9 = inb(0xcf9) & ~6;
431 outb(cf9|2, 0xcf9); /* Request hard reset */
432 udelay(50);
433 outb(cf9|6, 0xcf9); /* Actually do the reset */
434 udelay(50);
435 }
413 reboot_type = BOOT_KBD; 436 reboot_type = BOOT_KBD;
414 break; 437 break;
415 } 438 }
@@ -470,6 +493,11 @@ static void native_machine_restart(char *__unused)
470 493
471static void native_machine_halt(void) 494static void native_machine_halt(void)
472{ 495{
496 /* stop other cpus and apics */
497 machine_shutdown();
498
499 /* stop this cpu */
500 stop_this_cpu(NULL);
473} 501}
474 502
475static void native_machine_power_off(void) 503static void native_machine_power_off(void)
@@ -523,3 +551,95 @@ void machine_crash_shutdown(struct pt_regs *regs)
523 machine_ops.crash_shutdown(regs); 551 machine_ops.crash_shutdown(regs);
524} 552}
525#endif 553#endif
554
555
556#if defined(CONFIG_SMP)
557
558/* This keeps a track of which one is crashing cpu. */
559static int crashing_cpu;
560static nmi_shootdown_cb shootdown_callback;
561
562static atomic_t waiting_for_crash_ipi;
563
564static int crash_nmi_callback(struct notifier_block *self,
565 unsigned long val, void *data)
566{
567 int cpu;
568
569 if (val != DIE_NMI_IPI)
570 return NOTIFY_OK;
571
572 cpu = raw_smp_processor_id();
573
574 /* Don't do anything if this handler is invoked on crashing cpu.
575 * Otherwise, system will completely hang. Crashing cpu can get
576 * an NMI if system was initially booted with nmi_watchdog parameter.
577 */
578 if (cpu == crashing_cpu)
579 return NOTIFY_STOP;
580 local_irq_disable();
581
582 shootdown_callback(cpu, (struct die_args *)data);
583
584 atomic_dec(&waiting_for_crash_ipi);
585 /* Assume hlt works */
586 halt();
587 for (;;)
588 cpu_relax();
589
590 return 1;
591}
592
593static void smp_send_nmi_allbutself(void)
594{
595 cpumask_t mask = cpu_online_map;
596 cpu_clear(safe_smp_processor_id(), mask);
597 if (!cpus_empty(mask))
598 send_IPI_mask(mask, NMI_VECTOR);
599}
600
601static struct notifier_block crash_nmi_nb = {
602 .notifier_call = crash_nmi_callback,
603};
604
605/* Halt all other CPUs, calling the specified function on each of them
606 *
607 * This function can be used to halt all other CPUs on crash
608 * or emergency reboot time. The function passed as parameter
609 * will be called inside a NMI handler on all CPUs.
610 */
611void nmi_shootdown_cpus(nmi_shootdown_cb callback)
612{
613 unsigned long msecs;
614 local_irq_disable();
615
616 /* Make a note of crashing cpu. Will be used in NMI callback.*/
617 crashing_cpu = safe_smp_processor_id();
618
619 shootdown_callback = callback;
620
621 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
622 /* Would it be better to replace the trap vector here? */
623 if (register_die_notifier(&crash_nmi_nb))
624 return; /* return what? */
625 /* Ensure the new callback function is set before sending
626 * out the NMI
627 */
628 wmb();
629
630 smp_send_nmi_allbutself();
631
632 msecs = 1000; /* Wait at most a second for the other cpus to stop */
633 while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
634 mdelay(1);
635 msecs--;
636 }
637
638 /* Leave the nmi callback set */
639}
640#else /* !CONFIG_SMP */
641void nmi_shootdown_cpus(nmi_shootdown_cb callback)
642{
643 /* No other CPUs to shoot down */
644}
645#endif
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 6f50664b2ba5..a160f3119725 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -10,15 +10,12 @@
10#include <asm/page.h> 10#include <asm/page.h>
11#include <asm/kexec.h> 11#include <asm/kexec.h>
12#include <asm/processor-flags.h> 12#include <asm/processor-flags.h>
13#include <asm/pgtable.h>
14 13
15/* 14/*
16 * Must be relocatable PIC code callable as a C function 15 * Must be relocatable PIC code callable as a C function
17 */ 16 */
18 17
19#define PTR(x) (x << 2) 18#define PTR(x) (x << 2)
20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
21#define PAE_PGD_ATTR (_PAGE_PRESENT)
22 19
23/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE 20/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
24 * ~ control_page + PAGE_SIZE are used as data storage and stack for 21 * ~ control_page + PAGE_SIZE are used as data storage and stack for
@@ -39,7 +36,6 @@
39#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) 36#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
40 37
41 .text 38 .text
42 .align PAGE_SIZE
43 .globl relocate_kernel 39 .globl relocate_kernel
44relocate_kernel: 40relocate_kernel:
45 /* Save the CPU context, used for jumping back */ 41 /* Save the CPU context, used for jumping back */
@@ -60,117 +56,6 @@ relocate_kernel:
60 movl %cr4, %eax 56 movl %cr4, %eax
61 movl %eax, CR4(%edi) 57 movl %eax, CR4(%edi)
62 58
63#ifdef CONFIG_X86_PAE
64 /* map the control page at its virtual address */
65
66 movl PTR(VA_PGD)(%ebp), %edi
67 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
68 andl $0xc0000000, %eax
69 shrl $27, %eax
70 addl %edi, %eax
71
72 movl PTR(PA_PMD_0)(%ebp), %edx
73 orl $PAE_PGD_ATTR, %edx
74 movl %edx, (%eax)
75
76 movl PTR(VA_PMD_0)(%ebp), %edi
77 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
78 andl $0x3fe00000, %eax
79 shrl $18, %eax
80 addl %edi, %eax
81
82 movl PTR(PA_PTE_0)(%ebp), %edx
83 orl $PAGE_ATTR, %edx
84 movl %edx, (%eax)
85
86 movl PTR(VA_PTE_0)(%ebp), %edi
87 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
88 andl $0x001ff000, %eax
89 shrl $9, %eax
90 addl %edi, %eax
91
92 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
93 orl $PAGE_ATTR, %edx
94 movl %edx, (%eax)
95
96 /* identity map the control page at its physical address */
97
98 movl PTR(VA_PGD)(%ebp), %edi
99 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
100 andl $0xc0000000, %eax
101 shrl $27, %eax
102 addl %edi, %eax
103
104 movl PTR(PA_PMD_1)(%ebp), %edx
105 orl $PAE_PGD_ATTR, %edx
106 movl %edx, (%eax)
107
108 movl PTR(VA_PMD_1)(%ebp), %edi
109 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
110 andl $0x3fe00000, %eax
111 shrl $18, %eax
112 addl %edi, %eax
113
114 movl PTR(PA_PTE_1)(%ebp), %edx
115 orl $PAGE_ATTR, %edx
116 movl %edx, (%eax)
117
118 movl PTR(VA_PTE_1)(%ebp), %edi
119 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
120 andl $0x001ff000, %eax
121 shrl $9, %eax
122 addl %edi, %eax
123
124 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
125 orl $PAGE_ATTR, %edx
126 movl %edx, (%eax)
127#else
128 /* map the control page at its virtual address */
129
130 movl PTR(VA_PGD)(%ebp), %edi
131 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
132 andl $0xffc00000, %eax
133 shrl $20, %eax
134 addl %edi, %eax
135
136 movl PTR(PA_PTE_0)(%ebp), %edx
137 orl $PAGE_ATTR, %edx
138 movl %edx, (%eax)
139
140 movl PTR(VA_PTE_0)(%ebp), %edi
141 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
142 andl $0x003ff000, %eax
143 shrl $10, %eax
144 addl %edi, %eax
145
146 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
147 orl $PAGE_ATTR, %edx
148 movl %edx, (%eax)
149
150 /* identity map the control page at its physical address */
151
152 movl PTR(VA_PGD)(%ebp), %edi
153 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
154 andl $0xffc00000, %eax
155 shrl $20, %eax
156 addl %edi, %eax
157
158 movl PTR(PA_PTE_1)(%ebp), %edx
159 orl $PAGE_ATTR, %edx
160 movl %edx, (%eax)
161
162 movl PTR(VA_PTE_1)(%ebp), %edi
163 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
164 andl $0x003ff000, %eax
165 shrl $10, %eax
166 addl %edi, %eax
167
168 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
169 orl $PAGE_ATTR, %edx
170 movl %edx, (%eax)
171#endif
172
173relocate_new_kernel:
174 /* read the arguments and say goodbye to the stack */ 59 /* read the arguments and say goodbye to the stack */
175 movl 20+4(%esp), %ebx /* page_list */ 60 movl 20+4(%esp), %ebx /* page_list */
176 movl 20+8(%esp), %ebp /* list of pages */ 61 movl 20+8(%esp), %ebp /* list of pages */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bdec76e55594..08e02e8453c9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -93,11 +93,13 @@
93#include <asm/desc.h> 93#include <asm/desc.h>
94#include <asm/dma.h> 94#include <asm/dma.h>
95#include <asm/iommu.h> 95#include <asm/iommu.h>
96#include <asm/gart.h>
96#include <asm/mmu_context.h> 97#include <asm/mmu_context.h>
97#include <asm/proto.h> 98#include <asm/proto.h>
98 99
99#include <mach_apic.h> 100#include <mach_apic.h>
100#include <asm/paravirt.h> 101#include <asm/paravirt.h>
102#include <asm/hypervisor.h>
101 103
102#include <asm/percpu.h> 104#include <asm/percpu.h>
103#include <asm/topology.h> 105#include <asm/topology.h>
@@ -448,6 +450,7 @@ static void __init reserve_early_setup_data(void)
448 * @size: Size of the crashkernel memory to reserve. 450 * @size: Size of the crashkernel memory to reserve.
449 * Returns the base address on success, and -1ULL on failure. 451 * Returns the base address on success, and -1ULL on failure.
450 */ 452 */
453static
451unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) 454unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
452{ 455{
453 const unsigned long long alignment = 16<<20; /* 16M */ 456 const unsigned long long alignment = 16<<20; /* 16M */
@@ -583,161 +586,24 @@ static int __init setup_elfcorehdr(char *arg)
583early_param("elfcorehdr", setup_elfcorehdr); 586early_param("elfcorehdr", setup_elfcorehdr);
584#endif 587#endif
585 588
586static struct x86_quirks default_x86_quirks __initdata; 589static int __init default_update_genapic(void)
587
588struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
589
590/*
591 * Some BIOSes seem to corrupt the low 64k of memory during events
592 * like suspend/resume and unplugging an HDMI cable. Reserve all
593 * remaining free memory in that area and fill it with a distinct
594 * pattern.
595 */
596#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
597#define MAX_SCAN_AREAS 8
598
599static int __read_mostly memory_corruption_check = -1;
600
601static unsigned __read_mostly corruption_check_size = 64*1024;
602static unsigned __read_mostly corruption_check_period = 60; /* seconds */
603
604static struct e820entry scan_areas[MAX_SCAN_AREAS];
605static int num_scan_areas;
606
607
608static int set_corruption_check(char *arg)
609{
610 char *end;
611
612 memory_corruption_check = simple_strtol(arg, &end, 10);
613
614 return (*end == 0) ? 0 : -EINVAL;
615}
616early_param("memory_corruption_check", set_corruption_check);
617
618static int set_corruption_check_period(char *arg)
619{
620 char *end;
621
622 corruption_check_period = simple_strtoul(arg, &end, 10);
623
624 return (*end == 0) ? 0 : -EINVAL;
625}
626early_param("memory_corruption_check_period", set_corruption_check_period);
627
628static int set_corruption_check_size(char *arg)
629{ 590{
630 char *end; 591#ifdef CONFIG_X86_SMP
631 unsigned size; 592# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
632 593 genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
633 size = memparse(arg, &end); 594# endif
634
635 if (*end == '\0')
636 corruption_check_size = size;
637
638 return (size == corruption_check_size) ? 0 : -EINVAL;
639}
640early_param("memory_corruption_check_size", set_corruption_check_size);
641
642
643static void __init setup_bios_corruption_check(void)
644{
645 u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
646
647 if (memory_corruption_check == -1) {
648 memory_corruption_check =
649#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
650 1
651#else
652 0
653#endif 595#endif
654 ;
655 }
656
657 if (corruption_check_size == 0)
658 memory_corruption_check = 0;
659
660 if (!memory_corruption_check)
661 return;
662
663 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
664 596
665 while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { 597 return 0;
666 u64 size;
667 addr = find_e820_area_size(addr, &size, PAGE_SIZE);
668
669 if (addr == 0)
670 break;
671
672 if ((addr + size) > corruption_check_size)
673 size = corruption_check_size - addr;
674
675 if (size == 0)
676 break;
677
678 e820_update_range(addr, size, E820_RAM, E820_RESERVED);
679 scan_areas[num_scan_areas].addr = addr;
680 scan_areas[num_scan_areas].size = size;
681 num_scan_areas++;
682
683 /* Assume we've already mapped this early memory */
684 memset(__va(addr), 0, size);
685
686 addr += size;
687 }
688
689 printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
690 num_scan_areas);
691 update_e820();
692}
693
694static struct timer_list periodic_check_timer;
695
696void check_for_bios_corruption(void)
697{
698 int i;
699 int corruption = 0;
700
701 if (!memory_corruption_check)
702 return;
703
704 for(i = 0; i < num_scan_areas; i++) {
705 unsigned long *addr = __va(scan_areas[i].addr);
706 unsigned long size = scan_areas[i].size;
707
708 for(; size; addr++, size -= sizeof(unsigned long)) {
709 if (!*addr)
710 continue;
711 printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
712 addr, __pa(addr), *addr);
713 corruption = 1;
714 *addr = 0;
715 }
716 }
717
718 WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
719}
720
721static void periodic_check_for_corruption(unsigned long data)
722{
723 check_for_bios_corruption();
724 mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
725} 598}
726 599
727void start_periodic_check_for_corruption(void) 600static struct x86_quirks default_x86_quirks __initdata = {
728{ 601 .update_genapic = default_update_genapic,
729 if (!memory_corruption_check || corruption_check_period == 0) 602};
730 return;
731
732 printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
733 corruption_check_period);
734 603
735 init_timer(&periodic_check_timer); 604struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
736 periodic_check_timer.function = &periodic_check_for_corruption;
737 periodic_check_for_corruption(0);
738}
739#endif
740 605
606#ifdef CONFIG_X86_RESERVE_LOW_64K
741static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) 607static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
742{ 608{
743 printk(KERN_NOTICE 609 printk(KERN_NOTICE
@@ -749,6 +615,7 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
749 615
750 return 0; 616 return 0;
751} 617}
618#endif
752 619
753/* List of systems that have known low memory corruption BIOS problems */ 620/* List of systems that have known low memory corruption BIOS problems */
754static struct dmi_system_id __initdata bad_bios_dmi_table[] = { 621static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
@@ -907,6 +774,12 @@ void __init setup_arch(char **cmdline_p)
907 774
908 dmi_check_system(bad_bios_dmi_table); 775 dmi_check_system(bad_bios_dmi_table);
909 776
777 /*
778 * VMware detection requires dmi to be available, so this
779 * needs to be done after dmi_scan_machine, for the BP.
780 */
781 init_hypervisor(&boot_cpu_data);
782
910#ifdef CONFIG_X86_32 783#ifdef CONFIG_X86_32
911 probe_roms(); 784 probe_roms();
912#endif 785#endif
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
deleted file mode 100644
index cc673aa55ce4..000000000000
--- a/arch/x86/kernel/sigframe.h
+++ /dev/null
@@ -1,42 +0,0 @@
1#ifdef CONFIG_X86_32
2struct sigframe {
3 char __user *pretcode;
4 int sig;
5 struct sigcontext sc;
6 /*
7 * fpstate is unused. fpstate is moved/allocated after
8 * retcode[] below. This movement allows to have the FP state and the
9 * future state extensions (xsave) stay together.
10 * And at the same time retaining the unused fpstate, prevents changing
11 * the offset of extramask[] in the sigframe and thus prevent any
12 * legacy application accessing/modifying it.
13 */
14 struct _fpstate fpstate_unused;
15 unsigned long extramask[_NSIG_WORDS-1];
16 char retcode[8];
17 /* fp state follows here */
18};
19
20struct rt_sigframe {
21 char __user *pretcode;
22 int sig;
23 struct siginfo __user *pinfo;
24 void __user *puc;
25 struct siginfo info;
26 struct ucontext uc;
27 char retcode[8];
28 /* fp state follows here */
29};
30#else
31struct rt_sigframe {
32 char __user *pretcode;
33 struct ucontext uc;
34 struct siginfo info;
35 /* fp state follows here */
36};
37
38int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
39 sigset_t *set, struct pt_regs *regs);
40int ia32_setup_frame(int sig, struct k_sigaction *ka,
41 sigset_t *set, struct pt_regs *regs);
42#endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal.c
index d6dd057d0f22..89bb7668041d 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal.c
@@ -1,36 +1,41 @@
1/* 1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
3 * 4 *
4 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson 5 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
5 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 6 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
7 * 2000-2002 x86-64 support by Andi Kleen
6 */ 8 */
7#include <linux/list.h>
8 9
9#include <linux/personality.h> 10#include <linux/sched.h>
10#include <linux/binfmts.h> 11#include <linux/mm.h>
11#include <linux/suspend.h> 12#include <linux/smp.h>
12#include <linux/kernel.h> 13#include <linux/kernel.h>
13#include <linux/ptrace.h>
14#include <linux/signal.h> 14#include <linux/signal.h>
15#include <linux/stddef.h>
16#include <linux/unistd.h>
17#include <linux/errno.h> 15#include <linux/errno.h>
18#include <linux/sched.h>
19#include <linux/wait.h> 16#include <linux/wait.h>
17#include <linux/ptrace.h>
20#include <linux/tracehook.h> 18#include <linux/tracehook.h>
21#include <linux/elf.h> 19#include <linux/unistd.h>
22#include <linux/smp.h> 20#include <linux/stddef.h>
23#include <linux/mm.h> 21#include <linux/personality.h>
22#include <linux/uaccess.h>
24 23
25#include <asm/processor.h> 24#include <asm/processor.h>
26#include <asm/ucontext.h> 25#include <asm/ucontext.h>
27#include <asm/uaccess.h>
28#include <asm/i387.h> 26#include <asm/i387.h>
29#include <asm/vdso.h> 27#include <asm/vdso.h>
28
29#ifdef CONFIG_X86_64
30#include <asm/proto.h>
31#include <asm/ia32_unistd.h>
32#include <asm/mce.h>
33#endif /* CONFIG_X86_64 */
34
30#include <asm/syscall.h> 35#include <asm/syscall.h>
31#include <asm/syscalls.h> 36#include <asm/syscalls.h>
32 37
33#include "sigframe.h" 38#include <asm/sigframe.h>
34 39
35#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) 40#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
36 41
@@ -45,74 +50,6 @@
45# define FIX_EFLAGS __FIX_EFLAGS 50# define FIX_EFLAGS __FIX_EFLAGS
46#endif 51#endif
47 52
48/*
49 * Atomically swap in the new signal mask, and wait for a signal.
50 */
51asmlinkage int
52sys_sigsuspend(int history0, int history1, old_sigset_t mask)
53{
54 mask &= _BLOCKABLE;
55 spin_lock_irq(&current->sighand->siglock);
56 current->saved_sigmask = current->blocked;
57 siginitset(&current->blocked, mask);
58 recalc_sigpending();
59 spin_unlock_irq(&current->sighand->siglock);
60
61 current->state = TASK_INTERRUPTIBLE;
62 schedule();
63 set_restore_sigmask();
64
65 return -ERESTARTNOHAND;
66}
67
68asmlinkage int
69sys_sigaction(int sig, const struct old_sigaction __user *act,
70 struct old_sigaction __user *oact)
71{
72 struct k_sigaction new_ka, old_ka;
73 int ret;
74
75 if (act) {
76 old_sigset_t mask;
77
78 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
79 __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
80 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
81 return -EFAULT;
82
83 __get_user(new_ka.sa.sa_flags, &act->sa_flags);
84 __get_user(mask, &act->sa_mask);
85 siginitset(&new_ka.sa.sa_mask, mask);
86 }
87
88 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
89
90 if (!ret && oact) {
91 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
92 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
93 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
94 return -EFAULT;
95
96 __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
97 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
98 }
99
100 return ret;
101}
102
103asmlinkage int sys_sigaltstack(unsigned long bx)
104{
105 /*
106 * This is needed to make gcc realize it doesn't own the
107 * "struct pt_regs"
108 */
109 struct pt_regs *regs = (struct pt_regs *)&bx;
110 const stack_t __user *uss = (const stack_t __user *)bx;
111 stack_t __user *uoss = (stack_t __user *)regs->cx;
112
113 return do_sigaltstack(uss, uoss, regs->sp);
114}
115
116#define COPY(x) { \ 53#define COPY(x) { \
117 err |= __get_user(regs->x, &sc->x); \ 54 err |= __get_user(regs->x, &sc->x); \
118} 55}
@@ -123,7 +60,7 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
123 regs->seg = tmp; \ 60 regs->seg = tmp; \
124} 61}
125 62
126#define COPY_SEG_STRICT(seg) { \ 63#define COPY_SEG_CPL3(seg) { \
127 unsigned short tmp; \ 64 unsigned short tmp; \
128 err |= __get_user(tmp, &sc->seg); \ 65 err |= __get_user(tmp, &sc->seg); \
129 regs->seg = tmp | 3; \ 66 regs->seg = tmp | 3; \
@@ -135,9 +72,6 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
135 loadsegment(seg, tmp); \ 72 loadsegment(seg, tmp); \
136} 73}
137 74
138/*
139 * Do a signal return; undo the signal stack.
140 */
141static int 75static int
142restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, 76restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
143 unsigned long *pax) 77 unsigned long *pax)
@@ -149,14 +83,36 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
149 /* Always make any pending restarted system calls return -EINTR */ 83 /* Always make any pending restarted system calls return -EINTR */
150 current_thread_info()->restart_block.fn = do_no_restart_syscall; 84 current_thread_info()->restart_block.fn = do_no_restart_syscall;
151 85
86#ifdef CONFIG_X86_32
152 GET_SEG(gs); 87 GET_SEG(gs);
153 COPY_SEG(fs); 88 COPY_SEG(fs);
154 COPY_SEG(es); 89 COPY_SEG(es);
155 COPY_SEG(ds); 90 COPY_SEG(ds);
91#endif /* CONFIG_X86_32 */
92
156 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); 93 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
157 COPY(dx); COPY(cx); COPY(ip); 94 COPY(dx); COPY(cx); COPY(ip);
158 COPY_SEG_STRICT(cs); 95
159 COPY_SEG_STRICT(ss); 96#ifdef CONFIG_X86_64
97 COPY(r8);
98 COPY(r9);
99 COPY(r10);
100 COPY(r11);
101 COPY(r12);
102 COPY(r13);
103 COPY(r14);
104 COPY(r15);
105#endif /* CONFIG_X86_64 */
106
107#ifdef CONFIG_X86_32
108 COPY_SEG_CPL3(cs);
109 COPY_SEG_CPL3(ss);
110#else /* !CONFIG_X86_32 */
111 /* Kernel saves and restores only the CS segment register on signals,
112 * which is the bare minimum needed to allow mixed 32/64-bit code.
113 * App's signal handler can save/restore other segments if needed. */
114 COPY_SEG_CPL3(cs);
115#endif /* CONFIG_X86_32 */
160 116
161 err |= __get_user(tmpflags, &sc->flags); 117 err |= __get_user(tmpflags, &sc->flags);
162 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); 118 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -169,102 +125,24 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
169 return err; 125 return err;
170} 126}
171 127
172asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
173{
174 struct sigframe __user *frame;
175 struct pt_regs *regs;
176 unsigned long ax;
177 sigset_t set;
178
179 regs = (struct pt_regs *) &__unused;
180 frame = (struct sigframe __user *)(regs->sp - 8);
181
182 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
183 goto badframe;
184 if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
185 && __copy_from_user(&set.sig[1], &frame->extramask,
186 sizeof(frame->extramask))))
187 goto badframe;
188
189 sigdelsetmask(&set, ~_BLOCKABLE);
190 spin_lock_irq(&current->sighand->siglock);
191 current->blocked = set;
192 recalc_sigpending();
193 spin_unlock_irq(&current->sighand->siglock);
194
195 if (restore_sigcontext(regs, &frame->sc, &ax))
196 goto badframe;
197 return ax;
198
199badframe:
200 if (show_unhandled_signals && printk_ratelimit()) {
201 printk("%s%s[%d] bad frame in sigreturn frame:"
202 "%p ip:%lx sp:%lx oeax:%lx",
203 task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
204 current->comm, task_pid_nr(current), frame, regs->ip,
205 regs->sp, regs->orig_ax);
206 print_vma_addr(" in ", regs->ip);
207 printk(KERN_CONT "\n");
208 }
209
210 force_sig(SIGSEGV, current);
211
212 return 0;
213}
214
215static long do_rt_sigreturn(struct pt_regs *regs)
216{
217 struct rt_sigframe __user *frame;
218 unsigned long ax;
219 sigset_t set;
220
221 frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
222 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
223 goto badframe;
224 if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
225 goto badframe;
226
227 sigdelsetmask(&set, ~_BLOCKABLE);
228 spin_lock_irq(&current->sighand->siglock);
229 current->blocked = set;
230 recalc_sigpending();
231 spin_unlock_irq(&current->sighand->siglock);
232
233 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
234 goto badframe;
235
236 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
237 goto badframe;
238
239 return ax;
240
241badframe:
242 signal_fault(regs, frame, "rt_sigreturn");
243 return 0;
244}
245
246asmlinkage int sys_rt_sigreturn(unsigned long __unused)
247{
248 struct pt_regs *regs = (struct pt_regs *)&__unused;
249
250 return do_rt_sigreturn(regs);
251}
252
253/*
254 * Set up a signal frame.
255 */
256static int 128static int
257setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, 129setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
258 struct pt_regs *regs, unsigned long mask) 130 struct pt_regs *regs, unsigned long mask)
259{ 131{
260 int tmp, err = 0; 132 int err = 0;
261 133
262 err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); 134#ifdef CONFIG_X86_32
263 savesegment(gs, tmp); 135 {
264 err |= __put_user(tmp, (unsigned int __user *)&sc->gs); 136 unsigned int tmp;
265 137
138 savesegment(gs, tmp);
139 err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
140 }
141 err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
266 err |= __put_user(regs->es, (unsigned int __user *)&sc->es); 142 err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
267 err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); 143 err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
144#endif /* CONFIG_X86_32 */
145
268 err |= __put_user(regs->di, &sc->di); 146 err |= __put_user(regs->di, &sc->di);
269 err |= __put_user(regs->si, &sc->si); 147 err |= __put_user(regs->si, &sc->si);
270 err |= __put_user(regs->bp, &sc->bp); 148 err |= __put_user(regs->bp, &sc->bp);
@@ -273,19 +151,33 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
273 err |= __put_user(regs->dx, &sc->dx); 151 err |= __put_user(regs->dx, &sc->dx);
274 err |= __put_user(regs->cx, &sc->cx); 152 err |= __put_user(regs->cx, &sc->cx);
275 err |= __put_user(regs->ax, &sc->ax); 153 err |= __put_user(regs->ax, &sc->ax);
154#ifdef CONFIG_X86_64
155 err |= __put_user(regs->r8, &sc->r8);
156 err |= __put_user(regs->r9, &sc->r9);
157 err |= __put_user(regs->r10, &sc->r10);
158 err |= __put_user(regs->r11, &sc->r11);
159 err |= __put_user(regs->r12, &sc->r12);
160 err |= __put_user(regs->r13, &sc->r13);
161 err |= __put_user(regs->r14, &sc->r14);
162 err |= __put_user(regs->r15, &sc->r15);
163#endif /* CONFIG_X86_64 */
164
276 err |= __put_user(current->thread.trap_no, &sc->trapno); 165 err |= __put_user(current->thread.trap_no, &sc->trapno);
277 err |= __put_user(current->thread.error_code, &sc->err); 166 err |= __put_user(current->thread.error_code, &sc->err);
278 err |= __put_user(regs->ip, &sc->ip); 167 err |= __put_user(regs->ip, &sc->ip);
168#ifdef CONFIG_X86_32
279 err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); 169 err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
280 err |= __put_user(regs->flags, &sc->flags); 170 err |= __put_user(regs->flags, &sc->flags);
281 err |= __put_user(regs->sp, &sc->sp_at_signal); 171 err |= __put_user(regs->sp, &sc->sp_at_signal);
282 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); 172 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
173#else /* !CONFIG_X86_32 */
174 err |= __put_user(regs->flags, &sc->flags);
175 err |= __put_user(regs->cs, &sc->cs);
176 err |= __put_user(0, &sc->gs);
177 err |= __put_user(0, &sc->fs);
178#endif /* CONFIG_X86_32 */
283 179
284 tmp = save_i387_xstate(fpstate); 180 err |= __put_user(fpstate, &sc->fpstate);
285 if (tmp < 0)
286 err = 1;
287 else
288 err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
289 181
290 /* non-iBCS2 extensions.. */ 182 /* non-iBCS2 extensions.. */
291 err |= __put_user(mask, &sc->oldmask); 183 err |= __put_user(mask, &sc->oldmask);
@@ -295,6 +187,32 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
295} 187}
296 188
297/* 189/*
190 * Set up a signal frame.
191 */
192#ifdef CONFIG_X86_32
193static const struct {
194 u16 poplmovl;
195 u32 val;
196 u16 int80;
197} __attribute__((packed)) retcode = {
198 0xb858, /* popl %eax; movl $..., %eax */
199 __NR_sigreturn,
200 0x80cd, /* int $0x80 */
201};
202
203static const struct {
204 u8 movl;
205 u32 val;
206 u16 int80;
207 u8 pad;
208} __attribute__((packed)) rt_retcode = {
209 0xb8, /* movl $..., %eax */
210 __NR_rt_sigreturn,
211 0x80cd, /* int $0x80 */
212 0
213};
214
215/*
298 * Determine which stack to use.. 216 * Determine which stack to use..
299 */ 217 */
300static inline void __user * 218static inline void __user *
@@ -328,6 +246,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
328 if (used_math()) { 246 if (used_math()) {
329 sp = sp - sig_xstate_size; 247 sp = sp - sig_xstate_size;
330 *fpstate = (struct _fpstate *) sp; 248 *fpstate = (struct _fpstate *) sp;
249 if (save_i387_xstate(*fpstate) < 0)
250 return (void __user *)-1L;
331 } 251 }
332 252
333 sp -= frame_size; 253 sp -= frame_size;
@@ -383,9 +303,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
383 * reasons and because gdb uses it as a signature to notice 303 * reasons and because gdb uses it as a signature to notice
384 * signal handler stack frames. 304 * signal handler stack frames.
385 */ 305 */
386 err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); 306 err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
387 err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
388 err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
389 307
390 if (err) 308 if (err)
391 return -EFAULT; 309 return -EFAULT;
@@ -454,9 +372,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
454 * reasons and because gdb uses it as a signature to notice 372 * reasons and because gdb uses it as a signature to notice
455 * signal handler stack frames. 373 * signal handler stack frames.
456 */ 374 */
457 err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); 375 err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
458 err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
459 err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
460 376
461 if (err) 377 if (err)
462 return -EFAULT; 378 return -EFAULT;
@@ -475,23 +391,293 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
475 391
476 return 0; 392 return 0;
477} 393}
394#else /* !CONFIG_X86_32 */
395/*
396 * Determine which stack to use..
397 */
398static void __user *
399get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
400{
401 /* Default to using normal stack - redzone*/
402 sp -= 128;
403
404 /* This is the X/Open sanctioned signal stack switching. */
405 if (ka->sa.sa_flags & SA_ONSTACK) {
406 if (sas_ss_flags(sp) == 0)
407 sp = current->sas_ss_sp + current->sas_ss_size;
408 }
409
410 return (void __user *)round_down(sp - size, 64);
411}
412
413static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
414 sigset_t *set, struct pt_regs *regs)
415{
416 struct rt_sigframe __user *frame;
417 void __user *fp = NULL;
418 int err = 0;
419 struct task_struct *me = current;
420
421 if (used_math()) {
422 fp = get_stack(ka, regs->sp, sig_xstate_size);
423 frame = (void __user *)round_down(
424 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
425
426 if (save_i387_xstate(fp) < 0)
427 return -EFAULT;
428 } else
429 frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
430
431 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
432 return -EFAULT;
433
434 if (ka->sa.sa_flags & SA_SIGINFO) {
435 if (copy_siginfo_to_user(&frame->info, info))
436 return -EFAULT;
437 }
438
439 /* Create the ucontext. */
440 if (cpu_has_xsave)
441 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
442 else
443 err |= __put_user(0, &frame->uc.uc_flags);
444 err |= __put_user(0, &frame->uc.uc_link);
445 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
446 err |= __put_user(sas_ss_flags(regs->sp),
447 &frame->uc.uc_stack.ss_flags);
448 err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
449 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
450 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
451
452 /* Set up to return from userspace. If provided, use a stub
453 already in userspace. */
454 /* x86-64 should always use SA_RESTORER. */
455 if (ka->sa.sa_flags & SA_RESTORER) {
456 err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
457 } else {
458 /* could use a vstub here */
459 return -EFAULT;
460 }
461
462 if (err)
463 return -EFAULT;
464
465 /* Set up registers for signal handler */
466 regs->di = sig;
467 /* In case the signal handler was declared without prototypes */
468 regs->ax = 0;
469
470 /* This also works for non SA_SIGINFO handlers because they expect the
471 next argument after the signal number on the stack. */
472 regs->si = (unsigned long)&frame->info;
473 regs->dx = (unsigned long)&frame->uc;
474 regs->ip = (unsigned long) ka->sa.sa_handler;
475
476 regs->sp = (unsigned long)frame;
477
478 /* Set up the CS register to run signal handlers in 64-bit mode,
479 even if the handler happens to be interrupting 32-bit code. */
480 regs->cs = __USER_CS;
481
482 return 0;
483}
484#endif /* CONFIG_X86_32 */
485
486#ifdef CONFIG_X86_32
487/*
488 * Atomically swap in the new signal mask, and wait for a signal.
489 */
490asmlinkage int
491sys_sigsuspend(int history0, int history1, old_sigset_t mask)
492{
493 mask &= _BLOCKABLE;
494 spin_lock_irq(&current->sighand->siglock);
495 current->saved_sigmask = current->blocked;
496 siginitset(&current->blocked, mask);
497 recalc_sigpending();
498 spin_unlock_irq(&current->sighand->siglock);
499
500 current->state = TASK_INTERRUPTIBLE;
501 schedule();
502 set_restore_sigmask();
503
504 return -ERESTARTNOHAND;
505}
506
507asmlinkage int
508sys_sigaction(int sig, const struct old_sigaction __user *act,
509 struct old_sigaction __user *oact)
510{
511 struct k_sigaction new_ka, old_ka;
512 int ret;
513
514 if (act) {
515 old_sigset_t mask;
516
517 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
518 __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
519 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
520 return -EFAULT;
521
522 __get_user(new_ka.sa.sa_flags, &act->sa_flags);
523 __get_user(mask, &act->sa_mask);
524 siginitset(&new_ka.sa.sa_mask, mask);
525 }
526
527 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
528
529 if (!ret && oact) {
530 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
531 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
532 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
533 return -EFAULT;
534
535 __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
536 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
537 }
538
539 return ret;
540}
541#endif /* CONFIG_X86_32 */
542
543#ifdef CONFIG_X86_32
544asmlinkage int sys_sigaltstack(unsigned long bx)
545{
546 /*
547 * This is needed to make gcc realize it doesn't own the
548 * "struct pt_regs"
549 */
550 struct pt_regs *regs = (struct pt_regs *)&bx;
551 const stack_t __user *uss = (const stack_t __user *)bx;
552 stack_t __user *uoss = (stack_t __user *)regs->cx;
553
554 return do_sigaltstack(uss, uoss, regs->sp);
555}
556#else /* !CONFIG_X86_32 */
557asmlinkage long
558sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
559 struct pt_regs *regs)
560{
561 return do_sigaltstack(uss, uoss, regs->sp);
562}
563#endif /* CONFIG_X86_32 */
564
565/*
566 * Do a signal return; undo the signal stack.
567 */
568#ifdef CONFIG_X86_32
569asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
570{
571 struct sigframe __user *frame;
572 struct pt_regs *regs;
573 unsigned long ax;
574 sigset_t set;
575
576 regs = (struct pt_regs *) &__unused;
577 frame = (struct sigframe __user *)(regs->sp - 8);
578
579 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
580 goto badframe;
581 if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
582 && __copy_from_user(&set.sig[1], &frame->extramask,
583 sizeof(frame->extramask))))
584 goto badframe;
585
586 sigdelsetmask(&set, ~_BLOCKABLE);
587 spin_lock_irq(&current->sighand->siglock);
588 current->blocked = set;
589 recalc_sigpending();
590 spin_unlock_irq(&current->sighand->siglock);
591
592 if (restore_sigcontext(regs, &frame->sc, &ax))
593 goto badframe;
594 return ax;
595
596badframe:
597 signal_fault(regs, frame, "sigreturn");
598
599 return 0;
600}
601#endif /* CONFIG_X86_32 */
602
603static long do_rt_sigreturn(struct pt_regs *regs)
604{
605 struct rt_sigframe __user *frame;
606 unsigned long ax;
607 sigset_t set;
608
609 frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
610 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
611 goto badframe;
612 if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
613 goto badframe;
614
615 sigdelsetmask(&set, ~_BLOCKABLE);
616 spin_lock_irq(&current->sighand->siglock);
617 current->blocked = set;
618 recalc_sigpending();
619 spin_unlock_irq(&current->sighand->siglock);
620
621 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
622 goto badframe;
623
624 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
625 goto badframe;
626
627 return ax;
628
629badframe:
630 signal_fault(regs, frame, "rt_sigreturn");
631 return 0;
632}
633
634#ifdef CONFIG_X86_32
635asmlinkage int sys_rt_sigreturn(struct pt_regs regs)
636{
637 return do_rt_sigreturn(&regs);
638}
639#else /* !CONFIG_X86_32 */
640asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
641{
642 return do_rt_sigreturn(regs);
643}
644#endif /* CONFIG_X86_32 */
478 645
479/* 646/*
480 * OK, we're invoking a handler: 647 * OK, we're invoking a handler:
481 */ 648 */
482static int signr_convert(int sig) 649static int signr_convert(int sig)
483{ 650{
651#ifdef CONFIG_X86_32
484 struct thread_info *info = current_thread_info(); 652 struct thread_info *info = current_thread_info();
485 653
486 if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) 654 if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
487 return info->exec_domain->signal_invmap[sig]; 655 return info->exec_domain->signal_invmap[sig];
656#endif /* CONFIG_X86_32 */
488 return sig; 657 return sig;
489} 658}
490 659
660#ifdef CONFIG_X86_32
661
491#define is_ia32 1 662#define is_ia32 1
492#define ia32_setup_frame __setup_frame 663#define ia32_setup_frame __setup_frame
493#define ia32_setup_rt_frame __setup_rt_frame 664#define ia32_setup_rt_frame __setup_rt_frame
494 665
666#else /* !CONFIG_X86_32 */
667
668#ifdef CONFIG_IA32_EMULATION
669#define is_ia32 test_thread_flag(TIF_IA32)
670#else /* !CONFIG_IA32_EMULATION */
671#define is_ia32 0
672#endif /* CONFIG_IA32_EMULATION */
673
674int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
675 sigset_t *set, struct pt_regs *regs);
676int ia32_setup_frame(int sig, struct k_sigaction *ka,
677 sigset_t *set, struct pt_regs *regs);
678
679#endif /* CONFIG_X86_32 */
680
495static int 681static int
496setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 682setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
497 sigset_t *set, struct pt_regs *regs) 683 sigset_t *set, struct pt_regs *regs)
@@ -592,7 +778,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
592 return 0; 778 return 0;
593} 779}
594 780
781#ifdef CONFIG_X86_32
595#define NR_restart_syscall __NR_restart_syscall 782#define NR_restart_syscall __NR_restart_syscall
783#else /* !CONFIG_X86_32 */
784#define NR_restart_syscall \
785 test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
786#endif /* CONFIG_X86_32 */
787
596/* 788/*
597 * Note that 'init' is a special process: it doesn't get signals it doesn't 789 * Note that 'init' is a special process: it doesn't get signals it doesn't
598 * want to handle. Thus you cannot kill init even with a SIGKILL even by 790 * want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -704,8 +896,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
704 struct task_struct *me = current; 896 struct task_struct *me = current;
705 897
706 if (show_unhandled_signals && printk_ratelimit()) { 898 if (show_unhandled_signals && printk_ratelimit()) {
707 printk(KERN_INFO 899 printk("%s"
708 "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", 900 "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
901 task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
709 me->comm, me->pid, where, frame, 902 me->comm, me->pid, where, frame,
710 regs->ip, regs->sp, regs->orig_ax); 903 regs->ip, regs->sp, regs->orig_ax);
711 print_vma_addr(" in ", regs->ip); 904 print_vma_addr(" in ", regs->ip);
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
deleted file mode 100644
index a5c9627f4db9..000000000000
--- a/arch/x86/kernel/signal_64.c
+++ /dev/null
@@ -1,516 +0,0 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
4 *
5 * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
6 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
7 * 2000-2002 x86-64 support by Andi Kleen
8 */
9
10#include <linux/sched.h>
11#include <linux/mm.h>
12#include <linux/smp.h>
13#include <linux/kernel.h>
14#include <linux/signal.h>
15#include <linux/errno.h>
16#include <linux/wait.h>
17#include <linux/ptrace.h>
18#include <linux/tracehook.h>
19#include <linux/unistd.h>
20#include <linux/stddef.h>
21#include <linux/personality.h>
22#include <linux/compiler.h>
23#include <linux/uaccess.h>
24
25#include <asm/processor.h>
26#include <asm/ucontext.h>
27#include <asm/i387.h>
28#include <asm/proto.h>
29#include <asm/ia32_unistd.h>
30#include <asm/mce.h>
31#include <asm/syscall.h>
32#include <asm/syscalls.h>
33#include "sigframe.h"
34
35#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
36
37#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
38 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
39 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
40 X86_EFLAGS_CF)
41
42#ifdef CONFIG_X86_32
43# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
44#else
45# define FIX_EFLAGS __FIX_EFLAGS
46#endif
47
48asmlinkage long
49sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
50 struct pt_regs *regs)
51{
52 return do_sigaltstack(uss, uoss, regs->sp);
53}
54
55#define COPY(x) { \
56 err |= __get_user(regs->x, &sc->x); \
57}
58
59#define COPY_SEG_STRICT(seg) { \
60 unsigned short tmp; \
61 err |= __get_user(tmp, &sc->seg); \
62 regs->seg = tmp | 3; \
63}
64
65/*
66 * Do a signal return; undo the signal stack.
67 */
68static int
69restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
70 unsigned long *pax)
71{
72 void __user *buf;
73 unsigned int tmpflags;
74 unsigned int err = 0;
75
76 /* Always make any pending restarted system calls return -EINTR */
77 current_thread_info()->restart_block.fn = do_no_restart_syscall;
78
79 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
80 COPY(dx); COPY(cx); COPY(ip);
81 COPY(r8);
82 COPY(r9);
83 COPY(r10);
84 COPY(r11);
85 COPY(r12);
86 COPY(r13);
87 COPY(r14);
88 COPY(r15);
89
90 /* Kernel saves and restores only the CS segment register on signals,
91 * which is the bare minimum needed to allow mixed 32/64-bit code.
92 * App's signal handler can save/restore other segments if needed. */
93 COPY_SEG_STRICT(cs);
94
95 err |= __get_user(tmpflags, &sc->flags);
96 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
97 regs->orig_ax = -1; /* disable syscall checks */
98
99 err |= __get_user(buf, &sc->fpstate);
100 err |= restore_i387_xstate(buf);
101
102 err |= __get_user(*pax, &sc->ax);
103 return err;
104}
105
106static long do_rt_sigreturn(struct pt_regs *regs)
107{
108 struct rt_sigframe __user *frame;
109 unsigned long ax;
110 sigset_t set;
111
112 frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
113 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
114 goto badframe;
115 if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
116 goto badframe;
117
118 sigdelsetmask(&set, ~_BLOCKABLE);
119 spin_lock_irq(&current->sighand->siglock);
120 current->blocked = set;
121 recalc_sigpending();
122 spin_unlock_irq(&current->sighand->siglock);
123
124 if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
125 goto badframe;
126
127 if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
128 goto badframe;
129
130 return ax;
131
132badframe:
133 signal_fault(regs, frame, "rt_sigreturn");
134 return 0;
135}
136
137asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
138{
139 return do_rt_sigreturn(regs);
140}
141
142/*
143 * Set up a signal frame.
144 */
145
146static inline int
147setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
148 unsigned long mask, struct task_struct *me)
149{
150 int err = 0;
151
152 err |= __put_user(regs->cs, &sc->cs);
153 err |= __put_user(0, &sc->gs);
154 err |= __put_user(0, &sc->fs);
155
156 err |= __put_user(regs->di, &sc->di);
157 err |= __put_user(regs->si, &sc->si);
158 err |= __put_user(regs->bp, &sc->bp);
159 err |= __put_user(regs->sp, &sc->sp);
160 err |= __put_user(regs->bx, &sc->bx);
161 err |= __put_user(regs->dx, &sc->dx);
162 err |= __put_user(regs->cx, &sc->cx);
163 err |= __put_user(regs->ax, &sc->ax);
164 err |= __put_user(regs->r8, &sc->r8);
165 err |= __put_user(regs->r9, &sc->r9);
166 err |= __put_user(regs->r10, &sc->r10);
167 err |= __put_user(regs->r11, &sc->r11);
168 err |= __put_user(regs->r12, &sc->r12);
169 err |= __put_user(regs->r13, &sc->r13);
170 err |= __put_user(regs->r14, &sc->r14);
171 err |= __put_user(regs->r15, &sc->r15);
172 err |= __put_user(me->thread.trap_no, &sc->trapno);
173 err |= __put_user(me->thread.error_code, &sc->err);
174 err |= __put_user(regs->ip, &sc->ip);
175 err |= __put_user(regs->flags, &sc->flags);
176 err |= __put_user(mask, &sc->oldmask);
177 err |= __put_user(me->thread.cr2, &sc->cr2);
178
179 return err;
180}
181
182/*
183 * Determine which stack to use..
184 */
185
186static void __user *
187get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
188{
189 unsigned long sp;
190
191 /* Default to using normal stack - redzone*/
192 sp = regs->sp - 128;
193
194 /* This is the X/Open sanctioned signal stack switching. */
195 if (ka->sa.sa_flags & SA_ONSTACK) {
196 if (sas_ss_flags(sp) == 0)
197 sp = current->sas_ss_sp + current->sas_ss_size;
198 }
199
200 return (void __user *)round_down(sp - size, 64);
201}
202
203static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
204 sigset_t *set, struct pt_regs *regs)
205{
206 struct rt_sigframe __user *frame;
207 void __user *fp = NULL;
208 int err = 0;
209 struct task_struct *me = current;
210
211 if (used_math()) {
212 fp = get_stack(ka, regs, sig_xstate_size);
213 frame = (void __user *)round_down(
214 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
215
216 if (save_i387_xstate(fp) < 0)
217 return -EFAULT;
218 } else
219 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
220
221 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
222 return -EFAULT;
223
224 if (ka->sa.sa_flags & SA_SIGINFO) {
225 if (copy_siginfo_to_user(&frame->info, info))
226 return -EFAULT;
227 }
228
229 /* Create the ucontext. */
230 if (cpu_has_xsave)
231 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
232 else
233 err |= __put_user(0, &frame->uc.uc_flags);
234 err |= __put_user(0, &frame->uc.uc_link);
235 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
236 err |= __put_user(sas_ss_flags(regs->sp),
237 &frame->uc.uc_stack.ss_flags);
238 err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
239 err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
240 err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
241 if (sizeof(*set) == 16) {
242 __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
243 __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
244 } else
245 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
246
247 /* Set up to return from userspace. If provided, use a stub
248 already in userspace. */
249 /* x86-64 should always use SA_RESTORER. */
250 if (ka->sa.sa_flags & SA_RESTORER) {
251 err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
252 } else {
253 /* could use a vstub here */
254 return -EFAULT;
255 }
256
257 if (err)
258 return -EFAULT;
259
260 /* Set up registers for signal handler */
261 regs->di = sig;
262 /* In case the signal handler was declared without prototypes */
263 regs->ax = 0;
264
265 /* This also works for non SA_SIGINFO handlers because they expect the
266 next argument after the signal number on the stack. */
267 regs->si = (unsigned long)&frame->info;
268 regs->dx = (unsigned long)&frame->uc;
269 regs->ip = (unsigned long) ka->sa.sa_handler;
270
271 regs->sp = (unsigned long)frame;
272
273 /* Set up the CS register to run signal handlers in 64-bit mode,
274 even if the handler happens to be interrupting 32-bit code. */
275 regs->cs = __USER_CS;
276
277 return 0;
278}
279
280/*
281 * OK, we're invoking a handler
282 */
283static int signr_convert(int sig)
284{
285 return sig;
286}
287
288#ifdef CONFIG_IA32_EMULATION
289#define is_ia32 test_thread_flag(TIF_IA32)
290#else
291#define is_ia32 0
292#endif
293
294static int
295setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
296 sigset_t *set, struct pt_regs *regs)
297{
298 int usig = signr_convert(sig);
299 int ret;
300
301 /* Set up the stack frame */
302 if (is_ia32) {
303 if (ka->sa.sa_flags & SA_SIGINFO)
304 ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
305 else
306 ret = ia32_setup_frame(usig, ka, set, regs);
307 } else
308 ret = __setup_rt_frame(sig, ka, info, set, regs);
309
310 if (ret) {
311 force_sigsegv(sig, current);
312 return -EFAULT;
313 }
314
315 return ret;
316}
317
318static int
319handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
320 sigset_t *oldset, struct pt_regs *regs)
321{
322 int ret;
323
324 /* Are we from a system call? */
325 if (syscall_get_nr(current, regs) >= 0) {
326 /* If so, check system call restarting.. */
327 switch (syscall_get_error(current, regs)) {
328 case -ERESTART_RESTARTBLOCK:
329 case -ERESTARTNOHAND:
330 regs->ax = -EINTR;
331 break;
332
333 case -ERESTARTSYS:
334 if (!(ka->sa.sa_flags & SA_RESTART)) {
335 regs->ax = -EINTR;
336 break;
337 }
338 /* fallthrough */
339 case -ERESTARTNOINTR:
340 regs->ax = regs->orig_ax;
341 regs->ip -= 2;
342 break;
343 }
344 }
345
346 /*
347 * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF
348 * flag so that register information in the sigcontext is correct.
349 */
350 if (unlikely(regs->flags & X86_EFLAGS_TF) &&
351 likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
352 regs->flags &= ~X86_EFLAGS_TF;
353
354 ret = setup_rt_frame(sig, ka, info, oldset, regs);
355
356 if (ret)
357 return ret;
358
359#ifdef CONFIG_X86_64
360 /*
361 * This has nothing to do with segment registers,
362 * despite the name. This magic affects uaccess.h
363 * macros' behavior. Reset it to the normal setting.
364 */
365 set_fs(USER_DS);
366#endif
367
368 /*
369 * Clear the direction flag as per the ABI for function entry.
370 */
371 regs->flags &= ~X86_EFLAGS_DF;
372
373 /*
374 * Clear TF when entering the signal handler, but
375 * notify any tracer that was single-stepping it.
376 * The tracer may want to single-step inside the
377 * handler too.
378 */
379 regs->flags &= ~X86_EFLAGS_TF;
380
381 spin_lock_irq(&current->sighand->siglock);
382 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
383 if (!(ka->sa.sa_flags & SA_NODEFER))
384 sigaddset(&current->blocked, sig);
385 recalc_sigpending();
386 spin_unlock_irq(&current->sighand->siglock);
387
388 tracehook_signal_handler(sig, info, ka, regs,
389 test_thread_flag(TIF_SINGLESTEP));
390
391 return 0;
392}
393
394#define NR_restart_syscall \
395 test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
396/*
397 * Note that 'init' is a special process: it doesn't get signals it doesn't
398 * want to handle. Thus you cannot kill init even with a SIGKILL even by
399 * mistake.
400 */
401static void do_signal(struct pt_regs *regs)
402{
403 struct k_sigaction ka;
404 siginfo_t info;
405 int signr;
406 sigset_t *oldset;
407
408 /*
409 * We want the common case to go fast, which is why we may in certain
410 * cases get here from kernel mode. Just return without doing anything
411 * if so.
412 * X86_32: vm86 regs switched out by assembly code before reaching
413 * here, so testing against kernel CS suffices.
414 */
415 if (!user_mode(regs))
416 return;
417
418 if (current_thread_info()->status & TS_RESTORE_SIGMASK)
419 oldset = &current->saved_sigmask;
420 else
421 oldset = &current->blocked;
422
423 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
424 if (signr > 0) {
425 /*
426 * Re-enable any watchpoints before delivering the
427 * signal to user space. The processor register will
428 * have been cleared if the watchpoint triggered
429 * inside the kernel.
430 */
431 if (current->thread.debugreg7)
432 set_debugreg(current->thread.debugreg7, 7);
433
434 /* Whee! Actually deliver the signal. */
435 if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
436 /*
437 * A signal was successfully delivered; the saved
438 * sigmask will have been stored in the signal frame,
439 * and will be restored by sigreturn, so we can simply
440 * clear the TS_RESTORE_SIGMASK flag.
441 */
442 current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
443 }
444 return;
445 }
446
447 /* Did we come from a system call? */
448 if (syscall_get_nr(current, regs) >= 0) {
449 /* Restart the system call - no handlers present */
450 switch (syscall_get_error(current, regs)) {
451 case -ERESTARTNOHAND:
452 case -ERESTARTSYS:
453 case -ERESTARTNOINTR:
454 regs->ax = regs->orig_ax;
455 regs->ip -= 2;
456 break;
457
458 case -ERESTART_RESTARTBLOCK:
459 regs->ax = NR_restart_syscall;
460 regs->ip -= 2;
461 break;
462 }
463 }
464
465 /*
466 * If there's no signal to deliver, we just put the saved sigmask
467 * back.
468 */
469 if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
470 current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
471 sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
472 }
473}
474
475/*
476 * notification of userspace execution resumption
477 * - triggered by the TIF_WORK_MASK flags
478 */
479void
480do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
481{
482#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
483 /* notify userspace of pending MCEs */
484 if (thread_info_flags & _TIF_MCE_NOTIFY)
485 mce_notify_user();
486#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
487
488 /* deal with pending signal delivery */
489 if (thread_info_flags & _TIF_SIGPENDING)
490 do_signal(regs);
491
492 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
493 clear_thread_flag(TIF_NOTIFY_RESUME);
494 tracehook_notify_resume(regs);
495 }
496
497#ifdef CONFIG_X86_32
498 clear_thread_flag(TIF_IRET);
499#endif /* CONFIG_X86_32 */
500}
501
502void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
503{
504 struct task_struct *me = current;
505
506 if (show_unhandled_signals && printk_ratelimit()) {
507 printk(KERN_INFO
508 "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
509 me->comm, me->pid, where, frame,
510 regs->ip, regs->sp, regs->orig_ax);
511 print_vma_addr(" in ", regs->ip);
512 printk(KERN_CONT "\n");
513 }
514
515 force_sig(SIGSEGV, me);
516}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18f9b19f5f8f..7e558db362c1 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -140,19 +140,6 @@ void native_send_call_func_ipi(cpumask_t mask)
140 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 140 send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
141} 141}
142 142
143static void stop_this_cpu(void *dummy)
144{
145 local_irq_disable();
146 /*
147 * Remove this CPU:
148 */
149 cpu_clear(smp_processor_id(), cpu_online_map);
150 disable_local_APIC();
151 if (hlt_works(smp_processor_id()))
152 for (;;) halt();
153 for (;;);
154}
155
156/* 143/*
157 * this function calls the 'stop' function on all other CPUs in the system. 144 * this function calls the 'stop' function on all other CPUs in the system.
158 */ 145 */
@@ -178,11 +165,7 @@ static void native_smp_send_stop(void)
178void smp_reschedule_interrupt(struct pt_regs *regs) 165void smp_reschedule_interrupt(struct pt_regs *regs)
179{ 166{
180 ack_APIC_irq(); 167 ack_APIC_irq();
181#ifdef CONFIG_X86_32 168 inc_irq_stat(irq_resched_count);
182 __get_cpu_var(irq_stat).irq_resched_count++;
183#else
184 add_pda(irq_resched_count, 1);
185#endif
186} 169}
187 170
188void smp_call_function_interrupt(struct pt_regs *regs) 171void smp_call_function_interrupt(struct pt_regs *regs)
@@ -190,11 +173,7 @@ void smp_call_function_interrupt(struct pt_regs *regs)
190 ack_APIC_irq(); 173 ack_APIC_irq();
191 irq_enter(); 174 irq_enter();
192 generic_smp_call_function_interrupt(); 175 generic_smp_call_function_interrupt();
193#ifdef CONFIG_X86_32 176 inc_irq_stat(irq_call_count);
194 __get_cpu_var(irq_stat).irq_call_count++;
195#else
196 add_pda(irq_call_count, 1);
197#endif
198 irq_exit(); 177 irq_exit();
199} 178}
200 179
@@ -203,11 +182,7 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
203 ack_APIC_irq(); 182 ack_APIC_irq();
204 irq_enter(); 183 irq_enter();
205 generic_smp_call_function_single_interrupt(); 184 generic_smp_call_function_single_interrupt();
206#ifdef CONFIG_X86_32 185 inc_irq_stat(irq_call_count);
207 __get_cpu_var(irq_stat).irq_call_count++;
208#else
209 add_pda(irq_call_count, 1);
210#endif
211 irq_exit(); 186 irq_exit();
212} 187}
213 188
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f71f96fc9e62..7a430c4d1551 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
62#include <asm/mtrr.h> 62#include <asm/mtrr.h>
63#include <asm/vmi.h> 63#include <asm/vmi.h>
64#include <asm/genapic.h> 64#include <asm/genapic.h>
65#include <asm/setup.h>
65#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
66 67
67#include <mach_apic.h> 68#include <mach_apic.h>
@@ -534,7 +535,7 @@ static void impress_friends(void)
534 pr_debug("Before bogocount - setting activated=1.\n"); 535 pr_debug("Before bogocount - setting activated=1.\n");
535} 536}
536 537
537static inline void __inquire_remote_apic(int apicid) 538void __inquire_remote_apic(int apicid)
538{ 539{
539 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; 540 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
540 char *names[] = { "ID", "VERSION", "SPIV" }; 541 char *names[] = { "ID", "VERSION", "SPIV" };
@@ -573,14 +574,13 @@ static inline void __inquire_remote_apic(int apicid)
573 } 574 }
574} 575}
575 576
576#ifdef WAKE_SECONDARY_VIA_NMI
577/* 577/*
578 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal 578 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
579 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this 579 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
580 * won't ... remember to clear down the APIC, etc later. 580 * won't ... remember to clear down the APIC, etc later.
581 */ 581 */
582static int __devinit 582int __devinit
583wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) 583wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
584{ 584{
585 unsigned long send_status, accept_status = 0; 585 unsigned long send_status, accept_status = 0;
586 int maxlvt; 586 int maxlvt;
@@ -597,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
597 * Give the other CPU some time to accept the IPI. 597 * Give the other CPU some time to accept the IPI.
598 */ 598 */
599 udelay(200); 599 udelay(200);
600 if (APIC_INTEGRATED(apic_version[phys_apicid])) { 600 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
601 maxlvt = lapic_get_maxlvt(); 601 maxlvt = lapic_get_maxlvt();
602 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 602 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
603 apic_write(APIC_ESR, 0); 603 apic_write(APIC_ESR, 0);
@@ -612,11 +612,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
612 612
613 return (send_status | accept_status); 613 return (send_status | accept_status);
614} 614}
615#endif /* WAKE_SECONDARY_VIA_NMI */
616 615
617#ifdef WAKE_SECONDARY_VIA_INIT 616int __devinit
618static int __devinit 617wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
619wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
620{ 618{
621 unsigned long send_status, accept_status = 0; 619 unsigned long send_status, accept_status = 0;
622 int maxlvt, num_starts, j; 620 int maxlvt, num_starts, j;
@@ -735,7 +733,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
735 733
736 return (send_status | accept_status); 734 return (send_status | accept_status);
737} 735}
738#endif /* WAKE_SECONDARY_VIA_INIT */
739 736
740struct create_idle { 737struct create_idle {
741 struct work_struct work; 738 struct work_struct work;
@@ -1084,8 +1081,10 @@ static int __init smp_sanity_check(unsigned max_cpus)
1084#endif 1081#endif
1085 1082
1086 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { 1083 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
1087 printk(KERN_WARNING "weird, boot CPU (#%d) not listed" 1084 printk(KERN_WARNING
1088 "by the BIOS.\n", hard_smp_processor_id()); 1085 "weird, boot CPU (#%d) not listed by the BIOS.\n",
1086 hard_smp_processor_id());
1087
1089 physid_set(hard_smp_processor_id(), phys_cpu_present_map); 1088 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1090 } 1089 }
1091 1090
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 77b400f06ea2..65309e4cb1c0 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(profile_pc);
75irqreturn_t timer_interrupt(int irq, void *dev_id) 75irqreturn_t timer_interrupt(int irq, void *dev_id)
76{ 76{
77 /* Keep nmi watchdog up to date */ 77 /* Keep nmi watchdog up to date */
78 per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; 78 inc_irq_stat(irq0_irqs);
79 79
80#ifdef CONFIG_X86_IO_APIC 80#ifdef CONFIG_X86_IO_APIC
81 if (timer_ack) { 81 if (timer_ack) {
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index cb19d650c216..891e7a7c4334 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -49,9 +49,9 @@ unsigned long profile_pc(struct pt_regs *regs)
49} 49}
50EXPORT_SYMBOL(profile_pc); 50EXPORT_SYMBOL(profile_pc);
51 51
52irqreturn_t timer_interrupt(int irq, void *dev_id) 52static irqreturn_t timer_interrupt(int irq, void *dev_id)
53{ 53{
54 add_pda(irq0_irqs, 1); 54 inc_irq_stat(irq0_irqs);
55 55
56 global_clock_event->event_handler(global_clock_event); 56 global_clock_event->event_handler(global_clock_event);
57 57
@@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void)
80 break; 80 break;
81 no_ctr_free = (i == 4); 81 no_ctr_free = (i == 4);
82 if (no_ctr_free) { 82 if (no_ctr_free) {
83 WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
84 "cpu_khz value may be incorrect.\n");
83 i = 3; 85 i = 3;
84 rdmsrl(MSR_K7_EVNTSEL3, evntsel3); 86 rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
85 wrmsrl(MSR_K7_EVNTSEL3, 0); 87 wrmsrl(MSR_K7_EVNTSEL3, 0);
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index f4049f3513b6..8da059f949be 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -34,9 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock);
34 */ 34 */
35void leave_mm(int cpu) 35void leave_mm(int cpu)
36{ 36{
37 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) 37 BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
38 BUG(); 38 cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
40 load_cr3(swapper_pg_dir); 39 load_cr3(swapper_pg_dir);
41} 40}
42EXPORT_SYMBOL_GPL(leave_mm); 41EXPORT_SYMBOL_GPL(leave_mm);
@@ -104,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
104 * BUG(); 103 * BUG();
105 */ 104 */
106 105
107 if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { 106 if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
108 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { 107 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
109 if (flush_va == TLB_FLUSH_ALL) 108 if (flush_va == TLB_FLUSH_ALL)
110 local_flush_tlb(); 109 local_flush_tlb();
111 else 110 else
@@ -119,7 +118,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
119 smp_mb__after_clear_bit(); 118 smp_mb__after_clear_bit();
120out: 119out:
121 put_cpu_no_resched(); 120 put_cpu_no_resched();
122 __get_cpu_var(irq_stat).irq_tlb_count++; 121 inc_irq_stat(irq_tlb_count);
123} 122}
124 123
125void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 124void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
@@ -238,7 +237,7 @@ static void do_flush_tlb_all(void *info)
238 unsigned long cpu = smp_processor_id(); 237 unsigned long cpu = smp_processor_id();
239 238
240 __flush_tlb_all(); 239 __flush_tlb_all();
241 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) 240 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
242 leave_mm(cpu); 241 leave_mm(cpu);
243} 242}
244 243
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 8f919ca69494..29887d7081a9 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -154,7 +154,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
154out: 154out:
155 ack_APIC_irq(); 155 ack_APIC_irq();
156 cpu_clear(cpu, f->flush_cpumask); 156 cpu_clear(cpu, f->flush_cpumask);
157 add_pda(irq_tlb_count, 1); 157 inc_irq_stat(irq_tlb_count);
158} 158}
159 159
160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 04431f34fd16..6a00e5faaa74 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -566,14 +566,10 @@ static int __init uv_ptc_init(void)
566 if (!is_uv_system()) 566 if (!is_uv_system())
567 return 0; 567 return 0;
568 568
569 if (!proc_mkdir("sgi_uv", NULL))
570 return -EINVAL;
571
572 proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); 569 proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
573 if (!proc_uv_ptc) { 570 if (!proc_uv_ptc) {
574 printk(KERN_ERR "unable to create %s proc entry\n", 571 printk(KERN_ERR "unable to create %s proc entry\n",
575 UV_PTC_BASENAME); 572 UV_PTC_BASENAME);
576 remove_proc_entry("sgi_uv", NULL);
577 return -EINVAL; 573 return -EINVAL;
578 } 574 }
579 proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; 575 proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 1106fac6024d..808031a5ba19 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -1,10 +1,26 @@
1#include <linux/io.h> 1#include <linux/io.h>
2 2
3#include <asm/trampoline.h> 3#include <asm/trampoline.h>
4#include <asm/e820.h>
4 5
5/* ready for x86_64 and x86 */ 6/* ready for x86_64 and x86 */
6unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); 7unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
7 8
9void __init reserve_trampoline_memory(void)
10{
11#ifdef CONFIG_X86_32
12 /*
13 * But first pinch a few for the stack/trampoline stuff
14 * FIXME: Don't need the extra page at 4K, but need to fix
15 * trampoline before removing it. (see the GDT stuff)
16 */
17 reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
18#endif
19 /* Has to be in very low memory so we can execute real-mode AP code. */
20 reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE,
21 "TRAMPOLINE");
22}
23
8/* 24/*
9 * Currently trivial. Write the real->protected mode 25 * Currently trivial. Write the real->protected mode
10 * bootstrap into the page concerned. The caller 26 * bootstrap into the page concerned. The caller
@@ -12,7 +28,6 @@ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
12 */ 28 */
13unsigned long setup_trampoline(void) 29unsigned long setup_trampoline(void)
14{ 30{
15 memcpy(trampoline_base, trampoline_data, 31 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
16 trampoline_end - trampoline_data);
17 return virt_to_phys(trampoline_base); 32 return virt_to_phys(trampoline_base);
18} 33}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 04d242ab0161..141907ab6e22 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -481,11 +481,7 @@ do_nmi(struct pt_regs *regs, long error_code)
481{ 481{
482 nmi_enter(); 482 nmi_enter();
483 483
484#ifdef CONFIG_X86_32 484 inc_irq_stat(__nmi_count);
485 { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
486#else
487 add_pda(__nmi_count, 1);
488#endif
489 485
490 if (!ignore_nmis) 486 if (!ignore_nmis)
491 default_do_nmi(regs); 487 default_do_nmi(regs);
@@ -664,7 +660,7 @@ void math_error(void __user *ip)
664{ 660{
665 struct task_struct *task; 661 struct task_struct *task;
666 siginfo_t info; 662 siginfo_t info;
667 unsigned short cwd, swd; 663 unsigned short cwd, swd, err;
668 664
669 /* 665 /*
670 * Save the info for the exception handler and clear the error. 666 * Save the info for the exception handler and clear the error.
@@ -675,7 +671,6 @@ void math_error(void __user *ip)
675 task->thread.error_code = 0; 671 task->thread.error_code = 0;
676 info.si_signo = SIGFPE; 672 info.si_signo = SIGFPE;
677 info.si_errno = 0; 673 info.si_errno = 0;
678 info.si_code = __SI_FAULT;
679 info.si_addr = ip; 674 info.si_addr = ip;
680 /* 675 /*
681 * (~cwd & swd) will mask out exceptions that are not set to unmasked 676 * (~cwd & swd) will mask out exceptions that are not set to unmasked
@@ -689,34 +684,31 @@ void math_error(void __user *ip)
689 */ 684 */
690 cwd = get_fpu_cwd(task); 685 cwd = get_fpu_cwd(task);
691 swd = get_fpu_swd(task); 686 swd = get_fpu_swd(task);
692 switch (swd & ~cwd & 0x3f) { 687
693 case 0x000: /* No unmasked exception */ 688 err = swd & ~cwd & 0x3f;
689
694#ifdef CONFIG_X86_32 690#ifdef CONFIG_X86_32
691 if (!err)
695 return; 692 return;
696#endif 693#endif
697 default: /* Multiple exceptions */ 694
698 break; 695 if (err & 0x001) { /* Invalid op */
699 case 0x001: /* Invalid Op */
700 /* 696 /*
701 * swd & 0x240 == 0x040: Stack Underflow 697 * swd & 0x240 == 0x040: Stack Underflow
702 * swd & 0x240 == 0x240: Stack Overflow 698 * swd & 0x240 == 0x240: Stack Overflow
703 * User must clear the SF bit (0x40) if set 699 * User must clear the SF bit (0x40) if set
704 */ 700 */
705 info.si_code = FPE_FLTINV; 701 info.si_code = FPE_FLTINV;
706 break; 702 } else if (err & 0x004) { /* Divide by Zero */
707 case 0x002: /* Denormalize */
708 case 0x010: /* Underflow */
709 info.si_code = FPE_FLTUND;
710 break;
711 case 0x004: /* Zero Divide */
712 info.si_code = FPE_FLTDIV; 703 info.si_code = FPE_FLTDIV;
713 break; 704 } else if (err & 0x008) { /* Overflow */
714 case 0x008: /* Overflow */
715 info.si_code = FPE_FLTOVF; 705 info.si_code = FPE_FLTOVF;
716 break; 706 } else if (err & 0x012) { /* Denormal, Underflow */
717 case 0x020: /* Precision */ 707 info.si_code = FPE_FLTUND;
708 } else if (err & 0x020) { /* Precision */
718 info.si_code = FPE_FLTRES; 709 info.si_code = FPE_FLTRES;
719 break; 710 } else {
711 info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
720 } 712 }
721 force_sig_info(SIGFPE, &info, task); 713 force_sig_info(SIGFPE, &info, task);
722} 714}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 424093b157d3..599e58168631 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -15,6 +15,7 @@
15#include <asm/vgtod.h> 15#include <asm/vgtod.h>
16#include <asm/time.h> 16#include <asm/time.h>
17#include <asm/delay.h> 17#include <asm/delay.h>
18#include <asm/hypervisor.h>
18 19
19unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 20unsigned int cpu_khz; /* TSC clocks / usec, not used here */
20EXPORT_SYMBOL(cpu_khz); 21EXPORT_SYMBOL(cpu_khz);
@@ -31,6 +32,7 @@ static int tsc_unstable;
31 erroneous rdtsc usage on !cpu_has_tsc processors */ 32 erroneous rdtsc usage on !cpu_has_tsc processors */
32static int tsc_disabled = -1; 33static int tsc_disabled = -1;
33 34
35static int tsc_clocksource_reliable;
34/* 36/*
35 * Scheduler clock - returns current time in nanosec units. 37 * Scheduler clock - returns current time in nanosec units.
36 */ 38 */
@@ -98,6 +100,15 @@ int __init notsc_setup(char *str)
98 100
99__setup("notsc", notsc_setup); 101__setup("notsc", notsc_setup);
100 102
103static int __init tsc_setup(char *str)
104{
105 if (!strcmp(str, "reliable"))
106 tsc_clocksource_reliable = 1;
107 return 1;
108}
109
110__setup("tsc=", tsc_setup);
111
101#define MAX_RETRIES 5 112#define MAX_RETRIES 5
102#define SMI_TRESHOLD 50000 113#define SMI_TRESHOLD 50000
103 114
@@ -352,9 +363,15 @@ unsigned long native_calibrate_tsc(void)
352{ 363{
353 u64 tsc1, tsc2, delta, ref1, ref2; 364 u64 tsc1, tsc2, delta, ref1, ref2;
354 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; 365 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
355 unsigned long flags, latch, ms, fast_calibrate; 366 unsigned long flags, latch, ms, fast_calibrate, tsc_khz;
356 int hpet = is_hpet_enabled(), i, loopmin; 367 int hpet = is_hpet_enabled(), i, loopmin;
357 368
369 tsc_khz = get_hypervisor_tsc_freq();
370 if (tsc_khz) {
371 printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
372 return tsc_khz;
373 }
374
358 local_irq_save(flags); 375 local_irq_save(flags);
359 fast_calibrate = quick_pit_calibrate(); 376 fast_calibrate = quick_pit_calibrate();
360 local_irq_restore(flags); 377 local_irq_restore(flags);
@@ -731,24 +748,21 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
731 {} 748 {}
732}; 749};
733 750
734/* 751static void __init check_system_tsc_reliable(void)
735 * Geode_LX - the OLPC CPU has a possibly a very reliable TSC 752{
736 */
737#ifdef CONFIG_MGEODE_LX 753#ifdef CONFIG_MGEODE_LX
738/* RTSC counts during suspend */ 754 /* RTSC counts during suspend */
739#define RTSC_SUSP 0x100 755#define RTSC_SUSP 0x100
740
741static void __init check_geode_tsc_reliable(void)
742{
743 unsigned long res_low, res_high; 756 unsigned long res_low, res_high;
744 757
745 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); 758 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
759 /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */
746 if (res_low & RTSC_SUSP) 760 if (res_low & RTSC_SUSP)
747 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; 761 tsc_clocksource_reliable = 1;
748}
749#else
750static inline void check_geode_tsc_reliable(void) { }
751#endif 762#endif
763 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
764 tsc_clocksource_reliable = 1;
765}
752 766
753/* 767/*
754 * Make an educated guess if the TSC is trustworthy and synchronized 768 * Make an educated guess if the TSC is trustworthy and synchronized
@@ -783,6 +797,8 @@ static void __init init_tsc_clocksource(void)
783{ 797{
784 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, 798 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
785 clocksource_tsc.shift); 799 clocksource_tsc.shift);
800 if (tsc_clocksource_reliable)
801 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
786 /* lower the rating if we already know its unstable: */ 802 /* lower the rating if we already know its unstable: */
787 if (check_tsc_unstable()) { 803 if (check_tsc_unstable()) {
788 clocksource_tsc.rating = 0; 804 clocksource_tsc.rating = 0;
@@ -843,7 +859,7 @@ void __init tsc_init(void)
843 if (unsynchronized_tsc()) 859 if (unsynchronized_tsc())
844 mark_tsc_unstable("TSCs unsynchronized"); 860 mark_tsc_unstable("TSCs unsynchronized");
845 861
846 check_geode_tsc_reliable(); 862 check_system_tsc_reliable();
847 init_tsc_clocksource(); 863 init_tsc_clocksource();
848} 864}
849 865
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 1c0dfbca87c1..bf36328f6ef9 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -112,6 +112,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
112 if (unsynchronized_tsc()) 112 if (unsynchronized_tsc())
113 return; 113 return;
114 114
115 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
116 printk(KERN_INFO
117 "Skipping synchronization checks as TSC is reliable.\n");
118 return;
119 }
120
115 printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", 121 printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
116 smp_processor_id(), cpu); 122 smp_processor_id(), cpu);
117 123
@@ -165,7 +171,7 @@ void __cpuinit check_tsc_sync_target(void)
165{ 171{
166 int cpus = 2; 172 int cpus = 2;
167 173
168 if (unsynchronized_tsc()) 174 if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
169 return; 175 return;
170 176
171 /* 177 /*
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 22fd6577156a..23206ba16874 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -266,109 +266,6 @@ static void vmi_nop(void)
266{ 266{
267} 267}
268 268
269#ifdef CONFIG_DEBUG_PAGE_TYPE
270
271#ifdef CONFIG_X86_PAE
272#define MAX_BOOT_PTS (2048+4+1)
273#else
274#define MAX_BOOT_PTS (1024+1)
275#endif
276
277/*
278 * During boot, mem_map is not yet available in paging_init, so stash
279 * all the boot page allocations here.
280 */
281static struct {
282 u32 pfn;
283 int type;
284} boot_page_allocations[MAX_BOOT_PTS];
285static int num_boot_page_allocations;
286static int boot_allocations_applied;
287
288void vmi_apply_boot_page_allocations(void)
289{
290 int i;
291 BUG_ON(!mem_map);
292 for (i = 0; i < num_boot_page_allocations; i++) {
293 struct page *page = pfn_to_page(boot_page_allocations[i].pfn);
294 page->type = boot_page_allocations[i].type;
295 page->type = boot_page_allocations[i].type &
296 ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
297 }
298 boot_allocations_applied = 1;
299}
300
301static void record_page_type(u32 pfn, int type)
302{
303 BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS);
304 boot_page_allocations[num_boot_page_allocations].pfn = pfn;
305 boot_page_allocations[num_boot_page_allocations].type = type;
306 num_boot_page_allocations++;
307}
308
309static void check_zeroed_page(u32 pfn, int type, struct page *page)
310{
311 u32 *ptr;
312 int i;
313 int limit = PAGE_SIZE / sizeof(int);
314
315 if (page_address(page))
316 ptr = (u32 *)page_address(page);
317 else
318 ptr = (u32 *)__va(pfn << PAGE_SHIFT);
319 /*
320 * When cloning the root in non-PAE mode, only the userspace
321 * pdes need to be zeroed.
322 */
323 if (type & VMI_PAGE_CLONE)
324 limit = KERNEL_PGD_BOUNDARY;
325 for (i = 0; i < limit; i++)
326 BUG_ON(ptr[i]);
327}
328
329/*
330 * We stash the page type into struct page so we can verify the page
331 * types are used properly.
332 */
333static void vmi_set_page_type(u32 pfn, int type)
334{
335 /* PAE can have multiple roots per page - don't track */
336 if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
337 return;
338
339 if (boot_allocations_applied) {
340 struct page *page = pfn_to_page(pfn);
341 if (type != VMI_PAGE_NORMAL)
342 BUG_ON(page->type);
343 else
344 BUG_ON(page->type == VMI_PAGE_NORMAL);
345 page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
346 if (type & VMI_PAGE_ZEROED)
347 check_zeroed_page(pfn, type, page);
348 } else {
349 record_page_type(pfn, type);
350 }
351}
352
353static void vmi_check_page_type(u32 pfn, int type)
354{
355 /* PAE can have multiple roots per page - skip checks */
356 if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
357 return;
358
359 type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
360 if (boot_allocations_applied) {
361 struct page *page = pfn_to_page(pfn);
362 BUG_ON((page->type ^ type) & VMI_PAGE_PAE);
363 BUG_ON(type == VMI_PAGE_NORMAL && page->type);
364 BUG_ON((type & page->type) == 0);
365 }
366}
367#else
368#define vmi_set_page_type(p,t) do { } while (0)
369#define vmi_check_page_type(p,t) do { } while (0)
370#endif
371
372#ifdef CONFIG_HIGHPTE 269#ifdef CONFIG_HIGHPTE
373static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) 270static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
374{ 271{
@@ -395,7 +292,6 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
395 292
396static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) 293static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
397{ 294{
398 vmi_set_page_type(pfn, VMI_PAGE_L1);
399 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); 295 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
400} 296}
401 297
@@ -406,27 +302,22 @@ static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
406 * It is called only for swapper_pg_dir, which already has 302 * It is called only for swapper_pg_dir, which already has
407 * data on it. 303 * data on it.
408 */ 304 */
409 vmi_set_page_type(pfn, VMI_PAGE_L2);
410 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); 305 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
411} 306}
412 307
413static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) 308static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
414{ 309{
415 vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
416 vmi_check_page_type(clonepfn, VMI_PAGE_L2);
417 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); 310 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
418} 311}
419 312
420static void vmi_release_pte(unsigned long pfn) 313static void vmi_release_pte(unsigned long pfn)
421{ 314{
422 vmi_ops.release_page(pfn, VMI_PAGE_L1); 315 vmi_ops.release_page(pfn, VMI_PAGE_L1);
423 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
424} 316}
425 317
426static void vmi_release_pmd(unsigned long pfn) 318static void vmi_release_pmd(unsigned long pfn)
427{ 319{
428 vmi_ops.release_page(pfn, VMI_PAGE_L2); 320 vmi_ops.release_page(pfn, VMI_PAGE_L2);
429 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
430} 321}
431 322
432/* 323/*
@@ -450,26 +341,22 @@ static void vmi_release_pmd(unsigned long pfn)
450 341
451static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 342static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
452{ 343{
453 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
454 vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); 344 vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
455} 345}
456 346
457static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 347static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
458{ 348{
459 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
460 vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); 349 vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
461} 350}
462 351
463static void vmi_set_pte(pte_t *ptep, pte_t pte) 352static void vmi_set_pte(pte_t *ptep, pte_t pte)
464{ 353{
465 /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ 354 /* XXX because of set_pmd_pte, this can be called on PT or PD layers */
466 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD);
467 vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); 355 vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
468} 356}
469 357
470static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) 358static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
471{ 359{
472 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
473 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); 360 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
474} 361}
475 362
@@ -477,10 +364,8 @@ static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
477{ 364{
478#ifdef CONFIG_X86_PAE 365#ifdef CONFIG_X86_PAE
479 const pte_t pte = { .pte = pmdval.pmd }; 366 const pte_t pte = { .pte = pmdval.pmd };
480 vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD);
481#else 367#else
482 const pte_t pte = { pmdval.pud.pgd.pgd }; 368 const pte_t pte = { pmdval.pud.pgd.pgd };
483 vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD);
484#endif 369#endif
485 vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); 370 vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD);
486} 371}
@@ -502,7 +387,6 @@ static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
502 387
503static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) 388static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
504{ 389{
505 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
506 vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); 390 vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1));
507} 391}
508 392
@@ -510,21 +394,18 @@ static void vmi_set_pud(pud_t *pudp, pud_t pudval)
510{ 394{
511 /* Um, eww */ 395 /* Um, eww */
512 const pte_t pte = { .pte = pudval.pgd.pgd }; 396 const pte_t pte = { .pte = pudval.pgd.pgd };
513 vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD);
514 vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); 397 vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
515} 398}
516 399
517static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 400static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
518{ 401{
519 const pte_t pte = { .pte = 0 }; 402 const pte_t pte = { .pte = 0 };
520 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
521 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); 403 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
522} 404}
523 405
524static void vmi_pmd_clear(pmd_t *pmd) 406static void vmi_pmd_clear(pmd_t *pmd)
525{ 407{
526 const pte_t pte = { .pte = 0 }; 408 const pte_t pte = { .pte = 0 };
527 vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
528 vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); 409 vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
529} 410}
530#endif 411#endif
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..ebf2f12900f5 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -128,7 +128,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
128 gettimeofday(tv,NULL); 128 gettimeofday(tv,NULL);
129 return; 129 return;
130 } 130 }
131
132 /*
133 * Surround the RDTSC by barriers, to make sure it's not
134 * speculated to outside the seqlock critical section and
135 * does not cause time warps:
136 */
137 rdtsc_barrier();
131 now = vread(); 138 now = vread();
139 rdtsc_barrier();
140
132 base = __vsyscall_gtod_data.clock.cycle_last; 141 base = __vsyscall_gtod_data.clock.cycle_last;
133 mask = __vsyscall_gtod_data.clock.mask; 142 mask = __vsyscall_gtod_data.clock.mask;
134 mult = __vsyscall_gtod_data.clock.mult; 143 mult = __vsyscall_gtod_data.clock.mult;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1ace1cf..50a779264bb1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -590,7 +590,8 @@ static void __init lguest_init_IRQ(void)
590 * a straightforward 1 to 1 mapping, so force that here. */ 590 * a straightforward 1 to 1 mapping, so force that here. */
591 __get_cpu_var(vector_irq)[vector] = i; 591 __get_cpu_var(vector_irq)[vector] = i;
592 if (vector != SYSCALL_VECTOR) { 592 if (vector != SYSCALL_VECTOR) {
593 set_intr_gate(vector, interrupt[vector]); 593 set_intr_gate(vector,
594 interrupt[vector-FIRST_EXTERNAL_VECTOR]);
594 set_irq_chip_and_handler_name(i, &lguest_irq_controller, 595 set_irq_chip_and_handler_name(i, &lguest_irq_controller,
595 handle_level_irq, 596 handle_level_irq,
596 "level"); 597 "level");
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3c3b471ea496..3624a364b7f3 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -17,6 +17,7 @@
17#include <asm/bigsmp/apic.h> 17#include <asm/bigsmp/apic.h>
18#include <asm/bigsmp/ipi.h> 18#include <asm/bigsmp/ipi.h>
19#include <asm/mach-default/mach_mpparse.h> 19#include <asm/mach-default/mach_mpparse.h>
20#include <asm/mach-default/mach_wakecpu.h>
20 21
21static int dmi_bigsmp; /* can be set by dmi scanners */ 22static int dmi_bigsmp; /* can be set by dmi scanners */
22 23
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c
index 9e835a11a13a..e63a4a76d8cd 100644
--- a/arch/x86/mach-generic/default.c
+++ b/arch/x86/mach-generic/default.c
@@ -16,6 +16,7 @@
16#include <asm/mach-default/mach_apic.h> 16#include <asm/mach-default/mach_apic.h>
17#include <asm/mach-default/mach_ipi.h> 17#include <asm/mach-default/mach_ipi.h>
18#include <asm/mach-default/mach_mpparse.h> 18#include <asm/mach-default/mach_mpparse.h>
19#include <asm/mach-default/mach_wakecpu.h>
19 20
20/* should be called last. */ 21/* should be called last. */
21static int probe_default(void) 22static int probe_default(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 28459cab3ddb..7b4e6d0d1690 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -16,7 +16,19 @@
16#include <asm/es7000/apic.h> 16#include <asm/es7000/apic.h>
17#include <asm/es7000/ipi.h> 17#include <asm/es7000/ipi.h>
18#include <asm/es7000/mpparse.h> 18#include <asm/es7000/mpparse.h>
19#include <asm/es7000/wakecpu.h> 19#include <asm/mach-default/mach_wakecpu.h>
20
21void __init es7000_update_genapic_to_cluster(void)
22{
23 genapic->target_cpus = target_cpus_cluster;
24 genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
25 genapic->int_dest_mode = INT_DEST_MODE_CLUSTER;
26 genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER;
27
28 genapic->init_apic_ldr = init_apic_ldr_cluster;
29
30 genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster;
31}
20 32
21static int probe_es7000(void) 33static int probe_es7000(void)
22{ 34{
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index 5a7e4619e1c4..c346d9d0226f 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -15,6 +15,7 @@
15#include <asm/mpspec.h> 15#include <asm/mpspec.h>
16#include <asm/apicdef.h> 16#include <asm/apicdef.h>
17#include <asm/genapic.h> 17#include <asm/genapic.h>
18#include <asm/setup.h>
18 19
19extern struct genapic apic_numaq; 20extern struct genapic apic_numaq;
20extern struct genapic apic_summit; 21extern struct genapic apic_summit;
@@ -57,6 +58,9 @@ static int __init parse_apic(char *arg)
57 } 58 }
58 } 59 }
59 60
61 if (x86_quirks->update_genapic)
62 x86_quirks->update_genapic();
63
60 /* Parsed again by __setup for debug/verbose */ 64 /* Parsed again by __setup for debug/verbose */
61 return 0; 65 return 0;
62} 66}
@@ -72,12 +76,15 @@ void __init generic_bigsmp_probe(void)
72 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support 76 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
73 */ 77 */
74 78
75 if (!cmdline_apic && genapic == &apic_default) 79 if (!cmdline_apic && genapic == &apic_default) {
76 if (apic_bigsmp.probe()) { 80 if (apic_bigsmp.probe()) {
77 genapic = &apic_bigsmp; 81 genapic = &apic_bigsmp;
82 if (x86_quirks->update_genapic)
83 x86_quirks->update_genapic();
78 printk(KERN_INFO "Overriding APIC driver with %s\n", 84 printk(KERN_INFO "Overriding APIC driver with %s\n",
79 genapic->name); 85 genapic->name);
80 } 86 }
87 }
81#endif 88#endif
82} 89}
83 90
@@ -94,6 +101,9 @@ void __init generic_apic_probe(void)
94 /* Not visible without early console */ 101 /* Not visible without early console */
95 if (!apic_probe[i]) 102 if (!apic_probe[i])
96 panic("Didn't find an APIC driver"); 103 panic("Didn't find an APIC driver");
104
105 if (x86_quirks->update_genapic)
106 x86_quirks->update_genapic();
97 } 107 }
98 printk(KERN_INFO "Using APIC driver %s\n", genapic->name); 108 printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
99} 109}
@@ -108,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
108 if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { 118 if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
109 if (!cmdline_apic) { 119 if (!cmdline_apic) {
110 genapic = apic_probe[i]; 120 genapic = apic_probe[i];
121 if (x86_quirks->update_genapic)
122 x86_quirks->update_genapic();
111 printk(KERN_INFO "Switched to APIC driver `%s'.\n", 123 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
112 genapic->name); 124 genapic->name);
113 } 125 }
@@ -124,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
124 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { 136 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
125 if (!cmdline_apic) { 137 if (!cmdline_apic) {
126 genapic = apic_probe[i]; 138 genapic = apic_probe[i];
139 if (x86_quirks->update_genapic)
140 x86_quirks->update_genapic();
127 printk(KERN_INFO "Switched to APIC driver `%s'.\n", 141 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
128 genapic->name); 142 genapic->name);
129 } 143 }
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6272b5e69da6..2c6d234e0009 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -16,6 +16,7 @@
16#include <asm/summit/apic.h> 16#include <asm/summit/apic.h>
17#include <asm/summit/ipi.h> 17#include <asm/summit/ipi.h>
18#include <asm/summit/mpparse.h> 18#include <asm/summit/mpparse.h>
19#include <asm/mach-default/mach_wakecpu.h>
19 20
20static int probe_summit(void) 21static int probe_summit(void)
21{ 22{
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3a1b6ef4f05d..46b5f753ff81 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
413 unsigned long error_code) 413 unsigned long error_code)
414{ 414{
415 unsigned long flags = oops_begin(); 415 unsigned long flags = oops_begin();
416 int sig = SIGKILL;
416 struct task_struct *tsk; 417 struct task_struct *tsk;
417 418
418 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", 419 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
423 tsk->thread.trap_no = 14; 424 tsk->thread.trap_no = 14;
424 tsk->thread.error_code = error_code; 425 tsk->thread.error_code = error_code;
425 if (__die("Bad pagetable", regs, error_code)) 426 if (__die("Bad pagetable", regs, error_code))
426 regs = NULL; 427 sig = 0;
427 oops_end(flags, regs, SIGKILL); 428 oops_end(flags, regs, sig);
428} 429}
429#endif 430#endif
430 431
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
590 int fault; 591 int fault;
591#ifdef CONFIG_X86_64 592#ifdef CONFIG_X86_64
592 unsigned long flags; 593 unsigned long flags;
594 int sig;
593#endif 595#endif
594 596
595 tsk = current; 597 tsk = current;
@@ -849,11 +851,12 @@ no_context:
849 bust_spinlocks(0); 851 bust_spinlocks(0);
850 do_exit(SIGKILL); 852 do_exit(SIGKILL);
851#else 853#else
854 sig = SIGKILL;
852 if (__die("Oops", regs, error_code)) 855 if (__die("Oops", regs, error_code))
853 regs = NULL; 856 sig = 0;
854 /* Executive summary in case the body of the oops scrolled away */ 857 /* Executive summary in case the body of the oops scrolled away */
855 printk(KERN_EMERG "CR2: %016lx\n", address); 858 printk(KERN_EMERG "CR2: %016lx\n", address);
856 oops_end(flags, regs, SIGKILL); 859 oops_end(flags, regs, sig);
857#endif 860#endif
858 861
859/* 862/*
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c483f4242079..800e1d94c1b5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -67,7 +67,7 @@ static unsigned long __meminitdata table_top;
67 67
68static int __initdata after_init_bootmem; 68static int __initdata after_init_bootmem;
69 69
70static __init void *alloc_low_page(unsigned long *phys) 70static __init void *alloc_low_page(void)
71{ 71{
72 unsigned long pfn = table_end++; 72 unsigned long pfn = table_end++;
73 void *adr; 73 void *adr;
@@ -77,7 +77,6 @@ static __init void *alloc_low_page(unsigned long *phys)
77 77
78 adr = __va(pfn * PAGE_SIZE); 78 adr = __va(pfn * PAGE_SIZE);
79 memset(adr, 0, PAGE_SIZE); 79 memset(adr, 0, PAGE_SIZE);
80 *phys = pfn * PAGE_SIZE;
81 return adr; 80 return adr;
82} 81}
83 82
@@ -92,16 +91,17 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
92 pmd_t *pmd_table; 91 pmd_t *pmd_table;
93 92
94#ifdef CONFIG_X86_PAE 93#ifdef CONFIG_X86_PAE
95 unsigned long phys;
96 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 94 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
97 if (after_init_bootmem) 95 if (after_init_bootmem)
98 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); 96 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
99 else 97 else
100 pmd_table = (pmd_t *)alloc_low_page(&phys); 98 pmd_table = (pmd_t *)alloc_low_page();
101 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 99 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
102 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 100 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
103 pud = pud_offset(pgd, 0); 101 pud = pud_offset(pgd, 0);
104 BUG_ON(pmd_table != pmd_offset(pud, 0)); 102 BUG_ON(pmd_table != pmd_offset(pud, 0));
103
104 return pmd_table;
105 } 105 }
106#endif 106#endif
107 pud = pud_offset(pgd, 0); 107 pud = pud_offset(pgd, 0);
@@ -126,10 +126,8 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
126 if (!page_table) 126 if (!page_table)
127 page_table = 127 page_table =
128 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); 128 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
129 } else { 129 } else
130 unsigned long phys; 130 page_table = (pte_t *)alloc_low_page();
131 page_table = (pte_t *)alloc_low_page(&phys);
132 }
133 131
134 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); 132 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
135 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 133 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
@@ -969,8 +967,6 @@ void __init mem_init(void)
969 int codesize, reservedpages, datasize, initsize; 967 int codesize, reservedpages, datasize, initsize;
970 int tmp; 968 int tmp;
971 969
972 start_periodic_check_for_corruption();
973
974#ifdef CONFIG_FLATMEM 970#ifdef CONFIG_FLATMEM
975 BUG_ON(!mem_map); 971 BUG_ON(!mem_map);
976#endif 972#endif
@@ -1040,11 +1036,25 @@ void __init mem_init(void)
1040 (unsigned long)&_text, (unsigned long)&_etext, 1036 (unsigned long)&_text, (unsigned long)&_etext,
1041 ((unsigned long)&_etext - (unsigned long)&_text) >> 10); 1037 ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
1042 1038
1039 /*
1040 * Check boundaries twice: Some fundamental inconsistencies can
1041 * be detected at build time already.
1042 */
1043#define __FIXADDR_TOP (-PAGE_SIZE)
1044#ifdef CONFIG_HIGHMEM
1045 BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
1046 BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
1047#endif
1048#define high_memory (-128UL << 20)
1049 BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
1050#undef high_memory
1051#undef __FIXADDR_TOP
1052
1043#ifdef CONFIG_HIGHMEM 1053#ifdef CONFIG_HIGHMEM
1044 BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); 1054 BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
1045 BUG_ON(VMALLOC_END > PKMAP_BASE); 1055 BUG_ON(VMALLOC_END > PKMAP_BASE);
1046#endif 1056#endif
1047 BUG_ON(VMALLOC_START > VMALLOC_END); 1057 BUG_ON(VMALLOC_START >= VMALLOC_END);
1048 BUG_ON((unsigned long)high_memory > VMALLOC_START); 1058 BUG_ON((unsigned long)high_memory > VMALLOC_START);
1049 1059
1050 if (boot_cpu_data.wp_works_ok < 0) 1060 if (boot_cpu_data.wp_works_ok < 0)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9db01db6e3cd..9f7a0d24d42a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -902,8 +902,6 @@ void __init mem_init(void)
902 long codesize, reservedpages, datasize, initsize; 902 long codesize, reservedpages, datasize, initsize;
903 unsigned long absent_pages; 903 unsigned long absent_pages;
904 904
905 start_periodic_check_for_corruption();
906
907 pci_iommu_alloc(); 905 pci_iommu_alloc();
908 906
909 /* clear_bss() already clear the empty_zero_page */ 907 /* clear_bss() already clear the empty_zero_page */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4c4307ff3e0..bd85d42819e1 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -223,7 +223,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
223 * Check if the request spans more than any BAR in the iomem resource 223 * Check if the request spans more than any BAR in the iomem resource
224 * tree. 224 * tree.
225 */ 225 */
226 WARN_ON(iomem_map_sanity_check(phys_addr, size)); 226 WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
227 KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
227 228
228 /* 229 /*
229 * Don't allow anybody to remap normal RAM that we're using.. 230 * Don't allow anybody to remap normal RAM that we're using..
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index eb1bf000d12e..85cbd3cd3723 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -596,6 +596,242 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
596 free_memtype(addr, addr + size); 596 free_memtype(addr, addr + size);
597} 597}
598 598
599/*
600 * Internal interface to reserve a range of physical memory with prot.
601 * Reserved non RAM regions only and after successful reserve_memtype,
602 * this func also keeps identity mapping (if any) in sync with this new prot.
603 */
604static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
605{
606 int is_ram = 0;
607 int id_sz, ret;
608 unsigned long flags;
609 unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
610
611 is_ram = pagerange_is_ram(paddr, paddr + size);
612
613 if (is_ram != 0) {
614 /*
615 * For mapping RAM pages, drivers need to call
616 * set_memory_[uc|wc|wb] directly, for reserve and free, before
617 * setting up the PTE.
618 */
619 WARN_ON_ONCE(1);
620 return 0;
621 }
622
623 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
624 if (ret)
625 return ret;
626
627 if (flags != want_flags) {
628 free_memtype(paddr, paddr + size);
629 printk(KERN_ERR
630 "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n",
631 current->comm, current->pid,
632 cattr_name(want_flags),
633 (unsigned long long)paddr,
634 (unsigned long long)(paddr + size),
635 cattr_name(flags));
636 return -EINVAL;
637 }
638
639 /* Need to keep identity mapping in sync */
640 if (paddr >= __pa(high_memory))
641 return 0;
642
643 id_sz = (__pa(high_memory) < paddr + size) ?
644 __pa(high_memory) - paddr :
645 size;
646
647 if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
648 free_memtype(paddr, paddr + size);
649 printk(KERN_ERR
650 "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
651 "for %Lx-%Lx\n",
652 current->comm, current->pid,
653 cattr_name(flags),
654 (unsigned long long)paddr,
655 (unsigned long long)(paddr + size));
656 return -EINVAL;
657 }
658 return 0;
659}
660
661/*
662 * Internal interface to free a range of physical memory.
663 * Frees non RAM regions only.
664 */
665static void free_pfn_range(u64 paddr, unsigned long size)
666{
667 int is_ram;
668
669 is_ram = pagerange_is_ram(paddr, paddr + size);
670 if (is_ram == 0)
671 free_memtype(paddr, paddr + size);
672}
673
674/*
675 * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
676 * copied through copy_page_range().
677 *
678 * If the vma has a linear pfn mapping for the entire range, we get the prot
679 * from pte and reserve the entire vma range with single reserve_pfn_range call.
680 * Otherwise, we reserve the entire vma range, my ging through the PTEs page
681 * by page to get physical address and protection.
682 */
683int track_pfn_vma_copy(struct vm_area_struct *vma)
684{
685 int retval = 0;
686 unsigned long i, j;
687 resource_size_t paddr;
688 unsigned long prot;
689 unsigned long vma_start = vma->vm_start;
690 unsigned long vma_end = vma->vm_end;
691 unsigned long vma_size = vma_end - vma_start;
692
693 if (!pat_enabled)
694 return 0;
695
696 if (is_linear_pfn_mapping(vma)) {
697 /*
698 * reserve the whole chunk covered by vma. We need the
699 * starting address and protection from pte.
700 */
701 if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
702 WARN_ON_ONCE(1);
703 return -EINVAL;
704 }
705 return reserve_pfn_range(paddr, vma_size, __pgprot(prot));
706 }
707
708 /* reserve entire vma page by page, using pfn and prot from pte */
709 for (i = 0; i < vma_size; i += PAGE_SIZE) {
710 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
711 continue;
712
713 retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot));
714 if (retval)
715 goto cleanup_ret;
716 }
717 return 0;
718
719cleanup_ret:
720 /* Reserve error: Cleanup partial reservation and return error */
721 for (j = 0; j < i; j += PAGE_SIZE) {
722 if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
723 continue;
724
725 free_pfn_range(paddr, PAGE_SIZE);
726 }
727
728 return retval;
729}
730
731/*
732 * track_pfn_vma_new is called when a _new_ pfn mapping is being established
733 * for physical range indicated by pfn and size.
734 *
735 * prot is passed in as a parameter for the new mapping. If the vma has a
736 * linear pfn mapping for the entire range reserve the entire vma range with
737 * single reserve_pfn_range call.
738 * Otherwise, we look t the pfn and size and reserve only the specified range
739 * page by page.
740 *
741 * Note that this function can be called with caller trying to map only a
742 * subrange/page inside the vma.
743 */
744int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
745 unsigned long pfn, unsigned long size)
746{
747 int retval = 0;
748 unsigned long i, j;
749 resource_size_t base_paddr;
750 resource_size_t paddr;
751 unsigned long vma_start = vma->vm_start;
752 unsigned long vma_end = vma->vm_end;
753 unsigned long vma_size = vma_end - vma_start;
754
755 if (!pat_enabled)
756 return 0;
757
758 if (is_linear_pfn_mapping(vma)) {
759 /* reserve the whole chunk starting from vm_pgoff */
760 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
761 return reserve_pfn_range(paddr, vma_size, prot);
762 }
763
764 /* reserve page by page using pfn and size */
765 base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
766 for (i = 0; i < size; i += PAGE_SIZE) {
767 paddr = base_paddr + i;
768 retval = reserve_pfn_range(paddr, PAGE_SIZE, prot);
769 if (retval)
770 goto cleanup_ret;
771 }
772 return 0;
773
774cleanup_ret:
775 /* Reserve error: Cleanup partial reservation and return error */
776 for (j = 0; j < i; j += PAGE_SIZE) {
777 paddr = base_paddr + j;
778 free_pfn_range(paddr, PAGE_SIZE);
779 }
780
781 return retval;
782}
783
784/*
785 * untrack_pfn_vma is called while unmapping a pfnmap for a region.
786 * untrack can be called for a specific region indicated by pfn and size or
787 * can be for the entire vma (in which case size can be zero).
788 */
789void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
790 unsigned long size)
791{
792 unsigned long i;
793 resource_size_t paddr;
794 unsigned long prot;
795 unsigned long vma_start = vma->vm_start;
796 unsigned long vma_end = vma->vm_end;
797 unsigned long vma_size = vma_end - vma_start;
798
799 if (!pat_enabled)
800 return;
801
802 if (is_linear_pfn_mapping(vma)) {
803 /* free the whole chunk starting from vm_pgoff */
804 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
805 free_pfn_range(paddr, vma_size);
806 return;
807 }
808
809 if (size != 0 && size != vma_size) {
810 /* free page by page, using pfn and size */
811 paddr = (resource_size_t)pfn << PAGE_SHIFT;
812 for (i = 0; i < size; i += PAGE_SIZE) {
813 paddr = paddr + i;
814 free_pfn_range(paddr, PAGE_SIZE);
815 }
816 } else {
817 /* free entire vma, page by page, using the pfn from pte */
818 for (i = 0; i < vma_size; i += PAGE_SIZE) {
819 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
820 continue;
821
822 free_pfn_range(paddr, PAGE_SIZE);
823 }
824 }
825}
826
827pgprot_t pgprot_writecombine(pgprot_t prot)
828{
829 if (pat_enabled)
830 return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
831 else
832 return pgprot_noncached(prot);
833}
834
599#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) 835#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
600 836
601/* get Nth element of the linked list */ 837/* get Nth element of the linked list */
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index b67732bbb85a..bb1a01f089e2 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -23,6 +23,12 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
23unsigned int pci_early_dump_regs; 23unsigned int pci_early_dump_regs;
24static int pci_bf_sort; 24static int pci_bf_sort;
25int pci_routeirq; 25int pci_routeirq;
26int noioapicquirk;
27#ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS
28int noioapicreroute = 0;
29#else
30int noioapicreroute = 1;
31#endif
26int pcibios_last_bus = -1; 32int pcibios_last_bus = -1;
27unsigned long pirq_table_addr; 33unsigned long pirq_table_addr;
28struct pci_bus *pci_root_bus; 34struct pci_bus *pci_root_bus;
@@ -519,6 +525,17 @@ char * __devinit pcibios_setup(char *str)
519 } else if (!strcmp(str, "skip_isa_align")) { 525 } else if (!strcmp(str, "skip_isa_align")) {
520 pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; 526 pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
521 return NULL; 527 return NULL;
528 } else if (!strcmp(str, "noioapicquirk")) {
529 noioapicquirk = 1;
530 return NULL;
531 } else if (!strcmp(str, "ioapicreroute")) {
532 if (noioapicreroute != -1)
533 noioapicreroute = 0;
534 return NULL;
535 } else if (!strcmp(str, "noioapicreroute")) {
536 if (noioapicreroute != -1)
537 noioapicreroute = 1;
538 return NULL;
522 } 539 }
523 return str; 540 return str;
524} 541}
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9915293500fb..9a5af6c8fbe9 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus,
173 173
174#undef PCI_CONF2_ADDRESS 174#undef PCI_CONF2_ADDRESS
175 175
176static struct pci_raw_ops pci_direct_conf2 = { 176struct pci_raw_ops pci_direct_conf2 = {
177 .read = pci_conf2_read, 177 .read = pci_conf2_read,
178 .write = pci_conf2_write, 178 .write = pci_conf2_write,
179}; 179};
@@ -289,6 +289,7 @@ int __init pci_direct_probe(void)
289 289
290 if (pci_check_type1()) { 290 if (pci_check_type1()) {
291 raw_pci_ops = &pci_direct_conf1; 291 raw_pci_ops = &pci_direct_conf1;
292 port_cf9_safe = true;
292 return 1; 293 return 1;
293 } 294 }
294 release_resource(region); 295 release_resource(region);
@@ -305,6 +306,7 @@ int __init pci_direct_probe(void)
305 306
306 if (pci_check_type2()) { 307 if (pci_check_type2()) {
307 raw_pci_ops = &pci_direct_conf2; 308 raw_pci_ops = &pci_direct_conf2;
309 port_cf9_safe = true;
308 return 2; 310 return 2;
309 } 311 }
310 312
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index 15b9cf6be729..1959018aac02 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -96,6 +96,7 @@ extern struct pci_raw_ops *raw_pci_ops;
96extern struct pci_raw_ops *raw_pci_ext_ops; 96extern struct pci_raw_ops *raw_pci_ext_ops;
97 97
98extern struct pci_raw_ops pci_direct_conf1; 98extern struct pci_raw_ops pci_direct_conf1;
99extern bool port_cf9_safe;
99 100
100/* arch_initcall level */ 101/* arch_initcall level */
101extern int pci_direct_probe(void); 102extern int pci_direct_probe(void);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5e4686d70f62..bea215230b20 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -28,6 +28,7 @@
28#include <linux/console.h> 28#include <linux/console.h>
29 29
30#include <xen/interface/xen.h> 30#include <xen/interface/xen.h>
31#include <xen/interface/version.h>
31#include <xen/interface/physdev.h> 32#include <xen/interface/physdev.h>
32#include <xen/interface/vcpu.h> 33#include <xen/interface/vcpu.h>
33#include <xen/features.h> 34#include <xen/features.h>
@@ -793,7 +794,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
793 794
794 ret = 0; 795 ret = 0;
795 796
796 switch(msr) { 797 switch (msr) {
797#ifdef CONFIG_X86_64 798#ifdef CONFIG_X86_64
798 unsigned which; 799 unsigned which;
799 u64 base; 800 u64 base;
@@ -1453,7 +1454,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1453 1454
1454 ident_pte = 0; 1455 ident_pte = 0;
1455 pfn = 0; 1456 pfn = 0;
1456 for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { 1457 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
1457 pte_t *pte_page; 1458 pte_t *pte_page;
1458 1459
1459 /* Reuse or allocate a page of ptes */ 1460 /* Reuse or allocate a page of ptes */
@@ -1471,7 +1472,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1471 } 1472 }
1472 1473
1473 /* Install mappings */ 1474 /* Install mappings */
1474 for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { 1475 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
1475 pte_t pte; 1476 pte_t pte;
1476 1477
1477 if (pfn > max_pfn_mapped) 1478 if (pfn > max_pfn_mapped)
@@ -1485,7 +1486,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1485 } 1486 }
1486 } 1487 }
1487 1488
1488 for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) 1489 for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
1489 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); 1490 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
1490 1491
1491 set_page_prot(pmd, PAGE_KERNEL_RO); 1492 set_page_prot(pmd, PAGE_KERNEL_RO);
@@ -1499,7 +1500,7 @@ static void convert_pfn_mfn(void *v)
1499 1500
1500 /* All levels are converted the same way, so just treat them 1501 /* All levels are converted the same way, so just treat them
1501 as ptes. */ 1502 as ptes. */
1502 for(i = 0; i < PTRS_PER_PTE; i++) 1503 for (i = 0; i < PTRS_PER_PTE; i++)
1503 pte[i] = xen_make_pte(pte[i].pte); 1504 pte[i] = xen_make_pte(pte[i].pte);
1504} 1505}
1505 1506
@@ -1514,7 +1515,8 @@ static void convert_pfn_mfn(void *v)
1514 * of the physical mapping once some sort of allocator has been set 1515 * of the physical mapping once some sort of allocator has been set
1515 * up. 1516 * up.
1516 */ 1517 */
1517static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 1518static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1519 unsigned long max_pfn)
1518{ 1520{
1519 pud_t *l3; 1521 pud_t *l3;
1520 pmd_t *l2; 1522 pmd_t *l2;
@@ -1577,7 +1579,8 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf
1577#else /* !CONFIG_X86_64 */ 1579#else /* !CONFIG_X86_64 */
1578static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; 1580static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
1579 1581
1580static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) 1582static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1583 unsigned long max_pfn)
1581{ 1584{
1582 pmd_t *kernel_pmd; 1585 pmd_t *kernel_pmd;
1583 1586
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 636ef4caa52d..773d68d3e912 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -154,13 +154,13 @@ void xen_setup_mfn_list_list(void)
154{ 154{
155 unsigned pfn, idx; 155 unsigned pfn, idx;
156 156
157 for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { 157 for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
158 unsigned topidx = p2m_top_index(pfn); 158 unsigned topidx = p2m_top_index(pfn);
159 159
160 p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); 160 p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
161 } 161 }
162 162
163 for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { 163 for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
164 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; 164 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
165 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); 165 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
166 } 166 }
@@ -179,7 +179,7 @@ void __init xen_build_dynamic_phys_to_machine(void)
179 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); 179 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
180 unsigned pfn; 180 unsigned pfn;
181 181
182 for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { 182 for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
183 unsigned topidx = p2m_top_index(pfn); 183 unsigned topidx = p2m_top_index(pfn);
184 184
185 p2m_top[topidx] = &mfn_list[pfn]; 185 p2m_top[topidx] = &mfn_list[pfn];
@@ -207,7 +207,7 @@ static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
207 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); 207 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
208 BUG_ON(p == NULL); 208 BUG_ON(p == NULL);
209 209
210 for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) 210 for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
211 p[i] = INVALID_P2M_ENTRY; 211 p[i] = INVALID_P2M_ENTRY;
212 212
213 if (cmpxchg(pp, p2m_missing, p) != p2m_missing) 213 if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
@@ -407,7 +407,8 @@ out:
407 preempt_enable(); 407 preempt_enable();
408} 408}
409 409
410pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 410pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
411 unsigned long addr, pte_t *ptep)
411{ 412{
412 /* Just return the pte as-is. We preserve the bits on commit */ 413 /* Just return the pte as-is. We preserve the bits on commit */
413 return *ptep; 414 return *ptep;
@@ -878,7 +879,8 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
878 879
879 if (user_pgd) { 880 if (user_pgd) {
880 xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); 881 xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
881 xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); 882 xen_do_pin(MMUEXT_PIN_L4_TABLE,
883 PFN_DOWN(__pa(user_pgd)));
882 } 884 }
883 } 885 }
884#else /* CONFIG_X86_32 */ 886#else /* CONFIG_X86_32 */
@@ -993,7 +995,8 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
993 pgd_t *user_pgd = xen_get_user_pgd(pgd); 995 pgd_t *user_pgd = xen_get_user_pgd(pgd);
994 996
995 if (user_pgd) { 997 if (user_pgd) {
996 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); 998 xen_do_pin(MMUEXT_UNPIN_TABLE,
999 PFN_DOWN(__pa(user_pgd)));
997 xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); 1000 xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
998 } 1001 }
999 } 1002 }
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 8ea8a0d0b0de..c738644b5435 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -154,7 +154,7 @@ void xen_mc_flush(void)
154 ret, smp_processor_id()); 154 ret, smp_processor_id());
155 dump_stack(); 155 dump_stack();
156 for (i = 0; i < b->mcidx; i++) { 156 for (i = 0; i < b->mcidx; i++) {
157 printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", 157 printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
158 i+1, b->mcidx, 158 i+1, b->mcidx,
159 b->debug[i].op, 159 b->debug[i].op,
160 b->debug[i].args[0], 160 b->debug[i].args[0],
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d67901083888..15c6c68db6a2 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -28,6 +28,9 @@
28/* These are code, but not functions. Defined in entry.S */ 28/* These are code, but not functions. Defined in entry.S */
29extern const char xen_hypervisor_callback[]; 29extern const char xen_hypervisor_callback[];
30extern const char xen_failsafe_callback[]; 30extern const char xen_failsafe_callback[];
31extern void xen_sysenter_target(void);
32extern void xen_syscall_target(void);
33extern void xen_syscall32_target(void);
31 34
32 35
33/** 36/**
@@ -110,7 +113,6 @@ static __cpuinit int register_callback(unsigned type, const void *func)
110 113
111void __cpuinit xen_enable_sysenter(void) 114void __cpuinit xen_enable_sysenter(void)
112{ 115{
113 extern void xen_sysenter_target(void);
114 int ret; 116 int ret;
115 unsigned sysenter_feature; 117 unsigned sysenter_feature;
116 118
@@ -132,8 +134,6 @@ void __cpuinit xen_enable_syscall(void)
132{ 134{
133#ifdef CONFIG_X86_64 135#ifdef CONFIG_X86_64
134 int ret; 136 int ret;
135 extern void xen_syscall_target(void);
136 extern void xen_syscall32_target(void);
137 137
138 ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); 138 ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
139 if (ret != 0) { 139 if (ret != 0) {
@@ -160,7 +160,8 @@ void __init xen_arch_setup(void)
160 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); 160 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
161 161
162 if (!xen_feature(XENFEAT_auto_translated_physmap)) 162 if (!xen_feature(XENFEAT_auto_translated_physmap))
163 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); 163 HYPERVISOR_vm_assist(VMASST_CMD_enable,
164 VMASST_TYPE_pae_extended_cr3);
164 165
165 if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || 166 if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
166 register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) 167 register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 11acaee14d66..bf79d83bdfbb 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -384,6 +384,27 @@ acpi_pci_free_irq(struct acpi_prt_entry *entry,
384 return irq; 384 return irq;
385} 385}
386 386
387#ifdef CONFIG_X86_IO_APIC
388extern int noioapicquirk;
389
390static int bridge_has_boot_interrupt_variant(struct pci_bus *bus)
391{
392 struct pci_bus *bus_it;
393
394 for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) {
395 if (!bus_it->self)
396 return 0;
397
398 printk(KERN_INFO "vendor=%04x device=%04x\n", bus_it->self->vendor,
399 bus_it->self->device);
400
401 if (bus_it->self->irq_reroute_variant)
402 return bus_it->self->irq_reroute_variant;
403 }
404 return 0;
405}
406#endif /* CONFIG_X86_IO_APIC */
407
387/* 408/*
388 * acpi_pci_irq_lookup 409 * acpi_pci_irq_lookup
389 * success: return IRQ >= 0 410 * success: return IRQ >= 0
@@ -413,6 +434,41 @@ acpi_pci_irq_lookup(struct pci_bus *bus,
413 } 434 }
414 435
415 ret = func(entry, triggering, polarity, link); 436 ret = func(entry, triggering, polarity, link);
437
438#ifdef CONFIG_X86_IO_APIC
439 /*
440 * Some chipsets (e.g. intel 6700PXH) generate a legacy INTx when the
441 * IRQ entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel
442 * does during interrupt handling). When this INTx generation cannot be
443 * disabled, we reroute these interrupts to their legacy equivalent to
444 * get rid of spurious interrupts.
445 */
446 if (!noioapicquirk) {
447 switch (bridge_has_boot_interrupt_variant(bus)) {
448 case 0:
449 /* no rerouting necessary */
450 break;
451
452 case INTEL_IRQ_REROUTE_VARIANT:
453 /*
454 * Remap according to INTx routing table in 6700PXH
455 * specs, intel order number 302628-002, section
456 * 2.15.2. Other chipsets (80332, ...) have the same
457 * mapping and are handled here as well.
458 */
459 printk(KERN_INFO "pci irq %d -> rerouted to legacy "
460 "irq %d\n", ret, (ret % 4) + 16);
461 ret = (ret % 4) + 16;
462 break;
463
464 default:
465 printk(KERN_INFO "not rerouting irq %d to legacy irq: "
466 "unknown mapping\n", ret);
467 break;
468 }
469 }
470#endif /* CONFIG_X86_IO_APIC */
471
416 return ret; 472 return ret;
417} 473}
418 474
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 5f8d746a9b81..38aca048e951 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -374,15 +374,15 @@ static int tsc_halts_in_c(int state)
374{ 374{
375 switch (boot_cpu_data.x86_vendor) { 375 switch (boot_cpu_data.x86_vendor) {
376 case X86_VENDOR_AMD: 376 case X86_VENDOR_AMD:
377 case X86_VENDOR_INTEL:
377 /* 378 /*
378 * AMD Fam10h TSC will tick in all 379 * AMD Fam10h TSC will tick in all
379 * C/P/S0/S1 states when this bit is set. 380 * C/P/S0/S1 states when this bit is set.
380 */ 381 */
381 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 382 if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
382 return 0; 383 return 0;
384
383 /*FALL THROUGH*/ 385 /*FALL THROUGH*/
384 case X86_VENDOR_INTEL:
385 /* Several cases known where TSC halts in C2 too */
386 default: 386 default:
387 return state > ACPI_STATE_C1; 387 return state > ACPI_STATE_C1;
388 } 388 }
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 8daf4793ac32..4a597d8c2f70 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -467,6 +467,17 @@ const char *dmi_get_system_info(int field)
467} 467}
468EXPORT_SYMBOL(dmi_get_system_info); 468EXPORT_SYMBOL(dmi_get_system_info);
469 469
470/**
471 * dmi_name_in_serial - Check if string is in the DMI product serial
472 * information.
473 */
474int dmi_name_in_serial(const char *str)
475{
476 int f = DMI_PRODUCT_SERIAL;
477 if (dmi_ident[f] && strstr(dmi_ident[f], str))
478 return 1;
479 return 0;
480}
470 481
471/** 482/**
472 * dmi_name_in_vendors - Check if string is anywhere in the DMI vendor information. 483 * dmi_name_in_vendors - Check if string is anywhere in the DMI vendor information.
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index 533923f83f1a..73b0ca061bb5 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -317,7 +317,6 @@ int gru_proc_init(void)
317{ 317{
318 struct proc_entry *p; 318 struct proc_entry *p;
319 319
320 proc_mkdir("sgi_uv", NULL);
321 proc_gru = proc_mkdir("sgi_uv/gru", NULL); 320 proc_gru = proc_mkdir("sgi_uv/gru", NULL);
322 321
323 for (p = proc_files; p->name; p++) 322 for (p = proc_files; p->name; p++)
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index ed1722e50049..7b4cbd5e03e9 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -194,9 +194,10 @@ enum xp_retval {
194 xpGruSendMqError, /* 59: gru send message queue related error */ 194 xpGruSendMqError, /* 59: gru send message queue related error */
195 195
196 xpBadChannelNumber, /* 60: invalid channel number */ 196 xpBadChannelNumber, /* 60: invalid channel number */
197 xpBadMsgType, /* 60: invalid message type */ 197 xpBadMsgType, /* 61: invalid message type */
198 xpBiosError, /* 62: BIOS error */
198 199
199 xpUnknownReason /* 61: unknown reason - must be last in enum */ 200 xpUnknownReason /* 63: unknown reason - must be last in enum */
200}; 201};
201 202
202/* 203/*
@@ -345,6 +346,8 @@ extern unsigned long (*xp_pa) (void *);
345extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long, 346extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long,
346 size_t); 347 size_t);
347extern int (*xp_cpu_to_nasid) (int); 348extern int (*xp_cpu_to_nasid) (int);
349extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long);
350extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long);
348 351
349extern u64 xp_nofault_PIOR_target; 352extern u64 xp_nofault_PIOR_target;
350extern int xp_nofault_PIOR(void *); 353extern int xp_nofault_PIOR(void *);
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 66a1d19e08ad..9a2e77172d94 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -51,6 +51,13 @@ EXPORT_SYMBOL_GPL(xp_remote_memcpy);
51int (*xp_cpu_to_nasid) (int cpuid); 51int (*xp_cpu_to_nasid) (int cpuid);
52EXPORT_SYMBOL_GPL(xp_cpu_to_nasid); 52EXPORT_SYMBOL_GPL(xp_cpu_to_nasid);
53 53
54enum xp_retval (*xp_expand_memprotect) (unsigned long phys_addr,
55 unsigned long size);
56EXPORT_SYMBOL_GPL(xp_expand_memprotect);
57enum xp_retval (*xp_restrict_memprotect) (unsigned long phys_addr,
58 unsigned long size);
59EXPORT_SYMBOL_GPL(xp_restrict_memprotect);
60
54/* 61/*
55 * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level 62 * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
56 * users of XPC. 63 * users of XPC.
diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c
index 1440134caf31..fb3ec9d735a9 100644
--- a/drivers/misc/sgi-xp/xp_sn2.c
+++ b/drivers/misc/sgi-xp/xp_sn2.c
@@ -120,6 +120,38 @@ xp_cpu_to_nasid_sn2(int cpuid)
120 return cpuid_to_nasid(cpuid); 120 return cpuid_to_nasid(cpuid);
121} 121}
122 122
123static enum xp_retval
124xp_expand_memprotect_sn2(unsigned long phys_addr, unsigned long size)
125{
126 u64 nasid_array = 0;
127 int ret;
128
129 ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
130 &nasid_array);
131 if (ret != 0) {
132 dev_err(xp, "sn_change_memprotect(,, "
133 "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
134 return xpSalError;
135 }
136 return xpSuccess;
137}
138
139static enum xp_retval
140xp_restrict_memprotect_sn2(unsigned long phys_addr, unsigned long size)
141{
142 u64 nasid_array = 0;
143 int ret;
144
145 ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
146 &nasid_array);
147 if (ret != 0) {
148 dev_err(xp, "sn_change_memprotect(,, "
149 "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
150 return xpSalError;
151 }
152 return xpSuccess;
153}
154
123enum xp_retval 155enum xp_retval
124xp_init_sn2(void) 156xp_init_sn2(void)
125{ 157{
@@ -132,6 +164,8 @@ xp_init_sn2(void)
132 xp_pa = xp_pa_sn2; 164 xp_pa = xp_pa_sn2;
133 xp_remote_memcpy = xp_remote_memcpy_sn2; 165 xp_remote_memcpy = xp_remote_memcpy_sn2;
134 xp_cpu_to_nasid = xp_cpu_to_nasid_sn2; 166 xp_cpu_to_nasid = xp_cpu_to_nasid_sn2;
167 xp_expand_memprotect = xp_expand_memprotect_sn2;
168 xp_restrict_memprotect = xp_restrict_memprotect_sn2;
135 169
136 return xp_register_nofault_code_sn2(); 170 return xp_register_nofault_code_sn2();
137} 171}
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
index d9f7ce2510bc..d238576b26fa 100644
--- a/drivers/misc/sgi-xp/xp_uv.c
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -15,6 +15,11 @@
15 15
16#include <linux/device.h> 16#include <linux/device.h>
17#include <asm/uv/uv_hub.h> 17#include <asm/uv/uv_hub.h>
18#if defined CONFIG_X86_64
19#include <asm/uv/bios.h>
20#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
21#include <asm/sn/sn_sal.h>
22#endif
18#include "../sgi-gru/grukservices.h" 23#include "../sgi-gru/grukservices.h"
19#include "xp.h" 24#include "xp.h"
20 25
@@ -49,18 +54,79 @@ xp_cpu_to_nasid_uv(int cpuid)
49 return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid)); 54 return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid));
50} 55}
51 56
57static enum xp_retval
58xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size)
59{
60 int ret;
61
62#if defined CONFIG_X86_64
63 ret = uv_bios_change_memprotect(phys_addr, size, UV_MEMPROT_ALLOW_RW);
64 if (ret != BIOS_STATUS_SUCCESS) {
65 dev_err(xp, "uv_bios_change_memprotect(,, "
66 "UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret);
67 return xpBiosError;
68 }
69
70#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
71 u64 nasid_array;
72
73 ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
74 &nasid_array);
75 if (ret != 0) {
76 dev_err(xp, "sn_change_memprotect(,, "
77 "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
78 return xpSalError;
79 }
80#else
81 #error not a supported configuration
82#endif
83 return xpSuccess;
84}
85
86static enum xp_retval
87xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size)
88{
89 int ret;
90
91#if defined CONFIG_X86_64
92 ret = uv_bios_change_memprotect(phys_addr, size,
93 UV_MEMPROT_RESTRICT_ACCESS);
94 if (ret != BIOS_STATUS_SUCCESS) {
95 dev_err(xp, "uv_bios_change_memprotect(,, "
96 "UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret);
97 return xpBiosError;
98 }
99
100#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
101 u64 nasid_array;
102
103 ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
104 &nasid_array);
105 if (ret != 0) {
106 dev_err(xp, "sn_change_memprotect(,, "
107 "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
108 return xpSalError;
109 }
110#else
111 #error not a supported configuration
112#endif
113 return xpSuccess;
114}
115
52enum xp_retval 116enum xp_retval
53xp_init_uv(void) 117xp_init_uv(void)
54{ 118{
55 BUG_ON(!is_uv()); 119 BUG_ON(!is_uv());
56 120
57 xp_max_npartitions = XP_MAX_NPARTITIONS_UV; 121 xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
58 xp_partition_id = 0; /* !!! not correct value */ 122 xp_partition_id = sn_partition_id;
59 xp_region_size = 0; /* !!! not correct value */ 123 xp_region_size = sn_region_size;
60 124
61 xp_pa = xp_pa_uv; 125 xp_pa = xp_pa_uv;
62 xp_remote_memcpy = xp_remote_memcpy_uv; 126 xp_remote_memcpy = xp_remote_memcpy_uv;
63 xp_cpu_to_nasid = xp_cpu_to_nasid_uv; 127 xp_cpu_to_nasid = xp_cpu_to_nasid_uv;
128 xp_expand_memprotect = xp_expand_memprotect_uv;
129 xp_restrict_memprotect = xp_restrict_memprotect_uv;
64 130
65 return xpSuccess; 131 return xpSuccess;
66} 132}
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 619208d61862..a5bd658c2e83 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -181,6 +181,18 @@ struct xpc_vars_part_sn2 {
181 xpc_nasid_mask_nlongs)) 181 xpc_nasid_mask_nlongs))
182 182
183/* 183/*
184 * Info pertinent to a GRU message queue using a watch list for irq generation.
185 */
186struct xpc_gru_mq_uv {
187 void *address; /* address of GRU message queue */
188 unsigned int order; /* size of GRU message queue as a power of 2 */
189 int irq; /* irq raised when message is received in mq */
190 int mmr_blade; /* blade where watchlist was allocated from */
191 unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */
192 int watchlist_num; /* number of watchlist allocatd by BIOS */
193};
194
195/*
184 * The activate_mq is used to send/receive GRU messages that affect XPC's 196 * The activate_mq is used to send/receive GRU messages that affect XPC's
185 * heartbeat, partition active state, and channel state. This is UV only. 197 * heartbeat, partition active state, and channel state. This is UV only.
186 */ 198 */
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index b4882ccf6344..73b7fb8de47a 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -553,22 +553,17 @@ static u64 xpc_prot_vec_sn2[MAX_NUMNODES];
553static enum xp_retval 553static enum xp_retval
554xpc_allow_amo_ops_sn2(struct amo *amos_page) 554xpc_allow_amo_ops_sn2(struct amo *amos_page)
555{ 555{
556 u64 nasid_array = 0; 556 enum xp_retval ret = xpSuccess;
557 int ret;
558 557
559 /* 558 /*
560 * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST 559 * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST
561 * collides with memory operations. On those systems we call 560 * collides with memory operations. On those systems we call
562 * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead. 561 * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead.
563 */ 562 */
564 if (!enable_shub_wars_1_1()) { 563 if (!enable_shub_wars_1_1())
565 ret = sn_change_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE, 564 ret = xp_expand_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE);
566 SN_MEMPROT_ACCESS_CLASS_1, 565
567 &nasid_array); 566 return ret;
568 if (ret != 0)
569 return xpSalError;
570 }
571 return xpSuccess;
572} 567}
573 568
574/* 569/*
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 1ac694c01623..91a55b1b1037 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -18,7 +18,15 @@
18#include <linux/interrupt.h> 18#include <linux/interrupt.h>
19#include <linux/delay.h> 19#include <linux/delay.h>
20#include <linux/device.h> 20#include <linux/device.h>
21#include <linux/err.h>
21#include <asm/uv/uv_hub.h> 22#include <asm/uv/uv_hub.h>
23#if defined CONFIG_X86_64
24#include <asm/uv/bios.h>
25#include <asm/uv/uv_irq.h>
26#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
27#include <asm/sn/intr.h>
28#include <asm/sn/sn_sal.h>
29#endif
22#include "../sgi-gru/gru.h" 30#include "../sgi-gru/gru.h"
23#include "../sgi-gru/grukservices.h" 31#include "../sgi-gru/grukservices.h"
24#include "xpc.h" 32#include "xpc.h"
@@ -27,15 +35,17 @@ static atomic64_t xpc_heartbeat_uv;
27static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); 35static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
28 36
29#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) 37#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES)
30#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES) 38#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
39 XPC_ACTIVATE_MSG_SIZE_UV)
40#define XPC_ACTIVATE_IRQ_NAME "xpc_activate"
31 41
32#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ 42#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES)
33 XPC_ACTIVATE_MSG_SIZE_UV) 43#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
34#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ 44 XPC_NOTIFY_MSG_SIZE_UV)
35 XPC_NOTIFY_MSG_SIZE_UV) 45#define XPC_NOTIFY_IRQ_NAME "xpc_notify"
36 46
37static void *xpc_activate_mq_uv; 47static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
38static void *xpc_notify_mq_uv; 48static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
39 49
40static int 50static int
41xpc_setup_partitions_sn_uv(void) 51xpc_setup_partitions_sn_uv(void)
@@ -52,62 +62,209 @@ xpc_setup_partitions_sn_uv(void)
52 return 0; 62 return 0;
53} 63}
54 64
55static void * 65static int
56xpc_create_gru_mq_uv(unsigned int mq_size, int cpuid, unsigned int irq, 66xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
67{
68#if defined CONFIG_X86_64
69 mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
70 if (mq->irq < 0) {
71 dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
72 mq->irq);
73 }
74
75#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
76 int mmr_pnode;
77 unsigned long mmr_value;
78
79 if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0)
80 mq->irq = SGI_XPC_ACTIVATE;
81 else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0)
82 mq->irq = SGI_XPC_NOTIFY;
83 else
84 return -EINVAL;
85
86 mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
87 mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq;
88
89 uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value);
90#else
91 #error not a supported configuration
92#endif
93
94 return 0;
95}
96
97static void
98xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
99{
100#if defined CONFIG_X86_64
101 uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset);
102
103#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
104 int mmr_pnode;
105 unsigned long mmr_value;
106
107 mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
108 mmr_value = 1UL << 16;
109
110 uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value);
111#else
112 #error not a supported configuration
113#endif
114}
115
116static int
117xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
118{
119 int ret;
120
121#if defined CONFIG_X86_64
122 ret = uv_bios_mq_watchlist_alloc(mq->mmr_blade, uv_gpa(mq->address),
123 mq->order, &mq->mmr_offset);
124 if (ret < 0) {
125 dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
126 "ret=%d\n", ret);
127 return ret;
128 }
129#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
130 ret = sn_mq_watchlist_alloc(mq->mmr_blade, uv_gpa(mq->address),
131 mq->order, &mq->mmr_offset);
132 if (ret < 0) {
133 dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n",
134 ret);
135 return -EBUSY;
136 }
137#else
138 #error not a supported configuration
139#endif
140
141 mq->watchlist_num = ret;
142 return 0;
143}
144
145static void
146xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq)
147{
148 int ret;
149
150#if defined CONFIG_X86_64
151 ret = uv_bios_mq_watchlist_free(mq->mmr_blade, mq->watchlist_num);
152 BUG_ON(ret != BIOS_STATUS_SUCCESS);
153#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
154 ret = sn_mq_watchlist_free(mq->mmr_blade, mq->watchlist_num);
155 BUG_ON(ret != SALRET_OK);
156#else
157 #error not a supported configuration
158#endif
159}
160
161static struct xpc_gru_mq_uv *
162xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
57 irq_handler_t irq_handler) 163 irq_handler_t irq_handler)
58{ 164{
165 enum xp_retval xp_ret;
59 int ret; 166 int ret;
60 int nid; 167 int nid;
61 int mq_order; 168 int pg_order;
62 struct page *page; 169 struct page *page;
63 void *mq; 170 struct xpc_gru_mq_uv *mq;
171
172 mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL);
173 if (mq == NULL) {
174 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
175 "a xpc_gru_mq_uv structure\n");
176 ret = -ENOMEM;
177 goto out_1;
178 }
179
180 pg_order = get_order(mq_size);
181 mq->order = pg_order + PAGE_SHIFT;
182 mq_size = 1UL << mq->order;
183
184 mq->mmr_blade = uv_cpu_to_blade_id(cpu);
64 185
65 nid = cpu_to_node(cpuid); 186 nid = cpu_to_node(cpu);
66 mq_order = get_order(mq_size);
67 page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 187 page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
68 mq_order); 188 pg_order);
69 if (page == NULL) { 189 if (page == NULL) {
70 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " 190 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
71 "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); 191 "bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
72 return NULL; 192 ret = -ENOMEM;
193 goto out_2;
73 } 194 }
195 mq->address = page_address(page);
74 196
75 mq = page_address(page); 197 ret = gru_create_message_queue(mq->address, mq_size);
76 ret = gru_create_message_queue(mq, mq_size);
77 if (ret != 0) { 198 if (ret != 0) {
78 dev_err(xpc_part, "gru_create_message_queue() returned " 199 dev_err(xpc_part, "gru_create_message_queue() returned "
79 "error=%d\n", ret); 200 "error=%d\n", ret);
80 free_pages((unsigned long)mq, mq_order); 201 ret = -EINVAL;
81 return NULL; 202 goto out_3;
82 } 203 }
83 204
84 /* !!! Need to do some other things to set up IRQ */ 205 /* enable generation of irq when GRU mq operation occurs to this mq */
206 ret = xpc_gru_mq_watchlist_alloc_uv(mq);
207 if (ret != 0)
208 goto out_3;
85 209
86 ret = request_irq(irq, irq_handler, 0, "xpc", NULL); 210 ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name);
211 if (ret != 0)
212 goto out_4;
213
214 ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
87 if (ret != 0) { 215 if (ret != 0) {
88 dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n", 216 dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
89 irq, ret); 217 mq->irq, ret);
90 free_pages((unsigned long)mq, mq_order); 218 goto out_5;
91 return NULL;
92 } 219 }
93 220
94 /* !!! enable generation of irq when GRU mq op occurs to this mq */ 221 /* allow other partitions to access this GRU mq */
95 222 xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
96 /* ??? allow other partitions to access GRU mq? */ 223 if (xp_ret != xpSuccess) {
224 ret = -EACCES;
225 goto out_6;
226 }
97 227
98 return mq; 228 return mq;
229
230 /* something went wrong */
231out_6:
232 free_irq(mq->irq, NULL);
233out_5:
234 xpc_release_gru_mq_irq_uv(mq);
235out_4:
236 xpc_gru_mq_watchlist_free_uv(mq);
237out_3:
238 free_pages((unsigned long)mq->address, pg_order);
239out_2:
240 kfree(mq);
241out_1:
242 return ERR_PTR(ret);
99} 243}
100 244
101static void 245static void
102xpc_destroy_gru_mq_uv(void *mq, unsigned int mq_size, unsigned int irq) 246xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
103{ 247{
104 /* ??? disallow other partitions to access GRU mq? */ 248 unsigned int mq_size;
249 int pg_order;
250 int ret;
251
252 /* disallow other partitions to access GRU mq */
253 mq_size = 1UL << mq->order;
254 ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size);
255 BUG_ON(ret != xpSuccess);
105 256
106 /* !!! disable generation of irq when GRU mq op occurs to this mq */ 257 /* unregister irq handler and release mq irq/vector mapping */
258 free_irq(mq->irq, NULL);
259 xpc_release_gru_mq_irq_uv(mq);
107 260
108 free_irq(irq, NULL); 261 /* disable generation of irq when GRU mq op occurs to this mq */
262 xpc_gru_mq_watchlist_free_uv(mq);
109 263
110 free_pages((unsigned long)mq, get_order(mq_size)); 264 pg_order = mq->order - PAGE_SHIFT;
265 free_pages((unsigned long)mq->address, pg_order);
266
267 kfree(mq);
111} 268}
112 269
113static enum xp_retval 270static enum xp_retval
@@ -402,7 +559,10 @@ xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
402 struct xpc_partition *part; 559 struct xpc_partition *part;
403 int wakeup_hb_checker = 0; 560 int wakeup_hb_checker = 0;
404 561
405 while ((msg_hdr = gru_get_next_message(xpc_activate_mq_uv)) != NULL) { 562 while (1) {
563 msg_hdr = gru_get_next_message(xpc_activate_mq_uv->address);
564 if (msg_hdr == NULL)
565 break;
406 566
407 partid = msg_hdr->partid; 567 partid = msg_hdr->partid;
408 if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { 568 if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
@@ -418,7 +578,7 @@ xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
418 } 578 }
419 } 579 }
420 580
421 gru_free_message(xpc_activate_mq_uv, msg_hdr); 581 gru_free_message(xpc_activate_mq_uv->address, msg_hdr);
422 } 582 }
423 583
424 if (wakeup_hb_checker) 584 if (wakeup_hb_checker)
@@ -482,7 +642,7 @@ xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
482 struct xpc_partition_uv *part_uv = &part->sn.uv; 642 struct xpc_partition_uv *part_uv = &part->sn.uv;
483 643
484 /* 644 /*
485 * !!! Make our side think that the remote parition sent an activate 645 * !!! Make our side think that the remote partition sent an activate
486 * !!! message our way by doing what the activate IRQ handler would 646 * !!! message our way by doing what the activate IRQ handler would
487 * !!! do had one really been sent. 647 * !!! do had one really been sent.
488 */ 648 */
@@ -500,14 +660,39 @@ static enum xp_retval
500xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, 660xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
501 size_t *len) 661 size_t *len)
502{ 662{
503 /* !!! call the UV version of sn_partition_reserved_page_pa() */ 663 s64 status;
504 return xpUnsupported; 664 enum xp_retval ret;
665
666#if defined CONFIG_X86_64
667 status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa,
668 (u64 *)len);
669 if (status == BIOS_STATUS_SUCCESS)
670 ret = xpSuccess;
671 else if (status == BIOS_STATUS_MORE_PASSES)
672 ret = xpNeedMoreInfo;
673 else
674 ret = xpBiosError;
675
676#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
677 status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len);
678 if (status == SALRET_OK)
679 ret = xpSuccess;
680 else if (status == SALRET_MORE_PASSES)
681 ret = xpNeedMoreInfo;
682 else
683 ret = xpSalError;
684
685#else
686 #error not a supported configuration
687#endif
688
689 return ret;
505} 690}
506 691
507static int 692static int
508xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp) 693xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
509{ 694{
510 rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv); 695 rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv->address);
511 return 0; 696 return 0;
512} 697}
513 698
@@ -1411,22 +1596,18 @@ xpc_init_uv(void)
1411 return -E2BIG; 1596 return -E2BIG;
1412 } 1597 }
1413 1598
1414 /* ??? The cpuid argument's value is 0, is that what we want? */ 1599 xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0,
1415 /* !!! The irq argument's value isn't correct. */ 1600 XPC_ACTIVATE_IRQ_NAME,
1416 xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, 0,
1417 xpc_handle_activate_IRQ_uv); 1601 xpc_handle_activate_IRQ_uv);
1418 if (xpc_activate_mq_uv == NULL) 1602 if (IS_ERR(xpc_activate_mq_uv))
1419 return -ENOMEM; 1603 return PTR_ERR(xpc_activate_mq_uv);
1420 1604
1421 /* ??? The cpuid argument's value is 0, is that what we want? */ 1605 xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0,
1422 /* !!! The irq argument's value isn't correct. */ 1606 XPC_NOTIFY_IRQ_NAME,
1423 xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, 0,
1424 xpc_handle_notify_IRQ_uv); 1607 xpc_handle_notify_IRQ_uv);
1425 if (xpc_notify_mq_uv == NULL) { 1608 if (IS_ERR(xpc_notify_mq_uv)) {
1426 /* !!! The irq argument's value isn't correct. */ 1609 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1427 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv, 1610 return PTR_ERR(xpc_notify_mq_uv);
1428 XPC_ACTIVATE_MQ_SIZE_UV, 0);
1429 return -ENOMEM;
1430 } 1611 }
1431 1612
1432 return 0; 1613 return 0;
@@ -1435,9 +1616,6 @@ xpc_init_uv(void)
1435void 1616void
1436xpc_exit_uv(void) 1617xpc_exit_uv(void)
1437{ 1618{
1438 /* !!! The irq argument's value isn't correct. */ 1619 xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
1439 xpc_destroy_gru_mq_uv(xpc_notify_mq_uv, XPC_NOTIFY_MQ_SIZE_UV, 0); 1620 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1440
1441 /* !!! The irq argument's value isn't correct. */
1442 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv, XPC_ACTIVATE_MQ_SIZE_UV, 0);
1443} 1621}
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 5f4f85f56cb7..ce0985615133 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -606,27 +606,6 @@ static void __init quirk_ioapic_rmw(struct pci_dev *dev)
606 sis_apic_bug = 1; 606 sis_apic_bug = 1;
607} 607}
608DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI, PCI_ANY_ID, quirk_ioapic_rmw); 608DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI, PCI_ANY_ID, quirk_ioapic_rmw);
609
610#define AMD8131_revA0 0x01
611#define AMD8131_revB0 0x11
612#define AMD8131_MISC 0x40
613#define AMD8131_NIOAMODE_BIT 0
614static void quirk_amd_8131_ioapic(struct pci_dev *dev)
615{
616 unsigned char tmp;
617
618 if (nr_ioapics == 0)
619 return;
620
621 if (dev->revision == AMD8131_revA0 || dev->revision == AMD8131_revB0) {
622 dev_info(&dev->dev, "Fixing up AMD8131 IOAPIC mode\n");
623 pci_read_config_byte( dev, AMD8131_MISC, &tmp);
624 tmp &= ~(1 << AMD8131_NIOAMODE_BIT);
625 pci_write_config_byte( dev, AMD8131_MISC, tmp);
626 }
627}
628DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic);
629DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic);
630#endif /* CONFIG_X86_IO_APIC */ 609#endif /* CONFIG_X86_IO_APIC */
631 610
632/* 611/*
@@ -1423,6 +1402,155 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2609, quirk_intel_pcie_pm);
1423DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260a, quirk_intel_pcie_pm); 1402DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260a, quirk_intel_pcie_pm);
1424DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm); 1403DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm);
1425 1404
1405#ifdef CONFIG_X86_IO_APIC
1406/*
1407 * Boot interrupts on some chipsets cannot be turned off. For these chipsets,
1408 * remap the original interrupt in the linux kernel to the boot interrupt, so
1409 * that a PCI device's interrupt handler is installed on the boot interrupt
1410 * line instead.
1411 */
1412static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev)
1413{
1414 if (noioapicquirk || noioapicreroute)
1415 return;
1416
1417 dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT;
1418
1419 printk(KERN_INFO "PCI quirk: reroute interrupts for 0x%04x:0x%04x\n",
1420 dev->vendor, dev->device);
1421 return;
1422}
1423DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_0, quirk_reroute_to_boot_interrupts_intel);
1424DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_1, quirk_reroute_to_boot_interrupts_intel);
1425DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, quirk_reroute_to_boot_interrupts_intel);
1426DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_0, quirk_reroute_to_boot_interrupts_intel);
1427DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_1, quirk_reroute_to_boot_interrupts_intel);
1428DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHV, quirk_reroute_to_boot_interrupts_intel);
1429DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_0, quirk_reroute_to_boot_interrupts_intel);
1430DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_1, quirk_reroute_to_boot_interrupts_intel);
1431DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_0, quirk_reroute_to_boot_interrupts_intel);
1432DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_1, quirk_reroute_to_boot_interrupts_intel);
1433DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, quirk_reroute_to_boot_interrupts_intel);
1434DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_0, quirk_reroute_to_boot_interrupts_intel);
1435DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_1, quirk_reroute_to_boot_interrupts_intel);
1436DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHV, quirk_reroute_to_boot_interrupts_intel);
1437DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_0, quirk_reroute_to_boot_interrupts_intel);
1438DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_1, quirk_reroute_to_boot_interrupts_intel);
1439
1440/*
1441 * On some chipsets we can disable the generation of legacy INTx boot
1442 * interrupts.
1443 */
1444
1445/*
1446 * IO-APIC1 on 6300ESB generates boot interrupts, see intel order no
1447 * 300641-004US, section 5.7.3.
1448 */
1449#define INTEL_6300_IOAPIC_ABAR 0x40
1450#define INTEL_6300_DISABLE_BOOT_IRQ (1<<14)
1451
1452static void quirk_disable_intel_boot_interrupt(struct pci_dev *dev)
1453{
1454 u16 pci_config_word;
1455
1456 if (noioapicquirk)
1457 return;
1458
1459 pci_read_config_word(dev, INTEL_6300_IOAPIC_ABAR, &pci_config_word);
1460 pci_config_word |= INTEL_6300_DISABLE_BOOT_IRQ;
1461 pci_write_config_word(dev, INTEL_6300_IOAPIC_ABAR, pci_config_word);
1462
1463 printk(KERN_INFO "disabled boot interrupt on device 0x%04x:0x%04x\n",
1464 dev->vendor, dev->device);
1465}
1466DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt);
1467DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt);
1468
1469/*
1470 * disable boot interrupts on HT-1000
1471 */
1472#define BC_HT1000_FEATURE_REG 0x64
1473#define BC_HT1000_PIC_REGS_ENABLE (1<<0)
1474#define BC_HT1000_MAP_IDX 0xC00
1475#define BC_HT1000_MAP_DATA 0xC01
1476
1477static void quirk_disable_broadcom_boot_interrupt(struct pci_dev *dev)
1478{
1479 u32 pci_config_dword;
1480 u8 irq;
1481
1482 if (noioapicquirk)
1483 return;
1484
1485 pci_read_config_dword(dev, BC_HT1000_FEATURE_REG, &pci_config_dword);
1486 pci_write_config_dword(dev, BC_HT1000_FEATURE_REG, pci_config_dword |
1487 BC_HT1000_PIC_REGS_ENABLE);
1488
1489 for (irq = 0x10; irq < 0x10 + 32; irq++) {
1490 outb(irq, BC_HT1000_MAP_IDX);
1491 outb(0x00, BC_HT1000_MAP_DATA);
1492 }
1493
1494 pci_write_config_dword(dev, BC_HT1000_FEATURE_REG, pci_config_dword);
1495
1496 printk(KERN_INFO "disabled boot interrupts on PCI device"
1497 "0x%04x:0x%04x\n", dev->vendor, dev->device);
1498}
1499DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt);
1500DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt);
1501
1502/*
1503 * disable boot interrupts on AMD and ATI chipsets
1504 */
1505/*
1506 * NOIOAMODE needs to be disabled to disable "boot interrupts". For AMD 8131
1507 * rev. A0 and B0, NOIOAMODE needs to be disabled anyway to fix IO-APIC mode
1508 * (due to an erratum).
1509 */
1510#define AMD_813X_MISC 0x40
1511#define AMD_813X_NOIOAMODE (1<<0)
1512
1513static void quirk_disable_amd_813x_boot_interrupt(struct pci_dev *dev)
1514{
1515 u32 pci_config_dword;
1516
1517 if (noioapicquirk)
1518 return;
1519
1520 pci_read_config_dword(dev, AMD_813X_MISC, &pci_config_dword);
1521 pci_config_dword &= ~AMD_813X_NOIOAMODE;
1522 pci_write_config_dword(dev, AMD_813X_MISC, pci_config_dword);
1523
1524 printk(KERN_INFO "disabled boot interrupts on PCI device "
1525 "0x%04x:0x%04x\n", dev->vendor, dev->device);
1526}
1527DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_amd_813x_boot_interrupt);
1528DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, quirk_disable_amd_813x_boot_interrupt);
1529
1530#define AMD_8111_PCI_IRQ_ROUTING 0x56
1531
1532static void quirk_disable_amd_8111_boot_interrupt(struct pci_dev *dev)
1533{
1534 u16 pci_config_word;
1535
1536 if (noioapicquirk)
1537 return;
1538
1539 pci_read_config_word(dev, AMD_8111_PCI_IRQ_ROUTING, &pci_config_word);
1540 if (!pci_config_word) {
1541 printk(KERN_INFO "boot interrupts on PCI device 0x%04x:0x%04x "
1542 "already disabled\n",
1543 dev->vendor, dev->device);
1544 return;
1545 }
1546 pci_write_config_word(dev, AMD_8111_PCI_IRQ_ROUTING, 0);
1547 printk(KERN_INFO "disabled boot interrupts on PCI device "
1548 "0x%04x:0x%04x\n", dev->vendor, dev->device);
1549}
1550DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt);
1551DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt);
1552#endif /* CONFIG_X86_IO_APIC */
1553
1426/* 1554/*
1427 * Toshiba TC86C001 IDE controller reports the standard 8-byte BAR0 size 1555 * Toshiba TC86C001 IDE controller reports the standard 8-byte BAR0 size
1428 * but the PIO transfers won't work if BAR0 falls at the odd 8 bytes. 1556 * but the PIO transfers won't work if BAR0 falls at the odd 8 bytes.
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 526c191e84ea..8dc7109d61b7 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -44,13 +44,15 @@
44#include <linux/list.h> 44#include <linux/list.h>
45#include <linux/sysdev.h> 45#include <linux/sysdev.h>
46 46
47#include <asm/xen/hypervisor.h>
48#include <asm/page.h> 47#include <asm/page.h>
49#include <asm/pgalloc.h> 48#include <asm/pgalloc.h>
50#include <asm/pgtable.h> 49#include <asm/pgtable.h>
51#include <asm/uaccess.h> 50#include <asm/uaccess.h>
52#include <asm/tlb.h> 51#include <asm/tlb.h>
53 52
53#include <asm/xen/hypervisor.h>
54#include <asm/xen/hypercall.h>
55#include <xen/interface/xen.h>
54#include <xen/interface/memory.h> 56#include <xen/interface/memory.h>
55#include <xen/xenbus.h> 57#include <xen/xenbus.h>
56#include <xen/features.h> 58#include <xen/features.h>
diff --git a/drivers/xen/features.c b/drivers/xen/features.c
index 0707714e40d6..99eda169c779 100644
--- a/drivers/xen/features.c
+++ b/drivers/xen/features.c
@@ -8,7 +8,11 @@
8#include <linux/types.h> 8#include <linux/types.h>
9#include <linux/cache.h> 9#include <linux/cache.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <asm/xen/hypervisor.h> 11
12#include <asm/xen/hypercall.h>
13
14#include <xen/interface/xen.h>
15#include <xen/interface/version.h>
12#include <xen/features.h> 16#include <xen/features.h>
13 17
14u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; 18u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 06592b9da83c..7d8f531fb8e8 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -40,6 +40,7 @@
40#include <xen/interface/xen.h> 40#include <xen/interface/xen.h>
41#include <xen/page.h> 41#include <xen/page.h>
42#include <xen/grant_table.h> 42#include <xen/grant_table.h>
43#include <asm/xen/hypercall.h>
43 44
44#include <asm/pgtable.h> 45#include <asm/pgtable.h>
45#include <asm/sync_bitops.h> 46#include <asm/sync_bitops.h>
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 12c07c1866b2..4c794d73fb84 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -8,9 +8,17 @@
8#ifdef CONFIG_GENERIC_BUG 8#ifdef CONFIG_GENERIC_BUG
9#ifndef __ASSEMBLY__ 9#ifndef __ASSEMBLY__
10struct bug_entry { 10struct bug_entry {
11#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
11 unsigned long bug_addr; 12 unsigned long bug_addr;
13#else
14 signed int bug_addr_disp;
15#endif
12#ifdef CONFIG_DEBUG_BUGVERBOSE 16#ifdef CONFIG_DEBUG_BUGVERBOSE
17#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
13 const char *file; 18 const char *file;
19#else
20 signed int file_disp;
21#endif
14 unsigned short line; 22 unsigned short line;
15#endif 23#endif
16 unsigned short flags; 24 unsigned short flags;
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index ef87f889ef62..72ebe91005a8 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
129#define move_pte(pte, prot, old_addr, new_addr) (pte) 129#define move_pte(pte, prot, old_addr, new_addr) (pte)
130#endif 130#endif
131 131
132#ifndef pgprot_writecombine
133#define pgprot_writecombine pgprot_noncached
134#endif
135
132/* 136/*
133 * When walking page tables, get the address of the next boundary, 137 * When walking page tables, get the address of the next boundary,
134 * or the end address of the range if that comes earlier. Although no 138 * or the end address of the range if that comes earlier. Although no
@@ -289,6 +293,52 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
289#define arch_flush_lazy_cpu_mode() do {} while (0) 293#define arch_flush_lazy_cpu_mode() do {} while (0)
290#endif 294#endif
291 295
296#ifndef __HAVE_PFNMAP_TRACKING
297/*
298 * Interface that can be used by architecture code to keep track of
299 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
300 *
301 * track_pfn_vma_new is called when a _new_ pfn mapping is being established
302 * for physical range indicated by pfn and size.
303 */
304static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
305 unsigned long pfn, unsigned long size)
306{
307 return 0;
308}
309
310/*
311 * Interface that can be used by architecture code to keep track of
312 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
313 *
314 * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
315 * copied through copy_page_range().
316 */
317static inline int track_pfn_vma_copy(struct vm_area_struct *vma)
318{
319 return 0;
320}
321
322/*
323 * Interface that can be used by architecture code to keep track of
324 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
325 *
326 * untrack_pfn_vma is called while unmapping a pfnmap for a region.
327 * untrack can be called for a specific region indicated by pfn and size or
328 * can be for the entire vma (in which case size can be zero).
329 */
330static inline void untrack_pfn_vma(struct vm_area_struct *vma,
331 unsigned long pfn, unsigned long size)
332{
333}
334#else
335extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
336 unsigned long pfn, unsigned long size);
337extern int track_pfn_vma_copy(struct vm_area_struct *vma);
338extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
339 unsigned long size);
340#endif
341
292#endif /* !__ASSEMBLY__ */ 342#endif /* !__ASSEMBLY__ */
293 343
294#endif /* _ASM_GENERIC_PGTABLE_H */ 344#endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index e5084eb5943a..2bfda178f274 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -44,6 +44,7 @@ extern const struct dmi_device * dmi_find_device(int type, const char *name,
44extern void dmi_scan_machine(void); 44extern void dmi_scan_machine(void);
45extern int dmi_get_year(int field); 45extern int dmi_get_year(int field);
46extern int dmi_name_in_vendors(const char *str); 46extern int dmi_name_in_vendors(const char *str);
47extern int dmi_name_in_serial(const char *str);
47extern int dmi_available; 48extern int dmi_available;
48extern int dmi_walk(void (*decode)(const struct dmi_header *)); 49extern int dmi_walk(void (*decode)(const struct dmi_header *));
49 50
@@ -56,6 +57,7 @@ static inline const struct dmi_device * dmi_find_device(int type, const char *na
56static inline void dmi_scan_machine(void) { return; } 57static inline void dmi_scan_machine(void) { return; }
57static inline int dmi_get_year(int year) { return 0; } 58static inline int dmi_get_year(int year) { return 0; }
58static inline int dmi_name_in_vendors(const char *s) { return 0; } 59static inline int dmi_name_in_vendors(const char *s) { return 0; }
60static inline int dmi_name_in_serial(const char *s) { return 0; }
59#define dmi_available 0 61#define dmi_available 0
60static inline int dmi_walk(void (*decode)(const struct dmi_header *)) 62static inline int dmi_walk(void (*decode)(const struct dmi_header *))
61 { return -1; } 63 { return -1; }
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 17f76fc05173..adc34f2c6eff 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -100,6 +100,10 @@ struct kimage {
100#define KEXEC_TYPE_DEFAULT 0 100#define KEXEC_TYPE_DEFAULT 0
101#define KEXEC_TYPE_CRASH 1 101#define KEXEC_TYPE_CRASH 1
102 unsigned int preserve_context : 1; 102 unsigned int preserve_context : 1;
103
104#ifdef ARCH_HAS_KIMAGE_ARCH
105 struct kimage_arch arch;
106#endif
103}; 107};
104 108
105 109
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 9fd1f859021b..fee9e59649c1 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -64,14 +64,6 @@
64 name: 64 name:
65#endif 65#endif
66 66
67#define KPROBE_ENTRY(name) \
68 .pushsection .kprobes.text, "ax"; \
69 ENTRY(name)
70
71#define KPROBE_END(name) \
72 END(name); \
73 .popsection
74
75#ifndef END 67#ifndef END
76#define END(name) \ 68#define END(name) \
77 .size name, .-name 69 .size name, .-name
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ffee2f743418..d3ddd735e375 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -145,6 +145,23 @@ extern pgprot_t protection_map[16];
145#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ 145#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
146#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ 146#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
147 147
148/*
149 * This interface is used by x86 PAT code to identify a pfn mapping that is
150 * linear over entire vma. This is to optimize PAT code that deals with
151 * marking the physical region with a particular prot. This is not for generic
152 * mm use. Note also that this check will not work if the pfn mapping is
153 * linear for a vma starting at physical address 0. In which case PAT code
154 * falls back to slow path of reserving physical range page by page.
155 */
156static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
157{
158 return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff);
159}
160
161static inline int is_pfn_mapping(struct vm_area_struct *vma)
162{
163 return (vma->vm_flags & VM_PFNMAP);
164}
148 165
149/* 166/*
150 * vm_fault is filled by the the pagefault handler and passed to the vma's 167 * vm_fault is filled by the the pagefault handler and passed to the vma's
@@ -781,6 +798,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
781 struct vm_area_struct *vma); 798 struct vm_area_struct *vma);
782void unmap_mapping_range(struct address_space *mapping, 799void unmap_mapping_range(struct address_space *mapping,
783 loff_t const holebegin, loff_t const holelen, int even_cows); 800 loff_t const holebegin, loff_t const holelen, int even_cows);
801int follow_phys(struct vm_area_struct *vma, unsigned long address,
802 unsigned int flags, unsigned long *prot, resource_size_t *phys);
784int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, 803int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
785 void *buf, int len, int write); 804 void *buf, int len, int write);
786 805
diff --git a/include/linux/pci.h b/include/linux/pci.h
index feb4657bb043..03b0b8c3c81b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -134,6 +134,11 @@ enum pci_dev_flags {
134 PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, 134 PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
135}; 135};
136 136
137enum pci_irq_reroute_variant {
138 INTEL_IRQ_REROUTE_VARIANT = 1,
139 MAX_IRQ_REROUTE_VARIANTS = 3
140};
141
137typedef unsigned short __bitwise pci_bus_flags_t; 142typedef unsigned short __bitwise pci_bus_flags_t;
138enum pci_bus_flags { 143enum pci_bus_flags {
139 PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1, 144 PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1,
@@ -218,6 +223,7 @@ struct pci_dev {
218 unsigned int no_msi:1; /* device may not use msi */ 223 unsigned int no_msi:1; /* device may not use msi */
219 unsigned int block_ucfg_access:1; /* userspace config space access is blocked */ 224 unsigned int block_ucfg_access:1; /* userspace config space access is blocked */
220 unsigned int broken_parity_status:1; /* Device generates false positive parity */ 225 unsigned int broken_parity_status:1; /* Device generates false positive parity */
226 unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */
221 unsigned int msi_enabled:1; 227 unsigned int msi_enabled:1;
222 unsigned int msix_enabled:1; 228 unsigned int msix_enabled:1;
223 unsigned int ari_enabled:1; /* ARI forwarding */ 229 unsigned int ari_enabled:1; /* ARI forwarding */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 1800f1d6e40d..b6e694454280 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2304,6 +2304,10 @@
2304#define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 2304#define PCI_DEVICE_ID_INTEL_PXH_0 0x0329
2305#define PCI_DEVICE_ID_INTEL_PXH_1 0x032A 2305#define PCI_DEVICE_ID_INTEL_PXH_1 0x032A
2306#define PCI_DEVICE_ID_INTEL_PXHV 0x032C 2306#define PCI_DEVICE_ID_INTEL_PXHV 0x032C
2307#define PCI_DEVICE_ID_INTEL_80332_0 0x0330
2308#define PCI_DEVICE_ID_INTEL_80332_1 0x0332
2309#define PCI_DEVICE_ID_INTEL_80333_0 0x0370
2310#define PCI_DEVICE_ID_INTEL_80333_1 0x0372
2307#define PCI_DEVICE_ID_INTEL_82375 0x0482 2311#define PCI_DEVICE_ID_INTEL_82375 0x0482
2308#define PCI_DEVICE_ID_INTEL_82424 0x0483 2312#define PCI_DEVICE_ID_INTEL_82424 0x0483
2309#define PCI_DEVICE_ID_INTEL_82378 0x0484 2313#define PCI_DEVICE_ID_INTEL_82378 0x0484
@@ -2376,6 +2380,7 @@
2376#define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4 2380#define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4
2377#define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6 2381#define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6
2378#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab 2382#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab
2383#define PCI_DEVICE_ID_INTEL_ESB_10 0x25ac
2379#define PCI_DEVICE_ID_INTEL_82820_HB 0x2500 2384#define PCI_DEVICE_ID_INTEL_82820_HB 0x2500
2380#define PCI_DEVICE_ID_INTEL_82820_UP_HB 0x2501 2385#define PCI_DEVICE_ID_INTEL_82820_UP_HB 0x2501
2381#define PCI_DEVICE_ID_INTEL_82850_HB 0x2530 2386#define PCI_DEVICE_ID_INTEL_82850_HB 0x2530
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index 919b5bdcb2bd..2090881c3650 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -9,6 +9,8 @@
9#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ 9#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
10#define __XEN_PUBLIC_EVENT_CHANNEL_H__ 10#define __XEN_PUBLIC_EVENT_CHANNEL_H__
11 11
12#include <xen/interface/xen.h>
13
12typedef uint32_t evtchn_port_t; 14typedef uint32_t evtchn_port_t;
13DEFINE_GUEST_HANDLE(evtchn_port_t); 15DEFINE_GUEST_HANDLE(evtchn_port_t);
14 16
diff --git a/lib/bug.c b/lib/bug.c
index bfeafd60ee9f..300e41afbf97 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -5,6 +5,8 @@
5 5
6 CONFIG_BUG - emit BUG traps. Nothing happens without this. 6 CONFIG_BUG - emit BUG traps. Nothing happens without this.
7 CONFIG_GENERIC_BUG - enable this code. 7 CONFIG_GENERIC_BUG - enable this code.
8 CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit pointers relative to
9 the containing struct bug_entry for bug_addr and file.
8 CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG 10 CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG
9 11
10 CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable 12 CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable
@@ -43,6 +45,15 @@
43 45
44extern const struct bug_entry __start___bug_table[], __stop___bug_table[]; 46extern const struct bug_entry __start___bug_table[], __stop___bug_table[];
45 47
48static inline unsigned long bug_addr(const struct bug_entry *bug)
49{
50#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
51 return bug->bug_addr;
52#else
53 return (unsigned long)bug + bug->bug_addr_disp;
54#endif
55}
56
46#ifdef CONFIG_MODULES 57#ifdef CONFIG_MODULES
47static LIST_HEAD(module_bug_list); 58static LIST_HEAD(module_bug_list);
48 59
@@ -55,7 +66,7 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
55 unsigned i; 66 unsigned i;
56 67
57 for (i = 0; i < mod->num_bugs; ++i, ++bug) 68 for (i = 0; i < mod->num_bugs; ++i, ++bug)
58 if (bugaddr == bug->bug_addr) 69 if (bugaddr == bug_addr(bug))
59 return bug; 70 return bug;
60 } 71 }
61 return NULL; 72 return NULL;
@@ -108,7 +119,7 @@ const struct bug_entry *find_bug(unsigned long bugaddr)
108 const struct bug_entry *bug; 119 const struct bug_entry *bug;
109 120
110 for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) 121 for (bug = __start___bug_table; bug < __stop___bug_table; ++bug)
111 if (bugaddr == bug->bug_addr) 122 if (bugaddr == bug_addr(bug))
112 return bug; 123 return bug;
113 124
114 return module_find_bug(bugaddr); 125 return module_find_bug(bugaddr);
@@ -133,7 +144,11 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
133 144
134 if (bug) { 145 if (bug) {
135#ifdef CONFIG_DEBUG_BUGVERBOSE 146#ifdef CONFIG_DEBUG_BUGVERBOSE
147#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
136 file = bug->file; 148 file = bug->file;
149#else
150 file = (const char *)bug + bug->file_disp;
151#endif
137 line = bug->line; 152 line = bug->line;
138#endif 153#endif
139 warning = (bug->flags & BUGFLAG_WARNING) != 0; 154 warning = (bug->flags & BUGFLAG_WARNING) != 0;
diff --git a/mm/memory.c b/mm/memory.c
index 164951c47305..f01b7eed6e16 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -669,6 +669,16 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
669 if (is_vm_hugetlb_page(vma)) 669 if (is_vm_hugetlb_page(vma))
670 return copy_hugetlb_page_range(dst_mm, src_mm, vma); 670 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
671 671
672 if (unlikely(is_pfn_mapping(vma))) {
673 /*
674 * We do not free on error cases below as remove_vma
675 * gets called on error from higher level routine
676 */
677 ret = track_pfn_vma_copy(vma);
678 if (ret)
679 return ret;
680 }
681
672 /* 682 /*
673 * We need to invalidate the secondary MMU mappings only when 683 * We need to invalidate the secondary MMU mappings only when
674 * there could be a permission downgrade on the ptes of the 684 * there could be a permission downgrade on the ptes of the
@@ -915,6 +925,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
915 if (vma->vm_flags & VM_ACCOUNT) 925 if (vma->vm_flags & VM_ACCOUNT)
916 *nr_accounted += (end - start) >> PAGE_SHIFT; 926 *nr_accounted += (end - start) >> PAGE_SHIFT;
917 927
928 if (unlikely(is_pfn_mapping(vma)))
929 untrack_pfn_vma(vma, 0, 0);
930
918 while (start != end) { 931 while (start != end) {
919 if (!tlb_start_valid) { 932 if (!tlb_start_valid) {
920 tlb_start = start; 933 tlb_start = start;
@@ -1430,6 +1443,7 @@ out:
1430int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, 1443int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1431 unsigned long pfn) 1444 unsigned long pfn)
1432{ 1445{
1446 int ret;
1433 /* 1447 /*
1434 * Technically, architectures with pte_special can avoid all these 1448 * Technically, architectures with pte_special can avoid all these
1435 * restrictions (same for remap_pfn_range). However we would like 1449 * restrictions (same for remap_pfn_range). However we would like
@@ -1444,7 +1458,15 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1444 1458
1445 if (addr < vma->vm_start || addr >= vma->vm_end) 1459 if (addr < vma->vm_start || addr >= vma->vm_end)
1446 return -EFAULT; 1460 return -EFAULT;
1447 return insert_pfn(vma, addr, pfn, vma->vm_page_prot); 1461 if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE))
1462 return -EINVAL;
1463
1464 ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot);
1465
1466 if (ret)
1467 untrack_pfn_vma(vma, pfn, PAGE_SIZE);
1468
1469 return ret;
1448} 1470}
1449EXPORT_SYMBOL(vm_insert_pfn); 1471EXPORT_SYMBOL(vm_insert_pfn);
1450 1472
@@ -1575,14 +1597,17 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
1575 * behaviour that some programs depend on. We mark the "original" 1597 * behaviour that some programs depend on. We mark the "original"
1576 * un-COW'ed pages by matching them up with "vma->vm_pgoff". 1598 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
1577 */ 1599 */
1578 if (is_cow_mapping(vma->vm_flags)) { 1600 if (addr == vma->vm_start && end == vma->vm_end)
1579 if (addr != vma->vm_start || end != vma->vm_end)
1580 return -EINVAL;
1581 vma->vm_pgoff = pfn; 1601 vma->vm_pgoff = pfn;
1582 } 1602 else if (is_cow_mapping(vma->vm_flags))
1603 return -EINVAL;
1583 1604
1584 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; 1605 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
1585 1606
1607 err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size));
1608 if (err)
1609 return -EINVAL;
1610
1586 BUG_ON(addr >= end); 1611 BUG_ON(addr >= end);
1587 pfn -= addr >> PAGE_SHIFT; 1612 pfn -= addr >> PAGE_SHIFT;
1588 pgd = pgd_offset(mm, addr); 1613 pgd = pgd_offset(mm, addr);
@@ -1594,6 +1619,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
1594 if (err) 1619 if (err)
1595 break; 1620 break;
1596 } while (pgd++, addr = next, addr != end); 1621 } while (pgd++, addr = next, addr != end);
1622
1623 if (err)
1624 untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
1625
1597 return err; 1626 return err;
1598} 1627}
1599EXPORT_SYMBOL(remap_pfn_range); 1628EXPORT_SYMBOL(remap_pfn_range);
@@ -2865,9 +2894,9 @@ int in_gate_area_no_task(unsigned long addr)
2865#endif /* __HAVE_ARCH_GATE_AREA */ 2894#endif /* __HAVE_ARCH_GATE_AREA */
2866 2895
2867#ifdef CONFIG_HAVE_IOREMAP_PROT 2896#ifdef CONFIG_HAVE_IOREMAP_PROT
2868static resource_size_t follow_phys(struct vm_area_struct *vma, 2897int follow_phys(struct vm_area_struct *vma,
2869 unsigned long address, unsigned int flags, 2898 unsigned long address, unsigned int flags,
2870 unsigned long *prot) 2899 unsigned long *prot, resource_size_t *phys)
2871{ 2900{
2872 pgd_t *pgd; 2901 pgd_t *pgd;
2873 pud_t *pud; 2902 pud_t *pud;
@@ -2876,24 +2905,26 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
2876 spinlock_t *ptl; 2905 spinlock_t *ptl;
2877 resource_size_t phys_addr = 0; 2906 resource_size_t phys_addr = 0;
2878 struct mm_struct *mm = vma->vm_mm; 2907 struct mm_struct *mm = vma->vm_mm;
2908 int ret = -EINVAL;
2879 2909
2880 VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP))); 2910 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
2911 goto out;
2881 2912
2882 pgd = pgd_offset(mm, address); 2913 pgd = pgd_offset(mm, address);
2883 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) 2914 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
2884 goto no_page_table; 2915 goto out;
2885 2916
2886 pud = pud_offset(pgd, address); 2917 pud = pud_offset(pgd, address);
2887 if (pud_none(*pud) || unlikely(pud_bad(*pud))) 2918 if (pud_none(*pud) || unlikely(pud_bad(*pud)))
2888 goto no_page_table; 2919 goto out;
2889 2920
2890 pmd = pmd_offset(pud, address); 2921 pmd = pmd_offset(pud, address);
2891 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) 2922 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
2892 goto no_page_table; 2923 goto out;
2893 2924
2894 /* We cannot handle huge page PFN maps. Luckily they don't exist. */ 2925 /* We cannot handle huge page PFN maps. Luckily they don't exist. */
2895 if (pmd_huge(*pmd)) 2926 if (pmd_huge(*pmd))
2896 goto no_page_table; 2927 goto out;
2897 2928
2898 ptep = pte_offset_map_lock(mm, pmd, address, &ptl); 2929 ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
2899 if (!ptep) 2930 if (!ptep)
@@ -2908,13 +2939,13 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
2908 phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */ 2939 phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
2909 2940
2910 *prot = pgprot_val(pte_pgprot(pte)); 2941 *prot = pgprot_val(pte_pgprot(pte));
2942 *phys = phys_addr;
2943 ret = 0;
2911 2944
2912unlock: 2945unlock:
2913 pte_unmap_unlock(ptep, ptl); 2946 pte_unmap_unlock(ptep, ptl);
2914out: 2947out:
2915 return phys_addr; 2948 return ret;
2916no_page_table:
2917 return 0;
2918} 2949}
2919 2950
2920int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, 2951int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
@@ -2925,12 +2956,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
2925 void *maddr; 2956 void *maddr;
2926 int offset = addr & (PAGE_SIZE-1); 2957 int offset = addr & (PAGE_SIZE-1);
2927 2958
2928 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) 2959 if (follow_phys(vma, addr, write, &prot, &phys_addr))
2929 return -EINVAL;
2930
2931 phys_addr = follow_phys(vma, addr, write, &prot);
2932
2933 if (!phys_addr)
2934 return -EINVAL; 2960 return -EINVAL;
2935 2961
2936 maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot); 2962 maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 90cb67a5417c..54a9f87e5162 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1462,6 +1462,15 @@ static int __init procswaps_init(void)
1462__initcall(procswaps_init); 1462__initcall(procswaps_init);
1463#endif /* CONFIG_PROC_FS */ 1463#endif /* CONFIG_PROC_FS */
1464 1464
1465#ifdef MAX_SWAPFILES_CHECK
1466static int __init max_swapfiles_check(void)
1467{
1468 MAX_SWAPFILES_CHECK();
1469 return 0;
1470}
1471late_initcall(max_swapfiles_check);
1472#endif
1473
1465/* 1474/*
1466 * Written 01/25/92 by Simmule Turner, heavily changed by Linus. 1475 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
1467 * 1476 *