Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 48
-rw-r--r--  arch/x86/Makefile | 5
-rw-r--r--  arch/x86/boot/compressed/Makefile | 9
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 14
-rw-r--r--  arch/x86/boot/header.S | 26
-rw-r--r--  arch/x86/boot/main.c | 18
-rw-r--r--  arch/x86/boot/tools/build.c | 24
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 4
-rw-r--r--  arch/x86/ia32/ia32entry.S | 9
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 23
-rw-r--r--  arch/x86/include/asm/apic.h | 23
-rw-r--r--  arch/x86/include/asm/apicdef.h | 2
-rw-r--r--  arch/x86/include/asm/asm.h | 38
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 10
-rw-r--r--  arch/x86/include/asm/boot.h | 2
-rw-r--r--  arch/x86/include/asm/bootparam.h | 3
-rw-r--r--  arch/x86/include/asm/compat.h | 2
-rw-r--r--  arch/x86/include/asm/current.h | 2
-rw-r--r--  arch/x86/include/asm/desc.h | 1
-rw-r--r--  arch/x86/include/asm/device.h | 4
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 9
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 6
-rw-r--r--  arch/x86/include/asm/ftrace.h | 3
-rw-r--r--  arch/x86/include/asm/hardirq.h | 9
-rw-r--r--  arch/x86/include/asm/ia32.h | 6
-rw-r--r--  arch/x86/include/asm/io_apic.h | 35
-rw-r--r--  arch/x86/include/asm/irq_regs.h | 4
-rw-r--r--  arch/x86/include/asm/irq_remapping.h | 118
-rw-r--r--  arch/x86/include/asm/kbdleds.h | 17
-rw-r--r--  arch/x86/include/asm/kdebug.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 5
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 12
-rw-r--r--  arch/x86/include/asm/mmzone_32.h | 6
-rw-r--r--  arch/x86/include/asm/msr-index.h | 5
-rw-r--r--  arch/x86/include/asm/msr.h | 9
-rw-r--r--  arch/x86/include/asm/nmi.h | 22
-rw-r--r--  arch/x86/include/asm/nops.h | 4
-rw-r--r--  arch/x86/include/asm/page_32_types.h | 4
-rw-r--r--  arch/x86/include/asm/page_64_types.h | 4
-rw-r--r--  arch/x86/include/asm/paravirt.h | 6
-rw-r--r--  arch/x86/include/asm/percpu.h | 24
-rw-r--r--  arch/x86/include/asm/perf_event.h | 12
-rw-r--r--  arch/x86/include/asm/processor.h | 5
-rw-r--r--  arch/x86/include/asm/segment.h | 4
-rw-r--r--  arch/x86/include/asm/smp.h | 15
-rw-r--r--  arch/x86/include/asm/spinlock.h | 2
-rw-r--r--  arch/x86/include/asm/stackprotector.h | 4
-rw-r--r--  arch/x86/include/asm/stat.h | 21
-rw-r--r--  arch/x86/include/asm/syscall.h | 27
-rw-r--r--  arch/x86/include/asm/thread_info.h | 23
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 10
-rw-r--r--  arch/x86/include/asm/topology.h | 38
-rw-r--r--  arch/x86/include/asm/uaccess.h | 25
-rw-r--r--  arch/x86/include/asm/x86_init.h | 9
-rw-r--r--  arch/x86/include/asm/xsave.h | 10
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/apic/apic.c | 42
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 1
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 1
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/es7000_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 383
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/summit_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 1
-rw-r--r--  arch/x86/kernel/apm_32.c | 2
-rw-r--r--  arch/x86/kernel/check.c | 20
-rw-r--r--  arch/x86/kernel/cpu/common.c | 2
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 4
-rw-r--r--  arch/x86/kernel/cpu/match.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 55
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 65
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 7
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 11
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c | 570
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 6
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 6
-rw-r--r--  arch/x86/kernel/dumpstack.c | 23
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 2
-rw-r--r--  arch/x86/kernel/entry_32.S | 47
-rw-r--r--  arch/x86/kernel/entry_64.S | 16
-rw-r--r--  arch/x86/kernel/ftrace.c | 500
-rw-r--r--  arch/x86/kernel/head_32.S | 223
-rw-r--r--  arch/x86/kernel/head_64.S | 80
-rw-r--r--  arch/x86/kernel/i387.c | 2
-rw-r--r--  arch/x86/kernel/init_task.c | 42
-rw-r--r--  arch/x86/kernel/irq_32.c | 8
-rw-r--r--  arch/x86/kernel/kprobes.c | 4
-rw-r--r--  arch/x86/kernel/microcode_core.c | 9
-rw-r--r--  arch/x86/kernel/nmi.c | 95
-rw-r--r--  arch/x86/kernel/nmi_selftest.c | 13
-rw-r--r--  arch/x86/kernel/paravirt.c | 12
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 8
-rw-r--r--  arch/x86/kernel/process.c | 73
-rw-r--r--  arch/x86/kernel/process_32.c | 11
-rw-r--r--  arch/x86/kernel/process_64.c | 19
-rw-r--r--  arch/x86/kernel/ptrace.c | 7
-rw-r--r--  arch/x86/kernel/reboot.c | 277
-rw-r--r--  arch/x86/kernel/setup.c | 10
-rw-r--r--  arch/x86/kernel/smp.c | 100
-rw-r--r--  arch/x86/kernel/smpboot.c | 191
-rw-r--r--  arch/x86/kernel/test_rodata.c | 10
-rw-r--r--  arch/x86/kernel/traps.c | 8
-rw-r--r--  arch/x86/kernel/vsmp_64.c | 40
-rw-r--r--  arch/x86/kernel/x86_init.c | 8
-rw-r--r--  arch/x86/kernel/xsave.c | 2
-rw-r--r--  arch/x86/lib/checksum_32.S | 9
-rw-r--r--  arch/x86/lib/copy_user_64.S | 63
-rw-r--r--  arch/x86/lib/copy_user_nocache_64.S | 50
-rw-r--r--  arch/x86/lib/csum-copy_64.S | 16
-rw-r--r--  arch/x86/lib/getuser.S | 9
-rw-r--r--  arch/x86/lib/putuser.S | 12
-rw-r--r--  arch/x86/lib/usercopy.c | 20
-rw-r--r--  arch/x86/lib/usercopy_32.c | 232
-rw-r--r--  arch/x86/mm/extable.c | 142
-rw-r--r--  arch/x86/mm/init.c | 21
-rw-r--r--  arch/x86/mm/init_64.c | 23
-rw-r--r--  arch/x86/mm/numa_emulation.c | 8
-rw-r--r--  arch/x86/mm/tlb.c | 16
-rw-r--r--  arch/x86/pci/Makefile | 2
-rw-r--r--  arch/x86/pci/acpi.c | 128
-rw-r--r--  arch/x86/pci/amd_bus.c | 91
-rw-r--r--  arch/x86/pci/broadcom_bus.c | 12
-rw-r--r--  arch/x86/pci/bus_numa.c | 69
-rw-r--r--  arch/x86/pci/bus_numa.h | 18
-rw-r--r--  arch/x86/pci/common.c | 43
-rw-r--r--  arch/x86/pci/fixup.c | 17
-rw-r--r--  arch/x86/pci/i386.c | 2
-rw-r--r--  arch/x86/pci/sta2x11-fixup.c | 366
-rw-r--r--  arch/x86/platform/olpc/olpc-xo1-sci.c | 43
-rw-r--r--  arch/x86/platform/visws/visws_quirks.c | 2
-rw-r--r--  arch/x86/tools/.gitignore | 1
-rw-r--r--  arch/x86/tools/Makefile | 4
-rw-r--r--  arch/x86/tools/relocs.c (renamed from arch/x86/boot/compressed/relocs.c) | 242
-rw-r--r--  arch/x86/um/asm/elf.h | 42
-rw-r--r--  arch/x86/um/asm/ptrace.h | 34
-rw-r--r--  arch/x86/um/asm/ptrace_32.h | 23
-rw-r--r--  arch/x86/um/asm/ptrace_64.h | 26
-rw-r--r--  arch/x86/um/checksum_32.S | 9
-rw-r--r--  arch/x86/um/shared/sysdep/ptrace.h | 67
-rw-r--r--  arch/x86/um/shared/sysdep/ptrace_32.h | 92
-rw-r--r--  arch/x86/um/shared/sysdep/ptrace_64.h | 101
-rw-r--r--  arch/x86/um/signal.c | 29
-rw-r--r--  arch/x86/um/sys_call_table_64.c | 1
-rw-r--r--  arch/x86/um/syscalls_32.c | 12
-rw-r--r--  arch/x86/um/sysrq_32.c | 8
-rw-r--r--  arch/x86/um/sysrq_64.c | 8
-rw-r--r--  arch/x86/um/tls_32.c | 2
-rw-r--r--  arch/x86/xen/Makefile | 2
-rw-r--r--  arch/x86/xen/apic.c | 33
-rw-r--r--  arch/x86/xen/enlighten.c | 2
-rw-r--r--  arch/x86/xen/mmu.c | 4
-rw-r--r--  arch/x86/xen/smp.c | 19
-rw-r--r--  arch/x86/xen/xen-asm_32.S | 6
-rw-r--r--  arch/x86/xen/xen-ops.h | 4
161 files changed, 3657 insertions, 2345 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c9866b0b77d8..4d37072c498a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -12,6 +12,7 @@ config X86_32
 
 config X86_64
 	def_bool 64BIT
+	select X86_DEV_DMA_OPS
 
 ### Arch settings
 config X86
@@ -40,7 +41,6 @@ config X86
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
-	select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_KVM
 	select HAVE_ARCH_KGDB
@@ -77,11 +77,14 @@ config X86
 	select GENERIC_CLOCKEVENTS_MIN_ADJUST
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
-	select HAVE_BPF_JIT if (X86_64 && NET)
+	select HAVE_BPF_JIT if X86_64
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_IOMAP
 	select DCACHE_WORD_ACCESS
+	select GENERIC_SMP_IDLE_THREAD
+	select HAVE_ARCH_SECCOMP_FILTER
+	select BUILDTIME_EXTABLE_SORT
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -160,9 +163,6 @@ config RWSEM_GENERIC_SPINLOCK
 config RWSEM_XCHGADD_ALGORITHM
 	def_bool X86_XADD
 
-config ARCH_HAS_CPU_IDLE_WAIT
-	def_bool y
-
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
@@ -328,6 +328,7 @@ config X86_EXTENDED_PLATFORM
 	  NUMAQ (IBM/Sequent)
 	  RDC R-321x SoC
 	  SGI 320/540 (Visual Workstation)
+	  STA2X11-based (e.g. Northville)
 	  Summit/EXA (IBM x440)
 	  Unisys ES7000 IA32 series
 	  Moorestown MID devices
@@ -374,6 +375,7 @@ config X86_VSMP
 	select PARAVIRT
 	depends on X86_64 && PCI
 	depends on X86_EXTENDED_PLATFORM
+	depends on SMP
 	---help---
 	  Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is
 	  supposed to run on these EM64T-based machines. Only choose this option
@@ -460,10 +462,10 @@ config X86_32_NON_STANDARD
 	depends on X86_32 && SMP
 	depends on X86_EXTENDED_PLATFORM
 	---help---
-	  This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
-	  subarchitectures. It is intended for a generic binary kernel.
-	  if you select them all, kernel will probe it one by one. and will
-	  fallback to default.
+	  This option compiles in the NUMAQ, Summit, bigsmp, ES7000,
+	  STA2X11, default subarchitectures. It is intended for a generic
+	  binary kernel. If you select them all, kernel will probe it
+	  one by one and will fallback to default.
 
 # Alphabetically sorted list of Non standard 32 bit platforms
 
@@ -503,6 +505,22 @@ config X86_VISWS
 	  A kernel compiled for the Visual Workstation will run on general
 	  PCs as well. See <file:Documentation/sgi-visws.txt> for details.
 
+config STA2X11
+	bool "STA2X11 Companion Chip Support"
+	depends on X86_32_NON_STANDARD && PCI
+	select X86_DEV_DMA_OPS
+	select X86_DMA_REMAP
+	select SWIOTLB
+	select MFD_STA2X11
+	select ARCH_REQUIRE_GPIOLIB
+	default n
+	---help---
+	  This adds support for boards based on the STA2X11 IO-Hub,
+	  a.k.a. "ConneXt". The chip is used in place of the standard
+	  PC chipset, so all "standard" peripherals are missing. If this
+	  option is selected the kernel will still be able to boot on
+	  standard PC machines.
+
 config X86_SUMMIT
 	bool "Summit/EXA (IBM x440)"
 	depends on X86_32_NON_STANDARD
@@ -1239,10 +1257,6 @@ config NODES_SHIFT
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system. Increases memory reserved to accommodate various tables.
 
-config HAVE_ARCH_BOOTMEM
-	def_bool y
-	depends on X86_32 && NUMA
-
 config HAVE_ARCH_ALLOC_REMAP
 	def_bool y
 	depends on X86_32 && NUMA
@@ -2215,6 +2229,14 @@ config HAVE_TEXT_POKE_SMP
 	bool
 	select STOP_MACHINE if SMP
 
+config X86_DEV_DMA_OPS
+	bool
+	depends on X86_64 || STA2X11
+
+config X86_DMA_REMAP
+	bool
+	depends on STA2X11
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 41a7237606a3..dc611a40a336 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -134,6 +134,9 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
 KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
+archscripts:
+	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
+
 ###
 # Syscall table generation
 
@@ -146,7 +149,6 @@ archheaders:
 head-y := arch/x86/kernel/head_$(BITS).o
 head-y += arch/x86/kernel/head$(BITS).o
 head-y += arch/x86/kernel/head.o
-head-y += arch/x86/kernel/init_task.o
 
 libs-y += arch/x86/lib/
 
@@ -203,6 +205,7 @@ archclean:
 	$(Q)rm -rf $(objtree)/arch/i386
 	$(Q)rm -rf $(objtree)/arch/x86_64
 	$(Q)$(MAKE) $(clean)=$(boot)
+	$(Q)$(MAKE) $(clean)=arch/x86/tools
 
 define archhelp
   echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fd55a2ff3ad8..e398bb5d63bb 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -40,13 +40,12 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
+targets += vmlinux.bin.all vmlinux.relocs
 
-targets += vmlinux.bin.all vmlinux.relocs relocs
-hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs
-
+CMD_RELOCS = arch/x86/tools/relocs
 quiet_cmd_relocs = RELOCS  $@
-      cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
-$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
+      cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
+$(obj)/vmlinux.relocs: vmlinux FORCE
 	$(call if_changed,relocs)
 
 vmlinux.bin.all-y := $(obj)/vmlinux.bin
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 0cdfc0d2315e..2c14e76bb4c7 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -904,11 +904,19 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 
 	memset(boot_params, 0x0, 0x4000);
 
-	/* Copy first two sectors to boot_params */
-	memcpy(boot_params, image->image_base, 1024);
-
 	hdr = &boot_params->hdr;
 
+	/* Copy the second sector to boot_params */
+	memcpy(&hdr->jump, image->image_base + 512, 512);
+
+	/*
+	 * Fill out some of the header fields ourselves because the
+	 * EFI firmware loader doesn't load the first sector.
+	 */
+	hdr->root_flags = 1;
+	hdr->vid_mode = 0xffff;
+	hdr->boot_flag = 0xAA55;
+
 	/*
 	 * The EFI firmware loader could have placed the kernel image
 	 * anywhere in memory, but the kernel has various restrictions
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index f1bbeeb09148..8bbea6aa40d9 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -147,7 +147,7 @@ optional_header:
 	# Filled in by build.c
 	.long	0x0000				# AddressOfEntryPoint
 
-	.long	0x0000				# BaseOfCode
+	.long	0x0200				# BaseOfCode
 #ifdef CONFIG_X86_32
 	.long	0				# data
 #endif
@@ -189,7 +189,7 @@ extra_header_fields:
 	.quad	0				# SizeOfHeapCommit
 #endif
 	.long	0				# LoaderFlags
-	.long	0x1				# NumberOfRvaAndSizes
+	.long	0x6				# NumberOfRvaAndSizes
 
 	.quad	0				# ExportTable
 	.quad	0				# ImportTable
@@ -217,18 +217,17 @@ section_table:
 
 	#
 	# The EFI application loader requires a relocation section
-	# because EFI applications are relocatable and not having
-	# this section seems to confuse it. But since we don't need
-	# the loader to fixup any relocs for us just fill it with a
-	# single dummy reloc.
+	# because EFI applications must be relocatable. But since
+	# we don't need the loader to fixup any relocs for us, we
+	# just create an empty (zero-length) .reloc section header.
 	#
 	.ascii	".reloc"
 	.byte	0
 	.byte	0
-	.long	reloc_end - reloc_start
-	.long	reloc_start
-	.long	reloc_end - reloc_start		# SizeOfRawData
-	.long	reloc_start			# PointerToRawData
+	.long	0
+	.long	0
+	.long	0				# SizeOfRawData
+	.long	0				# PointerToRawData
 	.long	0				# PointerToRelocations
 	.long	0				# PointerToLineNumbers
 	.word	0				# NumberOfRelocations
@@ -469,10 +468,3 @@ setup_corrupt:
 
 	.data
 dummy:	.long	0
-
-	.section .reloc
-reloc_start:
-	.long	dummy - reloc_start
-	.long	10
-	.word	0
-reloc_end:
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 40358c8905be..cf6083d444f4 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -57,14 +57,20 @@ static void copy_boot_params(void)
 }
 
 /*
- * Set the keyboard repeat rate to maximum. Unclear why this
+ * Query the keyboard lock status as given by the BIOS, and
+ * set the keyboard repeat rate to maximum. Unclear why the latter
  * is done here; this might be possible to kill off as stale code.
  */
-static void keyboard_set_repeat(void)
+static void keyboard_init(void)
 {
-	struct biosregs ireg;
+	struct biosregs ireg, oreg;
 	initregs(&ireg);
-	ireg.ax = 0x0305;
+
+	ireg.ah = 0x02;		/* Get keyboard status */
+	intcall(0x16, &ireg, &oreg);
+	boot_params.kbd_status = oreg.al;
+
+	ireg.ax = 0x0305;	/* Set keyboard repeat rate */
 	intcall(0x16, &ireg, NULL);
 }
 
@@ -151,8 +157,8 @@ void main(void)
 	/* Detect memory layout */
 	detect_memory();
 
-	/* Set keyboard repeat rate (why?) */
-	keyboard_set_repeat();
+	/* Set keyboard repeat rate (why?) and query the lock flags */
+	keyboard_init();
 
 	/* Query MCA information */
 	query_mca();
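
For context (an illustrative aside, not part of the patch): int 0x16/AH=0x02 returns the BIOS shift/lock flags in AL, so boot_params.kbd_status carries bits such as 0x10 (ScrollLock), 0x20 (NumLock) and 0x40 (CapsLock); the 0x20 bit is what kbd_defleds() in the new <asm/kbdleds.h> later in this diff tests. A minimal user-space sketch of decoding that byte:

	/* Sketch: decode a BIOS keyboard-status byte as stored in kbd_status.
	 * Bit assignments follow the int 0x16/AH=0x02 interface. */
	#include <stdio.h>

	static void decode_kbd_status(unsigned char kbd_status)
	{
		printf("ScrollLock %s, NumLock %s, CapsLock %s\n",
		       (kbd_status & 0x10) ? "on" : "off",
		       (kbd_status & 0x20) ? "on" : "off",
		       (kbd_status & 0x40) ? "on" : "off");
	}

	int main(void)
	{
		decode_kbd_status(0x20);	/* NumLock only */
		return 0;
	}
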
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 24443a332083..3f61f6e2b46f 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -198,12 +198,19 @@ int main(int argc, char ** argv)
 
 	pe_header = get_unaligned_le32(&buf[0x3c]);
 
-	/* Size of code */
-	put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
-
 	/* Size of image */
 	put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
 
+	/*
+	 * Subtract the size of the first section (512 bytes) which
+	 * includes the header and .reloc section. The remaining size
+	 * is that of the .text section.
+	 */
+	file_sz -= 512;
+
+	/* Size of code */
+	put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Address of entry point.
@@ -216,8 +223,14 @@
 	/* .text size */
 	put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);
 
+	/* .text vma */
+	put_unaligned_le32(0x200, &buf[pe_header + 0xb4]);
+
 	/* .text size of initialised data */
 	put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
+
+	/* .text file offset */
+	put_unaligned_le32(0x200, &buf[pe_header + 0xbc]);
 #else
 	/*
 	 * Address of entry point. startup_32 is at the beginning and
@@ -231,9 +244,14 @@
 	/* .text size */
 	put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);
 
+	/* .text vma */
+	put_unaligned_le32(0x200, &buf[pe_header + 0xc4]);
+
 	/* .text size of initialised data */
 	put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
 
+	/* .text file offset */
+	put_unaligned_le32(0x200, &buf[pe_header + 0xcc]);
 #endif /* CONFIG_X86_32 */
 #endif /* CONFIG_EFI_STUB */
 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a69245ba27e3..0b3f2354f6aa 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -67,6 +67,10 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 		switch (from->si_code >> 16) {
 		case __SI_FAULT >> 16:
 			break;
+		case __SI_SYS >> 16:
+			put_user_ex(from->si_syscall, &to->si_syscall);
+			put_user_ex(from->si_arch, &to->si_arch);
+			break;
 		case __SI_CHLD >> 16:
 			if (ia32) {
 				put_user_ex(from->si_utime, &to->si_utime);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e3e734005e19..20e5f7ba0e6b 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -13,6 +13,7 @@
 #include <asm/thread_info.h>
 #include <asm/segment.h>
 #include <asm/irqflags.h>
+#include <asm/asm.h>
 #include <linux/linkage.h>
 #include <linux/err.h>
 
@@ -146,9 +147,7 @@ ENTRY(ia32_sysenter_target)
 	/* no need to do an access_ok check here because rbp has been
 	   32bit zero extended */
 1:	movl	(%rbp),%ebp
-	.section __ex_table,"a"
-	.quad 1b,ia32_badarg
-	.previous
+	_ASM_EXTABLE(1b,ia32_badarg)
 	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
@@ -303,9 +302,7 @@ ENTRY(ia32_cstar_target)
 	   32bit zero extended */
 	/* hardware stack frame is complete now */
 1:	movl	(%r8),%r9d
-	.section __ex_table,"a"
-	.quad 1b,ia32_badarg
-	.previous
+	_ASM_EXTABLE(1b,ia32_badarg)
 	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index aec2202a596c..edca9c0a79cc 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -287,11 +287,6 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
 	return ret;
 }
 
-asmlinkage long sys32_alarm(unsigned int seconds)
-{
-	return alarm_setitimer(seconds);
-}
-
 asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
 			      int options)
 {
@@ -300,11 +295,6 @@ asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
 
 /* 32-bit timeval and related flotsam. */
 
-asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2)
-{
-	return sys_sysfs(option, arg1, arg2);
-}
-
 asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
 				    struct compat_timespec __user *interval)
 {
@@ -375,19 +365,6 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
 }
 
 
-asmlinkage long sys32_personality(unsigned long personality)
-{
-	int ret;
-
-	if (personality(current->personality) == PER_LINUX32 &&
-	    personality == PER_LINUX)
-		personality = PER_LINUX32;
-	ret = sys_personality(personality);
-	if (ret == PER_LINUX32)
-		ret = PER_LINUX;
-	return ret;
-}
-
 asmlinkage long sys32_sendfile(int out_fd, int in_fd,
 			       compat_off_t __user *offset, s32 count)
 {
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d85410171260..eaff4790ed96 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -138,6 +138,11 @@ static inline void native_apic_msr_write(u32 reg, u32 v)
 	wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
 }
 
+static inline void native_apic_msr_eoi_write(u32 reg, u32 v)
+{
+	wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
+}
+
 static inline u32 native_apic_msr_read(u32 reg)
 {
 	u64 msr;
@@ -351,6 +356,14 @@ struct apic {
 	/* apic ops */
 	u32	(*read)(u32 reg);
 	void	(*write)(u32 reg, u32 v);
+	/*
+	 * ->eoi_write() has the same signature as ->write().
+	 *
+	 * Drivers can support both ->eoi_write() and ->write() by passing the same
+	 * callback value. Kernel can override ->eoi_write() and fall back
+	 * on write for EOI.
+	 */
+	void	(*eoi_write)(u32 reg, u32 v);
 	u64	(*icr_read)(void);
 	void	(*icr_write)(u32 low, u32 high);
 	void	(*wait_icr_idle)(void);
@@ -426,6 +439,11 @@ static inline void apic_write(u32 reg, u32 val)
 	apic->write(reg, val);
 }
 
+static inline void apic_eoi(void)
+{
+	apic->eoi_write(APIC_EOI, APIC_EOI_ACK);
+}
+
 static inline u64 apic_icr_read(void)
 {
 	return apic->icr_read();
@@ -450,6 +468,7 @@ static inline u32 safe_apic_wait_icr_idle(void)
 
 static inline u32 apic_read(u32 reg) { return 0; }
 static inline void apic_write(u32 reg, u32 val) { }
+static inline void apic_eoi(void) { }
 static inline u64 apic_icr_read(void) { return 0; }
 static inline void apic_icr_write(u32 low, u32 high) { }
 static inline void apic_wait_icr_idle(void) { }
@@ -463,9 +482,7 @@ static inline void ack_APIC_irq(void)
 	 * ack_APIC_irq() actually gets compiled as a single instruction
 	 * ... yummie.
 	 */
-
-	/* Docs say use 0 for future compatibility */
-	apic_write(APIC_EOI, 0);
+	apic_eoi();
 }
 
 static inline unsigned default_get_apic_id(unsigned long x)
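
For illustration only (none of the names below exist in this patch): a driver with no fast EOI path can simply reuse its ->write callback for ->eoi_write, which is the fallback behaviour the new comment in struct apic describes. A hypothetical sketch:

	/* Hypothetical APIC driver fragment; apic_example and
	 * example_apic_write are made-up names that only show the wiring. */
	#include <asm/apic.h>

	static void example_apic_write(u32 reg, u32 v)
	{
		/* device-specific register write would go here */
	}

	static struct apic apic_example = {
		.name		= "example",
		.write		= example_apic_write,
		.eoi_write	= example_apic_write,	/* same callback is valid */
	};
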
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 134bba00df09..c46bb99d5fb2 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -37,7 +37,7 @@
 #define		APIC_ARBPRI_MASK	0xFFu
 #define	APIC_PROCPRI	0xA0
 #define	APIC_EOI	0xB0
-#define		APIC_EIO_ACK	0x0
+#define		APIC_EOI_ACK	0x0 /* Docs say 0 for future compat. */
 #define	APIC_RRR	0xC0
 #define	APIC_LDR	0xD0
 #define	APIC_LDR_MASK	(0xFFu << 24)
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 9412d6558c88..1c2d247f65ce 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -4,11 +4,9 @@
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x)	x
 # define __ASM_FORM_COMMA(x) x,
-# define __ASM_EX_SEC	.section __ex_table, "a"
 #else
 # define __ASM_FORM(x)	" " #x " "
 # define __ASM_FORM_COMMA(x) " " #x ","
-# define __ASM_EX_SEC	" .section __ex_table,\"a\"\n"
 #endif
 
 #ifdef CONFIG_X86_32
@@ -42,17 +40,33 @@
 
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE(from,to)			\
-	__ASM_EX_SEC ;				\
-	_ASM_ALIGN ;				\
-	_ASM_PTR from , to ;			\
-	.previous
+	.pushsection "__ex_table","a" ;		\
+	.balign 8 ;				\
+	.long (from) - . ;			\
+	.long (to) - . ;			\
+	.popsection
+
+# define _ASM_EXTABLE_EX(from,to)		\
+	.pushsection "__ex_table","a" ;		\
+	.balign 8 ;				\
+	.long (from) - . ;			\
+	.long (to) - . + 0x7ffffff0 ;		\
+	.popsection
 #else
 # define _ASM_EXTABLE(from,to)			\
-	__ASM_EX_SEC				\
-	_ASM_ALIGN "\n"				\
-	_ASM_PTR #from "," #to "\n"		\
-	" .previous\n"
+	" .pushsection \"__ex_table\",\"a\"\n"	\
+	" .balign 8\n"				\
+	" .long (" #from ") - .\n"		\
+	" .long (" #to ") - .\n"		\
+	" .popsection\n"
+
+# define _ASM_EXTABLE_EX(from,to)		\
+	" .pushsection \"__ex_table\",\"a\"\n"	\
+	" .balign 8\n"				\
+	" .long (" #from ") - .\n"		\
+	" .long (" #to ") - . + 0x7ffffff0\n"	\
+	" .popsection\n"
 #endif
 
 #endif /* _ASM_X86_ASM_H */
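
A note on the new entry format (illustrative, based on the matching arch/x86/mm/extable.c changes in this series): each exception-table entry now stores two 32-bit offsets relative to the entry itself instead of two absolute pointers, which halves the table on 64-bit and makes the entries position-independent and thus sortable at build time (see BUILDTIME_EXTABLE_SORT above); the 0x7ffffff0 bias in _ASM_EXTABLE_EX marks extended uaccess fixups. Decoding an entry looks roughly like:

	/* Sketch of decoding a relative entry; the layout mirrors the two
	 * .long directives emitted by _ASM_EXTABLE above. */
	struct exception_table_entry {
		int insn;	/* offset of faulting insn, relative to this field */
		int fixup;	/* offset of fixup target, relative to this field */
	};

	static inline unsigned long
	ex_insn_addr(const struct exception_table_entry *x)
	{
		return (unsigned long)&x->insn + x->insn;
	}

	static inline unsigned long
	ex_fixup_addr(const struct exception_table_entry *x)
	{
		return (unsigned long)&x->fixup + x->fixup;
	}
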
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 198119910da5..b154de75c90c 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -63,7 +63,7 @@ ATOMIC64_DECL(add_unless);
 
 /**
  * atomic64_cmpxchg - cmpxchg atomic64 variable
- * @p: pointer to type atomic64_t
+ * @v: pointer to type atomic64_t
  * @o: expected value
  * @n: new value
  *
@@ -98,7 +98,7 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n)
 /**
  * atomic64_set - set atomic64 variable
  * @v: pointer to type atomic64_t
- * @n: value to assign
+ * @i: value to assign
  *
  * Atomically sets the value of @v to @n.
 */
@@ -200,7 +200,7 @@ static inline long long atomic64_sub(long long i, atomic64_t *v)
 * atomic64_sub_and_test - subtract value from variable and test result
 * @i: integer value to subtract
 * @v: pointer to type atomic64_t
- * 
+ *
 * Atomically subtracts @i from @v and returns
 * true if the result is zero, or false for all
 * other cases.
@@ -224,9 +224,9 @@ static inline void atomic64_inc(atomic64_t *v)
 
 /**
 * atomic64_dec - decrement atomic64 variable
- * @ptr: pointer to type atomic64_t
+ * @v: pointer to type atomic64_t
 *
- * Atomically decrements @ptr by 1.
+ * Atomically decrements @v by 1.
 */
 static inline void atomic64_dec(atomic64_t *v)
 {
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 5e1a2eef3e7c..b13fe63bdc59 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -19,7 +19,7 @@
 #ifdef CONFIG_X86_64
 #define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
 #else
-#define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT + THREAD_ORDER)
+#define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT + THREAD_SIZE_ORDER)
 #endif
 #define MIN_KERNEL_ALIGN	(_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
 
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 2f90c51cc49d..eb45aa6b1f27 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -112,7 +112,8 @@ struct boot_params {
 	__u8  e820_entries;			/* 0x1e8 */
 	__u8  eddbuf_entries;			/* 0x1e9 */
 	__u8  edd_mbr_sig_buf_entries;		/* 0x1ea */
-	__u8  _pad6[6];				/* 0x1eb */
+	__u8  kbd_status;			/* 0x1eb */
+	__u8  _pad6[5];				/* 0x1ec */
 	struct setup_header hdr;    /* setup header */	/* 0x1f1 */
 	__u8  _pad7[0x290-0x1f1-sizeof(struct setup_header)];
 	__u32 edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];	/* 0x290 */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index d6805798d6fc..fedf32b73e65 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -229,7 +229,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 		sp = task_pt_regs(current)->sp;
 	} else {
 		/* -128 for the x32 ABI redzone */
-		sp = percpu_read(old_rsp) - 128;
+		sp = this_cpu_read(old_rsp) - 128;
 	}
 
 	return (void __user *)round_down(sp - len, 16);
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 4d447b732d82..9476c04ee635 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);
 
 static __always_inline struct task_struct *get_current(void)
 {
-	return percpu_read_stable(current_task);
+	return this_cpu_read_stable(current_task);
 }
 
 #define current get_current()
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e95822d683f4..8bf1c06070d5 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -6,6 +6,7 @@
 #include <asm/mmu.h>
 
 #include <linux/smp.h>
+#include <linux/percpu.h>
 
 static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
 {
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 63a2a03d7d51..93e1c55f14ab 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -5,8 +5,8 @@ struct dev_archdata {
 #ifdef CONFIG_ACPI
 	void *acpi_handle;
 #endif
-#ifdef CONFIG_X86_64
-struct dma_map_ops *dma_ops;
+#ifdef CONFIG_X86_DEV_DMA_OPS
+	struct dma_map_ops *dma_ops;
 #endif
 #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
 	void *iommu; /* hook for IOMMU specific extension */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 4b4331d71935..61c0bd25845a 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -30,7 +30,7 @@ extern struct dma_map_ops *dma_ops;
 
 static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 {
-#ifdef CONFIG_X86_32
+#ifndef CONFIG_X86_DEV_DMA_OPS
 	return dma_ops;
 #else
 	if (unlikely(!dev) || !dev->archdata.dma_ops)
@@ -62,6 +62,12 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				dma_addr_t *dma_addr, gfp_t flag,
 				struct dma_attrs *attrs);
 
+#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
+extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
+extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
+extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+#else
+
 static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
 	if (!dev->dma_mask)
@@ -79,6 +85,7 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
 	return daddr;
 }
+#endif /* CONFIG_X86_DMA_REMAP */
 
 static inline void
 dma_cache_sync(struct device *dev, void *vaddr, size_t size,
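
To make the new hooks concrete (a hypothetical sketch, not the STA2X11 code itself): a platform whose bridge applies a constant offset between CPU-physical and bus addresses could satisfy the CONFIG_X86_DMA_REMAP prototypes as follows; EXAMPLE_BUS_OFFSET is an invented constant.

	/* Hypothetical CONFIG_X86_DMA_REMAP implementation for a bridge with
	 * a fixed physical-to-bus offset. EXAMPLE_BUS_OFFSET is made up. */
	#include <linux/types.h>
	#include <linux/device.h>

	#define EXAMPLE_BUS_OFFSET	0x80000000ULL

	dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
	{
		return (dma_addr_t)(paddr + EXAMPLE_BUS_OFFSET);
	}

	phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
	{
		return (phys_addr_t)(daddr - EXAMPLE_BUS_OFFSET);
	}

	bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
	{
		if (!dev->dma_mask)
			return false;
		return addr + size - 1 <= *dev->dma_mask;
	}
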
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 4fa88154e4de..75f4c6d6a331 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -290,14 +290,14 @@ static inline int __thread_has_fpu(struct task_struct *tsk)
 static inline void __thread_clear_has_fpu(struct task_struct *tsk)
 {
 	tsk->thread.fpu.has_fpu = 0;
-	percpu_write(fpu_owner_task, NULL);
+	this_cpu_write(fpu_owner_task, NULL);
 }
 
 /* Must be paired with a 'clts' before! */
 static inline void __thread_set_has_fpu(struct task_struct *tsk)
 {
 	tsk->thread.fpu.has_fpu = 1;
-	percpu_write(fpu_owner_task, tsk);
+	this_cpu_write(fpu_owner_task, tsk);
 }
 
 /*
@@ -344,7 +344,7 @@ typedef struct { int preload; } fpu_switch_t;
 */
 static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
 {
-	return new == percpu_read_stable(fpu_owner_task) &&
+	return new == this_cpu_read_stable(fpu_owner_task) &&
 		cpu == new->thread.fpu.last_cpu;
 }
 
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 268c783ab1c0..18d9005d9e4f 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,6 +34,7 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
+extern int modifying_ftrace_code;
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
@@ -50,6 +51,8 @@ struct dyn_arch_ftrace {
 	/* No extra data needed for x86 */
 };
 
+int ftrace_int3_handler(struct pt_regs *regs);
+
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 382f75d735f3..d3895dbf4ddb 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -35,14 +35,15 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 
 #define __ARCH_IRQ_STAT
 
-#define inc_irq_stat(member)	percpu_inc(irq_stat.member)
+#define inc_irq_stat(member)	this_cpu_inc(irq_stat.member)
 
-#define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)
+#define local_softirq_pending()	this_cpu_read(irq_stat.__softirq_pending)
 
 #define __ARCH_SET_SOFTIRQ_PENDING
 
-#define set_softirq_pending(x)	percpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x)	percpu_or(irq_stat.__softirq_pending, (x))
+#define set_softirq_pending(x)	\
+		this_cpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x)	this_cpu_or(irq_stat.__softirq_pending, (x))
 
 extern void ack_bad_irq(unsigned int irq);
 
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index ee52760549f0..b04cbdb138cd 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -144,6 +144,12 @@ typedef struct compat_siginfo {
 		int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
 		int _fd;
 	} _sigpoll;
+
+	struct {
+		unsigned int _call_addr; /* calling insn */
+		int _syscall;	/* triggering system call number */
+		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
+	} _sigsys;
 	} _sifields;
 } compat_siginfo_t;
 
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 2c4943de5150..73d8c5398ea9 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -5,7 +5,7 @@
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
 #include <asm/irq_vectors.h>
-
+#include <asm/x86_init.h>
 /*
  * Intel IO-APIC support for SMP and UP systems.
  *
@@ -21,15 +21,6 @@
 #define IO_APIC_REDIR_LEVEL_TRIGGER	(1 << 15)
 #define IO_APIC_REDIR_MASKED		(1 << 16)
 
-struct io_apic_ops {
-	void		(*init)  (void);
-	unsigned int	(*read)  (unsigned int apic, unsigned int reg);
-	void		(*write) (unsigned int apic, unsigned int reg, unsigned int value);
-	void		(*modify)(unsigned int apic, unsigned int reg, unsigned int value);
-};
-
-void __init set_io_apic_ops(const struct io_apic_ops *);
-
 /*
  * The structure of the IO-APIC:
  */
@@ -156,7 +147,6 @@ struct io_apic_irq_attr;
 extern int io_apic_set_pci_routing(struct device *dev, int irq,
 		 struct io_apic_irq_attr *irq_attr);
 void setup_IO_APIC_irq_extra(u32 gsi);
-extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
 int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
@@ -185,12 +175,29 @@ extern void mp_save_irq(struct mpc_intsrc *m);
 
 extern void disable_ioapic_support(void);
 
+extern void __init native_io_apic_init_mappings(void);
+extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
+extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
+extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+	return x86_io_apic_ops.read(apic, reg);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+	x86_io_apic_ops.write(apic, reg, value);
+}
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+	x86_io_apic_ops.modify(apic, reg, value);
+}
 #else /* !CONFIG_X86_IO_APIC */
 
 #define io_apic_assign_pci_irqs 0
 #define setup_ioapic_ids_from_mpc x86_init_noop
 static const int timer_through_8259 = 0;
-static inline void ioapic_and_gsi_init(void) { }
 static inline void ioapic_insert_resources(void) { }
 #define gsi_top (NR_IRQS_LEGACY)
 static inline int mp_find_ioapic(u32 gsi) { return 0; }
@@ -212,6 +219,10 @@ static inline int restore_ioapic_entries(void)
 
 static inline void mp_save_irq(struct mpc_intsrc *m) { };
 static inline void disable_ioapic_support(void) { }
+#define native_io_apic_init_mappings	NULL
+#define native_io_apic_read		NULL
+#define native_io_apic_write		NULL
+#define native_io_apic_modify		NULL
 #endif
 
 #endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 77843225b7ea..d82250b1debb 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
 
 static inline struct pt_regs *get_irq_regs(void)
 {
-	return percpu_read(irq_regs);
+	return this_cpu_read(irq_regs);
 }
 
 static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
 	struct pt_regs *old_regs;
 
 	old_regs = get_irq_regs();
-	percpu_write(irq_regs, new_regs);
+	this_cpu_write(irq_regs, new_regs);
 
 	return old_regs;
 }
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 47d99934580f..5fb9bbbd2f14 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,45 +1,101 @@
-#ifndef _ASM_X86_IRQ_REMAPPING_H
-#define _ASM_X86_IRQ_REMAPPING_H
-
-#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
-
-#ifdef CONFIG_IRQ_REMAP
-static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
-static inline void prepare_irte(struct irte *irte, int vector,
-				unsigned int dest)
-{
-	memset(irte, 0, sizeof(*irte));
-
-	irte->present = 1;
-	irte->dst_mode = apic->irq_dest_mode;
-	/*
-	 * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
-	 * actual level or edge trigger will be setup in the IO-APIC
-	 * RTE. This will help simplify level triggered irq migration.
-	 * For more details, see the comments (in io_apic.c) explainig IO-APIC
-	 * irq migration in the presence of interrupt-remapping.
-	 */
-	irte->trigger_mode = 0;
-	irte->dlvry_mode = apic->irq_delivery_mode;
-	irte->vector = vector;
-	irte->dest_id = IRTE_DEST(dest);
-	irte->redir_hint = 1;
-}
-static inline bool irq_remapped(struct irq_cfg *cfg)
-{
-	return cfg->irq_2_iommu.iommu != NULL;
-}
-#else
-static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
-{
-}
-static inline bool irq_remapped(struct irq_cfg *cfg)
-{
-	return false;
-}
-static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
-{
-}
-#endif
-
-#endif /* _ASM_X86_IRQ_REMAPPING_H */
+/*
+ * Copyright (C) 2012 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * This header file contains the interface of the interrupt remapping code to
+ * the x86 interrupt management code.
+ */
+
+#ifndef __X86_IRQ_REMAPPING_H
+#define __X86_IRQ_REMAPPING_H
+
+#include <asm/io_apic.h>
+
+#ifdef CONFIG_IRQ_REMAP
+
+extern int irq_remapping_enabled;
+
+extern void setup_irq_remapping_ops(void);
+extern int irq_remapping_supported(void);
+extern int irq_remapping_prepare(void);
+extern int irq_remapping_enable(void);
+extern void irq_remapping_disable(void);
+extern int irq_remapping_reenable(int);
+extern int irq_remap_enable_fault_handling(void);
+extern int setup_ioapic_remapped_entry(int irq,
+				       struct IO_APIC_route_entry *entry,
+				       unsigned int destination,
+				       int vector,
+				       struct io_apic_irq_attr *attr);
+extern int set_remapped_irq_affinity(struct irq_data *data,
+				     const struct cpumask *mask,
+				     bool force);
+extern void free_remapped_irq(int irq);
+extern void compose_remapped_msi_msg(struct pci_dev *pdev,
+				     unsigned int irq, unsigned int dest,
+				     struct msi_msg *msg, u8 hpet_id);
+extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
+extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+				  int index, int sub_handle);
+extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
+
+#else  /* CONFIG_IRQ_REMAP */
+
+#define irq_remapping_enabled	0
+
+static inline void setup_irq_remapping_ops(void) { }
+static inline int irq_remapping_supported(void) { return 0; }
+static inline int irq_remapping_prepare(void) { return -ENODEV; }
+static inline int irq_remapping_enable(void) { return -ENODEV; }
+static inline void irq_remapping_disable(void) { }
+static inline int irq_remapping_reenable(int eim) { return -ENODEV; }
+static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; }
+static inline int setup_ioapic_remapped_entry(int irq,
+					      struct IO_APIC_route_entry *entry,
+					      unsigned int destination,
+					      int vector,
+					      struct io_apic_irq_attr *attr)
+{
+	return -ENODEV;
+}
+static inline int set_remapped_irq_affinity(struct irq_data *data,
+					    const struct cpumask *mask,
+					    bool force)
+{
+	return 0;
+}
+static inline void free_remapped_irq(int irq) { }
+static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
+					    unsigned int irq, unsigned int dest,
+					    struct msi_msg *msg, u8 hpet_id)
+{
+}
+static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
+{
+	return -ENODEV;
+}
+static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+					 int index, int sub_handle)
+{
+	return -ENODEV;
+}
+static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_IRQ_REMAP */
+
+#endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/kbdleds.h b/arch/x86/include/asm/kbdleds.h
new file mode 100644
index 000000000000..f27ac5ff597d
--- /dev/null
+++ b/arch/x86/include/asm/kbdleds.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_X86_KBDLEDS_H
+#define _ASM_X86_KBDLEDS_H
+
+/*
+ * Some laptops take the 789uiojklm,. keys as number pad when NumLock is on.
+ * This seems a good reason to start with NumLock off. That's why on X86 we
+ * ask the bios for the correct state.
+ */
+
+#include <asm/setup.h>
+
+static inline int kbd_defleds(void)
+{
+	return boot_params.kbd_status & 0x20 ? (1 << VC_NUMLOCK) : 0;
+}
+
+#endif /* _ASM_X86_KBDLEDS_H */
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index d73f1571bde7..2c37aadcbc35 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -24,7 +24,6 @@ enum die_val {
 extern void printk_address(unsigned long address, int reliable);
 extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
-extern void show_registers(struct pt_regs *regs);
 extern void show_trace(struct task_struct *t, struct pt_regs *regs,
 		       unsigned long *sp, unsigned long bp);
 extern void __show_regs(struct pt_regs *regs, int all);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e216ba066e79..e5b97be12d2a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
 #include <asm/desc.h>
 #include <asm/mtrr.h>
 #include <asm/msr-index.h>
+#include <asm/asm.h>
 
 #define KVM_MAX_VCPUS 254
 #define KVM_SOFT_MAX_VCPUS 160
@@ -921,9 +922,7 @@ extern bool kvm_rebooting;
 	__ASM_SIZE(push) " $666b \n\t"	      \
 	"call kvm_spurious_fault \n\t"	      \
 	".popsection \n\t" \
-	".pushsection __ex_table, \"a\" \n\t" \
-	_ASM_PTR " 666b, 667b \n\t" \
-	".popsection"
+	_ASM_EXTABLE(666b, 667b)
 
 #define __kvm_handle_fault_on_reboot(insn) \
 	____kvm_handle_fault_on_reboot(insn, "")
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 69021528b43c..cdbf36776106 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -25,8 +25,8 @@ void destroy_context(struct mm_struct *mm);
25static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 25static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
26{ 26{
27#ifdef CONFIG_SMP 27#ifdef CONFIG_SMP
28 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) 28 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
29 percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); 29 this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
30#endif 30#endif
31} 31}
32 32
@@ -37,8 +37,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
37 37
38 if (likely(prev != next)) { 38 if (likely(prev != next)) {
39#ifdef CONFIG_SMP 39#ifdef CONFIG_SMP
40 percpu_write(cpu_tlbstate.state, TLBSTATE_OK); 40 this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
41 percpu_write(cpu_tlbstate.active_mm, next); 41 this_cpu_write(cpu_tlbstate.active_mm, next);
42#endif 42#endif
43 cpumask_set_cpu(cpu, mm_cpumask(next)); 43 cpumask_set_cpu(cpu, mm_cpumask(next));
44 44
@@ -56,8 +56,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
56 } 56 }
57#ifdef CONFIG_SMP 57#ifdef CONFIG_SMP
58 else { 58 else {
59 percpu_write(cpu_tlbstate.state, TLBSTATE_OK); 59 this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
60 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); 60 BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
61 61
62 if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { 62 if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
63 /* We were in lazy tlb mode and leave_mm disabled 63 /* We were in lazy tlb mode and leave_mm disabled
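
A short illustration of the interface rename these hunks apply: the x86-private percpu_read()/percpu_write() pair becomes the generic this_cpu_read()/this_cpu_write() ops, which compile to the same single %fs/%gs-relative instruction. The variable below is made up:

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(int, demo_counter);

    static void demo_bump(void)
    {
            int v = this_cpu_read(demo_counter);    /* was percpu_read()  */
            this_cpu_write(demo_counter, v + 1);    /* was percpu_write() */
    }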
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 55728e121473..eb05fb3b02fb 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -61,10 +61,4 @@ static inline int pfn_valid(int pfn)
61 61
62#endif /* CONFIG_DISCONTIGMEM */ 62#endif /* CONFIG_DISCONTIGMEM */
63 63
64#ifdef CONFIG_NEED_MULTIPLE_NODES
65/* always use node 0 for bootmem on this numa platform */
66#define bootmem_arch_preferred_node(__bdata, size, align, goal, limit) \
67 (NODE_DATA(0)->bdata)
68#endif /* CONFIG_NEED_MULTIPLE_NODES */
69
70#endif /* _ASM_X86_MMZONE_32_H */ 64#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ccb805966f68..957ec87385af 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -134,6 +134,8 @@
134#define MSR_AMD64_IBSFETCHCTL 0xc0011030 134#define MSR_AMD64_IBSFETCHCTL 0xc0011030
135#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 135#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
136#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 136#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
137#define MSR_AMD64_IBSFETCH_REG_COUNT 3
138#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
137#define MSR_AMD64_IBSOPCTL 0xc0011033 139#define MSR_AMD64_IBSOPCTL 0xc0011033
138#define MSR_AMD64_IBSOPRIP 0xc0011034 140#define MSR_AMD64_IBSOPRIP 0xc0011034
139#define MSR_AMD64_IBSOPDATA 0xc0011035 141#define MSR_AMD64_IBSOPDATA 0xc0011035
@@ -141,8 +143,11 @@
141#define MSR_AMD64_IBSOPDATA3 0xc0011037 143#define MSR_AMD64_IBSOPDATA3 0xc0011037
142#define MSR_AMD64_IBSDCLINAD 0xc0011038 144#define MSR_AMD64_IBSDCLINAD 0xc0011038
143#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 145#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
146#define MSR_AMD64_IBSOP_REG_COUNT 7
147#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
144#define MSR_AMD64_IBSCTL 0xc001103a 148#define MSR_AMD64_IBSCTL 0xc001103a
145#define MSR_AMD64_IBSBRTARGET 0xc001103b 149#define MSR_AMD64_IBSBRTARGET 0xc001103b
150#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
146 151
147/* Fam 15h MSRs */ 152/* Fam 15h MSRs */
148#define MSR_F15H_PERF_CTL 0xc0010200 153#define MSR_F15H_PERF_CTL 0xc0010200
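
The new *_REG_COUNT constants describe contiguous MSR banks starting at the control register, so a whole IBS sample can be read in a loop. An illustrative sketch (this is roughly how perf_event_amd_ibs.c consumes them):

    #include <asm/msr.h>

    /* Read the 7 IBS op registers, IBSOPCTL..IBSDCPHYSAD, into buf[]. */
    static void demo_read_ibs_op(u64 *buf)
    {
            unsigned int i;

            for (i = 0; i < MSR_AMD64_IBSOP_REG_COUNT; i++)
                    rdmsrl(MSR_AMD64_IBSOPCTL + i, buf[i]);
    }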
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 95203d40ffdd..084ef95274cd 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -169,14 +169,7 @@ static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
169 return native_write_msr_safe(msr, low, high); 169 return native_write_msr_safe(msr, low, high);
170} 170}
171 171
172/* 172/* rdmsr with exception handling */
173 * rdmsr with exception handling.
174 *
175 * Please note that the exception handling works only after we've
176 * switched to the "smart" #GP handler in trap_init() which knows about
177 * exception tables - using this macro earlier than that causes machine
178 * hangs on boxes which do not implement the @msr in the first argument.
179 */
180#define rdmsr_safe(msr, p1, p2) \ 173#define rdmsr_safe(msr, p1, p2) \
181({ \ 174({ \
182 int __err; \ 175 int __err; \
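
The long caveat could be dropped because this series adds early exception-table fixup (see early_fixup_exception() in the uaccess.h hunk below), so rdmsr_safe() works from early boot too. A brief usage example; unlike rdmsr(), it returns non-zero instead of oopsing when the read faults:

    static int demo_probe_ibsctl(void)
    {
            u32 lo, hi;

            if (rdmsr_safe(MSR_AMD64_IBSCTL, &lo, &hi))
                    return -ENODEV;         /* read #GP'd: MSR not implemented */
            return 0;
    }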
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index fd3f9f18cf3f..0e3793b821ef 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -27,6 +27,8 @@ void arch_trigger_all_cpu_backtrace(void);
27enum { 27enum {
28 NMI_LOCAL=0, 28 NMI_LOCAL=0,
29 NMI_UNKNOWN, 29 NMI_UNKNOWN,
30 NMI_SERR,
31 NMI_IO_CHECK,
30 NMI_MAX 32 NMI_MAX
31}; 33};
32 34
@@ -35,8 +37,24 @@ enum {
35 37
36typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); 38typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *);
37 39
38int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long, 40struct nmiaction {
39 const char *); 41 struct list_head list;
42 nmi_handler_t handler;
43 unsigned long flags;
44 const char *name;
45};
46
47#define register_nmi_handler(t, fn, fg, n) \
48({ \
49 static struct nmiaction fn##_na = { \
50 .handler = (fn), \
51 .name = (n), \
52 .flags = (fg), \
53 }; \
54 __register_nmi_handler((t), &fn##_na); \
55})
56
57int __register_nmi_handler(unsigned int, struct nmiaction *);
40 58
41void unregister_nmi_handler(unsigned int, const char *); 59void unregister_nmi_handler(unsigned int, const char *);
42 60
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index 405b4032a60b..aff2b3356101 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -87,7 +87,11 @@
87#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0 87#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0
88#define P6_NOP5_ATOMIC P6_NOP5 88#define P6_NOP5_ATOMIC P6_NOP5
89 89
90#ifdef __ASSEMBLY__
91#define _ASM_MK_NOP(x) .byte x
92#else
90#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" 93#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
94#endif
91 95
92#if defined(CONFIG_MK7) 96#if defined(CONFIG_MK7)
93#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1) 97#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1)
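
Illustration of what the new __ASSEMBLY__ branch buys: _ASM_MK_NOP() previously only produced string literals for C inline asm, as below, while the .byte form now emits the same bytes when the header is included from a .S file:

    /* From C, the ASM_NOP* macros are inline-asm string literals. */
    static inline void demo_five_byte_pad(void)
    {
            asm volatile(ASM_NOP5);
    }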
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index ade619ff9e2a..ef17af013475 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -15,8 +15,8 @@
15 */ 15 */
16#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) 16#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
17 17
18#define THREAD_ORDER 1 18#define THREAD_SIZE_ORDER 1
19#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) 19#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
20 20
21#define STACKFAULT_STACK 0 21#define STACKFAULT_STACK 0
22#define DOUBLEFAULT_STACK 1 22#define DOUBLEFAULT_STACK 1
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 7639dbf5d223..320f7bb95f76 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,8 +1,8 @@
1#ifndef _ASM_X86_PAGE_64_DEFS_H 1#ifndef _ASM_X86_PAGE_64_DEFS_H
2#define _ASM_X86_PAGE_64_DEFS_H 2#define _ASM_X86_PAGE_64_DEFS_H
3 3
4#define THREAD_ORDER 1 4#define THREAD_SIZE_ORDER 1
5#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) 5#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
6#define CURRENT_MASK (~(THREAD_SIZE - 1)) 6#define CURRENT_MASK (~(THREAD_SIZE - 1))
7 7
8#define EXCEPTION_STACK_ORDER 0 8#define EXCEPTION_STACK_ORDER 0
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index aa0f91308367..6cbbabf52707 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1023,10 +1023,8 @@ extern void default_banner(void);
1023 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ 1023 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
1024 ) 1024 )
1025 1025
1026#define GET_CR2_INTO_RCX \ 1026#define GET_CR2_INTO_RAX \
1027 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \ 1027 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
1028 movq %rax, %rcx; \
1029 xorq %rax, %rax;
1030 1028
1031#define PARAVIRT_ADJUST_EXCEPTION_FRAME \ 1029#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
1032 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ 1030 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 7a11910a63c4..d9b8e3f7f42a 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -46,7 +46,7 @@
46 46
47#ifdef CONFIG_SMP 47#ifdef CONFIG_SMP
48#define __percpu_prefix "%%"__stringify(__percpu_seg)":" 48#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
49#define __my_cpu_offset percpu_read(this_cpu_off) 49#define __my_cpu_offset this_cpu_read(this_cpu_off)
50 50
51/* 51/*
52 * Compared to the generic __my_cpu_offset version, the following 52 * Compared to the generic __my_cpu_offset version, the following
@@ -351,23 +351,15 @@ do { \
351}) 351})
352 352
353/* 353/*
354 * percpu_read() makes gcc load the percpu variable every time it is 354 * this_cpu_read() makes gcc load the percpu variable every time it is
355 * accessed while percpu_read_stable() allows the value to be cached. 355 * accessed while this_cpu_read_stable() allows the value to be cached.
356 * percpu_read_stable() is more efficient and can be used if its value 356 * this_cpu_read_stable() is more efficient and can be used if its value
357 * is guaranteed to be valid across cpus. The current users include 357 * is guaranteed to be valid across cpus. The current users include
358 * get_current() and get_thread_info() both of which are actually 358 * get_current() and get_thread_info() both of which are actually
359 * per-thread variables implemented as per-cpu variables and thus 359 * per-thread variables implemented as per-cpu variables and thus
360 * stable for the duration of the respective task. 360 * stable for the duration of the respective task.
361 */ 361 */
362#define percpu_read(var) percpu_from_op("mov", var, "m" (var)) 362#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
363#define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
364#define percpu_write(var, val) percpu_to_op("mov", var, val)
365#define percpu_add(var, val) percpu_add_op(var, val)
366#define percpu_sub(var, val) percpu_add_op(var, -(val))
367#define percpu_and(var, val) percpu_to_op("and", var, val)
368#define percpu_or(var, val) percpu_to_op("or", var, val)
369#define percpu_xor(var, val) percpu_to_op("xor", var, val)
370#define percpu_inc(var) percpu_unary_op("inc", var)
371 363
372#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 364#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
373#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 365#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -512,7 +504,11 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
512{ 504{
513 unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; 505 unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
514 506
515 return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0; 507#ifdef CONFIG_X86_64
508 return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0;
509#else
510 return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0;
511#endif
516} 512}
517 513
518static inline int x86_this_cpu_variable_test_bit(int nr, 514static inline int x86_this_cpu_variable_test_bit(int nr,
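
A hedged illustration of the surviving "stable" accessor: get_current() in <asm/current.h> reads a per-cpu pointer that only changes at context switch, so the "p" constraint lets gcc cache the load where plain this_cpu_read() (an "m" operand) would be re-read every time:

    static __always_inline struct task_struct *demo_get_current(void)
    {
            return this_cpu_read_stable(current_task);
    }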
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2291895b1836..588f52ea810e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -158,6 +158,7 @@ struct x86_pmu_capability {
158#define IBS_CAPS_OPCNT (1U<<4) 158#define IBS_CAPS_OPCNT (1U<<4)
159#define IBS_CAPS_BRNTRGT (1U<<5) 159#define IBS_CAPS_BRNTRGT (1U<<5)
160#define IBS_CAPS_OPCNTEXT (1U<<6) 160#define IBS_CAPS_OPCNTEXT (1U<<6)
161#define IBS_CAPS_RIPINVALIDCHK (1U<<7)
161 162
162#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ 163#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
163 | IBS_CAPS_FETCHSAM \ 164 | IBS_CAPS_FETCHSAM \
@@ -170,21 +171,28 @@ struct x86_pmu_capability {
170#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) 171#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
171#define IBSCTL_LVT_OFFSET_MASK 0x0F 172#define IBSCTL_LVT_OFFSET_MASK 0x0F
172 173
173/* IbsFetchCtl bits/masks */ 174/* ibs fetch bits/masks */
174#define IBS_FETCH_RAND_EN (1ULL<<57) 175#define IBS_FETCH_RAND_EN (1ULL<<57)
175#define IBS_FETCH_VAL (1ULL<<49) 176#define IBS_FETCH_VAL (1ULL<<49)
176#define IBS_FETCH_ENABLE (1ULL<<48) 177#define IBS_FETCH_ENABLE (1ULL<<48)
177#define IBS_FETCH_CNT 0xFFFF0000ULL 178#define IBS_FETCH_CNT 0xFFFF0000ULL
178#define IBS_FETCH_MAX_CNT 0x0000FFFFULL 179#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
179 180
180/* IbsOpCtl bits */ 181/* ibs op bits/masks */
182/* lower 4 bits of the current count are ignored: */
183#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
181#define IBS_OP_CNT_CTL (1ULL<<19) 184#define IBS_OP_CNT_CTL (1ULL<<19)
182#define IBS_OP_VAL (1ULL<<18) 185#define IBS_OP_VAL (1ULL<<18)
183#define IBS_OP_ENABLE (1ULL<<17) 186#define IBS_OP_ENABLE (1ULL<<17)
184#define IBS_OP_MAX_CNT 0x0000FFFFULL 187#define IBS_OP_MAX_CNT 0x0000FFFFULL
185#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ 188#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
189#define IBS_RIP_INVALID (1ULL<<38)
186 190
191#ifdef CONFIG_X86_LOCAL_APIC
187extern u32 get_ibs_caps(void); 192extern u32 get_ibs_caps(void);
193#else
194static inline u32 get_ibs_caps(void) { return 0; }
195#endif
188 196
189#ifdef CONFIG_PERF_EVENTS 197#ifdef CONFIG_PERF_EVENTS
190extern void perf_events_lapic_init(void); 198extern void perf_events_lapic_init(void);
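
With the new !CONFIG_X86_LOCAL_APIC stub returning 0, callers can test IBS capability bits without an #ifdef of their own. A minimal sketch:

    static bool demo_has_rip_invalid_check(void)
    {
            return get_ibs_caps() & IBS_CAPS_RIPINVALIDCHK;
    }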
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4fa7dcceb6c0..7745b257f035 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -579,9 +579,6 @@ extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
579/* Free all resources held by a thread. */ 579/* Free all resources held by a thread. */
580extern void release_thread(struct task_struct *); 580extern void release_thread(struct task_struct *);
581 581
582/* Prepare to copy thread state - unlazy all lazy state */
583extern void prepare_to_copy(struct task_struct *tsk);
584
585unsigned long get_wchan(struct task_struct *p); 582unsigned long get_wchan(struct task_struct *p);
586 583
587/* 584/*
@@ -974,8 +971,6 @@ extern bool cpu_has_amd_erratum(const int *);
974#define cpu_has_amd_erratum(x) (false) 971#define cpu_has_amd_erratum(x) (false)
975#endif /* CONFIG_CPU_SUP_AMD */ 972#endif /* CONFIG_CPU_SUP_AMD */
976 973
977void cpu_idle_wait(void);
978
979extern unsigned long arch_align_stack(unsigned long sp); 974extern unsigned long arch_align_stack(unsigned long sp);
980extern void free_init_pages(char *what, unsigned long begin, unsigned long end); 975extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
981 976
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 165466233ab0..c48a95035a77 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -205,13 +205,15 @@
205 205
206#define IDT_ENTRIES 256 206#define IDT_ENTRIES 256
207#define NUM_EXCEPTION_VECTORS 32 207#define NUM_EXCEPTION_VECTORS 32
208/* Bitmask of exception vectors which push an error code on the stack */
209#define EXCEPTION_ERRCODE_MASK 0x00027d00
208#define GDT_SIZE (GDT_ENTRIES * 8) 210#define GDT_SIZE (GDT_ENTRIES * 8)
209#define GDT_ENTRY_TLS_ENTRIES 3 211#define GDT_ENTRY_TLS_ENTRIES 3
210#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) 212#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
211 213
212#ifdef __KERNEL__ 214#ifdef __KERNEL__
213#ifndef __ASSEMBLY__ 215#ifndef __ASSEMBLY__
214extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10]; 216extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
215 217
216/* 218/*
217 * Load a segment. Fall back on loading the zero 219 * Load a segment. Fall back on loading the zero
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0434c400287c..f48394513c37 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -62,6 +62,8 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
62/* Static state in head.S used to set up a CPU */ 62/* Static state in head.S used to set up a CPU */
63extern unsigned long stack_start; /* Initial stack pointer address */ 63extern unsigned long stack_start; /* Initial stack pointer address */
64 64
65struct task_struct;
66
65struct smp_ops { 67struct smp_ops {
66 void (*smp_prepare_boot_cpu)(void); 68 void (*smp_prepare_boot_cpu)(void);
67 void (*smp_prepare_cpus)(unsigned max_cpus); 69 void (*smp_prepare_cpus)(unsigned max_cpus);
@@ -70,7 +72,7 @@ struct smp_ops {
70 void (*stop_other_cpus)(int wait); 72 void (*stop_other_cpus)(int wait);
71 void (*smp_send_reschedule)(int cpu); 73 void (*smp_send_reschedule)(int cpu);
72 74
73 int (*cpu_up)(unsigned cpu); 75 int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
74 int (*cpu_disable)(void); 76 int (*cpu_disable)(void);
75 void (*cpu_die)(unsigned int cpu); 77 void (*cpu_die)(unsigned int cpu);
76 void (*play_dead)(void); 78 void (*play_dead)(void);
@@ -113,9 +115,9 @@ static inline void smp_cpus_done(unsigned int max_cpus)
113 smp_ops.smp_cpus_done(max_cpus); 115 smp_ops.smp_cpus_done(max_cpus);
114} 116}
115 117
116static inline int __cpu_up(unsigned int cpu) 118static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle)
117{ 119{
118 return smp_ops.cpu_up(cpu); 120 return smp_ops.cpu_up(cpu, tidle);
119} 121}
120 122
121static inline int __cpu_disable(void) 123static inline int __cpu_disable(void)
@@ -152,7 +154,7 @@ void cpu_disable_common(void);
152void native_smp_prepare_boot_cpu(void); 154void native_smp_prepare_boot_cpu(void);
153void native_smp_prepare_cpus(unsigned int max_cpus); 155void native_smp_prepare_cpus(unsigned int max_cpus);
154void native_smp_cpus_done(unsigned int max_cpus); 156void native_smp_cpus_done(unsigned int max_cpus);
155int native_cpu_up(unsigned int cpunum); 157int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
156int native_cpu_disable(void); 158int native_cpu_disable(void);
157void native_cpu_die(unsigned int cpu); 159void native_cpu_die(unsigned int cpu);
158void native_play_dead(void); 160void native_play_dead(void);
@@ -162,6 +164,7 @@ int wbinvd_on_all_cpus(void);
162 164
163void native_send_call_func_ipi(const struct cpumask *mask); 165void native_send_call_func_ipi(const struct cpumask *mask);
164void native_send_call_func_single_ipi(int cpu); 166void native_send_call_func_single_ipi(int cpu);
167void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
165 168
166void smp_store_cpu_info(int id); 169void smp_store_cpu_info(int id);
167#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) 170#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
@@ -188,11 +191,11 @@ extern unsigned disabled_cpus __cpuinitdata;
188 * from the initial startup. We map APIC_BASE very early in page_setup(), 191 * from the initial startup. We map APIC_BASE very early in page_setup(),
189 * so this is correct in the x86 case. 192 * so this is correct in the x86 case.
190 */ 193 */
191#define raw_smp_processor_id() (percpu_read(cpu_number)) 194#define raw_smp_processor_id() (this_cpu_read(cpu_number))
192extern int safe_smp_processor_id(void); 195extern int safe_smp_processor_id(void);
193 196
194#elif defined(CONFIG_X86_64_SMP) 197#elif defined(CONFIG_X86_64_SMP)
195#define raw_smp_processor_id() (percpu_read(cpu_number)) 198#define raw_smp_processor_id() (this_cpu_read(cpu_number))
196 199
197#define stack_smp_processor_id() \ 200#define stack_smp_processor_id() \
198({ \ 201({ \
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 76bfa2cf301d..b315a33867f2 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -20,10 +20,8 @@
20 20
21#ifdef CONFIG_X86_32 21#ifdef CONFIG_X86_32
22# define LOCK_PTR_REG "a" 22# define LOCK_PTR_REG "a"
23# define REG_PTR_MODE "k"
24#else 23#else
25# define LOCK_PTR_REG "D" 24# define LOCK_PTR_REG "D"
26# define REG_PTR_MODE "q"
27#endif 25#endif
28 26
29#if defined(CONFIG_X86_32) && \ 27#if defined(CONFIG_X86_32) && \
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index b5d9533d2c38..6a998598f172 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -75,9 +75,9 @@ static __always_inline void boot_init_stack_canary(void)
75 75
76 current->stack_canary = canary; 76 current->stack_canary = canary;
77#ifdef CONFIG_X86_64 77#ifdef CONFIG_X86_64
78 percpu_write(irq_stack_union.stack_canary, canary); 78 this_cpu_write(irq_stack_union.stack_canary, canary);
79#else 79#else
80 percpu_write(stack_canary.canary, canary); 80 this_cpu_write(stack_canary.canary, canary);
81#endif 81#endif
82} 82}
83 83
diff --git a/arch/x86/include/asm/stat.h b/arch/x86/include/asm/stat.h
index e0b1d9bbcbc6..7b3ddc348585 100644
--- a/arch/x86/include/asm/stat.h
+++ b/arch/x86/include/asm/stat.h
@@ -25,6 +25,12 @@ struct stat {
25 unsigned long __unused5; 25 unsigned long __unused5;
26}; 26};
27 27
28/* We don't need to memset the whole thing just to initialize the padding */
29#define INIT_STRUCT_STAT_PADDING(st) do { \
30 st.__unused4 = 0; \
31 st.__unused5 = 0; \
32} while (0)
33
28#define STAT64_HAS_BROKEN_ST_INO 1 34#define STAT64_HAS_BROKEN_ST_INO 1
29 35
30/* This matches struct stat64 in glibc2.1, hence the absolutely 36/* This matches struct stat64 in glibc2.1, hence the absolutely
@@ -63,6 +69,12 @@ struct stat64 {
63 unsigned long long st_ino; 69 unsigned long long st_ino;
64}; 70};
65 71
72/* We don't need to memset the whole thing just to initialize the padding */
73#define INIT_STRUCT_STAT64_PADDING(st) do { \
74 memset(&st.__pad0, 0, sizeof(st.__pad0)); \
75 memset(&st.__pad3, 0, sizeof(st.__pad3)); \
76} while (0)
77
66#else /* __i386__ */ 78#else /* __i386__ */
67 79
68struct stat { 80struct stat {
@@ -87,6 +99,15 @@ struct stat {
87 unsigned long st_ctime_nsec; 99 unsigned long st_ctime_nsec;
88 long __unused[3]; 100 long __unused[3];
89}; 101};
102
103/* We don't need to memset the whole thing just to initialize the padding */
104#define INIT_STRUCT_STAT_PADDING(st) do { \
105 st.__pad0 = 0; \
106 st.__unused[0] = 0; \
107 st.__unused[1] = 0; \
108 st.__unused[2] = 0; \
109} while (0)
110
90#endif 111#endif
91 112
92/* for 32bit emulation and 32 bit kernels */ 113/* for 32bit emulation and 32 bit kernels */
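
A hedged usage sketch, modeled on cp_new_stat() in fs/stat.c: zero just the padding, fill the real fields from the kstat, then copy out. Everything beyond the padding macro is elided or simplified here:

    #include <linux/stat.h>
    #include <linux/uaccess.h>

    static int demo_cp_stat(struct kstat *stat, struct stat __user *statbuf)
    {
            struct stat tmp;

            INIT_STRUCT_STAT_PADDING(tmp); /* was: memset(&tmp, 0, sizeof(tmp)) */
            tmp.st_ino = stat->ino;
            /* ... remaining fields ... */
            return copy_to_user(statbuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
    }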
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 386b78686c4d..1ace47b62592 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -13,9 +13,11 @@
13#ifndef _ASM_X86_SYSCALL_H 13#ifndef _ASM_X86_SYSCALL_H
14#define _ASM_X86_SYSCALL_H 14#define _ASM_X86_SYSCALL_H
15 15
16#include <linux/audit.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
17#include <linux/err.h> 18#include <linux/err.h>
18#include <asm/asm-offsets.h> /* For NR_syscalls */ 19#include <asm/asm-offsets.h> /* For NR_syscalls */
20#include <asm/thread_info.h> /* for TS_COMPAT */
19#include <asm/unistd.h> 21#include <asm/unistd.h>
20 22
21extern const unsigned long sys_call_table[]; 23extern const unsigned long sys_call_table[];
@@ -88,6 +90,12 @@ static inline void syscall_set_arguments(struct task_struct *task,
88 memcpy(&regs->bx + i, args, n * sizeof(args[0])); 90 memcpy(&regs->bx + i, args, n * sizeof(args[0]));
89} 91}
90 92
93static inline int syscall_get_arch(struct task_struct *task,
94 struct pt_regs *regs)
95{
96 return AUDIT_ARCH_I386;
97}
98
91#else /* CONFIG_X86_64 */ 99#else /* CONFIG_X86_64 */
92 100
93static inline void syscall_get_arguments(struct task_struct *task, 101static inline void syscall_get_arguments(struct task_struct *task,
@@ -212,6 +220,25 @@ static inline void syscall_set_arguments(struct task_struct *task,
212 } 220 }
213} 221}
214 222
223static inline int syscall_get_arch(struct task_struct *task,
224 struct pt_regs *regs)
225{
226#ifdef CONFIG_IA32_EMULATION
227 /*
228 * TS_COMPAT is set for 32-bit syscall entry and then
229 * remains set until we return to user mode.
230 *
231 * TIF_IA32 tasks should always have TS_COMPAT set at
232 * system call time.
233 *
234 * x32 tasks should be considered AUDIT_ARCH_X86_64.
235 */
236 if (task_thread_info(task)->status & TS_COMPAT)
237 return AUDIT_ARCH_I386;
238#endif
239 /* Both x32 and x86_64 are considered "64-bit". */
240 return AUDIT_ARCH_X86_64;
241}
215#endif /* CONFIG_X86_32 */ 242#endif /* CONFIG_X86_32 */
216 243
217#endif /* _ASM_X86_SYSCALL_H */ 244#endif /* _ASM_X86_SYSCALL_H */
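
A short usage sketch: audit code can now ask the arch helper for the syscall ABI instead of open-coding TS_COMPAT checks:

    static bool demo_is_compat_entry(struct pt_regs *regs)
    {
            return syscall_get_arch(current, regs) == AUDIT_ARCH_I386;
    }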
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ad6df8ccd715..3c9aebc00d39 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -155,24 +155,6 @@ struct thread_info {
155 155
156#define PREEMPT_ACTIVE 0x10000000 156#define PREEMPT_ACTIVE 0x10000000
157 157
158/* thread information allocation */
159#ifdef CONFIG_DEBUG_STACK_USAGE
160#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
161#else
162#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK)
163#endif
164
165#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
166
167#define alloc_thread_info_node(tsk, node) \
168({ \
169 struct page *page = alloc_pages_node(node, THREAD_FLAGS, \
170 THREAD_ORDER); \
171 struct thread_info *ret = page ? page_address(page) : NULL; \
172 \
173 ret; \
174})
175
176#ifdef CONFIG_X86_32 158#ifdef CONFIG_X86_32
177 159
178#define STACK_WARN (THREAD_SIZE/8) 160#define STACK_WARN (THREAD_SIZE/8)
@@ -222,7 +204,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
222static inline struct thread_info *current_thread_info(void) 204static inline struct thread_info *current_thread_info(void)
223{ 205{
224 struct thread_info *ti; 206 struct thread_info *ti;
225 ti = (void *)(percpu_read_stable(kernel_stack) + 207 ti = (void *)(this_cpu_read_stable(kernel_stack) +
226 KERNEL_STACK_OFFSET - THREAD_SIZE); 208 KERNEL_STACK_OFFSET - THREAD_SIZE);
227 return ti; 209 return ti;
228} 210}
@@ -282,8 +264,7 @@ static inline bool is_ia32_task(void)
282 264
283#ifndef __ASSEMBLY__ 265#ifndef __ASSEMBLY__
284extern void arch_task_cache_init(void); 266extern void arch_task_cache_init(void);
285extern void free_thread_info(struct thread_info *ti);
286extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); 267extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
287#define arch_task_cache_init arch_task_cache_init 268extern void arch_release_task_struct(struct task_struct *tsk);
288#endif 269#endif
289#endif /* _ASM_X86_THREAD_INFO_H */ 270#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index c0e108e08079..36a1a2ab87d2 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -62,11 +62,7 @@ static inline void __flush_tlb_one(unsigned long addr)
62 __flush_tlb(); 62 __flush_tlb();
63} 63}
64 64
65#ifdef CONFIG_X86_32 65#define TLB_FLUSH_ALL -1UL
66# define TLB_FLUSH_ALL 0xffffffff
67#else
68# define TLB_FLUSH_ALL -1ULL
69#endif
70 66
71/* 67/*
72 * TLB flushing: 68 * TLB flushing:
@@ -156,8 +152,8 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
156 152
157static inline void reset_lazy_tlbstate(void) 153static inline void reset_lazy_tlbstate(void)
158{ 154{
159 percpu_write(cpu_tlbstate.state, 0); 155 this_cpu_write(cpu_tlbstate.state, 0);
160 percpu_write(cpu_tlbstate.active_mm, &init_mm); 156 this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
161} 157}
162 158
163#endif /* SMP */ 159#endif /* SMP */
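
An illustrative consequence of the unification: with TLB_FLUSH_ALL now -1UL on both 32- and 64-bit, a flush request can be tested without per-width #ifdefs:

    static void demo_flush(unsigned long va)
    {
            if (va == TLB_FLUSH_ALL)
                    local_flush_tlb();      /* flush the whole TLB */
            else
                    __flush_tlb_one(va);    /* flush a single address */
    }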
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index b9676ae37ada..095b21507b6a 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -92,44 +92,6 @@ extern void setup_node_to_cpumask_map(void);
92 92
93#define pcibus_to_node(bus) __pcibus_to_node(bus) 93#define pcibus_to_node(bus) __pcibus_to_node(bus)
94 94
95#ifdef CONFIG_X86_32
96# define SD_CACHE_NICE_TRIES 1
97# define SD_IDLE_IDX 1
98#else
99# define SD_CACHE_NICE_TRIES 2
100# define SD_IDLE_IDX 2
101#endif
102
103/* sched_domains SD_NODE_INIT for NUMA machines */
104#define SD_NODE_INIT (struct sched_domain) { \
105 .min_interval = 8, \
106 .max_interval = 32, \
107 .busy_factor = 32, \
108 .imbalance_pct = 125, \
109 .cache_nice_tries = SD_CACHE_NICE_TRIES, \
110 .busy_idx = 3, \
111 .idle_idx = SD_IDLE_IDX, \
112 .newidle_idx = 0, \
113 .wake_idx = 0, \
114 .forkexec_idx = 0, \
115 \
116 .flags = 1*SD_LOAD_BALANCE \
117 | 1*SD_BALANCE_NEWIDLE \
118 | 1*SD_BALANCE_EXEC \
119 | 1*SD_BALANCE_FORK \
120 | 0*SD_BALANCE_WAKE \
121 | 1*SD_WAKE_AFFINE \
122 | 0*SD_PREFER_LOCAL \
123 | 0*SD_SHARE_CPUPOWER \
124 | 0*SD_POWERSAVINGS_BALANCE \
125 | 0*SD_SHARE_PKG_RESOURCES \
126 | 1*SD_SERIALIZE \
127 | 0*SD_PREFER_SIBLING \
128 , \
129 .last_balance = jiffies, \
130 .balance_interval = 1, \
131}
132
133extern int __node_distance(int, int); 95extern int __node_distance(int, int);
134#define node_distance(a, b) __node_distance(a, b) 96#define node_distance(a, b) __node_distance(a, b)
135 97
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index e0544597cfe7..851fe0dc13bc 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -79,11 +79,12 @@
79#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) 79#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
80 80
81/* 81/*
82 * The exception table consists of pairs of addresses: the first is the 82 * The exception table consists of pairs of addresses relative to the
83 * address of an instruction that is allowed to fault, and the second is 83 * exception table enty itself: the first is the address of an
84 * the address at which the program should continue. No registers are 84 * instruction that is allowed to fault, and the second is the address
85 * modified, so it is entirely up to the continuation code to figure out 85 * at which the program should continue. No registers are modified,
86 * what to do. 86 * so it is entirely up to the continuation code to figure out what to
87 * do.
87 * 88 *
88 * All the routines below use bits of fixup code that are out of line 89 * All the routines below use bits of fixup code that are out of line
89 * with the main instruction path. This means when everything is well, 90 * with the main instruction path. This means when everything is well,
@@ -92,10 +93,14 @@
92 */ 93 */
93 94
94struct exception_table_entry { 95struct exception_table_entry {
95 unsigned long insn, fixup; 96 int insn, fixup;
96}; 97};
98/* This is not the generic standard exception_table_entry format */
99#define ARCH_HAS_SORT_EXTABLE
100#define ARCH_HAS_SEARCH_EXTABLE
97 101
98extern int fixup_exception(struct pt_regs *regs); 102extern int fixup_exception(struct pt_regs *regs);
103extern int early_fixup_exception(unsigned long *ip);
99 104
100/* 105/*
101 * These are the main single-value transfer routines. They automatically 106 * These are the main single-value transfer routines. They automatically
@@ -202,8 +207,8 @@ extern int __get_user_bad(void);
202 asm volatile("1: movl %%eax,0(%1)\n" \ 207 asm volatile("1: movl %%eax,0(%1)\n" \
203 "2: movl %%edx,4(%1)\n" \ 208 "2: movl %%edx,4(%1)\n" \
204 "3:\n" \ 209 "3:\n" \
205 _ASM_EXTABLE(1b, 2b - 1b) \ 210 _ASM_EXTABLE_EX(1b, 2b) \
206 _ASM_EXTABLE(2b, 3b - 2b) \ 211 _ASM_EXTABLE_EX(2b, 3b) \
207 : : "A" (x), "r" (addr)) 212 : : "A" (x), "r" (addr))
208 213
209#define __put_user_x8(x, ptr, __ret_pu) \ 214#define __put_user_x8(x, ptr, __ret_pu) \
@@ -408,7 +413,7 @@ do { \
408#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ 413#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \
409 asm volatile("1: mov"itype" %1,%"rtype"0\n" \ 414 asm volatile("1: mov"itype" %1,%"rtype"0\n" \
410 "2:\n" \ 415 "2:\n" \
411 _ASM_EXTABLE(1b, 2b - 1b) \ 416 _ASM_EXTABLE_EX(1b, 2b) \
412 : ltype(x) : "m" (__m(addr))) 417 : ltype(x) : "m" (__m(addr)))
413 418
414#define __put_user_nocheck(x, ptr, size) \ 419#define __put_user_nocheck(x, ptr, size) \
@@ -450,7 +455,7 @@ struct __large_struct { unsigned long buf[100]; };
450#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ 455#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \
451 asm volatile("1: mov"itype" %"rtype"0,%1\n" \ 456 asm volatile("1: mov"itype" %"rtype"0,%1\n" \
452 "2:\n" \ 457 "2:\n" \
453 _ASM_EXTABLE(1b, 2b - 1b) \ 458 _ASM_EXTABLE_EX(1b, 2b) \
454 : : ltype(x), "m" (__m(addr))) 459 : : ltype(x), "m" (__m(addr)))
455 460
456/* 461/*
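
A sketch of decoding the new self-relative entries from the struct above: each int field stores "target - &field", so adding the field's own address recovers the absolute address. The real helpers belong to the extable rework this series does under ARCH_HAS_SEARCH_EXTABLE/ARCH_HAS_SORT_EXTABLE:

    static inline unsigned long
    demo_ex_insn_addr(const struct exception_table_entry *x)
    {
            return (unsigned long)&x->insn + x->insn;
    }

    static inline unsigned long
    demo_ex_fixup_addr(const struct exception_table_entry *x)
    {
            return (unsigned long)&x->fixup + x->fixup;
    }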
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 764b66a4cf89..c090af10ac7d 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -188,11 +188,18 @@ struct x86_msi_ops {
188 void (*restore_msi_irqs)(struct pci_dev *dev, int irq); 188 void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
189}; 189};
190 190
191struct x86_io_apic_ops {
192 void (*init) (void);
193 unsigned int (*read) (unsigned int apic, unsigned int reg);
194 void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
195 void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
196};
197
191extern struct x86_init_ops x86_init; 198extern struct x86_init_ops x86_init;
192extern struct x86_cpuinit_ops x86_cpuinit; 199extern struct x86_cpuinit_ops x86_cpuinit;
193extern struct x86_platform_ops x86_platform; 200extern struct x86_platform_ops x86_platform;
194extern struct x86_msi_ops x86_msi; 201extern struct x86_msi_ops x86_msi;
195 202extern struct x86_io_apic_ops x86_io_apic_ops;
196extern void x86_init_noop(void); 203extern void x86_init_noop(void);
197extern void x86_init_uint_noop(unsigned int unused); 204extern void x86_init_uint_noop(unsigned int unused);
198 205
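
A hedged sketch of why the ops struct exists: a platform can interpose on raw IO-APIC register access at boot (Xen does this to trap the MMIO). The override function below is made up:

    #include <asm/x86_init.h>

    static unsigned int demo_io_apic_read(unsigned int apic, unsigned int reg)
    {
            return 0;       /* a hypervisor would forward this to its model */
    }

    static void __init demo_override_ioapic(void)
    {
            x86_io_apic_ops.read = demo_io_apic_read;
    }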
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index c6ce2452f10c..8a1b6f9b594a 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -80,10 +80,7 @@ static inline int xsave_user(struct xsave_struct __user *buf)
80 "3: movl $-1,%[err]\n" 80 "3: movl $-1,%[err]\n"
81 " jmp 2b\n" 81 " jmp 2b\n"
82 ".previous\n" 82 ".previous\n"
83 ".section __ex_table,\"a\"\n" 83 _ASM_EXTABLE(1b,3b)
84 _ASM_ALIGN "\n"
85 _ASM_PTR "1b,3b\n"
86 ".previous"
87 : [err] "=r" (err) 84 : [err] "=r" (err)
88 : "D" (buf), "a" (-1), "d" (-1), "0" (0) 85 : "D" (buf), "a" (-1), "d" (-1), "0" (0)
89 : "memory"); 86 : "memory");
@@ -106,10 +103,7 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
106 "3: movl $-1,%[err]\n" 103 "3: movl $-1,%[err]\n"
107 " jmp 2b\n" 104 " jmp 2b\n"
108 ".previous\n" 105 ".previous\n"
109 ".section __ex_table,\"a\"\n" 106 _ASM_EXTABLE(1b,3b)
110 _ASM_ALIGN "\n"
111 _ASM_PTR "1b,3b\n"
112 ".previous"
113 : [err] "=r" (err) 107 : [err] "=r" (err)
114 : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) 108 : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
115 : "memory"); /* memory required? */ 109 : "memory"); /* memory required? */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 532d2e090e6f..56ebd1f98447 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the linux kernel. 2# Makefile for the linux kernel.
3# 3#
4 4
5extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds 5extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds
6 6
7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) 7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 8
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index edc24480469f..39a222e094af 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -35,6 +35,7 @@
35#include <linux/smp.h> 35#include <linux/smp.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37 37
38#include <asm/irq_remapping.h>
38#include <asm/perf_event.h> 39#include <asm/perf_event.h>
39#include <asm/x86_init.h> 40#include <asm/x86_init.h>
40#include <asm/pgalloc.h> 41#include <asm/pgalloc.h>
@@ -1325,11 +1326,13 @@ void __cpuinit setup_local_APIC(void)
1325 acked); 1326 acked);
1326 break; 1327 break;
1327 } 1328 }
1328 if (cpu_has_tsc) { 1329 if (queued) {
1329 rdtscll(ntsc); 1330 if (cpu_has_tsc) {
1330 max_loops = (cpu_khz << 10) - (ntsc - tsc); 1331 rdtscll(ntsc);
1331 } else 1332 max_loops = (cpu_khz << 10) - (ntsc - tsc);
1332 max_loops--; 1333 } else
1334 max_loops--;
1335 }
1333 } while (queued && max_loops > 0); 1336 } while (queued && max_loops > 0);
1334 WARN_ON(max_loops <= 0); 1337 WARN_ON(max_loops <= 0);
1335 1338
@@ -1441,8 +1444,8 @@ void __init bsp_end_local_APIC_setup(void)
1441 * Now that local APIC setup is completed for BP, configure the fault 1444 * Now that local APIC setup is completed for BP, configure the fault
1442 * handling for interrupt remapping. 1445 * handling for interrupt remapping.
1443 */ 1446 */
1444 if (intr_remapping_enabled) 1447 if (irq_remapping_enabled)
1445 enable_drhd_fault_handling(); 1448 irq_remap_enable_fault_handling();
1446 1449
1447} 1450}
1448 1451
@@ -1517,7 +1520,7 @@ void enable_x2apic(void)
1517int __init enable_IR(void) 1520int __init enable_IR(void)
1518{ 1521{
1519#ifdef CONFIG_IRQ_REMAP 1522#ifdef CONFIG_IRQ_REMAP
1520 if (!intr_remapping_supported()) { 1523 if (!irq_remapping_supported()) {
1521 pr_debug("intr-remapping not supported\n"); 1524 pr_debug("intr-remapping not supported\n");
1522 return -1; 1525 return -1;
1523 } 1526 }
@@ -1528,7 +1531,7 @@ int __init enable_IR(void)
1528 return -1; 1531 return -1;
1529 } 1532 }
1530 1533
1531 return enable_intr_remapping(); 1534 return irq_remapping_enable();
1532#endif 1535#endif
1533 return -1; 1536 return -1;
1534} 1537}
@@ -1537,10 +1540,13 @@ void __init enable_IR_x2apic(void)
1537{ 1540{
1538 unsigned long flags; 1541 unsigned long flags;
1539 int ret, x2apic_enabled = 0; 1542 int ret, x2apic_enabled = 0;
1540 int dmar_table_init_ret; 1543 int hardware_init_ret;
1544
1545 /* Make sure irq_remap_ops are initialized */
1546 setup_irq_remapping_ops();
1541 1547
1542 dmar_table_init_ret = dmar_table_init(); 1548 hardware_init_ret = irq_remapping_prepare();
1543 if (dmar_table_init_ret && !x2apic_supported()) 1549 if (hardware_init_ret && !x2apic_supported())
1544 return; 1550 return;
1545 1551
1546 ret = save_ioapic_entries(); 1552 ret = save_ioapic_entries();
@@ -1556,7 +1562,7 @@ void __init enable_IR_x2apic(void)
1556 if (x2apic_preenabled && nox2apic) 1562 if (x2apic_preenabled && nox2apic)
1557 disable_x2apic(); 1563 disable_x2apic();
1558 1564
1559 if (dmar_table_init_ret) 1565 if (hardware_init_ret)
1560 ret = -1; 1566 ret = -1;
1561 else 1567 else
1562 ret = enable_IR(); 1568 ret = enable_IR();
@@ -2176,8 +2182,8 @@ static int lapic_suspend(void)
2176 local_irq_save(flags); 2182 local_irq_save(flags);
2177 disable_local_APIC(); 2183 disable_local_APIC();
2178 2184
2179 if (intr_remapping_enabled) 2185 if (irq_remapping_enabled)
2180 disable_intr_remapping(); 2186 irq_remapping_disable();
2181 2187
2182 local_irq_restore(flags); 2188 local_irq_restore(flags);
2183 return 0; 2189 return 0;
@@ -2193,7 +2199,7 @@ static void lapic_resume(void)
2193 return; 2199 return;
2194 2200
2195 local_irq_save(flags); 2201 local_irq_save(flags);
2196 if (intr_remapping_enabled) { 2202 if (irq_remapping_enabled) {
2197 /* 2203 /*
2198 * IO-APIC and PIC have their own resume routines. 2204 * IO-APIC and PIC have their own resume routines.
2199 * We just mask them here to make sure the interrupt 2205 * We just mask them here to make sure the interrupt
@@ -2245,8 +2251,8 @@ static void lapic_resume(void)
2245 apic_write(APIC_ESR, 0); 2251 apic_write(APIC_ESR, 0);
2246 apic_read(APIC_ESR); 2252 apic_read(APIC_ESR);
2247 2253
2248 if (intr_remapping_enabled) 2254 if (irq_remapping_enabled)
2249 reenable_intr_remapping(x2apic_mode); 2255 irq_remapping_reenable(x2apic_mode);
2250 2256
2251 local_irq_restore(flags); 2257 local_irq_restore(flags);
2252} 2258}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 359b6899a36c..0e881c46e8c8 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -227,6 +227,7 @@ static struct apic apic_flat = {
227 227
228 .read = native_apic_mem_read, 228 .read = native_apic_mem_read,
229 .write = native_apic_mem_write, 229 .write = native_apic_mem_write,
230 .eoi_write = native_apic_mem_write,
230 .icr_read = native_apic_icr_read, 231 .icr_read = native_apic_icr_read,
231 .icr_write = native_apic_icr_write, 232 .icr_write = native_apic_icr_write,
232 .wait_icr_idle = native_apic_wait_icr_idle, 233 .wait_icr_idle = native_apic_wait_icr_idle,
@@ -386,6 +387,7 @@ static struct apic apic_physflat = {
386 387
387 .read = native_apic_mem_read, 388 .read = native_apic_mem_read,
388 .write = native_apic_mem_write, 389 .write = native_apic_mem_write,
390 .eoi_write = native_apic_mem_write,
389 .icr_read = native_apic_icr_read, 391 .icr_read = native_apic_icr_read,
390 .icr_write = native_apic_icr_write, 392 .icr_write = native_apic_icr_write,
391 .wait_icr_idle = native_apic_wait_icr_idle, 393 .wait_icr_idle = native_apic_wait_icr_idle,
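
A note on the design choice these hunks repeat across the apic drivers: EOI is a fixed-value write to one register, so splitting it into its own callback lets an x2APIC driver point .eoi_write at a cheaper MSR write while memory-mapped APICs simply reuse native_apic_mem_write(). An illustrative caller (this is essentially what ack_APIC_irq() does; apic and APIC_EOI come from <asm/apic.h>):

    static void demo_ack_irq(void)
    {
            apic->eoi_write(APIC_EOI, APIC_EOI_ACK);
    }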
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 634ae6cdd5c9..a6e4c6e06c08 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -181,6 +181,7 @@ struct apic apic_noop = {
181 181
182 .read = noop_apic_read, 182 .read = noop_apic_read,
183 .write = noop_apic_write, 183 .write = noop_apic_write,
184 .eoi_write = noop_apic_write,
184 .icr_read = noop_apic_icr_read, 185 .icr_read = noop_apic_icr_read,
185 .icr_write = noop_apic_icr_write, 186 .icr_write = noop_apic_icr_write,
186 .wait_icr_idle = noop_apic_wait_icr_idle, 187 .wait_icr_idle = noop_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 23e75422e013..6ec6d5d297c3 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -295,6 +295,7 @@ static struct apic apic_numachip __refconst = {
295 295
296 .read = native_apic_mem_read, 296 .read = native_apic_mem_read,
297 .write = native_apic_mem_write, 297 .write = native_apic_mem_write,
298 .eoi_write = native_apic_mem_write,
298 .icr_read = native_apic_icr_read, 299 .icr_read = native_apic_icr_read,
299 .icr_write = native_apic_icr_write, 300 .icr_write = native_apic_icr_write,
300 .wait_icr_idle = native_apic_wait_icr_idle, 301 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 0cdec7065aff..31fbdbfbf960 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -248,6 +248,7 @@ static struct apic apic_bigsmp = {
248 248
249 .read = native_apic_mem_read, 249 .read = native_apic_mem_read,
250 .write = native_apic_mem_write, 250 .write = native_apic_mem_write,
251 .eoi_write = native_apic_mem_write,
251 .icr_read = native_apic_icr_read, 252 .icr_read = native_apic_icr_read,
252 .icr_write = native_apic_icr_write, 253 .icr_write = native_apic_icr_write,
253 .wait_icr_idle = native_apic_wait_icr_idle, 254 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index e42d1d3b9134..db4ab1be3c79 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -678,6 +678,7 @@ static struct apic __refdata apic_es7000_cluster = {
678 678
679 .read = native_apic_mem_read, 679 .read = native_apic_mem_read,
680 .write = native_apic_mem_write, 680 .write = native_apic_mem_write,
681 .eoi_write = native_apic_mem_write,
681 .icr_read = native_apic_icr_read, 682 .icr_read = native_apic_icr_read,
682 .icr_write = native_apic_icr_write, 683 .icr_write = native_apic_icr_write,
683 .wait_icr_idle = native_apic_wait_icr_idle, 684 .wait_icr_idle = native_apic_wait_icr_idle,
@@ -742,6 +743,7 @@ static struct apic __refdata apic_es7000 = {
742 743
743 .read = native_apic_mem_read, 744 .read = native_apic_mem_read,
744 .write = native_apic_mem_write, 745 .write = native_apic_mem_write,
746 .eoi_write = native_apic_mem_write,
745 .icr_read = native_apic_icr_read, 747 .icr_read = native_apic_icr_read,
746 .icr_write = native_apic_icr_write, 748 .icr_write = native_apic_icr_write,
747 .wait_icr_idle = native_apic_wait_icr_idle, 749 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e88300d8e80a..ffdc152e507d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,23 +68,21 @@
68#define for_each_irq_pin(entry, head) \ 68#define for_each_irq_pin(entry, head) \
69 for (entry = head; entry; entry = entry->next) 69 for (entry = head; entry; entry = entry->next)
70 70
71static void __init __ioapic_init_mappings(void);
72
73static unsigned int __io_apic_read (unsigned int apic, unsigned int reg);
74static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val);
75static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
76
77static struct io_apic_ops io_apic_ops = {
78	.init = __ioapic_init_mappings,
79	.read = __io_apic_read,
80	.write = __io_apic_write,
81	.modify = __io_apic_modify,
82};
83
84void __init set_io_apic_ops(const struct io_apic_ops *ops)
85{
86	io_apic_ops = *ops;
87}
71#ifdef CONFIG_IRQ_REMAP
72static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
73static inline bool irq_remapped(struct irq_cfg *cfg)
74{
75	return cfg->irq_2_iommu.iommu != NULL;
76}
77#else
78static inline bool irq_remapped(struct irq_cfg *cfg)
79{
80	return false;
81}
82static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
83{
84}
85#endif
88 86
89/* 87/*
90 * Is the SiS APIC rmw bug present? 88 * Is the SiS APIC rmw bug present?
@@ -313,21 +311,6 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
313 irq_free_desc(at); 311 irq_free_desc(at);
314} 312}
315 313
316static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
317{
318 return io_apic_ops.read(apic, reg);
319}
320
321static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
322{
323 io_apic_ops.write(apic, reg, value);
324}
325
326static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
327{
328 io_apic_ops.modify(apic, reg, value);
329}
330
331 314
332struct io_apic { 315struct io_apic {
333 unsigned int index; 316 unsigned int index;
@@ -349,14 +332,14 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
349 writel(vector, &io_apic->eoi); 332 writel(vector, &io_apic->eoi);
350} 333}
351 334
352static unsigned int __io_apic_read(unsigned int apic, unsigned int reg) 335unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
353{ 336{
354 struct io_apic __iomem *io_apic = io_apic_base(apic); 337 struct io_apic __iomem *io_apic = io_apic_base(apic);
355 writel(reg, &io_apic->index); 338 writel(reg, &io_apic->index);
356 return readl(&io_apic->data); 339 return readl(&io_apic->data);
357} 340}
358 341
359static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) 342void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
360{ 343{
361 struct io_apic __iomem *io_apic = io_apic_base(apic); 344 struct io_apic __iomem *io_apic = io_apic_base(apic);
362 345
@@ -370,7 +353,7 @@ static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int va
370 * 353 *
371 * Older SiS APIC requires we rewrite the index register 354 * Older SiS APIC requires we rewrite the index register
372 */ 355 */
373static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) 356void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
374{ 357{
375 struct io_apic __iomem *io_apic = io_apic_base(apic); 358 struct io_apic __iomem *io_apic = io_apic_base(apic);
376 359
@@ -379,29 +362,6 @@ static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int v
379 writel(value, &io_apic->data); 362 writel(value, &io_apic->data);
380} 363}
381 364
382static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
383{
384 struct irq_pin_list *entry;
385 unsigned long flags;
386
387 raw_spin_lock_irqsave(&ioapic_lock, flags);
388 for_each_irq_pin(entry, cfg->irq_2_pin) {
389 unsigned int reg;
390 int pin;
391
392 pin = entry->pin;
393 reg = io_apic_read(entry->apic, 0x10 + pin*2);
394 /* Is the remote IRR bit set? */
395 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
396 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
397 return true;
398 }
399 }
400 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
401
402 return false;
403}
404
405union entry_union { 365union entry_union {
406 struct { u32 w1, w2; }; 366 struct { u32 w1, w2; };
407 struct IO_APIC_route_entry entry; 367 struct IO_APIC_route_entry entry;
@@ -1361,77 +1321,13 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
1361 fasteoi ? "fasteoi" : "edge"); 1321 fasteoi ? "fasteoi" : "edge");
1362} 1322}
1363 1323
1364
1365static int setup_ir_ioapic_entry(int irq,
1366 struct IR_IO_APIC_route_entry *entry,
1367 unsigned int destination, int vector,
1368 struct io_apic_irq_attr *attr)
1369{
1370 int index;
1371 struct irte irte;
1372 int ioapic_id = mpc_ioapic_id(attr->ioapic);
1373 struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);
1374
1375 if (!iommu) {
1376 pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
1377 return -ENODEV;
1378 }
1379
1380 index = alloc_irte(iommu, irq, 1);
1381 if (index < 0) {
1382 pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
1383 return -ENOMEM;
1384 }
1385
1386 prepare_irte(&irte, vector, destination);
1387
1388 /* Set source-id of interrupt request */
1389 set_ioapic_sid(&irte, ioapic_id);
1390
1391 modify_irte(irq, &irte);
1392
1393 apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
1394 "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
1395 "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
1396 "Avail:%X Vector:%02X Dest:%08X "
1397 "SID:%04X SQ:%X SVT:%X)\n",
1398 attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
1399 irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
1400 irte.avail, irte.vector, irte.dest_id,
1401 irte.sid, irte.sq, irte.svt);
1402
1403 memset(entry, 0, sizeof(*entry));
1404
1405 entry->index2 = (index >> 15) & 0x1;
1406 entry->zero = 0;
1407 entry->format = 1;
1408 entry->index = (index & 0x7fff);
1409 /*
1410 * IO-APIC RTE will be configured with virtual vector.
1411 * irq handler will do the explicit EOI to the io-apic.
1412 */
1413 entry->vector = attr->ioapic_pin;
1414 entry->mask = 0; /* enable IRQ */
1415 entry->trigger = attr->trigger;
1416 entry->polarity = attr->polarity;
1417
1418 /* Mask level triggered irqs.
1419 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1420 */
1421 if (attr->trigger)
1422 entry->mask = 1;
1423
1424 return 0;
1425}
1426
1427static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, 1324static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
1428 unsigned int destination, int vector, 1325 unsigned int destination, int vector,
1429 struct io_apic_irq_attr *attr) 1326 struct io_apic_irq_attr *attr)
1430{ 1327{
1431 if (intr_remapping_enabled) 1328 if (irq_remapping_enabled)
1432 return setup_ir_ioapic_entry(irq, 1329 return setup_ioapic_remapped_entry(irq, entry, destination,
1433 (struct IR_IO_APIC_route_entry *)entry, 1330 vector, attr);
1434 destination, vector, attr);
1435 1331
1436 memset(entry, 0, sizeof(*entry)); 1332 memset(entry, 0, sizeof(*entry));
1437 1333
@@ -1588,7 +1484,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1588{ 1484{
1589 struct IO_APIC_route_entry entry; 1485 struct IO_APIC_route_entry entry;
1590 1486
1591 if (intr_remapping_enabled) 1487 if (irq_remapping_enabled)
1592 return; 1488 return;
1593 1489
1594 memset(&entry, 0, sizeof(entry)); 1490 memset(&entry, 0, sizeof(entry));
@@ -1674,7 +1570,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1674 1570
1675 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1571 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1676 1572
1677 if (intr_remapping_enabled) { 1573 if (irq_remapping_enabled) {
1678 printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" 1574 printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
1679 " Pol Stat Indx2 Zero Vect:\n"); 1575 " Pol Stat Indx2 Zero Vect:\n");
1680 } else { 1576 } else {
@@ -1683,7 +1579,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1683 } 1579 }
1684 1580
1685 for (i = 0; i <= reg_01.bits.entries; i++) { 1581 for (i = 0; i <= reg_01.bits.entries; i++) {
1686 if (intr_remapping_enabled) { 1582 if (irq_remapping_enabled) {
1687 struct IO_APIC_route_entry entry; 1583 struct IO_APIC_route_entry entry;
1688 struct IR_IO_APIC_route_entry *ir_entry; 1584 struct IR_IO_APIC_route_entry *ir_entry;
1689 1585
@@ -2050,7 +1946,7 @@ void disable_IO_APIC(void)
2050 * IOAPIC RTE as well as interrupt-remapping table entry). 1946 * IOAPIC RTE as well as interrupt-remapping table entry).
2051 * As this gets called during crash dump, keep this simple for now. 1947 * As this gets called during crash dump, keep this simple for now.
2052 */ 1948 */
2053 if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { 1949 if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) {
2054 struct IO_APIC_route_entry entry; 1950 struct IO_APIC_route_entry entry;
2055 1951
2056 memset(&entry, 0, sizeof(entry)); 1952 memset(&entry, 0, sizeof(entry));
@@ -2074,7 +1970,7 @@ void disable_IO_APIC(void)
2074 * Use virtual wire A mode when interrupt remapping is enabled. 1970 * Use virtual wire A mode when interrupt remapping is enabled.
2075 */ 1971 */
2076 if (cpu_has_apic || apic_from_smp_config()) 1972 if (cpu_has_apic || apic_from_smp_config())
2077 disconnect_bsp_APIC(!intr_remapping_enabled && 1973 disconnect_bsp_APIC(!irq_remapping_enabled &&
2078 ioapic_i8259.pin != -1); 1974 ioapic_i8259.pin != -1);
2079} 1975}
2080 1976
@@ -2390,71 +2286,6 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2390 return ret; 2286 return ret;
2391} 2287}
2392 2288
2393#ifdef CONFIG_IRQ_REMAP
2394
2395/*
2396 * Migrate the IO-APIC irq in the presence of intr-remapping.
2397 *
2398 * For both level and edge triggered, irq migration is a simple atomic
2399 * update(of vector and cpu destination) of IRTE and flush the hardware cache.
2400 *
2401 * For level triggered, we eliminate the io-apic RTE modification (with the
2402 * updated vector information), by using a virtual vector (io-apic pin number).
2403 * Real vector that is used for interrupting cpu will be coming from
2404 * the interrupt-remapping table entry.
2405 *
2406 * As the migration is a simple atomic update of IRTE, the same mechanism
2407 * is used to migrate MSI irq's in the presence of interrupt-remapping.
2408 */
2409static int
2410ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2411 bool force)
2412{
2413 struct irq_cfg *cfg = data->chip_data;
2414 unsigned int dest, irq = data->irq;
2415 struct irte irte;
2416
2417 if (!cpumask_intersects(mask, cpu_online_mask))
2418 return -EINVAL;
2419
2420 if (get_irte(irq, &irte))
2421 return -EBUSY;
2422
2423 if (assign_irq_vector(irq, cfg, mask))
2424 return -EBUSY;
2425
2426 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2427
2428 irte.vector = cfg->vector;
2429 irte.dest_id = IRTE_DEST(dest);
2430
2431 /*
2432 * Atomically updates the IRTE with the new destination, vector
2433 * and flushes the interrupt entry cache.
2434 */
2435 modify_irte(irq, &irte);
2436
2437 /*
2438 * After this point, all the interrupts will start arriving
2439 * at the new destination. So, time to cleanup the previous
2440 * vector allocation.
2441 */
2442 if (cfg->move_in_progress)
2443 send_cleanup_vector(cfg);
2444
2445 cpumask_copy(data->affinity, mask);
2446 return 0;
2447}
2448
2449#else
2450static inline int
2451ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2452 bool force)
2453{
2454 return 0;
2455}
2456#endif
2457
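The logic deleted above does not vanish: hunks further down switch callers over to set_remapped_irq_affinity(), provided by the generic irq-remapping layer. As a rough sketch (the remap_ops dispatch table is an assumption here; this diff does not show the remapping core), the new entry point presumably reduces to:

static int set_remapped_irq_affinity(struct irq_data *data,
				     const struct cpumask *mask,
				     bool force)
{
	/* remap_ops: assumed per-backend ops table in the remapping core */
	if (!remap_ops || !remap_ops->set_affinity)
		return 0;

	return remap_ops->set_affinity(data, mask, force);
}

The VT-d backend would then carry the IRTE update and cleanup-vector work that used to live here.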
2458asmlinkage void smp_irq_move_cleanup_interrupt(void) 2289asmlinkage void smp_irq_move_cleanup_interrupt(void)
2459{ 2290{
2460 unsigned vector, me; 2291 unsigned vector, me;
@@ -2552,6 +2383,29 @@ static void ack_apic_edge(struct irq_data *data)
2552atomic_t irq_mis_count; 2383atomic_t irq_mis_count;
2553 2384
2554#ifdef CONFIG_GENERIC_PENDING_IRQ 2385#ifdef CONFIG_GENERIC_PENDING_IRQ
2386static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
2387{
2388 struct irq_pin_list *entry;
2389 unsigned long flags;
2390
2391 raw_spin_lock_irqsave(&ioapic_lock, flags);
2392 for_each_irq_pin(entry, cfg->irq_2_pin) {
2393 unsigned int reg;
2394 int pin;
2395
2396 pin = entry->pin;
2397 reg = io_apic_read(entry->apic, 0x10 + pin*2);
2398 /* Is the remote IRR bit set? */
2399 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
2400 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2401 return true;
2402 }
2403 }
2404 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2405
2406 return false;
2407}
2408
2555static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) 2409static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
2556{ 2410{
2557 /* If we are moving the irq we need to mask it */ 2411 /* If we are moving the irq we need to mask it */
@@ -2699,7 +2553,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
2699 chip->irq_eoi = ir_ack_apic_level; 2553 chip->irq_eoi = ir_ack_apic_level;
2700 2554
2701#ifdef CONFIG_SMP 2555#ifdef CONFIG_SMP
2702 chip->irq_set_affinity = ir_ioapic_set_affinity; 2556 chip->irq_set_affinity = set_remapped_irq_affinity;
2703#endif 2557#endif
2704} 2558}
2705#endif /* CONFIG_IRQ_REMAP */ 2559#endif /* CONFIG_IRQ_REMAP */
@@ -2912,7 +2766,7 @@ static inline void __init check_timer(void)
2912 * 8259A. 2766 * 8259A.
2913 */ 2767 */
2914 if (pin1 == -1) { 2768 if (pin1 == -1) {
2915 if (intr_remapping_enabled) 2769 if (irq_remapping_enabled)
2916 panic("BIOS bug: timer not connected to IO-APIC"); 2770 panic("BIOS bug: timer not connected to IO-APIC");
2917 pin1 = pin2; 2771 pin1 = pin2;
2918 apic1 = apic2; 2772 apic1 = apic2;
@@ -2945,7 +2799,7 @@ static inline void __init check_timer(void)
2945 clear_IO_APIC_pin(0, pin1); 2799 clear_IO_APIC_pin(0, pin1);
2946 goto out; 2800 goto out;
2947 } 2801 }
2948 if (intr_remapping_enabled) 2802 if (irq_remapping_enabled)
2949 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 2803 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2950 local_irq_disable(); 2804 local_irq_disable();
2951 clear_IO_APIC_pin(apic1, pin1); 2805 clear_IO_APIC_pin(apic1, pin1);
@@ -3169,7 +3023,7 @@ void destroy_irq(unsigned int irq)
3169 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); 3023 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3170 3024
3171 if (irq_remapped(cfg)) 3025 if (irq_remapped(cfg))
3172 free_irte(irq); 3026 free_remapped_irq(irq);
3173 raw_spin_lock_irqsave(&vector_lock, flags); 3027 raw_spin_lock_irqsave(&vector_lock, flags);
3174 __clear_irq_vector(irq, cfg); 3028 __clear_irq_vector(irq, cfg);
3175 raw_spin_unlock_irqrestore(&vector_lock, flags); 3029 raw_spin_unlock_irqrestore(&vector_lock, flags);
@@ -3198,54 +3052,34 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3198 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 3052 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3199 3053
3200 if (irq_remapped(cfg)) { 3054 if (irq_remapped(cfg)) {
3201 struct irte irte; 3055 compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
3202 int ir_index; 3056 return err;
3203 u16 sub_handle; 3057 }
3204
3205 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
3206 BUG_ON(ir_index == -1);
3207
3208 prepare_irte(&irte, cfg->vector, dest);
3209
3210 /* Set source-id of interrupt request */
3211 if (pdev)
3212 set_msi_sid(&irte, pdev);
3213 else
3214 set_hpet_sid(&irte, hpet_id);
3215
3216 modify_irte(irq, &irte);
3217 3058
3059 if (x2apic_enabled())
3060 msg->address_hi = MSI_ADDR_BASE_HI |
3061 MSI_ADDR_EXT_DEST_ID(dest);
3062 else
3218 msg->address_hi = MSI_ADDR_BASE_HI; 3063 msg->address_hi = MSI_ADDR_BASE_HI;
3219 msg->data = sub_handle;
3220 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
3221 MSI_ADDR_IR_SHV |
3222 MSI_ADDR_IR_INDEX1(ir_index) |
3223 MSI_ADDR_IR_INDEX2(ir_index);
3224 } else {
3225 if (x2apic_enabled())
3226 msg->address_hi = MSI_ADDR_BASE_HI |
3227 MSI_ADDR_EXT_DEST_ID(dest);
3228 else
3229 msg->address_hi = MSI_ADDR_BASE_HI;
3230 3064
3231 msg->address_lo = 3065 msg->address_lo =
3232 MSI_ADDR_BASE_LO | 3066 MSI_ADDR_BASE_LO |
3233 ((apic->irq_dest_mode == 0) ? 3067 ((apic->irq_dest_mode == 0) ?
3234 MSI_ADDR_DEST_MODE_PHYSICAL: 3068 MSI_ADDR_DEST_MODE_PHYSICAL:
3235 MSI_ADDR_DEST_MODE_LOGICAL) | 3069 MSI_ADDR_DEST_MODE_LOGICAL) |
3236 ((apic->irq_delivery_mode != dest_LowestPrio) ? 3070 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3237 MSI_ADDR_REDIRECTION_CPU: 3071 MSI_ADDR_REDIRECTION_CPU:
3238 MSI_ADDR_REDIRECTION_LOWPRI) | 3072 MSI_ADDR_REDIRECTION_LOWPRI) |
3239 MSI_ADDR_DEST_ID(dest); 3073 MSI_ADDR_DEST_ID(dest);
3074
3075 msg->data =
3076 MSI_DATA_TRIGGER_EDGE |
3077 MSI_DATA_LEVEL_ASSERT |
3078 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3079 MSI_DATA_DELIVERY_FIXED:
3080 MSI_DATA_DELIVERY_LOWPRI) |
3081 MSI_DATA_VECTOR(cfg->vector);
3240 3082
3241 msg->data =
3242 MSI_DATA_TRIGGER_EDGE |
3243 MSI_DATA_LEVEL_ASSERT |
3244 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3245 MSI_DATA_DELIVERY_FIXED:
3246 MSI_DATA_DELIVERY_LOWPRI) |
3247 MSI_DATA_VECTOR(cfg->vector);
3248 }
3249 return err; 3083 return err;
3250} 3084}
3251 3085
@@ -3288,33 +3122,6 @@ static struct irq_chip msi_chip = {
3288 .irq_retrigger = ioapic_retrigger_irq, 3122 .irq_retrigger = ioapic_retrigger_irq,
3289}; 3123};
3290 3124
3291/*
3292 * Map the PCI dev to the corresponding remapping hardware unit
3293 * and allocate 'nvec' consecutive interrupt-remapping table entries
3294 * in it.
3295 */
3296static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3297{
3298 struct intel_iommu *iommu;
3299 int index;
3300
3301 iommu = map_dev_to_ir(dev);
3302 if (!iommu) {
3303 printk(KERN_ERR
3304 "Unable to map PCI %s to iommu\n", pci_name(dev));
3305 return -ENOENT;
3306 }
3307
3308 index = alloc_irte(iommu, irq, nvec);
3309 if (index < 0) {
3310 printk(KERN_ERR
3311 "Unable to allocate %d IRTE for PCI %s\n", nvec,
3312 pci_name(dev));
3313 return -ENOSPC;
3314 }
3315 return index;
3316}
3317
3318static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3125static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3319{ 3126{
3320 struct irq_chip *chip = &msi_chip; 3127 struct irq_chip *chip = &msi_chip;
@@ -3345,7 +3152,6 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3345 int node, ret, sub_handle, index = 0; 3152 int node, ret, sub_handle, index = 0;
3346 unsigned int irq, irq_want; 3153 unsigned int irq, irq_want;
3347 struct msi_desc *msidesc; 3154 struct msi_desc *msidesc;
3348 struct intel_iommu *iommu = NULL;
3349 3155
3350 /* x86 doesn't support multiple MSI yet */ 3156 /* x86 doesn't support multiple MSI yet */
3351 if (type == PCI_CAP_ID_MSI && nvec > 1) 3157 if (type == PCI_CAP_ID_MSI && nvec > 1)
@@ -3359,7 +3165,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3359 if (irq == 0) 3165 if (irq == 0)
3360 return -1; 3166 return -1;
3361 irq_want = irq + 1; 3167 irq_want = irq + 1;
3362 if (!intr_remapping_enabled) 3168 if (!irq_remapping_enabled)
3363 goto no_ir; 3169 goto no_ir;
3364 3170
3365 if (!sub_handle) { 3171 if (!sub_handle) {
@@ -3367,23 +3173,16 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3367 * allocate the consecutive block of IRTE's 3173 * allocate the consecutive block of IRTE's
3368 * for 'nvec' 3174 * for 'nvec'
3369 */ 3175 */
3370 index = msi_alloc_irte(dev, irq, nvec); 3176 index = msi_alloc_remapped_irq(dev, irq, nvec);
3371 if (index < 0) { 3177 if (index < 0) {
3372 ret = index; 3178 ret = index;
3373 goto error; 3179 goto error;
3374 } 3180 }
3375 } else { 3181 } else {
3376 iommu = map_dev_to_ir(dev); 3182 ret = msi_setup_remapped_irq(dev, irq, index,
3377 if (!iommu) { 3183 sub_handle);
3378 ret = -ENOENT; 3184 if (ret < 0)
3379 goto error; 3185 goto error;
3380 }
3381 /*
3382 * setup the mapping between the irq and the IRTE
3383 * base index, the sub_handle pointing to the
3384 * appropriate interrupt remap table entry.
3385 */
3386 set_irte_irq(irq, iommu, index, sub_handle);
3387 } 3186 }
3388no_ir: 3187no_ir:
3389 ret = setup_msi_irq(dev, msidesc, irq); 3188 ret = setup_msi_irq(dev, msidesc, irq);
@@ -3501,15 +3300,8 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3501 struct msi_msg msg; 3300 struct msi_msg msg;
3502 int ret; 3301 int ret;
3503 3302
3504 if (intr_remapping_enabled) { 3303 if (irq_remapping_enabled) {
3505 struct intel_iommu *iommu = map_hpet_to_ir(id); 3304 if (!setup_hpet_msi_remapped(irq, id))
3506 int index;
3507
3508 if (!iommu)
3509 return -1;
3510
3511 index = alloc_irte(iommu, irq, 1);
3512 if (index < 0)
3513 return -1; 3305 return -1;
3514 } 3306 }
3515 3307
@@ -3888,8 +3680,8 @@ void __init setup_ioapic_dest(void)
3888 else 3680 else
3889 mask = apic->target_cpus(); 3681 mask = apic->target_cpus();
3890 3682
3891 if (intr_remapping_enabled) 3683 if (irq_remapping_enabled)
3892 ir_ioapic_set_affinity(idata, mask, false); 3684 set_remapped_irq_affinity(idata, mask, false);
3893 else 3685 else
3894 ioapic_set_affinity(idata, mask, false); 3686 ioapic_set_affinity(idata, mask, false);
3895 } 3687 }
@@ -3931,12 +3723,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
3931 return res; 3723 return res;
3932} 3724}
3933 3725
3934void __init ioapic_and_gsi_init(void) 3726void __init native_io_apic_init_mappings(void)
3935{
3936 io_apic_ops.init();
3937}
3938
3939static void __init __ioapic_init_mappings(void)
3940{ 3727{
3941 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 3728 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
3942 struct resource *ioapic_res; 3729 struct resource *ioapic_res;
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 00d2422ca7c9..f00a68cca37a 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -530,6 +530,7 @@ static struct apic __refdata apic_numaq = {
530 530
531 .read = native_apic_mem_read, 531 .read = native_apic_mem_read,
532 .write = native_apic_mem_write, 532 .write = native_apic_mem_write,
533 .eoi_write = native_apic_mem_write,
533 .icr_read = native_apic_icr_read, 534 .icr_read = native_apic_icr_read,
534 .icr_write = native_apic_icr_write, 535 .icr_write = native_apic_icr_write,
535 .wait_icr_idle = native_apic_wait_icr_idle, 536 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index ff2c1b9aac4d..1b291da09e60 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -142,6 +142,7 @@ static struct apic apic_default = {
142 142
143 .read = native_apic_mem_read, 143 .read = native_apic_mem_read,
144 .write = native_apic_mem_write, 144 .write = native_apic_mem_write,
145 .eoi_write = native_apic_mem_write,
145 .icr_read = native_apic_icr_read, 146 .icr_read = native_apic_icr_read,
146 .icr_write = native_apic_icr_write, 147 .icr_write = native_apic_icr_write,
147 .wait_icr_idle = native_apic_wait_icr_idle, 148 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index fea000b27f07..659897c00755 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -546,6 +546,7 @@ static struct apic apic_summit = {
546 546
547 .read = native_apic_mem_read, 547 .read = native_apic_mem_read,
548 .write = native_apic_mem_write, 548 .write = native_apic_mem_write,
549 .eoi_write = native_apic_mem_write,
549 .icr_read = native_apic_icr_read, 550 .icr_read = native_apic_icr_read,
550 .icr_write = native_apic_icr_write, 551 .icr_write = native_apic_icr_write,
551 .wait_icr_idle = native_apic_wait_icr_idle, 552 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 48f3103b3c93..ff35cff0e1a7 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -260,6 +260,7 @@ static struct apic apic_x2apic_cluster = {
260 260
261 .read = native_apic_msr_read, 261 .read = native_apic_msr_read,
262 .write = native_apic_msr_write, 262 .write = native_apic_msr_write,
263 .eoi_write = native_apic_msr_eoi_write,
263 .icr_read = native_x2apic_icr_read, 264 .icr_read = native_x2apic_icr_read,
264 .icr_write = native_x2apic_icr_write, 265 .icr_write = native_x2apic_icr_write,
265 .wait_icr_idle = native_x2apic_wait_icr_idle, 266 .wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 991e315f4227..c17e982db275 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -172,6 +172,7 @@ static struct apic apic_x2apic_phys = {
172 172
173 .read = native_apic_msr_read, 173 .read = native_apic_msr_read,
174 .write = native_apic_msr_write, 174 .write = native_apic_msr_write,
175 .eoi_write = native_apic_msr_eoi_write,
175 .icr_read = native_x2apic_icr_read, 176 .icr_read = native_x2apic_icr_read,
176 .icr_write = native_x2apic_icr_write, 177 .icr_write = native_x2apic_icr_write,
177 .wait_icr_idle = native_x2apic_wait_icr_idle, 178 .wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 87bfa69e216e..c6d03f7a4401 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -404,6 +404,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
404 404
405 .read = native_apic_msr_read, 405 .read = native_apic_msr_read,
406 .write = native_apic_msr_write, 406 .write = native_apic_msr_write,
407 .eoi_write = native_apic_msr_eoi_write,
407 .icr_read = native_x2apic_icr_read, 408 .icr_read = native_x2apic_icr_read,
408 .icr_write = native_x2apic_icr_write, 409 .icr_write = native_x2apic_icr_write,
409 .wait_icr_idle = native_x2apic_wait_icr_idle, 410 .wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 459e78cbf61e..07b0c0db466c 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -2401,7 +2401,7 @@ static void __exit apm_exit(void)
2401 * (pm_idle), Wait for all processors to update cached/local 2401 * (pm_idle), Wait for all processors to update cached/local
2402 * copies of pm_idle before proceeding. 2402 * copies of pm_idle before proceeding.
2403 */ 2403 */
2404 cpu_idle_wait(); 2404 kick_all_cpus_sync();
2405 } 2405 }
2406 if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) 2406 if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
2407 && (apm_info.connection_version > 0x0100)) { 2407 && (apm_info.connection_version > 0x0100)) {
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 5da1269e8ddc..e2dbcb7dabdd 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -27,21 +27,29 @@ static int num_scan_areas;
27 27
28static __init int set_corruption_check(char *arg) 28static __init int set_corruption_check(char *arg)
29{ 29{
30 char *end; 30 ssize_t ret;
31 unsigned long val;
31 32
32 memory_corruption_check = simple_strtol(arg, &end, 10); 33 ret = kstrtoul(arg, 10, &val);
34 if (ret)
35 return ret;
33 36
34 return (*end == 0) ? 0 : -EINVAL; 37 memory_corruption_check = val;
38 return 0;
35} 39}
36early_param("memory_corruption_check", set_corruption_check); 40early_param("memory_corruption_check", set_corruption_check);
37 41
38static __init int set_corruption_check_period(char *arg) 42static __init int set_corruption_check_period(char *arg)
39{ 43{
40 char *end; 44 ssize_t ret;
45 unsigned long val;
41 46
42 corruption_check_period = simple_strtoul(arg, &end, 10); 47 ret = kstrtoul(arg, 10, &val);
48 if (ret)
49 return ret;
43 50
44 return (*end == 0) ? 0 : -EINVAL; 51 corruption_check_period = val;
52 return 0;
45} 53}
46early_param("memory_corruption_check_period", set_corruption_check_period); 54early_param("memory_corruption_check_period", set_corruption_check_period);
47 55
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cf79302198a6..82f29e70d058 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1185,7 +1185,7 @@ void __cpuinit cpu_init(void)
1185 oist = &per_cpu(orig_ist, cpu); 1185 oist = &per_cpu(orig_ist, cpu);
1186 1186
1187#ifdef CONFIG_NUMA 1187#ifdef CONFIG_NUMA
1188 if (cpu != 0 && percpu_read(numa_node) == 0 && 1188 if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
1189 early_cpu_to_node(cpu) != NUMA_NO_NODE) 1189 early_cpu_to_node(cpu) != NUMA_NO_NODE)
1190 set_numa_node(early_cpu_to_node(cpu)); 1190 set_numa_node(early_cpu_to_node(cpu));
1191#endif 1191#endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index b8f3653dddbc..9a7c90d80bc4 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -615,14 +615,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
615 new_l2 = this_leaf.size/1024; 615 new_l2 = this_leaf.size/1024;
616 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 616 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
617 index_msb = get_count_order(num_threads_sharing); 617 index_msb = get_count_order(num_threads_sharing);
618 l2_id = c->apicid >> index_msb; 618 l2_id = c->apicid & ~((1 << index_msb) - 1);
619 break; 619 break;
620 case 3: 620 case 3:
621 new_l3 = this_leaf.size/1024; 621 new_l3 = this_leaf.size/1024;
622 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 622 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
623 index_msb = get_count_order( 623 index_msb = get_count_order(
624 num_threads_sharing); 624 num_threads_sharing);
625 l3_id = c->apicid >> index_msb; 625 l3_id = c->apicid & ~((1 << index_msb) - 1);
626 break; 626 break;
627 default: 627 default:
628 break; 628 break;
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 5502b289341b..36565373af87 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -23,7 +23,7 @@
23 * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) 23 * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
24 * 24 *
25 * Arrays used to match for this should also be declared using 25 * Arrays used to match for this should also be declared using
26 * MODULE_DEVICE_TABLE(x86_cpu, ...) 26 * MODULE_DEVICE_TABLE(x86cpu, ...)
27 * 27 *
28 * This always matches against the boot cpu, assuming models and features are 28 * This always matches against the boot cpu, assuming models and features are
29 * consistent over all CPUs. 29 * consistent over all CPUs.
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d086a09c087d..2afcbd253e1d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -583,7 +583,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
583 struct mce m; 583 struct mce m;
584 int i; 584 int i;
585 585
586 percpu_inc(mce_poll_count); 586 this_cpu_inc(mce_poll_count);
587 587
588 mce_gather_info(&m, NULL); 588 mce_gather_info(&m, NULL);
589 589
@@ -945,9 +945,10 @@ struct mce_info {
945 atomic_t inuse; 945 atomic_t inuse;
946 struct task_struct *t; 946 struct task_struct *t;
947 __u64 paddr; 947 __u64 paddr;
948 int restartable;
948} mce_info[MCE_INFO_MAX]; 949} mce_info[MCE_INFO_MAX];
949 950
950static void mce_save_info(__u64 addr) 951static void mce_save_info(__u64 addr, int c)
951{ 952{
952 struct mce_info *mi; 953 struct mce_info *mi;
953 954
@@ -955,6 +956,7 @@ static void mce_save_info(__u64 addr)
955 if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { 956 if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
956 mi->t = current; 957 mi->t = current;
957 mi->paddr = addr; 958 mi->paddr = addr;
959 mi->restartable = c;
958 return; 960 return;
959 } 961 }
960 } 962 }
@@ -1015,7 +1017,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1015 1017
1016 atomic_inc(&mce_entry); 1018 atomic_inc(&mce_entry);
1017 1019
1018 percpu_inc(mce_exception_count); 1020 this_cpu_inc(mce_exception_count);
1019 1021
1020 if (!banks) 1022 if (!banks)
1021 goto out; 1023 goto out;
@@ -1130,7 +1132,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1130 mce_panic("Fatal machine check on current CPU", &m, msg); 1132 mce_panic("Fatal machine check on current CPU", &m, msg);
1131 if (worst == MCE_AR_SEVERITY) { 1133 if (worst == MCE_AR_SEVERITY) {
1132 /* schedule action before return to userland */ 1134 /* schedule action before return to userland */
1133 mce_save_info(m.addr); 1135 mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV);
1134 set_thread_flag(TIF_MCE_NOTIFY); 1136 set_thread_flag(TIF_MCE_NOTIFY);
1135 } else if (kill_it) { 1137 } else if (kill_it) {
1136 force_sig(SIGBUS, current); 1138 force_sig(SIGBUS, current);
@@ -1179,7 +1181,13 @@ void mce_notify_process(void)
1179 1181
1180 pr_err("Uncorrected hardware memory error in user-access at %llx", 1182 pr_err("Uncorrected hardware memory error in user-access at %llx",
1181 mi->paddr); 1183 mi->paddr);
1182 if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) { 1184 /*
1185 * We must call memory_failure() here even if the current process is
1186 * doomed. We still need to mark the page as poisoned and alert any
1187 * other users of the page.
1188 */
1189 if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
1190 mi->restartable == 0) {
1183 pr_err("Memory error not recovered"); 1191 pr_err("Memory error not recovered");
1184 force_sig(SIGBUS, current); 1192 force_sig(SIGBUS, current);
1185 } 1193 }
@@ -1423,6 +1431,43 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1423 */ 1431 */
1424 if (c->x86 == 6 && banks > 0) 1432 if (c->x86 == 6 && banks > 0)
1425 mce_banks[0].ctl = 0; 1433 mce_banks[0].ctl = 0;
1434
1435 /*
1436 * Turn off MC4_MISC thresholding banks on those models since
1437 * they're not supported there.
1438 */
1439 if (c->x86 == 0x15 &&
1440 (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
1441 int i;
1442 u64 val, hwcr;
1443 bool need_toggle;
1444 u32 msrs[] = {
1445 0x00000413, /* MC4_MISC0 */
1446 0xc0000408, /* MC4_MISC1 */
1447 };
1448
1449 rdmsrl(MSR_K7_HWCR, hwcr);
1450
1451 /* McStatusWrEn has to be set */
1452 need_toggle = !(hwcr & BIT(18));
1453
1454 if (need_toggle)
1455 wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
1456
1457 for (i = 0; i < ARRAY_SIZE(msrs); i++) {
1458 rdmsrl(msrs[i], val);
1459
1460 /* CntP bit set? */
1461 if (val & BIT(62)) {
1462 val &= ~BIT(62);
1463 wrmsrl(msrs[i], val);
1464 }
1465 }
1466
1467 /* restore old settings */
1468 if (need_toggle)
1469 wrmsrl(MSR_K7_HWCR, hwcr);
1470 }
1426 } 1471 }
1427 1472
1428 if (c->x86_vendor == X86_VENDOR_INTEL) { 1473 if (c->x86_vendor == X86_VENDOR_INTEL) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 99b57179f912..f4873a64f46d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -51,6 +51,7 @@ struct threshold_block {
51 unsigned int cpu; 51 unsigned int cpu;
52 u32 address; 52 u32 address;
53 u16 interrupt_enable; 53 u16 interrupt_enable;
54 bool interrupt_capable;
54 u16 threshold_limit; 55 u16 threshold_limit;
55 struct kobject kobj; 56 struct kobject kobj;
56 struct list_head miscj; 57 struct list_head miscj;
@@ -83,6 +84,21 @@ struct thresh_restart {
83 u16 old_limit; 84 u16 old_limit;
84}; 85};
85 86
87static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
88{
89 /*
90 * bank 4 supports APIC LVT interrupts implicitly since forever.
91 */
92 if (bank == 4)
93 return true;
94
95 /*
96 * IntP: interrupt present; if this bit is set, the thresholding
97 * bank can generate APIC LVT interrupts
98 */
99 return msr_high_bits & BIT(28);
100}
101
86static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) 102static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
87{ 103{
88 int msr = (hi & MASK_LVTOFF_HI) >> 20; 104 int msr = (hi & MASK_LVTOFF_HI) >> 20;
@@ -104,8 +120,10 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
104 return 1; 120 return 1;
105}; 121};
106 122
107/* must be called with correct cpu affinity */ 123/*
108/* Called via smp_call_function_single() */ 124 * Called via smp_call_function_single(), must be called with correct
125 * cpu affinity.
126 */
109static void threshold_restart_bank(void *_tr) 127static void threshold_restart_bank(void *_tr)
110{ 128{
111 struct thresh_restart *tr = _tr; 129 struct thresh_restart *tr = _tr;
@@ -128,6 +146,12 @@ static void threshold_restart_bank(void *_tr)
128 (new_count & THRESHOLD_MAX); 146 (new_count & THRESHOLD_MAX);
129 } 147 }
130 148
149 /* clear IntType */
150 hi &= ~MASK_INT_TYPE_HI;
151
152 if (!tr->b->interrupt_capable)
153 goto done;
154
131 if (tr->set_lvt_off) { 155 if (tr->set_lvt_off) {
132 if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { 156 if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
133 /* set new lvt offset */ 157 /* set new lvt offset */
@@ -136,9 +160,10 @@ static void threshold_restart_bank(void *_tr)
136 } 160 }
137 } 161 }
138 162
139 tr->b->interrupt_enable ? 163 if (tr->b->interrupt_enable)
140 (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 164 hi |= INT_TYPE_APIC;
141 (hi &= ~MASK_INT_TYPE_HI); 165
166 done:
142 167
143 hi |= MASK_COUNT_EN_HI; 168 hi |= MASK_COUNT_EN_HI;
144 wrmsr(tr->b->address, lo, hi); 169 wrmsr(tr->b->address, lo, hi);
@@ -202,14 +227,17 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
202 if (shared_bank[bank] && c->cpu_core_id) 227 if (shared_bank[bank] && c->cpu_core_id)
203 break; 228 break;
204 229
205 offset = setup_APIC_mce(offset,
206 (high & MASK_LVTOFF_HI) >> 20);
207
208 memset(&b, 0, sizeof(b)); 230 memset(&b, 0, sizeof(b));
209 b.cpu = cpu; 231 b.cpu = cpu;
210 b.bank = bank; 232 b.bank = bank;
211 b.block = block; 233 b.block = block;
212 b.address = address; 234 b.address = address;
235 b.interrupt_capable = lvt_interrupt_supported(bank, high);
236
237 if (b.interrupt_capable) {
238 int new = (high & MASK_LVTOFF_HI) >> 20;
239 offset = setup_APIC_mce(offset, new);
240 }
213 241
214 mce_threshold_block_init(&b, offset); 242 mce_threshold_block_init(&b, offset);
215 mce_threshold_vector = amd_threshold_interrupt; 243 mce_threshold_vector = amd_threshold_interrupt;
@@ -309,6 +337,9 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
309 struct thresh_restart tr; 337 struct thresh_restart tr;
310 unsigned long new; 338 unsigned long new;
311 339
340 if (!b->interrupt_capable)
341 return -EINVAL;
342
312 if (strict_strtoul(buf, 0, &new) < 0) 343 if (strict_strtoul(buf, 0, &new) < 0)
313 return -EINVAL; 344 return -EINVAL;
314 345
@@ -390,10 +421,10 @@ RW_ATTR(threshold_limit);
390RW_ATTR(error_count); 421RW_ATTR(error_count);
391 422
392static struct attribute *default_attrs[] = { 423static struct attribute *default_attrs[] = {
393 &interrupt_enable.attr,
394 &threshold_limit.attr, 424 &threshold_limit.attr,
395 &error_count.attr, 425 &error_count.attr,
396 NULL 426 NULL, /* possibly interrupt_enable if supported, see below */
427 NULL,
397}; 428};
398 429
399#define to_block(k) container_of(k, struct threshold_block, kobj) 430#define to_block(k) container_of(k, struct threshold_block, kobj)
@@ -467,8 +498,14 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
467 b->cpu = cpu; 498 b->cpu = cpu;
468 b->address = address; 499 b->address = address;
469 b->interrupt_enable = 0; 500 b->interrupt_enable = 0;
501 b->interrupt_capable = lvt_interrupt_supported(bank, high);
470 b->threshold_limit = THRESHOLD_MAX; 502 b->threshold_limit = THRESHOLD_MAX;
471 503
504 if (b->interrupt_capable)
505 threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
506 else
507 threshold_ktype.default_attrs[2] = NULL;
508
472 INIT_LIST_HEAD(&b->miscj); 509 INIT_LIST_HEAD(&b->miscj);
473 510
474 if (per_cpu(threshold_banks, cpu)[bank]->blocks) { 511 if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bb8e03407e18..e049d6da0183 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event)
484 484
485 /* mark unused */ 485 /* mark unused */
486 event->hw.extra_reg.idx = EXTRA_REG_NONE; 486 event->hw.extra_reg.idx = EXTRA_REG_NONE;
487
488 /* mark not used */
489 event->hw.extra_reg.idx = EXTRA_REG_NONE;
490 event->hw.branch_reg.idx = EXTRA_REG_NONE; 487 event->hw.branch_reg.idx = EXTRA_REG_NONE;
491 488
492 return x86_pmu.hw_config(event); 489 return x86_pmu.hw_config(event);
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
1186 int idx, handled = 0; 1183 int idx, handled = 0;
1187 u64 val; 1184 u64 val;
1188 1185
1189 perf_sample_data_init(&data, 0);
1190
1191 cpuc = &__get_cpu_var(cpu_hw_events); 1186 cpuc = &__get_cpu_var(cpu_hw_events);
1192 1187
1193 /* 1188 /*
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
1222 * event overflow 1217 * event overflow
1223 */ 1218 */
1224 handled++; 1219 handled++;
1225 data.period = event->hw.last_period; 1220 perf_sample_data_init(&data, 0, event->hw.last_period);
1226 1221
1227 if (!x86_perf_event_set_period(event)) 1222 if (!x86_perf_event_set_period(event))
1228 continue; 1223 continue;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 9edc786aef89..11a4eb9131d5 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)
134 134
135static int amd_pmu_hw_config(struct perf_event *event) 135static int amd_pmu_hw_config(struct perf_event *event)
136{ 136{
137 int ret = x86_pmu_hw_config(event); 137 int ret;
138 138
139 /* pass precise event sampling to ibs: */
140 if (event->attr.precise_ip && get_ibs_caps())
141 return -ENOENT;
142
143 ret = x86_pmu_hw_config(event);
139 if (ret) 144 if (ret)
140 return ret; 145 return ret;
141 146
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
205 * when we come here 210 * when we come here
206 */ 211 */
207 for (i = 0; i < x86_pmu.num_counters; i++) { 212 for (i = 0; i < x86_pmu.num_counters; i++) {
208 if (nb->owners[i] == event) { 213 if (cmpxchg(nb->owners + i, event, NULL) == event)
209 cmpxchg(nb->owners+i, event, NULL);
210 break; 214 break;
211 }
212 } 215 }
213} 216}
214 217
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d30d14e..da9bcdcd9856 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@
9#include <linux/perf_event.h> 9#include <linux/perf_event.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/ptrace.h>
12 13
13#include <asm/apic.h> 14#include <asm/apic.h>
14 15
@@ -16,36 +17,591 @@ static u32 ibs_caps;
16 17
17#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) 18#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
18 19
19static struct pmu perf_ibs; 20#include <linux/kprobes.h>
21#include <linux/hardirq.h>
22
23#include <asm/nmi.h>
24
25#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
26#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
27
28enum ibs_states {
29 IBS_ENABLED = 0,
30 IBS_STARTED = 1,
31 IBS_STOPPING = 2,
32
33 IBS_MAX_STATES,
34};
35
36struct cpu_perf_ibs {
37 struct perf_event *event;
38 unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
39};
40
41struct perf_ibs {
42 struct pmu pmu;
43 unsigned int msr;
44 u64 config_mask;
45 u64 cnt_mask;
46 u64 enable_mask;
47 u64 valid_mask;
48 u64 max_period;
49 unsigned long offset_mask[1];
50 int offset_max;
51 struct cpu_perf_ibs __percpu *pcpu;
52 u64 (*get_count)(u64 config);
53};
54
55struct perf_ibs_data {
56 u32 size;
57 union {
58 u32 data[0]; /* data buffer starts here */
59 u32 caps;
60 };
61 u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
62};
63
64static int
65perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
66{
67 s64 left = local64_read(&hwc->period_left);
68 s64 period = hwc->sample_period;
69 int overflow = 0;
70
71 /*
72 * If we are way outside a reasonable range then just skip forward:
73 */
74 if (unlikely(left <= -period)) {
75 left = period;
76 local64_set(&hwc->period_left, left);
77 hwc->last_period = period;
78 overflow = 1;
79 }
80
81 if (unlikely(left < (s64)min)) {
82 left += period;
83 local64_set(&hwc->period_left, left);
84 hwc->last_period = period;
85 overflow = 1;
86 }
87
88 /*
 89	 * If the hw period that triggers the sw overflow is too short,
 90	 * we might land in the irq handler again too soon, which biases the results.
91 * Thus we shorten the next-to-last period and set the last
92 * period to the max period.
93 */
94 if (left > max) {
95 left -= max;
96 if (left > max)
97 left = max;
98 else if (left < min)
99 left = min;
100 }
101
102 *hw_period = (u64)left;
103
104 return overflow;
105}
106
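To see what the clamping in perf_event_set_period() does, consider a remaining period of 1.5 * max: the next-to-last hardware period is shortened so that the final one can run at the full max. A small self-contained illustration (limit values invented for the example):

static u64 example_split_period(void)
{
	u64 min = 1 << 4, max = 1 << 20;	/* hypothetical limits */
	s64 left = max + (max >> 1);		/* 1.5 * max still to count */

	if (left > max) {
		left -= max;			/* 0.5 * max remains */
		if (left > max)
			left = max;
		else if (left < min)
			left = min;
	}

	return (u64)left;	/* program 0.5 * max now, a full max last */
}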
107static int
108perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
109{
110 struct hw_perf_event *hwc = &event->hw;
111 int shift = 64 - width;
112 u64 prev_raw_count;
113 u64 delta;
114
115 /*
116 * Careful: an NMI might modify the previous event value.
117 *
118 * Our tactic to handle this is to first atomically read and
119 * exchange a new raw count - then add that new-prev delta
120 * count to the generic event atomically:
121 */
122 prev_raw_count = local64_read(&hwc->prev_count);
123 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
124 new_raw_count) != prev_raw_count)
125 return 0;
126
127 /*
128 * Now we have the new raw value and have updated the prev
129 * timestamp already. We can now calculate the elapsed delta
130 * (event-)time and add that to the generic event.
131 *
132 * Careful, not all hw sign-extends above the physical width
133 * of the count.
134 */
135 delta = (new_raw_count << shift) - (prev_raw_count << shift);
136 delta >>= shift;
137
138 local64_add(delta, &event->count);
139 local64_sub(delta, &hwc->period_left);
140
141 return 1;
142}
143
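The shift pair above is what keeps the delta correct when the hardware counter is narrower than 64 bits: a wrap near the top of the physical width must still produce a small positive delta. A tiny worked example for a hypothetical 48-bit counter:

static u64 example_delta_48bit(void)
{
	int shift = 64 - 48;			/* counter width = 48 */
	u64 prev = 0xffffffffffffULL;		/* just below the wrap */
	u64 new  = 0x000000000001ULL;		/* counter after wrapping */
	u64 delta = (new << shift) - (prev << shift);

	delta >>= shift;			/* == 2, wrap handled */
	return delta;
}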
144static struct perf_ibs perf_ibs_fetch;
145static struct perf_ibs perf_ibs_op;
146
147static struct perf_ibs *get_ibs_pmu(int type)
148{
149 if (perf_ibs_fetch.pmu.type == type)
150 return &perf_ibs_fetch;
151 if (perf_ibs_op.pmu.type == type)
152 return &perf_ibs_op;
153 return NULL;
154}
155
156/*
157 * Use IBS for precise event sampling:
158 *
159 * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
160 * perf record -a -e r076:p ... # same as -e cpu-cycles:p
161 * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
162 *
163 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
164 * MSRC001_1033) is used to select either cycle or micro-ops counting
165 * mode.
166 *
167 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 168 * levels 1 and 2, and PERF_EFLAGS_EXACT is set. In rare cases the
 169 * rip is invalid because IBS was not able to record it correctly.
 170 * In that case we clear PERF_EFLAGS_EXACT and take the rip from pt_regs.
171 *
172 */
173static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
174{
175 switch (event->attr.precise_ip) {
176 case 0:
177 return -ENOENT;
178 case 1:
179 case 2:
180 break;
181 default:
182 return -EOPNOTSUPP;
183 }
184
185 switch (event->attr.type) {
186 case PERF_TYPE_HARDWARE:
187 switch (event->attr.config) {
188 case PERF_COUNT_HW_CPU_CYCLES:
189 *config = 0;
190 return 0;
191 }
192 break;
193 case PERF_TYPE_RAW:
194 switch (event->attr.config) {
195 case 0x0076:
196 *config = 0;
197 return 0;
198 case 0x00C1:
199 *config = IBS_OP_CNT_CTL;
200 return 0;
201 }
202 break;
203 default:
204 return -ENOENT;
205 }
206
207 return -EOPNOTSUPP;
208}
20 209
21static int perf_ibs_init(struct perf_event *event) 210static int perf_ibs_init(struct perf_event *event)
22{ 211{
23 if (perf_ibs.type != event->attr.type) 212 struct hw_perf_event *hwc = &event->hw;
213 struct perf_ibs *perf_ibs;
214 u64 max_cnt, config;
215 int ret;
216
217 perf_ibs = get_ibs_pmu(event->attr.type);
218 if (perf_ibs) {
219 config = event->attr.config;
220 } else {
221 perf_ibs = &perf_ibs_op;
222 ret = perf_ibs_precise_event(event, &config);
223 if (ret)
224 return ret;
225 }
226
227 if (event->pmu != &perf_ibs->pmu)
24 return -ENOENT; 228 return -ENOENT;
229
230 if (config & ~perf_ibs->config_mask)
231 return -EINVAL;
232
233 if (hwc->sample_period) {
234 if (config & perf_ibs->cnt_mask)
235 /* raw max_cnt may not be set */
236 return -EINVAL;
237 if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
238 /*
 239 * the lower 4 bits cannot be set in the ibs max cnt,
 240 * but we allow it here in case we adjust the
 241 * sample period to set a frequency.
242 */
243 return -EINVAL;
244 hwc->sample_period &= ~0x0FULL;
245 if (!hwc->sample_period)
246 hwc->sample_period = 0x10;
247 } else {
248 max_cnt = config & perf_ibs->cnt_mask;
249 config &= ~perf_ibs->cnt_mask;
250 event->attr.sample_period = max_cnt << 4;
251 hwc->sample_period = event->attr.sample_period;
252 }
253
254 if (!hwc->sample_period)
255 return -EINVAL;
256
257 /*
258 * If we modify hwc->sample_period, we also need to update
259 * hwc->last_period and hwc->period_left.
260 */
261 hwc->last_period = hwc->sample_period;
262 local64_set(&hwc->period_left, hwc->sample_period);
263
264 hwc->config_base = perf_ibs->msr;
265 hwc->config = config;
266
25 return 0; 267 return 0;
26} 268}
27 269
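Since IBS counts in units of 16, perf_ibs_init() above strips the low 4 bits of a user-supplied sample period and enforces a floor of 0x10. Illustration (requested period invented):

static u64 example_round_period(void)
{
	u64 sample_period = 0x1234;		/* user-requested period */

	sample_period &= ~0x0FULL;		/* -> 0x1230 */
	if (!sample_period)
		sample_period = 0x10;		/* smallest usable period */

	return sample_period;			/* hw max_cnt = 0x1230 >> 4 */
}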
270static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
271 struct hw_perf_event *hwc, u64 *period)
272{
273 int overflow;
274
275 /* ignore lower 4 bits in min count: */
276 overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
277 local64_set(&hwc->prev_count, 0);
278
279 return overflow;
280}
281
282static u64 get_ibs_fetch_count(u64 config)
283{
284 return (config & IBS_FETCH_CNT) >> 12;
285}
286
287static u64 get_ibs_op_count(u64 config)
288{
289 u64 count = 0;
290
291 if (config & IBS_OP_VAL)
292 count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
293
294 if (ibs_caps & IBS_CAPS_RDWROPCNT)
295 count += (config & IBS_OP_CUR_CNT) >> 32;
296
297 return count;
298}
299
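get_ibs_op_count() composes the count from two fields of the op control register: the programmed maximum if the counter rolled over, plus the current count if the hardware can expose it. A worked example (the bit positions are assumptions mirroring the masks used above, not taken from this diff):

static u64 example_op_count(void)
{
	const u64 op_val     = 1ULL << 18;		/* rollover flag (assumed) */
	const u64 op_max_cnt = 0xFFFFULL;		/* period >> 4 (assumed) */
	const u64 op_cur_cnt = 0xFFFFFULL << 32;	/* current count (assumed) */
	u64 config = 0x1000 | op_val | (0x4ceULL << 32);
	u64 count = 0;

	if (config & op_val)
		count += (config & op_max_cnt) << 4;	/* 0x10000 from rollover */

	count += (config & op_cur_cnt) >> 32;		/* + 0x4ce counted since */

	return count;					/* 0x104ce */
}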
300static void
301perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
302 u64 *config)
303{
304 u64 count = perf_ibs->get_count(*config);
305
306 /*
307 * Set width to 64 since we do not overflow on max width but
308 * instead on max count. In perf_ibs_set_period() we clear
309 * prev count manually on overflow.
310 */
311 while (!perf_event_try_update(event, count, 64)) {
312 rdmsrl(event->hw.config_base, *config);
313 count = perf_ibs->get_count(*config);
314 }
315}
316
317static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
318 struct hw_perf_event *hwc, u64 config)
319{
320 wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
321}
322
323/*
324 * Erratum #420 Instruction-Based Sampling Engine May Generate
325 * Interrupt that Cannot Be Cleared:
326 *
327 * Must clear counter mask first, then clear the enable bit. See
328 * Revision Guide for AMD Family 10h Processors, Publication #41322.
329 */
330static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
331 struct hw_perf_event *hwc, u64 config)
332{
333 config &= ~perf_ibs->cnt_mask;
334 wrmsrl(hwc->config_base, config);
335 config &= ~perf_ibs->enable_mask;
336 wrmsrl(hwc->config_base, config);
337}
338
339/*
 340 * We cannot restore the ibs pmu state, so we always need to update
 341 * the event while stopping it and then reset the state when starting
 342 * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
 343 * in perf_ibs_start()/perf_ibs_stop() and instead always do both.
344 */
345static void perf_ibs_start(struct perf_event *event, int flags)
346{
347 struct hw_perf_event *hwc = &event->hw;
348 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
349 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
350 u64 period;
351
352 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
353 return;
354
355 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
356 hwc->state = 0;
357
358 perf_ibs_set_period(perf_ibs, hwc, &period);
359 set_bit(IBS_STARTED, pcpu->state);
360 perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
361
362 perf_event_update_userpage(event);
363}
364
365static void perf_ibs_stop(struct perf_event *event, int flags)
366{
367 struct hw_perf_event *hwc = &event->hw;
368 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
369 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
370 u64 config;
371 int stopping;
372
373 stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
374
375 if (!stopping && (hwc->state & PERF_HES_UPTODATE))
376 return;
377
378 rdmsrl(hwc->config_base, config);
379
380 if (stopping) {
381 set_bit(IBS_STOPPING, pcpu->state);
382 perf_ibs_disable_event(perf_ibs, hwc, config);
383 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
384 hwc->state |= PERF_HES_STOPPED;
385 }
386
387 if (hwc->state & PERF_HES_UPTODATE)
388 return;
389
390 /*
 391 * Clear the valid bit so rollovers are not counted on update;
 392 * rollovers are only accounted in the irq handler.
393 */
394 config &= ~perf_ibs->valid_mask;
395
396 perf_ibs_event_update(perf_ibs, event, &config);
397 hwc->state |= PERF_HES_UPTODATE;
398}
399
28static int perf_ibs_add(struct perf_event *event, int flags) 400static int perf_ibs_add(struct perf_event *event, int flags)
29{ 401{
402 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
403 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
404
405 if (test_and_set_bit(IBS_ENABLED, pcpu->state))
406 return -ENOSPC;
407
408 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
409
410 pcpu->event = event;
411
412 if (flags & PERF_EF_START)
413 perf_ibs_start(event, PERF_EF_RELOAD);
414
30 return 0; 415 return 0;
31} 416}
32 417
33static void perf_ibs_del(struct perf_event *event, int flags) 418static void perf_ibs_del(struct perf_event *event, int flags)
34{ 419{
420 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
421 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
422
423 if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
424 return;
425
426 perf_ibs_stop(event, PERF_EF_UPDATE);
427
428 pcpu->event = NULL;
429
430 perf_event_update_userpage(event);
35} 431}
36 432
37static struct pmu perf_ibs = { 433static void perf_ibs_read(struct perf_event *event) { }
38 .event_init= perf_ibs_init, 434
39 .add= perf_ibs_add, 435static struct perf_ibs perf_ibs_fetch = {
40 .del= perf_ibs_del, 436 .pmu = {
437 .task_ctx_nr = perf_invalid_context,
438
439 .event_init = perf_ibs_init,
440 .add = perf_ibs_add,
441 .del = perf_ibs_del,
442 .start = perf_ibs_start,
443 .stop = perf_ibs_stop,
444 .read = perf_ibs_read,
445 },
446 .msr = MSR_AMD64_IBSFETCHCTL,
447 .config_mask = IBS_FETCH_CONFIG_MASK,
448 .cnt_mask = IBS_FETCH_MAX_CNT,
449 .enable_mask = IBS_FETCH_ENABLE,
450 .valid_mask = IBS_FETCH_VAL,
451 .max_period = IBS_FETCH_MAX_CNT << 4,
452 .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
453 .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
454
455 .get_count = get_ibs_fetch_count,
41}; 456};
42 457
458static struct perf_ibs perf_ibs_op = {
459 .pmu = {
460 .task_ctx_nr = perf_invalid_context,
461
462 .event_init = perf_ibs_init,
463 .add = perf_ibs_add,
464 .del = perf_ibs_del,
465 .start = perf_ibs_start,
466 .stop = perf_ibs_stop,
467 .read = perf_ibs_read,
468 },
469 .msr = MSR_AMD64_IBSOPCTL,
470 .config_mask = IBS_OP_CONFIG_MASK,
471 .cnt_mask = IBS_OP_MAX_CNT,
472 .enable_mask = IBS_OP_ENABLE,
473 .valid_mask = IBS_OP_VAL,
474 .max_period = IBS_OP_MAX_CNT << 4,
475 .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
476 .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
477
478 .get_count = get_ibs_op_count,
479};
480
481static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
482{
483 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
484 struct perf_event *event = pcpu->event;
485 struct hw_perf_event *hwc = &event->hw;
486 struct perf_sample_data data;
487 struct perf_raw_record raw;
488 struct pt_regs regs;
489 struct perf_ibs_data ibs_data;
490 int offset, size, check_rip, offset_max, throttle = 0;
491 unsigned int msr;
492 u64 *buf, *config, period;
493
494 if (!test_bit(IBS_STARTED, pcpu->state)) {
495 /*
496 * Catch spurious interrupts after stopping IBS: After
 497 * disabling IBS there could still be incoming NMIs
 498 * with samples that even have the valid bit cleared.
 499 * Mark all these NMIs as handled.
500 */
501 return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
502 }
503
504 msr = hwc->config_base;
505 buf = ibs_data.regs;
506 rdmsrl(msr, *buf);
507 if (!(*buf++ & perf_ibs->valid_mask))
508 return 0;
509
510 config = &ibs_data.regs[0];
511 perf_ibs_event_update(perf_ibs, event, config);
512 perf_sample_data_init(&data, 0, hwc->last_period);
513 if (!perf_ibs_set_period(perf_ibs, hwc, &period))
514 goto out; /* no sw counter overflow */
515
516 ibs_data.caps = ibs_caps;
517 size = 1;
518 offset = 1;
519 check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
520 if (event->attr.sample_type & PERF_SAMPLE_RAW)
521 offset_max = perf_ibs->offset_max;
522 else if (check_rip)
523 offset_max = 2;
524 else
525 offset_max = 1;
526 do {
527 rdmsrl(msr + offset, *buf++);
528 size++;
529 offset = find_next_bit(perf_ibs->offset_mask,
530 perf_ibs->offset_max,
531 offset + 1);
532 } while (offset < offset_max);
533 ibs_data.size = sizeof(u64) * size;
534
535 regs = *iregs;
536 if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
537 regs.flags &= ~PERF_EFLAGS_EXACT;
538 } else {
539 instruction_pointer_set(&regs, ibs_data.regs[1]);
540 regs.flags |= PERF_EFLAGS_EXACT;
541 }
542
543 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
544 raw.size = sizeof(u32) + ibs_data.size;
545 raw.data = ibs_data.data;
546 data.raw = &raw;
547 }
548
549 throttle = perf_event_overflow(event, &data, &regs);
550out:
551 if (throttle)
552 perf_ibs_disable_event(perf_ibs, hwc, *config);
553 else
554 perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
555
556 perf_event_update_userpage(event);
557
558 return 1;
559}
560
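The rdmsrl() loop above only touches the MSRs that exist for this flavor of IBS, using offset_mask as a bitmap of valid register offsets relative to the control MSR. A toy version of the same walk (mask value invented):

#include <linux/bitops.h>

static int example_msr_walk(void)
{
	unsigned long offset_mask[1] = { 0x07 };	/* offsets 0..2 valid */
	int offset = 1, offset_max = 3, reads = 0;

	do {
		reads++;	/* stands in for rdmsrl(msr + offset, ...) */
		offset = find_next_bit(offset_mask, offset_max, offset + 1);
	} while (offset < offset_max);

	return reads;		/* 2: offsets 1 and 2 were read */
}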
561static int __kprobes
562perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
563{
564 int handled = 0;
565
566 handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
567 handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
568
569 if (handled)
570 inc_irq_stat(apic_perf_irqs);
571
572 return handled;
573}
574
575static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
576{
577 struct cpu_perf_ibs __percpu *pcpu;
578 int ret;
579
580 pcpu = alloc_percpu(struct cpu_perf_ibs);
581 if (!pcpu)
582 return -ENOMEM;
583
584 perf_ibs->pcpu = pcpu;
585
586 ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
587 if (ret) {
588 perf_ibs->pcpu = NULL;
589 free_percpu(pcpu);
590 }
591
592 return ret;
593}
594
43static __init int perf_event_ibs_init(void) 595static __init int perf_event_ibs_init(void)
44{ 596{
45 if (!ibs_caps) 597 if (!ibs_caps)
46 return -ENODEV; /* ibs not supported by the cpu */ 598 return -ENODEV; /* ibs not supported by the cpu */
47 599
48 perf_pmu_register(&perf_ibs, "ibs", -1); 600 perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
601 if (ibs_caps & IBS_CAPS_OPCNT)
602 perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
603 perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
604 register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
49 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); 605 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
50 606
51 return 0; 607 return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2fef104..166546ec6aef 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1027 u64 status; 1027 u64 status;
1028 int handled; 1028 int handled;
1029 1029
1030 perf_sample_data_init(&data, 0);
1031
1032 cpuc = &__get_cpu_var(cpu_hw_events); 1030 cpuc = &__get_cpu_var(cpu_hw_events);
1033 1031
1034 /* 1032 /*
@@ -1082,7 +1080,7 @@ again:
1082 if (!intel_pmu_save_and_restart(event)) 1080 if (!intel_pmu_save_and_restart(event))
1083 continue; 1081 continue;
1084 1082
1085 data.period = event->hw.last_period; 1083 perf_sample_data_init(&data, 0, event->hw.last_period);
1086 1084
1087 if (has_branch_stack(event)) 1085 if (has_branch_stack(event))
1088 data.br_stack = &cpuc->lbr_stack; 1086 data.br_stack = &cpuc->lbr_stack;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df19e7dd..5a3edc27f6e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void)
316 316
317 ds->bts_index = ds->bts_buffer_base; 317 ds->bts_index = ds->bts_buffer_base;
318 318
319 perf_sample_data_init(&data, 0); 319 perf_sample_data_init(&data, 0, event->hw.last_period);
320 data.period = event->hw.last_period;
321 regs.ip = 0; 320 regs.ip = 0;
322 321
323 /* 322 /*
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
564 if (!intel_pmu_save_and_restart(event)) 563 if (!intel_pmu_save_and_restart(event))
565 return; 564 return;
566 565
567 perf_sample_data_init(&data, 0); 566 perf_sample_data_init(&data, 0, event->hw.last_period);
568 data.period = event->hw.last_period;
569 567
570 /* 568 /*
571 * We use the interrupt regs as a base because the PEBS record 569 * We use the interrupt regs as a base because the PEBS record
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacfd7103..47124a73dd73 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
1005 int idx, handled = 0; 1005 int idx, handled = 0;
1006 u64 val; 1006 u64 val;
1007 1007
1008 perf_sample_data_init(&data, 0);
1009
1010 cpuc = &__get_cpu_var(cpu_hw_events); 1008 cpuc = &__get_cpu_var(cpu_hw_events);
1011 1009
1012 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1010 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
1034 handled += overflow; 1032 handled += overflow;
1035 1033
1036 /* event overflow for sure */ 1034 /* event overflow for sure */
1037 data.period = event->hw.last_period; 1035 perf_sample_data_init(&data, 0, hwc->last_period);
1038 1036
1039 if (!x86_perf_event_set_period(event)) 1037 if (!x86_perf_event_set_period(event))
1040 continue; 1038 continue;
1039
1040
1041 if (perf_event_overflow(event, &data, regs)) 1041 if (perf_event_overflow(event, &data, regs))
1042 x86_pmu_stop(event, 0); 1042 x86_pmu_stop(event, 0);
1043 } 1043 }
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 1b81839b6c88..571246d81edf 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -271,7 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
271 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) 271 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
272 return 1; 272 return 1;
273 273
274 show_registers(regs); 274 show_regs(regs);
275#ifdef CONFIG_X86_32 275#ifdef CONFIG_X86_32
276 if (user_mode_vm(regs)) { 276 if (user_mode_vm(regs)) {
277 sp = regs->sp; 277 sp = regs->sp;
@@ -311,16 +311,33 @@ void die(const char *str, struct pt_regs *regs, long err)
311 311
312static int __init kstack_setup(char *s) 312static int __init kstack_setup(char *s)
313{ 313{
314 ssize_t ret;
315 unsigned long val;
316
314 if (!s) 317 if (!s)
315 return -EINVAL; 318 return -EINVAL;
316 kstack_depth_to_print = simple_strtoul(s, NULL, 0); 319
320 ret = kstrtoul(s, 0, &val);
321 if (ret)
322 return ret;
323 kstack_depth_to_print = val;
317 return 0; 324 return 0;
318} 325}
319early_param("kstack", kstack_setup); 326early_param("kstack", kstack_setup);
320 327
321static int __init code_bytes_setup(char *s) 328static int __init code_bytes_setup(char *s)
322{ 329{
323 code_bytes = simple_strtoul(s, NULL, 0); 330 ssize_t ret;
331 unsigned long val;
332
333 if (!s)
334 return -EINVAL;
335
336 ret = kstrtoul(s, 0, &val);
337 if (ret)
338 return ret;
339
340 code_bytes = val;
324 if (code_bytes > 8192) 341 if (code_bytes > 8192)
325 code_bytes = 8192; 342 code_bytes = 8192;
326 343
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 88ec9129271d..e0b1d783daab 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -82,7 +82,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
82} 82}
83 83
84 84
85void show_registers(struct pt_regs *regs) 85void show_regs(struct pt_regs *regs)
86{ 86{
87 int i; 87 int i;
88 88
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 17107bd6e1f0..791b76122aa8 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -245,7 +245,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
245 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 245 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
246} 246}
247 247
248void show_registers(struct pt_regs *regs) 248void show_regs(struct pt_regs *regs)
249{ 249{
250 int i; 250 int i;
251 unsigned long sp; 251 unsigned long sp;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 7b784f4ef1e4..01ccf9b71473 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -56,6 +56,7 @@
56#include <asm/irq_vectors.h> 56#include <asm/irq_vectors.h>
57#include <asm/cpufeature.h> 57#include <asm/cpufeature.h>
58#include <asm/alternative-asm.h> 58#include <asm/alternative-asm.h>
59#include <asm/asm.h>
59 60
60/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
61#include <linux/elf-em.h> 62#include <linux/elf-em.h>
@@ -151,10 +152,8 @@
151.pushsection .fixup, "ax" 152.pushsection .fixup, "ax"
15299: movl $0, (%esp) 15399: movl $0, (%esp)
153 jmp 98b 154 jmp 98b
154.section __ex_table, "a"
155 .align 4
156 .long 98b, 99b
157.popsection 155.popsection
156 _ASM_EXTABLE(98b,99b)
158.endm 157.endm
159 158
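These entry_32.S/entry_64.S hunks all perform the same mechanical conversion: open-coded __ex_table fragments are replaced by the _ASM_EXTABLE() helper from <asm/asm.h>. As a sketch, and assuming the classic absolute from/to layout (the exception-table format was being reworked to relative offsets around this same development cycle), the assembly-side macro amounts to:

#ifdef __ASSEMBLY__
# define _ASM_EXTABLE(from, to)			\
	.pushsection "__ex_table", "a" ;	\
	_ASM_ALIGN ;				\
	_ASM_PTR from , to ;			\
	.popsection
#endif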
160.macro PTGS_TO_GS 159.macro PTGS_TO_GS
@@ -164,10 +163,8 @@
164.pushsection .fixup, "ax" 163.pushsection .fixup, "ax"
16599: movl $0, PT_GS(%esp) 16499: movl $0, PT_GS(%esp)
166 jmp 98b 165 jmp 98b
167.section __ex_table, "a"
168 .align 4
169 .long 98b, 99b
170.popsection 166.popsection
167 _ASM_EXTABLE(98b,99b)
171.endm 168.endm
172 169
173.macro GS_TO_REG reg 170.macro GS_TO_REG reg
@@ -249,12 +246,10 @@
249 jmp 2b 246 jmp 2b
2506: movl $0, (%esp) 2476: movl $0, (%esp)
251 jmp 3b 248 jmp 3b
252.section __ex_table, "a"
253 .align 4
254 .long 1b, 4b
255 .long 2b, 5b
256 .long 3b, 6b
257.popsection 249.popsection
250 _ASM_EXTABLE(1b,4b)
251 _ASM_EXTABLE(2b,5b)
252 _ASM_EXTABLE(3b,6b)
258 POP_GS_EX 253 POP_GS_EX
259.endm 254.endm
260 255
@@ -415,10 +410,7 @@ sysenter_past_esp:
415 jae syscall_fault 410 jae syscall_fault
4161: movl (%ebp),%ebp 4111: movl (%ebp),%ebp
417 movl %ebp,PT_EBP(%esp) 412 movl %ebp,PT_EBP(%esp)
418.section __ex_table,"a" 413 _ASM_EXTABLE(1b,syscall_fault)
419 .align 4
420 .long 1b,syscall_fault
421.previous
422 414
423 GET_THREAD_INFO(%ebp) 415 GET_THREAD_INFO(%ebp)
424 416
@@ -485,10 +477,8 @@ sysexit_audit:
485.pushsection .fixup,"ax" 477.pushsection .fixup,"ax"
4862: movl $0,PT_FS(%esp) 4782: movl $0,PT_FS(%esp)
487 jmp 1b 479 jmp 1b
488.section __ex_table,"a"
489 .align 4
490 .long 1b,2b
491.popsection 480.popsection
481 _ASM_EXTABLE(1b,2b)
492 PTGS_TO_GS_EX 482 PTGS_TO_GS_EX
493ENDPROC(ia32_sysenter_target) 483ENDPROC(ia32_sysenter_target)
494 484
@@ -543,10 +533,7 @@ ENTRY(iret_exc)
543 pushl $do_iret_error 533 pushl $do_iret_error
544 jmp error_code 534 jmp error_code
545.previous 535.previous
546.section __ex_table,"a" 536 _ASM_EXTABLE(irq_return,iret_exc)
547 .align 4
548 .long irq_return,iret_exc
549.previous
550 537
551 CFI_RESTORE_STATE 538 CFI_RESTORE_STATE
552ldt_ss: 539ldt_ss:
@@ -901,10 +888,7 @@ END(device_not_available)
901#ifdef CONFIG_PARAVIRT 888#ifdef CONFIG_PARAVIRT
902ENTRY(native_iret) 889ENTRY(native_iret)
903 iret 890 iret
904.section __ex_table,"a" 891 _ASM_EXTABLE(native_iret, iret_exc)
905 .align 4
906 .long native_iret, iret_exc
907.previous
908END(native_iret) 892END(native_iret)
909 893
910ENTRY(native_irq_enable_sysexit) 894ENTRY(native_irq_enable_sysexit)
@@ -1093,13 +1077,10 @@ ENTRY(xen_failsafe_callback)
1093 movl %eax,16(%esp) 1077 movl %eax,16(%esp)
1094 jmp 4b 1078 jmp 4b
1095.previous 1079.previous
1096.section __ex_table,"a" 1080 _ASM_EXTABLE(1b,6b)
1097 .align 4 1081 _ASM_EXTABLE(2b,7b)
1098 .long 1b,6b 1082 _ASM_EXTABLE(3b,8b)
1099 .long 2b,7b 1083 _ASM_EXTABLE(4b,9b)
1100 .long 3b,8b
1101 .long 4b,9b
1102.previous
1103ENDPROC(xen_failsafe_callback) 1084ENDPROC(xen_failsafe_callback)
1104 1085
1105BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, 1086BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cdc79b5cfcd9..320852d02026 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
55#include <asm/paravirt.h> 55#include <asm/paravirt.h>
56#include <asm/ftrace.h> 56#include <asm/ftrace.h>
57#include <asm/percpu.h> 57#include <asm/percpu.h>
58#include <asm/asm.h>
58#include <linux/err.h> 59#include <linux/err.h>
59 60
60/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -900,18 +901,12 @@ restore_args:
900 901
901irq_return: 902irq_return:
902 INTERRUPT_RETURN 903 INTERRUPT_RETURN
903 904 _ASM_EXTABLE(irq_return, bad_iret)
904 .section __ex_table, "a"
905 .quad irq_return, bad_iret
906 .previous
907 905
908#ifdef CONFIG_PARAVIRT 906#ifdef CONFIG_PARAVIRT
909ENTRY(native_iret) 907ENTRY(native_iret)
910 iretq 908 iretq
911 909 _ASM_EXTABLE(native_iret, bad_iret)
912 .section __ex_table,"a"
913 .quad native_iret, bad_iret
914 .previous
915#endif 910#endif
916 911
917 .section .fixup,"ax" 912 .section .fixup,"ax"
@@ -1181,10 +1176,7 @@ gs_change:
1181 CFI_ENDPROC 1176 CFI_ENDPROC
1182END(native_load_gs_index) 1177END(native_load_gs_index)
1183 1178
1184 .section __ex_table,"a" 1179 _ASM_EXTABLE(gs_change,bad_gs)
1185 .align 8
1186 .quad gs_change,bad_gs
1187 .previous
1188 .section .fixup,"ax" 1180 .section .fixup,"ax"
1189 /* running with kernelgs */ 1181 /* running with kernelgs */
1190bad_gs: 1182bad_gs:
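Both entry files now emit their exception-table entries through one macro instead of the open-coded sections removed above. Judging purely from those removed sequences, _ASM_EXTABLE(from, to) wraps boilerplate along these lines (a sketch only; the real definition in the reworked <asm/asm.h> may differ, e.g. by emitting relative offsets):

	.macro EXTABLE_SKETCH from, to	# illustrative, not <asm/asm.h>
	.pushsection __ex_table, "a"
	.balign 8			# 4 on 32-bit
	.quad \from, \to		# .long pairs on 32-bit
	.popsection
	.endm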
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c9a281f272fd..32ff36596ab1 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,40 +24,21 @@
24#include <trace/syscall.h> 24#include <trace/syscall.h>
25 25
26#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
27#include <asm/kprobes.h>
27#include <asm/ftrace.h> 28#include <asm/ftrace.h>
28#include <asm/nops.h> 29#include <asm/nops.h>
29#include <asm/nmi.h>
30
31 30
32#ifdef CONFIG_DYNAMIC_FTRACE 31#ifdef CONFIG_DYNAMIC_FTRACE
33 32
34/*
35 * modifying_code is set to notify NMIs that they need to use
36 * memory barriers when entering or exiting. But we don't want
37 * to burden NMIs with unnecessary memory barriers when code
38 * modification is not being done (which is most of the time).
39 *
40 * A mutex is already held when ftrace_arch_code_modify_prepare
41 * and post_process are called. No locks need to be taken here.
42 *
43 * Stop machine will make sure currently running NMIs are done
44 * and new NMIs will see the updated variable before we need
45 * to worry about NMIs doing memory barriers.
46 */
47static int modifying_code __read_mostly;
48static DEFINE_PER_CPU(int, save_modifying_code);
49
50int ftrace_arch_code_modify_prepare(void) 33int ftrace_arch_code_modify_prepare(void)
51{ 34{
52 set_kernel_text_rw(); 35 set_kernel_text_rw();
53 set_all_modules_text_rw(); 36 set_all_modules_text_rw();
54 modifying_code = 1;
55 return 0; 37 return 0;
56} 38}
57 39
58int ftrace_arch_code_modify_post_process(void) 40int ftrace_arch_code_modify_post_process(void)
59{ 41{
60 modifying_code = 0;
61 set_all_modules_text_ro(); 42 set_all_modules_text_ro();
62 set_kernel_text_ro(); 43 set_kernel_text_ro();
63 return 0; 44 return 0;
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
90 return calc.code; 71 return calc.code;
91} 72}
92 73
93/*
94 * Modifying code must take extra care. On an SMP machine, if
95 * the code being modified is also being executed on another CPU
96 * that CPU will have undefined results and possibly take a GPF.
 97 * We use kstop_machine to stop other CPUs from executing code.
98 * But this does not stop NMIs from happening. We still need
99 * to protect against that. We separate out the modification of
100 * the code to take care of this.
101 *
102 * Two buffers are added: An IP buffer and a "code" buffer.
103 *
104 * 1) Put the instruction pointer into the IP buffer
105 * and the new code into the "code" buffer.
106 * 2) Wait for any running NMIs to finish and set a flag that says
107 * we are modifying code, it is done in an atomic operation.
108 * 3) Write the code
109 * 4) clear the flag.
110 * 5) Wait for any running NMIs to finish.
111 *
112 * If an NMI is executed, the first thing it does is to call
113 * "ftrace_nmi_enter". This will check if the flag is set to write
114 * and if it is, it will write what is in the IP and "code" buffers.
115 *
116 * The trick is, it does not matter if everyone is writing the same
117 * content to the code location. Also, if a CPU is executing code
118 * it is OK to write to that code location if the contents being written
119 * are the same as what exists.
120 */
121
122#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
123static atomic_t nmi_running = ATOMIC_INIT(0);
124static int mod_code_status; /* holds return value of text write */
125static void *mod_code_ip; /* holds the IP to write to */
126static const void *mod_code_newcode; /* holds the text to write to the IP */
127
128static unsigned nmi_wait_count;
129static atomic_t nmi_update_count = ATOMIC_INIT(0);
130
131int ftrace_arch_read_dyn_info(char *buf, int size)
132{
133 int r;
134
135 r = snprintf(buf, size, "%u %u",
136 nmi_wait_count,
137 atomic_read(&nmi_update_count));
138 return r;
139}
140
141static void clear_mod_flag(void)
142{
143 int old = atomic_read(&nmi_running);
144
145 for (;;) {
146 int new = old & ~MOD_CODE_WRITE_FLAG;
147
148 if (old == new)
149 break;
150
151 old = atomic_cmpxchg(&nmi_running, old, new);
152 }
153}
154
155static void ftrace_mod_code(void)
156{
157 /*
158 * Yes, more than one CPU process can be writing to mod_code_status.
159 * (and the code itself)
160 * But if one were to fail, then they all should, and if one were
161 * to succeed, then they all should.
162 */
163 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
164 MCOUNT_INSN_SIZE);
165
166 /* if we fail, then kill any new writers */
167 if (mod_code_status)
168 clear_mod_flag();
169}
170
171void ftrace_nmi_enter(void)
172{
173 __this_cpu_write(save_modifying_code, modifying_code);
174
175 if (!__this_cpu_read(save_modifying_code))
176 return;
177
178 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
179 smp_rmb();
180 ftrace_mod_code();
181 atomic_inc(&nmi_update_count);
182 }
183 /* Must have previous changes seen before executions */
184 smp_mb();
185}
186
187void ftrace_nmi_exit(void)
188{
189 if (!__this_cpu_read(save_modifying_code))
190 return;
191
192 /* Finish all executions before clearing nmi_running */
193 smp_mb();
194 atomic_dec(&nmi_running);
195}
196
197static void wait_for_nmi_and_set_mod_flag(void)
198{
199 if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
200 return;
201
202 do {
203 cpu_relax();
204 } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
205
206 nmi_wait_count++;
207}
208
209static void wait_for_nmi(void)
210{
211 if (!atomic_read(&nmi_running))
212 return;
213
214 do {
215 cpu_relax();
216 } while (atomic_read(&nmi_running));
217
218 nmi_wait_count++;
219}
220
221static inline int 74static inline int
222within(unsigned long addr, unsigned long start, unsigned long end) 75within(unsigned long addr, unsigned long start, unsigned long end)
223{ 76{
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
238 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 91 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
239 ip = (unsigned long)__va(__pa(ip)); 92 ip = (unsigned long)__va(__pa(ip));
240 93
241 mod_code_ip = (void *)ip; 94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
242 mod_code_newcode = new_code;
243
244 /* The buffers need to be visible before we let NMIs write them */
245 smp_mb();
246
247 wait_for_nmi_and_set_mod_flag();
248
249 /* Make sure all running NMIs have finished before we write the code */
250 smp_mb();
251
252 ftrace_mod_code();
253
254 /* Make sure the write happens before clearing the bit */
255 smp_mb();
256
257 clear_mod_flag();
258 wait_for_nmi();
259
260 return mod_code_status;
261} 95}
262 96
263static const unsigned char *ftrace_nop_replace(void) 97static const unsigned char *ftrace_nop_replace(void)
@@ -334,6 +168,336 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
334 return ret; 168 return ret;
335} 169}
336 170
171int modifying_ftrace_code __read_mostly;
172
173/*
174 * A breakpoint was added to the code address we are about to
175 * modify, and this is the handle that will just skip over it.
176 * We are either changing a nop into a trace call, or a trace
177 * call to a nop. While the change is taking place, we treat
178 * it just like it was a nop.
179 */
180int ftrace_int3_handler(struct pt_regs *regs)
181{
182 if (WARN_ON_ONCE(!regs))
183 return 0;
184
185 if (!ftrace_location(regs->ip - 1))
186 return 0;
187
188 regs->ip += MCOUNT_INSN_SIZE - 1;
189
190 return 1;
191}
192
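Worked through with MCOUNT_INSN_SIZE == 5 (the 5-byte x86 call this code patches) and an mcount site at address A whose first byte currently holds the one-byte int3:

	/* trap:  the CPU reports the address after the int3,         */
	/*        so regs->ip == A + 1                                */
	/* check: ftrace_location(regs->ip - 1) looks up A itself     */
	/* skip:  regs->ip += 5 - 1 resumes at A + 5, past the slot,  */
	/*        so the half-patched site behaves like a 5-byte nop  */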
193static int ftrace_write(unsigned long ip, const char *val, int size)
194{
195 /*
196 * On x86_64, kernel text mappings are mapped read-only with
197 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
198 * of the kernel text mapping to modify the kernel text.
199 *
 200 * For 32bit kernels, these mappings are the same and we can use
201 * kernel identity mapping to modify code.
202 */
203 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
204 ip = (unsigned long)__va(__pa(ip));
205
206 return probe_kernel_write((void *)ip, val, size);
207}
208
209static int add_break(unsigned long ip, const char *old)
210{
211 unsigned char replaced[MCOUNT_INSN_SIZE];
212 unsigned char brk = BREAKPOINT_INSTRUCTION;
213
214 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
215 return -EFAULT;
216
217 /* Make sure it is what we expect it to be */
218 if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
219 return -EINVAL;
220
221 if (ftrace_write(ip, &brk, 1))
222 return -EPERM;
223
224 return 0;
225}
226
227static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
228{
229 unsigned const char *old;
230 unsigned long ip = rec->ip;
231
232 old = ftrace_call_replace(ip, addr);
233
234 return add_break(rec->ip, old);
235}
236
237
238static int add_brk_on_nop(struct dyn_ftrace *rec)
239{
240 unsigned const char *old;
241
242 old = ftrace_nop_replace();
243
244 return add_break(rec->ip, old);
245}
246
247static int add_breakpoints(struct dyn_ftrace *rec, int enable)
248{
249 unsigned long ftrace_addr;
250 int ret;
251
252 ret = ftrace_test_record(rec, enable);
253
254 ftrace_addr = (unsigned long)FTRACE_ADDR;
255
256 switch (ret) {
257 case FTRACE_UPDATE_IGNORE:
258 return 0;
259
260 case FTRACE_UPDATE_MAKE_CALL:
261 /* converting nop to call */
262 return add_brk_on_nop(rec);
263
264 case FTRACE_UPDATE_MAKE_NOP:
265 /* converting a call to a nop */
266 return add_brk_on_call(rec, ftrace_addr);
267 }
268 return 0;
269}
270
271/*
272 * On error, we need to remove breakpoints. This needs to
 273 * be done carefully. If the address does not currently have a
274 * breakpoint, we know we are done. Otherwise, we look at the
275 * remaining 4 bytes of the instruction. If it matches a nop
276 * we replace the breakpoint with the nop. Otherwise we replace
277 * it with the call instruction.
278 */
279static int remove_breakpoint(struct dyn_ftrace *rec)
280{
281 unsigned char ins[MCOUNT_INSN_SIZE];
282 unsigned char brk = BREAKPOINT_INSTRUCTION;
283 const unsigned char *nop;
284 unsigned long ftrace_addr;
285 unsigned long ip = rec->ip;
286
287 /* If we fail the read, just give up */
288 if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
289 return -EFAULT;
290
291 /* If this does not have a breakpoint, we are done */
292 if (ins[0] != brk)
293 return -1;
294
295 nop = ftrace_nop_replace();
296
297 /*
298 * If the last 4 bytes of the instruction do not match
299 * a nop, then we assume that this is a call to ftrace_addr.
300 */
301 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
302 /*
 303 * For extra paranoia, we check if the breakpoint is on
 304 * a call that would actually jump to the ftrace_addr.
 305 * If not, don't touch the breakpoint; we might just create
 306 * a disaster.
307 */
308 ftrace_addr = (unsigned long)FTRACE_ADDR;
309 nop = ftrace_call_replace(ip, ftrace_addr);
310
311 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
312 return -EINVAL;
313 }
314
315 return probe_kernel_write((void *)ip, &nop[0], 1);
316}
317
318static int add_update_code(unsigned long ip, unsigned const char *new)
319{
320 /* skip breakpoint */
321 ip++;
322 new++;
323 if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
324 return -EPERM;
325 return 0;
326}
327
328static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
329{
330 unsigned long ip = rec->ip;
331 unsigned const char *new;
332
333 new = ftrace_call_replace(ip, addr);
334 return add_update_code(ip, new);
335}
336
337static int add_update_nop(struct dyn_ftrace *rec)
338{
339 unsigned long ip = rec->ip;
340 unsigned const char *new;
341
342 new = ftrace_nop_replace();
343 return add_update_code(ip, new);
344}
345
346static int add_update(struct dyn_ftrace *rec, int enable)
347{
348 unsigned long ftrace_addr;
349 int ret;
350
351 ret = ftrace_test_record(rec, enable);
352
353 ftrace_addr = (unsigned long)FTRACE_ADDR;
354
355 switch (ret) {
356 case FTRACE_UPDATE_IGNORE:
357 return 0;
358
359 case FTRACE_UPDATE_MAKE_CALL:
360 /* converting nop to call */
361 return add_update_call(rec, ftrace_addr);
362
363 case FTRACE_UPDATE_MAKE_NOP:
364 /* converting a call to a nop */
365 return add_update_nop(rec);
366 }
367
368 return 0;
369}
370
371static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
372{
373 unsigned long ip = rec->ip;
374 unsigned const char *new;
375
376 new = ftrace_call_replace(ip, addr);
377
378 if (ftrace_write(ip, new, 1))
379 return -EPERM;
380
381 return 0;
382}
383
384static int finish_update_nop(struct dyn_ftrace *rec)
385{
386 unsigned long ip = rec->ip;
387 unsigned const char *new;
388
389 new = ftrace_nop_replace();
390
391 if (ftrace_write(ip, new, 1))
392 return -EPERM;
393 return 0;
394}
395
396static int finish_update(struct dyn_ftrace *rec, int enable)
397{
398 unsigned long ftrace_addr;
399 int ret;
400
401 ret = ftrace_update_record(rec, enable);
402
403 ftrace_addr = (unsigned long)FTRACE_ADDR;
404
405 switch (ret) {
406 case FTRACE_UPDATE_IGNORE:
407 return 0;
408
409 case FTRACE_UPDATE_MAKE_CALL:
410 /* converting nop to call */
411 return finish_update_call(rec, ftrace_addr);
412
413 case FTRACE_UPDATE_MAKE_NOP:
414 /* converting a call to a nop */
415 return finish_update_nop(rec);
416 }
417
418 return 0;
419}
420
421static void do_sync_core(void *data)
422{
423 sync_core();
424}
425
426static void run_sync(void)
427{
428 int enable_irqs = irqs_disabled();
429
 430 /* We may be called with interrupts disabled (on bootup). */
431 if (enable_irqs)
432 local_irq_enable();
433 on_each_cpu(do_sync_core, NULL, 1);
434 if (enable_irqs)
435 local_irq_disable();
436}
437
438void ftrace_replace_code(int enable)
439{
440 struct ftrace_rec_iter *iter;
441 struct dyn_ftrace *rec;
442 const char *report = "adding breakpoints";
443 int count = 0;
444 int ret;
445
446 for_ftrace_rec_iter(iter) {
447 rec = ftrace_rec_iter_record(iter);
448
449 ret = add_breakpoints(rec, enable);
450 if (ret)
451 goto remove_breakpoints;
452 count++;
453 }
454
455 run_sync();
456
457 report = "updating code";
458
459 for_ftrace_rec_iter(iter) {
460 rec = ftrace_rec_iter_record(iter);
461
462 ret = add_update(rec, enable);
463 if (ret)
464 goto remove_breakpoints;
465 }
466
467 run_sync();
468
469 report = "removing breakpoints";
470
471 for_ftrace_rec_iter(iter) {
472 rec = ftrace_rec_iter_record(iter);
473
474 ret = finish_update(rec, enable);
475 if (ret)
476 goto remove_breakpoints;
477 }
478
479 run_sync();
480
481 return;
482
483 remove_breakpoints:
484 ftrace_bug(ret, rec ? rec->ip : 0);
485 printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
486 for_ftrace_rec_iter(iter) {
487 rec = ftrace_rec_iter_record(iter);
488 remove_breakpoint(rec);
489 }
490}
491
492void arch_ftrace_update_code(int command)
493{
494 modifying_ftrace_code++;
495
496 ftrace_modify_all_code(command);
497
498 modifying_ftrace_code--;
499}
500
337int __init ftrace_dyn_arch_init(void *data) 501int __init ftrace_dyn_arch_init(void *data)
338{ 502{
 339 /* The return code is returned via data */ 503 /* The return code is returned via data */
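The three passes in ftrace_replace_code() above — breakpoint in, tail bytes written behind it, first byte written last, with run_sync() serializing every CPU between phases — are what make rewriting a live 5-byte instruction safe. A toy userspace sketch of that ordering, with a byte buffer standing in for the patch site and a stub for the cross-CPU sync (all names here are illustrative, not kernel API):

	#include <stdio.h>
	#include <string.h>

	#define SLOT_SIZE 5			/* MCOUNT_INSN_SIZE on x86 */
	#define BRK 0xcc			/* int3 opcode */

	static unsigned char slot[SLOT_SIZE];	/* stands in for the patch site */

	static void sync_cores(void)		/* stands in for run_sync() */
	{
	}

	static void patch(const unsigned char *new)
	{
		slot[0] = BRK;			/* phase 1: breakpoint in */
		sync_cores();			/* every CPU now traps here */
		memcpy(slot + 1, new + 1, SLOT_SIZE - 1); /* phase 2: tail bytes */
		sync_cores();			/* tail visible everywhere */
		slot[0] = new[0];		/* phase 3: final first byte */
		sync_cores();
	}

	int main(void)
	{
		const unsigned char nop[SLOT_SIZE]  = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
		const unsigned char call[SLOT_SIZE] = { 0xe8, 0x78, 0x56, 0x34, 0x12 };
		int i;

		memcpy(slot, nop, SLOT_SIZE);
		patch(call);			/* nop -> call, never half-new */
		for (i = 0; i < SLOT_SIZE; i++)
			printf("%02x ", slot[i]);
		printf("\n");
		return 0;
	}

At no point does a reader see a partially written instruction: it sees the old bytes, an int3 (skipped as a nop by ftrace_int3_handler), or the new bytes.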
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index ce0be7cd085e..463c9797ca6a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -21,6 +21,7 @@
21#include <asm/msr-index.h> 21#include <asm/msr-index.h>
22#include <asm/cpufeature.h> 22#include <asm/cpufeature.h>
23#include <asm/percpu.h> 23#include <asm/percpu.h>
24#include <asm/nops.h>
24 25
25/* Physical address */ 26/* Physical address */
26#define pa(X) ((X) - __PAGE_OFFSET) 27#define pa(X) ((X) - __PAGE_OFFSET)
@@ -363,28 +364,23 @@ default_entry:
363 pushl $0 364 pushl $0
364 popfl 365 popfl
365 366
366#ifdef CONFIG_SMP
367 cmpb $0, ready
368 jnz checkCPUtype
369#endif /* CONFIG_SMP */
370
371/* 367/*
372 * start system 32-bit setup. We need to re-do some of the things done 368 * start system 32-bit setup. We need to re-do some of the things done
373 * in 16-bit mode for the "real" operations. 369 * in 16-bit mode for the "real" operations.
374 */ 370 */
375 call setup_idt 371 movl setup_once_ref,%eax
376 372 andl %eax,%eax
377checkCPUtype: 373 jz 1f # Did we do this already?
378 374 call *%eax
379 movl $-1,X86_CPUID # -1 for no CPUID initially 3751:
380 376
381/* check if it is 486 or 386. */ 377/* check if it is 486 or 386. */
382/* 378/*
383 * XXX - this does a lot of unnecessary setup. Alignment checks don't 379 * XXX - this does a lot of unnecessary setup. Alignment checks don't
384 * apply at our cpl of 0 and the stack ought to be aligned already, and 380 * apply at our cpl of 0 and the stack ought to be aligned already, and
385 * we don't need to preserve eflags. 381 * we don't need to preserve eflags.
386 */ 382 */
387 383 movl $-1,X86_CPUID # -1 for no CPUID initially
388 movb $3,X86 # at least 386 384 movb $3,X86 # at least 386
389 pushfl # push EFLAGS 385 pushfl # push EFLAGS
390 popl %eax # get EFLAGS 386 popl %eax # get EFLAGS
@@ -450,21 +446,6 @@ is386: movl $2,%ecx # set MP
450 movl $(__KERNEL_PERCPU), %eax 446 movl $(__KERNEL_PERCPU), %eax
451 movl %eax,%fs # set this cpu's percpu 447 movl %eax,%fs # set this cpu's percpu
452 448
453#ifdef CONFIG_CC_STACKPROTECTOR
454 /*
455 * The linker can't handle this by relocation. Manually set
456 * base address in stack canary segment descriptor.
457 */
458 cmpb $0,ready
459 jne 1f
460 movl $gdt_page,%eax
461 movl $stack_canary,%ecx
462 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
463 shrl $16, %ecx
464 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
465 movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
4661:
467#endif
468 movl $(__KERNEL_STACK_CANARY),%eax 449 movl $(__KERNEL_STACK_CANARY),%eax
469 movl %eax,%gs 450 movl %eax,%gs
470 451
@@ -473,7 +454,6 @@ is386: movl $2,%ecx # set MP
473 454
474 cld # gcc2 wants the direction flag cleared at all times 455 cld # gcc2 wants the direction flag cleared at all times
475 pushl $0 # fake return address for unwinder 456 pushl $0 # fake return address for unwinder
476 movb $1, ready
477 jmp *(initial_code) 457 jmp *(initial_code)
478 458
479/* 459/*
@@ -495,81 +475,122 @@ check_x87:
495 .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ 475 .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
496 ret 476 ret
497 477
478
479#include "verify_cpu.S"
480
498/* 481/*
499 * setup_idt 482 * setup_once
500 * 483 *
 501 * sets up an idt with 256 entries pointing to 484 * The setup work we only want to run on the BSP.
502 * ignore_int, interrupt gates. It doesn't actually load
503 * idt - that can be done only after paging has been enabled
504 * and the kernel moved to PAGE_OFFSET. Interrupts
505 * are enabled elsewhere, when we can be relatively
506 * sure everything is ok.
507 * 485 *
508 * Warning: %esi is live across this function. 486 * Warning: %esi is live across this function.
509 */ 487 */
510setup_idt: 488__INIT
511 lea ignore_int,%edx 489setup_once:
512 movl $(__KERNEL_CS << 16),%eax 490 /*
 513 movw %dx,%ax /* selector = 0x0010 = cs */ 491 * Set up an idt with 256 entries pointing to ignore_int,
514 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ 492 * interrupt gates. It doesn't actually load idt - that needs
493 * to be done on each CPU. Interrupts are enabled elsewhere,
494 * when we can be relatively sure everything is ok.
495 */
515 496
516 lea idt_table,%edi 497 movl $idt_table,%edi
517 mov $256,%ecx 498 movl $early_idt_handlers,%eax
518rp_sidt: 499 movl $NUM_EXCEPTION_VECTORS,%ecx
5001:
519 movl %eax,(%edi) 501 movl %eax,(%edi)
520 movl %edx,4(%edi) 502 movl %eax,4(%edi)
503 /* interrupt gate, dpl=0, present */
504 movl $(0x8E000000 + __KERNEL_CS),2(%edi)
505 addl $9,%eax
521 addl $8,%edi 506 addl $8,%edi
522 dec %ecx 507 loop 1b
523 jne rp_sidt
524 508
525.macro set_early_handler handler,trapno 509 movl $256 - NUM_EXCEPTION_VECTORS,%ecx
526 lea \handler,%edx 510 movl $ignore_int,%edx
527 movl $(__KERNEL_CS << 16),%eax 511 movl $(__KERNEL_CS << 16),%eax
528 movw %dx,%ax 512 movw %dx,%ax /* selector = 0x0010 = cs */
529 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ 513 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
530 lea idt_table,%edi 5142:
531 movl %eax,8*\trapno(%edi) 515 movl %eax,(%edi)
532 movl %edx,8*\trapno+4(%edi) 516 movl %edx,4(%edi)
533.endm 517 addl $8,%edi
518 loop 2b
534 519
535 set_early_handler handler=early_divide_err,trapno=0 520#ifdef CONFIG_CC_STACKPROTECTOR
536 set_early_handler handler=early_illegal_opcode,trapno=6 521 /*
537 set_early_handler handler=early_protection_fault,trapno=13 522 * Configure the stack canary. The linker can't handle this by
538 set_early_handler handler=early_page_fault,trapno=14 523 * relocation. Manually set base address in stack canary
524 * segment descriptor.
525 */
526 movl $gdt_page,%eax
527 movl $stack_canary,%ecx
528 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
529 shrl $16, %ecx
530 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
531 movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
532#endif
539 533
534 andl $0,setup_once_ref /* Once is enough, thanks */
540 ret 535 ret
541 536
542early_divide_err: 537ENTRY(early_idt_handlers)
543 xor %edx,%edx 538 # 36(%esp) %eflags
544 pushl $0 /* fake errcode */ 539 # 32(%esp) %cs
545 jmp early_fault 540 # 28(%esp) %eip
 541 # 24(%esp) error code
542 i = 0
543 .rept NUM_EXCEPTION_VECTORS
544 .if (EXCEPTION_ERRCODE_MASK >> i) & 1
545 ASM_NOP2
546 .else
547 pushl $0 # Dummy error code, to make stack frame uniform
548 .endif
549 pushl $i # 20(%esp) Vector number
550 jmp early_idt_handler
551 i = i + 1
552 .endr
553ENDPROC(early_idt_handlers)
554
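Each stub the .rept above emits works out to a fixed 9 bytes — 2 for the error-code pad (ASM_NOP2, or pushl of an imm8 zero), 2 for pushl $i (i < 32, again imm8), 5 for the near jmp — which is exactly why setup_once can walk the table with a bare "addl $9,%eax". The jmp stays near rather than short because early_idt_handler is global, as the comment below notes.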
555 /* This is global to keep gas from relaxing the jumps */
556ENTRY(early_idt_handler)
557 cld
558 cmpl $2,%ss:early_recursion_flag
559 je hlt_loop
560 incl %ss:early_recursion_flag
546 561
547early_illegal_opcode: 562 push %eax # 16(%esp)
548 movl $6,%edx 563 push %ecx # 12(%esp)
549 pushl $0 /* fake errcode */ 564 push %edx # 8(%esp)
550 jmp early_fault 565 push %ds # 4(%esp)
566 push %es # 0(%esp)
567 movl $(__KERNEL_DS),%eax
568 movl %eax,%ds
569 movl %eax,%es
551 570
552early_protection_fault: 571 cmpl $(__KERNEL_CS),32(%esp)
553 movl $13,%edx 572 jne 10f
554 jmp early_fault
555 573
556early_page_fault: 574 leal 28(%esp),%eax # Pointer to %eip
557 movl $14,%edx 575 call early_fixup_exception
558 jmp early_fault 576 andl %eax,%eax
577 jnz ex_entry /* found an exception entry */
559 578
560early_fault: 57910:
561 cld
562#ifdef CONFIG_PRINTK 580#ifdef CONFIG_PRINTK
563 pusha 581 xorl %eax,%eax
564 movl $(__KERNEL_DS),%eax 582 movw %ax,2(%esp) /* clean up the segment values on some cpus */
565 movl %eax,%ds 583 movw %ax,6(%esp)
566 movl %eax,%es 584 movw %ax,34(%esp)
567 cmpl $2,early_recursion_flag 585 leal 40(%esp),%eax
568 je hlt_loop 586 pushl %eax /* %esp before the exception */
569 incl early_recursion_flag 587 pushl %ebx
588 pushl %ebp
589 pushl %esi
590 pushl %edi
570 movl %cr2,%eax 591 movl %cr2,%eax
571 pushl %eax 592 pushl %eax
572 pushl %edx /* trapno */ 593 pushl (20+6*4)(%esp) /* trapno */
573 pushl $fault_msg 594 pushl $fault_msg
574 call printk 595 call printk
575#endif 596#endif
@@ -578,6 +599,17 @@ hlt_loop:
578 hlt 599 hlt
579 jmp hlt_loop 600 jmp hlt_loop
580 601
602ex_entry:
603 pop %es
604 pop %ds
605 pop %edx
606 pop %ecx
607 pop %eax
608 addl $8,%esp /* drop vector number and error code */
609 decl %ss:early_recursion_flag
610 iret
611ENDPROC(early_idt_handler)
612
581/* This is the default interrupt "handler" :-) */ 613/* This is the default interrupt "handler" :-) */
582 ALIGN 614 ALIGN
583ignore_int: 615ignore_int:
@@ -611,13 +643,18 @@ ignore_int:
611 popl %eax 643 popl %eax
612#endif 644#endif
613 iret 645 iret
646ENDPROC(ignore_int)
647__INITDATA
648 .align 4
649early_recursion_flag:
650 .long 0
614 651
615#include "verify_cpu.S" 652__REFDATA
616 653 .align 4
617 __REFDATA
618.align 4
619ENTRY(initial_code) 654ENTRY(initial_code)
620 .long i386_start_kernel 655 .long i386_start_kernel
656ENTRY(setup_once_ref)
657 .long setup_once
621 658
622/* 659/*
623 * BSS section 660 * BSS section
@@ -670,22 +707,19 @@ ENTRY(initial_page_table)
670ENTRY(stack_start) 707ENTRY(stack_start)
671 .long init_thread_union+THREAD_SIZE 708 .long init_thread_union+THREAD_SIZE
672 709
673early_recursion_flag: 710__INITRODATA
674 .long 0
675
676ready: .byte 0
677
678int_msg: 711int_msg:
679 .asciz "Unknown interrupt or fault at: %p %p %p\n" 712 .asciz "Unknown interrupt or fault at: %p %p %p\n"
680 713
681fault_msg: 714fault_msg:
682/* fault info: */ 715/* fault info: */
683 .ascii "BUG: Int %d: CR2 %p\n" 716 .ascii "BUG: Int %d: CR2 %p\n"
684/* pusha regs: */ 717/* regs pushed in early_idt_handler: */
685 .ascii " EDI %p ESI %p EBP %p ESP %p\n" 718 .ascii " EDI %p ESI %p EBP %p EBX %p\n"
686 .ascii " EBX %p EDX %p ECX %p EAX %p\n" 719 .ascii " ESP %p ES %p DS %p\n"
720 .ascii " EDX %p ECX %p EAX %p\n"
687/* fault frame: */ 721/* fault frame: */
688 .ascii " err %p EIP %p CS %p flg %p\n" 722 .ascii " vec %p err %p EIP %p CS %p flg %p\n"
689 .ascii "Stack: %p %p %p %p %p %p %p %p\n" 723 .ascii "Stack: %p %p %p %p %p %p %p %p\n"
690 .ascii " %p %p %p %p %p %p %p %p\n" 724 .ascii " %p %p %p %p %p %p %p %p\n"
691 .asciz " %p %p %p %p %p %p %p %p\n" 725 .asciz " %p %p %p %p %p %p %p %p\n"
@@ -699,6 +733,7 @@ fault_msg:
699 * segment size, and 32-bit linear address value: 733 * segment size, and 32-bit linear address value:
700 */ 734 */
701 735
736 .data
702.globl boot_gdt_descr 737.globl boot_gdt_descr
703.globl idt_descr 738.globl idt_descr
704 739
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 40f4eb3766d1..7a40f2447321 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,12 +19,15 @@
19#include <asm/cache.h> 19#include <asm/cache.h>
20#include <asm/processor-flags.h> 20#include <asm/processor-flags.h>
21#include <asm/percpu.h> 21#include <asm/percpu.h>
22#include <asm/nops.h>
22 23
23#ifdef CONFIG_PARAVIRT 24#ifdef CONFIG_PARAVIRT
24#include <asm/asm-offsets.h> 25#include <asm/asm-offsets.h>
25#include <asm/paravirt.h> 26#include <asm/paravirt.h>
27#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
26#else 28#else
27#define GET_CR2_INTO_RCX movq %cr2, %rcx 29#define GET_CR2_INTO(reg) movq %cr2, reg
30#define INTERRUPT_RETURN iretq
28#endif 31#endif
29 32
30/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE 33/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
@@ -270,36 +273,56 @@ bad_address:
270 jmp bad_address 273 jmp bad_address
271 274
272 .section ".init.text","ax" 275 .section ".init.text","ax"
273#ifdef CONFIG_EARLY_PRINTK
274 .globl early_idt_handlers 276 .globl early_idt_handlers
275early_idt_handlers: 277early_idt_handlers:
278 # 104(%rsp) %rflags
279 # 96(%rsp) %cs
280 # 88(%rsp) %rip
281 # 80(%rsp) error code
276 i = 0 282 i = 0
277 .rept NUM_EXCEPTION_VECTORS 283 .rept NUM_EXCEPTION_VECTORS
278 movl $i, %esi 284 .if (EXCEPTION_ERRCODE_MASK >> i) & 1
285 ASM_NOP2
286 .else
287 pushq $0 # Dummy error code, to make stack frame uniform
288 .endif
289 pushq $i # 72(%rsp) Vector number
279 jmp early_idt_handler 290 jmp early_idt_handler
280 i = i + 1 291 i = i + 1
281 .endr 292 .endr
282#endif
283 293
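The removed open-coded test "testl $0x27d00,%eax" is what this mask replaces: 0x27d00 has bits 8, 10, 11, 12, 13, 14 and 17 set — #DF, #TS, #NP, #SS, #GP and #PF plus #AC, exactly the exceptions for which the CPU pushes an error code — so EXCEPTION_ERRCODE_MASK is presumably that same constant hoisted into a header and shared with the 32-bit version above.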
284ENTRY(early_idt_handler) 294ENTRY(early_idt_handler)
285#ifdef CONFIG_EARLY_PRINTK 295 cld
296
286 cmpl $2,early_recursion_flag(%rip) 297 cmpl $2,early_recursion_flag(%rip)
287 jz 1f 298 jz 1f
288 incl early_recursion_flag(%rip) 299 incl early_recursion_flag(%rip)
289 GET_CR2_INTO_RCX 300
290 movq %rcx,%r9 301 pushq %rax # 64(%rsp)
291 xorl %r8d,%r8d # zero for error code 302 pushq %rcx # 56(%rsp)
292 movl %esi,%ecx # get vector number 303 pushq %rdx # 48(%rsp)
293 # Test %ecx against mask of vectors that push error code. 304 pushq %rsi # 40(%rsp)
294 cmpl $31,%ecx 305 pushq %rdi # 32(%rsp)
295 ja 0f 306 pushq %r8 # 24(%rsp)
296 movl $1,%eax 307 pushq %r9 # 16(%rsp)
297 salq %cl,%rax 308 pushq %r10 # 8(%rsp)
298 testl $0x27d00,%eax 309 pushq %r11 # 0(%rsp)
299 je 0f 310
300 popq %r8 # get error code 311 cmpl $__KERNEL_CS,96(%rsp)
3010: movq 0(%rsp),%rcx # get ip 312 jne 10f
302 movq 8(%rsp),%rdx # get cs 313
314 leaq 88(%rsp),%rdi # Pointer to %rip
315 call early_fixup_exception
316 andl %eax,%eax
317 jnz 20f # Found an exception entry
318
31910:
320#ifdef CONFIG_EARLY_PRINTK
321 GET_CR2_INTO(%r9) # can clobber any volatile register if pv
322 movl 80(%rsp),%r8d # error code
323 movl 72(%rsp),%esi # vector number
324 movl 96(%rsp),%edx # %cs
325 movq 88(%rsp),%rcx # %rip
303 xorl %eax,%eax 326 xorl %eax,%eax
304 leaq early_idt_msg(%rip),%rdi 327 leaq early_idt_msg(%rip),%rdi
305 call early_printk 328 call early_printk
@@ -308,17 +331,32 @@ ENTRY(early_idt_handler)
308 call dump_stack 331 call dump_stack
309#ifdef CONFIG_KALLSYMS 332#ifdef CONFIG_KALLSYMS
310 leaq early_idt_ripmsg(%rip),%rdi 333 leaq early_idt_ripmsg(%rip),%rdi
311 movq 0(%rsp),%rsi # get rip again 334 movq 40(%rsp),%rsi # %rip again
312 call __print_symbol 335 call __print_symbol
313#endif 336#endif
314#endif /* EARLY_PRINTK */ 337#endif /* EARLY_PRINTK */
3151: hlt 3381: hlt
316 jmp 1b 339 jmp 1b
317 340
318#ifdef CONFIG_EARLY_PRINTK 34120: # Exception table entry found
342 popq %r11
343 popq %r10
344 popq %r9
345 popq %r8
346 popq %rdi
347 popq %rsi
348 popq %rdx
349 popq %rcx
350 popq %rax
351 addq $16,%rsp # drop vector number and error code
352 decl early_recursion_flag(%rip)
353 INTERRUPT_RETURN
354
355 .balign 4
319early_recursion_flag: 356early_recursion_flag:
320 .long 0 357 .long 0
321 358
359#ifdef CONFIG_EARLY_PRINTK
322early_idt_msg: 360early_idt_msg:
323 .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" 361 .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
324early_idt_ripmsg: 362early_idt_ripmsg:
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 2d6e6498c176..f250431fb505 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -88,7 +88,7 @@ void kernel_fpu_begin(void)
88 __thread_clear_has_fpu(me); 88 __thread_clear_has_fpu(me);
89 /* We do 'stts()' in kernel_fpu_end() */ 89 /* We do 'stts()' in kernel_fpu_end() */
90 } else { 90 } else {
91 percpu_write(fpu_owner_task, NULL); 91 this_cpu_write(fpu_owner_task, NULL);
92 clts(); 92 clts();
93 } 93 }
94} 94}
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
deleted file mode 100644
index 43e9ccf44947..000000000000
--- a/arch/x86/kernel/init_task.c
+++ /dev/null
@@ -1,42 +0,0 @@
1#include <linux/mm.h>
2#include <linux/module.h>
3#include <linux/sched.h>
4#include <linux/init.h>
5#include <linux/init_task.h>
6#include <linux/fs.h>
7#include <linux/mqueue.h>
8
9#include <asm/uaccess.h>
10#include <asm/pgtable.h>
11#include <asm/desc.h>
12
13static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
14static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
15
16/*
17 * Initial thread structure.
18 *
19 * We need to make sure that this is THREAD_SIZE aligned due to the
20 * way process stacks are handled. This is done by having a special
21 * "init_task" linker map entry..
22 */
23union thread_union init_thread_union __init_task_data =
24 { INIT_THREAD_INFO(init_task) };
25
26/*
27 * Initial task structure.
28 *
29 * All other task structs will be allocated on slabs in fork.c
30 */
31struct task_struct init_task = INIT_TASK(init_task);
32EXPORT_SYMBOL(init_task);
33
34/*
35 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
36 * no more per-task TSS's. The TSS size is kept cacheline-aligned
37 * so they are allowed to end up in the .data..cacheline_aligned
38 * section. Since TSS's are completely CPU-local, we want them
39 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
40 */
41DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
42
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 58b7f27cb3e9..344faf8d0d62 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -127,8 +127,8 @@ void __cpuinit irq_ctx_init(int cpu)
127 return; 127 return;
128 128
129 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), 129 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
130 THREAD_FLAGS, 130 THREADINFO_GFP,
131 THREAD_ORDER)); 131 THREAD_SIZE_ORDER));
132 memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); 132 memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
133 irqctx->tinfo.cpu = cpu; 133 irqctx->tinfo.cpu = cpu;
134 irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; 134 irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
@@ -137,8 +137,8 @@ void __cpuinit irq_ctx_init(int cpu)
137 per_cpu(hardirq_ctx, cpu) = irqctx; 137 per_cpu(hardirq_ctx, cpu) = irqctx;
138 138
139 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), 139 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
140 THREAD_FLAGS, 140 THREADINFO_GFP,
141 THREAD_ORDER)); 141 THREAD_SIZE_ORDER));
142 memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); 142 memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
143 irqctx->tinfo.cpu = cpu; 143 irqctx->tinfo.cpu = cpu;
144 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); 144 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e213fc8408d2..e2f751efb7b1 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1037,9 +1037,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1037 "current sp %p does not match saved sp %p\n", 1037 "current sp %p does not match saved sp %p\n",
1038 stack_addr(regs), kcb->jprobe_saved_sp); 1038 stack_addr(regs), kcb->jprobe_saved_sp);
1039 printk(KERN_ERR "Saved registers for jprobe %p\n", jp); 1039 printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
1040 show_registers(saved_regs); 1040 show_regs(saved_regs);
1041 printk(KERN_ERR "Current registers\n"); 1041 printk(KERN_ERR "Current registers\n");
1042 show_registers(regs); 1042 show_regs(regs);
1043 BUG(); 1043 BUG();
1044 } 1044 }
1045 *regs = kcb->jprobe_saved_regs; 1045 *regs = kcb->jprobe_saved_regs;
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index c9bda6d6035c..fbdfc6917180 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -299,12 +299,11 @@ static ssize_t reload_store(struct device *dev,
299{ 299{
300 unsigned long val; 300 unsigned long val;
301 int cpu = dev->id; 301 int cpu = dev->id;
302 int ret = 0; 302 ssize_t ret = 0;
303 char *end;
304 303
305 val = simple_strtoul(buf, &end, 0); 304 ret = kstrtoul(buf, 0, &val);
306 if (end == buf) 305 if (ret)
307 return -EINVAL; 306 return ret;
308 307
309 if (val == 1) { 308 if (val == 1) {
310 get_online_cpus(); 309 get_online_cpus();
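This is the same simple_strtoul to kstrtoul conversion as in dumpstack earlier, repeated for pci-calgary_64.c below; the point of the idiom is that malformed input now yields an error instead of a silent partial parse. In outline (a kernel-context fragment, not a complete function):

	unsigned long val;
	int ret;

	ret = kstrtoul(buf, 0, &val);	/* base 0: accepts 0x/0 prefixes */
	if (ret)			/* -EINVAL or -ERANGE on bad input */
		return ret;
	/* use val only after the whole string parsed cleanly */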
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 47acaf319165..bffdfd48c1f2 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -31,14 +31,6 @@
31#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/x86_init.h> 32#include <asm/x86_init.h>
33 33
34#define NMI_MAX_NAMELEN 16
35struct nmiaction {
36 struct list_head list;
37 nmi_handler_t handler;
38 unsigned int flags;
39 char *name;
40};
41
42struct nmi_desc { 34struct nmi_desc {
43 spinlock_t lock; 35 spinlock_t lock;
44 struct list_head head; 36 struct list_head head;
@@ -54,6 +46,14 @@ static struct nmi_desc nmi_desc[NMI_MAX] =
54 .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), 46 .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
55 .head = LIST_HEAD_INIT(nmi_desc[1].head), 47 .head = LIST_HEAD_INIT(nmi_desc[1].head),
56 }, 48 },
49 {
50 .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
51 .head = LIST_HEAD_INIT(nmi_desc[2].head),
52 },
53 {
54 .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
55 .head = LIST_HEAD_INIT(nmi_desc[3].head),
56 },
57 57
58}; 58};
59 59
@@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
84 84
85#define nmi_to_desc(type) (&nmi_desc[type]) 85#define nmi_to_desc(type) (&nmi_desc[type])
86 86
87static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 87static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
88{ 88{
89 struct nmi_desc *desc = nmi_to_desc(type); 89 struct nmi_desc *desc = nmi_to_desc(type);
90 struct nmiaction *a; 90 struct nmiaction *a;
@@ -107,11 +107,14 @@ static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs,
107 return handled; 107 return handled;
108} 108}
109 109
110static int __setup_nmi(unsigned int type, struct nmiaction *action) 110int __register_nmi_handler(unsigned int type, struct nmiaction *action)
111{ 111{
112 struct nmi_desc *desc = nmi_to_desc(type); 112 struct nmi_desc *desc = nmi_to_desc(type);
113 unsigned long flags; 113 unsigned long flags;
114 114
115 if (!action->handler)
116 return -EINVAL;
117
115 spin_lock_irqsave(&desc->lock, flags); 118 spin_lock_irqsave(&desc->lock, flags);
116 119
117 /* 120 /*
@@ -120,6 +123,8 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action)
120 * to manage expectations 123 * to manage expectations
121 */ 124 */
122 WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); 125 WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
126 WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
127 WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
123 128
124 /* 129 /*
125 * some handlers need to be executed first otherwise a fake 130 * some handlers need to be executed first otherwise a fake
@@ -133,8 +138,9 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action)
133 spin_unlock_irqrestore(&desc->lock, flags); 138 spin_unlock_irqrestore(&desc->lock, flags);
134 return 0; 139 return 0;
135} 140}
141EXPORT_SYMBOL(__register_nmi_handler);
136 142
137static struct nmiaction *__free_nmi(unsigned int type, const char *name) 143void unregister_nmi_handler(unsigned int type, const char *name)
138{ 144{
139 struct nmi_desc *desc = nmi_to_desc(type); 145 struct nmi_desc *desc = nmi_to_desc(type);
140 struct nmiaction *n; 146 struct nmiaction *n;
@@ -157,61 +163,16 @@ static struct nmiaction *__free_nmi(unsigned int type, const char *name)
157 163
158 spin_unlock_irqrestore(&desc->lock, flags); 164 spin_unlock_irqrestore(&desc->lock, flags);
159 synchronize_rcu(); 165 synchronize_rcu();
160 return (n);
161} 166}
162
163int register_nmi_handler(unsigned int type, nmi_handler_t handler,
164 unsigned long nmiflags, const char *devname)
165{
166 struct nmiaction *action;
167 int retval = -ENOMEM;
168
169 if (!handler)
170 return -EINVAL;
171
172 action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
173 if (!action)
174 goto fail_action;
175
176 action->handler = handler;
177 action->flags = nmiflags;
178 action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
179 if (!action->name)
180 goto fail_action_name;
181
182 retval = __setup_nmi(type, action);
183
184 if (retval)
185 goto fail_setup_nmi;
186
187 return retval;
188
189fail_setup_nmi:
190 kfree(action->name);
191fail_action_name:
192 kfree(action);
193fail_action:
194
195 return retval;
196}
197EXPORT_SYMBOL_GPL(register_nmi_handler);
198
199void unregister_nmi_handler(unsigned int type, const char *name)
200{
201 struct nmiaction *a;
202
203 a = __free_nmi(type, name);
204 if (a) {
205 kfree(a->name);
206 kfree(a);
207 }
208}
209
210EXPORT_SYMBOL_GPL(unregister_nmi_handler); 167EXPORT_SYMBOL_GPL(unregister_nmi_handler);
211 168
212static notrace __kprobes void 169static __kprobes void
213pci_serr_error(unsigned char reason, struct pt_regs *regs) 170pci_serr_error(unsigned char reason, struct pt_regs *regs)
214{ 171{
172 /* check to see if anyone registered against these types of errors */
173 if (nmi_handle(NMI_SERR, regs, false))
174 return;
175
215 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", 176 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
216 reason, smp_processor_id()); 177 reason, smp_processor_id());
217 178
@@ -236,15 +197,19 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
236 outb(reason, NMI_REASON_PORT); 197 outb(reason, NMI_REASON_PORT);
237} 198}
238 199
239static notrace __kprobes void 200static __kprobes void
240io_check_error(unsigned char reason, struct pt_regs *regs) 201io_check_error(unsigned char reason, struct pt_regs *regs)
241{ 202{
242 unsigned long i; 203 unsigned long i;
243 204
205 /* check to see if anyone registered against these types of errors */
206 if (nmi_handle(NMI_IO_CHECK, regs, false))
207 return;
208
244 pr_emerg( 209 pr_emerg(
245 "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", 210 "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
246 reason, smp_processor_id()); 211 reason, smp_processor_id());
247 show_registers(regs); 212 show_regs(regs);
248 213
249 if (panic_on_io_nmi) 214 if (panic_on_io_nmi)
250 panic("NMI IOCK error: Not continuing"); 215 panic("NMI IOCK error: Not continuing");
@@ -263,7 +228,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
263 outb(reason, NMI_REASON_PORT); 228 outb(reason, NMI_REASON_PORT);
264} 229}
265 230
266static notrace __kprobes void 231static __kprobes void
267unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 232unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
268{ 233{
269 int handled; 234 int handled;
@@ -305,7 +270,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
305static DEFINE_PER_CPU(bool, swallow_nmi); 270static DEFINE_PER_CPU(bool, swallow_nmi);
306static DEFINE_PER_CPU(unsigned long, last_nmi_rip); 271static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
307 272
308static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 273static __kprobes void default_do_nmi(struct pt_regs *regs)
309{ 274{
310 unsigned char reason = 0; 275 unsigned char reason = 0;
311 int handled; 276 int handled;
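With __setup_nmi() folded into the exported __register_nmi_handler(), struct nmiaction moves out of this file (presumably into <asm/nmi.h>, which this series also reworks) and is supplied by the caller rather than kzalloc'd here. A hedged usage sketch against the signature visible above, with the handler prototype assumed from the nmi_handler_t typedef elsewhere in the tree:

	static int mydev_nmi(unsigned int type, struct pt_regs *regs)
	{
		return NMI_DONE;	/* not ours; try the next handler */
	}

	static struct nmiaction mydev_na = {
		.handler = mydev_nmi,
		.name	 = "mydev",
	};

	/* ... */
	err = __register_nmi_handler(NMI_LOCAL, &mydev_na);
	/* and on teardown: unregister_nmi_handler(NMI_LOCAL, "mydev"); */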
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 2c39dcd510fa..e31bf8d5c4d2 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -13,6 +13,7 @@
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/delay.h> 14#include <linux/delay.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/percpu.h>
16 17
17#include <asm/apic.h> 18#include <asm/apic.h>
18#include <asm/nmi.h> 19#include <asm/nmi.h>
@@ -117,15 +118,15 @@ static void __init dotest(void (*testcase_fn)(void), int expected)
117 unexpected_testcase_failures++; 118 unexpected_testcase_failures++;
118 119
119 if (nmi_fail == FAILURE) 120 if (nmi_fail == FAILURE)
120 printk("FAILED |"); 121 printk(KERN_CONT "FAILED |");
121 else if (nmi_fail == TIMEOUT) 122 else if (nmi_fail == TIMEOUT)
122 printk("TIMEOUT|"); 123 printk(KERN_CONT "TIMEOUT|");
123 else 124 else
124 printk("ERROR |"); 125 printk(KERN_CONT "ERROR |");
125 dump_stack(); 126 dump_stack();
126 } else { 127 } else {
127 testcase_successes++; 128 testcase_successes++;
128 printk(" ok |"); 129 printk(KERN_CONT " ok |");
129 } 130 }
130 testcase_total++; 131 testcase_total++;
131 132
@@ -150,10 +151,10 @@ void __init nmi_selftest(void)
150 151
151 print_testname("remote IPI"); 152 print_testname("remote IPI");
152 dotest(remote_ipi, SUCCESS); 153 dotest(remote_ipi, SUCCESS);
153 printk("\n"); 154 printk(KERN_CONT "\n");
154 print_testname("local IPI"); 155 print_testname("local IPI");
155 dotest(local_ipi, SUCCESS); 156 dotest(local_ipi, SUCCESS);
156 printk("\n"); 157 printk(KERN_CONT "\n");
157 158
158 cleanup_nmi_testsuite(); 159 cleanup_nmi_testsuite();
159 160
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ab137605e694..9ce885996fd7 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -241,16 +241,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
241 241
242static inline void enter_lazy(enum paravirt_lazy_mode mode) 242static inline void enter_lazy(enum paravirt_lazy_mode mode)
243{ 243{
244 BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); 244 BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
245 245
246 percpu_write(paravirt_lazy_mode, mode); 246 this_cpu_write(paravirt_lazy_mode, mode);
247} 247}
248 248
249static void leave_lazy(enum paravirt_lazy_mode mode) 249static void leave_lazy(enum paravirt_lazy_mode mode)
250{ 250{
251 BUG_ON(percpu_read(paravirt_lazy_mode) != mode); 251 BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);
252 252
253 percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); 253 this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
254} 254}
255 255
256void paravirt_enter_lazy_mmu(void) 256void paravirt_enter_lazy_mmu(void)
@@ -267,7 +267,7 @@ void paravirt_start_context_switch(struct task_struct *prev)
267{ 267{
268 BUG_ON(preemptible()); 268 BUG_ON(preemptible());
269 269
270 if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { 270 if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
271 arch_leave_lazy_mmu_mode(); 271 arch_leave_lazy_mmu_mode();
272 set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); 272 set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
273 } 273 }
@@ -289,7 +289,7 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
289 if (in_interrupt()) 289 if (in_interrupt())
290 return PARAVIRT_LAZY_NONE; 290 return PARAVIRT_LAZY_NONE;
291 291
292 return percpu_read(paravirt_lazy_mode); 292 return this_cpu_read(paravirt_lazy_mode);
293} 293}
294 294
295void arch_flush_lazy_mmu_mode(void) 295void arch_flush_lazy_mmu_mode(void)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index d0b2fb9ccbb1..b72838bae64a 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1480,8 +1480,9 @@ cleanup:
1480static int __init calgary_parse_options(char *p) 1480static int __init calgary_parse_options(char *p)
1481{ 1481{
1482 unsigned int bridge; 1482 unsigned int bridge;
1483 unsigned long val;
1483 size_t len; 1484 size_t len;
1484 char* endp; 1485 ssize_t ret;
1485 1486
1486 while (*p) { 1487 while (*p) {
1487 if (!strncmp(p, "64k", 3)) 1488 if (!strncmp(p, "64k", 3))
@@ -1512,10 +1513,11 @@ static int __init calgary_parse_options(char *p)
1512 ++p; 1513 ++p;
1513 if (*p == '\0') 1514 if (*p == '\0')
1514 break; 1515 break;
1515 bridge = simple_strtoul(p, &endp, 0); 1516 ret = kstrtoul(p, 0, &val);
1516 if (p == endp) 1517 if (ret)
1517 break; 1518 break;
1518 1519
1520 bridge = val;
1519 if (bridge < MAX_PHB_BUS_NUM) { 1521 if (bridge < MAX_PHB_BUS_NUM) {
1520 printk(KERN_INFO "Calgary: disabling " 1522 printk(KERN_INFO "Calgary: disabling "
1521 "translation for PHB %#x\n", bridge); 1523 "translation for PHB %#x\n", bridge);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1d92a5ab6e8b..735279e54e59 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -27,6 +27,15 @@
27#include <asm/debugreg.h> 27#include <asm/debugreg.h>
28#include <asm/nmi.h> 28#include <asm/nmi.h>
29 29
30/*
31 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
32 * no more per-task TSS's. The TSS size is kept cacheline-aligned
33 * so they are allowed to end up in the .data..cacheline_aligned
34 * section. Since TSS's are completely CPU-local, we want them
35 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
36 */
37DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
38
30#ifdef CONFIG_X86_64 39#ifdef CONFIG_X86_64
31static DEFINE_PER_CPU(unsigned char, is_idle); 40static DEFINE_PER_CPU(unsigned char, is_idle);
32static ATOMIC_NOTIFIER_HEAD(idle_notifier); 41static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -47,10 +56,16 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
47struct kmem_cache *task_xstate_cachep; 56struct kmem_cache *task_xstate_cachep;
48EXPORT_SYMBOL_GPL(task_xstate_cachep); 57EXPORT_SYMBOL_GPL(task_xstate_cachep);
49 58
59/*
60 * this gets called so that we can store lazy state into memory and copy the
61 * current task into the new thread.
62 */
50int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 63int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
51{ 64{
52 int ret; 65 int ret;
53 66
67 unlazy_fpu(src);
68
54 *dst = *src; 69 *dst = *src;
55 if (fpu_allocated(&src->thread.fpu)) { 70 if (fpu_allocated(&src->thread.fpu)) {
56 memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); 71 memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
@@ -67,10 +82,9 @@ void free_thread_xstate(struct task_struct *tsk)
67 fpu_free(&tsk->thread.fpu); 82 fpu_free(&tsk->thread.fpu);
68} 83}
69 84
70void free_thread_info(struct thread_info *ti) 85void arch_release_task_struct(struct task_struct *tsk)
71{ 86{
72 free_thread_xstate(ti->task); 87 free_thread_xstate(tsk);
73 free_pages((unsigned long)ti, THREAD_ORDER);
74} 88}
75 89
76void arch_task_cache_init(void) 90void arch_task_cache_init(void)
@@ -81,6 +95,16 @@ void arch_task_cache_init(void)
81 SLAB_PANIC | SLAB_NOTRACK, NULL); 95 SLAB_PANIC | SLAB_NOTRACK, NULL);
82} 96}
83 97
98static inline void drop_fpu(struct task_struct *tsk)
99{
100 /*
101 * Forget coprocessor state..
102 */
103 tsk->fpu_counter = 0;
104 clear_fpu(tsk);
105 clear_used_math();
106}
107
84/* 108/*
85 * Free current thread data structures etc.. 109 * Free current thread data structures etc..
86 */ 110 */
@@ -103,12 +127,8 @@ void exit_thread(void)
103 put_cpu(); 127 put_cpu();
104 kfree(bp); 128 kfree(bp);
105 } 129 }
106}
107 130
108void show_regs(struct pt_regs *regs) 131 drop_fpu(me);
109{
110 show_registers(regs);
111 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0);
112} 132}
113 133
114void show_regs_common(void) 134void show_regs_common(void)
@@ -143,12 +163,7 @@ void flush_thread(void)
143 163
144 flush_ptrace_hw_breakpoint(tsk); 164 flush_ptrace_hw_breakpoint(tsk);
145 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 165 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
146 /* 166 drop_fpu(tsk);
147 * Forget coprocessor state..
148 */
149 tsk->fpu_counter = 0;
150 clear_fpu(tsk);
151 clear_used_math();
152} 167}
153 168
154static void hard_disable_TSC(void) 169static void hard_disable_TSC(void)
@@ -377,7 +392,7 @@ static inline void play_dead(void)
377#ifdef CONFIG_X86_64 392#ifdef CONFIG_X86_64
378void enter_idle(void) 393void enter_idle(void)
379{ 394{
380 percpu_write(is_idle, 1); 395 this_cpu_write(is_idle, 1);
381 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 396 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
382} 397}
383 398
@@ -516,26 +531,6 @@ void stop_this_cpu(void *dummy)
516 } 531 }
517} 532}
518 533
519static void do_nothing(void *unused)
520{
521}
522
523/*
524 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
525 * pm_idle and update to new pm_idle value. Required while changing pm_idle
526 * handler on SMP systems.
527 *
528 * Caller must have changed pm_idle to the new value before the call. Old
529 * pm_idle value will not be used by any CPU after the return of this function.
530 */
531void cpu_idle_wait(void)
532{
533 smp_mb();
534 /* kick all the CPUs so that they exit out of pm_idle */
535 smp_call_function(do_nothing, NULL, 1);
536}
537EXPORT_SYMBOL_GPL(cpu_idle_wait);
538
539/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 534/* Default MONITOR/MWAIT with no hints, used for default C1 state */
540static void mwait_idle(void) 535static void mwait_idle(void)
541{ 536{
@@ -594,9 +589,17 @@ int mwait_usable(const struct cpuinfo_x86 *c)
594{ 589{
595 u32 eax, ebx, ecx, edx; 590 u32 eax, ebx, ecx, edx;
596 591
592 /* Use mwait if idle=mwait boot option is given */
597 if (boot_option_idle_override == IDLE_FORCE_MWAIT) 593 if (boot_option_idle_override == IDLE_FORCE_MWAIT)
598 return 1; 594 return 1;
599 595
596 /*
597 * Any idle= boot option other than idle=mwait means that we must not
598 * use mwait. Eg: idle=halt or idle=poll or idle=nomwait
599 */
600 if (boot_option_idle_override != IDLE_NO_OVERRIDE)
601 return 0;
602
600 if (c->cpuid_level < MWAIT_INFO) 603 if (c->cpuid_level < MWAIT_INFO)
601 return 0; 604 return 0;
602 605
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ae6847303e26..516fa186121b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -126,15 +126,6 @@ void release_thread(struct task_struct *dead_task)
126 release_vm86_irqs(dead_task); 126 release_vm86_irqs(dead_task);
127} 127}
128 128
129/*
130 * This gets called before we allocate a new thread and copy
131 * the current task into it.
132 */
133void prepare_to_copy(struct task_struct *tsk)
134{
135 unlazy_fpu(tsk);
136}
137
138int copy_thread(unsigned long clone_flags, unsigned long sp, 129int copy_thread(unsigned long clone_flags, unsigned long sp,
139 unsigned long unused, 130 unsigned long unused,
140 struct task_struct *p, struct pt_regs *regs) 131 struct task_struct *p, struct pt_regs *regs)
@@ -302,7 +293,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
302 293
303 switch_fpu_finish(next_p, fpu); 294 switch_fpu_finish(next_p, fpu);
304 295
305 percpu_write(current_task, next_p); 296 this_cpu_write(current_task, next_p);
306 297
307 return prev_p; 298 return prev_p;
308} 299}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 43d8b48b23e6..61cdf7fdf099 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -145,15 +145,6 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
145 return get_desc_base(&t->thread.tls_array[tls]); 145 return get_desc_base(&t->thread.tls_array[tls]);
146} 146}
147 147
148/*
149 * This gets called before we allocate a new thread and copy
150 * the current task into it.
151 */
152void prepare_to_copy(struct task_struct *tsk)
153{
154 unlazy_fpu(tsk);
155}
156
157int copy_thread(unsigned long clone_flags, unsigned long sp, 148int copy_thread(unsigned long clone_flags, unsigned long sp,
158 unsigned long unused, 149 unsigned long unused,
159 struct task_struct *p, struct pt_regs *regs) 150 struct task_struct *p, struct pt_regs *regs)
@@ -237,7 +228,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
237 current->thread.usersp = new_sp; 228 current->thread.usersp = new_sp;
238 regs->ip = new_ip; 229 regs->ip = new_ip;
239 regs->sp = new_sp; 230 regs->sp = new_sp;
240 percpu_write(old_rsp, new_sp); 231 this_cpu_write(old_rsp, new_sp);
241 regs->cs = _cs; 232 regs->cs = _cs;
242 regs->ss = _ss; 233 regs->ss = _ss;
243 regs->flags = X86_EFLAGS_IF; 234 regs->flags = X86_EFLAGS_IF;
@@ -359,11 +350,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
359 /* 350 /*
360 * Switch the PDA and FPU contexts. 351 * Switch the PDA and FPU contexts.
361 */ 352 */
362 prev->usersp = percpu_read(old_rsp); 353 prev->usersp = this_cpu_read(old_rsp);
363 percpu_write(old_rsp, next->usersp); 354 this_cpu_write(old_rsp, next->usersp);
364 percpu_write(current_task, next_p); 355 this_cpu_write(current_task, next_p);
365 356
366 percpu_write(kernel_stack, 357 this_cpu_write(kernel_stack,
367 (unsigned long)task_stack_page(next_p) + 358 (unsigned long)task_stack_page(next_p) +
368 THREAD_SIZE - KERNEL_STACK_OFFSET); 359 THREAD_SIZE - KERNEL_STACK_OFFSET);
369 360
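Both the process_32.c and process_64.c hunks are mechanical conversions from the old percpu_read()/percpu_write() accessors to this_cpu_read()/this_cpu_write(). A before/after sketch on one of the variables touched above (semantics assumed identical; only the accessor name changes):

	DEFINE_PER_CPU(unsigned long, old_rsp);

	static void save_and_switch_sp(unsigned long next_sp, unsigned long *prev_sp)
	{
		*prev_sp = this_cpu_read(old_rsp);	/* was: percpu_read(old_rsp) */
		this_cpu_write(old_rsp, next_sp);	/* was: percpu_write(old_rsp, next_sp) */
	}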
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 685845cf16e0..13b1990c7c58 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1480,7 +1480,11 @@ long syscall_trace_enter(struct pt_regs *regs)
1480 regs->flags |= X86_EFLAGS_TF; 1480 regs->flags |= X86_EFLAGS_TF;
1481 1481
1482 /* do the secure computing check first */ 1482 /* do the secure computing check first */
1483 secure_computing(regs->orig_ax); 1483 if (secure_computing(regs->orig_ax)) {
1484 /* seccomp failures shouldn't expose any additional code. */
1485 ret = -1L;
1486 goto out;
1487 }
1484 1488
1485 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) 1489 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
1486 ret = -1L; 1490 ret = -1L;
@@ -1505,6 +1509,7 @@ long syscall_trace_enter(struct pt_regs *regs)
1505 regs->dx, regs->r10); 1509 regs->dx, regs->r10);
1506#endif 1510#endif
1507 1511
1512out:
1508 return ret ?: regs->orig_ax; 1513 return ret ?: regs->orig_ax;
1509} 1514}
1510 1515
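A seccomp denial now takes effect immediately: the syscall is forced to fail and none of the remaining entry-tracing hooks run, so a filtered process cannot reach further kernel code through them. Condensed restatement of the resulting flow (a sketch; the elided middle is the existing ptrace/tracepoint/audit work):

	long syscall_trace_enter(struct pt_regs *regs)		/* sketch */
	{
		long ret = 0;

		if (secure_computing(regs->orig_ax)) {
			ret = -1L;		/* deny; expose no additional code */
			goto out;
		}
		/* ... TIF_SYSCALL_EMU, ptrace, tracepoints, audit ... */
	out:
		return ret ?: regs->orig_ax;	/* -1 forces the syscall to fail */
	}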
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index d840e69a853c..77215c23fba1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -39,7 +39,8 @@ static int reboot_mode;
39enum reboot_type reboot_type = BOOT_ACPI; 39enum reboot_type reboot_type = BOOT_ACPI;
40int reboot_force; 40int reboot_force;
41 41
42/* This variable is used privately to keep track of whether or not 42/*
43 * This variable is used privately to keep track of whether or not
43 * reboot_type is still set to its default value (i.e., reboot= hasn't 44 * reboot_type is still set to its default value (i.e., reboot= hasn't
44 * been set on the command line). This is needed so that we can 45 * been set on the command line). This is needed so that we can
45 * suppress DMI scanning for reboot quirks. Without it, it's 46 * suppress DMI scanning for reboot quirks. Without it, it's
@@ -51,7 +52,8 @@ static int reboot_default = 1;
51static int reboot_cpu = -1; 52static int reboot_cpu = -1;
52#endif 53#endif
53 54
54/* This is set if we need to go through the 'emergency' path. 55/*
56 * This is set if we need to go through the 'emergency' path.
 55 * When machine_emergency_restart() is called, we may be in 57 * When machine_emergency_restart() is called, we may be in
56 * an inconsistent state and won't be able to do a clean cleanup 58 * an inconsistent state and won't be able to do a clean cleanup
57 */ 59 */
@@ -60,22 +62,24 @@ static int reboot_emergency;
60/* This is set by the PCI code if either type 1 or type 2 PCI is detected */ 62/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
61bool port_cf9_safe = false; 63bool port_cf9_safe = false;
62 64
63/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] 65/*
64 warm Don't set the cold reboot flag 66 * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
65 cold Set the cold reboot flag 67 * warm Don't set the cold reboot flag
66 bios Reboot by jumping through the BIOS (only for X86_32) 68 * cold Set the cold reboot flag
67 smp Reboot by executing reset on BSP or other CPU (only for X86_32) 69 * bios Reboot by jumping through the BIOS (only for X86_32)
68 triple Force a triple fault (init) 70 * smp Reboot by executing reset on BSP or other CPU (only for X86_32)
69 kbd Use the keyboard controller. cold reset (default) 71 * triple Force a triple fault (init)
70 acpi Use the RESET_REG in the FADT 72 * kbd Use the keyboard controller. cold reset (default)
71 efi Use efi reset_system runtime service 73 * acpi Use the RESET_REG in the FADT
72 pci Use the so-called "PCI reset register", CF9 74 * efi Use efi reset_system runtime service
73 force Avoid anything that could hang. 75 * pci Use the so-called "PCI reset register", CF9
76 * force Avoid anything that could hang.
74 */ 77 */
75static int __init reboot_setup(char *str) 78static int __init reboot_setup(char *str)
76{ 79{
77 for (;;) { 80 for (;;) {
78 /* Having anything passed on the command line via 81 /*
82 * Having anything passed on the command line via
79 * reboot= will cause us to disable DMI checking 83 * reboot= will cause us to disable DMI checking
80 * below. 84 * below.
81 */ 85 */
@@ -98,9 +102,11 @@ static int __init reboot_setup(char *str)
98 if (isdigit(*(str+2))) 102 if (isdigit(*(str+2)))
99 reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); 103 reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
100 } 104 }
101 /* we will leave sorting out the final value 105 /*
102 when we are ready to reboot, since we might not 106 * We will leave sorting out the final value
103 have detected BSP APIC ID or smp_num_cpu */ 107 * when we are ready to reboot, since we might not
108 * have detected BSP APIC ID or smp_num_cpu
109 */
104 break; 110 break;
105#endif /* CONFIG_SMP */ 111#endif /* CONFIG_SMP */
106 112
@@ -150,6 +156,82 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
150 return 0; 156 return 0;
151} 157}
152 158
159extern const unsigned char machine_real_restart_asm[];
160extern const u64 machine_real_restart_gdt[3];
161
162void machine_real_restart(unsigned int type)
163{
164 void *restart_va;
165 unsigned long restart_pa;
166 void (*restart_lowmem)(unsigned int);
167 u64 *lowmem_gdt;
168
169 local_irq_disable();
170
171 /*
172 * Write zero to CMOS register number 0x0f, which the BIOS POST
173 * routine will recognize as telling it to do a proper reboot. (Well
174 * that's what this book in front of me says -- it may only apply to
175 * the Phoenix BIOS though, it's not clear). At the same time,
176 * disable NMIs by setting the top bit in the CMOS address register,
177 * as we're about to do peculiar things to the CPU. I'm not sure if
178 * `outb_p' is needed instead of just `outb'. Use it to be on the
179 * safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
180 */
181 spin_lock(&rtc_lock);
182 CMOS_WRITE(0x00, 0x8f);
183 spin_unlock(&rtc_lock);
184
185 /*
186 * Switch back to the initial page table.
187 */
188 load_cr3(initial_page_table);
189
190 /*
191 * Write 0x1234 to absolute memory location 0x472. The BIOS reads
192 * this on booting to tell it to "Bypass memory test (also warm
193 * boot)". This seems like a fairly standard thing that gets set by
194 * REBOOT.COM programs, and the previous reset routine did this
195 * too. */
196 *((unsigned short *)0x472) = reboot_mode;
197
198 /* Patch the GDT in the low memory trampoline */
199 lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt);
200
201 restart_va = TRAMPOLINE_SYM(machine_real_restart_asm);
202 restart_pa = virt_to_phys(restart_va);
203 restart_lowmem = (void (*)(unsigned int))restart_pa;
204
205 /* GDT[0]: GDT self-pointer */
206 lowmem_gdt[0] =
207 (u64)(sizeof(machine_real_restart_gdt) - 1) +
208 ((u64)virt_to_phys(lowmem_gdt) << 16);
209 /* GDT[1]: 64K real mode code segment */
210 lowmem_gdt[1] =
211 GDT_ENTRY(0x009b, restart_pa, 0xffff);
212
213 /* Jump to the identity-mapped low memory code */
214 restart_lowmem(type);
215}
216#ifdef CONFIG_APM_MODULE
217EXPORT_SYMBOL(machine_real_restart);
218#endif
219
220#endif /* CONFIG_X86_32 */
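The GDT[0] self-pointer and the GDT[1] real-mode code segment above rely on the descriptor packing done by GDT_ENTRY(). The desc_defs.h macro is roughly the following (reproduced from memory for reference; the header is authoritative):

	#define GDT_ENTRY(flags, base, limit)			\
		((((base)  & 0xff000000ULL) << (56 - 24)) |	\
		 (((flags) & 0x0000f0ffULL) << 40) |		\
		 (((limit) & 0x000f0000ULL) << (48 - 16)) |	\
		 (((base)  & 0x00ffffffULL) << 16) |		\
		  ((limit) & 0x0000ffffULL))

So GDT_ENTRY(0x009b, restart_pa, 0xffff) builds a present, executable/readable 16-bit segment with a 64K limit based at the trampoline's physical address, which is exactly what the real-mode restart stub needs.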
221
222/*
 223 * Some Apple MacBook and MacBookPro models need reboot=p to be able to reboot
224 */
225static int __init set_pci_reboot(const struct dmi_system_id *d)
226{
227 if (reboot_type != BOOT_CF9) {
228 reboot_type = BOOT_CF9;
229 printk(KERN_INFO "%s series board detected. "
230 "Selecting PCI-method for reboots.\n", d->ident);
231 }
232 return 0;
233}
234
153static int __init set_kbd_reboot(const struct dmi_system_id *d) 235static int __init set_kbd_reboot(const struct dmi_system_id *d)
154{ 236{
155 if (reboot_type != BOOT_KBD) { 237 if (reboot_type != BOOT_KBD) {
@@ -159,7 +241,12 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
159 return 0; 241 return 0;
160} 242}
161 243
244/*
245 * This is a single dmi_table handling all reboot quirks. Note that
246 * REBOOT_BIOS is only available for 32bit
247 */
162static struct dmi_system_id __initdata reboot_dmi_table[] = { 248static struct dmi_system_id __initdata reboot_dmi_table[] = {
249#ifdef CONFIG_X86_32
163 { /* Handle problems with rebooting on Dell E520's */ 250 { /* Handle problems with rebooting on Dell E520's */
164 .callback = set_bios_reboot, 251 .callback = set_bios_reboot,
165 .ident = "Dell E520", 252 .ident = "Dell E520",
@@ -184,7 +271,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
184 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), 271 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
185 }, 272 },
186 }, 273 },
187 { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/ 274 { /* Handle problems with rebooting on Dell Optiplex 745's SFF */
188 .callback = set_bios_reboot, 275 .callback = set_bios_reboot,
189 .ident = "Dell OptiPlex 745", 276 .ident = "Dell OptiPlex 745",
190 .matches = { 277 .matches = {
@@ -192,7 +279,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
192 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), 279 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
193 }, 280 },
194 }, 281 },
195 { /* Handle problems with rebooting on Dell Optiplex 745's DFF*/ 282 { /* Handle problems with rebooting on Dell Optiplex 745's DFF */
196 .callback = set_bios_reboot, 283 .callback = set_bios_reboot,
197 .ident = "Dell OptiPlex 745", 284 .ident = "Dell OptiPlex 745",
198 .matches = { 285 .matches = {
@@ -201,7 +288,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
201 DMI_MATCH(DMI_BOARD_NAME, "0MM599"), 288 DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
202 }, 289 },
203 }, 290 },
204 { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */ 291 { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
205 .callback = set_bios_reboot, 292 .callback = set_bios_reboot,
206 .ident = "Dell OptiPlex 745", 293 .ident = "Dell OptiPlex 745",
207 .matches = { 294 .matches = {
@@ -210,7 +297,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
210 DMI_MATCH(DMI_BOARD_NAME, "0KW626"), 297 DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
211 }, 298 },
212 }, 299 },
213 { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ 300 { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
214 .callback = set_bios_reboot, 301 .callback = set_bios_reboot,
215 .ident = "Dell OptiPlex 330", 302 .ident = "Dell OptiPlex 330",
216 .matches = { 303 .matches = {
@@ -219,7 +306,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
219 DMI_MATCH(DMI_BOARD_NAME, "0KP561"), 306 DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
220 }, 307 },
221 }, 308 },
222 { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */ 309 { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
223 .callback = set_bios_reboot, 310 .callback = set_bios_reboot,
224 .ident = "Dell OptiPlex 360", 311 .ident = "Dell OptiPlex 360",
225 .matches = { 312 .matches = {
@@ -228,7 +315,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
228 DMI_MATCH(DMI_BOARD_NAME, "0T656F"), 315 DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
229 }, 316 },
230 }, 317 },
231 { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/ 318 { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */
232 .callback = set_bios_reboot, 319 .callback = set_bios_reboot,
233 .ident = "Dell OptiPlex 760", 320 .ident = "Dell OptiPlex 760",
234 .matches = { 321 .matches = {
@@ -301,7 +388,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
301 DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), 388 DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"),
302 }, 389 },
303 }, 390 },
304 { /* Handle problems with rebooting on ASUS P4S800 */ 391 { /* Handle problems with rebooting on ASUS P4S800 */
305 .callback = set_bios_reboot, 392 .callback = set_bios_reboot,
306 .ident = "ASUS P4S800", 393 .ident = "ASUS P4S800",
307 .matches = { 394 .matches = {
@@ -309,7 +396,9 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
309 DMI_MATCH(DMI_BOARD_NAME, "P4S800"), 396 DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
310 }, 397 },
311 }, 398 },
312 { /* Handle reboot issue on Acer Aspire one */ 399#endif /* CONFIG_X86_32 */
400
401 { /* Handle reboot issue on Acer Aspire one */
313 .callback = set_kbd_reboot, 402 .callback = set_kbd_reboot,
314 .ident = "Acer Aspire One A110", 403 .ident = "Acer Aspire One A110",
315 .matches = { 404 .matches = {
@@ -317,96 +406,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
317 DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), 406 DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
318 }, 407 },
319 }, 408 },
320 { }
321};
322
323static int __init reboot_init(void)
324{
325 /* Only do the DMI check if reboot_type hasn't been overridden
326 * on the command line
327 */
328 if (reboot_default) {
329 dmi_check_system(reboot_dmi_table);
330 }
331 return 0;
332}
333core_initcall(reboot_init);
334
335extern const unsigned char machine_real_restart_asm[];
336extern const u64 machine_real_restart_gdt[3];
337
338void machine_real_restart(unsigned int type)
339{
340 void *restart_va;
341 unsigned long restart_pa;
342 void (*restart_lowmem)(unsigned int);
343 u64 *lowmem_gdt;
344
345 local_irq_disable();
346
347 /* Write zero to CMOS register number 0x0f, which the BIOS POST
348 routine will recognize as telling it to do a proper reboot. (Well
349 that's what this book in front of me says -- it may only apply to
350 the Phoenix BIOS though, it's not clear). At the same time,
351 disable NMIs by setting the top bit in the CMOS address register,
352 as we're about to do peculiar things to the CPU. I'm not sure if
353 `outb_p' is needed instead of just `outb'. Use it to be on the
354 safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
355 */
356 spin_lock(&rtc_lock);
357 CMOS_WRITE(0x00, 0x8f);
358 spin_unlock(&rtc_lock);
359
360 /*
361 * Switch back to the initial page table.
362 */
363 load_cr3(initial_page_table);
364
365 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
366 this on booting to tell it to "Bypass memory test (also warm
367 boot)". This seems like a fairly standard thing that gets set by
368 REBOOT.COM programs, and the previous reset routine did this
369 too. */
370 *((unsigned short *)0x472) = reboot_mode;
371
372 /* Patch the GDT in the low memory trampoline */
373 lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt);
374
375 restart_va = TRAMPOLINE_SYM(machine_real_restart_asm);
376 restart_pa = virt_to_phys(restart_va);
377 restart_lowmem = (void (*)(unsigned int))restart_pa;
378
379 /* GDT[0]: GDT self-pointer */
380 lowmem_gdt[0] =
381 (u64)(sizeof(machine_real_restart_gdt) - 1) +
382 ((u64)virt_to_phys(lowmem_gdt) << 16);
383 /* GDT[1]: 64K real mode code segment */
384 lowmem_gdt[1] =
385 GDT_ENTRY(0x009b, restart_pa, 0xffff);
386
387 /* Jump to the identity-mapped low memory code */
388 restart_lowmem(type);
389}
390#ifdef CONFIG_APM_MODULE
391EXPORT_SYMBOL(machine_real_restart);
392#endif
393
394#endif /* CONFIG_X86_32 */
395
396/*
397 * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
398 */
399static int __init set_pci_reboot(const struct dmi_system_id *d)
400{
401 if (reboot_type != BOOT_CF9) {
402 reboot_type = BOOT_CF9;
403 printk(KERN_INFO "%s series board detected. "
404 "Selecting PCI-method for reboots.\n", d->ident);
405 }
406 return 0;
407}
408
409static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
410 { /* Handle problems with rebooting on Apple MacBook5 */ 409 { /* Handle problems with rebooting on Apple MacBook5 */
411 .callback = set_pci_reboot, 410 .callback = set_pci_reboot,
412 .ident = "Apple MacBook5", 411 .ident = "Apple MacBook5",
@@ -474,17 +473,17 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
474 { } 473 { }
475}; 474};
476 475
477static int __init pci_reboot_init(void) 476static int __init reboot_init(void)
478{ 477{
479 /* Only do the DMI check if reboot_type hasn't been overridden 478 /*
479 * Only do the DMI check if reboot_type hasn't been overridden
480 * on the command line 480 * on the command line
481 */ 481 */
482 if (reboot_default) { 482 if (reboot_default)
483 dmi_check_system(pci_reboot_dmi_table); 483 dmi_check_system(reboot_dmi_table);
484 }
485 return 0; 484 return 0;
486} 485}
487core_initcall(pci_reboot_init); 486core_initcall(reboot_init);
488 487
489static inline void kb_wait(void) 488static inline void kb_wait(void)
490{ 489{
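With pci_reboot_dmi_table folded into reboot_dmi_table above, there is a single quirk table behind a single core_initcall. A new quirk would follow the same shape as the existing entries (hypothetical board, purely illustrative):

	{ /* Handle reboot problems on a hypothetical Example Board X */
		.callback = set_pci_reboot,	/* set_bios_reboot is 32-bit only */
		.ident = "Example Board X",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Example Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Board X"),
		},
	},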
@@ -502,14 +501,14 @@ static void vmxoff_nmi(int cpu, struct pt_regs *regs)
502 cpu_emergency_vmxoff(); 501 cpu_emergency_vmxoff();
503} 502}
504 503
505/* Use NMIs as IPIs to tell all CPUs to disable virtualization 504/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
506 */
507static void emergency_vmx_disable_all(void) 505static void emergency_vmx_disable_all(void)
508{ 506{
509 /* Just make sure we won't change CPUs while doing this */ 507 /* Just make sure we won't change CPUs while doing this */
510 local_irq_disable(); 508 local_irq_disable();
511 509
512 /* We need to disable VMX on all CPUs before rebooting, otherwise 510 /*
511 * We need to disable VMX on all CPUs before rebooting, otherwise
 513 * we risk hanging up the machine, because CPUs ignore INIT 512 * we risk hanging up the machine, because CPUs ignore INIT
514 * signals when VMX is enabled. 513 * signals when VMX is enabled.
515 * 514 *
@@ -528,8 +527,7 @@ static void emergency_vmx_disable_all(void)
528 * is still enabling VMX. 527 * is still enabling VMX.
529 */ 528 */
530 if (cpu_has_vmx() && cpu_vmx_enabled()) { 529 if (cpu_has_vmx() && cpu_vmx_enabled()) {
531 /* Disable VMX on this CPU. 530 /* Disable VMX on this CPU. */
532 */
533 cpu_vmxoff(); 531 cpu_vmxoff();
534 532
535 /* Halt and disable VMX on the other CPUs */ 533 /* Halt and disable VMX on the other CPUs */
@@ -574,12 +572,12 @@ static void native_machine_emergency_restart(void)
574 /* Could also try the reset bit in the Hammer NB */ 572 /* Could also try the reset bit in the Hammer NB */
575 switch (reboot_type) { 573 switch (reboot_type) {
576 case BOOT_KBD: 574 case BOOT_KBD:
577 mach_reboot_fixups(); /* for board specific fixups */ 575 mach_reboot_fixups(); /* For board specific fixups */
578 576
579 for (i = 0; i < 10; i++) { 577 for (i = 0; i < 10; i++) {
580 kb_wait(); 578 kb_wait();
581 udelay(50); 579 udelay(50);
582 outb(0xfe, 0x64); /* pulse reset low */ 580 outb(0xfe, 0x64); /* Pulse reset low */
583 udelay(50); 581 udelay(50);
584 } 582 }
585 if (attempt == 0 && orig_reboot_type == BOOT_ACPI) { 583 if (attempt == 0 && orig_reboot_type == BOOT_ACPI) {
@@ -621,7 +619,7 @@ static void native_machine_emergency_restart(void)
621 619
622 case BOOT_CF9: 620 case BOOT_CF9:
623 port_cf9_safe = true; 621 port_cf9_safe = true;
624 /* fall through */ 622 /* Fall through */
625 623
626 case BOOT_CF9_COND: 624 case BOOT_CF9_COND:
627 if (port_cf9_safe) { 625 if (port_cf9_safe) {
@@ -659,7 +657,8 @@ void native_machine_shutdown(void)
659 /* Make certain I only run on the appropriate processor */ 657 /* Make certain I only run on the appropriate processor */
660 set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); 658 set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
661 659
662 /* O.K Now that I'm on the appropriate processor, 660 /*
 661 * O.K. Now that I'm on the appropriate processor,
663 * stop all of the others. 662 * stop all of the others.
664 */ 663 */
665 stop_other_cpus(); 664 stop_other_cpus();
@@ -697,12 +696,11 @@ static void native_machine_restart(char *__unused)
697 696
698static void native_machine_halt(void) 697static void native_machine_halt(void)
699{ 698{
700 /* stop other cpus and apics */ 699 /* Stop other cpus and apics */
701 machine_shutdown(); 700 machine_shutdown();
702 701
703 tboot_shutdown(TB_SHUTDOWN_HALT); 702 tboot_shutdown(TB_SHUTDOWN_HALT);
704 703
705 /* stop this cpu */
706 stop_this_cpu(NULL); 704 stop_this_cpu(NULL);
707} 705}
708 706
@@ -713,7 +711,7 @@ static void native_machine_power_off(void)
713 machine_shutdown(); 711 machine_shutdown();
714 pm_power_off(); 712 pm_power_off();
715 } 713 }
716 /* a fallback in case there is no PM info available */ 714 /* A fallback in case there is no PM info available */
717 tboot_shutdown(TB_SHUTDOWN_HALT); 715 tboot_shutdown(TB_SHUTDOWN_HALT);
718} 716}
719 717
@@ -775,7 +773,8 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
775 773
776 cpu = raw_smp_processor_id(); 774 cpu = raw_smp_processor_id();
777 775
778 /* Don't do anything if this handler is invoked on crashing cpu. 776 /*
777 * Don't do anything if this handler is invoked on crashing cpu.
779 * Otherwise, system will completely hang. Crashing cpu can get 778 * Otherwise, system will completely hang. Crashing cpu can get
780 * an NMI if system was initially booted with nmi_watchdog parameter. 779 * an NMI if system was initially booted with nmi_watchdog parameter.
781 */ 780 */
@@ -799,7 +798,8 @@ static void smp_send_nmi_allbutself(void)
799 apic->send_IPI_allbutself(NMI_VECTOR); 798 apic->send_IPI_allbutself(NMI_VECTOR);
800} 799}
801 800
802/* Halt all other CPUs, calling the specified function on each of them 801/*
802 * Halt all other CPUs, calling the specified function on each of them
803 * 803 *
804 * This function can be used to halt all other CPUs on crash 804 * This function can be used to halt all other CPUs on crash
805 * or emergency reboot time. The function passed as parameter 805 * or emergency reboot time. The function passed as parameter
@@ -810,7 +810,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
810 unsigned long msecs; 810 unsigned long msecs;
811 local_irq_disable(); 811 local_irq_disable();
812 812
813 /* Make a note of crashing cpu. Will be used in NMI callback.*/ 813 /* Make a note of crashing cpu. Will be used in NMI callback. */
814 crashing_cpu = safe_smp_processor_id(); 814 crashing_cpu = safe_smp_processor_id();
815 815
816 shootdown_callback = callback; 816 shootdown_callback = callback;
@@ -819,8 +819,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
819 /* Would it be better to replace the trap vector here? */ 819 /* Would it be better to replace the trap vector here? */
820 if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, 820 if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
821 NMI_FLAG_FIRST, "crash")) 821 NMI_FLAG_FIRST, "crash"))
822 return; /* return what? */ 822 return; /* Return what? */
823 /* Ensure the new callback function is set before sending 823 /*
824 * Ensure the new callback function is set before sending
824 * out the NMI 825 * out the NMI
825 */ 826 */
826 wmb(); 827 wmb();
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1a2901562059..9b4204e06665 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -393,10 +393,9 @@ static void __init reserve_initrd(void)
393 initrd_start = 0; 393 initrd_start = 0;
394 394
395 if (ramdisk_size >= (end_of_lowmem>>1)) { 395 if (ramdisk_size >= (end_of_lowmem>>1)) {
396 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); 396 panic("initrd too large to handle, "
397 printk(KERN_ERR "initrd too large to handle, " 397 "disabling initrd (%lld needed, %lld available)\n",
398 "disabling initrd\n"); 398 ramdisk_size, end_of_lowmem>>1);
399 return;
400 } 399 }
401 400
402 printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image, 401 printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image,
@@ -1012,7 +1011,8 @@ void __init setup_arch(char **cmdline_p)
1012 init_cpu_to_node(); 1011 init_cpu_to_node();
1013 1012
1014 init_apic_mappings(); 1013 init_apic_mappings();
1015 ioapic_and_gsi_init(); 1014 if (x86_io_apic_ops.init)
1015 x86_io_apic_ops.init();
1016 1016
1017 kvm_guest_init(); 1017 kvm_guest_init();
1018 1018
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 66c74f481cab..48d2b7ded422 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -109,6 +109,9 @@
109 * about nothing of note with C stepping upwards. 109 * about nothing of note with C stepping upwards.
110 */ 110 */
111 111
112static atomic_t stopping_cpu = ATOMIC_INIT(-1);
113static bool smp_no_nmi_ipi = false;
114
112/* 115/*
113 * this function sends a 'reschedule' IPI to another CPU. 116 * this function sends a 'reschedule' IPI to another CPU.
114 * it goes straight through and wastes no time serializing 117 * it goes straight through and wastes no time serializing
@@ -149,8 +152,6 @@ void native_send_call_func_ipi(const struct cpumask *mask)
149 free_cpumask_var(allbutself); 152 free_cpumask_var(allbutself);
150} 153}
151 154
152static atomic_t stopping_cpu = ATOMIC_INIT(-1);
153
154static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) 155static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
155{ 156{
156 /* We are registered on stopping cpu too, avoid spurious NMI */ 157 /* We are registered on stopping cpu too, avoid spurious NMI */
@@ -162,7 +163,19 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
162 return NMI_HANDLED; 163 return NMI_HANDLED;
163} 164}
164 165
165static void native_nmi_stop_other_cpus(int wait) 166/*
167 * this function calls the 'stop' function on all other CPUs in the system.
168 */
169
170asmlinkage void smp_reboot_interrupt(void)
171{
172 ack_APIC_irq();
173 irq_enter();
174 stop_this_cpu(NULL);
175 irq_exit();
176}
177
178static void native_stop_other_cpus(int wait)
166{ 179{
167 unsigned long flags; 180 unsigned long flags;
168 unsigned long timeout; 181 unsigned long timeout;
@@ -174,20 +187,25 @@ static void native_nmi_stop_other_cpus(int wait)
 174 * Use our own vector here because smp_call_function 187 * Use our own vector here because smp_call_function
175 * does lots of things not suitable in a panic situation. 188 * does lots of things not suitable in a panic situation.
176 */ 189 */
190
191 /*
192 * We start by using the REBOOT_VECTOR irq.
193 * The irq is treated as a sync point to allow critical
194 * regions of code on other cpus to release their spin locks
195 * and re-enable irqs. Jumping straight to an NMI might
196 * accidentally cause deadlocks with further shutdown/panic
197 * code. By syncing, we give the cpus up to one second to
198 * finish their work before we force them off with the NMI.
199 */
177 if (num_online_cpus() > 1) { 200 if (num_online_cpus() > 1) {
178 /* did someone beat us here? */ 201 /* did someone beat us here? */
179 if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) 202 if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
180 return; 203 return;
181 204
182 if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, 205 /* sync above data before sending IRQ */
183 NMI_FLAG_FIRST, "smp_stop"))
184 /* Note: we ignore failures here */
185 return;
186
187 /* sync above data before sending NMI */
188 wmb(); 206 wmb();
189 207
190 apic->send_IPI_allbutself(NMI_VECTOR); 208 apic->send_IPI_allbutself(REBOOT_VECTOR);
191 209
192 /* 210 /*
193 * Don't wait longer than a second if the caller 211 * Don't wait longer than a second if the caller
@@ -197,63 +215,37 @@ static void native_nmi_stop_other_cpus(int wait)
197 while (num_online_cpus() > 1 && (wait || timeout--)) 215 while (num_online_cpus() > 1 && (wait || timeout--))
198 udelay(1); 216 udelay(1);
199 } 217 }
218
219 /* if the REBOOT_VECTOR didn't work, try with the NMI */
220 if ((num_online_cpus() > 1) && (!smp_no_nmi_ipi)) {
221 if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
222 NMI_FLAG_FIRST, "smp_stop"))
223 /* Note: we ignore failures here */
224 /* Hope the REBOOT_IRQ is good enough */
225 goto finish;
200 226
201 local_irq_save(flags); 227 /* sync above data before sending IRQ */
202 disable_local_APIC(); 228 wmb();
203 local_irq_restore(flags);
204}
205
206/*
207 * this function calls the 'stop' function on all other CPUs in the system.
208 */
209
210asmlinkage void smp_reboot_interrupt(void)
211{
212 ack_APIC_irq();
213 irq_enter();
214 stop_this_cpu(NULL);
215 irq_exit();
216}
217
218static void native_irq_stop_other_cpus(int wait)
219{
220 unsigned long flags;
221 unsigned long timeout;
222 229
223 if (reboot_force) 230 pr_emerg("Shutting down cpus with NMI\n");
224 return;
225 231
226 /* 232 apic->send_IPI_allbutself(NMI_VECTOR);
227 * Use an own vector here because smp_call_function
228 * does lots of things not suitable in a panic situation.
229 * On most systems we could also use an NMI here,
230 * but there are a few systems around where NMI
231 * is problematic so stay with an non NMI for now
232 * (this implies we cannot stop CPUs spinning with irq off
233 * currently)
234 */
235 if (num_online_cpus() > 1) {
236 apic->send_IPI_allbutself(REBOOT_VECTOR);
237 233
238 /* 234 /*
 239 * Don't wait longer than a second if the caller 235 * Don't wait longer than 10 ms if the caller
240 * didn't ask us to wait. 236 * didn't ask us to wait.
241 */ 237 */
242 timeout = USEC_PER_SEC; 238 timeout = USEC_PER_MSEC * 10;
243 while (num_online_cpus() > 1 && (wait || timeout--)) 239 while (num_online_cpus() > 1 && (wait || timeout--))
244 udelay(1); 240 udelay(1);
245 } 241 }
246 242
243finish:
247 local_irq_save(flags); 244 local_irq_save(flags);
248 disable_local_APIC(); 245 disable_local_APIC();
249 local_irq_restore(flags); 246 local_irq_restore(flags);
250} 247}
251 248
252static void native_smp_disable_nmi_ipi(void)
253{
254 smp_ops.stop_other_cpus = native_irq_stop_other_cpus;
255}
256
257/* 249/*
258 * Reschedule call back. 250 * Reschedule call back.
259 */ 251 */
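The merged native_stop_other_cpus() above is a two-stage shutdown: a maskable REBOOT_VECTOR IPI first, giving other CPUs up to a second to drop spinlocks and re-enable interrupts, then an NMI for any CPU still spinning with interrupts off. A condensed sketch of the stages (timeouts as in the code above):

	static void stop_other_cpus_sketch(int wait)
	{
		/* Stage 1: maskable IPI; wait up to 1 s for CPUs to park */
		apic->send_IPI_allbutself(REBOOT_VECTOR);
		/* while (num_online_cpus() > 1 && (wait || timeout--)) udelay(1); */

		/* Stage 2: NMI the holdouts, unless nonmi_ipi was given */
		if (num_online_cpus() > 1 && !smp_no_nmi_ipi)
			apic->send_IPI_allbutself(NMI_VECTOR);
		/* wait up to 10 ms, then disable the local APIC */
	}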
@@ -287,8 +279,8 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
287 279
288static int __init nonmi_ipi_setup(char *str) 280static int __init nonmi_ipi_setup(char *str)
289{ 281{
290 native_smp_disable_nmi_ipi(); 282 smp_no_nmi_ipi = true;
291 return 1; 283 return 1;
292} 284}
293 285
294__setup("nonmi_ipi", nonmi_ipi_setup); 286__setup("nonmi_ipi", nonmi_ipi_setup);
@@ -298,7 +290,7 @@ struct smp_ops smp_ops = {
298 .smp_prepare_cpus = native_smp_prepare_cpus, 290 .smp_prepare_cpus = native_smp_prepare_cpus,
299 .smp_cpus_done = native_smp_cpus_done, 291 .smp_cpus_done = native_smp_cpus_done,
300 292
301 .stop_other_cpus = native_nmi_stop_other_cpus, 293 .stop_other_cpus = native_stop_other_cpus,
302 .smp_send_reschedule = native_smp_send_reschedule, 294 .smp_send_reschedule = native_smp_send_reschedule,
303 295
304 .cpu_up = native_cpu_up, 296 .cpu_up = native_cpu_up,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6e1e406038c2..433529e29be4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -76,20 +76,8 @@
76/* State of each CPU */ 76/* State of each CPU */
77DEFINE_PER_CPU(int, cpu_state) = { 0 }; 77DEFINE_PER_CPU(int, cpu_state) = { 0 };
78 78
79/* Store all idle threads, this can be reused instead of creating
80* a new thread. Also avoids complicated thread destroy functionality
81* for idle threads.
82*/
83#ifdef CONFIG_HOTPLUG_CPU 79#ifdef CONFIG_HOTPLUG_CPU
84/* 80/*
85 * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
86 * removed after init for !CONFIG_HOTPLUG_CPU.
87 */
88static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
89#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
90#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
91
92/*
93 * We need this for trampoline_base protection from concurrent accesses when 81 * We need this for trampoline_base protection from concurrent accesses when
94 * off- and onlining cores wildly. 82 * off- and onlining cores wildly.
95 */ 83 */
@@ -97,20 +85,16 @@ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
97 85
98void cpu_hotplug_driver_lock(void) 86void cpu_hotplug_driver_lock(void)
99{ 87{
100 mutex_lock(&x86_cpu_hotplug_driver_mutex); 88 mutex_lock(&x86_cpu_hotplug_driver_mutex);
101} 89}
102 90
103void cpu_hotplug_driver_unlock(void) 91void cpu_hotplug_driver_unlock(void)
104{ 92{
105 mutex_unlock(&x86_cpu_hotplug_driver_mutex); 93 mutex_unlock(&x86_cpu_hotplug_driver_mutex);
106} 94}
107 95
108ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; } 96ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
109ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; } 97ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
110#else
111static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
112#define get_idle_for_cpu(x) (idle_thread_array[(x)])
113#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
114#endif 98#endif
115 99
116/* Number of siblings per CPU package */ 100/* Number of siblings per CPU package */
@@ -315,59 +299,90 @@ void __cpuinit smp_store_cpu_info(int id)
315 identify_secondary_cpu(c); 299 identify_secondary_cpu(c);
316} 300}
317 301
318static void __cpuinit link_thread_siblings(int cpu1, int cpu2) 302static bool __cpuinit
303topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
319{ 304{
320 cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); 305 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
321 cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); 306
322 cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); 307 return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
323 cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); 308 "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
324 cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2)); 309 "[node: %d != %d]. Ignoring dependency.\n",
325 cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1)); 310 cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
326} 311}
327 312
313#define link_mask(_m, c1, c2) \
314do { \
315 cpumask_set_cpu((c1), cpu_##_m##_mask(c2)); \
316 cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \
317} while (0)
318
319static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
320{
321 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
322 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
323
324 if (c->phys_proc_id == o->phys_proc_id &&
325 per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
326 c->compute_unit_id == o->compute_unit_id)
327 return topology_sane(c, o, "smt");
328
329 } else if (c->phys_proc_id == o->phys_proc_id &&
330 c->cpu_core_id == o->cpu_core_id) {
331 return topology_sane(c, o, "smt");
332 }
333
334 return false;
335}
336
337static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
338{
339 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
340
341 if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
342 per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
343 return topology_sane(c, o, "llc");
344
345 return false;
346}
347
348static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
349{
350 if (c->phys_proc_id == o->phys_proc_id)
351 return topology_sane(c, o, "mc");
352
353 return false;
354}
328 355
329void __cpuinit set_cpu_sibling_map(int cpu) 356void __cpuinit set_cpu_sibling_map(int cpu)
330{ 357{
331 int i; 358 bool has_mc = boot_cpu_data.x86_max_cores > 1;
359 bool has_smt = smp_num_siblings > 1;
332 struct cpuinfo_x86 *c = &cpu_data(cpu); 360 struct cpuinfo_x86 *c = &cpu_data(cpu);
361 struct cpuinfo_x86 *o;
362 int i;
333 363
334 cpumask_set_cpu(cpu, cpu_sibling_setup_mask); 364 cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
335 365
336 if (smp_num_siblings > 1) { 366 if (!has_smt && !has_mc) {
337 for_each_cpu(i, cpu_sibling_setup_mask) {
338 struct cpuinfo_x86 *o = &cpu_data(i);
339
340 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
341 if (c->phys_proc_id == o->phys_proc_id &&
342 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
343 c->compute_unit_id == o->compute_unit_id)
344 link_thread_siblings(cpu, i);
345 } else if (c->phys_proc_id == o->phys_proc_id &&
346 c->cpu_core_id == o->cpu_core_id) {
347 link_thread_siblings(cpu, i);
348 }
349 }
350 } else {
351 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); 367 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
352 } 368 cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
353 369 cpumask_set_cpu(cpu, cpu_core_mask(cpu));
354 cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
355
356 if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
357 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
358 c->booted_cores = 1; 370 c->booted_cores = 1;
359 return; 371 return;
360 } 372 }
361 373
362 for_each_cpu(i, cpu_sibling_setup_mask) { 374 for_each_cpu(i, cpu_sibling_setup_mask) {
363 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && 375 o = &cpu_data(i);
364 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { 376
365 cpumask_set_cpu(i, cpu_llc_shared_mask(cpu)); 377 if ((i == cpu) || (has_smt && match_smt(c, o)))
366 cpumask_set_cpu(cpu, cpu_llc_shared_mask(i)); 378 link_mask(sibling, cpu, i);
367 } 379
368 if (c->phys_proc_id == cpu_data(i).phys_proc_id) { 380 if ((i == cpu) || (has_mc && match_llc(c, o)))
369 cpumask_set_cpu(i, cpu_core_mask(cpu)); 381 link_mask(llc_shared, cpu, i);
370 cpumask_set_cpu(cpu, cpu_core_mask(i)); 382
383 if ((i == cpu) || (has_mc && match_mc(c, o))) {
384 link_mask(core, cpu, i);
385
371 /* 386 /*
372 * Does this new cpu bringup a new core? 387 * Does this new cpu bringup a new core?
373 */ 388 */
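link_mask() replaces the old link_thread_siblings() with token pasting over the cpumask accessors, so each match_*() helper only has to decide whether two CPUs belong in a given mask, and topology_sane() warns once if supposed siblings land on different NUMA nodes. For example, link_mask(sibling, cpu, i) expands to:

	cpumask_set_cpu(cpu, cpu_sibling_mask(i));
	cpumask_set_cpu(i, cpu_sibling_mask(cpu));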
@@ -398,8 +413,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
398 * For perf, we return last level cache shared map. 413 * For perf, we return last level cache shared map.
399 * And for power savings, we return cpu_core_map 414 * And for power savings, we return cpu_core_map
400 */ 415 */
401 if ((sched_mc_power_savings || sched_smt_power_savings) && 416 if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
402 !(cpu_has(c, X86_FEATURE_AMD_DCM)))
403 return cpu_core_mask(cpu); 417 return cpu_core_mask(cpu);
404 else 418 else
405 return cpu_llc_shared_mask(cpu); 419 return cpu_llc_shared_mask(cpu);
@@ -618,22 +632,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
618 return (send_status | accept_status); 632 return (send_status | accept_status);
619} 633}
620 634
621struct create_idle {
622 struct work_struct work;
623 struct task_struct *idle;
624 struct completion done;
625 int cpu;
626};
627
628static void __cpuinit do_fork_idle(struct work_struct *work)
629{
630 struct create_idle *c_idle =
631 container_of(work, struct create_idle, work);
632
633 c_idle->idle = fork_idle(c_idle->cpu);
634 complete(&c_idle->done);
635}
636
637/* reduce the number of lines printed when booting a large cpu count system */ 635/* reduce the number of lines printed when booting a large cpu count system */
638static void __cpuinit announce_cpu(int cpu, int apicid) 636static void __cpuinit announce_cpu(int cpu, int apicid)
639{ 637{
@@ -660,58 +658,31 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
660 * Returns zero if CPU booted OK, else error code from 658 * Returns zero if CPU booted OK, else error code from
661 * ->wakeup_secondary_cpu. 659 * ->wakeup_secondary_cpu.
662 */ 660 */
663static int __cpuinit do_boot_cpu(int apicid, int cpu) 661static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
664{ 662{
665 unsigned long boot_error = 0; 663 unsigned long boot_error = 0;
666 unsigned long start_ip; 664 unsigned long start_ip;
667 int timeout; 665 int timeout;
668 struct create_idle c_idle = {
669 .cpu = cpu,
670 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
671 };
672
673 INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
674 666
675 alternatives_smp_switch(1); 667 alternatives_smp_switch(1);
676 668
677 c_idle.idle = get_idle_for_cpu(cpu); 669 idle->thread.sp = (unsigned long) (((struct pt_regs *)
678 670 (THREAD_SIZE + task_stack_page(idle))) - 1);
679 /* 671 per_cpu(current_task, cpu) = idle;
680 * We can't use kernel_thread since we must avoid to
681 * reschedule the child.
682 */
683 if (c_idle.idle) {
684 c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
685 (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
686 init_idle(c_idle.idle, cpu);
687 goto do_rest;
688 }
689
690 schedule_work(&c_idle.work);
691 wait_for_completion(&c_idle.done);
692 672
693 if (IS_ERR(c_idle.idle)) {
694 printk("failed fork for CPU %d\n", cpu);
695 destroy_work_on_stack(&c_idle.work);
696 return PTR_ERR(c_idle.idle);
697 }
698
699 set_idle_for_cpu(cpu, c_idle.idle);
700do_rest:
701 per_cpu(current_task, cpu) = c_idle.idle;
702#ifdef CONFIG_X86_32 673#ifdef CONFIG_X86_32
703 /* Stack for startup_32 can be just as for start_secondary onwards */ 674 /* Stack for startup_32 can be just as for start_secondary onwards */
704 irq_ctx_init(cpu); 675 irq_ctx_init(cpu);
705#else 676#else
706 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 677 clear_tsk_thread_flag(idle, TIF_FORK);
707 initial_gs = per_cpu_offset(cpu); 678 initial_gs = per_cpu_offset(cpu);
708 per_cpu(kernel_stack, cpu) = 679 per_cpu(kernel_stack, cpu) =
709 (unsigned long)task_stack_page(c_idle.idle) - 680 (unsigned long)task_stack_page(idle) -
710 KERNEL_STACK_OFFSET + THREAD_SIZE; 681 KERNEL_STACK_OFFSET + THREAD_SIZE;
711#endif 682#endif
712 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 683 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
713 initial_code = (unsigned long)start_secondary; 684 initial_code = (unsigned long)start_secondary;
714 stack_start = c_idle.idle->thread.sp; 685 stack_start = idle->thread.sp;
715 686
716 /* start_ip had better be page-aligned! */ 687 /* start_ip had better be page-aligned! */
717 start_ip = trampoline_address(); 688 start_ip = trampoline_address();
@@ -813,12 +784,10 @@ do_rest:
813 */ 784 */
814 smpboot_restore_warm_reset_vector(); 785 smpboot_restore_warm_reset_vector();
815 } 786 }
816
817 destroy_work_on_stack(&c_idle.work);
818 return boot_error; 787 return boot_error;
819} 788}
820 789
821int __cpuinit native_cpu_up(unsigned int cpu) 790int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
822{ 791{
823 int apicid = apic->cpu_present_to_apicid(cpu); 792 int apicid = apic->cpu_present_to_apicid(cpu);
824 unsigned long flags; 793 unsigned long flags;
@@ -851,7 +820,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
851 820
852 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 821 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
853 822
854 err = do_boot_cpu(apicid, cpu); 823 err = do_boot_cpu(apicid, cpu, tidle);
855 if (err) { 824 if (err) {
856 pr_debug("do_boot_cpu failed %d\n", err); 825 pr_debug("do_boot_cpu failed %d\n", err);
857 return -EIO; 826 return -EIO;
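do_boot_cpu() no longer forks its own idle task via an on-stack workqueue; the caller passes one in, and the idle_thread_array bookkeeping removed above becomes unnecessary. The generic hotplug path is expected to provide the task along these lines (idle_thread_get() comes from the companion generic patches and is shown here only as an assumption):

	struct task_struct *idle = idle_thread_get(cpu);	/* reused per-cpu idle */

	if (IS_ERR(idle))
		return PTR_ERR(idle);
	ret = smp_ops.cpu_up(cpu, idle);			/* -> native_cpu_up() */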
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index c29e235792af..b79133abda48 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -12,6 +12,7 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <asm/cacheflush.h> 13#include <asm/cacheflush.h>
14#include <asm/sections.h> 14#include <asm/sections.h>
15#include <asm/asm.h>
15 16
16int rodata_test(void) 17int rodata_test(void)
17{ 18{
@@ -42,14 +43,7 @@ int rodata_test(void)
42 ".section .fixup,\"ax\"\n" 43 ".section .fixup,\"ax\"\n"
43 "2: jmp 1b\n" 44 "2: jmp 1b\n"
44 ".previous\n" 45 ".previous\n"
45 ".section __ex_table,\"a\"\n" 46 _ASM_EXTABLE(0b,2b)
46 " .align 16\n"
47#ifdef CONFIG_X86_32
48 " .long 0b,2b\n"
49#else
50 " .quad 0b,2b\n"
51#endif
52 ".previous"
53 : [rslt] "=r" (result) 47 : [rslt] "=r" (result)
54 : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL) 48 : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL)
55 ); 49 );
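This hunk and the lib/*.S conversions below all replace open-coded __ex_table fragments with _ASM_EXTABLE() from <asm/asm.h>, which hides the section name, alignment and pointer size (.long on 32-bit, .quad on 64-bit). One common form of the assembly flavour is roughly the following; the asm.h revision in this very series is authoritative and may instead emit relative 32-bit offsets:

	# define _ASM_EXTABLE(from, to)			\
		.pushsection "__ex_table", "a" ;	\
		_ASM_ALIGN ;				\
		_ASM_PTR from , to ;			\
		.popsection

The inline-asm flavour stringifies the same directives, so C and assembly users emit identical exception-table entries.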
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff9281f16029..92d5756d85fc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -50,6 +50,7 @@
50#include <asm/processor.h> 50#include <asm/processor.h>
51#include <asm/debugreg.h> 51#include <asm/debugreg.h>
52#include <linux/atomic.h> 52#include <linux/atomic.h>
53#include <asm/ftrace.h>
53#include <asm/traps.h> 54#include <asm/traps.h>
54#include <asm/desc.h> 55#include <asm/desc.h>
55#include <asm/i387.h> 56#include <asm/i387.h>
@@ -303,8 +304,13 @@ gp_in_kernel:
303} 304}
304 305
305/* May run on IST stack. */ 306/* May run on IST stack. */
306dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) 307dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
307{ 308{
309#ifdef CONFIG_DYNAMIC_FTRACE
 310 /* ftrace must be first; everything else may cause a recursive crash */
311 if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
312 return;
313#endif
308#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 314#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
309 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 315 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
310 SIGTRAP) == NOTIFY_STOP) 316 SIGTRAP) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index a1d804bcd483..8eeb55a551b4 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -15,6 +15,7 @@
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/pci_ids.h> 16#include <linux/pci_ids.h>
17#include <linux/pci_regs.h> 17#include <linux/pci_regs.h>
18#include <linux/smp.h>
18 19
19#include <asm/apic.h> 20#include <asm/apic.h>
20#include <asm/pci-direct.h> 21#include <asm/pci-direct.h>
@@ -22,6 +23,8 @@
22#include <asm/paravirt.h> 23#include <asm/paravirt.h>
23#include <asm/setup.h> 24#include <asm/setup.h>
24 25
26#define TOPOLOGY_REGISTER_OFFSET 0x10
27
25#if defined CONFIG_PCI && defined CONFIG_PARAVIRT 28#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
26/* 29/*
27 * Interrupt control on vSMPowered systems: 30 * Interrupt control on vSMPowered systems:
@@ -149,12 +152,49 @@ int is_vsmp_box(void)
149 return 0; 152 return 0;
150} 153}
151#endif 154#endif
155
156static void __init vsmp_cap_cpus(void)
157{
158#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP)
159 void __iomem *address;
160 unsigned int cfg, topology, node_shift, maxcpus;
161
162 /*
 163 * CONFIG_X86_VSMP is not configured, so limit the number of CPUs to the
164 * ones present in the first board, unless explicitly overridden by
165 * setup_max_cpus
166 */
167 if (setup_max_cpus != NR_CPUS)
168 return;
169
170 /* Read the vSMP Foundation topology register */
171 cfg = read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0);
172 address = early_ioremap(cfg + TOPOLOGY_REGISTER_OFFSET, 4);
173 if (WARN_ON(!address))
174 return;
175
176 topology = readl(address);
177 node_shift = (topology >> 16) & 0x7;
178 if (!node_shift)
179 /* The value 0 should be decoded as 8 */
180 node_shift = 8;
181 maxcpus = (topology & ((1 << node_shift) - 1)) + 1;
182
183 pr_info("vSMP CTL: Capping CPUs to %d (CONFIG_X86_VSMP is unset)\n",
184 maxcpus);
185 setup_max_cpus = maxcpus;
186 early_iounmap(address, 4);
187#endif
188}
189
152void __init vsmp_init(void) 190void __init vsmp_init(void)
153{ 191{
154 detect_vsmp_box(); 192 detect_vsmp_box();
155 if (!is_vsmp_box()) 193 if (!is_vsmp_box())
156 return; 194 return;
157 195
196 vsmp_cap_cpus();
197
158 set_vsmp_pv_ops(); 198 set_vsmp_pv_ops();
159 return; 199 return;
160} 200}
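The topology decode in vsmp_cap_cpus() packs a node shift into bits 18:16 and a CPU count into the low node_shift bits. A worked example (the register value is made up for illustration):

	static unsigned int decode_vsmp_maxcpus(unsigned int topology)
	{
		unsigned int node_shift = (topology >> 16) & 0x7;

		if (!node_shift)
			node_shift = 8;		/* the value 0 encodes 8 */
		return (topology & ((1 << node_shift) - 1)) + 1;
	}

	/* decode_vsmp_maxcpus(0x00030005) == 5 + 1 == 6 CPUs */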
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 9cf71d0b2d37..35c5e543f550 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -18,6 +18,7 @@
18#include <asm/e820.h> 18#include <asm/e820.h>
19#include <asm/time.h> 19#include <asm/time.h>
20#include <asm/irq.h> 20#include <asm/irq.h>
21#include <asm/io_apic.h>
21#include <asm/pat.h> 22#include <asm/pat.h>
22#include <asm/tsc.h> 23#include <asm/tsc.h>
23#include <asm/iommu.h> 24#include <asm/iommu.h>
@@ -119,3 +120,10 @@ struct x86_msi_ops x86_msi = {
119 .teardown_msi_irqs = default_teardown_msi_irqs, 120 .teardown_msi_irqs = default_teardown_msi_irqs,
120 .restore_msi_irqs = default_restore_msi_irqs, 121 .restore_msi_irqs = default_restore_msi_irqs,
121}; 122};
123
124struct x86_io_apic_ops x86_io_apic_ops = {
125 .init = native_io_apic_init_mappings,
126 .read = native_io_apic_read,
127 .write = native_io_apic_write,
128 .modify = native_io_apic_modify,
129};
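The new x86_io_apic_ops table lets a platform substitute its own IO-APIC accessors (interrupt remapping and paravirtualized setups are the obvious users), and setup_arch() above now calls the init hook only if it is non-NULL. A hypothetical override, purely illustrative:

	static void __init quiet_io_apic_init(void)
	{
		/* platform exposes no discoverable IO-APICs: map nothing */
	}

	static void __init example_platform_setup(void)
	{
		x86_io_apic_ops.init = quiet_io_apic_init;
	}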
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index e62728e30b01..bd18149b2b0f 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -48,8 +48,6 @@ void __sanitize_i387_state(struct task_struct *tsk)
48 if (!fx) 48 if (!fx)
49 return; 49 return;
50 50
51 BUG_ON(__thread_has_fpu(tsk));
52
53 xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; 51 xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
54 52
55 /* 53 /*
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 78d16a554db0..2af5df3ade7c 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -28,6 +28,7 @@
28#include <linux/linkage.h> 28#include <linux/linkage.h>
29#include <asm/dwarf2.h> 29#include <asm/dwarf2.h>
30#include <asm/errno.h> 30#include <asm/errno.h>
31#include <asm/asm.h>
31 32
32/* 33/*
33 * computes a partial checksum, e.g. for TCP/UDP fragments 34 * computes a partial checksum, e.g. for TCP/UDP fragments
@@ -282,15 +283,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
282 283
283#define SRC(y...) \ 284#define SRC(y...) \
284 9999: y; \ 285 9999: y; \
285 .section __ex_table, "a"; \ 286 _ASM_EXTABLE(9999b, 6001f)
286 .long 9999b, 6001f ; \
287 .previous
288 287
289#define DST(y...) \ 288#define DST(y...) \
290 9999: y; \ 289 9999: y; \
291 .section __ex_table, "a"; \ 290 _ASM_EXTABLE(9999b, 6002f)
292 .long 9999b, 6002f ; \
293 .previous
294 291
295#ifndef CONFIG_X86_USE_PPRO_CHECKSUM 292#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
296 293
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 024840266ba0..5b2995f4557a 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -16,6 +16,7 @@
16#include <asm/thread_info.h> 16#include <asm/thread_info.h>
17#include <asm/cpufeature.h> 17#include <asm/cpufeature.h>
18#include <asm/alternative-asm.h> 18#include <asm/alternative-asm.h>
19#include <asm/asm.h>
19 20
20/* 21/*
21 * By placing feature2 after feature1 in altinstructions section, we logically 22 * By placing feature2 after feature1 in altinstructions section, we logically
@@ -63,11 +64,8 @@
63 jmp copy_user_handle_tail 64 jmp copy_user_handle_tail
64 .previous 65 .previous
65 66
66 .section __ex_table,"a" 67 _ASM_EXTABLE(100b,103b)
67 .align 8 68 _ASM_EXTABLE(101b,103b)
68 .quad 100b,103b
69 .quad 101b,103b
70 .previous
71#endif 69#endif
72 .endm 70 .endm
73 71
@@ -191,29 +189,26 @@ ENTRY(copy_user_generic_unrolled)
19160: jmp copy_user_handle_tail /* ecx is zerorest also */ 18960: jmp copy_user_handle_tail /* ecx is zerorest also */
192 .previous 190 .previous
193 191
194 .section __ex_table,"a" 192 _ASM_EXTABLE(1b,30b)
195 .align 8 193 _ASM_EXTABLE(2b,30b)
196 .quad 1b,30b 194 _ASM_EXTABLE(3b,30b)
197 .quad 2b,30b 195 _ASM_EXTABLE(4b,30b)
198 .quad 3b,30b 196 _ASM_EXTABLE(5b,30b)
199 .quad 4b,30b 197 _ASM_EXTABLE(6b,30b)
200 .quad 5b,30b 198 _ASM_EXTABLE(7b,30b)
201 .quad 6b,30b 199 _ASM_EXTABLE(8b,30b)
202 .quad 7b,30b 200 _ASM_EXTABLE(9b,30b)
203 .quad 8b,30b 201 _ASM_EXTABLE(10b,30b)
204 .quad 9b,30b 202 _ASM_EXTABLE(11b,30b)
205 .quad 10b,30b 203 _ASM_EXTABLE(12b,30b)
206 .quad 11b,30b 204 _ASM_EXTABLE(13b,30b)
207 .quad 12b,30b 205 _ASM_EXTABLE(14b,30b)
208 .quad 13b,30b 206 _ASM_EXTABLE(15b,30b)
209 .quad 14b,30b 207 _ASM_EXTABLE(16b,30b)
210 .quad 15b,30b 208 _ASM_EXTABLE(18b,40b)
211 .quad 16b,30b 209 _ASM_EXTABLE(19b,40b)
212 .quad 18b,40b 210 _ASM_EXTABLE(21b,50b)
213 .quad 19b,40b 211 _ASM_EXTABLE(22b,50b)
214 .quad 21b,50b
215 .quad 22b,50b
216 .previous
217 CFI_ENDPROC 212 CFI_ENDPROC
218ENDPROC(copy_user_generic_unrolled) 213ENDPROC(copy_user_generic_unrolled)
219 214
@@ -259,11 +254,8 @@ ENTRY(copy_user_generic_string)
259 jmp copy_user_handle_tail 254 jmp copy_user_handle_tail
260 .previous 255 .previous
261 256
262 .section __ex_table,"a" 257 _ASM_EXTABLE(1b,11b)
263 .align 8 258 _ASM_EXTABLE(3b,12b)
264 .quad 1b,11b
265 .quad 3b,12b
266 .previous
267 CFI_ENDPROC 259 CFI_ENDPROC
268ENDPROC(copy_user_generic_string) 260ENDPROC(copy_user_generic_string)
269 261
@@ -294,9 +286,6 @@ ENTRY(copy_user_enhanced_fast_string)
294 jmp copy_user_handle_tail 286 jmp copy_user_handle_tail
295 .previous 287 .previous
296 288
297 .section __ex_table,"a" 289 _ASM_EXTABLE(1b,12b)
298 .align 8
299 .quad 1b,12b
300 .previous
301 CFI_ENDPROC 290 CFI_ENDPROC
302ENDPROC(copy_user_enhanced_fast_string) 291ENDPROC(copy_user_enhanced_fast_string)
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index cb0c112386fb..cacddc7163eb 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -14,6 +14,7 @@
14#include <asm/current.h> 14#include <asm/current.h>
15#include <asm/asm-offsets.h> 15#include <asm/asm-offsets.h>
16#include <asm/thread_info.h> 16#include <asm/thread_info.h>
17#include <asm/asm.h>
17 18
18 .macro ALIGN_DESTINATION 19 .macro ALIGN_DESTINATION
19#ifdef FIX_ALIGNMENT 20#ifdef FIX_ALIGNMENT
@@ -36,11 +37,8 @@
36 jmp copy_user_handle_tail 37 jmp copy_user_handle_tail
37 .previous 38 .previous
38 39
39 .section __ex_table,"a" 40 _ASM_EXTABLE(100b,103b)
40 .align 8 41 _ASM_EXTABLE(101b,103b)
41 .quad 100b,103b
42 .quad 101b,103b
43 .previous
44#endif 42#endif
45 .endm 43 .endm
46 44
@@ -111,27 +109,25 @@ ENTRY(__copy_user_nocache)
111 jmp copy_user_handle_tail 109 jmp copy_user_handle_tail
112 .previous 110 .previous
113 111
114 .section __ex_table,"a" 112 _ASM_EXTABLE(1b,30b)
115 .quad 1b,30b 113 _ASM_EXTABLE(2b,30b)
116 .quad 2b,30b 114 _ASM_EXTABLE(3b,30b)
117 .quad 3b,30b 115 _ASM_EXTABLE(4b,30b)
118 .quad 4b,30b 116 _ASM_EXTABLE(5b,30b)
119 .quad 5b,30b 117 _ASM_EXTABLE(6b,30b)
120 .quad 6b,30b 118 _ASM_EXTABLE(7b,30b)
121 .quad 7b,30b 119 _ASM_EXTABLE(8b,30b)
122 .quad 8b,30b 120 _ASM_EXTABLE(9b,30b)
123 .quad 9b,30b 121 _ASM_EXTABLE(10b,30b)
124 .quad 10b,30b 122 _ASM_EXTABLE(11b,30b)
125 .quad 11b,30b 123 _ASM_EXTABLE(12b,30b)
126 .quad 12b,30b 124 _ASM_EXTABLE(13b,30b)
127 .quad 13b,30b 125 _ASM_EXTABLE(14b,30b)
128 .quad 14b,30b 126 _ASM_EXTABLE(15b,30b)
129 .quad 15b,30b 127 _ASM_EXTABLE(16b,30b)
130 .quad 16b,30b 128 _ASM_EXTABLE(18b,40b)
131 .quad 18b,40b 129 _ASM_EXTABLE(19b,40b)
132 .quad 19b,40b 130 _ASM_EXTABLE(21b,50b)
133 .quad 21b,50b 131 _ASM_EXTABLE(22b,50b)
134 .quad 22b,50b
135 .previous
136 CFI_ENDPROC 132 CFI_ENDPROC
137ENDPROC(__copy_user_nocache) 133ENDPROC(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index fb903b758da8..2419d5fefae3 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -8,6 +8,7 @@
8#include <linux/linkage.h> 8#include <linux/linkage.h>
9#include <asm/dwarf2.h> 9#include <asm/dwarf2.h>
10#include <asm/errno.h> 10#include <asm/errno.h>
11#include <asm/asm.h>
11 12
12/* 13/*
13 * Checksum copy with exception handling. 14 * Checksum copy with exception handling.
@@ -31,26 +32,17 @@
31 32
32 .macro source 33 .macro source
3310: 3410:
34 .section __ex_table, "a" 35 _ASM_EXTABLE(10b, .Lbad_source)
35 .align 8
36 .quad 10b, .Lbad_source
37 .previous
38 .endm 36 .endm
39 37
40 .macro dest 38 .macro dest
4120: 3920:
42 .section __ex_table, "a" 40 _ASM_EXTABLE(20b, .Lbad_dest)
43 .align 8
44 .quad 20b, .Lbad_dest
45 .previous
46 .endm 41 .endm
47 42
48 .macro ignore L=.Lignore 43 .macro ignore L=.Lignore
4930: 4430:
50 .section __ex_table, "a" 45 _ASM_EXTABLE(30b, \L)
51 .align 8
52 .quad 30b, \L
53 .previous
54 .endm 46 .endm
55 47
56 48
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 51f1504cddd9..b33b1fb1e6d4 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -95,10 +95,9 @@ bad_get_user:
95 CFI_ENDPROC 95 CFI_ENDPROC
96END(bad_get_user) 96END(bad_get_user)
97 97
98.section __ex_table,"a" 98 _ASM_EXTABLE(1b,bad_get_user)
99 _ASM_PTR 1b,bad_get_user 99 _ASM_EXTABLE(2b,bad_get_user)
100 _ASM_PTR 2b,bad_get_user 100 _ASM_EXTABLE(3b,bad_get_user)
101 _ASM_PTR 3b,bad_get_user
102#ifdef CONFIG_X86_64 101#ifdef CONFIG_X86_64
103 _ASM_PTR 4b,bad_get_user 102 _ASM_EXTABLE(4b,bad_get_user)
104#endif 103#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 36b0d15ae6e9..7f951c8f76c4 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -86,12 +86,10 @@ bad_put_user:
86 EXIT 86 EXIT
87END(bad_put_user) 87END(bad_put_user)
88 88
89.section __ex_table,"a" 89 _ASM_EXTABLE(1b,bad_put_user)
90 _ASM_PTR 1b,bad_put_user 90 _ASM_EXTABLE(2b,bad_put_user)
91 _ASM_PTR 2b,bad_put_user 91 _ASM_EXTABLE(3b,bad_put_user)
92 _ASM_PTR 3b,bad_put_user 92 _ASM_EXTABLE(4b,bad_put_user)
93 _ASM_PTR 4b,bad_put_user
94#ifdef CONFIG_X86_32 93#ifdef CONFIG_X86_32
95 _ASM_PTR 5b,bad_put_user 94 _ASM_EXTABLE(5b,bad_put_user)
96#endif 95#endif
97.previous
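
On the C side the table entry shrinks accordingly: the ex_insn_addr()/ex_fixup_addr() helpers added in mm/extable.c below resolve each field by adding it to its own address, which only works if the entry is a pair of self-relative ints, presumably:

    /* 8 bytes per entry on both i386 and x86-64; the old pointer pair
     * cost 16 bytes on 64-bit and needed load-time relocation in
     * modules */
    struct exception_table_entry {
            int insn, fixup;
    };
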
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index d6ae30bbd7bb..2e4e4b02c37a 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -44,13 +44,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
44} 44}
45EXPORT_SYMBOL_GPL(copy_from_user_nmi); 45EXPORT_SYMBOL_GPL(copy_from_user_nmi);
46 46
47static inline unsigned long count_bytes(unsigned long mask)
48{
49 mask = (mask - 1) & ~mask;
50 mask >>= 7;
51 return count_masked_bytes(mask);
52}
53
54/* 47/*
55 * Do a strncpy, return length of string without final '\0'. 48 * Do a strncpy, return length of string without final '\0'.
56 * 'count' is the user-supplied count (return 'count' if we 49 * 'count' is the user-supplied count (return 'count' if we
@@ -69,16 +62,19 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
69 max = count; 62 max = count;
70 63
71 while (max >= sizeof(unsigned long)) { 64 while (max >= sizeof(unsigned long)) {
72 unsigned long c; 65 unsigned long c, mask;
73 66
74 /* Fall back to byte-at-a-time if we get a page fault */ 67 /* Fall back to byte-at-a-time if we get a page fault */
75 if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) 68 if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
76 break; 69 break;
77 /* This can write a few bytes past the NUL character, but that's ok */ 70 mask = has_zero(c);
71 if (mask) {
72 mask = (mask - 1) & ~mask;
73 mask >>= 7;
74 *(unsigned long *)(dst+res) = c & mask;
75 return res + count_masked_bytes(mask);
76 }
78 *(unsigned long *)(dst+res) = c; 77 *(unsigned long *)(dst+res) = c;
79 c = has_zero(c);
80 if (c)
81 return res + count_bytes(c);
82 res += sizeof(unsigned long); 78 res += sizeof(unsigned long);
83 max -= sizeof(unsigned long); 79 max -= sizeof(unsigned long);
84 } 80 }
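
The removed count_bytes() helper is folded into the loop, now operating on the has_zero() result directly: has_zero() leaves 0x80 in each zero byte, (mask - 1) & ~mask keeps only the bits below the first such byte, and >> 7 stretches that into 0xff across every byte preceding the NUL, so the masked word is safe to store and count_masked_bytes() yields the in-word string length. A standalone little-endian sketch; has_zero() and the byte count are open-coded stand-ins for the kernel's <asm/word-at-a-time.h>:

    #include <stdio.h>
    #include <stdint.h>

    #define ONEBYTES 0x0101010101010101ULL
    #define HIGHBITS 0x8080808080808080ULL

    static uint64_t has_zero(uint64_t a)
    {
            return (a - ONEBYTES) & ~a & HIGHBITS;  /* 0x80 in zero bytes */
    }

    int main(void)
    {
            uint64_t c = 0x0000006f6c6c6568ULL;     /* "hello\0\0\0", LE */
            uint64_t mask = has_zero(c);

            mask = (mask - 1) & ~mask;      /* ones below first zero byte */
            mask >>= 7;                     /* 0xff per byte before NUL */
            printf("length in word: %d\n",
                   __builtin_popcountll(mask) / 8);         /* 5 */
            printf("stored word: %#llx\n",
                   (unsigned long long)(c & mask));         /* 0x6f6c6c6568 */
            return 0;
    }
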
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index ef2a6a5d78e3..883b216c60b2 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -13,6 +13,7 @@
13#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15#include <asm/mmx.h> 15#include <asm/mmx.h>
16#include <asm/asm.h>
16 17
17#ifdef CONFIG_X86_INTEL_USERCOPY 18#ifdef CONFIG_X86_INTEL_USERCOPY
18/* 19/*
@@ -127,10 +128,7 @@ long strnlen_user(const char __user *s, long n)
127 "3: movb $1,%%al\n" 128 "3: movb $1,%%al\n"
128 " jmp 1b\n" 129 " jmp 1b\n"
129 ".previous\n" 130 ".previous\n"
130 ".section __ex_table,\"a\"\n" 131 _ASM_EXTABLE(0b,2b)
131 " .align 4\n"
132 " .long 0b,2b\n"
133 ".previous"
134 :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp) 132 :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp)
135 :"0" (n), "1" (s), "2" (0), "3" (mask) 133 :"0" (n), "1" (s), "2" (0), "3" (mask)
136 :"cc"); 134 :"cc");
@@ -199,47 +197,44 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
199 "101: lea 0(%%eax,%0,4),%0\n" 197 "101: lea 0(%%eax,%0,4),%0\n"
200 " jmp 100b\n" 198 " jmp 100b\n"
201 ".previous\n" 199 ".previous\n"
202 ".section __ex_table,\"a\"\n" 200 _ASM_EXTABLE(1b,100b)
203 " .align 4\n" 201 _ASM_EXTABLE(2b,100b)
204 " .long 1b,100b\n" 202 _ASM_EXTABLE(3b,100b)
205 " .long 2b,100b\n" 203 _ASM_EXTABLE(4b,100b)
206 " .long 3b,100b\n" 204 _ASM_EXTABLE(5b,100b)
207 " .long 4b,100b\n" 205 _ASM_EXTABLE(6b,100b)
208 " .long 5b,100b\n" 206 _ASM_EXTABLE(7b,100b)
209 " .long 6b,100b\n" 207 _ASM_EXTABLE(8b,100b)
210 " .long 7b,100b\n" 208 _ASM_EXTABLE(9b,100b)
211 " .long 8b,100b\n" 209 _ASM_EXTABLE(10b,100b)
212 " .long 9b,100b\n" 210 _ASM_EXTABLE(11b,100b)
213 " .long 10b,100b\n" 211 _ASM_EXTABLE(12b,100b)
214 " .long 11b,100b\n" 212 _ASM_EXTABLE(13b,100b)
215 " .long 12b,100b\n" 213 _ASM_EXTABLE(14b,100b)
216 " .long 13b,100b\n" 214 _ASM_EXTABLE(15b,100b)
217 " .long 14b,100b\n" 215 _ASM_EXTABLE(16b,100b)
218 " .long 15b,100b\n" 216 _ASM_EXTABLE(17b,100b)
219 " .long 16b,100b\n" 217 _ASM_EXTABLE(18b,100b)
220 " .long 17b,100b\n" 218 _ASM_EXTABLE(19b,100b)
221 " .long 18b,100b\n" 219 _ASM_EXTABLE(20b,100b)
222 " .long 19b,100b\n" 220 _ASM_EXTABLE(21b,100b)
223 " .long 20b,100b\n" 221 _ASM_EXTABLE(22b,100b)
224 " .long 21b,100b\n" 222 _ASM_EXTABLE(23b,100b)
225 " .long 22b,100b\n" 223 _ASM_EXTABLE(24b,100b)
226 " .long 23b,100b\n" 224 _ASM_EXTABLE(25b,100b)
227 " .long 24b,100b\n" 225 _ASM_EXTABLE(26b,100b)
228 " .long 25b,100b\n" 226 _ASM_EXTABLE(27b,100b)
229 " .long 26b,100b\n" 227 _ASM_EXTABLE(28b,100b)
230 " .long 27b,100b\n" 228 _ASM_EXTABLE(29b,100b)
231 " .long 28b,100b\n" 229 _ASM_EXTABLE(30b,100b)
232 " .long 29b,100b\n" 230 _ASM_EXTABLE(31b,100b)
233 " .long 30b,100b\n" 231 _ASM_EXTABLE(32b,100b)
234 " .long 31b,100b\n" 232 _ASM_EXTABLE(33b,100b)
235 " .long 32b,100b\n" 233 _ASM_EXTABLE(34b,100b)
236 " .long 33b,100b\n" 234 _ASM_EXTABLE(35b,100b)
237 " .long 34b,100b\n" 235 _ASM_EXTABLE(36b,100b)
238 " .long 35b,100b\n" 236 _ASM_EXTABLE(37b,100b)
239 " .long 36b,100b\n" 237 _ASM_EXTABLE(99b,101b)
240 " .long 37b,100b\n"
241 " .long 99b,101b\n"
242 ".previous"
243 : "=&c"(size), "=&D" (d0), "=&S" (d1) 238 : "=&c"(size), "=&D" (d0), "=&S" (d1)
244 : "1"(to), "2"(from), "0"(size) 239 : "1"(to), "2"(from), "0"(size)
245 : "eax", "edx", "memory"); 240 : "eax", "edx", "memory");
@@ -312,29 +307,26 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
312 " popl %0\n" 307 " popl %0\n"
313 " jmp 8b\n" 308 " jmp 8b\n"
314 ".previous\n" 309 ".previous\n"
315 ".section __ex_table,\"a\"\n" 310 _ASM_EXTABLE(0b,16b)
316 " .align 4\n" 311 _ASM_EXTABLE(1b,16b)
317 " .long 0b,16b\n" 312 _ASM_EXTABLE(2b,16b)
318 " .long 1b,16b\n" 313 _ASM_EXTABLE(21b,16b)
319 " .long 2b,16b\n" 314 _ASM_EXTABLE(3b,16b)
320 " .long 21b,16b\n" 315 _ASM_EXTABLE(31b,16b)
321 " .long 3b,16b\n" 316 _ASM_EXTABLE(4b,16b)
322 " .long 31b,16b\n" 317 _ASM_EXTABLE(41b,16b)
323 " .long 4b,16b\n" 318 _ASM_EXTABLE(10b,16b)
324 " .long 41b,16b\n" 319 _ASM_EXTABLE(51b,16b)
325 " .long 10b,16b\n" 320 _ASM_EXTABLE(11b,16b)
326 " .long 51b,16b\n" 321 _ASM_EXTABLE(61b,16b)
327 " .long 11b,16b\n" 322 _ASM_EXTABLE(12b,16b)
328 " .long 61b,16b\n" 323 _ASM_EXTABLE(71b,16b)
329 " .long 12b,16b\n" 324 _ASM_EXTABLE(13b,16b)
330 " .long 71b,16b\n" 325 _ASM_EXTABLE(81b,16b)
331 " .long 13b,16b\n" 326 _ASM_EXTABLE(14b,16b)
332 " .long 81b,16b\n" 327 _ASM_EXTABLE(91b,16b)
333 " .long 14b,16b\n" 328 _ASM_EXTABLE(6b,9b)
334 " .long 91b,16b\n" 329 _ASM_EXTABLE(7b,16b)
335 " .long 6b,9b\n"
336 " .long 7b,16b\n"
337 ".previous"
338 : "=&c"(size), "=&D" (d0), "=&S" (d1) 330 : "=&c"(size), "=&D" (d0), "=&S" (d1)
339 : "1"(to), "2"(from), "0"(size) 331 : "1"(to), "2"(from), "0"(size)
340 : "eax", "edx", "memory"); 332 : "eax", "edx", "memory");
@@ -414,29 +406,26 @@ static unsigned long __copy_user_zeroing_intel_nocache(void *to,
414 " popl %0\n" 406 " popl %0\n"
415 " jmp 8b\n" 407 " jmp 8b\n"
416 ".previous\n" 408 ".previous\n"
417 ".section __ex_table,\"a\"\n" 409 _ASM_EXTABLE(0b,16b)
418 " .align 4\n" 410 _ASM_EXTABLE(1b,16b)
419 " .long 0b,16b\n" 411 _ASM_EXTABLE(2b,16b)
420 " .long 1b,16b\n" 412 _ASM_EXTABLE(21b,16b)
421 " .long 2b,16b\n" 413 _ASM_EXTABLE(3b,16b)
422 " .long 21b,16b\n" 414 _ASM_EXTABLE(31b,16b)
423 " .long 3b,16b\n" 415 _ASM_EXTABLE(4b,16b)
424 " .long 31b,16b\n" 416 _ASM_EXTABLE(41b,16b)
425 " .long 4b,16b\n" 417 _ASM_EXTABLE(10b,16b)
426 " .long 41b,16b\n" 418 _ASM_EXTABLE(51b,16b)
427 " .long 10b,16b\n" 419 _ASM_EXTABLE(11b,16b)
428 " .long 51b,16b\n" 420 _ASM_EXTABLE(61b,16b)
429 " .long 11b,16b\n" 421 _ASM_EXTABLE(12b,16b)
430 " .long 61b,16b\n" 422 _ASM_EXTABLE(71b,16b)
431 " .long 12b,16b\n" 423 _ASM_EXTABLE(13b,16b)
432 " .long 71b,16b\n" 424 _ASM_EXTABLE(81b,16b)
433 " .long 13b,16b\n" 425 _ASM_EXTABLE(14b,16b)
434 " .long 81b,16b\n" 426 _ASM_EXTABLE(91b,16b)
435 " .long 14b,16b\n" 427 _ASM_EXTABLE(6b,9b)
436 " .long 91b,16b\n" 428 _ASM_EXTABLE(7b,16b)
437 " .long 6b,9b\n"
438 " .long 7b,16b\n"
439 ".previous"
440 : "=&c"(size), "=&D" (d0), "=&S" (d1) 429 : "=&c"(size), "=&D" (d0), "=&S" (d1)
441 : "1"(to), "2"(from), "0"(size) 430 : "1"(to), "2"(from), "0"(size)
442 : "eax", "edx", "memory"); 431 : "eax", "edx", "memory");
@@ -505,29 +494,26 @@ static unsigned long __copy_user_intel_nocache(void *to,
505 "9: lea 0(%%eax,%0,4),%0\n" 494 "9: lea 0(%%eax,%0,4),%0\n"
506 "16: jmp 8b\n" 495 "16: jmp 8b\n"
507 ".previous\n" 496 ".previous\n"
508 ".section __ex_table,\"a\"\n" 497 _ASM_EXTABLE(0b,16b)
509 " .align 4\n" 498 _ASM_EXTABLE(1b,16b)
510 " .long 0b,16b\n" 499 _ASM_EXTABLE(2b,16b)
511 " .long 1b,16b\n" 500 _ASM_EXTABLE(21b,16b)
512 " .long 2b,16b\n" 501 _ASM_EXTABLE(3b,16b)
513 " .long 21b,16b\n" 502 _ASM_EXTABLE(31b,16b)
514 " .long 3b,16b\n" 503 _ASM_EXTABLE(4b,16b)
515 " .long 31b,16b\n" 504 _ASM_EXTABLE(41b,16b)
516 " .long 4b,16b\n" 505 _ASM_EXTABLE(10b,16b)
517 " .long 41b,16b\n" 506 _ASM_EXTABLE(51b,16b)
518 " .long 10b,16b\n" 507 _ASM_EXTABLE(11b,16b)
519 " .long 51b,16b\n" 508 _ASM_EXTABLE(61b,16b)
520 " .long 11b,16b\n" 509 _ASM_EXTABLE(12b,16b)
521 " .long 61b,16b\n" 510 _ASM_EXTABLE(71b,16b)
522 " .long 12b,16b\n" 511 _ASM_EXTABLE(13b,16b)
523 " .long 71b,16b\n" 512 _ASM_EXTABLE(81b,16b)
524 " .long 13b,16b\n" 513 _ASM_EXTABLE(14b,16b)
525 " .long 81b,16b\n" 514 _ASM_EXTABLE(91b,16b)
526 " .long 14b,16b\n" 515 _ASM_EXTABLE(6b,9b)
527 " .long 91b,16b\n" 516 _ASM_EXTABLE(7b,16b)
528 " .long 6b,9b\n"
529 " .long 7b,16b\n"
530 ".previous"
531 : "=&c"(size), "=&D" (d0), "=&S" (d1) 517 : "=&c"(size), "=&D" (d0), "=&S" (d1)
532 : "1"(to), "2"(from), "0"(size) 518 : "1"(to), "2"(from), "0"(size)
533 : "eax", "edx", "memory"); 519 : "eax", "edx", "memory");
@@ -574,12 +560,9 @@ do { \
574 "3: lea 0(%3,%0,4),%0\n" \ 560 "3: lea 0(%3,%0,4),%0\n" \
575 " jmp 2b\n" \ 561 " jmp 2b\n" \
576 ".previous\n" \ 562 ".previous\n" \
577 ".section __ex_table,\"a\"\n" \ 563 _ASM_EXTABLE(4b,5b) \
578 " .align 4\n" \ 564 _ASM_EXTABLE(0b,3b) \
579 " .long 4b,5b\n" \ 565 _ASM_EXTABLE(1b,2b) \
580 " .long 0b,3b\n" \
581 " .long 1b,2b\n" \
582 ".previous" \
583 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ 566 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
584 : "3"(size), "0"(size), "1"(to), "2"(from) \ 567 : "3"(size), "0"(size), "1"(to), "2"(from) \
585 : "memory"); \ 568 : "memory"); \
@@ -616,12 +599,9 @@ do { \
616 " popl %0\n" \ 599 " popl %0\n" \
617 " jmp 2b\n" \ 600 " jmp 2b\n" \
618 ".previous\n" \ 601 ".previous\n" \
619 ".section __ex_table,\"a\"\n" \ 602 _ASM_EXTABLE(4b,5b) \
620 " .align 4\n" \ 603 _ASM_EXTABLE(0b,3b) \
621 " .long 4b,5b\n" \ 604 _ASM_EXTABLE(1b,6b) \
622 " .long 0b,3b\n" \
623 " .long 1b,6b\n" \
624 ".previous" \
625 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ 605 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
626 : "3"(size), "0"(size), "1"(to), "2"(from) \ 606 : "3"(size), "0"(size), "1"(to), "2"(from) \
627 : "memory"); \ 607 : "memory"); \
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 1fb85dbe390a..903ec1e9c326 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,11 +1,23 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <linux/spinlock.h> 2#include <linux/spinlock.h>
3#include <linux/sort.h>
3#include <asm/uaccess.h> 4#include <asm/uaccess.h>
4 5
6static inline unsigned long
7ex_insn_addr(const struct exception_table_entry *x)
8{
9 return (unsigned long)&x->insn + x->insn;
10}
11static inline unsigned long
12ex_fixup_addr(const struct exception_table_entry *x)
13{
14 return (unsigned long)&x->fixup + x->fixup;
15}
5 16
6int fixup_exception(struct pt_regs *regs) 17int fixup_exception(struct pt_regs *regs)
7{ 18{
8 const struct exception_table_entry *fixup; 19 const struct exception_table_entry *fixup;
20 unsigned long new_ip;
9 21
10#ifdef CONFIG_PNPBIOS 22#ifdef CONFIG_PNPBIOS
11 if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { 23 if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -23,15 +35,135 @@ int fixup_exception(struct pt_regs *regs)
23 35
24 fixup = search_exception_tables(regs->ip); 36 fixup = search_exception_tables(regs->ip);
25 if (fixup) { 37 if (fixup) {
26 /* If fixup is less than 16, it means uaccess error */ 38 new_ip = ex_fixup_addr(fixup);
27 if (fixup->fixup < 16) { 39
40 if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
41 /* Special hack for uaccess_err */
28 current_thread_info()->uaccess_err = 1; 42 current_thread_info()->uaccess_err = 1;
29 regs->ip += fixup->fixup; 43 new_ip -= 0x7ffffff0;
30 return 1;
31 } 44 }
32 regs->ip = fixup->fixup; 45 regs->ip = new_ip;
33 return 1; 46 return 1;
34 } 47 }
35 48
36 return 0; 49 return 0;
37} 50}
51
52/* Restricted version used during very early boot */
53int __init early_fixup_exception(unsigned long *ip)
54{
55 const struct exception_table_entry *fixup;
56 unsigned long new_ip;
57
58 fixup = search_exception_tables(*ip);
59 if (fixup) {
60 new_ip = ex_fixup_addr(fixup);
61
62 if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
63 /* uaccess handling not supported during early boot */
64 return 0;
65 }
66
67 *ip = new_ip;
68 return 1;
69 }
70
71 return 0;
72}
73
74/*
75 * Search one exception table for an entry corresponding to the
76 * given instruction address, and return the address of the entry,
77 * or NULL if none is found.
78 * We use a binary search, and thus we assume that the table is
79 * already sorted.
80 */
81const struct exception_table_entry *
82search_extable(const struct exception_table_entry *first,
83 const struct exception_table_entry *last,
84 unsigned long value)
85{
86 while (first <= last) {
87 const struct exception_table_entry *mid;
88 unsigned long addr;
89
90 mid = ((last - first) >> 1) + first;
91 addr = ex_insn_addr(mid);
92 if (addr < value)
93 first = mid + 1;
94 else if (addr > value)
95 last = mid - 1;
96 else
97 return mid;
98 }
99 return NULL;
100}
101
102/*
103 * The exception table needs to be sorted so that the binary
104 * search that we use to find entries in it works properly.
105 * This is used both for the kernel exception table and for
106 * the exception tables of modules that get loaded.
107 *
108 */
109static int cmp_ex(const void *a, const void *b)
110{
111 const struct exception_table_entry *x = a, *y = b;
112
113 /*
114 * This value will always end up fitting in an int, because on
115 * both i386 and x86-64 the kernel symbol-reachable address
116 * space is < 2 GiB.
117 *
118 * This compare is only valid after normalization.
119 */
120 return x->insn - y->insn;
121}
122
123void sort_extable(struct exception_table_entry *start,
124 struct exception_table_entry *finish)
125{
126 struct exception_table_entry *p;
127 int i;
128
129 /* Convert all entries to being relative to the start of the section */
130 i = 0;
131 for (p = start; p < finish; p++) {
132 p->insn += i;
133 i += 4;
134 p->fixup += i;
135 i += 4;
136 }
137
138 sort(start, finish - start, sizeof(struct exception_table_entry),
139 cmp_ex, NULL);
140
141 /* Denormalize all entries */
142 i = 0;
143 for (p = start; p < finish; p++) {
144 p->insn -= i;
145 i += 4;
146 p->fixup -= i;
147 i += 4;
148 }
149}
150
151#ifdef CONFIG_MODULES
152/*
153 * If the exception table is sorted, any entries referring to the module init
154 * will be at the beginning or the end.
155 */
156void trim_init_extable(struct module *m)
157{
158 /* trim the beginning */
159 while (m->num_exentries &&
160 within_module_init(ex_insn_addr(&m->extable[0]), m)) {
161 m->extable++;
162 m->num_exentries--;
163 }
164 /* trim the end */
165 while (m->num_exentries &&
166 within_module_init(ex_insn_addr(&m->extable[m->num_exentries-1]), m))
167 m->num_exentries--;
168}
169#endif /* CONFIG_MODULES */
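
The normalize/sort/denormalize dance in sort_extable() exists because each offset is relative to its own field: swapping two entries during the sort would silently retarget them, and cmp_ex() itself is only meaningful once every value is rebased. Adding the field's offset within the table makes each value relative to the table start (invariant under swaps); subtracting it afterwards restores the self-relative encoding. A userspace model of the round trip, with hypothetical targets:

    #include <stdio.h>
    #include <stdlib.h>

    struct ex_entry { int insn; int fixup; };   /* self-relative */

    static int cmp_ex(const void *a, const void *b)
    {
            const struct ex_entry *x = a, *y = b;
            return x->insn - y->insn;   /* valid only while normalized */
    }

    static void sort_entries(struct ex_entry *e, int n)
    {
            int i, off;

            for (i = 0, off = 0; i < n; i++) {  /* normalize */
                    e[i].insn  += off; off += 4;
                    e[i].fixup += off; off += 4;
            }
            qsort(e, n, sizeof(*e), cmp_ex);
            for (i = 0, off = 0; i < n; i++) {  /* denormalize */
                    e[i].insn  -= off; off += 4;
                    e[i].fixup -= off; off += 4;
            }
    }

    int main(void)
    {
            /* fields live at table offsets 0,4 and 8,12; each stored
             * value is (target - field offset), targets out of order */
            struct ex_entry t[2] = {
                    { 0x200 - 0, 0x300 - 4  },  /* targets 0x200/0x300 */
                    { 0x100 - 8, 0x180 - 12 },  /* targets 0x100/0x180 */
            };
            int i;

            sort_entries(t, 2);
            for (i = 0; i < 2; i++)     /* resolve like ex_*_addr() */
                    printf("insn %#x fixup %#x\n",
                           8 * i + 0 + t[i].insn, 8 * i + 4 + t[i].fixup);
            return 0;   /* prints 0x100/0x180 then 0x200/0x300 */
    }
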
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 4f0cec7e4ffb..319b6f2fb8b9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -29,8 +29,14 @@ int direct_gbpages
29#endif 29#endif
30; 30;
31 31
32static void __init find_early_table_space(unsigned long end, int use_pse, 32struct map_range {
33 int use_gbpages) 33 unsigned long start;
34 unsigned long end;
35 unsigned page_size_mask;
36};
37
38static void __init find_early_table_space(struct map_range *mr, unsigned long end,
39 int use_pse, int use_gbpages)
34{ 40{
35 unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; 41 unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
36 phys_addr_t base; 42 phys_addr_t base;
@@ -55,6 +61,9 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
55#ifdef CONFIG_X86_32 61#ifdef CONFIG_X86_32
56 extra += PMD_SIZE; 62 extra += PMD_SIZE;
57#endif 63#endif
64 /* The first 2/4M doesn't use large pages. */
65 extra += mr->end - mr->start;
66
58 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; 67 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
59 } else 68 } else
60 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; 69 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -84,12 +93,6 @@ void __init native_pagetable_reserve(u64 start, u64 end)
84 memblock_reserve(start, end - start); 93 memblock_reserve(start, end - start);
85} 94}
86 95
87struct map_range {
88 unsigned long start;
89 unsigned long end;
90 unsigned page_size_mask;
91};
92
93#ifdef CONFIG_X86_32 96#ifdef CONFIG_X86_32
94#define NR_RANGE_MR 3 97#define NR_RANGE_MR 3
95#else /* CONFIG_X86_64 */ 98#else /* CONFIG_X86_64 */
@@ -261,7 +264,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
261 * nodes are discovered. 264 * nodes are discovered.
262 */ 265 */
263 if (!after_bootmem) 266 if (!after_bootmem)
264 find_early_table_space(end, use_pse, use_gbpages); 267 find_early_table_space(&mr[0], end, use_pse, use_gbpages);
265 268
266 for (i = 0; i < nr_range; i++) 269 for (i = 0; i < nr_range; i++)
267 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, 270 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
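
The new mr[] plumbing feeds find_early_table_space() the head range: with PSE enabled only whole 2 MiB (or 4 MiB) units get large pages, so everything in mr[0] below the first large-page boundary still consumes 4 KiB PTEs that the old estimate omitted. A toy calculation with assumed numbers:

    #include <stdio.h>

    int main(void)
    {
            /* assumed: mapping starts at 1 MiB, so mr[0] spans the
             * 1 MiB below the first 2 MiB boundary, all 4 KiB pages */
            unsigned long head = 0x200000 - 0x100000;       /* 1 MiB */
            unsigned long ptes = (head + 4096 - 1) >> 12;

            printf("extra PTEs for mr[0]: %lu\n", ptes);    /* 256 */
            return 0;
    }
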
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index fc18be0f6f29..2b6b4a3c8beb 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -407,12 +407,12 @@ static unsigned long __meminit
407phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, 407phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
408 unsigned long page_size_mask, pgprot_t prot) 408 unsigned long page_size_mask, pgprot_t prot)
409{ 409{
410 unsigned long pages = 0; 410 unsigned long pages = 0, next;
411 unsigned long last_map_addr = end; 411 unsigned long last_map_addr = end;
412 412
413 int i = pmd_index(address); 413 int i = pmd_index(address);
414 414
415 for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { 415 for (; i < PTRS_PER_PMD; i++, address = next) {
416 unsigned long pte_phys; 416 unsigned long pte_phys;
417 pmd_t *pmd = pmd_page + pmd_index(address); 417 pmd_t *pmd = pmd_page + pmd_index(address);
418 pte_t *pte; 418 pte_t *pte;
@@ -426,6 +426,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
426 break; 426 break;
427 } 427 }
428 428
429 next = (address & PMD_MASK) + PMD_SIZE;
430
429 if (pmd_val(*pmd)) { 431 if (pmd_val(*pmd)) {
430 if (!pmd_large(*pmd)) { 432 if (!pmd_large(*pmd)) {
431 spin_lock(&init_mm.page_table_lock); 433 spin_lock(&init_mm.page_table_lock);
@@ -449,7 +451,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
449 * attributes. 451 * attributes.
450 */ 452 */
451 if (page_size_mask & (1 << PG_LEVEL_2M)) { 453 if (page_size_mask & (1 << PG_LEVEL_2M)) {
452 pages++; 454 last_map_addr = next;
453 continue; 455 continue;
454 } 456 }
455 new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); 457 new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
@@ -462,7 +464,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
462 pfn_pte(address >> PAGE_SHIFT, 464 pfn_pte(address >> PAGE_SHIFT,
463 __pgprot(pgprot_val(prot) | _PAGE_PSE))); 465 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
464 spin_unlock(&init_mm.page_table_lock); 466 spin_unlock(&init_mm.page_table_lock);
465 last_map_addr = (address & PMD_MASK) + PMD_SIZE; 467 last_map_addr = next;
466 continue; 468 continue;
467 } 469 }
468 470
@@ -482,11 +484,11 @@ static unsigned long __meminit
482phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, 484phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
483 unsigned long page_size_mask) 485 unsigned long page_size_mask)
484{ 486{
485 unsigned long pages = 0; 487 unsigned long pages = 0, next;
486 unsigned long last_map_addr = end; 488 unsigned long last_map_addr = end;
487 int i = pud_index(addr); 489 int i = pud_index(addr);
488 490
489 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) { 491 for (; i < PTRS_PER_PUD; i++, addr = next) {
490 unsigned long pmd_phys; 492 unsigned long pmd_phys;
491 pud_t *pud = pud_page + pud_index(addr); 493 pud_t *pud = pud_page + pud_index(addr);
492 pmd_t *pmd; 494 pmd_t *pmd;
@@ -495,8 +497,9 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
495 if (addr >= end) 497 if (addr >= end)
496 break; 498 break;
497 499
498 if (!after_bootmem && 500 next = (addr & PUD_MASK) + PUD_SIZE;
499 !e820_any_mapped(addr, addr+PUD_SIZE, 0)) { 501
502 if (!after_bootmem && !e820_any_mapped(addr, next, 0)) {
500 set_pud(pud, __pud(0)); 503 set_pud(pud, __pud(0));
501 continue; 504 continue;
502 } 505 }
@@ -523,7 +526,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
523 * attributes. 526 * attributes.
524 */ 527 */
525 if (page_size_mask & (1 << PG_LEVEL_1G)) { 528 if (page_size_mask & (1 << PG_LEVEL_1G)) {
526 pages++; 529 last_map_addr = next;
527 continue; 530 continue;
528 } 531 }
529 prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); 532 prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
@@ -535,7 +538,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
535 set_pte((pte_t *)pud, 538 set_pte((pte_t *)pud,
536 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 539 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
537 spin_unlock(&init_mm.page_table_lock); 540 spin_unlock(&init_mm.page_table_lock);
538 last_map_addr = (addr & PUD_MASK) + PUD_SIZE; 541 last_map_addr = next;
539 continue; 542 continue;
540 } 543 }
541 544
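
Both loops now step with next = (addr & MASK) + SIZE rather than addr += SIZE, which matters when the starting address is not large-page aligned: the increment form drifts off the boundary, while the mask form snaps to it. A minimal demonstration at the PMD level:

    #include <stdio.h>

    #define PMD_SIZE (1UL << 21)            /* 2 MiB */
    #define PMD_MASK (~(PMD_SIZE - 1))

    int main(void)
    {
            unsigned long address = 0x234000;   /* not 2 MiB aligned */

            /* old style: 0x234000 + 0x200000 = 0x434000 (misaligned) */
            /* new style: snaps to the next PMD boundary */
            printf("next = %#lx\n", (address & PMD_MASK) + PMD_SIZE);
            return 0;                           /* prints 0x400000 */
    }

It also lets last_map_addr reuse next instead of recomputing the boundary, and drops the spurious pages++ when an existing large mapping is merely revalidated.
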
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 53489ff6bf82..871dd8868170 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -339,9 +339,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
339 } else { 339 } else {
340 unsigned long n; 340 unsigned long n;
341 341
342 n = simple_strtoul(emu_cmdline, NULL, 0); 342 n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
343 ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n); 343 ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
344 } 344 }
345 if (*emu_cmdline == ':')
346 emu_cmdline++;
345 347
346 if (ret < 0) 348 if (ret < 0)
347 goto no_emu; 349 goto no_emu;
@@ -418,7 +420,9 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
418 int physj = emu_nid_to_phys[j]; 420 int physj = emu_nid_to_phys[j];
419 int dist; 421 int dist;
420 422
421 if (physi >= numa_dist_cnt || physj >= numa_dist_cnt) 423 if (get_option(&emu_cmdline, &dist) == 2)
424 ;
425 else if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
422 dist = physi == physj ? 426 dist = physi == physj ?
423 LOCAL_DISTANCE : REMOTE_DISTANCE; 427 LOCAL_DISTANCE : REMOTE_DISTANCE;
424 else 428 else
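
Taken together, the two numa_emulation() hunks extend numa=fake=: after the node spec a ':' may introduce a comma-separated list of inter-node distances, read pairwise with get_option() (which, per lib/cmdline.c, returns 2 only when the parsed integer is followed by a comma, so only comma-terminated values override the LOCAL_DISTANCE/REMOTE_DISTANCE defaults). Inferred from the parsing rather than from documentation, a boot line for two fake nodes might look like:

    numa=fake=2:10,20,20,10,

with the trailing comma needed for the final pair to be honored by the == 2 check.
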
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index d6c0418c3e47..5e57e113b72c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -61,11 +61,13 @@ static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
61 */ 61 */
62void leave_mm(int cpu) 62void leave_mm(int cpu)
63{ 63{
64 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) 64 struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
65 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
65 BUG(); 66 BUG();
66 cpumask_clear_cpu(cpu, 67 if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
67 mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); 68 cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
68 load_cr3(swapper_pg_dir); 69 load_cr3(swapper_pg_dir);
70 }
69} 71}
70EXPORT_SYMBOL_GPL(leave_mm); 72EXPORT_SYMBOL_GPL(leave_mm);
71 73
@@ -152,8 +154,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
152 * BUG(); 154 * BUG();
153 */ 155 */
154 156
155 if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { 157 if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) {
156 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { 158 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
157 if (f->flush_va == TLB_FLUSH_ALL) 159 if (f->flush_va == TLB_FLUSH_ALL)
158 local_flush_tlb(); 160 local_flush_tlb();
159 else 161 else
@@ -322,7 +324,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
322static void do_flush_tlb_all(void *info) 324static void do_flush_tlb_all(void *info)
323{ 325{
324 __flush_tlb_all(); 326 __flush_tlb_all();
325 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) 327 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
326 leave_mm(smp_processor_id()); 328 leave_mm(smp_processor_id());
327} 329}
328 330
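
percpu_read() was the x86-private accessor; this_cpu_read() is its generic this_cpu_* replacement and on x86 still compiles down to a single segment-relative load, so the conversion is mechanical:

    /* both read the executing CPU's instance; only the spelling is
     * portable now */
    int state = this_cpu_read(cpu_tlbstate.state);

The leave_mm() change is the substantive one: testing mm_cpumask() before clearing makes the function safe to call when the CPU has already left the mm, avoiding a redundant CR3 reload.
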
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index e76e18c94a3c..3af5a1e79c9c 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -11,6 +11,8 @@ obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
11obj-$(CONFIG_ACPI) += acpi.o 11obj-$(CONFIG_ACPI) += acpi.o
12obj-y += legacy.o irq.o 12obj-y += legacy.o irq.o
13 13
14obj-$(CONFIG_STA2X11) += sta2x11-fixup.o
15
14obj-$(CONFIG_X86_VISWS) += visws.o 16obj-$(CONFIG_X86_VISWS) += visws.o
15 17
16obj-$(CONFIG_X86_NUMAQ) += numaq_32.o 18obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index ed2835e148b5..fc09c2754e08 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -9,11 +9,11 @@
9 9
10struct pci_root_info { 10struct pci_root_info {
11 struct acpi_device *bridge; 11 struct acpi_device *bridge;
12 char *name; 12 char name[16];
13 unsigned int res_num; 13 unsigned int res_num;
14 struct resource *res; 14 struct resource *res;
15 struct list_head *resources;
16 int busnum; 15 int busnum;
16 struct pci_sysdata sd;
17}; 17};
18 18
19static bool pci_use_crs = true; 19static bool pci_use_crs = true;
@@ -245,13 +245,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
245 return AE_OK; 245 return AE_OK;
246} 246}
247 247
248static bool resource_contains(struct resource *res, resource_size_t point)
249{
250 if (res->start <= point && point <= res->end)
251 return true;
252 return false;
253}
254
255static void coalesce_windows(struct pci_root_info *info, unsigned long type) 248static void coalesce_windows(struct pci_root_info *info, unsigned long type)
256{ 249{
257 int i, j; 250 int i, j;
@@ -272,10 +265,7 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type)
272 * our resources no longer match the ACPI _CRS, but 265 * our resources no longer match the ACPI _CRS, but
273 * the kernel resource tree doesn't allow overlaps. 266 * the kernel resource tree doesn't allow overlaps.
274 */ 267 */
275 if (resource_contains(res1, res2->start) || 268 if (resource_overlaps(res1, res2)) {
276 resource_contains(res1, res2->end) ||
277 resource_contains(res2, res1->start) ||
278 resource_contains(res2, res1->end)) {
279 res1->start = min(res1->start, res2->start); 269 res1->start = min(res1->start, res2->start);
280 res1->end = max(res1->end, res2->end); 270 res1->end = max(res1->end, res2->end);
281 dev_info(&info->bridge->dev, 271 dev_info(&info->bridge->dev,
@@ -287,7 +277,8 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type)
287 } 277 }
288} 278}
289 279
290static void add_resources(struct pci_root_info *info) 280static void add_resources(struct pci_root_info *info,
281 struct list_head *resources)
291{ 282{
292 int i; 283 int i;
293 struct resource *res, *root, *conflict; 284 struct resource *res, *root, *conflict;
@@ -311,53 +302,74 @@ static void add_resources(struct pci_root_info *info)
311 "ignoring host bridge window %pR (conflicts with %s %pR)\n", 302 "ignoring host bridge window %pR (conflicts with %s %pR)\n",
312 res, conflict->name, conflict); 303 res, conflict->name, conflict);
313 else 304 else
314 pci_add_resource(info->resources, res); 305 pci_add_resource(resources, res);
315 } 306 }
316} 307}
317 308
309static void free_pci_root_info_res(struct pci_root_info *info)
310{
311 kfree(info->res);
312 info->res = NULL;
313 info->res_num = 0;
314}
315
316static void __release_pci_root_info(struct pci_root_info *info)
317{
318 int i;
319 struct resource *res;
320
321 for (i = 0; i < info->res_num; i++) {
322 res = &info->res[i];
323
324 if (!res->parent)
325 continue;
326
327 if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO)))
328 continue;
329
330 release_resource(res);
331 }
332
333 free_pci_root_info_res(info);
334
335 kfree(info);
336}
337static void release_pci_root_info(struct pci_host_bridge *bridge)
338{
339 struct pci_root_info *info = bridge->release_data;
340
341 __release_pci_root_info(info);
342}
343
318static void 344static void
319get_current_resources(struct acpi_device *device, int busnum, 345probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
320 int domain, struct list_head *resources) 346 int busnum, int domain)
321{ 347{
322 struct pci_root_info info;
323 size_t size; 348 size_t size;
324 349
325 info.bridge = device; 350 info->bridge = device;
326 info.res_num = 0; 351 info->res_num = 0;
327 info.resources = resources;
328 acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, 352 acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource,
329 &info); 353 info);
330 if (!info.res_num) 354 if (!info->res_num)
331 return; 355 return;
332 356
333 size = sizeof(*info.res) * info.res_num; 357 size = sizeof(*info->res) * info->res_num;
334 info.res = kmalloc(size, GFP_KERNEL); 358 info->res_num = 0;
335 if (!info.res) 359 info->res = kmalloc(size, GFP_KERNEL);
360 if (!info->res)
336 return; 361 return;
337 362
338 info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); 363 sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum);
339 if (!info.name)
340 goto name_alloc_fail;
341 364
342 info.res_num = 0;
343 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, 365 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
344 &info); 366 info);
345
346 if (pci_use_crs) {
347 add_resources(&info);
348
349 return;
350 }
351
352 kfree(info.name);
353
354name_alloc_fail:
355 kfree(info.res);
356} 367}
357 368
358struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) 369struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
359{ 370{
360 struct acpi_device *device = root->device; 371 struct acpi_device *device = root->device;
372 struct pci_root_info *info = NULL;
361 int domain = root->segment; 373 int domain = root->segment;
362 int busnum = root->secondary.start; 374 int busnum = root->secondary.start;
363 LIST_HEAD(resources); 375 LIST_HEAD(resources);
@@ -389,17 +401,14 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
389 if (node != -1 && !node_online(node)) 401 if (node != -1 && !node_online(node))
390 node = -1; 402 node = -1;
391 403
392 /* Allocate per-root-bus (not per bus) arch-specific data. 404 info = kzalloc(sizeof(*info), GFP_KERNEL);
393 * TODO: leak; this memory is never freed. 405 if (!info) {
394 * It's arguable whether it's worth the trouble to care.
395 */
396 sd = kzalloc(sizeof(*sd), GFP_KERNEL);
397 if (!sd) {
398 printk(KERN_WARNING "pci_bus %04x:%02x: " 406 printk(KERN_WARNING "pci_bus %04x:%02x: "
399 "ignored (out of memory)\n", domain, busnum); 407 "ignored (out of memory)\n", domain, busnum);
400 return NULL; 408 return NULL;
401 } 409 }
402 410
411 sd = &info->sd;
403 sd->domain = domain; 412 sd->domain = domain;
404 sd->node = node; 413 sd->node = node;
405 /* 414 /*
@@ -413,22 +422,32 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
413 * be replaced by sd. 422 * be replaced by sd.
414 */ 423 */
415 memcpy(bus->sysdata, sd, sizeof(*sd)); 424 memcpy(bus->sysdata, sd, sizeof(*sd));
416 kfree(sd); 425 kfree(info);
417 } else { 426 } else {
418 get_current_resources(device, busnum, domain, &resources); 427 probe_pci_root_info(info, device, busnum, domain);
419 428
420 /* 429 /*
421 * _CRS with no apertures is normal, so only fall back to 430 * _CRS with no apertures is normal, so only fall back to
422 * defaults or native bridge info if we're ignoring _CRS. 431 * defaults or native bridge info if we're ignoring _CRS.
423 */ 432 */
424 if (!pci_use_crs) 433 if (pci_use_crs)
434 add_resources(info, &resources);
435 else {
436 free_pci_root_info_res(info);
425 x86_pci_root_bus_resources(busnum, &resources); 437 x86_pci_root_bus_resources(busnum, &resources);
438 }
439
426 bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, 440 bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd,
427 &resources); 441 &resources);
428 if (bus) 442 if (bus) {
429 bus->subordinate = pci_scan_child_bus(bus); 443 bus->subordinate = pci_scan_child_bus(bus);
430 else 444 pci_set_host_bridge_release(
445 to_pci_host_bridge(bus->bridge),
446 release_pci_root_info, info);
447 } else {
431 pci_free_resource_list(&resources); 448 pci_free_resource_list(&resources);
449 __release_pci_root_info(info);
450 }
432 } 451 }
433 452
434 /* After the PCI-E bus has been walked and all devices discovered, 453 /* After the PCI-E bus has been walked and all devices discovered,
@@ -445,9 +464,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
445 } 464 }
446 } 465 }
447 466
448 if (!bus)
449 kfree(sd);
450
451 if (bus && node != -1) { 467 if (bus && node != -1) {
452#ifdef CONFIG_ACPI_NUMA 468#ifdef CONFIG_ACPI_NUMA
453 if (pxm >= 0) 469 if (pxm >= 0)
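
This closes the leak called out in the removed TODO: the per-root pci_sysdata is embedded in a kzalloc'ed pci_root_info whose lifetime is handed to the host bridge. The pattern, distilled from the hunk above (the error path frees by hand because the release callback is only armed once the bus exists):

    bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, &resources);
    if (bus)
            pci_set_host_bridge_release(to_pci_host_bridge(bus->bridge),
                                        release_pci_root_info, info);
    else
            __release_pci_root_info(info);  /* callback never armed */
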
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 0567df3890e1..5aed49bff058 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -32,6 +32,27 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = {
32 32
33#define RANGE_NUM 16 33#define RANGE_NUM 16
34 34
35static struct pci_root_info __init *find_pci_root_info(int node, int link)
36{
37 struct pci_root_info *info;
38
39 /* find the position */
40 list_for_each_entry(info, &pci_root_infos, list)
41 if (info->node == node && info->link == link)
42 return info;
43
44 return NULL;
45}
46
47static void __init set_mp_bus_range_to_node(int min_bus, int max_bus, int node)
48{
49#ifdef CONFIG_NUMA
50 int j;
51
52 for (j = min_bus; j <= max_bus; j++)
53 set_mp_bus_to_node(j, node);
54#endif
55}
35/** 56/**
36 * early_fill_mp_bus_to_node() 57 * early_fill_mp_bus_to_node()
37 * called before pcibios_scan_root and pci_scan_bus 58 * called before pcibios_scan_root and pci_scan_bus
@@ -41,7 +62,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = {
41static int __init early_fill_mp_bus_info(void) 62static int __init early_fill_mp_bus_info(void)
42{ 63{
43 int i; 64 int i;
44 int j;
45 unsigned bus; 65 unsigned bus;
46 unsigned slot; 66 unsigned slot;
47 int node; 67 int node;
@@ -50,7 +70,6 @@ static int __init early_fill_mp_bus_info(void)
50 int def_link; 70 int def_link;
51 struct pci_root_info *info; 71 struct pci_root_info *info;
52 u32 reg; 72 u32 reg;
53 struct resource *res;
54 u64 start; 73 u64 start;
55 u64 end; 74 u64 end;
56 struct range range[RANGE_NUM]; 75 struct range range[RANGE_NUM];
@@ -86,7 +105,6 @@ static int __init early_fill_mp_bus_info(void)
86 if (!found) 105 if (!found)
87 return 0; 106 return 0;
88 107
89 pci_root_num = 0;
90 for (i = 0; i < 4; i++) { 108 for (i = 0; i < 4; i++) {
91 int min_bus; 109 int min_bus;
92 int max_bus; 110 int max_bus;
@@ -99,19 +117,11 @@ static int __init early_fill_mp_bus_info(void)
99 min_bus = (reg >> 16) & 0xff; 117 min_bus = (reg >> 16) & 0xff;
100 max_bus = (reg >> 24) & 0xff; 118 max_bus = (reg >> 24) & 0xff;
101 node = (reg >> 4) & 0x07; 119 node = (reg >> 4) & 0x07;
102#ifdef CONFIG_NUMA 120 set_mp_bus_range_to_node(min_bus, max_bus, node);
103 for (j = min_bus; j <= max_bus; j++)
104 set_mp_bus_to_node(j, node);
105#endif
106 link = (reg >> 8) & 0x03; 121 link = (reg >> 8) & 0x03;
107 122
108 info = &pci_root_info[pci_root_num]; 123 info = alloc_pci_root_info(min_bus, max_bus, node, link);
109 info->bus_min = min_bus;
110 info->bus_max = max_bus;
111 info->node = node;
112 info->link = link;
113 sprintf(info->name, "PCI Bus #%02x", min_bus); 124 sprintf(info->name, "PCI Bus #%02x", min_bus);
114 pci_root_num++;
115 } 125 }
116 126
117 /* get the default node and link for left over res */ 127 /* get the default node and link for left over res */
@@ -134,16 +144,10 @@ static int __init early_fill_mp_bus_info(void)
134 link = (reg >> 4) & 0x03; 144 link = (reg >> 4) & 0x03;
135 end = (reg & 0xfff000) | 0xfff; 145 end = (reg & 0xfff000) | 0xfff;
136 146
137 /* find the position */ 147 info = find_pci_root_info(node, link);
138 for (j = 0; j < pci_root_num; j++) { 148 if (!info)
139 info = &pci_root_info[j];
140 if (info->node == node && info->link == link)
141 break;
142 }
143 if (j == pci_root_num)
144 continue; /* not found */ 149 continue; /* not found */
145 150
146 info = &pci_root_info[j];
147 printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", 151 printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n",
148 node, link, start, end); 152 node, link, start, end);
149 153
@@ -155,13 +159,8 @@ static int __init early_fill_mp_bus_info(void)
155 } 159 }
156 /* add left over io port range to def node/link, [0, 0xffff] */ 160 /* add left over io port range to def node/link, [0, 0xffff] */
157 /* find the position */ 161 /* find the position */
158 for (j = 0; j < pci_root_num; j++) { 162 info = find_pci_root_info(def_node, def_link);
159 info = &pci_root_info[j]; 163 if (info) {
160 if (info->node == def_node && info->link == def_link)
161 break;
162 }
163 if (j < pci_root_num) {
164 info = &pci_root_info[j];
165 for (i = 0; i < RANGE_NUM; i++) { 164 for (i = 0; i < RANGE_NUM; i++) {
166 if (!range[i].end) 165 if (!range[i].end)
167 continue; 166 continue;
@@ -214,16 +213,10 @@ static int __init early_fill_mp_bus_info(void)
214 end <<= 8; 213 end <<= 8;
215 end |= 0xffff; 214 end |= 0xffff;
216 215
217 /* find the position */ 216 info = find_pci_root_info(node, link);
218 for (j = 0; j < pci_root_num; j++) {
219 info = &pci_root_info[j];
220 if (info->node == node && info->link == link)
221 break;
222 }
223 if (j == pci_root_num)
224 continue; /* not found */
225 217
226 info = &pci_root_info[j]; 218 if (!info)
219 continue;
227 220
228 printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", 221 printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]",
229 node, link, start, end); 222 node, link, start, end);
@@ -291,14 +284,8 @@ static int __init early_fill_mp_bus_info(void)
291 * add left over mmio range to def node/link ? 284 * add left over mmio range to def node/link ?
292 * that is tricky, just record range in from start_min to 4G 285 * that is tricky, just record range in from start_min to 4G
293 */ 286 */
294 for (j = 0; j < pci_root_num; j++) { 287 info = find_pci_root_info(def_node, def_link);
295 info = &pci_root_info[j]; 288 if (info) {
296 if (info->node == def_node && info->link == def_link)
297 break;
298 }
299 if (j < pci_root_num) {
300 info = &pci_root_info[j];
301
302 for (i = 0; i < RANGE_NUM; i++) { 289 for (i = 0; i < RANGE_NUM; i++) {
303 if (!range[i].end) 290 if (!range[i].end)
304 continue; 291 continue;
@@ -309,20 +296,16 @@ static int __init early_fill_mp_bus_info(void)
309 } 296 }
310 } 297 }
311 298
312 for (i = 0; i < pci_root_num; i++) { 299 list_for_each_entry(info, &pci_root_infos, list) {
313 int res_num;
314 int busnum; 300 int busnum;
301 struct pci_root_res *root_res;
315 302
316 info = &pci_root_info[i];
317 res_num = info->res_num;
318 busnum = info->bus_min; 303 busnum = info->bus_min;
319 printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", 304 printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n",
320 info->bus_min, info->bus_max, info->node, info->link); 305 info->bus_min, info->bus_max, info->node, info->link);
321 for (j = 0; j < res_num; j++) { 306 list_for_each_entry(root_res, &info->resources, list)
322 res = &info->res[j]; 307 printk(KERN_DEBUG "bus: %02x %pR\n",
323 printk(KERN_DEBUG "bus: %02x index %x %pR\n", 308 busnum, &root_res->res);
324 busnum, j, res);
325 }
326 } 309 }
327 310
328 return 0; 311 return 0;
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index f3a7c569a403..614392ced7d6 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -22,19 +22,15 @@
22static void __init cnb20le_res(u8 bus, u8 slot, u8 func) 22static void __init cnb20le_res(u8 bus, u8 slot, u8 func)
23{ 23{
24 struct pci_root_info *info; 24 struct pci_root_info *info;
25 struct pci_root_res *root_res;
25 struct resource res; 26 struct resource res;
26 u16 word1, word2; 27 u16 word1, word2;
27 u8 fbus, lbus; 28 u8 fbus, lbus;
28 int i;
29
30 info = &pci_root_info[pci_root_num];
31 pci_root_num++;
32 29
33 /* read the PCI bus numbers */ 30 /* read the PCI bus numbers */
34 fbus = read_pci_config_byte(bus, slot, func, 0x44); 31 fbus = read_pci_config_byte(bus, slot, func, 0x44);
35 lbus = read_pci_config_byte(bus, slot, func, 0x45); 32 lbus = read_pci_config_byte(bus, slot, func, 0x45);
36 info->bus_min = fbus; 33 info = alloc_pci_root_info(fbus, lbus, 0, 0);
37 info->bus_max = lbus;
38 34
39 /* 35 /*
40 * Add the legacy IDE ports on bus 0 36 * Add the legacy IDE ports on bus 0
@@ -86,8 +82,8 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func)
86 res.flags = IORESOURCE_BUS; 82 res.flags = IORESOURCE_BUS;
87 printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res); 83 printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res);
88 84
89 for (i = 0; i < info->res_num; i++) 85 list_for_each_entry(root_res, &info->resources, list)
90 printk(KERN_INFO "host bridge window %pR\n", &info->res[i]); 86 printk(KERN_INFO "host bridge window %pR\n", &root_res->res);
91} 87}
92 88
93static int __init broadcom_postcore_init(void) 89static int __init broadcom_postcore_init(void)
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index fd3f65510e9d..306579f7d0fd 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -4,35 +4,38 @@
4 4
5#include "bus_numa.h" 5#include "bus_numa.h"
6 6
7int pci_root_num; 7LIST_HEAD(pci_root_infos);
8struct pci_root_info pci_root_info[PCI_ROOT_NR];
9 8
10void x86_pci_root_bus_resources(int bus, struct list_head *resources) 9static struct pci_root_info *x86_find_pci_root_info(int bus)
11{ 10{
12 int i;
13 int j;
14 struct pci_root_info *info; 11 struct pci_root_info *info;
15 12
16 if (!pci_root_num) 13 if (list_empty(&pci_root_infos))
17 goto default_resources; 14 return NULL;
18 15
19 for (i = 0; i < pci_root_num; i++) { 16 list_for_each_entry(info, &pci_root_infos, list)
20 if (pci_root_info[i].bus_min == bus) 17 if (info->bus_min == bus)
21 break; 18 return info;
22 } 19
20 return NULL;
21}
23 22
24 if (i == pci_root_num) 23void x86_pci_root_bus_resources(int bus, struct list_head *resources)
24{
25 struct pci_root_info *info = x86_find_pci_root_info(bus);
26 struct pci_root_res *root_res;
27
28 if (!info)
25 goto default_resources; 29 goto default_resources;
26 30
27 printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", 31 printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n",
28 bus); 32 bus);
29 33
30 info = &pci_root_info[i]; 34 list_for_each_entry(root_res, &info->resources, list) {
31 for (j = 0; j < info->res_num; j++) {
32 struct resource *res; 35 struct resource *res;
33 struct resource *root; 36 struct resource *root;
34 37
35 res = &info->res[j]; 38 res = &root_res->res;
36 pci_add_resource(resources, res); 39 pci_add_resource(resources, res);
37 if (res->flags & IORESOURCE_IO) 40 if (res->flags & IORESOURCE_IO)
38 root = &ioport_resource; 41 root = &ioport_resource;
@@ -53,11 +56,32 @@ default_resources:
53 pci_add_resource(resources, &iomem_resource); 56 pci_add_resource(resources, &iomem_resource);
54} 57}
55 58
59struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max,
60 int node, int link)
61{
62 struct pci_root_info *info;
63
64 info = kzalloc(sizeof(*info), GFP_KERNEL);
65
66 if (!info)
67 return info;
68
69 INIT_LIST_HEAD(&info->resources);
70 info->bus_min = bus_min;
71 info->bus_max = bus_max;
72 info->node = node;
73 info->link = link;
74
75 list_add_tail(&info->list, &pci_root_infos);
76
77 return info;
78}
79
56void __devinit update_res(struct pci_root_info *info, resource_size_t start, 80void __devinit update_res(struct pci_root_info *info, resource_size_t start,
57 resource_size_t end, unsigned long flags, int merge) 81 resource_size_t end, unsigned long flags, int merge)
58{ 82{
59 int i;
60 struct resource *res; 83 struct resource *res;
84 struct pci_root_res *root_res;
61 85
62 if (start > end) 86 if (start > end)
63 return; 87 return;
@@ -69,11 +93,11 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start,
69 goto addit; 93 goto addit;
70 94
71 /* try to merge it with old one */ 95 /* try to merge it with old one */
72 for (i = 0; i < info->res_num; i++) { 96 list_for_each_entry(root_res, &info->resources, list) {
73 resource_size_t final_start, final_end; 97 resource_size_t final_start, final_end;
74 resource_size_t common_start, common_end; 98 resource_size_t common_start, common_end;
75 99
76 res = &info->res[i]; 100 res = &root_res->res;
77 if (res->flags != flags) 101 if (res->flags != flags)
78 continue; 102 continue;
79 103
@@ -93,14 +117,15 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start,
93addit: 117addit:
94 118
95 /* need to add that */ 119 /* need to add that */
96 if (info->res_num >= RES_NUM) 120 root_res = kzalloc(sizeof(*root_res), GFP_KERNEL);
121 if (!root_res)
97 return; 122 return;
98 123
99 res = &info->res[info->res_num]; 124 res = &root_res->res;
100 res->name = info->name; 125 res->name = info->name;
101 res->flags = flags; 126 res->flags = flags;
102 res->start = start; 127 res->start = start;
103 res->end = end; 128 res->end = end;
104 res->child = NULL; 129
105 info->res_num++; 130 list_add_tail(&root_res->list, &info->resources);
106} 131}
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
index 804a4b40c31a..226a466b2b2b 100644
--- a/arch/x86/pci/bus_numa.h
+++ b/arch/x86/pci/bus_numa.h
@@ -4,22 +4,24 @@
 4 * sub bus (transparent) will use entries from 3 to store extra from 4 * sub bus (transparent) will use entries from 3 to store extra from
 5 * root, so need to make sure we have enough slots there. 5 * root, so need to make sure we have enough slots there.
6 */ 6 */
7#define RES_NUM 16 7struct pci_root_res {
8 struct list_head list;
9 struct resource res;
10};
11
8struct pci_root_info { 12struct pci_root_info {
13 struct list_head list;
9 char name[12]; 14 char name[12];
10 unsigned int res_num; 15 struct list_head resources;
11 struct resource res[RES_NUM];
12 int bus_min; 16 int bus_min;
13 int bus_max; 17 int bus_max;
14 int node; 18 int node;
15 int link; 19 int link;
16}; 20};
17 21
18/* 4 at this time, it may become to 32 */ 22extern struct list_head pci_root_infos;
19#define PCI_ROOT_NR 4 23struct pci_root_info *alloc_pci_root_info(int bus_min, int bus_max,
20extern int pci_root_num; 24 int node, int link);
21extern struct pci_root_info pci_root_info[PCI_ROOT_NR];
22
23extern void update_res(struct pci_root_info *info, resource_size_t start, 25extern void update_res(struct pci_root_info *info, resource_size_t start,
24 resource_size_t end, unsigned long flags, int merge); 26 resource_size_t end, unsigned long flags, int merge);
25#endif 27#endif
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 323481e06ef8..0ad990a20d4a 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -11,6 +11,7 @@
11#include <linux/dmi.h> 11#include <linux/dmi.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13 13
14#include <asm-generic/pci-bridge.h>
14#include <asm/acpi.h> 15#include <asm/acpi.h>
15#include <asm/segment.h> 16#include <asm/segment.h>
16#include <asm/io.h> 17#include <asm/io.h>
@@ -229,6 +230,14 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d)
229} 230}
230#endif 231#endif
231 232
233static int __devinit set_scan_all(const struct dmi_system_id *d)
234{
235 printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n",
236 d->ident);
237 pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS);
238 return 0;
239}
240
232static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { 241static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
233#ifdef __i386__ 242#ifdef __i386__
234/* 243/*
@@ -420,6 +429,13 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
420 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), 429 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"),
421 }, 430 },
422 }, 431 },
432 {
433 .callback = set_scan_all,
434 .ident = "Stratus/NEC ftServer",
435 .matches = {
436 DMI_MATCH(DMI_SYS_VENDOR, "ftServer"),
437 },
438 },
423 {} 439 {}
424}; 440};
425 441
@@ -430,9 +446,7 @@ void __init dmi_check_pciprobe(void)
430 446
431struct pci_bus * __devinit pcibios_scan_root(int busnum) 447struct pci_bus * __devinit pcibios_scan_root(int busnum)
432{ 448{
433 LIST_HEAD(resources);
434 struct pci_bus *bus = NULL; 449 struct pci_bus *bus = NULL;
435 struct pci_sysdata *sd;
436 450
437 while ((bus = pci_find_next_bus(bus)) != NULL) { 451 while ((bus = pci_find_next_bus(bus)) != NULL) {
438 if (bus->number == busnum) { 452 if (bus->number == busnum) {
@@ -441,28 +455,10 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
441 } 455 }
442 } 456 }
443 457
444 /* Allocate per-root-bus (not per bus) arch-specific data. 458 return pci_scan_bus_on_node(busnum, &pci_root_ops,
445 * TODO: leak; this memory is never freed. 459 get_mp_bus_to_node(busnum));
446 * It's arguable whether it's worth the trouble to care.
447 */
448 sd = kzalloc(sizeof(*sd), GFP_KERNEL);
449 if (!sd) {
450 printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
451 return NULL;
452 }
453
454 sd->node = get_mp_bus_to_node(busnum);
455
456 printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);
457 x86_pci_root_bus_resources(busnum, &resources);
458 bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources);
459 if (!bus) {
460 pci_free_resource_list(&resources);
461 kfree(sd);
462 }
463
464 return bus;
465} 460}
461
466void __init pcibios_set_cache_line_size(void) 462void __init pcibios_set_cache_line_size(void)
467{ 463{
468 struct cpuinfo_x86 *c = &boot_cpu_data; 464 struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -656,6 +652,7 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops,
656 } 652 }
657 sd->node = node; 653 sd->node = node;
658 x86_pci_root_bus_resources(busno, &resources); 654 x86_pci_root_bus_resources(busno, &resources);
655 printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busno);
659 bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources); 656 bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources);
660 if (!bus) { 657 if (!bus) {
661 pci_free_resource_list(&resources); 658 pci_free_resource_list(&resources);
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index d0e6e403b4f6..5dd467bd6121 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -519,3 +519,20 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev)
519 } 519 }
520} 520}
521DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); 521DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
522
523/*
524 * Twinhead H12Y needs us to block out a region otherwise we map devices
525 * there and any access kills the box.
526 *
527 * See: https://bugzilla.kernel.org/show_bug.cgi?id=10231
528 *
529 * Match off the LPC and svid/sdid (older kernels lose the bridge subvendor)
530 */
531static void __devinit twinhead_reserve_killing_zone(struct pci_dev *dev)
532{
533 if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) {
534 pr_info("Reserving memory on Twinhead H12Y\n");
535 request_mem_region(0xFFB00000, 0x100000, "twinhead");
536 }
537}
538DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone);
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 831971e731f7..dd8ca6f7223b 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -57,7 +57,7 @@ static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev)
57{ 57{
58 struct pcibios_fwaddrmap *map; 58 struct pcibios_fwaddrmap *map;
59 59
60 WARN_ON(!spin_is_locked(&pcibios_fwaddrmap_lock)); 60 WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock));
61 61
62 list_for_each_entry(map, &pcibios_fwaddrmappings, list) 62 list_for_each_entry(map, &pcibios_fwaddrmappings, list)
63 if (map->dev == dev) 63 if (map->dev == dev)
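
On CONFIG_SMP=n kernels spinlocks compile away and spin_is_locked() is constantly 0, so the bare WARN_ON(!spin_is_locked(...)) fired on every call. WARN_ON_SMP() sidesteps that; its definition in <linux/bug.h> is essentially:

    #ifdef CONFIG_SMP
    # define WARN_ON_SMP(x)    WARN_ON(x)
    #else
    # define WARN_ON_SMP(x)    ({0;})   /* usable as an expression too */
    #endif
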
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
new file mode 100644
index 000000000000..9d8a509c9730
--- /dev/null
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -0,0 +1,366 @@
1/*
2 * arch/x86/pci/sta2x11-fixup.c
3 * glue code for lib/swiotlb.c and DMA translation between STA2x11
4 * AMBA memory mapping and the X86 memory mapping
5 *
6 * ST Microelectronics ConneXt (STA2X11/STA2X10)
7 *
8 * Copyright (c) 2010-2011 Wind River Systems, Inc.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
17 * See the GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 */
24
25#include <linux/pci.h>
26#include <linux/pci_ids.h>
27#include <linux/export.h>
28#include <linux/list.h>
29
30#define STA2X11_SWIOTLB_SIZE (4*1024*1024)
31extern int swiotlb_late_init_with_default_size(size_t default_size);
32
33/*
34 * We build a list of bus numbers that are under the ConneXt. The
35 * main bridge hosts 4 buses, which are the 4 endpoints, in order.
36 */
37#define STA2X11_NR_EP 4 /* 0..3 included */
38#define STA2X11_NR_FUNCS 8 /* 0..7 included */
39#define STA2X11_AMBA_SIZE (512 << 20)
40
41struct sta2x11_ahb_regs { /* saved during suspend */
42 u32 base, pexlbase, pexhbase, crw;
43};
44
45struct sta2x11_mapping {
46 u32 amba_base;
47 int is_suspended;
48 struct sta2x11_ahb_regs regs[STA2X11_NR_FUNCS];
49};
50
51struct sta2x11_instance {
52 struct list_head list;
53 int bus0;
54 struct sta2x11_mapping map[STA2X11_NR_EP];
55};
56
57static LIST_HEAD(sta2x11_instance_list);
58
59/* At probe time, record new instances of this bridge (likely one only) */
60static void sta2x11_new_instance(struct pci_dev *pdev)
61{
62 struct sta2x11_instance *instance;
63
64 instance = kzalloc(sizeof(*instance), GFP_ATOMIC);
65 if (!instance)
66 return;
67 /* This has a subordinate bridge, with 4 more-subordinate ones */
68 instance->bus0 = pdev->subordinate->number + 1;
69
70 if (list_empty(&sta2x11_instance_list)) {
71 int size = STA2X11_SWIOTLB_SIZE;
72 /* First instance: register your own swiotlb area */
73 dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size);
74 if (swiotlb_late_init_with_default_size(size))
75 dev_emerg(&pdev->dev, "init swiotlb failed\n");
76 }
77 list_add(&instance->list, &sta2x11_instance_list);
78}
79DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, 0xcc17, sta2x11_new_instance);
80
81/*
82 * Utility functions used in this file from below
83 */
84static struct sta2x11_instance *sta2x11_pdev_to_instance(struct pci_dev *pdev)
85{
86 struct sta2x11_instance *instance;
87 int ep;
88
89 list_for_each_entry(instance, &sta2x11_instance_list, list) {
90 ep = pdev->bus->number - instance->bus0;
91 if (ep >= 0 && ep < STA2X11_NR_EP)
92 return instance;
93 }
94 return NULL;
95}
96
97static int sta2x11_pdev_to_ep(struct pci_dev *pdev)
98{
99 struct sta2x11_instance *instance;
100
101 instance = sta2x11_pdev_to_instance(pdev);
102 if (!instance)
103 return -1;
104
105 return pdev->bus->number - instance->bus0;
106}
107
108static struct sta2x11_mapping *sta2x11_pdev_to_mapping(struct pci_dev *pdev)
109{
110 struct sta2x11_instance *instance;
111 int ep;
112
113 instance = sta2x11_pdev_to_instance(pdev);
114 if (!instance)
115 return NULL;
116 ep = sta2x11_pdev_to_ep(pdev);
117 return instance->map + ep;
118}
119
120/* This is exported, as some devices need to access the MFD registers */
121struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev)
122{
123 return sta2x11_pdev_to_instance(pdev);
124}
125EXPORT_SYMBOL(sta2x11_get_instance);
126
127
128/**
129 * p2a - Translate physical address to STA2x11 AMBA address,
130 * used for DMA transfers to STA2x11
131 * @p: Physical address
132 * @pdev: PCI device (must be hosted within the ConneXt)
133 */
134static dma_addr_t p2a(dma_addr_t p, struct pci_dev *pdev)
135{
136 struct sta2x11_mapping *map;
137 dma_addr_t a;
138
139 map = sta2x11_pdev_to_mapping(pdev);
140 a = p + map->amba_base;
141 return a;
142}
143
144/**
145 * a2p - Translate STA2x11 AMBA address to physical address
146 * used for DMA transfers from STA2x11
147 * @a: STA2x11 AMBA address
148 * @pdev: PCI device (must be hosted within the ConneXt)
149 */
150static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev)
151{
152 struct sta2x11_mapping *map;
153 dma_addr_t p;
154
155 map = sta2x11_pdev_to_mapping(pdev);
156 p = a - map->amba_base;
157 return p;
158}
159
160/**
161 * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers
162 * returns virtual address. This is the only "special" function here.
163 * @dev: PCI device
164 * @size: Size of the buffer
165 * @dma_handle: DMA address
166 * @flags: memory flags
167 */
168static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
169 size_t size,
170 dma_addr_t *dma_handle,
171 gfp_t flags,
172 struct dma_attrs *attrs)
173{
174 void *vaddr;
175
176 vaddr = dma_generic_alloc_coherent(dev, size, dma_handle, flags, attrs);
177 if (!vaddr)
178 vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, flags);
179 *dma_handle = p2a(*dma_handle, to_pci_dev(dev));
180 return vaddr;
181}
182
183/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
184static struct dma_map_ops sta2x11_dma_ops = {
185 .alloc = sta2x11_swiotlb_alloc_coherent,
186 .free = swiotlb_free_coherent,
187 .map_page = swiotlb_map_page,
188 .unmap_page = swiotlb_unmap_page,
189 .map_sg = swiotlb_map_sg_attrs,
190 .unmap_sg = swiotlb_unmap_sg_attrs,
191 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
192 .sync_single_for_device = swiotlb_sync_single_for_device,
193 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
194 .sync_sg_for_device = swiotlb_sync_sg_for_device,
195 .mapping_error = swiotlb_dma_mapping_error,
196 .dma_supported = NULL, /* FIXME: we should use this instead! */
197};
198
199/* At setup time, we use our own ops if the device is a ConneXt one */
200static void sta2x11_setup_pdev(struct pci_dev *pdev)
201{
202 struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev);
203
204 if (!instance) /* either a sta2x11 bridge or another ST device */
205 return;
206 pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
207 pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
208 pdev->dev.archdata.dma_ops = &sta2x11_dma_ops;
209
210 /* We must enable all devices as master, for audio DMA to work */
211 pci_set_master(pdev);
212}
213DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_setup_pdev);
214
215/*
216 * The following three functions are exported (used in swiotlb: FIXME)
217 */
218/**
219 * dma_capable - Check if device can manage DMA transfers (FIXME: kill it)
220 * @dev: device for a PCI device
221 * @addr: DMA address
222 * @size: DMA size
223 */
224bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
225{
226 struct sta2x11_mapping *map;
227
228 if (dev->archdata.dma_ops != &sta2x11_dma_ops) {
229 if (!dev->dma_mask)
230 return false;
231 return addr + size - 1 <= *dev->dma_mask;
232 }
233
234 map = sta2x11_pdev_to_mapping(to_pci_dev(dev));
235
236 if (!map || (addr < map->amba_base))
237 return false;
238 if (addr + size >= map->amba_base + STA2X11_AMBA_SIZE) {
239 return false;
240 }
241
242 return true;
243}
244
245/**
246 * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
247 * @dev: device for a PCI device
248 * @paddr: Physical address
249 */
250dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
251{
252 if (dev->archdata.dma_ops != &sta2x11_dma_ops)
253 return paddr;
254 return p2a(paddr, to_pci_dev(dev));
255}
256
257/**
258 * dma_to_phys - Return the physical address used for this STA2x11 DMA address
259 * @dev: device for a PCI device
260 * @daddr: STA2x11 AMBA DMA address
261 */
262phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
263{
264 if (dev->archdata.dma_ops != &sta2x11_dma_ops)
265 return daddr;
266 return a2p(daddr, to_pci_dev(dev));
267}
268
269
270/*
271 * At boot we must set up the mappings for the pcie-to-amba bridge.
272 * It involves device access, and the same happens at suspend/resume time
273 */
274
275#define AHB_MAPB 0xCA4
276#define AHB_CRW(i) (AHB_MAPB + 0 + (i) * 0x10)
277#define AHB_CRW_SZMASK 0xfffffc00UL
278#define AHB_CRW_ENABLE (1 << 0)
279#define AHB_CRW_WTYPE_MEM (2 << 1)
280#define AHB_CRW_ROE (1UL << 3) /* Relax Order Ena */
281#define AHB_CRW_NSE (1UL << 4) /* No Snoop Enable */
282#define AHB_BASE(i) (AHB_MAPB + 4 + (i) * 0x10)
283#define AHB_PEXLBASE(i) (AHB_MAPB + 8 + (i) * 0x10)
284#define AHB_PEXHBASE(i) (AHB_MAPB + 12 + (i) * 0x10)
285
286/* At probe time, enable mapping for each endpoint, using the pdev */
287static void sta2x11_map_ep(struct pci_dev *pdev)
288{
289 struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev);
290 int i;
291
292 if (!map)
293 return;
294 pci_read_config_dword(pdev, AHB_BASE(0), &map->amba_base);
295
296 /* Configure AHB mapping */
297 pci_write_config_dword(pdev, AHB_PEXLBASE(0), 0);
298 pci_write_config_dword(pdev, AHB_PEXHBASE(0), 0);
299 pci_write_config_dword(pdev, AHB_CRW(0), STA2X11_AMBA_SIZE |
300 AHB_CRW_WTYPE_MEM | AHB_CRW_ENABLE);
301
302 /* Disable all the other windows */
303 for (i = 1; i < STA2X11_NR_FUNCS; i++)
304 pci_write_config_dword(pdev, AHB_CRW(i), 0);
305
306 dev_info(&pdev->dev,
307 "sta2x11: Map EP %i: AMBA address %#8x-%#8x\n",
308 sta2x11_pdev_to_ep(pdev), map->amba_base,
309 map->amba_base + STA2X11_AMBA_SIZE - 1);
310}
311DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_map_ep);
312
313#ifdef CONFIG_PM /* Some register values must be saved and restored */
314
315static void suspend_mapping(struct pci_dev *pdev)
316{
317 struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev);
318 int i;
319
320 if (!map)
321 return;
322
323 if (map->is_suspended)
324 return;
325 map->is_suspended = 1;
326
327 /* Save all window configs */
328 for (i = 0; i < STA2X11_NR_FUNCS; i++) {
329 struct sta2x11_ahb_regs *regs = map->regs + i;
330
331 pci_read_config_dword(pdev, AHB_BASE(i), &regs->base);
332 pci_read_config_dword(pdev, AHB_PEXLBASE(i), &regs->pexlbase);
333 pci_read_config_dword(pdev, AHB_PEXHBASE(i), &regs->pexhbase);
334 pci_read_config_dword(pdev, AHB_CRW(i), &regs->crw);
335 }
336}
337DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, suspend_mapping);
338
339static void resume_mapping(struct pci_dev *pdev)
340{
341 struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev);
342 int i;
343
344 if (!map)
345 return;
346
347
348 if (!map->is_suspended)
349 goto out;
350 map->is_suspended = 0;
351
352 /* Restore all window configs */
353 for (i = 0; i < STA2X11_NR_FUNCS; i++) {
354 struct sta2x11_ahb_regs *regs = map->regs + i;
355
356 pci_write_config_dword(pdev, AHB_BASE(i), regs->base);
357 pci_write_config_dword(pdev, AHB_PEXLBASE(i), regs->pexlbase);
358 pci_write_config_dword(pdev, AHB_PEXHBASE(i), regs->pexhbase);
359 pci_write_config_dword(pdev, AHB_CRW(i), regs->crw);
360 }
361out:
362 pci_set_master(pdev); /* Like at boot, enable master on all devices */
363}
364DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, resume_mapping);
365
366#endif /* CONFIG_PM */
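
A minimal user-space sketch (not part of the patch) of the address arithmetic that p2a()/a2p() above perform, plus the window check done by dma_capable(). The amba_base and buffer address below are hypothetical; the real base is read from the bridge's AHB_BASE(0) config register at fixup time:

#include <stdio.h>
#include <stdint.h>

#define STA2X11_AMBA_SIZE (512u << 20)	/* 512 MB window, as in the patch */

/* Physical -> AMBA, used for DMA towards the STA2x11 */
static uint64_t p2a(uint64_t p, uint64_t amba_base) { return p + amba_base; }
/* AMBA -> physical, used for DMA from the STA2x11 */
static uint64_t a2p(uint64_t a, uint64_t amba_base) { return a - amba_base; }

int main(void)
{
	uint64_t amba_base = 0x20000000;	/* hypothetical window base */
	uint64_t phys = 0x01234000;		/* hypothetical buffer address */
	uint64_t amba = p2a(phys, amba_base);

	printf("phys %#llx -> amba %#llx -> phys %#llx\n",
	       (unsigned long long)phys, (unsigned long long)amba,
	       (unsigned long long)a2p(amba, amba_base));

	/* dma_capable() additionally requires the address to sit in the window */
	printf("in window: %d\n",
	       amba >= amba_base &&
	       amba + 4096 < amba_base + STA2X11_AMBA_SIZE);
	return 0;
}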
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 1d4c783d7325..04b8c73659c5 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -18,6 +18,7 @@
18#include <linux/interrupt.h> 18#include <linux/interrupt.h>
19#include <linux/platform_device.h> 19#include <linux/platform_device.h>
20#include <linux/pm.h> 20#include <linux/pm.h>
21#include <linux/pm_wakeup.h>
21#include <linux/mfd/core.h> 22#include <linux/mfd/core.h>
22#include <linux/power_supply.h> 23#include <linux/power_supply.h>
23#include <linux/suspend.h> 24#include <linux/suspend.h>
@@ -83,8 +84,12 @@ static void send_ebook_state(void)
83 return; 84 return;
84 } 85 }
85 86
87 if (!!test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == state)
88 return; /* Nothing new to report. */
89
86 input_report_switch(ebook_switch_idev, SW_TABLET_MODE, state); 90 input_report_switch(ebook_switch_idev, SW_TABLET_MODE, state);
87 input_sync(ebook_switch_idev); 91 input_sync(ebook_switch_idev);
92 pm_wakeup_event(&ebook_switch_idev->dev, 0);
88} 93}
89 94
90static void flip_lid_inverter(void) 95static void flip_lid_inverter(void)
@@ -123,8 +128,12 @@ static void detect_lid_state(void)
123/* Report current lid switch state through input layer */ 128/* Report current lid switch state through input layer */
124static void send_lid_state(void) 129static void send_lid_state(void)
125{ 130{
131 if (!!test_bit(SW_LID, lid_switch_idev->sw) == !lid_open)
132 return; /* Nothing new to report. */
133
126 input_report_switch(lid_switch_idev, SW_LID, !lid_open); 134 input_report_switch(lid_switch_idev, SW_LID, !lid_open);
127 input_sync(lid_switch_idev); 135 input_sync(lid_switch_idev);
136 pm_wakeup_event(&lid_switch_idev->dev, 0);
128} 137}
129 138
130static ssize_t lid_wake_mode_show(struct device *dev, 139static ssize_t lid_wake_mode_show(struct device *dev,
@@ -213,11 +222,30 @@ static irqreturn_t xo1_sci_intr(int irq, void *dev_id)
213 222
214 dev_dbg(&pdev->dev, "sts %x gpe %x\n", sts, gpe); 223 dev_dbg(&pdev->dev, "sts %x gpe %x\n", sts, gpe);
215 224
216 if (sts & CS5536_PWRBTN_FLAG && !(sts & CS5536_WAK_FLAG)) { 225 if (sts & CS5536_PWRBTN_FLAG) {
217 input_report_key(power_button_idev, KEY_POWER, 1); 226 if (!(sts & CS5536_WAK_FLAG)) {
218 input_sync(power_button_idev); 227 /* Only report power button input when it was pressed
219 input_report_key(power_button_idev, KEY_POWER, 0); 228 * during regular operation (as opposed to when it
220 input_sync(power_button_idev); 229 * was used to wake the system). */
230 input_report_key(power_button_idev, KEY_POWER, 1);
231 input_sync(power_button_idev);
232 input_report_key(power_button_idev, KEY_POWER, 0);
233 input_sync(power_button_idev);
234 }
235 /* Report the wakeup event in all cases. */
236 pm_wakeup_event(&power_button_idev->dev, 0);
237 }
238
239 if ((sts & (CS5536_RTC_FLAG | CS5536_WAK_FLAG)) ==
240 (CS5536_RTC_FLAG | CS5536_WAK_FLAG)) {
241 /* When the system is woken by the RTC alarm, report the
242 * event on the rtc device. */
243 struct device *rtc = bus_find_device_by_name(
244 &platform_bus_type, NULL, "rtc_cmos");
245 if (rtc) {
246 pm_wakeup_event(rtc, 0);
247 put_device(rtc);
248 }
221 } 249 }
222 250
223 if (gpe & CS5536_GPIOM7_PME_FLAG) { /* EC GPIO */ 251 if (gpe & CS5536_GPIOM7_PME_FLAG) { /* EC GPIO */
@@ -310,9 +338,10 @@ static int __devinit setup_sci_interrupt(struct platform_device *pdev)
310 outb(lo, CS5536_PIC_INT_SEL2); 338 outb(lo, CS5536_PIC_INT_SEL2);
311 } 339 }
312 340
313 /* Enable SCI from power button, and clear pending interrupts */ 341 /* Enable interesting SCI events, and clear pending interrupts */
314 sts = inl(acpi_base + CS5536_PM1_STS); 342 sts = inl(acpi_base + CS5536_PM1_STS);
315 outl((CS5536_PM_PWRBTN << 16) | 0xffff, acpi_base + CS5536_PM1_STS); 343 outl(((CS5536_PM_PWRBTN | CS5536_PM_RTC) << 16) | 0xffff,
344 acpi_base + CS5536_PM1_STS);
316 345
317 r = request_irq(sci_irq, xo1_sci_intr, 0, DRV_NAME, pdev); 346 r = request_irq(sci_irq, xo1_sci_intr, 0, DRV_NAME, pdev);
318 if (r) 347 if (r)
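
The two send_*_state() hunks above share one pattern: consult the switch's current state in the input device's sw[] bitmap and bail out when nothing changed, so a wakeup event only fires on a real transition. A freestanding sketch of that guard, where report_switch() stands in for input_report_switch() plus input_sync():

#include <stdio.h>
#include <stdbool.h>

static bool last_state;	/* mimics the bit the input core keeps in sw[] */

static void report_switch(bool state)
{
	if (last_state == state)
		return;		/* nothing new to report */
	last_state = state;
	printf("switch -> %d\n", state);	/* the driver would also call pm_wakeup_event() here */
}

int main(void)
{
	report_switch(true);
	report_switch(true);	/* suppressed: no state change */
	report_switch(false);
	return 0;
}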
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index c7abf13a213f..94d8a39332ec 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -445,7 +445,7 @@ static void ack_cobalt_irq(struct irq_data *data)
445 445
446 spin_lock_irqsave(&cobalt_lock, flags); 446 spin_lock_irqsave(&cobalt_lock, flags);
447 disable_cobalt_irq(data); 447 disable_cobalt_irq(data);
448 apic_write(APIC_EOI, APIC_EIO_ACK); 448 apic_write(APIC_EOI, APIC_EOI_ACK);
449 spin_unlock_irqrestore(&cobalt_lock, flags); 449 spin_unlock_irqrestore(&cobalt_lock, flags);
450} 450}
451 451
diff --git a/arch/x86/tools/.gitignore b/arch/x86/tools/.gitignore
new file mode 100644
index 000000000000..be0ed065249b
--- /dev/null
+++ b/arch/x86/tools/.gitignore
@@ -0,0 +1 @@
relocs
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index d511aa97533a..733057b435b0 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -36,3 +36,7 @@ HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x
36$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c 36$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
37 37
38$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c 38$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
39
40HOST_EXTRACFLAGS += -I$(srctree)/tools/include
41hostprogs-y += relocs
42relocs: $(obj)/relocs
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/tools/relocs.c
index fb7117a4ade1..b43cfcd9bf40 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -18,6 +18,8 @@ static void die(char *fmt, ...);
18static Elf32_Ehdr ehdr; 18static Elf32_Ehdr ehdr;
19static unsigned long reloc_count, reloc_idx; 19static unsigned long reloc_count, reloc_idx;
20static unsigned long *relocs; 20static unsigned long *relocs;
21static unsigned long reloc16_count, reloc16_idx;
22static unsigned long *relocs16;
21 23
22struct section { 24struct section {
23 Elf32_Shdr shdr; 25 Elf32_Shdr shdr;
@@ -28,52 +30,86 @@ struct section {
28}; 30};
29static struct section *secs; 31static struct section *secs;
30 32
33enum symtype {
34 S_ABS,
35 S_REL,
36 S_SEG,
37 S_LIN,
38 S_NSYMTYPES
39};
40
41static const char * const sym_regex_kernel[S_NSYMTYPES] = {
31/* 42/*
32 * Following symbols have been audited. Their values are constant and do 43 * Following symbols have been audited. Their values are constant and do
33 * not change if bzImage is loaded at a different physical address than 44 * not change if bzImage is loaded at a different physical address than
34 * the address for which it has been compiled. Don't warn user about 45 * the address for which it has been compiled. Don't warn user about
35 * absolute relocations present w.r.t these symbols. 46 * absolute relocations present w.r.t these symbols.
36 */ 47 */
37static const char abs_sym_regex[] = 48 [S_ABS] =
38 "^(xen_irq_disable_direct_reloc$|" 49 "^(xen_irq_disable_direct_reloc$|"
39 "xen_save_fl_direct_reloc$|" 50 "xen_save_fl_direct_reloc$|"
40 "VDSO|" 51 "VDSO|"
41 "__crc_)"; 52 "__crc_)",
42static regex_t abs_sym_regex_c;
43static int is_abs_reloc(const char *sym_name)
44{
45 return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0);
46}
47 53
48/* 54/*
49 * These symbols are known to be relative, even if the linker marks them 55 * These symbols are known to be relative, even if the linker marks them
50 * as absolute (typically defined outside any section in the linker script.) 56 * as absolute (typically defined outside any section in the linker script.)
51 */ 57 */
52static const char rel_sym_regex[] = 58 [S_REL] =
53 "^_end$"; 59 "^(__init_(begin|end)|"
54static regex_t rel_sym_regex_c; 60 "__x86_cpu_dev_(start|end)|"
55static int is_rel_reloc(const char *sym_name) 61 "(__parainstructions|__alt_instructions)(|_end)|"
62 "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|"
63 "_end)$"
64};
65
66
67static const char * const sym_regex_realmode[S_NSYMTYPES] = {
68/*
69 * These are 16-bit segment symbols when compiling 16-bit code.
70 */
71 [S_SEG] =
72 "^real_mode_seg$",
73
74/*
75 * These are offsets belonging to segments, as opposed to linear addresses,
76 * when compiling 16-bit code.
77 */
78 [S_LIN] =
79 "^pa_",
80};
81
82static const char * const *sym_regex;
83
84static regex_t sym_regex_c[S_NSYMTYPES];
85static int is_reloc(enum symtype type, const char *sym_name)
56{ 86{
57 return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0); 87 return sym_regex[type] &&
88 !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0);
58} 89}
59 90
60static void regex_init(void) 91static void regex_init(int use_real_mode)
61{ 92{
62 char errbuf[128]; 93 char errbuf[128];
63 int err; 94 int err;
64 95 int i;
65 err = regcomp(&abs_sym_regex_c, abs_sym_regex, 96
66 REG_EXTENDED|REG_NOSUB); 97 if (use_real_mode)
67 if (err) { 98 sym_regex = sym_regex_realmode;
68 regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf); 99 else
69 die("%s", errbuf); 100 sym_regex = sym_regex_kernel;
70 }
71 101
72 err = regcomp(&rel_sym_regex_c, rel_sym_regex, 102 for (i = 0; i < S_NSYMTYPES; i++) {
73 REG_EXTENDED|REG_NOSUB); 103 if (!sym_regex[i])
74 if (err) { 104 continue;
75 regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf); 105
76 die("%s", errbuf); 106 err = regcomp(&sym_regex_c[i], sym_regex[i],
107 REG_EXTENDED|REG_NOSUB);
108
109 if (err) {
110 regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf);
111 die("%s", errbuf);
112 }
77 } 113 }
78} 114}
79 115
@@ -154,6 +190,10 @@ static const char *rel_type(unsigned type)
154 REL_TYPE(R_386_RELATIVE), 190 REL_TYPE(R_386_RELATIVE),
155 REL_TYPE(R_386_GOTOFF), 191 REL_TYPE(R_386_GOTOFF),
156 REL_TYPE(R_386_GOTPC), 192 REL_TYPE(R_386_GOTPC),
193 REL_TYPE(R_386_8),
194 REL_TYPE(R_386_PC8),
195 REL_TYPE(R_386_16),
196 REL_TYPE(R_386_PC16),
157#undef REL_TYPE 197#undef REL_TYPE
158 }; 198 };
159 const char *name = "unknown type rel type name"; 199 const char *name = "unknown type rel type name";
@@ -189,7 +229,7 @@ static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym)
189 name = sym_strtab + sym->st_name; 229 name = sym_strtab + sym->st_name;
190 } 230 }
191 else { 231 else {
192 name = sec_name(secs[sym->st_shndx].shdr.sh_name); 232 name = sec_name(sym->st_shndx);
193 } 233 }
194 return name; 234 return name;
195} 235}
@@ -472,7 +512,7 @@ static void print_absolute_relocs(void)
472 * Before warning check if this absolute symbol 512 * Before warning check if this absolute symbol
473 * relocation is harmless. 513 * relocation is harmless.
474 */ 514 */
475 if (is_abs_reloc(name) || is_rel_reloc(name)) 515 if (is_reloc(S_ABS, name) || is_reloc(S_REL, name))
476 continue; 516 continue;
477 517
478 if (!printed) { 518 if (!printed) {
@@ -496,7 +536,8 @@ static void print_absolute_relocs(void)
496 printf("\n"); 536 printf("\n");
497} 537}
498 538
499static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) 539static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym),
540 int use_real_mode)
500{ 541{
501 int i; 542 int i;
502 /* Walk through the relocations */ 543 /* Walk through the relocations */
@@ -521,30 +562,67 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
521 Elf32_Rel *rel; 562 Elf32_Rel *rel;
522 Elf32_Sym *sym; 563 Elf32_Sym *sym;
523 unsigned r_type; 564 unsigned r_type;
565 const char *symname;
566 int shn_abs;
567
524 rel = &sec->reltab[j]; 568 rel = &sec->reltab[j];
525 sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; 569 sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
526 r_type = ELF32_R_TYPE(rel->r_info); 570 r_type = ELF32_R_TYPE(rel->r_info);
527 /* Don't visit relocations to absolute symbols */ 571
528 if (sym->st_shndx == SHN_ABS && 572 shn_abs = sym->st_shndx == SHN_ABS;
529 !is_rel_reloc(sym_name(sym_strtab, sym))) { 573
530 continue;
531 }
532 switch (r_type) { 574 switch (r_type) {
533 case R_386_NONE: 575 case R_386_NONE:
534 case R_386_PC32: 576 case R_386_PC32:
577 case R_386_PC16:
578 case R_386_PC8:
535 /* 579 /*
536 * NONE can be ignored and PC relative 580
537 * relocations don't need to be adjusted. 581 * relocations don't need to be adjusted.
538 */ 582 */
539 break; 583 break;
584
585 case R_386_16:
586 symname = sym_name(sym_strtab, sym);
587 if (!use_real_mode)
588 goto bad;
589 if (shn_abs) {
590 if (is_reloc(S_ABS, symname))
591 break;
592 else if (!is_reloc(S_SEG, symname))
593 goto bad;
594 } else {
595 if (is_reloc(S_LIN, symname))
596 goto bad;
597 else
598 break;
599 }
600 visit(rel, sym);
601 break;
602
540 case R_386_32: 603 case R_386_32:
541 /* Visit relocations that need to be adjusted */ 604 symname = sym_name(sym_strtab, sym);
605 if (shn_abs) {
606 if (is_reloc(S_ABS, symname))
607 break;
608 else if (!is_reloc(S_REL, symname))
609 goto bad;
610 } else {
611 if (use_real_mode &&
612 !is_reloc(S_LIN, symname))
613 break;
614 }
542 visit(rel, sym); 615 visit(rel, sym);
543 break; 616 break;
544 default: 617 default:
545 die("Unsupported relocation type: %s (%d)\n", 618 die("Unsupported relocation type: %s (%d)\n",
546 rel_type(r_type), r_type); 619 rel_type(r_type), r_type);
547 break; 620 break;
621 bad:
622 symname = sym_name(sym_strtab, sym);
623 die("Invalid %s %s relocation: %s\n",
624 shn_abs ? "absolute" : "relative",
625 rel_type(r_type), symname);
548 } 626 }
549 } 627 }
550 } 628 }
@@ -552,13 +630,19 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
552 630
553static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) 631static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
554{ 632{
555 reloc_count += 1; 633 if (ELF32_R_TYPE(rel->r_info) == R_386_16)
634 reloc16_count++;
635 else
636 reloc_count++;
556} 637}
557 638
558static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) 639static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
559{ 640{
560 /* Remember the address that needs to be adjusted. */ 641 /* Remember the address that needs to be adjusted. */
561 relocs[reloc_idx++] = rel->r_offset; 642 if (ELF32_R_TYPE(rel->r_info) == R_386_16)
643 relocs16[reloc16_idx++] = rel->r_offset;
644 else
645 relocs[reloc_idx++] = rel->r_offset;
562} 646}
563 647
564static int cmp_relocs(const void *va, const void *vb) 648static int cmp_relocs(const void *va, const void *vb)
@@ -568,23 +652,41 @@ static int cmp_relocs(const void *va, const void *vb)
568 return (*a == *b)? 0 : (*a > *b)? 1 : -1; 652 return (*a == *b)? 0 : (*a > *b)? 1 : -1;
569} 653}
570 654
571static void emit_relocs(int as_text) 655static int write32(unsigned int v, FILE *f)
656{
657 unsigned char buf[4];
658
659 put_unaligned_le32(v, buf);
660 return fwrite(buf, 1, 4, f) == 4 ? 0 : -1;
661}
662
663static void emit_relocs(int as_text, int use_real_mode)
572{ 664{
573 int i; 665 int i;
574 /* Count how many relocations I have and allocate space for them. */ 666 /* Count how many relocations I have and allocate space for them. */
575 reloc_count = 0; 667 reloc_count = 0;
576 walk_relocs(count_reloc); 668 walk_relocs(count_reloc, use_real_mode);
577 relocs = malloc(reloc_count * sizeof(relocs[0])); 669 relocs = malloc(reloc_count * sizeof(relocs[0]));
578 if (!relocs) { 670 if (!relocs) {
579 die("malloc of %d entries for relocs failed\n", 671 die("malloc of %d entries for relocs failed\n",
580 reloc_count); 672 reloc_count);
581 } 673 }
674
675 relocs16 = malloc(reloc16_count * sizeof(relocs[0]));
676 if (!relocs16) {
677 die("malloc of %d entries for relocs16 failed\n",
678 reloc16_count);
679 }
582 /* Collect up the relocations */ 680 /* Collect up the relocations */
583 reloc_idx = 0; 681 reloc_idx = 0;
584 walk_relocs(collect_reloc); 682 walk_relocs(collect_reloc, use_real_mode);
683
684 if (reloc16_count && !use_real_mode)
685 die("Segment relocations found but --realmode not specified\n");
585 686
586 /* Order the relocations for more efficient processing */ 687 /* Order the relocations for more efficient processing */
587 qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); 688 qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs);
689 qsort(relocs16, reloc16_count, sizeof(relocs16[0]), cmp_relocs);
588 690
589 /* Print the relocations */ 691 /* Print the relocations */
590 if (as_text) { 692 if (as_text) {
@@ -593,58 +695,83 @@ static void emit_relocs(int as_text)
593 */ 695 */
594 printf(".section \".data.reloc\",\"a\"\n"); 696 printf(".section \".data.reloc\",\"a\"\n");
595 printf(".balign 4\n"); 697 printf(".balign 4\n");
596 for (i = 0; i < reloc_count; i++) { 698 if (use_real_mode) {
597 printf("\t .long 0x%08lx\n", relocs[i]); 699 printf("\t.long %lu\n", reloc16_count);
700 for (i = 0; i < reloc16_count; i++)
701 printf("\t.long 0x%08lx\n", relocs16[i]);
702 printf("\t.long %lu\n", reloc_count);
703 for (i = 0; i < reloc_count; i++) {
704 printf("\t.long 0x%08lx\n", relocs[i]);
705 }
706 } else {
707 /* Print a stop */
708 printf("\t.long 0x%08lx\n", (unsigned long)0);
709 for (i = 0; i < reloc_count; i++) {
710 printf("\t.long 0x%08lx\n", relocs[i]);
711 }
598 } 712 }
713
599 printf("\n"); 714 printf("\n");
600 } 715 }
601 else { 716 else {
602 unsigned char buf[4]; 717 if (use_real_mode) {
603 /* Print a stop */ 718 write32(reloc16_count, stdout);
604 fwrite("\0\0\0\0", 4, 1, stdout); 719 for (i = 0; i < reloc16_count; i++)
605 /* Now print each relocation */ 720 write32(relocs16[i], stdout);
606 for (i = 0; i < reloc_count; i++) { 721 write32(reloc_count, stdout);
607 put_unaligned_le32(relocs[i], buf); 722
608 fwrite(buf, 4, 1, stdout); 723 /* Now print each relocation */
724 for (i = 0; i < reloc_count; i++)
725 write32(relocs[i], stdout);
726 } else {
727 /* Print a stop */
728 write32(0, stdout);
729
730 /* Now print each relocation */
731 for (i = 0; i < reloc_count; i++) {
732 write32(relocs[i], stdout);
733 }
609 } 734 }
610 } 735 }
611} 736}
612 737
613static void usage(void) 738static void usage(void)
614{ 739{
615 die("relocs [--abs-syms |--abs-relocs | --text] vmlinux\n"); 740 die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n");
616} 741}
617 742
618int main(int argc, char **argv) 743int main(int argc, char **argv)
619{ 744{
620 int show_absolute_syms, show_absolute_relocs; 745 int show_absolute_syms, show_absolute_relocs;
621 int as_text; 746 int as_text, use_real_mode;
622 const char *fname; 747 const char *fname;
623 FILE *fp; 748 FILE *fp;
624 int i; 749 int i;
625 750
626 regex_init();
627
628 show_absolute_syms = 0; 751 show_absolute_syms = 0;
629 show_absolute_relocs = 0; 752 show_absolute_relocs = 0;
630 as_text = 0; 753 as_text = 0;
754 use_real_mode = 0;
631 fname = NULL; 755 fname = NULL;
632 for (i = 1; i < argc; i++) { 756 for (i = 1; i < argc; i++) {
633 char *arg = argv[i]; 757 char *arg = argv[i];
634 if (*arg == '-') { 758 if (*arg == '-') {
635 if (strcmp(argv[1], "--abs-syms") == 0) { 759 if (strcmp(arg, "--abs-syms") == 0) {
636 show_absolute_syms = 1; 760 show_absolute_syms = 1;
637 continue; 761 continue;
638 } 762 }
639 763 if (strcmp(arg, "--abs-relocs") == 0) {
640 if (strcmp(argv[1], "--abs-relocs") == 0) {
641 show_absolute_relocs = 1; 764 show_absolute_relocs = 1;
642 continue; 765 continue;
643 } 766 }
644 else if (strcmp(argv[1], "--text") == 0) { 767 if (strcmp(arg, "--text") == 0) {
645 as_text = 1; 768 as_text = 1;
646 continue; 769 continue;
647 } 770 }
771 if (strcmp(arg, "--realmode") == 0) {
772 use_real_mode = 1;
773 continue;
774 }
648 } 775 }
649 else if (!fname) { 776 else if (!fname) {
650 fname = arg; 777 fname = arg;
@@ -655,6 +782,7 @@ int main(int argc, char **argv)
655 if (!fname) { 782 if (!fname) {
656 usage(); 783 usage();
657 } 784 }
785 regex_init(use_real_mode);
658 fp = fopen(fname, "r"); 786 fp = fopen(fname, "r");
659 if (!fp) { 787 if (!fp) {
660 die("Cannot open %s: %s\n", 788 die("Cannot open %s: %s\n",
@@ -673,6 +801,6 @@ int main(int argc, char **argv)
673 print_absolute_relocs(); 801 print_absolute_relocs();
674 return 0; 802 return 0;
675 } 803 }
676 emit_relocs(as_text); 804 emit_relocs(as_text, use_real_mode);
677 return 0; 805 return 0;
678} 806}
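
For reference, the binary stream that emit_relocs() writes under --realmode is: a little-endian 32-bit count of 16-bit (segment) relocation offsets, those offsets, then a count of 32-bit relocation offsets and those offsets. Without --realmode the stream keeps its old shape: a zero "stop" word followed by the 32-bit offsets. A hypothetical host-side dumper for the realmode format (the kernel consumes this data inline, so no such tool exists in the tree):

#include <stdio.h>
#include <stdint.h>

static int read32(FILE *f, uint32_t *v)
{
	unsigned char b[4];

	if (fread(b, 1, 4, f) != 4)
		return -1;
	*v = b[0] | b[1] << 8 | (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
	return 0;
}

static int dump_table(FILE *f, const char *name)
{
	uint32_t n, off;

	if (read32(f, &n))
		return -1;
	printf("%s: %u entries\n", name, n);
	while (n--) {
		if (read32(f, &off))
			return -1;
		printf("  0x%08x\n", off);
	}
	return 0;
}

int main(int argc, char **argv)
{
	FILE *f = argc > 1 ? fopen(argv[1], "rb") : NULL;

	if (!f)
		return 1;
	if (dump_table(f, "reloc16") || dump_table(f, "reloc32"))
		fprintf(stderr, "truncated stream\n");
	fclose(f);
	return 0;
}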
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index f3b0633b69a1..0e07adc8cbe4 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -34,25 +34,25 @@
34#define ELF_ARCH EM_386 34#define ELF_ARCH EM_386
35 35
36#define ELF_PLAT_INIT(regs, load_addr) do { \ 36#define ELF_PLAT_INIT(regs, load_addr) do { \
37 PT_REGS_EBX(regs) = 0; \ 37 PT_REGS_BX(regs) = 0; \
38 PT_REGS_ECX(regs) = 0; \ 38 PT_REGS_CX(regs) = 0; \
39 PT_REGS_EDX(regs) = 0; \ 39 PT_REGS_DX(regs) = 0; \
40 PT_REGS_ESI(regs) = 0; \ 40 PT_REGS_SI(regs) = 0; \
41 PT_REGS_EDI(regs) = 0; \ 41 PT_REGS_DI(regs) = 0; \
42 PT_REGS_EBP(regs) = 0; \ 42 PT_REGS_BP(regs) = 0; \
43 PT_REGS_EAX(regs) = 0; \ 43 PT_REGS_AX(regs) = 0; \
44} while (0) 44} while (0)
45 45
46/* Shamelessly stolen from include/asm-i386/elf.h */ 46/* Shamelessly stolen from include/asm-i386/elf.h */
47 47
48#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \ 48#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \
49 pr_reg[0] = PT_REGS_EBX(regs); \ 49 pr_reg[0] = PT_REGS_BX(regs); \
50 pr_reg[1] = PT_REGS_ECX(regs); \ 50 pr_reg[1] = PT_REGS_CX(regs); \
51 pr_reg[2] = PT_REGS_EDX(regs); \ 51 pr_reg[2] = PT_REGS_DX(regs); \
52 pr_reg[3] = PT_REGS_ESI(regs); \ 52 pr_reg[3] = PT_REGS_SI(regs); \
53 pr_reg[4] = PT_REGS_EDI(regs); \ 53 pr_reg[4] = PT_REGS_DI(regs); \
54 pr_reg[5] = PT_REGS_EBP(regs); \ 54 pr_reg[5] = PT_REGS_BP(regs); \
55 pr_reg[6] = PT_REGS_EAX(regs); \ 55 pr_reg[6] = PT_REGS_AX(regs); \
56 pr_reg[7] = PT_REGS_DS(regs); \ 56 pr_reg[7] = PT_REGS_DS(regs); \
57 pr_reg[8] = PT_REGS_ES(regs); \ 57 pr_reg[8] = PT_REGS_ES(regs); \
58 /* fake once used fs and gs selectors? */ \ 58 /* fake once used fs and gs selectors? */ \
@@ -130,13 +130,13 @@ do { \
130#define ELF_ARCH EM_X86_64 130#define ELF_ARCH EM_X86_64
131 131
132#define ELF_PLAT_INIT(regs, load_addr) do { \ 132#define ELF_PLAT_INIT(regs, load_addr) do { \
133 PT_REGS_RBX(regs) = 0; \ 133 PT_REGS_BX(regs) = 0; \
134 PT_REGS_RCX(regs) = 0; \ 134 PT_REGS_CX(regs) = 0; \
135 PT_REGS_RDX(regs) = 0; \ 135 PT_REGS_DX(regs) = 0; \
136 PT_REGS_RSI(regs) = 0; \ 136 PT_REGS_SI(regs) = 0; \
137 PT_REGS_RDI(regs) = 0; \ 137 PT_REGS_DI(regs) = 0; \
138 PT_REGS_RBP(regs) = 0; \ 138 PT_REGS_BP(regs) = 0; \
139 PT_REGS_RAX(regs) = 0; \ 139 PT_REGS_AX(regs) = 0; \
140 PT_REGS_R8(regs) = 0; \ 140 PT_REGS_R8(regs) = 0; \
141 PT_REGS_R9(regs) = 0; \ 141 PT_REGS_R9(regs) = 0; \
142 PT_REGS_R10(regs) = 0; \ 142 PT_REGS_R10(regs) = 0; \
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index c8aca8c501b0..950dfb7b8417 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -1,5 +1,39 @@
1#ifndef __UM_X86_PTRACE_H
2#define __UM_X86_PTRACE_H
3
1#ifdef CONFIG_X86_32 4#ifdef CONFIG_X86_32
2# include "ptrace_32.h" 5# include "ptrace_32.h"
3#else 6#else
4# include "ptrace_64.h" 7# include "ptrace_64.h"
5#endif 8#endif
9
10#define PT_REGS_AX(r) UPT_AX(&(r)->regs)
11#define PT_REGS_BX(r) UPT_BX(&(r)->regs)
12#define PT_REGS_CX(r) UPT_CX(&(r)->regs)
13#define PT_REGS_DX(r) UPT_DX(&(r)->regs)
14
15#define PT_REGS_SI(r) UPT_SI(&(r)->regs)
16#define PT_REGS_DI(r) UPT_DI(&(r)->regs)
17#define PT_REGS_BP(r) UPT_BP(&(r)->regs)
18#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
19
20#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
21#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
22#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
23#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
24
25#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_AX(r)
26#define PT_REGS_SYSCALL_RET(r) PT_REGS_AX(r)
27
28#define PT_FIX_EXEC_STACK(sp) do ; while(0)
29
30#define profile_pc(regs) PT_REGS_IP(regs)
31
32#define UPT_RESTART_SYSCALL(r) (UPT_IP(r) -= 2)
33#define UPT_SET_SYSCALL_RETURN(r, res) (UPT_AX(r) = (res))
34
35static inline long regs_return_value(struct uml_pt_regs *regs)
36{
37 return UPT_AX(regs);
38}
39#endif /* __UM_X86_PTRACE_H */
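
The new header collapses the old PT_REGS_EAX/PT_REGS_RAX pairs into one PT_REGS_AX family that works for both word sizes, layered on the UPT_*/REGS_* accessors. A toy model of that indirection — HOST_AX, MAX_REG_NR and the struct layout here are made up for illustration:

#include <stdio.h>

#define HOST_AX 0			/* hypothetical index into gp[] */
#define MAX_REG_NR 27			/* hypothetical register count */

struct uml_pt_regs { unsigned long gp[MAX_REG_NR]; };
struct pt_regs { struct uml_pt_regs regs; };

#define UPT_AX(r)		((r)->gp[HOST_AX])
#define PT_REGS_AX(r)		UPT_AX(&(r)->regs)
#define PT_REGS_SYSCALL_RET(r)	PT_REGS_AX(r)

int main(void)
{
	struct pt_regs regs = { { { 0 } } };

	PT_REGS_SYSCALL_RET(&regs) = -4;	/* e.g. -EINTR */
	printf("ax = %ld\n", (long)PT_REGS_AX(&regs));
	return 0;
}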
diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h
index 5d2a59112537..2cf225351b65 100644
--- a/arch/x86/um/asm/ptrace_32.h
+++ b/arch/x86/um/asm/ptrace_32.h
@@ -11,29 +11,6 @@
11#include "linux/compiler.h" 11#include "linux/compiler.h"
12#include "asm/ptrace-generic.h" 12#include "asm/ptrace-generic.h"
13 13
14#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs)
15#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs)
16#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs)
17#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs)
18#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs)
19#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs)
20#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs)
21
22#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
23#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
24#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
25#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
26#define PT_REGS_FS(r) UPT_FS(&(r)->regs)
27#define PT_REGS_GS(r) UPT_GS(&(r)->regs)
28
29#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
30
31#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r)
32#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r)
33#define PT_FIX_EXEC_STACK(sp) do ; while(0)
34
35#define profile_pc(regs) PT_REGS_IP(regs)
36
37#define user_mode(r) UPT_IS_USER(&(r)->regs) 14#define user_mode(r) UPT_IS_USER(&(r)->regs)
38 15
39/* 16/*
diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h
index 706a0d80545c..ea7bff394320 100644
--- a/arch/x86/um/asm/ptrace_64.h
+++ b/arch/x86/um/asm/ptrace_64.h
@@ -15,13 +15,6 @@
15 15
16#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 16#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
17 17
18#define PT_REGS_RBX(r) UPT_RBX(&(r)->regs)
19#define PT_REGS_RCX(r) UPT_RCX(&(r)->regs)
20#define PT_REGS_RDX(r) UPT_RDX(&(r)->regs)
21#define PT_REGS_RSI(r) UPT_RSI(&(r)->regs)
22#define PT_REGS_RDI(r) UPT_RDI(&(r)->regs)
23#define PT_REGS_RBP(r) UPT_RBP(&(r)->regs)
24#define PT_REGS_RAX(r) UPT_RAX(&(r)->regs)
25#define PT_REGS_R8(r) UPT_R8(&(r)->regs) 18#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
26#define PT_REGS_R9(r) UPT_R9(&(r)->regs) 19#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
27#define PT_REGS_R10(r) UPT_R10(&(r)->regs) 20#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
@@ -31,27 +24,8 @@
31#define PT_REGS_R14(r) UPT_R14(&(r)->regs) 24#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
32#define PT_REGS_R15(r) UPT_R15(&(r)->regs) 25#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
33 26
34#define PT_REGS_FS(r) UPT_FS(&(r)->regs)
35#define PT_REGS_GS(r) UPT_GS(&(r)->regs)
36#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
37#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
38#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
39#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
40
41#define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs)
42#define PT_REGS_RIP(r) UPT_IP(&(r)->regs)
43#define PT_REGS_SP(r) UPT_SP(&(r)->regs)
44
45#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
46
47/* XXX */ 27/* XXX */
48#define user_mode(r) UPT_IS_USER(&(r)->regs) 28#define user_mode(r) UPT_IS_USER(&(r)->regs)
49#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_RAX(r)
50#define PT_REGS_SYSCALL_RET(r) PT_REGS_RAX(r)
51
52#define PT_FIX_EXEC_STACK(sp) do ; while(0)
53
54#define profile_pc(regs) PT_REGS_IP(regs)
55 29
56struct user_desc; 30struct user_desc;
57 31
diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S
index f058d2f82e18..8d0c420465cc 100644
--- a/arch/x86/um/checksum_32.S
+++ b/arch/x86/um/checksum_32.S
@@ -26,6 +26,7 @@
26 */ 26 */
27 27
28#include <asm/errno.h> 28#include <asm/errno.h>
29#include <asm/asm.h>
29 30
30/* 31/*
31 * computes a partial checksum, e.g. for TCP/UDP fragments 32 * computes a partial checksum, e.g. for TCP/UDP fragments
@@ -232,15 +233,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
232 233
233#define SRC(y...) \ 234#define SRC(y...) \
234 9999: y; \ 235 9999: y; \
235 .section __ex_table, "a"; \ 236 _ASM_EXTABLE(9999b, 6001f)
236 .long 9999b, 6001f ; \
237 .previous
238 237
239#define DST(y...) \ 238#define DST(y...) \
240 9999: y; \ 239 9999: y; \
241 .section __ex_table, "a"; \ 240 _ASM_EXTABLE(9999b, 6002f)
242 .long 9999b, 6002f ; \
243 .previous
244 241
245.align 4 242.align 4
246 243
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 2bbe1ec2d96a..6ce2d76eb908 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -1,15 +1,74 @@
1#ifndef __SYSDEP_X86_PTRACE_H 1#ifndef __SYSDEP_X86_PTRACE_H
2#define __SYSDEP_X86_PTRACE_H 2#define __SYSDEP_X86_PTRACE_H
3 3
4#include <generated/user_constants.h>
5#include "sysdep/faultinfo.h"
6
7#define MAX_REG_OFFSET (UM_FRAME_SIZE)
8#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
9
10#define REGS_IP(r) ((r)[HOST_IP])
11#define REGS_SP(r) ((r)[HOST_SP])
12#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
13#define REGS_AX(r) ((r)[HOST_AX])
14#define REGS_BX(r) ((r)[HOST_BX])
15#define REGS_CX(r) ((r)[HOST_CX])
16#define REGS_DX(r) ((r)[HOST_DX])
17#define REGS_SI(r) ((r)[HOST_SI])
18#define REGS_DI(r) ((r)[HOST_DI])
19#define REGS_BP(r) ((r)[HOST_BP])
20#define REGS_CS(r) ((r)[HOST_CS])
21#define REGS_SS(r) ((r)[HOST_SS])
22#define REGS_DS(r) ((r)[HOST_DS])
23#define REGS_ES(r) ((r)[HOST_ES])
24
25#define UPT_IP(r) REGS_IP((r)->gp)
26#define UPT_SP(r) REGS_SP((r)->gp)
27#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
28#define UPT_AX(r) REGS_AX((r)->gp)
29#define UPT_BX(r) REGS_BX((r)->gp)
30#define UPT_CX(r) REGS_CX((r)->gp)
31#define UPT_DX(r) REGS_DX((r)->gp)
32#define UPT_SI(r) REGS_SI((r)->gp)
33#define UPT_DI(r) REGS_DI((r)->gp)
34#define UPT_BP(r) REGS_BP((r)->gp)
35#define UPT_CS(r) REGS_CS((r)->gp)
36#define UPT_SS(r) REGS_SS((r)->gp)
37#define UPT_DS(r) REGS_DS((r)->gp)
38#define UPT_ES(r) REGS_ES((r)->gp)
39
4#ifdef __i386__ 40#ifdef __i386__
5#include "ptrace_32.h" 41#include "ptrace_32.h"
6#else 42#else
7#include "ptrace_64.h" 43#include "ptrace_64.h"
8#endif 44#endif
9 45
10static inline long regs_return_value(struct uml_pt_regs *regs) 46struct syscall_args {
11{ 47 unsigned long args[6];
12 return UPT_SYSCALL_RET(regs); 48};
13} 49
50#define SYSCALL_ARGS(r) ((struct syscall_args) \
51 { .args = { UPT_SYSCALL_ARG1(r), \
52 UPT_SYSCALL_ARG2(r), \
53 UPT_SYSCALL_ARG3(r), \
54 UPT_SYSCALL_ARG4(r), \
55 UPT_SYSCALL_ARG5(r), \
56 UPT_SYSCALL_ARG6(r) } } )
57
58struct uml_pt_regs {
59 unsigned long gp[MAX_REG_NR];
60 unsigned long fp[MAX_FP_NR];
61 struct faultinfo faultinfo;
62 long syscall;
63 int is_user;
64};
65
66#define EMPTY_UML_PT_REGS { }
67
68#define UPT_SYSCALL_NR(r) ((r)->syscall)
69#define UPT_FAULTINFO(r) (&(r)->faultinfo)
70#define UPT_IS_USER(r) ((r)->is_user)
71
72extern int user_context(unsigned long sp);
14 73
15#endif /* __SYSDEP_X86_PTRACE_H */ 74#endif /* __SYSDEP_X86_PTRACE_H */
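
SYSCALL_ARGS() above relies on a C99 compound literal to return all six argument registers by value in one expression. A freestanding sketch of the same trick, with ARG() as a stand-in for the UPT_SYSCALL_ARGn() accessors:

#include <stdio.h>

struct syscall_args { unsigned long args[6]; };

#define ARG(r, i) ((r)[i])	/* stand-in for UPT_SYSCALL_ARGn() */
#define SYSCALL_ARGS(r) ((struct syscall_args) \
	{ .args = { ARG(r, 0), ARG(r, 1), ARG(r, 2), \
		    ARG(r, 3), ARG(r, 4), ARG(r, 5) } })

int main(void)
{
	unsigned long regs[6] = { 10, 20, 30, 40, 50, 60 };
	struct syscall_args a = SYSCALL_ARGS(regs);	/* copied by value */
	int i;

	for (i = 0; i < 6; i++)
		printf("arg%d = %lu\n", i + 1, a.args[i]);
	return 0;
}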
diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h
index befd1df32ed0..b94a108de1dc 100644
--- a/arch/x86/um/shared/sysdep/ptrace_32.h
+++ b/arch/x86/um/shared/sysdep/ptrace_32.h
@@ -6,11 +6,7 @@
6#ifndef __SYSDEP_I386_PTRACE_H 6#ifndef __SYSDEP_I386_PTRACE_H
7#define __SYSDEP_I386_PTRACE_H 7#define __SYSDEP_I386_PTRACE_H
8 8
9#include <generated/user_constants.h> 9#define MAX_FP_NR HOST_FPX_SIZE
10#include "sysdep/faultinfo.h"
11
12#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long))
13#define MAX_REG_OFFSET (UM_FRAME_SIZE)
14 10
15static inline void update_debugregs(int seq) {} 11static inline void update_debugregs(int seq) {}
16 12
@@ -24,90 +20,16 @@ void set_using_sysemu(int value);
24int get_using_sysemu(void); 20int get_using_sysemu(void);
25extern int sysemu_supported; 21extern int sysemu_supported;
26 22
27#define REGS_IP(r) ((r)[HOST_IP])
28#define REGS_SP(r) ((r)[HOST_SP])
29#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
30#define REGS_EAX(r) ((r)[HOST_AX])
31#define REGS_EBX(r) ((r)[HOST_BX])
32#define REGS_ECX(r) ((r)[HOST_CX])
33#define REGS_EDX(r) ((r)[HOST_DX])
34#define REGS_ESI(r) ((r)[HOST_SI])
35#define REGS_EDI(r) ((r)[HOST_DI])
36#define REGS_EBP(r) ((r)[HOST_BP])
37#define REGS_CS(r) ((r)[HOST_CS])
38#define REGS_SS(r) ((r)[HOST_SS])
39#define REGS_DS(r) ((r)[HOST_DS])
40#define REGS_ES(r) ((r)[HOST_ES])
41#define REGS_FS(r) ((r)[HOST_FS])
42#define REGS_GS(r) ((r)[HOST_GS])
43
44#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res)
45
46#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
47#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
48
49#ifndef PTRACE_SYSEMU_SINGLESTEP 23#ifndef PTRACE_SYSEMU_SINGLESTEP
50#define PTRACE_SYSEMU_SINGLESTEP 32 24#define PTRACE_SYSEMU_SINGLESTEP 32
51#endif 25#endif
52 26
53struct uml_pt_regs { 27#define UPT_SYSCALL_ARG1(r) UPT_BX(r)
54 unsigned long gp[MAX_REG_NR]; 28#define UPT_SYSCALL_ARG2(r) UPT_CX(r)
55 unsigned long fp[HOST_FPX_SIZE]; 29#define UPT_SYSCALL_ARG3(r) UPT_DX(r)
56 struct faultinfo faultinfo; 30#define UPT_SYSCALL_ARG4(r) UPT_SI(r)
57 long syscall; 31#define UPT_SYSCALL_ARG5(r) UPT_DI(r)
58 int is_user; 32#define UPT_SYSCALL_ARG6(r) UPT_BP(r)
59};
60
61#define EMPTY_UML_PT_REGS { }
62
63#define UPT_IP(r) REGS_IP((r)->gp)
64#define UPT_SP(r) REGS_SP((r)->gp)
65#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
66#define UPT_EAX(r) REGS_EAX((r)->gp)
67#define UPT_EBX(r) REGS_EBX((r)->gp)
68#define UPT_ECX(r) REGS_ECX((r)->gp)
69#define UPT_EDX(r) REGS_EDX((r)->gp)
70#define UPT_ESI(r) REGS_ESI((r)->gp)
71#define UPT_EDI(r) REGS_EDI((r)->gp)
72#define UPT_EBP(r) REGS_EBP((r)->gp)
73#define UPT_ORIG_EAX(r) ((r)->syscall)
74#define UPT_CS(r) REGS_CS((r)->gp)
75#define UPT_SS(r) REGS_SS((r)->gp)
76#define UPT_DS(r) REGS_DS((r)->gp)
77#define UPT_ES(r) REGS_ES((r)->gp)
78#define UPT_FS(r) REGS_FS((r)->gp)
79#define UPT_GS(r) REGS_GS((r)->gp)
80
81#define UPT_SYSCALL_ARG1(r) UPT_EBX(r)
82#define UPT_SYSCALL_ARG2(r) UPT_ECX(r)
83#define UPT_SYSCALL_ARG3(r) UPT_EDX(r)
84#define UPT_SYSCALL_ARG4(r) UPT_ESI(r)
85#define UPT_SYSCALL_ARG5(r) UPT_EDI(r)
86#define UPT_SYSCALL_ARG6(r) UPT_EBP(r)
87
88extern int user_context(unsigned long sp);
89
90#define UPT_IS_USER(r) ((r)->is_user)
91
92struct syscall_args {
93 unsigned long args[6];
94};
95
96#define SYSCALL_ARGS(r) ((struct syscall_args) \
97 { .args = { UPT_SYSCALL_ARG1(r), \
98 UPT_SYSCALL_ARG2(r), \
99 UPT_SYSCALL_ARG3(r), \
100 UPT_SYSCALL_ARG4(r), \
101 UPT_SYSCALL_ARG5(r), \
102 UPT_SYSCALL_ARG6(r) } } )
103
104#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
105
106#define UPT_ORIG_SYSCALL(r) UPT_EAX(r)
107#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r)
108#define UPT_SYSCALL_RET(r) UPT_EAX(r)
109
110#define UPT_FAULTINFO(r) (&(r)->faultinfo)
111 33
112extern void arch_init_registers(int pid); 34extern void arch_init_registers(int pid);
113 35
diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h
index 031edc53ac57..919789f1071e 100644
--- a/arch/x86/um/shared/sysdep/ptrace_64.h
+++ b/arch/x86/um/shared/sysdep/ptrace_64.h
@@ -8,22 +8,8 @@
8#ifndef __SYSDEP_X86_64_PTRACE_H 8#ifndef __SYSDEP_X86_64_PTRACE_H
9#define __SYSDEP_X86_64_PTRACE_H 9#define __SYSDEP_X86_64_PTRACE_H
10 10
11#include <generated/user_constants.h> 11#define MAX_FP_NR HOST_FP_SIZE
12#include "sysdep/faultinfo.h"
13 12
14#define MAX_REG_OFFSET (UM_FRAME_SIZE)
15#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
16
17#define REGS_IP(r) ((r)[HOST_IP])
18#define REGS_SP(r) ((r)[HOST_SP])
19
20#define REGS_RBX(r) ((r)[HOST_BX])
21#define REGS_RCX(r) ((r)[HOST_CX])
22#define REGS_RDX(r) ((r)[HOST_DX])
23#define REGS_RSI(r) ((r)[HOST_SI])
24#define REGS_RDI(r) ((r)[HOST_DI])
25#define REGS_RBP(r) ((r)[HOST_BP])
26#define REGS_RAX(r) ((r)[HOST_AX])
27#define REGS_R8(r) ((r)[HOST_R8]) 13#define REGS_R8(r) ((r)[HOST_R8])
28#define REGS_R9(r) ((r)[HOST_R9]) 14#define REGS_R9(r) ((r)[HOST_R9])
29#define REGS_R10(r) ((r)[HOST_R10]) 15#define REGS_R10(r) ((r)[HOST_R10])
@@ -32,9 +18,6 @@
32#define REGS_R13(r) ((r)[HOST_R13]) 18#define REGS_R13(r) ((r)[HOST_R13])
33#define REGS_R14(r) ((r)[HOST_R14]) 19#define REGS_R14(r) ((r)[HOST_R14])
34#define REGS_R15(r) ((r)[HOST_R15]) 20#define REGS_R15(r) ((r)[HOST_R15])
35#define REGS_CS(r) ((r)[HOST_CS])
36#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
37#define REGS_SS(r) ((r)[HOST_SS])
38 21
39#define HOST_FS_BASE 21 22#define HOST_FS_BASE 21
40#define HOST_GS_BASE 22 23#define HOST_GS_BASE 22
@@ -58,45 +41,6 @@
58#define GS (HOST_GS * sizeof(long)) 41#define GS (HOST_GS * sizeof(long))
59#endif 42#endif
60 43
61#define REGS_FS_BASE(r) ((r)[HOST_FS_BASE])
62#define REGS_GS_BASE(r) ((r)[HOST_GS_BASE])
63#define REGS_DS(r) ((r)[HOST_DS])
64#define REGS_ES(r) ((r)[HOST_ES])
65#define REGS_FS(r) ((r)[HOST_FS])
66#define REGS_GS(r) ((r)[HOST_GS])
67
68#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_AX])
69
70#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res)
71
72#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
73#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
74
75#define REGS_FAULT_ADDR(r) ((r)->fault_addr)
76
77#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type)
78
79#define REGS_TRAP(r) ((r)->trap_type)
80
81#define REGS_ERR(r) ((r)->fault_type)
82
83struct uml_pt_regs {
84 unsigned long gp[MAX_REG_NR];
85 unsigned long fp[HOST_FP_SIZE];
86 struct faultinfo faultinfo;
87 long syscall;
88 int is_user;
89};
90
91#define EMPTY_UML_PT_REGS { }
92
93#define UPT_RBX(r) REGS_RBX((r)->gp)
94#define UPT_RCX(r) REGS_RCX((r)->gp)
95#define UPT_RDX(r) REGS_RDX((r)->gp)
96#define UPT_RSI(r) REGS_RSI((r)->gp)
97#define UPT_RDI(r) REGS_RDI((r)->gp)
98#define UPT_RBP(r) REGS_RBP((r)->gp)
99#define UPT_RAX(r) REGS_RAX((r)->gp)
100#define UPT_R8(r) REGS_R8((r)->gp) 44#define UPT_R8(r) REGS_R8((r)->gp)
101#define UPT_R9(r) REGS_R9((r)->gp) 45#define UPT_R9(r) REGS_R9((r)->gp)
102#define UPT_R10(r) REGS_R10((r)->gp) 46#define UPT_R10(r) REGS_R10((r)->gp)
@@ -105,51 +49,14 @@ struct uml_pt_regs {
105#define UPT_R13(r) REGS_R13((r)->gp) 49#define UPT_R13(r) REGS_R13((r)->gp)
106#define UPT_R14(r) REGS_R14((r)->gp) 50#define UPT_R14(r) REGS_R14((r)->gp)
107#define UPT_R15(r) REGS_R15((r)->gp) 51#define UPT_R15(r) REGS_R15((r)->gp)
108#define UPT_CS(r) REGS_CS((r)->gp)
109#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp)
110#define UPT_FS(r) REGS_FS((r)->gp)
111#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp)
112#define UPT_GS(r) REGS_GS((r)->gp)
113#define UPT_DS(r) REGS_DS((r)->gp)
114#define UPT_ES(r) REGS_ES((r)->gp)
115#define UPT_CS(r) REGS_CS((r)->gp)
116#define UPT_SS(r) REGS_SS((r)->gp)
117#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp)
118
119#define UPT_IP(r) REGS_IP((r)->gp)
120#define UPT_SP(r) REGS_SP((r)->gp)
121
122#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
123#define UPT_SYSCALL_NR(r) ((r)->syscall)
124#define UPT_SYSCALL_RET(r) UPT_RAX(r)
125
126extern int user_context(unsigned long sp);
127 52
128#define UPT_IS_USER(r) ((r)->is_user) 53#define UPT_SYSCALL_ARG1(r) UPT_DI(r)
129 54#define UPT_SYSCALL_ARG2(r) UPT_SI(r)
130#define UPT_SYSCALL_ARG1(r) UPT_RDI(r) 55#define UPT_SYSCALL_ARG3(r) UPT_DX(r)
131#define UPT_SYSCALL_ARG2(r) UPT_RSI(r)
132#define UPT_SYSCALL_ARG3(r) UPT_RDX(r)
133#define UPT_SYSCALL_ARG4(r) UPT_R10(r) 56#define UPT_SYSCALL_ARG4(r) UPT_R10(r)
134#define UPT_SYSCALL_ARG5(r) UPT_R8(r) 57#define UPT_SYSCALL_ARG5(r) UPT_R8(r)
135#define UPT_SYSCALL_ARG6(r) UPT_R9(r) 58#define UPT_SYSCALL_ARG6(r) UPT_R9(r)
136 59
137struct syscall_args {
138 unsigned long args[6];
139};
140
141#define SYSCALL_ARGS(r) ((struct syscall_args) \
142 { .args = { UPT_SYSCALL_ARG1(r), \
143 UPT_SYSCALL_ARG2(r), \
144 UPT_SYSCALL_ARG3(r), \
145 UPT_SYSCALL_ARG4(r), \
146 UPT_SYSCALL_ARG5(r), \
147 UPT_SYSCALL_ARG6(r) } } )
148
149#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
150
151#define UPT_FAULTINFO(r) (&(r)->faultinfo)
152
153static inline void arch_init_registers(int pid) 60static inline void arch_init_registers(int pid)
154{ 61{
155} 62}
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 4883b9546016..bb0fb03b9f85 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -156,6 +156,9 @@ static int copy_sc_from_user(struct pt_regs *regs,
156 struct sigcontext sc; 156 struct sigcontext sc;
157 int err, pid; 157 int err, pid;
158 158
159 /* Always make any pending restarted system calls return -EINTR */
160 current_thread_info()->restart_block.fn = do_no_restart_syscall;
161
159 err = copy_from_user(&sc, from, sizeof(sc)); 162 err = copy_from_user(&sc, from, sizeof(sc));
160 if (err) 163 if (err)
161 return err; 164 return err;
@@ -410,9 +413,9 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
410 413
411 PT_REGS_SP(regs) = (unsigned long) frame; 414 PT_REGS_SP(regs) = (unsigned long) frame;
412 PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; 415 PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
413 PT_REGS_EAX(regs) = (unsigned long) sig; 416 PT_REGS_AX(regs) = (unsigned long) sig;
414 PT_REGS_EDX(regs) = (unsigned long) 0; 417 PT_REGS_DX(regs) = (unsigned long) 0;
415 PT_REGS_ECX(regs) = (unsigned long) 0; 418 PT_REGS_CX(regs) = (unsigned long) 0;
416 419
417 if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) 420 if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
418 ptrace_notify(SIGTRAP); 421 ptrace_notify(SIGTRAP);
@@ -460,9 +463,9 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
460 463
461 PT_REGS_SP(regs) = (unsigned long) frame; 464 PT_REGS_SP(regs) = (unsigned long) frame;
462 PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; 465 PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
463 PT_REGS_EAX(regs) = (unsigned long) sig; 466 PT_REGS_AX(regs) = (unsigned long) sig;
464 PT_REGS_EDX(regs) = (unsigned long) &frame->info; 467 PT_REGS_DX(regs) = (unsigned long) &frame->info;
465 PT_REGS_ECX(regs) = (unsigned long) &frame->uc; 468 PT_REGS_CX(regs) = (unsigned long) &frame->uc;
466 469
467 if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) 470 if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
468 ptrace_notify(SIGTRAP); 471 ptrace_notify(SIGTRAP);
@@ -541,8 +544,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
541 set->sig[0]); 544 set->sig[0]);
542 err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate); 545 err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
543 if (sizeof(*set) == 16) { 546 if (sizeof(*set) == 16) {
544 __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); 547 err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
545 __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); 548 err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
546 } 549 }
547 else 550 else
548 err |= __copy_to_user(&frame->uc.uc_sigmask, set, 551 err |= __copy_to_user(&frame->uc.uc_sigmask, set,
@@ -570,17 +573,17 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
570 } 573 }
571 574
572 PT_REGS_SP(regs) = (unsigned long) frame; 575 PT_REGS_SP(regs) = (unsigned long) frame;
573 PT_REGS_RDI(regs) = sig; 576 PT_REGS_DI(regs) = sig;
574 /* In case the signal handler was declared without prototypes */ 577 /* In case the signal handler was declared without prototypes */
575 PT_REGS_RAX(regs) = 0; 578 PT_REGS_AX(regs) = 0;
576 579
577 /* 580 /*
578 * This also works for non SA_SIGINFO handlers because they expect the 581 * This also works for non SA_SIGINFO handlers because they expect the
579 * next argument after the signal number on the stack. 582 * next argument after the signal number on the stack.
580 */ 583 */
581 PT_REGS_RSI(regs) = (unsigned long) &frame->info; 584 PT_REGS_SI(regs) = (unsigned long) &frame->info;
582 PT_REGS_RDX(regs) = (unsigned long) &frame->uc; 585 PT_REGS_DX(regs) = (unsigned long) &frame->uc;
583 PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler; 586 PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
584 out: 587 out:
585 return err; 588 return err;
586} 589}
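
The uc_sigmask hunk above turns two bare __put_user() calls into err |= __put_user(...), so a fault on either word is folded into the function's return value instead of being dropped. The accumulation idiom, reduced to a standalone sketch where put_user_stub() is a stand-in for __put_user() that faults on a NULL slot:

#include <stdio.h>

static int put_user_stub(int val, int *slot)
{
	if (!slot)
		return -14;	/* -EFAULT */
	*slot = val;
	return 0;
}

int main(void)
{
	int a, err = 0;

	err |= put_user_stub(1, &a);
	err |= put_user_stub(2, NULL);	/* faults; before the fix this was ignored */
	err |= put_user_stub(3, &a);

	printf("err = %d, a = %d\n", err, a);	/* err != 0 propagates the fault */
	return 0;
}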
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 9924776f4265..170bd926a69c 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -31,7 +31,6 @@
31#define stub_fork sys_fork 31#define stub_fork sys_fork
32#define stub_vfork sys_vfork 32#define stub_vfork sys_vfork
33#define stub_execve sys_execve 33#define stub_execve sys_execve
34#define stub_rt_sigsuspend sys_rt_sigsuspend
35#define stub_sigaltstack sys_sigaltstack 34#define stub_sigaltstack sys_sigaltstack
36#define stub_rt_sigreturn sys_rt_sigreturn 35#define stub_rt_sigreturn sys_rt_sigreturn
37 36
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index 70ca357393b8..b853e8600b9d 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -44,10 +44,10 @@ long sys_sigaction(int sig, const struct old_sigaction __user *act,
44 old_sigset_t mask; 44 old_sigset_t mask;
45 if (!access_ok(VERIFY_READ, act, sizeof(*act)) || 45 if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
46 __get_user(new_ka.sa.sa_handler, &act->sa_handler) || 46 __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
47 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) 47 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
48 __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
49 __get_user(mask, &act->sa_mask))
48 return -EFAULT; 50 return -EFAULT;
49 __get_user(new_ka.sa.sa_flags, &act->sa_flags);
50 __get_user(mask, &act->sa_mask);
51 siginitset(&new_ka.sa.sa_mask, mask); 51 siginitset(&new_ka.sa.sa_mask, mask);
52 } 52 }
53 53
@@ -56,10 +56,10 @@ long sys_sigaction(int sig, const struct old_sigaction __user *act,
56 if (!ret && oact) { 56 if (!ret && oact) {
57 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || 57 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
58 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || 58 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
59 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) 59 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
60 __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
61 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
60 return -EFAULT; 62 return -EFAULT;
61 __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
62 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
63 } 63 }
64 64
65 return ret; 65 return ret;
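
The sys_sigaction() hunks use the complementary idiom: the copies are chained with || inside one condition, so the first faulting access short-circuits the whole sequence straight to -EFAULT instead of leaving later fields unchecked. A standalone sketch, with get_user_stub() standing in for __get_user():

#include <stdio.h>

static int get_user_stub(int *dst, const int *src)
{
	if (!src)
		return -14;	/* -EFAULT */
	*dst = *src;
	return 0;
}

int main(void)
{
	int handler = 1, flags = 2;
	int h, f, m;

	if (get_user_stub(&h, &handler) ||
	    get_user_stub(&f, &flags) ||
	    get_user_stub(&m, NULL)) {	/* faults: the chain stops here */
		printf("-EFAULT\n");
		return 1;
	}
	printf("%d %d %d\n", h, f, m);
	return 0;
}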
diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c
index 171b3e9dc867..2d5cc51e9bef 100644
--- a/arch/x86/um/sysrq_32.c
+++ b/arch/x86/um/sysrq_32.c
@@ -23,12 +23,10 @@ void show_regs(struct pt_regs *regs)
23 printk(" EFLAGS: %08lx\n %s\n", PT_REGS_EFLAGS(regs), 23 printk(" EFLAGS: %08lx\n %s\n", PT_REGS_EFLAGS(regs),
24 print_tainted()); 24 print_tainted());
25 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", 25 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
26 PT_REGS_EAX(regs), PT_REGS_EBX(regs), 26 PT_REGS_AX(regs), PT_REGS_BX(regs),
27 PT_REGS_ECX(regs), 27 PT_REGS_CX(regs), PT_REGS_DX(regs));
28 PT_REGS_EDX(regs));
29 printk("ESI: %08lx EDI: %08lx EBP: %08lx", 28 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
30 PT_REGS_ESI(regs), PT_REGS_EDI(regs), 29 PT_REGS_SI(regs), PT_REGS_DI(regs), PT_REGS_BP(regs));
31 PT_REGS_EBP(regs));
32 printk(" DS: %04lx ES: %04lx\n", 30 printk(" DS: %04lx ES: %04lx\n",
33 0xffff & PT_REGS_DS(regs), 31 0xffff & PT_REGS_DS(regs),
34 0xffff & PT_REGS_ES(regs)); 32 0xffff & PT_REGS_ES(regs));
diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c
index e8913436d7dc..08258f179969 100644
--- a/arch/x86/um/sysrq_64.c
+++ b/arch/x86/um/sysrq_64.c
@@ -19,15 +19,15 @@ void __show_regs(struct pt_regs *regs)
 	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
 		current->comm, print_tainted(), init_utsname()->release);
 	printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff,
-	       PT_REGS_RIP(regs));
+	       PT_REGS_IP(regs));
 	printk(KERN_INFO "RSP: %016lx EFLAGS: %08lx\n", PT_REGS_SP(regs),
 	       PT_REGS_EFLAGS(regs));
 	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
-	       PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs));
+	       PT_REGS_AX(regs), PT_REGS_BX(regs), PT_REGS_CX(regs));
 	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
-	       PT_REGS_RDX(regs), PT_REGS_RSI(regs), PT_REGS_RDI(regs));
+	       PT_REGS_DX(regs), PT_REGS_SI(regs), PT_REGS_DI(regs));
 	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
-	       PT_REGS_RBP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs));
+	       PT_REGS_BP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs));
 	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
 	       PT_REGS_R10(regs), PT_REGS_R11(regs), PT_REGS_R12(regs));
 	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
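Both sysrq files pick up the same rename: UML's register accessors drop the width prefix (PT_REGS_EAX and PT_REGS_RAX both become PT_REGS_AX, and so on) so 32- and 64-bit code can share one spelling. A hedged sketch of how such width-neutral wrappers are typically defined (macro bodies assumed; the real definitions live in the arch/x86/um headers):

/* Width-neutral accessors (sketch): one shared name per register,
 * mapped onto the arch's own underlying user-regs accessor. */
#define PT_REGS_AX(r)	UPT_AX(&(r)->regs)	/* EAX or RAX */
#define PT_REGS_BX(r)	UPT_BX(&(r)->regs)	/* EBX or RBX */
#define PT_REGS_IP(r)	UPT_IP(&(r)->regs)	/* EIP or RIP */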
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index c6c7131e563b..baba84f8ecb8 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -219,7 +219,7 @@ int arch_copy_tls(struct task_struct *new)
 	int idx, ret = -EFAULT;
 
 	if (copy_from_user(&info,
-			   (void __user *) UPT_ESI(&new->thread.regs.regs),
+			   (void __user *) UPT_SI(&new->thread.regs.regs),
 			   sizeof(info)))
 		goto out;
 
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index add2c2d729ce..96ab2c09cb68 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -20,5 +20,5 @@ obj-$(CONFIG_EVENT_TRACING) += trace.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
-obj-$(CONFIG_XEN_DOM0)		+= vga.o
+obj-$(CONFIG_XEN_DOM0)		+= apic.o vga.o
 obj-$(CONFIG_SWIOTLB_XEN)	+= pci-swiotlb-xen.o
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
new file mode 100644
index 000000000000..ec57bd3818a4
--- /dev/null
+++ b/arch/x86/xen/apic.c
@@ -0,0 +1,33 @@
+#include <linux/init.h>
+
+#include <asm/x86_init.h>
+#include <asm/apic.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xen.h>
+#include <xen/interface/physdev.h>
+
+unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
+{
+	struct physdev_apic apic_op;
+	int ret;
+
+	apic_op.apic_physbase = mpc_ioapic_addr(apic);
+	apic_op.reg = reg;
+	ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+	if (!ret)
+		return apic_op.value;
+
+	/* fall back to returning emulated IO-APIC values */
+	if (reg == 0x1)
+		return 0x00170020;
+	else if (reg == 0x0)
+		return apic << 24;
+
+	return 0xfd;
+}
+
+void __init xen_init_apic(void)
+{
+	x86_io_apic_ops.read = xen_io_apic_read;
+}
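The emulated fallback values follow the IO-APIC register layout: register 0x1 is the version register, and 0x00170020 encodes version 0x20 with a maximum redirection entry index of 0x17 (i.e. 24 interrupt pins); register 0x0 is the ID register, which keeps the APIC ID in bits 24-27, hence apic << 24. Illustrative decoding helpers (ours, not part of the patch):

/* Decoding the emulated registers returned above. */
static inline unsigned int ioapic_version(unsigned int ver)
{
	return ver & 0xff;			/* 0x00170020 -> 0x20 */
}

static inline unsigned int ioapic_pins(unsigned int ver)
{
	return ((ver >> 16) & 0xff) + 1;	/* 0x17 + 1 = 24 pins */
}

static inline unsigned int ioapic_id(unsigned int id_reg)
{
	return (id_reg >> 24) & 0xf;		/* ID lives in bits 24-27 */
}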
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 95dccce8e979..c0f5facdb10c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1396,6 +1396,8 @@ asmlinkage void __init xen_start_kernel(void)
 		xen_start_info->console.domU.mfn = 0;
 		xen_start_info->console.domU.evtchn = 0;
 
+		xen_init_apic();
+
 		/* Make sure ACS will be enabled */
 		pci_request_acs();
 	}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 69f5857660ac..3506cd4f9a43 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1864,7 +1864,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 #endif /* CONFIG_X86_64 */
 
 static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
-static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss;
 
 static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 {
@@ -1905,7 +1904,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 	 * We just don't map the IO APIC - all access is via
 	 * hypercalls.  Keep the address in the pte for reference.
 	 */
-	pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL);
+	pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
 	break;
 #endif
 
@@ -2070,7 +2069,6 @@ void __init xen_init_mmu_ops(void)
 	pv_mmu_ops = xen_mmu_ops;
 
 	memset(dummy_mapping, 0xff, PAGE_SIZE);
-	memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE);
 }
 
 /* Protected by xen_reservation_lock. */
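With reads now routed through xen_io_apic_read() (added above), the IO-APIC fixmap slot only needs some harmless readable page behind it, so the dedicated 0xfd-filled page can be dropped and the existing all-0xff dummy_mapping reused. The surviving fixmap case, condensed (case labels assumed from the surrounding switch):

/* Condensed sketch of the resulting fixmap handling: the IO-APIC
 * range is backed by dummy_mapping; real register access goes
 * through the PHYSDEVOP_apic_read hypercall instead. */
case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
	pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
	break;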
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 0503c0c493a9..3700945ed0d5 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -265,18 +265,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 		set_cpu_possible(cpu, false);
 	}
 
-	for_each_possible_cpu (cpu) {
-		struct task_struct *idle;
-
-		if (cpu == 0)
-			continue;
-
-		idle = fork_idle(cpu);
-		if (IS_ERR(idle))
-			panic("failed fork for CPU %d", cpu);
-
+	for_each_possible_cpu(cpu)
 		set_cpu_present(cpu, true);
-	}
 }
 
 static int __cpuinit
@@ -346,9 +336,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	return 0;
 }
 
-static int __cpuinit xen_cpu_up(unsigned int cpu)
+static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 {
-	struct task_struct *idle = idle_task(cpu);
 	int rc;
 
 	per_cpu(current_task, cpu) = idle;
@@ -562,10 +551,10 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 	xen_init_lock_cpu(0);
 }
 
-static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
+static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int rc;
-	rc = native_cpu_up(cpu);
+	rc = native_cpu_up(cpu, tidle);
 	WARN_ON(xen_smp_intr_init(cpu));
 	return rc;
 }
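The signature changes here track the generic smpboot rework in this series: the core now forks the idle task once and passes it down, so each architecture's cpu_up no longer calls fork_idle() or idle_task() itself. Roughly the plumbing these hooks now sit behind (sketch; the exact spellings in this era's asm/smp.h may differ):

/* Sketch of the plumbing: generic code owns the idle task's lifetime
 * and hands it to the platform hook, removing per-arch fork_idle(). */
struct smp_ops {
	int (*cpu_up)(unsigned int cpu, struct task_struct *tidle);
	/* ... */
};

static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	return smp_ops.cpu_up(cpu, tidle);
}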
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index b040b0e518ca..f9643fc50de5 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -14,6 +14,7 @@
 #include <asm/thread_info.h>
 #include <asm/processor-flags.h>
 #include <asm/segment.h>
+#include <asm/asm.h>
 
 #include <xen/interface/xen.h>
 
@@ -137,10 +138,7 @@ iret_restore_end:
 
 1:	iret
 xen_iret_end_crit:
-.section __ex_table, "a"
-	.align 4
-	.long 1b, iret_exc
-.previous
+	_ASM_EXTABLE(1b, iret_exc)
 
 hyper_iret:
 	/* put this out of line since it's very rarely used */
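The open-coded __ex_table fragment is replaced by the _ASM_EXTABLE() helper from <asm/asm.h> (now included above), so later changes to the exception-table format need touch only one place. Roughly what the assembler-side macro looked like at the time (hedged sketch; the exact definition may differ):

/* Approximate assembler-side helper from <asm/asm.h>: emits one
 * (faulting address, fixup address) pair into the __ex_table section. */
#ifdef __ASSEMBLY__
# define _ASM_EXTABLE(from, to)			\
	.pushsection "__ex_table", "a" ;	\
	_ASM_ALIGN ;				\
	_ASM_PTR from, to ;			\
	.popsection
#endif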
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index b095739ccd4c..45c0c0667bd9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -92,11 +92,15 @@ struct dom0_vga_console_info;
 
 #ifdef CONFIG_XEN_DOM0
 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
+void __init xen_init_apic(void);
 #else
 static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
 				       size_t size)
 {
 }
+static inline void __init xen_init_apic(void)
+{
+}
 #endif
 
 /* Declare an asm function, along with symbols needed to make it
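The paired declaration and empty inline stub keep call sites #ifdef-free: the enlighten.c hunk above can call xen_init_apic() without guarding the call, and on !CONFIG_XEN_DOM0 builds the stub compiles away to nothing. The same idiom in miniature (hypothetical feature name):

/* Conditional-stub idiom (hypothetical example): callers never need
 * #ifdef CONFIG_FOO, because the disabled variant is an empty inline
 * that the compiler eliminates entirely. */
#ifdef CONFIG_FOO
void __init foo_init(void);
#else
static inline void __init foo_init(void) { }
#endif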