Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 15
-rw-r--r--  arch/x86/Makefile | 5
-rw-r--r--  arch/x86/Makefile.um | 3
-rw-r--r--  arch/x86/boot/compressed/Makefile | 9
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 14
-rw-r--r--  arch/x86/boot/compressed/head_32.S | 14
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 22
-rw-r--r--  arch/x86/boot/header.S | 26
-rw-r--r--  arch/x86/boot/tools/build.c | 39
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 35
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 4
-rw-r--r--  arch/x86/ia32/ia32entry.S | 9
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 23
-rw-r--r--  arch/x86/include/asm/apic.h | 23
-rw-r--r--  arch/x86/include/asm/apicdef.h | 2
-rw-r--r--  arch/x86/include/asm/asm.h | 38
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 10
-rw-r--r--  arch/x86/include/asm/boot.h | 2
-rw-r--r--  arch/x86/include/asm/cmpxchg.h | 4
-rw-r--r--  arch/x86/include/asm/compat.h | 2
-rw-r--r--  arch/x86/include/asm/current.h | 2
-rw-r--r--  arch/x86/include/asm/desc.h | 1
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 6
-rw-r--r--  arch/x86/include/asm/ftrace.h | 3
-rw-r--r--  arch/x86/include/asm/hardirq.h | 9
-rw-r--r--  arch/x86/include/asm/ia32.h | 6
-rw-r--r--  arch/x86/include/asm/io_apic.h | 35
-rw-r--r--  arch/x86/include/asm/irq_regs.h | 4
-rw-r--r--  arch/x86/include/asm/irq_remapping.h | 118
-rw-r--r--  arch/x86/include/asm/kdebug.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 5
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 3
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 12
-rw-r--r--  arch/x86/include/asm/mmzone_32.h | 6
-rw-r--r--  arch/x86/include/asm/msr-index.h | 5
-rw-r--r--  arch/x86/include/asm/msr.h | 9
-rw-r--r--  arch/x86/include/asm/nmi.h | 22
-rw-r--r--  arch/x86/include/asm/nops.h | 4
-rw-r--r--  arch/x86/include/asm/page_32_types.h | 4
-rw-r--r--  arch/x86/include/asm/page_64_types.h | 4
-rw-r--r--  arch/x86/include/asm/paravirt.h | 6
-rw-r--r--  arch/x86/include/asm/percpu.h | 24
-rw-r--r--  arch/x86/include/asm/perf_event.h | 12
-rw-r--r--  arch/x86/include/asm/posix_types.h | 6
-rw-r--r--  arch/x86/include/asm/processor.h | 5
-rw-r--r--  arch/x86/include/asm/segment.h | 4
-rw-r--r--  arch/x86/include/asm/sigcontext.h | 2
-rw-r--r--  arch/x86/include/asm/siginfo.h | 8
-rw-r--r--  arch/x86/include/asm/smp.h | 15
-rw-r--r--  arch/x86/include/asm/spinlock.h | 2
-rw-r--r--  arch/x86/include/asm/stackprotector.h | 4
-rw-r--r--  arch/x86/include/asm/stat.h | 21
-rw-r--r--  arch/x86/include/asm/syscall.h | 27
-rw-r--r--  arch/x86/include/asm/thread_info.h | 23
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 10
-rw-r--r--  arch/x86/include/asm/topology.h | 38
-rw-r--r--  arch/x86/include/asm/uaccess.h | 27
-rw-r--r--  arch/x86/include/asm/uaccess_32.h | 5
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 4
-rw-r--r--  arch/x86/include/asm/unistd.h | 6
-rw-r--r--  arch/x86/include/asm/word-at-a-time.h | 33
-rw-r--r--  arch/x86/include/asm/x86_init.h | 10
-rw-r--r--  arch/x86/include/asm/xsave.h | 10
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 2
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 4
-rw-r--r--  arch/x86/kernel/acpi/sleep.h | 4
-rw-r--r--  arch/x86/kernel/acpi/wakeup_32.S | 4
-rw-r--r--  arch/x86/kernel/acpi/wakeup_64.S | 4
-rw-r--r--  arch/x86/kernel/apic/apic.c | 76
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 1
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 8
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/es7000_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 383
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/summit_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 7
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 1
-rw-r--r--  arch/x86/kernel/apm_32.c | 2
-rw-r--r--  arch/x86/kernel/check.c | 20
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 29
-rw-r--r--  arch/x86/kernel/cpu/common.c | 11
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 12
-rw-r--r--  arch/x86/kernel/cpu/match.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 55
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 65
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 7
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 11
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c | 570
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 6
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 6
-rw-r--r--  arch/x86/kernel/dumpstack.c | 23
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 2
-rw-r--r--  arch/x86/kernel/entry_32.S | 47
-rw-r--r--  arch/x86/kernel/entry_64.S | 16
-rw-r--r--  arch/x86/kernel/ftrace.c | 500
-rw-r--r--  arch/x86/kernel/head_32.S | 223
-rw-r--r--  arch/x86/kernel/head_64.S | 80
-rw-r--r--  arch/x86/kernel/i387.c | 3
-rw-r--r--  arch/x86/kernel/init_task.c | 42
-rw-r--r--  arch/x86/kernel/irq_32.c | 8
-rw-r--r--  arch/x86/kernel/kprobes.c | 4
-rw-r--r--  arch/x86/kernel/kvm.c | 9
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 12
-rw-r--r--  arch/x86/kernel/microcode_core.c | 19
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 14
-rw-r--r--  arch/x86/kernel/nmi.c | 95
-rw-r--r--  arch/x86/kernel/nmi_selftest.c | 13
-rw-r--r--  arch/x86/kernel/paravirt.c | 12
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 8
-rw-r--r--  arch/x86/kernel/process.c | 73
-rw-r--r--  arch/x86/kernel/process_32.c | 11
-rw-r--r--  arch/x86/kernel/process_64.c | 20
-rw-r--r--  arch/x86/kernel/ptrace.c | 7
-rw-r--r--  arch/x86/kernel/setup.c | 10
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 14
-rw-r--r--  arch/x86/kernel/smpboot.c | 191
-rw-r--r--  arch/x86/kernel/test_rodata.c | 10
-rw-r--r--  arch/x86/kernel/traps.c | 8
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 6
-rw-r--r--  arch/x86/kernel/x86_init.c | 9
-rw-r--r--  arch/x86/kernel/xsave.c | 2
-rw-r--r--  arch/x86/kvm/pmu.c | 18
-rw-r--r--  arch/x86/kvm/vmx.c | 5
-rw-r--r--  arch/x86/kvm/x86.c | 9
-rw-r--r--  arch/x86/lib/checksum_32.S | 9
-rw-r--r--  arch/x86/lib/copy_user_64.S | 63
-rw-r--r--  arch/x86/lib/copy_user_nocache_64.S | 50
-rw-r--r--  arch/x86/lib/csum-copy_64.S | 16
-rw-r--r--  arch/x86/lib/getuser.S | 9
-rw-r--r--  arch/x86/lib/insn.c | 53
-rw-r--r--  arch/x86/lib/putuser.S | 12
-rw-r--r--  arch/x86/lib/usercopy.c | 99
-rw-r--r--  arch/x86/lib/usercopy_32.c | 319
-rw-r--r--  arch/x86/lib/usercopy_64.c | 49
-rw-r--r--  arch/x86/mm/extable.c | 142
-rw-r--r--  arch/x86/mm/init.c | 21
-rw-r--r--  arch/x86/mm/init_64.c | 23
-rw-r--r--  arch/x86/mm/numa_emulation.c | 8
-rw-r--r--  arch/x86/mm/tlb.c | 16
-rw-r--r--  arch/x86/pci/acpi.c | 128
-rw-r--r--  arch/x86/pci/amd_bus.c | 91
-rw-r--r--  arch/x86/pci/broadcom_bus.c | 12
-rw-r--r--  arch/x86/pci/bus_numa.c | 69
-rw-r--r--  arch/x86/pci/bus_numa.h | 18
-rw-r--r--  arch/x86/pci/common.c | 43
-rw-r--r--  arch/x86/pci/fixup.c | 17
-rw-r--r--  arch/x86/pci/i386.c | 2
-rw-r--r--  arch/x86/platform/geode/net5501.c | 2
-rw-r--r--  arch/x86/platform/mrst/mrst.c | 4
-rw-r--r--  arch/x86/platform/visws/visws_quirks.c | 2
-rw-r--r--  arch/x86/tools/.gitignore | 1
-rw-r--r--  arch/x86/tools/Makefile | 4
-rw-r--r--  arch/x86/tools/relocs.c (renamed from arch/x86/boot/compressed/relocs.c) | 244
-rw-r--r--  arch/x86/um/asm/barrier.h | 75
-rw-r--r--  arch/x86/um/asm/elf.h | 42
-rw-r--r--  arch/x86/um/asm/ptrace.h | 34
-rw-r--r--  arch/x86/um/asm/ptrace_32.h | 23
-rw-r--r--  arch/x86/um/asm/ptrace_64.h | 26
-rw-r--r--  arch/x86/um/asm/system.h | 135
-rw-r--r--  arch/x86/um/checksum_32.S | 9
-rw-r--r--  arch/x86/um/shared/sysdep/ptrace.h | 67
-rw-r--r--  arch/x86/um/shared/sysdep/ptrace_32.h | 92
-rw-r--r--  arch/x86/um/shared/sysdep/ptrace_64.h | 101
-rw-r--r--  arch/x86/um/signal.c | 29
-rw-r--r--  arch/x86/um/sys_call_table_64.c | 1
-rw-r--r--  arch/x86/um/syscalls_32.c | 12
-rw-r--r--  arch/x86/um/sysrq_32.c | 8
-rw-r--r--  arch/x86/um/sysrq_64.c | 8
-rw-r--r--  arch/x86/um/tls_32.c | 2
-rw-r--r--  arch/x86/xen/Makefile | 2
-rw-r--r--  arch/x86/xen/apic.c | 33
-rw-r--r--  arch/x86/xen/enlighten.c | 48
-rw-r--r--  arch/x86/xen/mmu.c | 11
-rw-r--r--  arch/x86/xen/smp.c | 34
-rw-r--r--  arch/x86/xen/xen-asm.S | 2
-rw-r--r--  arch/x86/xen/xen-asm_32.S | 6
-rw-r--r--  arch/x86/xen/xen-ops.h | 4
184 files changed, 3453 insertions(+), 2567 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d2599a0ea208..4d37072c498a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -41,7 +41,6 @@ config X86
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
-	select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_KVM
 	select HAVE_ARCH_KGDB
@@ -78,11 +77,14 @@ config X86
 	select GENERIC_CLOCKEVENTS_MIN_ADJUST
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
-	select HAVE_BPF_JIT if (X86_64 && NET)
+	select HAVE_BPF_JIT if X86_64
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_IOMAP
-	select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
+	select DCACHE_WORD_ACCESS
+	select GENERIC_SMP_IDLE_THREAD
+	select HAVE_ARCH_SECCOMP_FILTER
+	select BUILDTIME_EXTABLE_SORT
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -161,9 +163,6 @@ config RWSEM_GENERIC_SPINLOCK
 config RWSEM_XCHGADD_ALGORITHM
 	def_bool X86_XADD
 
-config ARCH_HAS_CPU_IDLE_WAIT
-	def_bool y
-
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
@@ -1258,10 +1257,6 @@ config NODES_SHIFT
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system.  Increases memory reserved to accommodate various tables.
 
-config HAVE_ARCH_BOOTMEM
-	def_bool y
-	depends on X86_32 && NUMA
-
 config HAVE_ARCH_ALLOC_REMAP
 	def_bool y
 	depends on X86_32 && NUMA
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 41a7237606a3..dc611a40a336 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -134,6 +134,9 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
 KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
+archscripts:
+	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
+
 ###
 # Syscall table generation
 
@@ -146,7 +149,6 @@ archheaders:
 head-y := arch/x86/kernel/head_$(BITS).o
 head-y += arch/x86/kernel/head$(BITS).o
 head-y += arch/x86/kernel/head.o
-head-y += arch/x86/kernel/init_task.o
 
 libs-y  += arch/x86/lib/
 
@@ -203,6 +205,7 @@ archclean:
 	$(Q)rm -rf $(objtree)/arch/i386
 	$(Q)rm -rf $(objtree)/arch/x86_64
 	$(Q)$(MAKE) $(clean)=$(boot)
+	$(Q)$(MAKE) $(clean)=arch/x86/tools
 
 define archhelp
   echo  '* bzImage      - Compressed kernel image (arch/x86/boot/bzImage)'
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 4be406abeefd..36b62bc52638 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -14,6 +14,9 @@ LINK-y += $(call cc-option,-m32)
 
 export LDFLAGS
 
+LDS_EXTRA := -Ui386
+export LDS_EXTRA
+
 # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
 include $(srctree)/arch/x86/Makefile_32.cpu
 
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fd55a2ff3ad8..e398bb5d63bb 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -40,13 +40,12 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
+targets += vmlinux.bin.all vmlinux.relocs
 
-targets += vmlinux.bin.all vmlinux.relocs relocs
-hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs
-
+CMD_RELOCS = arch/x86/tools/relocs
 quiet_cmd_relocs = RELOCS  $@
-      cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
-$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
+      cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
+$(obj)/vmlinux.relocs: vmlinux FORCE
 	$(call if_changed,relocs)
 
 vmlinux.bin.all-y := $(obj)/vmlinux.bin
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 0cdfc0d2315e..2c14e76bb4c7 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -904,11 +904,19 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 
 	memset(boot_params, 0x0, 0x4000);
 
-	/* Copy first two sectors to boot_params */
-	memcpy(boot_params, image->image_base, 1024);
-
 	hdr = &boot_params->hdr;
 
+	/* Copy the second sector to boot_params */
+	memcpy(&hdr->jump, image->image_base + 512, 512);
+
+	/*
+	 * Fill out some of the header fields ourselves because the
+	 * EFI firmware loader doesn't load the first sector.
+	 */
+	hdr->root_flags = 1;
+	hdr->vid_mode = 0xffff;
+	hdr->boot_flag = 0xAA55;
+
 	/*
 	 * The EFI firmware loader could have placed the kernel image
 	 * anywhere in memory, but the kernel has various restrictions
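For context, the three values the stub fills in by hand live in the legacy boot sector at offsets documented in Documentation/x86/boot.txt. A minimal illustrative sketch (not part of the patch; names are ours) of how a loader could sanity-check them:

#include <stdint.h>

/* Offsets per the x86 boot protocol: root_flags 0x1f2, vid_mode 0x1fa,
 * boot_flag 0x1fe (the classic 0xAA55 boot-sector magic). */
static int boot_sector_fields_ok(const uint8_t *image)
{
	uint16_t root_flags = image[0x1f2] | (image[0x1f3] << 8);
	uint16_t vid_mode   = image[0x1fa] | (image[0x1fb] << 8);
	uint16_t boot_flag  = image[0x1fe] | (image[0x1ff] << 8);

	return boot_flag == 0xAA55 && vid_mode == 0xffff && root_flags == 1;
}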
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index a0559930a180..c85e3ac99bba 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -33,6 +33,9 @@
 	__HEAD
 ENTRY(startup_32)
 #ifdef CONFIG_EFI_STUB
+	jmp	preferred_addr
+
+	.balign	0x10
 	/*
 	 * We don't need the return address, so set up the stack so
 	 * efi_main() can find its arugments.
@@ -41,12 +44,17 @@ ENTRY(startup_32)
 
 	call	efi_main
 	cmpl	$0, %eax
-	je	preferred_addr
 	movl	%eax, %esi
-	call	1f
+	jne	2f
 1:
+	/* EFI init failed, so hang. */
+	hlt
+	jmp	1b
+2:
+	call	3f
+3:
 	popl	%eax
-	subl	$1b, %eax
+	subl	$3b, %eax
 	subl	BP_pref_address(%esi), %eax
 	add	BP_code32_start(%esi), %eax
 	leal	preferred_addr(%eax), %eax
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 558d76ce23bc..87e03a13d8e3 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -200,18 +200,28 @@ ENTRY(startup_64)
 	 * entire text+data+bss and hopefully all of memory.
 	 */
 #ifdef CONFIG_EFI_STUB
-	pushq	%rsi
+	/*
+	 * The entry point for the PE/COFF executable is 0x210, so only
+	 * legacy boot loaders will execute this jmp.
+	 */
+	jmp	preferred_addr
+
+	.org 0x210
 	mov	%rcx, %rdi
 	mov	%rdx, %rsi
 	call	efi_main
-	popq	%rsi
-	cmpq	$0,%rax
-	je	preferred_addr
 	movq	%rax,%rsi
-	call	1f
+	cmpq	$0,%rax
+	jne	2f
 1:
+	/* EFI init failed, so hang. */
+	hlt
+	jmp	1b
+2:
+	call	3f
+3:
 	popq	%rax
-	subq	$1b, %rax
+	subq	$3b, %rax
 	subq	BP_pref_address(%rsi), %rax
 	add	BP_code32_start(%esi), %eax
 	leaq	preferred_addr(%rax), %rax
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index f1bbeeb09148..8bbea6aa40d9 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -147,7 +147,7 @@ optional_header:
 	# Filled in by build.c
 	.long	0x0000				# AddressOfEntryPoint
 
-	.long	0x0000				# BaseOfCode
+	.long	0x0200				# BaseOfCode
 #ifdef CONFIG_X86_32
 	.long	0				# data
 #endif
@@ -189,7 +189,7 @@ extra_header_fields:
 	.quad	0				# SizeOfHeapCommit
 #endif
 	.long	0				# LoaderFlags
-	.long	0x1				# NumberOfRvaAndSizes
+	.long	0x6				# NumberOfRvaAndSizes
 
 	.quad	0				# ExportTable
 	.quad	0				# ImportTable
@@ -217,18 +217,17 @@ section_table:
 
 	#
 	# The EFI application loader requires a relocation section
-	# because EFI applications are relocatable and not having
-	# this section seems to confuse it. But since we don't need
-	# the loader to fixup any relocs for us just fill it with a
-	# single dummy reloc.
+	# because EFI applications must be relocatable. But since
+	# we don't need the loader to fixup any relocs for us, we
+	# just create an empty (zero-length) .reloc section header.
 	#
 	.ascii	".reloc"
 	.byte	0
 	.byte	0
-	.long	reloc_end - reloc_start
-	.long	reloc_start
-	.long	reloc_end - reloc_start		# SizeOfRawData
-	.long	reloc_start			# PointerToRawData
+	.long	0
+	.long	0
+	.long	0				# SizeOfRawData
+	.long	0				# PointerToRawData
 	.long	0				# PointerToRelocations
 	.long	0				# PointerToLineNumbers
 	.word	0				# NumberOfRelocations
@@ -469,10 +468,3 @@ setup_corrupt:
 
 	.data
 dummy:	.long	0
-
-	.section .reloc
-reloc_start:
-	.long	dummy - reloc_start
-	.long	10
-	.word	0
-reloc_end:
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index ed549767a231..3f61f6e2b46f 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -198,35 +198,60 @@ int main(int argc, char ** argv)
 
 	pe_header = get_unaligned_le32(&buf[0x3c]);
 
-	/* Size of code */
-	put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
-
 	/* Size of image */
 	put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
 
+	/*
+	 * Subtract the size of the first section (512 bytes) which
+	 * includes the header and .reloc section. The remaining size
+	 * is that of the .text section.
+	 */
+	file_sz -= 512;
+
+	/* Size of code */
+	put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
+
 #ifdef CONFIG_X86_32
-	/* Address of entry point */
-	put_unaligned_le32(i, &buf[pe_header + 0x28]);
+	/*
+	 * Address of entry point.
+	 *
+	 * The EFI stub entry point is +16 bytes from the start of
+	 * the .text section.
+	 */
+	put_unaligned_le32(i + 16, &buf[pe_header + 0x28]);
 
 	/* .text size */
 	put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);
 
+	/* .text vma */
+	put_unaligned_le32(0x200, &buf[pe_header + 0xb4]);
+
 	/* .text size of initialised data */
 	put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
+
+	/* .text file offset */
+	put_unaligned_le32(0x200, &buf[pe_header + 0xbc]);
 #else
 	/*
 	 * Address of entry point. startup_32 is at the beginning and
 	 * the 64-bit entry point (startup_64) is always 512 bytes
-	 * after.
+	 * after. The EFI stub entry point is 16 bytes after that, as
+	 * the first instruction allows legacy loaders to jump over
+	 * the EFI stub initialisation
 	 */
-	put_unaligned_le32(i + 512, &buf[pe_header + 0x28]);
+	put_unaligned_le32(i + 528, &buf[pe_header + 0x28]);
 
 	/* .text size */
 	put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);
 
+	/* .text vma */
+	put_unaligned_le32(0x200, &buf[pe_header + 0xc4]);
+
 	/* .text size of initialised data */
 	put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
 
+	/* .text file offset */
+	put_unaligned_le32(0x200, &buf[pe_header + 0xcc]);
 #endif /* CONFIG_X86_32 */
 #endif /* CONFIG_EFI_STUB */
 
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index d511d951a052..07b3a68d2d29 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -119,9 +119,7 @@ static void set_brk(unsigned long start, unsigned long end)
 	end = PAGE_ALIGN(end);
 	if (end <= start)
 		return;
-	down_write(&current->mm->mmap_sem);
-	do_brk(start, end - start);
-	up_write(&current->mm->mmap_sem);
+	vm_brk(start, end - start);
 }
 
 #ifdef CORE_DUMP
@@ -296,8 +294,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	/* OK, This is the point of no return */
 	set_personality(PER_LINUX);
-	set_thread_flag(TIF_IA32);
-	current->mm->context.ia32_compat = 1;
+	set_personality_ia32(false);
 
 	setup_new_exec(bprm);
 
@@ -332,9 +329,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		pos = 32;
 		map_size = ex.a_text+ex.a_data;
 
-		down_write(&current->mm->mmap_sem);
-		error = do_brk(text_addr & PAGE_MASK, map_size);
-		up_write(&current->mm->mmap_sem);
+		error = vm_brk(text_addr & PAGE_MASK, map_size);
 
 		if (error != (text_addr & PAGE_MASK)) {
 			send_sig(SIGKILL, current, 0);
@@ -373,9 +368,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) {
 		loff_t pos = fd_offset;
 
-		down_write(&current->mm->mmap_sem);
-		do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
-		up_write(&current->mm->mmap_sem);
+		vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
 		bprm->file->f_op->read(bprm->file,
 				(char __user *)N_TXTADDR(ex),
 				ex.a_text+ex.a_data, &pos);
@@ -385,26 +378,22 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		goto beyond_if;
 	}
 
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
+	error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
 			PROT_READ | PROT_EXEC,
 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
 			MAP_EXECUTABLE | MAP_32BIT,
 			fd_offset);
-	up_write(&current->mm->mmap_sem);
 
 	if (error != N_TXTADDR(ex)) {
 		send_sig(SIGKILL, current, 0);
 		return error;
 	}
 
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
+	error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 			PROT_READ | PROT_WRITE | PROT_EXEC,
 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE |
 			MAP_EXECUTABLE | MAP_32BIT,
 			fd_offset + ex.a_text);
-	up_write(&current->mm->mmap_sem);
 	if (error != N_DATADDR(ex)) {
 		send_sig(SIGKILL, current, 0);
 		return error;
@@ -476,9 +465,7 @@ static int load_aout_library(struct file *file)
 		error_time = jiffies;
 	}
 #endif
-	down_write(&current->mm->mmap_sem);
-	do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
-	up_write(&current->mm->mmap_sem);
+	vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
 
 	file->f_op->read(file, (char __user *)start_addr,
 			ex.a_text + ex.a_data, &pos);
@@ -490,12 +477,10 @@ static int load_aout_library(struct file *file)
 		goto out;
 	}
 	/* Now use mmap to map the library into memory. */
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
+	error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
 			PROT_READ | PROT_WRITE | PROT_EXEC,
 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT,
 			N_TXTOFF(ex));
-	up_write(&current->mm->mmap_sem);
 	retval = error;
 	if (error != start_addr)
 		goto out;
@@ -503,9 +488,7 @@ static int load_aout_library(struct file *file)
 	len = PAGE_ALIGN(ex.a_text + ex.a_data);
 	bss = ex.a_text + ex.a_data + ex.a_bss;
 	if (bss > len) {
-		down_write(&current->mm->mmap_sem);
-		error = do_brk(start_addr + len, bss - len);
-		up_write(&current->mm->mmap_sem);
+		error = vm_brk(start_addr + len, bss - len);
 		retval = error;
 		if (error != start_addr + len)
 			goto out;
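The pattern being removed throughout this file is the open-coded take-lock/map/drop-lock sequence; vm_brk() and vm_mmap() fold the mmap_sem handling into the callee. Roughly, as a simplified sketch of the wrapper shape (the real helpers live in mm/, and this is not their exact implementation):

/* Simplified sketch of what vm_brk() does for its callers:
 * acquire mmap_sem once, centrally, around the old locked helper. */
static unsigned long vm_brk_sketch(unsigned long addr, unsigned long len)
{
	unsigned long ret;

	down_write(&current->mm->mmap_sem);
	ret = do_brk(addr, len);	/* the previously open-coded call */
	up_write(&current->mm->mmap_sem);
	return ret;
}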
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a69245ba27e3..0b3f2354f6aa 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -67,6 +67,10 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 		switch (from->si_code >> 16) {
 		case __SI_FAULT >> 16:
 			break;
+		case __SI_SYS >> 16:
+			put_user_ex(from->si_syscall, &to->si_syscall);
+			put_user_ex(from->si_arch, &to->si_arch);
+			break;
 		case __SI_CHLD >> 16:
 			if (ia32) {
 				put_user_ex(from->si_utime, &to->si_utime);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e3e734005e19..20e5f7ba0e6b 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -13,6 +13,7 @@
 #include <asm/thread_info.h>
 #include <asm/segment.h>
 #include <asm/irqflags.h>
+#include <asm/asm.h>
 #include <linux/linkage.h>
 #include <linux/err.h>
 
@@ -146,9 +147,7 @@ ENTRY(ia32_sysenter_target)
 	/* no need to do an access_ok check here because rbp has been
 	   32bit zero extended */
 1:	movl	(%rbp),%ebp
-	.section __ex_table,"a"
-	.quad 1b,ia32_badarg
-	.previous
+	_ASM_EXTABLE(1b,ia32_badarg)
 	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
@@ -303,9 +302,7 @@ ENTRY(ia32_cstar_target)
 	   32bit zero extended */
 	/* hardware stack frame is complete now */
 1:	movl	(%r8),%r9d
-	.section __ex_table,"a"
-	.quad 1b,ia32_badarg
-	.previous
+	_ASM_EXTABLE(1b,ia32_badarg)
 	orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	CFI_REMEMBER_STATE
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index aec2202a596c..edca9c0a79cc 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -287,11 +287,6 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
 	return ret;
 }
 
-asmlinkage long sys32_alarm(unsigned int seconds)
-{
-	return alarm_setitimer(seconds);
-}
-
 asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
 			      int options)
 {
@@ -300,11 +295,6 @@ asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
 
 /* 32-bit timeval and related flotsam.  */
 
-asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2)
-{
-	return sys_sysfs(option, arg1, arg2);
-}
-
 asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
 				    struct compat_timespec __user *interval)
 {
@@ -375,19 +365,6 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
 }
 
 
-asmlinkage long sys32_personality(unsigned long personality)
-{
-	int ret;
-
-	if (personality(current->personality) == PER_LINUX32 &&
-	    personality == PER_LINUX)
-		personality = PER_LINUX32;
-	ret = sys_personality(personality);
-	if (ret == PER_LINUX32)
-		ret = PER_LINUX;
-	return ret;
-}
-
 asmlinkage long sys32_sendfile(int out_fd, int in_fd,
 			       compat_off_t __user *offset, s32 count)
 {
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d85410171260..eaff4790ed96 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -138,6 +138,11 @@ static inline void native_apic_msr_write(u32 reg, u32 v)
 	wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
 }
 
+static inline void native_apic_msr_eoi_write(u32 reg, u32 v)
+{
+	wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
+}
+
 static inline u32 native_apic_msr_read(u32 reg)
 {
 	u64 msr;
@@ -351,6 +356,14 @@ struct apic {
 	/* apic ops */
 	u32 (*read)(u32 reg);
 	void (*write)(u32 reg, u32 v);
+	/*
+	 * ->eoi_write() has the same signature as ->write().
+	 *
+	 * Drivers can support both ->eoi_write() and ->write() by passing the same
+	 * callback value. Kernel can override ->eoi_write() and fall back
+	 * on write for EOI.
+	 */
+	void (*eoi_write)(u32 reg, u32 v);
 	u64 (*icr_read)(void);
 	void (*icr_write)(u32 low, u32 high);
 	void (*wait_icr_idle)(void);
@@ -426,6 +439,11 @@ static inline void apic_write(u32 reg, u32 val)
 	apic->write(reg, val);
 }
 
+static inline void apic_eoi(void)
+{
+	apic->eoi_write(APIC_EOI, APIC_EOI_ACK);
+}
+
 static inline u64 apic_icr_read(void)
 {
 	return apic->icr_read();
@@ -450,6 +468,7 @@ static inline u32 safe_apic_wait_icr_idle(void)
 
 static inline u32 apic_read(u32 reg) { return 0; }
 static inline void apic_write(u32 reg, u32 val) { }
+static inline void apic_eoi(void) { }
 static inline u64 apic_icr_read(void) { return 0; }
 static inline void apic_icr_write(u32 low, u32 high) { }
 static inline void apic_wait_icr_idle(void) { }
@@ -463,9 +482,7 @@ static inline void ack_APIC_irq(void)
 	 * ack_APIC_irq() actually gets compiled as a single instruction
 	 * ... yummie.
 	 */
-
-	/* Docs say use 0 for future compatibility */
-	apic_write(APIC_EOI, 0);
+	apic_eoi();
 }
 
 static inline unsigned default_get_apic_id(unsigned long x)
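As the new comment in struct apic says, a driver with no special EOI needs can simply reuse its register-write callback for ->eoi_write(). A hypothetical initializer sketch (not from this patch; the .read/.write helpers named here are the existing native memory-mapped accessors):

/* Hypothetical driver snippet: wiring ->eoi_write() either to the
 * ordinary write path or to a dedicated fast path such as the
 * native_apic_msr_eoi_write() added above. */
static struct apic apic_example __read_mostly = {
	.read		= native_apic_mem_read,
	.write		= native_apic_mem_write,
	.eoi_write	= native_apic_mem_write,  /* same callback is fine */
	/* ... remaining ops ... */
};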
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 134bba00df09..c46bb99d5fb2 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -37,7 +37,7 @@
 #define		APIC_ARBPRI_MASK	0xFFu
 #define	APIC_PROCPRI	0xA0
 #define	APIC_EOI	0xB0
-#define		APIC_EIO_ACK	0x0
+#define		APIC_EOI_ACK	0x0 /* Docs say 0 for future compat. */
 #define	APIC_RRR	0xC0
 #define	APIC_LDR	0xD0
 #define		APIC_LDR_MASK	(0xFFu << 24)
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 9412d6558c88..1c2d247f65ce 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -4,11 +4,9 @@
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x)	x
 # define __ASM_FORM_COMMA(x) x,
-# define __ASM_EX_SEC	.section __ex_table, "a"
 #else
 # define __ASM_FORM(x)	" " #x " "
 # define __ASM_FORM_COMMA(x) " " #x ","
-# define __ASM_EX_SEC	" .section __ex_table,\"a\"\n"
 #endif
 
 #ifdef CONFIG_X86_32
@@ -42,17 +40,33 @@
 
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE(from,to)					\
-	__ASM_EX_SEC ;						\
-	_ASM_ALIGN ;						\
-	_ASM_PTR from , to ;					\
-	.previous
+	.pushsection "__ex_table","a" ;				\
+	.balign 8 ;						\
+	.long (from) - . ;					\
+	.long (to) - . ;					\
+	.popsection
+
+# define _ASM_EXTABLE_EX(from,to)				\
+	.pushsection "__ex_table","a" ;				\
+	.balign 8 ;						\
+	.long (from) - . ;					\
+	.long (to) - . + 0x7ffffff0 ;				\
+	.popsection
 #else
 # define _ASM_EXTABLE(from,to)					\
-	__ASM_EX_SEC						\
-	_ASM_ALIGN "\n"						\
-	_ASM_PTR #from "," #to "\n"				\
-	" .previous\n"
+	" .pushsection \"__ex_table\",\"a\"\n"			\
+	" .balign 8\n"						\
+	" .long (" #from ") - .\n"				\
+	" .long (" #to ") - .\n"				\
+	" .popsection\n"
+
+# define _ASM_EXTABLE_EX(from,to)				\
+	" .pushsection \"__ex_table\",\"a\"\n"			\
+	" .balign 8\n"						\
+	" .long (" #from ") - .\n"				\
+	" .long (" #to ") - . + 0x7ffffff0\n"			\
+	" .popsection\n"
 #endif
 
 #endif /* _ASM_X86_ASM_H */
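The new entries store 32-bit offsets relative to the entry itself instead of absolute pointers, which halves the table on 64-bit and makes it sortable at build time (the BUILDTIME_EXTABLE_SORT select in the Kconfig hunk above). Decoding mirrors the idea in arch/x86/mm/extable.c, also touched in this series; sketch, with illustrative helper names:

/* Sketch: recover absolute addresses from self-relative extable entries. */
struct exception_table_entry {
	int insn;	/* offset of faulting insn, relative to &insn  */
	int fixup;	/* offset of fixup code, relative to &fixup    */
};

static inline unsigned long ex_insn_addr(const struct exception_table_entry *x)
{
	return (unsigned long)&x->insn + x->insn;
}

static inline unsigned long ex_fixup_addr(const struct exception_table_entry *x)
{
	return (unsigned long)&x->fixup + x->fixup;
}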
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 198119910da5..b154de75c90c 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -63,7 +63,7 @@ ATOMIC64_DECL(add_unless);
 
 /**
  * atomic64_cmpxchg - cmpxchg atomic64 variable
- * @p: pointer to type atomic64_t
+ * @v: pointer to type atomic64_t
  * @o: expected value
  * @n: new value
  *
@@ -98,7 +98,7 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n)
 /**
  * atomic64_set - set atomic64 variable
  * @v: pointer to type atomic64_t
- * @n: value to assign
+ * @i: value to assign
  *
  * Atomically sets the value of @v to @n.
  */
@@ -200,7 +200,7 @@ static inline long long atomic64_sub(long long i, atomic64_t *v)
  * atomic64_sub_and_test - subtract value from variable and test result
  * @i: integer value to subtract
  * @v: pointer to type atomic64_t
- * 
+ *
  * Atomically subtracts @i from @v and returns
  * true if the result is zero, or false for all
  * other cases.
@@ -224,9 +224,9 @@ static inline void atomic64_inc(atomic64_t *v)
 
 /**
  * atomic64_dec - decrement atomic64 variable
- * @ptr: pointer to type atomic64_t
+ * @v: pointer to type atomic64_t
  *
- * Atomically decrements @ptr by 1.
+ * Atomically decrements @v by 1.
  */
 static inline void atomic64_dec(atomic64_t *v)
 {
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 5e1a2eef3e7c..b13fe63bdc59 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -19,7 +19,7 @@
 #ifdef CONFIG_X86_64
 #define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
 #else
-#define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT + THREAD_ORDER)
+#define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT + THREAD_SIZE_ORDER)
 #endif
 #define MIN_KERNEL_ALIGN	(_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
 
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index b3b733262909..99480e55973d 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -43,7 +43,7 @@ extern void __add_wrong_size(void)
 	switch (sizeof(*(ptr))) {					\
 	case __X86_CASE_B:						\
 		asm volatile (lock #op "b %b0, %1\n"			\
-			      : "+r" (__ret), "+m" (*(ptr))		\
+			      : "+q" (__ret), "+m" (*(ptr))		\
 			      : : "memory", "cc");			\
 		break;							\
 	case __X86_CASE_W:						\
@@ -173,7 +173,7 @@ extern void __add_wrong_size(void)
 	switch (sizeof(*(ptr))) {					\
 	case __X86_CASE_B:						\
 		asm volatile (lock "addb %b1, %0\n"			\
-			      : "+m" (*(ptr)) : "qi" (inc)		\
+			      : "+m" (*(ptr)) : "qi" (inc)		\
 			      : "memory", "cc");			\
 		break;							\
 	case __X86_CASE_W:						\
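The constraint change matters because, in 32-bit mode, only %eax/%ebx/%ecx/%edx have byte subregisters; GCC's "q" class restricts the allocator to those, while "r" may hand the asm %esi or %edi, for which "%b0" has no encoding. A small standalone illustration:

/* Why "q" and not "r" for byte operands on 32-bit x86: "%b0" must name
 * an 8-bit subregister (%al/%bl/%cl/%dl), which %esi/%edi lack there. */
static inline unsigned char xchg_byte(unsigned char *p, unsigned char v)
{
	asm volatile("xchgb %b0, %1"
		     : "+q" (v), "+m" (*p)
		     : : "memory");
	return v;
}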
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index d6805798d6fc..fedf32b73e65 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -229,7 +229,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
 		sp = task_pt_regs(current)->sp;
 	} else {
 		/* -128 for the x32 ABI redzone */
-		sp = percpu_read(old_rsp) - 128;
+		sp = this_cpu_read(old_rsp) - 128;
 	}
 
 	return (void __user *)round_down(sp - len, 16);
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 4d447b732d82..9476c04ee635 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);
 
 static __always_inline struct task_struct *get_current(void)
 {
-	return percpu_read_stable(current_task);
+	return this_cpu_read_stable(current_task);
 }
 
 #define current get_current()
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e95822d683f4..8bf1c06070d5 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -6,6 +6,7 @@
 #include <asm/mmu.h>
 
 #include <linux/smp.h>
+#include <linux/percpu.h>
 
 static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
 {
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 4fa88154e4de..75f4c6d6a331 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -290,14 +290,14 @@ static inline int __thread_has_fpu(struct task_struct *tsk)
 static inline void __thread_clear_has_fpu(struct task_struct *tsk)
 {
 	tsk->thread.fpu.has_fpu = 0;
-	percpu_write(fpu_owner_task, NULL);
+	this_cpu_write(fpu_owner_task, NULL);
 }
 
 /* Must be paired with a 'clts' before! */
 static inline void __thread_set_has_fpu(struct task_struct *tsk)
 {
 	tsk->thread.fpu.has_fpu = 1;
-	percpu_write(fpu_owner_task, tsk);
+	this_cpu_write(fpu_owner_task, tsk);
 }
 
 /*
@@ -344,7 +344,7 @@ typedef struct { int preload; } fpu_switch_t;
  */
 static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
 {
-	return new == percpu_read_stable(fpu_owner_task) &&
+	return new == this_cpu_read_stable(fpu_owner_task) &&
 		cpu == new->thread.fpu.last_cpu;
 }
 
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 268c783ab1c0..18d9005d9e4f 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,6 +34,7 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
+extern int modifying_ftrace_code;
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
@@ -50,6 +51,8 @@ struct dyn_arch_ftrace {
 	/* No extra data needed for x86 */
 };
 
+int ftrace_int3_handler(struct pt_regs *regs);
+
 #endif /*  CONFIG_DYNAMIC_FTRACE */
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 382f75d735f3..d3895dbf4ddb 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -35,14 +35,15 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 
 #define __ARCH_IRQ_STAT
 
-#define inc_irq_stat(member)	percpu_inc(irq_stat.member)
+#define inc_irq_stat(member)	this_cpu_inc(irq_stat.member)
 
-#define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)
+#define local_softirq_pending()	this_cpu_read(irq_stat.__softirq_pending)
 
 #define __ARCH_SET_SOFTIRQ_PENDING
 
-#define set_softirq_pending(x)	percpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x)	percpu_or(irq_stat.__softirq_pending, (x))
+#define set_softirq_pending(x)	\
+		this_cpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x)	this_cpu_or(irq_stat.__softirq_pending, (x))
 
 extern void ack_bad_irq(unsigned int irq);
 
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index ee52760549f0..b04cbdb138cd 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -144,6 +144,12 @@ typedef struct compat_siginfo {
 			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
 			int _fd;
 		} _sigpoll;
+
+		struct {
+			unsigned int _call_addr; /* calling insn */
+			int _syscall;	/* triggering system call number */
+			unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
+		} _sigsys;
 	} _sifields;
 } compat_siginfo_t;
 
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 2c4943de5150..73d8c5398ea9 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -5,7 +5,7 @@
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
 #include <asm/irq_vectors.h>
-
+#include <asm/x86_init.h>
 /*
  * Intel IO-APIC support for SMP and UP systems.
  *
@@ -21,15 +21,6 @@
 #define IO_APIC_REDIR_LEVEL_TRIGGER	(1 << 15)
 #define IO_APIC_REDIR_MASKED		(1 << 16)
 
-struct io_apic_ops {
-	void		(*init)  (void);
-	unsigned int	(*read)  (unsigned int apic, unsigned int reg);
-	void		(*write) (unsigned int apic, unsigned int reg, unsigned int value);
-	void		(*modify)(unsigned int apic, unsigned int reg, unsigned int value);
-};
-
-void __init set_io_apic_ops(const struct io_apic_ops *);
-
 /*
  * The structure of the IO-APIC:
  */
@@ -156,7 +147,6 @@ struct io_apic_irq_attr;
 extern int io_apic_set_pci_routing(struct device *dev, int irq,
 		 struct io_apic_irq_attr *irq_attr);
 void setup_IO_APIC_irq_extra(u32 gsi);
-extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
 int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
@@ -185,12 +175,29 @@ extern void mp_save_irq(struct mpc_intsrc *m);
 
 extern void disable_ioapic_support(void);
 
+extern void __init native_io_apic_init_mappings(void);
+extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
+extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
+extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+	return x86_io_apic_ops.read(apic, reg);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+	x86_io_apic_ops.write(apic, reg, value);
+}
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+	x86_io_apic_ops.modify(apic, reg, value);
+}
 #else  /* !CONFIG_X86_IO_APIC */
 
 #define io_apic_assign_pci_irqs 0
 #define setup_ioapic_ids_from_mpc x86_init_noop
 static const int timer_through_8259 = 0;
-static inline void ioapic_and_gsi_init(void) { }
 static inline void ioapic_insert_resources(void) { }
 #define gsi_top (NR_IRQS_LEGACY)
 static inline int mp_find_ioapic(u32 gsi) { return 0; }
@@ -212,6 +219,10 @@ static inline int restore_ioapic_entries(void)
 
 static inline void mp_save_irq(struct mpc_intsrc *m) { };
 static inline void disable_ioapic_support(void) { }
+#define native_io_apic_init_mappings	NULL
+#define native_io_apic_read		NULL
+#define native_io_apic_write		NULL
+#define native_io_apic_modify		NULL
 #endif
 
 #endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 77843225b7ea..d82250b1debb 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
 
 static inline struct pt_regs *get_irq_regs(void)
 {
-	return percpu_read(irq_regs);
+	return this_cpu_read(irq_regs);
 }
 
 static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
 	struct pt_regs *old_regs;
 
 	old_regs = get_irq_regs();
-	percpu_write(irq_regs, new_regs);
+	this_cpu_write(irq_regs, new_regs);
 
 	return old_regs;
 }
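This file, like compat.h, current.h, fpu-internal.h, hardirq.h and mmu_context.h above, is part of the tree-wide rename from the x86-private percpu_read()/percpu_write() to the generic this_cpu_read()/this_cpu_write() accessors. On x86 each such op still compiles to a single %fs/%gs-relative instruction, so an individual access is safe without disabling preemption; composed sequences should use a single op like this_cpu_inc(). An illustrative sketch (demo_counter is a made-up variable):

#include <linux/percpu.h>

DEFINE_PER_CPU(unsigned int, demo_counter);

static void bump_demo_counter(void)
{
	/* one add instruction on the local CPU's copy; safe even if the
	 * task can migrate between calls */
	this_cpu_inc(demo_counter);
}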
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 47d99934580f..5fb9bbbd2f14 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,45 +1,101 @@
-#ifndef _ASM_X86_IRQ_REMAPPING_H
-#define _ASM_X86_IRQ_REMAPPING_H
-
-#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
-
-#ifdef CONFIG_IRQ_REMAP
-static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
-static inline void prepare_irte(struct irte *irte, int vector,
-				unsigned int dest)
-{
-	memset(irte, 0, sizeof(*irte));
-
-	irte->present = 1;
-	irte->dst_mode = apic->irq_dest_mode;
-	/*
-	 * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
-	 * actual level or edge trigger will be setup in the IO-APIC
-	 * RTE. This will help simplify level triggered irq migration.
-	 * For more details, see the comments (in io_apic.c) explainig IO-APIC
-	 * irq migration in the presence of interrupt-remapping.
-	 */
-	irte->trigger_mode = 0;
-	irte->dlvry_mode = apic->irq_delivery_mode;
-	irte->vector = vector;
-	irte->dest_id = IRTE_DEST(dest);
-	irte->redir_hint = 1;
-}
-static inline bool irq_remapped(struct irq_cfg *cfg)
-{
-	return cfg->irq_2_iommu.iommu != NULL;
-}
-#else
-static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
-{
-}
-static inline bool irq_remapped(struct irq_cfg *cfg)
-{
-	return false;
-}
-static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
-{
-}
-#endif
-
-#endif	/* _ASM_X86_IRQ_REMAPPING_H */
+/*
+ * Copyright (C) 2012 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * This header file contains the interface of the interrupt remapping code to
+ * the x86 interrupt management code.
+ */
+
+#ifndef __X86_IRQ_REMAPPING_H
+#define __X86_IRQ_REMAPPING_H
+
+#include <asm/io_apic.h>
+
+#ifdef CONFIG_IRQ_REMAP
+
+extern int irq_remapping_enabled;
+
+extern void setup_irq_remapping_ops(void);
+extern int irq_remapping_supported(void);
+extern int irq_remapping_prepare(void);
+extern int irq_remapping_enable(void);
+extern void irq_remapping_disable(void);
+extern int irq_remapping_reenable(int);
+extern int irq_remap_enable_fault_handling(void);
+extern int setup_ioapic_remapped_entry(int irq,
+				       struct IO_APIC_route_entry *entry,
+				       unsigned int destination,
+				       int vector,
+				       struct io_apic_irq_attr *attr);
+extern int set_remapped_irq_affinity(struct irq_data *data,
+				     const struct cpumask *mask,
+				     bool force);
+extern void free_remapped_irq(int irq);
+extern void compose_remapped_msi_msg(struct pci_dev *pdev,
+				     unsigned int irq, unsigned int dest,
+				     struct msi_msg *msg, u8 hpet_id);
+extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
+extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+				  int index, int sub_handle);
+extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
+
+#else  /* CONFIG_IRQ_REMAP */
+
+#define irq_remapping_enabled	0
+
+static inline void setup_irq_remapping_ops(void) { }
+static inline int irq_remapping_supported(void) { return 0; }
+static inline int irq_remapping_prepare(void) { return -ENODEV; }
+static inline int irq_remapping_enable(void) { return -ENODEV; }
+static inline void irq_remapping_disable(void) { }
+static inline int irq_remapping_reenable(int eim) { return -ENODEV; }
+static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; }
+static inline int setup_ioapic_remapped_entry(int irq,
+					      struct IO_APIC_route_entry *entry,
+					      unsigned int destination,
+					      int vector,
+					      struct io_apic_irq_attr *attr)
+{
+	return -ENODEV;
+}
+static inline int set_remapped_irq_affinity(struct irq_data *data,
+					    const struct cpumask *mask,
+					    bool force)
+{
+	return 0;
+}
+static inline void free_remapped_irq(int irq) { }
+static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
+					    unsigned int irq, unsigned int dest,
+					    struct msi_msg *msg, u8 hpet_id)
+{
+}
+static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
+{
+	return -ENODEV;
+}
+static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+					 int index, int sub_handle)
+{
+	return -ENODEV;
+}
+static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_IRQ_REMAP */
+
+#endif /* __X86_IRQ_REMAPPING_H */
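The rewritten header follows the common kernel idiom of pairing real declarations under the config option with inline stubs that return -ENODEV, so callers compile unchanged either way. A minimal illustration of the pattern (CONFIG_MY_FEATURE and my_feature_enable() are made up):

#ifdef CONFIG_MY_FEATURE
extern int my_feature_enable(void);
#else
/* stub keeps call sites free of #ifdefs when the feature is compiled out */
static inline int my_feature_enable(void) { return -ENODEV; }
#endif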
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index d73f1571bde7..2c37aadcbc35 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -24,7 +24,6 @@ enum die_val {
24extern void printk_address(unsigned long address, int reliable); 24extern void printk_address(unsigned long address, int reliable);
25extern void die(const char *, struct pt_regs *,long); 25extern void die(const char *, struct pt_regs *,long);
26extern int __must_check __die(const char *, struct pt_regs *, long); 26extern int __must_check __die(const char *, struct pt_regs *, long);
27extern void show_registers(struct pt_regs *regs);
28extern void show_trace(struct task_struct *t, struct pt_regs *regs, 27extern void show_trace(struct task_struct *t, struct pt_regs *regs,
29 unsigned long *sp, unsigned long bp); 28 unsigned long *sp, unsigned long bp);
30extern void __show_regs(struct pt_regs *regs, int all); 29extern void __show_regs(struct pt_regs *regs, int all);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e216ba066e79..e5b97be12d2a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
27#include <asm/desc.h> 27#include <asm/desc.h>
28#include <asm/mtrr.h> 28#include <asm/mtrr.h>
29#include <asm/msr-index.h> 29#include <asm/msr-index.h>
30#include <asm/asm.h>
30 31
31#define KVM_MAX_VCPUS 254 32#define KVM_MAX_VCPUS 254
32#define KVM_SOFT_MAX_VCPUS 160 33#define KVM_SOFT_MAX_VCPUS 160
@@ -921,9 +922,7 @@ extern bool kvm_rebooting;
921 __ASM_SIZE(push) " $666b \n\t" \ 922 __ASM_SIZE(push) " $666b \n\t" \
922 "call kvm_spurious_fault \n\t" \ 923 "call kvm_spurious_fault \n\t" \
923 ".popsection \n\t" \ 924 ".popsection \n\t" \
924 ".pushsection __ex_table, \"a\" \n\t" \ 925 _ASM_EXTABLE(666b, 667b)
925 _ASM_PTR " 666b, 667b \n\t" \
926 ".popsection"
927 926
928#define __kvm_handle_fault_on_reboot(insn) \ 927#define __kvm_handle_fault_on_reboot(insn) \
929 ____kvm_handle_fault_on_reboot(insn, "") 928 ____kvm_handle_fault_on_reboot(insn, "")
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 734c3767cfac..183922e13de1 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -170,6 +170,9 @@ static inline int kvm_para_available(void)
170 unsigned int eax, ebx, ecx, edx; 170 unsigned int eax, ebx, ecx, edx;
171 char signature[13]; 171 char signature[13];
172 172
173 if (boot_cpu_data.cpuid_level < 0)
174 return 0; /* So we don't blow up on old processors */
175
173 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); 176 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
174 memcpy(signature + 0, &ebx, 4); 177 memcpy(signature + 0, &ebx, 4);
175 memcpy(signature + 4, &ecx, 4); 178 memcpy(signature + 4, &ecx, 4);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 69021528b43c..cdbf36776106 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -25,8 +25,8 @@ void destroy_context(struct mm_struct *mm);
25static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 25static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
26{ 26{
27#ifdef CONFIG_SMP 27#ifdef CONFIG_SMP
28 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) 28 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
29 percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); 29 this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
30#endif 30#endif
31} 31}
32 32
@@ -37,8 +37,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
37 37
38 if (likely(prev != next)) { 38 if (likely(prev != next)) {
39#ifdef CONFIG_SMP 39#ifdef CONFIG_SMP
40 percpu_write(cpu_tlbstate.state, TLBSTATE_OK); 40 this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
41 percpu_write(cpu_tlbstate.active_mm, next); 41 this_cpu_write(cpu_tlbstate.active_mm, next);
42#endif 42#endif
43 cpumask_set_cpu(cpu, mm_cpumask(next)); 43 cpumask_set_cpu(cpu, mm_cpumask(next));
44 44
@@ -56,8 +56,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
56 } 56 }
57#ifdef CONFIG_SMP 57#ifdef CONFIG_SMP
58 else { 58 else {
59 percpu_write(cpu_tlbstate.state, TLBSTATE_OK); 59 this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
60 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); 60 BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
61 61
62 if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { 62 if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
63 /* We were in lazy tlb mode and leave_mm disabled 63 /* We were in lazy tlb mode and leave_mm disabled
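
The percpu_read()/percpu_write() accessors used here give way to the generic this_cpu_read()/this_cpu_write() family. A hedged sketch of the access pattern (the variable and function are illustrative); on x86 each accessor compiles down to a single segment-prefixed instruction, so it is safe against preemption without extra locking:

#include <linux/percpu.h>

DEFINE_PER_CPU(int, example_tlb_flag);

static void example_mark_lazy(void)
{
	/* Read and write this CPU's instance of the variable. */
	if (this_cpu_read(example_tlb_flag) == 0)
		this_cpu_write(example_tlb_flag, 1);
}
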
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 55728e121473..eb05fb3b02fb 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -61,10 +61,4 @@ static inline int pfn_valid(int pfn)
61 61
62#endif /* CONFIG_DISCONTIGMEM */ 62#endif /* CONFIG_DISCONTIGMEM */
63 63
64#ifdef CONFIG_NEED_MULTIPLE_NODES
65/* always use node 0 for bootmem on this numa platform */
66#define bootmem_arch_preferred_node(__bdata, size, align, goal, limit) \
67 (NODE_DATA(0)->bdata)
68#endif /* CONFIG_NEED_MULTIPLE_NODES */
69
70#endif /* _ASM_X86_MMZONE_32_H */ 64#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ccb805966f68..957ec87385af 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -134,6 +134,8 @@
134#define MSR_AMD64_IBSFETCHCTL 0xc0011030 134#define MSR_AMD64_IBSFETCHCTL 0xc0011030
135#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 135#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
136#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 136#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
137#define MSR_AMD64_IBSFETCH_REG_COUNT 3
138#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
137#define MSR_AMD64_IBSOPCTL 0xc0011033 139#define MSR_AMD64_IBSOPCTL 0xc0011033
138#define MSR_AMD64_IBSOPRIP 0xc0011034 140#define MSR_AMD64_IBSOPRIP 0xc0011034
139#define MSR_AMD64_IBSOPDATA 0xc0011035 141#define MSR_AMD64_IBSOPDATA 0xc0011035
@@ -141,8 +143,11 @@
141#define MSR_AMD64_IBSOPDATA3 0xc0011037 143#define MSR_AMD64_IBSOPDATA3 0xc0011037
142#define MSR_AMD64_IBSDCLINAD 0xc0011038 144#define MSR_AMD64_IBSDCLINAD 0xc0011038
143#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 145#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
146#define MSR_AMD64_IBSOP_REG_COUNT 7
147#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
144#define MSR_AMD64_IBSCTL 0xc001103a 148#define MSR_AMD64_IBSCTL 0xc001103a
145#define MSR_AMD64_IBSBRTARGET 0xc001103b 149#define MSR_AMD64_IBSBRTARGET 0xc001103b
150#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
146 151
147/* Fam 15h MSRs */ 152/* Fam 15h MSRs */
148#define MSR_F15H_PERF_CTL 0xc0010200 153#define MSR_F15H_PERF_CTL 0xc0010200
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 95203d40ffdd..084ef95274cd 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -169,14 +169,7 @@ static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
169 return native_write_msr_safe(msr, low, high); 169 return native_write_msr_safe(msr, low, high);
170} 170}
171 171
172/* 172/* rdmsr with exception handling */
173 * rdmsr with exception handling.
174 *
175 * Please note that the exception handling works only after we've
176 * switched to the "smart" #GP handler in trap_init() which knows about
177 * exception tables - using this macro earlier than that causes machine
178 * hangs on boxes which do not implement the @msr in the first argument.
179 */
180#define rdmsr_safe(msr, p1, p2) \ 173#define rdmsr_safe(msr, p1, p2) \
181({ \ 174({ \
182 int __err; \ 175 int __err; \
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index fd3f9f18cf3f..0e3793b821ef 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -27,6 +27,8 @@ void arch_trigger_all_cpu_backtrace(void);
27enum { 27enum {
28 NMI_LOCAL=0, 28 NMI_LOCAL=0,
29 NMI_UNKNOWN, 29 NMI_UNKNOWN,
30 NMI_SERR,
31 NMI_IO_CHECK,
30 NMI_MAX 32 NMI_MAX
31}; 33};
32 34
@@ -35,8 +37,24 @@ enum {
35 37
36typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); 38typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *);
37 39
38int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long, 40struct nmiaction {
39 const char *); 41 struct list_head list;
42 nmi_handler_t handler;
43 unsigned long flags;
44 const char *name;
45};
46
47#define register_nmi_handler(t, fn, fg, n) \
48({ \
49 static struct nmiaction fn##_na = { \
50 .handler = (fn), \
51 .name = (n), \
52 .flags = (fg), \
53 }; \
54 __register_nmi_handler((t), &fn##_na); \
55})
56
57int __register_nmi_handler(unsigned int, struct nmiaction *);
40 58
41void unregister_nmi_handler(unsigned int, const char *); 59void unregister_nmi_handler(unsigned int, const char *);
42 60
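
The new register_nmi_handler() macro allocates the struct nmiaction statically at each call site, so registration no longer needs to allocate memory. A sketch of a caller under the new interface (handler and name are illustrative):

#include <asm/nmi.h>

static int example_nmi(unsigned int cmd, struct pt_regs *regs)
{
	return NMI_DONE;	/* not ours; let the next handler look */
}

static int __init example_nmi_setup(void)
{
	/* Expands to a static struct nmiaction named example_nmi_na
	 * and passes it to __register_nmi_handler(). */
	return register_nmi_handler(NMI_LOCAL, example_nmi, 0, "example");
}
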
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index 405b4032a60b..aff2b3356101 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -87,7 +87,11 @@
87#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0 87#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0
88#define P6_NOP5_ATOMIC P6_NOP5 88#define P6_NOP5_ATOMIC P6_NOP5
89 89
90#ifdef __ASSEMBLY__
91#define _ASM_MK_NOP(x) .byte x
92#else
90#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" 93#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
94#endif
91 95
92#if defined(CONFIG_MK7) 96#if defined(CONFIG_MK7)
93#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1) 97#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1)
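
The __ASSEMBLY__ variant of _ASM_MK_NOP above makes the ASM_NOPn macros usable from .S files as well as from C inline assembly. A trivial sketch of the C side (the helper name is illustrative):

#include <asm/nops.h>

static inline void example_five_byte_pad(void)
{
	/* Emits the preferred 5-byte NOP sequence for the configured CPU. */
	asm volatile(ASM_NOP5);
}
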
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index ade619ff9e2a..ef17af013475 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -15,8 +15,8 @@
15 */ 15 */
16#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) 16#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
17 17
18#define THREAD_ORDER 1 18#define THREAD_SIZE_ORDER 1
19#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) 19#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
20 20
21#define STACKFAULT_STACK 0 21#define STACKFAULT_STACK 0
22#define DOUBLEFAULT_STACK 1 22#define DOUBLEFAULT_STACK 1
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 7639dbf5d223..320f7bb95f76 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,8 +1,8 @@
1#ifndef _ASM_X86_PAGE_64_DEFS_H 1#ifndef _ASM_X86_PAGE_64_DEFS_H
2#define _ASM_X86_PAGE_64_DEFS_H 2#define _ASM_X86_PAGE_64_DEFS_H
3 3
4#define THREAD_ORDER 1 4#define THREAD_SIZE_ORDER 1
5#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) 5#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
6#define CURRENT_MASK (~(THREAD_SIZE - 1)) 6#define CURRENT_MASK (~(THREAD_SIZE - 1))
7 7
8#define EXCEPTION_STACK_ORDER 0 8#define EXCEPTION_STACK_ORDER 0
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index aa0f91308367..6cbbabf52707 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1023,10 +1023,8 @@ extern void default_banner(void);
1023 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ 1023 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
1024 ) 1024 )
1025 1025
1026#define GET_CR2_INTO_RCX \ 1026#define GET_CR2_INTO_RAX \
1027 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \ 1027 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
1028 movq %rax, %rcx; \
1029 xorq %rax, %rax;
1030 1028
1031#define PARAVIRT_ADJUST_EXCEPTION_FRAME \ 1029#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
1032 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ 1030 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 7a11910a63c4..d9b8e3f7f42a 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -46,7 +46,7 @@
46 46
47#ifdef CONFIG_SMP 47#ifdef CONFIG_SMP
48#define __percpu_prefix "%%"__stringify(__percpu_seg)":" 48#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
49#define __my_cpu_offset percpu_read(this_cpu_off) 49#define __my_cpu_offset this_cpu_read(this_cpu_off)
50 50
51/* 51/*
52 * Compared to the generic __my_cpu_offset version, the following 52 * Compared to the generic __my_cpu_offset version, the following
@@ -351,23 +351,15 @@ do { \
351}) 351})
352 352
353/* 353/*
354 * percpu_read() makes gcc load the percpu variable every time it is 354 * this_cpu_read() makes gcc load the percpu variable every time it is
355 * accessed while percpu_read_stable() allows the value to be cached. 355 * accessed while this_cpu_read_stable() allows the value to be cached.
356 * percpu_read_stable() is more efficient and can be used if its value 356 * this_cpu_read_stable() is more efficient and can be used if its value
357 * is guaranteed to be valid across cpus. The current users include 357 * is guaranteed to be valid across cpus. The current users include
358 * get_current() and get_thread_info(), both of which are actually 358 * get_current() and get_thread_info(), both of which are actually
359 * per-thread variables implemented as per-cpu variables and thus 359 * per-thread variables implemented as per-cpu variables and thus
360 * stable for the duration of the respective task. 360 * stable for the duration of the respective task.
361 */ 361 */
362#define percpu_read(var) percpu_from_op("mov", var, "m" (var)) 362#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
363#define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
364#define percpu_write(var, val) percpu_to_op("mov", var, val)
365#define percpu_add(var, val) percpu_add_op(var, val)
366#define percpu_sub(var, val) percpu_add_op(var, -(val))
367#define percpu_and(var, val) percpu_to_op("and", var, val)
368#define percpu_or(var, val) percpu_to_op("or", var, val)
369#define percpu_xor(var, val) percpu_to_op("xor", var, val)
370#define percpu_inc(var) percpu_unary_op("inc", var)
371 363
372#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 364#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
373#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 365#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -512,7 +504,11 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
512{ 504{
513 unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; 505 unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
514 506
515 return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0; 507#ifdef CONFIG_X86_64
508 return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0;
509#else
510 return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0;
511#endif
516} 512}
517 513
518static inline int x86_this_cpu_variable_test_bit(int nr, 514static inline int x86_this_cpu_variable_test_bit(int nr,
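
this_cpu_read_stable() is the caching variant described in the comment above; its canonical user is get_current(), where the value is stable for the life of the task. A sketch of that pattern (mirroring the asm/current.h style):

#include <asm/percpu.h>

DECLARE_PER_CPU(struct task_struct *, current_task);

static __always_inline struct task_struct *example_get_current(void)
{
	/* current_task only changes at context switch, and the reading
	 * task switches with it, so gcc is allowed to cache this load. */
	return this_cpu_read_stable(current_task);
}
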
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2291895b1836..588f52ea810e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -158,6 +158,7 @@ struct x86_pmu_capability {
158#define IBS_CAPS_OPCNT (1U<<4) 158#define IBS_CAPS_OPCNT (1U<<4)
159#define IBS_CAPS_BRNTRGT (1U<<5) 159#define IBS_CAPS_BRNTRGT (1U<<5)
160#define IBS_CAPS_OPCNTEXT (1U<<6) 160#define IBS_CAPS_OPCNTEXT (1U<<6)
161#define IBS_CAPS_RIPINVALIDCHK (1U<<7)
161 162
162#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ 163#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
163 | IBS_CAPS_FETCHSAM \ 164 | IBS_CAPS_FETCHSAM \
@@ -170,21 +171,28 @@ struct x86_pmu_capability {
170#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) 171#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
171#define IBSCTL_LVT_OFFSET_MASK 0x0F 172#define IBSCTL_LVT_OFFSET_MASK 0x0F
172 173
173/* IbsFetchCtl bits/masks */ 174/* ibs fetch bits/masks */
174#define IBS_FETCH_RAND_EN (1ULL<<57) 175#define IBS_FETCH_RAND_EN (1ULL<<57)
175#define IBS_FETCH_VAL (1ULL<<49) 176#define IBS_FETCH_VAL (1ULL<<49)
176#define IBS_FETCH_ENABLE (1ULL<<48) 177#define IBS_FETCH_ENABLE (1ULL<<48)
177#define IBS_FETCH_CNT 0xFFFF0000ULL 178#define IBS_FETCH_CNT 0xFFFF0000ULL
178#define IBS_FETCH_MAX_CNT 0x0000FFFFULL 179#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
179 180
180/* IbsOpCtl bits */ 181/* ibs op bits/masks */
182/* lower 4 bits of the current count are ignored: */
183#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
181#define IBS_OP_CNT_CTL (1ULL<<19) 184#define IBS_OP_CNT_CTL (1ULL<<19)
182#define IBS_OP_VAL (1ULL<<18) 185#define IBS_OP_VAL (1ULL<<18)
183#define IBS_OP_ENABLE (1ULL<<17) 186#define IBS_OP_ENABLE (1ULL<<17)
184#define IBS_OP_MAX_CNT 0x0000FFFFULL 187#define IBS_OP_MAX_CNT 0x0000FFFFULL
185#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ 188#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
189#define IBS_RIP_INVALID (1ULL<<38)
186 190
191#ifdef CONFIG_X86_LOCAL_APIC
187extern u32 get_ibs_caps(void); 192extern u32 get_ibs_caps(void);
193#else
194static inline u32 get_ibs_caps(void) { return 0; }
195#endif
188 196
189#ifdef CONFIG_PERF_EVENTS 197#ifdef CONFIG_PERF_EVENTS
190extern void perf_events_lapic_init(void); 198extern void perf_events_lapic_init(void);
diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h
index 3427b7798dbc..7ef7c3020e5c 100644
--- a/arch/x86/include/asm/posix_types.h
+++ b/arch/x86/include/asm/posix_types.h
@@ -7,9 +7,9 @@
7#else 7#else
8# ifdef __i386__ 8# ifdef __i386__
9# include "posix_types_32.h" 9# include "posix_types_32.h"
10# elif defined(__LP64__) 10# elif defined(__ILP32__)
11# include "posix_types_64.h"
12# else
13# include "posix_types_x32.h" 11# include "posix_types_x32.h"
12# else
13# include "posix_types_64.h"
14# endif 14# endif
15#endif 15#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4fa7dcceb6c0..7745b257f035 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -579,9 +579,6 @@ extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
579/* Free all resources held by a thread. */ 579/* Free all resources held by a thread. */
580extern void release_thread(struct task_struct *); 580extern void release_thread(struct task_struct *);
581 581
582/* Prepare to copy thread state - unlazy all lazy state */
583extern void prepare_to_copy(struct task_struct *tsk);
584
585unsigned long get_wchan(struct task_struct *p); 582unsigned long get_wchan(struct task_struct *p);
586 583
587/* 584/*
@@ -974,8 +971,6 @@ extern bool cpu_has_amd_erratum(const int *);
974#define cpu_has_amd_erratum(x) (false) 971#define cpu_has_amd_erratum(x) (false)
975#endif /* CONFIG_CPU_SUP_AMD */ 972#endif /* CONFIG_CPU_SUP_AMD */
976 973
977void cpu_idle_wait(void);
978
979extern unsigned long arch_align_stack(unsigned long sp); 974extern unsigned long arch_align_stack(unsigned long sp);
980extern void free_init_pages(char *what, unsigned long begin, unsigned long end); 975extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
981 976
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 165466233ab0..c48a95035a77 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -205,13 +205,15 @@
205 205
206#define IDT_ENTRIES 256 206#define IDT_ENTRIES 256
207#define NUM_EXCEPTION_VECTORS 32 207#define NUM_EXCEPTION_VECTORS 32
208/* Bitmask of exception vectors which push an error code on the stack */
209#define EXCEPTION_ERRCODE_MASK 0x00027d00
208#define GDT_SIZE (GDT_ENTRIES * 8) 210#define GDT_SIZE (GDT_ENTRIES * 8)
209#define GDT_ENTRY_TLS_ENTRIES 3 211#define GDT_ENTRY_TLS_ENTRIES 3
210#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) 212#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
211 213
212#ifdef __KERNEL__ 214#ifdef __KERNEL__
213#ifndef __ASSEMBLY__ 215#ifndef __ASSEMBLY__
214extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10]; 216extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
215 217
216/* 218/*
217 * Load a segment. Fall back on loading the zero 219 * Load a segment. Fall back on loading the zero
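
EXCEPTION_ERRCODE_MASK is a per-vector bitmap: 0x00027d00 has bits 8 (#DF), 10-14 (#TS, #NP, #SS, #GP, #PF) and 17 (#AC) set, the vectors for which the CPU pushes an error code. A sketch of the intended test (helper name illustrative):

#include <linux/types.h>

static inline bool example_vector_has_errcode(unsigned int vector)
{
	return vector < NUM_EXCEPTION_VECTORS &&
	       (EXCEPTION_ERRCODE_MASK & (1u << vector));
}
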
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 4a085383af27..5ca71c065eef 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -257,7 +257,7 @@ struct sigcontext {
257 __u64 oldmask; 257 __u64 oldmask;
258 __u64 cr2; 258 __u64 cr2;
259 struct _fpstate __user *fpstate; /* zero when no FPU context */ 259 struct _fpstate __user *fpstate; /* zero when no FPU context */
260#ifndef __LP64__ 260#ifdef __ILP32__
261 __u32 __fpstate_pad; 261 __u32 __fpstate_pad;
262#endif 262#endif
263 __u64 reserved1[8]; 263 __u64 reserved1[8];
diff --git a/arch/x86/include/asm/siginfo.h b/arch/x86/include/asm/siginfo.h
index fc1aa5535646..34c47b3341c0 100644
--- a/arch/x86/include/asm/siginfo.h
+++ b/arch/x86/include/asm/siginfo.h
@@ -2,7 +2,13 @@
2#define _ASM_X86_SIGINFO_H 2#define _ASM_X86_SIGINFO_H
3 3
4#ifdef __x86_64__ 4#ifdef __x86_64__
5# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) 5# ifdef __ILP32__ /* x32 */
6typedef long long __kernel_si_clock_t __attribute__((aligned(4)));
7# define __ARCH_SI_CLOCK_T __kernel_si_clock_t
8# define __ARCH_SI_ATTRIBUTES __attribute__((aligned(8)))
9# else /* x86-64 */
10# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
11# endif
6#endif 12#endif
7 13
8#include <asm-generic/siginfo.h> 14#include <asm-generic/siginfo.h>
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0434c400287c..f48394513c37 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -62,6 +62,8 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
62/* Static state in head.S used to set up a CPU */ 62/* Static state in head.S used to set up a CPU */
63extern unsigned long stack_start; /* Initial stack pointer address */ 63extern unsigned long stack_start; /* Initial stack pointer address */
64 64
65struct task_struct;
66
65struct smp_ops { 67struct smp_ops {
66 void (*smp_prepare_boot_cpu)(void); 68 void (*smp_prepare_boot_cpu)(void);
67 void (*smp_prepare_cpus)(unsigned max_cpus); 69 void (*smp_prepare_cpus)(unsigned max_cpus);
@@ -70,7 +72,7 @@ struct smp_ops {
70 void (*stop_other_cpus)(int wait); 72 void (*stop_other_cpus)(int wait);
71 void (*smp_send_reschedule)(int cpu); 73 void (*smp_send_reschedule)(int cpu);
72 74
73 int (*cpu_up)(unsigned cpu); 75 int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
74 int (*cpu_disable)(void); 76 int (*cpu_disable)(void);
75 void (*cpu_die)(unsigned int cpu); 77 void (*cpu_die)(unsigned int cpu);
76 void (*play_dead)(void); 78 void (*play_dead)(void);
@@ -113,9 +115,9 @@ static inline void smp_cpus_done(unsigned int max_cpus)
113 smp_ops.smp_cpus_done(max_cpus); 115 smp_ops.smp_cpus_done(max_cpus);
114} 116}
115 117
116static inline int __cpu_up(unsigned int cpu) 118static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle)
117{ 119{
118 return smp_ops.cpu_up(cpu); 120 return smp_ops.cpu_up(cpu, tidle);
119} 121}
120 122
121static inline int __cpu_disable(void) 123static inline int __cpu_disable(void)
@@ -152,7 +154,7 @@ void cpu_disable_common(void);
152void native_smp_prepare_boot_cpu(void); 154void native_smp_prepare_boot_cpu(void);
153void native_smp_prepare_cpus(unsigned int max_cpus); 155void native_smp_prepare_cpus(unsigned int max_cpus);
154void native_smp_cpus_done(unsigned int max_cpus); 156void native_smp_cpus_done(unsigned int max_cpus);
155int native_cpu_up(unsigned int cpunum); 157int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
156int native_cpu_disable(void); 158int native_cpu_disable(void);
157void native_cpu_die(unsigned int cpu); 159void native_cpu_die(unsigned int cpu);
158void native_play_dead(void); 160void native_play_dead(void);
@@ -162,6 +164,7 @@ int wbinvd_on_all_cpus(void);
162 164
163void native_send_call_func_ipi(const struct cpumask *mask); 165void native_send_call_func_ipi(const struct cpumask *mask);
164void native_send_call_func_single_ipi(int cpu); 166void native_send_call_func_single_ipi(int cpu);
167void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
165 168
166void smp_store_cpu_info(int id); 169void smp_store_cpu_info(int id);
167#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) 170#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
@@ -188,11 +191,11 @@ extern unsigned disabled_cpus __cpuinitdata;
188 * from the initial startup. We map APIC_BASE very early in page_setup(), 191 * from the initial startup. We map APIC_BASE very early in page_setup(),
189 * so this is correct in the x86 case. 192 * so this is correct in the x86 case.
190 */ 193 */
191#define raw_smp_processor_id() (percpu_read(cpu_number)) 194#define raw_smp_processor_id() (this_cpu_read(cpu_number))
192extern int safe_smp_processor_id(void); 195extern int safe_smp_processor_id(void);
193 196
194#elif defined(CONFIG_X86_64_SMP) 197#elif defined(CONFIG_X86_64_SMP)
195#define raw_smp_processor_id() (percpu_read(cpu_number)) 198#define raw_smp_processor_id() (this_cpu_read(cpu_number))
196 199
197#define stack_smp_processor_id() \ 200#define stack_smp_processor_id() \
198({ \ 201({ \
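
The cpu_up() path now threads the preallocated idle task down from generic code instead of having the arch create it. An illustrative call (the function name is hypothetical; in-tree the caller is the generic hotplug code):

#include <asm/smp.h>

static int example_boot_secondary(unsigned int cpu, struct task_struct *tidle)
{
	/* Dispatches to smp_ops.cpu_up(), i.e. native_cpu_up(cpu, tidle). */
	return __cpu_up(cpu, tidle);
}
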
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 76bfa2cf301d..b315a33867f2 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -20,10 +20,8 @@
20 20
21#ifdef CONFIG_X86_32 21#ifdef CONFIG_X86_32
22# define LOCK_PTR_REG "a" 22# define LOCK_PTR_REG "a"
23# define REG_PTR_MODE "k"
24#else 23#else
25# define LOCK_PTR_REG "D" 24# define LOCK_PTR_REG "D"
26# define REG_PTR_MODE "q"
27#endif 25#endif
28 26
29#if defined(CONFIG_X86_32) && \ 27#if defined(CONFIG_X86_32) && \
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index b5d9533d2c38..6a998598f172 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -75,9 +75,9 @@ static __always_inline void boot_init_stack_canary(void)
75 75
76 current->stack_canary = canary; 76 current->stack_canary = canary;
77#ifdef CONFIG_X86_64 77#ifdef CONFIG_X86_64
78 percpu_write(irq_stack_union.stack_canary, canary); 78 this_cpu_write(irq_stack_union.stack_canary, canary);
79#else 79#else
80 percpu_write(stack_canary.canary, canary); 80 this_cpu_write(stack_canary.canary, canary);
81#endif 81#endif
82} 82}
83 83
diff --git a/arch/x86/include/asm/stat.h b/arch/x86/include/asm/stat.h
index e0b1d9bbcbc6..7b3ddc348585 100644
--- a/arch/x86/include/asm/stat.h
+++ b/arch/x86/include/asm/stat.h
@@ -25,6 +25,12 @@ struct stat {
25 unsigned long __unused5; 25 unsigned long __unused5;
26}; 26};
27 27
28/* We don't need to memset the whole thing just to initialize the padding */
29#define INIT_STRUCT_STAT_PADDING(st) do { \
30 st.__unused4 = 0; \
31 st.__unused5 = 0; \
32} while (0)
33
28#define STAT64_HAS_BROKEN_ST_INO 1 34#define STAT64_HAS_BROKEN_ST_INO 1
29 35
30/* This matches struct stat64 in glibc2.1, hence the absolutely 36/* This matches struct stat64 in glibc2.1, hence the absolutely
@@ -63,6 +69,12 @@ struct stat64 {
63 unsigned long long st_ino; 69 unsigned long long st_ino;
64}; 70};
65 71
72/* We don't need to memset the whole thing just to initialize the padding */
73#define INIT_STRUCT_STAT64_PADDING(st) do { \
74 memset(&st.__pad0, 0, sizeof(st.__pad0)); \
75 memset(&st.__pad3, 0, sizeof(st.__pad3)); \
76} while (0)
77
66#else /* __i386__ */ 78#else /* __i386__ */
67 79
68struct stat { 80struct stat {
@@ -87,6 +99,15 @@ struct stat {
87 unsigned long st_ctime_nsec; 99 unsigned long st_ctime_nsec;
88 long __unused[3]; 100 long __unused[3];
89}; 101};
102
103/* We don't need to memset the whole thing just to initialize the padding */
104#define INIT_STRUCT_STAT_PADDING(st) do { \
105 st.__pad0 = 0; \
106 st.__unused[0] = 0; \
107 st.__unused[1] = 0; \
108 st.__unused[2] = 0; \
109} while (0)
110
90#endif 111#endif
91 112
92/* for 32bit emulation and 32 bit kernels */ 113/* for 32bit emulation and 32 bit kernels */
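
The INIT_STRUCT_STAT*_PADDING macros let stat copy-out paths clear only the pad/unused fields rather than memset() the whole struct before filling it. A hedged sketch of such a path (mirrors the cp_new_stat style; field list abbreviated):

#include <linux/stat.h>
#include <asm/uaccess.h>

static int example_cp_stat(const struct kstat *ks, struct stat __user *ubuf)
{
	struct stat tmp;

	INIT_STRUCT_STAT_PADDING(tmp);	/* only the padding is zeroed */
	tmp.st_ino  = ks->ino;
	tmp.st_mode = ks->mode;
	/* ... every remaining field is assigned explicitly ... */
	return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
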
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 386b78686c4d..1ace47b62592 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -13,9 +13,11 @@
13#ifndef _ASM_X86_SYSCALL_H 13#ifndef _ASM_X86_SYSCALL_H
14#define _ASM_X86_SYSCALL_H 14#define _ASM_X86_SYSCALL_H
15 15
16#include <linux/audit.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
17#include <linux/err.h> 18#include <linux/err.h>
18#include <asm/asm-offsets.h> /* For NR_syscalls */ 19#include <asm/asm-offsets.h> /* For NR_syscalls */
20#include <asm/thread_info.h> /* for TS_COMPAT */
19#include <asm/unistd.h> 21#include <asm/unistd.h>
20 22
21extern const unsigned long sys_call_table[]; 23extern const unsigned long sys_call_table[];
@@ -88,6 +90,12 @@ static inline void syscall_set_arguments(struct task_struct *task,
88 memcpy(&regs->bx + i, args, n * sizeof(args[0])); 90 memcpy(&regs->bx + i, args, n * sizeof(args[0]));
89} 91}
90 92
93static inline int syscall_get_arch(struct task_struct *task,
94 struct pt_regs *regs)
95{
96 return AUDIT_ARCH_I386;
97}
98
91#else /* CONFIG_X86_64 */ 99#else /* CONFIG_X86_64 */
92 100
93static inline void syscall_get_arguments(struct task_struct *task, 101static inline void syscall_get_arguments(struct task_struct *task,
@@ -212,6 +220,25 @@ static inline void syscall_set_arguments(struct task_struct *task,
212 } 220 }
213} 221}
214 222
223static inline int syscall_get_arch(struct task_struct *task,
224 struct pt_regs *regs)
225{
226#ifdef CONFIG_IA32_EMULATION
227 /*
228 * TS_COMPAT is set for 32-bit syscall entry and then
229 * remains set until we return to user mode.
230 *
231 * TIF_IA32 tasks should always have TS_COMPAT set at
232 * system call time.
233 *
234 * x32 tasks should be considered AUDIT_ARCH_X86_64.
235 */
236 if (task_thread_info(task)->status & TS_COMPAT)
237 return AUDIT_ARCH_I386;
238#endif
239 /* Both x32 and x86_64 are considered "64-bit". */
240 return AUDIT_ARCH_X86_64;
241}
215#endif /* CONFIG_X86_32 */ 242#endif /* CONFIG_X86_32 */
216 243
217#endif /* _ASM_X86_SYSCALL_H */ 244#endif /* _ASM_X86_SYSCALL_H */
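
syscall_get_arch() gives audit (and other tracing users) the ABI of the current syscall entry without open-coding TS_COMPAT checks. An illustrative consumer (name is hypothetical):

#include <asm/syscall.h>

static int example_classify_entry(struct task_struct *task,
				  struct pt_regs *regs)
{
	/* AUDIT_ARCH_I386 for 32-bit entries (including compat on 64-bit
	 * kernels); AUDIT_ARCH_X86_64 for 64-bit and x32 entries. */
	return syscall_get_arch(task, regs) == AUDIT_ARCH_I386;
}
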
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ad6df8ccd715..3c9aebc00d39 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -155,24 +155,6 @@ struct thread_info {
155 155
156#define PREEMPT_ACTIVE 0x10000000 156#define PREEMPT_ACTIVE 0x10000000
157 157
158/* thread information allocation */
159#ifdef CONFIG_DEBUG_STACK_USAGE
160#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
161#else
162#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK)
163#endif
164
165#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
166
167#define alloc_thread_info_node(tsk, node) \
168({ \
169 struct page *page = alloc_pages_node(node, THREAD_FLAGS, \
170 THREAD_ORDER); \
171 struct thread_info *ret = page ? page_address(page) : NULL; \
172 \
173 ret; \
174})
175
176#ifdef CONFIG_X86_32 158#ifdef CONFIG_X86_32
177 159
178#define STACK_WARN (THREAD_SIZE/8) 160#define STACK_WARN (THREAD_SIZE/8)
@@ -222,7 +204,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
222static inline struct thread_info *current_thread_info(void) 204static inline struct thread_info *current_thread_info(void)
223{ 205{
224 struct thread_info *ti; 206 struct thread_info *ti;
225 ti = (void *)(percpu_read_stable(kernel_stack) + 207 ti = (void *)(this_cpu_read_stable(kernel_stack) +
226 KERNEL_STACK_OFFSET - THREAD_SIZE); 208 KERNEL_STACK_OFFSET - THREAD_SIZE);
227 return ti; 209 return ti;
228} 210}
@@ -282,8 +264,7 @@ static inline bool is_ia32_task(void)
282 264
283#ifndef __ASSEMBLY__ 265#ifndef __ASSEMBLY__
284extern void arch_task_cache_init(void); 266extern void arch_task_cache_init(void);
285extern void free_thread_info(struct thread_info *ti);
286extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); 267extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
287#define arch_task_cache_init arch_task_cache_init 268extern void arch_release_task_struct(struct task_struct *tsk);
288#endif 269#endif
289#endif /* _ASM_X86_THREAD_INFO_H */ 270#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index c0e108e08079..36a1a2ab87d2 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -62,11 +62,7 @@ static inline void __flush_tlb_one(unsigned long addr)
62 __flush_tlb(); 62 __flush_tlb();
63} 63}
64 64
65#ifdef CONFIG_X86_32 65#define TLB_FLUSH_ALL -1UL
66# define TLB_FLUSH_ALL 0xffffffff
67#else
68# define TLB_FLUSH_ALL -1ULL
69#endif
70 66
71/* 67/*
72 * TLB flushing: 68 * TLB flushing:
@@ -156,8 +152,8 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
156 152
157static inline void reset_lazy_tlbstate(void) 153static inline void reset_lazy_tlbstate(void)
158{ 154{
159 percpu_write(cpu_tlbstate.state, 0); 155 this_cpu_write(cpu_tlbstate.state, 0);
160 percpu_write(cpu_tlbstate.active_mm, &init_mm); 156 this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
161} 157}
162 158
163#endif /* SMP */ 159#endif /* SMP */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index b9676ae37ada..095b21507b6a 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -92,44 +92,6 @@ extern void setup_node_to_cpumask_map(void);
92 92
93#define pcibus_to_node(bus) __pcibus_to_node(bus) 93#define pcibus_to_node(bus) __pcibus_to_node(bus)
94 94
95#ifdef CONFIG_X86_32
96# define SD_CACHE_NICE_TRIES 1
97# define SD_IDLE_IDX 1
98#else
99# define SD_CACHE_NICE_TRIES 2
100# define SD_IDLE_IDX 2
101#endif
102
103/* sched_domains SD_NODE_INIT for NUMA machines */
104#define SD_NODE_INIT (struct sched_domain) { \
105 .min_interval = 8, \
106 .max_interval = 32, \
107 .busy_factor = 32, \
108 .imbalance_pct = 125, \
109 .cache_nice_tries = SD_CACHE_NICE_TRIES, \
110 .busy_idx = 3, \
111 .idle_idx = SD_IDLE_IDX, \
112 .newidle_idx = 0, \
113 .wake_idx = 0, \
114 .forkexec_idx = 0, \
115 \
116 .flags = 1*SD_LOAD_BALANCE \
117 | 1*SD_BALANCE_NEWIDLE \
118 | 1*SD_BALANCE_EXEC \
119 | 1*SD_BALANCE_FORK \
120 | 0*SD_BALANCE_WAKE \
121 | 1*SD_WAKE_AFFINE \
122 | 0*SD_PREFER_LOCAL \
123 | 0*SD_SHARE_CPUPOWER \
124 | 0*SD_POWERSAVINGS_BALANCE \
125 | 0*SD_SHARE_PKG_RESOURCES \
126 | 1*SD_SERIALIZE \
127 | 0*SD_PREFER_SIBLING \
128 , \
129 .last_balance = jiffies, \
130 .balance_interval = 1, \
131}
132
133extern int __node_distance(int, int); 95extern int __node_distance(int, int);
134#define node_distance(a, b) __node_distance(a, b) 96#define node_distance(a, b) __node_distance(a, b)
135 97
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 8be5f54d9360..851fe0dc13bc 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -79,11 +79,12 @@
79#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) 79#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
80 80
81/* 81/*
82 * The exception table consists of pairs of addresses: the first is the 82 * The exception table consists of pairs of addresses relative to the
83 * address of an instruction that is allowed to fault, and the second is 83 * exception table entry itself: the first is the address of an
84 * the address at which the program should continue. No registers are 84 * instruction that is allowed to fault, and the second is the address
85 * modified, so it is entirely up to the continuation code to figure out 85 * at which the program should continue. No registers are modified,
86 * what to do. 86 * so it is entirely up to the continuation code to figure out what to
87 * do.
87 * 88 *
88 * All the routines below use bits of fixup code that are out of line 89 * All the routines below use bits of fixup code that are out of line
89 * with the main instruction path. This means when everything is well, 90 * with the main instruction path. This means when everything is well,
@@ -92,10 +93,14 @@
92 */ 93 */
93 94
94struct exception_table_entry { 95struct exception_table_entry {
95 unsigned long insn, fixup; 96 int insn, fixup;
96}; 97};
98/* This is not the generic standard exception_table_entry format */
99#define ARCH_HAS_SORT_EXTABLE
100#define ARCH_HAS_SEARCH_EXTABLE
97 101
98extern int fixup_exception(struct pt_regs *regs); 102extern int fixup_exception(struct pt_regs *regs);
103extern int early_fixup_exception(unsigned long *ip);
99 104
100/* 105/*
101 * These are the main single-value transfer routines. They automatically 106 * These are the main single-value transfer routines. They automatically
@@ -202,8 +207,8 @@ extern int __get_user_bad(void);
202 asm volatile("1: movl %%eax,0(%1)\n" \ 207 asm volatile("1: movl %%eax,0(%1)\n" \
203 "2: movl %%edx,4(%1)\n" \ 208 "2: movl %%edx,4(%1)\n" \
204 "3:\n" \ 209 "3:\n" \
205 _ASM_EXTABLE(1b, 2b - 1b) \ 210 _ASM_EXTABLE_EX(1b, 2b) \
206 _ASM_EXTABLE(2b, 3b - 2b) \ 211 _ASM_EXTABLE_EX(2b, 3b) \
207 : : "A" (x), "r" (addr)) 212 : : "A" (x), "r" (addr))
208 213
209#define __put_user_x8(x, ptr, __ret_pu) \ 214#define __put_user_x8(x, ptr, __ret_pu) \
@@ -408,7 +413,7 @@ do { \
408#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ 413#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \
409 asm volatile("1: mov"itype" %1,%"rtype"0\n" \ 414 asm volatile("1: mov"itype" %1,%"rtype"0\n" \
410 "2:\n" \ 415 "2:\n" \
411 _ASM_EXTABLE(1b, 2b - 1b) \ 416 _ASM_EXTABLE_EX(1b, 2b) \
412 : ltype(x) : "m" (__m(addr))) 417 : ltype(x) : "m" (__m(addr)))
413 418
414#define __put_user_nocheck(x, ptr, size) \ 419#define __put_user_nocheck(x, ptr, size) \
@@ -450,7 +455,7 @@ struct __large_struct { unsigned long buf[100]; };
450#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ 455#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \
451 asm volatile("1: mov"itype" %"rtype"0,%1\n" \ 456 asm volatile("1: mov"itype" %"rtype"0,%1\n" \
452 "2:\n" \ 457 "2:\n" \
453 _ASM_EXTABLE(1b, 2b - 1b) \ 458 _ASM_EXTABLE_EX(1b, 2b) \
454 : : ltype(x), "m" (__m(addr))) 459 : : ltype(x), "m" (__m(addr)))
455 460
456/* 461/*
@@ -557,6 +562,8 @@ struct __large_struct { unsigned long buf[100]; };
557 562
558extern unsigned long 563extern unsigned long
559copy_from_user_nmi(void *to, const void __user *from, unsigned long n); 564copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
565extern __must_check long
566strncpy_from_user(char *dst, const char __user *src, long count);
560 567
561/* 568/*
562 * movsl can be slow when source and dest are not both 8-byte aligned 569 * movsl can be slow when source and dest are not both 8-byte aligned
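
Because the new exception_table_entry fields are 32-bit offsets relative to themselves, an absolute address is recovered by adding the field's value to the field's own address; this is also why the arch must supply its own sort/search (ARCH_HAS_SORT_EXTABLE/ARCH_HAS_SEARCH_EXTABLE). A sketch of the decode helpers (names illustrative):

#include <asm/uaccess.h>

static inline unsigned long
example_ex_insn_addr(const struct exception_table_entry *e)
{
	return (unsigned long)&e->insn + e->insn;
}

static inline unsigned long
example_ex_fixup_addr(const struct exception_table_entry *e)
{
	return (unsigned long)&e->fixup + e->fixup;
}
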
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 566e803cc602..8084bc73b18c 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -213,11 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to,
213 return n; 213 return n;
214} 214}
215 215
216long __must_check strncpy_from_user(char *dst, const char __user *src,
217 long count);
218long __must_check __strncpy_from_user(char *dst,
219 const char __user *src, long count);
220
221/** 216/**
222 * strlen_user: - Get the size of a string in user space. 217 * strlen_user: - Get the size of a string in user space.
223 * @str: The string to measure. 218 * @str: The string to measure.
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 1c66d30971ad..fcd4b6f3ef02 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -208,10 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
208 } 208 }
209} 209}
210 210
211__must_check long
212strncpy_from_user(char *dst, const char __user *src, long count);
213__must_check long
214__strncpy_from_user(char *dst, const char __user *src, long count);
215__must_check long strnlen_user(const char __user *str, long n); 211__must_check long strnlen_user(const char __user *str, long n);
216__must_check long __strnlen_user(const char __user *str, long n); 212__must_check long __strnlen_user(const char __user *str, long n);
217__must_check long strlen_user(const char __user *str); 213__must_check long strlen_user(const char __user *str);
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 37cdc9d99bb1..4437001d8e3d 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -63,10 +63,10 @@
63#else 63#else
64# ifdef __i386__ 64# ifdef __i386__
65# include <asm/unistd_32.h> 65# include <asm/unistd_32.h>
66# elif defined(__LP64__) 66# elif defined(__ILP32__)
67# include <asm/unistd_64.h>
68# else
69# include <asm/unistd_x32.h> 67# include <asm/unistd_x32.h>
68# else
69# include <asm/unistd_64.h>
70# endif 70# endif
71#endif 71#endif
72 72
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index 6fe6767b7124..e58f03b206c3 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -43,4 +43,37 @@ static inline unsigned long has_zero(unsigned long a)
43 return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); 43 return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
44} 44}
45 45
46/*
47 * Load an unaligned word from kernel space.
48 *
49 * In the (very unlikely) case of the word being a page-crosser
50 * and the next page not being mapped, take the exception and
51 * return zeroes in the non-existing part.
52 */
53static inline unsigned long load_unaligned_zeropad(const void *addr)
54{
55 unsigned long ret, dummy;
56
57 asm(
58 "1:\tmov %2,%0\n"
59 "2:\n"
60 ".section .fixup,\"ax\"\n"
61 "3:\t"
62 "lea %2,%1\n\t"
63 "and %3,%1\n\t"
64 "mov (%1),%0\n\t"
65 "leal %2,%%ecx\n\t"
66 "andl %4,%%ecx\n\t"
67 "shll $3,%%ecx\n\t"
68 "shr %%cl,%0\n\t"
69 "jmp 2b\n"
70 ".previous\n"
71 _ASM_EXTABLE(1b, 3b)
72 :"=&r" (ret),"=&c" (dummy)
73 :"m" (*(unsigned long *)addr),
74 "i" (-sizeof(unsigned long)),
75 "i" (sizeof(unsigned long)-1));
76 return ret;
77}
78
46#endif /* _ASM_WORD_AT_A_TIME_H */ 79#endif /* _ASM_WORD_AT_A_TIME_H */
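
load_unaligned_zeropad() exists for word-at-a-time string scanning: if the trailing word crosses into an unmapped page, the fixup returns the mapped bytes with the missing tail zeroed. A hedged sketch pairing it with has_zero() from earlier in this header (the function is illustrative, not in-tree):

#include <linux/types.h>
#include <asm/word-at-a-time.h>

static size_t example_word_strlen(const char *s)
{
	const char *p = s;

	while (!has_zero(load_unaligned_zeropad(p)))
		p += sizeof(unsigned long);
	while (*p)		/* find the exact NUL inside the last word */
		p++;
	return p - s;
}
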
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index baaca8defec8..c090af10ac7d 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -188,13 +188,19 @@ struct x86_msi_ops {
188 void (*restore_msi_irqs)(struct pci_dev *dev, int irq); 188 void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
189}; 189};
190 190
191struct x86_io_apic_ops {
192 void (*init) (void);
193 unsigned int (*read) (unsigned int apic, unsigned int reg);
194 void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
195 void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
196};
197
191extern struct x86_init_ops x86_init; 198extern struct x86_init_ops x86_init;
192extern struct x86_cpuinit_ops x86_cpuinit; 199extern struct x86_cpuinit_ops x86_cpuinit;
193extern struct x86_platform_ops x86_platform; 200extern struct x86_platform_ops x86_platform;
194extern struct x86_msi_ops x86_msi; 201extern struct x86_msi_ops x86_msi;
195 202extern struct x86_io_apic_ops x86_io_apic_ops;
196extern void x86_init_noop(void); 203extern void x86_init_noop(void);
197extern void x86_init_uint_noop(unsigned int unused); 204extern void x86_init_uint_noop(unsigned int unused);
198extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node);
199 205
200#endif 206#endif
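
x86_io_apic_ops turns raw IO-APIC register access into an indirection point, so interrupt remapping (or a hypervisor) can interpose on reads, writes and modifies. A sketch of the thin wrappers call sites would use (mirroring the io_apic_read style; names illustrative):

#include <asm/x86_init.h>

static inline unsigned int example_io_apic_read(unsigned int apic,
						unsigned int reg)
{
	return x86_io_apic_ops.read(apic, reg);
}

static inline void example_io_apic_write(unsigned int apic, unsigned int reg,
					 unsigned int value)
{
	x86_io_apic_ops.write(apic, reg, value);
}
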
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index c6ce2452f10c..8a1b6f9b594a 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -80,10 +80,7 @@ static inline int xsave_user(struct xsave_struct __user *buf)
80 "3: movl $-1,%[err]\n" 80 "3: movl $-1,%[err]\n"
81 " jmp 2b\n" 81 " jmp 2b\n"
82 ".previous\n" 82 ".previous\n"
83 ".section __ex_table,\"a\"\n" 83 _ASM_EXTABLE(1b,3b)
84 _ASM_ALIGN "\n"
85 _ASM_PTR "1b,3b\n"
86 ".previous"
87 : [err] "=r" (err) 84 : [err] "=r" (err)
88 : "D" (buf), "a" (-1), "d" (-1), "0" (0) 85 : "D" (buf), "a" (-1), "d" (-1), "0" (0)
89 : "memory"); 86 : "memory");
@@ -106,10 +103,7 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
106 "3: movl $-1,%[err]\n" 103 "3: movl $-1,%[err]\n"
107 " jmp 2b\n" 104 " jmp 2b\n"
108 ".previous\n" 105 ".previous\n"
109 ".section __ex_table,\"a\"\n" 106 _ASM_EXTABLE(1b,3b)
110 _ASM_ALIGN "\n"
111 _ASM_PTR "1b,3b\n"
112 ".previous"
113 : [err] "=r" (err) 107 : [err] "=r" (err)
114 : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) 108 : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
115 : "memory"); /* memory required? */ 109 : "memory"); /* memory required? */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 532d2e090e6f..56ebd1f98447 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the linux kernel. 2# Makefile for the linux kernel.
3# 3#
4 4
5extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds 5extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds
6 6
7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) 7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 8
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a415b1f44365..7c439fe4941b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -593,7 +593,7 @@ void __init acpi_set_irq_model_ioapic(void)
593#ifdef CONFIG_ACPI_HOTPLUG_CPU 593#ifdef CONFIG_ACPI_HOTPLUG_CPU
594#include <acpi/processor.h> 594#include <acpi/processor.h>
595 595
596static void __cpuinitdata acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) 596static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
597{ 597{
598#ifdef CONFIG_ACPI_NUMA 598#ifdef CONFIG_ACPI_NUMA
599 int nid; 599 int nid;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 103b6ab368d3..146a49c763a4 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -24,6 +24,10 @@ unsigned long acpi_realmode_flags;
24static char temp_stack[4096]; 24static char temp_stack[4096];
25#endif 25#endif
26 26
27asmlinkage void acpi_enter_s3(void)
28{
29 acpi_enter_sleep_state(3, wake_sleep_flags);
30}
27/** 31/**
28 * acpi_suspend_lowlevel - save kernel state 32 * acpi_suspend_lowlevel - save kernel state
29 * 33 *
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 416d4be13fef..d68677a2a010 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -3,12 +3,16 @@
3 */ 3 */
4 4
5#include <asm/trampoline.h> 5#include <asm/trampoline.h>
6#include <linux/linkage.h>
6 7
7extern unsigned long saved_video_mode; 8extern unsigned long saved_video_mode;
8extern long saved_magic; 9extern long saved_magic;
9 10
10extern int wakeup_pmode_return; 11extern int wakeup_pmode_return;
11 12
13extern u8 wake_sleep_flags;
14extern asmlinkage void acpi_enter_s3(void);
15
12extern unsigned long acpi_copy_wakeup_routine(unsigned long); 16extern unsigned long acpi_copy_wakeup_routine(unsigned long);
13extern void wakeup_long64(void); 17extern void wakeup_long64(void);
14 18
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 13ab720573e3..72610839f03b 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -74,9 +74,7 @@ restore_registers:
74ENTRY(do_suspend_lowlevel) 74ENTRY(do_suspend_lowlevel)
75 call save_processor_state 75 call save_processor_state
76 call save_registers 76 call save_registers
77 pushl $3 77 call acpi_enter_s3
78 call acpi_enter_sleep_state
79 addl $4, %esp
80 78
81# In case of S3 failure, we'll emerge here. Jump 79# In case of S3 failure, we'll emerge here. Jump
82# to ret_point to recover 80# to ret_point to recover
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 8ea5164cbd04..014d1d28c397 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -71,9 +71,7 @@ ENTRY(do_suspend_lowlevel)
71 movq %rsi, saved_rsi 71 movq %rsi, saved_rsi
72 72
73 addq $8, %rsp 73 addq $8, %rsp
74 movl $3, %edi 74 call acpi_enter_s3
75 xorl %eax, %eax
76 call acpi_enter_sleep_state
77 /* in case something went wrong, restore the machine status and go on */ 75 /* in case something went wrong, restore the machine status and go on */
78 jmp resume_point 76 jmp resume_point
79 77
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 11544d8f1e97..39a222e094af 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -35,6 +35,7 @@
35#include <linux/smp.h> 35#include <linux/smp.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37 37
38#include <asm/irq_remapping.h>
38#include <asm/perf_event.h> 39#include <asm/perf_event.h>
39#include <asm/x86_init.h> 40#include <asm/x86_init.h>
40#include <asm/pgalloc.h> 41#include <asm/pgalloc.h>
@@ -1325,11 +1326,13 @@ void __cpuinit setup_local_APIC(void)
1325 acked); 1326 acked);
1326 break; 1327 break;
1327 } 1328 }
1328 if (cpu_has_tsc) { 1329 if (queued) {
1329 rdtscll(ntsc); 1330 if (cpu_has_tsc) {
1330 max_loops = (cpu_khz << 10) - (ntsc - tsc); 1331 rdtscll(ntsc);
1331 } else 1332 max_loops = (cpu_khz << 10) - (ntsc - tsc);
1332 max_loops--; 1333 } else
1334 max_loops--;
1335 }
1333 } while (queued && max_loops > 0); 1336 } while (queued && max_loops > 0);
1334 WARN_ON(max_loops <= 0); 1337 WARN_ON(max_loops <= 0);
1335 1338
@@ -1441,8 +1444,8 @@ void __init bsp_end_local_APIC_setup(void)
1441 * Now that local APIC setup is completed for BP, configure the fault 1444 * Now that local APIC setup is completed for BP, configure the fault
1442 * handling for interrupt remapping. 1445 * handling for interrupt remapping.
1443 */ 1446 */
1444 if (intr_remapping_enabled) 1447 if (irq_remapping_enabled)
1445 enable_drhd_fault_handling(); 1448 irq_remap_enable_fault_handling();
1446 1449
1447} 1450}
1448 1451
@@ -1517,7 +1520,7 @@ void enable_x2apic(void)
1517int __init enable_IR(void) 1520int __init enable_IR(void)
1518{ 1521{
1519#ifdef CONFIG_IRQ_REMAP 1522#ifdef CONFIG_IRQ_REMAP
1520 if (!intr_remapping_supported()) { 1523 if (!irq_remapping_supported()) {
1521 pr_debug("intr-remapping not supported\n"); 1524 pr_debug("intr-remapping not supported\n");
1522 return -1; 1525 return -1;
1523 } 1526 }
@@ -1528,7 +1531,7 @@ int __init enable_IR(void)
1528 return -1; 1531 return -1;
1529 } 1532 }
1530 1533
1531 return enable_intr_remapping(); 1534 return irq_remapping_enable();
1532#endif 1535#endif
1533 return -1; 1536 return -1;
1534} 1537}
@@ -1537,10 +1540,13 @@ void __init enable_IR_x2apic(void)
1537{ 1540{
1538 unsigned long flags; 1541 unsigned long flags;
1539 int ret, x2apic_enabled = 0; 1542 int ret, x2apic_enabled = 0;
1540 int dmar_table_init_ret; 1543 int hardware_init_ret;
1544
1545 /* Make sure irq_remap_ops are initialized */
1546 setup_irq_remapping_ops();
1541 1547
1542 dmar_table_init_ret = dmar_table_init(); 1548 hardware_init_ret = irq_remapping_prepare();
1543 if (dmar_table_init_ret && !x2apic_supported()) 1549 if (hardware_init_ret && !x2apic_supported())
1544 return; 1550 return;
1545 1551
1546 ret = save_ioapic_entries(); 1552 ret = save_ioapic_entries();
@@ -1556,7 +1562,7 @@ void __init enable_IR_x2apic(void)
1556 if (x2apic_preenabled && nox2apic) 1562 if (x2apic_preenabled && nox2apic)
1557 disable_x2apic(); 1563 disable_x2apic();
1558 1564
1559 if (dmar_table_init_ret) 1565 if (hardware_init_ret)
1560 ret = -1; 1566 ret = -1;
1561 else 1567 else
1562 ret = enable_IR(); 1568 ret = enable_IR();
@@ -1637,9 +1643,11 @@ static int __init apic_verify(void)
1637 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; 1643 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1638 1644
1639 /* The BIOS may have set up the APIC at some other address */ 1645 /* The BIOS may have set up the APIC at some other address */
1640 rdmsr(MSR_IA32_APICBASE, l, h); 1646 if (boot_cpu_data.x86 >= 6) {
1641 if (l & MSR_IA32_APICBASE_ENABLE) 1647 rdmsr(MSR_IA32_APICBASE, l, h);
1642 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; 1648 if (l & MSR_IA32_APICBASE_ENABLE)
1649 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1650 }
1643 1651
1644 pr_info("Found and enabled local APIC!\n"); 1652 pr_info("Found and enabled local APIC!\n");
1645 return 0; 1653 return 0;
@@ -1657,13 +1665,15 @@ int __init apic_force_enable(unsigned long addr)
1657 * MSR. This can only be done in software for Intel P6 or later 1665 * MSR. This can only be done in software for Intel P6 or later
1658 * and AMD K7 (Model > 1) or later. 1666 * and AMD K7 (Model > 1) or later.
1659 */ 1667 */
1660 rdmsr(MSR_IA32_APICBASE, l, h); 1668 if (boot_cpu_data.x86 >= 6) {
1661 if (!(l & MSR_IA32_APICBASE_ENABLE)) { 1669 rdmsr(MSR_IA32_APICBASE, l, h);
1662 pr_info("Local APIC disabled by BIOS -- reenabling.\n"); 1670 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1663 l &= ~MSR_IA32_APICBASE_BASE; 1671 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1664 l |= MSR_IA32_APICBASE_ENABLE | addr; 1672 l &= ~MSR_IA32_APICBASE_BASE;
1665 wrmsr(MSR_IA32_APICBASE, l, h); 1673 l |= MSR_IA32_APICBASE_ENABLE | addr;
1666 enabled_via_apicbase = 1; 1674 wrmsr(MSR_IA32_APICBASE, l, h);
1675 enabled_via_apicbase = 1;
1676 }
1667 } 1677 }
1668 return apic_verify(); 1678 return apic_verify();
1669} 1679}
@@ -2172,8 +2182,8 @@ static int lapic_suspend(void)
2172 local_irq_save(flags); 2182 local_irq_save(flags);
2173 disable_local_APIC(); 2183 disable_local_APIC();
2174 2184
2175 if (intr_remapping_enabled) 2185 if (irq_remapping_enabled)
2176 disable_intr_remapping(); 2186 irq_remapping_disable();
2177 2187
2178 local_irq_restore(flags); 2188 local_irq_restore(flags);
2179 return 0; 2189 return 0;
@@ -2189,7 +2199,7 @@ static void lapic_resume(void)
2189 return; 2199 return;
2190 2200
2191 local_irq_save(flags); 2201 local_irq_save(flags);
2192 if (intr_remapping_enabled) { 2202 if (irq_remapping_enabled) {
2193 /* 2203 /*
2194 * IO-APIC and PIC have their own resume routines. 2204 * IO-APIC and PIC have their own resume routines.
2195 * We just mask them here to make sure the interrupt 2205 * We just mask them here to make sure the interrupt
@@ -2209,10 +2219,12 @@ static void lapic_resume(void)
2209 * FIXME! This will be wrong if we ever support suspend on 2219 * FIXME! This will be wrong if we ever support suspend on
2210 * SMP! We'll need to do this as part of the CPU restore! 2220 * SMP! We'll need to do this as part of the CPU restore!
2211 */ 2221 */
2212 rdmsr(MSR_IA32_APICBASE, l, h); 2222 if (boot_cpu_data.x86 >= 6) {
2213 l &= ~MSR_IA32_APICBASE_BASE; 2223 rdmsr(MSR_IA32_APICBASE, l, h);
2214 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 2224 l &= ~MSR_IA32_APICBASE_BASE;
2215 wrmsr(MSR_IA32_APICBASE, l, h); 2225 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
2226 wrmsr(MSR_IA32_APICBASE, l, h);
2227 }
2216 } 2228 }
2217 2229
2218 maxlvt = lapic_get_maxlvt(); 2230 maxlvt = lapic_get_maxlvt();
@@ -2239,8 +2251,8 @@ static void lapic_resume(void)
2239 apic_write(APIC_ESR, 0); 2251 apic_write(APIC_ESR, 0);
2240 apic_read(APIC_ESR); 2252 apic_read(APIC_ESR);
2241 2253
2242 if (intr_remapping_enabled) 2254 if (irq_remapping_enabled)
2243 reenable_intr_remapping(x2apic_mode); 2255 irq_remapping_reenable(x2apic_mode);
2244 2256
2245 local_irq_restore(flags); 2257 local_irq_restore(flags);
2246} 2258}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 359b6899a36c..0e881c46e8c8 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -227,6 +227,7 @@ static struct apic apic_flat = {
227 227
228 .read = native_apic_mem_read, 228 .read = native_apic_mem_read,
229 .write = native_apic_mem_write, 229 .write = native_apic_mem_write,
230 .eoi_write = native_apic_mem_write,
230 .icr_read = native_apic_icr_read, 231 .icr_read = native_apic_icr_read,
231 .icr_write = native_apic_icr_write, 232 .icr_write = native_apic_icr_write,
232 .wait_icr_idle = native_apic_wait_icr_idle, 233 .wait_icr_idle = native_apic_wait_icr_idle,
@@ -386,6 +387,7 @@ static struct apic apic_physflat = {
386 387
387 .read = native_apic_mem_read, 388 .read = native_apic_mem_read,
388 .write = native_apic_mem_write, 389 .write = native_apic_mem_write,
390 .eoi_write = native_apic_mem_write,
389 .icr_read = native_apic_icr_read, 391 .icr_read = native_apic_icr_read,
390 .icr_write = native_apic_icr_write, 392 .icr_write = native_apic_icr_write,
391 .wait_icr_idle = native_apic_wait_icr_idle, 393 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 634ae6cdd5c9..a6e4c6e06c08 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -181,6 +181,7 @@ struct apic apic_noop = {
181 181
182 .read = noop_apic_read, 182 .read = noop_apic_read,
183 .write = noop_apic_write, 183 .write = noop_apic_write,
184 .eoi_write = noop_apic_write,
184 .icr_read = noop_apic_icr_read, 185 .icr_read = noop_apic_icr_read,
185 .icr_write = noop_apic_icr_write, 186 .icr_write = noop_apic_icr_write,
186 .wait_icr_idle = noop_apic_wait_icr_idle, 187 .wait_icr_idle = noop_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 899803e03214..6ec6d5d297c3 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -207,8 +207,11 @@ static void __init map_csrs(void)
207 207
208static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) 208static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
209{ 209{
210 c->phys_proc_id = node; 210
211 per_cpu(cpu_llc_id, smp_processor_id()) = node; 211 if (c->phys_proc_id != node) {
212 c->phys_proc_id = node;
213 per_cpu(cpu_llc_id, smp_processor_id()) = node;
214 }
212} 215}
213 216
214static int __init numachip_system_init(void) 217static int __init numachip_system_init(void)
@@ -292,6 +295,7 @@ static struct apic apic_numachip __refconst = {
292 295
293 .read = native_apic_mem_read, 296 .read = native_apic_mem_read,
294 .write = native_apic_mem_write, 297 .write = native_apic_mem_write,
298 .eoi_write = native_apic_mem_write,
295 .icr_read = native_apic_icr_read, 299 .icr_read = native_apic_icr_read,
296 .icr_write = native_apic_icr_write, 300 .icr_write = native_apic_icr_write,
297 .wait_icr_idle = native_apic_wait_icr_idle, 301 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 0cdec7065aff..31fbdbfbf960 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -248,6 +248,7 @@ static struct apic apic_bigsmp = {
248 248
249 .read = native_apic_mem_read, 249 .read = native_apic_mem_read,
250 .write = native_apic_mem_write, 250 .write = native_apic_mem_write,
251 .eoi_write = native_apic_mem_write,
251 .icr_read = native_apic_icr_read, 252 .icr_read = native_apic_icr_read,
252 .icr_write = native_apic_icr_write, 253 .icr_write = native_apic_icr_write,
253 .wait_icr_idle = native_apic_wait_icr_idle, 254 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index e42d1d3b9134..db4ab1be3c79 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -678,6 +678,7 @@ static struct apic __refdata apic_es7000_cluster = {
678 678
679 .read = native_apic_mem_read, 679 .read = native_apic_mem_read,
680 .write = native_apic_mem_write, 680 .write = native_apic_mem_write,
681 .eoi_write = native_apic_mem_write,
681 .icr_read = native_apic_icr_read, 682 .icr_read = native_apic_icr_read,
682 .icr_write = native_apic_icr_write, 683 .icr_write = native_apic_icr_write,
683 .wait_icr_idle = native_apic_wait_icr_idle, 684 .wait_icr_idle = native_apic_wait_icr_idle,
@@ -742,6 +743,7 @@ static struct apic __refdata apic_es7000 = {
742 743
743 .read = native_apic_mem_read, 744 .read = native_apic_mem_read,
744 .write = native_apic_mem_write, 745 .write = native_apic_mem_write,
746 .eoi_write = native_apic_mem_write,
745 .icr_read = native_apic_icr_read, 747 .icr_read = native_apic_icr_read,
746 .icr_write = native_apic_icr_write, 748 .icr_write = native_apic_icr_write,
747 .wait_icr_idle = native_apic_wait_icr_idle, 749 .wait_icr_idle = native_apic_wait_icr_idle,
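
Each APIC driver in this series gains an .eoi_write callback, initialized to the same routine as .write. Keeping EOI behind its own hook means a later change (for instance the irq-remapping code) can repoint just the EOI path without cloning the whole write routine. A self-contained miniature of the pattern, with hypothetical names:

	/* hypothetical miniature of the ops-table pattern */
	struct apic_ops_sketch {
		void (*write)(unsigned int reg, unsigned int val);
		void (*eoi_write)(unsigned int reg, unsigned int val);
	};

	static void mem_write(unsigned int reg, unsigned int val)
	{
		(void)reg; (void)val;		/* would be an MMIO store */
	}

	static struct apic_ops_sketch ops = {
		.write	   = mem_write,
		.eoi_write = mem_write,		/* same routine until overridden */
	};

	static void ack_irq_sketch(void)
	{
		ops.eoi_write(0xB0, 0);		/* 0xB0 is the EOI register offset */
	}
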
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e88300d8e80a..ffdc152e507d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,23 +68,21 @@
68#define for_each_irq_pin(entry, head) \ 68#define for_each_irq_pin(entry, head) \
69 for (entry = head; entry; entry = entry->next) 69 for (entry = head; entry; entry = entry->next)
70 70
71static void __init __ioapic_init_mappings(void); 71#ifdef CONFIG_IRQ_REMAP
72 72static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
73static unsigned int __io_apic_read (unsigned int apic, unsigned int reg); 73static inline bool irq_remapped(struct irq_cfg *cfg)
74static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val); 74{
75static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); 75 return cfg->irq_2_iommu.iommu != NULL;
76 76}
77static struct io_apic_ops io_apic_ops = { 77#else
78 .init = __ioapic_init_mappings, 78static inline bool irq_remapped(struct irq_cfg *cfg)
79 .read = __io_apic_read, 79{
80 .write = __io_apic_write, 80 return false;
81 .modify = __io_apic_modify, 81}
82}; 82static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
83
84void __init set_io_apic_ops(const struct io_apic_ops *ops)
85{ 83{
86 io_apic_ops = *ops;
87} 84}
85#endif
88 86
89/* 87/*
90 * Is the SiS APIC rmw bug present ? 88 * Is the SiS APIC rmw bug present ?
@@ -313,21 +311,6 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
313 irq_free_desc(at); 311 irq_free_desc(at);
314} 312}
315 313
316static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
317{
318 return io_apic_ops.read(apic, reg);
319}
320
321static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
322{
323 io_apic_ops.write(apic, reg, value);
324}
325
326static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
327{
328 io_apic_ops.modify(apic, reg, value);
329}
330
331 314
332struct io_apic { 315struct io_apic {
333 unsigned int index; 316 unsigned int index;
@@ -349,14 +332,14 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
349 writel(vector, &io_apic->eoi); 332 writel(vector, &io_apic->eoi);
350} 333}
351 334
352static unsigned int __io_apic_read(unsigned int apic, unsigned int reg) 335unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
353{ 336{
354 struct io_apic __iomem *io_apic = io_apic_base(apic); 337 struct io_apic __iomem *io_apic = io_apic_base(apic);
355 writel(reg, &io_apic->index); 338 writel(reg, &io_apic->index);
356 return readl(&io_apic->data); 339 return readl(&io_apic->data);
357} 340}
358 341
359static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) 342void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
360{ 343{
361 struct io_apic __iomem *io_apic = io_apic_base(apic); 344 struct io_apic __iomem *io_apic = io_apic_base(apic);
362 345
@@ -370,7 +353,7 @@ static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int va
370 * 353 *
371 * Older SiS APIC requires we rewrite the index register 354 * Older SiS APIC requires we rewrite the index register
372 */ 355 */
373static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) 356void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
374{ 357{
375 struct io_apic __iomem *io_apic = io_apic_base(apic); 358 struct io_apic __iomem *io_apic = io_apic_base(apic);
376 359
@@ -379,29 +362,6 @@ static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int v
379 writel(value, &io_apic->data); 362 writel(value, &io_apic->data);
380} 363}
381 364
382static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
383{
384 struct irq_pin_list *entry;
385 unsigned long flags;
386
387 raw_spin_lock_irqsave(&ioapic_lock, flags);
388 for_each_irq_pin(entry, cfg->irq_2_pin) {
389 unsigned int reg;
390 int pin;
391
392 pin = entry->pin;
393 reg = io_apic_read(entry->apic, 0x10 + pin*2);
394 /* Is the remote IRR bit set? */
395 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
396 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
397 return true;
398 }
399 }
400 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
401
402 return false;
403}
404
405union entry_union { 365union entry_union {
406 struct { u32 w1, w2; }; 366 struct { u32 w1, w2; };
407 struct IO_APIC_route_entry entry; 367 struct IO_APIC_route_entry entry;
@@ -1361,77 +1321,13 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
1361 fasteoi ? "fasteoi" : "edge"); 1321 fasteoi ? "fasteoi" : "edge");
1362} 1322}
1363 1323
1364
1365static int setup_ir_ioapic_entry(int irq,
1366 struct IR_IO_APIC_route_entry *entry,
1367 unsigned int destination, int vector,
1368 struct io_apic_irq_attr *attr)
1369{
1370 int index;
1371 struct irte irte;
1372 int ioapic_id = mpc_ioapic_id(attr->ioapic);
1373 struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);
1374
1375 if (!iommu) {
1376 pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
1377 return -ENODEV;
1378 }
1379
1380 index = alloc_irte(iommu, irq, 1);
1381 if (index < 0) {
1382 pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
1383 return -ENOMEM;
1384 }
1385
1386 prepare_irte(&irte, vector, destination);
1387
1388 /* Set source-id of interrupt request */
1389 set_ioapic_sid(&irte, ioapic_id);
1390
1391 modify_irte(irq, &irte);
1392
1393 apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
1394 "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
1395 "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
1396 "Avail:%X Vector:%02X Dest:%08X "
1397 "SID:%04X SQ:%X SVT:%X)\n",
1398 attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
1399 irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
1400 irte.avail, irte.vector, irte.dest_id,
1401 irte.sid, irte.sq, irte.svt);
1402
1403 memset(entry, 0, sizeof(*entry));
1404
1405 entry->index2 = (index >> 15) & 0x1;
1406 entry->zero = 0;
1407 entry->format = 1;
1408 entry->index = (index & 0x7fff);
1409 /*
1410 * IO-APIC RTE will be configured with virtual vector.
1411 * irq handler will do the explicit EOI to the io-apic.
1412 */
1413 entry->vector = attr->ioapic_pin;
1414 entry->mask = 0; /* enable IRQ */
1415 entry->trigger = attr->trigger;
1416 entry->polarity = attr->polarity;
1417
1418 /* Mask level triggered irqs.
1419 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1420 */
1421 if (attr->trigger)
1422 entry->mask = 1;
1423
1424 return 0;
1425}
1426
1427static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, 1324static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
1428 unsigned int destination, int vector, 1325 unsigned int destination, int vector,
1429 struct io_apic_irq_attr *attr) 1326 struct io_apic_irq_attr *attr)
1430{ 1327{
1431 if (intr_remapping_enabled) 1328 if (irq_remapping_enabled)
1432 return setup_ir_ioapic_entry(irq, 1329 return setup_ioapic_remapped_entry(irq, entry, destination,
1433 (struct IR_IO_APIC_route_entry *)entry, 1330 vector, attr);
1434 destination, vector, attr);
1435 1331
1436 memset(entry, 0, sizeof(*entry)); 1332 memset(entry, 0, sizeof(*entry));
1437 1333
@@ -1588,7 +1484,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1588{ 1484{
1589 struct IO_APIC_route_entry entry; 1485 struct IO_APIC_route_entry entry;
1590 1486
1591 if (intr_remapping_enabled) 1487 if (irq_remapping_enabled)
1592 return; 1488 return;
1593 1489
1594 memset(&entry, 0, sizeof(entry)); 1490 memset(&entry, 0, sizeof(entry));
@@ -1674,7 +1570,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1674 1570
1675 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1571 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1676 1572
1677 if (intr_remapping_enabled) { 1573 if (irq_remapping_enabled) {
1678 printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" 1574 printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
1679 " Pol Stat Indx2 Zero Vect:\n"); 1575 " Pol Stat Indx2 Zero Vect:\n");
1680 } else { 1576 } else {
@@ -1683,7 +1579,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1683 } 1579 }
1684 1580
1685 for (i = 0; i <= reg_01.bits.entries; i++) { 1581 for (i = 0; i <= reg_01.bits.entries; i++) {
1686 if (intr_remapping_enabled) { 1582 if (irq_remapping_enabled) {
1687 struct IO_APIC_route_entry entry; 1583 struct IO_APIC_route_entry entry;
1688 struct IR_IO_APIC_route_entry *ir_entry; 1584 struct IR_IO_APIC_route_entry *ir_entry;
1689 1585
@@ -2050,7 +1946,7 @@ void disable_IO_APIC(void)
2050 * IOAPIC RTE as well as interrupt-remapping table entry). 1946 * IOAPIC RTE as well as interrupt-remapping table entry).
2051 * As this gets called during crash dump, keep this simple for now. 1947 * As this gets called during crash dump, keep this simple for now.
2052 */ 1948 */
2053 if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { 1949 if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) {
2054 struct IO_APIC_route_entry entry; 1950 struct IO_APIC_route_entry entry;
2055 1951
2056 memset(&entry, 0, sizeof(entry)); 1952 memset(&entry, 0, sizeof(entry));
@@ -2074,7 +1970,7 @@ void disable_IO_APIC(void)
2074 * Use virtual wire A mode when interrupt remapping is enabled. 1970 * Use virtual wire A mode when interrupt remapping is enabled.
2075 */ 1971 */
2076 if (cpu_has_apic || apic_from_smp_config()) 1972 if (cpu_has_apic || apic_from_smp_config())
2077 disconnect_bsp_APIC(!intr_remapping_enabled && 1973 disconnect_bsp_APIC(!irq_remapping_enabled &&
2078 ioapic_i8259.pin != -1); 1974 ioapic_i8259.pin != -1);
2079} 1975}
2080 1976
@@ -2390,71 +2286,6 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2390 return ret; 2286 return ret;
2391} 2287}
2392 2288
2393#ifdef CONFIG_IRQ_REMAP
2394
2395/*
2396 * Migrate the IO-APIC irq in the presence of intr-remapping.
2397 *
2398 * For both level and edge triggered, irq migration is a simple atomic
2399 * update(of vector and cpu destination) of IRTE and flush the hardware cache.
2400 *
2401 * For level triggered, we eliminate the io-apic RTE modification (with the
2402 * updated vector information), by using a virtual vector (io-apic pin number).
2403 * Real vector that is used for interrupting cpu will be coming from
2404 * the interrupt-remapping table entry.
2405 *
2406 * As the migration is a simple atomic update of IRTE, the same mechanism
2407 * is used to migrate MSI irq's in the presence of interrupt-remapping.
2408 */
2409static int
2410ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2411 bool force)
2412{
2413 struct irq_cfg *cfg = data->chip_data;
2414 unsigned int dest, irq = data->irq;
2415 struct irte irte;
2416
2417 if (!cpumask_intersects(mask, cpu_online_mask))
2418 return -EINVAL;
2419
2420 if (get_irte(irq, &irte))
2421 return -EBUSY;
2422
2423 if (assign_irq_vector(irq, cfg, mask))
2424 return -EBUSY;
2425
2426 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2427
2428 irte.vector = cfg->vector;
2429 irte.dest_id = IRTE_DEST(dest);
2430
2431 /*
2432 * Atomically updates the IRTE with the new destination, vector
2433 * and flushes the interrupt entry cache.
2434 */
2435 modify_irte(irq, &irte);
2436
2437 /*
2438 * After this point, all the interrupts will start arriving
2439 * at the new destination. So, time to cleanup the previous
2440 * vector allocation.
2441 */
2442 if (cfg->move_in_progress)
2443 send_cleanup_vector(cfg);
2444
2445 cpumask_copy(data->affinity, mask);
2446 return 0;
2447}
2448
2449#else
2450static inline int
2451ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2452 bool force)
2453{
2454 return 0;
2455}
2456#endif
2457
2458asmlinkage void smp_irq_move_cleanup_interrupt(void) 2289asmlinkage void smp_irq_move_cleanup_interrupt(void)
2459{ 2290{
2460 unsigned vector, me; 2291 unsigned vector, me;
@@ -2552,6 +2383,29 @@ static void ack_apic_edge(struct irq_data *data)
2552atomic_t irq_mis_count; 2383atomic_t irq_mis_count;
2553 2384
2554#ifdef CONFIG_GENERIC_PENDING_IRQ 2385#ifdef CONFIG_GENERIC_PENDING_IRQ
2386static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
2387{
2388 struct irq_pin_list *entry;
2389 unsigned long flags;
2390
2391 raw_spin_lock_irqsave(&ioapic_lock, flags);
2392 for_each_irq_pin(entry, cfg->irq_2_pin) {
2393 unsigned int reg;
2394 int pin;
2395
2396 pin = entry->pin;
2397 reg = io_apic_read(entry->apic, 0x10 + pin*2);
2398 /* Is the remote IRR bit set? */
2399 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
2400 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2401 return true;
2402 }
2403 }
2404 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2405
2406 return false;
2407}
2408
2555static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) 2409static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
2556{ 2410{
2557 /* If we are moving the irq we need to mask it */ 2411 /* If we are moving the irq we need to mask it */
@@ -2699,7 +2553,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
2699 chip->irq_eoi = ir_ack_apic_level; 2553 chip->irq_eoi = ir_ack_apic_level;
2700 2554
2701#ifdef CONFIG_SMP 2555#ifdef CONFIG_SMP
2702 chip->irq_set_affinity = ir_ioapic_set_affinity; 2556 chip->irq_set_affinity = set_remapped_irq_affinity;
2703#endif 2557#endif
2704} 2558}
2705#endif /* CONFIG_IRQ_REMAP */ 2559#endif /* CONFIG_IRQ_REMAP */
@@ -2912,7 +2766,7 @@ static inline void __init check_timer(void)
2912 * 8259A. 2766 * 8259A.
2913 */ 2767 */
2914 if (pin1 == -1) { 2768 if (pin1 == -1) {
2915 if (intr_remapping_enabled) 2769 if (irq_remapping_enabled)
2916 panic("BIOS bug: timer not connected to IO-APIC"); 2770 panic("BIOS bug: timer not connected to IO-APIC");
2917 pin1 = pin2; 2771 pin1 = pin2;
2918 apic1 = apic2; 2772 apic1 = apic2;
@@ -2945,7 +2799,7 @@ static inline void __init check_timer(void)
2945 clear_IO_APIC_pin(0, pin1); 2799 clear_IO_APIC_pin(0, pin1);
2946 goto out; 2800 goto out;
2947 } 2801 }
2948 if (intr_remapping_enabled) 2802 if (irq_remapping_enabled)
2949 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 2803 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2950 local_irq_disable(); 2804 local_irq_disable();
2951 clear_IO_APIC_pin(apic1, pin1); 2805 clear_IO_APIC_pin(apic1, pin1);
@@ -3169,7 +3023,7 @@ void destroy_irq(unsigned int irq)
3169 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); 3023 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3170 3024
3171 if (irq_remapped(cfg)) 3025 if (irq_remapped(cfg))
3172 free_irte(irq); 3026 free_remapped_irq(irq);
3173 raw_spin_lock_irqsave(&vector_lock, flags); 3027 raw_spin_lock_irqsave(&vector_lock, flags);
3174 __clear_irq_vector(irq, cfg); 3028 __clear_irq_vector(irq, cfg);
3175 raw_spin_unlock_irqrestore(&vector_lock, flags); 3029 raw_spin_unlock_irqrestore(&vector_lock, flags);
@@ -3198,54 +3052,34 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3198 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 3052 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3199 3053
3200 if (irq_remapped(cfg)) { 3054 if (irq_remapped(cfg)) {
3201 struct irte irte; 3055 compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
3202 int ir_index; 3056 return err;
3203 u16 sub_handle; 3057 }
3204
3205 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
3206 BUG_ON(ir_index == -1);
3207
3208 prepare_irte(&irte, cfg->vector, dest);
3209
3210 /* Set source-id of interrupt request */
3211 if (pdev)
3212 set_msi_sid(&irte, pdev);
3213 else
3214 set_hpet_sid(&irte, hpet_id);
3215
3216 modify_irte(irq, &irte);
3217 3058
3059 if (x2apic_enabled())
3060 msg->address_hi = MSI_ADDR_BASE_HI |
3061 MSI_ADDR_EXT_DEST_ID(dest);
3062 else
3218 msg->address_hi = MSI_ADDR_BASE_HI; 3063 msg->address_hi = MSI_ADDR_BASE_HI;
3219 msg->data = sub_handle;
3220 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
3221 MSI_ADDR_IR_SHV |
3222 MSI_ADDR_IR_INDEX1(ir_index) |
3223 MSI_ADDR_IR_INDEX2(ir_index);
3224 } else {
3225 if (x2apic_enabled())
3226 msg->address_hi = MSI_ADDR_BASE_HI |
3227 MSI_ADDR_EXT_DEST_ID(dest);
3228 else
3229 msg->address_hi = MSI_ADDR_BASE_HI;
3230 3064
3231 msg->address_lo = 3065 msg->address_lo =
3232 MSI_ADDR_BASE_LO | 3066 MSI_ADDR_BASE_LO |
3233 ((apic->irq_dest_mode == 0) ? 3067 ((apic->irq_dest_mode == 0) ?
3234 MSI_ADDR_DEST_MODE_PHYSICAL: 3068 MSI_ADDR_DEST_MODE_PHYSICAL:
3235 MSI_ADDR_DEST_MODE_LOGICAL) | 3069 MSI_ADDR_DEST_MODE_LOGICAL) |
3236 ((apic->irq_delivery_mode != dest_LowestPrio) ? 3070 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3237 MSI_ADDR_REDIRECTION_CPU: 3071 MSI_ADDR_REDIRECTION_CPU:
3238 MSI_ADDR_REDIRECTION_LOWPRI) | 3072 MSI_ADDR_REDIRECTION_LOWPRI) |
3239 MSI_ADDR_DEST_ID(dest); 3073 MSI_ADDR_DEST_ID(dest);
3074
3075 msg->data =
3076 MSI_DATA_TRIGGER_EDGE |
3077 MSI_DATA_LEVEL_ASSERT |
3078 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3079 MSI_DATA_DELIVERY_FIXED:
3080 MSI_DATA_DELIVERY_LOWPRI) |
3081 MSI_DATA_VECTOR(cfg->vector);
3240 3082
3241 msg->data =
3242 MSI_DATA_TRIGGER_EDGE |
3243 MSI_DATA_LEVEL_ASSERT |
3244 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3245 MSI_DATA_DELIVERY_FIXED:
3246 MSI_DATA_DELIVERY_LOWPRI) |
3247 MSI_DATA_VECTOR(cfg->vector);
3248 }
3249 return err; 3083 return err;
3250} 3084}
3251 3085
@@ -3288,33 +3122,6 @@ static struct irq_chip msi_chip = {
3288 .irq_retrigger = ioapic_retrigger_irq, 3122 .irq_retrigger = ioapic_retrigger_irq,
3289}; 3123};
3290 3124
3291/*
3292 * Map the PCI dev to the corresponding remapping hardware unit
3293 * and allocate 'nvec' consecutive interrupt-remapping table entries
3294 * in it.
3295 */
3296static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3297{
3298 struct intel_iommu *iommu;
3299 int index;
3300
3301 iommu = map_dev_to_ir(dev);
3302 if (!iommu) {
3303 printk(KERN_ERR
3304 "Unable to map PCI %s to iommu\n", pci_name(dev));
3305 return -ENOENT;
3306 }
3307
3308 index = alloc_irte(iommu, irq, nvec);
3309 if (index < 0) {
3310 printk(KERN_ERR
3311 "Unable to allocate %d IRTE for PCI %s\n", nvec,
3312 pci_name(dev));
3313 return -ENOSPC;
3314 }
3315 return index;
3316}
3317
3318static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3125static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3319{ 3126{
3320 struct irq_chip *chip = &msi_chip; 3127 struct irq_chip *chip = &msi_chip;
@@ -3345,7 +3152,6 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3345 int node, ret, sub_handle, index = 0; 3152 int node, ret, sub_handle, index = 0;
3346 unsigned int irq, irq_want; 3153 unsigned int irq, irq_want;
3347 struct msi_desc *msidesc; 3154 struct msi_desc *msidesc;
3348 struct intel_iommu *iommu = NULL;
3349 3155
3350 /* x86 doesn't support multiple MSI yet */ 3156 /* x86 doesn't support multiple MSI yet */
3351 if (type == PCI_CAP_ID_MSI && nvec > 1) 3157 if (type == PCI_CAP_ID_MSI && nvec > 1)
@@ -3359,7 +3165,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3359 if (irq == 0) 3165 if (irq == 0)
3360 return -1; 3166 return -1;
3361 irq_want = irq + 1; 3167 irq_want = irq + 1;
3362 if (!intr_remapping_enabled) 3168 if (!irq_remapping_enabled)
3363 goto no_ir; 3169 goto no_ir;
3364 3170
3365 if (!sub_handle) { 3171 if (!sub_handle) {
@@ -3367,23 +3173,16 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3367 * allocate the consecutive block of IRTE's 3173 * allocate the consecutive block of IRTE's
3368 * for 'nvec' 3174 * for 'nvec'
3369 */ 3175 */
3370 index = msi_alloc_irte(dev, irq, nvec); 3176 index = msi_alloc_remapped_irq(dev, irq, nvec);
3371 if (index < 0) { 3177 if (index < 0) {
3372 ret = index; 3178 ret = index;
3373 goto error; 3179 goto error;
3374 } 3180 }
3375 } else { 3181 } else {
3376 iommu = map_dev_to_ir(dev); 3182 ret = msi_setup_remapped_irq(dev, irq, index,
3377 if (!iommu) { 3183 sub_handle);
3378 ret = -ENOENT; 3184 if (ret < 0)
3379 goto error; 3185 goto error;
3380 }
3381 /*
3382 * setup the mapping between the irq and the IRTE
3383 * base index, the sub_handle pointing to the
3384 * appropriate interrupt remap table entry.
3385 */
3386 set_irte_irq(irq, iommu, index, sub_handle);
3387 } 3186 }
3388no_ir: 3187no_ir:
3389 ret = setup_msi_irq(dev, msidesc, irq); 3188 ret = setup_msi_irq(dev, msidesc, irq);
@@ -3501,15 +3300,8 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3501 struct msi_msg msg; 3300 struct msi_msg msg;
3502 int ret; 3301 int ret;
3503 3302
3504 if (intr_remapping_enabled) { 3303 if (irq_remapping_enabled) {
3505 struct intel_iommu *iommu = map_hpet_to_ir(id); 3304 if (!setup_hpet_msi_remapped(irq, id))
3506 int index;
3507
3508 if (!iommu)
3509 return -1;
3510
3511 index = alloc_irte(iommu, irq, 1);
3512 if (index < 0)
3513 return -1; 3305 return -1;
3514 } 3306 }
3515 3307
@@ -3888,8 +3680,8 @@ void __init setup_ioapic_dest(void)
3888 else 3680 else
3889 mask = apic->target_cpus(); 3681 mask = apic->target_cpus();
3890 3682
3891 if (intr_remapping_enabled) 3683 if (irq_remapping_enabled)
3892 ir_ioapic_set_affinity(idata, mask, false); 3684 set_remapped_irq_affinity(idata, mask, false);
3893 else 3685 else
3894 ioapic_set_affinity(idata, mask, false); 3686 ioapic_set_affinity(idata, mask, false);
3895 } 3687 }
@@ -3931,12 +3723,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
3931 return res; 3723 return res;
3932} 3724}
3933 3725
3934void __init ioapic_and_gsi_init(void) 3726void __init native_io_apic_init_mappings(void)
3935{
3936 io_apic_ops.init();
3937}
3938
3939static void __init __ioapic_init_mappings(void)
3940{ 3727{
3941 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 3728 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
3942 struct resource *ioapic_res; 3729 struct resource *ioapic_res;
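
Two structural changes run through the io_apic.c diff: the io_apic_ops indirection is dropped in favor of exported native_io_apic_read/write/modify functions, and the Intel IRTE details move behind generic helpers, with irq_remapped() and irq_remap_modify_chip_defaults() stubbed out when CONFIG_IRQ_REMAP is off. A self-contained sketch of that compile-out pattern, with hypothetical names:

	#include <stdbool.h>
	#include <stddef.h>

	struct irq_cfg_sketch {
		void *iommu;		/* set when an IOMMU remaps this irq */
	};

	#ifdef CONFIG_IRQ_REMAP_SKETCH
	static inline bool irq_remapped(struct irq_cfg_sketch *cfg)
	{
		return cfg->iommu != NULL;
	}
	#else
	static inline bool irq_remapped(struct irq_cfg_sketch *cfg)
	{
		(void)cfg;
		return false;	/* callers' remapping branches constant-fold away */
	}
	#endif
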
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 00d2422ca7c9..f00a68cca37a 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -530,6 +530,7 @@ static struct apic __refdata apic_numaq = {
530 530
531 .read = native_apic_mem_read, 531 .read = native_apic_mem_read,
532 .write = native_apic_mem_write, 532 .write = native_apic_mem_write,
533 .eoi_write = native_apic_mem_write,
533 .icr_read = native_apic_icr_read, 534 .icr_read = native_apic_icr_read,
534 .icr_write = native_apic_icr_write, 535 .icr_write = native_apic_icr_write,
535 .wait_icr_idle = native_apic_wait_icr_idle, 536 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index ff2c1b9aac4d..1b291da09e60 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -142,6 +142,7 @@ static struct apic apic_default = {
142 142
143 .read = native_apic_mem_read, 143 .read = native_apic_mem_read,
144 .write = native_apic_mem_write, 144 .write = native_apic_mem_write,
145 .eoi_write = native_apic_mem_write,
145 .icr_read = native_apic_icr_read, 146 .icr_read = native_apic_icr_read,
146 .icr_write = native_apic_icr_write, 147 .icr_write = native_apic_icr_write,
147 .wait_icr_idle = native_apic_wait_icr_idle, 148 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index fea000b27f07..659897c00755 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -546,6 +546,7 @@ static struct apic apic_summit = {
546 546
547 .read = native_apic_mem_read, 547 .read = native_apic_mem_read,
548 .write = native_apic_mem_write, 548 .write = native_apic_mem_write,
549 .eoi_write = native_apic_mem_write,
549 .icr_read = native_apic_icr_read, 550 .icr_read = native_apic_icr_read,
550 .icr_write = native_apic_icr_write, 551 .icr_write = native_apic_icr_write,
551 .wait_icr_idle = native_apic_wait_icr_idle, 552 .wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 48f3103b3c93..ff35cff0e1a7 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -260,6 +260,7 @@ static struct apic apic_x2apic_cluster = {
260 260
261 .read = native_apic_msr_read, 261 .read = native_apic_msr_read,
262 .write = native_apic_msr_write, 262 .write = native_apic_msr_write,
263 .eoi_write = native_apic_msr_eoi_write,
263 .icr_read = native_x2apic_icr_read, 264 .icr_read = native_x2apic_icr_read,
264 .icr_write = native_x2apic_icr_write, 265 .icr_write = native_x2apic_icr_write,
265 .wait_icr_idle = native_x2apic_wait_icr_idle, 266 .wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8a778db45e3a..c17e982db275 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -24,6 +24,12 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
24{ 24{
25 if (x2apic_phys) 25 if (x2apic_phys)
26 return x2apic_enabled(); 26 return x2apic_enabled();
27 else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
28 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) &&
29 x2apic_enabled()) {
30 printk(KERN_DEBUG "System requires x2apic physical mode\n");
31 return 1;
32 }
27 else 33 else
28 return 0; 34 return 0;
29} 35}
@@ -166,6 +172,7 @@ static struct apic apic_x2apic_phys = {
166 172
167 .read = native_apic_msr_read, 173 .read = native_apic_msr_read,
168 .write = native_apic_msr_write, 174 .write = native_apic_msr_write,
175 .eoi_write = native_apic_msr_eoi_write,
169 .icr_read = native_x2apic_icr_read, 176 .icr_read = native_x2apic_icr_read,
170 .icr_write = native_x2apic_icr_write, 177 .icr_write = native_x2apic_icr_write,
171 .wait_icr_idle = native_x2apic_wait_icr_idle, 178 .wait_icr_idle = native_x2apic_wait_icr_idle,
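
The new x2apic_acpi_madt_oem_check() branch honors firmware policy: on a sufficiently new FADT with ACPI_FADT_APIC_PHYSICAL set, physical destination mode is selected even without the x2apic_phys= parameter. Condensed into a hypothetical helper:

	#include <stdbool.h>

	/* hypothetical condensation of the check above */
	static bool use_x2apic_phys(bool cmdline_phys, int fadt_rev, int min_rev,
				    bool fadt_wants_physical, bool x2apic_on)
	{
		if (cmdline_phys)
			return x2apic_on;
		/* firmware may mandate physical mode via the FADT flags */
		return fadt_rev >= min_rev && fadt_wants_physical && x2apic_on;
	}
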
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 87bfa69e216e..c6d03f7a4401 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -404,6 +404,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
404 404
405 .read = native_apic_msr_read, 405 .read = native_apic_msr_read,
406 .write = native_apic_msr_write, 406 .write = native_apic_msr_write,
407 .eoi_write = native_apic_msr_eoi_write,
407 .icr_read = native_x2apic_icr_read, 408 .icr_read = native_x2apic_icr_read,
408 .icr_write = native_x2apic_icr_write, 409 .icr_write = native_x2apic_icr_write,
409 .wait_icr_idle = native_x2apic_wait_icr_idle, 410 .wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 459e78cbf61e..07b0c0db466c 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -2401,7 +2401,7 @@ static void __exit apm_exit(void)
2401 * (pm_idle), Wait for all processors to update cached/local 2401 * (pm_idle), Wait for all processors to update cached/local
2402 * copies of pm_idle before proceeding. 2402 * copies of pm_idle before proceeding.
2403 */ 2403 */
2404 cpu_idle_wait(); 2404 kick_all_cpus_sync();
2405 } 2405 }
2406 if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) 2406 if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
2407 && (apm_info.connection_version > 0x0100)) { 2407 && (apm_info.connection_version > 0x0100)) {
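
cpu_idle_wait() is gone from the tree; kick_all_cpus_sync() gives the same guarantee by making a synchronous cross-call to every CPU, so once it returns no CPU can still be executing through a stale pm_idle pointer. A standalone sketch (in the kernel the cross-call is smp_call_function() with wait=1; the single-threaded stand-in below only models the contract):

	static void do_nothing(void *unused)
	{
		(void)unused;
	}

	/* stand-in for a synchronous cross-CPU call */
	static void cross_call_all_cpus(void (*fn)(void *), void *arg)
	{
		fn(arg);
	}

	static void kick_all_cpus_sync_sketch(void)
	{
		/* an empty handler is enough: waiting for it to finish on
		 * every CPU proves no CPU is still inside the old function
		 * it cached before the pointer was updated */
		cross_call_all_cpus(do_nothing, NULL);
	}
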
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 5da1269e8ddc..e2dbcb7dabdd 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -27,21 +27,29 @@ static int num_scan_areas;
27 27
28static __init int set_corruption_check(char *arg) 28static __init int set_corruption_check(char *arg)
29{ 29{
30 char *end; 30 ssize_t ret;
31 unsigned long val;
31 32
32 memory_corruption_check = simple_strtol(arg, &end, 10); 33 ret = kstrtoul(arg, 10, &val);
34 if (ret)
35 return ret;
33 36
34 return (*end == 0) ? 0 : -EINVAL; 37 memory_corruption_check = val;
38 return 0;
35} 39}
36early_param("memory_corruption_check", set_corruption_check); 40early_param("memory_corruption_check", set_corruption_check);
37 41
38static __init int set_corruption_check_period(char *arg) 42static __init int set_corruption_check_period(char *arg)
39{ 43{
40 char *end; 44 ssize_t ret;
45 unsigned long val;
41 46
42 corruption_check_period = simple_strtoul(arg, &end, 10); 47 ret = kstrtoul(arg, 10, &val);
48 if (ret)
49 return ret;
43 50
44 return (*end == 0) ? 0 : -EINVAL; 51 corruption_check_period = val;
52 return 0;
45} 53}
46early_param("memory_corruption_check_period", set_corruption_check_period); 54early_param("memory_corruption_check_period", set_corruption_check_period);
47 55
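
Both boot parameters move from simple_strtol()/simple_strtoul(), which silently stop at the first non-digit, to kstrtoul(), which rejects the whole string on trailing junk or overflow and returns an error the caller can propagate. A userspace approximation of that contract:

	#include <errno.h>
	#include <stdlib.h>

	/* userspace sketch of kstrtoul(): all-or-nothing base-10 parse */
	static int strict_parse_ul(const char *s, unsigned long *out)
	{
		char *end;
		unsigned long val;

		errno = 0;
		val = strtoul(s, &end, 10);
		if (errno)
			return -ERANGE;		/* overflow */
		if (end == s || *end != '\0')
			return -EINVAL;		/* empty string or trailing junk */
		*out = val;
		return 0;
	}
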
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0a44b90602b0..146bb6218eec 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -26,7 +26,8 @@
26 * contact AMD for precise details and a CPU swap. 26 * contact AMD for precise details and a CPU swap.
27 * 27 *
28 * See http://www.multimania.com/poulot/k6bug.html 28 * See http://www.multimania.com/poulot/k6bug.html
29 * http://www.amd.com/K6/k6docs/revgd.html 29 * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6"
30 * (Publication # 21266 Issue Date: August 1998)
30 * 31 *
31 * The following test is erm.. interesting. AMD neglected to up 32 * The following test is erm.. interesting. AMD neglected to up
32 * the chip setting when fixing the bug but they also tweaked some 33 * the chip setting when fixing the bug but they also tweaked some
@@ -94,7 +95,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
94 "system stability may be impaired when more than 32 MB are used.\n"); 95 "system stability may be impaired when more than 32 MB are used.\n");
95 else 96 else
96 printk(KERN_CONT "probably OK (after B9730xxxx).\n"); 97 printk(KERN_CONT "probably OK (after B9730xxxx).\n");
97 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
98 } 98 }
99 99
100 /* K6 with old style WHCR */ 100 /* K6 with old style WHCR */
@@ -353,10 +353,11 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
353 node = per_cpu(cpu_llc_id, cpu); 353 node = per_cpu(cpu_llc_id, cpu);
354 354
355 /* 355 /*
356 * If core numbers are inconsistent, it's likely a multi-fabric platform, 356 * On multi-fabric platform (e.g. Numascale NumaChip) a
357 * so invoke platform-specific handler 357 * platform-specific handler needs to be called to fixup some
358 * IDs of the CPU.
358 */ 359 */
359 if (c->phys_proc_id != node) 360 if (x86_cpuinit.fixup_cpu_id)
360 x86_cpuinit.fixup_cpu_id(c, node); 361 x86_cpuinit.fixup_cpu_id(c, node);
361 362
362 if (!node_online(node)) { 363 if (!node_online(node)) {
@@ -579,6 +580,24 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
579 } 580 }
580 } 581 }
581 582
583 /* re-enable TopologyExtensions if switched off by BIOS */
584 if ((c->x86 == 0x15) &&
585 (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
586 !cpu_has(c, X86_FEATURE_TOPOEXT)) {
587 u64 val;
588
589 if (!rdmsrl_amd_safe(0xc0011005, &val)) {
590 val |= 1ULL << 54;
591 wrmsrl_amd_safe(0xc0011005, val);
592 rdmsrl(0xc0011005, val);
593 if (val & (1ULL << 54)) {
594 set_cpu_cap(c, X86_FEATURE_TOPOEXT);
595 printk(KERN_INFO FW_INFO "CPU: Re-enabling "
596 "disabled Topology Extensions Support\n");
597 }
598 }
599 }
600
582 cpu_detect_cache_sizes(c); 601 cpu_detect_cache_sizes(c);
583 602
584 /* Multi core CPU? */ 603 /* Multi core CPU? */
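
The TopologyExtensions hunk is a write-and-verify sequence: set bit 54 in MSR C001_1005 using the _safe accessor (the write may fault on locked-down parts), read the MSR back, and only set X86_FEATURE_TOPOEXT if the bit actually stuck. The same idea in miniature, with the MSR replaced by a plain variable:

	#include <stdbool.h>
	#include <stdint.h>

	/* 'reg' stands in for the MSR; a real MSR write can be silently
	 * dropped or faulted away, which is why the read-back matters */
	static bool set_and_verify_bit(volatile uint64_t *reg, unsigned int bit)
	{
		*reg |= 1ULL << bit;
		return (*reg >> bit) & 1;	/* trust only what reads back */
	}
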
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 67e258362a3d..82f29e70d058 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1163,15 +1163,6 @@ static void dbg_restore_debug_regs(void)
1163#endif /* ! CONFIG_KGDB */ 1163#endif /* ! CONFIG_KGDB */
1164 1164
1165/* 1165/*
1166 * Prints an error where the NUMA and configured core-number mismatch and the
1167 * platform didn't override this to fix it up
1168 */
1169void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
1170{
1171 pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
1172}
1173
1174/*
1175 * cpu_init() initializes state that is per-CPU. Some data is already 1166 * cpu_init() initializes state that is per-CPU. Some data is already
1176 * initialized (naturally) in the bootstrap process, such as the GDT 1167 * initialized (naturally) in the bootstrap process, such as the GDT
1177 * and IDT. We reload them nevertheless, this function acts as a 1168 * and IDT. We reload them nevertheless, this function acts as a
@@ -1194,7 +1185,7 @@ void __cpuinit cpu_init(void)
1194 oist = &per_cpu(orig_ist, cpu); 1185 oist = &per_cpu(orig_ist, cpu);
1195 1186
1196#ifdef CONFIG_NUMA 1187#ifdef CONFIG_NUMA
1197 if (cpu != 0 && percpu_read(numa_node) == 0 && 1188 if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
1198 early_cpu_to_node(cpu) != NUMA_NO_NODE) 1189 early_cpu_to_node(cpu) != NUMA_NO_NODE)
1199 set_numa_node(early_cpu_to_node(cpu)); 1190 set_numa_node(early_cpu_to_node(cpu));
1200#endif 1191#endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 73d08ed98a64..9a7c90d80bc4 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -433,14 +433,14 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
433 /* check if @slot is already used or the index is already disabled */ 433 /* check if @slot is already used or the index is already disabled */
434 ret = amd_get_l3_disable_slot(nb, slot); 434 ret = amd_get_l3_disable_slot(nb, slot);
435 if (ret >= 0) 435 if (ret >= 0)
436 return -EINVAL; 436 return -EEXIST;
437 437
438 if (index > nb->l3_cache.indices) 438 if (index > nb->l3_cache.indices)
439 return -EINVAL; 439 return -EINVAL;
440 440
441 /* check whether the other slot has disabled the same index already */ 441 /* check whether the other slot has disabled the same index already */
442 if (index == amd_get_l3_disable_slot(nb, !slot)) 442 if (index == amd_get_l3_disable_slot(nb, !slot))
443 return -EINVAL; 443 return -EEXIST;
444 444
445 amd_l3_disable_index(nb, cpu, slot, index); 445 amd_l3_disable_index(nb, cpu, slot, index);
446 446
@@ -468,8 +468,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
468 err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); 468 err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
469 if (err) { 469 if (err) {
470 if (err == -EEXIST) 470 if (err == -EEXIST)
471 printk(KERN_WARNING "L3 disable slot %d in use!\n", 471 pr_warning("L3 slot %d in use/index already disabled!\n",
472 slot); 472 slot);
473 return err; 473 return err;
474 } 474 }
475 return count; 475 return count;
@@ -615,14 +615,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
615 new_l2 = this_leaf.size/1024; 615 new_l2 = this_leaf.size/1024;
616 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 616 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
617 index_msb = get_count_order(num_threads_sharing); 617 index_msb = get_count_order(num_threads_sharing);
618 l2_id = c->apicid >> index_msb; 618 l2_id = c->apicid & ~((1 << index_msb) - 1);
619 break; 619 break;
620 case 3: 620 case 3:
621 new_l3 = this_leaf.size/1024; 621 new_l3 = this_leaf.size/1024;
622 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 622 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
623 index_msb = get_count_order( 623 index_msb = get_count_order(
624 num_threads_sharing); 624 num_threads_sharing);
625 l3_id = c->apicid >> index_msb; 625 l3_id = c->apicid & ~((1 << index_msb) - 1);
626 break; 626 break;
627 default: 627 default:
628 break; 628 break;
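
The l2_id/l3_id change is subtle: shifting the APIC ID right by index_msb yields a compacted cache index, while masking off the low index_msb bits keeps the ID in APIC-ID space, consistent with how the other topology IDs are derived. A standalone illustration with made-up numbers:

	#include <stdio.h>

	/* same rounding-up log2 as the kernel's get_count_order() */
	static int count_order(unsigned int n)
	{
		int order = 0;

		while ((1u << order) < n)
			order++;
		return order;
	}

	int main(void)
	{
		unsigned int apicid = 0x1b;	/* made-up APIC ID */
		int msb = count_order(4);	/* 4 threads share the cache -> 2 */

		printf("shift: %#x\n", apicid >> msb);			/* 0x6  */
		printf("mask:  %#x\n", apicid & ~((1u << msb) - 1));	/* 0x18 */
		return 0;
	}
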
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 5502b289341b..36565373af87 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -23,7 +23,7 @@
23 * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) 23 * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
24 * 24 *
25 * Arrays used to match for this should also be declared using 25 * Arrays used to match for this should also be declared using
26 * MODULE_DEVICE_TABLE(x86_cpu, ...) 26 * MODULE_DEVICE_TABLE(x86cpu, ...)
27 * 27 *
28 * This always matches against the boot cpu, assuming models and features are 28 * This always matches against the boot cpu, assuming models and features are
29 * consistent over all CPUs. 29 * consistent over all CPUs.
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d086a09c087d..2afcbd253e1d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -583,7 +583,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
583 struct mce m; 583 struct mce m;
584 int i; 584 int i;
585 585
586 percpu_inc(mce_poll_count); 586 this_cpu_inc(mce_poll_count);
587 587
588 mce_gather_info(&m, NULL); 588 mce_gather_info(&m, NULL);
589 589
@@ -945,9 +945,10 @@ struct mce_info {
945 atomic_t inuse; 945 atomic_t inuse;
946 struct task_struct *t; 946 struct task_struct *t;
947 __u64 paddr; 947 __u64 paddr;
948 int restartable;
948} mce_info[MCE_INFO_MAX]; 949} mce_info[MCE_INFO_MAX];
949 950
950static void mce_save_info(__u64 addr) 951static void mce_save_info(__u64 addr, int c)
951{ 952{
952 struct mce_info *mi; 953 struct mce_info *mi;
953 954
@@ -955,6 +956,7 @@ static void mce_save_info(__u64 addr)
955 if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { 956 if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
956 mi->t = current; 957 mi->t = current;
957 mi->paddr = addr; 958 mi->paddr = addr;
959 mi->restartable = c;
958 return; 960 return;
959 } 961 }
960 } 962 }
@@ -1015,7 +1017,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1015 1017
1016 atomic_inc(&mce_entry); 1018 atomic_inc(&mce_entry);
1017 1019
1018 percpu_inc(mce_exception_count); 1020 this_cpu_inc(mce_exception_count);
1019 1021
1020 if (!banks) 1022 if (!banks)
1021 goto out; 1023 goto out;
@@ -1130,7 +1132,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
1130 mce_panic("Fatal machine check on current CPU", &m, msg); 1132 mce_panic("Fatal machine check on current CPU", &m, msg);
1131 if (worst == MCE_AR_SEVERITY) { 1133 if (worst == MCE_AR_SEVERITY) {
1132 /* schedule action before return to userland */ 1134 /* schedule action before return to userland */
1133 mce_save_info(m.addr); 1135 mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV);
1134 set_thread_flag(TIF_MCE_NOTIFY); 1136 set_thread_flag(TIF_MCE_NOTIFY);
1135 } else if (kill_it) { 1137 } else if (kill_it) {
1136 force_sig(SIGBUS, current); 1138 force_sig(SIGBUS, current);
@@ -1179,7 +1181,13 @@ void mce_notify_process(void)
1179 1181
1180 pr_err("Uncorrected hardware memory error in user-access at %llx", 1182 pr_err("Uncorrected hardware memory error in user-access at %llx",
1181 mi->paddr); 1183 mi->paddr);
1182 if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) { 1184 /*
1185 * We must call memory_failure() here even if the current process is
1186 * doomed. We still need to mark the page as poisoned and alert any
1187 * other users of the page.
1188 */
1189 if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
1190 mi->restartable == 0) {
1183 pr_err("Memory error not recovered"); 1191 pr_err("Memory error not recovered");
1184 force_sig(SIGBUS, current); 1192 force_sig(SIGBUS, current);
1185 } 1193 }
@@ -1423,6 +1431,43 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1423 */ 1431 */
1424 if (c->x86 == 6 && banks > 0) 1432 if (c->x86 == 6 && banks > 0)
1425 mce_banks[0].ctl = 0; 1433 mce_banks[0].ctl = 0;
1434
1435 /*
1436 * Turn off MC4_MISC thresholding banks on those models since
1437 * they're not supported there.
1438 */
1439 if (c->x86 == 0x15 &&
1440 (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
1441 int i;
1442 u64 val, hwcr;
1443 bool need_toggle;
1444 u32 msrs[] = {
1445 0x00000413, /* MC4_MISC0 */
1446 0xc0000408, /* MC4_MISC1 */
1447 };
1448
1449 rdmsrl(MSR_K7_HWCR, hwcr);
1450
1451 /* McStatusWrEn has to be set */
1452 need_toggle = !(hwcr & BIT(18));
1453
1454 if (need_toggle)
1455 wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
1456
1457 for (i = 0; i < ARRAY_SIZE(msrs); i++) {
1458 rdmsrl(msrs[i], val);
1459
1460 /* CntP bit set? */
1461 if (val & BIT(62)) {
1462 val &= ~BIT(62);
1463 wrmsrl(msrs[i], val);
1464 }
1465 }
1466
1467 /* restore old settings */
1468 if (need_toggle)
1469 wrmsrl(MSR_K7_HWCR, hwcr);
1470 }
1426 } 1471 }
1427 1472
1428 if (c->x86_vendor == X86_VENDOR_INTEL) { 1473 if (c->x86_vendor == X86_VENDOR_INTEL) {
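
The mce.c changes thread one extra bit through the recovery path: restartable records MCG_STATUS_RIPV at exception time, and RIPV clear means the saved instruction pointer cannot be resumed, so the task must be killed even when memory_failure() handled the poisoned page. The decision condenses to (hypothetical helper):

	#include <stdbool.h>

	/* hypothetical condensation of the check in mce_notify_process() */
	static bool mce_can_recover(int memory_failure_ret, bool restartable)
	{
		/* page handling must succeed AND the interrupted context
		 * must be resumable (RIPV was set in MCG_STATUS) */
		return memory_failure_ret >= 0 && restartable;
	}
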
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 99b57179f912..f4873a64f46d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -51,6 +51,7 @@ struct threshold_block {
51 unsigned int cpu; 51 unsigned int cpu;
52 u32 address; 52 u32 address;
53 u16 interrupt_enable; 53 u16 interrupt_enable;
54 bool interrupt_capable;
54 u16 threshold_limit; 55 u16 threshold_limit;
55 struct kobject kobj; 56 struct kobject kobj;
56 struct list_head miscj; 57 struct list_head miscj;
@@ -83,6 +84,21 @@ struct thresh_restart {
83 u16 old_limit; 84 u16 old_limit;
84}; 85};
85 86
87static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
88{
89 /*
90 * bank 4 supports APIC LVT interrupts implicitly since forever.
91 */
92 if (bank == 4)
93 return true;
94
95 /*
96 * IntP: interrupt present; if this bit is set, the thresholding
97 * bank can generate APIC LVT interrupts
98 */
99 return msr_high_bits & BIT(28);
100}
101
86static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) 102static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
87{ 103{
88 int msr = (hi & MASK_LVTOFF_HI) >> 20; 104 int msr = (hi & MASK_LVTOFF_HI) >> 20;
@@ -104,8 +120,10 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
104 return 1; 120 return 1;
105}; 121};
106 122
107/* must be called with correct cpu affinity */ 123/*
108/* Called via smp_call_function_single() */ 124 * Called via smp_call_function_single(), must be called with correct
125 * cpu affinity.
126 */
109static void threshold_restart_bank(void *_tr) 127static void threshold_restart_bank(void *_tr)
110{ 128{
111 struct thresh_restart *tr = _tr; 129 struct thresh_restart *tr = _tr;
@@ -128,6 +146,12 @@ static void threshold_restart_bank(void *_tr)
128 (new_count & THRESHOLD_MAX); 146 (new_count & THRESHOLD_MAX);
129 } 147 }
130 148
149 /* clear IntType */
150 hi &= ~MASK_INT_TYPE_HI;
151
152 if (!tr->b->interrupt_capable)
153 goto done;
154
131 if (tr->set_lvt_off) { 155 if (tr->set_lvt_off) {
132 if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { 156 if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
133 /* set new lvt offset */ 157 /* set new lvt offset */
@@ -136,9 +160,10 @@ static void threshold_restart_bank(void *_tr)
136 } 160 }
137 } 161 }
138 162
139 tr->b->interrupt_enable ? 163 if (tr->b->interrupt_enable)
140 (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 164 hi |= INT_TYPE_APIC;
141 (hi &= ~MASK_INT_TYPE_HI); 165
166 done:
142 167
143 hi |= MASK_COUNT_EN_HI; 168 hi |= MASK_COUNT_EN_HI;
144 wrmsr(tr->b->address, lo, hi); 169 wrmsr(tr->b->address, lo, hi);
@@ -202,14 +227,17 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
202 if (shared_bank[bank] && c->cpu_core_id) 227 if (shared_bank[bank] && c->cpu_core_id)
203 break; 228 break;
204 229
205 offset = setup_APIC_mce(offset,
206 (high & MASK_LVTOFF_HI) >> 20);
207
208 memset(&b, 0, sizeof(b)); 230 memset(&b, 0, sizeof(b));
209 b.cpu = cpu; 231 b.cpu = cpu;
210 b.bank = bank; 232 b.bank = bank;
211 b.block = block; 233 b.block = block;
212 b.address = address; 234 b.address = address;
235 b.interrupt_capable = lvt_interrupt_supported(bank, high);
236
237 if (b.interrupt_capable) {
238 int new = (high & MASK_LVTOFF_HI) >> 20;
239 offset = setup_APIC_mce(offset, new);
240 }
213 241
214 mce_threshold_block_init(&b, offset); 242 mce_threshold_block_init(&b, offset);
215 mce_threshold_vector = amd_threshold_interrupt; 243 mce_threshold_vector = amd_threshold_interrupt;
@@ -309,6 +337,9 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
309 struct thresh_restart tr; 337 struct thresh_restart tr;
310 unsigned long new; 338 unsigned long new;
311 339
340 if (!b->interrupt_capable)
341 return -EINVAL;
342
312 if (strict_strtoul(buf, 0, &new) < 0) 343 if (strict_strtoul(buf, 0, &new) < 0)
313 return -EINVAL; 344 return -EINVAL;
314 345
@@ -390,10 +421,10 @@ RW_ATTR(threshold_limit);
390RW_ATTR(error_count); 421RW_ATTR(error_count);
391 422
392static struct attribute *default_attrs[] = { 423static struct attribute *default_attrs[] = {
393 &interrupt_enable.attr,
394 &threshold_limit.attr, 424 &threshold_limit.attr,
395 &error_count.attr, 425 &error_count.attr,
396 NULL 426 NULL, /* possibly interrupt_enable if supported, see below */
427 NULL,
397}; 428};
398 429
399#define to_block(k) container_of(k, struct threshold_block, kobj) 430#define to_block(k) container_of(k, struct threshold_block, kobj)
@@ -467,8 +498,14 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
467 b->cpu = cpu; 498 b->cpu = cpu;
468 b->address = address; 499 b->address = address;
469 b->interrupt_enable = 0; 500 b->interrupt_enable = 0;
501 b->interrupt_capable = lvt_interrupt_supported(bank, high);
470 b->threshold_limit = THRESHOLD_MAX; 502 b->threshold_limit = THRESHOLD_MAX;
471 503
504 if (b->interrupt_capable)
505 threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
506 else
507 threshold_ktype.default_attrs[2] = NULL;
508
472 INIT_LIST_HEAD(&b->miscj); 509 INIT_LIST_HEAD(&b->miscj);
473 510
474 if (per_cpu(threshold_banks, cpu)[bank]->blocks) { 511 if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
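
The default_attrs[] rework relies on the list being NULL-terminated: leaving slot 2 NULL terminates the list early and hides interrupt_enable, while pointing it at the attribute (the final NULL still terminating) exposes it only on banks whose hardware can raise the LVT interrupt. The same trick on a generic string list:

	#include <stddef.h>

	/* NULL-terminated list with one optional slot at the end */
	static const char *attrs_sketch[] = {
		"threshold_limit",
		"error_count",
		NULL,		/* optional entry, or an early terminator */
		NULL,		/* final terminator */
	};

	static void publish_optional(int supported)
	{
		attrs_sketch[2] = supported ? "interrupt_enable" : NULL;
	}
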
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bb8e03407e18..e049d6da0183 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event)
484 484
485 /* mark unused */ 485 /* mark unused */
486 event->hw.extra_reg.idx = EXTRA_REG_NONE; 486 event->hw.extra_reg.idx = EXTRA_REG_NONE;
487
488 /* mark not used */
489 event->hw.extra_reg.idx = EXTRA_REG_NONE;
490 event->hw.branch_reg.idx = EXTRA_REG_NONE; 487 event->hw.branch_reg.idx = EXTRA_REG_NONE;
491 488
492 return x86_pmu.hw_config(event); 489 return x86_pmu.hw_config(event);
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
1186 int idx, handled = 0; 1183 int idx, handled = 0;
1187 u64 val; 1184 u64 val;
1188 1185
1189 perf_sample_data_init(&data, 0);
1190
1191 cpuc = &__get_cpu_var(cpu_hw_events); 1186 cpuc = &__get_cpu_var(cpu_hw_events);
1192 1187
1193 /* 1188 /*
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
1222 * event overflow 1217 * event overflow
1223 */ 1218 */
1224 handled++; 1219 handled++;
1225 data.period = event->hw.last_period; 1220 perf_sample_data_init(&data, 0, event->hw.last_period);
1226 1221
1227 if (!x86_perf_event_set_period(event)) 1222 if (!x86_perf_event_set_period(event))
1228 continue; 1223 continue;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 95e7fe1c5f0b..65652265fffd 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)
134 134
135static int amd_pmu_hw_config(struct perf_event *event) 135static int amd_pmu_hw_config(struct perf_event *event)
136{ 136{
137 int ret = x86_pmu_hw_config(event); 137 int ret;
138 138
139 /* pass precise event sampling to ibs: */
140 if (event->attr.precise_ip && get_ibs_caps())
141 return -ENOENT;
142
143 ret = x86_pmu_hw_config(event);
139 if (ret) 144 if (ret)
140 return ret; 145 return ret;
141 146
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
205 * when we come here 210 * when we come here
206 */ 211 */
207 for (i = 0; i < x86_pmu.num_counters; i++) { 212 for (i = 0; i < x86_pmu.num_counters; i++) {
208 if (nb->owners[i] == event) { 213 if (cmpxchg(nb->owners + i, event, NULL) == event)
209 cmpxchg(nb->owners+i, event, NULL);
210 break; 214 break;
211 }
212 } 215 }
213} 216}
214 217
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d30d14e..da9bcdcd9856 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@
9#include <linux/perf_event.h> 9#include <linux/perf_event.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/ptrace.h>
12 13
13#include <asm/apic.h> 14#include <asm/apic.h>
14 15
@@ -16,36 +17,591 @@ static u32 ibs_caps;
16 17
17#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) 18#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
18 19
19static struct pmu perf_ibs; 20#include <linux/kprobes.h>
21#include <linux/hardirq.h>
22
23#include <asm/nmi.h>
24
25#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
26#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
27
28enum ibs_states {
29 IBS_ENABLED = 0,
30 IBS_STARTED = 1,
31 IBS_STOPPING = 2,
32
33 IBS_MAX_STATES,
34};
35
36struct cpu_perf_ibs {
37 struct perf_event *event;
38 unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
39};
40
41struct perf_ibs {
42 struct pmu pmu;
43 unsigned int msr;
44 u64 config_mask;
45 u64 cnt_mask;
46 u64 enable_mask;
47 u64 valid_mask;
48 u64 max_period;
49 unsigned long offset_mask[1];
50 int offset_max;
51 struct cpu_perf_ibs __percpu *pcpu;
52 u64 (*get_count)(u64 config);
53};
54
55struct perf_ibs_data {
56 u32 size;
57 union {
58 u32 data[0]; /* data buffer starts here */
59 u32 caps;
60 };
61 u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
62};
63
64static int
65perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
66{
67 s64 left = local64_read(&hwc->period_left);
68 s64 period = hwc->sample_period;
69 int overflow = 0;
70
71 /*
72 * If we are way outside a reasonable range then just skip forward:
73 */
74 if (unlikely(left <= -period)) {
75 left = period;
76 local64_set(&hwc->period_left, left);
77 hwc->last_period = period;
78 overflow = 1;
79 }
80
81 if (unlikely(left < (s64)min)) {
82 left += period;
83 local64_set(&hwc->period_left, left);
84 hwc->last_period = period;
85 overflow = 1;
86 }
87
88 /*
89 * If the hw period that triggers the sw overflow is too short
90 * we might hit the irq handler. This biases the results.
91 * Thus we shorten the next-to-last period and set the last
92 * period to the max period.
93 */
94 if (left > max) {
95 left -= max;
96 if (left > max)
97 left = max;
98 else if (left < min)
99 left = min;
100 }
101
102 *hw_period = (u64)left;
103
104 return overflow;
105}
106
107static int
108perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
109{
110 struct hw_perf_event *hwc = &event->hw;
111 int shift = 64 - width;
112 u64 prev_raw_count;
113 u64 delta;
114
115 /*
116 * Careful: an NMI might modify the previous event value.
117 *
118 * Our tactic to handle this is to first atomically read and
119 * exchange a new raw count - then add that new-prev delta
120 * count to the generic event atomically:
121 */
122 prev_raw_count = local64_read(&hwc->prev_count);
123 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
124 new_raw_count) != prev_raw_count)
125 return 0;
126
127 /*
128 * Now we have the new raw value and have updated the prev
129 * timestamp already. We can now calculate the elapsed delta
130 * (event-)time and add that to the generic event.
131 *
132 * Careful, not all hw sign-extends above the physical width
133 * of the count.
134 */
135 delta = (new_raw_count << shift) - (prev_raw_count << shift);
136 delta >>= shift;
137
138 local64_add(delta, &event->count);
139 local64_sub(delta, &hwc->period_left);
140
141 return 1;
142}
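
A quick standalone check of the width trick implemented above, with made-up values: for a 48-bit counter, shift is 16, and a wrap from 0xffffffffffff to 2 must come out as a delta of 3 (...ffff -> 0 -> 1 -> 2):

	#include <assert.h>
	#include <stdint.h>

	static void check_width_trick(void)
	{
		int shift = 64 - 48;
		uint64_t prev = 0xffffffffffffULL;
		uint64_t now = 2;
		uint64_t delta = ((now << shift) - (prev << shift)) >> shift;

		assert(delta == 3);	/* wrap handled despite the narrow counter */
	}
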
143
144static struct perf_ibs perf_ibs_fetch;
145static struct perf_ibs perf_ibs_op;
146
147static struct perf_ibs *get_ibs_pmu(int type)
148{
149 if (perf_ibs_fetch.pmu.type == type)
150 return &perf_ibs_fetch;
151 if (perf_ibs_op.pmu.type == type)
152 return &perf_ibs_op;
153 return NULL;
154}
155
156/*
157 * Use IBS for precise event sampling:
158 *
159 * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
160 * perf record -a -e r076:p ... # same as -e cpu-cycles:p
161 * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
162 *
163 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
164 * MSRC001_1033) is used to select either cycle or micro-ops counting
165 * mode.
166 *
167 * The rip of IBS samples has skid 0. Thus, IBS supports precise
168 * levels 1 and 2 and the PERF_EFLAGS_EXACT flag is set. In rare
169 * cases the rip is invalid because IBS was not able to record it
170 * correctly; we then clear PERF_EFLAGS_EXACT and take the rip
171 * from pt_regs instead.
172 */
173static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
174{
175 switch (event->attr.precise_ip) {
176 case 0:
177 return -ENOENT;
178 case 1:
179 case 2:
180 break;
181 default:
182 return -EOPNOTSUPP;
183 }
184
185 switch (event->attr.type) {
186 case PERF_TYPE_HARDWARE:
187 switch (event->attr.config) {
188 case PERF_COUNT_HW_CPU_CYCLES:
189 *config = 0;
190 return 0;
191 }
192 break;
193 case PERF_TYPE_RAW:
194 switch (event->attr.config) {
195 case 0x0076:
196 *config = 0;
197 return 0;
198 case 0x00C1:
199 *config = IBS_OP_CNT_CTL;
200 return 0;
201 }
202 break;
203 default:
204 return -ENOENT;
205 }
206
207 return -EOPNOTSUPP;
208}
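
For illustration, a hedged user-space sketch of an event that perf_ibs_precise_event() above accepts (assumes an AMD CPU with IBS, this series' routing in place, and a raw syscall wrapper; IBS events must be attached to a CPU, not a task):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <string.h>
	#include <unistd.h>

	static int open_precise_cycles(int cpu)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;	/* or raw 0x0076 / 0x00C1 */
		attr.sample_period = 100000;
		attr.precise_ip = 2;	/* request skid 0; handled by IBS here */

		/* pid = -1 with an explicit cpu: system-wide on that CPU */
		return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
	}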
20 209
21static int perf_ibs_init(struct perf_event *event) 210static int perf_ibs_init(struct perf_event *event)
22{ 211{
23 if (perf_ibs.type != event->attr.type) 212 struct hw_perf_event *hwc = &event->hw;
213 struct perf_ibs *perf_ibs;
214 u64 max_cnt, config;
215 int ret;
216
217 perf_ibs = get_ibs_pmu(event->attr.type);
218 if (perf_ibs) {
219 config = event->attr.config;
220 } else {
221 perf_ibs = &perf_ibs_op;
222 ret = perf_ibs_precise_event(event, &config);
223 if (ret)
224 return ret;
225 }
226
227 if (event->pmu != &perf_ibs->pmu)
24 return -ENOENT; 228 return -ENOENT;
229
230 if (config & ~perf_ibs->config_mask)
231 return -EINVAL;
232
233 if (hwc->sample_period) {
234 if (config & perf_ibs->cnt_mask)
235 /* raw max_cnt may not be set */
236 return -EINVAL;
237 if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
238 /*
239 * lower 4 bits cannot be set in the ibs max cnt,
240 * but we allow it in case we later adjust the
241 * sample period to set a frequency.
242 */
243 return -EINVAL;
244 hwc->sample_period &= ~0x0FULL;
245 if (!hwc->sample_period)
246 hwc->sample_period = 0x10;
247 } else {
248 max_cnt = config & perf_ibs->cnt_mask;
249 config &= ~perf_ibs->cnt_mask;
250 event->attr.sample_period = max_cnt << 4;
251 hwc->sample_period = event->attr.sample_period;
252 }
253
254 if (!hwc->sample_period)
255 return -EINVAL;
256
257 /*
258 * If we modify hwc->sample_period, we also need to update
259 * hwc->last_period and hwc->period_left.
260 */
261 hwc->last_period = hwc->sample_period;
262 local64_set(&hwc->period_left, hwc->sample_period);
263
264 hwc->config_base = perf_ibs->msr;
265 hwc->config = config;
266
25 return 0; 267 return 0;
26} 268}
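
The period handling above boils down to: the hardware MaxCnt fields store the period with the low 4 bits stripped, so the effective period is always a multiple of 16 and at least 16. A small sketch, with an assumed 16-bit mask value (the real masks live in msr-index.h):

	#include <stdio.h>

	typedef unsigned long long u64;

	#define IBS_MAX_CNT_MASK 0xFFFFULL	/* assumed MaxCnt field width */

	int main(void)
	{
		/* a raw config carrying max_cnt, as in the else-branch above */
		u64 config = 0x1234;
		u64 period = (config & IBS_MAX_CNT_MASK) << 4;
		printf("period from raw config: %#llx\n", period);	/* 0x12340 */

		/* a requested sample_period, as in the if-branch above */
		u64 req = 0x1001;
		req &= ~0xFULL;			/* hw ignores the low 4 bits */
		if (!req)
			req = 0x10;		/* enforce the minimum */
		printf("effective period: %#llx\n", req);		/* 0x1000 */
		return 0;
	}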
27 269
270static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
271 struct hw_perf_event *hwc, u64 *period)
272{
273 int overflow;
274
275 /* ignore lower 4 bits in min count: */
276 overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
277 local64_set(&hwc->prev_count, 0);
278
279 return overflow;
280}
281
282static u64 get_ibs_fetch_count(u64 config)
283{
284 return (config & IBS_FETCH_CNT) >> 12;
285}
286
287static u64 get_ibs_op_count(u64 config)
288{
289 u64 count = 0;
290
291 if (config & IBS_OP_VAL)
292 count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
293
294 if (ibs_caps & IBS_CAPS_RDWROPCNT)
295 count += (config & IBS_OP_CUR_CNT) >> 32;
296
297 return count;
298}
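
To make the accumulation in get_ibs_op_count() concrete: when the valid bit is set the programmed period (max_cnt << 4) has elapsed in full, and on CPUs with the RDWROPCNT capability the in-flight remainder from IbsOpCurCnt is added on top. A sketch with assumed bit positions:

	#include <stdio.h>

	typedef unsigned long long u64;

	#define OP_VAL_BIT	(1ULL << 18)		/* assumed: IbsOpVal */
	#define OP_MAX_CNT	0xFFFFULL		/* assumed: IbsOpMaxCnt */
	#define OP_CUR_CNT	(0xFFFFULL << 32)	/* assumed: IbsOpCurCnt */

	static u64 op_count_sketch(u64 config, int rdwropcnt)
	{
		u64 count = 0;

		if (config & OP_VAL_BIT)
			count += (config & OP_MAX_CNT) << 4;	/* rolled over */
		if (rdwropcnt)
			count += (config & OP_CUR_CNT) >> 32;	/* remainder */
		return count;
	}

	int main(void)
	{
		u64 config = OP_VAL_BIT | 0x1000 | (0x25ULL << 32);

		printf("%#llx\n", op_count_sketch(config, 1));	/* 0x10025 */
		return 0;
	}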
299
300static void
301perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
302 u64 *config)
303{
304 u64 count = perf_ibs->get_count(*config);
305
306 /*
307 * Set width to 64 since we do not overflow on max width but
308 * instead on max count. In perf_ibs_set_period() we clear
309 * prev count manually on overflow.
310 */
311 while (!perf_event_try_update(event, count, 64)) {
312 rdmsrl(event->hw.config_base, *config);
313 count = perf_ibs->get_count(*config);
314 }
315}
316
317static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
318 struct hw_perf_event *hwc, u64 config)
319{
320 wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
321}
322
323/*
324 * Erratum #420 Instruction-Based Sampling Engine May Generate
325 * Interrupt that Cannot Be Cleared:
326 *
327 * Must clear counter mask first, then clear the enable bit. See
328 * Revision Guide for AMD Family 10h Processors, Publication #41322.
329 */
330static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
331 struct hw_perf_event *hwc, u64 config)
332{
333 config &= ~perf_ibs->cnt_mask;
334 wrmsrl(hwc->config_base, config);
335 config &= ~perf_ibs->enable_mask;
336 wrmsrl(hwc->config_base, config);
337}
338
339/*
340 * We cannot restore the ibs pmu state, so we always need to update
341 * the event while stopping it and then reset the state when starting
342 * again. Thus we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
343 * in perf_ibs_start()/perf_ibs_stop() and always do both.
344 */
345static void perf_ibs_start(struct perf_event *event, int flags)
346{
347 struct hw_perf_event *hwc = &event->hw;
348 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
349 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
350 u64 period;
351
352 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
353 return;
354
355 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
356 hwc->state = 0;
357
358 perf_ibs_set_period(perf_ibs, hwc, &period);
359 set_bit(IBS_STARTED, pcpu->state);
360 perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
361
362 perf_event_update_userpage(event);
363}
364
365static void perf_ibs_stop(struct perf_event *event, int flags)
366{
367 struct hw_perf_event *hwc = &event->hw;
368 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
369 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
370 u64 config;
371 int stopping;
372
373 stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
374
375 if (!stopping && (hwc->state & PERF_HES_UPTODATE))
376 return;
377
378 rdmsrl(hwc->config_base, config);
379
380 if (stopping) {
381 set_bit(IBS_STOPPING, pcpu->state);
382 perf_ibs_disable_event(perf_ibs, hwc, config);
383 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
384 hwc->state |= PERF_HES_STOPPED;
385 }
386
387 if (hwc->state & PERF_HES_UPTODATE)
388 return;
389
390 /*
391 * Clear the valid bit so rollovers are not counted on update;
392 * rollovers are only accounted in the irq handler.
393 */
394 config &= ~perf_ibs->valid_mask;
395
396 perf_ibs_event_update(perf_ibs, event, &config);
397 hwc->state |= PERF_HES_UPTODATE;
398}
399
28static int perf_ibs_add(struct perf_event *event, int flags) 400static int perf_ibs_add(struct perf_event *event, int flags)
29{ 401{
402 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
403 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
404
405 if (test_and_set_bit(IBS_ENABLED, pcpu->state))
406 return -ENOSPC;
407
408 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
409
410 pcpu->event = event;
411
412 if (flags & PERF_EF_START)
413 perf_ibs_start(event, PERF_EF_RELOAD);
414
30 return 0; 415 return 0;
31} 416}
32 417
33static void perf_ibs_del(struct perf_event *event, int flags) 418static void perf_ibs_del(struct perf_event *event, int flags)
34{ 419{
420 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
421 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
422
423 if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
424 return;
425
426 perf_ibs_stop(event, PERF_EF_UPDATE);
427
428 pcpu->event = NULL;
429
430 perf_event_update_userpage(event);
35} 431}
36 432
37static struct pmu perf_ibs = { 433static void perf_ibs_read(struct perf_event *event) { }
38 .event_init= perf_ibs_init, 434
39 .add= perf_ibs_add, 435static struct perf_ibs perf_ibs_fetch = {
40 .del= perf_ibs_del, 436 .pmu = {
437 .task_ctx_nr = perf_invalid_context,
438
439 .event_init = perf_ibs_init,
440 .add = perf_ibs_add,
441 .del = perf_ibs_del,
442 .start = perf_ibs_start,
443 .stop = perf_ibs_stop,
444 .read = perf_ibs_read,
445 },
446 .msr = MSR_AMD64_IBSFETCHCTL,
447 .config_mask = IBS_FETCH_CONFIG_MASK,
448 .cnt_mask = IBS_FETCH_MAX_CNT,
449 .enable_mask = IBS_FETCH_ENABLE,
450 .valid_mask = IBS_FETCH_VAL,
451 .max_period = IBS_FETCH_MAX_CNT << 4,
452 .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
453 .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
454
455 .get_count = get_ibs_fetch_count,
41}; 456};
42 457
458static struct perf_ibs perf_ibs_op = {
459 .pmu = {
460 .task_ctx_nr = perf_invalid_context,
461
462 .event_init = perf_ibs_init,
463 .add = perf_ibs_add,
464 .del = perf_ibs_del,
465 .start = perf_ibs_start,
466 .stop = perf_ibs_stop,
467 .read = perf_ibs_read,
468 },
469 .msr = MSR_AMD64_IBSOPCTL,
470 .config_mask = IBS_OP_CONFIG_MASK,
471 .cnt_mask = IBS_OP_MAX_CNT,
472 .enable_mask = IBS_OP_ENABLE,
473 .valid_mask = IBS_OP_VAL,
474 .max_period = IBS_OP_MAX_CNT << 4,
475 .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
476 .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
477
478 .get_count = get_ibs_op_count,
479};
480
481static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
482{
483 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
484 struct perf_event *event = pcpu->event;
485 struct hw_perf_event *hwc = &event->hw;
486 struct perf_sample_data data;
487 struct perf_raw_record raw;
488 struct pt_regs regs;
489 struct perf_ibs_data ibs_data;
490 int offset, size, check_rip, offset_max, throttle = 0;
491 unsigned int msr;
492 u64 *buf, *config, period;
493
494 if (!test_bit(IBS_STARTED, pcpu->state)) {
495 /*
496 * Catch spurious interrupts after stopping IBS: After
497 * disabling IBS there could still be incoming NMIs
498 * with samples that even have the valid bit cleared.
499 * Mark all these NMIs as handled.
500 */
501 return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
502 }
503
504 msr = hwc->config_base;
505 buf = ibs_data.regs;
506 rdmsrl(msr, *buf);
507 if (!(*buf++ & perf_ibs->valid_mask))
508 return 0;
509
510 config = &ibs_data.regs[0];
511 perf_ibs_event_update(perf_ibs, event, config);
512 perf_sample_data_init(&data, 0, hwc->last_period);
513 if (!perf_ibs_set_period(perf_ibs, hwc, &period))
514 goto out; /* no sw counter overflow */
515
516 ibs_data.caps = ibs_caps;
517 size = 1;
518 offset = 1;
519 check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
520 if (event->attr.sample_type & PERF_SAMPLE_RAW)
521 offset_max = perf_ibs->offset_max;
522 else if (check_rip)
523 offset_max = 2;
524 else
525 offset_max = 1;
526 do {
527 rdmsrl(msr + offset, *buf++);
528 size++;
529 offset = find_next_bit(perf_ibs->offset_mask,
530 perf_ibs->offset_max,
531 offset + 1);
532 } while (offset < offset_max);
533 ibs_data.size = sizeof(u64) * size;
534
535 regs = *iregs;
536 if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
537 regs.flags &= ~PERF_EFLAGS_EXACT;
538 } else {
539 instruction_pointer_set(&regs, ibs_data.regs[1]);
540 regs.flags |= PERF_EFLAGS_EXACT;
541 }
542
543 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
544 raw.size = sizeof(u32) + ibs_data.size;
545 raw.data = ibs_data.data;
546 data.raw = &raw;
547 }
548
549 throttle = perf_event_overflow(event, &data, &regs);
550out:
551 if (throttle)
552 perf_ibs_disable_event(perf_ibs, hwc, *config);
553 else
554 perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
555
556 perf_event_update_userpage(event);
557
558 return 1;
559}
560
561static int __kprobes
562perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
563{
564 int handled = 0;
565
566 handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
567 handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
568
569 if (handled)
570 inc_irq_stat(apic_perf_irqs);
571
572 return handled;
573}
574
575static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
576{
577 struct cpu_perf_ibs __percpu *pcpu;
578 int ret;
579
580 pcpu = alloc_percpu(struct cpu_perf_ibs);
581 if (!pcpu)
582 return -ENOMEM;
583
584 perf_ibs->pcpu = pcpu;
585
586 ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
587 if (ret) {
588 perf_ibs->pcpu = NULL;
589 free_percpu(pcpu);
590 }
591
592 return ret;
593}
594
43static __init int perf_event_ibs_init(void) 595static __init int perf_event_ibs_init(void)
44{ 596{
45 if (!ibs_caps) 597 if (!ibs_caps)
46 return -ENODEV; /* ibs not supported by the cpu */ 598 return -ENODEV; /* ibs not supported by the cpu */
47 599
48 perf_pmu_register(&perf_ibs, "ibs", -1); 600 perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
601 if (ibs_caps & IBS_CAPS_OPCNT)
602 perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
603 perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
604 register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
49 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); 605 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
50 606
51 return 0; 607 return 0;
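
Since the two PMUs are now registered by name, their dynamically assigned types show up in sysfs. A hedged sketch of opening ibs_op directly by type (sysfs path and period are assumptions for illustration):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static int open_ibs_op(void)
	{
		struct perf_event_attr attr;
		unsigned int type;
		/* path assumed: standard dynamic-PMU sysfs layout */
		FILE *f = fopen("/sys/bus/event_source/devices/ibs_op/type", "r");

		if (!f)
			return -1;
		if (fscanf(f, "%u", &type) != 1) {
			fclose(f);
			return -1;
		}
		fclose(f);

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = type;		/* assigned by perf_pmu_register() */
		attr.config = 0;		/* must stay within config_mask */
		attr.sample_period = 0x10000;	/* low 4 bits must be clear */

		return syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	}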
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2fef104..166546ec6aef 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
1027 u64 status; 1027 u64 status;
1028 int handled; 1028 int handled;
1029 1029
1030 perf_sample_data_init(&data, 0);
1031
1032 cpuc = &__get_cpu_var(cpu_hw_events); 1030 cpuc = &__get_cpu_var(cpu_hw_events);
1033 1031
1034 /* 1032 /*
@@ -1082,7 +1080,7 @@ again:
1082 if (!intel_pmu_save_and_restart(event)) 1080 if (!intel_pmu_save_and_restart(event))
1083 continue; 1081 continue;
1084 1082
1085 data.period = event->hw.last_period; 1083 perf_sample_data_init(&data, 0, event->hw.last_period);
1086 1084
1087 if (has_branch_stack(event)) 1085 if (has_branch_stack(event))
1088 data.br_stack = &cpuc->lbr_stack; 1086 data.br_stack = &cpuc->lbr_stack;
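
These hunks, and the matching ones in the files below, all follow from a perf core change that moves the sampling period into perf_sample_data_init(). A hedged reconstruction of the new inline from include/linux/perf_event.h (the field set beyond what the hunks show is an assumption):

	static inline void perf_sample_data_init(struct perf_sample_data *data,
						 u64 addr, u64 period)
	{
		/* the remaining fields are filled in by perf_prepare_sample() */
		data->addr	= addr;
		data->raw	= NULL;
		data->br_stack	= NULL;
		data->period	= period;
	}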
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df19e7dd..5a3edc27f6e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void)
316 316
317 ds->bts_index = ds->bts_buffer_base; 317 ds->bts_index = ds->bts_buffer_base;
318 318
319 perf_sample_data_init(&data, 0); 319 perf_sample_data_init(&data, 0, event->hw.last_period);
320 data.period = event->hw.last_period;
321 regs.ip = 0; 320 regs.ip = 0;
322 321
323 /* 322 /*
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
564 if (!intel_pmu_save_and_restart(event)) 563 if (!intel_pmu_save_and_restart(event))
565 return; 564 return;
566 565
567 perf_sample_data_init(&data, 0); 566 perf_sample_data_init(&data, 0, event->hw.last_period);
568 data.period = event->hw.last_period;
569 567
570 /* 568 /*
571 * We use the interrupt regs as a base because the PEBS record 569 * We use the interrupt regs as a base because the PEBS record
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacfd7103..47124a73dd73 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
1005 int idx, handled = 0; 1005 int idx, handled = 0;
1006 u64 val; 1006 u64 val;
1007 1007
1008 perf_sample_data_init(&data, 0);
1009
1010 cpuc = &__get_cpu_var(cpu_hw_events); 1008 cpuc = &__get_cpu_var(cpu_hw_events);
1011 1009
1012 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1010 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
1034 handled += overflow; 1032 handled += overflow;
1035 1033
1036 /* event overflow for sure */ 1034 /* event overflow for sure */
1037 data.period = event->hw.last_period; 1035 perf_sample_data_init(&data, 0, hwc->last_period);
1038 1036
1039 if (!x86_perf_event_set_period(event)) 1037 if (!x86_perf_event_set_period(event))
1040 continue; 1038 continue;
1039
1040
1041 if (perf_event_overflow(event, &data, regs)) 1041 if (perf_event_overflow(event, &data, regs))
1042 x86_pmu_stop(event, 0); 1042 x86_pmu_stop(event, 0);
1043 } 1043 }
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 1b81839b6c88..571246d81edf 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -271,7 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
271 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) 271 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
272 return 1; 272 return 1;
273 273
274 show_registers(regs); 274 show_regs(regs);
275#ifdef CONFIG_X86_32 275#ifdef CONFIG_X86_32
276 if (user_mode_vm(regs)) { 276 if (user_mode_vm(regs)) {
277 sp = regs->sp; 277 sp = regs->sp;
@@ -311,16 +311,33 @@ void die(const char *str, struct pt_regs *regs, long err)
311 311
312static int __init kstack_setup(char *s) 312static int __init kstack_setup(char *s)
313{ 313{
314 ssize_t ret;
315 unsigned long val;
316
314 if (!s) 317 if (!s)
315 return -EINVAL; 318 return -EINVAL;
316 kstack_depth_to_print = simple_strtoul(s, NULL, 0); 319
320 ret = kstrtoul(s, 0, &val);
321 if (ret)
322 return ret;
323 kstack_depth_to_print = val;
317 return 0; 324 return 0;
318} 325}
319early_param("kstack", kstack_setup); 326early_param("kstack", kstack_setup);
320 327
321static int __init code_bytes_setup(char *s) 328static int __init code_bytes_setup(char *s)
322{ 329{
323 code_bytes = simple_strtoul(s, NULL, 0); 330 ssize_t ret;
331 unsigned long val;
332
333 if (!s)
334 return -EINVAL;
335
336 ret = kstrtoul(s, 0, &val);
337 if (ret)
338 return ret;
339
340 code_bytes = val;
324 if (code_bytes > 8192) 341 if (code_bytes > 8192)
325 code_bytes = 8192; 342 code_bytes = 8192;
326 343
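
The point of the kstrtoul() conversion: simple_strtoul() cannot report failure, so "kstack=junk" silently became 0, while kstrtoul() is all-or-nothing. A user-space sketch of roughly the semantics it guarantees (the real function also tolerates a single trailing newline):

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* roughly kstrtoul(): store the value only if the whole string parses */
	static int kstrtoul_like(const char *s, unsigned int base,
				 unsigned long *res)
	{
		char *end;
		unsigned long val;

		errno = 0;
		val = strtoul(s, &end, base);
		if (errno == ERANGE)
			return -ERANGE;
		if (end == s || *end != '\0')
			return -EINVAL;		/* no digits, or trailing junk */
		*res = val;
		return 0;
	}

	int main(void)
	{
		unsigned long v = 0;
		int ret;

		printf("%d\n", kstrtoul_like("12junk", 0, &v));	/* -EINVAL */
		ret = kstrtoul_like("0x20", 0, &v);
		printf("%d %lu\n", ret, v);			/* 0 32 */
		return 0;
	}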
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 88ec9129271d..e0b1d783daab 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -82,7 +82,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
82} 82}
83 83
84 84
85void show_registers(struct pt_regs *regs) 85void show_regs(struct pt_regs *regs)
86{ 86{
87 int i; 87 int i;
88 88
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 17107bd6e1f0..791b76122aa8 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -245,7 +245,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
245 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 245 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
246} 246}
247 247
248void show_registers(struct pt_regs *regs) 248void show_regs(struct pt_regs *regs)
249{ 249{
250 int i; 250 int i;
251 unsigned long sp; 251 unsigned long sp;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 7b784f4ef1e4..01ccf9b71473 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -56,6 +56,7 @@
56#include <asm/irq_vectors.h> 56#include <asm/irq_vectors.h>
57#include <asm/cpufeature.h> 57#include <asm/cpufeature.h>
58#include <asm/alternative-asm.h> 58#include <asm/alternative-asm.h>
59#include <asm/asm.h>
59 60
60/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
61#include <linux/elf-em.h> 62#include <linux/elf-em.h>
@@ -151,10 +152,8 @@
151.pushsection .fixup, "ax" 152.pushsection .fixup, "ax"
15299: movl $0, (%esp) 15399: movl $0, (%esp)
153 jmp 98b 154 jmp 98b
154.section __ex_table, "a"
155 .align 4
156 .long 98b, 99b
157.popsection 155.popsection
156 _ASM_EXTABLE(98b,99b)
158.endm 157.endm
159 158
160.macro PTGS_TO_GS 159.macro PTGS_TO_GS
@@ -164,10 +163,8 @@
164.pushsection .fixup, "ax" 163.pushsection .fixup, "ax"
16599: movl $0, PT_GS(%esp) 16499: movl $0, PT_GS(%esp)
166 jmp 98b 165 jmp 98b
167.section __ex_table, "a"
168 .align 4
169 .long 98b, 99b
170.popsection 166.popsection
167 _ASM_EXTABLE(98b,99b)
171.endm 168.endm
172 169
173.macro GS_TO_REG reg 170.macro GS_TO_REG reg
@@ -249,12 +246,10 @@
249 jmp 2b 246 jmp 2b
2506: movl $0, (%esp) 2476: movl $0, (%esp)
251 jmp 3b 248 jmp 3b
252.section __ex_table, "a"
253 .align 4
254 .long 1b, 4b
255 .long 2b, 5b
256 .long 3b, 6b
257.popsection 249.popsection
250 _ASM_EXTABLE(1b,4b)
251 _ASM_EXTABLE(2b,5b)
252 _ASM_EXTABLE(3b,6b)
258 POP_GS_EX 253 POP_GS_EX
259.endm 254.endm
260 255
@@ -415,10 +410,7 @@ sysenter_past_esp:
415 jae syscall_fault 410 jae syscall_fault
4161: movl (%ebp),%ebp 4111: movl (%ebp),%ebp
417 movl %ebp,PT_EBP(%esp) 412 movl %ebp,PT_EBP(%esp)
418.section __ex_table,"a" 413 _ASM_EXTABLE(1b,syscall_fault)
419 .align 4
420 .long 1b,syscall_fault
421.previous
422 414
423 GET_THREAD_INFO(%ebp) 415 GET_THREAD_INFO(%ebp)
424 416
@@ -485,10 +477,8 @@ sysexit_audit:
485.pushsection .fixup,"ax" 477.pushsection .fixup,"ax"
4862: movl $0,PT_FS(%esp) 4782: movl $0,PT_FS(%esp)
487 jmp 1b 479 jmp 1b
488.section __ex_table,"a"
489 .align 4
490 .long 1b,2b
491.popsection 480.popsection
481 _ASM_EXTABLE(1b,2b)
492 PTGS_TO_GS_EX 482 PTGS_TO_GS_EX
493ENDPROC(ia32_sysenter_target) 483ENDPROC(ia32_sysenter_target)
494 484
@@ -543,10 +533,7 @@ ENTRY(iret_exc)
543 pushl $do_iret_error 533 pushl $do_iret_error
544 jmp error_code 534 jmp error_code
545.previous 535.previous
546.section __ex_table,"a" 536 _ASM_EXTABLE(irq_return,iret_exc)
547 .align 4
548 .long irq_return,iret_exc
549.previous
550 537
551 CFI_RESTORE_STATE 538 CFI_RESTORE_STATE
552ldt_ss: 539ldt_ss:
@@ -901,10 +888,7 @@ END(device_not_available)
901#ifdef CONFIG_PARAVIRT 888#ifdef CONFIG_PARAVIRT
902ENTRY(native_iret) 889ENTRY(native_iret)
903 iret 890 iret
904.section __ex_table,"a" 891 _ASM_EXTABLE(native_iret, iret_exc)
905 .align 4
906 .long native_iret, iret_exc
907.previous
908END(native_iret) 892END(native_iret)
909 893
910ENTRY(native_irq_enable_sysexit) 894ENTRY(native_irq_enable_sysexit)
@@ -1093,13 +1077,10 @@ ENTRY(xen_failsafe_callback)
1093 movl %eax,16(%esp) 1077 movl %eax,16(%esp)
1094 jmp 4b 1078 jmp 4b
1095.previous 1079.previous
1096.section __ex_table,"a" 1080 _ASM_EXTABLE(1b,6b)
1097 .align 4 1081 _ASM_EXTABLE(2b,7b)
1098 .long 1b,6b 1082 _ASM_EXTABLE(3b,8b)
1099 .long 2b,7b 1083 _ASM_EXTABLE(4b,9b)
1100 .long 3b,8b
1101 .long 4b,9b
1102.previous
1103ENDPROC(xen_failsafe_callback) 1084ENDPROC(xen_failsafe_callback)
1104 1085
1105BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, 1086BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
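
All of these hunks replace the same open-coded __ex_table fragments with the _ASM_EXTABLE() macro from <asm/asm.h>. A rough reconstruction of the assembler-side definition; this series also converts the table to 32-bit relative entries, so the exact body here is an assumption:

	/* reconstructed sketch of <asm/asm.h>, relative-entry variant */
	#ifdef __ASSEMBLY__
	# define _ASM_EXTABLE(from, to)			\
		.pushsection "__ex_table", "a" ;	\
		.balign 8 ;				\
		.long (from) - . ;			\
		.long (to) - . ;			\
		.popsection
	#endif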
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cdc79b5cfcd9..320852d02026 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
55#include <asm/paravirt.h> 55#include <asm/paravirt.h>
56#include <asm/ftrace.h> 56#include <asm/ftrace.h>
57#include <asm/percpu.h> 57#include <asm/percpu.h>
58#include <asm/asm.h>
58#include <linux/err.h> 59#include <linux/err.h>
59 60
60/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -900,18 +901,12 @@ restore_args:
900 901
901irq_return: 902irq_return:
902 INTERRUPT_RETURN 903 INTERRUPT_RETURN
903 904 _ASM_EXTABLE(irq_return, bad_iret)
904 .section __ex_table, "a"
905 .quad irq_return, bad_iret
906 .previous
907 905
908#ifdef CONFIG_PARAVIRT 906#ifdef CONFIG_PARAVIRT
909ENTRY(native_iret) 907ENTRY(native_iret)
910 iretq 908 iretq
911 909 _ASM_EXTABLE(native_iret, bad_iret)
912 .section __ex_table,"a"
913 .quad native_iret, bad_iret
914 .previous
915#endif 910#endif
916 911
917 .section .fixup,"ax" 912 .section .fixup,"ax"
@@ -1181,10 +1176,7 @@ gs_change:
1181 CFI_ENDPROC 1176 CFI_ENDPROC
1182END(native_load_gs_index) 1177END(native_load_gs_index)
1183 1178
1184 .section __ex_table,"a" 1179 _ASM_EXTABLE(gs_change,bad_gs)
1185 .align 8
1186 .quad gs_change,bad_gs
1187 .previous
1188 .section .fixup,"ax" 1180 .section .fixup,"ax"
1189 /* running with kernelgs */ 1181 /* running with kernelgs */
1190bad_gs: 1182bad_gs:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c9a281f272fd..32ff36596ab1 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,40 +24,21 @@
24#include <trace/syscall.h> 24#include <trace/syscall.h>
25 25
26#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
27#include <asm/kprobes.h>
27#include <asm/ftrace.h> 28#include <asm/ftrace.h>
28#include <asm/nops.h> 29#include <asm/nops.h>
29#include <asm/nmi.h>
30
31 30
32#ifdef CONFIG_DYNAMIC_FTRACE 31#ifdef CONFIG_DYNAMIC_FTRACE
33 32
34/*
35 * modifying_code is set to notify NMIs that they need to use
36 * memory barriers when entering or exiting. But we don't want
37 * to burden NMIs with unnecessary memory barriers when code
38 * modification is not being done (which is most of the time).
39 *
40 * A mutex is already held when ftrace_arch_code_modify_prepare
41 * and post_process are called. No locks need to be taken here.
42 *
43 * Stop machine will make sure currently running NMIs are done
44 * and new NMIs will see the updated variable before we need
45 * to worry about NMIs doing memory barriers.
46 */
47static int modifying_code __read_mostly;
48static DEFINE_PER_CPU(int, save_modifying_code);
49
50int ftrace_arch_code_modify_prepare(void) 33int ftrace_arch_code_modify_prepare(void)
51{ 34{
52 set_kernel_text_rw(); 35 set_kernel_text_rw();
53 set_all_modules_text_rw(); 36 set_all_modules_text_rw();
54 modifying_code = 1;
55 return 0; 37 return 0;
56} 38}
57 39
58int ftrace_arch_code_modify_post_process(void) 40int ftrace_arch_code_modify_post_process(void)
59{ 41{
60 modifying_code = 0;
61 set_all_modules_text_ro(); 42 set_all_modules_text_ro();
62 set_kernel_text_ro(); 43 set_kernel_text_ro();
63 return 0; 44 return 0;
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
90 return calc.code; 71 return calc.code;
91} 72}
92 73
93/*
94 * Modifying code must take extra care. On an SMP machine, if
95 * the code being modified is also being executed on another CPU
96 * that CPU will have undefined results and possibly take a GPF.
 97 * We use kstop_machine to stop other CPUs from executing code.
98 * But this does not stop NMIs from happening. We still need
99 * to protect against that. We separate out the modification of
100 * the code to take care of this.
101 *
102 * Two buffers are added: An IP buffer and a "code" buffer.
103 *
104 * 1) Put the instruction pointer into the IP buffer
105 * and the new code into the "code" buffer.
106 * 2) Wait for any running NMIs to finish and set a flag that says
107 * we are modifying code, it is done in an atomic operation.
108 * 3) Write the code
109 * 4) clear the flag.
110 * 5) Wait for any running NMIs to finish.
111 *
112 * If an NMI is executed, the first thing it does is to call
113 * "ftrace_nmi_enter". This will check if the flag is set to write
114 * and if it is, it will write what is in the IP and "code" buffers.
115 *
116 * The trick is, it does not matter if everyone is writing the same
117 * content to the code location. Also, if a CPU is executing code
118 * it is OK to write to that code location if the contents being written
119 * are the same as what exists.
120 */
121
122#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
123static atomic_t nmi_running = ATOMIC_INIT(0);
124static int mod_code_status; /* holds return value of text write */
125static void *mod_code_ip; /* holds the IP to write to */
126static const void *mod_code_newcode; /* holds the text to write to the IP */
127
128static unsigned nmi_wait_count;
129static atomic_t nmi_update_count = ATOMIC_INIT(0);
130
131int ftrace_arch_read_dyn_info(char *buf, int size)
132{
133 int r;
134
135 r = snprintf(buf, size, "%u %u",
136 nmi_wait_count,
137 atomic_read(&nmi_update_count));
138 return r;
139}
140
141static void clear_mod_flag(void)
142{
143 int old = atomic_read(&nmi_running);
144
145 for (;;) {
146 int new = old & ~MOD_CODE_WRITE_FLAG;
147
148 if (old == new)
149 break;
150
151 old = atomic_cmpxchg(&nmi_running, old, new);
152 }
153}
154
155static void ftrace_mod_code(void)
156{
157 /*
158 * Yes, more than one CPU process can be writing to mod_code_status.
159 * (and the code itself)
160 * But if one were to fail, then they all should, and if one were
161 * to succeed, then they all should.
162 */
163 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
164 MCOUNT_INSN_SIZE);
165
166 /* if we fail, then kill any new writers */
167 if (mod_code_status)
168 clear_mod_flag();
169}
170
171void ftrace_nmi_enter(void)
172{
173 __this_cpu_write(save_modifying_code, modifying_code);
174
175 if (!__this_cpu_read(save_modifying_code))
176 return;
177
178 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
179 smp_rmb();
180 ftrace_mod_code();
181 atomic_inc(&nmi_update_count);
182 }
183 /* Must have previous changes seen before executions */
184 smp_mb();
185}
186
187void ftrace_nmi_exit(void)
188{
189 if (!__this_cpu_read(save_modifying_code))
190 return;
191
192 /* Finish all executions before clearing nmi_running */
193 smp_mb();
194 atomic_dec(&nmi_running);
195}
196
197static void wait_for_nmi_and_set_mod_flag(void)
198{
199 if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
200 return;
201
202 do {
203 cpu_relax();
204 } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
205
206 nmi_wait_count++;
207}
208
209static void wait_for_nmi(void)
210{
211 if (!atomic_read(&nmi_running))
212 return;
213
214 do {
215 cpu_relax();
216 } while (atomic_read(&nmi_running));
217
218 nmi_wait_count++;
219}
220
221static inline int 74static inline int
222within(unsigned long addr, unsigned long start, unsigned long end) 75within(unsigned long addr, unsigned long start, unsigned long end)
223{ 76{
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
238 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 91 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
239 ip = (unsigned long)__va(__pa(ip)); 92 ip = (unsigned long)__va(__pa(ip));
240 93
241 mod_code_ip = (void *)ip; 94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
242 mod_code_newcode = new_code;
243
244 /* The buffers need to be visible before we let NMIs write them */
245 smp_mb();
246
247 wait_for_nmi_and_set_mod_flag();
248
249 /* Make sure all running NMIs have finished before we write the code */
250 smp_mb();
251
252 ftrace_mod_code();
253
254 /* Make sure the write happens before clearing the bit */
255 smp_mb();
256
257 clear_mod_flag();
258 wait_for_nmi();
259
260 return mod_code_status;
261} 95}
262 96
263static const unsigned char *ftrace_nop_replace(void) 97static const unsigned char *ftrace_nop_replace(void)
@@ -334,6 +168,336 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
334 return ret; 168 return ret;
335} 169}
336 170
171int modifying_ftrace_code __read_mostly;
172
173/*
174 * A breakpoint was added to the code address we are about to
175 * modify, and this is the handler that will just skip over it.
176 * We are either changing a nop into a trace call, or a trace
177 * call to a nop. While the change is taking place, we treat
178 * it just like it was a nop.
179 */
180int ftrace_int3_handler(struct pt_regs *regs)
181{
182 if (WARN_ON_ONCE(!regs))
183 return 0;
184
185 if (!ftrace_location(regs->ip - 1))
186 return 0;
187
188 regs->ip += MCOUNT_INSN_SIZE - 1;
189
190 return 1;
191}
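
The ip arithmetic above, made concrete: the int3 trap pushes the address after the 1-byte breakpoint, so regs->ip - 1 is the patched site, and advancing by MCOUNT_INSN_SIZE - 1 resumes after the whole 5-byte instruction. A minimal sketch:

	#include <stdio.h>

	#define MCOUNT_INSN_SIZE 5	/* size of the call/nop being patched */

	int main(void)
	{
		unsigned long site = 0x1000;	  /* first byte holds the int3 */
		unsigned long trap_ip = site + 1; /* ip pushed by the trap */

		/* ftrace_location(trap_ip - 1) looks up the site itself ... */
		unsigned long lookup = trap_ip - 1;
		/* ... and the handler then skips the remaining 4 bytes */
		unsigned long resume = trap_ip + MCOUNT_INSN_SIZE - 1;

		printf("lookup=%#lx resume=%#lx\n", lookup, resume);
		return 0;	/* prints lookup=0x1000 resume=0x1005 */
	}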
192
193static int ftrace_write(unsigned long ip, const char *val, int size)
194{
195 /*
196 * On x86_64, kernel text mappings are mapped read-only with
197 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
198 * of the kernel text mapping to modify the kernel text.
199 *
200 * For 32bit kernels, these mappings are the same and we can
201 * use the kernel identity mapping to modify code.
202 */
203 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
204 ip = (unsigned long)__va(__pa(ip));
205
206 return probe_kernel_write((void *)ip, val, size);
207}
208
209static int add_break(unsigned long ip, const char *old)
210{
211 unsigned char replaced[MCOUNT_INSN_SIZE];
212 unsigned char brk = BREAKPOINT_INSTRUCTION;
213
214 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
215 return -EFAULT;
216
217 /* Make sure it is what we expect it to be */
218 if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
219 return -EINVAL;
220
221 if (ftrace_write(ip, &brk, 1))
222 return -EPERM;
223
224 return 0;
225}
226
227static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
228{
229 unsigned const char *old;
230 unsigned long ip = rec->ip;
231
232 old = ftrace_call_replace(ip, addr);
233
234 return add_break(rec->ip, old);
235}
236
237
238static int add_brk_on_nop(struct dyn_ftrace *rec)
239{
240 unsigned const char *old;
241
242 old = ftrace_nop_replace();
243
244 return add_break(rec->ip, old);
245}
246
247static int add_breakpoints(struct dyn_ftrace *rec, int enable)
248{
249 unsigned long ftrace_addr;
250 int ret;
251
252 ret = ftrace_test_record(rec, enable);
253
254 ftrace_addr = (unsigned long)FTRACE_ADDR;
255
256 switch (ret) {
257 case FTRACE_UPDATE_IGNORE:
258 return 0;
259
260 case FTRACE_UPDATE_MAKE_CALL:
261 /* converting nop to call */
262 return add_brk_on_nop(rec);
263
264 case FTRACE_UPDATE_MAKE_NOP:
265 /* converting a call to a nop */
266 return add_brk_on_call(rec, ftrace_addr);
267 }
268 return 0;
269}
270
271/*
272 * On error, we need to remove breakpoints. This needs to
273 * be done carefully. If the address does not currently have a
274 * breakpoint, we know we are done. Otherwise, we look at the
275 * remaining 4 bytes of the instruction. If it matches a nop
276 * we replace the breakpoint with the nop. Otherwise we replace
277 * it with the call instruction.
278 */
279static int remove_breakpoint(struct dyn_ftrace *rec)
280{
281 unsigned char ins[MCOUNT_INSN_SIZE];
282 unsigned char brk = BREAKPOINT_INSTRUCTION;
283 const unsigned char *nop;
284 unsigned long ftrace_addr;
285 unsigned long ip = rec->ip;
286
287 /* If we fail the read, just give up */
288 if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
289 return -EFAULT;
290
291 /* If this does not have a breakpoint, we are done */
292 if (ins[0] != brk)
293 return -1;
294
295 nop = ftrace_nop_replace();
296
297 /*
298 * If the last 4 bytes of the instruction do not match
299 * a nop, then we assume that this is a call to ftrace_addr.
300 */
301 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
302 /*
303 * For extra paranoia, we check if the breakpoint is on
304 * a call that would actually jump to the ftrace_addr.
305 * If not, don't touch the breakpoint; we may just create
306 * a disaster.
307 */
308 ftrace_addr = (unsigned long)FTRACE_ADDR;
309 nop = ftrace_call_replace(ip, ftrace_addr);
310
311 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
312 return -EINVAL;
313 }
314
315 return probe_kernel_write((void *)ip, &nop[0], 1);
316}
317
318static int add_update_code(unsigned long ip, unsigned const char *new)
319{
320 /* skip breakpoint */
321 ip++;
322 new++;
323 if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
324 return -EPERM;
325 return 0;
326}
327
328static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
329{
330 unsigned long ip = rec->ip;
331 unsigned const char *new;
332
333 new = ftrace_call_replace(ip, addr);
334 return add_update_code(ip, new);
335}
336
337static int add_update_nop(struct dyn_ftrace *rec)
338{
339 unsigned long ip = rec->ip;
340 unsigned const char *new;
341
342 new = ftrace_nop_replace();
343 return add_update_code(ip, new);
344}
345
346static int add_update(struct dyn_ftrace *rec, int enable)
347{
348 unsigned long ftrace_addr;
349 int ret;
350
351 ret = ftrace_test_record(rec, enable);
352
353 ftrace_addr = (unsigned long)FTRACE_ADDR;
354
355 switch (ret) {
356 case FTRACE_UPDATE_IGNORE:
357 return 0;
358
359 case FTRACE_UPDATE_MAKE_CALL:
360 /* converting nop to call */
361 return add_update_call(rec, ftrace_addr);
362
363 case FTRACE_UPDATE_MAKE_NOP:
364 /* converting a call to a nop */
365 return add_update_nop(rec);
366 }
367
368 return 0;
369}
370
371static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
372{
373 unsigned long ip = rec->ip;
374 unsigned const char *new;
375
376 new = ftrace_call_replace(ip, addr);
377
378 if (ftrace_write(ip, new, 1))
379 return -EPERM;
380
381 return 0;
382}
383
384static int finish_update_nop(struct dyn_ftrace *rec)
385{
386 unsigned long ip = rec->ip;
387 unsigned const char *new;
388
389 new = ftrace_nop_replace();
390
391 if (ftrace_write(ip, new, 1))
392 return -EPERM;
393 return 0;
394}
395
396static int finish_update(struct dyn_ftrace *rec, int enable)
397{
398 unsigned long ftrace_addr;
399 int ret;
400
401 ret = ftrace_update_record(rec, enable);
402
403 ftrace_addr = (unsigned long)FTRACE_ADDR;
404
405 switch (ret) {
406 case FTRACE_UPDATE_IGNORE:
407 return 0;
408
409 case FTRACE_UPDATE_MAKE_CALL:
410 /* converting nop to call */
411 return finish_update_call(rec, ftrace_addr);
412
413 case FTRACE_UPDATE_MAKE_NOP:
414 /* converting a call to a nop */
415 return finish_update_nop(rec);
416 }
417
418 return 0;
419}
420
421static void do_sync_core(void *data)
422{
423 sync_core();
424}
425
426static void run_sync(void)
427{
428 int enable_irqs = irqs_disabled();
429
430 /* We may be called with interrupts disabled (on bootup). */
431 if (enable_irqs)
432 local_irq_enable();
433 on_each_cpu(do_sync_core, NULL, 1);
434 if (enable_irqs)
435 local_irq_disable();
436}
437
438void ftrace_replace_code(int enable)
439{
440 struct ftrace_rec_iter *iter;
441 struct dyn_ftrace *rec;
442 const char *report = "adding breakpoints";
443 int count = 0;
444 int ret;
445
446 for_ftrace_rec_iter(iter) {
447 rec = ftrace_rec_iter_record(iter);
448
449 ret = add_breakpoints(rec, enable);
450 if (ret)
451 goto remove_breakpoints;
452 count++;
453 }
454
455 run_sync();
456
457 report = "updating code";
458
459 for_ftrace_rec_iter(iter) {
460 rec = ftrace_rec_iter_record(iter);
461
462 ret = add_update(rec, enable);
463 if (ret)
464 goto remove_breakpoints;
465 }
466
467 run_sync();
468
469 report = "removing breakpoints";
470
471 for_ftrace_rec_iter(iter) {
472 rec = ftrace_rec_iter_record(iter);
473
474 ret = finish_update(rec, enable);
475 if (ret)
476 goto remove_breakpoints;
477 }
478
479 run_sync();
480
481 return;
482
483 remove_breakpoints:
484 ftrace_bug(ret, rec ? rec->ip : 0);
485 printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
486 for_ftrace_rec_iter(iter) {
487 rec = ftrace_rec_iter_record(iter);
488 remove_breakpoint(rec);
489 }
490}
491
492void arch_ftrace_update_code(int command)
493{
494 modifying_ftrace_code++;
495
496 ftrace_modify_all_code(command);
497
498 modifying_ftrace_code--;
499}
500
337int __init ftrace_dyn_arch_init(void *data) 501int __init ftrace_dyn_arch_init(void *data)
338{ 502{
339 /* The return code is returned via data */ 503
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index ce0be7cd085e..463c9797ca6a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -21,6 +21,7 @@
21#include <asm/msr-index.h> 21#include <asm/msr-index.h>
22#include <asm/cpufeature.h> 22#include <asm/cpufeature.h>
23#include <asm/percpu.h> 23#include <asm/percpu.h>
24#include <asm/nops.h>
24 25
25/* Physical address */ 26/* Physical address */
26#define pa(X) ((X) - __PAGE_OFFSET) 27#define pa(X) ((X) - __PAGE_OFFSET)
@@ -363,28 +364,23 @@ default_entry:
363 pushl $0 364 pushl $0
364 popfl 365 popfl
365 366
366#ifdef CONFIG_SMP
367 cmpb $0, ready
368 jnz checkCPUtype
369#endif /* CONFIG_SMP */
370
371/* 367/*
372 * start system 32-bit setup. We need to re-do some of the things done 368 * start system 32-bit setup. We need to re-do some of the things done
373 * in 16-bit mode for the "real" operations. 369 * in 16-bit mode for the "real" operations.
374 */ 370 */
375 call setup_idt 371 movl setup_once_ref,%eax
376 372 andl %eax,%eax
377checkCPUtype: 373 jz 1f # Did we do this already?
378 374 call *%eax
379 movl $-1,X86_CPUID # -1 for no CPUID initially 3751:
380 376
381/* check if it is 486 or 386. */ 377/* check if it is 486 or 386. */
382/* 378/*
383 * XXX - this does a lot of unnecessary setup. Alignment checks don't 379 * XXX - this does a lot of unnecessary setup. Alignment checks don't
384 * apply at our cpl of 0 and the stack ought to be aligned already, and 380 * apply at our cpl of 0 and the stack ought to be aligned already, and
385 * we don't need to preserve eflags. 381 * we don't need to preserve eflags.
386 */ 382 */
387 383 movl $-1,X86_CPUID # -1 for no CPUID initially
388 movb $3,X86 # at least 386 384 movb $3,X86 # at least 386
389 pushfl # push EFLAGS 385 pushfl # push EFLAGS
390 popl %eax # get EFLAGS 386 popl %eax # get EFLAGS
@@ -450,21 +446,6 @@ is386: movl $2,%ecx # set MP
450 movl $(__KERNEL_PERCPU), %eax 446 movl $(__KERNEL_PERCPU), %eax
451 movl %eax,%fs # set this cpu's percpu 447 movl %eax,%fs # set this cpu's percpu
452 448
453#ifdef CONFIG_CC_STACKPROTECTOR
454 /*
455 * The linker can't handle this by relocation. Manually set
456 * base address in stack canary segment descriptor.
457 */
458 cmpb $0,ready
459 jne 1f
460 movl $gdt_page,%eax
461 movl $stack_canary,%ecx
462 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
463 shrl $16, %ecx
464 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
465 movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
4661:
467#endif
468 movl $(__KERNEL_STACK_CANARY),%eax 449 movl $(__KERNEL_STACK_CANARY),%eax
469 movl %eax,%gs 450 movl %eax,%gs
470 451
@@ -473,7 +454,6 @@ is386: movl $2,%ecx # set MP
473 454
474 cld # gcc2 wants the direction flag cleared at all times 455 cld # gcc2 wants the direction flag cleared at all times
475 pushl $0 # fake return address for unwinder 456 pushl $0 # fake return address for unwinder
476 movb $1, ready
477 jmp *(initial_code) 457 jmp *(initial_code)
478 458
479/* 459/*
@@ -495,81 +475,122 @@ check_x87:
495 .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ 475 .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
496 ret 476 ret
497 477
478
479#include "verify_cpu.S"
480
498/* 481/*
499 * setup_idt 482 * setup_once
500 * 483 *
501 * sets up a idt with 256 entries pointing to 484 * The setup work we only want to run on the BSP.
502 * ignore_int, interrupt gates. It doesn't actually load
503 * idt - that can be done only after paging has been enabled
504 * and the kernel moved to PAGE_OFFSET. Interrupts
505 * are enabled elsewhere, when we can be relatively
506 * sure everything is ok.
507 * 485 *
508 * Warning: %esi is live across this function. 486 * Warning: %esi is live across this function.
509 */ 487 */
510setup_idt: 488__INIT
511 lea ignore_int,%edx 489setup_once:
512 movl $(__KERNEL_CS << 16),%eax 490 /*
 513 movw %dx,%ax /* selector = 0x0010 = cs */ 491 * Set up an idt with 256 entries pointing to ignore_int,
 514 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ 492 * interrupt gates. It doesn't actually load the idt - that needs
493 * to be done on each CPU. Interrupts are enabled elsewhere,
494 * when we can be relatively sure everything is ok.
495 */
515 496
516 lea idt_table,%edi 497 movl $idt_table,%edi
517 mov $256,%ecx 498 movl $early_idt_handlers,%eax
518rp_sidt: 499 movl $NUM_EXCEPTION_VECTORS,%ecx
5001:
519 movl %eax,(%edi) 501 movl %eax,(%edi)
520 movl %edx,4(%edi) 502 movl %eax,4(%edi)
503 /* interrupt gate, dpl=0, present */
504 movl $(0x8E000000 + __KERNEL_CS),2(%edi)
505 addl $9,%eax
521 addl $8,%edi 506 addl $8,%edi
522 dec %ecx 507 loop 1b
523 jne rp_sidt
524 508
525.macro set_early_handler handler,trapno 509 movl $256 - NUM_EXCEPTION_VECTORS,%ecx
526 lea \handler,%edx 510 movl $ignore_int,%edx
527 movl $(__KERNEL_CS << 16),%eax 511 movl $(__KERNEL_CS << 16),%eax
528 movw %dx,%ax 512 movw %dx,%ax /* selector = 0x0010 = cs */
529 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ 513 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
530 lea idt_table,%edi 5142:
531 movl %eax,8*\trapno(%edi) 515 movl %eax,(%edi)
532 movl %edx,8*\trapno+4(%edi) 516 movl %edx,4(%edi)
533.endm 517 addl $8,%edi
518 loop 2b
534 519
535 set_early_handler handler=early_divide_err,trapno=0 520#ifdef CONFIG_CC_STACKPROTECTOR
536 set_early_handler handler=early_illegal_opcode,trapno=6 521 /*
537 set_early_handler handler=early_protection_fault,trapno=13 522 * Configure the stack canary. The linker can't handle this by
538 set_early_handler handler=early_page_fault,trapno=14 523 * relocation. Manually set base address in stack canary
524 * segment descriptor.
525 */
526 movl $gdt_page,%eax
527 movl $stack_canary,%ecx
528 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
529 shrl $16, %ecx
530 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
531 movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
532#endif
539 533
534 andl $0,setup_once_ref /* Once is enough, thanks */
540 ret 535 ret
541 536
542early_divide_err: 537ENTRY(early_idt_handlers)
543 xor %edx,%edx 538 # 36(%esp) %eflags
544 pushl $0 /* fake errcode */ 539 # 32(%esp) %cs
545 jmp early_fault 540 # 28(%esp) %eip
 541 # 24(%esp) error code
542 i = 0
543 .rept NUM_EXCEPTION_VECTORS
544 .if (EXCEPTION_ERRCODE_MASK >> i) & 1
545 ASM_NOP2
546 .else
547 pushl $0 # Dummy error code, to make stack frame uniform
548 .endif
549 pushl $i # 20(%esp) Vector number
550 jmp early_idt_handler
551 i = i + 1
552 .endr
553ENDPROC(early_idt_handlers)
554
555 /* This is global to keep gas from relaxing the jumps */
556ENTRY(early_idt_handler)
557 cld
558 cmpl $2,%ss:early_recursion_flag
559 je hlt_loop
560 incl %ss:early_recursion_flag
546 561
547early_illegal_opcode: 562 push %eax # 16(%esp)
548 movl $6,%edx 563 push %ecx # 12(%esp)
549 pushl $0 /* fake errcode */ 564 push %edx # 8(%esp)
550 jmp early_fault 565 push %ds # 4(%esp)
566 push %es # 0(%esp)
567 movl $(__KERNEL_DS),%eax
568 movl %eax,%ds
569 movl %eax,%es
551 570
552early_protection_fault: 571 cmpl $(__KERNEL_CS),32(%esp)
553 movl $13,%edx 572 jne 10f
554 jmp early_fault
555 573
556early_page_fault: 574 leal 28(%esp),%eax # Pointer to %eip
557 movl $14,%edx 575 call early_fixup_exception
558 jmp early_fault 576 andl %eax,%eax
577 jnz ex_entry /* found an exception entry */
559 578
560early_fault: 57910:
561 cld
562#ifdef CONFIG_PRINTK 580#ifdef CONFIG_PRINTK
563 pusha 581 xorl %eax,%eax
564 movl $(__KERNEL_DS),%eax 582 movw %ax,2(%esp) /* clean up the segment values on some cpus */
565 movl %eax,%ds 583 movw %ax,6(%esp)
566 movl %eax,%es 584 movw %ax,34(%esp)
567 cmpl $2,early_recursion_flag 585 leal 40(%esp),%eax
568 je hlt_loop 586 pushl %eax /* %esp before the exception */
569 incl early_recursion_flag 587 pushl %ebx
588 pushl %ebp
589 pushl %esi
590 pushl %edi
570 movl %cr2,%eax 591 movl %cr2,%eax
571 pushl %eax 592 pushl %eax
572 pushl %edx /* trapno */ 593 pushl (20+6*4)(%esp) /* trapno */
573 pushl $fault_msg 594 pushl $fault_msg
574 call printk 595 call printk
575#endif 596#endif
@@ -578,6 +599,17 @@ hlt_loop:
578 hlt 599 hlt
579 jmp hlt_loop 600 jmp hlt_loop
580 601
602ex_entry:
603 pop %es
604 pop %ds
605 pop %edx
606 pop %ecx
607 pop %eax
608 addl $8,%esp /* drop vector number and error code */
609 decl %ss:early_recursion_flag
610 iret
611ENDPROC(early_idt_handler)
612
581/* This is the default interrupt "handler" :-) */ 613/* This is the default interrupt "handler" :-) */
582 ALIGN 614 ALIGN
583ignore_int: 615ignore_int:
@@ -611,13 +643,18 @@ ignore_int:
611 popl %eax 643 popl %eax
612#endif 644#endif
613 iret 645 iret
646ENDPROC(ignore_int)
647__INITDATA
648 .align 4
649early_recursion_flag:
650 .long 0
614 651
615#include "verify_cpu.S" 652__REFDATA
616 653 .align 4
617 __REFDATA
618.align 4
619ENTRY(initial_code) 654ENTRY(initial_code)
620 .long i386_start_kernel 655 .long i386_start_kernel
656ENTRY(setup_once_ref)
657 .long setup_once
621 658
622/* 659/*
623 * BSS section 660 * BSS section
@@ -670,22 +707,19 @@ ENTRY(initial_page_table)
670ENTRY(stack_start) 707ENTRY(stack_start)
671 .long init_thread_union+THREAD_SIZE 708 .long init_thread_union+THREAD_SIZE
672 709
673early_recursion_flag: 710__INITRODATA
674 .long 0
675
676ready: .byte 0
677
678int_msg: 711int_msg:
679 .asciz "Unknown interrupt or fault at: %p %p %p\n" 712 .asciz "Unknown interrupt or fault at: %p %p %p\n"
680 713
681fault_msg: 714fault_msg:
682/* fault info: */ 715/* fault info: */
683 .ascii "BUG: Int %d: CR2 %p\n" 716 .ascii "BUG: Int %d: CR2 %p\n"
684/* pusha regs: */ 717/* regs pushed in early_idt_handler: */
685 .ascii " EDI %p ESI %p EBP %p ESP %p\n" 718 .ascii " EDI %p ESI %p EBP %p EBX %p\n"
686 .ascii " EBX %p EDX %p ECX %p EAX %p\n" 719 .ascii " ESP %p ES %p DS %p\n"
720 .ascii " EDX %p ECX %p EAX %p\n"
687/* fault frame: */ 721/* fault frame: */
688 .ascii " err %p EIP %p CS %p flg %p\n" 722 .ascii " vec %p err %p EIP %p CS %p flg %p\n"
689 .ascii "Stack: %p %p %p %p %p %p %p %p\n" 723 .ascii "Stack: %p %p %p %p %p %p %p %p\n"
690 .ascii " %p %p %p %p %p %p %p %p\n" 724 .ascii " %p %p %p %p %p %p %p %p\n"
691 .asciz " %p %p %p %p %p %p %p %p\n" 725 .asciz " %p %p %p %p %p %p %p %p\n"
@@ -699,6 +733,7 @@ fault_msg:
699 * segment size, and 32-bit linear address value: 733 * segment size, and 32-bit linear address value:
700 */ 734 */
701 735
736 .data
702.globl boot_gdt_descr 737.globl boot_gdt_descr
703.globl idt_descr 738.globl idt_descr
704 739
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 40f4eb3766d1..7a40f2447321 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,12 +19,15 @@
19#include <asm/cache.h> 19#include <asm/cache.h>
20#include <asm/processor-flags.h> 20#include <asm/processor-flags.h>
21#include <asm/percpu.h> 21#include <asm/percpu.h>
22#include <asm/nops.h>
22 23
23#ifdef CONFIG_PARAVIRT 24#ifdef CONFIG_PARAVIRT
24#include <asm/asm-offsets.h> 25#include <asm/asm-offsets.h>
25#include <asm/paravirt.h> 26#include <asm/paravirt.h>
27#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
26#else 28#else
27#define GET_CR2_INTO_RCX movq %cr2, %rcx 29#define GET_CR2_INTO(reg) movq %cr2, reg
30#define INTERRUPT_RETURN iretq
28#endif 31#endif
29 32
30/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE 33/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
@@ -270,36 +273,56 @@ bad_address:
270 jmp bad_address 273 jmp bad_address
271 274
272 .section ".init.text","ax" 275 .section ".init.text","ax"
273#ifdef CONFIG_EARLY_PRINTK
274 .globl early_idt_handlers 276 .globl early_idt_handlers
275early_idt_handlers: 277early_idt_handlers:
278 # 104(%rsp) %rflags
279 # 96(%rsp) %cs
280 # 88(%rsp) %rip
281 # 80(%rsp) error code
276 i = 0 282 i = 0
277 .rept NUM_EXCEPTION_VECTORS 283 .rept NUM_EXCEPTION_VECTORS
278 movl $i, %esi 284 .if (EXCEPTION_ERRCODE_MASK >> i) & 1
285 ASM_NOP2
286 .else
287 pushq $0 # Dummy error code, to make stack frame uniform
288 .endif
289 pushq $i # 72(%rsp) Vector number
279 jmp early_idt_handler 290 jmp early_idt_handler
280 i = i + 1 291 i = i + 1
281 .endr 292 .endr
282#endif
283 293
284ENTRY(early_idt_handler) 294ENTRY(early_idt_handler)
285#ifdef CONFIG_EARLY_PRINTK 295 cld
296
286 cmpl $2,early_recursion_flag(%rip) 297 cmpl $2,early_recursion_flag(%rip)
287 jz 1f 298 jz 1f
288 incl early_recursion_flag(%rip) 299 incl early_recursion_flag(%rip)
289 GET_CR2_INTO_RCX 300
290 movq %rcx,%r9 301 pushq %rax # 64(%rsp)
291 xorl %r8d,%r8d # zero for error code 302 pushq %rcx # 56(%rsp)
292 movl %esi,%ecx # get vector number 303 pushq %rdx # 48(%rsp)
293 # Test %ecx against mask of vectors that push error code. 304 pushq %rsi # 40(%rsp)
294 cmpl $31,%ecx 305 pushq %rdi # 32(%rsp)
295 ja 0f 306 pushq %r8 # 24(%rsp)
296 movl $1,%eax 307 pushq %r9 # 16(%rsp)
297 salq %cl,%rax 308 pushq %r10 # 8(%rsp)
298 testl $0x27d00,%eax 309 pushq %r11 # 0(%rsp)
299 je 0f 310
300 popq %r8 # get error code 311 cmpl $__KERNEL_CS,96(%rsp)
3010: movq 0(%rsp),%rcx # get ip 312 jne 10f
302 movq 8(%rsp),%rdx # get cs 313
314 leaq 88(%rsp),%rdi # Pointer to %rip
315 call early_fixup_exception
316 andl %eax,%eax
317 jnz 20f # Found an exception entry
318
31910:
320#ifdef CONFIG_EARLY_PRINTK
321 GET_CR2_INTO(%r9) # can clobber any volatile register if pv
322 movl 80(%rsp),%r8d # error code
323 movl 72(%rsp),%esi # vector number
324 movl 96(%rsp),%edx # %cs
325 movq 88(%rsp),%rcx # %rip
303 xorl %eax,%eax 326 xorl %eax,%eax
304 leaq early_idt_msg(%rip),%rdi 327 leaq early_idt_msg(%rip),%rdi
305 call early_printk 328 call early_printk
@@ -308,17 +331,32 @@ ENTRY(early_idt_handler)
308 call dump_stack 331 call dump_stack
309#ifdef CONFIG_KALLSYMS 332#ifdef CONFIG_KALLSYMS
310 leaq early_idt_ripmsg(%rip),%rdi 333 leaq early_idt_ripmsg(%rip),%rdi
311 movq 0(%rsp),%rsi # get rip again 334 movq 40(%rsp),%rsi # %rip again
312 call __print_symbol 335 call __print_symbol
313#endif 336#endif
314#endif /* EARLY_PRINTK */ 337#endif /* EARLY_PRINTK */
3151: hlt 3381: hlt
316 jmp 1b 339 jmp 1b
317 340
318#ifdef CONFIG_EARLY_PRINTK 34120: # Exception table entry found
342 popq %r11
343 popq %r10
344 popq %r9
345 popq %r8
346 popq %rdi
347 popq %rsi
348 popq %rdx
349 popq %rcx
350 popq %rax
351 addq $16,%rsp # drop vector number and error code
352 decl early_recursion_flag(%rip)
353 INTERRUPT_RETURN
354
355 .balign 4
319early_recursion_flag: 356early_recursion_flag:
320 .long 0 357 .long 0
321 358
359#ifdef CONFIG_EARLY_PRINTK
322early_idt_msg: 360early_idt_msg:
323 .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" 361 .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
324early_idt_ripmsg: 362early_idt_ripmsg:
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 7734bcbb5a3a..f250431fb505 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -88,7 +88,7 @@ void kernel_fpu_begin(void)
88 __thread_clear_has_fpu(me); 88 __thread_clear_has_fpu(me);
89 /* We do 'stts()' in kernel_fpu_end() */ 89 /* We do 'stts()' in kernel_fpu_end() */
90 } else { 90 } else {
91 percpu_write(fpu_owner_task, NULL); 91 this_cpu_write(fpu_owner_task, NULL);
92 clts(); 92 clts();
93 } 93 }
94} 94}
@@ -235,6 +235,7 @@ int init_fpu(struct task_struct *tsk)
235 if (tsk_used_math(tsk)) { 235 if (tsk_used_math(tsk)) {
236 if (HAVE_HWFP && tsk == current) 236 if (HAVE_HWFP && tsk == current)
237 unlazy_fpu(tsk); 237 unlazy_fpu(tsk);
238 tsk->thread.fpu.last_cpu = ~0;
238 return 0; 239 return 0;
239 } 240 }
240 241
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
deleted file mode 100644
index 43e9ccf44947..000000000000
--- a/arch/x86/kernel/init_task.c
+++ /dev/null
@@ -1,42 +0,0 @@
1#include <linux/mm.h>
2#include <linux/module.h>
3#include <linux/sched.h>
4#include <linux/init.h>
5#include <linux/init_task.h>
6#include <linux/fs.h>
7#include <linux/mqueue.h>
8
9#include <asm/uaccess.h>
10#include <asm/pgtable.h>
11#include <asm/desc.h>
12
13static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
14static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
15
16/*
17 * Initial thread structure.
18 *
19 * We need to make sure that this is THREAD_SIZE aligned due to the
20 * way process stacks are handled. This is done by having a special
21 * "init_task" linker map entry..
22 */
23union thread_union init_thread_union __init_task_data =
24 { INIT_THREAD_INFO(init_task) };
25
26/*
27 * Initial task structure.
28 *
29 * All other task structs will be allocated on slabs in fork.c
30 */
31struct task_struct init_task = INIT_TASK(init_task);
32EXPORT_SYMBOL(init_task);
33
34/*
35 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
36 * no more per-task TSS's. The TSS size is kept cacheline-aligned
37 * so they are allowed to end up in the .data..cacheline_aligned
38 * section. Since TSS's are completely CPU-local, we want them
39 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
40 */
41DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
42
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 58b7f27cb3e9..344faf8d0d62 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -127,8 +127,8 @@ void __cpuinit irq_ctx_init(int cpu)
127 return; 127 return;
128 128
129 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), 129 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
130 THREAD_FLAGS, 130 THREADINFO_GFP,
131 THREAD_ORDER)); 131 THREAD_SIZE_ORDER));
132 memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); 132 memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
133 irqctx->tinfo.cpu = cpu; 133 irqctx->tinfo.cpu = cpu;
134 irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; 134 irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
@@ -137,8 +137,8 @@ void __cpuinit irq_ctx_init(int cpu)
 	per_cpu(hardirq_ctx, cpu) = irqctx;
 
 	irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
-					       THREAD_FLAGS,
-					       THREAD_ORDER));
+					       THREADINFO_GFP,
+					       THREAD_SIZE_ORDER));
 	memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
 	irqctx->tinfo.cpu		= cpu;
 	irqctx->tinfo.addr_limit	= MAKE_MM_SEG(0);
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e213fc8408d2..e2f751efb7b1 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1037,9 +1037,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 			       "current sp %p does not match saved sp %p\n",
 			       stack_addr(regs), kcb->jprobe_saved_sp);
 		printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
-		show_registers(saved_regs);
+		show_regs(saved_regs);
 		printk(KERN_ERR "Current registers\n");
-		show_registers(regs);
+		show_regs(regs);
 		BUG();
 	}
 	*regs = kcb->jprobe_saved_regs;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b8ba6e4a27e4..e554e5ad2fe8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -79,7 +79,6 @@ struct kvm_task_sleep_node {
 	u32 token;
 	int cpu;
 	bool halted;
-	struct mm_struct *mm;
 };
 
 static struct kvm_task_sleep_head {
@@ -126,9 +125,7 @@ void kvm_async_pf_task_wait(u32 token)
 
 	n.token = token;
 	n.cpu = smp_processor_id();
-	n.mm = current->active_mm;
 	n.halted = idle || preempt_count() > 1;
-	atomic_inc(&n.mm->mm_count);
 	init_waitqueue_head(&n.wq);
 	hlist_add_head(&n.link, &b->list);
 	spin_unlock(&b->lock);
@@ -161,9 +158,6 @@ EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
 static void apf_task_wake_one(struct kvm_task_sleep_node *n)
 {
 	hlist_del_init(&n->link);
-	if (!n->mm)
-		return;
-	mmdrop(n->mm);
 	if (n->halted)
 		smp_send_reschedule(n->cpu);
 	else if (waitqueue_active(&n->wq))
@@ -207,7 +201,7 @@ again:
 		 * async PF was not yet handled.
 		 * Add dummy entry for the token.
 		 */
-		n = kmalloc(sizeof(*n), GFP_ATOMIC);
+		n = kzalloc(sizeof(*n), GFP_ATOMIC);
 		if (!n) {
 			/*
 			 * Allocation failed! Busy wait while other cpu
@@ -219,7 +213,6 @@ again:
 		}
 		n->token = token;
 		n->cpu = smp_processor_id();
-		n->mm = NULL;
 		init_waitqueue_head(&n->wq);
 		hlist_add_head(&n->link, &b->list);
 	} else
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 73465aab28f8..8a2ce8fd41c0 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -82,11 +82,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
-		pr_warning("CPU%d: family %d not supported\n", cpu, c->x86);
-		return -1;
-	}
-
 	csig->rev = c->microcode;
 	pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
 
@@ -380,6 +375,13 @@ static struct microcode_ops microcode_amd_ops = {
 
 struct microcode_ops * __init init_amd_microcode(void)
 {
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
+		pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+		return NULL;
+	}
+
 	patch = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!patch)
 		return NULL;
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 87a0f8688301..fbdfc6917180 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -299,12 +299,11 @@ static ssize_t reload_store(struct device *dev,
 {
 	unsigned long val;
 	int cpu = dev->id;
-	int ret = 0;
-	char *end;
+	ssize_t ret = 0;
 
-	val = simple_strtoul(buf, &end, 0);
-	if (end == buf)
-		return -EINVAL;
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
 
 	if (val == 1) {
 		get_online_cpus();
@@ -419,10 +418,8 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
 	if (err)
 		return err;
 
-	if (microcode_init_cpu(cpu) == UCODE_ERROR) {
-		sysfs_remove_group(&dev->kobj, &mc_attr_group);
+	if (microcode_init_cpu(cpu) == UCODE_ERROR)
 		return -EINVAL;
-	}
 
 	return err;
 }
@@ -528,11 +525,11 @@ static int __init microcode_init(void)
 		microcode_ops = init_intel_microcode();
 	else if (c->x86_vendor == X86_VENDOR_AMD)
 		microcode_ops = init_amd_microcode();
-
-	if (!microcode_ops) {
+	else
 		pr_err("no support for this CPU vendor\n");
+
+	if (!microcode_ops)
 		return -ENODEV;
-	}
 
 	microcode_pdev = platform_device_register_simple("microcode", -1,
 						    NULL, 0);
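For reference, kstrtoul() returns 0 on success and a negative errno on malformed or out-of-range input, so a store handler can propagate its result directly. A minimal sketch with a hypothetical attribute (not from this patch):

static ssize_t threshold_store(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	unsigned long val;
	int err;

	err = kstrtoul(buf, 0, &val);	/* base 0 accepts decimal, hex, octal */
	if (err)
		return err;		/* -EINVAL or -ERANGE */
	/* ... act on val ... */
	return count;
}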
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 3ca42d0e43a2..0327e2b3c408 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -147,12 +147,6 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 
 	memset(csig, 0, sizeof(*csig));
 
-	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
-	    cpu_has(c, X86_FEATURE_IA64)) {
-		pr_err("CPU%d not a capable Intel processor\n", cpu_num);
-		return -1;
-	}
-
 	csig->sig = cpuid_eax(0x00000001);
 
 	if ((c->x86_model >= 5) || (c->x86 > 6)) {
@@ -463,6 +457,14 @@ static struct microcode_ops microcode_intel_ops = {
 
 struct microcode_ops * __init init_intel_microcode(void)
 {
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
+	    cpu_has(c, X86_FEATURE_IA64)) {
+		pr_err("Intel CPU family 0x%x not supported\n", c->x86);
+		return NULL;
+	}
+
 	return &microcode_intel_ops;
 }
 
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 47acaf319165..bffdfd48c1f2 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -31,14 +31,6 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 
-#define NMI_MAX_NAMELEN 16
-struct nmiaction {
-	struct list_head list;
-	nmi_handler_t handler;
-	unsigned int flags;
-	char *name;
-};
-
 struct nmi_desc {
 	spinlock_t lock;
 	struct list_head head;
@@ -54,6 +46,14 @@ static struct nmi_desc nmi_desc[NMI_MAX] =
 		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
 		.head = LIST_HEAD_INIT(nmi_desc[1].head),
 	},
+	{
+		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
+		.head = LIST_HEAD_INIT(nmi_desc[2].head),
+	},
+	{
+		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
+		.head = LIST_HEAD_INIT(nmi_desc[3].head),
+	},
 
 };
 
@@ -84,7 +84,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
 
 #define nmi_to_desc(type) (&nmi_desc[type])
 
-static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
+static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
 {
 	struct nmi_desc *desc = nmi_to_desc(type);
 	struct nmiaction *a;
@@ -107,11 +107,14 @@ static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs,
 	return handled;
 }
 
-static int __setup_nmi(unsigned int type, struct nmiaction *action)
+int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 {
 	struct nmi_desc *desc = nmi_to_desc(type);
 	unsigned long flags;
 
+	if (!action->handler)
+		return -EINVAL;
+
 	spin_lock_irqsave(&desc->lock, flags);
 
 	/*
@@ -120,6 +123,8 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action)
 	 * to manage expectations
 	 */
 	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
+	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
+	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
 
 	/*
 	 * some handlers need to be executed first otherwise a fake
@@ -133,8 +138,9 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action)
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
 }
+EXPORT_SYMBOL(__register_nmi_handler);
 
-static struct nmiaction *__free_nmi(unsigned int type, const char *name)
+void unregister_nmi_handler(unsigned int type, const char *name)
 {
 	struct nmi_desc *desc = nmi_to_desc(type);
 	struct nmiaction *n;
@@ -157,61 +163,16 @@ static struct nmiaction *__free_nmi(unsigned int type, const char *name)
 
 	spin_unlock_irqrestore(&desc->lock, flags);
 	synchronize_rcu();
-	return (n);
 }
-
-int register_nmi_handler(unsigned int type, nmi_handler_t handler,
-			unsigned long nmiflags, const char *devname)
-{
-	struct nmiaction *action;
-	int retval = -ENOMEM;
-
-	if (!handler)
-		return -EINVAL;
-
-	action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
-	if (!action)
-		goto fail_action;
-
-	action->handler = handler;
-	action->flags = nmiflags;
-	action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
-	if (!action->name)
-		goto fail_action_name;
-
-	retval = __setup_nmi(type, action);
-
-	if (retval)
-		goto fail_setup_nmi;
-
-	return retval;
-
-fail_setup_nmi:
-	kfree(action->name);
-fail_action_name:
-	kfree(action);
-fail_action:
-
-	return retval;
-}
-EXPORT_SYMBOL_GPL(register_nmi_handler);
-
-void unregister_nmi_handler(unsigned int type, const char *name)
-{
-	struct nmiaction *a;
-
-	a = __free_nmi(type, name);
-	if (a) {
-		kfree(a->name);
-		kfree(a);
-	}
-}
-
 EXPORT_SYMBOL_GPL(unregister_nmi_handler);
 
-static notrace __kprobes void
+static __kprobes void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
+	/* check to see if anyone registered against these types of errors */
+	if (nmi_handle(NMI_SERR, regs, false))
+		return;
+
 	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
 		 reason, smp_processor_id());
 
@@ -236,15 +197,19 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
 	outb(reason, NMI_REASON_PORT);
 }
 
-static notrace __kprobes void
+static __kprobes void
 io_check_error(unsigned char reason, struct pt_regs *regs)
 {
 	unsigned long i;
 
+	/* check to see if anyone registered against these types of errors */
+	if (nmi_handle(NMI_IO_CHECK, regs, false))
+		return;
+
 	pr_emerg(
 	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
 		 reason, smp_processor_id());
-	show_registers(regs);
+	show_regs(regs);
 
 	if (panic_on_io_nmi)
 		panic("NMI IOCK error: Not continuing");
@@ -263,7 +228,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 	outb(reason, NMI_REASON_PORT);
 }
 
-static notrace __kprobes void
+static __kprobes void
 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
 	int handled;
@@ -305,7 +270,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
 
-static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+static __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int handled;
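With the kmalloc()/kstrndup() path gone, callers now provide the struct nmiaction storage themselves; a register_nmi_handler() wrapper in <asm/nmi.h> (changed elsewhere in this series) builds a static one. A hedged usage sketch:

static int my_nmi_handler(unsigned int type, struct pt_regs *regs)
{
	return 0;	/* nonzero = handled, 0 = pass to the next handler */
}

static int __init my_init(void)
{
	/* assumed wrapper: expands to a static struct nmiaction plus a
	   call to __register_nmi_handler() on it */
	return register_nmi_handler(NMI_LOCAL, my_nmi_handler, 0, "mydev");
}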
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 2c39dcd510fa..e31bf8d5c4d2 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -13,6 +13,7 @@
 #include <linux/cpumask.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/percpu.h>
 
 #include <asm/apic.h>
 #include <asm/nmi.h>
@@ -117,15 +118,15 @@ static void __init dotest(void (*testcase_fn)(void), int expected)
 		unexpected_testcase_failures++;
 
 		if (nmi_fail == FAILURE)
-			printk("FAILED |");
+			printk(KERN_CONT "FAILED |");
 		else if (nmi_fail == TIMEOUT)
-			printk("TIMEOUT|");
+			printk(KERN_CONT "TIMEOUT|");
 		else
-			printk("ERROR |");
+			printk(KERN_CONT "ERROR |");
 		dump_stack();
 	} else {
 		testcase_successes++;
-		printk(" ok |");
+		printk(KERN_CONT " ok |");
 	}
 	testcase_total++;
 
@@ -150,10 +151,10 @@ void __init nmi_selftest(void)
 
 	print_testname("remote IPI");
 	dotest(remote_ipi, SUCCESS);
-	printk("\n");
+	printk(KERN_CONT "\n");
 	print_testname("local IPI");
 	dotest(local_ipi, SUCCESS);
-	printk("\n");
+	printk(KERN_CONT "\n");
 
 	cleanup_nmi_testsuite();
 
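KERN_CONT marks a printk() as the continuation of the previous message; without it each call starts a new log record and the test banner can be broken across lines. A two-line illustration (not from this patch):

printk(KERN_INFO "remote IPI: ");
printk(KERN_CONT "ok\n");	/* appended to the same console line */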
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ab137605e694..9ce885996fd7 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -241,16 +241,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
 
 static inline void enter_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
+	BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
 
-	percpu_write(paravirt_lazy_mode, mode);
+	this_cpu_write(paravirt_lazy_mode, mode);
 }
 
 static void leave_lazy(enum paravirt_lazy_mode mode)
 {
-	BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
+	BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);
 
-	percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
+	this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
 }
 
 void paravirt_enter_lazy_mmu(void)
@@ -267,7 +267,7 @@ void paravirt_start_context_switch(struct task_struct *prev)
 {
 	BUG_ON(preemptible());
 
-	if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
+	if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
 		arch_leave_lazy_mmu_mode();
 		set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
 	}
@@ -289,7 +289,7 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 	if (in_interrupt())
 		return PARAVIRT_LAZY_NONE;
 
-	return percpu_read(paravirt_lazy_mode);
+	return this_cpu_read(paravirt_lazy_mode);
 }
 
 void arch_flush_lazy_mmu_mode(void)
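this_cpu_read()/this_cpu_write() are the generic per-cpu accessors replacing the x86-only percpu_read()/percpu_write(); on x86 they still compile down to a single %gs-relative move, so no preempt_disable() bracket is needed for a plain load or store. A hedged equivalence sketch:

static DEFINE_PER_CPU(enum paravirt_lazy_mode, demo_mode);

static void demo(void)
{
	/* old, x86-specific: percpu_write(demo_mode, PARAVIRT_LAZY_MMU); */
	this_cpu_write(demo_mode, PARAVIRT_LAZY_MMU);
	BUG_ON(this_cpu_read(demo_mode) != PARAVIRT_LAZY_MMU);
}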
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index d0b2fb9ccbb1..b72838bae64a 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1480,8 +1480,9 @@ cleanup:
 static int __init calgary_parse_options(char *p)
 {
 	unsigned int bridge;
+	unsigned long val;
 	size_t len;
-	char* endp;
+	ssize_t ret;
 
 	while (*p) {
 		if (!strncmp(p, "64k", 3))
@@ -1512,10 +1513,11 @@ static int __init calgary_parse_options(char *p)
 			++p;
 			if (*p == '\0')
 				break;
-			bridge = simple_strtoul(p, &endp, 0);
-			if (p == endp)
+			ret = kstrtoul(p, 0, &val);
+			if (ret)
 				break;
 
+			bridge = val;
 			if (bridge < MAX_PHB_BUS_NUM) {
 				printk(KERN_INFO "Calgary: disabling "
 				       "translation for PHB %#x\n", bridge);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1d92a5ab6e8b..735279e54e59 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -27,6 +27,15 @@
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
 
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data..cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -47,10 +56,16 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
 struct kmem_cache *task_xstate_cachep;
 EXPORT_SYMBOL_GPL(task_xstate_cachep);
 
+/*
+ * this gets called so that we can store lazy state into memory and copy the
+ * current task into the new thread.
+ */
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	int ret;
 
+	unlazy_fpu(src);
+
 	*dst = *src;
 	if (fpu_allocated(&src->thread.fpu)) {
 		memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
@@ -67,10 +82,9 @@ void free_thread_xstate(struct task_struct *tsk)
 	fpu_free(&tsk->thread.fpu);
 }
 
-void free_thread_info(struct thread_info *ti)
+void arch_release_task_struct(struct task_struct *tsk)
 {
-	free_thread_xstate(ti->task);
-	free_pages((unsigned long)ti, THREAD_ORDER);
+	free_thread_xstate(tsk);
 }
 
 void arch_task_cache_init(void)
@@ -81,6 +95,16 @@ void arch_task_cache_init(void)
 			SLAB_PANIC | SLAB_NOTRACK, NULL);
 }
 
+static inline void drop_fpu(struct task_struct *tsk)
+{
+	/*
+	 * Forget coprocessor state..
+	 */
+	tsk->fpu_counter = 0;
+	clear_fpu(tsk);
+	clear_used_math();
+}
+
 /*
  * Free current thread data structures etc..
  */
@@ -103,12 +127,8 @@ void exit_thread(void)
 		put_cpu();
 		kfree(bp);
 	}
-}
 
-void show_regs(struct pt_regs *regs)
-{
-	show_registers(regs);
-	show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0);
+	drop_fpu(me);
 }
 
 void show_regs_common(void)
@@ -143,12 +163,7 @@ void flush_thread(void)
 
 	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	/*
-	 * Forget coprocessor state..
-	 */
-	tsk->fpu_counter = 0;
-	clear_fpu(tsk);
-	clear_used_math();
+	drop_fpu(tsk);
 }
 
 static void hard_disable_TSC(void)
@@ -377,7 +392,7 @@ static inline void play_dead(void)
 #ifdef CONFIG_X86_64
 void enter_idle(void)
 {
-	percpu_write(is_idle, 1);
+	this_cpu_write(is_idle, 1);
 	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
 }
 
@@ -516,26 +531,6 @@ void stop_this_cpu(void *dummy)
 	}
 }
 
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
-	smp_mb();
-	/* kick all the CPUs so that they exit out of pm_idle */
-	smp_call_function(do_nothing, NULL, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
@@ -594,9 +589,17 @@ int mwait_usable(const struct cpuinfo_x86 *c)
 {
 	u32 eax, ebx, ecx, edx;
 
+	/* Use mwait if idle=mwait boot option is given */
 	if (boot_option_idle_override == IDLE_FORCE_MWAIT)
 		return 1;
 
+	/*
+	 * Any idle= boot option other than idle=mwait means that we must not
+	 * use mwait. Eg: idle=halt or idle=poll or idle=nomwait
+	 */
+	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+		return 0;
+
 	if (c->cpuid_level < MWAIT_INFO)
 		return 0;
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ae6847303e26..516fa186121b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -126,15 +126,6 @@ void release_thread(struct task_struct *dead_task)
 	release_vm86_irqs(dead_task);
 }
 
-/*
- * This gets called before we allocate a new thread and copy
- * the current task into it.
- */
-void prepare_to_copy(struct task_struct *tsk)
-{
-	unlazy_fpu(tsk);
-}
-
 int copy_thread(unsigned long clone_flags, unsigned long sp,
 	unsigned long unused,
 	struct task_struct *p, struct pt_regs *regs)
@@ -302,7 +293,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	switch_fpu_finish(next_p, fpu);
 
-	percpu_write(current_task, next_p);
+	this_cpu_write(current_task, next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 733ca39f367e..61cdf7fdf099 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -145,15 +145,6 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 	return get_desc_base(&t->thread.tls_array[tls]);
 }
 
-/*
- * This gets called before we allocate a new thread and copy
- * the current task into it.
- */
-void prepare_to_copy(struct task_struct *tsk)
-{
-	unlazy_fpu(tsk);
-}
-
 int copy_thread(unsigned long clone_flags, unsigned long sp,
 		unsigned long unused,
 	struct task_struct *p, struct pt_regs *regs)
@@ -237,7 +228,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	current->thread.usersp	= new_sp;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
-	percpu_write(old_rsp, new_sp);
+	this_cpu_write(old_rsp, new_sp);
 	regs->cs		= _cs;
 	regs->ss		= _ss;
 	regs->flags		= X86_EFLAGS_IF;
@@ -359,11 +350,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
-	prev->usersp = percpu_read(old_rsp);
-	percpu_write(old_rsp, next->usersp);
-	percpu_write(current_task, next_p);
+	prev->usersp = this_cpu_read(old_rsp);
+	this_cpu_write(old_rsp, next->usersp);
+	this_cpu_write(current_task, next_p);
 
-	percpu_write(kernel_stack,
+	this_cpu_write(kernel_stack,
 		  (unsigned long)task_stack_page(next_p) +
 		  THREAD_SIZE - KERNEL_STACK_OFFSET);
 
@@ -423,6 +414,7 @@ void set_personality_ia32(bool x32)
 		current_thread_info()->status |= TS_COMPAT;
 	}
 }
+EXPORT_SYMBOL_GPL(set_personality_ia32);
 
 unsigned long get_wchan(struct task_struct *p)
 {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 685845cf16e0..13b1990c7c58 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1480,7 +1480,11 @@ long syscall_trace_enter(struct pt_regs *regs)
 		regs->flags |= X86_EFLAGS_TF;
 
 	/* do the secure computing check first */
-	secure_computing(regs->orig_ax);
+	if (secure_computing(regs->orig_ax)) {
+		/* seccomp failures shouldn't expose any additional code. */
+		ret = -1L;
+		goto out;
+	}
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
 		ret = -1L;
@@ -1505,6 +1509,7 @@ long syscall_trace_enter(struct pt_regs *regs)
 			    regs->dx, regs->r10);
 #endif
 
+out:
 	return ret ?: regs->orig_ax;
 }
 
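The -1L matters because the entry code dispatches on the value syscall_trace_enter() returns; a sketch of the caller's side (pseudocode for the entry_*.S check, assumed rather than quoted):

/*
 * nr = syscall_trace_enter(regs);
 * if (nr >= NR_syscalls)	// -1L falls out here
 *	return -ENOSYS;		// the syscall is never executed
 * sys_call_table[nr](...);
 */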
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1a2901562059..9b4204e06665 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -393,10 +393,9 @@ static void __init reserve_initrd(void)
 	initrd_start = 0;
 
 	if (ramdisk_size >= (end_of_lowmem>>1)) {
-		memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
-		printk(KERN_ERR "initrd too large to handle, "
-		       "disabling initrd\n");
-		return;
+		panic("initrd too large to handle, "
+		      "disabling initrd (%lld needed, %lld available)\n",
+		      ramdisk_size, end_of_lowmem>>1);
 	}
 
 	printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image,
@@ -1012,7 +1011,8 @@ void __init setup_arch(char **cmdline_p)
 	init_cpu_to_node();
 
 	init_apic_mappings();
-	ioapic_and_gsi_init();
+	if (x86_io_apic_ops.init)
+		x86_io_apic_ops.init();
 
 	kvm_guest_init();
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 71f4727da373..5a98aa272184 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -185,10 +185,22 @@ void __init setup_per_cpu_areas(void)
 #endif
 	rc = -EINVAL;
 	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
-		const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE;
 		const size_t dyn_size = PERCPU_MODULE_RESERVE +
 			PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE;
+		size_t atom_size;
 
+		/*
+		 * On 64bit, use PMD_SIZE for atom_size so that embedded
+		 * percpu areas are aligned to PMD. This, in the future,
+		 * can also allow using PMD mappings in vmalloc area. Use
+		 * PAGE_SIZE on 32bit as vmalloc space is highly contended
+		 * and large vmalloc area allocs can easily fail.
+		 */
+#ifdef CONFIG_X86_64
+		atom_size = PMD_SIZE;
+#else
+		atom_size = PAGE_SIZE;
+#endif
 		rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
 					    dyn_size, atom_size,
 					    pcpu_cpu_distance,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6e1e406038c2..433529e29be4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -76,20 +76,8 @@
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
-/* Store all idle threads, this can be reused instead of creating
-* a new thread. Also avoids complicated thread destroy functionality
-* for idle threads.
-*/
 #ifdef CONFIG_HOTPLUG_CPU
 /*
- * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
- * removed after init for !CONFIG_HOTPLUG_CPU.
- */
-static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
-#define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
-#define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
-
-/*
  * We need this for trampoline_base protection from concurrent accesses when
  * off- and onlining cores wildly.
  */
@@ -97,20 +85,16 @@ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
 
 void cpu_hotplug_driver_lock(void)
 {
 	mutex_lock(&x86_cpu_hotplug_driver_mutex);
 }
 
 void cpu_hotplug_driver_unlock(void)
 {
 	mutex_unlock(&x86_cpu_hotplug_driver_mutex);
 }
 
 ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
 ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
-#else
-static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
-#define get_idle_for_cpu(x)      (idle_thread_array[(x)])
-#define set_idle_for_cpu(x, p)   (idle_thread_array[(x)] = (p))
 #endif
 
 /* Number of siblings per CPU package */
@@ -315,59 +299,90 @@ void __cpuinit smp_store_cpu_info(int id)
 	identify_secondary_cpu(c);
 }
 
-static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
+static bool __cpuinit
+topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
 {
-	cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
-	cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
-	cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
-	cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
-	cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
-	cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
+	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+	return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
+		"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
+		"[node: %d != %d]. Ignoring dependency.\n",
+		cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
 }
 
+#define link_mask(_m, c1, c2)						\
+do {									\
+	cpumask_set_cpu((c1), cpu_##_m##_mask(c2));			\
+	cpumask_set_cpu((c2), cpu_##_m##_mask(c1));			\
+} while (0)
+
+static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+		if (c->phys_proc_id == o->phys_proc_id &&
+		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
+		    c->compute_unit_id == o->compute_unit_id)
+			return topology_sane(c, o, "smt");
+
+	} else if (c->phys_proc_id == o->phys_proc_id &&
+		   c->cpu_core_id == o->cpu_core_id) {
+		return topology_sane(c, o, "smt");
+	}
+
+	return false;
+}
+
+static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+	if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
+	    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
+		return topology_sane(c, o, "llc");
+
+	return false;
+}
+
+static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	if (c->phys_proc_id == o->phys_proc_id)
+		return topology_sane(c, o, "mc");
+
+	return false;
+}
 
 void __cpuinit set_cpu_sibling_map(int cpu)
 {
-	int i;
+	bool has_mc = boot_cpu_data.x86_max_cores > 1;
+	bool has_smt = smp_num_siblings > 1;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct cpuinfo_x86 *o;
+	int i;
 
 	cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
 
-	if (smp_num_siblings > 1) {
-		for_each_cpu(i, cpu_sibling_setup_mask) {
-			struct cpuinfo_x86 *o = &cpu_data(i);
-
-			if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
-				if (c->phys_proc_id == o->phys_proc_id &&
-				    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
-				    c->compute_unit_id == o->compute_unit_id)
-					link_thread_siblings(cpu, i);
-			} else if (c->phys_proc_id == o->phys_proc_id &&
-				   c->cpu_core_id == o->cpu_core_id) {
-				link_thread_siblings(cpu, i);
-			}
-		}
-	} else {
+	if (!has_smt && !has_mc) {
 		cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
-	}
-
-	cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
-
-	if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
-		cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
+		cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
+		cpumask_set_cpu(cpu, cpu_core_mask(cpu));
 		c->booted_cores = 1;
 		return;
 	}
 
 	for_each_cpu(i, cpu_sibling_setup_mask) {
-		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
-		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
-			cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
-			cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
-		}
-		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
-			cpumask_set_cpu(i, cpu_core_mask(cpu));
-			cpumask_set_cpu(cpu, cpu_core_mask(i));
+		o = &cpu_data(i);
+
+		if ((i == cpu) || (has_smt && match_smt(c, o)))
+			link_mask(sibling, cpu, i);
+
+		if ((i == cpu) || (has_mc && match_llc(c, o)))
+			link_mask(llc_shared, cpu, i);
+
+		if ((i == cpu) || (has_mc && match_mc(c, o))) {
+			link_mask(core, cpu, i);
+
 			/*
 			 * Does this new cpu bringup a new core?
 			 */
@@ -398,8 +413,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 	 * For perf, we return last level cache shared map.
 	 * And for power savings, we return cpu_core_map
 	 */
-	if ((sched_mc_power_savings || sched_smt_power_savings) &&
-	    !(cpu_has(c, X86_FEATURE_AMD_DCM)))
+	if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
 		return cpu_core_mask(cpu);
 	else
 		return cpu_llc_shared_mask(cpu);
@@ -618,22 +632,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	return (send_status | accept_status);
 }
 
-struct create_idle {
-	struct work_struct work;
-	struct task_struct *idle;
-	struct completion done;
-	int cpu;
-};
-
-static void __cpuinit do_fork_idle(struct work_struct *work)
-{
-	struct create_idle *c_idle =
-		container_of(work, struct create_idle, work);
-
-	c_idle->idle = fork_idle(c_idle->cpu);
-	complete(&c_idle->done);
-}
-
 /* reduce the number of lines printed when booting a large cpu count system */
 static void __cpuinit announce_cpu(int cpu, int apicid)
 {
@@ -660,58 +658,31 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
  * Returns zero if CPU booted OK, else error code from
  * ->wakeup_secondary_cpu.
  */
-static int __cpuinit do_boot_cpu(int apicid, int cpu)
+static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 {
 	unsigned long boot_error = 0;
 	unsigned long start_ip;
 	int timeout;
-	struct create_idle c_idle = {
-		.cpu	= cpu,
-		.done	= COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
-	};
-
-	INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
 
 	alternatives_smp_switch(1);
 
-	c_idle.idle = get_idle_for_cpu(cpu);
-
-	/*
-	 * We can't use kernel_thread since we must avoid to
-	 * reschedule the child.
-	 */
-	if (c_idle.idle) {
-		c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
-			(THREAD_SIZE +  task_stack_page(c_idle.idle))) - 1);
-		init_idle(c_idle.idle, cpu);
-		goto do_rest;
-	}
-
-	schedule_work(&c_idle.work);
-	wait_for_completion(&c_idle.done);
-
-	if (IS_ERR(c_idle.idle)) {
-		printk("failed fork for CPU %d\n", cpu);
-		destroy_work_on_stack(&c_idle.work);
-		return PTR_ERR(c_idle.idle);
-	}
-
-	set_idle_for_cpu(cpu, c_idle.idle);
-do_rest:
-	per_cpu(current_task, cpu) = c_idle.idle;
+	idle->thread.sp = (unsigned long) (((struct pt_regs *)
+			  (THREAD_SIZE +  task_stack_page(idle))) - 1);
+	per_cpu(current_task, cpu) = idle;
+
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
 #else
-	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
+	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
 	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(c_idle.idle) -
+		(unsigned long)task_stack_page(idle) -
 		KERNEL_STACK_OFFSET + THREAD_SIZE;
 #endif
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
-	stack_start  = c_idle.idle->thread.sp;
+	stack_start  = idle->thread.sp;
 
 	/* start_ip had better be page-aligned! */
 	start_ip = trampoline_address();
@@ -813,12 +784,10 @@ do_rest:
 		 */
 		smpboot_restore_warm_reset_vector();
 	}
-
-	destroy_work_on_stack(&c_idle.work);
 	return boot_error;
 }
 
-int __cpuinit native_cpu_up(unsigned int cpu)
+int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
 	unsigned long flags;
@@ -851,7 +820,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
-	err = do_boot_cpu(apicid, cpu);
+	err = do_boot_cpu(apicid, cpu, tidle);
 	if (err) {
 		pr_debug("do_boot_cpu failed %d\n", err);
 		return -EIO;
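The link_mask() helper introduced above relies on token pasting, so link_mask(sibling, cpu, i) expands to roughly the pair of cpumask updates the old link_thread_siblings() performed:

/* Expansion sketch of link_mask(sibling, cpu, i): */
do {
	cpumask_set_cpu((cpu), cpu_sibling_mask(i));
	cpumask_set_cpu((i), cpu_sibling_mask(cpu));
} while (0);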
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index c29e235792af..b79133abda48 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
+#include <asm/asm.h>
 
 int rodata_test(void)
 {
@@ -42,14 +43,7 @@ int rodata_test(void)
 	".section .fixup,\"ax\"\n"
 	"2:	jmp 1b\n"
 	".previous\n"
-	".section __ex_table,\"a\"\n"
-	"       .align 16\n"
-#ifdef CONFIG_X86_32
-	"       .long 0b,2b\n"
-#else
-	"       .quad 0b,2b\n"
-#endif
-	".previous"
+	_ASM_EXTABLE(0b,2b)
 	: [rslt] "=r" (result)
 	: [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL)
 	);
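All of the __ex_table conversions in this series go through _ASM_EXTABLE from <asm/asm.h>, which records one (faulting insn, fixup) pair in the exception table and works from both C inline assembly and .S files. A conceptual sketch only; the real macro, which this series also reworks, lives in <asm/asm.h>:

/* Sketch of the .S-file expansion of _ASM_EXTABLE(from, to): */
	.pushsection __ex_table, "a"
	.balign 8
	_ASM_PTR from, to	/* .long on 32-bit, .quad on 64-bit */
	.popsection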
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff9281f16029..92d5756d85fc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -50,6 +50,7 @@
 #include <asm/processor.h>
 #include <asm/debugreg.h>
 #include <linux/atomic.h>
+#include <asm/ftrace.h>
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
@@ -303,8 +304,13 @@ gp_in_kernel:
 }
 
 /* May run on IST stack. */
-dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
+dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
 {
+#ifdef CONFIG_DYNAMIC_FTRACE
+	/* ftrace must be first, everything else may cause a recursive crash */
+	if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
+		return;
+#endif
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 			SIGTRAP) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index f386dc49f988..7515cf0e1805 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -216,9 +216,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	current_thread_info()->sig_on_uaccess_error = 1;
 
 	/*
-	 * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
+	 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
 	 * 64-bit, so we don't need to special-case it here.  For all the
-	 * vsyscalls, 0 means "don't write anything" not "write it at
+	 * vsyscalls, NULL means "don't write anything" not "write it at
 	 * address 0".
 	 */
 	ret = -EFAULT;
@@ -247,7 +247,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 		ret = sys_getcpu((unsigned __user *)regs->di,
 				 (unsigned __user *)regs->si,
-				 0);
+				 NULL);
 		break;
 	}
 
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index e9f265fd79ae..35c5e543f550 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -18,6 +18,7 @@
 #include <asm/e820.h>
 #include <asm/time.h>
 #include <asm/irq.h>
+#include <asm/io_apic.h>
 #include <asm/pat.h>
 #include <asm/tsc.h>
 #include <asm/iommu.h>
@@ -93,7 +94,6 @@ struct x86_init_ops x86_init __initdata = {
 struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
 	.early_percpu_clock_init	= x86_init_noop,
 	.setup_percpu_clockev		= setup_secondary_APIC_clock,
-	.fixup_cpu_id			= x86_default_fixup_cpu_id,
 };
 
 static void default_nmi_init(void) { };
@@ -120,3 +120,10 @@ struct x86_msi_ops x86_msi = {
 	.teardown_msi_irqs = default_teardown_msi_irqs,
 	.restore_msi_irqs = default_restore_msi_irqs,
 };
+
+struct x86_io_apic_ops x86_io_apic_ops = {
+	.init	= native_io_apic_init_mappings,
+	.read	= native_io_apic_read,
+	.write	= native_io_apic_write,
+	.modify	= native_io_apic_modify,
+};
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index e62728e30b01..bd18149b2b0f 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -48,8 +48,6 @@ void __sanitize_i387_state(struct task_struct *tsk)
 	if (!fx)
 		return;
 
-	BUG_ON(__thread_has_fpu(tsk));
-
 	xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
 
 	/*
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 173df38dbda5..2e88438ffd83 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -459,17 +459,17 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
 	pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
 
 	if (pmu->version == 1) {
-		pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1;
-		return;
+		pmu->nr_arch_fixed_counters = 0;
+	} else {
+		pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
+				X86_PMC_MAX_FIXED);
+		pmu->counter_bitmask[KVM_PMC_FIXED] =
+			((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
 	}
 
-	pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
-			X86_PMC_MAX_FIXED);
-	pmu->counter_bitmask[KVM_PMC_FIXED] =
-		((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
-	pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1)
-		| (((1ull << pmu->nr_arch_fixed_counters) - 1)
-			<< X86_PMC_IDX_FIXED));
+	pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
+		(((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED);
+	pmu->global_ctrl_mask = ~pmu->global_ctrl;
 }
 
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
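As a worked example of the consolidated computation: with 4 general-purpose and 3 fixed counters (X86_PMC_IDX_FIXED is 32), the guest's valid-bit mask becomes:

/*
 * (1 << 4) - 1                = 0x000000000000000f   GP counters
 * ((1ull << 3) - 1) << 32     = 0x0000000700000000   fixed counters
 * global_ctrl                 = 0x000000070000000f
 * global_ctrl_mask = ~global_ctrl; any other bit set in
 * MSR_CORE_PERF_GLOBAL_CTRL by the guest is rejected.
 */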
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ad85adfef843..4ff0ab9bc3c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2210,9 +2210,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
 			msr->data = data;
-			if (msr - vmx->guest_msrs < vmx->save_nmsrs)
+			if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
+				preempt_disable();
 				kvm_set_shared_msr(msr->index, msr->data,
 						   msr->mask);
+				preempt_enable();
+			}
 			break;
 		}
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4044ce0bf7c1..185a2b823a2d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6336,13 +6336,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
6336 if (npages && !old.rmap) { 6336 if (npages && !old.rmap) {
6337 unsigned long userspace_addr; 6337 unsigned long userspace_addr;
6338 6338
6339 down_write(&current->mm->mmap_sem); 6339 userspace_addr = vm_mmap(NULL, 0,
6340 userspace_addr = do_mmap(NULL, 0,
6341 npages * PAGE_SIZE, 6340 npages * PAGE_SIZE,
6342 PROT_READ | PROT_WRITE, 6341 PROT_READ | PROT_WRITE,
6343 map_flags, 6342 map_flags,
6344 0); 6343 0);
6345 up_write(&current->mm->mmap_sem);
6346 6344
6347 if (IS_ERR((void *)userspace_addr)) 6345 if (IS_ERR((void *)userspace_addr))
6348 return PTR_ERR((void *)userspace_addr); 6346 return PTR_ERR((void *)userspace_addr);
@@ -6366,10 +6364,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
 		int ret;
 
-		down_write(&current->mm->mmap_sem);
-		ret = do_munmap(current->mm, old.userspace_addr,
+		ret = vm_munmap(old.userspace_addr,
 				old.npages * PAGE_SIZE);
-		up_write(&current->mm->mmap_sem);
 		if (ret < 0)
 			printk(KERN_WARNING
 			       "kvm_vm_ioctl_set_memory_region: "
@@ -6585,6 +6581,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 		kvm_inject_page_fault(vcpu, &fault);
 	}
 	vcpu->arch.apf.halted = false;
+	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 }
 
 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
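vm_mmap() and vm_munmap() take mmap_sem internally, which is why the explicit down_write()/up_write() pairs disappear. A hedged sketch of the calling convention (offset is in bytes; errors come back encoded in the returned address):

unsigned long addr;

addr = vm_mmap(NULL, 0, npages * PAGE_SIZE,	/* file, hint, length */
	       PROT_READ | PROT_WRITE,
	       MAP_SHARED | MAP_ANONYMOUS, 0);	/* flags, byte offset */
if (IS_ERR((void *)addr))
	return PTR_ERR((void *)addr);

if (vm_munmap(addr, npages * PAGE_SIZE) < 0)	/* also self-locking */
	pr_warn("munmap failed\n");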
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 78d16a554db0..2af5df3ade7c 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -28,6 +28,7 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/errno.h>
+#include <asm/asm.h>
 
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
@@ -282,15 +283,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
 
 #define SRC(y...)			\
 	9999: y;			\
-	.section __ex_table, "a";	\
-	.long 9999b, 6001f	;	\
-	.previous
+	_ASM_EXTABLE(9999b, 6001f)
 
 #define DST(y...)			\
 	9999: y;			\
-	.section __ex_table, "a";	\
-	.long 9999b, 6002f	;	\
-	.previous
+	_ASM_EXTABLE(9999b, 6002f)
 
 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
 
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 024840266ba0..5b2995f4557a 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -16,6 +16,7 @@
 #include <asm/thread_info.h>
 #include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
+#include <asm/asm.h>
 
 /*
  * By placing feature2 after feature1 in altinstructions section, we logically
@@ -63,11 +64,8 @@
 	jmp copy_user_handle_tail
 	.previous
 
-	.section __ex_table,"a"
-	.align 8
-	.quad 100b,103b
-	.quad 101b,103b
-	.previous
+	_ASM_EXTABLE(100b,103b)
+	_ASM_EXTABLE(101b,103b)
 #endif
 	.endm
 
@@ -191,29 +189,26 @@ ENTRY(copy_user_generic_unrolled)
 60:	jmp copy_user_handle_tail /* ecx is zerorest also */
 	.previous
 
-	.section __ex_table,"a"
-	.align 8
-	.quad 1b,30b
-	.quad 2b,30b
-	.quad 3b,30b
-	.quad 4b,30b
-	.quad 5b,30b
-	.quad 6b,30b
-	.quad 7b,30b
-	.quad 8b,30b
-	.quad 9b,30b
-	.quad 10b,30b
-	.quad 11b,30b
-	.quad 12b,30b
-	.quad 13b,30b
-	.quad 14b,30b
-	.quad 15b,30b
-	.quad 16b,30b
-	.quad 18b,40b
-	.quad 19b,40b
-	.quad 21b,50b
-	.quad 22b,50b
-	.previous
+	_ASM_EXTABLE(1b,30b)
+	_ASM_EXTABLE(2b,30b)
+	_ASM_EXTABLE(3b,30b)
+	_ASM_EXTABLE(4b,30b)
+	_ASM_EXTABLE(5b,30b)
+	_ASM_EXTABLE(6b,30b)
+	_ASM_EXTABLE(7b,30b)
+	_ASM_EXTABLE(8b,30b)
+	_ASM_EXTABLE(9b,30b)
+	_ASM_EXTABLE(10b,30b)
+	_ASM_EXTABLE(11b,30b)
+	_ASM_EXTABLE(12b,30b)
+	_ASM_EXTABLE(13b,30b)
+	_ASM_EXTABLE(14b,30b)
+	_ASM_EXTABLE(15b,30b)
+	_ASM_EXTABLE(16b,30b)
+	_ASM_EXTABLE(18b,40b)
+	_ASM_EXTABLE(19b,40b)
+	_ASM_EXTABLE(21b,50b)
+	_ASM_EXTABLE(22b,50b)
 	CFI_ENDPROC
 ENDPROC(copy_user_generic_unrolled)
 
@@ -259,11 +254,8 @@ ENTRY(copy_user_generic_string)
259 jmp copy_user_handle_tail 254 jmp copy_user_handle_tail
260 .previous 255 .previous
261 256
262 .section __ex_table,"a" 257 _ASM_EXTABLE(1b,11b)
263 .align 8 258 _ASM_EXTABLE(3b,12b)
264 .quad 1b,11b
265 .quad 3b,12b
266 .previous
267 CFI_ENDPROC 259 CFI_ENDPROC
268ENDPROC(copy_user_generic_string) 260ENDPROC(copy_user_generic_string)
269 261
@@ -294,9 +286,6 @@ ENTRY(copy_user_enhanced_fast_string)
294 jmp copy_user_handle_tail 286 jmp copy_user_handle_tail
295 .previous 287 .previous
296 288
297 .section __ex_table,"a" 289 _ASM_EXTABLE(1b,12b)
298 .align 8
299 .quad 1b,12b
300 .previous
301 CFI_ENDPROC 290 CFI_ENDPROC
302ENDPROC(copy_user_enhanced_fast_string) 291ENDPROC(copy_user_enhanced_fast_string)
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index cb0c112386fb..cacddc7163eb 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -14,6 +14,7 @@
14#include <asm/current.h> 14#include <asm/current.h>
15#include <asm/asm-offsets.h> 15#include <asm/asm-offsets.h>
16#include <asm/thread_info.h> 16#include <asm/thread_info.h>
17#include <asm/asm.h>
17 18
18 .macro ALIGN_DESTINATION 19 .macro ALIGN_DESTINATION
19#ifdef FIX_ALIGNMENT 20#ifdef FIX_ALIGNMENT
@@ -36,11 +37,8 @@
36 jmp copy_user_handle_tail 37 jmp copy_user_handle_tail
37 .previous 38 .previous
38 39
39 .section __ex_table,"a" 40 _ASM_EXTABLE(100b,103b)
40 .align 8 41 _ASM_EXTABLE(101b,103b)
41 .quad 100b,103b
42 .quad 101b,103b
43 .previous
44#endif 42#endif
45 .endm 43 .endm
46 44
@@ -111,27 +109,25 @@ ENTRY(__copy_user_nocache)
111 jmp copy_user_handle_tail 109 jmp copy_user_handle_tail
112 .previous 110 .previous
113 111
114 .section __ex_table,"a" 112 _ASM_EXTABLE(1b,30b)
115 .quad 1b,30b 113 _ASM_EXTABLE(2b,30b)
116 .quad 2b,30b 114 _ASM_EXTABLE(3b,30b)
117 .quad 3b,30b 115 _ASM_EXTABLE(4b,30b)
118 .quad 4b,30b 116 _ASM_EXTABLE(5b,30b)
119 .quad 5b,30b 117 _ASM_EXTABLE(6b,30b)
120 .quad 6b,30b 118 _ASM_EXTABLE(7b,30b)
121 .quad 7b,30b 119 _ASM_EXTABLE(8b,30b)
122 .quad 8b,30b 120 _ASM_EXTABLE(9b,30b)
123 .quad 9b,30b 121 _ASM_EXTABLE(10b,30b)
124 .quad 10b,30b 122 _ASM_EXTABLE(11b,30b)
125 .quad 11b,30b 123 _ASM_EXTABLE(12b,30b)
126 .quad 12b,30b 124 _ASM_EXTABLE(13b,30b)
127 .quad 13b,30b 125 _ASM_EXTABLE(14b,30b)
128 .quad 14b,30b 126 _ASM_EXTABLE(15b,30b)
129 .quad 15b,30b 127 _ASM_EXTABLE(16b,30b)
130 .quad 16b,30b 128 _ASM_EXTABLE(18b,40b)
131 .quad 18b,40b 129 _ASM_EXTABLE(19b,40b)
132 .quad 19b,40b 130 _ASM_EXTABLE(21b,50b)
133 .quad 21b,50b 131 _ASM_EXTABLE(22b,50b)
134 .quad 22b,50b
135 .previous
136 CFI_ENDPROC 132 CFI_ENDPROC
137ENDPROC(__copy_user_nocache) 133ENDPROC(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index fb903b758da8..2419d5fefae3 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -8,6 +8,7 @@
8#include <linux/linkage.h> 8#include <linux/linkage.h>
9#include <asm/dwarf2.h> 9#include <asm/dwarf2.h>
10#include <asm/errno.h> 10#include <asm/errno.h>
11#include <asm/asm.h>
11 12
12/* 13/*
13 * Checksum copy with exception handling. 14 * Checksum copy with exception handling.
@@ -31,26 +32,17 @@
31 32
32 .macro source 33 .macro source
3310: 3410:
34 .section __ex_table, "a" 35 _ASM_EXTABLE(10b, .Lbad_source)
35 .align 8
36 .quad 10b, .Lbad_source
37 .previous
38 .endm 36 .endm
39 37
40 .macro dest 38 .macro dest
4120: 3920:
42 .section __ex_table, "a" 40 _ASM_EXTABLE(20b, .Lbad_dest)
43 .align 8
44 .quad 20b, .Lbad_dest
45 .previous
46 .endm 41 .endm
47 42
48 .macro ignore L=.Lignore 43 .macro ignore L=.Lignore
4930: 4430:
50 .section __ex_table, "a" 45 _ASM_EXTABLE(30b, \L)
51 .align 8
52 .quad 30b, \L
53 .previous
54 .endm 46 .endm
55 47
56 48
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 51f1504cddd9..b33b1fb1e6d4 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -95,10 +95,9 @@ bad_get_user:
95 CFI_ENDPROC 95 CFI_ENDPROC
96END(bad_get_user) 96END(bad_get_user)
97 97
98.section __ex_table,"a" 98 _ASM_EXTABLE(1b,bad_get_user)
99 _ASM_PTR 1b,bad_get_user 99 _ASM_EXTABLE(2b,bad_get_user)
100 _ASM_PTR 2b,bad_get_user 100 _ASM_EXTABLE(3b,bad_get_user)
101 _ASM_PTR 3b,bad_get_user
102#ifdef CONFIG_X86_64 101#ifdef CONFIG_X86_64
103 _ASM_PTR 4b,bad_get_user 102 _ASM_EXTABLE(4b,bad_get_user)
104#endif 103#endif
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 25feb1ae71c5..b1e6c4b2e8eb 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -379,8 +379,8 @@ err_out:
379 return; 379 return;
380} 380}
381 381
382/* Decode moffset16/32/64 */ 382/* Decode moffset16/32/64. Return 0 if failed */
383static void __get_moffset(struct insn *insn) 383static int __get_moffset(struct insn *insn)
384{ 384{
385 switch (insn->addr_bytes) { 385 switch (insn->addr_bytes) {
386 case 2: 386 case 2:
@@ -397,15 +397,19 @@ static void __get_moffset(struct insn *insn)
397 insn->moffset2.value = get_next(int, insn); 397 insn->moffset2.value = get_next(int, insn);
398 insn->moffset2.nbytes = 4; 398 insn->moffset2.nbytes = 4;
399 break; 399 break;
400 default: /* opnd_bytes must be modified manually */
401 goto err_out;
400 } 402 }
401 insn->moffset1.got = insn->moffset2.got = 1; 403 insn->moffset1.got = insn->moffset2.got = 1;
402 404
405 return 1;
406
403err_out: 407err_out:
404 return; 408 return 0;
405} 409}
406 410
407/* Decode imm v32(Iz) */ 411/* Decode imm v32(Iz). Return 0 if failed */
408static void __get_immv32(struct insn *insn) 412static int __get_immv32(struct insn *insn)
409{ 413{
410 switch (insn->opnd_bytes) { 414 switch (insn->opnd_bytes) {
411 case 2: 415 case 2:
@@ -417,14 +421,18 @@ static void __get_immv32(struct insn *insn)
417 insn->immediate.value = get_next(int, insn); 421 insn->immediate.value = get_next(int, insn);
418 insn->immediate.nbytes = 4; 422 insn->immediate.nbytes = 4;
419 break; 423 break;
424 default: /* opnd_bytes must be modified manually */
425 goto err_out;
420 } 426 }
421 427
428 return 1;
429
422err_out: 430err_out:
423 return; 431 return 0;
424} 432}
425 433
426/* Decode imm v64(Iv/Ov) */ 434/* Decode imm v64(Iv/Ov). Return 0 if failed */
427static void __get_immv(struct insn *insn) 435static int __get_immv(struct insn *insn)
428{ 436{
429 switch (insn->opnd_bytes) { 437 switch (insn->opnd_bytes) {
430 case 2: 438 case 2:
@@ -441,15 +449,18 @@ static void __get_immv(struct insn *insn)
441 insn->immediate2.value = get_next(int, insn); 449 insn->immediate2.value = get_next(int, insn);
442 insn->immediate2.nbytes = 4; 450 insn->immediate2.nbytes = 4;
443 break; 451 break;
452 default: /* opnd_bytes must be modified manually */
453 goto err_out;
444 } 454 }
445 insn->immediate1.got = insn->immediate2.got = 1; 455 insn->immediate1.got = insn->immediate2.got = 1;
446 456
457 return 1;
447err_out: 458err_out:
448 return; 459 return 0;
449} 460}
450 461
451/* Decode ptr16:16/32(Ap) */ 462/* Decode ptr16:16/32(Ap). Return 0 if failed */
452static void __get_immptr(struct insn *insn) 463static int __get_immptr(struct insn *insn)
453{ 464{
454 switch (insn->opnd_bytes) { 465 switch (insn->opnd_bytes) {
455 case 2: 466 case 2:
@@ -462,14 +473,17 @@ static void __get_immptr(struct insn *insn)
462 break; 473 break;
463 case 8: 474 case 8:
464 /* ptr16:64 does not exist (no segment) */ 475 /* ptr16:64 does not exist (no segment) */
465 return; 476 return 0;
477 default: /* opnd_bytes must be modified manually */
478 goto err_out;
466 } 479 }
467 insn->immediate2.value = get_next(unsigned short, insn); 480 insn->immediate2.value = get_next(unsigned short, insn);
468 insn->immediate2.nbytes = 2; 481 insn->immediate2.nbytes = 2;
469 insn->immediate1.got = insn->immediate2.got = 1; 482 insn->immediate1.got = insn->immediate2.got = 1;
470 483
484 return 1;
471err_out: 485err_out:
472 return; 486 return 0;
473} 487}
474 488
475/** 489/**
@@ -489,7 +503,8 @@ void insn_get_immediate(struct insn *insn)
489 insn_get_displacement(insn); 503 insn_get_displacement(insn);
490 504
491 if (inat_has_moffset(insn->attr)) { 505 if (inat_has_moffset(insn->attr)) {
492 __get_moffset(insn); 506 if (!__get_moffset(insn))
507 goto err_out;
493 goto done; 508 goto done;
494 } 509 }
495 510
@@ -517,16 +532,20 @@ void insn_get_immediate(struct insn *insn)
517 insn->immediate2.nbytes = 4; 532 insn->immediate2.nbytes = 4;
518 break; 533 break;
519 case INAT_IMM_PTR: 534 case INAT_IMM_PTR:
520 __get_immptr(insn); 535 if (!__get_immptr(insn))
536 goto err_out;
521 break; 537 break;
522 case INAT_IMM_VWORD32: 538 case INAT_IMM_VWORD32:
523 __get_immv32(insn); 539 if (!__get_immv32(insn))
540 goto err_out;
524 break; 541 break;
525 case INAT_IMM_VWORD: 542 case INAT_IMM_VWORD:
526 __get_immv(insn); 543 if (!__get_immv(insn))
544 goto err_out;
527 break; 545 break;
528 default: 546 default:
529 break; 547 /* Here, insn must have an immediate, but decoding failed */
548 goto err_out;
530 } 549 }
531 if (inat_has_second_immediate(insn->attr)) { 550 if (inat_has_second_immediate(insn->attr)) {
532 insn->immediate2.value = get_next(char, insn); 551 insn->immediate2.value = get_next(char, insn);
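
The decoders above now return int so that an unexpected opnd_bytes or
addr_bytes value propagates out of insn_get_immediate() through err_out
rather than leaving the insn half-decoded. A hedged sketch of how a caller
observes the failure, assuming the 3.x-era insn_init()/insn_get_length()
signatures and the insn_complete() helper; names and includes are
illustrative, not guaranteed for every kernel version:

#include <linux/errno.h>
#include <asm/insn.h>

/* Illustrative only: decode one instruction and report failure
 * instead of trusting partially filled fields. */
static int demo_decode_len(const void *kaddr)
{
	struct insn insn;

	insn_init(&insn, kaddr, 1);	/* 1 = decode as 64-bit code */
	insn_get_length(&insn);		/* runs all stages, immediates included */
	if (!insn_complete(&insn))	/* some stage hit a malformed field */
		return -EINVAL;
	return insn.length;
}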
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 36b0d15ae6e9..7f951c8f76c4 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -86,12 +86,10 @@ bad_put_user:
86 EXIT 86 EXIT
87END(bad_put_user) 87END(bad_put_user)
88 88
89.section __ex_table,"a" 89 _ASM_EXTABLE(1b,bad_put_user)
90 _ASM_PTR 1b,bad_put_user 90 _ASM_EXTABLE(2b,bad_put_user)
91 _ASM_PTR 2b,bad_put_user 91 _ASM_EXTABLE(3b,bad_put_user)
92 _ASM_PTR 3b,bad_put_user 92 _ASM_EXTABLE(4b,bad_put_user)
93 _ASM_PTR 4b,bad_put_user
94#ifdef CONFIG_X86_32 93#ifdef CONFIG_X86_32
95 _ASM_PTR 5b,bad_put_user 94 _ASM_EXTABLE(5b,bad_put_user)
96#endif 95#endif
97.previous
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 97be9cb54483..2e4e4b02c37a 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
7#include <linux/highmem.h> 7#include <linux/highmem.h>
8#include <linux/module.h> 8#include <linux/module.h>
9 9
10#include <asm/word-at-a-time.h>
11
10/* 12/*
11 * best effort, GUP based copy_from_user() that is NMI-safe 13 * best effort, GUP based copy_from_user() that is NMI-safe
12 */ 14 */
@@ -41,3 +43,100 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
41 return len; 43 return len;
42} 44}
43EXPORT_SYMBOL_GPL(copy_from_user_nmi); 45EXPORT_SYMBOL_GPL(copy_from_user_nmi);
46
47/*
48 * Do a strncpy, return length of string without final '\0'.
49 * 'count' is the user-supplied count (return 'count' if we
50 * hit it), 'max' is the address space maximum (and we return
51 * -EFAULT if we hit it).
52 */
53static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)
54{
55 long res = 0;
56
57 /*
58 * Truncate 'max' to the user-specified limit, so that
59 * we only have one limit we need to check in the loop
60 */
61 if (max > count)
62 max = count;
63
64 while (max >= sizeof(unsigned long)) {
65 unsigned long c, mask;
66
67 /* Fall back to byte-at-a-time if we get a page fault */
68 if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
69 break;
70 mask = has_zero(c);
71 if (mask) {
72 mask = (mask - 1) & ~mask;
73 mask >>= 7;
74 *(unsigned long *)(dst+res) = c & mask;
75 return res + count_masked_bytes(mask);
76 }
77 *(unsigned long *)(dst+res) = c;
78 res += sizeof(unsigned long);
79 max -= sizeof(unsigned long);
80 }
81
82 while (max) {
83 char c;
84
85 if (unlikely(__get_user(c,src+res)))
86 return -EFAULT;
87 dst[res] = c;
88 if (!c)
89 return res;
90 res++;
91 max--;
92 }
93
94 /*
95 * Uhhuh. We hit 'max'. But was that the user-specified maximum
96 * too? If so, that's ok - we got as much as the user asked for.
97 */
98 if (res >= count)
99 return res;
100
101 /*
102 * Nope: we hit the address space limit, and we still had more
103 * characters the caller would have wanted. That's an EFAULT.
104 */
105 return -EFAULT;
106}
107
108/**
109 * strncpy_from_user: - Copy a NUL terminated string from userspace.
110 * @dst: Destination address, in kernel space. This buffer must be at
111 * least @count bytes long.
112 * @src: Source address, in user space.
113 * @count: Maximum number of bytes to copy, including the trailing NUL.
114 *
115 * Copies a NUL-terminated string from userspace to kernel space.
116 *
117 * On success, returns the length of the string (not including the trailing
118 * NUL).
119 *
120 * If access to userspace fails, returns -EFAULT (some data may have been
121 * copied).
122 *
123 * If @count is smaller than the length of the string, copies @count bytes
124 * and returns @count.
125 */
126long
127strncpy_from_user(char *dst, const char __user *src, long count)
128{
129 unsigned long max_addr, src_addr;
130
131 if (unlikely(count <= 0))
132 return 0;
133
134 max_addr = current_thread_info()->addr_limit.seg;
135 src_addr = (unsigned long)src;
136 if (likely(src_addr < max_addr)) {
137 unsigned long max = max_addr - src_addr;
138 return do_strncpy_from_user(dst, src, count, max);
139 }
140 return -EFAULT;
141}
142EXPORT_SYMBOL(strncpy_from_user);
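
The word-at-a-time fast path above leans on three helpers from
<asm/word-at-a-time.h>: has_zero() puts 0x80 into every byte lane that
holds a zero byte, the (mask - 1) & ~mask, mask >>= 7 sequence turns the
lowest such flag into a keep-these-bytes mask, and count_masked_bytes()
converts that mask into the string length. A standalone re-implementation
of the arithmetic, assuming a 64-bit little-endian host (a sketch for
illustration, not the kernel header):

#include <stdio.h>
#include <string.h>

#define REPEAT_BYTE(x)	((~0ul / 0xff) * (x))

/* 0x80 in every byte lane that holds a zero byte */
static unsigned long has_zero(unsigned long a)
{
	return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
}

/* bytes before the first zero byte (the x86-64 multiply trick) */
static long count_masked_bytes(unsigned long mask)
{
	return mask * 0x0001020304050608ul >> 56;
}

int main(void)
{
	unsigned long c, mask;

	memcpy(&c, "ab\0zzzzz", sizeof(c));	/* NUL in byte lane 2 */
	mask = has_zero(c);			/* 0x0000000000800000 */
	mask = (mask - 1) & ~mask;		/* ones below the NUL lane */
	mask >>= 7;				/* 0x000000000000ffff */
	printf("stored word: %#lx\n", c & mask);	/* "ab" plus NUL padding */
	printf("length: %ld\n", count_masked_bytes(mask));	/* 2 */
	return 0;
}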
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index d9b094ca7aaa..883b216c60b2 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -13,6 +13,7 @@
13#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15#include <asm/mmx.h> 15#include <asm/mmx.h>
16#include <asm/asm.h>
16 17
17#ifdef CONFIG_X86_INTEL_USERCOPY 18#ifdef CONFIG_X86_INTEL_USERCOPY
18/* 19/*
@@ -33,93 +34,6 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
33 __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) 34 __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n))
34 35
35/* 36/*
36 * Copy a null terminated string from userspace.
37 */
38
39#define __do_strncpy_from_user(dst, src, count, res) \
40do { \
41 int __d0, __d1, __d2; \
42 might_fault(); \
43 __asm__ __volatile__( \
44 " testl %1,%1\n" \
45 " jz 2f\n" \
46 "0: lodsb\n" \
47 " stosb\n" \
48 " testb %%al,%%al\n" \
49 " jz 1f\n" \
50 " decl %1\n" \
51 " jnz 0b\n" \
52 "1: subl %1,%0\n" \
53 "2:\n" \
54 ".section .fixup,\"ax\"\n" \
55 "3: movl %5,%0\n" \
56 " jmp 2b\n" \
57 ".previous\n" \
58 _ASM_EXTABLE(0b,3b) \
59 : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
60 "=&D" (__d2) \
61 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
62 : "memory"); \
63} while (0)
64
65/**
66 * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
67 * @dst: Destination address, in kernel space. This buffer must be at
68 * least @count bytes long.
69 * @src: Source address, in user space.
70 * @count: Maximum number of bytes to copy, including the trailing NUL.
71 *
72 * Copies a NUL-terminated string from userspace to kernel space.
73 * Caller must check the specified block with access_ok() before calling
74 * this function.
75 *
76 * On success, returns the length of the string (not including the trailing
77 * NUL).
78 *
79 * If access to userspace fails, returns -EFAULT (some data may have been
80 * copied).
81 *
82 * If @count is smaller than the length of the string, copies @count bytes
83 * and returns @count.
84 */
85long
86__strncpy_from_user(char *dst, const char __user *src, long count)
87{
88 long res;
89 __do_strncpy_from_user(dst, src, count, res);
90 return res;
91}
92EXPORT_SYMBOL(__strncpy_from_user);
93
94/**
95 * strncpy_from_user: - Copy a NUL terminated string from userspace.
96 * @dst: Destination address, in kernel space. This buffer must be at
97 * least @count bytes long.
98 * @src: Source address, in user space.
99 * @count: Maximum number of bytes to copy, including the trailing NUL.
100 *
101 * Copies a NUL-terminated string from userspace to kernel space.
102 *
103 * On success, returns the length of the string (not including the trailing
104 * NUL).
105 *
106 * If access to userspace fails, returns -EFAULT (some data may have been
107 * copied).
108 *
109 * If @count is smaller than the length of the string, copies @count bytes
110 * and returns @count.
111 */
112long
113strncpy_from_user(char *dst, const char __user *src, long count)
114{
115 long res = -EFAULT;
116 if (access_ok(VERIFY_READ, src, 1))
117 __do_strncpy_from_user(dst, src, count, res);
118 return res;
119}
120EXPORT_SYMBOL(strncpy_from_user);
121
122/*
123 * Zero Userspace 37 * Zero Userspace
124 */ 38 */
125 39
@@ -214,10 +128,7 @@ long strnlen_user(const char __user *s, long n)
214 "3: movb $1,%%al\n" 128 "3: movb $1,%%al\n"
215 " jmp 1b\n" 129 " jmp 1b\n"
216 ".previous\n" 130 ".previous\n"
217 ".section __ex_table,\"a\"\n" 131 _ASM_EXTABLE(0b,2b)
218 " .align 4\n"
219 " .long 0b,2b\n"
220 ".previous"
221 :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp) 132 :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp)
222 :"0" (n), "1" (s), "2" (0), "3" (mask) 133 :"0" (n), "1" (s), "2" (0), "3" (mask)
223 :"cc"); 134 :"cc");
@@ -286,47 +197,44 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
286 "101: lea 0(%%eax,%0,4),%0\n" 197 "101: lea 0(%%eax,%0,4),%0\n"
287 " jmp 100b\n" 198 " jmp 100b\n"
288 ".previous\n" 199 ".previous\n"
289 ".section __ex_table,\"a\"\n" 200 _ASM_EXTABLE(1b,100b)
290 " .align 4\n" 201 _ASM_EXTABLE(2b,100b)
291 " .long 1b,100b\n" 202 _ASM_EXTABLE(3b,100b)
292 " .long 2b,100b\n" 203 _ASM_EXTABLE(4b,100b)
293 " .long 3b,100b\n" 204 _ASM_EXTABLE(5b,100b)
294 " .long 4b,100b\n" 205 _ASM_EXTABLE(6b,100b)
295 " .long 5b,100b\n" 206 _ASM_EXTABLE(7b,100b)
296 " .long 6b,100b\n" 207 _ASM_EXTABLE(8b,100b)
297 " .long 7b,100b\n" 208 _ASM_EXTABLE(9b,100b)
298 " .long 8b,100b\n" 209 _ASM_EXTABLE(10b,100b)
299 " .long 9b,100b\n" 210 _ASM_EXTABLE(11b,100b)
300 " .long 10b,100b\n" 211 _ASM_EXTABLE(12b,100b)
301 " .long 11b,100b\n" 212 _ASM_EXTABLE(13b,100b)
302 " .long 12b,100b\n" 213 _ASM_EXTABLE(14b,100b)
303 " .long 13b,100b\n" 214 _ASM_EXTABLE(15b,100b)
304 " .long 14b,100b\n" 215 _ASM_EXTABLE(16b,100b)
305 " .long 15b,100b\n" 216 _ASM_EXTABLE(17b,100b)
306 " .long 16b,100b\n" 217 _ASM_EXTABLE(18b,100b)
307 " .long 17b,100b\n" 218 _ASM_EXTABLE(19b,100b)
308 " .long 18b,100b\n" 219 _ASM_EXTABLE(20b,100b)
309 " .long 19b,100b\n" 220 _ASM_EXTABLE(21b,100b)
310 " .long 20b,100b\n" 221 _ASM_EXTABLE(22b,100b)
311 " .long 21b,100b\n" 222 _ASM_EXTABLE(23b,100b)
312 " .long 22b,100b\n" 223 _ASM_EXTABLE(24b,100b)
313 " .long 23b,100b\n" 224 _ASM_EXTABLE(25b,100b)
314 " .long 24b,100b\n" 225 _ASM_EXTABLE(26b,100b)
315 " .long 25b,100b\n" 226 _ASM_EXTABLE(27b,100b)
316 " .long 26b,100b\n" 227 _ASM_EXTABLE(28b,100b)
317 " .long 27b,100b\n" 228 _ASM_EXTABLE(29b,100b)
318 " .long 28b,100b\n" 229 _ASM_EXTABLE(30b,100b)
319 " .long 29b,100b\n" 230 _ASM_EXTABLE(31b,100b)
320 " .long 30b,100b\n" 231 _ASM_EXTABLE(32b,100b)
321 " .long 31b,100b\n" 232 _ASM_EXTABLE(33b,100b)
322 " .long 32b,100b\n" 233 _ASM_EXTABLE(34b,100b)
323 " .long 33b,100b\n" 234 _ASM_EXTABLE(35b,100b)
324 " .long 34b,100b\n" 235 _ASM_EXTABLE(36b,100b)
325 " .long 35b,100b\n" 236 _ASM_EXTABLE(37b,100b)
326 " .long 36b,100b\n" 237 _ASM_EXTABLE(99b,101b)
327 " .long 37b,100b\n"
328 " .long 99b,101b\n"
329 ".previous"
330 : "=&c"(size), "=&D" (d0), "=&S" (d1) 238 : "=&c"(size), "=&D" (d0), "=&S" (d1)
331 : "1"(to), "2"(from), "0"(size) 239 : "1"(to), "2"(from), "0"(size)
332 : "eax", "edx", "memory"); 240 : "eax", "edx", "memory");
@@ -399,29 +307,26 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
399 " popl %0\n" 307 " popl %0\n"
400 " jmp 8b\n" 308 " jmp 8b\n"
401 ".previous\n" 309 ".previous\n"
402 ".section __ex_table,\"a\"\n" 310 _ASM_EXTABLE(0b,16b)
403 " .align 4\n" 311 _ASM_EXTABLE(1b,16b)
404 " .long 0b,16b\n" 312 _ASM_EXTABLE(2b,16b)
405 " .long 1b,16b\n" 313 _ASM_EXTABLE(21b,16b)
406 " .long 2b,16b\n" 314 _ASM_EXTABLE(3b,16b)
407 " .long 21b,16b\n" 315 _ASM_EXTABLE(31b,16b)
408 " .long 3b,16b\n" 316 _ASM_EXTABLE(4b,16b)
409 " .long 31b,16b\n" 317 _ASM_EXTABLE(41b,16b)
410 " .long 4b,16b\n" 318 _ASM_EXTABLE(10b,16b)
411 " .long 41b,16b\n" 319 _ASM_EXTABLE(51b,16b)
412 " .long 10b,16b\n" 320 _ASM_EXTABLE(11b,16b)
413 " .long 51b,16b\n" 321 _ASM_EXTABLE(61b,16b)
414 " .long 11b,16b\n" 322 _ASM_EXTABLE(12b,16b)
415 " .long 61b,16b\n" 323 _ASM_EXTABLE(71b,16b)
416 " .long 12b,16b\n" 324 _ASM_EXTABLE(13b,16b)
417 " .long 71b,16b\n" 325 _ASM_EXTABLE(81b,16b)
418 " .long 13b,16b\n" 326 _ASM_EXTABLE(14b,16b)
419 " .long 81b,16b\n" 327 _ASM_EXTABLE(91b,16b)
420 " .long 14b,16b\n" 328 _ASM_EXTABLE(6b,9b)
421 " .long 91b,16b\n" 329 _ASM_EXTABLE(7b,16b)
422 " .long 6b,9b\n"
423 " .long 7b,16b\n"
424 ".previous"
425 : "=&c"(size), "=&D" (d0), "=&S" (d1) 330 : "=&c"(size), "=&D" (d0), "=&S" (d1)
426 : "1"(to), "2"(from), "0"(size) 331 : "1"(to), "2"(from), "0"(size)
427 : "eax", "edx", "memory"); 332 : "eax", "edx", "memory");
@@ -501,29 +406,26 @@ static unsigned long __copy_user_zeroing_intel_nocache(void *to,
501 " popl %0\n" 406 " popl %0\n"
502 " jmp 8b\n" 407 " jmp 8b\n"
503 ".previous\n" 408 ".previous\n"
504 ".section __ex_table,\"a\"\n" 409 _ASM_EXTABLE(0b,16b)
505 " .align 4\n" 410 _ASM_EXTABLE(1b,16b)
506 " .long 0b,16b\n" 411 _ASM_EXTABLE(2b,16b)
507 " .long 1b,16b\n" 412 _ASM_EXTABLE(21b,16b)
508 " .long 2b,16b\n" 413 _ASM_EXTABLE(3b,16b)
509 " .long 21b,16b\n" 414 _ASM_EXTABLE(31b,16b)
510 " .long 3b,16b\n" 415 _ASM_EXTABLE(4b,16b)
511 " .long 31b,16b\n" 416 _ASM_EXTABLE(41b,16b)
512 " .long 4b,16b\n" 417 _ASM_EXTABLE(10b,16b)
513 " .long 41b,16b\n" 418 _ASM_EXTABLE(51b,16b)
514 " .long 10b,16b\n" 419 _ASM_EXTABLE(11b,16b)
515 " .long 51b,16b\n" 420 _ASM_EXTABLE(61b,16b)
516 " .long 11b,16b\n" 421 _ASM_EXTABLE(12b,16b)
517 " .long 61b,16b\n" 422 _ASM_EXTABLE(71b,16b)
518 " .long 12b,16b\n" 423 _ASM_EXTABLE(13b,16b)
519 " .long 71b,16b\n" 424 _ASM_EXTABLE(81b,16b)
520 " .long 13b,16b\n" 425 _ASM_EXTABLE(14b,16b)
521 " .long 81b,16b\n" 426 _ASM_EXTABLE(91b,16b)
522 " .long 14b,16b\n" 427 _ASM_EXTABLE(6b,9b)
523 " .long 91b,16b\n" 428 _ASM_EXTABLE(7b,16b)
524 " .long 6b,9b\n"
525 " .long 7b,16b\n"
526 ".previous"
527 : "=&c"(size), "=&D" (d0), "=&S" (d1) 429 : "=&c"(size), "=&D" (d0), "=&S" (d1)
528 : "1"(to), "2"(from), "0"(size) 430 : "1"(to), "2"(from), "0"(size)
529 : "eax", "edx", "memory"); 431 : "eax", "edx", "memory");
@@ -592,29 +494,26 @@ static unsigned long __copy_user_intel_nocache(void *to,
592 "9: lea 0(%%eax,%0,4),%0\n" 494 "9: lea 0(%%eax,%0,4),%0\n"
593 "16: jmp 8b\n" 495 "16: jmp 8b\n"
594 ".previous\n" 496 ".previous\n"
595 ".section __ex_table,\"a\"\n" 497 _ASM_EXTABLE(0b,16b)
596 " .align 4\n" 498 _ASM_EXTABLE(1b,16b)
597 " .long 0b,16b\n" 499 _ASM_EXTABLE(2b,16b)
598 " .long 1b,16b\n" 500 _ASM_EXTABLE(21b,16b)
599 " .long 2b,16b\n" 501 _ASM_EXTABLE(3b,16b)
600 " .long 21b,16b\n" 502 _ASM_EXTABLE(31b,16b)
601 " .long 3b,16b\n" 503 _ASM_EXTABLE(4b,16b)
602 " .long 31b,16b\n" 504 _ASM_EXTABLE(41b,16b)
603 " .long 4b,16b\n" 505 _ASM_EXTABLE(10b,16b)
604 " .long 41b,16b\n" 506 _ASM_EXTABLE(51b,16b)
605 " .long 10b,16b\n" 507 _ASM_EXTABLE(11b,16b)
606 " .long 51b,16b\n" 508 _ASM_EXTABLE(61b,16b)
607 " .long 11b,16b\n" 509 _ASM_EXTABLE(12b,16b)
608 " .long 61b,16b\n" 510 _ASM_EXTABLE(71b,16b)
609 " .long 12b,16b\n" 511 _ASM_EXTABLE(13b,16b)
610 " .long 71b,16b\n" 512 _ASM_EXTABLE(81b,16b)
611 " .long 13b,16b\n" 513 _ASM_EXTABLE(14b,16b)
612 " .long 81b,16b\n" 514 _ASM_EXTABLE(91b,16b)
613 " .long 14b,16b\n" 515 _ASM_EXTABLE(6b,9b)
614 " .long 91b,16b\n" 516 _ASM_EXTABLE(7b,16b)
615 " .long 6b,9b\n"
616 " .long 7b,16b\n"
617 ".previous"
618 : "=&c"(size), "=&D" (d0), "=&S" (d1) 517 : "=&c"(size), "=&D" (d0), "=&S" (d1)
619 : "1"(to), "2"(from), "0"(size) 518 : "1"(to), "2"(from), "0"(size)
620 : "eax", "edx", "memory"); 519 : "eax", "edx", "memory");
@@ -661,12 +560,9 @@ do { \
661 "3: lea 0(%3,%0,4),%0\n" \ 560 "3: lea 0(%3,%0,4),%0\n" \
662 " jmp 2b\n" \ 561 " jmp 2b\n" \
663 ".previous\n" \ 562 ".previous\n" \
664 ".section __ex_table,\"a\"\n" \ 563 _ASM_EXTABLE(4b,5b) \
665 " .align 4\n" \ 564 _ASM_EXTABLE(0b,3b) \
666 " .long 4b,5b\n" \ 565 _ASM_EXTABLE(1b,2b) \
667 " .long 0b,3b\n" \
668 " .long 1b,2b\n" \
669 ".previous" \
670 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ 566 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
671 : "3"(size), "0"(size), "1"(to), "2"(from) \ 567 : "3"(size), "0"(size), "1"(to), "2"(from) \
672 : "memory"); \ 568 : "memory"); \
@@ -703,12 +599,9 @@ do { \
703 " popl %0\n" \ 599 " popl %0\n" \
704 " jmp 2b\n" \ 600 " jmp 2b\n" \
705 ".previous\n" \ 601 ".previous\n" \
706 ".section __ex_table,\"a\"\n" \ 602 _ASM_EXTABLE(4b,5b) \
707 " .align 4\n" \ 603 _ASM_EXTABLE(0b,3b) \
708 " .long 4b,5b\n" \ 604 _ASM_EXTABLE(1b,6b) \
709 " .long 0b,3b\n" \
710 " .long 1b,6b\n" \
711 ".previous" \
712 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ 605 : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
713 : "3"(size), "0"(size), "1"(to), "2"(from) \ 606 : "3"(size), "0"(size), "1"(to), "2"(from) \
714 : "memory"); \ 607 : "memory"); \
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index b7c2849ffb66..0d0326f388c0 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -9,55 +9,6 @@
9#include <asm/uaccess.h> 9#include <asm/uaccess.h>
10 10
11/* 11/*
12 * Copy a null terminated string from userspace.
13 */
14
15#define __do_strncpy_from_user(dst,src,count,res) \
16do { \
17 long __d0, __d1, __d2; \
18 might_fault(); \
19 __asm__ __volatile__( \
20 " testq %1,%1\n" \
21 " jz 2f\n" \
22 "0: lodsb\n" \
23 " stosb\n" \
24 " testb %%al,%%al\n" \
25 " jz 1f\n" \
26 " decq %1\n" \
27 " jnz 0b\n" \
28 "1: subq %1,%0\n" \
29 "2:\n" \
30 ".section .fixup,\"ax\"\n" \
31 "3: movq %5,%0\n" \
32 " jmp 2b\n" \
33 ".previous\n" \
34 _ASM_EXTABLE(0b,3b) \
35 : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
36 "=&D" (__d2) \
37 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
38 : "memory"); \
39} while (0)
40
41long
42__strncpy_from_user(char *dst, const char __user *src, long count)
43{
44 long res;
45 __do_strncpy_from_user(dst, src, count, res);
46 return res;
47}
48EXPORT_SYMBOL(__strncpy_from_user);
49
50long
51strncpy_from_user(char *dst, const char __user *src, long count)
52{
53 long res = -EFAULT;
54 if (access_ok(VERIFY_READ, src, 1))
55 return __strncpy_from_user(dst, src, count);
56 return res;
57}
58EXPORT_SYMBOL(strncpy_from_user);
59
60/*
61 * Zero Userspace 12 * Zero Userspace
62 */ 13 */
63 14
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 1fb85dbe390a..903ec1e9c326 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,11 +1,23 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <linux/spinlock.h> 2#include <linux/spinlock.h>
3#include <linux/sort.h>
3#include <asm/uaccess.h> 4#include <asm/uaccess.h>
4 5
6static inline unsigned long
7ex_insn_addr(const struct exception_table_entry *x)
8{
9 return (unsigned long)&x->insn + x->insn;
10}
11static inline unsigned long
12ex_fixup_addr(const struct exception_table_entry *x)
13{
14 return (unsigned long)&x->fixup + x->fixup;
15}
5 16
6int fixup_exception(struct pt_regs *regs) 17int fixup_exception(struct pt_regs *regs)
7{ 18{
8 const struct exception_table_entry *fixup; 19 const struct exception_table_entry *fixup;
20 unsigned long new_ip;
9 21
10#ifdef CONFIG_PNPBIOS 22#ifdef CONFIG_PNPBIOS
11 if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { 23 if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -23,15 +35,135 @@ int fixup_exception(struct pt_regs *regs)
23 35
24 fixup = search_exception_tables(regs->ip); 36 fixup = search_exception_tables(regs->ip);
25 if (fixup) { 37 if (fixup) {
26 /* If fixup is less than 16, it means uaccess error */ 38 new_ip = ex_fixup_addr(fixup);
27 if (fixup->fixup < 16) { 39
40 if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
41 /* Special hack for uaccess_err */
28 current_thread_info()->uaccess_err = 1; 42 current_thread_info()->uaccess_err = 1;
29 regs->ip += fixup->fixup; 43 new_ip -= 0x7ffffff0;
30 return 1;
31 } 44 }
32 regs->ip = fixup->fixup; 45 regs->ip = new_ip;
33 return 1; 46 return 1;
34 } 47 }
35 48
36 return 0; 49 return 0;
37} 50}
51
52/* Restricted version used during very early boot */
53int __init early_fixup_exception(unsigned long *ip)
54{
55 const struct exception_table_entry *fixup;
56 unsigned long new_ip;
57
58 fixup = search_exception_tables(*ip);
59 if (fixup) {
60 new_ip = ex_fixup_addr(fixup);
61
62 if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
63 /* uaccess handling not supported during early boot */
64 return 0;
65 }
66
67 *ip = new_ip;
68 return 1;
69 }
70
71 return 0;
72}
73
74/*
75 * Search one exception table for an entry corresponding to the
76 * given instruction address, and return the address of the entry,
77 * or NULL if none is found.
78 * We use a binary search, and thus we assume that the table is
79 * already sorted.
80 */
81const struct exception_table_entry *
82search_extable(const struct exception_table_entry *first,
83 const struct exception_table_entry *last,
84 unsigned long value)
85{
86 while (first <= last) {
87 const struct exception_table_entry *mid;
88 unsigned long addr;
89
90 mid = ((last - first) >> 1) + first;
91 addr = ex_insn_addr(mid);
92 if (addr < value)
93 first = mid + 1;
94 else if (addr > value)
95 last = mid - 1;
96 else
97 return mid;
98 }
99 return NULL;
100}
101
102/*
103 * The exception table needs to be sorted so that the binary
104 * search that we use to find entries in it works properly.
105 * This is used both for the kernel exception table and for
106 * the exception tables of modules that get loaded.
107 *
108 */
109static int cmp_ex(const void *a, const void *b)
110{
111 const struct exception_table_entry *x = a, *y = b;
112
113 /*
114 * This value will always end up fitting in an int, because on
115 * both i386 and x86-64 the kernel symbol-reachable address
116 * space is < 2 GiB.
117 *
118 * This compare is only valid after normalization.
119 */
120 return x->insn - y->insn;
121}
122
123void sort_extable(struct exception_table_entry *start,
124 struct exception_table_entry *finish)
125{
126 struct exception_table_entry *p;
127 int i;
128
129 /* Convert all entries to being relative to the start of the section */
130 i = 0;
131 for (p = start; p < finish; p++) {
132 p->insn += i;
133 i += 4;
134 p->fixup += i;
135 i += 4;
136 }
137
138 sort(start, finish - start, sizeof(struct exception_table_entry),
139 cmp_ex, NULL);
140
141 /* Denormalize all entries */
142 i = 0;
143 for (p = start; p < finish; p++) {
144 p->insn -= i;
145 i += 4;
146 p->fixup -= i;
147 i += 4;
148 }
149}
150
151#ifdef CONFIG_MODULES
152/*
153 * If the exception table is sorted, any entries referring to the module init
154 * will be at the beginning or the end.
155 */
156void trim_init_extable(struct module *m)
157{
158 /* trim the beginning */
159 while (m->num_exentries &&
160 within_module_init(ex_insn_addr(&m->extable[0]), m)) {
161 m->extable++;
162 m->num_exentries--;
163 }
164 /* trim the end */
165 while (m->num_exentries &&
166 within_module_init(ex_insn_addr(&m->extable[m->num_exentries-1]), m))
167 m->num_exentries--;
168}
169#endif /* CONFIG_MODULES */
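
Everything above implements the relative exception table format: each
entry stores 32-bit offsets from its own insn and fixup fields to the
faulting instruction and its landing point, so entries need no relocation
and shrink from 16 to 8 bytes on x86-64; the old "fixup < 16 means
uaccess_err" convention becomes a 0x7ffffff0 bias on the fixup offset. A
userspace round-trip of the encoding, under those assumptions (stand-in
addresses, not kernel code):

#include <stdio.h>

struct exception_table_entry {
	int insn, fixup;	/* self-relative 32-bit offsets */
};

static unsigned long ex_insn_addr(const struct exception_table_entry *x)
{
	return (unsigned long)&x->insn + x->insn;
}

static unsigned long ex_fixup_addr(const struct exception_table_entry *x)
{
	return (unsigned long)&x->fixup + x->fixup;
}

int main(void)
{
	static char insn_site, fixup_site;	/* stand-ins for code labels */
	static struct exception_table_entry e;

	/* encode: what _ASM_EXTABLE()'s ".long (from) - ." emits */
	e.insn  = (int)((unsigned long)&insn_site - (unsigned long)&e.insn);
	e.fixup = (int)((unsigned long)&fixup_site - (unsigned long)&e.fixup);

	printf("insn ok:  %d\n", ex_insn_addr(&e) == (unsigned long)&insn_site);
	printf("fixup ok: %d\n", ex_fixup_addr(&e) == (unsigned long)&fixup_site);
	return 0;
}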
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 4f0cec7e4ffb..319b6f2fb8b9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -29,8 +29,14 @@ int direct_gbpages
29#endif 29#endif
30; 30;
31 31
32static void __init find_early_table_space(unsigned long end, int use_pse, 32struct map_range {
33 int use_gbpages) 33 unsigned long start;
34 unsigned long end;
35 unsigned page_size_mask;
36};
37
38static void __init find_early_table_space(struct map_range *mr, unsigned long end,
39 int use_pse, int use_gbpages)
34{ 40{
35 unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; 41 unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
36 phys_addr_t base; 42 phys_addr_t base;
@@ -55,6 +61,9 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
55#ifdef CONFIG_X86_32 61#ifdef CONFIG_X86_32
56 extra += PMD_SIZE; 62 extra += PMD_SIZE;
57#endif 63#endif
64 /* The first 2/4M doesn't use large pages. */
65 extra += mr->end - mr->start;
66
58 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; 67 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
59 } else 68 } else
60 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; 69 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -84,12 +93,6 @@ void __init native_pagetable_reserve(u64 start, u64 end)
84 memblock_reserve(start, end - start); 93 memblock_reserve(start, end - start);
85} 94}
86 95
87struct map_range {
88 unsigned long start;
89 unsigned long end;
90 unsigned page_size_mask;
91};
92
93#ifdef CONFIG_X86_32 96#ifdef CONFIG_X86_32
94#define NR_RANGE_MR 3 97#define NR_RANGE_MR 3
95#else /* CONFIG_X86_64 */ 98#else /* CONFIG_X86_64 */
@@ -261,7 +264,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
261 * nodes are discovered. 264 * nodes are discovered.
262 */ 265 */
263 if (!after_bootmem) 266 if (!after_bootmem)
264 find_early_table_space(end, use_pse, use_gbpages); 267 find_early_table_space(&mr[0], end, use_pse, use_gbpages);
265 268
266 for (i = 0; i < nr_range; i++) 269 for (i = 0; i < nr_range; i++)
267 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, 270 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
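
Passing &mr[0] lets find_early_table_space() account for the head of the
address space, which stays on 4 KiB pages even with PSE enabled. A worked
example of the extra term with hypothetical numbers (a first map_range of
exactly 2 MiB):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PMD_SIZE	(2UL << 20)

int main(void)
{
	/* hypothetical: mr->end - mr->start == 2 MiB */
	unsigned long extra = PMD_SIZE;
	unsigned long ptes = (extra + PAGE_SIZE - 1) / PAGE_SIZE;

	/* 512 PTEs at 8 bytes each fill exactly one page-table page */
	printf("ptes=%lu table_pages=%lu\n", ptes,
	       (ptes * 8 + PAGE_SIZE - 1) / PAGE_SIZE);
	return 0;
}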
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index fc18be0f6f29..2b6b4a3c8beb 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -407,12 +407,12 @@ static unsigned long __meminit
407phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, 407phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
408 unsigned long page_size_mask, pgprot_t prot) 408 unsigned long page_size_mask, pgprot_t prot)
409{ 409{
410 unsigned long pages = 0; 410 unsigned long pages = 0, next;
411 unsigned long last_map_addr = end; 411 unsigned long last_map_addr = end;
412 412
413 int i = pmd_index(address); 413 int i = pmd_index(address);
414 414
415 for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { 415 for (; i < PTRS_PER_PMD; i++, address = next) {
416 unsigned long pte_phys; 416 unsigned long pte_phys;
417 pmd_t *pmd = pmd_page + pmd_index(address); 417 pmd_t *pmd = pmd_page + pmd_index(address);
418 pte_t *pte; 418 pte_t *pte;
@@ -426,6 +426,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
426 break; 426 break;
427 } 427 }
428 428
429 next = (address & PMD_MASK) + PMD_SIZE;
430
429 if (pmd_val(*pmd)) { 431 if (pmd_val(*pmd)) {
430 if (!pmd_large(*pmd)) { 432 if (!pmd_large(*pmd)) {
431 spin_lock(&init_mm.page_table_lock); 433 spin_lock(&init_mm.page_table_lock);
@@ -449,7 +451,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
449 * attributes. 451 * attributes.
450 */ 452 */
451 if (page_size_mask & (1 << PG_LEVEL_2M)) { 453 if (page_size_mask & (1 << PG_LEVEL_2M)) {
452 pages++; 454 last_map_addr = next;
453 continue; 455 continue;
454 } 456 }
455 new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); 457 new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
@@ -462,7 +464,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
462 pfn_pte(address >> PAGE_SHIFT, 464 pfn_pte(address >> PAGE_SHIFT,
463 __pgprot(pgprot_val(prot) | _PAGE_PSE))); 465 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
464 spin_unlock(&init_mm.page_table_lock); 466 spin_unlock(&init_mm.page_table_lock);
465 last_map_addr = (address & PMD_MASK) + PMD_SIZE; 467 last_map_addr = next;
466 continue; 468 continue;
467 } 469 }
468 470
@@ -482,11 +484,11 @@ static unsigned long __meminit
482phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, 484phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
483 unsigned long page_size_mask) 485 unsigned long page_size_mask)
484{ 486{
485 unsigned long pages = 0; 487 unsigned long pages = 0, next;
486 unsigned long last_map_addr = end; 488 unsigned long last_map_addr = end;
487 int i = pud_index(addr); 489 int i = pud_index(addr);
488 490
489 for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) { 491 for (; i < PTRS_PER_PUD; i++, addr = next) {
490 unsigned long pmd_phys; 492 unsigned long pmd_phys;
491 pud_t *pud = pud_page + pud_index(addr); 493 pud_t *pud = pud_page + pud_index(addr);
492 pmd_t *pmd; 494 pmd_t *pmd;
@@ -495,8 +497,9 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
495 if (addr >= end) 497 if (addr >= end)
496 break; 498 break;
497 499
498 if (!after_bootmem && 500 next = (addr & PUD_MASK) + PUD_SIZE;
499 !e820_any_mapped(addr, addr+PUD_SIZE, 0)) { 501
502 if (!after_bootmem && !e820_any_mapped(addr, next, 0)) {
500 set_pud(pud, __pud(0)); 503 set_pud(pud, __pud(0));
501 continue; 504 continue;
502 } 505 }
@@ -523,7 +526,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
523 * attributes. 526 * attributes.
524 */ 527 */
525 if (page_size_mask & (1 << PG_LEVEL_1G)) { 528 if (page_size_mask & (1 << PG_LEVEL_1G)) {
526 pages++; 529 last_map_addr = next;
527 continue; 530 continue;
528 } 531 }
529 prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); 532 prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
@@ -535,7 +538,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
535 set_pte((pte_t *)pud, 538 set_pte((pte_t *)pud,
536 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 539 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
537 spin_unlock(&init_mm.page_table_lock); 540 spin_unlock(&init_mm.page_table_lock);
538 last_map_addr = (addr & PUD_MASK) + PUD_SIZE; 541 last_map_addr = next;
539 continue; 542 continue;
540 } 543 }
541 544
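
The next = (address & PMD_MASK) + PMD_SIZE step (and its PUD analogue)
advances the loop to the following large-page boundary instead of adding
the page size to a possibly unaligned address, and last_map_addr now
records that boundary even when an existing large mapping is reused. A
tiny check of the rounding, using the usual 2 MiB constants:

#include <stdio.h>

#define PMD_SIZE	(2UL << 20)
#define PMD_MASK	(~(PMD_SIZE - 1))

int main(void)
{
	unsigned long address = 0x345000;	/* not 2 MiB aligned */

	/* old step: keeps the misalignment forever */
	printf("old next: %#lx\n", address + PMD_SIZE);		/* 0x545000 */
	/* new step: snaps to the next PMD boundary */
	printf("new next: %#lx\n", (address & PMD_MASK) + PMD_SIZE); /* 0x400000 */
	return 0;
}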
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 53489ff6bf82..871dd8868170 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -339,9 +339,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
339 } else { 339 } else {
340 unsigned long n; 340 unsigned long n;
341 341
342 n = simple_strtoul(emu_cmdline, NULL, 0); 342 n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
343 ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n); 343 ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
344 } 344 }
345 if (*emu_cmdline == ':')
346 emu_cmdline++;
345 347
346 if (ret < 0) 348 if (ret < 0)
347 goto no_emu; 349 goto no_emu;
@@ -418,7 +420,9 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
418 int physj = emu_nid_to_phys[j]; 420 int physj = emu_nid_to_phys[j];
419 int dist; 421 int dist;
420 422
421 if (physi >= numa_dist_cnt || physj >= numa_dist_cnt) 423 if (get_option(&emu_cmdline, &dist) == 2)
424 ;
425 else if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
422 dist = physi == physj ? 426 dist = physi == physj ?
423 LOCAL_DISTANCE : REMOTE_DISTANCE; 427 LOCAL_DISTANCE : REMOTE_DISTANCE;
424 else 428 else
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index d6c0418c3e47..5e57e113b72c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -61,11 +61,13 @@ static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
61 */ 61 */
62void leave_mm(int cpu) 62void leave_mm(int cpu)
63{ 63{
64 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) 64 struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
65 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
65 BUG(); 66 BUG();
66 cpumask_clear_cpu(cpu, 67 if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
67 mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); 68 cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
68 load_cr3(swapper_pg_dir); 69 load_cr3(swapper_pg_dir);
70 }
69} 71}
70EXPORT_SYMBOL_GPL(leave_mm); 72EXPORT_SYMBOL_GPL(leave_mm);
71 73
@@ -152,8 +154,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
152 * BUG(); 154 * BUG();
153 */ 155 */
154 156
155 if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { 157 if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) {
156 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { 158 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
157 if (f->flush_va == TLB_FLUSH_ALL) 159 if (f->flush_va == TLB_FLUSH_ALL)
158 local_flush_tlb(); 160 local_flush_tlb();
159 else 161 else
@@ -322,7 +324,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
322static void do_flush_tlb_all(void *info) 324static void do_flush_tlb_all(void *info)
323{ 325{
324 __flush_tlb_all(); 326 __flush_tlb_all();
325 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) 327 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
326 leave_mm(smp_processor_id()); 328 leave_mm(smp_processor_id());
327} 329}
328 330
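
percpu_read()/percpu_write() were x86-private accessors; the
this_cpu_read()/this_cpu_write() replacements are the generic
<linux/percpu.h> operations and still compile to a single %gs-relative
instruction on x86. A sketch of the idiom with a hypothetical per-CPU
variable (illustrative, not from this patch):

#include <linux/percpu.h>

static DEFINE_PER_CPU(int, demo_tlb_state);

static void demo_mark_lazy(void)
{
	/* Each this_cpu op is one instruction, so a lone read or write
	 * needs no preempt_disable(); a read-then-write pair that must
	 * stay on one CPU still does. */
	if (this_cpu_read(demo_tlb_state) == 0)
		this_cpu_write(demo_tlb_state, 1);
}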
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index ed2835e148b5..fc09c2754e08 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -9,11 +9,11 @@
9 9
10struct pci_root_info { 10struct pci_root_info {
11 struct acpi_device *bridge; 11 struct acpi_device *bridge;
12 char *name; 12 char name[16];
13 unsigned int res_num; 13 unsigned int res_num;
14 struct resource *res; 14 struct resource *res;
15 struct list_head *resources;
16 int busnum; 15 int busnum;
16 struct pci_sysdata sd;
17}; 17};
18 18
19static bool pci_use_crs = true; 19static bool pci_use_crs = true;
@@ -245,13 +245,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
245 return AE_OK; 245 return AE_OK;
246} 246}
247 247
248static bool resource_contains(struct resource *res, resource_size_t point)
249{
250 if (res->start <= point && point <= res->end)
251 return true;
252 return false;
253}
254
255static void coalesce_windows(struct pci_root_info *info, unsigned long type) 248static void coalesce_windows(struct pci_root_info *info, unsigned long type)
256{ 249{
257 int i, j; 250 int i, j;
@@ -272,10 +265,7 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type)
272 * our resources no longer match the ACPI _CRS, but 265 * our resources no longer match the ACPI _CRS, but
273 * the kernel resource tree doesn't allow overlaps. 266 * the kernel resource tree doesn't allow overlaps.
274 */ 267 */
275 if (resource_contains(res1, res2->start) || 268 if (resource_overlaps(res1, res2)) {
276 resource_contains(res1, res2->end) ||
277 resource_contains(res2, res1->start) ||
278 resource_contains(res2, res1->end)) {
279 res1->start = min(res1->start, res2->start); 269 res1->start = min(res1->start, res2->start);
280 res1->end = max(res1->end, res2->end); 270 res1->end = max(res1->end, res2->end);
281 dev_info(&info->bridge->dev, 271 dev_info(&info->bridge->dev,
@@ -287,7 +277,8 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type)
287 } 277 }
288} 278}
289 279
290static void add_resources(struct pci_root_info *info) 280static void add_resources(struct pci_root_info *info,
281 struct list_head *resources)
291{ 282{
292 int i; 283 int i;
293 struct resource *res, *root, *conflict; 284 struct resource *res, *root, *conflict;
@@ -311,53 +302,74 @@ static void add_resources(struct pci_root_info *info)
311 "ignoring host bridge window %pR (conflicts with %s %pR)\n", 302 "ignoring host bridge window %pR (conflicts with %s %pR)\n",
312 res, conflict->name, conflict); 303 res, conflict->name, conflict);
313 else 304 else
314 pci_add_resource(info->resources, res); 305 pci_add_resource(resources, res);
315 } 306 }
316} 307}
317 308
309static void free_pci_root_info_res(struct pci_root_info *info)
310{
311 kfree(info->res);
312 info->res = NULL;
313 info->res_num = 0;
314}
315
316static void __release_pci_root_info(struct pci_root_info *info)
317{
318 int i;
319 struct resource *res;
320
321 for (i = 0; i < info->res_num; i++) {
322 res = &info->res[i];
323
324 if (!res->parent)
325 continue;
326
327 if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO)))
328 continue;
329
330 release_resource(res);
331 }
332
333 free_pci_root_info_res(info);
334
335 kfree(info);
336}
337static void release_pci_root_info(struct pci_host_bridge *bridge)
338{
339 struct pci_root_info *info = bridge->release_data;
340
341 __release_pci_root_info(info);
342}
343
318static void 344static void
319get_current_resources(struct acpi_device *device, int busnum, 345probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
320 int domain, struct list_head *resources) 346 int busnum, int domain)
321{ 347{
322 struct pci_root_info info;
323 size_t size; 348 size_t size;
324 349
325 info.bridge = device; 350 info->bridge = device;
326 info.res_num = 0; 351 info->res_num = 0;
327 info.resources = resources;
328 acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, 352 acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource,
329 &info); 353 info);
330 if (!info.res_num) 354 if (!info->res_num)
331 return; 355 return;
332 356
333 size = sizeof(*info.res) * info.res_num; 357 size = sizeof(*info->res) * info->res_num;
334 info.res = kmalloc(size, GFP_KERNEL); 358 info->res_num = 0;
335 if (!info.res) 359 info->res = kmalloc(size, GFP_KERNEL);
360 if (!info->res)
336 return; 361 return;
337 362
338 info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); 363 sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum);
339 if (!info.name)
340 goto name_alloc_fail;
341 364
342 info.res_num = 0;
343 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, 365 acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
344 &info); 366 info);
345
346 if (pci_use_crs) {
347 add_resources(&info);
348
349 return;
350 }
351
352 kfree(info.name);
353
354name_alloc_fail:
355 kfree(info.res);
356} 367}
357 368
358struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) 369struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
359{ 370{
360 struct acpi_device *device = root->device; 371 struct acpi_device *device = root->device;
372 struct pci_root_info *info = NULL;
361 int domain = root->segment; 373 int domain = root->segment;
362 int busnum = root->secondary.start; 374 int busnum = root->secondary.start;
363 LIST_HEAD(resources); 375 LIST_HEAD(resources);
@@ -389,17 +401,14 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
389 if (node != -1 && !node_online(node)) 401 if (node != -1 && !node_online(node))
390 node = -1; 402 node = -1;
391 403
392 /* Allocate per-root-bus (not per bus) arch-specific data. 404 info = kzalloc(sizeof(*info), GFP_KERNEL);
393 * TODO: leak; this memory is never freed. 405 if (!info) {
394 * It's arguable whether it's worth the trouble to care.
395 */
396 sd = kzalloc(sizeof(*sd), GFP_KERNEL);
397 if (!sd) {
398 printk(KERN_WARNING "pci_bus %04x:%02x: " 406 printk(KERN_WARNING "pci_bus %04x:%02x: "
399 "ignored (out of memory)\n", domain, busnum); 407 "ignored (out of memory)\n", domain, busnum);
400 return NULL; 408 return NULL;
401 } 409 }
402 410
411 sd = &info->sd;
403 sd->domain = domain; 412 sd->domain = domain;
404 sd->node = node; 413 sd->node = node;
405 /* 414 /*
@@ -413,22 +422,32 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
413 * be replaced by sd. 422 * be replaced by sd.
414 */ 423 */
415 memcpy(bus->sysdata, sd, sizeof(*sd)); 424 memcpy(bus->sysdata, sd, sizeof(*sd));
416 kfree(sd); 425 kfree(info);
417 } else { 426 } else {
418 get_current_resources(device, busnum, domain, &resources); 427 probe_pci_root_info(info, device, busnum, domain);
419 428
420 /* 429 /*
421 * _CRS with no apertures is normal, so only fall back to 430 * _CRS with no apertures is normal, so only fall back to
422 * defaults or native bridge info if we're ignoring _CRS. 431 * defaults or native bridge info if we're ignoring _CRS.
423 */ 432 */
424 if (!pci_use_crs) 433 if (pci_use_crs)
434 add_resources(info, &resources);
435 else {
436 free_pci_root_info_res(info);
425 x86_pci_root_bus_resources(busnum, &resources); 437 x86_pci_root_bus_resources(busnum, &resources);
438 }
439
426 bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, 440 bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd,
427 &resources); 441 &resources);
428 if (bus) 442 if (bus) {
429 bus->subordinate = pci_scan_child_bus(bus); 443 bus->subordinate = pci_scan_child_bus(bus);
430 else 444 pci_set_host_bridge_release(
445 to_pci_host_bridge(bus->bridge),
446 release_pci_root_info, info);
447 } else {
431 pci_free_resource_list(&resources); 448 pci_free_resource_list(&resources);
449 __release_pci_root_info(info);
450 }
432 } 451 }
433 452
434 /* After the PCI-E bus has been walked and all devices discovered, 453 /* After the PCI-E bus has been walked and all devices discovered,
@@ -445,9 +464,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
445 } 464 }
446 } 465 }
447 466
448 if (!bus)
449 kfree(sd);
450
451 if (bus && node != -1) { 467 if (bus && node != -1) {
452#ifdef CONFIG_ACPI_NUMA 468#ifdef CONFIG_ACPI_NUMA
453 if (pxm >= 0) 469 if (pxm >= 0)
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 0567df3890e1..5aed49bff058 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -32,6 +32,27 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = {
32 32
33#define RANGE_NUM 16 33#define RANGE_NUM 16
34 34
35static struct pci_root_info __init *find_pci_root_info(int node, int link)
36{
37 struct pci_root_info *info;
38
39 /* find the position */
40 list_for_each_entry(info, &pci_root_infos, list)
41 if (info->node == node && info->link == link)
42 return info;
43
44 return NULL;
45}
46
47static void __init set_mp_bus_range_to_node(int min_bus, int max_bus, int node)
48{
49#ifdef CONFIG_NUMA
50 int j;
51
52 for (j = min_bus; j <= max_bus; j++)
53 set_mp_bus_to_node(j, node);
54#endif
55}
35/** 56/**
36 * early_fill_mp_bus_to_node() 57 * early_fill_mp_bus_to_node()
37 * called before pcibios_scan_root and pci_scan_bus 58 * called before pcibios_scan_root and pci_scan_bus
@@ -41,7 +62,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = {
41static int __init early_fill_mp_bus_info(void) 62static int __init early_fill_mp_bus_info(void)
42{ 63{
43 int i; 64 int i;
44 int j;
45 unsigned bus; 65 unsigned bus;
46 unsigned slot; 66 unsigned slot;
47 int node; 67 int node;
@@ -50,7 +70,6 @@ static int __init early_fill_mp_bus_info(void)
50 int def_link; 70 int def_link;
51 struct pci_root_info *info; 71 struct pci_root_info *info;
52 u32 reg; 72 u32 reg;
53 struct resource *res;
54 u64 start; 73 u64 start;
55 u64 end; 74 u64 end;
56 struct range range[RANGE_NUM]; 75 struct range range[RANGE_NUM];
@@ -86,7 +105,6 @@ static int __init early_fill_mp_bus_info(void)
86 if (!found) 105 if (!found)
87 return 0; 106 return 0;
88 107
89 pci_root_num = 0;
90 for (i = 0; i < 4; i++) { 108 for (i = 0; i < 4; i++) {
91 int min_bus; 109 int min_bus;
92 int max_bus; 110 int max_bus;
@@ -99,19 +117,11 @@ static int __init early_fill_mp_bus_info(void)
99 min_bus = (reg >> 16) & 0xff; 117 min_bus = (reg >> 16) & 0xff;
100 max_bus = (reg >> 24) & 0xff; 118 max_bus = (reg >> 24) & 0xff;
101 node = (reg >> 4) & 0x07; 119 node = (reg >> 4) & 0x07;
102#ifdef CONFIG_NUMA 120 set_mp_bus_range_to_node(min_bus, max_bus, node);
103 for (j = min_bus; j <= max_bus; j++)
104 set_mp_bus_to_node(j, node);
105#endif
106 link = (reg >> 8) & 0x03; 121 link = (reg >> 8) & 0x03;
107 122
108 info = &pci_root_info[pci_root_num]; 123 info = alloc_pci_root_info(min_bus, max_bus, node, link);
109 info->bus_min = min_bus;
110 info->bus_max = max_bus;
111 info->node = node;
112 info->link = link;
113 sprintf(info->name, "PCI Bus #%02x", min_bus); 124 sprintf(info->name, "PCI Bus #%02x", min_bus);
114 pci_root_num++;
115 } 125 }
116 126
117 /* get the default node and link for left over res */ 127 /* get the default node and link for left over res */
@@ -134,16 +144,10 @@ static int __init early_fill_mp_bus_info(void)
134 link = (reg >> 4) & 0x03; 144 link = (reg >> 4) & 0x03;
135 end = (reg & 0xfff000) | 0xfff; 145 end = (reg & 0xfff000) | 0xfff;
136 146
137 /* find the position */ 147 info = find_pci_root_info(node, link);
138 for (j = 0; j < pci_root_num; j++) { 148 if (!info)
139 info = &pci_root_info[j];
140 if (info->node == node && info->link == link)
141 break;
142 }
143 if (j == pci_root_num)
144 continue; /* not found */ 149 continue; /* not found */
145 150
146 info = &pci_root_info[j];
147 printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", 151 printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n",
148 node, link, start, end); 152 node, link, start, end);
149 153
@@ -155,13 +159,8 @@ static int __init early_fill_mp_bus_info(void)
155 } 159 }
156 /* add left over io port range to def node/link, [0, 0xffff] */ 160 /* add left over io port range to def node/link, [0, 0xffff] */
157 /* find the position */ 161 /* find the position */
158 for (j = 0; j < pci_root_num; j++) { 162 info = find_pci_root_info(def_node, def_link);
159 info = &pci_root_info[j]; 163 if (info) {
160 if (info->node == def_node && info->link == def_link)
161 break;
162 }
163 if (j < pci_root_num) {
164 info = &pci_root_info[j];
165 for (i = 0; i < RANGE_NUM; i++) { 164 for (i = 0; i < RANGE_NUM; i++) {
166 if (!range[i].end) 165 if (!range[i].end)
167 continue; 166 continue;
@@ -214,16 +213,10 @@ static int __init early_fill_mp_bus_info(void)
214 end <<= 8; 213 end <<= 8;
215 end |= 0xffff; 214 end |= 0xffff;
216 215
217 /* find the position */ 216 info = find_pci_root_info(node, link);
218 for (j = 0; j < pci_root_num; j++) {
219 info = &pci_root_info[j];
220 if (info->node == node && info->link == link)
221 break;
222 }
223 if (j == pci_root_num)
224 continue; /* not found */
225 217
226 info = &pci_root_info[j]; 218 if (!info)
219 continue;
227 220
228 printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", 221 printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]",
229 node, link, start, end); 222 node, link, start, end);
@@ -291,14 +284,8 @@ static int __init early_fill_mp_bus_info(void)
291 * add left over mmio range to def node/link ? 284 * add left over mmio range to def node/link ?
292 * that is tricky, just record range in from start_min to 4G 285 * that is tricky, just record range in from start_min to 4G
293 */ 286 */
294 for (j = 0; j < pci_root_num; j++) { 287 info = find_pci_root_info(def_node, def_link);
295 info = &pci_root_info[j]; 288 if (info) {
296 if (info->node == def_node && info->link == def_link)
297 break;
298 }
299 if (j < pci_root_num) {
300 info = &pci_root_info[j];
301
302 for (i = 0; i < RANGE_NUM; i++) { 289 for (i = 0; i < RANGE_NUM; i++) {
303 if (!range[i].end) 290 if (!range[i].end)
304 continue; 291 continue;
@@ -309,20 +296,16 @@ static int __init early_fill_mp_bus_info(void)
309 } 296 }
310 } 297 }
311 298
312 for (i = 0; i < pci_root_num; i++) { 299 list_for_each_entry(info, &pci_root_infos, list) {
313 int res_num;
314 int busnum; 300 int busnum;
301 struct pci_root_res *root_res;
315 302
316 info = &pci_root_info[i];
317 res_num = info->res_num;
318 busnum = info->bus_min; 303 busnum = info->bus_min;
319 printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", 304 printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n",
320 info->bus_min, info->bus_max, info->node, info->link); 305 info->bus_min, info->bus_max, info->node, info->link);
321 for (j = 0; j < res_num; j++) { 306 list_for_each_entry(root_res, &info->resources, list)
322 res = &info->res[j]; 307 printk(KERN_DEBUG "bus: %02x %pR\n",
323 printk(KERN_DEBUG "bus: %02x index %x %pR\n", 308 busnum, &root_res->res);
324 busnum, j, res);
325 }
326 } 309 }
327 310
328 return 0; 311 return 0;
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index f3a7c569a403..614392ced7d6 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -22,19 +22,15 @@
22static void __init cnb20le_res(u8 bus, u8 slot, u8 func) 22static void __init cnb20le_res(u8 bus, u8 slot, u8 func)
23{ 23{
24 struct pci_root_info *info; 24 struct pci_root_info *info;
25 struct pci_root_res *root_res;
25 struct resource res; 26 struct resource res;
26 u16 word1, word2; 27 u16 word1, word2;
27 u8 fbus, lbus; 28 u8 fbus, lbus;
28 int i;
29
30 info = &pci_root_info[pci_root_num];
31 pci_root_num++;
32 29
33 /* read the PCI bus numbers */ 30 /* read the PCI bus numbers */
34 fbus = read_pci_config_byte(bus, slot, func, 0x44); 31 fbus = read_pci_config_byte(bus, slot, func, 0x44);
35 lbus = read_pci_config_byte(bus, slot, func, 0x45); 32 lbus = read_pci_config_byte(bus, slot, func, 0x45);
36 info->bus_min = fbus; 33 info = alloc_pci_root_info(fbus, lbus, 0, 0);
37 info->bus_max = lbus;
38 34
39 /* 35 /*
40 * Add the legacy IDE ports on bus 0 36 * Add the legacy IDE ports on bus 0
@@ -86,8 +82,8 @@ static void __init cnb20le_res(u8 bus, u8 slot, u8 func)
86 res.flags = IORESOURCE_BUS; 82 res.flags = IORESOURCE_BUS;
87 printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res); 83 printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res);
88 84
89 for (i = 0; i < info->res_num; i++) 85 list_for_each_entry(root_res, &info->resources, list)
90 printk(KERN_INFO "host bridge window %pR\n", &info->res[i]); 86 printk(KERN_INFO "host bridge window %pR\n", &root_res->res);
91} 87}
92 88
93static int __init broadcom_postcore_init(void) 89static int __init broadcom_postcore_init(void)
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index fd3f65510e9d..306579f7d0fd 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -4,35 +4,38 @@
4 4
5#include "bus_numa.h" 5#include "bus_numa.h"
6 6
7int pci_root_num; 7LIST_HEAD(pci_root_infos);
8struct pci_root_info pci_root_info[PCI_ROOT_NR];
9 8
10void x86_pci_root_bus_resources(int bus, struct list_head *resources) 9static struct pci_root_info *x86_find_pci_root_info(int bus)
11{ 10{
12 int i;
13 int j;
14 struct pci_root_info *info; 11 struct pci_root_info *info;
15 12
16 if (!pci_root_num) 13 if (list_empty(&pci_root_infos))
17 goto default_resources; 14 return NULL;
18 15
19 for (i = 0; i < pci_root_num; i++) { 16 list_for_each_entry(info, &pci_root_infos, list)
20 if (pci_root_info[i].bus_min == bus) 17 if (info->bus_min == bus)
21 break; 18 return info;
22 } 19
20 return NULL;
21}
23 22
24 if (i == pci_root_num) 23void x86_pci_root_bus_resources(int bus, struct list_head *resources)
24{
25 struct pci_root_info *info = x86_find_pci_root_info(bus);
26 struct pci_root_res *root_res;
27
28 if (!info)
25 goto default_resources; 29 goto default_resources;
26 30
27 printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", 31 printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n",
28 bus); 32 bus);
29 33
30 info = &pci_root_info[i]; 34 list_for_each_entry(root_res, &info->resources, list) {
31 for (j = 0; j < info->res_num; j++) {
32 struct resource *res; 35 struct resource *res;
33 struct resource *root; 36 struct resource *root;
34 37
35 res = &info->res[j]; 38 res = &root_res->res;
36 pci_add_resource(resources, res); 39 pci_add_resource(resources, res);
37 if (res->flags & IORESOURCE_IO) 40 if (res->flags & IORESOURCE_IO)
38 root = &ioport_resource; 41 root = &ioport_resource;
@@ -53,11 +56,32 @@ default_resources:
53 pci_add_resource(resources, &iomem_resource); 56 pci_add_resource(resources, &iomem_resource);
54} 57}
55 58
59struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max,
60 int node, int link)
61{
62 struct pci_root_info *info;
63
64 info = kzalloc(sizeof(*info), GFP_KERNEL);
65
66 if (!info)
67 return info;
68
69 INIT_LIST_HEAD(&info->resources);
70 info->bus_min = bus_min;
71 info->bus_max = bus_max;
72 info->node = node;
73 info->link = link;
74
75 list_add_tail(&info->list, &pci_root_infos);
76
77 return info;
78}
79
56void __devinit update_res(struct pci_root_info *info, resource_size_t start, 80void __devinit update_res(struct pci_root_info *info, resource_size_t start,
57 resource_size_t end, unsigned long flags, int merge) 81 resource_size_t end, unsigned long flags, int merge)
58{ 82{
59 int i;
60 struct resource *res; 83 struct resource *res;
84 struct pci_root_res *root_res;
61 85
62 if (start > end) 86 if (start > end)
63 return; 87 return;
@@ -69,11 +93,11 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start,
69 goto addit; 93 goto addit;
70 94
71 /* try to merge it with old one */ 95 /* try to merge it with old one */
72 for (i = 0; i < info->res_num; i++) { 96 list_for_each_entry(root_res, &info->resources, list) {
73 resource_size_t final_start, final_end; 97 resource_size_t final_start, final_end;
74 resource_size_t common_start, common_end; 98 resource_size_t common_start, common_end;
75 99
76 res = &info->res[i]; 100 res = &root_res->res;
77 if (res->flags != flags) 101 if (res->flags != flags)
78 continue; 102 continue;
79 103
@@ -93,14 +117,15 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start,
93addit: 117addit:
94 118
95 /* need to add that */ 119 /* need to add that */
96 if (info->res_num >= RES_NUM) 120 root_res = kzalloc(sizeof(*root_res), GFP_KERNEL);
121 if (!root_res)
97 return; 122 return;
98 123
99 res = &info->res[info->res_num]; 124 res = &root_res->res;
100 res->name = info->name; 125 res->name = info->name;
101 res->flags = flags; 126 res->flags = flags;
102 res->start = start; 127 res->start = start;
103 res->end = end; 128 res->end = end;
104 res->child = NULL; 129
105 info->res_num++; 130 list_add_tail(&root_res->list, &info->resources);
106} 131}
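
With per-resource allocation, update_res() loses the fixed res[RES_NUM]
array and its silent drop once the array filled up; each window is now a
kzalloc()ed pci_root_res on info->resources. A standalone sketch of the
merge-or-append idea, simplified: the kernel version also tracks resource
flags and a merge toggle, and the adjacency test here is a paraphrase:

    #include <stdio.h>
    #include <stdlib.h>

    struct range_node {
        struct range_node *next;
        unsigned long start, end;
    };

    /* Extend an overlapping or adjacent entry in place, else append;
     * no fixed cap, so nothing is silently dropped. */
    static void update_range(struct range_node **head,
                             unsigned long start, unsigned long end)
    {
        struct range_node *r;

        if (start > end)
            return;

        for (r = *head; r; r = r->next) {
            unsigned long cs = start > r->start ? start : r->start;
            unsigned long ce = end < r->end ? end : r->end;

            if (cs > ce + 1)    /* disjoint and not adjacent */
                continue;
            r->start = start < r->start ? start : r->start;
            r->end = end > r->end ? end : r->end;
            return;
        }

        r = calloc(1, sizeof(*r));
        if (!r)
            return;
        r->start = start;
        r->end = end;
        r->next = *head;
        *head = r;
    }

    int main(void)
    {
        struct range_node *head = NULL;

        update_range(&head, 0x1000, 0x1fff);
        update_range(&head, 0x2000, 0x2fff);    /* adjacent: merged */
        printf("[%#lx, %#lx]\n", head->start, head->end);
        return 0;
    }
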
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
index 804a4b40c31a..226a466b2b2b 100644
--- a/arch/x86/pci/bus_numa.h
+++ b/arch/x86/pci/bus_numa.h
@@ -4,22 +4,24 @@
4 * sub bus (transparent) will use entres from 3 to store extra from 4 * sub bus (transparent) will use entres from 3 to store extra from
5 * root, so need to make sure we have enough slot there. 5 * root, so need to make sure we have enough slot there.
6 */ 6 */
7#define RES_NUM 16 7struct pci_root_res {
8 struct list_head list;
9 struct resource res;
10};
11
8struct pci_root_info { 12struct pci_root_info {
13 struct list_head list;
9 char name[12]; 14 char name[12];
10 unsigned int res_num; 15 struct list_head resources;
11 struct resource res[RES_NUM];
12 int bus_min; 16 int bus_min;
13 int bus_max; 17 int bus_max;
14 int node; 18 int node;
15 int link; 19 int link;
16}; 20};
17 21
18/* 4 at this time, it may become to 32 */ 22extern struct list_head pci_root_infos;
19#define PCI_ROOT_NR 4 23struct pci_root_info *alloc_pci_root_info(int bus_min, int bus_max,
20extern int pci_root_num; 24 int node, int link);
21extern struct pci_root_info pci_root_info[PCI_ROOT_NR];
22
23extern void update_res(struct pci_root_info *info, resource_size_t start, 25extern void update_res(struct pci_root_info *info, resource_size_t start,
24 resource_size_t end, unsigned long flags, int merge); 26 resource_size_t end, unsigned long flags, int merge);
25#endif 27#endif
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 323481e06ef8..0ad990a20d4a 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -11,6 +11,7 @@
11#include <linux/dmi.h> 11#include <linux/dmi.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13 13
14#include <asm-generic/pci-bridge.h>
14#include <asm/acpi.h> 15#include <asm/acpi.h>
15#include <asm/segment.h> 16#include <asm/segment.h>
16#include <asm/io.h> 17#include <asm/io.h>
@@ -229,6 +230,14 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d)
229} 230}
230#endif 231#endif
231 232
233static int __devinit set_scan_all(const struct dmi_system_id *d)
234{
235 printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n",
236 d->ident);
237 pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS);
238 return 0;
239}
240
232static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { 241static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
233#ifdef __i386__ 242#ifdef __i386__
234/* 243/*
@@ -420,6 +429,13 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
420 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), 429 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"),
421 }, 430 },
422 }, 431 },
432 {
433 .callback = set_scan_all,
434 .ident = "Stratus/NEC ftServer",
435 .matches = {
436 DMI_MATCH(DMI_SYS_VENDOR, "ftServer"),
437 },
438 },
423 {} 439 {}
424}; 440};
425 441
@@ -430,9 +446,7 @@ void __init dmi_check_pciprobe(void)
430 446
431struct pci_bus * __devinit pcibios_scan_root(int busnum) 447struct pci_bus * __devinit pcibios_scan_root(int busnum)
432{ 448{
433 LIST_HEAD(resources);
434 struct pci_bus *bus = NULL; 449 struct pci_bus *bus = NULL;
435 struct pci_sysdata *sd;
436 450
437 while ((bus = pci_find_next_bus(bus)) != NULL) { 451 while ((bus = pci_find_next_bus(bus)) != NULL) {
438 if (bus->number == busnum) { 452 if (bus->number == busnum) {
@@ -441,28 +455,10 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
441 } 455 }
442 } 456 }
443 457
444 /* Allocate per-root-bus (not per bus) arch-specific data. 458 return pci_scan_bus_on_node(busnum, &pci_root_ops,
445 * TODO: leak; this memory is never freed. 459 get_mp_bus_to_node(busnum));
446 * It's arguable whether it's worth the trouble to care.
447 */
448 sd = kzalloc(sizeof(*sd), GFP_KERNEL);
449 if (!sd) {
450 printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
451 return NULL;
452 }
453
454 sd->node = get_mp_bus_to_node(busnum);
455
456 printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);
457 x86_pci_root_bus_resources(busnum, &resources);
458 bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources);
459 if (!bus) {
460 pci_free_resource_list(&resources);
461 kfree(sd);
462 }
463
464 return bus;
465} 460}
461
466void __init pcibios_set_cache_line_size(void) 462void __init pcibios_set_cache_line_size(void)
467{ 463{
468 struct cpuinfo_x86 *c = &boot_cpu_data; 464 struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -656,6 +652,7 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops,
656 } 652 }
657 sd->node = node; 653 sd->node = node;
658 x86_pci_root_bus_resources(busno, &resources); 654 x86_pci_root_bus_resources(busno, &resources);
655 printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busno);
659 bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources); 656 bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources);
660 if (!bus) { 657 if (!bus) {
661 pci_free_resource_list(&resources); 658 pci_free_resource_list(&resources);
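
set_scan_all() plugs into the same pciprobe_dmi_table mechanism as the
existing entries: a match table whose callback runs when the DMI strings
match, used here to set PCI_SCAN_ALL_PCIE_DEVS on Stratus/NEC ftServer
boxes. A toy standalone version of the table-plus-callback pattern; the
single vendor string is a simplified stand-in for DMI_MATCH(), which can
combine several fields:

    #include <stdio.h>
    #include <string.h>

    struct dmi_entry {
        int (*callback)(const struct dmi_entry *d);
        const char *ident;
        const char *sys_vendor;    /* one key; real tables match more */
    };

    static int set_scan_all(const struct dmi_entry *d)
    {
        printf("PCI: %s detected, enabling pci=pcie_scan_all\n", d->ident);
        return 0;
    }

    static const struct dmi_entry dmi_table[] = {
        { set_scan_all, "Stratus/NEC ftServer", "ftServer" },
        { NULL, NULL, NULL }    /* terminator, like the {} sentinel */
    };

    /* Walk the table and fire callbacks on a match, loosely the way
     * dmi_check_system() walks pciprobe_dmi_table. */
    static void check_system(const char *vendor)
    {
        const struct dmi_entry *d;

        for (d = dmi_table; d->ident; d++)
            if (strstr(vendor, d->sys_vendor))
                d->callback(d);
    }

    int main(void)
    {
        check_system("ftServer");
        return 0;
    }
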
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index d0e6e403b4f6..5dd467bd6121 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -519,3 +519,20 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev)
519 } 519 }
520} 520}
521DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); 521DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
522
523/*
524 * Twinhead H12Y needs us to block out a region otherwise we map devices
525 * there and any access kills the box.
526 *
527 * See: https://bugzilla.kernel.org/show_bug.cgi?id=10231
528 *
529 * Match off the LPC and svid/sdid (older kernels lose the bridge subvendor)
530 */
531static void __devinit twinhead_reserve_killing_zone(struct pci_dev *dev)
532{
533 if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) {
534 pr_info("Reserving memory on Twinhead H12Y\n");
535 request_mem_region(0xFFB00000, 0x100000, "twinhead");
536 }
537}
538DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone);
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 831971e731f7..dd8ca6f7223b 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -57,7 +57,7 @@ static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev)
57{ 57{
58 struct pcibios_fwaddrmap *map; 58 struct pcibios_fwaddrmap *map;
59 59
60 WARN_ON(!spin_is_locked(&pcibios_fwaddrmap_lock)); 60 WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock));
61 61
62 list_for_each_entry(map, &pcibios_fwaddrmappings, list) 62 list_for_each_entry(map, &pcibios_fwaddrmappings, list)
63 if (map->dev == dev) 63 if (map->dev == dev)
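
On uniprocessor builds spin_is_locked() can report the lock as free even
while it is held (the lock may compile down to nothing), so the
unconditional WARN_ON() could fire falsely; WARN_ON_SMP() performs the
check only when CONFIG_SMP is set. A standalone illustration, with
assert() standing in for WARN_ON() and the macro shape only approximating
include/asm-generic/bug.h:

    #include <assert.h>
    #include <stdio.h>

    #ifdef CONFIG_SMP
    # define WARN_ON_SMP(x)    assert(!(x))    /* stand-in for WARN_ON() */
    #else
    # define WARN_ON_SMP(x)    ((void)0)       /* compiled away on UP */
    #endif

    int main(void)
    {
        int lock_reported_held = 0;    /* what a UP spin_is_locked() says */

        /* Silent without -DCONFIG_SMP, which is exactly the point. */
        WARN_ON_SMP(!lock_reported_held);
        printf("no spurious warning on a UP build\n");
        return 0;
    }
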
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index 66d377e334f7..646e3b5b4bb6 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c
@@ -63,7 +63,7 @@ static struct gpio_led net5501_leds[] = {
63 .name = "net5501:1", 63 .name = "net5501:1",
64 .gpio = 6, 64 .gpio = 6,
65 .default_trigger = "default-on", 65 .default_trigger = "default-on",
66 .active_low = 1, 66 .active_low = 0,
67 }, 67 },
68}; 68};
69 69
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index e0a37233c0af..e31bcd8f2eee 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -805,7 +805,7 @@ void intel_scu_devices_create(void)
805 } else 805 } else
806 i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); 806 i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
807 } 807 }
808 intel_scu_notifier_post(SCU_AVAILABLE, 0L); 808 intel_scu_notifier_post(SCU_AVAILABLE, NULL);
809} 809}
810EXPORT_SYMBOL_GPL(intel_scu_devices_create); 810EXPORT_SYMBOL_GPL(intel_scu_devices_create);
811 811
@@ -814,7 +814,7 @@ void intel_scu_devices_destroy(void)
814{ 814{
815 int i; 815 int i;
816 816
817 intel_scu_notifier_post(SCU_DOWN, 0L); 817 intel_scu_notifier_post(SCU_DOWN, NULL);
818 818
819 for (i = 0; i < ipc_next_dev; i++) 819 for (i = 0; i < ipc_next_dev; i++)
820 platform_device_del(ipc_devs[i]); 820 platform_device_del(ipc_devs[i]);
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index c7abf13a213f..94d8a39332ec 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -445,7 +445,7 @@ static void ack_cobalt_irq(struct irq_data *data)
445 445
446 spin_lock_irqsave(&cobalt_lock, flags); 446 spin_lock_irqsave(&cobalt_lock, flags);
447 disable_cobalt_irq(data); 447 disable_cobalt_irq(data);
448 apic_write(APIC_EOI, APIC_EIO_ACK); 448 apic_write(APIC_EOI, APIC_EOI_ACK);
449 spin_unlock_irqrestore(&cobalt_lock, flags); 449 spin_unlock_irqrestore(&cobalt_lock, flags);
450} 450}
451 451
diff --git a/arch/x86/tools/.gitignore b/arch/x86/tools/.gitignore
new file mode 100644
index 000000000000..be0ed065249b
--- /dev/null
+++ b/arch/x86/tools/.gitignore
@@ -0,0 +1 @@
relocs
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index d511aa97533a..733057b435b0 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -36,3 +36,7 @@ HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x
36$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c 36$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
37 37
38$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c 38$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
39
40HOST_EXTRACFLAGS += -I$(srctree)/tools/include
41hostprogs-y += relocs
42relocs: $(obj)/relocs
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/tools/relocs.c
index d3c0b0277666..b43cfcd9bf40 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -18,6 +18,8 @@ static void die(char *fmt, ...);
18static Elf32_Ehdr ehdr; 18static Elf32_Ehdr ehdr;
19static unsigned long reloc_count, reloc_idx; 19static unsigned long reloc_count, reloc_idx;
20static unsigned long *relocs; 20static unsigned long *relocs;
21static unsigned long reloc16_count, reloc16_idx;
22static unsigned long *relocs16;
21 23
22struct section { 24struct section {
23 Elf32_Shdr shdr; 25 Elf32_Shdr shdr;
@@ -28,52 +30,86 @@ struct section {
28}; 30};
29static struct section *secs; 31static struct section *secs;
30 32
33enum symtype {
34 S_ABS,
35 S_REL,
36 S_SEG,
37 S_LIN,
38 S_NSYMTYPES
39};
40
41static const char * const sym_regex_kernel[S_NSYMTYPES] = {
31/* 42/*
32 * Following symbols have been audited. There values are constant and do 43 * Following symbols have been audited. There values are constant and do
33 * not change if bzImage is loaded at a different physical address than 44 * not change if bzImage is loaded at a different physical address than
34 * the address for which it has been compiled. Don't warn user about 45 * the address for which it has been compiled. Don't warn user about
35 * absolute relocations present w.r.t these symbols. 46 * absolute relocations present w.r.t these symbols.
36 */ 47 */
37static const char abs_sym_regex[] = 48 [S_ABS] =
38 "^(xen_irq_disable_direct_reloc$|" 49 "^(xen_irq_disable_direct_reloc$|"
39 "xen_save_fl_direct_reloc$|" 50 "xen_save_fl_direct_reloc$|"
40 "VDSO|" 51 "VDSO|"
41 "__crc_)"; 52 "__crc_)",
42static regex_t abs_sym_regex_c;
43static int is_abs_reloc(const char *sym_name)
44{
45 return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0);
46}
47 53
48/* 54/*
49 * These symbols are known to be relative, even if the linker marks them 55 * These symbols are known to be relative, even if the linker marks them
50 * as absolute (typically defined outside any section in the linker script.) 56 * as absolute (typically defined outside any section in the linker script.)
51 */ 57 */
52static const char rel_sym_regex[] = 58 [S_REL] =
53 "^_end$"; 59 "^(__init_(begin|end)|"
54static regex_t rel_sym_regex_c; 60 "__x86_cpu_dev_(start|end)|"
55static int is_rel_reloc(const char *sym_name) 61 "(__parainstructions|__alt_instructions)(|_end)|"
62 "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|"
63 "_end)$"
64};
65
66
67static const char * const sym_regex_realmode[S_NSYMTYPES] = {
68/*
69 * These are 16-bit segment symbols when compiling 16-bit code.
70 */
71 [S_SEG] =
72 "^real_mode_seg$",
73
74/*
75 * These are offsets belonging to segments, as opposed to linear addresses,
76 * when compiling 16-bit code.
77 */
78 [S_LIN] =
79 "^pa_",
80};
81
82static const char * const *sym_regex;
83
84static regex_t sym_regex_c[S_NSYMTYPES];
85static int is_reloc(enum symtype type, const char *sym_name)
56{ 86{
57 return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0); 87 return sym_regex[type] &&
88 !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0);
58} 89}
59 90
60static void regex_init(void) 91static void regex_init(int use_real_mode)
61{ 92{
62 char errbuf[128]; 93 char errbuf[128];
63 int err; 94 int err;
64 95 int i;
65 err = regcomp(&abs_sym_regex_c, abs_sym_regex, 96
66 REG_EXTENDED|REG_NOSUB); 97 if (use_real_mode)
67 if (err) { 98 sym_regex = sym_regex_realmode;
68 regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf); 99 else
69 die("%s", errbuf); 100 sym_regex = sym_regex_kernel;
70 }
71 101
72 err = regcomp(&rel_sym_regex_c, rel_sym_regex, 102 for (i = 0; i < S_NSYMTYPES; i++) {
73 REG_EXTENDED|REG_NOSUB); 103 if (!sym_regex[i])
74 if (err) { 104 continue;
75 regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf); 105
76 die("%s", errbuf); 106 err = regcomp(&sym_regex_c[i], sym_regex[i],
107 REG_EXTENDED|REG_NOSUB);
108
109 if (err) {
110 regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf);
111 die("%s", errbuf);
112 }
77 } 113 }
78} 114}
79 115
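
regex_init() now compiles whichever of the two tables applies, and
is_reloc() folds the old is_abs_reloc()/is_rel_reloc() pair into one
type-indexed check. A standalone reduction of that table-driven shape,
with a two-entry table standing in for sym_regex_kernel[]:

    #include <regex.h>
    #include <stdio.h>
    #include <stdlib.h>

    enum symtype { S_ABS, S_REL, S_NSYMTYPES };

    static const char * const sym_regex[S_NSYMTYPES] = {
        [S_ABS] = "^(VDSO|__crc_)",
        [S_REL] = "^_end$",
    };

    static regex_t sym_regex_c[S_NSYMTYPES];

    static void regex_init(void)
    {
        char errbuf[128];
        int i, err;

        for (i = 0; i < S_NSYMTYPES; i++) {
            if (!sym_regex[i])
                continue;    /* unused slot, like S_SEG/S_LIN for kernel */
            err = regcomp(&sym_regex_c[i], sym_regex[i],
                          REG_EXTENDED | REG_NOSUB);
            if (err) {
                regerror(err, &sym_regex_c[i], errbuf, sizeof(errbuf));
                fprintf(stderr, "%s\n", errbuf);
                exit(1);
            }
        }
    }

    static int is_reloc(enum symtype type, const char *sym_name)
    {
        return sym_regex[type] &&
               !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0);
    }

    int main(void)
    {
        regex_init();
        printf("_end S_REL: %d\n", is_reloc(S_REL, "_end"));    /* 1 */
        printf("_end S_ABS: %d\n", is_reloc(S_ABS, "_end"));    /* 0 */
        return 0;
    }
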
@@ -154,6 +190,10 @@ static const char *rel_type(unsigned type)
154 REL_TYPE(R_386_RELATIVE), 190 REL_TYPE(R_386_RELATIVE),
155 REL_TYPE(R_386_GOTOFF), 191 REL_TYPE(R_386_GOTOFF),
156 REL_TYPE(R_386_GOTPC), 192 REL_TYPE(R_386_GOTPC),
193 REL_TYPE(R_386_8),
194 REL_TYPE(R_386_PC8),
195 REL_TYPE(R_386_16),
196 REL_TYPE(R_386_PC16),
157#undef REL_TYPE 197#undef REL_TYPE
158 }; 198 };
159 const char *name = "unknown type rel type name"; 199 const char *name = "unknown type rel type name";
@@ -189,7 +229,7 @@ static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym)
189 name = sym_strtab + sym->st_name; 229 name = sym_strtab + sym->st_name;
190 } 230 }
191 else { 231 else {
192 name = sec_name(secs[sym->st_shndx].shdr.sh_name); 232 name = sec_name(sym->st_shndx);
193 } 233 }
194 return name; 234 return name;
195} 235}
@@ -403,13 +443,11 @@ static void print_absolute_symbols(void)
403 for (i = 0; i < ehdr.e_shnum; i++) { 443 for (i = 0; i < ehdr.e_shnum; i++) {
404 struct section *sec = &secs[i]; 444 struct section *sec = &secs[i];
405 char *sym_strtab; 445 char *sym_strtab;
406 Elf32_Sym *sh_symtab;
407 int j; 446 int j;
408 447
409 if (sec->shdr.sh_type != SHT_SYMTAB) { 448 if (sec->shdr.sh_type != SHT_SYMTAB) {
410 continue; 449 continue;
411 } 450 }
412 sh_symtab = sec->symtab;
413 sym_strtab = sec->link->strtab; 451 sym_strtab = sec->link->strtab;
414 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { 452 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) {
415 Elf32_Sym *sym; 453 Elf32_Sym *sym;
@@ -474,7 +512,7 @@ static void print_absolute_relocs(void)
474 * Before warning check if this absolute symbol 512 * Before warning check if this absolute symbol
475 * relocation is harmless. 513 * relocation is harmless.
476 */ 514 */
477 if (is_abs_reloc(name) || is_rel_reloc(name)) 515 if (is_reloc(S_ABS, name) || is_reloc(S_REL, name))
478 continue; 516 continue;
479 517
480 if (!printed) { 518 if (!printed) {
@@ -498,7 +536,8 @@ static void print_absolute_relocs(void)
498 printf("\n"); 536 printf("\n");
499} 537}
500 538
501static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) 539static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym),
540 int use_real_mode)
502{ 541{
503 int i; 542 int i;
504 /* Walk through the relocations */ 543 /* Walk through the relocations */
@@ -523,30 +562,67 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
523 Elf32_Rel *rel; 562 Elf32_Rel *rel;
524 Elf32_Sym *sym; 563 Elf32_Sym *sym;
525 unsigned r_type; 564 unsigned r_type;
565 const char *symname;
566 int shn_abs;
567
526 rel = &sec->reltab[j]; 568 rel = &sec->reltab[j];
527 sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; 569 sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
528 r_type = ELF32_R_TYPE(rel->r_info); 570 r_type = ELF32_R_TYPE(rel->r_info);
529 /* Don't visit relocations to absolute symbols */ 571
530 if (sym->st_shndx == SHN_ABS && 572 shn_abs = sym->st_shndx == SHN_ABS;
531 !is_rel_reloc(sym_name(sym_strtab, sym))) { 573
532 continue;
533 }
534 switch (r_type) { 574 switch (r_type) {
535 case R_386_NONE: 575 case R_386_NONE:
536 case R_386_PC32: 576 case R_386_PC32:
577 case R_386_PC16:
578 case R_386_PC8:
537 /* 579 /*
538 * NONE can be ignored and and PC relative 580 * NONE can be ignored and and PC relative
539 * relocations don't need to be adjusted. 581 * relocations don't need to be adjusted.
540 */ 582 */
541 break; 583 break;
584
585 case R_386_16:
586 symname = sym_name(sym_strtab, sym);
587 if (!use_real_mode)
588 goto bad;
589 if (shn_abs) {
590 if (is_reloc(S_ABS, symname))
591 break;
592 else if (!is_reloc(S_SEG, symname))
593 goto bad;
594 } else {
595 if (is_reloc(S_LIN, symname))
596 goto bad;
597 else
598 break;
599 }
600 visit(rel, sym);
601 break;
602
542 case R_386_32: 603 case R_386_32:
543 /* Visit relocations that need to be adjusted */ 604 symname = sym_name(sym_strtab, sym);
605 if (shn_abs) {
606 if (is_reloc(S_ABS, symname))
607 break;
608 else if (!is_reloc(S_REL, symname))
609 goto bad;
610 } else {
611 if (use_real_mode &&
612 !is_reloc(S_LIN, symname))
613 break;
614 }
544 visit(rel, sym); 615 visit(rel, sym);
545 break; 616 break;
546 default: 617 default:
547 die("Unsupported relocation type: %s (%d)\n", 618 die("Unsupported relocation type: %s (%d)\n",
548 rel_type(r_type), r_type); 619 rel_type(r_type), r_type);
549 break; 620 break;
621 bad:
622 symname = sym_name(sym_strtab, sym);
623 die("Invalid %s %s relocation: %s\n",
624 shn_abs ? "absolute" : "relative",
625 rel_type(r_type), symname);
550 } 626 }
551 } 627 }
552 } 628 }
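
The new R_386_16 case encodes a compact policy: 16-bit relocations are
legal only in --realmode objects, and then only against symbols the S_SEG
table names as real-mode segments; S_ABS symbols are skipped, and
pa_-prefixed linear offsets (S_LIN) must never be narrowed to 16 bits.
That decision restated as a standalone pure function; the strcmp-based
classifier is a toy stand-in for the compiled regex tables:

    #include <stdio.h>
    #include <string.h>

    enum symtype { S_ABS, S_SEG, S_LIN };

    /* Toy classifier in place of is_reloc() over the regex tables. */
    static int is_reloc(enum symtype t, const char *name)
    {
        switch (t) {
        case S_ABS: return !strncmp(name, "__crc_", 6);
        case S_SEG: return !strcmp(name, "real_mode_seg");
        case S_LIN: return !strncmp(name, "pa_", 3);
        }
        return 0;
    }

    /* 1 = emit, 0 = skip, -1 = invalid (die), mirroring walk_relocs(). */
    static int classify_r386_16(int use_real_mode, int shn_abs,
                                const char *symname)
    {
        if (!use_real_mode)
            return -1;    /* 16-bit relocs only allowed in realmode */
        if (shn_abs)      /* symbol marked absolute by the linker */
            return is_reloc(S_ABS, symname) ? 0 :
                   is_reloc(S_SEG, symname) ? 1 : -1;
        /* section-relative: linear pa_* offsets must stay 32-bit */
        return is_reloc(S_LIN, symname) ? -1 : 0;
    }

    int main(void)
    {
        printf("%d\n", classify_r386_16(1, 1, "real_mode_seg")); /* 1 */
        printf("%d\n", classify_r386_16(1, 0, "pa_text"));       /* -1 */
        return 0;
    }
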
@@ -554,13 +630,19 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
554 630
555static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) 631static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
556{ 632{
557 reloc_count += 1; 633 if (ELF32_R_TYPE(rel->r_info) == R_386_16)
634 reloc16_count++;
635 else
636 reloc_count++;
558} 637}
559 638
560static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) 639static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
561{ 640{
562 /* Remember the address that needs to be adjusted. */ 641 /* Remember the address that needs to be adjusted. */
563 relocs[reloc_idx++] = rel->r_offset; 642 if (ELF32_R_TYPE(rel->r_info) == R_386_16)
643 relocs16[reloc16_idx++] = rel->r_offset;
644 else
645 relocs[reloc_idx++] = rel->r_offset;
564} 646}
565 647
566static int cmp_relocs(const void *va, const void *vb) 648static int cmp_relocs(const void *va, const void *vb)
@@ -570,23 +652,41 @@ static int cmp_relocs(const void *va, const void *vb)
570 return (*a == *b)? 0 : (*a > *b)? 1 : -1; 652 return (*a == *b)? 0 : (*a > *b)? 1 : -1;
571} 653}
572 654
573static void emit_relocs(int as_text) 655static int write32(unsigned int v, FILE *f)
656{
657 unsigned char buf[4];
658
659 put_unaligned_le32(v, buf);
660 return fwrite(buf, 1, 4, f) == 4 ? 0 : -1;
661}
662
663static void emit_relocs(int as_text, int use_real_mode)
574{ 664{
575 int i; 665 int i;
576 /* Count how many relocations I have and allocate space for them. */ 666 /* Count how many relocations I have and allocate space for them. */
577 reloc_count = 0; 667 reloc_count = 0;
578 walk_relocs(count_reloc); 668 walk_relocs(count_reloc, use_real_mode);
579 relocs = malloc(reloc_count * sizeof(relocs[0])); 669 relocs = malloc(reloc_count * sizeof(relocs[0]));
580 if (!relocs) { 670 if (!relocs) {
581 die("malloc of %d entries for relocs failed\n", 671 die("malloc of %d entries for relocs failed\n",
582 reloc_count); 672 reloc_count);
583 } 673 }
674
675 relocs16 = malloc(reloc16_count * sizeof(relocs[0]));
676 if (!relocs16) {
677 die("malloc of %d entries for relocs16 failed\n",
678 reloc16_count);
679 }
584 /* Collect up the relocations */ 680 /* Collect up the relocations */
585 reloc_idx = 0; 681 reloc_idx = 0;
586 walk_relocs(collect_reloc); 682 walk_relocs(collect_reloc, use_real_mode);
683
684 if (reloc16_count && !use_real_mode)
685 die("Segment relocations found but --realmode not specified\n");
587 686
588 /* Order the relocations for more efficient processing */ 687 /* Order the relocations for more efficient processing */
589 qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); 688 qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs);
689 qsort(relocs16, reloc16_count, sizeof(relocs16[0]), cmp_relocs);
590 690
591 /* Print the relocations */ 691 /* Print the relocations */
592 if (as_text) { 692 if (as_text) {
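
write32() centralizes what emit_relocs() used to open-code with
put_unaligned_le32() and fwrite(), and reports short writes as -1 instead
of ignoring them. A standalone equivalent with the little-endian store
spelled out by hand:

    #include <stdio.h>

    /* Portable little-endian store, like put_unaligned_le32(). */
    static void put_le32(unsigned int v, unsigned char *p)
    {
        p[0] = v & 0xff;
        p[1] = (v >> 8) & 0xff;
        p[2] = (v >> 16) & 0xff;
        p[3] = (v >> 24) & 0xff;
    }

    /* write32() as in the patch: 0 on success, -1 on a short write. */
    static int write32(unsigned int v, FILE *f)
    {
        unsigned char buf[4];

        put_le32(v, buf);
        return fwrite(buf, 1, 4, f) == 4 ? 0 : -1;
    }

    int main(void)
    {
        return write32(0xdeadbeef, stdout) ? 1 : 0;
    }
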
@@ -595,58 +695,83 @@ static void emit_relocs(int as_text)
595 */ 695 */
596 printf(".section \".data.reloc\",\"a\"\n"); 696 printf(".section \".data.reloc\",\"a\"\n");
597 printf(".balign 4\n"); 697 printf(".balign 4\n");
598 for (i = 0; i < reloc_count; i++) { 698 if (use_real_mode) {
599 printf("\t .long 0x%08lx\n", relocs[i]); 699 printf("\t.long %lu\n", reloc16_count);
700 for (i = 0; i < reloc16_count; i++)
701 printf("\t.long 0x%08lx\n", relocs16[i]);
702 printf("\t.long %lu\n", reloc_count);
703 for (i = 0; i < reloc_count; i++) {
704 printf("\t.long 0x%08lx\n", relocs[i]);
705 }
706 } else {
707 /* Print a stop */
708 printf("\t.long 0x%08lx\n", (unsigned long)0);
709 for (i = 0; i < reloc_count; i++) {
710 printf("\t.long 0x%08lx\n", relocs[i]);
711 }
600 } 712 }
713
601 printf("\n"); 714 printf("\n");
602 } 715 }
603 else { 716 else {
604 unsigned char buf[4]; 717 if (use_real_mode) {
605 /* Print a stop */ 718 write32(reloc16_count, stdout);
606 fwrite("\0\0\0\0", 4, 1, stdout); 719 for (i = 0; i < reloc16_count; i++)
607 /* Now print each relocation */ 720 write32(relocs16[i], stdout);
608 for (i = 0; i < reloc_count; i++) { 721 write32(reloc_count, stdout);
609 put_unaligned_le32(relocs[i], buf); 722
610 fwrite(buf, 4, 1, stdout); 723 /* Now print each relocation */
724 for (i = 0; i < reloc_count; i++)
725 write32(relocs[i], stdout);
726 } else {
727 /* Print a stop */
728 write32(0, stdout);
729
730 /* Now print each relocation */
731 for (i = 0; i < reloc_count; i++) {
732 write32(relocs[i], stdout);
733 }
611 } 734 }
612 } 735 }
613} 736}
614 737
615static void usage(void) 738static void usage(void)
616{ 739{
617 die("relocs [--abs-syms |--abs-relocs | --text] vmlinux\n"); 740 die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n");
618} 741}
619 742
620int main(int argc, char **argv) 743int main(int argc, char **argv)
621{ 744{
622 int show_absolute_syms, show_absolute_relocs; 745 int show_absolute_syms, show_absolute_relocs;
623 int as_text; 746 int as_text, use_real_mode;
624 const char *fname; 747 const char *fname;
625 FILE *fp; 748 FILE *fp;
626 int i; 749 int i;
627 750
628 regex_init();
629
630 show_absolute_syms = 0; 751 show_absolute_syms = 0;
631 show_absolute_relocs = 0; 752 show_absolute_relocs = 0;
632 as_text = 0; 753 as_text = 0;
754 use_real_mode = 0;
633 fname = NULL; 755 fname = NULL;
634 for (i = 1; i < argc; i++) { 756 for (i = 1; i < argc; i++) {
635 char *arg = argv[i]; 757 char *arg = argv[i];
636 if (*arg == '-') { 758 if (*arg == '-') {
637 if (strcmp(argv[1], "--abs-syms") == 0) { 759 if (strcmp(arg, "--abs-syms") == 0) {
638 show_absolute_syms = 1; 760 show_absolute_syms = 1;
639 continue; 761 continue;
640 } 762 }
641 763 if (strcmp(arg, "--abs-relocs") == 0) {
642 if (strcmp(argv[1], "--abs-relocs") == 0) {
643 show_absolute_relocs = 1; 764 show_absolute_relocs = 1;
644 continue; 765 continue;
645 } 766 }
646 else if (strcmp(argv[1], "--text") == 0) { 767 if (strcmp(arg, "--text") == 0) {
647 as_text = 1; 768 as_text = 1;
648 continue; 769 continue;
649 } 770 }
771 if (strcmp(arg, "--realmode") == 0) {
772 use_real_mode = 1;
773 continue;
774 }
650 } 775 }
651 else if (!fname) { 776 else if (!fname) {
652 fname = arg; 777 fname = arg;
@@ -657,6 +782,7 @@ int main(int argc, char **argv)
657 if (!fname) { 782 if (!fname) {
658 usage(); 783 usage();
659 } 784 }
785 regex_init(use_real_mode);
660 fp = fopen(fname, "r"); 786 fp = fopen(fname, "r");
661 if (!fp) { 787 if (!fp) {
662 die("Cannot open %s: %s\n", 788 die("Cannot open %s: %s\n",
@@ -675,6 +801,6 @@ int main(int argc, char **argv)
675 print_absolute_relocs(); 801 print_absolute_relocs();
676 return 0; 802 return 0;
677 } 803 }
678 emit_relocs(as_text); 804 emit_relocs(as_text, use_real_mode);
679 return 0; 805 return 0;
680} 806}
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
new file mode 100644
index 000000000000..7d01b8c56c00
--- /dev/null
+++ b/arch/x86/um/asm/barrier.h
@@ -0,0 +1,75 @@
1#ifndef _ASM_UM_BARRIER_H_
2#define _ASM_UM_BARRIER_H_
3
4#include <asm/asm.h>
5#include <asm/segment.h>
6#include <asm/cpufeature.h>
7#include <asm/cmpxchg.h>
8#include <asm/nops.h>
9
10#include <linux/kernel.h>
11#include <linux/irqflags.h>
12
13/*
14 * Force strict CPU ordering.
15 * And yes, this is required on UP too when we're talking
16 * to devices.
17 */
18#ifdef CONFIG_X86_32
19
20#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
21#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
22#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
23
24#else /* CONFIG_X86_32 */
25
26#define mb() asm volatile("mfence" : : : "memory")
27#define rmb() asm volatile("lfence" : : : "memory")
28#define wmb() asm volatile("sfence" : : : "memory")
29
30#endif /* CONFIG_X86_32 */
31
32#define read_barrier_depends() do { } while (0)
33
34#ifdef CONFIG_SMP
35
36#define smp_mb() mb()
37#ifdef CONFIG_X86_PPRO_FENCE
38#define smp_rmb() rmb()
39#else /* CONFIG_X86_PPRO_FENCE */
40#define smp_rmb() barrier()
41#endif /* CONFIG_X86_PPRO_FENCE */
42
43#ifdef CONFIG_X86_OOSTORE
44#define smp_wmb() wmb()
45#else /* CONFIG_X86_OOSTORE */
46#define smp_wmb() barrier()
47#endif /* CONFIG_X86_OOSTORE */
48
49#define smp_read_barrier_depends() read_barrier_depends()
50#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
51
52#else /* CONFIG_SMP */
53
54#define smp_mb() barrier()
55#define smp_rmb() barrier()
56#define smp_wmb() barrier()
57#define smp_read_barrier_depends() do { } while (0)
58#define set_mb(var, value) do { var = value; barrier(); } while (0)
59
60#endif /* CONFIG_SMP */
61
62/*
63 * Stop RDTSC speculation. This is needed when you need to use RDTSC
64 * (or get_cycles or vread that possibly accesses the TSC) in a defined
65 * code region.
66 *
67 * (Could use an alternative three way for this if there was one.)
68 */
69static inline void rdtsc_barrier(void)
70{
71 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
72 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
73}
74
75#endif
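
The new um barrier.h keeps only the barrier definitions from the
asm/system.h that this patch deletes below: real fence instructions for
the mandatory mb()/rmb()/wmb(), compiler-only barriers for most smp_*
forms. A standalone flavor of the two building blocks, assuming GCC-style
inline asm on x86-64:

    #include <stdio.h>

    /* Compiler barrier: emits nothing, only blocks compiler reordering. */
    #define barrier()    asm volatile("" ::: "memory")

    /* Hardware fence, as the 64-bit mb() in the header above. */
    #define mb()         asm volatile("mfence" ::: "memory")

    int main(void)
    {
        int flag;

        flag = 1;
        mb();         /* the store is ordered before anything that follows */
        barrier();    /* roughly what smp_mb() becomes on !CONFIG_SMP */
        printf("flag=%d\n", flag);
        return 0;
    }
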
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index f3b0633b69a1..0e07adc8cbe4 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -34,25 +34,25 @@
34#define ELF_ARCH EM_386 34#define ELF_ARCH EM_386
35 35
36#define ELF_PLAT_INIT(regs, load_addr) do { \ 36#define ELF_PLAT_INIT(regs, load_addr) do { \
37 PT_REGS_EBX(regs) = 0; \ 37 PT_REGS_BX(regs) = 0; \
38 PT_REGS_ECX(regs) = 0; \ 38 PT_REGS_CX(regs) = 0; \
39 PT_REGS_EDX(regs) = 0; \ 39 PT_REGS_DX(regs) = 0; \
40 PT_REGS_ESI(regs) = 0; \ 40 PT_REGS_SI(regs) = 0; \
41 PT_REGS_EDI(regs) = 0; \ 41 PT_REGS_DI(regs) = 0; \
42 PT_REGS_EBP(regs) = 0; \ 42 PT_REGS_BP(regs) = 0; \
43 PT_REGS_EAX(regs) = 0; \ 43 PT_REGS_AX(regs) = 0; \
44} while (0) 44} while (0)
45 45
46/* Shamelessly stolen from include/asm-i386/elf.h */ 46/* Shamelessly stolen from include/asm-i386/elf.h */
47 47
48#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \ 48#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \
49 pr_reg[0] = PT_REGS_EBX(regs); \ 49 pr_reg[0] = PT_REGS_BX(regs); \
50 pr_reg[1] = PT_REGS_ECX(regs); \ 50 pr_reg[1] = PT_REGS_CX(regs); \
51 pr_reg[2] = PT_REGS_EDX(regs); \ 51 pr_reg[2] = PT_REGS_DX(regs); \
52 pr_reg[3] = PT_REGS_ESI(regs); \ 52 pr_reg[3] = PT_REGS_SI(regs); \
53 pr_reg[4] = PT_REGS_EDI(regs); \ 53 pr_reg[4] = PT_REGS_DI(regs); \
54 pr_reg[5] = PT_REGS_EBP(regs); \ 54 pr_reg[5] = PT_REGS_BP(regs); \
55 pr_reg[6] = PT_REGS_EAX(regs); \ 55 pr_reg[6] = PT_REGS_AX(regs); \
56 pr_reg[7] = PT_REGS_DS(regs); \ 56 pr_reg[7] = PT_REGS_DS(regs); \
57 pr_reg[8] = PT_REGS_ES(regs); \ 57 pr_reg[8] = PT_REGS_ES(regs); \
58 /* fake once used fs and gs selectors? */ \ 58 /* fake once used fs and gs selectors? */ \
@@ -130,13 +130,13 @@ do { \
130#define ELF_ARCH EM_X86_64 130#define ELF_ARCH EM_X86_64
131 131
132#define ELF_PLAT_INIT(regs, load_addr) do { \ 132#define ELF_PLAT_INIT(regs, load_addr) do { \
133 PT_REGS_RBX(regs) = 0; \ 133 PT_REGS_BX(regs) = 0; \
134 PT_REGS_RCX(regs) = 0; \ 134 PT_REGS_CX(regs) = 0; \
135 PT_REGS_RDX(regs) = 0; \ 135 PT_REGS_DX(regs) = 0; \
136 PT_REGS_RSI(regs) = 0; \ 136 PT_REGS_SI(regs) = 0; \
137 PT_REGS_RDI(regs) = 0; \ 137 PT_REGS_DI(regs) = 0; \
138 PT_REGS_RBP(regs) = 0; \ 138 PT_REGS_BP(regs) = 0; \
139 PT_REGS_RAX(regs) = 0; \ 139 PT_REGS_AX(regs) = 0; \
140 PT_REGS_R8(regs) = 0; \ 140 PT_REGS_R8(regs) = 0; \
141 PT_REGS_R9(regs) = 0; \ 141 PT_REGS_R9(regs) = 0; \
142 PT_REGS_R10(regs) = 0; \ 142 PT_REGS_R10(regs) = 0; \
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index c8aca8c501b0..950dfb7b8417 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -1,5 +1,39 @@
1#ifndef __UM_X86_PTRACE_H
2#define __UM_X86_PTRACE_H
3
1#ifdef CONFIG_X86_32 4#ifdef CONFIG_X86_32
2# include "ptrace_32.h" 5# include "ptrace_32.h"
3#else 6#else
4# include "ptrace_64.h" 7# include "ptrace_64.h"
5#endif 8#endif
9
10#define PT_REGS_AX(r) UPT_AX(&(r)->regs)
11#define PT_REGS_BX(r) UPT_BX(&(r)->regs)
12#define PT_REGS_CX(r) UPT_CX(&(r)->regs)
13#define PT_REGS_DX(r) UPT_DX(&(r)->regs)
14
15#define PT_REGS_SI(r) UPT_SI(&(r)->regs)
16#define PT_REGS_DI(r) UPT_DI(&(r)->regs)
17#define PT_REGS_BP(r) UPT_BP(&(r)->regs)
18#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
19
20#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
21#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
22#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
23#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
24
25#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_AX(r)
26#define PT_REGS_SYSCALL_RET(r) PT_REGS_AX(r)
27
28#define PT_FIX_EXEC_STACK(sp) do ; while(0)
29
30#define profile_pc(regs) PT_REGS_IP(regs)
31
32#define UPT_RESTART_SYSCALL(r) (UPT_IP(r) -= 2)
33#define UPT_SET_SYSCALL_RETURN(r, res) (UPT_AX(r) = (res))
34
35static inline long regs_return_value(struct uml_pt_regs *regs)
36{
37 return UPT_AX(regs);
38}
39#endif /* __UM_X86_PTRACE_H */
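
The accessors move to width-neutral names, so PT_REGS_AX() covers both the
old PT_REGS_EAX() and PT_REGS_RAX() and the shared macros can live in this
one header. A minimal standalone sketch of the layering (gp[] slot array,
UPT_* slot accessors, PT_REGS_* wrappers); the HOST_* slots here are
hypothetical placeholders for the generated constants:

    #include <stdio.h>

    enum { HOST_AX, HOST_BX, HOST_IP, NREGS };    /* illustrative slots */

    struct uml_pt_regs { unsigned long gp[NREGS]; };
    struct pt_regs { struct uml_pt_regs regs; };

    #define UPT_AX(r)       ((r)->gp[HOST_AX])
    #define PT_REGS_AX(r)   UPT_AX(&(r)->regs)
    #define PT_REGS_SYSCALL_RET(r)  PT_REGS_AX(r)

    int main(void)
    {
        struct pt_regs regs = { { { 42 } } };    /* gp[HOST_AX] = 42 */

        printf("syscall ret = %lu\n", PT_REGS_SYSCALL_RET(&regs));
        return 0;
    }
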
diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h
index 5d2a59112537..2cf225351b65 100644
--- a/arch/x86/um/asm/ptrace_32.h
+++ b/arch/x86/um/asm/ptrace_32.h
@@ -11,29 +11,6 @@
11#include "linux/compiler.h" 11#include "linux/compiler.h"
12#include "asm/ptrace-generic.h" 12#include "asm/ptrace-generic.h"
13 13
14#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs)
15#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs)
16#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs)
17#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs)
18#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs)
19#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs)
20#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs)
21
22#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
23#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
24#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
25#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
26#define PT_REGS_FS(r) UPT_FS(&(r)->regs)
27#define PT_REGS_GS(r) UPT_GS(&(r)->regs)
28
29#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
30
31#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r)
32#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r)
33#define PT_FIX_EXEC_STACK(sp) do ; while(0)
34
35#define profile_pc(regs) PT_REGS_IP(regs)
36
37#define user_mode(r) UPT_IS_USER(&(r)->regs) 14#define user_mode(r) UPT_IS_USER(&(r)->regs)
38 15
39/* 16/*
diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h
index 706a0d80545c..ea7bff394320 100644
--- a/arch/x86/um/asm/ptrace_64.h
+++ b/arch/x86/um/asm/ptrace_64.h
@@ -15,13 +15,6 @@
15 15
16#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 16#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
17 17
18#define PT_REGS_RBX(r) UPT_RBX(&(r)->regs)
19#define PT_REGS_RCX(r) UPT_RCX(&(r)->regs)
20#define PT_REGS_RDX(r) UPT_RDX(&(r)->regs)
21#define PT_REGS_RSI(r) UPT_RSI(&(r)->regs)
22#define PT_REGS_RDI(r) UPT_RDI(&(r)->regs)
23#define PT_REGS_RBP(r) UPT_RBP(&(r)->regs)
24#define PT_REGS_RAX(r) UPT_RAX(&(r)->regs)
25#define PT_REGS_R8(r) UPT_R8(&(r)->regs) 18#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
26#define PT_REGS_R9(r) UPT_R9(&(r)->regs) 19#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
27#define PT_REGS_R10(r) UPT_R10(&(r)->regs) 20#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
@@ -31,27 +24,8 @@
31#define PT_REGS_R14(r) UPT_R14(&(r)->regs) 24#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
32#define PT_REGS_R15(r) UPT_R15(&(r)->regs) 25#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
33 26
34#define PT_REGS_FS(r) UPT_FS(&(r)->regs)
35#define PT_REGS_GS(r) UPT_GS(&(r)->regs)
36#define PT_REGS_DS(r) UPT_DS(&(r)->regs)
37#define PT_REGS_ES(r) UPT_ES(&(r)->regs)
38#define PT_REGS_SS(r) UPT_SS(&(r)->regs)
39#define PT_REGS_CS(r) UPT_CS(&(r)->regs)
40
41#define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs)
42#define PT_REGS_RIP(r) UPT_IP(&(r)->regs)
43#define PT_REGS_SP(r) UPT_SP(&(r)->regs)
44
45#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs)
46
47/* XXX */ 27/* XXX */
48#define user_mode(r) UPT_IS_USER(&(r)->regs) 28#define user_mode(r) UPT_IS_USER(&(r)->regs)
49#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_RAX(r)
50#define PT_REGS_SYSCALL_RET(r) PT_REGS_RAX(r)
51
52#define PT_FIX_EXEC_STACK(sp) do ; while(0)
53
54#define profile_pc(regs) PT_REGS_IP(regs)
55 29
56struct user_desc; 30struct user_desc;
57 31
diff --git a/arch/x86/um/asm/system.h b/arch/x86/um/asm/system.h
deleted file mode 100644
index a459fd9b7598..000000000000
--- a/arch/x86/um/asm/system.h
+++ /dev/null
@@ -1,135 +0,0 @@
1#ifndef _ASM_X86_SYSTEM_H_
2#define _ASM_X86_SYSTEM_H_
3
4#include <asm/asm.h>
5#include <asm/segment.h>
6#include <asm/cpufeature.h>
7#include <asm/cmpxchg.h>
8#include <asm/nops.h>
9
10#include <linux/kernel.h>
11#include <linux/irqflags.h>
12
13/* entries in ARCH_DLINFO: */
14#ifdef CONFIG_IA32_EMULATION
15# define AT_VECTOR_SIZE_ARCH 2
16#else
17# define AT_VECTOR_SIZE_ARCH 1
18#endif
19
20extern unsigned long arch_align_stack(unsigned long sp);
21
22void default_idle(void);
23
24/*
25 * Force strict CPU ordering.
26 * And yes, this is required on UP too when we're talking
27 * to devices.
28 */
29#ifdef CONFIG_X86_32
30/*
31 * Some non-Intel clones support out of order store. wmb() ceases to be a
32 * nop for these.
33 */
34#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
35#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
36#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
37#else
38#define mb() asm volatile("mfence":::"memory")
39#define rmb() asm volatile("lfence":::"memory")
40#define wmb() asm volatile("sfence" ::: "memory")
41#endif
42
43/**
44 * read_barrier_depends - Flush all pending reads that subsequents reads
45 * depend on.
46 *
47 * No data-dependent reads from memory-like regions are ever reordered
48 * over this barrier. All reads preceding this primitive are guaranteed
49 * to access memory (but not necessarily other CPUs' caches) before any
50 * reads following this primitive that depend on the data return by
51 * any of the preceding reads. This primitive is much lighter weight than
52 * rmb() on most CPUs, and is never heavier weight than is
53 * rmb().
54 *
55 * These ordering constraints are respected by both the local CPU
56 * and the compiler.
57 *
58 * Ordering is not guaranteed by anything other than these primitives,
59 * not even by data dependencies. See the documentation for
60 * memory_barrier() for examples and URLs to more information.
61 *
62 * For example, the following code would force ordering (the initial
63 * value of "a" is zero, "b" is one, and "p" is "&a"):
64 *
65 * <programlisting>
66 * CPU 0 CPU 1
67 *
68 * b = 2;
69 * memory_barrier();
70 * p = &b; q = p;
71 * read_barrier_depends();
72 * d = *q;
73 * </programlisting>
74 *
75 * because the read of "*q" depends on the read of "p" and these
76 * two reads are separated by a read_barrier_depends(). However,
77 * the following code, with the same initial values for "a" and "b":
78 *
79 * <programlisting>
80 * CPU 0 CPU 1
81 *
82 * a = 2;
83 * memory_barrier();
84 * b = 3; y = b;
85 * read_barrier_depends();
86 * x = a;
87 * </programlisting>
88 *
89 * does not enforce ordering, since there is no data dependency between
90 * the read of "a" and the read of "b". Therefore, on some CPUs, such
91 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
92 * in cases like this where there are no data dependencies.
93 **/
94
95#define read_barrier_depends() do { } while (0)
96
97#ifdef CONFIG_SMP
98#define smp_mb() mb()
99#ifdef CONFIG_X86_PPRO_FENCE
100# define smp_rmb() rmb()
101#else
102# define smp_rmb() barrier()
103#endif
104#ifdef CONFIG_X86_OOSTORE
105# define smp_wmb() wmb()
106#else
107# define smp_wmb() barrier()
108#endif
109#define smp_read_barrier_depends() read_barrier_depends()
110#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
111#else
112#define smp_mb() barrier()
113#define smp_rmb() barrier()
114#define smp_wmb() barrier()
115#define smp_read_barrier_depends() do { } while (0)
116#define set_mb(var, value) do { var = value; barrier(); } while (0)
117#endif
118
119/*
120 * Stop RDTSC speculation. This is needed when you need to use RDTSC
121 * (or get_cycles or vread that possibly accesses the TSC) in a defined
122 * code region.
123 *
124 * (Could use an alternative three way for this if there was one.)
125 */
126static inline void rdtsc_barrier(void)
127{
128 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
129 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
130}
131
132extern void *_switch_to(void *prev, void *next, void *last);
133#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
134
135#endif
diff --git a/arch/x86/um/checksum_32.S b/arch/x86/um/checksum_32.S
index f058d2f82e18..8d0c420465cc 100644
--- a/arch/x86/um/checksum_32.S
+++ b/arch/x86/um/checksum_32.S
@@ -26,6 +26,7 @@
26 */ 26 */
27 27
28#include <asm/errno.h> 28#include <asm/errno.h>
29#include <asm/asm.h>
29 30
30/* 31/*
31 * computes a partial checksum, e.g. for TCP/UDP fragments 32 * computes a partial checksum, e.g. for TCP/UDP fragments
@@ -232,15 +233,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
232 233
233#define SRC(y...) \ 234#define SRC(y...) \
234 9999: y; \ 235 9999: y; \
235 .section __ex_table, "a"; \ 236 _ASM_EXTABLE(9999b, 6001f)
236 .long 9999b, 6001f ; \
237 .previous
238 237
239#define DST(y...) \ 238#define DST(y...) \
240 9999: y; \ 239 9999: y; \
241 .section __ex_table, "a"; \ 240 _ASM_EXTABLE(9999b, 6002f)
242 .long 9999b, 6002f ; \
243 .previous
244 241
245.align 4 242.align 4
246 243
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 2bbe1ec2d96a..6ce2d76eb908 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -1,15 +1,74 @@
1#ifndef __SYSDEP_X86_PTRACE_H 1#ifndef __SYSDEP_X86_PTRACE_H
2#define __SYSDEP_X86_PTRACE_H 2#define __SYSDEP_X86_PTRACE_H
3 3
4#include <generated/user_constants.h>
5#include "sysdep/faultinfo.h"
6
7#define MAX_REG_OFFSET (UM_FRAME_SIZE)
8#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
9
10#define REGS_IP(r) ((r)[HOST_IP])
11#define REGS_SP(r) ((r)[HOST_SP])
12#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
13#define REGS_AX(r) ((r)[HOST_AX])
14#define REGS_BX(r) ((r)[HOST_BX])
15#define REGS_CX(r) ((r)[HOST_CX])
16#define REGS_DX(r) ((r)[HOST_DX])
17#define REGS_SI(r) ((r)[HOST_SI])
18#define REGS_DI(r) ((r)[HOST_DI])
19#define REGS_BP(r) ((r)[HOST_BP])
20#define REGS_CS(r) ((r)[HOST_CS])
21#define REGS_SS(r) ((r)[HOST_SS])
22#define REGS_DS(r) ((r)[HOST_DS])
23#define REGS_ES(r) ((r)[HOST_ES])
24
25#define UPT_IP(r) REGS_IP((r)->gp)
26#define UPT_SP(r) REGS_SP((r)->gp)
27#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
28#define UPT_AX(r) REGS_AX((r)->gp)
29#define UPT_BX(r) REGS_BX((r)->gp)
30#define UPT_CX(r) REGS_CX((r)->gp)
31#define UPT_DX(r) REGS_DX((r)->gp)
32#define UPT_SI(r) REGS_SI((r)->gp)
33#define UPT_DI(r) REGS_DI((r)->gp)
34#define UPT_BP(r) REGS_BP((r)->gp)
35#define UPT_CS(r) REGS_CS((r)->gp)
36#define UPT_SS(r) REGS_SS((r)->gp)
37#define UPT_DS(r) REGS_DS((r)->gp)
38#define UPT_ES(r) REGS_ES((r)->gp)
39
4#ifdef __i386__ 40#ifdef __i386__
5#include "ptrace_32.h" 41#include "ptrace_32.h"
6#else 42#else
7#include "ptrace_64.h" 43#include "ptrace_64.h"
8#endif 44#endif
9 45
10static inline long regs_return_value(struct uml_pt_regs *regs) 46struct syscall_args {
11{ 47 unsigned long args[6];
12 return UPT_SYSCALL_RET(regs); 48};
13} 49
50#define SYSCALL_ARGS(r) ((struct syscall_args) \
51 { .args = { UPT_SYSCALL_ARG1(r), \
52 UPT_SYSCALL_ARG2(r), \
53 UPT_SYSCALL_ARG3(r), \
54 UPT_SYSCALL_ARG4(r), \
55 UPT_SYSCALL_ARG5(r), \
56 UPT_SYSCALL_ARG6(r) } } )
57
58struct uml_pt_regs {
59 unsigned long gp[MAX_REG_NR];
60 unsigned long fp[MAX_FP_NR];
61 struct faultinfo faultinfo;
62 long syscall;
63 int is_user;
64};
65
66#define EMPTY_UML_PT_REGS { }
67
68#define UPT_SYSCALL_NR(r) ((r)->syscall)
69#define UPT_FAULTINFO(r) (&(r)->faultinfo)
70#define UPT_IS_USER(r) ((r)->is_user)
71
72extern int user_context(unsigned long sp);
14 73
15#endif /* __SYSDEP_X86_PTRACE_H */ 74#endif /* __SYSDEP_X86_PTRACE_H */
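
SYSCALL_ARGS() can move up to this shared header because only the
UPT_SYSCALL_ARGn() register assignments differ per arch (bx/cx/dx/si/di/bp
on i386, di/si/dx/r10/r8/r9 on x86-64, as the two headers below show). The
compound-literal gather is worth seeing standalone; ARG() is a
hypothetical stand-in for the per-arch accessors:

    #include <stdio.h>

    struct syscall_args {
        unsigned long args[6];
    };

    #define ARG(r, i)    ((r)[i])    /* stands in for UPT_SYSCALL_ARGn() */

    /* A C99 compound literal builds the struct in one expression. */
    #define SYSCALL_ARGS(r) ((struct syscall_args) \
        { .args = { ARG(r, 0), ARG(r, 1), ARG(r, 2), \
                    ARG(r, 3), ARG(r, 4), ARG(r, 5) } })

    int main(void)
    {
        unsigned long regs[6] = { 10, 20, 30, 40, 50, 60 };
        struct syscall_args a = SYSCALL_ARGS(regs);

        printf("arg1=%lu arg6=%lu\n", a.args[0], a.args[5]);
        return 0;
    }
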
diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h
index befd1df32ed0..b94a108de1dc 100644
--- a/arch/x86/um/shared/sysdep/ptrace_32.h
+++ b/arch/x86/um/shared/sysdep/ptrace_32.h
@@ -6,11 +6,7 @@
6#ifndef __SYSDEP_I386_PTRACE_H 6#ifndef __SYSDEP_I386_PTRACE_H
7#define __SYSDEP_I386_PTRACE_H 7#define __SYSDEP_I386_PTRACE_H
8 8
9#include <generated/user_constants.h> 9#define MAX_FP_NR HOST_FPX_SIZE
10#include "sysdep/faultinfo.h"
11
12#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long))
13#define MAX_REG_OFFSET (UM_FRAME_SIZE)
14 10
15static inline void update_debugregs(int seq) {} 11static inline void update_debugregs(int seq) {}
16 12
@@ -24,90 +20,16 @@ void set_using_sysemu(int value);
24int get_using_sysemu(void); 20int get_using_sysemu(void);
25extern int sysemu_supported; 21extern int sysemu_supported;
26 22
27#define REGS_IP(r) ((r)[HOST_IP])
28#define REGS_SP(r) ((r)[HOST_SP])
29#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
30#define REGS_EAX(r) ((r)[HOST_AX])
31#define REGS_EBX(r) ((r)[HOST_BX])
32#define REGS_ECX(r) ((r)[HOST_CX])
33#define REGS_EDX(r) ((r)[HOST_DX])
34#define REGS_ESI(r) ((r)[HOST_SI])
35#define REGS_EDI(r) ((r)[HOST_DI])
36#define REGS_EBP(r) ((r)[HOST_BP])
37#define REGS_CS(r) ((r)[HOST_CS])
38#define REGS_SS(r) ((r)[HOST_SS])
39#define REGS_DS(r) ((r)[HOST_DS])
40#define REGS_ES(r) ((r)[HOST_ES])
41#define REGS_FS(r) ((r)[HOST_FS])
42#define REGS_GS(r) ((r)[HOST_GS])
43
44#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res)
45
46#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
47#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
48
49#ifndef PTRACE_SYSEMU_SINGLESTEP 23#ifndef PTRACE_SYSEMU_SINGLESTEP
50#define PTRACE_SYSEMU_SINGLESTEP 32 24#define PTRACE_SYSEMU_SINGLESTEP 32
51#endif 25#endif
52 26
53struct uml_pt_regs { 27#define UPT_SYSCALL_ARG1(r) UPT_BX(r)
54 unsigned long gp[MAX_REG_NR]; 28#define UPT_SYSCALL_ARG2(r) UPT_CX(r)
55 unsigned long fp[HOST_FPX_SIZE]; 29#define UPT_SYSCALL_ARG3(r) UPT_DX(r)
56 struct faultinfo faultinfo; 30#define UPT_SYSCALL_ARG4(r) UPT_SI(r)
57 long syscall; 31#define UPT_SYSCALL_ARG5(r) UPT_DI(r)
58 int is_user; 32#define UPT_SYSCALL_ARG6(r) UPT_BP(r)
59};
60
61#define EMPTY_UML_PT_REGS { }
62
63#define UPT_IP(r) REGS_IP((r)->gp)
64#define UPT_SP(r) REGS_SP((r)->gp)
65#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
66#define UPT_EAX(r) REGS_EAX((r)->gp)
67#define UPT_EBX(r) REGS_EBX((r)->gp)
68#define UPT_ECX(r) REGS_ECX((r)->gp)
69#define UPT_EDX(r) REGS_EDX((r)->gp)
70#define UPT_ESI(r) REGS_ESI((r)->gp)
71#define UPT_EDI(r) REGS_EDI((r)->gp)
72#define UPT_EBP(r) REGS_EBP((r)->gp)
73#define UPT_ORIG_EAX(r) ((r)->syscall)
74#define UPT_CS(r) REGS_CS((r)->gp)
75#define UPT_SS(r) REGS_SS((r)->gp)
76#define UPT_DS(r) REGS_DS((r)->gp)
77#define UPT_ES(r) REGS_ES((r)->gp)
78#define UPT_FS(r) REGS_FS((r)->gp)
79#define UPT_GS(r) REGS_GS((r)->gp)
80
81#define UPT_SYSCALL_ARG1(r) UPT_EBX(r)
82#define UPT_SYSCALL_ARG2(r) UPT_ECX(r)
83#define UPT_SYSCALL_ARG3(r) UPT_EDX(r)
84#define UPT_SYSCALL_ARG4(r) UPT_ESI(r)
85#define UPT_SYSCALL_ARG5(r) UPT_EDI(r)
86#define UPT_SYSCALL_ARG6(r) UPT_EBP(r)
87
88extern int user_context(unsigned long sp);
89
90#define UPT_IS_USER(r) ((r)->is_user)
91
92struct syscall_args {
93 unsigned long args[6];
94};
95
96#define SYSCALL_ARGS(r) ((struct syscall_args) \
97 { .args = { UPT_SYSCALL_ARG1(r), \
98 UPT_SYSCALL_ARG2(r), \
99 UPT_SYSCALL_ARG3(r), \
100 UPT_SYSCALL_ARG4(r), \
101 UPT_SYSCALL_ARG5(r), \
102 UPT_SYSCALL_ARG6(r) } } )
103
104#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
105
106#define UPT_ORIG_SYSCALL(r) UPT_EAX(r)
107#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r)
108#define UPT_SYSCALL_RET(r) UPT_EAX(r)
109
110#define UPT_FAULTINFO(r) (&(r)->faultinfo)
111 33
112extern void arch_init_registers(int pid); 34extern void arch_init_registers(int pid);
113 35
diff --git a/arch/x86/um/shared/sysdep/ptrace_64.h b/arch/x86/um/shared/sysdep/ptrace_64.h
index 031edc53ac57..919789f1071e 100644
--- a/arch/x86/um/shared/sysdep/ptrace_64.h
+++ b/arch/x86/um/shared/sysdep/ptrace_64.h
@@ -8,22 +8,8 @@
8#ifndef __SYSDEP_X86_64_PTRACE_H 8#ifndef __SYSDEP_X86_64_PTRACE_H
9#define __SYSDEP_X86_64_PTRACE_H 9#define __SYSDEP_X86_64_PTRACE_H
10 10
11#include <generated/user_constants.h> 11#define MAX_FP_NR HOST_FP_SIZE
12#include "sysdep/faultinfo.h"
13 12
14#define MAX_REG_OFFSET (UM_FRAME_SIZE)
15#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
16
17#define REGS_IP(r) ((r)[HOST_IP])
18#define REGS_SP(r) ((r)[HOST_SP])
19
20#define REGS_RBX(r) ((r)[HOST_BX])
21#define REGS_RCX(r) ((r)[HOST_CX])
22#define REGS_RDX(r) ((r)[HOST_DX])
23#define REGS_RSI(r) ((r)[HOST_SI])
24#define REGS_RDI(r) ((r)[HOST_DI])
25#define REGS_RBP(r) ((r)[HOST_BP])
26#define REGS_RAX(r) ((r)[HOST_AX])
27#define REGS_R8(r) ((r)[HOST_R8]) 13#define REGS_R8(r) ((r)[HOST_R8])
28#define REGS_R9(r) ((r)[HOST_R9]) 14#define REGS_R9(r) ((r)[HOST_R9])
29#define REGS_R10(r) ((r)[HOST_R10]) 15#define REGS_R10(r) ((r)[HOST_R10])
@@ -32,9 +18,6 @@
32#define REGS_R13(r) ((r)[HOST_R13]) 18#define REGS_R13(r) ((r)[HOST_R13])
33#define REGS_R14(r) ((r)[HOST_R14]) 19#define REGS_R14(r) ((r)[HOST_R14])
34#define REGS_R15(r) ((r)[HOST_R15]) 20#define REGS_R15(r) ((r)[HOST_R15])
35#define REGS_CS(r) ((r)[HOST_CS])
36#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS])
37#define REGS_SS(r) ((r)[HOST_SS])
38 21
39#define HOST_FS_BASE 21 22#define HOST_FS_BASE 21
40#define HOST_GS_BASE 22 23#define HOST_GS_BASE 22
@@ -58,45 +41,6 @@
58#define GS (HOST_GS * sizeof(long)) 41#define GS (HOST_GS * sizeof(long))
59#endif 42#endif
60 43
61#define REGS_FS_BASE(r) ((r)[HOST_FS_BASE])
62#define REGS_GS_BASE(r) ((r)[HOST_GS_BASE])
63#define REGS_DS(r) ((r)[HOST_DS])
64#define REGS_ES(r) ((r)[HOST_ES])
65#define REGS_FS(r) ((r)[HOST_FS])
66#define REGS_GS(r) ((r)[HOST_GS])
67
68#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_AX])
69
70#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res)
71
72#define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
73#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
74
75#define REGS_FAULT_ADDR(r) ((r)->fault_addr)
76
77#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type)
78
79#define REGS_TRAP(r) ((r)->trap_type)
80
81#define REGS_ERR(r) ((r)->fault_type)
82
83struct uml_pt_regs {
84 unsigned long gp[MAX_REG_NR];
85 unsigned long fp[HOST_FP_SIZE];
86 struct faultinfo faultinfo;
87 long syscall;
88 int is_user;
89};
90
91#define EMPTY_UML_PT_REGS { }
92
93#define UPT_RBX(r) REGS_RBX((r)->gp)
94#define UPT_RCX(r) REGS_RCX((r)->gp)
95#define UPT_RDX(r) REGS_RDX((r)->gp)
96#define UPT_RSI(r) REGS_RSI((r)->gp)
97#define UPT_RDI(r) REGS_RDI((r)->gp)
98#define UPT_RBP(r) REGS_RBP((r)->gp)
99#define UPT_RAX(r) REGS_RAX((r)->gp)
100#define UPT_R8(r) REGS_R8((r)->gp) 44#define UPT_R8(r) REGS_R8((r)->gp)
101#define UPT_R9(r) REGS_R9((r)->gp) 45#define UPT_R9(r) REGS_R9((r)->gp)
102#define UPT_R10(r) REGS_R10((r)->gp) 46#define UPT_R10(r) REGS_R10((r)->gp)
@@ -105,51 +49,14 @@ struct uml_pt_regs {
105#define UPT_R13(r) REGS_R13((r)->gp) 49#define UPT_R13(r) REGS_R13((r)->gp)
106#define UPT_R14(r) REGS_R14((r)->gp) 50#define UPT_R14(r) REGS_R14((r)->gp)
107#define UPT_R15(r) REGS_R15((r)->gp) 51#define UPT_R15(r) REGS_R15((r)->gp)
108#define UPT_CS(r) REGS_CS((r)->gp)
109#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp)
110#define UPT_FS(r) REGS_FS((r)->gp)
111#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp)
112#define UPT_GS(r) REGS_GS((r)->gp)
113#define UPT_DS(r) REGS_DS((r)->gp)
114#define UPT_ES(r) REGS_ES((r)->gp)
115#define UPT_CS(r) REGS_CS((r)->gp)
116#define UPT_SS(r) REGS_SS((r)->gp)
117#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp)
118
119#define UPT_IP(r) REGS_IP((r)->gp)
120#define UPT_SP(r) REGS_SP((r)->gp)
121
122#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
123#define UPT_SYSCALL_NR(r) ((r)->syscall)
124#define UPT_SYSCALL_RET(r) UPT_RAX(r)
125
126extern int user_context(unsigned long sp);
127 52
128#define UPT_IS_USER(r) ((r)->is_user) 53#define UPT_SYSCALL_ARG1(r) UPT_DI(r)
129 54#define UPT_SYSCALL_ARG2(r) UPT_SI(r)
130#define UPT_SYSCALL_ARG1(r) UPT_RDI(r) 55#define UPT_SYSCALL_ARG3(r) UPT_DX(r)
131#define UPT_SYSCALL_ARG2(r) UPT_RSI(r)
132#define UPT_SYSCALL_ARG3(r) UPT_RDX(r)
133#define UPT_SYSCALL_ARG4(r) UPT_R10(r) 56#define UPT_SYSCALL_ARG4(r) UPT_R10(r)
134#define UPT_SYSCALL_ARG5(r) UPT_R8(r) 57#define UPT_SYSCALL_ARG5(r) UPT_R8(r)
135#define UPT_SYSCALL_ARG6(r) UPT_R9(r) 58#define UPT_SYSCALL_ARG6(r) UPT_R9(r)
136 59
137struct syscall_args {
138 unsigned long args[6];
139};
140
141#define SYSCALL_ARGS(r) ((struct syscall_args) \
142 { .args = { UPT_SYSCALL_ARG1(r), \
143 UPT_SYSCALL_ARG2(r), \
144 UPT_SYSCALL_ARG3(r), \
145 UPT_SYSCALL_ARG4(r), \
146 UPT_SYSCALL_ARG5(r), \
147 UPT_SYSCALL_ARG6(r) } } )
148
149#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
150
151#define UPT_FAULTINFO(r) (&(r)->faultinfo)
152
153static inline void arch_init_registers(int pid) 60static inline void arch_init_registers(int pid)
154{ 61{
155} 62}
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 4883b9546016..bb0fb03b9f85 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -156,6 +156,9 @@ static int copy_sc_from_user(struct pt_regs *regs,
 	struct sigcontext sc;
 	int err, pid;
 
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
 	err = copy_from_user(&sc, from, sizeof(sc));
 	if (err)
 		return err;
@@ -410,9 +413,9 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 
 	PT_REGS_SP(regs) = (unsigned long) frame;
 	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
-	PT_REGS_EAX(regs) = (unsigned long) sig;
-	PT_REGS_EDX(regs) = (unsigned long) 0;
-	PT_REGS_ECX(regs) = (unsigned long) 0;
+	PT_REGS_AX(regs) = (unsigned long) sig;
+	PT_REGS_DX(regs) = (unsigned long) 0;
+	PT_REGS_CX(regs) = (unsigned long) 0;
 
 	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
 		ptrace_notify(SIGTRAP);
@@ -460,9 +463,9 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 
 	PT_REGS_SP(regs) = (unsigned long) frame;
 	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
-	PT_REGS_EAX(regs) = (unsigned long) sig;
-	PT_REGS_EDX(regs) = (unsigned long) &frame->info;
-	PT_REGS_ECX(regs) = (unsigned long) &frame->uc;
+	PT_REGS_AX(regs) = (unsigned long) sig;
+	PT_REGS_DX(regs) = (unsigned long) &frame->info;
+	PT_REGS_CX(regs) = (unsigned long) &frame->uc;
 
 	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
 		ptrace_notify(SIGTRAP);
@@ -541,8 +544,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 			set->sig[0]);
 	err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
 	if (sizeof(*set) == 16) {
-		__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
-		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
+		err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
+		err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
 	}
 	else
 		err |= __copy_to_user(&frame->uc.uc_sigmask, set,
@@ -570,17 +573,17 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	}
 
 	PT_REGS_SP(regs) = (unsigned long) frame;
-	PT_REGS_RDI(regs) = sig;
+	PT_REGS_DI(regs) = sig;
 	/* In case the signal handler was declared without prototypes */
-	PT_REGS_RAX(regs) = 0;
+	PT_REGS_AX(regs) = 0;
 
 	/*
 	 * This also works for non SA_SIGINFO handlers because they expect the
 	 * next argument after the signal number on the stack.
 	 */
-	PT_REGS_RSI(regs) = (unsigned long) &frame->info;
-	PT_REGS_RDX(regs) = (unsigned long) &frame->uc;
-	PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler;
+	PT_REGS_SI(regs) = (unsigned long) &frame->info;
+	PT_REGS_DX(regs) = (unsigned long) &frame->uc;
+	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
 out:
 	return err;
 }
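
Two distinct fixes ride along in this file. The restart_block hunk makes any pending restarted system call return -EINTR once a signal frame has been copied back in, matching the usual convention in sigreturn paths on other architectures. The sigmask hunk stops discarding the return values of __put_user(); the standard kernel idiom OR-accumulates every copy-out result into one error flag. A self-contained illustration of that idiom, where put_word() and copy_out() are invented stand-ins for this sketch:

    #include <stdio.h>

    /* Stand-in for __put_user(): 0 on success, -1 on a simulated fault. */
    static int put_word(long val, long *dst)
    {
    	if (!dst)
    		return -1;
    	*dst = val;
    	return 0;
    }

    static int copy_out(long a, long b, long *pa, long *pb)
    {
    	int err = 0;

    	err |= put_word(a, pa);	/* no failure is silently dropped */
    	err |= put_word(b, pb);
    	return err;		/* non-zero if any store faulted */
    }

    int main(void)
    {
    	long x;

    	/* Non-zero result: the second store faulted and was not lost. */
    	printf("%d\n", copy_out(1, 2, &x, NULL));
    	return 0;
    }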
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 9924776f4265..170bd926a69c 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -31,7 +31,6 @@
 #define stub_fork sys_fork
 #define stub_vfork sys_vfork
 #define stub_execve sys_execve
-#define stub_rt_sigsuspend sys_rt_sigsuspend
 #define stub_sigaltstack sys_sigaltstack
 #define stub_rt_sigreturn sys_rt_sigreturn
 
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index 70ca357393b8..b853e8600b9d 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -44,10 +44,10 @@ long sys_sigaction(int sig, const struct old_sigaction __user *act,
 		old_sigset_t mask;
 		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
 		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
-		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
+		    __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+		    __get_user(mask, &act->sa_mask))
 			return -EFAULT;
-		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
-		__get_user(mask, &act->sa_mask);
 		siginitset(&new_ka.sa.sa_mask, mask);
 	}
 
@@ -56,10 +56,10 @@ long sys_sigaction(int sig, const struct old_sigaction __user *act,
 	if (!ret && oact) {
 		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
 		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
-		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
+		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+		    __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
 			return -EFAULT;
-		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
 	}
 
 	return ret;
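
Both sys_sigaction() hunks fix the same bug class as the signal-frame one: sa_flags and sa_mask were copied after the -EFAULT check had already passed judgement, so a fault in those accesses was silently ignored. Folding them into the short-circuiting || chain means the first failing access aborts the whole operation. The shape of the idiom, as a hedged standalone sketch where get_word() and read_action() are invented stand-ins:

    #define EFAULT 14	/* matches the kernel's errno value */

    /* Invented stand-in for __get_user(): non-zero if src is unreadable. */
    static int get_word(long *dst, const long *src)
    {
    	if (!src)
    		return -1;
    	*dst = *src;
    	return 0;
    }

    int read_action(long *handler, long *flags, const long *uh, const long *uf)
    {
    	if (get_word(handler, uh) ||
    	    get_word(flags, uf))	/* first fault stops the chain */
    		return -EFAULT;
    	return 0;
    }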
diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c
index 171b3e9dc867..2d5cc51e9bef 100644
--- a/arch/x86/um/sysrq_32.c
+++ b/arch/x86/um/sysrq_32.c
@@ -23,12 +23,10 @@ void show_regs(struct pt_regs *regs)
 	printk(" EFLAGS: %08lx\n %s\n", PT_REGS_EFLAGS(regs),
 	       print_tainted());
 	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-	       PT_REGS_EAX(regs), PT_REGS_EBX(regs),
-	       PT_REGS_ECX(regs),
-	       PT_REGS_EDX(regs));
+	       PT_REGS_AX(regs), PT_REGS_BX(regs),
+	       PT_REGS_CX(regs), PT_REGS_DX(regs));
 	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
-	       PT_REGS_ESI(regs), PT_REGS_EDI(regs),
-	       PT_REGS_EBP(regs));
+	       PT_REGS_SI(regs), PT_REGS_DI(regs), PT_REGS_BP(regs));
 	printk(" DS: %04lx ES: %04lx\n",
 	       0xffff & PT_REGS_DS(regs),
 	       0xffff & PT_REGS_ES(regs));
diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c
index e8913436d7dc..08258f179969 100644
--- a/arch/x86/um/sysrq_64.c
+++ b/arch/x86/um/sysrq_64.c
@@ -19,15 +19,15 @@ void __show_regs(struct pt_regs *regs)
 	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
 		current->comm, print_tainted(), init_utsname()->release);
 	printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff,
-	       PT_REGS_RIP(regs));
+	       PT_REGS_IP(regs));
 	printk(KERN_INFO "RSP: %016lx EFLAGS: %08lx\n", PT_REGS_SP(regs),
 	       PT_REGS_EFLAGS(regs));
 	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
-	       PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs));
+	       PT_REGS_AX(regs), PT_REGS_BX(regs), PT_REGS_CX(regs));
 	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
-	       PT_REGS_RDX(regs), PT_REGS_RSI(regs), PT_REGS_RDI(regs));
+	       PT_REGS_DX(regs), PT_REGS_SI(regs), PT_REGS_DI(regs));
 	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
-	       PT_REGS_RBP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs));
+	       PT_REGS_BP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs));
 	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
 	       PT_REGS_R10(regs), PT_REGS_R11(regs), PT_REGS_R12(regs));
 	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index c6c7131e563b..baba84f8ecb8 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -219,7 +219,7 @@ int arch_copy_tls(struct task_struct *new)
 	int idx, ret = -EFAULT;
 
 	if (copy_from_user(&info,
-			   (void __user *) UPT_ESI(&new->thread.regs.regs),
+			   (void __user *) UPT_SI(&new->thread.regs.regs),
 			   sizeof(info)))
 		goto out;
 
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index add2c2d729ce..96ab2c09cb68 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -20,5 +20,5 @@ obj-$(CONFIG_EVENT_TRACING) += trace.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
-obj-$(CONFIG_XEN_DOM0)		+= vga.o
+obj-$(CONFIG_XEN_DOM0)		+= apic.o vga.o
 obj-$(CONFIG_SWIOTLB_XEN)	+= pci-swiotlb-xen.o
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
new file mode 100644
index 000000000000..ec57bd3818a4
--- /dev/null
+++ b/arch/x86/xen/apic.c
@@ -0,0 +1,33 @@
+#include <linux/init.h>
+
+#include <asm/x86_init.h>
+#include <asm/apic.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xen.h>
+#include <xen/interface/physdev.h>
+
+unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
+{
+	struct physdev_apic apic_op;
+	int ret;
+
+	apic_op.apic_physbase = mpc_ioapic_addr(apic);
+	apic_op.reg = reg;
+	ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+	if (!ret)
+		return apic_op.value;
+
+	/* fall back to returning emulated IO_APIC values */
+	if (reg == 0x1)
+		return 0x00170020;
+	else if (reg == 0x0)
+		return apic << 24;
+
+	return 0xfd;
+}
+
+void __init xen_init_apic(void)
+{
+	x86_io_apic_ops.read = xen_io_apic_read;
+}
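
The fallback values in xen_io_apic_read() mimic what an emulated IO-APIC would report when the hypercall fails: register 0x1 is the version register (0x00170020 encodes 0x17 redirection entries and version 0x20), register 0x0 is the ID register with the APIC id in bits 24-31, and anything else reads as 0xfd. The consuming side is not part of this file; elsewhere in the series the generic IO-APIC read path is routed through the ops table, roughly as in this sketch (treat the exact wrapper as an assumption, not a quotation):

    /* Approximate shape of the generic caller, not shown in this hunk. */
    static unsigned int io_apic_read(unsigned int apic, unsigned int reg)
    {
    	return x86_io_apic_ops.read(apic, reg);
    }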
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 4f51bebac02c..c0f5facdb10c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -63,6 +63,7 @@
 #include <asm/stackprotector.h>
 #include <asm/hypervisor.h>
 #include <asm/mwait.h>
+#include <asm/pci_x86.h>
 
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
@@ -261,7 +262,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 
 static bool __init xen_check_mwait(void)
 {
-#ifdef CONFIG_ACPI
+#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \
+	!defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE)
 	struct xen_platform_op op = {
 		.cmd = XENPF_set_processor_pminfo,
 		.u.set_pminfo.id = -1,
@@ -349,7 +351,6 @@ static void __init xen_init_cpuid_mask(void)
 	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
 	if ((cx & xsave_mask) != xsave_mask)
 		cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
-
 	if (xen_check_mwait())
 		cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
 }
@@ -809,9 +810,40 @@ static void xen_io_delay(void)
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
+static unsigned long xen_set_apic_id(unsigned int x)
+{
+	WARN_ON(1);
+	return x;
+}
+static unsigned int xen_get_apic_id(unsigned long x)
+{
+	return ((x)>>24) & 0xFFu;
+}
 static u32 xen_apic_read(u32 reg)
 {
-	return 0;
+	struct xen_platform_op op = {
+		.cmd = XENPF_get_cpuinfo,
+		.interface_version = XENPF_INTERFACE_VERSION,
+		.u.pcpu_info.xen_cpuid = 0,
+	};
+	int ret = 0;
+
+	/* Shouldn't need this as APIC is turned off for PV, and we only
+	 * get called on the bootup processor. But just in case. */
+	if (!xen_initial_domain() || smp_processor_id())
+		return 0;
+
+	if (reg == APIC_LVR)
+		return 0x10;
+
+	if (reg != APIC_ID)
+		return 0;
+
+	ret = HYPERVISOR_dom0_op(&op);
+	if (ret)
+		return 0;
+
+	return op.u.pcpu_info.apic_id << 24;
 }
 
 static void xen_apic_write(u32 reg, u32 val)
@@ -849,6 +881,8 @@ static void set_xen_basic_apic_ops(void)
 	apic->icr_write = xen_apic_icr_write;
 	apic->wait_icr_idle = xen_apic_wait_icr_idle;
 	apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle;
+	apic->set_apic_id = xen_set_apic_id;
+	apic->get_apic_id = xen_get_apic_id;
 }
 
 #endif
@@ -1362,11 +1396,15 @@ asmlinkage void __init xen_start_kernel(void)
 		xen_start_info->console.domU.mfn = 0;
 		xen_start_info->console.domU.evtchn = 0;
 
+		xen_init_apic();
+
 		/* Make sure ACS will be enabled */
 		pci_request_acs();
 	}
-
-
+#ifdef CONFIG_PCI
+	/* PCI BIOS service won't work from a PV guest. */
+	pci_probe &= ~PCI_PROBE_BIOS;
+#endif
 	xen_raw_console_write("about to get started...\n");
 
 	xen_setup_runstate_info(0);
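
The widened guard in xen_check_mwait() is worth a note: a tristate Kconfig symbol FOO defines CONFIG_FOO when built in but CONFIG_FOO_MODULE when built as a module, so code that must back off in either case has to test both spellings, as this hunk does for the ACPI processor aggregator. The pattern, sketched with an invented symbol:

    /* CONFIG_BAR is invented; the tristate test is what matters. */
    #if defined(CONFIG_BAR) || defined(CONFIG_BAR_MODULE)
    #define BAR_AVAILABLE 1	/* present, built in or modular */
    #else
    #define BAR_AVAILABLE 0
    #endif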
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b8e279479a6b..3506cd4f9a43 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -353,8 +353,13 @@ static pteval_t pte_mfn_to_pfn(pteval_t val)
 {
 	if (val & _PAGE_PRESENT) {
 		unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+		unsigned long pfn = mfn_to_pfn(mfn);
+
 		pteval_t flags = val & PTE_FLAGS_MASK;
-		val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+		if (unlikely(pfn == ~0))
+			val = flags & ~_PAGE_PRESENT;
+		else
+			val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
 	}
 
 	return val;
@@ -1859,7 +1864,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 #endif	/* CONFIG_X86_64 */
 
 static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
-static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss;
 
 static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 {
@@ -1900,7 +1904,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 		 * We just don't map the IO APIC - all access is via
 		 * hypercalls. Keep the address in the pte for reference.
 		 */
-		pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL);
+		pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
 		break;
 #endif
 
@@ -2065,7 +2069,6 @@ void __init xen_init_mmu_ops(void)
 	pv_mmu_ops = xen_mmu_ops;
 
 	memset(dummy_mapping, 0xff, PAGE_SIZE);
-	memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE);
 }
 
 /* Protected by xen_reservation_lock. */
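
The pte_mfn_to_pfn() change guards against mfn_to_pfn() returning ~0 for a machine frame with no pseudo-physical mapping (a foreign page, for instance); previously that sentinel was shifted straight into the PTE as a bogus frame number. Clearing _PAGE_PRESENT instead yields a harmlessly non-present entry. The guard in isolation, as a standalone sketch with invented constants and a toy translator:

    #include <stdint.h>

    #define PAGE_SHIFT	12
    #define PAGE_PRESENT	0x1UL
    #define NO_PFN		(~0UL)

    /* Toy translator: pretend frame 0xdead has no backing pfn. */
    static unsigned long toy_mfn_to_pfn(unsigned long mfn)
    {
    	return (mfn == 0xdeadUL) ? NO_PFN : mfn;
    }

    uint64_t fixup_pte(uint64_t val)
    {
    	if (val & PAGE_PRESENT) {
    		unsigned long pfn = toy_mfn_to_pfn(val >> PAGE_SHIFT);
    		uint64_t flags = val & ((1UL << PAGE_SHIFT) - 1);

    		if (pfn == NO_PFN)
    			val = flags & ~PAGE_PRESENT;	/* safely non-present */
    		else
    			val = ((uint64_t)pfn << PAGE_SHIFT) | flags;
    	}
    	return val;
    }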
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 5fac6919b957..3700945ed0d5 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -178,6 +178,7 @@ static void __init xen_fill_possible_map(void)
 static void __init xen_filter_cpu_maps(void)
 {
 	int i, rc;
+	unsigned int subtract = 0;
 
 	if (!xen_initial_domain())
 		return;
@@ -192,8 +193,22 @@ static void __init xen_filter_cpu_maps(void)
 		} else {
 			set_cpu_possible(i, false);
 			set_cpu_present(i, false);
+			subtract++;
 		}
 	}
+#ifdef CONFIG_HOTPLUG_CPU
+	/* This is akin to using 'nr_cpus' on the Linux command line.
+	 * Which is OK as when we use 'dom0_max_vcpus=X' we can only
+	 * have up to X, while nr_cpu_ids is greater than X. This
+	 * normally is not a problem, except when CPU hotplugging
+	 * is involved and then there might be more than X CPUs
+	 * in the guest - which will not work as there is no
+	 * hypercall to expand the max number of VCPUs an already
+	 * running guest has. So cap it up to X. */
+	if (subtract)
+		nr_cpu_ids = nr_cpu_ids - subtract;
+#endif
+
 }
 
 static void __init xen_smp_prepare_boot_cpu(void)
@@ -250,18 +265,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 		set_cpu_possible(cpu, false);
 	}
 
-	for_each_possible_cpu (cpu) {
-		struct task_struct *idle;
-
-		if (cpu == 0)
-			continue;
-
-		idle = fork_idle(cpu);
-		if (IS_ERR(idle))
-			panic("failed fork for CPU %d", cpu);
-
+	for_each_possible_cpu(cpu)
 		set_cpu_present(cpu, true);
-	}
 }
 
 static int __cpuinit
@@ -331,9 +336,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	return 0;
 }
 
-static int __cpuinit xen_cpu_up(unsigned int cpu)
+static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 {
-	struct task_struct *idle = idle_task(cpu);
 	int rc;
 
 	per_cpu(current_task, cpu) = idle;
@@ -547,10 +551,10 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 	xen_init_lock_cpu(0);
 }
 
-static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
+static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int rc;
-	rc = native_cpu_up(cpu);
+	rc = native_cpu_up(cpu, tidle);
 	WARN_ON (xen_smp_intr_init(cpu));
 	return rc;
 }
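
Both xen_cpu_up() and xen_hvm_cpu_up() grow a struct task_struct * parameter here because this series moves fork_idle() out of every architecture and into the common hotplug core, which allocates the idle task once and hands it down through smp_ops; that is also why the fork_idle()/panic() loop disappears from xen_smp_prepare_cpus(). The approximate shape of the generic caller, from memory and therefore a sketch rather than a quotation:

    /* Approximate: the core passes the pre-built idle task down. */
    int __cpu_up(unsigned int cpu, struct task_struct *tidle)
    {
    	return smp_ops.cpu_up(cpu, tidle);
    }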
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 79d7362ad6d1..3e45aa000718 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -96,7 +96,7 @@ ENTRY(xen_restore_fl_direct)
 
 	/* check for unmasked and pending */
 	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
-	jz 1f
+	jnz 1f
 2:	call check_events
 1:
 ENDPATCH(xen_restore_fl_direct)
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index b040b0e518ca..f9643fc50de5 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -14,6 +14,7 @@
 #include <asm/thread_info.h>
 #include <asm/processor-flags.h>
 #include <asm/segment.h>
+#include <asm/asm.h>
 
 #include <xen/interface/xen.h>
 
@@ -137,10 +138,7 @@ iret_restore_end:
 
 1:	iret
 xen_iret_end_crit:
-.section __ex_table, "a"
-	.align 4
-	.long 1b, iret_exc
-.previous
+	_ASM_EXTABLE(1b, iret_exc)
 
 hyper_iret:
 	/* put this out of line since its very rarely used */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index b095739ccd4c..45c0c0667bd9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -92,11 +92,15 @@ struct dom0_vga_console_info;
 
 #ifdef CONFIG_XEN_DOM0
 void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
+void __init xen_init_apic(void);
 #else
 static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
 				       size_t size)
 {
 }
+static inline void __init xen_init_apic(void)
+{
+}
 #endif
 
 /* Declare an asm function, along with symbols needed to make it
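
The new xen_init_apic() declaration follows the stub convention this header already uses for xen_init_vga(): a real prototype under CONFIG_XEN_DOM0 and an empty static inline otherwise, so the caller in xen_start_kernel() needs no #ifdef of its own. The pattern in general form, with an invented symbol and function:

    /* CONFIG_FOO and foo_init() are invented; only the shape is real. */
    #ifdef CONFIG_FOO
    void __init foo_init(void);
    #else
    static inline void __init foo_init(void) { }	/* compiles away */
    #endif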