Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                               |    8
-rw-r--r--  arch/x86/boot/Makefile                         |    3
-rw-r--r--  arch/x86/boot/tools/build.c                    |   40
-rw-r--r--  arch/x86/configs/i386_defconfig                |    2
-rw-r--r--  arch/x86/configs/x86_64_defconfig              |    2
-rw-r--r--  arch/x86/include/asm/atomic.h                  |   29
-rw-r--r--  arch/x86/include/asm/atomic64_64.h             |   28
-rw-r--r--  arch/x86/include/asm/bitops.h                  |   24
-rw-r--r--  arch/x86/include/asm/calling.h                 |   50
-rw-r--r--  arch/x86/include/asm/cpufeature.h              |    6
-rw-r--r--  arch/x86/include/asm/jump_label.h              |    2
-rw-r--r--  arch/x86/include/asm/local.h                   |   28
-rw-r--r--  arch/x86/include/asm/misc.h                    |    6
-rw-r--r--  arch/x86/include/asm/mutex_64.h                |    4
-rw-r--r--  arch/x86/include/asm/percpu.h                  |    3
-rw-r--r--  arch/x86/include/asm/preempt.h                 |  100
-rw-r--r--  arch/x86/include/asm/rmwcc.h                   |   41
-rw-r--r--  arch/x86/include/asm/thread_info.h             |    5
-rw-r--r--  arch/x86/include/asm/uaccess.h                 |   98
-rw-r--r--  arch/x86/include/asm/uaccess_32.h              |   29
-rw-r--r--  arch/x86/include/asm/uaccess_64.h              |   28
-rw-r--r--  arch/x86/include/asm/uprobes.h                 |   12
-rw-r--r--  arch/x86/kernel/Makefile                       |    2
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c             |    2
-rw-r--r--  arch/x86/kernel/asm-offsets.c                  |    1
-rw-r--r--  arch/x86/kernel/cpu/common.c                   |    5
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c               |   21
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h               |    6
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c         |   78
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c      |  203
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c     |   31
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c  |  134
-rw-r--r--  arch/x86/kernel/crash.c                        |    2
-rw-r--r--  arch/x86/kernel/entry_32.S                     |    7
-rw-r--r--  arch/x86/kernel/entry_64.S                     |    8
-rw-r--r--  arch/x86/kernel/i386_ksyms_32.c                |    7
-rw-r--r--  arch/x86/kernel/i8259.c                        |    3
-rw-r--r--  arch/x86/kernel/irq_32.c                       |   34
-rw-r--r--  arch/x86/kernel/irq_64.c                       |   21
-rw-r--r--  arch/x86/kernel/kvm.c                          |   19
-rw-r--r--  arch/x86/kernel/nmi.c                          |    4
-rw-r--r--  arch/x86/kernel/preempt.S                      |   25
-rw-r--r--  arch/x86/kernel/process.c                      |    6
-rw-r--r--  arch/x86/kernel/process_32.c                   |    8
-rw-r--r--  arch/x86/kernel/process_64.c                   |    8
-rw-r--r--  arch/x86/kernel/reboot.c                       |   16
-rw-r--r--  arch/x86/kernel/rtc.c                          |    8
-rw-r--r--  arch/x86/kernel/smpboot.c                      |   37
-rw-r--r--  arch/x86/kernel/traps.c                        |    4
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c               |    7
-rw-r--r--  arch/x86/kvm/vmx.c                             |   24
-rw-r--r--  arch/x86/lib/Makefile                          |    2
-rw-r--r--  arch/x86/lib/misc.c                            |   21
-rw-r--r--  arch/x86/lib/usercopy.c                        |   43
-rw-r--r--  arch/x86/lib/usercopy_32.c                     |    8
-rw-r--r--  arch/x86/mm/fault.c                            |   41
-rw-r--r--  arch/x86/net/bpf_jit_comp.c                    |   18
-rw-r--r--  arch/x86/oprofile/backtrace.c                  |    4
-rw-r--r--  arch/x86/xen/smp.c                             |    9
59 files changed, 956 insertions, 469 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ee2fb9d37745..5cd6eea9b7b3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -123,6 +123,7 @@ config X86
 	select COMPAT_OLD_SIGACTION if IA32_EMULATION
 	select RTC_LIB
 	select HAVE_DEBUG_STACKOVERFLOW
+	select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
 
 config INSTRUCTION_DECODER
 	def_bool y
@@ -860,7 +861,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
-	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
+	depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI
 	---help---
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  integrated interrupt controller in the CPU. If you have a single-CPU
@@ -885,11 +886,11 @@ config X86_UP_IOAPIC
 
 config X86_LOCAL_APIC
 	def_bool y
-	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
 
 config X86_IO_APIC
 	def_bool y
-	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
+	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
 
 config X86_VISWS_APIC
 	def_bool y
@@ -1033,6 +1034,7 @@ config X86_REBOOTFIXUPS
 
 config MICROCODE
 	tristate "CPU microcode loading support"
+	depends on CPU_SUP_AMD || CPU_SUP_INTEL
 	select FW_LOADER
 	---help---
 
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 379814bc41e3..dce69a256896 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -71,7 +71,8 @@ GCOV_PROFILE := n
 $(obj)/bzImage: asflags-y := $(SVGA_MODE)
 
 quiet_cmd_image = BUILD   $@
-cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/zoffset.h > $@
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
+	    $(obj)/zoffset.h $@
 
 $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
 	$(call if_changed,image)
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index c941d6a8887f..8e15b22391fc 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -5,14 +5,15 @@
  */
 
 /*
- * This file builds a disk-image from two different files:
+ * This file builds a disk-image from three different files:
  *
  * - setup: 8086 machine code, sets up system parm
  * - system: 80386 code for actual system
+ * - zoffset.h: header with ZO_* defines
  *
- * It does some checking that all files are of the correct type, and
- * just writes the result to stdout, removing headers and padding to
- * the right amount. It also writes some system data to stderr.
+ * It does some checking that all files are of the correct type, and writes
+ * the result to the specified destination, removing headers and padding to
+ * the right amount. It also writes some system data to stdout.
  */
 
 /*
@@ -136,7 +137,7 @@ static void die(const char * str, ...)
 
 static void usage(void)
 {
-	die("Usage: build setup system [zoffset.h] [> image]");
+	die("Usage: build setup system zoffset.h image");
 }
 
 #ifdef CONFIG_EFI_STUB
@@ -265,7 +266,7 @@ int main(int argc, char ** argv)
 	int c;
 	u32 sys_size;
 	struct stat sb;
-	FILE *file;
+	FILE *file, *dest;
 	int fd;
 	void *kernel;
 	u32 crc = 0xffffffffUL;
@@ -280,10 +281,13 @@ int main(int argc, char ** argv)
 	startup_64 = 0x200;
 #endif
 
-	if (argc == 4)
-		parse_zoffset(argv[3]);
-	else if (argc != 3)
+	if (argc != 5)
 		usage();
+	parse_zoffset(argv[3]);
+
+	dest = fopen(argv[4], "w");
+	if (!dest)
+		die("Unable to write `%s': %m", argv[4]);
 
 	/* Copy the setup code */
 	file = fopen(argv[1], "r");
@@ -318,7 +322,7 @@ int main(int argc, char ** argv)
 	/* Set the default root device */
 	put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
 
-	fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i);
+	printf("Setup is %d bytes (padded to %d bytes).\n", c, i);
 
 	/* Open and stat the kernel file */
 	fd = open(argv[2], O_RDONLY);
@@ -327,7 +331,7 @@ int main(int argc, char ** argv)
 	if (fstat(fd, &sb))
 		die("Unable to stat `%s': %m", argv[2]);
 	sz = sb.st_size;
-	fprintf (stderr, "System is %d kB\n", (sz+1023)/1024);
+	printf("System is %d kB\n", (sz+1023)/1024);
 	kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
 	if (kernel == MAP_FAILED)
 		die("Unable to mmap '%s': %m", argv[2]);
@@ -348,27 +352,31 @@ int main(int argc, char ** argv)
 #endif
 
 	crc = partial_crc32(buf, i, crc);
-	if (fwrite(buf, 1, i, stdout) != i)
+	if (fwrite(buf, 1, i, dest) != i)
 		die("Writing setup failed");
 
 	/* Copy the kernel code */
 	crc = partial_crc32(kernel, sz, crc);
-	if (fwrite(kernel, 1, sz, stdout) != sz)
+	if (fwrite(kernel, 1, sz, dest) != sz)
 		die("Writing kernel failed");
 
 	/* Add padding leaving 4 bytes for the checksum */
 	while (sz++ < (sys_size*16) - 4) {
 		crc = partial_crc32_one('\0', crc);
-		if (fwrite("\0", 1, 1, stdout) != 1)
+		if (fwrite("\0", 1, 1, dest) != 1)
 			die("Writing padding failed");
 	}
 
 	/* Write the CRC */
-	fprintf(stderr, "CRC %x\n", crc);
+	printf("CRC %x\n", crc);
 	put_unaligned_le32(crc, buf);
-	if (fwrite(buf, 1, 4, stdout) != 4)
+	if (fwrite(buf, 1, 4, dest) != 4)
 		die("Writing CRC failed");
 
+	/* Catch any delayed write failures */
+	if (fclose(dest))
+		die("Writing image failed");
+
 	close(fd);
 
 	/* Everything is OK */
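Worth noting: switching the image from stdout to an explicit destination is what makes the new fclose() check meaningful. With stdio buffering, a write error such as ENOSPC may only surface when the last buffer is flushed at close time, so an unchecked close could leave a truncated bzImage behind while still reporting success. A stand-alone, user-space sketch of that failure mode (hypothetical program, not part of the patch):

    #include <stdio.h>

    int main(int argc, char **argv)
    {
    	FILE *dest;

    	if (argc < 2)
    		return 1;
    	dest = fopen(argv[1], "w");
    	if (!dest) {
    		perror("fopen");
    		return 1;
    	}
    	if (fwrite("data", 1, 4, dest) != 4) {	/* may only fill the buffer */
    		perror("fwrite");
    		return 1;
    	}
    	if (fclose(dest)) {			/* the deferred flush can fail here */
    		perror("fclose");
    		return 1;
    	}
    	return 0;
    }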
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 94447086e551..a7fef2621cc9 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -142,6 +142,8 @@ CONFIG_MAC80211=y
 CONFIG_MAC80211_LEDS=y
 CONFIG_RFKILL=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_DEBUG_DEVRES=y
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 671524d0f6c0..c1119d4c1281 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -141,6 +141,8 @@ CONFIG_MAC80211=y
 CONFIG_MAC80211_LEDS=y
 CONFIG_RFKILL=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_DEBUG_DEVRES=y
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 722aa3b04624..da31c8b8a92d 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -6,6 +6,7 @@
 #include <asm/processor.h>
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
+#include <asm/rmwcc.h>
 
 /*
  * Atomic operations that C can't guarantee us. Useful for
@@ -76,12 +77,7 @@ static inline void atomic_sub(int i, atomic_t *v)
  */
 static inline int atomic_sub_and_test(int i, atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, i, "%0", "e");
 }
 
 /**
@@ -118,12 +114,7 @@ static inline void atomic_dec(atomic_t *v)
  */
 static inline int atomic_dec_and_test(atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "decl %0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
 }
 
 /**
@@ -136,12 +127,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
  */
 static inline int atomic_inc_and_test(atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "incl %0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
}
 
 /**
@@ -155,12 +141,7 @@ static inline int atomic_inc_and_test(atomic_t *v)
  */
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, i, "%0", "s");
 }
 
 /**
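With asm goto available, GEN_UNARY_RMWcc replaces the old "sete into a byte, then test the byte" sequence with a conditional jump taken straight off the flags of the locked instruction. A rough, stand-alone sketch of what atomic_dec_and_test() boils down to (illustrative only; requires GCC's asm goto on x86, and atomic_t is redefined locally):

    #include <stdio.h>

    typedef struct { int counter; } atomic_t;	/* local stand-in */

    static inline int atomic_dec_and_test_sketch(atomic_t *v)
    {
    	/* "lock decl" sets ZF when the result is zero; the return value
    	 * is only materialized on paths that actually need it. */
    	asm goto("lock; decl %0; je %l[became_zero]"
    		 : : "m" (v->counter) : "memory" : became_zero);
    	return 0;
    became_zero:
    	return 1;
    }

    int main(void)
    {
    	atomic_t v = { 2 };

    	printf("%d %d\n", atomic_dec_and_test_sketch(&v),
    			  atomic_dec_and_test_sketch(&v));	/* 0 1 */
    	return 0;
    }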
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 0e1cbfc8ee06..3f065c985aee 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -72,12 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
  */
 static inline int atomic64_sub_and_test(long i, atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "er" (i), "m" (v->counter) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, i, "%0", "e");
 }
 
 /**
@@ -116,12 +111,7 @@ static inline void atomic64_dec(atomic64_t *v)
  */
 static inline int atomic64_dec_and_test(atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "decq %0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "m" (v->counter) : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e");
 }
 
 /**
@@ -134,12 +124,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v)
  */
 static inline int atomic64_inc_and_test(atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "incq %0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "m" (v->counter) : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e");
 }
 
 /**
@@ -153,12 +138,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v)
  */
 static inline int atomic64_add_negative(long i, atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "er" (i), "m" (v->counter) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, i, "%0", "s");
 }
 
 /**
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 41639ce8fd63..6d76d0935989 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -14,6 +14,7 @@
 
 #include <linux/compiler.h>
 #include <asm/alternative.h>
+#include <asm/rmwcc.h>
 
 #if BITS_PER_LONG == 32
 # define _BITOPS_LONG_SHIFT 5
@@ -204,12 +205,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
  */
 static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
-		     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, nr, "%0", "c");
 }
 
 /**
@@ -255,13 +251,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, nr, "%0", "c");
 }
 
 /**
@@ -314,13 +304,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
  */
 static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, nr, "%0", "c");
 }
 
 static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
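The "c" condition code works here because bts/btr/btc copy the *old* bit value into the carry flag, so the former `sbb %0,%0` materialization can become a plain setc (or a jc under asm goto). A user-space sketch of the fallback expansion, mirroring the rmwcc.h setcc path further down (x86 only; a register constraint is used for nr to keep the unsuffixed bts unambiguous):

    #include <stdio.h>

    static inline int test_and_set_bit_sketch(long nr, volatile unsigned long *addr)
    {
    	char c;	/* receives the old bit, via CF */

    	asm volatile("lock; bts %2, %0; setc %1"
    		     : "+m" (*addr), "=qm" (c)
    		     : "r" (nr) : "memory");
    	return c != 0;
    }

    int main(void)
    {
    	unsigned long word = 0;

    	printf("%d %d\n", test_and_set_bit_sketch(3, &word),
    			  test_and_set_bit_sketch(3, &word));	/* 0 1 */
    	return 0;
    }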
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 0fa675033912..cb4c73bfeb48 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -48,6 +48,8 @@ For 32-bit we have the following conventions - kernel is built with
 
 #include <asm/dwarf2.h>
 
+#ifdef CONFIG_X86_64
+
 /*
  * 64-bit system call stack frame layout defines and helpers,
  * for assembly code:
@@ -192,3 +194,51 @@ For 32-bit we have the following conventions - kernel is built with
 	.macro icebp
 	.byte 0xf1
 	.endm
+
+#else /* CONFIG_X86_64 */
+
+/*
+ * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
+ * are different from the entry_32.S versions in not changing the segment
+ * registers. So only suitable for in kernel use, not when transitioning
+ * from or to user space. The resulting stack frame is not a standard
+ * pt_regs frame. The main use case is calling C code from assembler
+ * when all the registers need to be preserved.
+ */
+
+	.macro SAVE_ALL
+	pushl_cfi %eax
+	CFI_REL_OFFSET eax, 0
+	pushl_cfi %ebp
+	CFI_REL_OFFSET ebp, 0
+	pushl_cfi %edi
+	CFI_REL_OFFSET edi, 0
+	pushl_cfi %esi
+	CFI_REL_OFFSET esi, 0
+	pushl_cfi %edx
+	CFI_REL_OFFSET edx, 0
+	pushl_cfi %ecx
+	CFI_REL_OFFSET ecx, 0
+	pushl_cfi %ebx
+	CFI_REL_OFFSET ebx, 0
+	.endm
+
+	.macro RESTORE_ALL
+	popl_cfi %ebx
+	CFI_RESTORE ebx
+	popl_cfi %ecx
+	CFI_RESTORE ecx
+	popl_cfi %edx
+	CFI_RESTORE edx
+	popl_cfi %esi
+	CFI_RESTORE esi
+	popl_cfi %edi
+	CFI_RESTORE edi
+	popl_cfi %ebp
+	CFI_RESTORE ebp
+	popl_cfi %eax
+	CFI_RESTORE eax
+	.endm
+
+#endif /* CONFIG_X86_64 */
+
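These 32-bit macros exist so that tiny assembly thunks can call into C with every register preserved; arch/x86/kernel/preempt.S from the diffstat above (its hunks are not shown in this section) is the prime consumer. Its likely shape, sketched from the description here — an assumption, not the verbatim file:

    ENTRY(___preempt_schedule)
    	CFI_STARTPROC
    	SAVE_ALL
    	call preempt_schedule
    	RESTORE_ALL
    	ret
    	CFI_ENDPROC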
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d3f5c63078d8..89270b4318db 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 		 * Catch too early usage of this before alternatives
 		 * have run.
 		 */
-		asm goto("1: jmp %l[t_warn]\n"
+		asm_volatile_goto("1: jmp %l[t_warn]\n"
 			 "2:\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"
@@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 
 #endif
 
-		asm goto("1: jmp %l[t_no]\n"
+		asm_volatile_goto("1: jmp %l[t_no]\n"
 			 "2:\n"
 			 ".section .altinstructions,\"a\"\n"
 			 " .long 1b - .\n"
@@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 	 * have. Thus, we force the jump to the widest, 4-byte, signed relative
 	 * offset even though the last would often fit in less bytes.
 	 */
-	asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+	asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
 		"2:\n"
 		".section .altinstructions,\"a\"\n"
 		" .long 1b - .\n"		/* src offset */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 64507f35800c..6a2cefb4395a 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -18,7 +18,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:"
+	asm_volatile_goto("1:"
 		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
 		".pushsection __jump_table, \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 2d89e3980cbd..5b23e605e707 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -52,12 +52,7 @@ static inline void local_sub(long i, local_t *l)
  */
 static inline int local_sub_and_test(long i, local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_SUB "%2,%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, i, "%0", "e");
 }
 
 /**
@@ -70,12 +65,7 @@ static inline int local_sub_and_test(long i, local_t *l)
  */
 static inline int local_dec_and_test(local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_DEC "%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e");
 }
 
 /**
@@ -88,12 +78,7 @@ static inline int local_dec_and_test(local_t *l)
  */
 static inline int local_inc_and_test(local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_INC "%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e");
 }
 
 /**
@@ -107,12 +92,7 @@ static inline int local_inc_and_test(local_t *l)
  */
 static inline int local_add_negative(long i, local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_ADD "%2,%0; sets %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, i, "%0", "s");
 }
 
 /**
diff --git a/arch/x86/include/asm/misc.h b/arch/x86/include/asm/misc.h
new file mode 100644
index 000000000000..475f5bbc7f53
--- /dev/null
+++ b/arch/x86/include/asm/misc.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_MISC_H
+#define _ASM_X86_MISC_H
+
+int num_digits(int val);
+
+#endif /* _ASM_X86_MISC_H */
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index e7e6751648ed..07537a44216e 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -20,7 +20,7 @@
 static inline void __mutex_fastpath_lock(atomic_t *v,
 					 void (*fail_fn)(atomic_t *))
 {
-	asm volatile goto(LOCK_PREFIX " decl %0\n"
+	asm_volatile_goto(LOCK_PREFIX " decl %0\n"
 			  " jns %l[exit]\n"
 			  : : "m" (v->counter)
 			  : "memory", "cc"
@@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
 static inline void __mutex_fastpath_unlock(atomic_t *v,
 					   void (*fail_fn)(atomic_t *))
 {
-	asm volatile goto(LOCK_PREFIX " incl %0\n"
+	asm_volatile_goto(LOCK_PREFIX " incl %0\n"
 			  " jg %l[exit]\n"
 			  : : "m" (v->counter)
 			  : "memory", "cc"
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0da5200ee79d..b3e18f800302 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -128,7 +128,8 @@ do { \
 do {									\
 	typedef typeof(var) pao_T__;					\
 	const int pao_ID__ = (__builtin_constant_p(val) &&		\
-			      ((val) == 1 || (val) == -1)) ? (val) : 0;	\
+			      ((val) == 1 || (val) == -1)) ?		\
+				(int)(val) : 0;				\
 	if (0) {							\
 		pao_T__ pao_tmp__;					\
 		pao_tmp__ = (val);					\
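The new (int) cast looks cosmetic but is not: with an unsigned val — for instance this_cpu_sub() feeding in the negation of an unsigned int — (val) == -1 compares true for 0xffffffffU, and initializing a const int from that constant can draw an overflow warning; the explicit cast keeps pao_ID__ at -1 and the build quiet. A user-space sketch of the same arithmetic (my reading of the fix, not taken from a changelog):

    #include <stdio.h>

    #define PAO_ID(val)							\
    	((__builtin_constant_p(val) && ((val) == 1 || (val) == -1)) ?	\
    		(int)(val) : 0)

    int main(void)
    {
    	/* 0xffffffffU == -1 after the usual conversions, so this is
    	 * recognized as a single decrement and yields -1. */
    	printf("%d\n", PAO_ID(0xffffffffU));	/* -1 */
    	printf("%d\n", PAO_ID(7));		/* 0: not a single inc/dec */
    	return 0;
    }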
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
new file mode 100644
index 000000000000..8729723636fd
--- /dev/null
+++ b/arch/x86/include/asm/preempt.h
@@ -0,0 +1,100 @@
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <asm/rmwcc.h>
+#include <asm/percpu.h>
+#include <linux/thread_info.h>
+
+DECLARE_PER_CPU(int, __preempt_count);
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
+static __always_inline int preempt_count(void)
+{
+	return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void preempt_count_set(int pc)
+{
+	__this_cpu_write_4(__preempt_count, pc);
+}
+
+/*
+ * must be macros to avoid header recursion hell
+ */
+#define task_preempt_count(p) \
+	(task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED)
+
+#define init_task_preempt_count(p) do { \
+	task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+	task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \
+	per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
+} while (0)
+
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	__this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	__this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
+}
+
+/*
+ * The various preempt_count add/sub methods
+ */
+
+static __always_inline void __preempt_count_add(int val)
+{
+	__this_cpu_add_4(__preempt_count, val);
+}
+
+static __always_inline void __preempt_count_sub(int val)
+{
+	__this_cpu_add_4(__preempt_count, -val);
+}
+
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
+}
+
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
+static __always_inline bool should_resched(void)
+{
+	return unlikely(!__this_cpu_read_4(__preempt_count));
+}
+
+#ifdef CONFIG_PREEMPT
+  extern asmlinkage void ___preempt_schedule(void);
+# define __preempt_schedule() asm ("call ___preempt_schedule")
+  extern asmlinkage void preempt_schedule(void);
+# ifdef CONFIG_CONTEXT_TRACKING
+  extern asmlinkage void ___preempt_schedule_context(void);
+# define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
+# endif
+#endif
+
+#endif /* __ASM_PREEMPT_H */
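The inverted-bit trick is easiest to see with concrete numbers. Assuming PREEMPT_NEED_RESCHED is the top bit (0x80000000, as the generic side of this series defines it), the count only hits zero when the last preempt_disable() is released *and* a reschedule is pending, so "decl; je" performs both tests at once:

    #include <stdio.h>

    #define PREEMPT_NEED_RESCHED	0x80000000u	/* assumed: top bit, stored inverted */

    int main(void)
    {
    	/* one preempt_disable() deep, no reschedule requested */
    	unsigned int count = 1 | PREEMPT_NEED_RESCHED;

    	if (--count == 0)			/* 0x80000000: no resched, not taken */
    		puts("would reschedule");

    	count = 1 | PREEMPT_NEED_RESCHED;
    	count &= ~PREEMPT_NEED_RESCHED;		/* set_preempt_need_resched() */

    	if (--count == 0)			/* 0: last enable + resched pending */
    		puts("would reschedule");	/* printed exactly once */
    	return 0;
    }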
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
new file mode 100644
index 000000000000..1ff990f1de8e
--- /dev/null
+++ b/arch/x86/include/asm/rmwcc.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_X86_RMWcc
+#define _ASM_X86_RMWcc
+
+#ifdef CC_HAVE_ASM_GOTO
+
+#define __GEN_RMWcc(fullop, var, cc, ...)				\
+do {									\
+	asm_volatile_goto (fullop "; j" cc " %l[cc_label]"		\
+			: : "m" (var), ## __VA_ARGS__			\
+			: "memory" : cc_label);				\
+	return 0;							\
+cc_label:								\
+	return 1;							\
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, val, arg0, cc)			\
+	__GEN_RMWcc(op " %1, " arg0, var, cc, "er" (val))
+
+#else /* !CC_HAVE_ASM_GOTO */
+
+#define __GEN_RMWcc(fullop, var, cc, ...)				\
+do {									\
+	char c;								\
+	asm volatile (fullop "; set" cc " %1"				\
+			: "+m" (var), "=qm" (c)				\
+			: __VA_ARGS__ : "memory");			\
+	return c != 0;							\
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, val, arg0, cc)			\
+	__GEN_RMWcc(op " %2, " arg0, var, cc, "er" (val))
+
+#endif /* CC_HAVE_ASM_GOTO */
+
+#endif /* _ASM_X86_RMWcc */
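Both variants can be exercised stand-alone. A minimal user-space demo of the fallback (!CC_HAVE_ASM_GOTO) path, with the macro body copied from above and "subl"/"e" standing in for atomic_sub_and_test() minus the lock prefix (x86 only):

    #include <stdio.h>

    #define GEN_RMWcc(fullop, var, cc, ...)				\
    do {								\
    	char c;							\
    	asm volatile (fullop "; set" cc " %1"			\
    		      : "+m" (var), "=qm" (c)			\
    		      : __VA_ARGS__ : "memory");		\
    	return c != 0;						\
    } while (0)

    static int sub_and_test(int i, int *v)
    {
    	GEN_RMWcc("subl %2, %0", *v, "e", "er" (i));
    }

    int main(void)
    {
    	int v = 3;

    	printf("%d\n", sub_and_test(1, &v));	/* 0: v is now 2 */
    	printf("%d\n", sub_and_test(2, &v));	/* 1: v hit zero */
    	return 0;
    }

Note the hidden return statements inside the macro: that is why GEN_*_RMWcc can only be used as the entire body of a function, exactly as the converted atomic/bitops/local helpers do.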
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 27811190cbd7..c46a46be1ec6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -28,8 +28,7 @@ struct thread_info {
 	__u32			flags;		/* low level flags */
 	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
-	int			preempt_count;	/* 0 => preemptable,
-						   <0 => BUG */
+	int			saved_preempt_count;
 	mm_segment_t		addr_limit;
 	struct restart_block	restart_block;
 	void __user		*sysenter_return;
@@ -49,7 +48,7 @@ struct thread_info {
 	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
 	.cpu		= 0,			\
-	.preempt_count	= INIT_PREEMPT_COUNT,	\
+	.saved_preempt_count = INIT_PREEMPT_COUNT,	\
 	.addr_limit	= KERNEL_DS,		\
 	.restart_block	= {			\
 		.fn = do_no_restart_syscall,	\
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5838fa911aa0..8ec57c07b125 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -542,5 +542,103 @@ extern struct movsl_mask {
 # include <asm/uaccess_64.h>
 #endif
 
+unsigned long __must_check _copy_from_user(void *to, const void __user *from,
+					   unsigned n);
+unsigned long __must_check _copy_to_user(void __user *to, const void *from,
+					 unsigned n);
+
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+# define copy_user_diag __compiletime_error
+#else
+# define copy_user_diag __compiletime_warning
+#endif
+
+extern void copy_user_diag("copy_from_user() buffer size is too small")
+copy_from_user_overflow(void);
+extern void copy_user_diag("copy_to_user() buffer size is too small")
+copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
+
+#undef copy_user_diag
+
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+
+extern void
+__compiletime_warning("copy_from_user() buffer size is not provably correct")
+__copy_from_user_overflow(void) __asm__("copy_from_user_overflow");
+#define __copy_from_user_overflow(size, count) __copy_from_user_overflow()
+
+extern void
+__compiletime_warning("copy_to_user() buffer size is not provably correct")
+__copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
+#define __copy_to_user_overflow(size, count) __copy_to_user_overflow()
+
+#else
+
+static inline void
+__copy_from_user_overflow(int size, unsigned long count)
+{
+	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
+
+#define __copy_to_user_overflow __copy_from_user_overflow
+
+#endif
+
+static inline unsigned long __must_check
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	int sz = __compiletime_object_size(to);
+
+	might_fault();
+
+	/*
+	 * While we would like to have the compiler do the checking for us
+	 * even in the non-constant size case, any false positives there are
+	 * a problem (especially when DEBUG_STRICT_USER_COPY_CHECKS, but even
+	 * without - the [hopefully] dangerous looking nature of the warning
+	 * would make people go look at the respecitive call sites over and
+	 * over again just to find that there's no problem).
+	 *
+	 * And there are cases where it's just not realistic for the compiler
+	 * to prove the count to be in range. For example when multiple call
+	 * sites of a helper function - perhaps in different source files -
+	 * all doing proper range checking, yet the helper function not doing
+	 * so again.
+	 *
+	 * Therefore limit the compile time checking to the constant size
+	 * case, and do only runtime checking for non-constant sizes.
+	 */
+
+	if (likely(sz < 0 || sz >= n))
+		n = _copy_from_user(to, from, n);
+	else if(__builtin_constant_p(n))
+		copy_from_user_overflow();
+	else
+		__copy_from_user_overflow(sz, n);
+
+	return n;
+}
+
+static inline unsigned long __must_check
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	int sz = __compiletime_object_size(from);
+
+	might_fault();
+
+	/* See the comment in copy_from_user() above. */
+	if (likely(sz < 0 || sz >= n))
+		n = _copy_to_user(to, from, n);
+	else if(__builtin_constant_p(n))
+		copy_to_user_overflow();
+	else
+		__copy_to_user_overflow(sz, n);
+
+	return n;
+}
+
+#undef __copy_from_user_overflow
+#undef __copy_to_user_overflow
+
 #endif /* _ASM_X86_UACCESS_H */
 
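Underneath, __compiletime_object_size() is __builtin_object_size(ptr, 0), which evaluates to (size_t)-1 — hence the sz < 0 test — whenever the compiler cannot see the destination's size. The runtime branch of the check behaves like this user-space sketch (hypothetical helper name):

    #include <stdio.h>

    /* sz mimics __compiletime_object_size(): negative when unknown */
    static unsigned long checked_copy(long sz, unsigned long n)
    {
    	if (sz < 0 || (unsigned long)sz >= n) {
    		printf("copy %lu bytes\n", n);
    		return 0;			/* everything copied */
    	}
    	printf("overflow: %ld-byte object, %lu requested\n", sz, n);
    	return n;				/* nothing copied */
    }

    int main(void)
    {
    	char buf[8];

    	checked_copy(__builtin_object_size(buf, 0), 4);		/* ok */
    	checked_copy(__builtin_object_size(buf, 0), 16);	/* caught */
    	checked_copy(-1, 1 << 20);	/* unknown size: trusted */
    	return 0;
    }

In the kernel, the constant-n overflow case instead diverts to copy_from_user_overflow(), whose __compiletime_error/__compiletime_warning attribute turns the never-eliminated call into a build-time diagnostic.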
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 7f760a9f1f61..3c03a5de64d3 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -184,33 +184,4 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from,
 	return __copy_from_user_ll_nocache_nozero(to, from, n);
 }
 
-unsigned long __must_check copy_to_user(void __user *to,
-					const void *from, unsigned long n);
-unsigned long __must_check _copy_from_user(void *to,
-					   const void __user *from,
-					   unsigned long n);
-
-
-extern void copy_from_user_overflow(void)
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-	__compiletime_error("copy_from_user() buffer size is not provably correct")
-#else
-	__compiletime_warning("copy_from_user() buffer size is not provably correct")
-#endif
-;
-
-static inline unsigned long __must_check copy_from_user(void *to,
-					  const void __user *from,
-					  unsigned long n)
-{
-	int sz = __compiletime_object_size(to);
-
-	if (likely(sz == -1 || sz >= n))
-		n = _copy_from_user(to, from, n);
-	else
-		copy_from_user_overflow();
-
-	return n;
-}
-
 #endif /* _ASM_X86_UACCESS_32_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 4f7923dd0007..0acae710fa00 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -46,36 +46,8 @@ copy_user_generic(void *to, const void *from, unsigned len)
 }
 
 __must_check unsigned long
-_copy_to_user(void __user *to, const void *from, unsigned len);
-__must_check unsigned long
-_copy_from_user(void *to, const void __user *from, unsigned len);
-__must_check unsigned long
 copy_in_user(void __user *to, const void __user *from, unsigned len);
 
-static inline unsigned long __must_check copy_from_user(void *to,
-					  const void __user *from,
-					  unsigned long n)
-{
-	int sz = __compiletime_object_size(to);
-
-	might_fault();
-	if (likely(sz == -1 || sz >= n))
-		n = _copy_from_user(to, from, n);
-#ifdef CONFIG_DEBUG_VM
-	else
-		WARN(1, "Buffer overflow detected!\n");
-#endif
-	return n;
-}
-
-static __always_inline __must_check
-int copy_to_user(void __user *dst, const void *src, unsigned size)
-{
-	might_fault();
-
-	return _copy_to_user(dst, src, size);
-}
-
 static __always_inline __must_check
 int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 6e5197910fd8..3087ea9c5f2e 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -35,7 +35,10 @@ typedef u8 uprobe_opcode_t;
 
 struct arch_uprobe {
 	u16				fixups;
-	u8				insn[MAX_UINSN_BYTES];
+	union {
+		u8			insn[MAX_UINSN_BYTES];
+		u8			ixol[MAX_UINSN_BYTES];
+	};
 #ifdef CONFIG_X86_64
 	unsigned long			rip_rela_target_address;
 #endif
@@ -49,11 +52,4 @@ struct arch_uprobe_task {
 	unsigned int			saved_tf;
 };
 
-extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
-extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
-extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
-extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif	/* _ASM_UPROBES_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a5408b965c9d..9b0a34e2cd79 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,6 +36,8 @@ obj-y += tsc.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
 
+obj-$(CONFIG_PREEMPT)	+= preempt.o
+
 obj-y			+= process.o
 obj-y			+= i387.o xsave.o
 obj-y			+= ptrace.o
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1191ac1c9d25..a419814cea57 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -113,7 +113,7 @@ static int __init early_get_pnodeid(void)
 		break;
 	case UV3_HUB_PART_NUMBER:
 	case UV3_HUB_PART_NUMBER_X:
-		uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1;
+		uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
 		break;
 	}
 
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 28610822fb3c..9f6b9341950f 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -32,7 +32,6 @@ void common(void) {
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_status, thread_info, status);
 	OFFSET(TI_addr_limit, thread_info, addr_limit);
-	OFFSET(TI_preempt_count, thread_info, preempt_count);
 
 	BLANK();
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2793d1f095a2..5223fe6dec7b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1095,6 +1095,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
 
 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
+
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
 /*
@@ -1169,6 +1172,8 @@ void debug_stack_reset(void)
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 897783b3302a..8e132931614d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void)
 static int __kprobes
 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
-	int ret;
 	u64 start_clock;
 	u64 finish_clock;
+	int ret;
 
 	if (!atomic_read(&active_events))
 		return NMI_DONE;
 
-	start_clock = local_clock();
+	start_clock = sched_clock();
 	ret = x86_pmu.handle_irq(regs);
-	finish_clock = local_clock();
+	finish_clock = sched_clock();
 
 	perf_sample_event_took(finish_clock - start_clock);
 
@@ -1888,10 +1888,7 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
-	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
-		return;
-
-	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+	if (!sched_clock_stable)
 		return;
 
 	userpg->cap_user_time = 1;
@@ -1899,10 +1896,8 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 	userpg->time_shift = CYC2NS_SCALE_FACTOR;
 	userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
 
-	if (sched_clock_stable && !check_tsc_disabled()) {
-		userpg->cap_user_time_zero = 1;
-		userpg->time_zero = this_cpu_read(cyc2ns_offset);
-	}
+	userpg->cap_user_time_zero = 1;
+	userpg->time_zero = this_cpu_read(cyc2ns_offset);
 }
 
 /*
@@ -1994,7 +1989,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		frame.return_address = 0;
 
 		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
-		if (bytes != sizeof(frame))
+		if (bytes != 0)
 			break;
 
 		if (!valid_user_frame(fp, sizeof(frame)))
@@ -2046,7 +2041,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		frame.return_address = 0;
 
 		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
-		if (bytes != sizeof(frame))
+		if (bytes != 0)
 			break;
 
 		if (!valid_user_frame(fp, sizeof(frame)))
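The `bytes != sizeof(frame)` → `bytes != 0` flips only make sense if copy_from_user_nmi() changed its calling convention; given the arch/x86/lib/usercopy.c entry in the diffstat, the likely new contract (an inference — that hunk is not shown in this section) is that it now returns the number of bytes *not* copied, like copy_from_user(). A user-space analogue of the caller pattern:

    #include <stdio.h>
    #include <string.h>

    /* Returns bytes NOT copied, so 0 means complete success. */
    static unsigned long copy_capped(void *dst, const void *src,
    				 unsigned long n, unsigned long readable)
    {
    	unsigned long ok = n < readable ? n : readable;

    	memcpy(dst, src, ok);
    	return n - ok;
    }

    int main(void)
    {
    	char src[16] = "frame data here";
    	char frame[8];

    	if (copy_capped(frame, src, sizeof(frame), sizeof(src)) != 0)
    		puts("stop unwinding");
    	else
    		printf("got frame: %.8s\n", frame);
    	return 0;
    }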
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index cc16faae0538..fd00bb29425d 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -164,6 +164,11 @@ struct cpu_hw_events {
 	struct perf_guest_switch_msr	guest_switch_msrs[X86_PMC_IDX_MAX];
 
 	/*
+	 * Intel checkpoint mask
+	 */
+	u64				intel_cp_status;
+
+	/*
 	 * manage shared (per-core, per-cpu) registers
 	 * used on Intel NHM/WSM/SNB
 	 */
@@ -440,6 +445,7 @@ struct x86_pmu {
 	int		lbr_nr;			   /* hardware stack size */
 	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
 	const int	*lbr_sel_map;		   /* lbr_select mappings */
+	bool		lbr_double_abort;	   /* duplicated lbr aborts */
 
 	/*
 	 * Extra registers for events
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f31a1655d1ff..0fa4f242f050 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -190,9 +190,9 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
 	EVENT_EXTRA_END
 };
 
-EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
-EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+EVENT_ATTR_STR(mem-loads,	mem_ld_nhm,	"event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads,	mem_ld_snb,	"event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,	mem_st_snb,	"event=0xcd,umask=0x2");
 
 struct attribute *nhm_events_attrs[] = {
 	EVENT_PTR(mem_ld_nhm),
@@ -1184,6 +1184,11 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
 	wrmsrl(hwc->config_base, ctrl_val);
 }
 
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+	return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
 static void intel_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -1197,6 +1202,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 
 	cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
 	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+	cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
 	/*
 	 * must disable before any actual event
@@ -1271,6 +1277,9 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	if (event->attr.exclude_guest)
 		cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
 
+	if (unlikely(event_is_checkpointed(event)))
+		cpuc->intel_cp_status |= (1ull << hwc->idx);
+
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_enable_fixed(hwc);
 		return;
@@ -1289,6 +1298,17 @@ static void intel_pmu_enable_event(struct perf_event *event)
 int intel_pmu_save_and_restart(struct perf_event *event)
 {
 	x86_perf_event_update(event);
+	/*
+	 * For a checkpointed counter always reset back to 0. This
+	 * avoids a situation where the counter overflows, aborts the
+	 * transaction and is then set back to shortly before the
+	 * overflow, and overflows and aborts again.
+	 */
+	if (unlikely(event_is_checkpointed(event))) {
+		/* No race with NMIs because the counter should not be armed */
+		wrmsrl(event->hw.event_base, 0);
+		local64_set(&event->hw.prev_count, 0);
+	}
 	return x86_perf_event_set_period(event);
 }
 
@@ -1372,6 +1392,13 @@ again:
 		x86_pmu.drain_pebs(regs);
 	}
 
+	/*
+	 * Checkpointed counters can lead to 'spurious' PMIs because the
+	 * rollback caused by the PMI will have cleared the overflow status
+	 * bit. Therefore always force probe these counters.
+	 */
+	status |= cpuc->intel_cp_status;
+
 	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
 
@@ -1837,6 +1864,20 @@ static int hsw_hw_config(struct perf_event *event)
 	     event->attr.precise_ip > 0))
 		return -EOPNOTSUPP;
 
+	if (event_is_checkpointed(event)) {
+		/*
+		 * Sampling of checkpointed events can cause situations where
+		 * the CPU constantly aborts because of a overflow, which is
+		 * then checkpointed back and ignored. Forbid checkpointing
+		 * for sampling.
+		 *
+		 * But still allow a long sampling period, so that perf stat
+		 * from KVM works.
+		 */
+		if (event->attr.sample_period > 0 &&
+		    event->attr.sample_period < 0x7fffffff)
+			return -EOPNOTSUPP;
+	}
 	return 0;
 }
 
@@ -2182,10 +2223,36 @@ static __init void intel_nehalem_quirk(void)
 	}
 }
 
-EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
+EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82")
+
+/* Haswell special events */
+EVENT_ATTR_STR(tx-start,	tx_start,	"event=0xc9,umask=0x1");
+EVENT_ATTR_STR(tx-commit,	tx_commit,	"event=0xc9,umask=0x2");
+EVENT_ATTR_STR(tx-abort,	tx_abort,	"event=0xc9,umask=0x4");
+EVENT_ATTR_STR(tx-capacity,	tx_capacity,	"event=0x54,umask=0x2");
+EVENT_ATTR_STR(tx-conflict,	tx_conflict,	"event=0x54,umask=0x1");
+EVENT_ATTR_STR(el-start,	el_start,	"event=0xc8,umask=0x1");
+EVENT_ATTR_STR(el-commit,	el_commit,	"event=0xc8,umask=0x2");
+EVENT_ATTR_STR(el-abort,	el_abort,	"event=0xc8,umask=0x4");
+EVENT_ATTR_STR(el-capacity,	el_capacity,	"event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-conflict,	el_conflict,	"event=0x54,umask=0x1");
+EVENT_ATTR_STR(cycles-t,	cycles_t,	"event=0x3c,in_tx=1");
+EVENT_ATTR_STR(cycles-ct,	cycles_ct,	"event=0x3c,in_tx=1,in_tx_cp=1");
 
 static struct attribute *hsw_events_attrs[] = {
+	EVENT_PTR(tx_start),
+	EVENT_PTR(tx_commit),
+	EVENT_PTR(tx_abort),
+	EVENT_PTR(tx_capacity),
+	EVENT_PTR(tx_conflict),
+	EVENT_PTR(el_start),
+	EVENT_PTR(el_commit),
+	EVENT_PTR(el_abort),
+	EVENT_PTR(el_capacity),
+	EVENT_PTR(el_conflict),
+	EVENT_PTR(cycles_t),
+	EVENT_PTR(cycles_ct),
 	EVENT_PTR(mem_ld_hsw),
 	EVENT_PTR(mem_st_hsw),
 	NULL
@@ -2452,6 +2519,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
 		x86_pmu.cpu_events = hsw_events_attrs;
+		x86_pmu.lbr_double_abort = true;
 		pr_cont("Haswell events, ");
 		break;
 
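The new aliases make the TSX counters reachable by name, e.g. `perf stat -e tx-start,tx-commit,tx-abort,cycles-t,cycles-ct ...`; cycles-ct shows why checkpointing exists, since the counter is rolled back to its tx-start value on abort so committed-transaction cycles can be derived. The raw encoding behind `in_tx=1` can also be programmed directly; a sketch using perf_event_open(), assuming the in_tx/in_tx_cp format fields map to config bits 32/33 (HSW_IN_TX / HSW_IN_TX_CHECKPOINTED) as on Haswell:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <string.h>
    #include <unistd.h>
    #include <stdio.h>

    int main(void)
    {
    	struct perf_event_attr attr;
    	long long cycles_in_tx;
    	int fd;

    	memset(&attr, 0, sizeof(attr));
    	attr.size   = sizeof(attr);
    	attr.type   = PERF_TYPE_RAW;
    	attr.config = 0x3c | (1ULL << 32);	/* cycles-t: event=0x3c,in_tx=1 */

    	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    	if (fd < 0) {
    		perror("perf_event_open");
    		return 1;
    	}

    	/* ... run RTM/HLE transactions here ... */

    	if (read(fd, &cycles_in_tx, sizeof(cycles_in_tx)) == sizeof(cycles_in_tx))
    		printf("cycles inside transactions: %lld\n", cycles_in_tx);
    	close(fd);
    	return 0;
    }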
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index ab3ba1c1b7dd..ae96cfa5eddd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -12,6 +12,7 @@
12 12
13#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) 13#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
14#define PEBS_BUFFER_SIZE PAGE_SIZE 14#define PEBS_BUFFER_SIZE PAGE_SIZE
15#define PEBS_FIXUP_SIZE PAGE_SIZE
15 16
16/* 17/*
17 * pebs_record_32 for p4 and core not supported 18 * pebs_record_32 for p4 and core not supported
@@ -182,18 +183,32 @@ struct pebs_record_nhm {
182 * Same as pebs_record_nhm, with two additional fields. 183 * Same as pebs_record_nhm, with two additional fields.
183 */ 184 */
184struct pebs_record_hsw { 185struct pebs_record_hsw {
185 struct pebs_record_nhm nhm; 186 u64 flags, ip;
186 /* 187 u64 ax, bx, cx, dx;
187 * Real IP of the event. In the Intel documentation this 188 u64 si, di, bp, sp;
188 * is called eventingrip. 189 u64 r8, r9, r10, r11;
189 */ 190 u64 r12, r13, r14, r15;
190 u64 real_ip; 191 u64 status, dla, dse, lat;
191 /* 192 u64 real_ip, tsx_tuning;
192 * TSX tuning information field: abort cycles and abort flags. 193};
193 */ 194
194 u64 tsx_tuning; 195union hsw_tsx_tuning {
196 struct {
197 u32 cycles_last_block : 32,
198 hle_abort : 1,
199 rtm_abort : 1,
200 instruction_abort : 1,
201 non_instruction_abort : 1,
202 retry : 1,
203 data_conflict : 1,
204 capacity_writes : 1,
205 capacity_reads : 1;
206 };
207 u64 value;
195}; 208};
196 209
210#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
211
197void init_debug_store_on_cpu(int cpu) 212void init_debug_store_on_cpu(int cpu)
198{ 213{
199 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 214 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -214,12 +229,14 @@ void fini_debug_store_on_cpu(int cpu)
214 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); 229 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
215} 230}
216 231
232static DEFINE_PER_CPU(void *, insn_buffer);
233
217static int alloc_pebs_buffer(int cpu) 234static int alloc_pebs_buffer(int cpu)
218{ 235{
219 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 236 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
220 int node = cpu_to_node(cpu); 237 int node = cpu_to_node(cpu);
221 int max, thresh = 1; /* always use a single PEBS record */ 238 int max, thresh = 1; /* always use a single PEBS record */
222 void *buffer; 239 void *buffer, *ibuffer;
223 240
224 if (!x86_pmu.pebs) 241 if (!x86_pmu.pebs)
225 return 0; 242 return 0;
@@ -228,6 +245,19 @@ static int alloc_pebs_buffer(int cpu)
228 if (unlikely(!buffer)) 245 if (unlikely(!buffer))
229 return -ENOMEM; 246 return -ENOMEM;
230 247
248 /*
249 * HSW+ already provides us the eventing ip; no need to allocate this
250 * buffer then.
251 */
252 if (x86_pmu.intel_cap.pebs_format < 2) {
253 ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
254 if (!ibuffer) {
255 kfree(buffer);
256 return -ENOMEM;
257 }
258 per_cpu(insn_buffer, cpu) = ibuffer;
259 }
260
231 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; 261 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
232 262
233 ds->pebs_buffer_base = (u64)(unsigned long)buffer; 263 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
@@ -248,6 +278,9 @@ static void release_pebs_buffer(int cpu)
248 if (!ds || !x86_pmu.pebs) 278 if (!ds || !x86_pmu.pebs)
249 return; 279 return;
250 280
281 kfree(per_cpu(insn_buffer, cpu));
282 per_cpu(insn_buffer, cpu) = NULL;
283
251 kfree((void *)(unsigned long)ds->pebs_buffer_base); 284 kfree((void *)(unsigned long)ds->pebs_buffer_base);
252 ds->pebs_buffer_base = 0; 285 ds->pebs_buffer_base = 0;
253} 286}
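The per-CPU insn_buffer is only needed on PEBS formats older than 2: those records lack the eventing IP, so intel_pmu_pebs_fixup_ip() has to re-decode up to PEBS_FIXUP_SIZE bytes of user text, and the scratch page gives it an NMI-safe destination. The allocation/release pairing follows the usual per-CPU buffer pattern; a hedged sketch with illustrative names:

	static DEFINE_PER_CPU(void *, scratch_buf);

	static int scratch_alloc(int cpu)	/* cf. alloc_pebs_buffer() */
	{
		void *p = kzalloc_node(PAGE_SIZE, GFP_KERNEL, cpu_to_node(cpu));

		if (!p)
			return -ENOMEM;
		per_cpu(scratch_buf, cpu) = p;
		return 0;
	}

	static void scratch_free(int cpu)	/* cf. release_pebs_buffer() */
	{
		kfree(per_cpu(scratch_buf, cpu));
		per_cpu(scratch_buf, cpu) = NULL;
	}
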
@@ -715,6 +748,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
715 unsigned long old_to, to = cpuc->lbr_entries[0].to; 748 unsigned long old_to, to = cpuc->lbr_entries[0].to;
716 unsigned long ip = regs->ip; 749 unsigned long ip = regs->ip;
717 int is_64bit = 0; 750 int is_64bit = 0;
751 void *kaddr;
718 752
719 /* 753 /*
720 * We don't need to fixup if the PEBS assist is fault like 754 * We don't need to fixup if the PEBS assist is fault like
@@ -738,7 +772,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
738 * unsigned math, either ip is before the start (impossible) or 772 * unsigned math, either ip is before the start (impossible) or
739 * the basic block is larger than 1 page (sanity) 773 * the basic block is larger than 1 page (sanity)
740 */ 774 */
741 if ((ip - to) > PAGE_SIZE) 775 if ((ip - to) > PEBS_FIXUP_SIZE)
742 return 0; 776 return 0;
743 777
744 /* 778 /*
@@ -749,29 +783,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
749 return 1; 783 return 1;
750 } 784 }
751 785
786 if (!kernel_ip(ip)) {
787 int size, bytes;
788 u8 *buf = this_cpu_read(insn_buffer);
789
790 size = ip - to; /* Must fit our buffer, see above */
791 bytes = copy_from_user_nmi(buf, (void __user *)to, size);
792 if (bytes != 0)
793 return 0;
794
795 kaddr = buf;
796 } else {
797 kaddr = (void *)to;
798 }
799
752 do { 800 do {
753 struct insn insn; 801 struct insn insn;
754 u8 buf[MAX_INSN_SIZE];
755 void *kaddr;
756 802
757 old_to = to; 803 old_to = to;
758 if (!kernel_ip(ip)) {
759 int bytes, size = MAX_INSN_SIZE;
760
761 bytes = copy_from_user_nmi(buf, (void __user *)to, size);
762 if (bytes != size)
763 return 0;
764
765 kaddr = buf;
766 } else
767 kaddr = (void *)to;
768 804
769#ifdef CONFIG_X86_64 805#ifdef CONFIG_X86_64
770 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); 806 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
771#endif 807#endif
772 insn_init(&insn, kaddr, is_64bit); 808 insn_init(&insn, kaddr, is_64bit);
773 insn_get_length(&insn); 809 insn_get_length(&insn);
810
774 to += insn.length; 811 to += insn.length;
812 kaddr += insn.length;
775 } while (to < ip); 813 } while (to < ip);
776 814
777 if (to == ip) { 815 if (to == ip) {
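The restructured loop above is the point of the new buffer: the whole [to, ip) range is copied from user space once, instead of one copy_from_user_nmi() of MAX_INSN_SIZE per decoded instruction, and kaddr then walks the local snapshot in lockstep with the user-space cursor. Condensed (a sketch of the code above, assuming the insn decoder from asm/insn.h):

	/* one bulk copy ... */
	if (!kernel_ip(ip)) {
		u8 *buf = this_cpu_read(insn_buffer);

		if (copy_from_user_nmi(buf, (void __user *)to, ip - to))
			return 0;	/* short copy: abandon the fixup */
		kaddr = buf;
	}

	/* ... then decode forward until 'to' catches up with 'ip' */
	do {
		struct insn insn;

		insn_init(&insn, kaddr, is_64bit);
		insn_get_length(&insn);
		to    += insn.length;
		kaddr += insn.length;
	} while (to < ip);
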
@@ -786,16 +824,34 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
786 return 0; 824 return 0;
787} 825}
788 826
827static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
828{
829 if (pebs->tsx_tuning) {
830 union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
831 return tsx.cycles_last_block;
832 }
833 return 0;
834}
835
836static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
837{
838 u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
839
840 /* For RTM XABORTs also log the abort code from AX */
841 if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
842 txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
843 return txn;
844}
845
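intel_hsw_transaction() shifts the flag byte into the PERF_TXN_* bit positions and, for RTM aborts, grafts in the abort code: after an RTM abort, EAX bit 0 is set when the abort came from an XABORT instruction and EAX bits 31:24 hold its imm8 operand, which is exactly what (pebs->ax >> 24) & 0xff recovers. A standalone decode sketch:

	/* Sketch: extract the XABORT imm8 from a PEBS-captured EAX value. */
	static inline int xabort_code(u64 ax)
	{
		if (!(ax & 1))			/* bit 0: abort caused by XABORT */
			return -1;
		return (ax >> 24) & 0xff;	/* bits 31:24: imm8 argument */
	}
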
789static void __intel_pmu_pebs_event(struct perf_event *event, 846static void __intel_pmu_pebs_event(struct perf_event *event,
790 struct pt_regs *iregs, void *__pebs) 847 struct pt_regs *iregs, void *__pebs)
791{ 848{
792 /* 849 /*
793 * We cast to pebs_record_nhm to get the load latency data 850 * We cast to the biggest pebs_record but are careful not to
794 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used 851 * unconditionally access the 'extra' entries.
795 */ 852 */
796 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 853 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
797 struct pebs_record_nhm *pebs = __pebs; 854 struct pebs_record_hsw *pebs = __pebs;
798 struct pebs_record_hsw *pebs_hsw = __pebs;
799 struct perf_sample_data data; 855 struct perf_sample_data data;
800 struct pt_regs regs; 856 struct pt_regs regs;
801 u64 sample_type; 857 u64 sample_type;
@@ -854,7 +910,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
854 regs.sp = pebs->sp; 910 regs.sp = pebs->sp;
855 911
856 if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { 912 if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
857 regs.ip = pebs_hsw->real_ip; 913 regs.ip = pebs->real_ip;
858 regs.flags |= PERF_EFLAGS_EXACT; 914 regs.flags |= PERF_EFLAGS_EXACT;
859 } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs)) 915 } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
860 regs.flags |= PERF_EFLAGS_EXACT; 916 regs.flags |= PERF_EFLAGS_EXACT;
@@ -862,9 +918,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
862 regs.flags &= ~PERF_EFLAGS_EXACT; 918 regs.flags &= ~PERF_EFLAGS_EXACT;
863 919
864 if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && 920 if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
865 x86_pmu.intel_cap.pebs_format >= 1) 921 x86_pmu.intel_cap.pebs_format >= 1)
866 data.addr = pebs->dla; 922 data.addr = pebs->dla;
867 923
924 if (x86_pmu.intel_cap.pebs_format >= 2) {
925 /* Only set the TSX weight when no memory weight. */
926 if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll)
927 data.weight = intel_hsw_weight(pebs);
928
929 if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION)
930 data.txn = intel_hsw_transaction(pebs);
931 }
932
868 if (has_branch_stack(event)) 933 if (has_branch_stack(event))
869 data.br_stack = &cpuc->lbr_stack; 934 data.br_stack = &cpuc->lbr_stack;
870 935
@@ -913,17 +978,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
913 __intel_pmu_pebs_event(event, iregs, at); 978 __intel_pmu_pebs_event(event, iregs, at);
914} 979}
915 980
916static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, 981static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
917 void *top)
918{ 982{
919 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 983 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
920 struct debug_store *ds = cpuc->ds; 984 struct debug_store *ds = cpuc->ds;
921 struct perf_event *event = NULL; 985 struct perf_event *event = NULL;
986 void *at, *top;
922 u64 status = 0; 987 u64 status = 0;
923 int bit; 988 int bit;
924 989
990 if (!x86_pmu.pebs_active)
991 return;
992
993 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
994 top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
995
925 ds->pebs_index = ds->pebs_buffer_base; 996 ds->pebs_index = ds->pebs_buffer_base;
926 997
998 if (unlikely(at > top))
999 return;
1000
1001 /*
1002 * Should not happen, we program the threshold at 1 and do not
1003 * set a reset value.
1004 */
1005 WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
1006 "Unexpected number of pebs records %ld\n",
1007 (long)(top - at) / x86_pmu.pebs_record_size);
1008
927 for (; at < top; at += x86_pmu.pebs_record_size) { 1009 for (; at < top; at += x86_pmu.pebs_record_size) {
928 struct pebs_record_nhm *p = at; 1010 struct pebs_record_nhm *p = at;
929 1011
@@ -951,61 +1033,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
951 } 1033 }
952} 1034}
953 1035
954static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
955{
956 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
957 struct debug_store *ds = cpuc->ds;
958 struct pebs_record_nhm *at, *top;
959 int n;
960
961 if (!x86_pmu.pebs_active)
962 return;
963
964 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
965 top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
966
967 ds->pebs_index = ds->pebs_buffer_base;
968
969 n = top - at;
970 if (n <= 0)
971 return;
972
973 /*
974 * Should not happen, we program the threshold at 1 and do not
975 * set a reset value.
976 */
977 WARN_ONCE(n > x86_pmu.max_pebs_events,
978 "Unexpected number of pebs records %d\n", n);
979
980 return __intel_pmu_drain_pebs_nhm(iregs, at, top);
981}
982
983static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
984{
985 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
986 struct debug_store *ds = cpuc->ds;
987 struct pebs_record_hsw *at, *top;
988 int n;
989
990 if (!x86_pmu.pebs_active)
991 return;
992
993 at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
994 top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
995
996 n = top - at;
997 if (n <= 0)
998 return;
999 /*
1000 * Should not happen, we program the threshold at 1 and do not
1001 * set a reset value.
1002 */
1003 WARN_ONCE(n > x86_pmu.max_pebs_events,
1004 "Unexpected number of pebs records %d\n", n);
1005
1006 return __intel_pmu_drain_pebs_nhm(iregs, at, top);
1007}
1008
1009/* 1036/*
1010 * BTS, PEBS probe and setup 1037 * BTS, PEBS probe and setup
1011 */ 1038 */
@@ -1040,7 +1067,7 @@ void intel_ds_init(void)
1040 case 2: 1067 case 2:
1041 pr_cont("PEBS fmt2%c, ", pebs_type); 1068 pr_cont("PEBS fmt2%c, ", pebs_type);
1042 x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); 1069 x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
1043 x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw; 1070 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
1044 break; 1071 break;
1045 1072
1046 default: 1073 default:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d5be06a5005e..d82d155aca8c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -284,6 +284,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
284 int lbr_format = x86_pmu.intel_cap.lbr_format; 284 int lbr_format = x86_pmu.intel_cap.lbr_format;
285 u64 tos = intel_pmu_lbr_tos(); 285 u64 tos = intel_pmu_lbr_tos();
286 int i; 286 int i;
287 int out = 0;
287 288
288 for (i = 0; i < x86_pmu.lbr_nr; i++) { 289 for (i = 0; i < x86_pmu.lbr_nr; i++) {
289 unsigned long lbr_idx = (tos - i) & mask; 290 unsigned long lbr_idx = (tos - i) & mask;
@@ -306,15 +307,27 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
306 } 307 }
307 from = (u64)((((s64)from) << skip) >> skip); 308 from = (u64)((((s64)from) << skip) >> skip);
308 309
309 cpuc->lbr_entries[i].from = from; 310 /*
310 cpuc->lbr_entries[i].to = to; 311 * Some CPUs report duplicated abort records,
311 cpuc->lbr_entries[i].mispred = mis; 312 * with the second entry not having an abort bit set.
312 cpuc->lbr_entries[i].predicted = pred; 313 * Skip them here. This loop runs backwards,
313 cpuc->lbr_entries[i].in_tx = in_tx; 314 * so we need to undo the previous record.
314 cpuc->lbr_entries[i].abort = abort; 315 * If the abort just happened outside the window
315 cpuc->lbr_entries[i].reserved = 0; 316 * the extra entry cannot be removed.
317 */
318 if (abort && x86_pmu.lbr_double_abort && out > 0)
319 out--;
320
321 cpuc->lbr_entries[out].from = from;
322 cpuc->lbr_entries[out].to = to;
323 cpuc->lbr_entries[out].mispred = mis;
324 cpuc->lbr_entries[out].predicted = pred;
325 cpuc->lbr_entries[out].in_tx = in_tx;
326 cpuc->lbr_entries[out].abort = abort;
327 cpuc->lbr_entries[out].reserved = 0;
328 out++;
316 } 329 }
317 cpuc->lbr_stack.nr = i; 330 cpuc->lbr_stack.nr = out;
318} 331}
319 332
320void intel_pmu_lbr_read(void) 333void intel_pmu_lbr_read(void)
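The rewritten loop decouples the read index i from the write index out so the stack can be compacted in place: when a duplicated abort record shows up (the second copy lacking the abort bit), rewinding out by one overwrites the bogus predecessor instead of keeping it. A sketch of the same pattern as a post-pass over struct perf_branch_entry (the two_abort_bug flag stands in for x86_pmu.lbr_double_abort):

	/* Sketch: in-place compaction, read index i, write index out. */
	static int dedup_aborts(struct perf_branch_entry *e, int nr,
				bool two_abort_bug)
	{
		int i, out = 0;

		for (i = 0; i < nr; i++) {
			/* aborted entry: overwrite the duplicate before it */
			if (e[i].abort && two_abort_bug && out > 0)
				out--;
			e[out++] = e[i];
		}
		return out;
	}
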
@@ -478,7 +491,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
478 491
479 /* may fail if text not present */ 492 /* may fail if text not present */
480 bytes = copy_from_user_nmi(buf, (void __user *)from, size); 493 bytes = copy_from_user_nmi(buf, (void __user *)from, size);
481 if (bytes != size) 494 if (bytes != 0)
482 return X86_BR_NONE; 495 return X86_BR_NONE;
483 496
484 addr = buf; 497 addr = buf;
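The bytes != size test becomes bytes != 0 because copy_from_user_nmi() is rewritten below (arch/x86/lib/usercopy.c) to return the number of bytes *not* copied, matching copy_from_user(); zero now means success. Call sites reduce to:

	/* Sketch: new success check for copy_from_user_nmi(). */
	if (copy_from_user_nmi(buf, (void __user *)from, size))
		return X86_BR_NONE;	/* non-zero: copy came up short */
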
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 4118f9f68315..29c248799ced 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -997,6 +997,20 @@ static int snbep_pci2phy_map_init(int devid)
997 } 997 }
998 } 998 }
999 999
1000 if (!err) {
1001 /*
1002 * For PCI bus with no UBOX device, find the next bus
1003 * that has UBOX device and use its mapping.
1004 */
1005 i = -1;
1006 for (bus = 255; bus >= 0; bus--) {
1007 if (pcibus_to_physid[bus] >= 0)
1008 i = pcibus_to_physid[bus];
1009 else
1010 pcibus_to_physid[bus] = i;
1011 }
1012 }
1013
1000 if (ubox_dev) 1014 if (ubox_dev)
1001 pci_dev_put(ubox_dev); 1015 pci_dev_put(ubox_dev);
1002 1016
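The backward sweep fills the holes in the bus-to-node map: a bus with no UBOX of its own inherits the physical node id of the next-higher bus that has one, and buses above the last UBOX keep -1. For example, if only buses 0x3f and 0x7f carry a UBOX, buses 0x00-0x3e inherit bus 0x3f's id and 0x40-0x7e inherit bus 0x7f's. The loop, annotated:

	int i = -1, bus;

	for (bus = 255; bus >= 0; bus--) {
		if (pcibus_to_physid[bus] >= 0)
			i = pcibus_to_physid[bus];	/* remember last real mapping */
		else
			pcibus_to_physid[bus] = i;	/* propagate it downwards */
	}
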
@@ -1099,6 +1113,24 @@ static struct attribute *ivt_uncore_qpi_formats_attr[] = {
1099 &format_attr_umask.attr, 1113 &format_attr_umask.attr,
1100 &format_attr_edge.attr, 1114 &format_attr_edge.attr,
1101 &format_attr_thresh8.attr, 1115 &format_attr_thresh8.attr,
1116 &format_attr_match_rds.attr,
1117 &format_attr_match_rnid30.attr,
1118 &format_attr_match_rnid4.attr,
1119 &format_attr_match_dnid.attr,
1120 &format_attr_match_mc.attr,
1121 &format_attr_match_opc.attr,
1122 &format_attr_match_vnw.attr,
1123 &format_attr_match0.attr,
1124 &format_attr_match1.attr,
1125 &format_attr_mask_rds.attr,
1126 &format_attr_mask_rnid30.attr,
1127 &format_attr_mask_rnid4.attr,
1128 &format_attr_mask_dnid.attr,
1129 &format_attr_mask_mc.attr,
1130 &format_attr_mask_opc.attr,
1131 &format_attr_mask_vnw.attr,
1132 &format_attr_mask0.attr,
1133 &format_attr_mask1.attr,
1102 NULL, 1134 NULL,
1103}; 1135};
1104 1136
@@ -1312,17 +1344,83 @@ static struct intel_uncore_type ivt_uncore_imc = {
1312 IVT_UNCORE_PCI_COMMON_INIT(), 1344 IVT_UNCORE_PCI_COMMON_INIT(),
1313}; 1345};
1314 1346
1347/* registers in IRP boxes are not properly aligned */
1348static unsigned ivt_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
1349static unsigned ivt_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
1350
1351static void ivt_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
1352{
1353 struct pci_dev *pdev = box->pci_dev;
1354 struct hw_perf_event *hwc = &event->hw;
1355
1356 pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx],
1357 hwc->config | SNBEP_PMON_CTL_EN);
1358}
1359
1360static void ivt_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
1361{
1362 struct pci_dev *pdev = box->pci_dev;
1363 struct hw_perf_event *hwc = &event->hw;
1364
1365 pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], hwc->config);
1366}
1367
1368static u64 ivt_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
1369{
1370 struct pci_dev *pdev = box->pci_dev;
1371 struct hw_perf_event *hwc = &event->hw;
1372 u64 count = 0;
1373
1374 pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
1375 pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
1376
1377 return count;
1378}
1379
1380static struct intel_uncore_ops ivt_uncore_irp_ops = {
1381 .init_box = ivt_uncore_pci_init_box,
1382 .disable_box = snbep_uncore_pci_disable_box,
1383 .enable_box = snbep_uncore_pci_enable_box,
1384 .disable_event = ivt_uncore_irp_disable_event,
1385 .enable_event = ivt_uncore_irp_enable_event,
1386 .read_counter = ivt_uncore_irp_read_counter,
1387};
1388
1389static struct intel_uncore_type ivt_uncore_irp = {
1390 .name = "irp",
1391 .num_counters = 4,
1392 .num_boxes = 1,
1393 .perf_ctr_bits = 48,
1394 .event_mask = IVT_PMON_RAW_EVENT_MASK,
1395 .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
1396 .ops = &ivt_uncore_irp_ops,
1397 .format_group = &ivt_uncore_format_group,
1398};
1399
1400static struct intel_uncore_ops ivt_uncore_qpi_ops = {
1401 .init_box = ivt_uncore_pci_init_box,
1402 .disable_box = snbep_uncore_pci_disable_box,
1403 .enable_box = snbep_uncore_pci_enable_box,
1404 .disable_event = snbep_uncore_pci_disable_event,
1405 .enable_event = snbep_qpi_enable_event,
1406 .read_counter = snbep_uncore_pci_read_counter,
1407 .hw_config = snbep_qpi_hw_config,
1408 .get_constraint = uncore_get_constraint,
1409 .put_constraint = uncore_put_constraint,
1410};
1411
1315static struct intel_uncore_type ivt_uncore_qpi = { 1412static struct intel_uncore_type ivt_uncore_qpi = {
1316 .name = "qpi", 1413 .name = "qpi",
1317 .num_counters = 4, 1414 .num_counters = 4,
1318 .num_boxes = 3, 1415 .num_boxes = 3,
1319 .perf_ctr_bits = 48, 1416 .perf_ctr_bits = 48,
1320 .perf_ctr = SNBEP_PCI_PMON_CTR0, 1417 .perf_ctr = SNBEP_PCI_PMON_CTR0,
1321 .event_ctl = SNBEP_PCI_PMON_CTL0, 1418 .event_ctl = SNBEP_PCI_PMON_CTL0,
1322 .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK, 1419 .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK,
1323 .box_ctl = SNBEP_PCI_PMON_BOX_CTL, 1420 .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
1324 .ops = &ivt_uncore_pci_ops, 1421 .num_shared_regs = 1,
1325 .format_group = &ivt_uncore_qpi_format_group, 1422 .ops = &ivt_uncore_qpi_ops,
1423 .format_group = &ivt_uncore_qpi_format_group,
1326}; 1424};
1327 1425
1328static struct intel_uncore_type ivt_uncore_r2pcie = { 1426static struct intel_uncore_type ivt_uncore_r2pcie = {
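ivt_uncore_irp_read_counter() above assembles a 48-bit counter from two 32-bit config-space reads; storing through (u32 *)&count and (u32 *)&count + 1 is correct only because x86 is little-endian. An endian-explicit equivalent (sketch):

	static u64 irp_read_counter_sketch(struct pci_dev *pdev, int reg)
	{
		u32 lo, hi;

		pci_read_config_dword(pdev, reg, &lo);		/* bits 31:0 */
		pci_read_config_dword(pdev, reg + 4, &hi);	/* bits 47:32 */
		return ((u64)hi << 32) | lo;
	}
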
@@ -1346,6 +1444,7 @@ static struct intel_uncore_type ivt_uncore_r3qpi = {
1346enum { 1444enum {
1347 IVT_PCI_UNCORE_HA, 1445 IVT_PCI_UNCORE_HA,
1348 IVT_PCI_UNCORE_IMC, 1446 IVT_PCI_UNCORE_IMC,
1447 IVT_PCI_UNCORE_IRP,
1349 IVT_PCI_UNCORE_QPI, 1448 IVT_PCI_UNCORE_QPI,
1350 IVT_PCI_UNCORE_R2PCIE, 1449 IVT_PCI_UNCORE_R2PCIE,
1351 IVT_PCI_UNCORE_R3QPI, 1450 IVT_PCI_UNCORE_R3QPI,
@@ -1354,6 +1453,7 @@ enum {
1354static struct intel_uncore_type *ivt_pci_uncores[] = { 1453static struct intel_uncore_type *ivt_pci_uncores[] = {
1355 [IVT_PCI_UNCORE_HA] = &ivt_uncore_ha, 1454 [IVT_PCI_UNCORE_HA] = &ivt_uncore_ha,
1356 [IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc, 1455 [IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc,
1456 [IVT_PCI_UNCORE_IRP] = &ivt_uncore_irp,
1357 [IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi, 1457 [IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi,
1358 [IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie, 1458 [IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie,
1359 [IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi, 1459 [IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi,
@@ -1401,6 +1501,10 @@ static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = {
1401 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), 1501 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
1402 .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7), 1502 .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7),
1403 }, 1503 },
1504 { /* IRP */
1505 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
1506 .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IRP, 0),
1507 },
1404 { /* QPI0 Port 0 */ 1508 { /* QPI0 Port 0 */
1405 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), 1509 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
1406 .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0), 1510 .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0),
@@ -1429,6 +1533,16 @@ static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = {
1429 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), 1533 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
1430 .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2), 1534 .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2),
1431 }, 1535 },
1536 { /* QPI Port 0 filter */
1537 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
1538 .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
1539 SNBEP_PCI_QPI_PORT0_FILTER),
1540 },
1541 { /* QPI Port 0 filter */
1542 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
1543 .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
1544 SNBEP_PCI_QPI_PORT1_FILTER),
1545 },
1432 { /* end: all zeroes */ } 1546 { /* end: all zeroes */ }
1433}; 1547};
1434 1548
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e0e0841eef45..18677a90d6a3 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -127,12 +127,12 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
127 cpu_emergency_vmxoff(); 127 cpu_emergency_vmxoff();
128 cpu_emergency_svm_disable(); 128 cpu_emergency_svm_disable();
129 129
130 lapic_shutdown();
131#ifdef CONFIG_X86_IO_APIC 130#ifdef CONFIG_X86_IO_APIC
132 /* Prevent crash_kexec() from deadlocking on ioapic_lock. */ 131 /* Prevent crash_kexec() from deadlocking on ioapic_lock. */
133 ioapic_zap_locks(); 132 ioapic_zap_locks();
134 disable_IO_APIC(); 133 disable_IO_APIC();
135#endif 134#endif
135 lapic_shutdown();
136#ifdef CONFIG_HPET_TIMER 136#ifdef CONFIG_HPET_TIMER
137 hpet_disable(); 137 hpet_disable();
138#endif 138#endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0dcb0ceb6a2..fd1bc1b15e6d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -362,12 +362,9 @@ END(ret_from_exception)
362#ifdef CONFIG_PREEMPT 362#ifdef CONFIG_PREEMPT
363ENTRY(resume_kernel) 363ENTRY(resume_kernel)
364 DISABLE_INTERRUPTS(CLBR_ANY) 364 DISABLE_INTERRUPTS(CLBR_ANY)
365 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
366 jnz restore_all
367need_resched: 365need_resched:
368 movl TI_flags(%ebp), %ecx # need_resched set ? 366 cmpl $0,PER_CPU_VAR(__preempt_count)
369 testb $_TIF_NEED_RESCHED, %cl 367 jnz restore_all
370 jz restore_all
371 testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? 368 testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
372 jz restore_all 369 jz restore_all
373 call preempt_schedule_irq 370 call preempt_schedule_irq
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b077f4cc225a..603be7c70675 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1103,10 +1103,8 @@ retint_signal:
1103 /* Returning to kernel space. Check if we need preemption */ 1103 /* Returning to kernel space. Check if we need preemption */
1104 /* rcx: threadinfo. interrupts off. */ 1104 /* rcx: threadinfo. interrupts off. */
1105ENTRY(retint_kernel) 1105ENTRY(retint_kernel)
1106 cmpl $0,TI_preempt_count(%rcx) 1106 cmpl $0,PER_CPU_VAR(__preempt_count)
1107 jnz retint_restore_args 1107 jnz retint_restore_args
1108 bt $TIF_NEED_RESCHED,TI_flags(%rcx)
1109 jnc retint_restore_args
1110 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ 1108 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
1111 jnc retint_restore_args 1109 jnc retint_restore_args
1112 call preempt_schedule_irq 1110 call preempt_schedule_irq
@@ -1342,7 +1340,7 @@ bad_gs:
1342 .previous 1340 .previous
1343 1341
1344/* Call softirq on interrupt stack. Interrupts are off. */ 1342/* Call softirq on interrupt stack. Interrupts are off. */
1345ENTRY(call_softirq) 1343ENTRY(do_softirq_own_stack)
1346 CFI_STARTPROC 1344 CFI_STARTPROC
1347 pushq_cfi %rbp 1345 pushq_cfi %rbp
1348 CFI_REL_OFFSET rbp,0 1346 CFI_REL_OFFSET rbp,0
@@ -1359,7 +1357,7 @@ ENTRY(call_softirq)
1359 decl PER_CPU_VAR(irq_count) 1357 decl PER_CPU_VAR(irq_count)
1360 ret 1358 ret
1361 CFI_ENDPROC 1359 CFI_ENDPROC
1362END(call_softirq) 1360END(do_softirq_own_stack)
1363 1361
1364#ifdef CONFIG_XEN 1362#ifdef CONFIG_XEN
1365zeroentry xen_hypervisor_callback xen_do_hypervisor_callback 1363zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
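Both entry paths lose a branch because NEED_RESCHED is now folded into the per-CPU __preempt_count with the bit stored inverted: the count reads as zero exactly when the preempt nesting level is zero *and* a reschedule is pending, so a single cmpl against zero replaces the old preempt_count test plus the TIF_NEED_RESCHED probe. A sketch of the encoding (mirroring asm/preempt.h from this series):

	#define PREEMPT_NEED_RESCHED	0x80000000	/* stored inverted! */

	/* need_resched is signalled by clearing the bit ... */
	static inline void set_need_resched(u32 *pc)   { *pc &= ~PREEMPT_NEED_RESCHED; }
	static inline void clear_need_resched(u32 *pc) { *pc |=  PREEMPT_NEED_RESCHED; }

	/* ... so "preempt now?" collapses to one compare against zero. */
	static inline bool should_preempt(u32 pc)      { return pc == 0; }
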
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 0fa69127209a..05fd74f537d6 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -37,3 +37,10 @@ EXPORT_SYMBOL(strstr);
37 37
38EXPORT_SYMBOL(csum_partial); 38EXPORT_SYMBOL(csum_partial);
39EXPORT_SYMBOL(empty_zero_page); 39EXPORT_SYMBOL(empty_zero_page);
40
41#ifdef CONFIG_PREEMPT
42EXPORT_SYMBOL(___preempt_schedule);
43#ifdef CONFIG_CONTEXT_TRACKING
44EXPORT_SYMBOL(___preempt_schedule_context);
45#endif
46#endif
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 9a5c460404dc..2e977b5d61dd 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -312,8 +312,7 @@ static void init_8259A(int auto_eoi)
312 */ 312 */
313 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ 313 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
314 314
315 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, 315 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
316 to 0x20-0x27 on i386 */
317 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); 316 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
318 317
319 /* 8259A-1 (the master) has a slave on IR2 */ 318 /* 8259A-1 (the master) has a slave on IR2 */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 4186755f1d7c..d7fcbedc9c43 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
100 irqctx->tinfo.task = curctx->tinfo.task; 100 irqctx->tinfo.task = curctx->tinfo.task;
101 irqctx->tinfo.previous_esp = current_stack_pointer; 101 irqctx->tinfo.previous_esp = current_stack_pointer;
102 102
103 /* Copy the preempt_count so that the [soft]irq checks work. */
104 irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
105
106 if (unlikely(overflow)) 103 if (unlikely(overflow))
107 call_on_stack(print_stack_overflow, isp); 104 call_on_stack(print_stack_overflow, isp);
108 105
@@ -131,7 +128,6 @@ void irq_ctx_init(int cpu)
131 THREAD_SIZE_ORDER)); 128 THREAD_SIZE_ORDER));
132 memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); 129 memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
133 irqctx->tinfo.cpu = cpu; 130 irqctx->tinfo.cpu = cpu;
134 irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
135 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); 131 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
136 132
137 per_cpu(hardirq_ctx, cpu) = irqctx; 133 per_cpu(hardirq_ctx, cpu) = irqctx;
@@ -149,35 +145,21 @@ void irq_ctx_init(int cpu)
149 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); 145 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
150} 146}
151 147
152asmlinkage void do_softirq(void) 148void do_softirq_own_stack(void)
153{ 149{
154 unsigned long flags;
155 struct thread_info *curctx; 150 struct thread_info *curctx;
156 union irq_ctx *irqctx; 151 union irq_ctx *irqctx;
157 u32 *isp; 152 u32 *isp;
158 153
159 if (in_interrupt()) 154 curctx = current_thread_info();
160 return; 155 irqctx = __this_cpu_read(softirq_ctx);
161 156 irqctx->tinfo.task = curctx->task;
162 local_irq_save(flags); 157 irqctx->tinfo.previous_esp = current_stack_pointer;
163
164 if (local_softirq_pending()) {
165 curctx = current_thread_info();
166 irqctx = __this_cpu_read(softirq_ctx);
167 irqctx->tinfo.task = curctx->task;
168 irqctx->tinfo.previous_esp = current_stack_pointer;
169
170 /* build the stack frame on the softirq stack */
171 isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
172 158
173 call_on_stack(__do_softirq, isp); 159 /* build the stack frame on the softirq stack */
174 /* 160 isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
175 * Shouldn't happen, we returned above if in_interrupt():
176 */
177 WARN_ON_ONCE(softirq_count());
178 }
179 161
180 local_irq_restore(flags); 162 call_on_stack(__do_softirq, isp);
181} 163}
182 164
183bool handle_irq(unsigned irq, struct pt_regs *regs) 165bool handle_irq(unsigned irq, struct pt_regs *regs)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index d04d3ecded62..4d1c746892eb 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -87,24 +87,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
87 generic_handle_irq_desc(irq, desc); 87 generic_handle_irq_desc(irq, desc);
88 return true; 88 return true;
89} 89}
90
91
92extern void call_softirq(void);
93
94asmlinkage void do_softirq(void)
95{
96 __u32 pending;
97 unsigned long flags;
98
99 if (in_interrupt())
100 return;
101
102 local_irq_save(flags);
103 pending = local_softirq_pending();
104 /* Switch to interrupt stack */
105 if (pending) {
106 call_softirq();
107 WARN_ON_ONCE(softirq_count());
108 }
109 local_irq_restore(flags);
110}
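With both arch copies of do_softirq() gone, the in_interrupt() and pending checks live once in generic code and the architecture only supplies the stack switch via do_softirq_own_stack(). The generic caller in kernel/softirq.c ends up looking roughly like this (hedged reconstruction):

	asmlinkage void do_softirq(void)
	{
		unsigned long flags;

		if (in_interrupt())
			return;

		local_irq_save(flags);
		if (local_softirq_pending())
			do_softirq_own_stack();	/* irq_32.c / entry_64.S above */
		local_irq_restore(flags);
	}
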
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 697b93af02dd..b2046e4d0b59 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -609,7 +609,7 @@ static struct dentry *d_kvm_debug;
609 609
610struct dentry *kvm_init_debugfs(void) 610struct dentry *kvm_init_debugfs(void)
611{ 611{
612 d_kvm_debug = debugfs_create_dir("kvm", NULL); 612 d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
613 if (!d_kvm_debug) 613 if (!d_kvm_debug)
614 printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); 614 printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
615 615
@@ -775,11 +775,22 @@ void __init kvm_spinlock_init(void)
775 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) 775 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
776 return; 776 return;
777 777
778 printk(KERN_INFO "KVM setup paravirtual spinlock\n"); 778 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
779 pv_lock_ops.unlock_kick = kvm_unlock_kick;
780}
781
782static __init int kvm_spinlock_init_jump(void)
783{
784 if (!kvm_para_available())
785 return 0;
786 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
787 return 0;
779 788
780 static_key_slow_inc(&paravirt_ticketlocks_enabled); 789 static_key_slow_inc(&paravirt_ticketlocks_enabled);
790 printk(KERN_INFO "KVM setup paravirtual spinlock\n");
781 791
782 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); 792 return 0;
783 pv_lock_ops.unlock_kick = kvm_unlock_kick;
784} 793}
794early_initcall(kvm_spinlock_init_jump);
795
785#endif /* CONFIG_PARAVIRT_SPINLOCKS */ 796#endif /* CONFIG_PARAVIRT_SPINLOCKS */
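Flipping the static key is split out into an early_initcall() because kvm_spinlock_init() runs too early for it: static_key_slow_inc() takes the jump-label mutex and may sleep, which the early setup path cannot. Only the key flip is deferred; the pv_lock_ops wiring stays put. The general shape of the pattern (my_key and feature_probe() are illustrative):

	static struct static_key my_key = STATIC_KEY_INIT_FALSE;

	static __init int enable_my_key(void)
	{
		if (!feature_probe())		/* hypothetical guest check */
			return 0;
		static_key_slow_inc(&my_key);	/* safe here: may sleep */
		return 0;
	}
	early_initcall(enable_my_key);
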
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ba77ebc2c353..6fcb49ce50a1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
113 u64 before, delta, whole_msecs; 113 u64 before, delta, whole_msecs;
114 int remainder_ns, decimal_msecs, thishandled; 114 int remainder_ns, decimal_msecs, thishandled;
115 115
116 before = local_clock(); 116 before = sched_clock();
117 thishandled = a->handler(type, regs); 117 thishandled = a->handler(type, regs);
118 handled += thishandled; 118 handled += thishandled;
119 delta = local_clock() - before; 119 delta = sched_clock() - before;
120 trace_nmi_handler(a->handler, (int)delta, thishandled); 120 trace_nmi_handler(a->handler, (int)delta, thishandled);
121 121
122 if (delta < nmi_longest_ns) 122 if (delta < nmi_longest_ns)
diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S
new file mode 100644
index 000000000000..ca7f0d58a87d
--- /dev/null
+++ b/arch/x86/kernel/preempt.S
@@ -0,0 +1,25 @@
1
2#include <linux/linkage.h>
3#include <asm/dwarf2.h>
4#include <asm/asm.h>
5#include <asm/calling.h>
6
7ENTRY(___preempt_schedule)
8 CFI_STARTPROC
9 SAVE_ALL
10 call preempt_schedule
11 RESTORE_ALL
12 ret
13 CFI_ENDPROC
14
15#ifdef CONFIG_CONTEXT_TRACKING
16
17ENTRY(___preempt_schedule_context)
18 CFI_STARTPROC
19 SAVE_ALL
20 call preempt_schedule_context
21 RESTORE_ALL
22 ret
23 CFI_ENDPROC
24
25#endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c83516be1052..3fb8d95ab8b5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -391,9 +391,9 @@ static void amd_e400_idle(void)
391 * The switch back from broadcast mode needs to be 391 * The switch back from broadcast mode needs to be
392 * called with interrupts disabled. 392 * called with interrupts disabled.
393 */ 393 */
394 local_irq_disable(); 394 local_irq_disable();
395 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); 395 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
396 local_irq_enable(); 396 local_irq_enable();
397 } else 397 } else
398 default_idle(); 398 default_idle();
399} 399}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 884f98f69354..c2ec1aa6d454 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -292,6 +292,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
292 set_iopl_mask(next->iopl); 292 set_iopl_mask(next->iopl);
293 293
294 /* 294 /*
295 * If it were not for PREEMPT_ACTIVE we could guarantee that the
296 * preempt_count of all tasks was equal here and this would not be
297 * needed.
298 */
299 task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
300 this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
301
302 /*
295 * Now maybe handle debug registers and/or IO bitmaps 303 * Now maybe handle debug registers and/or IO bitmaps
296 */ 304 */
297 if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || 305 if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bb1dc51bab05..45ab4d6fc8a7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -363,6 +363,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
363 this_cpu_write(old_rsp, next->usersp); 363 this_cpu_write(old_rsp, next->usersp);
364 this_cpu_write(current_task, next_p); 364 this_cpu_write(current_task, next_p);
365 365
366 /*
367 * If it were not for PREEMPT_ACTIVE we could guarantee that the
368 * preempt_count of all tasks was equal here and this would not be
369 * needed.
370 */
371 task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
372 this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
373
366 this_cpu_write(kernel_stack, 374 this_cpu_write(kernel_stack,
367 (unsigned long)task_stack_page(next_p) + 375 (unsigned long)task_stack_page(next_p) +
368 THREAD_SIZE - KERNEL_STACK_OFFSET); 376 THREAD_SIZE - KERNEL_STACK_OFFSET);
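The identical hunks in process_32.c and process_64.c exist because preempt_count moved from thread_info to a per-CPU variable: across a context switch the outgoing task's count has to be parked in its thread_info and the incoming task's value installed, since (as the comment notes) PREEMPT_ACTIVE means the counts are not guaranteed equal. The swap itself:

	/* park prev's count, install next's */
	task_thread_info(prev_p)->saved_preempt_count =
		this_cpu_read(__preempt_count);
	this_cpu_write(__preempt_count,
		       task_thread_info(next_p)->saved_preempt_count);
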
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e643e744e4d8..618ce264b237 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -326,6 +326,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
326 DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"), 326 DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"),
327 }, 327 },
328 }, 328 },
329 { /* Handle problems with rebooting on the Latitude E5410. */
330 .callback = set_pci_reboot,
331 .ident = "Dell Latitude E5410",
332 .matches = {
333 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
334 DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5410"),
335 },
336 },
329 { /* Handle problems with rebooting on the Latitude E5420. */ 337 { /* Handle problems with rebooting on the Latitude E5420. */
330 .callback = set_pci_reboot, 338 .callback = set_pci_reboot,
331 .ident = "Dell Latitude E5420", 339 .ident = "Dell Latitude E5420",
@@ -542,6 +550,10 @@ static void native_machine_emergency_restart(void)
542void native_machine_shutdown(void) 550void native_machine_shutdown(void)
543{ 551{
544 /* Stop the cpus and apics */ 552 /* Stop the cpus and apics */
553#ifdef CONFIG_X86_IO_APIC
554 disable_IO_APIC();
555#endif
556
545#ifdef CONFIG_SMP 557#ifdef CONFIG_SMP
546 /* 558 /*
547 * Stop all of the others. Also disable the local irq to 559 * Stop all of the others. Also disable the local irq to
@@ -554,10 +566,6 @@ void native_machine_shutdown(void)
554 566
555 lapic_shutdown(); 567 lapic_shutdown();
556 568
557#ifdef CONFIG_X86_IO_APIC
558 disable_IO_APIC();
559#endif
560
561#ifdef CONFIG_HPET_TIMER 569#ifdef CONFIG_HPET_TIMER
562 hpet_disable(); 570 hpet_disable();
563#endif 571#endif
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 0aa29394ed6f..5b9dd445eb89 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -192,6 +192,14 @@ static __init int add_rtc_cmos(void)
192 if (mrst_identify_cpu()) 192 if (mrst_identify_cpu())
193 return -ENODEV; 193 return -ENODEV;
194 194
195#ifdef CONFIG_ACPI
196 if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
197 /* This warning can likely go away again in a year or two. */
198 pr_info("ACPI: not registering RTC platform device\n");
199 return -ENODEV;
200 }
201#endif
202
195 platform_device_register(&rtc_device); 203 platform_device_register(&rtc_device);
196 dev_info(&rtc_device.dev, 204 dev_info(&rtc_device.dev,
197 "registered platform RTC device (no PNP device found)\n"); 205 "registered platform RTC device (no PNP device found)\n");
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6cacab671f9b..2a165580fa16 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -73,11 +73,10 @@
73#include <asm/setup.h> 73#include <asm/setup.h>
74#include <asm/uv/uv.h> 74#include <asm/uv/uv.h>
75#include <linux/mc146818rtc.h> 75#include <linux/mc146818rtc.h>
76
77#include <asm/smpboot_hooks.h> 76#include <asm/smpboot_hooks.h>
78#include <asm/i8259.h> 77#include <asm/i8259.h>
79
80#include <asm/realmode.h> 78#include <asm/realmode.h>
79#include <asm/misc.h>
81 80
82/* State of each CPU */ 81/* State of each CPU */
83DEFINE_PER_CPU(int, cpu_state) = { 0 }; 82DEFINE_PER_CPU(int, cpu_state) = { 0 };
@@ -648,22 +647,46 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
648 return (send_status | accept_status); 647 return (send_status | accept_status);
649} 648}
650 649
650void smp_announce(void)
651{
652 int num_nodes = num_online_nodes();
653
654 printk(KERN_INFO "x86: Booted up %d node%s, %d CPUs\n",
655 num_nodes, (num_nodes > 1 ? "s" : ""), num_online_cpus());
656}
657
651/* reduce the number of lines printed when booting a large cpu count system */ 658/* reduce the number of lines printed when booting a large cpu count system */
652static void announce_cpu(int cpu, int apicid) 659static void announce_cpu(int cpu, int apicid)
653{ 660{
654 static int current_node = -1; 661 static int current_node = -1;
655 int node = early_cpu_to_node(cpu); 662 int node = early_cpu_to_node(cpu);
656 int max_cpu_present = find_last_bit(cpumask_bits(cpu_present_mask), NR_CPUS); 663 static int width, node_width;
664
665 if (!width)
666 width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */
667
668 if (!node_width)
669 node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */
670
671 if (cpu == 1)
672 printk(KERN_INFO "x86: Booting SMP configuration:\n");
657 673
658 if (system_state == SYSTEM_BOOTING) { 674 if (system_state == SYSTEM_BOOTING) {
659 if (node != current_node) { 675 if (node != current_node) {
660 if (current_node > (-1)) 676 if (current_node > (-1))
661 pr_cont(" OK\n"); 677 pr_cont("\n");
662 current_node = node; 678 current_node = node;
663 pr_info("Booting Node %3d, Processors ", node); 679
680 printk(KERN_INFO ".... node %*s#%d, CPUs: ",
681 node_width - num_digits(node), " ", node);
664 } 682 }
665 pr_cont(" #%4d%s", cpu, cpu == max_cpu_present ? " OK\n" : ""); 683
666 return; 684 /* Add padding for the BSP */
685 if (cpu == 1)
686 pr_cont("%*s", width + 1, " ");
687
688 pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);
689
667 } else 690 } else
668 pr_info("Booting Node %d Processor %d APIC 0x%x\n", 691 pr_info("Booting Node %d Processor %d APIC 0x%x\n",
669 node, cpu, apicid); 692 node, cpu, apicid);
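With the num_digits()-derived field widths, CPUs line up in fixed columns no matter how many there are, and the per-node "OK" chatter disappears. On a small two-node box the boot log would look roughly like this (illustrative, not captured output):

	x86: Booting SMP configuration:
	.... node  #0, CPUs:      #1  #2  #3
	.... node  #1, CPUs:  #4  #5  #6  #7
	x86: Booted up 2 nodes, 8 CPUs
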
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8c8093b146ca..729aa779ff75 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -88,7 +88,7 @@ static inline void conditional_sti(struct pt_regs *regs)
88 88
89static inline void preempt_conditional_sti(struct pt_regs *regs) 89static inline void preempt_conditional_sti(struct pt_regs *regs)
90{ 90{
91 inc_preempt_count(); 91 preempt_count_inc();
92 if (regs->flags & X86_EFLAGS_IF) 92 if (regs->flags & X86_EFLAGS_IF)
93 local_irq_enable(); 93 local_irq_enable();
94} 94}
@@ -103,7 +103,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
103{ 103{
104 if (regs->flags & X86_EFLAGS_IF) 104 if (regs->flags & X86_EFLAGS_IF)
105 local_irq_disable(); 105 local_irq_disable();
106 dec_preempt_count(); 106 preempt_count_dec();
107} 107}
108 108
109static int __kprobes 109static int __kprobes
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index b014d9414d08..040681928e9d 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -66,3 +66,10 @@ EXPORT_SYMBOL(empty_zero_page);
66#ifndef CONFIG_PARAVIRT 66#ifndef CONFIG_PARAVIRT
67EXPORT_SYMBOL(native_load_gs_index); 67EXPORT_SYMBOL(native_load_gs_index);
68#endif 68#endif
69
70#ifdef CONFIG_PREEMPT
71EXPORT_SYMBOL(___preempt_schedule);
72#ifdef CONFIG_CONTEXT_TRACKING
73EXPORT_SYMBOL(___preempt_schedule_context);
74#endif
75#endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3b8e7459dd4d..2b2fce1b2009 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3255,25 +3255,29 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
3255 3255
3256static void ept_load_pdptrs(struct kvm_vcpu *vcpu) 3256static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
3257{ 3257{
3258 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
3259
3258 if (!test_bit(VCPU_EXREG_PDPTR, 3260 if (!test_bit(VCPU_EXREG_PDPTR,
3259 (unsigned long *)&vcpu->arch.regs_dirty)) 3261 (unsigned long *)&vcpu->arch.regs_dirty))
3260 return; 3262 return;
3261 3263
3262 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { 3264 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
3263 vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]); 3265 vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
3264 vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]); 3266 vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
3265 vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]); 3267 vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
3266 vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]); 3268 vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
3267 } 3269 }
3268} 3270}
3269 3271
3270static void ept_save_pdptrs(struct kvm_vcpu *vcpu) 3272static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
3271{ 3273{
3274 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
3275
3272 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { 3276 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
3273 vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0); 3277 mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
3274 vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1); 3278 mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
3275 vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2); 3279 mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
3276 vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3); 3280 mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
3277 } 3281 }
3278 3282
3279 __set_bit(VCPU_EXREG_PDPTR, 3283 __set_bit(VCPU_EXREG_PDPTR,
@@ -7777,10 +7781,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7777 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); 7781 vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
7778 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); 7782 vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
7779 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); 7783 vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
7780 __clear_bit(VCPU_EXREG_PDPTR,
7781 (unsigned long *)&vcpu->arch.regs_avail);
7782 __clear_bit(VCPU_EXREG_PDPTR,
7783 (unsigned long *)&vcpu->arch.regs_dirty);
7784 } 7784 }
7785 7785
7786 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); 7786 kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 96b2c6697c9d..992d63bb154f 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -16,7 +16,7 @@ clean-files := inat-tables.c
16 16
17obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o 17obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
18 18
19lib-y := delay.o 19lib-y := delay.o misc.o
20lib-y += thunk_$(BITS).o 20lib-y += thunk_$(BITS).o
21lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o 21lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
22lib-y += memcpy_$(BITS).o 22lib-y += memcpy_$(BITS).o
diff --git a/arch/x86/lib/misc.c b/arch/x86/lib/misc.c
new file mode 100644
index 000000000000..76b373af03f0
--- /dev/null
+++ b/arch/x86/lib/misc.c
@@ -0,0 +1,21 @@
1/*
2 * Count the digits of @val including a possible sign.
3 *
4 * (Typed on and submitted from hpa's mobile phone.)
5 */
6int num_digits(int val)
7{
8 int m = 10;
9 int d = 1;
10
11 if (val < 0) {
12 d++;
13 val = -val;
14 }
15
16 while (val >= m) {
17 m *= 10;
18 d++;
19 }
20 return d;
21}
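num_digits() counts the sign as an extra position, so num_digits(0) == 1, num_digits(42) == 2 and num_digits(-42) == 3. Its consumer is the column sizing in announce_cpu() above:

	/* width for the widest possible "#<cpu>" token */
	int width = num_digits(num_possible_cpus()) + 1;	/* + '#' sign */

	pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);
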
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 4f74d94c8d97..ddf9ecb53cc3 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -11,39 +11,26 @@
11#include <linux/sched.h> 11#include <linux/sched.h>
12 12
13/* 13/*
14 * best effort, GUP based copy_from_user() that is NMI-safe 14 * We rely on the nested NMI work to allow atomic faults from the NMI path; the
15 * nested NMI paths are careful to preserve CR2.
15 */ 16 */
16unsigned long 17unsigned long
17copy_from_user_nmi(void *to, const void __user *from, unsigned long n) 18copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
18{ 19{
19 unsigned long offset, addr = (unsigned long)from; 20 unsigned long ret;
20 unsigned long size, len = 0;
21 struct page *page;
22 void *map;
23 int ret;
24 21
25 if (__range_not_ok(from, n, TASK_SIZE)) 22 if (__range_not_ok(from, n, TASK_SIZE))
26 return len; 23 return 0;
27 24
28 do { 25 /*
29 ret = __get_user_pages_fast(addr, 1, 0, &page); 26 * Even though this function is typically called from NMI/IRQ context
30 if (!ret) 27 * disable pagefaults so that its behaviour is consistent even when
31 break; 28 * called from other contexts.
32 29 */
33 offset = addr & (PAGE_SIZE - 1); 30 pagefault_disable();
34 size = min(PAGE_SIZE - offset, n - len); 31 ret = __copy_from_user_inatomic(to, from, n);
35 32 pagefault_enable();
36 map = kmap_atomic(page); 33
37 memcpy(to, map+offset, size); 34 return ret;
38 kunmap_atomic(map);
39 put_page(page);
40
41 len += size;
42 to += size;
43 addr += size;
44
45 } while (len < n);
46
47 return len;
48} 35}
49EXPORT_SYMBOL_GPL(copy_from_user_nmi); 36EXPORT_SYMBOL_GPL(copy_from_user_nmi);
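The GUP-and-kmap loop is replaced by a plain __copy_from_user_inatomic() bracketed by pagefault_disable(): a fault then takes the atomic fixup path instead of sleeping, which is what makes the helper safe from NMI given that the nested-NMI entry code preserves CR2. Note the semantics flip: the function now returns the number of bytes *not* copied, and callers above test for != 0; the early __range_not_ok() return of 0 reads as "fully copied" for an invalid range, a wrinkle later adjusted upstream to return n. The pattern in general:

	/* Sketch: user copy that never sleeps; returns bytes NOT copied. */
	pagefault_disable();
	ret = __copy_from_user_inatomic(dst, usrc, len);
	pagefault_enable();
	if (ret)
		goto short_copy;	/* do not retry in atomic context */
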
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 3eb18acd0e40..e2f5e21c03b3 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -654,14 +654,13 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
654 * Returns number of bytes that could not be copied. 654 * Returns number of bytes that could not be copied.
655 * On success, this will be zero. 655 * On success, this will be zero.
656 */ 656 */
657unsigned long 657unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
658copy_to_user(void __user *to, const void *from, unsigned long n)
659{ 658{
660 if (access_ok(VERIFY_WRITE, to, n)) 659 if (access_ok(VERIFY_WRITE, to, n))
661 n = __copy_to_user(to, from, n); 660 n = __copy_to_user(to, from, n);
662 return n; 661 return n;
663} 662}
664EXPORT_SYMBOL(copy_to_user); 663EXPORT_SYMBOL(_copy_to_user);
665 664
666/** 665/**
667 * copy_from_user: - Copy a block of data from user space. 666 * copy_from_user: - Copy a block of data from user space.
@@ -679,8 +678,7 @@ EXPORT_SYMBOL(copy_to_user);
679 * If some data could not be copied, this function will pad the copied 678 * If some data could not be copied, this function will pad the copied
680 * data to the requested size using zero bytes. 679 * data to the requested size using zero bytes.
681 */ 680 */
682unsigned long 681unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
683_copy_from_user(void *to, const void __user *from, unsigned long n)
684{ 682{
685 if (access_ok(VERIFY_READ, from, n)) 683 if (access_ok(VERIFY_READ, from, n))
686 n = __copy_from_user(to, from, n); 684 n = __copy_from_user(to, from, n);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3aaeffcfd67a..7a517bb41060 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -51,7 +51,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
51 return 0; 51 return 0;
52} 52}
53 53
54static inline int __kprobes notify_page_fault(struct pt_regs *regs) 54static inline int __kprobes kprobes_fault(struct pt_regs *regs)
55{ 55{
56 int ret = 0; 56 int ret = 0;
57 57
@@ -1048,7 +1048,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
1048 return; 1048 return;
1049 1049
1050 /* kprobes don't want to hook the spurious faults: */ 1050 /* kprobes don't want to hook the spurious faults: */
1051 if (notify_page_fault(regs)) 1051 if (kprobes_fault(regs))
1052 return; 1052 return;
1053 /* 1053 /*
1054 * Don't take the mm semaphore here. If we fixup a prefetch 1054 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -1060,23 +1060,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
1060 } 1060 }
1061 1061
1062 /* kprobes don't want to hook the spurious faults: */ 1062 /* kprobes don't want to hook the spurious faults: */
1063 if (unlikely(notify_page_fault(regs))) 1063 if (unlikely(kprobes_fault(regs)))
1064 return; 1064 return;
1065 /*
1066 * It's safe to allow irq's after cr2 has been saved and the
1067 * vmalloc fault has been handled.
1068 *
1069 * User-mode registers count as a user access even for any
1070 * potential system fault or CPU buglet:
1071 */
1072 if (user_mode_vm(regs)) {
1073 local_irq_enable();
1074 error_code |= PF_USER;
1075 flags |= FAULT_FLAG_USER;
1076 } else {
1077 if (regs->flags & X86_EFLAGS_IF)
1078 local_irq_enable();
1079 }
1080 1065
1081 if (unlikely(error_code & PF_RSVD)) 1066 if (unlikely(error_code & PF_RSVD))
1082 pgtable_bad(regs, error_code, address); 1067 pgtable_bad(regs, error_code, address);
@@ -1088,8 +1073,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
1088 } 1073 }
1089 } 1074 }
1090 1075
1091 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
1092
1093 /* 1076 /*
1094 * If we're in an interrupt, have no user context or are running 1077 * If we're in an interrupt, have no user context or are running
1095 * in an atomic region then we must not take the fault: 1078 * in an atomic region then we must not take the fault:
@@ -1099,6 +1082,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
1099 return; 1082 return;
1100 } 1083 }
1101 1084
1085 /*
1086 * It's safe to allow irq's after cr2 has been saved and the
1087 * vmalloc fault has been handled.
1088 *
1089 * User-mode registers count as a user access even for any
1090 * potential system fault or CPU buglet:
1091 */
1092 if (user_mode_vm(regs)) {
1093 local_irq_enable();
1094 error_code |= PF_USER;
1095 flags |= FAULT_FLAG_USER;
1096 } else {
1097 if (regs->flags & X86_EFLAGS_IF)
1098 local_irq_enable();
1099 }
1100
1101 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
1102
1102 if (error_code & PF_WRITE) 1103 if (error_code & PF_WRITE)
1103 flags |= FAULT_FLAG_WRITE; 1104 flags |= FAULT_FLAG_WRITE;
1104 1105
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 79c216aa0e2b..516593e1ce33 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -772,13 +772,21 @@ out:
772 return; 772 return;
773} 773}
774 774
775static void bpf_jit_free_deferred(struct work_struct *work)
776{
777 struct sk_filter *fp = container_of(work, struct sk_filter, work);
778 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
779 struct bpf_binary_header *header = (void *)addr;
780
781 set_memory_rw(addr, header->pages);
782 module_free(NULL, header);
783 kfree(fp);
784}
785
775void bpf_jit_free(struct sk_filter *fp) 786void bpf_jit_free(struct sk_filter *fp)
776{ 787{
777 if (fp->bpf_func != sk_run_filter) { 788 if (fp->bpf_func != sk_run_filter) {
778 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; 789 INIT_WORK(&fp->work, bpf_jit_free_deferred);
779 struct bpf_binary_header *header = (void *)addr; 790 schedule_work(&fp->work);
780
781 set_memory_rw(addr, header->pages);
782 module_free(NULL, header);
783 } 791 }
784} 792}
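bpf_jit_free() can be invoked from the socket-filter release path in softirq context, where set_memory_rw() and module_free() must not be called because they may sleep; the fix defers the actual teardown to a workqueue, and the deferred callback also takes over freeing the filter itself. The idiom, with hypothetical names (struct obj and sleeping_cleanup() are illustrative):

	struct obj {
		struct work_struct work;
		/* ... payload ... */
	};

	static void obj_free_deferred(struct work_struct *work)
	{
		struct obj *o = container_of(work, struct obj, work);

		sleeping_cleanup(o);	/* may sleep: legal in a workqueue */
		kfree(o);
	}

	static void obj_free(struct obj *o)
	{
		INIT_WORK(&o->work, obj_free_deferred);
		schedule_work(&o->work);	/* defer out of atomic context */
	}
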
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index d6aa6e8315d1..5d04be5efb64 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -47,7 +47,7 @@ dump_user_backtrace_32(struct stack_frame_ia32 *head)
47 unsigned long bytes; 47 unsigned long bytes;
48 48
49 bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); 49 bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
50 if (bytes != sizeof(bufhead)) 50 if (bytes != 0)
51 return NULL; 51 return NULL;
52 52
53 fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); 53 fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
@@ -93,7 +93,7 @@ static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
93 unsigned long bytes; 93 unsigned long bytes;
94 94
95 bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); 95 bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
96 if (bytes != sizeof(bufhead)) 96 if (bytes != 0)
97 return NULL; 97 return NULL;
98 98
99 oprofile_add_trace(bufhead[0].return_address); 99 oprofile_add_trace(bufhead[0].return_address);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d1e4777b4e75..31d04758b76f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -278,6 +278,15 @@ static void __init xen_smp_prepare_boot_cpu(void)
278 old memory can be recycled */ 278 old memory can be recycled */
279 make_lowmem_page_readwrite(xen_initial_gdt); 279 make_lowmem_page_readwrite(xen_initial_gdt);
280 280
281#ifdef CONFIG_X86_32
282 /*
283 * Xen starts us with XEN_FLAT_RING1_DS, but linux code
284 * expects __USER_DS
285 */
286 loadsegment(ds, __USER_DS);
287 loadsegment(es, __USER_DS);
288#endif
289
281 xen_filter_cpu_maps(); 290 xen_filter_cpu_maps();
282 xen_setup_vcpu_info_placement(); 291 xen_setup_vcpu_info_placement();
283 } 292 }