Diffstat (limited to 'arch/x86')
59 files changed, 956 insertions, 469 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ee2fb9d37745..5cd6eea9b7b3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -123,6 +123,7 @@ config X86
 	select COMPAT_OLD_SIGACTION if IA32_EMULATION
 	select RTC_LIB
 	select HAVE_DEBUG_STACKOVERFLOW
+	select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
 
 config INSTRUCTION_DECODER
 	def_bool y
@@ -860,7 +861,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
-	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
+	depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI
 	---help---
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  integrated interrupt controller in the CPU. If you have a single-CPU
@@ -885,11 +886,11 @@ config X86_UP_IOAPIC
 
 config X86_LOCAL_APIC
 	def_bool y
-	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
 
 config X86_IO_APIC
 	def_bool y
-	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
+	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
 
 config X86_VISWS_APIC
 	def_bool y
@@ -1033,6 +1034,7 @@ config X86_REBOOTFIXUPS
 
 config MICROCODE
 	tristate "CPU microcode loading support"
+	depends on CPU_SUP_AMD || CPU_SUP_INTEL
 	select FW_LOADER
 	---help---
 
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 379814bc41e3..dce69a256896 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -71,7 +71,8 @@ GCOV_PROFILE := n
 $(obj)/bzImage: asflags-y := $(SVGA_MODE)
 
 quiet_cmd_image = BUILD $@
-cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/zoffset.h > $@
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
+	       $(obj)/zoffset.h $@
 
 $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
 	$(call if_changed,image)
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index c941d6a8887f..8e15b22391fc 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -5,14 +5,15 @@
  */
 
 /*
- * This file builds a disk-image from two different files:
+ * This file builds a disk-image from three different files:
  *
  * - setup: 8086 machine code, sets up system parm
  * - system: 80386 code for actual system
+ * - zoffset.h: header with ZO_* defines
  *
- * It does some checking that all files are of the correct type, and
- * just writes the result to stdout, removing headers and padding to
- * the right amount. It also writes some system data to stderr.
+ * It does some checking that all files are of the correct type, and writes
+ * the result to the specified destination, removing headers and padding to
+ * the right amount. It also writes some system data to stdout.
  */
 
 /*
@@ -136,7 +137,7 @@ static void die(const char * str, ...)
 
 static void usage(void)
 {
-	die("Usage: build setup system [zoffset.h] [> image]");
+	die("Usage: build setup system zoffset.h image");
 }
 
 #ifdef CONFIG_EFI_STUB
@@ -265,7 +266,7 @@ int main(int argc, char ** argv)
 	int c;
 	u32 sys_size;
 	struct stat sb;
-	FILE *file;
+	FILE *file, *dest;
 	int fd;
 	void *kernel;
 	u32 crc = 0xffffffffUL;
@@ -280,10 +281,13 @@ int main(int argc, char ** argv)
 	startup_64 = 0x200;
 #endif
 
-	if (argc == 4)
-		parse_zoffset(argv[3]);
-	else if (argc != 3)
+	if (argc != 5)
 		usage();
+	parse_zoffset(argv[3]);
+
+	dest = fopen(argv[4], "w");
+	if (!dest)
+		die("Unable to write `%s': %m", argv[4]);
 
 	/* Copy the setup code */
 	file = fopen(argv[1], "r");
@@ -318,7 +322,7 @@ int main(int argc, char ** argv)
 	/* Set the default root device */
 	put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
 
-	fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i);
+	printf("Setup is %d bytes (padded to %d bytes).\n", c, i);
 
 	/* Open and stat the kernel file */
 	fd = open(argv[2], O_RDONLY);
@@ -327,7 +331,7 @@ int main(int argc, char ** argv)
 	if (fstat(fd, &sb))
 		die("Unable to stat `%s': %m", argv[2]);
 	sz = sb.st_size;
-	fprintf (stderr, "System is %d kB\n", (sz+1023)/1024);
+	printf("System is %d kB\n", (sz+1023)/1024);
 	kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
 	if (kernel == MAP_FAILED)
 		die("Unable to mmap '%s': %m", argv[2]);
@@ -348,27 +352,31 @@ int main(int argc, char ** argv)
 #endif
 
 	crc = partial_crc32(buf, i, crc);
-	if (fwrite(buf, 1, i, stdout) != i)
+	if (fwrite(buf, 1, i, dest) != i)
 		die("Writing setup failed");
 
 	/* Copy the kernel code */
 	crc = partial_crc32(kernel, sz, crc);
-	if (fwrite(kernel, 1, sz, stdout) != sz)
+	if (fwrite(kernel, 1, sz, dest) != sz)
 		die("Writing kernel failed");
 
 	/* Add padding leaving 4 bytes for the checksum */
 	while (sz++ < (sys_size*16) - 4) {
 		crc = partial_crc32_one('\0', crc);
-		if (fwrite("\0", 1, 1, stdout) != 1)
+		if (fwrite("\0", 1, 1, dest) != 1)
 			die("Writing padding failed");
 	}
 
 	/* Write the CRC */
-	fprintf(stderr, "CRC %x\n", crc);
+	printf("CRC %x\n", crc);
 	put_unaligned_le32(crc, buf);
-	if (fwrite(buf, 1, 4, stdout) != 4)
+	if (fwrite(buf, 1, 4, dest) != 4)
 		die("Writing CRC failed");
 
+	/* Catch any delayed write failures */
+	if (fclose(dest))
+		die("Writing image failed");
+
 	close(fd);
 
 	/* Everything is OK */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 94447086e551..a7fef2621cc9 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -142,6 +142,8 @@ CONFIG_MAC80211=y
 CONFIG_MAC80211_LEDS=y
 CONFIG_RFKILL=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_DEBUG_DEVRES=y
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 671524d0f6c0..c1119d4c1281 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -141,6 +141,8 @@ CONFIG_MAC80211=y
 CONFIG_MAC80211_LEDS=y
 CONFIG_RFKILL=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_DEBUG_DEVRES=y
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 722aa3b04624..da31c8b8a92d 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -6,6 +6,7 @@
 #include <asm/processor.h>
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
+#include <asm/rmwcc.h>
 
 /*
  * Atomic operations that C can't guarantee us. Useful for
@@ -76,12 +77,7 @@ static inline void atomic_sub(int i, atomic_t *v)
  */
 static inline int atomic_sub_and_test(int i, atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, i, "%0", "e");
 }
 
 /**
@@ -118,12 +114,7 @@ static inline void atomic_dec(atomic_t *v)
  */
 static inline int atomic_dec_and_test(atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "decl %0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
 }
 
 /**
@@ -136,12 +127,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
  */
 static inline int atomic_inc_and_test(atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "incl %0; sete %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
 }
 
 /**
@@ -155,12 +141,7 @@ static inline int atomic_inc_and_test(atomic_t *v)
  */
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
-		     : "+m" (v->counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, i, "%0", "s");
 }
 
 /**
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 0e1cbfc8ee06..3f065c985aee 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -72,12 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
  */
 static inline int atomic64_sub_and_test(long i, atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "er" (i), "m" (v->counter) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, i, "%0", "e");
 }
 
 /**
@@ -116,12 +111,7 @@ static inline void atomic64_dec(atomic64_t *v)
  */
 static inline int atomic64_dec_and_test(atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "decq %0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "m" (v->counter) : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e");
 }
 
 /**
@@ -134,12 +124,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v)
  */
 static inline int atomic64_inc_and_test(atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "incq %0; sete %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "m" (v->counter) : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e");
 }
 
 /**
@@ -153,12 +138,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v)
  */
 static inline int atomic64_add_negative(long i, atomic64_t *v)
 {
-	unsigned char c;
-
-	asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
-		     : "=m" (v->counter), "=qm" (c)
-		     : "er" (i), "m" (v->counter) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, i, "%0", "s");
 }
 
 /**
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 41639ce8fd63..6d76d0935989 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -14,6 +14,7 @@
 
 #include <linux/compiler.h>
 #include <asm/alternative.h>
+#include <asm/rmwcc.h>
 
 #if BITS_PER_LONG == 32
 # define _BITOPS_LONG_SHIFT 5
@@ -204,12 +205,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
  */
 static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
-		     "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, nr, "%0", "c");
 }
 
 /**
@@ -255,13 +251,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, nr, "%0", "c");
 }
 
 /**
@@ -314,13 +304,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
  */
 static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-	int oldbit;
-
-	asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
-		     "sbb %0,%0"
-		     : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
-	return oldbit;
+	GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, nr, "%0", "c");
 }
 
 static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 0fa675033912..cb4c73bfeb48 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -48,6 +48,8 @@ For 32-bit we have the following conventions - kernel is built with
 
 #include <asm/dwarf2.h>
 
+#ifdef CONFIG_X86_64
+
 /*
  * 64-bit system call stack frame layout defines and helpers,
  * for assembly code:
@@ -192,3 +194,51 @@ For 32-bit we have the following conventions - kernel is built with
 	.macro icebp
 	.byte 0xf1
 	.endm
+
+#else /* CONFIG_X86_64 */
+
+/*
+ * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
+ * are different from the entry_32.S versions in not changing the segment
+ * registers. So only suitable for in kernel use, not when transitioning
+ * from or to user space. The resulting stack frame is not a standard
+ * pt_regs frame. The main use case is calling C code from assembler
+ * when all the registers need to be preserved.
+ */
+
+	.macro SAVE_ALL
+	pushl_cfi %eax
+	CFI_REL_OFFSET eax, 0
+	pushl_cfi %ebp
+	CFI_REL_OFFSET ebp, 0
+	pushl_cfi %edi
+	CFI_REL_OFFSET edi, 0
+	pushl_cfi %esi
+	CFI_REL_OFFSET esi, 0
+	pushl_cfi %edx
+	CFI_REL_OFFSET edx, 0
+	pushl_cfi %ecx
+	CFI_REL_OFFSET ecx, 0
+	pushl_cfi %ebx
+	CFI_REL_OFFSET ebx, 0
+	.endm
+
+	.macro RESTORE_ALL
+	popl_cfi %ebx
+	CFI_RESTORE ebx
+	popl_cfi %ecx
+	CFI_RESTORE ecx
+	popl_cfi %edx
+	CFI_RESTORE edx
+	popl_cfi %esi
+	CFI_RESTORE esi
+	popl_cfi %edi
+	CFI_RESTORE edi
+	popl_cfi %ebp
+	CFI_RESTORE ebp
+	popl_cfi %eax
+	CFI_RESTORE eax
+	.endm
+
+#endif /* CONFIG_X86_64 */
+
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d3f5c63078d8..89270b4318db 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 		 * Catch too early usage of this before alternatives
 		 * have run.
 		 */
-		asm goto("1: jmp %l[t_warn]\n"
+		asm_volatile_goto("1: jmp %l[t_warn]\n"
			 "2:\n"
			 ".section .altinstructions,\"a\"\n"
			 " .long 1b - .\n"
@@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 
 #endif
 
-		asm goto("1: jmp %l[t_no]\n"
+		asm_volatile_goto("1: jmp %l[t_no]\n"
			 "2:\n"
			 ".section .altinstructions,\"a\"\n"
			 " .long 1b - .\n"
@@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 	 * have. Thus, we force the jump to the widest, 4-byte, signed relative
 	 * offset even though the last would often fit in less bytes.
 	 */
-	asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+	asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
		"2:\n"
		".section .altinstructions,\"a\"\n"
		" .long 1b - .\n" /* src offset */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 64507f35800c..6a2cefb4395a 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -18,7 +18,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-	asm goto("1:"
+	asm_volatile_goto("1:"
		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
		".pushsection __jump_table, \"aw\" \n\t"
		_ASM_ALIGN "\n\t"
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 2d89e3980cbd..5b23e605e707 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -52,12 +52,7 @@ static inline void local_sub(long i, local_t *l)
  */
 static inline int local_sub_and_test(long i, local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_SUB "%2,%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, i, "%0", "e");
 }
 
 /**
@@ -70,12 +65,7 @@ static inline int local_sub_and_test(long i, local_t *l)
  */
 static inline int local_dec_and_test(local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_DEC "%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e");
 }
 
 /**
@@ -88,12 +78,7 @@ static inline int local_dec_and_test(local_t *l)
  */
 static inline int local_inc_and_test(local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_INC "%0; sete %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : : "memory");
-	return c != 0;
+	GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e");
 }
 
 /**
@@ -107,12 +92,7 @@ static inline int local_inc_and_test(local_t *l)
  */
 static inline int local_add_negative(long i, local_t *l)
 {
-	unsigned char c;
-
-	asm volatile(_ASM_ADD "%2,%0; sets %1"
-		     : "+m" (l->a.counter), "=qm" (c)
-		     : "ir" (i) : "memory");
-	return c;
+	GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, i, "%0", "s");
 }
 
 /**
diff --git a/arch/x86/include/asm/misc.h b/arch/x86/include/asm/misc.h
new file mode 100644
index 000000000000..475f5bbc7f53
--- /dev/null
+++ b/arch/x86/include/asm/misc.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_MISC_H
+#define _ASM_X86_MISC_H
+
+int num_digits(int val);
+
+#endif /* _ASM_X86_MISC_H */
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index e7e6751648ed..07537a44216e 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -20,7 +20,7 @@
 static inline void __mutex_fastpath_lock(atomic_t *v,
					  void (*fail_fn)(atomic_t *))
 {
-	asm volatile goto(LOCK_PREFIX " decl %0\n"
+	asm_volatile_goto(LOCK_PREFIX " decl %0\n"
			  " jns %l[exit]\n"
			  : : "m" (v->counter)
			  : "memory", "cc"
@@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
 static inline void __mutex_fastpath_unlock(atomic_t *v,
					    void (*fail_fn)(atomic_t *))
 {
-	asm volatile goto(LOCK_PREFIX " incl %0\n"
+	asm_volatile_goto(LOCK_PREFIX " incl %0\n"
			  " jg %l[exit]\n"
			  : : "m" (v->counter)
			  : "memory", "cc"
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0da5200ee79d..b3e18f800302 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -128,7 +128,8 @@ do { \
 do { \
	typedef typeof(var) pao_T__; \
	const int pao_ID__ = (__builtin_constant_p(val) && \
-			      ((val) == 1 || (val) == -1)) ? (val) : 0; \
+			      ((val) == 1 || (val) == -1)) ? \
+				(int)(val) : 0; \
	if (0) { \
		pao_T__ pao_tmp__; \
		pao_tmp__ = (val); \
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
new file mode 100644
index 000000000000..8729723636fd
--- /dev/null
+++ b/arch/x86/include/asm/preempt.h
@@ -0,0 +1,100 @@
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <asm/rmwcc.h>
+#include <asm/percpu.h>
+#include <linux/thread_info.h>
+
+DECLARE_PER_CPU(int, __preempt_count);
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
+static __always_inline int preempt_count(void)
+{
+	return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void preempt_count_set(int pc)
+{
+	__this_cpu_write_4(__preempt_count, pc);
+}
+
+/*
+ * must be macros to avoid header recursion hell
+ */
+#define task_preempt_count(p) \
+	(task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED)
+
+#define init_task_preempt_count(p) do { \
+	task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+	task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \
+	per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
+} while (0)
+
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+	__this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+	__this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+	return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
+}
+
+/*
+ * The various preempt_count add/sub methods
+ */
+
+static __always_inline void __preempt_count_add(int val)
+{
+	__this_cpu_add_4(__preempt_count, val);
+}
+
+static __always_inline void __preempt_count_sub(int val)
+{
+	__this_cpu_add_4(__preempt_count, -val);
+}
+
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+	GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
+}
+
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
+static __always_inline bool should_resched(void)
+{
+	return unlikely(!__this_cpu_read_4(__preempt_count));
+}
+
+#ifdef CONFIG_PREEMPT
+  extern asmlinkage void ___preempt_schedule(void);
+# define __preempt_schedule() asm ("call ___preempt_schedule")
+  extern asmlinkage void preempt_schedule(void);
+# ifdef CONFIG_CONTEXT_TRACKING
+  extern asmlinkage void ___preempt_schedule_context(void);
+# define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
+# endif
+#endif
+
+#endif /* __ASM_PREEMPT_H */
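For context, a minimal sketch (not part of this patch) of how the generic preempt_enable() fast path is meant to consume the helpers above: since PREEMPT_NEED_RESCHED is folded, inverted, into the per-cpu count, a single decrement-and-test both drops the count and detects "count reached zero and a reschedule is pending". The real wrapper lives in the generic <linux/preempt.h> and goes through preempt_count_dec_and_test(); the macro name below is illustrative only.

/* Sketch only - illustrative name, assumed shape of the generic fast path. */
#define sketch_preempt_enable() \
do { \
	barrier(); \
	if (unlikely(__preempt_count_dec_and_test())) \
		__preempt_schedule(); \
} while (0)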
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
new file mode 100644
index 000000000000..1ff990f1de8e
--- /dev/null
+++ b/arch/x86/include/asm/rmwcc.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_X86_RMWcc
+#define _ASM_X86_RMWcc
+
+#ifdef CC_HAVE_ASM_GOTO
+
+#define __GEN_RMWcc(fullop, var, cc, ...) \
+do { \
+	asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \
+			: : "m" (var), ## __VA_ARGS__ \
+			: "memory" : cc_label); \
+	return 0; \
+cc_label: \
+	return 1; \
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \
+	__GEN_RMWcc(op " %1, " arg0, var, cc, "er" (val))
+
+#else /* !CC_HAVE_ASM_GOTO */
+
+#define __GEN_RMWcc(fullop, var, cc, ...) \
+do { \
+	char c; \
+	asm volatile (fullop "; set" cc " %1" \
+			: "+m" (var), "=qm" (c) \
+			: __VA_ARGS__ : "memory"); \
+	return c != 0; \
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \
+	__GEN_RMWcc(op " %2, " arg0, var, cc, "er" (val))
+
+#endif /* CC_HAVE_ASM_GOTO */
+
+#endif /* _ASM_X86_RMWcc */
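To see what the new macros buy, here is roughly what atomic_dec_and_test() from the atomic.h hunk above becomes once GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e") is substituted on a compiler with asm goto support (CC_HAVE_ASM_GOTO): the flags produced by the locked decl feed a conditional jump directly, instead of being materialized into a register with sete and re-tested by the caller. The function name is invented for illustration; the real code is generated by the macro.

/* Illustrative expansion only, under CC_HAVE_ASM_GOTO. */
static inline int atomic_dec_and_test_expanded(atomic_t *v)
{
	asm_volatile_goto(LOCK_PREFIX "decl %0; je %l[cc_label]"
			: : "m" (v->counter)
			: "memory" : cc_label);
	return 0;
cc_label:
	return 1;
}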
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 27811190cbd7..c46a46be1ec6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -28,8 +28,7 @@ struct thread_info {
	__u32			flags;		/* low level flags */
	__u32			status;		/* thread synchronous flags */
	__u32			cpu;		/* current CPU */
-	int			preempt_count;	/* 0 => preemptable,
-						   <0 => BUG */
+	int			saved_preempt_count;
	mm_segment_t		addr_limit;
	struct restart_block	restart_block;
	void __user		*sysenter_return;
@@ -49,7 +48,7 @@ struct thread_info {
	.exec_domain	= &default_exec_domain, \
	.flags		= 0, \
	.cpu		= 0, \
-	.preempt_count	= INIT_PREEMPT_COUNT, \
+	.saved_preempt_count = INIT_PREEMPT_COUNT, \
	.addr_limit	= KERNEL_DS, \
	.restart_block	= { \
		.fn = do_no_restart_syscall, \
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5838fa911aa0..8ec57c07b125 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -542,5 +542,103 @@ extern struct movsl_mask {
 # include <asm/uaccess_64.h>
 #endif
 
+unsigned long __must_check _copy_from_user(void *to, const void __user *from,
+					    unsigned n);
+unsigned long __must_check _copy_to_user(void __user *to, const void *from,
+					  unsigned n);
+
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+# define copy_user_diag __compiletime_error
+#else
+# define copy_user_diag __compiletime_warning
+#endif
+
+extern void copy_user_diag("copy_from_user() buffer size is too small")
+copy_from_user_overflow(void);
+extern void copy_user_diag("copy_to_user() buffer size is too small")
+copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
+
+#undef copy_user_diag
+
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+
+extern void
+__compiletime_warning("copy_from_user() buffer size is not provably correct")
+__copy_from_user_overflow(void) __asm__("copy_from_user_overflow");
+#define __copy_from_user_overflow(size, count) __copy_from_user_overflow()
+
+extern void
+__compiletime_warning("copy_to_user() buffer size is not provably correct")
+__copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
+#define __copy_to_user_overflow(size, count) __copy_to_user_overflow()
+
+#else
+
+static inline void
+__copy_from_user_overflow(int size, unsigned long count)
+{
+	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
+
+#define __copy_to_user_overflow __copy_from_user_overflow
+
+#endif
+
+static inline unsigned long __must_check
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	int sz = __compiletime_object_size(to);
+
+	might_fault();
+
+	/*
+	 * While we would like to have the compiler do the checking for us
+	 * even in the non-constant size case, any false positives there are
+	 * a problem (especially when DEBUG_STRICT_USER_COPY_CHECKS, but even
+	 * without - the [hopefully] dangerous looking nature of the warning
+	 * would make people go look at the respecitive call sites over and
+	 * over again just to find that there's no problem).
+	 *
+	 * And there are cases where it's just not realistic for the compiler
+	 * to prove the count to be in range. For example when multiple call
+	 * sites of a helper function - perhaps in different source files -
+	 * all doing proper range checking, yet the helper function not doing
+	 * so again.
+	 *
+	 * Therefore limit the compile time checking to the constant size
+	 * case, and do only runtime checking for non-constant sizes.
+	 */
+
+	if (likely(sz < 0 || sz >= n))
+		n = _copy_from_user(to, from, n);
+	else if(__builtin_constant_p(n))
+		copy_from_user_overflow();
+	else
+		__copy_from_user_overflow(sz, n);
+
+	return n;
+}
+
+static inline unsigned long __must_check
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	int sz = __compiletime_object_size(from);
+
+	might_fault();
+
+	/* See the comment in copy_from_user() above. */
+	if (likely(sz < 0 || sz >= n))
+		n = _copy_to_user(to, from, n);
+	else if(__builtin_constant_p(n))
+		copy_to_user_overflow();
+	else
+		__copy_to_user_overflow(sz, n);
+
+	return n;
+}
+
+#undef __copy_from_user_overflow
+#undef __copy_to_user_overflow
+
 #endif /* _ASM_X86_UACCESS_H */
 
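A hedged example of what the consolidated checks above catch (the struct and function names below are hypothetical, not from the patch): a compile-time-constant size that is provably larger than the destination object hits the copy_user_diag declaration, so the build emits a warning, or an error under CONFIG_DEBUG_STRICT_USER_COPY_CHECKS; a non-constant oversized size is only caught at runtime via __copy_from_user_overflow().

/* Hypothetical caller, for illustration only. */
struct example_req {
	char name[16];
};

static int example_copy(void __user *arg, unsigned long runtime_len)
{
	struct example_req req;

	/* Constant size, provably too big: diagnosed at compile time. */
	if (copy_from_user(&req, arg, sizeof(req) + 1))
		return -EFAULT;
	/* Non-constant size: only the runtime WARN path can catch an overflow. */
	if (copy_from_user(&req, arg, runtime_len))
		return -EFAULT;
	return 0;
}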
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 7f760a9f1f61..3c03a5de64d3 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -184,33 +184,4 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from,
 	return __copy_from_user_ll_nocache_nozero(to, from, n);
 }
 
-unsigned long __must_check copy_to_user(void __user *to,
-					const void *from, unsigned long n);
-unsigned long __must_check _copy_from_user(void *to,
-					   const void __user *from,
-					   unsigned long n);
-
-
-extern void copy_from_user_overflow(void)
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-	__compiletime_error("copy_from_user() buffer size is not provably correct")
-#else
-	__compiletime_warning("copy_from_user() buffer size is not provably correct")
-#endif
-;
-
-static inline unsigned long __must_check copy_from_user(void *to,
-					  const void __user *from,
-					  unsigned long n)
-{
-	int sz = __compiletime_object_size(to);
-
-	if (likely(sz == -1 || sz >= n))
-		n = _copy_from_user(to, from, n);
-	else
-		copy_from_user_overflow();
-
-	return n;
-}
-
 #endif /* _ASM_X86_UACCESS_32_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 4f7923dd0007..0acae710fa00 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -46,36 +46,8 @@ copy_user_generic(void *to, const void *from, unsigned len)
 }
 
 __must_check unsigned long
-_copy_to_user(void __user *to, const void *from, unsigned len);
-__must_check unsigned long
-_copy_from_user(void *to, const void __user *from, unsigned len);
-__must_check unsigned long
 copy_in_user(void __user *to, const void __user *from, unsigned len);
 
-static inline unsigned long __must_check copy_from_user(void *to,
-					  const void __user *from,
-					  unsigned long n)
-{
-	int sz = __compiletime_object_size(to);
-
-	might_fault();
-	if (likely(sz == -1 || sz >= n))
-		n = _copy_from_user(to, from, n);
-#ifdef CONFIG_DEBUG_VM
-	else
-		WARN(1, "Buffer overflow detected!\n");
-#endif
-	return n;
-}
-
-static __always_inline __must_check
-int copy_to_user(void __user *dst, const void *src, unsigned size)
-{
-	might_fault();
-
-	return _copy_to_user(dst, src, size);
-}
-
 static __always_inline __must_check
 int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 6e5197910fd8..3087ea9c5f2e 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -35,7 +35,10 @@ typedef u8 uprobe_opcode_t;
 
 struct arch_uprobe {
 	u16			fixups;
-	u8			insn[MAX_UINSN_BYTES];
+	union {
+		u8			insn[MAX_UINSN_BYTES];
+		u8			ixol[MAX_UINSN_BYTES];
+	};
 #ifdef CONFIG_X86_64
 	unsigned long		rip_rela_target_address;
 #endif
@@ -49,11 +52,4 @@ struct arch_uprobe_task {
 	unsigned int		saved_tf;
 };
 
-extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
-extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
-extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
-extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
-extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif /* _ASM_UPROBES_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a5408b965c9d..9b0a34e2cd79 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,6 +36,8 @@ obj-y += tsc.o io_delay.o rtc.o
 obj-y += pci-iommu_table.o
 obj-y += resource.o
 
+obj-$(CONFIG_PREEMPT) += preempt.o
+
 obj-y += process.o
 obj-y += i387.o xsave.o
 obj-y += ptrace.o
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1191ac1c9d25..a419814cea57 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -113,7 +113,7 @@ static int __init early_get_pnodeid(void)
 		break;
 	case UV3_HUB_PART_NUMBER:
 	case UV3_HUB_PART_NUMBER_X:
-		uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1;
+		uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
 		break;
 	}
 
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 28610822fb3c..9f6b9341950f 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -32,7 +32,6 @@ void common(void) {
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_status, thread_info, status);
 	OFFSET(TI_addr_limit, thread_info, addr_limit);
-	OFFSET(TI_preempt_count, thread_info, preempt_count);
 
 	BLANK();
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2793d1f095a2..5223fe6dec7b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1095,6 +1095,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
 
 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
+
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
 /*
@@ -1169,6 +1172,8 @@ void debug_stack_reset(void)
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 897783b3302a..8e132931614d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void)
 static int __kprobes
 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
-	int ret;
 	u64 start_clock;
 	u64 finish_clock;
+	int ret;
 
 	if (!atomic_read(&active_events))
 		return NMI_DONE;
 
-	start_clock = local_clock();
+	start_clock = sched_clock();
 	ret = x86_pmu.handle_irq(regs);
-	finish_clock = local_clock();
+	finish_clock = sched_clock();
 
 	perf_sample_event_took(finish_clock - start_clock);
 
@@ -1888,10 +1888,7 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
-	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
-		return;
-
-	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+	if (!sched_clock_stable)
 		return;
 
 	userpg->cap_user_time = 1;
@@ -1899,10 +1896,8 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 	userpg->time_shift = CYC2NS_SCALE_FACTOR;
 	userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
 
-	if (sched_clock_stable && !check_tsc_disabled()) {
-		userpg->cap_user_time_zero = 1;
-		userpg->time_zero = this_cpu_read(cyc2ns_offset);
-	}
+	userpg->cap_user_time_zero = 1;
+	userpg->time_zero = this_cpu_read(cyc2ns_offset);
 }
 
 /*
@@ -1994,7 +1989,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		frame.return_address = 0;
 
 		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
-		if (bytes != sizeof(frame))
+		if (bytes != 0)
 			break;
 
 		if (!valid_user_frame(fp, sizeof(frame)))
@@ -2046,7 +2041,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		frame.return_address = 0;
 
 		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
-		if (bytes != sizeof(frame))
+		if (bytes != 0)
			break;
 
 		if (!valid_user_frame(fp, sizeof(frame)))
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index cc16faae0538..fd00bb29425d 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -164,6 +164,11 @@ struct cpu_hw_events {
 	struct perf_guest_switch_msr	guest_switch_msrs[X86_PMC_IDX_MAX];
 
 	/*
+	 * Intel checkpoint mask
+	 */
+	u64				intel_cp_status;
+
+	/*
 	 * manage shared (per-core, per-cpu) registers
 	 * used on Intel NHM/WSM/SNB
 	 */
@@ -440,6 +445,7 @@ struct x86_pmu {
 	int		lbr_nr;			/* hardware stack size */
 	u64		lbr_sel_mask;		/* LBR_SELECT valid bits */
 	const int	*lbr_sel_map;		/* lbr_select mappings */
+	bool		lbr_double_abort;	/* duplicated lbr aborts */
 
 	/*
 	 * Extra registers for events
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index f31a1655d1ff..0fa4f242f050 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -190,9 +190,9 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { | |||
190 | EVENT_EXTRA_END | 190 | EVENT_EXTRA_END |
191 | }; | 191 | }; |
192 | 192 | ||
193 | EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); | 193 | EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); |
194 | EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); | 194 | EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); |
195 | EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); | 195 | EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); |
196 | 196 | ||
197 | struct attribute *nhm_events_attrs[] = { | 197 | struct attribute *nhm_events_attrs[] = { |
198 | EVENT_PTR(mem_ld_nhm), | 198 | EVENT_PTR(mem_ld_nhm), |
@@ -1184,6 +1184,11 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) | |||
1184 | wrmsrl(hwc->config_base, ctrl_val); | 1184 | wrmsrl(hwc->config_base, ctrl_val); |
1185 | } | 1185 | } |
1186 | 1186 | ||
1187 | static inline bool event_is_checkpointed(struct perf_event *event) | ||
1188 | { | ||
1189 | return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; | ||
1190 | } | ||
1191 | |||
1187 | static void intel_pmu_disable_event(struct perf_event *event) | 1192 | static void intel_pmu_disable_event(struct perf_event *event) |
1188 | { | 1193 | { |
1189 | struct hw_perf_event *hwc = &event->hw; | 1194 | struct hw_perf_event *hwc = &event->hw; |
@@ -1197,6 +1202,7 @@ static void intel_pmu_disable_event(struct perf_event *event) | |||
1197 | 1202 | ||
1198 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); | 1203 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); |
1199 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); | 1204 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); |
1205 | cpuc->intel_cp_status &= ~(1ull << hwc->idx); | ||
1200 | 1206 | ||
1201 | /* | 1207 | /* |
1202 | * must disable before any actual event | 1208 | * must disable before any actual event |
@@ -1271,6 +1277,9 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
1271 | if (event->attr.exclude_guest) | 1277 | if (event->attr.exclude_guest) |
1272 | cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); | 1278 | cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); |
1273 | 1279 | ||
1280 | if (unlikely(event_is_checkpointed(event))) | ||
1281 | cpuc->intel_cp_status |= (1ull << hwc->idx); | ||
1282 | |||
1274 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 1283 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
1275 | intel_pmu_enable_fixed(hwc); | 1284 | intel_pmu_enable_fixed(hwc); |
1276 | return; | 1285 | return; |
@@ -1289,6 +1298,17 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
1289 | int intel_pmu_save_and_restart(struct perf_event *event) | 1298 | int intel_pmu_save_and_restart(struct perf_event *event) |
1290 | { | 1299 | { |
1291 | x86_perf_event_update(event); | 1300 | x86_perf_event_update(event); |
1301 | /* | ||
1302 | * For a checkpointed counter always reset back to 0. This | ||
1303 | * avoids a situation where the counter overflows, aborts the | ||
1304 | * transaction and is then set back to shortly before the | ||
1305 | * overflow, and overflows and aborts again. | ||
1306 | */ | ||
1307 | if (unlikely(event_is_checkpointed(event))) { | ||
1308 | /* No race with NMIs because the counter should not be armed */ | ||
1309 | wrmsrl(event->hw.event_base, 0); | ||
1310 | local64_set(&event->hw.prev_count, 0); | ||
1311 | } | ||
1292 | return x86_perf_event_set_period(event); | 1312 | return x86_perf_event_set_period(event); |
1293 | } | 1313 | } |
1294 | 1314 | ||
@@ -1372,6 +1392,13 @@ again: | |||
1372 | x86_pmu.drain_pebs(regs); | 1392 | x86_pmu.drain_pebs(regs); |
1373 | } | 1393 | } |
1374 | 1394 | ||
1395 | /* | ||
1396 | * Checkpointed counters can lead to 'spurious' PMIs because the | ||
1397 | * rollback caused by the PMI will have cleared the overflow status | ||
1398 | * bit. Therefore always force probe these counters. | ||
1399 | */ | ||
1400 | status |= cpuc->intel_cp_status; | ||
1401 | |||
1375 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 1402 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
1376 | struct perf_event *event = cpuc->events[bit]; | 1403 | struct perf_event *event = cpuc->events[bit]; |
1377 | 1404 | ||
@@ -1837,6 +1864,20 @@ static int hsw_hw_config(struct perf_event *event) | |||
1837 | event->attr.precise_ip > 0)) | 1864 | event->attr.precise_ip > 0)) |
1838 | return -EOPNOTSUPP; | 1865 | return -EOPNOTSUPP; |
1839 | 1866 | ||
1867 | if (event_is_checkpointed(event)) { | ||
1868 | /* | ||
1869 | * Sampling of checkpointed events can cause situations where | ||
1870 | * the CPU constantly aborts because of an overflow, which is | ||
1871 | * then checkpointed back and ignored. Forbid checkpointing | ||
1872 | * for sampling. | ||
1873 | * | ||
1874 | * But still allow a long sampling period, so that perf stat | ||
1875 | * from KVM works. | ||
1876 | */ | ||
1877 | if (event->attr.sample_period > 0 && | ||
1878 | event->attr.sample_period < 0x7fffffff) | ||
1879 | return -EOPNOTSUPP; | ||
1880 | } | ||
1840 | return 0; | 1881 | return 0; |
1841 | } | 1882 | } |
1842 | 1883 | ||
@@ -2182,10 +2223,36 @@ static __init void intel_nehalem_quirk(void) | |||
2182 | } | 2223 | } |
2183 | } | 2224 | } |
2184 | 2225 | ||
2185 | EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); | 2226 | EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); |
2186 | EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") | 2227 | EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") |
2228 | |||
2229 | /* Haswell special events */ | ||
2230 | EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); | ||
2231 | EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); | ||
2232 | EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); | ||
2233 | EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); | ||
2234 | EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); | ||
2235 | EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); | ||
2236 | EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); | ||
2237 | EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); | ||
2238 | EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); | ||
2239 | EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); | ||
2240 | EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); | ||
2241 | EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1"); | ||
2187 | 2242 | ||
2188 | static struct attribute *hsw_events_attrs[] = { | 2243 | static struct attribute *hsw_events_attrs[] = { |
2244 | EVENT_PTR(tx_start), | ||
2245 | EVENT_PTR(tx_commit), | ||
2246 | EVENT_PTR(tx_abort), | ||
2247 | EVENT_PTR(tx_capacity), | ||
2248 | EVENT_PTR(tx_conflict), | ||
2249 | EVENT_PTR(el_start), | ||
2250 | EVENT_PTR(el_commit), | ||
2251 | EVENT_PTR(el_abort), | ||
2252 | EVENT_PTR(el_capacity), | ||
2253 | EVENT_PTR(el_conflict), | ||
2254 | EVENT_PTR(cycles_t), | ||
2255 | EVENT_PTR(cycles_ct), | ||
2189 | EVENT_PTR(mem_ld_hsw), | 2256 | EVENT_PTR(mem_ld_hsw), |
2190 | EVENT_PTR(mem_st_hsw), | 2257 | EVENT_PTR(mem_st_hsw), |
2191 | NULL | 2258 | NULL |
@@ -2452,6 +2519,7 @@ __init int intel_pmu_init(void) | |||
2452 | x86_pmu.hw_config = hsw_hw_config; | 2519 | x86_pmu.hw_config = hsw_hw_config; |
2453 | x86_pmu.get_event_constraints = hsw_get_event_constraints; | 2520 | x86_pmu.get_event_constraints = hsw_get_event_constraints; |
2454 | x86_pmu.cpu_events = hsw_events_attrs; | 2521 | x86_pmu.cpu_events = hsw_events_attrs; |
2522 | x86_pmu.lbr_double_abort = true; | ||
2455 | pr_cont("Haswell events, "); | 2523 | pr_cont("Haswell events, "); |
2456 | break; | 2524 | break; |
2457 | 2525 | ||
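The checkpointed-counter handling added above has two parts: intel_pmu_save_and_restart() resets a checkpointed counter to 0 (rather than to -period) so a rolled-back transaction cannot immediately overflow again, and the per-CPU intel_cp_status mask is OR'ed into the overflow status word in the PMI handler so such counters are always probed even after the rollback cleared their hardware overflow bit. A minimal standalone sketch of that status-merge step (illustrative only; the struct and the sample values below are invented, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the per-CPU PMU state used in the patch. */
struct cpu_state {
        uint64_t intel_cp_status;       /* mask of checkpointed counters */
};

/*
 * Merge the checkpointed counters into the hardware overflow status
 * before scanning it, so a TSX rollback that cleared the hardware bit
 * cannot hide the overflow from the PMI handler.
 */
static uint64_t effective_status(uint64_t hw_status, const struct cpu_state *c)
{
        return hw_status | c->intel_cp_status;
}

int main(void)
{
        struct cpu_state c = { .intel_cp_status = 1ull << 2 };
        uint64_t status = effective_status(0, &c);      /* hw bit already cleared */
        int bit;

        for (bit = 0; bit < 64; bit++)
                if (status & (1ull << bit))
                        printf("probe counter %d\n", bit);
        return 0;
}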
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index ab3ba1c1b7dd..ae96cfa5eddd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) | 13 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) |
14 | #define PEBS_BUFFER_SIZE PAGE_SIZE | 14 | #define PEBS_BUFFER_SIZE PAGE_SIZE |
15 | #define PEBS_FIXUP_SIZE PAGE_SIZE | ||
15 | 16 | ||
16 | /* | 17 | /* |
17 | * pebs_record_32 for p4 and core not supported | 18 | * pebs_record_32 for p4 and core not supported |
@@ -182,18 +183,32 @@ struct pebs_record_nhm { | |||
182 | * Same as pebs_record_nhm, with two additional fields. | 183 | * Same as pebs_record_nhm, with two additional fields. |
183 | */ | 184 | */ |
184 | struct pebs_record_hsw { | 185 | struct pebs_record_hsw { |
185 | struct pebs_record_nhm nhm; | 186 | u64 flags, ip; |
186 | /* | 187 | u64 ax, bx, cx, dx; |
187 | * Real IP of the event. In the Intel documentation this | 188 | u64 si, di, bp, sp; |
188 | * is called eventingrip. | 189 | u64 r8, r9, r10, r11; |
189 | */ | 190 | u64 r12, r13, r14, r15; |
190 | u64 real_ip; | 191 | u64 status, dla, dse, lat; |
191 | /* | 192 | u64 real_ip, tsx_tuning; |
192 | * TSX tuning information field: abort cycles and abort flags. | 193 | }; |
193 | */ | 194 | |
194 | u64 tsx_tuning; | 195 | union hsw_tsx_tuning { |
196 | struct { | ||
197 | u32 cycles_last_block : 32, | ||
198 | hle_abort : 1, | ||
199 | rtm_abort : 1, | ||
200 | instruction_abort : 1, | ||
201 | non_instruction_abort : 1, | ||
202 | retry : 1, | ||
203 | data_conflict : 1, | ||
204 | capacity_writes : 1, | ||
205 | capacity_reads : 1; | ||
206 | }; | ||
207 | u64 value; | ||
195 | }; | 208 | }; |
196 | 209 | ||
210 | #define PEBS_HSW_TSX_FLAGS 0xff00000000ULL | ||
211 | |||
197 | void init_debug_store_on_cpu(int cpu) | 212 | void init_debug_store_on_cpu(int cpu) |
198 | { | 213 | { |
199 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 214 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; |
@@ -214,12 +229,14 @@ void fini_debug_store_on_cpu(int cpu) | |||
214 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | 229 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); |
215 | } | 230 | } |
216 | 231 | ||
232 | static DEFINE_PER_CPU(void *, insn_buffer); | ||
233 | |||
217 | static int alloc_pebs_buffer(int cpu) | 234 | static int alloc_pebs_buffer(int cpu) |
218 | { | 235 | { |
219 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 236 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; |
220 | int node = cpu_to_node(cpu); | 237 | int node = cpu_to_node(cpu); |
221 | int max, thresh = 1; /* always use a single PEBS record */ | 238 | int max, thresh = 1; /* always use a single PEBS record */ |
222 | void *buffer; | 239 | void *buffer, *ibuffer; |
223 | 240 | ||
224 | if (!x86_pmu.pebs) | 241 | if (!x86_pmu.pebs) |
225 | return 0; | 242 | return 0; |
@@ -228,6 +245,19 @@ static int alloc_pebs_buffer(int cpu) | |||
228 | if (unlikely(!buffer)) | 245 | if (unlikely(!buffer)) |
229 | return -ENOMEM; | 246 | return -ENOMEM; |
230 | 247 | ||
248 | /* | ||
249 | * HSW+ already provides us the eventing ip; no need to allocate this | ||
250 | * buffer then. | ||
251 | */ | ||
252 | if (x86_pmu.intel_cap.pebs_format < 2) { | ||
253 | ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); | ||
254 | if (!ibuffer) { | ||
255 | kfree(buffer); | ||
256 | return -ENOMEM; | ||
257 | } | ||
258 | per_cpu(insn_buffer, cpu) = ibuffer; | ||
259 | } | ||
260 | |||
231 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | 261 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; |
232 | 262 | ||
233 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | 263 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; |
@@ -248,6 +278,9 @@ static void release_pebs_buffer(int cpu) | |||
248 | if (!ds || !x86_pmu.pebs) | 278 | if (!ds || !x86_pmu.pebs) |
249 | return; | 279 | return; |
250 | 280 | ||
281 | kfree(per_cpu(insn_buffer, cpu)); | ||
282 | per_cpu(insn_buffer, cpu) = NULL; | ||
283 | |||
251 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | 284 | kfree((void *)(unsigned long)ds->pebs_buffer_base); |
252 | ds->pebs_buffer_base = 0; | 285 | ds->pebs_buffer_base = 0; |
253 | } | 286 | } |
@@ -715,6 +748,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
715 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | 748 | unsigned long old_to, to = cpuc->lbr_entries[0].to; |
716 | unsigned long ip = regs->ip; | 749 | unsigned long ip = regs->ip; |
717 | int is_64bit = 0; | 750 | int is_64bit = 0; |
751 | void *kaddr; | ||
718 | 752 | ||
719 | /* | 753 | /* |
720 | * We don't need to fixup if the PEBS assist is fault like | 754 | * We don't need to fixup if the PEBS assist is fault like |
@@ -738,7 +772,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
738 | * unsigned math, either ip is before the start (impossible) or | 772 | * unsigned math, either ip is before the start (impossible) or |
739 | * the basic block is larger than 1 page (sanity) | 773 | * the basic block is larger than 1 page (sanity) |
740 | */ | 774 | */ |
741 | if ((ip - to) > PAGE_SIZE) | 775 | if ((ip - to) > PEBS_FIXUP_SIZE) |
742 | return 0; | 776 | return 0; |
743 | 777 | ||
744 | /* | 778 | /* |
@@ -749,29 +783,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
749 | return 1; | 783 | return 1; |
750 | } | 784 | } |
751 | 785 | ||
786 | if (!kernel_ip(ip)) { | ||
787 | int size, bytes; | ||
788 | u8 *buf = this_cpu_read(insn_buffer); | ||
789 | |||
790 | size = ip - to; /* Must fit our buffer, see above */ | ||
791 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | ||
792 | if (bytes != 0) | ||
793 | return 0; | ||
794 | |||
795 | kaddr = buf; | ||
796 | } else { | ||
797 | kaddr = (void *)to; | ||
798 | } | ||
799 | |||
752 | do { | 800 | do { |
753 | struct insn insn; | 801 | struct insn insn; |
754 | u8 buf[MAX_INSN_SIZE]; | ||
755 | void *kaddr; | ||
756 | 802 | ||
757 | old_to = to; | 803 | old_to = to; |
758 | if (!kernel_ip(ip)) { | ||
759 | int bytes, size = MAX_INSN_SIZE; | ||
760 | |||
761 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | ||
762 | if (bytes != size) | ||
763 | return 0; | ||
764 | |||
765 | kaddr = buf; | ||
766 | } else | ||
767 | kaddr = (void *)to; | ||
768 | 804 | ||
769 | #ifdef CONFIG_X86_64 | 805 | #ifdef CONFIG_X86_64 |
770 | is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); | 806 | is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); |
771 | #endif | 807 | #endif |
772 | insn_init(&insn, kaddr, is_64bit); | 808 | insn_init(&insn, kaddr, is_64bit); |
773 | insn_get_length(&insn); | 809 | insn_get_length(&insn); |
810 | |||
774 | to += insn.length; | 811 | to += insn.length; |
812 | kaddr += insn.length; | ||
775 | } while (to < ip); | 813 | } while (to < ip); |
776 | 814 | ||
777 | if (to == ip) { | 815 | if (to == ip) { |
@@ -786,16 +824,34 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
786 | return 0; | 824 | return 0; |
787 | } | 825 | } |
788 | 826 | ||
827 | static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs) | ||
828 | { | ||
829 | if (pebs->tsx_tuning) { | ||
830 | union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning }; | ||
831 | return tsx.cycles_last_block; | ||
832 | } | ||
833 | return 0; | ||
834 | } | ||
835 | |||
836 | static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs) | ||
837 | { | ||
838 | u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; | ||
839 | |||
840 | /* For RTM XABORTs also log the abort code from AX */ | ||
841 | if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1)) | ||
842 | txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; | ||
843 | return txn; | ||
844 | } | ||
845 | |||
789 | static void __intel_pmu_pebs_event(struct perf_event *event, | 846 | static void __intel_pmu_pebs_event(struct perf_event *event, |
790 | struct pt_regs *iregs, void *__pebs) | 847 | struct pt_regs *iregs, void *__pebs) |
791 | { | 848 | { |
792 | /* | 849 | /* |
793 | * We cast to pebs_record_nhm to get the load latency data | 850 | * We cast to the biggest pebs_record but are careful not to |
794 | * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used | 851 | * unconditionally access the 'extra' entries. |
795 | */ | 852 | */ |
796 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 853 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
797 | struct pebs_record_nhm *pebs = __pebs; | 854 | struct pebs_record_hsw *pebs = __pebs; |
798 | struct pebs_record_hsw *pebs_hsw = __pebs; | ||
799 | struct perf_sample_data data; | 855 | struct perf_sample_data data; |
800 | struct pt_regs regs; | 856 | struct pt_regs regs; |
801 | u64 sample_type; | 857 | u64 sample_type; |
@@ -854,7 +910,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
854 | regs.sp = pebs->sp; | 910 | regs.sp = pebs->sp; |
855 | 911 | ||
856 | if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { | 912 | if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { |
857 | regs.ip = pebs_hsw->real_ip; | 913 | regs.ip = pebs->real_ip; |
858 | regs.flags |= PERF_EFLAGS_EXACT; | 914 | regs.flags |= PERF_EFLAGS_EXACT; |
859 | } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs)) | 915 | } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
860 | regs.flags |= PERF_EFLAGS_EXACT; | 916 | regs.flags |= PERF_EFLAGS_EXACT; |
@@ -862,9 +918,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
862 | regs.flags &= ~PERF_EFLAGS_EXACT; | 918 | regs.flags &= ~PERF_EFLAGS_EXACT; |
863 | 919 | ||
864 | if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && | 920 | if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && |
865 | x86_pmu.intel_cap.pebs_format >= 1) | 921 | x86_pmu.intel_cap.pebs_format >= 1) |
866 | data.addr = pebs->dla; | 922 | data.addr = pebs->dla; |
867 | 923 | ||
924 | if (x86_pmu.intel_cap.pebs_format >= 2) { | ||
925 | /* Only set the TSX weight when no memory weight. */ | ||
926 | if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll) | ||
927 | data.weight = intel_hsw_weight(pebs); | ||
928 | |||
929 | if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION) | ||
930 | data.txn = intel_hsw_transaction(pebs); | ||
931 | } | ||
932 | |||
868 | if (has_branch_stack(event)) | 933 | if (has_branch_stack(event)) |
869 | data.br_stack = &cpuc->lbr_stack; | 934 | data.br_stack = &cpuc->lbr_stack; |
870 | 935 | ||
@@ -913,17 +978,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | |||
913 | __intel_pmu_pebs_event(event, iregs, at); | 978 | __intel_pmu_pebs_event(event, iregs, at); |
914 | } | 979 | } |
915 | 980 | ||
916 | static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, | 981 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) |
917 | void *top) | ||
918 | { | 982 | { |
919 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 983 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
920 | struct debug_store *ds = cpuc->ds; | 984 | struct debug_store *ds = cpuc->ds; |
921 | struct perf_event *event = NULL; | 985 | struct perf_event *event = NULL; |
986 | void *at, *top; | ||
922 | u64 status = 0; | 987 | u64 status = 0; |
923 | int bit; | 988 | int bit; |
924 | 989 | ||
990 | if (!x86_pmu.pebs_active) | ||
991 | return; | ||
992 | |||
993 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
994 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
995 | |||
925 | ds->pebs_index = ds->pebs_buffer_base; | 996 | ds->pebs_index = ds->pebs_buffer_base; |
926 | 997 | ||
998 | if (unlikely(at > top)) | ||
999 | return; | ||
1000 | |||
1001 | /* | ||
1002 | * Should not happen, we program the threshold at 1 and do not | ||
1003 | * set a reset value. | ||
1004 | */ | ||
1005 | WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size, | ||
1006 | "Unexpected number of pebs records %ld\n", | ||
1007 | (long)(top - at) / x86_pmu.pebs_record_size); | ||
1008 | |||
927 | for (; at < top; at += x86_pmu.pebs_record_size) { | 1009 | for (; at < top; at += x86_pmu.pebs_record_size) { |
928 | struct pebs_record_nhm *p = at; | 1010 | struct pebs_record_nhm *p = at; |
929 | 1011 | ||
@@ -951,61 +1033,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, | |||
951 | } | 1033 | } |
952 | } | 1034 | } |
953 | 1035 | ||
954 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
955 | { | ||
956 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
957 | struct debug_store *ds = cpuc->ds; | ||
958 | struct pebs_record_nhm *at, *top; | ||
959 | int n; | ||
960 | |||
961 | if (!x86_pmu.pebs_active) | ||
962 | return; | ||
963 | |||
964 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
965 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
966 | |||
967 | ds->pebs_index = ds->pebs_buffer_base; | ||
968 | |||
969 | n = top - at; | ||
970 | if (n <= 0) | ||
971 | return; | ||
972 | |||
973 | /* | ||
974 | * Should not happen, we program the threshold at 1 and do not | ||
975 | * set a reset value. | ||
976 | */ | ||
977 | WARN_ONCE(n > x86_pmu.max_pebs_events, | ||
978 | "Unexpected number of pebs records %d\n", n); | ||
979 | |||
980 | return __intel_pmu_drain_pebs_nhm(iregs, at, top); | ||
981 | } | ||
982 | |||
983 | static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs) | ||
984 | { | ||
985 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
986 | struct debug_store *ds = cpuc->ds; | ||
987 | struct pebs_record_hsw *at, *top; | ||
988 | int n; | ||
989 | |||
990 | if (!x86_pmu.pebs_active) | ||
991 | return; | ||
992 | |||
993 | at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base; | ||
994 | top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index; | ||
995 | |||
996 | n = top - at; | ||
997 | if (n <= 0) | ||
998 | return; | ||
999 | /* | ||
1000 | * Should not happen, we program the threshold at 1 and do not | ||
1001 | * set a reset value. | ||
1002 | */ | ||
1003 | WARN_ONCE(n > x86_pmu.max_pebs_events, | ||
1004 | "Unexpected number of pebs records %d\n", n); | ||
1005 | |||
1006 | return __intel_pmu_drain_pebs_nhm(iregs, at, top); | ||
1007 | } | ||
1008 | |||
1009 | /* | 1036 | /* |
1010 | * BTS, PEBS probe and setup | 1037 | * BTS, PEBS probe and setup |
1011 | */ | 1038 | */ |
@@ -1040,7 +1067,7 @@ void intel_ds_init(void) | |||
1040 | case 2: | 1067 | case 2: |
1041 | pr_cont("PEBS fmt2%c, ", pebs_type); | 1068 | pr_cont("PEBS fmt2%c, ", pebs_type); |
1042 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); | 1069 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); |
1043 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw; | 1070 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; |
1044 | break; | 1071 | break; |
1045 | 1072 | ||
1046 | default: | 1073 | default: |
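The union hsw_tsx_tuning and the PEBS_HSW_TSX_FLAGS mask introduced above split the 64-bit tsx_tuning field of a Haswell PEBS record into an abort-cycle count (low 32 bits) and per-cause abort flags (bits 32-39); intel_hsw_transaction() additionally merges the XABORT code that RTM leaves in bits 31:24 of AX. A standalone decoding sketch, assuming the usual little-endian x86 bit-field layout (the sample values are invented):

#include <stdint.h>
#include <stdio.h>

#define PEBS_HSW_TSX_FLAGS      0xff00000000ULL

/* Same layout as the kernel union above, rebuilt with <stdint.h> types. */
union hsw_tsx_tuning {
        struct {
                uint32_t cycles_last_block;
                uint32_t hle_abort              : 1,
                         rtm_abort              : 1,
                         instruction_abort      : 1,
                         non_instruction_abort  : 1,
                         retry                  : 1,
                         data_conflict          : 1,
                         capacity_writes        : 1,
                         capacity_reads         : 1;
        };
        uint64_t value;
};

int main(void)
{
        /* Invented sample: 1234 cycles in the aborted region, HLE and RTM
         * abort flags set, and an RTM XABORT code of 0x42 reported in AX. */
        uint64_t tsx_tuning = (3ULL << 32) | 1234;
        uint64_t ax = (0x42ULL << 24) | 1;

        union hsw_tsx_tuning t = { .value = tsx_tuning };
        uint64_t flags = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

        printf("weight (abort cycles): %u\n", t.cycles_last_block);
        printf("abort flags: %#llx, xabort code: %#llx\n",
               (unsigned long long)flags,
               (unsigned long long)((ax >> 24) & 0xff));
        return 0;
}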
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d5be06a5005e..d82d155aca8c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -284,6 +284,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | |||
284 | int lbr_format = x86_pmu.intel_cap.lbr_format; | 284 | int lbr_format = x86_pmu.intel_cap.lbr_format; |
285 | u64 tos = intel_pmu_lbr_tos(); | 285 | u64 tos = intel_pmu_lbr_tos(); |
286 | int i; | 286 | int i; |
287 | int out = 0; | ||
287 | 288 | ||
288 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | 289 | for (i = 0; i < x86_pmu.lbr_nr; i++) { |
289 | unsigned long lbr_idx = (tos - i) & mask; | 290 | unsigned long lbr_idx = (tos - i) & mask; |
@@ -306,15 +307,27 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | |||
306 | } | 307 | } |
307 | from = (u64)((((s64)from) << skip) >> skip); | 308 | from = (u64)((((s64)from) << skip) >> skip); |
308 | 309 | ||
309 | cpuc->lbr_entries[i].from = from; | 310 | /* |
310 | cpuc->lbr_entries[i].to = to; | 311 | * Some CPUs report duplicated abort records, |
311 | cpuc->lbr_entries[i].mispred = mis; | 312 | * with the second entry not having an abort bit set. |
312 | cpuc->lbr_entries[i].predicted = pred; | 313 | * Skip them here. This loop runs backwards, |
313 | cpuc->lbr_entries[i].in_tx = in_tx; | 314 | * so we need to undo the previous record. |
314 | cpuc->lbr_entries[i].abort = abort; | 315 | * If the abort just happened outside the window |
315 | cpuc->lbr_entries[i].reserved = 0; | 316 | * the extra entry cannot be removed. |
317 | */ | ||
318 | if (abort && x86_pmu.lbr_double_abort && out > 0) | ||
319 | out--; | ||
320 | |||
321 | cpuc->lbr_entries[out].from = from; | ||
322 | cpuc->lbr_entries[out].to = to; | ||
323 | cpuc->lbr_entries[out].mispred = mis; | ||
324 | cpuc->lbr_entries[out].predicted = pred; | ||
325 | cpuc->lbr_entries[out].in_tx = in_tx; | ||
326 | cpuc->lbr_entries[out].abort = abort; | ||
327 | cpuc->lbr_entries[out].reserved = 0; | ||
328 | out++; | ||
316 | } | 329 | } |
317 | cpuc->lbr_stack.nr = i; | 330 | cpuc->lbr_stack.nr = out; |
318 | } | 331 | } |
319 | 332 | ||
320 | void intel_pmu_lbr_read(void) | 333 | void intel_pmu_lbr_read(void) |
@@ -478,7 +491,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort) | |||
478 | 491 | ||
479 | /* may fail if text not present */ | 492 | /* may fail if text not present */ |
480 | bytes = copy_from_user_nmi(buf, (void __user *)from, size); | 493 | bytes = copy_from_user_nmi(buf, (void __user *)from, size); |
481 | if (bytes != size) | 494 | if (bytes != 0) |
482 | return X86_BR_NONE; | 495 | return X86_BR_NONE; |
483 | 496 | ||
484 | addr = buf; | 497 | addr = buf; |
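The duplicate-abort filtering added to intel_pmu_lbr_read_64() above is a read-index/write-index compaction: entries are copied to slot 'out', and when an abort record is seen the previously written entry (on affected CPUs, a duplicate without the abort bit) is overwritten instead of kept. A standalone sketch of the same pattern (illustrative; the real code also honours the x86_pmu.lbr_double_abort quirk flag and copies the full LBR fields):

#include <stdbool.h>
#include <stdio.h>

struct entry {
        unsigned long from, to;
        bool abort;
};

/* Compact in place: drop the record that precedes each abort record. */
static int filter_double_aborts(struct entry *e, int nr)
{
        int i, out = 0;

        for (i = 0; i < nr; i++) {
                if (e[i].abort && out > 0)
                        out--;                  /* undo the previous record */
                e[out++] = e[i];
        }
        return out;
}

int main(void)
{
        struct entry lbr[] = {
                { 0x100, 0x200, false },
                { 0x300, 0x400, false },        /* duplicate, abort bit missing */
                { 0x300, 0x400, true },         /* the real abort record */
        };
        int nr = filter_double_aborts(lbr, 3);

        printf("%d entries kept, last entry abort=%d\n", nr, lbr[nr - 1].abort);
        return 0;
}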
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 4118f9f68315..29c248799ced 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
@@ -997,6 +997,20 @@ static int snbep_pci2phy_map_init(int devid) | |||
997 | } | 997 | } |
998 | } | 998 | } |
999 | 999 | ||
1000 | if (!err) { | ||
1001 | /* | ||
1002 | * For a PCI bus with no UBOX device, find the next bus | ||
1003 | * that has a UBOX device and use its mapping. | ||
1004 | */ | ||
1005 | i = -1; | ||
1006 | for (bus = 255; bus >= 0; bus--) { | ||
1007 | if (pcibus_to_physid[bus] >= 0) | ||
1008 | i = pcibus_to_physid[bus]; | ||
1009 | else | ||
1010 | pcibus_to_physid[bus] = i; | ||
1011 | } | ||
1012 | } | ||
1013 | |||
1000 | if (ubox_dev) | 1014 | if (ubox_dev) |
1001 | pci_dev_put(ubox_dev); | 1015 | pci_dev_put(ubox_dev); |
1002 | 1016 | ||
@@ -1099,6 +1113,24 @@ static struct attribute *ivt_uncore_qpi_formats_attr[] = { | |||
1099 | &format_attr_umask.attr, | 1113 | &format_attr_umask.attr, |
1100 | &format_attr_edge.attr, | 1114 | &format_attr_edge.attr, |
1101 | &format_attr_thresh8.attr, | 1115 | &format_attr_thresh8.attr, |
1116 | &format_attr_match_rds.attr, | ||
1117 | &format_attr_match_rnid30.attr, | ||
1118 | &format_attr_match_rnid4.attr, | ||
1119 | &format_attr_match_dnid.attr, | ||
1120 | &format_attr_match_mc.attr, | ||
1121 | &format_attr_match_opc.attr, | ||
1122 | &format_attr_match_vnw.attr, | ||
1123 | &format_attr_match0.attr, | ||
1124 | &format_attr_match1.attr, | ||
1125 | &format_attr_mask_rds.attr, | ||
1126 | &format_attr_mask_rnid30.attr, | ||
1127 | &format_attr_mask_rnid4.attr, | ||
1128 | &format_attr_mask_dnid.attr, | ||
1129 | &format_attr_mask_mc.attr, | ||
1130 | &format_attr_mask_opc.attr, | ||
1131 | &format_attr_mask_vnw.attr, | ||
1132 | &format_attr_mask0.attr, | ||
1133 | &format_attr_mask1.attr, | ||
1102 | NULL, | 1134 | NULL, |
1103 | }; | 1135 | }; |
1104 | 1136 | ||
@@ -1312,17 +1344,83 @@ static struct intel_uncore_type ivt_uncore_imc = { | |||
1312 | IVT_UNCORE_PCI_COMMON_INIT(), | 1344 | IVT_UNCORE_PCI_COMMON_INIT(), |
1313 | }; | 1345 | }; |
1314 | 1346 | ||
1347 | /* registers in IRP boxes are not properly aligned */ | ||
1348 | static unsigned ivt_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4}; | ||
1349 | static unsigned ivt_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0}; | ||
1350 | |||
1351 | static void ivt_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
1352 | { | ||
1353 | struct pci_dev *pdev = box->pci_dev; | ||
1354 | struct hw_perf_event *hwc = &event->hw; | ||
1355 | |||
1356 | pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], | ||
1357 | hwc->config | SNBEP_PMON_CTL_EN); | ||
1358 | } | ||
1359 | |||
1360 | static void ivt_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
1361 | { | ||
1362 | struct pci_dev *pdev = box->pci_dev; | ||
1363 | struct hw_perf_event *hwc = &event->hw; | ||
1364 | |||
1365 | pci_write_config_dword(pdev, ivt_uncore_irp_ctls[hwc->idx], hwc->config); | ||
1366 | } | ||
1367 | |||
1368 | static u64 ivt_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event) | ||
1369 | { | ||
1370 | struct pci_dev *pdev = box->pci_dev; | ||
1371 | struct hw_perf_event *hwc = &event->hw; | ||
1372 | u64 count = 0; | ||
1373 | |||
1374 | pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx], (u32 *)&count); | ||
1375 | pci_read_config_dword(pdev, ivt_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1); | ||
1376 | |||
1377 | return count; | ||
1378 | } | ||
1379 | |||
1380 | static struct intel_uncore_ops ivt_uncore_irp_ops = { | ||
1381 | .init_box = ivt_uncore_pci_init_box, | ||
1382 | .disable_box = snbep_uncore_pci_disable_box, | ||
1383 | .enable_box = snbep_uncore_pci_enable_box, | ||
1384 | .disable_event = ivt_uncore_irp_disable_event, | ||
1385 | .enable_event = ivt_uncore_irp_enable_event, | ||
1386 | .read_counter = ivt_uncore_irp_read_counter, | ||
1387 | }; | ||
1388 | |||
1389 | static struct intel_uncore_type ivt_uncore_irp = { | ||
1390 | .name = "irp", | ||
1391 | .num_counters = 4, | ||
1392 | .num_boxes = 1, | ||
1393 | .perf_ctr_bits = 48, | ||
1394 | .event_mask = IVT_PMON_RAW_EVENT_MASK, | ||
1395 | .box_ctl = SNBEP_PCI_PMON_BOX_CTL, | ||
1396 | .ops = &ivt_uncore_irp_ops, | ||
1397 | .format_group = &ivt_uncore_format_group, | ||
1398 | }; | ||
1399 | |||
1400 | static struct intel_uncore_ops ivt_uncore_qpi_ops = { | ||
1401 | .init_box = ivt_uncore_pci_init_box, | ||
1402 | .disable_box = snbep_uncore_pci_disable_box, | ||
1403 | .enable_box = snbep_uncore_pci_enable_box, | ||
1404 | .disable_event = snbep_uncore_pci_disable_event, | ||
1405 | .enable_event = snbep_qpi_enable_event, | ||
1406 | .read_counter = snbep_uncore_pci_read_counter, | ||
1407 | .hw_config = snbep_qpi_hw_config, | ||
1408 | .get_constraint = uncore_get_constraint, | ||
1409 | .put_constraint = uncore_put_constraint, | ||
1410 | }; | ||
1411 | |||
1315 | static struct intel_uncore_type ivt_uncore_qpi = { | 1412 | static struct intel_uncore_type ivt_uncore_qpi = { |
1316 | .name = "qpi", | 1413 | .name = "qpi", |
1317 | .num_counters = 4, | 1414 | .num_counters = 4, |
1318 | .num_boxes = 3, | 1415 | .num_boxes = 3, |
1319 | .perf_ctr_bits = 48, | 1416 | .perf_ctr_bits = 48, |
1320 | .perf_ctr = SNBEP_PCI_PMON_CTR0, | 1417 | .perf_ctr = SNBEP_PCI_PMON_CTR0, |
1321 | .event_ctl = SNBEP_PCI_PMON_CTL0, | 1418 | .event_ctl = SNBEP_PCI_PMON_CTL0, |
1322 | .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK, | 1419 | .event_mask = IVT_QPI_PCI_PMON_RAW_EVENT_MASK, |
1323 | .box_ctl = SNBEP_PCI_PMON_BOX_CTL, | 1420 | .box_ctl = SNBEP_PCI_PMON_BOX_CTL, |
1324 | .ops = &ivt_uncore_pci_ops, | 1421 | .num_shared_regs = 1, |
1325 | .format_group = &ivt_uncore_qpi_format_group, | 1422 | .ops = &ivt_uncore_qpi_ops, |
1423 | .format_group = &ivt_uncore_qpi_format_group, | ||
1326 | }; | 1424 | }; |
1327 | 1425 | ||
1328 | static struct intel_uncore_type ivt_uncore_r2pcie = { | 1426 | static struct intel_uncore_type ivt_uncore_r2pcie = { |
@@ -1346,6 +1444,7 @@ static struct intel_uncore_type ivt_uncore_r3qpi = { | |||
1346 | enum { | 1444 | enum { |
1347 | IVT_PCI_UNCORE_HA, | 1445 | IVT_PCI_UNCORE_HA, |
1348 | IVT_PCI_UNCORE_IMC, | 1446 | IVT_PCI_UNCORE_IMC, |
1447 | IVT_PCI_UNCORE_IRP, | ||
1349 | IVT_PCI_UNCORE_QPI, | 1448 | IVT_PCI_UNCORE_QPI, |
1350 | IVT_PCI_UNCORE_R2PCIE, | 1449 | IVT_PCI_UNCORE_R2PCIE, |
1351 | IVT_PCI_UNCORE_R3QPI, | 1450 | IVT_PCI_UNCORE_R3QPI, |
@@ -1354,6 +1453,7 @@ enum { | |||
1354 | static struct intel_uncore_type *ivt_pci_uncores[] = { | 1453 | static struct intel_uncore_type *ivt_pci_uncores[] = { |
1355 | [IVT_PCI_UNCORE_HA] = &ivt_uncore_ha, | 1454 | [IVT_PCI_UNCORE_HA] = &ivt_uncore_ha, |
1356 | [IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc, | 1455 | [IVT_PCI_UNCORE_IMC] = &ivt_uncore_imc, |
1456 | [IVT_PCI_UNCORE_IRP] = &ivt_uncore_irp, | ||
1357 | [IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi, | 1457 | [IVT_PCI_UNCORE_QPI] = &ivt_uncore_qpi, |
1358 | [IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie, | 1458 | [IVT_PCI_UNCORE_R2PCIE] = &ivt_uncore_r2pcie, |
1359 | [IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi, | 1459 | [IVT_PCI_UNCORE_R3QPI] = &ivt_uncore_r3qpi, |
@@ -1401,6 +1501,10 @@ static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { | |||
1401 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), | 1501 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), |
1402 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7), | 1502 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7), |
1403 | }, | 1503 | }, |
1504 | { /* IRP */ | ||
1505 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39), | ||
1506 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IRP, 0), | ||
1507 | }, | ||
1404 | { /* QPI0 Port 0 */ | 1508 | { /* QPI0 Port 0 */ |
1405 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), | 1509 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), |
1406 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0), | 1510 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0), |
@@ -1429,6 +1533,16 @@ static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { | |||
1429 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), | 1533 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), |
1430 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2), | 1534 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2), |
1431 | }, | 1535 | }, |
1536 | { /* QPI Port 0 filter */ | ||
1537 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86), | ||
1538 | .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, | ||
1539 | SNBEP_PCI_QPI_PORT0_FILTER), | ||
1540 | }, | ||
1541 | { /* QPI Port 0 filter */ | ||
1542 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96), | ||
1543 | .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, | ||
1544 | SNBEP_PCI_QPI_PORT1_FILTER), | ||
1545 | }, | ||
1432 | { /* end: all zeroes */ } | 1546 | { /* end: all zeroes */ } |
1433 | }; | 1547 | }; |
1434 | 1548 | ||
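ivt_uncore_irp_read_counter() above reads a 48-bit free-running counter out of PCI configuration space, which is only accessible 32 bits at a time, so it issues two pci_read_config_dword() calls and stores them into the low and high halves of a u64. The kernel version writes the halves through pointer casts; the shift-and-or below is an equivalent standalone reconstruction (the stub read_cfg32() and its return values are invented, the offset mirrors ivt_uncore_irp_ctrs[1]):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for pci_read_config_dword(): made-up data at made-up offsets. */
static uint32_t read_cfg32(unsigned int off)
{
        return off == 0xb0 ? 0x89abcdefu : 0x0123u;
}

int main(void)
{
        uint64_t lo = read_cfg32(0xb0);         /* low dword of the counter  */
        uint64_t hi = read_cfg32(0xb0 + 4);     /* high dword of the counter */
        uint64_t count = (hi << 32) | lo;

        printf("counter = %#llx\n", (unsigned long long)count);
        return 0;
}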
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index e0e0841eef45..18677a90d6a3 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -127,12 +127,12 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
127 | cpu_emergency_vmxoff(); | 127 | cpu_emergency_vmxoff(); |
128 | cpu_emergency_svm_disable(); | 128 | cpu_emergency_svm_disable(); |
129 | 129 | ||
130 | lapic_shutdown(); | ||
131 | #ifdef CONFIG_X86_IO_APIC | 130 | #ifdef CONFIG_X86_IO_APIC |
132 | /* Prevent crash_kexec() from deadlocking on ioapic_lock. */ | 131 | /* Prevent crash_kexec() from deadlocking on ioapic_lock. */ |
133 | ioapic_zap_locks(); | 132 | ioapic_zap_locks(); |
134 | disable_IO_APIC(); | 133 | disable_IO_APIC(); |
135 | #endif | 134 | #endif |
135 | lapic_shutdown(); | ||
136 | #ifdef CONFIG_HPET_TIMER | 136 | #ifdef CONFIG_HPET_TIMER |
137 | hpet_disable(); | 137 | hpet_disable(); |
138 | #endif | 138 | #endif |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f0dcb0ceb6a2..fd1bc1b15e6d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -362,12 +362,9 @@ END(ret_from_exception) | |||
362 | #ifdef CONFIG_PREEMPT | 362 | #ifdef CONFIG_PREEMPT |
363 | ENTRY(resume_kernel) | 363 | ENTRY(resume_kernel) |
364 | DISABLE_INTERRUPTS(CLBR_ANY) | 364 | DISABLE_INTERRUPTS(CLBR_ANY) |
365 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? | ||
366 | jnz restore_all | ||
367 | need_resched: | 365 | need_resched: |
368 | movl TI_flags(%ebp), %ecx # need_resched set ? | 366 | cmpl $0,PER_CPU_VAR(__preempt_count) |
369 | testb $_TIF_NEED_RESCHED, %cl | 367 | jnz restore_all |
370 | jz restore_all | ||
371 | testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? | 368 | testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? |
372 | jz restore_all | 369 | jz restore_all |
373 | call preempt_schedule_irq | 370 | call preempt_schedule_irq |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b077f4cc225a..603be7c70675 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1103,10 +1103,8 @@ retint_signal: | |||
1103 | /* Returning to kernel space. Check if we need preemption */ | 1103 | /* Returning to kernel space. Check if we need preemption */ |
1104 | /* rcx: threadinfo. interrupts off. */ | 1104 | /* rcx: threadinfo. interrupts off. */ |
1105 | ENTRY(retint_kernel) | 1105 | ENTRY(retint_kernel) |
1106 | cmpl $0,TI_preempt_count(%rcx) | 1106 | cmpl $0,PER_CPU_VAR(__preempt_count) |
1107 | jnz retint_restore_args | 1107 | jnz retint_restore_args |
1108 | bt $TIF_NEED_RESCHED,TI_flags(%rcx) | ||
1109 | jnc retint_restore_args | ||
1110 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ | 1108 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ |
1111 | jnc retint_restore_args | 1109 | jnc retint_restore_args |
1112 | call preempt_schedule_irq | 1110 | call preempt_schedule_irq |
@@ -1342,7 +1340,7 @@ bad_gs: | |||
1342 | .previous | 1340 | .previous |
1343 | 1341 | ||
1344 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1342 | /* Call softirq on interrupt stack. Interrupts are off. */ |
1345 | ENTRY(call_softirq) | 1343 | ENTRY(do_softirq_own_stack) |
1346 | CFI_STARTPROC | 1344 | CFI_STARTPROC |
1347 | pushq_cfi %rbp | 1345 | pushq_cfi %rbp |
1348 | CFI_REL_OFFSET rbp,0 | 1346 | CFI_REL_OFFSET rbp,0 |
@@ -1359,7 +1357,7 @@ ENTRY(call_softirq) | |||
1359 | decl PER_CPU_VAR(irq_count) | 1357 | decl PER_CPU_VAR(irq_count) |
1360 | ret | 1358 | ret |
1361 | CFI_ENDPROC | 1359 | CFI_ENDPROC |
1362 | END(call_softirq) | 1360 | END(do_softirq_own_stack) |
1363 | 1361 | ||
1364 | #ifdef CONFIG_XEN | 1362 | #ifdef CONFIG_XEN |
1365 | zeroentry xen_hypervisor_callback xen_do_hypervisor_callback | 1363 | zeroentry xen_hypervisor_callback xen_do_hypervisor_callback |
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 0fa69127209a..05fd74f537d6 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c | |||
@@ -37,3 +37,10 @@ EXPORT_SYMBOL(strstr); | |||
37 | 37 | ||
38 | EXPORT_SYMBOL(csum_partial); | 38 | EXPORT_SYMBOL(csum_partial); |
39 | EXPORT_SYMBOL(empty_zero_page); | 39 | EXPORT_SYMBOL(empty_zero_page); |
40 | |||
41 | #ifdef CONFIG_PREEMPT | ||
42 | EXPORT_SYMBOL(___preempt_schedule); | ||
43 | #ifdef CONFIG_CONTEXT_TRACKING | ||
44 | EXPORT_SYMBOL(___preempt_schedule_context); | ||
45 | #endif | ||
46 | #endif | ||
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 9a5c460404dc..2e977b5d61dd 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -312,8 +312,7 @@ static void init_8259A(int auto_eoi) | |||
312 | */ | 312 | */ |
313 | outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ | 313 | outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ |
314 | 314 | ||
315 | /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, | 315 | /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ |
316 | to 0x20-0x27 on i386 */ | ||
317 | outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); | 316 | outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); |
318 | 317 | ||
319 | /* 8259A-1 (the master) has a slave on IR2 */ | 318 | /* 8259A-1 (the master) has a slave on IR2 */ |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4186755f1d7c..d7fcbedc9c43 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) | |||
100 | irqctx->tinfo.task = curctx->tinfo.task; | 100 | irqctx->tinfo.task = curctx->tinfo.task; |
101 | irqctx->tinfo.previous_esp = current_stack_pointer; | 101 | irqctx->tinfo.previous_esp = current_stack_pointer; |
102 | 102 | ||
103 | /* Copy the preempt_count so that the [soft]irq checks work. */ | ||
104 | irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count; | ||
105 | |||
106 | if (unlikely(overflow)) | 103 | if (unlikely(overflow)) |
107 | call_on_stack(print_stack_overflow, isp); | 104 | call_on_stack(print_stack_overflow, isp); |
108 | 105 | ||
@@ -131,7 +128,6 @@ void irq_ctx_init(int cpu) | |||
131 | THREAD_SIZE_ORDER)); | 128 | THREAD_SIZE_ORDER)); |
132 | memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); | 129 | memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); |
133 | irqctx->tinfo.cpu = cpu; | 130 | irqctx->tinfo.cpu = cpu; |
134 | irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; | ||
135 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | 131 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); |
136 | 132 | ||
137 | per_cpu(hardirq_ctx, cpu) = irqctx; | 133 | per_cpu(hardirq_ctx, cpu) = irqctx; |
@@ -149,35 +145,21 @@ void irq_ctx_init(int cpu) | |||
149 | cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); | 145 | cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); |
150 | } | 146 | } |
151 | 147 | ||
152 | asmlinkage void do_softirq(void) | 148 | void do_softirq_own_stack(void) |
153 | { | 149 | { |
154 | unsigned long flags; | ||
155 | struct thread_info *curctx; | 150 | struct thread_info *curctx; |
156 | union irq_ctx *irqctx; | 151 | union irq_ctx *irqctx; |
157 | u32 *isp; | 152 | u32 *isp; |
158 | 153 | ||
159 | if (in_interrupt()) | 154 | curctx = current_thread_info(); |
160 | return; | 155 | irqctx = __this_cpu_read(softirq_ctx); |
161 | 156 | irqctx->tinfo.task = curctx->task; | |
162 | local_irq_save(flags); | 157 | irqctx->tinfo.previous_esp = current_stack_pointer; |
163 | |||
164 | if (local_softirq_pending()) { | ||
165 | curctx = current_thread_info(); | ||
166 | irqctx = __this_cpu_read(softirq_ctx); | ||
167 | irqctx->tinfo.task = curctx->task; | ||
168 | irqctx->tinfo.previous_esp = current_stack_pointer; | ||
169 | |||
170 | /* build the stack frame on the softirq stack */ | ||
171 | isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); | ||
172 | 158 | ||
173 | call_on_stack(__do_softirq, isp); | 159 | /* build the stack frame on the softirq stack */ |
174 | /* | 160 | isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); |
175 | * Shouldn't happen, we returned above if in_interrupt(): | ||
176 | */ | ||
177 | WARN_ON_ONCE(softirq_count()); | ||
178 | } | ||
179 | 161 | ||
180 | local_irq_restore(flags); | 162 | call_on_stack(__do_softirq, isp); |
181 | } | 163 | } |
182 | 164 | ||
183 | bool handle_irq(unsigned irq, struct pt_regs *regs) | 165 | bool handle_irq(unsigned irq, struct pt_regs *regs) |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index d04d3ecded62..4d1c746892eb 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -87,24 +87,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) | |||
87 | generic_handle_irq_desc(irq, desc); | 87 | generic_handle_irq_desc(irq, desc); |
88 | return true; | 88 | return true; |
89 | } | 89 | } |
90 | |||
91 | |||
92 | extern void call_softirq(void); | ||
93 | |||
94 | asmlinkage void do_softirq(void) | ||
95 | { | ||
96 | __u32 pending; | ||
97 | unsigned long flags; | ||
98 | |||
99 | if (in_interrupt()) | ||
100 | return; | ||
101 | |||
102 | local_irq_save(flags); | ||
103 | pending = local_softirq_pending(); | ||
104 | /* Switch to interrupt stack */ | ||
105 | if (pending) { | ||
106 | call_softirq(); | ||
107 | WARN_ON_ONCE(softirq_count()); | ||
108 | } | ||
109 | local_irq_restore(flags); | ||
110 | } | ||
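With both the 32-bit and 64-bit open-coded do_softirq() implementations removed above, only the stack switch itself stays in the architecture code (do_softirq_own_stack); the in_interrupt(), pending and irq-off bookkeeping moves to a single generic caller. A hedged sketch of that generic shape, reconstructed from the code deleted here (the actual kernel/softirq.c implementation may differ in detail):

asmlinkage void do_softirq(void)
{
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        if (local_softirq_pending())
                do_softirq_own_stack();  /* arch hook: switch stacks, run __do_softirq() */

        local_irq_restore(flags);
}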
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 697b93af02dd..b2046e4d0b59 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -609,7 +609,7 @@ static struct dentry *d_kvm_debug; | |||
609 | 609 | ||
610 | struct dentry *kvm_init_debugfs(void) | 610 | struct dentry *kvm_init_debugfs(void) |
611 | { | 611 | { |
612 | d_kvm_debug = debugfs_create_dir("kvm", NULL); | 612 | d_kvm_debug = debugfs_create_dir("kvm-guest", NULL); |
613 | if (!d_kvm_debug) | 613 | if (!d_kvm_debug) |
614 | printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); | 614 | printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); |
615 | 615 | ||
@@ -775,11 +775,22 @@ void __init kvm_spinlock_init(void) | |||
775 | if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) | 775 | if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) |
776 | return; | 776 | return; |
777 | 777 | ||
778 | printk(KERN_INFO "KVM setup paravirtual spinlock\n"); | 778 | pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); |
779 | pv_lock_ops.unlock_kick = kvm_unlock_kick; | ||
780 | } | ||
781 | |||
782 | static __init int kvm_spinlock_init_jump(void) | ||
783 | { | ||
784 | if (!kvm_para_available()) | ||
785 | return 0; | ||
786 | if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) | ||
787 | return 0; | ||
779 | 788 | ||
780 | static_key_slow_inc(&paravirt_ticketlocks_enabled); | 789 | static_key_slow_inc(&paravirt_ticketlocks_enabled);
790 | printk(KERN_INFO "KVM setup paravirtual spinlock\n"); | ||
781 | 791 | ||
782 | pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); | 792 | return 0; |
783 | pv_lock_ops.unlock_kick = kvm_unlock_kick; | ||
784 | } | 793 | } |
794 | early_initcall(kvm_spinlock_init_jump); | ||
795 | |||
785 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | 796 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ |
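The hunk above splits the paravirtual spinlock setup in two: kvm_spinlock_init() still patches pv_lock_ops very early, while the static_key_slow_inc() that actually enables the paravirt_ticketlocks_enabled key is deferred to an early_initcall, presumably because static keys cannot be flipped before the jump-label machinery is initialized. The general shape of that pattern, as a hedged sketch with invented names (my_feature_*):

static struct static_key my_feature_enabled = STATIC_KEY_INIT_FALSE;

/* Runs very early: only patch function pointers, detect the feature, etc. */
void __init my_feature_early_setup(void)
{
        /* ... ops-table patching goes here ... */
}

/* Runs later, once jump labels can safely be patched. */
static __init int my_feature_enable_key(void)
{
        static_key_slow_inc(&my_feature_enabled);
        return 0;
}
early_initcall(my_feature_enable_key);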
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ba77ebc2c353..6fcb49ce50a1 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 | |||
113 | u64 before, delta, whole_msecs; | 113 | u64 before, delta, whole_msecs; |
114 | int remainder_ns, decimal_msecs, thishandled; | 114 | int remainder_ns, decimal_msecs, thishandled; |
115 | 115 | ||
116 | before = local_clock(); | 116 | before = sched_clock(); |
117 | thishandled = a->handler(type, regs); | 117 | thishandled = a->handler(type, regs); |
118 | handled += thishandled; | 118 | handled += thishandled; |
119 | delta = local_clock() - before; | 119 | delta = sched_clock() - before; |
120 | trace_nmi_handler(a->handler, (int)delta, thishandled); | 120 | trace_nmi_handler(a->handler, (int)delta, thishandled); |
121 | 121 | ||
122 | if (delta < nmi_longest_ns) | 122 | if (delta < nmi_longest_ns) |
diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S new file mode 100644 index 000000000000..ca7f0d58a87d --- /dev/null +++ b/arch/x86/kernel/preempt.S | |||
@@ -0,0 +1,25 @@ | |||
1 | |||
2 | #include <linux/linkage.h> | ||
3 | #include <asm/dwarf2.h> | ||
4 | #include <asm/asm.h> | ||
5 | #include <asm/calling.h> | ||
6 | |||
7 | ENTRY(___preempt_schedule) | ||
8 | CFI_STARTPROC | ||
9 | SAVE_ALL | ||
10 | call preempt_schedule | ||
11 | RESTORE_ALL | ||
12 | ret | ||
13 | CFI_ENDPROC | ||
14 | |||
15 | #ifdef CONFIG_CONTEXT_TRACKING | ||
16 | |||
17 | ENTRY(___preempt_schedule_context) | ||
18 | CFI_STARTPROC | ||
19 | SAVE_ALL | ||
20 | call preempt_schedule_context | ||
21 | RESTORE_ALL | ||
22 | ret | ||
23 | CFI_ENDPROC | ||
24 | |||
25 | #endif | ||
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c83516be1052..3fb8d95ab8b5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -391,9 +391,9 @@ static void amd_e400_idle(void) | |||
391 | * The switch back from broadcast mode needs to be | 391 | * The switch back from broadcast mode needs to be |
392 | * called with interrupts disabled. | 392 | * called with interrupts disabled. |
393 | */ | 393 | */ |
394 | local_irq_disable(); | 394 | local_irq_disable(); |
395 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); | 395 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); |
396 | local_irq_enable(); | 396 | local_irq_enable(); |
397 | } else | 397 | } else |
398 | default_idle(); | 398 | default_idle(); |
399 | } | 399 | } |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 884f98f69354..c2ec1aa6d454 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -292,6 +292,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
292 | set_iopl_mask(next->iopl); | 292 | set_iopl_mask(next->iopl); |
293 | 293 | ||
294 | /* | 294 | /* |
295 | * If it were not for PREEMPT_ACTIVE we could guarantee that the | ||
296 | * preempt_count of all tasks was equal here and this would not be | ||
297 | * needed. | ||
298 | */ | ||
299 | task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); | ||
300 | this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); | ||
301 | |||
302 | /* | ||
295 | * Now maybe handle debug registers and/or IO bitmaps | 303 | * Now maybe handle debug registers and/or IO bitmaps |
296 | */ | 304 | */ |
297 | if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || | 305 | if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bb1dc51bab05..45ab4d6fc8a7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -363,6 +363,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
363 | this_cpu_write(old_rsp, next->usersp); | 363 | this_cpu_write(old_rsp, next->usersp); |
364 | this_cpu_write(current_task, next_p); | 364 | this_cpu_write(current_task, next_p); |
365 | 365 | ||
366 | /* | ||
367 | * If it were not for PREEMPT_ACTIVE we could guarantee that the | ||
368 | * preempt_count of all tasks was equal here and this would not be | ||
369 | * needed. | ||
370 | */ | ||
371 | task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); | ||
372 | this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); | ||
373 | |||
366 | this_cpu_write(kernel_stack, | 374 | this_cpu_write(kernel_stack, |
367 | (unsigned long)task_stack_page(next_p) + | 375 | (unsigned long)task_stack_page(next_p) + |
368 | THREAD_SIZE - KERNEL_STACK_OFFSET); | 376 | THREAD_SIZE - KERNEL_STACK_OFFSET); |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e643e744e4d8..618ce264b237 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -326,6 +326,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
326 | DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"), | 326 | DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"), |
327 | }, | 327 | }, |
328 | }, | 328 | }, |
329 | { /* Handle problems with rebooting on the Latitude E5410. */ | ||
330 | .callback = set_pci_reboot, | ||
331 | .ident = "Dell Latitude E5410", | ||
332 | .matches = { | ||
333 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
334 | DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5410"), | ||
335 | }, | ||
336 | }, | ||
329 | { /* Handle problems with rebooting on the Latitude E5420. */ | 337 | { /* Handle problems with rebooting on the Latitude E5420. */ |
330 | .callback = set_pci_reboot, | 338 | .callback = set_pci_reboot, |
331 | .ident = "Dell Latitude E5420", | 339 | .ident = "Dell Latitude E5420", |
@@ -542,6 +550,10 @@ static void native_machine_emergency_restart(void) | |||
542 | void native_machine_shutdown(void) | 550 | void native_machine_shutdown(void) |
543 | { | 551 | { |
544 | /* Stop the cpus and apics */ | 552 | /* Stop the cpus and apics */ |
553 | #ifdef CONFIG_X86_IO_APIC | ||
554 | disable_IO_APIC(); | ||
555 | #endif | ||
556 | |||
545 | #ifdef CONFIG_SMP | 557 | #ifdef CONFIG_SMP |
546 | /* | 558 | /* |
547 | * Stop all of the others. Also disable the local irq to | 559 | * Stop all of the others. Also disable the local irq to |
@@ -554,10 +566,6 @@ void native_machine_shutdown(void) | |||
554 | 566 | ||
555 | lapic_shutdown(); | 567 | lapic_shutdown(); |
556 | 568 | ||
557 | #ifdef CONFIG_X86_IO_APIC | ||
558 | disable_IO_APIC(); | ||
559 | #endif | ||
560 | |||
561 | #ifdef CONFIG_HPET_TIMER | 569 | #ifdef CONFIG_HPET_TIMER |
562 | hpet_disable(); | 570 | hpet_disable(); |
563 | #endif | 571 | #endif |
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 0aa29394ed6f..5b9dd445eb89 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -192,6 +192,14 @@ static __init int add_rtc_cmos(void) | |||
192 | if (mrst_identify_cpu()) | 192 | if (mrst_identify_cpu()) |
193 | return -ENODEV; | 193 | return -ENODEV; |
194 | 194 | ||
195 | #ifdef CONFIG_ACPI | ||
196 | if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) { | ||
197 | /* This warning can likely go away again in a year or two. */ | ||
198 | pr_info("ACPI: not registering RTC platform device\n"); | ||
199 | return -ENODEV; | ||
200 | } | ||
201 | #endif | ||
202 | |||
195 | platform_device_register(&rtc_device); | 203 | platform_device_register(&rtc_device); |
196 | dev_info(&rtc_device.dev, | 204 | dev_info(&rtc_device.dev, |
197 | "registered platform RTC device (no PNP device found)\n"); | 205 | "registered platform RTC device (no PNP device found)\n"); |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6cacab671f9b..2a165580fa16 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -73,11 +73,10 @@ | |||
73 | #include <asm/setup.h> | 73 | #include <asm/setup.h> |
74 | #include <asm/uv/uv.h> | 74 | #include <asm/uv/uv.h> |
75 | #include <linux/mc146818rtc.h> | 75 | #include <linux/mc146818rtc.h> |
76 | |||
77 | #include <asm/smpboot_hooks.h> | 76 | #include <asm/smpboot_hooks.h> |
78 | #include <asm/i8259.h> | 77 | #include <asm/i8259.h> |
79 | |||
80 | #include <asm/realmode.h> | 78 | #include <asm/realmode.h> |
79 | #include <asm/misc.h> | ||
81 | 80 | ||
82 | /* State of each CPU */ | 81 | /* State of each CPU */ |
83 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; | 82 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; |
@@ -648,22 +647,46 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
648 | return (send_status | accept_status); | 647 | return (send_status | accept_status); |
649 | } | 648 | } |
650 | 649 | ||
650 | void smp_announce(void) | ||
651 | { | ||
652 | int num_nodes = num_online_nodes(); | ||
653 | |||
654 | printk(KERN_INFO "x86: Booted up %d node%s, %d CPUs\n", | ||
655 | num_nodes, (num_nodes > 1 ? "s" : ""), num_online_cpus()); | ||
656 | } | ||
657 | |||
651 | /* reduce the number of lines printed when booting a large cpu count system */ | 658 | /* reduce the number of lines printed when booting a large cpu count system */ |
652 | static void announce_cpu(int cpu, int apicid) | 659 | static void announce_cpu(int cpu, int apicid) |
653 | { | 660 | { |
654 | static int current_node = -1; | 661 | static int current_node = -1; |
655 | int node = early_cpu_to_node(cpu); | 662 | int node = early_cpu_to_node(cpu); |
656 | int max_cpu_present = find_last_bit(cpumask_bits(cpu_present_mask), NR_CPUS); | 663 | static int width, node_width; |
664 | |||
665 | if (!width) | ||
666 | width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */ | ||
667 | |||
668 | if (!node_width) | ||
669 | node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */ | ||
670 | |||
671 | if (cpu == 1) | ||
672 | printk(KERN_INFO "x86: Booting SMP configuration:\n"); | ||
657 | 673 | ||
658 | if (system_state == SYSTEM_BOOTING) { | 674 | if (system_state == SYSTEM_BOOTING) { |
659 | if (node != current_node) { | 675 | if (node != current_node) { |
660 | if (current_node > (-1)) | 676 | if (current_node > (-1)) |
661 | pr_cont(" OK\n"); | 677 | pr_cont("\n"); |
662 | current_node = node; | 678 | current_node = node; |
663 | pr_info("Booting Node %3d, Processors ", node); | 679 | |
680 | printk(KERN_INFO ".... node %*s#%d, CPUs: ", | ||
681 | node_width - num_digits(node), " ", node); | ||
664 | } | 682 | } |
665 | pr_cont(" #%4d%s", cpu, cpu == max_cpu_present ? " OK\n" : ""); | 683 | |
666 | return; | 684 | /* Add padding for the BSP */ |
685 | if (cpu == 1) | ||
686 | pr_cont("%*s", width + 1, " "); | ||
687 | |||
688 | pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu); | ||
689 | |||
667 | } else | 690 | } else |
668 | pr_info("Booting Node %d Processor %d APIC 0x%x\n", | 691 | pr_info("Booting Node %d Processor %d APIC 0x%x\n", |
669 | node, cpu, apicid); | 692 | node, cpu, apicid); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8c8093b146ca..729aa779ff75 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -88,7 +88,7 @@ static inline void conditional_sti(struct pt_regs *regs) | |||
88 | 88 | ||
89 | static inline void preempt_conditional_sti(struct pt_regs *regs) | 89 | static inline void preempt_conditional_sti(struct pt_regs *regs) |
90 | { | 90 | { |
91 | inc_preempt_count(); | 91 | preempt_count_inc(); |
92 | if (regs->flags & X86_EFLAGS_IF) | 92 | if (regs->flags & X86_EFLAGS_IF) |
93 | local_irq_enable(); | 93 | local_irq_enable(); |
94 | } | 94 | } |
@@ -103,7 +103,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
103 | { | 103 | { |
104 | if (regs->flags & X86_EFLAGS_IF) | 104 | if (regs->flags & X86_EFLAGS_IF) |
105 | local_irq_disable(); | 105 | local_irq_disable(); |
106 | dec_preempt_count(); | 106 | preempt_count_dec(); |
107 | } | 107 | } |
108 | 108 | ||
109 | static int __kprobes | 109 | static int __kprobes |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index b014d9414d08..040681928e9d 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -66,3 +66,10 @@ EXPORT_SYMBOL(empty_zero_page); | |||
66 | #ifndef CONFIG_PARAVIRT | 66 | #ifndef CONFIG_PARAVIRT |
67 | EXPORT_SYMBOL(native_load_gs_index); | 67 | EXPORT_SYMBOL(native_load_gs_index); |
68 | #endif | 68 | #endif |
69 | |||
70 | #ifdef CONFIG_PREEMPT | ||
71 | EXPORT_SYMBOL(___preempt_schedule); | ||
72 | #ifdef CONFIG_CONTEXT_TRACKING | ||
73 | EXPORT_SYMBOL(___preempt_schedule_context); | ||
74 | #endif | ||
75 | #endif | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3b8e7459dd4d..2b2fce1b2009 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -3255,25 +3255,29 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
3255 | 3255 | ||
3256 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | 3256 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) |
3257 | { | 3257 | { |
3258 | struct kvm_mmu *mmu = vcpu->arch.walk_mmu; | ||
3259 | |||
3258 | if (!test_bit(VCPU_EXREG_PDPTR, | 3260 | if (!test_bit(VCPU_EXREG_PDPTR, |
3259 | (unsigned long *)&vcpu->arch.regs_dirty)) | 3261 | (unsigned long *)&vcpu->arch.regs_dirty)) |
3260 | return; | 3262 | return; |
3261 | 3263 | ||
3262 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { | 3264 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { |
3263 | vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]); | 3265 | vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); |
3264 | vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]); | 3266 | vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); |
3265 | vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]); | 3267 | vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); |
3266 | vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]); | 3268 | vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]); |
3267 | } | 3269 | } |
3268 | } | 3270 | } |
3269 | 3271 | ||
3270 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu) | 3272 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu) |
3271 | { | 3273 | { |
3274 | struct kvm_mmu *mmu = vcpu->arch.walk_mmu; | ||
3275 | |||
3272 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { | 3276 | if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { |
3273 | vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0); | 3277 | mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); |
3274 | vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1); | 3278 | mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); |
3275 | vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2); | 3279 | mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); |
3276 | vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3); | 3280 | mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); |
3277 | } | 3281 | } |
3278 | 3282 | ||
3279 | __set_bit(VCPU_EXREG_PDPTR, | 3283 | __set_bit(VCPU_EXREG_PDPTR, |
@@ -7777,10 +7781,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7777 | vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); | 7781 | vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); |
7778 | vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); | 7782 | vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); |
7779 | vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); | 7783 | vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); |
7780 | __clear_bit(VCPU_EXREG_PDPTR, | ||
7781 | (unsigned long *)&vcpu->arch.regs_avail); | ||
7782 | __clear_bit(VCPU_EXREG_PDPTR, | ||
7783 | (unsigned long *)&vcpu->arch.regs_dirty); | ||
7784 | } | 7784 | } |
7785 | 7785 | ||
7786 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); | 7786 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); |
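In ept_load_pdptrs()/ept_save_pdptrs() above, the PDPTRs are now accessed through vcpu->arch.walk_mmu, cached in a local mmu pointer, rather than vcpu->arch.mmu. A hypothetical helper showing the same pattern in miniature (illustrative only, not part of the patch): caching the nested pointer keeps the repeated accesses short and makes switching which MMU is used a one-line change.

/* Hypothetical helper, not kernel code: copy the four PDPTRs out of the MMU
 * that was used for the most recent guest page-table walk. */
static void copy_guest_pdptrs(struct kvm_vcpu *vcpu, u64 dst[4])
{
        struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
        int i;

        for (i = 0; i < 4; i++)
                dst[i] = mmu->pdptrs[i];
}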
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 96b2c6697c9d..992d63bb154f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -16,7 +16,7 @@ clean-files := inat-tables.c | |||
16 | 16 | ||
17 | obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o | 17 | obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o |
18 | 18 | ||
19 | lib-y := delay.o | 19 | lib-y := delay.o misc.o |
20 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
21 | lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o |
22 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
diff --git a/arch/x86/lib/misc.c b/arch/x86/lib/misc.c new file mode 100644 index 000000000000..76b373af03f0 --- /dev/null +++ b/arch/x86/lib/misc.c | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Count the digits of @val including a possible sign. | ||
3 | * | ||
4 | * (Typed on and submitted from hpa's mobile phone.) | ||
5 | */ | ||
6 | int num_digits(int val) | ||
7 | { | ||
8 | int m = 10; | ||
9 | int d = 1; | ||
10 | |||
11 | if (val < 0) { | ||
12 | d++; | ||
13 | val = -val; | ||
14 | } | ||
15 | |||
16 | while (val >= m) { | ||
17 | m *= 10; | ||
18 | d++; | ||
19 | } | ||
20 | return d; | ||
21 | } | ||
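num_digits() returns the number of characters needed to print val in decimal, counting a leading '-' for negative values (note that negating INT_MIN would overflow, so callers are expected to stay within saner ranges). A quick self-contained userspace check of the expected results (hypothetical test, not part of the patch):

#include <assert.h>
#include <stdio.h>

/* local copy of the function above, so the test builds standalone */
static int num_digits(int val)
{
        int m = 10;
        int d = 1;

        if (val < 0) {
                d++;
                val = -val;
        }

        while (val >= m) {
                m *= 10;
                d++;
        }
        return d;
}

int main(void)
{
        assert(num_digits(0) == 1);      /* "0" */
        assert(num_digits(9) == 1);
        assert(num_digits(10) == 2);
        assert(num_digits(-7) == 2);     /* "-7": one digit plus the sign */
        assert(num_digits(123456) == 6);
        printf("num_digits: all checks passed\n");
        return 0;
}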
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 4f74d94c8d97..ddf9ecb53cc3 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c | |||
@@ -11,39 +11,26 @@ | |||
11 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
12 | 12 | ||
13 | /* | 13 | /* |
14 | * best effort, GUP based copy_from_user() that is NMI-safe | 14 | * We rely on the nested NMI work to allow atomic faults from the NMI path; the |
15 | * nested NMI paths are careful to preserve CR2. | ||
15 | */ | 16 | */ |
16 | unsigned long | 17 | unsigned long |
17 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | 18 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) |
18 | { | 19 | { |
19 | unsigned long offset, addr = (unsigned long)from; | 20 | unsigned long ret; |
20 | unsigned long size, len = 0; | ||
21 | struct page *page; | ||
22 | void *map; | ||
23 | int ret; | ||
24 | 21 | ||
25 | if (__range_not_ok(from, n, TASK_SIZE)) | 22 | if (__range_not_ok(from, n, TASK_SIZE)) |
26 | return len; | 23 | return 0; |
27 | 24 | ||
28 | do { | 25 | /* |
29 | ret = __get_user_pages_fast(addr, 1, 0, &page); | 26 | * Even though this function is typically called from NMI/IRQ context |
30 | if (!ret) | 27 | * disable pagefaults so that its behaviour is consistent even when |
31 | break; | 28 | * called from other contexts. |

32 | 29 | */ | |
33 | offset = addr & (PAGE_SIZE - 1); | 30 | pagefault_disable(); |
34 | size = min(PAGE_SIZE - offset, n - len); | 31 | ret = __copy_from_user_inatomic(to, from, n); |
35 | 32 | pagefault_enable(); | |
36 | map = kmap_atomic(page); | 33 | |
37 | memcpy(to, map+offset, size); | 34 | return ret; |
38 | kunmap_atomic(map); | ||
39 | put_page(page); | ||
40 | |||
41 | len += size; | ||
42 | to += size; | ||
43 | addr += size; | ||
44 | |||
45 | } while (len < n); | ||
46 | |||
47 | return len; | ||
48 | } | 35 | } |
49 | EXPORT_SYMBOL_GPL(copy_from_user_nmi); | 36 | EXPORT_SYMBOL_GPL(copy_from_user_nmi); |
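Note that the rewrite above also changes what copy_from_user_nmi() returns: the old GUP-based loop returned the number of bytes it managed to copy, while __copy_from_user_inatomic() returns the number of bytes left uncopied, so success is now ret == 0. The oprofile callers later in this diff are updated to match. A hypothetical caller written against the new convention, mirroring dump_user_backtrace() below:

/* Hypothetical caller: with the new return convention, any non-zero value
 * means part of the frame could not be read and the walk must stop. */
static struct stack_frame *read_user_frame(struct stack_frame __user *head,
                                           struct stack_frame *buf)
{
        if (copy_from_user_nmi(buf, head, sizeof(*buf)) != 0)
                return NULL;

        return buf->next_frame;
}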
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 3eb18acd0e40..e2f5e21c03b3 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c | |||
@@ -654,14 +654,13 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); | |||
654 | * Returns number of bytes that could not be copied. | 654 | * Returns number of bytes that could not be copied. |
655 | * On success, this will be zero. | 655 | * On success, this will be zero. |
656 | */ | 656 | */ |
657 | unsigned long | 657 | unsigned long _copy_to_user(void __user *to, const void *from, unsigned n) |
658 | copy_to_user(void __user *to, const void *from, unsigned long n) | ||
659 | { | 658 | { |
660 | if (access_ok(VERIFY_WRITE, to, n)) | 659 | if (access_ok(VERIFY_WRITE, to, n)) |
661 | n = __copy_to_user(to, from, n); | 660 | n = __copy_to_user(to, from, n); |
662 | return n; | 661 | return n; |
663 | } | 662 | } |
664 | EXPORT_SYMBOL(copy_to_user); | 663 | EXPORT_SYMBOL(_copy_to_user); |
665 | 664 | ||
666 | /** | 665 | /** |
667 | * copy_from_user: - Copy a block of data from user space. | 666 | * copy_from_user: - Copy a block of data from user space. |
@@ -679,8 +678,7 @@ EXPORT_SYMBOL(copy_to_user); | |||
679 | * If some data could not be copied, this function will pad the copied | 678 | * If some data could not be copied, this function will pad the copied |
680 | * data to the requested size using zero bytes. | 679 | * data to the requested size using zero bytes. |
681 | */ | 680 | */ |
682 | unsigned long | 681 | unsigned long _copy_from_user(void *to, const void __user *from, unsigned n) |
683 | _copy_from_user(void *to, const void __user *from, unsigned long n) | ||
684 | { | 682 | { |
685 | if (access_ok(VERIFY_READ, from, n)) | 683 | if (access_ok(VERIFY_READ, from, n)) |
686 | n = __copy_from_user(to, from, n); | 684 | n = __copy_from_user(to, from, n); |
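The 32-bit out-of-line routines are renamed to _copy_to_user()/_copy_from_user() and now take an unsigned count. Presumably (this hunk does not show the header side) the unprefixed copy_to_user()/copy_from_user() become inline wrappers in uaccess.h that can do compile-time size checking before calling the underscored functions. A rough sketch of that shape, offered as an assumption rather than the actual header code:

/* Assumed shape of the header-side wrapper, for illustration only. */
static __always_inline unsigned long
copy_to_user(void __user *to, const void *from, unsigned long n)
{
        /* a real implementation could reject provably oversized constant
         * sizes at compile time here before dropping to the library call */
        return _copy_to_user(to, from, n);
}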
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 3aaeffcfd67a..7a517bb41060 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -51,7 +51,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr) | |||
51 | return 0; | 51 | return 0; |
52 | } | 52 | } |
53 | 53 | ||
54 | static inline int __kprobes notify_page_fault(struct pt_regs *regs) | 54 | static inline int __kprobes kprobes_fault(struct pt_regs *regs) |
55 | { | 55 | { |
56 | int ret = 0; | 56 | int ret = 0; |
57 | 57 | ||
@@ -1048,7 +1048,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1048 | return; | 1048 | return; |
1049 | 1049 | ||
1050 | /* kprobes don't want to hook the spurious faults: */ | 1050 | /* kprobes don't want to hook the spurious faults: */ |
1051 | if (notify_page_fault(regs)) | 1051 | if (kprobes_fault(regs)) |
1052 | return; | 1052 | return; |
1053 | /* | 1053 | /* |
1054 | * Don't take the mm semaphore here. If we fixup a prefetch | 1054 | * Don't take the mm semaphore here. If we fixup a prefetch |
@@ -1060,23 +1060,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1060 | } | 1060 | } |
1061 | 1061 | ||
1062 | /* kprobes don't want to hook the spurious faults: */ | 1062 | /* kprobes don't want to hook the spurious faults: */ |
1063 | if (unlikely(notify_page_fault(regs))) | 1063 | if (unlikely(kprobes_fault(regs))) |
1064 | return; | 1064 | return; |
1065 | /* | ||
1066 | * It's safe to allow irq's after cr2 has been saved and the | ||
1067 | * vmalloc fault has been handled. | ||
1068 | * | ||
1069 | * User-mode registers count as a user access even for any | ||
1070 | * potential system fault or CPU buglet: | ||
1071 | */ | ||
1072 | if (user_mode_vm(regs)) { | ||
1073 | local_irq_enable(); | ||
1074 | error_code |= PF_USER; | ||
1075 | flags |= FAULT_FLAG_USER; | ||
1076 | } else { | ||
1077 | if (regs->flags & X86_EFLAGS_IF) | ||
1078 | local_irq_enable(); | ||
1079 | } | ||
1080 | 1065 | ||
1081 | if (unlikely(error_code & PF_RSVD)) | 1066 | if (unlikely(error_code & PF_RSVD)) |
1082 | pgtable_bad(regs, error_code, address); | 1067 | pgtable_bad(regs, error_code, address); |
@@ -1088,8 +1073,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1088 | } | 1073 | } |
1089 | } | 1074 | } |
1090 | 1075 | ||
1091 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); | ||
1092 | |||
1093 | /* | 1076 | /* |
1094 | * If we're in an interrupt, have no user context or are running | 1077 | * If we're in an interrupt, have no user context or are running |
1095 | * in an atomic region then we must not take the fault: | 1078 | * in an atomic region then we must not take the fault: |
@@ -1099,6 +1082,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1099 | return; | 1082 | return; |
1100 | } | 1083 | } |
1101 | 1084 | ||
1085 | /* | ||
1086 | * It's safe to allow irq's after cr2 has been saved and the | ||
1087 | * vmalloc fault has been handled. | ||
1088 | * | ||
1089 | * User-mode registers count as a user access even for any | ||
1090 | * potential system fault or CPU buglet: | ||
1091 | */ | ||
1092 | if (user_mode_vm(regs)) { | ||
1093 | local_irq_enable(); | ||
1094 | error_code |= PF_USER; | ||
1095 | flags |= FAULT_FLAG_USER; | ||
1096 | } else { | ||
1097 | if (regs->flags & X86_EFLAGS_IF) | ||
1098 | local_irq_enable(); | ||
1099 | } | ||
1100 | |||
1101 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); | ||
1102 | |||
1102 | if (error_code & PF_WRITE) | 1103 | if (error_code & PF_WRITE) |
1103 | flags |= FAULT_FLAG_WRITE; | 1104 | flags |= FAULT_FLAG_WRITE; |
1104 | 1105 | ||
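Taken together, the fault.c hunks reorder __do_page_fault(): the early bailout for faults taken in atomic context (or with no mm) now happens before interrupts are conditionally re-enabled and before the PERF_COUNT_SW_PAGE_FAULTS event is accounted, instead of after. A condensed paraphrase of the resulting order, pieced together from the hunks above (not the complete function):

/* Condensed paraphrase of __do_page_fault() after this change: */
        if (unlikely(kprobes_fault(regs)))
                return;

        if (unlikely(error_code & PF_RSVD))
                pgtable_bad(regs, error_code, address);

        /* in interrupt, no user context, or atomic region: do not take the fault */
        if (unlikely(in_atomic() || !mm)) {
                bad_area_nosemaphore(regs, error_code, address);
                return;
        }

        /* only now is it safe and useful to re-enable IRQs and count the fault */
        if (user_mode_vm(regs)) {
                local_irq_enable();
                error_code |= PF_USER;
                flags |= FAULT_FLAG_USER;
        } else if (regs->flags & X86_EFLAGS_IF) {
                local_irq_enable();
        }

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

        if (error_code & PF_WRITE)
                flags |= FAULT_FLAG_WRITE;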
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 79c216aa0e2b..516593e1ce33 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -772,13 +772,21 @@ out: | |||
772 | return; | 772 | return; |
773 | } | 773 | } |
774 | 774 | ||
775 | static void bpf_jit_free_deferred(struct work_struct *work) | ||
776 | { | ||
777 | struct sk_filter *fp = container_of(work, struct sk_filter, work); | ||
778 | unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; | ||
779 | struct bpf_binary_header *header = (void *)addr; | ||
780 | |||
781 | set_memory_rw(addr, header->pages); | ||
782 | module_free(NULL, header); | ||
783 | kfree(fp); | ||
784 | } | ||
785 | |||
775 | void bpf_jit_free(struct sk_filter *fp) | 786 | void bpf_jit_free(struct sk_filter *fp) |
776 | { | 787 | { |
777 | if (fp->bpf_func != sk_run_filter) { | 788 | if (fp->bpf_func != sk_run_filter) { |
778 | unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; | 789 | INIT_WORK(&fp->work, bpf_jit_free_deferred); |
779 | struct bpf_binary_header *header = (void *)addr; | 790 | schedule_work(&fp->work); |
780 | |||
781 | set_memory_rw(addr, header->pages); | ||
782 | module_free(NULL, header); | ||
783 | } | 791 | } |
784 | } | 792 | } |
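bpf_jit_free() can be reached from contexts where set_memory_rw() and module_free() must not run directly (presumably because the socket-filter release path can end up in softirq context via RCU), so the teardown is now deferred to process context through the work item in struct sk_filter. The same pattern in miniature, as a generic illustration rather than the kernel code:

/* Generic deferred-free pattern: the object embeds a work_struct, the release
 * path only queues the work, and the possibly-sleeping cleanup runs later in
 * process context. */
#include <linux/workqueue.h>
#include <linux/slab.h>

struct deferred_obj {
        struct work_struct work;
        void *payload;
};

static void deferred_obj_free_work(struct work_struct *work)
{
        struct deferred_obj *obj = container_of(work, struct deferred_obj, work);

        /* sleeping operations are fine here */
        kfree(obj->payload);
        kfree(obj);
}

static void deferred_obj_free(struct deferred_obj *obj)
{
        INIT_WORK(&obj->work, deferred_obj_free_work);
        schedule_work(&obj->work);      /* safe to call from atomic context */
}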
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index d6aa6e8315d1..5d04be5efb64 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -47,7 +47,7 @@ dump_user_backtrace_32(struct stack_frame_ia32 *head) | |||
47 | unsigned long bytes; | 47 | unsigned long bytes; |
48 | 48 | ||
49 | bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); | 49 | bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); |
50 | if (bytes != sizeof(bufhead)) | 50 | if (bytes != 0) |
51 | return NULL; | 51 | return NULL; |
52 | 52 | ||
53 | fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); | 53 | fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); |
@@ -93,7 +93,7 @@ static struct stack_frame *dump_user_backtrace(struct stack_frame *head) | |||
93 | unsigned long bytes; | 93 | unsigned long bytes; |
94 | 94 | ||
95 | bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); | 95 | bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); |
96 | if (bytes != sizeof(bufhead)) | 96 | if (bytes != 0) |
97 | return NULL; | 97 | return NULL; |
98 | 98 | ||
99 | oprofile_add_trace(bufhead[0].return_address); | 99 | oprofile_add_trace(bufhead[0].return_address); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d1e4777b4e75..31d04758b76f 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -278,6 +278,15 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
278 | old memory can be recycled */ | 278 | old memory can be recycled */ |
279 | make_lowmem_page_readwrite(xen_initial_gdt); | 279 | make_lowmem_page_readwrite(xen_initial_gdt); |
280 | 280 | ||
281 | #ifdef CONFIG_X86_32 | ||
282 | /* | ||
283 | * Xen starts us with XEN_FLAT_RING1_DS, but linux code | ||
284 | * expects __USER_DS | ||
285 | */ | ||
286 | loadsegment(ds, __USER_DS); | ||
287 | loadsegment(es, __USER_DS); | ||
288 | #endif | ||
289 | |||
281 | xen_filter_cpu_maps(); | 290 | xen_filter_cpu_maps(); |
282 | xen_setup_vcpu_info_placement(); | 291 | xen_setup_vcpu_info_placement(); |
283 | } | 292 | } |