Diffstat (limited to 'arch/x86')
150 files changed, 3346 insertions, 1528 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf92b0ce..5c0ed72c02a2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -16,6 +16,7 @@ config X86_64
 	def_bool y
 	depends on 64BIT
 	select X86_DEV_DMA_OPS
+	select ARCH_USE_CMPXCHG_LOCKREF
 
 ### Arch settings
 config X86
@@ -81,7 +82,6 @@ config X86
 	select HAVE_USER_RETURN_NOTIFIER
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select HAVE_ARCH_JUMP_LABEL
-	select HAVE_TEXT_POKE_SMP
 	select HAVE_GENERIC_HARDIRQS
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select SPARSE_IRQ
@@ -632,6 +632,7 @@ config PARAVIRT_DEBUG
 config PARAVIRT_SPINLOCKS
 	bool "Paravirtualization layer for spinlocks"
 	depends on PARAVIRT && SMP
+	select UNINLINE_SPIN_UNLOCK
 	---help---
 	  Paravirtualized spinlocks allow a pvops backend to replace the
 	  spinlock implementation with something virtualization-friendly
@@ -656,6 +657,15 @@ config KVM_GUEST
 	  underlying device model, the host provides the guest with
 	  timing infrastructure such as time of day, and system time
 
+config KVM_DEBUG_FS
+	bool "Enable debug information for KVM Guests in debugfs"
+	depends on KVM_GUEST && DEBUG_FS
+	default n
+	---help---
+	  This option enables collection of various statistics for KVM guests.
+	  Statistics are displayed in the debugfs filesystem. Enabling this
+	  option may incur significant overhead.
+
 source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT_TIME_ACCOUNTING
@@ -1344,8 +1354,12 @@ config ARCH_SELECT_MEMORY_MODEL
 	depends on ARCH_SPARSEMEM_ENABLE
 
 config ARCH_MEMORY_PROBE
-	def_bool y
+	bool "Enable sysfs memory/probe interface"
 	depends on X86_64 && MEMORY_HOTPLUG
+	help
+	  This option enables a sysfs memory/probe interface for testing.
+	  See Documentation/memory-hotplug.txt for more information.
+	  If you are unsure how to answer this question, answer N.
 
 config ARCH_PROC_KCORE_TEXT
 	def_bool y
@@ -1627,9 +1641,9 @@ config KEXEC
 
 	  It is an ongoing process to be certain the hardware in a machine
 	  is properly shutdown, so do not be surprised if this code does not
-	  initially work for you. It may help to enable device hotplugging
-	  support. As of this writing the exact hardware interface is
-	  strongly in flux, so no good recommendation can be made.
+	  initially work for you. As of this writing the exact hardware
+	  interface is strongly in flux, so no good recommendation can be
+	  made.
 
 config CRASH_DUMP
 	bool "kernel crash dumps"
@@ -1716,9 +1730,10 @@ config X86_NEED_RELOCS
 	depends on X86_32 && RELOCATABLE
 
 config PHYSICAL_ALIGN
-	hex "Alignment value to which kernel should be aligned" if X86_32
+	hex "Alignment value to which kernel should be aligned"
 	default "0x1000000"
-	range 0x2000 0x1000000
+	range 0x2000 0x1000000 if X86_32
+	range 0x200000 0x1000000 if X86_64
 	---help---
 	  This value puts the alignment restrictions on physical address
 	  where kernel is loaded and run from. Kernel is compiled for an
@@ -1736,6 +1751,9 @@ config PHYSICAL_ALIGN
 	  end result is that kernel runs from a physical address meeting
 	  above alignment restrictions.
 
+	  On 32-bit this value must be a multiple of 0x2000. On 64-bit
+	  this value must be a multiple of 0x200000.
+
 	  Don't change this unless you know what you are doing.
 
 config HOTPLUG_CPU
@@ -2270,6 +2288,32 @@ config RAPIDIO
 
 source "drivers/rapidio/Kconfig"
 
+config X86_SYSFB
+	bool "Mark VGA/VBE/EFI FB as generic system framebuffer"
+	help
+	  Firmwares often provide initial graphics framebuffers so the BIOS,
+	  bootloader or kernel can show basic video-output during boot for
+	  user-guidance and debugging. Historically, x86 used the VESA BIOS
+	  Extensions and EFI-framebuffers for this, which are mostly limited
+	  to x86.
+	  This option, if enabled, marks VGA/VBE/EFI framebuffers as generic
+	  framebuffers so the new generic system-framebuffer drivers can be
+	  used on x86. If the framebuffer is not compatible with the generic
+	  modes, it is advertised as fallback platform framebuffer so legacy
+	  drivers like efifb, vesafb and uvesafb can pick it up.
+	  If this option is not selected, all system framebuffers are always
+	  marked as fallback platform framebuffers as usual.
+
+	  Note: Legacy fbdev drivers, including vesafb, efifb, uvesafb, will
+	  not be able to pick up generic system framebuffers if this option
+	  is selected. You are highly encouraged to enable simplefb as
+	  replacement if you select this option. simplefb can correctly deal
+	  with generic system framebuffers. But you should still keep vesafb
+	  and others enabled as fallback if a system framebuffer is
+	  incompatible with simplefb.
+
+	  If unsure, say Y.
+
 endmenu
 
 
@@ -2332,10 +2376,6 @@ config HAVE_ATOMIC_IOMAP
 	def_bool y
 	depends on X86_32
 
-config HAVE_TEXT_POKE_SMP
-	bool
-	select STOP_MACHINE if SMP
-
 config X86_DEV_DMA_OPS
 	bool
 	depends on X86_64 || STA2X11
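Editor's note: the new PHYSICAL_ALIGN help above says the value must be a multiple of 0x2000 on 32-bit and 0x200000 on 64-bit. Below is a minimal, hypothetical user-space sketch of the power-of-two round-up/alignment check this implies; PHYSICAL_ALIGN here is a stand-in for CONFIG_PHYSICAL_ALIGN and the mask trick assumes the alignment is a power of two, as the Kconfig ranges enforce.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for a 64-bit CONFIG_PHYSICAL_ALIGN value. */
#define PHYSICAL_ALIGN 0x200000ULL

/* Round a candidate load address up to the next aligned boundary. */
static uint64_t align_up(uint64_t addr, uint64_t align)
{
	return (addr + align - 1) & ~(align - 1);
}

int main(void)
{
	uint64_t addr = 0x1234567;

	printf("%#llx -> %#llx (already aligned: %s)\n",
	       (unsigned long long)addr,
	       (unsigned long long)align_up(addr, PHYSICAL_ALIGN),
	       (addr & (PHYSICAL_ALIGN - 1)) ? "no" : "yes");
	return 0;
}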
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 07639c656fcd..41250fb33985 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -16,6 +16,10 @@ endif
 # e.g.: obj-y += foo_$(BITS).o
 export BITS
 
+ifdef CONFIG_X86_NEED_RELOCS
+        LDFLAGS_vmlinux := --emit-relocs
+endif
+
 ifeq ($(CONFIG_X86_32),y)
         BITS := 32
         UTS_MACHINE := i386
@@ -25,10 +29,6 @@ ifeq ($(CONFIG_X86_32),y)
         KBUILD_AFLAGS += $(biarch)
         KBUILD_CFLAGS += $(biarch)
 
-ifdef CONFIG_RELOCATABLE
-        LDFLAGS_vmlinux := --emit-relocs
-endif
-
         KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return
 
         # Never want PIC in a 32-bit kernel, prevent breakage with GCC built
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 5b7531966b84..ef72baeff484 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -355,6 +355,7 @@ int strncmp(const char *cs, const char *ct, size_t count);
 size_t strnlen(const char *s, size_t maxlen);
 unsigned int atou(const char *s);
 unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
+size_t strlen(const char *s);
 
 /* tty.c */
 void puts(const char *);
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index d606463aa6d6..b7388a425f09 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -225,7 +225,7 @@ static void low_free(unsigned long size, unsigned long addr)
 	unsigned long nr_pages;
 
 	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
-	efi_call_phys2(sys_table->boottime->free_pages, addr, size);
+	efi_call_phys2(sys_table->boottime->free_pages, addr, nr_pages);
 }
 
 static void find_bits(unsigned long mask, u8 *pos, u8 *size)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 1e3184f6072f..5d6f6891b188 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -181,8 +181,9 @@ relocated:
 /*
  * Do the decompression, and jump to the new kernel..
  */
-	leal	z_extract_offset_negative(%ebx), %ebp
 				/* push arguments for decompress_kernel: */
+	pushl	$z_output_len	/* decompressed length */
+	leal	z_extract_offset_negative(%ebx), %ebp
 	pushl	%ebp		/* output address */
 	pushl	$z_input_len	/* input_len */
 	leal	input_data(%ebx), %eax
@@ -191,33 +192,7 @@ relocated:
 	pushl	%eax		/* heap area */
 	pushl	%esi		/* real mode pointer */
 	call	decompress_kernel
-	addl	$20, %esp
-
-#if CONFIG_RELOCATABLE
-/*
- * Find the address of the relocations.
- */
-	leal	z_output_len(%ebp), %edi
-
-/*
- * Calculate the delta between where vmlinux was compiled to run
- * and where it was actually loaded.
- */
-	movl	%ebp, %ebx
-	subl	$LOAD_PHYSICAL_ADDR, %ebx
-	jz	2f	/* Nothing to be done if loaded at compiled addr. */
-/*
- * Process relocations.
- */
-
-1:	subl	$4, %edi
-	movl	(%edi), %ecx
-	testl	%ecx, %ecx
-	jz	2f
-	addl	%ebx, -__PAGE_OFFSET(%ebx, %ecx)
-	jmp	1b
-2:
-#endif
+	addl	$24, %esp
 
 /*
  * Jump to the decompressed kernel.
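Editor's note: the stack-cleanup change ($20 to $24) follows directly from decompress_kernel() gaining a sixth argument; on 32-bit all arguments are pushed, so the caller pops 6 x 4 = 24 bytes. A trivial sketch just to make that arithmetic and the resulting stack layout explicit (illustrative only, not kernel code):

#include <stdio.h>

int main(void)
{
	/* Arguments pushed right-to-left in head_32.S, so seen by
	 * decompress_kernel() as:
	 *   esp+0  real mode pointer (%esi)
	 *   esp+4  heap area         (%eax)
	 *   esp+8  input_data
	 *   esp+12 input_len
	 *   esp+16 output address    (%ebp)
	 *   esp+20 decompressed length (z_output_len)
	 */
	const int nargs = 6;

	printf("addl $%d, %%esp\n", nargs * 4);	/* prints: addl $24, %esp */
	return 0;
}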
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 06e71c2c16bf..c337422b575d 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -338,6 +338,7 @@ relocated:
 	leaq	input_data(%rip), %rdx  /* input_data */
 	movl	$z_input_len, %ecx	/* input_len */
 	movq	%rbp, %r8		/* output target address */
+	movq	$z_output_len, %r9	/* decompressed length */
 	call	decompress_kernel
 	popq	%rsi
 
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 0319c88290a5..434f077d2c4d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -271,6 +271,79 @@ static void error(char *x)
 	asm("hlt");
 }
 
+#if CONFIG_X86_NEED_RELOCS
+static void handle_relocations(void *output, unsigned long output_len)
+{
+	int *reloc;
+	unsigned long delta, map, ptr;
+	unsigned long min_addr = (unsigned long)output;
+	unsigned long max_addr = min_addr + output_len;
+
+	/*
+	 * Calculate the delta between where vmlinux was linked to load
+	 * and where it was actually loaded.
+	 */
+	delta = min_addr - LOAD_PHYSICAL_ADDR;
+	if (!delta) {
+		debug_putstr("No relocation needed... ");
+		return;
+	}
+	debug_putstr("Performing relocations... ");
+
+	/*
+	 * The kernel contains a table of relocation addresses. Those
+	 * addresses have the final load address of the kernel in virtual
+	 * memory. We are currently working in the self map. So we need to
+	 * create an adjustment for kernel memory addresses to the self map.
+	 * This will involve subtracting out the base address of the kernel.
+	 */
+	map = delta - __START_KERNEL_map;
+
+	/*
+	 * Process relocations: 32 bit relocations first then 64 bit after.
+	 * Two sets of binary relocations are added to the end of the kernel
+	 * before compression. Each relocation table entry is the kernel
+	 * address of the location which needs to be updated stored as a
+	 * 32-bit value which is sign extended to 64 bits.
+	 *
+	 * Format is:
+	 *
+	 * kernel bits...
+	 * 0 - zero terminator for 64 bit relocations
+	 * 64 bit relocation repeated
+	 * 0 - zero terminator for 32 bit relocations
+	 * 32 bit relocation repeated
+	 *
+	 * So we work backwards from the end of the decompressed image.
+	 */
+	for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) {
+		int extended = *reloc;
+		extended += map;
+
+		ptr = (unsigned long)extended;
+		if (ptr < min_addr || ptr > max_addr)
+			error("32-bit relocation outside of kernel!\n");
+
+		*(uint32_t *)ptr += delta;
+	}
+#ifdef CONFIG_X86_64
+	for (reloc--; *reloc; reloc--) {
+		long extended = *reloc;
+		extended += map;
+
+		ptr = (unsigned long)extended;
+		if (ptr < min_addr || ptr > max_addr)
+			error("64-bit relocation outside of kernel!\n");
+
+		*(uint64_t *)ptr += delta;
+	}
+#endif
+}
+#else
+static inline void handle_relocations(void *output, unsigned long output_len)
+{ }
+#endif
+
 static void parse_elf(void *output)
 {
 #ifdef CONFIG_X86_64
@@ -325,7 +398,8 @@ static void parse_elf(void *output)
 asmlinkage void decompress_kernel(void *rmode, memptr heap,
 				  unsigned char *input_data,
 				  unsigned long input_len,
-				  unsigned char *output)
+				  unsigned char *output,
+				  unsigned long output_len)
 {
 	real_mode = rmode;
 
@@ -365,6 +439,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	debug_putstr("\nDecompressing Linux... ");
 	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
 	parse_elf(output);
+	handle_relocations(output, output_len);
 	debug_putstr("done.\nBooting the kernel.\n");
 	return;
 }
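Editor's note: the comment in handle_relocations() describes the table appended to the image (low to high: kernel bits, a zero terminator, 64-bit entries, another zero terminator, then 32-bit entries) and says it is walked backwards from the end. A minimal user-space sketch of that backwards walk over a toy buffer; the entry values and the image_tail array are invented for illustration and are not real relocs output.

#include <stdint.h>
#include <stdio.h>

/* Toy image tail, low to high address, mirroring the comment's format. */
static int32_t image_tail[] = {
	0,                      /* zero terminator for 64-bit relocations */
	0x2000, 0x2010,         /* 64-bit relocation entries (sign-extended) */
	0,                      /* zero terminator for 32-bit relocations */
	0x1000, 0x1010, 0x1020, /* 32-bit relocation entries */
};

int main(void)
{
	/* Start at the last 4-byte word, exactly like handle_relocations(). */
	int32_t *reloc = &image_tail[sizeof(image_tail) / sizeof(image_tail[0]) - 1];

	for (; *reloc; reloc--)			/* 32-bit entries first */
		printf("32-bit reloc at %#x\n", *reloc);
	for (reloc--; *reloc; reloc--)		/* then 64-bit entries */
		printf("64-bit reloc at %#x\n", *reloc);
	return 0;
}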
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
index cdac91ca55d3..565083c16e5c 100644
--- a/arch/x86/boot/printf.c
+++ b/arch/x86/boot/printf.c
@@ -55,7 +55,7 @@ static char *number(char *str, long num, int base, int size, int precision,
 	locase = (type & SMALL);
 	if (type & LEFT)
 		type &= ~ZEROPAD;
-	if (base < 2 || base > 36)
+	if (base < 2 || base > 16)
 		return NULL;
 	c = (type & ZEROPAD) ? '0' : ' ';
 	sign = 0;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index bccfca68430e..665a730307f2 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -457,7 +457,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
+		compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 
 		if (ksig->ka.sa.sa_flags & SA_RESTORER)
 			restorer = ksig->ka.sa.sa_restorer;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 474dc1b59f72..4299eb05023c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -452,7 +452,7 @@ ia32_badsys:
 
 	CFI_ENDPROC
 
-	.macro PTREGSCALL label, func, arg
+	.macro PTREGSCALL label, func
 	ALIGN
 GLOBAL(\label)
 	leaq \func(%rip),%rax
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 2dfac58f3b11..b1977bad5435 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -86,6 +86,7 @@ extern int acpi_pci_disabled;
 extern int acpi_skip_timer_override;
 extern int acpi_use_timer_override;
 extern int acpi_fix_pin2_polarity;
+extern int acpi_disable_cmcff;
 
 extern u8 acpi_sci_flags;
 extern int acpi_sci_override_gsi;
@@ -168,6 +169,7 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
 
 #define acpi_lapic 0
 #define acpi_ioapic 0
+#define acpi_disable_cmcff 0
 static inline void acpi_noirq_set(void) { }
 static inline void acpi_disable_pci(void) { }
 static inline void disable_acpi(void) { }
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 58ed6d96a6ac..0a3f9c9f98d5 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -5,6 +5,7 @@
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <asm/asm.h>
+#include <asm/ptrace.h>
 
 /*
  * Alternative inline assembly for SMP.
@@ -220,20 +221,11 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
 * no thread can be preempted in the instructions being modified (no iret to an
 * invalid instruction possible) or if the instructions are changed from a
 * consistent state to another consistent state atomically.
- * More care must be taken when modifying code in the SMP case because of
- * Intel's errata. text_poke_smp() takes care that errata, but still
- * doesn't support NMI/MCE handler code modifying.
 * On the local CPU you need to be protected again NMI or MCE handlers seeing an
 * inconsistent instruction while you patch.
 */
-struct text_poke_param {
-	void *addr;
-	const void *opcode;
-	size_t len;
-};
-
 extern void *text_poke(void *addr, const void *opcode, size_t len);
-extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
-extern void text_poke_smp_batch(struct text_poke_param *params, int n);
+extern int poke_int3_handler(struct pt_regs *regs);
+extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
 
 #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f8119b582c3c..1d2091a226bc 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -715,4 +715,6 @@ static inline void exiting_ack_irq(void)
 	ack_APIC_irq();
 }
 
+extern void ioapic_zap_locks(void);
+
 #endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 1c2d247f65ce..4582e8e1cd1a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,21 +3,25 @@
 
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x)	x
+# define __ASM_FORM_RAW(x)     x
 # define __ASM_FORM_COMMA(x) x,
 #else
 # define __ASM_FORM(x)	" " #x " "
+# define __ASM_FORM_RAW(x)     #x
 # define __ASM_FORM_COMMA(x) " " #x ","
 #endif
 
 #ifdef CONFIG_X86_32
 # define __ASM_SEL(a,b) __ASM_FORM(a)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
 #else
 # define __ASM_SEL(a,b) __ASM_FORM(b)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
 #endif
 
 #define __ASM_SIZE(inst, ...)	__ASM_SEL(inst##l##__VA_ARGS__, \
 				  inst##q##__VA_ARGS__)
-#define __ASM_REG(reg)		__ASM_SEL(e##reg, r##reg)
+#define __ASM_REG(reg)		__ASM_SEL_RAW(e##reg, r##reg)
 
 #define _ASM_PTR	__ASM_SEL(.long, .quad)
 #define _ASM_ALIGN	__ASM_SEL(.balign 4, .balign 8)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 6dfd0195bb55..41639ce8fd63 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -15,6 +15,14 @@
 #include <linux/compiler.h>
 #include <asm/alternative.h>
 
+#if BITS_PER_LONG == 32
+# define _BITOPS_LONG_SHIFT 5
+#elif BITS_PER_LONG == 64
+# define _BITOPS_LONG_SHIFT 6
+#else
+# error "Unexpected BITS_PER_LONG"
+#endif
+
 #define BIT_64(n)			(U64_C(1) << (n))
 
 /*
@@ -59,7 +67,7 @@
  * restricted to acting on a single-word quantity.
  */
 static __always_inline void
-set_bit(unsigned int nr, volatile unsigned long *addr)
+set_bit(long nr, volatile unsigned long *addr)
 {
 	if (IS_IMMEDIATE(nr)) {
 		asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -81,7 +89,7 @@ set_bit(unsigned int nr, volatile unsigned long *addr)
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __set_bit(int nr, volatile unsigned long *addr)
+static inline void __set_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
 }
@@ -97,7 +105,7 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
  * in order to ensure changes are visible on other processors.
 */
 static __always_inline void
-clear_bit(int nr, volatile unsigned long *addr)
+clear_bit(long nr, volatile unsigned long *addr)
 {
 	if (IS_IMMEDIATE(nr)) {
 		asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -118,13 +126,13 @@ clear_bit(int nr, volatile unsigned long *addr)
 * clear_bit() is atomic and implies release semantics before the memory
 * operation. It can be used for an unlock.
 */
-static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
 	barrier();
 	clear_bit(nr, addr);
 }
 
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
+static inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
 }
@@ -141,7 +149,7 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
 * No memory barrier is required here, because x86 cannot reorder stores past
 * older loads. Same principle as spin_unlock.
 */
-static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
 	barrier();
 	__clear_bit(nr, addr);
@@ -159,7 +167,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
-static inline void __change_bit(int nr, volatile unsigned long *addr)
+static inline void __change_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
 }
@@ -173,7 +181,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
-static inline void change_bit(int nr, volatile unsigned long *addr)
+static inline void change_bit(long nr, volatile unsigned long *addr)
 {
 	if (IS_IMMEDIATE(nr)) {
 		asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -194,7 +202,7 @@ static inline void change_bit(int nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
-static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -212,7 +220,7 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
 * This is the same as test_and_set_bit on x86.
 */
 static __always_inline int
-test_and_set_bit_lock(int nr, volatile unsigned long *addr)
+test_and_set_bit_lock(long nr, volatile unsigned long *addr)
 {
 	return test_and_set_bit(nr, addr);
 }
@@ -226,7 +234,7 @@ test_and_set_bit_lock(int nr, volatile unsigned long *addr)
 * If two examples of this operation race, one can appear to succeed
 * but actually fail. You must protect multiple accesses with a lock.
 */
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -245,7 +253,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
-static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -272,7 +280,7 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
 * accessed from a hypervisor on the same CPU if running in a VM: don't change
 * this without also updating arch/x86/kernel/kvm.c
 */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -284,7 +292,7 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
 }
 
 /* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -304,7 +312,7 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
-static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -315,13 +323,13 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 	return oldbit;
 }
 
-static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
+static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
 {
-	return ((1UL << (nr % BITS_PER_LONG)) &
-		(addr[nr / BITS_PER_LONG])) != 0;
+	return ((1UL << (nr & (BITS_PER_LONG-1))) &
+		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
 }
 
-static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
 {
 	int oldbit;
 
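Editor's note: constant_test_bit() now derives the word index with nr >> _BITOPS_LONG_SHIFT and the bit position with nr & (BITS_PER_LONG - 1); for non-negative nr these match the old divide/modulo forms, and taking a long nr keeps the arithmetic valid for bitmaps larger than 2^31 bits. A small user-space check of that equivalence, using plain unsigned long rather than the kernel types:

#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))
/* Matches the kernel's _BITOPS_LONG_SHIFT: 5 on 32-bit, 6 on 64-bit. */
#define LONG_SHIFT (BITS_PER_LONG == 64 ? 6 : 5)

static int test_bit(long nr, const unsigned long *addr)
{
	return (addr[nr >> LONG_SHIFT] >> (nr & (BITS_PER_LONG - 1))) & 1;
}

int main(void)
{
	unsigned long map[4] = { 0 };
	long nr = 3 * (long)BITS_PER_LONG + 7;	/* bit 7 of word 3 */

	map[nr / (long)BITS_PER_LONG] |= 1UL << (nr % (long)BITS_PER_LONG);

	/* mask/shift form agrees with divide/modulo for nr >= 0 */
	assert(test_bit(nr, map) == 1);
	assert((nr >> LONG_SHIFT) == nr / (long)BITS_PER_LONG);
	assert((nr & (long)(BITS_PER_LONG - 1)) == nr % (long)BITS_PER_LONG);

	printf("word %ld, bit %ld set\n",
	       nr >> LONG_SHIFT, nr & (long)(BITS_PER_LONG - 1));
	return 0;
}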
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 653668d140f9..4a8cb8d7cbd5 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -35,9 +35,9 @@ static void sanitize_boot_params(struct boot_params *boot_params)
 	 */
 	if (boot_params->sentinel) {
 		/* fields in boot_params are left uninitialized, clear them */
-		memset(&boot_params->olpc_ofw_header, 0,
+		memset(&boot_params->ext_ramdisk_image, 0,
 		       (char *)&boot_params->efi_info -
-			(char *)&boot_params->olpc_ofw_header);
+			(char *)&boot_params->ext_ramdisk_image);
 		memset(&boot_params->kbd_status, 0,
 		       (char *)&boot_params->hdr -
 		       (char *)&boot_params->kbd_status);
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 46fc474fd819..f50de6951738 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -49,9 +49,15 @@ static inline __wsum csum_partial_copy_from_user(const void __user *src,
 						 int len, __wsum sum,
 						 int *err_ptr)
 {
+	__wsum ret;
+
 	might_sleep();
-	return csum_partial_copy_generic((__force void *)src, dst,
-					 len, sum, err_ptr, NULL);
+	stac();
+	ret = csum_partial_copy_generic((__force void *)src, dst,
+					len, sum, err_ptr, NULL);
+	clac();
+
+	return ret;
 }
 
 /*
@@ -176,10 +182,16 @@ static inline __wsum csum_and_copy_to_user(const void *src,
 					   int len, __wsum sum,
 					   int *err_ptr)
 {
+	__wsum ret;
+
 	might_sleep();
-	if (access_ok(VERIFY_WRITE, dst, len))
-		return csum_partial_copy_generic(src, (__force void *)dst,
-						 len, sum, NULL, err_ptr);
+	if (access_ok(VERIFY_WRITE, dst, len)) {
+		stac();
+		ret = csum_partial_copy_generic(src, (__force void *)dst,
+						len, sum, NULL, err_ptr);
+		clac();
+		return ret;
+	}
 
 	if (len)
 		*err_ptr = -EFAULT;
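Editor's note: the change above brackets the user-memory copy with stac()/clac(), the SMAP open/close pair, and makes sure every exit path runs clac(). A hedged, user-space sketch of just that bracketing shape; the stac()/clac() bodies below are stand-ins (in the kernel they emit the STAC/CLAC instructions when X86_FEATURE_SMAP is enabled), and copy_and_sum() is an invented placeholder, not the real csum helper.

#include <stdio.h>

static int smap_open;

static void stac(void) { smap_open = 1; }	/* allow user accesses  */
static void clac(void) { smap_open = 0; }	/* forbid user accesses */

static unsigned int copy_and_sum(const unsigned char *user_src,
				 unsigned char *dst, int len)
{
	unsigned int sum = 0;
	int i;

	stac();
	for (i = 0; i < len; i++) {	/* would fault under SMAP without stac() */
		dst[i] = user_src[i];
		sum += dst[i];
	}
	clac();			/* every return path must close the window */

	return sum;
}

int main(void)
{
	unsigned char src[4] = { 1, 2, 3, 4 }, dst[4];

	printf("sum=%u, smap_open=%d\n", copy_and_sum(src, dst, 4), smap_open);
	return 0;
}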
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index 9bfdc41629ec..e6fd8a026c7b 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -133,7 +133,7 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum);
 
 
 /* Do not call this directly. Use the wrappers below */
-extern __wsum csum_partial_copy_generic(const void *src, const void *dst,
+extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst,
 					int len, __wsum sum,
 					int *src_err_ptr, int *dst_err_ptr);
 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 47538a61c91b..d3f5c63078d8 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -366,9 +366,10 @@ extern bool __static_cpu_has_safe(u16 bit);
  */
 static __always_inline __pure bool __static_cpu_has(u16 bit)
 {
-#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
+#ifdef CC_HAVE_ASM_GOTO
 
 #ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
+
 	/*
 	 * Catch too early usage of this before alternatives
 	 * have run.
@@ -384,6 +385,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 		".previous\n"
 		/* skipping size check since replacement size = 0 */
 		: : "i" (X86_FEATURE_ALWAYS) : : t_warn);
+
 #endif
 
 	asm goto("1: jmp %l[t_no]\n"
@@ -406,7 +408,9 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 	warn_pre_alternatives();
 	return false;
 #endif
-#else /* GCC_VERSION >= 40500 */
+
+#else /* CC_HAVE_ASM_GOTO */
+
 	u8 flag;
 	/* Open-coded due to __stringify() in ALTERNATIVE() */
 	asm volatile("1: movb $0,%0\n"
@@ -427,7 +431,8 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 		     ".previous\n"
 		     : "=qm" (flag) : "i" (bit));
 	return flag;
-#endif
+
+#endif /* CC_HAVE_ASM_GOTO */
 }
 
 #define static_cpu_has(bit)					\
@@ -441,7 +446,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 
 static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 {
-#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
+#ifdef CC_HAVE_ASM_GOTO
 	/*
 	 * We need to spell the jumps to the compiler because, depending on the offset,
 	 * the replacement jump can be bigger than the original jump, and this we cannot
@@ -475,7 +480,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 	return false;
 t_dynamic:
 	return __static_cpu_has_safe(bit);
-#else /* GCC_VERSION >= 40500 */
+#else
 	u8 flag;
 	/* Open-coded due to __stringify() in ALTERNATIVE() */
 	asm volatile("1: movb $2,%0\n"
@@ -511,7 +516,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 		     : "=qm" (flag)
 		     : "i" (bit), "i" (X86_FEATURE_ALWAYS));
 	return (flag == 2 ? __static_cpu_has_safe(bit) : flag);
-#endif
+#endif /* CC_HAVE_ASM_GOTO */
 }
 
 #define static_cpu_has_safe(bit)				\
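Editor's note: the raw GCC version tests are replaced by CC_HAVE_ASM_GOTO, which the kbuild system defines when the compiler supports asm goto, the construct __static_cpu_has() is built on. A minimal stand-alone illustration of asm goto itself, independent of the kernel's alternatives machinery; it needs GCC 4.5+ or a Clang with asm goto support, and here the jump is never patched, so has_feature() always takes the t_no path.

#include <stdio.h>

/* Branch to a C label straight from inline assembly: the same mechanism
 * static_cpu_has() uses, minus the runtime patching of the jmp. */
static int has_feature(void)
{
	asm goto("jmp %l[t_no]\n\t"	/* in the kernel this is what gets patched */
		 : : : : t_no);
	return 1;			/* reached only if the jmp were patched out */
t_no:
	return 0;
}

int main(void)
{
	printf("feature: %s\n", has_feature() ? "yes" : "no");
	return 0;
}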
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index cccd07fa5e3a..779c2efe2e97 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -29,7 +29,7 @@ extern void e820_setup_gap(void);
 extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
 			unsigned long start_addr, unsigned long long end_addr);
 struct setup_data;
-extern void parse_e820_ext(struct setup_data *data);
+extern void parse_e820_ext(u64 phys_addr, u32 data_len);
 
 #if defined(CONFIG_X86_64) || \
 	(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index e4ac559c4a24..92b3bae08b74 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -26,56 +26,56 @@
 #include <asm/sections.h>
 
 /* Interrupt handlers registered during init_IRQ */
-extern void apic_timer_interrupt(void);
-extern void x86_platform_ipi(void);
-extern void kvm_posted_intr_ipi(void);
-extern void error_interrupt(void);
-extern void irq_work_interrupt(void);
+extern asmlinkage void apic_timer_interrupt(void);
+extern asmlinkage void x86_platform_ipi(void);
+extern asmlinkage void kvm_posted_intr_ipi(void);
+extern asmlinkage void error_interrupt(void);
+extern asmlinkage void irq_work_interrupt(void);
 
-extern void spurious_interrupt(void);
-extern void thermal_interrupt(void);
-extern void reschedule_interrupt(void);
+extern asmlinkage void spurious_interrupt(void);
+extern asmlinkage void thermal_interrupt(void);
+extern asmlinkage void reschedule_interrupt(void);
 
-extern void invalidate_interrupt(void);
-extern void invalidate_interrupt0(void);
-extern void invalidate_interrupt1(void);
-extern void invalidate_interrupt2(void);
-extern void invalidate_interrupt3(void);
-extern void invalidate_interrupt4(void);
-extern void invalidate_interrupt5(void);
-extern void invalidate_interrupt6(void);
-extern void invalidate_interrupt7(void);
-extern void invalidate_interrupt8(void);
-extern void invalidate_interrupt9(void);
-extern void invalidate_interrupt10(void);
-extern void invalidate_interrupt11(void);
-extern void invalidate_interrupt12(void);
-extern void invalidate_interrupt13(void);
-extern void invalidate_interrupt14(void);
-extern void invalidate_interrupt15(void);
-extern void invalidate_interrupt16(void);
-extern void invalidate_interrupt17(void);
-extern void invalidate_interrupt18(void);
-extern void invalidate_interrupt19(void);
-extern void invalidate_interrupt20(void);
-extern void invalidate_interrupt21(void);
-extern void invalidate_interrupt22(void);
-extern void invalidate_interrupt23(void);
-extern void invalidate_interrupt24(void);
-extern void invalidate_interrupt25(void);
-extern void invalidate_interrupt26(void);
-extern void invalidate_interrupt27(void);
-extern void invalidate_interrupt28(void);
-extern void invalidate_interrupt29(void);
-extern void invalidate_interrupt30(void);
-extern void invalidate_interrupt31(void);
+extern asmlinkage void invalidate_interrupt(void);
+extern asmlinkage void invalidate_interrupt0(void);
+extern asmlinkage void invalidate_interrupt1(void);
+extern asmlinkage void invalidate_interrupt2(void);
+extern asmlinkage void invalidate_interrupt3(void);
+extern asmlinkage void invalidate_interrupt4(void);
+extern asmlinkage void invalidate_interrupt5(void);
+extern asmlinkage void invalidate_interrupt6(void);
+extern asmlinkage void invalidate_interrupt7(void);
+extern asmlinkage void invalidate_interrupt8(void);
+extern asmlinkage void invalidate_interrupt9(void);
+extern asmlinkage void invalidate_interrupt10(void);
+extern asmlinkage void invalidate_interrupt11(void);
+extern asmlinkage void invalidate_interrupt12(void);
+extern asmlinkage void invalidate_interrupt13(void);
+extern asmlinkage void invalidate_interrupt14(void);
+extern asmlinkage void invalidate_interrupt15(void);
+extern asmlinkage void invalidate_interrupt16(void);
+extern asmlinkage void invalidate_interrupt17(void);
+extern asmlinkage void invalidate_interrupt18(void);
+extern asmlinkage void invalidate_interrupt19(void);
+extern asmlinkage void invalidate_interrupt20(void);
+extern asmlinkage void invalidate_interrupt21(void);
+extern asmlinkage void invalidate_interrupt22(void);
+extern asmlinkage void invalidate_interrupt23(void);
+extern asmlinkage void invalidate_interrupt24(void);
+extern asmlinkage void invalidate_interrupt25(void);
+extern asmlinkage void invalidate_interrupt26(void);
+extern asmlinkage void invalidate_interrupt27(void);
+extern asmlinkage void invalidate_interrupt28(void);
+extern asmlinkage void invalidate_interrupt29(void);
+extern asmlinkage void invalidate_interrupt30(void);
+extern asmlinkage void invalidate_interrupt31(void);
 
-extern void irq_move_cleanup_interrupt(void);
-extern void reboot_interrupt(void);
-extern void threshold_interrupt(void);
+extern asmlinkage void irq_move_cleanup_interrupt(void);
+extern asmlinkage void reboot_interrupt(void);
+extern asmlinkage void threshold_interrupt(void);
 
-extern void call_function_interrupt(void);
-extern void call_function_single_interrupt(void);
+extern asmlinkage void call_function_interrupt(void);
+extern asmlinkage void call_function_single_interrupt(void);
 
 #ifdef CONFIG_TRACING
 /* Interrupt handlers registered during init_IRQ */
@@ -172,22 +172,18 @@ extern atomic_t irq_mis_count;
 extern void eisa_set_level_irq(unsigned int irq);
 
 /* SMP */
-extern void smp_apic_timer_interrupt(struct pt_regs *);
-extern void smp_spurious_interrupt(struct pt_regs *);
-extern void smp_x86_platform_ipi(struct pt_regs *);
-extern void smp_error_interrupt(struct pt_regs *);
+extern __visible void smp_apic_timer_interrupt(struct pt_regs *);
+extern __visible void smp_spurious_interrupt(struct pt_regs *);
+extern __visible void smp_x86_platform_ipi(struct pt_regs *);
+extern __visible void smp_error_interrupt(struct pt_regs *);
 #ifdef CONFIG_X86_IO_APIC
 extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
 #endif
 #ifdef CONFIG_SMP
-extern void smp_reschedule_interrupt(struct pt_regs *);
-extern void smp_call_function_interrupt(struct pt_regs *);
-extern void smp_call_function_single_interrupt(struct pt_regs *);
-#ifdef CONFIG_X86_32
-extern void smp_invalidate_interrupt(struct pt_regs *);
-#else
-extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
-#endif
+extern __visible void smp_reschedule_interrupt(struct pt_regs *);
+extern __visible void smp_call_function_interrupt(struct pt_regs *);
+extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
+extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
 extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 2d4b5e6107cd..e42f758a0fbd 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -33,7 +33,7 @@ struct hypervisor_x86 {
 	const char	*name;
 
 	/* Detection routine */
-	bool		(*detect)(void);
+	uint32_t	(*detect)(void);
 
 	/* Adjust CPU feature bits (run once per CPU) */
 	void		(*set_cpu_features)(struct cpuinfo_x86 *);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 57873beb3292..0ea10f27d613 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -33,7 +33,7 @@ extern void (*x86_platform_ipi_callback)(void);
 extern void native_init_IRQ(void);
 extern bool handle_irq(unsigned irq, struct pt_regs *regs);
 
-extern unsigned int do_IRQ(struct pt_regs *regs);
+extern __visible unsigned int do_IRQ(struct pt_regs *regs);
 
 /* Interrupt vector management */
 extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 5a6d2873f80e..9454c167629f 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -49,10 +49,10 @@ typedef u8 kprobe_opcode_t;
 #define flush_insn_slot(p)	do { } while (0)
 
 /* optinsn template addresses */
-extern kprobe_opcode_t optprobe_template_entry;
-extern kprobe_opcode_t optprobe_template_val;
-extern kprobe_opcode_t optprobe_template_call;
-extern kprobe_opcode_t optprobe_template_end;
+extern __visible kprobe_opcode_t optprobe_template_entry;
+extern __visible kprobe_opcode_t optprobe_template_val;
+extern __visible kprobe_opcode_t optprobe_template_call;
+extern __visible kprobe_opcode_t optprobe_template_end;
 #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
 #define MAX_OPTINSN_SIZE 				\
 	(((unsigned long)&optprobe_template_end -	\
@@ -62,7 +62,7 @@ extern kprobe_opcode_t optprobe_template_end;
 extern const int kretprobe_blacklist_size;
 
 void arch_remove_kprobe(struct kprobe *p);
-void kretprobe_trampoline(void);
+asmlinkage void kretprobe_trampoline(void);
 
 /* Architecture specific copy of original instruction*/
 struct arch_specific_insn {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f87f7fcefa0a..c76ff74a98f2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -286,6 +286,7 @@ struct kvm_mmu {
 	u64 *pae_root;
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
+	u64 bad_mt_xwr;
 
 	/*
 	 * Bitmap: bit set = last pte in walk
@@ -323,6 +324,7 @@ struct kvm_pmu {
 	u64 global_ovf_ctrl;
 	u64 counter_bitmask[2];
 	u64 global_ctrl_mask;
+	u64 reserved_bits;
 	u8 version;
 	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
 	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
@@ -511,6 +513,14 @@ struct kvm_vcpu_arch {
 	 * instruction.
 	 */
 	bool write_fault_to_shadow_pgtable;
+
+	/* set at EPT violation at this point */
+	unsigned long exit_qualification;
+
+	/* pv related host specific info */
+	struct {
+		bool pv_unhalted;
+	} pv;
 };
 
 struct kvm_lpage_info {
@@ -802,8 +812,8 @@ extern u32 kvm_min_guest_tsc_khz;
 extern u32 kvm_max_guest_tsc_khz;
 
 enum emulation_result {
-	EMULATE_DONE,       /* no further processing */
-	EMULATE_DO_MMIO,    /* kvm_run filled with mmio request */
-	EMULATE_FAIL,       /* can't emulate this instruction */
+	EMULATE_DONE,         /* no further processing */
+	EMULATE_USER_EXIT,    /* kvm_run ready for userspace exit */
+	EMULATE_FAIL,         /* can't emulate this instruction */
 };
 
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 695399f2d5eb..1df115909758 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -85,26 +85,20 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, | |||
85 | return ret; | 85 | return ret; |
86 | } | 86 | } |
87 | 87 | ||
88 | static inline bool kvm_para_available(void) | 88 | static inline uint32_t kvm_cpuid_base(void) |
89 | { | 89 | { |
90 | unsigned int eax, ebx, ecx, edx; | ||
91 | char signature[13]; | ||
92 | |||
93 | if (boot_cpu_data.cpuid_level < 0) | 90 | if (boot_cpu_data.cpuid_level < 0) |
94 | return false; /* So we don't blow up on old processors */ | 91 | return 0; /* So we don't blow up on old processors */ |
95 | 92 | ||
96 | if (cpu_has_hypervisor) { | 93 | if (cpu_has_hypervisor) |
97 | cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); | 94 | return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0); |
98 | memcpy(signature + 0, &ebx, 4); | ||
99 | memcpy(signature + 4, &ecx, 4); | ||
100 | memcpy(signature + 8, &edx, 4); | ||
101 | signature[12] = 0; | ||
102 | 95 | ||
103 | if (strcmp(signature, "KVMKVMKVM") == 0) | 96 | return 0; |
104 | return true; | 97 | } |
105 | } | ||
106 | 98 | ||
107 | return false; | 99 | static inline bool kvm_para_available(void) |
100 | { | ||
101 | return kvm_cpuid_base() != 0; | ||
108 | } | 102 | } |
109 | 103 | ||
110 | static inline unsigned int kvm_arch_para_features(void) | 104 | static inline unsigned int kvm_arch_para_features(void) |
@@ -118,10 +112,20 @@ void kvm_async_pf_task_wait(u32 token); | |||
118 | void kvm_async_pf_task_wake(u32 token); | 112 | void kvm_async_pf_task_wake(u32 token); |
119 | u32 kvm_read_and_reset_pf_reason(void); | 113 | u32 kvm_read_and_reset_pf_reason(void); |
120 | extern void kvm_disable_steal_time(void); | 114 | extern void kvm_disable_steal_time(void); |
121 | #else | 115 | |
122 | #define kvm_guest_init() do { } while (0) | 116 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
117 | void __init kvm_spinlock_init(void); | ||
118 | #else /* !CONFIG_PARAVIRT_SPINLOCKS */ | ||
119 | static inline void kvm_spinlock_init(void) | ||
120 | { | ||
121 | } | ||
122 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
123 | |||
124 | #else /* CONFIG_KVM_GUEST */ | ||
125 | #define kvm_guest_init() do {} while (0) | ||
123 | #define kvm_async_pf_task_wait(T) do {} while(0) | 126 | #define kvm_async_pf_task_wait(T) do {} while(0) |
124 | #define kvm_async_pf_task_wake(T) do {} while(0) | 127 | #define kvm_async_pf_task_wake(T) do {} while(0) |
128 | |||
125 | static inline u32 kvm_read_and_reset_pf_reason(void) | 129 | static inline u32 kvm_read_and_reset_pf_reason(void) |
126 | { | 130 | { |
127 | return 0; | 131 | return 0; |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 29e3093bbd21..cbe6b9e404ce 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -32,11 +32,20 @@ | |||
32 | #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ | 32 | #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ |
33 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ | 33 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ |
34 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ | 34 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ |
35 | #define MCACOD 0xffff /* MCA Error Code */ | 35 | |
36 | /* | ||
37 | * Note that the full MCACOD field of IA32_MCi_STATUS MSR is | ||
38 | * bits 15:0. But bit 12 is the 'F' bit, defined for corrected | ||
39 | * errors to indicate that errors are being filtered by hardware. | ||
40 | * We should mask out bit 12 when looking for specific signatures | ||
41 | * of uncorrected errors - so the F bit is deliberately skipped | ||
42 | * in this #define. | ||
43 | */ | ||
44 | #define MCACOD 0xefff /* MCA Error Code */ | ||
36 | 45 | ||
37 | /* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ | 46 | /* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ |
38 | #define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ | 47 | #define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ |
39 | #define MCACOD_SCRUBMSK 0xfff0 | 48 | #define MCACOD_SCRUBMSK 0xeff0 /* Skip bit 12 ('F' bit) */ |
40 | #define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ | 49 | #define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ |
41 | #define MCACOD_DATA 0x0134 /* Data Load */ | 50 | #define MCACOD_DATA 0x0134 /* Data Load */ |
42 | #define MCACOD_INSTR 0x0150 /* Instruction Fetch */ | 51 | #define MCACOD_INSTR 0x0150 /* Instruction Fetch */ |
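
The new MCACOD/MCACOD_SCRUBMSK values simply drop bit 12 from the masks so that a hardware-set 'F' (filtering) bit cannot defeat a signature match. A minimal userspace sketch of that matching, with a made-up status value; the constants mirror the header above:

#include <stdint.h>
#include <stdio.h>

#define MCACOD          0xefff  /* MCA error code mask, bit 12 ('F') skipped */
#define MCACOD_SCRUB    0x00c0  /* 0xC0-0xCF memory scrubbing */
#define MCACOD_SCRUBMSK 0xeff0  /* scrub range mask, bit 12 skipped */

int main(void)
{
        uint64_t status = 0x10c5;       /* hypothetical scrub error with the F bit set */

        if ((status & MCACOD_SCRUBMSK) == MCACOD_SCRUB)
                printf("memory scrubbing error (F bit ignored)\n");
        if ((status & MCACOD) == 0x00c5)
                printf("exact code 0xC5 still matches despite the F bit\n");
        return 0;
}
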
@@ -188,6 +197,9 @@ extern void register_mce_write_callback(ssize_t (*)(struct file *filp, | |||
188 | const char __user *ubuf, | 197 | const char __user *ubuf, |
189 | size_t usize, loff_t *off)); | 198 | size_t usize, loff_t *off)); |
190 | 199 | ||
200 | /* Disable CMCI/polling for MCA bank claimed by firmware */ | ||
201 | extern void mce_disable_bank(int bank); | ||
202 | |||
191 | /* | 203 | /* |
192 | * Exception handler | 204 | * Exception handler |
193 | */ | 205 | */ |
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 50e5c58ced23..4c019179a57d 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h | |||
@@ -59,7 +59,7 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, | |||
59 | 59 | ||
60 | extern int __apply_microcode_amd(struct microcode_amd *mc_amd); | 60 | extern int __apply_microcode_amd(struct microcode_amd *mc_amd); |
61 | extern int apply_microcode_amd(int cpu); | 61 | extern int apply_microcode_amd(int cpu); |
62 | extern enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size); | 62 | extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); |
63 | 63 | ||
64 | #ifdef CONFIG_MICROCODE_AMD_EARLY | 64 | #ifdef CONFIG_MICROCODE_AMD_EARLY |
65 | #ifdef CONFIG_X86_32 | 65 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index cdbf36776106..be12c534fd59 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h | |||
@@ -45,22 +45,28 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |||
45 | /* Re-load page tables */ | 45 | /* Re-load page tables */ |
46 | load_cr3(next->pgd); | 46 | load_cr3(next->pgd); |
47 | 47 | ||
48 | /* stop flush ipis for the previous mm */ | 48 | /* Stop flush ipis for the previous mm */ |
49 | cpumask_clear_cpu(cpu, mm_cpumask(prev)); | 49 | cpumask_clear_cpu(cpu, mm_cpumask(prev)); |
50 | 50 | ||
51 | /* | 51 | /* Load the LDT, if the LDT is different: */ |
52 | * load the LDT, if the LDT is different: | ||
53 | */ | ||
54 | if (unlikely(prev->context.ldt != next->context.ldt)) | 52 | if (unlikely(prev->context.ldt != next->context.ldt)) |
55 | load_LDT_nolock(&next->context); | 53 | load_LDT_nolock(&next->context); |
56 | } | 54 | } |
57 | #ifdef CONFIG_SMP | 55 | #ifdef CONFIG_SMP |
58 | else { | 56 | else { |
59 | this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); | 57 | this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); |
60 | BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); | 58 | BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); |
61 | 59 | ||
62 | if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { | 60 | if (!cpumask_test_cpu(cpu, mm_cpumask(next))) { |
63 | /* We were in lazy tlb mode and leave_mm disabled | 61 | /* |
62 | * On established mms, the mm_cpumask is only changed | ||
63 | * from irq context, from ptep_clear_flush() while in | ||
64 | * lazy tlb mode, and here. Irqs are blocked during | ||
65 | * schedule, protecting us from simultaneous changes. | ||
66 | */ | ||
67 | cpumask_set_cpu(cpu, mm_cpumask(next)); | ||
68 | /* | ||
69 | * We were in lazy tlb mode and leave_mm disabled | ||
64 | * tlb flush IPI delivery. We must reload CR3 | 70 | * tlb flush IPI delivery. We must reload CR3 |
65 | * to make sure to use no freed page tables. | 71 | * to make sure to use no freed page tables. |
66 | */ | 72 | */ |
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index 2c543fff241b..e7e6751648ed 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h | |||
@@ -16,6 +16,20 @@ | |||
16 | * | 16 | * |
17 | * Atomically decrements @v and calls <fail_fn> if the result is negative. | 17 | * Atomically decrements @v and calls <fail_fn> if the result is negative. |
18 | */ | 18 | */ |
19 | #ifdef CC_HAVE_ASM_GOTO | ||
20 | static inline void __mutex_fastpath_lock(atomic_t *v, | ||
21 | void (*fail_fn)(atomic_t *)) | ||
22 | { | ||
23 | asm volatile goto(LOCK_PREFIX " decl %0\n" | ||
24 | " jns %l[exit]\n" | ||
25 | : : "m" (v->counter) | ||
26 | : "memory", "cc" | ||
27 | : exit); | ||
28 | fail_fn(v); | ||
29 | exit: | ||
30 | return; | ||
31 | } | ||
32 | #else | ||
19 | #define __mutex_fastpath_lock(v, fail_fn) \ | 33 | #define __mutex_fastpath_lock(v, fail_fn) \ |
20 | do { \ | 34 | do { \ |
21 | unsigned long dummy; \ | 35 | unsigned long dummy; \ |
@@ -32,6 +46,7 @@ do { \ | |||
32 | : "rax", "rsi", "rdx", "rcx", \ | 46 | : "rax", "rsi", "rdx", "rcx", \ |
33 | "r8", "r9", "r10", "r11", "memory"); \ | 47 | "r8", "r9", "r10", "r11", "memory"); \ |
34 | } while (0) | 48 | } while (0) |
49 | #endif | ||
35 | 50 | ||
36 | /** | 51 | /** |
37 | * __mutex_fastpath_lock_retval - try to take the lock by moving the count | 52 | * __mutex_fastpath_lock_retval - try to take the lock by moving the count |
@@ -56,6 +71,20 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count) | |||
56 | * | 71 | * |
57 | * Atomically increments @v and calls <fail_fn> if the result is nonpositive. | 72 | * Atomically increments @v and calls <fail_fn> if the result is nonpositive. |
58 | */ | 73 | */ |
74 | #ifdef CC_HAVE_ASM_GOTO | ||
75 | static inline void __mutex_fastpath_unlock(atomic_t *v, | ||
76 | void (*fail_fn)(atomic_t *)) | ||
77 | { | ||
78 | asm volatile goto(LOCK_PREFIX " incl %0\n" | ||
79 | " jg %l[exit]\n" | ||
80 | : : "m" (v->counter) | ||
81 | : "memory", "cc" | ||
82 | : exit); | ||
83 | fail_fn(v); | ||
84 | exit: | ||
85 | return; | ||
86 | } | ||
87 | #else | ||
59 | #define __mutex_fastpath_unlock(v, fail_fn) \ | 88 | #define __mutex_fastpath_unlock(v, fail_fn) \ |
60 | do { \ | 89 | do { \ |
61 | unsigned long dummy; \ | 90 | unsigned long dummy; \ |
@@ -72,6 +101,7 @@ do { \ | |||
72 | : "rax", "rsi", "rdx", "rcx", \ | 101 | : "rax", "rsi", "rdx", "rcx", \ |
73 | "r8", "r9", "r10", "r11", "memory"); \ | 102 | "r8", "r9", "r10", "r11", "memory"); \ |
74 | } while (0) | 103 | } while (0) |
104 | #endif | ||
75 | 105 | ||
76 | #define __mutex_slowpath_needs_to_unlock() 1 | 106 | #define __mutex_slowpath_needs_to_unlock() 1 |
77 | 107 | ||
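
The CC_HAVE_ASM_GOTO variants above replace the old register-clobbering call sequence with a two-instruction fast path that branches straight past the slowpath call. A minimal userspace rendition of the same pattern (the atomic_t wrapper, slowpath() and the counter values are made up), buildable with any gcc that supports asm goto:

#include <stdio.h>

typedef struct { int counter; } atomic_t;

static void slowpath(atomic_t *v)
{
        printf("slowpath: counter fell to %d\n", v->counter);
}

/* Mirrors __mutex_fastpath_lock(): lock-decrement, then jump over the
 * slowpath call while the result stays non-negative. */
static inline void fastpath_lock(atomic_t *v, void (*fail_fn)(atomic_t *))
{
        asm volatile goto("lock; decl %0\n\t"
                          "jns %l[exit]\n"
                          : : "m" (v->counter)
                          : "memory", "cc"
                          : exit);
        fail_fn(v);
exit:
        return;
}

int main(void)
{
        atomic_t v = { .counter = 1 };

        fastpath_lock(&v, slowpath);    /* 1 -> 0: uncontended, no call */
        fastpath_lock(&v, slowpath);    /* 0 -> -1: slowpath runs */
        return 0;
}
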
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index ef17af013475..f48b17df4224 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h | |||
@@ -15,6 +15,8 @@ | |||
15 | */ | 15 | */ |
16 | #define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) | 16 | #define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) |
17 | 17 | ||
18 | #define __START_KERNEL_map __PAGE_OFFSET | ||
19 | |||
18 | #define THREAD_SIZE_ORDER 1 | 20 | #define THREAD_SIZE_ORDER 1 |
19 | #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) | 21 | #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) |
20 | 22 | ||
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 6c896fbe21db..43dcd804ebd5 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h | |||
@@ -32,11 +32,6 @@ | |||
32 | */ | 32 | */ |
33 | #define __PAGE_OFFSET _AC(0xffff880000000000, UL) | 33 | #define __PAGE_OFFSET _AC(0xffff880000000000, UL) |
34 | 34 | ||
35 | #define __PHYSICAL_START ((CONFIG_PHYSICAL_START + \ | ||
36 | (CONFIG_PHYSICAL_ALIGN - 1)) & \ | ||
37 | ~(CONFIG_PHYSICAL_ALIGN - 1)) | ||
38 | |||
39 | #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) | ||
40 | #define __START_KERNEL_map _AC(0xffffffff80000000, UL) | 35 | #define __START_KERNEL_map _AC(0xffffffff80000000, UL) |
41 | 36 | ||
42 | /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ | 37 | /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ |
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 54c97879195e..f97fbe3abb67 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -33,6 +33,11 @@ | |||
33 | (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ | 33 | (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ |
34 | VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) | 34 | VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) |
35 | 35 | ||
36 | #define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ | ||
37 | CONFIG_PHYSICAL_ALIGN) | ||
38 | |||
39 | #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) | ||
40 | |||
36 | #ifdef CONFIG_X86_64 | 41 | #ifdef CONFIG_X86_64 |
37 | #include <asm/page_64_types.h> | 42 | #include <asm/page_64_types.h> |
38 | #else | 43 | #else |
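
Moving __PHYSICAL_START here also swaps the open-coded round-up for ALIGN(); both forms round CONFIG_PHYSICAL_START up to the alignment boundary. A quick check with made-up values (ALIGN() is redefined locally so the sketch stays self-contained):

#include <stdio.h>

#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned long start = 0x1234000;        /* hypothetical CONFIG_PHYSICAL_START */
        unsigned long align = 0x200000;         /* hypothetical CONFIG_PHYSICAL_ALIGN */
        unsigned long old_form = (start + (align - 1)) & ~(align - 1);

        printf("old=%#lx new=%#lx\n", old_form, ALIGN(start, align));
        return 0;
}
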
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index cfdc9ee4c900..401f350ef71b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, | |||
712 | 712 | ||
713 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) | 713 | #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) |
714 | 714 | ||
715 | static inline int arch_spin_is_locked(struct arch_spinlock *lock) | 715 | static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock, |
716 | __ticket_t ticket) | ||
716 | { | 717 | { |
717 | return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); | 718 | PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket); |
718 | } | 719 | } |
719 | 720 | ||
720 | static inline int arch_spin_is_contended(struct arch_spinlock *lock) | 721 | static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, |
722 | __ticket_t ticket) | ||
721 | { | 723 | { |
722 | return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); | 724 | PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket); |
723 | } | ||
724 | #define arch_spin_is_contended arch_spin_is_contended | ||
725 | |||
726 | static __always_inline void arch_spin_lock(struct arch_spinlock *lock) | ||
727 | { | ||
728 | PVOP_VCALL1(pv_lock_ops.spin_lock, lock); | ||
729 | } | ||
730 | |||
731 | static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock, | ||
732 | unsigned long flags) | ||
733 | { | ||
734 | PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags); | ||
735 | } | ||
736 | |||
737 | static __always_inline int arch_spin_trylock(struct arch_spinlock *lock) | ||
738 | { | ||
739 | return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); | ||
740 | } | ||
741 | |||
742 | static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) | ||
743 | { | ||
744 | PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); | ||
745 | } | 725 | } |
746 | 726 | ||
747 | #endif | 727 | #endif |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0db1fcac668c..aab8f671b523 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -327,13 +327,15 @@ struct pv_mmu_ops { | |||
327 | }; | 327 | }; |
328 | 328 | ||
329 | struct arch_spinlock; | 329 | struct arch_spinlock; |
330 | #ifdef CONFIG_SMP | ||
331 | #include <asm/spinlock_types.h> | ||
332 | #else | ||
333 | typedef u16 __ticket_t; | ||
334 | #endif | ||
335 | |||
330 | struct pv_lock_ops { | 336 | struct pv_lock_ops { |
331 | int (*spin_is_locked)(struct arch_spinlock *lock); | 337 | struct paravirt_callee_save lock_spinning; |
332 | int (*spin_is_contended)(struct arch_spinlock *lock); | 338 | void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket); |
333 | void (*spin_lock)(struct arch_spinlock *lock); | ||
334 | void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags); | ||
335 | int (*spin_trylock)(struct arch_spinlock *lock); | ||
336 | void (*spin_unlock)(struct arch_spinlock *lock); | ||
337 | }; | 339 | }; |
338 | 340 | ||
339 | /* This contains all the paravirt structures: we get a convenient | 341 | /* This contains all the paravirt structures: we get a convenient |
@@ -387,7 +389,8 @@ extern struct pv_lock_ops pv_lock_ops; | |||
387 | 389 | ||
388 | /* Simple instruction patching code. */ | 390 | /* Simple instruction patching code. */ |
389 | #define DEF_NATIVE(ops, name, code) \ | 391 | #define DEF_NATIVE(ops, name, code) \ |
390 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | 392 | extern const char start_##ops##_##name[] __visible, \ |
393 | end_##ops##_##name[] __visible; \ | ||
391 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | 394 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") |
392 | 395 | ||
393 | unsigned paravirt_patch_nop(void); | 396 | unsigned paravirt_patch_nop(void); |
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index f2b489cf1602..3bf2dd0cf61f 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h | |||
@@ -55,9 +55,53 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | |||
55 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) | 55 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
59 | |||
60 | /* | ||
61 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and | ||
62 | * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset | ||
63 | * into this range. | ||
64 | */ | ||
65 | #define PTE_FILE_MAX_BITS 28 | ||
66 | #define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) | ||
67 | #define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1) | ||
68 | #define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1) | ||
69 | #define PTE_FILE_SHIFT4 (_PAGE_BIT_SOFT_DIRTY + 1) | ||
70 | #define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) | ||
71 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) | ||
72 | #define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1) | ||
73 | |||
74 | #define pte_to_pgoff(pte) \ | ||
75 | ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \ | ||
76 | & ((1U << PTE_FILE_BITS1) - 1))) \ | ||
77 | + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \ | ||
78 | & ((1U << PTE_FILE_BITS2) - 1)) \ | ||
79 | << (PTE_FILE_BITS1)) \ | ||
80 | + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \ | ||
81 | & ((1U << PTE_FILE_BITS3) - 1)) \ | ||
82 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | ||
83 | + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \ | ||
84 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)) | ||
85 | |||
86 | #define pgoff_to_pte(off) \ | ||
87 | ((pte_t) { .pte_low = \ | ||
88 | ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ | ||
89 | + ((((off) >> PTE_FILE_BITS1) \ | ||
90 | & ((1U << PTE_FILE_BITS2) - 1)) \ | ||
91 | << PTE_FILE_SHIFT2) \ | ||
92 | + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | ||
93 | & ((1U << PTE_FILE_BITS3) - 1)) \ | ||
94 | << PTE_FILE_SHIFT3) \ | ||
95 | + ((((off) >> \ | ||
96 | (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \ | ||
97 | << PTE_FILE_SHIFT4) \ | ||
98 | + _PAGE_FILE }) | ||
99 | |||
100 | #else /* CONFIG_MEM_SOFT_DIRTY */ | ||
101 | |||
58 | /* | 102 | /* |
59 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, | 103 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, |
60 | * split up the 29 bits of offset into this range: | 104 | * split up the 29 bits of offset into this range. |
61 | */ | 105 | */ |
62 | #define PTE_FILE_MAX_BITS 29 | 106 | #define PTE_FILE_MAX_BITS 29 |
63 | #define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) | 107 | #define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) |
@@ -88,6 +132,8 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | |||
88 | << PTE_FILE_SHIFT3) \ | 132 | << PTE_FILE_SHIFT3) \ |
89 | + _PAGE_FILE }) | 133 | + _PAGE_FILE }) |
90 | 134 | ||
135 | #endif /* CONFIG_MEM_SOFT_DIRTY */ | ||
136 | |||
91 | /* Encode and de-code a swap entry */ | 137 | /* Encode and de-code a swap entry */ |
92 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE | 138 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
93 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) | 139 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) |
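
The CONFIG_MEM_SOFT_DIRTY variant above packs a 28-bit file offset around the present, file, protnone and soft-dirty bits. A self-contained round-trip check of that packing scheme, assuming the usual x86 bit positions (present=0, file=6, protnone=8, soft-dirty/hidden=11), which make the shifts 1/7/9/12 and the low field widths 5/1/2 bits:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SHIFT1 1        /* just above the present bit */
#define SHIFT2 7        /* just above the file bit */
#define SHIFT3 9        /* just above the protnone bit */
#define SHIFT4 12       /* just above the soft-dirty bit */
#define BITS1  (SHIFT2 - SHIFT1 - 1)    /* 5 */
#define BITS2  (SHIFT3 - SHIFT2 - 1)    /* 1 */
#define BITS3  (SHIFT4 - SHIFT3 - 1)    /* 2 */

static uint32_t pack(uint32_t off)      /* pgoff_to_pte(), minus _PAGE_FILE */
{
        return ((off & ((1u << BITS1) - 1)) << SHIFT1) |
               (((off >> BITS1) & ((1u << BITS2) - 1)) << SHIFT2) |
               (((off >> (BITS1 + BITS2)) & ((1u << BITS3) - 1)) << SHIFT3) |
               ((off >> (BITS1 + BITS2 + BITS3)) << SHIFT4);
}

static uint32_t unpack(uint32_t pte)    /* pte_to_pgoff() */
{
        return ((pte >> SHIFT1) & ((1u << BITS1) - 1)) |
               (((pte >> SHIFT2) & ((1u << BITS2) - 1)) << BITS1) |
               (((pte >> SHIFT3) & ((1u << BITS3) - 1)) << (BITS1 + BITS2)) |
               ((pte >> SHIFT4) << (BITS1 + BITS2 + BITS3));
}

int main(void)
{
        uint32_t off;

        for (off = 0; off < (1u << 28); off += 99991)
                assert(unpack(pack(off)) == off);
        printf("28-bit offsets survive the pte round trip\n");
        return 0;
}
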
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 4cc9f2b7cdc3..81bb91b49a88 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h | |||
@@ -179,6 +179,9 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) | |||
179 | /* | 179 | /* |
180 | * Bits 0, 6 and 7 are taken in the low part of the pte, | 180 | * Bits 0, 6 and 7 are taken in the low part of the pte, |
181 | * put the 32 bits of offset into the high part. | 181 | * put the 32 bits of offset into the high part. |
182 | * | ||
183 | * For soft-dirty tracking 11 bit is taken from | ||
184 | * the low part of pte as well. | ||
182 | */ | 185 | */ |
183 | #define pte_to_pgoff(pte) ((pte).pte_high) | 186 | #define pte_to_pgoff(pte) ((pte).pte_high) |
184 | #define pgoff_to_pte(off) \ | 187 | #define pgoff_to_pte(off) \ |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 7dc305a46058..8d16befdec88 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -22,7 +22,8 @@ | |||
22 | * ZERO_PAGE is a global shared page that is always zero: used | 22 | * ZERO_PAGE is a global shared page that is always zero: used |
23 | * for zero-mapped memory areas etc.. | 23 | * for zero-mapped memory areas etc.. |
24 | */ | 24 | */ |
25 | extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; | 25 | extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] |
26 | __visible; | ||
26 | #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) | 27 | #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) |
27 | 28 | ||
28 | extern spinlock_t pgd_lock; | 29 | extern spinlock_t pgd_lock; |
@@ -314,6 +315,36 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) | |||
314 | return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); | 315 | return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); |
315 | } | 316 | } |
316 | 317 | ||
318 | static inline pte_t pte_swp_mksoft_dirty(pte_t pte) | ||
319 | { | ||
320 | return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
321 | } | ||
322 | |||
323 | static inline int pte_swp_soft_dirty(pte_t pte) | ||
324 | { | ||
325 | return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; | ||
326 | } | ||
327 | |||
328 | static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) | ||
329 | { | ||
330 | return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); | ||
331 | } | ||
332 | |||
333 | static inline pte_t pte_file_clear_soft_dirty(pte_t pte) | ||
334 | { | ||
335 | return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); | ||
336 | } | ||
337 | |||
338 | static inline pte_t pte_file_mksoft_dirty(pte_t pte) | ||
339 | { | ||
340 | return pte_set_flags(pte, _PAGE_SOFT_DIRTY); | ||
341 | } | ||
342 | |||
343 | static inline int pte_file_soft_dirty(pte_t pte) | ||
344 | { | ||
345 | return pte_flags(pte) & _PAGE_SOFT_DIRTY; | ||
346 | } | ||
347 | |||
317 | /* | 348 | /* |
318 | * Mask out unsupported bits in a present pgprot. Non-present pgprots | 349 | * Mask out unsupported bits in a present pgprot. Non-present pgprots |
319 | * can use those bits for other purposes, so leave them be. | 350 | * can use those bits for other purposes, so leave them be. |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index c98ac63aae48..f4843e031131 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -61,12 +61,27 @@ | |||
61 | * they do not conflict with each other. | 61 | * they do not conflict with each other. |
62 | */ | 62 | */ |
63 | 63 | ||
64 | #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_HIDDEN | ||
65 | |||
64 | #ifdef CONFIG_MEM_SOFT_DIRTY | 66 | #ifdef CONFIG_MEM_SOFT_DIRTY |
65 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) | 67 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY) |
66 | #else | 68 | #else |
67 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) | 69 | #define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0)) |
68 | #endif | 70 | #endif |
69 | 71 | ||
72 | /* | ||
73 | * Tracking soft dirty bit when a page goes to a swap is tricky. | ||
74 | * We need a bit which can be stored in pte _and_ not conflict | ||
75 | * with swap entry format. On x86 bits 6 and 7 are *not* involved | ||
76 | * into swap entry computation, but bit 6 is used for nonlinear | ||
77 | * file mapping, so we borrow bit 7 for soft dirty tracking. | ||
78 | */ | ||
79 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
80 | #define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE | ||
81 | #else | ||
82 | #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) | ||
83 | #endif | ||
84 | |||
70 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 85 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
71 | #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) | 86 | #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) |
72 | #else | 87 | #else |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 24cf5aefb704..987c75ecc334 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -412,7 +412,7 @@ union irq_stack_union { | |||
412 | }; | 412 | }; |
413 | }; | 413 | }; |
414 | 414 | ||
415 | DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union); | 415 | DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; |
416 | DECLARE_INIT_PER_CPU(irq_stack_union); | 416 | DECLARE_INIT_PER_CPU(irq_stack_union); |
417 | 417 | ||
418 | DECLARE_PER_CPU(char *, irq_stack_ptr); | 418 | DECLARE_PER_CPU(char *, irq_stack_ptr); |
@@ -942,33 +942,19 @@ extern int set_tsc_mode(unsigned int val); | |||
942 | 942 | ||
943 | extern u16 amd_get_nb_id(int cpu); | 943 | extern u16 amd_get_nb_id(int cpu); |
944 | 944 | ||
945 | struct aperfmperf { | 945 | static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) |
946 | u64 aperf, mperf; | ||
947 | }; | ||
948 | |||
949 | static inline void get_aperfmperf(struct aperfmperf *am) | ||
950 | { | 946 | { |
951 | WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF)); | 947 | uint32_t base, eax, signature[3]; |
952 | |||
953 | rdmsrl(MSR_IA32_APERF, am->aperf); | ||
954 | rdmsrl(MSR_IA32_MPERF, am->mperf); | ||
955 | } | ||
956 | 948 | ||
957 | #define APERFMPERF_SHIFT 10 | 949 | for (base = 0x40000000; base < 0x40010000; base += 0x100) { |
950 | cpuid(base, &eax, &signature[0], &signature[1], &signature[2]); | ||
958 | 951 | ||
959 | static inline | 952 | if (!memcmp(sig, signature, 12) && |
960 | unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, | 953 | (leaves == 0 || ((eax - base) >= leaves))) |
961 | struct aperfmperf *new) | 954 | return base; |
962 | { | 955 | } |
963 | u64 aperf = new->aperf - old->aperf; | ||
964 | u64 mperf = new->mperf - old->mperf; | ||
965 | unsigned long ratio = aperf; | ||
966 | |||
967 | mperf >>= APERFMPERF_SHIFT; | ||
968 | if (mperf) | ||
969 | ratio = div64_u64(aperf, mperf); | ||
970 | 956 | ||
971 | return ratio; | 957 | return 0; |
972 | } | 958 | } |
973 | 959 | ||
974 | extern unsigned long arch_align_stack(unsigned long sp); | 960 | extern unsigned long arch_align_stack(unsigned long sp); |
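
hypervisor_cpuid_base() walks the hypervisor CPUID leaf range in 0x100 steps and compares the 12 signature bytes returned in EBX/ECX/EDX. The same scan can be reproduced from userspace; here is a minimal sketch with a local cpuid() wrapper, looking for the KVM signature that the new kvm_cpuid_base() passes in:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void cpuid(uint32_t leaf, uint32_t *eax, uint32_t *ebx,
                  uint32_t *ecx, uint32_t *edx)
{
        __asm__ volatile("cpuid"
                         : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                         : "a" (leaf), "c" (0));
}

static uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
{
        uint32_t base, eax, signature[3];

        for (base = 0x40000000; base < 0x40010000; base += 0x100) {
                cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);

                if (!memcmp(sig, signature, 12) &&
                    (leaves == 0 || ((eax - base) >= leaves)))
                        return base;
        }
        return 0;
}

int main(void)
{
        uint32_t base = hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);

        if (base)
                printf("KVM signature found at CPUID leaf %#x\n", base);
        else
                printf("no KVM signature (not a KVM guest?)\n");
        return 0;
}
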
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 109a9dd5d454..be8269b00e2a 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
@@ -93,7 +93,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, | |||
93 | 93 | ||
94 | struct pvclock_vsyscall_time_info { | 94 | struct pvclock_vsyscall_time_info { |
95 | struct pvclock_vcpu_time_info pvti; | 95 | struct pvclock_vcpu_time_info pvti; |
96 | u32 migrate_count; | ||
97 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 96 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
98 | 97 | ||
99 | #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) | 98 | #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) |
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index b7bf3505e1ec..347555492dad 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -6,6 +6,8 @@ | |||
6 | 6 | ||
7 | #define COMMAND_LINE_SIZE 2048 | 7 | #define COMMAND_LINE_SIZE 2048 |
8 | 8 | ||
9 | #include <linux/linkage.h> | ||
10 | |||
9 | #ifdef __i386__ | 11 | #ifdef __i386__ |
10 | 12 | ||
11 | #include <linux/pfn.h> | 13 | #include <linux/pfn.h> |
@@ -108,11 +110,11 @@ void *extend_brk(size_t size, size_t align); | |||
108 | extern void probe_roms(void); | 110 | extern void probe_roms(void); |
109 | #ifdef __i386__ | 111 | #ifdef __i386__ |
110 | 112 | ||
111 | void __init i386_start_kernel(void); | 113 | asmlinkage void __init i386_start_kernel(void); |
112 | 114 | ||
113 | #else | 115 | #else |
114 | void __init x86_64_start_kernel(char *real_mode); | 116 | asmlinkage void __init x86_64_start_kernel(char *real_mode); |
115 | void __init x86_64_start_reservations(char *real_mode_data); | 117 | asmlinkage void __init x86_64_start_reservations(char *real_mode_data); |
116 | 118 | ||
117 | #endif /* __i386__ */ | 119 | #endif /* __i386__ */ |
118 | #endif /* _SETUP */ | 120 | #endif /* _SETUP */ |
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 2f4d924fe6c9..645cad2c95ff 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h | |||
@@ -101,7 +101,7 @@ static inline void native_wbinvd(void) | |||
101 | asm volatile("wbinvd": : :"memory"); | 101 | asm volatile("wbinvd": : :"memory"); |
102 | } | 102 | } |
103 | 103 | ||
104 | extern void native_load_gs_index(unsigned); | 104 | extern asmlinkage void native_load_gs_index(unsigned); |
105 | 105 | ||
106 | #ifdef CONFIG_PARAVIRT | 106 | #ifdef CONFIG_PARAVIRT |
107 | #include <asm/paravirt.h> | 107 | #include <asm/paravirt.h> |
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 33692eaabab5..bf156ded74b5 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h | |||
@@ -1,11 +1,14 @@ | |||
1 | #ifndef _ASM_X86_SPINLOCK_H | 1 | #ifndef _ASM_X86_SPINLOCK_H |
2 | #define _ASM_X86_SPINLOCK_H | 2 | #define _ASM_X86_SPINLOCK_H |
3 | 3 | ||
4 | #include <linux/jump_label.h> | ||
4 | #include <linux/atomic.h> | 5 | #include <linux/atomic.h> |
5 | #include <asm/page.h> | 6 | #include <asm/page.h> |
6 | #include <asm/processor.h> | 7 | #include <asm/processor.h> |
7 | #include <linux/compiler.h> | 8 | #include <linux/compiler.h> |
8 | #include <asm/paravirt.h> | 9 | #include <asm/paravirt.h> |
10 | #include <asm/bitops.h> | ||
11 | |||
9 | /* | 12 | /* |
10 | * Your basic SMP spinlocks, allowing only a single CPU anywhere | 13 | * Your basic SMP spinlocks, allowing only a single CPU anywhere |
11 | * | 14 | * |
@@ -34,6 +37,36 @@ | |||
34 | # define UNLOCK_LOCK_PREFIX | 37 | # define UNLOCK_LOCK_PREFIX |
35 | #endif | 38 | #endif |
36 | 39 | ||
40 | /* How long a lock should spin before we consider blocking */ | ||
41 | #define SPIN_THRESHOLD (1 << 15) | ||
42 | |||
43 | extern struct static_key paravirt_ticketlocks_enabled; | ||
44 | static __always_inline bool static_key_false(struct static_key *key); | ||
45 | |||
46 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
47 | |||
48 | static inline void __ticket_enter_slowpath(arch_spinlock_t *lock) | ||
49 | { | ||
50 | set_bit(0, (volatile unsigned long *)&lock->tickets.tail); | ||
51 | } | ||
52 | |||
53 | #else /* !CONFIG_PARAVIRT_SPINLOCKS */ | ||
54 | static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock, | ||
55 | __ticket_t ticket) | ||
56 | { | ||
57 | } | ||
58 | static inline void __ticket_unlock_kick(arch_spinlock_t *lock, | ||
59 | __ticket_t ticket) | ||
60 | { | ||
61 | } | ||
62 | |||
63 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
64 | |||
65 | static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) | ||
66 | { | ||
67 | return lock.tickets.head == lock.tickets.tail; | ||
68 | } | ||
69 | |||
37 | /* | 70 | /* |
38 | * Ticket locks are conceptually two parts, one indicating the current head of | 71 | * Ticket locks are conceptually two parts, one indicating the current head of |
39 | * the queue, and the other indicating the current tail. The lock is acquired | 72 | * the queue, and the other indicating the current tail. The lock is acquired |
@@ -47,81 +80,101 @@ | |||
47 | * in the high part, because a wide xadd increment of the low part would carry | 80 | * in the high part, because a wide xadd increment of the low part would carry |
48 | * up and contaminate the high part. | 81 | * up and contaminate the high part. |
49 | */ | 82 | */ |
50 | static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) | 83 | static __always_inline void arch_spin_lock(arch_spinlock_t *lock) |
51 | { | 84 | { |
52 | register struct __raw_tickets inc = { .tail = 1 }; | 85 | register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC }; |
53 | 86 | ||
54 | inc = xadd(&lock->tickets, inc); | 87 | inc = xadd(&lock->tickets, inc); |
88 | if (likely(inc.head == inc.tail)) | ||
89 | goto out; | ||
55 | 90 | ||
91 | inc.tail &= ~TICKET_SLOWPATH_FLAG; | ||
56 | for (;;) { | 92 | for (;;) { |
57 | if (inc.head == inc.tail) | 93 | unsigned count = SPIN_THRESHOLD; |
58 | break; | 94 | |
59 | cpu_relax(); | 95 | do { |
60 | inc.head = ACCESS_ONCE(lock->tickets.head); | 96 | if (ACCESS_ONCE(lock->tickets.head) == inc.tail) |
97 | goto out; | ||
98 | cpu_relax(); | ||
99 | } while (--count); | ||
100 | __ticket_lock_spinning(lock, inc.tail); | ||
61 | } | 101 | } |
62 | barrier(); /* make sure nothing creeps before the lock is taken */ | 102 | out: barrier(); /* make sure nothing creeps before the lock is taken */ |
63 | } | 103 | } |
64 | 104 | ||
65 | static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) | 105 | static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) |
66 | { | 106 | { |
67 | arch_spinlock_t old, new; | 107 | arch_spinlock_t old, new; |
68 | 108 | ||
69 | old.tickets = ACCESS_ONCE(lock->tickets); | 109 | old.tickets = ACCESS_ONCE(lock->tickets); |
70 | if (old.tickets.head != old.tickets.tail) | 110 | if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) |
71 | return 0; | 111 | return 0; |
72 | 112 | ||
73 | new.head_tail = old.head_tail + (1 << TICKET_SHIFT); | 113 | new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT); |
74 | 114 | ||
75 | /* cmpxchg is a full barrier, so nothing can move before it */ | 115 | /* cmpxchg is a full barrier, so nothing can move before it */ |
76 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; | 116 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; |
77 | } | 117 | } |
78 | 118 | ||
79 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | 119 | static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock, |
120 | arch_spinlock_t old) | ||
80 | { | 121 | { |
81 | __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); | 122 | arch_spinlock_t new; |
123 | |||
124 | BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS); | ||
125 | |||
126 | /* Perform the unlock on the "before" copy */ | ||
127 | old.tickets.head += TICKET_LOCK_INC; | ||
128 | |||
129 | /* Clear the slowpath flag */ | ||
130 | new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT); | ||
131 | |||
132 | /* | ||
133 | * If the lock is uncontended, clear the flag - use cmpxchg in | ||
134 | * case it changes behind our back though. | ||
135 | */ | ||
136 | if (new.tickets.head != new.tickets.tail || | ||
137 | cmpxchg(&lock->head_tail, old.head_tail, | ||
138 | new.head_tail) != old.head_tail) { | ||
139 | /* | ||
140 | * Lock still has someone queued for it, so wake up an | ||
141 | * appropriate waiter. | ||
142 | */ | ||
143 | __ticket_unlock_kick(lock, old.tickets.head); | ||
144 | } | ||
82 | } | 145 | } |
83 | 146 | ||
84 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) | 147 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) |
85 | { | 148 | { |
86 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); | 149 | if (TICKET_SLOWPATH_FLAG && |
150 | static_key_false(¶virt_ticketlocks_enabled)) { | ||
151 | arch_spinlock_t prev; | ||
87 | 152 | ||
88 | return tmp.tail != tmp.head; | 153 | prev = *lock; |
89 | } | 154 | add_smp(&lock->tickets.head, TICKET_LOCK_INC); |
90 | 155 | ||
91 | static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) | 156 | /* add_smp() is a full mb() */ |
92 | { | ||
93 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); | ||
94 | 157 | ||
95 | return (__ticket_t)(tmp.tail - tmp.head) > 1; | 158 | if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG)) |
159 | __ticket_unlock_slowpath(lock, prev); | ||
160 | } else | ||
161 | __add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX); | ||
96 | } | 162 | } |
97 | 163 | ||
98 | #ifndef CONFIG_PARAVIRT_SPINLOCKS | ||
99 | |||
100 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) | 164 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) |
101 | { | 165 | { |
102 | return __ticket_spin_is_locked(lock); | 166 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); |
103 | } | ||
104 | |||
105 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) | ||
106 | { | ||
107 | return __ticket_spin_is_contended(lock); | ||
108 | } | ||
109 | #define arch_spin_is_contended arch_spin_is_contended | ||
110 | 167 | ||
111 | static __always_inline void arch_spin_lock(arch_spinlock_t *lock) | 168 | return tmp.tail != tmp.head; |
112 | { | ||
113 | __ticket_spin_lock(lock); | ||
114 | } | 169 | } |
115 | 170 | ||
116 | static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) | 171 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) |
117 | { | 172 | { |
118 | return __ticket_spin_trylock(lock); | 173 | struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); |
119 | } | ||
120 | 174 | ||
121 | static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) | 175 | return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; |
122 | { | ||
123 | __ticket_spin_unlock(lock); | ||
124 | } | 176 | } |
177 | #define arch_spin_is_contended arch_spin_is_contended | ||
125 | 178 | ||
126 | static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, | 179 | static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, |
127 | unsigned long flags) | 180 | unsigned long flags) |
@@ -129,8 +182,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock, | |||
129 | arch_spin_lock(lock); | 182 | arch_spin_lock(lock); |
130 | } | 183 | } |
131 | 184 | ||
132 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
133 | |||
134 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) | 185 | static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) |
135 | { | 186 | { |
136 | while (arch_spin_is_locked(lock)) | 187 | while (arch_spin_is_locked(lock)) |
@@ -233,8 +284,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) | |||
233 | #define arch_read_relax(lock) cpu_relax() | 284 | #define arch_read_relax(lock) cpu_relax() |
234 | #define arch_write_relax(lock) cpu_relax() | 285 | #define arch_write_relax(lock) cpu_relax() |
235 | 286 | ||
236 | /* The {read|write|spin}_lock() on x86 are full memory barriers. */ | ||
237 | static inline void smp_mb__after_lock(void) { } | ||
238 | #define ARCH_HAS_SMP_MB_AFTER_LOCK | ||
239 | |||
240 | #endif /* _ASM_X86_SPINLOCK_H */ | 287 | #endif /* _ASM_X86_SPINLOCK_H */ |
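
The reworked arch_spin_lock() above keeps the plain ticket-lock fast path but only spins SPIN_THRESHOLD times before handing off to __ticket_lock_spinning(), which a paravirt backend can turn into a real sleep. A minimal userspace rendition of that shape (GCC atomics instead of xadd, sched_yield() standing in for the paravirt hook, thread and iteration counts made up):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define SPIN_THRESHOLD (1 << 15)        /* same value as the header above */

struct ticketlock {
        unsigned short head;            /* ticket now being served */
        unsigned short tail;            /* next ticket to hand out */
};

static void ticket_lock(struct ticketlock *lock)
{
        unsigned short me = __atomic_fetch_add(&lock->tail, 1, __ATOMIC_ACQUIRE);

        if (__atomic_load_n(&lock->head, __ATOMIC_ACQUIRE) == me)
                return;                 /* uncontended fast path */

        for (;;) {
                unsigned int count = SPIN_THRESHOLD;

                do {
                        if (__atomic_load_n(&lock->head, __ATOMIC_ACQUIRE) == me)
                                return;
                } while (--count);
                sched_yield();          /* stand-in for __ticket_lock_spinning() */
        }
}

static void ticket_unlock(struct ticketlock *lock)
{
        __atomic_fetch_add(&lock->head, 1, __ATOMIC_RELEASE);
}

static struct ticketlock lock;
static long counter;

static void *worker(void *arg)
{
        (void)arg;
        for (int i = 0; i < 100000; i++) {
                ticket_lock(&lock);
                counter++;
                ticket_unlock(&lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t t[4];

        for (int i = 0; i < 4; i++)
                pthread_create(&t[i], NULL, worker, NULL);
        for (int i = 0; i < 4; i++)
                pthread_join(t[i], NULL);
        printf("counter = %ld (expect 400000)\n", counter);
        return 0;
}
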
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index ad0ad07fc006..4f1bea19945b 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h | |||
@@ -1,13 +1,17 @@ | |||
1 | #ifndef _ASM_X86_SPINLOCK_TYPES_H | 1 | #ifndef _ASM_X86_SPINLOCK_TYPES_H |
2 | #define _ASM_X86_SPINLOCK_TYPES_H | 2 | #define _ASM_X86_SPINLOCK_TYPES_H |
3 | 3 | ||
4 | #ifndef __LINUX_SPINLOCK_TYPES_H | ||
5 | # error "please don't include this file directly" | ||
6 | #endif | ||
7 | |||
8 | #include <linux/types.h> | 4 | #include <linux/types.h> |
9 | 5 | ||
10 | #if (CONFIG_NR_CPUS < 256) | 6 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
7 | #define __TICKET_LOCK_INC 2 | ||
8 | #define TICKET_SLOWPATH_FLAG ((__ticket_t)1) | ||
9 | #else | ||
10 | #define __TICKET_LOCK_INC 1 | ||
11 | #define TICKET_SLOWPATH_FLAG ((__ticket_t)0) | ||
12 | #endif | ||
13 | |||
14 | #if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC)) | ||
11 | typedef u8 __ticket_t; | 15 | typedef u8 __ticket_t; |
12 | typedef u16 __ticketpair_t; | 16 | typedef u16 __ticketpair_t; |
13 | #else | 17 | #else |
@@ -15,6 +19,8 @@ typedef u16 __ticket_t; | |||
15 | typedef u32 __ticketpair_t; | 19 | typedef u32 __ticketpair_t; |
16 | #endif | 20 | #endif |
17 | 21 | ||
22 | #define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC) | ||
23 | |||
18 | #define TICKET_SHIFT (sizeof(__ticket_t) * 8) | 24 | #define TICKET_SHIFT (sizeof(__ticket_t) * 8) |
19 | 25 | ||
20 | typedef struct arch_spinlock { | 26 | typedef struct arch_spinlock { |
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 4ec45b3abba1..d7f3b3b78ac3 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h | |||
@@ -2,8 +2,8 @@ | |||
2 | #define _ASM_X86_SWITCH_TO_H | 2 | #define _ASM_X86_SWITCH_TO_H |
3 | 3 | ||
4 | struct task_struct; /* one of the stranger aspects of C forward declarations */ | 4 | struct task_struct; /* one of the stranger aspects of C forward declarations */ |
5 | struct task_struct *__switch_to(struct task_struct *prev, | 5 | __visible struct task_struct *__switch_to(struct task_struct *prev, |
6 | struct task_struct *next); | 6 | struct task_struct *next); |
7 | struct tss_struct; | 7 | struct tss_struct; |
8 | void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 8 | void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
9 | struct tss_struct *tss); | 9 | struct tss_struct *tss); |
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index 9d09b4073b60..05af3b31d522 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h | |||
@@ -26,9 +26,9 @@ | |||
26 | * Note that @nr may be almost arbitrarily large; this function is not | 26 | * Note that @nr may be almost arbitrarily large; this function is not |
27 | * restricted to acting on a single-word quantity. | 27 | * restricted to acting on a single-word quantity. |
28 | */ | 28 | */ |
29 | static inline void sync_set_bit(int nr, volatile unsigned long *addr) | 29 | static inline void sync_set_bit(long nr, volatile unsigned long *addr) |
30 | { | 30 | { |
31 | asm volatile("lock; btsl %1,%0" | 31 | asm volatile("lock; bts %1,%0" |
32 | : "+m" (ADDR) | 32 | : "+m" (ADDR) |
33 | : "Ir" (nr) | 33 | : "Ir" (nr) |
34 | : "memory"); | 34 | : "memory"); |
@@ -44,9 +44,9 @@ static inline void sync_set_bit(int nr, volatile unsigned long *addr) | |||
44 | * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() | 44 | * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() |
45 | * in order to ensure changes are visible on other processors. | 45 | * in order to ensure changes are visible on other processors. |
46 | */ | 46 | */ |
47 | static inline void sync_clear_bit(int nr, volatile unsigned long *addr) | 47 | static inline void sync_clear_bit(long nr, volatile unsigned long *addr) |
48 | { | 48 | { |
49 | asm volatile("lock; btrl %1,%0" | 49 | asm volatile("lock; btr %1,%0" |
50 | : "+m" (ADDR) | 50 | : "+m" (ADDR) |
51 | : "Ir" (nr) | 51 | : "Ir" (nr) |
52 | : "memory"); | 52 | : "memory"); |
@@ -61,9 +61,9 @@ static inline void sync_clear_bit(int nr, volatile unsigned long *addr) | |||
61 | * Note that @nr may be almost arbitrarily large; this function is not | 61 | * Note that @nr may be almost arbitrarily large; this function is not |
62 | * restricted to acting on a single-word quantity. | 62 | * restricted to acting on a single-word quantity. |
63 | */ | 63 | */ |
64 | static inline void sync_change_bit(int nr, volatile unsigned long *addr) | 64 | static inline void sync_change_bit(long nr, volatile unsigned long *addr) |
65 | { | 65 | { |
66 | asm volatile("lock; btcl %1,%0" | 66 | asm volatile("lock; btc %1,%0" |
67 | : "+m" (ADDR) | 67 | : "+m" (ADDR) |
68 | : "Ir" (nr) | 68 | : "Ir" (nr) |
69 | : "memory"); | 69 | : "memory"); |
@@ -77,11 +77,11 @@ static inline void sync_change_bit(int nr, volatile unsigned long *addr) | |||
77 | * This operation is atomic and cannot be reordered. | 77 | * This operation is atomic and cannot be reordered. |
78 | * It also implies a memory barrier. | 78 | * It also implies a memory barrier. |
79 | */ | 79 | */ |
80 | static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr) | 80 | static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) |
81 | { | 81 | { |
82 | int oldbit; | 82 | int oldbit; |
83 | 83 | ||
84 | asm volatile("lock; btsl %2,%1\n\tsbbl %0,%0" | 84 | asm volatile("lock; bts %2,%1\n\tsbbl %0,%0" |
85 | : "=r" (oldbit), "+m" (ADDR) | 85 | : "=r" (oldbit), "+m" (ADDR) |
86 | : "Ir" (nr) : "memory"); | 86 | : "Ir" (nr) : "memory"); |
87 | return oldbit; | 87 | return oldbit; |
@@ -95,11 +95,11 @@ static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr) | |||
95 | * This operation is atomic and cannot be reordered. | 95 | * This operation is atomic and cannot be reordered. |
96 | * It also implies a memory barrier. | 96 | * It also implies a memory barrier. |
97 | */ | 97 | */ |
98 | static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr) | 98 | static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) |
99 | { | 99 | { |
100 | int oldbit; | 100 | int oldbit; |
101 | 101 | ||
102 | asm volatile("lock; btrl %2,%1\n\tsbbl %0,%0" | 102 | asm volatile("lock; btr %2,%1\n\tsbbl %0,%0" |
103 | : "=r" (oldbit), "+m" (ADDR) | 103 | : "=r" (oldbit), "+m" (ADDR) |
104 | : "Ir" (nr) : "memory"); | 104 | : "Ir" (nr) : "memory"); |
105 | return oldbit; | 105 | return oldbit; |
@@ -113,11 +113,11 @@ static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr) | |||
113 | * This operation is atomic and cannot be reordered. | 113 | * This operation is atomic and cannot be reordered. |
114 | * It also implies a memory barrier. | 114 | * It also implies a memory barrier. |
115 | */ | 115 | */ |
116 | static inline int sync_test_and_change_bit(int nr, volatile unsigned long *addr) | 116 | static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) |
117 | { | 117 | { |
118 | int oldbit; | 118 | int oldbit; |
119 | 119 | ||
120 | asm volatile("lock; btcl %2,%1\n\tsbbl %0,%0" | 120 | asm volatile("lock; btc %2,%1\n\tsbbl %0,%0" |
121 | : "=r" (oldbit), "+m" (ADDR) | 121 | : "=r" (oldbit), "+m" (ADDR) |
122 | : "Ir" (nr) : "memory"); | 122 | : "Ir" (nr) : "memory"); |
123 | return oldbit; | 123 | return oldbit; |
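
Dropping the "l" suffix lets the assembler size the bit-test instructions from the (now long) bit-number register, so offsets past bit 31 address the right word on 64-bit. A userspace sketch of the same asm shape, assuming a 64-bit build; the constraint is narrowed to "r" here so the offset always travels in a register, and the bitmap and bit numbers are made up:

#include <stdio.h>

static inline void set_bit_atomic(long nr, volatile unsigned long *addr)
{
        asm volatile("lock; bts %1,%0"          /* operand size taken from %1 */
                     : "+m" (*addr)
                     : "r" (nr)
                     : "memory");
}

int main(void)
{
        unsigned long bitmap[2] = { 0, 0 };

        set_bit_atomic(5, bitmap);
        set_bit_atomic(70, bitmap);     /* past bit 63: lands in bitmap[1], bit 6 */
        printf("bitmap[0]=%#lx bitmap[1]=%#lx\n", bitmap[0], bitmap[1]);
        return 0;
}
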
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 2e188d68397c..aea284b41312 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h | |||
@@ -20,7 +20,8 @@ | |||
20 | #include <asm/thread_info.h> /* for TS_COMPAT */ | 20 | #include <asm/thread_info.h> /* for TS_COMPAT */ |
21 | #include <asm/unistd.h> | 21 | #include <asm/unistd.h> |
22 | 22 | ||
23 | extern const unsigned long sys_call_table[]; | 23 | typedef void (*sys_call_ptr_t)(void); |
24 | extern const sys_call_ptr_t sys_call_table[]; | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * Only the low 32 bits of orig_ax are meaningful, so we return int. | 27 | * Only the low 32 bits of orig_ax are meaningful, so we return int. |
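
Typing sys_call_table as an array of sys_call_ptr_t mostly documents what the entries are; dispatch still just indexes by syscall number. A toy sketch of that shape (the handler names and bound check are made up):

#include <stdio.h>

typedef void (*sys_call_ptr_t)(void);

static void sys_foo(void) { printf("foo\n"); }
static void sys_bar(void) { printf("bar\n"); }

static const sys_call_ptr_t sys_call_table[] = { sys_foo, sys_bar };

int main(void)
{
        unsigned int nr = 1;    /* hypothetical syscall number */

        if (nr < sizeof(sys_call_table) / sizeof(sys_call_table[0]))
                sys_call_table[nr]();
        return 0;
}
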
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 2917a6452c49..592a6a672e07 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h | |||
@@ -24,7 +24,7 @@ asmlinkage long sys_iopl(unsigned int); | |||
24 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); | 24 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); |
25 | 25 | ||
26 | /* kernel/signal.c */ | 26 | /* kernel/signal.c */ |
27 | long sys_rt_sigreturn(void); | 27 | asmlinkage long sys_rt_sigreturn(void); |
28 | 28 | ||
29 | /* kernel/tls.c */ | 29 | /* kernel/tls.c */ |
30 | asmlinkage long sys_set_thread_area(struct user_desc __user *); | 30 | asmlinkage long sys_set_thread_area(struct user_desc __user *); |
@@ -34,7 +34,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *); | |||
34 | #ifdef CONFIG_X86_32 | 34 | #ifdef CONFIG_X86_32 |
35 | 35 | ||
36 | /* kernel/signal.c */ | 36 | /* kernel/signal.c */ |
37 | unsigned long sys_sigreturn(void); | 37 | asmlinkage unsigned long sys_sigreturn(void); |
38 | 38 | ||
39 | /* kernel/vm86_32.c */ | 39 | /* kernel/vm86_32.c */ |
40 | asmlinkage long sys_vm86old(struct vm86_struct __user *); | 40 | asmlinkage long sys_vm86old(struct vm86_struct __user *); |
@@ -44,7 +44,7 @@ asmlinkage long sys_vm86(unsigned long, unsigned long); | |||
44 | 44 | ||
45 | /* X86_64 only */ | 45 | /* X86_64 only */ |
46 | /* kernel/process_64.c */ | 46 | /* kernel/process_64.c */ |
47 | long sys_arch_prctl(int, unsigned long); | 47 | asmlinkage long sys_arch_prctl(int, unsigned long); |
48 | 48 | ||
49 | /* kernel/sys_x86_64.c */ | 49 | /* kernel/sys_x86_64.c */ |
50 | asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, | 50 | asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, |
diff --git a/arch/x86/include/asm/sysfb.h b/arch/x86/include/asm/sysfb.h new file mode 100644 index 000000000000..2aeb3e25579c --- /dev/null +++ b/arch/x86/include/asm/sysfb.h | |||
@@ -0,0 +1,98 @@ | |||
1 | #ifndef _ARCH_X86_KERNEL_SYSFB_H | ||
2 | #define _ARCH_X86_KERNEL_SYSFB_H | ||
3 | |||
4 | /* | ||
5 | * Generic System Framebuffers on x86 | ||
6 | * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License as published by the Free | ||
10 | * Software Foundation; either version 2 of the License, or (at your option) | ||
11 | * any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/platform_data/simplefb.h> | ||
16 | #include <linux/screen_info.h> | ||
17 | |||
18 | enum { | ||
19 | M_I17, /* 17-Inch iMac */ | ||
20 | M_I20, /* 20-Inch iMac */ | ||
21 | M_I20_SR, /* 20-Inch iMac (Santa Rosa) */ | ||
22 | M_I24, /* 24-Inch iMac */ | ||
23 | M_I24_8_1, /* 24-Inch iMac, 8,1th gen */ | ||
24 | M_I24_10_1, /* 24-Inch iMac, 10,1th gen */ | ||
25 | M_I27_11_1, /* 27-Inch iMac, 11,1th gen */ | ||
26 | M_MINI, /* Mac Mini */ | ||
27 | M_MINI_3_1, /* Mac Mini, 3,1th gen */ | ||
28 | M_MINI_4_1, /* Mac Mini, 4,1th gen */ | ||
29 | M_MB, /* MacBook */ | ||
30 | M_MB_2, /* MacBook, 2nd rev. */ | ||
31 | M_MB_3, /* MacBook, 3rd rev. */ | ||
32 | M_MB_5_1, /* MacBook, 5th rev. */ | ||
33 | M_MB_6_1, /* MacBook, 6th rev. */ | ||
34 | M_MB_7_1, /* MacBook, 7th rev. */ | ||
35 | M_MB_SR, /* MacBook, 2nd gen, (Santa Rosa) */ | ||
36 | M_MBA, /* MacBook Air */ | ||
37 | M_MBA_3, /* Macbook Air, 3rd rev */ | ||
38 | M_MBP, /* MacBook Pro */ | ||
39 | M_MBP_2, /* MacBook Pro 2nd gen */ | ||
40 | M_MBP_2_2, /* MacBook Pro 2,2nd gen */ | ||
41 | M_MBP_SR, /* MacBook Pro (Santa Rosa) */ | ||
42 | M_MBP_4, /* MacBook Pro, 4th gen */ | ||
43 | M_MBP_5_1, /* MacBook Pro, 5,1th gen */ | ||
44 | M_MBP_5_2, /* MacBook Pro, 5,2th gen */ | ||
45 | M_MBP_5_3, /* MacBook Pro, 5,3rd gen */ | ||
46 | M_MBP_6_1, /* MacBook Pro, 6,1th gen */ | ||
47 | M_MBP_6_2, /* MacBook Pro, 6,2th gen */ | ||
48 | M_MBP_7_1, /* MacBook Pro, 7,1th gen */ | ||
49 | M_MBP_8_2, /* MacBook Pro, 8,2nd gen */ | ||
50 | M_UNKNOWN /* placeholder */ | ||
51 | }; | ||
52 | |||
53 | struct efifb_dmi_info { | ||
54 | char *optname; | ||
55 | unsigned long base; | ||
56 | int stride; | ||
57 | int width; | ||
58 | int height; | ||
59 | int flags; | ||
60 | }; | ||
61 | |||
62 | #ifdef CONFIG_EFI | ||
63 | |||
64 | extern struct efifb_dmi_info efifb_dmi_list[]; | ||
65 | void sysfb_apply_efi_quirks(void); | ||
66 | |||
67 | #else /* CONFIG_EFI */ | ||
68 | |||
69 | static inline void sysfb_apply_efi_quirks(void) | ||
70 | { | ||
71 | } | ||
72 | |||
73 | #endif /* CONFIG_EFI */ | ||
74 | |||
75 | #ifdef CONFIG_X86_SYSFB | ||
76 | |||
77 | bool parse_mode(const struct screen_info *si, | ||
78 | struct simplefb_platform_data *mode); | ||
79 | int create_simplefb(const struct screen_info *si, | ||
80 | const struct simplefb_platform_data *mode); | ||
81 | |||
82 | #else /* CONFIG_X86_SYSFB */ | ||
83 | |||
84 | static inline bool parse_mode(const struct screen_info *si, | ||
85 | struct simplefb_platform_data *mode) | ||
86 | { | ||
87 | return false; | ||
88 | } | ||
89 | |||
90 | static inline int create_simplefb(const struct screen_info *si, | ||
91 | const struct simplefb_platform_data *mode) | ||
92 | { | ||
93 | return -EINVAL; | ||
94 | } | ||
95 | |||
96 | #endif /* CONFIG_X86_SYSFB */ | ||
97 | |||
98 | #endif /* _ARCH_X86_KERNEL_SYSFB_H */ | ||
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 095b21507b6a..d35f24e231cd 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -124,9 +124,6 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); | |||
124 | #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) | 124 | #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) |
125 | #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) | 125 | #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) |
126 | #define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) | 126 | #define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) |
127 | |||
128 | /* indicates that pointers to the topology cpumask_t maps are valid */ | ||
129 | #define arch_provides_topology_pointers yes | ||
130 | #endif | 127 | #endif |
131 | 128 | ||
132 | static inline void arch_fix_phys_package_id(int num, u32 slot) | 129 | static inline void arch_fix_phys_package_id(int num, u32 slot) |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 88eae2aec619..7036cb60cd87 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -6,11 +6,7 @@ | |||
6 | #include <asm/debugreg.h> | 6 | #include <asm/debugreg.h> |
7 | #include <asm/siginfo.h> /* TRAP_TRACE, ... */ | 7 | #include <asm/siginfo.h> /* TRAP_TRACE, ... */ |
8 | 8 | ||
9 | #ifdef CONFIG_X86_32 | 9 | #define dotraplinkage __visible |
10 | #define dotraplinkage | ||
11 | #else | ||
12 | #define dotraplinkage asmlinkage | ||
13 | #endif | ||
14 | 10 | ||
15 | asmlinkage void divide_error(void); | 11 | asmlinkage void divide_error(void); |
16 | asmlinkage void debug(void); | 12 | asmlinkage void debug(void); |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index c91e8b9d588b..235be70d5bb4 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -49,6 +49,7 @@ extern void tsc_init(void); | |||
49 | extern void mark_tsc_unstable(char *reason); | 49 | extern void mark_tsc_unstable(char *reason); |
50 | extern int unsynchronized_tsc(void); | 50 | extern int unsynchronized_tsc(void); |
51 | extern int check_tsc_unstable(void); | 51 | extern int check_tsc_unstable(void); |
52 | extern int check_tsc_disabled(void); | ||
52 | extern unsigned long native_calibrate_tsc(void); | 53 | extern unsigned long native_calibrate_tsc(void); |
53 | 54 | ||
54 | extern int tsc_clocksource_reliable; | 55 | extern int tsc_clocksource_reliable; |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 5ee26875baea..5838fa911aa0 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -153,16 +153,19 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) | |||
153 | * Careful: we have to cast the result to the type of the pointer | 153 | * Careful: we have to cast the result to the type of the pointer |
154 | * for sign reasons. | 154 | * for sign reasons. |
155 | * | 155 | * |
156 | * The use of %edx as the register specifier is a bit of a | 156 | * The use of _ASM_DX as the register specifier is a bit of a |
157 | * simplification, as gcc only cares about it as the starting point | 157 | * simplification, as gcc only cares about it as the starting point |
158 | * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits | 158 | * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits |
159 | * (%ecx being the next register in gcc's x86 register sequence), and | 159 | * (%ecx being the next register in gcc's x86 register sequence), and |
160 | * %rdx on 64 bits. | 160 | * %rdx on 64 bits. |
161 | * | ||
162 | * Clang/LLVM cares about the size of the register, but still wants | ||
163 | * the base register for something that ends up being a pair. | ||
161 | */ | 164 | */ |
162 | #define get_user(x, ptr) \ | 165 | #define get_user(x, ptr) \ |
163 | ({ \ | 166 | ({ \ |
164 | int __ret_gu; \ | 167 | int __ret_gu; \ |
165 | register __inttype(*(ptr)) __val_gu asm("%edx"); \ | 168 | register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ |
166 | __chk_user_ptr(ptr); \ | 169 | __chk_user_ptr(ptr); \ |
167 | might_fault(); \ | 170 | might_fault(); \ |
168 | asm volatile("call __get_user_%P3" \ | 171 | asm volatile("call __get_user_%P3" \ |
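Editor's note: the register-variable trick the comment above describes can be seen in isolation with a small user-space sketch of GCC/Clang register asm binding. This is an illustration only (assuming an x86-64 build); the function name is made up and nothing here is kernel code.

#include <stdio.h>

/* Pin a value to %rdx the same way the macro pins __val_gu to _ASM_DX;
 * the empty asm keeps the register binding observable. */
static unsigned long read_via_dx(unsigned long in)
{
        register unsigned long val asm("rdx") = in;

        asm volatile("" : "+r" (val));
        return val;
}

int main(void)
{
        printf("%lu\n", read_via_dx(42));
        return 0;
}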
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f3e01a2cbaa1..966502d4682e 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -387,6 +387,7 @@ enum vmcs_field { | |||
387 | #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 | 387 | #define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0 |
388 | #define VMX_EPT_EXTENT_CONTEXT 1 | 388 | #define VMX_EPT_EXTENT_CONTEXT 1 |
389 | #define VMX_EPT_EXTENT_GLOBAL 2 | 389 | #define VMX_EPT_EXTENT_GLOBAL 2 |
390 | #define VMX_EPT_EXTENT_SHIFT 24 | ||
390 | 391 | ||
391 | #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) | 392 | #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) |
392 | #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) | 393 | #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) |
@@ -394,6 +395,7 @@ enum vmcs_field { | |||
394 | #define VMX_EPTP_WB_BIT (1ull << 14) | 395 | #define VMX_EPTP_WB_BIT (1ull << 14) |
395 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) | 396 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) |
396 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) | 397 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) |
398 | #define VMX_EPT_INVEPT_BIT (1ull << 20) | ||
397 | #define VMX_EPT_AD_BIT (1ull << 21) | 399 | #define VMX_EPT_AD_BIT (1ull << 21) |
398 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 400 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
399 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 401 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
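Editor's note: a hedged reading of the new VMX_EPT_EXTENT_SHIFT constant, shown as a tiny helper (illustrative name, not from the patch): it maps an INVEPT extent type onto its capability bit in the EPT/VPID capability MSR, consistent with the _BIT definitions above (context -> bit 25, global -> bit 26).

/* Illustrative sketch only -- not part of the patch. */
static inline bool example_invept_type_supported(u64 ept_vpid_cap, int type)
{
        return ept_vpid_cap & (1ull << (type + VMX_EPT_EXTENT_SHIFT));
}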
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index de656ac2af41..d76ac40da206 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h | |||
@@ -35,7 +35,7 @@ | |||
35 | 35 | ||
36 | #define DEFINE_VVAR(type, name) \ | 36 | #define DEFINE_VVAR(type, name) \ |
37 | type name \ | 37 | type name \ |
38 | __attribute__((section(".vvar_" #name), aligned(16))) | 38 | __attribute__((section(".vvar_" #name), aligned(16))) __visible |
39 | 39 | ||
40 | #define VVAR(name) (*vvaraddr_ ## name) | 40 | #define VVAR(name) (*vvaraddr_ ## name) |
41 | 41 | ||
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index ca842f2769ef..608a79d5a466 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h | |||
@@ -7,6 +7,7 @@ enum ipi_vector { | |||
7 | XEN_CALL_FUNCTION_SINGLE_VECTOR, | 7 | XEN_CALL_FUNCTION_SINGLE_VECTOR, |
8 | XEN_SPIN_UNLOCK_VECTOR, | 8 | XEN_SPIN_UNLOCK_VECTOR, |
9 | XEN_IRQ_WORK_VECTOR, | 9 | XEN_IRQ_WORK_VECTOR, |
10 | XEN_NMI_VECTOR, | ||
10 | 11 | ||
11 | XEN_NR_IPIS, | 12 | XEN_NR_IPIS, |
12 | }; | 13 | }; |
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 125f344f06a9..d866959e5685 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h | |||
@@ -40,21 +40,7 @@ extern struct start_info *xen_start_info; | |||
40 | 40 | ||
41 | static inline uint32_t xen_cpuid_base(void) | 41 | static inline uint32_t xen_cpuid_base(void) |
42 | { | 42 | { |
43 | uint32_t base, eax, ebx, ecx, edx; | 43 | return hypervisor_cpuid_base("XenVMMXenVMM", 2); |
44 | char signature[13]; | ||
45 | |||
46 | for (base = 0x40000000; base < 0x40010000; base += 0x100) { | ||
47 | cpuid(base, &eax, &ebx, &ecx, &edx); | ||
48 | *(uint32_t *)(signature + 0) = ebx; | ||
49 | *(uint32_t *)(signature + 4) = ecx; | ||
50 | *(uint32_t *)(signature + 8) = edx; | ||
51 | signature[12] = 0; | ||
52 | |||
53 | if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2)) | ||
54 | return base; | ||
55 | } | ||
56 | |||
57 | return 0; | ||
58 | } | 44 | } |
59 | 45 | ||
60 | #ifdef CONFIG_XEN | 46 | #ifdef CONFIG_XEN |
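Editor's note: the open-coded scan removed above is what the shared hypervisor_cpuid_base() helper is assumed to perform. For reference, a stand-alone user-space version of the same scan looks roughly like this (scan_hypervisor_base() is an illustrative name); scan_hypervisor_base("XenVMMXenVMM", 2) would reproduce the old behaviour.

#include <cpuid.h>
#include <stdint.h>
#include <string.h>

static uint32_t scan_hypervisor_base(const char *sig, uint32_t min_leaves)
{
        uint32_t base, eax, ebx, ecx, edx;
        char found[13];

        for (base = 0x40000000; base < 0x40010000; base += 0x100) {
                __cpuid(base, eax, ebx, ecx, edx);
                memcpy(found + 0, &ebx, 4);
                memcpy(found + 4, &ecx, 4);
                memcpy(found + 8, &edx, 4);
                found[12] = 0;

                /* signature matches and enough leaves are advertised */
                if (!strcmp(sig, found) && (eax - base) >= min_leaves)
                        return base;
        }
        return 0;
}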
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 06fdbd987e97..94dc8ca434e0 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h | |||
@@ -23,6 +23,7 @@ | |||
23 | #define KVM_FEATURE_ASYNC_PF 4 | 23 | #define KVM_FEATURE_ASYNC_PF 4 |
24 | #define KVM_FEATURE_STEAL_TIME 5 | 24 | #define KVM_FEATURE_STEAL_TIME 5 |
25 | #define KVM_FEATURE_PV_EOI 6 | 25 | #define KVM_FEATURE_PV_EOI 6 |
26 | #define KVM_FEATURE_PV_UNHALT 7 | ||
26 | 27 | ||
27 | /* The last 8 bits are used to indicate how to interpret the flags field | 28 | /* The last 8 bits are used to indicate how to interpret the flags field |
28 | * in pvclock structure. If no bits are set, all flags are ignored. | 29 | * in pvclock structure. If no bits are set, all flags are ignored. |
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index d651082c7cf7..0e79420376eb 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
@@ -65,6 +65,7 @@ | |||
65 | #define EXIT_REASON_EOI_INDUCED 45 | 65 | #define EXIT_REASON_EOI_INDUCED 45 |
66 | #define EXIT_REASON_EPT_VIOLATION 48 | 66 | #define EXIT_REASON_EPT_VIOLATION 48 |
67 | #define EXIT_REASON_EPT_MISCONFIG 49 | 67 | #define EXIT_REASON_EPT_MISCONFIG 49 |
68 | #define EXIT_REASON_INVEPT 50 | ||
68 | #define EXIT_REASON_PREEMPTION_TIMER 52 | 69 | #define EXIT_REASON_PREEMPTION_TIMER 52 |
69 | #define EXIT_REASON_WBINVD 54 | 70 | #define EXIT_REASON_WBINVD 54 |
70 | #define EXIT_REASON_XSETBV 55 | 71 | #define EXIT_REASON_XSETBV 55 |
@@ -106,12 +107,13 @@ | |||
106 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ | 107 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ |
107 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ | 108 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ |
108 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ | 109 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ |
110 | { EXIT_REASON_INVEPT, "INVEPT" }, \ | ||
111 | { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ | ||
109 | { EXIT_REASON_WBINVD, "WBINVD" }, \ | 112 | { EXIT_REASON_WBINVD, "WBINVD" }, \ |
110 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ | 113 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ |
111 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ | 114 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ |
112 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ | 115 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ |
113 | { EXIT_REASON_INVD, "INVD" }, \ | 116 | { EXIT_REASON_INVD, "INVD" }, \ |
114 | { EXIT_REASON_INVPCID, "INVPCID" }, \ | 117 | { EXIT_REASON_INVPCID, "INVPCID" } |
115 | { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } | ||
116 | 118 | ||
117 | #endif /* _UAPIVMX_H */ | 119 | #endif /* _UAPIVMX_H */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 88d99ea77723..a5408b965c9d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -103,6 +103,9 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | |||
103 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 103 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
104 | obj-$(CONFIG_OF) += devicetree.o | 104 | obj-$(CONFIG_OF) += devicetree.o |
105 | obj-$(CONFIG_UPROBES) += uprobes.o | 105 | obj-$(CONFIG_UPROBES) += uprobes.o |
106 | obj-y += sysfb.o | ||
107 | obj-$(CONFIG_X86_SYSFB) += sysfb_simplefb.o | ||
108 | obj-$(CONFIG_EFI) += sysfb_efi.o | ||
106 | 109 | ||
107 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o | 110 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o |
108 | obj-$(CONFIG_TRACING) += tracepoint.o | 111 | obj-$(CONFIG_TRACING) += tracepoint.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2627a81253ee..40c76604199f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -67,6 +67,7 @@ EXPORT_SYMBOL(acpi_pci_disabled); | |||
67 | int acpi_lapic; | 67 | int acpi_lapic; |
68 | int acpi_ioapic; | 68 | int acpi_ioapic; |
69 | int acpi_strict; | 69 | int acpi_strict; |
70 | int acpi_disable_cmcff; | ||
70 | 71 | ||
71 | u8 acpi_sci_flags __initdata; | 72 | u8 acpi_sci_flags __initdata; |
72 | int acpi_sci_override_gsi __initdata; | 73 | int acpi_sci_override_gsi __initdata; |
@@ -141,16 +142,8 @@ static u32 irq_to_gsi(int irq) | |||
141 | } | 142 | } |
142 | 143 | ||
143 | /* | 144 | /* |
144 | * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, | 145 | * This is just a simple wrapper around early_ioremap(), |
145 | * to map the target physical address. The problem is that set_fixmap() | 146 | * with sanity checks for phys == 0 and size == 0. |
146 | * provides a single page, and it is possible that the page is not | ||
147 | * sufficient. | ||
148 | * By using this area, we can map up to MAX_IO_APICS pages temporarily, | ||
149 | * i.e. until the next __va_range() call. | ||
150 | * | ||
151 | * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* | ||
152 | * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and | ||
153 | * count idx down while incrementing the phys address. | ||
154 | */ | 147 | */ |
155 | char *__init __acpi_map_table(unsigned long phys, unsigned long size) | 148 | char *__init __acpi_map_table(unsigned long phys, unsigned long size) |
156 | { | 149 | { |
@@ -160,6 +153,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) | |||
160 | 153 | ||
161 | return early_ioremap(phys, size); | 154 | return early_ioremap(phys, size); |
162 | } | 155 | } |
156 | |||
163 | void __init __acpi_unmap_table(char *map, unsigned long size) | 157 | void __init __acpi_unmap_table(char *map, unsigned long size) |
164 | { | 158 | { |
165 | if (!map || !size) | 159 | if (!map || !size) |
@@ -199,7 +193,7 @@ static void acpi_register_lapic(int id, u8 enabled) | |||
199 | { | 193 | { |
200 | unsigned int ver = 0; | 194 | unsigned int ver = 0; |
201 | 195 | ||
202 | if (id >= (MAX_LOCAL_APIC-1)) { | 196 | if (id >= MAX_LOCAL_APIC) { |
203 | printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); | 197 | printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); |
204 | return; | 198 | return; |
205 | } | 199 | } |
@@ -1120,6 +1114,7 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
1120 | int ioapic; | 1114 | int ioapic; |
1121 | int ioapic_pin; | 1115 | int ioapic_pin; |
1122 | struct io_apic_irq_attr irq_attr; | 1116 | struct io_apic_irq_attr irq_attr; |
1117 | int ret; | ||
1123 | 1118 | ||
1124 | if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) | 1119 | if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) |
1125 | return gsi; | 1120 | return gsi; |
@@ -1149,7 +1144,9 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
1149 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, | 1144 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, |
1150 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, | 1145 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, |
1151 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | 1146 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); |
1152 | io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); | 1147 | ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); |
1148 | if (ret < 0) | ||
1149 | gsi = INT_MIN; | ||
1153 | 1150 | ||
1154 | return gsi; | 1151 | return gsi; |
1155 | } | 1152 | } |
@@ -1626,6 +1623,10 @@ static int __init parse_acpi(char *arg) | |||
1626 | /* "acpi=copy_dsdt" copys DSDT */ | 1623 | /* "acpi=copy_dsdt" copys DSDT */ |
1627 | else if (strcmp(arg, "copy_dsdt") == 0) { | 1624 | else if (strcmp(arg, "copy_dsdt") == 0) { |
1628 | acpi_gbl_copy_dsdt_locally = 1; | 1625 | acpi_gbl_copy_dsdt_locally = 1; |
1626 | } | ||
1627 | /* "acpi=nocmcff" disables FF mode for corrected errors */ | ||
1628 | else if (strcmp(arg, "nocmcff") == 0) { | ||
1629 | acpi_disable_cmcff = 1; | ||
1629 | } else { | 1630 | } else { |
1630 | /* Core will printk when we return error. */ | 1631 | /* Core will printk when we return error. */ |
1631 | return -EINVAL; | 1632 | return -EINVAL; |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c15cf9a25e27..15e8563e5c24 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/memory.h> | 11 | #include <linux/memory.h> |
12 | #include <linux/stop_machine.h> | 12 | #include <linux/stop_machine.h> |
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/kdebug.h> | ||
14 | #include <asm/alternative.h> | 15 | #include <asm/alternative.h> |
15 | #include <asm/sections.h> | 16 | #include <asm/sections.h> |
16 | #include <asm/pgtable.h> | 17 | #include <asm/pgtable.h> |
@@ -596,97 +597,93 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) | |||
596 | return addr; | 597 | return addr; |
597 | } | 598 | } |
598 | 599 | ||
599 | /* | 600 | static void do_sync_core(void *info) |
600 | * Cross-modifying kernel text with stop_machine(). | 601 | { |
601 | * This code originally comes from immediate value. | 602 | sync_core(); |
602 | */ | 603 | } |
603 | static atomic_t stop_machine_first; | ||
604 | static int wrote_text; | ||
605 | 604 | ||
606 | struct text_poke_params { | 605 | static bool bp_patching_in_progress; |
607 | struct text_poke_param *params; | 606 | static void *bp_int3_handler, *bp_int3_addr; |
608 | int nparams; | ||
609 | }; | ||
610 | 607 | ||
611 | static int __kprobes stop_machine_text_poke(void *data) | 608 | int poke_int3_handler(struct pt_regs *regs) |
612 | { | 609 | { |
613 | struct text_poke_params *tpp = data; | 610 | /* bp_patching_in_progress */ |
614 | struct text_poke_param *p; | 611 | smp_rmb(); |
615 | int i; | ||
616 | 612 | ||
617 | if (atomic_xchg(&stop_machine_first, 0)) { | 613 | if (likely(!bp_patching_in_progress)) |
618 | for (i = 0; i < tpp->nparams; i++) { | 614 | return 0; |
619 | p = &tpp->params[i]; | ||
620 | text_poke(p->addr, p->opcode, p->len); | ||
621 | } | ||
622 | smp_wmb(); /* Make sure other cpus see that this has run */ | ||
623 | wrote_text = 1; | ||
624 | } else { | ||
625 | while (!wrote_text) | ||
626 | cpu_relax(); | ||
627 | smp_mb(); /* Load wrote_text before following execution */ | ||
628 | } | ||
629 | 615 | ||
630 | for (i = 0; i < tpp->nparams; i++) { | 616 | if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr) |
631 | p = &tpp->params[i]; | 617 | return 0; |
632 | flush_icache_range((unsigned long)p->addr, | 618 | |
633 | (unsigned long)p->addr + p->len); | 619 | /* set up the specified breakpoint handler */ |
634 | } | 620 | regs->ip = (unsigned long) bp_int3_handler; |
635 | /* | 621 | |
636 | * Intel Architecture Software Developer's Manual section 7.1.3 specifies | 622 | return 1; |
637 | * that a core serializing instruction such as "cpuid" should be | ||
638 | * executed on _each_ core before the new instruction is made visible. | ||
639 | */ | ||
640 | sync_core(); | ||
641 | return 0; | ||
642 | } | ||
643 | 623 | ||
644 | /** | ||
645 | * text_poke_smp - Update instructions on a live kernel on SMP | ||
646 | * @addr: address to modify | ||
647 | * @opcode: source of the copy | ||
648 | * @len: length to copy | ||
649 | * | ||
650 | * Modify multi-byte instruction by using stop_machine() on SMP. This allows | ||
651 | * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying | ||
652 | * should be allowed, since stop_machine() does _not_ protect code against | ||
653 | * NMI and MCE. | ||
654 | * | ||
655 | * Note: Must be called under get_online_cpus() and text_mutex. | ||
656 | */ | ||
657 | void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) | ||
658 | { | ||
659 | struct text_poke_params tpp; | ||
660 | struct text_poke_param p; | ||
661 | |||
662 | p.addr = addr; | ||
663 | p.opcode = opcode; | ||
664 | p.len = len; | ||
665 | tpp.params = &p; | ||
666 | tpp.nparams = 1; | ||
667 | atomic_set(&stop_machine_first, 1); | ||
668 | wrote_text = 0; | ||
669 | /* Use __stop_machine() because the caller already got online_cpus. */ | ||
670 | __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask); | ||
671 | return addr; | ||
672 | } | 624 | } |
673 | 625 | ||
674 | /** | 626 | /** |
675 | * text_poke_smp_batch - Update instructions on a live kernel on SMP | 627 | * text_poke_bp() -- update instructions on a live kernel on SMP |
676 | * @params: an array of text_poke parameters | 628 | * @addr: address to patch |
677 | * @n: the number of elements in params. | 629 | * @opcode: opcode of new instruction |
630 | * @len: length to copy | ||
631 | * @handler: address to jump to when the temporary breakpoint is hit | ||
678 | * | 632 | * |
679 | * Modify a multi-byte instruction by using stop_machine() on SMP. Since | 633 | * Modify a multi-byte instruction by using an int3 breakpoint on SMP. |
680 | * stop_machine() is a heavy task, it is better to aggregate text_poke requests | 634 | * We completely avoid stop_machine() here, and achieve the |
681 | * and do them once if possible. | 635 | * synchronization using the int3 breakpoint. |
682 | * | 636 | * |
683 | * Note: Must be called under get_online_cpus() and text_mutex. | 637 | * The way it is done: |
638 | * - add an int3 trap to the address that will be patched | ||
639 | * - sync cores | ||
640 | * - update all but the first byte of the patched range | ||
641 | * - sync cores | ||
642 | * - replace the first byte (int3) by the first byte of | ||
643 | * replacing opcode | ||
644 | * - sync cores | ||
645 | * | ||
646 | * Note: must be called under text_mutex. | ||
684 | */ | 647 | */ |
685 | void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) | 648 | void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) |
686 | { | 649 | { |
687 | struct text_poke_params tpp = {.params = params, .nparams = n}; | 650 | unsigned char int3 = 0xcc; |
651 | |||
652 | bp_int3_handler = handler; | ||
653 | bp_int3_addr = (u8 *)addr + sizeof(int3); | ||
654 | bp_patching_in_progress = true; | ||
655 | /* | ||
656 | * Corresponding read barrier in int3 notifier for | ||
657 | * making sure the in_progress flag is correctly ordered w.r.t. | ||
658 | * patching | ||
659 | */ | ||
660 | smp_wmb(); | ||
661 | |||
662 | text_poke(addr, &int3, sizeof(int3)); | ||
688 | 663 | ||
689 | atomic_set(&stop_machine_first, 1); | 664 | on_each_cpu(do_sync_core, NULL, 1); |
690 | wrote_text = 0; | 665 | |
691 | __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask); | 666 | if (len - sizeof(int3) > 0) { |
667 | /* patch all but the first byte */ | ||
668 | text_poke((char *)addr + sizeof(int3), | ||
669 | (const char *) opcode + sizeof(int3), | ||
670 | len - sizeof(int3)); | ||
671 | /* | ||
672 | * According to Intel, this core syncing is very likely | ||
673 | * not necessary and we'd be safe even without it. But | ||
674 | * better safe than sorry (plus there's not only Intel). | ||
675 | */ | ||
676 | on_each_cpu(do_sync_core, NULL, 1); | ||
677 | } | ||
678 | |||
679 | /* patch the first byte */ | ||
680 | text_poke(addr, opcode, sizeof(int3)); | ||
681 | |||
682 | on_each_cpu(do_sync_core, NULL, 1); | ||
683 | |||
684 | bp_patching_in_progress = false; | ||
685 | smp_wmb(); | ||
686 | |||
687 | return addr; | ||
692 | } | 688 | } |
689 | |||
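Editor's note: to make the new primitive's calling convention concrete, here is a hedged caller sketch for patching a site of length len. The wrapper name and the choice of resume address are illustrative; a real user picks the handler address to preserve the semantics of the instruction being replaced, and (per the comment above) must hold text_mutex.

/* Illustrative sketch only -- not part of the patch. */
static void example_patch_site(void *site, const void *new_insn, size_t len)
{
        /*
         * If a CPU hits the transient int3, resume after the patched
         * range; real callers choose this to match the old instruction.
         */
        void *resume = (u8 *)site + len;

        mutex_lock(&text_mutex);
        text_poke_bp(site, new_insn, len, resume);
        mutex_unlock(&text_mutex);
}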
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 3048ded1b598..59554dca96ec 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c | |||
@@ -20,6 +20,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { | |||
20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, | 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, | 21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, |
22 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, | 22 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, |
23 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) }, | ||
23 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, | 24 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, |
24 | {} | 25 | {} |
25 | }; | 26 | }; |
@@ -27,6 +28,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids); | |||
27 | 28 | ||
28 | static const struct pci_device_id amd_nb_link_ids[] = { | 29 | static const struct pci_device_id amd_nb_link_ids[] = { |
29 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, | 30 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, |
31 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) }, | ||
30 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, | 32 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, |
31 | {} | 33 | {} |
32 | }; | 34 | }; |
@@ -81,13 +83,20 @@ int amd_cache_northbridges(void) | |||
81 | next_northbridge(misc, amd_nb_misc_ids); | 83 | next_northbridge(misc, amd_nb_misc_ids); |
82 | node_to_amd_nb(i)->link = link = | 84 | node_to_amd_nb(i)->link = link = |
83 | next_northbridge(link, amd_nb_link_ids); | 85 | next_northbridge(link, amd_nb_link_ids); |
84 | } | 86 | } |
85 | 87 | ||
88 | /* GART present only on Fam15h, up to model 0fh */ | ||
86 | if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || | 89 | if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || |
87 | boot_cpu_data.x86 == 0x15) | 90 | (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10)) |
88 | amd_northbridges.flags |= AMD_NB_GART; | 91 | amd_northbridges.flags |= AMD_NB_GART; |
89 | 92 | ||
90 | /* | 93 | /* |
94 | * Check for L3 cache presence. | ||
95 | */ | ||
96 | if (!cpuid_edx(0x80000006)) | ||
97 | return 0; | ||
98 | |||
99 | /* | ||
91 | * Some CPU families support L3 Cache Index Disable. There are some | 100 | * Some CPU families support L3 Cache Index Disable. There are some |
92 | * limitations because of E382 and E388 on family 0x10. | 101 | * limitations because of E382 and E388 on family 0x10. |
93 | */ | 102 | */ |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index eca89c53a7f5..a7eb82d9b012 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -913,7 +913,7 @@ static void local_apic_timer_interrupt(void) | |||
913 | * [ if a single-CPU system runs an SMP kernel then we call the local | 913 | * [ if a single-CPU system runs an SMP kernel then we call the local |
914 | * interrupt as well. Thus we cannot inline the local irq ... ] | 914 | * interrupt as well. Thus we cannot inline the local irq ... ] |
915 | */ | 915 | */ |
916 | void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) | 916 | __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) |
917 | { | 917 | { |
918 | struct pt_regs *old_regs = set_irq_regs(regs); | 918 | struct pt_regs *old_regs = set_irq_regs(regs); |
919 | 919 | ||
@@ -932,7 +932,7 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) | |||
932 | set_irq_regs(old_regs); | 932 | set_irq_regs(old_regs); |
933 | } | 933 | } |
934 | 934 | ||
935 | void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs) | 935 | __visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs) |
936 | { | 936 | { |
937 | struct pt_regs *old_regs = set_irq_regs(regs); | 937 | struct pt_regs *old_regs = set_irq_regs(regs); |
938 | 938 | ||
@@ -1946,14 +1946,14 @@ static inline void __smp_spurious_interrupt(void) | |||
1946 | "should never happen.\n", smp_processor_id()); | 1946 | "should never happen.\n", smp_processor_id()); |
1947 | } | 1947 | } |
1948 | 1948 | ||
1949 | void smp_spurious_interrupt(struct pt_regs *regs) | 1949 | __visible void smp_spurious_interrupt(struct pt_regs *regs) |
1950 | { | 1950 | { |
1951 | entering_irq(); | 1951 | entering_irq(); |
1952 | __smp_spurious_interrupt(); | 1952 | __smp_spurious_interrupt(); |
1953 | exiting_irq(); | 1953 | exiting_irq(); |
1954 | } | 1954 | } |
1955 | 1955 | ||
1956 | void smp_trace_spurious_interrupt(struct pt_regs *regs) | 1956 | __visible void smp_trace_spurious_interrupt(struct pt_regs *regs) |
1957 | { | 1957 | { |
1958 | entering_irq(); | 1958 | entering_irq(); |
1959 | trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR); | 1959 | trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR); |
@@ -2002,14 +2002,14 @@ static inline void __smp_error_interrupt(struct pt_regs *regs) | |||
2002 | 2002 | ||
2003 | } | 2003 | } |
2004 | 2004 | ||
2005 | void smp_error_interrupt(struct pt_regs *regs) | 2005 | __visible void smp_error_interrupt(struct pt_regs *regs) |
2006 | { | 2006 | { |
2007 | entering_irq(); | 2007 | entering_irq(); |
2008 | __smp_error_interrupt(regs); | 2008 | __smp_error_interrupt(regs); |
2009 | exiting_irq(); | 2009 | exiting_irq(); |
2010 | } | 2010 | } |
2011 | 2011 | ||
2012 | void smp_trace_error_interrupt(struct pt_regs *regs) | 2012 | __visible void smp_trace_error_interrupt(struct pt_regs *regs) |
2013 | { | 2013 | { |
2014 | entering_irq(); | 2014 | entering_irq(); |
2015 | trace_error_apic_entry(ERROR_APIC_VECTOR); | 2015 | trace_error_apic_entry(ERROR_APIC_VECTOR); |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9ed796ccc32c..e63a5bd2a78f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -1534,6 +1534,11 @@ void intel_ir_io_apic_print_entries(unsigned int apic, | |||
1534 | } | 1534 | } |
1535 | } | 1535 | } |
1536 | 1536 | ||
1537 | void ioapic_zap_locks(void) | ||
1538 | { | ||
1539 | raw_spin_lock_init(&ioapic_lock); | ||
1540 | } | ||
1541 | |||
1537 | __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | 1542 | __apicdebuginit(void) print_IO_APIC(int ioapic_idx) |
1538 | { | 1543 | { |
1539 | union IO_APIC_reg_00 reg_00; | 1544 | union IO_APIC_reg_00 reg_00; |
@@ -3375,12 +3380,15 @@ int io_apic_setup_irq_pin_once(unsigned int irq, int node, | |||
3375 | { | 3380 | { |
3376 | unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin; | 3381 | unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin; |
3377 | int ret; | 3382 | int ret; |
3383 | struct IO_APIC_route_entry orig_entry; | ||
3378 | 3384 | ||
3379 | /* Avoid redundant programming */ | 3385 | /* Avoid redundant programming */ |
3380 | if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) { | 3386 | if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) { |
3381 | pr_debug("Pin %d-%d already programmed\n", | 3387 | pr_debug("Pin %d-%d already programmed\n", mpc_ioapic_id(ioapic_idx), pin); |
3382 | mpc_ioapic_id(ioapic_idx), pin); | 3388 | orig_entry = ioapic_read_entry(attr->ioapic, pin); |
3383 | return 0; | 3389 | if (attr->trigger == orig_entry.trigger && attr->polarity == orig_entry.polarity) |
3390 | return 0; | ||
3391 | return -EBUSY; | ||
3384 | } | 3392 | } |
3385 | ret = io_apic_setup_irq_pin(irq, node, attr); | 3393 | ret = io_apic_setup_irq_pin(irq, node, attr); |
3386 | if (!ret) | 3394 | if (!ret) |
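Editor's note: a hedged sketch of the kind of crash-path caller the new ioapic_zap_locks() is aimed at. The function name below is made up; disable_IO_APIC() is assumed to be the existing helper the crash path already uses.

/* Illustrative sketch only: a kdump/panic path cannot know whether the
 * interrupted context held ioapic_lock, so it re-initializes the lock
 * before touching the IO-APICs. */
static void example_crash_shutdown_ioapics(void)
{
        ioapic_zap_locks();
        disable_IO_APIC();
}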
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 53a4e2744846..3ab03430211d 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -392,7 +392,7 @@ static struct cpuidle_device apm_cpuidle_device; | |||
392 | /* | 392 | /* |
393 | * Local variables | 393 | * Local variables |
394 | */ | 394 | */ |
395 | static struct { | 395 | __visible struct { |
396 | unsigned long offset; | 396 | unsigned long offset; |
397 | unsigned short segment; | 397 | unsigned short segment; |
398 | } apm_bios_entry; | 398 | } apm_bios_entry; |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f654ecefea5b..903a264af981 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -66,8 +66,8 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | |||
66 | * performance at the same time.. | 66 | * performance at the same time.. |
67 | */ | 67 | */ |
68 | 68 | ||
69 | extern void vide(void); | 69 | extern __visible void vide(void); |
70 | __asm__(".align 4\nvide: ret"); | 70 | __asm__(".globl vide\n\t.align 4\nvide: ret"); |
71 | 71 | ||
72 | static void init_amd_k5(struct cpuinfo_x86 *c) | 72 | static void init_amd_k5(struct cpuinfo_x86 *c) |
73 | { | 73 | { |
@@ -512,7 +512,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) | |||
512 | 512 | ||
513 | static const int amd_erratum_383[]; | 513 | static const int amd_erratum_383[]; |
514 | static const int amd_erratum_400[]; | 514 | static const int amd_erratum_400[]; |
515 | static bool cpu_has_amd_erratum(const int *erratum); | 515 | static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); |
516 | 516 | ||
517 | static void init_amd(struct cpuinfo_x86 *c) | 517 | static void init_amd(struct cpuinfo_x86 *c) |
518 | { | 518 | { |
@@ -729,11 +729,11 @@ static void init_amd(struct cpuinfo_x86 *c) | |||
729 | value &= ~(1ULL << 24); | 729 | value &= ~(1ULL << 24); |
730 | wrmsrl_safe(MSR_AMD64_BU_CFG2, value); | 730 | wrmsrl_safe(MSR_AMD64_BU_CFG2, value); |
731 | 731 | ||
732 | if (cpu_has_amd_erratum(amd_erratum_383)) | 732 | if (cpu_has_amd_erratum(c, amd_erratum_383)) |
733 | set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); | 733 | set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); |
734 | } | 734 | } |
735 | 735 | ||
736 | if (cpu_has_amd_erratum(amd_erratum_400)) | 736 | if (cpu_has_amd_erratum(c, amd_erratum_400)) |
737 | set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); | 737 | set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); |
738 | 738 | ||
739 | rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); | 739 | rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); |
@@ -878,23 +878,13 @@ static const int amd_erratum_400[] = | |||
878 | static const int amd_erratum_383[] = | 878 | static const int amd_erratum_383[] = |
879 | AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); | 879 | AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); |
880 | 880 | ||
881 | static bool cpu_has_amd_erratum(const int *erratum) | 881 | |
882 | static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) | ||
882 | { | 883 | { |
883 | struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info); | ||
884 | int osvw_id = *erratum++; | 884 | int osvw_id = *erratum++; |
885 | u32 range; | 885 | u32 range; |
886 | u32 ms; | 886 | u32 ms; |
887 | 887 | ||
888 | /* | ||
889 | * If called early enough that current_cpu_data hasn't been initialized | ||
890 | * yet, fall back to boot_cpu_data. | ||
891 | */ | ||
892 | if (cpu->x86 == 0) | ||
893 | cpu = &boot_cpu_data; | ||
894 | |||
895 | if (cpu->x86_vendor != X86_VENDOR_AMD) | ||
896 | return false; | ||
897 | |||
898 | if (osvw_id >= 0 && osvw_id < 65536 && | 888 | if (osvw_id >= 0 && osvw_id < 65536 && |
899 | cpu_has(cpu, X86_FEATURE_OSVW)) { | 889 | cpu_has(cpu, X86_FEATURE_OSVW)) { |
900 | u64 osvw_len; | 890 | u64 osvw_len; |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 25eb2747b063..2793d1f095a2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1076,7 +1076,7 @@ struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, | |||
1076 | (unsigned long) debug_idt_table }; | 1076 | (unsigned long) debug_idt_table }; |
1077 | 1077 | ||
1078 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 1078 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
1079 | irq_stack_union) __aligned(PAGE_SIZE); | 1079 | irq_stack_union) __aligned(PAGE_SIZE) __visible; |
1080 | 1080 | ||
1081 | /* | 1081 | /* |
1082 | * The following four percpu variables are hot. Align current_task to | 1082 | * The following four percpu variables are hot. Align current_task to |
@@ -1093,7 +1093,7 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack); | |||
1093 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | 1093 | DEFINE_PER_CPU(char *, irq_stack_ptr) = |
1094 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | 1094 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; |
1095 | 1095 | ||
1096 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; | 1096 | DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; |
1097 | 1097 | ||
1098 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); | 1098 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); |
1099 | 1099 | ||
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 87279212d318..36ce402a3fa5 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -25,11 +25,6 @@ | |||
25 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
26 | #include <asm/hypervisor.h> | 26 | #include <asm/hypervisor.h> |
27 | 27 | ||
28 | /* | ||
29 | * Hypervisor detect order. This is specified explicitly here because | ||
30 | * some hypervisors might implement compatibility modes for other | ||
31 | * hypervisors and therefore need to be detected in specific sequence. | ||
32 | */ | ||
33 | static const __initconst struct hypervisor_x86 * const hypervisors[] = | 28 | static const __initconst struct hypervisor_x86 * const hypervisors[] = |
34 | { | 29 | { |
35 | #ifdef CONFIG_XEN_PVHVM | 30 | #ifdef CONFIG_XEN_PVHVM |
@@ -49,15 +44,19 @@ static inline void __init | |||
49 | detect_hypervisor_vendor(void) | 44 | detect_hypervisor_vendor(void) |
50 | { | 45 | { |
51 | const struct hypervisor_x86 *h, * const *p; | 46 | const struct hypervisor_x86 *h, * const *p; |
47 | uint32_t pri, max_pri = 0; | ||
52 | 48 | ||
53 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { | 49 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { |
54 | h = *p; | 50 | h = *p; |
55 | if (h->detect()) { | 51 | pri = h->detect(); |
52 | if (pri != 0 && pri > max_pri) { | ||
53 | max_pri = pri; | ||
56 | x86_hyper = h; | 54 | x86_hyper = h; |
57 | printk(KERN_INFO "Hypervisor detected: %s\n", h->name); | ||
58 | break; | ||
59 | } | 55 | } |
60 | } | 56 | } |
57 | |||
58 | if (max_pri) | ||
59 | printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name); | ||
61 | } | 60 | } |
62 | 61 | ||
63 | void init_hypervisor(struct cpuinfo_x86 *c) | 62 | void init_hypervisor(struct cpuinfo_x86 *c) |
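Editor's note: with the change above, ->detect() no longer returns a boolean but a priority, and the highest non-zero value wins. A sketch of the shape a callback now takes (illustrative names; example_scan_for_signature() is hypothetical):

/* Illustrative sketch only -- not part of the patch. */
static uint32_t __init example_platform_detect(void)
{
        uint32_t base = example_scan_for_signature();   /* hypothetical helper */

        /* 0 == not detected; a CPUID leaf number is a natural priority,
         * as the Hyper-V change further below does. */
        return base;
}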
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 5b7d4fa5d3b7..09edd0b65fef 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg); | |||
25 | struct dentry *mce_get_debugfs_dir(void); | 25 | struct dentry *mce_get_debugfs_dir(void); |
26 | 26 | ||
27 | extern struct mce_bank *mce_banks; | 27 | extern struct mce_bank *mce_banks; |
28 | extern mce_banks_t mce_banks_ce_disabled; | ||
28 | 29 | ||
29 | #ifdef CONFIG_X86_MCE_INTEL | 30 | #ifdef CONFIG_X86_MCE_INTEL |
30 | unsigned long mce_intel_adjust_timer(unsigned long interval); | 31 | unsigned long mce_intel_adjust_timer(unsigned long interval); |
31 | void mce_intel_cmci_poll(void); | 32 | void mce_intel_cmci_poll(void); |
32 | void mce_intel_hcpu_update(unsigned long cpu); | 33 | void mce_intel_hcpu_update(unsigned long cpu); |
34 | void cmci_disable_bank(int bank); | ||
33 | #else | 35 | #else |
34 | # define mce_intel_adjust_timer mce_adjust_timer_default | 36 | # define mce_intel_adjust_timer mce_adjust_timer_default |
35 | static inline void mce_intel_cmci_poll(void) { } | 37 | static inline void mce_intel_cmci_poll(void) { } |
36 | static inline void mce_intel_hcpu_update(unsigned long cpu) { } | 38 | static inline void mce_intel_hcpu_update(unsigned long cpu) { } |
39 | static inline void cmci_disable_bank(int bank) { } | ||
37 | #endif | 40 | #endif |
38 | 41 | ||
39 | void mce_timer_kick(unsigned long interval); | 42 | void mce_timer_kick(unsigned long interval); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index e2703520d120..c370e1c4468b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -111,8 +111,8 @@ static struct severity { | |||
111 | #ifdef CONFIG_MEMORY_FAILURE | 111 | #ifdef CONFIG_MEMORY_FAILURE |
112 | MCESEV( | 112 | MCESEV( |
113 | KEEP, "Action required but unaffected thread is continuable", | 113 | KEEP, "Action required but unaffected thread is continuable", |
114 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR), | 114 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR), |
115 | MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV) | 115 | MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV) |
116 | ), | 116 | ), |
117 | MCESEV( | 117 | MCESEV( |
118 | AR, "Action required: data load error in a user process", | 118 | AR, "Action required: data load error in a user process", |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 87a65c939bcd..b3218cdee95f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -97,6 +97,15 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | |||
97 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 97 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
98 | }; | 98 | }; |
99 | 99 | ||
100 | /* | ||
101 | * MCA banks whose corrected errors are handled firmware-first. | ||
102 | * This is a global list of banks for which we won't enable CMCI and we | ||
103 | * won't poll. Firmware controls these banks and is responsible for | ||
104 | * reporting corrected errors through GHES. Uncorrected/recoverable | ||
105 | * errors are still notified through a machine check. | ||
106 | */ | ||
107 | mce_banks_t mce_banks_ce_disabled; | ||
108 | |||
100 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 109 | static DEFINE_PER_CPU(struct work_struct, mce_work); |
101 | 110 | ||
102 | static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); | 111 | static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); |
@@ -1935,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = { | |||
1935 | &mce_chrdev_ops, | 1944 | &mce_chrdev_ops, |
1936 | }; | 1945 | }; |
1937 | 1946 | ||
1947 | static void __mce_disable_bank(void *arg) | ||
1948 | { | ||
1949 | int bank = *((int *)arg); | ||
1950 | __clear_bit(bank, __get_cpu_var(mce_poll_banks)); | ||
1951 | cmci_disable_bank(bank); | ||
1952 | } | ||
1953 | |||
1954 | void mce_disable_bank(int bank) | ||
1955 | { | ||
1956 | if (bank >= mca_cfg.banks) { | ||
1957 | pr_warn(FW_BUG | ||
1958 | "Ignoring request to disable invalid MCA bank %d.\n", | ||
1959 | bank); | ||
1960 | return; | ||
1961 | } | ||
1962 | set_bit(bank, mce_banks_ce_disabled); | ||
1963 | on_each_cpu(__mce_disable_bank, &bank, 1); | ||
1964 | } | ||
1965 | |||
1938 | /* | 1966 | /* |
1939 | * mce=off Disables machine check | 1967 | * mce=off Disables machine check |
1940 | * mce=no_cmci Disables CMCI | 1968 | * mce=no_cmci Disables CMCI |
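Editor's note: a hedged sketch of how the new mce_disable_bank() might be used by a firmware-first quirk (the real wiring lives in the ACPI/APEI code and is not shown here); the function name is made up, and the acpi_disable_cmcff check mirrors the new "acpi=nocmcff" switch added in this series.

/* Illustrative sketch only -- not part of the patch. */
static void example_mark_bank_firmware_first(int bank)
{
        if (acpi_disable_cmcff)
                return;         /* admin asked to keep native CMCI/polling */

        mce_disable_bank(bank);
}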
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index d56405309dc1..4cfe0458ca66 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -203,6 +203,10 @@ static void cmci_discover(int banks) | |||
203 | if (test_bit(i, owned)) | 203 | if (test_bit(i, owned)) |
204 | continue; | 204 | continue; |
205 | 205 | ||
206 | /* Skip banks in firmware first mode */ | ||
207 | if (test_bit(i, mce_banks_ce_disabled)) | ||
208 | continue; | ||
209 | |||
206 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 210 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
207 | 211 | ||
208 | /* Already owned by someone else? */ | 212 | /* Already owned by someone else? */ |
@@ -271,6 +275,19 @@ void cmci_recheck(void) | |||
271 | local_irq_restore(flags); | 275 | local_irq_restore(flags); |
272 | } | 276 | } |
273 | 277 | ||
278 | /* Caller must hold cmci_discover_lock */ | ||
279 | static void __cmci_disable_bank(int bank) | ||
280 | { | ||
281 | u64 val; | ||
282 | |||
283 | if (!test_bit(bank, __get_cpu_var(mce_banks_owned))) | ||
284 | return; | ||
285 | rdmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
286 | val &= ~MCI_CTL2_CMCI_EN; | ||
287 | wrmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
288 | __clear_bit(bank, __get_cpu_var(mce_banks_owned)); | ||
289 | } | ||
290 | |||
274 | /* | 291 | /* |
275 | * Disable CMCI on this CPU for all banks it owns when it goes down. | 292 | * Disable CMCI on this CPU for all banks it owns when it goes down. |
276 | * This allows other CPUs to claim the banks on rediscovery. | 293 | * This allows other CPUs to claim the banks on rediscovery. |
@@ -280,20 +297,12 @@ void cmci_clear(void) | |||
280 | unsigned long flags; | 297 | unsigned long flags; |
281 | int i; | 298 | int i; |
282 | int banks; | 299 | int banks; |
283 | u64 val; | ||
284 | 300 | ||
285 | if (!cmci_supported(&banks)) | 301 | if (!cmci_supported(&banks)) |
286 | return; | 302 | return; |
287 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | 303 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); |
288 | for (i = 0; i < banks; i++) { | 304 | for (i = 0; i < banks; i++) |
289 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | 305 | __cmci_disable_bank(i); |
290 | continue; | ||
291 | /* Disable CMCI */ | ||
292 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | ||
293 | val &= ~MCI_CTL2_CMCI_EN; | ||
294 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | ||
295 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | ||
296 | } | ||
297 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | 306 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
298 | } | 307 | } |
299 | 308 | ||
@@ -327,6 +336,19 @@ void cmci_reenable(void) | |||
327 | cmci_discover(banks); | 336 | cmci_discover(banks); |
328 | } | 337 | } |
329 | 338 | ||
339 | void cmci_disable_bank(int bank) | ||
340 | { | ||
341 | int banks; | ||
342 | unsigned long flags; | ||
343 | |||
344 | if (!cmci_supported(&banks)) | ||
345 | return; | ||
346 | |||
347 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | ||
348 | __cmci_disable_bank(bank); | ||
349 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
350 | } | ||
351 | |||
330 | static void intel_init_cmci(void) | 352 | static void intel_init_cmci(void) |
331 | { | 353 | { |
332 | int banks; | 354 | int banks; |
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 8f4be53ea04b..71a39f3621ba 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
@@ -27,20 +27,23 @@ | |||
27 | struct ms_hyperv_info ms_hyperv; | 27 | struct ms_hyperv_info ms_hyperv; |
28 | EXPORT_SYMBOL_GPL(ms_hyperv); | 28 | EXPORT_SYMBOL_GPL(ms_hyperv); |
29 | 29 | ||
30 | static bool __init ms_hyperv_platform(void) | 30 | static uint32_t __init ms_hyperv_platform(void) |
31 | { | 31 | { |
32 | u32 eax; | 32 | u32 eax; |
33 | u32 hyp_signature[3]; | 33 | u32 hyp_signature[3]; |
34 | 34 | ||
35 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) | 35 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) |
36 | return false; | 36 | return 0; |
37 | 37 | ||
38 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, | 38 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, |
39 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); | 39 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); |
40 | 40 | ||
41 | return eax >= HYPERV_CPUID_MIN && | 41 | if (eax >= HYPERV_CPUID_MIN && |
42 | eax <= HYPERV_CPUID_MAX && | 42 | eax <= HYPERV_CPUID_MAX && |
43 | !memcmp("Microsoft Hv", hyp_signature, 12); | 43 | !memcmp("Microsoft Hv", hyp_signature, 12)) |
44 | return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS; | ||
45 | |||
46 | return 0; | ||
44 | } | 47 | } |
45 | 48 | ||
46 | static cycle_t read_hv_clock(struct clocksource *arg) | 49 | static cycle_t read_hv_clock(struct clocksource *arg) |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index a7c7305030cc..8355c84b9729 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -1884,6 +1884,7 @@ static struct pmu pmu = { | |||
1884 | void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) | 1884 | void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) |
1885 | { | 1885 | { |
1886 | userpg->cap_usr_time = 0; | 1886 | userpg->cap_usr_time = 0; |
1887 | userpg->cap_usr_time_zero = 0; | ||
1887 | userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; | 1888 | userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; |
1888 | userpg->pmc_width = x86_pmu.cntval_bits; | 1889 | userpg->pmc_width = x86_pmu.cntval_bits; |
1889 | 1890 | ||
@@ -1897,6 +1898,11 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) | |||
1897 | userpg->time_mult = this_cpu_read(cyc2ns); | 1898 | userpg->time_mult = this_cpu_read(cyc2ns); |
1898 | userpg->time_shift = CYC2NS_SCALE_FACTOR; | 1899 | userpg->time_shift = CYC2NS_SCALE_FACTOR; |
1899 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; | 1900 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; |
1901 | |||
1902 | if (sched_clock_stable && !check_tsc_disabled()) { | ||
1903 | userpg->cap_usr_time_zero = 1; | ||
1904 | userpg->time_zero = this_cpu_read(cyc2ns_offset); | ||
1905 | } | ||
1900 | } | 1906 | } |
1901 | 1907 | ||
1902 | /* | 1908 | /* |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 97e557bc4c91..cc16faae0538 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -641,6 +641,8 @@ extern struct event_constraint intel_core2_pebs_event_constraints[]; | |||
641 | 641 | ||
642 | extern struct event_constraint intel_atom_pebs_event_constraints[]; | 642 | extern struct event_constraint intel_atom_pebs_event_constraints[]; |
643 | 643 | ||
644 | extern struct event_constraint intel_slm_pebs_event_constraints[]; | ||
645 | |||
644 | extern struct event_constraint intel_nehalem_pebs_event_constraints[]; | 646 | extern struct event_constraint intel_nehalem_pebs_event_constraints[]; |
645 | 647 | ||
646 | extern struct event_constraint intel_westmere_pebs_event_constraints[]; | 648 | extern struct event_constraint intel_westmere_pebs_event_constraints[]; |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 4cbe03287b08..beeb7cc07044 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -347,8 +347,7 @@ static struct amd_nb *amd_alloc_nb(int cpu) | |||
347 | struct amd_nb *nb; | 347 | struct amd_nb *nb; |
348 | int i; | 348 | int i; |
349 | 349 | ||
350 | nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO, | 350 | nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu)); |
351 | cpu_to_node(cpu)); | ||
352 | if (!nb) | 351 | if (!nb) |
353 | return NULL; | 352 | return NULL; |
354 | 353 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index fbc9210b45bc..0abf6742a8b0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -81,7 +81,8 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = | |||
81 | 81 | ||
82 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = | 82 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = |
83 | { | 83 | { |
84 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), | 84 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
85 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), | ||
85 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), | 86 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), |
86 | EVENT_EXTRA_END | 87 | EVENT_EXTRA_END |
87 | }; | 88 | }; |
@@ -143,8 +144,9 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = | |||
143 | 144 | ||
144 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = | 145 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = |
145 | { | 146 | { |
146 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), | 147 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
147 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), | 148 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
149 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), | ||
148 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), | 150 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), |
149 | EVENT_EXTRA_END | 151 | EVENT_EXTRA_END |
150 | }; | 152 | }; |
@@ -162,16 +164,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = | |||
162 | EVENT_CONSTRAINT_END | 164 | EVENT_CONSTRAINT_END |
163 | }; | 165 | }; |
164 | 166 | ||
167 | static struct event_constraint intel_slm_event_constraints[] __read_mostly = | ||
168 | { | ||
169 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
170 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
171 | FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */ | ||
172 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ | ||
173 | EVENT_CONSTRAINT_END | ||
174 | }; | ||
175 | |||
165 | static struct extra_reg intel_snb_extra_regs[] __read_mostly = { | 176 | static struct extra_reg intel_snb_extra_regs[] __read_mostly = { |
166 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), | 177 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
167 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), | 178 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), |
179 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), | ||
168 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), | 180 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), |
169 | EVENT_EXTRA_END | 181 | EVENT_EXTRA_END |
170 | }; | 182 | }; |
171 | 183 | ||
172 | static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { | 184 | static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { |
173 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), | 185 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
174 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), | 186 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), |
187 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), | ||
175 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), | 188 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), |
176 | EVENT_EXTRA_END | 189 | EVENT_EXTRA_END |
177 | }; | 190 | }; |
@@ -882,6 +895,140 @@ static __initconst const u64 atom_hw_cache_event_ids | |||
882 | }, | 895 | }, |
883 | }; | 896 | }; |
884 | 897 | ||
898 | static struct extra_reg intel_slm_extra_regs[] __read_mostly = | ||
899 | { | ||
900 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ | ||
901 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0), | ||
902 | INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1), | ||
903 | EVENT_EXTRA_END | ||
904 | }; | ||
905 | |||
906 | #define SLM_DMND_READ SNB_DMND_DATA_RD | ||
907 | #define SLM_DMND_WRITE SNB_DMND_RFO | ||
908 | #define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) | ||
909 | |||
910 | #define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM) | ||
911 | #define SLM_LLC_ACCESS SNB_RESP_ANY | ||
912 | #define SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM) | ||
913 | |||
914 | static __initconst const u64 slm_hw_cache_extra_regs | ||
915 | [PERF_COUNT_HW_CACHE_MAX] | ||
916 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
917 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
918 | { | ||
919 | [ C(LL ) ] = { | ||
920 | [ C(OP_READ) ] = { | ||
921 | [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, | ||
922 | [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS, | ||
923 | }, | ||
924 | [ C(OP_WRITE) ] = { | ||
925 | [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, | ||
926 | [ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS, | ||
927 | }, | ||
928 | [ C(OP_PREFETCH) ] = { | ||
929 | [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS, | ||
930 | [ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS, | ||
931 | }, | ||
932 | }, | ||
933 | }; | ||
934 | |||
935 | static __initconst const u64 slm_hw_cache_event_ids | ||
936 | [PERF_COUNT_HW_CACHE_MAX] | ||
937 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
938 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
939 | { | ||
940 | [ C(L1D) ] = { | ||
941 | [ C(OP_READ) ] = { | ||
942 | [ C(RESULT_ACCESS) ] = 0, | ||
943 | [ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */ | ||
944 | }, | ||
945 | [ C(OP_WRITE) ] = { | ||
946 | [ C(RESULT_ACCESS) ] = 0, | ||
947 | [ C(RESULT_MISS) ] = 0, | ||
948 | }, | ||
949 | [ C(OP_PREFETCH) ] = { | ||
950 | [ C(RESULT_ACCESS) ] = 0, | ||
951 | [ C(RESULT_MISS) ] = 0, | ||
952 | }, | ||
953 | }, | ||
954 | [ C(L1I ) ] = { | ||
955 | [ C(OP_READ) ] = { | ||
956 | [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */ | ||
957 | [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ | ||
958 | }, | ||
959 | [ C(OP_WRITE) ] = { | ||
960 | [ C(RESULT_ACCESS) ] = -1, | ||
961 | [ C(RESULT_MISS) ] = -1, | ||
962 | }, | ||
963 | [ C(OP_PREFETCH) ] = { | ||
964 | [ C(RESULT_ACCESS) ] = 0, | ||
965 | [ C(RESULT_MISS) ] = 0, | ||
966 | }, | ||
967 | }, | ||
968 | [ C(LL ) ] = { | ||
969 | [ C(OP_READ) ] = { | ||
970 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ | ||
971 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
972 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ | ||
973 | [ C(RESULT_MISS) ] = 0x01b7, | ||
974 | }, | ||
975 | [ C(OP_WRITE) ] = { | ||
976 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ | ||
977 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
978 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ | ||
979 | [ C(RESULT_MISS) ] = 0x01b7, | ||
980 | }, | ||
981 | [ C(OP_PREFETCH) ] = { | ||
982 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ | ||
983 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
984 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ | ||
985 | [ C(RESULT_MISS) ] = 0x01b7, | ||
986 | }, | ||
987 | }, | ||
988 | [ C(DTLB) ] = { | ||
989 | [ C(OP_READ) ] = { | ||
990 | [ C(RESULT_ACCESS) ] = 0, | ||
991 | [ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */ | ||
992 | }, | ||
993 | [ C(OP_WRITE) ] = { | ||
994 | [ C(RESULT_ACCESS) ] = 0, | ||
995 | [ C(RESULT_MISS) ] = 0, | ||
996 | }, | ||
997 | [ C(OP_PREFETCH) ] = { | ||
998 | [ C(RESULT_ACCESS) ] = 0, | ||
999 | [ C(RESULT_MISS) ] = 0, | ||
1000 | }, | ||
1001 | }, | ||
1002 | [ C(ITLB) ] = { | ||
1003 | [ C(OP_READ) ] = { | ||
1004 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
1005 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
1006 | }, | ||
1007 | [ C(OP_WRITE) ] = { | ||
1008 | [ C(RESULT_ACCESS) ] = -1, | ||
1009 | [ C(RESULT_MISS) ] = -1, | ||
1010 | }, | ||
1011 | [ C(OP_PREFETCH) ] = { | ||
1012 | [ C(RESULT_ACCESS) ] = -1, | ||
1013 | [ C(RESULT_MISS) ] = -1, | ||
1014 | }, | ||
1015 | }, | ||
1016 | [ C(BPU ) ] = { | ||
1017 | [ C(OP_READ) ] = { | ||
1018 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
1019 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
1020 | }, | ||
1021 | [ C(OP_WRITE) ] = { | ||
1022 | [ C(RESULT_ACCESS) ] = -1, | ||
1023 | [ C(RESULT_MISS) ] = -1, | ||
1024 | }, | ||
1025 | [ C(OP_PREFETCH) ] = { | ||
1026 | [ C(RESULT_ACCESS) ] = -1, | ||
1027 | [ C(RESULT_MISS) ] = -1, | ||
1028 | }, | ||
1029 | }, | ||
1030 | }; | ||
1031 | |||
885 | static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) | 1032 | static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) |
886 | { | 1033 | { |
887 | /* user explicitly requested branch sampling */ | 1034 | /* user explicitly requested branch sampling */ |
@@ -1301,11 +1448,11 @@ static void intel_fixup_er(struct perf_event *event, int idx) | |||
1301 | 1448 | ||
1302 | if (idx == EXTRA_REG_RSP_0) { | 1449 | if (idx == EXTRA_REG_RSP_0) { |
1303 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | 1450 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; |
1304 | event->hw.config |= 0x01b7; | 1451 | event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event; |
1305 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; | 1452 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; |
1306 | } else if (idx == EXTRA_REG_RSP_1) { | 1453 | } else if (idx == EXTRA_REG_RSP_1) { |
1307 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | 1454 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; |
1308 | event->hw.config |= 0x01bb; | 1455 | event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event; |
1309 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; | 1456 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; |
1310 | } | 1457 | } |
1311 | } | 1458 | } |
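
A note on why this hunk drops the hard-coded constants: the event codes of the two OFFCORE_RSP registers are model-specific. On Sandy Bridge they are 0x01b7/0x01bb (which the old literals happened to match), while the Silvermont table added above uses 0x01b7/0x02b7, so the fixup must take the value from x86_pmu.extra_regs. A minimal illustrative sketch, not part of the patch, of what the lookup resolves to on each model:

/*
 * Illustrative only: the value x86_pmu.extra_regs[idx].event yields.
 * Silvermont codes come from intel_slm_extra_regs above; the Sandy
 * Bridge codes are the ones the removed literals matched.
 */
static u64 offcore_event_code(int idx, bool is_silvermont)
{
	if (idx == EXTRA_REG_RSP_0)
		return 0x01b7;			/* same on both models */

	/* EXTRA_REG_RSP_1 differs, which is why the table lookup matters */
	return is_silvermont ? 0x02b7 : 0x01bb;
}
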
@@ -2176,6 +2323,21 @@ __init int intel_pmu_init(void) | |||
2176 | pr_cont("Atom events, "); | 2323 | pr_cont("Atom events, "); |
2177 | break; | 2324 | break; |
2178 | 2325 | ||
2326 | case 55: /* Atom 22nm "Silvermont" */ | ||
2327 | memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, | ||
2328 | sizeof(hw_cache_event_ids)); | ||
2329 | memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, | ||
2330 | sizeof(hw_cache_extra_regs)); | ||
2331 | |||
2332 | intel_pmu_lbr_init_atom(); | ||
2333 | |||
2334 | x86_pmu.event_constraints = intel_slm_event_constraints; | ||
2335 | x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; | ||
2336 | x86_pmu.extra_regs = intel_slm_extra_regs; | ||
2337 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | ||
2338 | pr_cont("Silvermont events, "); | ||
2339 | break; | ||
2340 | |||
2179 | case 37: /* 32 nm nehalem, "Clarkdale" */ | 2341 | case 37: /* 32 nm nehalem, "Clarkdale" */ |
2180 | case 44: /* 32 nm nehalem, "Gulftown" */ | 2342 | case 44: /* 32 nm nehalem, "Gulftown" */ |
2181 | case 47: /* 32 nm Xeon E7 */ | 2343 | case 47: /* 32 nm Xeon E7 */ |
@@ -2270,6 +2432,7 @@ __init int intel_pmu_init(void) | |||
2270 | case 70: | 2432 | case 70: |
2271 | case 71: | 2433 | case 71: |
2272 | case 63: | 2434 | case 63: |
2435 | case 69: | ||
2273 | x86_pmu.late_ack = true; | 2436 | x86_pmu.late_ack = true; |
2274 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); | 2437 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); |
2275 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); | 2438 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 3065c57a63c1..63438aad177f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -224,7 +224,7 @@ static int alloc_pebs_buffer(int cpu) | |||
224 | if (!x86_pmu.pebs) | 224 | if (!x86_pmu.pebs) |
225 | return 0; | 225 | return 0; |
226 | 226 | ||
227 | buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); | 227 | buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node); |
228 | if (unlikely(!buffer)) | 228 | if (unlikely(!buffer)) |
229 | return -ENOMEM; | 229 | return -ENOMEM; |
230 | 230 | ||
@@ -262,7 +262,7 @@ static int alloc_bts_buffer(int cpu) | |||
262 | if (!x86_pmu.bts) | 262 | if (!x86_pmu.bts) |
263 | return 0; | 263 | return 0; |
264 | 264 | ||
265 | buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); | 265 | buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node); |
266 | if (unlikely(!buffer)) | 266 | if (unlikely(!buffer)) |
267 | return -ENOMEM; | 267 | return -ENOMEM; |
268 | 268 | ||
@@ -295,7 +295,7 @@ static int alloc_ds_buffer(int cpu) | |||
295 | int node = cpu_to_node(cpu); | 295 | int node = cpu_to_node(cpu); |
296 | struct debug_store *ds; | 296 | struct debug_store *ds; |
297 | 297 | ||
298 | ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node); | 298 | ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); |
299 | if (unlikely(!ds)) | 299 | if (unlikely(!ds)) |
300 | return -ENOMEM; | 300 | return -ENOMEM; |
301 | 301 | ||
@@ -517,6 +517,32 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { | |||
517 | EVENT_CONSTRAINT_END | 517 | EVENT_CONSTRAINT_END |
518 | }; | 518 | }; |
519 | 519 | ||
520 | struct event_constraint intel_slm_pebs_event_constraints[] = { | ||
521 | INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ | ||
522 | INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ | ||
523 | INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ | ||
524 | INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */ | ||
525 | INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */ | ||
526 | INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */ | ||
527 | INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */ | ||
528 | INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */ | ||
529 | INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */ | ||
530 | INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */ | ||
531 | INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */ | ||
532 | INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */ | ||
533 | INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */ | ||
534 | INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */ | ||
535 | INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */ | ||
536 | INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */ | ||
537 | INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */ | ||
538 | INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */ | ||
539 | INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */ | ||
540 | INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */ | ||
541 | INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */ | ||
542 | INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */ | ||
543 | EVENT_CONSTRAINT_END | ||
544 | }; | ||
545 | |||
520 | struct event_constraint intel_nehalem_pebs_event_constraints[] = { | 546 | struct event_constraint intel_nehalem_pebs_event_constraints[] = { |
521 | INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ | 547 | INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ |
522 | INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ | 548 | INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index cad791dbde95..fd8011ed4dcd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
@@ -6,6 +6,8 @@ static struct intel_uncore_type **pci_uncores = empty_uncore; | |||
6 | /* pci bus to socket mapping */ | 6 | /* pci bus to socket mapping */ |
7 | static int pcibus_to_physid[256] = { [0 ... 255] = -1, }; | 7 | static int pcibus_to_physid[256] = { [0 ... 255] = -1, }; |
8 | 8 | ||
9 | static struct pci_dev *extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; | ||
10 | |||
9 | static DEFINE_RAW_SPINLOCK(uncore_box_lock); | 11 | static DEFINE_RAW_SPINLOCK(uncore_box_lock); |
10 | 12 | ||
11 | /* mask of cpus that collect uncore events */ | 13 | /* mask of cpus that collect uncore events */ |
@@ -45,6 +47,24 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); | |||
45 | DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); | 47 | DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); |
46 | DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); | 48 | DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); |
47 | DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31"); | 49 | DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31"); |
50 | DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51"); | ||
51 | DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35"); | ||
52 | DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31"); | ||
53 | DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17"); | ||
54 | DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12"); | ||
55 | DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8"); | ||
56 | DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4"); | ||
57 | DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31"); | ||
58 | DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63"); | ||
59 | DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51"); | ||
60 | DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35"); | ||
61 | DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31"); | ||
62 | DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17"); | ||
63 | DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12"); | ||
64 | DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8"); | ||
65 | DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); | ||
66 | DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); | ||
67 | DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); | ||
48 | 68 | ||
49 | static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) | 69 | static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) |
50 | { | 70 | { |
@@ -281,7 +301,7 @@ static struct attribute *snbep_uncore_cbox_formats_attr[] = { | |||
281 | }; | 301 | }; |
282 | 302 | ||
283 | static struct attribute *snbep_uncore_pcu_formats_attr[] = { | 303 | static struct attribute *snbep_uncore_pcu_formats_attr[] = { |
284 | &format_attr_event.attr, | 304 | &format_attr_event_ext.attr, |
285 | &format_attr_occ_sel.attr, | 305 | &format_attr_occ_sel.attr, |
286 | &format_attr_edge.attr, | 306 | &format_attr_edge.attr, |
287 | &format_attr_inv.attr, | 307 | &format_attr_inv.attr, |
@@ -301,6 +321,24 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = { | |||
301 | &format_attr_edge.attr, | 321 | &format_attr_edge.attr, |
302 | &format_attr_inv.attr, | 322 | &format_attr_inv.attr, |
303 | &format_attr_thresh8.attr, | 323 | &format_attr_thresh8.attr, |
324 | &format_attr_match_rds.attr, | ||
325 | &format_attr_match_rnid30.attr, | ||
326 | &format_attr_match_rnid4.attr, | ||
327 | &format_attr_match_dnid.attr, | ||
328 | &format_attr_match_mc.attr, | ||
329 | &format_attr_match_opc.attr, | ||
330 | &format_attr_match_vnw.attr, | ||
331 | &format_attr_match0.attr, | ||
332 | &format_attr_match1.attr, | ||
333 | &format_attr_mask_rds.attr, | ||
334 | &format_attr_mask_rnid30.attr, | ||
335 | &format_attr_mask_rnid4.attr, | ||
336 | &format_attr_mask_dnid.attr, | ||
337 | &format_attr_mask_mc.attr, | ||
338 | &format_attr_mask_opc.attr, | ||
339 | &format_attr_mask_vnw.attr, | ||
340 | &format_attr_mask0.attr, | ||
341 | &format_attr_mask1.attr, | ||
304 | NULL, | 342 | NULL, |
305 | }; | 343 | }; |
306 | 344 | ||
@@ -314,8 +352,8 @@ static struct uncore_event_desc snbep_uncore_imc_events[] = { | |||
314 | static struct uncore_event_desc snbep_uncore_qpi_events[] = { | 352 | static struct uncore_event_desc snbep_uncore_qpi_events[] = { |
315 | INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), | 353 | INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), |
316 | INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), | 354 | INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), |
317 | INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x02,umask=0x08"), | 355 | INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"), |
318 | INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x03,umask=0x04"), | 356 | INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"), |
319 | { /* end: all zeroes */ }, | 357 | { /* end: all zeroes */ }, |
320 | }; | 358 | }; |
321 | 359 | ||
@@ -356,13 +394,16 @@ static struct intel_uncore_ops snbep_uncore_msr_ops = { | |||
356 | SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), | 394 | SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), |
357 | }; | 395 | }; |
358 | 396 | ||
397 | #define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \ | ||
398 | .init_box = snbep_uncore_pci_init_box, \ | ||
399 | .disable_box = snbep_uncore_pci_disable_box, \ | ||
400 | .enable_box = snbep_uncore_pci_enable_box, \ | ||
401 | .disable_event = snbep_uncore_pci_disable_event, \ | ||
402 | .read_counter = snbep_uncore_pci_read_counter | ||
403 | |||
359 | static struct intel_uncore_ops snbep_uncore_pci_ops = { | 404 | static struct intel_uncore_ops snbep_uncore_pci_ops = { |
360 | .init_box = snbep_uncore_pci_init_box, | 405 | SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), |
361 | .disable_box = snbep_uncore_pci_disable_box, | 406 | .enable_event = snbep_uncore_pci_enable_event, |
362 | .enable_box = snbep_uncore_pci_enable_box, | ||
363 | .disable_event = snbep_uncore_pci_disable_event, | ||
364 | .enable_event = snbep_uncore_pci_enable_event, | ||
365 | .read_counter = snbep_uncore_pci_read_counter, | ||
366 | }; | 407 | }; |
367 | 408 | ||
368 | static struct event_constraint snbep_uncore_cbox_constraints[] = { | 409 | static struct event_constraint snbep_uncore_cbox_constraints[] = { |
@@ -726,6 +767,61 @@ static struct intel_uncore_type *snbep_msr_uncores[] = { | |||
726 | NULL, | 767 | NULL, |
727 | }; | 768 | }; |
728 | 769 | ||
770 | enum { | ||
771 | SNBEP_PCI_QPI_PORT0_FILTER, | ||
772 | SNBEP_PCI_QPI_PORT1_FILTER, | ||
773 | }; | ||
774 | |||
775 | static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) | ||
776 | { | ||
777 | struct hw_perf_event *hwc = &event->hw; | ||
778 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
779 | struct hw_perf_event_extra *reg2 = &hwc->branch_reg; | ||
780 | |||
781 | if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) { | ||
782 | reg1->idx = 0; | ||
783 | reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0; | ||
784 | reg1->config = event->attr.config1; | ||
785 | reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0; | ||
786 | reg2->config = event->attr.config2; | ||
787 | } | ||
788 | return 0; | ||
789 | } | ||
790 | |||
791 | static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
792 | { | ||
793 | struct pci_dev *pdev = box->pci_dev; | ||
794 | struct hw_perf_event *hwc = &event->hw; | ||
795 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
796 | struct hw_perf_event_extra *reg2 = &hwc->branch_reg; | ||
797 | |||
798 | if (reg1->idx != EXTRA_REG_NONE) { | ||
799 | int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; | ||
800 | struct pci_dev *filter_pdev = extra_pci_dev[box->phys_id][idx]; | ||
801 | WARN_ON_ONCE(!filter_pdev); | ||
802 | if (filter_pdev) { | ||
803 | pci_write_config_dword(filter_pdev, reg1->reg, | ||
804 | (u32)reg1->config); | ||
805 | pci_write_config_dword(filter_pdev, reg1->reg + 4, | ||
806 | (u32)(reg1->config >> 32)); | ||
807 | pci_write_config_dword(filter_pdev, reg2->reg, | ||
808 | (u32)reg2->config); | ||
809 | pci_write_config_dword(filter_pdev, reg2->reg + 4, | ||
810 | (u32)(reg2->config >> 32)); | ||
811 | } | ||
812 | } | ||
813 | |||
814 | pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); | ||
815 | } | ||
816 | |||
817 | static struct intel_uncore_ops snbep_uncore_qpi_ops = { | ||
818 | SNBEP_UNCORE_PCI_OPS_COMMON_INIT(), | ||
819 | .enable_event = snbep_qpi_enable_event, | ||
820 | .hw_config = snbep_qpi_hw_config, | ||
821 | .get_constraint = uncore_get_constraint, | ||
822 | .put_constraint = uncore_put_constraint, | ||
823 | }; | ||
824 | |||
729 | #define SNBEP_UNCORE_PCI_COMMON_INIT() \ | 825 | #define SNBEP_UNCORE_PCI_COMMON_INIT() \ |
730 | .perf_ctr = SNBEP_PCI_PMON_CTR0, \ | 826 | .perf_ctr = SNBEP_PCI_PMON_CTR0, \ |
731 | .event_ctl = SNBEP_PCI_PMON_CTL0, \ | 827 | .event_ctl = SNBEP_PCI_PMON_CTL0, \ |
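
For context, the match/mask values that snbep_qpi_enable_event() writes into the companion filter device come straight from the event's config1/config2 attributes, using the bit ranges declared by the format attributes earlier in this file. A hypothetical user-space sketch of filling them in (the PMU type and the opcode value are placeholders, not taken from this patch):

#include <linux/perf_event.h>
#include <string.h>

/*
 * Hypothetical sketch: request a QPI event with opcode filtering.
 * The layout follows the format attributes added above
 * (match_opc = config1:5-8, mask_opc = config2:5-8); the opcode 0x2
 * is purely illustrative.
 */
static void setup_filtered_qpi_event(struct perf_event_attr *attr,
				     __u32 uncore_qpi_pmu_type)
{
	memset(attr, 0, sizeof(*attr));
	attr->size    = sizeof(*attr);
	attr->type    = uncore_qpi_pmu_type;	/* from /sys/.../uncore_qpi_0/type */
	attr->config  = 0x38;			/* event select handled by snbep_qpi_hw_config() */
	attr->config1 = 0x2ULL << 5;		/* match_opc */
	attr->config2 = 0xfULL << 5;		/* mask_opc: compare all four opcode bits */
}
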
@@ -755,17 +851,18 @@ static struct intel_uncore_type snbep_uncore_imc = { | |||
755 | }; | 851 | }; |
756 | 852 | ||
757 | static struct intel_uncore_type snbep_uncore_qpi = { | 853 | static struct intel_uncore_type snbep_uncore_qpi = { |
758 | .name = "qpi", | 854 | .name = "qpi", |
759 | .num_counters = 4, | 855 | .num_counters = 4, |
760 | .num_boxes = 2, | 856 | .num_boxes = 2, |
761 | .perf_ctr_bits = 48, | 857 | .perf_ctr_bits = 48, |
762 | .perf_ctr = SNBEP_PCI_PMON_CTR0, | 858 | .perf_ctr = SNBEP_PCI_PMON_CTR0, |
763 | .event_ctl = SNBEP_PCI_PMON_CTL0, | 859 | .event_ctl = SNBEP_PCI_PMON_CTL0, |
764 | .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, | 860 | .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, |
765 | .box_ctl = SNBEP_PCI_PMON_BOX_CTL, | 861 | .box_ctl = SNBEP_PCI_PMON_BOX_CTL, |
766 | .ops = &snbep_uncore_pci_ops, | 862 | .num_shared_regs = 1, |
767 | .event_descs = snbep_uncore_qpi_events, | 863 | .ops = &snbep_uncore_qpi_ops, |
768 | .format_group = &snbep_uncore_qpi_format_group, | 864 | .event_descs = snbep_uncore_qpi_events, |
865 | .format_group = &snbep_uncore_qpi_format_group, | ||
769 | }; | 866 | }; |
770 | 867 | ||
771 | 868 | ||
@@ -807,43 +904,53 @@ static struct intel_uncore_type *snbep_pci_uncores[] = { | |||
807 | static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { | 904 | static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { |
808 | { /* Home Agent */ | 905 | { /* Home Agent */ |
809 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), | 906 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), |
810 | .driver_data = SNBEP_PCI_UNCORE_HA, | 907 | .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0), |
811 | }, | 908 | }, |
812 | { /* MC Channel 0 */ | 909 | { /* MC Channel 0 */ |
813 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), | 910 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), |
814 | .driver_data = SNBEP_PCI_UNCORE_IMC, | 911 | .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0), |
815 | }, | 912 | }, |
816 | { /* MC Channel 1 */ | 913 | { /* MC Channel 1 */ |
817 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), | 914 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), |
818 | .driver_data = SNBEP_PCI_UNCORE_IMC, | 915 | .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1), |
819 | }, | 916 | }, |
820 | { /* MC Channel 2 */ | 917 | { /* MC Channel 2 */ |
821 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), | 918 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), |
822 | .driver_data = SNBEP_PCI_UNCORE_IMC, | 919 | .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2), |
823 | }, | 920 | }, |
824 | { /* MC Channel 3 */ | 921 | { /* MC Channel 3 */ |
825 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), | 922 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), |
826 | .driver_data = SNBEP_PCI_UNCORE_IMC, | 923 | .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3), |
827 | }, | 924 | }, |
828 | { /* QPI Port 0 */ | 925 | { /* QPI Port 0 */ |
829 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), | 926 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), |
830 | .driver_data = SNBEP_PCI_UNCORE_QPI, | 927 | .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0), |
831 | }, | 928 | }, |
832 | { /* QPI Port 1 */ | 929 | { /* QPI Port 1 */ |
833 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), | 930 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), |
834 | .driver_data = SNBEP_PCI_UNCORE_QPI, | 931 | .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1), |
835 | }, | 932 | }, |
836 | { /* R2PCIe */ | 933 | { /* R2PCIe */ |
837 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), | 934 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), |
838 | .driver_data = SNBEP_PCI_UNCORE_R2PCIE, | 935 | .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0), |
839 | }, | 936 | }, |
840 | { /* R3QPI Link 0 */ | 937 | { /* R3QPI Link 0 */ |
841 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), | 938 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), |
842 | .driver_data = SNBEP_PCI_UNCORE_R3QPI, | 939 | .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0), |
843 | }, | 940 | }, |
844 | { /* R3QPI Link 1 */ | 941 | { /* R3QPI Link 1 */ |
845 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), | 942 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), |
846 | .driver_data = SNBEP_PCI_UNCORE_R3QPI, | 943 | .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1), |
944 | }, | ||
945 | { /* QPI Port 0 filter */ | ||
946 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86), | ||
947 | .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, | ||
948 | SNBEP_PCI_QPI_PORT0_FILTER), | ||
949 | }, | ||
950 | { /* QPI Port 1 filter */ |||
951 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96), | ||
952 | .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, | ||
953 | SNBEP_PCI_QPI_PORT1_FILTER), | ||
847 | }, | 954 | }, |
848 | { /* end: all zeroes */ } | 955 | { /* end: all zeroes */ } |
849 | }; | 956 | }; |
@@ -1256,71 +1363,71 @@ static struct intel_uncore_type *ivt_pci_uncores[] = { | |||
1256 | static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { | 1363 | static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = { |
1257 | { /* Home Agent 0 */ | 1364 | { /* Home Agent 0 */ |
1258 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), | 1365 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30), |
1259 | .driver_data = IVT_PCI_UNCORE_HA, | 1366 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0), |
1260 | }, | 1367 | }, |
1261 | { /* Home Agent 1 */ | 1368 | { /* Home Agent 1 */ |
1262 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38), | 1369 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38), |
1263 | .driver_data = IVT_PCI_UNCORE_HA, | 1370 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 1), |
1264 | }, | 1371 | }, |
1265 | { /* MC0 Channel 0 */ | 1372 | { /* MC0 Channel 0 */ |
1266 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4), | 1373 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4), |
1267 | .driver_data = IVT_PCI_UNCORE_IMC, | 1374 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 0), |
1268 | }, | 1375 | }, |
1269 | { /* MC0 Channel 1 */ | 1376 | { /* MC0 Channel 1 */ |
1270 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5), | 1377 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5), |
1271 | .driver_data = IVT_PCI_UNCORE_IMC, | 1378 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 1), |
1272 | }, | 1379 | }, |
1273 | { /* MC0 Channel 3 */ | 1380 | { /* MC0 Channel 3 */ |
1274 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0), | 1381 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0), |
1275 | .driver_data = IVT_PCI_UNCORE_IMC, | 1382 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 2), |
1276 | }, | 1383 | }, |
1277 | { /* MC0 Channel 4 */ | 1384 | { /* MC0 Channel 4 */ |
1278 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1), | 1385 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1), |
1279 | .driver_data = IVT_PCI_UNCORE_IMC, | 1386 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 3), |
1280 | }, | 1387 | }, |
1281 | { /* MC1 Channel 0 */ | 1388 | { /* MC1 Channel 0 */ |
1282 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4), | 1389 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4), |
1283 | .driver_data = IVT_PCI_UNCORE_IMC, | 1390 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 4), |
1284 | }, | 1391 | }, |
1285 | { /* MC1 Channel 1 */ | 1392 | { /* MC1 Channel 1 */ |
1286 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5), | 1393 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5), |
1287 | .driver_data = IVT_PCI_UNCORE_IMC, | 1394 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 5), |
1288 | }, | 1395 | }, |
1289 | { /* MC1 Channel 3 */ | 1396 | { /* MC1 Channel 3 */ |
1290 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0), | 1397 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0), |
1291 | .driver_data = IVT_PCI_UNCORE_IMC, | 1398 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 6), |
1292 | }, | 1399 | }, |
1293 | { /* MC1 Channel 4 */ | 1400 | { /* MC1 Channel 4 */ |
1294 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), | 1401 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1), |
1295 | .driver_data = IVT_PCI_UNCORE_IMC, | 1402 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7), |
1296 | }, | 1403 | }, |
1297 | { /* QPI0 Port 0 */ | 1404 | { /* QPI0 Port 0 */ |
1298 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), | 1405 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32), |
1299 | .driver_data = IVT_PCI_UNCORE_QPI, | 1406 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0), |
1300 | }, | 1407 | }, |
1301 | { /* QPI0 Port 1 */ | 1408 | { /* QPI0 Port 1 */ |
1302 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33), | 1409 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33), |
1303 | .driver_data = IVT_PCI_UNCORE_QPI, | 1410 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 1), |
1304 | }, | 1411 | }, |
1305 | { /* QPI1 Port 2 */ | 1412 | { /* QPI1 Port 2 */ |
1306 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a), | 1413 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a), |
1307 | .driver_data = IVT_PCI_UNCORE_QPI, | 1414 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 2), |
1308 | }, | 1415 | }, |
1309 | { /* R2PCIe */ | 1416 | { /* R2PCIe */ |
1310 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34), | 1417 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34), |
1311 | .driver_data = IVT_PCI_UNCORE_R2PCIE, | 1418 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R2PCIE, 0), |
1312 | }, | 1419 | }, |
1313 | { /* R3QPI0 Link 0 */ | 1420 | { /* R3QPI0 Link 0 */ |
1314 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36), | 1421 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36), |
1315 | .driver_data = IVT_PCI_UNCORE_R3QPI, | 1422 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 0), |
1316 | }, | 1423 | }, |
1317 | { /* R3QPI0 Link 1 */ | 1424 | { /* R3QPI0 Link 1 */ |
1318 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37), | 1425 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37), |
1319 | .driver_data = IVT_PCI_UNCORE_R3QPI, | 1426 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 1), |
1320 | }, | 1427 | }, |
1321 | { /* R3QPI1 Link 2 */ | 1428 | { /* R3QPI1 Link 2 */ |
1322 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), | 1429 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e), |
1323 | .driver_data = IVT_PCI_UNCORE_R3QPI, | 1430 | .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2), |
1324 | }, | 1431 | }, |
1325 | { /* end: all zeroes */ } | 1432 | { /* end: all zeroes */ } |
1326 | }; | 1433 | }; |
@@ -2606,7 +2713,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp | |||
2606 | 2713 | ||
2607 | size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); | 2714 | size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); |
2608 | 2715 | ||
2609 | box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); | 2716 | box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); |
2610 | if (!box) | 2717 | if (!box) |
2611 | return NULL; | 2718 | return NULL; |
2612 | 2719 | ||
@@ -3167,16 +3274,24 @@ static bool pcidrv_registered; | |||
3167 | /* | 3274 | /* |
3168 | * add a pci uncore device | 3275 | * add a pci uncore device |
3169 | */ | 3276 | */ |
3170 | static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) | 3277 | static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) |
3171 | { | 3278 | { |
3172 | struct intel_uncore_pmu *pmu; | 3279 | struct intel_uncore_pmu *pmu; |
3173 | struct intel_uncore_box *box; | 3280 | struct intel_uncore_box *box; |
3174 | int i, phys_id; | 3281 | struct intel_uncore_type *type; |
3282 | int phys_id; | ||
3175 | 3283 | ||
3176 | phys_id = pcibus_to_physid[pdev->bus->number]; | 3284 | phys_id = pcibus_to_physid[pdev->bus->number]; |
3177 | if (phys_id < 0) | 3285 | if (phys_id < 0) |
3178 | return -ENODEV; | 3286 | return -ENODEV; |
3179 | 3287 | ||
3288 | if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { | ||
3289 | extra_pci_dev[phys_id][UNCORE_PCI_DEV_IDX(id->driver_data)] = pdev; | ||
3290 | pci_set_drvdata(pdev, NULL); | ||
3291 | return 0; | ||
3292 | } | ||
3293 | |||
3294 | type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; | ||
3180 | box = uncore_alloc_box(type, 0); | 3295 | box = uncore_alloc_box(type, 0); |
3181 | if (!box) | 3296 | if (!box) |
3182 | return -ENOMEM; | 3297 | return -ENOMEM; |
@@ -3185,21 +3300,11 @@ static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) | |||
3185 | * for performance monitoring unit with multiple boxes, | 3300 | * for performance monitoring unit with multiple boxes, |
3186 | * each box has a different function id. | 3301 | * each box has a different function id. |
3187 | */ | 3302 | */ |
3188 | for (i = 0; i < type->num_boxes; i++) { | 3303 | pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; |
3189 | pmu = &type->pmus[i]; | 3304 | if (pmu->func_id < 0) |
3190 | if (pmu->func_id == pdev->devfn) | 3305 | pmu->func_id = pdev->devfn; |
3191 | break; | 3306 | else |
3192 | if (pmu->func_id < 0) { | 3307 | WARN_ON_ONCE(pmu->func_id != pdev->devfn); |
3193 | pmu->func_id = pdev->devfn; | ||
3194 | break; | ||
3195 | } | ||
3196 | pmu = NULL; | ||
3197 | } | ||
3198 | |||
3199 | if (!pmu) { | ||
3200 | kfree(box); | ||
3201 | return -EINVAL; | ||
3202 | } | ||
3203 | 3308 | ||
3204 | box->phys_id = phys_id; | 3309 | box->phys_id = phys_id; |
3205 | box->pci_dev = pdev; | 3310 | box->pci_dev = pdev; |
@@ -3217,9 +3322,22 @@ static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) | |||
3217 | static void uncore_pci_remove(struct pci_dev *pdev) | 3322 | static void uncore_pci_remove(struct pci_dev *pdev) |
3218 | { | 3323 | { |
3219 | struct intel_uncore_box *box = pci_get_drvdata(pdev); | 3324 | struct intel_uncore_box *box = pci_get_drvdata(pdev); |
3220 | struct intel_uncore_pmu *pmu = box->pmu; | 3325 | struct intel_uncore_pmu *pmu; |
3221 | int cpu, phys_id = pcibus_to_physid[pdev->bus->number]; | 3326 | int i, cpu, phys_id = pcibus_to_physid[pdev->bus->number]; |
3222 | 3327 | ||
3328 | box = pci_get_drvdata(pdev); | ||
3329 | if (!box) { | ||
3330 | for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { | ||
3331 | if (extra_pci_dev[phys_id][i] == pdev) { | ||
3332 | extra_pci_dev[phys_id][i] = NULL; | ||
3333 | break; | ||
3334 | } | ||
3335 | } | ||
3336 | WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX); | ||
3337 | return; | ||
3338 | } | ||
3339 | |||
3340 | pmu = box->pmu; | ||
3223 | if (WARN_ON_ONCE(phys_id != box->phys_id)) | 3341 | if (WARN_ON_ONCE(phys_id != box->phys_id)) |
3224 | return; | 3342 | return; |
3225 | 3343 | ||
@@ -3240,12 +3358,6 @@ static void uncore_pci_remove(struct pci_dev *pdev) | |||
3240 | kfree(box); | 3358 | kfree(box); |
3241 | } | 3359 | } |
3242 | 3360 | ||
3243 | static int uncore_pci_probe(struct pci_dev *pdev, | ||
3244 | const struct pci_device_id *id) | ||
3245 | { | ||
3246 | return uncore_pci_add(pci_uncores[id->driver_data], pdev); | ||
3247 | } | ||
3248 | |||
3249 | static int __init uncore_pci_init(void) | 3361 | static int __init uncore_pci_init(void) |
3250 | { | 3362 | { |
3251 | int ret; | 3363 | int ret; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 47b3d00c9d89..a80ab71a883d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h | |||
@@ -12,6 +12,15 @@ | |||
12 | #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC | 12 | #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC |
13 | #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) | 13 | #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) |
14 | 14 | ||
15 | #define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) | ||
16 | #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) | ||
17 | #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) | ||
18 | #define UNCORE_EXTRA_PCI_DEV 0xff | ||
19 | #define UNCORE_EXTRA_PCI_DEV_MAX 2 | ||
20 | |||
21 | /* support up to 8 sockets */ | ||
22 | #define UNCORE_SOCKET_MAX 8 | ||
23 | |||
15 | #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) | 24 | #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) |
16 | 25 | ||
17 | /* SNB event control */ | 26 | /* SNB event control */ |
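
Since the packed driver_data convention above is used throughout the PCI id tables in this series, a tiny illustrative check may help; it is not part of the patch and borrows the SNBEP_PCI_UNCORE_QPI and SNBEP_PCI_QPI_PORT0_FILTER enum values from perf_event_intel_uncore.c:

/*
 * Illustrative only: how driver_data packs and unpacks with the
 * helpers defined above.
 */
static void __init uncore_pci_dev_data_example(void)
{
	unsigned long qpi1 = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1);
	unsigned long flt0 = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
						 SNBEP_PCI_QPI_PORT0_FILTER);

	/* a counting box: the type selects pci_uncores[], the idx picks the pmu */
	WARN_ON(UNCORE_PCI_DEV_TYPE(qpi1) != SNBEP_PCI_UNCORE_QPI);
	WARN_ON(UNCORE_PCI_DEV_IDX(qpi1) != 1);

	/* an auxiliary device: type 0xff routes it into extra_pci_dev[] instead */
	WARN_ON(UNCORE_PCI_DEV_TYPE(flt0) != UNCORE_EXTRA_PCI_DEV);
	WARN_ON(UNCORE_PCI_DEV_IDX(flt0) != SNBEP_PCI_QPI_PORT0_FILTER);
}
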
@@ -108,6 +117,7 @@ | |||
108 | (SNBEP_PMON_CTL_EV_SEL_MASK | \ | 117 | (SNBEP_PMON_CTL_EV_SEL_MASK | \ |
109 | SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ | 118 | SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ |
110 | SNBEP_PMON_CTL_EDGE_DET | \ | 119 | SNBEP_PMON_CTL_EDGE_DET | \ |
120 | SNBEP_PMON_CTL_EV_SEL_EXT | \ | ||
111 | SNBEP_PMON_CTL_INVERT | \ | 121 | SNBEP_PMON_CTL_INVERT | \ |
112 | SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ | 122 | SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ |
113 | SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ | 123 | SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 7076878404ec..628a059a9a06 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -93,7 +93,7 @@ static void __init vmware_platform_setup(void) | |||
93 | * serial key should be enough, as this will always have a VMware | 93 | * serial key should be enough, as this will always have a VMware |
94 | * specific string when running under VMware hypervisor. | 94 | * specific string when running under VMware hypervisor. |
95 | */ | 95 | */ |
96 | static bool __init vmware_platform(void) | 96 | static uint32_t __init vmware_platform(void) |
97 | { | 97 | { |
98 | if (cpu_has_hypervisor) { | 98 | if (cpu_has_hypervisor) { |
99 | unsigned int eax; | 99 | unsigned int eax; |
@@ -102,12 +102,12 @@ static bool __init vmware_platform(void) | |||
102 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], | 102 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], |
103 | &hyper_vendor_id[1], &hyper_vendor_id[2]); | 103 | &hyper_vendor_id[1], &hyper_vendor_id[2]); |
104 | if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) | 104 | if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) |
105 | return true; | 105 | return CPUID_VMWARE_INFO_LEAF; |
106 | } else if (dmi_available && dmi_name_in_serial("VMware") && | 106 | } else if (dmi_available && dmi_name_in_serial("VMware") && |
107 | __vmware_platform()) | 107 | __vmware_platform()) |
108 | return true; | 108 | return 1; |
109 | 109 | ||
110 | return false; | 110 | return 0; |
111 | } | 111 | } |
112 | 112 | ||
113 | /* | 113 | /* |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 74467feb4dc5..e0e0841eef45 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -128,7 +128,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
128 | cpu_emergency_svm_disable(); | 128 | cpu_emergency_svm_disable(); |
129 | 129 | ||
130 | lapic_shutdown(); | 130 | lapic_shutdown(); |
131 | #if defined(CONFIG_X86_IO_APIC) | 131 | #ifdef CONFIG_X86_IO_APIC |
132 | /* Prevent crash_kexec() from deadlocking on ioapic_lock. */ | ||
133 | ioapic_zap_locks(); | ||
132 | disable_IO_APIC(); | 134 | disable_IO_APIC(); |
133 | #endif | 135 | #endif |
134 | #ifdef CONFIG_HPET_TIMER | 136 | #ifdef CONFIG_HPET_TIMER |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d32abeabbda5..174da5fc5a7b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -658,15 +658,18 @@ __init void e820_setup_gap(void) | |||
658 | * boot_params.e820_map, others are passed via SETUP_E820_EXT node of | 658 | * boot_params.e820_map, others are passed via SETUP_E820_EXT node of |
659 | * linked list of struct setup_data, which is parsed here. | 659 | * linked list of struct setup_data, which is parsed here. |
660 | */ | 660 | */ |
661 | void __init parse_e820_ext(struct setup_data *sdata) | 661 | void __init parse_e820_ext(u64 phys_addr, u32 data_len) |
662 | { | 662 | { |
663 | int entries; | 663 | int entries; |
664 | struct e820entry *extmap; | 664 | struct e820entry *extmap; |
665 | struct setup_data *sdata; | ||
665 | 666 | ||
667 | sdata = early_memremap(phys_addr, data_len); | ||
666 | entries = sdata->len / sizeof(struct e820entry); | 668 | entries = sdata->len / sizeof(struct e820entry); |
667 | extmap = (struct e820entry *)(sdata->data); | 669 | extmap = (struct e820entry *)(sdata->data); |
668 | __append_e820_map(extmap, entries); | 670 | __append_e820_map(extmap, entries); |
669 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 671 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
672 | early_iounmap(sdata, data_len); | ||
670 | printk(KERN_INFO "e820: extended physical RAM map:\n"); | 673 | printk(KERN_INFO "e820: extended physical RAM map:\n"); |
671 | e820_print_map("extended"); | 674 | e820_print_map("extended"); |
672 | } | 675 | } |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 94ab6b90dd3f..63bdb29b2549 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -196,15 +196,23 @@ static void __init ati_bugs_contd(int num, int slot, int func) | |||
196 | static void __init intel_remapping_check(int num, int slot, int func) | 196 | static void __init intel_remapping_check(int num, int slot, int func) |
197 | { | 197 | { |
198 | u8 revision; | 198 | u8 revision; |
199 | u16 device; | ||
199 | 200 | ||
201 | device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); | ||
200 | revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); | 202 | revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); |
201 | 203 | ||
202 | /* | 204 | /* |
203 | * Revision 0x13 of this chipset supports irq remapping | 205 | * Revision 0x13 of every device id that triggers this quirk has a |
204 | * but has an erratum that breaks its behavior, flag it as such | 206 | * problem draining interrupts when irq remapping is enabled, and |
207 | * should be flagged as broken. Additionally, revisions 0x12 and |||
208 | * 0x22 of device id 0x3405 have this problem. |||
205 | */ | 209 | */ |
206 | if (revision == 0x13) | 210 | if (revision == 0x13) |
207 | set_irq_remapping_broken(); | 211 | set_irq_remapping_broken(); |
212 | else if ((device == 0x3405) && | ||
213 | ((revision == 0x12) || | ||
214 | (revision == 0x22))) | ||
215 | set_irq_remapping_broken(); | ||
208 | 216 | ||
209 | } | 217 | } |
210 | 218 | ||
@@ -239,6 +247,8 @@ static struct chipset early_qrk[] __initdata = { | |||
239 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, | 247 | PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, |
240 | { PCI_VENDOR_ID_INTEL, 0x3403, PCI_CLASS_BRIDGE_HOST, | 248 | { PCI_VENDOR_ID_INTEL, 0x3403, PCI_CLASS_BRIDGE_HOST, |
241 | PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, | 249 | PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, |
250 | { PCI_VENDOR_ID_INTEL, 0x3405, PCI_CLASS_BRIDGE_HOST, | ||
251 | PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, | ||
242 | { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, | 252 | { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, |
243 | PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, | 253 | PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, |
244 | {} | 254 | {} |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 138463a24877..06f87bece92a 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -29,7 +29,7 @@ static void __init i386_default_early_setup(void) | |||
29 | reserve_ebda_region(); | 29 | reserve_ebda_region(); |
30 | } | 30 | } |
31 | 31 | ||
32 | void __init i386_start_kernel(void) | 32 | asmlinkage void __init i386_start_kernel(void) |
33 | { | 33 | { |
34 | sanitize_boot_params(&boot_params); | 34 | sanitize_boot_params(&boot_params); |
35 | 35 | ||
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 55b67614ed94..1be8e43b669e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -137,7 +137,7 @@ static void __init copy_bootdata(char *real_mode_data) | |||
137 | } | 137 | } |
138 | } | 138 | } |
139 | 139 | ||
140 | void __init x86_64_start_kernel(char * real_mode_data) | 140 | asmlinkage void __init x86_64_start_kernel(char * real_mode_data) |
141 | { | 141 | { |
142 | int i; | 142 | int i; |
143 | 143 | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 5dd87a89f011..81ba27679f18 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -409,6 +409,7 @@ enable_paging: | |||
409 | /* | 409 | /* |
410 | * Check if it is 486 | 410 | * Check if it is 486 |
411 | */ | 411 | */ |
412 | movb $4,X86 # at least 486 | ||
412 | cmpl $-1,X86_CPUID | 413 | cmpl $-1,X86_CPUID |
413 | je is486 | 414 | je is486 |
414 | 415 | ||
@@ -436,7 +437,6 @@ enable_paging: | |||
436 | movl %edx,X86_CAPABILITY | 437 | movl %edx,X86_CAPABILITY |
437 | 438 | ||
438 | is486: | 439 | is486: |
439 | movb $4,X86 | ||
440 | movl $0x50022,%ecx # set AM, WP, NE and MP | 440 | movl $0x50022,%ecx # set AM, WP, NE and MP |
441 | movl %cr0,%eax | 441 | movl %cr0,%eax |
442 | andl $0x80000011,%eax # Save PG,PE,ET | 442 | andl $0x80000011,%eax # Save PG,PE,ET |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 202d24f0f7e7..5d576ab34403 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -116,7 +116,7 @@ static void mxcsr_feature_mask_init(void) | |||
116 | 116 | ||
117 | if (cpu_has_fxsr) { | 117 | if (cpu_has_fxsr) { |
118 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); | 118 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); |
119 | asm volatile("fxsave %0" : : "m" (fx_scratch)); | 119 | asm volatile("fxsave %0" : "+m" (fx_scratch)); |
120 | mask = fx_scratch.mxcsr_mask; | 120 | mask = fx_scratch.mxcsr_mask; |
121 | if (mask == 0) | 121 | if (mask == 0) |
122 | mask = 0x0000ffbf; | 122 | mask = 0x0000ffbf; |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3a8185c042a2..22d0687e7fda 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -177,7 +177,7 @@ u64 arch_irq_stat(void) | |||
177 | * SMP cross-CPU interrupts have their own specific | 177 | * SMP cross-CPU interrupts have their own specific |
178 | * handlers). | 178 | * handlers). |
179 | */ | 179 | */ |
180 | unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | 180 | __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) |
181 | { | 181 | { |
182 | struct pt_regs *old_regs = set_irq_regs(regs); | 182 | struct pt_regs *old_regs = set_irq_regs(regs); |
183 | 183 | ||
@@ -215,7 +215,7 @@ void __smp_x86_platform_ipi(void) | |||
215 | x86_platform_ipi_callback(); | 215 | x86_platform_ipi_callback(); |
216 | } | 216 | } |
217 | 217 | ||
218 | void smp_x86_platform_ipi(struct pt_regs *regs) | 218 | __visible void smp_x86_platform_ipi(struct pt_regs *regs) |
219 | { | 219 | { |
220 | struct pt_regs *old_regs = set_irq_regs(regs); | 220 | struct pt_regs *old_regs = set_irq_regs(regs); |
221 | 221 | ||
@@ -229,7 +229,7 @@ void smp_x86_platform_ipi(struct pt_regs *regs) | |||
229 | /* | 229 | /* |
230 | * Handler for POSTED_INTERRUPT_VECTOR. | 230 | * Handler for POSTED_INTERRUPT_VECTOR. |
231 | */ | 231 | */ |
232 | void smp_kvm_posted_intr_ipi(struct pt_regs *regs) | 232 | __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs) |
233 | { | 233 | { |
234 | struct pt_regs *old_regs = set_irq_regs(regs); | 234 | struct pt_regs *old_regs = set_irq_regs(regs); |
235 | 235 | ||
@@ -247,7 +247,7 @@ void smp_kvm_posted_intr_ipi(struct pt_regs *regs) | |||
247 | } | 247 | } |
248 | #endif | 248 | #endif |
249 | 249 | ||
250 | void smp_trace_x86_platform_ipi(struct pt_regs *regs) | 250 | __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) |
251 | { | 251 | { |
252 | struct pt_regs *old_regs = set_irq_regs(regs); | 252 | struct pt_regs *old_regs = set_irq_regs(regs); |
253 | 253 | ||
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 636a55e4a13c..1de84e3ab4e0 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c | |||
@@ -22,14 +22,14 @@ static inline void __smp_irq_work_interrupt(void) | |||
22 | irq_work_run(); | 22 | irq_work_run(); |
23 | } | 23 | } |
24 | 24 | ||
25 | void smp_irq_work_interrupt(struct pt_regs *regs) | 25 | __visible void smp_irq_work_interrupt(struct pt_regs *regs) |
26 | { | 26 | { |
27 | irq_work_entering_irq(); | 27 | irq_work_entering_irq(); |
28 | __smp_irq_work_interrupt(); | 28 | __smp_irq_work_interrupt(); |
29 | exiting_irq(); | 29 | exiting_irq(); |
30 | } | 30 | } |
31 | 31 | ||
32 | void smp_trace_irq_work_interrupt(struct pt_regs *regs) | 32 | __visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) |
33 | { | 33 | { |
34 | irq_work_entering_irq(); | 34 | irq_work_entering_irq(); |
35 | trace_irq_work_entry(IRQ_WORK_VECTOR); | 35 | trace_irq_work_entry(IRQ_WORK_VECTOR); |
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 2889b3d43882..460f5d9ceebb 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c | |||
@@ -37,7 +37,19 @@ static void __jump_label_transform(struct jump_entry *entry, | |||
37 | } else | 37 | } else |
38 | memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); | 38 | memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); |
39 | 39 | ||
40 | (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); | 40 | /* |
41 | * Make text_poke_bp() the default fallback poker. |||
42 | * |||
43 | * While the change is being made, ignore whether it is a |||
44 | * nop -> jump or a jump -> nop transition, and assume that |||
45 | * nop is always the 'currently valid' instruction. |||
46 | * | ||
47 | */ | ||
48 | if (poker) | ||
49 | (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); | ||
50 | else | ||
51 | text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE, | ||
52 | (void *)entry->code + JUMP_LABEL_NOP_SIZE); | ||
41 | } | 53 | } |
42 | 54 | ||
43 | void arch_jump_label_transform(struct jump_entry *entry, | 55 | void arch_jump_label_transform(struct jump_entry *entry, |
@@ -45,7 +57,7 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
45 | { | 57 | { |
46 | get_online_cpus(); | 58 | get_online_cpus(); |
47 | mutex_lock(&text_mutex); | 59 | mutex_lock(&text_mutex); |
48 | __jump_label_transform(entry, type, text_poke_smp); | 60 | __jump_label_transform(entry, type, NULL); |
49 | mutex_unlock(&text_mutex); | 61 | mutex_unlock(&text_mutex); |
50 | put_online_cpus(); | 62 | put_online_cpus(); |
51 | } | 63 | } |
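
The new fallback path relies on the breakpoint-based patching primitive used above. As a rough, simplified outline only (the real text_poke_bp() lives in arch/x86/kernel/alternative.c; the int3-handler bookkeeping, locking and return value are omitted here), the patch proceeds in three phases:

/* Simplified outline of breakpoint-based text patching; illustrative only. */
static void sync_one_core(void *info)
{
	sync_core();
}

static void text_poke_bp_outline(void *addr, const void *opcode, size_t len,
				 void *handler)
{
	unsigned char int3 = 0xcc;

	/* 1) arm an int3 on the first byte; CPUs that hit it are sent to 'handler' */
	text_poke(addr, &int3, 1);
	on_each_cpu(sync_one_core, NULL, 1);

	/* 2) patch everything except the first byte, behind the breakpoint */
	if (len > 1) {
		text_poke(addr + 1, opcode + 1, len - 1);
		on_each_cpu(sync_one_core, NULL, 1);
	}

	/* 3) finally replace the int3 with the real first byte */
	text_poke(addr, opcode, 1);
	on_each_cpu(sync_one_core, NULL, 1);
}
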
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index 2e9d4b5af036..c6ee63f927ab 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h | |||
@@ -82,14 +82,9 @@ extern void synthesize_reljump(void *from, void *to); | |||
82 | extern void synthesize_relcall(void *from, void *to); | 82 | extern void synthesize_relcall(void *from, void *to); |
83 | 83 | ||
84 | #ifdef CONFIG_OPTPROBES | 84 | #ifdef CONFIG_OPTPROBES |
85 | extern int arch_init_optprobes(void); | ||
86 | extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); | 85 | extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); |
87 | extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr); | 86 | extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr); |
88 | #else /* !CONFIG_OPTPROBES */ | 87 | #else /* !CONFIG_OPTPROBES */ |
89 | static inline int arch_init_optprobes(void) | ||
90 | { | ||
91 | return 0; | ||
92 | } | ||
93 | static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) | 88 | static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) |
94 | { | 89 | { |
95 | return 0; | 90 | return 0; |
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 211bce445522..79a3f9682871 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c | |||
@@ -661,7 +661,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void) | |||
661 | /* | 661 | /* |
662 | * Called from kretprobe_trampoline | 662 | * Called from kretprobe_trampoline |
663 | */ | 663 | */ |
664 | static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | 664 | __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) |
665 | { | 665 | { |
666 | struct kretprobe_instance *ri = NULL; | 666 | struct kretprobe_instance *ri = NULL; |
667 | struct hlist_head *head, empty_rp; | 667 | struct hlist_head *head, empty_rp; |
@@ -1068,7 +1068,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1068 | 1068 | ||
1069 | int __init arch_init_kprobes(void) | 1069 | int __init arch_init_kprobes(void) |
1070 | { | 1070 | { |
1071 | return arch_init_optprobes(); | 1071 | return 0; |
1072 | } | 1072 | } |
1073 | 1073 | ||
1074 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | 1074 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) |
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 76dc6f095724..898160b42e43 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c | |||
@@ -88,9 +88,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long v | |||
88 | *(unsigned long *)addr = val; | 88 | *(unsigned long *)addr = val; |
89 | } | 89 | } |
90 | 90 | ||
91 | static void __used __kprobes kprobes_optinsn_template_holder(void) | 91 | asm ( |
92 | { | ||
93 | asm volatile ( | ||
94 | ".global optprobe_template_entry\n" | 92 | ".global optprobe_template_entry\n" |
95 | "optprobe_template_entry:\n" | 93 | "optprobe_template_entry:\n" |
96 | #ifdef CONFIG_X86_64 | 94 | #ifdef CONFIG_X86_64 |
@@ -129,7 +127,6 @@ static void __used __kprobes kprobes_optinsn_template_holder(void) | |||
129 | #endif | 127 | #endif |
130 | ".global optprobe_template_end\n" | 128 | ".global optprobe_template_end\n" |
131 | "optprobe_template_end:\n"); | 129 | "optprobe_template_end:\n"); |
132 | } | ||
133 | 130 | ||
134 | #define TMPL_MOVE_IDX \ | 131 | #define TMPL_MOVE_IDX \ |
135 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | 132 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) |
@@ -371,31 +368,6 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | |||
371 | return 0; | 368 | return 0; |
372 | } | 369 | } |
373 | 370 | ||
374 | #define MAX_OPTIMIZE_PROBES 256 | ||
375 | static struct text_poke_param *jump_poke_params; | ||
376 | static struct jump_poke_buffer { | ||
377 | u8 buf[RELATIVEJUMP_SIZE]; | ||
378 | } *jump_poke_bufs; | ||
379 | |||
380 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
381 | u8 *insn_buf, | ||
382 | struct optimized_kprobe *op) | ||
383 | { | ||
384 | s32 rel = (s32)((long)op->optinsn.insn - | ||
385 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
386 | |||
387 | /* Backup instructions which will be replaced by jump address */ | ||
388 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
389 | RELATIVE_ADDR_SIZE); | ||
390 | |||
391 | insn_buf[0] = RELATIVEJUMP_OPCODE; | ||
392 | *(s32 *)(&insn_buf[1]) = rel; | ||
393 | |||
394 | tprm->addr = op->kp.addr; | ||
395 | tprm->opcode = insn_buf; | ||
396 | tprm->len = RELATIVEJUMP_SIZE; | ||
397 | } | ||
398 | |||
399 | /* | 371 | /* |
400 | * Replace breakpoints (int3) with relative jumps. | 372 | * Replace breakpoints (int3) with relative jumps. |
401 | * Caller must call with locking kprobe_mutex and text_mutex. | 373 | * Caller must call with locking kprobe_mutex and text_mutex. |
@@ -403,37 +375,38 @@ static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | |||
403 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | 375 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) |
404 | { | 376 | { |
405 | struct optimized_kprobe *op, *tmp; | 377 | struct optimized_kprobe *op, *tmp; |
406 | int c = 0; | 378 | u8 insn_buf[RELATIVEJUMP_SIZE]; |
407 | 379 | ||
408 | list_for_each_entry_safe(op, tmp, oplist, list) { | 380 | list_for_each_entry_safe(op, tmp, oplist, list) { |
381 | s32 rel = (s32)((long)op->optinsn.insn - | ||
382 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
383 | |||
409 | WARN_ON(kprobe_disabled(&op->kp)); | 384 | WARN_ON(kprobe_disabled(&op->kp)); |
410 | /* Setup param */ | 385 | |
411 | setup_optimize_kprobe(&jump_poke_params[c], | 386 | /* Backup instructions which will be replaced by jump address */ |
412 | jump_poke_bufs[c].buf, op); | 387 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, |
388 | RELATIVE_ADDR_SIZE); | ||
389 | |||
390 | insn_buf[0] = RELATIVEJUMP_OPCODE; | ||
391 | *(s32 *)(&insn_buf[1]) = rel; | ||
392 | |||
393 | text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, | ||
394 | op->optinsn.insn); | ||
395 | |||
413 | list_del_init(&op->list); | 396 | list_del_init(&op->list); |
414 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
415 | break; | ||
416 | } | 397 | } |
417 | |||
418 | /* | ||
419 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
420 | * However, since kprobes itself also doesn't support NMI/MCE | ||
421 | * code probing, it's not a problem. | ||
422 | */ | ||
423 | text_poke_smp_batch(jump_poke_params, c); | ||
424 | } | 398 | } |
425 | 399 | ||
426 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | 400 | /* Replace a relative jump with a breakpoint (int3). */ |
427 | u8 *insn_buf, | 401 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) |
428 | struct optimized_kprobe *op) | ||
429 | { | 402 | { |
403 | u8 insn_buf[RELATIVEJUMP_SIZE]; | ||
404 | |||
430 | /* Set int3 to first byte for kprobes */ | 405 | /* Set int3 to first byte for kprobes */ |
431 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | 406 | insn_buf[0] = BREAKPOINT_INSTRUCTION; |
432 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | 407 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); |
433 | 408 | text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, | |
434 | tprm->addr = op->kp.addr; | 409 | op->optinsn.insn); |
435 | tprm->opcode = insn_buf; | ||
436 | tprm->len = RELATIVEJUMP_SIZE; | ||
437 | } | 410 | } |
438 | 411 | ||
439 | /* | 412 | /* |
@@ -444,34 +417,11 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist, | |||
444 | struct list_head *done_list) | 417 | struct list_head *done_list) |
445 | { | 418 | { |
446 | struct optimized_kprobe *op, *tmp; | 419 | struct optimized_kprobe *op, *tmp; |
447 | int c = 0; | ||
448 | 420 | ||
449 | list_for_each_entry_safe(op, tmp, oplist, list) { | 421 | list_for_each_entry_safe(op, tmp, oplist, list) { |
450 | /* Setup param */ | 422 | arch_unoptimize_kprobe(op); |
451 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
452 | jump_poke_bufs[c].buf, op); | ||
453 | list_move(&op->list, done_list); | 423 | list_move(&op->list, done_list); |
454 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
455 | break; | ||
456 | } | 424 | } |
457 | |||
458 | /* | ||
459 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
460 | * However, since kprobes itself also doesn't support NMI/MCE | ||
461 | * code probing, it's not a problem. | ||
462 | */ | ||
463 | text_poke_smp_batch(jump_poke_params, c); | ||
464 | } | ||
465 | |||
466 | /* Replace a relative jump with a breakpoint (int3). */ | ||
467 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
468 | { | ||
469 | u8 buf[RELATIVEJUMP_SIZE]; | ||
470 | |||
471 | /* Set int3 to first byte for kprobes */ | ||
472 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
473 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
474 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
475 | } | 425 | } |
476 | 426 | ||
477 | int __kprobes | 427 | int __kprobes |
@@ -491,22 +441,3 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) | |||
491 | } | 441 | } |
492 | return 0; | 442 | return 0; |
493 | } | 443 | } |
494 | |||
495 | int __kprobes arch_init_optprobes(void) | ||
496 | { | ||
497 | /* Allocate code buffer and parameter array */ | ||
498 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
499 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
500 | if (!jump_poke_bufs) | ||
501 | return -ENOMEM; | ||
502 | |||
503 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
504 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
505 | if (!jump_poke_params) { | ||
506 | kfree(jump_poke_bufs); | ||
507 | jump_poke_bufs = NULL; | ||
508 | return -ENOMEM; | ||
509 | } | ||
510 | |||
511 | return 0; | ||
512 | } | ||
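The rel32 displacement that the new arch_optimize_kprobes() writes after the 0xe9 opcode is taken relative to the end of the 5-byte jump, which is why RELATIVEJUMP_SIZE is added to the probe address before subtracting. A minimal user-space sketch of that arithmetic (the helper name is illustrative; only the opcode and size values come from the kernel headers):

#include <stdint.h>
#include <string.h>

#define RELATIVEJUMP_OPCODE	0xe9
#define RELATIVEJUMP_SIZE	5

/* Build a "jmp rel32" placed at 'from' that lands on 'to'. */
static void make_relative_jump(uint8_t buf[RELATIVEJUMP_SIZE],
			       unsigned long from, unsigned long to)
{
	int32_t rel = (int32_t)(to - (from + RELATIVEJUMP_SIZE));

	buf[0] = RELATIVEJUMP_OPCODE;
	memcpy(&buf[1], &rel, sizeof(rel));	/* imm32, little-endian on x86 */
}

text_poke_bp() then installs this buffer through an int3-breakpoint protocol, which is what allows the batched text_poke_smp_batch() machinery and arch_init_optprobes() above to be removed.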
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a96d32cc55b8..697b93af02dd 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/sched.h> | 34 | #include <linux/sched.h> |
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/kprobes.h> | 36 | #include <linux/kprobes.h> |
37 | #include <linux/debugfs.h> | ||
37 | #include <asm/timer.h> | 38 | #include <asm/timer.h> |
38 | #include <asm/cpu.h> | 39 | #include <asm/cpu.h> |
39 | #include <asm/traps.h> | 40 | #include <asm/traps.h> |
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
419 | WARN_ON(kvm_register_clock("primary cpu clock")); | 420 | WARN_ON(kvm_register_clock("primary cpu clock")); |
420 | kvm_guest_cpu_init(); | 421 | kvm_guest_cpu_init(); |
421 | native_smp_prepare_boot_cpu(); | 422 | native_smp_prepare_boot_cpu(); |
423 | kvm_spinlock_init(); | ||
422 | } | 424 | } |
423 | 425 | ||
424 | static void kvm_guest_cpu_online(void *dummy) | 426 | static void kvm_guest_cpu_online(void *dummy) |
@@ -498,11 +500,9 @@ void __init kvm_guest_init(void) | |||
498 | #endif | 500 | #endif |
499 | } | 501 | } |
500 | 502 | ||
501 | static bool __init kvm_detect(void) | 503 | static uint32_t __init kvm_detect(void) |
502 | { | 504 | { |
503 | if (!kvm_para_available()) | 505 | return kvm_cpuid_base(); |
504 | return false; | ||
505 | return true; | ||
506 | } | 506 | } |
507 | 507 | ||
508 | const struct hypervisor_x86 x86_hyper_kvm __refconst = { | 508 | const struct hypervisor_x86 x86_hyper_kvm __refconst = { |
@@ -523,3 +523,263 @@ static __init int activate_jump_labels(void) | |||
523 | return 0; | 523 | return 0; |
524 | } | 524 | } |
525 | arch_initcall(activate_jump_labels); | 525 | arch_initcall(activate_jump_labels); |
526 | |||
527 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
528 | |||
529 | /* Kick a cpu by its apicid. Used to wake up a halted vcpu */ | ||
530 | static void kvm_kick_cpu(int cpu) | ||
531 | { | ||
532 | int apicid; | ||
533 | unsigned long flags = 0; | ||
534 | |||
535 | apicid = per_cpu(x86_cpu_to_apicid, cpu); | ||
536 | kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); | ||
537 | } | ||
538 | |||
539 | enum kvm_contention_stat { | ||
540 | TAKEN_SLOW, | ||
541 | TAKEN_SLOW_PICKUP, | ||
542 | RELEASED_SLOW, | ||
543 | RELEASED_SLOW_KICKED, | ||
544 | NR_CONTENTION_STATS | ||
545 | }; | ||
546 | |||
547 | #ifdef CONFIG_KVM_DEBUG_FS | ||
548 | #define HISTO_BUCKETS 30 | ||
549 | |||
550 | static struct kvm_spinlock_stats | ||
551 | { | ||
552 | u32 contention_stats[NR_CONTENTION_STATS]; | ||
553 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | ||
554 | u64 time_blocked; | ||
555 | } spinlock_stats; | ||
556 | |||
557 | static u8 zero_stats; | ||
558 | |||
559 | static inline void check_zero(void) | ||
560 | { | ||
561 | u8 ret; | ||
562 | u8 old; | ||
563 | |||
564 | old = ACCESS_ONCE(zero_stats); | ||
565 | if (unlikely(old)) { | ||
566 | ret = cmpxchg(&zero_stats, old, 0); | ||
567 | /* This ensures only one fellow resets the stat */ | ||
568 | if (ret == old) | ||
569 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
570 | } | ||
571 | } | ||
572 | |||
573 | static inline void add_stats(enum kvm_contention_stat var, u32 val) | ||
574 | { | ||
575 | check_zero(); | ||
576 | spinlock_stats.contention_stats[var] += val; | ||
577 | } | ||
578 | |||
579 | |||
580 | static inline u64 spin_time_start(void) | ||
581 | { | ||
582 | return sched_clock(); | ||
583 | } | ||
584 | |||
585 | static void __spin_time_accum(u64 delta, u32 *array) | ||
586 | { | ||
587 | unsigned index; | ||
588 | |||
589 | index = ilog2(delta); | ||
590 | check_zero(); | ||
591 | |||
592 | if (index < HISTO_BUCKETS) | ||
593 | array[index]++; | ||
594 | else | ||
595 | array[HISTO_BUCKETS]++; | ||
596 | } | ||
597 | |||
598 | static inline void spin_time_accum_blocked(u64 start) | ||
599 | { | ||
600 | u32 delta; | ||
601 | |||
602 | delta = sched_clock() - start; | ||
603 | __spin_time_accum(delta, spinlock_stats.histo_spin_blocked); | ||
604 | spinlock_stats.time_blocked += delta; | ||
605 | } | ||
606 | |||
607 | static struct dentry *d_spin_debug; | ||
608 | static struct dentry *d_kvm_debug; | ||
609 | |||
610 | struct dentry *kvm_init_debugfs(void) | ||
611 | { | ||
612 | d_kvm_debug = debugfs_create_dir("kvm", NULL); | ||
613 | if (!d_kvm_debug) | ||
614 | printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); | ||
615 | |||
616 | return d_kvm_debug; | ||
617 | } | ||
618 | |||
619 | static int __init kvm_spinlock_debugfs(void) | ||
620 | { | ||
621 | struct dentry *d_kvm; | ||
622 | |||
623 | d_kvm = kvm_init_debugfs(); | ||
624 | if (d_kvm == NULL) | ||
625 | return -ENOMEM; | ||
626 | |||
627 | d_spin_debug = debugfs_create_dir("spinlocks", d_kvm); | ||
628 | |||
629 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | ||
630 | |||
631 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | ||
632 | &spinlock_stats.contention_stats[TAKEN_SLOW]); | ||
633 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | ||
634 | &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); | ||
635 | |||
636 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | ||
637 | &spinlock_stats.contention_stats[RELEASED_SLOW]); | ||
638 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | ||
639 | &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); | ||
640 | |||
641 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | ||
642 | &spinlock_stats.time_blocked); | ||
643 | |||
644 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | ||
645 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | ||
646 | |||
647 | return 0; | ||
648 | } | ||
649 | fs_initcall(kvm_spinlock_debugfs); | ||
650 | #else /* !CONFIG_KVM_DEBUG_FS */ | ||
651 | static inline void add_stats(enum kvm_contention_stat var, u32 val) | ||
652 | { | ||
653 | } | ||
654 | |||
655 | static inline u64 spin_time_start(void) | ||
656 | { | ||
657 | return 0; | ||
658 | } | ||
659 | |||
660 | static inline void spin_time_accum_blocked(u64 start) | ||
661 | { | ||
662 | } | ||
663 | #endif /* CONFIG_KVM_DEBUG_FS */ | ||
664 | |||
665 | struct kvm_lock_waiting { | ||
666 | struct arch_spinlock *lock; | ||
667 | __ticket_t want; | ||
668 | }; | ||
669 | |||
670 | /* cpus 'waiting' on a spinlock to become available */ | ||
671 | static cpumask_t waiting_cpus; | ||
672 | |||
673 | /* Track spinlock on which a cpu is waiting */ | ||
674 | static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); | ||
675 | |||
676 | static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | ||
677 | { | ||
678 | struct kvm_lock_waiting *w; | ||
679 | int cpu; | ||
680 | u64 start; | ||
681 | unsigned long flags; | ||
682 | |||
683 | if (in_nmi()) | ||
684 | return; | ||
685 | |||
686 | w = &__get_cpu_var(klock_waiting); | ||
687 | cpu = smp_processor_id(); | ||
688 | start = spin_time_start(); | ||
689 | |||
690 | /* | ||
691 | * Make sure an interrupt handler can't upset things in a | ||
692 | * partially set-up state. | ||
693 | */ | ||
694 | local_irq_save(flags); | ||
695 | |||
696 | /* | ||
697 | * The ordering protocol on this is that the "lock" pointer | ||
698 | * may only be set non-NULL if the "want" ticket is correct. | ||
699 | * If we're updating "want", we must first clear "lock". | ||
700 | */ | ||
701 | w->lock = NULL; | ||
702 | smp_wmb(); | ||
703 | w->want = want; | ||
704 | smp_wmb(); | ||
705 | w->lock = lock; | ||
706 | |||
707 | add_stats(TAKEN_SLOW, 1); | ||
708 | |||
709 | /* | ||
710 | * This uses set_bit, which is atomic but we should not rely on its | ||
711 | * reordering guarantees, so a barrier is needed after this call. | ||
712 | */ | ||
713 | cpumask_set_cpu(cpu, &waiting_cpus); | ||
714 | |||
715 | barrier(); | ||
716 | |||
717 | /* | ||
718 | * Mark entry to slowpath before doing the pickup test to make | ||
719 | * sure we don't deadlock with an unlocker. | ||
720 | */ | ||
721 | __ticket_enter_slowpath(lock); | ||
722 | |||
723 | /* | ||
724 | * Check again to make sure it didn't become free while | ||
725 | * we weren't looking. | ||
726 | */ | ||
727 | if (ACCESS_ONCE(lock->tickets.head) == want) { | ||
728 | add_stats(TAKEN_SLOW_PICKUP, 1); | ||
729 | goto out; | ||
730 | } | ||
731 | |||
732 | /* | ||
733 | * Halt until it's our turn and we are kicked. Note that we use a safe | ||
734 | * halt in the irq-enabled case, to avoid hanging if the lock info is | ||
735 | * overwritten in an irq spinlock slowpath and no spurious interrupt occurs to save us. | ||
736 | */ | ||
737 | if (arch_irqs_disabled_flags(flags)) | ||
738 | halt(); | ||
739 | else | ||
740 | safe_halt(); | ||
741 | |||
742 | out: | ||
743 | cpumask_clear_cpu(cpu, &waiting_cpus); | ||
744 | w->lock = NULL; | ||
745 | local_irq_restore(flags); | ||
746 | spin_time_accum_blocked(start); | ||
747 | } | ||
748 | PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning); | ||
749 | |||
750 | /* Kick vcpu waiting on @lock->head to reach value @ticket */ | ||
751 | static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) | ||
752 | { | ||
753 | int cpu; | ||
754 | |||
755 | add_stats(RELEASED_SLOW, 1); | ||
756 | for_each_cpu(cpu, &waiting_cpus) { | ||
757 | const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu); | ||
758 | if (ACCESS_ONCE(w->lock) == lock && | ||
759 | ACCESS_ONCE(w->want) == ticket) { | ||
760 | add_stats(RELEASED_SLOW_KICKED, 1); | ||
761 | kvm_kick_cpu(cpu); | ||
762 | break; | ||
763 | } | ||
764 | } | ||
765 | } | ||
766 | |||
767 | /* | ||
768 | * Set up pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. | ||
769 | */ | ||
770 | void __init kvm_spinlock_init(void) | ||
771 | { | ||
772 | if (!kvm_para_available()) | ||
773 | return; | ||
774 | /* Does host kernel support KVM_FEATURE_PV_UNHALT? */ | ||
775 | if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) | ||
776 | return; | ||
777 | |||
778 | printk(KERN_INFO "KVM setup paravirtual spinlock\n"); | ||
779 | |||
780 | static_key_slow_inc(¶virt_ticketlocks_enabled); | ||
781 | |||
782 | pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); | ||
783 | pv_lock_ops.unlock_kick = kvm_unlock_kick; | ||
784 | } | ||
785 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
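The handshake between kvm_lock_spinning() and kvm_unlock_kick() relies on the publication order spelled out in the comment above: the per-cpu record's lock pointer is cleared before the ticket is updated and only set non-NULL afterwards, so the kicker can never pair a stale ticket with the new lock. A condensed sketch of that publication pattern, with a simplified record and __sync_synchronize() standing in for smp_wmb() (all names here are illustrative):

struct waiter_rec {
	void *lock;		/* lock being waited on; NULL means the record is invalid */
	unsigned short want;	/* ticket the waiter is blocked on */
};

static void publish_wait(struct waiter_rec *w, void *lock, unsigned short want)
{
	w->lock = NULL;		/* 1. invalidate the record */
	__sync_synchronize();	/* 2. make the NULL visible before the new ticket */
	w->want = want;		/* 3. update the ticket */
	__sync_synchronize();	/* 4. make the ticket visible before re-validating */
	w->lock = lock;		/* 5. record valid again: (lock, want) are consistent */
}

On the release side, kvm_unlock_kick() only kicks a CPU whose record matches both the lock and the ticket, so a record caught mid-update is simply skipped.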
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 47ebb1dbfbcb..7123b5df479d 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -145,10 +145,9 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | |||
145 | return 0; | 145 | return 0; |
146 | } | 146 | } |
147 | 147 | ||
148 | static unsigned int verify_patch_size(int cpu, u32 patch_size, | 148 | static unsigned int verify_patch_size(u8 family, u32 patch_size, |
149 | unsigned int size) | 149 | unsigned int size) |
150 | { | 150 | { |
151 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
152 | u32 max_size; | 151 | u32 max_size; |
153 | 152 | ||
154 | #define F1XH_MPB_MAX_SIZE 2048 | 153 | #define F1XH_MPB_MAX_SIZE 2048 |
@@ -156,7 +155,7 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size, | |||
156 | #define F15H_MPB_MAX_SIZE 4096 | 155 | #define F15H_MPB_MAX_SIZE 4096 |
157 | #define F16H_MPB_MAX_SIZE 3458 | 156 | #define F16H_MPB_MAX_SIZE 3458 |
158 | 157 | ||
159 | switch (c->x86) { | 158 | switch (family) { |
160 | case 0x14: | 159 | case 0x14: |
161 | max_size = F14H_MPB_MAX_SIZE; | 160 | max_size = F14H_MPB_MAX_SIZE; |
162 | break; | 161 | break; |
@@ -220,12 +219,13 @@ int apply_microcode_amd(int cpu) | |||
220 | return 0; | 219 | return 0; |
221 | } | 220 | } |
222 | 221 | ||
223 | if (__apply_microcode_amd(mc_amd)) | 222 | if (__apply_microcode_amd(mc_amd)) { |
224 | pr_err("CPU%d: update failed for patch_level=0x%08x\n", | 223 | pr_err("CPU%d: update failed for patch_level=0x%08x\n", |
225 | cpu, mc_amd->hdr.patch_id); | 224 | cpu, mc_amd->hdr.patch_id); |
226 | else | 225 | return -1; |
227 | pr_info("CPU%d: new patch_level=0x%08x\n", cpu, | 226 | } |
228 | mc_amd->hdr.patch_id); | 227 | pr_info("CPU%d: new patch_level=0x%08x\n", cpu, |
228 | mc_amd->hdr.patch_id); | ||
229 | 229 | ||
230 | uci->cpu_sig.rev = mc_amd->hdr.patch_id; | 230 | uci->cpu_sig.rev = mc_amd->hdr.patch_id; |
231 | c->microcode = mc_amd->hdr.patch_id; | 231 | c->microcode = mc_amd->hdr.patch_id; |
@@ -276,9 +276,8 @@ static void cleanup(void) | |||
276 | * driver cannot continue functioning normally. In such cases, we tear | 276 | * driver cannot continue functioning normally. In such cases, we tear |
277 | * down everything we've used up so far and exit. | 277 | * down everything we've used up so far and exit. |
278 | */ | 278 | */ |
279 | static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) | 279 | static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) |
280 | { | 280 | { |
281 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
282 | struct microcode_header_amd *mc_hdr; | 281 | struct microcode_header_amd *mc_hdr; |
283 | struct ucode_patch *patch; | 282 | struct ucode_patch *patch; |
284 | unsigned int patch_size, crnt_size, ret; | 283 | unsigned int patch_size, crnt_size, ret; |
@@ -298,7 +297,7 @@ static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) | |||
298 | 297 | ||
299 | /* check if patch is for the current family */ | 298 | /* check if patch is for the current family */ |
300 | proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); | 299 | proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); |
301 | if (proc_fam != c->x86) | 300 | if (proc_fam != family) |
302 | return crnt_size; | 301 | return crnt_size; |
303 | 302 | ||
304 | if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { | 303 | if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { |
@@ -307,7 +306,7 @@ static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) | |||
307 | return crnt_size; | 306 | return crnt_size; |
308 | } | 307 | } |
309 | 308 | ||
310 | ret = verify_patch_size(cpu, patch_size, leftover); | 309 | ret = verify_patch_size(family, patch_size, leftover); |
311 | if (!ret) { | 310 | if (!ret) { |
312 | pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); | 311 | pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); |
313 | return crnt_size; | 312 | return crnt_size; |
@@ -338,7 +337,8 @@ static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) | |||
338 | return crnt_size; | 337 | return crnt_size; |
339 | } | 338 | } |
340 | 339 | ||
341 | static enum ucode_state __load_microcode_amd(int cpu, const u8 *data, size_t size) | 340 | static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, |
341 | size_t size) | ||
342 | { | 342 | { |
343 | enum ucode_state ret = UCODE_ERROR; | 343 | enum ucode_state ret = UCODE_ERROR; |
344 | unsigned int leftover; | 344 | unsigned int leftover; |
@@ -361,7 +361,7 @@ static enum ucode_state __load_microcode_amd(int cpu, const u8 *data, size_t siz | |||
361 | } | 361 | } |
362 | 362 | ||
363 | while (leftover) { | 363 | while (leftover) { |
364 | crnt_size = verify_and_add_patch(cpu, fw, leftover); | 364 | crnt_size = verify_and_add_patch(family, fw, leftover); |
365 | if (crnt_size < 0) | 365 | if (crnt_size < 0) |
366 | return ret; | 366 | return ret; |
367 | 367 | ||
@@ -372,22 +372,22 @@ static enum ucode_state __load_microcode_amd(int cpu, const u8 *data, size_t siz | |||
372 | return UCODE_OK; | 372 | return UCODE_OK; |
373 | } | 373 | } |
374 | 374 | ||
375 | enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) | 375 | enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size) |
376 | { | 376 | { |
377 | enum ucode_state ret; | 377 | enum ucode_state ret; |
378 | 378 | ||
379 | /* free old equiv table */ | 379 | /* free old equiv table */ |
380 | free_equiv_cpu_table(); | 380 | free_equiv_cpu_table(); |
381 | 381 | ||
382 | ret = __load_microcode_amd(cpu, data, size); | 382 | ret = __load_microcode_amd(family, data, size); |
383 | 383 | ||
384 | if (ret != UCODE_OK) | 384 | if (ret != UCODE_OK) |
385 | cleanup(); | 385 | cleanup(); |
386 | 386 | ||
387 | #if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32) | 387 | #if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32) |
388 | /* save BSP's matching patch for early load */ | 388 | /* save BSP's matching patch for early load */ |
389 | if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { | 389 | if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) { |
390 | struct ucode_patch *p = find_patch(cpu); | 390 | struct ucode_patch *p = find_patch(smp_processor_id()); |
391 | if (p) { | 391 | if (p) { |
392 | memset(amd_bsp_mpb, 0, MPB_MAX_SIZE); | 392 | memset(amd_bsp_mpb, 0, MPB_MAX_SIZE); |
393 | memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data), | 393 | memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data), |
@@ -440,7 +440,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, | |||
440 | goto fw_release; | 440 | goto fw_release; |
441 | } | 441 | } |
442 | 442 | ||
443 | ret = load_microcode_amd(cpu, fw->data, fw->size); | 443 | ret = load_microcode_amd(c->x86, fw->data, fw->size); |
444 | 444 | ||
445 | fw_release: | 445 | fw_release: |
446 | release_firmware(fw); | 446 | release_firmware(fw); |
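Both the driver and the early loader now hand load_microcode_amd() the CPU family rather than a CPU number, and they derive that family the same way the patch-matching code above does: CPUID function 1 returns the base family in EAX bits 11:8 and the extended family in bits 27:20, and the two fields are summed. A small sketch of the computation (the function name is illustrative):

/* EAX of CPUID leaf 0x00000001 -> AMD family value as used above */
static unsigned char cpuid1_eax_to_family(unsigned int eax)
{
	return ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
}

For example, eax = 0x00600f12 gives 0x0f + 0x06 = 0x15, i.e. family 21 (a family 15h part).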
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c index 1d14ffee5749..6073104ccaa3 100644 --- a/arch/x86/kernel/microcode_amd_early.c +++ b/arch/x86/kernel/microcode_amd_early.c | |||
@@ -238,25 +238,17 @@ static void __init collect_cpu_sig_on_bsp(void *arg) | |||
238 | uci->cpu_sig.sig = cpuid_eax(0x00000001); | 238 | uci->cpu_sig.sig = cpuid_eax(0x00000001); |
239 | } | 239 | } |
240 | #else | 240 | #else |
241 | static void collect_cpu_info_amd_early(struct cpuinfo_x86 *c, | 241 | void load_ucode_amd_ap(void) |
242 | struct ucode_cpu_info *uci) | ||
243 | { | 242 | { |
243 | unsigned int cpu = smp_processor_id(); | ||
244 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
244 | u32 rev, eax; | 245 | u32 rev, eax; |
245 | 246 | ||
246 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); | 247 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); |
247 | eax = cpuid_eax(0x00000001); | 248 | eax = cpuid_eax(0x00000001); |
248 | 249 | ||
249 | uci->cpu_sig.sig = eax; | ||
250 | uci->cpu_sig.rev = rev; | 250 | uci->cpu_sig.rev = rev; |
251 | c->microcode = rev; | 251 | uci->cpu_sig.sig = eax; |
252 | c->x86 = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); | ||
253 | } | ||
254 | |||
255 | void load_ucode_amd_ap(void) | ||
256 | { | ||
257 | unsigned int cpu = smp_processor_id(); | ||
258 | |||
259 | collect_cpu_info_amd_early(&cpu_data(cpu), ucode_cpu_info + cpu); | ||
260 | 252 | ||
261 | if (cpu && !ucode_loaded) { | 253 | if (cpu && !ucode_loaded) { |
262 | void *ucode; | 254 | void *ucode; |
@@ -265,8 +257,10 @@ void load_ucode_amd_ap(void) | |||
265 | return; | 257 | return; |
266 | 258 | ||
267 | ucode = (void *)(initrd_start + ucode_offset); | 259 | ucode = (void *)(initrd_start + ucode_offset); |
268 | if (load_microcode_amd(0, ucode, ucode_size) != UCODE_OK) | 260 | eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); |
261 | if (load_microcode_amd(eax, ucode, ucode_size) != UCODE_OK) | ||
269 | return; | 262 | return; |
263 | |||
270 | ucode_loaded = true; | 264 | ucode_loaded = true; |
271 | } | 265 | } |
272 | 266 | ||
@@ -278,6 +272,8 @@ int __init save_microcode_in_initrd_amd(void) | |||
278 | { | 272 | { |
279 | enum ucode_state ret; | 273 | enum ucode_state ret; |
280 | void *ucode; | 274 | void *ucode; |
275 | u32 eax; | ||
276 | |||
281 | #ifdef CONFIG_X86_32 | 277 | #ifdef CONFIG_X86_32 |
282 | unsigned int bsp = boot_cpu_data.cpu_index; | 278 | unsigned int bsp = boot_cpu_data.cpu_index; |
283 | struct ucode_cpu_info *uci = ucode_cpu_info + bsp; | 279 | struct ucode_cpu_info *uci = ucode_cpu_info + bsp; |
@@ -293,7 +289,10 @@ int __init save_microcode_in_initrd_amd(void) | |||
293 | return 0; | 289 | return 0; |
294 | 290 | ||
295 | ucode = (void *)(initrd_start + ucode_offset); | 291 | ucode = (void *)(initrd_start + ucode_offset); |
296 | ret = load_microcode_amd(0, ucode, ucode_size); | 292 | eax = cpuid_eax(0x00000001); |
293 | eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); | ||
294 | |||
295 | ret = load_microcode_amd(eax, ucode, ucode_size); | ||
297 | if (ret != UCODE_OK) | 296 | if (ret != UCODE_OK) |
298 | return -EINVAL; | 297 | return -EINVAL; |
299 | 298 | ||
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 676b8c77a976..bbb6c7316341 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
@@ -4,25 +4,17 @@ | |||
4 | */ | 4 | */ |
5 | #include <linux/spinlock.h> | 5 | #include <linux/spinlock.h> |
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/jump_label.h> | ||
7 | 8 | ||
8 | #include <asm/paravirt.h> | 9 | #include <asm/paravirt.h> |
9 | 10 | ||
10 | static inline void | ||
11 | default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) | ||
12 | { | ||
13 | arch_spin_lock(lock); | ||
14 | } | ||
15 | |||
16 | struct pv_lock_ops pv_lock_ops = { | 11 | struct pv_lock_ops pv_lock_ops = { |
17 | #ifdef CONFIG_SMP | 12 | #ifdef CONFIG_SMP |
18 | .spin_is_locked = __ticket_spin_is_locked, | 13 | .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), |
19 | .spin_is_contended = __ticket_spin_is_contended, | 14 | .unlock_kick = paravirt_nop, |
20 | |||
21 | .spin_lock = __ticket_spin_lock, | ||
22 | .spin_lock_flags = default_spin_lock_flags, | ||
23 | .spin_trylock = __ticket_spin_trylock, | ||
24 | .spin_unlock = __ticket_spin_unlock, | ||
25 | #endif | 15 | #endif |
26 | }; | 16 | }; |
27 | EXPORT_SYMBOL(pv_lock_ops); | 17 | EXPORT_SYMBOL(pv_lock_ops); |
28 | 18 | ||
19 | struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE; | ||
20 | EXPORT_SYMBOL(paravirt_ticketlocks_enabled); | ||
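With pv_lock_ops reduced to the two slow-path hooks, the new paravirt_ticketlocks_enabled static key is what keeps the ticket-lock fast path fully native until a backend such as kvm_spinlock_init() flips it on. The intended check is the usual static-key pattern, sketched here with hypothetical helpers (example_pv_unlock() and example_native_unlock() merely stand in for the real ticket-lock code in the spinlock headers):

/* Sketch: branch to the pv slow path only when the key has been enabled. */
static inline void example_spin_unlock(arch_spinlock_t *lock)
{
	if (static_key_false(&paravirt_ticketlocks_enabled))
		example_pv_unlock(lock);	/* release the ticket, maybe kick a waiter */
	else
		example_native_unlock(lock);	/* plain add on the ticket head */
}

Because the key defaults to false, non-paravirtualized hosts pay nothing beyond a patched-in no-op on the unlock path.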
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index cd6de64cc480..1b10af835c31 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -62,11 +62,6 @@ void __init default_banner(void) | |||
62 | pv_info.name); | 62 | pv_info.name); |
63 | } | 63 | } |
64 | 64 | ||
65 | /* Simple instruction patching code. */ | ||
66 | #define DEF_NATIVE(ops, name, code) \ | ||
67 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | ||
68 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | ||
69 | |||
70 | /* Undefined instruction for dealing with missing ops pointers. */ | 65 | /* Undefined instruction for dealing with missing ops pointers. */ |
71 | static const unsigned char ud2a[] = { 0x0f, 0x0b }; | 66 | static const unsigned char ud2a[] = { 0x0f, 0x0b }; |
72 | 67 | ||
@@ -324,7 +319,7 @@ struct pv_time_ops pv_time_ops = { | |||
324 | .steal_clock = native_steal_clock, | 319 | .steal_clock = native_steal_clock, |
325 | }; | 320 | }; |
326 | 321 | ||
327 | struct pv_irq_ops pv_irq_ops = { | 322 | __visible struct pv_irq_ops pv_irq_ops = { |
328 | .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), | 323 | .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), |
329 | .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), | 324 | .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), |
330 | .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), | 325 | .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), |
@@ -336,7 +331,7 @@ struct pv_irq_ops pv_irq_ops = { | |||
336 | #endif | 331 | #endif |
337 | }; | 332 | }; |
338 | 333 | ||
339 | struct pv_cpu_ops pv_cpu_ops = { | 334 | __visible struct pv_cpu_ops pv_cpu_ops = { |
340 | .cpuid = native_cpuid, | 335 | .cpuid = native_cpuid, |
341 | .get_debugreg = native_get_debugreg, | 336 | .get_debugreg = native_get_debugreg, |
342 | .set_debugreg = native_set_debugreg, | 337 | .set_debugreg = native_set_debugreg, |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 83369e5a1d27..c83516be1052 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -36,7 +36,7 @@ | |||
36 | * section. Since TSS's are completely CPU-local, we want them | 36 | * section. Since TSS's are completely CPU-local, we want them |
37 | * on exact cacheline boundaries, to eliminate cacheline ping-pong. | 37 | * on exact cacheline boundaries, to eliminate cacheline ping-pong. |
38 | */ | 38 | */ |
39 | DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; | 39 | __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; |
40 | 40 | ||
41 | #ifdef CONFIG_X86_64 | 41 | #ifdef CONFIG_X86_64 |
42 | static DEFINE_PER_CPU(unsigned char, is_idle); | 42 | static DEFINE_PER_CPU(unsigned char, is_idle); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8adefca71dc..884f98f69354 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(start_thread); | |||
247 | * the task-switch, and shows up in ret_from_fork in entry.S, | 247 | * the task-switch, and shows up in ret_from_fork in entry.S, |
248 | * for example. | 248 | * for example. |
249 | */ | 249 | */ |
250 | __notrace_funcgraph struct task_struct * | 250 | __visible __notrace_funcgraph struct task_struct * |
251 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 251 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
252 | { | 252 | { |
253 | struct thread_struct *prev = &prev_p->thread, | 253 | struct thread_struct *prev = &prev_p->thread, |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 05646bab4ca6..bb1dc51bab05 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -52,7 +52,7 @@ | |||
52 | 52 | ||
53 | asmlinkage extern void ret_from_fork(void); | 53 | asmlinkage extern void ret_from_fork(void); |
54 | 54 | ||
55 | DEFINE_PER_CPU(unsigned long, old_rsp); | 55 | asmlinkage DEFINE_PER_CPU(unsigned long, old_rsp); |
56 | 56 | ||
57 | /* Prints also some state that isn't saved in the pt_regs */ | 57 | /* Prints also some state that isn't saved in the pt_regs */ |
58 | void __show_regs(struct pt_regs *regs, int all) | 58 | void __show_regs(struct pt_regs *regs, int all) |
@@ -274,7 +274,7 @@ void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) | |||
274 | * Kprobes not supported here. Set the probe on schedule instead. | 274 | * Kprobes not supported here. Set the probe on schedule instead. |
275 | * Function graph tracer not supported too. | 275 | * Function graph tracer not supported too. |
276 | */ | 276 | */ |
277 | __notrace_funcgraph struct task_struct * | 277 | __visible __notrace_funcgraph struct task_struct * |
278 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 278 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
279 | { | 279 | { |
280 | struct thread_struct *prev = &prev_p->thread; | 280 | struct thread_struct *prev = &prev_p->thread; |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2cb9470ea85b..a16bae3f83b3 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -128,46 +128,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, | |||
128 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | 128 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); |
129 | } | 129 | } |
130 | 130 | ||
131 | static struct pvclock_vsyscall_time_info *pvclock_vdso_info; | ||
132 | |||
133 | static struct pvclock_vsyscall_time_info * | ||
134 | pvclock_get_vsyscall_user_time_info(int cpu) | ||
135 | { | ||
136 | if (!pvclock_vdso_info) { | ||
137 | BUG(); | ||
138 | return NULL; | ||
139 | } | ||
140 | |||
141 | return &pvclock_vdso_info[cpu]; | ||
142 | } | ||
143 | |||
144 | struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) | ||
145 | { | ||
146 | return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; | ||
147 | } | ||
148 | |||
149 | #ifdef CONFIG_X86_64 | 131 | #ifdef CONFIG_X86_64 |
150 | static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, | ||
151 | void *v) | ||
152 | { | ||
153 | struct task_migration_notifier *mn = v; | ||
154 | struct pvclock_vsyscall_time_info *pvti; | ||
155 | |||
156 | pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); | ||
157 | |||
158 | /* this is NULL when pvclock vsyscall is not initialized */ | ||
159 | if (unlikely(pvti == NULL)) | ||
160 | return NOTIFY_DONE; | ||
161 | |||
162 | pvti->migrate_count++; | ||
163 | |||
164 | return NOTIFY_DONE; | ||
165 | } | ||
166 | |||
167 | static struct notifier_block pvclock_migrate = { | ||
168 | .notifier_call = pvclock_task_migrate, | ||
169 | }; | ||
170 | |||
171 | /* | 132 | /* |
172 | * Initialize the generic pvclock vsyscall state. This will allocate | 133 | * Initialize the generic pvclock vsyscall state. This will allocate |
173 | * a/some page(s) for the per-vcpu pvclock information, set up a | 134 | * a/some page(s) for the per-vcpu pvclock information, set up a |
@@ -181,17 +142,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, | |||
181 | 142 | ||
182 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); | 143 | WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); |
183 | 144 | ||
184 | pvclock_vdso_info = i; | ||
185 | |||
186 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { | 145 | for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { |
187 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, | 146 | __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, |
188 | __pa(i) + (idx*PAGE_SIZE), | 147 | __pa(i) + (idx*PAGE_SIZE), |
189 | PAGE_KERNEL_VVAR); | 148 | PAGE_KERNEL_VVAR); |
190 | } | 149 | } |
191 | 150 | ||
192 | |||
193 | register_task_migration_notifier(&pvclock_migrate); | ||
194 | |||
195 | return 0; | 151 | return 0; |
196 | } | 152 | } |
197 | #endif | 153 | #endif |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f8ec57815c05..f0de6294b955 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -206,9 +206,9 @@ EXPORT_SYMBOL(boot_cpu_data); | |||
206 | 206 | ||
207 | 207 | ||
208 | #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) | 208 | #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) |
209 | unsigned long mmu_cr4_features; | 209 | __visible unsigned long mmu_cr4_features; |
210 | #else | 210 | #else |
211 | unsigned long mmu_cr4_features = X86_CR4_PAE; | 211 | __visible unsigned long mmu_cr4_features = X86_CR4_PAE; |
212 | #endif | 212 | #endif |
213 | 213 | ||
214 | /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ | 214 | /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ |
@@ -426,25 +426,23 @@ static void __init reserve_initrd(void) | |||
426 | static void __init parse_setup_data(void) | 426 | static void __init parse_setup_data(void) |
427 | { | 427 | { |
428 | struct setup_data *data; | 428 | struct setup_data *data; |
429 | u64 pa_data; | 429 | u64 pa_data, pa_next; |
430 | 430 | ||
431 | pa_data = boot_params.hdr.setup_data; | 431 | pa_data = boot_params.hdr.setup_data; |
432 | while (pa_data) { | 432 | while (pa_data) { |
433 | u32 data_len, map_len; | 433 | u32 data_len, map_len, data_type; |
434 | 434 | ||
435 | map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), | 435 | map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), |
436 | (u64)sizeof(struct setup_data)); | 436 | (u64)sizeof(struct setup_data)); |
437 | data = early_memremap(pa_data, map_len); | 437 | data = early_memremap(pa_data, map_len); |
438 | data_len = data->len + sizeof(struct setup_data); | 438 | data_len = data->len + sizeof(struct setup_data); |
439 | if (data_len > map_len) { | 439 | data_type = data->type; |
440 | early_iounmap(data, map_len); | 440 | pa_next = data->next; |
441 | data = early_memremap(pa_data, data_len); | 441 | early_iounmap(data, map_len); |
442 | map_len = data_len; | ||
443 | } | ||
444 | 442 | ||
445 | switch (data->type) { | 443 | switch (data_type) { |
446 | case SETUP_E820_EXT: | 444 | case SETUP_E820_EXT: |
447 | parse_e820_ext(data); | 445 | parse_e820_ext(pa_data, data_len); |
448 | break; | 446 | break; |
449 | case SETUP_DTB: | 447 | case SETUP_DTB: |
450 | add_dtb(pa_data); | 448 | add_dtb(pa_data); |
@@ -452,8 +450,7 @@ static void __init parse_setup_data(void) | |||
452 | default: | 450 | default: |
453 | break; | 451 | break; |
454 | } | 452 | } |
455 | pa_data = data->next; | 453 | pa_data = pa_next; |
456 | early_iounmap(data, map_len); | ||
457 | } | 454 | } |
458 | } | 455 | } |
459 | 456 | ||
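The reworked parse_setup_data() loop copies the fields it needs (type, length, next pointer) out of the temporary early mapping and drops the mapping before dispatching, rather than growing the mapping to cover the whole blob; parse_e820_ext() is accordingly handed the physical address and length and remaps the payload itself. Condensed, the walk over the physically linked setup_data list now has this shape (handle_setup_data() is a hypothetical stand-in for the switch above, and the mapping length is simplified):

u64 pa = boot_params.hdr.setup_data;

while (pa) {
	struct setup_data *data;
	u32 len, type;
	u64 next;

	data = early_memremap(pa, sizeof(*data));
	len  = data->len + sizeof(*data);
	type = data->type;
	next = data->next;
	early_iounmap(data, sizeof(*data));	/* mapping gone before dispatch */

	handle_setup_data(type, pa, len);	/* remaps pa if it needs the payload */
	pa = next;
}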
@@ -1070,7 +1067,7 @@ void __init setup_arch(char **cmdline_p) | |||
1070 | 1067 | ||
1071 | cleanup_highmap(); | 1068 | cleanup_highmap(); |
1072 | 1069 | ||
1073 | memblock.current_limit = ISA_END_ADDRESS; | 1070 | memblock_set_current_limit(ISA_END_ADDRESS); |
1074 | memblock_x86_fill(); | 1071 | memblock_x86_fill(); |
1075 | 1072 | ||
1076 | /* | 1073 | /* |
@@ -1103,7 +1100,7 @@ void __init setup_arch(char **cmdline_p) | |||
1103 | 1100 | ||
1104 | setup_real_mode(); | 1101 | setup_real_mode(); |
1105 | 1102 | ||
1106 | memblock.current_limit = get_max_mapped(); | 1103 | memblock_set_current_limit(get_max_mapped()); |
1107 | dma_contiguous_reserve(0); | 1104 | dma_contiguous_reserve(0); |
1108 | 1105 | ||
1109 | /* | 1106 | /* |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index cf913587d4dd..9e5de6813e1f 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -358,7 +358,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, | |||
358 | else | 358 | else |
359 | put_user_ex(0, &frame->uc.uc_flags); | 359 | put_user_ex(0, &frame->uc.uc_flags); |
360 | put_user_ex(0, &frame->uc.uc_link); | 360 | put_user_ex(0, &frame->uc.uc_link); |
361 | err |= __save_altstack(&frame->uc.uc_stack, regs->sp); | 361 | save_altstack_ex(&frame->uc.uc_stack, regs->sp); |
362 | 362 | ||
363 | /* Set up to return from userspace. */ | 363 | /* Set up to return from userspace. */ |
364 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); | 364 | restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); |
@@ -423,7 +423,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, | |||
423 | else | 423 | else |
424 | put_user_ex(0, &frame->uc.uc_flags); | 424 | put_user_ex(0, &frame->uc.uc_flags); |
425 | put_user_ex(0, &frame->uc.uc_link); | 425 | put_user_ex(0, &frame->uc.uc_link); |
426 | err |= __save_altstack(&frame->uc.uc_stack, regs->sp); | 426 | save_altstack_ex(&frame->uc.uc_stack, regs->sp); |
427 | 427 | ||
428 | /* Set up to return from userspace. If provided, use a stub | 428 | /* Set up to return from userspace. If provided, use a stub |
429 | already in userspace. */ | 429 | already in userspace. */ |
@@ -490,7 +490,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, | |||
490 | else | 490 | else |
491 | put_user_ex(0, &frame->uc.uc_flags); | 491 | put_user_ex(0, &frame->uc.uc_flags); |
492 | put_user_ex(0, &frame->uc.uc_link); | 492 | put_user_ex(0, &frame->uc.uc_link); |
493 | err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); | 493 | compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); |
494 | put_user_ex(0, &frame->uc.uc__pad0); | 494 | put_user_ex(0, &frame->uc.uc__pad0); |
495 | 495 | ||
496 | if (ksig->ka.sa.sa_flags & SA_RESTORER) { | 496 | if (ksig->ka.sa.sa_flags & SA_RESTORER) { |
@@ -533,7 +533,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, | |||
533 | * Do a signal return; undo the signal stack. | 533 | * Do a signal return; undo the signal stack. |
534 | */ | 534 | */ |
535 | #ifdef CONFIG_X86_32 | 535 | #ifdef CONFIG_X86_32 |
536 | unsigned long sys_sigreturn(void) | 536 | asmlinkage unsigned long sys_sigreturn(void) |
537 | { | 537 | { |
538 | struct pt_regs *regs = current_pt_regs(); | 538 | struct pt_regs *regs = current_pt_regs(); |
539 | struct sigframe __user *frame; | 539 | struct sigframe __user *frame; |
@@ -562,7 +562,7 @@ badframe: | |||
562 | } | 562 | } |
563 | #endif /* CONFIG_X86_32 */ | 563 | #endif /* CONFIG_X86_32 */ |
564 | 564 | ||
565 | long sys_rt_sigreturn(void) | 565 | asmlinkage long sys_rt_sigreturn(void) |
566 | { | 566 | { |
567 | struct pt_regs *regs = current_pt_regs(); | 567 | struct pt_regs *regs = current_pt_regs(); |
568 | struct rt_sigframe __user *frame; | 568 | struct rt_sigframe __user *frame; |
@@ -728,7 +728,7 @@ static void do_signal(struct pt_regs *regs) | |||
728 | * notification of userspace execution resumption | 728 | * notification of userspace execution resumption |
729 | * - triggered by the TIF_WORK_MASK flags | 729 | * - triggered by the TIF_WORK_MASK flags |
730 | */ | 730 | */ |
731 | void | 731 | __visible void |
732 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 732 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
733 | { | 733 | { |
734 | user_exit(); | 734 | user_exit(); |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index cdaa347dfcad..7c3a5a61f2e4 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -256,7 +256,7 @@ static inline void __smp_reschedule_interrupt(void) | |||
256 | scheduler_ipi(); | 256 | scheduler_ipi(); |
257 | } | 257 | } |
258 | 258 | ||
259 | void smp_reschedule_interrupt(struct pt_regs *regs) | 259 | __visible void smp_reschedule_interrupt(struct pt_regs *regs) |
260 | { | 260 | { |
261 | ack_APIC_irq(); | 261 | ack_APIC_irq(); |
262 | __smp_reschedule_interrupt(); | 262 | __smp_reschedule_interrupt(); |
@@ -271,7 +271,7 @@ static inline void smp_entering_irq(void) | |||
271 | irq_enter(); | 271 | irq_enter(); |
272 | } | 272 | } |
273 | 273 | ||
274 | void smp_trace_reschedule_interrupt(struct pt_regs *regs) | 274 | __visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) |
275 | { | 275 | { |
276 | /* | 276 | /* |
277 | * Need to call irq_enter() before calling the trace point. | 277 | * Need to call irq_enter() before calling the trace point. |
@@ -295,14 +295,14 @@ static inline void __smp_call_function_interrupt(void) | |||
295 | inc_irq_stat(irq_call_count); | 295 | inc_irq_stat(irq_call_count); |
296 | } | 296 | } |
297 | 297 | ||
298 | void smp_call_function_interrupt(struct pt_regs *regs) | 298 | __visible void smp_call_function_interrupt(struct pt_regs *regs) |
299 | { | 299 | { |
300 | smp_entering_irq(); | 300 | smp_entering_irq(); |
301 | __smp_call_function_interrupt(); | 301 | __smp_call_function_interrupt(); |
302 | exiting_irq(); | 302 | exiting_irq(); |
303 | } | 303 | } |
304 | 304 | ||
305 | void smp_trace_call_function_interrupt(struct pt_regs *regs) | 305 | __visible void smp_trace_call_function_interrupt(struct pt_regs *regs) |
306 | { | 306 | { |
307 | smp_entering_irq(); | 307 | smp_entering_irq(); |
308 | trace_call_function_entry(CALL_FUNCTION_VECTOR); | 308 | trace_call_function_entry(CALL_FUNCTION_VECTOR); |
@@ -317,14 +317,14 @@ static inline void __smp_call_function_single_interrupt(void) | |||
317 | inc_irq_stat(irq_call_count); | 317 | inc_irq_stat(irq_call_count); |
318 | } | 318 | } |
319 | 319 | ||
320 | void smp_call_function_single_interrupt(struct pt_regs *regs) | 320 | __visible void smp_call_function_single_interrupt(struct pt_regs *regs) |
321 | { | 321 | { |
322 | smp_entering_irq(); | 322 | smp_entering_irq(); |
323 | __smp_call_function_single_interrupt(); | 323 | __smp_call_function_single_interrupt(); |
324 | exiting_irq(); | 324 | exiting_irq(); |
325 | } | 325 | } |
326 | 326 | ||
327 | void smp_trace_call_function_single_interrupt(struct pt_regs *regs) | 327 | __visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs) |
328 | { | 328 | { |
329 | smp_entering_irq(); | 329 | smp_entering_irq(); |
330 | trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); | 330 | trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index dbded5aedb81..30277e27431a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, | |||
101 | *begin = new_begin; | 101 | *begin = new_begin; |
102 | } | 102 | } |
103 | } else { | 103 | } else { |
104 | *begin = TASK_UNMAPPED_BASE; | 104 | *begin = current->mm->mmap_legacy_base; |
105 | *end = TASK_SIZE; | 105 | *end = TASK_SIZE; |
106 | } | 106 | } |
107 | } | 107 | } |
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c index 147fcd4941c4..e9bcd57d8a9e 100644 --- a/arch/x86/kernel/syscall_32.c +++ b/arch/x86/kernel/syscall_32.c | |||
@@ -15,7 +15,7 @@ typedef asmlinkage void (*sys_call_ptr_t)(void); | |||
15 | 15 | ||
16 | extern asmlinkage void sys_ni_syscall(void); | 16 | extern asmlinkage void sys_ni_syscall(void); |
17 | 17 | ||
18 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { | 18 | __visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { |
19 | /* | 19 | /* |
20 | * Smells like a compiler bug -- it doesn't work | 20 | * Smells like a compiler bug -- it doesn't work |
21 | * when the & below is removed. | 21 | * when the & below is removed. |
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 5c7f8c20da74..4ac730b37f0b 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/sys.h> | 4 | #include <linux/sys.h> |
5 | #include <linux/cache.h> | 5 | #include <linux/cache.h> |
6 | #include <asm/asm-offsets.h> | 6 | #include <asm/asm-offsets.h> |
7 | #include <asm/syscall.h> | ||
7 | 8 | ||
8 | #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) | 9 | #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) |
9 | 10 | ||
@@ -19,11 +20,9 @@ | |||
19 | 20 | ||
20 | #define __SYSCALL_64(nr, sym, compat) [nr] = sym, | 21 | #define __SYSCALL_64(nr, sym, compat) [nr] = sym, |
21 | 22 | ||
22 | typedef void (*sys_call_ptr_t)(void); | ||
23 | |||
24 | extern void sys_ni_syscall(void); | 23 | extern void sys_ni_syscall(void); |
25 | 24 | ||
26 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { | 25 | asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { |
27 | /* | 26 | /* |
28 | * Smells like a compiler bug -- it doesn't work | 27 | * Smells like a compiler bug -- it doesn't work |
29 | * when the & below is removed. | 28 | * when the & below is removed. |
diff --git a/arch/x86/kernel/sysfb.c b/arch/x86/kernel/sysfb.c new file mode 100644 index 000000000000..193ec2ce46c7 --- /dev/null +++ b/arch/x86/kernel/sysfb.c | |||
@@ -0,0 +1,74 @@ | |||
1 | /* | ||
2 | * Generic System Framebuffers on x86 | ||
3 | * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License as published by the Free | ||
7 | * Software Foundation; either version 2 of the License, or (at your option) | ||
8 | * any later version. | ||
9 | */ | ||
10 | |||
11 | /* | ||
12 | * Simple-Framebuffer support for x86 systems | ||
13 | * Create a platform-device for any available boot framebuffer. The | ||
14 | * simple-framebuffer platform device is already available on DT systems, so | ||
15 | * this module parses the global "screen_info" object and creates a suitable | ||
16 | * platform device compatible with the "simple-framebuffer" DT object. If | ||
17 | * the framebuffer is incompatible, we instead create a legacy | ||
18 | * "vesa-framebuffer", "efi-framebuffer" or "platform-framebuffer" device and | ||
19 | * pass the screen_info as platform_data. This allows legacy drivers | ||
20 | * to pick these devices up without messing with simple-framebuffer drivers. | ||
21 | * The global "screen_info" is still valid at all times. | ||
22 | * | ||
23 | * If CONFIG_X86_SYSFB is not selected, we never register "simple-framebuffer" | ||
24 | * platform devices, but only use legacy framebuffer devices for | ||
25 | * backwards compatibility. | ||
26 | * | ||
27 | * TODO: We set the dev_id field of all platform-devices to 0. This allows | ||
28 | * other x86 OF/DT parsers to create such devices, too. However, they must | ||
29 | * start at offset 1 for this to work. | ||
30 | */ | ||
31 | |||
32 | #include <linux/err.h> | ||
33 | #include <linux/init.h> | ||
34 | #include <linux/kernel.h> | ||
35 | #include <linux/mm.h> | ||
36 | #include <linux/platform_data/simplefb.h> | ||
37 | #include <linux/platform_device.h> | ||
38 | #include <linux/screen_info.h> | ||
39 | #include <asm/sysfb.h> | ||
40 | |||
41 | static __init int sysfb_init(void) | ||
42 | { | ||
43 | struct screen_info *si = &screen_info; | ||
44 | struct simplefb_platform_data mode; | ||
45 | struct platform_device *pd; | ||
46 | const char *name; | ||
47 | bool compatible; | ||
48 | int ret; | ||
49 | |||
50 | sysfb_apply_efi_quirks(); | ||
51 | |||
52 | /* try to create a simple-framebuffer device */ | ||
53 | compatible = parse_mode(si, &mode); | ||
54 | if (compatible) { | ||
55 | ret = create_simplefb(si, &mode); | ||
56 | if (!ret) | ||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | /* if the FB is incompatible, create a legacy framebuffer device */ | ||
61 | if (si->orig_video_isVGA == VIDEO_TYPE_EFI) | ||
62 | name = "efi-framebuffer"; | ||
63 | else if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) | ||
64 | name = "vesa-framebuffer"; | ||
65 | else | ||
66 | name = "platform-framebuffer"; | ||
67 | |||
68 | pd = platform_device_register_resndata(NULL, name, 0, | ||
69 | NULL, 0, si, sizeof(*si)); | ||
70 | return IS_ERR(pd) ? PTR_ERR(pd) : 0; | ||
71 | } | ||
72 | |||
73 | /* must execute after PCI subsystem for EFI quirks */ | ||
74 | device_initcall(sysfb_init); | ||
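When parse_mode() succeeds, create_simplefb() registers a "simple-framebuffer" platform device whose platform_data is a struct simplefb_platform_data describing the boot framebuffer, mirroring the legacy registration visible above. Roughly, that registration amounts to the following (the aperture address, geometry and format string are made-up example values; the format must name an entry of SIMPLEFB_FORMATS):

struct resource res = DEFINE_RES_MEM(0xe0000000, 1024 * 768 * 4);	/* example aperture */
struct simplefb_platform_data mode = {
	.width	= 1024,
	.height	= 768,
	.stride	= 1024 * 4,
	.format	= "x8r8g8b8",
};
struct platform_device *pd;

pd = platform_device_register_resndata(NULL, "simple-framebuffer", 0,
				       &res, 1, &mode, sizeof(mode));

A "simple-framebuffer" driver can then probe purely from the resource and platform_data, without ever touching the global screen_info.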
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c new file mode 100644 index 000000000000..b285d4e8c68e --- /dev/null +++ b/arch/x86/kernel/sysfb_efi.c | |||
@@ -0,0 +1,214 @@ | |||
1 | /* | ||
2 | * Generic System Framebuffers on x86 | ||
3 | * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> | ||
4 | * | ||
5 | * EFI Quirks Copyright (c) 2006 Edgar Hucek <gimli@dark-green.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms of the GNU General Public License as published by the Free | ||
9 | * Software Foundation; either version 2 of the License, or (at your option) | ||
10 | * any later version. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * EFI Quirks | ||
15 | * Several EFI systems do not correctly advertise their boot framebuffers. | ||
16 | * Hence, we use this static table of known broken machines and fix up the | ||
17 | * information so framebuffer drivers can load correctly. | ||
18 | */ | ||
19 | |||
20 | #include <linux/dmi.h> | ||
21 | #include <linux/err.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/mm.h> | ||
25 | #include <linux/pci.h> | ||
26 | #include <linux/screen_info.h> | ||
27 | #include <video/vga.h> | ||
28 | #include <asm/sysfb.h> | ||
29 | |||
30 | enum { | ||
31 | OVERRIDE_NONE = 0x0, | ||
32 | OVERRIDE_BASE = 0x1, | ||
33 | OVERRIDE_STRIDE = 0x2, | ||
34 | OVERRIDE_HEIGHT = 0x4, | ||
35 | OVERRIDE_WIDTH = 0x8, | ||
36 | }; | ||
37 | |||
38 | struct efifb_dmi_info efifb_dmi_list[] = { | ||
39 | [M_I17] = { "i17", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, | ||
40 | [M_I20] = { "i20", 0x80010000, 1728 * 4, 1680, 1050, OVERRIDE_NONE }, /* guess */ | ||
41 | [M_I20_SR] = { "imac7", 0x40010000, 1728 * 4, 1680, 1050, OVERRIDE_NONE }, | ||
42 | [M_I24] = { "i24", 0x80010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, /* guess */ | ||
43 | [M_I24_8_1] = { "imac8", 0xc0060000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, | ||
44 | [M_I24_10_1] = { "imac10", 0xc0010000, 2048 * 4, 1920, 1080, OVERRIDE_NONE }, | ||
45 | [M_I27_11_1] = { "imac11", 0xc0010000, 2560 * 4, 2560, 1440, OVERRIDE_NONE }, | ||
46 | [M_MINI] = { "mini", 0x80000000, 2048 * 4, 1024, 768, OVERRIDE_NONE }, | ||
47 | [M_MINI_3_1] = { "mini31", 0x40010000, 1024 * 4, 1024, 768, OVERRIDE_NONE }, | ||
48 | [M_MINI_4_1] = { "mini41", 0xc0010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, | ||
49 | [M_MB] = { "macbook", 0x80000000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, | ||
50 | [M_MB_5_1] = { "macbook51", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, | ||
51 | [M_MB_6_1] = { "macbook61", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, | ||
52 | [M_MB_7_1] = { "macbook71", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, | ||
53 | [M_MBA] = { "mba", 0x80000000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, | ||
54 | /* 11" Macbook Air 3,1 passes the wrong stride */ | ||
55 | [M_MBA_3] = { "mba3", 0, 2048 * 4, 0, 0, OVERRIDE_STRIDE }, | ||
56 | [M_MBP] = { "mbp", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, | ||
57 | [M_MBP_2] = { "mbp2", 0, 0, 0, 0, OVERRIDE_NONE }, /* placeholder */ | ||
58 | [M_MBP_2_2] = { "mbp22", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, | ||
59 | [M_MBP_SR] = { "mbp3", 0x80030000, 2048 * 4, 1440, 900, OVERRIDE_NONE }, | ||
60 | [M_MBP_4] = { "mbp4", 0xc0060000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, | ||
61 | [M_MBP_5_1] = { "mbp51", 0xc0010000, 2048 * 4, 1440, 900, OVERRIDE_NONE }, | ||
62 | [M_MBP_5_2] = { "mbp52", 0xc0010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, | ||
63 | [M_MBP_5_3] = { "mbp53", 0xd0010000, 2048 * 4, 1440, 900, OVERRIDE_NONE }, | ||
64 | [M_MBP_6_1] = { "mbp61", 0x90030000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, | ||
65 | [M_MBP_6_2] = { "mbp62", 0x90030000, 2048 * 4, 1680, 1050, OVERRIDE_NONE }, | ||
66 | [M_MBP_7_1] = { "mbp71", 0xc0010000, 2048 * 4, 1280, 800, OVERRIDE_NONE }, | ||
67 | [M_MBP_8_2] = { "mbp82", 0x90010000, 1472 * 4, 1440, 900, OVERRIDE_NONE }, | ||
68 | [M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE } | ||
69 | }; | ||
70 | |||
71 | #define choose_value(dmivalue, fwvalue, field, flags) ({ \ | ||
72 | typeof(fwvalue) _ret_ = fwvalue; \ | ||
73 | if ((flags) & (field)) \ | ||
74 | _ret_ = dmivalue; \ | ||
75 | else if ((fwvalue) == 0) \ | ||
76 | _ret_ = dmivalue; \ | ||
77 | _ret_; \ | ||
78 | }) | ||
79 | |||
80 | static int __init efifb_set_system(const struct dmi_system_id *id) | ||
81 | { | ||
82 | struct efifb_dmi_info *info = id->driver_data; | ||
83 | |||
84 | if (info->base == 0 && info->height == 0 && info->width == 0 && | ||
85 | info->stride == 0) | ||
86 | return 0; | ||
87 | |||
88 | /* Trust the bootloader over the DMI tables */ | ||
89 | if (screen_info.lfb_base == 0) { | ||
90 | #if defined(CONFIG_PCI) | ||
91 | struct pci_dev *dev = NULL; | ||
92 | int found_bar = 0; | ||
93 | #endif | ||
94 | if (info->base) { | ||
95 | screen_info.lfb_base = choose_value(info->base, | ||
96 | screen_info.lfb_base, OVERRIDE_BASE, | ||
97 | info->flags); | ||
98 | |||
99 | #if defined(CONFIG_PCI) | ||
100 | /* make sure that the address in the table is actually | ||
101 | * on a VGA device's PCI BAR */ | ||
102 | |||
103 | for_each_pci_dev(dev) { | ||
104 | int i; | ||
105 | if ((dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) | ||
106 | continue; | ||
107 | for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { | ||
108 | resource_size_t start, end; | ||
109 | |||
110 | start = pci_resource_start(dev, i); | ||
111 | if (start == 0) | ||
112 | break; | ||
113 | end = pci_resource_end(dev, i); | ||
114 | if (screen_info.lfb_base >= start && | ||
115 | screen_info.lfb_base < end) { | ||
116 | found_bar = 1; | ||
117 | } | ||
118 | } | ||
119 | } | ||
120 | if (!found_bar) | ||
121 | screen_info.lfb_base = 0; | ||
122 | #endif | ||
123 | } | ||
124 | } | ||
125 | if (screen_info.lfb_base) { | ||
126 | screen_info.lfb_linelength = choose_value(info->stride, | ||
127 | screen_info.lfb_linelength, OVERRIDE_STRIDE, | ||
128 | info->flags); | ||
129 | screen_info.lfb_width = choose_value(info->width, | ||
130 | screen_info.lfb_width, OVERRIDE_WIDTH, | ||
131 | info->flags); | ||
132 | screen_info.lfb_height = choose_value(info->height, | ||
133 | screen_info.lfb_height, OVERRIDE_HEIGHT, | ||
134 | info->flags); | ||
135 | if (screen_info.orig_video_isVGA == 0) | ||
136 | screen_info.orig_video_isVGA = VIDEO_TYPE_EFI; | ||
137 | } else { | ||
138 | screen_info.lfb_linelength = 0; | ||
139 | screen_info.lfb_width = 0; | ||
140 | screen_info.lfb_height = 0; | ||
141 | screen_info.orig_video_isVGA = 0; | ||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | printk(KERN_INFO "efifb: dmi detected %s - framebuffer at 0x%08x " | ||
146 | "(%dx%d, stride %d)\n", id->ident, | ||
147 | screen_info.lfb_base, screen_info.lfb_width, | ||
148 | screen_info.lfb_height, screen_info.lfb_linelength); | ||
149 | |||
150 | return 1; | ||
151 | } | ||
152 | |||
153 | #define EFIFB_DMI_SYSTEM_ID(vendor, name, enumid) \ | ||
154 | { \ | ||
155 | efifb_set_system, \ | ||
156 | name, \ | ||
157 | { \ | ||
158 | DMI_MATCH(DMI_BIOS_VENDOR, vendor), \ | ||
159 | DMI_MATCH(DMI_PRODUCT_NAME, name) \ | ||
160 | }, \ | ||
161 | &efifb_dmi_list[enumid] \ | ||
162 | } | ||
163 | |||
164 | static const struct dmi_system_id efifb_dmi_system_table[] __initconst = { | ||
165 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac4,1", M_I17), | ||
166 | /* At least one of these two will be right; maybe both? */ | ||
167 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac5,1", M_I20), | ||
168 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac5,1", M_I20), | ||
169 | /* At least one of these two will be right; maybe both? */ | ||
170 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac6,1", M_I24), | ||
171 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac6,1", M_I24), | ||
172 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac7,1", M_I20_SR), | ||
173 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac8,1", M_I24_8_1), | ||
174 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac10,1", M_I24_10_1), | ||
175 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac11,1", M_I27_11_1), | ||
176 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "Macmini1,1", M_MINI), | ||
177 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini3,1", M_MINI_3_1), | ||
178 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini4,1", M_MINI_4_1), | ||
179 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook1,1", M_MB), | ||
180 | /* At least one of these two will be right; maybe both? */ | ||
181 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook2,1", M_MB), | ||
182 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook2,1", M_MB), | ||
183 | /* At least one of these two will be right; maybe both? */ | ||
184 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook3,1", M_MB), | ||
185 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook3,1", M_MB), | ||
186 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook4,1", M_MB), | ||
187 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook5,1", M_MB_5_1), | ||
188 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook6,1", M_MB_6_1), | ||
189 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook7,1", M_MB_7_1), | ||
190 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir1,1", M_MBA), | ||
191 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir3,1", M_MBA_3), | ||
192 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro1,1", M_MBP), | ||
193 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,1", M_MBP_2), | ||
194 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,2", M_MBP_2_2), | ||
195 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro2,1", M_MBP_2), | ||
196 | EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro3,1", M_MBP_SR), | ||
197 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro3,1", M_MBP_SR), | ||
198 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro4,1", M_MBP_4), | ||
199 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,1", M_MBP_5_1), | ||
200 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,2", M_MBP_5_2), | ||
201 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,3", M_MBP_5_3), | ||
202 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,1", M_MBP_6_1), | ||
203 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,2", M_MBP_6_2), | ||
204 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro7,1", M_MBP_7_1), | ||
205 | EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro8,2", M_MBP_8_2), | ||
206 | {}, | ||
207 | }; | ||
208 | |||
209 | __init void sysfb_apply_efi_quirks(void) | ||
210 | { | ||
211 | if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || | ||
212 | !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS)) | ||
213 | dmi_check_system(efifb_dmi_system_table); | ||
214 | } | ||
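
The EFIFB_DMI_SYSTEM_ID() macro above packs a vendor/product DMI match plus the per-model framebuffer data into one positional dmi_system_id initializer, with efifb_set_system as the callback. Purely as a hedged illustration (the model string and enum value below are made up and are not part of this patch; the member names in the comments are the usual dmi_system_id fields), adding another machine would look like this:

	/* hypothetical entry -- "iMac12,1" and M_I27_12_1 are illustrative names */
	EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac12,1", M_I27_12_1),

	/* which, by the macro's positional initialization, is equivalent to: */
	{
		efifb_set_system,                        /* .callback    */
		"iMac12,1",                              /* .ident       */
		{
			DMI_MATCH(DMI_BIOS_VENDOR, "Apple Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "iMac12,1"),
		},
		&efifb_dmi_list[M_I27_12_1]              /* .driver_data */
	},
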
diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c new file mode 100644 index 000000000000..22513e96b012 --- /dev/null +++ b/arch/x86/kernel/sysfb_simplefb.c | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * Generic System Framebuffers on x86 | ||
3 | * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License as published by the Free | ||
7 | * Software Foundation; either version 2 of the License, or (at your option) | ||
8 | * any later version. | ||
9 | */ | ||
10 | |||
11 | /* | ||
12 | * simple-framebuffer probing | ||
13 | * Try to convert "screen_info" into a "simple-framebuffer" compatible mode. | ||
14 | * If the mode is incompatible, we return "false" and let the caller create | ||
15 | * legacy nodes instead. | ||
16 | */ | ||
17 | |||
18 | #include <linux/err.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/mm.h> | ||
22 | #include <linux/platform_data/simplefb.h> | ||
23 | #include <linux/platform_device.h> | ||
24 | #include <linux/screen_info.h> | ||
25 | #include <asm/sysfb.h> | ||
26 | |||
27 | static const char simplefb_resname[] = "BOOTFB"; | ||
28 | static const struct simplefb_format formats[] = SIMPLEFB_FORMATS; | ||
29 | |||
30 | /* try parsing x86 screen_info into a simple-framebuffer mode struct */ | ||
31 | __init bool parse_mode(const struct screen_info *si, | ||
32 | struct simplefb_platform_data *mode) | ||
33 | { | ||
34 | const struct simplefb_format *f; | ||
35 | __u8 type; | ||
36 | unsigned int i; | ||
37 | |||
38 | type = si->orig_video_isVGA; | ||
39 | if (type != VIDEO_TYPE_VLFB && type != VIDEO_TYPE_EFI) | ||
40 | return false; | ||
41 | |||
42 | for (i = 0; i < ARRAY_SIZE(formats); ++i) { | ||
43 | f = &formats[i]; | ||
44 | if (si->lfb_depth == f->bits_per_pixel && | ||
45 | si->red_size == f->red.length && | ||
46 | si->red_pos == f->red.offset && | ||
47 | si->green_size == f->green.length && | ||
48 | si->green_pos == f->green.offset && | ||
49 | si->blue_size == f->blue.length && | ||
50 | si->blue_pos == f->blue.offset && | ||
51 | si->rsvd_size == f->transp.length && | ||
52 | si->rsvd_pos == f->transp.offset) { | ||
53 | mode->format = f->name; | ||
54 | mode->width = si->lfb_width; | ||
55 | mode->height = si->lfb_height; | ||
56 | mode->stride = si->lfb_linelength; | ||
57 | return true; | ||
58 | } | ||
59 | } | ||
60 | |||
61 | return false; | ||
62 | } | ||
63 | |||
64 | __init int create_simplefb(const struct screen_info *si, | ||
65 | const struct simplefb_platform_data *mode) | ||
66 | { | ||
67 | struct platform_device *pd; | ||
68 | struct resource res; | ||
69 | unsigned long len; | ||
70 | |||
71 | /* don't use lfb_size as it may contain the whole VMEM instead of only | ||
72 | * the part that is occupied by the framebuffer */ | ||
73 | len = mode->height * mode->stride; | ||
74 | len = PAGE_ALIGN(len); | ||
75 | if (len > si->lfb_size << 16) { | ||
76 | printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); | ||
77 | return -EINVAL; | ||
78 | } | ||
79 | |||
80 | /* setup IORESOURCE_MEM as framebuffer memory */ | ||
81 | memset(&res, 0, sizeof(res)); | ||
82 | res.flags = IORESOURCE_MEM; | ||
83 | res.name = simplefb_resname; | ||
84 | res.start = si->lfb_base; | ||
85 | res.end = si->lfb_base + len - 1; | ||
86 | if (res.end <= res.start) | ||
87 | return -EINVAL; | ||
88 | |||
89 | pd = platform_device_register_resndata(NULL, "simple-framebuffer", 0, | ||
90 | &res, 1, mode, sizeof(*mode)); | ||
91 | if (IS_ERR(pd)) | ||
92 | return PTR_ERR(pd); | ||
93 | |||
94 | return 0; | ||
95 | } | ||
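
The file's opening comment says the caller falls back to legacy nodes when parse_mode() reports an incompatible mode. A minimal sketch of such a caller, assuming the core sysfb setup code (which lives outside this hunk) glues the two helpers together roughly like this:

	/* illustrative caller only -- the real wiring lives in arch/x86/kernel/sysfb.c */
	static __init int sysfb_try_simplefb(void)
	{
		struct simplefb_platform_data mode;

		if (parse_mode(&screen_info, &mode))
			/* screen_info matched one of SIMPLEFB_FORMATS:
			 * register the "simple-framebuffer" platform device */
			return create_simplefb(&screen_info, &mode);

		/* incompatible mode: create legacy platform devices instead */
		return -ENODEV;
	}
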
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index addf7b58f4e8..91a4496db434 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -301,6 +301,15 @@ static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) | |||
301 | return 0; | 301 | return 0; |
302 | } | 302 | } |
303 | 303 | ||
304 | static int tboot_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b) | ||
305 | { | ||
306 | if (!tboot_enabled()) | ||
307 | return 0; | ||
308 | |||
309 | pr_warning("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)"); | ||
310 | return -ENODEV; | ||
311 | } | ||
312 | |||
304 | static atomic_t ap_wfs_count; | 313 | static atomic_t ap_wfs_count; |
305 | 314 | ||
306 | static int tboot_wait_for_aps(int num_aps) | 315 | static int tboot_wait_for_aps(int num_aps) |
@@ -422,6 +431,7 @@ static __init int tboot_late_init(void) | |||
422 | #endif | 431 | #endif |
423 | 432 | ||
424 | acpi_os_set_prepare_sleep(&tboot_sleep); | 433 | acpi_os_set_prepare_sleep(&tboot_sleep); |
434 | acpi_os_set_prepare_extended_sleep(&tboot_extended_sleep); | ||
425 | return 0; | 435 | return 0; |
426 | } | 436 | } |
427 | 437 | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1b23a1c92746..8c8093b146ca 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include <asm/mce.h> | 58 | #include <asm/mce.h> |
59 | #include <asm/fixmap.h> | 59 | #include <asm/fixmap.h> |
60 | #include <asm/mach_traps.h> | 60 | #include <asm/mach_traps.h> |
61 | #include <asm/alternative.h> | ||
61 | 62 | ||
62 | #ifdef CONFIG_X86_64 | 63 | #ifdef CONFIG_X86_64 |
63 | #include <asm/x86_init.h> | 64 | #include <asm/x86_init.h> |
@@ -327,6 +328,9 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co | |||
327 | ftrace_int3_handler(regs)) | 328 | ftrace_int3_handler(regs)) |
328 | return; | 329 | return; |
329 | #endif | 330 | #endif |
331 | if (poke_int3_handler(regs)) | ||
332 | return; | ||
333 | |||
330 | prev_state = exception_enter(); | 334 | prev_state = exception_enter(); |
331 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 335 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
332 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, | 336 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6ff49247edf8..930e5d48f560 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -89,6 +89,12 @@ int check_tsc_unstable(void) | |||
89 | } | 89 | } |
90 | EXPORT_SYMBOL_GPL(check_tsc_unstable); | 90 | EXPORT_SYMBOL_GPL(check_tsc_unstable); |
91 | 91 | ||
92 | int check_tsc_disabled(void) | ||
93 | { | ||
94 | return tsc_disabled; | ||
95 | } | ||
96 | EXPORT_SYMBOL_GPL(check_tsc_disabled); | ||
97 | |||
92 | #ifdef CONFIG_X86_TSC | 98 | #ifdef CONFIG_X86_TSC |
93 | int __init notsc_setup(char *str) | 99 | int __init notsc_setup(char *str) |
94 | { | 100 | { |
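
check_tsc_disabled() mirrors the existing check_tsc_unstable() export and simply hands the tsc_disabled flag to other modules. A hedged usage sketch (the surrounding fast path is illustrative, not taken from this patch):

	/* bail out of a TSC-based fast path when the TSC is unusable,
	 * e.g. disabled via the "notsc" command-line option */
	if (check_tsc_disabled() || check_tsc_unstable())
		return -ENODEV;
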
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a20ecb5b6cbf..b110fe6c03d4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -413,7 +413,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
413 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | 413 | (1 << KVM_FEATURE_CLOCKSOURCE2) | |
414 | (1 << KVM_FEATURE_ASYNC_PF) | | 414 | (1 << KVM_FEATURE_ASYNC_PF) | |
415 | (1 << KVM_FEATURE_PV_EOI) | | 415 | (1 << KVM_FEATURE_PV_EOI) | |
416 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | 416 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | |
417 | (1 << KVM_FEATURE_PV_UNHALT); | ||
417 | 418 | ||
418 | if (sched_info_on()) | 419 | if (sched_info_on()) |
419 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | 420 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); |
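
With KVM_FEATURE_PV_UNHALT now advertised in the KVM paravirt CPUID leaf, a guest can detect the capability with the existing kvm_para_has_feature() helper. A short guest-side sketch (the flag variable is hypothetical, for illustration only):

	/* guest side: only enable the PV unhalt/kick path when the host advertises it */
	if (kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
		pv_unhalt_supported = true;	/* hypothetical flag */
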
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afc11245827c..5439117d5c4c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -79,16 +79,6 @@ static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) | |||
79 | *((u32 *) (apic->regs + reg_off)) = val; | 79 | *((u32 *) (apic->regs + reg_off)) = val; |
80 | } | 80 | } |
81 | 81 | ||
82 | static inline int apic_test_and_set_vector(int vec, void *bitmap) | ||
83 | { | ||
84 | return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
85 | } | ||
86 | |||
87 | static inline int apic_test_and_clear_vector(int vec, void *bitmap) | ||
88 | { | ||
89 | return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
90 | } | ||
91 | |||
92 | static inline int apic_test_vector(int vec, void *bitmap) | 82 | static inline int apic_test_vector(int vec, void *bitmap) |
93 | { | 83 | { |
94 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 84 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
@@ -331,10 +321,10 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | |||
331 | } | 321 | } |
332 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); | 322 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); |
333 | 323 | ||
334 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) | 324 | static inline void apic_set_irr(int vec, struct kvm_lapic *apic) |
335 | { | 325 | { |
336 | apic->irr_pending = true; | 326 | apic->irr_pending = true; |
337 | return apic_test_and_set_vector(vec, apic->regs + APIC_IRR); | 327 | apic_set_vector(vec, apic->regs + APIC_IRR); |
338 | } | 328 | } |
339 | 329 | ||
340 | static inline int apic_search_irr(struct kvm_lapic *apic) | 330 | static inline int apic_search_irr(struct kvm_lapic *apic) |
@@ -681,32 +671,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
681 | if (unlikely(!apic_enabled(apic))) | 671 | if (unlikely(!apic_enabled(apic))) |
682 | break; | 672 | break; |
683 | 673 | ||
674 | result = 1; | ||
675 | |||
684 | if (dest_map) | 676 | if (dest_map) |
685 | __set_bit(vcpu->vcpu_id, dest_map); | 677 | __set_bit(vcpu->vcpu_id, dest_map); |
686 | 678 | ||
687 | if (kvm_x86_ops->deliver_posted_interrupt) { | 679 | if (kvm_x86_ops->deliver_posted_interrupt) |
688 | result = 1; | ||
689 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); | 680 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); |
690 | } else { | 681 | else { |
691 | result = !apic_test_and_set_irr(vector, apic); | 682 | apic_set_irr(vector, apic); |
692 | |||
693 | if (!result) { | ||
694 | if (trig_mode) | ||
695 | apic_debug("level trig mode repeatedly " | ||
696 | "for vector %d", vector); | ||
697 | goto out; | ||
698 | } | ||
699 | 683 | ||
700 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 684 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
701 | kvm_vcpu_kick(vcpu); | 685 | kvm_vcpu_kick(vcpu); |
702 | } | 686 | } |
703 | out: | ||
704 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | 687 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, |
705 | trig_mode, vector, !result); | 688 | trig_mode, vector, false); |
706 | break; | 689 | break; |
707 | 690 | ||
708 | case APIC_DM_REMRD: | 691 | case APIC_DM_REMRD: |
709 | apic_debug("Ignoring delivery mode 3\n"); | 692 | result = 1; |
693 | vcpu->arch.pv.pv_unhalted = 1; | ||
694 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
695 | kvm_vcpu_kick(vcpu); | ||
710 | break; | 696 | break; |
711 | 697 | ||
712 | case APIC_DM_SMI: | 698 | case APIC_DM_SMI: |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9e9285ae9b94..6e2d2c8f230b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -132,8 +132,8 @@ module_param(dbg, bool, 0644); | |||
132 | (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ | 132 | (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ |
133 | * PT32_LEVEL_BITS))) - 1)) | 133 | * PT32_LEVEL_BITS))) - 1)) |
134 | 134 | ||
135 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ | 135 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \ |
136 | | PT64_NX_MASK) | 136 | | shadow_x_mask | shadow_nx_mask) |
137 | 137 | ||
138 | #define ACC_EXEC_MASK 1 | 138 | #define ACC_EXEC_MASK 1 |
139 | #define ACC_WRITE_MASK PT_WRITABLE_MASK | 139 | #define ACC_WRITE_MASK PT_WRITABLE_MASK |
@@ -331,11 +331,6 @@ static int is_large_pte(u64 pte) | |||
331 | return pte & PT_PAGE_SIZE_MASK; | 331 | return pte & PT_PAGE_SIZE_MASK; |
332 | } | 332 | } |
333 | 333 | ||
334 | static int is_dirty_gpte(unsigned long pte) | ||
335 | { | ||
336 | return pte & PT_DIRTY_MASK; | ||
337 | } | ||
338 | |||
339 | static int is_rmap_spte(u64 pte) | 334 | static int is_rmap_spte(u64 pte) |
340 | { | 335 | { |
341 | return is_shadow_present_pte(pte); | 336 | return is_shadow_present_pte(pte); |
@@ -2052,12 +2047,18 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) | |||
2052 | return __shadow_walk_next(iterator, *iterator->sptep); | 2047 | return __shadow_walk_next(iterator, *iterator->sptep); |
2053 | } | 2048 | } |
2054 | 2049 | ||
2055 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | 2050 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) |
2056 | { | 2051 | { |
2057 | u64 spte; | 2052 | u64 spte; |
2058 | 2053 | ||
2054 | BUILD_BUG_ON(VMX_EPT_READABLE_MASK != PT_PRESENT_MASK || | ||
2055 | VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); | ||
2056 | |||
2059 | spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | | 2057 | spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | |
2060 | shadow_user_mask | shadow_x_mask | shadow_accessed_mask; | 2058 | shadow_user_mask | shadow_x_mask; |
2059 | |||
2060 | if (accessed) | ||
2061 | spte |= shadow_accessed_mask; | ||
2061 | 2062 | ||
2062 | mmu_spte_set(sptep, spte); | 2063 | mmu_spte_set(sptep, spte); |
2063 | } | 2064 | } |
@@ -2574,14 +2575,6 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
2574 | mmu_free_roots(vcpu); | 2575 | mmu_free_roots(vcpu); |
2575 | } | 2576 | } |
2576 | 2577 | ||
2577 | static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) | ||
2578 | { | ||
2579 | int bit7; | ||
2580 | |||
2581 | bit7 = (gpte >> 7) & 1; | ||
2582 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; | ||
2583 | } | ||
2584 | |||
2585 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | 2578 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, |
2586 | bool no_dirty_log) | 2579 | bool no_dirty_log) |
2587 | { | 2580 | { |
@@ -2594,26 +2587,6 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2594 | return gfn_to_pfn_memslot_atomic(slot, gfn); | 2587 | return gfn_to_pfn_memslot_atomic(slot, gfn); |
2595 | } | 2588 | } |
2596 | 2589 | ||
2597 | static bool prefetch_invalid_gpte(struct kvm_vcpu *vcpu, | ||
2598 | struct kvm_mmu_page *sp, u64 *spte, | ||
2599 | u64 gpte) | ||
2600 | { | ||
2601 | if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) | ||
2602 | goto no_present; | ||
2603 | |||
2604 | if (!is_present_gpte(gpte)) | ||
2605 | goto no_present; | ||
2606 | |||
2607 | if (!(gpte & PT_ACCESSED_MASK)) | ||
2608 | goto no_present; | ||
2609 | |||
2610 | return false; | ||
2611 | |||
2612 | no_present: | ||
2613 | drop_spte(vcpu->kvm, spte); | ||
2614 | return true; | ||
2615 | } | ||
2616 | |||
2617 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | 2590 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, |
2618 | struct kvm_mmu_page *sp, | 2591 | struct kvm_mmu_page *sp, |
2619 | u64 *start, u64 *end) | 2592 | u64 *start, u64 *end) |
@@ -2710,7 +2683,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2710 | iterator.level - 1, | 2683 | iterator.level - 1, |
2711 | 1, ACC_ALL, iterator.sptep); | 2684 | 1, ACC_ALL, iterator.sptep); |
2712 | 2685 | ||
2713 | link_shadow_page(iterator.sptep, sp); | 2686 | link_shadow_page(iterator.sptep, sp, true); |
2714 | } | 2687 | } |
2715 | } | 2688 | } |
2716 | return emulate; | 2689 | return emulate; |
@@ -2808,7 +2781,7 @@ exit: | |||
2808 | return ret; | 2781 | return ret; |
2809 | } | 2782 | } |
2810 | 2783 | ||
2811 | static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code) | 2784 | static bool page_fault_can_be_fast(u32 error_code) |
2812 | { | 2785 | { |
2813 | /* | 2786 | /* |
2814 | * Do not fix the mmio spte with invalid generation number which | 2787 | * Do not fix the mmio spte with invalid generation number which |
@@ -2861,7 +2834,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | |||
2861 | bool ret = false; | 2834 | bool ret = false; |
2862 | u64 spte = 0ull; | 2835 | u64 spte = 0ull; |
2863 | 2836 | ||
2864 | if (!page_fault_can_be_fast(vcpu, error_code)) | 2837 | if (!page_fault_can_be_fast(error_code)) |
2865 | return false; | 2838 | return false; |
2866 | 2839 | ||
2867 | walk_shadow_page_lockless_begin(vcpu); | 2840 | walk_shadow_page_lockless_begin(vcpu); |
@@ -3209,6 +3182,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
3209 | mmu_sync_roots(vcpu); | 3182 | mmu_sync_roots(vcpu); |
3210 | spin_unlock(&vcpu->kvm->mmu_lock); | 3183 | spin_unlock(&vcpu->kvm->mmu_lock); |
3211 | } | 3184 | } |
3185 | EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); | ||
3212 | 3186 | ||
3213 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, | 3187 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, |
3214 | u32 access, struct x86_exception *exception) | 3188 | u32 access, struct x86_exception *exception) |
@@ -3478,6 +3452,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | |||
3478 | ++vcpu->stat.tlb_flush; | 3452 | ++vcpu->stat.tlb_flush; |
3479 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | 3453 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
3480 | } | 3454 | } |
3455 | EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); | ||
3481 | 3456 | ||
3482 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 3457 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
3483 | { | 3458 | { |
@@ -3501,18 +3476,6 @@ static void paging_free(struct kvm_vcpu *vcpu) | |||
3501 | nonpaging_free(vcpu); | 3476 | nonpaging_free(vcpu); |
3502 | } | 3477 | } |
3503 | 3478 | ||
3504 | static inline void protect_clean_gpte(unsigned *access, unsigned gpte) | ||
3505 | { | ||
3506 | unsigned mask; | ||
3507 | |||
3508 | BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); | ||
3509 | |||
3510 | mask = (unsigned)~ACC_WRITE_MASK; | ||
3511 | /* Allow write access to dirty gptes */ | ||
3512 | mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK; | ||
3513 | *access &= mask; | ||
3514 | } | ||
3515 | |||
3516 | static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, | 3479 | static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, |
3517 | unsigned access, int *nr_present) | 3480 | unsigned access, int *nr_present) |
3518 | { | 3481 | { |
@@ -3530,16 +3493,6 @@ static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, | |||
3530 | return false; | 3493 | return false; |
3531 | } | 3494 | } |
3532 | 3495 | ||
3533 | static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte) | ||
3534 | { | ||
3535 | unsigned access; | ||
3536 | |||
3537 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | ||
3538 | access &= ~(gpte >> PT64_NX_SHIFT); | ||
3539 | |||
3540 | return access; | ||
3541 | } | ||
3542 | |||
3543 | static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) | 3496 | static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) |
3544 | { | 3497 | { |
3545 | unsigned index; | 3498 | unsigned index; |
@@ -3549,6 +3502,11 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp | |||
3549 | return mmu->last_pte_bitmap & (1 << index); | 3502 | return mmu->last_pte_bitmap & (1 << index); |
3550 | } | 3503 | } |
3551 | 3504 | ||
3505 | #define PTTYPE_EPT 18 /* arbitrary */ | ||
3506 | #define PTTYPE PTTYPE_EPT | ||
3507 | #include "paging_tmpl.h" | ||
3508 | #undef PTTYPE | ||
3509 | |||
3552 | #define PTTYPE 64 | 3510 | #define PTTYPE 64 |
3553 | #include "paging_tmpl.h" | 3511 | #include "paging_tmpl.h" |
3554 | #undef PTTYPE | 3512 | #undef PTTYPE |
@@ -3563,6 +3521,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3563 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | 3521 | int maxphyaddr = cpuid_maxphyaddr(vcpu); |
3564 | u64 exb_bit_rsvd = 0; | 3522 | u64 exb_bit_rsvd = 0; |
3565 | 3523 | ||
3524 | context->bad_mt_xwr = 0; | ||
3525 | |||
3566 | if (!context->nx) | 3526 | if (!context->nx) |
3567 | exb_bit_rsvd = rsvd_bits(63, 63); | 3527 | exb_bit_rsvd = rsvd_bits(63, 63); |
3568 | switch (context->root_level) { | 3528 | switch (context->root_level) { |
@@ -3618,7 +3578,40 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
3618 | } | 3578 | } |
3619 | } | 3579 | } |
3620 | 3580 | ||
3621 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) | 3581 | static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, |
3582 | struct kvm_mmu *context, bool execonly) | ||
3583 | { | ||
3584 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
3585 | int pte; | ||
3586 | |||
3587 | context->rsvd_bits_mask[0][3] = | ||
3588 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); | ||
3589 | context->rsvd_bits_mask[0][2] = | ||
3590 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); | ||
3591 | context->rsvd_bits_mask[0][1] = | ||
3592 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); | ||
3593 | context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); | ||
3594 | |||
3595 | /* large page */ | ||
3596 | context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; | ||
3597 | context->rsvd_bits_mask[1][2] = | ||
3598 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); | ||
3599 | context->rsvd_bits_mask[1][1] = | ||
3600 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); | ||
3601 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | ||
3602 | |||
3603 | for (pte = 0; pte < 64; pte++) { | ||
3604 | int rwx_bits = pte & 7; | ||
3605 | int mt = pte >> 3; | ||
3606 | if (mt == 0x2 || mt == 0x3 || mt == 0x7 || | ||
3607 | rwx_bits == 0x2 || rwx_bits == 0x6 || | ||
3608 | (rwx_bits == 0x4 && !execonly)) | ||
3609 | context->bad_mt_xwr |= (1ull << pte); | ||
3610 | } | ||
3611 | } | ||
3612 | |||
3613 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, | ||
3614 | struct kvm_mmu *mmu, bool ept) | ||
3622 | { | 3615 | { |
3623 | unsigned bit, byte, pfec; | 3616 | unsigned bit, byte, pfec; |
3624 | u8 map; | 3617 | u8 map; |
@@ -3636,12 +3629,16 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu | |||
3636 | w = bit & ACC_WRITE_MASK; | 3629 | w = bit & ACC_WRITE_MASK; |
3637 | u = bit & ACC_USER_MASK; | 3630 | u = bit & ACC_USER_MASK; |
3638 | 3631 | ||
3639 | /* Not really needed: !nx will cause pte.nx to fault */ | 3632 | if (!ept) { |
3640 | x |= !mmu->nx; | 3633 | /* Not really needed: !nx will cause pte.nx to fault */ |
3641 | /* Allow supervisor writes if !cr0.wp */ | 3634 | x |= !mmu->nx; |
3642 | w |= !is_write_protection(vcpu) && !uf; | 3635 | /* Allow supervisor writes if !cr0.wp */ |
3643 | /* Disallow supervisor fetches of user code if cr4.smep */ | 3636 | w |= !is_write_protection(vcpu) && !uf; |
3644 | x &= !(smep && u && !uf); | 3637 | /* Disallow supervisor fetches of user code if cr4.smep */ |
3638 | x &= !(smep && u && !uf); | ||
3639 | } else | ||
3640 | /* Not really needed: no U/S accesses on ept */ | ||
3641 | u = 1; | ||
3645 | 3642 | ||
3646 | fault = (ff && !x) || (uf && !u) || (wf && !w); | 3643 | fault = (ff && !x) || (uf && !u) || (wf && !w); |
3647 | map |= fault << bit; | 3644 | map |= fault << bit; |
@@ -3676,7 +3673,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3676 | context->root_level = level; | 3673 | context->root_level = level; |
3677 | 3674 | ||
3678 | reset_rsvds_bits_mask(vcpu, context); | 3675 | reset_rsvds_bits_mask(vcpu, context); |
3679 | update_permission_bitmask(vcpu, context); | 3676 | update_permission_bitmask(vcpu, context, false); |
3680 | update_last_pte_bitmap(vcpu, context); | 3677 | update_last_pte_bitmap(vcpu, context); |
3681 | 3678 | ||
3682 | ASSERT(is_pae(vcpu)); | 3679 | ASSERT(is_pae(vcpu)); |
@@ -3706,7 +3703,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
3706 | context->root_level = PT32_ROOT_LEVEL; | 3703 | context->root_level = PT32_ROOT_LEVEL; |
3707 | 3704 | ||
3708 | reset_rsvds_bits_mask(vcpu, context); | 3705 | reset_rsvds_bits_mask(vcpu, context); |
3709 | update_permission_bitmask(vcpu, context); | 3706 | update_permission_bitmask(vcpu, context, false); |
3710 | update_last_pte_bitmap(vcpu, context); | 3707 | update_last_pte_bitmap(vcpu, context); |
3711 | 3708 | ||
3712 | context->new_cr3 = paging_new_cr3; | 3709 | context->new_cr3 = paging_new_cr3; |
@@ -3768,7 +3765,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3768 | context->gva_to_gpa = paging32_gva_to_gpa; | 3765 | context->gva_to_gpa = paging32_gva_to_gpa; |
3769 | } | 3766 | } |
3770 | 3767 | ||
3771 | update_permission_bitmask(vcpu, context); | 3768 | update_permission_bitmask(vcpu, context, false); |
3772 | update_last_pte_bitmap(vcpu, context); | 3769 | update_last_pte_bitmap(vcpu, context); |
3773 | 3770 | ||
3774 | return 0; | 3771 | return 0; |
@@ -3800,6 +3797,33 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
3800 | } | 3797 | } |
3801 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); | 3798 | EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); |
3802 | 3799 | ||
3800 | int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, | ||
3801 | bool execonly) | ||
3802 | { | ||
3803 | ASSERT(vcpu); | ||
3804 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | ||
3805 | |||
3806 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | ||
3807 | |||
3808 | context->nx = true; | ||
3809 | context->new_cr3 = paging_new_cr3; | ||
3810 | context->page_fault = ept_page_fault; | ||
3811 | context->gva_to_gpa = ept_gva_to_gpa; | ||
3812 | context->sync_page = ept_sync_page; | ||
3813 | context->invlpg = ept_invlpg; | ||
3814 | context->update_pte = ept_update_pte; | ||
3815 | context->free = paging_free; | ||
3816 | context->root_level = context->shadow_root_level; | ||
3817 | context->root_hpa = INVALID_PAGE; | ||
3818 | context->direct_map = false; | ||
3819 | |||
3820 | update_permission_bitmask(vcpu, context, true); | ||
3821 | reset_rsvds_bits_mask_ept(vcpu, context, execonly); | ||
3822 | |||
3823 | return 0; | ||
3824 | } | ||
3825 | EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); | ||
3826 | |||
3803 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | 3827 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) |
3804 | { | 3828 | { |
3805 | int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); | 3829 | int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); |
@@ -3847,7 +3871,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3847 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; | 3871 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; |
3848 | } | 3872 | } |
3849 | 3873 | ||
3850 | update_permission_bitmask(vcpu, g_context); | 3874 | update_permission_bitmask(vcpu, g_context, false); |
3851 | update_last_pte_bitmap(vcpu, g_context); | 3875 | update_last_pte_bitmap(vcpu, g_context); |
3852 | 3876 | ||
3853 | return 0; | 3877 | return 0; |
@@ -3923,8 +3947,8 @@ static bool need_remote_flush(u64 old, u64 new) | |||
3923 | return true; | 3947 | return true; |
3924 | if ((old ^ new) & PT64_BASE_ADDR_MASK) | 3948 | if ((old ^ new) & PT64_BASE_ADDR_MASK) |
3925 | return true; | 3949 | return true; |
3926 | old ^= PT64_NX_MASK; | 3950 | old ^= shadow_nx_mask; |
3927 | new ^= PT64_NX_MASK; | 3951 | new ^= shadow_nx_mask; |
3928 | return (old & ~new & PT64_PERM_MASK) != 0; | 3952 | return (old & ~new & PT64_PERM_MASK) != 0; |
3929 | } | 3953 | } |
3930 | 3954 | ||
@@ -4182,7 +4206,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | |||
4182 | switch (er) { | 4206 | switch (er) { |
4183 | case EMULATE_DONE: | 4207 | case EMULATE_DONE: |
4184 | return 1; | 4208 | return 1; |
4185 | case EMULATE_DO_MMIO: | 4209 | case EMULATE_USER_EXIT: |
4186 | ++vcpu->stat.mmio_exits; | 4210 | ++vcpu->stat.mmio_exits; |
4187 | /* fall through */ | 4211 | /* fall through */ |
4188 | case EMULATE_FAIL: | 4212 | case EMULATE_FAIL: |
@@ -4390,11 +4414,8 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) | |||
4390 | /* | 4414 | /* |
4391 | * The very rare case: if the generation-number is round, | 4415 | * The very rare case: if the generation-number is round, |
4392 | * zap all shadow pages. | 4416 | * zap all shadow pages. |
4393 | * | ||
4394 | * The max value is MMIO_MAX_GEN - 1 since it is not called | ||
4395 | * when mark memslot invalid. | ||
4396 | */ | 4417 | */ |
4397 | if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) { | 4418 | if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { |
4398 | printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); | 4419 | printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); |
4399 | kvm_mmu_invalidate_zap_all_pages(kvm); | 4420 | kvm_mmu_invalidate_zap_all_pages(kvm); |
4400 | } | 4421 | } |
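
The reset_rsvds_bits_mask_ept() loop earlier in this file records, for every 6-bit (memory-type : XWR) combination in a guest EPT entry, whether that combination is a misconfiguration. The same rules written as a standalone predicate, purely to restate what the loop computes:

	/* restatement of the bad_mt_xwr loop: true when the low 6 bits of a
	 * guest EPT pte describe a misconfigured entry */
	static bool ept_low6_is_bad(unsigned int low6, bool execonly)
	{
		unsigned int rwx = low6 & 7;
		unsigned int mt  = low6 >> 3;

		if (mt == 0x2 || mt == 0x3 || mt == 0x7)
			return true;		/* reserved memory type */
		if (rwx == 0x2 || rwx == 0x6)
			return true;		/* write-only or write+execute */
		if (rwx == 0x4 && !execonly)
			return true;		/* execute-only without the execonly capability */
		return false;
	}
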
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 5b59c573aba7..77e044a0f5f7 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -71,6 +71,8 @@ enum { | |||
71 | 71 | ||
72 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); | 72 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); |
73 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | 73 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); |
74 | int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, | ||
75 | bool execonly); | ||
74 | 76 | ||
75 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) | 77 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) |
76 | { | 78 | { |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 7769699d48a8..043330159179 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -23,6 +23,13 @@ | |||
23 | * so the code in this file is compiled twice, once per pte size. | 23 | * so the code in this file is compiled twice, once per pte size. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | /* | ||
27 | * This is used to catch non-optimized uses of the PT_GUEST_(DIRTY|ACCESS)_SHIFT | ||
28 | * macros for the EPT paging type, which has no A/D bits. | ||
29 | */ | ||
30 | extern u64 __pure __using_nonexistent_pte_bit(void) | ||
31 | __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT"); | ||
32 | |||
26 | #if PTTYPE == 64 | 33 | #if PTTYPE == 64 |
27 | #define pt_element_t u64 | 34 | #define pt_element_t u64 |
28 | #define guest_walker guest_walker64 | 35 | #define guest_walker guest_walker64 |
@@ -32,6 +39,10 @@ | |||
32 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | 39 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) |
33 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 40 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | 41 | #define PT_LEVEL_BITS PT64_LEVEL_BITS |
42 | #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK | ||
43 | #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK | ||
44 | #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT | ||
45 | #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT | ||
35 | #ifdef CONFIG_X86_64 | 46 | #ifdef CONFIG_X86_64 |
36 | #define PT_MAX_FULL_LEVELS 4 | 47 | #define PT_MAX_FULL_LEVELS 4 |
37 | #define CMPXCHG cmpxchg | 48 | #define CMPXCHG cmpxchg |
@@ -49,7 +60,26 @@ | |||
49 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) | 60 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) |
50 | #define PT_LEVEL_BITS PT32_LEVEL_BITS | 61 | #define PT_LEVEL_BITS PT32_LEVEL_BITS |
51 | #define PT_MAX_FULL_LEVELS 2 | 62 | #define PT_MAX_FULL_LEVELS 2 |
63 | #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK | ||
64 | #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK | ||
65 | #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT | ||
66 | #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT | ||
52 | #define CMPXCHG cmpxchg | 67 | #define CMPXCHG cmpxchg |
68 | #elif PTTYPE == PTTYPE_EPT | ||
69 | #define pt_element_t u64 | ||
70 | #define guest_walker guest_walkerEPT | ||
71 | #define FNAME(name) ept_##name | ||
72 | #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK | ||
73 | #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) | ||
74 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | ||
75 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | ||
76 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | ||
77 | #define PT_GUEST_ACCESSED_MASK 0 | ||
78 | #define PT_GUEST_DIRTY_MASK 0 | ||
79 | #define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit() | ||
80 | #define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit() | ||
81 | #define CMPXCHG cmpxchg64 | ||
82 | #define PT_MAX_FULL_LEVELS 4 | ||
53 | #else | 83 | #else |
54 | #error Invalid PTTYPE value | 84 | #error Invalid PTTYPE value |
55 | #endif | 85 | #endif |
@@ -80,6 +110,40 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) | |||
80 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; | 110 | return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; |
81 | } | 111 | } |
82 | 112 | ||
113 | static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) | ||
114 | { | ||
115 | unsigned mask; | ||
116 | |||
117 | /* dirty bit is not supported, so no need to track it */ | ||
118 | if (!PT_GUEST_DIRTY_MASK) | ||
119 | return; | ||
120 | |||
121 | BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); | ||
122 | |||
123 | mask = (unsigned)~ACC_WRITE_MASK; | ||
124 | /* Allow write access to dirty gptes */ | ||
125 | mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & | ||
126 | PT_WRITABLE_MASK; | ||
127 | *access &= mask; | ||
128 | } | ||
129 | |||
130 | static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) | ||
131 | { | ||
132 | int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f; | ||
133 | |||
134 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) | | ||
135 | ((mmu->bad_mt_xwr & (1ull << low6)) != 0); | ||
136 | } | ||
137 | |||
138 | static inline int FNAME(is_present_gpte)(unsigned long pte) | ||
139 | { | ||
140 | #if PTTYPE != PTTYPE_EPT | ||
141 | return is_present_gpte(pte); | ||
142 | #else | ||
143 | return pte & 7; | ||
144 | #endif | ||
145 | } | ||
146 | |||
83 | static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | 147 | static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
84 | pt_element_t __user *ptep_user, unsigned index, | 148 | pt_element_t __user *ptep_user, unsigned index, |
85 | pt_element_t orig_pte, pt_element_t new_pte) | 149 | pt_element_t orig_pte, pt_element_t new_pte) |
@@ -103,6 +167,42 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
103 | return (ret != orig_pte); | 167 | return (ret != orig_pte); |
104 | } | 168 | } |
105 | 169 | ||
170 | static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, | ||
171 | struct kvm_mmu_page *sp, u64 *spte, | ||
172 | u64 gpte) | ||
173 | { | ||
174 | if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) | ||
175 | goto no_present; | ||
176 | |||
177 | if (!FNAME(is_present_gpte)(gpte)) | ||
178 | goto no_present; | ||
179 | |||
180 | /* if the accessed bit is not supported, prefetch non-accessed gptes */ | ||
181 | if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK)) | ||
182 | goto no_present; | ||
183 | |||
184 | return false; | ||
185 | |||
186 | no_present: | ||
187 | drop_spte(vcpu->kvm, spte); | ||
188 | return true; | ||
189 | } | ||
190 | |||
191 | static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) | ||
192 | { | ||
193 | unsigned access; | ||
194 | #if PTTYPE == PTTYPE_EPT | ||
195 | access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) | | ||
196 | ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) | | ||
197 | ACC_USER_MASK; | ||
198 | #else | ||
199 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | ||
200 | access &= ~(gpte >> PT64_NX_SHIFT); | ||
201 | #endif | ||
202 | |||
203 | return access; | ||
204 | } | ||
205 | |||
106 | static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, | 206 | static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, |
107 | struct kvm_mmu *mmu, | 207 | struct kvm_mmu *mmu, |
108 | struct guest_walker *walker, | 208 | struct guest_walker *walker, |
@@ -114,18 +214,23 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, | |||
114 | gfn_t table_gfn; | 214 | gfn_t table_gfn; |
115 | int ret; | 215 | int ret; |
116 | 216 | ||
217 | /* dirty/accessed bits are not supported, so no need to update them */ | ||
218 | if (!PT_GUEST_DIRTY_MASK) | ||
219 | return 0; | ||
220 | |||
117 | for (level = walker->max_level; level >= walker->level; --level) { | 221 | for (level = walker->max_level; level >= walker->level; --level) { |
118 | pte = orig_pte = walker->ptes[level - 1]; | 222 | pte = orig_pte = walker->ptes[level - 1]; |
119 | table_gfn = walker->table_gfn[level - 1]; | 223 | table_gfn = walker->table_gfn[level - 1]; |
120 | ptep_user = walker->ptep_user[level - 1]; | 224 | ptep_user = walker->ptep_user[level - 1]; |
121 | index = offset_in_page(ptep_user) / sizeof(pt_element_t); | 225 | index = offset_in_page(ptep_user) / sizeof(pt_element_t); |
122 | if (!(pte & PT_ACCESSED_MASK)) { | 226 | if (!(pte & PT_GUEST_ACCESSED_MASK)) { |
123 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); | 227 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); |
124 | pte |= PT_ACCESSED_MASK; | 228 | pte |= PT_GUEST_ACCESSED_MASK; |
125 | } | 229 | } |
126 | if (level == walker->level && write_fault && !is_dirty_gpte(pte)) { | 230 | if (level == walker->level && write_fault && |
231 | !(pte & PT_GUEST_DIRTY_MASK)) { | ||
127 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 232 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
128 | pte |= PT_DIRTY_MASK; | 233 | pte |= PT_GUEST_DIRTY_MASK; |
129 | } | 234 | } |
130 | if (pte == orig_pte) | 235 | if (pte == orig_pte) |
131 | continue; | 236 | continue; |
@@ -170,7 +275,7 @@ retry_walk: | |||
170 | if (walker->level == PT32E_ROOT_LEVEL) { | 275 | if (walker->level == PT32E_ROOT_LEVEL) { |
171 | pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); | 276 | pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); |
172 | trace_kvm_mmu_paging_element(pte, walker->level); | 277 | trace_kvm_mmu_paging_element(pte, walker->level); |
173 | if (!is_present_gpte(pte)) | 278 | if (!FNAME(is_present_gpte)(pte)) |
174 | goto error; | 279 | goto error; |
175 | --walker->level; | 280 | --walker->level; |
176 | } | 281 | } |
@@ -179,7 +284,7 @@ retry_walk: | |||
179 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || | 284 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || |
180 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); | 285 | (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); |
181 | 286 | ||
182 | accessed_dirty = PT_ACCESSED_MASK; | 287 | accessed_dirty = PT_GUEST_ACCESSED_MASK; |
183 | pt_access = pte_access = ACC_ALL; | 288 | pt_access = pte_access = ACC_ALL; |
184 | ++walker->level; | 289 | ++walker->level; |
185 | 290 | ||
@@ -215,17 +320,17 @@ retry_walk: | |||
215 | 320 | ||
216 | trace_kvm_mmu_paging_element(pte, walker->level); | 321 | trace_kvm_mmu_paging_element(pte, walker->level); |
217 | 322 | ||
218 | if (unlikely(!is_present_gpte(pte))) | 323 | if (unlikely(!FNAME(is_present_gpte)(pte))) |
219 | goto error; | 324 | goto error; |
220 | 325 | ||
221 | if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, | 326 | if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, |
222 | walker->level))) { | 327 | walker->level))) { |
223 | errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; | 328 | errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; |
224 | goto error; | 329 | goto error; |
225 | } | 330 | } |
226 | 331 | ||
227 | accessed_dirty &= pte; | 332 | accessed_dirty &= pte; |
228 | pte_access = pt_access & gpte_access(vcpu, pte); | 333 | pte_access = pt_access & FNAME(gpte_access)(vcpu, pte); |
229 | 334 | ||
230 | walker->ptes[walker->level - 1] = pte; | 335 | walker->ptes[walker->level - 1] = pte; |
231 | } while (!is_last_gpte(mmu, walker->level, pte)); | 336 | } while (!is_last_gpte(mmu, walker->level, pte)); |
@@ -248,13 +353,15 @@ retry_walk: | |||
248 | walker->gfn = real_gpa >> PAGE_SHIFT; | 353 | walker->gfn = real_gpa >> PAGE_SHIFT; |
249 | 354 | ||
250 | if (!write_fault) | 355 | if (!write_fault) |
251 | protect_clean_gpte(&pte_access, pte); | 356 | FNAME(protect_clean_gpte)(&pte_access, pte); |
252 | else | 357 | else |
253 | /* | 358 | /* |
254 | * On a write fault, fold the dirty bit into accessed_dirty by | 359 | * On a write fault, fold the dirty bit into accessed_dirty. |
255 | * shifting it one place right. | 360 | * For modes without A/D bit support, accessed_dirty will |
361 | * always be clear. | ||
256 | */ | 362 | */ |
257 | accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT); | 363 | accessed_dirty &= pte >> |
364 | (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); | ||
258 | 365 | ||
259 | if (unlikely(!accessed_dirty)) { | 366 | if (unlikely(!accessed_dirty)) { |
260 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); | 367 | ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); |
@@ -279,6 +386,25 @@ error: | |||
279 | walker->fault.vector = PF_VECTOR; | 386 | walker->fault.vector = PF_VECTOR; |
280 | walker->fault.error_code_valid = true; | 387 | walker->fault.error_code_valid = true; |
281 | walker->fault.error_code = errcode; | 388 | walker->fault.error_code = errcode; |
389 | |||
390 | #if PTTYPE == PTTYPE_EPT | ||
391 | /* | ||
392 | * Use PFERR_RSVD_MASK in error_code to tell if an EPT | ||
393 | * misconfiguration needs to be injected. The detection is | ||
394 | * done by is_rsvd_bits_set() above. | ||
395 | * | ||
396 | * We set up the value of exit_qualification to inject: | ||
397 | * [2:0] - Derived from [2:0] of real exit_qualification at EPT violation | ||
398 | * [5:3] - Calculated by the page walk of the guest EPT page tables | ||
399 | * [7:8] - Derived from [7:8] of real exit_qualification | ||
400 | * | ||
401 | * The other bits are set to 0. | ||
402 | */ | ||
403 | if (!(errcode & PFERR_RSVD_MASK)) { | ||
404 | vcpu->arch.exit_qualification &= 0x187; | ||
405 | vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3; | ||
406 | } | ||
407 | #endif | ||
282 | walker->fault.address = addr; | 408 | walker->fault.address = addr; |
283 | walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; | 409 | walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; |
284 | 410 | ||
@@ -293,6 +419,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
293 | access); | 419 | access); |
294 | } | 420 | } |
295 | 421 | ||
422 | #if PTTYPE != PTTYPE_EPT | ||
296 | static int FNAME(walk_addr_nested)(struct guest_walker *walker, | 423 | static int FNAME(walk_addr_nested)(struct guest_walker *walker, |
297 | struct kvm_vcpu *vcpu, gva_t addr, | 424 | struct kvm_vcpu *vcpu, gva_t addr, |
298 | u32 access) | 425 | u32 access) |
@@ -300,6 +427,7 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker, | |||
300 | return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, | 427 | return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, |
301 | addr, access); | 428 | addr, access); |
302 | } | 429 | } |
430 | #endif | ||
303 | 431 | ||
304 | static bool | 432 | static bool |
305 | FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 433 | FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
@@ -309,14 +437,14 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
309 | gfn_t gfn; | 437 | gfn_t gfn; |
310 | pfn_t pfn; | 438 | pfn_t pfn; |
311 | 439 | ||
312 | if (prefetch_invalid_gpte(vcpu, sp, spte, gpte)) | 440 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) |
313 | return false; | 441 | return false; |
314 | 442 | ||
315 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 443 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
316 | 444 | ||
317 | gfn = gpte_to_gfn(gpte); | 445 | gfn = gpte_to_gfn(gpte); |
318 | pte_access = sp->role.access & gpte_access(vcpu, gpte); | 446 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
319 | protect_clean_gpte(&pte_access, gpte); | 447 | FNAME(protect_clean_gpte)(&pte_access, gpte); |
320 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, | 448 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, |
321 | no_dirty_log && (pte_access & ACC_WRITE_MASK)); | 449 | no_dirty_log && (pte_access & ACC_WRITE_MASK)); |
322 | if (is_error_pfn(pfn)) | 450 | if (is_error_pfn(pfn)) |
@@ -446,7 +574,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
446 | goto out_gpte_changed; | 574 | goto out_gpte_changed; |
447 | 575 | ||
448 | if (sp) | 576 | if (sp) |
449 | link_shadow_page(it.sptep, sp); | 577 | link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); |
450 | } | 578 | } |
451 | 579 | ||
452 | for (; | 580 | for (; |
@@ -466,7 +594,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
466 | 594 | ||
467 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, | 595 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, |
468 | true, direct_access, it.sptep); | 596 | true, direct_access, it.sptep); |
469 | link_shadow_page(it.sptep, sp); | 597 | link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); |
470 | } | 598 | } |
471 | 599 | ||
472 | clear_sp_write_flooding_count(it.sptep); | 600 | clear_sp_write_flooding_count(it.sptep); |
@@ -727,6 +855,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | |||
727 | return gpa; | 855 | return gpa; |
728 | } | 856 | } |
729 | 857 | ||
858 | #if PTTYPE != PTTYPE_EPT | ||
730 | static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | 859 | static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, |
731 | u32 access, | 860 | u32 access, |
732 | struct x86_exception *exception) | 861 | struct x86_exception *exception) |
@@ -745,6 +874,7 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
745 | 874 | ||
746 | return gpa; | 875 | return gpa; |
747 | } | 876 | } |
877 | #endif | ||
748 | 878 | ||
749 | /* | 879 | /* |
750 | * Using the cached information from sp->gfns is safe because: | 880 | * Using the cached information from sp->gfns is safe because: |
@@ -785,15 +915,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
785 | sizeof(pt_element_t))) | 915 | sizeof(pt_element_t))) |
786 | return -EINVAL; | 916 | return -EINVAL; |
787 | 917 | ||
788 | if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) { | 918 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { |
789 | vcpu->kvm->tlbs_dirty++; | 919 | vcpu->kvm->tlbs_dirty++; |
790 | continue; | 920 | continue; |
791 | } | 921 | } |
792 | 922 | ||
793 | gfn = gpte_to_gfn(gpte); | 923 | gfn = gpte_to_gfn(gpte); |
794 | pte_access = sp->role.access; | 924 | pte_access = sp->role.access; |
795 | pte_access &= gpte_access(vcpu, gpte); | 925 | pte_access &= FNAME(gpte_access)(vcpu, gpte); |
796 | protect_clean_gpte(&pte_access, gpte); | 926 | FNAME(protect_clean_gpte)(&pte_access, gpte); |
797 | 927 | ||
798 | if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access, | 928 | if (sync_mmio_spte(vcpu->kvm, &sp->spt[i], gfn, pte_access, |
799 | &nr_present)) | 929 | &nr_present)) |
@@ -830,3 +960,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
830 | #undef gpte_to_gfn | 960 | #undef gpte_to_gfn |
831 | #undef gpte_to_gfn_lvl | 961 | #undef gpte_to_gfn_lvl |
832 | #undef CMPXCHG | 962 | #undef CMPXCHG |
963 | #undef PT_GUEST_ACCESSED_MASK | ||
964 | #undef PT_GUEST_DIRTY_MASK | ||
965 | #undef PT_GUEST_DIRTY_SHIFT | ||
966 | #undef PT_GUEST_ACCESSED_SHIFT | ||
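
For the EPT instantiation, FNAME(gpte_access)() maps the guest EPT RWX bits straight onto the shadow ACC_* masks, with ACC_USER_MASK always set because EPT has no user/supervisor distinction. A worked example of that branch for a read+write, non-executable guest entry (a restatement of the code above, not additional behaviour):

	/* gpte has bits 0 (read) and 1 (write) set, bit 2 (execute) clear:
	 *   VMX_EPT_WRITABLE_MASK set    -> ACC_WRITE_MASK
	 *   VMX_EPT_EXECUTABLE_MASK off  -> no ACC_EXEC_MASK
	 *   ACC_USER_MASK is added unconditionally for EPT
	 * so FNAME(gpte_access) returns ACC_WRITE_MASK | ACC_USER_MASK. */
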
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index c53e797e7369..5c4f63151b4d 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -160,7 +160,7 @@ static void stop_counter(struct kvm_pmc *pmc) | |||
160 | 160 | ||
161 | static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | 161 | static void reprogram_counter(struct kvm_pmc *pmc, u32 type, |
162 | unsigned config, bool exclude_user, bool exclude_kernel, | 162 | unsigned config, bool exclude_user, bool exclude_kernel, |
163 | bool intr) | 163 | bool intr, bool in_tx, bool in_tx_cp) |
164 | { | 164 | { |
165 | struct perf_event *event; | 165 | struct perf_event *event; |
166 | struct perf_event_attr attr = { | 166 | struct perf_event_attr attr = { |
@@ -173,6 +173,10 @@ static void reprogram_counter(struct kvm_pmc *pmc, u32 type, | |||
173 | .exclude_kernel = exclude_kernel, | 173 | .exclude_kernel = exclude_kernel, |
174 | .config = config, | 174 | .config = config, |
175 | }; | 175 | }; |
176 | if (in_tx) | ||
177 | attr.config |= HSW_IN_TX; | ||
178 | if (in_tx_cp) | ||
179 | attr.config |= HSW_IN_TX_CHECKPOINTED; | ||
176 | 180 | ||
177 | attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); | 181 | attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); |
178 | 182 | ||
@@ -226,7 +230,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
226 | 230 | ||
227 | if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | | 231 | if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | |
228 | ARCH_PERFMON_EVENTSEL_INV | | 232 | ARCH_PERFMON_EVENTSEL_INV | |
229 | ARCH_PERFMON_EVENTSEL_CMASK))) { | 233 | ARCH_PERFMON_EVENTSEL_CMASK | |
234 | HSW_IN_TX | | ||
235 | HSW_IN_TX_CHECKPOINTED))) { | ||
230 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, | 236 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, |
231 | unit_mask); | 237 | unit_mask); |
232 | if (config != PERF_COUNT_HW_MAX) | 238 | if (config != PERF_COUNT_HW_MAX) |
@@ -239,7 +245,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
239 | reprogram_counter(pmc, type, config, | 245 | reprogram_counter(pmc, type, config, |
240 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), | 246 | !(eventsel & ARCH_PERFMON_EVENTSEL_USR), |
241 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), | 247 | !(eventsel & ARCH_PERFMON_EVENTSEL_OS), |
242 | eventsel & ARCH_PERFMON_EVENTSEL_INT); | 248 | eventsel & ARCH_PERFMON_EVENTSEL_INT, |
249 | (eventsel & HSW_IN_TX), | ||
250 | (eventsel & HSW_IN_TX_CHECKPOINTED)); | ||
243 | } | 251 | } |
244 | 252 | ||
245 | static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) | 253 | static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) |
@@ -256,7 +264,7 @@ static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx) | |||
256 | arch_events[fixed_pmc_events[idx]].event_type, | 264 | arch_events[fixed_pmc_events[idx]].event_type, |
257 | !(en & 0x2), /* exclude user */ | 265 | !(en & 0x2), /* exclude user */ |
258 | !(en & 0x1), /* exclude kernel */ | 266 | !(en & 0x1), /* exclude kernel */ |
259 | pmi); | 267 | pmi, false, false); |
260 | } | 268 | } |
261 | 269 | ||
262 | static inline u8 fixed_en_pmi(u64 ctrl, int idx) | 270 | static inline u8 fixed_en_pmi(u64 ctrl, int idx) |
@@ -408,7 +416,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
408 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | 416 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { |
409 | if (data == pmc->eventsel) | 417 | if (data == pmc->eventsel) |
410 | return 0; | 418 | return 0; |
411 | if (!(data & 0xffffffff00200000ull)) { | 419 | if (!(data & pmu->reserved_bits)) { |
412 | reprogram_gp_counter(pmc, data); | 420 | reprogram_gp_counter(pmc, data); |
413 | return 0; | 421 | return 0; |
414 | } | 422 | } |
@@ -450,6 +458,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
450 | pmu->counter_bitmask[KVM_PMC_GP] = 0; | 458 | pmu->counter_bitmask[KVM_PMC_GP] = 0; |
451 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; | 459 | pmu->counter_bitmask[KVM_PMC_FIXED] = 0; |
452 | pmu->version = 0; | 460 | pmu->version = 0; |
461 | pmu->reserved_bits = 0xffffffff00200000ull; | ||
453 | 462 | ||
454 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); | 463 | entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); |
455 | if (!entry) | 464 | if (!entry) |
@@ -478,6 +487,12 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
478 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | | 487 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | |
479 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); | 488 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); |
480 | pmu->global_ctrl_mask = ~pmu->global_ctrl; | 489 | pmu->global_ctrl_mask = ~pmu->global_ctrl; |
490 | |||
491 | entry = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
492 | if (entry && | ||
493 | (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && | ||
494 | (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) | ||
495 | pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; | ||
481 | } | 496 | } |
482 | 497 | ||
483 | void kvm_pmu_init(struct kvm_vcpu *vcpu) | 498 | void kvm_pmu_init(struct kvm_vcpu *vcpu) |
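
The new pmu->reserved_bits field starts out as the old hard-coded 0xffffffff00200000ull mask; when guest CPUID leaf 7 advertises HLE or RTM, the two Haswell TX bits are XOR-ed out of it so guest eventsel writes carrying them pass kvm_pmu_set_msr(). A sketch of the mask arithmetic, assuming the usual definitions HSW_IN_TX = 1ULL << 32 and HSW_IN_TX_CHECKPOINTED = 1ULL << 33 (those bit positions are an assumption here, not part of this hunk):

	u64 reserved = 0xffffffff00200000ull;		/* default: TX bits are reserved */
	reserved ^= (1ULL << 32) | (1ULL << 33);	/* HLE/RTM present: un-reserve them */
	/* an eventsel with HSW_IN_TX set now survives the !(data & pmu->reserved_bits)
	 * check, and reprogram_counter() forwards it as attr.config |= HSW_IN_TX */
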
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 064d0be67ecc..1f1da43ff2a2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -373,6 +373,7 @@ struct nested_vmx { | |||
373 | * we must keep them pinned while L2 runs. | 373 | * we must keep them pinned while L2 runs. |
374 | */ | 374 | */ |
375 | struct page *apic_access_page; | 375 | struct page *apic_access_page; |
376 | u64 msr_ia32_feature_control; | ||
376 | }; | 377 | }; |
377 | 378 | ||
378 | #define POSTED_INTR_ON 0 | 379 | #define POSTED_INTR_ON 0 |
@@ -711,10 +712,10 @@ static void nested_release_page_clean(struct page *page) | |||
711 | kvm_release_page_clean(page); | 712 | kvm_release_page_clean(page); |
712 | } | 713 | } |
713 | 714 | ||
715 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); | ||
714 | static u64 construct_eptp(unsigned long root_hpa); | 716 | static u64 construct_eptp(unsigned long root_hpa); |
715 | static void kvm_cpu_vmxon(u64 addr); | 717 | static void kvm_cpu_vmxon(u64 addr); |
716 | static void kvm_cpu_vmxoff(void); | 718 | static void kvm_cpu_vmxoff(void); |
717 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | ||
718 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 719 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
719 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 720 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
720 | struct kvm_segment *var, int seg); | 721 | struct kvm_segment *var, int seg); |
@@ -1039,12 +1040,16 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit) | |||
1039 | (vmcs12->secondary_vm_exec_control & bit); | 1040 | (vmcs12->secondary_vm_exec_control & bit); |
1040 | } | 1041 | } |
1041 | 1042 | ||
1042 | static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12, | 1043 | static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) |
1043 | struct kvm_vcpu *vcpu) | ||
1044 | { | 1044 | { |
1045 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; | 1045 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; |
1046 | } | 1046 | } |
1047 | 1047 | ||
1048 | static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) | ||
1049 | { | ||
1050 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); | ||
1051 | } | ||
1052 | |||
1048 | static inline bool is_exception(u32 intr_info) | 1053 | static inline bool is_exception(u32 intr_info) |
1049 | { | 1054 | { |
1050 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 1055 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
@@ -2155,6 +2160,7 @@ static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; | |||
2155 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; | 2160 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; |
2156 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; | 2161 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; |
2157 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; | 2162 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; |
2163 | static u32 nested_vmx_ept_caps; | ||
2158 | static __init void nested_vmx_setup_ctls_msrs(void) | 2164 | static __init void nested_vmx_setup_ctls_msrs(void) |
2159 | { | 2165 | { |
2160 | /* | 2166 | /* |
@@ -2190,14 +2196,17 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2190 | * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and | 2196 | * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and |
2191 | * 17 must be 1. | 2197 | * 17 must be 1. |
2192 | */ | 2198 | */ |
2199 | rdmsr(MSR_IA32_VMX_EXIT_CTLS, | ||
2200 | nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); | ||
2193 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | 2201 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; |
2194 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ | 2202 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ |
2203 | nested_vmx_exit_ctls_high &= | ||
2195 | #ifdef CONFIG_X86_64 | 2204 | #ifdef CONFIG_X86_64 |
2196 | nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2205 | VM_EXIT_HOST_ADDR_SPACE_SIZE | |
2197 | #else | ||
2198 | nested_vmx_exit_ctls_high = 0; | ||
2199 | #endif | 2206 | #endif |
2200 | nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | 2207 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; |
2208 | nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2209 | VM_EXIT_LOAD_IA32_EFER); | ||
2201 | 2210 | ||
2202 | /* entry controls */ | 2211 | /* entry controls */ |
2203 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 2212 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
@@ -2205,8 +2214,12 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2205 | /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ | 2214 | /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ |
2206 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | 2215 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; |
2207 | nested_vmx_entry_ctls_high &= | 2216 | nested_vmx_entry_ctls_high &= |
2208 | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; | 2217 | #ifdef CONFIG_X86_64 |
2209 | nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | 2218 | VM_ENTRY_IA32E_MODE | |
2219 | #endif | ||
2220 | VM_ENTRY_LOAD_IA32_PAT; | ||
2221 | nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2222 | VM_ENTRY_LOAD_IA32_EFER); | ||
2210 | 2223 | ||
2211 | /* cpu-based controls */ | 2224 | /* cpu-based controls */ |
2212 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | 2225 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, |
@@ -2241,6 +2254,22 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2241 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2254 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2242 | SECONDARY_EXEC_WBINVD_EXITING; | 2255 | SECONDARY_EXEC_WBINVD_EXITING; |
2243 | 2256 | ||
2257 | if (enable_ept) { | ||
2258 | /* nested EPT: emulate EPT also to L1 */ | ||
2259 | nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; | ||
2260 | nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | | ||
2261 | VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; | ||
2262 | nested_vmx_ept_caps &= vmx_capability.ept; | ||
2263 | /* | ||
2264 | * Since invept is completely emulated, we support both global | ||
2265 | * and context invalidation independent of what the host cpu | ||
2266 | * supports. | ||
2267 | */ | ||
2268 | nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | | ||
2269 | VMX_EPT_EXTENT_CONTEXT_BIT; | ||
2270 | } else | ||
2271 | nested_vmx_ept_caps = 0; | ||
2272 | |||
2244 | /* miscellaneous data */ | 2273 | /* miscellaneous data */ |
2245 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); | 2274 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); |
2246 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | | 2275 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | |
@@ -2282,8 +2311,11 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2282 | 2311 | ||
2283 | switch (msr_index) { | 2312 | switch (msr_index) { |
2284 | case MSR_IA32_FEATURE_CONTROL: | 2313 | case MSR_IA32_FEATURE_CONTROL: |
2285 | *pdata = 0; | 2314 | if (nested_vmx_allowed(vcpu)) { |
2286 | break; | 2315 | *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control; |
2316 | break; | ||
2317 | } | ||
2318 | return 0; | ||
2287 | case MSR_IA32_VMX_BASIC: | 2319 | case MSR_IA32_VMX_BASIC: |
2288 | /* | 2320 | /* |
2289 | * This MSR reports some information about VMX support. We | 2321 | * This MSR reports some information about VMX support. We |
@@ -2346,8 +2378,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2346 | nested_vmx_secondary_ctls_high); | 2378 | nested_vmx_secondary_ctls_high); |
2347 | break; | 2379 | break; |
2348 | case MSR_IA32_VMX_EPT_VPID_CAP: | 2380 | case MSR_IA32_VMX_EPT_VPID_CAP: |
2349 | /* Currently, no nested ept or nested vpid */ | 2381 | /* Currently, no nested vpid support */ |
2350 | *pdata = 0; | 2382 | *pdata = nested_vmx_ept_caps; |
2351 | break; | 2383 | break; |
2352 | default: | 2384 | default: |
2353 | return 0; | 2385 | return 0; |
@@ -2356,14 +2388,24 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2356 | return 1; | 2388 | return 1; |
2357 | } | 2389 | } |
2358 | 2390 | ||
2359 | static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 2391 | static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
2360 | { | 2392 | { |
2393 | u32 msr_index = msr_info->index; | ||
2394 | u64 data = msr_info->data; | ||
2395 | bool host_initialized = msr_info->host_initiated; | ||
2396 | |||
2361 | if (!nested_vmx_allowed(vcpu)) | 2397 | if (!nested_vmx_allowed(vcpu)) |
2362 | return 0; | 2398 | return 0; |
2363 | 2399 | ||
2364 | if (msr_index == MSR_IA32_FEATURE_CONTROL) | 2400 | if (msr_index == MSR_IA32_FEATURE_CONTROL) { |
2365 | /* TODO: the right thing. */ | 2401 | if (!host_initialized && |
2402 | to_vmx(vcpu)->nested.msr_ia32_feature_control | ||
2403 | & FEATURE_CONTROL_LOCKED) | ||
2404 | return 0; | ||
2405 | to_vmx(vcpu)->nested.msr_ia32_feature_control = data; | ||
2366 | return 1; | 2406 | return 1; |
2407 | } | ||
2408 | |||
2367 | /* | 2409 | /* |
2368 | * No need to treat VMX capability MSRs specially: If we don't handle | 2410 | * No need to treat VMX capability MSRs specially: If we don't handle |
2369 | * them, handle_wrmsr will #GP(0), which is correct (they are readonly) | 2411 | * them, handle_wrmsr will #GP(0), which is correct (they are readonly) |
@@ -2494,7 +2536,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2494 | return 1; | 2536 | return 1; |
2495 | /* Otherwise falls through */ | 2537 | /* Otherwise falls through */ |
2496 | default: | 2538 | default: |
2497 | if (vmx_set_vmx_msr(vcpu, msr_index, data)) | 2539 | if (vmx_set_vmx_msr(vcpu, msr_info)) |
2498 | break; | 2540 | break; |
2499 | msr = find_msr_entry(vmx, msr_index); | 2541 | msr = find_msr_entry(vmx, msr_index); |
2500 | if (msr) { | 2542 | if (msr) { |
@@ -5302,9 +5344,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
5302 | 5344 | ||
5303 | /* Is it a write fault? */ | 5345 | /* Is it a write fault? */ |
5304 | error_code = exit_qualification & (1U << 1); | 5346 | error_code = exit_qualification & (1U << 1); |
5347 | /* Is it a fetch fault? */ | ||
5348 | error_code |= (exit_qualification & (1U << 2)) << 2; | ||
5305 | /* ept page table is present? */ | 5349 | /* ept page table is present? */ |
5306 | error_code |= (exit_qualification >> 3) & 0x1; | 5350 | error_code |= (exit_qualification >> 3) & 0x1; |
5307 | 5351 | ||
5352 | vcpu->arch.exit_qualification = exit_qualification; | ||
5353 | |||
5308 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); | 5354 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); |
5309 | } | 5355 | } |
5310 | 5356 | ||
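The handle_ept_violation() hunk above folds the instruction-fetch bit of the EPT exit qualification into the page-fault error code passed to kvm_mmu_page_fault(), and records the raw qualification in vcpu->arch.exit_qualification for the nested-EPT exit path added further down. A sketch of the bit shuffling, assuming the usual encodings (qualification bit 1 = write, bit 2 = fetch, bit 3 = entry present; error code bit 0 = present, bit 1 = write, bit 4 = fetch):

#include <stdint.h>
#include <stdio.h>

/* Assumed encodings -- see the note above. */
#define EPT_QUAL_WRITE    (1U << 1)
#define EPT_QUAL_FETCH    (1U << 2)
#define EPT_QUAL_PRESENT  (1U << 3)

static uint32_t qual_to_error_code(uint32_t qual)
{
        uint32_t error_code;

        error_code  = qual & EPT_QUAL_WRITE;        /* stays in bit 1 */
        error_code |= (qual & EPT_QUAL_FETCH) << 2; /* bit 2 -> bit 4 */
        error_code |= (qual >> 3) & 0x1;            /* bit 3 -> bit 0 */
        return error_code;
}

int main(void)
{
        /* A fetch from a present mapping: expect bits 4 and 0 set (0x11). */
        printf("0x%x\n", qual_to_error_code(EPT_QUAL_FETCH | EPT_QUAL_PRESENT));
        return 0;
}

The three target bits correspond to PFERR_PRESENT_MASK, PFERR_WRITE_MASK and PFERR_FETCH_MASK in the kernel's page-fault error-code definitions.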
@@ -5438,7 +5484,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5438 | 5484 | ||
5439 | err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); | 5485 | err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); |
5440 | 5486 | ||
5441 | if (err == EMULATE_DO_MMIO) { | 5487 | if (err == EMULATE_USER_EXIT) { |
5488 | ++vcpu->stat.mmio_exits; | ||
5442 | ret = 0; | 5489 | ret = 0; |
5443 | goto out; | 5490 | goto out; |
5444 | } | 5491 | } |
@@ -5567,8 +5614,47 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | |||
5567 | free_loaded_vmcs(&vmx->vmcs01); | 5614 | free_loaded_vmcs(&vmx->vmcs01); |
5568 | } | 5615 | } |
5569 | 5616 | ||
5617 | /* | ||
5618 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), | ||
5619 | * set the success or error code of an emulated VMX instruction, as specified | ||
5620 | * by Vol 2B, VMX Instruction Reference, "Conventions". | ||
5621 | */ | ||
5622 | static void nested_vmx_succeed(struct kvm_vcpu *vcpu) | ||
5623 | { | ||
5624 | vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) | ||
5625 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5626 | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); | ||
5627 | } | ||
5628 | |||
5629 | static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) | ||
5630 | { | ||
5631 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5632 | & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | | ||
5633 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5634 | | X86_EFLAGS_CF); | ||
5635 | } | ||
5636 | |||
5570 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | 5637 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, |
5571 | u32 vm_instruction_error); | 5638 | u32 vm_instruction_error) |
5639 | { | ||
5640 | if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { | ||
5641 | /* | ||
5642 | * failValid writes the error number to the current VMCS, which | ||
5643 | * can't be done if there isn't a current VMCS. | ||
5644 | */ | ||
5645 | nested_vmx_failInvalid(vcpu); | ||
5646 | return; | ||
5647 | } | ||
5648 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5649 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5650 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5651 | | X86_EFLAGS_ZF); | ||
5652 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; | ||
5653 | /* | ||
5654 | * We don't need to force a shadow sync because | ||
5655 | * VM_INSTRUCTION_ERROR is not shadowed | ||
5656 | */ | ||
5657 | } | ||
5572 | 5658 | ||
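The three helpers moved up here implement the VMX instruction result "Conventions" from the SDM: VMsucceed clears CF, PF, AF, ZF, SF and OF; VMfailInvalid sets only CF; VMfailValid sets only ZF and stores the error number in the current VMCS. A stand-alone sketch of the same RFLAGS arithmetic, using locally defined flag masks rather than the kernel's:

#include <stdio.h>

/* RFLAGS bit masks (standard x86 encodings). */
#define FL_CF (1UL << 0)
#define FL_PF (1UL << 2)
#define FL_AF (1UL << 4)
#define FL_ZF (1UL << 6)
#define FL_SF (1UL << 7)
#define FL_OF (1UL << 11)
#define FL_ARITH (FL_CF | FL_PF | FL_AF | FL_ZF | FL_SF | FL_OF)

static unsigned long vmx_succeed(unsigned long rflags)
{
        return rflags & ~FL_ARITH;              /* all six cleared */
}

static unsigned long vmx_fail_invalid(unsigned long rflags)
{
        return (rflags & ~FL_ARITH) | FL_CF;    /* only CF set */
}

static unsigned long vmx_fail_valid(unsigned long rflags)
{
        return (rflags & ~FL_ARITH) | FL_ZF;    /* only ZF set */
}

int main(void)
{
        unsigned long rflags = 0x2 | FL_ARITH;  /* bit 1 is always 1 */

        printf("succeed:      %#lx\n", vmx_succeed(rflags));
        printf("fail invalid: %#lx\n", vmx_fail_invalid(rflags));
        printf("fail valid:   %#lx\n", vmx_fail_valid(rflags));
        return 0;
}

An L1 hypervisor that tests CF and ZF after a VMX instruction therefore sees exactly the outcome the emulation intends.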
5573 | /* | 5659 | /* |
5574 | * Emulate the VMXON instruction. | 5660 | * Emulate the VMXON instruction. |
@@ -5583,6 +5669,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5583 | struct kvm_segment cs; | 5669 | struct kvm_segment cs; |
5584 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 5670 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
5585 | struct vmcs *shadow_vmcs; | 5671 | struct vmcs *shadow_vmcs; |
5672 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | ||
5673 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | ||
5586 | 5674 | ||
5587 | /* The Intel VMX Instruction Reference lists a bunch of bits that | 5675 | /* The Intel VMX Instruction Reference lists a bunch of bits that |
5588 | * are prerequisite to running VMXON, most notably cr4.VMXE must be | 5676 | * are prerequisite to running VMXON, most notably cr4.VMXE must be |
@@ -5611,6 +5699,13 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5611 | skip_emulated_instruction(vcpu); | 5699 | skip_emulated_instruction(vcpu); |
5612 | return 1; | 5700 | return 1; |
5613 | } | 5701 | } |
5702 | |||
5703 | if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES) | ||
5704 | != VMXON_NEEDED_FEATURES) { | ||
5705 | kvm_inject_gp(vcpu, 0); | ||
5706 | return 1; | ||
5707 | } | ||
5708 | |||
5614 | if (enable_shadow_vmcs) { | 5709 | if (enable_shadow_vmcs) { |
5615 | shadow_vmcs = alloc_vmcs(); | 5710 | shadow_vmcs = alloc_vmcs(); |
5616 | if (!shadow_vmcs) | 5711 | if (!shadow_vmcs) |
@@ -5628,6 +5723,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5628 | vmx->nested.vmxon = true; | 5723 | vmx->nested.vmxon = true; |
5629 | 5724 | ||
5630 | skip_emulated_instruction(vcpu); | 5725 | skip_emulated_instruction(vcpu); |
5726 | nested_vmx_succeed(vcpu); | ||
5631 | return 1; | 5727 | return 1; |
5632 | } | 5728 | } |
5633 | 5729 | ||
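With the hunks above, nested VMXON is gated on the guest's IA32_FEATURE_CONTROL MSR the way real hardware gates it: the lock bit and the VMXON-outside-SMX bit must both be set, otherwise handle_vmon() injects #GP. A sketch of just that check; the bit positions are the architectural ones (lock = bit 0, VMXON outside SMX = bit 2) and the helper name is made up for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FC_LOCKED               (1ULL << 0)
#define FC_VMXON_OUTSIDE_SMX    (1ULL << 2)
#define VMXON_NEEDED            (FC_LOCKED | FC_VMXON_OUTSIDE_SMX)

/* Hypothetical helper mirroring the handle_vmon() test above. */
static bool vmxon_allowed(uint64_t feature_control)
{
        return (feature_control & VMXON_NEEDED) == VMXON_NEEDED;
}

int main(void)
{
        printf("%d\n", vmxon_allowed(0));             /* 0: #GP  */
        printf("%d\n", vmxon_allowed(FC_LOCKED));     /* 0: #GP  */
        printf("%d\n", vmxon_allowed(VMXON_NEEDED));  /* 1: okay */
        return 0;
}

Guest writes to the MSR are refused once the lock bit is set, unless they are host-initiated, which is the behaviour vmx_set_vmx_msr() gained above.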
@@ -5712,6 +5808,7 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) | |||
5712 | return 1; | 5808 | return 1; |
5713 | free_nested(to_vmx(vcpu)); | 5809 | free_nested(to_vmx(vcpu)); |
5714 | skip_emulated_instruction(vcpu); | 5810 | skip_emulated_instruction(vcpu); |
5811 | nested_vmx_succeed(vcpu); | ||
5715 | return 1; | 5812 | return 1; |
5716 | } | 5813 | } |
5717 | 5814 | ||
@@ -5768,48 +5865,6 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, | |||
5768 | return 0; | 5865 | return 0; |
5769 | } | 5866 | } |
5770 | 5867 | ||
5771 | /* | ||
5772 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), | ||
5773 | * set the success or error code of an emulated VMX instruction, as specified | ||
5774 | * by Vol 2B, VMX Instruction Reference, "Conventions". | ||
5775 | */ | ||
5776 | static void nested_vmx_succeed(struct kvm_vcpu *vcpu) | ||
5777 | { | ||
5778 | vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) | ||
5779 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5780 | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); | ||
5781 | } | ||
5782 | |||
5783 | static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) | ||
5784 | { | ||
5785 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5786 | & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | | ||
5787 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5788 | | X86_EFLAGS_CF); | ||
5789 | } | ||
5790 | |||
5791 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | ||
5792 | u32 vm_instruction_error) | ||
5793 | { | ||
5794 | if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { | ||
5795 | /* | ||
5796 | * failValid writes the error number to the current VMCS, which | ||
5797 | * can't be done if there isn't a current VMCS. | ||
5798 | */ | ||
5799 | nested_vmx_failInvalid(vcpu); | ||
5800 | return; | ||
5801 | } | ||
5802 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5803 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5804 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5805 | | X86_EFLAGS_ZF); | ||
5806 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; | ||
5807 | /* | ||
5808 | * We don't need to force a shadow sync because | ||
5809 | * VM_INSTRUCTION_ERROR is not shadowed | ||
5810 | */ | ||
5811 | } | ||
5812 | |||
5813 | /* Emulate the VMCLEAR instruction */ | 5868 | /* Emulate the VMCLEAR instruction */ |
5814 | static int handle_vmclear(struct kvm_vcpu *vcpu) | 5869 | static int handle_vmclear(struct kvm_vcpu *vcpu) |
5815 | { | 5870 | { |
@@ -5972,8 +6027,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | |||
5972 | unsigned long field; | 6027 | unsigned long field; |
5973 | u64 field_value; | 6028 | u64 field_value; |
5974 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; | 6029 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; |
5975 | unsigned long *fields = (unsigned long *)shadow_read_write_fields; | 6030 | const unsigned long *fields = shadow_read_write_fields; |
5976 | int num_fields = max_shadow_read_write_fields; | 6031 | const int num_fields = max_shadow_read_write_fields; |
5977 | 6032 | ||
5978 | vmcs_load(shadow_vmcs); | 6033 | vmcs_load(shadow_vmcs); |
5979 | 6034 | ||
@@ -6002,12 +6057,11 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | |||
6002 | 6057 | ||
6003 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | 6058 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) |
6004 | { | 6059 | { |
6005 | unsigned long *fields[] = { | 6060 | const unsigned long *fields[] = { |
6006 | (unsigned long *)shadow_read_write_fields, | 6061 | shadow_read_write_fields, |
6007 | (unsigned long *)shadow_read_only_fields | 6062 | shadow_read_only_fields |
6008 | }; | 6063 | }; |
6009 | int num_lists = ARRAY_SIZE(fields); | 6064 | const int max_fields[] = { |
6010 | int max_fields[] = { | ||
6011 | max_shadow_read_write_fields, | 6065 | max_shadow_read_write_fields, |
6012 | max_shadow_read_only_fields | 6066 | max_shadow_read_only_fields |
6013 | }; | 6067 | }; |
@@ -6018,7 +6072,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | |||
6018 | 6072 | ||
6019 | vmcs_load(shadow_vmcs); | 6073 | vmcs_load(shadow_vmcs); |
6020 | 6074 | ||
6021 | for (q = 0; q < num_lists; q++) { | 6075 | for (q = 0; q < ARRAY_SIZE(fields); q++) { |
6022 | for (i = 0; i < max_fields[q]; i++) { | 6076 | for (i = 0; i < max_fields[q]; i++) { |
6023 | field = fields[q][i]; | 6077 | field = fields[q][i]; |
6024 | vmcs12_read_any(&vmx->vcpu, field, &field_value); | 6078 | vmcs12_read_any(&vmx->vcpu, field, &field_value); |
@@ -6248,6 +6302,74 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) | |||
6248 | return 1; | 6302 | return 1; |
6249 | } | 6303 | } |
6250 | 6304 | ||
6305 | /* Emulate the INVEPT instruction */ | ||
6306 | static int handle_invept(struct kvm_vcpu *vcpu) | ||
6307 | { | ||
6308 | u32 vmx_instruction_info, types; | ||
6309 | unsigned long type; | ||
6310 | gva_t gva; | ||
6311 | struct x86_exception e; | ||
6312 | struct { | ||
6313 | u64 eptp, gpa; | ||
6314 | } operand; | ||
6315 | u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK; | ||
6316 | |||
6317 | if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || | ||
6318 | !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { | ||
6319 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
6320 | return 1; | ||
6321 | } | ||
6322 | |||
6323 | if (!nested_vmx_check_permission(vcpu)) | ||
6324 | return 1; | ||
6325 | |||
6326 | if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) { | ||
6327 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
6328 | return 1; | ||
6329 | } | ||
6330 | |||
6331 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | ||
6332 | type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); | ||
6333 | |||
6334 | types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; | ||
6335 | |||
6336 | if (!(types & (1UL << type))) { | ||
6337 | nested_vmx_failValid(vcpu, | ||
6338 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | ||
6339 | return 1; | ||
6340 | } | ||
6341 | |||
6342 | /* According to the Intel VMX instruction reference, the memory | ||
6343 | * operand is read even if it isn't needed (e.g., for type==global) | ||
6344 | */ | ||
6345 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | ||
6346 | vmx_instruction_info, &gva)) | ||
6347 | return 1; | ||
6348 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, | ||
6349 | sizeof(operand), &e)) { | ||
6350 | kvm_inject_page_fault(vcpu, &e); | ||
6351 | return 1; | ||
6352 | } | ||
6353 | |||
6354 | switch (type) { | ||
6355 | case VMX_EPT_EXTENT_CONTEXT: | ||
6356 | if ((operand.eptp & eptp_mask) != | ||
6357 | (nested_ept_get_cr3(vcpu) & eptp_mask)) | ||
6358 | break; | ||
6359 | case VMX_EPT_EXTENT_GLOBAL: | ||
6360 | kvm_mmu_sync_roots(vcpu); | ||
6361 | kvm_mmu_flush_tlb(vcpu); | ||
6362 | nested_vmx_succeed(vcpu); | ||
6363 | break; | ||
6364 | default: | ||
6365 | BUG_ON(1); | ||
6366 | break; | ||
6367 | } | ||
6368 | |||
6369 | skip_emulated_instruction(vcpu); | ||
6370 | return 1; | ||
6371 | } | ||
6372 | |||
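handle_invept() above extracts the INVEPT type from bits 31:28 of the VM-exit instruction information and validates it against the extent types advertised in nested_vmx_ept_caps. A compact sketch of that validation; the bit positions are assumed from asm/vmx.h (extent bits start at 24, so bit 25 = single-context and bit 26 = global, which is why (caps >> 24) & 6 yields a two-bit type bitmap):

#include <stdint.h>
#include <stdio.h>

#define VMX_EPT_EXTENT_SHIFT        24
#define VMX_EPT_EXTENT_CONTEXT_BIT  (1ULL << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT   (1ULL << 26)

enum { INVEPT_CONTEXT = 1, INVEPT_GLOBAL = 2 };

static int invept_type_supported(uint64_t ept_caps, unsigned long type)
{
        /* bit 1 = single-context, bit 2 = global, mirroring the handler. */
        uint32_t types = (ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;

        return (types & (1UL << type)) != 0;
}

int main(void)
{
        uint64_t caps = VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_EXTENT_GLOBAL_BIT;

        printf("context: %d\n", invept_type_supported(caps, INVEPT_CONTEXT));
        printf("global:  %d\n", invept_type_supported(caps, INVEPT_GLOBAL));
        printf("type 0:  %d\n", invept_type_supported(caps, 0));
        return 0;
}

An unsupported type is reported back to L1 with VMfailValid(VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID) instead of being executed.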
6251 | /* | 6373 | /* |
6252 | * The exit handlers return 1 if the exit was handled fully and guest execution | 6374 | * The exit handlers return 1 if the exit was handled fully and guest execution |
6253 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 6375 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -6292,6 +6414,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
6292 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | 6414 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, |
6293 | [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, | 6415 | [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, |
6294 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, | 6416 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, |
6417 | [EXIT_REASON_INVEPT] = handle_invept, | ||
6295 | }; | 6418 | }; |
6296 | 6419 | ||
6297 | static const int kvm_vmx_max_exit_handlers = | 6420 | static const int kvm_vmx_max_exit_handlers = |
@@ -6518,6 +6641,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6518 | case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: | 6641 | case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: |
6519 | case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: | 6642 | case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: |
6520 | case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: | 6643 | case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: |
6644 | case EXIT_REASON_INVEPT: | ||
6521 | /* | 6645 | /* |
6522 | * VMX instructions trap unconditionally. This allows L1 to | 6646 | * VMX instructions trap unconditionally. This allows L1 to |
6523 | * emulate them for its L2 guest, i.e., allows 3-level nesting! | 6647 | * emulate them for its L2 guest, i.e., allows 3-level nesting! |
@@ -6550,7 +6674,20 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6550 | return nested_cpu_has2(vmcs12, | 6674 | return nested_cpu_has2(vmcs12, |
6551 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 6675 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); |
6552 | case EXIT_REASON_EPT_VIOLATION: | 6676 | case EXIT_REASON_EPT_VIOLATION: |
6677 | /* | ||
6678 | * L0 always deals with the EPT violation. If nested EPT is | ||
6679 | * used, and the nested mmu code discovers that the address is | ||
6680 | * missing in the guest EPT table (EPT12), the EPT violation | ||
6681 | * will be injected with nested_ept_inject_page_fault() | ||
6682 | */ | ||
6683 | return 0; | ||
6553 | case EXIT_REASON_EPT_MISCONFIG: | 6684 | case EXIT_REASON_EPT_MISCONFIG: |
6685 | /* | ||
6686 | * L2 never uses directly L1's EPT, but rather L0's own EPT | ||
6687 | * table (shadow on EPT) or a merged EPT table that L0 built | ||
6688 | * (EPT on EPT). So any problems with the structure of the | ||
6689 | * table is L0's fault. | ||
6690 | */ | ||
6554 | return 0; | 6691 | return 0; |
6555 | case EXIT_REASON_PREEMPTION_TIMER: | 6692 | case EXIT_REASON_PREEMPTION_TIMER: |
6556 | return vmcs12->pin_based_vm_exec_control & | 6693 | return vmcs12->pin_based_vm_exec_control & |
@@ -6638,7 +6775,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
6638 | 6775 | ||
6639 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && | 6776 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && |
6640 | !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( | 6777 | !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( |
6641 | get_vmcs12(vcpu), vcpu)))) { | 6778 | get_vmcs12(vcpu))))) { |
6642 | if (vmx_interrupt_allowed(vcpu)) { | 6779 | if (vmx_interrupt_allowed(vcpu)) { |
6643 | vmx->soft_vnmi_blocked = 0; | 6780 | vmx->soft_vnmi_blocked = 0; |
6644 | } else if (vmx->vnmi_blocked_time > 1000000000LL && | 6781 | } else if (vmx->vnmi_blocked_time > 1000000000LL && |
@@ -7326,6 +7463,48 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
7326 | entry->ecx |= bit(X86_FEATURE_VMX); | 7463 | entry->ecx |= bit(X86_FEATURE_VMX); |
7327 | } | 7464 | } |
7328 | 7465 | ||
7466 | static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, | ||
7467 | struct x86_exception *fault) | ||
7468 | { | ||
7469 | struct vmcs12 *vmcs12; | ||
7470 | nested_vmx_vmexit(vcpu); | ||
7471 | vmcs12 = get_vmcs12(vcpu); | ||
7472 | |||
7473 | if (fault->error_code & PFERR_RSVD_MASK) | ||
7474 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; | ||
7475 | else | ||
7476 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; | ||
7477 | vmcs12->exit_qualification = vcpu->arch.exit_qualification; | ||
7478 | vmcs12->guest_physical_address = fault->address; | ||
7479 | } | ||
7480 | |||
7481 | /* Callbacks for nested_ept_init_mmu_context: */ | ||
7482 | |||
7483 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) | ||
7484 | { | ||
7485 | /* return the page table to be shadowed - in our case, EPT12 */ | ||
7486 | return get_vmcs12(vcpu)->ept_pointer; | ||
7487 | } | ||
7488 | |||
7489 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | ||
7490 | { | ||
7491 | int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, | ||
7492 | nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); | ||
7493 | |||
7494 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; | ||
7495 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; | ||
7496 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; | ||
7497 | |||
7498 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; | ||
7499 | |||
7500 | return r; | ||
7501 | } | ||
7502 | |||
7503 | static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) | ||
7504 | { | ||
7505 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | ||
7506 | } | ||
7507 | |||
7329 | /* | 7508 | /* |
7330 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested | 7509 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested |
7331 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it | 7510 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it |
@@ -7388,7 +7567,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7388 | vmcs12->guest_interruptibility_info); | 7567 | vmcs12->guest_interruptibility_info); |
7389 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); | 7568 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); |
7390 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); | 7569 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); |
7391 | vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); | 7570 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); |
7392 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, | 7571 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, |
7393 | vmcs12->guest_pending_dbg_exceptions); | 7572 | vmcs12->guest_pending_dbg_exceptions); |
7394 | vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); | 7573 | vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); |
@@ -7508,15 +7687,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7508 | vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; | 7687 | vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; |
7509 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | 7688 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); |
7510 | 7689 | ||
7511 | /* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */ | 7690 | /* L2->L1 exit controls are emulated - the hardware exit is to L0 so |
7512 | vmcs_write32(VM_EXIT_CONTROLS, | 7691 | * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER |
7513 | vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl); | 7692 | * bits are further modified by vmx_set_efer() below. |
7514 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls | | 7693 | */ |
7694 | vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); | ||
7695 | |||
7696 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are | ||
7697 | * emulated by vmx_set_efer(), below. | ||
7698 | */ | ||
7699 | vmcs_write32(VM_ENTRY_CONTROLS, | ||
7700 | (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & | ||
7701 | ~VM_ENTRY_IA32E_MODE) | | ||
7515 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); | 7702 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); |
7516 | 7703 | ||
7517 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) | 7704 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) { |
7518 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); | 7705 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); |
7519 | else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) | 7706 | vcpu->arch.pat = vmcs12->guest_ia32_pat; |
7707 | } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) | ||
7520 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); | 7708 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); |
7521 | 7709 | ||
7522 | 7710 | ||
@@ -7538,6 +7726,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7538 | vmx_flush_tlb(vcpu); | 7726 | vmx_flush_tlb(vcpu); |
7539 | } | 7727 | } |
7540 | 7728 | ||
7729 | if (nested_cpu_has_ept(vmcs12)) { | ||
7730 | kvm_mmu_unload(vcpu); | ||
7731 | nested_ept_init_mmu_context(vcpu); | ||
7732 | } | ||
7733 | |||
7541 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) | 7734 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) |
7542 | vcpu->arch.efer = vmcs12->guest_ia32_efer; | 7735 | vcpu->arch.efer = vmcs12->guest_ia32_efer; |
7543 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) | 7736 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) |
@@ -7565,6 +7758,16 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7565 | kvm_set_cr3(vcpu, vmcs12->guest_cr3); | 7758 | kvm_set_cr3(vcpu, vmcs12->guest_cr3); |
7566 | kvm_mmu_reset_context(vcpu); | 7759 | kvm_mmu_reset_context(vcpu); |
7567 | 7760 | ||
7761 | /* | ||
7762 | * L1 may access the L2's PDPTR, so save them to construct vmcs12 | ||
7763 | */ | ||
7764 | if (enable_ept) { | ||
7765 | vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); | ||
7766 | vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); | ||
7767 | vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); | ||
7768 | vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); | ||
7769 | } | ||
7770 | |||
7568 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); | 7771 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); |
7569 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); | 7772 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); |
7570 | } | 7773 | } |
@@ -7887,6 +8090,22 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7887 | vmcs12->guest_pending_dbg_exceptions = | 8090 | vmcs12->guest_pending_dbg_exceptions = |
7888 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | 8091 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); |
7889 | 8092 | ||
8093 | /* | ||
8094 | * In some cases (usually, nested EPT), L2 is allowed to change its | ||
8095 | * own CR3 without exiting. If it has changed it, we must keep it. | ||
8096 | * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined | ||
8097 | * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12. | ||
8098 | * | ||
8099 | * Additionally, restore L2's PDPTR to vmcs12. | ||
8100 | */ | ||
8101 | if (enable_ept) { | ||
8102 | vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3); | ||
8103 | vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); | ||
8104 | vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); | ||
8105 | vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); | ||
8106 | vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); | ||
8107 | } | ||
8108 | |||
7890 | vmcs12->vm_entry_controls = | 8109 | vmcs12->vm_entry_controls = |
7891 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | 8110 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | |
7892 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); | 8111 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); |
@@ -7948,6 +8167,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7948 | static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | 8167 | static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, |
7949 | struct vmcs12 *vmcs12) | 8168 | struct vmcs12 *vmcs12) |
7950 | { | 8169 | { |
8170 | struct kvm_segment seg; | ||
8171 | |||
7951 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) | 8172 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) |
7952 | vcpu->arch.efer = vmcs12->host_ia32_efer; | 8173 | vcpu->arch.efer = vmcs12->host_ia32_efer; |
7953 | else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) | 8174 | else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) |
@@ -7982,7 +8203,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
7982 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); | 8203 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); |
7983 | kvm_set_cr4(vcpu, vmcs12->host_cr4); | 8204 | kvm_set_cr4(vcpu, vmcs12->host_cr4); |
7984 | 8205 | ||
7985 | /* shadow page tables on either EPT or shadow page tables */ | 8206 | if (nested_cpu_has_ept(vmcs12)) |
8207 | nested_ept_uninit_mmu_context(vcpu); | ||
8208 | |||
7986 | kvm_set_cr3(vcpu, vmcs12->host_cr3); | 8209 | kvm_set_cr3(vcpu, vmcs12->host_cr3); |
7987 | kvm_mmu_reset_context(vcpu); | 8210 | kvm_mmu_reset_context(vcpu); |
7988 | 8211 | ||
@@ -8001,23 +8224,61 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
8001 | vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); | 8224 | vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); |
8002 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); | 8225 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); |
8003 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); | 8226 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); |
8004 | vmcs_writel(GUEST_TR_BASE, vmcs12->host_tr_base); | 8227 | |
8005 | vmcs_writel(GUEST_GS_BASE, vmcs12->host_gs_base); | 8228 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { |
8006 | vmcs_writel(GUEST_FS_BASE, vmcs12->host_fs_base); | ||
8007 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->host_es_selector); | ||
8008 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->host_cs_selector); | ||
8009 | vmcs_write16(GUEST_SS_SELECTOR, vmcs12->host_ss_selector); | ||
8010 | vmcs_write16(GUEST_DS_SELECTOR, vmcs12->host_ds_selector); | ||
8011 | vmcs_write16(GUEST_FS_SELECTOR, vmcs12->host_fs_selector); | ||
8012 | vmcs_write16(GUEST_GS_SELECTOR, vmcs12->host_gs_selector); | ||
8013 | vmcs_write16(GUEST_TR_SELECTOR, vmcs12->host_tr_selector); | ||
8014 | |||
8015 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) | ||
8016 | vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); | 8229 | vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); |
8230 | vcpu->arch.pat = vmcs12->host_ia32_pat; | ||
8231 | } | ||
8017 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | 8232 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) |
8018 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, | 8233 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, |
8019 | vmcs12->host_ia32_perf_global_ctrl); | 8234 | vmcs12->host_ia32_perf_global_ctrl); |
8020 | 8235 | ||
8236 | /* Set L1 segment info according to Intel SDM | ||
8237 | 27.5.2 Loading Host Segment and Descriptor-Table Registers */ | ||
8238 | seg = (struct kvm_segment) { | ||
8239 | .base = 0, | ||
8240 | .limit = 0xFFFFFFFF, | ||
8241 | .selector = vmcs12->host_cs_selector, | ||
8242 | .type = 11, | ||
8243 | .present = 1, | ||
8244 | .s = 1, | ||
8245 | .g = 1 | ||
8246 | }; | ||
8247 | if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) | ||
8248 | seg.l = 1; | ||
8249 | else | ||
8250 | seg.db = 1; | ||
8251 | vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); | ||
8252 | seg = (struct kvm_segment) { | ||
8253 | .base = 0, | ||
8254 | .limit = 0xFFFFFFFF, | ||
8255 | .type = 3, | ||
8256 | .present = 1, | ||
8257 | .s = 1, | ||
8258 | .db = 1, | ||
8259 | .g = 1 | ||
8260 | }; | ||
8261 | seg.selector = vmcs12->host_ds_selector; | ||
8262 | vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); | ||
8263 | seg.selector = vmcs12->host_es_selector; | ||
8264 | vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); | ||
8265 | seg.selector = vmcs12->host_ss_selector; | ||
8266 | vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); | ||
8267 | seg.selector = vmcs12->host_fs_selector; | ||
8268 | seg.base = vmcs12->host_fs_base; | ||
8269 | vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); | ||
8270 | seg.selector = vmcs12->host_gs_selector; | ||
8271 | seg.base = vmcs12->host_gs_base; | ||
8272 | vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); | ||
8273 | seg = (struct kvm_segment) { | ||
8274 | .base = vmcs12->host_tr_base, | ||
8275 | .limit = 0x67, | ||
8276 | .selector = vmcs12->host_tr_selector, | ||
8277 | .type = 11, | ||
8278 | .present = 1 | ||
8279 | }; | ||
8280 | vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); | ||
8281 | |||
8021 | kvm_set_dr(vcpu, 7, 0x400); | 8282 | kvm_set_dr(vcpu, 7, 0x400); |
8022 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | 8283 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); |
8023 | } | 8284 | } |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d21bce505315..e5ca72a5cdb6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -682,17 +682,6 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
682 | */ | 682 | */ |
683 | } | 683 | } |
684 | 684 | ||
685 | /* | ||
686 | * Does the new cr3 value map to physical memory? (Note, we | ||
687 | * catch an invalid cr3 even in real-mode, because it would | ||
688 | * cause trouble later on when we turn on paging anyway.) | ||
689 | * | ||
690 | * A real CPU would silently accept an invalid cr3 and would | ||
691 | * attempt to use it - with largely undefined (and often hard | ||
692 | * to debug) behavior on the guest side. | ||
693 | */ | ||
694 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | ||
695 | return 1; | ||
696 | vcpu->arch.cr3 = cr3; | 685 | vcpu->arch.cr3 = cr3; |
697 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | 686 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); |
698 | vcpu->arch.mmu.new_cr3(vcpu); | 687 | vcpu->arch.mmu.new_cr3(vcpu); |
@@ -850,7 +839,8 @@ static u32 msrs_to_save[] = { | |||
850 | #ifdef CONFIG_X86_64 | 839 | #ifdef CONFIG_X86_64 |
851 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 840 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
852 | #endif | 841 | #endif |
853 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA | 842 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, |
843 | MSR_IA32_FEATURE_CONTROL | ||
854 | }; | 844 | }; |
855 | 845 | ||
856 | static unsigned num_msrs_to_save; | 846 | static unsigned num_msrs_to_save; |
@@ -1457,6 +1447,29 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) | |||
1457 | #endif | 1447 | #endif |
1458 | } | 1448 | } |
1459 | 1449 | ||
1450 | static void kvm_gen_update_masterclock(struct kvm *kvm) | ||
1451 | { | ||
1452 | #ifdef CONFIG_X86_64 | ||
1453 | int i; | ||
1454 | struct kvm_vcpu *vcpu; | ||
1455 | struct kvm_arch *ka = &kvm->arch; | ||
1456 | |||
1457 | spin_lock(&ka->pvclock_gtod_sync_lock); | ||
1458 | kvm_make_mclock_inprogress_request(kvm); | ||
1459 | /* no guest entries from this point */ | ||
1460 | pvclock_update_vm_gtod_copy(kvm); | ||
1461 | |||
1462 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1463 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
1464 | |||
1465 | /* guest entries allowed */ | ||
1466 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1467 | clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); | ||
1468 | |||
1469 | spin_unlock(&ka->pvclock_gtod_sync_lock); | ||
1470 | #endif | ||
1471 | } | ||
1472 | |||
1460 | static int kvm_guest_time_update(struct kvm_vcpu *v) | 1473 | static int kvm_guest_time_update(struct kvm_vcpu *v) |
1461 | { | 1474 | { |
1462 | unsigned long flags, this_tsc_khz; | 1475 | unsigned long flags, this_tsc_khz; |
@@ -3806,6 +3819,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3806 | delta = user_ns.clock - now_ns; | 3819 | delta = user_ns.clock - now_ns; |
3807 | local_irq_enable(); | 3820 | local_irq_enable(); |
3808 | kvm->arch.kvmclock_offset = delta; | 3821 | kvm->arch.kvmclock_offset = delta; |
3822 | kvm_gen_update_masterclock(kvm); | ||
3809 | break; | 3823 | break; |
3810 | } | 3824 | } |
3811 | case KVM_GET_CLOCK: { | 3825 | case KVM_GET_CLOCK: { |
@@ -4955,6 +4969,97 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, | |||
4955 | static int complete_emulated_mmio(struct kvm_vcpu *vcpu); | 4969 | static int complete_emulated_mmio(struct kvm_vcpu *vcpu); |
4956 | static int complete_emulated_pio(struct kvm_vcpu *vcpu); | 4970 | static int complete_emulated_pio(struct kvm_vcpu *vcpu); |
4957 | 4971 | ||
4972 | static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, | ||
4973 | unsigned long *db) | ||
4974 | { | ||
4975 | u32 dr6 = 0; | ||
4976 | int i; | ||
4977 | u32 enable, rwlen; | ||
4978 | |||
4979 | enable = dr7; | ||
4980 | rwlen = dr7 >> 16; | ||
4981 | for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4) | ||
4982 | if ((enable & 3) && (rwlen & 15) == type && db[i] == addr) | ||
4983 | dr6 |= (1 << i); | ||
4984 | return dr6; | ||
4985 | } | ||
4986 | |||
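kvm_vcpu_check_hw_bp() above walks the four debug-address registers using the architectural DR7 layout: two enable bits per breakpoint in bits 0-7 and a four-bit R/W+LEN field per breakpoint starting at bit 16, returning a DR6-style bitmap of the breakpoints that hit. A sketch of the same walk with the fields spelled out, not taken from the patch:

#include <stdint.h>
#include <stdio.h>

/*
 * Build a DR6 bitmap of breakpoints that match @addr for access @type,
 * in the spirit of kvm_vcpu_check_hw_bp() above.
 */
static uint32_t check_hw_bp(unsigned long addr, uint32_t type,
                            uint32_t dr7, const unsigned long db[4])
{
        uint32_t dr6 = 0, enable = dr7, rwlen = dr7 >> 16;
        int i;

        for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
                if ((enable & 3) &&            /* L or G enable bit set  */
                    (rwlen & 15) == type &&    /* R/W + LEN nibble match */
                    db[i] == addr)             /* address match          */
                        dr6 |= 1U << i;        /* DR6.Bi                 */
        return dr6;
}

int main(void)
{
        /* Breakpoint 2: locally enabled, R/W+LEN nibble 0 (execution). */
        unsigned long db[4] = { 0, 0, 0x401000, 0 };
        uint32_t dr7 = 1U << 4;

        printf("dr6 = %#x\n", check_hw_bp(0x401000, 0, dr7, db)); /* 0x4 */
        return 0;
}

Type 0 with length 0 is the instruction-execution case the new kvm_vcpu_check_breakpoint() caller passes for EIP matches; data watchpoints would use a different R/W+LEN nibble.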
4987 | static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) | ||
4988 | { | ||
4989 | struct kvm_run *kvm_run = vcpu->run; | ||
4990 | |||
4991 | /* | ||
4992 | * Use the "raw" value to see if TF was passed to the processor. | ||
4993 | * Note that the new value of the flags has not been saved yet. | ||
4994 | * | ||
4995 | * This is correct even for TF set by the guest, because "the | ||
4996 | * processor will not generate this exception after the instruction | ||
4997 | * that sets the TF flag". | ||
4998 | */ | ||
4999 | unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); | ||
5000 | |||
5001 | if (unlikely(rflags & X86_EFLAGS_TF)) { | ||
5002 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | ||
5003 | kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; | ||
5004 | kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; | ||
5005 | kvm_run->debug.arch.exception = DB_VECTOR; | ||
5006 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | ||
5007 | *r = EMULATE_USER_EXIT; | ||
5008 | } else { | ||
5009 | vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF; | ||
5010 | /* | ||
5011 | * "Certain debug exceptions may clear bit 0-3. The | ||
5012 | * remaining contents of the DR6 register are never | ||
5013 | * cleared by the processor". | ||
5014 | */ | ||
5015 | vcpu->arch.dr6 &= ~15; | ||
5016 | vcpu->arch.dr6 |= DR6_BS; | ||
5017 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
5018 | } | ||
5019 | } | ||
5020 | } | ||
5021 | |||
5022 | static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) | ||
5023 | { | ||
5024 | struct kvm_run *kvm_run = vcpu->run; | ||
5025 | unsigned long eip = vcpu->arch.emulate_ctxt.eip; | ||
5026 | u32 dr6 = 0; | ||
5027 | |||
5028 | if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && | ||
5029 | (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { | ||
5030 | dr6 = kvm_vcpu_check_hw_bp(eip, 0, | ||
5031 | vcpu->arch.guest_debug_dr7, | ||
5032 | vcpu->arch.eff_db); | ||
5033 | |||
5034 | if (dr6 != 0) { | ||
5035 | kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; | ||
5036 | kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + | ||
5037 | get_segment_base(vcpu, VCPU_SREG_CS); | ||
5038 | |||
5039 | kvm_run->debug.arch.exception = DB_VECTOR; | ||
5040 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | ||
5041 | *r = EMULATE_USER_EXIT; | ||
5042 | return true; | ||
5043 | } | ||
5044 | } | ||
5045 | |||
5046 | if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) { | ||
5047 | dr6 = kvm_vcpu_check_hw_bp(eip, 0, | ||
5048 | vcpu->arch.dr7, | ||
5049 | vcpu->arch.db); | ||
5050 | |||
5051 | if (dr6 != 0) { | ||
5052 | vcpu->arch.dr6 &= ~15; | ||
5053 | vcpu->arch.dr6 |= dr6; | ||
5054 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
5055 | *r = EMULATE_DONE; | ||
5056 | return true; | ||
5057 | } | ||
5058 | } | ||
5059 | |||
5060 | return false; | ||
5061 | } | ||
5062 | |||
4958 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, | 5063 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
4959 | unsigned long cr2, | 5064 | unsigned long cr2, |
4960 | int emulation_type, | 5065 | int emulation_type, |
@@ -4975,6 +5080,16 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4975 | 5080 | ||
4976 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 5081 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
4977 | init_emulate_ctxt(vcpu); | 5082 | init_emulate_ctxt(vcpu); |
5083 | |||
5084 | /* | ||
5085 | * We will reenter on the same instruction since | ||
5086 | * we do not set complete_userspace_io. This does not | ||
5087 | * handle watchpoints yet, those would be handled in | ||
5088 | * the emulate_ops. | ||
5089 | */ | ||
5090 | if (kvm_vcpu_check_breakpoint(vcpu, &r)) | ||
5091 | return r; | ||
5092 | |||
4978 | ctxt->interruptibility = 0; | 5093 | ctxt->interruptibility = 0; |
4979 | ctxt->have_exception = false; | 5094 | ctxt->have_exception = false; |
4980 | ctxt->perm_ok = false; | 5095 | ctxt->perm_ok = false; |
@@ -5031,17 +5146,18 @@ restart: | |||
5031 | inject_emulated_exception(vcpu); | 5146 | inject_emulated_exception(vcpu); |
5032 | r = EMULATE_DONE; | 5147 | r = EMULATE_DONE; |
5033 | } else if (vcpu->arch.pio.count) { | 5148 | } else if (vcpu->arch.pio.count) { |
5034 | if (!vcpu->arch.pio.in) | 5149 | if (!vcpu->arch.pio.in) { |
5150 | /* FIXME: return into emulator if single-stepping. */ | ||
5035 | vcpu->arch.pio.count = 0; | 5151 | vcpu->arch.pio.count = 0; |
5036 | else { | 5152 | } else { |
5037 | writeback = false; | 5153 | writeback = false; |
5038 | vcpu->arch.complete_userspace_io = complete_emulated_pio; | 5154 | vcpu->arch.complete_userspace_io = complete_emulated_pio; |
5039 | } | 5155 | } |
5040 | r = EMULATE_DO_MMIO; | 5156 | r = EMULATE_USER_EXIT; |
5041 | } else if (vcpu->mmio_needed) { | 5157 | } else if (vcpu->mmio_needed) { |
5042 | if (!vcpu->mmio_is_write) | 5158 | if (!vcpu->mmio_is_write) |
5043 | writeback = false; | 5159 | writeback = false; |
5044 | r = EMULATE_DO_MMIO; | 5160 | r = EMULATE_USER_EXIT; |
5045 | vcpu->arch.complete_userspace_io = complete_emulated_mmio; | 5161 | vcpu->arch.complete_userspace_io = complete_emulated_mmio; |
5046 | } else if (r == EMULATION_RESTART) | 5162 | } else if (r == EMULATION_RESTART) |
5047 | goto restart; | 5163 | goto restart; |
@@ -5050,10 +5166,12 @@ restart: | |||
5050 | 5166 | ||
5051 | if (writeback) { | 5167 | if (writeback) { |
5052 | toggle_interruptibility(vcpu, ctxt->interruptibility); | 5168 | toggle_interruptibility(vcpu, ctxt->interruptibility); |
5053 | kvm_set_rflags(vcpu, ctxt->eflags); | ||
5054 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5169 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5055 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | 5170 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; |
5056 | kvm_rip_write(vcpu, ctxt->eip); | 5171 | kvm_rip_write(vcpu, ctxt->eip); |
5172 | if (r == EMULATE_DONE) | ||
5173 | kvm_vcpu_check_singlestep(vcpu, &r); | ||
5174 | kvm_set_rflags(vcpu, ctxt->eflags); | ||
5057 | } else | 5175 | } else |
5058 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; | 5176 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; |
5059 | 5177 | ||
@@ -5347,7 +5465,7 @@ static struct notifier_block pvclock_gtod_notifier = { | |||
5347 | int kvm_arch_init(void *opaque) | 5465 | int kvm_arch_init(void *opaque) |
5348 | { | 5466 | { |
5349 | int r; | 5467 | int r; |
5350 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; | 5468 | struct kvm_x86_ops *ops = opaque; |
5351 | 5469 | ||
5352 | if (kvm_x86_ops) { | 5470 | if (kvm_x86_ops) { |
5353 | printk(KERN_ERR "kvm: already loaded the other module\n"); | 5471 | printk(KERN_ERR "kvm: already loaded the other module\n"); |
@@ -5495,6 +5613,23 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
5495 | return 1; | 5613 | return 1; |
5496 | } | 5614 | } |
5497 | 5615 | ||
5616 | /* | ||
5617 | * kvm_pv_kick_cpu_op: Kick a vcpu. | ||
5618 | * | ||
5619 | * @apicid - apicid of vcpu to be kicked. | ||
5620 | */ | ||
5621 | static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | ||
5622 | { | ||
5623 | struct kvm_lapic_irq lapic_irq; | ||
5624 | |||
5625 | lapic_irq.shorthand = 0; | ||
5626 | lapic_irq.dest_mode = 0; | ||
5627 | lapic_irq.dest_id = apicid; | ||
5628 | |||
5629 | lapic_irq.delivery_mode = APIC_DM_REMRD; | ||
5630 | kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL); | ||
5631 | } | ||
5632 | |||
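kvm_pv_kick_cpu_op() is the host side of the new KVM_HC_KICK_CPU hypercall: it synthesizes a REMRD-mode lapic interrupt to the target APIC ID, waking a vCPU that halted in the paravirtual spinlock slow path. On the guest side this is an ordinary KVM hypercall with the number in RAX and arguments in RBX and RCX. The sketch below illustrates that ABI; it assumes KVM_HC_KICK_CPU is 5 and uses vmcall directly, whereas in-tree guests go through kvm_hypercall2():

/* Guest-side sketch only -- real guests use kvm_hypercall2(). */
#define KVM_HC_KICK_CPU 5

static inline long kvm_hypercall2_sketch(unsigned int nr,
                                         unsigned long p1, unsigned long p2)
{
        long ret;

        /* KVM hypercall ABI: nr in RAX, args in RBX, RCX; result in RAX. */
        asm volatile("vmcall"
                     : "=a"(ret)
                     : "a"(nr), "b"(p1), "c"(p2)
                     : "memory");
        return ret;
}

void kick_vcpu(int apicid)
{
        /* flags (arg 0) is currently unused by the host and passed as 0. */
        kvm_hypercall2_sketch(KVM_HC_KICK_CPU, 0, apicid);
}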
5498 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 5633 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
5499 | { | 5634 | { |
5500 | unsigned long nr, a0, a1, a2, a3, ret; | 5635 | unsigned long nr, a0, a1, a2, a3, ret; |
@@ -5528,6 +5663,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
5528 | case KVM_HC_VAPIC_POLL_IRQ: | 5663 | case KVM_HC_VAPIC_POLL_IRQ: |
5529 | ret = 0; | 5664 | ret = 0; |
5530 | break; | 5665 | break; |
5666 | case KVM_HC_KICK_CPU: | ||
5667 | kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); | ||
5668 | ret = 0; | ||
5669 | break; | ||
5531 | default: | 5670 | default: |
5532 | ret = -KVM_ENOSYS; | 5671 | ret = -KVM_ENOSYS; |
5533 | break; | 5672 | break; |
@@ -5689,29 +5828,6 @@ static void process_nmi(struct kvm_vcpu *vcpu) | |||
5689 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5828 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5690 | } | 5829 | } |
5691 | 5830 | ||
5692 | static void kvm_gen_update_masterclock(struct kvm *kvm) | ||
5693 | { | ||
5694 | #ifdef CONFIG_X86_64 | ||
5695 | int i; | ||
5696 | struct kvm_vcpu *vcpu; | ||
5697 | struct kvm_arch *ka = &kvm->arch; | ||
5698 | |||
5699 | spin_lock(&ka->pvclock_gtod_sync_lock); | ||
5700 | kvm_make_mclock_inprogress_request(kvm); | ||
5701 | /* no guest entries from this point */ | ||
5702 | pvclock_update_vm_gtod_copy(kvm); | ||
5703 | |||
5704 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
5705 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
5706 | |||
5707 | /* guest entries allowed */ | ||
5708 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
5709 | clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); | ||
5710 | |||
5711 | spin_unlock(&ka->pvclock_gtod_sync_lock); | ||
5712 | #endif | ||
5713 | } | ||
5714 | |||
5715 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | 5831 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) |
5716 | { | 5832 | { |
5717 | u64 eoi_exit_bitmap[4]; | 5833 | u64 eoi_exit_bitmap[4]; |
@@ -5950,6 +6066,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5950 | kvm_apic_accept_events(vcpu); | 6066 | kvm_apic_accept_events(vcpu); |
5951 | switch(vcpu->arch.mp_state) { | 6067 | switch(vcpu->arch.mp_state) { |
5952 | case KVM_MP_STATE_HALTED: | 6068 | case KVM_MP_STATE_HALTED: |
6069 | vcpu->arch.pv.pv_unhalted = false; | ||
5953 | vcpu->arch.mp_state = | 6070 | vcpu->arch.mp_state = |
5954 | KVM_MP_STATE_RUNNABLE; | 6071 | KVM_MP_STATE_RUNNABLE; |
5955 | case KVM_MP_STATE_RUNNABLE: | 6072 | case KVM_MP_STATE_RUNNABLE: |
@@ -6061,6 +6178,8 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) | |||
6061 | 6178 | ||
6062 | if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { | 6179 | if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { |
6063 | vcpu->mmio_needed = 0; | 6180 | vcpu->mmio_needed = 0; |
6181 | |||
6182 | /* FIXME: return into emulator if single-stepping. */ | ||
6064 | if (vcpu->mmio_is_write) | 6183 | if (vcpu->mmio_is_write) |
6065 | return 1; | 6184 | return 1; |
6066 | vcpu->mmio_read_completed = 1; | 6185 | vcpu->mmio_read_completed = 1; |
@@ -6249,7 +6368,12 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | |||
6249 | struct kvm_mp_state *mp_state) | 6368 | struct kvm_mp_state *mp_state) |
6250 | { | 6369 | { |
6251 | kvm_apic_accept_events(vcpu); | 6370 | kvm_apic_accept_events(vcpu); |
6252 | mp_state->mp_state = vcpu->arch.mp_state; | 6371 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && |
6372 | vcpu->arch.pv.pv_unhalted) | ||
6373 | mp_state->mp_state = KVM_MP_STATE_RUNNABLE; | ||
6374 | else | ||
6375 | mp_state->mp_state = vcpu->arch.mp_state; | ||
6376 | |||
6253 | return 0; | 6377 | return 0; |
6254 | } | 6378 | } |
6255 | 6379 | ||
@@ -6770,6 +6894,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6770 | BUG_ON(vcpu->kvm == NULL); | 6894 | BUG_ON(vcpu->kvm == NULL); |
6771 | kvm = vcpu->kvm; | 6895 | kvm = vcpu->kvm; |
6772 | 6896 | ||
6897 | vcpu->arch.pv.pv_unhalted = false; | ||
6773 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; | 6898 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; |
6774 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) | 6899 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) |
6775 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 6900 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
@@ -7019,6 +7144,15 @@ out_free: | |||
7019 | return -ENOMEM; | 7144 | return -ENOMEM; |
7020 | } | 7145 | } |
7021 | 7146 | ||
7147 | void kvm_arch_memslots_updated(struct kvm *kvm) | ||
7148 | { | ||
7149 | /* | ||
7150 | * memslots->generation has been incremented. | ||
7151 | * mmio generation may have reached its maximum value. | ||
7152 | */ | ||
7153 | kvm_mmu_invalidate_mmio_sptes(kvm); | ||
7154 | } | ||
7155 | |||
7022 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 7156 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
7023 | struct kvm_memory_slot *memslot, | 7157 | struct kvm_memory_slot *memslot, |
7024 | struct kvm_userspace_memory_region *mem, | 7158 | struct kvm_userspace_memory_region *mem, |
@@ -7079,11 +7213,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
7079 | */ | 7213 | */ |
7080 | if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) | 7214 | if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) |
7081 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 7215 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
7082 | /* | ||
7083 | * If memory slot is created, or moved, we need to clear all | ||
7084 | * mmio sptes. | ||
7085 | */ | ||
7086 | kvm_mmu_invalidate_mmio_sptes(kvm); | ||
7087 | } | 7216 | } |
7088 | 7217 | ||
7089 | void kvm_arch_flush_shadow_all(struct kvm *kvm) | 7218 | void kvm_arch_flush_shadow_all(struct kvm *kvm) |
@@ -7103,6 +7232,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
7103 | !vcpu->arch.apf.halted) | 7232 | !vcpu->arch.apf.halted) |
7104 | || !list_empty_careful(&vcpu->async_pf.done) | 7233 | || !list_empty_careful(&vcpu->async_pf.done) |
7105 | || kvm_apic_has_events(vcpu) | 7234 | || kvm_apic_has_events(vcpu) |
7235 | || vcpu->arch.pv.pv_unhalted | ||
7106 | || atomic_read(&vcpu->arch.nmi_queued) || | 7236 | || atomic_read(&vcpu->arch.nmi_queued) || |
7107 | (kvm_arch_interrupt_allowed(vcpu) && | 7237 | (kvm_arch_interrupt_allowed(vcpu) && |
7108 | kvm_cpu_has_interrupt(vcpu)); | 7238 | kvm_cpu_has_interrupt(vcpu)); |
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 25b7ae8d058a..7609e0e421ec 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c | |||
@@ -6,6 +6,7 @@ | |||
6 | */ | 6 | */ |
7 | #include <asm/checksum.h> | 7 | #include <asm/checksum.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <asm/smap.h> | ||
9 | 10 | ||
10 | /** | 11 | /** |
11 | * csum_partial_copy_from_user - Copy and checksum from user space. | 12 | * csum_partial_copy_from_user - Copy and checksum from user space. |
@@ -52,8 +53,10 @@ csum_partial_copy_from_user(const void __user *src, void *dst, | |||
52 | len -= 2; | 53 | len -= 2; |
53 | } | 54 | } |
54 | } | 55 | } |
56 | stac(); | ||
55 | isum = csum_partial_copy_generic((__force const void *)src, | 57 | isum = csum_partial_copy_generic((__force const void *)src, |
56 | dst, len, isum, errp, NULL); | 58 | dst, len, isum, errp, NULL); |
59 | clac(); | ||
57 | if (unlikely(*errp)) | 60 | if (unlikely(*errp)) |
58 | goto out_err; | 61 | goto out_err; |
59 | 62 | ||
@@ -82,6 +85,8 @@ __wsum | |||
82 | csum_partial_copy_to_user(const void *src, void __user *dst, | 85 | csum_partial_copy_to_user(const void *src, void __user *dst, |
83 | int len, __wsum isum, int *errp) | 86 | int len, __wsum isum, int *errp) |
84 | { | 87 | { |
88 | __wsum ret; | ||
89 | |||
85 | might_sleep(); | 90 | might_sleep(); |
86 | 91 | ||
87 | if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) { | 92 | if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) { |
@@ -105,8 +110,11 @@ csum_partial_copy_to_user(const void *src, void __user *dst, | |||
105 | } | 110 | } |
106 | 111 | ||
107 | *errp = 0; | 112 | *errp = 0; |
108 | return csum_partial_copy_generic(src, (void __force *)dst, | 113 | stac(); |
109 | len, isum, NULL, errp); | 114 | ret = csum_partial_copy_generic(src, (void __force *)dst, |
115 | len, isum, NULL, errp); | ||
116 | clac(); | ||
117 | return ret; | ||
110 | } | 118 | } |
111 | EXPORT_SYMBOL(csum_partial_copy_to_user); | 119 | EXPORT_SYMBOL(csum_partial_copy_to_user); |
112 | 120 | ||
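The csum-wrappers change above brackets only the user-memory access with stac()/clac(), which is the usual SMAP discipline: open the access window immediately before touching user pages, close it immediately after, and save the result so the window is closed before returning. A minimal user-space sketch of that bracketing pattern follows, with hypothetical allow_user_access()/block_user_access() stubs standing in for the real stac()/clac().

    #include <stdio.h>

    /* Stand-ins for stac()/clac(); the real helpers toggle EFLAGS.AC. */
    static void allow_user_access(void) { puts("AC set: user pages accessible"); }
    static void block_user_access(void) { puts("AC clear: user pages blocked"); }

    /* Open the access window only around the code that touches "user" memory. */
    static void copy_and_sum(void *dst, const void *user_src, size_t len,
                             unsigned int *sum)
    {
        const unsigned char *s = user_src;
        unsigned char *d = dst;
        unsigned int acc = 0;
        size_t i;

        allow_user_access();            /* plays the role of stac() */
        for (i = 0; i < len; i++) {     /* the only region touching user memory */
            d[i] = s[i];
            acc += s[i];
        }
        block_user_access();            /* plays the role of clac() */

        *sum = acc;
    }

    int main(void)
    {
        char src[] = "example", dst[8];
        unsigned int sum;

        copy_and_sum(dst, src, sizeof(src), &sum);
        printf("copied \"%s\", byte sum %u\n", dst, sum);
        return 0;
    }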
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 906fea315791..c905e89e19fe 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c | |||
@@ -68,7 +68,7 @@ EXPORT_SYMBOL(copy_in_user); | |||
68 | * Since protection fault in copy_from/to_user is not a normal situation, | 68 | * Since protection fault in copy_from/to_user is not a normal situation, |
69 | * it is not necessary to optimize tail handling. | 69 | * it is not necessary to optimize tail handling. |
70 | */ | 70 | */ |
71 | unsigned long | 71 | __visible unsigned long |
72 | copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) | 72 | copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) |
73 | { | 73 | { |
74 | char c; | 74 | char c; |
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 5d7e51f3fd28..533a85e3a07e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt | |||
@@ -1,10 +1,8 @@ | |||
1 | # x86 Opcode Maps | 1 | # x86 Opcode Maps |
2 | # | 2 | # |
3 | # This is (mostly) based on following documentations. | 3 | # This is (mostly) based on following documentations. |
4 | # - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2 | 4 | # - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C |
5 | # (#325383-040US, October 2011) | 5 | # (#326018-047US, June 2013) |
6 | # - Intel(R) Advanced Vector Extensions Programming Reference | ||
7 | # (#319433-011,JUNE 2011). | ||
8 | # | 6 | # |
9 | #<Opcode maps> | 7 | #<Opcode maps> |
10 | # Table: table-name | 8 | # Table: table-name |
@@ -29,6 +27,7 @@ | |||
29 | # - (F3): the last prefix is 0xF3 | 27 | # - (F3): the last prefix is 0xF3 |
30 | # - (F2): the last prefix is 0xF2 | 28 | # - (F2): the last prefix is 0xF2 |
31 | # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) | 29 | # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) |
30 | # - (66&F2): Both 0x66 and 0xF2 prefixes are specified. | ||
32 | 31 | ||
33 | Table: one byte opcode | 32 | Table: one byte opcode |
34 | Referrer: | 33 | Referrer: |
@@ -246,8 +245,8 @@ c2: RETN Iw (f64) | |||
246 | c3: RETN | 245 | c3: RETN |
247 | c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) | 246 | c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) |
248 | c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) | 247 | c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) |
249 | c6: Grp11 Eb,Ib (1A) | 248 | c6: Grp11A Eb,Ib (1A) |
250 | c7: Grp11 Ev,Iz (1A) | 249 | c7: Grp11B Ev,Iz (1A) |
251 | c8: ENTER Iw,Ib | 250 | c8: ENTER Iw,Ib |
252 | c9: LEAVE (d64) | 251 | c9: LEAVE (d64) |
253 | ca: RETF Iw | 252 | ca: RETF Iw |
@@ -293,8 +292,8 @@ ef: OUT DX,eAX | |||
293 | # 0xf0 - 0xff | 292 | # 0xf0 - 0xff |
294 | f0: LOCK (Prefix) | 293 | f0: LOCK (Prefix) |
295 | f1: | 294 | f1: |
296 | f2: REPNE (Prefix) | 295 | f2: REPNE (Prefix) | XACQUIRE (Prefix) |
297 | f3: REP/REPE (Prefix) | 296 | f3: REP/REPE (Prefix) | XRELEASE (Prefix) |
298 | f4: HLT | 297 | f4: HLT |
299 | f5: CMC | 298 | f5: CMC |
300 | f6: Grp3_1 Eb (1A) | 299 | f6: Grp3_1 Eb (1A) |
@@ -326,7 +325,8 @@ AVXcode: 1 | |||
326 | 0a: | 325 | 0a: |
327 | 0b: UD2 (1B) | 326 | 0b: UD2 (1B) |
328 | 0c: | 327 | 0c: |
329 | 0d: NOP Ev | GrpP | 328 | # AMD's prefetch group. Intel supports prefetchw(/1) only. |
329 | 0d: GrpP | ||
330 | 0e: FEMMS | 330 | 0e: FEMMS |
331 | # 3DNow! uses the last imm byte as opcode extension. | 331 | # 3DNow! uses the last imm byte as opcode extension. |
332 | 0f: 3DNow! Pq,Qq,Ib | 332 | 0f: 3DNow! Pq,Qq,Ib |
@@ -729,12 +729,12 @@ dc: VAESENC Vdq,Hdq,Wdq (66),(v1) | |||
729 | dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) | 729 | dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) |
730 | de: VAESDEC Vdq,Hdq,Wdq (66),(v1) | 730 | de: VAESDEC Vdq,Hdq,Wdq (66),(v1) |
731 | df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) | 731 | df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) |
732 | f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | 732 | f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) |
733 | f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | 733 | f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) |
734 | f2: ANDN Gy,By,Ey (v) | 734 | f2: ANDN Gy,By,Ey (v) |
735 | f3: Grp17 (1A) | 735 | f3: Grp17 (1A) |
736 | f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | 736 | f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) |
737 | f6: MULX By,Gy,rDX,Ey (F2),(v) | 737 | f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) |
738 | f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) | 738 | f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) |
739 | EndTable | 739 | EndTable |
740 | 740 | ||
@@ -861,8 +861,8 @@ EndTable | |||
861 | 861 | ||
862 | GrpTable: Grp7 | 862 | GrpTable: Grp7 |
863 | 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | 863 | 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) |
864 | 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) | 864 | 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) |
865 | 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | 865 | 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) |
866 | 3: LIDT Ms | 866 | 3: LIDT Ms |
867 | 4: SMSW Mw/Rv | 867 | 4: SMSW Mw/Rv |
868 | 5: | 868 | 5: |
@@ -880,15 +880,21 @@ EndTable | |||
880 | GrpTable: Grp9 | 880 | GrpTable: Grp9 |
881 | 1: CMPXCHG8B/16B Mq/Mdq | 881 | 1: CMPXCHG8B/16B Mq/Mdq |
882 | 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | 882 | 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) |
883 | 7: VMPTRST Mq | VMPTRST Mq (F3) | 883 | 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) |
884 | EndTable | 884 | EndTable |
885 | 885 | ||
886 | GrpTable: Grp10 | 886 | GrpTable: Grp10 |
887 | EndTable | 887 | EndTable |
888 | 888 | ||
889 | GrpTable: Grp11 | 889 | # Grp11A and Grp11B are expressed as Grp11 in Intel SDM |
890 | # Note: the operands are given by group opcode | 890 | GrpTable: Grp11A |
891 | 0: MOV | 891 | 0: MOV Eb,Ib |
892 | 7: XABORT Ib (000),(11B) | ||
893 | EndTable | ||
894 | |||
895 | GrpTable: Grp11B | ||
896 | 0: MOV Eb,Iz | ||
897 | 7: XBEGIN Jz (000),(11B) | ||
892 | EndTable | 898 | EndTable |
893 | 899 | ||
894 | GrpTable: Grp12 | 900 | GrpTable: Grp12 |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 2ec29ac78ae6..04664cdb7fda 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -78,8 +78,8 @@ __ref void *alloc_low_pages(unsigned int num) | |||
78 | return __va(pfn << PAGE_SHIFT); | 78 | return __va(pfn << PAGE_SHIFT); |
79 | } | 79 | } |
80 | 80 | ||
81 | /* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */ | 81 | /* need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS */ |
82 | #define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE) | 82 | #define INIT_PGT_BUF_SIZE (6 * PAGE_SIZE) |
83 | RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); | 83 | RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); |
84 | void __init early_alloc_pgt_buf(void) | 84 | void __init early_alloc_pgt_buf(void) |
85 | { | 85 | { |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0215e2c563ef..799580cabc78 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -487,7 +487,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) | |||
487 | unsigned long offset; | 487 | unsigned long offset; |
488 | resource_size_t last_addr; | 488 | resource_size_t last_addr; |
489 | unsigned int nrpages; | 489 | unsigned int nrpages; |
490 | enum fixed_addresses idx0, idx; | 490 | enum fixed_addresses idx; |
491 | int i, slot; | 491 | int i, slot; |
492 | 492 | ||
493 | WARN_ON(system_state != SYSTEM_BOOTING); | 493 | WARN_ON(system_state != SYSTEM_BOOTING); |
@@ -540,8 +540,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) | |||
540 | /* | 540 | /* |
541 | * Ok, go for it.. | 541 | * Ok, go for it.. |
542 | */ | 542 | */ |
543 | idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; | 543 | idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; |
544 | idx = idx0; | ||
545 | while (nrpages > 0) { | 544 | while (nrpages > 0) { |
546 | early_set_fixmap(idx, phys_addr, prot); | 545 | early_set_fixmap(idx, phys_addr, prot); |
547 | phys_addr += PAGE_SIZE; | 546 | phys_addr += PAGE_SIZE; |
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 62c29a5bfe26..25e7e1372bb2 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -112,11 +112,13 @@ static unsigned long mmap_legacy_base(void) | |||
112 | */ | 112 | */ |
113 | void arch_pick_mmap_layout(struct mm_struct *mm) | 113 | void arch_pick_mmap_layout(struct mm_struct *mm) |
114 | { | 114 | { |
115 | mm->mmap_legacy_base = mmap_legacy_base(); | ||
116 | mm->mmap_base = mmap_base(); | ||
117 | |||
115 | if (mmap_is_legacy()) { | 118 | if (mmap_is_legacy()) { |
116 | mm->mmap_base = mmap_legacy_base(); | 119 | mm->mmap_base = mm->mmap_legacy_base; |
117 | mm->get_unmapped_area = arch_get_unmapped_area; | 120 | mm->get_unmapped_area = arch_get_unmapped_area; |
118 | } else { | 121 | } else { |
119 | mm->mmap_base = mmap_base(); | ||
120 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | 122 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
121 | } | 123 | } |
122 | } | 124 | } |
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index cdd0da9dd530..266ca912f62e 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c | |||
@@ -146,6 +146,7 @@ int __init | |||
146 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | 146 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) |
147 | { | 147 | { |
148 | u64 start, end; | 148 | u64 start, end; |
149 | u32 hotpluggable; | ||
149 | int node, pxm; | 150 | int node, pxm; |
150 | 151 | ||
151 | if (srat_disabled()) | 152 | if (srat_disabled()) |
@@ -154,7 +155,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
154 | goto out_err_bad_srat; | 155 | goto out_err_bad_srat; |
155 | if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) | 156 | if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) |
156 | goto out_err; | 157 | goto out_err; |
157 | if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) | 158 | hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; |
159 | if (hotpluggable && !save_add_info()) | ||
158 | goto out_err; | 160 | goto out_err; |
159 | 161 | ||
160 | start = ma->base_address; | 162 | start = ma->base_address; |
@@ -174,9 +176,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
174 | 176 | ||
175 | node_set(node, numa_nodes_parsed); | 177 | node_set(node, numa_nodes_parsed); |
176 | 178 | ||
177 | printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", | 179 | pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n", |
178 | node, pxm, | 180 | node, pxm, |
179 | (unsigned long long) start, (unsigned long long) end - 1); | 181 | (unsigned long long) start, (unsigned long long) end - 1, |
182 | hotpluggable ? " hotplug" : ""); | ||
180 | 183 | ||
181 | return 0; | 184 | return 0; |
182 | out_err_bad_srat: | 185 | out_err_bad_srat: |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 48768df2471a..6890d8498e0b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -403,7 +403,7 @@ static void nmi_cpu_down(void *dummy) | |||
403 | nmi_cpu_shutdown(dummy); | 403 | nmi_cpu_shutdown(dummy); |
404 | } | 404 | } |
405 | 405 | ||
406 | static int nmi_create_files(struct super_block *sb, struct dentry *root) | 406 | static int nmi_create_files(struct dentry *root) |
407 | { | 407 | { |
408 | unsigned int i; | 408 | unsigned int i; |
409 | 409 | ||
@@ -420,14 +420,14 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) | |||
420 | continue; | 420 | continue; |
421 | 421 | ||
422 | snprintf(buf, sizeof(buf), "%d", i); | 422 | snprintf(buf, sizeof(buf), "%d", i); |
423 | dir = oprofilefs_mkdir(sb, root, buf); | 423 | dir = oprofilefs_mkdir(root, buf); |
424 | oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); | 424 | oprofilefs_create_ulong(dir, "enabled", &counter_config[i].enabled); |
425 | oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); | 425 | oprofilefs_create_ulong(dir, "event", &counter_config[i].event); |
426 | oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); | 426 | oprofilefs_create_ulong(dir, "count", &counter_config[i].count); |
427 | oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); | 427 | oprofilefs_create_ulong(dir, "unit_mask", &counter_config[i].unit_mask); |
428 | oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); | 428 | oprofilefs_create_ulong(dir, "kernel", &counter_config[i].kernel); |
429 | oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); | 429 | oprofilefs_create_ulong(dir, "user", &counter_config[i].user); |
430 | oprofilefs_create_ulong(sb, dir, "extra", &counter_config[i].extra); | 430 | oprofilefs_create_ulong(dir, "extra", &counter_config[i].extra); |
431 | } | 431 | } |
432 | 432 | ||
433 | return 0; | 433 | return 0; |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index b2b94438ff05..50d86c0e9ba4 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -454,16 +454,16 @@ static void init_ibs(void) | |||
454 | printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); | 454 | printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); |
455 | } | 455 | } |
456 | 456 | ||
457 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); | 457 | static int (*create_arch_files)(struct dentry *root); |
458 | 458 | ||
459 | static int setup_ibs_files(struct super_block *sb, struct dentry *root) | 459 | static int setup_ibs_files(struct dentry *root) |
460 | { | 460 | { |
461 | struct dentry *dir; | 461 | struct dentry *dir; |
462 | int ret = 0; | 462 | int ret = 0; |
463 | 463 | ||
464 | /* architecture specific files */ | 464 | /* architecture specific files */ |
465 | if (create_arch_files) | 465 | if (create_arch_files) |
466 | ret = create_arch_files(sb, root); | 466 | ret = create_arch_files(root); |
467 | 467 | ||
468 | if (ret) | 468 | if (ret) |
469 | return ret; | 469 | return ret; |
@@ -479,26 +479,26 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
479 | ibs_config.max_cnt_op = 250000; | 479 | ibs_config.max_cnt_op = 250000; |
480 | 480 | ||
481 | if (ibs_caps & IBS_CAPS_FETCHSAM) { | 481 | if (ibs_caps & IBS_CAPS_FETCHSAM) { |
482 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); | 482 | dir = oprofilefs_mkdir(root, "ibs_fetch"); |
483 | oprofilefs_create_ulong(sb, dir, "enable", | 483 | oprofilefs_create_ulong(dir, "enable", |
484 | &ibs_config.fetch_enabled); | 484 | &ibs_config.fetch_enabled); |
485 | oprofilefs_create_ulong(sb, dir, "max_count", | 485 | oprofilefs_create_ulong(dir, "max_count", |
486 | &ibs_config.max_cnt_fetch); | 486 | &ibs_config.max_cnt_fetch); |
487 | oprofilefs_create_ulong(sb, dir, "rand_enable", | 487 | oprofilefs_create_ulong(dir, "rand_enable", |
488 | &ibs_config.rand_en); | 488 | &ibs_config.rand_en); |
489 | } | 489 | } |
490 | 490 | ||
491 | if (ibs_caps & IBS_CAPS_OPSAM) { | 491 | if (ibs_caps & IBS_CAPS_OPSAM) { |
492 | dir = oprofilefs_mkdir(sb, root, "ibs_op"); | 492 | dir = oprofilefs_mkdir(root, "ibs_op"); |
493 | oprofilefs_create_ulong(sb, dir, "enable", | 493 | oprofilefs_create_ulong(dir, "enable", |
494 | &ibs_config.op_enabled); | 494 | &ibs_config.op_enabled); |
495 | oprofilefs_create_ulong(sb, dir, "max_count", | 495 | oprofilefs_create_ulong(dir, "max_count", |
496 | &ibs_config.max_cnt_op); | 496 | &ibs_config.max_cnt_op); |
497 | if (ibs_caps & IBS_CAPS_OPCNT) | 497 | if (ibs_caps & IBS_CAPS_OPCNT) |
498 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", | 498 | oprofilefs_create_ulong(dir, "dispatched_ops", |
499 | &ibs_config.dispatched_ops); | 499 | &ibs_config.dispatched_ops); |
500 | if (ibs_caps & IBS_CAPS_BRNTRGT) | 500 | if (ibs_caps & IBS_CAPS_BRNTRGT) |
501 | oprofilefs_create_ulong(sb, dir, "branch_target", | 501 | oprofilefs_create_ulong(dir, "branch_target", |
502 | &ibs_config.branch_target); | 502 | &ibs_config.branch_target); |
503 | } | 503 | } |
504 | 504 | ||
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d641897a1f4e..b30e937689d6 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -568,13 +568,8 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) | |||
568 | */ | 568 | */ |
569 | if (bus) { | 569 | if (bus) { |
570 | struct pci_bus *child; | 570 | struct pci_bus *child; |
571 | list_for_each_entry(child, &bus->children, node) { | 571 | list_for_each_entry(child, &bus->children, node) |
572 | struct pci_dev *self = child->self; | 572 | pcie_bus_configure_settings(child); |
573 | if (!self) | ||
574 | continue; | ||
575 | |||
576 | pcie_bus_configure_settings(child, self->pcie_mpss); | ||
577 | } | ||
578 | } | 573 | } |
579 | 574 | ||
580 | if (bus && node != -1) { | 575 | if (bus && node != -1) { |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 94919e307f8e..db6b1ab43255 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -210,6 +210,8 @@ static void pcibios_allocate_bridge_resources(struct pci_dev *dev) | |||
210 | r = &dev->resource[idx]; | 210 | r = &dev->resource[idx]; |
211 | if (!r->flags) | 211 | if (!r->flags) |
212 | continue; | 212 | continue; |
213 | if (r->parent) /* Already allocated */ | ||
214 | continue; | ||
213 | if (!r->start || pci_claim_resource(dev, idx) < 0) { | 215 | if (!r->start || pci_claim_resource(dev, idx) < 0) { |
214 | /* | 216 | /* |
215 | * Something is wrong with the region. | 217 | * Something is wrong with the region. |
@@ -318,6 +320,8 @@ static void pcibios_allocate_dev_rom_resource(struct pci_dev *dev) | |||
318 | r = &dev->resource[PCI_ROM_RESOURCE]; | 320 | r = &dev->resource[PCI_ROM_RESOURCE]; |
319 | if (!r->flags || !r->start) | 321 | if (!r->flags || !r->start) |
320 | return; | 322 | return; |
323 | if (r->parent) /* Already allocated */ | ||
324 | return; | ||
321 | 325 | ||
322 | if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { | 326 | if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { |
323 | r->end -= r->start; | 327 | r->end -= r->start; |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 082e88129712..5596c7bdd327 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -700,7 +700,7 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, | |||
700 | if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed) | 700 | if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed) |
701 | return -ENODEV; | 701 | return -ENODEV; |
702 | 702 | ||
703 | if (start > end) | 703 | if (start > end || !addr) |
704 | return -EINVAL; | 704 | return -EINVAL; |
705 | 705 | ||
706 | mutex_lock(&pci_mmcfg_lock); | 706 | mutex_lock(&pci_mmcfg_lock); |
@@ -716,11 +716,6 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, | |||
716 | return -EEXIST; | 716 | return -EEXIST; |
717 | } | 717 | } |
718 | 718 | ||
719 | if (!addr) { | ||
720 | mutex_unlock(&pci_mmcfg_lock); | ||
721 | return -EINVAL; | ||
722 | } | ||
723 | |||
724 | rc = -EBUSY; | 719 | rc = -EBUSY; |
725 | cfg = pci_mmconfig_alloc(seg, start, end, addr); | 720 | cfg = pci_mmconfig_alloc(seg, start, end, addr); |
726 | if (cfg == NULL) { | 721 | if (cfg == NULL) { |
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 6eb18c42a28a..903fded50786 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c | |||
@@ -23,11 +23,11 @@ | |||
23 | #include <linux/ioport.h> | 23 | #include <linux/ioport.h> |
24 | #include <linux/init.h> | 24 | #include <linux/init.h> |
25 | #include <linux/dmi.h> | 25 | #include <linux/dmi.h> |
26 | #include <linux/acpi.h> | ||
27 | #include <linux/io.h> | ||
28 | #include <linux/smp.h> | ||
26 | 29 | ||
27 | #include <asm/acpi.h> | ||
28 | #include <asm/segment.h> | 30 | #include <asm/segment.h> |
29 | #include <asm/io.h> | ||
30 | #include <asm/smp.h> | ||
31 | #include <asm/pci_x86.h> | 31 | #include <asm/pci_x86.h> |
32 | #include <asm/hw_irq.h> | 32 | #include <asm/hw_irq.h> |
33 | #include <asm/io_apic.h> | 33 | #include <asm/io_apic.h> |
@@ -43,7 +43,7 @@ | |||
43 | #define PCI_FIXED_BAR_4_SIZE 0x14 | 43 | #define PCI_FIXED_BAR_4_SIZE 0x14 |
44 | #define PCI_FIXED_BAR_5_SIZE 0x1c | 44 | #define PCI_FIXED_BAR_5_SIZE 0x1c |
45 | 45 | ||
46 | static int pci_soc_mode = 0; | 46 | static int pci_soc_mode; |
47 | 47 | ||
48 | /** | 48 | /** |
49 | * fixed_bar_cap - return the offset of the fixed BAR cap if found | 49 | * fixed_bar_cap - return the offset of the fixed BAR cap if found |
@@ -141,7 +141,8 @@ static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, | |||
141 | */ | 141 | */ |
142 | static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) | 142 | static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) |
143 | { | 143 | { |
144 | /* This is a workaround for A0 LNC bug where PCI status register does | 144 | /* |
145 | * This is a workaround for A0 LNC bug where PCI status register does | ||
145 | * not have new CAP bit set. can not be written by SW either. | 146 | * not have new CAP bit set. can not be written by SW either. |
146 | * | 147 | * |
147 | * PCI header type in real LNC indicates a single function device, this | 148 | * PCI header type in real LNC indicates a single function device, this |
@@ -154,7 +155,7 @@ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) | |||
154 | || devfn == PCI_DEVFN(0, 0) | 155 | || devfn == PCI_DEVFN(0, 0) |
155 | || devfn == PCI_DEVFN(3, 0))) | 156 | || devfn == PCI_DEVFN(3, 0))) |
156 | return 1; | 157 | return 1; |
157 | return 0; /* langwell on others */ | 158 | return 0; /* Langwell on others */ |
158 | } | 159 | } |
159 | 160 | ||
160 | static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, | 161 | static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, |
@@ -172,7 +173,8 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, | |||
172 | { | 173 | { |
173 | int offset; | 174 | int offset; |
174 | 175 | ||
175 | /* On MRST, there is no PCI ROM BAR, this will cause a subsequent read | 176 | /* |
177 | * On MRST, there is no PCI ROM BAR, this will cause a subsequent read | ||
176 | * to ROM BAR return 0 then being ignored. | 178 | * to ROM BAR return 0 then being ignored. |
177 | */ | 179 | */ |
178 | if (where == PCI_ROM_ADDRESS) | 180 | if (where == PCI_ROM_ADDRESS) |
@@ -210,7 +212,8 @@ static int mrst_pci_irq_enable(struct pci_dev *dev) | |||
210 | 212 | ||
211 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | 213 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); |
212 | 214 | ||
213 | /* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to | 215 | /* |
216 | * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to | ||
214 | * IOAPIC RTE entries, so we just enable RTE for the device. | 217 | * IOAPIC RTE entries, so we just enable RTE for the device. |
215 | */ | 218 | */ |
216 | irq_attr.ioapic = mp_find_ioapic(dev->irq); | 219 | irq_attr.ioapic = mp_find_ioapic(dev->irq); |
@@ -235,7 +238,7 @@ struct pci_ops pci_mrst_ops = { | |||
235 | */ | 238 | */ |
236 | int __init pci_mrst_init(void) | 239 | int __init pci_mrst_init(void) |
237 | { | 240 | { |
238 | printk(KERN_INFO "Intel MID platform detected, using MID PCI ops\n"); | 241 | pr_info("Intel MID platform detected, using MID PCI ops\n"); |
239 | pci_mmcfg_late_init(); | 242 | pci_mmcfg_late_init(); |
240 | pcibios_enable_irq = mrst_pci_irq_enable; | 243 | pcibios_enable_irq = mrst_pci_irq_enable; |
241 | pci_root_ops = pci_mrst_ops; | 244 | pci_root_ops = pci_mrst_ops; |
@@ -244,17 +247,21 @@ int __init pci_mrst_init(void) | |||
244 | return 1; | 247 | return 1; |
245 | } | 248 | } |
246 | 249 | ||
247 | /* Langwell devices are not true pci devices, they are not subject to 10 ms | 250 | /* |
248 | * d3 to d0 delay required by pci spec. | 251 | * Langwell devices are not true PCI devices; they are not subject to 10 ms |
252 | * d3 to d0 delay required by PCI spec. | ||
249 | */ | 253 | */ |
250 | static void pci_d3delay_fixup(struct pci_dev *dev) | 254 | static void pci_d3delay_fixup(struct pci_dev *dev) |
251 | { | 255 | { |
252 | /* PCI fixups are effectively decided compile time. If we have a dual | 256 | /* |
253 | SoC/non-SoC kernel we don't want to mangle d3 on non SoC devices */ | 257 | * PCI fixups are effectively decided compile time. If we have a dual |
254 | if (!pci_soc_mode) | 258 | * SoC/non-SoC kernel we don't want to mangle d3 on non-SoC devices. |
255 | return; | 259 | */ |
256 | /* true pci devices in lincroft should allow type 1 access, the rest | 260 | if (!pci_soc_mode) |
257 | * are langwell fake pci devices. | 261 | return; |
262 | /* | ||
263 | * True PCI devices in Lincroft should allow type 1 access, the rest | ||
264 | * are Langwell fake PCI devices. | ||
258 | */ | 265 | */ |
259 | if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID)) | 266 | if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID)) |
260 | return; | 267 | return; |
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index 643b8b5eee86..8244f5ec2f4c 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/irq.h> | 13 | #include <linux/irq.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/reboot.h> | ||
15 | #include <linux/serial_reg.h> | 16 | #include <linux/serial_reg.h> |
16 | #include <linux/serial_8250.h> | 17 | #include <linux/serial_8250.h> |
17 | #include <linux/reboot.h> | 18 | #include <linux/reboot.h> |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 1cf5b300305e..424f4c97a44d 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -25,10 +25,10 @@ | |||
25 | #include <asm/cpu.h> | 25 | #include <asm/cpu.h> |
26 | 26 | ||
27 | #ifdef CONFIG_X86_32 | 27 | #ifdef CONFIG_X86_32 |
28 | unsigned long saved_context_ebx; | 28 | __visible unsigned long saved_context_ebx; |
29 | unsigned long saved_context_esp, saved_context_ebp; | 29 | __visible unsigned long saved_context_esp, saved_context_ebp; |
30 | unsigned long saved_context_esi, saved_context_edi; | 30 | __visible unsigned long saved_context_esi, saved_context_edi; |
31 | unsigned long saved_context_eflags; | 31 | __visible unsigned long saved_context_eflags; |
32 | #endif | 32 | #endif |
33 | struct saved_context saved_context; | 33 | struct saved_context saved_context; |
34 | 34 | ||
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index a0fde91c16cf..304fca20d96e 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c | |||
@@ -20,26 +20,26 @@ | |||
20 | #include <asm/suspend.h> | 20 | #include <asm/suspend.h> |
21 | 21 | ||
22 | /* References to section boundaries */ | 22 | /* References to section boundaries */ |
23 | extern const void __nosave_begin, __nosave_end; | 23 | extern __visible const void __nosave_begin, __nosave_end; |
24 | 24 | ||
25 | /* Defined in hibernate_asm_64.S */ | 25 | /* Defined in hibernate_asm_64.S */ |
26 | extern int restore_image(void); | 26 | extern asmlinkage int restore_image(void); |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * Address to jump to in the last phase of restore in order to get to the image | 29 | * Address to jump to in the last phase of restore in order to get to the image |
30 | * kernel's text (this value is passed in the image header). | 30 | * kernel's text (this value is passed in the image header). |
31 | */ | 31 | */ |
32 | unsigned long restore_jump_address; | 32 | unsigned long restore_jump_address __visible; |
33 | 33 | ||
34 | /* | 34 | /* |
35 | * Value of the cr3 register from before the hibernation (this value is passed | 35 | * Value of the cr3 register from before the hibernation (this value is passed |
36 | * in the image header). | 36 | * in the image header). |
37 | */ | 37 | */ |
38 | unsigned long restore_cr3; | 38 | unsigned long restore_cr3 __visible; |
39 | 39 | ||
40 | pgd_t *temp_level4_pgt; | 40 | pgd_t *temp_level4_pgt __visible; |
41 | 41 | ||
42 | void *relocated_restore_code; | 42 | void *relocated_restore_code __visible; |
43 | 43 | ||
44 | static void *alloc_pgt_page(void *context) | 44 | static void *alloc_pgt_page(void *context) |
45 | { | 45 | { |
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index e6773dc8ac41..093a892026f9 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk | |||
@@ -68,7 +68,7 @@ BEGIN { | |||
68 | 68 | ||
69 | lprefix1_expr = "\\((66|!F3)\\)" | 69 | lprefix1_expr = "\\((66|!F3)\\)" |
70 | lprefix2_expr = "\\(F3\\)" | 70 | lprefix2_expr = "\\(F3\\)" |
71 | lprefix3_expr = "\\((F2|!F3)\\)" | 71 | lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" |
72 | lprefix_expr = "\\((66|F2|F3)\\)" | 72 | lprefix_expr = "\\((66|F2|F3)\\)" |
73 | max_lprefix = 4 | 73 | max_lprefix = 4 |
74 | 74 | ||
@@ -83,6 +83,8 @@ BEGIN { | |||
83 | prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" | 83 | prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" |
84 | prefix_num["REPNE"] = "INAT_PFX_REPNE" | 84 | prefix_num["REPNE"] = "INAT_PFX_REPNE" |
85 | prefix_num["REP/REPE"] = "INAT_PFX_REPE" | 85 | prefix_num["REP/REPE"] = "INAT_PFX_REPE" |
86 | prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" | ||
87 | prefix_num["XRELEASE"] = "INAT_PFX_REPE" | ||
86 | prefix_num["LOCK"] = "INAT_PFX_LOCK" | 88 | prefix_num["LOCK"] = "INAT_PFX_LOCK" |
87 | prefix_num["SEG=CS"] = "INAT_PFX_CS" | 89 | prefix_num["SEG=CS"] = "INAT_PFX_CS" |
88 | prefix_num["SEG=DS"] = "INAT_PFX_DS" | 90 | prefix_num["SEG=DS"] = "INAT_PFX_DS" |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index c74436e687bf..72074d528400 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -85,15 +85,18 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
85 | cycle_t ret; | 85 | cycle_t ret; |
86 | u64 last; | 86 | u64 last; |
87 | u32 version; | 87 | u32 version; |
88 | u32 migrate_count; | ||
89 | u8 flags; | 88 | u8 flags; |
90 | unsigned cpu, cpu1; | 89 | unsigned cpu, cpu1; |
91 | 90 | ||
92 | 91 | ||
93 | /* | 92 | /* |
94 | * When looping to get a consistent (time-info, tsc) pair, we | 93 | * Note: hypervisor must guarantee that: |
95 | * also need to deal with the possibility we can switch vcpus, | 94 | * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. |
96 | * so make sure we always re-fetch time-info for the current vcpu. | 95 | * 2. that per-CPU pvclock time info is updated if the |
96 | * underlying CPU changes. | ||
97 | * 3. that version is increased whenever underlying CPU | ||
98 | * changes. | ||
99 | * | ||
97 | */ | 100 | */ |
98 | do { | 101 | do { |
99 | cpu = __getcpu() & VGETCPU_CPU_MASK; | 102 | cpu = __getcpu() & VGETCPU_CPU_MASK; |
@@ -104,8 +107,6 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
104 | 107 | ||
105 | pvti = get_pvti(cpu); | 108 | pvti = get_pvti(cpu); |
106 | 109 | ||
107 | migrate_count = pvti->migrate_count; | ||
108 | |||
109 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); | 110 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); |
110 | 111 | ||
111 | /* | 112 | /* |
@@ -117,8 +118,7 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
117 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; | 118 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; |
118 | } while (unlikely(cpu != cpu1 || | 119 | } while (unlikely(cpu != cpu1 || |
119 | (pvti->pvti.version & 1) || | 120 | (pvti->pvti.version & 1) || |
120 | pvti->pvti.version != version || | 121 | pvti->pvti.version != version)); |
121 | pvti->migrate_count != migrate_count)); | ||
122 | 122 | ||
123 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) | 123 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) |
124 | *mode = VCLOCK_NONE; | 124 | *mode = VCLOCK_NONE; |
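The vDSO change above drops the migrate_count check and leans entirely on the version-retry protocol spelled out in the new comment. The reader's side of such a protocol is a plain version-counter loop: snapshot the version, read the payload, and retry if the version was odd (update in flight) or changed underneath. A rough single-threaded C sketch of that reader follows, with illustrative names rather than the kernel's.

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative layout: version is odd while the writer is mid-update. */
    struct time_info {
        volatile uint32_t version;
        volatile uint64_t tsc_to_ns_scale;
    };

    static struct time_info ti = { .version = 2, .tsc_to_ns_scale = 1000 };

    /* Reader: snapshot version, read payload, retry if a writer interfered. */
    static uint64_t read_scale(void)
    {
        uint32_t ver;
        uint64_t val;

        do {
            ver = ti.version;
            val = ti.tsc_to_ns_scale;
            /* real code needs read barriers around the payload access */
        } while ((ver & 1) || ver != ti.version);

        return val;
    }

    int main(void)
    {
        printf("scale = %llu\n", (unsigned long long)read_scale());
        return 0;
    }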
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 193097ef3d7d..2fc216dfbd9c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -427,8 +427,7 @@ static void __init xen_init_cpuid_mask(void) | |||
427 | 427 | ||
428 | if (!xen_initial_domain()) | 428 | if (!xen_initial_domain()) |
429 | cpuid_leaf1_edx_mask &= | 429 | cpuid_leaf1_edx_mask &= |
430 | ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ | 430 | ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */ |
431 | (1 << X86_FEATURE_ACPI)); /* disable ACPI */ | ||
432 | 431 | ||
433 | cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32)); | 432 | cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32)); |
434 | 433 | ||
@@ -735,8 +734,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
735 | addr = (unsigned long)xen_int3; | 734 | addr = (unsigned long)xen_int3; |
736 | else if (addr == (unsigned long)stack_segment) | 735 | else if (addr == (unsigned long)stack_segment) |
737 | addr = (unsigned long)xen_stack_segment; | 736 | addr = (unsigned long)xen_stack_segment; |
738 | else if (addr == (unsigned long)double_fault || | 737 | else if (addr == (unsigned long)double_fault) { |
739 | addr == (unsigned long)nmi) { | ||
740 | /* Don't need to handle these */ | 738 | /* Don't need to handle these */ |
741 | return 0; | 739 | return 0; |
742 | #ifdef CONFIG_X86_MCE | 740 | #ifdef CONFIG_X86_MCE |
@@ -747,7 +745,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
747 | */ | 745 | */ |
748 | ; | 746 | ; |
749 | #endif | 747 | #endif |
750 | } else { | 748 | } else if (addr == (unsigned long)nmi) |
749 | /* | ||
750 | * Use the native version as well. | ||
751 | */ | ||
752 | ; | ||
753 | else { | ||
751 | /* Some other trap using IST? */ | 754 | /* Some other trap using IST? */ |
752 | if (WARN_ON(val->ist != 0)) | 755 | if (WARN_ON(val->ist != 0)) |
753 | return 0; | 756 | return 0; |
@@ -1710,6 +1713,8 @@ static void __init xen_hvm_guest_init(void) | |||
1710 | 1713 | ||
1711 | xen_hvm_init_shared_info(); | 1714 | xen_hvm_init_shared_info(); |
1712 | 1715 | ||
1716 | xen_panic_handler_init(); | ||
1717 | |||
1713 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1718 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
1714 | xen_have_vector_callback = 1; | 1719 | xen_have_vector_callback = 1; |
1715 | xen_hvm_smp_init(); | 1720 | xen_hvm_smp_init(); |
@@ -1720,15 +1725,12 @@ static void __init xen_hvm_guest_init(void) | |||
1720 | xen_hvm_init_mmu_ops(); | 1725 | xen_hvm_init_mmu_ops(); |
1721 | } | 1726 | } |
1722 | 1727 | ||
1723 | static bool __init xen_hvm_platform(void) | 1728 | static uint32_t __init xen_hvm_platform(void) |
1724 | { | 1729 | { |
1725 | if (xen_pv_domain()) | 1730 | if (xen_pv_domain()) |
1726 | return false; | 1731 | return 0; |
1727 | |||
1728 | if (!xen_cpuid_base()) | ||
1729 | return false; | ||
1730 | 1732 | ||
1731 | return true; | 1733 | return xen_cpuid_base(); |
1732 | } | 1734 | } |
1733 | 1735 | ||
1734 | bool xen_hvm_need_lapic(void) | 1736 | bool xen_hvm_need_lapic(void) |
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 01a4dc015ae1..0da7f863056f 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -47,23 +47,18 @@ static void xen_restore_fl(unsigned long flags) | |||
47 | /* convert from IF type flag */ | 47 | /* convert from IF type flag */ |
48 | flags = !(flags & X86_EFLAGS_IF); | 48 | flags = !(flags & X86_EFLAGS_IF); |
49 | 49 | ||
50 | /* There's a one instruction preempt window here. We need to | 50 | /* See xen_irq_enable() for why preemption must be disabled. */ |
51 | make sure we're don't switch CPUs between getting the vcpu | ||
52 | pointer and updating the mask. */ | ||
53 | preempt_disable(); | 51 | preempt_disable(); |
54 | vcpu = this_cpu_read(xen_vcpu); | 52 | vcpu = this_cpu_read(xen_vcpu); |
55 | vcpu->evtchn_upcall_mask = flags; | 53 | vcpu->evtchn_upcall_mask = flags; |
56 | preempt_enable_no_resched(); | ||
57 | |||
58 | /* Doesn't matter if we get preempted here, because any | ||
59 | pending event will get dealt with anyway. */ | ||
60 | 54 | ||
61 | if (flags == 0) { | 55 | if (flags == 0) { |
62 | preempt_check_resched(); | ||
63 | barrier(); /* unmask then check (avoid races) */ | 56 | barrier(); /* unmask then check (avoid races) */ |
64 | if (unlikely(vcpu->evtchn_upcall_pending)) | 57 | if (unlikely(vcpu->evtchn_upcall_pending)) |
65 | xen_force_evtchn_callback(); | 58 | xen_force_evtchn_callback(); |
66 | } | 59 | preempt_enable(); |
60 | } else | ||
61 | preempt_enable_no_resched(); | ||
67 | } | 62 | } |
68 | PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); | 63 | PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); |
69 | 64 | ||
@@ -82,10 +77,12 @@ static void xen_irq_enable(void) | |||
82 | { | 77 | { |
83 | struct vcpu_info *vcpu; | 78 | struct vcpu_info *vcpu; |
84 | 79 | ||
85 | /* We don't need to worry about being preempted here, since | 80 | /* |
86 | either a) interrupts are disabled, so no preemption, or b) | 81 | * We may be preempted as soon as vcpu->evtchn_upcall_mask is |
87 | the caller is confused and is trying to re-enable interrupts | 82 | * cleared, so disable preemption to ensure we check for |
88 | on an indeterminate processor. */ | 83 | * events on the VCPU we are still running on. |
84 | */ | ||
85 | preempt_disable(); | ||
89 | 86 | ||
90 | vcpu = this_cpu_read(xen_vcpu); | 87 | vcpu = this_cpu_read(xen_vcpu); |
91 | vcpu->evtchn_upcall_mask = 0; | 88 | vcpu->evtchn_upcall_mask = 0; |
@@ -96,6 +93,8 @@ static void xen_irq_enable(void) | |||
96 | barrier(); /* unmask then check (avoid races) */ | 93 | barrier(); /* unmask then check (avoid races) */ |
97 | if (unlikely(vcpu->evtchn_upcall_pending)) | 94 | if (unlikely(vcpu->evtchn_upcall_pending)) |
98 | xen_force_evtchn_callback(); | 95 | xen_force_evtchn_callback(); |
96 | |||
97 | preempt_enable(); | ||
99 | } | 98 | } |
100 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); | 99 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); |
101 | 100 | ||
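The xen/irq.c change replaces the old preempt_enable_no_resched() dance with a plain preempt_disable()/preempt_enable() bracket around the unmask-then-check sequence, so the pending-event check is guaranteed to run on the same VCPU that was just unmasked. A hedged stub sketch of that shape follows; preempt_block(), preempt_allow() and the vcpu fields below are placeholders, not the real Xen or kernel interfaces.

    #include <stdbool.h>
    #include <stdio.h>

    struct vcpu_info_sketch {
        volatile bool upcall_mask;      /* true = event delivery masked */
        volatile bool upcall_pending;   /* an event arrived while masked */
    };

    static struct vcpu_info_sketch this_vcpu = {
        .upcall_mask = true,
        .upcall_pending = true,
    };

    static void preempt_block(void) { /* placeholder for preempt_disable() */ }
    static void preempt_allow(void) { /* placeholder for preempt_enable() */ }
    static void force_event_callback(void) { puts("delivering pending event"); }

    static void irq_enable_sketch(void)
    {
        preempt_block();                  /* stay on this VCPU ... */
        this_vcpu.upcall_mask = false;    /* ... while we unmask ... */
        if (this_vcpu.upcall_pending)     /* ... and re-check for events */
            force_event_callback();
        preempt_allow();
    }

    int main(void)
    {
        irq_enable_sketch();
        return 0;
    }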
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 95fb2aa5927e..0d4ec35895d4 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -161,6 +161,7 @@ | |||
161 | #include <asm/xen/page.h> | 161 | #include <asm/xen/page.h> |
162 | #include <asm/xen/hypercall.h> | 162 | #include <asm/xen/hypercall.h> |
163 | #include <asm/xen/hypervisor.h> | 163 | #include <asm/xen/hypervisor.h> |
164 | #include <xen/balloon.h> | ||
164 | #include <xen/grant_table.h> | 165 | #include <xen/grant_table.h> |
165 | 166 | ||
166 | #include "multicalls.h" | 167 | #include "multicalls.h" |
@@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page, | |||
967 | if (kmap_op != NULL) { | 968 | if (kmap_op != NULL) { |
968 | if (!PageHighMem(page)) { | 969 | if (!PageHighMem(page)) { |
969 | struct multicall_space mcs; | 970 | struct multicall_space mcs; |
970 | struct gnttab_unmap_grant_ref *unmap_op; | 971 | struct gnttab_unmap_and_replace *unmap_op; |
972 | struct page *scratch_page = get_balloon_scratch_page(); | ||
973 | unsigned long scratch_page_address = (unsigned long) | ||
974 | __va(page_to_pfn(scratch_page) << PAGE_SHIFT); | ||
971 | 975 | ||
972 | /* | 976 | /* |
973 | * It might be that we queued all the m2p grant table | 977 | * It might be that we queued all the m2p grant table |
@@ -990,21 +994,25 @@ int m2p_remove_override(struct page *page, | |||
990 | } | 994 | } |
991 | 995 | ||
992 | mcs = xen_mc_entry( | 996 | mcs = xen_mc_entry( |
993 | sizeof(struct gnttab_unmap_grant_ref)); | 997 | sizeof(struct gnttab_unmap_and_replace)); |
994 | unmap_op = mcs.args; | 998 | unmap_op = mcs.args; |
995 | unmap_op->host_addr = kmap_op->host_addr; | 999 | unmap_op->host_addr = kmap_op->host_addr; |
1000 | unmap_op->new_addr = scratch_page_address; | ||
996 | unmap_op->handle = kmap_op->handle; | 1001 | unmap_op->handle = kmap_op->handle; |
997 | unmap_op->dev_bus_addr = 0; | ||
998 | 1002 | ||
999 | MULTI_grant_table_op(mcs.mc, | 1003 | MULTI_grant_table_op(mcs.mc, |
1000 | GNTTABOP_unmap_grant_ref, unmap_op, 1); | 1004 | GNTTABOP_unmap_and_replace, unmap_op, 1); |
1001 | 1005 | ||
1002 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 1006 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
1003 | 1007 | ||
1004 | set_pte_at(&init_mm, address, ptep, | 1008 | mcs = __xen_mc_entry(0); |
1005 | pfn_pte(pfn, PAGE_KERNEL)); | 1009 | MULTI_update_va_mapping(mcs.mc, scratch_page_address, |
1006 | __flush_tlb_single(address); | 1010 | pfn_pte(page_to_pfn(get_balloon_scratch_page()), |
1011 | PAGE_KERNEL_RO), 0); | ||
1012 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
1013 | |||
1007 | kmap_op->host_addr = 0; | 1014 | kmap_op->host_addr = 0; |
1015 | put_balloon_scratch_page(); | ||
1008 | } | 1016 | } |
1009 | } | 1017 | } |
1010 | 1018 | ||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 056d11faef21..09f3059cb00b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -33,6 +33,9 @@ | |||
33 | /* These are code, but not functions. Defined in entry.S */ | 33 | /* These are code, but not functions. Defined in entry.S */ |
34 | extern const char xen_hypervisor_callback[]; | 34 | extern const char xen_hypervisor_callback[]; |
35 | extern const char xen_failsafe_callback[]; | 35 | extern const char xen_failsafe_callback[]; |
36 | #ifdef CONFIG_X86_64 | ||
37 | extern const char nmi[]; | ||
38 | #endif | ||
36 | extern void xen_sysenter_target(void); | 39 | extern void xen_sysenter_target(void); |
37 | extern void xen_syscall_target(void); | 40 | extern void xen_syscall_target(void); |
38 | extern void xen_syscall32_target(void); | 41 | extern void xen_syscall32_target(void); |
@@ -215,13 +218,19 @@ static void __init xen_set_identity_and_release_chunk( | |||
215 | unsigned long pfn; | 218 | unsigned long pfn; |
216 | 219 | ||
217 | /* | 220 | /* |
218 | * If the PFNs are currently mapped, the VA mapping also needs | 221 | * If the PFNs are currently mapped, clear the mappings |
219 | * to be updated to be 1:1. | 222 | * (except for the ISA region which must be 1:1 mapped) to |
223 | * release the refcounts (in Xen) on the original frames. | ||
220 | */ | 224 | */ |
221 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) | 225 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { |
226 | pte_t pte = __pte_ma(0); | ||
227 | |||
228 | if (pfn < PFN_UP(ISA_END_ADDRESS)) | ||
229 | pte = mfn_pte(pfn, PAGE_KERNEL_IO); | ||
230 | |||
222 | (void)HYPERVISOR_update_va_mapping( | 231 | (void)HYPERVISOR_update_va_mapping( |
223 | (unsigned long)__va(pfn << PAGE_SHIFT), | 232 | (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0); |
224 | mfn_pte(pfn, PAGE_KERNEL_IO), 0); | 233 | } |
225 | 234 | ||
226 | if (start_pfn < nr_pages) | 235 | if (start_pfn < nr_pages) |
227 | *released += xen_release_chunk( | 236 | *released += xen_release_chunk( |
@@ -313,6 +322,17 @@ static void xen_align_and_add_e820_region(u64 start, u64 size, int type) | |||
313 | e820_add_region(start, end - start, type); | 322 | e820_add_region(start, end - start, type); |
314 | } | 323 | } |
315 | 324 | ||
325 | void xen_ignore_unusable(struct e820entry *list, size_t map_size) | ||
326 | { | ||
327 | struct e820entry *entry; | ||
328 | unsigned int i; | ||
329 | |||
330 | for (i = 0, entry = list; i < map_size; i++, entry++) { | ||
331 | if (entry->type == E820_UNUSABLE) | ||
332 | entry->type = E820_RAM; | ||
333 | } | ||
334 | } | ||
335 | |||
316 | /** | 336 | /** |
317 | * machine_specific_memory_setup - Hook for machine specific memory setup. | 337 | * machine_specific_memory_setup - Hook for machine specific memory setup. |
318 | **/ | 338 | **/ |
@@ -353,6 +373,17 @@ char * __init xen_memory_setup(void) | |||
353 | } | 373 | } |
354 | BUG_ON(rc); | 374 | BUG_ON(rc); |
355 | 375 | ||
376 | /* | ||
377 | * Xen won't allow a 1:1 mapping to be created to UNUSABLE | ||
378 | * regions, so if we're using the machine memory map leave the | ||
379 | * region as RAM as it is in the pseudo-physical map. | ||
380 | * | ||
381 | * UNUSABLE regions in domUs are not handled and will need | ||
382 | * a patch in the future. | ||
383 | */ | ||
384 | if (xen_initial_domain()) | ||
385 | xen_ignore_unusable(map, memmap.nr_entries); | ||
386 | |||
356 | /* Make sure the Xen-supplied memory map is well-ordered. */ | 387 | /* Make sure the Xen-supplied memory map is well-ordered. */ |
357 | sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries); | 388 | sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries); |
358 | 389 | ||
@@ -525,7 +556,13 @@ void xen_enable_syscall(void) | |||
525 | } | 556 | } |
526 | #endif /* CONFIG_X86_64 */ | 557 | #endif /* CONFIG_X86_64 */ |
527 | } | 558 | } |
528 | 559 | void __cpuinit xen_enable_nmi(void) | |
560 | { | ||
561 | #ifdef CONFIG_X86_64 | ||
562 | if (register_callback(CALLBACKTYPE_nmi, nmi)) | ||
563 | BUG(); | ||
564 | #endif | ||
565 | } | ||
529 | void __init xen_arch_setup(void) | 566 | void __init xen_arch_setup(void) |
530 | { | 567 | { |
531 | xen_panic_handler_init(); | 568 | xen_panic_handler_init(); |
@@ -543,7 +580,7 @@ void __init xen_arch_setup(void) | |||
543 | 580 | ||
544 | xen_enable_sysenter(); | 581 | xen_enable_sysenter(); |
545 | xen_enable_syscall(); | 582 | xen_enable_syscall(); |
546 | 583 | xen_enable_nmi(); | |
547 | #ifdef CONFIG_ACPI | 584 | #ifdef CONFIG_ACPI |
548 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { | 585 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { |
549 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | 586 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index ca92754eb846..9235842cd76a 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -279,6 +279,7 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
279 | 279 | ||
280 | xen_filter_cpu_maps(); | 280 | xen_filter_cpu_maps(); |
281 | xen_setup_vcpu_info_placement(); | 281 | xen_setup_vcpu_info_placement(); |
282 | xen_init_spinlocks(); | ||
282 | } | 283 | } |
283 | 284 | ||
284 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | 285 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) |
@@ -572,6 +573,12 @@ static inline int xen_map_vector(int vector) | |||
572 | case IRQ_WORK_VECTOR: | 573 | case IRQ_WORK_VECTOR: |
573 | xen_vector = XEN_IRQ_WORK_VECTOR; | 574 | xen_vector = XEN_IRQ_WORK_VECTOR; |
574 | break; | 575 | break; |
576 | #ifdef CONFIG_X86_64 | ||
577 | case NMI_VECTOR: | ||
578 | case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */ | ||
579 | xen_vector = XEN_NMI_VECTOR; | ||
580 | break; | ||
581 | #endif | ||
575 | default: | 582 | default: |
576 | xen_vector = -1; | 583 | xen_vector = -1; |
577 | printk(KERN_ERR "xen: vector 0x%x is not implemented\n", | 584 | printk(KERN_ERR "xen: vector 0x%x is not implemented\n", |
@@ -680,7 +687,6 @@ void __init xen_smp_init(void) | |||
680 | { | 687 | { |
681 | smp_ops = xen_smp_ops; | 688 | smp_ops = xen_smp_ops; |
682 | xen_fill_possible_map(); | 689 | xen_fill_possible_map(); |
683 | xen_init_spinlocks(); | ||
684 | } | 690 | } |
685 | 691 | ||
686 | static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | 692 | static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) |
@@ -694,8 +700,15 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | |||
694 | static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) | 700 | static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) |
695 | { | 701 | { |
696 | int rc; | 702 | int rc; |
697 | rc = native_cpu_up(cpu, tidle); | 703 | /* |
698 | WARN_ON (xen_smp_intr_init(cpu)); | 704 | * xen_smp_intr_init() needs to run before native_cpu_up() |
705 | * so that IPI vectors are set up on the booting CPU before | ||
706 | * it is marked online in native_cpu_up(). | ||
707 | */ | ||
708 | rc = xen_smp_intr_init(cpu); | ||
709 | WARN_ON(rc); | ||
710 | if (!rc) | ||
711 | rc = native_cpu_up(cpu, tidle); | ||
699 | return rc; | 712 | return rc; |
700 | } | 713 | } |
701 | 714 | ||
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index cf3caee356b3..0438b9324a72 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -17,45 +17,44 @@ | |||
17 | #include "xen-ops.h" | 17 | #include "xen-ops.h" |
18 | #include "debugfs.h" | 18 | #include "debugfs.h" |
19 | 19 | ||
20 | #ifdef CONFIG_XEN_DEBUG_FS | 20 | enum xen_contention_stat { |
21 | static struct xen_spinlock_stats | 21 | TAKEN_SLOW, |
22 | { | 22 | TAKEN_SLOW_PICKUP, |
23 | u64 taken; | 23 | TAKEN_SLOW_SPURIOUS, |
24 | u32 taken_slow; | 24 | RELEASED_SLOW, |
25 | u32 taken_slow_nested; | 25 | RELEASED_SLOW_KICKED, |
26 | u32 taken_slow_pickup; | 26 | NR_CONTENTION_STATS |
27 | u32 taken_slow_spurious; | 27 | }; |
28 | u32 taken_slow_irqenable; | ||
29 | 28 | ||
30 | u64 released; | ||
31 | u32 released_slow; | ||
32 | u32 released_slow_kicked; | ||
33 | 29 | ||
30 | #ifdef CONFIG_XEN_DEBUG_FS | ||
34 | #define HISTO_BUCKETS 30 | 31 | #define HISTO_BUCKETS 30 |
35 | u32 histo_spin_total[HISTO_BUCKETS+1]; | 32 | static struct xen_spinlock_stats |
36 | u32 histo_spin_spinning[HISTO_BUCKETS+1]; | 33 | { |
34 | u32 contention_stats[NR_CONTENTION_STATS]; | ||
37 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; | 35 | u32 histo_spin_blocked[HISTO_BUCKETS+1]; |
38 | |||
39 | u64 time_total; | ||
40 | u64 time_spinning; | ||
41 | u64 time_blocked; | 36 | u64 time_blocked; |
42 | } spinlock_stats; | 37 | } spinlock_stats; |
43 | 38 | ||
44 | static u8 zero_stats; | 39 | static u8 zero_stats; |
45 | 40 | ||
46 | static unsigned lock_timeout = 1 << 10; | ||
47 | #define TIMEOUT lock_timeout | ||
48 | |||
49 | static inline void check_zero(void) | 41 | static inline void check_zero(void) |
50 | { | 42 | { |
51 | if (unlikely(zero_stats)) { | 43 | u8 ret; |
52 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | 44 | u8 old = ACCESS_ONCE(zero_stats); |
53 | zero_stats = 0; | 45 | if (unlikely(old)) { |
46 | ret = cmpxchg(&zero_stats, old, 0); | ||
47 | /* This ensures only one fellow resets the stat */ | ||
48 | if (ret == old) | ||
49 | memset(&spinlock_stats, 0, sizeof(spinlock_stats)); | ||
54 | } | 50 | } |
55 | } | 51 | } |
56 | 52 | ||
57 | #define ADD_STATS(elem, val) \ | 53 | static inline void add_stats(enum xen_contention_stat var, u32 val) |
58 | do { check_zero(); spinlock_stats.elem += (val); } while(0) | 54 | { |
55 | check_zero(); | ||
56 | spinlock_stats.contention_stats[var] += val; | ||
57 | } | ||
59 | 58 | ||
60 | static inline u64 spin_time_start(void) | 59 | static inline u64 spin_time_start(void) |
61 | { | 60 | { |
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array) | |||
74 | array[HISTO_BUCKETS]++; | 73 | array[HISTO_BUCKETS]++; |
75 | } | 74 | } |
76 | 75 | ||
77 | static inline void spin_time_accum_spinning(u64 start) | ||
78 | { | ||
79 | u32 delta = xen_clocksource_read() - start; | ||
80 | |||
81 | __spin_time_accum(delta, spinlock_stats.histo_spin_spinning); | ||
82 | spinlock_stats.time_spinning += delta; | ||
83 | } | ||
84 | |||
85 | static inline void spin_time_accum_total(u64 start) | ||
86 | { | ||
87 | u32 delta = xen_clocksource_read() - start; | ||
88 | |||
89 | __spin_time_accum(delta, spinlock_stats.histo_spin_total); | ||
90 | spinlock_stats.time_total += delta; | ||
91 | } | ||
92 | |||
93 | static inline void spin_time_accum_blocked(u64 start) | 76 | static inline void spin_time_accum_blocked(u64 start) |
94 | { | 77 | { |
95 | u32 delta = xen_clocksource_read() - start; | 78 | u32 delta = xen_clocksource_read() - start; |
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start) | |||
99 | } | 82 | } |
100 | #else /* !CONFIG_XEN_DEBUG_FS */ | 83 | #else /* !CONFIG_XEN_DEBUG_FS */ |
101 | #define TIMEOUT (1 << 10) | 84 | #define TIMEOUT (1 << 10) |
102 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | 85 | static inline void add_stats(enum xen_contention_stat var, u32 val) |
86 | { | ||
87 | } | ||
103 | 88 | ||
104 | static inline u64 spin_time_start(void) | 89 | static inline u64 spin_time_start(void) |
105 | { | 90 | { |
106 | return 0; | 91 | return 0; |
107 | } | 92 | } |
108 | 93 | ||
109 | static inline void spin_time_accum_total(u64 start) | ||
110 | { | ||
111 | } | ||
112 | static inline void spin_time_accum_spinning(u64 start) | ||
113 | { | ||
114 | } | ||
115 | static inline void spin_time_accum_blocked(u64 start) | 94 | static inline void spin_time_accum_blocked(u64 start) |
116 | { | 95 | { |
117 | } | 96 | } |
@@ -134,227 +113,123 @@ typedef u16 xen_spinners_t; | |||
134 | asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); | 113 | asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory"); |
135 | #endif | 114 | #endif |
136 | 115 | ||
137 | struct xen_spinlock { | 116 | struct xen_lock_waiting { |
138 | unsigned char lock; /* 0 -> free; 1 -> locked */ | 117 | struct arch_spinlock *lock; |
139 | xen_spinners_t spinners; /* count of waiting cpus */ | 118 | __ticket_t want; |
140 | }; | 119 | }; |
141 | 120 | ||
142 | static int xen_spin_is_locked(struct arch_spinlock *lock) | ||
143 | { | ||
144 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
145 | |||
146 | return xl->lock != 0; | ||
147 | } | ||
148 | |||
149 | static int xen_spin_is_contended(struct arch_spinlock *lock) | ||
150 | { | ||
151 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
152 | |||
153 | /* Not strictly true; this is only the count of contended | ||
154 | lock-takers entering the slow path. */ | ||
155 | return xl->spinners != 0; | ||
156 | } | ||
157 | |||
158 | static int xen_spin_trylock(struct arch_spinlock *lock) | ||
159 | { | ||
160 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
161 | u8 old = 1; | ||
162 | |||
163 | asm("xchgb %b0,%1" | ||
164 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
165 | |||
166 | return old == 0; | ||
167 | } | ||
168 | |||
169 | static DEFINE_PER_CPU(char *, irq_name); | ||
170 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | 121 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; |
171 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | 122 | static DEFINE_PER_CPU(char *, irq_name); |
172 | 123 | static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); | |
173 | /* | 124 | static cpumask_t waiting_cpus; |
174 | * Mark a cpu as interested in a lock. Returns the CPU's previous | ||
175 | * lock of interest, in case we got preempted by an interrupt. | ||
176 | */ | ||
177 | static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl) | ||
178 | { | ||
179 | struct xen_spinlock *prev; | ||
180 | |||
181 | prev = __this_cpu_read(lock_spinners); | ||
182 | __this_cpu_write(lock_spinners, xl); | ||
183 | |||
184 | wmb(); /* set lock of interest before count */ | ||
185 | |||
186 | inc_spinners(xl); | ||
187 | |||
188 | return prev; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * Mark a cpu as no longer interested in a lock. Restores previous | ||
193 | * lock of interest (NULL for none). | ||
194 | */ | ||
195 | static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev) | ||
196 | { | ||
197 | dec_spinners(xl); | ||
198 | wmb(); /* decrement count before restoring lock */ | ||
199 | __this_cpu_write(lock_spinners, prev); | ||
200 | } | ||
201 | 125 | ||
202 | static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) | 126 | static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) |
203 | { | 127 | { |
204 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
205 | struct xen_spinlock *prev; | ||
206 | int irq = __this_cpu_read(lock_kicker_irq); | 128 | int irq = __this_cpu_read(lock_kicker_irq); |
207 | int ret; | 129 | struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); |
130 | int cpu = smp_processor_id(); | ||
208 | u64 start; | 131 | u64 start; |
132 | unsigned long flags; | ||
209 | 133 | ||
210 | /* If kicker interrupts not initialized yet, just spin */ | 134 | /* If kicker interrupts not initialized yet, just spin */ |
211 | if (irq == -1) | 135 | if (irq == -1) |
212 | return 0; | 136 | return; |
213 | 137 | ||
214 | start = spin_time_start(); | 138 | start = spin_time_start(); |
215 | 139 | ||
216 | /* announce we're spinning */ | 140 | /* |
217 | prev = spinning_lock(xl); | 141 | * Make sure an interrupt handler can't upset things in a |
142 | * partially setup state. | ||
143 | */ | ||
144 | local_irq_save(flags); | ||
145 | /* | ||
146 | * We don't really care if we're overwriting some other | ||
147 | * (lock,want) pair, as that would mean that we're currently | ||
148 | * in an interrupt context, and the outer context had | ||
149 | * interrupts enabled. That has already kicked the VCPU out | ||
150 | * of xen_poll_irq(), so it will just return spuriously and | ||
151 | * retry with newly setup (lock,want). | ||
152 | * | ||
153 | * The ordering protocol on this is that the "lock" pointer | ||
154 | * may only be set non-NULL if the "want" ticket is correct. | ||
155 | * If we're updating "want", we must first clear "lock". | ||
156 | */ | ||
157 | w->lock = NULL; | ||
158 | smp_wmb(); | ||
159 | w->want = want; | ||
160 | smp_wmb(); | ||
161 | w->lock = lock; | ||
218 | 162 | ||
219 | ADD_STATS(taken_slow, 1); | 163 | /* This uses set_bit, which is atomic and therefore a barrier */ |
220 | ADD_STATS(taken_slow_nested, prev != NULL); | 164 | cpumask_set_cpu(cpu, &waiting_cpus); |
165 | add_stats(TAKEN_SLOW, 1); | ||
221 | 166 | ||
222 | do { | 167 | /* clear pending */ |
223 | unsigned long flags; | 168 | xen_clear_irq_pending(irq); |
224 | 169 | ||
225 | /* clear pending */ | 170 | /* Only check lock once pending cleared */ |
226 | xen_clear_irq_pending(irq); | 171 | barrier(); |
227 | 172 | ||
228 | /* check again make sure it didn't become free while | 173 | /* |
229 | we weren't looking */ | 174 | * Mark entry to slowpath before doing the pickup test to make |
230 | ret = xen_spin_trylock(lock); | 175 | * sure we don't deadlock with an unlocker. |
231 | if (ret) { | 176 | */ |
232 | ADD_STATS(taken_slow_pickup, 1); | 177 | __ticket_enter_slowpath(lock); |
233 | 178 | ||
234 | /* | 179 | /* |
235 | * If we interrupted another spinlock while it | 180 | * check again to make sure it didn't become free while |
236 | * was blocking, make sure it doesn't block | 181 | * we weren't looking |
237 | * without rechecking the lock. | 182 | */ |
238 | */ | 183 | if (ACCESS_ONCE(lock->tickets.head) == want) { |
239 | if (prev != NULL) | 184 | add_stats(TAKEN_SLOW_PICKUP, 1); |
240 | xen_set_irq_pending(irq); | 185 | goto out; |
241 | goto out; | 186 | } |
242 | } | ||
243 | 187 | ||
244 | flags = arch_local_save_flags(); | 188 | /* Allow interrupts while blocked */ |
245 | if (irq_enable) { | 189 | local_irq_restore(flags); |
246 | ADD_STATS(taken_slow_irqenable, 1); | ||
247 | raw_local_irq_enable(); | ||
248 | } | ||
249 | 190 | ||
250 | /* | 191 | /* |
251 | * Block until irq becomes pending. If we're | 192 | * If an interrupt happens here, it will leave the wakeup irq |
252 | * interrupted at this point (after the trylock but | 193 | * pending, which will cause xen_poll_irq() to return |
253 | * before entering the block), then the nested lock | 194 | * immediately. |
254 | * handler guarantees that the irq will be left | 195 | */ |
255 | * pending if there's any chance the lock became free; | ||
256 | * xen_poll_irq() returns immediately if the irq is | ||
257 | * pending. | ||
258 | */ | ||
259 | xen_poll_irq(irq); | ||
260 | 196 | ||
261 | raw_local_irq_restore(flags); | 197 | /* Block until irq becomes pending (or perhaps a spurious wakeup) */ |
198 | xen_poll_irq(irq); | ||
199 | add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq)); | ||
262 | 200 | ||
263 | ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); | 201 | local_irq_save(flags); |
264 | } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ | ||
265 | 202 | ||
266 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); | 203 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); |
267 | |||
268 | out: | 204 | out: |
269 | unspinning_lock(xl, prev); | 205 | cpumask_clear_cpu(cpu, &waiting_cpus); |
270 | spin_time_accum_blocked(start); | 206 | w->lock = NULL; |
271 | |||
272 | return ret; | ||
273 | } | ||
274 | |||
275 | static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable) | ||
276 | { | ||
277 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
278 | unsigned timeout; | ||
279 | u8 oldval; | ||
280 | u64 start_spin; | ||
281 | |||
282 | ADD_STATS(taken, 1); | ||
283 | |||
284 | start_spin = spin_time_start(); | ||
285 | |||
286 | do { | ||
287 | u64 start_spin_fast = spin_time_start(); | ||
288 | |||
289 | timeout = TIMEOUT; | ||
290 | |||
291 | asm("1: xchgb %1,%0\n" | ||
292 | " testb %1,%1\n" | ||
293 | " jz 3f\n" | ||
294 | "2: rep;nop\n" | ||
295 | " cmpb $0,%0\n" | ||
296 | " je 1b\n" | ||
297 | " dec %2\n" | ||
298 | " jnz 2b\n" | ||
299 | "3:\n" | ||
300 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
301 | : "1" (1) | ||
302 | : "memory"); | ||
303 | 207 | ||
304 | spin_time_accum_spinning(start_spin_fast); | 208 | local_irq_restore(flags); |
305 | 209 | ||
306 | } while (unlikely(oldval != 0 && | 210 | spin_time_accum_blocked(start); |
307 | (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable)))); | ||
308 | |||
309 | spin_time_accum_total(start_spin); | ||
310 | } | ||
311 | |||
312 | static void xen_spin_lock(struct arch_spinlock *lock) | ||
313 | { | ||
314 | __xen_spin_lock(lock, false); | ||
315 | } | ||
316 | |||
317 | static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags) | ||
318 | { | ||
319 | __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags)); | ||
320 | } | 211 | } |
212 | PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning); | ||
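Editorial note: the comment block in xen_lock_spinning() describes an ordering protocol in which a waiter publishes a (lock, want) pair and the unlocker may only trust the ticket after it has seen the non-NULL lock pointer. As a rough illustration of that discipline only, here is a userspace C11 model; the names, types and memory-order choices are assumptions made for this sketch, not the kernel implementation.

    /*
     * Userspace model of the waiter-publish protocol, with C11 atomics
     * standing in for smp_wmb()/ACCESS_ONCE().  Illustrative only.
     */
    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdint.h>

    struct waiting_slot {
            _Atomic(void *)   lock;  /* which lock this CPU is blocked on */
            _Atomic(uint16_t) want;  /* which ticket it is waiting for */
    };

    static _Thread_local struct waiting_slot slot;

    static void publish_wait(void *lock, uint16_t want)
    {
            /* Clear "lock" first so a concurrent unlocker can never pair
             * a non-NULL lock pointer with a stale ticket. */
            atomic_store_explicit(&slot.lock, NULL, memory_order_relaxed);
            atomic_store_explicit(&slot.want, want, memory_order_release);
            atomic_store_explicit(&slot.lock, lock, memory_order_release);
    }

    static int should_kick(struct waiting_slot *w, void *lock, uint16_t next)
    {
            /* Read "lock" before "want", mirroring the publish order. */
            return atomic_load_explicit(&w->lock, memory_order_acquire) == lock &&
                   atomic_load_explicit(&w->want, memory_order_relaxed) == next;
    }

The same pairing appears in xen_unlock_kick() below: it checks w->lock before w->want, so a half-updated slot can at worst cause a missed or spurious kick, never a kick for the wrong ticket.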
321 | 213 | ||
322 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | 214 | static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next) |
323 | { | 215 | { |
324 | int cpu; | 216 | int cpu; |
325 | 217 | ||
326 | ADD_STATS(released_slow, 1); | 218 | add_stats(RELEASED_SLOW, 1); |
219 | |||
220 | for_each_cpu(cpu, &waiting_cpus) { | ||
221 | const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu); | ||
327 | 222 | ||
328 | for_each_online_cpu(cpu) { | 223 | /* Make sure we read lock before want */ |
329 | /* XXX should mix up next cpu selection */ | 224 | if (ACCESS_ONCE(w->lock) == lock && |
330 | if (per_cpu(lock_spinners, cpu) == xl) { | 225 | ACCESS_ONCE(w->want) == next) { |
331 | ADD_STATS(released_slow_kicked, 1); | 226 | add_stats(RELEASED_SLOW_KICKED, 1); |
332 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | 227 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); |
228 | break; | ||
333 | } | 229 | } |
334 | } | 230 | } |
335 | } | 231 | } |
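Editorial note: xen_unlock_kick() is only reached when the native unlock fast path notices the "slowpath" flag that __ticket_enter_slowpath() (called from xen_lock_spinning() above) set in the ticket tail. A minimal sketch of that flag convention follows, assuming the usual layout in which tickets advance in steps of two so the low bit is free; the real definitions live in the x86 spinlock headers and are not part of this diff.

    /* Illustrative ticket layout with a slowpath flag in the low bit. */
    #include <stdint.h>

    #define TICKET_SLOWPATH_FLAG ((uint16_t)1)  /* assumption for the sketch */
    #define TICKET_LOCK_INC      ((uint16_t)2)  /* tickets step by 2, freeing bit 0 */

    struct ticketpair {
            uint16_t head;  /* ticket currently being served */
            uint16_t tail;  /* next ticket to hand out, plus the flag bit */
    };

    static inline void ticket_enter_slowpath(struct ticketpair *t)
    {
            /* The kernel does this with a locked "or" on the lock word. */
            t->tail |= TICKET_SLOWPATH_FLAG;
    }

    static inline int ticket_unlock_needs_kick(const struct ticketpair *t)
    {
            /* Unlock fast path: only take the pvops kick path if some
             * waiter flagged that it went to sleep in the hypervisor. */
            return t->tail & TICKET_SLOWPATH_FLAG;
    }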
336 | 232 | ||
337 | static void xen_spin_unlock(struct arch_spinlock *lock) | ||
338 | { | ||
339 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
340 | |||
341 | ADD_STATS(released, 1); | ||
342 | |||
343 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
344 | xl->lock = 0; /* release lock */ | ||
345 | |||
346 | /* | ||
347 | * Make sure unlock happens before checking for waiting | ||
348 | * spinners. We need a strong barrier to enforce the | ||
349 | * write-read ordering to different memory locations, as the | ||
350 | * CPU makes no implied guarantees about their ordering. | ||
351 | */ | ||
352 | mb(); | ||
353 | |||
354 | if (unlikely(xl->spinners)) | ||
355 | xen_spin_unlock_slow(xl); | ||
356 | } | ||
357 | |||
358 | static irqreturn_t dummy_handler(int irq, void *dev_id) | 233 | static irqreturn_t dummy_handler(int irq, void *dev_id) |
359 | { | 234 | { |
360 | BUG(); | 235 | BUG(); |
@@ -408,6 +283,8 @@ void xen_uninit_lock_cpu(int cpu) | |||
408 | per_cpu(irq_name, cpu) = NULL; | 283 | per_cpu(irq_name, cpu) = NULL; |
409 | } | 284 | } |
410 | 285 | ||
286 | static bool xen_pvspin __initdata = true; | ||
287 | |||
411 | void __init xen_init_spinlocks(void) | 288 | void __init xen_init_spinlocks(void) |
412 | { | 289 | { |
413 | /* | 290 | /* |
@@ -417,15 +294,23 @@ void __init xen_init_spinlocks(void) | |||
417 | if (xen_hvm_domain()) | 294 | if (xen_hvm_domain()) |
418 | return; | 295 | return; |
419 | 296 | ||
420 | BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t)); | 297 | if (!xen_pvspin) { |
298 | printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); | ||
299 | return; | ||
300 | } | ||
421 | 301 | ||
422 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | 302 | static_key_slow_inc(&paravirt_ticketlocks_enabled); |
423 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | 303 | |
424 | pv_lock_ops.spin_lock = xen_spin_lock; | 304 | pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); |
425 | pv_lock_ops.spin_lock_flags = xen_spin_lock_flags; | 305 | pv_lock_ops.unlock_kick = xen_unlock_kick; |
426 | pv_lock_ops.spin_trylock = xen_spin_trylock; | 306 | } |
427 | pv_lock_ops.spin_unlock = xen_spin_unlock; | 307 | |
308 | static __init int xen_parse_nopvspin(char *arg) | ||
309 | { | ||
310 | xen_pvspin = false; | ||
311 | return 0; | ||
428 | } | 312 | } |
313 | early_param("xen_nopvspin", xen_parse_nopvspin); | ||
429 | 314 | ||
430 | #ifdef CONFIG_XEN_DEBUG_FS | 315 | #ifdef CONFIG_XEN_DEBUG_FS |
431 | 316 | ||
@@ -442,37 +327,21 @@ static int __init xen_spinlock_debugfs(void) | |||
442 | 327 | ||
443 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); | 328 | debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats); |
444 | 329 | ||
445 | debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout); | ||
446 | |||
447 | debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken); | ||
448 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, | 330 | debugfs_create_u32("taken_slow", 0444, d_spin_debug, |
449 | &spinlock_stats.taken_slow); | 331 | &spinlock_stats.contention_stats[TAKEN_SLOW]); |
450 | debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug, | ||
451 | &spinlock_stats.taken_slow_nested); | ||
452 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, | 332 | debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug, |
453 | &spinlock_stats.taken_slow_pickup); | 333 | &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]); |
454 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, | 334 | debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug, |
455 | &spinlock_stats.taken_slow_spurious); | 335 | &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]); |
456 | debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug, | ||
457 | &spinlock_stats.taken_slow_irqenable); | ||
458 | 336 | ||
459 | debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released); | ||
460 | debugfs_create_u32("released_slow", 0444, d_spin_debug, | 337 | debugfs_create_u32("released_slow", 0444, d_spin_debug, |
461 | &spinlock_stats.released_slow); | 338 | &spinlock_stats.contention_stats[RELEASED_SLOW]); |
462 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, | 339 | debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug, |
463 | &spinlock_stats.released_slow_kicked); | 340 | &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]); |
464 | 341 | ||
465 | debugfs_create_u64("time_spinning", 0444, d_spin_debug, | ||
466 | &spinlock_stats.time_spinning); | ||
467 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, | 342 | debugfs_create_u64("time_blocked", 0444, d_spin_debug, |
468 | &spinlock_stats.time_blocked); | 343 | &spinlock_stats.time_blocked); |
469 | debugfs_create_u64("time_total", 0444, d_spin_debug, | ||
470 | &spinlock_stats.time_total); | ||
471 | 344 | ||
472 | debugfs_create_u32_array("histo_total", 0444, d_spin_debug, | ||
473 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); | ||
474 | debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, | ||
475 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); | ||
476 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | 345 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, |
477 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | 346 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); |
478 | 347 | ||
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 86782c5d7e2a..95f8c6142328 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -105,9 +105,9 @@ static inline void __init xen_init_apic(void) | |||
105 | /* Declare an asm function, along with symbols needed to make it | 105 | /* Declare an asm function, along with symbols needed to make it |
106 | inlineable */ | 106 | inlineable */ |
107 | #define DECL_ASM(ret, name, ...) \ | 107 | #define DECL_ASM(ret, name, ...) \ |
108 | ret name(__VA_ARGS__); \ | 108 | __visible ret name(__VA_ARGS__); \ |
109 | extern char name##_end[]; \ | 109 | extern char name##_end[] __visible; \ |
110 | extern char name##_reloc[] \ | 110 | extern char name##_reloc[] __visible |
111 | 111 | ||
112 | DECL_ASM(void, xen_irq_enable_direct, void); | 112 | DECL_ASM(void, xen_irq_enable_direct, void); |
113 | DECL_ASM(void, xen_irq_disable_direct, void); | 113 | DECL_ASM(void, xen_irq_disable_direct, void); |
@@ -115,11 +115,11 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void); | |||
115 | DECL_ASM(void, xen_restore_fl_direct, unsigned long); | 115 | DECL_ASM(void, xen_restore_fl_direct, unsigned long); |
116 | 116 | ||
117 | /* These are not functions, and cannot be called normally */ | 117 | /* These are not functions, and cannot be called normally */ |
118 | void xen_iret(void); | 118 | __visible void xen_iret(void); |
119 | void xen_sysexit(void); | 119 | __visible void xen_sysexit(void); |
120 | void xen_sysret32(void); | 120 | __visible void xen_sysret32(void); |
121 | void xen_sysret64(void); | 121 | __visible void xen_sysret64(void); |
122 | void xen_adjust_exception_frame(void); | 122 | __visible void xen_adjust_exception_frame(void); |
123 | 123 | ||
124 | extern int xen_panic_handler_init(void); | 124 | extern int xen_panic_handler_init(void); |
125 | 125 | ||
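Editorial note: the __visible annotations in xen-ops.h cover symbols that cross the C/assembly boundary. The compiler sees no C caller (or no C definition) for them, so under link-time optimization it could otherwise assume whole-program knowledge and localize or drop the symbols. A generic illustration of the idea follows; it is not the kernel's own macro, which comes from the compiler headers.

    /* Rough equivalent of the kernel's __visible on gcc; sketch only. */
    #define __visible __attribute__((externally_visible))

    /* Defined here in C but called only from a .S file: mark it so LTO
     * keeps the symbol and its externally visible definition. */
    __visible void called_from_asm_only(void)
    {
            /* body elided in this sketch */
    }

    /* Declared here but defined in assembly: the attribute likewise tells
     * the optimizer the symbol really is used outside what it can see. */
    __visible void defined_in_asm(void);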