diff options
Diffstat (limited to 'arch/x86')
280 files changed, 12797 insertions, 5305 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 0e9dec6cadd..e5287d8517a 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild | |||
@@ -1,4 +1,3 @@ | |||
1 | |||
2 | obj-$(CONFIG_KVM) += kvm/ | 1 | obj-$(CONFIG_KVM) += kvm/ |
3 | 2 | ||
4 | # Xen paravirtualization support | 3 | # Xen paravirtualization support |
@@ -7,6 +6,7 @@ obj-$(CONFIG_XEN) += xen/ | |||
7 | # lguest paravirtualization support | 6 | # lguest paravirtualization support |
8 | obj-$(CONFIG_LGUEST_GUEST) += lguest/ | 7 | obj-$(CONFIG_LGUEST_GUEST) += lguest/ |
9 | 8 | ||
9 | obj-y += realmode/ | ||
10 | obj-y += kernel/ | 10 | obj-y += kernel/ |
11 | obj-y += mm/ | 11 | obj-y += mm/ |
12 | 12 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d6168994e11..c70684f859e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -32,6 +32,7 @@ config X86 | |||
32 | select ARCH_WANT_OPTIONAL_GPIOLIB | 32 | select ARCH_WANT_OPTIONAL_GPIOLIB |
33 | select ARCH_WANT_FRAME_POINTERS | 33 | select ARCH_WANT_FRAME_POINTERS |
34 | select HAVE_DMA_ATTRS | 34 | select HAVE_DMA_ATTRS |
35 | select HAVE_DMA_CONTIGUOUS if !SWIOTLB | ||
35 | select HAVE_KRETPROBES | 36 | select HAVE_KRETPROBES |
36 | select HAVE_OPTPROBES | 37 | select HAVE_OPTPROBES |
37 | select HAVE_FTRACE_MCOUNT_RECORD | 38 | select HAVE_FTRACE_MCOUNT_RECORD |
@@ -85,9 +86,18 @@ config X86 | |||
85 | select GENERIC_SMP_IDLE_THREAD | 86 | select GENERIC_SMP_IDLE_THREAD |
86 | select HAVE_ARCH_SECCOMP_FILTER | 87 | select HAVE_ARCH_SECCOMP_FILTER |
87 | select BUILDTIME_EXTABLE_SORT | 88 | select BUILDTIME_EXTABLE_SORT |
89 | select GENERIC_CMOS_UPDATE | ||
90 | select CLOCKSOURCE_WATCHDOG | ||
91 | select GENERIC_CLOCKEVENTS | ||
92 | select ARCH_CLOCKSOURCE_DATA if X86_64 | ||
93 | select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) | ||
94 | select GENERIC_TIME_VSYSCALL if X86_64 | ||
95 | select KTIME_SCALAR if X86_32 | ||
96 | select GENERIC_STRNCPY_FROM_USER | ||
97 | select GENERIC_STRNLEN_USER | ||
88 | 98 | ||
89 | config INSTRUCTION_DECODER | 99 | config INSTRUCTION_DECODER |
90 | def_bool (KPROBES || PERF_EVENTS) | 100 | def_bool (KPROBES || PERF_EVENTS || UPROBES) |
91 | 101 | ||
92 | config OUTPUT_FORMAT | 102 | config OUTPUT_FORMAT |
93 | string | 103 | string |
@@ -99,23 +109,6 @@ config ARCH_DEFCONFIG | |||
99 | default "arch/x86/configs/i386_defconfig" if X86_32 | 109 | default "arch/x86/configs/i386_defconfig" if X86_32 |
100 | default "arch/x86/configs/x86_64_defconfig" if X86_64 | 110 | default "arch/x86/configs/x86_64_defconfig" if X86_64 |
101 | 111 | ||
102 | config GENERIC_CMOS_UPDATE | ||
103 | def_bool y | ||
104 | |||
105 | config CLOCKSOURCE_WATCHDOG | ||
106 | def_bool y | ||
107 | |||
108 | config GENERIC_CLOCKEVENTS | ||
109 | def_bool y | ||
110 | |||
111 | config ARCH_CLOCKSOURCE_DATA | ||
112 | def_bool y | ||
113 | depends on X86_64 | ||
114 | |||
115 | config GENERIC_CLOCKEVENTS_BROADCAST | ||
116 | def_bool y | ||
117 | depends on X86_64 || (X86_32 && X86_LOCAL_APIC) | ||
118 | |||
119 | config LOCKDEP_SUPPORT | 112 | config LOCKDEP_SUPPORT |
120 | def_bool y | 113 | def_bool y |
121 | 114 | ||
@@ -166,10 +159,6 @@ config RWSEM_XCHGADD_ALGORITHM | |||
166 | config GENERIC_CALIBRATE_DELAY | 159 | config GENERIC_CALIBRATE_DELAY |
167 | def_bool y | 160 | def_bool y |
168 | 161 | ||
169 | config GENERIC_TIME_VSYSCALL | ||
170 | bool | ||
171 | default X86_64 | ||
172 | |||
173 | config ARCH_HAS_CPU_RELAX | 162 | config ARCH_HAS_CPU_RELAX |
174 | def_bool y | 163 | def_bool y |
175 | 164 | ||
@@ -236,13 +225,13 @@ config ARCH_HWEIGHT_CFLAGS | |||
236 | default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 | 225 | default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 |
237 | default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 | 226 | default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 |
238 | 227 | ||
239 | config KTIME_SCALAR | ||
240 | def_bool X86_32 | ||
241 | |||
242 | config ARCH_CPU_PROBE_RELEASE | 228 | config ARCH_CPU_PROBE_RELEASE |
243 | def_bool y | 229 | def_bool y |
244 | depends on HOTPLUG_CPU | 230 | depends on HOTPLUG_CPU |
245 | 231 | ||
232 | config ARCH_SUPPORTS_UPROBES | ||
233 | def_bool y | ||
234 | |||
246 | source "init/Kconfig" | 235 | source "init/Kconfig" |
247 | source "kernel/Kconfig.freezer" | 236 | source "kernel/Kconfig.freezer" |
248 | 237 | ||
@@ -258,8 +247,6 @@ config ZONE_DMA | |||
258 | 247 | ||
259 | If unsure, say Y. | 248 | If unsure, say Y. |
260 | 249 | ||
261 | source "kernel/time/Kconfig" | ||
262 | |||
263 | config SMP | 250 | config SMP |
264 | bool "Symmetric multi-processing support" | 251 | bool "Symmetric multi-processing support" |
265 | ---help--- | 252 | ---help--- |
@@ -1519,6 +1506,8 @@ config EFI_STUB | |||
1519 | This kernel feature allows a bzImage to be loaded directly | 1506 | This kernel feature allows a bzImage to be loaded directly |
1520 | by EFI firmware without the use of a bootloader. | 1507 | by EFI firmware without the use of a bootloader. |
1521 | 1508 | ||
1509 | See Documentation/x86/efi-stub.txt for more information. | ||
1510 | |||
1522 | config SECCOMP | 1511 | config SECCOMP |
1523 | def_bool y | 1512 | def_bool y |
1524 | prompt "Enable seccomp to safely compute untrusted bytecode" | 1513 | prompt "Enable seccomp to safely compute untrusted bytecode" |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1f252143455..b0c5276861e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -49,6 +49,9 @@ else | |||
49 | KBUILD_AFLAGS += -m64 | 49 | KBUILD_AFLAGS += -m64 |
50 | KBUILD_CFLAGS += -m64 | 50 | KBUILD_CFLAGS += -m64 |
51 | 51 | ||
52 | # Use -mpreferred-stack-boundary=3 if supported. | ||
53 | KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3) | ||
54 | |||
52 | # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) | 55 | # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) |
53 | cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) | 56 | cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) |
54 | cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) | 57 | cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) |
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index cb62f786990..10f6b1178c6 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c | |||
@@ -1,5 +1,7 @@ | |||
1 | #include "misc.h" | 1 | #include "misc.h" |
2 | 2 | ||
3 | #ifdef CONFIG_EARLY_PRINTK | ||
4 | |||
3 | static unsigned long fs; | 5 | static unsigned long fs; |
4 | static inline void set_fs(unsigned long seg) | 6 | static inline void set_fs(unsigned long seg) |
5 | { | 7 | { |
@@ -19,3 +21,5 @@ int cmdline_find_option_bool(const char *option) | |||
19 | { | 21 | { |
20 | return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option); | 22 | return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option); |
21 | } | 23 | } |
24 | |||
25 | #endif | ||
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c index 261e81fb958..d3d003cb548 100644 --- a/arch/x86/boot/compressed/early_serial_console.c +++ b/arch/x86/boot/compressed/early_serial_console.c | |||
@@ -1,5 +1,9 @@ | |||
1 | #include "misc.h" | 1 | #include "misc.h" |
2 | 2 | ||
3 | #ifdef CONFIG_EARLY_PRINTK | ||
4 | |||
3 | int early_serial_base; | 5 | int early_serial_base; |
4 | 6 | ||
5 | #include "../early_serial_console.c" | 7 | #include "../early_serial_console.c" |
8 | |||
9 | #endif | ||
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 2c14e76bb4c..4e85f5f8583 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c | |||
@@ -16,6 +16,26 @@ | |||
16 | 16 | ||
17 | static efi_system_table_t *sys_table; | 17 | static efi_system_table_t *sys_table; |
18 | 18 | ||
19 | static void efi_printk(char *str) | ||
20 | { | ||
21 | char *s8; | ||
22 | |||
23 | for (s8 = str; *s8; s8++) { | ||
24 | struct efi_simple_text_output_protocol *out; | ||
25 | efi_char16_t ch[2] = { 0 }; | ||
26 | |||
27 | ch[0] = *s8; | ||
28 | out = (struct efi_simple_text_output_protocol *)sys_table->con_out; | ||
29 | |||
30 | if (*s8 == '\n') { | ||
31 | efi_char16_t nl[2] = { '\r', 0 }; | ||
32 | efi_call_phys2(out->output_string, out, nl); | ||
33 | } | ||
34 | |||
35 | efi_call_phys2(out->output_string, out, ch); | ||
36 | } | ||
37 | } | ||
38 | |||
19 | static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size, | 39 | static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size, |
20 | unsigned long *desc_size) | 40 | unsigned long *desc_size) |
21 | { | 41 | { |
@@ -531,8 +551,10 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image, | |||
531 | EFI_LOADER_DATA, | 551 | EFI_LOADER_DATA, |
532 | nr_initrds * sizeof(*initrds), | 552 | nr_initrds * sizeof(*initrds), |
533 | &initrds); | 553 | &initrds); |
534 | if (status != EFI_SUCCESS) | 554 | if (status != EFI_SUCCESS) { |
555 | efi_printk("Failed to alloc mem for initrds\n"); | ||
535 | goto fail; | 556 | goto fail; |
557 | } | ||
536 | 558 | ||
537 | str = (char *)(unsigned long)hdr->cmd_line_ptr; | 559 | str = (char *)(unsigned long)hdr->cmd_line_ptr; |
538 | for (i = 0; i < nr_initrds; i++) { | 560 | for (i = 0; i < nr_initrds; i++) { |
@@ -575,32 +597,42 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image, | |||
575 | 597 | ||
576 | status = efi_call_phys3(boottime->handle_protocol, | 598 | status = efi_call_phys3(boottime->handle_protocol, |
577 | image->device_handle, &fs_proto, &io); | 599 | image->device_handle, &fs_proto, &io); |
578 | if (status != EFI_SUCCESS) | 600 | if (status != EFI_SUCCESS) { |
601 | efi_printk("Failed to handle fs_proto\n"); | ||
579 | goto free_initrds; | 602 | goto free_initrds; |
603 | } | ||
580 | 604 | ||
581 | status = efi_call_phys2(io->open_volume, io, &fh); | 605 | status = efi_call_phys2(io->open_volume, io, &fh); |
582 | if (status != EFI_SUCCESS) | 606 | if (status != EFI_SUCCESS) { |
607 | efi_printk("Failed to open volume\n"); | ||
583 | goto free_initrds; | 608 | goto free_initrds; |
609 | } | ||
584 | } | 610 | } |
585 | 611 | ||
586 | status = efi_call_phys5(fh->open, fh, &h, filename_16, | 612 | status = efi_call_phys5(fh->open, fh, &h, filename_16, |
587 | EFI_FILE_MODE_READ, (u64)0); | 613 | EFI_FILE_MODE_READ, (u64)0); |
588 | if (status != EFI_SUCCESS) | 614 | if (status != EFI_SUCCESS) { |
615 | efi_printk("Failed to open initrd file\n"); | ||
589 | goto close_handles; | 616 | goto close_handles; |
617 | } | ||
590 | 618 | ||
591 | initrd->handle = h; | 619 | initrd->handle = h; |
592 | 620 | ||
593 | info_sz = 0; | 621 | info_sz = 0; |
594 | status = efi_call_phys4(h->get_info, h, &info_guid, | 622 | status = efi_call_phys4(h->get_info, h, &info_guid, |
595 | &info_sz, NULL); | 623 | &info_sz, NULL); |
596 | if (status != EFI_BUFFER_TOO_SMALL) | 624 | if (status != EFI_BUFFER_TOO_SMALL) { |
625 | efi_printk("Failed to get initrd info size\n"); | ||
597 | goto close_handles; | 626 | goto close_handles; |
627 | } | ||
598 | 628 | ||
599 | grow: | 629 | grow: |
600 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 630 | status = efi_call_phys3(sys_table->boottime->allocate_pool, |
601 | EFI_LOADER_DATA, info_sz, &info); | 631 | EFI_LOADER_DATA, info_sz, &info); |
602 | if (status != EFI_SUCCESS) | 632 | if (status != EFI_SUCCESS) { |
633 | efi_printk("Failed to alloc mem for initrd info\n"); | ||
603 | goto close_handles; | 634 | goto close_handles; |
635 | } | ||
604 | 636 | ||
605 | status = efi_call_phys4(h->get_info, h, &info_guid, | 637 | status = efi_call_phys4(h->get_info, h, &info_guid, |
606 | &info_sz, info); | 638 | &info_sz, info); |
@@ -612,8 +644,10 @@ grow: | |||
612 | file_sz = info->file_size; | 644 | file_sz = info->file_size; |
613 | efi_call_phys1(sys_table->boottime->free_pool, info); | 645 | efi_call_phys1(sys_table->boottime->free_pool, info); |
614 | 646 | ||
615 | if (status != EFI_SUCCESS) | 647 | if (status != EFI_SUCCESS) { |
648 | efi_printk("Failed to get initrd info\n"); | ||
616 | goto close_handles; | 649 | goto close_handles; |
650 | } | ||
617 | 651 | ||
618 | initrd->size = file_sz; | 652 | initrd->size = file_sz; |
619 | initrd_total += file_sz; | 653 | initrd_total += file_sz; |
@@ -629,11 +663,14 @@ grow: | |||
629 | */ | 663 | */ |
630 | status = high_alloc(initrd_total, 0x1000, | 664 | status = high_alloc(initrd_total, 0x1000, |
631 | &initrd_addr, hdr->initrd_addr_max); | 665 | &initrd_addr, hdr->initrd_addr_max); |
632 | if (status != EFI_SUCCESS) | 666 | if (status != EFI_SUCCESS) { |
667 | efi_printk("Failed to alloc highmem for initrds\n"); | ||
633 | goto close_handles; | 668 | goto close_handles; |
669 | } | ||
634 | 670 | ||
635 | /* We've run out of free low memory. */ | 671 | /* We've run out of free low memory. */ |
636 | if (initrd_addr > hdr->initrd_addr_max) { | 672 | if (initrd_addr > hdr->initrd_addr_max) { |
673 | efi_printk("We've run out of free low memory\n"); | ||
637 | status = EFI_INVALID_PARAMETER; | 674 | status = EFI_INVALID_PARAMETER; |
638 | goto free_initrd_total; | 675 | goto free_initrd_total; |
639 | } | 676 | } |
@@ -652,8 +689,10 @@ grow: | |||
652 | status = efi_call_phys3(fh->read, | 689 | status = efi_call_phys3(fh->read, |
653 | initrds[j].handle, | 690 | initrds[j].handle, |
654 | &chunksize, addr); | 691 | &chunksize, addr); |
655 | if (status != EFI_SUCCESS) | 692 | if (status != EFI_SUCCESS) { |
693 | efi_printk("Failed to read initrd\n"); | ||
656 | goto free_initrd_total; | 694 | goto free_initrd_total; |
695 | } | ||
657 | addr += chunksize; | 696 | addr += chunksize; |
658 | size -= chunksize; | 697 | size -= chunksize; |
659 | } | 698 | } |
@@ -674,7 +713,7 @@ free_initrd_total: | |||
674 | low_free(initrd_total, initrd_addr); | 713 | low_free(initrd_total, initrd_addr); |
675 | 714 | ||
676 | close_handles: | 715 | close_handles: |
677 | for (k = j; k < nr_initrds; k++) | 716 | for (k = j; k < i; k++) |
678 | efi_call_phys1(fh->close, initrds[k].handle); | 717 | efi_call_phys1(fh->close, initrds[k].handle); |
679 | free_initrds: | 718 | free_initrds: |
680 | efi_call_phys1(sys_table->boottime->free_pool, initrds); | 719 | efi_call_phys1(sys_table->boottime->free_pool, initrds); |
@@ -732,8 +771,10 @@ static efi_status_t make_boot_params(struct boot_params *boot_params, | |||
732 | options_size++; /* NUL termination */ | 771 | options_size++; /* NUL termination */ |
733 | 772 | ||
734 | status = low_alloc(options_size, 1, &cmdline); | 773 | status = low_alloc(options_size, 1, &cmdline); |
735 | if (status != EFI_SUCCESS) | 774 | if (status != EFI_SUCCESS) { |
775 | efi_printk("Failed to alloc mem for cmdline\n"); | ||
736 | goto fail; | 776 | goto fail; |
777 | } | ||
737 | 778 | ||
738 | s1 = (u8 *)(unsigned long)cmdline; | 779 | s1 = (u8 *)(unsigned long)cmdline; |
739 | s2 = (u16 *)options; | 780 | s2 = (u16 *)options; |
@@ -895,12 +936,16 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table) | |||
895 | 936 | ||
896 | status = efi_call_phys3(sys_table->boottime->handle_protocol, | 937 | status = efi_call_phys3(sys_table->boottime->handle_protocol, |
897 | handle, &proto, (void *)&image); | 938 | handle, &proto, (void *)&image); |
898 | if (status != EFI_SUCCESS) | 939 | if (status != EFI_SUCCESS) { |
940 | efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); | ||
899 | goto fail; | 941 | goto fail; |
942 | } | ||
900 | 943 | ||
901 | status = low_alloc(0x4000, 1, (unsigned long *)&boot_params); | 944 | status = low_alloc(0x4000, 1, (unsigned long *)&boot_params); |
902 | if (status != EFI_SUCCESS) | 945 | if (status != EFI_SUCCESS) { |
946 | efi_printk("Failed to alloc lowmem for boot params\n"); | ||
903 | goto fail; | 947 | goto fail; |
948 | } | ||
904 | 949 | ||
905 | memset(boot_params, 0x0, 0x4000); | 950 | memset(boot_params, 0x0, 0x4000); |
906 | 951 | ||
@@ -933,8 +978,10 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table) | |||
933 | if (status != EFI_SUCCESS) { | 978 | if (status != EFI_SUCCESS) { |
934 | status = low_alloc(hdr->init_size, hdr->kernel_alignment, | 979 | status = low_alloc(hdr->init_size, hdr->kernel_alignment, |
935 | &start); | 980 | &start); |
936 | if (status != EFI_SUCCESS) | 981 | if (status != EFI_SUCCESS) { |
982 | efi_printk("Failed to alloc mem for kernel\n"); | ||
937 | goto fail; | 983 | goto fail; |
984 | } | ||
938 | } | 985 | } |
939 | 986 | ||
940 | hdr->code32_start = (__u32)start; | 987 | hdr->code32_start = (__u32)start; |
@@ -945,19 +992,25 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table) | |||
945 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 992 | status = efi_call_phys3(sys_table->boottime->allocate_pool, |
946 | EFI_LOADER_DATA, sizeof(*gdt), | 993 | EFI_LOADER_DATA, sizeof(*gdt), |
947 | (void **)&gdt); | 994 | (void **)&gdt); |
948 | if (status != EFI_SUCCESS) | 995 | if (status != EFI_SUCCESS) { |
996 | efi_printk("Failed to alloc mem for gdt structure\n"); | ||
949 | goto fail; | 997 | goto fail; |
998 | } | ||
950 | 999 | ||
951 | gdt->size = 0x800; | 1000 | gdt->size = 0x800; |
952 | status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); | 1001 | status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); |
953 | if (status != EFI_SUCCESS) | 1002 | if (status != EFI_SUCCESS) { |
1003 | efi_printk("Failed to alloc mem for gdt\n"); | ||
954 | goto fail; | 1004 | goto fail; |
1005 | } | ||
955 | 1006 | ||
956 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 1007 | status = efi_call_phys3(sys_table->boottime->allocate_pool, |
957 | EFI_LOADER_DATA, sizeof(*idt), | 1008 | EFI_LOADER_DATA, sizeof(*idt), |
958 | (void **)&idt); | 1009 | (void **)&idt); |
959 | if (status != EFI_SUCCESS) | 1010 | if (status != EFI_SUCCESS) { |
1011 | efi_printk("Failed to alloc mem for idt structure\n"); | ||
960 | goto fail; | 1012 | goto fail; |
1013 | } | ||
961 | 1014 | ||
962 | idt->size = 0; | 1015 | idt->size = 0; |
963 | idt->address = 0; | 1016 | idt->address = 0; |
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 39251663e65..3b6e15627c5 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h | |||
@@ -58,4 +58,10 @@ struct efi_uga_draw_protocol { | |||
58 | void *blt; | 58 | void *blt; |
59 | }; | 59 | }; |
60 | 60 | ||
61 | struct efi_simple_text_output_protocol { | ||
62 | void *reset; | ||
63 | void *output_string; | ||
64 | void *test_string; | ||
65 | }; | ||
66 | |||
61 | #endif /* BOOT_COMPRESSED_EBOOT_H */ | 67 | #endif /* BOOT_COMPRESSED_EBOOT_H */ |
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 7116dcba0c9..88f7ff6da40 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c | |||
@@ -108,8 +108,6 @@ static void error(char *m); | |||
108 | * This is set up by the setup-routine at boot-time | 108 | * This is set up by the setup-routine at boot-time |
109 | */ | 109 | */ |
110 | struct boot_params *real_mode; /* Pointer to real-mode data */ | 110 | struct boot_params *real_mode; /* Pointer to real-mode data */ |
111 | static int quiet; | ||
112 | static int debug; | ||
113 | 111 | ||
114 | void *memset(void *s, int c, size_t n); | 112 | void *memset(void *s, int c, size_t n); |
115 | void *memcpy(void *dest, const void *src, size_t n); | 113 | void *memcpy(void *dest, const void *src, size_t n); |
@@ -170,15 +168,11 @@ static void serial_putchar(int ch) | |||
170 | outb(ch, early_serial_base + TXR); | 168 | outb(ch, early_serial_base + TXR); |
171 | } | 169 | } |
172 | 170 | ||
173 | void __putstr(int error, const char *s) | 171 | void __putstr(const char *s) |
174 | { | 172 | { |
175 | int x, y, pos; | 173 | int x, y, pos; |
176 | char c; | 174 | char c; |
177 | 175 | ||
178 | #ifndef CONFIG_X86_VERBOSE_BOOTUP | ||
179 | if (!error) | ||
180 | return; | ||
181 | #endif | ||
182 | if (early_serial_base) { | 176 | if (early_serial_base) { |
183 | const char *str = s; | 177 | const char *str = s; |
184 | while (*str) { | 178 | while (*str) { |
@@ -265,9 +259,9 @@ void *memcpy(void *dest, const void *src, size_t n) | |||
265 | 259 | ||
266 | static void error(char *x) | 260 | static void error(char *x) |
267 | { | 261 | { |
268 | __putstr(1, "\n\n"); | 262 | error_putstr("\n\n"); |
269 | __putstr(1, x); | 263 | error_putstr(x); |
270 | __putstr(1, "\n\n -- System halted"); | 264 | error_putstr("\n\n -- System halted"); |
271 | 265 | ||
272 | while (1) | 266 | while (1) |
273 | asm("hlt"); | 267 | asm("hlt"); |
@@ -294,8 +288,7 @@ static void parse_elf(void *output) | |||
294 | return; | 288 | return; |
295 | } | 289 | } |
296 | 290 | ||
297 | if (!quiet) | 291 | debug_putstr("Parsing ELF... "); |
298 | putstr("Parsing ELF... "); | ||
299 | 292 | ||
300 | phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); | 293 | phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); |
301 | if (!phdrs) | 294 | if (!phdrs) |
@@ -332,11 +325,6 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
332 | { | 325 | { |
333 | real_mode = rmode; | 326 | real_mode = rmode; |
334 | 327 | ||
335 | if (cmdline_find_option_bool("quiet")) | ||
336 | quiet = 1; | ||
337 | if (cmdline_find_option_bool("debug")) | ||
338 | debug = 1; | ||
339 | |||
340 | if (real_mode->screen_info.orig_video_mode == 7) { | 328 | if (real_mode->screen_info.orig_video_mode == 7) { |
341 | vidmem = (char *) 0xb0000; | 329 | vidmem = (char *) 0xb0000; |
342 | vidport = 0x3b4; | 330 | vidport = 0x3b4; |
@@ -349,8 +337,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
349 | cols = real_mode->screen_info.orig_video_cols; | 337 | cols = real_mode->screen_info.orig_video_cols; |
350 | 338 | ||
351 | console_init(); | 339 | console_init(); |
352 | if (debug) | 340 | debug_putstr("early console in decompress_kernel\n"); |
353 | putstr("early console in decompress_kernel\n"); | ||
354 | 341 | ||
355 | free_mem_ptr = heap; /* Heap */ | 342 | free_mem_ptr = heap; /* Heap */ |
356 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; | 343 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; |
@@ -369,11 +356,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
369 | error("Wrong destination address"); | 356 | error("Wrong destination address"); |
370 | #endif | 357 | #endif |
371 | 358 | ||
372 | if (!quiet) | 359 | debug_putstr("\nDecompressing Linux... "); |
373 | putstr("\nDecompressing Linux... "); | ||
374 | decompress(input_data, input_len, NULL, NULL, output, NULL, error); | 360 | decompress(input_data, input_len, NULL, NULL, output, NULL, error); |
375 | parse_elf(output); | 361 | parse_elf(output); |
376 | if (!quiet) | 362 | debug_putstr("done.\nBooting the kernel.\n"); |
377 | putstr("done.\nBooting the kernel.\n"); | ||
378 | return; | 363 | return; |
379 | } | 364 | } |
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 3f19c81a620..0e6dc0ee0ee 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h | |||
@@ -24,9 +24,21 @@ | |||
24 | 24 | ||
25 | /* misc.c */ | 25 | /* misc.c */ |
26 | extern struct boot_params *real_mode; /* Pointer to real-mode data */ | 26 | extern struct boot_params *real_mode; /* Pointer to real-mode data */ |
27 | void __putstr(int error, const char *s); | 27 | void __putstr(const char *s); |
28 | #define putstr(__x) __putstr(0, __x) | 28 | #define error_putstr(__x) __putstr(__x) |
29 | #define puts(__x) __putstr(0, __x) | 29 | |
30 | #ifdef CONFIG_X86_VERBOSE_BOOTUP | ||
31 | |||
32 | #define debug_putstr(__x) __putstr(__x) | ||
33 | |||
34 | #else | ||
35 | |||
36 | static inline void debug_putstr(const char *s) | ||
37 | { } | ||
38 | |||
39 | #endif | ||
40 | |||
41 | #ifdef CONFIG_EARLY_PRINTK | ||
30 | 42 | ||
31 | /* cmdline.c */ | 43 | /* cmdline.c */ |
32 | int cmdline_find_option(const char *option, char *buffer, int bufsize); | 44 | int cmdline_find_option(const char *option, char *buffer, int bufsize); |
@@ -36,4 +48,13 @@ int cmdline_find_option_bool(const char *option); | |||
36 | extern int early_serial_base; | 48 | extern int early_serial_base; |
37 | void console_init(void); | 49 | void console_init(void); |
38 | 50 | ||
51 | #else | ||
52 | |||
53 | /* early_serial_console.c */ | ||
54 | static const int early_serial_base; | ||
55 | static inline void console_init(void) | ||
56 | { } | ||
57 | |||
58 | #endif | ||
59 | |||
39 | #endif | 60 | #endif |
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index fde5bde3b60..9b9c6475b36 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
@@ -94,10 +94,10 @@ bs_die: | |||
94 | 94 | ||
95 | .section ".bsdata", "a" | 95 | .section ".bsdata", "a" |
96 | bugger_off_msg: | 96 | bugger_off_msg: |
97 | .ascii "Direct booting from floppy is no longer supported.\r\n" | 97 | .ascii "Direct floppy boot is not supported. " |
98 | .ascii "Please use a boot loader program instead.\r\n" | 98 | .ascii "Use a boot loader program instead.\r\n" |
99 | .ascii "\n" | 99 | .ascii "\n" |
100 | .ascii "Remove disk and press any key to reboot . . .\r\n" | 100 | .ascii "Remove disk and press any key to reboot ...\r\n" |
101 | .byte 0 | 101 | .byte 0 |
102 | 102 | ||
103 | #ifdef CONFIG_EFI_STUB | 103 | #ifdef CONFIG_EFI_STUB |
@@ -111,7 +111,7 @@ coff_header: | |||
111 | #else | 111 | #else |
112 | .word 0x8664 # x86-64 | 112 | .word 0x8664 # x86-64 |
113 | #endif | 113 | #endif |
114 | .word 2 # nr_sections | 114 | .word 3 # nr_sections |
115 | .long 0 # TimeDateStamp | 115 | .long 0 # TimeDateStamp |
116 | .long 0 # PointerToSymbolTable | 116 | .long 0 # PointerToSymbolTable |
117 | .long 1 # NumberOfSymbols | 117 | .long 1 # NumberOfSymbols |
@@ -158,8 +158,8 @@ extra_header_fields: | |||
158 | #else | 158 | #else |
159 | .quad 0 # ImageBase | 159 | .quad 0 # ImageBase |
160 | #endif | 160 | #endif |
161 | .long 0x1000 # SectionAlignment | 161 | .long 0x20 # SectionAlignment |
162 | .long 0x200 # FileAlignment | 162 | .long 0x20 # FileAlignment |
163 | .word 0 # MajorOperatingSystemVersion | 163 | .word 0 # MajorOperatingSystemVersion |
164 | .word 0 # MinorOperatingSystemVersion | 164 | .word 0 # MinorOperatingSystemVersion |
165 | .word 0 # MajorImageVersion | 165 | .word 0 # MajorImageVersion |
@@ -200,8 +200,10 @@ extra_header_fields: | |||
200 | 200 | ||
201 | # Section table | 201 | # Section table |
202 | section_table: | 202 | section_table: |
203 | .ascii ".text" | 203 | # |
204 | .byte 0 | 204 | # The offset & size fields are filled in by build.c. |
205 | # | ||
206 | .ascii ".setup" | ||
205 | .byte 0 | 207 | .byte 0 |
206 | .byte 0 | 208 | .byte 0 |
207 | .long 0 | 209 | .long 0 |
@@ -217,9 +219,8 @@ section_table: | |||
217 | 219 | ||
218 | # | 220 | # |
219 | # The EFI application loader requires a relocation section | 221 | # The EFI application loader requires a relocation section |
220 | # because EFI applications must be relocatable. But since | 222 | # because EFI applications must be relocatable. The .reloc |
221 | # we don't need the loader to fixup any relocs for us, we | 223 | # offset & size fields are filled in by build.c. |
222 | # just create an empty (zero-length) .reloc section header. | ||
223 | # | 224 | # |
224 | .ascii ".reloc" | 225 | .ascii ".reloc" |
225 | .byte 0 | 226 | .byte 0 |
@@ -233,6 +234,25 @@ section_table: | |||
233 | .word 0 # NumberOfRelocations | 234 | .word 0 # NumberOfRelocations |
234 | .word 0 # NumberOfLineNumbers | 235 | .word 0 # NumberOfLineNumbers |
235 | .long 0x42100040 # Characteristics (section flags) | 236 | .long 0x42100040 # Characteristics (section flags) |
237 | |||
238 | # | ||
239 | # The offset & size fields are filled in by build.c. | ||
240 | # | ||
241 | .ascii ".text" | ||
242 | .byte 0 | ||
243 | .byte 0 | ||
244 | .byte 0 | ||
245 | .long 0 | ||
246 | .long 0x0 # startup_{32,64} | ||
247 | .long 0 # Size of initialized data | ||
248 | # on disk | ||
249 | .long 0x0 # startup_{32,64} | ||
250 | .long 0 # PointerToRelocations | ||
251 | .long 0 # PointerToLineNumbers | ||
252 | .word 0 # NumberOfRelocations | ||
253 | .word 0 # NumberOfLineNumbers | ||
254 | .long 0x60500020 # Characteristics (section flags) | ||
255 | |||
236 | #endif /* CONFIG_EFI_STUB */ | 256 | #endif /* CONFIG_EFI_STUB */ |
237 | 257 | ||
238 | # Kernel attributes; used by setup. This is part 1 of the | 258 | # Kernel attributes; used by setup. This is part 1 of the |
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 3f61f6e2b46..4b8e165ee57 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c | |||
@@ -50,6 +50,8 @@ typedef unsigned int u32; | |||
50 | u8 buf[SETUP_SECT_MAX*512]; | 50 | u8 buf[SETUP_SECT_MAX*512]; |
51 | int is_big_kernel; | 51 | int is_big_kernel; |
52 | 52 | ||
53 | #define PECOFF_RELOC_RESERVE 0x20 | ||
54 | |||
53 | /*----------------------------------------------------------------------*/ | 55 | /*----------------------------------------------------------------------*/ |
54 | 56 | ||
55 | static const u32 crctab32[] = { | 57 | static const u32 crctab32[] = { |
@@ -133,11 +135,103 @@ static void usage(void) | |||
133 | die("Usage: build setup system [> image]"); | 135 | die("Usage: build setup system [> image]"); |
134 | } | 136 | } |
135 | 137 | ||
136 | int main(int argc, char ** argv) | ||
137 | { | ||
138 | #ifdef CONFIG_EFI_STUB | 138 | #ifdef CONFIG_EFI_STUB |
139 | unsigned int file_sz, pe_header; | 139 | |
140 | static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) | ||
141 | { | ||
142 | unsigned int pe_header; | ||
143 | unsigned short num_sections; | ||
144 | u8 *section; | ||
145 | |||
146 | pe_header = get_unaligned_le32(&buf[0x3c]); | ||
147 | num_sections = get_unaligned_le16(&buf[pe_header + 6]); | ||
148 | |||
149 | #ifdef CONFIG_X86_32 | ||
150 | section = &buf[pe_header + 0xa8]; | ||
151 | #else | ||
152 | section = &buf[pe_header + 0xb8]; | ||
140 | #endif | 153 | #endif |
154 | |||
155 | while (num_sections > 0) { | ||
156 | if (strncmp((char*)section, section_name, 8) == 0) { | ||
157 | /* section header size field */ | ||
158 | put_unaligned_le32(size, section + 0x8); | ||
159 | |||
160 | /* section header vma field */ | ||
161 | put_unaligned_le32(offset, section + 0xc); | ||
162 | |||
163 | /* section header 'size of initialised data' field */ | ||
164 | put_unaligned_le32(size, section + 0x10); | ||
165 | |||
166 | /* section header 'file offset' field */ | ||
167 | put_unaligned_le32(offset, section + 0x14); | ||
168 | |||
169 | break; | ||
170 | } | ||
171 | section += 0x28; | ||
172 | num_sections--; | ||
173 | } | ||
174 | } | ||
175 | |||
176 | static void update_pecoff_setup_and_reloc(unsigned int size) | ||
177 | { | ||
178 | u32 setup_offset = 0x200; | ||
179 | u32 reloc_offset = size - PECOFF_RELOC_RESERVE; | ||
180 | u32 setup_size = reloc_offset - setup_offset; | ||
181 | |||
182 | update_pecoff_section_header(".setup", setup_offset, setup_size); | ||
183 | update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE); | ||
184 | |||
185 | /* | ||
186 | * Modify .reloc section contents with a single entry. The | ||
187 | * relocation is applied to offset 10 of the relocation section. | ||
188 | */ | ||
189 | put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]); | ||
190 | put_unaligned_le32(10, &buf[reloc_offset + 4]); | ||
191 | } | ||
192 | |||
193 | static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) | ||
194 | { | ||
195 | unsigned int pe_header; | ||
196 | unsigned int text_sz = file_sz - text_start; | ||
197 | |||
198 | pe_header = get_unaligned_le32(&buf[0x3c]); | ||
199 | |||
200 | /* Size of image */ | ||
201 | put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); | ||
202 | |||
203 | /* | ||
204 | * Size of code: Subtract the size of the first sector (512 bytes) | ||
205 | * which includes the header. | ||
206 | */ | ||
207 | put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]); | ||
208 | |||
209 | #ifdef CONFIG_X86_32 | ||
210 | /* | ||
211 | * Address of entry point. | ||
212 | * | ||
213 | * The EFI stub entry point is +16 bytes from the start of | ||
214 | * the .text section. | ||
215 | */ | ||
216 | put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]); | ||
217 | #else | ||
218 | /* | ||
219 | * Address of entry point. startup_32 is at the beginning and | ||
220 | * the 64-bit entry point (startup_64) is always 512 bytes | ||
221 | * after. The EFI stub entry point is 16 bytes after that, as | ||
222 | * the first instruction allows legacy loaders to jump over | ||
223 | * the EFI stub initialisation | ||
224 | */ | ||
225 | put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]); | ||
226 | #endif /* CONFIG_X86_32 */ | ||
227 | |||
228 | update_pecoff_section_header(".text", text_start, text_sz); | ||
229 | } | ||
230 | |||
231 | #endif /* CONFIG_EFI_STUB */ | ||
232 | |||
233 | int main(int argc, char ** argv) | ||
234 | { | ||
141 | unsigned int i, sz, setup_sectors; | 235 | unsigned int i, sz, setup_sectors; |
142 | int c; | 236 | int c; |
143 | u32 sys_size; | 237 | u32 sys_size; |
@@ -163,6 +257,12 @@ int main(int argc, char ** argv) | |||
163 | die("Boot block hasn't got boot flag (0xAA55)"); | 257 | die("Boot block hasn't got boot flag (0xAA55)"); |
164 | fclose(file); | 258 | fclose(file); |
165 | 259 | ||
260 | #ifdef CONFIG_EFI_STUB | ||
261 | /* Reserve 0x20 bytes for .reloc section */ | ||
262 | memset(buf+c, 0, PECOFF_RELOC_RESERVE); | ||
263 | c += PECOFF_RELOC_RESERVE; | ||
264 | #endif | ||
265 | |||
166 | /* Pad unused space with zeros */ | 266 | /* Pad unused space with zeros */ |
167 | setup_sectors = (c + 511) / 512; | 267 | setup_sectors = (c + 511) / 512; |
168 | if (setup_sectors < SETUP_SECT_MIN) | 268 | if (setup_sectors < SETUP_SECT_MIN) |
@@ -170,6 +270,10 @@ int main(int argc, char ** argv) | |||
170 | i = setup_sectors*512; | 270 | i = setup_sectors*512; |
171 | memset(buf+c, 0, i-c); | 271 | memset(buf+c, 0, i-c); |
172 | 272 | ||
273 | #ifdef CONFIG_EFI_STUB | ||
274 | update_pecoff_setup_and_reloc(i); | ||
275 | #endif | ||
276 | |||
173 | /* Set the default root device */ | 277 | /* Set the default root device */ |
174 | put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); | 278 | put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); |
175 | 279 | ||
@@ -194,66 +298,8 @@ int main(int argc, char ** argv) | |||
194 | put_unaligned_le32(sys_size, &buf[0x1f4]); | 298 | put_unaligned_le32(sys_size, &buf[0x1f4]); |
195 | 299 | ||
196 | #ifdef CONFIG_EFI_STUB | 300 | #ifdef CONFIG_EFI_STUB |
197 | file_sz = sz + i + ((sys_size * 16) - sz); | 301 | update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); |
198 | 302 | #endif | |
199 | pe_header = get_unaligned_le32(&buf[0x3c]); | ||
200 | |||
201 | /* Size of image */ | ||
202 | put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); | ||
203 | |||
204 | /* | ||
205 | * Subtract the size of the first section (512 bytes) which | ||
206 | * includes the header and .reloc section. The remaining size | ||
207 | * is that of the .text section. | ||
208 | */ | ||
209 | file_sz -= 512; | ||
210 | |||
211 | /* Size of code */ | ||
212 | put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]); | ||
213 | |||
214 | #ifdef CONFIG_X86_32 | ||
215 | /* | ||
216 | * Address of entry point. | ||
217 | * | ||
218 | * The EFI stub entry point is +16 bytes from the start of | ||
219 | * the .text section. | ||
220 | */ | ||
221 | put_unaligned_le32(i + 16, &buf[pe_header + 0x28]); | ||
222 | |||
223 | /* .text size */ | ||
224 | put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]); | ||
225 | |||
226 | /* .text vma */ | ||
227 | put_unaligned_le32(0x200, &buf[pe_header + 0xb4]); | ||
228 | |||
229 | /* .text size of initialised data */ | ||
230 | put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]); | ||
231 | |||
232 | /* .text file offset */ | ||
233 | put_unaligned_le32(0x200, &buf[pe_header + 0xbc]); | ||
234 | #else | ||
235 | /* | ||
236 | * Address of entry point. startup_32 is at the beginning and | ||
237 | * the 64-bit entry point (startup_64) is always 512 bytes | ||
238 | * after. The EFI stub entry point is 16 bytes after that, as | ||
239 | * the first instruction allows legacy loaders to jump over | ||
240 | * the EFI stub initialisation | ||
241 | */ | ||
242 | put_unaligned_le32(i + 528, &buf[pe_header + 0x28]); | ||
243 | |||
244 | /* .text size */ | ||
245 | put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]); | ||
246 | |||
247 | /* .text vma */ | ||
248 | put_unaligned_le32(0x200, &buf[pe_header + 0xc4]); | ||
249 | |||
250 | /* .text size of initialised data */ | ||
251 | put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]); | ||
252 | |||
253 | /* .text file offset */ | ||
254 | put_unaligned_le32(0x200, &buf[pe_header + 0xcc]); | ||
255 | #endif /* CONFIG_X86_32 */ | ||
256 | #endif /* CONFIG_EFI_STUB */ | ||
257 | 303 | ||
258 | crc = partial_crc32(buf, i, crc); | 304 | crc = partial_crc32(buf, i, crc); |
259 | if (fwrite(buf, 1, i, stdout) != i) | 305 | if (fwrite(buf, 1, i, stdout) != i) |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e191ac048b5..e908e5de82d 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -2,6 +2,9 @@ | |||
2 | # Arch-specific CryptoAPI modules. | 2 | # Arch-specific CryptoAPI modules. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o | ||
6 | obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o | ||
7 | |||
5 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o | 8 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o |
6 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o | 9 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o |
7 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o | 10 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o |
@@ -12,8 +15,10 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o | |||
12 | obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o | 15 | obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o |
13 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o | 16 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o |
14 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o | 17 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o |
18 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o | ||
15 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o | 19 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o |
16 | obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o | 20 | obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o |
21 | obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o | ||
17 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o | 22 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o |
18 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o | 23 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o |
19 | 24 | ||
@@ -30,16 +35,11 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o | |||
30 | blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o | 35 | blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o |
31 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o | 36 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o |
32 | twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o | 37 | twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o |
38 | twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o | ||
33 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o | 39 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o |
34 | serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o | 40 | serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o |
41 | serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o | ||
35 | 42 | ||
36 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 43 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
37 | |||
38 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | 44 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o |
39 | |||
40 | # enable AVX support only when $(AS) can actually assemble the instructions | ||
41 | ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes) | ||
42 | AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT | ||
43 | CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT | ||
44 | endif | ||
45 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o | 45 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o |
diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c new file mode 100644 index 00000000000..43282fe04a8 --- /dev/null +++ b/arch/x86/crypto/ablk_helper.c | |||
@@ -0,0 +1,149 @@ | |||
1 | /* | ||
2 | * Shared async block cipher helpers | ||
3 | * | ||
4 | * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Based on aesni-intel_glue.c by: | ||
7 | * Copyright (C) 2008, Intel Corp. | ||
8 | * Author: Huang Ying <ying.huang@intel.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/crypto.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <crypto/algapi.h> | ||
32 | #include <crypto/cryptd.h> | ||
33 | #include <asm/i387.h> | ||
34 | #include <asm/crypto/ablk_helper.h> | ||
35 | |||
36 | int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
37 | unsigned int key_len) | ||
38 | { | ||
39 | struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
40 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; | ||
41 | int err; | ||
42 | |||
43 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
44 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) | ||
45 | & CRYPTO_TFM_REQ_MASK); | ||
46 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
47 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
48 | & CRYPTO_TFM_RES_MASK); | ||
49 | return err; | ||
50 | } | ||
51 | EXPORT_SYMBOL_GPL(ablk_set_key); | ||
52 | |||
53 | int __ablk_encrypt(struct ablkcipher_request *req) | ||
54 | { | ||
55 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
56 | struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
57 | struct blkcipher_desc desc; | ||
58 | |||
59 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
60 | desc.info = req->info; | ||
61 | desc.flags = 0; | ||
62 | |||
63 | return crypto_blkcipher_crt(desc.tfm)->encrypt( | ||
64 | &desc, req->dst, req->src, req->nbytes); | ||
65 | } | ||
66 | EXPORT_SYMBOL_GPL(__ablk_encrypt); | ||
67 | |||
68 | int ablk_encrypt(struct ablkcipher_request *req) | ||
69 | { | ||
70 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
71 | struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
72 | |||
73 | if (!irq_fpu_usable()) { | ||
74 | struct ablkcipher_request *cryptd_req = | ||
75 | ablkcipher_request_ctx(req); | ||
76 | |||
77 | memcpy(cryptd_req, req, sizeof(*req)); | ||
78 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
79 | |||
80 | return crypto_ablkcipher_encrypt(cryptd_req); | ||
81 | } else { | ||
82 | return __ablk_encrypt(req); | ||
83 | } | ||
84 | } | ||
85 | EXPORT_SYMBOL_GPL(ablk_encrypt); | ||
86 | |||
87 | int ablk_decrypt(struct ablkcipher_request *req) | ||
88 | { | ||
89 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
90 | struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
91 | |||
92 | if (!irq_fpu_usable()) { | ||
93 | struct ablkcipher_request *cryptd_req = | ||
94 | ablkcipher_request_ctx(req); | ||
95 | |||
96 | memcpy(cryptd_req, req, sizeof(*req)); | ||
97 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
98 | |||
99 | return crypto_ablkcipher_decrypt(cryptd_req); | ||
100 | } else { | ||
101 | struct blkcipher_desc desc; | ||
102 | |||
103 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
104 | desc.info = req->info; | ||
105 | desc.flags = 0; | ||
106 | |||
107 | return crypto_blkcipher_crt(desc.tfm)->decrypt( | ||
108 | &desc, req->dst, req->src, req->nbytes); | ||
109 | } | ||
110 | } | ||
111 | EXPORT_SYMBOL_GPL(ablk_decrypt); | ||
112 | |||
113 | void ablk_exit(struct crypto_tfm *tfm) | ||
114 | { | ||
115 | struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); | ||
116 | |||
117 | cryptd_free_ablkcipher(ctx->cryptd_tfm); | ||
118 | } | ||
119 | EXPORT_SYMBOL_GPL(ablk_exit); | ||
120 | |||
121 | int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name) | ||
122 | { | ||
123 | struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); | ||
124 | struct cryptd_ablkcipher *cryptd_tfm; | ||
125 | |||
126 | cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); | ||
127 | if (IS_ERR(cryptd_tfm)) | ||
128 | return PTR_ERR(cryptd_tfm); | ||
129 | |||
130 | ctx->cryptd_tfm = cryptd_tfm; | ||
131 | tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + | ||
132 | crypto_ablkcipher_reqsize(&cryptd_tfm->base); | ||
133 | |||
134 | return 0; | ||
135 | } | ||
136 | EXPORT_SYMBOL_GPL(ablk_init_common); | ||
137 | |||
138 | int ablk_init(struct crypto_tfm *tfm) | ||
139 | { | ||
140 | char drv_name[CRYPTO_MAX_ALG_NAME]; | ||
141 | |||
142 | snprintf(drv_name, sizeof(drv_name), "__driver-%s", | ||
143 | crypto_tfm_alg_driver_name(tfm)); | ||
144 | |||
145 | return ablk_init_common(tfm, drv_name); | ||
146 | } | ||
147 | EXPORT_SYMBOL_GPL(ablk_init); | ||
148 | |||
149 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index 8efcf42a9d7..59b37deb8c8 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c | |||
@@ -5,7 +5,7 @@ | |||
5 | 5 | ||
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <crypto/aes.h> | 7 | #include <crypto/aes.h> |
8 | #include <asm/aes.h> | 8 | #include <asm/crypto/aes.h> |
9 | 9 | ||
10 | asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); | 10 | asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); |
11 | asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); | 11 | asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); |
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index be6d9e365a8..3470624d783 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -2460,10 +2460,12 @@ ENTRY(aesni_cbc_dec) | |||
2460 | pxor IN3, STATE4 | 2460 | pxor IN3, STATE4 |
2461 | movaps IN4, IV | 2461 | movaps IN4, IV |
2462 | #else | 2462 | #else |
2463 | pxor (INP), STATE2 | ||
2464 | pxor 0x10(INP), STATE3 | ||
2465 | pxor IN1, STATE4 | 2463 | pxor IN1, STATE4 |
2466 | movaps IN2, IV | 2464 | movaps IN2, IV |
2465 | movups (INP), IN1 | ||
2466 | pxor IN1, STATE2 | ||
2467 | movups 0x10(INP), IN2 | ||
2468 | pxor IN2, STATE3 | ||
2467 | #endif | 2469 | #endif |
2468 | movups STATE1, (OUTP) | 2470 | movups STATE1, (OUTP) |
2469 | movups STATE2, 0x10(OUTP) | 2471 | movups STATE2, 0x10(OUTP) |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index ac7f5cd019e..34fdcff4d2c 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -30,7 +30,8 @@ | |||
30 | #include <crypto/ctr.h> | 30 | #include <crypto/ctr.h> |
31 | #include <asm/cpu_device_id.h> | 31 | #include <asm/cpu_device_id.h> |
32 | #include <asm/i387.h> | 32 | #include <asm/i387.h> |
33 | #include <asm/aes.h> | 33 | #include <asm/crypto/aes.h> |
34 | #include <asm/crypto/ablk_helper.h> | ||
34 | #include <crypto/scatterwalk.h> | 35 | #include <crypto/scatterwalk.h> |
35 | #include <crypto/internal/aead.h> | 36 | #include <crypto/internal/aead.h> |
36 | #include <linux/workqueue.h> | 37 | #include <linux/workqueue.h> |
@@ -52,10 +53,6 @@ | |||
52 | #define HAS_XTS | 53 | #define HAS_XTS |
53 | #endif | 54 | #endif |
54 | 55 | ||
55 | struct async_aes_ctx { | ||
56 | struct cryptd_ablkcipher *cryptd_tfm; | ||
57 | }; | ||
58 | |||
59 | /* This data is stored at the end of the crypto_tfm struct. | 56 | /* This data is stored at the end of the crypto_tfm struct. |
60 | * It's a type of per "session" data storage location. | 57 | * It's a type of per "session" data storage location. |
61 | * This needs to be 16 byte aligned. | 58 | * This needs to be 16 byte aligned. |
@@ -377,87 +374,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, | |||
377 | } | 374 | } |
378 | #endif | 375 | #endif |
379 | 376 | ||
380 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
381 | unsigned int key_len) | ||
382 | { | ||
383 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
384 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; | ||
385 | int err; | ||
386 | |||
387 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
388 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) | ||
389 | & CRYPTO_TFM_REQ_MASK); | ||
390 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
391 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
392 | & CRYPTO_TFM_RES_MASK); | ||
393 | return err; | ||
394 | } | ||
395 | |||
396 | static int ablk_encrypt(struct ablkcipher_request *req) | ||
397 | { | ||
398 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
399 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
400 | |||
401 | if (!irq_fpu_usable()) { | ||
402 | struct ablkcipher_request *cryptd_req = | ||
403 | ablkcipher_request_ctx(req); | ||
404 | memcpy(cryptd_req, req, sizeof(*req)); | ||
405 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
406 | return crypto_ablkcipher_encrypt(cryptd_req); | ||
407 | } else { | ||
408 | struct blkcipher_desc desc; | ||
409 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
410 | desc.info = req->info; | ||
411 | desc.flags = 0; | ||
412 | return crypto_blkcipher_crt(desc.tfm)->encrypt( | ||
413 | &desc, req->dst, req->src, req->nbytes); | ||
414 | } | ||
415 | } | ||
416 | |||
417 | static int ablk_decrypt(struct ablkcipher_request *req) | ||
418 | { | ||
419 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
420 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
421 | |||
422 | if (!irq_fpu_usable()) { | ||
423 | struct ablkcipher_request *cryptd_req = | ||
424 | ablkcipher_request_ctx(req); | ||
425 | memcpy(cryptd_req, req, sizeof(*req)); | ||
426 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
427 | return crypto_ablkcipher_decrypt(cryptd_req); | ||
428 | } else { | ||
429 | struct blkcipher_desc desc; | ||
430 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
431 | desc.info = req->info; | ||
432 | desc.flags = 0; | ||
433 | return crypto_blkcipher_crt(desc.tfm)->decrypt( | ||
434 | &desc, req->dst, req->src, req->nbytes); | ||
435 | } | ||
436 | } | ||
437 | |||
438 | static void ablk_exit(struct crypto_tfm *tfm) | ||
439 | { | ||
440 | struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
441 | |||
442 | cryptd_free_ablkcipher(ctx->cryptd_tfm); | ||
443 | } | ||
444 | |||
445 | static int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name) | ||
446 | { | ||
447 | struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
448 | struct cryptd_ablkcipher *cryptd_tfm; | ||
449 | |||
450 | cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); | ||
451 | if (IS_ERR(cryptd_tfm)) | ||
452 | return PTR_ERR(cryptd_tfm); | ||
453 | |||
454 | ctx->cryptd_tfm = cryptd_tfm; | ||
455 | tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + | ||
456 | crypto_ablkcipher_reqsize(&cryptd_tfm->base); | ||
457 | |||
458 | return 0; | ||
459 | } | ||
460 | |||
461 | static int ablk_ecb_init(struct crypto_tfm *tfm) | 377 | static int ablk_ecb_init(struct crypto_tfm *tfm) |
462 | { | 378 | { |
463 | return ablk_init_common(tfm, "__driver-ecb-aes-aesni"); | 379 | return ablk_init_common(tfm, "__driver-ecb-aes-aesni"); |
@@ -613,7 +529,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | |||
613 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | 529 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); |
614 | struct aesni_rfc4106_gcm_ctx *child_ctx = | 530 | struct aesni_rfc4106_gcm_ctx *child_ctx = |
615 | aesni_rfc4106_gcm_ctx_get(cryptd_child); | 531 | aesni_rfc4106_gcm_ctx_get(cryptd_child); |
616 | u8 *new_key_mem = NULL; | 532 | u8 *new_key_align, *new_key_mem = NULL; |
617 | 533 | ||
618 | if (key_len < 4) { | 534 | if (key_len < 4) { |
619 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | 535 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); |
@@ -637,9 +553,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | |||
637 | if (!new_key_mem) | 553 | if (!new_key_mem) |
638 | return -ENOMEM; | 554 | return -ENOMEM; |
639 | 555 | ||
640 | new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN); | 556 | new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN); |
641 | memcpy(new_key_mem, key, key_len); | 557 | memcpy(new_key_align, key, key_len); |
642 | key = new_key_mem; | 558 | key = new_key_align; |
643 | } | 559 | } |
644 | 560 | ||
645 | if (!irq_fpu_usable()) | 561 | if (!irq_fpu_usable()) |
@@ -968,7 +884,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
968 | .cra_priority = 400, | 884 | .cra_priority = 400, |
969 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 885 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
970 | .cra_blocksize = AES_BLOCK_SIZE, | 886 | .cra_blocksize = AES_BLOCK_SIZE, |
971 | .cra_ctxsize = sizeof(struct async_aes_ctx), | 887 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
972 | .cra_alignmask = 0, | 888 | .cra_alignmask = 0, |
973 | .cra_type = &crypto_ablkcipher_type, | 889 | .cra_type = &crypto_ablkcipher_type, |
974 | .cra_module = THIS_MODULE, | 890 | .cra_module = THIS_MODULE, |
@@ -989,7 +905,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
989 | .cra_priority = 400, | 905 | .cra_priority = 400, |
990 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 906 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
991 | .cra_blocksize = AES_BLOCK_SIZE, | 907 | .cra_blocksize = AES_BLOCK_SIZE, |
992 | .cra_ctxsize = sizeof(struct async_aes_ctx), | 908 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
993 | .cra_alignmask = 0, | 909 | .cra_alignmask = 0, |
994 | .cra_type = &crypto_ablkcipher_type, | 910 | .cra_type = &crypto_ablkcipher_type, |
995 | .cra_module = THIS_MODULE, | 911 | .cra_module = THIS_MODULE, |
@@ -1033,7 +949,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
1033 | .cra_priority = 400, | 949 | .cra_priority = 400, |
1034 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 950 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
1035 | .cra_blocksize = 1, | 951 | .cra_blocksize = 1, |
1036 | .cra_ctxsize = sizeof(struct async_aes_ctx), | 952 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
1037 | .cra_alignmask = 0, | 953 | .cra_alignmask = 0, |
1038 | .cra_type = &crypto_ablkcipher_type, | 954 | .cra_type = &crypto_ablkcipher_type, |
1039 | .cra_module = THIS_MODULE, | 955 | .cra_module = THIS_MODULE, |
@@ -1098,7 +1014,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
1098 | .cra_priority = 400, | 1014 | .cra_priority = 400, |
1099 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1015 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
1100 | .cra_blocksize = 1, | 1016 | .cra_blocksize = 1, |
1101 | .cra_ctxsize = sizeof(struct async_aes_ctx), | 1017 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
1102 | .cra_alignmask = 0, | 1018 | .cra_alignmask = 0, |
1103 | .cra_type = &crypto_ablkcipher_type, | 1019 | .cra_type = &crypto_ablkcipher_type, |
1104 | .cra_module = THIS_MODULE, | 1020 | .cra_module = THIS_MODULE, |
@@ -1126,7 +1042,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
1126 | .cra_priority = 400, | 1042 | .cra_priority = 400, |
1127 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1043 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
1128 | .cra_blocksize = AES_BLOCK_SIZE, | 1044 | .cra_blocksize = AES_BLOCK_SIZE, |
1129 | .cra_ctxsize = sizeof(struct async_aes_ctx), | 1045 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
1130 | .cra_alignmask = 0, | 1046 | .cra_alignmask = 0, |
1131 | .cra_type = &crypto_ablkcipher_type, | 1047 | .cra_type = &crypto_ablkcipher_type, |
1132 | .cra_module = THIS_MODULE, | 1048 | .cra_module = THIS_MODULE, |
@@ -1150,7 +1066,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
1150 | .cra_priority = 400, | 1066 | .cra_priority = 400, |
1151 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1067 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
1152 | .cra_blocksize = AES_BLOCK_SIZE, | 1068 | .cra_blocksize = AES_BLOCK_SIZE, |
1153 | .cra_ctxsize = sizeof(struct async_aes_ctx), | 1069 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
1154 | .cra_alignmask = 0, | 1070 | .cra_alignmask = 0, |
1155 | .cra_type = &crypto_ablkcipher_type, | 1071 | .cra_type = &crypto_ablkcipher_type, |
1156 | .cra_module = THIS_MODULE, | 1072 | .cra_module = THIS_MODULE, |
@@ -1174,7 +1090,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
1174 | .cra_priority = 400, | 1090 | .cra_priority = 400, |
1175 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1091 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
1176 | .cra_blocksize = AES_BLOCK_SIZE, | 1092 | .cra_blocksize = AES_BLOCK_SIZE, |
1177 | .cra_ctxsize = sizeof(struct async_aes_ctx), | 1093 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
1178 | .cra_alignmask = 0, | 1094 | .cra_alignmask = 0, |
1179 | .cra_type = &crypto_ablkcipher_type, | 1095 | .cra_type = &crypto_ablkcipher_type, |
1180 | .cra_module = THIS_MODULE, | 1096 | .cra_module = THIS_MODULE, |
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 3306dc0b139..eeb2b3b743e 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c | |||
@@ -5,10 +5,6 @@ | |||
5 | * | 5 | * |
6 | * Camellia parts based on code by: | 6 | * Camellia parts based on code by: |
7 | * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) | 7 | * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) |
8 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
9 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
10 | * CTR part based on code (crypto/ctr.c) by: | ||
11 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
12 | * | 8 | * |
13 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
14 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
@@ -34,9 +30,9 @@ | |||
34 | #include <linux/module.h> | 30 | #include <linux/module.h> |
35 | #include <linux/types.h> | 31 | #include <linux/types.h> |
36 | #include <crypto/algapi.h> | 32 | #include <crypto/algapi.h> |
37 | #include <crypto/b128ops.h> | ||
38 | #include <crypto/lrw.h> | 33 | #include <crypto/lrw.h> |
39 | #include <crypto/xts.h> | 34 | #include <crypto/xts.h> |
35 | #include <asm/crypto/glue_helper.h> | ||
40 | 36 | ||
41 | #define CAMELLIA_MIN_KEY_SIZE 16 | 37 | #define CAMELLIA_MIN_KEY_SIZE 16 |
42 | #define CAMELLIA_MAX_KEY_SIZE 32 | 38 | #define CAMELLIA_MAX_KEY_SIZE 32 |
@@ -1312,307 +1308,128 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, | |||
1312 | &tfm->crt_flags); | 1308 | &tfm->crt_flags); |
1313 | } | 1309 | } |
1314 | 1310 | ||
1315 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | 1311 | static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) |
1316 | void (*fn)(struct camellia_ctx *, u8 *, const u8 *), | ||
1317 | void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *)) | ||
1318 | { | 1312 | { |
1319 | struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 1313 | u128 iv = *src; |
1320 | unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
1321 | unsigned int nbytes; | ||
1322 | int err; | ||
1323 | |||
1324 | err = blkcipher_walk_virt(desc, walk); | ||
1325 | |||
1326 | while ((nbytes = walk->nbytes)) { | ||
1327 | u8 *wsrc = walk->src.virt.addr; | ||
1328 | u8 *wdst = walk->dst.virt.addr; | ||
1329 | |||
1330 | /* Process two block batch */ | ||
1331 | if (nbytes >= bsize * 2) { | ||
1332 | do { | ||
1333 | fn_2way(ctx, wdst, wsrc); | ||
1334 | |||
1335 | wsrc += bsize * 2; | ||
1336 | wdst += bsize * 2; | ||
1337 | nbytes -= bsize * 2; | ||
1338 | } while (nbytes >= bsize * 2); | ||
1339 | |||
1340 | if (nbytes < bsize) | ||
1341 | goto done; | ||
1342 | } | ||
1343 | |||
1344 | /* Handle leftovers */ | ||
1345 | do { | ||
1346 | fn(ctx, wdst, wsrc); | ||
1347 | |||
1348 | wsrc += bsize; | ||
1349 | wdst += bsize; | ||
1350 | nbytes -= bsize; | ||
1351 | } while (nbytes >= bsize); | ||
1352 | |||
1353 | done: | ||
1354 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
1355 | } | ||
1356 | |||
1357 | return err; | ||
1358 | } | ||
1359 | |||
1360 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
1361 | struct scatterlist *src, unsigned int nbytes) | ||
1362 | { | ||
1363 | struct blkcipher_walk walk; | ||
1364 | |||
1365 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
1366 | return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way); | ||
1367 | } | ||
1368 | 1314 | ||
1369 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 1315 | camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); |
1370 | struct scatterlist *src, unsigned int nbytes) | ||
1371 | { | ||
1372 | struct blkcipher_walk walk; | ||
1373 | |||
1374 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
1375 | return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way); | ||
1376 | } | ||
1377 | 1316 | ||
1378 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | 1317 | u128_xor(&dst[1], &dst[1], &iv); |
1379 | struct blkcipher_walk *walk) | ||
1380 | { | ||
1381 | struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
1382 | unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
1383 | unsigned int nbytes = walk->nbytes; | ||
1384 | u128 *src = (u128 *)walk->src.virt.addr; | ||
1385 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
1386 | u128 *iv = (u128 *)walk->iv; | ||
1387 | |||
1388 | do { | ||
1389 | u128_xor(dst, src, iv); | ||
1390 | camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
1391 | iv = dst; | ||
1392 | |||
1393 | src += 1; | ||
1394 | dst += 1; | ||
1395 | nbytes -= bsize; | ||
1396 | } while (nbytes >= bsize); | ||
1397 | |||
1398 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
1399 | return nbytes; | ||
1400 | } | 1318 | } |
1401 | 1319 | ||
1402 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 1320 | static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) |
1403 | struct scatterlist *src, unsigned int nbytes) | ||
1404 | { | 1321 | { |
1405 | struct blkcipher_walk walk; | 1322 | be128 ctrblk; |
1406 | int err; | ||
1407 | 1323 | ||
1408 | blkcipher_walk_init(&walk, dst, src, nbytes); | 1324 | if (dst != src) |
1409 | err = blkcipher_walk_virt(desc, &walk); | 1325 | *dst = *src; |
1410 | 1326 | ||
1411 | while ((nbytes = walk.nbytes)) { | 1327 | u128_to_be128(&ctrblk, iv); |
1412 | nbytes = __cbc_encrypt(desc, &walk); | 1328 | u128_inc(iv); |
1413 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
1414 | } | ||
1415 | 1329 | ||
1416 | return err; | 1330 | camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); |
1417 | } | 1331 | } |
1418 | 1332 | ||
1419 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 1333 | static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, |
1420 | struct blkcipher_walk *walk) | 1334 | u128 *iv) |
1421 | { | 1335 | { |
1422 | struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 1336 | be128 ctrblks[2]; |
1423 | unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
1424 | unsigned int nbytes = walk->nbytes; | ||
1425 | u128 *src = (u128 *)walk->src.virt.addr; | ||
1426 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
1427 | u128 ivs[2 - 1]; | ||
1428 | u128 last_iv; | ||
1429 | 1337 | ||
1430 | /* Start of the last block. */ | 1338 | if (dst != src) { |
1431 | src += nbytes / bsize - 1; | 1339 | dst[0] = src[0]; |
1432 | dst += nbytes / bsize - 1; | 1340 | dst[1] = src[1]; |
1433 | |||
1434 | last_iv = *src; | ||
1435 | |||
1436 | /* Process two block batch */ | ||
1437 | if (nbytes >= bsize * 2) { | ||
1438 | do { | ||
1439 | nbytes -= bsize * (2 - 1); | ||
1440 | src -= 2 - 1; | ||
1441 | dst -= 2 - 1; | ||
1442 | |||
1443 | ivs[0] = src[0]; | ||
1444 | |||
1445 | camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); | ||
1446 | |||
1447 | u128_xor(dst + 1, dst + 1, ivs + 0); | ||
1448 | |||
1449 | nbytes -= bsize; | ||
1450 | if (nbytes < bsize) | ||
1451 | goto done; | ||
1452 | |||
1453 | u128_xor(dst, dst, src - 1); | ||
1454 | src -= 1; | ||
1455 | dst -= 1; | ||
1456 | } while (nbytes >= bsize * 2); | ||
1457 | |||
1458 | if (nbytes < bsize) | ||
1459 | goto done; | ||
1460 | } | 1341 | } |
1461 | 1342 | ||
1462 | /* Handle leftovers */ | 1343 | u128_to_be128(&ctrblks[0], iv); |
1463 | for (;;) { | 1344 | u128_inc(iv); |
1464 | camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src); | 1345 | u128_to_be128(&ctrblks[1], iv); |
1465 | 1346 | u128_inc(iv); | |
1466 | nbytes -= bsize; | ||
1467 | if (nbytes < bsize) | ||
1468 | break; | ||
1469 | 1347 | ||
1470 | u128_xor(dst, dst, src - 1); | 1348 | camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks); |
1471 | src -= 1; | ||
1472 | dst -= 1; | ||
1473 | } | ||
1474 | |||
1475 | done: | ||
1476 | u128_xor(dst, dst, (u128 *)walk->iv); | ||
1477 | *(u128 *)walk->iv = last_iv; | ||
1478 | |||
1479 | return nbytes; | ||
1480 | } | 1349 | } |
1481 | 1350 | ||
1482 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 1351 | static const struct common_glue_ctx camellia_enc = { |
1483 | struct scatterlist *src, unsigned int nbytes) | 1352 | .num_funcs = 2, |
1484 | { | 1353 | .fpu_blocks_limit = -1, |
1485 | struct blkcipher_walk walk; | 1354 | |
1486 | int err; | 1355 | .funcs = { { |
1487 | 1356 | .num_blocks = 2, | |
1488 | blkcipher_walk_init(&walk, dst, src, nbytes); | 1357 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) } |
1489 | err = blkcipher_walk_virt(desc, &walk); | 1358 | }, { |
1359 | .num_blocks = 1, | ||
1360 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) } | ||
1361 | } } | ||
1362 | }; | ||
1490 | 1363 | ||
1491 | while ((nbytes = walk.nbytes)) { | 1364 | static const struct common_glue_ctx camellia_ctr = { |
1492 | nbytes = __cbc_decrypt(desc, &walk); | 1365 | .num_funcs = 2, |
1493 | err = blkcipher_walk_done(desc, &walk, nbytes); | 1366 | .fpu_blocks_limit = -1, |
1494 | } | 1367 | |
1368 | .funcs = { { | ||
1369 | .num_blocks = 2, | ||
1370 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) } | ||
1371 | }, { | ||
1372 | .num_blocks = 1, | ||
1373 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) } | ||
1374 | } } | ||
1375 | }; | ||
1495 | 1376 | ||
1496 | return err; | 1377 | static const struct common_glue_ctx camellia_dec = { |
1497 | } | 1378 | .num_funcs = 2, |
1379 | .fpu_blocks_limit = -1, | ||
1380 | |||
1381 | .funcs = { { | ||
1382 | .num_blocks = 2, | ||
1383 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) } | ||
1384 | }, { | ||
1385 | .num_blocks = 1, | ||
1386 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) } | ||
1387 | } } | ||
1388 | }; | ||
1498 | 1389 | ||
1499 | static inline void u128_to_be128(be128 *dst, const u128 *src) | 1390 | static const struct common_glue_ctx camellia_dec_cbc = { |
1500 | { | 1391 | .num_funcs = 2, |
1501 | dst->a = cpu_to_be64(src->a); | 1392 | .fpu_blocks_limit = -1, |
1502 | dst->b = cpu_to_be64(src->b); | 1393 | |
1503 | } | 1394 | .funcs = { { |
1395 | .num_blocks = 2, | ||
1396 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) } | ||
1397 | }, { | ||
1398 | .num_blocks = 1, | ||
1399 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) } | ||
1400 | } } | ||
1401 | }; | ||
1504 | 1402 | ||
1505 | static inline void be128_to_u128(u128 *dst, const be128 *src) | 1403 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1404 | struct scatterlist *src, unsigned int nbytes) | ||
1506 | { | 1405 | { |
1507 | dst->a = be64_to_cpu(src->a); | 1406 | return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); |
1508 | dst->b = be64_to_cpu(src->b); | ||
1509 | } | 1407 | } |
1510 | 1408 | ||
1511 | static inline void u128_inc(u128 *i) | 1409 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1410 | struct scatterlist *src, unsigned int nbytes) | ||
1512 | { | 1411 | { |
1513 | i->b++; | 1412 | return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); |
1514 | if (!i->b) | ||
1515 | i->a++; | ||
1516 | } | 1413 | } |
1517 | 1414 | ||
1518 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 1415 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1519 | struct blkcipher_walk *walk) | 1416 | struct scatterlist *src, unsigned int nbytes) |
1520 | { | 1417 | { |
1521 | struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 1418 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, |
1522 | u8 keystream[CAMELLIA_BLOCK_SIZE]; | 1419 | dst, src, nbytes); |
1523 | u8 *src = walk->src.virt.addr; | ||
1524 | u8 *dst = walk->dst.virt.addr; | ||
1525 | unsigned int nbytes = walk->nbytes; | ||
1526 | u128 ctrblk; | ||
1527 | |||
1528 | memcpy(keystream, src, nbytes); | ||
1529 | camellia_enc_blk_xor(ctx, keystream, walk->iv); | ||
1530 | memcpy(dst, keystream, nbytes); | ||
1531 | |||
1532 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
1533 | u128_inc(&ctrblk); | ||
1534 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
1535 | } | 1420 | } |
1536 | 1421 | ||
1537 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | 1422 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1538 | struct blkcipher_walk *walk) | 1423 | struct scatterlist *src, unsigned int nbytes) |
1539 | { | 1424 | { |
1540 | struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 1425 | return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, |
1541 | unsigned int bsize = CAMELLIA_BLOCK_SIZE; | 1426 | nbytes); |
1542 | unsigned int nbytes = walk->nbytes; | ||
1543 | u128 *src = (u128 *)walk->src.virt.addr; | ||
1544 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
1545 | u128 ctrblk; | ||
1546 | be128 ctrblocks[2]; | ||
1547 | |||
1548 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
1549 | |||
1550 | /* Process two block batch */ | ||
1551 | if (nbytes >= bsize * 2) { | ||
1552 | do { | ||
1553 | if (dst != src) { | ||
1554 | dst[0] = src[0]; | ||
1555 | dst[1] = src[1]; | ||
1556 | } | ||
1557 | |||
1558 | /* create ctrblks for parallel encrypt */ | ||
1559 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
1560 | u128_inc(&ctrblk); | ||
1561 | u128_to_be128(&ctrblocks[1], &ctrblk); | ||
1562 | u128_inc(&ctrblk); | ||
1563 | |||
1564 | camellia_enc_blk_xor_2way(ctx, (u8 *)dst, | ||
1565 | (u8 *)ctrblocks); | ||
1566 | |||
1567 | src += 2; | ||
1568 | dst += 2; | ||
1569 | nbytes -= bsize * 2; | ||
1570 | } while (nbytes >= bsize * 2); | ||
1571 | |||
1572 | if (nbytes < bsize) | ||
1573 | goto done; | ||
1574 | } | ||
1575 | |||
1576 | /* Handle leftovers */ | ||
1577 | do { | ||
1578 | if (dst != src) | ||
1579 | *dst = *src; | ||
1580 | |||
1581 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
1582 | u128_inc(&ctrblk); | ||
1583 | |||
1584 | camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); | ||
1585 | |||
1586 | src += 1; | ||
1587 | dst += 1; | ||
1588 | nbytes -= bsize; | ||
1589 | } while (nbytes >= bsize); | ||
1590 | |||
1591 | done: | ||
1592 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
1593 | return nbytes; | ||
1594 | } | 1427 | } |
1595 | 1428 | ||
1596 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 1429 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1597 | struct scatterlist *src, unsigned int nbytes) | 1430 | struct scatterlist *src, unsigned int nbytes) |
1598 | { | 1431 | { |
1599 | struct blkcipher_walk walk; | 1432 | return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); |
1600 | int err; | ||
1601 | |||
1602 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
1603 | err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE); | ||
1604 | |||
1605 | while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) { | ||
1606 | nbytes = __ctr_crypt(desc, &walk); | ||
1607 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
1608 | } | ||
1609 | |||
1610 | if (walk.nbytes) { | ||
1611 | ctr_crypt_final(desc, &walk); | ||
1612 | err = blkcipher_walk_done(desc, &walk, 0); | ||
1613 | } | ||
1614 | |||
1615 | return err; | ||
1616 | } | 1433 | } |
1617 | 1434 | ||
1618 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | 1435 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) |
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c new file mode 100644 index 00000000000..4854f0f31e4 --- /dev/null +++ b/arch/x86/crypto/glue_helper.c | |||
@@ -0,0 +1,307 @@ | |||
1 | /* | ||
2 | * Shared glue code for 128bit block ciphers | ||
3 | * | ||
4 | * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
24 | * USA | ||
25 | * | ||
26 | */ | ||
27 | |||
28 | #include <linux/module.h> | ||
29 | #include <crypto/b128ops.h> | ||
30 | #include <crypto/lrw.h> | ||
31 | #include <crypto/xts.h> | ||
32 | #include <asm/crypto/glue_helper.h> | ||
33 | #include <crypto/scatterwalk.h> | ||
34 | |||
35 | static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
36 | struct blkcipher_desc *desc, | ||
37 | struct blkcipher_walk *walk) | ||
38 | { | ||
39 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
40 | const unsigned int bsize = 128 / 8; | ||
41 | unsigned int nbytes, i, func_bytes; | ||
42 | bool fpu_enabled = false; | ||
43 | int err; | ||
44 | |||
45 | err = blkcipher_walk_virt(desc, walk); | ||
46 | |||
47 | while ((nbytes = walk->nbytes)) { | ||
48 | u8 *wsrc = walk->src.virt.addr; | ||
49 | u8 *wdst = walk->dst.virt.addr; | ||
50 | |||
51 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
52 | desc, fpu_enabled, nbytes); | ||
53 | |||
54 | for (i = 0; i < gctx->num_funcs; i++) { | ||
55 | func_bytes = bsize * gctx->funcs[i].num_blocks; | ||
56 | |||
57 | /* Process multi-block batch */ | ||
58 | if (nbytes >= func_bytes) { | ||
59 | do { | ||
60 | gctx->funcs[i].fn_u.ecb(ctx, wdst, | ||
61 | wsrc); | ||
62 | |||
63 | wsrc += func_bytes; | ||
64 | wdst += func_bytes; | ||
65 | nbytes -= func_bytes; | ||
66 | } while (nbytes >= func_bytes); | ||
67 | |||
68 | if (nbytes < bsize) | ||
69 | goto done; | ||
70 | } | ||
71 | } | ||
72 | |||
73 | done: | ||
74 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
75 | } | ||
76 | |||
77 | glue_fpu_end(fpu_enabled); | ||
78 | return err; | ||
79 | } | ||
80 | |||
81 | int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
82 | struct blkcipher_desc *desc, struct scatterlist *dst, | ||
83 | struct scatterlist *src, unsigned int nbytes) | ||
84 | { | ||
85 | struct blkcipher_walk walk; | ||
86 | |||
87 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
88 | return __glue_ecb_crypt_128bit(gctx, desc, &walk); | ||
89 | } | ||
90 | EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit); | ||
91 | |||
92 | static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
93 | struct blkcipher_desc *desc, | ||
94 | struct blkcipher_walk *walk) | ||
95 | { | ||
96 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
97 | const unsigned int bsize = 128 / 8; | ||
98 | unsigned int nbytes = walk->nbytes; | ||
99 | u128 *src = (u128 *)walk->src.virt.addr; | ||
100 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
101 | u128 *iv = (u128 *)walk->iv; | ||
102 | |||
103 | do { | ||
104 | u128_xor(dst, src, iv); | ||
105 | fn(ctx, (u8 *)dst, (u8 *)dst); | ||
106 | iv = dst; | ||
107 | |||
108 | src += 1; | ||
109 | dst += 1; | ||
110 | nbytes -= bsize; | ||
111 | } while (nbytes >= bsize); | ||
112 | |||
113 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
114 | return nbytes; | ||
115 | } | ||
116 | |||
117 | int glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
118 | struct blkcipher_desc *desc, | ||
119 | struct scatterlist *dst, | ||
120 | struct scatterlist *src, unsigned int nbytes) | ||
121 | { | ||
122 | struct blkcipher_walk walk; | ||
123 | int err; | ||
124 | |||
125 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
126 | err = blkcipher_walk_virt(desc, &walk); | ||
127 | |||
128 | while ((nbytes = walk.nbytes)) { | ||
129 | nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); | ||
130 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
131 | } | ||
132 | |||
133 | return err; | ||
134 | } | ||
135 | EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit); | ||
136 | |||
137 | static unsigned int | ||
138 | __glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
139 | struct blkcipher_desc *desc, | ||
140 | struct blkcipher_walk *walk) | ||
141 | { | ||
142 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
143 | const unsigned int bsize = 128 / 8; | ||
144 | unsigned int nbytes = walk->nbytes; | ||
145 | u128 *src = (u128 *)walk->src.virt.addr; | ||
146 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
147 | u128 last_iv; | ||
148 | unsigned int num_blocks, func_bytes; | ||
149 | unsigned int i; | ||
150 | |||
151 | /* Start of the last block. */ | ||
152 | src += nbytes / bsize - 1; | ||
153 | dst += nbytes / bsize - 1; | ||
154 | |||
155 | last_iv = *src; | ||
156 | |||
157 | for (i = 0; i < gctx->num_funcs; i++) { | ||
158 | num_blocks = gctx->funcs[i].num_blocks; | ||
159 | func_bytes = bsize * num_blocks; | ||
160 | |||
161 | /* Process multi-block batch */ | ||
162 | if (nbytes >= func_bytes) { | ||
163 | do { | ||
164 | nbytes -= func_bytes - bsize; | ||
165 | src -= num_blocks - 1; | ||
166 | dst -= num_blocks - 1; | ||
167 | |||
168 | gctx->funcs[i].fn_u.cbc(ctx, dst, src); | ||
169 | |||
170 | nbytes -= bsize; | ||
171 | if (nbytes < bsize) | ||
172 | goto done; | ||
173 | |||
174 | u128_xor(dst, dst, src - 1); | ||
175 | src -= 1; | ||
176 | dst -= 1; | ||
177 | } while (nbytes >= func_bytes); | ||
178 | |||
179 | if (nbytes < bsize) | ||
180 | goto done; | ||
181 | } | ||
182 | } | ||
183 | |||
184 | done: | ||
185 | u128_xor(dst, dst, (u128 *)walk->iv); | ||
186 | *(u128 *)walk->iv = last_iv; | ||
187 | |||
188 | return nbytes; | ||
189 | } | ||
190 | |||
191 | int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
192 | struct blkcipher_desc *desc, | ||
193 | struct scatterlist *dst, | ||
194 | struct scatterlist *src, unsigned int nbytes) | ||
195 | { | ||
196 | const unsigned int bsize = 128 / 8; | ||
197 | bool fpu_enabled = false; | ||
198 | struct blkcipher_walk walk; | ||
199 | int err; | ||
200 | |||
201 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
202 | err = blkcipher_walk_virt(desc, &walk); | ||
203 | |||
204 | while ((nbytes = walk.nbytes)) { | ||
205 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
206 | desc, fpu_enabled, nbytes); | ||
207 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | ||
208 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
209 | } | ||
210 | |||
211 | glue_fpu_end(fpu_enabled); | ||
212 | return err; | ||
213 | } | ||
214 | EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); | ||
215 | |||
216 | static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, | ||
217 | struct blkcipher_desc *desc, | ||
218 | struct blkcipher_walk *walk) | ||
219 | { | ||
220 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
221 | u8 *src = (u8 *)walk->src.virt.addr; | ||
222 | u8 *dst = (u8 *)walk->dst.virt.addr; | ||
223 | unsigned int nbytes = walk->nbytes; | ||
224 | u128 ctrblk; | ||
225 | u128 tmp; | ||
226 | |||
227 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
228 | |||
229 | memcpy(&tmp, src, nbytes); | ||
230 | fn_ctr(ctx, &tmp, &tmp, &ctrblk); | ||
231 | memcpy(dst, &tmp, nbytes); | ||
232 | |||
233 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
234 | } | ||
235 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit); | ||
236 | |||
237 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
238 | struct blkcipher_desc *desc, | ||
239 | struct blkcipher_walk *walk) | ||
240 | { | ||
241 | const unsigned int bsize = 128 / 8; | ||
242 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
243 | unsigned int nbytes = walk->nbytes; | ||
244 | u128 *src = (u128 *)walk->src.virt.addr; | ||
245 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
246 | u128 ctrblk; | ||
247 | unsigned int num_blocks, func_bytes; | ||
248 | unsigned int i; | ||
249 | |||
250 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
251 | |||
252 | /* Process multi-block batch */ | ||
253 | for (i = 0; i < gctx->num_funcs; i++) { | ||
254 | num_blocks = gctx->funcs[i].num_blocks; | ||
255 | func_bytes = bsize * num_blocks; | ||
256 | |||
257 | if (nbytes >= func_bytes) { | ||
258 | do { | ||
259 | gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); | ||
260 | |||
261 | src += num_blocks; | ||
262 | dst += num_blocks; | ||
263 | nbytes -= func_bytes; | ||
264 | } while (nbytes >= func_bytes); | ||
265 | |||
266 | if (nbytes < bsize) | ||
267 | goto done; | ||
268 | } | ||
269 | } | ||
270 | |||
271 | done: | ||
272 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
273 | return nbytes; | ||
274 | } | ||
275 | |||
276 | int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
277 | struct blkcipher_desc *desc, struct scatterlist *dst, | ||
278 | struct scatterlist *src, unsigned int nbytes) | ||
279 | { | ||
280 | const unsigned int bsize = 128 / 8; | ||
281 | bool fpu_enabled = false; | ||
282 | struct blkcipher_walk walk; | ||
283 | int err; | ||
284 | |||
285 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
286 | err = blkcipher_walk_virt_block(desc, &walk, bsize); | ||
287 | |||
288 | while ((nbytes = walk.nbytes) >= bsize) { | ||
289 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
290 | desc, fpu_enabled, nbytes); | ||
291 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | ||
292 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
293 | } | ||
294 | |||
295 | glue_fpu_end(fpu_enabled); | ||
296 | |||
297 | if (walk.nbytes) { | ||
298 | glue_ctr_crypt_final_128bit( | ||
299 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | ||
300 | err = blkcipher_walk_done(desc, &walk, 0); | ||
301 | } | ||
302 | |||
303 | return err; | ||
304 | } | ||
305 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); | ||
306 | |||
307 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S new file mode 100644 index 00000000000..504106bf04a --- /dev/null +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S | |||
@@ -0,0 +1,704 @@ | |||
1 | /* | ||
2 | * Serpent Cipher 8-way parallel algorithm (x86_64/AVX) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by | ||
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | .file "serpent-avx-x86_64-asm_64.S" | ||
28 | .text | ||
29 | |||
30 | #define CTX %rdi | ||
31 | |||
32 | /********************************************************************** | ||
33 | 8-way AVX serpent | ||
34 | **********************************************************************/ | ||
35 | #define RA1 %xmm0 | ||
36 | #define RB1 %xmm1 | ||
37 | #define RC1 %xmm2 | ||
38 | #define RD1 %xmm3 | ||
39 | #define RE1 %xmm4 | ||
40 | |||
41 | #define tp %xmm5 | ||
42 | |||
43 | #define RA2 %xmm6 | ||
44 | #define RB2 %xmm7 | ||
45 | #define RC2 %xmm8 | ||
46 | #define RD2 %xmm9 | ||
47 | #define RE2 %xmm10 | ||
48 | |||
49 | #define RNOT %xmm11 | ||
50 | |||
51 | #define RK0 %xmm12 | ||
52 | #define RK1 %xmm13 | ||
53 | #define RK2 %xmm14 | ||
54 | #define RK3 %xmm15 | ||
55 | |||
56 | |||
57 | #define S0_1(x0, x1, x2, x3, x4) \ | ||
58 | vpor x0, x3, tp; \ | ||
59 | vpxor x3, x0, x0; \ | ||
60 | vpxor x2, x3, x4; \ | ||
61 | vpxor RNOT, x4, x4; \ | ||
62 | vpxor x1, tp, x3; \ | ||
63 | vpand x0, x1, x1; \ | ||
64 | vpxor x4, x1, x1; \ | ||
65 | vpxor x0, x2, x2; | ||
66 | #define S0_2(x0, x1, x2, x3, x4) \ | ||
67 | vpxor x3, x0, x0; \ | ||
68 | vpor x0, x4, x4; \ | ||
69 | vpxor x2, x0, x0; \ | ||
70 | vpand x1, x2, x2; \ | ||
71 | vpxor x2, x3, x3; \ | ||
72 | vpxor RNOT, x1, x1; \ | ||
73 | vpxor x4, x2, x2; \ | ||
74 | vpxor x2, x1, x1; | ||
75 | |||
76 | #define S1_1(x0, x1, x2, x3, x4) \ | ||
77 | vpxor x0, x1, tp; \ | ||
78 | vpxor x3, x0, x0; \ | ||
79 | vpxor RNOT, x3, x3; \ | ||
80 | vpand tp, x1, x4; \ | ||
81 | vpor tp, x0, x0; \ | ||
82 | vpxor x2, x3, x3; \ | ||
83 | vpxor x3, x0, x0; \ | ||
84 | vpxor x3, tp, x1; | ||
85 | #define S1_2(x0, x1, x2, x3, x4) \ | ||
86 | vpxor x4, x3, x3; \ | ||
87 | vpor x4, x1, x1; \ | ||
88 | vpxor x2, x4, x4; \ | ||
89 | vpand x0, x2, x2; \ | ||
90 | vpxor x1, x2, x2; \ | ||
91 | vpor x0, x1, x1; \ | ||
92 | vpxor RNOT, x0, x0; \ | ||
93 | vpxor x2, x0, x0; \ | ||
94 | vpxor x1, x4, x4; | ||
95 | |||
96 | #define S2_1(x0, x1, x2, x3, x4) \ | ||
97 | vpxor RNOT, x3, x3; \ | ||
98 | vpxor x0, x1, x1; \ | ||
99 | vpand x2, x0, tp; \ | ||
100 | vpxor x3, tp, tp; \ | ||
101 | vpor x0, x3, x3; \ | ||
102 | vpxor x1, x2, x2; \ | ||
103 | vpxor x1, x3, x3; \ | ||
104 | vpand tp, x1, x1; | ||
105 | #define S2_2(x0, x1, x2, x3, x4) \ | ||
106 | vpxor x2, tp, tp; \ | ||
107 | vpand x3, x2, x2; \ | ||
108 | vpor x1, x3, x3; \ | ||
109 | vpxor RNOT, tp, tp; \ | ||
110 | vpxor tp, x3, x3; \ | ||
111 | vpxor tp, x0, x4; \ | ||
112 | vpxor x2, tp, x0; \ | ||
113 | vpor x2, x1, x1; | ||
114 | |||
115 | #define S3_1(x0, x1, x2, x3, x4) \ | ||
116 | vpxor x3, x1, tp; \ | ||
117 | vpor x0, x3, x3; \ | ||
118 | vpand x0, x1, x4; \ | ||
119 | vpxor x2, x0, x0; \ | ||
120 | vpxor tp, x2, x2; \ | ||
121 | vpand x3, tp, x1; \ | ||
122 | vpxor x3, x2, x2; \ | ||
123 | vpor x4, x0, x0; \ | ||
124 | vpxor x3, x4, x4; | ||
125 | #define S3_2(x0, x1, x2, x3, x4) \ | ||
126 | vpxor x0, x1, x1; \ | ||
127 | vpand x3, x0, x0; \ | ||
128 | vpand x4, x3, x3; \ | ||
129 | vpxor x2, x3, x3; \ | ||
130 | vpor x1, x4, x4; \ | ||
131 | vpand x1, x2, x2; \ | ||
132 | vpxor x3, x4, x4; \ | ||
133 | vpxor x3, x0, x0; \ | ||
134 | vpxor x2, x3, x3; | ||
135 | |||
136 | #define S4_1(x0, x1, x2, x3, x4) \ | ||
137 | vpand x0, x3, tp; \ | ||
138 | vpxor x3, x0, x0; \ | ||
139 | vpxor x2, tp, tp; \ | ||
140 | vpor x3, x2, x2; \ | ||
141 | vpxor x1, x0, x0; \ | ||
142 | vpxor tp, x3, x4; \ | ||
143 | vpor x0, x2, x2; \ | ||
144 | vpxor x1, x2, x2; | ||
145 | #define S4_2(x0, x1, x2, x3, x4) \ | ||
146 | vpand x0, x1, x1; \ | ||
147 | vpxor x4, x1, x1; \ | ||
148 | vpand x2, x4, x4; \ | ||
149 | vpxor tp, x2, x2; \ | ||
150 | vpxor x0, x4, x4; \ | ||
151 | vpor x1, tp, x3; \ | ||
152 | vpxor RNOT, x1, x1; \ | ||
153 | vpxor x0, x3, x3; | ||
154 | |||
155 | #define S5_1(x0, x1, x2, x3, x4) \ | ||
156 | vpor x0, x1, tp; \ | ||
157 | vpxor tp, x2, x2; \ | ||
158 | vpxor RNOT, x3, x3; \ | ||
159 | vpxor x0, x1, x4; \ | ||
160 | vpxor x2, x0, x0; \ | ||
161 | vpand x4, tp, x1; \ | ||
162 | vpor x3, x4, x4; \ | ||
163 | vpxor x0, x4, x4; | ||
164 | #define S5_2(x0, x1, x2, x3, x4) \ | ||
165 | vpand x3, x0, x0; \ | ||
166 | vpxor x3, x1, x1; \ | ||
167 | vpxor x2, x3, x3; \ | ||
168 | vpxor x1, x0, x0; \ | ||
169 | vpand x4, x2, x2; \ | ||
170 | vpxor x2, x1, x1; \ | ||
171 | vpand x0, x2, x2; \ | ||
172 | vpxor x2, x3, x3; | ||
173 | |||
174 | #define S6_1(x0, x1, x2, x3, x4) \ | ||
175 | vpxor x0, x3, x3; \ | ||
176 | vpxor x2, x1, tp; \ | ||
177 | vpxor x0, x2, x2; \ | ||
178 | vpand x3, x0, x0; \ | ||
179 | vpor x3, tp, tp; \ | ||
180 | vpxor RNOT, x1, x4; \ | ||
181 | vpxor tp, x0, x0; \ | ||
182 | vpxor x2, tp, x1; | ||
183 | #define S6_2(x0, x1, x2, x3, x4) \ | ||
184 | vpxor x4, x3, x3; \ | ||
185 | vpxor x0, x4, x4; \ | ||
186 | vpand x0, x2, x2; \ | ||
187 | vpxor x1, x4, x4; \ | ||
188 | vpxor x3, x2, x2; \ | ||
189 | vpand x1, x3, x3; \ | ||
190 | vpxor x0, x3, x3; \ | ||
191 | vpxor x2, x1, x1; | ||
192 | |||
193 | #define S7_1(x0, x1, x2, x3, x4) \ | ||
194 | vpxor RNOT, x1, tp; \ | ||
195 | vpxor RNOT, x0, x0; \ | ||
196 | vpand x2, tp, x1; \ | ||
197 | vpxor x3, x1, x1; \ | ||
198 | vpor tp, x3, x3; \ | ||
199 | vpxor x2, tp, x4; \ | ||
200 | vpxor x3, x2, x2; \ | ||
201 | vpxor x0, x3, x3; \ | ||
202 | vpor x1, x0, x0; | ||
203 | #define S7_2(x0, x1, x2, x3, x4) \ | ||
204 | vpand x0, x2, x2; \ | ||
205 | vpxor x4, x0, x0; \ | ||
206 | vpxor x3, x4, x4; \ | ||
207 | vpand x0, x3, x3; \ | ||
208 | vpxor x1, x4, x4; \ | ||
209 | vpxor x4, x2, x2; \ | ||
210 | vpxor x1, x3, x3; \ | ||
211 | vpor x0, x4, x4; \ | ||
212 | vpxor x1, x4, x4; | ||
213 | |||
/*
 * Inverse Serpent S-boxes SI0..SI3 (decryption direction), bitsliced.
 * Same conventions as the forward S-boxes above: two halves per S-box,
 * tp/x4 as scratch, RNOT = all-ones for NOT-via-XOR.  Instruction order
 * is significant.
 */
#define SI0_1(x0, x1, x2, x3, x4) \
	vpxor x0, x1, x1; \
	vpor x1, x3, tp; \
	vpxor x1, x3, x4; \
	vpxor RNOT, x0, x0; \
	vpxor tp, x2, x2; \
	vpxor x0, tp, x3; \
	vpand x1, x0, x0; \
	vpxor x2, x0, x0;
#define SI0_2(x0, x1, x2, x3, x4) \
	vpand x3, x2, x2; \
	vpxor x4, x3, x3; \
	vpxor x3, x2, x2; \
	vpxor x3, x1, x1; \
	vpand x0, x3, x3; \
	vpxor x0, x1, x1; \
	vpxor x2, x0, x0; \
	vpxor x3, x4, x4;

#define SI1_1(x0, x1, x2, x3, x4) \
	vpxor x3, x1, x1; \
	vpxor x2, x0, tp; \
	vpxor RNOT, x2, x2; \
	vpor x1, x0, x4; \
	vpxor x3, x4, x4; \
	vpand x1, x3, x3; \
	vpxor x2, x1, x1; \
	vpand x4, x2, x2;
#define SI1_2(x0, x1, x2, x3, x4) \
	vpxor x1, x4, x4; \
	vpor x3, x1, x1; \
	vpxor tp, x3, x3; \
	vpxor tp, x2, x2; \
	vpor x4, tp, x0; \
	vpxor x4, x2, x2; \
	vpxor x0, x1, x1; \
	vpxor x1, x4, x4;

#define SI2_1(x0, x1, x2, x3, x4) \
	vpxor x1, x2, x2; \
	vpxor RNOT, x3, tp; \
	vpor x2, tp, tp; \
	vpxor x3, x2, x2; \
	vpxor x0, x3, x4; \
	vpxor x1, tp, x3; \
	vpor x2, x1, x1; \
	vpxor x0, x2, x2;
#define SI2_2(x0, x1, x2, x3, x4) \
	vpxor x4, x1, x1; \
	vpor x3, x4, x4; \
	vpxor x3, x2, x2; \
	vpxor x2, x4, x4; \
	vpand x1, x2, x2; \
	vpxor x3, x2, x2; \
	vpxor x4, x3, x3; \
	vpxor x0, x4, x4;

#define SI3_1(x0, x1, x2, x3, x4) \
	vpxor x1, x2, x2; \
	vpand x2, x1, tp; \
	vpxor x0, tp, tp; \
	vpor x1, x0, x0; \
	vpxor x3, x1, x4; \
	vpxor x3, x0, x0; \
	vpor tp, x3, x3; \
	vpxor x2, tp, x1;
#define SI3_2(x0, x1, x2, x3, x4) \
	vpxor x3, x1, x1; \
	vpxor x2, x0, x0; \
	vpxor x3, x2, x2; \
	vpand x1, x3, x3; \
	vpxor x0, x1, x1; \
	vpand x2, x0, x0; \
	vpxor x3, x4, x4; \
	vpxor x0, x3, x3; \
	vpxor x1, x0, x0;
290 | |||
/*
 * Inverse Serpent S-boxes SI4..SI7 (decryption direction), bitsliced.
 * Same conventions as above; tp/x4 are scratch, RNOT is all-ones.
 */
#define SI4_1(x0, x1, x2, x3, x4) \
	vpxor x3, x2, x2; \
	vpand x1, x0, tp; \
	vpxor x2, tp, tp; \
	vpor x3, x2, x2; \
	vpxor RNOT, x0, x4; \
	vpxor tp, x1, x1; \
	vpxor x2, tp, x0; \
	vpand x4, x2, x2;
#define SI4_2(x0, x1, x2, x3, x4) \
	vpxor x0, x2, x2; \
	vpor x4, x0, x0; \
	vpxor x3, x0, x0; \
	vpand x2, x3, x3; \
	vpxor x3, x4, x4; \
	vpxor x1, x3, x3; \
	vpand x0, x1, x1; \
	vpxor x1, x4, x4; \
	vpxor x3, x0, x0;

#define SI5_1(x0, x1, x2, x3, x4) \
	vpor x2, x1, tp; \
	vpxor x1, x2, x2; \
	vpxor x3, tp, tp; \
	vpand x1, x3, x3; \
	vpxor x3, x2, x2; \
	vpor x0, x3, x3; \
	vpxor RNOT, x0, x0; \
	vpxor x2, x3, x3; \
	vpor x0, x2, x2;
#define SI5_2(x0, x1, x2, x3, x4) \
	vpxor tp, x1, x4; \
	vpxor x4, x2, x2; \
	vpand x0, x4, x4; \
	vpxor tp, x0, x0; \
	vpxor x3, tp, x1; \
	vpand x2, x0, x0; \
	vpxor x3, x2, x2; \
	vpxor x2, x0, x0; \
	vpxor x4, x2, x2; \
	vpxor x3, x4, x4;

#define SI6_1(x0, x1, x2, x3, x4) \
	vpxor x2, x0, x0; \
	vpand x3, x0, tp; \
	vpxor x3, x2, x2; \
	vpxor x2, tp, tp; \
	vpxor x1, x3, x3; \
	vpor x0, x2, x2; \
	vpxor x3, x2, x2; \
	vpand tp, x3, x3;
#define SI6_2(x0, x1, x2, x3, x4) \
	vpxor RNOT, tp, tp; \
	vpxor x1, x3, x3; \
	vpand x2, x1, x1; \
	vpxor tp, x0, x4; \
	vpxor x4, x3, x3; \
	vpxor x2, x4, x4; \
	vpxor x1, tp, x0; \
	vpxor x0, x2, x2;

#define SI7_1(x0, x1, x2, x3, x4) \
	vpand x0, x3, tp; \
	vpxor x2, x0, x0; \
	vpor x3, x2, x2; \
	vpxor x1, x3, x4; \
	vpxor RNOT, x0, x0; \
	vpor tp, x1, x1; \
	vpxor x0, x4, x4; \
	vpand x2, x0, x0; \
	vpxor x1, x0, x0;
#define SI7_2(x0, x1, x2, x3, x4) \
	vpand x2, x1, x1; \
	vpxor x2, tp, x3; \
	vpxor x3, x4, x4; \
	vpand x3, x2, x2; \
	vpor x0, x3, x3; \
	vpxor x4, x1, x1; \
	vpxor x4, x3, x3; \
	vpand x0, x4, x4; \
	vpxor x2, x4, x4;
372 | |||
/*
 * get_key: broadcast 32-bit round-key word j of round i from the expanded
 * key schedule at CTX into all four lanes of register t.
 */
#define get_key(i, j, t) \
	vbroadcastss (4*(i)+(j))*4(CTX), t;

/*
 * K2: XOR the four round-i key words into both 4-block groups
 * (registers suffixed 1 and 2).  Clobbers RK0..RK3.
 */
#define K2(x0, x1, x2, x3, x4, i) \
	get_key(i, 0, RK0); \
	get_key(i, 1, RK1); \
	get_key(i, 2, RK2); \
	get_key(i, 3, RK3); \
	vpxor RK0, x0 ## 1, x0 ## 1; \
	vpxor RK1, x1 ## 1, x1 ## 1; \
	vpxor RK2, x2 ## 1, x2 ## 1; \
	vpxor RK3, x3 ## 1, x3 ## 1; \
	vpxor RK0, x0 ## 2, x0 ## 2; \
	vpxor RK1, x1 ## 2, x1 ## 2; \
	vpxor RK2, x2 ## 2, x2 ## 2; \
	vpxor RK3, x3 ## 2, x3 ## 2;
389 | |||
/*
 * LK2: Serpent forward linear transformation followed by the XOR of the
 * round-i key, applied to both 4-block groups (suffix 1 and 2).  Rotates
 * are synthesized as shift-left/shift-right/or pairs (AVX has no vector
 * rotate); x4 is the shift scratch register.  get_key loads for round i
 * are interleaved with the arithmetic.  Clobbers RK0..RK3.
 */
#define LK2(x0, x1, x2, x3, x4, i) \
	vpslld $13, x0 ## 1, x4 ## 1; \
	vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $3, x2 ## 1, x4 ## 1; \
	vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $13, x0 ## 2, x4 ## 2; \
	vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $3, x2 ## 2, x4 ## 2; \
	vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $1, x1 ## 1, x4 ## 1; \
	vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \
	vpor x4 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $3, x0 ## 1, x4 ## 1; \
	vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
	vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
	get_key(i, 1, RK1); \
	vpslld $1, x1 ## 2, x4 ## 2; \
	vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \
	vpor x4 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $3, x0 ## 2, x4 ## 2; \
	vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
	vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
	get_key(i, 3, RK3); \
	vpslld $7, x3 ## 1, x4 ## 1; \
	vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \
	vpor x4 ## 1, x3 ## 1, x3 ## 1; \
	vpslld $7, x1 ## 1, x4 ## 1; \
	vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
	vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
	get_key(i, 0, RK0); \
	vpslld $7, x3 ## 2, x4 ## 2; \
	vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \
	vpor x4 ## 2, x3 ## 2, x3 ## 2; \
	vpslld $7, x1 ## 2, x4 ## 2; \
	vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
	get_key(i, 2, RK2); \
	vpxor RK1, x1 ## 1, x1 ## 1; \
	vpxor RK3, x3 ## 1, x3 ## 1; \
	vpslld $5, x0 ## 1, x4 ## 1; \
	vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpslld $22, x2 ## 1, x4 ## 1; \
	vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpxor RK0, x0 ## 1, x0 ## 1; \
	vpxor RK2, x2 ## 1, x2 ## 1; \
	vpxor RK1, x1 ## 2, x1 ## 2; \
	vpxor RK3, x3 ## 2, x3 ## 2; \
	vpslld $5, x0 ## 2, x4 ## 2; \
	vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpslld $22, x2 ## 2, x4 ## 2; \
	vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpxor RK0, x0 ## 2, x0 ## 2; \
	vpxor RK2, x2 ## 2, x2 ## 2;
459 | |||
/*
 * KL2: inverse of LK2 — XOR of the round-i key (RK0..RK3 must already be
 * loaded) followed by the inverse linear transformation, on both 4-block
 * groups.  Rotations are reversed relative to LK2 (vpsrld/vpslld swapped);
 * x4 is the shift scratch register.
 */
#define KL2(x0, x1, x2, x3, x4, i) \
	vpxor RK0, x0 ## 1, x0 ## 1; \
	vpxor RK2, x2 ## 1, x2 ## 1; \
	vpsrld $5, x0 ## 1, x4 ## 1; \
	vpslld $(32 - 5), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpxor RK3, x3 ## 1, x3 ## 1; \
	vpxor RK1, x1 ## 1, x1 ## 1; \
	vpsrld $22, x2 ## 1, x4 ## 1; \
	vpslld $(32 - 22), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
	vpxor RK0, x0 ## 2, x0 ## 2; \
	vpxor RK2, x2 ## 2, x2 ## 2; \
	vpsrld $5, x0 ## 2, x4 ## 2; \
	vpslld $(32 - 5), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpxor RK3, x3 ## 2, x3 ## 2; \
	vpxor RK1, x1 ## 2, x1 ## 2; \
	vpsrld $22, x2 ## 2, x4 ## 2; \
	vpslld $(32 - 22), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
	vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
	vpslld $7, x1 ## 1, x4 ## 1; \
	vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpsrld $1, x1 ## 1, x4 ## 1; \
	vpslld $(32 - 1), x1 ## 1, x1 ## 1; \
	vpor x4 ## 1, x1 ## 1, x1 ## 1; \
	vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
	vpslld $7, x1 ## 2, x4 ## 2; \
	vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
	vpsrld $1, x1 ## 2, x4 ## 2; \
	vpslld $(32 - 1), x1 ## 2, x1 ## 2; \
	vpor x4 ## 2, x1 ## 2, x1 ## 2; \
	vpsrld $7, x3 ## 1, x4 ## 1; \
	vpslld $(32 - 7), x3 ## 1, x3 ## 1; \
	vpor x4 ## 1, x3 ## 1, x3 ## 1; \
	vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
	vpslld $3, x0 ## 1, x4 ## 1; \
	vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
	vpsrld $7, x3 ## 2, x4 ## 2; \
	vpslld $(32 - 7), x3 ## 2, x3 ## 2; \
	vpor x4 ## 2, x3 ## 2, x3 ## 2; \
	vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
	vpslld $3, x0 ## 2, x4 ## 2; \
	vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
	vpsrld $13, x0 ## 1, x4 ## 1; \
	vpslld $(32 - 13), x0 ## 1, x0 ## 1; \
	vpor x4 ## 1, x0 ## 1, x0 ## 1; \
	vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
	vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
	vpsrld $3, x2 ## 1, x4 ## 1; \
	vpslld $(32 - 3), x2 ## 1, x2 ## 1; \
	vpor x4 ## 1, x2 ## 1, x2 ## 1; \
	vpsrld $13, x0 ## 2, x4 ## 2; \
	vpslld $(32 - 13), x0 ## 2, x0 ## 2; \
	vpor x4 ## 2, x0 ## 2, x0 ## 2; \
	vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
	vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
	vpsrld $3, x2 ## 2, x4 ## 2; \
	vpslld $(32 - 3), x2 ## 2, x2 ## 2; \
	vpor x4 ## 2, x2 ## 2, x2 ## 2;
525 | |||
/*
 * S: apply S-box SBOX (both halves) to the first 4-block group, then to
 * the second.  SBOX is a token like S0 or SI3; ## pastes the group suffix
 * onto the register names.
 */
#define S(SBOX, x0, x1, x2, x3, x4) \
	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
531 | |||
/*
 * SP: like S, but interleave the get_key loads for round i (into
 * RK0..RK3) with the S-box halves; the keys are consumed by the
 * following KL2.
 *
 * Fix: drop the stray line-continuation backslash after the final
 * statement — it silently pulled the next source line into the macro.
 * Today that line is blank, but any future edit placing code there
 * would be swallowed into every SP expansion.
 */
#define SP(SBOX, x0, x1, x2, x3, x4, i) \
	get_key(i, 0, RK0); \
	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 2, RK2); \
	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
	get_key(i, 3, RK3); \
	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
	get_key(i, 1, RK1); \
	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
541 | |||
/*
 * transpose_4x4: 4x4 transpose of 32-bit words across registers x0..x3,
 * converting between four consecutive blocks in memory order and the
 * bitsliced layout (one word of each block per register).  t0..t2 are
 * scratch; x3 doubles as a temporary mid-sequence.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	vpunpckldq		x1, x0, t0; \
	vpunpckhdq		x1, x0, t2; \
	vpunpckldq		x3, x2, t1; \
	vpunpckhdq		x3, x2, x3; \
	\
	vpunpcklqdq		t1, t0, x0; \
	vpunpckhqdq		t1, t0, x1; \
	vpunpcklqdq		x3, t2, x2; \
	vpunpckhqdq		x3, t2, x3;
552 | |||
/*
 * read_blocks: load four 16-byte blocks from `in` (unaligned loads) and
 * transpose them into bitsliced form in x0..x3.
 */
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
	vmovdqu (0*4*4)(in),	x0; \
	vmovdqu (1*4*4)(in),	x1; \
	vmovdqu (2*4*4)(in),	x2; \
	vmovdqu (3*4*4)(in),	x3; \
	\
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

/*
 * write_blocks: transpose x0..x3 back to block order and store four
 * blocks to `out`.
 */
#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	vmovdqu x0,		(0*4*4)(out); \
	vmovdqu x1,		(1*4*4)(out); \
	vmovdqu x2,		(2*4*4)(out); \
	vmovdqu x3,		(3*4*4)(out);

/*
 * xor_blocks: like write_blocks, but XOR each output block with the data
 * already at `out` before storing (used for the CTR-style xor path).
 */
#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	vpxor (0*4*4)(out),	x0, x0; \
	vmovdqu x0,		(0*4*4)(out); \
	vpxor (1*4*4)(out),	x1, x1; \
	vmovdqu x1,		(1*4*4)(out); \
	vpxor (2*4*4)(out),	x2, x2; \
	vmovdqu x2,		(2*4*4)(out); \
	vpxor (3*4*4)(out),	x3, x3; \
	vmovdqu x3,		(3*4*4)(out);
580 | |||
.align 8
.global __serpent_enc_blk_8way_avx
.type __serpent_enc_blk_8way_avx,@function;

/*
 * Encrypt 8 Serpent blocks (two bitsliced groups of 4) with AVX.
 * 32 rounds: S-box via S(), linear transform + key mix via LK2(),
 * final key mix via K2().  The register-name permutations per round
 * implement the round rotation of the Serpent state without moves.
 */
__serpent_enc_blk_8way_avx:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool, if true: xor output
	 */

	/* all-ones mask used by the S-boxes for NOT via vpxor */
	vpcmpeqd RNOT, RNOT, RNOT;

	/* %rax points at the second group of 4 blocks */
	leaq (4*4*4)(%rdx), %rax;
	read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);

						 K2(RA, RB, RC, RD, RE, 0);
	S(S0, RA, RB, RC, RD, RE);		LK2(RC, RB, RD, RA, RE, 1);
	S(S1, RC, RB, RD, RA, RE);		LK2(RE, RD, RA, RC, RB, 2);
	S(S2, RE, RD, RA, RC, RB);		LK2(RB, RD, RE, RC, RA, 3);
	S(S3, RB, RD, RE, RC, RA);		LK2(RC, RA, RD, RB, RE, 4);
	S(S4, RC, RA, RD, RB, RE);		LK2(RA, RD, RB, RE, RC, 5);
	S(S5, RA, RD, RB, RE, RC);		LK2(RC, RA, RD, RE, RB, 6);
	S(S6, RC, RA, RD, RE, RB);		LK2(RD, RB, RA, RE, RC, 7);
	S(S7, RD, RB, RA, RE, RC);		LK2(RC, RA, RE, RD, RB, 8);
	S(S0, RC, RA, RE, RD, RB);		LK2(RE, RA, RD, RC, RB, 9);
	S(S1, RE, RA, RD, RC, RB);		LK2(RB, RD, RC, RE, RA, 10);
	S(S2, RB, RD, RC, RE, RA);		LK2(RA, RD, RB, RE, RC, 11);
	S(S3, RA, RD, RB, RE, RC);		LK2(RE, RC, RD, RA, RB, 12);
	S(S4, RE, RC, RD, RA, RB);		LK2(RC, RD, RA, RB, RE, 13);
	S(S5, RC, RD, RA, RB, RE);		LK2(RE, RC, RD, RB, RA, 14);
	S(S6, RE, RC, RD, RB, RA);		LK2(RD, RA, RC, RB, RE, 15);
	S(S7, RD, RA, RC, RB, RE);		LK2(RE, RC, RB, RD, RA, 16);
	S(S0, RE, RC, RB, RD, RA);		LK2(RB, RC, RD, RE, RA, 17);
	S(S1, RB, RC, RD, RE, RA);		LK2(RA, RD, RE, RB, RC, 18);
	S(S2, RA, RD, RE, RB, RC);		LK2(RC, RD, RA, RB, RE, 19);
	S(S3, RC, RD, RA, RB, RE);		LK2(RB, RE, RD, RC, RA, 20);
	S(S4, RB, RE, RD, RC, RA);		LK2(RE, RD, RC, RA, RB, 21);
	S(S5, RE, RD, RC, RA, RB);		LK2(RB, RE, RD, RA, RC, 22);
	S(S6, RB, RE, RD, RA, RC);		LK2(RD, RC, RE, RA, RB, 23);
	S(S7, RD, RC, RE, RA, RB);		LK2(RB, RE, RA, RD, RC, 24);
	S(S0, RB, RE, RA, RD, RC);		LK2(RA, RE, RD, RB, RC, 25);
	S(S1, RA, RE, RD, RB, RC);		LK2(RC, RD, RB, RA, RE, 26);
	S(S2, RC, RD, RB, RA, RE);		LK2(RE, RD, RC, RA, RB, 27);
	S(S3, RE, RD, RC, RA, RB);		LK2(RA, RB, RD, RE, RC, 28);
	S(S4, RA, RB, RD, RE, RC);		LK2(RB, RD, RE, RC, RA, 29);
	S(S5, RB, RD, RE, RC, RA);		LK2(RA, RB, RD, RC, RE, 30);
	S(S6, RA, RB, RD, RC, RE);		LK2(RD, RE, RB, RC, RA, 31);
	S(S7, RD, RE, RB, RC, RA);		K2(RA, RB, RC, RD, RE, 32);

	leaq (4*4*4)(%rsi), %rax;

	/* %cl selects plain store vs. xor-into-destination */
	testb %cl, %cl;
	jnz __enc_xor8;

	write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);

	ret;

__enc_xor8:
	xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);

	ret;
648 | |||
.align 8
.global serpent_dec_blk_8way_avx
.type serpent_dec_blk_8way_avx,@function;

/*
 * Decrypt 8 Serpent blocks (two bitsliced groups of 4) with AVX.
 * Runs the encryption rounds in reverse: initial key mix via K2(32),
 * then inverse S-box + key/inverse-linear-transform via SP()/KL2()
 * from round 31 down to 1, finishing with SI0 and K2(0).
 */
serpent_dec_blk_8way_avx:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	/* all-ones mask used by the S-boxes for NOT via vpxor */
	vpcmpeqd RNOT, RNOT, RNOT;

	leaq (4*4*4)(%rdx), %rax;
	read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
	read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);

						 K2(RA, RB, RC, RD, RE, 32);
	SP(SI7, RA, RB, RC, RD, RE, 31);	KL2(RB, RD, RA, RE, RC, 31);
	SP(SI6, RB, RD, RA, RE, RC, 30);	KL2(RA, RC, RE, RB, RD, 30);
	SP(SI5, RA, RC, RE, RB, RD, 29);	KL2(RC, RD, RA, RE, RB, 29);
	SP(SI4, RC, RD, RA, RE, RB, 28);	KL2(RC, RA, RB, RE, RD, 28);
	SP(SI3, RC, RA, RB, RE, RD, 27);	KL2(RB, RC, RD, RE, RA, 27);
	SP(SI2, RB, RC, RD, RE, RA, 26);	KL2(RC, RA, RE, RD, RB, 26);
	SP(SI1, RC, RA, RE, RD, RB, 25);	KL2(RB, RA, RE, RD, RC, 25);
	SP(SI0, RB, RA, RE, RD, RC, 24);	KL2(RE, RC, RA, RB, RD, 24);
	SP(SI7, RE, RC, RA, RB, RD, 23);	KL2(RC, RB, RE, RD, RA, 23);
	SP(SI6, RC, RB, RE, RD, RA, 22);	KL2(RE, RA, RD, RC, RB, 22);
	SP(SI5, RE, RA, RD, RC, RB, 21);	KL2(RA, RB, RE, RD, RC, 21);
	SP(SI4, RA, RB, RE, RD, RC, 20);	KL2(RA, RE, RC, RD, RB, 20);
	SP(SI3, RA, RE, RC, RD, RB, 19);	KL2(RC, RA, RB, RD, RE, 19);
	SP(SI2, RC, RA, RB, RD, RE, 18);	KL2(RA, RE, RD, RB, RC, 18);
	SP(SI1, RA, RE, RD, RB, RC, 17);	KL2(RC, RE, RD, RB, RA, 17);
	SP(SI0, RC, RE, RD, RB, RA, 16);	KL2(RD, RA, RE, RC, RB, 16);
	SP(SI7, RD, RA, RE, RC, RB, 15);	KL2(RA, RC, RD, RB, RE, 15);
	SP(SI6, RA, RC, RD, RB, RE, 14);	KL2(RD, RE, RB, RA, RC, 14);
	SP(SI5, RD, RE, RB, RA, RC, 13);	KL2(RE, RC, RD, RB, RA, 13);
	SP(SI4, RE, RC, RD, RB, RA, 12);	KL2(RE, RD, RA, RB, RC, 12);
	SP(SI3, RE, RD, RA, RB, RC, 11);	KL2(RA, RE, RC, RB, RD, 11);
	SP(SI2, RA, RE, RC, RB, RD, 10);	KL2(RE, RD, RB, RC, RA, 10);
	SP(SI1, RE, RD, RB, RC, RA, 9);		KL2(RA, RD, RB, RC, RE, 9);
	SP(SI0, RA, RD, RB, RC, RE, 8);		KL2(RB, RE, RD, RA, RC, 8);
	SP(SI7, RB, RE, RD, RA, RC, 7);		KL2(RE, RA, RB, RC, RD, 7);
	SP(SI6, RE, RA, RB, RC, RD, 6);		KL2(RB, RD, RC, RE, RA, 6);
	SP(SI5, RB, RD, RC, RE, RA, 5);		KL2(RD, RA, RB, RC, RE, 5);
	SP(SI4, RD, RA, RB, RC, RE, 4);		KL2(RD, RB, RE, RC, RA, 4);
	SP(SI3, RD, RB, RE, RC, RA, 3);		KL2(RE, RD, RA, RC, RB, 3);
	SP(SI2, RE, RD, RA, RC, RB, 2);		KL2(RD, RB, RC, RA, RE, 2);
	SP(SI1, RD, RB, RC, RA, RE, 1);		KL2(RE, RB, RC, RA, RD, 1);
	S(SI0, RE, RB, RC, RA, RD);		K2(RC, RD, RB, RE, RA, 0);

	leaq (4*4*4)(%rsi), %rax;
	write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
	write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);

	ret;
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c new file mode 100644 index 00000000000..b36bdac237e --- /dev/null +++ b/arch/x86/crypto/serpent_avx_glue.c | |||
@@ -0,0 +1,636 @@ | |||
1 | /* | ||
2 | * Glue Code for AVX assembler versions of Serpent Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Glue code based on serpent_sse2_glue.c by: | ||
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/hardirq.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/crypto.h> | ||
31 | #include <linux/err.h> | ||
32 | #include <crypto/algapi.h> | ||
33 | #include <crypto/serpent.h> | ||
34 | #include <crypto/cryptd.h> | ||
35 | #include <crypto/b128ops.h> | ||
36 | #include <crypto/ctr.h> | ||
37 | #include <crypto/lrw.h> | ||
38 | #include <crypto/xts.h> | ||
39 | #include <asm/xcr.h> | ||
40 | #include <asm/xsave.h> | ||
41 | #include <asm/crypto/serpent-avx.h> | ||
42 | #include <asm/crypto/ablk_helper.h> | ||
43 | #include <asm/crypto/glue_helper.h> | ||
44 | |||
45 | static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) | ||
46 | { | ||
47 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
48 | unsigned int j; | ||
49 | |||
50 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
51 | ivs[j] = src[j]; | ||
52 | |||
53 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
54 | |||
55 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
56 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); | ||
57 | } | ||
58 | |||
59 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) | ||
60 | { | ||
61 | be128 ctrblk; | ||
62 | |||
63 | u128_to_be128(&ctrblk, iv); | ||
64 | u128_inc(iv); | ||
65 | |||
66 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | ||
67 | u128_xor(dst, src, (u128 *)&ctrblk); | ||
68 | } | ||
69 | |||
70 | static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, | ||
71 | u128 *iv) | ||
72 | { | ||
73 | be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; | ||
74 | unsigned int i; | ||
75 | |||
76 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
77 | if (dst != src) | ||
78 | dst[i] = src[i]; | ||
79 | |||
80 | u128_to_be128(&ctrblks[i], iv); | ||
81 | u128_inc(iv); | ||
82 | } | ||
83 | |||
84 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
85 | } | ||
86 | |||
/*
 * Dispatch tables for the glue_helper framework.  Each table lists
 * implementations in decreasing num_blocks order: the 8-way AVX path
 * for full groups of SERPENT_PARALLEL_BLOCKS, then the generic
 * one-block C implementation for the tail.  fpu_blocks_limit tells the
 * helper below how many blocks justify enabling the FPU/AVX state.
 */

/* ECB encryption. */
static const struct common_glue_ctx serpent_enc = {
	.num_funcs = 2,
	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,

	.funcs = { {
		.num_blocks = SERPENT_PARALLEL_BLOCKS,
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
	} }
};

/* CTR (same table shape; CTR-specific callbacks defined above). */
static const struct common_glue_ctx serpent_ctr = {
	.num_funcs = 2,
	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,

	.funcs = { {
		.num_blocks = SERPENT_PARALLEL_BLOCKS,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
	} }
};

/* ECB decryption. */
static const struct common_glue_ctx serpent_dec = {
	.num_funcs = 2,
	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,

	.funcs = { {
		.num_blocks = SERPENT_PARALLEL_BLOCKS,
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
	} }
};

/* CBC decryption (CBC encryption is inherently serial; see cbc_encrypt). */
static const struct common_glue_ctx serpent_dec_cbc = {
	.num_funcs = 2,
	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,

	.funcs = { {
		.num_blocks = SERPENT_PARALLEL_BLOCKS,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
	}, {
		.num_blocks = 1,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
	} }
};
138 | |||
/*
 * blkcipher entry points: thin wrappers that hand the scatterlist walk
 * to the shared glue helpers together with the dispatch tables above.
 */

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
}

/* CBC encryption cannot be parallelized, so it uses the one-block C
 * implementation directly. */
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
				       dst, src, nbytes);
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
				       nbytes);
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
}
170 | |||
/*
 * serpent_fpu_begin - lazily enable FPU/AVX use if nbytes warrants it.
 * Returns the (possibly updated) fpu_enabled state; serpent_fpu_end
 * releases it.  Thin wrappers around the shared glue_fpu helpers.
 */
static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void serpent_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}

/*
 * Per-request state threaded through the LRW/XTS crypt callbacks:
 * the expanded key plus the lazily-tracked FPU state.
 */
struct crypt_priv {
	struct serpent_ctx *ctx;	/* expanded Serpent key schedule */
	bool fpu_enabled;		/* FPU/AVX currently usable? */
};
186 | |||
187 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
188 | { | ||
189 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
190 | struct crypt_priv *ctx = priv; | ||
191 | int i; | ||
192 | |||
193 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
194 | |||
195 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
196 | serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
197 | return; | ||
198 | } | ||
199 | |||
200 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
201 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | ||
202 | } | ||
203 | |||
204 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
205 | { | ||
206 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
207 | struct crypt_priv *ctx = priv; | ||
208 | int i; | ||
209 | |||
210 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
211 | |||
212 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
213 | serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
214 | return; | ||
215 | } | ||
216 | |||
217 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
218 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | ||
219 | } | ||
220 | |||
/* LRW transform state: the tweak multiplication table plus the cipher key. */
struct serpent_lrw_ctx {
	struct lrw_table_ctx lrw_table;		/* GF(2^128) tweak table */
	struct serpent_ctx serpent_ctx;		/* expanded Serpent key */
};
225 | |||
226 | static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
227 | unsigned int keylen) | ||
228 | { | ||
229 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
230 | int err; | ||
231 | |||
232 | err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - | ||
233 | SERPENT_BLOCK_SIZE); | ||
234 | if (err) | ||
235 | return err; | ||
236 | |||
237 | return lrw_init_table(&ctx->lrw_table, key + keylen - | ||
238 | SERPENT_BLOCK_SIZE); | ||
239 | } | ||
240 | |||
241 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
242 | struct scatterlist *src, unsigned int nbytes) | ||
243 | { | ||
244 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
245 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
246 | struct crypt_priv crypt_ctx = { | ||
247 | .ctx = &ctx->serpent_ctx, | ||
248 | .fpu_enabled = false, | ||
249 | }; | ||
250 | struct lrw_crypt_req req = { | ||
251 | .tbuf = buf, | ||
252 | .tbuflen = sizeof(buf), | ||
253 | |||
254 | .table_ctx = &ctx->lrw_table, | ||
255 | .crypt_ctx = &crypt_ctx, | ||
256 | .crypt_fn = encrypt_callback, | ||
257 | }; | ||
258 | int ret; | ||
259 | |||
260 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
261 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
262 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
263 | |||
264 | return ret; | ||
265 | } | ||
266 | |||
267 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
268 | struct scatterlist *src, unsigned int nbytes) | ||
269 | { | ||
270 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
271 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
272 | struct crypt_priv crypt_ctx = { | ||
273 | .ctx = &ctx->serpent_ctx, | ||
274 | .fpu_enabled = false, | ||
275 | }; | ||
276 | struct lrw_crypt_req req = { | ||
277 | .tbuf = buf, | ||
278 | .tbuflen = sizeof(buf), | ||
279 | |||
280 | .table_ctx = &ctx->lrw_table, | ||
281 | .crypt_ctx = &crypt_ctx, | ||
282 | .crypt_fn = decrypt_callback, | ||
283 | }; | ||
284 | int ret; | ||
285 | |||
286 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
287 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
288 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
289 | |||
290 | return ret; | ||
291 | } | ||
292 | |||
/* Release the LRW tweak table allocated by lrw_init_table at setkey. */
static void lrw_exit_tfm(struct crypto_tfm *tfm)
{
	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);

	lrw_free_table(&ctx->lrw_table);
}

/* XTS transform state: separate expanded keys for tweak and data. */
struct serpent_xts_ctx {
	struct serpent_ctx tweak_ctx;	/* key for tweak encryption */
	struct serpent_ctx crypt_ctx;	/* key for data encryption */
};
304 | |||
305 | static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
306 | unsigned int keylen) | ||
307 | { | ||
308 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
309 | u32 *flags = &tfm->crt_flags; | ||
310 | int err; | ||
311 | |||
312 | /* key consists of keys of equal size concatenated, therefore | ||
313 | * the length must be even | ||
314 | */ | ||
315 | if (keylen % 2) { | ||
316 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
317 | return -EINVAL; | ||
318 | } | ||
319 | |||
320 | /* first half of xts-key is for crypt */ | ||
321 | err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); | ||
322 | if (err) | ||
323 | return err; | ||
324 | |||
325 | /* second half of xts-key is for tweak */ | ||
326 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); | ||
327 | } | ||
328 | |||
329 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
330 | struct scatterlist *src, unsigned int nbytes) | ||
331 | { | ||
332 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
333 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
334 | struct crypt_priv crypt_ctx = { | ||
335 | .ctx = &ctx->crypt_ctx, | ||
336 | .fpu_enabled = false, | ||
337 | }; | ||
338 | struct xts_crypt_req req = { | ||
339 | .tbuf = buf, | ||
340 | .tbuflen = sizeof(buf), | ||
341 | |||
342 | .tweak_ctx = &ctx->tweak_ctx, | ||
343 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
344 | .crypt_ctx = &crypt_ctx, | ||
345 | .crypt_fn = encrypt_callback, | ||
346 | }; | ||
347 | int ret; | ||
348 | |||
349 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
350 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
351 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
352 | |||
353 | return ret; | ||
354 | } | ||
355 | |||
356 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
357 | struct scatterlist *src, unsigned int nbytes) | ||
358 | { | ||
359 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
360 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
361 | struct crypt_priv crypt_ctx = { | ||
362 | .ctx = &ctx->crypt_ctx, | ||
363 | .fpu_enabled = false, | ||
364 | }; | ||
365 | struct xts_crypt_req req = { | ||
366 | .tbuf = buf, | ||
367 | .tbuflen = sizeof(buf), | ||
368 | |||
369 | .tweak_ctx = &ctx->tweak_ctx, | ||
370 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
371 | .crypt_ctx = &crypt_ctx, | ||
372 | .crypt_fn = decrypt_callback, | ||
373 | }; | ||
374 | int ret; | ||
375 | |||
376 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
377 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
378 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
379 | |||
380 | return ret; | ||
381 | } | ||
382 | |||
383 | static struct crypto_alg serpent_algs[10] = { { | ||
384 | .cra_name = "__ecb-serpent-avx", | ||
385 | .cra_driver_name = "__driver-ecb-serpent-avx", | ||
386 | .cra_priority = 0, | ||
387 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
388 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
389 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
390 | .cra_alignmask = 0, | ||
391 | .cra_type = &crypto_blkcipher_type, | ||
392 | .cra_module = THIS_MODULE, | ||
393 | .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), | ||
394 | .cra_u = { | ||
395 | .blkcipher = { | ||
396 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
397 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
398 | .setkey = serpent_setkey, | ||
399 | .encrypt = ecb_encrypt, | ||
400 | .decrypt = ecb_decrypt, | ||
401 | }, | ||
402 | }, | ||
403 | }, { | ||
404 | .cra_name = "__cbc-serpent-avx", | ||
405 | .cra_driver_name = "__driver-cbc-serpent-avx", | ||
406 | .cra_priority = 0, | ||
407 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
408 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
409 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
410 | .cra_alignmask = 0, | ||
411 | .cra_type = &crypto_blkcipher_type, | ||
412 | .cra_module = THIS_MODULE, | ||
413 | .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), | ||
414 | .cra_u = { | ||
415 | .blkcipher = { | ||
416 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
417 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
418 | .setkey = serpent_setkey, | ||
419 | .encrypt = cbc_encrypt, | ||
420 | .decrypt = cbc_decrypt, | ||
421 | }, | ||
422 | }, | ||
423 | }, { | ||
424 | .cra_name = "__ctr-serpent-avx", | ||
425 | .cra_driver_name = "__driver-ctr-serpent-avx", | ||
426 | .cra_priority = 0, | ||
427 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
428 | .cra_blocksize = 1, | ||
429 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
430 | .cra_alignmask = 0, | ||
431 | .cra_type = &crypto_blkcipher_type, | ||
432 | .cra_module = THIS_MODULE, | ||
433 | .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), | ||
434 | .cra_u = { | ||
435 | .blkcipher = { | ||
436 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
437 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
438 | .ivsize = SERPENT_BLOCK_SIZE, | ||
439 | .setkey = serpent_setkey, | ||
440 | .encrypt = ctr_crypt, | ||
441 | .decrypt = ctr_crypt, | ||
442 | }, | ||
443 | }, | ||
444 | }, { | ||
445 | .cra_name = "__lrw-serpent-avx", | ||
446 | .cra_driver_name = "__driver-lrw-serpent-avx", | ||
447 | .cra_priority = 0, | ||
448 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
449 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
450 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | ||
451 | .cra_alignmask = 0, | ||
452 | .cra_type = &crypto_blkcipher_type, | ||
453 | .cra_module = THIS_MODULE, | ||
454 | .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), | ||
455 | .cra_exit = lrw_exit_tfm, | ||
456 | .cra_u = { | ||
457 | .blkcipher = { | ||
458 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
459 | SERPENT_BLOCK_SIZE, | ||
460 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
461 | SERPENT_BLOCK_SIZE, | ||
462 | .ivsize = SERPENT_BLOCK_SIZE, | ||
463 | .setkey = lrw_serpent_setkey, | ||
464 | .encrypt = lrw_encrypt, | ||
465 | .decrypt = lrw_decrypt, | ||
466 | }, | ||
467 | }, | ||
468 | }, { | ||
469 | .cra_name = "__xts-serpent-avx", | ||
470 | .cra_driver_name = "__driver-xts-serpent-avx", | ||
471 | .cra_priority = 0, | ||
472 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
473 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
474 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | ||
475 | .cra_alignmask = 0, | ||
476 | .cra_type = &crypto_blkcipher_type, | ||
477 | .cra_module = THIS_MODULE, | ||
478 | .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), | ||
479 | .cra_u = { | ||
480 | .blkcipher = { | ||
481 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
482 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
483 | .ivsize = SERPENT_BLOCK_SIZE, | ||
484 | .setkey = xts_serpent_setkey, | ||
485 | .encrypt = xts_encrypt, | ||
486 | .decrypt = xts_decrypt, | ||
487 | }, | ||
488 | }, | ||
489 | }, { | ||
490 | .cra_name = "ecb(serpent)", | ||
491 | .cra_driver_name = "ecb-serpent-avx", | ||
492 | .cra_priority = 500, | ||
493 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
494 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
495 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
496 | .cra_alignmask = 0, | ||
497 | .cra_type = &crypto_ablkcipher_type, | ||
498 | .cra_module = THIS_MODULE, | ||
499 | .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), | ||
500 | .cra_init = ablk_init, | ||
501 | .cra_exit = ablk_exit, | ||
502 | .cra_u = { | ||
503 | .ablkcipher = { | ||
504 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
505 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
506 | .setkey = ablk_set_key, | ||
507 | .encrypt = ablk_encrypt, | ||
508 | .decrypt = ablk_decrypt, | ||
509 | }, | ||
510 | }, | ||
511 | }, { | ||
512 | .cra_name = "cbc(serpent)", | ||
513 | .cra_driver_name = "cbc-serpent-avx", | ||
514 | .cra_priority = 500, | ||
515 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
516 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
517 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
518 | .cra_alignmask = 0, | ||
519 | .cra_type = &crypto_ablkcipher_type, | ||
520 | .cra_module = THIS_MODULE, | ||
521 | .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), | ||
522 | .cra_init = ablk_init, | ||
523 | .cra_exit = ablk_exit, | ||
524 | .cra_u = { | ||
525 | .ablkcipher = { | ||
526 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
527 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
528 | .ivsize = SERPENT_BLOCK_SIZE, | ||
529 | .setkey = ablk_set_key, | ||
530 | .encrypt = __ablk_encrypt, | ||
531 | .decrypt = ablk_decrypt, | ||
532 | }, | ||
533 | }, | ||
534 | }, { | ||
535 | .cra_name = "ctr(serpent)", | ||
536 | .cra_driver_name = "ctr-serpent-avx", | ||
537 | .cra_priority = 500, | ||
538 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
539 | .cra_blocksize = 1, | ||
540 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
541 | .cra_alignmask = 0, | ||
542 | .cra_type = &crypto_ablkcipher_type, | ||
543 | .cra_module = THIS_MODULE, | ||
544 | .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), | ||
545 | .cra_init = ablk_init, | ||
546 | .cra_exit = ablk_exit, | ||
547 | .cra_u = { | ||
548 | .ablkcipher = { | ||
549 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
550 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
551 | .ivsize = SERPENT_BLOCK_SIZE, | ||
552 | .setkey = ablk_set_key, | ||
553 | .encrypt = ablk_encrypt, | ||
554 | .decrypt = ablk_encrypt, | ||
555 | .geniv = "chainiv", | ||
556 | }, | ||
557 | }, | ||
558 | }, { | ||
559 | .cra_name = "lrw(serpent)", | ||
560 | .cra_driver_name = "lrw-serpent-avx", | ||
561 | .cra_priority = 500, | ||
562 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
563 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
564 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
565 | .cra_alignmask = 0, | ||
566 | .cra_type = &crypto_ablkcipher_type, | ||
567 | .cra_module = THIS_MODULE, | ||
568 | .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), | ||
569 | .cra_init = ablk_init, | ||
570 | .cra_exit = ablk_exit, | ||
571 | .cra_u = { | ||
572 | .ablkcipher = { | ||
573 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
574 | SERPENT_BLOCK_SIZE, | ||
575 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
576 | SERPENT_BLOCK_SIZE, | ||
577 | .ivsize = SERPENT_BLOCK_SIZE, | ||
578 | .setkey = ablk_set_key, | ||
579 | .encrypt = ablk_encrypt, | ||
580 | .decrypt = ablk_decrypt, | ||
581 | }, | ||
582 | }, | ||
583 | }, { | ||
584 | .cra_name = "xts(serpent)", | ||
585 | .cra_driver_name = "xts-serpent-avx", | ||
586 | .cra_priority = 500, | ||
587 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
588 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
589 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
590 | .cra_alignmask = 0, | ||
591 | .cra_type = &crypto_ablkcipher_type, | ||
592 | .cra_module = THIS_MODULE, | ||
593 | .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), | ||
594 | .cra_init = ablk_init, | ||
595 | .cra_exit = ablk_exit, | ||
596 | .cra_u = { | ||
597 | .ablkcipher = { | ||
598 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
599 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
600 | .ivsize = SERPENT_BLOCK_SIZE, | ||
601 | .setkey = ablk_set_key, | ||
602 | .encrypt = ablk_encrypt, | ||
603 | .decrypt = ablk_decrypt, | ||
604 | }, | ||
605 | }, | ||
606 | } }; | ||
607 | |||
608 | static int __init serpent_init(void) | ||
609 | { | ||
610 | u64 xcr0; | ||
611 | |||
612 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
613 | printk(KERN_INFO "AVX instructions are not detected.\n"); | ||
614 | return -ENODEV; | ||
615 | } | ||
616 | |||
617 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
618 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
619 | printk(KERN_INFO "AVX detected but unusable.\n"); | ||
620 | return -ENODEV; | ||
621 | } | ||
622 | |||
623 | return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
624 | } | ||
625 | |||
626 | static void __exit serpent_exit(void) | ||
627 | { | ||
628 | crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
629 | } | ||
630 | |||
631 | module_init(serpent_init); | ||
632 | module_exit(serpent_exit); | ||
633 | |||
634 | MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); | ||
635 | MODULE_LICENSE("GPL"); | ||
636 | MODULE_ALIAS("serpent"); | ||
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 4b21be85e0a..d679c8675f4 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c | |||
@@ -41,358 +41,145 @@ | |||
41 | #include <crypto/ctr.h> | 41 | #include <crypto/ctr.h> |
42 | #include <crypto/lrw.h> | 42 | #include <crypto/lrw.h> |
43 | #include <crypto/xts.h> | 43 | #include <crypto/xts.h> |
44 | #include <asm/i387.h> | 44 | #include <asm/crypto/serpent-sse2.h> |
45 | #include <asm/serpent.h> | 45 | #include <asm/crypto/ablk_helper.h> |
46 | #include <crypto/scatterwalk.h> | 46 | #include <asm/crypto/glue_helper.h> |
47 | #include <linux/workqueue.h> | ||
48 | #include <linux/spinlock.h> | ||
49 | |||
50 | struct async_serpent_ctx { | ||
51 | struct cryptd_ablkcipher *cryptd_tfm; | ||
52 | }; | ||
53 | 47 | ||
54 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | 48 | static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) |
55 | { | ||
56 | if (fpu_enabled) | ||
57 | return true; | ||
58 | |||
59 | /* SSE2 is only used when chunk to be processed is large enough, so | ||
60 | * do not enable FPU until it is necessary. | ||
61 | */ | ||
62 | if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) | ||
63 | return false; | ||
64 | |||
65 | kernel_fpu_begin(); | ||
66 | return true; | ||
67 | } | ||
68 | |||
69 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
70 | { | 49 | { |
71 | if (fpu_enabled) | 50 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; |
72 | kernel_fpu_end(); | 51 | unsigned int j; |
73 | } | ||
74 | |||
75 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
76 | bool enc) | ||
77 | { | ||
78 | bool fpu_enabled = false; | ||
79 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
80 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
81 | unsigned int nbytes; | ||
82 | int err; | ||
83 | |||
84 | err = blkcipher_walk_virt(desc, walk); | ||
85 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
86 | |||
87 | while ((nbytes = walk->nbytes)) { | ||
88 | u8 *wsrc = walk->src.virt.addr; | ||
89 | u8 *wdst = walk->dst.virt.addr; | ||
90 | |||
91 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
92 | |||
93 | /* Process multi-block batch */ | ||
94 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
95 | do { | ||
96 | if (enc) | ||
97 | serpent_enc_blk_xway(ctx, wdst, wsrc); | ||
98 | else | ||
99 | serpent_dec_blk_xway(ctx, wdst, wsrc); | ||
100 | |||
101 | wsrc += bsize * SERPENT_PARALLEL_BLOCKS; | ||
102 | wdst += bsize * SERPENT_PARALLEL_BLOCKS; | ||
103 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
104 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
105 | |||
106 | if (nbytes < bsize) | ||
107 | goto done; | ||
108 | } | ||
109 | |||
110 | /* Handle leftovers */ | ||
111 | do { | ||
112 | if (enc) | ||
113 | __serpent_encrypt(ctx, wdst, wsrc); | ||
114 | else | ||
115 | __serpent_decrypt(ctx, wdst, wsrc); | ||
116 | |||
117 | wsrc += bsize; | ||
118 | wdst += bsize; | ||
119 | nbytes -= bsize; | ||
120 | } while (nbytes >= bsize); | ||
121 | |||
122 | done: | ||
123 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
124 | } | ||
125 | 52 | ||
126 | serpent_fpu_end(fpu_enabled); | 53 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) |
127 | return err; | 54 | ivs[j] = src[j]; |
128 | } | ||
129 | 55 | ||
130 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 56 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); |
131 | struct scatterlist *src, unsigned int nbytes) | ||
132 | { | ||
133 | struct blkcipher_walk walk; | ||
134 | 57 | ||
135 | blkcipher_walk_init(&walk, dst, src, nbytes); | 58 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) |
136 | return ecb_crypt(desc, &walk, true); | 59 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); |
137 | } | 60 | } |
138 | 61 | ||
139 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 62 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) |
140 | struct scatterlist *src, unsigned int nbytes) | ||
141 | { | 63 | { |
142 | struct blkcipher_walk walk; | 64 | be128 ctrblk; |
143 | 65 | ||
144 | blkcipher_walk_init(&walk, dst, src, nbytes); | 66 | u128_to_be128(&ctrblk, iv); |
145 | return ecb_crypt(desc, &walk, false); | 67 | u128_inc(iv); |
146 | } | ||
147 | 68 | ||
148 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | 69 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
149 | struct blkcipher_walk *walk) | 70 | u128_xor(dst, src, (u128 *)&ctrblk); |
150 | { | ||
151 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
152 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
153 | unsigned int nbytes = walk->nbytes; | ||
154 | u128 *src = (u128 *)walk->src.virt.addr; | ||
155 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
156 | u128 *iv = (u128 *)walk->iv; | ||
157 | |||
158 | do { | ||
159 | u128_xor(dst, src, iv); | ||
160 | __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); | ||
161 | iv = dst; | ||
162 | |||
163 | src += 1; | ||
164 | dst += 1; | ||
165 | nbytes -= bsize; | ||
166 | } while (nbytes >= bsize); | ||
167 | |||
168 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
169 | return nbytes; | ||
170 | } | 71 | } |
171 | 72 | ||
172 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 73 | static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, |
173 | struct scatterlist *src, unsigned int nbytes) | 74 | u128 *iv) |
174 | { | 75 | { |
175 | struct blkcipher_walk walk; | 76 | be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; |
176 | int err; | 77 | unsigned int i; |
177 | 78 | ||
178 | blkcipher_walk_init(&walk, dst, src, nbytes); | 79 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { |
179 | err = blkcipher_walk_virt(desc, &walk); | 80 | if (dst != src) |
81 | dst[i] = src[i]; | ||
180 | 82 | ||
181 | while ((nbytes = walk.nbytes)) { | 83 | u128_to_be128(&ctrblks[i], iv); |
182 | nbytes = __cbc_encrypt(desc, &walk); | 84 | u128_inc(iv); |
183 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
184 | } | 85 | } |
185 | 86 | ||
186 | return err; | 87 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); |
187 | } | 88 | } |
188 | 89 | ||
189 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 90 | static const struct common_glue_ctx serpent_enc = { |
190 | struct blkcipher_walk *walk) | 91 | .num_funcs = 2, |
191 | { | 92 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
192 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
193 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
194 | unsigned int nbytes = walk->nbytes; | ||
195 | u128 *src = (u128 *)walk->src.virt.addr; | ||
196 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
197 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
198 | u128 last_iv; | ||
199 | int i; | ||
200 | |||
201 | /* Start of the last block. */ | ||
202 | src += nbytes / bsize - 1; | ||
203 | dst += nbytes / bsize - 1; | ||
204 | |||
205 | last_iv = *src; | ||
206 | |||
207 | /* Process multi-block batch */ | ||
208 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
209 | do { | ||
210 | nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); | ||
211 | src -= SERPENT_PARALLEL_BLOCKS - 1; | ||
212 | dst -= SERPENT_PARALLEL_BLOCKS - 1; | ||
213 | |||
214 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
215 | ivs[i] = src[i]; | ||
216 | |||
217 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
218 | |||
219 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
220 | u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); | ||
221 | |||
222 | nbytes -= bsize; | ||
223 | if (nbytes < bsize) | ||
224 | goto done; | ||
225 | 93 | ||
226 | u128_xor(dst, dst, src - 1); | 94 | .funcs = { { |
227 | src -= 1; | 95 | .num_blocks = SERPENT_PARALLEL_BLOCKS, |
228 | dst -= 1; | 96 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } |
229 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | 97 | }, { |
230 | 98 | .num_blocks = 1, | |
231 | if (nbytes < bsize) | 99 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } |
232 | goto done; | 100 | } } |
233 | } | 101 | }; |
234 | |||
235 | /* Handle leftovers */ | ||
236 | for (;;) { | ||
237 | __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); | ||
238 | |||
239 | nbytes -= bsize; | ||
240 | if (nbytes < bsize) | ||
241 | break; | ||
242 | 102 | ||
243 | u128_xor(dst, dst, src - 1); | 103 | static const struct common_glue_ctx serpent_ctr = { |
244 | src -= 1; | 104 | .num_funcs = 2, |
245 | dst -= 1; | 105 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
246 | } | 106 | |
107 | .funcs = { { | ||
108 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
109 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } | ||
110 | }, { | ||
111 | .num_blocks = 1, | ||
112 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | ||
113 | } } | ||
114 | }; | ||
247 | 115 | ||
248 | done: | 116 | static const struct common_glue_ctx serpent_dec = { |
249 | u128_xor(dst, dst, (u128 *)walk->iv); | 117 | .num_funcs = 2, |
250 | *(u128 *)walk->iv = last_iv; | 118 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
119 | |||
120 | .funcs = { { | ||
121 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
122 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } | ||
123 | }, { | ||
124 | .num_blocks = 1, | ||
125 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
126 | } } | ||
127 | }; | ||
251 | 128 | ||
252 | return nbytes; | 129 | static const struct common_glue_ctx serpent_dec_cbc = { |
253 | } | 130 | .num_funcs = 2, |
131 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
132 | |||
133 | .funcs = { { | ||
134 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
135 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } | ||
136 | }, { | ||
137 | .num_blocks = 1, | ||
138 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
139 | } } | ||
140 | }; | ||
254 | 141 | ||
255 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 142 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
256 | struct scatterlist *src, unsigned int nbytes) | 143 | struct scatterlist *src, unsigned int nbytes) |
257 | { | 144 | { |
258 | bool fpu_enabled = false; | 145 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); |
259 | struct blkcipher_walk walk; | ||
260 | int err; | ||
261 | |||
262 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
263 | err = blkcipher_walk_virt(desc, &walk); | ||
264 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
265 | |||
266 | while ((nbytes = walk.nbytes)) { | ||
267 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
268 | nbytes = __cbc_decrypt(desc, &walk); | ||
269 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
270 | } | ||
271 | |||
272 | serpent_fpu_end(fpu_enabled); | ||
273 | return err; | ||
274 | } | 146 | } |
275 | 147 | ||
276 | static inline void u128_to_be128(be128 *dst, const u128 *src) | 148 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
149 | struct scatterlist *src, unsigned int nbytes) | ||
277 | { | 150 | { |
278 | dst->a = cpu_to_be64(src->a); | 151 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); |
279 | dst->b = cpu_to_be64(src->b); | ||
280 | } | 152 | } |
281 | 153 | ||
282 | static inline void be128_to_u128(u128 *dst, const be128 *src) | 154 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
155 | struct scatterlist *src, unsigned int nbytes) | ||
283 | { | 156 | { |
284 | dst->a = be64_to_cpu(src->a); | 157 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, |
285 | dst->b = be64_to_cpu(src->b); | 158 | dst, src, nbytes); |
286 | } | 159 | } |
287 | 160 | ||
288 | static inline void u128_inc(u128 *i) | 161 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
162 | struct scatterlist *src, unsigned int nbytes) | ||
289 | { | 163 | { |
290 | i->b++; | 164 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, |
291 | if (!i->b) | 165 | nbytes); |
292 | i->a++; | ||
293 | } | 166 | } |
294 | 167 | ||
295 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 168 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
296 | struct blkcipher_walk *walk) | 169 | struct scatterlist *src, unsigned int nbytes) |
297 | { | 170 | { |
298 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 171 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); |
299 | u8 *ctrblk = walk->iv; | ||
300 | u8 keystream[SERPENT_BLOCK_SIZE]; | ||
301 | u8 *src = walk->src.virt.addr; | ||
302 | u8 *dst = walk->dst.virt.addr; | ||
303 | unsigned int nbytes = walk->nbytes; | ||
304 | |||
305 | __serpent_encrypt(ctx, keystream, ctrblk); | ||
306 | crypto_xor(keystream, src, nbytes); | ||
307 | memcpy(dst, keystream, nbytes); | ||
308 | |||
309 | crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); | ||
310 | } | 172 | } |
311 | 173 | ||
312 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | 174 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) |
313 | struct blkcipher_walk *walk) | ||
314 | { | 175 | { |
315 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 176 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, |
316 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | 177 | NULL, fpu_enabled, nbytes); |
317 | unsigned int nbytes = walk->nbytes; | ||
318 | u128 *src = (u128 *)walk->src.virt.addr; | ||
319 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
320 | u128 ctrblk; | ||
321 | be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; | ||
322 | int i; | ||
323 | |||
324 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
325 | |||
326 | /* Process multi-block batch */ | ||
327 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
328 | do { | ||
329 | /* create ctrblks for parallel encrypt */ | ||
330 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
331 | if (dst != src) | ||
332 | dst[i] = src[i]; | ||
333 | |||
334 | u128_to_be128(&ctrblocks[i], &ctrblk); | ||
335 | u128_inc(&ctrblk); | ||
336 | } | ||
337 | |||
338 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, | ||
339 | (u8 *)ctrblocks); | ||
340 | |||
341 | src += SERPENT_PARALLEL_BLOCKS; | ||
342 | dst += SERPENT_PARALLEL_BLOCKS; | ||
343 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
344 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
345 | |||
346 | if (nbytes < bsize) | ||
347 | goto done; | ||
348 | } | ||
349 | |||
350 | /* Handle leftovers */ | ||
351 | do { | ||
352 | if (dst != src) | ||
353 | *dst = *src; | ||
354 | |||
355 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
356 | u128_inc(&ctrblk); | ||
357 | |||
358 | __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
359 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
360 | |||
361 | src += 1; | ||
362 | dst += 1; | ||
363 | nbytes -= bsize; | ||
364 | } while (nbytes >= bsize); | ||
365 | |||
366 | done: | ||
367 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
368 | return nbytes; | ||
369 | } | 178 | } |
370 | 179 | ||
371 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 180 | static inline void serpent_fpu_end(bool fpu_enabled) |
372 | struct scatterlist *src, unsigned int nbytes) | ||
373 | { | 181 | { |
374 | bool fpu_enabled = false; | 182 | glue_fpu_end(fpu_enabled); |
375 | struct blkcipher_walk walk; | ||
376 | int err; | ||
377 | |||
378 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
379 | err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); | ||
380 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
381 | |||
382 | while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { | ||
383 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
384 | nbytes = __ctr_crypt(desc, &walk); | ||
385 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
386 | } | ||
387 | |||
388 | serpent_fpu_end(fpu_enabled); | ||
389 | |||
390 | if (walk.nbytes) { | ||
391 | ctr_crypt_final(desc, &walk); | ||
392 | err = blkcipher_walk_done(desc, &walk, 0); | ||
393 | } | ||
394 | |||
395 | return err; | ||
396 | } | 183 | } |
397 | 184 | ||
398 | struct crypt_priv { | 185 | struct crypt_priv { |
@@ -596,106 +383,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
596 | return ret; | 383 | return ret; |
597 | } | 384 | } |
598 | 385 | ||
599 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
600 | unsigned int key_len) | ||
601 | { | ||
602 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
603 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; | ||
604 | int err; | ||
605 | |||
606 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
607 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) | ||
608 | & CRYPTO_TFM_REQ_MASK); | ||
609 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
610 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
611 | & CRYPTO_TFM_RES_MASK); | ||
612 | return err; | ||
613 | } | ||
614 | |||
615 | static int __ablk_encrypt(struct ablkcipher_request *req) | ||
616 | { | ||
617 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
618 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
619 | struct blkcipher_desc desc; | ||
620 | |||
621 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
622 | desc.info = req->info; | ||
623 | desc.flags = 0; | ||
624 | |||
625 | return crypto_blkcipher_crt(desc.tfm)->encrypt( | ||
626 | &desc, req->dst, req->src, req->nbytes); | ||
627 | } | ||
628 | |||
629 | static int ablk_encrypt(struct ablkcipher_request *req) | ||
630 | { | ||
631 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
632 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
633 | |||
634 | if (!irq_fpu_usable()) { | ||
635 | struct ablkcipher_request *cryptd_req = | ||
636 | ablkcipher_request_ctx(req); | ||
637 | |||
638 | memcpy(cryptd_req, req, sizeof(*req)); | ||
639 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
640 | |||
641 | return crypto_ablkcipher_encrypt(cryptd_req); | ||
642 | } else { | ||
643 | return __ablk_encrypt(req); | ||
644 | } | ||
645 | } | ||
646 | |||
647 | static int ablk_decrypt(struct ablkcipher_request *req) | ||
648 | { | ||
649 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
650 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
651 | |||
652 | if (!irq_fpu_usable()) { | ||
653 | struct ablkcipher_request *cryptd_req = | ||
654 | ablkcipher_request_ctx(req); | ||
655 | |||
656 | memcpy(cryptd_req, req, sizeof(*req)); | ||
657 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
658 | |||
659 | return crypto_ablkcipher_decrypt(cryptd_req); | ||
660 | } else { | ||
661 | struct blkcipher_desc desc; | ||
662 | |||
663 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
664 | desc.info = req->info; | ||
665 | desc.flags = 0; | ||
666 | |||
667 | return crypto_blkcipher_crt(desc.tfm)->decrypt( | ||
668 | &desc, req->dst, req->src, req->nbytes); | ||
669 | } | ||
670 | } | ||
671 | |||
672 | static void ablk_exit(struct crypto_tfm *tfm) | ||
673 | { | ||
674 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
675 | |||
676 | cryptd_free_ablkcipher(ctx->cryptd_tfm); | ||
677 | } | ||
678 | |||
679 | static int ablk_init(struct crypto_tfm *tfm) | ||
680 | { | ||
681 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
682 | struct cryptd_ablkcipher *cryptd_tfm; | ||
683 | char drv_name[CRYPTO_MAX_ALG_NAME]; | ||
684 | |||
685 | snprintf(drv_name, sizeof(drv_name), "__driver-%s", | ||
686 | crypto_tfm_alg_driver_name(tfm)); | ||
687 | |||
688 | cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); | ||
689 | if (IS_ERR(cryptd_tfm)) | ||
690 | return PTR_ERR(cryptd_tfm); | ||
691 | |||
692 | ctx->cryptd_tfm = cryptd_tfm; | ||
693 | tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + | ||
694 | crypto_ablkcipher_reqsize(&cryptd_tfm->base); | ||
695 | |||
696 | return 0; | ||
697 | } | ||
698 | |||
699 | static struct crypto_alg serpent_algs[10] = { { | 386 | static struct crypto_alg serpent_algs[10] = { { |
700 | .cra_name = "__ecb-serpent-sse2", | 387 | .cra_name = "__ecb-serpent-sse2", |
701 | .cra_driver_name = "__driver-ecb-serpent-sse2", | 388 | .cra_driver_name = "__driver-ecb-serpent-sse2", |
@@ -808,7 +495,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
808 | .cra_priority = 400, | 495 | .cra_priority = 400, |
809 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 496 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
810 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 497 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
811 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 498 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
812 | .cra_alignmask = 0, | 499 | .cra_alignmask = 0, |
813 | .cra_type = &crypto_ablkcipher_type, | 500 | .cra_type = &crypto_ablkcipher_type, |
814 | .cra_module = THIS_MODULE, | 501 | .cra_module = THIS_MODULE, |
@@ -830,7 +517,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
830 | .cra_priority = 400, | 517 | .cra_priority = 400, |
831 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 518 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
832 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 519 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
833 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 520 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
834 | .cra_alignmask = 0, | 521 | .cra_alignmask = 0, |
835 | .cra_type = &crypto_ablkcipher_type, | 522 | .cra_type = &crypto_ablkcipher_type, |
836 | .cra_module = THIS_MODULE, | 523 | .cra_module = THIS_MODULE, |
@@ -853,7 +540,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
853 | .cra_priority = 400, | 540 | .cra_priority = 400, |
854 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 541 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
855 | .cra_blocksize = 1, | 542 | .cra_blocksize = 1, |
856 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 543 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
857 | .cra_alignmask = 0, | 544 | .cra_alignmask = 0, |
858 | .cra_type = &crypto_ablkcipher_type, | 545 | .cra_type = &crypto_ablkcipher_type, |
859 | .cra_module = THIS_MODULE, | 546 | .cra_module = THIS_MODULE, |
@@ -877,7 +564,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
877 | .cra_priority = 400, | 564 | .cra_priority = 400, |
878 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 565 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
879 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 566 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
880 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 567 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
881 | .cra_alignmask = 0, | 568 | .cra_alignmask = 0, |
882 | .cra_type = &crypto_ablkcipher_type, | 569 | .cra_type = &crypto_ablkcipher_type, |
883 | .cra_module = THIS_MODULE, | 570 | .cra_module = THIS_MODULE, |
@@ -902,7 +589,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
902 | .cra_priority = 400, | 589 | .cra_priority = 400, |
903 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 590 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
904 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 591 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
905 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 592 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
906 | .cra_alignmask = 0, | 593 | .cra_alignmask = 0, |
907 | .cra_type = &crypto_ablkcipher_type, | 594 | .cra_type = &crypto_ablkcipher_type, |
908 | .cra_module = THIS_MODULE, | 595 | .cra_module = THIS_MODULE, |
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index b2c2f57d70e..49d6987a73d 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S | |||
@@ -468,7 +468,7 @@ W_PRECALC_SSSE3 | |||
468 | */ | 468 | */ |
469 | SHA1_VECTOR_ASM sha1_transform_ssse3 | 469 | SHA1_VECTOR_ASM sha1_transform_ssse3 |
470 | 470 | ||
471 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 471 | #ifdef CONFIG_AS_AVX |
472 | 472 | ||
473 | .macro W_PRECALC_AVX | 473 | .macro W_PRECALC_AVX |
474 | 474 | ||
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index f916499d0ab..4a11a9d7245 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c | |||
@@ -35,7 +35,7 @@ | |||
35 | 35 | ||
36 | asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, | 36 | asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, |
37 | unsigned int rounds); | 37 | unsigned int rounds); |
38 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 38 | #ifdef CONFIG_AS_AVX |
39 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, | 39 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, |
40 | unsigned int rounds); | 40 | unsigned int rounds); |
41 | #endif | 41 | #endif |
@@ -184,7 +184,7 @@ static struct shash_alg alg = { | |||
184 | } | 184 | } |
185 | }; | 185 | }; |
186 | 186 | ||
187 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 187 | #ifdef CONFIG_AS_AVX |
188 | static bool __init avx_usable(void) | 188 | static bool __init avx_usable(void) |
189 | { | 189 | { |
190 | u64 xcr0; | 190 | u64 xcr0; |
@@ -209,7 +209,7 @@ static int __init sha1_ssse3_mod_init(void) | |||
209 | if (cpu_has_ssse3) | 209 | if (cpu_has_ssse3) |
210 | sha1_transform_asm = sha1_transform_ssse3; | 210 | sha1_transform_asm = sha1_transform_ssse3; |
211 | 211 | ||
212 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 212 | #ifdef CONFIG_AS_AVX |
213 | /* allow AVX to override SSSE3, it's a little faster */ | 213 | /* allow AVX to override SSSE3, it's a little faster */ |
214 | if (avx_usable()) | 214 | if (avx_usable()) |
215 | sha1_transform_asm = sha1_transform_avx; | 215 | sha1_transform_asm = sha1_transform_avx; |
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S new file mode 100644 index 00000000000..35f45574390 --- /dev/null +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S | |||
@@ -0,0 +1,300 @@ | |||
1 | /* | ||
2 | * Twofish Cipher 8-way parallel algorithm (AVX/x86_64) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | .file "twofish-avx-x86_64-asm_64.S" | ||
25 | .text | ||
26 | |||
27 | /* structure of crypto context */ | ||
28 | #define s0 0 | ||
29 | #define s1 1024 | ||
30 | #define s2 2048 | ||
31 | #define s3 3072 | ||
32 | #define w 4096 | ||
33 | #define k 4128 | ||
34 | |||
35 | /********************************************************************** | ||
36 | 8-way AVX twofish | ||
37 | **********************************************************************/ | ||
38 | #define CTX %rdi | ||
39 | |||
40 | #define RA1 %xmm0 | ||
41 | #define RB1 %xmm1 | ||
42 | #define RC1 %xmm2 | ||
43 | #define RD1 %xmm3 | ||
44 | |||
45 | #define RA2 %xmm4 | ||
46 | #define RB2 %xmm5 | ||
47 | #define RC2 %xmm6 | ||
48 | #define RD2 %xmm7 | ||
49 | |||
50 | #define RX %xmm8 | ||
51 | #define RY %xmm9 | ||
52 | |||
53 | #define RK1 %xmm10 | ||
54 | #define RK2 %xmm11 | ||
55 | |||
56 | #define RID1 %rax | ||
57 | #define RID1b %al | ||
58 | #define RID2 %rbx | ||
59 | #define RID2b %bl | ||
60 | |||
61 | #define RGI1 %rdx | ||
62 | #define RGI1bl %dl | ||
63 | #define RGI1bh %dh | ||
64 | #define RGI2 %rcx | ||
65 | #define RGI2bl %cl | ||
66 | #define RGI2bh %ch | ||
67 | |||
68 | #define RGS1 %r8 | ||
69 | #define RGS1d %r8d | ||
70 | #define RGS2 %r9 | ||
71 | #define RGS2d %r9d | ||
72 | #define RGS3 %r10 | ||
73 | #define RGS3d %r10d | ||
74 | |||
75 | |||
76 | #define lookup_32bit(t0, t1, t2, t3, src, dst) \ | ||
77 | movb src ## bl, RID1b; \ | ||
78 | movb src ## bh, RID2b; \ | ||
79 | movl t0(CTX, RID1, 4), dst ## d; \ | ||
80 | xorl t1(CTX, RID2, 4), dst ## d; \ | ||
81 | shrq $16, src; \ | ||
82 | movb src ## bl, RID1b; \ | ||
83 | movb src ## bh, RID2b; \ | ||
84 | xorl t2(CTX, RID1, 4), dst ## d; \ | ||
85 | xorl t3(CTX, RID2, 4), dst ## d; | ||
86 | |||
87 | #define G(a, x, t0, t1, t2, t3) \ | ||
88 | vmovq a, RGI1; \ | ||
89 | vpsrldq $8, a, x; \ | ||
90 | vmovq x, RGI2; \ | ||
91 | \ | ||
92 | lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \ | ||
93 | shrq $16, RGI1; \ | ||
94 | lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \ | ||
95 | shlq $32, RGS2; \ | ||
96 | orq RGS1, RGS2; \ | ||
97 | \ | ||
98 | lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \ | ||
99 | shrq $16, RGI2; \ | ||
100 | lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \ | ||
101 | shlq $32, RGS3; \ | ||
102 | orq RGS1, RGS3; \ | ||
103 | \ | ||
104 | vmovq RGS2, x; \ | ||
105 | vpinsrq $1, RGS3, x, x; | ||
106 | |||
107 | #define encround(a, b, c, d, x, y) \ | ||
108 | G(a, x, s0, s1, s2, s3); \ | ||
109 | G(b, y, s1, s2, s3, s0); \ | ||
110 | vpaddd x, y, x; \ | ||
111 | vpaddd y, x, y; \ | ||
112 | vpaddd x, RK1, x; \ | ||
113 | vpaddd y, RK2, y; \ | ||
114 | vpxor x, c, c; \ | ||
115 | vpsrld $1, c, x; \ | ||
116 | vpslld $(32 - 1), c, c; \ | ||
117 | vpor c, x, c; \ | ||
118 | vpslld $1, d, x; \ | ||
119 | vpsrld $(32 - 1), d, d; \ | ||
120 | vpor d, x, d; \ | ||
121 | vpxor d, y, d; | ||
122 | |||
123 | #define decround(a, b, c, d, x, y) \ | ||
124 | G(a, x, s0, s1, s2, s3); \ | ||
125 | G(b, y, s1, s2, s3, s0); \ | ||
126 | vpaddd x, y, x; \ | ||
127 | vpaddd y, x, y; \ | ||
128 | vpaddd y, RK2, y; \ | ||
129 | vpxor d, y, d; \ | ||
130 | vpsrld $1, d, y; \ | ||
131 | vpslld $(32 - 1), d, d; \ | ||
132 | vpor d, y, d; \ | ||
133 | vpslld $1, c, y; \ | ||
134 | vpsrld $(32 - 1), c, c; \ | ||
135 | vpor c, y, c; \ | ||
136 | vpaddd x, RK1, x; \ | ||
137 | vpxor x, c, c; | ||
138 | |||
139 | #define encrypt_round(n, a, b, c, d) \ | ||
140 | vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ | ||
141 | vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ | ||
142 | encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ | ||
143 | encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); | ||
144 | |||
145 | #define decrypt_round(n, a, b, c, d) \ | ||
146 | vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ | ||
147 | vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ | ||
148 | decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ | ||
149 | decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); | ||
150 | |||
151 | #define encrypt_cycle(n) \ | ||
152 | encrypt_round((2*n), RA, RB, RC, RD); \ | ||
153 | encrypt_round(((2*n) + 1), RC, RD, RA, RB); | ||
154 | |||
155 | #define decrypt_cycle(n) \ | ||
156 | decrypt_round(((2*n) + 1), RC, RD, RA, RB); \ | ||
157 | decrypt_round((2*n), RA, RB, RC, RD); | ||
158 | |||
159 | |||
160 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
161 | vpunpckldq x1, x0, t0; \ | ||
162 | vpunpckhdq x1, x0, t2; \ | ||
163 | vpunpckldq x3, x2, t1; \ | ||
164 | vpunpckhdq x3, x2, x3; \ | ||
165 | \ | ||
166 | vpunpcklqdq t1, t0, x0; \ | ||
167 | vpunpckhqdq t1, t0, x1; \ | ||
168 | vpunpcklqdq x3, t2, x2; \ | ||
169 | vpunpckhqdq x3, t2, x3; | ||
170 | |||
171 | #define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
172 | vpxor (0*4*4)(in), wkey, x0; \ | ||
173 | vpxor (1*4*4)(in), wkey, x1; \ | ||
174 | vpxor (2*4*4)(in), wkey, x2; \ | ||
175 | vpxor (3*4*4)(in), wkey, x3; \ | ||
176 | \ | ||
177 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
178 | |||
179 | #define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
180 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
181 | \ | ||
182 | vpxor x0, wkey, x0; \ | ||
183 | vmovdqu x0, (0*4*4)(out); \ | ||
184 | vpxor x1, wkey, x1; \ | ||
185 | vmovdqu x1, (1*4*4)(out); \ | ||
186 | vpxor x2, wkey, x2; \ | ||
187 | vmovdqu x2, (2*4*4)(out); \ | ||
188 | vpxor x3, wkey, x3; \ | ||
189 | vmovdqu x3, (3*4*4)(out); | ||
190 | |||
191 | #define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
192 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
193 | \ | ||
194 | vpxor x0, wkey, x0; \ | ||
195 | vpxor (0*4*4)(out), x0, x0; \ | ||
196 | vmovdqu x0, (0*4*4)(out); \ | ||
197 | vpxor x1, wkey, x1; \ | ||
198 | vpxor (1*4*4)(out), x1, x1; \ | ||
199 | vmovdqu x1, (1*4*4)(out); \ | ||
200 | vpxor x2, wkey, x2; \ | ||
201 | vpxor (2*4*4)(out), x2, x2; \ | ||
202 | vmovdqu x2, (2*4*4)(out); \ | ||
203 | vpxor x3, wkey, x3; \ | ||
204 | vpxor (3*4*4)(out), x3, x3; \ | ||
205 | vmovdqu x3, (3*4*4)(out); | ||
206 | |||
207 | .align 8 | ||
208 | .global __twofish_enc_blk_8way | ||
209 | .type __twofish_enc_blk_8way,@function; | ||
210 | |||
211 | __twofish_enc_blk_8way: | ||
212 | /* input: | ||
213 | * %rdi: ctx, CTX | ||
214 | * %rsi: dst | ||
215 | * %rdx: src | ||
216 | * %rcx: bool, if true: xor output | ||
217 | */ | ||
218 | |||
219 | pushq %rbx; | ||
220 | pushq %rcx; | ||
221 | |||
222 | vmovdqu w(CTX), RK1; | ||
223 | |||
224 | leaq (4*4*4)(%rdx), %rax; | ||
225 | inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); | ||
226 | inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); | ||
227 | |||
228 | xorq RID1, RID1; | ||
229 | xorq RID2, RID2; | ||
230 | |||
231 | encrypt_cycle(0); | ||
232 | encrypt_cycle(1); | ||
233 | encrypt_cycle(2); | ||
234 | encrypt_cycle(3); | ||
235 | encrypt_cycle(4); | ||
236 | encrypt_cycle(5); | ||
237 | encrypt_cycle(6); | ||
238 | encrypt_cycle(7); | ||
239 | |||
240 | vmovdqu (w+4*4)(CTX), RK1; | ||
241 | |||
242 | popq %rcx; | ||
243 | popq %rbx; | ||
244 | |||
245 | leaq (4*4*4)(%rsi), %rax; | ||
246 | |||
247 | testb %cl, %cl; | ||
248 | jnz __enc_xor8; | ||
249 | |||
250 | outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
251 | outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
252 | |||
253 | ret; | ||
254 | |||
255 | __enc_xor8: | ||
256 | outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
257 | outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
258 | |||
259 | ret; | ||
260 | |||
261 | .align 8 | ||
262 | .global twofish_dec_blk_8way | ||
263 | .type twofish_dec_blk_8way,@function; | ||
264 | |||
265 | twofish_dec_blk_8way: | ||
266 | /* input: | ||
267 | * %rdi: ctx, CTX | ||
268 | * %rsi: dst | ||
269 | * %rdx: src | ||
270 | */ | ||
271 | |||
272 | pushq %rbx; | ||
273 | |||
274 | vmovdqu (w+4*4)(CTX), RK1; | ||
275 | |||
276 | leaq (4*4*4)(%rdx), %rax; | ||
277 | inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
278 | inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
279 | |||
280 | xorq RID1, RID1; | ||
281 | xorq RID2, RID2; | ||
282 | |||
283 | decrypt_cycle(7); | ||
284 | decrypt_cycle(6); | ||
285 | decrypt_cycle(5); | ||
286 | decrypt_cycle(4); | ||
287 | decrypt_cycle(3); | ||
288 | decrypt_cycle(2); | ||
289 | decrypt_cycle(1); | ||
290 | decrypt_cycle(0); | ||
291 | |||
292 | vmovdqu (w)(CTX), RK1; | ||
293 | |||
294 | popq %rbx; | ||
295 | |||
296 | leaq (4*4*4)(%rsi), %rax; | ||
297 | outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); | ||
298 | outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); | ||
299 | |||
300 | ret; | ||
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c new file mode 100644 index 00000000000..782b67ddaf6 --- /dev/null +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -0,0 +1,624 @@ | |||
1 | /* | ||
2 | * Glue Code for AVX assembler version of Twofish Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/crypto.h> | ||
28 | #include <linux/err.h> | ||
29 | #include <crypto/algapi.h> | ||
30 | #include <crypto/twofish.h> | ||
31 | #include <crypto/cryptd.h> | ||
32 | #include <crypto/b128ops.h> | ||
33 | #include <crypto/ctr.h> | ||
34 | #include <crypto/lrw.h> | ||
35 | #include <crypto/xts.h> | ||
36 | #include <asm/i387.h> | ||
37 | #include <asm/xcr.h> | ||
38 | #include <asm/xsave.h> | ||
39 | #include <asm/crypto/twofish.h> | ||
40 | #include <asm/crypto/ablk_helper.h> | ||
41 | #include <asm/crypto/glue_helper.h> | ||
42 | #include <crypto/scatterwalk.h> | ||
43 | #include <linux/workqueue.h> | ||
44 | #include <linux/spinlock.h> | ||
45 | |||
46 | #define TWOFISH_PARALLEL_BLOCKS 8 | ||
47 | |||
48 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
49 | const u8 *src) | ||
50 | { | ||
51 | __twofish_enc_blk_3way(ctx, dst, src, false); | ||
52 | } | ||
53 | |||
54 | /* 8-way parallel cipher functions */ | ||
55 | asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst, | ||
56 | const u8 *src, bool xor); | ||
57 | asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst, | ||
58 | const u8 *src); | ||
59 | |||
60 | static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst, | ||
61 | const u8 *src) | ||
62 | { | ||
63 | __twofish_enc_blk_8way(ctx, dst, src, false); | ||
64 | } | ||
65 | |||
66 | static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst, | ||
67 | const u8 *src) | ||
68 | { | ||
69 | __twofish_enc_blk_8way(ctx, dst, src, true); | ||
70 | } | ||
71 | |||
72 | static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst, | ||
73 | const u8 *src) | ||
74 | { | ||
75 | twofish_dec_blk_8way(ctx, dst, src); | ||
76 | } | ||
77 | |||
78 | static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src) | ||
79 | { | ||
80 | u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1]; | ||
81 | unsigned int j; | ||
82 | |||
83 | for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) | ||
84 | ivs[j] = src[j]; | ||
85 | |||
86 | twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
87 | |||
88 | for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) | ||
89 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); | ||
90 | } | ||
91 | |||
92 | static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src, | ||
93 | u128 *iv) | ||
94 | { | ||
95 | be128 ctrblks[TWOFISH_PARALLEL_BLOCKS]; | ||
96 | unsigned int i; | ||
97 | |||
98 | for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) { | ||
99 | if (dst != src) | ||
100 | dst[i] = src[i]; | ||
101 | |||
102 | u128_to_be128(&ctrblks[i], iv); | ||
103 | u128_inc(iv); | ||
104 | } | ||
105 | |||
106 | twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
107 | } | ||
108 | |||
109 | static const struct common_glue_ctx twofish_enc = { | ||
110 | .num_funcs = 3, | ||
111 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
112 | |||
113 | .funcs = { { | ||
114 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
115 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) } | ||
116 | }, { | ||
117 | .num_blocks = 3, | ||
118 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
119 | }, { | ||
120 | .num_blocks = 1, | ||
121 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
122 | } } | ||
123 | }; | ||
124 | |||
125 | static const struct common_glue_ctx twofish_ctr = { | ||
126 | .num_funcs = 3, | ||
127 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
128 | |||
129 | .funcs = { { | ||
130 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
131 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) } | ||
132 | }, { | ||
133 | .num_blocks = 3, | ||
134 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } | ||
135 | }, { | ||
136 | .num_blocks = 1, | ||
137 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } | ||
138 | } } | ||
139 | }; | ||
140 | |||
141 | static const struct common_glue_ctx twofish_dec = { | ||
142 | .num_funcs = 3, | ||
143 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
144 | |||
145 | .funcs = { { | ||
146 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
147 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) } | ||
148 | }, { | ||
149 | .num_blocks = 3, | ||
150 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
151 | }, { | ||
152 | .num_blocks = 1, | ||
153 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
154 | } } | ||
155 | }; | ||
156 | |||
157 | static const struct common_glue_ctx twofish_dec_cbc = { | ||
158 | .num_funcs = 3, | ||
159 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
160 | |||
161 | .funcs = { { | ||
162 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
163 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) } | ||
164 | }, { | ||
165 | .num_blocks = 3, | ||
166 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
167 | }, { | ||
168 | .num_blocks = 1, | ||
169 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
170 | } } | ||
171 | }; | ||
172 | |||
173 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
174 | struct scatterlist *src, unsigned int nbytes) | ||
175 | { | ||
176 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); | ||
177 | } | ||
178 | |||
179 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
180 | struct scatterlist *src, unsigned int nbytes) | ||
181 | { | ||
182 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); | ||
183 | } | ||
184 | |||
185 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
186 | struct scatterlist *src, unsigned int nbytes) | ||
187 | { | ||
188 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, | ||
189 | dst, src, nbytes); | ||
190 | } | ||
191 | |||
192 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
193 | struct scatterlist *src, unsigned int nbytes) | ||
194 | { | ||
195 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, | ||
196 | nbytes); | ||
197 | } | ||
198 | |||
199 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
200 | struct scatterlist *src, unsigned int nbytes) | ||
201 | { | ||
202 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); | ||
203 | } | ||
204 | |||
205 | static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
206 | { | ||
207 | return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL, | ||
208 | fpu_enabled, nbytes); | ||
209 | } | ||
210 | |||
211 | static inline void twofish_fpu_end(bool fpu_enabled) | ||
212 | { | ||
213 | glue_fpu_end(fpu_enabled); | ||
214 | } | ||
215 | |||
216 | struct crypt_priv { | ||
217 | struct twofish_ctx *ctx; | ||
218 | bool fpu_enabled; | ||
219 | }; | ||
220 | |||
221 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
222 | { | ||
223 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
224 | struct crypt_priv *ctx = priv; | ||
225 | int i; | ||
226 | |||
227 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
228 | |||
229 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | ||
230 | twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
231 | return; | ||
232 | } | ||
233 | |||
234 | for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | ||
235 | twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); | ||
236 | |||
237 | nbytes %= bsize * 3; | ||
238 | |||
239 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
240 | twofish_enc_blk(ctx->ctx, srcdst, srcdst); | ||
241 | } | ||
242 | |||
243 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
244 | { | ||
245 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
246 | struct crypt_priv *ctx = priv; | ||
247 | int i; | ||
248 | |||
249 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
250 | |||
251 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | ||
252 | twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
253 | return; | ||
254 | } | ||
255 | |||
256 | for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | ||
257 | twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); | ||
258 | |||
259 | nbytes %= bsize * 3; | ||
260 | |||
261 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
262 | twofish_dec_blk(ctx->ctx, srcdst, srcdst); | ||
263 | } | ||
264 | |||
265 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
266 | struct scatterlist *src, unsigned int nbytes) | ||
267 | { | ||
268 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
269 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
270 | struct crypt_priv crypt_ctx = { | ||
271 | .ctx = &ctx->twofish_ctx, | ||
272 | .fpu_enabled = false, | ||
273 | }; | ||
274 | struct lrw_crypt_req req = { | ||
275 | .tbuf = buf, | ||
276 | .tbuflen = sizeof(buf), | ||
277 | |||
278 | .table_ctx = &ctx->lrw_table, | ||
279 | .crypt_ctx = &crypt_ctx, | ||
280 | .crypt_fn = encrypt_callback, | ||
281 | }; | ||
282 | int ret; | ||
283 | |||
284 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
285 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
286 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
287 | |||
288 | return ret; | ||
289 | } | ||
290 | |||
291 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
292 | struct scatterlist *src, unsigned int nbytes) | ||
293 | { | ||
294 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
295 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
296 | struct crypt_priv crypt_ctx = { | ||
297 | .ctx = &ctx->twofish_ctx, | ||
298 | .fpu_enabled = false, | ||
299 | }; | ||
300 | struct lrw_crypt_req req = { | ||
301 | .tbuf = buf, | ||
302 | .tbuflen = sizeof(buf), | ||
303 | |||
304 | .table_ctx = &ctx->lrw_table, | ||
305 | .crypt_ctx = &crypt_ctx, | ||
306 | .crypt_fn = decrypt_callback, | ||
307 | }; | ||
308 | int ret; | ||
309 | |||
310 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
311 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
312 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
313 | |||
314 | return ret; | ||
315 | } | ||
316 | |||
317 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
318 | struct scatterlist *src, unsigned int nbytes) | ||
319 | { | ||
320 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
321 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
322 | struct crypt_priv crypt_ctx = { | ||
323 | .ctx = &ctx->crypt_ctx, | ||
324 | .fpu_enabled = false, | ||
325 | }; | ||
326 | struct xts_crypt_req req = { | ||
327 | .tbuf = buf, | ||
328 | .tbuflen = sizeof(buf), | ||
329 | |||
330 | .tweak_ctx = &ctx->tweak_ctx, | ||
331 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
332 | .crypt_ctx = &crypt_ctx, | ||
333 | .crypt_fn = encrypt_callback, | ||
334 | }; | ||
335 | int ret; | ||
336 | |||
337 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
338 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
339 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
340 | |||
341 | return ret; | ||
342 | } | ||
343 | |||
344 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
345 | struct scatterlist *src, unsigned int nbytes) | ||
346 | { | ||
347 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
348 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
349 | struct crypt_priv crypt_ctx = { | ||
350 | .ctx = &ctx->crypt_ctx, | ||
351 | .fpu_enabled = false, | ||
352 | }; | ||
353 | struct xts_crypt_req req = { | ||
354 | .tbuf = buf, | ||
355 | .tbuflen = sizeof(buf), | ||
356 | |||
357 | .tweak_ctx = &ctx->tweak_ctx, | ||
358 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
359 | .crypt_ctx = &crypt_ctx, | ||
360 | .crypt_fn = decrypt_callback, | ||
361 | }; | ||
362 | int ret; | ||
363 | |||
364 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
365 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
366 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
367 | |||
368 | return ret; | ||
369 | } | ||
370 | |||
371 | static struct crypto_alg twofish_algs[10] = { { | ||
372 | .cra_name = "__ecb-twofish-avx", | ||
373 | .cra_driver_name = "__driver-ecb-twofish-avx", | ||
374 | .cra_priority = 0, | ||
375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
376 | .cra_blocksize = TF_BLOCK_SIZE, | ||
377 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
378 | .cra_alignmask = 0, | ||
379 | .cra_type = &crypto_blkcipher_type, | ||
380 | .cra_module = THIS_MODULE, | ||
381 | .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list), | ||
382 | .cra_u = { | ||
383 | .blkcipher = { | ||
384 | .min_keysize = TF_MIN_KEY_SIZE, | ||
385 | .max_keysize = TF_MAX_KEY_SIZE, | ||
386 | .setkey = twofish_setkey, | ||
387 | .encrypt = ecb_encrypt, | ||
388 | .decrypt = ecb_decrypt, | ||
389 | }, | ||
390 | }, | ||
391 | }, { | ||
392 | .cra_name = "__cbc-twofish-avx", | ||
393 | .cra_driver_name = "__driver-cbc-twofish-avx", | ||
394 | .cra_priority = 0, | ||
395 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
396 | .cra_blocksize = TF_BLOCK_SIZE, | ||
397 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
398 | .cra_alignmask = 0, | ||
399 | .cra_type = &crypto_blkcipher_type, | ||
400 | .cra_module = THIS_MODULE, | ||
401 | .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list), | ||
402 | .cra_u = { | ||
403 | .blkcipher = { | ||
404 | .min_keysize = TF_MIN_KEY_SIZE, | ||
405 | .max_keysize = TF_MAX_KEY_SIZE, | ||
406 | .setkey = twofish_setkey, | ||
407 | .encrypt = cbc_encrypt, | ||
408 | .decrypt = cbc_decrypt, | ||
409 | }, | ||
410 | }, | ||
411 | }, { | ||
412 | .cra_name = "__ctr-twofish-avx", | ||
413 | .cra_driver_name = "__driver-ctr-twofish-avx", | ||
414 | .cra_priority = 0, | ||
415 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
416 | .cra_blocksize = 1, | ||
417 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
418 | .cra_alignmask = 0, | ||
419 | .cra_type = &crypto_blkcipher_type, | ||
420 | .cra_module = THIS_MODULE, | ||
421 | .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list), | ||
422 | .cra_u = { | ||
423 | .blkcipher = { | ||
424 | .min_keysize = TF_MIN_KEY_SIZE, | ||
425 | .max_keysize = TF_MAX_KEY_SIZE, | ||
426 | .ivsize = TF_BLOCK_SIZE, | ||
427 | .setkey = twofish_setkey, | ||
428 | .encrypt = ctr_crypt, | ||
429 | .decrypt = ctr_crypt, | ||
430 | }, | ||
431 | }, | ||
432 | }, { | ||
433 | .cra_name = "__lrw-twofish-avx", | ||
434 | .cra_driver_name = "__driver-lrw-twofish-avx", | ||
435 | .cra_priority = 0, | ||
436 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
437 | .cra_blocksize = TF_BLOCK_SIZE, | ||
438 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | ||
439 | .cra_alignmask = 0, | ||
440 | .cra_type = &crypto_blkcipher_type, | ||
441 | .cra_module = THIS_MODULE, | ||
442 | .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list), | ||
443 | .cra_exit = lrw_twofish_exit_tfm, | ||
444 | .cra_u = { | ||
445 | .blkcipher = { | ||
446 | .min_keysize = TF_MIN_KEY_SIZE + | ||
447 | TF_BLOCK_SIZE, | ||
448 | .max_keysize = TF_MAX_KEY_SIZE + | ||
449 | TF_BLOCK_SIZE, | ||
450 | .ivsize = TF_BLOCK_SIZE, | ||
451 | .setkey = lrw_twofish_setkey, | ||
452 | .encrypt = lrw_encrypt, | ||
453 | .decrypt = lrw_decrypt, | ||
454 | }, | ||
455 | }, | ||
456 | }, { | ||
457 | .cra_name = "__xts-twofish-avx", | ||
458 | .cra_driver_name = "__driver-xts-twofish-avx", | ||
459 | .cra_priority = 0, | ||
460 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
461 | .cra_blocksize = TF_BLOCK_SIZE, | ||
462 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | ||
463 | .cra_alignmask = 0, | ||
464 | .cra_type = &crypto_blkcipher_type, | ||
465 | .cra_module = THIS_MODULE, | ||
466 | .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list), | ||
467 | .cra_u = { | ||
468 | .blkcipher = { | ||
469 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
470 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
471 | .ivsize = TF_BLOCK_SIZE, | ||
472 | .setkey = xts_twofish_setkey, | ||
473 | .encrypt = xts_encrypt, | ||
474 | .decrypt = xts_decrypt, | ||
475 | }, | ||
476 | }, | ||
477 | }, { | ||
478 | .cra_name = "ecb(twofish)", | ||
479 | .cra_driver_name = "ecb-twofish-avx", | ||
480 | .cra_priority = 400, | ||
481 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
482 | .cra_blocksize = TF_BLOCK_SIZE, | ||
483 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
484 | .cra_alignmask = 0, | ||
485 | .cra_type = &crypto_ablkcipher_type, | ||
486 | .cra_module = THIS_MODULE, | ||
487 | .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list), | ||
488 | .cra_init = ablk_init, | ||
489 | .cra_exit = ablk_exit, | ||
490 | .cra_u = { | ||
491 | .ablkcipher = { | ||
492 | .min_keysize = TF_MIN_KEY_SIZE, | ||
493 | .max_keysize = TF_MAX_KEY_SIZE, | ||
494 | .setkey = ablk_set_key, | ||
495 | .encrypt = ablk_encrypt, | ||
496 | .decrypt = ablk_decrypt, | ||
497 | }, | ||
498 | }, | ||
499 | }, { | ||
500 | .cra_name = "cbc(twofish)", | ||
501 | .cra_driver_name = "cbc-twofish-avx", | ||
502 | .cra_priority = 400, | ||
503 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
504 | .cra_blocksize = TF_BLOCK_SIZE, | ||
505 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
506 | .cra_alignmask = 0, | ||
507 | .cra_type = &crypto_ablkcipher_type, | ||
508 | .cra_module = THIS_MODULE, | ||
509 | .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list), | ||
510 | .cra_init = ablk_init, | ||
511 | .cra_exit = ablk_exit, | ||
512 | .cra_u = { | ||
513 | .ablkcipher = { | ||
514 | .min_keysize = TF_MIN_KEY_SIZE, | ||
515 | .max_keysize = TF_MAX_KEY_SIZE, | ||
516 | .ivsize = TF_BLOCK_SIZE, | ||
517 | .setkey = ablk_set_key, | ||
518 | .encrypt = __ablk_encrypt, | ||
519 | .decrypt = ablk_decrypt, | ||
520 | }, | ||
521 | }, | ||
522 | }, { | ||
523 | .cra_name = "ctr(twofish)", | ||
524 | .cra_driver_name = "ctr-twofish-avx", | ||
525 | .cra_priority = 400, | ||
526 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
527 | .cra_blocksize = 1, | ||
528 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
529 | .cra_alignmask = 0, | ||
530 | .cra_type = &crypto_ablkcipher_type, | ||
531 | .cra_module = THIS_MODULE, | ||
532 | .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list), | ||
533 | .cra_init = ablk_init, | ||
534 | .cra_exit = ablk_exit, | ||
535 | .cra_u = { | ||
536 | .ablkcipher = { | ||
537 | .min_keysize = TF_MIN_KEY_SIZE, | ||
538 | .max_keysize = TF_MAX_KEY_SIZE, | ||
539 | .ivsize = TF_BLOCK_SIZE, | ||
540 | .setkey = ablk_set_key, | ||
541 | .encrypt = ablk_encrypt, | ||
542 | .decrypt = ablk_encrypt, | ||
543 | .geniv = "chainiv", | ||
544 | }, | ||
545 | }, | ||
546 | }, { | ||
547 | .cra_name = "lrw(twofish)", | ||
548 | .cra_driver_name = "lrw-twofish-avx", | ||
549 | .cra_priority = 400, | ||
550 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
551 | .cra_blocksize = TF_BLOCK_SIZE, | ||
552 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
553 | .cra_alignmask = 0, | ||
554 | .cra_type = &crypto_ablkcipher_type, | ||
555 | .cra_module = THIS_MODULE, | ||
556 | .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list), | ||
557 | .cra_init = ablk_init, | ||
558 | .cra_exit = ablk_exit, | ||
559 | .cra_u = { | ||
560 | .ablkcipher = { | ||
561 | .min_keysize = TF_MIN_KEY_SIZE + | ||
562 | TF_BLOCK_SIZE, | ||
563 | .max_keysize = TF_MAX_KEY_SIZE + | ||
564 | TF_BLOCK_SIZE, | ||
565 | .ivsize = TF_BLOCK_SIZE, | ||
566 | .setkey = ablk_set_key, | ||
567 | .encrypt = ablk_encrypt, | ||
568 | .decrypt = ablk_decrypt, | ||
569 | }, | ||
570 | }, | ||
571 | }, { | ||
572 | .cra_name = "xts(twofish)", | ||
573 | .cra_driver_name = "xts-twofish-avx", | ||
574 | .cra_priority = 400, | ||
575 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
576 | .cra_blocksize = TF_BLOCK_SIZE, | ||
577 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
578 | .cra_alignmask = 0, | ||
579 | .cra_type = &crypto_ablkcipher_type, | ||
580 | .cra_module = THIS_MODULE, | ||
581 | .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list), | ||
582 | .cra_init = ablk_init, | ||
583 | .cra_exit = ablk_exit, | ||
584 | .cra_u = { | ||
585 | .ablkcipher = { | ||
586 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
587 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
588 | .ivsize = TF_BLOCK_SIZE, | ||
589 | .setkey = ablk_set_key, | ||
590 | .encrypt = ablk_encrypt, | ||
591 | .decrypt = ablk_decrypt, | ||
592 | }, | ||
593 | }, | ||
594 | } }; | ||
595 | |||
596 | static int __init twofish_init(void) | ||
597 | { | ||
598 | u64 xcr0; | ||
599 | |||
600 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
601 | printk(KERN_INFO "AVX instructions are not detected.\n"); | ||
602 | return -ENODEV; | ||
603 | } | ||
604 | |||
605 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
606 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
607 | printk(KERN_INFO "AVX detected but unusable.\n"); | ||
608 | return -ENODEV; | ||
609 | } | ||
610 | |||
611 | return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); | ||
612 | } | ||
613 | |||
614 | static void __exit twofish_exit(void) | ||
615 | { | ||
616 | crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); | ||
617 | } | ||
618 | |||
619 | module_init(twofish_init); | ||
620 | module_exit(twofish_exit); | ||
621 | |||
622 | MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); | ||
623 | MODULE_LICENSE("GPL"); | ||
624 | MODULE_ALIAS("twofish"); | ||
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 922ab24cce3..15f9347316c 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c | |||
@@ -3,11 +3,6 @@ | |||
3 | * | 3 | * |
4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> |
5 | * | 5 | * |
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
12 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
13 | * the Free Software Foundation; either version 2 of the License, or | 8 | * the Free Software Foundation; either version 2 of the License, or |
@@ -33,20 +28,13 @@ | |||
33 | #include <crypto/algapi.h> | 28 | #include <crypto/algapi.h> |
34 | #include <crypto/twofish.h> | 29 | #include <crypto/twofish.h> |
35 | #include <crypto/b128ops.h> | 30 | #include <crypto/b128ops.h> |
31 | #include <asm/crypto/twofish.h> | ||
32 | #include <asm/crypto/glue_helper.h> | ||
36 | #include <crypto/lrw.h> | 33 | #include <crypto/lrw.h> |
37 | #include <crypto/xts.h> | 34 | #include <crypto/xts.h> |
38 | 35 | ||
39 | /* regular block cipher functions from twofish_x86_64 module */ | 36 | EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); |
40 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, | 37 | EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); |
41 | const u8 *src); | ||
42 | asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, | ||
43 | const u8 *src); | ||
44 | |||
45 | /* 3-way parallel cipher functions */ | ||
46 | asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
47 | const u8 *src, bool xor); | ||
48 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
49 | const u8 *src); | ||
50 | 38 | ||
51 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | 39 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
52 | const u8 *src) | 40 | const u8 *src) |
@@ -60,311 +48,139 @@ static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, | |||
60 | __twofish_enc_blk_3way(ctx, dst, src, true); | 48 | __twofish_enc_blk_3way(ctx, dst, src, true); |
61 | } | 49 | } |
62 | 50 | ||
63 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | 51 | void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) |
64 | void (*fn)(struct twofish_ctx *, u8 *, const u8 *), | ||
65 | void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *)) | ||
66 | { | ||
67 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
68 | unsigned int bsize = TF_BLOCK_SIZE; | ||
69 | unsigned int nbytes; | ||
70 | int err; | ||
71 | |||
72 | err = blkcipher_walk_virt(desc, walk); | ||
73 | |||
74 | while ((nbytes = walk->nbytes)) { | ||
75 | u8 *wsrc = walk->src.virt.addr; | ||
76 | u8 *wdst = walk->dst.virt.addr; | ||
77 | |||
78 | /* Process three block batch */ | ||
79 | if (nbytes >= bsize * 3) { | ||
80 | do { | ||
81 | fn_3way(ctx, wdst, wsrc); | ||
82 | |||
83 | wsrc += bsize * 3; | ||
84 | wdst += bsize * 3; | ||
85 | nbytes -= bsize * 3; | ||
86 | } while (nbytes >= bsize * 3); | ||
87 | |||
88 | if (nbytes < bsize) | ||
89 | goto done; | ||
90 | } | ||
91 | |||
92 | /* Handle leftovers */ | ||
93 | do { | ||
94 | fn(ctx, wdst, wsrc); | ||
95 | |||
96 | wsrc += bsize; | ||
97 | wdst += bsize; | ||
98 | nbytes -= bsize; | ||
99 | } while (nbytes >= bsize); | ||
100 | |||
101 | done: | ||
102 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
103 | } | ||
104 | |||
105 | return err; | ||
106 | } | ||
107 | |||
108 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
109 | struct scatterlist *src, unsigned int nbytes) | ||
110 | { | 52 | { |
111 | struct blkcipher_walk walk; | 53 | u128 ivs[2]; |
112 | 54 | ||
113 | blkcipher_walk_init(&walk, dst, src, nbytes); | 55 | ivs[0] = src[0]; |
114 | return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way); | 56 | ivs[1] = src[1]; |
115 | } | ||
116 | 57 | ||
117 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 58 | twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); |
118 | struct scatterlist *src, unsigned int nbytes) | ||
119 | { | ||
120 | struct blkcipher_walk walk; | ||
121 | 59 | ||
122 | blkcipher_walk_init(&walk, dst, src, nbytes); | 60 | u128_xor(&dst[1], &dst[1], &ivs[0]); |
123 | return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); | 61 | u128_xor(&dst[2], &dst[2], &ivs[1]); |
124 | } | 62 | } |
63 | EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); | ||
125 | 64 | ||
126 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | 65 | void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) |
127 | struct blkcipher_walk *walk) | ||
128 | { | ||
129 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
130 | unsigned int bsize = TF_BLOCK_SIZE; | ||
131 | unsigned int nbytes = walk->nbytes; | ||
132 | u128 *src = (u128 *)walk->src.virt.addr; | ||
133 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
134 | u128 *iv = (u128 *)walk->iv; | ||
135 | |||
136 | do { | ||
137 | u128_xor(dst, src, iv); | ||
138 | twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
139 | iv = dst; | ||
140 | |||
141 | src += 1; | ||
142 | dst += 1; | ||
143 | nbytes -= bsize; | ||
144 | } while (nbytes >= bsize); | ||
145 | |||
146 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
147 | return nbytes; | ||
148 | } | ||
149 | |||
150 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
151 | struct scatterlist *src, unsigned int nbytes) | ||
152 | { | 66 | { |
153 | struct blkcipher_walk walk; | 67 | be128 ctrblk; |
154 | int err; | ||
155 | 68 | ||
156 | blkcipher_walk_init(&walk, dst, src, nbytes); | 69 | if (dst != src) |
157 | err = blkcipher_walk_virt(desc, &walk); | 70 | *dst = *src; |
158 | 71 | ||
159 | while ((nbytes = walk.nbytes)) { | 72 | u128_to_be128(&ctrblk, iv); |
160 | nbytes = __cbc_encrypt(desc, &walk); | 73 | u128_inc(iv); |
161 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
162 | } | ||
163 | 74 | ||
164 | return err; | 75 | twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
76 | u128_xor(dst, dst, (u128 *)&ctrblk); | ||
165 | } | 77 | } |
78 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); | ||
166 | 79 | ||
167 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 80 | void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, |
168 | struct blkcipher_walk *walk) | 81 | u128 *iv) |
169 | { | 82 | { |
170 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 83 | be128 ctrblks[3]; |
171 | unsigned int bsize = TF_BLOCK_SIZE; | ||
172 | unsigned int nbytes = walk->nbytes; | ||
173 | u128 *src = (u128 *)walk->src.virt.addr; | ||
174 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
175 | u128 ivs[3 - 1]; | ||
176 | u128 last_iv; | ||
177 | |||
178 | /* Start of the last block. */ | ||
179 | src += nbytes / bsize - 1; | ||
180 | dst += nbytes / bsize - 1; | ||
181 | |||
182 | last_iv = *src; | ||
183 | |||
184 | /* Process three block batch */ | ||
185 | if (nbytes >= bsize * 3) { | ||
186 | do { | ||
187 | nbytes -= bsize * (3 - 1); | ||
188 | src -= 3 - 1; | ||
189 | dst -= 3 - 1; | ||
190 | |||
191 | ivs[0] = src[0]; | ||
192 | ivs[1] = src[1]; | ||
193 | |||
194 | twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); | ||
195 | |||
196 | u128_xor(dst + 1, dst + 1, ivs + 0); | ||
197 | u128_xor(dst + 2, dst + 2, ivs + 1); | ||
198 | |||
199 | nbytes -= bsize; | ||
200 | if (nbytes < bsize) | ||
201 | goto done; | ||
202 | |||
203 | u128_xor(dst, dst, src - 1); | ||
204 | src -= 1; | ||
205 | dst -= 1; | ||
206 | } while (nbytes >= bsize * 3); | ||
207 | |||
208 | if (nbytes < bsize) | ||
209 | goto done; | ||
210 | } | ||
211 | |||
212 | /* Handle leftovers */ | ||
213 | for (;;) { | ||
214 | twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); | ||
215 | |||
216 | nbytes -= bsize; | ||
217 | if (nbytes < bsize) | ||
218 | break; | ||
219 | 84 | ||
220 | u128_xor(dst, dst, src - 1); | 85 | if (dst != src) { |
221 | src -= 1; | 86 | dst[0] = src[0]; |
222 | dst -= 1; | 87 | dst[1] = src[1]; |
88 | dst[2] = src[2]; | ||
223 | } | 89 | } |
224 | 90 | ||
225 | done: | 91 | u128_to_be128(&ctrblks[0], iv); |
226 | u128_xor(dst, dst, (u128 *)walk->iv); | 92 | u128_inc(iv); |
227 | *(u128 *)walk->iv = last_iv; | 93 | u128_to_be128(&ctrblks[1], iv); |
94 | u128_inc(iv); | ||
95 | u128_to_be128(&ctrblks[2], iv); | ||
96 | u128_inc(iv); | ||
228 | 97 | ||
229 | return nbytes; | 98 | twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); |
230 | } | 99 | } |
100 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); | ||
101 | |||
102 | static const struct common_glue_ctx twofish_enc = { | ||
103 | .num_funcs = 2, | ||
104 | .fpu_blocks_limit = -1, | ||
105 | |||
106 | .funcs = { { | ||
107 | .num_blocks = 3, | ||
108 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
109 | }, { | ||
110 | .num_blocks = 1, | ||
111 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
112 | } } | ||
113 | }; | ||
231 | 114 | ||
232 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 115 | static const struct common_glue_ctx twofish_ctr = { |
233 | struct scatterlist *src, unsigned int nbytes) | 116 | .num_funcs = 2, |
234 | { | 117 | .fpu_blocks_limit = -1, |
235 | struct blkcipher_walk walk; | 118 | |
236 | int err; | 119 | .funcs = { { |
237 | 120 | .num_blocks = 3, | |
238 | blkcipher_walk_init(&walk, dst, src, nbytes); | 121 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } |
239 | err = blkcipher_walk_virt(desc, &walk); | 122 | }, { |
123 | .num_blocks = 1, | ||
124 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } | ||
125 | } } | ||
126 | }; | ||
240 | 127 | ||
241 | while ((nbytes = walk.nbytes)) { | 128 | static const struct common_glue_ctx twofish_dec = { |
242 | nbytes = __cbc_decrypt(desc, &walk); | 129 | .num_funcs = 2, |
243 | err = blkcipher_walk_done(desc, &walk, nbytes); | 130 | .fpu_blocks_limit = -1, |
244 | } | 131 | |
132 | .funcs = { { | ||
133 | .num_blocks = 3, | ||
134 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
135 | }, { | ||
136 | .num_blocks = 1, | ||
137 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
138 | } } | ||
139 | }; | ||
245 | 140 | ||
246 | return err; | 141 | static const struct common_glue_ctx twofish_dec_cbc = { |
247 | } | 142 | .num_funcs = 2, |
143 | .fpu_blocks_limit = -1, | ||
144 | |||
145 | .funcs = { { | ||
146 | .num_blocks = 3, | ||
147 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
148 | }, { | ||
149 | .num_blocks = 1, | ||
150 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
151 | } } | ||
152 | }; | ||
248 | 153 | ||
249 | static inline void u128_to_be128(be128 *dst, const u128 *src) | 154 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
155 | struct scatterlist *src, unsigned int nbytes) | ||
250 | { | 156 | { |
251 | dst->a = cpu_to_be64(src->a); | 157 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); |
252 | dst->b = cpu_to_be64(src->b); | ||
253 | } | 158 | } |
254 | 159 | ||
255 | static inline void be128_to_u128(u128 *dst, const be128 *src) | 160 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
161 | struct scatterlist *src, unsigned int nbytes) | ||
256 | { | 162 | { |
257 | dst->a = be64_to_cpu(src->a); | 163 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); |
258 | dst->b = be64_to_cpu(src->b); | ||
259 | } | 164 | } |
260 | 165 | ||
261 | static inline void u128_inc(u128 *i) | 166 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
167 | struct scatterlist *src, unsigned int nbytes) | ||
262 | { | 168 | { |
263 | i->b++; | 169 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, |
264 | if (!i->b) | 170 | dst, src, nbytes); |
265 | i->a++; | ||
266 | } | 171 | } |
267 | 172 | ||
268 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 173 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
269 | struct blkcipher_walk *walk) | 174 | struct scatterlist *src, unsigned int nbytes) |
270 | { | 175 | { |
271 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 176 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, |
272 | u8 *ctrblk = walk->iv; | 177 | nbytes); |
273 | u8 keystream[TF_BLOCK_SIZE]; | ||
274 | u8 *src = walk->src.virt.addr; | ||
275 | u8 *dst = walk->dst.virt.addr; | ||
276 | unsigned int nbytes = walk->nbytes; | ||
277 | |||
278 | twofish_enc_blk(ctx, keystream, ctrblk); | ||
279 | crypto_xor(keystream, src, nbytes); | ||
280 | memcpy(dst, keystream, nbytes); | ||
281 | |||
282 | crypto_inc(ctrblk, TF_BLOCK_SIZE); | ||
283 | } | ||
284 | |||
285 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
286 | struct blkcipher_walk *walk) | ||
287 | { | ||
288 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
289 | unsigned int bsize = TF_BLOCK_SIZE; | ||
290 | unsigned int nbytes = walk->nbytes; | ||
291 | u128 *src = (u128 *)walk->src.virt.addr; | ||
292 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
293 | u128 ctrblk; | ||
294 | be128 ctrblocks[3]; | ||
295 | |||
296 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
297 | |||
298 | /* Process three block batch */ | ||
299 | if (nbytes >= bsize * 3) { | ||
300 | do { | ||
301 | if (dst != src) { | ||
302 | dst[0] = src[0]; | ||
303 | dst[1] = src[1]; | ||
304 | dst[2] = src[2]; | ||
305 | } | ||
306 | |||
307 | /* create ctrblks for parallel encrypt */ | ||
308 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
309 | u128_inc(&ctrblk); | ||
310 | u128_to_be128(&ctrblocks[1], &ctrblk); | ||
311 | u128_inc(&ctrblk); | ||
312 | u128_to_be128(&ctrblocks[2], &ctrblk); | ||
313 | u128_inc(&ctrblk); | ||
314 | |||
315 | twofish_enc_blk_xor_3way(ctx, (u8 *)dst, | ||
316 | (u8 *)ctrblocks); | ||
317 | |||
318 | src += 3; | ||
319 | dst += 3; | ||
320 | nbytes -= bsize * 3; | ||
321 | } while (nbytes >= bsize * 3); | ||
322 | |||
323 | if (nbytes < bsize) | ||
324 | goto done; | ||
325 | } | ||
326 | |||
327 | /* Handle leftovers */ | ||
328 | do { | ||
329 | if (dst != src) | ||
330 | *dst = *src; | ||
331 | |||
332 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
333 | u128_inc(&ctrblk); | ||
334 | |||
335 | twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
336 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
337 | |||
338 | src += 1; | ||
339 | dst += 1; | ||
340 | nbytes -= bsize; | ||
341 | } while (nbytes >= bsize); | ||
342 | |||
343 | done: | ||
344 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
345 | return nbytes; | ||
346 | } | 178 | } |
347 | 179 | ||
348 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 180 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
349 | struct scatterlist *src, unsigned int nbytes) | 181 | struct scatterlist *src, unsigned int nbytes) |
350 | { | 182 | { |
351 | struct blkcipher_walk walk; | 183 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); |
352 | int err; | ||
353 | |||
354 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
355 | err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); | ||
356 | |||
357 | while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { | ||
358 | nbytes = __ctr_crypt(desc, &walk); | ||
359 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
360 | } | ||
361 | |||
362 | if (walk.nbytes) { | ||
363 | ctr_crypt_final(desc, &walk); | ||
364 | err = blkcipher_walk_done(desc, &walk, 0); | ||
365 | } | ||
366 | |||
367 | return err; | ||
368 | } | 184 | } |
369 | 185 | ||
370 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | 186 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) |
@@ -397,13 +213,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |||
397 | twofish_dec_blk(ctx, srcdst, srcdst); | 213 | twofish_dec_blk(ctx, srcdst, srcdst); |
398 | } | 214 | } |
399 | 215 | ||
400 | struct twofish_lrw_ctx { | 216 | int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
401 | struct lrw_table_ctx lrw_table; | 217 | unsigned int keylen) |
402 | struct twofish_ctx twofish_ctx; | ||
403 | }; | ||
404 | |||
405 | static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
406 | unsigned int keylen) | ||
407 | { | 218 | { |
408 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 219 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
409 | int err; | 220 | int err; |
@@ -415,6 +226,7 @@ static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
415 | 226 | ||
416 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); | 227 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); |
417 | } | 228 | } |
229 | EXPORT_SYMBOL_GPL(lrw_twofish_setkey); | ||
418 | 230 | ||
419 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 231 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
420 | struct scatterlist *src, unsigned int nbytes) | 232 | struct scatterlist *src, unsigned int nbytes) |
@@ -450,20 +262,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
450 | return lrw_crypt(desc, dst, src, nbytes, &req); | 262 | return lrw_crypt(desc, dst, src, nbytes, &req); |
451 | } | 263 | } |
452 | 264 | ||
453 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | 265 | void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) |
454 | { | 266 | { |
455 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 267 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
456 | 268 | ||
457 | lrw_free_table(&ctx->lrw_table); | 269 | lrw_free_table(&ctx->lrw_table); |
458 | } | 270 | } |
271 | EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm); | ||
459 | 272 | ||
460 | struct twofish_xts_ctx { | 273 | int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
461 | struct twofish_ctx tweak_ctx; | 274 | unsigned int keylen) |
462 | struct twofish_ctx crypt_ctx; | ||
463 | }; | ||
464 | |||
465 | static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
466 | unsigned int keylen) | ||
467 | { | 275 | { |
468 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); | 276 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); |
469 | u32 *flags = &tfm->crt_flags; | 277 | u32 *flags = &tfm->crt_flags; |
@@ -486,6 +294,7 @@ static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
486 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, | 294 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, |
487 | flags); | 295 | flags); |
488 | } | 296 | } |
297 | EXPORT_SYMBOL_GPL(xts_twofish_setkey); | ||
489 | 298 | ||
490 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 299 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
491 | struct scatterlist *src, unsigned int nbytes) | 300 | struct scatterlist *src, unsigned int nbytes) |
@@ -596,7 +405,7 @@ static struct crypto_alg tf_algs[5] = { { | |||
596 | .cra_type = &crypto_blkcipher_type, | 405 | .cra_type = &crypto_blkcipher_type, |
597 | .cra_module = THIS_MODULE, | 406 | .cra_module = THIS_MODULE, |
598 | .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), | 407 | .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), |
599 | .cra_exit = lrw_exit_tfm, | 408 | .cra_exit = lrw_twofish_exit_tfm, |
600 | .cra_u = { | 409 | .cra_u = { |
601 | .blkcipher = { | 410 | .blkcipher = { |
602 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, | 411 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, |
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 98bd70faccc..673ac9b63d6 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c | |||
@@ -38,7 +38,7 @@ | |||
38 | int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) | 38 | int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) |
39 | { | 39 | { |
40 | int err = 0; | 40 | int err = 0; |
41 | bool ia32 = is_ia32_task(); | 41 | bool ia32 = test_thread_flag(TIF_IA32); |
42 | 42 | ||
43 | if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) | 43 | if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) |
44 | return -EFAULT; | 44 | return -EFAULT; |
@@ -273,7 +273,6 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs) | |||
273 | sizeof(frame->extramask)))) | 273 | sizeof(frame->extramask)))) |
274 | goto badframe; | 274 | goto badframe; |
275 | 275 | ||
276 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
277 | set_current_blocked(&set); | 276 | set_current_blocked(&set); |
278 | 277 | ||
279 | if (ia32_restore_sigcontext(regs, &frame->sc, &ax)) | 278 | if (ia32_restore_sigcontext(regs, &frame->sc, &ax)) |
@@ -299,7 +298,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) | |||
299 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | 298 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) |
300 | goto badframe; | 299 | goto badframe; |
301 | 300 | ||
302 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
303 | set_current_blocked(&set); | 301 | set_current_blocked(&set); |
304 | 302 | ||
305 | if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 303 | if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 610001d385d..0c44630d178 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -29,7 +29,7 @@ | |||
29 | #include <asm/processor.h> | 29 | #include <asm/processor.h> |
30 | #include <asm/mmu.h> | 30 | #include <asm/mmu.h> |
31 | #include <asm/mpspec.h> | 31 | #include <asm/mpspec.h> |
32 | #include <asm/trampoline.h> | 32 | #include <asm/realmode.h> |
33 | 33 | ||
34 | #define COMPILER_DEPENDENT_INT64 long long | 34 | #define COMPILER_DEPENDENT_INT64 long long |
35 | #define COMPILER_DEPENDENT_UINT64 unsigned long long | 35 | #define COMPILER_DEPENDENT_UINT64 unsigned long long |
@@ -117,11 +117,8 @@ static inline void acpi_disable_pci(void) | |||
117 | /* Low-level suspend routine. */ | 117 | /* Low-level suspend routine. */ |
118 | extern int acpi_suspend_lowlevel(void); | 118 | extern int acpi_suspend_lowlevel(void); |
119 | 119 | ||
120 | extern const unsigned char acpi_wakeup_code[]; | 120 | /* Physical address to resume after wakeup */ |
121 | #define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code))) | 121 | #define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start)) |
122 | |||
123 | /* early initialization routine */ | ||
124 | extern void acpi_reserve_wakeup_memory(void); | ||
125 | 122 | ||
126 | /* | 123 | /* |
127 | * Check if the CPU can handle C2 and deeper | 124 | * Check if the CPU can handle C2 and deeper |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 49331bedc15..70780689599 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -75,23 +75,54 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
75 | } | 75 | } |
76 | #endif /* CONFIG_SMP */ | 76 | #endif /* CONFIG_SMP */ |
77 | 77 | ||
78 | #define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n" | ||
79 | |||
80 | #define b_replacement(number) "663"#number | ||
81 | #define e_replacement(number) "664"#number | ||
82 | |||
83 | #define alt_slen "662b-661b" | ||
84 | #define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" | ||
85 | |||
86 | #define ALTINSTR_ENTRY(feature, number) \ | ||
87 | " .long 661b - .\n" /* label */ \ | ||
88 | " .long " b_replacement(number)"f - .\n" /* new instruction */ \ | ||
89 | " .word " __stringify(feature) "\n" /* feature bit */ \ | ||
90 | " .byte " alt_slen "\n" /* source len */ \ | ||
91 | " .byte " alt_rlen(number) "\n" /* replacement len */ | ||
92 | |||
93 | #define DISCARD_ENTRY(number) /* rlen <= slen */ \ | ||
94 | " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n" | ||
95 | |||
96 | #define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \ | ||
97 | b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t" | ||
98 | |||
78 | /* alternative assembly primitive: */ | 99 | /* alternative assembly primitive: */ |
79 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ | 100 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ |
80 | \ | 101 | OLDINSTR(oldinstr) \ |
81 | "661:\n\t" oldinstr "\n662:\n" \ | 102 | ".section .altinstructions,\"a\"\n" \ |
82 | ".section .altinstructions,\"a\"\n" \ | 103 | ALTINSTR_ENTRY(feature, 1) \ |
83 | " .long 661b - .\n" /* label */ \ | 104 | ".previous\n" \ |
84 | " .long 663f - .\n" /* new instruction */ \ | 105 | ".section .discard,\"aw\",@progbits\n" \ |
85 | " .word " __stringify(feature) "\n" /* feature bit */ \ | 106 | DISCARD_ENTRY(1) \ |
86 | " .byte 662b-661b\n" /* sourcelen */ \ | 107 | ".previous\n" \ |
87 | " .byte 664f-663f\n" /* replacementlen */ \ | 108 | ".section .altinstr_replacement, \"ax\"\n" \ |
88 | ".previous\n" \ | 109 | ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ |
89 | ".section .discard,\"aw\",@progbits\n" \ | 110 | ".previous" |
90 | " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ | 111 | |
91 | ".previous\n" \ | 112 | #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ |
92 | ".section .altinstr_replacement, \"ax\"\n" \ | 113 | OLDINSTR(oldinstr) \ |
93 | "663:\n\t" newinstr "\n664:\n" /* replacement */ \ | 114 | ".section .altinstructions,\"a\"\n" \ |
94 | ".previous" | 115 | ALTINSTR_ENTRY(feature1, 1) \ |
116 | ALTINSTR_ENTRY(feature2, 2) \ | ||
117 | ".previous\n" \ | ||
118 | ".section .discard,\"aw\",@progbits\n" \ | ||
119 | DISCARD_ENTRY(1) \ | ||
120 | DISCARD_ENTRY(2) \ | ||
121 | ".previous\n" \ | ||
122 | ".section .altinstr_replacement, \"ax\"\n" \ | ||
123 | ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ | ||
124 | ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ | ||
125 | ".previous" | ||
95 | 126 | ||
96 | /* | 127 | /* |
97 | * This must be included *after* the definition of ALTERNATIVE due to | 128 | * This must be included *after* the definition of ALTERNATIVE due to |
@@ -140,6 +171,19 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
140 | : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) | 171 | : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) |
141 | 172 | ||
142 | /* | 173 | /* |
174 | * Like alternative_call, but there are two features and respective functions. | ||
175 | * If CPU has feature2, function2 is used. | ||
176 | * Otherwise, if CPU has feature1, function1 is used. | ||
177 | * Otherwise, old function is used. | ||
178 | */ | ||
179 | #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ | ||
180 | output, input...) \ | ||
181 | asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ | ||
182 | "call %P[new2]", feature2) \ | ||
183 | : output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ | ||
184 | [new2] "i" (newfunc2), ## input) | ||
185 | |||
186 | /* | ||
143 | * use this macro(s) if you need more than one output parameter | 187 | * use this macro(s) if you need more than one output parameter |
144 | * in alternative_io | 188 | * in alternative_io |
145 | */ | 189 | */ |
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 49ad773f4b9..b3341e9cd8f 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h | |||
@@ -26,10 +26,31 @@ struct amd_l3_cache { | |||
26 | u8 subcaches[4]; | 26 | u8 subcaches[4]; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | struct threshold_block { | ||
30 | unsigned int block; | ||
31 | unsigned int bank; | ||
32 | unsigned int cpu; | ||
33 | u32 address; | ||
34 | u16 interrupt_enable; | ||
35 | bool interrupt_capable; | ||
36 | u16 threshold_limit; | ||
37 | struct kobject kobj; | ||
38 | struct list_head miscj; | ||
39 | }; | ||
40 | |||
41 | struct threshold_bank { | ||
42 | struct kobject *kobj; | ||
43 | struct threshold_block *blocks; | ||
44 | |||
45 | /* initialized to the number of CPUs on the node sharing this bank */ | ||
46 | atomic_t cpus; | ||
47 | }; | ||
48 | |||
29 | struct amd_northbridge { | 49 | struct amd_northbridge { |
30 | struct pci_dev *misc; | 50 | struct pci_dev *misc; |
31 | struct pci_dev *link; | 51 | struct pci_dev *link; |
32 | struct amd_l3_cache l3_cache; | 52 | struct amd_l3_cache l3_cache; |
53 | struct threshold_bank *bank4; | ||
33 | }; | 54 | }; |
34 | 55 | ||
35 | struct amd_northbridge_info { | 56 | struct amd_northbridge_info { |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index eaff4790ed9..3ea51a84a0e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -306,7 +306,8 @@ struct apic { | |||
306 | unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); | 306 | unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); |
307 | unsigned long (*check_apicid_present)(int apicid); | 307 | unsigned long (*check_apicid_present)(int apicid); |
308 | 308 | ||
309 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); | 309 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, |
310 | const struct cpumask *mask); | ||
310 | void (*init_apic_ldr)(void); | 311 | void (*init_apic_ldr)(void); |
311 | 312 | ||
312 | void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); | 313 | void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); |
@@ -331,9 +332,9 @@ struct apic { | |||
331 | unsigned long (*set_apic_id)(unsigned int id); | 332 | unsigned long (*set_apic_id)(unsigned int id); |
332 | unsigned long apic_id_mask; | 333 | unsigned long apic_id_mask; |
333 | 334 | ||
334 | unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); | 335 | int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, |
335 | unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, | 336 | const struct cpumask *andmask, |
336 | const struct cpumask *andmask); | 337 | unsigned int *apicid); |
337 | 338 | ||
338 | /* ipi */ | 339 | /* ipi */ |
339 | void (*send_IPI_mask)(const struct cpumask *mask, int vector); | 340 | void (*send_IPI_mask)(const struct cpumask *mask, int vector); |
@@ -464,6 +465,8 @@ static inline u32 safe_apic_wait_icr_idle(void) | |||
464 | return apic->safe_wait_icr_idle(); | 465 | return apic->safe_wait_icr_idle(); |
465 | } | 466 | } |
466 | 467 | ||
468 | extern void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)); | ||
469 | |||
467 | #else /* CONFIG_X86_LOCAL_APIC */ | 470 | #else /* CONFIG_X86_LOCAL_APIC */ |
468 | 471 | ||
469 | static inline u32 apic_read(u32 reg) { return 0; } | 472 | static inline u32 apic_read(u32 reg) { return 0; } |
@@ -473,6 +476,7 @@ static inline u64 apic_icr_read(void) { return 0; } | |||
473 | static inline void apic_icr_write(u32 low, u32 high) { } | 476 | static inline void apic_icr_write(u32 low, u32 high) { } |
474 | static inline void apic_wait_icr_idle(void) { } | 477 | static inline void apic_wait_icr_idle(void) { } |
475 | static inline u32 safe_apic_wait_icr_idle(void) { return 0; } | 478 | static inline u32 safe_apic_wait_icr_idle(void) { return 0; } |
479 | static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {} | ||
476 | 480 | ||
477 | #endif /* CONFIG_X86_LOCAL_APIC */ | 481 | #endif /* CONFIG_X86_LOCAL_APIC */ |
478 | 482 | ||
@@ -537,6 +541,11 @@ static inline const struct cpumask *default_target_cpus(void) | |||
537 | #endif | 541 | #endif |
538 | } | 542 | } |
539 | 543 | ||
544 | static inline const struct cpumask *online_target_cpus(void) | ||
545 | { | ||
546 | return cpu_online_mask; | ||
547 | } | ||
548 | |||
540 | DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); | 549 | DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); |
541 | 550 | ||
542 | 551 | ||
@@ -586,21 +595,50 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) | |||
586 | 595 | ||
587 | #endif | 596 | #endif |
588 | 597 | ||
589 | static inline unsigned int | 598 | static inline int |
590 | default_cpu_mask_to_apicid(const struct cpumask *cpumask) | 599 | flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
600 | const struct cpumask *andmask, | ||
601 | unsigned int *apicid) | ||
591 | { | 602 | { |
592 | return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; | 603 | unsigned long cpu_mask = cpumask_bits(cpumask)[0] & |
604 | cpumask_bits(andmask)[0] & | ||
605 | cpumask_bits(cpu_online_mask)[0] & | ||
606 | APIC_ALL_CPUS; | ||
607 | |||
608 | if (likely(cpu_mask)) { | ||
609 | *apicid = (unsigned int)cpu_mask; | ||
610 | return 0; | ||
611 | } else { | ||
612 | return -EINVAL; | ||
613 | } | ||
593 | } | 614 | } |
594 | 615 | ||
595 | static inline unsigned int | 616 | extern int |
596 | default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 617 | default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
597 | const struct cpumask *andmask) | 618 | const struct cpumask *andmask, |
619 | unsigned int *apicid); | ||
620 | |||
621 | static inline void | ||
622 | flat_vector_allocation_domain(int cpu, struct cpumask *retmask, | ||
623 | const struct cpumask *mask) | ||
598 | { | 624 | { |
599 | unsigned long mask1 = cpumask_bits(cpumask)[0]; | 625 | /* Careful. Some cpus do not strictly honor the set of cpus |
600 | unsigned long mask2 = cpumask_bits(andmask)[0]; | 626 | * specified in the interrupt destination when using lowest |
601 | unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; | 627 | * priority interrupt delivery mode. |
628 | * | ||
629 | * In particular there was a hyperthreading cpu observed to | ||
630 | * deliver interrupts to the wrong hyperthread when only one | ||
631 | * hyperthread was specified in the interrupt desitination. | ||
632 | */ | ||
633 | cpumask_clear(retmask); | ||
634 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
635 | } | ||
602 | 636 | ||
603 | return (unsigned int)(mask1 & mask2 & mask3); | 637 | static inline void |
638 | default_vector_allocation_domain(int cpu, struct cpumask *retmask, | ||
639 | const struct cpumask *mask) | ||
640 | { | ||
641 | cpumask_copy(retmask, cpumask_of(cpu)); | ||
604 | } | 642 | } |
605 | 643 | ||
606 | static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) | 644 | static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) |
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index b97596e2b68..72f5009deb5 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
@@ -15,6 +15,8 @@ | |||
15 | #include <linux/compiler.h> | 15 | #include <linux/compiler.h> |
16 | #include <asm/alternative.h> | 16 | #include <asm/alternative.h> |
17 | 17 | ||
18 | #define BIT_64(n) (U64_C(1) << (n)) | ||
19 | |||
18 | /* | 20 | /* |
19 | * These have to be done with inline assembly: that way the bit-setting | 21 | * These have to be done with inline assembly: that way the bit-setting |
20 | * is guaranteed to be atomic. All bit operations return 0 if the bit | 22 | * is guaranteed to be atomic. All bit operations return 0 if the bit |
@@ -262,6 +264,13 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) | |||
262 | * This operation is non-atomic and can be reordered. | 264 | * This operation is non-atomic and can be reordered. |
263 | * If two examples of this operation race, one can appear to succeed | 265 | * If two examples of this operation race, one can appear to succeed |
264 | * but actually fail. You must protect multiple accesses with a lock. | 266 | * but actually fail. You must protect multiple accesses with a lock. |
267 | * | ||
268 | * Note: the operation is performed atomically with respect to | ||
269 | * the local CPU, but not other CPUs. Portable code should not | ||
270 | * rely on this behaviour. | ||
271 | * KVM relies on this behaviour on x86 for modifying memory that is also | ||
272 | * accessed from a hypervisor on the same CPU if running in a VM: don't change | ||
273 | * this without also updating arch/x86/kernel/kvm.c | ||
265 | */ | 274 | */ |
266 | static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) | 275 | static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) |
267 | { | 276 | { |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 340ee49961a..6b7ee5ff682 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -176,7 +176,7 @@ | |||
176 | #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ | 176 | #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ |
177 | #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ | 177 | #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ |
178 | #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ | 178 | #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ |
179 | #define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ | 179 | #define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ |
180 | #define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */ | 180 | #define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */ |
181 | 181 | ||
182 | /* Virtualization flags: Linux defined, word 8 */ | 182 | /* Virtualization flags: Linux defined, word 8 */ |
@@ -207,6 +207,8 @@ | |||
207 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | 207 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ |
208 | #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ | 208 | #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ |
209 | #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ | 209 | #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ |
210 | #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ | ||
211 | #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ | ||
210 | 212 | ||
211 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 213 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
212 | 214 | ||
diff --git a/arch/x86/include/asm/crypto/ablk_helper.h b/arch/x86/include/asm/crypto/ablk_helper.h new file mode 100644 index 00000000000..4f93df50c23 --- /dev/null +++ b/arch/x86/include/asm/crypto/ablk_helper.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /* | ||
2 | * Shared async block cipher helpers | ||
3 | */ | ||
4 | |||
5 | #ifndef _CRYPTO_ABLK_HELPER_H | ||
6 | #define _CRYPTO_ABLK_HELPER_H | ||
7 | |||
8 | #include <linux/crypto.h> | ||
9 | #include <linux/kernel.h> | ||
10 | #include <crypto/cryptd.h> | ||
11 | |||
12 | struct async_helper_ctx { | ||
13 | struct cryptd_ablkcipher *cryptd_tfm; | ||
14 | }; | ||
15 | |||
16 | extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
17 | unsigned int key_len); | ||
18 | |||
19 | extern int __ablk_encrypt(struct ablkcipher_request *req); | ||
20 | |||
21 | extern int ablk_encrypt(struct ablkcipher_request *req); | ||
22 | |||
23 | extern int ablk_decrypt(struct ablkcipher_request *req); | ||
24 | |||
25 | extern void ablk_exit(struct crypto_tfm *tfm); | ||
26 | |||
27 | extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name); | ||
28 | |||
29 | extern int ablk_init(struct crypto_tfm *tfm); | ||
30 | |||
31 | #endif /* _CRYPTO_ABLK_HELPER_H */ | ||
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/crypto/aes.h index 80545a1cbe3..80545a1cbe3 100644 --- a/arch/x86/include/asm/aes.h +++ b/arch/x86/include/asm/crypto/aes.h | |||
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h new file mode 100644 index 00000000000..3e408bddc96 --- /dev/null +++ b/arch/x86/include/asm/crypto/glue_helper.h | |||
@@ -0,0 +1,115 @@ | |||
1 | /* | ||
2 | * Shared glue code for 128bit block ciphers | ||
3 | */ | ||
4 | |||
5 | #ifndef _CRYPTO_GLUE_HELPER_H | ||
6 | #define _CRYPTO_GLUE_HELPER_H | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/crypto.h> | ||
10 | #include <asm/i387.h> | ||
11 | #include <crypto/b128ops.h> | ||
12 | |||
13 | typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); | ||
14 | typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); | ||
15 | typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, | ||
16 | u128 *iv); | ||
17 | |||
18 | #define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) | ||
19 | #define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) | ||
20 | #define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) | ||
21 | |||
22 | struct common_glue_func_entry { | ||
23 | unsigned int num_blocks; /* number of blocks that @fn will process */ | ||
24 | union { | ||
25 | common_glue_func_t ecb; | ||
26 | common_glue_cbc_func_t cbc; | ||
27 | common_glue_ctr_func_t ctr; | ||
28 | } fn_u; | ||
29 | }; | ||
30 | |||
31 | struct common_glue_ctx { | ||
32 | unsigned int num_funcs; | ||
33 | int fpu_blocks_limit; /* -1 means fpu not needed at all */ | ||
34 | |||
35 | /* | ||
36 | * First funcs entry must have largest num_blocks and last funcs entry | ||
37 | * must have num_blocks == 1! | ||
38 | */ | ||
39 | struct common_glue_func_entry funcs[]; | ||
40 | }; | ||
41 | |||
42 | static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, | ||
43 | struct blkcipher_desc *desc, | ||
44 | bool fpu_enabled, unsigned int nbytes) | ||
45 | { | ||
46 | if (likely(fpu_blocks_limit < 0)) | ||
47 | return false; | ||
48 | |||
49 | if (fpu_enabled) | ||
50 | return true; | ||
51 | |||
52 | /* | ||
53 | * Vector-registers are only used when chunk to be processed is large | ||
54 | * enough, so do not enable FPU until it is necessary. | ||
55 | */ | ||
56 | if (nbytes < bsize * (unsigned int)fpu_blocks_limit) | ||
57 | return false; | ||
58 | |||
59 | if (desc) { | ||
60 | /* prevent sleeping if FPU is in use */ | ||
61 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
62 | } | ||
63 | |||
64 | kernel_fpu_begin(); | ||
65 | return true; | ||
66 | } | ||
67 | |||
68 | static inline void glue_fpu_end(bool fpu_enabled) | ||
69 | { | ||
70 | if (fpu_enabled) | ||
71 | kernel_fpu_end(); | ||
72 | } | ||
73 | |||
74 | static inline void u128_to_be128(be128 *dst, const u128 *src) | ||
75 | { | ||
76 | dst->a = cpu_to_be64(src->a); | ||
77 | dst->b = cpu_to_be64(src->b); | ||
78 | } | ||
79 | |||
80 | static inline void be128_to_u128(u128 *dst, const be128 *src) | ||
81 | { | ||
82 | dst->a = be64_to_cpu(src->a); | ||
83 | dst->b = be64_to_cpu(src->b); | ||
84 | } | ||
85 | |||
86 | static inline void u128_inc(u128 *i) | ||
87 | { | ||
88 | i->b++; | ||
89 | if (!i->b) | ||
90 | i->a++; | ||
91 | } | ||
92 | |||
93 | extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
94 | struct blkcipher_desc *desc, | ||
95 | struct scatterlist *dst, | ||
96 | struct scatterlist *src, unsigned int nbytes); | ||
97 | |||
98 | extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
99 | struct blkcipher_desc *desc, | ||
100 | struct scatterlist *dst, | ||
101 | struct scatterlist *src, | ||
102 | unsigned int nbytes); | ||
103 | |||
104 | extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
105 | struct blkcipher_desc *desc, | ||
106 | struct scatterlist *dst, | ||
107 | struct scatterlist *src, | ||
108 | unsigned int nbytes); | ||
109 | |||
110 | extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
111 | struct blkcipher_desc *desc, | ||
112 | struct scatterlist *dst, | ||
113 | struct scatterlist *src, unsigned int nbytes); | ||
114 | |||
115 | #endif /* _CRYPTO_GLUE_HELPER_H */ | ||
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h new file mode 100644 index 00000000000..432deedd294 --- /dev/null +++ b/arch/x86/include/asm/crypto/serpent-avx.h | |||
@@ -0,0 +1,32 @@ | |||
1 | #ifndef ASM_X86_SERPENT_AVX_H | ||
2 | #define ASM_X86_SERPENT_AVX_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | #include <crypto/serpent.h> | ||
6 | |||
7 | #define SERPENT_PARALLEL_BLOCKS 8 | ||
8 | |||
9 | asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
10 | const u8 *src, bool xor); | ||
11 | asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
12 | const u8 *src); | ||
13 | |||
14 | static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
15 | const u8 *src) | ||
16 | { | ||
17 | __serpent_enc_blk_8way_avx(ctx, dst, src, false); | ||
18 | } | ||
19 | |||
20 | static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, | ||
21 | const u8 *src) | ||
22 | { | ||
23 | __serpent_enc_blk_8way_avx(ctx, dst, src, true); | ||
24 | } | ||
25 | |||
26 | static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
27 | const u8 *src) | ||
28 | { | ||
29 | serpent_dec_blk_8way_avx(ctx, dst, src); | ||
30 | } | ||
31 | |||
32 | #endif | ||
diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/crypto/serpent-sse2.h index d3ef63fe0c8..e6e77dffbda 100644 --- a/arch/x86/include/asm/serpent.h +++ b/arch/x86/include/asm/crypto/serpent-sse2.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef ASM_X86_SERPENT_H | 1 | #ifndef ASM_X86_SERPENT_SSE2_H |
2 | #define ASM_X86_SERPENT_H | 2 | #define ASM_X86_SERPENT_SSE2_H |
3 | 3 | ||
4 | #include <linux/crypto.h> | 4 | #include <linux/crypto.h> |
5 | #include <crypto/serpent.h> | 5 | #include <crypto/serpent.h> |
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h new file mode 100644 index 00000000000..9d2c514bd5f --- /dev/null +++ b/arch/x86/include/asm/crypto/twofish.h | |||
@@ -0,0 +1,46 @@ | |||
1 | #ifndef ASM_X86_TWOFISH_H | ||
2 | #define ASM_X86_TWOFISH_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | #include <crypto/twofish.h> | ||
6 | #include <crypto/lrw.h> | ||
7 | #include <crypto/b128ops.h> | ||
8 | |||
9 | struct twofish_lrw_ctx { | ||
10 | struct lrw_table_ctx lrw_table; | ||
11 | struct twofish_ctx twofish_ctx; | ||
12 | }; | ||
13 | |||
14 | struct twofish_xts_ctx { | ||
15 | struct twofish_ctx tweak_ctx; | ||
16 | struct twofish_ctx crypt_ctx; | ||
17 | }; | ||
18 | |||
19 | /* regular block cipher functions from twofish_x86_64 module */ | ||
20 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, | ||
21 | const u8 *src); | ||
22 | asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, | ||
23 | const u8 *src); | ||
24 | |||
25 | /* 3-way parallel cipher functions */ | ||
26 | asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
27 | const u8 *src, bool xor); | ||
28 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
29 | const u8 *src); | ||
30 | |||
31 | /* helpers from twofish_x86_64-3way module */ | ||
32 | extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); | ||
33 | extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, | ||
34 | u128 *iv); | ||
35 | extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, | ||
36 | u128 *iv); | ||
37 | |||
38 | extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
39 | unsigned int keylen); | ||
40 | |||
41 | extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); | ||
42 | |||
43 | extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
44 | unsigned int keylen); | ||
45 | |||
46 | #endif /* ASM_X86_TWOFISH_H */ | ||
diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h new file mode 100644 index 00000000000..c0924165997 --- /dev/null +++ b/arch/x86/include/asm/dma-contiguous.h | |||
@@ -0,0 +1,13 @@ | |||
1 | #ifndef ASMX86_DMA_CONTIGUOUS_H | ||
2 | #define ASMX86_DMA_CONTIGUOUS_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | |||
6 | #include <linux/types.h> | ||
7 | #include <asm-generic/dma-contiguous.h> | ||
8 | |||
9 | static inline void | ||
10 | dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } | ||
11 | |||
12 | #endif | ||
13 | #endif | ||
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 61c0bd25845..f7b4c7903e7 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/io.h> | 13 | #include <asm/io.h> |
14 | #include <asm/swiotlb.h> | 14 | #include <asm/swiotlb.h> |
15 | #include <asm-generic/dma-coherent.h> | 15 | #include <asm-generic/dma-coherent.h> |
16 | #include <linux/dma-contiguous.h> | ||
16 | 17 | ||
17 | #ifdef CONFIG_ISA | 18 | #ifdef CONFIG_ISA |
18 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) | 19 | # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) |
@@ -62,6 +63,10 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, | |||
62 | dma_addr_t *dma_addr, gfp_t flag, | 63 | dma_addr_t *dma_addr, gfp_t flag, |
63 | struct dma_attrs *attrs); | 64 | struct dma_attrs *attrs); |
64 | 65 | ||
66 | extern void dma_generic_free_coherent(struct device *dev, size_t size, | ||
67 | void *vaddr, dma_addr_t dma_addr, | ||
68 | struct dma_attrs *attrs); | ||
69 | |||
65 | #ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ | 70 | #ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ |
66 | extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); | 71 | extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); |
67 | extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); | 72 | extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); |
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index cc70c1c78ca..75ce3f47d20 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h | |||
@@ -4,9 +4,7 @@ | |||
4 | enum reboot_type { | 4 | enum reboot_type { |
5 | BOOT_TRIPLE = 't', | 5 | BOOT_TRIPLE = 't', |
6 | BOOT_KBD = 'k', | 6 | BOOT_KBD = 'k', |
7 | #ifdef CONFIG_X86_32 | ||
8 | BOOT_BIOS = 'b', | 7 | BOOT_BIOS = 'b', |
9 | #endif | ||
10 | BOOT_ACPI = 'a', | 8 | BOOT_ACPI = 'a', |
11 | BOOT_EFI = 'e', | 9 | BOOT_EFI = 'e', |
12 | BOOT_CF9 = 'p', | 10 | BOOT_CF9 = 'p', |
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index dbe82a5c5ea..d3d74698dce 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h | |||
@@ -99,7 +99,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) | |||
99 | virtual_dma_residue += virtual_dma_count; | 99 | virtual_dma_residue += virtual_dma_count; |
100 | virtual_dma_count = 0; | 100 | virtual_dma_count = 0; |
101 | #ifdef TRACE_FLPY_INT | 101 | #ifdef TRACE_FLPY_INT |
102 | printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", | 102 | printk(KERN_DEBUG "count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", |
103 | virtual_dma_count, virtual_dma_residue, calls, bytes, | 103 | virtual_dma_count, virtual_dma_residue, calls, bytes, |
104 | dma_wait); | 104 | dma_wait); |
105 | calls = 0; | 105 | calls = 0; |
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 18d9005d9e4..b0767bc0874 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h | |||
@@ -34,7 +34,7 @@ | |||
34 | 34 | ||
35 | #ifndef __ASSEMBLY__ | 35 | #ifndef __ASSEMBLY__ |
36 | extern void mcount(void); | 36 | extern void mcount(void); |
37 | extern int modifying_ftrace_code; | 37 | extern atomic_t modifying_ftrace_code; |
38 | 38 | ||
39 | static inline unsigned long ftrace_call_adjust(unsigned long addr) | 39 | static inline unsigned long ftrace_call_adjust(unsigned long addr) |
40 | { | 40 | { |
diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h index 91d915a6525..b3799d88ffc 100644 --- a/arch/x86/include/asm/gpio.h +++ b/arch/x86/include/asm/gpio.h | |||
@@ -1,53 +1,4 @@ | |||
1 | /* | 1 | #ifndef __LINUX_GPIO_H |
2 | * Generic GPIO API implementation for x86. | 2 | #warning Include linux/gpio.h instead of asm/gpio.h |
3 | * | 3 | #include <linux/gpio.h> |
4 | * Derived from the generic GPIO API for powerpc: | 4 | #endif |
5 | * | ||
6 | * Copyright (c) 2007-2008 MontaVista Software, Inc. | ||
7 | * | ||
8 | * Author: Anton Vorontsov <avorontsov@ru.mvista.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | */ | ||
15 | |||
16 | #ifndef _ASM_X86_GPIO_H | ||
17 | #define _ASM_X86_GPIO_H | ||
18 | |||
19 | #include <asm-generic/gpio.h> | ||
20 | |||
21 | #ifdef CONFIG_GPIOLIB | ||
22 | |||
23 | /* | ||
24 | * Just call gpiolib. | ||
25 | */ | ||
26 | static inline int gpio_get_value(unsigned int gpio) | ||
27 | { | ||
28 | return __gpio_get_value(gpio); | ||
29 | } | ||
30 | |||
31 | static inline void gpio_set_value(unsigned int gpio, int value) | ||
32 | { | ||
33 | __gpio_set_value(gpio, value); | ||
34 | } | ||
35 | |||
36 | static inline int gpio_cansleep(unsigned int gpio) | ||
37 | { | ||
38 | return __gpio_cansleep(gpio); | ||
39 | } | ||
40 | |||
41 | static inline int gpio_to_irq(unsigned int gpio) | ||
42 | { | ||
43 | return __gpio_to_irq(gpio); | ||
44 | } | ||
45 | |||
46 | static inline int irq_to_gpio(unsigned int irq) | ||
47 | { | ||
48 | return -EINVAL; | ||
49 | } | ||
50 | |||
51 | #endif /* CONFIG_GPIOLIB */ | ||
52 | |||
53 | #endif /* _ASM_X86_GPIO_H */ | ||
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 7a15153c675..b518c750993 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h | |||
@@ -49,6 +49,7 @@ extern const struct hypervisor_x86 *x86_hyper; | |||
49 | extern const struct hypervisor_x86 x86_hyper_vmware; | 49 | extern const struct hypervisor_x86 x86_hyper_vmware; |
50 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | 50 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; |
51 | extern const struct hypervisor_x86 x86_hyper_xen_hvm; | 51 | extern const struct hypervisor_x86 x86_hyper_xen_hvm; |
52 | extern const struct hypervisor_x86 x86_hyper_kvm; | ||
52 | 53 | ||
53 | static inline bool hypervisor_x2apic_available(void) | 54 | static inline bool hypervisor_x2apic_available(void) |
54 | { | 55 | { |
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index dffc38ee625..345c99cef15 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -5,7 +5,6 @@ extern struct dma_map_ops nommu_dma_ops; | |||
5 | extern int force_iommu, no_iommu; | 5 | extern int force_iommu, no_iommu; |
6 | extern int iommu_detected; | 6 | extern int iommu_detected; |
7 | extern int iommu_pass_through; | 7 | extern int iommu_pass_through; |
8 | extern int iommu_group_mf; | ||
9 | 8 | ||
10 | /* 10 seconds */ | 9 | /* 10 seconds */ |
11 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) | 10 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index e7d1c194d27..246617efd67 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -12,6 +12,7 @@ | |||
12 | /* Select x86 specific features in <linux/kvm.h> */ | 12 | /* Select x86 specific features in <linux/kvm.h> */ |
13 | #define __KVM_HAVE_PIT | 13 | #define __KVM_HAVE_PIT |
14 | #define __KVM_HAVE_IOAPIC | 14 | #define __KVM_HAVE_IOAPIC |
15 | #define __KVM_HAVE_IRQ_LINE | ||
15 | #define __KVM_HAVE_DEVICE_ASSIGNMENT | 16 | #define __KVM_HAVE_DEVICE_ASSIGNMENT |
16 | #define __KVM_HAVE_MSI | 17 | #define __KVM_HAVE_MSI |
17 | #define __KVM_HAVE_USER_NMI | 18 | #define __KVM_HAVE_USER_NMI |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index c222e1a1b12..c764f43b71c 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -192,15 +192,15 @@ struct x86_emulate_ops { | |||
192 | struct x86_instruction_info *info, | 192 | struct x86_instruction_info *info, |
193 | enum x86_intercept_stage stage); | 193 | enum x86_intercept_stage stage); |
194 | 194 | ||
195 | bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, | 195 | void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, |
196 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); | 196 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); |
197 | }; | 197 | }; |
198 | 198 | ||
199 | typedef u32 __attribute__((vector_size(16))) sse128_t; | 199 | typedef u32 __attribute__((vector_size(16))) sse128_t; |
200 | 200 | ||
201 | /* Type, address-of, and value of an instruction's operand. */ | 201 | /* Type, address-of, and value of an instruction's operand. */ |
202 | struct operand { | 202 | struct operand { |
203 | enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; | 203 | enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; |
204 | unsigned int bytes; | 204 | unsigned int bytes; |
205 | union { | 205 | union { |
206 | unsigned long orig_val; | 206 | unsigned long orig_val; |
@@ -213,12 +213,14 @@ struct operand { | |||
213 | unsigned seg; | 213 | unsigned seg; |
214 | } mem; | 214 | } mem; |
215 | unsigned xmm; | 215 | unsigned xmm; |
216 | unsigned mm; | ||
216 | } addr; | 217 | } addr; |
217 | union { | 218 | union { |
218 | unsigned long val; | 219 | unsigned long val; |
219 | u64 val64; | 220 | u64 val64; |
220 | char valptr[sizeof(unsigned long) + 2]; | 221 | char valptr[sizeof(unsigned long) + 2]; |
221 | sse128_t vec_val; | 222 | sse128_t vec_val; |
223 | u64 mm_val; | ||
222 | }; | 224 | }; |
223 | }; | 225 | }; |
224 | 226 | ||
@@ -278,9 +280,9 @@ struct x86_emulate_ctxt { | |||
278 | u8 modrm_seg; | 280 | u8 modrm_seg; |
279 | bool rip_relative; | 281 | bool rip_relative; |
280 | unsigned long _eip; | 282 | unsigned long _eip; |
283 | struct operand memop; | ||
281 | /* Fields above regs are cleared together. */ | 284 | /* Fields above regs are cleared together. */ |
282 | unsigned long regs[NR_VCPU_REGS]; | 285 | unsigned long regs[NR_VCPU_REGS]; |
283 | struct operand memop; | ||
284 | struct operand *memopp; | 286 | struct operand *memopp; |
285 | struct fetch_cache fetch; | 287 | struct fetch_cache fetch; |
286 | struct read_cache io_read; | 288 | struct read_cache io_read; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e5b97be12d2..09155d64cf7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -48,12 +48,13 @@ | |||
48 | 48 | ||
49 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) | 49 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) |
50 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) | 50 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) |
51 | #define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL | ||
51 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ | 52 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ |
52 | 0xFFFFFF0000000000ULL) | 53 | 0xFFFFFF0000000000ULL) |
53 | #define CR4_RESERVED_BITS \ | 54 | #define CR4_RESERVED_BITS \ |
54 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | 55 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ |
55 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | 56 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ |
56 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 57 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ |
57 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \ | 58 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \ |
58 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | 59 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) |
59 | 60 | ||
@@ -173,6 +174,16 @@ enum { | |||
173 | #define DR7_FIXED_1 0x00000400 | 174 | #define DR7_FIXED_1 0x00000400 |
174 | #define DR7_VOLATILE 0xffff23ff | 175 | #define DR7_VOLATILE 0xffff23ff |
175 | 176 | ||
177 | /* apic attention bits */ | ||
178 | #define KVM_APIC_CHECK_VAPIC 0 | ||
179 | /* | ||
180 | * The following bit is set with PV-EOI, unset on EOI. | ||
181 | * We detect PV-EOI changes by guest by comparing | ||
182 | * this bit with PV-EOI in guest memory. | ||
183 | * See the implementation in apic_update_pv_eoi. | ||
184 | */ | ||
185 | #define KVM_APIC_PV_EOI_PENDING 1 | ||
186 | |||
176 | /* | 187 | /* |
177 | * We don't want allocation failures within the mmu code, so we preallocate | 188 | * We don't want allocation failures within the mmu code, so we preallocate |
178 | * enough memory for a single page fault in a cache. | 189 | * enough memory for a single page fault in a cache. |
@@ -238,8 +249,6 @@ struct kvm_mmu_page { | |||
238 | #endif | 249 | #endif |
239 | 250 | ||
240 | int write_flooding_count; | 251 | int write_flooding_count; |
241 | |||
242 | struct rcu_head rcu; | ||
243 | }; | 252 | }; |
244 | 253 | ||
245 | struct kvm_pio_request { | 254 | struct kvm_pio_request { |
@@ -312,8 +321,8 @@ struct kvm_pmu { | |||
312 | u64 counter_bitmask[2]; | 321 | u64 counter_bitmask[2]; |
313 | u64 global_ctrl_mask; | 322 | u64 global_ctrl_mask; |
314 | u8 version; | 323 | u8 version; |
315 | struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC]; | 324 | struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; |
316 | struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED]; | 325 | struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; |
317 | struct irq_work irq_work; | 326 | struct irq_work irq_work; |
318 | u64 reprogram_pmi; | 327 | u64 reprogram_pmi; |
319 | }; | 328 | }; |
@@ -338,6 +347,7 @@ struct kvm_vcpu_arch { | |||
338 | u64 efer; | 347 | u64 efer; |
339 | u64 apic_base; | 348 | u64 apic_base; |
340 | struct kvm_lapic *apic; /* kernel irqchip context */ | 349 | struct kvm_lapic *apic; /* kernel irqchip context */ |
350 | unsigned long apic_attention; | ||
341 | int32_t apic_arb_prio; | 351 | int32_t apic_arb_prio; |
342 | int mp_state; | 352 | int mp_state; |
343 | int sipi_vector; | 353 | int sipi_vector; |
@@ -482,6 +492,11 @@ struct kvm_vcpu_arch { | |||
482 | u64 length; | 492 | u64 length; |
483 | u64 status; | 493 | u64 status; |
484 | } osvw; | 494 | } osvw; |
495 | |||
496 | struct { | ||
497 | u64 msr_val; | ||
498 | struct gfn_to_hva_cache data; | ||
499 | } pv_eoi; | ||
485 | }; | 500 | }; |
486 | 501 | ||
487 | struct kvm_lpage_info { | 502 | struct kvm_lpage_info { |
@@ -537,8 +552,6 @@ struct kvm_arch { | |||
537 | u64 hv_guest_os_id; | 552 | u64 hv_guest_os_id; |
538 | u64 hv_hypercall; | 553 | u64 hv_hypercall; |
539 | 554 | ||
540 | atomic_t reader_counter; | ||
541 | |||
542 | #ifdef CONFIG_KVM_MMU_AUDIT | 555 | #ifdef CONFIG_KVM_MMU_AUDIT |
543 | int audit_point; | 556 | int audit_point; |
544 | #endif | 557 | #endif |
@@ -661,6 +674,7 @@ struct kvm_x86_ops { | |||
661 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 674 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
662 | int (*get_lpage_level)(void); | 675 | int (*get_lpage_level)(void); |
663 | bool (*rdtscp_supported)(void); | 676 | bool (*rdtscp_supported)(void); |
677 | bool (*invpcid_supported)(void); | ||
664 | void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); | 678 | void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); |
665 | 679 | ||
666 | void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 680 | void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
@@ -713,8 +727,9 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
713 | 727 | ||
714 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 728 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
715 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); | 729 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); |
716 | int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | 730 | void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, |
717 | struct kvm_memory_slot *slot); | 731 | struct kvm_memory_slot *slot, |
732 | gfn_t gfn_offset, unsigned long mask); | ||
718 | void kvm_mmu_zap_all(struct kvm *kvm); | 733 | void kvm_mmu_zap_all(struct kvm *kvm); |
719 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 734 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
720 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 735 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
@@ -801,7 +816,20 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
801 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); | 816 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); |
802 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); | 817 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
803 | 818 | ||
804 | int kvm_pic_set_irq(void *opaque, int irq, int level); | 819 | static inline int __kvm_irq_line_state(unsigned long *irq_state, |
820 | int irq_source_id, int level) | ||
821 | { | ||
822 | /* Logical OR for level trig interrupt */ | ||
823 | if (level) | ||
824 | __set_bit(irq_source_id, irq_state); | ||
825 | else | ||
826 | __clear_bit(irq_source_id, irq_state); | ||
827 | |||
828 | return !!(*irq_state); | ||
829 | } | ||
830 | |||
831 | int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); | ||
832 | void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); | ||
805 | 833 | ||
806 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); | 834 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
807 | 835 | ||
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 183922e13de..2f7712e08b1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -22,6 +22,7 @@ | |||
22 | #define KVM_FEATURE_CLOCKSOURCE2 3 | 22 | #define KVM_FEATURE_CLOCKSOURCE2 3 |
23 | #define KVM_FEATURE_ASYNC_PF 4 | 23 | #define KVM_FEATURE_ASYNC_PF 4 |
24 | #define KVM_FEATURE_STEAL_TIME 5 | 24 | #define KVM_FEATURE_STEAL_TIME 5 |
25 | #define KVM_FEATURE_PV_EOI 6 | ||
25 | 26 | ||
26 | /* The last 8 bits are used to indicate how to interpret the flags field | 27 | /* The last 8 bits are used to indicate how to interpret the flags field |
27 | * in pvclock structure. If no bits are set, all flags are ignored. | 28 | * in pvclock structure. If no bits are set, all flags are ignored. |
@@ -37,6 +38,7 @@ | |||
37 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 | 38 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 |
38 | #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 | 39 | #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 |
39 | #define MSR_KVM_STEAL_TIME 0x4b564d03 | 40 | #define MSR_KVM_STEAL_TIME 0x4b564d03 |
41 | #define MSR_KVM_PV_EOI_EN 0x4b564d04 | ||
40 | 42 | ||
41 | struct kvm_steal_time { | 43 | struct kvm_steal_time { |
42 | __u64 steal; | 44 | __u64 steal; |
@@ -89,12 +91,25 @@ struct kvm_vcpu_pv_apf_data { | |||
89 | __u32 enabled; | 91 | __u32 enabled; |
90 | }; | 92 | }; |
91 | 93 | ||
94 | #define KVM_PV_EOI_BIT 0 | ||
95 | #define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT) | ||
96 | #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK | ||
97 | #define KVM_PV_EOI_DISABLED 0x0 | ||
98 | |||
92 | #ifdef __KERNEL__ | 99 | #ifdef __KERNEL__ |
93 | #include <asm/processor.h> | 100 | #include <asm/processor.h> |
94 | 101 | ||
95 | extern void kvmclock_init(void); | 102 | extern void kvmclock_init(void); |
96 | extern int kvm_register_clock(char *txt); | 103 | extern int kvm_register_clock(char *txt); |
97 | 104 | ||
105 | #ifdef CONFIG_KVM_CLOCK | ||
106 | bool kvm_check_and_clear_guest_paused(void); | ||
107 | #else | ||
108 | static inline bool kvm_check_and_clear_guest_paused(void) | ||
109 | { | ||
110 | return false; | ||
111 | } | ||
112 | #endif /* CONFIG_KVMCLOCK */ | ||
98 | 113 | ||
99 | /* This instruction is vmcall. On non-VT architectures, it will generate a | 114 | /* This instruction is vmcall. On non-VT architectures, it will generate a |
100 | * trap that we will then rewrite to the appropriate instruction. | 115 | * trap that we will then rewrite to the appropriate instruction. |
@@ -173,14 +188,16 @@ static inline int kvm_para_available(void) | |||
173 | if (boot_cpu_data.cpuid_level < 0) | 188 | if (boot_cpu_data.cpuid_level < 0) |
174 | return 0; /* So we don't blow up on old processors */ | 189 | return 0; /* So we don't blow up on old processors */ |
175 | 190 | ||
176 | cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); | 191 | if (cpu_has_hypervisor) { |
177 | memcpy(signature + 0, &ebx, 4); | 192 | cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); |
178 | memcpy(signature + 4, &ecx, 4); | 193 | memcpy(signature + 0, &ebx, 4); |
179 | memcpy(signature + 8, &edx, 4); | 194 | memcpy(signature + 4, &ecx, 4); |
180 | signature[12] = 0; | 195 | memcpy(signature + 8, &edx, 4); |
196 | signature[12] = 0; | ||
181 | 197 | ||
182 | if (strcmp(signature, "KVMKVMKVM") == 0) | 198 | if (strcmp(signature, "KVMKVMKVM") == 0) |
183 | return 1; | 199 | return 1; |
200 | } | ||
184 | 201 | ||
185 | return 0; | 202 | return 0; |
186 | } | 203 | } |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 084ef95274c..813ed103f45 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -115,8 +115,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr, | |||
115 | 115 | ||
116 | extern unsigned long long native_read_tsc(void); | 116 | extern unsigned long long native_read_tsc(void); |
117 | 117 | ||
118 | extern int native_rdmsr_safe_regs(u32 regs[8]); | 118 | extern int rdmsr_safe_regs(u32 regs[8]); |
119 | extern int native_wrmsr_safe_regs(u32 regs[8]); | 119 | extern int wrmsr_safe_regs(u32 regs[8]); |
120 | 120 | ||
121 | static __always_inline unsigned long long __native_read_tsc(void) | 121 | static __always_inline unsigned long long __native_read_tsc(void) |
122 | { | 122 | { |
@@ -187,43 +187,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | |||
187 | return err; | 187 | return err; |
188 | } | 188 | } |
189 | 189 | ||
190 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | ||
191 | { | ||
192 | u32 gprs[8] = { 0 }; | ||
193 | int err; | ||
194 | |||
195 | gprs[1] = msr; | ||
196 | gprs[7] = 0x9c5a203a; | ||
197 | |||
198 | err = native_rdmsr_safe_regs(gprs); | ||
199 | |||
200 | *p = gprs[0] | ((u64)gprs[2] << 32); | ||
201 | |||
202 | return err; | ||
203 | } | ||
204 | |||
205 | static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | ||
206 | { | ||
207 | u32 gprs[8] = { 0 }; | ||
208 | |||
209 | gprs[0] = (u32)val; | ||
210 | gprs[1] = msr; | ||
211 | gprs[2] = val >> 32; | ||
212 | gprs[7] = 0x9c5a203a; | ||
213 | |||
214 | return native_wrmsr_safe_regs(gprs); | ||
215 | } | ||
216 | |||
217 | static inline int rdmsr_safe_regs(u32 regs[8]) | ||
218 | { | ||
219 | return native_rdmsr_safe_regs(regs); | ||
220 | } | ||
221 | |||
222 | static inline int wrmsr_safe_regs(u32 regs[8]) | ||
223 | { | ||
224 | return native_wrmsr_safe_regs(regs); | ||
225 | } | ||
226 | |||
227 | #define rdtscl(low) \ | 190 | #define rdtscl(low) \ |
228 | ((low) = (u32)__native_read_tsc()) | 191 | ((low) = (u32)__native_read_tsc()) |
229 | 192 | ||
@@ -237,6 +200,8 @@ do { \ | |||
237 | (high) = (u32)(_l >> 32); \ | 200 | (high) = (u32)(_l >> 32); \ |
238 | } while (0) | 201 | } while (0) |
239 | 202 | ||
203 | #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) | ||
204 | |||
240 | #define rdtscp(low, high, aux) \ | 205 | #define rdtscp(low, high, aux) \ |
241 | do { \ | 206 | do { \ |
242 | unsigned long long _val = native_read_tscp(&(aux)); \ | 207 | unsigned long long _val = native_read_tscp(&(aux)); \ |
@@ -248,8 +213,7 @@ do { \ | |||
248 | 213 | ||
249 | #endif /* !CONFIG_PARAVIRT */ | 214 | #endif /* !CONFIG_PARAVIRT */ |
250 | 215 | ||
251 | 216 | #define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val), \ | |
252 | #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ | ||
253 | (u32)((val) >> 32)) | 217 | (u32)((val) >> 32)) |
254 | 218 | ||
255 | #define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2)) | 219 | #define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2)) |
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 0e3793b821e..c0fa356e90d 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -44,14 +44,14 @@ struct nmiaction { | |||
44 | const char *name; | 44 | const char *name; |
45 | }; | 45 | }; |
46 | 46 | ||
47 | #define register_nmi_handler(t, fn, fg, n) \ | 47 | #define register_nmi_handler(t, fn, fg, n, init...) \ |
48 | ({ \ | 48 | ({ \ |
49 | static struct nmiaction fn##_na = { \ | 49 | static struct nmiaction init fn##_na = { \ |
50 | .handler = (fn), \ | 50 | .handler = (fn), \ |
51 | .name = (n), \ | 51 | .name = (n), \ |
52 | .flags = (fg), \ | 52 | .flags = (fg), \ |
53 | }; \ | 53 | }; \ |
54 | __register_nmi_handler((t), &fn##_na); \ | 54 | __register_nmi_handler((t), &fn##_na); \ |
55 | }) | 55 | }) |
56 | 56 | ||
57 | int __register_nmi_handler(unsigned int, struct nmiaction *); | 57 | int __register_nmi_handler(unsigned int, struct nmiaction *); |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 6cbbabf5270..0b47ddb6f00 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -128,21 +128,11 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err) | |||
128 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); | 128 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); |
129 | } | 129 | } |
130 | 130 | ||
131 | static inline int paravirt_rdmsr_regs(u32 *regs) | ||
132 | { | ||
133 | return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs); | ||
134 | } | ||
135 | |||
136 | static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) | 131 | static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) |
137 | { | 132 | { |
138 | return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); | 133 | return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); |
139 | } | 134 | } |
140 | 135 | ||
141 | static inline int paravirt_wrmsr_regs(u32 *regs) | ||
142 | { | ||
143 | return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs); | ||
144 | } | ||
145 | |||
146 | /* These should all do BUG_ON(_err), but our headers are too tangled. */ | 136 | /* These should all do BUG_ON(_err), but our headers are too tangled. */ |
147 | #define rdmsr(msr, val1, val2) \ | 137 | #define rdmsr(msr, val1, val2) \ |
148 | do { \ | 138 | do { \ |
@@ -176,9 +166,6 @@ do { \ | |||
176 | _err; \ | 166 | _err; \ |
177 | }) | 167 | }) |
178 | 168 | ||
179 | #define rdmsr_safe_regs(regs) paravirt_rdmsr_regs(regs) | ||
180 | #define wrmsr_safe_regs(regs) paravirt_wrmsr_regs(regs) | ||
181 | |||
182 | static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | 169 | static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) |
183 | { | 170 | { |
184 | int err; | 171 | int err; |
@@ -186,32 +173,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | |||
186 | *p = paravirt_read_msr(msr, &err); | 173 | *p = paravirt_read_msr(msr, &err); |
187 | return err; | 174 | return err; |
188 | } | 175 | } |
189 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | ||
190 | { | ||
191 | u32 gprs[8] = { 0 }; | ||
192 | int err; | ||
193 | |||
194 | gprs[1] = msr; | ||
195 | gprs[7] = 0x9c5a203a; | ||
196 | |||
197 | err = paravirt_rdmsr_regs(gprs); | ||
198 | |||
199 | *p = gprs[0] | ((u64)gprs[2] << 32); | ||
200 | |||
201 | return err; | ||
202 | } | ||
203 | |||
204 | static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | ||
205 | { | ||
206 | u32 gprs[8] = { 0 }; | ||
207 | |||
208 | gprs[0] = (u32)val; | ||
209 | gprs[1] = msr; | ||
210 | gprs[2] = val >> 32; | ||
211 | gprs[7] = 0x9c5a203a; | ||
212 | |||
213 | return paravirt_wrmsr_regs(gprs); | ||
214 | } | ||
215 | 176 | ||
216 | static inline u64 paravirt_read_tsc(void) | 177 | static inline u64 paravirt_read_tsc(void) |
217 | { | 178 | { |
@@ -252,6 +213,8 @@ do { \ | |||
252 | high = _l >> 32; \ | 213 | high = _l >> 32; \ |
253 | } while (0) | 214 | } while (0) |
254 | 215 | ||
216 | #define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) | ||
217 | |||
255 | static inline unsigned long long paravirt_rdtscp(unsigned int *aux) | 218 | static inline unsigned long long paravirt_rdtscp(unsigned int *aux) |
256 | { | 219 | { |
257 | return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); | 220 | return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 8e8b9a4987e..8613cbb7ba4 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -153,9 +153,7 @@ struct pv_cpu_ops { | |||
153 | /* MSR, PMC and TSR operations. | 153 | /* MSR, PMC and TSR operations. |
154 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ | 154 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ |
155 | u64 (*read_msr)(unsigned int msr, int *err); | 155 | u64 (*read_msr)(unsigned int msr, int *err); |
156 | int (*rdmsr_regs)(u32 *regs); | ||
157 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); | 156 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); |
158 | int (*wrmsr_regs)(u32 *regs); | ||
159 | 157 | ||
160 | u64 (*read_tsc)(void); | 158 | u64 (*read_tsc)(void); |
161 | u64 (*read_pmc)(int counter); | 159 | u64 (*read_pmc)(int counter); |
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b3a53174602..73e8eeff22e 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -7,9 +7,13 @@ | |||
7 | #undef DEBUG | 7 | #undef DEBUG |
8 | 8 | ||
9 | #ifdef DEBUG | 9 | #ifdef DEBUG |
10 | #define DBG(x...) printk(x) | 10 | #define DBG(fmt, ...) printk(fmt, ##__VA_ARGS__) |
11 | #else | 11 | #else |
12 | #define DBG(x...) | 12 | #define DBG(fmt, ...) \ |
13 | do { \ | ||
14 | if (0) \ | ||
15 | printk(fmt, ##__VA_ARGS__); \ | ||
16 | } while (0) | ||
13 | #endif | 17 | #endif |
14 | 18 | ||
15 | #define PCI_PROBE_BIOS 0x0001 | 19 | #define PCI_PROBE_BIOS 0x0001 |
@@ -100,6 +104,7 @@ struct pci_raw_ops { | |||
100 | extern const struct pci_raw_ops *raw_pci_ops; | 104 | extern const struct pci_raw_ops *raw_pci_ops; |
101 | extern const struct pci_raw_ops *raw_pci_ext_ops; | 105 | extern const struct pci_raw_ops *raw_pci_ext_ops; |
102 | 106 | ||
107 | extern const struct pci_raw_ops pci_mmcfg; | ||
103 | extern const struct pci_raw_ops pci_direct_conf1; | 108 | extern const struct pci_raw_ops pci_direct_conf1; |
104 | extern bool port_cf9_safe; | 109 | extern bool port_cf9_safe; |
105 | 110 | ||
@@ -135,6 +140,12 @@ struct pci_mmcfg_region { | |||
135 | 140 | ||
136 | extern int __init pci_mmcfg_arch_init(void); | 141 | extern int __init pci_mmcfg_arch_init(void); |
137 | extern void __init pci_mmcfg_arch_free(void); | 142 | extern void __init pci_mmcfg_arch_free(void); |
143 | extern int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg); | ||
144 | extern void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg); | ||
145 | extern int __devinit pci_mmconfig_insert(struct device *dev, | ||
146 | u16 seg, u8 start, | ||
147 | u8 end, phys_addr_t addr); | ||
148 | extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end); | ||
138 | extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); | 149 | extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); |
139 | 150 | ||
140 | extern struct list_head pci_mmcfg_list; | 151 | extern struct list_head pci_mmcfg_list; |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 588f52ea810..c78f14a0df0 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -5,11 +5,10 @@ | |||
5 | * Performance event hw details: | 5 | * Performance event hw details: |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define X86_PMC_MAX_GENERIC 32 | 8 | #define INTEL_PMC_MAX_GENERIC 32 |
9 | #define X86_PMC_MAX_FIXED 3 | 9 | #define INTEL_PMC_MAX_FIXED 3 |
10 | #define INTEL_PMC_IDX_FIXED 32 | ||
10 | 11 | ||
11 | #define X86_PMC_IDX_GENERIC 0 | ||
12 | #define X86_PMC_IDX_FIXED 32 | ||
13 | #define X86_PMC_IDX_MAX 64 | 12 | #define X86_PMC_IDX_MAX 64 |
14 | 13 | ||
15 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 | 14 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 |
@@ -48,8 +47,7 @@ | |||
48 | (X86_RAW_EVENT_MASK | \ | 47 | (X86_RAW_EVENT_MASK | \ |
49 | AMD64_EVENTSEL_EVENT) | 48 | AMD64_EVENTSEL_EVENT) |
50 | #define AMD64_NUM_COUNTERS 4 | 49 | #define AMD64_NUM_COUNTERS 4 |
51 | #define AMD64_NUM_COUNTERS_F15H 6 | 50 | #define AMD64_NUM_COUNTERS_CORE 6 |
52 | #define AMD64_NUM_COUNTERS_MAX AMD64_NUM_COUNTERS_F15H | ||
53 | 51 | ||
54 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | 52 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c |
55 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | 53 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) |
@@ -121,16 +119,16 @@ struct x86_pmu_capability { | |||
121 | 119 | ||
122 | /* Instr_Retired.Any: */ | 120 | /* Instr_Retired.Any: */ |
123 | #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 | 121 | #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 |
124 | #define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) | 122 | #define INTEL_PMC_IDX_FIXED_INSTRUCTIONS (INTEL_PMC_IDX_FIXED + 0) |
125 | 123 | ||
126 | /* CPU_CLK_Unhalted.Core: */ | 124 | /* CPU_CLK_Unhalted.Core: */ |
127 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a | 125 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a |
128 | #define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) | 126 | #define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1) |
129 | 127 | ||
130 | /* CPU_CLK_Unhalted.Ref: */ | 128 | /* CPU_CLK_Unhalted.Ref: */ |
131 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | 129 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b |
132 | #define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2) | 130 | #define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2) |
133 | #define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES) | 131 | #define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES) |
134 | 132 | ||
135 | /* | 133 | /* |
136 | * We model BTS tracing as another fixed-mode PMC. | 134 | * We model BTS tracing as another fixed-mode PMC. |
@@ -139,7 +137,7 @@ struct x86_pmu_capability { | |||
139 | * values are used by actual fixed events and higher values are used | 137 | * values are used by actual fixed events and higher values are used |
140 | * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. | 138 | * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. |
141 | */ | 139 | */ |
142 | #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) | 140 | #define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16) |
143 | 141 | ||
144 | /* | 142 | /* |
145 | * IBS cpuid feature detection | 143 | * IBS cpuid feature detection |
@@ -234,6 +232,7 @@ struct perf_guest_switch_msr { | |||
234 | 232 | ||
235 | extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); | 233 | extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); |
236 | extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); | 234 | extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); |
235 | extern void perf_check_microcode(void); | ||
237 | #else | 236 | #else |
238 | static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) | 237 | static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) |
239 | { | 238 | { |
@@ -247,6 +246,7 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) | |||
247 | } | 246 | } |
248 | 247 | ||
249 | static inline void perf_events_lapic_init(void) { } | 248 | static inline void perf_events_lapic_init(void) { } |
249 | static inline void perf_check_microcode(void) { } | ||
250 | #endif | 250 | #endif |
251 | 251 | ||
252 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) | 252 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) |
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 98391db840c..f2b489cf160 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h | |||
@@ -2,9 +2,9 @@ | |||
2 | #define _ASM_X86_PGTABLE_2LEVEL_H | 2 | #define _ASM_X86_PGTABLE_2LEVEL_H |
3 | 3 | ||
4 | #define pte_ERROR(e) \ | 4 | #define pte_ERROR(e) \ |
5 | printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) | 5 | pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low) |
6 | #define pgd_ERROR(e) \ | 6 | #define pgd_ERROR(e) \ |
7 | printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) | 7 | pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e)) |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * Certain architectures need to do special things when PTEs | 10 | * Certain architectures need to do special things when PTEs |
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index effff47a3c8..4cc9f2b7cdc 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h | |||
@@ -9,13 +9,13 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #define pte_ERROR(e) \ | 11 | #define pte_ERROR(e) \ |
12 | printk("%s:%d: bad pte %p(%08lx%08lx).\n", \ | 12 | pr_err("%s:%d: bad pte %p(%08lx%08lx)\n", \ |
13 | __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low) | 13 | __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low) |
14 | #define pmd_ERROR(e) \ | 14 | #define pmd_ERROR(e) \ |
15 | printk("%s:%d: bad pmd %p(%016Lx).\n", \ | 15 | pr_err("%s:%d: bad pmd %p(%016Lx)\n", \ |
16 | __FILE__, __LINE__, &(e), pmd_val(e)) | 16 | __FILE__, __LINE__, &(e), pmd_val(e)) |
17 | #define pgd_ERROR(e) \ | 17 | #define pgd_ERROR(e) \ |
18 | printk("%s:%d: bad pgd %p(%016Lx).\n", \ | 18 | pr_err("%s:%d: bad pgd %p(%016Lx)\n", \ |
19 | __FILE__, __LINE__, &(e), pgd_val(e)) | 19 | __FILE__, __LINE__, &(e), pgd_val(e)) |
20 | 20 | ||
21 | /* Rules for using set_pte: the pte being assigned *must* be | 21 | /* Rules for using set_pte: the pte being assigned *must* be |
@@ -31,6 +31,60 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) | |||
31 | ptep->pte_low = pte.pte_low; | 31 | ptep->pte_low = pte.pte_low; |
32 | } | 32 | } |
33 | 33 | ||
34 | #define pmd_read_atomic pmd_read_atomic | ||
35 | /* | ||
36 | * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with | ||
37 | * a "*pmdp" dereference done by gcc. Problem is, in certain places | ||
38 | * where pte_offset_map_lock is called, concurrent page faults are | ||
39 | * allowed, if the mmap_sem is hold for reading. An example is mincore | ||
40 | * vs page faults vs MADV_DONTNEED. On the page fault side | ||
41 | * pmd_populate rightfully does a set_64bit, but if we're reading the | ||
42 | * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen | ||
43 | * because gcc will not read the 64bit of the pmd atomically. To fix | ||
44 | * this all places running pmd_offset_map_lock() while holding the | ||
45 | * mmap_sem in read mode, shall read the pmdp pointer using this | ||
46 | * function to know if the pmd is null nor not, and in turn to know if | ||
47 | * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd | ||
48 | * operations. | ||
49 | * | ||
50 | * Without THP if the mmap_sem is hold for reading, the pmd can only | ||
51 | * transition from null to not null while pmd_read_atomic runs. So | ||
52 | * we can always return atomic pmd values with this function. | ||
53 | * | ||
54 | * With THP if the mmap_sem is hold for reading, the pmd can become | ||
55 | * trans_huge or none or point to a pte (and in turn become "stable") | ||
56 | * at any time under pmd_read_atomic. We could read it really | ||
57 | * atomically here with a atomic64_read for the THP enabled case (and | ||
58 | * it would be a whole lot simpler), but to avoid using cmpxchg8b we | ||
59 | * only return an atomic pmdval if the low part of the pmdval is later | ||
60 | * found stable (i.e. pointing to a pte). And we're returning a none | ||
61 | * pmdval if the low part of the pmd is none. In some cases the high | ||
62 | * and low part of the pmdval returned may not be consistent if THP is | ||
63 | * enabled (the low part may point to previously mapped hugepage, | ||
64 | * while the high part may point to a more recently mapped hugepage), | ||
65 | * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part | ||
66 | * of the pmd to be read atomically to decide if the pmd is unstable | ||
67 | * or not, with the only exception of when the low part of the pmd is | ||
68 | * zero in which case we return a none pmd. | ||
69 | */ | ||
70 | static inline pmd_t pmd_read_atomic(pmd_t *pmdp) | ||
71 | { | ||
72 | pmdval_t ret; | ||
73 | u32 *tmp = (u32 *)pmdp; | ||
74 | |||
75 | ret = (pmdval_t) (*tmp); | ||
76 | if (ret) { | ||
77 | /* | ||
78 | * If the low part is null, we must not read the high part | ||
79 | * or we can end up with a partial pmd. | ||
80 | */ | ||
81 | smp_rmb(); | ||
82 | ret |= ((pmdval_t)*(tmp + 1)) << 32; | ||
83 | } | ||
84 | |||
85 | return (pmd_t) { ret }; | ||
86 | } | ||
87 | |||
34 | static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) | 88 | static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) |
35 | { | 89 | { |
36 | set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); | 90 | set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 975f709e09a..8251be02301 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -26,16 +26,16 @@ extern pgd_t init_level4_pgt[]; | |||
26 | extern void paging_init(void); | 26 | extern void paging_init(void); |
27 | 27 | ||
28 | #define pte_ERROR(e) \ | 28 | #define pte_ERROR(e) \ |
29 | printk("%s:%d: bad pte %p(%016lx).\n", \ | 29 | pr_err("%s:%d: bad pte %p(%016lx)\n", \ |
30 | __FILE__, __LINE__, &(e), pte_val(e)) | 30 | __FILE__, __LINE__, &(e), pte_val(e)) |
31 | #define pmd_ERROR(e) \ | 31 | #define pmd_ERROR(e) \ |
32 | printk("%s:%d: bad pmd %p(%016lx).\n", \ | 32 | pr_err("%s:%d: bad pmd %p(%016lx)\n", \ |
33 | __FILE__, __LINE__, &(e), pmd_val(e)) | 33 | __FILE__, __LINE__, &(e), pmd_val(e)) |
34 | #define pud_ERROR(e) \ | 34 | #define pud_ERROR(e) \ |
35 | printk("%s:%d: bad pud %p(%016lx).\n", \ | 35 | pr_err("%s:%d: bad pud %p(%016lx)\n", \ |
36 | __FILE__, __LINE__, &(e), pud_val(e)) | 36 | __FILE__, __LINE__, &(e), pud_val(e)) |
37 | #define pgd_ERROR(e) \ | 37 | #define pgd_ERROR(e) \ |
38 | printk("%s:%d: bad pgd %p(%016lx).\n", \ | 38 | pr_err("%s:%d: bad pgd %p(%016lx)\n", \ |
39 | __FILE__, __LINE__, &(e), pgd_val(e)) | 39 | __FILE__, __LINE__, &(e), pgd_val(e)) |
40 | 40 | ||
41 | struct mm_struct; | 41 | struct mm_struct; |
diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h index 99f262e04b9..8e525059e7d 100644 --- a/arch/x86/include/asm/posix_types_32.h +++ b/arch/x86/include/asm/posix_types_32.h | |||
@@ -10,9 +10,6 @@ | |||
10 | typedef unsigned short __kernel_mode_t; | 10 | typedef unsigned short __kernel_mode_t; |
11 | #define __kernel_mode_t __kernel_mode_t | 11 | #define __kernel_mode_t __kernel_mode_t |
12 | 12 | ||
13 | typedef unsigned short __kernel_nlink_t; | ||
14 | #define __kernel_nlink_t __kernel_nlink_t | ||
15 | |||
16 | typedef unsigned short __kernel_ipc_pid_t; | 13 | typedef unsigned short __kernel_ipc_pid_t; |
17 | #define __kernel_ipc_pid_t __kernel_ipc_pid_t | 14 | #define __kernel_ipc_pid_t __kernel_ipc_pid_t |
18 | 15 | ||
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index f8ab3eaad12..aea1d1d848c 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h | |||
@@ -44,6 +44,7 @@ | |||
44 | */ | 44 | */ |
45 | #define X86_CR3_PWT 0x00000008 /* Page Write Through */ | 45 | #define X86_CR3_PWT 0x00000008 /* Page Write Through */ |
46 | #define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ | 46 | #define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ |
47 | #define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */ | ||
47 | 48 | ||
48 | /* | 49 | /* |
49 | * Intel CPU features in CR4 | 50 | * Intel CPU features in CR4 |
@@ -61,6 +62,7 @@ | |||
61 | #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ | 62 | #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ |
62 | #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ | 63 | #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ |
63 | #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ | 64 | #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ |
65 | #define X86_CR4_PCIDE 0x00020000 /* enable PCID support */ | ||
64 | #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ | 66 | #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ |
65 | #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ | 67 | #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ |
66 | 68 | ||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7745b257f03..39bc5777211 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -544,13 +544,16 @@ static inline void load_sp0(struct tss_struct *tss, | |||
544 | * enable), so that any CPU's that boot up | 544 | * enable), so that any CPU's that boot up |
545 | * after us can get the correct flags. | 545 | * after us can get the correct flags. |
546 | */ | 546 | */ |
547 | extern unsigned long mmu_cr4_features; | 547 | extern unsigned long mmu_cr4_features; |
548 | extern u32 *trampoline_cr4_features; | ||
548 | 549 | ||
549 | static inline void set_in_cr4(unsigned long mask) | 550 | static inline void set_in_cr4(unsigned long mask) |
550 | { | 551 | { |
551 | unsigned long cr4; | 552 | unsigned long cr4; |
552 | 553 | ||
553 | mmu_cr4_features |= mask; | 554 | mmu_cr4_features |= mask; |
555 | if (trampoline_cr4_features) | ||
556 | *trampoline_cr4_features = mmu_cr4_features; | ||
554 | cr4 = read_cr4(); | 557 | cr4 = read_cr4(); |
555 | cr4 |= mask; | 558 | cr4 |= mask; |
556 | write_cr4(cr4); | 559 | write_cr4(cr4); |
@@ -561,6 +564,8 @@ static inline void clear_in_cr4(unsigned long mask) | |||
561 | unsigned long cr4; | 564 | unsigned long cr4; |
562 | 565 | ||
563 | mmu_cr4_features &= ~mask; | 566 | mmu_cr4_features &= ~mask; |
567 | if (trampoline_cr4_features) | ||
568 | *trampoline_cr4_features = mmu_cr4_features; | ||
564 | cr4 = read_cr4(); | 569 | cr4 = read_cr4(); |
565 | cr4 &= ~mask; | 570 | cr4 &= ~mask; |
566 | write_cr4(cr4); | 571 | write_cr4(cr4); |
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 35f2d1948ad..6167fd79818 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h | |||
@@ -40,5 +40,6 @@ struct pvclock_wall_clock { | |||
40 | } __attribute__((__packed__)); | 40 | } __attribute__((__packed__)); |
41 | 41 | ||
42 | #define PVCLOCK_TSC_STABLE_BIT (1 << 0) | 42 | #define PVCLOCK_TSC_STABLE_BIT (1 << 0) |
43 | #define PVCLOCK_GUEST_STOPPED (1 << 1) | ||
43 | #endif /* __ASSEMBLY__ */ | 44 | #endif /* __ASSEMBLY__ */ |
44 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ | 45 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ |
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h new file mode 100644 index 00000000000..fe1ec5bcd84 --- /dev/null +++ b/arch/x86/include/asm/realmode.h | |||
@@ -0,0 +1,63 @@ | |||
1 | #ifndef _ARCH_X86_REALMODE_H | ||
2 | #define _ARCH_X86_REALMODE_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | #include <asm/io.h> | ||
6 | |||
7 | /* This must match data at realmode.S */ | ||
8 | struct real_mode_header { | ||
9 | u32 text_start; | ||
10 | u32 ro_end; | ||
11 | /* SMP trampoline */ | ||
12 | u32 trampoline_start; | ||
13 | u32 trampoline_status; | ||
14 | u32 trampoline_header; | ||
15 | #ifdef CONFIG_X86_64 | ||
16 | u32 trampoline_pgd; | ||
17 | #endif | ||
18 | /* ACPI S3 wakeup */ | ||
19 | #ifdef CONFIG_ACPI_SLEEP | ||
20 | u32 wakeup_start; | ||
21 | u32 wakeup_header; | ||
22 | #endif | ||
23 | /* APM/BIOS reboot */ | ||
24 | u32 machine_real_restart_asm; | ||
25 | #ifdef CONFIG_X86_64 | ||
26 | u32 machine_real_restart_seg; | ||
27 | #endif | ||
28 | }; | ||
29 | |||
30 | /* This must match data at trampoline_32/64.S */ | ||
31 | struct trampoline_header { | ||
32 | #ifdef CONFIG_X86_32 | ||
33 | u32 start; | ||
34 | u16 gdt_pad; | ||
35 | u16 gdt_limit; | ||
36 | u32 gdt_base; | ||
37 | #else | ||
38 | u64 start; | ||
39 | u64 efer; | ||
40 | u32 cr4; | ||
41 | #endif | ||
42 | }; | ||
43 | |||
44 | extern struct real_mode_header *real_mode_header; | ||
45 | extern unsigned char real_mode_blob_end[]; | ||
46 | |||
47 | extern unsigned long init_rsp; | ||
48 | extern unsigned long initial_code; | ||
49 | extern unsigned long initial_gs; | ||
50 | |||
51 | extern unsigned char real_mode_blob[]; | ||
52 | extern unsigned char real_mode_relocs[]; | ||
53 | |||
54 | #ifdef CONFIG_X86_32 | ||
55 | extern unsigned char startup_32_smp[]; | ||
56 | extern unsigned char boot_gdt[]; | ||
57 | #else | ||
58 | extern unsigned char secondary_startup_64[]; | ||
59 | #endif | ||
60 | |||
61 | extern void __init setup_real_mode(void); | ||
62 | |||
63 | #endif /* _ARCH_X86_REALMODE_H */ | ||
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 92f297069e8..a82c4f1b4d8 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h | |||
@@ -18,8 +18,8 @@ extern struct machine_ops machine_ops; | |||
18 | 18 | ||
19 | void native_machine_crash_shutdown(struct pt_regs *regs); | 19 | void native_machine_crash_shutdown(struct pt_regs *regs); |
20 | void native_machine_shutdown(void); | 20 | void native_machine_shutdown(void); |
21 | void machine_real_restart(unsigned int type); | 21 | void __noreturn machine_real_restart(unsigned int type); |
22 | /* These must match dispatch_table in reboot_32.S */ | 22 | /* These must match dispatch in arch/x86/realmore/rm/reboot.S */ |
23 | #define MRR_BIOS 0 | 23 | #define MRR_BIOS 0 |
24 | #define MRR_APM 1 | 24 | #define MRR_APM 1 |
25 | 25 | ||
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index ada93b3b8c6..beff97f7df3 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h | |||
@@ -7,8 +7,6 @@ | |||
7 | 7 | ||
8 | #include <asm/processor-flags.h> | 8 | #include <asm/processor-flags.h> |
9 | 9 | ||
10 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | ||
11 | |||
12 | #define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ | 10 | #define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ |
13 | X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ | 11 | X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ |
14 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ | 12 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f48394513c3..2ffa95dc233 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -169,11 +169,6 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); | |||
169 | void smp_store_cpu_info(int id); | 169 | void smp_store_cpu_info(int id); |
170 | #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) | 170 | #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) |
171 | 171 | ||
172 | /* We don't mark CPUs online until __cpu_up(), so we need another measure */ | ||
173 | static inline int num_booting_cpus(void) | ||
174 | { | ||
175 | return cpumask_weight(cpu_callout_mask); | ||
176 | } | ||
177 | #else /* !CONFIG_SMP */ | 172 | #else /* !CONFIG_SMP */ |
178 | #define wbinvd_on_cpu(cpu) wbinvd() | 173 | #define wbinvd_on_cpu(cpu) wbinvd() |
179 | static inline int wbinvd_on_all_cpus(void) | 174 | static inline int wbinvd_on_all_cpus(void) |
diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h new file mode 100644 index 00000000000..e9d32df89cc --- /dev/null +++ b/arch/x86/include/asm/sta2x11.h | |||
@@ -0,0 +1,12 @@ | |||
1 | /* | ||
2 | * Header file for STMicroelectronics ConneXt (STA2X11) IOHub | ||
3 | */ | ||
4 | #ifndef __ASM_STA2X11_H | ||
5 | #define __ASM_STA2X11_H | ||
6 | |||
7 | #include <linux/pci.h> | ||
8 | |||
9 | /* This needs to be called from the MFD to configure its sub-devices */ | ||
10 | struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev); | ||
11 | |||
12 | #endif /* __ASM_STA2X11_H */ | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 3c9aebc00d3..89f794f007e 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -85,6 +85,7 @@ struct thread_info { | |||
85 | #define TIF_SECCOMP 8 /* secure computing */ | 85 | #define TIF_SECCOMP 8 /* secure computing */ |
86 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ | 86 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ |
87 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | 87 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ |
88 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | ||
88 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 89 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
89 | #define TIF_IA32 17 /* IA32 compatibility process */ | 90 | #define TIF_IA32 17 /* IA32 compatibility process */ |
90 | #define TIF_FORK 18 /* ret_from_fork */ | 91 | #define TIF_FORK 18 /* ret_from_fork */ |
@@ -109,6 +110,7 @@ struct thread_info { | |||
109 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | 110 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) |
110 | #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) | 111 | #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) |
111 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | 112 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) |
113 | #define _TIF_UPROBE (1 << TIF_UPROBE) | ||
112 | #define _TIF_NOTSC (1 << TIF_NOTSC) | 114 | #define _TIF_NOTSC (1 << TIF_NOTSC) |
113 | #define _TIF_IA32 (1 << TIF_IA32) | 115 | #define _TIF_IA32 (1 << TIF_IA32) |
114 | #define _TIF_FORK (1 << TIF_FORK) | 116 | #define _TIF_FORK (1 << TIF_FORK) |
@@ -246,7 +248,23 @@ static inline void set_restore_sigmask(void) | |||
246 | { | 248 | { |
247 | struct thread_info *ti = current_thread_info(); | 249 | struct thread_info *ti = current_thread_info(); |
248 | ti->status |= TS_RESTORE_SIGMASK; | 250 | ti->status |= TS_RESTORE_SIGMASK; |
249 | set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags); | 251 | WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags)); |
252 | } | ||
253 | static inline void clear_restore_sigmask(void) | ||
254 | { | ||
255 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
256 | } | ||
257 | static inline bool test_restore_sigmask(void) | ||
258 | { | ||
259 | return current_thread_info()->status & TS_RESTORE_SIGMASK; | ||
260 | } | ||
261 | static inline bool test_and_clear_restore_sigmask(void) | ||
262 | { | ||
263 | struct thread_info *ti = current_thread_info(); | ||
264 | if (!(ti->status & TS_RESTORE_SIGMASK)) | ||
265 | return false; | ||
266 | ti->status &= ~TS_RESTORE_SIGMASK; | ||
267 | return true; | ||
250 | } | 268 | } |
251 | 269 | ||
252 | static inline bool is_ia32_task(void) | 270 | static inline bool is_ia32_task(void) |
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h deleted file mode 100644 index feca3118a73..00000000000 --- a/arch/x86/include/asm/trampoline.h +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | #ifndef _ASM_X86_TRAMPOLINE_H | ||
2 | #define _ASM_X86_TRAMPOLINE_H | ||
3 | |||
4 | #ifndef __ASSEMBLY__ | ||
5 | |||
6 | #include <linux/types.h> | ||
7 | #include <asm/io.h> | ||
8 | |||
9 | /* | ||
10 | * Trampoline 80x86 program as an array. These are in the init rodata | ||
11 | * segment, but that's okay, because we only care about the relative | ||
12 | * addresses of the symbols. | ||
13 | */ | ||
14 | extern const unsigned char x86_trampoline_start []; | ||
15 | extern const unsigned char x86_trampoline_end []; | ||
16 | extern unsigned char *x86_trampoline_base; | ||
17 | |||
18 | extern unsigned long init_rsp; | ||
19 | extern unsigned long initial_code; | ||
20 | extern unsigned long initial_gs; | ||
21 | |||
22 | extern void __init setup_trampolines(void); | ||
23 | |||
24 | extern const unsigned char trampoline_data[]; | ||
25 | extern const unsigned char trampoline_status[]; | ||
26 | |||
27 | #define TRAMPOLINE_SYM(x) \ | ||
28 | ((void *)(x86_trampoline_base + \ | ||
29 | ((const unsigned char *)(x) - x86_trampoline_start))) | ||
30 | |||
31 | /* Address of the SMP trampoline */ | ||
32 | static inline unsigned long trampoline_address(void) | ||
33 | { | ||
34 | return virt_to_phys(TRAMPOLINE_SYM(trampoline_data)); | ||
35 | } | ||
36 | |||
37 | #endif /* __ASSEMBLY__ */ | ||
38 | |||
39 | #endif /* _ASM_X86_TRAMPOLINE_H */ | ||
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 851fe0dc13b..e1f3a17034f 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -32,9 +32,9 @@ | |||
32 | 32 | ||
33 | #define segment_eq(a, b) ((a).seg == (b).seg) | 33 | #define segment_eq(a, b) ((a).seg == (b).seg) |
34 | 34 | ||
35 | #define __addr_ok(addr) \ | 35 | #define user_addr_max() (current_thread_info()->addr_limit.seg) |
36 | ((unsigned long __force)(addr) < \ | 36 | #define __addr_ok(addr) \ |
37 | (current_thread_info()->addr_limit.seg)) | 37 | ((unsigned long __force)(addr) < user_addr_max()) |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * Test whether a block of memory is a valid user space address. | 40 | * Test whether a block of memory is a valid user space address. |
@@ -46,14 +46,14 @@ | |||
46 | * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... | 46 | * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... |
47 | */ | 47 | */ |
48 | 48 | ||
49 | #define __range_not_ok(addr, size) \ | 49 | #define __range_not_ok(addr, size, limit) \ |
50 | ({ \ | 50 | ({ \ |
51 | unsigned long flag, roksum; \ | 51 | unsigned long flag, roksum; \ |
52 | __chk_user_ptr(addr); \ | 52 | __chk_user_ptr(addr); \ |
53 | asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ | 53 | asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ |
54 | : "=&r" (flag), "=r" (roksum) \ | 54 | : "=&r" (flag), "=r" (roksum) \ |
55 | : "1" (addr), "g" ((long)(size)), \ | 55 | : "1" (addr), "g" ((long)(size)), \ |
56 | "rm" (current_thread_info()->addr_limit.seg)); \ | 56 | "rm" (limit)); \ |
57 | flag; \ | 57 | flag; \ |
58 | }) | 58 | }) |
59 | 59 | ||
@@ -76,7 +76,8 @@ | |||
76 | * checks that the pointer is in the user space range - after calling | 76 | * checks that the pointer is in the user space range - after calling |
77 | * this function, memory access functions may still return -EFAULT. | 77 | * this function, memory access functions may still return -EFAULT. |
78 | */ | 78 | */ |
79 | #define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) | 79 | #define access_ok(type, addr, size) \ |
80 | (likely(__range_not_ok(addr, size, user_addr_max()) == 0)) | ||
80 | 81 | ||
81 | /* | 82 | /* |
82 | * The exception table consists of pairs of addresses relative to the | 83 | * The exception table consists of pairs of addresses relative to the |
@@ -565,6 +566,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n); | |||
565 | extern __must_check long | 566 | extern __must_check long |
566 | strncpy_from_user(char *dst, const char __user *src, long count); | 567 | strncpy_from_user(char *dst, const char __user *src, long count); |
567 | 568 | ||
569 | extern __must_check long strlen_user(const char __user *str); | ||
570 | extern __must_check long strnlen_user(const char __user *str, long n); | ||
571 | |||
568 | /* | 572 | /* |
569 | * movsl can be slow when source and dest are not both 8-byte aligned | 573 | * movsl can be slow when source and dest are not both 8-byte aligned |
570 | */ | 574 | */ |
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 8084bc73b18..576e39bca6a 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h | |||
@@ -213,23 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to, | |||
213 | return n; | 213 | return n; |
214 | } | 214 | } |
215 | 215 | ||
216 | /** | ||
217 | * strlen_user: - Get the size of a string in user space. | ||
218 | * @str: The string to measure. | ||
219 | * | ||
220 | * Context: User context only. This function may sleep. | ||
221 | * | ||
222 | * Get the size of a NUL-terminated string in user space. | ||
223 | * | ||
224 | * Returns the size of the string INCLUDING the terminating NUL. | ||
225 | * On exception, returns 0. | ||
226 | * | ||
227 | * If there is a limit on the length of a valid string, you may wish to | ||
228 | * consider using strnlen_user() instead. | ||
229 | */ | ||
230 | #define strlen_user(str) strnlen_user(str, LONG_MAX) | ||
231 | |||
232 | long strnlen_user(const char __user *str, long n); | ||
233 | unsigned long __must_check clear_user(void __user *mem, unsigned long len); | 216 | unsigned long __must_check clear_user(void __user *mem, unsigned long len); |
234 | unsigned long __must_check __clear_user(void __user *mem, unsigned long len); | 217 | unsigned long __must_check __clear_user(void __user *mem, unsigned long len); |
235 | 218 | ||
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index fcd4b6f3ef0..d8def8b3dba 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -17,6 +17,8 @@ | |||
17 | 17 | ||
18 | /* Handles exceptions in both to and from, but doesn't do access_ok */ | 18 | /* Handles exceptions in both to and from, but doesn't do access_ok */ |
19 | __must_check unsigned long | 19 | __must_check unsigned long |
20 | copy_user_enhanced_fast_string(void *to, const void *from, unsigned len); | ||
21 | __must_check unsigned long | ||
20 | copy_user_generic_string(void *to, const void *from, unsigned len); | 22 | copy_user_generic_string(void *to, const void *from, unsigned len); |
21 | __must_check unsigned long | 23 | __must_check unsigned long |
22 | copy_user_generic_unrolled(void *to, const void *from, unsigned len); | 24 | copy_user_generic_unrolled(void *to, const void *from, unsigned len); |
@@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len) | |||
26 | { | 28 | { |
27 | unsigned ret; | 29 | unsigned ret; |
28 | 30 | ||
29 | alternative_call(copy_user_generic_unrolled, | 31 | /* |
32 | * If CPU has ERMS feature, use copy_user_enhanced_fast_string. | ||
33 | * Otherwise, if CPU has rep_good feature, use copy_user_generic_string. | ||
34 | * Otherwise, use copy_user_generic_unrolled. | ||
35 | */ | ||
36 | alternative_call_2(copy_user_generic_unrolled, | ||
30 | copy_user_generic_string, | 37 | copy_user_generic_string, |
31 | X86_FEATURE_REP_GOOD, | 38 | X86_FEATURE_REP_GOOD, |
39 | copy_user_enhanced_fast_string, | ||
40 | X86_FEATURE_ERMS, | ||
32 | ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), | 41 | ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), |
33 | "=d" (len)), | 42 | "=d" (len)), |
34 | "1" (to), "2" (from), "3" (len) | 43 | "1" (to), "2" (from), "3" (len) |
@@ -208,9 +217,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) | |||
208 | } | 217 | } |
209 | } | 218 | } |
210 | 219 | ||
211 | __must_check long strnlen_user(const char __user *str, long n); | ||
212 | __must_check long __strnlen_user(const char __user *str, long n); | ||
213 | __must_check long strlen_user(const char __user *str); | ||
214 | __must_check unsigned long clear_user(void __user *mem, unsigned long len); | 220 | __must_check unsigned long clear_user(void __user *mem, unsigned long len); |
215 | __must_check unsigned long __clear_user(void __user *mem, unsigned long len); | 221 | __must_check unsigned long __clear_user(void __user *mem, unsigned long len); |
216 | 222 | ||
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h new file mode 100644 index 00000000000..f3971bbcd1d --- /dev/null +++ b/arch/x86/include/asm/uprobes.h | |||
@@ -0,0 +1,57 @@ | |||
1 | #ifndef _ASM_UPROBES_H | ||
2 | #define _ASM_UPROBES_H | ||
3 | /* | ||
4 | * User-space Probes (UProbes) for x86 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | * Copyright (C) IBM Corporation, 2008-2011 | ||
21 | * Authors: | ||
22 | * Srikar Dronamraju | ||
23 | * Jim Keniston | ||
24 | */ | ||
25 | |||
26 | #include <linux/notifier.h> | ||
27 | |||
28 | typedef u8 uprobe_opcode_t; | ||
29 | |||
30 | #define MAX_UINSN_BYTES 16 | ||
31 | #define UPROBE_XOL_SLOT_BYTES 128 /* to keep it cache aligned */ | ||
32 | |||
33 | #define UPROBE_SWBP_INSN 0xcc | ||
34 | #define UPROBE_SWBP_INSN_SIZE 1 | ||
35 | |||
36 | struct arch_uprobe { | ||
37 | u16 fixups; | ||
38 | u8 insn[MAX_UINSN_BYTES]; | ||
39 | #ifdef CONFIG_X86_64 | ||
40 | unsigned long rip_rela_target_address; | ||
41 | #endif | ||
42 | }; | ||
43 | |||
44 | struct arch_uprobe_task { | ||
45 | unsigned long saved_trap_nr; | ||
46 | #ifdef CONFIG_X86_64 | ||
47 | unsigned long saved_scratch_register; | ||
48 | #endif | ||
49 | }; | ||
50 | |||
51 | extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); | ||
52 | extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); | ||
53 | extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); | ||
54 | extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); | ||
55 | extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); | ||
56 | extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); | ||
57 | #endif /* _ASM_UPROBES_H */ | ||
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index becf47b8173..a06983cdc12 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -140,6 +140,9 @@ | |||
140 | #define IPI_RESET_LIMIT 1 | 140 | #define IPI_RESET_LIMIT 1 |
141 | /* after this # consecutive successes, bump up the throttle if it was lowered */ | 141 | /* after this # consecutive successes, bump up the throttle if it was lowered */ |
142 | #define COMPLETE_THRESHOLD 5 | 142 | #define COMPLETE_THRESHOLD 5 |
143 | /* after this # of giveups (fall back to kernel IPI's) disable the use of | ||
144 | the BAU for a period of time */ | ||
145 | #define GIVEUP_LIMIT 100 | ||
143 | 146 | ||
144 | #define UV_LB_SUBNODEID 0x10 | 147 | #define UV_LB_SUBNODEID 0x10 |
145 | 148 | ||
@@ -149,7 +152,6 @@ | |||
149 | /* 4 bits of software ack period */ | 152 | /* 4 bits of software ack period */ |
150 | #define UV2_ACK_MASK 0x7UL | 153 | #define UV2_ACK_MASK 0x7UL |
151 | #define UV2_ACK_UNITS_SHFT 3 | 154 | #define UV2_ACK_UNITS_SHFT 3 |
152 | #define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT | ||
153 | #define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT | 155 | #define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT |
154 | 156 | ||
155 | /* | 157 | /* |
@@ -167,7 +169,6 @@ | |||
167 | #define FLUSH_RETRY_TIMEOUT 2 | 169 | #define FLUSH_RETRY_TIMEOUT 2 |
168 | #define FLUSH_GIVEUP 3 | 170 | #define FLUSH_GIVEUP 3 |
169 | #define FLUSH_COMPLETE 4 | 171 | #define FLUSH_COMPLETE 4 |
170 | #define FLUSH_RETRY_BUSYBUG 5 | ||
171 | 172 | ||
172 | /* | 173 | /* |
173 | * tuning the action when the numalink network is extremely delayed | 174 | * tuning the action when the numalink network is extremely delayed |
@@ -176,7 +177,7 @@ | |||
176 | microseconds */ | 177 | microseconds */ |
177 | #define CONGESTED_REPS 10 /* long delays averaged over | 178 | #define CONGESTED_REPS 10 /* long delays averaged over |
178 | this many broadcasts */ | 179 | this many broadcasts */ |
179 | #define CONGESTED_PERIOD 30 /* time for the bau to be | 180 | #define DISABLED_PERIOD 10 /* time for the bau to be |
180 | disabled, in seconds */ | 181 | disabled, in seconds */ |
181 | /* see msg_type: */ | 182 | /* see msg_type: */ |
182 | #define MSG_NOOP 0 | 183 | #define MSG_NOOP 0 |
@@ -521,6 +522,12 @@ struct ptc_stats { | |||
521 | unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */ | 522 | unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */ |
522 | unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */ | 523 | unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */ |
523 | unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */ | 524 | unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */ |
525 | unsigned long s_overipilimit; /* over the ipi reset limit */ | ||
526 | unsigned long s_giveuplimit; /* disables, over giveup limit*/ | ||
527 | unsigned long s_enters; /* entries to the driver */ | ||
528 | unsigned long s_ipifordisabled; /* fall back to IPI; disabled */ | ||
529 | unsigned long s_plugged; /* plugged by h/w bug*/ | ||
530 | unsigned long s_congested; /* giveup on long wait */ | ||
524 | /* destination statistics */ | 531 | /* destination statistics */ |
525 | unsigned long d_alltlb; /* times all tlb's on this | 532 | unsigned long d_alltlb; /* times all tlb's on this |
526 | cpu were flushed */ | 533 | cpu were flushed */ |
@@ -587,8 +594,8 @@ struct bau_control { | |||
587 | int timeout_tries; | 594 | int timeout_tries; |
588 | int ipi_attempts; | 595 | int ipi_attempts; |
589 | int conseccompletes; | 596 | int conseccompletes; |
590 | int baudisabled; | 597 | short nobau; |
591 | int set_bau_off; | 598 | short baudisabled; |
592 | short cpu; | 599 | short cpu; |
593 | short osnode; | 600 | short osnode; |
594 | short uvhub_cpu; | 601 | short uvhub_cpu; |
@@ -597,14 +604,16 @@ struct bau_control { | |||
597 | short cpus_in_socket; | 604 | short cpus_in_socket; |
598 | short cpus_in_uvhub; | 605 | short cpus_in_uvhub; |
599 | short partition_base_pnode; | 606 | short partition_base_pnode; |
600 | short using_desc; /* an index, like uvhub_cpu */ | 607 | short busy; /* all were busy (war) */ |
601 | unsigned int inuse_map; | ||
602 | unsigned short message_number; | 608 | unsigned short message_number; |
603 | unsigned short uvhub_quiesce; | 609 | unsigned short uvhub_quiesce; |
604 | short socket_acknowledge_count[DEST_Q_SIZE]; | 610 | short socket_acknowledge_count[DEST_Q_SIZE]; |
605 | cycles_t send_message; | 611 | cycles_t send_message; |
612 | cycles_t period_end; | ||
613 | cycles_t period_time; | ||
606 | spinlock_t uvhub_lock; | 614 | spinlock_t uvhub_lock; |
607 | spinlock_t queue_lock; | 615 | spinlock_t queue_lock; |
616 | spinlock_t disable_lock; | ||
608 | /* tunables */ | 617 | /* tunables */ |
609 | int max_concurr; | 618 | int max_concurr; |
610 | int max_concurr_const; | 619 | int max_concurr_const; |
@@ -615,9 +624,9 @@ struct bau_control { | |||
615 | int complete_threshold; | 624 | int complete_threshold; |
616 | int cong_response_us; | 625 | int cong_response_us; |
617 | int cong_reps; | 626 | int cong_reps; |
618 | int cong_period; | 627 | cycles_t disabled_period; |
619 | unsigned long clocks_per_100_usec; | 628 | int period_giveups; |
620 | cycles_t period_time; | 629 | int giveup_limit; |
621 | long period_requests; | 630 | long period_requests; |
622 | struct hub_and_pnode *thp; | 631 | struct hub_and_pnode *thp; |
623 | }; | 632 | }; |
diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h index c4b9dc2f67c..44282fbf7bf 100644 --- a/arch/x86/include/asm/vga.h +++ b/arch/x86/include/asm/vga.h | |||
@@ -17,4 +17,10 @@ | |||
17 | #define vga_readb(x) (*(x)) | 17 | #define vga_readb(x) (*(x)) |
18 | #define vga_writeb(x, y) (*(y) = (x)) | 18 | #define vga_writeb(x, y) (*(y) = (x)) |
19 | 19 | ||
20 | #ifdef CONFIG_FB_EFI | ||
21 | #define __ARCH_HAS_VGA_DEFAULT_DEVICE | ||
22 | extern struct pci_dev *vga_default_device(void); | ||
23 | extern void vga_set_default_device(struct pci_dev *pdev); | ||
24 | #endif | ||
25 | |||
20 | #endif /* _ASM_X86_VGA_H */ | 26 | #endif /* _ASM_X86_VGA_H */ |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 31f180c21ce..74fcb963595 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -60,6 +60,7 @@ | |||
60 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 60 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
61 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | 61 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 |
62 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | 62 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 |
63 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | ||
63 | 64 | ||
64 | 65 | ||
65 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | 66 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
@@ -281,6 +282,7 @@ enum vmcs_field { | |||
281 | #define EXIT_REASON_EPT_MISCONFIG 49 | 282 | #define EXIT_REASON_EPT_MISCONFIG 49 |
282 | #define EXIT_REASON_WBINVD 54 | 283 | #define EXIT_REASON_WBINVD 54 |
283 | #define EXIT_REASON_XSETBV 55 | 284 | #define EXIT_REASON_XSETBV 55 |
285 | #define EXIT_REASON_INVPCID 58 | ||
284 | 286 | ||
285 | /* | 287 | /* |
286 | * Interruption-information format | 288 | * Interruption-information format |
@@ -404,6 +406,7 @@ enum vmcs_field { | |||
404 | #define VMX_EPTP_WB_BIT (1ull << 14) | 406 | #define VMX_EPTP_WB_BIT (1ull << 14) |
405 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) | 407 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) |
406 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) | 408 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) |
409 | #define VMX_EPT_AD_BIT (1ull << 21) | ||
407 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) | 410 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) |
408 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 411 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
409 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 412 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
@@ -415,11 +418,14 @@ enum vmcs_field { | |||
415 | #define VMX_EPT_MAX_GAW 0x4 | 418 | #define VMX_EPT_MAX_GAW 0x4 |
416 | #define VMX_EPT_MT_EPTE_SHIFT 3 | 419 | #define VMX_EPT_MT_EPTE_SHIFT 3 |
417 | #define VMX_EPT_GAW_EPTP_SHIFT 3 | 420 | #define VMX_EPT_GAW_EPTP_SHIFT 3 |
421 | #define VMX_EPT_AD_ENABLE_BIT (1ull << 6) | ||
418 | #define VMX_EPT_DEFAULT_MT 0x6ull | 422 | #define VMX_EPT_DEFAULT_MT 0x6ull |
419 | #define VMX_EPT_READABLE_MASK 0x1ull | 423 | #define VMX_EPT_READABLE_MASK 0x1ull |
420 | #define VMX_EPT_WRITABLE_MASK 0x2ull | 424 | #define VMX_EPT_WRITABLE_MASK 0x2ull |
421 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | 425 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull |
422 | #define VMX_EPT_IPAT_BIT (1ull << 6) | 426 | #define VMX_EPT_IPAT_BIT (1ull << 6) |
427 | #define VMX_EPT_ACCESS_BIT (1ull << 8) | ||
428 | #define VMX_EPT_DIRTY_BIT (1ull << 9) | ||
423 | 429 | ||
424 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | 430 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul |
425 | 431 | ||
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index e58f03b206c..5b238981542 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_WORD_AT_A_TIME_H | 1 | #ifndef _ASM_WORD_AT_A_TIME_H |
2 | #define _ASM_WORD_AT_A_TIME_H | 2 | #define _ASM_WORD_AT_A_TIME_H |
3 | 3 | ||
4 | #include <linux/kernel.h> | ||
5 | |||
4 | /* | 6 | /* |
5 | * This is largely generic for little-endian machines, but the | 7 | * This is largely generic for little-endian machines, but the |
6 | * optimal byte mask counting is probably going to be something | 8 | * optimal byte mask counting is probably going to be something |
@@ -8,6 +10,11 @@ | |||
8 | * bit count instruction, that might be better than the multiply | 10 | * bit count instruction, that might be better than the multiply |
9 | * and shift, for example. | 11 | * and shift, for example. |
10 | */ | 12 | */ |
13 | struct word_at_a_time { | ||
14 | const unsigned long one_bits, high_bits; | ||
15 | }; | ||
16 | |||
17 | #define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } | ||
11 | 18 | ||
12 | #ifdef CONFIG_64BIT | 19 | #ifdef CONFIG_64BIT |
13 | 20 | ||
@@ -35,12 +42,31 @@ static inline long count_masked_bytes(long mask) | |||
35 | 42 | ||
36 | #endif | 43 | #endif |
37 | 44 | ||
38 | #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) | 45 | /* Return nonzero if it has a zero */ |
46 | static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) | ||
47 | { | ||
48 | unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; | ||
49 | *bits = mask; | ||
50 | return mask; | ||
51 | } | ||
52 | |||
53 | static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) | ||
54 | { | ||
55 | return bits; | ||
56 | } | ||
57 | |||
58 | static inline unsigned long create_zero_mask(unsigned long bits) | ||
59 | { | ||
60 | bits = (bits - 1) & ~bits; | ||
61 | return bits >> 7; | ||
62 | } | ||
63 | |||
64 | /* The mask we created is directly usable as a bytemask */ | ||
65 | #define zero_bytemask(mask) (mask) | ||
39 | 66 | ||
40 | /* Return the high bit set in the first byte that is a zero */ | 67 | static inline unsigned long find_zero(unsigned long mask) |
41 | static inline unsigned long has_zero(unsigned long a) | ||
42 | { | 68 | { |
43 | return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); | 69 | return count_masked_bytes(mask); |
44 | } | 70 | } |
45 | 71 | ||
46 | /* | 72 | /* |
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h index 92e54abf89e..f90f0a587c6 100644 --- a/arch/x86/include/asm/x2apic.h +++ b/arch/x86/include/asm/x2apic.h | |||
@@ -9,15 +9,6 @@ | |||
9 | #include <asm/ipi.h> | 9 | #include <asm/ipi.h> |
10 | #include <linux/cpumask.h> | 10 | #include <linux/cpumask.h> |
11 | 11 | ||
12 | /* | ||
13 | * Need to use more than cpu 0, because we need more vectors | ||
14 | * when MSI-X are used. | ||
15 | */ | ||
16 | static const struct cpumask *x2apic_target_cpus(void) | ||
17 | { | ||
18 | return cpu_online_mask; | ||
19 | } | ||
20 | |||
21 | static int x2apic_apic_id_valid(int apicid) | 12 | static int x2apic_apic_id_valid(int apicid) |
22 | { | 13 | { |
23 | return 1; | 14 | return 1; |
@@ -28,15 +19,6 @@ static int x2apic_apic_id_registered(void) | |||
28 | return 1; | 19 | return 1; |
29 | } | 20 | } |
30 | 21 | ||
31 | /* | ||
32 | * For now each logical cpu is in its own vector allocation domain. | ||
33 | */ | ||
34 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
35 | { | ||
36 | cpumask_clear(retmask); | ||
37 | cpumask_set_cpu(cpu, retmask); | ||
38 | } | ||
39 | |||
40 | static void | 22 | static void |
41 | __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) | 23 | __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) |
42 | { | 24 | { |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 42d2ae18dab..38155f66714 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -163,6 +163,7 @@ struct x86_cpuinit_ops { | |||
163 | * @i8042_detect pre-detect if i8042 controller exists | 163 | * @i8042_detect pre-detect if i8042 controller exists |
164 | * @save_sched_clock_state: save state for sched_clock() on suspend | 164 | * @save_sched_clock_state: save state for sched_clock() on suspend |
165 | * @restore_sched_clock_state: restore state for sched_clock() on resume | 165 | * @restore_sched_clock_state: restore state for sched_clock() on resume |
166 | * @apic_post_init: adjust apic if neeeded | ||
166 | */ | 167 | */ |
167 | struct x86_platform_ops { | 168 | struct x86_platform_ops { |
168 | unsigned long (*calibrate_tsc)(void); | 169 | unsigned long (*calibrate_tsc)(void); |
@@ -175,6 +176,7 @@ struct x86_platform_ops { | |||
175 | int (*i8042_detect)(void); | 176 | int (*i8042_detect)(void); |
176 | void (*save_sched_clock_state)(void); | 177 | void (*save_sched_clock_state)(void); |
177 | void (*restore_sched_clock_state)(void); | 178 | void (*restore_sched_clock_state)(void); |
179 | void (*apic_post_init)(void); | ||
178 | }; | 180 | }; |
179 | 181 | ||
180 | struct pci_dev; | 182 | struct pci_dev; |
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 1df35417c41..cc146d51449 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h | |||
@@ -6,6 +6,7 @@ enum ipi_vector { | |||
6 | XEN_CALL_FUNCTION_VECTOR, | 6 | XEN_CALL_FUNCTION_VECTOR, |
7 | XEN_CALL_FUNCTION_SINGLE_VECTOR, | 7 | XEN_CALL_FUNCTION_SINGLE_VECTOR, |
8 | XEN_SPIN_UNLOCK_VECTOR, | 8 | XEN_SPIN_UNLOCK_VECTOR, |
9 | XEN_IRQ_WORK_VECTOR, | ||
9 | 10 | ||
10 | XEN_NR_IPIS, | 11 | XEN_NR_IPIS, |
11 | }; | 12 | }; |
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 5728852fb90..59c226d120c 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <xen/interface/sched.h> | 48 | #include <xen/interface/sched.h> |
49 | #include <xen/interface/physdev.h> | 49 | #include <xen/interface/physdev.h> |
50 | #include <xen/interface/platform.h> | 50 | #include <xen/interface/platform.h> |
51 | #include <xen/interface/xen-mca.h> | ||
51 | 52 | ||
52 | /* | 53 | /* |
53 | * The hypercall asms have to meet several constraints: | 54 | * The hypercall asms have to meet several constraints: |
@@ -302,6 +303,13 @@ HYPERVISOR_set_timer_op(u64 timeout) | |||
302 | } | 303 | } |
303 | 304 | ||
304 | static inline int | 305 | static inline int |
306 | HYPERVISOR_mca(struct xen_mc *mc_op) | ||
307 | { | ||
308 | mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; | ||
309 | return _hypercall1(int, mca, mc_op); | ||
310 | } | ||
311 | |||
312 | static inline int | ||
305 | HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) | 313 | HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) |
306 | { | 314 | { |
307 | platform_op->interface_version = XENPF_INTERFACE_VERSION; | 315 | platform_op->interface_version = XENPF_INTERFACE_VERSION; |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c34f96c2f7a..93971e841dd 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -44,6 +44,7 @@ extern unsigned long machine_to_phys_nr; | |||
44 | 44 | ||
45 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 45 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
46 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 46 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
47 | extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn); | ||
47 | extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 48 | extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
48 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, | 49 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, |
49 | unsigned long pfn_e); | 50 | unsigned long pfn_e); |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bb8529275aa..8215e5652d9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -35,7 +35,6 @@ obj-y += tsc.o io_delay.o rtc.o | |||
35 | obj-y += pci-iommu_table.o | 35 | obj-y += pci-iommu_table.o |
36 | obj-y += resource.o | 36 | obj-y += resource.o |
37 | 37 | ||
38 | obj-y += trampoline.o trampoline_$(BITS).o | ||
39 | obj-y += process.o | 38 | obj-y += process.o |
40 | obj-y += i387.o xsave.o | 39 | obj-y += i387.o xsave.o |
41 | obj-y += ptrace.o | 40 | obj-y += ptrace.o |
@@ -48,7 +47,6 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o | |||
48 | obj-y += cpu/ | 47 | obj-y += cpu/ |
49 | obj-y += acpi/ | 48 | obj-y += acpi/ |
50 | obj-y += reboot.o | 49 | obj-y += reboot.o |
51 | obj-$(CONFIG_X86_32) += reboot_32.o | ||
52 | obj-$(CONFIG_X86_MSR) += msr.o | 50 | obj-$(CONFIG_X86_MSR) += msr.o |
53 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 51 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
54 | obj-$(CONFIG_PCI) += early-quirks.o | 52 | obj-$(CONFIG_PCI) += early-quirks.o |
@@ -100,6 +98,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | |||
100 | 98 | ||
101 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 99 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
102 | obj-$(CONFIG_OF) += devicetree.o | 100 | obj-$(CONFIG_OF) += devicetree.o |
101 | obj-$(CONFIG_UPROBES) += uprobes.o | ||
103 | 102 | ||
104 | ### | 103 | ### |
105 | # 64 bit specific files | 104 | # 64 bit specific files |
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 6f35260bb3e..163b2258147 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile | |||
@@ -1,14 +1,7 @@ | |||
1 | subdir- := realmode | ||
2 | |||
3 | obj-$(CONFIG_ACPI) += boot.o | 1 | obj-$(CONFIG_ACPI) += boot.o |
4 | obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o | 2 | obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o |
5 | 3 | ||
6 | ifneq ($(CONFIG_ACPI_PROCESSOR),) | 4 | ifneq ($(CONFIG_ACPI_PROCESSOR),) |
7 | obj-y += cstate.o | 5 | obj-y += cstate.o |
8 | endif | 6 | endif |
9 | 7 | ||
10 | $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin | ||
11 | |||
12 | $(obj)/realmode/wakeup.bin: FORCE | ||
13 | $(Q)$(MAKE) $(build)=$(obj)/realmode | ||
14 | |||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8afb6931981..b2297e58c6e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -422,12 +422,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, | |||
422 | return 0; | 422 | return 0; |
423 | } | 423 | } |
424 | 424 | ||
425 | if (intsrc->source_irq == 0 && intsrc->global_irq == 2) { | 425 | if (intsrc->source_irq == 0) { |
426 | if (acpi_skip_timer_override) { | 426 | if (acpi_skip_timer_override) { |
427 | printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); | 427 | printk(PREFIX "BIOS IRQ0 override ignored.\n"); |
428 | return 0; | 428 | return 0; |
429 | } | 429 | } |
430 | if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { | 430 | |
431 | if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity | ||
432 | && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { | ||
431 | intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK; | 433 | intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK; |
432 | printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n"); | 434 | printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n"); |
433 | } | 435 | } |
@@ -1334,17 +1336,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d) | |||
1334 | } | 1336 | } |
1335 | 1337 | ||
1336 | /* | 1338 | /* |
1337 | * Force ignoring BIOS IRQ0 pin2 override | 1339 | * Force ignoring BIOS IRQ0 override |
1338 | */ | 1340 | */ |
1339 | static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) | 1341 | static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) |
1340 | { | 1342 | { |
1341 | /* | ||
1342 | * The ati_ixp4x0_rev() early PCI quirk should have set | ||
1343 | * the acpi_skip_timer_override flag already: | ||
1344 | */ | ||
1345 | if (!acpi_skip_timer_override) { | 1343 | if (!acpi_skip_timer_override) { |
1346 | WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n"); | 1344 | pr_notice("%s detected: Ignoring BIOS IRQ0 override\n", |
1347 | pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", | ||
1348 | d->ident); | 1345 | d->ident); |
1349 | acpi_skip_timer_override = 1; | 1346 | acpi_skip_timer_override = 1; |
1350 | } | 1347 | } |
@@ -1438,7 +1435,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { | |||
1438 | * is enabled. This input is incorrectly designated the | 1435 | * is enabled. This input is incorrectly designated the |
1439 | * ISA IRQ 0 via an interrupt source override even though | 1436 | * ISA IRQ 0 via an interrupt source override even though |
1440 | * it is wired to the output of the master 8259A and INTIN0 | 1437 | * it is wired to the output of the master 8259A and INTIN0 |
1441 | * is not connected at all. Force ignoring BIOS IRQ0 pin2 | 1438 | * is not connected at all. Force ignoring BIOS IRQ0 |
1442 | * override in that cases. | 1439 | * override in that cases. |
1443 | */ | 1440 | */ |
1444 | { | 1441 | { |
@@ -1473,6 +1470,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { | |||
1473 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), | 1470 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), |
1474 | }, | 1471 | }, |
1475 | }, | 1472 | }, |
1473 | { | ||
1474 | .callback = dmi_ignore_irq0_timer_override, | ||
1475 | .ident = "FUJITSU SIEMENS", | ||
1476 | .matches = { | ||
1477 | DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), | ||
1478 | DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"), | ||
1479 | }, | ||
1480 | }, | ||
1476 | {} | 1481 | {} |
1477 | }; | 1482 | }; |
1478 | 1483 | ||
diff --git a/arch/x86/kernel/acpi/realmode/.gitignore b/arch/x86/kernel/acpi/realmode/.gitignore deleted file mode 100644 index 58f1f48a58f..00000000000 --- a/arch/x86/kernel/acpi/realmode/.gitignore +++ /dev/null | |||
@@ -1,3 +0,0 @@ | |||
1 | wakeup.bin | ||
2 | wakeup.elf | ||
3 | wakeup.lds | ||
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile deleted file mode 100644 index 6a564ac67ef..00000000000 --- a/arch/x86/kernel/acpi/realmode/Makefile +++ /dev/null | |||
@@ -1,59 +0,0 @@ | |||
1 | # | ||
2 | # arch/x86/kernel/acpi/realmode/Makefile | ||
3 | # | ||
4 | # This file is subject to the terms and conditions of the GNU General Public | ||
5 | # License. See the file "COPYING" in the main directory of this archive | ||
6 | # for more details. | ||
7 | # | ||
8 | |||
9 | always := wakeup.bin | ||
10 | targets := wakeup.elf wakeup.lds | ||
11 | |||
12 | wakeup-y += wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o | ||
13 | |||
14 | # The link order of the video-*.o modules can matter. In particular, | ||
15 | # video-vga.o *must* be listed first, followed by video-vesa.o. | ||
16 | # Hardware-specific drivers should follow in the order they should be | ||
17 | # probed, and video-bios.o should typically be last. | ||
18 | wakeup-y += video-vga.o | ||
19 | wakeup-y += video-vesa.o | ||
20 | wakeup-y += video-bios.o | ||
21 | |||
22 | targets += $(wakeup-y) | ||
23 | |||
24 | bootsrc := $(src)/../../../boot | ||
25 | |||
26 | # --------------------------------------------------------------------------- | ||
27 | |||
28 | # How to compile the 16-bit code. Note we always compile for -march=i386, | ||
29 | # that way we can complain to the user if the CPU is insufficient. | ||
30 | # Compile with _SETUP since this is similar to the boot-time setup code. | ||
31 | KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \ | ||
32 | -I$(srctree)/$(bootsrc) \ | ||
33 | $(cflags-y) \ | ||
34 | -Wall -Wstrict-prototypes \ | ||
35 | -march=i386 -mregparm=3 \ | ||
36 | -include $(srctree)/$(bootsrc)/code16gcc.h \ | ||
37 | -fno-strict-aliasing -fomit-frame-pointer \ | ||
38 | $(call cc-option, -ffreestanding) \ | ||
39 | $(call cc-option, -fno-toplevel-reorder,\ | ||
40 | $(call cc-option, -fno-unit-at-a-time)) \ | ||
41 | $(call cc-option, -fno-stack-protector) \ | ||
42 | $(call cc-option, -mpreferred-stack-boundary=2) | ||
43 | KBUILD_CFLAGS += $(call cc-option, -m32) | ||
44 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ | ||
45 | GCOV_PROFILE := n | ||
46 | |||
47 | WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y)) | ||
48 | |||
49 | LDFLAGS_wakeup.elf := -T | ||
50 | |||
51 | CPPFLAGS_wakeup.lds += -P -C | ||
52 | |||
53 | $(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE | ||
54 | $(call if_changed,ld) | ||
55 | |||
56 | OBJCOPYFLAGS_wakeup.bin := -O binary | ||
57 | |||
58 | $(obj)/wakeup.bin: $(obj)/wakeup.elf FORCE | ||
59 | $(call if_changed,objcopy) | ||
diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S deleted file mode 100644 index f51eb0bb56c..00000000000 --- a/arch/x86/kernel/acpi/realmode/bioscall.S +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include "../../../boot/bioscall.S" | ||
diff --git a/arch/x86/kernel/acpi/realmode/copy.S b/arch/x86/kernel/acpi/realmode/copy.S deleted file mode 100644 index dc59ebee69d..00000000000 --- a/arch/x86/kernel/acpi/realmode/copy.S +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include "../../../boot/copy.S" | ||
diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c deleted file mode 100644 index 6206033ba20..00000000000 --- a/arch/x86/kernel/acpi/realmode/regs.c +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include "../../../boot/regs.c" | ||
diff --git a/arch/x86/kernel/acpi/realmode/video-bios.c b/arch/x86/kernel/acpi/realmode/video-bios.c deleted file mode 100644 index 7deabc144a2..00000000000 --- a/arch/x86/kernel/acpi/realmode/video-bios.c +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include "../../../boot/video-bios.c" | ||
diff --git a/arch/x86/kernel/acpi/realmode/video-mode.c b/arch/x86/kernel/acpi/realmode/video-mode.c deleted file mode 100644 index 328ad209f11..00000000000 --- a/arch/x86/kernel/acpi/realmode/video-mode.c +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include "../../../boot/video-mode.c" | ||
diff --git a/arch/x86/kernel/acpi/realmode/video-vesa.c b/arch/x86/kernel/acpi/realmode/video-vesa.c deleted file mode 100644 index 9dbb9672226..00000000000 --- a/arch/x86/kernel/acpi/realmode/video-vesa.c +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include "../../../boot/video-vesa.c" | ||
diff --git a/arch/x86/kernel/acpi/realmode/video-vga.c b/arch/x86/kernel/acpi/realmode/video-vga.c deleted file mode 100644 index bcc81255f37..00000000000 --- a/arch/x86/kernel/acpi/realmode/video-vga.c +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include "../../../boot/video-vga.c" | ||
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S deleted file mode 100644 index d4f8010a5b1..00000000000 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ /dev/null | |||
@@ -1,62 +0,0 @@ | |||
1 | /* | ||
2 | * wakeup.ld | ||
3 | * | ||
4 | * Linker script for the real-mode wakeup code | ||
5 | */ | ||
6 | #undef i386 | ||
7 | #include "wakeup.h" | ||
8 | |||
9 | OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") | ||
10 | OUTPUT_ARCH(i386) | ||
11 | ENTRY(_start) | ||
12 | |||
13 | SECTIONS | ||
14 | { | ||
15 | . = 0; | ||
16 | .jump : { | ||
17 | *(.jump) | ||
18 | } = 0x90909090 | ||
19 | |||
20 | . = WAKEUP_HEADER_OFFSET; | ||
21 | .header : { | ||
22 | *(.header) | ||
23 | } | ||
24 | |||
25 | . = ALIGN(16); | ||
26 | .text : { | ||
27 | *(.text*) | ||
28 | } = 0x90909090 | ||
29 | |||
30 | . = ALIGN(16); | ||
31 | .rodata : { | ||
32 | *(.rodata*) | ||
33 | } | ||
34 | |||
35 | .videocards : { | ||
36 | video_cards = .; | ||
37 | *(.videocards) | ||
38 | video_cards_end = .; | ||
39 | } | ||
40 | |||
41 | . = ALIGN(16); | ||
42 | .data : { | ||
43 | *(.data*) | ||
44 | } | ||
45 | |||
46 | . = ALIGN(16); | ||
47 | .bss : { | ||
48 | __bss_start = .; | ||
49 | *(.bss) | ||
50 | __bss_end = .; | ||
51 | } | ||
52 | |||
53 | .signature : { | ||
54 | *(.signature) | ||
55 | } | ||
56 | |||
57 | _end = .; | ||
58 | |||
59 | /DISCARD/ : { | ||
60 | *(.note*) | ||
61 | } | ||
62 | } | ||
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 146a49c763a..95bf99de905 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -14,8 +14,9 @@ | |||
14 | #include <asm/desc.h> | 14 | #include <asm/desc.h> |
15 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable.h> |
16 | #include <asm/cacheflush.h> | 16 | #include <asm/cacheflush.h> |
17 | #include <asm/realmode.h> | ||
17 | 18 | ||
18 | #include "realmode/wakeup.h" | 19 | #include "../../realmode/rm/wakeup.h" |
19 | #include "sleep.h" | 20 | #include "sleep.h" |
20 | 21 | ||
21 | unsigned long acpi_realmode_flags; | 22 | unsigned long acpi_realmode_flags; |
@@ -36,13 +37,9 @@ asmlinkage void acpi_enter_s3(void) | |||
36 | */ | 37 | */ |
37 | int acpi_suspend_lowlevel(void) | 38 | int acpi_suspend_lowlevel(void) |
38 | { | 39 | { |
39 | struct wakeup_header *header; | 40 | struct wakeup_header *header = |
40 | /* address in low memory of the wakeup routine. */ | 41 | (struct wakeup_header *) __va(real_mode_header->wakeup_header); |
41 | char *acpi_realmode; | ||
42 | 42 | ||
43 | acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code); | ||
44 | |||
45 | header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET); | ||
46 | if (header->signature != WAKEUP_HEADER_SIGNATURE) { | 43 | if (header->signature != WAKEUP_HEADER_SIGNATURE) { |
47 | printk(KERN_ERR "wakeup header does not match\n"); | 44 | printk(KERN_ERR "wakeup header does not match\n"); |
48 | return -EINVAL; | 45 | return -EINVAL; |
@@ -50,27 +47,6 @@ int acpi_suspend_lowlevel(void) | |||
50 | 47 | ||
51 | header->video_mode = saved_video_mode; | 48 | header->video_mode = saved_video_mode; |
52 | 49 | ||
53 | header->wakeup_jmp_seg = acpi_wakeup_address >> 4; | ||
54 | |||
55 | /* | ||
56 | * Set up the wakeup GDT. We set these up as Big Real Mode, | ||
57 | * that is, with limits set to 4 GB. At least the Lenovo | ||
58 | * Thinkpad X61 is known to need this for the video BIOS | ||
59 | * initialization quirk to work; this is likely to also | ||
60 | * be the case for other laptops or integrated video devices. | ||
61 | */ | ||
62 | |||
63 | /* GDT[0]: GDT self-pointer */ | ||
64 | header->wakeup_gdt[0] = | ||
65 | (u64)(sizeof(header->wakeup_gdt) - 1) + | ||
66 | ((u64)__pa(&header->wakeup_gdt) << 16); | ||
67 | /* GDT[1]: big real mode-like code segment */ | ||
68 | header->wakeup_gdt[1] = | ||
69 | GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff); | ||
70 | /* GDT[2]: big real mode-like data segment */ | ||
71 | header->wakeup_gdt[2] = | ||
72 | GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff); | ||
73 | |||
74 | #ifndef CONFIG_64BIT | 50 | #ifndef CONFIG_64BIT |
75 | store_gdt((struct desc_ptr *)&header->pmode_gdt); | 51 | store_gdt((struct desc_ptr *)&header->pmode_gdt); |
76 | 52 | ||
@@ -95,7 +71,6 @@ int acpi_suspend_lowlevel(void) | |||
95 | header->pmode_cr3 = (u32)__pa(&initial_page_table); | 71 | header->pmode_cr3 = (u32)__pa(&initial_page_table); |
96 | saved_magic = 0x12345678; | 72 | saved_magic = 0x12345678; |
97 | #else /* CONFIG_64BIT */ | 73 | #else /* CONFIG_64BIT */ |
98 | header->trampoline_segment = trampoline_address() >> 4; | ||
99 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
100 | stack_start = (unsigned long)temp_stack + sizeof(temp_stack); | 75 | stack_start = (unsigned long)temp_stack + sizeof(temp_stack); |
101 | early_gdt_descr.address = | 76 | early_gdt_descr.address = |
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index d68677a2a01..5653a5791ec 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h | |||
@@ -2,8 +2,8 @@ | |||
2 | * Variables and functions used by the code in sleep.c | 2 | * Variables and functions used by the code in sleep.c |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <asm/trampoline.h> | ||
6 | #include <linux/linkage.h> | 5 | #include <linux/linkage.h> |
6 | #include <asm/realmode.h> | ||
7 | 7 | ||
8 | extern unsigned long saved_video_mode; | 8 | extern unsigned long saved_video_mode; |
9 | extern long saved_magic; | 9 | extern long saved_magic; |
diff --git a/arch/x86/kernel/acpi/wakeup_rm.S b/arch/x86/kernel/acpi/wakeup_rm.S deleted file mode 100644 index 63b8ab524f2..00000000000 --- a/arch/x86/kernel/acpi/wakeup_rm.S +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | /* | ||
2 | * Wrapper script for the realmode binary as a transport object | ||
3 | * before copying to low memory. | ||
4 | */ | ||
5 | #include <asm/page_types.h> | ||
6 | |||
7 | .section ".x86_trampoline","a" | ||
8 | .balign PAGE_SIZE | ||
9 | .globl acpi_wakeup_code | ||
10 | acpi_wakeup_code: | ||
11 | .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin" | ||
12 | .size acpi_wakeup_code, .-acpi_wakeup_code | ||
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1f84794f075..931280ff829 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #define pr_fmt(fmt) "SMP alternatives: " fmt | ||
2 | |||
1 | #include <linux/module.h> | 3 | #include <linux/module.h> |
2 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
3 | #include <linux/mutex.h> | 5 | #include <linux/mutex.h> |
@@ -63,8 +65,11 @@ static int __init setup_noreplace_paravirt(char *str) | |||
63 | __setup("noreplace-paravirt", setup_noreplace_paravirt); | 65 | __setup("noreplace-paravirt", setup_noreplace_paravirt); |
64 | #endif | 66 | #endif |
65 | 67 | ||
66 | #define DPRINTK(fmt, args...) if (debug_alternative) \ | 68 | #define DPRINTK(fmt, ...) \ |
67 | printk(KERN_DEBUG fmt, args) | 69 | do { \ |
70 | if (debug_alternative) \ | ||
71 | printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ | ||
72 | } while (0) | ||
68 | 73 | ||
69 | /* | 74 | /* |
70 | * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes | 75 | * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes |
@@ -428,7 +433,7 @@ void alternatives_smp_switch(int smp) | |||
428 | * If this still occurs then you should see a hang | 433 | * If this still occurs then you should see a hang |
429 | * or crash shortly after this line: | 434 | * or crash shortly after this line: |
430 | */ | 435 | */ |
431 | printk("lockdep: fixing up alternatives.\n"); | 436 | pr_info("lockdep: fixing up alternatives\n"); |
432 | #endif | 437 | #endif |
433 | 438 | ||
434 | if (noreplace_smp || smp_alt_once || skip_smp_alternatives) | 439 | if (noreplace_smp || smp_alt_once || skip_smp_alternatives) |
@@ -444,14 +449,14 @@ void alternatives_smp_switch(int smp) | |||
444 | if (smp == smp_mode) { | 449 | if (smp == smp_mode) { |
445 | /* nothing */ | 450 | /* nothing */ |
446 | } else if (smp) { | 451 | } else if (smp) { |
447 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); | 452 | pr_info("switching to SMP code\n"); |
448 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); | 453 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
449 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); | 454 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
450 | list_for_each_entry(mod, &smp_alt_modules, next) | 455 | list_for_each_entry(mod, &smp_alt_modules, next) |
451 | alternatives_smp_lock(mod->locks, mod->locks_end, | 456 | alternatives_smp_lock(mod->locks, mod->locks_end, |
452 | mod->text, mod->text_end); | 457 | mod->text, mod->text_end); |
453 | } else { | 458 | } else { |
454 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); | 459 | pr_info("switching to UP code\n"); |
455 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); | 460 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
456 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); | 461 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
457 | list_for_each_entry(mod, &smp_alt_modules, next) | 462 | list_for_each_entry(mod, &smp_alt_modules, next) |
@@ -546,7 +551,7 @@ void __init alternative_instructions(void) | |||
546 | #ifdef CONFIG_SMP | 551 | #ifdef CONFIG_SMP |
547 | if (smp_alt_once) { | 552 | if (smp_alt_once) { |
548 | if (1 == num_possible_cpus()) { | 553 | if (1 == num_possible_cpus()) { |
549 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); | 554 | pr_info("switching to UP code\n"); |
550 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); | 555 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
551 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); | 556 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
552 | 557 | ||
@@ -664,7 +669,7 @@ static int __kprobes stop_machine_text_poke(void *data) | |||
664 | struct text_poke_param *p; | 669 | struct text_poke_param *p; |
665 | int i; | 670 | int i; |
666 | 671 | ||
667 | if (atomic_dec_and_test(&stop_machine_first)) { | 672 | if (atomic_xchg(&stop_machine_first, 0)) { |
668 | for (i = 0; i < tpp->nparams; i++) { | 673 | for (i = 0; i < tpp->nparams; i++) { |
669 | p = &tpp->params[i]; | 674 | p = &tpp->params[i]; |
670 | text_poke(p->addr, p->opcode, p->len); | 675 | text_poke(p->addr, p->opcode, p->len); |
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index be16854591c..aadf3359e2a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c | |||
@@ -2,6 +2,9 @@ | |||
2 | * Shared support code for AMD K8 northbridges and derivates. | 2 | * Shared support code for AMD K8 northbridges and derivates. |
3 | * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. | 3 | * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. |
4 | */ | 4 | */ |
5 | |||
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
7 | |||
5 | #include <linux/types.h> | 8 | #include <linux/types.h> |
6 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
7 | #include <linux/init.h> | 10 | #include <linux/init.h> |
@@ -16,6 +19,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { | |||
16 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, | 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
17 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, | 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
18 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, | 21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, |
22 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, | ||
19 | {} | 23 | {} |
20 | }; | 24 | }; |
21 | EXPORT_SYMBOL(amd_nb_misc_ids); | 25 | EXPORT_SYMBOL(amd_nb_misc_ids); |
@@ -258,7 +262,7 @@ void amd_flush_garts(void) | |||
258 | } | 262 | } |
259 | spin_unlock_irqrestore(&gart_lock, flags); | 263 | spin_unlock_irqrestore(&gart_lock, flags); |
260 | if (!flushed) | 264 | if (!flushed) |
261 | printk("nothing to flush?\n"); | 265 | pr_notice("nothing to flush?\n"); |
262 | } | 266 | } |
263 | EXPORT_SYMBOL_GPL(amd_flush_garts); | 267 | EXPORT_SYMBOL_GPL(amd_flush_garts); |
264 | 268 | ||
@@ -269,11 +273,10 @@ static __init int init_amd_nbs(void) | |||
269 | err = amd_cache_northbridges(); | 273 | err = amd_cache_northbridges(); |
270 | 274 | ||
271 | if (err < 0) | 275 | if (err < 0) |
272 | printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); | 276 | pr_notice("Cannot enumerate AMD northbridges\n"); |
273 | 277 | ||
274 | if (amd_cache_gart() < 0) | 278 | if (amd_cache_gart() < 0) |
275 | printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, " | 279 | pr_notice("Cannot initialize GART flush words, GART support disabled\n"); |
276 | "GART support disabled.\n"); | ||
277 | 280 | ||
278 | return err; | 281 | return err; |
279 | } | 282 | } |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 6e76c191a83..d5fd66f0d4c 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/ioport.h> | 21 | #include <linux/ioport.h> |
22 | #include <linux/suspend.h> | 22 | #include <linux/suspend.h> |
23 | #include <linux/kmemleak.h> | ||
24 | #include <asm/e820.h> | 23 | #include <asm/e820.h> |
25 | #include <asm/io.h> | 24 | #include <asm/io.h> |
26 | #include <asm/iommu.h> | 25 | #include <asm/iommu.h> |
@@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void) | |||
95 | return 0; | 94 | return 0; |
96 | } | 95 | } |
97 | memblock_reserve(addr, aper_size); | 96 | memblock_reserve(addr, aper_size); |
98 | /* | ||
99 | * Kmemleak should not scan this block as it may not be mapped via the | ||
100 | * kernel direct mapping. | ||
101 | */ | ||
102 | kmemleak_ignore(phys_to_virt(addr)); | ||
103 | printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", | 97 | printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", |
104 | aper_size >> 10, addr); | 98 | aper_size >> 10, addr); |
105 | insert_aperture_resource((u32)addr, aper_size); | 99 | insert_aperture_resource((u32)addr, aper_size); |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 39a222e094a..98e24131ff3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -2123,6 +2123,42 @@ void default_init_apic_ldr(void) | |||
2123 | apic_write(APIC_LDR, val); | 2123 | apic_write(APIC_LDR, val); |
2124 | } | 2124 | } |
2125 | 2125 | ||
2126 | int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
2127 | const struct cpumask *andmask, | ||
2128 | unsigned int *apicid) | ||
2129 | { | ||
2130 | unsigned int cpu; | ||
2131 | |||
2132 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
2133 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
2134 | break; | ||
2135 | } | ||
2136 | |||
2137 | if (likely(cpu < nr_cpu_ids)) { | ||
2138 | *apicid = per_cpu(x86_cpu_to_apicid, cpu); | ||
2139 | return 0; | ||
2140 | } | ||
2141 | |||
2142 | return -EINVAL; | ||
2143 | } | ||
2144 | |||
2145 | /* | ||
2146 | * Override the generic EOI implementation with an optimized version. | ||
2147 | * Only called during early boot when only one CPU is active and with | ||
2148 | * interrupts disabled, so we know this does not race with actual APIC driver | ||
2149 | * use. | ||
2150 | */ | ||
2151 | void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) | ||
2152 | { | ||
2153 | struct apic **drv; | ||
2154 | |||
2155 | for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { | ||
2156 | /* Should happen once for each apic */ | ||
2157 | WARN_ON((*drv)->eoi_write == eoi_write); | ||
2158 | (*drv)->eoi_write = eoi_write; | ||
2159 | } | ||
2160 | } | ||
2161 | |||
2126 | /* | 2162 | /* |
2127 | * Power management | 2163 | * Power management |
2128 | */ | 2164 | */ |
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 0e881c46e8c..00c77cf78e9 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -36,25 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
36 | return 1; | 36 | return 1; |
37 | } | 37 | } |
38 | 38 | ||
39 | static const struct cpumask *flat_target_cpus(void) | ||
40 | { | ||
41 | return cpu_online_mask; | ||
42 | } | ||
43 | |||
44 | static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
45 | { | ||
46 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
47 | * specified in the interrupt destination when using lowest | ||
48 | * priority interrupt delivery mode. | ||
49 | * | ||
50 | * In particular there was a hyperthreading cpu observed to | ||
51 | * deliver interrupts to the wrong hyperthread when only one | ||
52 | * hyperthread was specified in the interrupt desitination. | ||
53 | */ | ||
54 | cpumask_clear(retmask); | ||
55 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
56 | } | ||
57 | |||
58 | /* | 39 | /* |
59 | * Set up the logical destination ID. | 40 | * Set up the logical destination ID. |
60 | * | 41 | * |
@@ -92,7 +73,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) | |||
92 | } | 73 | } |
93 | 74 | ||
94 | static void | 75 | static void |
95 | flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) | 76 | flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) |
96 | { | 77 | { |
97 | unsigned long mask = cpumask_bits(cpumask)[0]; | 78 | unsigned long mask = cpumask_bits(cpumask)[0]; |
98 | int cpu = smp_processor_id(); | 79 | int cpu = smp_processor_id(); |
@@ -186,7 +167,7 @@ static struct apic apic_flat = { | |||
186 | .irq_delivery_mode = dest_LowestPrio, | 167 | .irq_delivery_mode = dest_LowestPrio, |
187 | .irq_dest_mode = 1, /* logical */ | 168 | .irq_dest_mode = 1, /* logical */ |
188 | 169 | ||
189 | .target_cpus = flat_target_cpus, | 170 | .target_cpus = online_target_cpus, |
190 | .disable_esr = 0, | 171 | .disable_esr = 0, |
191 | .dest_logical = APIC_DEST_LOGICAL, | 172 | .dest_logical = APIC_DEST_LOGICAL, |
192 | .check_apicid_used = NULL, | 173 | .check_apicid_used = NULL, |
@@ -210,8 +191,7 @@ static struct apic apic_flat = { | |||
210 | .set_apic_id = set_apic_id, | 191 | .set_apic_id = set_apic_id, |
211 | .apic_id_mask = 0xFFu << 24, | 192 | .apic_id_mask = 0xFFu << 24, |
212 | 193 | ||
213 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | 194 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, |
214 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
215 | 195 | ||
216 | .send_IPI_mask = flat_send_IPI_mask, | 196 | .send_IPI_mask = flat_send_IPI_mask, |
217 | .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, | 197 | .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, |
@@ -262,17 +242,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
262 | return 0; | 242 | return 0; |
263 | } | 243 | } |
264 | 244 | ||
265 | static const struct cpumask *physflat_target_cpus(void) | ||
266 | { | ||
267 | return cpu_online_mask; | ||
268 | } | ||
269 | |||
270 | static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
271 | { | ||
272 | cpumask_clear(retmask); | ||
273 | cpumask_set_cpu(cpu, retmask); | ||
274 | } | ||
275 | |||
276 | static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) | 245 | static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) |
277 | { | 246 | { |
278 | default_send_IPI_mask_sequence_phys(cpumask, vector); | 247 | default_send_IPI_mask_sequence_phys(cpumask, vector); |
@@ -294,38 +263,6 @@ static void physflat_send_IPI_all(int vector) | |||
294 | physflat_send_IPI_mask(cpu_online_mask, vector); | 263 | physflat_send_IPI_mask(cpu_online_mask, vector); |
295 | } | 264 | } |
296 | 265 | ||
297 | static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
298 | { | ||
299 | int cpu; | ||
300 | |||
301 | /* | ||
302 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
303 | * May as well be the first. | ||
304 | */ | ||
305 | cpu = cpumask_first(cpumask); | ||
306 | if ((unsigned)cpu < nr_cpu_ids) | ||
307 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
308 | else | ||
309 | return BAD_APICID; | ||
310 | } | ||
311 | |||
312 | static unsigned int | ||
313 | physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
314 | const struct cpumask *andmask) | ||
315 | { | ||
316 | int cpu; | ||
317 | |||
318 | /* | ||
319 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
320 | * May as well be the first. | ||
321 | */ | ||
322 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
323 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
324 | break; | ||
325 | } | ||
326 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
327 | } | ||
328 | |||
329 | static int physflat_probe(void) | 266 | static int physflat_probe(void) |
330 | { | 267 | { |
331 | if (apic == &apic_physflat || num_possible_cpus() > 8) | 268 | if (apic == &apic_physflat || num_possible_cpus() > 8) |
@@ -345,13 +282,13 @@ static struct apic apic_physflat = { | |||
345 | .irq_delivery_mode = dest_Fixed, | 282 | .irq_delivery_mode = dest_Fixed, |
346 | .irq_dest_mode = 0, /* physical */ | 283 | .irq_dest_mode = 0, /* physical */ |
347 | 284 | ||
348 | .target_cpus = physflat_target_cpus, | 285 | .target_cpus = online_target_cpus, |
349 | .disable_esr = 0, | 286 | .disable_esr = 0, |
350 | .dest_logical = 0, | 287 | .dest_logical = 0, |
351 | .check_apicid_used = NULL, | 288 | .check_apicid_used = NULL, |
352 | .check_apicid_present = NULL, | 289 | .check_apicid_present = NULL, |
353 | 290 | ||
354 | .vector_allocation_domain = physflat_vector_allocation_domain, | 291 | .vector_allocation_domain = default_vector_allocation_domain, |
355 | /* not needed, but shouldn't hurt: */ | 292 | /* not needed, but shouldn't hurt: */ |
356 | .init_apic_ldr = flat_init_apic_ldr, | 293 | .init_apic_ldr = flat_init_apic_ldr, |
357 | 294 | ||
@@ -370,8 +307,7 @@ static struct apic apic_physflat = { | |||
370 | .set_apic_id = set_apic_id, | 307 | .set_apic_id = set_apic_id, |
371 | .apic_id_mask = 0xFFu << 24, | 308 | .apic_id_mask = 0xFFu << 24, |
372 | 309 | ||
373 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, | 310 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, |
374 | .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, | ||
375 | 311 | ||
376 | .send_IPI_mask = physflat_send_IPI_mask, | 312 | .send_IPI_mask = physflat_send_IPI_mask, |
377 | .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, | 313 | .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, |
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index a6e4c6e06c0..e145f28b409 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -100,12 +100,12 @@ static unsigned long noop_check_apicid_present(int bit) | |||
100 | return physid_isset(bit, phys_cpu_present_map); | 100 | return physid_isset(bit, phys_cpu_present_map); |
101 | } | 101 | } |
102 | 102 | ||
103 | static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) | 103 | static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, |
104 | const struct cpumask *mask) | ||
104 | { | 105 | { |
105 | if (cpu != 0) | 106 | if (cpu != 0) |
106 | pr_warning("APIC: Vector allocated for non-BSP cpu\n"); | 107 | pr_warning("APIC: Vector allocated for non-BSP cpu\n"); |
107 | cpumask_clear(retmask); | 108 | cpumask_copy(retmask, cpumask_of(cpu)); |
108 | cpumask_set_cpu(cpu, retmask); | ||
109 | } | 109 | } |
110 | 110 | ||
111 | static u32 noop_apic_read(u32 reg) | 111 | static u32 noop_apic_read(u32 reg) |
@@ -159,8 +159,7 @@ struct apic apic_noop = { | |||
159 | .set_apic_id = NULL, | 159 | .set_apic_id = NULL, |
160 | .apic_id_mask = 0x0F << 24, | 160 | .apic_id_mask = 0x0F << 24, |
161 | 161 | ||
162 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | 162 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, |
163 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
164 | 163 | ||
165 | .send_IPI_mask = noop_send_IPI_mask, | 164 | .send_IPI_mask = noop_send_IPI_mask, |
166 | .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, | 165 | .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, |
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 6ec6d5d297c..bc552cff257 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c | |||
@@ -72,17 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) | |||
72 | return initial_apic_id >> index_msb; | 72 | return initial_apic_id >> index_msb; |
73 | } | 73 | } |
74 | 74 | ||
75 | static const struct cpumask *numachip_target_cpus(void) | ||
76 | { | ||
77 | return cpu_online_mask; | ||
78 | } | ||
79 | |||
80 | static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
81 | { | ||
82 | cpumask_clear(retmask); | ||
83 | cpumask_set_cpu(cpu, retmask); | ||
84 | } | ||
85 | |||
86 | static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) | 75 | static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) |
87 | { | 76 | { |
88 | union numachip_csr_g3_ext_irq_gen int_gen; | 77 | union numachip_csr_g3_ext_irq_gen int_gen; |
@@ -157,38 +146,6 @@ static void numachip_send_IPI_self(int vector) | |||
157 | __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); | 146 | __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); |
158 | } | 147 | } |
159 | 148 | ||
160 | static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
161 | { | ||
162 | int cpu; | ||
163 | |||
164 | /* | ||
165 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
166 | * May as well be the first. | ||
167 | */ | ||
168 | cpu = cpumask_first(cpumask); | ||
169 | if (likely((unsigned)cpu < nr_cpu_ids)) | ||
170 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
171 | |||
172 | return BAD_APICID; | ||
173 | } | ||
174 | |||
175 | static unsigned int | ||
176 | numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
177 | const struct cpumask *andmask) | ||
178 | { | ||
179 | int cpu; | ||
180 | |||
181 | /* | ||
182 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
183 | * May as well be the first. | ||
184 | */ | ||
185 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
186 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
187 | break; | ||
188 | } | ||
189 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
190 | } | ||
191 | |||
192 | static int __init numachip_probe(void) | 149 | static int __init numachip_probe(void) |
193 | { | 150 | { |
194 | return apic == &apic_numachip; | 151 | return apic == &apic_numachip; |
@@ -253,13 +210,13 @@ static struct apic apic_numachip __refconst = { | |||
253 | .irq_delivery_mode = dest_Fixed, | 210 | .irq_delivery_mode = dest_Fixed, |
254 | .irq_dest_mode = 0, /* physical */ | 211 | .irq_dest_mode = 0, /* physical */ |
255 | 212 | ||
256 | .target_cpus = numachip_target_cpus, | 213 | .target_cpus = online_target_cpus, |
257 | .disable_esr = 0, | 214 | .disable_esr = 0, |
258 | .dest_logical = 0, | 215 | .dest_logical = 0, |
259 | .check_apicid_used = NULL, | 216 | .check_apicid_used = NULL, |
260 | .check_apicid_present = NULL, | 217 | .check_apicid_present = NULL, |
261 | 218 | ||
262 | .vector_allocation_domain = numachip_vector_allocation_domain, | 219 | .vector_allocation_domain = default_vector_allocation_domain, |
263 | .init_apic_ldr = flat_init_apic_ldr, | 220 | .init_apic_ldr = flat_init_apic_ldr, |
264 | 221 | ||
265 | .ioapic_phys_id_map = NULL, | 222 | .ioapic_phys_id_map = NULL, |
@@ -277,8 +234,7 @@ static struct apic apic_numachip __refconst = { | |||
277 | .set_apic_id = set_apic_id, | 234 | .set_apic_id = set_apic_id, |
278 | .apic_id_mask = 0xffU << 24, | 235 | .apic_id_mask = 0xffU << 24, |
279 | 236 | ||
280 | .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid, | 237 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, |
281 | .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and, | ||
282 | 238 | ||
283 | .send_IPI_mask = numachip_send_IPI_mask, | 239 | .send_IPI_mask = numachip_send_IPI_mask, |
284 | .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, | 240 | .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, |
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 31fbdbfbf96..d50e3640d5a 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void) | |||
26 | return 1; | 26 | return 1; |
27 | } | 27 | } |
28 | 28 | ||
29 | static const struct cpumask *bigsmp_target_cpus(void) | ||
30 | { | ||
31 | #ifdef CONFIG_SMP | ||
32 | return cpu_online_mask; | ||
33 | #else | ||
34 | return cpumask_of(0); | ||
35 | #endif | ||
36 | } | ||
37 | |||
38 | static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) | 29 | static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) |
39 | { | 30 | { |
40 | return 0; | 31 | return 0; |
@@ -105,32 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid) | |||
105 | return 1; | 96 | return 1; |
106 | } | 97 | } |
107 | 98 | ||
108 | /* As we are using single CPU as destination, pick only one CPU here */ | ||
109 | static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
110 | { | ||
111 | int cpu = cpumask_first(cpumask); | ||
112 | |||
113 | if (cpu < nr_cpu_ids) | ||
114 | return cpu_physical_id(cpu); | ||
115 | return BAD_APICID; | ||
116 | } | ||
117 | |||
118 | static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
119 | const struct cpumask *andmask) | ||
120 | { | ||
121 | int cpu; | ||
122 | |||
123 | /* | ||
124 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
125 | * May as well be the first. | ||
126 | */ | ||
127 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
128 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
129 | return cpu_physical_id(cpu); | ||
130 | } | ||
131 | return BAD_APICID; | ||
132 | } | ||
133 | |||
134 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) | 99 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) |
135 | { | 100 | { |
136 | return cpuid_apic >> index_msb; | 101 | return cpuid_apic >> index_msb; |
@@ -177,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { | |||
177 | { } /* NULL entry stops DMI scanning */ | 142 | { } /* NULL entry stops DMI scanning */ |
178 | }; | 143 | }; |
179 | 144 | ||
180 | static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
181 | { | ||
182 | cpumask_clear(retmask); | ||
183 | cpumask_set_cpu(cpu, retmask); | ||
184 | } | ||
185 | |||
186 | static int probe_bigsmp(void) | 145 | static int probe_bigsmp(void) |
187 | { | 146 | { |
188 | if (def_to_bigsmp) | 147 | if (def_to_bigsmp) |
@@ -205,13 +164,13 @@ static struct apic apic_bigsmp = { | |||
205 | /* phys delivery to target CPU: */ | 164 | /* phys delivery to target CPU: */ |
206 | .irq_dest_mode = 0, | 165 | .irq_dest_mode = 0, |
207 | 166 | ||
208 | .target_cpus = bigsmp_target_cpus, | 167 | .target_cpus = default_target_cpus, |
209 | .disable_esr = 1, | 168 | .disable_esr = 1, |
210 | .dest_logical = 0, | 169 | .dest_logical = 0, |
211 | .check_apicid_used = bigsmp_check_apicid_used, | 170 | .check_apicid_used = bigsmp_check_apicid_used, |
212 | .check_apicid_present = bigsmp_check_apicid_present, | 171 | .check_apicid_present = bigsmp_check_apicid_present, |
213 | 172 | ||
214 | .vector_allocation_domain = bigsmp_vector_allocation_domain, | 173 | .vector_allocation_domain = default_vector_allocation_domain, |
215 | .init_apic_ldr = bigsmp_init_apic_ldr, | 174 | .init_apic_ldr = bigsmp_init_apic_ldr, |
216 | 175 | ||
217 | .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, | 176 | .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, |
@@ -229,8 +188,7 @@ static struct apic apic_bigsmp = { | |||
229 | .set_apic_id = NULL, | 188 | .set_apic_id = NULL, |
230 | .apic_id_mask = 0xFF << 24, | 189 | .apic_id_mask = 0xFF << 24, |
231 | 190 | ||
232 | .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, | 191 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, |
233 | .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, | ||
234 | 192 | ||
235 | .send_IPI_mask = bigsmp_send_IPI_mask, | 193 | .send_IPI_mask = bigsmp_send_IPI_mask, |
236 | .send_IPI_mask_allbutself = NULL, | 194 | .send_IPI_mask_allbutself = NULL, |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index db4ab1be3c7..0874799a98c 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -394,21 +394,6 @@ static void es7000_enable_apic_mode(void) | |||
394 | WARN(1, "Command failed, status = %x\n", mip_status); | 394 | WARN(1, "Command failed, status = %x\n", mip_status); |
395 | } | 395 | } |
396 | 396 | ||
397 | static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
398 | { | ||
399 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
400 | * specified in the interrupt destination when using lowest | ||
401 | * priority interrupt delivery mode. | ||
402 | * | ||
403 | * In particular there was a hyperthreading cpu observed to | ||
404 | * deliver interrupts to the wrong hyperthread when only one | ||
405 | * hyperthread was specified in the interrupt desitination. | ||
406 | */ | ||
407 | cpumask_clear(retmask); | ||
408 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
409 | } | ||
410 | |||
411 | |||
412 | static void es7000_wait_for_init_deassert(atomic_t *deassert) | 397 | static void es7000_wait_for_init_deassert(atomic_t *deassert) |
413 | { | 398 | { |
414 | while (!atomic_read(deassert)) | 399 | while (!atomic_read(deassert)) |
@@ -540,45 +525,49 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid) | |||
540 | return 1; | 525 | return 1; |
541 | } | 526 | } |
542 | 527 | ||
543 | static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) | 528 | static inline int |
529 | es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) | ||
544 | { | 530 | { |
545 | unsigned int round = 0; | 531 | unsigned int round = 0; |
546 | int cpu, uninitialized_var(apicid); | 532 | unsigned int cpu, uninitialized_var(apicid); |
547 | 533 | ||
548 | /* | 534 | /* |
549 | * The cpus in the mask must all be on the apic cluster. | 535 | * The cpus in the mask must all be on the apic cluster. |
550 | */ | 536 | */ |
551 | for_each_cpu(cpu, cpumask) { | 537 | for_each_cpu_and(cpu, cpumask, cpu_online_mask) { |
552 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | 538 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); |
553 | 539 | ||
554 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { | 540 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { |
555 | WARN(1, "Not a valid mask!"); | 541 | WARN(1, "Not a valid mask!"); |
556 | 542 | ||
557 | return BAD_APICID; | 543 | return -EINVAL; |
558 | } | 544 | } |
559 | apicid = new_apicid; | 545 | apicid |= new_apicid; |
560 | round++; | 546 | round++; |
561 | } | 547 | } |
562 | return apicid; | 548 | if (!round) |
549 | return -EINVAL; | ||
550 | *dest_id = apicid; | ||
551 | return 0; | ||
563 | } | 552 | } |
564 | 553 | ||
565 | static unsigned int | 554 | static int |
566 | es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, | 555 | es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, |
567 | const struct cpumask *andmask) | 556 | const struct cpumask *andmask, |
557 | unsigned int *apicid) | ||
568 | { | 558 | { |
569 | int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); | ||
570 | cpumask_var_t cpumask; | 559 | cpumask_var_t cpumask; |
560 | *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); | ||
571 | 561 | ||
572 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | 562 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) |
573 | return apicid; | 563 | return 0; |
574 | 564 | ||
575 | cpumask_and(cpumask, inmask, andmask); | 565 | cpumask_and(cpumask, inmask, andmask); |
576 | cpumask_and(cpumask, cpumask, cpu_online_mask); | 566 | es7000_cpu_mask_to_apicid(cpumask, apicid); |
577 | apicid = es7000_cpu_mask_to_apicid(cpumask); | ||
578 | 567 | ||
579 | free_cpumask_var(cpumask); | 568 | free_cpumask_var(cpumask); |
580 | 569 | ||
581 | return apicid; | 570 | return 0; |
582 | } | 571 | } |
583 | 572 | ||
584 | static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) | 573 | static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) |
@@ -638,7 +627,7 @@ static struct apic __refdata apic_es7000_cluster = { | |||
638 | .check_apicid_used = es7000_check_apicid_used, | 627 | .check_apicid_used = es7000_check_apicid_used, |
639 | .check_apicid_present = es7000_check_apicid_present, | 628 | .check_apicid_present = es7000_check_apicid_present, |
640 | 629 | ||
641 | .vector_allocation_domain = es7000_vector_allocation_domain, | 630 | .vector_allocation_domain = flat_vector_allocation_domain, |
642 | .init_apic_ldr = es7000_init_apic_ldr_cluster, | 631 | .init_apic_ldr = es7000_init_apic_ldr_cluster, |
643 | 632 | ||
644 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | 633 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, |
@@ -656,7 +645,6 @@ static struct apic __refdata apic_es7000_cluster = { | |||
656 | .set_apic_id = NULL, | 645 | .set_apic_id = NULL, |
657 | .apic_id_mask = 0xFF << 24, | 646 | .apic_id_mask = 0xFF << 24, |
658 | 647 | ||
659 | .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, | ||
660 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, | 648 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, |
661 | 649 | ||
662 | .send_IPI_mask = es7000_send_IPI_mask, | 650 | .send_IPI_mask = es7000_send_IPI_mask, |
@@ -705,7 +693,7 @@ static struct apic __refdata apic_es7000 = { | |||
705 | .check_apicid_used = es7000_check_apicid_used, | 693 | .check_apicid_used = es7000_check_apicid_used, |
706 | .check_apicid_present = es7000_check_apicid_present, | 694 | .check_apicid_present = es7000_check_apicid_present, |
707 | 695 | ||
708 | .vector_allocation_domain = es7000_vector_allocation_domain, | 696 | .vector_allocation_domain = flat_vector_allocation_domain, |
709 | .init_apic_ldr = es7000_init_apic_ldr, | 697 | .init_apic_ldr = es7000_init_apic_ldr, |
710 | 698 | ||
711 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | 699 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, |
@@ -723,7 +711,6 @@ static struct apic __refdata apic_es7000 = { | |||
723 | .set_apic_id = NULL, | 711 | .set_apic_id = NULL, |
724 | .apic_id_mask = 0xFF << 24, | 712 | .apic_id_mask = 0xFF << 24, |
725 | 713 | ||
726 | .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, | ||
727 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, | 714 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, |
728 | 715 | ||
729 | .send_IPI_mask = es7000_send_IPI_mask, | 716 | .send_IPI_mask = es7000_send_IPI_mask, |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index bce2001b264..406eee78468 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -448,8 +448,8 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi | |||
448 | 448 | ||
449 | entry = alloc_irq_pin_list(node); | 449 | entry = alloc_irq_pin_list(node); |
450 | if (!entry) { | 450 | if (!entry) { |
451 | printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", | 451 | pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", |
452 | node, apic, pin); | 452 | node, apic, pin); |
453 | return -ENOMEM; | 453 | return -ENOMEM; |
454 | } | 454 | } |
455 | entry->apic = apic; | 455 | entry->apic = apic; |
@@ -661,7 +661,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | |||
661 | ioapic_mask_entry(apic, pin); | 661 | ioapic_mask_entry(apic, pin); |
662 | entry = ioapic_read_entry(apic, pin); | 662 | entry = ioapic_read_entry(apic, pin); |
663 | if (entry.irr) | 663 | if (entry.irr) |
664 | printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n", | 664 | pr_err("Unable to reset IRR for apic: %d, pin :%d\n", |
665 | mpc_ioapic_id(apic), pin); | 665 | mpc_ioapic_id(apic), pin); |
666 | } | 666 | } |
667 | 667 | ||
@@ -895,7 +895,7 @@ static int irq_polarity(int idx) | |||
895 | } | 895 | } |
896 | case 2: /* reserved */ | 896 | case 2: /* reserved */ |
897 | { | 897 | { |
898 | printk(KERN_WARNING "broken BIOS!!\n"); | 898 | pr_warn("broken BIOS!!\n"); |
899 | polarity = 1; | 899 | polarity = 1; |
900 | break; | 900 | break; |
901 | } | 901 | } |
@@ -906,7 +906,7 @@ static int irq_polarity(int idx) | |||
906 | } | 906 | } |
907 | default: /* invalid */ | 907 | default: /* invalid */ |
908 | { | 908 | { |
909 | printk(KERN_WARNING "broken BIOS!!\n"); | 909 | pr_warn("broken BIOS!!\n"); |
910 | polarity = 1; | 910 | polarity = 1; |
911 | break; | 911 | break; |
912 | } | 912 | } |
@@ -948,7 +948,7 @@ static int irq_trigger(int idx) | |||
948 | } | 948 | } |
949 | default: | 949 | default: |
950 | { | 950 | { |
951 | printk(KERN_WARNING "broken BIOS!!\n"); | 951 | pr_warn("broken BIOS!!\n"); |
952 | trigger = 1; | 952 | trigger = 1; |
953 | break; | 953 | break; |
954 | } | 954 | } |
@@ -962,7 +962,7 @@ static int irq_trigger(int idx) | |||
962 | } | 962 | } |
963 | case 2: /* reserved */ | 963 | case 2: /* reserved */ |
964 | { | 964 | { |
965 | printk(KERN_WARNING "broken BIOS!!\n"); | 965 | pr_warn("broken BIOS!!\n"); |
966 | trigger = 1; | 966 | trigger = 1; |
967 | break; | 967 | break; |
968 | } | 968 | } |
@@ -973,7 +973,7 @@ static int irq_trigger(int idx) | |||
973 | } | 973 | } |
974 | default: /* invalid */ | 974 | default: /* invalid */ |
975 | { | 975 | { |
976 | printk(KERN_WARNING "broken BIOS!!\n"); | 976 | pr_warn("broken BIOS!!\n"); |
977 | trigger = 0; | 977 | trigger = 0; |
978 | break; | 978 | break; |
979 | } | 979 | } |
@@ -991,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
991 | * Debugging check, we are in big trouble if this message pops up! | 991 | * Debugging check, we are in big trouble if this message pops up! |
992 | */ | 992 | */ |
993 | if (mp_irqs[idx].dstirq != pin) | 993 | if (mp_irqs[idx].dstirq != pin) |
994 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | 994 | pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); |
995 | 995 | ||
996 | if (test_bit(bus, mp_bus_not_pci)) { | 996 | if (test_bit(bus, mp_bus_not_pci)) { |
997 | irq = mp_irqs[idx].srcbusirq; | 997 | irq = mp_irqs[idx].srcbusirq; |
@@ -1112,8 +1112,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1112 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 1112 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
1113 | */ | 1113 | */ |
1114 | static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; | 1114 | static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; |
1115 | static int current_offset = VECTOR_OFFSET_START % 8; | 1115 | static int current_offset = VECTOR_OFFSET_START % 16; |
1116 | unsigned int old_vector; | ||
1117 | int cpu, err; | 1116 | int cpu, err; |
1118 | cpumask_var_t tmp_mask; | 1117 | cpumask_var_t tmp_mask; |
1119 | 1118 | ||
@@ -1123,35 +1122,45 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1123 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) | 1122 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) |
1124 | return -ENOMEM; | 1123 | return -ENOMEM; |
1125 | 1124 | ||
1126 | old_vector = cfg->vector; | ||
1127 | if (old_vector) { | ||
1128 | cpumask_and(tmp_mask, mask, cpu_online_mask); | ||
1129 | cpumask_and(tmp_mask, cfg->domain, tmp_mask); | ||
1130 | if (!cpumask_empty(tmp_mask)) { | ||
1131 | free_cpumask_var(tmp_mask); | ||
1132 | return 0; | ||
1133 | } | ||
1134 | } | ||
1135 | |||
1136 | /* Only try and allocate irqs on cpus that are present */ | 1125 | /* Only try and allocate irqs on cpus that are present */ |
1137 | err = -ENOSPC; | 1126 | err = -ENOSPC; |
1138 | for_each_cpu_and(cpu, mask, cpu_online_mask) { | 1127 | cpumask_clear(cfg->old_domain); |
1139 | int new_cpu; | 1128 | cpu = cpumask_first_and(mask, cpu_online_mask); |
1140 | int vector, offset; | 1129 | while (cpu < nr_cpu_ids) { |
1130 | int new_cpu, vector, offset; | ||
1141 | 1131 | ||
1142 | apic->vector_allocation_domain(cpu, tmp_mask); | 1132 | apic->vector_allocation_domain(cpu, tmp_mask, mask); |
1133 | |||
1134 | if (cpumask_subset(tmp_mask, cfg->domain)) { | ||
1135 | err = 0; | ||
1136 | if (cpumask_equal(tmp_mask, cfg->domain)) | ||
1137 | break; | ||
1138 | /* | ||
1139 | * New cpumask using the vector is a proper subset of | ||
1140 | * the current in use mask. So cleanup the vector | ||
1141 | * allocation for the members that are not used anymore. | ||
1142 | */ | ||
1143 | cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); | ||
1144 | cfg->move_in_progress = 1; | ||
1145 | cpumask_and(cfg->domain, cfg->domain, tmp_mask); | ||
1146 | break; | ||
1147 | } | ||
1143 | 1148 | ||
1144 | vector = current_vector; | 1149 | vector = current_vector; |
1145 | offset = current_offset; | 1150 | offset = current_offset; |
1146 | next: | 1151 | next: |
1147 | vector += 8; | 1152 | vector += 16; |
1148 | if (vector >= first_system_vector) { | 1153 | if (vector >= first_system_vector) { |
1149 | /* If out of vectors on large boxen, must share them. */ | 1154 | offset = (offset + 1) % 16; |
1150 | offset = (offset + 1) % 8; | ||
1151 | vector = FIRST_EXTERNAL_VECTOR + offset; | 1155 | vector = FIRST_EXTERNAL_VECTOR + offset; |
1152 | } | 1156 | } |
1153 | if (unlikely(current_vector == vector)) | 1157 | |
1158 | if (unlikely(current_vector == vector)) { | ||
1159 | cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); | ||
1160 | cpumask_andnot(tmp_mask, mask, cfg->old_domain); | ||
1161 | cpu = cpumask_first_and(tmp_mask, cpu_online_mask); | ||
1154 | continue; | 1162 | continue; |
1163 | } | ||
1155 | 1164 | ||
1156 | if (test_bit(vector, used_vectors)) | 1165 | if (test_bit(vector, used_vectors)) |
1157 | goto next; | 1166 | goto next; |
@@ -1162,7 +1171,7 @@ next: | |||
1162 | /* Found one! */ | 1171 | /* Found one! */ |
1163 | current_vector = vector; | 1172 | current_vector = vector; |
1164 | current_offset = offset; | 1173 | current_offset = offset; |
1165 | if (old_vector) { | 1174 | if (cfg->vector) { |
1166 | cfg->move_in_progress = 1; | 1175 | cfg->move_in_progress = 1; |
1167 | cpumask_copy(cfg->old_domain, cfg->domain); | 1176 | cpumask_copy(cfg->old_domain, cfg->domain); |
1168 | } | 1177 | } |
@@ -1195,7 +1204,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1195 | BUG_ON(!cfg->vector); | 1204 | BUG_ON(!cfg->vector); |
1196 | 1205 | ||
1197 | vector = cfg->vector; | 1206 | vector = cfg->vector; |
1198 | for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) | 1207 | for_each_cpu(cpu, cfg->domain) |
1199 | per_cpu(vector_irq, cpu)[vector] = -1; | 1208 | per_cpu(vector_irq, cpu)[vector] = -1; |
1200 | 1209 | ||
1201 | cfg->vector = 0; | 1210 | cfg->vector = 0; |
@@ -1203,7 +1212,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1203 | 1212 | ||
1204 | if (likely(!cfg->move_in_progress)) | 1213 | if (likely(!cfg->move_in_progress)) |
1205 | return; | 1214 | return; |
1206 | for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { | 1215 | for_each_cpu(cpu, cfg->old_domain) { |
1207 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; | 1216 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; |
1208 | vector++) { | 1217 | vector++) { |
1209 | if (per_cpu(vector_irq, cpu)[vector] != irq) | 1218 | if (per_cpu(vector_irq, cpu)[vector] != irq) |
@@ -1346,18 +1355,18 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, | |||
1346 | 1355 | ||
1347 | if (!IO_APIC_IRQ(irq)) | 1356 | if (!IO_APIC_IRQ(irq)) |
1348 | return; | 1357 | return; |
1349 | /* | ||
1350 | * For legacy irqs, cfg->domain starts with cpu 0 for legacy | ||
1351 | * controllers like 8259. Now that IO-APIC can handle this irq, update | ||
1352 | * the cfg->domain. | ||
1353 | */ | ||
1354 | if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) | ||
1355 | apic->vector_allocation_domain(0, cfg->domain); | ||
1356 | 1358 | ||
1357 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) | 1359 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) |
1358 | return; | 1360 | return; |
1359 | 1361 | ||
1360 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); | 1362 | if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(), |
1363 | &dest)) { | ||
1364 | pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", | ||
1365 | mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); | ||
1366 | __clear_irq_vector(irq, cfg); | ||
1367 | |||
1368 | return; | ||
1369 | } | ||
1361 | 1370 | ||
1362 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 1371 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
1363 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 1372 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
@@ -1366,7 +1375,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, | |||
1366 | cfg->vector, irq, attr->trigger, attr->polarity, dest); | 1375 | cfg->vector, irq, attr->trigger, attr->polarity, dest); |
1367 | 1376 | ||
1368 | if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { | 1377 | if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { |
1369 | pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", | 1378 | pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
1370 | mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); | 1379 | mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); |
1371 | __clear_irq_vector(irq, cfg); | 1380 | __clear_irq_vector(irq, cfg); |
1372 | 1381 | ||
@@ -1469,9 +1478,10 @@ void setup_IO_APIC_irq_extra(u32 gsi) | |||
1469 | * Set up the timer pin, possibly with the 8259A-master behind. | 1478 | * Set up the timer pin, possibly with the 8259A-master behind. |
1470 | */ | 1479 | */ |
1471 | static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, | 1480 | static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, |
1472 | unsigned int pin, int vector) | 1481 | unsigned int pin, int vector) |
1473 | { | 1482 | { |
1474 | struct IO_APIC_route_entry entry; | 1483 | struct IO_APIC_route_entry entry; |
1484 | unsigned int dest; | ||
1475 | 1485 | ||
1476 | if (irq_remapping_enabled) | 1486 | if (irq_remapping_enabled) |
1477 | return; | 1487 | return; |
@@ -1482,9 +1492,13 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, | |||
1482 | * We use logical delivery to get the timer IRQ | 1492 | * We use logical delivery to get the timer IRQ |
1483 | * to the first CPU. | 1493 | * to the first CPU. |
1484 | */ | 1494 | */ |
1495 | if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(), | ||
1496 | apic->target_cpus(), &dest))) | ||
1497 | dest = BAD_APICID; | ||
1498 | |||
1485 | entry.dest_mode = apic->irq_dest_mode; | 1499 | entry.dest_mode = apic->irq_dest_mode; |
1486 | entry.mask = 0; /* don't mask IRQ for edge */ | 1500 | entry.mask = 0; /* don't mask IRQ for edge */ |
1487 | entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); | 1501 | entry.dest = dest; |
1488 | entry.delivery_mode = apic->irq_delivery_mode; | 1502 | entry.delivery_mode = apic->irq_delivery_mode; |
1489 | entry.polarity = 0; | 1503 | entry.polarity = 0; |
1490 | entry.trigger = 0; | 1504 | entry.trigger = 0; |
@@ -1521,7 +1535,6 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | |||
1521 | reg_03.raw = io_apic_read(ioapic_idx, 3); | 1535 | reg_03.raw = io_apic_read(ioapic_idx, 3); |
1522 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 1536 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
1523 | 1537 | ||
1524 | printk("\n"); | ||
1525 | printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); | 1538 | printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); |
1526 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 1539 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
1527 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1540 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
@@ -1578,7 +1591,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | |||
1578 | i, | 1591 | i, |
1579 | ir_entry->index | 1592 | ir_entry->index |
1580 | ); | 1593 | ); |
1581 | printk("%1d %1d %1d %1d %1d " | 1594 | pr_cont("%1d %1d %1d %1d %1d " |
1582 | "%1d %1d %X %02X\n", | 1595 | "%1d %1d %X %02X\n", |
1583 | ir_entry->format, | 1596 | ir_entry->format, |
1584 | ir_entry->mask, | 1597 | ir_entry->mask, |
@@ -1598,7 +1611,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | |||
1598 | i, | 1611 | i, |
1599 | entry.dest | 1612 | entry.dest |
1600 | ); | 1613 | ); |
1601 | printk("%1d %1d %1d %1d %1d " | 1614 | pr_cont("%1d %1d %1d %1d %1d " |
1602 | "%1d %1d %02X\n", | 1615 | "%1d %1d %02X\n", |
1603 | entry.mask, | 1616 | entry.mask, |
1604 | entry.trigger, | 1617 | entry.trigger, |
@@ -1651,8 +1664,8 @@ __apicdebuginit(void) print_IO_APICs(void) | |||
1651 | continue; | 1664 | continue; |
1652 | printk(KERN_DEBUG "IRQ%d ", irq); | 1665 | printk(KERN_DEBUG "IRQ%d ", irq); |
1653 | for_each_irq_pin(entry, cfg->irq_2_pin) | 1666 | for_each_irq_pin(entry, cfg->irq_2_pin) |
1654 | printk("-> %d:%d", entry->apic, entry->pin); | 1667 | pr_cont("-> %d:%d", entry->apic, entry->pin); |
1655 | printk("\n"); | 1668 | pr_cont("\n"); |
1656 | } | 1669 | } |
1657 | 1670 | ||
1658 | printk(KERN_INFO ".................................... done.\n"); | 1671 | printk(KERN_INFO ".................................... done.\n"); |
@@ -1665,9 +1678,9 @@ __apicdebuginit(void) print_APIC_field(int base) | |||
1665 | printk(KERN_DEBUG); | 1678 | printk(KERN_DEBUG); |
1666 | 1679 | ||
1667 | for (i = 0; i < 8; i++) | 1680 | for (i = 0; i < 8; i++) |
1668 | printk(KERN_CONT "%08x", apic_read(base + i*0x10)); | 1681 | pr_cont("%08x", apic_read(base + i*0x10)); |
1669 | 1682 | ||
1670 | printk(KERN_CONT "\n"); | 1683 | pr_cont("\n"); |
1671 | } | 1684 | } |
1672 | 1685 | ||
1673 | __apicdebuginit(void) print_local_APIC(void *dummy) | 1686 | __apicdebuginit(void) print_local_APIC(void *dummy) |
@@ -1769,7 +1782,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1769 | printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); | 1782 | printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); |
1770 | } | 1783 | } |
1771 | } | 1784 | } |
1772 | printk("\n"); | 1785 | pr_cont("\n"); |
1773 | } | 1786 | } |
1774 | 1787 | ||
1775 | __apicdebuginit(void) print_local_APICs(int maxcpu) | 1788 | __apicdebuginit(void) print_local_APICs(int maxcpu) |
@@ -2065,7 +2078,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
2065 | reg_00.raw = io_apic_read(ioapic_idx, 0); | 2078 | reg_00.raw = io_apic_read(ioapic_idx, 0); |
2066 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2079 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2067 | if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) | 2080 | if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) |
2068 | printk("could not set ID!\n"); | 2081 | pr_cont("could not set ID!\n"); |
2069 | else | 2082 | else |
2070 | apic_printk(APIC_VERBOSE, " ok.\n"); | 2083 | apic_printk(APIC_VERBOSE, " ok.\n"); |
2071 | } | 2084 | } |
@@ -2210,72 +2223,6 @@ void send_cleanup_vector(struct irq_cfg *cfg) | |||
2210 | cfg->move_in_progress = 0; | 2223 | cfg->move_in_progress = 0; |
2211 | } | 2224 | } |
2212 | 2225 | ||
2213 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) | ||
2214 | { | ||
2215 | int apic, pin; | ||
2216 | struct irq_pin_list *entry; | ||
2217 | u8 vector = cfg->vector; | ||
2218 | |||
2219 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
2220 | unsigned int reg; | ||
2221 | |||
2222 | apic = entry->apic; | ||
2223 | pin = entry->pin; | ||
2224 | /* | ||
2225 | * With interrupt-remapping, destination information comes | ||
2226 | * from interrupt-remapping table entry. | ||
2227 | */ | ||
2228 | if (!irq_remapped(cfg)) | ||
2229 | io_apic_write(apic, 0x11 + pin*2, dest); | ||
2230 | reg = io_apic_read(apic, 0x10 + pin*2); | ||
2231 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | ||
2232 | reg |= vector; | ||
2233 | io_apic_modify(apic, 0x10 + pin*2, reg); | ||
2234 | } | ||
2235 | } | ||
2236 | |||
2237 | /* | ||
2238 | * Either sets data->affinity to a valid value, and returns | ||
2239 | * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and | ||
2240 | * leaves data->affinity untouched. | ||
2241 | */ | ||
2242 | int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
2243 | unsigned int *dest_id) | ||
2244 | { | ||
2245 | struct irq_cfg *cfg = data->chip_data; | ||
2246 | |||
2247 | if (!cpumask_intersects(mask, cpu_online_mask)) | ||
2248 | return -1; | ||
2249 | |||
2250 | if (assign_irq_vector(data->irq, data->chip_data, mask)) | ||
2251 | return -1; | ||
2252 | |||
2253 | cpumask_copy(data->affinity, mask); | ||
2254 | |||
2255 | *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain); | ||
2256 | return 0; | ||
2257 | } | ||
2258 | |||
2259 | static int | ||
2260 | ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
2261 | bool force) | ||
2262 | { | ||
2263 | unsigned int dest, irq = data->irq; | ||
2264 | unsigned long flags; | ||
2265 | int ret; | ||
2266 | |||
2267 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
2268 | ret = __ioapic_set_affinity(data, mask, &dest); | ||
2269 | if (!ret) { | ||
2270 | /* Only the high 8 bits are valid. */ | ||
2271 | dest = SET_APIC_LOGICAL_ID(dest); | ||
2272 | __target_IO_APIC_irq(irq, dest, data->chip_data); | ||
2273 | ret = IRQ_SET_MASK_OK_NOCOPY; | ||
2274 | } | ||
2275 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2276 | return ret; | ||
2277 | } | ||
2278 | |||
2279 | asmlinkage void smp_irq_move_cleanup_interrupt(void) | 2226 | asmlinkage void smp_irq_move_cleanup_interrupt(void) |
2280 | { | 2227 | { |
2281 | unsigned vector, me; | 2228 | unsigned vector, me; |
@@ -2363,6 +2310,87 @@ void irq_force_complete_move(int irq) | |||
2363 | static inline void irq_complete_move(struct irq_cfg *cfg) { } | 2310 | static inline void irq_complete_move(struct irq_cfg *cfg) { } |
2364 | #endif | 2311 | #endif |
2365 | 2312 | ||
2313 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) | ||
2314 | { | ||
2315 | int apic, pin; | ||
2316 | struct irq_pin_list *entry; | ||
2317 | u8 vector = cfg->vector; | ||
2318 | |||
2319 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
2320 | unsigned int reg; | ||
2321 | |||
2322 | apic = entry->apic; | ||
2323 | pin = entry->pin; | ||
2324 | /* | ||
2325 | * With interrupt-remapping, destination information comes | ||
2326 | * from interrupt-remapping table entry. | ||
2327 | */ | ||
2328 | if (!irq_remapped(cfg)) | ||
2329 | io_apic_write(apic, 0x11 + pin*2, dest); | ||
2330 | reg = io_apic_read(apic, 0x10 + pin*2); | ||
2331 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | ||
2332 | reg |= vector; | ||
2333 | io_apic_modify(apic, 0x10 + pin*2, reg); | ||
2334 | } | ||
2335 | } | ||
2336 | |||
2337 | /* | ||
2338 | * Either sets data->affinity to a valid value, and returns | ||
2339 | * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and | ||
2340 | * leaves data->affinity untouched. | ||
2341 | */ | ||
2342 | int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
2343 | unsigned int *dest_id) | ||
2344 | { | ||
2345 | struct irq_cfg *cfg = data->chip_data; | ||
2346 | unsigned int irq = data->irq; | ||
2347 | int err; | ||
2348 | |||
2349 | if (!config_enabled(CONFIG_SMP)) | ||
2350 | return -1; | ||
2351 | |||
2352 | if (!cpumask_intersects(mask, cpu_online_mask)) | ||
2353 | return -EINVAL; | ||
2354 | |||
2355 | err = assign_irq_vector(irq, cfg, mask); | ||
2356 | if (err) | ||
2357 | return err; | ||
2358 | |||
2359 | err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); | ||
2360 | if (err) { | ||
2361 | if (assign_irq_vector(irq, cfg, data->affinity)) | ||
2362 | pr_err("Failed to recover vector for irq %d\n", irq); | ||
2363 | return err; | ||
2364 | } | ||
2365 | |||
2366 | cpumask_copy(data->affinity, mask); | ||
2367 | |||
2368 | return 0; | ||
2369 | } | ||
2370 | |||
2371 | static int | ||
2372 | ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
2373 | bool force) | ||
2374 | { | ||
2375 | unsigned int dest, irq = data->irq; | ||
2376 | unsigned long flags; | ||
2377 | int ret; | ||
2378 | |||
2379 | if (!config_enabled(CONFIG_SMP)) | ||
2380 | return -1; | ||
2381 | |||
2382 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
2383 | ret = __ioapic_set_affinity(data, mask, &dest); | ||
2384 | if (!ret) { | ||
2385 | /* Only the high 8 bits are valid. */ | ||
2386 | dest = SET_APIC_LOGICAL_ID(dest); | ||
2387 | __target_IO_APIC_irq(irq, dest, data->chip_data); | ||
2388 | ret = IRQ_SET_MASK_OK_NOCOPY; | ||
2389 | } | ||
2390 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2391 | return ret; | ||
2392 | } | ||
2393 | |||
2366 | static void ack_apic_edge(struct irq_data *data) | 2394 | static void ack_apic_edge(struct irq_data *data) |
2367 | { | 2395 | { |
2368 | irq_complete_move(data->chip_data); | 2396 | irq_complete_move(data->chip_data); |
@@ -2542,9 +2570,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip) | |||
2542 | chip->irq_ack = ir_ack_apic_edge; | 2570 | chip->irq_ack = ir_ack_apic_edge; |
2543 | chip->irq_eoi = ir_ack_apic_level; | 2571 | chip->irq_eoi = ir_ack_apic_level; |
2544 | 2572 | ||
2545 | #ifdef CONFIG_SMP | ||
2546 | chip->irq_set_affinity = set_remapped_irq_affinity; | 2573 | chip->irq_set_affinity = set_remapped_irq_affinity; |
2547 | #endif | ||
2548 | } | 2574 | } |
2549 | #endif /* CONFIG_IRQ_REMAP */ | 2575 | #endif /* CONFIG_IRQ_REMAP */ |
2550 | 2576 | ||
@@ -2555,9 +2581,7 @@ static struct irq_chip ioapic_chip __read_mostly = { | |||
2555 | .irq_unmask = unmask_ioapic_irq, | 2581 | .irq_unmask = unmask_ioapic_irq, |
2556 | .irq_ack = ack_apic_edge, | 2582 | .irq_ack = ack_apic_edge, |
2557 | .irq_eoi = ack_apic_level, | 2583 | .irq_eoi = ack_apic_level, |
2558 | #ifdef CONFIG_SMP | ||
2559 | .irq_set_affinity = ioapic_set_affinity, | 2584 | .irq_set_affinity = ioapic_set_affinity, |
2560 | #endif | ||
2561 | .irq_retrigger = ioapic_retrigger_irq, | 2585 | .irq_retrigger = ioapic_retrigger_irq, |
2562 | }; | 2586 | }; |
2563 | 2587 | ||
@@ -3039,7 +3063,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
3039 | if (err) | 3063 | if (err) |
3040 | return err; | 3064 | return err; |
3041 | 3065 | ||
3042 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); | 3066 | err = apic->cpu_mask_to_apicid_and(cfg->domain, |
3067 | apic->target_cpus(), &dest); | ||
3068 | if (err) | ||
3069 | return err; | ||
3043 | 3070 | ||
3044 | if (irq_remapped(cfg)) { | 3071 | if (irq_remapped(cfg)) { |
3045 | compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); | 3072 | compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); |
@@ -3073,7 +3100,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
3073 | return err; | 3100 | return err; |
3074 | } | 3101 | } |
3075 | 3102 | ||
3076 | #ifdef CONFIG_SMP | ||
3077 | static int | 3103 | static int |
3078 | msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | 3104 | msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) |
3079 | { | 3105 | { |
@@ -3095,7 +3121,6 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | |||
3095 | 3121 | ||
3096 | return IRQ_SET_MASK_OK_NOCOPY; | 3122 | return IRQ_SET_MASK_OK_NOCOPY; |
3097 | } | 3123 | } |
3098 | #endif /* CONFIG_SMP */ | ||
3099 | 3124 | ||
3100 | /* | 3125 | /* |
3101 | * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, | 3126 | * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, |
@@ -3106,9 +3131,7 @@ static struct irq_chip msi_chip = { | |||
3106 | .irq_unmask = unmask_msi_irq, | 3131 | .irq_unmask = unmask_msi_irq, |
3107 | .irq_mask = mask_msi_irq, | 3132 | .irq_mask = mask_msi_irq, |
3108 | .irq_ack = ack_apic_edge, | 3133 | .irq_ack = ack_apic_edge, |
3109 | #ifdef CONFIG_SMP | ||
3110 | .irq_set_affinity = msi_set_affinity, | 3134 | .irq_set_affinity = msi_set_affinity, |
3111 | #endif | ||
3112 | .irq_retrigger = ioapic_retrigger_irq, | 3135 | .irq_retrigger = ioapic_retrigger_irq, |
3113 | }; | 3136 | }; |
3114 | 3137 | ||
@@ -3193,7 +3216,6 @@ void native_teardown_msi_irq(unsigned int irq) | |||
3193 | } | 3216 | } |
3194 | 3217 | ||
3195 | #ifdef CONFIG_DMAR_TABLE | 3218 | #ifdef CONFIG_DMAR_TABLE |
3196 | #ifdef CONFIG_SMP | ||
3197 | static int | 3219 | static int |
3198 | dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, | 3220 | dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, |
3199 | bool force) | 3221 | bool force) |
@@ -3218,16 +3240,12 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
3218 | return IRQ_SET_MASK_OK_NOCOPY; | 3240 | return IRQ_SET_MASK_OK_NOCOPY; |
3219 | } | 3241 | } |
3220 | 3242 | ||
3221 | #endif /* CONFIG_SMP */ | ||
3222 | |||
3223 | static struct irq_chip dmar_msi_type = { | 3243 | static struct irq_chip dmar_msi_type = { |
3224 | .name = "DMAR_MSI", | 3244 | .name = "DMAR_MSI", |
3225 | .irq_unmask = dmar_msi_unmask, | 3245 | .irq_unmask = dmar_msi_unmask, |
3226 | .irq_mask = dmar_msi_mask, | 3246 | .irq_mask = dmar_msi_mask, |
3227 | .irq_ack = ack_apic_edge, | 3247 | .irq_ack = ack_apic_edge, |
3228 | #ifdef CONFIG_SMP | ||
3229 | .irq_set_affinity = dmar_msi_set_affinity, | 3248 | .irq_set_affinity = dmar_msi_set_affinity, |
3230 | #endif | ||
3231 | .irq_retrigger = ioapic_retrigger_irq, | 3249 | .irq_retrigger = ioapic_retrigger_irq, |
3232 | }; | 3250 | }; |
3233 | 3251 | ||
@@ -3248,7 +3266,6 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
3248 | 3266 | ||
3249 | #ifdef CONFIG_HPET_TIMER | 3267 | #ifdef CONFIG_HPET_TIMER |
3250 | 3268 | ||
3251 | #ifdef CONFIG_SMP | ||
3252 | static int hpet_msi_set_affinity(struct irq_data *data, | 3269 | static int hpet_msi_set_affinity(struct irq_data *data, |
3253 | const struct cpumask *mask, bool force) | 3270 | const struct cpumask *mask, bool force) |
3254 | { | 3271 | { |
@@ -3271,16 +3288,12 @@ static int hpet_msi_set_affinity(struct irq_data *data, | |||
3271 | return IRQ_SET_MASK_OK_NOCOPY; | 3288 | return IRQ_SET_MASK_OK_NOCOPY; |
3272 | } | 3289 | } |
3273 | 3290 | ||
3274 | #endif /* CONFIG_SMP */ | ||
3275 | |||
3276 | static struct irq_chip hpet_msi_type = { | 3291 | static struct irq_chip hpet_msi_type = { |
3277 | .name = "HPET_MSI", | 3292 | .name = "HPET_MSI", |
3278 | .irq_unmask = hpet_msi_unmask, | 3293 | .irq_unmask = hpet_msi_unmask, |
3279 | .irq_mask = hpet_msi_mask, | 3294 | .irq_mask = hpet_msi_mask, |
3280 | .irq_ack = ack_apic_edge, | 3295 | .irq_ack = ack_apic_edge, |
3281 | #ifdef CONFIG_SMP | ||
3282 | .irq_set_affinity = hpet_msi_set_affinity, | 3296 | .irq_set_affinity = hpet_msi_set_affinity, |
3283 | #endif | ||
3284 | .irq_retrigger = ioapic_retrigger_irq, | 3297 | .irq_retrigger = ioapic_retrigger_irq, |
3285 | }; | 3298 | }; |
3286 | 3299 | ||
@@ -3315,8 +3328,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) | |||
3315 | */ | 3328 | */ |
3316 | #ifdef CONFIG_HT_IRQ | 3329 | #ifdef CONFIG_HT_IRQ |
3317 | 3330 | ||
3318 | #ifdef CONFIG_SMP | ||
3319 | |||
3320 | static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) | 3331 | static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) |
3321 | { | 3332 | { |
3322 | struct ht_irq_msg msg; | 3333 | struct ht_irq_msg msg; |
@@ -3344,22 +3355,20 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | |||
3344 | return IRQ_SET_MASK_OK_NOCOPY; | 3355 | return IRQ_SET_MASK_OK_NOCOPY; |
3345 | } | 3356 | } |
3346 | 3357 | ||
3347 | #endif | ||
3348 | |||
3349 | static struct irq_chip ht_irq_chip = { | 3358 | static struct irq_chip ht_irq_chip = { |
3350 | .name = "PCI-HT", | 3359 | .name = "PCI-HT", |
3351 | .irq_mask = mask_ht_irq, | 3360 | .irq_mask = mask_ht_irq, |
3352 | .irq_unmask = unmask_ht_irq, | 3361 | .irq_unmask = unmask_ht_irq, |
3353 | .irq_ack = ack_apic_edge, | 3362 | .irq_ack = ack_apic_edge, |
3354 | #ifdef CONFIG_SMP | ||
3355 | .irq_set_affinity = ht_set_affinity, | 3363 | .irq_set_affinity = ht_set_affinity, |
3356 | #endif | ||
3357 | .irq_retrigger = ioapic_retrigger_irq, | 3364 | .irq_retrigger = ioapic_retrigger_irq, |
3358 | }; | 3365 | }; |
3359 | 3366 | ||
3360 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | 3367 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) |
3361 | { | 3368 | { |
3362 | struct irq_cfg *cfg; | 3369 | struct irq_cfg *cfg; |
3370 | struct ht_irq_msg msg; | ||
3371 | unsigned dest; | ||
3363 | int err; | 3372 | int err; |
3364 | 3373 | ||
3365 | if (disable_apic) | 3374 | if (disable_apic) |
@@ -3367,36 +3376,37 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3367 | 3376 | ||
3368 | cfg = irq_cfg(irq); | 3377 | cfg = irq_cfg(irq); |
3369 | err = assign_irq_vector(irq, cfg, apic->target_cpus()); | 3378 | err = assign_irq_vector(irq, cfg, apic->target_cpus()); |
3370 | if (!err) { | 3379 | if (err) |
3371 | struct ht_irq_msg msg; | 3380 | return err; |
3372 | unsigned dest; | 3381 | |
3382 | err = apic->cpu_mask_to_apicid_and(cfg->domain, | ||
3383 | apic->target_cpus(), &dest); | ||
3384 | if (err) | ||
3385 | return err; | ||
3373 | 3386 | ||
3374 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, | 3387 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); |
3375 | apic->target_cpus()); | ||
3376 | 3388 | ||
3377 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); | 3389 | msg.address_lo = |
3390 | HT_IRQ_LOW_BASE | | ||
3391 | HT_IRQ_LOW_DEST_ID(dest) | | ||
3392 | HT_IRQ_LOW_VECTOR(cfg->vector) | | ||
3393 | ((apic->irq_dest_mode == 0) ? | ||
3394 | HT_IRQ_LOW_DM_PHYSICAL : | ||
3395 | HT_IRQ_LOW_DM_LOGICAL) | | ||
3396 | HT_IRQ_LOW_RQEOI_EDGE | | ||
3397 | ((apic->irq_delivery_mode != dest_LowestPrio) ? | ||
3398 | HT_IRQ_LOW_MT_FIXED : | ||
3399 | HT_IRQ_LOW_MT_ARBITRATED) | | ||
3400 | HT_IRQ_LOW_IRQ_MASKED; | ||
3378 | 3401 | ||
3379 | msg.address_lo = | 3402 | write_ht_irq_msg(irq, &msg); |
3380 | HT_IRQ_LOW_BASE | | ||
3381 | HT_IRQ_LOW_DEST_ID(dest) | | ||
3382 | HT_IRQ_LOW_VECTOR(cfg->vector) | | ||
3383 | ((apic->irq_dest_mode == 0) ? | ||
3384 | HT_IRQ_LOW_DM_PHYSICAL : | ||
3385 | HT_IRQ_LOW_DM_LOGICAL) | | ||
3386 | HT_IRQ_LOW_RQEOI_EDGE | | ||
3387 | ((apic->irq_delivery_mode != dest_LowestPrio) ? | ||
3388 | HT_IRQ_LOW_MT_FIXED : | ||
3389 | HT_IRQ_LOW_MT_ARBITRATED) | | ||
3390 | HT_IRQ_LOW_IRQ_MASKED; | ||
3391 | 3403 | ||
3392 | write_ht_irq_msg(irq, &msg); | 3404 | irq_set_chip_and_handler_name(irq, &ht_irq_chip, |
3405 | handle_edge_irq, "edge"); | ||
3393 | 3406 | ||
3394 | irq_set_chip_and_handler_name(irq, &ht_irq_chip, | 3407 | dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); |
3395 | handle_edge_irq, "edge"); | ||
3396 | 3408 | ||
3397 | dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); | 3409 | return 0; |
3398 | } | ||
3399 | return err; | ||
3400 | } | 3410 | } |
3401 | #endif /* CONFIG_HT_IRQ */ | 3411 | #endif /* CONFIG_HT_IRQ */ |
3402 | 3412 | ||
@@ -3564,7 +3574,8 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3564 | 3574 | ||
3565 | /* Sanity check */ | 3575 | /* Sanity check */ |
3566 | if (reg_00.bits.ID != apic_id) { | 3576 | if (reg_00.bits.ID != apic_id) { |
3567 | printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); | 3577 | pr_err("IOAPIC[%d]: Unable to change apic_id!\n", |
3578 | ioapic); | ||
3568 | return -1; | 3579 | return -1; |
3569 | } | 3580 | } |
3570 | } | 3581 | } |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index f00a68cca37..d661ee95cab 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -406,16 +406,13 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid) | |||
406 | * We use physical apicids here, not logical, so just return the default | 406 | * We use physical apicids here, not logical, so just return the default |
407 | * physical broadcast to stop people from breaking us | 407 | * physical broadcast to stop people from breaking us |
408 | */ | 408 | */ |
409 | static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) | 409 | static int |
410 | { | ||
411 | return 0x0F; | ||
412 | } | ||
413 | |||
414 | static inline unsigned int | ||
415 | numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 410 | numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
416 | const struct cpumask *andmask) | 411 | const struct cpumask *andmask, |
412 | unsigned int *apicid) | ||
417 | { | 413 | { |
418 | return 0x0F; | 414 | *apicid = 0x0F; |
415 | return 0; | ||
419 | } | 416 | } |
420 | 417 | ||
421 | /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ | 418 | /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ |
@@ -441,20 +438,6 @@ static int probe_numaq(void) | |||
441 | return found_numaq; | 438 | return found_numaq; |
442 | } | 439 | } |
443 | 440 | ||
444 | static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
445 | { | ||
446 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
447 | * specified in the interrupt destination when using lowest | ||
448 | * priority interrupt delivery mode. | ||
449 | * | ||
450 | * In particular there was a hyperthreading cpu observed to | ||
451 | * deliver interrupts to the wrong hyperthread when only one | ||
452 | * hyperthread was specified in the interrupt desitination. | ||
453 | */ | ||
454 | cpumask_clear(retmask); | ||
455 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
456 | } | ||
457 | |||
458 | static void numaq_setup_portio_remap(void) | 441 | static void numaq_setup_portio_remap(void) |
459 | { | 442 | { |
460 | int num_quads = num_online_nodes(); | 443 | int num_quads = num_online_nodes(); |
@@ -491,7 +474,7 @@ static struct apic __refdata apic_numaq = { | |||
491 | .check_apicid_used = numaq_check_apicid_used, | 474 | .check_apicid_used = numaq_check_apicid_used, |
492 | .check_apicid_present = numaq_check_apicid_present, | 475 | .check_apicid_present = numaq_check_apicid_present, |
493 | 476 | ||
494 | .vector_allocation_domain = numaq_vector_allocation_domain, | 477 | .vector_allocation_domain = flat_vector_allocation_domain, |
495 | .init_apic_ldr = numaq_init_apic_ldr, | 478 | .init_apic_ldr = numaq_init_apic_ldr, |
496 | 479 | ||
497 | .ioapic_phys_id_map = numaq_ioapic_phys_id_map, | 480 | .ioapic_phys_id_map = numaq_ioapic_phys_id_map, |
@@ -509,7 +492,6 @@ static struct apic __refdata apic_numaq = { | |||
509 | .set_apic_id = NULL, | 492 | .set_apic_id = NULL, |
510 | .apic_id_mask = 0x0F << 24, | 493 | .apic_id_mask = 0x0F << 24, |
511 | 494 | ||
512 | .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, | ||
513 | .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, | 495 | .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, |
514 | 496 | ||
515 | .send_IPI_mask = numaq_send_IPI_mask, | 497 | .send_IPI_mask = numaq_send_IPI_mask, |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1b291da09e6..eb35ef9ee63 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -66,21 +66,6 @@ static void setup_apic_flat_routing(void) | |||
66 | #endif | 66 | #endif |
67 | } | 67 | } |
68 | 68 | ||
69 | static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
70 | { | ||
71 | /* | ||
72 | * Careful. Some cpus do not strictly honor the set of cpus | ||
73 | * specified in the interrupt destination when using lowest | ||
74 | * priority interrupt delivery mode. | ||
75 | * | ||
76 | * In particular there was a hyperthreading cpu observed to | ||
77 | * deliver interrupts to the wrong hyperthread when only one | ||
78 | * hyperthread was specified in the interrupt desitination. | ||
79 | */ | ||
80 | cpumask_clear(retmask); | ||
81 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
82 | } | ||
83 | |||
84 | /* should be called last. */ | 69 | /* should be called last. */ |
85 | static int probe_default(void) | 70 | static int probe_default(void) |
86 | { | 71 | { |
@@ -105,7 +90,7 @@ static struct apic apic_default = { | |||
105 | .check_apicid_used = default_check_apicid_used, | 90 | .check_apicid_used = default_check_apicid_used, |
106 | .check_apicid_present = default_check_apicid_present, | 91 | .check_apicid_present = default_check_apicid_present, |
107 | 92 | ||
108 | .vector_allocation_domain = default_vector_allocation_domain, | 93 | .vector_allocation_domain = flat_vector_allocation_domain, |
109 | .init_apic_ldr = default_init_apic_ldr, | 94 | .init_apic_ldr = default_init_apic_ldr, |
110 | 95 | ||
111 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | 96 | .ioapic_phys_id_map = default_ioapic_phys_id_map, |
@@ -123,8 +108,7 @@ static struct apic apic_default = { | |||
123 | .set_apic_id = NULL, | 108 | .set_apic_id = NULL, |
124 | .apic_id_mask = 0x0F << 24, | 109 | .apic_id_mask = 0x0F << 24, |
125 | 110 | ||
126 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | 111 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, |
127 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
128 | 112 | ||
129 | .send_IPI_mask = default_send_IPI_mask_logical, | 113 | .send_IPI_mask = default_send_IPI_mask_logical, |
130 | .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, | 114 | .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, |
@@ -208,6 +192,9 @@ void __init default_setup_apic_routing(void) | |||
208 | 192 | ||
209 | if (apic->setup_apic_routing) | 193 | if (apic->setup_apic_routing) |
210 | apic->setup_apic_routing(); | 194 | apic->setup_apic_routing(); |
195 | |||
196 | if (x86_platform.apic_post_init) | ||
197 | x86_platform.apic_post_init(); | ||
211 | } | 198 | } |
212 | 199 | ||
213 | void __init generic_apic_probe(void) | 200 | void __init generic_apic_probe(void) |
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 3fe98669892..1793dba7a74 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
@@ -23,11 +23,6 @@ | |||
23 | #include <asm/ipi.h> | 23 | #include <asm/ipi.h> |
24 | #include <asm/setup.h> | 24 | #include <asm/setup.h> |
25 | 25 | ||
26 | static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | ||
27 | { | ||
28 | return hard_smp_processor_id() >> index_msb; | ||
29 | } | ||
30 | |||
31 | /* | 26 | /* |
32 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. | 27 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. |
33 | */ | 28 | */ |
@@ -48,10 +43,8 @@ void __init default_setup_apic_routing(void) | |||
48 | } | 43 | } |
49 | } | 44 | } |
50 | 45 | ||
51 | if (is_vsmp_box()) { | 46 | if (x86_platform.apic_post_init) |
52 | /* need to update phys_pkg_id */ | 47 | x86_platform.apic_post_init(); |
53 | apic->phys_pkg_id = apicid_phys_pkg_id; | ||
54 | } | ||
55 | } | 48 | } |
56 | 49 | ||
57 | /* Same for both flat and physical. */ | 50 | /* Same for both flat and physical. */ |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 659897c0075..77c95c0e1bf 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -26,6 +26,8 @@ | |||
26 | * | 26 | * |
27 | */ | 27 | */ |
28 | 28 | ||
29 | #define pr_fmt(fmt) "summit: %s: " fmt, __func__ | ||
30 | |||
29 | #include <linux/mm.h> | 31 | #include <linux/mm.h> |
30 | #include <linux/init.h> | 32 | #include <linux/init.h> |
31 | #include <asm/io.h> | 33 | #include <asm/io.h> |
@@ -235,8 +237,8 @@ static int summit_apic_id_registered(void) | |||
235 | 237 | ||
236 | static void summit_setup_apic_routing(void) | 238 | static void summit_setup_apic_routing(void) |
237 | { | 239 | { |
238 | printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", | 240 | pr_info("Enabling APIC mode: Summit. Using %d I/O APICs\n", |
239 | nr_ioapics); | 241 | nr_ioapics); |
240 | } | 242 | } |
241 | 243 | ||
242 | static int summit_cpu_present_to_apicid(int mps_cpu) | 244 | static int summit_cpu_present_to_apicid(int mps_cpu) |
@@ -263,43 +265,48 @@ static int summit_check_phys_apicid_present(int physical_apicid) | |||
263 | return 1; | 265 | return 1; |
264 | } | 266 | } |
265 | 267 | ||
266 | static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) | 268 | static inline int |
269 | summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) | ||
267 | { | 270 | { |
268 | unsigned int round = 0; | 271 | unsigned int round = 0; |
269 | int cpu, apicid = 0; | 272 | unsigned int cpu, apicid = 0; |
270 | 273 | ||
271 | /* | 274 | /* |
272 | * The cpus in the mask must all be on the apic cluster. | 275 | * The cpus in the mask must all be on the apic cluster. |
273 | */ | 276 | */ |
274 | for_each_cpu(cpu, cpumask) { | 277 | for_each_cpu_and(cpu, cpumask, cpu_online_mask) { |
275 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | 278 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); |
276 | 279 | ||
277 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { | 280 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { |
278 | printk("%s: Not a valid mask!\n", __func__); | 281 | pr_err("Not a valid mask!\n"); |
279 | return BAD_APICID; | 282 | return -EINVAL; |
280 | } | 283 | } |
281 | apicid |= new_apicid; | 284 | apicid |= new_apicid; |
282 | round++; | 285 | round++; |
283 | } | 286 | } |
284 | return apicid; | 287 | if (!round) |
288 | return -EINVAL; | ||
289 | *dest_id = apicid; | ||
290 | return 0; | ||
285 | } | 291 | } |
286 | 292 | ||
287 | static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, | 293 | static int |
288 | const struct cpumask *andmask) | 294 | summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, |
295 | const struct cpumask *andmask, | ||
296 | unsigned int *apicid) | ||
289 | { | 297 | { |
290 | int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); | ||
291 | cpumask_var_t cpumask; | 298 | cpumask_var_t cpumask; |
299 | *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); | ||
292 | 300 | ||
293 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | 301 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) |
294 | return apicid; | 302 | return 0; |
295 | 303 | ||
296 | cpumask_and(cpumask, inmask, andmask); | 304 | cpumask_and(cpumask, inmask, andmask); |
297 | cpumask_and(cpumask, cpumask, cpu_online_mask); | 305 | summit_cpu_mask_to_apicid(cpumask, apicid); |
298 | apicid = summit_cpu_mask_to_apicid(cpumask); | ||
299 | 306 | ||
300 | free_cpumask_var(cpumask); | 307 | free_cpumask_var(cpumask); |
301 | 308 | ||
302 | return apicid; | 309 | return 0; |
303 | } | 310 | } |
304 | 311 | ||
305 | /* | 312 | /* |
@@ -320,20 +327,6 @@ static int probe_summit(void) | |||
320 | return 0; | 327 | return 0; |
321 | } | 328 | } |
322 | 329 | ||
323 | static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
324 | { | ||
325 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
326 | * specified in the interrupt destination when using lowest | ||
327 | * priority interrupt delivery mode. | ||
328 | * | ||
329 | * In particular there was a hyperthreading cpu observed to | ||
330 | * deliver interrupts to the wrong hyperthread when only one | ||
331 | * hyperthread was specified in the interrupt desitination. | ||
332 | */ | ||
333 | cpumask_clear(retmask); | ||
334 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
335 | } | ||
336 | |||
337 | #ifdef CONFIG_X86_SUMMIT_NUMA | 330 | #ifdef CONFIG_X86_SUMMIT_NUMA |
338 | static struct rio_table_hdr *rio_table_hdr; | 331 | static struct rio_table_hdr *rio_table_hdr; |
339 | static struct scal_detail *scal_devs[MAX_NUMNODES]; | 332 | static struct scal_detail *scal_devs[MAX_NUMNODES]; |
@@ -355,7 +348,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | |||
355 | } | 348 | } |
356 | } | 349 | } |
357 | if (i == rio_table_hdr->num_rio_dev) { | 350 | if (i == rio_table_hdr->num_rio_dev) { |
358 | printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); | 351 | pr_err("Couldn't find owner Cyclone for Winnipeg!\n"); |
359 | return last_bus; | 352 | return last_bus; |
360 | } | 353 | } |
361 | 354 | ||
@@ -366,7 +359,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | |||
366 | } | 359 | } |
367 | } | 360 | } |
368 | if (i == rio_table_hdr->num_scal_dev) { | 361 | if (i == rio_table_hdr->num_scal_dev) { |
369 | printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); | 362 | pr_err("Couldn't find owner Twister for Cyclone!\n"); |
370 | return last_bus; | 363 | return last_bus; |
371 | } | 364 | } |
372 | 365 | ||
@@ -396,7 +389,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | |||
396 | num_buses = 9; | 389 | num_buses = 9; |
397 | break; | 390 | break; |
398 | default: | 391 | default: |
399 | printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); | 392 | pr_info("Unsupported Winnipeg type!\n"); |
400 | return last_bus; | 393 | return last_bus; |
401 | } | 394 | } |
402 | 395 | ||
@@ -411,13 +404,15 @@ static int build_detail_arrays(void) | |||
411 | int i, scal_detail_size, rio_detail_size; | 404 | int i, scal_detail_size, rio_detail_size; |
412 | 405 | ||
413 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { | 406 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { |
414 | printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); | 407 | pr_warn("MAX_NUMNODES too low! Defined as %d, but system has %d nodes\n", |
408 | MAX_NUMNODES, rio_table_hdr->num_scal_dev); | ||
415 | return 0; | 409 | return 0; |
416 | } | 410 | } |
417 | 411 | ||
418 | switch (rio_table_hdr->version) { | 412 | switch (rio_table_hdr->version) { |
419 | default: | 413 | default: |
420 | printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); | 414 | pr_warn("Invalid Rio Grande Table Version: %d\n", |
415 | rio_table_hdr->version); | ||
421 | return 0; | 416 | return 0; |
422 | case 2: | 417 | case 2: |
423 | scal_detail_size = 11; | 418 | scal_detail_size = 11; |
@@ -462,7 +457,7 @@ void setup_summit(void) | |||
462 | offset = *((unsigned short *)(ptr + offset)); | 457 | offset = *((unsigned short *)(ptr + offset)); |
463 | } | 458 | } |
464 | if (!rio_table_hdr) { | 459 | if (!rio_table_hdr) { |
465 | printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); | 460 | pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n"); |
466 | return; | 461 | return; |
467 | } | 462 | } |
468 | 463 | ||
@@ -509,7 +504,7 @@ static struct apic apic_summit = { | |||
509 | .check_apicid_used = summit_check_apicid_used, | 504 | .check_apicid_used = summit_check_apicid_used, |
510 | .check_apicid_present = summit_check_apicid_present, | 505 | .check_apicid_present = summit_check_apicid_present, |
511 | 506 | ||
512 | .vector_allocation_domain = summit_vector_allocation_domain, | 507 | .vector_allocation_domain = flat_vector_allocation_domain, |
513 | .init_apic_ldr = summit_init_apic_ldr, | 508 | .init_apic_ldr = summit_init_apic_ldr, |
514 | 509 | ||
515 | .ioapic_phys_id_map = summit_ioapic_phys_id_map, | 510 | .ioapic_phys_id_map = summit_ioapic_phys_id_map, |
@@ -527,7 +522,6 @@ static struct apic apic_summit = { | |||
527 | .set_apic_id = NULL, | 522 | .set_apic_id = NULL, |
528 | .apic_id_mask = 0xFF << 24, | 523 | .apic_id_mask = 0xFF << 24, |
529 | 524 | ||
530 | .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, | ||
531 | .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, | 525 | .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, |
532 | 526 | ||
533 | .send_IPI_mask = summit_send_IPI_mask, | 527 | .send_IPI_mask = summit_send_IPI_mask, |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ff35cff0e1a..c88baa4ff0e 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -81,7 +81,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | |||
81 | } | 81 | } |
82 | 82 | ||
83 | static void | 83 | static void |
84 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | 84 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) |
85 | { | 85 | { |
86 | __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); | 86 | __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); |
87 | } | 87 | } |
@@ -96,36 +96,37 @@ static void x2apic_send_IPI_all(int vector) | |||
96 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); | 96 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); |
97 | } | 97 | } |
98 | 98 | ||
99 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) | 99 | static int |
100 | x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
101 | const struct cpumask *andmask, | ||
102 | unsigned int *apicid) | ||
100 | { | 103 | { |
101 | /* | 104 | u32 dest = 0; |
102 | * We're using fixed IRQ delivery, can only return one logical APIC ID. | 105 | u16 cluster; |
103 | * May as well be the first. | 106 | int i; |
104 | */ | ||
105 | int cpu = cpumask_first(cpumask); | ||
106 | 107 | ||
107 | if ((unsigned)cpu < nr_cpu_ids) | 108 | for_each_cpu_and(i, cpumask, andmask) { |
108 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | 109 | if (!cpumask_test_cpu(i, cpu_online_mask)) |
109 | else | 110 | continue; |
110 | return BAD_APICID; | 111 | dest = per_cpu(x86_cpu_to_logical_apicid, i); |
111 | } | 112 | cluster = x2apic_cluster(i); |
113 | break; | ||
114 | } | ||
112 | 115 | ||
113 | static unsigned int | 116 | if (!dest) |
114 | x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 117 | return -EINVAL; |
115 | const struct cpumask *andmask) | ||
116 | { | ||
117 | int cpu; | ||
118 | 118 | ||
119 | /* | 119 | for_each_cpu_and(i, cpumask, andmask) { |
120 | * We're using fixed IRQ delivery, can only return one logical APIC ID. | 120 | if (!cpumask_test_cpu(i, cpu_online_mask)) |
121 | * May as well be the first. | 121 | continue; |
122 | */ | 122 | if (cluster != x2apic_cluster(i)) |
123 | for_each_cpu_and(cpu, cpumask, andmask) { | 123 | continue; |
124 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 124 | dest |= per_cpu(x86_cpu_to_logical_apicid, i); |
125 | break; | ||
126 | } | 125 | } |
127 | 126 | ||
128 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | 127 | *apicid = dest; |
128 | |||
129 | return 0; | ||
129 | } | 130 | } |
130 | 131 | ||
131 | static void init_x2apic_ldr(void) | 132 | static void init_x2apic_ldr(void) |
@@ -208,6 +209,32 @@ static int x2apic_cluster_probe(void) | |||
208 | return 0; | 209 | return 0; |
209 | } | 210 | } |
210 | 211 | ||
212 | static const struct cpumask *x2apic_cluster_target_cpus(void) | ||
213 | { | ||
214 | return cpu_all_mask; | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * Each x2apic cluster is an allocation domain. | ||
219 | */ | ||
220 | static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, | ||
221 | const struct cpumask *mask) | ||
222 | { | ||
223 | /* | ||
224 | * To minimize vector pressure, default case of boot, device bringup | ||
225 | * etc will use a single cpu for the interrupt destination. | ||
226 | * | ||
227 | * On explicit migration requests coming from irqbalance etc, | ||
228 | * interrupts will be routed to the x2apic cluster (cluster-id | ||
229 | * derived from the first cpu in the mask) members specified | ||
230 | * in the mask. | ||
231 | */ | ||
232 | if (mask == x2apic_cluster_target_cpus()) | ||
233 | cpumask_copy(retmask, cpumask_of(cpu)); | ||
234 | else | ||
235 | cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); | ||
236 | } | ||
237 | |||
211 | static struct apic apic_x2apic_cluster = { | 238 | static struct apic apic_x2apic_cluster = { |
212 | 239 | ||
213 | .name = "cluster x2apic", | 240 | .name = "cluster x2apic", |
@@ -219,13 +246,13 @@ static struct apic apic_x2apic_cluster = { | |||
219 | .irq_delivery_mode = dest_LowestPrio, | 246 | .irq_delivery_mode = dest_LowestPrio, |
220 | .irq_dest_mode = 1, /* logical */ | 247 | .irq_dest_mode = 1, /* logical */ |
221 | 248 | ||
222 | .target_cpus = x2apic_target_cpus, | 249 | .target_cpus = x2apic_cluster_target_cpus, |
223 | .disable_esr = 0, | 250 | .disable_esr = 0, |
224 | .dest_logical = APIC_DEST_LOGICAL, | 251 | .dest_logical = APIC_DEST_LOGICAL, |
225 | .check_apicid_used = NULL, | 252 | .check_apicid_used = NULL, |
226 | .check_apicid_present = NULL, | 253 | .check_apicid_present = NULL, |
227 | 254 | ||
228 | .vector_allocation_domain = x2apic_vector_allocation_domain, | 255 | .vector_allocation_domain = cluster_vector_allocation_domain, |
229 | .init_apic_ldr = init_x2apic_ldr, | 256 | .init_apic_ldr = init_x2apic_ldr, |
230 | 257 | ||
231 | .ioapic_phys_id_map = NULL, | 258 | .ioapic_phys_id_map = NULL, |
@@ -243,7 +270,6 @@ static struct apic apic_x2apic_cluster = { | |||
243 | .set_apic_id = x2apic_set_apic_id, | 270 | .set_apic_id = x2apic_set_apic_id, |
244 | .apic_id_mask = 0xFFFFFFFFu, | 271 | .apic_id_mask = 0xFFFFFFFFu, |
245 | 272 | ||
246 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | ||
247 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | 273 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, |
248 | 274 | ||
249 | .send_IPI_mask = x2apic_send_IPI_mask, | 275 | .send_IPI_mask = x2apic_send_IPI_mask, |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index c17e982db27..e03a1e180e8 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -76,38 +76,6 @@ static void x2apic_send_IPI_all(int vector) | |||
76 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); | 76 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); |
77 | } | 77 | } |
78 | 78 | ||
79 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
80 | { | ||
81 | /* | ||
82 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
83 | * May as well be the first. | ||
84 | */ | ||
85 | int cpu = cpumask_first(cpumask); | ||
86 | |||
87 | if ((unsigned)cpu < nr_cpu_ids) | ||
88 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
89 | else | ||
90 | return BAD_APICID; | ||
91 | } | ||
92 | |||
93 | static unsigned int | ||
94 | x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
95 | const struct cpumask *andmask) | ||
96 | { | ||
97 | int cpu; | ||
98 | |||
99 | /* | ||
100 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
101 | * May as well be the first. | ||
102 | */ | ||
103 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
104 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
105 | break; | ||
106 | } | ||
107 | |||
108 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
109 | } | ||
110 | |||
111 | static void init_x2apic_ldr(void) | 79 | static void init_x2apic_ldr(void) |
112 | { | 80 | { |
113 | } | 81 | } |
@@ -131,13 +99,13 @@ static struct apic apic_x2apic_phys = { | |||
131 | .irq_delivery_mode = dest_Fixed, | 99 | .irq_delivery_mode = dest_Fixed, |
132 | .irq_dest_mode = 0, /* physical */ | 100 | .irq_dest_mode = 0, /* physical */ |
133 | 101 | ||
134 | .target_cpus = x2apic_target_cpus, | 102 | .target_cpus = online_target_cpus, |
135 | .disable_esr = 0, | 103 | .disable_esr = 0, |
136 | .dest_logical = 0, | 104 | .dest_logical = 0, |
137 | .check_apicid_used = NULL, | 105 | .check_apicid_used = NULL, |
138 | .check_apicid_present = NULL, | 106 | .check_apicid_present = NULL, |
139 | 107 | ||
140 | .vector_allocation_domain = x2apic_vector_allocation_domain, | 108 | .vector_allocation_domain = default_vector_allocation_domain, |
141 | .init_apic_ldr = init_x2apic_ldr, | 109 | .init_apic_ldr = init_x2apic_ldr, |
142 | 110 | ||
143 | .ioapic_phys_id_map = NULL, | 111 | .ioapic_phys_id_map = NULL, |
@@ -155,8 +123,7 @@ static struct apic apic_x2apic_phys = { | |||
155 | .set_apic_id = x2apic_set_apic_id, | 123 | .set_apic_id = x2apic_set_apic_id, |
156 | .apic_id_mask = 0xFFFFFFFFu, | 124 | .apic_id_mask = 0xFFFFFFFFu, |
157 | 125 | ||
158 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 126 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, |
159 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | ||
160 | 127 | ||
161 | .send_IPI_mask = x2apic_send_IPI_mask, | 128 | .send_IPI_mask = x2apic_send_IPI_mask, |
162 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | 129 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c6d03f7a440..8cfade9510a 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -185,17 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); | |||
185 | unsigned long sn_rtc_cycles_per_second; | 185 | unsigned long sn_rtc_cycles_per_second; |
186 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); | 186 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); |
187 | 187 | ||
188 | static const struct cpumask *uv_target_cpus(void) | ||
189 | { | ||
190 | return cpu_online_mask; | ||
191 | } | ||
192 | |||
193 | static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
194 | { | ||
195 | cpumask_clear(retmask); | ||
196 | cpumask_set_cpu(cpu, retmask); | ||
197 | } | ||
198 | |||
199 | static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) | 188 | static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) |
200 | { | 189 | { |
201 | #ifdef CONFIG_SMP | 190 | #ifdef CONFIG_SMP |
@@ -280,25 +269,12 @@ static void uv_init_apic_ldr(void) | |||
280 | { | 269 | { |
281 | } | 270 | } |
282 | 271 | ||
283 | static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) | 272 | static int |
284 | { | ||
285 | /* | ||
286 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
287 | * May as well be the first. | ||
288 | */ | ||
289 | int cpu = cpumask_first(cpumask); | ||
290 | |||
291 | if ((unsigned)cpu < nr_cpu_ids) | ||
292 | return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; | ||
293 | else | ||
294 | return BAD_APICID; | ||
295 | } | ||
296 | |||
297 | static unsigned int | ||
298 | uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 273 | uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
299 | const struct cpumask *andmask) | 274 | const struct cpumask *andmask, |
275 | unsigned int *apicid) | ||
300 | { | 276 | { |
301 | int cpu; | 277 | int unsigned cpu; |
302 | 278 | ||
303 | /* | 279 | /* |
304 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 280 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
@@ -308,7 +284,13 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
308 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 284 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
309 | break; | 285 | break; |
310 | } | 286 | } |
311 | return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; | 287 | |
288 | if (likely(cpu < nr_cpu_ids)) { | ||
289 | *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; | ||
290 | return 0; | ||
291 | } | ||
292 | |||
293 | return -EINVAL; | ||
312 | } | 294 | } |
313 | 295 | ||
314 | static unsigned int x2apic_get_apic_id(unsigned long x) | 296 | static unsigned int x2apic_get_apic_id(unsigned long x) |
@@ -362,13 +344,13 @@ static struct apic __refdata apic_x2apic_uv_x = { | |||
362 | .irq_delivery_mode = dest_Fixed, | 344 | .irq_delivery_mode = dest_Fixed, |
363 | .irq_dest_mode = 0, /* physical */ | 345 | .irq_dest_mode = 0, /* physical */ |
364 | 346 | ||
365 | .target_cpus = uv_target_cpus, | 347 | .target_cpus = online_target_cpus, |
366 | .disable_esr = 0, | 348 | .disable_esr = 0, |
367 | .dest_logical = APIC_DEST_LOGICAL, | 349 | .dest_logical = APIC_DEST_LOGICAL, |
368 | .check_apicid_used = NULL, | 350 | .check_apicid_used = NULL, |
369 | .check_apicid_present = NULL, | 351 | .check_apicid_present = NULL, |
370 | 352 | ||
371 | .vector_allocation_domain = uv_vector_allocation_domain, | 353 | .vector_allocation_domain = default_vector_allocation_domain, |
372 | .init_apic_ldr = uv_init_apic_ldr, | 354 | .init_apic_ldr = uv_init_apic_ldr, |
373 | 355 | ||
374 | .ioapic_phys_id_map = NULL, | 356 | .ioapic_phys_id_map = NULL, |
@@ -386,7 +368,6 @@ static struct apic __refdata apic_x2apic_uv_x = { | |||
386 | .set_apic_id = set_apic_id, | 368 | .set_apic_id = set_apic_id, |
387 | .apic_id_mask = 0xFFFFFFFFu, | 369 | .apic_id_mask = 0xFFFFFFFFu, |
388 | 370 | ||
389 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, | ||
390 | .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, | 371 | .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, |
391 | 372 | ||
392 | .send_IPI_mask = uv_send_IPI_mask, | 373 | .send_IPI_mask = uv_send_IPI_mask, |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 07b0c0db466..d65464e4350 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -201,6 +201,8 @@ | |||
201 | * http://www.microsoft.com/whdc/archive/amp_12.mspx] | 201 | * http://www.microsoft.com/whdc/archive/amp_12.mspx] |
202 | */ | 202 | */ |
203 | 203 | ||
204 | #define pr_fmt(fmt) "apm: " fmt | ||
205 | |||
204 | #include <linux/module.h> | 206 | #include <linux/module.h> |
205 | 207 | ||
206 | #include <linux/poll.h> | 208 | #include <linux/poll.h> |
@@ -485,11 +487,11 @@ static void apm_error(char *str, int err) | |||
485 | if (error_table[i].key == err) | 487 | if (error_table[i].key == err) |
486 | break; | 488 | break; |
487 | if (i < ERROR_COUNT) | 489 | if (i < ERROR_COUNT) |
488 | printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); | 490 | pr_notice("%s: %s\n", str, error_table[i].msg); |
489 | else if (err < 0) | 491 | else if (err < 0) |
490 | printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); | 492 | pr_notice("%s: linux error code %i\n", str, err); |
491 | else | 493 | else |
492 | printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", | 494 | pr_notice("%s: unknown error code %#2.2x\n", |
493 | str, err); | 495 | str, err); |
494 | } | 496 | } |
495 | 497 | ||
@@ -1184,7 +1186,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender) | |||
1184 | static int notified; | 1186 | static int notified; |
1185 | 1187 | ||
1186 | if (notified++ == 0) | 1188 | if (notified++ == 0) |
1187 | printk(KERN_ERR "apm: an event queue overflowed\n"); | 1189 | pr_err("an event queue overflowed\n"); |
1188 | if (++as->event_tail >= APM_MAX_EVENTS) | 1190 | if (++as->event_tail >= APM_MAX_EVENTS) |
1189 | as->event_tail = 0; | 1191 | as->event_tail = 0; |
1190 | } | 1192 | } |
@@ -1447,7 +1449,7 @@ static void apm_mainloop(void) | |||
1447 | static int check_apm_user(struct apm_user *as, const char *func) | 1449 | static int check_apm_user(struct apm_user *as, const char *func) |
1448 | { | 1450 | { |
1449 | if (as == NULL || as->magic != APM_BIOS_MAGIC) { | 1451 | if (as == NULL || as->magic != APM_BIOS_MAGIC) { |
1450 | printk(KERN_ERR "apm: %s passed bad filp\n", func); | 1452 | pr_err("%s passed bad filp\n", func); |
1451 | return 1; | 1453 | return 1; |
1452 | } | 1454 | } |
1453 | return 0; | 1455 | return 0; |
@@ -1586,7 +1588,7 @@ static int do_release(struct inode *inode, struct file *filp) | |||
1586 | as1 = as1->next) | 1588 | as1 = as1->next) |
1587 | ; | 1589 | ; |
1588 | if (as1 == NULL) | 1590 | if (as1 == NULL) |
1589 | printk(KERN_ERR "apm: filp not in user list\n"); | 1591 | pr_err("filp not in user list\n"); |
1590 | else | 1592 | else |
1591 | as1->next = as->next; | 1593 | as1->next = as->next; |
1592 | } | 1594 | } |
@@ -1600,11 +1602,9 @@ static int do_open(struct inode *inode, struct file *filp) | |||
1600 | struct apm_user *as; | 1602 | struct apm_user *as; |
1601 | 1603 | ||
1602 | as = kmalloc(sizeof(*as), GFP_KERNEL); | 1604 | as = kmalloc(sizeof(*as), GFP_KERNEL); |
1603 | if (as == NULL) { | 1605 | if (as == NULL) |
1604 | printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", | ||
1605 | sizeof(*as)); | ||
1606 | return -ENOMEM; | 1606 | return -ENOMEM; |
1607 | } | 1607 | |
1608 | as->magic = APM_BIOS_MAGIC; | 1608 | as->magic = APM_BIOS_MAGIC; |
1609 | as->event_tail = as->event_head = 0; | 1609 | as->event_tail = as->event_head = 0; |
1610 | as->suspends_pending = as->standbys_pending = 0; | 1610 | as->suspends_pending = as->standbys_pending = 0; |
@@ -2313,16 +2313,16 @@ static int __init apm_init(void) | |||
2313 | } | 2313 | } |
2314 | 2314 | ||
2315 | if (apm_info.disabled) { | 2315 | if (apm_info.disabled) { |
2316 | printk(KERN_NOTICE "apm: disabled on user request.\n"); | 2316 | pr_notice("disabled on user request.\n"); |
2317 | return -ENODEV; | 2317 | return -ENODEV; |
2318 | } | 2318 | } |
2319 | if ((num_online_cpus() > 1) && !power_off && !smp) { | 2319 | if ((num_online_cpus() > 1) && !power_off && !smp) { |
2320 | printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); | 2320 | pr_notice("disabled - APM is not SMP safe.\n"); |
2321 | apm_info.disabled = 1; | 2321 | apm_info.disabled = 1; |
2322 | return -ENODEV; | 2322 | return -ENODEV; |
2323 | } | 2323 | } |
2324 | if (!acpi_disabled) { | 2324 | if (!acpi_disabled) { |
2325 | printk(KERN_NOTICE "apm: overridden by ACPI.\n"); | 2325 | pr_notice("overridden by ACPI.\n"); |
2326 | apm_info.disabled = 1; | 2326 | apm_info.disabled = 1; |
2327 | return -ENODEV; | 2327 | return -ENODEV; |
2328 | } | 2328 | } |
@@ -2356,8 +2356,7 @@ static int __init apm_init(void) | |||
2356 | 2356 | ||
2357 | kapmd_task = kthread_create(apm, NULL, "kapmd"); | 2357 | kapmd_task = kthread_create(apm, NULL, "kapmd"); |
2358 | if (IS_ERR(kapmd_task)) { | 2358 | if (IS_ERR(kapmd_task)) { |
2359 | printk(KERN_ERR "apm: disabled - Unable to start kernel " | 2359 | pr_err("disabled - Unable to start kernel thread\n"); |
2360 | "thread.\n"); | ||
2361 | err = PTR_ERR(kapmd_task); | 2360 | err = PTR_ERR(kapmd_task); |
2362 | kapmd_task = NULL; | 2361 | kapmd_task = NULL; |
2363 | remove_proc_entry("apm", NULL); | 2362 | remove_proc_entry("apm", NULL); |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 6ab6aa2fdfd..d30a6a9a012 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp) | |||
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o scattered.o topology.o | 15 | obj-y := intel_cacheinfo.o scattered.o topology.o |
16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o | 17 | obj-y += vmware.o hypervisor.o mshyperv.o |
18 | obj-y += rdrand.o | 18 | obj-y += rdrand.o |
19 | obj-y += match.o | 19 | obj-y += match.o |
20 | 20 | ||
@@ -32,7 +32,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o | |||
32 | 32 | ||
33 | ifdef CONFIG_PERF_EVENTS | 33 | ifdef CONFIG_PERF_EVENTS |
34 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o | 34 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o |
35 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o | 35 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o |
36 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o | ||
37 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o | ||
36 | endif | 38 | endif |
37 | 39 | ||
38 | obj-$(CONFIG_X86_MCE) += mcheck/ | 40 | obj-$(CONFIG_X86_MCE) += mcheck/ |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 146bb6218ee..9d92e19039f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -19,6 +19,39 @@ | |||
19 | 19 | ||
20 | #include "cpu.h" | 20 | #include "cpu.h" |
21 | 21 | ||
22 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | ||
23 | { | ||
24 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
25 | u32 gprs[8] = { 0 }; | ||
26 | int err; | ||
27 | |||
28 | WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__); | ||
29 | |||
30 | gprs[1] = msr; | ||
31 | gprs[7] = 0x9c5a203a; | ||
32 | |||
33 | err = rdmsr_safe_regs(gprs); | ||
34 | |||
35 | *p = gprs[0] | ((u64)gprs[2] << 32); | ||
36 | |||
37 | return err; | ||
38 | } | ||
39 | |||
40 | static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | ||
41 | { | ||
42 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
43 | u32 gprs[8] = { 0 }; | ||
44 | |||
45 | WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__); | ||
46 | |||
47 | gprs[0] = (u32)val; | ||
48 | gprs[1] = msr; | ||
49 | gprs[2] = val >> 32; | ||
50 | gprs[7] = 0x9c5a203a; | ||
51 | |||
52 | return wrmsr_safe_regs(gprs); | ||
53 | } | ||
54 | |||
22 | #ifdef CONFIG_X86_32 | 55 | #ifdef CONFIG_X86_32 |
23 | /* | 56 | /* |
24 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause | 57 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause |
@@ -586,9 +619,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
586 | !cpu_has(c, X86_FEATURE_TOPOEXT)) { | 619 | !cpu_has(c, X86_FEATURE_TOPOEXT)) { |
587 | u64 val; | 620 | u64 val; |
588 | 621 | ||
589 | if (!rdmsrl_amd_safe(0xc0011005, &val)) { | 622 | if (!rdmsrl_safe(0xc0011005, &val)) { |
590 | val |= 1ULL << 54; | 623 | val |= 1ULL << 54; |
591 | wrmsrl_amd_safe(0xc0011005, val); | 624 | wrmsrl_safe(0xc0011005, val); |
592 | rdmsrl(0xc0011005, val); | 625 | rdmsrl(0xc0011005, val); |
593 | if (val & (1ULL << 54)) { | 626 | if (val & (1ULL << 54)) { |
594 | set_cpu_cap(c, X86_FEATURE_TOPOEXT); | 627 | set_cpu_cap(c, X86_FEATURE_TOPOEXT); |
@@ -679,7 +712,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
679 | err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); | 712 | err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); |
680 | if (err == 0) { | 713 | if (err == 0) { |
681 | mask |= (1 << 10); | 714 | mask |= (1 << 10); |
682 | checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask); | 715 | wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); |
683 | } | 716 | } |
684 | } | 717 | } |
685 | 718 | ||
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 46674fbb62b..c97bb7b5a9f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -55,8 +55,8 @@ static void __init check_fpu(void) | |||
55 | 55 | ||
56 | if (!boot_cpu_data.hard_math) { | 56 | if (!boot_cpu_data.hard_math) { |
57 | #ifndef CONFIG_MATH_EMULATION | 57 | #ifndef CONFIG_MATH_EMULATION |
58 | printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); | 58 | pr_emerg("No coprocessor found and no math emulation present\n"); |
59 | printk(KERN_EMERG "Giving up.\n"); | 59 | pr_emerg("Giving up\n"); |
60 | for (;;) ; | 60 | for (;;) ; |
61 | #endif | 61 | #endif |
62 | return; | 62 | return; |
@@ -86,7 +86,7 @@ static void __init check_fpu(void) | |||
86 | 86 | ||
87 | boot_cpu_data.fdiv_bug = fdiv_bug; | 87 | boot_cpu_data.fdiv_bug = fdiv_bug; |
88 | if (boot_cpu_data.fdiv_bug) | 88 | if (boot_cpu_data.fdiv_bug) |
89 | printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); | 89 | pr_warn("Hmm, FPU with FDIV bug\n"); |
90 | } | 90 | } |
91 | 91 | ||
92 | static void __init check_hlt(void) | 92 | static void __init check_hlt(void) |
@@ -94,16 +94,16 @@ static void __init check_hlt(void) | |||
94 | if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) | 94 | if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) |
95 | return; | 95 | return; |
96 | 96 | ||
97 | printk(KERN_INFO "Checking 'hlt' instruction... "); | 97 | pr_info("Checking 'hlt' instruction... "); |
98 | if (!boot_cpu_data.hlt_works_ok) { | 98 | if (!boot_cpu_data.hlt_works_ok) { |
99 | printk("disabled\n"); | 99 | pr_cont("disabled\n"); |
100 | return; | 100 | return; |
101 | } | 101 | } |
102 | halt(); | 102 | halt(); |
103 | halt(); | 103 | halt(); |
104 | halt(); | 104 | halt(); |
105 | halt(); | 105 | halt(); |
106 | printk(KERN_CONT "OK.\n"); | 106 | pr_cont("OK\n"); |
107 | } | 107 | } |
108 | 108 | ||
109 | /* | 109 | /* |
@@ -116,7 +116,7 @@ static void __init check_popad(void) | |||
116 | #ifndef CONFIG_X86_POPAD_OK | 116 | #ifndef CONFIG_X86_POPAD_OK |
117 | int res, inp = (int) &res; | 117 | int res, inp = (int) &res; |
118 | 118 | ||
119 | printk(KERN_INFO "Checking for popad bug... "); | 119 | pr_info("Checking for popad bug... "); |
120 | __asm__ __volatile__( | 120 | __asm__ __volatile__( |
121 | "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " | 121 | "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " |
122 | : "=&a" (res) | 122 | : "=&a" (res) |
@@ -127,9 +127,9 @@ static void __init check_popad(void) | |||
127 | * CPU hard. Too bad. | 127 | * CPU hard. Too bad. |
128 | */ | 128 | */ |
129 | if (res != 12345678) | 129 | if (res != 12345678) |
130 | printk(KERN_CONT "Buggy.\n"); | 130 | pr_cont("Buggy\n"); |
131 | else | 131 | else |
132 | printk(KERN_CONT "OK.\n"); | 132 | pr_cont("OK\n"); |
133 | #endif | 133 | #endif |
134 | } | 134 | } |
135 | 135 | ||
@@ -161,7 +161,7 @@ void __init check_bugs(void) | |||
161 | { | 161 | { |
162 | identify_boot_cpu(); | 162 | identify_boot_cpu(); |
163 | #ifndef CONFIG_SMP | 163 | #ifndef CONFIG_SMP |
164 | printk(KERN_INFO "CPU: "); | 164 | pr_info("CPU: "); |
165 | print_cpu_info(&boot_cpu_data); | 165 | print_cpu_info(&boot_cpu_data); |
166 | #endif | 166 | #endif |
167 | check_config(); | 167 | check_config(); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 82f29e70d05..5bbc082c47a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -947,7 +947,7 @@ static void __cpuinit __print_cpu_msr(void) | |||
947 | index_max = msr_range_array[i].max; | 947 | index_max = msr_range_array[i].max; |
948 | 948 | ||
949 | for (index = index_min; index < index_max; index++) { | 949 | for (index = index_min; index < index_max; index++) { |
950 | if (rdmsrl_amd_safe(index, &val)) | 950 | if (rdmsrl_safe(index, &val)) |
951 | continue; | 951 | continue; |
952 | printk(KERN_INFO " MSR%08x: %016llx\n", index, val); | 952 | printk(KERN_INFO " MSR%08x: %016llx\n", index, val); |
953 | } | 953 | } |
@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr) | |||
1101 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); | 1101 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); |
1102 | } | 1102 | } |
1103 | 1103 | ||
1104 | static DEFINE_PER_CPU(u32, debug_stack_use_ctr); | ||
1105 | |||
1104 | void debug_stack_set_zero(void) | 1106 | void debug_stack_set_zero(void) |
1105 | { | 1107 | { |
1108 | this_cpu_inc(debug_stack_use_ctr); | ||
1106 | load_idt((const struct desc_ptr *)&nmi_idt_descr); | 1109 | load_idt((const struct desc_ptr *)&nmi_idt_descr); |
1107 | } | 1110 | } |
1108 | 1111 | ||
1109 | void debug_stack_reset(void) | 1112 | void debug_stack_reset(void) |
1110 | { | 1113 | { |
1111 | load_idt((const struct desc_ptr *)&idt_descr); | 1114 | if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) |
1115 | return; | ||
1116 | if (this_cpu_dec_return(debug_stack_use_ctr) == 0) | ||
1117 | load_idt((const struct desc_ptr *)&idt_descr); | ||
1112 | } | 1118 | } |
1113 | 1119 | ||
1114 | #else /* CONFIG_X86_64 */ | 1120 | #else /* CONFIG_X86_64 */ |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 755f64fb074..a8f8fa9769d 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -37,6 +37,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = | |||
37 | #endif | 37 | #endif |
38 | &x86_hyper_vmware, | 38 | &x86_hyper_vmware, |
39 | &x86_hyper_ms_hyperv, | 39 | &x86_hyper_ms_hyperv, |
40 | #ifdef CONFIG_KVM_GUEST | ||
41 | &x86_hyper_kvm, | ||
42 | #endif | ||
40 | }; | 43 | }; |
41 | 44 | ||
42 | const struct hypervisor_x86 *x86_hyper; | 45 | const struct hypervisor_x86 *x86_hyper; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 507ea58688e..cd8b166a173 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -42,7 +42,8 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | |||
42 | struct mce m; | 42 | struct mce m; |
43 | 43 | ||
44 | /* Only corrected MC is reported */ | 44 | /* Only corrected MC is reported */ |
45 | if (!corrected) | 45 | if (!corrected || !(mem_err->validation_bits & |
46 | CPER_MEM_VALID_PHYSICAL_ADDRESS)) | ||
46 | return; | 47 | return; |
47 | 48 | ||
48 | mce_setup(&m); | 49 | mce_setup(&m); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 0c82091b165..413c2ced887 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -126,6 +126,16 @@ static struct severity { | |||
126 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), | 126 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), |
127 | USER | 127 | USER |
128 | ), | 128 | ), |
129 | MCESEV( | ||
130 | KEEP, "HT thread notices Action required: instruction fetch error", | ||
131 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), | ||
132 | MCGMASK(MCG_STATUS_EIPV, 0) | ||
133 | ), | ||
134 | MCESEV( | ||
135 | AR, "Action required: instruction fetch error", | ||
136 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), | ||
137 | USER | ||
138 | ), | ||
129 | #endif | 139 | #endif |
130 | MCESEV( | 140 | MCESEV( |
131 | PANIC, "Action required: unknown MCACOD", | 141 | PANIC, "Action required: unknown MCACOD", |
@@ -165,15 +175,19 @@ static struct severity { | |||
165 | }; | 175 | }; |
166 | 176 | ||
167 | /* | 177 | /* |
168 | * If the EIPV bit is set, it means the saved IP is the | 178 | * If mcgstatus indicated that ip/cs on the stack were |
169 | * instruction which caused the MCE. | 179 | * no good, then "m->cs" will be zero and we will have |
180 | * to assume the worst case (IN_KERNEL) as we actually | ||
181 | * have no idea what we were executing when the machine | ||
182 | * check hit. | ||
183 | * If we do have a good "m->cs" (or a faked one in the | ||
184 | * case we were executing in VM86 mode) we can use it to | ||
185 | * distinguish an exception taken in user from from one | ||
186 | * taken in the kernel. | ||
170 | */ | 187 | */ |
171 | static int error_context(struct mce *m) | 188 | static int error_context(struct mce *m) |
172 | { | 189 | { |
173 | if (m->mcgstatus & MCG_STATUS_EIPV) | 190 | return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; |
174 | return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; | ||
175 | /* Unknown, assume kernel */ | ||
176 | return IN_KERNEL; | ||
177 | } | 191 | } |
178 | 192 | ||
179 | int mce_severity(struct mce *m, int tolerant, char **msg) | 193 | int mce_severity(struct mce *m, int tolerant, char **msg) |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 2afcbd253e1..5e095f873e3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -7,6 +7,9 @@ | |||
7 | * Copyright 2008 Intel Corporation | 7 | * Copyright 2008 Intel Corporation |
8 | * Author: Andi Kleen | 8 | * Author: Andi Kleen |
9 | */ | 9 | */ |
10 | |||
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
12 | |||
10 | #include <linux/thread_info.h> | 13 | #include <linux/thread_info.h> |
11 | #include <linux/capability.h> | 14 | #include <linux/capability.h> |
12 | #include <linux/miscdevice.h> | 15 | #include <linux/miscdevice.h> |
@@ -57,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); | |||
57 | 60 | ||
58 | int mce_disabled __read_mostly; | 61 | int mce_disabled __read_mostly; |
59 | 62 | ||
60 | #define MISC_MCELOG_MINOR 227 | ||
61 | |||
62 | #define SPINUNIT 100 /* 100ns */ | 63 | #define SPINUNIT 100 /* 100ns */ |
63 | 64 | ||
64 | atomic_t mce_entry; | 65 | atomic_t mce_entry; |
@@ -210,7 +211,7 @@ static void drain_mcelog_buffer(void) | |||
210 | cpu_relax(); | 211 | cpu_relax(); |
211 | 212 | ||
212 | if (!m->finished && retries >= 4) { | 213 | if (!m->finished && retries >= 4) { |
213 | pr_err("MCE: skipping error being logged currently!\n"); | 214 | pr_err("skipping error being logged currently!\n"); |
214 | break; | 215 | break; |
215 | } | 216 | } |
216 | } | 217 | } |
@@ -437,6 +438,14 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) | |||
437 | if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { | 438 | if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { |
438 | m->ip = regs->ip; | 439 | m->ip = regs->ip; |
439 | m->cs = regs->cs; | 440 | m->cs = regs->cs; |
441 | |||
442 | /* | ||
443 | * When in VM86 mode make the cs look like ring 3 | ||
444 | * always. This is a lie, but it's better than passing | ||
445 | * the additional vm86 bit around everywhere. | ||
446 | */ | ||
447 | if (v8086_mode(regs)) | ||
448 | m->cs |= 3; | ||
440 | } | 449 | } |
441 | /* Use accurate RIP reporting if available. */ | 450 | /* Use accurate RIP reporting if available. */ |
442 | if (rip_msr) | 451 | if (rip_msr) |
@@ -641,16 +650,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll); | |||
641 | * Do a quick check if any of the events requires a panic. | 650 | * Do a quick check if any of the events requires a panic. |
642 | * This decides if we keep the events around or clear them. | 651 | * This decides if we keep the events around or clear them. |
643 | */ | 652 | */ |
644 | static int mce_no_way_out(struct mce *m, char **msg) | 653 | static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp) |
645 | { | 654 | { |
646 | int i; | 655 | int i, ret = 0; |
647 | 656 | ||
648 | for (i = 0; i < banks; i++) { | 657 | for (i = 0; i < banks; i++) { |
649 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); | 658 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
659 | if (m->status & MCI_STATUS_VAL) | ||
660 | __set_bit(i, validp); | ||
650 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | 661 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) |
651 | return 1; | 662 | ret = 1; |
652 | } | 663 | } |
653 | return 0; | 664 | return ret; |
654 | } | 665 | } |
655 | 666 | ||
656 | /* | 667 | /* |
@@ -1013,6 +1024,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1013 | */ | 1024 | */ |
1014 | int kill_it = 0; | 1025 | int kill_it = 0; |
1015 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | 1026 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); |
1027 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); | ||
1016 | char *msg = "Unknown"; | 1028 | char *msg = "Unknown"; |
1017 | 1029 | ||
1018 | atomic_inc(&mce_entry); | 1030 | atomic_inc(&mce_entry); |
@@ -1027,7 +1039,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1027 | final = &__get_cpu_var(mces_seen); | 1039 | final = &__get_cpu_var(mces_seen); |
1028 | *final = m; | 1040 | *final = m; |
1029 | 1041 | ||
1030 | no_way_out = mce_no_way_out(&m, &msg); | 1042 | memset(valid_banks, 0, sizeof(valid_banks)); |
1043 | no_way_out = mce_no_way_out(&m, &msg, valid_banks); | ||
1031 | 1044 | ||
1032 | barrier(); | 1045 | barrier(); |
1033 | 1046 | ||
@@ -1047,6 +1060,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1047 | order = mce_start(&no_way_out); | 1060 | order = mce_start(&no_way_out); |
1048 | for (i = 0; i < banks; i++) { | 1061 | for (i = 0; i < banks; i++) { |
1049 | __clear_bit(i, toclear); | 1062 | __clear_bit(i, toclear); |
1063 | if (!test_bit(i, valid_banks)) | ||
1064 | continue; | ||
1050 | if (!mce_banks[i].ctl) | 1065 | if (!mce_banks[i].ctl) |
1051 | continue; | 1066 | continue; |
1052 | 1067 | ||
@@ -1153,8 +1168,9 @@ int memory_failure(unsigned long pfn, int vector, int flags) | |||
1153 | { | 1168 | { |
1154 | /* mce_severity() should not hand us an ACTION_REQUIRED error */ | 1169 | /* mce_severity() should not hand us an ACTION_REQUIRED error */ |
1155 | BUG_ON(flags & MF_ACTION_REQUIRED); | 1170 | BUG_ON(flags & MF_ACTION_REQUIRED); |
1156 | printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" | 1171 | pr_err("Uncorrected memory error in page 0x%lx ignored\n" |
1157 | "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); | 1172 | "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", |
1173 | pfn); | ||
1158 | 1174 | ||
1159 | return 0; | 1175 | return 0; |
1160 | } | 1176 | } |
@@ -1172,6 +1188,7 @@ void mce_notify_process(void) | |||
1172 | { | 1188 | { |
1173 | unsigned long pfn; | 1189 | unsigned long pfn; |
1174 | struct mce_info *mi = mce_find_info(); | 1190 | struct mce_info *mi = mce_find_info(); |
1191 | int flags = MF_ACTION_REQUIRED; | ||
1175 | 1192 | ||
1176 | if (!mi) | 1193 | if (!mi) |
1177 | mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); | 1194 | mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); |
@@ -1186,8 +1203,9 @@ void mce_notify_process(void) | |||
1186 | * doomed. We still need to mark the page as poisoned and alert any | 1203 | * doomed. We still need to mark the page as poisoned and alert any |
1187 | * other users of the page. | 1204 | * other users of the page. |
1188 | */ | 1205 | */ |
1189 | if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || | 1206 | if (!mi->restartable) |
1190 | mi->restartable == 0) { | 1207 | flags |= MF_MUST_KILL; |
1208 | if (memory_failure(pfn, MCE_VECTOR, flags) < 0) { | ||
1191 | pr_err("Memory error not recovered"); | 1209 | pr_err("Memory error not recovered"); |
1192 | force_sig(SIGBUS, current); | 1210 | force_sig(SIGBUS, current); |
1193 | } | 1211 | } |
@@ -1237,15 +1255,15 @@ void mce_log_therm_throt_event(__u64 status) | |||
1237 | * poller finds an MCE, poll 2x faster. When the poller finds no more | 1255 | * poller finds an MCE, poll 2x faster. When the poller finds no more |
1238 | * errors, poll 2x slower (up to check_interval seconds). | 1256 | * errors, poll 2x slower (up to check_interval seconds). |
1239 | */ | 1257 | */ |
1240 | static int check_interval = 5 * 60; /* 5 minutes */ | 1258 | static unsigned long check_interval = 5 * 60; /* 5 minutes */ |
1241 | 1259 | ||
1242 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ | 1260 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ |
1243 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1261 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
1244 | 1262 | ||
1245 | static void mce_start_timer(unsigned long data) | 1263 | static void mce_timer_fn(unsigned long data) |
1246 | { | 1264 | { |
1247 | struct timer_list *t = &per_cpu(mce_timer, data); | 1265 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1248 | int *n; | 1266 | unsigned long iv; |
1249 | 1267 | ||
1250 | WARN_ON(smp_processor_id() != data); | 1268 | WARN_ON(smp_processor_id() != data); |
1251 | 1269 | ||
@@ -1258,13 +1276,14 @@ static void mce_start_timer(unsigned long data) | |||
1258 | * Alert userspace if needed. If we logged an MCE, reduce the | 1276 | * Alert userspace if needed. If we logged an MCE, reduce the |
1259 | * polling interval, otherwise increase the polling interval. | 1277 | * polling interval, otherwise increase the polling interval. |
1260 | */ | 1278 | */ |
1261 | n = &__get_cpu_var(mce_next_interval); | 1279 | iv = __this_cpu_read(mce_next_interval); |
1262 | if (mce_notify_irq()) | 1280 | if (mce_notify_irq()) |
1263 | *n = max(*n/2, HZ/100); | 1281 | iv = max(iv / 2, (unsigned long) HZ/100); |
1264 | else | 1282 | else |
1265 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); | 1283 | iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); |
1284 | __this_cpu_write(mce_next_interval, iv); | ||
1266 | 1285 | ||
1267 | t->expires = jiffies + *n; | 1286 | t->expires = jiffies + iv; |
1268 | add_timer_on(t, smp_processor_id()); | 1287 | add_timer_on(t, smp_processor_id()); |
1269 | } | 1288 | } |
1270 | 1289 | ||
@@ -1343,11 +1362,10 @@ static int __cpuinit __mcheck_cpu_cap_init(void) | |||
1343 | 1362 | ||
1344 | b = cap & MCG_BANKCNT_MASK; | 1363 | b = cap & MCG_BANKCNT_MASK; |
1345 | if (!banks) | 1364 | if (!banks) |
1346 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); | 1365 | pr_info("CPU supports %d MCE banks\n", b); |
1347 | 1366 | ||
1348 | if (b > MAX_NR_BANKS) { | 1367 | if (b > MAX_NR_BANKS) { |
1349 | printk(KERN_WARNING | 1368 | pr_warn("Using only %u machine check banks out of %u\n", |
1350 | "MCE: Using only %u machine check banks out of %u\n", | ||
1351 | MAX_NR_BANKS, b); | 1369 | MAX_NR_BANKS, b); |
1352 | b = MAX_NR_BANKS; | 1370 | b = MAX_NR_BANKS; |
1353 | } | 1371 | } |
@@ -1404,7 +1422,7 @@ static void __mcheck_cpu_init_generic(void) | |||
1404 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | 1422 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) |
1405 | { | 1423 | { |
1406 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1424 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1407 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | 1425 | pr_info("unknown CPU type - not enabling MCE support\n"); |
1408 | return -EOPNOTSUPP; | 1426 | return -EOPNOTSUPP; |
1409 | } | 1427 | } |
1410 | 1428 | ||
@@ -1458,9 +1476,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1458 | rdmsrl(msrs[i], val); | 1476 | rdmsrl(msrs[i], val); |
1459 | 1477 | ||
1460 | /* CntP bit set? */ | 1478 | /* CntP bit set? */ |
1461 | if (val & BIT(62)) { | 1479 | if (val & BIT_64(62)) { |
1462 | val &= ~BIT(62); | 1480 | val &= ~BIT_64(62); |
1463 | wrmsrl(msrs[i], val); | 1481 | wrmsrl(msrs[i], val); |
1464 | } | 1482 | } |
1465 | } | 1483 | } |
1466 | 1484 | ||
@@ -1542,24 +1560,24 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) | |||
1542 | static void __mcheck_cpu_init_timer(void) | 1560 | static void __mcheck_cpu_init_timer(void) |
1543 | { | 1561 | { |
1544 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1562 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1545 | int *n = &__get_cpu_var(mce_next_interval); | 1563 | unsigned long iv = check_interval * HZ; |
1546 | 1564 | ||
1547 | setup_timer(t, mce_start_timer, smp_processor_id()); | 1565 | setup_timer(t, mce_timer_fn, smp_processor_id()); |
1548 | 1566 | ||
1549 | if (mce_ignore_ce) | 1567 | if (mce_ignore_ce) |
1550 | return; | 1568 | return; |
1551 | 1569 | ||
1552 | *n = check_interval * HZ; | 1570 | __this_cpu_write(mce_next_interval, iv); |
1553 | if (!*n) | 1571 | if (!iv) |
1554 | return; | 1572 | return; |
1555 | t->expires = round_jiffies(jiffies + *n); | 1573 | t->expires = round_jiffies(jiffies + iv); |
1556 | add_timer_on(t, smp_processor_id()); | 1574 | add_timer_on(t, smp_processor_id()); |
1557 | } | 1575 | } |
1558 | 1576 | ||
1559 | /* Handle unconfigured int18 (should never happen) */ | 1577 | /* Handle unconfigured int18 (should never happen) */ |
1560 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | 1578 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) |
1561 | { | 1579 | { |
1562 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | 1580 | pr_err("CPU#%d: Unexpected int18 (Machine Check)\n", |
1563 | smp_processor_id()); | 1581 | smp_processor_id()); |
1564 | } | 1582 | } |
1565 | 1583 | ||
@@ -1878,8 +1896,7 @@ static int __init mcheck_enable(char *str) | |||
1878 | get_option(&str, &monarch_timeout); | 1896 | get_option(&str, &monarch_timeout); |
1879 | } | 1897 | } |
1880 | } else { | 1898 | } else { |
1881 | printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", | 1899 | pr_info("mce argument %s ignored. Please use /sys\n", str); |
1882 | str); | ||
1883 | return 0; | 1900 | return 0; |
1884 | } | 1901 | } |
1885 | return 1; | 1902 | return 1; |
@@ -2262,7 +2279,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2262 | case CPU_DOWN_FAILED_FROZEN: | 2279 | case CPU_DOWN_FAILED_FROZEN: |
2263 | if (!mce_ignore_ce && check_interval) { | 2280 | if (!mce_ignore_ce && check_interval) { |
2264 | t->expires = round_jiffies(jiffies + | 2281 | t->expires = round_jiffies(jiffies + |
2265 | __get_cpu_var(mce_next_interval)); | 2282 | per_cpu(mce_next_interval, cpu)); |
2266 | add_timer_on(t, cpu); | 2283 | add_timer_on(t, cpu); |
2267 | } | 2284 | } |
2268 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | 2285 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); |
@@ -2327,7 +2344,7 @@ static __init int mcheck_init_device(void) | |||
2327 | 2344 | ||
2328 | return err; | 2345 | return err; |
2329 | } | 2346 | } |
2330 | device_initcall(mcheck_init_device); | 2347 | device_initcall_sync(mcheck_init_device); |
2331 | 2348 | ||
2332 | /* | 2349 | /* |
2333 | * Old style boot options parsing. Only for compatibility. | 2350 | * Old style boot options parsing. Only for compatibility. |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f4873a64f46..c4e916d7737 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -1,15 +1,17 @@ | |||
1 | /* | 1 | /* |
2 | * (c) 2005, 2006 Advanced Micro Devices, Inc. | 2 | * (c) 2005-2012 Advanced Micro Devices, Inc. |
3 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
4 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
6 | * | 6 | * |
7 | * Written by Jacob Shin - AMD, Inc. | 7 | * Written by Jacob Shin - AMD, Inc. |
8 | * | 8 | * |
9 | * Support : jacob.shin@amd.com | 9 | * Support: borislav.petkov@amd.com |
10 | * | 10 | * |
11 | * April 2006 | 11 | * April 2006 |
12 | * - added support for AMD Family 0x10 processors | 12 | * - added support for AMD Family 0x10 processors |
13 | * May 2012 | ||
14 | * - major scrubbing | ||
13 | * | 15 | * |
14 | * All MC4_MISCi registers are shared between multi-cores | 16 | * All MC4_MISCi registers are shared between multi-cores |
15 | */ | 17 | */ |
@@ -25,6 +27,7 @@ | |||
25 | #include <linux/cpu.h> | 27 | #include <linux/cpu.h> |
26 | #include <linux/smp.h> | 28 | #include <linux/smp.h> |
27 | 29 | ||
30 | #include <asm/amd_nb.h> | ||
28 | #include <asm/apic.h> | 31 | #include <asm/apic.h> |
29 | #include <asm/idle.h> | 32 | #include <asm/idle.h> |
30 | #include <asm/mce.h> | 33 | #include <asm/mce.h> |
@@ -45,23 +48,15 @@ | |||
45 | #define MASK_BLKPTR_LO 0xFF000000 | 48 | #define MASK_BLKPTR_LO 0xFF000000 |
46 | #define MCG_XBLK_ADDR 0xC0000400 | 49 | #define MCG_XBLK_ADDR 0xC0000400 |
47 | 50 | ||
48 | struct threshold_block { | 51 | static const char * const th_names[] = { |
49 | unsigned int block; | 52 | "load_store", |
50 | unsigned int bank; | 53 | "insn_fetch", |
51 | unsigned int cpu; | 54 | "combined_unit", |
52 | u32 address; | 55 | "", |
53 | u16 interrupt_enable; | 56 | "northbridge", |
54 | bool interrupt_capable; | 57 | "execution_unit", |
55 | u16 threshold_limit; | ||
56 | struct kobject kobj; | ||
57 | struct list_head miscj; | ||
58 | }; | 58 | }; |
59 | 59 | ||
60 | struct threshold_bank { | ||
61 | struct kobject *kobj; | ||
62 | struct threshold_block *blocks; | ||
63 | cpumask_var_t cpus; | ||
64 | }; | ||
65 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); | 60 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); |
66 | 61 | ||
67 | static unsigned char shared_bank[NR_BANKS] = { | 62 | static unsigned char shared_bank[NR_BANKS] = { |
@@ -84,6 +79,26 @@ struct thresh_restart { | |||
84 | u16 old_limit; | 79 | u16 old_limit; |
85 | }; | 80 | }; |
86 | 81 | ||
82 | static const char * const bank4_names(struct threshold_block *b) | ||
83 | { | ||
84 | switch (b->address) { | ||
85 | /* MSR4_MISC0 */ | ||
86 | case 0x00000413: | ||
87 | return "dram"; | ||
88 | |||
89 | case 0xc0000408: | ||
90 | return "ht_links"; | ||
91 | |||
92 | case 0xc0000409: | ||
93 | return "l3_cache"; | ||
94 | |||
95 | default: | ||
96 | WARN(1, "Funny MSR: 0x%08x\n", b->address); | ||
97 | return ""; | ||
98 | } | ||
99 | }; | ||
100 | |||
101 | |||
87 | static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) | 102 | static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) |
88 | { | 103 | { |
89 | /* | 104 | /* |
@@ -224,8 +239,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
224 | 239 | ||
225 | if (!block) | 240 | if (!block) |
226 | per_cpu(bank_map, cpu) |= (1 << bank); | 241 | per_cpu(bank_map, cpu) |= (1 << bank); |
227 | if (shared_bank[bank] && c->cpu_core_id) | ||
228 | break; | ||
229 | 242 | ||
230 | memset(&b, 0, sizeof(b)); | 243 | memset(&b, 0, sizeof(b)); |
231 | b.cpu = cpu; | 244 | b.cpu = cpu; |
@@ -326,7 +339,7 @@ struct threshold_attr { | |||
326 | #define SHOW_FIELDS(name) \ | 339 | #define SHOW_FIELDS(name) \ |
327 | static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ | 340 | static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ |
328 | { \ | 341 | { \ |
329 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ | 342 | return sprintf(buf, "%lu\n", (unsigned long) b->name); \ |
330 | } | 343 | } |
331 | SHOW_FIELDS(interrupt_enable) | 344 | SHOW_FIELDS(interrupt_enable) |
332 | SHOW_FIELDS(threshold_limit) | 345 | SHOW_FIELDS(threshold_limit) |
@@ -377,38 +390,21 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) | |||
377 | return size; | 390 | return size; |
378 | } | 391 | } |
379 | 392 | ||
380 | struct threshold_block_cross_cpu { | ||
381 | struct threshold_block *tb; | ||
382 | long retval; | ||
383 | }; | ||
384 | |||
385 | static void local_error_count_handler(void *_tbcc) | ||
386 | { | ||
387 | struct threshold_block_cross_cpu *tbcc = _tbcc; | ||
388 | struct threshold_block *b = tbcc->tb; | ||
389 | u32 low, high; | ||
390 | |||
391 | rdmsr(b->address, low, high); | ||
392 | tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); | ||
393 | } | ||
394 | |||
395 | static ssize_t show_error_count(struct threshold_block *b, char *buf) | 393 | static ssize_t show_error_count(struct threshold_block *b, char *buf) |
396 | { | 394 | { |
397 | struct threshold_block_cross_cpu tbcc = { .tb = b, }; | 395 | u32 lo, hi; |
398 | 396 | ||
399 | smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1); | 397 | rdmsr_on_cpu(b->cpu, b->address, &lo, &hi); |
400 | return sprintf(buf, "%lx\n", tbcc.retval); | ||
401 | } | ||
402 | |||
403 | static ssize_t store_error_count(struct threshold_block *b, | ||
404 | const char *buf, size_t count) | ||
405 | { | ||
406 | struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; | ||
407 | 398 | ||
408 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 399 | return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) - |
409 | return 1; | 400 | (THRESHOLD_MAX - b->threshold_limit))); |
410 | } | 401 | } |
411 | 402 | ||
403 | static struct threshold_attr error_count = { | ||
404 | .attr = {.name = __stringify(error_count), .mode = 0444 }, | ||
405 | .show = show_error_count, | ||
406 | }; | ||
407 | |||
412 | #define RW_ATTR(val) \ | 408 | #define RW_ATTR(val) \ |
413 | static struct threshold_attr val = { \ | 409 | static struct threshold_attr val = { \ |
414 | .attr = {.name = __stringify(val), .mode = 0644 }, \ | 410 | .attr = {.name = __stringify(val), .mode = 0644 }, \ |
@@ -418,7 +414,6 @@ static struct threshold_attr val = { \ | |||
418 | 414 | ||
419 | RW_ATTR(interrupt_enable); | 415 | RW_ATTR(interrupt_enable); |
420 | RW_ATTR(threshold_limit); | 416 | RW_ATTR(threshold_limit); |
421 | RW_ATTR(error_count); | ||
422 | 417 | ||
423 | static struct attribute *default_attrs[] = { | 418 | static struct attribute *default_attrs[] = { |
424 | &threshold_limit.attr, | 419 | &threshold_limit.attr, |
@@ -517,7 +512,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
517 | 512 | ||
518 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, | 513 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, |
519 | per_cpu(threshold_banks, cpu)[bank]->kobj, | 514 | per_cpu(threshold_banks, cpu)[bank]->kobj, |
520 | "misc%i", block); | 515 | (bank == 4 ? bank4_names(b) : th_names[bank])); |
521 | if (err) | 516 | if (err) |
522 | goto out_free; | 517 | goto out_free; |
523 | recurse: | 518 | recurse: |
@@ -548,98 +543,91 @@ out_free: | |||
548 | return err; | 543 | return err; |
549 | } | 544 | } |
550 | 545 | ||
551 | static __cpuinit long | 546 | static __cpuinit int __threshold_add_blocks(struct threshold_bank *b) |
552 | local_allocate_threshold_blocks(int cpu, unsigned int bank) | ||
553 | { | 547 | { |
554 | return allocate_threshold_blocks(cpu, bank, 0, | 548 | struct list_head *head = &b->blocks->miscj; |
555 | MSR_IA32_MC0_MISC + bank * 4); | 549 | struct threshold_block *pos = NULL; |
550 | struct threshold_block *tmp = NULL; | ||
551 | int err = 0; | ||
552 | |||
553 | err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name); | ||
554 | if (err) | ||
555 | return err; | ||
556 | |||
557 | list_for_each_entry_safe(pos, tmp, head, miscj) { | ||
558 | |||
559 | err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name); | ||
560 | if (err) { | ||
561 | list_for_each_entry_safe_reverse(pos, tmp, head, miscj) | ||
562 | kobject_del(&pos->kobj); | ||
563 | |||
564 | return err; | ||
565 | } | ||
566 | } | ||
567 | return err; | ||
556 | } | 568 | } |
557 | 569 | ||
558 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ | ||
559 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | 570 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) |
560 | { | 571 | { |
561 | int i, err = 0; | ||
562 | struct threshold_bank *b = NULL; | ||
563 | struct device *dev = per_cpu(mce_device, cpu); | 572 | struct device *dev = per_cpu(mce_device, cpu); |
564 | char name[32]; | 573 | struct amd_northbridge *nb = NULL; |
574 | struct threshold_bank *b = NULL; | ||
575 | const char *name = th_names[bank]; | ||
576 | int err = 0; | ||
565 | 577 | ||
566 | sprintf(name, "threshold_bank%i", bank); | 578 | if (shared_bank[bank]) { |
567 | 579 | ||
568 | #ifdef CONFIG_SMP | 580 | nb = node_to_amd_nb(amd_get_nb_id(cpu)); |
569 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ | 581 | WARN_ON(!nb); |
570 | i = cpumask_first(cpu_llc_shared_mask(cpu)); | ||
571 | 582 | ||
572 | /* first core not up yet */ | 583 | /* threshold descriptor already initialized on this node? */ |
573 | if (cpu_data(i).cpu_core_id) | 584 | if (nb->bank4) { |
574 | goto out; | 585 | /* yes, use it */ |
586 | b = nb->bank4; | ||
587 | err = kobject_add(b->kobj, &dev->kobj, name); | ||
588 | if (err) | ||
589 | goto out; | ||
575 | 590 | ||
576 | /* already linked */ | 591 | per_cpu(threshold_banks, cpu)[bank] = b; |
577 | if (per_cpu(threshold_banks, cpu)[bank]) | 592 | atomic_inc(&b->cpus); |
578 | goto out; | ||
579 | 593 | ||
580 | b = per_cpu(threshold_banks, i)[bank]; | 594 | err = __threshold_add_blocks(b); |
581 | 595 | ||
582 | if (!b) | ||
583 | goto out; | 596 | goto out; |
584 | 597 | } | |
585 | err = sysfs_create_link(&dev->kobj, b->kobj, name); | ||
586 | if (err) | ||
587 | goto out; | ||
588 | |||
589 | cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu)); | ||
590 | per_cpu(threshold_banks, cpu)[bank] = b; | ||
591 | |||
592 | goto out; | ||
593 | } | 598 | } |
594 | #endif | ||
595 | 599 | ||
596 | b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); | 600 | b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); |
597 | if (!b) { | 601 | if (!b) { |
598 | err = -ENOMEM; | 602 | err = -ENOMEM; |
599 | goto out; | 603 | goto out; |
600 | } | 604 | } |
601 | if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { | ||
602 | kfree(b); | ||
603 | err = -ENOMEM; | ||
604 | goto out; | ||
605 | } | ||
606 | 605 | ||
607 | b->kobj = kobject_create_and_add(name, &dev->kobj); | 606 | b->kobj = kobject_create_and_add(name, &dev->kobj); |
608 | if (!b->kobj) | 607 | if (!b->kobj) { |
608 | err = -EINVAL; | ||
609 | goto out_free; | 609 | goto out_free; |
610 | 610 | } | |
611 | #ifndef CONFIG_SMP | ||
612 | cpumask_setall(b->cpus); | ||
613 | #else | ||
614 | cpumask_set_cpu(cpu, b->cpus); | ||
615 | #endif | ||
616 | 611 | ||
617 | per_cpu(threshold_banks, cpu)[bank] = b; | 612 | per_cpu(threshold_banks, cpu)[bank] = b; |
618 | 613 | ||
619 | err = local_allocate_threshold_blocks(cpu, bank); | 614 | if (shared_bank[bank]) { |
620 | if (err) | 615 | atomic_set(&b->cpus, 1); |
621 | goto out_free; | ||
622 | |||
623 | for_each_cpu(i, b->cpus) { | ||
624 | if (i == cpu) | ||
625 | continue; | ||
626 | |||
627 | dev = per_cpu(mce_device, i); | ||
628 | if (dev) | ||
629 | err = sysfs_create_link(&dev->kobj,b->kobj, name); | ||
630 | if (err) | ||
631 | goto out; | ||
632 | 616 | ||
633 | per_cpu(threshold_banks, i)[bank] = b; | 617 | /* nb is already initialized, see above */ |
618 | WARN_ON(nb->bank4); | ||
619 | nb->bank4 = b; | ||
634 | } | 620 | } |
635 | 621 | ||
636 | goto out; | 622 | err = allocate_threshold_blocks(cpu, bank, 0, |
623 | MSR_IA32_MC0_MISC + bank * 4); | ||
624 | if (!err) | ||
625 | goto out; | ||
637 | 626 | ||
638 | out_free: | 627 | out_free: |
639 | per_cpu(threshold_banks, cpu)[bank] = NULL; | ||
640 | free_cpumask_var(b->cpus); | ||
641 | kfree(b); | 628 | kfree(b); |
642 | out: | 629 | |
630 | out: | ||
643 | return err; | 631 | return err; |
644 | } | 632 | } |
645 | 633 | ||
@@ -660,12 +648,6 @@ static __cpuinit int threshold_create_device(unsigned int cpu) | |||
660 | return err; | 648 | return err; |
661 | } | 649 | } |
662 | 650 | ||
663 | /* | ||
664 | * let's be hotplug friendly. | ||
665 | * in case of multiple core processors, the first core always takes ownership | ||
666 | * of shared sysfs dir/files, and rest of the cores will be symlinked to it. | ||
667 | */ | ||
668 | |||
669 | static void deallocate_threshold_block(unsigned int cpu, | 651 | static void deallocate_threshold_block(unsigned int cpu, |
670 | unsigned int bank) | 652 | unsigned int bank) |
671 | { | 653 | { |
@@ -686,41 +668,42 @@ static void deallocate_threshold_block(unsigned int cpu, | |||
686 | per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; | 668 | per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; |
687 | } | 669 | } |
688 | 670 | ||
671 | static void __threshold_remove_blocks(struct threshold_bank *b) | ||
672 | { | ||
673 | struct threshold_block *pos = NULL; | ||
674 | struct threshold_block *tmp = NULL; | ||
675 | |||
676 | kobject_del(b->kobj); | ||
677 | |||
678 | list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj) | ||
679 | kobject_del(&pos->kobj); | ||
680 | } | ||
681 | |||
689 | static void threshold_remove_bank(unsigned int cpu, int bank) | 682 | static void threshold_remove_bank(unsigned int cpu, int bank) |
690 | { | 683 | { |
684 | struct amd_northbridge *nb; | ||
691 | struct threshold_bank *b; | 685 | struct threshold_bank *b; |
692 | struct device *dev; | ||
693 | char name[32]; | ||
694 | int i = 0; | ||
695 | 686 | ||
696 | b = per_cpu(threshold_banks, cpu)[bank]; | 687 | b = per_cpu(threshold_banks, cpu)[bank]; |
697 | if (!b) | 688 | if (!b) |
698 | return; | 689 | return; |
690 | |||
699 | if (!b->blocks) | 691 | if (!b->blocks) |
700 | goto free_out; | 692 | goto free_out; |
701 | 693 | ||
702 | sprintf(name, "threshold_bank%i", bank); | 694 | if (shared_bank[bank]) { |
703 | 695 | if (!atomic_dec_and_test(&b->cpus)) { | |
704 | #ifdef CONFIG_SMP | 696 | __threshold_remove_blocks(b); |
705 | /* sibling symlink */ | 697 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
706 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | 698 | return; |
707 | dev = per_cpu(mce_device, cpu); | 699 | } else { |
708 | sysfs_remove_link(&dev->kobj, name); | 700 | /* |
709 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 701 | * the last CPU on this node using the shared bank is |
710 | 702 | * going away, remove that bank now. | |
711 | return; | 703 | */ |
712 | } | 704 | nb = node_to_amd_nb(amd_get_nb_id(cpu)); |
713 | #endif | 705 | nb->bank4 = NULL; |
714 | 706 | } | |
715 | /* remove all sibling symlinks before unregistering */ | ||
716 | for_each_cpu(i, b->cpus) { | ||
717 | if (i == cpu) | ||
718 | continue; | ||
719 | |||
720 | dev = per_cpu(mce_device, i); | ||
721 | if (dev) | ||
722 | sysfs_remove_link(&dev->kobj, name); | ||
723 | per_cpu(threshold_banks, i)[bank] = NULL; | ||
724 | } | 707 | } |
725 | 708 | ||
726 | deallocate_threshold_block(cpu, bank); | 709 | deallocate_threshold_block(cpu, bank); |
@@ -728,7 +711,6 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
728 | free_out: | 711 | free_out: |
729 | kobject_del(b->kobj); | 712 | kobject_del(b->kobj); |
730 | kobject_put(b->kobj); | 713 | kobject_put(b->kobj); |
731 | free_cpumask_var(b->cpus); | ||
732 | kfree(b); | 714 | kfree(b); |
733 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 715 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
734 | } | 716 | } |
@@ -777,4 +759,24 @@ static __init int threshold_init_device(void) | |||
777 | 759 | ||
778 | return 0; | 760 | return 0; |
779 | } | 761 | } |
780 | device_initcall(threshold_init_device); | 762 | /* |
763 | * there are 3 funcs which need to be _initcalled in a logic sequence: | ||
764 | * 1. xen_late_init_mcelog | ||
765 | * 2. mcheck_init_device | ||
766 | * 3. threshold_init_device | ||
767 | * | ||
768 | * xen_late_init_mcelog must register xen_mce_chrdev_device before | ||
769 | * native mce_chrdev_device registration if running under xen platform; | ||
770 | * | ||
771 | * mcheck_init_device should be inited before threshold_init_device to | ||
772 | * initialize mce_device, otherwise a NULL ptr dereference will cause panic. | ||
773 | * | ||
774 | * so we use following _initcalls | ||
775 | * 1. device_initcall(xen_late_init_mcelog); | ||
776 | * 2. device_initcall_sync(mcheck_init_device); | ||
777 | * 3. late_initcall(threshold_init_device); | ||
778 | * | ||
779 | * when running under xen, the initcall order is 1,2,3; | ||
780 | * on baremetal, we skip 1 and we do only 2 and 3. | ||
781 | */ | ||
782 | late_initcall(threshold_init_device); | ||
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl index dfea390e160..c7b3fe2d72e 100644 --- a/arch/x86/kernel/cpu/mkcapflags.pl +++ b/arch/x86/kernel/cpu/mkcapflags.pl | |||
@@ -1,4 +1,4 @@ | |||
1 | #!/usr/bin/perl | 1 | #!/usr/bin/perl -w |
2 | # | 2 | # |
3 | # Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h | 3 | # Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h |
4 | # | 4 | # |
@@ -11,22 +11,35 @@ open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; | |||
11 | print OUT "#include <asm/cpufeature.h>\n\n"; | 11 | print OUT "#include <asm/cpufeature.h>\n\n"; |
12 | print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; | 12 | print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; |
13 | 13 | ||
14 | %features = (); | ||
15 | $err = 0; | ||
16 | |||
14 | while (defined($line = <IN>)) { | 17 | while (defined($line = <IN>)) { |
15 | if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { | 18 | if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { |
16 | $macro = $1; | 19 | $macro = $1; |
17 | $feature = $2; | 20 | $feature = "\L$2"; |
18 | $tail = $3; | 21 | $tail = $3; |
19 | if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { | 22 | if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { |
20 | $feature = $1; | 23 | $feature = "\L$1"; |
21 | } | 24 | } |
22 | 25 | ||
23 | if ($feature ne '') { | 26 | next if ($feature eq ''); |
24 | printf OUT "\t%-32s = \"%s\",\n", | 27 | |
25 | "[$macro]", "\L$feature"; | 28 | if ($features{$feature}++) { |
29 | print STDERR "$in: duplicate feature name: $feature\n"; | ||
30 | $err++; | ||
26 | } | 31 | } |
32 | printf OUT "\t%-32s = \"%s\",\n", "[$macro]", $feature; | ||
27 | } | 33 | } |
28 | } | 34 | } |
29 | print OUT "};\n"; | 35 | print OUT "};\n"; |
30 | 36 | ||
31 | close(IN); | 37 | close(IN); |
32 | close(OUT); | 38 | close(OUT); |
39 | |||
40 | if ($err) { | ||
41 | unlink($out); | ||
42 | exit(1); | ||
43 | } | ||
44 | |||
45 | exit(0); | ||
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index ac140c7be39..35ffda5d072 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -258,15 +258,15 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, | |||
258 | 258 | ||
259 | /* Compute the maximum size with which we can make a range: */ | 259 | /* Compute the maximum size with which we can make a range: */ |
260 | if (range_startk) | 260 | if (range_startk) |
261 | max_align = ffs(range_startk) - 1; | 261 | max_align = __ffs(range_startk); |
262 | else | 262 | else |
263 | max_align = 32; | 263 | max_align = BITS_PER_LONG - 1; |
264 | 264 | ||
265 | align = fls(range_sizek) - 1; | 265 | align = __fls(range_sizek); |
266 | if (align > max_align) | 266 | if (align > max_align) |
267 | align = max_align; | 267 | align = max_align; |
268 | 268 | ||
269 | sizek = 1 << align; | 269 | sizek = 1UL << align; |
270 | if (debug_print) { | 270 | if (debug_print) { |
271 | char start_factor = 'K', size_factor = 'K'; | 271 | char start_factor = 'K', size_factor = 'K'; |
272 | unsigned long start_base, size_base; | 272 | unsigned long start_base, size_base; |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 75772ae6c65..e9fe907cd24 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -361,11 +361,7 @@ static void __init print_mtrr_state(void) | |||
361 | } | 361 | } |
362 | pr_debug("MTRR variable ranges %sabled:\n", | 362 | pr_debug("MTRR variable ranges %sabled:\n", |
363 | mtrr_state.enabled & 2 ? "en" : "dis"); | 363 | mtrr_state.enabled & 2 ? "en" : "dis"); |
364 | if (size_or_mask & 0xffffffffUL) | 364 | high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4; |
365 | high_width = ffs(size_or_mask & 0xffffffffUL) - 1; | ||
366 | else | ||
367 | high_width = ffs(size_or_mask>>32) + 32 - 1; | ||
368 | high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; | ||
369 | 365 | ||
370 | for (i = 0; i < num_var_ranges; ++i) { | 366 | for (i = 0; i < num_var_ranges; ++i) { |
371 | if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) | 367 | if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e049d6da018..29557aa06dd 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -35,17 +35,6 @@ | |||
35 | 35 | ||
36 | #include "perf_event.h" | 36 | #include "perf_event.h" |
37 | 37 | ||
38 | #if 0 | ||
39 | #undef wrmsrl | ||
40 | #define wrmsrl(msr, val) \ | ||
41 | do { \ | ||
42 | trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ | ||
43 | (unsigned long)(val)); \ | ||
44 | native_write_msr((msr), (u32)((u64)(val)), \ | ||
45 | (u32)((u64)(val) >> 32)); \ | ||
46 | } while (0) | ||
47 | #endif | ||
48 | |||
49 | struct x86_pmu x86_pmu __read_mostly; | 38 | struct x86_pmu x86_pmu __read_mostly; |
50 | 39 | ||
51 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | 40 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
@@ -74,7 +63,7 @@ u64 x86_perf_event_update(struct perf_event *event) | |||
74 | int idx = hwc->idx; | 63 | int idx = hwc->idx; |
75 | s64 delta; | 64 | s64 delta; |
76 | 65 | ||
77 | if (idx == X86_PMC_IDX_FIXED_BTS) | 66 | if (idx == INTEL_PMC_IDX_FIXED_BTS) |
78 | return 0; | 67 | return 0; |
79 | 68 | ||
80 | /* | 69 | /* |
@@ -86,7 +75,7 @@ u64 x86_perf_event_update(struct perf_event *event) | |||
86 | */ | 75 | */ |
87 | again: | 76 | again: |
88 | prev_raw_count = local64_read(&hwc->prev_count); | 77 | prev_raw_count = local64_read(&hwc->prev_count); |
89 | rdmsrl(hwc->event_base, new_raw_count); | 78 | rdpmcl(hwc->event_base_rdpmc, new_raw_count); |
90 | 79 | ||
91 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | 80 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
92 | new_raw_count) != prev_raw_count) | 81 | new_raw_count) != prev_raw_count) |
@@ -189,7 +178,7 @@ static void release_pmc_hardware(void) {} | |||
189 | 178 | ||
190 | static bool check_hw_exists(void) | 179 | static bool check_hw_exists(void) |
191 | { | 180 | { |
192 | u64 val, val_new = 0; | 181 | u64 val, val_new = ~0; |
193 | int i, reg, ret = 0; | 182 | int i, reg, ret = 0; |
194 | 183 | ||
195 | /* | 184 | /* |
@@ -222,8 +211,9 @@ static bool check_hw_exists(void) | |||
222 | * that don't trap on the MSR access and always return 0s. | 211 | * that don't trap on the MSR access and always return 0s. |
223 | */ | 212 | */ |
224 | val = 0xabcdUL; | 213 | val = 0xabcdUL; |
225 | ret = checking_wrmsrl(x86_pmu_event_addr(0), val); | 214 | reg = x86_pmu_event_addr(0); |
226 | ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); | 215 | ret = wrmsrl_safe(reg, val); |
216 | ret |= rdmsrl_safe(reg, &val_new); | ||
227 | if (ret || val != val_new) | 217 | if (ret || val != val_new) |
228 | goto msr_fail; | 218 | goto msr_fail; |
229 | 219 | ||
@@ -240,6 +230,7 @@ bios_fail: | |||
240 | 230 | ||
241 | msr_fail: | 231 | msr_fail: |
242 | printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); | 232 | printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); |
233 | printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new); | ||
243 | 234 | ||
244 | return false; | 235 | return false; |
245 | } | 236 | } |
@@ -388,7 +379,7 @@ int x86_pmu_hw_config(struct perf_event *event) | |||
388 | int precise = 0; | 379 | int precise = 0; |
389 | 380 | ||
390 | /* Support for constant skid */ | 381 | /* Support for constant skid */ |
391 | if (x86_pmu.pebs_active) { | 382 | if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { |
392 | precise++; | 383 | precise++; |
393 | 384 | ||
394 | /* Support for IP fixup */ | 385 | /* Support for IP fixup */ |
@@ -637,8 +628,8 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) | |||
637 | c = sched->constraints[sched->state.event]; | 628 | c = sched->constraints[sched->state.event]; |
638 | 629 | ||
639 | /* Prefer fixed purpose counters */ | 630 | /* Prefer fixed purpose counters */ |
640 | if (x86_pmu.num_counters_fixed) { | 631 | if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { |
641 | idx = X86_PMC_IDX_FIXED; | 632 | idx = INTEL_PMC_IDX_FIXED; |
642 | for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { | 633 | for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { |
643 | if (!__test_and_set_bit(idx, sched->state.used)) | 634 | if (!__test_and_set_bit(idx, sched->state.used)) |
644 | goto done; | 635 | goto done; |
@@ -646,7 +637,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) | |||
646 | } | 637 | } |
647 | /* Grab the first unused counter starting with idx */ | 638 | /* Grab the first unused counter starting with idx */ |
648 | idx = sched->state.counter; | 639 | idx = sched->state.counter; |
649 | for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { | 640 | for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { |
650 | if (!__test_and_set_bit(idx, sched->state.used)) | 641 | if (!__test_and_set_bit(idx, sched->state.used)) |
651 | goto done; | 642 | goto done; |
652 | } | 643 | } |
@@ -704,8 +695,8 @@ static bool perf_sched_next_event(struct perf_sched *sched) | |||
704 | /* | 695 | /* |
705 | * Assign a counter for each event. | 696 | * Assign a counter for each event. |
706 | */ | 697 | */ |
707 | static int perf_assign_events(struct event_constraint **constraints, int n, | 698 | int perf_assign_events(struct event_constraint **constraints, int n, |
708 | int wmin, int wmax, int *assign) | 699 | int wmin, int wmax, int *assign) |
709 | { | 700 | { |
710 | struct perf_sched sched; | 701 | struct perf_sched sched; |
711 | 702 | ||
@@ -824,15 +815,17 @@ static inline void x86_assign_hw_event(struct perf_event *event, | |||
824 | hwc->last_cpu = smp_processor_id(); | 815 | hwc->last_cpu = smp_processor_id(); |
825 | hwc->last_tag = ++cpuc->tags[i]; | 816 | hwc->last_tag = ++cpuc->tags[i]; |
826 | 817 | ||
827 | if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { | 818 | if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) { |
828 | hwc->config_base = 0; | 819 | hwc->config_base = 0; |
829 | hwc->event_base = 0; | 820 | hwc->event_base = 0; |
830 | } else if (hwc->idx >= X86_PMC_IDX_FIXED) { | 821 | } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) { |
831 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | 822 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; |
832 | hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED); | 823 | hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED); |
824 | hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30; | ||
833 | } else { | 825 | } else { |
834 | hwc->config_base = x86_pmu_config_addr(hwc->idx); | 826 | hwc->config_base = x86_pmu_config_addr(hwc->idx); |
835 | hwc->event_base = x86_pmu_event_addr(hwc->idx); | 827 | hwc->event_base = x86_pmu_event_addr(hwc->idx); |
828 | hwc->event_base_rdpmc = hwc->idx; | ||
836 | } | 829 | } |
837 | } | 830 | } |
838 | 831 | ||
@@ -930,7 +923,7 @@ int x86_perf_event_set_period(struct perf_event *event) | |||
930 | s64 period = hwc->sample_period; | 923 | s64 period = hwc->sample_period; |
931 | int ret = 0, idx = hwc->idx; | 924 | int ret = 0, idx = hwc->idx; |
932 | 925 | ||
933 | if (idx == X86_PMC_IDX_FIXED_BTS) | 926 | if (idx == INTEL_PMC_IDX_FIXED_BTS) |
934 | return 0; | 927 | return 0; |
935 | 928 | ||
936 | /* | 929 | /* |
@@ -1316,7 +1309,6 @@ static struct attribute_group x86_pmu_format_group = { | |||
1316 | static int __init init_hw_perf_events(void) | 1309 | static int __init init_hw_perf_events(void) |
1317 | { | 1310 | { |
1318 | struct x86_pmu_quirk *quirk; | 1311 | struct x86_pmu_quirk *quirk; |
1319 | struct event_constraint *c; | ||
1320 | int err; | 1312 | int err; |
1321 | 1313 | ||
1322 | pr_info("Performance Events: "); | 1314 | pr_info("Performance Events: "); |
@@ -1347,21 +1339,8 @@ static int __init init_hw_perf_events(void) | |||
1347 | for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) | 1339 | for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) |
1348 | quirk->func(); | 1340 | quirk->func(); |
1349 | 1341 | ||
1350 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | 1342 | if (!x86_pmu.intel_ctrl) |
1351 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1343 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
1352 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); | ||
1353 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1354 | } | ||
1355 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; | ||
1356 | |||
1357 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | ||
1358 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | ||
1359 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); | ||
1360 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1361 | } | ||
1362 | |||
1363 | x86_pmu.intel_ctrl |= | ||
1364 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; | ||
1365 | 1344 | ||
1366 | perf_events_lapic_init(); | 1345 | perf_events_lapic_init(); |
1367 | register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); | 1346 | register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); |
@@ -1370,22 +1349,6 @@ static int __init init_hw_perf_events(void) | |||
1370 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, | 1349 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
1371 | 0, x86_pmu.num_counters, 0); | 1350 | 0, x86_pmu.num_counters, 0); |
1372 | 1351 | ||
1373 | if (x86_pmu.event_constraints) { | ||
1374 | /* | ||
1375 | * event on fixed counter2 (REF_CYCLES) only works on this | ||
1376 | * counter, so do not extend mask to generic counters | ||
1377 | */ | ||
1378 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
1379 | if (c->cmask != X86_RAW_EVENT_MASK | ||
1380 | || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { | ||
1381 | continue; | ||
1382 | } | ||
1383 | |||
1384 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; | ||
1385 | c->weight += x86_pmu.num_counters; | ||
1386 | } | ||
1387 | } | ||
1388 | |||
1389 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | 1352 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ |
1390 | x86_pmu_format_group.attrs = x86_pmu.format_attrs; | 1353 | x86_pmu_format_group.attrs = x86_pmu.format_attrs; |
1391 | 1354 | ||
@@ -1496,6 +1459,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void) | |||
1496 | if (!cpuc->shared_regs) | 1459 | if (!cpuc->shared_regs) |
1497 | goto error; | 1460 | goto error; |
1498 | } | 1461 | } |
1462 | cpuc->is_fake = 1; | ||
1499 | return cpuc; | 1463 | return cpuc; |
1500 | error: | 1464 | error: |
1501 | free_fake_cpuc(cpuc); | 1465 | free_fake_cpuc(cpuc); |
@@ -1619,8 +1583,8 @@ static int x86_pmu_event_idx(struct perf_event *event) | |||
1619 | if (!x86_pmu.attr_rdpmc) | 1583 | if (!x86_pmu.attr_rdpmc) |
1620 | return 0; | 1584 | return 0; |
1621 | 1585 | ||
1622 | if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { | 1586 | if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { |
1623 | idx -= X86_PMC_IDX_FIXED; | 1587 | idx -= INTEL_PMC_IDX_FIXED; |
1624 | idx |= 1 << 30; | 1588 | idx |= 1 << 30; |
1625 | } | 1589 | } |
1626 | 1590 | ||
@@ -1648,7 +1612,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev, | |||
1648 | struct device_attribute *attr, | 1612 | struct device_attribute *attr, |
1649 | const char *buf, size_t count) | 1613 | const char *buf, size_t count) |
1650 | { | 1614 | { |
1651 | unsigned long val = simple_strtoul(buf, NULL, 0); | 1615 | unsigned long val; |
1616 | ssize_t ret; | ||
1617 | |||
1618 | ret = kstrtoul(buf, 0, &val); | ||
1619 | if (ret) | ||
1620 | return ret; | ||
1652 | 1621 | ||
1653 | if (!!val != !!x86_pmu.attr_rdpmc) { | 1622 | if (!!val != !!x86_pmu.attr_rdpmc) { |
1654 | x86_pmu.attr_rdpmc = !!val; | 1623 | x86_pmu.attr_rdpmc = !!val; |
@@ -1681,13 +1650,20 @@ static void x86_pmu_flush_branch_stack(void) | |||
1681 | x86_pmu.flush_branch_stack(); | 1650 | x86_pmu.flush_branch_stack(); |
1682 | } | 1651 | } |
1683 | 1652 | ||
1653 | void perf_check_microcode(void) | ||
1654 | { | ||
1655 | if (x86_pmu.check_microcode) | ||
1656 | x86_pmu.check_microcode(); | ||
1657 | } | ||
1658 | EXPORT_SYMBOL_GPL(perf_check_microcode); | ||
1659 | |||
1684 | static struct pmu pmu = { | 1660 | static struct pmu pmu = { |
1685 | .pmu_enable = x86_pmu_enable, | 1661 | .pmu_enable = x86_pmu_enable, |
1686 | .pmu_disable = x86_pmu_disable, | 1662 | .pmu_disable = x86_pmu_disable, |
1687 | 1663 | ||
1688 | .attr_groups = x86_pmu_attr_groups, | 1664 | .attr_groups = x86_pmu_attr_groups, |
1689 | 1665 | ||
1690 | .event_init = x86_pmu_event_init, | 1666 | .event_init = x86_pmu_event_init, |
1691 | 1667 | ||
1692 | .add = x86_pmu_add, | 1668 | .add = x86_pmu_add, |
1693 | .del = x86_pmu_del, | 1669 | .del = x86_pmu_del, |
@@ -1695,11 +1671,11 @@ static struct pmu pmu = { | |||
1695 | .stop = x86_pmu_stop, | 1671 | .stop = x86_pmu_stop, |
1696 | .read = x86_pmu_read, | 1672 | .read = x86_pmu_read, |
1697 | 1673 | ||
1698 | .start_txn = x86_pmu_start_txn, | 1674 | .start_txn = x86_pmu_start_txn, |
1699 | .cancel_txn = x86_pmu_cancel_txn, | 1675 | .cancel_txn = x86_pmu_cancel_txn, |
1700 | .commit_txn = x86_pmu_commit_txn, | 1676 | .commit_txn = x86_pmu_commit_txn, |
1701 | 1677 | ||
1702 | .event_idx = x86_pmu_event_idx, | 1678 | .event_idx = x86_pmu_event_idx, |
1703 | .flush_branch_stack = x86_pmu_flush_branch_stack, | 1679 | .flush_branch_stack = x86_pmu_flush_branch_stack, |
1704 | }; | 1680 | }; |
1705 | 1681 | ||
@@ -1756,6 +1732,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1756 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); | 1732 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); |
1757 | } | 1733 | } |
1758 | 1734 | ||
1735 | static inline int | ||
1736 | valid_user_frame(const void __user *fp, unsigned long size) | ||
1737 | { | ||
1738 | return (__range_not_ok(fp, size, TASK_SIZE) == 0); | ||
1739 | } | ||
1740 | |||
1759 | #ifdef CONFIG_COMPAT | 1741 | #ifdef CONFIG_COMPAT |
1760 | 1742 | ||
1761 | #include <asm/compat.h> | 1743 | #include <asm/compat.h> |
@@ -1780,7 +1762,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1780 | if (bytes != sizeof(frame)) | 1762 | if (bytes != sizeof(frame)) |
1781 | break; | 1763 | break; |
1782 | 1764 | ||
1783 | if (fp < compat_ptr(regs->sp)) | 1765 | if (!valid_user_frame(fp, sizeof(frame))) |
1784 | break; | 1766 | break; |
1785 | 1767 | ||
1786 | perf_callchain_store(entry, frame.return_address); | 1768 | perf_callchain_store(entry, frame.return_address); |
@@ -1826,7 +1808,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1826 | if (bytes != sizeof(frame)) | 1808 | if (bytes != sizeof(frame)) |
1827 | break; | 1809 | break; |
1828 | 1810 | ||
1829 | if ((unsigned long)fp < regs->sp) | 1811 | if (!valid_user_frame(fp, sizeof(frame))) |
1830 | break; | 1812 | break; |
1831 | 1813 | ||
1832 | perf_callchain_store(entry, frame.return_address); | 1814 | perf_callchain_store(entry, frame.return_address); |
@@ -1856,7 +1838,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs) | |||
1856 | else | 1838 | else |
1857 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | 1839 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; |
1858 | } else { | 1840 | } else { |
1859 | if (user_mode(regs)) | 1841 | if (!kernel_ip(regs->ip)) |
1860 | misc |= PERF_RECORD_MISC_USER; | 1842 | misc |= PERF_RECORD_MISC_USER; |
1861 | else | 1843 | else |
1862 | misc |= PERF_RECORD_MISC_KERNEL; | 1844 | misc |= PERF_RECORD_MISC_KERNEL; |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6638aaf5449..a15df4be151 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -14,6 +14,18 @@ | |||
14 | 14 | ||
15 | #include <linux/perf_event.h> | 15 | #include <linux/perf_event.h> |
16 | 16 | ||
17 | #if 0 | ||
18 | #undef wrmsrl | ||
19 | #define wrmsrl(msr, val) \ | ||
20 | do { \ | ||
21 | unsigned int _msr = (msr); \ | ||
22 | u64 _val = (val); \ | ||
23 | trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr), \ | ||
24 | (unsigned long long)(_val)); \ | ||
25 | native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32)); \ | ||
26 | } while (0) | ||
27 | #endif | ||
28 | |||
17 | /* | 29 | /* |
18 | * | NHM/WSM | SNB | | 30 | * | NHM/WSM | SNB | |
19 | * register ------------------------------- | 31 | * register ------------------------------- |
@@ -57,7 +69,7 @@ struct amd_nb { | |||
57 | }; | 69 | }; |
58 | 70 | ||
59 | /* The maximal number of PEBS events: */ | 71 | /* The maximal number of PEBS events: */ |
60 | #define MAX_PEBS_EVENTS 4 | 72 | #define MAX_PEBS_EVENTS 8 |
61 | 73 | ||
62 | /* | 74 | /* |
63 | * A debug store configuration. | 75 | * A debug store configuration. |
@@ -117,6 +129,7 @@ struct cpu_hw_events { | |||
117 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 129 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
118 | 130 | ||
119 | unsigned int group_flag; | 131 | unsigned int group_flag; |
132 | int is_fake; | ||
120 | 133 | ||
121 | /* | 134 | /* |
122 | * Intel DebugStore bits | 135 | * Intel DebugStore bits |
@@ -348,6 +361,8 @@ struct x86_pmu { | |||
348 | void (*cpu_starting)(int cpu); | 361 | void (*cpu_starting)(int cpu); |
349 | void (*cpu_dying)(int cpu); | 362 | void (*cpu_dying)(int cpu); |
350 | void (*cpu_dead)(int cpu); | 363 | void (*cpu_dead)(int cpu); |
364 | |||
365 | void (*check_microcode)(void); | ||
351 | void (*flush_branch_stack)(void); | 366 | void (*flush_branch_stack)(void); |
352 | 367 | ||
353 | /* | 368 | /* |
@@ -359,11 +374,16 @@ struct x86_pmu { | |||
359 | /* | 374 | /* |
360 | * Intel DebugStore bits | 375 | * Intel DebugStore bits |
361 | */ | 376 | */ |
362 | int bts, pebs; | 377 | int bts :1, |
363 | int bts_active, pebs_active; | 378 | bts_active :1, |
379 | pebs :1, | ||
380 | pebs_active :1, | ||
381 | pebs_broken :1; | ||
364 | int pebs_record_size; | 382 | int pebs_record_size; |
365 | void (*drain_pebs)(struct pt_regs *regs); | 383 | void (*drain_pebs)(struct pt_regs *regs); |
366 | struct event_constraint *pebs_constraints; | 384 | struct event_constraint *pebs_constraints; |
385 | void (*pebs_aliases)(struct perf_event *event); | ||
386 | int max_pebs_events; | ||
367 | 387 | ||
368 | /* | 388 | /* |
369 | * Intel LBR | 389 | * Intel LBR |
@@ -466,6 +486,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | |||
466 | 486 | ||
467 | void x86_pmu_enable_all(int added); | 487 | void x86_pmu_enable_all(int added); |
468 | 488 | ||
489 | int perf_assign_events(struct event_constraint **constraints, int n, | ||
490 | int wmin, int wmax, int *assign); | ||
469 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); | 491 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); |
470 | 492 | ||
471 | void x86_pmu_stop(struct perf_event *event, int flags); | 493 | void x86_pmu_stop(struct perf_event *event, int flags); |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 11a4eb9131d..4528ae7b6ec 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -366,7 +366,7 @@ static void amd_pmu_cpu_starting(int cpu) | |||
366 | 366 | ||
367 | cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; | 367 | cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; |
368 | 368 | ||
369 | if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15) | 369 | if (boot_cpu_data.x86_max_cores < 2) |
370 | return; | 370 | return; |
371 | 371 | ||
372 | nb_id = amd_get_nb_id(cpu); | 372 | nb_id = amd_get_nb_id(cpu); |
@@ -422,35 +422,6 @@ static struct attribute *amd_format_attr[] = { | |||
422 | NULL, | 422 | NULL, |
423 | }; | 423 | }; |
424 | 424 | ||
425 | static __initconst const struct x86_pmu amd_pmu = { | ||
426 | .name = "AMD", | ||
427 | .handle_irq = x86_pmu_handle_irq, | ||
428 | .disable_all = x86_pmu_disable_all, | ||
429 | .enable_all = x86_pmu_enable_all, | ||
430 | .enable = x86_pmu_enable_event, | ||
431 | .disable = x86_pmu_disable_event, | ||
432 | .hw_config = amd_pmu_hw_config, | ||
433 | .schedule_events = x86_schedule_events, | ||
434 | .eventsel = MSR_K7_EVNTSEL0, | ||
435 | .perfctr = MSR_K7_PERFCTR0, | ||
436 | .event_map = amd_pmu_event_map, | ||
437 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
438 | .num_counters = AMD64_NUM_COUNTERS, | ||
439 | .cntval_bits = 48, | ||
440 | .cntval_mask = (1ULL << 48) - 1, | ||
441 | .apic = 1, | ||
442 | /* use highest bit to detect overflow */ | ||
443 | .max_period = (1ULL << 47) - 1, | ||
444 | .get_event_constraints = amd_get_event_constraints, | ||
445 | .put_event_constraints = amd_put_event_constraints, | ||
446 | |||
447 | .format_attrs = amd_format_attr, | ||
448 | |||
449 | .cpu_prepare = amd_pmu_cpu_prepare, | ||
450 | .cpu_starting = amd_pmu_cpu_starting, | ||
451 | .cpu_dead = amd_pmu_cpu_dead, | ||
452 | }; | ||
453 | |||
454 | /* AMD Family 15h */ | 425 | /* AMD Family 15h */ |
455 | 426 | ||
456 | #define AMD_EVENT_TYPE_MASK 0x000000F0ULL | 427 | #define AMD_EVENT_TYPE_MASK 0x000000F0ULL |
@@ -597,8 +568,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev | |||
597 | } | 568 | } |
598 | } | 569 | } |
599 | 570 | ||
600 | static __initconst const struct x86_pmu amd_pmu_f15h = { | 571 | static __initconst const struct x86_pmu amd_pmu = { |
601 | .name = "AMD Family 15h", | 572 | .name = "AMD", |
602 | .handle_irq = x86_pmu_handle_irq, | 573 | .handle_irq = x86_pmu_handle_irq, |
603 | .disable_all = x86_pmu_disable_all, | 574 | .disable_all = x86_pmu_disable_all, |
604 | .enable_all = x86_pmu_enable_all, | 575 | .enable_all = x86_pmu_enable_all, |
@@ -606,50 +577,68 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { | |||
606 | .disable = x86_pmu_disable_event, | 577 | .disable = x86_pmu_disable_event, |
607 | .hw_config = amd_pmu_hw_config, | 578 | .hw_config = amd_pmu_hw_config, |
608 | .schedule_events = x86_schedule_events, | 579 | .schedule_events = x86_schedule_events, |
609 | .eventsel = MSR_F15H_PERF_CTL, | 580 | .eventsel = MSR_K7_EVNTSEL0, |
610 | .perfctr = MSR_F15H_PERF_CTR, | 581 | .perfctr = MSR_K7_PERFCTR0, |
611 | .event_map = amd_pmu_event_map, | 582 | .event_map = amd_pmu_event_map, |
612 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 583 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
613 | .num_counters = AMD64_NUM_COUNTERS_F15H, | 584 | .num_counters = AMD64_NUM_COUNTERS, |
614 | .cntval_bits = 48, | 585 | .cntval_bits = 48, |
615 | .cntval_mask = (1ULL << 48) - 1, | 586 | .cntval_mask = (1ULL << 48) - 1, |
616 | .apic = 1, | 587 | .apic = 1, |
617 | /* use highest bit to detect overflow */ | 588 | /* use highest bit to detect overflow */ |
618 | .max_period = (1ULL << 47) - 1, | 589 | .max_period = (1ULL << 47) - 1, |
619 | .get_event_constraints = amd_get_event_constraints_f15h, | 590 | .get_event_constraints = amd_get_event_constraints, |
620 | /* nortbridge counters not yet implemented: */ | ||
621 | #if 0 | ||
622 | .put_event_constraints = amd_put_event_constraints, | 591 | .put_event_constraints = amd_put_event_constraints, |
623 | 592 | ||
593 | .format_attrs = amd_format_attr, | ||
594 | |||
624 | .cpu_prepare = amd_pmu_cpu_prepare, | 595 | .cpu_prepare = amd_pmu_cpu_prepare, |
625 | .cpu_dead = amd_pmu_cpu_dead, | ||
626 | #endif | ||
627 | .cpu_starting = amd_pmu_cpu_starting, | 596 | .cpu_starting = amd_pmu_cpu_starting, |
628 | .format_attrs = amd_format_attr, | 597 | .cpu_dead = amd_pmu_cpu_dead, |
629 | }; | 598 | }; |
630 | 599 | ||
600 | static int setup_event_constraints(void) | ||
601 | { | ||
602 | if (boot_cpu_data.x86 >= 0x15) | ||
603 | x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; | ||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | static int setup_perfctr_core(void) | ||
608 | { | ||
609 | if (!cpu_has_perfctr_core) { | ||
610 | WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h, | ||
611 | KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!"); | ||
612 | return -ENODEV; | ||
613 | } | ||
614 | |||
615 | WARN(x86_pmu.get_event_constraints == amd_get_event_constraints, | ||
616 | KERN_ERR "hw perf events core counters need constraints handler!"); | ||
617 | |||
618 | /* | ||
619 | * If core performance counter extensions exists, we must use | ||
620 | * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also | ||
621 | * x86_pmu_addr_offset(). | ||
622 | */ | ||
623 | x86_pmu.eventsel = MSR_F15H_PERF_CTL; | ||
624 | x86_pmu.perfctr = MSR_F15H_PERF_CTR; | ||
625 | x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; | ||
626 | |||
627 | printk(KERN_INFO "perf: AMD core performance counters detected\n"); | ||
628 | |||
629 | return 0; | ||
630 | } | ||
631 | |||
631 | __init int amd_pmu_init(void) | 632 | __init int amd_pmu_init(void) |
632 | { | 633 | { |
633 | /* Performance-monitoring supported from K7 and later: */ | 634 | /* Performance-monitoring supported from K7 and later: */ |
634 | if (boot_cpu_data.x86 < 6) | 635 | if (boot_cpu_data.x86 < 6) |
635 | return -ENODEV; | 636 | return -ENODEV; |
636 | 637 | ||
637 | /* | 638 | x86_pmu = amd_pmu; |
638 | * If core performance counter extensions exists, it must be | 639 | |
639 | * family 15h, otherwise fail. See x86_pmu_addr_offset(). | 640 | setup_event_constraints(); |
640 | */ | 641 | setup_perfctr_core(); |
641 | switch (boot_cpu_data.x86) { | ||
642 | case 0x15: | ||
643 | if (!cpu_has_perfctr_core) | ||
644 | return -ENODEV; | ||
645 | x86_pmu = amd_pmu_f15h; | ||
646 | break; | ||
647 | default: | ||
648 | if (cpu_has_perfctr_core) | ||
649 | return -ENODEV; | ||
650 | x86_pmu = amd_pmu; | ||
651 | break; | ||
652 | } | ||
653 | 642 | ||
654 | /* Events are common for all AMDs */ | 643 | /* Events are common for all AMDs */ |
655 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | 644 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 166546ec6ae..7a8b9d0abca 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -5,6 +5,8 @@ | |||
5 | * among events on a single PMU. | 5 | * among events on a single PMU. |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
9 | |||
8 | #include <linux/stddef.h> | 10 | #include <linux/stddef.h> |
9 | #include <linux/types.h> | 11 | #include <linux/types.h> |
10 | #include <linux/init.h> | 12 | #include <linux/init.h> |
@@ -21,14 +23,14 @@ | |||
21 | */ | 23 | */ |
22 | static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = | 24 | static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = |
23 | { | 25 | { |
24 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | 26 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, |
25 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | 27 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
26 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | 28 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, |
27 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | 29 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, |
28 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | 30 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, |
29 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | 31 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
30 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 32 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
31 | [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ | 33 | [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ |
32 | }; | 34 | }; |
33 | 35 | ||
34 | static struct event_constraint intel_core_event_constraints[] __read_mostly = | 36 | static struct event_constraint intel_core_event_constraints[] __read_mostly = |
@@ -747,7 +749,7 @@ static void intel_pmu_disable_all(void) | |||
747 | 749 | ||
748 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | 750 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
749 | 751 | ||
750 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 752 | if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
751 | intel_pmu_disable_bts(); | 753 | intel_pmu_disable_bts(); |
752 | 754 | ||
753 | intel_pmu_pebs_disable_all(); | 755 | intel_pmu_pebs_disable_all(); |
@@ -763,9 +765,9 @@ static void intel_pmu_enable_all(int added) | |||
763 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, | 765 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, |
764 | x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); | 766 | x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); |
765 | 767 | ||
766 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 768 | if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
767 | struct perf_event *event = | 769 | struct perf_event *event = |
768 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | 770 | cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; |
769 | 771 | ||
770 | if (WARN_ON_ONCE(!event)) | 772 | if (WARN_ON_ONCE(!event)) |
771 | return; | 773 | return; |
@@ -871,7 +873,7 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
871 | 873 | ||
872 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) | 874 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
873 | { | 875 | { |
874 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 876 | int idx = hwc->idx - INTEL_PMC_IDX_FIXED; |
875 | u64 ctrl_val, mask; | 877 | u64 ctrl_val, mask; |
876 | 878 | ||
877 | mask = 0xfULL << (idx * 4); | 879 | mask = 0xfULL << (idx * 4); |
@@ -886,7 +888,7 @@ static void intel_pmu_disable_event(struct perf_event *event) | |||
886 | struct hw_perf_event *hwc = &event->hw; | 888 | struct hw_perf_event *hwc = &event->hw; |
887 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 889 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
888 | 890 | ||
889 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | 891 | if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { |
890 | intel_pmu_disable_bts(); | 892 | intel_pmu_disable_bts(); |
891 | intel_pmu_drain_bts_buffer(); | 893 | intel_pmu_drain_bts_buffer(); |
892 | return; | 894 | return; |
@@ -915,7 +917,7 @@ static void intel_pmu_disable_event(struct perf_event *event) | |||
915 | 917 | ||
916 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) | 918 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
917 | { | 919 | { |
918 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 920 | int idx = hwc->idx - INTEL_PMC_IDX_FIXED; |
919 | u64 ctrl_val, bits, mask; | 921 | u64 ctrl_val, bits, mask; |
920 | 922 | ||
921 | /* | 923 | /* |
@@ -949,7 +951,7 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
949 | struct hw_perf_event *hwc = &event->hw; | 951 | struct hw_perf_event *hwc = &event->hw; |
950 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 952 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
951 | 953 | ||
952 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | 954 | if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { |
953 | if (!__this_cpu_read(cpu_hw_events.enabled)) | 955 | if (!__this_cpu_read(cpu_hw_events.enabled)) |
954 | return; | 956 | return; |
955 | 957 | ||
@@ -1000,14 +1002,14 @@ static void intel_pmu_reset(void) | |||
1000 | 1002 | ||
1001 | local_irq_save(flags); | 1003 | local_irq_save(flags); |
1002 | 1004 | ||
1003 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 1005 | pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); |
1004 | 1006 | ||
1005 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1007 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1006 | checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); | 1008 | wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); |
1007 | checking_wrmsrl(x86_pmu_event_addr(idx), 0ull); | 1009 | wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); |
1008 | } | 1010 | } |
1009 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) | 1011 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
1010 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 1012 | wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
1011 | 1013 | ||
1012 | if (ds) | 1014 | if (ds) |
1013 | ds->bts_index = ds->bts_buffer_base; | 1015 | ds->bts_index = ds->bts_buffer_base; |
@@ -1119,27 +1121,33 @@ intel_bts_constraints(struct perf_event *event) | |||
1119 | return NULL; | 1121 | return NULL; |
1120 | } | 1122 | } |
1121 | 1123 | ||
1122 | static bool intel_try_alt_er(struct perf_event *event, int orig_idx) | 1124 | static int intel_alt_er(int idx) |
1123 | { | 1125 | { |
1124 | if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) | 1126 | if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) |
1125 | return false; | 1127 | return idx; |
1126 | 1128 | ||
1127 | if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { | 1129 | if (idx == EXTRA_REG_RSP_0) |
1128 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | 1130 | return EXTRA_REG_RSP_1; |
1129 | event->hw.config |= 0x01bb; | 1131 | |
1130 | event->hw.extra_reg.idx = EXTRA_REG_RSP_1; | 1132 | if (idx == EXTRA_REG_RSP_1) |
1131 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; | 1133 | return EXTRA_REG_RSP_0; |
1132 | } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { | 1134 | |
1135 | return idx; | ||
1136 | } | ||
1137 | |||
1138 | static void intel_fixup_er(struct perf_event *event, int idx) | ||
1139 | { | ||
1140 | event->hw.extra_reg.idx = idx; | ||
1141 | |||
1142 | if (idx == EXTRA_REG_RSP_0) { | ||
1133 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | 1143 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; |
1134 | event->hw.config |= 0x01b7; | 1144 | event->hw.config |= 0x01b7; |
1135 | event->hw.extra_reg.idx = EXTRA_REG_RSP_0; | ||
1136 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; | 1145 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; |
1146 | } else if (idx == EXTRA_REG_RSP_1) { | ||
1147 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | ||
1148 | event->hw.config |= 0x01bb; | ||
1149 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; | ||
1137 | } | 1150 | } |
1138 | |||
1139 | if (event->hw.extra_reg.idx == orig_idx) | ||
1140 | return false; | ||
1141 | |||
1142 | return true; | ||
1143 | } | 1151 | } |
1144 | 1152 | ||
1145 | /* | 1153 | /* |
@@ -1157,14 +1165,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, | |||
1157 | struct event_constraint *c = &emptyconstraint; | 1165 | struct event_constraint *c = &emptyconstraint; |
1158 | struct er_account *era; | 1166 | struct er_account *era; |
1159 | unsigned long flags; | 1167 | unsigned long flags; |
1160 | int orig_idx = reg->idx; | 1168 | int idx = reg->idx; |
1161 | 1169 | ||
1162 | /* already allocated shared msr */ | 1170 | /* |
1163 | if (reg->alloc) | 1171 | * reg->alloc can be set due to existing state, so for fake cpuc we |
1172 | * need to ignore this, otherwise we might fail to allocate proper fake | ||
1173 | * state for this extra reg constraint. Also see the comment below. | ||
1174 | */ | ||
1175 | if (reg->alloc && !cpuc->is_fake) | ||
1164 | return NULL; /* call x86_get_event_constraint() */ | 1176 | return NULL; /* call x86_get_event_constraint() */ |
1165 | 1177 | ||
1166 | again: | 1178 | again: |
1167 | era = &cpuc->shared_regs->regs[reg->idx]; | 1179 | era = &cpuc->shared_regs->regs[idx]; |
1168 | /* | 1180 | /* |
1169 | * we use spin_lock_irqsave() to avoid lockdep issues when | 1181 | * we use spin_lock_irqsave() to avoid lockdep issues when |
1170 | * passing a fake cpuc | 1182 | * passing a fake cpuc |
@@ -1173,6 +1185,29 @@ again: | |||
1173 | 1185 | ||
1174 | if (!atomic_read(&era->ref) || era->config == reg->config) { | 1186 | if (!atomic_read(&era->ref) || era->config == reg->config) { |
1175 | 1187 | ||
1188 | /* | ||
1189 | * If its a fake cpuc -- as per validate_{group,event}() we | ||
1190 | * shouldn't touch event state and we can avoid doing so | ||
1191 | * since both will only call get_event_constraints() once | ||
1192 | * on each event, this avoids the need for reg->alloc. | ||
1193 | * | ||
1194 | * Not doing the ER fixup will only result in era->reg being | ||
1195 | * wrong, but since we won't actually try and program hardware | ||
1196 | * this isn't a problem either. | ||
1197 | */ | ||
1198 | if (!cpuc->is_fake) { | ||
1199 | if (idx != reg->idx) | ||
1200 | intel_fixup_er(event, idx); | ||
1201 | |||
1202 | /* | ||
1203 | * x86_schedule_events() can call get_event_constraints() | ||
1204 | * multiple times on events in the case of incremental | ||
1205 | * scheduling(). reg->alloc ensures we only do the ER | ||
1206 | * allocation once. | ||
1207 | */ | ||
1208 | reg->alloc = 1; | ||
1209 | } | ||
1210 | |||
1176 | /* lock in msr value */ | 1211 | /* lock in msr value */ |
1177 | era->config = reg->config; | 1212 | era->config = reg->config; |
1178 | era->reg = reg->reg; | 1213 | era->reg = reg->reg; |
@@ -1180,17 +1215,17 @@ again: | |||
1180 | /* one more user */ | 1215 | /* one more user */ |
1181 | atomic_inc(&era->ref); | 1216 | atomic_inc(&era->ref); |
1182 | 1217 | ||
1183 | /* no need to reallocate during incremental event scheduling */ | ||
1184 | reg->alloc = 1; | ||
1185 | |||
1186 | /* | 1218 | /* |
1187 | * need to call x86_get_event_constraint() | 1219 | * need to call x86_get_event_constraint() |
1188 | * to check if associated event has constraints | 1220 | * to check if associated event has constraints |
1189 | */ | 1221 | */ |
1190 | c = NULL; | 1222 | c = NULL; |
1191 | } else if (intel_try_alt_er(event, orig_idx)) { | 1223 | } else { |
1192 | raw_spin_unlock_irqrestore(&era->lock, flags); | 1224 | idx = intel_alt_er(idx); |
1193 | goto again; | 1225 | if (idx != reg->idx) { |
1226 | raw_spin_unlock_irqrestore(&era->lock, flags); | ||
1227 | goto again; | ||
1228 | } | ||
1194 | } | 1229 | } |
1195 | raw_spin_unlock_irqrestore(&era->lock, flags); | 1230 | raw_spin_unlock_irqrestore(&era->lock, flags); |
1196 | 1231 | ||
@@ -1204,11 +1239,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, | |||
1204 | struct er_account *era; | 1239 | struct er_account *era; |
1205 | 1240 | ||
1206 | /* | 1241 | /* |
1207 | * only put constraint if extra reg was actually | 1242 | * Only put constraint if extra reg was actually allocated. Also takes |
1208 | * allocated. Also takes care of event which do | 1243 | * care of event which do not use an extra shared reg. |
1209 | * not use an extra shared reg | 1244 | * |
1245 | * Also, if this is a fake cpuc we shouldn't touch any event state | ||
1246 | * (reg->alloc) and we don't care about leaving inconsistent cpuc state | ||
1247 | * either since it'll be thrown out. | ||
1210 | */ | 1248 | */ |
1211 | if (!reg->alloc) | 1249 | if (!reg->alloc || cpuc->is_fake) |
1212 | return; | 1250 | return; |
1213 | 1251 | ||
1214 | era = &cpuc->shared_regs->regs[reg->idx]; | 1252 | era = &cpuc->shared_regs->regs[reg->idx]; |
@@ -1300,15 +1338,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | |||
1300 | intel_put_shared_regs_event_constraints(cpuc, event); | 1338 | intel_put_shared_regs_event_constraints(cpuc, event); |
1301 | } | 1339 | } |
1302 | 1340 | ||
1303 | static int intel_pmu_hw_config(struct perf_event *event) | 1341 | static void intel_pebs_aliases_core2(struct perf_event *event) |
1304 | { | 1342 | { |
1305 | int ret = x86_pmu_hw_config(event); | 1343 | if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { |
1306 | |||
1307 | if (ret) | ||
1308 | return ret; | ||
1309 | |||
1310 | if (event->attr.precise_ip && | ||
1311 | (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { | ||
1312 | /* | 1344 | /* |
1313 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P | 1345 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P |
1314 | * (0x003c) so that we can use it with PEBS. | 1346 | * (0x003c) so that we can use it with PEBS. |
@@ -1329,10 +1361,48 @@ static int intel_pmu_hw_config(struct perf_event *event) | |||
1329 | */ | 1361 | */ |
1330 | u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); | 1362 | u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); |
1331 | 1363 | ||
1364 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); | ||
1365 | event->hw.config = alt_config; | ||
1366 | } | ||
1367 | } | ||
1368 | |||
1369 | static void intel_pebs_aliases_snb(struct perf_event *event) | ||
1370 | { | ||
1371 | if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { | ||
1372 | /* | ||
1373 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P | ||
1374 | * (0x003c) so that we can use it with PEBS. | ||
1375 | * | ||
1376 | * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't | ||
1377 | * PEBS capable. However we can use UOPS_RETIRED.ALL | ||
1378 | * (0x01c2), which is a PEBS capable event, to get the same | ||
1379 | * count. | ||
1380 | * | ||
1381 | * UOPS_RETIRED.ALL counts the number of cycles that retires | ||
1382 | * CNTMASK micro-ops. By setting CNTMASK to a value (16) | ||
1383 | * larger than the maximum number of micro-ops that can be | ||
1384 | * retired per cycle (4) and then inverting the condition, we | ||
1385 | * count all cycles that retire 16 or less micro-ops, which | ||
1386 | * is every cycle. | ||
1387 | * | ||
1388 | * Thereby we gain a PEBS capable cycle counter. | ||
1389 | */ | ||
1390 | u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); | ||
1332 | 1391 | ||
1333 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); | 1392 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); |
1334 | event->hw.config = alt_config; | 1393 | event->hw.config = alt_config; |
1335 | } | 1394 | } |
1395 | } | ||
1396 | |||
1397 | static int intel_pmu_hw_config(struct perf_event *event) | ||
1398 | { | ||
1399 | int ret = x86_pmu_hw_config(event); | ||
1400 | |||
1401 | if (ret) | ||
1402 | return ret; | ||
1403 | |||
1404 | if (event->attr.precise_ip && x86_pmu.pebs_aliases) | ||
1405 | x86_pmu.pebs_aliases(event); | ||
1336 | 1406 | ||
1337 | if (intel_pmu_needs_lbr_smpl(event)) { | 1407 | if (intel_pmu_needs_lbr_smpl(event)) { |
1338 | ret = intel_pmu_setup_lbr_filter(event); | 1408 | ret = intel_pmu_setup_lbr_filter(event); |
@@ -1607,6 +1677,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
1607 | .max_period = (1ULL << 31) - 1, | 1677 | .max_period = (1ULL << 31) - 1, |
1608 | .get_event_constraints = intel_get_event_constraints, | 1678 | .get_event_constraints = intel_get_event_constraints, |
1609 | .put_event_constraints = intel_put_event_constraints, | 1679 | .put_event_constraints = intel_put_event_constraints, |
1680 | .pebs_aliases = intel_pebs_aliases_core2, | ||
1610 | 1681 | ||
1611 | .format_attrs = intel_arch3_formats_attr, | 1682 | .format_attrs = intel_arch3_formats_attr, |
1612 | 1683 | ||
@@ -1638,16 +1709,61 @@ static __init void intel_clovertown_quirk(void) | |||
1638 | * But taken together it might just make sense to not enable PEBS on | 1709 | * But taken together it might just make sense to not enable PEBS on |
1639 | * these chips. | 1710 | * these chips. |
1640 | */ | 1711 | */ |
1641 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | 1712 | pr_warn("PEBS disabled due to CPU errata\n"); |
1642 | x86_pmu.pebs = 0; | 1713 | x86_pmu.pebs = 0; |
1643 | x86_pmu.pebs_constraints = NULL; | 1714 | x86_pmu.pebs_constraints = NULL; |
1644 | } | 1715 | } |
1645 | 1716 | ||
1717 | static int intel_snb_pebs_broken(int cpu) | ||
1718 | { | ||
1719 | u32 rev = UINT_MAX; /* default to broken for unknown models */ | ||
1720 | |||
1721 | switch (cpu_data(cpu).x86_model) { | ||
1722 | case 42: /* SNB */ | ||
1723 | rev = 0x28; | ||
1724 | break; | ||
1725 | |||
1726 | case 45: /* SNB-EP */ | ||
1727 | switch (cpu_data(cpu).x86_mask) { | ||
1728 | case 6: rev = 0x618; break; | ||
1729 | case 7: rev = 0x70c; break; | ||
1730 | } | ||
1731 | } | ||
1732 | |||
1733 | return (cpu_data(cpu).microcode < rev); | ||
1734 | } | ||
1735 | |||
1736 | static void intel_snb_check_microcode(void) | ||
1737 | { | ||
1738 | int pebs_broken = 0; | ||
1739 | int cpu; | ||
1740 | |||
1741 | get_online_cpus(); | ||
1742 | for_each_online_cpu(cpu) { | ||
1743 | if ((pebs_broken = intel_snb_pebs_broken(cpu))) | ||
1744 | break; | ||
1745 | } | ||
1746 | put_online_cpus(); | ||
1747 | |||
1748 | if (pebs_broken == x86_pmu.pebs_broken) | ||
1749 | return; | ||
1750 | |||
1751 | /* | ||
1752 | * Serialized by the microcode lock.. | ||
1753 | */ | ||
1754 | if (x86_pmu.pebs_broken) { | ||
1755 | pr_info("PEBS enabled due to microcode update\n"); | ||
1756 | x86_pmu.pebs_broken = 0; | ||
1757 | } else { | ||
1758 | pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); | ||
1759 | x86_pmu.pebs_broken = 1; | ||
1760 | } | ||
1761 | } | ||
1762 | |||
1646 | static __init void intel_sandybridge_quirk(void) | 1763 | static __init void intel_sandybridge_quirk(void) |
1647 | { | 1764 | { |
1648 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | 1765 | x86_pmu.check_microcode = intel_snb_check_microcode; |
1649 | x86_pmu.pebs = 0; | 1766 | intel_snb_check_microcode(); |
1650 | x86_pmu.pebs_constraints = NULL; | ||
1651 | } | 1767 | } |
1652 | 1768 | ||
1653 | static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { | 1769 | static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { |
@@ -1667,8 +1783,8 @@ static __init void intel_arch_events_quirk(void) | |||
1667 | /* disable event that reported as not presend by cpuid */ | 1783 | /* disable event that reported as not presend by cpuid */ |
1668 | for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { | 1784 | for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { |
1669 | intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; | 1785 | intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; |
1670 | printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", | 1786 | pr_warn("CPUID marked event: \'%s\' unavailable\n", |
1671 | intel_arch_events_map[bit].name); | 1787 | intel_arch_events_map[bit].name); |
1672 | } | 1788 | } |
1673 | } | 1789 | } |
1674 | 1790 | ||
@@ -1687,7 +1803,7 @@ static __init void intel_nehalem_quirk(void) | |||
1687 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | 1803 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; |
1688 | ebx.split.no_branch_misses_retired = 0; | 1804 | ebx.split.no_branch_misses_retired = 0; |
1689 | x86_pmu.events_maskl = ebx.full; | 1805 | x86_pmu.events_maskl = ebx.full; |
1690 | printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); | 1806 | pr_info("CPU erratum AAJ80 worked around\n"); |
1691 | } | 1807 | } |
1692 | } | 1808 | } |
1693 | 1809 | ||
@@ -1696,6 +1812,7 @@ __init int intel_pmu_init(void) | |||
1696 | union cpuid10_edx edx; | 1812 | union cpuid10_edx edx; |
1697 | union cpuid10_eax eax; | 1813 | union cpuid10_eax eax; |
1698 | union cpuid10_ebx ebx; | 1814 | union cpuid10_ebx ebx; |
1815 | struct event_constraint *c; | ||
1699 | unsigned int unused; | 1816 | unsigned int unused; |
1700 | int version; | 1817 | int version; |
1701 | 1818 | ||
@@ -1731,6 +1848,8 @@ __init int intel_pmu_init(void) | |||
1731 | x86_pmu.events_maskl = ebx.full; | 1848 | x86_pmu.events_maskl = ebx.full; |
1732 | x86_pmu.events_mask_len = eax.split.mask_length; | 1849 | x86_pmu.events_mask_len = eax.split.mask_length; |
1733 | 1850 | ||
1851 | x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); | ||
1852 | |||
1734 | /* | 1853 | /* |
1735 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 1854 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
1736 | * assume at least 3 events: | 1855 | * assume at least 3 events: |
@@ -1840,8 +1959,9 @@ __init int intel_pmu_init(void) | |||
1840 | break; | 1959 | break; |
1841 | 1960 | ||
1842 | case 42: /* SandyBridge */ | 1961 | case 42: /* SandyBridge */ |
1843 | x86_add_quirk(intel_sandybridge_quirk); | ||
1844 | case 45: /* SandyBridge, "Romely-EP" */ | 1962 | case 45: /* SandyBridge, "Romely-EP" */ |
1963 | x86_add_quirk(intel_sandybridge_quirk); | ||
1964 | case 58: /* IvyBridge */ | ||
1845 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1965 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
1846 | sizeof(hw_cache_event_ids)); | 1966 | sizeof(hw_cache_event_ids)); |
1847 | 1967 | ||
@@ -1849,6 +1969,7 @@ __init int intel_pmu_init(void) | |||
1849 | 1969 | ||
1850 | x86_pmu.event_constraints = intel_snb_event_constraints; | 1970 | x86_pmu.event_constraints = intel_snb_event_constraints; |
1851 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; | 1971 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; |
1972 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; | ||
1852 | x86_pmu.extra_regs = intel_snb_extra_regs; | 1973 | x86_pmu.extra_regs = intel_snb_extra_regs; |
1853 | /* all extra regs are per-cpu when HT is on */ | 1974 | /* all extra regs are per-cpu when HT is on */ |
1854 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | 1975 | x86_pmu.er_flags |= ERF_HAS_RSP_1; |
@@ -1880,5 +2001,37 @@ __init int intel_pmu_init(void) | |||
1880 | } | 2001 | } |
1881 | } | 2002 | } |
1882 | 2003 | ||
2004 | if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { | ||
2005 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | ||
2006 | x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); | ||
2007 | x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; | ||
2008 | } | ||
2009 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; | ||
2010 | |||
2011 | if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { | ||
2012 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | ||
2013 | x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED); | ||
2014 | x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; | ||
2015 | } | ||
2016 | |||
2017 | x86_pmu.intel_ctrl |= | ||
2018 | ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; | ||
2019 | |||
2020 | if (x86_pmu.event_constraints) { | ||
2021 | /* | ||
2022 | * event on fixed counter2 (REF_CYCLES) only works on this | ||
2023 | * counter, so do not extend mask to generic counters | ||
2024 | */ | ||
2025 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
2026 | if (c->cmask != X86_RAW_EVENT_MASK | ||
2027 | || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { | ||
2028 | continue; | ||
2029 | } | ||
2030 | |||
2031 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; | ||
2032 | c->weight += x86_pmu.num_counters; | ||
2033 | } | ||
2034 | } | ||
2035 | |||
1883 | return 0; | 2036 | return 0; |
1884 | } | 2037 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 5a3edc27f6e..629ae0b7ad9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -248,7 +248,7 @@ void reserve_ds_buffers(void) | |||
248 | */ | 248 | */ |
249 | 249 | ||
250 | struct event_constraint bts_constraint = | 250 | struct event_constraint bts_constraint = |
251 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | 251 | EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0); |
252 | 252 | ||
253 | void intel_pmu_enable_bts(u64 config) | 253 | void intel_pmu_enable_bts(u64 config) |
254 | { | 254 | { |
@@ -295,7 +295,7 @@ int intel_pmu_drain_bts_buffer(void) | |||
295 | u64 to; | 295 | u64 to; |
296 | u64 flags; | 296 | u64 flags; |
297 | }; | 297 | }; |
298 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | 298 | struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; |
299 | struct bts_record *at, *top; | 299 | struct bts_record *at, *top; |
300 | struct perf_output_handle handle; | 300 | struct perf_output_handle handle; |
301 | struct perf_event_header header; | 301 | struct perf_event_header header; |
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { | |||
400 | INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ | 400 | INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ |
401 | INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ | 401 | INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ |
402 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ | 402 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ |
403 | INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ | 403 | INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ |
404 | INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ | ||
405 | INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ | ||
406 | INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ | ||
407 | INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ | ||
408 | INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ | ||
409 | INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ | ||
410 | INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ | ||
411 | INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ | 404 | INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ |
412 | INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ | 405 | INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ |
413 | INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ | 406 | INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ |
@@ -627,7 +620,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | |||
627 | * Should not happen, we program the threshold at 1 and do not | 620 | * Should not happen, we program the threshold at 1 and do not |
628 | * set a reset value. | 621 | * set a reset value. |
629 | */ | 622 | */ |
630 | WARN_ON_ONCE(n > 1); | 623 | WARN_ONCE(n > 1, "bad leftover pebs %d\n", n); |
631 | at += n - 1; | 624 | at += n - 1; |
632 | 625 | ||
633 | __intel_pmu_pebs_event(event, iregs, at); | 626 | __intel_pmu_pebs_event(event, iregs, at); |
@@ -658,10 +651,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | |||
658 | * Should not happen, we program the threshold at 1 and do not | 651 | * Should not happen, we program the threshold at 1 and do not |
659 | * set a reset value. | 652 | * set a reset value. |
660 | */ | 653 | */ |
661 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | 654 | WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n); |
662 | 655 | ||
663 | for ( ; at < top; at++) { | 656 | for ( ; at < top; at++) { |
664 | for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | 657 | for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) { |
665 | event = cpuc->events[bit]; | 658 | event = cpuc->events[bit]; |
666 | if (!test_bit(bit, cpuc->active_mask)) | 659 | if (!test_bit(bit, cpuc->active_mask)) |
667 | continue; | 660 | continue; |
@@ -677,7 +670,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | |||
677 | break; | 670 | break; |
678 | } | 671 | } |
679 | 672 | ||
680 | if (!event || bit >= MAX_PEBS_EVENTS) | 673 | if (!event || bit >= x86_pmu.max_pebs_events) |
681 | continue; | 674 | continue; |
682 | 675 | ||
683 | __intel_pmu_pebs_event(event, iregs, at); | 676 | __intel_pmu_pebs_event(event, iregs, at); |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c new file mode 100644 index 00000000000..19faffc6088 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
@@ -0,0 +1,1850 @@ | |||
1 | #include "perf_event_intel_uncore.h" | ||
2 | |||
3 | static struct intel_uncore_type *empty_uncore[] = { NULL, }; | ||
4 | static struct intel_uncore_type **msr_uncores = empty_uncore; | ||
5 | static struct intel_uncore_type **pci_uncores = empty_uncore; | ||
6 | /* pci bus to socket mapping */ | ||
7 | static int pcibus_to_physid[256] = { [0 ... 255] = -1, }; | ||
8 | |||
9 | static DEFINE_RAW_SPINLOCK(uncore_box_lock); | ||
10 | |||
11 | /* mask of cpus that collect uncore events */ | ||
12 | static cpumask_t uncore_cpu_mask; | ||
13 | |||
14 | /* constraint for the fixed counter */ | ||
15 | static struct event_constraint constraint_fixed = | ||
16 | EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); | ||
17 | static struct event_constraint constraint_empty = | ||
18 | EVENT_CONSTRAINT(0, 0, 0); | ||
19 | |||
20 | DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); | ||
21 | DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); | ||
22 | DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); | ||
23 | DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); | ||
24 | DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); | ||
25 | DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); | ||
26 | DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); | ||
27 | DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); | ||
28 | DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); | ||
29 | DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); | ||
30 | DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); | ||
31 | DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); | ||
32 | DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); | ||
33 | DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); | ||
34 | DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); | ||
35 | DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); | ||
36 | DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7"); | ||
37 | DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15"); | ||
38 | DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23"); | ||
39 | DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31"); | ||
40 | |||
41 | /* Sandy Bridge-EP uncore support */ | ||
42 | static struct intel_uncore_type snbep_uncore_cbox; | ||
43 | static struct intel_uncore_type snbep_uncore_pcu; | ||
44 | |||
45 | static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) | ||
46 | { | ||
47 | struct pci_dev *pdev = box->pci_dev; | ||
48 | int box_ctl = uncore_pci_box_ctl(box); | ||
49 | u32 config; | ||
50 | |||
51 | pci_read_config_dword(pdev, box_ctl, &config); | ||
52 | config |= SNBEP_PMON_BOX_CTL_FRZ; | ||
53 | pci_write_config_dword(pdev, box_ctl, config); | ||
54 | } | ||
55 | |||
56 | static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) | ||
57 | { | ||
58 | struct pci_dev *pdev = box->pci_dev; | ||
59 | int box_ctl = uncore_pci_box_ctl(box); | ||
60 | u32 config; | ||
61 | |||
62 | pci_read_config_dword(pdev, box_ctl, &config); | ||
63 | config &= ~SNBEP_PMON_BOX_CTL_FRZ; | ||
64 | pci_write_config_dword(pdev, box_ctl, config); | ||
65 | } | ||
66 | |||
67 | static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, | ||
68 | struct perf_event *event) | ||
69 | { | ||
70 | struct pci_dev *pdev = box->pci_dev; | ||
71 | struct hw_perf_event *hwc = &event->hw; | ||
72 | |||
73 | pci_write_config_dword(pdev, hwc->config_base, hwc->config | | ||
74 | SNBEP_PMON_CTL_EN); | ||
75 | } | ||
76 | |||
77 | static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, | ||
78 | struct perf_event *event) | ||
79 | { | ||
80 | struct pci_dev *pdev = box->pci_dev; | ||
81 | struct hw_perf_event *hwc = &event->hw; | ||
82 | |||
83 | pci_write_config_dword(pdev, hwc->config_base, hwc->config); | ||
84 | } | ||
85 | |||
86 | static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, | ||
87 | struct perf_event *event) | ||
88 | { | ||
89 | struct pci_dev *pdev = box->pci_dev; | ||
90 | struct hw_perf_event *hwc = &event->hw; | ||
91 | u64 count; | ||
92 | |||
93 | pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); | ||
94 | pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); | ||
95 | return count; | ||
96 | } | ||
97 | |||
98 | static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) | ||
99 | { | ||
100 | struct pci_dev *pdev = box->pci_dev; | ||
101 | pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, | ||
102 | SNBEP_PMON_BOX_CTL_INT); | ||
103 | } | ||
104 | |||
105 | static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) | ||
106 | { | ||
107 | u64 config; | ||
108 | unsigned msr; | ||
109 | |||
110 | msr = uncore_msr_box_ctl(box); | ||
111 | if (msr) { | ||
112 | rdmsrl(msr, config); | ||
113 | config |= SNBEP_PMON_BOX_CTL_FRZ; | ||
114 | wrmsrl(msr, config); | ||
115 | return; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) | ||
120 | { | ||
121 | u64 config; | ||
122 | unsigned msr; | ||
123 | |||
124 | msr = uncore_msr_box_ctl(box); | ||
125 | if (msr) { | ||
126 | rdmsrl(msr, config); | ||
127 | config &= ~SNBEP_PMON_BOX_CTL_FRZ; | ||
128 | wrmsrl(msr, config); | ||
129 | return; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, | ||
134 | struct perf_event *event) | ||
135 | { | ||
136 | struct hw_perf_event *hwc = &event->hw; | ||
137 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
138 | |||
139 | if (reg1->idx != EXTRA_REG_NONE) | ||
140 | wrmsrl(reg1->reg, reg1->config); | ||
141 | |||
142 | wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); | ||
143 | } | ||
144 | |||
145 | static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, | ||
146 | struct perf_event *event) | ||
147 | { | ||
148 | struct hw_perf_event *hwc = &event->hw; | ||
149 | |||
150 | wrmsrl(hwc->config_base, hwc->config); | ||
151 | } | ||
152 | |||
153 | static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box, | ||
154 | struct perf_event *event) | ||
155 | { | ||
156 | struct hw_perf_event *hwc = &event->hw; | ||
157 | u64 count; | ||
158 | |||
159 | rdmsrl(hwc->event_base, count); | ||
160 | return count; | ||
161 | } | ||
162 | |||
163 | static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) | ||
164 | { | ||
165 | unsigned msr = uncore_msr_box_ctl(box); | ||
166 | if (msr) | ||
167 | wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); | ||
168 | } | ||
169 | |||
170 | static struct event_constraint * | ||
171 | snbep_uncore_get_constraint(struct intel_uncore_box *box, | ||
172 | struct perf_event *event) | ||
173 | { | ||
174 | struct intel_uncore_extra_reg *er; | ||
175 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
176 | unsigned long flags; | ||
177 | bool ok = false; | ||
178 | |||
179 | if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc)) | ||
180 | return NULL; | ||
181 | |||
182 | er = &box->shared_regs[reg1->idx]; | ||
183 | raw_spin_lock_irqsave(&er->lock, flags); | ||
184 | if (!atomic_read(&er->ref) || er->config1 == reg1->config) { | ||
185 | atomic_inc(&er->ref); | ||
186 | er->config1 = reg1->config; | ||
187 | ok = true; | ||
188 | } | ||
189 | raw_spin_unlock_irqrestore(&er->lock, flags); | ||
190 | |||
191 | if (ok) { | ||
192 | if (box->phys_id >= 0) | ||
193 | reg1->alloc = 1; | ||
194 | return NULL; | ||
195 | } | ||
196 | return &constraint_empty; | ||
197 | } | ||
198 | |||
199 | static void snbep_uncore_put_constraint(struct intel_uncore_box *box, | ||
200 | struct perf_event *event) | ||
201 | { | ||
202 | struct intel_uncore_extra_reg *er; | ||
203 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
204 | |||
205 | if (box->phys_id < 0 || !reg1->alloc) | ||
206 | return; | ||
207 | |||
208 | er = &box->shared_regs[reg1->idx]; | ||
209 | atomic_dec(&er->ref); | ||
210 | reg1->alloc = 0; | ||
211 | } | ||
212 | |||
213 | static int snbep_uncore_hw_config(struct intel_uncore_box *box, | ||
214 | struct perf_event *event) | ||
215 | { | ||
216 | struct hw_perf_event *hwc = &event->hw; | ||
217 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
218 | |||
219 | if (box->pmu->type == &snbep_uncore_cbox) { | ||
220 | reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + | ||
221 | SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; | ||
222 | reg1->config = event->attr.config1 & | ||
223 | SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK; | ||
224 | } else if (box->pmu->type == &snbep_uncore_pcu) { | ||
225 | reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; | ||
226 | reg1->config = event->attr.config1 & | ||
227 | SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK; | ||
228 | } else { | ||
229 | return 0; | ||
230 | } | ||
231 | reg1->idx = 0; | ||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | static struct attribute *snbep_uncore_formats_attr[] = { | ||
236 | &format_attr_event.attr, | ||
237 | &format_attr_umask.attr, | ||
238 | &format_attr_edge.attr, | ||
239 | &format_attr_inv.attr, | ||
240 | &format_attr_thresh8.attr, | ||
241 | NULL, | ||
242 | }; | ||
243 | |||
244 | static struct attribute *snbep_uncore_ubox_formats_attr[] = { | ||
245 | &format_attr_event.attr, | ||
246 | &format_attr_umask.attr, | ||
247 | &format_attr_edge.attr, | ||
248 | &format_attr_inv.attr, | ||
249 | &format_attr_thresh5.attr, | ||
250 | NULL, | ||
251 | }; | ||
252 | |||
253 | static struct attribute *snbep_uncore_cbox_formats_attr[] = { | ||
254 | &format_attr_event.attr, | ||
255 | &format_attr_umask.attr, | ||
256 | &format_attr_edge.attr, | ||
257 | &format_attr_tid_en.attr, | ||
258 | &format_attr_inv.attr, | ||
259 | &format_attr_thresh8.attr, | ||
260 | &format_attr_filter_tid.attr, | ||
261 | &format_attr_filter_nid.attr, | ||
262 | &format_attr_filter_state.attr, | ||
263 | &format_attr_filter_opc.attr, | ||
264 | NULL, | ||
265 | }; | ||
266 | |||
267 | static struct attribute *snbep_uncore_pcu_formats_attr[] = { | ||
268 | &format_attr_event.attr, | ||
269 | &format_attr_occ_sel.attr, | ||
270 | &format_attr_edge.attr, | ||
271 | &format_attr_inv.attr, | ||
272 | &format_attr_thresh5.attr, | ||
273 | &format_attr_occ_invert.attr, | ||
274 | &format_attr_occ_edge.attr, | ||
275 | &format_attr_filter_brand0.attr, | ||
276 | &format_attr_filter_brand1.attr, | ||
277 | &format_attr_filter_brand2.attr, | ||
278 | &format_attr_filter_brand3.attr, | ||
279 | NULL, | ||
280 | }; | ||
281 | |||
282 | static struct uncore_event_desc snbep_uncore_imc_events[] = { | ||
283 | INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), | ||
284 | INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), | ||
285 | INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), | ||
286 | { /* end: all zeroes */ }, | ||
287 | }; | ||
288 | |||
289 | static struct uncore_event_desc snbep_uncore_qpi_events[] = { | ||
290 | INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), | ||
291 | INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), | ||
292 | INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x02,umask=0x08"), | ||
293 | INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x03,umask=0x04"), | ||
294 | { /* end: all zeroes */ }, | ||
295 | }; | ||
296 | |||
297 | static struct attribute_group snbep_uncore_format_group = { | ||
298 | .name = "format", | ||
299 | .attrs = snbep_uncore_formats_attr, | ||
300 | }; | ||
301 | |||
302 | static struct attribute_group snbep_uncore_ubox_format_group = { | ||
303 | .name = "format", | ||
304 | .attrs = snbep_uncore_ubox_formats_attr, | ||
305 | }; | ||
306 | |||
307 | static struct attribute_group snbep_uncore_cbox_format_group = { | ||
308 | .name = "format", | ||
309 | .attrs = snbep_uncore_cbox_formats_attr, | ||
310 | }; | ||
311 | |||
312 | static struct attribute_group snbep_uncore_pcu_format_group = { | ||
313 | .name = "format", | ||
314 | .attrs = snbep_uncore_pcu_formats_attr, | ||
315 | }; | ||
316 | |||
317 | static struct intel_uncore_ops snbep_uncore_msr_ops = { | ||
318 | .init_box = snbep_uncore_msr_init_box, | ||
319 | .disable_box = snbep_uncore_msr_disable_box, | ||
320 | .enable_box = snbep_uncore_msr_enable_box, | ||
321 | .disable_event = snbep_uncore_msr_disable_event, | ||
322 | .enable_event = snbep_uncore_msr_enable_event, | ||
323 | .read_counter = snbep_uncore_msr_read_counter, | ||
324 | .get_constraint = snbep_uncore_get_constraint, | ||
325 | .put_constraint = snbep_uncore_put_constraint, | ||
326 | .hw_config = snbep_uncore_hw_config, | ||
327 | }; | ||
328 | |||
329 | static struct intel_uncore_ops snbep_uncore_pci_ops = { | ||
330 | .init_box = snbep_uncore_pci_init_box, | ||
331 | .disable_box = snbep_uncore_pci_disable_box, | ||
332 | .enable_box = snbep_uncore_pci_enable_box, | ||
333 | .disable_event = snbep_uncore_pci_disable_event, | ||
334 | .enable_event = snbep_uncore_pci_enable_event, | ||
335 | .read_counter = snbep_uncore_pci_read_counter, | ||
336 | }; | ||
337 | |||
338 | static struct event_constraint snbep_uncore_cbox_constraints[] = { | ||
339 | UNCORE_EVENT_CONSTRAINT(0x01, 0x1), | ||
340 | UNCORE_EVENT_CONSTRAINT(0x02, 0x3), | ||
341 | UNCORE_EVENT_CONSTRAINT(0x04, 0x3), | ||
342 | UNCORE_EVENT_CONSTRAINT(0x05, 0x3), | ||
343 | UNCORE_EVENT_CONSTRAINT(0x07, 0x3), | ||
344 | UNCORE_EVENT_CONSTRAINT(0x11, 0x1), | ||
345 | UNCORE_EVENT_CONSTRAINT(0x12, 0x3), | ||
346 | UNCORE_EVENT_CONSTRAINT(0x13, 0x3), | ||
347 | UNCORE_EVENT_CONSTRAINT(0x1b, 0xc), | ||
348 | UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), | ||
349 | UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), | ||
350 | UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), | ||
351 | EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), | ||
352 | UNCORE_EVENT_CONSTRAINT(0x21, 0x3), | ||
353 | UNCORE_EVENT_CONSTRAINT(0x23, 0x3), | ||
354 | UNCORE_EVENT_CONSTRAINT(0x31, 0x3), | ||
355 | UNCORE_EVENT_CONSTRAINT(0x32, 0x3), | ||
356 | UNCORE_EVENT_CONSTRAINT(0x33, 0x3), | ||
357 | UNCORE_EVENT_CONSTRAINT(0x34, 0x3), | ||
358 | UNCORE_EVENT_CONSTRAINT(0x35, 0x3), | ||
359 | UNCORE_EVENT_CONSTRAINT(0x36, 0x1), | ||
360 | UNCORE_EVENT_CONSTRAINT(0x37, 0x3), | ||
361 | UNCORE_EVENT_CONSTRAINT(0x38, 0x3), | ||
362 | UNCORE_EVENT_CONSTRAINT(0x39, 0x3), | ||
363 | UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), | ||
364 | EVENT_CONSTRAINT_END | ||
365 | }; | ||
366 | |||
367 | static struct event_constraint snbep_uncore_r2pcie_constraints[] = { | ||
368 | UNCORE_EVENT_CONSTRAINT(0x10, 0x3), | ||
369 | UNCORE_EVENT_CONSTRAINT(0x11, 0x3), | ||
370 | UNCORE_EVENT_CONSTRAINT(0x12, 0x1), | ||
371 | UNCORE_EVENT_CONSTRAINT(0x23, 0x3), | ||
372 | UNCORE_EVENT_CONSTRAINT(0x24, 0x3), | ||
373 | UNCORE_EVENT_CONSTRAINT(0x25, 0x3), | ||
374 | UNCORE_EVENT_CONSTRAINT(0x26, 0x3), | ||
375 | UNCORE_EVENT_CONSTRAINT(0x32, 0x3), | ||
376 | UNCORE_EVENT_CONSTRAINT(0x33, 0x3), | ||
377 | UNCORE_EVENT_CONSTRAINT(0x34, 0x3), | ||
378 | EVENT_CONSTRAINT_END | ||
379 | }; | ||
380 | |||
381 | static struct event_constraint snbep_uncore_r3qpi_constraints[] = { | ||
382 | UNCORE_EVENT_CONSTRAINT(0x10, 0x3), | ||
383 | UNCORE_EVENT_CONSTRAINT(0x11, 0x3), | ||
384 | UNCORE_EVENT_CONSTRAINT(0x12, 0x3), | ||
385 | UNCORE_EVENT_CONSTRAINT(0x13, 0x1), | ||
386 | UNCORE_EVENT_CONSTRAINT(0x20, 0x3), | ||
387 | UNCORE_EVENT_CONSTRAINT(0x21, 0x3), | ||
388 | UNCORE_EVENT_CONSTRAINT(0x22, 0x3), | ||
389 | UNCORE_EVENT_CONSTRAINT(0x23, 0x3), | ||
390 | UNCORE_EVENT_CONSTRAINT(0x24, 0x3), | ||
391 | UNCORE_EVENT_CONSTRAINT(0x25, 0x3), | ||
392 | UNCORE_EVENT_CONSTRAINT(0x26, 0x3), | ||
393 | UNCORE_EVENT_CONSTRAINT(0x30, 0x3), | ||
394 | UNCORE_EVENT_CONSTRAINT(0x31, 0x3), | ||
395 | UNCORE_EVENT_CONSTRAINT(0x32, 0x3), | ||
396 | UNCORE_EVENT_CONSTRAINT(0x33, 0x3), | ||
397 | UNCORE_EVENT_CONSTRAINT(0x34, 0x3), | ||
398 | UNCORE_EVENT_CONSTRAINT(0x36, 0x3), | ||
399 | UNCORE_EVENT_CONSTRAINT(0x37, 0x3), | ||
400 | EVENT_CONSTRAINT_END | ||
401 | }; | ||
402 | |||
403 | static struct intel_uncore_type snbep_uncore_ubox = { | ||
404 | .name = "ubox", | ||
405 | .num_counters = 2, | ||
406 | .num_boxes = 1, | ||
407 | .perf_ctr_bits = 44, | ||
408 | .fixed_ctr_bits = 48, | ||
409 | .perf_ctr = SNBEP_U_MSR_PMON_CTR0, | ||
410 | .event_ctl = SNBEP_U_MSR_PMON_CTL0, | ||
411 | .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, | ||
412 | .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, | ||
413 | .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, | ||
414 | .ops = &snbep_uncore_msr_ops, | ||
415 | .format_group = &snbep_uncore_ubox_format_group, | ||
416 | }; | ||
417 | |||
418 | static struct intel_uncore_type snbep_uncore_cbox = { | ||
419 | .name = "cbox", | ||
420 | .num_counters = 4, | ||
421 | .num_boxes = 8, | ||
422 | .perf_ctr_bits = 44, | ||
423 | .event_ctl = SNBEP_C0_MSR_PMON_CTL0, | ||
424 | .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, | ||
425 | .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, | ||
426 | .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, | ||
427 | .msr_offset = SNBEP_CBO_MSR_OFFSET, | ||
428 | .num_shared_regs = 1, | ||
429 | .constraints = snbep_uncore_cbox_constraints, | ||
430 | .ops = &snbep_uncore_msr_ops, | ||
431 | .format_group = &snbep_uncore_cbox_format_group, | ||
432 | }; | ||
433 | |||
434 | static struct intel_uncore_type snbep_uncore_pcu = { | ||
435 | .name = "pcu", | ||
436 | .num_counters = 4, | ||
437 | .num_boxes = 1, | ||
438 | .perf_ctr_bits = 48, | ||
439 | .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, | ||
440 | .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, | ||
441 | .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, | ||
442 | .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, | ||
443 | .num_shared_regs = 1, | ||
444 | .ops = &snbep_uncore_msr_ops, | ||
445 | .format_group = &snbep_uncore_pcu_format_group, | ||
446 | }; | ||
447 | |||
448 | static struct intel_uncore_type *snbep_msr_uncores[] = { | ||
449 | &snbep_uncore_ubox, | ||
450 | &snbep_uncore_cbox, | ||
451 | &snbep_uncore_pcu, | ||
452 | NULL, | ||
453 | }; | ||
454 | |||
455 | #define SNBEP_UNCORE_PCI_COMMON_INIT() \ | ||
456 | .perf_ctr = SNBEP_PCI_PMON_CTR0, \ | ||
457 | .event_ctl = SNBEP_PCI_PMON_CTL0, \ | ||
458 | .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ | ||
459 | .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ | ||
460 | .ops = &snbep_uncore_pci_ops, \ | ||
461 | .format_group = &snbep_uncore_format_group | ||
462 | |||
463 | static struct intel_uncore_type snbep_uncore_ha = { | ||
464 | .name = "ha", | ||
465 | .num_counters = 4, | ||
466 | .num_boxes = 1, | ||
467 | .perf_ctr_bits = 48, | ||
468 | SNBEP_UNCORE_PCI_COMMON_INIT(), | ||
469 | }; | ||
470 | |||
471 | static struct intel_uncore_type snbep_uncore_imc = { | ||
472 | .name = "imc", | ||
473 | .num_counters = 4, | ||
474 | .num_boxes = 4, | ||
475 | .perf_ctr_bits = 48, | ||
476 | .fixed_ctr_bits = 48, | ||
477 | .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, | ||
478 | .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, | ||
479 | .event_descs = snbep_uncore_imc_events, | ||
480 | SNBEP_UNCORE_PCI_COMMON_INIT(), | ||
481 | }; | ||
482 | |||
483 | static struct intel_uncore_type snbep_uncore_qpi = { | ||
484 | .name = "qpi", | ||
485 | .num_counters = 4, | ||
486 | .num_boxes = 2, | ||
487 | .perf_ctr_bits = 48, | ||
488 | .event_descs = snbep_uncore_qpi_events, | ||
489 | SNBEP_UNCORE_PCI_COMMON_INIT(), | ||
490 | }; | ||
491 | |||
492 | |||
493 | static struct intel_uncore_type snbep_uncore_r2pcie = { | ||
494 | .name = "r2pcie", | ||
495 | .num_counters = 4, | ||
496 | .num_boxes = 1, | ||
497 | .perf_ctr_bits = 44, | ||
498 | .constraints = snbep_uncore_r2pcie_constraints, | ||
499 | SNBEP_UNCORE_PCI_COMMON_INIT(), | ||
500 | }; | ||
501 | |||
502 | static struct intel_uncore_type snbep_uncore_r3qpi = { | ||
503 | .name = "r3qpi", | ||
504 | .num_counters = 3, | ||
505 | .num_boxes = 2, | ||
506 | .perf_ctr_bits = 44, | ||
507 | .constraints = snbep_uncore_r3qpi_constraints, | ||
508 | SNBEP_UNCORE_PCI_COMMON_INIT(), | ||
509 | }; | ||
510 | |||
511 | static struct intel_uncore_type *snbep_pci_uncores[] = { | ||
512 | &snbep_uncore_ha, | ||
513 | &snbep_uncore_imc, | ||
514 | &snbep_uncore_qpi, | ||
515 | &snbep_uncore_r2pcie, | ||
516 | &snbep_uncore_r3qpi, | ||
517 | NULL, | ||
518 | }; | ||
519 | |||
/*
 * PCI ids of the SNB-EP uncore devices.  driver_data of each entry points
 * at the intel_uncore_type the device belongs to (see uncore_pci_probe()).
 */
static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = {
	{ /* Home Agent */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
		.driver_data = (unsigned long)&snbep_uncore_ha,
	},
	{ /* MC Channel 0 */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
		.driver_data = (unsigned long)&snbep_uncore_imc,
	},
	{ /* MC Channel 1 */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
		.driver_data = (unsigned long)&snbep_uncore_imc,
	},
	{ /* MC Channel 2 */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
		.driver_data = (unsigned long)&snbep_uncore_imc,
	},
	{ /* MC Channel 3 */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
		.driver_data = (unsigned long)&snbep_uncore_imc,
	},
	{ /* QPI Port 0 */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
		.driver_data = (unsigned long)&snbep_uncore_qpi,
	},
	{ /* QPI Port 1 */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
		.driver_data = (unsigned long)&snbep_uncore_qpi,
	},
	{ /* P2PCIe */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
		.driver_data = (unsigned long)&snbep_uncore_r2pcie,
	},
	{ /* R3QPI Link 0 */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
		.driver_data = (unsigned long)&snbep_uncore_r3qpi,
	},
	{ /* R3QPI Link 1 */
		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
		.driver_data = (unsigned long)&snbep_uncore_r3qpi,
	},
	{ /* end: all zeroes */ }
};
563 | |||
/*
 * SNB-EP uncore PCI driver.  The .probe and .remove callbacks are filled
 * in generically by uncore_pci_init() before registration.
 */
static struct pci_driver snbep_uncore_pci_driver = {
	.name		= "snbep_uncore",
	.id_table	= snbep_uncore_pci_ids,
};
568 | |||
569 | /* | ||
570 | * build pci bus to socket mapping | ||
571 | */ | ||
572 | static void snbep_pci2phy_map_init(void) | ||
573 | { | ||
574 | struct pci_dev *ubox_dev = NULL; | ||
575 | int i, bus, nodeid; | ||
576 | u32 config; | ||
577 | |||
578 | while (1) { | ||
579 | /* find the UBOX device */ | ||
580 | ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, | ||
581 | PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX, | ||
582 | ubox_dev); | ||
583 | if (!ubox_dev) | ||
584 | break; | ||
585 | bus = ubox_dev->bus->number; | ||
586 | /* get the Node ID of the local register */ | ||
587 | pci_read_config_dword(ubox_dev, 0x40, &config); | ||
588 | nodeid = config; | ||
589 | /* get the Node ID mapping */ | ||
590 | pci_read_config_dword(ubox_dev, 0x54, &config); | ||
591 | /* | ||
592 | * every three bits in the Node ID mapping register maps | ||
593 | * to a particular node. | ||
594 | */ | ||
595 | for (i = 0; i < 8; i++) { | ||
596 | if (nodeid == ((config >> (3 * i)) & 0x7)) { | ||
597 | pcibus_to_physid[bus] = i; | ||
598 | break; | ||
599 | } | ||
600 | } | ||
601 | }; | ||
602 | return; | ||
603 | } | ||
604 | /* end of Sandy Bridge-EP uncore support */ | ||
605 | |||
606 | |||
607 | /* Sandy Bridge uncore support */ | ||
608 | static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, | ||
609 | struct perf_event *event) | ||
610 | { | ||
611 | struct hw_perf_event *hwc = &event->hw; | ||
612 | |||
613 | if (hwc->idx < UNCORE_PMC_IDX_FIXED) | ||
614 | wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); | ||
615 | else | ||
616 | wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); | ||
617 | } | ||
618 | |||
619 | static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, | ||
620 | struct perf_event *event) | ||
621 | { | ||
622 | wrmsrl(event->hw.config_base, 0); | ||
623 | } | ||
624 | |||
625 | static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box, | ||
626 | struct perf_event *event) | ||
627 | { | ||
628 | u64 count; | ||
629 | rdmsrl(event->hw.event_base, count); | ||
630 | return count; | ||
631 | } | ||
632 | |||
633 | static void snb_uncore_msr_init_box(struct intel_uncore_box *box) | ||
634 | { | ||
635 | if (box->pmu->pmu_idx == 0) { | ||
636 | wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, | ||
637 | SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); | ||
638 | } | ||
639 | } | ||
640 | |||
/* sysfs "format" attributes for SNB uncore events (5-bit cmask). */
static struct attribute *snb_uncore_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask5.attr,
	NULL,
};
649 | |||
/* sysfs group exposing the SNB uncore event format under "format". */
static struct attribute_group snb_uncore_format_group = {
	.name = "format",
	.attrs = snb_uncore_formats_attr,
};
654 | |||
/* MSR access ops for the SNB uncore (no box-level disable/enable hooks). */
static struct intel_uncore_ops snb_uncore_msr_ops = {
	.init_box	= snb_uncore_msr_init_box,
	.disable_event	= snb_uncore_msr_disable_event,
	.enable_event	= snb_uncore_msr_enable_event,
	.read_counter	= snb_uncore_msr_read_counter,
};
661 | |||
/* events 0x80 and 0x83 can only be counted on counter 0 */
static struct event_constraint snb_uncore_cbox_constraints[] = {
	UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
	UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
	EVENT_CONSTRAINT_END
};
667 | |||
/*
 * SNB C-Box uncore: 4 boxes, 2 generic counters each plus one fixed
 * counter that only the first box's pmu may use (.single_fixed).
 */
static struct intel_uncore_type snb_uncore_cbox = {
	.name		= "cbox",
	.num_counters   = 2,
	.num_boxes	= 4,
	.perf_ctr_bits	= 44,
	.fixed_ctr_bits	= 48,
	.perf_ctr	= SNB_UNC_CBO_0_PER_CTR0,
	.event_ctl	= SNB_UNC_CBO_0_PERFEVTSEL0,
	.fixed_ctr	= SNB_UNC_FIXED_CTR,
	.fixed_ctl	= SNB_UNC_FIXED_CTR_CTRL,
	.single_fixed	= 1,
	.event_mask	= SNB_UNC_RAW_EVENT_MASK,
	.msr_offset	= SNB_UNC_CBO_MSR_OFFSET,
	.constraints	= snb_uncore_cbox_constraints,
	.ops		= &snb_uncore_msr_ops,
	.format_group	= &snb_uncore_format_group,
};
685 | |||
/* NULL-terminated list of MSR-based SNB uncore units. */
static struct intel_uncore_type *snb_msr_uncores[] = {
	&snb_uncore_cbox,
	NULL,
};
690 | /* end of Sandy Bridge uncore support */ | ||
691 | |||
692 | /* Nehalem uncore support */ | ||
/* Globally disable all NHM uncore counters via the global control MSR. */
static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
{
	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
}
697 | |||
/* Globally enable all NHM uncore generic counters and the fixed counter. */
static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
{
	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL,
		NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
}
703 | |||
704 | static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, | ||
705 | struct perf_event *event) | ||
706 | { | ||
707 | struct hw_perf_event *hwc = &event->hw; | ||
708 | |||
709 | if (hwc->idx < UNCORE_PMC_IDX_FIXED) | ||
710 | wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); | ||
711 | else | ||
712 | wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); | ||
713 | } | ||
714 | |||
/* sysfs "format" attributes for NHM uncore events (8-bit cmask). */
static struct attribute *nhm_uncore_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask8.attr,
	NULL,
};
723 | |||
/* sysfs group exposing the NHM uncore event format under "format". */
static struct attribute_group nhm_uncore_format_group = {
	.name = "format",
	.attrs = nhm_uncore_formats_attr,
};
728 | |||
/* Named NHM uncore events exported to userspace via sysfs "events". */
static struct uncore_event_desc nhm_uncore_events[] = {
	INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0x00"),
	INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"),
	INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes,    "event=0x20,umask=0x02"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads,  "event=0x20,umask=0x04"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads,   "event=0x20,umask=0x10"),
	INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes,  "event=0x20,umask=0x20"),
	{ /* end: all zeroes */ },
};
741 | |||
/* NHM MSR ops; disable/read are shared with the SNB implementation. */
static struct intel_uncore_ops nhm_uncore_msr_ops = {
	.disable_box	= nhm_uncore_msr_disable_box,
	.enable_box	= nhm_uncore_msr_enable_box,
	.disable_event	= snb_uncore_msr_disable_event,
	.enable_event	= nhm_uncore_msr_enable_event,
	.read_counter	= snb_uncore_msr_read_counter,
};
749 | |||
/*
 * The single NHM uncore box: 8 generic counters plus one fixed counter.
 * The empty name makes uncore_pmu_register() expose it as plain "uncore".
 */
static struct intel_uncore_type nhm_uncore = {
	.name		= "",
	.num_counters   = 8,
	.num_boxes	= 1,
	.perf_ctr_bits	= 48,
	.fixed_ctr_bits	= 48,
	.event_ctl	= NHM_UNC_PERFEVTSEL0,
	.perf_ctr	= NHM_UNC_UNCORE_PMC0,
	.fixed_ctr	= NHM_UNC_FIXED_CTR,
	.fixed_ctl	= NHM_UNC_FIXED_CTR_CTRL,
	.event_mask	= NHM_UNC_RAW_EVENT_MASK,
	.event_descs	= nhm_uncore_events,
	.ops		= &nhm_uncore_msr_ops,
	.format_group	= &nhm_uncore_format_group,
};
765 | |||
/* NULL-terminated list of MSR-based NHM uncore units. */
static struct intel_uncore_type *nhm_msr_uncores[] = {
	&nhm_uncore,
	NULL,
};
770 | /* end of Nehalem uncore support */ | ||
771 | |||
772 | static void uncore_assign_hw_event(struct intel_uncore_box *box, | ||
773 | struct perf_event *event, int idx) | ||
774 | { | ||
775 | struct hw_perf_event *hwc = &event->hw; | ||
776 | |||
777 | hwc->idx = idx; | ||
778 | hwc->last_tag = ++box->tags[idx]; | ||
779 | |||
780 | if (hwc->idx == UNCORE_PMC_IDX_FIXED) { | ||
781 | hwc->event_base = uncore_fixed_ctr(box); | ||
782 | hwc->config_base = uncore_fixed_ctl(box); | ||
783 | return; | ||
784 | } | ||
785 | |||
786 | hwc->config_base = uncore_event_ctl(box, hwc->idx); | ||
787 | hwc->event_base = uncore_perf_ctr(box, hwc->idx); | ||
788 | } | ||
789 | |||
/*
 * Fold the current hardware count into event->count.  The counters are
 * narrower than 64 bits, so a shift pair is used to compute the delta
 * modulo the counter width.
 */
static void uncore_perf_event_update(struct intel_uncore_box *box,
				struct perf_event *event)
{
	u64 prev_count, new_count, delta;
	int shift;

	/* fixed counters may have a different width than the generic ones */
	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
		shift = 64 - uncore_fixed_ctr_bits(box);
	else
		shift = 64 - uncore_perf_ctr_bits(box);

	/* the hrtimer might modify the previous event value */
again:
	prev_count = local64_read(&event->hw.prev_count);
	new_count = uncore_read_counter(box, event);
	/* retry if prev_count was updated concurrently under us */
	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
		goto again;

	/* discard bits above the counter width before taking the delta */
	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}
813 | |||
814 | /* | ||
815 | * The overflow interrupt is unavailable for SandyBridge-EP, is broken | ||
816 | * for SandyBridge. So we use hrtimer to periodically poll the counter | ||
817 | * to avoid overflow. | ||
818 | */ | ||
819 | static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) | ||
820 | { | ||
821 | struct intel_uncore_box *box; | ||
822 | unsigned long flags; | ||
823 | int bit; | ||
824 | |||
825 | box = container_of(hrtimer, struct intel_uncore_box, hrtimer); | ||
826 | if (!box->n_active || box->cpu != smp_processor_id()) | ||
827 | return HRTIMER_NORESTART; | ||
828 | /* | ||
829 | * disable local interrupt to prevent uncore_pmu_event_start/stop | ||
830 | * to interrupt the update process | ||
831 | */ | ||
832 | local_irq_save(flags); | ||
833 | |||
834 | for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) | ||
835 | uncore_perf_event_update(box, box->events[bit]); | ||
836 | |||
837 | local_irq_restore(flags); | ||
838 | |||
839 | hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL)); | ||
840 | return HRTIMER_RESTART; | ||
841 | } | ||
842 | |||
/* Arm the per-box polling timer, pinned to the current cpu. */
static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
	__hrtimer_start_range_ns(&box->hrtimer,
			ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
			HRTIMER_MODE_REL_PINNED, 0);
}
849 | |||
/* Cancel the per-box polling timer, waiting for a running callback. */
static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_cancel(&box->hrtimer);
}
854 | |||
/* Initialize the per-box polling timer (armed later by event_start). */
static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	box->hrtimer.function = uncore_pmu_hrtimer;
}
860 | |||
861 | struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, | ||
862 | int cpu) | ||
863 | { | ||
864 | struct intel_uncore_box *box; | ||
865 | int i, size; | ||
866 | |||
867 | size = sizeof(*box) + type->num_shared_regs * | ||
868 | sizeof(struct intel_uncore_extra_reg); | ||
869 | |||
870 | box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); | ||
871 | if (!box) | ||
872 | return NULL; | ||
873 | |||
874 | for (i = 0; i < type->num_shared_regs; i++) | ||
875 | raw_spin_lock_init(&box->shared_regs[i].lock); | ||
876 | |||
877 | uncore_pmu_init_hrtimer(box); | ||
878 | atomic_set(&box->refcnt, 1); | ||
879 | box->cpu = -1; | ||
880 | box->phys_id = -1; | ||
881 | |||
882 | return box; | ||
883 | } | ||
884 | |||
885 | static struct intel_uncore_box * | ||
886 | uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) | ||
887 | { | ||
888 | static struct intel_uncore_box *box; | ||
889 | |||
890 | box = *per_cpu_ptr(pmu->box, cpu); | ||
891 | if (box) | ||
892 | return box; | ||
893 | |||
894 | raw_spin_lock(&uncore_box_lock); | ||
895 | list_for_each_entry(box, &pmu->box_list, list) { | ||
896 | if (box->phys_id == topology_physical_package_id(cpu)) { | ||
897 | atomic_inc(&box->refcnt); | ||
898 | *per_cpu_ptr(pmu->box, cpu) = box; | ||
899 | break; | ||
900 | } | ||
901 | } | ||
902 | raw_spin_unlock(&uncore_box_lock); | ||
903 | |||
904 | return *per_cpu_ptr(pmu->box, cpu); | ||
905 | } | ||
906 | |||
/* Map a perf_event back to the uncore pmu that owns it. */
static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}
911 | |||
/*
 * perf core schedules event on the basis of cpu, uncore events are
 * collected by one of the cpus inside a physical package.
 */
static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu = uncore_event_to_pmu(event);

	return uncore_pmu_to_box(pmu, smp_processor_id());
}
921 | |||
922 | static int uncore_collect_events(struct intel_uncore_box *box, | ||
923 | struct perf_event *leader, bool dogrp) | ||
924 | { | ||
925 | struct perf_event *event; | ||
926 | int n, max_count; | ||
927 | |||
928 | max_count = box->pmu->type->num_counters; | ||
929 | if (box->pmu->type->fixed_ctl) | ||
930 | max_count++; | ||
931 | |||
932 | if (box->n_events >= max_count) | ||
933 | return -EINVAL; | ||
934 | |||
935 | n = box->n_events; | ||
936 | box->event_list[n] = leader; | ||
937 | n++; | ||
938 | if (!dogrp) | ||
939 | return n; | ||
940 | |||
941 | list_for_each_entry(event, &leader->sibling_list, group_entry) { | ||
942 | if (event->state <= PERF_EVENT_STATE_OFF) | ||
943 | continue; | ||
944 | |||
945 | if (n >= max_count) | ||
946 | return -EINVAL; | ||
947 | |||
948 | box->event_list[n] = event; | ||
949 | n++; | ||
950 | } | ||
951 | return n; | ||
952 | } | ||
953 | |||
/*
 * Resolve the scheduling constraint for @event: the type-specific hook
 * gets first say, then the fixed-counter sentinel, then the static
 * constraint table; otherwise the event is unconstrained.
 */
static struct event_constraint *
uncore_get_event_constraint(struct intel_uncore_box *box,
			    struct perf_event *event)
{
	struct intel_uncore_type *type = box->pmu->type;
	struct event_constraint *c;

	if (type->ops->get_constraint) {
		c = type->ops->get_constraint(box, event);
		if (c)
			return c;
	}

	/* ~0ULL is the config sentinel used for the fixed-counter event */
	if (event->hw.config == ~0ULL)
		return &constraint_fixed;

	if (type->constraints) {
		for_each_event_constraint(c, type->constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &type->unconstrainted;
}
979 | |||
980 | static void uncore_put_event_constraint(struct intel_uncore_box *box, | ||
981 | struct perf_event *event) | ||
982 | { | ||
983 | if (box->pmu->type->ops->put_constraint) | ||
984 | box->pmu->type->ops->put_constraint(box, event); | ||
985 | } | ||
986 | |||
/*
 * Assign counter indices to the first @n events on the box's event list.
 * Returns 0 on success, -EINVAL if they cannot all be scheduled.  With
 * @assign == NULL this is a dry run (group validation); in that case, or
 * on failure, the constraints taken here are released again.
 */
static int uncore_assign_events(struct intel_uncore_box *box,
				int assign[], int n)
{
	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
	struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
	int i, wmin, wmax, ret = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

	/* collect constraints, tracking min/max constraint weight */
	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		c = uncore_get_event_constraint(box, box->event_list[i]);
		constraints[i] = c;
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

	/* fastpath, try to reuse previous register */
	for (i = 0; i < n; i++) {
		hwc = &box->event_list[i]->hw;
		c = constraints[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	/* slow path */
	if (i != n)
		ret = perf_assign_events(constraints, n, wmin, wmax, assign);

	/* dry run or failure: drop the constraints taken above */
	if (!assign || ret) {
		for (i = 0; i < n; i++)
			uncore_put_event_constraint(box, box->event_list[i]);
	}
	return ret ? -EINVAL : 0;
}
1035 | |||
/*
 * pmu::start callback: activate an already-added event on its assigned
 * counter.  The first active event also enables the box and arms the
 * overflow-polling hrtimer.
 */
static void uncore_pmu_event_start(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int idx = event->hw.idx;

	/* starting an event that is not stopped is a core/pmu bug */
	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
		return;

	event->hw.state = 0;
	box->events[idx] = event;
	box->n_active++;
	__set_bit(idx, box->active_mask);

	/* snapshot the counter so the first update computes a clean delta */
	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
	uncore_enable_event(box, event);

	if (box->n_active == 1) {
		uncore_enable_box(box);
		uncore_pmu_start_hrtimer(box);
	}
}
1060 | |||
/*
 * pmu::stop callback: deactivate the event's counter; the last active
 * event also disables the box and cancels the polling hrtimer.  With
 * PERF_EF_UPDATE the residual count is drained into the event first.
 */
static void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	/* test_and_clear makes a second stop on the same event a no-op */
	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
		uncore_disable_event(box, event);
		box->n_active--;
		box->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;

		if (box->n_active == 0) {
			uncore_disable_box(box);
			uncore_pmu_cancel_hrtimer(box);
		}
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of a event
		 * that we are disabling:
		 */
		uncore_perf_event_update(box, event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}
1088 | |||
/*
 * pmu::add callback: collect the event onto the box, recompute counter
 * assignments for all collected events, migrate events whose counter
 * changed, and start the newly runnable ones.  Returns 0 or a negative
 * error if the event cannot be scheduled.
 */
static int uncore_pmu_event_add(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;
	int assign[UNCORE_PMC_IDX_MAX];
	int i, n, ret;

	if (!box)
		return -ENODEV;

	ret = n = uncore_collect_events(box, event, false);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	/* without PERF_EF_START the event stays stopped until started */
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	ret = uncore_assign_events(box, assign, n);
	if (ret)
		return ret;

	/* save events moving to new counters */
	for (i = 0; i < box->n_events; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		/* same counter, same tag: nothing to do for this event */
		if (hwc->idx == assign[i] &&
			hwc->last_tag == box->tags[assign[i]])
			continue;
		/*
		 * Ensure we don't accidentally enable a stopped
		 * counter simply because we rescheduled.
		 */
		if (hwc->state & PERF_HES_STOPPED)
			hwc->state |= PERF_HES_ARCH;

		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
	}

	/* reprogram moved events into new counters */
	for (i = 0; i < n; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx != assign[i] ||
			hwc->last_tag != box->tags[assign[i]])
			uncore_assign_hw_event(box, event, assign[i]);
		else if (i < box->n_events)
			/* pre-existing event that kept its counter */
			continue;

		if (hwc->state & PERF_HES_ARCH)
			continue;

		uncore_pmu_event_start(event, 0);
	}
	box->n_events = n;

	return 0;
}
1149 | |||
/*
 * pmu::del callback: stop the event (draining its count), drop its
 * constraint, and compact it out of the box's event list.
 */
static void uncore_pmu_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	uncore_pmu_event_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			uncore_put_event_constraint(box, event);

			/* shift the remaining entries down by one */
			while (++i < box->n_events)
				box->event_list[i - 1] = box->event_list[i];

			--box->n_events;
			break;
		}
	}

	/* reset so a later add() must re-assign a counter */
	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
}
1172 | |||
/* pmu::read callback: fold the current hardware count into the event. */
static void uncore_pmu_event_read(struct perf_event *event)
{
	uncore_perf_event_update(uncore_event_to_box(event), event);
}
1178 | |||
1179 | /* | ||
1180 | * validation ensures the group can be loaded onto the | ||
1181 | * PMU if it was the only group available. | ||
1182 | */ | ||
1183 | static int uncore_validate_group(struct intel_uncore_pmu *pmu, | ||
1184 | struct perf_event *event) | ||
1185 | { | ||
1186 | struct perf_event *leader = event->group_leader; | ||
1187 | struct intel_uncore_box *fake_box; | ||
1188 | int ret = -EINVAL, n; | ||
1189 | |||
1190 | fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); | ||
1191 | if (!fake_box) | ||
1192 | return -ENOMEM; | ||
1193 | |||
1194 | fake_box->pmu = pmu; | ||
1195 | /* | ||
1196 | * the event is not yet connected with its | ||
1197 | * siblings therefore we must first collect | ||
1198 | * existing siblings, then add the new event | ||
1199 | * before we can simulate the scheduling | ||
1200 | */ | ||
1201 | n = uncore_collect_events(fake_box, leader, true); | ||
1202 | if (n < 0) | ||
1203 | goto out; | ||
1204 | |||
1205 | fake_box->n_events = n; | ||
1206 | n = uncore_collect_events(fake_box, event, false); | ||
1207 | if (n < 0) | ||
1208 | goto out; | ||
1209 | |||
1210 | fake_box->n_events = n; | ||
1211 | |||
1212 | ret = uncore_assign_events(fake_box, NULL, n); | ||
1213 | out: | ||
1214 | kfree(fake_box); | ||
1215 | return ret; | ||
1216 | } | ||
1217 | |||
/*
 * pmu::event_init callback: validate the attributes of a new uncore
 * event, pin it to the cpu that collects events for its package, and
 * translate attr.config into the hardware configuration.
 */
int uncore_pmu_event_init(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	pmu = uncore_event_to_pmu(event);
	/* no device found for this pmu */
	if (pmu->func_id < 0)
		return -ENOENT;

	/*
	 * Uncore PMU does measure at all privilege level all the time.
	 * So it doesn't make sense to specify any exclude bits.
	 */
	if (event->attr.exclude_user || event->attr.exclude_kernel ||
			event->attr.exclude_hv || event->attr.exclude_idle)
		return -EINVAL;

	/* Sampling not supported yet */
	if (hwc->sample_period)
		return -EINVAL;

	/*
	 * Place all uncore events for a particular physical package
	 * onto a single cpu
	 */
	if (event->cpu < 0)
		return -EINVAL;
	box = uncore_pmu_to_box(pmu, event->cpu);
	if (!box || box->cpu < 0)
		return -EINVAL;
	/* redirect the event to the cpu that owns the box */
	event->cpu = box->cpu;

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
	event->hw.extra_reg.idx = EXTRA_REG_NONE;

	if (event->attr.config == UNCORE_FIXED_EVENT) {
		/* no fixed counter */
		if (!pmu->type->fixed_ctl)
			return -EINVAL;
		/*
		 * if there is only one fixed counter, only the first pmu
		 * can access the fixed counter
		 */
		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
			return -EINVAL;
		/* ~0ULL marks the fixed counter (see constraint lookup) */
		hwc->config = ~0ULL;
	} else {
		hwc->config = event->attr.config & pmu->type->event_mask;
		if (pmu->type->ops->hw_config) {
			ret = pmu->type->ops->hw_config(box, event);
			if (ret)
				return ret;
		}
	}

	/* group members must be schedulable together on this pmu */
	if (event->group_leader != event)
		ret = uncore_validate_group(pmu, event);
	else
		ret = 0;

	return ret;
}
1287 | |||
1288 | static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) | ||
1289 | { | ||
1290 | int ret; | ||
1291 | |||
1292 | pmu->pmu = (struct pmu) { | ||
1293 | .attr_groups = pmu->type->attr_groups, | ||
1294 | .task_ctx_nr = perf_invalid_context, | ||
1295 | .event_init = uncore_pmu_event_init, | ||
1296 | .add = uncore_pmu_event_add, | ||
1297 | .del = uncore_pmu_event_del, | ||
1298 | .start = uncore_pmu_event_start, | ||
1299 | .stop = uncore_pmu_event_stop, | ||
1300 | .read = uncore_pmu_event_read, | ||
1301 | }; | ||
1302 | |||
1303 | if (pmu->type->num_boxes == 1) { | ||
1304 | if (strlen(pmu->type->name) > 0) | ||
1305 | sprintf(pmu->name, "uncore_%s", pmu->type->name); | ||
1306 | else | ||
1307 | sprintf(pmu->name, "uncore"); | ||
1308 | } else { | ||
1309 | sprintf(pmu->name, "uncore_%s_%d", pmu->type->name, | ||
1310 | pmu->pmu_idx); | ||
1311 | } | ||
1312 | |||
1313 | ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); | ||
1314 | return ret; | ||
1315 | } | ||
1316 | |||
/*
 * Undo uncore_type_init(): free each pmu's per-cpu box pointer array,
 * the pmu array itself, and the events attribute group (slot 1 of
 * attr_groups, allocated by uncore_type_init()).
 */
static void __init uncore_type_exit(struct intel_uncore_type *type)
{
	int i;

	for (i = 0; i < type->num_boxes; i++)
		free_percpu(type->pmus[i].box);
	kfree(type->pmus);
	type->pmus = NULL;
	kfree(type->attr_groups[1]);
	type->attr_groups[1] = NULL;
}
1328 | |||
/* Tear down every type in a NULL-terminated uncore type list. */
static void uncore_types_exit(struct intel_uncore_type **types)
{
	struct intel_uncore_type **type;

	for (type = types; *type; type++)
		uncore_type_exit(*type);
}
1335 | |||
/*
 * Per-type setup: allocate one pmu per box (each with a per-cpu box
 * pointer), compute the unconstrained scheduling constraint, and build
 * the sysfs "events" attribute group from the type's event descriptors.
 * Returns 0 or -ENOMEM (after unwinding via uncore_type_exit()).
 */
static int __init uncore_type_init(struct intel_uncore_type *type)
{
	struct intel_uncore_pmu *pmus;
	struct attribute_group *events_group;
	struct attribute **attrs;
	int i, j;

	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
	if (!pmus)
		return -ENOMEM;

	/* any of the type's num_counters counters may be used */
	type->unconstrainted = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
				0, type->num_counters, 0);

	for (i = 0; i < type->num_boxes; i++) {
		/* func_id < 0 means "no device found yet" (see event_init) */
		pmus[i].func_id = -1;
		pmus[i].pmu_idx = i;
		pmus[i].type = type;
		INIT_LIST_HEAD(&pmus[i].box_list);
		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
		if (!pmus[i].box)
			goto fail;
	}

	if (type->event_descs) {
		/* count the event descriptors (name-terminated array) */
		i = 0;
		while (type->event_descs[i].attr.attr.name)
			i++;

		/*
		 * single allocation: the group struct is immediately
		 * followed by its NULL-terminated attribute pointer array
		 */
		events_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
					sizeof(*events_group), GFP_KERNEL);
		if (!events_group)
			goto fail;

		attrs = (struct attribute **)(events_group + 1);
		events_group->name = "events";
		events_group->attrs = attrs;

		for (j = 0; j < i; j++)
			attrs[j] = &type->event_descs[j].attr.attr;

		type->attr_groups[1] = events_group;
	}

	type->pmus = pmus;
	return 0;
fail:
	uncore_type_exit(type);
	return -ENOMEM;
}
1387 | |||
1388 | static int __init uncore_types_init(struct intel_uncore_type **types) | ||
1389 | { | ||
1390 | int i, ret; | ||
1391 | |||
1392 | for (i = 0; types[i]; i++) { | ||
1393 | ret = uncore_type_init(types[i]); | ||
1394 | if (ret) | ||
1395 | goto fail; | ||
1396 | } | ||
1397 | return 0; | ||
1398 | fail: | ||
1399 | while (--i >= 0) | ||
1400 | uncore_type_exit(types[i]); | ||
1401 | return ret; | ||
1402 | } | ||
1403 | |||
/* pci driver selected for the running cpu model (NULL if none) */
static struct pci_driver *uncore_pci_driver;
/* set once uncore_pci_driver has been registered with the pci core */
static bool pcidrv_registered;
1406 | |||
1407 | /* | ||
1408 | * add a pci uncore device | ||
1409 | */ | ||
1410 | static int __devinit uncore_pci_add(struct intel_uncore_type *type, | ||
1411 | struct pci_dev *pdev) | ||
1412 | { | ||
1413 | struct intel_uncore_pmu *pmu; | ||
1414 | struct intel_uncore_box *box; | ||
1415 | int i, phys_id; | ||
1416 | |||
1417 | phys_id = pcibus_to_physid[pdev->bus->number]; | ||
1418 | if (phys_id < 0) | ||
1419 | return -ENODEV; | ||
1420 | |||
1421 | box = uncore_alloc_box(type, 0); | ||
1422 | if (!box) | ||
1423 | return -ENOMEM; | ||
1424 | |||
1425 | /* | ||
1426 | * for performance monitoring unit with multiple boxes, | ||
1427 | * each box has a different function id. | ||
1428 | */ | ||
1429 | for (i = 0; i < type->num_boxes; i++) { | ||
1430 | pmu = &type->pmus[i]; | ||
1431 | if (pmu->func_id == pdev->devfn) | ||
1432 | break; | ||
1433 | if (pmu->func_id < 0) { | ||
1434 | pmu->func_id = pdev->devfn; | ||
1435 | break; | ||
1436 | } | ||
1437 | pmu = NULL; | ||
1438 | } | ||
1439 | |||
1440 | if (!pmu) { | ||
1441 | kfree(box); | ||
1442 | return -EINVAL; | ||
1443 | } | ||
1444 | |||
1445 | box->phys_id = phys_id; | ||
1446 | box->pci_dev = pdev; | ||
1447 | box->pmu = pmu; | ||
1448 | uncore_box_init(box); | ||
1449 | pci_set_drvdata(pdev, box); | ||
1450 | |||
1451 | raw_spin_lock(&uncore_box_lock); | ||
1452 | list_add_tail(&box->list, &pmu->box_list); | ||
1453 | raw_spin_unlock(&uncore_box_lock); | ||
1454 | |||
1455 | return 0; | ||
1456 | } | ||
1457 | |||
/*
 * Remove a pci uncore device: unlink its box from the pmu's list, drop
 * every per-cpu cached reference to it, then free it.
 */
static void uncore_pci_remove(struct pci_dev *pdev)
{
	struct intel_uncore_box *box = pci_get_drvdata(pdev);
	struct intel_uncore_pmu *pmu = box->pmu;
	int cpu, phys_id = pcibus_to_physid[pdev->bus->number];

	/* the box must belong to the socket this bus maps to */
	if (WARN_ON_ONCE(phys_id != box->phys_id))
		return;

	raw_spin_lock(&uncore_box_lock);
	list_del(&box->list);
	raw_spin_unlock(&uncore_box_lock);

	/* invalidate the per-cpu cache filled by uncore_pmu_to_box() */
	for_each_possible_cpu(cpu) {
		if (*per_cpu_ptr(pmu->box, cpu) == box) {
			*per_cpu_ptr(pmu->box, cpu) = NULL;
			atomic_dec(&box->refcnt);
		}
	}

	/* only the initial reference from uncore_alloc_box() should remain */
	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
	kfree(box);
}
1481 | |||
1482 | static int __devinit uncore_pci_probe(struct pci_dev *pdev, | ||
1483 | const struct pci_device_id *id) | ||
1484 | { | ||
1485 | struct intel_uncore_type *type; | ||
1486 | |||
1487 | type = (struct intel_uncore_type *)id->driver_data; | ||
1488 | return uncore_pci_add(type, pdev); | ||
1489 | } | ||
1490 | |||
1491 | static int __init uncore_pci_init(void) | ||
1492 | { | ||
1493 | int ret; | ||
1494 | |||
1495 | switch (boot_cpu_data.x86_model) { | ||
1496 | case 45: /* Sandy Bridge-EP */ | ||
1497 | pci_uncores = snbep_pci_uncores; | ||
1498 | uncore_pci_driver = &snbep_uncore_pci_driver; | ||
1499 | snbep_pci2phy_map_init(); | ||
1500 | break; | ||
1501 | default: | ||
1502 | return 0; | ||
1503 | } | ||
1504 | |||
1505 | ret = uncore_types_init(pci_uncores); | ||
1506 | if (ret) | ||
1507 | return ret; | ||
1508 | |||
1509 | uncore_pci_driver->probe = uncore_pci_probe; | ||
1510 | uncore_pci_driver->remove = uncore_pci_remove; | ||
1511 | |||
1512 | ret = pci_register_driver(uncore_pci_driver); | ||
1513 | if (ret == 0) | ||
1514 | pcidrv_registered = true; | ||
1515 | else | ||
1516 | uncore_types_exit(pci_uncores); | ||
1517 | |||
1518 | return ret; | ||
1519 | } | ||
1520 | |||
1521 | static void __init uncore_pci_exit(void) | ||
1522 | { | ||
1523 | if (pcidrv_registered) { | ||
1524 | pcidrv_registered = false; | ||
1525 | pci_unregister_driver(uncore_pci_driver); | ||
1526 | uncore_types_exit(pci_uncores); | ||
1527 | } | ||
1528 | } | ||
1529 | |||
1530 | static void __cpuinit uncore_cpu_dying(int cpu) | ||
1531 | { | ||
1532 | struct intel_uncore_type *type; | ||
1533 | struct intel_uncore_pmu *pmu; | ||
1534 | struct intel_uncore_box *box; | ||
1535 | int i, j; | ||
1536 | |||
1537 | for (i = 0; msr_uncores[i]; i++) { | ||
1538 | type = msr_uncores[i]; | ||
1539 | for (j = 0; j < type->num_boxes; j++) { | ||
1540 | pmu = &type->pmus[j]; | ||
1541 | box = *per_cpu_ptr(pmu->box, cpu); | ||
1542 | *per_cpu_ptr(pmu->box, cpu) = NULL; | ||
1543 | if (box && atomic_dec_and_test(&box->refcnt)) | ||
1544 | kfree(box); | ||
1545 | } | ||
1546 | } | ||
1547 | } | ||
1548 | |||
1549 | static int __cpuinit uncore_cpu_starting(int cpu) | ||
1550 | { | ||
1551 | struct intel_uncore_type *type; | ||
1552 | struct intel_uncore_pmu *pmu; | ||
1553 | struct intel_uncore_box *box, *exist; | ||
1554 | int i, j, k, phys_id; | ||
1555 | |||
1556 | phys_id = topology_physical_package_id(cpu); | ||
1557 | |||
1558 | for (i = 0; msr_uncores[i]; i++) { | ||
1559 | type = msr_uncores[i]; | ||
1560 | for (j = 0; j < type->num_boxes; j++) { | ||
1561 | pmu = &type->pmus[j]; | ||
1562 | box = *per_cpu_ptr(pmu->box, cpu); | ||
1563 | /* called by uncore_cpu_init? */ | ||
1564 | if (box && box->phys_id >= 0) { | ||
1565 | uncore_box_init(box); | ||
1566 | continue; | ||
1567 | } | ||
1568 | |||
1569 | for_each_online_cpu(k) { | ||
1570 | exist = *per_cpu_ptr(pmu->box, k); | ||
1571 | if (exist && exist->phys_id == phys_id) { | ||
1572 | atomic_inc(&exist->refcnt); | ||
1573 | *per_cpu_ptr(pmu->box, cpu) = exist; | ||
1574 | kfree(box); | ||
1575 | box = NULL; | ||
1576 | break; | ||
1577 | } | ||
1578 | } | ||
1579 | |||
1580 | if (box) { | ||
1581 | box->phys_id = phys_id; | ||
1582 | uncore_box_init(box); | ||
1583 | } | ||
1584 | } | ||
1585 | } | ||
1586 | return 0; | ||
1587 | } | ||
1588 | |||
1589 | static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id) | ||
1590 | { | ||
1591 | struct intel_uncore_type *type; | ||
1592 | struct intel_uncore_pmu *pmu; | ||
1593 | struct intel_uncore_box *box; | ||
1594 | int i, j; | ||
1595 | |||
1596 | for (i = 0; msr_uncores[i]; i++) { | ||
1597 | type = msr_uncores[i]; | ||
1598 | for (j = 0; j < type->num_boxes; j++) { | ||
1599 | pmu = &type->pmus[j]; | ||
1600 | if (pmu->func_id < 0) | ||
1601 | pmu->func_id = j; | ||
1602 | |||
1603 | box = uncore_alloc_box(type, cpu); | ||
1604 | if (!box) | ||
1605 | return -ENOMEM; | ||
1606 | |||
1607 | box->pmu = pmu; | ||
1608 | box->phys_id = phys_id; | ||
1609 | *per_cpu_ptr(pmu->box, cpu) = box; | ||
1610 | } | ||
1611 | } | ||
1612 | return 0; | ||
1613 | } | ||
1614 | |||
1615 | static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores, | ||
1616 | int old_cpu, int new_cpu) | ||
1617 | { | ||
1618 | struct intel_uncore_type *type; | ||
1619 | struct intel_uncore_pmu *pmu; | ||
1620 | struct intel_uncore_box *box; | ||
1621 | int i, j; | ||
1622 | |||
1623 | for (i = 0; uncores[i]; i++) { | ||
1624 | type = uncores[i]; | ||
1625 | for (j = 0; j < type->num_boxes; j++) { | ||
1626 | pmu = &type->pmus[j]; | ||
1627 | if (old_cpu < 0) | ||
1628 | box = uncore_pmu_to_box(pmu, new_cpu); | ||
1629 | else | ||
1630 | box = uncore_pmu_to_box(pmu, old_cpu); | ||
1631 | if (!box) | ||
1632 | continue; | ||
1633 | |||
1634 | if (old_cpu < 0) { | ||
1635 | WARN_ON_ONCE(box->cpu != -1); | ||
1636 | box->cpu = new_cpu; | ||
1637 | continue; | ||
1638 | } | ||
1639 | |||
1640 | WARN_ON_ONCE(box->cpu != old_cpu); | ||
1641 | if (new_cpu >= 0) { | ||
1642 | uncore_pmu_cancel_hrtimer(box); | ||
1643 | perf_pmu_migrate_context(&pmu->pmu, | ||
1644 | old_cpu, new_cpu); | ||
1645 | box->cpu = new_cpu; | ||
1646 | } else { | ||
1647 | box->cpu = -1; | ||
1648 | } | ||
1649 | } | ||
1650 | } | ||
1651 | } | ||
1652 | |||
1653 | static void __cpuinit uncore_event_exit_cpu(int cpu) | ||
1654 | { | ||
1655 | int i, phys_id, target; | ||
1656 | |||
1657 | /* if exiting cpu is used for collecting uncore events */ | ||
1658 | if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) | ||
1659 | return; | ||
1660 | |||
1661 | /* find a new cpu to collect uncore events */ | ||
1662 | phys_id = topology_physical_package_id(cpu); | ||
1663 | target = -1; | ||
1664 | for_each_online_cpu(i) { | ||
1665 | if (i == cpu) | ||
1666 | continue; | ||
1667 | if (phys_id == topology_physical_package_id(i)) { | ||
1668 | target = i; | ||
1669 | break; | ||
1670 | } | ||
1671 | } | ||
1672 | |||
1673 | /* migrate uncore events to the new cpu */ | ||
1674 | if (target >= 0) | ||
1675 | cpumask_set_cpu(target, &uncore_cpu_mask); | ||
1676 | |||
1677 | uncore_change_context(msr_uncores, cpu, target); | ||
1678 | uncore_change_context(pci_uncores, cpu, target); | ||
1679 | } | ||
1680 | |||
1681 | static void __cpuinit uncore_event_init_cpu(int cpu) | ||
1682 | { | ||
1683 | int i, phys_id; | ||
1684 | |||
1685 | phys_id = topology_physical_package_id(cpu); | ||
1686 | for_each_cpu(i, &uncore_cpu_mask) { | ||
1687 | if (phys_id == topology_physical_package_id(i)) | ||
1688 | return; | ||
1689 | } | ||
1690 | |||
1691 | cpumask_set_cpu(cpu, &uncore_cpu_mask); | ||
1692 | |||
1693 | uncore_change_context(msr_uncores, -1, cpu); | ||
1694 | uncore_change_context(pci_uncores, -1, cpu); | ||
1695 | } | ||
1696 | |||
1697 | static int __cpuinit uncore_cpu_notifier(struct notifier_block *self, | ||
1698 | unsigned long action, void *hcpu) | ||
1699 | { | ||
1700 | unsigned int cpu = (long)hcpu; | ||
1701 | |||
1702 | /* allocate/free data structure for uncore box */ | ||
1703 | switch (action & ~CPU_TASKS_FROZEN) { | ||
1704 | case CPU_UP_PREPARE: | ||
1705 | uncore_cpu_prepare(cpu, -1); | ||
1706 | break; | ||
1707 | case CPU_STARTING: | ||
1708 | uncore_cpu_starting(cpu); | ||
1709 | break; | ||
1710 | case CPU_UP_CANCELED: | ||
1711 | case CPU_DYING: | ||
1712 | uncore_cpu_dying(cpu); | ||
1713 | break; | ||
1714 | default: | ||
1715 | break; | ||
1716 | } | ||
1717 | |||
1718 | /* select the cpu that collects uncore events */ | ||
1719 | switch (action & ~CPU_TASKS_FROZEN) { | ||
1720 | case CPU_DOWN_FAILED: | ||
1721 | case CPU_STARTING: | ||
1722 | uncore_event_init_cpu(cpu); | ||
1723 | break; | ||
1724 | case CPU_DOWN_PREPARE: | ||
1725 | uncore_event_exit_cpu(cpu); | ||
1726 | break; | ||
1727 | default: | ||
1728 | break; | ||
1729 | } | ||
1730 | |||
1731 | return NOTIFY_OK; | ||
1732 | } | ||
1733 | |||
1734 | static struct notifier_block uncore_cpu_nb __cpuinitdata = { | ||
1735 | .notifier_call = uncore_cpu_notifier, | ||
1736 | /* | ||
1737 | * to migrate uncore events, our notifier should be executed | ||
1738 | * before perf core's notifier. | ||
1739 | */ | ||
1740 | .priority = CPU_PRI_PERF + 1, | ||
1741 | }; | ||
1742 | |||
1743 | static void __init uncore_cpu_setup(void *dummy) | ||
1744 | { | ||
1745 | uncore_cpu_starting(smp_processor_id()); | ||
1746 | } | ||
1747 | |||
1748 | static int __init uncore_cpu_init(void) | ||
1749 | { | ||
1750 | int ret, cpu, max_cores; | ||
1751 | |||
1752 | max_cores = boot_cpu_data.x86_max_cores; | ||
1753 | switch (boot_cpu_data.x86_model) { | ||
1754 | case 26: /* Nehalem */ | ||
1755 | case 30: | ||
1756 | case 37: /* Westmere */ | ||
1757 | case 44: | ||
1758 | msr_uncores = nhm_msr_uncores; | ||
1759 | break; | ||
1760 | case 42: /* Sandy Bridge */ | ||
1761 | if (snb_uncore_cbox.num_boxes > max_cores) | ||
1762 | snb_uncore_cbox.num_boxes = max_cores; | ||
1763 | msr_uncores = snb_msr_uncores; | ||
1764 | break; | ||
1765 | case 45: /* Sandy Birdge-EP */ | ||
1766 | if (snbep_uncore_cbox.num_boxes > max_cores) | ||
1767 | snbep_uncore_cbox.num_boxes = max_cores; | ||
1768 | msr_uncores = snbep_msr_uncores; | ||
1769 | break; | ||
1770 | default: | ||
1771 | return 0; | ||
1772 | } | ||
1773 | |||
1774 | ret = uncore_types_init(msr_uncores); | ||
1775 | if (ret) | ||
1776 | return ret; | ||
1777 | |||
1778 | get_online_cpus(); | ||
1779 | |||
1780 | for_each_online_cpu(cpu) { | ||
1781 | int i, phys_id = topology_physical_package_id(cpu); | ||
1782 | |||
1783 | for_each_cpu(i, &uncore_cpu_mask) { | ||
1784 | if (phys_id == topology_physical_package_id(i)) { | ||
1785 | phys_id = -1; | ||
1786 | break; | ||
1787 | } | ||
1788 | } | ||
1789 | if (phys_id < 0) | ||
1790 | continue; | ||
1791 | |||
1792 | uncore_cpu_prepare(cpu, phys_id); | ||
1793 | uncore_event_init_cpu(cpu); | ||
1794 | } | ||
1795 | on_each_cpu(uncore_cpu_setup, NULL, 1); | ||
1796 | |||
1797 | register_cpu_notifier(&uncore_cpu_nb); | ||
1798 | |||
1799 | put_online_cpus(); | ||
1800 | |||
1801 | return 0; | ||
1802 | } | ||
1803 | |||
1804 | static int __init uncore_pmus_register(void) | ||
1805 | { | ||
1806 | struct intel_uncore_pmu *pmu; | ||
1807 | struct intel_uncore_type *type; | ||
1808 | int i, j; | ||
1809 | |||
1810 | for (i = 0; msr_uncores[i]; i++) { | ||
1811 | type = msr_uncores[i]; | ||
1812 | for (j = 0; j < type->num_boxes; j++) { | ||
1813 | pmu = &type->pmus[j]; | ||
1814 | uncore_pmu_register(pmu); | ||
1815 | } | ||
1816 | } | ||
1817 | |||
1818 | for (i = 0; pci_uncores[i]; i++) { | ||
1819 | type = pci_uncores[i]; | ||
1820 | for (j = 0; j < type->num_boxes; j++) { | ||
1821 | pmu = &type->pmus[j]; | ||
1822 | uncore_pmu_register(pmu); | ||
1823 | } | ||
1824 | } | ||
1825 | |||
1826 | return 0; | ||
1827 | } | ||
1828 | |||
1829 | static int __init intel_uncore_init(void) | ||
1830 | { | ||
1831 | int ret; | ||
1832 | |||
1833 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
1834 | return -ENODEV; | ||
1835 | |||
1836 | ret = uncore_pci_init(); | ||
1837 | if (ret) | ||
1838 | goto fail; | ||
1839 | ret = uncore_cpu_init(); | ||
1840 | if (ret) { | ||
1841 | uncore_pci_exit(); | ||
1842 | goto fail; | ||
1843 | } | ||
1844 | |||
1845 | uncore_pmus_register(); | ||
1846 | return 0; | ||
1847 | fail: | ||
1848 | return ret; | ||
1849 | } | ||
1850 | device_initcall(intel_uncore_init); | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h new file mode 100644 index 00000000000..b13e9ea81de --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h | |||
@@ -0,0 +1,424 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/slab.h> | ||
3 | #include <linux/pci.h> | ||
4 | #include <linux/perf_event.h> | ||
5 | #include "perf_event.h" | ||
6 | |||
7 | #define UNCORE_PMU_NAME_LEN 32 | ||
8 | #define UNCORE_BOX_HASH_SIZE 8 | ||
9 | |||
10 | #define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC) | ||
11 | |||
12 | #define UNCORE_FIXED_EVENT 0xff | ||
13 | #define UNCORE_PMC_IDX_MAX_GENERIC 8 | ||
14 | #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC | ||
15 | #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) | ||
16 | |||
17 | #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) | ||
18 | |||
19 | /* SNB event control */ | ||
20 | #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff | ||
21 | #define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 | ||
22 | #define SNB_UNC_CTL_EDGE_DET (1 << 18) | ||
23 | #define SNB_UNC_CTL_EN (1 << 22) | ||
24 | #define SNB_UNC_CTL_INVERT (1 << 23) | ||
25 | #define SNB_UNC_CTL_CMASK_MASK 0x1f000000 | ||
26 | #define NHM_UNC_CTL_CMASK_MASK 0xff000000 | ||
27 | #define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0) | ||
28 | |||
29 | #define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ | ||
30 | SNB_UNC_CTL_UMASK_MASK | \ | ||
31 | SNB_UNC_CTL_EDGE_DET | \ | ||
32 | SNB_UNC_CTL_INVERT | \ | ||
33 | SNB_UNC_CTL_CMASK_MASK) | ||
34 | |||
35 | #define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ | ||
36 | SNB_UNC_CTL_UMASK_MASK | \ | ||
37 | SNB_UNC_CTL_EDGE_DET | \ | ||
38 | SNB_UNC_CTL_INVERT | \ | ||
39 | NHM_UNC_CTL_CMASK_MASK) | ||
40 | |||
41 | /* SNB global control register */ | ||
42 | #define SNB_UNC_PERF_GLOBAL_CTL 0x391 | ||
43 | #define SNB_UNC_FIXED_CTR_CTRL 0x394 | ||
44 | #define SNB_UNC_FIXED_CTR 0x395 | ||
45 | |||
46 | /* SNB uncore global control */ | ||
47 | #define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1) | ||
48 | #define SNB_UNC_GLOBAL_CTL_EN (1 << 29) | ||
49 | |||
50 | /* SNB Cbo register */ | ||
51 | #define SNB_UNC_CBO_0_PERFEVTSEL0 0x700 | ||
52 | #define SNB_UNC_CBO_0_PER_CTR0 0x706 | ||
53 | #define SNB_UNC_CBO_MSR_OFFSET 0x10 | ||
54 | |||
55 | /* NHM global control register */ | ||
56 | #define NHM_UNC_PERF_GLOBAL_CTL 0x391 | ||
57 | #define NHM_UNC_FIXED_CTR 0x394 | ||
58 | #define NHM_UNC_FIXED_CTR_CTRL 0x395 | ||
59 | |||
60 | /* NHM uncore global control */ | ||
61 | #define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1) | ||
62 | #define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32) | ||
63 | |||
64 | /* NHM uncore register */ | ||
65 | #define NHM_UNC_PERFEVTSEL0 0x3c0 | ||
66 | #define NHM_UNC_UNCORE_PMC0 0x3b0 | ||
67 | |||
68 | /* SNB-EP Box level control */ | ||
69 | #define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) | ||
70 | #define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1) | ||
71 | #define SNBEP_PMON_BOX_CTL_FRZ (1 << 8) | ||
72 | #define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16) | ||
73 | #define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ | ||
74 | SNBEP_PMON_BOX_CTL_RST_CTRS | \ | ||
75 | SNBEP_PMON_BOX_CTL_FRZ_EN) | ||
76 | /* SNB-EP event control */ | ||
77 | #define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff | ||
78 | #define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00 | ||
79 | #define SNBEP_PMON_CTL_RST (1 << 17) | ||
80 | #define SNBEP_PMON_CTL_EDGE_DET (1 << 18) | ||
81 | #define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) /* only for QPI */ | ||
82 | #define SNBEP_PMON_CTL_EN (1 << 22) | ||
83 | #define SNBEP_PMON_CTL_INVERT (1 << 23) | ||
84 | #define SNBEP_PMON_CTL_TRESH_MASK 0xff000000 | ||
85 | #define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ | ||
86 | SNBEP_PMON_CTL_UMASK_MASK | \ | ||
87 | SNBEP_PMON_CTL_EDGE_DET | \ | ||
88 | SNBEP_PMON_CTL_INVERT | \ | ||
89 | SNBEP_PMON_CTL_TRESH_MASK) | ||
90 | |||
91 | /* SNB-EP Ubox event control */ | ||
92 | #define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000 | ||
93 | #define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \ | ||
94 | (SNBEP_PMON_CTL_EV_SEL_MASK | \ | ||
95 | SNBEP_PMON_CTL_UMASK_MASK | \ | ||
96 | SNBEP_PMON_CTL_EDGE_DET | \ | ||
97 | SNBEP_PMON_CTL_INVERT | \ | ||
98 | SNBEP_U_MSR_PMON_CTL_TRESH_MASK) | ||
99 | |||
100 | #define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19) | ||
101 | #define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ | ||
102 | SNBEP_CBO_PMON_CTL_TID_EN) | ||
103 | |||
104 | /* SNB-EP PCU event control */ | ||
105 | #define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000 | ||
106 | #define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000 | ||
107 | #define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30) | ||
108 | #define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31) | ||
109 | #define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ | ||
110 | (SNBEP_PMON_CTL_EV_SEL_MASK | \ | ||
111 | SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ | ||
112 | SNBEP_PMON_CTL_EDGE_DET | \ | ||
113 | SNBEP_PMON_CTL_INVERT | \ | ||
114 | SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ | ||
115 | SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ | ||
116 | SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) | ||
117 | |||
118 | /* SNB-EP pci control register */ | ||
119 | #define SNBEP_PCI_PMON_BOX_CTL 0xf4 | ||
120 | #define SNBEP_PCI_PMON_CTL0 0xd8 | ||
121 | /* SNB-EP pci counter register */ | ||
122 | #define SNBEP_PCI_PMON_CTR0 0xa0 | ||
123 | |||
124 | /* SNB-EP home agent register */ | ||
125 | #define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40 | ||
126 | #define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44 | ||
127 | #define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48 | ||
128 | /* SNB-EP memory controller register */ | ||
129 | #define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0 | ||
130 | #define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0 | ||
131 | /* SNB-EP QPI register */ | ||
132 | #define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228 | ||
133 | #define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c | ||
134 | #define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238 | ||
135 | #define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c | ||
136 | |||
137 | /* SNB-EP Ubox register */ | ||
138 | #define SNBEP_U_MSR_PMON_CTR0 0xc16 | ||
139 | #define SNBEP_U_MSR_PMON_CTL0 0xc10 | ||
140 | |||
141 | #define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08 | ||
142 | #define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09 | ||
143 | |||
144 | /* SNB-EP Cbo register */ | ||
145 | #define SNBEP_C0_MSR_PMON_CTR0 0xd16 | ||
146 | #define SNBEP_C0_MSR_PMON_CTL0 0xd10 | ||
147 | #define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04 | ||
148 | #define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 | ||
149 | #define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK 0xfffffc1f | ||
150 | #define SNBEP_CBO_MSR_OFFSET 0x20 | ||
151 | |||
152 | /* SNB-EP PCU register */ | ||
153 | #define SNBEP_PCU_MSR_PMON_CTR0 0xc36 | ||
154 | #define SNBEP_PCU_MSR_PMON_CTL0 0xc30 | ||
155 | #define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24 | ||
156 | #define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34 | ||
157 | #define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff | ||
158 | #define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc | ||
159 | #define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd | ||
160 | |||
161 | struct intel_uncore_ops; | ||
162 | struct intel_uncore_pmu; | ||
163 | struct intel_uncore_box; | ||
164 | struct uncore_event_desc; | ||
165 | |||
166 | struct intel_uncore_type { | ||
167 | const char *name; | ||
168 | int num_counters; | ||
169 | int num_boxes; | ||
170 | int perf_ctr_bits; | ||
171 | int fixed_ctr_bits; | ||
172 | unsigned perf_ctr; | ||
173 | unsigned event_ctl; | ||
174 | unsigned event_mask; | ||
175 | unsigned fixed_ctr; | ||
176 | unsigned fixed_ctl; | ||
177 | unsigned box_ctl; | ||
178 | unsigned msr_offset; | ||
179 | unsigned num_shared_regs:8; | ||
180 | unsigned single_fixed:1; | ||
181 | struct event_constraint unconstrainted; | ||
182 | struct event_constraint *constraints; | ||
183 | struct intel_uncore_pmu *pmus; | ||
184 | struct intel_uncore_ops *ops; | ||
185 | struct uncore_event_desc *event_descs; | ||
186 | const struct attribute_group *attr_groups[3]; | ||
187 | }; | ||
188 | |||
189 | #define format_group attr_groups[0] | ||
190 | |||
191 | struct intel_uncore_ops { | ||
192 | void (*init_box)(struct intel_uncore_box *); | ||
193 | void (*disable_box)(struct intel_uncore_box *); | ||
194 | void (*enable_box)(struct intel_uncore_box *); | ||
195 | void (*disable_event)(struct intel_uncore_box *, struct perf_event *); | ||
196 | void (*enable_event)(struct intel_uncore_box *, struct perf_event *); | ||
197 | u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *); | ||
198 | int (*hw_config)(struct intel_uncore_box *, struct perf_event *); | ||
199 | struct event_constraint *(*get_constraint)(struct intel_uncore_box *, | ||
200 | struct perf_event *); | ||
201 | void (*put_constraint)(struct intel_uncore_box *, struct perf_event *); | ||
202 | }; | ||
203 | |||
204 | struct intel_uncore_pmu { | ||
205 | struct pmu pmu; | ||
206 | char name[UNCORE_PMU_NAME_LEN]; | ||
207 | int pmu_idx; | ||
208 | int func_id; | ||
209 | struct intel_uncore_type *type; | ||
210 | struct intel_uncore_box ** __percpu box; | ||
211 | struct list_head box_list; | ||
212 | }; | ||
213 | |||
214 | struct intel_uncore_extra_reg { | ||
215 | raw_spinlock_t lock; | ||
216 | u64 config1; | ||
217 | atomic_t ref; | ||
218 | }; | ||
219 | |||
220 | struct intel_uncore_box { | ||
221 | int phys_id; | ||
222 | int n_active; /* number of active events */ | ||
223 | int n_events; | ||
224 | int cpu; /* cpu to collect events */ | ||
225 | unsigned long flags; | ||
226 | atomic_t refcnt; | ||
227 | struct perf_event *events[UNCORE_PMC_IDX_MAX]; | ||
228 | struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; | ||
229 | unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; | ||
230 | u64 tags[UNCORE_PMC_IDX_MAX]; | ||
231 | struct pci_dev *pci_dev; | ||
232 | struct intel_uncore_pmu *pmu; | ||
233 | struct hrtimer hrtimer; | ||
234 | struct list_head list; | ||
235 | struct intel_uncore_extra_reg shared_regs[0]; | ||
236 | }; | ||
237 | |||
238 | #define UNCORE_BOX_FLAG_INITIATED 0 | ||
239 | |||
240 | struct uncore_event_desc { | ||
241 | struct kobj_attribute attr; | ||
242 | const char *config; | ||
243 | }; | ||
244 | |||
245 | #define INTEL_UNCORE_EVENT_DESC(_name, _config) \ | ||
246 | { \ | ||
247 | .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \ | ||
248 | .config = _config, \ | ||
249 | } | ||
250 | |||
251 | #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ | ||
252 | static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ | ||
253 | struct kobj_attribute *attr, \ | ||
254 | char *page) \ | ||
255 | { \ | ||
256 | BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ | ||
257 | return sprintf(page, _format "\n"); \ | ||
258 | } \ | ||
259 | static struct kobj_attribute format_attr_##_var = \ | ||
260 | __ATTR(_name, 0444, __uncore_##_var##_show, NULL) | ||
261 | |||
262 | |||
263 | static ssize_t uncore_event_show(struct kobject *kobj, | ||
264 | struct kobj_attribute *attr, char *buf) | ||
265 | { | ||
266 | struct uncore_event_desc *event = | ||
267 | container_of(attr, struct uncore_event_desc, attr); | ||
268 | return sprintf(buf, "%s", event->config); | ||
269 | } | ||
270 | |||
271 | static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) | ||
272 | { | ||
273 | return box->pmu->type->box_ctl; | ||
274 | } | ||
275 | |||
276 | static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box) | ||
277 | { | ||
278 | return box->pmu->type->fixed_ctl; | ||
279 | } | ||
280 | |||
281 | static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box) | ||
282 | { | ||
283 | return box->pmu->type->fixed_ctr; | ||
284 | } | ||
285 | |||
286 | static inline | ||
287 | unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx) | ||
288 | { | ||
289 | return idx * 4 + box->pmu->type->event_ctl; | ||
290 | } | ||
291 | |||
292 | static inline | ||
293 | unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx) | ||
294 | { | ||
295 | return idx * 8 + box->pmu->type->perf_ctr; | ||
296 | } | ||
297 | |||
298 | static inline | ||
299 | unsigned uncore_msr_box_ctl(struct intel_uncore_box *box) | ||
300 | { | ||
301 | if (!box->pmu->type->box_ctl) | ||
302 | return 0; | ||
303 | return box->pmu->type->box_ctl + | ||
304 | box->pmu->type->msr_offset * box->pmu->pmu_idx; | ||
305 | } | ||
306 | |||
307 | static inline | ||
308 | unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box) | ||
309 | { | ||
310 | if (!box->pmu->type->fixed_ctl) | ||
311 | return 0; | ||
312 | return box->pmu->type->fixed_ctl + | ||
313 | box->pmu->type->msr_offset * box->pmu->pmu_idx; | ||
314 | } | ||
315 | |||
316 | static inline | ||
317 | unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box) | ||
318 | { | ||
319 | return box->pmu->type->fixed_ctr + | ||
320 | box->pmu->type->msr_offset * box->pmu->pmu_idx; | ||
321 | } | ||
322 | |||
323 | static inline | ||
324 | unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) | ||
325 | { | ||
326 | return idx + box->pmu->type->event_ctl + | ||
327 | box->pmu->type->msr_offset * box->pmu->pmu_idx; | ||
328 | } | ||
329 | |||
330 | static inline | ||
331 | unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) | ||
332 | { | ||
333 | return idx + box->pmu->type->perf_ctr + | ||
334 | box->pmu->type->msr_offset * box->pmu->pmu_idx; | ||
335 | } | ||
336 | |||
337 | static inline | ||
338 | unsigned uncore_fixed_ctl(struct intel_uncore_box *box) | ||
339 | { | ||
340 | if (box->pci_dev) | ||
341 | return uncore_pci_fixed_ctl(box); | ||
342 | else | ||
343 | return uncore_msr_fixed_ctl(box); | ||
344 | } | ||
345 | |||
346 | static inline | ||
347 | unsigned uncore_fixed_ctr(struct intel_uncore_box *box) | ||
348 | { | ||
349 | if (box->pci_dev) | ||
350 | return uncore_pci_fixed_ctr(box); | ||
351 | else | ||
352 | return uncore_msr_fixed_ctr(box); | ||
353 | } | ||
354 | |||
355 | static inline | ||
356 | unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) | ||
357 | { | ||
358 | if (box->pci_dev) | ||
359 | return uncore_pci_event_ctl(box, idx); | ||
360 | else | ||
361 | return uncore_msr_event_ctl(box, idx); | ||
362 | } | ||
363 | |||
364 | static inline | ||
365 | unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx) | ||
366 | { | ||
367 | if (box->pci_dev) | ||
368 | return uncore_pci_perf_ctr(box, idx); | ||
369 | else | ||
370 | return uncore_msr_perf_ctr(box, idx); | ||
371 | } | ||
372 | |||
373 | static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box) | ||
374 | { | ||
375 | return box->pmu->type->perf_ctr_bits; | ||
376 | } | ||
377 | |||
378 | static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box) | ||
379 | { | ||
380 | return box->pmu->type->fixed_ctr_bits; | ||
381 | } | ||
382 | |||
383 | static inline int uncore_num_counters(struct intel_uncore_box *box) | ||
384 | { | ||
385 | return box->pmu->type->num_counters; | ||
386 | } | ||
387 | |||
388 | static inline void uncore_disable_box(struct intel_uncore_box *box) | ||
389 | { | ||
390 | if (box->pmu->type->ops->disable_box) | ||
391 | box->pmu->type->ops->disable_box(box); | ||
392 | } | ||
393 | |||
394 | static inline void uncore_enable_box(struct intel_uncore_box *box) | ||
395 | { | ||
396 | if (box->pmu->type->ops->enable_box) | ||
397 | box->pmu->type->ops->enable_box(box); | ||
398 | } | ||
399 | |||
400 | static inline void uncore_disable_event(struct intel_uncore_box *box, | ||
401 | struct perf_event *event) | ||
402 | { | ||
403 | box->pmu->type->ops->disable_event(box, event); | ||
404 | } | ||
405 | |||
406 | static inline void uncore_enable_event(struct intel_uncore_box *box, | ||
407 | struct perf_event *event) | ||
408 | { | ||
409 | box->pmu->type->ops->enable_event(box, event); | ||
410 | } | ||
411 | |||
412 | static inline u64 uncore_read_counter(struct intel_uncore_box *box, | ||
413 | struct perf_event *event) | ||
414 | { | ||
415 | return box->pmu->type->ops->read_counter(box, event); | ||
416 | } | ||
417 | |||
418 | static inline void uncore_box_init(struct intel_uncore_box *box) | ||
419 | { | ||
420 | if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { | ||
421 | if (box->pmu->type->ops->init_box) | ||
422 | box->pmu->type->ops->init_box(box); | ||
423 | } | ||
424 | } | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 47124a73dd7..92c7e39a079 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void) | |||
895 | * So at moment let leave metrics turned on forever -- it's | 895 | * So at moment let leave metrics turned on forever -- it's |
896 | * ok for now but need to be revisited! | 896 | * ok for now but need to be revisited! |
897 | * | 897 | * |
898 | * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); | 898 | * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0); |
899 | * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); | 899 | * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0); |
900 | */ | 900 | */ |
901 | } | 901 | } |
902 | 902 | ||
@@ -909,7 +909,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) | |||
909 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get | 909 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get |
910 | * asserted again and again | 910 | * asserted again and again |
911 | */ | 911 | */ |
912 | (void)checking_wrmsrl(hwc->config_base, | 912 | (void)wrmsrl_safe(hwc->config_base, |
913 | (u64)(p4_config_unpack_cccr(hwc->config)) & | 913 | (u64)(p4_config_unpack_cccr(hwc->config)) & |
914 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); | 914 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); |
915 | } | 915 | } |
@@ -943,8 +943,8 @@ static void p4_pmu_enable_pebs(u64 config) | |||
943 | 943 | ||
944 | bind = &p4_pebs_bind_map[idx]; | 944 | bind = &p4_pebs_bind_map[idx]; |
945 | 945 | ||
946 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); | 946 | (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); |
947 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); | 947 | (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); |
948 | } | 948 | } |
949 | 949 | ||
950 | static void p4_pmu_enable_event(struct perf_event *event) | 950 | static void p4_pmu_enable_event(struct perf_event *event) |
@@ -978,8 +978,8 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
978 | */ | 978 | */ |
979 | p4_pmu_enable_pebs(hwc->config); | 979 | p4_pmu_enable_pebs(hwc->config); |
980 | 980 | ||
981 | (void)checking_wrmsrl(escr_addr, escr_conf); | 981 | (void)wrmsrl_safe(escr_addr, escr_conf); |
982 | (void)checking_wrmsrl(hwc->config_base, | 982 | (void)wrmsrl_safe(hwc->config_base, |
983 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); | 983 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); |
984 | } | 984 | } |
985 | 985 | ||
@@ -1325,7 +1325,7 @@ __init int p4_pmu_init(void) | |||
1325 | unsigned int low, high; | 1325 | unsigned int low, high; |
1326 | 1326 | ||
1327 | /* If we get stripped -- indexing fails */ | 1327 | /* If we get stripped -- indexing fails */ |
1328 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | 1328 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); |
1329 | 1329 | ||
1330 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | 1330 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); |
1331 | if (!(low & (1 << 7))) { | 1331 | if (!(low & (1 << 7))) { |
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 32bcfc7dd23..e4dd0f7a045 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -71,7 +71,7 @@ p6_pmu_disable_event(struct perf_event *event) | |||
71 | if (cpuc->enabled) | 71 | if (cpuc->enabled) |
72 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | 72 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
73 | 73 | ||
74 | (void)checking_wrmsrl(hwc->config_base, val); | 74 | (void)wrmsrl_safe(hwc->config_base, val); |
75 | } | 75 | } |
76 | 76 | ||
77 | static void p6_pmu_enable_event(struct perf_event *event) | 77 | static void p6_pmu_enable_event(struct perf_event *event) |
@@ -84,7 +84,7 @@ static void p6_pmu_enable_event(struct perf_event *event) | |||
84 | if (cpuc->enabled) | 84 | if (cpuc->enabled) |
85 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | 85 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
86 | 86 | ||
87 | (void)checking_wrmsrl(hwc->config_base, val); | 87 | (void)wrmsrl_safe(hwc->config_base, val); |
88 | } | 88 | } |
89 | 89 | ||
90 | PMU_FORMAT_ATTR(event, "config:0-7" ); | 90 | PMU_FORMAT_ATTR(event, "config:0-7" ); |
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index addf9e82a7f..ee8e9abc859 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c | |||
@@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
31 | const struct cpuid_bit *cb; | 31 | const struct cpuid_bit *cb; |
32 | 32 | ||
33 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | 33 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { |
34 | { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, | 34 | { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 }, |
35 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, | 35 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, |
36 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, | 36 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, |
37 | { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, | 37 | { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, |
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c deleted file mode 100644 index a640ae5ad20..00000000000 --- a/arch/x86/kernel/cpu/sched.c +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/math64.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/irqflags.h> | ||
5 | |||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/processor.h> | ||
8 | |||
9 | #ifdef CONFIG_SMP | ||
10 | |||
11 | static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); | ||
12 | |||
13 | static unsigned long scale_aperfmperf(void) | ||
14 | { | ||
15 | struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); | ||
16 | unsigned long ratio, flags; | ||
17 | |||
18 | local_irq_save(flags); | ||
19 | get_aperfmperf(&val); | ||
20 | local_irq_restore(flags); | ||
21 | |||
22 | ratio = calc_aperfmperf_ratio(old, &val); | ||
23 | *old = val; | ||
24 | |||
25 | return ratio; | ||
26 | } | ||
27 | |||
28 | unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
29 | { | ||
30 | /* | ||
31 | * do aperf/mperf on the cpu level because it includes things | ||
32 | * like turbo mode, which are relevant to full cores. | ||
33 | */ | ||
34 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
35 | return scale_aperfmperf(); | ||
36 | |||
37 | /* | ||
38 | * maybe have something cpufreq here | ||
39 | */ | ||
40 | |||
41 | return default_scale_freq_power(sd, cpu); | ||
42 | } | ||
43 | |||
44 | unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
45 | { | ||
46 | /* | ||
47 | * aperf/mperf already includes the smt gain | ||
48 | */ | ||
49 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
50 | return SCHED_LOAD_SCALE; | ||
51 | |||
52 | return default_scale_smt_power(sd, cpu); | ||
53 | } | ||
54 | |||
55 | #endif | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 571246d81ed..ae42418bc50 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -27,8 +27,8 @@ static int die_counter; | |||
27 | 27 | ||
28 | void printk_address(unsigned long address, int reliable) | 28 | void printk_address(unsigned long address, int reliable) |
29 | { | 29 | { |
30 | printk(" [<%p>] %s%pB\n", (void *) address, | 30 | pr_cont(" [<%p>] %s%pB\n", |
31 | reliable ? "" : "? ", (void *) address); | 31 | (void *)address, reliable ? "" : "? ", (void *)address); |
32 | } | 32 | } |
33 | 33 | ||
34 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 34 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
@@ -271,6 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
271 | current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) | 271 | current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) |
272 | return 1; | 272 | return 1; |
273 | 273 | ||
274 | print_modules(); | ||
274 | show_regs(regs); | 275 | show_regs(regs); |
275 | #ifdef CONFIG_X86_32 | 276 | #ifdef CONFIG_X86_32 |
276 | if (user_mode_vm(regs)) { | 277 | if (user_mode_vm(regs)) { |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index e0b1d783daa..1038a417ea5 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -73,11 +73,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
73 | if (kstack_end(stack)) | 73 | if (kstack_end(stack)) |
74 | break; | 74 | break; |
75 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) | 75 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) |
76 | printk(KERN_CONT "\n"); | 76 | pr_cont("\n"); |
77 | printk(KERN_CONT " %08lx", *stack++); | 77 | pr_cont(" %08lx", *stack++); |
78 | touch_nmi_watchdog(); | 78 | touch_nmi_watchdog(); |
79 | } | 79 | } |
80 | printk(KERN_CONT "\n"); | 80 | pr_cont("\n"); |
81 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 81 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
82 | } | 82 | } |
83 | 83 | ||
@@ -86,12 +86,11 @@ void show_regs(struct pt_regs *regs) | |||
86 | { | 86 | { |
87 | int i; | 87 | int i; |
88 | 88 | ||
89 | print_modules(); | ||
90 | __show_regs(regs, !user_mode_vm(regs)); | 89 | __show_regs(regs, !user_mode_vm(regs)); |
91 | 90 | ||
92 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", | 91 | pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", |
93 | TASK_COMM_LEN, current->comm, task_pid_nr(current), | 92 | TASK_COMM_LEN, current->comm, task_pid_nr(current), |
94 | current_thread_info(), current, task_thread_info(current)); | 93 | current_thread_info(), current, task_thread_info(current)); |
95 | /* | 94 | /* |
96 | * When in-kernel, we also print out the stack and code at the | 95 | * When in-kernel, we also print out the stack and code at the |
97 | * time of the fault.. | 96 | * time of the fault.. |
@@ -102,10 +101,10 @@ void show_regs(struct pt_regs *regs) | |||
102 | unsigned char c; | 101 | unsigned char c; |
103 | u8 *ip; | 102 | u8 *ip; |
104 | 103 | ||
105 | printk(KERN_EMERG "Stack:\n"); | 104 | pr_emerg("Stack:\n"); |
106 | show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); | 105 | show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); |
107 | 106 | ||
108 | printk(KERN_EMERG "Code: "); | 107 | pr_emerg("Code:"); |
109 | 108 | ||
110 | ip = (u8 *)regs->ip - code_prologue; | 109 | ip = (u8 *)regs->ip - code_prologue; |
111 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | 110 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { |
@@ -116,16 +115,16 @@ void show_regs(struct pt_regs *regs) | |||
116 | for (i = 0; i < code_len; i++, ip++) { | 115 | for (i = 0; i < code_len; i++, ip++) { |
117 | if (ip < (u8 *)PAGE_OFFSET || | 116 | if (ip < (u8 *)PAGE_OFFSET || |
118 | probe_kernel_address(ip, c)) { | 117 | probe_kernel_address(ip, c)) { |
119 | printk(KERN_CONT " Bad EIP value."); | 118 | pr_cont(" Bad EIP value."); |
120 | break; | 119 | break; |
121 | } | 120 | } |
122 | if (ip == (u8 *)regs->ip) | 121 | if (ip == (u8 *)regs->ip) |
123 | printk(KERN_CONT "<%02x> ", c); | 122 | pr_cont(" <%02x>", c); |
124 | else | 123 | else |
125 | printk(KERN_CONT "%02x ", c); | 124 | pr_cont(" %02x", c); |
126 | } | 125 | } |
127 | } | 126 | } |
128 | printk(KERN_CONT "\n"); | 127 | pr_cont("\n"); |
129 | } | 128 | } |
130 | 129 | ||
131 | int is_valid_bugaddr(unsigned long ip) | 130 | int is_valid_bugaddr(unsigned long ip) |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 791b76122aa..b653675d528 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -228,20 +228,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
228 | if (stack >= irq_stack && stack <= irq_stack_end) { | 228 | if (stack >= irq_stack && stack <= irq_stack_end) { |
229 | if (stack == irq_stack_end) { | 229 | if (stack == irq_stack_end) { |
230 | stack = (unsigned long *) (irq_stack_end[-1]); | 230 | stack = (unsigned long *) (irq_stack_end[-1]); |
231 | printk(KERN_CONT " <EOI> "); | 231 | pr_cont(" <EOI> "); |
232 | } | 232 | } |
233 | } else { | 233 | } else { |
234 | if (((long) stack & (THREAD_SIZE-1)) == 0) | 234 | if (((long) stack & (THREAD_SIZE-1)) == 0) |
235 | break; | 235 | break; |
236 | } | 236 | } |
237 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) | 237 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) |
238 | printk(KERN_CONT "\n"); | 238 | pr_cont("\n"); |
239 | printk(KERN_CONT " %016lx", *stack++); | 239 | pr_cont(" %016lx", *stack++); |
240 | touch_nmi_watchdog(); | 240 | touch_nmi_watchdog(); |
241 | } | 241 | } |
242 | preempt_enable(); | 242 | preempt_enable(); |
243 | 243 | ||
244 | printk(KERN_CONT "\n"); | 244 | pr_cont("\n"); |
245 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 245 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
246 | } | 246 | } |
247 | 247 | ||
@@ -254,10 +254,9 @@ void show_regs(struct pt_regs *regs) | |||
254 | 254 | ||
255 | sp = regs->sp; | 255 | sp = regs->sp; |
256 | printk("CPU %d ", cpu); | 256 | printk("CPU %d ", cpu); |
257 | print_modules(); | ||
258 | __show_regs(regs, 1); | 257 | __show_regs(regs, 1); |
259 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | 258 | printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n", |
260 | cur->comm, cur->pid, task_thread_info(cur), cur); | 259 | cur->comm, cur->pid, task_thread_info(cur), cur); |
261 | 260 | ||
262 | /* | 261 | /* |
263 | * When in-kernel, we also print out the stack and code at the | 262 | * When in-kernel, we also print out the stack and code at the |
@@ -284,16 +283,16 @@ void show_regs(struct pt_regs *regs) | |||
284 | for (i = 0; i < code_len; i++, ip++) { | 283 | for (i = 0; i < code_len; i++, ip++) { |
285 | if (ip < (u8 *)PAGE_OFFSET || | 284 | if (ip < (u8 *)PAGE_OFFSET || |
286 | probe_kernel_address(ip, c)) { | 285 | probe_kernel_address(ip, c)) { |
287 | printk(KERN_CONT " Bad RIP value."); | 286 | pr_cont(" Bad RIP value."); |
288 | break; | 287 | break; |
289 | } | 288 | } |
290 | if (ip == (u8 *)regs->ip) | 289 | if (ip == (u8 *)regs->ip) |
291 | printk(KERN_CONT "<%02x> ", c); | 290 | pr_cont("<%02x> ", c); |
292 | else | 291 | else |
293 | printk(KERN_CONT "%02x ", c); | 292 | pr_cont("%02x ", c); |
294 | } | 293 | } |
295 | } | 294 | } |
296 | printk(KERN_CONT "\n"); | 295 | pr_cont("\n"); |
297 | } | 296 | } |
298 | 297 | ||
299 | int is_valid_bugaddr(unsigned long ip) | 298 | int is_valid_bugaddr(unsigned long ip) |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 62d61e9976e..41857970517 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -113,7 +113,9 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, | |||
113 | int x = e820x->nr_map; | 113 | int x = e820x->nr_map; |
114 | 114 | ||
115 | if (x >= ARRAY_SIZE(e820x->map)) { | 115 | if (x >= ARRAY_SIZE(e820x->map)) { |
116 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | 116 | printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", |
117 | (unsigned long long) start, | ||
118 | (unsigned long long) (start + size - 1)); | ||
117 | return; | 119 | return; |
118 | } | 120 | } |
119 | 121 | ||
@@ -133,19 +135,19 @@ static void __init e820_print_type(u32 type) | |||
133 | switch (type) { | 135 | switch (type) { |
134 | case E820_RAM: | 136 | case E820_RAM: |
135 | case E820_RESERVED_KERN: | 137 | case E820_RESERVED_KERN: |
136 | printk(KERN_CONT "(usable)"); | 138 | printk(KERN_CONT "usable"); |
137 | break; | 139 | break; |
138 | case E820_RESERVED: | 140 | case E820_RESERVED: |
139 | printk(KERN_CONT "(reserved)"); | 141 | printk(KERN_CONT "reserved"); |
140 | break; | 142 | break; |
141 | case E820_ACPI: | 143 | case E820_ACPI: |
142 | printk(KERN_CONT "(ACPI data)"); | 144 | printk(KERN_CONT "ACPI data"); |
143 | break; | 145 | break; |
144 | case E820_NVS: | 146 | case E820_NVS: |
145 | printk(KERN_CONT "(ACPI NVS)"); | 147 | printk(KERN_CONT "ACPI NVS"); |
146 | break; | 148 | break; |
147 | case E820_UNUSABLE: | 149 | case E820_UNUSABLE: |
148 | printk(KERN_CONT "(unusable)"); | 150 | printk(KERN_CONT "unusable"); |
149 | break; | 151 | break; |
150 | default: | 152 | default: |
151 | printk(KERN_CONT "type %u", type); | 153 | printk(KERN_CONT "type %u", type); |
@@ -158,10 +160,10 @@ void __init e820_print_map(char *who) | |||
158 | int i; | 160 | int i; |
159 | 161 | ||
160 | for (i = 0; i < e820.nr_map; i++) { | 162 | for (i = 0; i < e820.nr_map; i++) { |
161 | printk(KERN_INFO " %s: %016Lx - %016Lx ", who, | 163 | printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who, |
162 | (unsigned long long) e820.map[i].addr, | 164 | (unsigned long long) e820.map[i].addr, |
163 | (unsigned long long) | 165 | (unsigned long long) |
164 | (e820.map[i].addr + e820.map[i].size)); | 166 | (e820.map[i].addr + e820.map[i].size - 1)); |
165 | e820_print_type(e820.map[i].type); | 167 | e820_print_type(e820.map[i].type); |
166 | printk(KERN_CONT "\n"); | 168 | printk(KERN_CONT "\n"); |
167 | } | 169 | } |
@@ -428,9 +430,8 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start, | |||
428 | size = ULLONG_MAX - start; | 430 | size = ULLONG_MAX - start; |
429 | 431 | ||
430 | end = start + size; | 432 | end = start + size; |
431 | printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", | 433 | printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ", |
432 | (unsigned long long) start, | 434 | (unsigned long long) start, (unsigned long long) (end - 1)); |
433 | (unsigned long long) end); | ||
434 | e820_print_type(old_type); | 435 | e820_print_type(old_type); |
435 | printk(KERN_CONT " ==> "); | 436 | printk(KERN_CONT " ==> "); |
436 | e820_print_type(new_type); | 437 | e820_print_type(new_type); |
@@ -509,9 +510,8 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, | |||
509 | size = ULLONG_MAX - start; | 510 | size = ULLONG_MAX - start; |
510 | 511 | ||
511 | end = start + size; | 512 | end = start + size; |
512 | printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", | 513 | printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ", |
513 | (unsigned long long) start, | 514 | (unsigned long long) start, (unsigned long long) (end - 1)); |
514 | (unsigned long long) end); | ||
515 | if (checktype) | 515 | if (checktype) |
516 | e820_print_type(old_type); | 516 | e820_print_type(old_type); |
517 | printk(KERN_CONT "\n"); | 517 | printk(KERN_CONT "\n"); |
@@ -567,7 +567,7 @@ void __init update_e820(void) | |||
567 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) | 567 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) |
568 | return; | 568 | return; |
569 | e820.nr_map = nr_map; | 569 | e820.nr_map = nr_map; |
570 | printk(KERN_INFO "modified physical RAM map:\n"); | 570 | printk(KERN_INFO "e820: modified physical RAM map:\n"); |
571 | e820_print_map("modified"); | 571 | e820_print_map("modified"); |
572 | } | 572 | } |
573 | static void __init update_e820_saved(void) | 573 | static void __init update_e820_saved(void) |
@@ -637,8 +637,8 @@ __init void e820_setup_gap(void) | |||
637 | if (!found) { | 637 | if (!found) { |
638 | gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; | 638 | gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; |
639 | printk(KERN_ERR | 639 | printk(KERN_ERR |
640 | "PCI: Warning: Cannot find a gap in the 32bit address range\n" | 640 | "e820: cannot find a gap in the 32bit address range\n" |
641 | "PCI: Unassigned devices with 32bit resource registers may break!\n"); | 641 | "e820: PCI devices with unassigned 32bit BARs may break!\n"); |
642 | } | 642 | } |
643 | #endif | 643 | #endif |
644 | 644 | ||
@@ -648,8 +648,8 @@ __init void e820_setup_gap(void) | |||
648 | pci_mem_start = gapstart; | 648 | pci_mem_start = gapstart; |
649 | 649 | ||
650 | printk(KERN_INFO | 650 | printk(KERN_INFO |
651 | "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | 651 | "e820: [mem %#010lx-%#010lx] available for PCI devices\n", |
652 | pci_mem_start, gapstart, gapsize); | 652 | gapstart, gapstart + gapsize - 1); |
653 | } | 653 | } |
654 | 654 | ||
655 | /** | 655 | /** |
@@ -667,7 +667,7 @@ void __init parse_e820_ext(struct setup_data *sdata) | |||
667 | extmap = (struct e820entry *)(sdata->data); | 667 | extmap = (struct e820entry *)(sdata->data); |
668 | __append_e820_map(extmap, entries); | 668 | __append_e820_map(extmap, entries); |
669 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 669 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
670 | printk(KERN_INFO "extended physical RAM map:\n"); | 670 | printk(KERN_INFO "e820: extended physical RAM map:\n"); |
671 | e820_print_map("extended"); | 671 | e820_print_map("extended"); |
672 | } | 672 | } |
673 | 673 | ||
@@ -734,7 +734,7 @@ u64 __init early_reserve_e820(u64 size, u64 align) | |||
734 | addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); | 734 | addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); |
735 | if (addr) { | 735 | if (addr) { |
736 | e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); | 736 | e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED); |
737 | printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); | 737 | printk(KERN_INFO "e820: update e820_saved for early_reserve_e820\n"); |
738 | update_e820_saved(); | 738 | update_e820_saved(); |
739 | } | 739 | } |
740 | 740 | ||
@@ -784,7 +784,7 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) | |||
784 | if (last_pfn > max_arch_pfn) | 784 | if (last_pfn > max_arch_pfn) |
785 | last_pfn = max_arch_pfn; | 785 | last_pfn = max_arch_pfn; |
786 | 786 | ||
787 | printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", | 787 | printk(KERN_INFO "e820: last_pfn = %#lx max_arch_pfn = %#lx\n", |
788 | last_pfn, max_arch_pfn); | 788 | last_pfn, max_arch_pfn); |
789 | return last_pfn; | 789 | return last_pfn; |
790 | } | 790 | } |
@@ -888,7 +888,7 @@ void __init finish_e820_parsing(void) | |||
888 | early_panic("Invalid user supplied memory map"); | 888 | early_panic("Invalid user supplied memory map"); |
889 | e820.nr_map = nr; | 889 | e820.nr_map = nr; |
890 | 890 | ||
891 | printk(KERN_INFO "user-defined physical RAM map:\n"); | 891 | printk(KERN_INFO "e820: user-defined physical RAM map:\n"); |
892 | e820_print_map("user"); | 892 | e820_print_map("user"); |
893 | } | 893 | } |
894 | } | 894 | } |
@@ -996,8 +996,9 @@ void __init e820_reserve_resources_late(void) | |||
996 | end = MAX_RESOURCE_SIZE; | 996 | end = MAX_RESOURCE_SIZE; |
997 | if (start >= end) | 997 | if (start >= end) |
998 | continue; | 998 | continue; |
999 | printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", | 999 | printk(KERN_DEBUG |
1000 | start, end); | 1000 | "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n", |
1001 | start, end); | ||
1001 | reserve_region_with_split(&iomem_resource, start, end, | 1002 | reserve_region_with_split(&iomem_resource, start, end, |
1002 | "RAM buffer"); | 1003 | "RAM buffer"); |
1003 | } | 1004 | } |
@@ -1047,7 +1048,7 @@ void __init setup_memory_map(void) | |||
1047 | 1048 | ||
1048 | who = x86_init.resources.memory_setup(); | 1049 | who = x86_init.resources.memory_setup(); |
1049 | memcpy(&e820_saved, &e820, sizeof(struct e820map)); | 1050 | memcpy(&e820_saved, &e820, sizeof(struct e820map)); |
1050 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 1051 | printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n"); |
1051 | e820_print_map(who); | 1052 | e820_print_map(who); |
1052 | } | 1053 | } |
1053 | 1054 | ||
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 5e4771266f1..9b9f18b4991 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -119,7 +119,7 @@ static __init void early_serial_init(char *s) | |||
119 | unsigned char c; | 119 | unsigned char c; |
120 | unsigned divisor; | 120 | unsigned divisor; |
121 | unsigned baud = DEFAULT_BAUD; | 121 | unsigned baud = DEFAULT_BAUD; |
122 | ssize_t ret; | 122 | char *e; |
123 | 123 | ||
124 | if (*s == ',') | 124 | if (*s == ',') |
125 | ++s; | 125 | ++s; |
@@ -127,14 +127,14 @@ static __init void early_serial_init(char *s) | |||
127 | if (*s) { | 127 | if (*s) { |
128 | unsigned port; | 128 | unsigned port; |
129 | if (!strncmp(s, "0x", 2)) { | 129 | if (!strncmp(s, "0x", 2)) { |
130 | ret = kstrtoint(s, 16, &early_serial_base); | 130 | early_serial_base = simple_strtoul(s, &e, 16); |
131 | } else { | 131 | } else { |
132 | static const int __initconst bases[] = { 0x3f8, 0x2f8 }; | 132 | static const int __initconst bases[] = { 0x3f8, 0x2f8 }; |
133 | 133 | ||
134 | if (!strncmp(s, "ttyS", 4)) | 134 | if (!strncmp(s, "ttyS", 4)) |
135 | s += 4; | 135 | s += 4; |
136 | ret = kstrtouint(s, 10, &port); | 136 | port = simple_strtoul(s, &e, 10); |
137 | if (ret || port > 1) | 137 | if (port > 1 || s == e) |
138 | port = 0; | 138 | port = 0; |
139 | early_serial_base = bases[port]; | 139 | early_serial_base = bases[port]; |
140 | } | 140 | } |
@@ -149,8 +149,8 @@ static __init void early_serial_init(char *s) | |||
149 | outb(0x3, early_serial_base + MCR); /* DTR + RTS */ | 149 | outb(0x3, early_serial_base + MCR); /* DTR + RTS */ |
150 | 150 | ||
151 | if (*s) { | 151 | if (*s) { |
152 | ret = kstrtouint(s, 0, &baud); | 152 | baud = simple_strtoul(s, &e, 0); |
153 | if (ret || baud == 0) | 153 | if (baud == 0 || s == e) |
154 | baud = DEFAULT_BAUD; | 154 | baud = DEFAULT_BAUD; |
155 | } | 155 | } |
156 | 156 | ||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 01ccf9b7147..623f2883747 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -316,7 +316,6 @@ ret_from_exception: | |||
316 | preempt_stop(CLBR_ANY) | 316 | preempt_stop(CLBR_ANY) |
317 | ret_from_intr: | 317 | ret_from_intr: |
318 | GET_THREAD_INFO(%ebp) | 318 | GET_THREAD_INFO(%ebp) |
319 | resume_userspace_sig: | ||
320 | #ifdef CONFIG_VM86 | 319 | #ifdef CONFIG_VM86 |
321 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS | 320 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS |
322 | movb PT_CS(%esp), %al | 321 | movb PT_CS(%esp), %al |
@@ -615,9 +614,13 @@ work_notifysig: # deal with pending signals and | |||
615 | # vm86-space | 614 | # vm86-space |
616 | TRACE_IRQS_ON | 615 | TRACE_IRQS_ON |
617 | ENABLE_INTERRUPTS(CLBR_NONE) | 616 | ENABLE_INTERRUPTS(CLBR_NONE) |
617 | movb PT_CS(%esp), %bl | ||
618 | andb $SEGMENT_RPL_MASK, %bl | ||
619 | cmpb $USER_RPL, %bl | ||
620 | jb resume_kernel | ||
618 | xorl %edx, %edx | 621 | xorl %edx, %edx |
619 | call do_notify_resume | 622 | call do_notify_resume |
620 | jmp resume_userspace_sig | 623 | jmp resume_userspace |
621 | 624 | ||
622 | ALIGN | 625 | ALIGN |
623 | work_notifysig_v86: | 626 | work_notifysig_v86: |
@@ -630,9 +633,13 @@ work_notifysig_v86: | |||
630 | #endif | 633 | #endif |
631 | TRACE_IRQS_ON | 634 | TRACE_IRQS_ON |
632 | ENABLE_INTERRUPTS(CLBR_NONE) | 635 | ENABLE_INTERRUPTS(CLBR_NONE) |
636 | movb PT_CS(%esp), %bl | ||
637 | andb $SEGMENT_RPL_MASK, %bl | ||
638 | cmpb $USER_RPL, %bl | ||
639 | jb resume_kernel | ||
633 | xorl %edx, %edx | 640 | xorl %edx, %edx |
634 | call do_notify_resume | 641 | call do_notify_resume |
635 | jmp resume_userspace_sig | 642 | jmp resume_userspace |
636 | END(work_pending) | 643 | END(work_pending) |
637 | 644 | ||
638 | # perform syscall exit tracing | 645 | # perform syscall exit tracing |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 320852d0202..111f6bbd8b3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64) | |||
191 | .endm | 191 | .endm |
192 | 192 | ||
193 | /* | 193 | /* |
194 | * When dynamic function tracer is enabled it will add a breakpoint | ||
195 | * to all locations that it is about to modify, sync CPUs, update | ||
196 | * all the code, sync CPUs, then remove the breakpoints. In this time | ||
197 | * if lockdep is enabled, it might jump back into the debug handler | ||
198 | * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF). | ||
199 | * | ||
200 | * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to | ||
201 | * make sure the stack pointer does not get reset back to the top | ||
202 | * of the debug stack, and instead just reuses the current stack. | ||
203 | */ | ||
204 | #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) | ||
205 | |||
206 | .macro TRACE_IRQS_OFF_DEBUG | ||
207 | call debug_stack_set_zero | ||
208 | TRACE_IRQS_OFF | ||
209 | call debug_stack_reset | ||
210 | .endm | ||
211 | |||
212 | .macro TRACE_IRQS_ON_DEBUG | ||
213 | call debug_stack_set_zero | ||
214 | TRACE_IRQS_ON | ||
215 | call debug_stack_reset | ||
216 | .endm | ||
217 | |||
218 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET | ||
219 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | ||
220 | jnc 1f | ||
221 | TRACE_IRQS_ON_DEBUG | ||
222 | 1: | ||
223 | .endm | ||
224 | |||
225 | #else | ||
226 | # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF | ||
227 | # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON | ||
228 | # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ | ||
229 | #endif | ||
230 | |||
231 | /* | ||
194 | * C code is not supposed to know about undefined top of stack. Every time | 232 | * C code is not supposed to know about undefined top of stack. Every time |
195 | * a C function with an pt_regs argument is called from the SYSCALL based | 233 | * a C function with an pt_regs argument is called from the SYSCALL based |
196 | * fast path FIXUP_TOP_OF_STACK is needed. | 234 | * fast path FIXUP_TOP_OF_STACK is needed. |
@@ -1098,7 +1136,7 @@ ENTRY(\sym) | |||
1098 | subq $ORIG_RAX-R15, %rsp | 1136 | subq $ORIG_RAX-R15, %rsp |
1099 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1137 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1100 | call save_paranoid | 1138 | call save_paranoid |
1101 | TRACE_IRQS_OFF | 1139 | TRACE_IRQS_OFF_DEBUG |
1102 | movq %rsp,%rdi /* pt_regs pointer */ | 1140 | movq %rsp,%rdi /* pt_regs pointer */ |
1103 | xorl %esi,%esi /* no error code */ | 1141 | xorl %esi,%esi /* no error code */ |
1104 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) | 1142 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip) | |||
1393 | ENTRY(paranoid_exit) | 1431 | ENTRY(paranoid_exit) |
1394 | DEFAULT_FRAME | 1432 | DEFAULT_FRAME |
1395 | DISABLE_INTERRUPTS(CLBR_NONE) | 1433 | DISABLE_INTERRUPTS(CLBR_NONE) |
1396 | TRACE_IRQS_OFF | 1434 | TRACE_IRQS_OFF_DEBUG |
1397 | testl %ebx,%ebx /* swapgs needed? */ | 1435 | testl %ebx,%ebx /* swapgs needed? */ |
1398 | jnz paranoid_restore | 1436 | jnz paranoid_restore |
1399 | testl $3,CS(%rsp) | 1437 | testl $3,CS(%rsp) |
@@ -1404,7 +1442,7 @@ paranoid_swapgs: | |||
1404 | RESTORE_ALL 8 | 1442 | RESTORE_ALL 8 |
1405 | jmp irq_return | 1443 | jmp irq_return |
1406 | paranoid_restore: | 1444 | paranoid_restore: |
1407 | TRACE_IRQS_IRETQ 0 | 1445 | TRACE_IRQS_IRETQ_DEBUG 0 |
1408 | RESTORE_ALL 8 | 1446 | RESTORE_ALL 8 |
1409 | jmp irq_return | 1447 | jmp irq_return |
1410 | paranoid_userspace: | 1448 | paranoid_userspace: |
@@ -1720,10 +1758,30 @@ end_repeat_nmi: | |||
1720 | */ | 1758 | */ |
1721 | call save_paranoid | 1759 | call save_paranoid |
1722 | DEFAULT_FRAME 0 | 1760 | DEFAULT_FRAME 0 |
1761 | |||
1762 | /* | ||
1763 | * Save off the CR2 register. If we take a page fault in the NMI then | ||
1764 | * it could corrupt the CR2 value. If the NMI preempts a page fault | ||
1765 | * handler before it was able to read the CR2 register, and then the | ||
1766 | * NMI itself takes a page fault, the page fault that was preempted | ||
1767 | * will read the information from the NMI page fault and not the | ||
1768 | * origin fault. Save it off and restore it if it changes. | ||
1769 | * Use the r12 callee-saved register. | ||
1770 | */ | ||
1771 | movq %cr2, %r12 | ||
1772 | |||
1723 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | 1773 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
1724 | movq %rsp,%rdi | 1774 | movq %rsp,%rdi |
1725 | movq $-1,%rsi | 1775 | movq $-1,%rsi |
1726 | call do_nmi | 1776 | call do_nmi |
1777 | |||
1778 | /* Did the NMI take a page fault? Restore cr2 if it did */ | ||
1779 | movq %cr2, %rcx | ||
1780 | cmpq %rcx, %r12 | ||
1781 | je 1f | ||
1782 | movq %r12, %cr2 | ||
1783 | 1: | ||
1784 | |||
1727 | testl %ebx,%ebx /* swapgs needed? */ | 1785 | testl %ebx,%ebx /* swapgs needed? */ |
1728 | jnz nmi_restore | 1786 | jnz nmi_restore |
1729 | nmi_swapgs: | 1787 | nmi_swapgs: |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 32ff36596ab..c3a7cb4bf6e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void) | |||
100 | } | 100 | } |
101 | 101 | ||
102 | static int | 102 | static int |
103 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | 103 | ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, |
104 | unsigned const char *new_code) | 104 | unsigned const char *new_code) |
105 | { | 105 | { |
106 | unsigned char replaced[MCOUNT_INSN_SIZE]; | 106 | unsigned char replaced[MCOUNT_INSN_SIZE]; |
@@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod, | |||
141 | old = ftrace_call_replace(ip, addr); | 141 | old = ftrace_call_replace(ip, addr); |
142 | new = ftrace_nop_replace(); | 142 | new = ftrace_nop_replace(); |
143 | 143 | ||
144 | return ftrace_modify_code(rec->ip, old, new); | 144 | /* |
145 | * On boot up, and when modules are loaded, the MCOUNT_ADDR | ||
146 | * is converted to a nop, and will never become MCOUNT_ADDR | ||
147 | * again. This code is either running before SMP (on boot up) | ||
148 | * or before the code will ever be executed (module load). | ||
149 | * We do not want to use the breakpoint version in this case, | ||
150 | * just modify the code directly. | ||
151 | */ | ||
152 | if (addr == MCOUNT_ADDR) | ||
153 | return ftrace_modify_code_direct(rec->ip, old, new); | ||
154 | |||
155 | /* Normal cases use add_brk_on_nop */ | ||
156 | WARN_ONCE(1, "invalid use of ftrace_make_nop"); | ||
157 | return -EINVAL; | ||
145 | } | 158 | } |
146 | 159 | ||
147 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | 160 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) |
@@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | |||
152 | old = ftrace_nop_replace(); | 165 | old = ftrace_nop_replace(); |
153 | new = ftrace_call_replace(ip, addr); | 166 | new = ftrace_call_replace(ip, addr); |
154 | 167 | ||
155 | return ftrace_modify_code(rec->ip, old, new); | 168 | /* Should only be called when module is loaded */ |
169 | return ftrace_modify_code_direct(rec->ip, old, new); | ||
156 | } | 170 | } |
157 | 171 | ||
172 | /* | ||
173 | * The modifying_ftrace_code is used to tell the breakpoint | ||
174 | * handler to call ftrace_int3_handler(). If it fails to | ||
175 | * call this handler for a breakpoint added by ftrace, then | ||
176 | * the kernel may crash. | ||
177 | * | ||
178 | * As atomic_writes on x86 do not need a barrier, we do not | ||
179 | * need to add smp_mb()s for this to work. It is also considered | ||
180 | * that we can not read the modifying_ftrace_code before | ||
181 | * executing the breakpoint. That would be quite remarkable if | ||
182 | * it could do that. Here's the flow that is required: | ||
183 | * | ||
184 | * CPU-0 CPU-1 | ||
185 | * | ||
186 | * atomic_inc(mfc); | ||
187 | * write int3s | ||
188 | * <trap-int3> // implicit (r)mb | ||
189 | * if (atomic_read(mfc)) | ||
190 | * call ftrace_int3_handler() | ||
191 | * | ||
192 | * Then when we are finished: | ||
193 | * | ||
194 | * atomic_dec(mfc); | ||
195 | * | ||
196 | * If we hit a breakpoint that was not set by ftrace, it does not | ||
197 | * matter if ftrace_int3_handler() is called or not. It will | ||
198 | * simply be ignored. But it is crucial that a ftrace nop/caller | ||
199 | * breakpoint is handled. No other user should ever place a | ||
200 | * breakpoint on an ftrace nop/caller location. It must only | ||
201 | * be done by this code. | ||
202 | */ | ||
203 | atomic_t modifying_ftrace_code __read_mostly; | ||
204 | |||
205 | static int | ||
206 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | ||
207 | unsigned const char *new_code); | ||
208 | |||
158 | int ftrace_update_ftrace_func(ftrace_func_t func) | 209 | int ftrace_update_ftrace_func(ftrace_func_t func) |
159 | { | 210 | { |
160 | unsigned long ip = (unsigned long)(&ftrace_call); | 211 | unsigned long ip = (unsigned long)(&ftrace_call); |
@@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
163 | 214 | ||
164 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); | 215 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); |
165 | new = ftrace_call_replace(ip, (unsigned long)func); | 216 | new = ftrace_call_replace(ip, (unsigned long)func); |
217 | |||
218 | /* See comment above by declaration of modifying_ftrace_code */ | ||
219 | atomic_inc(&modifying_ftrace_code); | ||
220 | |||
166 | ret = ftrace_modify_code(ip, old, new); | 221 | ret = ftrace_modify_code(ip, old, new); |
167 | 222 | ||
223 | atomic_dec(&modifying_ftrace_code); | ||
224 | |||
168 | return ret; | 225 | return ret; |
169 | } | 226 | } |
170 | 227 | ||
171 | int modifying_ftrace_code __read_mostly; | ||
172 | |||
173 | /* | 228 | /* |
174 | * A breakpoint was added to the code address we are about to | 229 | * A breakpoint was added to the code address we are about to |
175 | * modify, and this is the handle that will just skip over it. | 230 | * modify, and this is the handle that will just skip over it. |
@@ -489,13 +544,46 @@ void ftrace_replace_code(int enable) | |||
489 | } | 544 | } |
490 | } | 545 | } |
491 | 546 | ||
547 | static int | ||
548 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | ||
549 | unsigned const char *new_code) | ||
550 | { | ||
551 | int ret; | ||
552 | |||
553 | ret = add_break(ip, old_code); | ||
554 | if (ret) | ||
555 | goto out; | ||
556 | |||
557 | run_sync(); | ||
558 | |||
559 | ret = add_update_code(ip, new_code); | ||
560 | if (ret) | ||
561 | goto fail_update; | ||
562 | |||
563 | run_sync(); | ||
564 | |||
565 | ret = ftrace_write(ip, new_code, 1); | ||
566 | if (ret) { | ||
567 | ret = -EPERM; | ||
568 | goto out; | ||
569 | } | ||
570 | run_sync(); | ||
571 | out: | ||
572 | return ret; | ||
573 | |||
574 | fail_update: | ||
575 | probe_kernel_write((void *)ip, &old_code[0], 1); | ||
576 | goto out; | ||
577 | } | ||
578 | |||
492 | void arch_ftrace_update_code(int command) | 579 | void arch_ftrace_update_code(int command) |
493 | { | 580 | { |
494 | modifying_ftrace_code++; | 581 | /* See comment above by declaration of modifying_ftrace_code */ |
582 | atomic_inc(&modifying_ftrace_code); | ||
495 | 583 | ||
496 | ftrace_modify_all_code(command); | 584 | ftrace_modify_all_code(command); |
497 | 585 | ||
498 | modifying_ftrace_code--; | 586 | atomic_dec(&modifying_ftrace_code); |
499 | } | 587 | } |
500 | 588 | ||
501 | int __init ftrace_dyn_arch_init(void *data) | 589 | int __init ftrace_dyn_arch_init(void *data) |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 51ff18616d5..c18f59d1010 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <asm/sections.h> | 14 | #include <asm/sections.h> |
15 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
16 | #include <asm/page.h> | 16 | #include <asm/page.h> |
17 | #include <asm/trampoline.h> | ||
18 | #include <asm/apic.h> | 17 | #include <asm/apic.h> |
19 | #include <asm/io_apic.h> | 18 | #include <asm/io_apic.h> |
20 | #include <asm/bios_ebda.h> | 19 | #include <asm/bios_ebda.h> |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 3a3b779f41d..037df57a99a 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <asm/sections.h> | 24 | #include <asm/sections.h> |
25 | #include <asm/kdebug.h> | 25 | #include <asm/kdebug.h> |
26 | #include <asm/e820.h> | 26 | #include <asm/e820.h> |
27 | #include <asm/trampoline.h> | ||
28 | #include <asm/bios_ebda.h> | 27 | #include <asm/bios_ebda.h> |
29 | 28 | ||
30 | static void __init zap_identity_mappings(void) | 29 | static void __init zap_identity_mappings(void) |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 463c9797ca6..d42ab17b739 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -274,10 +274,7 @@ num_subarch_entries = (. - subarch_entries) / 4 | |||
274 | * If cpu hotplug is not supported then this code can go in init section | 274 | * If cpu hotplug is not supported then this code can go in init section |
275 | * which will be freed later | 275 | * which will be freed later |
276 | */ | 276 | */ |
277 | |||
278 | __CPUINIT | 277 | __CPUINIT |
279 | |||
280 | #ifdef CONFIG_SMP | ||
281 | ENTRY(startup_32_smp) | 278 | ENTRY(startup_32_smp) |
282 | cld | 279 | cld |
283 | movl $(__BOOT_DS),%eax | 280 | movl $(__BOOT_DS),%eax |
@@ -288,7 +285,7 @@ ENTRY(startup_32_smp) | |||
288 | movl pa(stack_start),%ecx | 285 | movl pa(stack_start),%ecx |
289 | movl %eax,%ss | 286 | movl %eax,%ss |
290 | leal -__PAGE_OFFSET(%ecx),%esp | 287 | leal -__PAGE_OFFSET(%ecx),%esp |
291 | #endif /* CONFIG_SMP */ | 288 | |
292 | default_entry: | 289 | default_entry: |
293 | 290 | ||
294 | /* | 291 | /* |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7a40f244732..94bf9cc2c7e 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -139,10 +139,6 @@ ident_complete: | |||
139 | /* Fixup phys_base */ | 139 | /* Fixup phys_base */ |
140 | addq %rbp, phys_base(%rip) | 140 | addq %rbp, phys_base(%rip) |
141 | 141 | ||
142 | /* Fixup trampoline */ | ||
143 | addq %rbp, trampoline_level4_pgt + 0(%rip) | ||
144 | addq %rbp, trampoline_level4_pgt + (511*8)(%rip) | ||
145 | |||
146 | /* Due to ENTRY(), sometimes the empty space gets filled with | 142 | /* Due to ENTRY(), sometimes the empty space gets filled with |
147 | * zeros. Better take a jmp than relying on empty space being | 143 | * zeros. Better take a jmp than relying on empty space being |
148 | * filled with 0x90 (nop) | 144 | * filled with 0x90 (nop) |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ad0de0c2714..1460a5df92f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -94,13 +94,18 @@ static int hpet_verbose; | |||
94 | 94 | ||
95 | static int __init hpet_setup(char *str) | 95 | static int __init hpet_setup(char *str) |
96 | { | 96 | { |
97 | if (str) { | 97 | while (str) { |
98 | char *next = strchr(str, ','); | ||
99 | |||
100 | if (next) | ||
101 | *next++ = 0; | ||
98 | if (!strncmp("disable", str, 7)) | 102 | if (!strncmp("disable", str, 7)) |
99 | boot_hpet_disable = 1; | 103 | boot_hpet_disable = 1; |
100 | if (!strncmp("force", str, 5)) | 104 | if (!strncmp("force", str, 5)) |
101 | hpet_force_user = 1; | 105 | hpet_force_user = 1; |
102 | if (!strncmp("verbose", str, 7)) | 106 | if (!strncmp("verbose", str, 7)) |
103 | hpet_verbose = 1; | 107 | hpet_verbose = 1; |
108 | str = next; | ||
104 | } | 109 | } |
105 | return 1; | 110 | return 1; |
106 | } | 111 | } |
@@ -319,8 +324,6 @@ static void hpet_set_mode(enum clock_event_mode mode, | |||
319 | now = hpet_readl(HPET_COUNTER); | 324 | now = hpet_readl(HPET_COUNTER); |
320 | cmp = now + (unsigned int) delta; | 325 | cmp = now + (unsigned int) delta; |
321 | cfg = hpet_readl(HPET_Tn_CFG(timer)); | 326 | cfg = hpet_readl(HPET_Tn_CFG(timer)); |
322 | /* Make sure we use edge triggered interrupts */ | ||
323 | cfg &= ~HPET_TN_LEVEL; | ||
324 | cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | | 327 | cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | |
325 | HPET_TN_SETVAL | HPET_TN_32BIT; | 328 | HPET_TN_SETVAL | HPET_TN_32BIT; |
326 | hpet_writel(cfg, HPET_Tn_CFG(timer)); | 329 | hpet_writel(cfg, HPET_Tn_CFG(timer)); |
@@ -787,15 +790,16 @@ static int hpet_clocksource_register(void) | |||
787 | return 0; | 790 | return 0; |
788 | } | 791 | } |
789 | 792 | ||
793 | static u32 *hpet_boot_cfg; | ||
794 | |||
790 | /** | 795 | /** |
791 | * hpet_enable - Try to setup the HPET timer. Returns 1 on success. | 796 | * hpet_enable - Try to setup the HPET timer. Returns 1 on success. |
792 | */ | 797 | */ |
793 | int __init hpet_enable(void) | 798 | int __init hpet_enable(void) |
794 | { | 799 | { |
795 | unsigned long hpet_period; | 800 | u32 hpet_period, cfg, id; |
796 | unsigned int id; | ||
797 | u64 freq; | 801 | u64 freq; |
798 | int i; | 802 | unsigned int i, last; |
799 | 803 | ||
800 | if (!is_hpet_capable()) | 804 | if (!is_hpet_capable()) |
801 | return 0; | 805 | return 0; |
@@ -847,15 +851,45 @@ int __init hpet_enable(void) | |||
847 | id = hpet_readl(HPET_ID); | 851 | id = hpet_readl(HPET_ID); |
848 | hpet_print_config(); | 852 | hpet_print_config(); |
849 | 853 | ||
854 | last = (id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; | ||
855 | |||
850 | #ifdef CONFIG_HPET_EMULATE_RTC | 856 | #ifdef CONFIG_HPET_EMULATE_RTC |
851 | /* | 857 | /* |
852 | * The legacy routing mode needs at least two channels, tick timer | 858 | * The legacy routing mode needs at least two channels, tick timer |
853 | * and the rtc emulation channel. | 859 | * and the rtc emulation channel. |
854 | */ | 860 | */ |
855 | if (!(id & HPET_ID_NUMBER)) | 861 | if (!last) |
856 | goto out_nohpet; | 862 | goto out_nohpet; |
857 | #endif | 863 | #endif |
858 | 864 | ||
865 | cfg = hpet_readl(HPET_CFG); | ||
866 | hpet_boot_cfg = kmalloc((last + 2) * sizeof(*hpet_boot_cfg), | ||
867 | GFP_KERNEL); | ||
868 | if (hpet_boot_cfg) | ||
869 | *hpet_boot_cfg = cfg; | ||
870 | else | ||
871 | pr_warn("HPET initial state will not be saved\n"); | ||
872 | cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); | ||
873 | hpet_writel(cfg, HPET_CFG); | ||
874 | if (cfg) | ||
875 | pr_warn("HPET: Unrecognized bits %#x set in global cfg\n", | ||
876 | cfg); | ||
877 | |||
878 | for (i = 0; i <= last; ++i) { | ||
879 | cfg = hpet_readl(HPET_Tn_CFG(i)); | ||
880 | if (hpet_boot_cfg) | ||
881 | hpet_boot_cfg[i + 1] = cfg; | ||
882 | cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB); | ||
883 | hpet_writel(cfg, HPET_Tn_CFG(i)); | ||
884 | cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP | ||
885 | | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE | ||
886 | | HPET_TN_FSB | HPET_TN_FSB_CAP); | ||
887 | if (cfg) | ||
888 | pr_warn("HPET: Unrecognized bits %#x set in cfg#%u\n", | ||
889 | cfg, i); | ||
890 | } | ||
891 | hpet_print_config(); | ||
892 | |||
859 | if (hpet_clocksource_register()) | 893 | if (hpet_clocksource_register()) |
860 | goto out_nohpet; | 894 | goto out_nohpet; |
861 | 895 | ||
@@ -923,14 +957,28 @@ fs_initcall(hpet_late_init); | |||
923 | void hpet_disable(void) | 957 | void hpet_disable(void) |
924 | { | 958 | { |
925 | if (is_hpet_capable() && hpet_virt_address) { | 959 | if (is_hpet_capable() && hpet_virt_address) { |
926 | unsigned int cfg = hpet_readl(HPET_CFG); | 960 | unsigned int cfg = hpet_readl(HPET_CFG), id, last; |
927 | 961 | ||
928 | if (hpet_legacy_int_enabled) { | 962 | if (hpet_boot_cfg) |
963 | cfg = *hpet_boot_cfg; | ||
964 | else if (hpet_legacy_int_enabled) { | ||
929 | cfg &= ~HPET_CFG_LEGACY; | 965 | cfg &= ~HPET_CFG_LEGACY; |
930 | hpet_legacy_int_enabled = 0; | 966 | hpet_legacy_int_enabled = 0; |
931 | } | 967 | } |
932 | cfg &= ~HPET_CFG_ENABLE; | 968 | cfg &= ~HPET_CFG_ENABLE; |
933 | hpet_writel(cfg, HPET_CFG); | 969 | hpet_writel(cfg, HPET_CFG); |
970 | |||
971 | if (!hpet_boot_cfg) | ||
972 | return; | ||
973 | |||
974 | id = hpet_readl(HPET_ID); | ||
975 | last = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); | ||
976 | |||
977 | for (id = 0; id <= last; ++id) | ||
978 | hpet_writel(hpet_boot_cfg[id + 1], HPET_Tn_CFG(id)); | ||
979 | |||
980 | if (*hpet_boot_cfg & HPET_CFG_ENABLE) | ||
981 | hpet_writel(*hpet_boot_cfg, HPET_CFG); | ||
934 | } | 982 | } |
935 | } | 983 | } |
936 | 984 | ||
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3dafc6003b7..1f5f1d5d2a0 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -294,9 +294,9 @@ void fixup_irqs(void) | |||
294 | raw_spin_unlock(&desc->lock); | 294 | raw_spin_unlock(&desc->lock); |
295 | 295 | ||
296 | if (break_affinity && set_affinity) | 296 | if (break_affinity && set_affinity) |
297 | printk("Broke affinity for irq %i\n", irq); | 297 | pr_notice("Broke affinity for irq %i\n", irq); |
298 | else if (!set_affinity) | 298 | else if (!set_affinity) |
299 | printk("Cannot set affinity for irq %i\n", irq); | 299 | pr_notice("Cannot set affinity for irq %i\n", irq); |
300 | } | 300 | } |
301 | 301 | ||
302 | /* | 302 | /* |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8bfb6146f75..3f61904365c 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -444,12 +444,12 @@ void kgdb_roundup_cpus(unsigned long flags) | |||
444 | 444 | ||
445 | /** | 445 | /** |
446 | * kgdb_arch_handle_exception - Handle architecture specific GDB packets. | 446 | * kgdb_arch_handle_exception - Handle architecture specific GDB packets. |
447 | * @vector: The error vector of the exception that happened. | 447 | * @e_vector: The error vector of the exception that happened. |
448 | * @signo: The signal number of the exception that happened. | 448 | * @signo: The signal number of the exception that happened. |
449 | * @err_code: The error code of the exception that happened. | 449 | * @err_code: The error code of the exception that happened. |
450 | * @remcom_in_buffer: The buffer of the packet we have read. | 450 | * @remcomInBuffer: The buffer of the packet we have read. |
451 | * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. | 451 | * @remcomOutBuffer: The buffer of %BUFMAX bytes to write a packet into. |
452 | * @regs: The &struct pt_regs of the current process. | 452 | * @linux_regs: The &struct pt_regs of the current process. |
453 | * | 453 | * |
454 | * This function MUST handle the 'c' and 's' command packets, | 454 | * This function MUST handle the 'c' and 's' command packets, |
455 | * as well packets to set / remove a hardware breakpoint, if used. | 455 | * as well packets to set / remove a hardware breakpoint, if used. |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e554e5ad2fe..c1d61ee4b4f 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -39,6 +39,9 @@ | |||
39 | #include <asm/desc.h> | 39 | #include <asm/desc.h> |
40 | #include <asm/tlbflush.h> | 40 | #include <asm/tlbflush.h> |
41 | #include <asm/idle.h> | 41 | #include <asm/idle.h> |
42 | #include <asm/apic.h> | ||
43 | #include <asm/apicdef.h> | ||
44 | #include <asm/hypervisor.h> | ||
42 | 45 | ||
43 | static int kvmapf = 1; | 46 | static int kvmapf = 1; |
44 | 47 | ||
@@ -283,6 +286,22 @@ static void kvm_register_steal_time(void) | |||
283 | cpu, __pa(st)); | 286 | cpu, __pa(st)); |
284 | } | 287 | } |
285 | 288 | ||
289 | static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; | ||
290 | |||
291 | static void kvm_guest_apic_eoi_write(u32 reg, u32 val) | ||
292 | { | ||
293 | /** | ||
294 | * This relies on __test_and_clear_bit to modify the memory | ||
295 | * in a way that is atomic with respect to the local CPU. | ||
296 | * The hypervisor only accesses this memory from the local CPU so | ||
297 | * there's no need for lock or memory barriers. | ||
298 | * An optimization barrier is implied in apic write. | ||
299 | */ | ||
300 | if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi))) | ||
301 | return; | ||
302 | apic_write(APIC_EOI, APIC_EOI_ACK); | ||
303 | } | ||
304 | |||
286 | void __cpuinit kvm_guest_cpu_init(void) | 305 | void __cpuinit kvm_guest_cpu_init(void) |
287 | { | 306 | { |
288 | if (!kvm_para_available()) | 307 | if (!kvm_para_available()) |
@@ -300,11 +319,20 @@ void __cpuinit kvm_guest_cpu_init(void) | |||
300 | smp_processor_id()); | 319 | smp_processor_id()); |
301 | } | 320 | } |
302 | 321 | ||
322 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { | ||
323 | unsigned long pa; | ||
324 | /* Size alignment is implied but just to make it explicit. */ | ||
325 | BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); | ||
326 | __get_cpu_var(kvm_apic_eoi) = 0; | ||
327 | pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; | ||
328 | wrmsrl(MSR_KVM_PV_EOI_EN, pa); | ||
329 | } | ||
330 | |||
303 | if (has_steal_clock) | 331 | if (has_steal_clock) |
304 | kvm_register_steal_time(); | 332 | kvm_register_steal_time(); |
305 | } | 333 | } |
306 | 334 | ||
307 | static void kvm_pv_disable_apf(void *unused) | 335 | static void kvm_pv_disable_apf(void) |
308 | { | 336 | { |
309 | if (!__get_cpu_var(apf_reason).enabled) | 337 | if (!__get_cpu_var(apf_reason).enabled) |
310 | return; | 338 | return; |
@@ -316,11 +344,23 @@ static void kvm_pv_disable_apf(void *unused) | |||
316 | smp_processor_id()); | 344 | smp_processor_id()); |
317 | } | 345 | } |
318 | 346 | ||
347 | static void kvm_pv_guest_cpu_reboot(void *unused) | ||
348 | { | ||
349 | /* | ||
350 | * We disable PV EOI before we load a new kernel by kexec, | ||
351 | * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory. | ||
352 | * New kernel can re-enable when it boots. | ||
353 | */ | ||
354 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | ||
355 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); | ||
356 | kvm_pv_disable_apf(); | ||
357 | } | ||
358 | |||
319 | static int kvm_pv_reboot_notify(struct notifier_block *nb, | 359 | static int kvm_pv_reboot_notify(struct notifier_block *nb, |
320 | unsigned long code, void *unused) | 360 | unsigned long code, void *unused) |
321 | { | 361 | { |
322 | if (code == SYS_RESTART) | 362 | if (code == SYS_RESTART) |
323 | on_each_cpu(kvm_pv_disable_apf, NULL, 1); | 363 | on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); |
324 | return NOTIFY_DONE; | 364 | return NOTIFY_DONE; |
325 | } | 365 | } |
326 | 366 | ||
@@ -371,7 +411,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) | |||
371 | static void kvm_guest_cpu_offline(void *dummy) | 411 | static void kvm_guest_cpu_offline(void *dummy) |
372 | { | 412 | { |
373 | kvm_disable_steal_time(); | 413 | kvm_disable_steal_time(); |
374 | kvm_pv_disable_apf(NULL); | 414 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) |
415 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); | ||
416 | kvm_pv_disable_apf(); | ||
375 | apf_task_wake_all(); | 417 | apf_task_wake_all(); |
376 | } | 418 | } |
377 | 419 | ||
@@ -424,6 +466,9 @@ void __init kvm_guest_init(void) | |||
424 | pv_time_ops.steal_clock = kvm_steal_clock; | 466 | pv_time_ops.steal_clock = kvm_steal_clock; |
425 | } | 467 | } |
426 | 468 | ||
469 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | ||
470 | apic_set_eoi_write(kvm_guest_apic_eoi_write); | ||
471 | |||
427 | #ifdef CONFIG_SMP | 472 | #ifdef CONFIG_SMP |
428 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 473 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
429 | register_cpu_notifier(&kvm_cpu_notifier); | 474 | register_cpu_notifier(&kvm_cpu_notifier); |
@@ -432,6 +477,19 @@ void __init kvm_guest_init(void) | |||
432 | #endif | 477 | #endif |
433 | } | 478 | } |
434 | 479 | ||
480 | static bool __init kvm_detect(void) | ||
481 | { | ||
482 | if (!kvm_para_available()) | ||
483 | return false; | ||
484 | return true; | ||
485 | } | ||
486 | |||
487 | const struct hypervisor_x86 x86_hyper_kvm __refconst = { | ||
488 | .name = "KVM", | ||
489 | .detect = kvm_detect, | ||
490 | }; | ||
491 | EXPORT_SYMBOL_GPL(x86_hyper_kvm); | ||
492 | |||
435 | static __init int activate_jump_labels(void) | 493 | static __init int activate_jump_labels(void) |
436 | { | 494 | { |
437 | if (has_steal_clock) { | 495 | if (has_steal_clock) { |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index f8492da65bf..f1b42b3a186 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
23 | #include <asm/apic.h> | 23 | #include <asm/apic.h> |
24 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
25 | #include <linux/hardirq.h> | ||
25 | 26 | ||
26 | #include <asm/x86_init.h> | 27 | #include <asm/x86_init.h> |
27 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
@@ -114,6 +115,20 @@ static void kvm_get_preset_lpj(void) | |||
114 | preset_lpj = lpj; | 115 | preset_lpj = lpj; |
115 | } | 116 | } |
116 | 117 | ||
118 | bool kvm_check_and_clear_guest_paused(void) | ||
119 | { | ||
120 | bool ret = false; | ||
121 | struct pvclock_vcpu_time_info *src; | ||
122 | |||
123 | src = &__get_cpu_var(hv_clock); | ||
124 | if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { | ||
125 | __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED); | ||
126 | ret = true; | ||
127 | } | ||
128 | |||
129 | return ret; | ||
130 | } | ||
131 | |||
117 | static struct clocksource kvm_clock = { | 132 | static struct clocksource kvm_clock = { |
118 | .name = "kvm-clock", | 133 | .name = "kvm-clock", |
119 | .read = kvm_clock_get_cycles, | 134 | .read = kvm_clock_get_cycles, |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index fbdfc691718..4873e62db6a 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -87,6 +87,7 @@ | |||
87 | #include <asm/microcode.h> | 87 | #include <asm/microcode.h> |
88 | #include <asm/processor.h> | 88 | #include <asm/processor.h> |
89 | #include <asm/cpu_device_id.h> | 89 | #include <asm/cpu_device_id.h> |
90 | #include <asm/perf_event.h> | ||
90 | 91 | ||
91 | MODULE_DESCRIPTION("Microcode Update Driver"); | 92 | MODULE_DESCRIPTION("Microcode Update Driver"); |
92 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | 93 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); |
@@ -277,7 +278,6 @@ static int reload_for_cpu(int cpu) | |||
277 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 278 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
278 | int err = 0; | 279 | int err = 0; |
279 | 280 | ||
280 | mutex_lock(µcode_mutex); | ||
281 | if (uci->valid) { | 281 | if (uci->valid) { |
282 | enum ucode_state ustate; | 282 | enum ucode_state ustate; |
283 | 283 | ||
@@ -288,7 +288,6 @@ static int reload_for_cpu(int cpu) | |||
288 | if (ustate == UCODE_ERROR) | 288 | if (ustate == UCODE_ERROR) |
289 | err = -EINVAL; | 289 | err = -EINVAL; |
290 | } | 290 | } |
291 | mutex_unlock(µcode_mutex); | ||
292 | 291 | ||
293 | return err; | 292 | return err; |
294 | } | 293 | } |
@@ -298,19 +297,31 @@ static ssize_t reload_store(struct device *dev, | |||
298 | const char *buf, size_t size) | 297 | const char *buf, size_t size) |
299 | { | 298 | { |
300 | unsigned long val; | 299 | unsigned long val; |
301 | int cpu = dev->id; | 300 | int cpu; |
302 | ssize_t ret = 0; | 301 | ssize_t ret = 0, tmp_ret; |
303 | 302 | ||
304 | ret = kstrtoul(buf, 0, &val); | 303 | ret = kstrtoul(buf, 0, &val); |
305 | if (ret) | 304 | if (ret) |
306 | return ret; | 305 | return ret; |
307 | 306 | ||
308 | if (val == 1) { | 307 | if (val != 1) |
309 | get_online_cpus(); | 308 | return size; |
310 | if (cpu_online(cpu)) | 309 | |
311 | ret = reload_for_cpu(cpu); | 310 | get_online_cpus(); |
312 | put_online_cpus(); | 311 | mutex_lock(µcode_mutex); |
312 | for_each_online_cpu(cpu) { | ||
313 | tmp_ret = reload_for_cpu(cpu); | ||
314 | if (tmp_ret != 0) | ||
315 | pr_warn("Error reloading microcode on CPU %d\n", cpu); | ||
316 | |||
317 | /* save retval of the first encountered reload error */ | ||
318 | if (!ret) | ||
319 | ret = tmp_ret; | ||
313 | } | 320 | } |
321 | if (!ret) | ||
322 | perf_check_microcode(); | ||
323 | mutex_unlock(µcode_mutex); | ||
324 | put_online_cpus(); | ||
314 | 325 | ||
315 | if (!ret) | 326 | if (!ret) |
316 | ret = size; | 327 | ret = size; |
@@ -339,7 +350,6 @@ static DEVICE_ATTR(version, 0400, version_show, NULL); | |||
339 | static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); | 350 | static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); |
340 | 351 | ||
341 | static struct attribute *mc_default_attrs[] = { | 352 | static struct attribute *mc_default_attrs[] = { |
342 | &dev_attr_reload.attr, | ||
343 | &dev_attr_version.attr, | 353 | &dev_attr_version.attr, |
344 | &dev_attr_processor_flags.attr, | 354 | &dev_attr_processor_flags.attr, |
345 | NULL | 355 | NULL |
@@ -504,7 +514,7 @@ static struct notifier_block __refdata mc_cpu_notifier = { | |||
504 | 514 | ||
505 | #ifdef MODULE | 515 | #ifdef MODULE |
506 | /* Autoload on Intel and AMD systems */ | 516 | /* Autoload on Intel and AMD systems */ |
507 | static const struct x86_cpu_id microcode_id[] = { | 517 | static const struct x86_cpu_id __initconst microcode_id[] = { |
508 | #ifdef CONFIG_MICROCODE_INTEL | 518 | #ifdef CONFIG_MICROCODE_INTEL |
509 | { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, | 519 | { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, |
510 | #endif | 520 | #endif |
@@ -516,6 +526,16 @@ static const struct x86_cpu_id microcode_id[] = { | |||
516 | MODULE_DEVICE_TABLE(x86cpu, microcode_id); | 526 | MODULE_DEVICE_TABLE(x86cpu, microcode_id); |
517 | #endif | 527 | #endif |
518 | 528 | ||
529 | static struct attribute *cpu_root_microcode_attrs[] = { | ||
530 | &dev_attr_reload.attr, | ||
531 | NULL | ||
532 | }; | ||
533 | |||
534 | static struct attribute_group cpu_root_microcode_group = { | ||
535 | .name = "microcode", | ||
536 | .attrs = cpu_root_microcode_attrs, | ||
537 | }; | ||
538 | |||
519 | static int __init microcode_init(void) | 539 | static int __init microcode_init(void) |
520 | { | 540 | { |
521 | struct cpuinfo_x86 *c = &cpu_data(0); | 541 | struct cpuinfo_x86 *c = &cpu_data(0); |
@@ -540,16 +560,25 @@ static int __init microcode_init(void) | |||
540 | mutex_lock(µcode_mutex); | 560 | mutex_lock(µcode_mutex); |
541 | 561 | ||
542 | error = subsys_interface_register(&mc_cpu_interface); | 562 | error = subsys_interface_register(&mc_cpu_interface); |
543 | 563 | if (!error) | |
564 | perf_check_microcode(); | ||
544 | mutex_unlock(µcode_mutex); | 565 | mutex_unlock(µcode_mutex); |
545 | put_online_cpus(); | 566 | put_online_cpus(); |
546 | 567 | ||
547 | if (error) | 568 | if (error) |
548 | goto out_pdev; | 569 | goto out_pdev; |
549 | 570 | ||
571 | error = sysfs_create_group(&cpu_subsys.dev_root->kobj, | ||
572 | &cpu_root_microcode_group); | ||
573 | |||
574 | if (error) { | ||
575 | pr_err("Error creating microcode group!\n"); | ||
576 | goto out_driver; | ||
577 | } | ||
578 | |||
550 | error = microcode_dev_init(); | 579 | error = microcode_dev_init(); |
551 | if (error) | 580 | if (error) |
552 | goto out_driver; | 581 | goto out_ucode_group; |
553 | 582 | ||
554 | register_syscore_ops(&mc_syscore_ops); | 583 | register_syscore_ops(&mc_syscore_ops); |
555 | register_hotcpu_notifier(&mc_cpu_notifier); | 584 | register_hotcpu_notifier(&mc_cpu_notifier); |
@@ -559,7 +588,11 @@ static int __init microcode_init(void) | |||
559 | 588 | ||
560 | return 0; | 589 | return 0; |
561 | 590 | ||
562 | out_driver: | 591 | out_ucode_group: |
592 | sysfs_remove_group(&cpu_subsys.dev_root->kobj, | ||
593 | &cpu_root_microcode_group); | ||
594 | |||
595 | out_driver: | ||
563 | get_online_cpus(); | 596 | get_online_cpus(); |
564 | mutex_lock(µcode_mutex); | 597 | mutex_lock(µcode_mutex); |
565 | 598 | ||
@@ -568,7 +601,7 @@ out_driver: | |||
568 | mutex_unlock(µcode_mutex); | 601 | mutex_unlock(µcode_mutex); |
569 | put_online_cpus(); | 602 | put_online_cpus(); |
570 | 603 | ||
571 | out_pdev: | 604 | out_pdev: |
572 | platform_device_unregister(microcode_pdev); | 605 | platform_device_unregister(microcode_pdev); |
573 | return error; | 606 | return error; |
574 | 607 | ||
@@ -584,6 +617,9 @@ static void __exit microcode_exit(void) | |||
584 | unregister_hotcpu_notifier(&mc_cpu_notifier); | 617 | unregister_hotcpu_notifier(&mc_cpu_notifier); |
585 | unregister_syscore_ops(&mc_syscore_ops); | 618 | unregister_syscore_ops(&mc_syscore_ops); |
586 | 619 | ||
620 | sysfs_remove_group(&cpu_subsys.dev_root->kobj, | ||
621 | &cpu_root_microcode_group); | ||
622 | |||
587 | get_online_cpus(); | 623 | get_online_cpus(); |
588 | mutex_lock(µcode_mutex); | 624 | mutex_lock(µcode_mutex); |
589 | 625 | ||
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index f21fd94ac89..216a4d754b0 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -15,6 +15,9 @@ | |||
15 | along with this program; if not, write to the Free Software | 15 | along with this program; if not, write to the Free Software |
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | |||
19 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
20 | |||
18 | #include <linux/moduleloader.h> | 21 | #include <linux/moduleloader.h> |
19 | #include <linux/elf.h> | 22 | #include <linux/elf.h> |
20 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
@@ -30,9 +33,14 @@ | |||
30 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
31 | 34 | ||
32 | #if 0 | 35 | #if 0 |
33 | #define DEBUGP printk | 36 | #define DEBUGP(fmt, ...) \ |
37 | printk(KERN_DEBUG fmt, ##__VA_ARGS__) | ||
34 | #else | 38 | #else |
35 | #define DEBUGP(fmt...) | 39 | #define DEBUGP(fmt, ...) \ |
40 | do { \ | ||
41 | if (0) \ | ||
42 | printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ | ||
43 | } while (0) | ||
36 | #endif | 44 | #endif |
37 | 45 | ||
38 | void *module_alloc(unsigned long size) | 46 | void *module_alloc(unsigned long size) |
@@ -56,8 +64,8 @@ int apply_relocate(Elf32_Shdr *sechdrs, | |||
56 | Elf32_Sym *sym; | 64 | Elf32_Sym *sym; |
57 | uint32_t *location; | 65 | uint32_t *location; |
58 | 66 | ||
59 | DEBUGP("Applying relocate section %u to %u\n", relsec, | 67 | DEBUGP("Applying relocate section %u to %u\n", |
60 | sechdrs[relsec].sh_info); | 68 | relsec, sechdrs[relsec].sh_info); |
61 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { | 69 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { |
62 | /* This is where to make the change */ | 70 | /* This is where to make the change */ |
63 | location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr | 71 | location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr |
@@ -73,11 +81,11 @@ int apply_relocate(Elf32_Shdr *sechdrs, | |||
73 | *location += sym->st_value; | 81 | *location += sym->st_value; |
74 | break; | 82 | break; |
75 | case R_386_PC32: | 83 | case R_386_PC32: |
76 | /* Add the value, subtract its postition */ | 84 | /* Add the value, subtract its position */ |
77 | *location += sym->st_value - (uint32_t)location; | 85 | *location += sym->st_value - (uint32_t)location; |
78 | break; | 86 | break; |
79 | default: | 87 | default: |
80 | printk(KERN_ERR "module %s: Unknown relocation: %u\n", | 88 | pr_err("%s: Unknown relocation: %u\n", |
81 | me->name, ELF32_R_TYPE(rel[i].r_info)); | 89 | me->name, ELF32_R_TYPE(rel[i].r_info)); |
82 | return -ENOEXEC; | 90 | return -ENOEXEC; |
83 | } | 91 | } |
@@ -97,8 +105,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
97 | void *loc; | 105 | void *loc; |
98 | u64 val; | 106 | u64 val; |
99 | 107 | ||
100 | DEBUGP("Applying relocate section %u to %u\n", relsec, | 108 | DEBUGP("Applying relocate section %u to %u\n", |
101 | sechdrs[relsec].sh_info); | 109 | relsec, sechdrs[relsec].sh_info); |
102 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { | 110 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { |
103 | /* This is where to make the change */ | 111 | /* This is where to make the change */ |
104 | loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr | 112 | loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr |
@@ -110,8 +118,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
110 | + ELF64_R_SYM(rel[i].r_info); | 118 | + ELF64_R_SYM(rel[i].r_info); |
111 | 119 | ||
112 | DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", | 120 | DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", |
113 | (int)ELF64_R_TYPE(rel[i].r_info), | 121 | (int)ELF64_R_TYPE(rel[i].r_info), |
114 | sym->st_value, rel[i].r_addend, (u64)loc); | 122 | sym->st_value, rel[i].r_addend, (u64)loc); |
115 | 123 | ||
116 | val = sym->st_value + rel[i].r_addend; | 124 | val = sym->st_value + rel[i].r_addend; |
117 | 125 | ||
@@ -140,7 +148,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
140 | #endif | 148 | #endif |
141 | break; | 149 | break; |
142 | default: | 150 | default: |
143 | printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", | 151 | pr_err("%s: Unknown rela relocation: %llu\n", |
144 | me->name, ELF64_R_TYPE(rel[i].r_info)); | 152 | me->name, ELF64_R_TYPE(rel[i].r_info)); |
145 | return -ENOEXEC; | 153 | return -ENOEXEC; |
146 | } | 154 | } |
@@ -148,9 +156,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
148 | return 0; | 156 | return 0; |
149 | 157 | ||
150 | overflow: | 158 | overflow: |
151 | printk(KERN_ERR "overflow in relocation type %d val %Lx\n", | 159 | pr_err("overflow in relocation type %d val %Lx\n", |
152 | (int)ELF64_R_TYPE(rel[i].r_info), val); | 160 | (int)ELF64_R_TYPE(rel[i].r_info), val); |
153 | printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", | 161 | pr_err("`%s' likely not compiled with -mcmodel=kernel\n", |
154 | me->name); | 162 | me->name); |
155 | return -ENOEXEC; | 163 | return -ENOEXEC; |
156 | } | 164 | } |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b02d4dd6b8a..d2b56489d70 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <asm/proto.h> | 27 | #include <asm/proto.h> |
28 | #include <asm/bios_ebda.h> | 28 | #include <asm/bios_ebda.h> |
29 | #include <asm/e820.h> | 29 | #include <asm/e820.h> |
30 | #include <asm/trampoline.h> | ||
31 | #include <asm/setup.h> | 30 | #include <asm/setup.h> |
32 | #include <asm/smp.h> | 31 | #include <asm/smp.h> |
33 | 32 | ||
@@ -568,8 +567,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) | |||
568 | struct mpf_intel *mpf; | 567 | struct mpf_intel *mpf; |
569 | unsigned long mem; | 568 | unsigned long mem; |
570 | 569 | ||
571 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", | 570 | apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n", |
572 | bp, length); | 571 | base, base + length - 1); |
573 | BUILD_BUG_ON(sizeof(*mpf) != 16); | 572 | BUILD_BUG_ON(sizeof(*mpf) != 16); |
574 | 573 | ||
575 | while (length > 0) { | 574 | while (length > 0) { |
@@ -584,8 +583,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) | |||
584 | #endif | 583 | #endif |
585 | mpf_found = mpf; | 584 | mpf_found = mpf; |
586 | 585 | ||
587 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", | 586 | printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n", |
588 | mpf, (u64)virt_to_phys(mpf)); | 587 | (unsigned long long) virt_to_phys(mpf), |
588 | (unsigned long long) virt_to_phys(mpf) + | ||
589 | sizeof(*mpf) - 1, mpf); | ||
589 | 590 | ||
590 | mem = virt_to_phys(mpf); | 591 | mem = virt_to_phys(mpf); |
591 | memblock_reserve(mem, sizeof(*mpf)); | 592 | memblock_reserve(mem, sizeof(*mpf)); |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 90875279ef3..f84f5c57de3 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) | |||
365 | #ifdef CONFIG_X86_32 | 365 | #ifdef CONFIG_X86_32 |
366 | /* | 366 | /* |
367 | * For i386, NMIs use the same stack as the kernel, and we can | 367 | * For i386, NMIs use the same stack as the kernel, and we can |
368 | * add a workaround to the iret problem in C. Simply have 3 states | 368 | * add a workaround to the iret problem in C (preventing nested |
369 | * the NMI can be in. | 369 | * NMIs if an NMI takes a trap). Simply have 3 states the NMI |
370 | * can be in: | ||
370 | * | 371 | * |
371 | * 1) not running | 372 | * 1) not running |
372 | * 2) executing | 373 | * 2) executing |
@@ -383,32 +384,50 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) | |||
383 | * If an NMI hits a breakpoint that executes an iret, another | 384 | * If an NMI hits a breakpoint that executes an iret, another |
384 | * NMI can preempt it. We do not want to allow this new NMI | 385 | * NMI can preempt it. We do not want to allow this new NMI |
385 | * to run, but we want to execute it when the first one finishes. | 386 | * to run, but we want to execute it when the first one finishes. |
386 | * We set the state to "latched", and the first NMI will perform | 387 | * We set the state to "latched", and the exit of the first NMI will |
387 | * an cmpxchg on the state, and if it doesn't successfully | 388 | * perform a dec_return, if the result is zero (NOT_RUNNING), then |
388 | * reset the state to "not running" it will restart the next | 389 | * it will simply exit the NMI handler. If not, the dec_return |
389 | * NMI. | 390 | * would have set the state to NMI_EXECUTING (what we want it to |
391 | * be when we are running). In this case, we simply jump back | ||
392 | * to rerun the NMI handler again, and restart the 'latched' NMI. | ||
393 | * | ||
394 | * No trap (breakpoint or page fault) should be hit before nmi_restart, | ||
395 | * thus there is no race between the first check of state for NOT_RUNNING | ||
396 | * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs | ||
397 | * at this point. | ||
398 | * | ||
399 | * In case the NMI takes a page fault, we need to save off the CR2 | ||
400 | * because the NMI could have preempted another page fault and corrupt | ||
401 | * the CR2 that is about to be read. As nested NMIs must be restarted | ||
402 | * and they can not take breakpoints or page faults, the update of the | ||
403 | * CR2 must be done before converting the nmi state back to NOT_RUNNING. | ||
404 | * Otherwise, there would be a race of another nested NMI coming in | ||
405 | * after setting state to NOT_RUNNING but before updating the nmi_cr2. | ||
390 | */ | 406 | */ |
391 | enum nmi_states { | 407 | enum nmi_states { |
392 | NMI_NOT_RUNNING, | 408 | NMI_NOT_RUNNING = 0, |
393 | NMI_EXECUTING, | 409 | NMI_EXECUTING, |
394 | NMI_LATCHED, | 410 | NMI_LATCHED, |
395 | }; | 411 | }; |
396 | static DEFINE_PER_CPU(enum nmi_states, nmi_state); | 412 | static DEFINE_PER_CPU(enum nmi_states, nmi_state); |
413 | static DEFINE_PER_CPU(unsigned long, nmi_cr2); | ||
397 | 414 | ||
398 | #define nmi_nesting_preprocess(regs) \ | 415 | #define nmi_nesting_preprocess(regs) \ |
399 | do { \ | 416 | do { \ |
400 | if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ | 417 | if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ |
401 | __get_cpu_var(nmi_state) = NMI_LATCHED; \ | 418 | this_cpu_write(nmi_state, NMI_LATCHED); \ |
402 | return; \ | 419 | return; \ |
403 | } \ | 420 | } \ |
404 | nmi_restart: \ | 421 | this_cpu_write(nmi_state, NMI_EXECUTING); \ |
405 | __get_cpu_var(nmi_state) = NMI_EXECUTING; \ | 422 | this_cpu_write(nmi_cr2, read_cr2()); \ |
406 | } while (0) | 423 | } while (0); \ |
424 | nmi_restart: | ||
407 | 425 | ||
408 | #define nmi_nesting_postprocess() \ | 426 | #define nmi_nesting_postprocess() \ |
409 | do { \ | 427 | do { \ |
410 | if (cmpxchg(&__get_cpu_var(nmi_state), \ | 428 | if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ |
411 | NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ | 429 | write_cr2(this_cpu_read(nmi_cr2)); \ |
430 | if (this_cpu_dec_return(nmi_state)) \ | ||
412 | goto nmi_restart; \ | 431 | goto nmi_restart; \ |
413 | } while (0) | 432 | } while (0) |
414 | #else /* x86_64 */ | 433 | #else /* x86_64 */ |
@@ -444,14 +463,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) | |||
444 | */ | 463 | */ |
445 | if (unlikely(is_debug_stack(regs->sp))) { | 464 | if (unlikely(is_debug_stack(regs->sp))) { |
446 | debug_stack_set_zero(); | 465 | debug_stack_set_zero(); |
447 | __get_cpu_var(update_debug_stack) = 1; | 466 | this_cpu_write(update_debug_stack, 1); |
448 | } | 467 | } |
449 | } | 468 | } |
450 | 469 | ||
451 | static inline void nmi_nesting_postprocess(void) | 470 | static inline void nmi_nesting_postprocess(void) |
452 | { | 471 | { |
453 | if (unlikely(__get_cpu_var(update_debug_stack))) | 472 | if (unlikely(this_cpu_read(update_debug_stack))) { |
454 | debug_stack_reset(); | 473 | debug_stack_reset(); |
474 | this_cpu_write(update_debug_stack, 0); | ||
475 | } | ||
455 | } | 476 | } |
456 | #endif | 477 | #endif |
457 | 478 | ||
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index e31bf8d5c4d..6d9582ec032 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c | |||
@@ -42,7 +42,8 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs) | |||
42 | static void __init init_nmi_testsuite(void) | 42 | static void __init init_nmi_testsuite(void) |
43 | { | 43 | { |
44 | /* trap all the unknown NMIs we may generate */ | 44 | /* trap all the unknown NMIs we may generate */ |
45 | register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); | 45 | register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk", |
46 | __initdata); | ||
46 | } | 47 | } |
47 | 48 | ||
48 | static void __init cleanup_nmi_testsuite(void) | 49 | static void __init cleanup_nmi_testsuite(void) |
@@ -65,7 +66,7 @@ static void __init test_nmi_ipi(struct cpumask *mask) | |||
65 | unsigned long timeout; | 66 | unsigned long timeout; |
66 | 67 | ||
67 | if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, | 68 | if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, |
68 | NMI_FLAG_FIRST, "nmi_selftest")) { | 69 | NMI_FLAG_FIRST, "nmi_selftest", __initdata)) { |
69 | nmi_fail = FAILURE; | 70 | nmi_fail = FAILURE; |
70 | return; | 71 | return; |
71 | } | 72 | } |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 9ce885996fd..17fff18a103 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -352,9 +352,7 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
352 | #endif | 352 | #endif |
353 | .wbinvd = native_wbinvd, | 353 | .wbinvd = native_wbinvd, |
354 | .read_msr = native_read_msr_safe, | 354 | .read_msr = native_read_msr_safe, |
355 | .rdmsr_regs = native_rdmsr_safe_regs, | ||
356 | .write_msr = native_write_msr_safe, | 355 | .write_msr = native_write_msr_safe, |
357 | .wrmsr_regs = native_wrmsr_safe_regs, | ||
358 | .read_tsc = native_read_tsc, | 356 | .read_tsc = native_read_tsc, |
359 | .read_pmc = native_read_pmc, | 357 | .read_pmc = native_read_pmc, |
360 | .read_tscp = native_read_tscp, | 358 | .read_tscp = native_read_tscp, |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index b72838bae64..299d49302e7 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -22,6 +22,8 @@ | |||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #define pr_fmt(fmt) "Calgary: " fmt | ||
26 | |||
25 | #include <linux/kernel.h> | 27 | #include <linux/kernel.h> |
26 | #include <linux/init.h> | 28 | #include <linux/init.h> |
27 | #include <linux/types.h> | 29 | #include <linux/types.h> |
@@ -245,7 +247,7 @@ static unsigned long iommu_range_alloc(struct device *dev, | |||
245 | offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, | 247 | offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, |
246 | npages, 0, boundary_size, 0); | 248 | npages, 0, boundary_size, 0); |
247 | if (offset == ~0UL) { | 249 | if (offset == ~0UL) { |
248 | printk(KERN_WARNING "Calgary: IOMMU full.\n"); | 250 | pr_warn("IOMMU full\n"); |
249 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 251 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
250 | if (panic_on_overflow) | 252 | if (panic_on_overflow) |
251 | panic("Calgary: fix the allocator.\n"); | 253 | panic("Calgary: fix the allocator.\n"); |
@@ -271,8 +273,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, | |||
271 | entry = iommu_range_alloc(dev, tbl, npages); | 273 | entry = iommu_range_alloc(dev, tbl, npages); |
272 | 274 | ||
273 | if (unlikely(entry == DMA_ERROR_CODE)) { | 275 | if (unlikely(entry == DMA_ERROR_CODE)) { |
274 | printk(KERN_WARNING "Calgary: failed to allocate %u pages in " | 276 | pr_warn("failed to allocate %u pages in iommu %p\n", |
275 | "iommu %p\n", npages, tbl); | 277 | npages, tbl); |
276 | return DMA_ERROR_CODE; | 278 | return DMA_ERROR_CODE; |
277 | } | 279 | } |
278 | 280 | ||
@@ -561,8 +563,7 @@ static void calgary_tce_cache_blast(struct iommu_table *tbl) | |||
561 | i++; | 563 | i++; |
562 | } while ((val & 0xff) != 0xff && i < 100); | 564 | } while ((val & 0xff) != 0xff && i < 100); |
563 | if (i == 100) | 565 | if (i == 100) |
564 | printk(KERN_WARNING "Calgary: PCI bus not quiesced, " | 566 | pr_warn("PCI bus not quiesced, continuing anyway\n"); |
565 | "continuing anyway\n"); | ||
566 | 567 | ||
567 | /* invalidate TCE cache */ | 568 | /* invalidate TCE cache */ |
568 | target = calgary_reg(bbar, tar_offset(tbl->it_busno)); | 569 | target = calgary_reg(bbar, tar_offset(tbl->it_busno)); |
@@ -604,8 +605,7 @@ begin: | |||
604 | i++; | 605 | i++; |
605 | } while ((val64 & 0xff) != 0xff && i < 100); | 606 | } while ((val64 & 0xff) != 0xff && i < 100); |
606 | if (i == 100) | 607 | if (i == 100) |
607 | printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, " | 608 | pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n"); |
608 | "continuing anyway\n"); | ||
609 | 609 | ||
610 | /* 3. poll Page Migration DEBUG for SoftStopFault */ | 610 | /* 3. poll Page Migration DEBUG for SoftStopFault */ |
611 | target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); | 611 | target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); |
@@ -617,8 +617,7 @@ begin: | |||
617 | if (++count < 100) | 617 | if (++count < 100) |
618 | goto begin; | 618 | goto begin; |
619 | else { | 619 | else { |
620 | printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, " | 620 | pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n"); |
621 | "aborting TCE cache flush sequence!\n"); | ||
622 | return; /* pray for the best */ | 621 | return; /* pray for the best */ |
623 | } | 622 | } |
624 | } | 623 | } |
@@ -840,8 +839,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl) | |||
840 | plssr = be32_to_cpu(readl(target)); | 839 | plssr = be32_to_cpu(readl(target)); |
841 | 840 | ||
842 | /* If no error, the agent ID in the CSR is not valid */ | 841 | /* If no error, the agent ID in the CSR is not valid */ |
843 | printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, " | 842 | pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n", |
844 | "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr); | 843 | tbl->it_busno, csr, plssr); |
845 | } | 844 | } |
846 | 845 | ||
847 | static void calioc2_dump_error_regs(struct iommu_table *tbl) | 846 | static void calioc2_dump_error_regs(struct iommu_table *tbl) |
@@ -867,22 +866,21 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl) | |||
867 | target = calgary_reg(bbar, phboff | 0x800); | 866 | target = calgary_reg(bbar, phboff | 0x800); |
868 | mck = be32_to_cpu(readl(target)); | 867 | mck = be32_to_cpu(readl(target)); |
869 | 868 | ||
870 | printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n", | 869 | pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno); |
871 | tbl->it_busno); | ||
872 | 870 | ||
873 | printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", | 871 | pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", |
874 | csr, plssr, csmr, mck); | 872 | csr, plssr, csmr, mck); |
875 | 873 | ||
876 | /* dump rest of error regs */ | 874 | /* dump rest of error regs */ |
877 | printk(KERN_EMERG "Calgary: "); | 875 | pr_emerg(""); |
878 | for (i = 0; i < ARRAY_SIZE(errregs); i++) { | 876 | for (i = 0; i < ARRAY_SIZE(errregs); i++) { |
879 | /* err regs are at 0x810 - 0x870 */ | 877 | /* err regs are at 0x810 - 0x870 */ |
880 | erroff = (0x810 + (i * 0x10)); | 878 | erroff = (0x810 + (i * 0x10)); |
881 | target = calgary_reg(bbar, phboff | erroff); | 879 | target = calgary_reg(bbar, phboff | erroff); |
882 | errregs[i] = be32_to_cpu(readl(target)); | 880 | errregs[i] = be32_to_cpu(readl(target)); |
883 | printk("0x%08x@0x%lx ", errregs[i], erroff); | 881 | pr_cont("0x%08x@0x%lx ", errregs[i], erroff); |
884 | } | 882 | } |
885 | printk("\n"); | 883 | pr_cont("\n"); |
886 | 884 | ||
887 | /* root complex status */ | 885 | /* root complex status */ |
888 | target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); | 886 | target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 3003250ac51..de2b7ad7027 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -45,15 +45,6 @@ int iommu_detected __read_mostly = 0; | |||
45 | */ | 45 | */ |
46 | int iommu_pass_through __read_mostly; | 46 | int iommu_pass_through __read_mostly; |
47 | 47 | ||
48 | /* | ||
49 | * Group multi-function PCI devices into a single device-group for the | ||
50 | * iommu_device_group interface. This tells the iommu driver to pretend | ||
51 | * it cannot distinguish between functions of a device, exposing only one | ||
52 | * group for the device. Useful for disallowing use of individual PCI | ||
53 | * functions from userspace drivers. | ||
54 | */ | ||
55 | int iommu_group_mf __read_mostly; | ||
56 | |||
57 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; | 48 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; |
58 | 49 | ||
59 | /* Dummy device used for NULL arguments (normally ISA). */ | 50 | /* Dummy device used for NULL arguments (normally ISA). */ |
@@ -101,13 +92,18 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, | |||
101 | { | 92 | { |
102 | unsigned long dma_mask; | 93 | unsigned long dma_mask; |
103 | struct page *page; | 94 | struct page *page; |
95 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
104 | dma_addr_t addr; | 96 | dma_addr_t addr; |
105 | 97 | ||
106 | dma_mask = dma_alloc_coherent_mask(dev, flag); | 98 | dma_mask = dma_alloc_coherent_mask(dev, flag); |
107 | 99 | ||
108 | flag |= __GFP_ZERO; | 100 | flag |= __GFP_ZERO; |
109 | again: | 101 | again: |
110 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | 102 | page = NULL; |
103 | if (!(flag & GFP_ATOMIC)) | ||
104 | page = dma_alloc_from_contiguous(dev, count, get_order(size)); | ||
105 | if (!page) | ||
106 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | ||
111 | if (!page) | 107 | if (!page) |
112 | return NULL; | 108 | return NULL; |
113 | 109 | ||
@@ -127,6 +123,16 @@ again: | |||
127 | return page_address(page); | 123 | return page_address(page); |
128 | } | 124 | } |
129 | 125 | ||
126 | void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
127 | dma_addr_t dma_addr, struct dma_attrs *attrs) | ||
128 | { | ||
129 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
130 | struct page *page = virt_to_page(vaddr); | ||
131 | |||
132 | if (!dma_release_from_contiguous(dev, page, count)) | ||
133 | free_pages((unsigned long)vaddr, get_order(size)); | ||
134 | } | ||
135 | |||
130 | /* | 136 | /* |
131 | * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel | 137 | * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel |
132 | * parameter documentation. | 138 | * parameter documentation. |
@@ -179,8 +185,6 @@ static __init int iommu_setup(char *p) | |||
179 | #endif | 185 | #endif |
180 | if (!strncmp(p, "pt", 2)) | 186 | if (!strncmp(p, "pt", 2)) |
181 | iommu_pass_through = 1; | 187 | iommu_pass_through = 1; |
182 | if (!strncmp(p, "group_mf", 8)) | ||
183 | iommu_group_mf = 1; | ||
184 | 188 | ||
185 | gart_parse_options(p); | 189 | gart_parse_options(p); |
186 | 190 | ||
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index f96050685b4..871be4a84c7 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -74,12 +74,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
74 | return nents; | 74 | return nents; |
75 | } | 75 | } |
76 | 76 | ||
77 | static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
78 | dma_addr_t dma_addr, struct dma_attrs *attrs) | ||
79 | { | ||
80 | free_pages((unsigned long)vaddr, get_order(size)); | ||
81 | } | ||
82 | |||
83 | static void nommu_sync_single_for_device(struct device *dev, | 77 | static void nommu_sync_single_for_device(struct device *dev, |
84 | dma_addr_t addr, size_t size, | 78 | dma_addr_t addr, size_t size, |
85 | enum dma_data_direction dir) | 79 | enum dma_data_direction dir) |
@@ -97,7 +91,7 @@ static void nommu_sync_sg_for_device(struct device *dev, | |||
97 | 91 | ||
98 | struct dma_map_ops nommu_dma_ops = { | 92 | struct dma_map_ops nommu_dma_ops = { |
99 | .alloc = dma_generic_alloc_coherent, | 93 | .alloc = dma_generic_alloc_coherent, |
100 | .free = nommu_free_coherent, | 94 | .free = dma_generic_free_coherent, |
101 | .map_sg = nommu_map_sg, | 95 | .map_sg = nommu_map_sg, |
102 | .map_page = nommu_map_page, | 96 | .map_page = nommu_map_page, |
103 | .sync_single_for_device = nommu_sync_single_for_device, | 97 | .sync_single_for_device = nommu_sync_single_for_device, |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 735279e54e5..ef6a8456f71 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
2 | |||
1 | #include <linux/errno.h> | 3 | #include <linux/errno.h> |
2 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
3 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
@@ -145,16 +147,14 @@ void show_regs_common(void) | |||
145 | /* Board Name is optional */ | 147 | /* Board Name is optional */ |
146 | board = dmi_get_system_info(DMI_BOARD_NAME); | 148 | board = dmi_get_system_info(DMI_BOARD_NAME); |
147 | 149 | ||
148 | printk(KERN_CONT "\n"); | 150 | printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n", |
149 | printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s", | 151 | current->pid, current->comm, print_tainted(), |
150 | current->pid, current->comm, print_tainted(), | 152 | init_utsname()->release, |
151 | init_utsname()->release, | 153 | (int)strcspn(init_utsname()->version, " "), |
152 | (int)strcspn(init_utsname()->version, " "), | 154 | init_utsname()->version, |
153 | init_utsname()->version); | 155 | vendor, product, |
154 | printk(KERN_CONT " %s %s", vendor, product); | 156 | board ? "/" : "", |
155 | if (board) | 157 | board ? board : ""); |
156 | printk(KERN_CONT "/%s", board); | ||
157 | printk(KERN_CONT "\n"); | ||
158 | } | 158 | } |
159 | 159 | ||
160 | void flush_thread(void) | 160 | void flush_thread(void) |
@@ -645,7 +645,7 @@ static void amd_e400_idle(void) | |||
645 | amd_e400_c1e_detected = true; | 645 | amd_e400_c1e_detected = true; |
646 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | 646 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) |
647 | mark_tsc_unstable("TSC halt in AMD C1E"); | 647 | mark_tsc_unstable("TSC halt in AMD C1E"); |
648 | printk(KERN_INFO "System has AMD C1E enabled\n"); | 648 | pr_info("System has AMD C1E enabled\n"); |
649 | } | 649 | } |
650 | } | 650 | } |
651 | 651 | ||
@@ -659,8 +659,7 @@ static void amd_e400_idle(void) | |||
659 | */ | 659 | */ |
660 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, | 660 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, |
661 | &cpu); | 661 | &cpu); |
662 | printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", | 662 | pr_info("Switch to broadcast mode on CPU%d\n", cpu); |
663 | cpu); | ||
664 | } | 663 | } |
665 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); | 664 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); |
666 | 665 | ||
@@ -681,8 +680,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
681 | { | 680 | { |
682 | #ifdef CONFIG_SMP | 681 | #ifdef CONFIG_SMP |
683 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | 682 | if (pm_idle == poll_idle && smp_num_siblings > 1) { |
684 | printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," | 683 | pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); |
685 | " performance may degrade.\n"); | ||
686 | } | 684 | } |
687 | #endif | 685 | #endif |
688 | if (pm_idle) | 686 | if (pm_idle) |
@@ -692,11 +690,11 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
692 | /* | 690 | /* |
693 | * One CPU supports mwait => All CPUs supports mwait | 691 | * One CPU supports mwait => All CPUs supports mwait |
694 | */ | 692 | */ |
695 | printk(KERN_INFO "using mwait in idle threads.\n"); | 693 | pr_info("using mwait in idle threads\n"); |
696 | pm_idle = mwait_idle; | 694 | pm_idle = mwait_idle; |
697 | } else if (cpu_has_amd_erratum(amd_erratum_400)) { | 695 | } else if (cpu_has_amd_erratum(amd_erratum_400)) { |
698 | /* E400: APIC timer interrupt does not wake up CPU from C1e */ | 696 | /* E400: APIC timer interrupt does not wake up CPU from C1e */ |
699 | printk(KERN_INFO "using AMD E400 aware idle routine\n"); | 697 | pr_info("using AMD E400 aware idle routine\n"); |
700 | pm_idle = amd_e400_idle; | 698 | pm_idle = amd_e400_idle; |
701 | } else | 699 | } else |
702 | pm_idle = default_idle; | 700 | pm_idle = default_idle; |
@@ -715,7 +713,7 @@ static int __init idle_setup(char *str) | |||
715 | return -EINVAL; | 713 | return -EINVAL; |
716 | 714 | ||
717 | if (!strcmp(str, "poll")) { | 715 | if (!strcmp(str, "poll")) { |
718 | printk("using polling idle threads.\n"); | 716 | pr_info("using polling idle threads\n"); |
719 | pm_idle = poll_idle; | 717 | pm_idle = poll_idle; |
720 | boot_option_idle_override = IDLE_POLL; | 718 | boot_option_idle_override = IDLE_POLL; |
721 | } else if (!strcmp(str, "mwait")) { | 719 | } else if (!strcmp(str, "mwait")) { |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 61cdf7fdf09..0a980c9d7cb 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -117,10 +117,10 @@ void release_thread(struct task_struct *dead_task) | |||
117 | { | 117 | { |
118 | if (dead_task->mm) { | 118 | if (dead_task->mm) { |
119 | if (dead_task->mm->context.size) { | 119 | if (dead_task->mm->context.size) { |
120 | printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", | 120 | pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n", |
121 | dead_task->comm, | 121 | dead_task->comm, |
122 | dead_task->mm->context.ldt, | 122 | dead_task->mm->context.ldt, |
123 | dead_task->mm->context.size); | 123 | dead_task->mm->context.size); |
124 | BUG(); | 124 | BUG(); |
125 | } | 125 | } |
126 | } | 126 | } |
@@ -466,7 +466,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
466 | task->thread.gs = addr; | 466 | task->thread.gs = addr; |
467 | if (doit) { | 467 | if (doit) { |
468 | load_gs_index(0); | 468 | load_gs_index(0); |
469 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); | 469 | ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); |
470 | } | 470 | } |
471 | } | 471 | } |
472 | put_cpu(); | 472 | put_cpu(); |
@@ -494,7 +494,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
494 | /* set the selector to 0 to not confuse | 494 | /* set the selector to 0 to not confuse |
495 | __switch_to */ | 495 | __switch_to */ |
496 | loadsegment(fs, 0); | 496 | loadsegment(fs, 0); |
497 | ret = checking_wrmsrl(MSR_FS_BASE, addr); | 497 | ret = wrmsrl_safe(MSR_FS_BASE, addr); |
498 | } | 498 | } |
499 | } | 499 | } |
500 | put_cpu(); | 500 | put_cpu(); |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 13b1990c7c5..c4c6a5c2bf0 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1211,12 +1211,6 @@ static long x32_arch_ptrace(struct task_struct *child, | |||
1211 | 0, sizeof(struct user_i387_struct), | 1211 | 0, sizeof(struct user_i387_struct), |
1212 | datap); | 1212 | datap); |
1213 | 1213 | ||
1214 | /* normal 64bit interface to access TLS data. | ||
1215 | Works just like arch_prctl, except that the arguments | ||
1216 | are reversed. */ | ||
1217 | case PTRACE_ARCH_PRCTL: | ||
1218 | return do_arch_prctl(child, data, addr); | ||
1219 | |||
1220 | default: | 1214 | default: |
1221 | return compat_ptrace_request(child, request, addr, data); | 1215 | return compat_ptrace_request(child, request, addr, data); |
1222 | } | 1216 | } |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 03920a15a63..1b27de56356 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | |||
512 | 512 | ||
513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) | 513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) |
514 | /* Set correct numa_node information for AMD NB functions */ | 514 | /* Set correct numa_node information for AMD NB functions */ |
515 | static void __init quirk_amd_nb_node(struct pci_dev *dev) | 515 | static void __devinit quirk_amd_nb_node(struct pci_dev *dev) |
516 | { | 516 | { |
517 | struct pci_dev *nb_ht; | 517 | struct pci_dev *nb_ht; |
518 | unsigned int devfn; | 518 | unsigned int devfn; |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 77215c23fba..52190a938b4 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
2 | |||
1 | #include <linux/module.h> | 3 | #include <linux/module.h> |
2 | #include <linux/reboot.h> | 4 | #include <linux/reboot.h> |
3 | #include <linux/init.h> | 5 | #include <linux/init.h> |
@@ -20,13 +22,12 @@ | |||
20 | #include <asm/virtext.h> | 22 | #include <asm/virtext.h> |
21 | #include <asm/cpu.h> | 23 | #include <asm/cpu.h> |
22 | #include <asm/nmi.h> | 24 | #include <asm/nmi.h> |
25 | #include <asm/smp.h> | ||
23 | 26 | ||
24 | #ifdef CONFIG_X86_32 | 27 | #include <linux/ctype.h> |
25 | # include <linux/ctype.h> | 28 | #include <linux/mc146818rtc.h> |
26 | # include <linux/mc146818rtc.h> | 29 | #include <asm/realmode.h> |
27 | #else | 30 | #include <asm/x86_init.h> |
28 | # include <asm/x86_init.h> | ||
29 | #endif | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * Power off function, if any | 33 | * Power off function, if any |
@@ -48,7 +49,7 @@ int reboot_force; | |||
48 | */ | 49 | */ |
49 | static int reboot_default = 1; | 50 | static int reboot_default = 1; |
50 | 51 | ||
51 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | 52 | #ifdef CONFIG_SMP |
52 | static int reboot_cpu = -1; | 53 | static int reboot_cpu = -1; |
53 | #endif | 54 | #endif |
54 | 55 | ||
@@ -66,8 +67,8 @@ bool port_cf9_safe = false; | |||
66 | * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] | 67 | * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] |
67 | * warm Don't set the cold reboot flag | 68 | * warm Don't set the cold reboot flag |
68 | * cold Set the cold reboot flag | 69 | * cold Set the cold reboot flag |
69 | * bios Reboot by jumping through the BIOS (only for X86_32) | 70 | * bios Reboot by jumping through the BIOS |
70 | * smp Reboot by executing reset on BSP or other CPU (only for X86_32) | 71 | * smp Reboot by executing reset on BSP or other CPU |
71 | * triple Force a triple fault (init) | 72 | * triple Force a triple fault (init) |
72 | * kbd Use the keyboard controller. cold reset (default) | 73 | * kbd Use the keyboard controller. cold reset (default) |
73 | * acpi Use the RESET_REG in the FADT | 74 | * acpi Use the RESET_REG in the FADT |
@@ -94,7 +95,6 @@ static int __init reboot_setup(char *str) | |||
94 | reboot_mode = 0; | 95 | reboot_mode = 0; |
95 | break; | 96 | break; |
96 | 97 | ||
97 | #ifdef CONFIG_X86_32 | ||
98 | #ifdef CONFIG_SMP | 98 | #ifdef CONFIG_SMP |
99 | case 's': | 99 | case 's': |
100 | if (isdigit(*(str+1))) { | 100 | if (isdigit(*(str+1))) { |
@@ -111,7 +111,6 @@ static int __init reboot_setup(char *str) | |||
111 | #endif /* CONFIG_SMP */ | 111 | #endif /* CONFIG_SMP */ |
112 | 112 | ||
113 | case 'b': | 113 | case 'b': |
114 | #endif | ||
115 | case 'a': | 114 | case 'a': |
116 | case 'k': | 115 | case 'k': |
117 | case 't': | 116 | case 't': |
@@ -137,7 +136,6 @@ static int __init reboot_setup(char *str) | |||
137 | __setup("reboot=", reboot_setup); | 136 | __setup("reboot=", reboot_setup); |
138 | 137 | ||
139 | 138 | ||
140 | #ifdef CONFIG_X86_32 | ||
141 | /* | 139 | /* |
142 | * Reboot options and system auto-detection code provided by | 140 | * Reboot options and system auto-detection code provided by |
143 | * Dell Inc. so their systems "just work". :-) | 141 | * Dell Inc. so their systems "just work". :-) |
@@ -151,21 +149,14 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) | |||
151 | { | 149 | { |
152 | if (reboot_type != BOOT_BIOS) { | 150 | if (reboot_type != BOOT_BIOS) { |
153 | reboot_type = BOOT_BIOS; | 151 | reboot_type = BOOT_BIOS; |
154 | printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); | 152 | pr_info("%s series board detected. Selecting %s-method for reboots.\n", |
153 | "BIOS", d->ident); | ||
155 | } | 154 | } |
156 | return 0; | 155 | return 0; |
157 | } | 156 | } |
158 | 157 | ||
159 | extern const unsigned char machine_real_restart_asm[]; | 158 | void __noreturn machine_real_restart(unsigned int type) |
160 | extern const u64 machine_real_restart_gdt[3]; | ||
161 | |||
162 | void machine_real_restart(unsigned int type) | ||
163 | { | 159 | { |
164 | void *restart_va; | ||
165 | unsigned long restart_pa; | ||
166 | void (*restart_lowmem)(unsigned int); | ||
167 | u64 *lowmem_gdt; | ||
168 | |||
169 | local_irq_disable(); | 160 | local_irq_disable(); |
170 | 161 | ||
171 | /* | 162 | /* |
@@ -185,40 +176,28 @@ void machine_real_restart(unsigned int type) | |||
185 | /* | 176 | /* |
186 | * Switch back to the initial page table. | 177 | * Switch back to the initial page table. |
187 | */ | 178 | */ |
179 | #ifdef CONFIG_X86_32 | ||
188 | load_cr3(initial_page_table); | 180 | load_cr3(initial_page_table); |
189 | 181 | #else | |
190 | /* | 182 | write_cr3(real_mode_header->trampoline_pgd); |
191 | * Write 0x1234 to absolute memory location 0x472. The BIOS reads | 183 | #endif |
192 | * this on booting to tell it to "Bypass memory test (also warm | ||
193 | * boot)". This seems like a fairly standard thing that gets set by | ||
194 | * REBOOT.COM programs, and the previous reset routine did this | ||
195 | * too. */ | ||
196 | *((unsigned short *)0x472) = reboot_mode; | ||
197 | |||
198 | /* Patch the GDT in the low memory trampoline */ | ||
199 | lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); | ||
200 | |||
201 | restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); | ||
202 | restart_pa = virt_to_phys(restart_va); | ||
203 | restart_lowmem = (void (*)(unsigned int))restart_pa; | ||
204 | |||
205 | /* GDT[0]: GDT self-pointer */ | ||
206 | lowmem_gdt[0] = | ||
207 | (u64)(sizeof(machine_real_restart_gdt) - 1) + | ||
208 | ((u64)virt_to_phys(lowmem_gdt) << 16); | ||
209 | /* GDT[1]: 64K real mode code segment */ | ||
210 | lowmem_gdt[1] = | ||
211 | GDT_ENTRY(0x009b, restart_pa, 0xffff); | ||
212 | 184 | ||
213 | /* Jump to the identity-mapped low memory code */ | 185 | /* Jump to the identity-mapped low memory code */ |
214 | restart_lowmem(type); | 186 | #ifdef CONFIG_X86_32 |
187 | asm volatile("jmpl *%0" : : | ||
188 | "rm" (real_mode_header->machine_real_restart_asm), | ||
189 | "a" (type)); | ||
190 | #else | ||
191 | asm volatile("ljmpl *%0" : : | ||
192 | "m" (real_mode_header->machine_real_restart_asm), | ||
193 | "D" (type)); | ||
194 | #endif | ||
195 | unreachable(); | ||
215 | } | 196 | } |
216 | #ifdef CONFIG_APM_MODULE | 197 | #ifdef CONFIG_APM_MODULE |
217 | EXPORT_SYMBOL(machine_real_restart); | 198 | EXPORT_SYMBOL(machine_real_restart); |
218 | #endif | 199 | #endif |
219 | 200 | ||
220 | #endif /* CONFIG_X86_32 */ | ||
221 | |||
222 | /* | 201 | /* |
223 | * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot | 202 | * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot |
224 | */ | 203 | */ |
@@ -226,8 +205,8 @@ static int __init set_pci_reboot(const struct dmi_system_id *d) | |||
226 | { | 205 | { |
227 | if (reboot_type != BOOT_CF9) { | 206 | if (reboot_type != BOOT_CF9) { |
228 | reboot_type = BOOT_CF9; | 207 | reboot_type = BOOT_CF9; |
229 | printk(KERN_INFO "%s series board detected. " | 208 | pr_info("%s series board detected. Selecting %s-method for reboots.\n", |
230 | "Selecting PCI-method for reboots.\n", d->ident); | 209 | "PCI", d->ident); |
231 | } | 210 | } |
232 | return 0; | 211 | return 0; |
233 | } | 212 | } |
@@ -236,17 +215,16 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d) | |||
236 | { | 215 | { |
237 | if (reboot_type != BOOT_KBD) { | 216 | if (reboot_type != BOOT_KBD) { |
238 | reboot_type = BOOT_KBD; | 217 | reboot_type = BOOT_KBD; |
239 | printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident); | 218 | pr_info("%s series board detected. Selecting %s-method for reboot.\n", |
219 | "KBD", d->ident); | ||
240 | } | 220 | } |
241 | return 0; | 221 | return 0; |
242 | } | 222 | } |
243 | 223 | ||
244 | /* | 224 | /* |
245 | * This is a single dmi_table handling all reboot quirks. Note that | 225 | * This is a single dmi_table handling all reboot quirks. |
246 | * REBOOT_BIOS is only available for 32bit | ||
247 | */ | 226 | */ |
248 | static struct dmi_system_id __initdata reboot_dmi_table[] = { | 227 | static struct dmi_system_id __initdata reboot_dmi_table[] = { |
249 | #ifdef CONFIG_X86_32 | ||
250 | { /* Handle problems with rebooting on Dell E520's */ | 228 | { /* Handle problems with rebooting on Dell E520's */ |
251 | .callback = set_bios_reboot, | 229 | .callback = set_bios_reboot, |
252 | .ident = "Dell E520", | 230 | .ident = "Dell E520", |
@@ -396,7 +374,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
396 | DMI_MATCH(DMI_BOARD_NAME, "P4S800"), | 374 | DMI_MATCH(DMI_BOARD_NAME, "P4S800"), |
397 | }, | 375 | }, |
398 | }, | 376 | }, |
399 | #endif /* CONFIG_X86_32 */ | ||
400 | 377 | ||
401 | { /* Handle reboot issue on Acer Aspire one */ | 378 | { /* Handle reboot issue on Acer Aspire one */ |
402 | .callback = set_kbd_reboot, | 379 | .callback = set_kbd_reboot, |
@@ -470,6 +447,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
470 | DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), | 447 | DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), |
471 | }, | 448 | }, |
472 | }, | 449 | }, |
450 | { /* Handle problems with rebooting on the Precision M6600. */ | ||
451 | .callback = set_pci_reboot, | ||
452 | .ident = "Dell OptiPlex 990", | ||
453 | .matches = { | ||
454 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
455 | DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), | ||
456 | }, | ||
457 | }, | ||
473 | { } | 458 | { } |
474 | }; | 459 | }; |
475 | 460 | ||
@@ -595,13 +580,11 @@ static void native_machine_emergency_restart(void) | |||
595 | reboot_type = BOOT_KBD; | 580 | reboot_type = BOOT_KBD; |
596 | break; | 581 | break; |
597 | 582 | ||
598 | #ifdef CONFIG_X86_32 | ||
599 | case BOOT_BIOS: | 583 | case BOOT_BIOS: |
600 | machine_real_restart(MRR_BIOS); | 584 | machine_real_restart(MRR_BIOS); |
601 | 585 | ||
602 | reboot_type = BOOT_KBD; | 586 | reboot_type = BOOT_KBD; |
603 | break; | 587 | break; |
604 | #endif | ||
605 | 588 | ||
606 | case BOOT_ACPI: | 589 | case BOOT_ACPI: |
607 | acpi_reboot(); | 590 | acpi_reboot(); |
@@ -643,12 +626,10 @@ void native_machine_shutdown(void) | |||
643 | /* The boot cpu is always logical cpu 0 */ | 626 | /* The boot cpu is always logical cpu 0 */ |
644 | int reboot_cpu_id = 0; | 627 | int reboot_cpu_id = 0; |
645 | 628 | ||
646 | #ifdef CONFIG_X86_32 | ||
647 | /* See if there has been given a command line override */ | 629 | /* See if there has been given a command line override */ |
648 | if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && | 630 | if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && |
649 | cpu_online(reboot_cpu)) | 631 | cpu_online(reboot_cpu)) |
650 | reboot_cpu_id = reboot_cpu; | 632 | reboot_cpu_id = reboot_cpu; |
651 | #endif | ||
652 | 633 | ||
653 | /* Make certain the cpu I'm about to reboot on is online */ | 634 | /* Make certain the cpu I'm about to reboot on is online */ |
654 | if (!cpu_online(reboot_cpu_id)) | 635 | if (!cpu_online(reboot_cpu_id)) |
@@ -658,9 +639,11 @@ void native_machine_shutdown(void) | |||
658 | set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); | 639 | set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); |
659 | 640 | ||
660 | /* | 641 | /* |
661 | * O.K Now that I'm on the appropriate processor, | 642 | * O.K Now that I'm on the appropriate processor, stop all of the |
662 | * stop all of the others. | 643 | * others. Also disable the local irq to not receive the per-cpu |
644 | * timer interrupt which may trigger scheduler's load balance. | ||
663 | */ | 645 | */ |
646 | local_irq_disable(); | ||
664 | stop_other_cpus(); | 647 | stop_other_cpus(); |
665 | #endif | 648 | #endif |
666 | 649 | ||
@@ -687,7 +670,7 @@ static void __machine_emergency_restart(int emergency) | |||
687 | 670 | ||
688 | static void native_machine_restart(char *__unused) | 671 | static void native_machine_restart(char *__unused) |
689 | { | 672 | { |
690 | printk("machine restart\n"); | 673 | pr_notice("machine restart\n"); |
691 | 674 | ||
692 | if (!reboot_force) | 675 | if (!reboot_force) |
693 | machine_shutdown(); | 676 | machine_shutdown(); |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 58a07b10812..f4b9b80e1b9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <asm/pci-direct.h> | 49 | #include <asm/pci-direct.h> |
50 | #include <linux/init_ohci1394_dma.h> | 50 | #include <linux/init_ohci1394_dma.h> |
51 | #include <linux/kvm_para.h> | 51 | #include <linux/kvm_para.h> |
52 | #include <linux/dma-contiguous.h> | ||
52 | 53 | ||
53 | #include <linux/errno.h> | 54 | #include <linux/errno.h> |
54 | #include <linux/kernel.h> | 55 | #include <linux/kernel.h> |
@@ -72,7 +73,7 @@ | |||
72 | 73 | ||
73 | #include <asm/mtrr.h> | 74 | #include <asm/mtrr.h> |
74 | #include <asm/apic.h> | 75 | #include <asm/apic.h> |
75 | #include <asm/trampoline.h> | 76 | #include <asm/realmode.h> |
76 | #include <asm/e820.h> | 77 | #include <asm/e820.h> |
77 | #include <asm/mpspec.h> | 78 | #include <asm/mpspec.h> |
78 | #include <asm/setup.h> | 79 | #include <asm/setup.h> |
@@ -333,8 +334,8 @@ static void __init relocate_initrd(void) | |||
333 | memblock_reserve(ramdisk_here, area_size); | 334 | memblock_reserve(ramdisk_here, area_size); |
334 | initrd_start = ramdisk_here + PAGE_OFFSET; | 335 | initrd_start = ramdisk_here + PAGE_OFFSET; |
335 | initrd_end = initrd_start + ramdisk_size; | 336 | initrd_end = initrd_start + ramdisk_size; |
336 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", | 337 | printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", |
337 | ramdisk_here, ramdisk_here + ramdisk_size); | 338 | ramdisk_here, ramdisk_here + ramdisk_size - 1); |
338 | 339 | ||
339 | q = (char *)initrd_start; | 340 | q = (char *)initrd_start; |
340 | 341 | ||
@@ -365,8 +366,8 @@ static void __init relocate_initrd(void) | |||
365 | /* high pages is not converted by early_res_to_bootmem */ | 366 | /* high pages is not converted by early_res_to_bootmem */ |
366 | ramdisk_image = boot_params.hdr.ramdisk_image; | 367 | ramdisk_image = boot_params.hdr.ramdisk_image; |
367 | ramdisk_size = boot_params.hdr.ramdisk_size; | 368 | ramdisk_size = boot_params.hdr.ramdisk_size; |
368 | printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to" | 369 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" |
369 | " %08llx - %08llx\n", | 370 | " [mem %#010llx-%#010llx]\n", |
370 | ramdisk_image, ramdisk_image + ramdisk_size - 1, | 371 | ramdisk_image, ramdisk_image + ramdisk_size - 1, |
371 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | 372 | ramdisk_here, ramdisk_here + ramdisk_size - 1); |
372 | } | 373 | } |
@@ -391,8 +392,8 @@ static void __init reserve_initrd(void) | |||
391 | ramdisk_size, end_of_lowmem>>1); | 392 | ramdisk_size, end_of_lowmem>>1); |
392 | } | 393 | } |
393 | 394 | ||
394 | printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image, | 395 | printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, |
395 | ramdisk_end); | 396 | ramdisk_end - 1); |
396 | 397 | ||
397 | 398 | ||
398 | if (ramdisk_end <= end_of_lowmem) { | 399 | if (ramdisk_end <= end_of_lowmem) { |
@@ -905,10 +906,10 @@ void __init setup_arch(char **cmdline_p) | |||
905 | setup_bios_corruption_check(); | 906 | setup_bios_corruption_check(); |
906 | #endif | 907 | #endif |
907 | 908 | ||
908 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", | 909 | printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", |
909 | max_pfn_mapped<<PAGE_SHIFT); | 910 | (max_pfn_mapped<<PAGE_SHIFT) - 1); |
910 | 911 | ||
911 | setup_trampolines(); | 912 | setup_real_mode(); |
912 | 913 | ||
913 | init_gbpages(); | 914 | init_gbpages(); |
914 | 915 | ||
@@ -925,6 +926,7 @@ void __init setup_arch(char **cmdline_p) | |||
925 | } | 926 | } |
926 | #endif | 927 | #endif |
927 | memblock.current_limit = get_max_mapped(); | 928 | memblock.current_limit = get_max_mapped(); |
929 | dma_contiguous_reserve(0); | ||
928 | 930 | ||
929 | /* | 931 | /* |
930 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | 932 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. |
@@ -966,6 +968,8 @@ void __init setup_arch(char **cmdline_p) | |||
966 | if (boot_cpu_data.cpuid_level >= 0) { | 968 | if (boot_cpu_data.cpuid_level >= 0) { |
967 | /* A CPU has %cr4 if and only if it has CPUID */ | 969 | /* A CPU has %cr4 if and only if it has CPUID */ |
968 | mmu_cr4_features = read_cr4(); | 970 | mmu_cr4_features = read_cr4(); |
971 | if (trampoline_cr4_features) | ||
972 | *trampoline_cr4_features = mmu_cr4_features; | ||
969 | } | 973 | } |
970 | 974 | ||
971 | #ifdef CONFIG_X86_32 | 975 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b68ccadd2ff..b280908a376 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -6,6 +6,9 @@ | |||
6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
7 | * 2000-2002 x86-64 support by Andi Kleen | 7 | * 2000-2002 x86-64 support by Andi Kleen |
8 | */ | 8 | */ |
9 | |||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
9 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
10 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
11 | #include <linux/smp.h> | 14 | #include <linux/smp.h> |
@@ -18,6 +21,7 @@ | |||
18 | #include <linux/personality.h> | 21 | #include <linux/personality.h> |
19 | #include <linux/uaccess.h> | 22 | #include <linux/uaccess.h> |
20 | #include <linux/user-return-notifier.h> | 23 | #include <linux/user-return-notifier.h> |
24 | #include <linux/uprobes.h> | ||
21 | 25 | ||
22 | #include <asm/processor.h> | 26 | #include <asm/processor.h> |
23 | #include <asm/ucontext.h> | 27 | #include <asm/ucontext.h> |
@@ -554,7 +558,6 @@ unsigned long sys_sigreturn(struct pt_regs *regs) | |||
554 | sizeof(frame->extramask)))) | 558 | sizeof(frame->extramask)))) |
555 | goto badframe; | 559 | goto badframe; |
556 | 560 | ||
557 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
558 | set_current_blocked(&set); | 561 | set_current_blocked(&set); |
559 | 562 | ||
560 | if (restore_sigcontext(regs, &frame->sc, &ax)) | 563 | if (restore_sigcontext(regs, &frame->sc, &ax)) |
@@ -580,7 +583,6 @@ long sys_rt_sigreturn(struct pt_regs *regs) | |||
580 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | 583 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) |
581 | goto badframe; | 584 | goto badframe; |
582 | 585 | ||
583 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
584 | set_current_blocked(&set); | 586 | set_current_blocked(&set); |
585 | 587 | ||
586 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 588 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
@@ -646,42 +648,28 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
646 | struct pt_regs *regs) | 648 | struct pt_regs *regs) |
647 | { | 649 | { |
648 | int usig = signr_convert(sig); | 650 | int usig = signr_convert(sig); |
649 | sigset_t *set = ¤t->blocked; | 651 | sigset_t *set = sigmask_to_save(); |
650 | int ret; | ||
651 | |||
652 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) | ||
653 | set = ¤t->saved_sigmask; | ||
654 | 652 | ||
655 | /* Set up the stack frame */ | 653 | /* Set up the stack frame */ |
656 | if (is_ia32) { | 654 | if (is_ia32) { |
657 | if (ka->sa.sa_flags & SA_SIGINFO) | 655 | if (ka->sa.sa_flags & SA_SIGINFO) |
658 | ret = ia32_setup_rt_frame(usig, ka, info, set, regs); | 656 | return ia32_setup_rt_frame(usig, ka, info, set, regs); |
659 | else | 657 | else |
660 | ret = ia32_setup_frame(usig, ka, set, regs); | 658 | return ia32_setup_frame(usig, ka, set, regs); |
661 | #ifdef CONFIG_X86_X32_ABI | 659 | #ifdef CONFIG_X86_X32_ABI |
662 | } else if (is_x32) { | 660 | } else if (is_x32) { |
663 | ret = x32_setup_rt_frame(usig, ka, info, | 661 | return x32_setup_rt_frame(usig, ka, info, |
664 | (compat_sigset_t *)set, regs); | 662 | (compat_sigset_t *)set, regs); |
665 | #endif | 663 | #endif |
666 | } else { | 664 | } else { |
667 | ret = __setup_rt_frame(sig, ka, info, set, regs); | 665 | return __setup_rt_frame(sig, ka, info, set, regs); |
668 | } | ||
669 | |||
670 | if (ret) { | ||
671 | force_sigsegv(sig, current); | ||
672 | return -EFAULT; | ||
673 | } | 666 | } |
674 | |||
675 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
676 | return ret; | ||
677 | } | 667 | } |
678 | 668 | ||
679 | static int | 669 | static void |
680 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 670 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
681 | struct pt_regs *regs) | 671 | struct pt_regs *regs) |
682 | { | 672 | { |
683 | int ret; | ||
684 | |||
685 | /* Are we from a system call? */ | 673 | /* Are we from a system call? */ |
686 | if (syscall_get_nr(current, regs) >= 0) { | 674 | if (syscall_get_nr(current, regs) >= 0) { |
687 | /* If so, check system call restarting.. */ | 675 | /* If so, check system call restarting.. */ |
@@ -712,10 +700,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
712 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) | 700 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) |
713 | regs->flags &= ~X86_EFLAGS_TF; | 701 | regs->flags &= ~X86_EFLAGS_TF; |
714 | 702 | ||
715 | ret = setup_rt_frame(sig, ka, info, regs); | 703 | if (setup_rt_frame(sig, ka, info, regs) < 0) { |
716 | 704 | force_sigsegv(sig, current); | |
717 | if (ret) | 705 | return; |
718 | return ret; | 706 | } |
719 | 707 | ||
720 | /* | 708 | /* |
721 | * Clear the direction flag as per the ABI for function entry. | 709 | * Clear the direction flag as per the ABI for function entry. |
@@ -730,12 +718,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
730 | */ | 718 | */ |
731 | regs->flags &= ~X86_EFLAGS_TF; | 719 | regs->flags &= ~X86_EFLAGS_TF; |
732 | 720 | ||
733 | block_sigmask(ka, sig); | 721 | signal_delivered(sig, info, ka, regs, |
734 | 722 | test_thread_flag(TIF_SINGLESTEP)); | |
735 | tracehook_signal_handler(sig, info, ka, regs, | ||
736 | test_thread_flag(TIF_SINGLESTEP)); | ||
737 | |||
738 | return 0; | ||
739 | } | 723 | } |
740 | 724 | ||
741 | #ifdef CONFIG_X86_32 | 725 | #ifdef CONFIG_X86_32 |
@@ -756,16 +740,6 @@ static void do_signal(struct pt_regs *regs) | |||
756 | siginfo_t info; | 740 | siginfo_t info; |
757 | int signr; | 741 | int signr; |
758 | 742 | ||
759 | /* | ||
760 | * We want the common case to go fast, which is why we may in certain | ||
761 | * cases get here from kernel mode. Just return without doing anything | ||
762 | * if so. | ||
763 | * X86_32: vm86 regs switched out by assembly code before reaching | ||
764 | * here, so testing against kernel CS suffices. | ||
765 | */ | ||
766 | if (!user_mode(regs)) | ||
767 | return; | ||
768 | |||
769 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 743 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); |
770 | if (signr > 0) { | 744 | if (signr > 0) { |
771 | /* Whee! Actually deliver the signal. */ | 745 | /* Whee! Actually deliver the signal. */ |
@@ -795,10 +769,7 @@ static void do_signal(struct pt_regs *regs) | |||
795 | * If there's no signal to deliver, we just put the saved sigmask | 769 | * If there's no signal to deliver, we just put the saved sigmask |
796 | * back. | 770 | * back. |
797 | */ | 771 | */ |
798 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) { | 772 | restore_saved_sigmask(); |
799 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
800 | set_current_blocked(¤t->saved_sigmask); | ||
801 | } | ||
802 | } | 773 | } |
803 | 774 | ||
804 | /* | 775 | /* |
@@ -814,6 +785,11 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
814 | mce_notify_process(); | 785 | mce_notify_process(); |
815 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | 786 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ |
816 | 787 | ||
788 | if (thread_info_flags & _TIF_UPROBE) { | ||
789 | clear_thread_flag(TIF_UPROBE); | ||
790 | uprobe_notify_resume(regs); | ||
791 | } | ||
792 | |||
817 | /* deal with pending signal delivery */ | 793 | /* deal with pending signal delivery */ |
818 | if (thread_info_flags & _TIF_SIGPENDING) | 794 | if (thread_info_flags & _TIF_SIGPENDING) |
819 | do_signal(regs); | 795 | do_signal(regs); |
@@ -821,8 +797,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
821 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | 797 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { |
822 | clear_thread_flag(TIF_NOTIFY_RESUME); | 798 | clear_thread_flag(TIF_NOTIFY_RESUME); |
823 | tracehook_notify_resume(regs); | 799 | tracehook_notify_resume(regs); |
824 | if (current->replacement_session_keyring) | ||
825 | key_replace_session_keyring(); | ||
826 | } | 800 | } |
827 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) | 801 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) |
828 | fire_user_return_notifiers(); | 802 | fire_user_return_notifiers(); |
@@ -843,7 +817,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | |||
843 | me->comm, me->pid, where, frame, | 817 | me->comm, me->pid, where, frame, |
844 | regs->ip, regs->sp, regs->orig_ax); | 818 | regs->ip, regs->sp, regs->orig_ax); |
845 | print_vma_addr(" in ", regs->ip); | 819 | print_vma_addr(" in ", regs->ip); |
846 | printk(KERN_CONT "\n"); | 820 | pr_cont("\n"); |
847 | } | 821 | } |
848 | 822 | ||
849 | force_sig(SIGSEGV, me); | 823 | force_sig(SIGSEGV, me); |
@@ -930,7 +904,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) | |||
930 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | 904 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) |
931 | goto badframe; | 905 | goto badframe; |
932 | 906 | ||
933 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
934 | set_current_blocked(&set); | 907 | set_current_blocked(&set); |
935 | 908 | ||
936 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 909 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 433529e29be..c1a310fb830 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * x86 SMP booting functions | 2 | * x86 SMP booting functions |
3 | * | 3 | * |
4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> | 4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> |
@@ -39,6 +39,8 @@ | |||
39 | * Glauber Costa : i386 and x86_64 integration | 39 | * Glauber Costa : i386 and x86_64 integration |
40 | */ | 40 | */ |
41 | 41 | ||
42 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
43 | |||
42 | #include <linux/init.h> | 44 | #include <linux/init.h> |
43 | #include <linux/smp.h> | 45 | #include <linux/smp.h> |
44 | #include <linux/module.h> | 46 | #include <linux/module.h> |
@@ -57,7 +59,7 @@ | |||
57 | #include <asm/nmi.h> | 59 | #include <asm/nmi.h> |
58 | #include <asm/irq.h> | 60 | #include <asm/irq.h> |
59 | #include <asm/idle.h> | 61 | #include <asm/idle.h> |
60 | #include <asm/trampoline.h> | 62 | #include <asm/realmode.h> |
61 | #include <asm/cpu.h> | 63 | #include <asm/cpu.h> |
62 | #include <asm/numa.h> | 64 | #include <asm/numa.h> |
63 | #include <asm/pgtable.h> | 65 | #include <asm/pgtable.h> |
@@ -73,6 +75,8 @@ | |||
73 | #include <asm/smpboot_hooks.h> | 75 | #include <asm/smpboot_hooks.h> |
74 | #include <asm/i8259.h> | 76 | #include <asm/i8259.h> |
75 | 77 | ||
78 | #include <asm/realmode.h> | ||
79 | |||
76 | /* State of each CPU */ | 80 | /* State of each CPU */ |
77 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; | 81 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; |
78 | 82 | ||
@@ -182,7 +186,7 @@ static void __cpuinit smp_callin(void) | |||
182 | * boards) | 186 | * boards) |
183 | */ | 187 | */ |
184 | 188 | ||
185 | pr_debug("CALLIN, before setup_local_APIC().\n"); | 189 | pr_debug("CALLIN, before setup_local_APIC()\n"); |
186 | if (apic->smp_callin_clear_local_apic) | 190 | if (apic->smp_callin_clear_local_apic) |
187 | apic->smp_callin_clear_local_apic(); | 191 | apic->smp_callin_clear_local_apic(); |
188 | setup_local_APIC(); | 192 | setup_local_APIC(); |
@@ -253,22 +257,13 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
253 | check_tsc_sync_target(); | 257 | check_tsc_sync_target(); |
254 | 258 | ||
255 | /* | 259 | /* |
256 | * We need to hold call_lock, so there is no inconsistency | ||
257 | * between the time smp_call_function() determines number of | ||
258 | * IPI recipients, and the time when the determination is made | ||
259 | * for which cpus receive the IPI. Holding this | ||
260 | * lock helps us to not include this cpu in a currently in progress | ||
261 | * smp_call_function(). | ||
262 | * | ||
263 | * We need to hold vector_lock so there the set of online cpus | 260 | * We need to hold vector_lock so there the set of online cpus |
264 | * does not change while we are assigning vectors to cpus. Holding | 261 | * does not change while we are assigning vectors to cpus. Holding |
265 | * this lock ensures we don't half assign or remove an irq from a cpu. | 262 | * this lock ensures we don't half assign or remove an irq from a cpu. |
266 | */ | 263 | */ |
267 | ipi_call_lock(); | ||
268 | lock_vector_lock(); | 264 | lock_vector_lock(); |
269 | set_cpu_online(smp_processor_id(), true); | 265 | set_cpu_online(smp_processor_id(), true); |
270 | unlock_vector_lock(); | 266 | unlock_vector_lock(); |
271 | ipi_call_unlock(); | ||
272 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 267 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
273 | x86_platform.nmi_init(); | 268 | x86_platform.nmi_init(); |
274 | 269 | ||
@@ -347,9 +342,12 @@ static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) | |||
347 | 342 | ||
348 | static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) | 343 | static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) |
349 | { | 344 | { |
350 | if (c->phys_proc_id == o->phys_proc_id) | 345 | if (c->phys_proc_id == o->phys_proc_id) { |
351 | return topology_sane(c, o, "mc"); | 346 | if (cpu_has(c, X86_FEATURE_AMD_DCM)) |
347 | return true; | ||
352 | 348 | ||
349 | return topology_sane(c, o, "mc"); | ||
350 | } | ||
353 | return false; | 351 | return false; |
354 | } | 352 | } |
355 | 353 | ||
@@ -380,6 +378,15 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
380 | if ((i == cpu) || (has_mc && match_llc(c, o))) | 378 | if ((i == cpu) || (has_mc && match_llc(c, o))) |
381 | link_mask(llc_shared, cpu, i); | 379 | link_mask(llc_shared, cpu, i); |
382 | 380 | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * This needs a separate iteration over the cpus because we rely on all | ||
385 | * cpu_sibling_mask links to be set-up. | ||
386 | */ | ||
387 | for_each_cpu(i, cpu_sibling_setup_mask) { | ||
388 | o = &cpu_data(i); | ||
389 | |||
383 | if ((i == cpu) || (has_mc && match_mc(c, o))) { | 390 | if ((i == cpu) || (has_mc && match_mc(c, o))) { |
384 | link_mask(core, cpu, i); | 391 | link_mask(core, cpu, i); |
385 | 392 | ||
@@ -408,15 +415,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
408 | /* maps the cpu to the sched domain representing multi-core */ | 415 | /* maps the cpu to the sched domain representing multi-core */ |
409 | const struct cpumask *cpu_coregroup_mask(int cpu) | 416 | const struct cpumask *cpu_coregroup_mask(int cpu) |
410 | { | 417 | { |
411 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 418 | return cpu_llc_shared_mask(cpu); |
412 | /* | ||
413 | * For perf, we return last level cache shared map. | ||
414 | * And for power savings, we return cpu_core_map | ||
415 | */ | ||
416 | if (!(cpu_has(c, X86_FEATURE_AMD_DCM))) | ||
417 | return cpu_core_mask(cpu); | ||
418 | else | ||
419 | return cpu_llc_shared_mask(cpu); | ||
420 | } | 419 | } |
421 | 420 | ||
422 | static void impress_friends(void) | 421 | static void impress_friends(void) |
@@ -426,17 +425,16 @@ static void impress_friends(void) | |||
426 | /* | 425 | /* |
427 | * Allow the user to impress friends. | 426 | * Allow the user to impress friends. |
428 | */ | 427 | */ |
429 | pr_debug("Before bogomips.\n"); | 428 | pr_debug("Before bogomips\n"); |
430 | for_each_possible_cpu(cpu) | 429 | for_each_possible_cpu(cpu) |
431 | if (cpumask_test_cpu(cpu, cpu_callout_mask)) | 430 | if (cpumask_test_cpu(cpu, cpu_callout_mask)) |
432 | bogosum += cpu_data(cpu).loops_per_jiffy; | 431 | bogosum += cpu_data(cpu).loops_per_jiffy; |
433 | printk(KERN_INFO | 432 | pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n", |
434 | "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", | ||
435 | num_online_cpus(), | 433 | num_online_cpus(), |
436 | bogosum/(500000/HZ), | 434 | bogosum/(500000/HZ), |
437 | (bogosum/(5000/HZ))%100); | 435 | (bogosum/(5000/HZ))%100); |
438 | 436 | ||
439 | pr_debug("Before bogocount - setting activated=1.\n"); | 437 | pr_debug("Before bogocount - setting activated=1\n"); |
440 | } | 438 | } |
441 | 439 | ||
442 | void __inquire_remote_apic(int apicid) | 440 | void __inquire_remote_apic(int apicid) |
@@ -446,18 +444,17 @@ void __inquire_remote_apic(int apicid) | |||
446 | int timeout; | 444 | int timeout; |
447 | u32 status; | 445 | u32 status; |
448 | 446 | ||
449 | printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); | 447 | pr_info("Inquiring remote APIC 0x%x...\n", apicid); |
450 | 448 | ||
451 | for (i = 0; i < ARRAY_SIZE(regs); i++) { | 449 | for (i = 0; i < ARRAY_SIZE(regs); i++) { |
452 | printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); | 450 | pr_info("... APIC 0x%x %s: ", apicid, names[i]); |
453 | 451 | ||
454 | /* | 452 | /* |
455 | * Wait for idle. | 453 | * Wait for idle. |
456 | */ | 454 | */ |
457 | status = safe_apic_wait_icr_idle(); | 455 | status = safe_apic_wait_icr_idle(); |
458 | if (status) | 456 | if (status) |
459 | printk(KERN_CONT | 457 | pr_cont("a previous APIC delivery may have failed\n"); |
460 | "a previous APIC delivery may have failed\n"); | ||
461 | 458 | ||
462 | apic_icr_write(APIC_DM_REMRD | regs[i], apicid); | 459 | apic_icr_write(APIC_DM_REMRD | regs[i], apicid); |
463 | 460 | ||
@@ -470,10 +467,10 @@ void __inquire_remote_apic(int apicid) | |||
470 | switch (status) { | 467 | switch (status) { |
471 | case APIC_ICR_RR_VALID: | 468 | case APIC_ICR_RR_VALID: |
472 | status = apic_read(APIC_RRR); | 469 | status = apic_read(APIC_RRR); |
473 | printk(KERN_CONT "%08x\n", status); | 470 | pr_cont("%08x\n", status); |
474 | break; | 471 | break; |
475 | default: | 472 | default: |
476 | printk(KERN_CONT "failed\n"); | 473 | pr_cont("failed\n"); |
477 | } | 474 | } |
478 | } | 475 | } |
479 | } | 476 | } |
@@ -507,12 +504,12 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) | |||
507 | apic_write(APIC_ESR, 0); | 504 | apic_write(APIC_ESR, 0); |
508 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 505 | accept_status = (apic_read(APIC_ESR) & 0xEF); |
509 | } | 506 | } |
510 | pr_debug("NMI sent.\n"); | 507 | pr_debug("NMI sent\n"); |
511 | 508 | ||
512 | if (send_status) | 509 | if (send_status) |
513 | printk(KERN_ERR "APIC never delivered???\n"); | 510 | pr_err("APIC never delivered???\n"); |
514 | if (accept_status) | 511 | if (accept_status) |
515 | printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); | 512 | pr_err("APIC delivery error (%lx)\n", accept_status); |
516 | 513 | ||
517 | return (send_status | accept_status); | 514 | return (send_status | accept_status); |
518 | } | 515 | } |
@@ -534,7 +531,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
534 | apic_read(APIC_ESR); | 531 | apic_read(APIC_ESR); |
535 | } | 532 | } |
536 | 533 | ||
537 | pr_debug("Asserting INIT.\n"); | 534 | pr_debug("Asserting INIT\n"); |
538 | 535 | ||
539 | /* | 536 | /* |
540 | * Turn INIT on target chip | 537 | * Turn INIT on target chip |
@@ -550,7 +547,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
550 | 547 | ||
551 | mdelay(10); | 548 | mdelay(10); |
552 | 549 | ||
553 | pr_debug("Deasserting INIT.\n"); | 550 | pr_debug("Deasserting INIT\n"); |
554 | 551 | ||
555 | /* Target chip */ | 552 | /* Target chip */ |
556 | /* Send IPI */ | 553 | /* Send IPI */ |
@@ -583,14 +580,14 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
583 | /* | 580 | /* |
584 | * Run STARTUP IPI loop. | 581 | * Run STARTUP IPI loop. |
585 | */ | 582 | */ |
586 | pr_debug("#startup loops: %d.\n", num_starts); | 583 | pr_debug("#startup loops: %d\n", num_starts); |
587 | 584 | ||
588 | for (j = 1; j <= num_starts; j++) { | 585 | for (j = 1; j <= num_starts; j++) { |
589 | pr_debug("Sending STARTUP #%d.\n", j); | 586 | pr_debug("Sending STARTUP #%d\n", j); |
590 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 587 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
591 | apic_write(APIC_ESR, 0); | 588 | apic_write(APIC_ESR, 0); |
592 | apic_read(APIC_ESR); | 589 | apic_read(APIC_ESR); |
593 | pr_debug("After apic_write.\n"); | 590 | pr_debug("After apic_write\n"); |
594 | 591 | ||
595 | /* | 592 | /* |
596 | * STARTUP IPI | 593 | * STARTUP IPI |
@@ -607,7 +604,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
607 | */ | 604 | */ |
608 | udelay(300); | 605 | udelay(300); |
609 | 606 | ||
610 | pr_debug("Startup point 1.\n"); | 607 | pr_debug("Startup point 1\n"); |
611 | 608 | ||
612 | pr_debug("Waiting for send to finish...\n"); | 609 | pr_debug("Waiting for send to finish...\n"); |
613 | send_status = safe_apic_wait_icr_idle(); | 610 | send_status = safe_apic_wait_icr_idle(); |
@@ -622,12 +619,12 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
622 | if (send_status || accept_status) | 619 | if (send_status || accept_status) |
623 | break; | 620 | break; |
624 | } | 621 | } |
625 | pr_debug("After Startup.\n"); | 622 | pr_debug("After Startup\n"); |
626 | 623 | ||
627 | if (send_status) | 624 | if (send_status) |
628 | printk(KERN_ERR "APIC never delivered???\n"); | 625 | pr_err("APIC never delivered???\n"); |
629 | if (accept_status) | 626 | if (accept_status) |
630 | printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); | 627 | pr_err("APIC delivery error (%lx)\n", accept_status); |
631 | 628 | ||
632 | return (send_status | accept_status); | 629 | return (send_status | accept_status); |
633 | } | 630 | } |
@@ -641,11 +638,11 @@ static void __cpuinit announce_cpu(int cpu, int apicid) | |||
641 | if (system_state == SYSTEM_BOOTING) { | 638 | if (system_state == SYSTEM_BOOTING) { |
642 | if (node != current_node) { | 639 | if (node != current_node) { |
643 | if (current_node > (-1)) | 640 | if (current_node > (-1)) |
644 | pr_cont(" Ok.\n"); | 641 | pr_cont(" OK\n"); |
645 | current_node = node; | 642 | current_node = node; |
646 | pr_info("Booting Node %3d, Processors ", node); | 643 | pr_info("Booting Node %3d, Processors ", node); |
647 | } | 644 | } |
648 | pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); | 645 | pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : ""); |
649 | return; | 646 | return; |
650 | } else | 647 | } else |
651 | pr_info("Booting Node %d Processor %d APIC 0x%x\n", | 648 | pr_info("Booting Node %d Processor %d APIC 0x%x\n", |
@@ -660,8 +657,12 @@ static void __cpuinit announce_cpu(int cpu, int apicid) | |||
660 | */ | 657 | */ |
661 | static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | 658 | static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) |
662 | { | 659 | { |
660 | volatile u32 *trampoline_status = | ||
661 | (volatile u32 *) __va(real_mode_header->trampoline_status); | ||
662 | /* start_ip had better be page-aligned! */ | ||
663 | unsigned long start_ip = real_mode_header->trampoline_start; | ||
664 | |||
663 | unsigned long boot_error = 0; | 665 | unsigned long boot_error = 0; |
664 | unsigned long start_ip; | ||
665 | int timeout; | 666 | int timeout; |
666 | 667 | ||
667 | alternatives_smp_switch(1); | 668 | alternatives_smp_switch(1); |
@@ -684,9 +685,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
684 | initial_code = (unsigned long)start_secondary; | 685 | initial_code = (unsigned long)start_secondary; |
685 | stack_start = idle->thread.sp; | 686 | stack_start = idle->thread.sp; |
686 | 687 | ||
687 | /* start_ip had better be page-aligned! */ | ||
688 | start_ip = trampoline_address(); | ||
689 | |||
690 | /* So we see what's up */ | 688 | /* So we see what's up */ |
691 | announce_cpu(cpu, apicid); | 689 | announce_cpu(cpu, apicid); |
692 | 690 | ||
@@ -724,9 +722,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
724 | /* | 722 | /* |
725 | * allow APs to start initializing. | 723 | * allow APs to start initializing. |
726 | */ | 724 | */ |
727 | pr_debug("Before Callout %d.\n", cpu); | 725 | pr_debug("Before Callout %d\n", cpu); |
728 | cpumask_set_cpu(cpu, cpu_callout_mask); | 726 | cpumask_set_cpu(cpu, cpu_callout_mask); |
729 | pr_debug("After Callout %d.\n", cpu); | 727 | pr_debug("After Callout %d\n", cpu); |
730 | 728 | ||
731 | /* | 729 | /* |
732 | * Wait 5s total for a response | 730 | * Wait 5s total for a response |
@@ -749,13 +747,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
749 | pr_debug("CPU%d: has booted.\n", cpu); | 747 | pr_debug("CPU%d: has booted.\n", cpu); |
750 | } else { | 748 | } else { |
751 | boot_error = 1; | 749 | boot_error = 1; |
752 | if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) | 750 | if (*trampoline_status == 0xA5A5A5A5) |
753 | == 0xA5A5A5A5) | ||
754 | /* trampoline started but...? */ | 751 | /* trampoline started but...? */ |
755 | pr_err("CPU%d: Stuck ??\n", cpu); | 752 | pr_err("CPU%d: Stuck ??\n", cpu); |
756 | else | 753 | else |
757 | /* trampoline code not run */ | 754 | /* trampoline code not run */ |
758 | pr_err("CPU%d: Not responding.\n", cpu); | 755 | pr_err("CPU%d: Not responding\n", cpu); |
759 | if (apic->inquire_remote_apic) | 756 | if (apic->inquire_remote_apic) |
760 | apic->inquire_remote_apic(apicid); | 757 | apic->inquire_remote_apic(apicid); |
761 | } | 758 | } |
@@ -776,7 +773,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
776 | } | 773 | } |
777 | 774 | ||
778 | /* mark "stuck" area as not stuck */ | 775 | /* mark "stuck" area as not stuck */ |
779 | *(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) = 0; | 776 | *trampoline_status = 0; |
780 | 777 | ||
781 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { | 778 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { |
782 | /* | 779 | /* |
@@ -800,7 +797,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
800 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || | 797 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || |
801 | !physid_isset(apicid, phys_cpu_present_map) || | 798 | !physid_isset(apicid, phys_cpu_present_map) || |
802 | !apic->apic_id_valid(apicid)) { | 799 | !apic->apic_id_valid(apicid)) { |
803 | printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); | 800 | pr_err("%s: bad cpu %d\n", __func__, cpu); |
804 | return -EINVAL; | 801 | return -EINVAL; |
805 | } | 802 | } |
806 | 803 | ||
@@ -881,9 +878,8 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
881 | unsigned int cpu; | 878 | unsigned int cpu; |
882 | unsigned nr; | 879 | unsigned nr; |
883 | 880 | ||
884 | printk(KERN_WARNING | 881 | pr_warn("More than 8 CPUs detected - skipping them\n" |
885 | "More than 8 CPUs detected - skipping them.\n" | 882 | "Use CONFIG_X86_BIGSMP\n"); |
886 | "Use CONFIG_X86_BIGSMP.\n"); | ||
887 | 883 | ||
888 | nr = 0; | 884 | nr = 0; |
889 | for_each_present_cpu(cpu) { | 885 | for_each_present_cpu(cpu) { |
@@ -904,8 +900,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
904 | #endif | 900 | #endif |
905 | 901 | ||
906 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | 902 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { |
907 | printk(KERN_WARNING | 903 | pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n", |
908 | "weird, boot CPU (#%d) not listed by the BIOS.\n", | ||
909 | hard_smp_processor_id()); | 904 | hard_smp_processor_id()); |
910 | 905 | ||
911 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | 906 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); |
@@ -917,11 +912,10 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
917 | */ | 912 | */ |
918 | if (!smp_found_config && !acpi_lapic) { | 913 | if (!smp_found_config && !acpi_lapic) { |
919 | preempt_enable(); | 914 | preempt_enable(); |
920 | printk(KERN_NOTICE "SMP motherboard not detected.\n"); | 915 | pr_notice("SMP motherboard not detected\n"); |
921 | disable_smp(); | 916 | disable_smp(); |
922 | if (APIC_init_uniprocessor()) | 917 | if (APIC_init_uniprocessor()) |
923 | printk(KERN_NOTICE "Local APIC not detected." | 918 | pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); |
924 | " Using dummy APIC emulation.\n"); | ||
925 | return -1; | 919 | return -1; |
926 | } | 920 | } |
927 | 921 | ||
@@ -930,9 +924,8 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
930 | * CPU too, but we do it for the sake of robustness anyway. | 924 | * CPU too, but we do it for the sake of robustness anyway. |
931 | */ | 925 | */ |
932 | if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { | 926 | if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { |
933 | printk(KERN_NOTICE | 927 | pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n", |
934 | "weird, boot CPU (#%d) not listed by the BIOS.\n", | 928 | boot_cpu_physical_apicid); |
935 | boot_cpu_physical_apicid); | ||
936 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | 929 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); |
937 | } | 930 | } |
938 | preempt_enable(); | 931 | preempt_enable(); |
@@ -945,8 +938,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
945 | if (!disable_apic) { | 938 | if (!disable_apic) { |
946 | pr_err("BIOS bug, local APIC #%d not detected!...\n", | 939 | pr_err("BIOS bug, local APIC #%d not detected!...\n", |
947 | boot_cpu_physical_apicid); | 940 | boot_cpu_physical_apicid); |
948 | pr_err("... forcing use of dummy APIC emulation." | 941 | pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); |
949 | "(tell your hw vendor)\n"); | ||
950 | } | 942 | } |
951 | smpboot_clear_io_apic(); | 943 | smpboot_clear_io_apic(); |
952 | disable_ioapic_support(); | 944 | disable_ioapic_support(); |
@@ -959,7 +951,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
959 | * If SMP should be disabled, then really disable it! | 951 | * If SMP should be disabled, then really disable it! |
960 | */ | 952 | */ |
961 | if (!max_cpus) { | 953 | if (!max_cpus) { |
962 | printk(KERN_INFO "SMP mode deactivated.\n"); | 954 | pr_info("SMP mode deactivated\n"); |
963 | smpboot_clear_io_apic(); | 955 | smpboot_clear_io_apic(); |
964 | 956 | ||
965 | connect_bsp_APIC(); | 957 | connect_bsp_APIC(); |
@@ -1011,7 +1003,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1011 | 1003 | ||
1012 | 1004 | ||
1013 | if (smp_sanity_check(max_cpus) < 0) { | 1005 | if (smp_sanity_check(max_cpus) < 0) { |
1014 | printk(KERN_INFO "SMP disabled\n"); | 1006 | pr_info("SMP disabled\n"); |
1015 | disable_smp(); | 1007 | disable_smp(); |
1016 | goto out; | 1008 | goto out; |
1017 | } | 1009 | } |
@@ -1049,7 +1041,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1049 | * Set up local APIC timer on boot CPU. | 1041 | * Set up local APIC timer on boot CPU. |
1050 | */ | 1042 | */ |
1051 | 1043 | ||
1052 | printk(KERN_INFO "CPU%d: ", 0); | 1044 | pr_info("CPU%d: ", 0); |
1053 | print_cpu_info(&cpu_data(0)); | 1045 | print_cpu_info(&cpu_data(0)); |
1054 | x86_init.timers.setup_percpu_clockev(); | 1046 | x86_init.timers.setup_percpu_clockev(); |
1055 | 1047 | ||
@@ -1099,7 +1091,7 @@ void __init native_smp_prepare_boot_cpu(void) | |||
1099 | 1091 | ||
1100 | void __init native_smp_cpus_done(unsigned int max_cpus) | 1092 | void __init native_smp_cpus_done(unsigned int max_cpus) |
1101 | { | 1093 | { |
1102 | pr_debug("Boot done.\n"); | 1094 | pr_debug("Boot done\n"); |
1103 | 1095 | ||
1104 | nmi_selftest(); | 1096 | nmi_selftest(); |
1105 | impress_friends(); | 1097 | impress_friends(); |
@@ -1160,8 +1152,7 @@ __init void prefill_possible_map(void) | |||
1160 | 1152 | ||
1161 | /* nr_cpu_ids could be reduced via nr_cpus= */ | 1153 | /* nr_cpu_ids could be reduced via nr_cpus= */ |
1162 | if (possible > nr_cpu_ids) { | 1154 | if (possible > nr_cpu_ids) { |
1163 | printk(KERN_WARNING | 1155 | pr_warn("%d Processors exceeds NR_CPUS limit of %d\n", |
1164 | "%d Processors exceeds NR_CPUS limit of %d\n", | ||
1165 | possible, nr_cpu_ids); | 1156 | possible, nr_cpu_ids); |
1166 | possible = nr_cpu_ids; | 1157 | possible = nr_cpu_ids; |
1167 | } | 1158 | } |
@@ -1170,13 +1161,12 @@ __init void prefill_possible_map(void) | |||
1170 | if (!setup_max_cpus) | 1161 | if (!setup_max_cpus) |
1171 | #endif | 1162 | #endif |
1172 | if (possible > i) { | 1163 | if (possible > i) { |
1173 | printk(KERN_WARNING | 1164 | pr_warn("%d Processors exceeds max_cpus limit of %u\n", |
1174 | "%d Processors exceeds max_cpus limit of %u\n", | ||
1175 | possible, setup_max_cpus); | 1165 | possible, setup_max_cpus); |
1176 | possible = i; | 1166 | possible = i; |
1177 | } | 1167 | } |
1178 | 1168 | ||
1179 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 1169 | pr_info("Allowing %d CPUs, %d hotplug CPUs\n", |
1180 | possible, max_t(int, possible - num_processors, 0)); | 1170 | possible, max_t(int, possible - num_processors, 0)); |
1181 | 1171 | ||
1182 | for (i = 0; i < possible; i++) | 1172 | for (i = 0; i < possible; i++) |
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 6410744ac5c..f84fe00fad4 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -32,7 +32,7 @@ | |||
32 | #include <linux/mm.h> | 32 | #include <linux/mm.h> |
33 | #include <linux/tboot.h> | 33 | #include <linux/tboot.h> |
34 | 34 | ||
35 | #include <asm/trampoline.h> | 35 | #include <asm/realmode.h> |
36 | #include <asm/processor.h> | 36 | #include <asm/processor.h> |
37 | #include <asm/bootparam.h> | 37 | #include <asm/bootparam.h> |
38 | #include <asm/pgtable.h> | 38 | #include <asm/pgtable.h> |
@@ -44,7 +44,7 @@ | |||
44 | #include <asm/e820.h> | 44 | #include <asm/e820.h> |
45 | #include <asm/io.h> | 45 | #include <asm/io.h> |
46 | 46 | ||
47 | #include "acpi/realmode/wakeup.h" | 47 | #include "../realmode/rm/wakeup.h" |
48 | 48 | ||
49 | /* Global pointer to shared data; NULL means no measured launch. */ | 49 | /* Global pointer to shared data; NULL means no measured launch. */ |
50 | struct tboot *tboot __read_mostly; | 50 | struct tboot *tboot __read_mostly; |
@@ -201,7 +201,8 @@ static int tboot_setup_sleep(void) | |||
201 | add_mac_region(e820.map[i].addr, e820.map[i].size); | 201 | add_mac_region(e820.map[i].addr, e820.map[i].size); |
202 | } | 202 | } |
203 | 203 | ||
204 | tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; | 204 | tboot->acpi_sinfo.kernel_s3_resume_vector = |
205 | real_mode_header->wakeup_start; | ||
205 | 206 | ||
206 | return 0; | 207 | return 0; |
207 | } | 208 | } |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c deleted file mode 100644 index a73b61055ad..00000000000 --- a/arch/x86/kernel/trampoline.c +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | #include <linux/io.h> | ||
2 | #include <linux/memblock.h> | ||
3 | |||
4 | #include <asm/trampoline.h> | ||
5 | #include <asm/cacheflush.h> | ||
6 | #include <asm/pgtable.h> | ||
7 | |||
8 | unsigned char *x86_trampoline_base; | ||
9 | |||
10 | void __init setup_trampolines(void) | ||
11 | { | ||
12 | phys_addr_t mem; | ||
13 | size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); | ||
14 | |||
15 | /* Has to be in very low memory so we can execute real-mode AP code. */ | ||
16 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); | ||
17 | if (!mem) | ||
18 | panic("Cannot allocate trampoline\n"); | ||
19 | |||
20 | x86_trampoline_base = __va(mem); | ||
21 | memblock_reserve(mem, size); | ||
22 | |||
23 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", | ||
24 | x86_trampoline_base, (unsigned long long)mem, size); | ||
25 | |||
26 | memcpy(x86_trampoline_base, x86_trampoline_start, size); | ||
27 | } | ||
28 | |||
29 | /* | ||
30 | * setup_trampolines() gets called very early, to guarantee the | ||
31 | * availability of low memory. This is before the proper kernel page | ||
32 | * tables are set up, so we cannot set page permissions in that | ||
33 | * function. Thus, we use an arch_initcall instead. | ||
34 | */ | ||
35 | static int __init configure_trampolines(void) | ||
36 | { | ||
37 | size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); | ||
38 | |||
39 | set_memory_x((unsigned long)x86_trampoline_base, size >> PAGE_SHIFT); | ||
40 | return 0; | ||
41 | } | ||
42 | arch_initcall(configure_trampolines); | ||
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S deleted file mode 100644 index 451c0a7ef7f..00000000000 --- a/arch/x86/kernel/trampoline_32.S +++ /dev/null | |||
@@ -1,83 +0,0 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Trampoline.S Derived from Setup.S by Linus Torvalds | ||
4 | * | ||
5 | * 4 Jan 1997 Michael Chastain: changed to gnu as. | ||
6 | * | ||
7 | * This is only used for booting secondary CPUs in SMP machine | ||
8 | * | ||
9 | * Entry: CS:IP point to the start of our code, we are | ||
10 | * in real mode with no stack, but the rest of the | ||
11 | * trampoline page to make our stack and everything else | ||
12 | * is a mystery. | ||
13 | * | ||
14 | * We jump into arch/x86/kernel/head_32.S. | ||
15 | * | ||
16 | * On entry to trampoline_data, the processor is in real mode | ||
17 | * with 16-bit addressing and 16-bit data. CS has some value | ||
18 | * and IP is zero. Thus, data addresses need to be absolute | ||
19 | * (no relocation) and are taken with regard to r_base. | ||
20 | * | ||
21 | * If you work on this file, check the object module with | ||
22 | * objdump --reloc to make sure there are no relocation | ||
23 | * entries except for: | ||
24 | * | ||
25 | * TYPE VALUE | ||
26 | * R_386_32 startup_32_smp | ||
27 | * R_386_32 boot_gdt | ||
28 | */ | ||
29 | |||
30 | #include <linux/linkage.h> | ||
31 | #include <linux/init.h> | ||
32 | #include <asm/segment.h> | ||
33 | #include <asm/page_types.h> | ||
34 | |||
35 | #ifdef CONFIG_SMP | ||
36 | |||
37 | .section ".x86_trampoline","a" | ||
38 | .balign PAGE_SIZE | ||
39 | .code16 | ||
40 | |||
41 | ENTRY(trampoline_data) | ||
42 | r_base = . | ||
43 | wbinvd # Needed for NUMA-Q should be harmless for others | ||
44 | mov %cs, %ax # Code and data in the same place | ||
45 | mov %ax, %ds | ||
46 | |||
47 | cli # We should be safe anyway | ||
48 | |||
49 | movl $0xA5A5A5A5, trampoline_status - r_base | ||
50 | # write marker for master knows we're running | ||
51 | |||
52 | /* GDT tables in non default location kernel can be beyond 16MB and | ||
53 | * lgdt will not be able to load the address as in real mode default | ||
54 | * operand size is 16bit. Use lgdtl instead to force operand size | ||
55 | * to 32 bit. | ||
56 | */ | ||
57 | |||
58 | lidtl boot_idt_descr - r_base # load idt with 0, 0 | ||
59 | lgdtl boot_gdt_descr - r_base # load gdt with whatever is appropriate | ||
60 | |||
61 | xor %ax, %ax | ||
62 | inc %ax # protected mode (PE) bit | ||
63 | lmsw %ax # into protected mode | ||
64 | # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S | ||
65 | ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET) | ||
66 | |||
67 | # These need to be in the same 64K segment as the above; | ||
68 | # hence we don't use the boot_gdt_descr defined in head.S | ||
69 | boot_gdt_descr: | ||
70 | .word __BOOT_DS + 7 # gdt limit | ||
71 | .long boot_gdt - __PAGE_OFFSET # gdt base | ||
72 | |||
73 | boot_idt_descr: | ||
74 | .word 0 # idt limit = 0 | ||
75 | .long 0 # idt base = 0L | ||
76 | |||
77 | ENTRY(trampoline_status) | ||
78 | .long 0 | ||
79 | |||
80 | .globl trampoline_end | ||
81 | trampoline_end: | ||
82 | |||
83 | #endif /* CONFIG_SMP */ | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ff08457a025..b481341c936 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -9,6 +9,9 @@ | |||
9 | /* | 9 | /* |
10 | * Handle hardware traps and faults. | 10 | * Handle hardware traps and faults. |
11 | */ | 11 | */ |
12 | |||
13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
14 | |||
12 | #include <linux/interrupt.h> | 15 | #include <linux/interrupt.h> |
13 | #include <linux/kallsyms.h> | 16 | #include <linux/kallsyms.h> |
14 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
@@ -143,12 +146,11 @@ trap_signal: | |||
143 | #ifdef CONFIG_X86_64 | 146 | #ifdef CONFIG_X86_64 |
144 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | 147 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && |
145 | printk_ratelimit()) { | 148 | printk_ratelimit()) { |
146 | printk(KERN_INFO | 149 | pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", |
147 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | 150 | tsk->comm, tsk->pid, str, |
148 | tsk->comm, tsk->pid, str, | 151 | regs->ip, regs->sp, error_code); |
149 | regs->ip, regs->sp, error_code); | ||
150 | print_vma_addr(" in ", regs->ip); | 152 | print_vma_addr(" in ", regs->ip); |
151 | printk("\n"); | 153 | pr_cont("\n"); |
152 | } | 154 | } |
153 | #endif | 155 | #endif |
154 | 156 | ||
@@ -269,12 +271,11 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
269 | 271 | ||
270 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | 272 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && |
271 | printk_ratelimit()) { | 273 | printk_ratelimit()) { |
272 | printk(KERN_INFO | 274 | pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx", |
273 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", | ||
274 | tsk->comm, task_pid_nr(tsk), | 275 | tsk->comm, task_pid_nr(tsk), |
275 | regs->ip, regs->sp, error_code); | 276 | regs->ip, regs->sp, error_code); |
276 | print_vma_addr(" in ", regs->ip); | 277 | print_vma_addr(" in ", regs->ip); |
277 | printk("\n"); | 278 | pr_cont("\n"); |
278 | } | 279 | } |
279 | 280 | ||
280 | force_sig(SIGSEGV, tsk); | 281 | force_sig(SIGSEGV, tsk); |
@@ -303,8 +304,12 @@ gp_in_kernel: | |||
303 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) | 304 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) |
304 | { | 305 | { |
305 | #ifdef CONFIG_DYNAMIC_FTRACE | 306 | #ifdef CONFIG_DYNAMIC_FTRACE |
306 | /* ftrace must be first, everything else may cause a recursive crash */ | 307 | /* |
307 | if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) | 308 | * ftrace must be first, everything else may cause a recursive crash. |
309 | * See note by declaration of modifying_ftrace_code in ftrace.c | ||
310 | */ | ||
311 | if (unlikely(atomic_read(&modifying_ftrace_code)) && | ||
312 | ftrace_int3_handler(regs)) | ||
308 | return; | 313 | return; |
309 | #endif | 314 | #endif |
310 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 315 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
@@ -566,7 +571,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | |||
566 | conditional_sti(regs); | 571 | conditional_sti(regs); |
567 | #if 0 | 572 | #if 0 |
568 | /* No need to warn about this any longer. */ | 573 | /* No need to warn about this any longer. */ |
569 | printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); | 574 | pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); |
570 | #endif | 575 | #endif |
571 | } | 576 | } |
572 | 577 | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index fc0a147e372..cfa5d4f7ca5 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
2 | |||
1 | #include <linux/kernel.h> | 3 | #include <linux/kernel.h> |
2 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
3 | #include <linux/init.h> | 5 | #include <linux/init.h> |
@@ -84,8 +86,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable); | |||
84 | #ifdef CONFIG_X86_TSC | 86 | #ifdef CONFIG_X86_TSC |
85 | int __init notsc_setup(char *str) | 87 | int __init notsc_setup(char *str) |
86 | { | 88 | { |
87 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | 89 | pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n"); |
88 | "cannot disable TSC completely.\n"); | ||
89 | tsc_disabled = 1; | 90 | tsc_disabled = 1; |
90 | return 1; | 91 | return 1; |
91 | } | 92 | } |
@@ -373,7 +374,7 @@ static unsigned long quick_pit_calibrate(void) | |||
373 | goto success; | 374 | goto success; |
374 | } | 375 | } |
375 | } | 376 | } |
376 | printk("Fast TSC calibration failed\n"); | 377 | pr_err("Fast TSC calibration failed\n"); |
377 | return 0; | 378 | return 0; |
378 | 379 | ||
379 | success: | 380 | success: |
@@ -392,7 +393,7 @@ success: | |||
392 | */ | 393 | */ |
393 | delta *= PIT_TICK_RATE; | 394 | delta *= PIT_TICK_RATE; |
394 | do_div(delta, i*256*1000); | 395 | do_div(delta, i*256*1000); |
395 | printk("Fast TSC calibration using PIT\n"); | 396 | pr_info("Fast TSC calibration using PIT\n"); |
396 | return delta; | 397 | return delta; |
397 | } | 398 | } |
398 | 399 | ||
@@ -487,9 +488,8 @@ unsigned long native_calibrate_tsc(void) | |||
487 | * use the reference value, as it is more precise. | 488 | * use the reference value, as it is more precise. |
488 | */ | 489 | */ |
489 | if (delta >= 90 && delta <= 110) { | 490 | if (delta >= 90 && delta <= 110) { |
490 | printk(KERN_INFO | 491 | pr_info("PIT calibration matches %s. %d loops\n", |
491 | "TSC: PIT calibration matches %s. %d loops\n", | 492 | hpet ? "HPET" : "PMTIMER", i + 1); |
492 | hpet ? "HPET" : "PMTIMER", i + 1); | ||
493 | return tsc_ref_min; | 493 | return tsc_ref_min; |
494 | } | 494 | } |
495 | 495 | ||
@@ -511,38 +511,36 @@ unsigned long native_calibrate_tsc(void) | |||
511 | */ | 511 | */ |
512 | if (tsc_pit_min == ULONG_MAX) { | 512 | if (tsc_pit_min == ULONG_MAX) { |
513 | /* PIT gave no useful value */ | 513 | /* PIT gave no useful value */ |
514 | printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); | 514 | pr_warn("Unable to calibrate against PIT\n"); |
515 | 515 | ||
516 | /* We don't have an alternative source, disable TSC */ | 516 | /* We don't have an alternative source, disable TSC */ |
517 | if (!hpet && !ref1 && !ref2) { | 517 | if (!hpet && !ref1 && !ref2) { |
518 | printk("TSC: No reference (HPET/PMTIMER) available\n"); | 518 | pr_notice("No reference (HPET/PMTIMER) available\n"); |
519 | return 0; | 519 | return 0; |
520 | } | 520 | } |
521 | 521 | ||
522 | /* The alternative source failed as well, disable TSC */ | 522 | /* The alternative source failed as well, disable TSC */ |
523 | if (tsc_ref_min == ULONG_MAX) { | 523 | if (tsc_ref_min == ULONG_MAX) { |
524 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " | 524 | pr_warn("HPET/PMTIMER calibration failed\n"); |
525 | "failed.\n"); | ||
526 | return 0; | 525 | return 0; |
527 | } | 526 | } |
528 | 527 | ||
529 | /* Use the alternative source */ | 528 | /* Use the alternative source */ |
530 | printk(KERN_INFO "TSC: using %s reference calibration\n", | 529 | pr_info("using %s reference calibration\n", |
531 | hpet ? "HPET" : "PMTIMER"); | 530 | hpet ? "HPET" : "PMTIMER"); |
532 | 531 | ||
533 | return tsc_ref_min; | 532 | return tsc_ref_min; |
534 | } | 533 | } |
535 | 534 | ||
536 | /* We don't have an alternative source, use the PIT calibration value */ | 535 | /* We don't have an alternative source, use the PIT calibration value */ |
537 | if (!hpet && !ref1 && !ref2) { | 536 | if (!hpet && !ref1 && !ref2) { |
538 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | 537 | pr_info("Using PIT calibration value\n"); |
539 | return tsc_pit_min; | 538 | return tsc_pit_min; |
540 | } | 539 | } |
541 | 540 | ||
542 | /* The alternative source failed, use the PIT calibration value */ | 541 | /* The alternative source failed, use the PIT calibration value */ |
543 | if (tsc_ref_min == ULONG_MAX) { | 542 | if (tsc_ref_min == ULONG_MAX) { |
544 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " | 543 | pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n"); |
545 | "Using PIT calibration\n"); | ||
546 | return tsc_pit_min; | 544 | return tsc_pit_min; |
547 | } | 545 | } |
548 | 546 | ||
@@ -551,9 +549,9 @@ unsigned long native_calibrate_tsc(void) | |||
551 | * the PIT value as we know that there are PMTIMERs around | 549 | * the PIT value as we know that there are PMTIMERs around |
552 | * running at double speed. At least we let the user know: | 550 | * running at double speed. At least we let the user know: |
553 | */ | 551 | */ |
554 | printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", | 552 | pr_warn("PIT calibration deviates from %s: %lu %lu\n", |
555 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); | 553 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); |
556 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | 554 | pr_info("Using PIT calibration value\n"); |
557 | return tsc_pit_min; | 555 | return tsc_pit_min; |
558 | } | 556 | } |
559 | 557 | ||
@@ -785,7 +783,7 @@ void mark_tsc_unstable(char *reason) | |||
785 | tsc_unstable = 1; | 783 | tsc_unstable = 1; |
786 | sched_clock_stable = 0; | 784 | sched_clock_stable = 0; |
787 | disable_sched_clock_irqtime(); | 785 | disable_sched_clock_irqtime(); |
788 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); | 786 | pr_info("Marking TSC unstable due to %s\n", reason); |
789 | /* Change only the rating, when not registered */ | 787 | /* Change only the rating, when not registered */ |
790 | if (clocksource_tsc.mult) | 788 | if (clocksource_tsc.mult) |
791 | clocksource_mark_unstable(&clocksource_tsc); | 789 | clocksource_mark_unstable(&clocksource_tsc); |
@@ -912,9 +910,9 @@ static void tsc_refine_calibration_work(struct work_struct *work) | |||
912 | goto out; | 910 | goto out; |
913 | 911 | ||
914 | tsc_khz = freq; | 912 | tsc_khz = freq; |
915 | printk(KERN_INFO "Refined TSC clocksource calibration: " | 913 | pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n", |
916 | "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, | 914 | (unsigned long)tsc_khz / 1000, |
917 | (unsigned long)tsc_khz % 1000); | 915 | (unsigned long)tsc_khz % 1000); |
918 | 916 | ||
919 | out: | 917 | out: |
920 | clocksource_register_khz(&clocksource_tsc, tsc_khz); | 918 | clocksource_register_khz(&clocksource_tsc, tsc_khz); |
@@ -970,9 +968,9 @@ void __init tsc_init(void) | |||
970 | return; | 968 | return; |
971 | } | 969 | } |
972 | 970 | ||
973 | printk("Detected %lu.%03lu MHz processor.\n", | 971 | pr_info("Detected %lu.%03lu MHz processor\n", |
974 | (unsigned long)cpu_khz / 1000, | 972 | (unsigned long)cpu_khz / 1000, |
975 | (unsigned long)cpu_khz % 1000); | 973 | (unsigned long)cpu_khz % 1000); |
976 | 974 | ||
977 | /* | 975 | /* |
978 | * Secondary CPUs do not run through tsc_init(), so set up | 976 | * Secondary CPUs do not run through tsc_init(), so set up |
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c new file mode 100644 index 00000000000..36fd42091fa --- /dev/null +++ b/arch/x86/kernel/uprobes.c | |||
@@ -0,0 +1,675 @@ | |||
1 | /* | ||
2 | * User-space Probes (UProbes) for x86 | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2008-2011 | ||
19 | * Authors: | ||
20 | * Srikar Dronamraju | ||
21 | * Jim Keniston | ||
22 | */ | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/ptrace.h> | ||
26 | #include <linux/uprobes.h> | ||
27 | #include <linux/uaccess.h> | ||
28 | |||
29 | #include <linux/kdebug.h> | ||
30 | #include <asm/processor.h> | ||
31 | #include <asm/insn.h> | ||
32 | |||
33 | /* Post-execution fixups. */ | ||
34 | |||
35 | /* No fixup needed */ | ||
36 | #define UPROBE_FIX_NONE 0x0 | ||
37 | |||
38 | /* Adjust IP back to vicinity of actual insn */ | ||
39 | #define UPROBE_FIX_IP 0x1 | ||
40 | |||
41 | /* Adjust the return address of a call insn */ | ||
42 | #define UPROBE_FIX_CALL 0x2 | ||
43 | |||
44 | #define UPROBE_FIX_RIP_AX 0x8000 | ||
45 | #define UPROBE_FIX_RIP_CX 0x4000 | ||
46 | |||
47 | #define UPROBE_TRAP_NR UINT_MAX | ||
48 | |||
49 | /* Adaptations for mhiramat x86 decoder v14. */ | ||
50 | #define OPCODE1(insn) ((insn)->opcode.bytes[0]) | ||
51 | #define OPCODE2(insn) ((insn)->opcode.bytes[1]) | ||
52 | #define OPCODE3(insn) ((insn)->opcode.bytes[2]) | ||
53 | #define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value) | ||
54 | |||
55 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ | ||
56 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | ||
57 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ | ||
58 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ | ||
59 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ | ||
60 | << (row % 32)) | ||
61 | |||
62 | /* | ||
63 | * Good-instruction tables for 32-bit apps. This is non-const and volatile | ||
64 | * to keep gcc from statically optimizing it out, as variable_test_bit makes | ||
65 | * some versions of gcc to think only *(unsigned long*) is used. | ||
66 | */ | ||
67 | static volatile u32 good_insns_32[256 / 32] = { | ||
68 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
69 | /* ---------------------------------------------- */ | ||
70 | W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */ | ||
71 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ | ||
72 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */ | ||
73 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */ | ||
74 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | ||
75 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
76 | W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | ||
77 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | ||
78 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
79 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
80 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | ||
81 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
82 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | ||
83 | W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
84 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | ||
85 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | ||
86 | /* ---------------------------------------------- */ | ||
87 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
88 | }; | ||
89 | |||
90 | /* Using this for both 64-bit and 32-bit apps */ | ||
91 | static volatile u32 good_2byte_insns[256 / 32] = { | ||
92 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
93 | /* ---------------------------------------------- */ | ||
94 | W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ | ||
95 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ | ||
96 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ | ||
97 | W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ | ||
98 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | ||
99 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
100 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ | ||
101 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ | ||
102 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
103 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
104 | W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ | ||
105 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
106 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ | ||
107 | W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
108 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ | ||
109 | W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ | ||
110 | /* ---------------------------------------------- */ | ||
111 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
112 | }; | ||
113 | |||
114 | #ifdef CONFIG_X86_64 | ||
115 | /* Good-instruction tables for 64-bit apps */ | ||
116 | static volatile u32 good_insns_64[256 / 32] = { | ||
117 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
118 | /* ---------------------------------------------- */ | ||
119 | W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ | ||
120 | W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ | ||
121 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ | ||
122 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ | ||
123 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
124 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
125 | W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | ||
126 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | ||
127 | W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
128 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
129 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | ||
130 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
131 | W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | ||
132 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
133 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | ||
134 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | ||
135 | /* ---------------------------------------------- */ | ||
136 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
137 | }; | ||
138 | #endif | ||
139 | #undef W | ||
140 | |||
141 | /* | ||
142 | * opcodes we'll probably never support: | ||
143 | * | ||
144 | * 6c-6d, e4-e5, ec-ed - in | ||
145 | * 6e-6f, e6-e7, ee-ef - out | ||
146 | * cc, cd - int3, int | ||
147 | * cf - iret | ||
148 | * d6 - illegal instruction | ||
149 | * f1 - int1/icebp | ||
150 | * f4 - hlt | ||
151 | * fa, fb - cli, sti | ||
152 | * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2 | ||
153 | * | ||
154 | * invalid opcodes in 64-bit mode: | ||
155 | * | ||
156 | * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5 | ||
157 | * 63 - we support this opcode in x86_64 but not in i386. | ||
158 | * | ||
159 | * opcodes we may need to refine support for: | ||
160 | * | ||
161 | * 0f - 2-byte instructions: For many of these instructions, the validity | ||
162 | * depends on the prefix and/or the reg field. On such instructions, we | ||
163 | * just consider the opcode combination valid if it corresponds to any | ||
164 | * valid instruction. | ||
165 | * | ||
166 | * 8f - Group 1 - only reg = 0 is OK | ||
167 | * c6-c7 - Group 11 - only reg = 0 is OK | ||
168 | * d9-df - fpu insns with some illegal encodings | ||
169 | * f2, f3 - repnz, repz prefixes. These are also the first byte for | ||
170 | * certain floating-point instructions, such as addsd. | ||
171 | * | ||
172 | * fe - Group 4 - only reg = 0 or 1 is OK | ||
173 | * ff - Group 5 - only reg = 0-6 is OK | ||
174 | * | ||
175 | * others -- Do we need to support these? | ||
176 | * | ||
177 | * 0f - (floating-point?) prefetch instructions | ||
178 | * 07, 17, 1f - pop es, pop ss, pop ds | ||
179 | * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes -- | ||
180 | * but 64 and 65 (fs: and gs:) seem to be used, so we support them | ||
181 | * 67 - addr16 prefix | ||
182 | * ce - into | ||
183 | * f0 - lock prefix | ||
184 | */ | ||
185 | |||
186 | /* | ||
187 | * TODO: | ||
188 | * - Where necessary, examine the modrm byte and allow only valid instructions | ||
189 | * in the different Groups and fpu instructions. | ||
190 | */ | ||
191 | |||
192 | static bool is_prefix_bad(struct insn *insn) | ||
193 | { | ||
194 | int i; | ||
195 | |||
196 | for (i = 0; i < insn->prefixes.nbytes; i++) { | ||
197 | switch (insn->prefixes.bytes[i]) { | ||
198 | case 0x26: /* INAT_PFX_ES */ | ||
199 | case 0x2E: /* INAT_PFX_CS */ | ||
200 | case 0x36: /* INAT_PFX_DS */ | ||
201 | case 0x3E: /* INAT_PFX_SS */ | ||
202 | case 0xF0: /* INAT_PFX_LOCK */ | ||
203 | return true; | ||
204 | } | ||
205 | } | ||
206 | return false; | ||
207 | } | ||
208 | |||
209 | static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) | ||
210 | { | ||
211 | insn_init(insn, auprobe->insn, false); | ||
212 | |||
213 | /* Skip good instruction prefixes; reject "bad" ones. */ | ||
214 | insn_get_opcode(insn); | ||
215 | if (is_prefix_bad(insn)) | ||
216 | return -ENOTSUPP; | ||
217 | |||
218 | if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) | ||
219 | return 0; | ||
220 | |||
221 | if (insn->opcode.nbytes == 2) { | ||
222 | if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) | ||
223 | return 0; | ||
224 | } | ||
225 | |||
226 | return -ENOTSUPP; | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * Figure out which fixups arch_uprobe_post_xol() will need to perform, and | ||
231 | * annotate arch_uprobe->fixups accordingly. To start with, | ||
232 | * arch_uprobe->fixups is either zero or it reflects rip-related fixups. | ||
233 | */ | ||
234 | static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) | ||
235 | { | ||
236 | bool fix_ip = true, fix_call = false; /* defaults */ | ||
237 | int reg; | ||
238 | |||
239 | insn_get_opcode(insn); /* should be a nop */ | ||
240 | |||
241 | switch (OPCODE1(insn)) { | ||
242 | case 0xc3: /* ret/lret */ | ||
243 | case 0xcb: | ||
244 | case 0xc2: | ||
245 | case 0xca: | ||
246 | /* ip is correct */ | ||
247 | fix_ip = false; | ||
248 | break; | ||
249 | case 0xe8: /* call relative - Fix return addr */ | ||
250 | fix_call = true; | ||
251 | break; | ||
252 | case 0x9a: /* call absolute - Fix return addr, not ip */ | ||
253 | fix_call = true; | ||
254 | fix_ip = false; | ||
255 | break; | ||
256 | case 0xff: | ||
257 | insn_get_modrm(insn); | ||
258 | reg = MODRM_REG(insn); | ||
259 | if (reg == 2 || reg == 3) { | ||
260 | /* call or lcall, indirect */ | ||
261 | /* Fix return addr; ip is correct. */ | ||
262 | fix_call = true; | ||
263 | fix_ip = false; | ||
264 | } else if (reg == 4 || reg == 5) { | ||
265 | /* jmp or ljmp, indirect */ | ||
266 | /* ip is correct. */ | ||
267 | fix_ip = false; | ||
268 | } | ||
269 | break; | ||
270 | case 0xea: /* jmp absolute -- ip is correct */ | ||
271 | fix_ip = false; | ||
272 | break; | ||
273 | default: | ||
274 | break; | ||
275 | } | ||
276 | if (fix_ip) | ||
277 | auprobe->fixups |= UPROBE_FIX_IP; | ||
278 | if (fix_call) | ||
279 | auprobe->fixups |= UPROBE_FIX_CALL; | ||
280 | } | ||
281 | |||
282 | #ifdef CONFIG_X86_64 | ||
283 | /* | ||
284 | * If arch_uprobe->insn doesn't use rip-relative addressing, return | ||
285 | * immediately. Otherwise, rewrite the instruction so that it accesses | ||
286 | * its memory operand indirectly through a scratch register. Set | ||
287 | * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address | ||
288 | * accordingly. (The contents of the scratch register will be saved | ||
289 | * before we single-step the modified instruction, and restored | ||
290 | * afterward.) | ||
291 | * | ||
292 | * We do this because a rip-relative instruction can access only a | ||
293 | * relatively small area (+/- 2 GB from the instruction), and the XOL | ||
294 | * area typically lies beyond that area. At least for instructions | ||
295 | * that store to memory, we can't execute the original instruction | ||
296 | * and "fix things up" later, because the misdirected store could be | ||
297 | * disastrous. | ||
298 | * | ||
299 | * Some useful facts about rip-relative instructions: | ||
300 | * | ||
301 | * - There's always a modrm byte. | ||
302 | * - There's never a SIB byte. | ||
303 | * - The displacement is always 4 bytes. | ||
304 | */ | ||
305 | static void | ||
306 | handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | ||
307 | { | ||
308 | u8 *cursor; | ||
309 | u8 reg; | ||
310 | |||
311 | if (mm->context.ia32_compat) | ||
312 | return; | ||
313 | |||
314 | auprobe->rip_rela_target_address = 0x0; | ||
315 | if (!insn_rip_relative(insn)) | ||
316 | return; | ||
317 | |||
318 | /* | ||
319 | * insn_rip_relative() would have decoded rex_prefix, modrm. | ||
320 | * Clear REX.b bit (extension of MODRM.rm field): | ||
321 | * we want to encode rax/rcx, not r8/r9. | ||
322 | */ | ||
323 | if (insn->rex_prefix.nbytes) { | ||
324 | cursor = auprobe->insn + insn_offset_rex_prefix(insn); | ||
325 | *cursor &= 0xfe; /* Clearing REX.B bit */ | ||
326 | } | ||
327 | |||
328 | /* | ||
329 | * Point cursor at the modrm byte. The next 4 bytes are the | ||
330 | * displacement. Beyond the displacement, for some instructions, | ||
331 | * is the immediate operand. | ||
332 | */ | ||
333 | cursor = auprobe->insn + insn_offset_modrm(insn); | ||
334 | insn_get_length(insn); | ||
335 | |||
336 | /* | ||
337 | * Convert from rip-relative addressing to indirect addressing | ||
338 | * via a scratch register. Change the r/m field from 0x5 (%rip) | ||
339 | * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. | ||
340 | */ | ||
341 | reg = MODRM_REG(insn); | ||
342 | if (reg == 0) { | ||
343 | /* | ||
344 | * The register operand (if any) is either the A register | ||
345 | * (%rax, %eax, etc.) or (if the 0x4 bit is set in the | ||
346 | * REX prefix) %r8. In any case, we know the C register | ||
347 | * is NOT the register operand, so we use %rcx (register | ||
348 | * #1) for the scratch register. | ||
349 | */ | ||
350 | auprobe->fixups = UPROBE_FIX_RIP_CX; | ||
351 | /* Change modrm from 00 000 101 to 00 000 001. */ | ||
352 | *cursor = 0x1; | ||
353 | } else { | ||
354 | /* Use %rax (register #0) for the scratch register. */ | ||
355 | auprobe->fixups = UPROBE_FIX_RIP_AX; | ||
356 | /* Change modrm from 00 xxx 101 to 00 xxx 000 */ | ||
357 | *cursor = (reg << 3); | ||
358 | } | ||
359 | |||
360 | /* Target address = address of next instruction + (signed) offset */ | ||
361 | auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value; | ||
362 | |||
363 | /* Displacement field is gone; slide immediate field (if any) over. */ | ||
364 | if (insn->immediate.nbytes) { | ||
365 | cursor++; | ||
366 | memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); | ||
367 | } | ||
368 | return; | ||
369 | } | ||
370 | |||
371 | static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) | ||
372 | { | ||
373 | insn_init(insn, auprobe->insn, true); | ||
374 | |||
375 | /* Skip good instruction prefixes; reject "bad" ones. */ | ||
376 | insn_get_opcode(insn); | ||
377 | if (is_prefix_bad(insn)) | ||
378 | return -ENOTSUPP; | ||
379 | |||
380 | if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) | ||
381 | return 0; | ||
382 | |||
383 | if (insn->opcode.nbytes == 2) { | ||
384 | if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) | ||
385 | return 0; | ||
386 | } | ||
387 | return -ENOTSUPP; | ||
388 | } | ||
389 | |||
390 | static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | ||
391 | { | ||
392 | if (mm->context.ia32_compat) | ||
393 | return validate_insn_32bits(auprobe, insn); | ||
394 | return validate_insn_64bits(auprobe, insn); | ||
395 | } | ||
396 | #else /* 32-bit: */ | ||
397 | static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | ||
398 | { | ||
399 | /* No RIP-relative addressing on 32-bit */ | ||
400 | } | ||
401 | |||
402 | static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | ||
403 | { | ||
404 | return validate_insn_32bits(auprobe, insn); | ||
405 | } | ||
406 | #endif /* CONFIG_X86_64 */ | ||
407 | |||
408 | /** | ||
409 | * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. | ||
410 | * @mm: the probed address space. | ||
411 | * @arch_uprobe: the probepoint information. | ||
412 | * @addr: virtual address at which to install the probepoint | ||
413 | * Return 0 on success or a -ve number on error. | ||
414 | */ | ||
415 | int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) | ||
416 | { | ||
417 | int ret; | ||
418 | struct insn insn; | ||
419 | |||
420 | auprobe->fixups = 0; | ||
421 | ret = validate_insn_bits(auprobe, mm, &insn); | ||
422 | if (ret != 0) | ||
423 | return ret; | ||
424 | |||
425 | handle_riprel_insn(auprobe, mm, &insn); | ||
426 | prepare_fixups(auprobe, &insn); | ||
427 | |||
428 | return 0; | ||
429 | } | ||
430 | |||
431 | #ifdef CONFIG_X86_64 | ||
432 | /* | ||
433 | * If we're emulating a rip-relative instruction, save the contents | ||
434 | * of the scratch register and store the target address in that register. | ||
435 | */ | ||
436 | static void | ||
437 | pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, | ||
438 | struct arch_uprobe_task *autask) | ||
439 | { | ||
440 | if (auprobe->fixups & UPROBE_FIX_RIP_AX) { | ||
441 | autask->saved_scratch_register = regs->ax; | ||
442 | regs->ax = current->utask->vaddr; | ||
443 | regs->ax += auprobe->rip_rela_target_address; | ||
444 | } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { | ||
445 | autask->saved_scratch_register = regs->cx; | ||
446 | regs->cx = current->utask->vaddr; | ||
447 | regs->cx += auprobe->rip_rela_target_address; | ||
448 | } | ||
449 | } | ||
450 | #else | ||
451 | static void | ||
452 | pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, | ||
453 | struct arch_uprobe_task *autask) | ||
454 | { | ||
455 | /* No RIP-relative addressing on 32-bit */ | ||
456 | } | ||
457 | #endif | ||
458 | |||
459 | /* | ||
460 | * arch_uprobe_pre_xol - prepare to execute out of line. | ||
461 | * @auprobe: the probepoint information. | ||
462 | * @regs: reflects the saved user state of current task. | ||
463 | */ | ||
464 | int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | ||
465 | { | ||
466 | struct arch_uprobe_task *autask; | ||
467 | |||
468 | autask = ¤t->utask->autask; | ||
469 | autask->saved_trap_nr = current->thread.trap_nr; | ||
470 | current->thread.trap_nr = UPROBE_TRAP_NR; | ||
471 | regs->ip = current->utask->xol_vaddr; | ||
472 | pre_xol_rip_insn(auprobe, regs, autask); | ||
473 | |||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | /* | ||
478 | * This function is called by arch_uprobe_post_xol() to adjust the return | ||
479 | * address pushed by a call instruction executed out of line. | ||
480 | */ | ||
481 | static int adjust_ret_addr(unsigned long sp, long correction) | ||
482 | { | ||
483 | int rasize, ncopied; | ||
484 | long ra = 0; | ||
485 | |||
486 | if (is_ia32_task()) | ||
487 | rasize = 4; | ||
488 | else | ||
489 | rasize = 8; | ||
490 | |||
491 | ncopied = copy_from_user(&ra, (void __user *)sp, rasize); | ||
492 | if (unlikely(ncopied)) | ||
493 | return -EFAULT; | ||
494 | |||
495 | ra += correction; | ||
496 | ncopied = copy_to_user((void __user *)sp, &ra, rasize); | ||
497 | if (unlikely(ncopied)) | ||
498 | return -EFAULT; | ||
499 | |||
500 | return 0; | ||
501 | } | ||
502 | |||
503 | #ifdef CONFIG_X86_64 | ||
504 | static bool is_riprel_insn(struct arch_uprobe *auprobe) | ||
505 | { | ||
506 | return ((auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0); | ||
507 | } | ||
508 | |||
509 | static void | ||
510 | handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) | ||
511 | { | ||
512 | if (is_riprel_insn(auprobe)) { | ||
513 | struct arch_uprobe_task *autask; | ||
514 | |||
515 | autask = ¤t->utask->autask; | ||
516 | if (auprobe->fixups & UPROBE_FIX_RIP_AX) | ||
517 | regs->ax = autask->saved_scratch_register; | ||
518 | else | ||
519 | regs->cx = autask->saved_scratch_register; | ||
520 | |||
521 | /* | ||
522 | * The original instruction includes a displacement, and so | ||
523 | * is 4 bytes longer than what we've just single-stepped. | ||
524 | * Fall through to handle stuff like "jmpq *...(%rip)" and | ||
525 | * "callq *...(%rip)". | ||
526 | */ | ||
527 | if (correction) | ||
528 | *correction += 4; | ||
529 | } | ||
530 | } | ||
531 | #else | ||
532 | static void | ||
533 | handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) | ||
534 | { | ||
535 | /* No RIP-relative addressing on 32-bit */ | ||
536 | } | ||
537 | #endif | ||
538 | |||
539 | /* | ||
540 | * If xol insn itself traps and generates a signal(Say, | ||
541 | * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped | ||
542 | * instruction jumps back to its own address. It is assumed that anything | ||
543 | * like do_page_fault/do_trap/etc sets thread.trap_nr != -1. | ||
544 | * | ||
545 | * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr, | ||
546 | * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to | ||
547 | * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol(). | ||
548 | */ | ||
549 | bool arch_uprobe_xol_was_trapped(struct task_struct *t) | ||
550 | { | ||
551 | if (t->thread.trap_nr != UPROBE_TRAP_NR) | ||
552 | return true; | ||
553 | |||
554 | return false; | ||
555 | } | ||
556 | |||
557 | /* | ||
558 | * Called after single-stepping. To avoid the SMP problems that can | ||
559 | * occur when we temporarily put back the original opcode to | ||
560 | * single-step, we single-stepped a copy of the instruction. | ||
561 | * | ||
562 | * This function prepares to resume execution after the single-step. | ||
563 | * We have to fix things up as follows: | ||
564 | * | ||
565 | * Typically, the new ip is relative to the copied instruction. We need | ||
566 | * to make it relative to the original instruction (FIX_IP). Exceptions | ||
567 | * are return instructions and absolute or indirect jump or call instructions. | ||
568 | * | ||
569 | * If the single-stepped instruction was a call, the return address that | ||
570 | * is atop the stack is the address following the copied instruction. We | ||
571 | * need to make it the address following the original instruction (FIX_CALL). | ||
572 | * | ||
573 | * If the original instruction was a rip-relative instruction such as | ||
574 | * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent | ||
575 | * instruction using a scratch register -- e.g., "movl %edx,(%rax)". | ||
576 | * We need to restore the contents of the scratch register and adjust | ||
577 | * the ip, keeping in mind that the instruction we executed is 4 bytes | ||
578 | * shorter than the original instruction (since we squeezed out the offset | ||
579 | * field). (FIX_RIP_AX or FIX_RIP_CX) | ||
580 | */ | ||
581 | int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | ||
582 | { | ||
583 | struct uprobe_task *utask; | ||
584 | long correction; | ||
585 | int result = 0; | ||
586 | |||
587 | WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); | ||
588 | |||
589 | utask = current->utask; | ||
590 | current->thread.trap_nr = utask->autask.saved_trap_nr; | ||
591 | correction = (long)(utask->vaddr - utask->xol_vaddr); | ||
592 | handle_riprel_post_xol(auprobe, regs, &correction); | ||
593 | if (auprobe->fixups & UPROBE_FIX_IP) | ||
594 | regs->ip += correction; | ||
595 | |||
596 | if (auprobe->fixups & UPROBE_FIX_CALL) | ||
597 | result = adjust_ret_addr(regs->sp, correction); | ||
598 | |||
599 | return result; | ||
600 | } | ||
601 | |||
602 | /* callback routine for handling exceptions. */ | ||
603 | int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data) | ||
604 | { | ||
605 | struct die_args *args = data; | ||
606 | struct pt_regs *regs = args->regs; | ||
607 | int ret = NOTIFY_DONE; | ||
608 | |||
609 | /* We are only interested in userspace traps */ | ||
610 | if (regs && !user_mode_vm(regs)) | ||
611 | return NOTIFY_DONE; | ||
612 | |||
613 | switch (val) { | ||
614 | case DIE_INT3: | ||
615 | if (uprobe_pre_sstep_notifier(regs)) | ||
616 | ret = NOTIFY_STOP; | ||
617 | |||
618 | break; | ||
619 | |||
620 | case DIE_DEBUG: | ||
621 | if (uprobe_post_sstep_notifier(regs)) | ||
622 | ret = NOTIFY_STOP; | ||
623 | |||
624 | default: | ||
625 | break; | ||
626 | } | ||
627 | |||
628 | return ret; | ||
629 | } | ||
630 | |||
631 | /* | ||
632 | * This function gets called when XOL instruction either gets trapped or | ||
633 | * the thread has a fatal signal, so reset the instruction pointer to its | ||
634 | * probed address. | ||
635 | */ | ||
636 | void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | ||
637 | { | ||
638 | struct uprobe_task *utask = current->utask; | ||
639 | |||
640 | current->thread.trap_nr = utask->autask.saved_trap_nr; | ||
641 | handle_riprel_post_xol(auprobe, regs, NULL); | ||
642 | instruction_pointer_set(regs, utask->vaddr); | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * Skip these instructions as per the currently known x86 ISA. | ||
647 | * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } | ||
648 | */ | ||
649 | bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) | ||
650 | { | ||
651 | int i; | ||
652 | |||
653 | for (i = 0; i < MAX_UINSN_BYTES; i++) { | ||
654 | if ((auprobe->insn[i] == 0x66)) | ||
655 | continue; | ||
656 | |||
657 | if (auprobe->insn[i] == 0x90) | ||
658 | return true; | ||
659 | |||
660 | if (i == (MAX_UINSN_BYTES - 1)) | ||
661 | break; | ||
662 | |||
663 | if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f)) | ||
664 | return true; | ||
665 | |||
666 | if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19)) | ||
667 | return true; | ||
668 | |||
669 | if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0)) | ||
670 | return true; | ||
671 | |||
672 | break; | ||
673 | } | ||
674 | return false; | ||
675 | } | ||
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 255f58ae71e..54abcc0baf2 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -28,6 +28,8 @@ | |||
28 | * | 28 | * |
29 | */ | 29 | */ |
30 | 30 | ||
31 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
32 | |||
31 | #include <linux/capability.h> | 33 | #include <linux/capability.h> |
32 | #include <linux/errno.h> | 34 | #include <linux/errno.h> |
33 | #include <linux/interrupt.h> | 35 | #include <linux/interrupt.h> |
@@ -137,14 +139,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) | |||
137 | local_irq_enable(); | 139 | local_irq_enable(); |
138 | 140 | ||
139 | if (!current->thread.vm86_info) { | 141 | if (!current->thread.vm86_info) { |
140 | printk("no vm86_info: BAD\n"); | 142 | pr_alert("no vm86_info: BAD\n"); |
141 | do_exit(SIGSEGV); | 143 | do_exit(SIGSEGV); |
142 | } | 144 | } |
143 | set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); | 145 | set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); |
144 | tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); | 146 | tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); |
145 | tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); | 147 | tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); |
146 | if (tmp) { | 148 | if (tmp) { |
147 | printk("vm86: could not access userspace vm86_info\n"); | 149 | pr_alert("could not access userspace vm86_info\n"); |
148 | do_exit(SIGSEGV); | 150 | do_exit(SIGSEGV); |
149 | } | 151 | } |
150 | 152 | ||
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0f703f10901..22a1530146a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -197,18 +197,6 @@ SECTIONS | |||
197 | 197 | ||
198 | INIT_DATA_SECTION(16) | 198 | INIT_DATA_SECTION(16) |
199 | 199 | ||
200 | /* | ||
201 | * Code and data for a variety of lowlevel trampolines, to be | ||
202 | * copied into base memory (< 1 MiB) during initialization. | ||
203 | * Since it is copied early, the main copy can be discarded | ||
204 | * afterwards. | ||
205 | */ | ||
206 | .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) { | ||
207 | x86_trampoline_start = .; | ||
208 | *(.x86_trampoline) | ||
209 | x86_trampoline_end = .; | ||
210 | } | ||
211 | |||
212 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { | 200 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { |
213 | __x86_cpu_dev_start = .; | 201 | __x86_cpu_dev_start = .; |
214 | *(.x86_cpu_dev.init) | 202 | *(.x86_cpu_dev.init) |
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 8eeb55a551b..992f890283e 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/pci_ids.h> | 16 | #include <linux/pci_ids.h> |
17 | #include <linux/pci_regs.h> | 17 | #include <linux/pci_regs.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/irq.h> | ||
19 | 20 | ||
20 | #include <asm/apic.h> | 21 | #include <asm/apic.h> |
21 | #include <asm/pci-direct.h> | 22 | #include <asm/pci-direct.h> |
@@ -95,6 +96,18 @@ static void __init set_vsmp_pv_ops(void) | |||
95 | ctl = readl(address + 4); | 96 | ctl = readl(address + 4); |
96 | printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", | 97 | printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", |
97 | cap, ctl); | 98 | cap, ctl); |
99 | |||
100 | /* If possible, let the vSMP foundation route the interrupt optimally */ | ||
101 | #ifdef CONFIG_SMP | ||
102 | if (cap & ctl & BIT(8)) { | ||
103 | ctl &= ~BIT(8); | ||
104 | #ifdef CONFIG_PROC_FS | ||
105 | /* Don't let users change irq affinity via procfs */ | ||
106 | no_irq_affinity = 1; | ||
107 | #endif | ||
108 | } | ||
109 | #endif | ||
110 | |||
98 | if (cap & ctl & (1 << 4)) { | 111 | if (cap & ctl & (1 << 4)) { |
99 | /* Setup irq ops and turn on vSMP IRQ fastpath handling */ | 112 | /* Setup irq ops and turn on vSMP IRQ fastpath handling */ |
100 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); | 113 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); |
@@ -102,12 +115,11 @@ static void __init set_vsmp_pv_ops(void) | |||
102 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); | 115 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); |
103 | pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); | 116 | pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); |
104 | pv_init_ops.patch = vsmp_patch; | 117 | pv_init_ops.patch = vsmp_patch; |
105 | |||
106 | ctl &= ~(1 << 4); | 118 | ctl &= ~(1 << 4); |
107 | writel(ctl, address + 4); | ||
108 | ctl = readl(address + 4); | ||
109 | printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl); | ||
110 | } | 119 | } |
120 | writel(ctl, address + 4); | ||
121 | ctl = readl(address + 4); | ||
122 | pr_info("vSMP CTL: control set to:0x%08x\n", ctl); | ||
111 | 123 | ||
112 | early_iounmap(address, 8); | 124 | early_iounmap(address, 8); |
113 | } | 125 | } |
@@ -187,12 +199,36 @@ static void __init vsmp_cap_cpus(void) | |||
187 | #endif | 199 | #endif |
188 | } | 200 | } |
189 | 201 | ||
202 | static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | ||
203 | { | ||
204 | return hard_smp_processor_id() >> index_msb; | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * In vSMP, all cpus should be capable of handling interrupts, regardless of | ||
209 | * the APIC used. | ||
210 | */ | ||
211 | static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask, | ||
212 | const struct cpumask *mask) | ||
213 | { | ||
214 | cpumask_setall(retmask); | ||
215 | } | ||
216 | |||
217 | static void vsmp_apic_post_init(void) | ||
218 | { | ||
219 | /* need to update phys_pkg_id */ | ||
220 | apic->phys_pkg_id = apicid_phys_pkg_id; | ||
221 | apic->vector_allocation_domain = fill_vector_allocation_domain; | ||
222 | } | ||
223 | |||
190 | void __init vsmp_init(void) | 224 | void __init vsmp_init(void) |
191 | { | 225 | { |
192 | detect_vsmp_box(); | 226 | detect_vsmp_box(); |
193 | if (!is_vsmp_box()) | 227 | if (!is_vsmp_box()) |
194 | return; | 228 | return; |
195 | 229 | ||
230 | x86_platform.apic_post_init = vsmp_apic_post_init; | ||
231 | |||
196 | vsmp_cap_cpus(); | 232 | vsmp_cap_cpus(); |
197 | 233 | ||
198 | set_vsmp_pv_ops(); | 234 | set_vsmp_pv_ops(); |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 7515cf0e180..8d141b30904 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -18,6 +18,8 @@ | |||
18 | * use the vDSO. | 18 | * use the vDSO. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
22 | |||
21 | #include <linux/time.h> | 23 | #include <linux/time.h> |
22 | #include <linux/init.h> | 24 | #include <linux/init.h> |
23 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
@@ -111,18 +113,13 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | |||
111 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, | 113 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, |
112 | const char *message) | 114 | const char *message) |
113 | { | 115 | { |
114 | static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); | 116 | if (!show_unhandled_signals) |
115 | struct task_struct *tsk; | ||
116 | |||
117 | if (!show_unhandled_signals || !__ratelimit(&rs)) | ||
118 | return; | 117 | return; |
119 | 118 | ||
120 | tsk = current; | 119 | pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", |
121 | 120 | level, current->comm, task_pid_nr(current), | |
122 | printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", | 121 | message, regs->ip, regs->cs, |
123 | level, tsk->comm, task_pid_nr(tsk), | 122 | regs->sp, regs->ax, regs->si, regs->di); |
124 | message, regs->ip, regs->cs, | ||
125 | regs->sp, regs->ax, regs->si, regs->di); | ||
126 | } | 123 | } |
127 | 124 | ||
128 | static int addr_to_vsyscall_nr(unsigned long addr) | 125 | static int addr_to_vsyscall_nr(unsigned long addr) |
@@ -139,6 +136,19 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
139 | return nr; | 136 | return nr; |
140 | } | 137 | } |
141 | 138 | ||
139 | #ifdef CONFIG_SECCOMP | ||
140 | static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr) | ||
141 | { | ||
142 | if (!seccomp_mode(&tsk->seccomp)) | ||
143 | return 0; | ||
144 | task_pt_regs(tsk)->orig_ax = syscall_nr; | ||
145 | task_pt_regs(tsk)->ax = syscall_nr; | ||
146 | return __secure_computing(syscall_nr); | ||
147 | } | ||
148 | #else | ||
149 | #define vsyscall_seccomp(_tsk, _nr) 0 | ||
150 | #endif | ||
151 | |||
142 | static bool write_ok_or_segv(unsigned long ptr, size_t size) | 152 | static bool write_ok_or_segv(unsigned long ptr, size_t size) |
143 | { | 153 | { |
144 | /* | 154 | /* |
@@ -174,6 +184,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
174 | int vsyscall_nr; | 184 | int vsyscall_nr; |
175 | int prev_sig_on_uaccess_error; | 185 | int prev_sig_on_uaccess_error; |
176 | long ret; | 186 | long ret; |
187 | int skip; | ||
177 | 188 | ||
178 | /* | 189 | /* |
179 | * No point in checking CS -- the only way to get here is a user mode | 190 | * No point in checking CS -- the only way to get here is a user mode |
@@ -205,9 +216,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
205 | } | 216 | } |
206 | 217 | ||
207 | tsk = current; | 218 | tsk = current; |
208 | if (seccomp_mode(&tsk->seccomp)) | ||
209 | do_exit(SIGKILL); | ||
210 | |||
211 | /* | 219 | /* |
212 | * With a real vsyscall, page faults cause SIGSEGV. We want to | 220 | * With a real vsyscall, page faults cause SIGSEGV. We want to |
213 | * preserve that behavior to make writing exploits harder. | 221 | * preserve that behavior to make writing exploits harder. |
@@ -222,8 +230,13 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
222 | * address 0". | 230 | * address 0". |
223 | */ | 231 | */ |
224 | ret = -EFAULT; | 232 | ret = -EFAULT; |
233 | skip = 0; | ||
225 | switch (vsyscall_nr) { | 234 | switch (vsyscall_nr) { |
226 | case 0: | 235 | case 0: |
236 | skip = vsyscall_seccomp(tsk, __NR_gettimeofday); | ||
237 | if (skip) | ||
238 | break; | ||
239 | |||
227 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || | 240 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || |
228 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) | 241 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) |
229 | break; | 242 | break; |
@@ -234,6 +247,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
234 | break; | 247 | break; |
235 | 248 | ||
236 | case 1: | 249 | case 1: |
250 | skip = vsyscall_seccomp(tsk, __NR_time); | ||
251 | if (skip) | ||
252 | break; | ||
253 | |||
237 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) | 254 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) |
238 | break; | 255 | break; |
239 | 256 | ||
@@ -241,6 +258,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
241 | break; | 258 | break; |
242 | 259 | ||
243 | case 2: | 260 | case 2: |
261 | skip = vsyscall_seccomp(tsk, __NR_getcpu); | ||
262 | if (skip) | ||
263 | break; | ||
264 | |||
244 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | 265 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || |
245 | !write_ok_or_segv(regs->si, sizeof(unsigned))) | 266 | !write_ok_or_segv(regs->si, sizeof(unsigned))) |
246 | break; | 267 | break; |
@@ -253,6 +274,12 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
253 | 274 | ||
254 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; | 275 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; |
255 | 276 | ||
277 | if (skip) { | ||
278 | if ((long)regs->ax <= 0L) /* seccomp errno emulation */ | ||
279 | goto do_ret; | ||
280 | goto done; /* seccomp trace/trap */ | ||
281 | } | ||
282 | |||
256 | if (ret == -EFAULT) { | 283 | if (ret == -EFAULT) { |
257 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ | 284 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ |
258 | warn_bad_vsyscall(KERN_INFO, regs, | 285 | warn_bad_vsyscall(KERN_INFO, regs, |
@@ -271,10 +298,11 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
271 | 298 | ||
272 | regs->ax = ret; | 299 | regs->ax = ret; |
273 | 300 | ||
301 | do_ret: | ||
274 | /* Emulate a ret instruction. */ | 302 | /* Emulate a ret instruction. */ |
275 | regs->ip = caller; | 303 | regs->ip = caller; |
276 | regs->sp += 8; | 304 | regs->sp += 8; |
277 | 305 | done: | |
278 | return true; | 306 | return true; |
279 | 307 | ||
280 | sigsegv: | 308 | sigsegv: |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 9796c2f3d07..6020f6f5927 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -28,6 +28,7 @@ EXPORT_SYMBOL(__put_user_8); | |||
28 | 28 | ||
29 | EXPORT_SYMBOL(copy_user_generic_string); | 29 | EXPORT_SYMBOL(copy_user_generic_string); |
30 | EXPORT_SYMBOL(copy_user_generic_unrolled); | 30 | EXPORT_SYMBOL(copy_user_generic_unrolled); |
31 | EXPORT_SYMBOL(copy_user_enhanced_fast_string); | ||
31 | EXPORT_SYMBOL(__copy_user_nocache); | 32 | EXPORT_SYMBOL(__copy_user_nocache); |
32 | EXPORT_SYMBOL(_copy_from_user); | 33 | EXPORT_SYMBOL(_copy_from_user); |
33 | EXPORT_SYMBOL(_copy_to_user); | 34 | EXPORT_SYMBOL(_copy_to_user); |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index bd18149b2b0..3d3e2070911 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -3,6 +3,9 @@ | |||
3 | * | 3 | * |
4 | * Author: Suresh Siddha <suresh.b.siddha@intel.com> | 4 | * Author: Suresh Siddha <suresh.b.siddha@intel.com> |
5 | */ | 5 | */ |
6 | |||
7 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
8 | |||
6 | #include <linux/bootmem.h> | 9 | #include <linux/bootmem.h> |
7 | #include <linux/compat.h> | 10 | #include <linux/compat.h> |
8 | #include <asm/i387.h> | 11 | #include <asm/i387.h> |
@@ -162,7 +165,7 @@ int save_i387_xstate(void __user *buf) | |||
162 | BUG_ON(sig_xstate_size < xstate_size); | 165 | BUG_ON(sig_xstate_size < xstate_size); |
163 | 166 | ||
164 | if ((unsigned long)buf % 64) | 167 | if ((unsigned long)buf % 64) |
165 | printk("save_i387_xstate: bad fpstate %p\n", buf); | 168 | pr_err("%s: bad fpstate %p\n", __func__, buf); |
166 | 169 | ||
167 | if (!used_math()) | 170 | if (!used_math()) |
168 | return 0; | 171 | return 0; |
@@ -422,7 +425,7 @@ static void __init xstate_enable_boot_cpu(void) | |||
422 | pcntxt_mask = eax + ((u64)edx << 32); | 425 | pcntxt_mask = eax + ((u64)edx << 32); |
423 | 426 | ||
424 | if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { | 427 | if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { |
425 | printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", | 428 | pr_err("FP/SSE not shown under xsave features 0x%llx\n", |
426 | pcntxt_mask); | 429 | pcntxt_mask); |
427 | BUG(); | 430 | BUG(); |
428 | } | 431 | } |
@@ -445,9 +448,8 @@ static void __init xstate_enable_boot_cpu(void) | |||
445 | 448 | ||
446 | setup_xstate_init(); | 449 | setup_xstate_init(); |
447 | 450 | ||
448 | printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " | 451 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", |
449 | "cntxt size 0x%x\n", | 452 | pcntxt_mask, xstate_size); |
450 | pcntxt_mask, xstate_size); | ||
451 | } | 453 | } |
452 | 454 | ||
453 | /* | 455 | /* |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 1a7fe868f37..a28f338843e 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -36,6 +36,7 @@ config KVM | |||
36 | select TASKSTATS | 36 | select TASKSTATS |
37 | select TASK_DELAY_ACCT | 37 | select TASK_DELAY_ACCT |
38 | select PERF_EVENTS | 38 | select PERF_EVENTS |
39 | select HAVE_KVM_MSI | ||
39 | ---help--- | 40 | ---help--- |
40 | Support hosting fully virtualized guest machines using hardware | 41 | Support hosting fully virtualized guest machines using hardware |
41 | virtualization extensions. You will need a fairly recent | 42 | virtualization extensions. You will need a fairly recent |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9fed5bedaad..0595f1397b7 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -201,6 +201,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
201 | unsigned f_lm = 0; | 201 | unsigned f_lm = 0; |
202 | #endif | 202 | #endif |
203 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | 203 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; |
204 | unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; | ||
204 | 205 | ||
205 | /* cpuid 1.edx */ | 206 | /* cpuid 1.edx */ |
206 | const u32 kvm_supported_word0_x86_features = | 207 | const u32 kvm_supported_word0_x86_features = |
@@ -228,7 +229,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
228 | 0 /* DS-CPL, VMX, SMX, EST */ | | 229 | 0 /* DS-CPL, VMX, SMX, EST */ | |
229 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | 230 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | |
230 | F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | | 231 | F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | |
231 | 0 /* Reserved, DCA */ | F(XMM4_1) | | 232 | F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | |
232 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | 233 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | |
233 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | | 234 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | |
234 | F(F16C) | F(RDRAND); | 235 | F(F16C) | F(RDRAND); |
@@ -247,7 +248,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
247 | 248 | ||
248 | /* cpuid 7.0.ebx */ | 249 | /* cpuid 7.0.ebx */ |
249 | const u32 kvm_supported_word9_x86_features = | 250 | const u32 kvm_supported_word9_x86_features = |
250 | F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS); | 251 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | |
252 | F(BMI2) | F(ERMS) | f_invpcid | F(RTM); | ||
251 | 253 | ||
252 | /* all calls to cpuid_count() should be made on the same cpu */ | 254 | /* all calls to cpuid_count() should be made on the same cpu */ |
253 | get_cpu(); | 255 | get_cpu(); |
@@ -397,7 +399,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
397 | case KVM_CPUID_SIGNATURE: { | 399 | case KVM_CPUID_SIGNATURE: { |
398 | char signature[12] = "KVMKVMKVM\0\0"; | 400 | char signature[12] = "KVMKVMKVM\0\0"; |
399 | u32 *sigptr = (u32 *)signature; | 401 | u32 *sigptr = (u32 *)signature; |
400 | entry->eax = 0; | 402 | entry->eax = KVM_CPUID_FEATURES; |
401 | entry->ebx = sigptr[0]; | 403 | entry->ebx = sigptr[0]; |
402 | entry->ecx = sigptr[1]; | 404 | entry->ecx = sigptr[1]; |
403 | entry->edx = sigptr[2]; | 405 | entry->edx = sigptr[2]; |
@@ -408,6 +410,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
408 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | 410 | (1 << KVM_FEATURE_NOP_IO_DELAY) | |
409 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | 411 | (1 << KVM_FEATURE_CLOCKSOURCE2) | |
410 | (1 << KVM_FEATURE_ASYNC_PF) | | 412 | (1 << KVM_FEATURE_ASYNC_PF) | |
413 | (1 << KVM_FEATURE_PV_EOI) | | ||
411 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | 414 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); |
412 | 415 | ||
413 | if (sched_info_on()) | 416 | if (sched_info_on()) |
@@ -638,33 +641,37 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | |||
638 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | 641 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); |
639 | } | 642 | } |
640 | 643 | ||
641 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | 644 | void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) |
642 | { | 645 | { |
643 | u32 function, index; | 646 | u32 function = *eax, index = *ecx; |
644 | struct kvm_cpuid_entry2 *best; | 647 | struct kvm_cpuid_entry2 *best; |
645 | 648 | ||
646 | function = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
647 | index = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
648 | kvm_register_write(vcpu, VCPU_REGS_RAX, 0); | ||
649 | kvm_register_write(vcpu, VCPU_REGS_RBX, 0); | ||
650 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); | ||
651 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); | ||
652 | best = kvm_find_cpuid_entry(vcpu, function, index); | 649 | best = kvm_find_cpuid_entry(vcpu, function, index); |
653 | 650 | ||
654 | if (!best) | 651 | if (!best) |
655 | best = check_cpuid_limit(vcpu, function, index); | 652 | best = check_cpuid_limit(vcpu, function, index); |
656 | 653 | ||
657 | if (best) { | 654 | if (best) { |
658 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); | 655 | *eax = best->eax; |
659 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); | 656 | *ebx = best->ebx; |
660 | kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); | 657 | *ecx = best->ecx; |
661 | kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); | 658 | *edx = best->edx; |
662 | } | 659 | } else |
660 | *eax = *ebx = *ecx = *edx = 0; | ||
661 | } | ||
662 | |||
663 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | ||
664 | { | ||
665 | u32 function, eax, ebx, ecx, edx; | ||
666 | |||
667 | function = eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
668 | ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
669 | kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx); | ||
670 | kvm_register_write(vcpu, VCPU_REGS_RAX, eax); | ||
671 | kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); | ||
672 | kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); | ||
673 | kvm_register_write(vcpu, VCPU_REGS_RDX, edx); | ||
663 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 674 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
664 | trace_kvm_cpuid(function, | 675 | trace_kvm_cpuid(function, eax, ebx, ecx, edx); |
665 | kvm_register_read(vcpu, VCPU_REGS_RAX), | ||
666 | kvm_register_read(vcpu, VCPU_REGS_RBX), | ||
667 | kvm_register_read(vcpu, VCPU_REGS_RCX), | ||
668 | kvm_register_read(vcpu, VCPU_REGS_RDX)); | ||
669 | } | 676 | } |
670 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | 677 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 26d1fb437eb..a10e4601685 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -17,6 +17,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
17 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | 17 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, |
18 | struct kvm_cpuid2 *cpuid, | 18 | struct kvm_cpuid2 *cpuid, |
19 | struct kvm_cpuid_entry2 __user *entries); | 19 | struct kvm_cpuid_entry2 __user *entries); |
20 | void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); | ||
20 | 21 | ||
21 | 22 | ||
22 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | 23 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) |
@@ -51,4 +52,12 @@ static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) | |||
51 | return best && (best->ecx & bit(X86_FEATURE_OSVW)); | 52 | return best && (best->ecx & bit(X86_FEATURE_OSVW)); |
52 | } | 53 | } |
53 | 54 | ||
55 | static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) | ||
56 | { | ||
57 | struct kvm_cpuid_entry2 *best; | ||
58 | |||
59 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
60 | return best && (best->ecx & bit(X86_FEATURE_PCID)); | ||
61 | } | ||
62 | |||
54 | #endif | 63 | #endif |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 83756223f8a..97d9a9914ba 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -142,6 +142,10 @@ | |||
142 | #define Src2FS (OpFS << Src2Shift) | 142 | #define Src2FS (OpFS << Src2Shift) |
143 | #define Src2GS (OpGS << Src2Shift) | 143 | #define Src2GS (OpGS << Src2Shift) |
144 | #define Src2Mask (OpMask << Src2Shift) | 144 | #define Src2Mask (OpMask << Src2Shift) |
145 | #define Mmx ((u64)1 << 40) /* MMX Vector instruction */ | ||
146 | #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ | ||
147 | #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ | ||
148 | #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ | ||
145 | 149 | ||
146 | #define X2(x...) x, x | 150 | #define X2(x...) x, x |
147 | #define X3(x...) X2(x), x | 151 | #define X3(x...) X2(x), x |
@@ -429,11 +433,32 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, | |||
429 | return ctxt->ops->intercept(ctxt, &info, stage); | 433 | return ctxt->ops->intercept(ctxt, &info, stage); |
430 | } | 434 | } |
431 | 435 | ||
436 | static void assign_masked(ulong *dest, ulong src, ulong mask) | ||
437 | { | ||
438 | *dest = (*dest & ~mask) | (src & mask); | ||
439 | } | ||
440 | |||
432 | static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt) | 441 | static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt) |
433 | { | 442 | { |
434 | return (1UL << (ctxt->ad_bytes << 3)) - 1; | 443 | return (1UL << (ctxt->ad_bytes << 3)) - 1; |
435 | } | 444 | } |
436 | 445 | ||
446 | static ulong stack_mask(struct x86_emulate_ctxt *ctxt) | ||
447 | { | ||
448 | u16 sel; | ||
449 | struct desc_struct ss; | ||
450 | |||
451 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
452 | return ~0UL; | ||
453 | ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS); | ||
454 | return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */ | ||
455 | } | ||
456 | |||
457 | static int stack_size(struct x86_emulate_ctxt *ctxt) | ||
458 | { | ||
459 | return (__fls(stack_mask(ctxt)) + 1) >> 3; | ||
460 | } | ||
461 | |||
437 | /* Access/update address held in a register, based on addressing mode. */ | 462 | /* Access/update address held in a register, based on addressing mode. */ |
438 | static inline unsigned long | 463 | static inline unsigned long |
439 | address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg) | 464 | address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg) |
@@ -557,6 +582,29 @@ static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, | |||
557 | ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); | 582 | ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); |
558 | } | 583 | } |
559 | 584 | ||
585 | /* | ||
586 | * x86 defines three classes of vector instructions: explicitly | ||
587 | * aligned, explicitly unaligned, and the rest, which change behaviour | ||
588 | * depending on whether they're AVX encoded or not. | ||
589 | * | ||
590 | * Also included is CMPXCHG16B which is not a vector instruction, yet it is | ||
591 | * subject to the same check. | ||
592 | */ | ||
593 | static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size) | ||
594 | { | ||
595 | if (likely(size < 16)) | ||
596 | return false; | ||
597 | |||
598 | if (ctxt->d & Aligned) | ||
599 | return true; | ||
600 | else if (ctxt->d & Unaligned) | ||
601 | return false; | ||
602 | else if (ctxt->d & Avx) | ||
603 | return false; | ||
604 | else | ||
605 | return true; | ||
606 | } | ||
607 | |||
560 | static int __linearize(struct x86_emulate_ctxt *ctxt, | 608 | static int __linearize(struct x86_emulate_ctxt *ctxt, |
561 | struct segmented_address addr, | 609 | struct segmented_address addr, |
562 | unsigned size, bool write, bool fetch, | 610 | unsigned size, bool write, bool fetch, |
@@ -621,6 +669,8 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
621 | } | 669 | } |
622 | if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8) | 670 | if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8) |
623 | la &= (u32)-1; | 671 | la &= (u32)-1; |
672 | if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) | ||
673 | return emulate_gp(ctxt, 0); | ||
624 | *linear = la; | 674 | *linear = la; |
625 | return X86EMUL_CONTINUE; | 675 | return X86EMUL_CONTINUE; |
626 | bad: | 676 | bad: |
@@ -859,6 +909,40 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, | |||
859 | ctxt->ops->put_fpu(ctxt); | 909 | ctxt->ops->put_fpu(ctxt); |
860 | } | 910 | } |
861 | 911 | ||
912 | static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) | ||
913 | { | ||
914 | ctxt->ops->get_fpu(ctxt); | ||
915 | switch (reg) { | ||
916 | case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; | ||
917 | case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; | ||
918 | case 2: asm("movq %%mm2, %0" : "=m"(*data)); break; | ||
919 | case 3: asm("movq %%mm3, %0" : "=m"(*data)); break; | ||
920 | case 4: asm("movq %%mm4, %0" : "=m"(*data)); break; | ||
921 | case 5: asm("movq %%mm5, %0" : "=m"(*data)); break; | ||
922 | case 6: asm("movq %%mm6, %0" : "=m"(*data)); break; | ||
923 | case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; | ||
924 | default: BUG(); | ||
925 | } | ||
926 | ctxt->ops->put_fpu(ctxt); | ||
927 | } | ||
928 | |||
929 | static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) | ||
930 | { | ||
931 | ctxt->ops->get_fpu(ctxt); | ||
932 | switch (reg) { | ||
933 | case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; | ||
934 | case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; | ||
935 | case 2: asm("movq %0, %%mm2" : : "m"(*data)); break; | ||
936 | case 3: asm("movq %0, %%mm3" : : "m"(*data)); break; | ||
937 | case 4: asm("movq %0, %%mm4" : : "m"(*data)); break; | ||
938 | case 5: asm("movq %0, %%mm5" : : "m"(*data)); break; | ||
939 | case 6: asm("movq %0, %%mm6" : : "m"(*data)); break; | ||
940 | case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; | ||
941 | default: BUG(); | ||
942 | } | ||
943 | ctxt->ops->put_fpu(ctxt); | ||
944 | } | ||
945 | |||
862 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | 946 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, |
863 | struct operand *op) | 947 | struct operand *op) |
864 | { | 948 | { |
@@ -875,6 +959,13 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | |||
875 | read_sse_reg(ctxt, &op->vec_val, reg); | 959 | read_sse_reg(ctxt, &op->vec_val, reg); |
876 | return; | 960 | return; |
877 | } | 961 | } |
962 | if (ctxt->d & Mmx) { | ||
963 | reg &= 7; | ||
964 | op->type = OP_MM; | ||
965 | op->bytes = 8; | ||
966 | op->addr.mm = reg; | ||
967 | return; | ||
968 | } | ||
878 | 969 | ||
879 | op->type = OP_REG; | 970 | op->type = OP_REG; |
880 | if (ctxt->d & ByteOp) { | 971 | if (ctxt->d & ByteOp) { |
@@ -888,6 +979,12 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | |||
888 | op->orig_val = op->val; | 979 | op->orig_val = op->val; |
889 | } | 980 | } |
890 | 981 | ||
982 | static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg) | ||
983 | { | ||
984 | if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP) | ||
985 | ctxt->modrm_seg = VCPU_SREG_SS; | ||
986 | } | ||
987 | |||
891 | static int decode_modrm(struct x86_emulate_ctxt *ctxt, | 988 | static int decode_modrm(struct x86_emulate_ctxt *ctxt, |
892 | struct operand *op) | 989 | struct operand *op) |
893 | { | 990 | { |
@@ -902,7 +999,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
902 | ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ | 999 | ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ |
903 | } | 1000 | } |
904 | 1001 | ||
905 | ctxt->modrm = insn_fetch(u8, ctxt); | ||
906 | ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; | 1002 | ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; |
907 | ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; | 1003 | ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; |
908 | ctxt->modrm_rm |= (ctxt->modrm & 0x07); | 1004 | ctxt->modrm_rm |= (ctxt->modrm & 0x07); |
@@ -920,6 +1016,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
920 | read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm); | 1016 | read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm); |
921 | return rc; | 1017 | return rc; |
922 | } | 1018 | } |
1019 | if (ctxt->d & Mmx) { | ||
1020 | op->type = OP_MM; | ||
1021 | op->bytes = 8; | ||
1022 | op->addr.xmm = ctxt->modrm_rm & 7; | ||
1023 | return rc; | ||
1024 | } | ||
923 | fetch_register_operand(op); | 1025 | fetch_register_operand(op); |
924 | return rc; | 1026 | return rc; |
925 | } | 1027 | } |
@@ -986,15 +1088,20 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
986 | 1088 | ||
987 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) | 1089 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) |
988 | modrm_ea += insn_fetch(s32, ctxt); | 1090 | modrm_ea += insn_fetch(s32, ctxt); |
989 | else | 1091 | else { |
990 | modrm_ea += ctxt->regs[base_reg]; | 1092 | modrm_ea += ctxt->regs[base_reg]; |
1093 | adjust_modrm_seg(ctxt, base_reg); | ||
1094 | } | ||
991 | if (index_reg != 4) | 1095 | if (index_reg != 4) |
992 | modrm_ea += ctxt->regs[index_reg] << scale; | 1096 | modrm_ea += ctxt->regs[index_reg] << scale; |
993 | } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { | 1097 | } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { |
994 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1098 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
995 | ctxt->rip_relative = 1; | 1099 | ctxt->rip_relative = 1; |
996 | } else | 1100 | } else { |
997 | modrm_ea += ctxt->regs[ctxt->modrm_rm]; | 1101 | base_reg = ctxt->modrm_rm; |
1102 | modrm_ea += ctxt->regs[base_reg]; | ||
1103 | adjust_modrm_seg(ctxt, base_reg); | ||
1104 | } | ||
998 | switch (ctxt->modrm_mod) { | 1105 | switch (ctxt->modrm_mod) { |
999 | case 0: | 1106 | case 0: |
1000 | if (ctxt->modrm_rm == 5) | 1107 | if (ctxt->modrm_rm == 5) |
@@ -1189,7 +1296,8 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | |||
1189 | 1296 | ||
1190 | /* allowed just for 8 bytes segments */ | 1297 | /* allowed just for 8 bytes segments */ |
1191 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1298 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
1192 | u16 selector, struct desc_struct *desc) | 1299 | u16 selector, struct desc_struct *desc, |
1300 | ulong *desc_addr_p) | ||
1193 | { | 1301 | { |
1194 | struct desc_ptr dt; | 1302 | struct desc_ptr dt; |
1195 | u16 index = selector >> 3; | 1303 | u16 index = selector >> 3; |
@@ -1200,7 +1308,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1200 | if (dt.size < index * 8 + 7) | 1308 | if (dt.size < index * 8 + 7) |
1201 | return emulate_gp(ctxt, selector & 0xfffc); | 1309 | return emulate_gp(ctxt, selector & 0xfffc); |
1202 | 1310 | ||
1203 | addr = dt.address + index * 8; | 1311 | *desc_addr_p = addr = dt.address + index * 8; |
1204 | return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, | 1312 | return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, |
1205 | &ctxt->exception); | 1313 | &ctxt->exception); |
1206 | } | 1314 | } |
@@ -1227,11 +1335,12 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1227 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1335 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
1228 | u16 selector, int seg) | 1336 | u16 selector, int seg) |
1229 | { | 1337 | { |
1230 | struct desc_struct seg_desc; | 1338 | struct desc_struct seg_desc, old_desc; |
1231 | u8 dpl, rpl, cpl; | 1339 | u8 dpl, rpl, cpl; |
1232 | unsigned err_vec = GP_VECTOR; | 1340 | unsigned err_vec = GP_VECTOR; |
1233 | u32 err_code = 0; | 1341 | u32 err_code = 0; |
1234 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | 1342 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ |
1343 | ulong desc_addr; | ||
1235 | int ret; | 1344 | int ret; |
1236 | 1345 | ||
1237 | memset(&seg_desc, 0, sizeof seg_desc); | 1346 | memset(&seg_desc, 0, sizeof seg_desc); |
@@ -1249,8 +1358,14 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1249 | goto load; | 1358 | goto load; |
1250 | } | 1359 | } |
1251 | 1360 | ||
1252 | /* NULL selector is not valid for TR, CS and SS */ | 1361 | rpl = selector & 3; |
1253 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | 1362 | cpl = ctxt->ops->cpl(ctxt); |
1363 | |||
1364 | /* NULL selector is not valid for TR, CS and SS (except for long mode) */ | ||
1365 | if ((seg == VCPU_SREG_CS | ||
1366 | || (seg == VCPU_SREG_SS | ||
1367 | && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)) | ||
1368 | || seg == VCPU_SREG_TR) | ||
1254 | && null_selector) | 1369 | && null_selector) |
1255 | goto exception; | 1370 | goto exception; |
1256 | 1371 | ||
@@ -1261,7 +1376,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1261 | if (null_selector) /* for NULL selector skip all following checks */ | 1376 | if (null_selector) /* for NULL selector skip all following checks */ |
1262 | goto load; | 1377 | goto load; |
1263 | 1378 | ||
1264 | ret = read_segment_descriptor(ctxt, selector, &seg_desc); | 1379 | ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr); |
1265 | if (ret != X86EMUL_CONTINUE) | 1380 | if (ret != X86EMUL_CONTINUE) |
1266 | return ret; | 1381 | return ret; |
1267 | 1382 | ||
@@ -1277,9 +1392,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1277 | goto exception; | 1392 | goto exception; |
1278 | } | 1393 | } |
1279 | 1394 | ||
1280 | rpl = selector & 3; | ||
1281 | dpl = seg_desc.dpl; | 1395 | dpl = seg_desc.dpl; |
1282 | cpl = ctxt->ops->cpl(ctxt); | ||
1283 | 1396 | ||
1284 | switch (seg) { | 1397 | switch (seg) { |
1285 | case VCPU_SREG_SS: | 1398 | case VCPU_SREG_SS: |
@@ -1309,6 +1422,12 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1309 | case VCPU_SREG_TR: | 1422 | case VCPU_SREG_TR: |
1310 | if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) | 1423 | if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) |
1311 | goto exception; | 1424 | goto exception; |
1425 | old_desc = seg_desc; | ||
1426 | seg_desc.type |= 2; /* busy */ | ||
1427 | ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, | ||
1428 | sizeof(seg_desc), &ctxt->exception); | ||
1429 | if (ret != X86EMUL_CONTINUE) | ||
1430 | return ret; | ||
1312 | break; | 1431 | break; |
1313 | case VCPU_SREG_LDTR: | 1432 | case VCPU_SREG_LDTR: |
1314 | if (seg_desc.s || seg_desc.type != 2) | 1433 | if (seg_desc.s || seg_desc.type != 2) |
@@ -1387,6 +1506,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt) | |||
1387 | case OP_XMM: | 1506 | case OP_XMM: |
1388 | write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); | 1507 | write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); |
1389 | break; | 1508 | break; |
1509 | case OP_MM: | ||
1510 | write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm); | ||
1511 | break; | ||
1390 | case OP_NONE: | 1512 | case OP_NONE: |
1391 | /* no writeback */ | 1513 | /* no writeback */ |
1392 | break; | 1514 | break; |
@@ -1396,17 +1518,22 @@ static int writeback(struct x86_emulate_ctxt *ctxt) | |||
1396 | return X86EMUL_CONTINUE; | 1518 | return X86EMUL_CONTINUE; |
1397 | } | 1519 | } |
1398 | 1520 | ||
1399 | static int em_push(struct x86_emulate_ctxt *ctxt) | 1521 | static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes) |
1400 | { | 1522 | { |
1401 | struct segmented_address addr; | 1523 | struct segmented_address addr; |
1402 | 1524 | ||
1403 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -ctxt->op_bytes); | 1525 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -bytes); |
1404 | addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); | 1526 | addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); |
1405 | addr.seg = VCPU_SREG_SS; | 1527 | addr.seg = VCPU_SREG_SS; |
1406 | 1528 | ||
1529 | return segmented_write(ctxt, addr, data, bytes); | ||
1530 | } | ||
1531 | |||
1532 | static int em_push(struct x86_emulate_ctxt *ctxt) | ||
1533 | { | ||
1407 | /* Disable writeback. */ | 1534 | /* Disable writeback. */ |
1408 | ctxt->dst.type = OP_NONE; | 1535 | ctxt->dst.type = OP_NONE; |
1409 | return segmented_write(ctxt, addr, &ctxt->src.val, ctxt->op_bytes); | 1536 | return push(ctxt, &ctxt->src.val, ctxt->op_bytes); |
1410 | } | 1537 | } |
1411 | 1538 | ||
1412 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, | 1539 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, |
@@ -1478,6 +1605,33 @@ static int em_popf(struct x86_emulate_ctxt *ctxt) | |||
1478 | return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes); | 1605 | return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes); |
1479 | } | 1606 | } |
1480 | 1607 | ||
1608 | static int em_enter(struct x86_emulate_ctxt *ctxt) | ||
1609 | { | ||
1610 | int rc; | ||
1611 | unsigned frame_size = ctxt->src.val; | ||
1612 | unsigned nesting_level = ctxt->src2.val & 31; | ||
1613 | |||
1614 | if (nesting_level) | ||
1615 | return X86EMUL_UNHANDLEABLE; | ||
1616 | |||
1617 | rc = push(ctxt, &ctxt->regs[VCPU_REGS_RBP], stack_size(ctxt)); | ||
1618 | if (rc != X86EMUL_CONTINUE) | ||
1619 | return rc; | ||
1620 | assign_masked(&ctxt->regs[VCPU_REGS_RBP], ctxt->regs[VCPU_REGS_RSP], | ||
1621 | stack_mask(ctxt)); | ||
1622 | assign_masked(&ctxt->regs[VCPU_REGS_RSP], | ||
1623 | ctxt->regs[VCPU_REGS_RSP] - frame_size, | ||
1624 | stack_mask(ctxt)); | ||
1625 | return X86EMUL_CONTINUE; | ||
1626 | } | ||
1627 | |||
1628 | static int em_leave(struct x86_emulate_ctxt *ctxt) | ||
1629 | { | ||
1630 | assign_masked(&ctxt->regs[VCPU_REGS_RSP], ctxt->regs[VCPU_REGS_RBP], | ||
1631 | stack_mask(ctxt)); | ||
1632 | return emulate_pop(ctxt, &ctxt->regs[VCPU_REGS_RBP], ctxt->op_bytes); | ||
1633 | } | ||
1634 | |||
1481 | static int em_push_sreg(struct x86_emulate_ctxt *ctxt) | 1635 | static int em_push_sreg(struct x86_emulate_ctxt *ctxt) |
1482 | { | 1636 | { |
1483 | int seg = ctxt->src2.val; | 1637 | int seg = ctxt->src2.val; |
@@ -1915,8 +2069,8 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt) | |||
1915 | u32 eax, ebx, ecx, edx; | 2069 | u32 eax, ebx, ecx, edx; |
1916 | 2070 | ||
1917 | eax = ecx = 0; | 2071 | eax = ecx = 0; |
1918 | return ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx) | 2072 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); |
1919 | && ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx | 2073 | return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx |
1920 | && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx | 2074 | && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx |
1921 | && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; | 2075 | && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; |
1922 | } | 2076 | } |
@@ -1935,32 +2089,31 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) | |||
1935 | 2089 | ||
1936 | eax = 0x00000000; | 2090 | eax = 0x00000000; |
1937 | ecx = 0x00000000; | 2091 | ecx = 0x00000000; |
1938 | if (ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)) { | 2092 | ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); |
1939 | /* | 2093 | /* |
1940 | * Intel ("GenuineIntel") | 2094 | * Intel ("GenuineIntel") |
1941 | * remark: Intel CPUs only support "syscall" in 64bit | 2095 | * remark: Intel CPUs only support "syscall" in 64bit |
1942 | * longmode. Also an 64bit guest with a | 2096 | * longmode. Also an 64bit guest with a |
1943 | * 32bit compat-app running will #UD !! While this | 2097 | * 32bit compat-app running will #UD !! While this |
1944 | * behaviour can be fixed (by emulating) into AMD | 2098 | * behaviour can be fixed (by emulating) into AMD |
1945 | * response - CPUs of AMD can't behave like Intel. | 2099 | * response - CPUs of AMD can't behave like Intel. |
1946 | */ | 2100 | */ |
1947 | if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx && | 2101 | if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx && |
1948 | ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx && | 2102 | ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx && |
1949 | edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx) | 2103 | edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx) |
1950 | return false; | 2104 | return false; |
1951 | 2105 | ||
1952 | /* AMD ("AuthenticAMD") */ | 2106 | /* AMD ("AuthenticAMD") */ |
1953 | if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx && | 2107 | if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx && |
1954 | ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx && | 2108 | ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx && |
1955 | edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) | 2109 | edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) |
1956 | return true; | 2110 | return true; |
1957 | 2111 | ||
1958 | /* AMD ("AMDisbetter!") */ | 2112 | /* AMD ("AMDisbetter!") */ |
1959 | if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx && | 2113 | if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx && |
1960 | ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx && | 2114 | ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx && |
1961 | edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx) | 2115 | edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx) |
1962 | return true; | 2116 | return true; |
1963 | } | ||
1964 | 2117 | ||
1965 | /* default: (not Intel, not AMD), apply Intel's stricter rules... */ | 2118 | /* default: (not Intel, not AMD), apply Intel's stricter rules... */ |
1966 | return false; | 2119 | return false; |
@@ -2469,13 +2622,14 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2469 | ulong old_tss_base = | 2622 | ulong old_tss_base = |
2470 | ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); | 2623 | ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); |
2471 | u32 desc_limit; | 2624 | u32 desc_limit; |
2625 | ulong desc_addr; | ||
2472 | 2626 | ||
2473 | /* FIXME: old_tss_base == ~0 ? */ | 2627 | /* FIXME: old_tss_base == ~0 ? */ |
2474 | 2628 | ||
2475 | ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc); | 2629 | ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr); |
2476 | if (ret != X86EMUL_CONTINUE) | 2630 | if (ret != X86EMUL_CONTINUE) |
2477 | return ret; | 2631 | return ret; |
2478 | ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc); | 2632 | ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr); |
2479 | if (ret != X86EMUL_CONTINUE) | 2633 | if (ret != X86EMUL_CONTINUE) |
2480 | return ret; | 2634 | return ret; |
2481 | 2635 | ||
@@ -2790,7 +2944,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt) | |||
2790 | 2944 | ||
2791 | static int em_mov(struct x86_emulate_ctxt *ctxt) | 2945 | static int em_mov(struct x86_emulate_ctxt *ctxt) |
2792 | { | 2946 | { |
2793 | ctxt->dst.val = ctxt->src.val; | 2947 | memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes); |
2794 | return X86EMUL_CONTINUE; | 2948 | return X86EMUL_CONTINUE; |
2795 | } | 2949 | } |
2796 | 2950 | ||
@@ -2870,10 +3024,22 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt) | |||
2870 | return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg); | 3024 | return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg); |
2871 | } | 3025 | } |
2872 | 3026 | ||
2873 | static int em_movdqu(struct x86_emulate_ctxt *ctxt) | 3027 | static int em_lldt(struct x86_emulate_ctxt *ctxt) |
2874 | { | 3028 | { |
2875 | memcpy(&ctxt->dst.vec_val, &ctxt->src.vec_val, ctxt->op_bytes); | 3029 | u16 sel = ctxt->src.val; |
2876 | return X86EMUL_CONTINUE; | 3030 | |
3031 | /* Disable writeback. */ | ||
3032 | ctxt->dst.type = OP_NONE; | ||
3033 | return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR); | ||
3034 | } | ||
3035 | |||
3036 | static int em_ltr(struct x86_emulate_ctxt *ctxt) | ||
3037 | { | ||
3038 | u16 sel = ctxt->src.val; | ||
3039 | |||
3040 | /* Disable writeback. */ | ||
3041 | ctxt->dst.type = OP_NONE; | ||
3042 | return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR); | ||
2877 | } | 3043 | } |
2878 | 3044 | ||
2879 | static int em_invlpg(struct x86_emulate_ctxt *ctxt) | 3045 | static int em_invlpg(struct x86_emulate_ctxt *ctxt) |
@@ -2917,11 +3083,42 @@ static int em_vmcall(struct x86_emulate_ctxt *ctxt) | |||
2917 | return X86EMUL_CONTINUE; | 3083 | return X86EMUL_CONTINUE; |
2918 | } | 3084 | } |
2919 | 3085 | ||
3086 | static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt, | ||
3087 | void (*get)(struct x86_emulate_ctxt *ctxt, | ||
3088 | struct desc_ptr *ptr)) | ||
3089 | { | ||
3090 | struct desc_ptr desc_ptr; | ||
3091 | |||
3092 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
3093 | ctxt->op_bytes = 8; | ||
3094 | get(ctxt, &desc_ptr); | ||
3095 | if (ctxt->op_bytes == 2) { | ||
3096 | ctxt->op_bytes = 4; | ||
3097 | desc_ptr.address &= 0x00ffffff; | ||
3098 | } | ||
3099 | /* Disable writeback. */ | ||
3100 | ctxt->dst.type = OP_NONE; | ||
3101 | return segmented_write(ctxt, ctxt->dst.addr.mem, | ||
3102 | &desc_ptr, 2 + ctxt->op_bytes); | ||
3103 | } | ||
3104 | |||
3105 | static int em_sgdt(struct x86_emulate_ctxt *ctxt) | ||
3106 | { | ||
3107 | return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt); | ||
3108 | } | ||
3109 | |||
3110 | static int em_sidt(struct x86_emulate_ctxt *ctxt) | ||
3111 | { | ||
3112 | return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt); | ||
3113 | } | ||
3114 | |||
2920 | static int em_lgdt(struct x86_emulate_ctxt *ctxt) | 3115 | static int em_lgdt(struct x86_emulate_ctxt *ctxt) |
2921 | { | 3116 | { |
2922 | struct desc_ptr desc_ptr; | 3117 | struct desc_ptr desc_ptr; |
2923 | int rc; | 3118 | int rc; |
2924 | 3119 | ||
3120 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
3121 | ctxt->op_bytes = 8; | ||
2925 | rc = read_descriptor(ctxt, ctxt->src.addr.mem, | 3122 | rc = read_descriptor(ctxt, ctxt->src.addr.mem, |
2926 | &desc_ptr.size, &desc_ptr.address, | 3123 | &desc_ptr.size, &desc_ptr.address, |
2927 | ctxt->op_bytes); | 3124 | ctxt->op_bytes); |
@@ -2949,6 +3146,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt) | |||
2949 | struct desc_ptr desc_ptr; | 3146 | struct desc_ptr desc_ptr; |
2950 | int rc; | 3147 | int rc; |
2951 | 3148 | ||
3149 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
3150 | ctxt->op_bytes = 8; | ||
2952 | rc = read_descriptor(ctxt, ctxt->src.addr.mem, | 3151 | rc = read_descriptor(ctxt, ctxt->src.addr.mem, |
2953 | &desc_ptr.size, &desc_ptr.address, | 3152 | &desc_ptr.size, &desc_ptr.address, |
2954 | ctxt->op_bytes); | 3153 | ctxt->op_bytes); |
@@ -3061,34 +3260,48 @@ static int em_btc(struct x86_emulate_ctxt *ctxt) | |||
3061 | 3260 | ||
3062 | static int em_bsf(struct x86_emulate_ctxt *ctxt) | 3261 | static int em_bsf(struct x86_emulate_ctxt *ctxt) |
3063 | { | 3262 | { |
3064 | u8 zf; | 3263 | emulate_2op_SrcV_nobyte(ctxt, "bsf"); |
3065 | |||
3066 | __asm__ ("bsf %2, %0; setz %1" | ||
3067 | : "=r"(ctxt->dst.val), "=q"(zf) | ||
3068 | : "r"(ctxt->src.val)); | ||
3069 | |||
3070 | ctxt->eflags &= ~X86_EFLAGS_ZF; | ||
3071 | if (zf) { | ||
3072 | ctxt->eflags |= X86_EFLAGS_ZF; | ||
3073 | /* Disable writeback. */ | ||
3074 | ctxt->dst.type = OP_NONE; | ||
3075 | } | ||
3076 | return X86EMUL_CONTINUE; | 3264 | return X86EMUL_CONTINUE; |
3077 | } | 3265 | } |
3078 | 3266 | ||
3079 | static int em_bsr(struct x86_emulate_ctxt *ctxt) | 3267 | static int em_bsr(struct x86_emulate_ctxt *ctxt) |
3080 | { | 3268 | { |
3081 | u8 zf; | 3269 | emulate_2op_SrcV_nobyte(ctxt, "bsr"); |
3270 | return X86EMUL_CONTINUE; | ||
3271 | } | ||
3082 | 3272 | ||
3083 | __asm__ ("bsr %2, %0; setz %1" | 3273 | static int em_cpuid(struct x86_emulate_ctxt *ctxt) |
3084 | : "=r"(ctxt->dst.val), "=q"(zf) | 3274 | { |
3085 | : "r"(ctxt->src.val)); | 3275 | u32 eax, ebx, ecx, edx; |
3276 | |||
3277 | eax = ctxt->regs[VCPU_REGS_RAX]; | ||
3278 | ecx = ctxt->regs[VCPU_REGS_RCX]; | ||
3279 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | ||
3280 | ctxt->regs[VCPU_REGS_RAX] = eax; | ||
3281 | ctxt->regs[VCPU_REGS_RBX] = ebx; | ||
3282 | ctxt->regs[VCPU_REGS_RCX] = ecx; | ||
3283 | ctxt->regs[VCPU_REGS_RDX] = edx; | ||
3284 | return X86EMUL_CONTINUE; | ||
3285 | } | ||
3086 | 3286 | ||
3087 | ctxt->eflags &= ~X86_EFLAGS_ZF; | 3287 | static int em_lahf(struct x86_emulate_ctxt *ctxt) |
3088 | if (zf) { | 3288 | { |
3089 | ctxt->eflags |= X86_EFLAGS_ZF; | 3289 | ctxt->regs[VCPU_REGS_RAX] &= ~0xff00UL; |
3090 | /* Disable writeback. */ | 3290 | ctxt->regs[VCPU_REGS_RAX] |= (ctxt->eflags & 0xff) << 8; |
3091 | ctxt->dst.type = OP_NONE; | 3291 | return X86EMUL_CONTINUE; |
3292 | } | ||
3293 | |||
3294 | static int em_bswap(struct x86_emulate_ctxt *ctxt) | ||
3295 | { | ||
3296 | switch (ctxt->op_bytes) { | ||
3297 | #ifdef CONFIG_X86_64 | ||
3298 | case 8: | ||
3299 | asm("bswap %0" : "+r"(ctxt->dst.val)); | ||
3300 | break; | ||
3301 | #endif | ||
3302 | default: | ||
3303 | asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val)); | ||
3304 | break; | ||
3092 | } | 3305 | } |
3093 | return X86EMUL_CONTINUE; | 3306 | return X86EMUL_CONTINUE; |
3094 | } | 3307 | } |
@@ -3286,8 +3499,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3286 | .check_perm = (_p) } | 3499 | .check_perm = (_p) } |
3287 | #define N D(0) | 3500 | #define N D(0) |
3288 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } | 3501 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } |
3289 | #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } | 3502 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } |
3290 | #define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } | 3503 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } |
3291 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } | 3504 | #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } |
3292 | #define II(_f, _e, _i) \ | 3505 | #define II(_f, _e, _i) \ |
3293 | { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } | 3506 | { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } |
@@ -3307,25 +3520,25 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3307 | I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) | 3520 | I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) |
3308 | 3521 | ||
3309 | static struct opcode group7_rm1[] = { | 3522 | static struct opcode group7_rm1[] = { |
3310 | DI(SrcNone | ModRM | Priv, monitor), | 3523 | DI(SrcNone | Priv, monitor), |
3311 | DI(SrcNone | ModRM | Priv, mwait), | 3524 | DI(SrcNone | Priv, mwait), |
3312 | N, N, N, N, N, N, | 3525 | N, N, N, N, N, N, |
3313 | }; | 3526 | }; |
3314 | 3527 | ||
3315 | static struct opcode group7_rm3[] = { | 3528 | static struct opcode group7_rm3[] = { |
3316 | DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), | 3529 | DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), |
3317 | II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), | 3530 | II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), |
3318 | DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), | 3531 | DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), |
3319 | DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), | 3532 | DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), |
3320 | DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), | 3533 | DIP(SrcNone | Prot | Priv, stgi, check_svme), |
3321 | DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), | 3534 | DIP(SrcNone | Prot | Priv, clgi, check_svme), |
3322 | DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), | 3535 | DIP(SrcNone | Prot | Priv, skinit, check_svme), |
3323 | DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), | 3536 | DIP(SrcNone | Prot | Priv, invlpga, check_svme), |
3324 | }; | 3537 | }; |
3325 | 3538 | ||
3326 | static struct opcode group7_rm7[] = { | 3539 | static struct opcode group7_rm7[] = { |
3327 | N, | 3540 | N, |
3328 | DIP(SrcNone | ModRM, rdtscp, check_rdtsc), | 3541 | DIP(SrcNone, rdtscp, check_rdtsc), |
3329 | N, N, N, N, N, N, | 3542 | N, N, N, N, N, N, |
3330 | }; | 3543 | }; |
3331 | 3544 | ||
@@ -3341,81 +3554,86 @@ static struct opcode group1[] = { | |||
3341 | }; | 3554 | }; |
3342 | 3555 | ||
3343 | static struct opcode group1A[] = { | 3556 | static struct opcode group1A[] = { |
3344 | I(DstMem | SrcNone | ModRM | Mov | Stack, em_pop), N, N, N, N, N, N, N, | 3557 | I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, |
3345 | }; | 3558 | }; |
3346 | 3559 | ||
3347 | static struct opcode group3[] = { | 3560 | static struct opcode group3[] = { |
3348 | I(DstMem | SrcImm | ModRM, em_test), | 3561 | I(DstMem | SrcImm, em_test), |
3349 | I(DstMem | SrcImm | ModRM, em_test), | 3562 | I(DstMem | SrcImm, em_test), |
3350 | I(DstMem | SrcNone | ModRM | Lock, em_not), | 3563 | I(DstMem | SrcNone | Lock, em_not), |
3351 | I(DstMem | SrcNone | ModRM | Lock, em_neg), | 3564 | I(DstMem | SrcNone | Lock, em_neg), |
3352 | I(SrcMem | ModRM, em_mul_ex), | 3565 | I(SrcMem, em_mul_ex), |
3353 | I(SrcMem | ModRM, em_imul_ex), | 3566 | I(SrcMem, em_imul_ex), |
3354 | I(SrcMem | ModRM, em_div_ex), | 3567 | I(SrcMem, em_div_ex), |
3355 | I(SrcMem | ModRM, em_idiv_ex), | 3568 | I(SrcMem, em_idiv_ex), |
3356 | }; | 3569 | }; |
3357 | 3570 | ||
3358 | static struct opcode group4[] = { | 3571 | static struct opcode group4[] = { |
3359 | I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), | 3572 | I(ByteOp | DstMem | SrcNone | Lock, em_grp45), |
3360 | I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), | 3573 | I(ByteOp | DstMem | SrcNone | Lock, em_grp45), |
3361 | N, N, N, N, N, N, | 3574 | N, N, N, N, N, N, |
3362 | }; | 3575 | }; |
3363 | 3576 | ||
3364 | static struct opcode group5[] = { | 3577 | static struct opcode group5[] = { |
3365 | I(DstMem | SrcNone | ModRM | Lock, em_grp45), | 3578 | I(DstMem | SrcNone | Lock, em_grp45), |
3366 | I(DstMem | SrcNone | ModRM | Lock, em_grp45), | 3579 | I(DstMem | SrcNone | Lock, em_grp45), |
3367 | I(SrcMem | ModRM | Stack, em_grp45), | 3580 | I(SrcMem | Stack, em_grp45), |
3368 | I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far), | 3581 | I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), |
3369 | I(SrcMem | ModRM | Stack, em_grp45), | 3582 | I(SrcMem | Stack, em_grp45), |
3370 | I(SrcMemFAddr | ModRM | ImplicitOps, em_grp45), | 3583 | I(SrcMemFAddr | ImplicitOps, em_grp45), |
3371 | I(SrcMem | ModRM | Stack, em_grp45), N, | 3584 | I(SrcMem | Stack, em_grp45), N, |
3372 | }; | 3585 | }; |
3373 | 3586 | ||
3374 | static struct opcode group6[] = { | 3587 | static struct opcode group6[] = { |
3375 | DI(ModRM | Prot, sldt), | 3588 | DI(Prot, sldt), |
3376 | DI(ModRM | Prot, str), | 3589 | DI(Prot, str), |
3377 | DI(ModRM | Prot | Priv, lldt), | 3590 | II(Prot | Priv | SrcMem16, em_lldt, lldt), |
3378 | DI(ModRM | Prot | Priv, ltr), | 3591 | II(Prot | Priv | SrcMem16, em_ltr, ltr), |
3379 | N, N, N, N, | 3592 | N, N, N, N, |
3380 | }; | 3593 | }; |
3381 | 3594 | ||
3382 | static struct group_dual group7 = { { | 3595 | static struct group_dual group7 = { { |
3383 | DI(ModRM | Mov | DstMem | Priv, sgdt), | 3596 | II(Mov | DstMem | Priv, em_sgdt, sgdt), |
3384 | DI(ModRM | Mov | DstMem | Priv, sidt), | 3597 | II(Mov | DstMem | Priv, em_sidt, sidt), |
3385 | II(ModRM | SrcMem | Priv, em_lgdt, lgdt), | 3598 | II(SrcMem | Priv, em_lgdt, lgdt), |
3386 | II(ModRM | SrcMem | Priv, em_lidt, lidt), | 3599 | II(SrcMem | Priv, em_lidt, lidt), |
3387 | II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, | 3600 | II(SrcNone | DstMem | Mov, em_smsw, smsw), N, |
3388 | II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), | 3601 | II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), |
3389 | II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), | 3602 | II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), |
3390 | }, { | 3603 | }, { |
3391 | I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), | 3604 | I(SrcNone | Priv | VendorSpecific, em_vmcall), |
3392 | EXT(0, group7_rm1), | 3605 | EXT(0, group7_rm1), |
3393 | N, EXT(0, group7_rm3), | 3606 | N, EXT(0, group7_rm3), |
3394 | II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, | 3607 | II(SrcNone | DstMem | Mov, em_smsw, smsw), N, |
3395 | II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), | 3608 | II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), |
3609 | EXT(0, group7_rm7), | ||
3396 | } }; | 3610 | } }; |
3397 | 3611 | ||
3398 | static struct opcode group8[] = { | 3612 | static struct opcode group8[] = { |
3399 | N, N, N, N, | 3613 | N, N, N, N, |
3400 | I(DstMem | SrcImmByte | ModRM, em_bt), | 3614 | I(DstMem | SrcImmByte, em_bt), |
3401 | I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_bts), | 3615 | I(DstMem | SrcImmByte | Lock | PageTable, em_bts), |
3402 | I(DstMem | SrcImmByte | ModRM | Lock, em_btr), | 3616 | I(DstMem | SrcImmByte | Lock, em_btr), |
3403 | I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_btc), | 3617 | I(DstMem | SrcImmByte | Lock | PageTable, em_btc), |
3404 | }; | 3618 | }; |
3405 | 3619 | ||
3406 | static struct group_dual group9 = { { | 3620 | static struct group_dual group9 = { { |
3407 | N, I(DstMem64 | ModRM | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, | 3621 | N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, |
3408 | }, { | 3622 | }, { |
3409 | N, N, N, N, N, N, N, N, | 3623 | N, N, N, N, N, N, N, N, |
3410 | } }; | 3624 | } }; |
3411 | 3625 | ||
3412 | static struct opcode group11[] = { | 3626 | static struct opcode group11[] = { |
3413 | I(DstMem | SrcImm | ModRM | Mov | PageTable, em_mov), | 3627 | I(DstMem | SrcImm | Mov | PageTable, em_mov), |
3414 | X7(D(Undefined)), | 3628 | X7(D(Undefined)), |
3415 | }; | 3629 | }; |
3416 | 3630 | ||
3417 | static struct gprefix pfx_0f_6f_0f_7f = { | 3631 | static struct gprefix pfx_0f_6f_0f_7f = { |
3418 | N, N, N, I(Sse, em_movdqu), | 3632 | I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), |
3633 | }; | ||
3634 | |||
3635 | static struct gprefix pfx_vmovntpx = { | ||
3636 | I(0, em_mov), N, N, N, | ||
3419 | }; | 3637 | }; |
3420 | 3638 | ||
3421 | static struct opcode opcode_table[256] = { | 3639 | static struct opcode opcode_table[256] = { |
@@ -3464,10 +3682,10 @@ static struct opcode opcode_table[256] = { | |||
3464 | /* 0x70 - 0x7F */ | 3682 | /* 0x70 - 0x7F */ |
3465 | X16(D(SrcImmByte)), | 3683 | X16(D(SrcImmByte)), |
3466 | /* 0x80 - 0x87 */ | 3684 | /* 0x80 - 0x87 */ |
3467 | G(ByteOp | DstMem | SrcImm | ModRM | Group, group1), | 3685 | G(ByteOp | DstMem | SrcImm, group1), |
3468 | G(DstMem | SrcImm | ModRM | Group, group1), | 3686 | G(DstMem | SrcImm, group1), |
3469 | G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), | 3687 | G(ByteOp | DstMem | SrcImm | No64, group1), |
3470 | G(DstMem | SrcImmByte | ModRM | Group, group1), | 3688 | G(DstMem | SrcImmByte, group1), |
3471 | I2bv(DstMem | SrcReg | ModRM, em_test), | 3689 | I2bv(DstMem | SrcReg | ModRM, em_test), |
3472 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), | 3690 | I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), |
3473 | /* 0x88 - 0x8F */ | 3691 | /* 0x88 - 0x8F */ |
@@ -3483,7 +3701,7 @@ static struct opcode opcode_table[256] = { | |||
3483 | D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), | 3701 | D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), |
3484 | I(SrcImmFAddr | No64, em_call_far), N, | 3702 | I(SrcImmFAddr | No64, em_call_far), N, |
3485 | II(ImplicitOps | Stack, em_pushf, pushf), | 3703 | II(ImplicitOps | Stack, em_pushf, pushf), |
3486 | II(ImplicitOps | Stack, em_popf, popf), N, N, | 3704 | II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf), |
3487 | /* 0xA0 - 0xA7 */ | 3705 | /* 0xA0 - 0xA7 */ |
3488 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), | 3706 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), |
3489 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), | 3707 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), |
@@ -3506,7 +3724,8 @@ static struct opcode opcode_table[256] = { | |||
3506 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), | 3724 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), |
3507 | G(ByteOp, group11), G(0, group11), | 3725 | G(ByteOp, group11), G(0, group11), |
3508 | /* 0xC8 - 0xCF */ | 3726 | /* 0xC8 - 0xCF */ |
3509 | N, N, N, I(ImplicitOps | Stack, em_ret_far), | 3727 | I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave), |
3728 | N, I(ImplicitOps | Stack, em_ret_far), | ||
3510 | D(ImplicitOps), DI(SrcImmByte, intn), | 3729 | D(ImplicitOps), DI(SrcImmByte, intn), |
3511 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), | 3730 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), |
3512 | /* 0xD0 - 0xD7 */ | 3731 | /* 0xD0 - 0xD7 */ |
@@ -3549,7 +3768,8 @@ static struct opcode twobyte_table[256] = { | |||
3549 | IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), | 3768 | IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), |
3550 | IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), | 3769 | IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), |
3551 | N, N, N, N, | 3770 | N, N, N, N, |
3552 | N, N, N, N, N, N, N, N, | 3771 | N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx), |
3772 | N, N, N, N, | ||
3553 | /* 0x30 - 0x3F */ | 3773 | /* 0x30 - 0x3F */ |
3554 | II(ImplicitOps | Priv, em_wrmsr, wrmsr), | 3774 | II(ImplicitOps | Priv, em_wrmsr, wrmsr), |
3555 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), | 3775 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), |
@@ -3579,7 +3799,7 @@ static struct opcode twobyte_table[256] = { | |||
3579 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3799 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
3580 | /* 0xA0 - 0xA7 */ | 3800 | /* 0xA0 - 0xA7 */ |
3581 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), | 3801 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), |
3582 | DI(ImplicitOps, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), | 3802 | II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), |
3583 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3803 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
3584 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 3804 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, |
3585 | /* 0xA8 - 0xAF */ | 3805 | /* 0xA8 - 0xAF */ |
@@ -3602,11 +3822,12 @@ static struct opcode twobyte_table[256] = { | |||
3602 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), | 3822 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), |
3603 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), | 3823 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), |
3604 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3824 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3605 | /* 0xC0 - 0xCF */ | 3825 | /* 0xC0 - 0xC7 */ |
3606 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3826 | D2bv(DstMem | SrcReg | ModRM | Lock), |
3607 | N, D(DstMem | SrcReg | ModRM | Mov), | 3827 | N, D(DstMem | SrcReg | ModRM | Mov), |
3608 | N, N, N, GD(0, &group9), | 3828 | N, N, N, GD(0, &group9), |
3609 | N, N, N, N, N, N, N, N, | 3829 | /* 0xC8 - 0xCF */ |
3830 | X8(I(DstReg, em_bswap)), | ||
3610 | /* 0xD0 - 0xDF */ | 3831 | /* 0xD0 - 0xDF */ |
3611 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | 3832 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, |
3612 | /* 0xE0 - 0xEF */ | 3833 | /* 0xE0 - 0xEF */ |
@@ -3897,17 +4118,16 @@ done_prefixes: | |||
3897 | } | 4118 | } |
3898 | ctxt->d = opcode.flags; | 4119 | ctxt->d = opcode.flags; |
3899 | 4120 | ||
4121 | if (ctxt->d & ModRM) | ||
4122 | ctxt->modrm = insn_fetch(u8, ctxt); | ||
4123 | |||
3900 | while (ctxt->d & GroupMask) { | 4124 | while (ctxt->d & GroupMask) { |
3901 | switch (ctxt->d & GroupMask) { | 4125 | switch (ctxt->d & GroupMask) { |
3902 | case Group: | 4126 | case Group: |
3903 | ctxt->modrm = insn_fetch(u8, ctxt); | ||
3904 | --ctxt->_eip; | ||
3905 | goffset = (ctxt->modrm >> 3) & 7; | 4127 | goffset = (ctxt->modrm >> 3) & 7; |
3906 | opcode = opcode.u.group[goffset]; | 4128 | opcode = opcode.u.group[goffset]; |
3907 | break; | 4129 | break; |
3908 | case GroupDual: | 4130 | case GroupDual: |
3909 | ctxt->modrm = insn_fetch(u8, ctxt); | ||
3910 | --ctxt->_eip; | ||
3911 | goffset = (ctxt->modrm >> 3) & 7; | 4131 | goffset = (ctxt->modrm >> 3) & 7; |
3912 | if ((ctxt->modrm >> 6) == 3) | 4132 | if ((ctxt->modrm >> 6) == 3) |
3913 | opcode = opcode.u.gdual->mod3[goffset]; | 4133 | opcode = opcode.u.gdual->mod3[goffset]; |
@@ -3960,6 +4180,8 @@ done_prefixes: | |||
3960 | 4180 | ||
3961 | if (ctxt->d & Sse) | 4181 | if (ctxt->d & Sse) |
3962 | ctxt->op_bytes = 16; | 4182 | ctxt->op_bytes = 16; |
4183 | else if (ctxt->d & Mmx) | ||
4184 | ctxt->op_bytes = 8; | ||
3963 | 4185 | ||
3964 | /* ModRM and SIB bytes. */ | 4186 | /* ModRM and SIB bytes. */ |
3965 | if (ctxt->d & ModRM) { | 4187 | if (ctxt->d & ModRM) { |
@@ -4030,6 +4252,35 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | |||
4030 | return false; | 4252 | return false; |
4031 | } | 4253 | } |
4032 | 4254 | ||
4255 | static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) | ||
4256 | { | ||
4257 | bool fault = false; | ||
4258 | |||
4259 | ctxt->ops->get_fpu(ctxt); | ||
4260 | asm volatile("1: fwait \n\t" | ||
4261 | "2: \n\t" | ||
4262 | ".pushsection .fixup,\"ax\" \n\t" | ||
4263 | "3: \n\t" | ||
4264 | "movb $1, %[fault] \n\t" | ||
4265 | "jmp 2b \n\t" | ||
4266 | ".popsection \n\t" | ||
4267 | _ASM_EXTABLE(1b, 3b) | ||
4268 | : [fault]"+qm"(fault)); | ||
4269 | ctxt->ops->put_fpu(ctxt); | ||
4270 | |||
4271 | if (unlikely(fault)) | ||
4272 | return emulate_exception(ctxt, MF_VECTOR, 0, false); | ||
4273 | |||
4274 | return X86EMUL_CONTINUE; | ||
4275 | } | ||
4276 | |||
4277 | static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, | ||
4278 | struct operand *op) | ||
4279 | { | ||
4280 | if (op->type == OP_MM) | ||
4281 | read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); | ||
4282 | } | ||
4283 | |||
4033 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | 4284 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) |
4034 | { | 4285 | { |
4035 | struct x86_emulate_ops *ops = ctxt->ops; | 4286 | struct x86_emulate_ops *ops = ctxt->ops; |
@@ -4054,18 +4305,31 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
4054 | goto done; | 4305 | goto done; |
4055 | } | 4306 | } |
4056 | 4307 | ||
4057 | if ((ctxt->d & Sse) | 4308 | if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) |
4058 | && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) | 4309 | || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { |
4059 | || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { | ||
4060 | rc = emulate_ud(ctxt); | 4310 | rc = emulate_ud(ctxt); |
4061 | goto done; | 4311 | goto done; |
4062 | } | 4312 | } |
4063 | 4313 | ||
4064 | if ((ctxt->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { | 4314 | if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { |
4065 | rc = emulate_nm(ctxt); | 4315 | rc = emulate_nm(ctxt); |
4066 | goto done; | 4316 | goto done; |
4067 | } | 4317 | } |
4068 | 4318 | ||
4319 | if (ctxt->d & Mmx) { | ||
4320 | rc = flush_pending_x87_faults(ctxt); | ||
4321 | if (rc != X86EMUL_CONTINUE) | ||
4322 | goto done; | ||
4323 | /* | ||
4324 | * Now that we know the fpu is exception safe, we can fetch | ||
4325 | * operands from it. | ||
4326 | */ | ||
4327 | fetch_possible_mmx_operand(ctxt, &ctxt->src); | ||
4328 | fetch_possible_mmx_operand(ctxt, &ctxt->src2); | ||
4329 | if (!(ctxt->d & Mov)) | ||
4330 | fetch_possible_mmx_operand(ctxt, &ctxt->dst); | ||
4331 | } | ||
4332 | |||
4069 | if (unlikely(ctxt->guest_mode) && ctxt->intercept) { | 4333 | if (unlikely(ctxt->guest_mode) && ctxt->intercept) { |
4070 | rc = emulator_check_intercept(ctxt, ctxt->intercept, | 4334 | rc = emulator_check_intercept(ctxt, ctxt->intercept, |
4071 | X86_ICPT_PRE_EXCEPT); | 4335 | X86_ICPT_PRE_EXCEPT); |
@@ -4327,12 +4591,12 @@ twobyte_insn: | |||
4327 | break; | 4591 | break; |
4328 | case 0xb6 ... 0xb7: /* movzx */ | 4592 | case 0xb6 ... 0xb7: /* movzx */ |
4329 | ctxt->dst.bytes = ctxt->op_bytes; | 4593 | ctxt->dst.bytes = ctxt->op_bytes; |
4330 | ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val | 4594 | ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val |
4331 | : (u16) ctxt->src.val; | 4595 | : (u16) ctxt->src.val; |
4332 | break; | 4596 | break; |
4333 | case 0xbe ... 0xbf: /* movsx */ | 4597 | case 0xbe ... 0xbf: /* movsx */ |
4334 | ctxt->dst.bytes = ctxt->op_bytes; | 4598 | ctxt->dst.bytes = ctxt->op_bytes; |
4335 | ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val : | 4599 | ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val : |
4336 | (s16) ctxt->src.val; | 4600 | (s16) ctxt->src.val; |
4337 | break; | 4601 | break; |
4338 | case 0xc0 ... 0xc1: /* xadd */ | 4602 | case 0xc0 ... 0xc1: /* xadd */ |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index d68f99df690..adba28f88d1 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -34,7 +34,6 @@ | |||
34 | 34 | ||
35 | #include <linux/kvm_host.h> | 35 | #include <linux/kvm_host.h> |
36 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
37 | #include <linux/workqueue.h> | ||
38 | 37 | ||
39 | #include "irq.h" | 38 | #include "irq.h" |
40 | #include "i8254.h" | 39 | #include "i8254.h" |
@@ -249,7 +248,7 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
249 | /* in this case, we had multiple outstanding pit interrupts | 248 | /* in this case, we had multiple outstanding pit interrupts |
250 | * that we needed to inject. Reinject | 249 | * that we needed to inject. Reinject |
251 | */ | 250 | */ |
252 | queue_work(ps->pit->wq, &ps->pit->expired); | 251 | queue_kthread_work(&ps->pit->worker, &ps->pit->expired); |
253 | ps->irq_ack = 1; | 252 | ps->irq_ack = 1; |
254 | spin_unlock(&ps->inject_lock); | 253 | spin_unlock(&ps->inject_lock); |
255 | } | 254 | } |
@@ -270,7 +269,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | |||
270 | static void destroy_pit_timer(struct kvm_pit *pit) | 269 | static void destroy_pit_timer(struct kvm_pit *pit) |
271 | { | 270 | { |
272 | hrtimer_cancel(&pit->pit_state.pit_timer.timer); | 271 | hrtimer_cancel(&pit->pit_state.pit_timer.timer); |
273 | cancel_work_sync(&pit->expired); | 272 | flush_kthread_work(&pit->expired); |
274 | } | 273 | } |
275 | 274 | ||
276 | static bool kpit_is_periodic(struct kvm_timer *ktimer) | 275 | static bool kpit_is_periodic(struct kvm_timer *ktimer) |
@@ -284,7 +283,7 @@ static struct kvm_timer_ops kpit_ops = { | |||
284 | .is_periodic = kpit_is_periodic, | 283 | .is_periodic = kpit_is_periodic, |
285 | }; | 284 | }; |
286 | 285 | ||
287 | static void pit_do_work(struct work_struct *work) | 286 | static void pit_do_work(struct kthread_work *work) |
288 | { | 287 | { |
289 | struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); | 288 | struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); |
290 | struct kvm *kvm = pit->kvm; | 289 | struct kvm *kvm = pit->kvm; |
@@ -328,7 +327,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | |||
328 | 327 | ||
329 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 328 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { |
330 | atomic_inc(&ktimer->pending); | 329 | atomic_inc(&ktimer->pending); |
331 | queue_work(pt->wq, &pt->expired); | 330 | queue_kthread_work(&pt->worker, &pt->expired); |
332 | } | 331 | } |
333 | 332 | ||
334 | if (ktimer->t_ops->is_periodic(ktimer)) { | 333 | if (ktimer->t_ops->is_periodic(ktimer)) { |
@@ -353,7 +352,7 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
353 | 352 | ||
354 | /* TODO The new value only affected after the retriggered */ | 353 | /* TODO The new value only affected after the retriggered */ |
355 | hrtimer_cancel(&pt->timer); | 354 | hrtimer_cancel(&pt->timer); |
356 | cancel_work_sync(&ps->pit->expired); | 355 | flush_kthread_work(&ps->pit->expired); |
357 | pt->period = interval; | 356 | pt->period = interval; |
358 | ps->is_periodic = is_period; | 357 | ps->is_periodic = is_period; |
359 | 358 | ||
@@ -669,6 +668,8 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
669 | { | 668 | { |
670 | struct kvm_pit *pit; | 669 | struct kvm_pit *pit; |
671 | struct kvm_kpit_state *pit_state; | 670 | struct kvm_kpit_state *pit_state; |
671 | struct pid *pid; | ||
672 | pid_t pid_nr; | ||
672 | int ret; | 673 | int ret; |
673 | 674 | ||
674 | pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL); | 675 | pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL); |
@@ -685,14 +686,20 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
685 | mutex_lock(&pit->pit_state.lock); | 686 | mutex_lock(&pit->pit_state.lock); |
686 | spin_lock_init(&pit->pit_state.inject_lock); | 687 | spin_lock_init(&pit->pit_state.inject_lock); |
687 | 688 | ||
688 | pit->wq = create_singlethread_workqueue("kvm-pit-wq"); | 689 | pid = get_pid(task_tgid(current)); |
689 | if (!pit->wq) { | 690 | pid_nr = pid_vnr(pid); |
691 | put_pid(pid); | ||
692 | |||
693 | init_kthread_worker(&pit->worker); | ||
694 | pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker, | ||
695 | "kvm-pit/%d", pid_nr); | ||
696 | if (IS_ERR(pit->worker_task)) { | ||
690 | mutex_unlock(&pit->pit_state.lock); | 697 | mutex_unlock(&pit->pit_state.lock); |
691 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | 698 | kvm_free_irq_source_id(kvm, pit->irq_source_id); |
692 | kfree(pit); | 699 | kfree(pit); |
693 | return NULL; | 700 | return NULL; |
694 | } | 701 | } |
695 | INIT_WORK(&pit->expired, pit_do_work); | 702 | init_kthread_work(&pit->expired, pit_do_work); |
696 | 703 | ||
697 | kvm->arch.vpit = pit; | 704 | kvm->arch.vpit = pit; |
698 | pit->kvm = kvm; | 705 | pit->kvm = kvm; |
@@ -736,7 +743,7 @@ fail: | |||
736 | kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | 743 | kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); |
737 | kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); | 744 | kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); |
738 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | 745 | kvm_free_irq_source_id(kvm, pit->irq_source_id); |
739 | destroy_workqueue(pit->wq); | 746 | kthread_stop(pit->worker_task); |
740 | kfree(pit); | 747 | kfree(pit); |
741 | return NULL; | 748 | return NULL; |
742 | } | 749 | } |
@@ -756,10 +763,10 @@ void kvm_free_pit(struct kvm *kvm) | |||
756 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 763 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
757 | timer = &kvm->arch.vpit->pit_state.pit_timer.timer; | 764 | timer = &kvm->arch.vpit->pit_state.pit_timer.timer; |
758 | hrtimer_cancel(timer); | 765 | hrtimer_cancel(timer); |
759 | cancel_work_sync(&kvm->arch.vpit->expired); | 766 | flush_kthread_work(&kvm->arch.vpit->expired); |
767 | kthread_stop(kvm->arch.vpit->worker_task); | ||
760 | kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); | 768 | kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); |
761 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 769 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
762 | destroy_workqueue(kvm->arch.vpit->wq); | ||
763 | kfree(kvm->arch.vpit); | 770 | kfree(kvm->arch.vpit); |
764 | } | 771 | } |
765 | } | 772 | } |
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 51a97426e79..fdf40425ea1 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __I8254_H | 1 | #ifndef __I8254_H |
2 | #define __I8254_H | 2 | #define __I8254_H |
3 | 3 | ||
4 | #include <linux/kthread.h> | ||
5 | |||
4 | #include "iodev.h" | 6 | #include "iodev.h" |
5 | 7 | ||
6 | struct kvm_kpit_channel_state { | 8 | struct kvm_kpit_channel_state { |
@@ -39,8 +41,9 @@ struct kvm_pit { | |||
39 | struct kvm_kpit_state pit_state; | 41 | struct kvm_kpit_state pit_state; |
40 | int irq_source_id; | 42 | int irq_source_id; |
41 | struct kvm_irq_mask_notifier mask_notifier; | 43 | struct kvm_irq_mask_notifier mask_notifier; |
42 | struct workqueue_struct *wq; | 44 | struct kthread_worker worker; |
43 | struct work_struct expired; | 45 | struct task_struct *worker_task; |
46 | struct kthread_work expired; | ||
44 | }; | 47 | }; |
45 | 48 | ||
46 | #define KVM_PIT_BASE_ADDRESS 0x40 | 49 | #define KVM_PIT_BASE_ADDRESS 0x40 |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 81cf4fa4a2b..1df8fb9e1d5 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -188,14 +188,15 @@ void kvm_pic_update_irq(struct kvm_pic *s) | |||
188 | pic_unlock(s); | 188 | pic_unlock(s); |
189 | } | 189 | } |
190 | 190 | ||
191 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 191 | int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level) |
192 | { | 192 | { |
193 | struct kvm_pic *s = opaque; | ||
194 | int ret = -1; | 193 | int ret = -1; |
195 | 194 | ||
196 | pic_lock(s); | 195 | pic_lock(s); |
197 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 196 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
198 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 197 | int irq_level = __kvm_irq_line_state(&s->irq_states[irq], |
198 | irq_source_id, level); | ||
199 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level); | ||
199 | pic_update_irq(s); | 200 | pic_update_irq(s); |
200 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 201 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
201 | s->pics[irq >> 3].imr, ret == 0); | 202 | s->pics[irq >> 3].imr, ret == 0); |
@@ -205,6 +206,16 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
205 | return ret; | 206 | return ret; |
206 | } | 207 | } |
207 | 208 | ||
209 | void kvm_pic_clear_all(struct kvm_pic *s, int irq_source_id) | ||
210 | { | ||
211 | int i; | ||
212 | |||
213 | pic_lock(s); | ||
214 | for (i = 0; i < PIC_NUM_PINS; i++) | ||
215 | __clear_bit(irq_source_id, &s->irq_states[i]); | ||
216 | pic_unlock(s); | ||
217 | } | ||
218 | |||
208 | /* | 219 | /* |
209 | * acknowledge interrupt 'irq' | 220 | * acknowledge interrupt 'irq' |
210 | */ | 221 | */ |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 858432287ab..ce878788a39 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -92,6 +92,11 @@ static inline int apic_test_and_clear_vector(int vec, void *bitmap) | |||
92 | return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 92 | return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
93 | } | 93 | } |
94 | 94 | ||
95 | static inline int apic_test_vector(int vec, void *bitmap) | ||
96 | { | ||
97 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
98 | } | ||
99 | |||
95 | static inline void apic_set_vector(int vec, void *bitmap) | 100 | static inline void apic_set_vector(int vec, void *bitmap) |
96 | { | 101 | { |
97 | set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 102 | set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
@@ -102,6 +107,16 @@ static inline void apic_clear_vector(int vec, void *bitmap) | |||
102 | clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 107 | clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
103 | } | 108 | } |
104 | 109 | ||
110 | static inline int __apic_test_and_set_vector(int vec, void *bitmap) | ||
111 | { | ||
112 | return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
113 | } | ||
114 | |||
115 | static inline int __apic_test_and_clear_vector(int vec, void *bitmap) | ||
116 | { | ||
117 | return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
118 | } | ||
119 | |||
105 | static inline int apic_hw_enabled(struct kvm_lapic *apic) | 120 | static inline int apic_hw_enabled(struct kvm_lapic *apic) |
106 | { | 121 | { |
107 | return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; | 122 | return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; |
@@ -205,6 +220,16 @@ static int find_highest_vector(void *bitmap) | |||
205 | return fls(word[word_offset << 2]) - 1 + (word_offset << 5); | 220 | return fls(word[word_offset << 2]) - 1 + (word_offset << 5); |
206 | } | 221 | } |
207 | 222 | ||
223 | static u8 count_vectors(void *bitmap) | ||
224 | { | ||
225 | u32 *word = bitmap; | ||
226 | int word_offset; | ||
227 | u8 count = 0; | ||
228 | for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset) | ||
229 | count += hweight32(word[word_offset << 2]); | ||
230 | return count; | ||
231 | } | ||
232 | |||
208 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) | 233 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) |
209 | { | 234 | { |
210 | apic->irr_pending = true; | 235 | apic->irr_pending = true; |
@@ -237,6 +262,27 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) | |||
237 | apic->irr_pending = true; | 262 | apic->irr_pending = true; |
238 | } | 263 | } |
239 | 264 | ||
265 | static inline void apic_set_isr(int vec, struct kvm_lapic *apic) | ||
266 | { | ||
267 | if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) | ||
268 | ++apic->isr_count; | ||
269 | BUG_ON(apic->isr_count > MAX_APIC_VECTOR); | ||
270 | /* | ||
271 | * ISR (in service register) bit is set when injecting an interrupt. | ||
272 | * The highest vector is injected. Thus the latest bit set matches | ||
273 | * the highest bit in ISR. | ||
274 | */ | ||
275 | apic->highest_isr_cache = vec; | ||
276 | } | ||
277 | |||
278 | static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) | ||
279 | { | ||
280 | if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) | ||
281 | --apic->isr_count; | ||
282 | BUG_ON(apic->isr_count < 0); | ||
283 | apic->highest_isr_cache = -1; | ||
284 | } | ||
285 | |||
240 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | 286 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) |
241 | { | 287 | { |
242 | struct kvm_lapic *apic = vcpu->arch.apic; | 288 | struct kvm_lapic *apic = vcpu->arch.apic; |
@@ -265,9 +311,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) | |||
265 | irq->level, irq->trig_mode); | 311 | irq->level, irq->trig_mode); |
266 | } | 312 | } |
267 | 313 | ||
314 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) | ||
315 | { | ||
316 | |||
317 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, | ||
318 | sizeof(val)); | ||
319 | } | ||
320 | |||
321 | static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) | ||
322 | { | ||
323 | |||
324 | return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, | ||
325 | sizeof(*val)); | ||
326 | } | ||
327 | |||
328 | static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) | ||
329 | { | ||
330 | return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; | ||
331 | } | ||
332 | |||
333 | static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) | ||
334 | { | ||
335 | u8 val; | ||
336 | if (pv_eoi_get_user(vcpu, &val) < 0) | ||
337 | apic_debug("Can't read EOI MSR value: 0x%llx\n", | ||
338 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | ||
339 | return val & 0x1; | ||
340 | } | ||
341 | |||
342 | static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) | ||
343 | { | ||
344 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { | ||
345 | apic_debug("Can't set EOI MSR value: 0x%llx\n", | ||
346 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | ||
347 | return; | ||
348 | } | ||
349 | __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | ||
350 | } | ||
351 | |||
352 | static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | ||
353 | { | ||
354 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { | ||
355 | apic_debug("Can't clear EOI MSR value: 0x%llx\n", | ||
356 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | ||
357 | return; | ||
358 | } | ||
359 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | ||
360 | } | ||
361 | |||
268 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) | 362 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) |
269 | { | 363 | { |
270 | int result; | 364 | int result; |
365 | if (!apic->isr_count) | ||
366 | return -1; | ||
367 | if (likely(apic->highest_isr_cache != -1)) | ||
368 | return apic->highest_isr_cache; | ||
271 | 369 | ||
272 | result = find_highest_vector(apic->regs + APIC_ISR); | 370 | result = find_highest_vector(apic->regs + APIC_ISR); |
273 | ASSERT(result == -1 || result >= 16); | 371 | ASSERT(result == -1 || result >= 16); |
@@ -477,27 +575,33 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) | |||
477 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; | 575 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
478 | } | 576 | } |
479 | 577 | ||
480 | static void apic_set_eoi(struct kvm_lapic *apic) | 578 | static int apic_set_eoi(struct kvm_lapic *apic) |
481 | { | 579 | { |
482 | int vector = apic_find_highest_isr(apic); | 580 | int vector = apic_find_highest_isr(apic); |
483 | int trigger_mode; | 581 | |
582 | trace_kvm_eoi(apic, vector); | ||
583 | |||
484 | /* | 584 | /* |
485 | * Not every write EOI will has corresponding ISR, | 585 | * Not every write EOI will has corresponding ISR, |
486 | * one example is when Kernel check timer on setup_IO_APIC | 586 | * one example is when Kernel check timer on setup_IO_APIC |
487 | */ | 587 | */ |
488 | if (vector == -1) | 588 | if (vector == -1) |
489 | return; | 589 | return vector; |
490 | 590 | ||
491 | apic_clear_vector(vector, apic->regs + APIC_ISR); | 591 | apic_clear_isr(vector, apic); |
492 | apic_update_ppr(apic); | 592 | apic_update_ppr(apic); |
493 | 593 | ||
494 | if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) | 594 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && |
495 | trigger_mode = IOAPIC_LEVEL_TRIG; | 595 | kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { |
496 | else | 596 | int trigger_mode; |
497 | trigger_mode = IOAPIC_EDGE_TRIG; | 597 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) |
498 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) | 598 | trigger_mode = IOAPIC_LEVEL_TRIG; |
599 | else | ||
600 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
499 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | 601 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); |
602 | } | ||
500 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | 603 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); |
604 | return vector; | ||
501 | } | 605 | } |
502 | 606 | ||
503 | static void apic_send_ipi(struct kvm_lapic *apic) | 607 | static void apic_send_ipi(struct kvm_lapic *apic) |
@@ -1074,13 +1178,17 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1074 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 1178 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
1075 | } | 1179 | } |
1076 | apic->irr_pending = false; | 1180 | apic->irr_pending = false; |
1181 | apic->isr_count = 0; | ||
1182 | apic->highest_isr_cache = -1; | ||
1077 | update_divide_count(apic); | 1183 | update_divide_count(apic); |
1078 | atomic_set(&apic->lapic_timer.pending, 0); | 1184 | atomic_set(&apic->lapic_timer.pending, 0); |
1079 | if (kvm_vcpu_is_bsp(vcpu)) | 1185 | if (kvm_vcpu_is_bsp(vcpu)) |
1080 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1186 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; |
1187 | vcpu->arch.pv_eoi.msr_val = 0; | ||
1081 | apic_update_ppr(apic); | 1188 | apic_update_ppr(apic); |
1082 | 1189 | ||
1083 | vcpu->arch.apic_arb_prio = 0; | 1190 | vcpu->arch.apic_arb_prio = 0; |
1191 | vcpu->arch.apic_attention = 0; | ||
1084 | 1192 | ||
1085 | apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" | 1193 | apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" |
1086 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, | 1194 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, |
@@ -1240,7 +1348,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) | |||
1240 | if (vector == -1) | 1348 | if (vector == -1) |
1241 | return -1; | 1349 | return -1; |
1242 | 1350 | ||
1243 | apic_set_vector(vector, apic->regs + APIC_ISR); | 1351 | apic_set_isr(vector, apic); |
1244 | apic_update_ppr(apic); | 1352 | apic_update_ppr(apic); |
1245 | apic_clear_irr(vector, apic); | 1353 | apic_clear_irr(vector, apic); |
1246 | return vector; | 1354 | return vector; |
@@ -1259,6 +1367,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | |||
1259 | update_divide_count(apic); | 1367 | update_divide_count(apic); |
1260 | start_apic_timer(apic); | 1368 | start_apic_timer(apic); |
1261 | apic->irr_pending = true; | 1369 | apic->irr_pending = true; |
1370 | apic->isr_count = count_vectors(apic->regs + APIC_ISR); | ||
1371 | apic->highest_isr_cache = -1; | ||
1262 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1372 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
1263 | } | 1373 | } |
1264 | 1374 | ||
@@ -1275,12 +1385,52 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | |||
1275 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 1385 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
1276 | } | 1386 | } |
1277 | 1387 | ||
1388 | /* | ||
1389 | * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt | ||
1390 | * | ||
1391 | * Detect whether guest triggered PV EOI since the | ||
1392 | * last entry. If yes, set EOI on guests's behalf. | ||
1393 | * Clear PV EOI in guest memory in any case. | ||
1394 | */ | ||
1395 | static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, | ||
1396 | struct kvm_lapic *apic) | ||
1397 | { | ||
1398 | bool pending; | ||
1399 | int vector; | ||
1400 | /* | ||
1401 | * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host | ||
1402 | * and KVM_PV_EOI_ENABLED in guest memory as follows: | ||
1403 | * | ||
1404 | * KVM_APIC_PV_EOI_PENDING is unset: | ||
1405 | * -> host disabled PV EOI. | ||
1406 | * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set: | ||
1407 | * -> host enabled PV EOI, guest did not execute EOI yet. | ||
1408 | * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset: | ||
1409 | * -> host enabled PV EOI, guest executed EOI. | ||
1410 | */ | ||
1411 | BUG_ON(!pv_eoi_enabled(vcpu)); | ||
1412 | pending = pv_eoi_get_pending(vcpu); | ||
1413 | /* | ||
1414 | * Clear pending bit in any case: it will be set again on vmentry. | ||
1415 | * While this might not be ideal from performance point of view, | ||
1416 | * this makes sure pv eoi is only enabled when we know it's safe. | ||
1417 | */ | ||
1418 | pv_eoi_clr_pending(vcpu); | ||
1419 | if (pending) | ||
1420 | return; | ||
1421 | vector = apic_set_eoi(apic); | ||
1422 | trace_kvm_pv_eoi(apic, vector); | ||
1423 | } | ||
1424 | |||
1278 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) | 1425 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) |
1279 | { | 1426 | { |
1280 | u32 data; | 1427 | u32 data; |
1281 | void *vapic; | 1428 | void *vapic; |
1282 | 1429 | ||
1283 | if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) | 1430 | if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention)) |
1431 | apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic); | ||
1432 | |||
1433 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | ||
1284 | return; | 1434 | return; |
1285 | 1435 | ||
1286 | vapic = kmap_atomic(vcpu->arch.apic->vapic_page); | 1436 | vapic = kmap_atomic(vcpu->arch.apic->vapic_page); |
@@ -1290,17 +1440,44 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) | |||
1290 | apic_set_tpr(vcpu->arch.apic, data & 0xff); | 1440 | apic_set_tpr(vcpu->arch.apic, data & 0xff); |
1291 | } | 1441 | } |
1292 | 1442 | ||
1443 | /* | ||
1444 | * apic_sync_pv_eoi_to_guest - called before vmentry | ||
1445 | * | ||
1446 | * Detect whether it's safe to enable PV EOI and | ||
1447 | * if yes do so. | ||
1448 | */ | ||
1449 | static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, | ||
1450 | struct kvm_lapic *apic) | ||
1451 | { | ||
1452 | if (!pv_eoi_enabled(vcpu) || | ||
1453 | /* IRR set or many bits in ISR: could be nested. */ | ||
1454 | apic->irr_pending || | ||
1455 | /* Cache not set: could be safe but we don't bother. */ | ||
1456 | apic->highest_isr_cache == -1 || | ||
1457 | /* Need EOI to update ioapic. */ | ||
1458 | kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { | ||
1459 | /* | ||
1460 | * PV EOI was disabled by apic_sync_pv_eoi_from_guest | ||
1461 | * so we need not do anything here. | ||
1462 | */ | ||
1463 | return; | ||
1464 | } | ||
1465 | |||
1466 | pv_eoi_set_pending(apic->vcpu); | ||
1467 | } | ||
1468 | |||
1293 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) | 1469 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) |
1294 | { | 1470 | { |
1295 | u32 data, tpr; | 1471 | u32 data, tpr; |
1296 | int max_irr, max_isr; | 1472 | int max_irr, max_isr; |
1297 | struct kvm_lapic *apic; | 1473 | struct kvm_lapic *apic = vcpu->arch.apic; |
1298 | void *vapic; | 1474 | void *vapic; |
1299 | 1475 | ||
1300 | if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) | 1476 | apic_sync_pv_eoi_to_guest(vcpu, apic); |
1477 | |||
1478 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | ||
1301 | return; | 1479 | return; |
1302 | 1480 | ||
1303 | apic = vcpu->arch.apic; | ||
1304 | tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; | 1481 | tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; |
1305 | max_irr = apic_find_highest_irr(apic); | 1482 | max_irr = apic_find_highest_irr(apic); |
1306 | if (max_irr < 0) | 1483 | if (max_irr < 0) |
@@ -1317,10 +1494,11 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) | |||
1317 | 1494 | ||
1318 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) | 1495 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) |
1319 | { | 1496 | { |
1320 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
1321 | return; | ||
1322 | |||
1323 | vcpu->arch.apic->vapic_addr = vapic_addr; | 1497 | vcpu->arch.apic->vapic_addr = vapic_addr; |
1498 | if (vapic_addr) | ||
1499 | __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); | ||
1500 | else | ||
1501 | __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); | ||
1324 | } | 1502 | } |
1325 | 1503 | ||
1326 | int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1504 | int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
@@ -1385,3 +1563,16 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) | |||
1385 | 1563 | ||
1386 | return 0; | 1564 | return 0; |
1387 | } | 1565 | } |
1566 | |||
1567 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) | ||
1568 | { | ||
1569 | u64 addr = data & ~KVM_MSR_ENABLED; | ||
1570 | if (!IS_ALIGNED(addr, 4)) | ||
1571 | return 1; | ||
1572 | |||
1573 | vcpu->arch.pv_eoi.msr_val = data; | ||
1574 | if (!pv_eoi_enabled(vcpu)) | ||
1575 | return 0; | ||
1576 | return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, | ||
1577 | addr); | ||
1578 | } | ||
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 6f4ce2575d0..4af5405ae1e 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -13,6 +13,15 @@ struct kvm_lapic { | |||
13 | u32 divide_count; | 13 | u32 divide_count; |
14 | struct kvm_vcpu *vcpu; | 14 | struct kvm_vcpu *vcpu; |
15 | bool irr_pending; | 15 | bool irr_pending; |
16 | /* Number of bits set in ISR. */ | ||
17 | s16 isr_count; | ||
18 | /* The highest vector set in ISR; if -1 - invalid, must scan ISR. */ | ||
19 | int highest_isr_cache; | ||
20 | /** | ||
21 | * APIC register page. The layout matches the register layout seen by | ||
22 | * the guest 1:1, because it is accessed by the vmx microcode. | ||
23 | * Note: Only one register, the TPR, is used by the microcode. | ||
24 | */ | ||
16 | void *regs; | 25 | void *regs; |
17 | gpa_t vapic_addr; | 26 | gpa_t vapic_addr; |
18 | struct page *vapic_page; | 27 | struct page *vapic_page; |
@@ -60,4 +69,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) | |||
60 | { | 69 | { |
61 | return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; | 70 | return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; |
62 | } | 71 | } |
72 | |||
73 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); | ||
63 | #endif | 74 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4cb16426884..01ca0042393 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -90,7 +90,7 @@ module_param(dbg, bool, 0644); | |||
90 | 90 | ||
91 | #define PTE_PREFETCH_NUM 8 | 91 | #define PTE_PREFETCH_NUM 8 |
92 | 92 | ||
93 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 | 93 | #define PT_FIRST_AVAIL_BITS_SHIFT 10 |
94 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 | 94 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 |
95 | 95 | ||
96 | #define PT64_LEVEL_BITS 9 | 96 | #define PT64_LEVEL_BITS 9 |
@@ -135,8 +135,6 @@ module_param(dbg, bool, 0644); | |||
135 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ | 135 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ |
136 | | PT64_NX_MASK) | 136 | | PT64_NX_MASK) |
137 | 137 | ||
138 | #define PTE_LIST_EXT 4 | ||
139 | |||
140 | #define ACC_EXEC_MASK 1 | 138 | #define ACC_EXEC_MASK 1 |
141 | #define ACC_WRITE_MASK PT_WRITABLE_MASK | 139 | #define ACC_WRITE_MASK PT_WRITABLE_MASK |
142 | #define ACC_USER_MASK PT_USER_MASK | 140 | #define ACC_USER_MASK PT_USER_MASK |
@@ -147,10 +145,14 @@ module_param(dbg, bool, 0644); | |||
147 | #define CREATE_TRACE_POINTS | 145 | #define CREATE_TRACE_POINTS |
148 | #include "mmutrace.h" | 146 | #include "mmutrace.h" |
149 | 147 | ||
150 | #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) | 148 | #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) |
149 | #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) | ||
151 | 150 | ||
152 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | 151 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
153 | 152 | ||
153 | /* make pte_list_desc fit well in cache line */ | ||
154 | #define PTE_LIST_EXT 3 | ||
155 | |||
154 | struct pte_list_desc { | 156 | struct pte_list_desc { |
155 | u64 *sptes[PTE_LIST_EXT]; | 157 | u64 *sptes[PTE_LIST_EXT]; |
156 | struct pte_list_desc *more; | 158 | struct pte_list_desc *more; |
@@ -187,6 +189,7 @@ static u64 __read_mostly shadow_dirty_mask; | |||
187 | static u64 __read_mostly shadow_mmio_mask; | 189 | static u64 __read_mostly shadow_mmio_mask; |
188 | 190 | ||
189 | static void mmu_spte_set(u64 *sptep, u64 spte); | 191 | static void mmu_spte_set(u64 *sptep, u64 spte); |
192 | static void mmu_free_roots(struct kvm_vcpu *vcpu); | ||
190 | 193 | ||
191 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) | 194 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) |
192 | { | 195 | { |
@@ -443,8 +446,22 @@ static bool __check_direct_spte_mmio_pf(u64 spte) | |||
443 | } | 446 | } |
444 | #endif | 447 | #endif |
445 | 448 | ||
449 | static bool spte_is_locklessly_modifiable(u64 spte) | ||
450 | { | ||
451 | return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)); | ||
452 | } | ||
453 | |||
446 | static bool spte_has_volatile_bits(u64 spte) | 454 | static bool spte_has_volatile_bits(u64 spte) |
447 | { | 455 | { |
456 | /* | ||
457 | * Always atomicly update spte if it can be updated | ||
458 | * out of mmu-lock, it can ensure dirty bit is not lost, | ||
459 | * also, it can help us to get a stable is_writable_pte() | ||
460 | * to ensure tlb flush is not missed. | ||
461 | */ | ||
462 | if (spte_is_locklessly_modifiable(spte)) | ||
463 | return true; | ||
464 | |||
448 | if (!shadow_accessed_mask) | 465 | if (!shadow_accessed_mask) |
449 | return false; | 466 | return false; |
450 | 467 | ||
@@ -477,34 +494,47 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte) | |||
477 | 494 | ||
478 | /* Rules for using mmu_spte_update: | 495 | /* Rules for using mmu_spte_update: |
479 | * Update the state bits, it means the mapped pfn is not changged. | 496 | * Update the state bits, it means the mapped pfn is not changged. |
497 | * | ||
498 | * Whenever we overwrite a writable spte with a read-only one we | ||
499 | * should flush remote TLBs. Otherwise rmap_write_protect | ||
500 | * will find a read-only spte, even though the writable spte | ||
501 | * might be cached on a CPU's TLB, the return value indicates this | ||
502 | * case. | ||
480 | */ | 503 | */ |
481 | static void mmu_spte_update(u64 *sptep, u64 new_spte) | 504 | static bool mmu_spte_update(u64 *sptep, u64 new_spte) |
482 | { | 505 | { |
483 | u64 mask, old_spte = *sptep; | 506 | u64 old_spte = *sptep; |
507 | bool ret = false; | ||
484 | 508 | ||
485 | WARN_ON(!is_rmap_spte(new_spte)); | 509 | WARN_ON(!is_rmap_spte(new_spte)); |
486 | 510 | ||
487 | if (!is_shadow_present_pte(old_spte)) | 511 | if (!is_shadow_present_pte(old_spte)) { |
488 | return mmu_spte_set(sptep, new_spte); | 512 | mmu_spte_set(sptep, new_spte); |
489 | 513 | return ret; | |
490 | new_spte |= old_spte & shadow_dirty_mask; | 514 | } |
491 | |||
492 | mask = shadow_accessed_mask; | ||
493 | if (is_writable_pte(old_spte)) | ||
494 | mask |= shadow_dirty_mask; | ||
495 | 515 | ||
496 | if (!spte_has_volatile_bits(old_spte) || (new_spte & mask) == mask) | 516 | if (!spte_has_volatile_bits(old_spte)) |
497 | __update_clear_spte_fast(sptep, new_spte); | 517 | __update_clear_spte_fast(sptep, new_spte); |
498 | else | 518 | else |
499 | old_spte = __update_clear_spte_slow(sptep, new_spte); | 519 | old_spte = __update_clear_spte_slow(sptep, new_spte); |
500 | 520 | ||
521 | /* | ||
522 | * For the spte updated out of mmu-lock is safe, since | ||
523 | * we always atomicly update it, see the comments in | ||
524 | * spte_has_volatile_bits(). | ||
525 | */ | ||
526 | if (is_writable_pte(old_spte) && !is_writable_pte(new_spte)) | ||
527 | ret = true; | ||
528 | |||
501 | if (!shadow_accessed_mask) | 529 | if (!shadow_accessed_mask) |
502 | return; | 530 | return ret; |
503 | 531 | ||
504 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask)) | 532 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask)) |
505 | kvm_set_pfn_accessed(spte_to_pfn(old_spte)); | 533 | kvm_set_pfn_accessed(spte_to_pfn(old_spte)); |
506 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask)) | 534 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask)) |
507 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); | 535 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); |
536 | |||
537 | return ret; | ||
508 | } | 538 | } |
509 | 539 | ||
510 | /* | 540 | /* |
@@ -550,19 +580,29 @@ static u64 mmu_spte_get_lockless(u64 *sptep) | |||
550 | 580 | ||
551 | static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) | 581 | static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) |
552 | { | 582 | { |
553 | rcu_read_lock(); | 583 | /* |
554 | atomic_inc(&vcpu->kvm->arch.reader_counter); | 584 | * Prevent page table teardown by making any free-er wait during |
555 | 585 | * kvm_flush_remote_tlbs() IPI to all active vcpus. | |
556 | /* Increase the counter before walking shadow page table */ | 586 | */ |
557 | smp_mb__after_atomic_inc(); | 587 | local_irq_disable(); |
588 | vcpu->mode = READING_SHADOW_PAGE_TABLES; | ||
589 | /* | ||
590 | * Make sure a following spte read is not reordered ahead of the write | ||
591 | * to vcpu->mode. | ||
592 | */ | ||
593 | smp_mb(); | ||
558 | } | 594 | } |
559 | 595 | ||
560 | static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) | 596 | static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) |
561 | { | 597 | { |
562 | /* Decrease the counter after walking shadow page table finished */ | 598 | /* |
563 | smp_mb__before_atomic_dec(); | 599 | * Make sure the write to vcpu->mode is not reordered in front of |
564 | atomic_dec(&vcpu->kvm->arch.reader_counter); | 600 | * reads to sptes. If it does, kvm_commit_zap_page() can see us |
565 | rcu_read_unlock(); | 601 | * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. |
602 | */ | ||
603 | smp_mb(); | ||
604 | vcpu->mode = OUTSIDE_GUEST_MODE; | ||
605 | local_irq_enable(); | ||
566 | } | 606 | } |
567 | 607 | ||
568 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 608 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
@@ -641,8 +681,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) | |||
641 | mmu_page_header_cache); | 681 | mmu_page_header_cache); |
642 | } | 682 | } |
643 | 683 | ||
644 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, | 684 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) |
645 | size_t size) | ||
646 | { | 685 | { |
647 | void *p; | 686 | void *p; |
648 | 687 | ||
@@ -653,8 +692,7 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, | |||
653 | 692 | ||
654 | static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu) | 693 | static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu) |
655 | { | 694 | { |
656 | return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache, | 695 | return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache); |
657 | sizeof(struct pte_list_desc)); | ||
658 | } | 696 | } |
659 | 697 | ||
660 | static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) | 698 | static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) |
@@ -841,32 +879,6 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, | |||
841 | return count; | 879 | return count; |
842 | } | 880 | } |
843 | 881 | ||
844 | static u64 *pte_list_next(unsigned long *pte_list, u64 *spte) | ||
845 | { | ||
846 | struct pte_list_desc *desc; | ||
847 | u64 *prev_spte; | ||
848 | int i; | ||
849 | |||
850 | if (!*pte_list) | ||
851 | return NULL; | ||
852 | else if (!(*pte_list & 1)) { | ||
853 | if (!spte) | ||
854 | return (u64 *)*pte_list; | ||
855 | return NULL; | ||
856 | } | ||
857 | desc = (struct pte_list_desc *)(*pte_list & ~1ul); | ||
858 | prev_spte = NULL; | ||
859 | while (desc) { | ||
860 | for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { | ||
861 | if (prev_spte == spte) | ||
862 | return desc->sptes[i]; | ||
863 | prev_spte = desc->sptes[i]; | ||
864 | } | ||
865 | desc = desc->more; | ||
866 | } | ||
867 | return NULL; | ||
868 | } | ||
869 | |||
870 | static void | 882 | static void |
871 | pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, | 883 | pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, |
872 | int i, struct pte_list_desc *prev_desc) | 884 | int i, struct pte_list_desc *prev_desc) |
@@ -987,11 +999,6 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
987 | return pte_list_add(vcpu, spte, rmapp); | 999 | return pte_list_add(vcpu, spte, rmapp); |
988 | } | 1000 | } |
989 | 1001 | ||
990 | static u64 *rmap_next(unsigned long *rmapp, u64 *spte) | ||
991 | { | ||
992 | return pte_list_next(rmapp, spte); | ||
993 | } | ||
994 | |||
995 | static void rmap_remove(struct kvm *kvm, u64 *spte) | 1002 | static void rmap_remove(struct kvm *kvm, u64 *spte) |
996 | { | 1003 | { |
997 | struct kvm_mmu_page *sp; | 1004 | struct kvm_mmu_page *sp; |
@@ -1004,106 +1011,248 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
1004 | pte_list_remove(spte, rmapp); | 1011 | pte_list_remove(spte, rmapp); |
1005 | } | 1012 | } |
1006 | 1013 | ||
1014 | /* | ||
1015 | * Used by the following functions to iterate through the sptes linked by a | ||
1016 | * rmap. All fields are private and not assumed to be used outside. | ||
1017 | */ | ||
1018 | struct rmap_iterator { | ||
1019 | /* private fields */ | ||
1020 | struct pte_list_desc *desc; /* holds the sptep if not NULL */ | ||
1021 | int pos; /* index of the sptep */ | ||
1022 | }; | ||
1023 | |||
1024 | /* | ||
1025 | * Iteration must be started by this function. This should also be used after | ||
1026 | * removing/dropping sptes from the rmap link because in such cases the | ||
1027 | * information in the itererator may not be valid. | ||
1028 | * | ||
1029 | * Returns sptep if found, NULL otherwise. | ||
1030 | */ | ||
1031 | static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter) | ||
1032 | { | ||
1033 | if (!rmap) | ||
1034 | return NULL; | ||
1035 | |||
1036 | if (!(rmap & 1)) { | ||
1037 | iter->desc = NULL; | ||
1038 | return (u64 *)rmap; | ||
1039 | } | ||
1040 | |||
1041 | iter->desc = (struct pte_list_desc *)(rmap & ~1ul); | ||
1042 | iter->pos = 0; | ||
1043 | return iter->desc->sptes[iter->pos]; | ||
1044 | } | ||
1045 | |||
1046 | /* | ||
1047 | * Must be used with a valid iterator: e.g. after rmap_get_first(). | ||
1048 | * | ||
1049 | * Returns sptep if found, NULL otherwise. | ||
1050 | */ | ||
1051 | static u64 *rmap_get_next(struct rmap_iterator *iter) | ||
1052 | { | ||
1053 | if (iter->desc) { | ||
1054 | if (iter->pos < PTE_LIST_EXT - 1) { | ||
1055 | u64 *sptep; | ||
1056 | |||
1057 | ++iter->pos; | ||
1058 | sptep = iter->desc->sptes[iter->pos]; | ||
1059 | if (sptep) | ||
1060 | return sptep; | ||
1061 | } | ||
1062 | |||
1063 | iter->desc = iter->desc->more; | ||
1064 | |||
1065 | if (iter->desc) { | ||
1066 | iter->pos = 0; | ||
1067 | /* desc->sptes[0] cannot be NULL */ | ||
1068 | return iter->desc->sptes[iter->pos]; | ||
1069 | } | ||
1070 | } | ||
1071 | |||
1072 | return NULL; | ||
1073 | } | ||
1074 | |||
1007 | static void drop_spte(struct kvm *kvm, u64 *sptep) | 1075 | static void drop_spte(struct kvm *kvm, u64 *sptep) |
1008 | { | 1076 | { |
1009 | if (mmu_spte_clear_track_bits(sptep)) | 1077 | if (mmu_spte_clear_track_bits(sptep)) |
1010 | rmap_remove(kvm, sptep); | 1078 | rmap_remove(kvm, sptep); |
1011 | } | 1079 | } |
1012 | 1080 | ||
1013 | int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | 1081 | |
1014 | struct kvm_memory_slot *slot) | 1082 | static bool __drop_large_spte(struct kvm *kvm, u64 *sptep) |
1015 | { | 1083 | { |
1016 | unsigned long *rmapp; | 1084 | if (is_large_pte(*sptep)) { |
1017 | u64 *spte; | 1085 | WARN_ON(page_header(__pa(sptep))->role.level == |
1018 | int i, write_protected = 0; | 1086 | PT_PAGE_TABLE_LEVEL); |
1019 | 1087 | drop_spte(kvm, sptep); | |
1020 | rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot); | 1088 | --kvm->stat.lpages; |
1021 | spte = rmap_next(rmapp, NULL); | 1089 | return true; |
1022 | while (spte) { | ||
1023 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | ||
1024 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | ||
1025 | if (is_writable_pte(*spte)) { | ||
1026 | mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); | ||
1027 | write_protected = 1; | ||
1028 | } | ||
1029 | spte = rmap_next(rmapp, spte); | ||
1030 | } | 1090 | } |
1031 | 1091 | ||
1032 | /* check for huge page mappings */ | 1092 | return false; |
1033 | for (i = PT_DIRECTORY_LEVEL; | 1093 | } |
1034 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 1094 | |
1035 | rmapp = __gfn_to_rmap(gfn, i, slot); | 1095 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) |
1036 | spte = rmap_next(rmapp, NULL); | 1096 | { |
1037 | while (spte) { | 1097 | if (__drop_large_spte(vcpu->kvm, sptep)) |
1038 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1098 | kvm_flush_remote_tlbs(vcpu->kvm); |
1039 | BUG_ON(!is_large_pte(*spte)); | 1099 | } |
1040 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 1100 | |
1041 | if (is_writable_pte(*spte)) { | 1101 | /* |
1042 | drop_spte(kvm, spte); | 1102 | * Write-protect on the specified @sptep, @pt_protect indicates whether |
1043 | --kvm->stat.lpages; | 1103 | * spte writ-protection is caused by protecting shadow page table. |
1044 | spte = NULL; | 1104 | * @flush indicates whether tlb need be flushed. |
1045 | write_protected = 1; | 1105 | * |
1046 | } | 1106 | * Note: write protection is difference between drity logging and spte |
1047 | spte = rmap_next(rmapp, spte); | 1107 | * protection: |
1108 | * - for dirty logging, the spte can be set to writable at anytime if | ||
1109 | * its dirty bitmap is properly set. | ||
1110 | * - for spte protection, the spte can be writable only after unsync-ing | ||
1111 | * shadow page. | ||
1112 | * | ||
1113 | * Return true if the spte is dropped. | ||
1114 | */ | ||
1115 | static bool | ||
1116 | spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) | ||
1117 | { | ||
1118 | u64 spte = *sptep; | ||
1119 | |||
1120 | if (!is_writable_pte(spte) && | ||
1121 | !(pt_protect && spte_is_locklessly_modifiable(spte))) | ||
1122 | return false; | ||
1123 | |||
1124 | rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); | ||
1125 | |||
1126 | if (__drop_large_spte(kvm, sptep)) { | ||
1127 | *flush |= true; | ||
1128 | return true; | ||
1129 | } | ||
1130 | |||
1131 | if (pt_protect) | ||
1132 | spte &= ~SPTE_MMU_WRITEABLE; | ||
1133 | spte = spte & ~PT_WRITABLE_MASK; | ||
1134 | |||
1135 | *flush |= mmu_spte_update(sptep, spte); | ||
1136 | return false; | ||
1137 | } | ||
1138 | |||
1139 | static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, | ||
1140 | int level, bool pt_protect) | ||
1141 | { | ||
1142 | u64 *sptep; | ||
1143 | struct rmap_iterator iter; | ||
1144 | bool flush = false; | ||
1145 | |||
1146 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | ||
1147 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | ||
1148 | if (spte_write_protect(kvm, sptep, &flush, pt_protect)) { | ||
1149 | sptep = rmap_get_first(*rmapp, &iter); | ||
1150 | continue; | ||
1048 | } | 1151 | } |
1152 | |||
1153 | sptep = rmap_get_next(&iter); | ||
1049 | } | 1154 | } |
1050 | 1155 | ||
1051 | return write_protected; | 1156 | return flush; |
1052 | } | 1157 | } |
1053 | 1158 | ||
1054 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | 1159 | /** |
1160 | * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages | ||
1161 | * @kvm: kvm instance | ||
1162 | * @slot: slot to protect | ||
1163 | * @gfn_offset: start of the BITS_PER_LONG pages we care about | ||
1164 | * @mask: indicates which pages we should protect | ||
1165 | * | ||
1166 | * Used when we do not need to care about huge page mappings: e.g. during dirty | ||
1167 | * logging we do not have any such mappings. | ||
1168 | */ | ||
1169 | void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | ||
1170 | struct kvm_memory_slot *slot, | ||
1171 | gfn_t gfn_offset, unsigned long mask) | ||
1172 | { | ||
1173 | unsigned long *rmapp; | ||
1174 | |||
1175 | while (mask) { | ||
1176 | rmapp = &slot->rmap[gfn_offset + __ffs(mask)]; | ||
1177 | __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); | ||
1178 | |||
1179 | /* clear the first set bit */ | ||
1180 | mask &= mask - 1; | ||
1181 | } | ||
1182 | } | ||
1183 | |||
1184 | static bool rmap_write_protect(struct kvm *kvm, u64 gfn) | ||
1055 | { | 1185 | { |
1056 | struct kvm_memory_slot *slot; | 1186 | struct kvm_memory_slot *slot; |
1187 | unsigned long *rmapp; | ||
1188 | int i; | ||
1189 | bool write_protected = false; | ||
1057 | 1190 | ||
1058 | slot = gfn_to_memslot(kvm, gfn); | 1191 | slot = gfn_to_memslot(kvm, gfn); |
1059 | return kvm_mmu_rmap_write_protect(kvm, gfn, slot); | 1192 | |
1193 | for (i = PT_PAGE_TABLE_LEVEL; | ||
1194 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | ||
1195 | rmapp = __gfn_to_rmap(gfn, i, slot); | ||
1196 | write_protected |= __rmap_write_protect(kvm, rmapp, i, true); | ||
1197 | } | ||
1198 | |||
1199 | return write_protected; | ||
1060 | } | 1200 | } |
1061 | 1201 | ||
1062 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1202 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1063 | unsigned long data) | 1203 | unsigned long data) |
1064 | { | 1204 | { |
1065 | u64 *spte; | 1205 | u64 *sptep; |
1206 | struct rmap_iterator iter; | ||
1066 | int need_tlb_flush = 0; | 1207 | int need_tlb_flush = 0; |
1067 | 1208 | ||
1068 | while ((spte = rmap_next(rmapp, NULL))) { | 1209 | while ((sptep = rmap_get_first(*rmapp, &iter))) { |
1069 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1210 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); |
1070 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | 1211 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep); |
1071 | drop_spte(kvm, spte); | 1212 | |
1213 | drop_spte(kvm, sptep); | ||
1072 | need_tlb_flush = 1; | 1214 | need_tlb_flush = 1; |
1073 | } | 1215 | } |
1216 | |||
1074 | return need_tlb_flush; | 1217 | return need_tlb_flush; |
1075 | } | 1218 | } |
1076 | 1219 | ||
1077 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1220 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1078 | unsigned long data) | 1221 | unsigned long data) |
1079 | { | 1222 | { |
1223 | u64 *sptep; | ||
1224 | struct rmap_iterator iter; | ||
1080 | int need_flush = 0; | 1225 | int need_flush = 0; |
1081 | u64 *spte, new_spte; | 1226 | u64 new_spte; |
1082 | pte_t *ptep = (pte_t *)data; | 1227 | pte_t *ptep = (pte_t *)data; |
1083 | pfn_t new_pfn; | 1228 | pfn_t new_pfn; |
1084 | 1229 | ||
1085 | WARN_ON(pte_huge(*ptep)); | 1230 | WARN_ON(pte_huge(*ptep)); |
1086 | new_pfn = pte_pfn(*ptep); | 1231 | new_pfn = pte_pfn(*ptep); |
1087 | spte = rmap_next(rmapp, NULL); | 1232 | |
1088 | while (spte) { | 1233 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { |
1089 | BUG_ON(!is_shadow_present_pte(*spte)); | 1234 | BUG_ON(!is_shadow_present_pte(*sptep)); |
1090 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | 1235 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep); |
1236 | |||
1091 | need_flush = 1; | 1237 | need_flush = 1; |
1238 | |||
1092 | if (pte_write(*ptep)) { | 1239 | if (pte_write(*ptep)) { |
1093 | drop_spte(kvm, spte); | 1240 | drop_spte(kvm, sptep); |
1094 | spte = rmap_next(rmapp, NULL); | 1241 | sptep = rmap_get_first(*rmapp, &iter); |
1095 | } else { | 1242 | } else { |
1096 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | 1243 | new_spte = *sptep & ~PT64_BASE_ADDR_MASK; |
1097 | new_spte |= (u64)new_pfn << PAGE_SHIFT; | 1244 | new_spte |= (u64)new_pfn << PAGE_SHIFT; |
1098 | 1245 | ||
1099 | new_spte &= ~PT_WRITABLE_MASK; | 1246 | new_spte &= ~PT_WRITABLE_MASK; |
1100 | new_spte &= ~SPTE_HOST_WRITEABLE; | 1247 | new_spte &= ~SPTE_HOST_WRITEABLE; |
1101 | new_spte &= ~shadow_accessed_mask; | 1248 | new_spte &= ~shadow_accessed_mask; |
1102 | mmu_spte_clear_track_bits(spte); | 1249 | |
1103 | mmu_spte_set(spte, new_spte); | 1250 | mmu_spte_clear_track_bits(sptep); |
1104 | spte = rmap_next(rmapp, spte); | 1251 | mmu_spte_set(sptep, new_spte); |
1252 | sptep = rmap_get_next(&iter); | ||
1105 | } | 1253 | } |
1106 | } | 1254 | } |
1255 | |||
1107 | if (need_flush) | 1256 | if (need_flush) |
1108 | kvm_flush_remote_tlbs(kvm); | 1257 | kvm_flush_remote_tlbs(kvm); |
1109 | 1258 | ||
@@ -1162,11 +1311,13 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) | |||
1162 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1311 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1163 | unsigned long data) | 1312 | unsigned long data) |
1164 | { | 1313 | { |
1165 | u64 *spte; | 1314 | u64 *sptep; |
1315 | struct rmap_iterator uninitialized_var(iter); | ||
1166 | int young = 0; | 1316 | int young = 0; |
1167 | 1317 | ||
1168 | /* | 1318 | /* |
1169 | * Emulate the accessed bit for EPT, by checking if this page has | 1319 | * In case of absence of EPT Access and Dirty Bits supports, |
1320 | * emulate the accessed bit for EPT, by checking if this page has | ||
1170 | * an EPT mapping, and clearing it if it does. On the next access, | 1321 | * an EPT mapping, and clearing it if it does. On the next access, |
1171 | * a new EPT mapping will be established. | 1322 | * a new EPT mapping will be established. |
1172 | * This has some overhead, but not as much as the cost of swapping | 1323 | * This has some overhead, but not as much as the cost of swapping |
@@ -1175,25 +1326,25 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1175 | if (!shadow_accessed_mask) | 1326 | if (!shadow_accessed_mask) |
1176 | return kvm_unmap_rmapp(kvm, rmapp, data); | 1327 | return kvm_unmap_rmapp(kvm, rmapp, data); |
1177 | 1328 | ||
1178 | spte = rmap_next(rmapp, NULL); | 1329 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; |
1179 | while (spte) { | 1330 | sptep = rmap_get_next(&iter)) { |
1180 | int _young; | 1331 | BUG_ON(!is_shadow_present_pte(*sptep)); |
1181 | u64 _spte = *spte; | 1332 | |
1182 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | 1333 | if (*sptep & shadow_accessed_mask) { |
1183 | _young = _spte & PT_ACCESSED_MASK; | ||
1184 | if (_young) { | ||
1185 | young = 1; | 1334 | young = 1; |
1186 | clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | 1335 | clear_bit((ffs(shadow_accessed_mask) - 1), |
1336 | (unsigned long *)sptep); | ||
1187 | } | 1337 | } |
1188 | spte = rmap_next(rmapp, spte); | ||
1189 | } | 1338 | } |
1339 | |||
1190 | return young; | 1340 | return young; |
1191 | } | 1341 | } |
1192 | 1342 | ||
1193 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1343 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, |
1194 | unsigned long data) | 1344 | unsigned long data) |
1195 | { | 1345 | { |
1196 | u64 *spte; | 1346 | u64 *sptep; |
1347 | struct rmap_iterator iter; | ||
1197 | int young = 0; | 1348 | int young = 0; |
1198 | 1349 | ||
1199 | /* | 1350 | /* |
@@ -1204,16 +1355,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1204 | if (!shadow_accessed_mask) | 1355 | if (!shadow_accessed_mask) |
1205 | goto out; | 1356 | goto out; |
1206 | 1357 | ||
1207 | spte = rmap_next(rmapp, NULL); | 1358 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; |
1208 | while (spte) { | 1359 | sptep = rmap_get_next(&iter)) { |
1209 | u64 _spte = *spte; | 1360 | BUG_ON(!is_shadow_present_pte(*sptep)); |
1210 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | 1361 | |
1211 | young = _spte & PT_ACCESSED_MASK; | 1362 | if (*sptep & shadow_accessed_mask) { |
1212 | if (young) { | ||
1213 | young = 1; | 1363 | young = 1; |
1214 | break; | 1364 | break; |
1215 | } | 1365 | } |
1216 | spte = rmap_next(rmapp, spte); | ||
1217 | } | 1366 | } |
1218 | out: | 1367 | out: |
1219 | return young; | 1368 | return young; |
@@ -1328,12 +1477,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
1328 | u64 *parent_pte, int direct) | 1477 | u64 *parent_pte, int direct) |
1329 | { | 1478 | { |
1330 | struct kvm_mmu_page *sp; | 1479 | struct kvm_mmu_page *sp; |
1331 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, | 1480 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); |
1332 | sizeof *sp); | 1481 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
1333 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | ||
1334 | if (!direct) | 1482 | if (!direct) |
1335 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, | 1483 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
1336 | PAGE_SIZE); | ||
1337 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 1484 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
1338 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 1485 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
1339 | bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM); | 1486 | bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM); |
@@ -1628,7 +1775,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
1628 | 1775 | ||
1629 | kvm_mmu_pages_init(parent, &parents, &pages); | 1776 | kvm_mmu_pages_init(parent, &parents, &pages); |
1630 | while (mmu_unsync_walk(parent, &pages)) { | 1777 | while (mmu_unsync_walk(parent, &pages)) { |
1631 | int protected = 0; | 1778 | bool protected = false; |
1632 | 1779 | ||
1633 | for_each_sp(pages, sp, parents, i) | 1780 | for_each_sp(pages, sp, parents, i) |
1634 | protected |= rmap_write_protect(vcpu->kvm, sp->gfn); | 1781 | protected |= rmap_write_protect(vcpu->kvm, sp->gfn); |
@@ -1793,15 +1940,6 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | |||
1793 | mmu_spte_set(sptep, spte); | 1940 | mmu_spte_set(sptep, spte); |
1794 | } | 1941 | } |
1795 | 1942 | ||
1796 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | ||
1797 | { | ||
1798 | if (is_large_pte(*sptep)) { | ||
1799 | drop_spte(vcpu->kvm, sptep); | ||
1800 | --vcpu->kvm->stat.lpages; | ||
1801 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1802 | } | ||
1803 | } | ||
1804 | |||
1805 | static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, | 1943 | static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, |
1806 | unsigned direct_access) | 1944 | unsigned direct_access) |
1807 | { | 1945 | { |
@@ -1865,10 +2003,11 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) | |||
1865 | 2003 | ||
1866 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | 2004 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) |
1867 | { | 2005 | { |
1868 | u64 *parent_pte; | 2006 | u64 *sptep; |
2007 | struct rmap_iterator iter; | ||
1869 | 2008 | ||
1870 | while ((parent_pte = pte_list_next(&sp->parent_ptes, NULL))) | 2009 | while ((sptep = rmap_get_first(sp->parent_ptes, &iter))) |
1871 | drop_parent_pte(sp, parent_pte); | 2010 | drop_parent_pte(sp, sptep); |
1872 | } | 2011 | } |
1873 | 2012 | ||
1874 | static int mmu_zap_unsync_children(struct kvm *kvm, | 2013 | static int mmu_zap_unsync_children(struct kvm *kvm, |
@@ -1925,30 +2064,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
1925 | return ret; | 2064 | return ret; |
1926 | } | 2065 | } |
1927 | 2066 | ||
1928 | static void kvm_mmu_isolate_pages(struct list_head *invalid_list) | ||
1929 | { | ||
1930 | struct kvm_mmu_page *sp; | ||
1931 | |||
1932 | list_for_each_entry(sp, invalid_list, link) | ||
1933 | kvm_mmu_isolate_page(sp); | ||
1934 | } | ||
1935 | |||
1936 | static void free_pages_rcu(struct rcu_head *head) | ||
1937 | { | ||
1938 | struct kvm_mmu_page *next, *sp; | ||
1939 | |||
1940 | sp = container_of(head, struct kvm_mmu_page, rcu); | ||
1941 | while (sp) { | ||
1942 | if (!list_empty(&sp->link)) | ||
1943 | next = list_first_entry(&sp->link, | ||
1944 | struct kvm_mmu_page, link); | ||
1945 | else | ||
1946 | next = NULL; | ||
1947 | kvm_mmu_free_page(sp); | ||
1948 | sp = next; | ||
1949 | } | ||
1950 | } | ||
1951 | |||
1952 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 2067 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
1953 | struct list_head *invalid_list) | 2068 | struct list_head *invalid_list) |
1954 | { | 2069 | { |
@@ -1957,17 +2072,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
1957 | if (list_empty(invalid_list)) | 2072 | if (list_empty(invalid_list)) |
1958 | return; | 2073 | return; |
1959 | 2074 | ||
1960 | kvm_flush_remote_tlbs(kvm); | 2075 | /* |
1961 | 2076 | * wmb: make sure everyone sees our modifications to the page tables | |
1962 | if (atomic_read(&kvm->arch.reader_counter)) { | 2077 | * rmb: make sure we see changes to vcpu->mode |
1963 | kvm_mmu_isolate_pages(invalid_list); | 2078 | */ |
1964 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | 2079 | smp_mb(); |
1965 | list_del_init(invalid_list); | ||
1966 | 2080 | ||
1967 | trace_kvm_mmu_delay_free_pages(sp); | 2081 | /* |
1968 | call_rcu(&sp->rcu, free_pages_rcu); | 2082 | * Wait for all vcpus to exit guest mode and/or lockless shadow |
1969 | return; | 2083 | * page table walks. |
1970 | } | 2084 | */ |
2085 | kvm_flush_remote_tlbs(kvm); | ||
1971 | 2086 | ||
1972 | do { | 2087 | do { |
1973 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | 2088 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); |
@@ -1975,7 +2090,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
1975 | kvm_mmu_isolate_page(sp); | 2090 | kvm_mmu_isolate_page(sp); |
1976 | kvm_mmu_free_page(sp); | 2091 | kvm_mmu_free_page(sp); |
1977 | } while (!list_empty(invalid_list)); | 2092 | } while (!list_empty(invalid_list)); |
1978 | |||
1979 | } | 2093 | } |
1980 | 2094 | ||
1981 | /* | 2095 | /* |
@@ -2194,7 +2308,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2194 | gfn_t gfn, pfn_t pfn, bool speculative, | 2308 | gfn_t gfn, pfn_t pfn, bool speculative, |
2195 | bool can_unsync, bool host_writable) | 2309 | bool can_unsync, bool host_writable) |
2196 | { | 2310 | { |
2197 | u64 spte, entry = *sptep; | 2311 | u64 spte; |
2198 | int ret = 0; | 2312 | int ret = 0; |
2199 | 2313 | ||
2200 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) | 2314 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) |
@@ -2208,8 +2322,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2208 | spte |= shadow_x_mask; | 2322 | spte |= shadow_x_mask; |
2209 | else | 2323 | else |
2210 | spte |= shadow_nx_mask; | 2324 | spte |= shadow_nx_mask; |
2325 | |||
2211 | if (pte_access & ACC_USER_MASK) | 2326 | if (pte_access & ACC_USER_MASK) |
2212 | spte |= shadow_user_mask; | 2327 | spte |= shadow_user_mask; |
2328 | |||
2213 | if (level > PT_PAGE_TABLE_LEVEL) | 2329 | if (level > PT_PAGE_TABLE_LEVEL) |
2214 | spte |= PT_PAGE_SIZE_MASK; | 2330 | spte |= PT_PAGE_SIZE_MASK; |
2215 | if (tdp_enabled) | 2331 | if (tdp_enabled) |
@@ -2234,7 +2350,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2234 | goto done; | 2350 | goto done; |
2235 | } | 2351 | } |
2236 | 2352 | ||
2237 | spte |= PT_WRITABLE_MASK; | 2353 | spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; |
2238 | 2354 | ||
2239 | if (!vcpu->arch.mmu.direct_map | 2355 | if (!vcpu->arch.mmu.direct_map |
2240 | && !(pte_access & ACC_WRITE_MASK)) { | 2356 | && !(pte_access & ACC_WRITE_MASK)) { |
@@ -2263,8 +2379,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2263 | __func__, gfn); | 2379 | __func__, gfn); |
2264 | ret = 1; | 2380 | ret = 1; |
2265 | pte_access &= ~ACC_WRITE_MASK; | 2381 | pte_access &= ~ACC_WRITE_MASK; |
2266 | if (is_writable_pte(spte)) | 2382 | spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE); |
2267 | spte &= ~PT_WRITABLE_MASK; | ||
2268 | } | 2383 | } |
2269 | } | 2384 | } |
2270 | 2385 | ||
@@ -2272,14 +2387,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2272 | mark_page_dirty(vcpu->kvm, gfn); | 2387 | mark_page_dirty(vcpu->kvm, gfn); |
2273 | 2388 | ||
2274 | set_pte: | 2389 | set_pte: |
2275 | mmu_spte_update(sptep, spte); | 2390 | if (mmu_spte_update(sptep, spte)) |
2276 | /* | ||
2277 | * If we overwrite a writable spte with a read-only one we | ||
2278 | * should flush remote TLBs. Otherwise rmap_write_protect | ||
2279 | * will find a read-only spte, even though the writable spte | ||
2280 | * might be cached on a CPU's TLB. | ||
2281 | */ | ||
2282 | if (is_writable_pte(entry) && !is_writable_pte(*sptep)) | ||
2283 | kvm_flush_remote_tlbs(vcpu->kvm); | 2391 | kvm_flush_remote_tlbs(vcpu->kvm); |
2284 | done: | 2392 | done: |
2285 | return ret; | 2393 | return ret; |
@@ -2354,6 +2462,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2354 | 2462 | ||
2355 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | 2463 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) |
2356 | { | 2464 | { |
2465 | mmu_free_roots(vcpu); | ||
2357 | } | 2466 | } |
2358 | 2467 | ||
2359 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | 2468 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, |
@@ -2546,8 +2655,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | |||
2546 | *gfnp = gfn; | 2655 | *gfnp = gfn; |
2547 | kvm_release_pfn_clean(pfn); | 2656 | kvm_release_pfn_clean(pfn); |
2548 | pfn &= ~mask; | 2657 | pfn &= ~mask; |
2549 | if (!get_page_unless_zero(pfn_to_page(pfn))) | 2658 | kvm_get_pfn(pfn); |
2550 | BUG(); | ||
2551 | *pfnp = pfn; | 2659 | *pfnp = pfn; |
2552 | } | 2660 | } |
2553 | } | 2661 | } |
@@ -2577,18 +2685,116 @@ exit: | |||
2577 | return ret; | 2685 | return ret; |
2578 | } | 2686 | } |
2579 | 2687 | ||
2688 | static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code) | ||
2689 | { | ||
2690 | /* | ||
2691 | * #PF can be fast only if the shadow page table is present and it | ||
2692 | * is caused by write-protect, that means we just need change the | ||
2693 | * W bit of the spte which can be done out of mmu-lock. | ||
2694 | */ | ||
2695 | if (!(error_code & PFERR_PRESENT_MASK) || | ||
2696 | !(error_code & PFERR_WRITE_MASK)) | ||
2697 | return false; | ||
2698 | |||
2699 | return true; | ||
2700 | } | ||
2701 | |||
2702 | static bool | ||
2703 | fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 spte) | ||
2704 | { | ||
2705 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
2706 | gfn_t gfn; | ||
2707 | |||
2708 | WARN_ON(!sp->role.direct); | ||
2709 | |||
2710 | /* | ||
2711 | * The gfn of direct spte is stable since it is calculated | ||
2712 | * by sp->gfn. | ||
2713 | */ | ||
2714 | gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); | ||
2715 | |||
2716 | if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte) | ||
2717 | mark_page_dirty(vcpu->kvm, gfn); | ||
2718 | |||
2719 | return true; | ||
2720 | } | ||
2721 | |||
2722 | /* | ||
2723 | * Return value: | ||
2724 | * - true: let the vcpu to access on the same address again. | ||
2725 | * - false: let the real page fault path to fix it. | ||
2726 | */ | ||
2727 | static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | ||
2728 | u32 error_code) | ||
2729 | { | ||
2730 | struct kvm_shadow_walk_iterator iterator; | ||
2731 | bool ret = false; | ||
2732 | u64 spte = 0ull; | ||
2733 | |||
2734 | if (!page_fault_can_be_fast(vcpu, error_code)) | ||
2735 | return false; | ||
2736 | |||
2737 | walk_shadow_page_lockless_begin(vcpu); | ||
2738 | for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) | ||
2739 | if (!is_shadow_present_pte(spte) || iterator.level < level) | ||
2740 | break; | ||
2741 | |||
2742 | /* | ||
2743 | * If the mapping has been changed, let the vcpu fault on the | ||
2744 | * same address again. | ||
2745 | */ | ||
2746 | if (!is_rmap_spte(spte)) { | ||
2747 | ret = true; | ||
2748 | goto exit; | ||
2749 | } | ||
2750 | |||
2751 | if (!is_last_spte(spte, level)) | ||
2752 | goto exit; | ||
2753 | |||
2754 | /* | ||
2755 | * Check if it is a spurious fault caused by TLB lazily flushed. | ||
2756 | * | ||
2757 | * Need not check the access of upper level table entries since | ||
2758 | * they are always ACC_ALL. | ||
2759 | */ | ||
2760 | if (is_writable_pte(spte)) { | ||
2761 | ret = true; | ||
2762 | goto exit; | ||
2763 | } | ||
2764 | |||
2765 | /* | ||
2766 | * Currently, to simplify the code, only the spte write-protected | ||
2767 | * by dirty-log can be fast fixed. | ||
2768 | */ | ||
2769 | if (!spte_is_locklessly_modifiable(spte)) | ||
2770 | goto exit; | ||
2771 | |||
2772 | /* | ||
2773 | * Currently, fast page fault only works for direct mapping since | ||
2774 | * the gfn is not stable for indirect shadow page. | ||
2775 | * See Documentation/virtual/kvm/locking.txt to get more detail. | ||
2776 | */ | ||
2777 | ret = fast_pf_fix_direct_spte(vcpu, iterator.sptep, spte); | ||
2778 | exit: | ||
2779 | trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, | ||
2780 | spte, ret); | ||
2781 | walk_shadow_page_lockless_end(vcpu); | ||
2782 | |||
2783 | return ret; | ||
2784 | } | ||
2785 | |||
2580 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 2786 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
2581 | gva_t gva, pfn_t *pfn, bool write, bool *writable); | 2787 | gva_t gva, pfn_t *pfn, bool write, bool *writable); |
2582 | 2788 | ||
2583 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn, | 2789 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, |
2584 | bool prefault) | 2790 | gfn_t gfn, bool prefault) |
2585 | { | 2791 | { |
2586 | int r; | 2792 | int r; |
2587 | int level; | 2793 | int level; |
2588 | int force_pt_level; | 2794 | int force_pt_level; |
2589 | pfn_t pfn; | 2795 | pfn_t pfn; |
2590 | unsigned long mmu_seq; | 2796 | unsigned long mmu_seq; |
2591 | bool map_writable; | 2797 | bool map_writable, write = error_code & PFERR_WRITE_MASK; |
2592 | 2798 | ||
2593 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); | 2799 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); |
2594 | if (likely(!force_pt_level)) { | 2800 | if (likely(!force_pt_level)) { |
@@ -2605,6 +2811,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn, | |||
2605 | } else | 2811 | } else |
2606 | level = PT_PAGE_TABLE_LEVEL; | 2812 | level = PT_PAGE_TABLE_LEVEL; |
2607 | 2813 | ||
2814 | if (fast_page_fault(vcpu, v, level, error_code)) | ||
2815 | return 0; | ||
2816 | |||
2608 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2817 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2609 | smp_rmb(); | 2818 | smp_rmb(); |
2610 | 2819 | ||
@@ -2993,7 +3202,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
2993 | gfn = gva >> PAGE_SHIFT; | 3202 | gfn = gva >> PAGE_SHIFT; |
2994 | 3203 | ||
2995 | return nonpaging_map(vcpu, gva & PAGE_MASK, | 3204 | return nonpaging_map(vcpu, gva & PAGE_MASK, |
2996 | error_code & PFERR_WRITE_MASK, gfn, prefault); | 3205 | error_code, gfn, prefault); |
2997 | } | 3206 | } |
2998 | 3207 | ||
2999 | static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) | 3208 | static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) |
@@ -3073,6 +3282,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3073 | } else | 3282 | } else |
3074 | level = PT_PAGE_TABLE_LEVEL; | 3283 | level = PT_PAGE_TABLE_LEVEL; |
3075 | 3284 | ||
3285 | if (fast_page_fault(vcpu, gpa, level, error_code)) | ||
3286 | return 0; | ||
3287 | |||
3076 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 3288 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
3077 | smp_rmb(); | 3289 | smp_rmb(); |
3078 | 3290 | ||
@@ -3554,7 +3766,7 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp) | |||
3554 | * Skip write-flooding detected for the sp whose level is 1, because | 3766 | * Skip write-flooding detected for the sp whose level is 1, because |
3555 | * it can become unsync, then the guest page is not write-protected. | 3767 | * it can become unsync, then the guest page is not write-protected. |
3556 | */ | 3768 | */ |
3557 | if (sp->role.level == 1) | 3769 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) |
3558 | return false; | 3770 | return false; |
3559 | 3771 | ||
3560 | return ++sp->write_flooding_count >= 3; | 3772 | return ++sp->write_flooding_count >= 3; |
@@ -3837,6 +4049,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu) | |||
3837 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | 4049 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) |
3838 | { | 4050 | { |
3839 | struct kvm_mmu_page *sp; | 4051 | struct kvm_mmu_page *sp; |
4052 | bool flush = false; | ||
3840 | 4053 | ||
3841 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { | 4054 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { |
3842 | int i; | 4055 | int i; |
@@ -3851,16 +4064,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
3851 | !is_last_spte(pt[i], sp->role.level)) | 4064 | !is_last_spte(pt[i], sp->role.level)) |
3852 | continue; | 4065 | continue; |
3853 | 4066 | ||
3854 | if (is_large_pte(pt[i])) { | 4067 | spte_write_protect(kvm, &pt[i], &flush, false); |
3855 | drop_spte(kvm, &pt[i]); | ||
3856 | --kvm->stat.lpages; | ||
3857 | continue; | ||
3858 | } | ||
3859 | |||
3860 | /* avoid RMW */ | ||
3861 | if (is_writable_pte(pt[i])) | ||
3862 | mmu_spte_update(&pt[i], | ||
3863 | pt[i] & ~PT_WRITABLE_MASK); | ||
3864 | } | 4068 | } |
3865 | } | 4069 | } |
3866 | kvm_flush_remote_tlbs(kvm); | 4070 | kvm_flush_remote_tlbs(kvm); |
@@ -3886,6 +4090,9 @@ static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | |||
3886 | { | 4090 | { |
3887 | struct kvm_mmu_page *page; | 4091 | struct kvm_mmu_page *page; |
3888 | 4092 | ||
4093 | if (list_empty(&kvm->arch.active_mmu_pages)) | ||
4094 | return; | ||
4095 | |||
3889 | page = container_of(kvm->arch.active_mmu_pages.prev, | 4096 | page = container_of(kvm->arch.active_mmu_pages.prev, |
3890 | struct kvm_mmu_page, link); | 4097 | struct kvm_mmu_page, link); |
3891 | kvm_mmu_prepare_zap_page(kvm, page, invalid_list); | 4098 | kvm_mmu_prepare_zap_page(kvm, page, invalid_list); |
@@ -3894,7 +4101,6 @@ static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | |||
3894 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 4101 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) |
3895 | { | 4102 | { |
3896 | struct kvm *kvm; | 4103 | struct kvm *kvm; |
3897 | struct kvm *kvm_freed = NULL; | ||
3898 | int nr_to_scan = sc->nr_to_scan; | 4104 | int nr_to_scan = sc->nr_to_scan; |
3899 | 4105 | ||
3900 | if (nr_to_scan == 0) | 4106 | if (nr_to_scan == 0) |
@@ -3906,22 +4112,30 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
3906 | int idx; | 4112 | int idx; |
3907 | LIST_HEAD(invalid_list); | 4113 | LIST_HEAD(invalid_list); |
3908 | 4114 | ||
4115 | /* | ||
4116 | * n_used_mmu_pages is accessed without holding kvm->mmu_lock | ||
4117 | * here. We may skip a VM instance errorneosly, but we do not | ||
4118 | * want to shrink a VM that only started to populate its MMU | ||
4119 | * anyway. | ||
4120 | */ | ||
4121 | if (kvm->arch.n_used_mmu_pages > 0) { | ||
4122 | if (!nr_to_scan--) | ||
4123 | break; | ||
4124 | continue; | ||
4125 | } | ||
4126 | |||
3909 | idx = srcu_read_lock(&kvm->srcu); | 4127 | idx = srcu_read_lock(&kvm->srcu); |
3910 | spin_lock(&kvm->mmu_lock); | 4128 | spin_lock(&kvm->mmu_lock); |
3911 | if (!kvm_freed && nr_to_scan > 0 && | ||
3912 | kvm->arch.n_used_mmu_pages > 0) { | ||
3913 | kvm_mmu_remove_some_alloc_mmu_pages(kvm, | ||
3914 | &invalid_list); | ||
3915 | kvm_freed = kvm; | ||
3916 | } | ||
3917 | nr_to_scan--; | ||
3918 | 4129 | ||
4130 | kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list); | ||
3919 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 4131 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
4132 | |||
3920 | spin_unlock(&kvm->mmu_lock); | 4133 | spin_unlock(&kvm->mmu_lock); |
3921 | srcu_read_unlock(&kvm->srcu, idx); | 4134 | srcu_read_unlock(&kvm->srcu, idx); |
4135 | |||
4136 | list_move_tail(&kvm->vm_list, &vm_list); | ||
4137 | break; | ||
3922 | } | 4138 | } |
3923 | if (kvm_freed) | ||
3924 | list_move_tail(&kvm_freed->vm_list, &vm_list); | ||
3925 | 4139 | ||
3926 | raw_spin_unlock(&kvm_lock); | 4140 | raw_spin_unlock(&kvm_lock); |
3927 | 4141 | ||
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 715da5a19a5..7d7d0b9e23e 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -192,7 +192,8 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
192 | { | 192 | { |
193 | struct kvm_memory_slot *slot; | 193 | struct kvm_memory_slot *slot; |
194 | unsigned long *rmapp; | 194 | unsigned long *rmapp; |
195 | u64 *spte; | 195 | u64 *sptep; |
196 | struct rmap_iterator iter; | ||
196 | 197 | ||
197 | if (sp->role.direct || sp->unsync || sp->role.invalid) | 198 | if (sp->role.direct || sp->unsync || sp->role.invalid) |
198 | return; | 199 | return; |
@@ -200,13 +201,12 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
200 | slot = gfn_to_memslot(kvm, sp->gfn); | 201 | slot = gfn_to_memslot(kvm, sp->gfn); |
201 | rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; | 202 | rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; |
202 | 203 | ||
203 | spte = rmap_next(rmapp, NULL); | 204 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; |
204 | while (spte) { | 205 | sptep = rmap_get_next(&iter)) { |
205 | if (is_writable_pte(*spte)) | 206 | if (is_writable_pte(*sptep)) |
206 | audit_printk(kvm, "shadow page has writable " | 207 | audit_printk(kvm, "shadow page has writable " |
207 | "mappings: gfn %llx role %x\n", | 208 | "mappings: gfn %llx role %x\n", |
208 | sp->gfn, sp->role.word); | 209 | sp->gfn, sp->role.word); |
209 | spte = rmap_next(rmapp, spte); | ||
210 | } | 210 | } |
211 | } | 211 | } |
212 | 212 | ||
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 89fb0e81322..cd6e98333ba 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -54,8 +54,8 @@ | |||
54 | */ | 54 | */ |
55 | TRACE_EVENT( | 55 | TRACE_EVENT( |
56 | kvm_mmu_pagetable_walk, | 56 | kvm_mmu_pagetable_walk, |
57 | TP_PROTO(u64 addr, int write_fault, int user_fault, int fetch_fault), | 57 | TP_PROTO(u64 addr, u32 pferr), |
58 | TP_ARGS(addr, write_fault, user_fault, fetch_fault), | 58 | TP_ARGS(addr, pferr), |
59 | 59 | ||
60 | TP_STRUCT__entry( | 60 | TP_STRUCT__entry( |
61 | __field(__u64, addr) | 61 | __field(__u64, addr) |
@@ -64,8 +64,7 @@ TRACE_EVENT( | |||
64 | 64 | ||
65 | TP_fast_assign( | 65 | TP_fast_assign( |
66 | __entry->addr = addr; | 66 | __entry->addr = addr; |
67 | __entry->pferr = (!!write_fault << 1) | (!!user_fault << 2) | 67 | __entry->pferr = pferr; |
68 | | (!!fetch_fault << 4); | ||
69 | ), | 68 | ), |
70 | 69 | ||
71 | TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr, | 70 | TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr, |
@@ -243,6 +242,44 @@ TRACE_EVENT( | |||
243 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, | 242 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, |
244 | __entry->access) | 243 | __entry->access) |
245 | ); | 244 | ); |
245 | |||
246 | #define __spte_satisfied(__spte) \ | ||
247 | (__entry->retry && is_writable_pte(__entry->__spte)) | ||
248 | |||
249 | TRACE_EVENT( | ||
250 | fast_page_fault, | ||
251 | TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, | ||
252 | u64 *sptep, u64 old_spte, bool retry), | ||
253 | TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry), | ||
254 | |||
255 | TP_STRUCT__entry( | ||
256 | __field(int, vcpu_id) | ||
257 | __field(gva_t, gva) | ||
258 | __field(u32, error_code) | ||
259 | __field(u64 *, sptep) | ||
260 | __field(u64, old_spte) | ||
261 | __field(u64, new_spte) | ||
262 | __field(bool, retry) | ||
263 | ), | ||
264 | |||
265 | TP_fast_assign( | ||
266 | __entry->vcpu_id = vcpu->vcpu_id; | ||
267 | __entry->gva = gva; | ||
268 | __entry->error_code = error_code; | ||
269 | __entry->sptep = sptep; | ||
270 | __entry->old_spte = old_spte; | ||
271 | __entry->new_spte = *sptep; | ||
272 | __entry->retry = retry; | ||
273 | ), | ||
274 | |||
275 | TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx" | ||
276 | " new %llx spurious %d fixed %d", __entry->vcpu_id, | ||
277 | __entry->gva, __print_flags(__entry->error_code, "|", | ||
278 | kvm_mmu_trace_pferr_flags), __entry->sptep, | ||
279 | __entry->old_spte, __entry->new_spte, | ||
280 | __spte_satisfied(old_spte), __spte_satisfied(new_spte) | ||
281 | ) | ||
282 | ); | ||
246 | #endif /* _TRACE_KVMMMU_H */ | 283 | #endif /* _TRACE_KVMMMU_H */ |
247 | 284 | ||
248 | #undef TRACE_INCLUDE_PATH | 285 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index df5a70311be..bb7cf01cae7 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -154,8 +154,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
154 | const int fetch_fault = access & PFERR_FETCH_MASK; | 154 | const int fetch_fault = access & PFERR_FETCH_MASK; |
155 | u16 errcode = 0; | 155 | u16 errcode = 0; |
156 | 156 | ||
157 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, | 157 | trace_kvm_mmu_pagetable_walk(addr, access); |
158 | fetch_fault); | ||
159 | retry_walk: | 158 | retry_walk: |
160 | eperm = false; | 159 | eperm = false; |
161 | walker->level = mmu->root_level; | 160 | walker->level = mmu->root_level; |
@@ -658,7 +657,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) | |||
658 | { | 657 | { |
659 | int offset = 0; | 658 | int offset = 0; |
660 | 659 | ||
661 | WARN_ON(sp->role.level != 1); | 660 | WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); |
662 | 661 | ||
663 | if (PTTYPE == 32) | 662 | if (PTTYPE == 32) |
664 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 663 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 2e88438ffd8..9b7ec1150ab 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -80,10 +80,10 @@ static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx) | |||
80 | 80 | ||
81 | static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx) | 81 | static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx) |
82 | { | 82 | { |
83 | if (idx < X86_PMC_IDX_FIXED) | 83 | if (idx < INTEL_PMC_IDX_FIXED) |
84 | return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0); | 84 | return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0); |
85 | else | 85 | else |
86 | return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED); | 86 | return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED); |
87 | } | 87 | } |
88 | 88 | ||
89 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu) | 89 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu) |
@@ -291,7 +291,7 @@ static void reprogram_idx(struct kvm_pmu *pmu, int idx) | |||
291 | if (pmc_is_gp(pmc)) | 291 | if (pmc_is_gp(pmc)) |
292 | reprogram_gp_counter(pmc, pmc->eventsel); | 292 | reprogram_gp_counter(pmc, pmc->eventsel); |
293 | else { | 293 | else { |
294 | int fidx = idx - X86_PMC_IDX_FIXED; | 294 | int fidx = idx - INTEL_PMC_IDX_FIXED; |
295 | reprogram_fixed_counter(pmc, | 295 | reprogram_fixed_counter(pmc, |
296 | fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx); | 296 | fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx); |
297 | } | 297 | } |
@@ -452,7 +452,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
452 | return; | 452 | return; |
453 | 453 | ||
454 | pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, | 454 | pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, |
455 | X86_PMC_MAX_GENERIC); | 455 | INTEL_PMC_MAX_GENERIC); |
456 | pmu->counter_bitmask[KVM_PMC_GP] = | 456 | pmu->counter_bitmask[KVM_PMC_GP] = |
457 | ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; | 457 | ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; |
458 | bitmap_len = (entry->eax >> 24) & 0xff; | 458 | bitmap_len = (entry->eax >> 24) & 0xff; |
@@ -462,13 +462,13 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
462 | pmu->nr_arch_fixed_counters = 0; | 462 | pmu->nr_arch_fixed_counters = 0; |
463 | } else { | 463 | } else { |
464 | pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), | 464 | pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), |
465 | X86_PMC_MAX_FIXED); | 465 | INTEL_PMC_MAX_FIXED); |
466 | pmu->counter_bitmask[KVM_PMC_FIXED] = | 466 | pmu->counter_bitmask[KVM_PMC_FIXED] = |
467 | ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; | 467 | ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; |
468 | } | 468 | } |
469 | 469 | ||
470 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | | 470 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | |
471 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED); | 471 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); |
472 | pmu->global_ctrl_mask = ~pmu->global_ctrl; | 472 | pmu->global_ctrl_mask = ~pmu->global_ctrl; |
473 | } | 473 | } |
474 | 474 | ||
@@ -478,15 +478,15 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) | |||
478 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 478 | struct kvm_pmu *pmu = &vcpu->arch.pmu; |
479 | 479 | ||
480 | memset(pmu, 0, sizeof(*pmu)); | 480 | memset(pmu, 0, sizeof(*pmu)); |
481 | for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { | 481 | for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { |
482 | pmu->gp_counters[i].type = KVM_PMC_GP; | 482 | pmu->gp_counters[i].type = KVM_PMC_GP; |
483 | pmu->gp_counters[i].vcpu = vcpu; | 483 | pmu->gp_counters[i].vcpu = vcpu; |
484 | pmu->gp_counters[i].idx = i; | 484 | pmu->gp_counters[i].idx = i; |
485 | } | 485 | } |
486 | for (i = 0; i < X86_PMC_MAX_FIXED; i++) { | 486 | for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { |
487 | pmu->fixed_counters[i].type = KVM_PMC_FIXED; | 487 | pmu->fixed_counters[i].type = KVM_PMC_FIXED; |
488 | pmu->fixed_counters[i].vcpu = vcpu; | 488 | pmu->fixed_counters[i].vcpu = vcpu; |
489 | pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED; | 489 | pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; |
490 | } | 490 | } |
491 | init_irq_work(&pmu->irq_work, trigger_pmi); | 491 | init_irq_work(&pmu->irq_work, trigger_pmi); |
492 | kvm_pmu_cpuid_update(vcpu); | 492 | kvm_pmu_cpuid_update(vcpu); |
@@ -498,13 +498,13 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu) | |||
498 | int i; | 498 | int i; |
499 | 499 | ||
500 | irq_work_sync(&pmu->irq_work); | 500 | irq_work_sync(&pmu->irq_work); |
501 | for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { | 501 | for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { |
502 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; | 502 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; |
503 | stop_counter(pmc); | 503 | stop_counter(pmc); |
504 | pmc->counter = pmc->eventsel = 0; | 504 | pmc->counter = pmc->eventsel = 0; |
505 | } | 505 | } |
506 | 506 | ||
507 | for (i = 0; i < X86_PMC_MAX_FIXED; i++) | 507 | for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) |
508 | stop_counter(&pmu->fixed_counters[i]); | 508 | stop_counter(&pmu->fixed_counters[i]); |
509 | 509 | ||
510 | pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = | 510 | pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e334389e1c7..baead950d6c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "x86.h" | 22 | #include "x86.h" |
23 | 23 | ||
24 | #include <linux/module.h> | 24 | #include <linux/module.h> |
25 | #include <linux/mod_devicetable.h> | ||
25 | #include <linux/kernel.h> | 26 | #include <linux/kernel.h> |
26 | #include <linux/vmalloc.h> | 27 | #include <linux/vmalloc.h> |
27 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
@@ -42,6 +43,12 @@ | |||
42 | MODULE_AUTHOR("Qumranet"); | 43 | MODULE_AUTHOR("Qumranet"); |
43 | MODULE_LICENSE("GPL"); | 44 | MODULE_LICENSE("GPL"); |
44 | 45 | ||
46 | static const struct x86_cpu_id svm_cpu_id[] = { | ||
47 | X86_FEATURE_MATCH(X86_FEATURE_SVM), | ||
48 | {} | ||
49 | }; | ||
50 | MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); | ||
51 | |||
45 | #define IOPM_ALLOC_ORDER 2 | 52 | #define IOPM_ALLOC_ORDER 2 |
46 | #define MSRPM_ALLOC_ORDER 1 | 53 | #define MSRPM_ALLOC_ORDER 1 |
47 | 54 | ||
@@ -3178,8 +3185,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
3178 | break; | 3185 | break; |
3179 | case MSR_IA32_DEBUGCTLMSR: | 3186 | case MSR_IA32_DEBUGCTLMSR: |
3180 | if (!boot_cpu_has(X86_FEATURE_LBRV)) { | 3187 | if (!boot_cpu_has(X86_FEATURE_LBRV)) { |
3181 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", | 3188 | vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", |
3182 | __func__, data); | 3189 | __func__, data); |
3183 | break; | 3190 | break; |
3184 | } | 3191 | } |
3185 | if (data & DEBUGCTL_RESERVED_BITS) | 3192 | if (data & DEBUGCTL_RESERVED_BITS) |
@@ -3198,7 +3205,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
3198 | case MSR_VM_CR: | 3205 | case MSR_VM_CR: |
3199 | return svm_set_vm_cr(vcpu, data); | 3206 | return svm_set_vm_cr(vcpu, data); |
3200 | case MSR_VM_IGNNE: | 3207 | case MSR_VM_IGNNE: |
3201 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); | 3208 | vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); |
3202 | break; | 3209 | break; |
3203 | default: | 3210 | default: |
3204 | return kvm_set_msr_common(vcpu, ecx, data); | 3211 | return kvm_set_msr_common(vcpu, ecx, data); |
@@ -3240,6 +3247,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) | |||
3240 | svm_clear_vintr(svm); | 3247 | svm_clear_vintr(svm); |
3241 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 3248 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
3242 | mark_dirty(svm->vmcb, VMCB_INTR); | 3249 | mark_dirty(svm->vmcb, VMCB_INTR); |
3250 | ++svm->vcpu.stat.irq_window_exits; | ||
3243 | /* | 3251 | /* |
3244 | * If the user space waits to inject interrupts, exit as soon as | 3252 | * If the user space waits to inject interrupts, exit as soon as |
3245 | * possible | 3253 | * possible |
@@ -3247,7 +3255,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm) | |||
3247 | if (!irqchip_in_kernel(svm->vcpu.kvm) && | 3255 | if (!irqchip_in_kernel(svm->vcpu.kvm) && |
3248 | kvm_run->request_interrupt_window && | 3256 | kvm_run->request_interrupt_window && |
3249 | !kvm_cpu_has_interrupt(&svm->vcpu)) { | 3257 | !kvm_cpu_has_interrupt(&svm->vcpu)) { |
3250 | ++svm->vcpu.stat.irq_window_exits; | ||
3251 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 3258 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
3252 | return 0; | 3259 | return 0; |
3253 | } | 3260 | } |
@@ -4037,6 +4044,11 @@ static bool svm_rdtscp_supported(void) | |||
4037 | return false; | 4044 | return false; |
4038 | } | 4045 | } |
4039 | 4046 | ||
4047 | static bool svm_invpcid_supported(void) | ||
4048 | { | ||
4049 | return false; | ||
4050 | } | ||
4051 | |||
4040 | static bool svm_has_wbinvd_exit(void) | 4052 | static bool svm_has_wbinvd_exit(void) |
4041 | { | 4053 | { |
4042 | return true; | 4054 | return true; |
@@ -4305,6 +4317,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4305 | .cpuid_update = svm_cpuid_update, | 4317 | .cpuid_update = svm_cpuid_update, |
4306 | 4318 | ||
4307 | .rdtscp_supported = svm_rdtscp_supported, | 4319 | .rdtscp_supported = svm_rdtscp_supported, |
4320 | .invpcid_supported = svm_invpcid_supported, | ||
4308 | 4321 | ||
4309 | .set_supported_cpuid = svm_set_supported_cpuid, | 4322 | .set_supported_cpuid = svm_set_supported_cpuid, |
4310 | 4323 | ||
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 911d2641f14..a71faf727ff 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -517,6 +517,40 @@ TRACE_EVENT(kvm_apic_accept_irq, | |||
517 | __entry->coalesced ? " (coalesced)" : "") | 517 | __entry->coalesced ? " (coalesced)" : "") |
518 | ); | 518 | ); |
519 | 519 | ||
520 | TRACE_EVENT(kvm_eoi, | ||
521 | TP_PROTO(struct kvm_lapic *apic, int vector), | ||
522 | TP_ARGS(apic, vector), | ||
523 | |||
524 | TP_STRUCT__entry( | ||
525 | __field( __u32, apicid ) | ||
526 | __field( int, vector ) | ||
527 | ), | ||
528 | |||
529 | TP_fast_assign( | ||
530 | __entry->apicid = apic->vcpu->vcpu_id; | ||
531 | __entry->vector = vector; | ||
532 | ), | ||
533 | |||
534 | TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) | ||
535 | ); | ||
536 | |||
537 | TRACE_EVENT(kvm_pv_eoi, | ||
538 | TP_PROTO(struct kvm_lapic *apic, int vector), | ||
539 | TP_ARGS(apic, vector), | ||
540 | |||
541 | TP_STRUCT__entry( | ||
542 | __field( __u32, apicid ) | ||
543 | __field( int, vector ) | ||
544 | ), | ||
545 | |||
546 | TP_fast_assign( | ||
547 | __entry->apicid = apic->vcpu->vcpu_id; | ||
548 | __entry->vector = vector; | ||
549 | ), | ||
550 | |||
551 | TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) | ||
552 | ); | ||
553 | |||
520 | /* | 554 | /* |
521 | * Tracepoint for nested VMRUN | 555 | * Tracepoint for nested VMRUN |
522 | */ | 556 | */ |
@@ -710,16 +744,6 @@ TRACE_EVENT(kvm_skinit, | |||
710 | __entry->rip, __entry->slb) | 744 | __entry->rip, __entry->slb) |
711 | ); | 745 | ); |
712 | 746 | ||
713 | #define __print_insn(insn, ilen) ({ \ | ||
714 | int i; \ | ||
715 | const char *ret = p->buffer + p->len; \ | ||
716 | \ | ||
717 | for (i = 0; i < ilen; ++i) \ | ||
718 | trace_seq_printf(p, " %02x", insn[i]); \ | ||
719 | trace_seq_printf(p, "%c", 0); \ | ||
720 | ret; \ | ||
721 | }) | ||
722 | |||
723 | #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) | 747 | #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) |
724 | #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) | 748 | #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) |
725 | #define KVM_EMUL_INSN_F_CS_D (1 << 2) | 749 | #define KVM_EMUL_INSN_F_CS_D (1 << 2) |
@@ -786,7 +810,7 @@ TRACE_EVENT(kvm_emulate_insn, | |||
786 | 810 | ||
787 | TP_printk("%x:%llx:%s (%s)%s", | 811 | TP_printk("%x:%llx:%s (%s)%s", |
788 | __entry->csbase, __entry->rip, | 812 | __entry->csbase, __entry->rip, |
789 | __print_insn(__entry->insn, __entry->len), | 813 | __print_hex(__entry->insn, __entry->len), |
790 | __print_symbolic(__entry->flags, | 814 | __print_symbolic(__entry->flags, |
791 | kvm_trace_symbol_emul_flags), | 815 | kvm_trace_symbol_emul_flags), |
792 | __entry->failed ? " failed" : "" | 816 | __entry->failed ? " failed" : "" |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4ff0ab9bc3c..c39b60707e0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
29 | #include <linux/moduleparam.h> | 29 | #include <linux/moduleparam.h> |
30 | #include <linux/mod_devicetable.h> | ||
30 | #include <linux/ftrace_event.h> | 31 | #include <linux/ftrace_event.h> |
31 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
32 | #include <linux/tboot.h> | 33 | #include <linux/tboot.h> |
@@ -51,6 +52,12 @@ | |||
51 | MODULE_AUTHOR("Qumranet"); | 52 | MODULE_AUTHOR("Qumranet"); |
52 | MODULE_LICENSE("GPL"); | 53 | MODULE_LICENSE("GPL"); |
53 | 54 | ||
55 | static const struct x86_cpu_id vmx_cpu_id[] = { | ||
56 | X86_FEATURE_MATCH(X86_FEATURE_VMX), | ||
57 | {} | ||
58 | }; | ||
59 | MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); | ||
60 | |||
54 | static bool __read_mostly enable_vpid = 1; | 61 | static bool __read_mostly enable_vpid = 1; |
55 | module_param_named(vpid, enable_vpid, bool, 0444); | 62 | module_param_named(vpid, enable_vpid, bool, 0444); |
56 | 63 | ||
@@ -64,7 +71,10 @@ static bool __read_mostly enable_unrestricted_guest = 1; | |||
64 | module_param_named(unrestricted_guest, | 71 | module_param_named(unrestricted_guest, |
65 | enable_unrestricted_guest, bool, S_IRUGO); | 72 | enable_unrestricted_guest, bool, S_IRUGO); |
66 | 73 | ||
67 | static bool __read_mostly emulate_invalid_guest_state = 0; | 74 | static bool __read_mostly enable_ept_ad_bits = 1; |
75 | module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); | ||
76 | |||
77 | static bool __read_mostly emulate_invalid_guest_state = true; | ||
68 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 78 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
69 | 79 | ||
70 | static bool __read_mostly vmm_exclusive = 1; | 80 | static bool __read_mostly vmm_exclusive = 1; |
@@ -386,6 +396,9 @@ struct vcpu_vmx { | |||
386 | struct { | 396 | struct { |
387 | int loaded; | 397 | int loaded; |
388 | u16 fs_sel, gs_sel, ldt_sel; | 398 | u16 fs_sel, gs_sel, ldt_sel; |
399 | #ifdef CONFIG_X86_64 | ||
400 | u16 ds_sel, es_sel; | ||
401 | #endif | ||
389 | int gs_ldt_reload_needed; | 402 | int gs_ldt_reload_needed; |
390 | int fs_reload_needed; | 403 | int fs_reload_needed; |
391 | } host_state; | 404 | } host_state; |
@@ -605,6 +618,10 @@ static void kvm_cpu_vmxon(u64 addr); | |||
605 | static void kvm_cpu_vmxoff(void); | 618 | static void kvm_cpu_vmxoff(void); |
606 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 619 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
607 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 620 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
621 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | ||
622 | struct kvm_segment *var, int seg); | ||
623 | static void vmx_get_segment(struct kvm_vcpu *vcpu, | ||
624 | struct kvm_segment *var, int seg); | ||
608 | 625 | ||
609 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 626 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
610 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 627 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -779,6 +796,11 @@ static inline bool cpu_has_vmx_ept_4levels(void) | |||
779 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; | 796 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; |
780 | } | 797 | } |
781 | 798 | ||
799 | static inline bool cpu_has_vmx_ept_ad_bits(void) | ||
800 | { | ||
801 | return vmx_capability.ept & VMX_EPT_AD_BIT; | ||
802 | } | ||
803 | |||
782 | static inline bool cpu_has_vmx_invept_individual_addr(void) | 804 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
783 | { | 805 | { |
784 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; | 806 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
@@ -839,6 +861,12 @@ static inline bool cpu_has_vmx_rdtscp(void) | |||
839 | SECONDARY_EXEC_RDTSCP; | 861 | SECONDARY_EXEC_RDTSCP; |
840 | } | 862 | } |
841 | 863 | ||
864 | static inline bool cpu_has_vmx_invpcid(void) | ||
865 | { | ||
866 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
867 | SECONDARY_EXEC_ENABLE_INVPCID; | ||
868 | } | ||
869 | |||
842 | static inline bool cpu_has_virtual_nmis(void) | 870 | static inline bool cpu_has_virtual_nmis(void) |
843 | { | 871 | { |
844 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 872 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
@@ -1411,6 +1439,11 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
1411 | } | 1439 | } |
1412 | 1440 | ||
1413 | #ifdef CONFIG_X86_64 | 1441 | #ifdef CONFIG_X86_64 |
1442 | savesegment(ds, vmx->host_state.ds_sel); | ||
1443 | savesegment(es, vmx->host_state.es_sel); | ||
1444 | #endif | ||
1445 | |||
1446 | #ifdef CONFIG_X86_64 | ||
1414 | vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); | 1447 | vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); |
1415 | vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); | 1448 | vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); |
1416 | #else | 1449 | #else |
@@ -1450,6 +1483,19 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
1450 | } | 1483 | } |
1451 | if (vmx->host_state.fs_reload_needed) | 1484 | if (vmx->host_state.fs_reload_needed) |
1452 | loadsegment(fs, vmx->host_state.fs_sel); | 1485 | loadsegment(fs, vmx->host_state.fs_sel); |
1486 | #ifdef CONFIG_X86_64 | ||
1487 | if (unlikely(vmx->host_state.ds_sel | vmx->host_state.es_sel)) { | ||
1488 | loadsegment(ds, vmx->host_state.ds_sel); | ||
1489 | loadsegment(es, vmx->host_state.es_sel); | ||
1490 | } | ||
1491 | #else | ||
1492 | /* | ||
1493 | * The sysexit path does not restore ds/es, so we must set them to | ||
1494 | * a reasonable value ourselves. | ||
1495 | */ | ||
1496 | loadsegment(ds, __USER_DS); | ||
1497 | loadsegment(es, __USER_DS); | ||
1498 | #endif | ||
1453 | reload_tss(); | 1499 | reload_tss(); |
1454 | #ifdef CONFIG_X86_64 | 1500 | #ifdef CONFIG_X86_64 |
1455 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 1501 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
@@ -1711,6 +1757,11 @@ static bool vmx_rdtscp_supported(void) | |||
1711 | return cpu_has_vmx_rdtscp(); | 1757 | return cpu_has_vmx_rdtscp(); |
1712 | } | 1758 | } |
1713 | 1759 | ||
1760 | static bool vmx_invpcid_supported(void) | ||
1761 | { | ||
1762 | return cpu_has_vmx_invpcid() && enable_ept; | ||
1763 | } | ||
1764 | |||
1714 | /* | 1765 | /* |
1715 | * Swap MSR entry in host/guest MSR entry array. | 1766 | * Swap MSR entry in host/guest MSR entry array. |
1716 | */ | 1767 | */ |
@@ -2430,7 +2481,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2430 | SECONDARY_EXEC_ENABLE_EPT | | 2481 | SECONDARY_EXEC_ENABLE_EPT | |
2431 | SECONDARY_EXEC_UNRESTRICTED_GUEST | | 2482 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
2432 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | | 2483 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | |
2433 | SECONDARY_EXEC_RDTSCP; | 2484 | SECONDARY_EXEC_RDTSCP | |
2485 | SECONDARY_EXEC_ENABLE_INVPCID; | ||
2434 | if (adjust_vmx_controls(min2, opt2, | 2486 | if (adjust_vmx_controls(min2, opt2, |
2435 | MSR_IA32_VMX_PROCBASED_CTLS2, | 2487 | MSR_IA32_VMX_PROCBASED_CTLS2, |
2436 | &_cpu_based_2nd_exec_control) < 0) | 2488 | &_cpu_based_2nd_exec_control) < 0) |
@@ -2617,8 +2669,12 @@ static __init int hardware_setup(void) | |||
2617 | !cpu_has_vmx_ept_4levels()) { | 2669 | !cpu_has_vmx_ept_4levels()) { |
2618 | enable_ept = 0; | 2670 | enable_ept = 0; |
2619 | enable_unrestricted_guest = 0; | 2671 | enable_unrestricted_guest = 0; |
2672 | enable_ept_ad_bits = 0; | ||
2620 | } | 2673 | } |
2621 | 2674 | ||
2675 | if (!cpu_has_vmx_ept_ad_bits()) | ||
2676 | enable_ept_ad_bits = 0; | ||
2677 | |||
2622 | if (!cpu_has_vmx_unrestricted_guest()) | 2678 | if (!cpu_has_vmx_unrestricted_guest()) |
2623 | enable_unrestricted_guest = 0; | 2679 | enable_unrestricted_guest = 0; |
2624 | 2680 | ||
@@ -2742,6 +2798,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
2742 | { | 2798 | { |
2743 | unsigned long flags; | 2799 | unsigned long flags; |
2744 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2800 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2801 | struct kvm_segment var; | ||
2745 | 2802 | ||
2746 | if (enable_unrestricted_guest) | 2803 | if (enable_unrestricted_guest) |
2747 | return; | 2804 | return; |
@@ -2785,20 +2842,23 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
2785 | if (emulate_invalid_guest_state) | 2842 | if (emulate_invalid_guest_state) |
2786 | goto continue_rmode; | 2843 | goto continue_rmode; |
2787 | 2844 | ||
2788 | vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); | 2845 | vmx_get_segment(vcpu, &var, VCPU_SREG_SS); |
2789 | vmcs_write32(GUEST_SS_LIMIT, 0xffff); | 2846 | vmx_set_segment(vcpu, &var, VCPU_SREG_SS); |
2790 | vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); | ||
2791 | 2847 | ||
2792 | vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); | 2848 | vmx_get_segment(vcpu, &var, VCPU_SREG_CS); |
2793 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); | 2849 | vmx_set_segment(vcpu, &var, VCPU_SREG_CS); |
2794 | if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) | ||
2795 | vmcs_writel(GUEST_CS_BASE, 0xf0000); | ||
2796 | vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); | ||
2797 | 2850 | ||
2798 | fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es); | 2851 | vmx_get_segment(vcpu, &var, VCPU_SREG_ES); |
2799 | fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds); | 2852 | vmx_set_segment(vcpu, &var, VCPU_SREG_ES); |
2800 | fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs); | 2853 | |
2801 | fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs); | 2854 | vmx_get_segment(vcpu, &var, VCPU_SREG_DS); |
2855 | vmx_set_segment(vcpu, &var, VCPU_SREG_DS); | ||
2856 | |||
2857 | vmx_get_segment(vcpu, &var, VCPU_SREG_GS); | ||
2858 | vmx_set_segment(vcpu, &var, VCPU_SREG_GS); | ||
2859 | |||
2860 | vmx_get_segment(vcpu, &var, VCPU_SREG_FS); | ||
2861 | vmx_set_segment(vcpu, &var, VCPU_SREG_FS); | ||
2802 | 2862 | ||
2803 | continue_rmode: | 2863 | continue_rmode: |
2804 | kvm_mmu_reset_context(vcpu); | 2864 | kvm_mmu_reset_context(vcpu); |
@@ -2999,6 +3059,8 @@ static u64 construct_eptp(unsigned long root_hpa) | |||
2999 | /* TODO write the value reading from MSR */ | 3059 | /* TODO write the value reading from MSR */ |
3000 | eptp = VMX_EPT_DEFAULT_MT | | 3060 | eptp = VMX_EPT_DEFAULT_MT | |
3001 | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; | 3061 | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; |
3062 | if (enable_ept_ad_bits) | ||
3063 | eptp |= VMX_EPT_AD_ENABLE_BIT; | ||
3002 | eptp |= (root_hpa & PAGE_MASK); | 3064 | eptp |= (root_hpa & PAGE_MASK); |
3003 | 3065 | ||
3004 | return eptp; | 3066 | return eptp; |
@@ -3125,11 +3187,22 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu) | |||
3125 | 3187 | ||
3126 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 3188 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
3127 | { | 3189 | { |
3190 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
3191 | |||
3192 | /* | ||
3193 | * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations | ||
3194 | * fail; use the cache instead. | ||
3195 | */ | ||
3196 | if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) { | ||
3197 | return vmx->cpl; | ||
3198 | } | ||
3199 | |||
3128 | if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { | 3200 | if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { |
3129 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | 3201 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); |
3130 | to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu); | 3202 | vmx->cpl = __vmx_get_cpl(vcpu); |
3131 | } | 3203 | } |
3132 | return to_vmx(vcpu)->cpl; | 3204 | |
3205 | return vmx->cpl; | ||
3133 | } | 3206 | } |
3134 | 3207 | ||
3135 | 3208 | ||
@@ -3137,7 +3210,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) | |||
3137 | { | 3210 | { |
3138 | u32 ar; | 3211 | u32 ar; |
3139 | 3212 | ||
3140 | if (var->unusable) | 3213 | if (var->unusable || !var->present) |
3141 | ar = 1 << 16; | 3214 | ar = 1 << 16; |
3142 | else { | 3215 | else { |
3143 | ar = var->type & 15; | 3216 | ar = var->type & 15; |
@@ -3149,8 +3222,6 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) | |||
3149 | ar |= (var->db & 1) << 14; | 3222 | ar |= (var->db & 1) << 14; |
3150 | ar |= (var->g & 1) << 15; | 3223 | ar |= (var->g & 1) << 15; |
3151 | } | 3224 | } |
3152 | if (ar == 0) /* a 0 value means unusable */ | ||
3153 | ar = AR_UNUSABLE_MASK; | ||
3154 | 3225 | ||
3155 | return ar; | 3226 | return ar; |
3156 | } | 3227 | } |
@@ -3201,6 +3272,44 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
3201 | 3272 | ||
3202 | vmcs_write32(sf->ar_bytes, ar); | 3273 | vmcs_write32(sf->ar_bytes, ar); |
3203 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | 3274 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); |
3275 | |||
3276 | /* | ||
3277 | * Fix segments for real mode guest in hosts that don't have | ||
3278 | * "unrestricted_mode" or it was disabled. | ||
3279 | * This is done to allow migration of the guests from hosts with | ||
3280 | * unrestricted guest like Westmere to older host that don't have | ||
3281 | * unrestricted guest like Nehelem. | ||
3282 | */ | ||
3283 | if (!enable_unrestricted_guest && vmx->rmode.vm86_active) { | ||
3284 | switch (seg) { | ||
3285 | case VCPU_SREG_CS: | ||
3286 | vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); | ||
3287 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); | ||
3288 | if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) | ||
3289 | vmcs_writel(GUEST_CS_BASE, 0xf0000); | ||
3290 | vmcs_write16(GUEST_CS_SELECTOR, | ||
3291 | vmcs_readl(GUEST_CS_BASE) >> 4); | ||
3292 | break; | ||
3293 | case VCPU_SREG_ES: | ||
3294 | fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es); | ||
3295 | break; | ||
3296 | case VCPU_SREG_DS: | ||
3297 | fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds); | ||
3298 | break; | ||
3299 | case VCPU_SREG_GS: | ||
3300 | fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs); | ||
3301 | break; | ||
3302 | case VCPU_SREG_FS: | ||
3303 | fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs); | ||
3304 | break; | ||
3305 | case VCPU_SREG_SS: | ||
3306 | vmcs_write16(GUEST_SS_SELECTOR, | ||
3307 | vmcs_readl(GUEST_SS_BASE) >> 4); | ||
3308 | vmcs_write32(GUEST_SS_LIMIT, 0xffff); | ||
3309 | vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); | ||
3310 | break; | ||
3311 | } | ||
3312 | } | ||
3204 | } | 3313 | } |
3205 | 3314 | ||
3206 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 3315 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
@@ -3633,8 +3742,18 @@ static void vmx_set_constant_host_state(void) | |||
3633 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ | 3742 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ |
3634 | 3743 | ||
3635 | vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ | 3744 | vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ |
3745 | #ifdef CONFIG_X86_64 | ||
3746 | /* | ||
3747 | * Load null selectors, so we can avoid reloading them in | ||
3748 | * __vmx_load_host_state(), in case userspace uses the null selectors | ||
3749 | * too (the expected case). | ||
3750 | */ | ||
3751 | vmcs_write16(HOST_DS_SELECTOR, 0); | ||
3752 | vmcs_write16(HOST_ES_SELECTOR, 0); | ||
3753 | #else | ||
3636 | vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 3754 | vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ |
3637 | vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 3755 | vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ |
3756 | #endif | ||
3638 | vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 3757 | vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ |
3639 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | 3758 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ |
3640 | 3759 | ||
@@ -3693,6 +3812,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
3693 | if (!enable_ept) { | 3812 | if (!enable_ept) { |
3694 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 3813 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
3695 | enable_unrestricted_guest = 0; | 3814 | enable_unrestricted_guest = 0; |
3815 | /* Enable INVPCID for non-ept guests may cause performance regression. */ | ||
3816 | exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
3696 | } | 3817 | } |
3697 | if (!enable_unrestricted_guest) | 3818 | if (!enable_unrestricted_guest) |
3698 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 3819 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
@@ -4451,7 +4572,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
4451 | break; | 4572 | break; |
4452 | } | 4573 | } |
4453 | vcpu->run->exit_reason = 0; | 4574 | vcpu->run->exit_reason = 0; |
4454 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", | 4575 | vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n", |
4455 | (int)(exit_qualification >> 4) & 3, cr); | 4576 | (int)(exit_qualification >> 4) & 3, cr); |
4456 | return 0; | 4577 | return 0; |
4457 | } | 4578 | } |
@@ -4731,6 +4852,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
4731 | { | 4852 | { |
4732 | unsigned long exit_qualification; | 4853 | unsigned long exit_qualification; |
4733 | gpa_t gpa; | 4854 | gpa_t gpa; |
4855 | u32 error_code; | ||
4734 | int gla_validity; | 4856 | int gla_validity; |
4735 | 4857 | ||
4736 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 4858 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
@@ -4755,7 +4877,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
4755 | 4877 | ||
4756 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 4878 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
4757 | trace_kvm_page_fault(gpa, exit_qualification); | 4879 | trace_kvm_page_fault(gpa, exit_qualification); |
4758 | return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0); | 4880 | |
4881 | /* It is a write fault? */ | ||
4882 | error_code = exit_qualification & (1U << 1); | ||
4883 | /* ept page table is present? */ | ||
4884 | error_code |= (exit_qualification >> 3) & 0x1; | ||
4885 | |||
4886 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); | ||
4759 | } | 4887 | } |
4760 | 4888 | ||
4761 | static u64 ept_rsvd_mask(u64 spte, int level) | 4889 | static u64 ept_rsvd_mask(u64 spte, int level) |
@@ -4870,15 +4998,18 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
4870 | int ret = 1; | 4998 | int ret = 1; |
4871 | u32 cpu_exec_ctrl; | 4999 | u32 cpu_exec_ctrl; |
4872 | bool intr_window_requested; | 5000 | bool intr_window_requested; |
5001 | unsigned count = 130; | ||
4873 | 5002 | ||
4874 | cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 5003 | cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
4875 | intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; | 5004 | intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; |
4876 | 5005 | ||
4877 | while (!guest_state_valid(vcpu)) { | 5006 | while (!guest_state_valid(vcpu) && count-- != 0) { |
4878 | if (intr_window_requested | 5007 | if (intr_window_requested && vmx_interrupt_allowed(vcpu)) |
4879 | && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF)) | ||
4880 | return handle_interrupt_window(&vmx->vcpu); | 5008 | return handle_interrupt_window(&vmx->vcpu); |
4881 | 5009 | ||
5010 | if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) | ||
5011 | return 1; | ||
5012 | |||
4882 | err = emulate_instruction(vcpu, 0); | 5013 | err = emulate_instruction(vcpu, 0); |
4883 | 5014 | ||
4884 | if (err == EMULATE_DO_MMIO) { | 5015 | if (err == EMULATE_DO_MMIO) { |
@@ -4886,8 +5017,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
4886 | goto out; | 5017 | goto out; |
4887 | } | 5018 | } |
4888 | 5019 | ||
4889 | if (err != EMULATE_DONE) | 5020 | if (err != EMULATE_DONE) { |
5021 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
5022 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
5023 | vcpu->run->internal.ndata = 0; | ||
4890 | return 0; | 5024 | return 0; |
5025 | } | ||
4891 | 5026 | ||
4892 | if (signal_pending(current)) | 5027 | if (signal_pending(current)) |
4893 | goto out; | 5028 | goto out; |
@@ -4895,7 +5030,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
4895 | schedule(); | 5030 | schedule(); |
4896 | } | 5031 | } |
4897 | 5032 | ||
4898 | vmx->emulation_required = 0; | 5033 | vmx->emulation_required = !guest_state_valid(vcpu); |
4899 | out: | 5034 | out: |
4900 | return ret; | 5035 | return ret; |
4901 | } | 5036 | } |
@@ -6256,7 +6391,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6256 | } | 6391 | } |
6257 | } | 6392 | } |
6258 | 6393 | ||
6259 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | ||
6260 | vmx->loaded_vmcs->launched = 1; | 6394 | vmx->loaded_vmcs->launched = 1; |
6261 | 6395 | ||
6262 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 6396 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); |
@@ -6343,7 +6477,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
6343 | return &vmx->vcpu; | 6477 | return &vmx->vcpu; |
6344 | 6478 | ||
6345 | free_vmcs: | 6479 | free_vmcs: |
6346 | free_vmcs(vmx->loaded_vmcs->vmcs); | 6480 | free_loaded_vmcs(vmx->loaded_vmcs); |
6347 | free_msrs: | 6481 | free_msrs: |
6348 | kfree(vmx->guest_msrs); | 6482 | kfree(vmx->guest_msrs); |
6349 | uninit_vcpu: | 6483 | uninit_vcpu: |
@@ -6430,6 +6564,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | |||
6430 | } | 6564 | } |
6431 | } | 6565 | } |
6432 | } | 6566 | } |
6567 | |||
6568 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
6569 | /* Exposing INVPCID only when PCID is exposed */ | ||
6570 | best = kvm_find_cpuid_entry(vcpu, 0x7, 0); | ||
6571 | if (vmx_invpcid_supported() && | ||
6572 | best && (best->ecx & bit(X86_FEATURE_INVPCID)) && | ||
6573 | guest_cpuid_has_pcid(vcpu)) { | ||
6574 | exec_control |= SECONDARY_EXEC_ENABLE_INVPCID; | ||
6575 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
6576 | exec_control); | ||
6577 | } else { | ||
6578 | exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
6579 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
6580 | exec_control); | ||
6581 | if (best) | ||
6582 | best->ecx &= ~bit(X86_FEATURE_INVPCID); | ||
6583 | } | ||
6433 | } | 6584 | } |
6434 | 6585 | ||
6435 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 6586 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
@@ -7164,6 +7315,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7164 | .cpuid_update = vmx_cpuid_update, | 7315 | .cpuid_update = vmx_cpuid_update, |
7165 | 7316 | ||
7166 | .rdtscp_supported = vmx_rdtscp_supported, | 7317 | .rdtscp_supported = vmx_rdtscp_supported, |
7318 | .invpcid_supported = vmx_invpcid_supported, | ||
7167 | 7319 | ||
7168 | .set_supported_cpuid = vmx_set_supported_cpuid, | 7320 | .set_supported_cpuid = vmx_set_supported_cpuid, |
7169 | 7321 | ||
@@ -7193,23 +7345,21 @@ static int __init vmx_init(void) | |||
7193 | if (!vmx_io_bitmap_a) | 7345 | if (!vmx_io_bitmap_a) |
7194 | return -ENOMEM; | 7346 | return -ENOMEM; |
7195 | 7347 | ||
7348 | r = -ENOMEM; | ||
7349 | |||
7196 | vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); | 7350 | vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); |
7197 | if (!vmx_io_bitmap_b) { | 7351 | if (!vmx_io_bitmap_b) |
7198 | r = -ENOMEM; | ||
7199 | goto out; | 7352 | goto out; |
7200 | } | ||
7201 | 7353 | ||
7202 | vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); | 7354 | vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); |
7203 | if (!vmx_msr_bitmap_legacy) { | 7355 | if (!vmx_msr_bitmap_legacy) |
7204 | r = -ENOMEM; | ||
7205 | goto out1; | 7356 | goto out1; |
7206 | } | 7357 | |
7207 | 7358 | ||
7208 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); | 7359 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); |
7209 | if (!vmx_msr_bitmap_longmode) { | 7360 | if (!vmx_msr_bitmap_longmode) |
7210 | r = -ENOMEM; | ||
7211 | goto out2; | 7361 | goto out2; |
7212 | } | 7362 | |
7213 | 7363 | ||
7214 | /* | 7364 | /* |
7215 | * Allow direct access to the PC debug port (it is often used for I/O | 7365 | * Allow direct access to the PC debug port (it is often used for I/O |
@@ -7238,8 +7388,10 @@ static int __init vmx_init(void) | |||
7238 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | 7388 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
7239 | 7389 | ||
7240 | if (enable_ept) { | 7390 | if (enable_ept) { |
7241 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | 7391 | kvm_mmu_set_mask_ptes(0ull, |
7242 | VMX_EPT_EXECUTABLE_MASK); | 7392 | (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, |
7393 | (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull, | ||
7394 | 0ull, VMX_EPT_EXECUTABLE_MASK); | ||
7243 | ept_set_mmio_spte_mask(); | 7395 | ept_set_mmio_spte_mask(); |
7244 | kvm_enable_tdp(); | 7396 | kvm_enable_tdp(); |
7245 | } else | 7397 | } else |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 185a2b823a2..59b59508ff0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -528,6 +528,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
528 | return 1; | 528 | return 1; |
529 | } | 529 | } |
530 | 530 | ||
531 | if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) | ||
532 | return 1; | ||
533 | |||
531 | kvm_x86_ops->set_cr0(vcpu, cr0); | 534 | kvm_x86_ops->set_cr0(vcpu, cr0); |
532 | 535 | ||
533 | if ((cr0 ^ old_cr0) & X86_CR0_PG) { | 536 | if ((cr0 ^ old_cr0) & X86_CR0_PG) { |
@@ -604,10 +607,20 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
604 | kvm_read_cr3(vcpu))) | 607 | kvm_read_cr3(vcpu))) |
605 | return 1; | 608 | return 1; |
606 | 609 | ||
610 | if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { | ||
611 | if (!guest_cpuid_has_pcid(vcpu)) | ||
612 | return 1; | ||
613 | |||
614 | /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ | ||
615 | if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) | ||
616 | return 1; | ||
617 | } | ||
618 | |||
607 | if (kvm_x86_ops->set_cr4(vcpu, cr4)) | 619 | if (kvm_x86_ops->set_cr4(vcpu, cr4)) |
608 | return 1; | 620 | return 1; |
609 | 621 | ||
610 | if ((cr4 ^ old_cr4) & pdptr_bits) | 622 | if (((cr4 ^ old_cr4) & pdptr_bits) || |
623 | (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) | ||
611 | kvm_mmu_reset_context(vcpu); | 624 | kvm_mmu_reset_context(vcpu); |
612 | 625 | ||
613 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) | 626 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) |
@@ -626,8 +639,12 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
626 | } | 639 | } |
627 | 640 | ||
628 | if (is_long_mode(vcpu)) { | 641 | if (is_long_mode(vcpu)) { |
629 | if (cr3 & CR3_L_MODE_RESERVED_BITS) | 642 | if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) { |
630 | return 1; | 643 | if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS) |
644 | return 1; | ||
645 | } else | ||
646 | if (cr3 & CR3_L_MODE_RESERVED_BITS) | ||
647 | return 1; | ||
631 | } else { | 648 | } else { |
632 | if (is_pae(vcpu)) { | 649 | if (is_pae(vcpu)) { |
633 | if (cr3 & CR3_PAE_RESERVED_BITS) | 650 | if (cr3 & CR3_PAE_RESERVED_BITS) |
@@ -795,6 +812,7 @@ static u32 msrs_to_save[] = { | |||
795 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 812 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
796 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 813 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
797 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | 814 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, |
815 | MSR_KVM_PV_EOI_EN, | ||
798 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 816 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
799 | MSR_STAR, | 817 | MSR_STAR, |
800 | #ifdef CONFIG_X86_64 | 818 | #ifdef CONFIG_X86_64 |
@@ -1437,8 +1455,8 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1437 | break; | 1455 | break; |
1438 | } | 1456 | } |
1439 | default: | 1457 | default: |
1440 | pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | 1458 | vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " |
1441 | "data 0x%llx\n", msr, data); | 1459 | "data 0x%llx\n", msr, data); |
1442 | return 1; | 1460 | return 1; |
1443 | } | 1461 | } |
1444 | return 0; | 1462 | return 0; |
@@ -1470,8 +1488,8 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1470 | case HV_X64_MSR_TPR: | 1488 | case HV_X64_MSR_TPR: |
1471 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); | 1489 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); |
1472 | default: | 1490 | default: |
1473 | pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | 1491 | vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " |
1474 | "data 0x%llx\n", msr, data); | 1492 | "data 0x%llx\n", msr, data); |
1475 | return 1; | 1493 | return 1; |
1476 | } | 1494 | } |
1477 | 1495 | ||
@@ -1551,15 +1569,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1551 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | 1569 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ |
1552 | data &= ~(u64)0x8; /* ignore TLB cache disable */ | 1570 | data &= ~(u64)0x8; /* ignore TLB cache disable */ |
1553 | if (data != 0) { | 1571 | if (data != 0) { |
1554 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 1572 | vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
1555 | data); | 1573 | data); |
1556 | return 1; | 1574 | return 1; |
1557 | } | 1575 | } |
1558 | break; | 1576 | break; |
1559 | case MSR_FAM10H_MMIO_CONF_BASE: | 1577 | case MSR_FAM10H_MMIO_CONF_BASE: |
1560 | if (data != 0) { | 1578 | if (data != 0) { |
1561 | pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: " | 1579 | vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: " |
1562 | "0x%llx\n", data); | 1580 | "0x%llx\n", data); |
1563 | return 1; | 1581 | return 1; |
1564 | } | 1582 | } |
1565 | break; | 1583 | break; |
@@ -1574,8 +1592,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1574 | thus reserved and should throw a #GP */ | 1592 | thus reserved and should throw a #GP */ |
1575 | return 1; | 1593 | return 1; |
1576 | } | 1594 | } |
1577 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", | 1595 | vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", |
1578 | __func__, data); | 1596 | __func__, data); |
1579 | break; | 1597 | break; |
1580 | case MSR_IA32_UCODE_REV: | 1598 | case MSR_IA32_UCODE_REV: |
1581 | case MSR_IA32_UCODE_WRITE: | 1599 | case MSR_IA32_UCODE_WRITE: |
@@ -1653,6 +1671,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1653 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | 1671 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); |
1654 | 1672 | ||
1655 | break; | 1673 | break; |
1674 | case MSR_KVM_PV_EOI_EN: | ||
1675 | if (kvm_lapic_enable_pv_eoi(vcpu, data)) | ||
1676 | return 1; | ||
1677 | break; | ||
1656 | 1678 | ||
1657 | case MSR_IA32_MCG_CTL: | 1679 | case MSR_IA32_MCG_CTL: |
1658 | case MSR_IA32_MCG_STATUS: | 1680 | case MSR_IA32_MCG_STATUS: |
@@ -1671,8 +1693,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1671 | case MSR_K7_EVNTSEL2: | 1693 | case MSR_K7_EVNTSEL2: |
1672 | case MSR_K7_EVNTSEL3: | 1694 | case MSR_K7_EVNTSEL3: |
1673 | if (data != 0) | 1695 | if (data != 0) |
1674 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " | 1696 | vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: " |
1675 | "0x%x data 0x%llx\n", msr, data); | 1697 | "0x%x data 0x%llx\n", msr, data); |
1676 | break; | 1698 | break; |
1677 | /* at least RHEL 4 unconditionally writes to the perfctr registers, | 1699 | /* at least RHEL 4 unconditionally writes to the perfctr registers, |
1678 | * so we ignore writes to make it happy. | 1700 | * so we ignore writes to make it happy. |
@@ -1681,8 +1703,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1681 | case MSR_K7_PERFCTR1: | 1703 | case MSR_K7_PERFCTR1: |
1682 | case MSR_K7_PERFCTR2: | 1704 | case MSR_K7_PERFCTR2: |
1683 | case MSR_K7_PERFCTR3: | 1705 | case MSR_K7_PERFCTR3: |
1684 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " | 1706 | vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: " |
1685 | "0x%x data 0x%llx\n", msr, data); | 1707 | "0x%x data 0x%llx\n", msr, data); |
1686 | break; | 1708 | break; |
1687 | case MSR_P6_PERFCTR0: | 1709 | case MSR_P6_PERFCTR0: |
1688 | case MSR_P6_PERFCTR1: | 1710 | case MSR_P6_PERFCTR1: |
@@ -1693,8 +1715,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1693 | return kvm_pmu_set_msr(vcpu, msr, data); | 1715 | return kvm_pmu_set_msr(vcpu, msr, data); |
1694 | 1716 | ||
1695 | if (pr || data != 0) | 1717 | if (pr || data != 0) |
1696 | pr_unimpl(vcpu, "disabled perfctr wrmsr: " | 1718 | vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " |
1697 | "0x%x data 0x%llx\n", msr, data); | 1719 | "0x%x data 0x%llx\n", msr, data); |
1698 | break; | 1720 | break; |
1699 | case MSR_K7_CLK_CTL: | 1721 | case MSR_K7_CLK_CTL: |
1700 | /* | 1722 | /* |
@@ -1720,7 +1742,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1720 | /* Drop writes to this legacy MSR -- see rdmsr | 1742 | /* Drop writes to this legacy MSR -- see rdmsr |
1721 | * counterpart for further detail. | 1743 | * counterpart for further detail. |
1722 | */ | 1744 | */ |
1723 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); | 1745 | vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); |
1724 | break; | 1746 | break; |
1725 | case MSR_AMD64_OSVW_ID_LENGTH: | 1747 | case MSR_AMD64_OSVW_ID_LENGTH: |
1726 | if (!guest_cpuid_has_osvw(vcpu)) | 1748 | if (!guest_cpuid_has_osvw(vcpu)) |
@@ -1738,12 +1760,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1738 | if (kvm_pmu_msr(vcpu, msr)) | 1760 | if (kvm_pmu_msr(vcpu, msr)) |
1739 | return kvm_pmu_set_msr(vcpu, msr, data); | 1761 | return kvm_pmu_set_msr(vcpu, msr, data); |
1740 | if (!ignore_msrs) { | 1762 | if (!ignore_msrs) { |
1741 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 1763 | vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
1742 | msr, data); | 1764 | msr, data); |
1743 | return 1; | 1765 | return 1; |
1744 | } else { | 1766 | } else { |
1745 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", | 1767 | vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", |
1746 | msr, data); | 1768 | msr, data); |
1747 | break; | 1769 | break; |
1748 | } | 1770 | } |
1749 | } | 1771 | } |
@@ -1846,7 +1868,7 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1846 | data = kvm->arch.hv_hypercall; | 1868 | data = kvm->arch.hv_hypercall; |
1847 | break; | 1869 | break; |
1848 | default: | 1870 | default: |
1849 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 1871 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
1850 | return 1; | 1872 | return 1; |
1851 | } | 1873 | } |
1852 | 1874 | ||
@@ -1877,7 +1899,7 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1877 | data = vcpu->arch.hv_vapic; | 1899 | data = vcpu->arch.hv_vapic; |
1878 | break; | 1900 | break; |
1879 | default: | 1901 | default: |
1880 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 1902 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
1881 | return 1; | 1903 | return 1; |
1882 | } | 1904 | } |
1883 | *pdata = data; | 1905 | *pdata = data; |
@@ -2030,10 +2052,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2030 | if (kvm_pmu_msr(vcpu, msr)) | 2052 | if (kvm_pmu_msr(vcpu, msr)) |
2031 | return kvm_pmu_get_msr(vcpu, msr, pdata); | 2053 | return kvm_pmu_get_msr(vcpu, msr, pdata); |
2032 | if (!ignore_msrs) { | 2054 | if (!ignore_msrs) { |
2033 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 2055 | vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
2034 | return 1; | 2056 | return 1; |
2035 | } else { | 2057 | } else { |
2036 | pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr); | 2058 | vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr); |
2037 | data = 0; | 2059 | data = 0; |
2038 | } | 2060 | } |
2039 | break; | 2061 | break; |
@@ -2147,6 +2169,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2147 | case KVM_CAP_ASYNC_PF: | 2169 | case KVM_CAP_ASYNC_PF: |
2148 | case KVM_CAP_GET_TSC_KHZ: | 2170 | case KVM_CAP_GET_TSC_KHZ: |
2149 | case KVM_CAP_PCI_2_3: | 2171 | case KVM_CAP_PCI_2_3: |
2172 | case KVM_CAP_KVMCLOCK_CTRL: | ||
2150 | r = 1; | 2173 | r = 1; |
2151 | break; | 2174 | break; |
2152 | case KVM_CAP_COALESCED_MMIO: | 2175 | case KVM_CAP_COALESCED_MMIO: |
@@ -2597,6 +2620,23 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, | |||
2597 | return r; | 2620 | return r; |
2598 | } | 2621 | } |
2599 | 2622 | ||
2623 | /* | ||
2624 | * kvm_set_guest_paused() indicates to the guest kernel that it has been | ||
2625 | * stopped by the hypervisor. This function will be called from the host only. | ||
2626 | * EINVAL is returned when the host attempts to set the flag for a guest that | ||
2627 | * does not support pv clocks. | ||
2628 | */ | ||
2629 | static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) | ||
2630 | { | ||
2631 | struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock; | ||
2632 | if (!vcpu->arch.time_page) | ||
2633 | return -EINVAL; | ||
2634 | src->flags |= PVCLOCK_GUEST_STOPPED; | ||
2635 | mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT); | ||
2636 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
2637 | return 0; | ||
2638 | } | ||
2639 | |||
2600 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2640 | long kvm_arch_vcpu_ioctl(struct file *filp, |
2601 | unsigned int ioctl, unsigned long arg) | 2641 | unsigned int ioctl, unsigned long arg) |
2602 | { | 2642 | { |
@@ -2873,6 +2913,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2873 | r = vcpu->arch.virtual_tsc_khz; | 2913 | r = vcpu->arch.virtual_tsc_khz; |
2874 | goto out; | 2914 | goto out; |
2875 | } | 2915 | } |
2916 | case KVM_KVMCLOCK_CTRL: { | ||
2917 | r = kvm_set_guest_paused(vcpu); | ||
2918 | goto out; | ||
2919 | } | ||
2876 | default: | 2920 | default: |
2877 | r = -EINVAL; | 2921 | r = -EINVAL; |
2878 | } | 2922 | } |
@@ -3045,57 +3089,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
3045 | } | 3089 | } |
3046 | 3090 | ||
3047 | /** | 3091 | /** |
3048 | * write_protect_slot - write protect a slot for dirty logging | 3092 | * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot |
3049 | * @kvm: the kvm instance | 3093 | * @kvm: kvm instance |
3050 | * @memslot: the slot we protect | 3094 | * @log: slot id and address to which we copy the log |
3051 | * @dirty_bitmap: the bitmap indicating which pages are dirty | ||
3052 | * @nr_dirty_pages: the number of dirty pages | ||
3053 | * | 3095 | * |
3054 | * We have two ways to find all sptes to protect: | 3096 | * We need to keep it in mind that VCPU threads can write to the bitmap |
3055 | * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and | 3097 | * concurrently. So, to avoid losing data, we keep the following order for |
3056 | * checks ones that have a spte mapping a page in the slot. | 3098 | * each bit: |
3057 | * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap. | ||
3058 | * | 3099 | * |
3059 | * Generally speaking, if there are not so many dirty pages compared to the | 3100 | * 1. Take a snapshot of the bit and clear it if needed. |
3060 | * number of shadow pages, we should use the latter. | 3101 | * 2. Write protect the corresponding page. |
3102 | * 3. Flush TLB's if needed. | ||
3103 | * 4. Copy the snapshot to the userspace. | ||
3061 | * | 3104 | * |
3062 | * Note that letting others write into a page marked dirty in the old bitmap | 3105 | * Between 2 and 3, the guest may write to the page using the remaining TLB |
3063 | * by using the remaining tlb entry is not a problem. That page will become | 3106 | * entry. This is not a problem because the page will be reported dirty at |
3064 | * write protected again when we flush the tlb and then be reported dirty to | 3107 | * step 4 using the snapshot taken before and step 3 ensures that successive |
3065 | * the user space by copying the old bitmap. | 3108 | * writes will be logged for the next call. |
3066 | */ | ||
3067 | static void write_protect_slot(struct kvm *kvm, | ||
3068 | struct kvm_memory_slot *memslot, | ||
3069 | unsigned long *dirty_bitmap, | ||
3070 | unsigned long nr_dirty_pages) | ||
3071 | { | ||
3072 | spin_lock(&kvm->mmu_lock); | ||
3073 | |||
3074 | /* Not many dirty pages compared to # of shadow pages. */ | ||
3075 | if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { | ||
3076 | unsigned long gfn_offset; | ||
3077 | |||
3078 | for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { | ||
3079 | unsigned long gfn = memslot->base_gfn + gfn_offset; | ||
3080 | |||
3081 | kvm_mmu_rmap_write_protect(kvm, gfn, memslot); | ||
3082 | } | ||
3083 | kvm_flush_remote_tlbs(kvm); | ||
3084 | } else | ||
3085 | kvm_mmu_slot_remove_write_access(kvm, memslot->id); | ||
3086 | |||
3087 | spin_unlock(&kvm->mmu_lock); | ||
3088 | } | ||
3089 | |||
3090 | /* | ||
3091 | * Get (and clear) the dirty memory log for a memory slot. | ||
3092 | */ | 3109 | */ |
3093 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 3110 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) |
3094 | struct kvm_dirty_log *log) | ||
3095 | { | 3111 | { |
3096 | int r; | 3112 | int r; |
3097 | struct kvm_memory_slot *memslot; | 3113 | struct kvm_memory_slot *memslot; |
3098 | unsigned long n, nr_dirty_pages; | 3114 | unsigned long n, i; |
3115 | unsigned long *dirty_bitmap; | ||
3116 | unsigned long *dirty_bitmap_buffer; | ||
3117 | bool is_dirty = false; | ||
3099 | 3118 | ||
3100 | mutex_lock(&kvm->slots_lock); | 3119 | mutex_lock(&kvm->slots_lock); |
3101 | 3120 | ||
@@ -3104,49 +3123,42 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
3104 | goto out; | 3123 | goto out; |
3105 | 3124 | ||
3106 | memslot = id_to_memslot(kvm->memslots, log->slot); | 3125 | memslot = id_to_memslot(kvm->memslots, log->slot); |
3126 | |||
3127 | dirty_bitmap = memslot->dirty_bitmap; | ||
3107 | r = -ENOENT; | 3128 | r = -ENOENT; |
3108 | if (!memslot->dirty_bitmap) | 3129 | if (!dirty_bitmap) |
3109 | goto out; | 3130 | goto out; |
3110 | 3131 | ||
3111 | n = kvm_dirty_bitmap_bytes(memslot); | 3132 | n = kvm_dirty_bitmap_bytes(memslot); |
3112 | nr_dirty_pages = memslot->nr_dirty_pages; | ||
3113 | 3133 | ||
3114 | /* If nothing is dirty, don't bother messing with page tables. */ | 3134 | dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long); |
3115 | if (nr_dirty_pages) { | 3135 | memset(dirty_bitmap_buffer, 0, n); |
3116 | struct kvm_memslots *slots, *old_slots; | ||
3117 | unsigned long *dirty_bitmap, *dirty_bitmap_head; | ||
3118 | 3136 | ||
3119 | dirty_bitmap = memslot->dirty_bitmap; | 3137 | spin_lock(&kvm->mmu_lock); |
3120 | dirty_bitmap_head = memslot->dirty_bitmap_head; | ||
3121 | if (dirty_bitmap == dirty_bitmap_head) | ||
3122 | dirty_bitmap_head += n / sizeof(long); | ||
3123 | memset(dirty_bitmap_head, 0, n); | ||
3124 | 3138 | ||
3125 | r = -ENOMEM; | 3139 | for (i = 0; i < n / sizeof(long); i++) { |
3126 | slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL); | 3140 | unsigned long mask; |
3127 | if (!slots) | 3141 | gfn_t offset; |
3128 | goto out; | ||
3129 | 3142 | ||
3130 | memslot = id_to_memslot(slots, log->slot); | 3143 | if (!dirty_bitmap[i]) |
3131 | memslot->nr_dirty_pages = 0; | 3144 | continue; |
3132 | memslot->dirty_bitmap = dirty_bitmap_head; | ||
3133 | update_memslots(slots, NULL); | ||
3134 | 3145 | ||
3135 | old_slots = kvm->memslots; | 3146 | is_dirty = true; |
3136 | rcu_assign_pointer(kvm->memslots, slots); | ||
3137 | synchronize_srcu_expedited(&kvm->srcu); | ||
3138 | kfree(old_slots); | ||
3139 | 3147 | ||
3140 | write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages); | 3148 | mask = xchg(&dirty_bitmap[i], 0); |
3149 | dirty_bitmap_buffer[i] = mask; | ||
3141 | 3150 | ||
3142 | r = -EFAULT; | 3151 | offset = i * BITS_PER_LONG; |
3143 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | 3152 | kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask); |
3144 | goto out; | ||
3145 | } else { | ||
3146 | r = -EFAULT; | ||
3147 | if (clear_user(log->dirty_bitmap, n)) | ||
3148 | goto out; | ||
3149 | } | 3153 | } |
3154 | if (is_dirty) | ||
3155 | kvm_flush_remote_tlbs(kvm); | ||
3156 | |||
3157 | spin_unlock(&kvm->mmu_lock); | ||
3158 | |||
3159 | r = -EFAULT; | ||
3160 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) | ||
3161 | goto out; | ||
3150 | 3162 | ||
3151 | r = 0; | 3163 | r = 0; |
3152 | out: | 3164 | out: |
@@ -3728,9 +3740,8 @@ struct read_write_emulator_ops { | |||
3728 | static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) | 3740 | static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) |
3729 | { | 3741 | { |
3730 | if (vcpu->mmio_read_completed) { | 3742 | if (vcpu->mmio_read_completed) { |
3731 | memcpy(val, vcpu->mmio_data, bytes); | ||
3732 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, | 3743 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, |
3733 | vcpu->mmio_phys_addr, *(u64 *)val); | 3744 | vcpu->mmio_fragments[0].gpa, *(u64 *)val); |
3734 | vcpu->mmio_read_completed = 0; | 3745 | vcpu->mmio_read_completed = 0; |
3735 | return 1; | 3746 | return 1; |
3736 | } | 3747 | } |
@@ -3766,8 +3777,9 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3766 | static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, | 3777 | static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, |
3767 | void *val, int bytes) | 3778 | void *val, int bytes) |
3768 | { | 3779 | { |
3769 | memcpy(vcpu->mmio_data, val, bytes); | 3780 | struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0]; |
3770 | memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); | 3781 | |
3782 | memcpy(vcpu->run->mmio.data, frag->data, frag->len); | ||
3771 | return X86EMUL_CONTINUE; | 3783 | return X86EMUL_CONTINUE; |
3772 | } | 3784 | } |
3773 | 3785 | ||
@@ -3794,10 +3806,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, | |||
3794 | gpa_t gpa; | 3806 | gpa_t gpa; |
3795 | int handled, ret; | 3807 | int handled, ret; |
3796 | bool write = ops->write; | 3808 | bool write = ops->write; |
3797 | 3809 | struct kvm_mmio_fragment *frag; | |
3798 | if (ops->read_write_prepare && | ||
3799 | ops->read_write_prepare(vcpu, val, bytes)) | ||
3800 | return X86EMUL_CONTINUE; | ||
3801 | 3810 | ||
3802 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); | 3811 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); |
3803 | 3812 | ||
@@ -3823,15 +3832,19 @@ mmio: | |||
3823 | bytes -= handled; | 3832 | bytes -= handled; |
3824 | val += handled; | 3833 | val += handled; |
3825 | 3834 | ||
3826 | vcpu->mmio_needed = 1; | 3835 | while (bytes) { |
3827 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | 3836 | unsigned now = min(bytes, 8U); |
3828 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | ||
3829 | vcpu->mmio_size = bytes; | ||
3830 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | ||
3831 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = write; | ||
3832 | vcpu->mmio_index = 0; | ||
3833 | 3837 | ||
3834 | return ops->read_write_exit_mmio(vcpu, gpa, val, bytes); | 3838 | frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++]; |
3839 | frag->gpa = gpa; | ||
3840 | frag->data = val; | ||
3841 | frag->len = now; | ||
3842 | |||
3843 | gpa += now; | ||
3844 | val += now; | ||
3845 | bytes -= now; | ||
3846 | } | ||
3847 | return X86EMUL_CONTINUE; | ||
3835 | } | 3848 | } |
3836 | 3849 | ||
3837 | int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, | 3850 | int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, |
@@ -3840,10 +3853,18 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, | |||
3840 | struct read_write_emulator_ops *ops) | 3853 | struct read_write_emulator_ops *ops) |
3841 | { | 3854 | { |
3842 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 3855 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
3856 | gpa_t gpa; | ||
3857 | int rc; | ||
3858 | |||
3859 | if (ops->read_write_prepare && | ||
3860 | ops->read_write_prepare(vcpu, val, bytes)) | ||
3861 | return X86EMUL_CONTINUE; | ||
3862 | |||
3863 | vcpu->mmio_nr_fragments = 0; | ||
3843 | 3864 | ||
3844 | /* Crossing a page boundary? */ | 3865 | /* Crossing a page boundary? */ |
3845 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 3866 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
3846 | int rc, now; | 3867 | int now; |
3847 | 3868 | ||
3848 | now = -addr & ~PAGE_MASK; | 3869 | now = -addr & ~PAGE_MASK; |
3849 | rc = emulator_read_write_onepage(addr, val, now, exception, | 3870 | rc = emulator_read_write_onepage(addr, val, now, exception, |
@@ -3856,8 +3877,25 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, | |||
3856 | bytes -= now; | 3877 | bytes -= now; |
3857 | } | 3878 | } |
3858 | 3879 | ||
3859 | return emulator_read_write_onepage(addr, val, bytes, exception, | 3880 | rc = emulator_read_write_onepage(addr, val, bytes, exception, |
3860 | vcpu, ops); | 3881 | vcpu, ops); |
3882 | if (rc != X86EMUL_CONTINUE) | ||
3883 | return rc; | ||
3884 | |||
3885 | if (!vcpu->mmio_nr_fragments) | ||
3886 | return rc; | ||
3887 | |||
3888 | gpa = vcpu->mmio_fragments[0].gpa; | ||
3889 | |||
3890 | vcpu->mmio_needed = 1; | ||
3891 | vcpu->mmio_cur_fragment = 0; | ||
3892 | |||
3893 | vcpu->run->mmio.len = vcpu->mmio_fragments[0].len; | ||
3894 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write; | ||
3895 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | ||
3896 | vcpu->run->mmio.phys_addr = gpa; | ||
3897 | |||
3898 | return ops->read_write_exit_mmio(vcpu, gpa, val, bytes); | ||
3861 | } | 3899 | } |
3862 | 3900 | ||
3863 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, | 3901 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, |
@@ -4100,7 +4138,7 @@ static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) | |||
4100 | value = kvm_get_cr8(vcpu); | 4138 | value = kvm_get_cr8(vcpu); |
4101 | break; | 4139 | break; |
4102 | default: | 4140 | default: |
4103 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 4141 | kvm_err("%s: unexpected cr %u\n", __func__, cr); |
4104 | return 0; | 4142 | return 0; |
4105 | } | 4143 | } |
4106 | 4144 | ||
@@ -4129,7 +4167,7 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) | |||
4129 | res = kvm_set_cr8(vcpu, val); | 4167 | res = kvm_set_cr8(vcpu, val); |
4130 | break; | 4168 | break; |
4131 | default: | 4169 | default: |
4132 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 4170 | kvm_err("%s: unexpected cr %u\n", __func__, cr); |
4133 | res = -1; | 4171 | res = -1; |
4134 | } | 4172 | } |
4135 | 4173 | ||
@@ -4281,26 +4319,10 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt, | |||
4281 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); | 4319 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); |
4282 | } | 4320 | } |
4283 | 4321 | ||
4284 | static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, | 4322 | static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, |
4285 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) | 4323 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) |
4286 | { | 4324 | { |
4287 | struct kvm_cpuid_entry2 *cpuid = NULL; | 4325 | kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); |
4288 | |||
4289 | if (eax && ecx) | ||
4290 | cpuid = kvm_find_cpuid_entry(emul_to_vcpu(ctxt), | ||
4291 | *eax, *ecx); | ||
4292 | |||
4293 | if (cpuid) { | ||
4294 | *eax = cpuid->eax; | ||
4295 | *ecx = cpuid->ecx; | ||
4296 | if (ebx) | ||
4297 | *ebx = cpuid->ebx; | ||
4298 | if (edx) | ||
4299 | *edx = cpuid->edx; | ||
4300 | return true; | ||
4301 | } | ||
4302 | |||
4303 | return false; | ||
4304 | } | 4326 | } |
4305 | 4327 | ||
4306 | static struct x86_emulate_ops emulate_ops = { | 4328 | static struct x86_emulate_ops emulate_ops = { |
@@ -5263,10 +5285,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5263 | kvm_deliver_pmi(vcpu); | 5285 | kvm_deliver_pmi(vcpu); |
5264 | } | 5286 | } |
5265 | 5287 | ||
5266 | r = kvm_mmu_reload(vcpu); | ||
5267 | if (unlikely(r)) | ||
5268 | goto out; | ||
5269 | |||
5270 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5288 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
5271 | inject_pending_event(vcpu); | 5289 | inject_pending_event(vcpu); |
5272 | 5290 | ||
@@ -5282,6 +5300,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5282 | } | 5300 | } |
5283 | } | 5301 | } |
5284 | 5302 | ||
5303 | r = kvm_mmu_reload(vcpu); | ||
5304 | if (unlikely(r)) { | ||
5305 | goto cancel_injection; | ||
5306 | } | ||
5307 | |||
5285 | preempt_disable(); | 5308 | preempt_disable(); |
5286 | 5309 | ||
5287 | kvm_x86_ops->prepare_guest_switch(vcpu); | 5310 | kvm_x86_ops->prepare_guest_switch(vcpu); |
@@ -5304,9 +5327,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5304 | smp_wmb(); | 5327 | smp_wmb(); |
5305 | local_irq_enable(); | 5328 | local_irq_enable(); |
5306 | preempt_enable(); | 5329 | preempt_enable(); |
5307 | kvm_x86_ops->cancel_injection(vcpu); | ||
5308 | r = 1; | 5330 | r = 1; |
5309 | goto out; | 5331 | goto cancel_injection; |
5310 | } | 5332 | } |
5311 | 5333 | ||
5312 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 5334 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
@@ -5370,9 +5392,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5370 | if (unlikely(vcpu->arch.tsc_always_catchup)) | 5392 | if (unlikely(vcpu->arch.tsc_always_catchup)) |
5371 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 5393 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
5372 | 5394 | ||
5373 | kvm_lapic_sync_from_vapic(vcpu); | 5395 | if (vcpu->arch.apic_attention) |
5396 | kvm_lapic_sync_from_vapic(vcpu); | ||
5374 | 5397 | ||
5375 | r = kvm_x86_ops->handle_exit(vcpu); | 5398 | r = kvm_x86_ops->handle_exit(vcpu); |
5399 | return r; | ||
5400 | |||
5401 | cancel_injection: | ||
5402 | kvm_x86_ops->cancel_injection(vcpu); | ||
5403 | if (unlikely(vcpu->arch.apic_attention)) | ||
5404 | kvm_lapic_sync_from_vapic(vcpu); | ||
5376 | out: | 5405 | out: |
5377 | return r; | 5406 | return r; |
5378 | } | 5407 | } |
@@ -5456,33 +5485,55 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5456 | return r; | 5485 | return r; |
5457 | } | 5486 | } |
5458 | 5487 | ||
5488 | /* | ||
5489 | * Implements the following, as a state machine: | ||
5490 | * | ||
5491 | * read: | ||
5492 | * for each fragment | ||
5493 | * write gpa, len | ||
5494 | * exit | ||
5495 | * copy data | ||
5496 | * execute insn | ||
5497 | * | ||
5498 | * write: | ||
5499 | * for each fragment | ||
5500 | * write gpa, len | ||
5501 | * copy data | ||
5502 | * exit | ||
5503 | */ | ||
5459 | static int complete_mmio(struct kvm_vcpu *vcpu) | 5504 | static int complete_mmio(struct kvm_vcpu *vcpu) |
5460 | { | 5505 | { |
5461 | struct kvm_run *run = vcpu->run; | 5506 | struct kvm_run *run = vcpu->run; |
5507 | struct kvm_mmio_fragment *frag; | ||
5462 | int r; | 5508 | int r; |
5463 | 5509 | ||
5464 | if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) | 5510 | if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) |
5465 | return 1; | 5511 | return 1; |
5466 | 5512 | ||
5467 | if (vcpu->mmio_needed) { | 5513 | if (vcpu->mmio_needed) { |
5468 | vcpu->mmio_needed = 0; | 5514 | /* Complete previous fragment */ |
5515 | frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++]; | ||
5469 | if (!vcpu->mmio_is_write) | 5516 | if (!vcpu->mmio_is_write) |
5470 | memcpy(vcpu->mmio_data + vcpu->mmio_index, | 5517 | memcpy(frag->data, run->mmio.data, frag->len); |
5471 | run->mmio.data, 8); | 5518 | if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { |
5472 | vcpu->mmio_index += 8; | 5519 | vcpu->mmio_needed = 0; |
5473 | if (vcpu->mmio_index < vcpu->mmio_size) { | 5520 | if (vcpu->mmio_is_write) |
5474 | run->exit_reason = KVM_EXIT_MMIO; | 5521 | return 1; |
5475 | run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; | 5522 | vcpu->mmio_read_completed = 1; |
5476 | memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); | 5523 | goto done; |
5477 | run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); | ||
5478 | run->mmio.is_write = vcpu->mmio_is_write; | ||
5479 | vcpu->mmio_needed = 1; | ||
5480 | return 0; | ||
5481 | } | 5524 | } |
5525 | /* Initiate next fragment */ | ||
5526 | ++frag; | ||
5527 | run->exit_reason = KVM_EXIT_MMIO; | ||
5528 | run->mmio.phys_addr = frag->gpa; | ||
5482 | if (vcpu->mmio_is_write) | 5529 | if (vcpu->mmio_is_write) |
5483 | return 1; | 5530 | memcpy(run->mmio.data, frag->data, frag->len); |
5484 | vcpu->mmio_read_completed = 1; | 5531 | run->mmio.len = frag->len; |
5532 | run->mmio.is_write = vcpu->mmio_is_write; | ||
5533 | return 0; | ||
5534 | |||
5485 | } | 5535 | } |
5536 | done: | ||
5486 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 5537 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
5487 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); | 5538 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); |
5488 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 5539 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
@@ -6264,7 +6315,7 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, | |||
6264 | 6315 | ||
6265 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 6316 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
6266 | if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { | 6317 | if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { |
6267 | vfree(free->arch.lpage_info[i]); | 6318 | kvm_kvfree(free->arch.lpage_info[i]); |
6268 | free->arch.lpage_info[i] = NULL; | 6319 | free->arch.lpage_info[i] = NULL; |
6269 | } | 6320 | } |
6270 | } | 6321 | } |
@@ -6283,7 +6334,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
6283 | slot->base_gfn, level) + 1; | 6334 | slot->base_gfn, level) + 1; |
6284 | 6335 | ||
6285 | slot->arch.lpage_info[i] = | 6336 | slot->arch.lpage_info[i] = |
6286 | vzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); | 6337 | kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); |
6287 | if (!slot->arch.lpage_info[i]) | 6338 | if (!slot->arch.lpage_info[i]) |
6288 | goto out_free; | 6339 | goto out_free; |
6289 | 6340 | ||
@@ -6310,7 +6361,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
6310 | 6361 | ||
6311 | out_free: | 6362 | out_free: |
6312 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 6363 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
6313 | vfree(slot->arch.lpage_info[i]); | 6364 | kvm_kvfree(slot->arch.lpage_info[i]); |
6314 | slot->arch.lpage_info[i] = NULL; | 6365 | slot->arch.lpage_info[i] = NULL; |
6315 | } | 6366 | } |
6316 | return -ENOMEM; | 6367 | return -ENOMEM; |
@@ -6399,21 +6450,9 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
6399 | kvm_cpu_has_interrupt(vcpu)); | 6450 | kvm_cpu_has_interrupt(vcpu)); |
6400 | } | 6451 | } |
6401 | 6452 | ||
6402 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | 6453 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) |
6403 | { | 6454 | { |
6404 | int me; | 6455 | return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; |
6405 | int cpu = vcpu->cpu; | ||
6406 | |||
6407 | if (waitqueue_active(&vcpu->wq)) { | ||
6408 | wake_up_interruptible(&vcpu->wq); | ||
6409 | ++vcpu->stat.halt_wakeup; | ||
6410 | } | ||
6411 | |||
6412 | me = get_cpu(); | ||
6413 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) | ||
6414 | if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) | ||
6415 | smp_send_reschedule(cpu); | ||
6416 | put_cpu(); | ||
6417 | } | 6456 | } |
6418 | 6457 | ||
6419 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | 6458 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index cb80c293cdd..3d1134ddb88 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -64,7 +64,7 @@ static inline int is_pse(struct kvm_vcpu *vcpu) | |||
64 | 64 | ||
65 | static inline int is_paging(struct kvm_vcpu *vcpu) | 65 | static inline int is_paging(struct kvm_vcpu *vcpu) |
66 | { | 66 | { |
67 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 67 | return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG)); |
68 | } | 68 | } |
69 | 69 | ||
70 | static inline u32 bit(int bitno) | 70 | static inline u32 bit(int bitno) |
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 459b58a8a15..25b7ae8d058 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c | |||
@@ -115,7 +115,7 @@ EXPORT_SYMBOL(csum_partial_copy_to_user); | |||
115 | * @src: source address | 115 | * @src: source address |
116 | * @dst: destination address | 116 | * @dst: destination address |
117 | * @len: number of bytes to be copied. | 117 | * @len: number of bytes to be copied. |
118 | * @isum: initial sum that is added into the result (32bit unfolded) | 118 | * @sum: initial sum that is added into the result (32bit unfolded) |
119 | * | 119 | * |
120 | * Returns an 32bit unfolded checksum of the buffer. | 120 | * Returns an 32bit unfolded checksum of the buffer. |
121 | */ | 121 | */ |
diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c index a311cc59b65..8d6ef78b5d0 100644 --- a/arch/x86/lib/msr-reg-export.c +++ b/arch/x86/lib/msr-reg-export.c | |||
@@ -1,5 +1,5 @@ | |||
1 | #include <linux/module.h> | 1 | #include <linux/module.h> |
2 | #include <asm/msr.h> | 2 | #include <asm/msr.h> |
3 | 3 | ||
4 | EXPORT_SYMBOL(native_rdmsr_safe_regs); | 4 | EXPORT_SYMBOL(rdmsr_safe_regs); |
5 | EXPORT_SYMBOL(native_wrmsr_safe_regs); | 5 | EXPORT_SYMBOL(wrmsr_safe_regs); |
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 69fa10623f2..f6d13eefad1 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S | |||
@@ -6,13 +6,13 @@ | |||
6 | 6 | ||
7 | #ifdef CONFIG_X86_64 | 7 | #ifdef CONFIG_X86_64 |
8 | /* | 8 | /* |
9 | * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]); | 9 | * int {rdmsr,wrmsr}_safe_regs(u32 gprs[8]); |
10 | * | 10 | * |
11 | * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] | 11 | * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] |
12 | * | 12 | * |
13 | */ | 13 | */ |
14 | .macro op_safe_regs op | 14 | .macro op_safe_regs op |
15 | ENTRY(native_\op\()_safe_regs) | 15 | ENTRY(\op\()_safe_regs) |
16 | CFI_STARTPROC | 16 | CFI_STARTPROC |
17 | pushq_cfi %rbx | 17 | pushq_cfi %rbx |
18 | pushq_cfi %rbp | 18 | pushq_cfi %rbp |
@@ -45,13 +45,13 @@ ENTRY(native_\op\()_safe_regs) | |||
45 | 45 | ||
46 | _ASM_EXTABLE(1b, 3b) | 46 | _ASM_EXTABLE(1b, 3b) |
47 | CFI_ENDPROC | 47 | CFI_ENDPROC |
48 | ENDPROC(native_\op\()_safe_regs) | 48 | ENDPROC(\op\()_safe_regs) |
49 | .endm | 49 | .endm |
50 | 50 | ||
51 | #else /* X86_32 */ | 51 | #else /* X86_32 */ |
52 | 52 | ||
53 | .macro op_safe_regs op | 53 | .macro op_safe_regs op |
54 | ENTRY(native_\op\()_safe_regs) | 54 | ENTRY(\op\()_safe_regs) |
55 | CFI_STARTPROC | 55 | CFI_STARTPROC |
56 | pushl_cfi %ebx | 56 | pushl_cfi %ebx |
57 | pushl_cfi %ebp | 57 | pushl_cfi %ebp |
@@ -92,7 +92,7 @@ ENTRY(native_\op\()_safe_regs) | |||
92 | 92 | ||
93 | _ASM_EXTABLE(1b, 3b) | 93 | _ASM_EXTABLE(1b, 3b) |
94 | CFI_ENDPROC | 94 | CFI_ENDPROC |
95 | ENDPROC(native_\op\()_safe_regs) | 95 | ENDPROC(\op\()_safe_regs) |
96 | .endm | 96 | .endm |
97 | 97 | ||
98 | #endif | 98 | #endif |
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 2e4e4b02c37..4f74d94c8d9 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | 9 | ||
10 | #include <asm/word-at-a-time.h> | 10 | #include <asm/word-at-a-time.h> |
11 | #include <linux/sched.h> | ||
11 | 12 | ||
12 | /* | 13 | /* |
13 | * best effort, GUP based copy_from_user() that is NMI-safe | 14 | * best effort, GUP based copy_from_user() that is NMI-safe |
@@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | |||
21 | void *map; | 22 | void *map; |
22 | int ret; | 23 | int ret; |
23 | 24 | ||
25 | if (__range_not_ok(from, n, TASK_SIZE)) | ||
26 | return len; | ||
27 | |||
24 | do { | 28 | do { |
25 | ret = __get_user_pages_fast(addr, 1, 0, &page); | 29 | ret = __get_user_pages_fast(addr, 1, 0, &page); |
26 | if (!ret) | 30 | if (!ret) |
@@ -43,100 +47,3 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | |||
43 | return len; | 47 | return len; |
44 | } | 48 | } |
45 | EXPORT_SYMBOL_GPL(copy_from_user_nmi); | 49 | EXPORT_SYMBOL_GPL(copy_from_user_nmi); |
46 | |||
47 | /* | ||
48 | * Do a strncpy, return length of string without final '\0'. | ||
49 | * 'count' is the user-supplied count (return 'count' if we | ||
50 | * hit it), 'max' is the address space maximum (and we return | ||
51 | * -EFAULT if we hit it). | ||
52 | */ | ||
53 | static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max) | ||
54 | { | ||
55 | long res = 0; | ||
56 | |||
57 | /* | ||
58 | * Truncate 'max' to the user-specified limit, so that | ||
59 | * we only have one limit we need to check in the loop | ||
60 | */ | ||
61 | if (max > count) | ||
62 | max = count; | ||
63 | |||
64 | while (max >= sizeof(unsigned long)) { | ||
65 | unsigned long c, mask; | ||
66 | |||
67 | /* Fall back to byte-at-a-time if we get a page fault */ | ||
68 | if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) | ||
69 | break; | ||
70 | mask = has_zero(c); | ||
71 | if (mask) { | ||
72 | mask = (mask - 1) & ~mask; | ||
73 | mask >>= 7; | ||
74 | *(unsigned long *)(dst+res) = c & mask; | ||
75 | return res + count_masked_bytes(mask); | ||
76 | } | ||
77 | *(unsigned long *)(dst+res) = c; | ||
78 | res += sizeof(unsigned long); | ||
79 | max -= sizeof(unsigned long); | ||
80 | } | ||
81 | |||
82 | while (max) { | ||
83 | char c; | ||
84 | |||
85 | if (unlikely(__get_user(c,src+res))) | ||
86 | return -EFAULT; | ||
87 | dst[res] = c; | ||
88 | if (!c) | ||
89 | return res; | ||
90 | res++; | ||
91 | max--; | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * Uhhuh. We hit 'max'. But was that the user-specified maximum | ||
96 | * too? If so, that's ok - we got as much as the user asked for. | ||
97 | */ | ||
98 | if (res >= count) | ||
99 | return res; | ||
100 | |||
101 | /* | ||
102 | * Nope: we hit the address space limit, and we still had more | ||
103 | * characters the caller would have wanted. That's an EFAULT. | ||
104 | */ | ||
105 | return -EFAULT; | ||
106 | } | ||
107 | |||
108 | /** | ||
109 | * strncpy_from_user: - Copy a NUL terminated string from userspace. | ||
110 | * @dst: Destination address, in kernel space. This buffer must be at | ||
111 | * least @count bytes long. | ||
112 | * @src: Source address, in user space. | ||
113 | * @count: Maximum number of bytes to copy, including the trailing NUL. | ||
114 | * | ||
115 | * Copies a NUL-terminated string from userspace to kernel space. | ||
116 | * | ||
117 | * On success, returns the length of the string (not including the trailing | ||
118 | * NUL). | ||
119 | * | ||
120 | * If access to userspace fails, returns -EFAULT (some data may have been | ||
121 | * copied). | ||
122 | * | ||
123 | * If @count is smaller than the length of the string, copies @count bytes | ||
124 | * and returns @count. | ||
125 | */ | ||
126 | long | ||
127 | strncpy_from_user(char *dst, const char __user *src, long count) | ||
128 | { | ||
129 | unsigned long max_addr, src_addr; | ||
130 | |||
131 | if (unlikely(count <= 0)) | ||
132 | return 0; | ||
133 | |||
134 | max_addr = current_thread_info()->addr_limit.seg; | ||
135 | src_addr = (unsigned long)src; | ||
136 | if (likely(src_addr < max_addr)) { | ||
137 | unsigned long max = max_addr - src_addr; | ||
138 | return do_strncpy_from_user(dst, src, count, max); | ||
139 | } | ||
140 | return -EFAULT; | ||
141 | } | ||
142 | EXPORT_SYMBOL(strncpy_from_user); | ||
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 883b216c60b..1781b2f950e 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c | |||
@@ -95,47 +95,6 @@ __clear_user(void __user *to, unsigned long n) | |||
95 | } | 95 | } |
96 | EXPORT_SYMBOL(__clear_user); | 96 | EXPORT_SYMBOL(__clear_user); |
97 | 97 | ||
98 | /** | ||
99 | * strnlen_user: - Get the size of a string in user space. | ||
100 | * @s: The string to measure. | ||
101 | * @n: The maximum valid length | ||
102 | * | ||
103 | * Get the size of a NUL-terminated string in user space. | ||
104 | * | ||
105 | * Returns the size of the string INCLUDING the terminating NUL. | ||
106 | * On exception, returns 0. | ||
107 | * If the string is too long, returns a value greater than @n. | ||
108 | */ | ||
109 | long strnlen_user(const char __user *s, long n) | ||
110 | { | ||
111 | unsigned long mask = -__addr_ok(s); | ||
112 | unsigned long res, tmp; | ||
113 | |||
114 | might_fault(); | ||
115 | |||
116 | __asm__ __volatile__( | ||
117 | " testl %0, %0\n" | ||
118 | " jz 3f\n" | ||
119 | " andl %0,%%ecx\n" | ||
120 | "0: repne; scasb\n" | ||
121 | " setne %%al\n" | ||
122 | " subl %%ecx,%0\n" | ||
123 | " addl %0,%%eax\n" | ||
124 | "1:\n" | ||
125 | ".section .fixup,\"ax\"\n" | ||
126 | "2: xorl %%eax,%%eax\n" | ||
127 | " jmp 1b\n" | ||
128 | "3: movb $1,%%al\n" | ||
129 | " jmp 1b\n" | ||
130 | ".previous\n" | ||
131 | _ASM_EXTABLE(0b,2b) | ||
132 | :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp) | ||
133 | :"0" (n), "1" (s), "2" (0), "3" (mask) | ||
134 | :"cc"); | ||
135 | return res & mask; | ||
136 | } | ||
137 | EXPORT_SYMBOL(strnlen_user); | ||
138 | |||
139 | #ifdef CONFIG_X86_INTEL_USERCOPY | 98 | #ifdef CONFIG_X86_INTEL_USERCOPY |
140 | static unsigned long | 99 | static unsigned long |
141 | __copy_user_intel(void __user *to, const void *from, unsigned long size) | 100 | __copy_user_intel(void __user *to, const void *from, unsigned long size) |
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 0d0326f388c..e5b130bc2d0 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c | |||
@@ -52,54 +52,6 @@ unsigned long clear_user(void __user *to, unsigned long n) | |||
52 | } | 52 | } |
53 | EXPORT_SYMBOL(clear_user); | 53 | EXPORT_SYMBOL(clear_user); |
54 | 54 | ||
55 | /* | ||
56 | * Return the size of a string (including the ending 0) | ||
57 | * | ||
58 | * Return 0 on exception, a value greater than N if too long | ||
59 | */ | ||
60 | |||
61 | long __strnlen_user(const char __user *s, long n) | ||
62 | { | ||
63 | long res = 0; | ||
64 | char c; | ||
65 | |||
66 | while (1) { | ||
67 | if (res>n) | ||
68 | return n+1; | ||
69 | if (__get_user(c, s)) | ||
70 | return 0; | ||
71 | if (!c) | ||
72 | return res+1; | ||
73 | res++; | ||
74 | s++; | ||
75 | } | ||
76 | } | ||
77 | EXPORT_SYMBOL(__strnlen_user); | ||
78 | |||
79 | long strnlen_user(const char __user *s, long n) | ||
80 | { | ||
81 | if (!access_ok(VERIFY_READ, s, 1)) | ||
82 | return 0; | ||
83 | return __strnlen_user(s, n); | ||
84 | } | ||
85 | EXPORT_SYMBOL(strnlen_user); | ||
86 | |||
87 | long strlen_user(const char __user *s) | ||
88 | { | ||
89 | long res = 0; | ||
90 | char c; | ||
91 | |||
92 | for (;;) { | ||
93 | if (get_user(c, s)) | ||
94 | return 0; | ||
95 | if (!c) | ||
96 | return res+1; | ||
97 | res++; | ||
98 | s++; | ||
99 | } | ||
100 | } | ||
101 | EXPORT_SYMBOL(strlen_user); | ||
102 | |||
103 | unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) | 55 | unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) |
104 | { | 56 | { |
105 | if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { | 57 | if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { |
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 81913790442..5d7e51f3fd2 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt | |||
@@ -28,7 +28,7 @@ | |||
28 | # - (66): the last prefix is 0x66 | 28 | # - (66): the last prefix is 0x66 |
29 | # - (F3): the last prefix is 0xF3 | 29 | # - (F3): the last prefix is 0xF3 |
30 | # - (F2): the last prefix is 0xF2 | 30 | # - (F2): the last prefix is 0xF2 |
31 | # | 31 | # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) |
32 | 32 | ||
33 | Table: one byte opcode | 33 | Table: one byte opcode |
34 | Referrer: | 34 | Referrer: |
@@ -515,12 +515,12 @@ b4: LFS Gv,Mp | |||
515 | b5: LGS Gv,Mp | 515 | b5: LGS Gv,Mp |
516 | b6: MOVZX Gv,Eb | 516 | b6: MOVZX Gv,Eb |
517 | b7: MOVZX Gv,Ew | 517 | b7: MOVZX Gv,Ew |
518 | b8: JMPE | POPCNT Gv,Ev (F3) | 518 | b8: JMPE (!F3) | POPCNT Gv,Ev (F3) |
519 | b9: Grp10 (1A) | 519 | b9: Grp10 (1A) |
520 | ba: Grp8 Ev,Ib (1A) | 520 | ba: Grp8 Ev,Ib (1A) |
521 | bb: BTC Ev,Gv | 521 | bb: BTC Ev,Gv |
522 | bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) | 522 | bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) |
523 | bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) | 523 | bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) |
524 | be: MOVSX Gv,Eb | 524 | be: MOVSX Gv,Eb |
525 | bf: MOVSX Gv,Ew | 525 | bf: MOVSX Gv,Ew |
526 | # 0x0f 0xc0-0xcf | 526 | # 0x0f 0xc0-0xcf |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 319b6f2fb8b..e0e6990723e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -62,7 +62,8 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en | |||
62 | extra += PMD_SIZE; | 62 | extra += PMD_SIZE; |
63 | #endif | 63 | #endif |
64 | /* The first 2/4M doesn't use large pages. */ | 64 | /* The first 2/4M doesn't use large pages. */ |
65 | extra += mr->end - mr->start; | 65 | if (mr->start < PMD_SIZE) |
66 | extra += mr->end - mr->start; | ||
66 | 67 | ||
67 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | 68 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; |
68 | } else | 69 | } else |
@@ -84,8 +85,9 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en | |||
84 | pgt_buf_end = pgt_buf_start; | 85 | pgt_buf_end = pgt_buf_start; |
85 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); | 86 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); |
86 | 87 | ||
87 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | 88 | printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", |
88 | end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); | 89 | end - 1, pgt_buf_start << PAGE_SHIFT, |
90 | (pgt_buf_top << PAGE_SHIFT) - 1); | ||
89 | } | 91 | } |
90 | 92 | ||
91 | void __init native_pagetable_reserve(u64 start, u64 end) | 93 | void __init native_pagetable_reserve(u64 start, u64 end) |
@@ -132,7 +134,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
132 | int nr_range, i; | 134 | int nr_range, i; |
133 | int use_pse, use_gbpages; | 135 | int use_pse, use_gbpages; |
134 | 136 | ||
135 | printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); | 137 | printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n", |
138 | start, end - 1); | ||
136 | 139 | ||
137 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) | 140 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
138 | /* | 141 | /* |
@@ -251,8 +254,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
251 | } | 254 | } |
252 | 255 | ||
253 | for (i = 0; i < nr_range; i++) | 256 | for (i = 0; i < nr_range; i++) |
254 | printk(KERN_DEBUG " %010lx - %010lx page %s\n", | 257 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", |
255 | mr[i].start, mr[i].end, | 258 | mr[i].start, mr[i].end - 1, |
256 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | 259 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( |
257 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | 260 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); |
258 | 261 | ||
@@ -350,8 +353,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
350 | * create a kernel page fault: | 353 | * create a kernel page fault: |
351 | */ | 354 | */ |
352 | #ifdef CONFIG_DEBUG_PAGEALLOC | 355 | #ifdef CONFIG_DEBUG_PAGEALLOC |
353 | printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", | 356 | printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n", |
354 | begin, end); | 357 | begin, end - 1); |
355 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); | 358 | set_memory_np(begin, (end - begin) >> PAGE_SHIFT); |
356 | #else | 359 | #else |
357 | /* | 360 | /* |
@@ -382,7 +385,7 @@ void free_initmem(void) | |||
382 | } | 385 | } |
383 | 386 | ||
384 | #ifdef CONFIG_BLK_DEV_INITRD | 387 | #ifdef CONFIG_BLK_DEV_INITRD |
385 | void free_initrd_mem(unsigned long start, unsigned long end) | 388 | void __init free_initrd_mem(unsigned long start, unsigned long end) |
386 | { | 389 | { |
387 | /* | 390 | /* |
388 | * end could be not aligned, and We can not align that, | 391 | * end could be not aligned, and We can not align that, |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index be1ef574ce9..78fe3f1ac49 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -180,7 +180,7 @@ err_free_memtype: | |||
180 | 180 | ||
181 | /** | 181 | /** |
182 | * ioremap_nocache - map bus memory into CPU space | 182 | * ioremap_nocache - map bus memory into CPU space |
183 | * @offset: bus address of the memory | 183 | * @phys_addr: bus address of the memory |
184 | * @size: size of the resource to map | 184 | * @size: size of the resource to map |
185 | * | 185 | * |
186 | * ioremap_nocache performs a platform specific sequence of operations to | 186 | * ioremap_nocache performs a platform specific sequence of operations to |
@@ -217,7 +217,7 @@ EXPORT_SYMBOL(ioremap_nocache); | |||
217 | 217 | ||
218 | /** | 218 | /** |
219 | * ioremap_wc - map memory into CPU space write combined | 219 | * ioremap_wc - map memory into CPU space write combined |
220 | * @offset: bus address of the memory | 220 | * @phys_addr: bus address of the memory |
221 | * @size: size of the resource to map | 221 | * @size: size of the resource to map |
222 | * | 222 | * |
223 | * This version of ioremap ensures that the memory is marked write combining. | 223 | * This version of ioremap ensures that the memory is marked write combining. |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 19d3fa08b11..2d125be1bae 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -141,8 +141,8 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end, | |||
141 | 141 | ||
142 | /* whine about and ignore invalid blks */ | 142 | /* whine about and ignore invalid blks */ |
143 | if (start > end || nid < 0 || nid >= MAX_NUMNODES) { | 143 | if (start > end || nid < 0 || nid >= MAX_NUMNODES) { |
144 | pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", | 144 | pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", |
145 | nid, start, end); | 145 | nid, start, end - 1); |
146 | return 0; | 146 | return 0; |
147 | } | 147 | } |
148 | 148 | ||
@@ -210,8 +210,8 @@ static void __init setup_node_data(int nid, u64 start, u64 end) | |||
210 | 210 | ||
211 | start = roundup(start, ZONE_ALIGN); | 211 | start = roundup(start, ZONE_ALIGN); |
212 | 212 | ||
213 | printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", | 213 | printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", |
214 | nid, start, end); | 214 | nid, start, end - 1); |
215 | 215 | ||
216 | /* | 216 | /* |
217 | * Allocate node data. Try remap allocator first, node-local | 217 | * Allocate node data. Try remap allocator first, node-local |
@@ -232,7 +232,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end) | |||
232 | } | 232 | } |
233 | 233 | ||
234 | /* report and initialize */ | 234 | /* report and initialize */ |
235 | printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n", | 235 | printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n", |
236 | nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); | 236 | nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); |
237 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); | 237 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); |
238 | if (!remapped && tnid != nid) | 238 | if (!remapped && tnid != nid) |
@@ -291,14 +291,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) | |||
291 | */ | 291 | */ |
292 | if (bi->end > bj->start && bi->start < bj->end) { | 292 | if (bi->end > bj->start && bi->start < bj->end) { |
293 | if (bi->nid != bj->nid) { | 293 | if (bi->nid != bj->nid) { |
294 | pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", | 294 | pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n", |
295 | bi->nid, bi->start, bi->end, | 295 | bi->nid, bi->start, bi->end - 1, |
296 | bj->nid, bj->start, bj->end); | 296 | bj->nid, bj->start, bj->end - 1); |
297 | return -EINVAL; | 297 | return -EINVAL; |
298 | } | 298 | } |
299 | pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", | 299 | pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n", |
300 | bi->nid, bi->start, bi->end, | 300 | bi->nid, bi->start, bi->end - 1, |
301 | bj->start, bj->end); | 301 | bj->start, bj->end - 1); |
302 | } | 302 | } |
303 | 303 | ||
304 | /* | 304 | /* |
@@ -320,9 +320,9 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) | |||
320 | } | 320 | } |
321 | if (k < mi->nr_blks) | 321 | if (k < mi->nr_blks) |
322 | continue; | 322 | continue; |
323 | printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n", | 323 | printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n", |
324 | bi->nid, bi->start, bi->end, bj->start, bj->end, | 324 | bi->nid, bi->start, bi->end - 1, bj->start, |
325 | start, end); | 325 | bj->end - 1, start, end - 1); |
326 | bi->start = start; | 326 | bi->start = start; |
327 | bi->end = end; | 327 | bi->end = end; |
328 | numa_remove_memblk_from(j--, mi); | 328 | numa_remove_memblk_from(j--, mi); |
@@ -616,8 +616,8 @@ static int __init dummy_numa_init(void) | |||
616 | { | 616 | { |
617 | printk(KERN_INFO "%s\n", | 617 | printk(KERN_INFO "%s\n", |
618 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); | 618 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); |
619 | printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n", | 619 | printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n", |
620 | 0LLU, PFN_PHYS(max_pfn)); | 620 | 0LLU, PFN_PHYS(max_pfn) - 1); |
621 | 621 | ||
622 | node_set(0, numa_nodes_parsed); | 622 | node_set(0, numa_nodes_parsed); |
623 | numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); | 623 | numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); |
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index 871dd886817..dbbbb47260c 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c | |||
@@ -68,8 +68,8 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei, | |||
68 | numa_remove_memblk_from(phys_blk, pi); | 68 | numa_remove_memblk_from(phys_blk, pi); |
69 | } | 69 | } |
70 | 70 | ||
71 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, | 71 | printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n", |
72 | eb->start, eb->end, (eb->end - eb->start) >> 20); | 72 | nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20); |
73 | return 0; | 73 | return 0; |
74 | } | 74 | } |
75 | 75 | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e1ebde31521..a718e0d2350 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -122,7 +122,7 @@ within(unsigned long addr, unsigned long start, unsigned long end) | |||
122 | 122 | ||
123 | /** | 123 | /** |
124 | * clflush_cache_range - flush a cache range with clflush | 124 | * clflush_cache_range - flush a cache range with clflush |
125 | * @addr: virtual start address | 125 | * @vaddr: virtual start address |
126 | * @size: number of bytes to flush | 126 | * @size: number of bytes to flush |
127 | * | 127 | * |
128 | * clflush is an unordered instruction which needs fencing with mfence | 128 | * clflush is an unordered instruction which needs fencing with mfence |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index f6ff57b7efa..3d68ef6d226 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -158,31 +158,47 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) | |||
158 | return req_type; | 158 | return req_type; |
159 | } | 159 | } |
160 | 160 | ||
161 | struct pagerange_state { | ||
162 | unsigned long cur_pfn; | ||
163 | int ram; | ||
164 | int not_ram; | ||
165 | }; | ||
166 | |||
167 | static int | ||
168 | pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg) | ||
169 | { | ||
170 | struct pagerange_state *state = arg; | ||
171 | |||
172 | state->not_ram |= initial_pfn > state->cur_pfn; | ||
173 | state->ram |= total_nr_pages > 0; | ||
174 | state->cur_pfn = initial_pfn + total_nr_pages; | ||
175 | |||
176 | return state->ram && state->not_ram; | ||
177 | } | ||
178 | |||
161 | static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) | 179 | static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) |
162 | { | 180 | { |
163 | int ram_page = 0, not_rampage = 0; | 181 | int ret = 0; |
164 | unsigned long page_nr; | 182 | unsigned long start_pfn = start >> PAGE_SHIFT; |
183 | unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
184 | struct pagerange_state state = {start_pfn, 0, 0}; | ||
165 | 185 | ||
166 | for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); | 186 | /* |
167 | ++page_nr) { | 187 | * For legacy reasons, physical address range in the legacy ISA |
168 | /* | 188 | * region is tracked as non-RAM. This will allow users of |
169 | * For legacy reasons, physical address range in the legacy ISA | 189 | * /dev/mem to map portions of legacy ISA region, even when |
170 | * region is tracked as non-RAM. This will allow users of | 190 | * some of those portions are listed(or not even listed) with |
171 | * /dev/mem to map portions of legacy ISA region, even when | 191 | * different e820 types(RAM/reserved/..) |
172 | * some of those portions are listed(or not even listed) with | 192 | */ |
173 | * different e820 types(RAM/reserved/..) | 193 | if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT) |
174 | */ | 194 | start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT; |
175 | if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) && | 195 | |
176 | page_is_ram(page_nr)) | 196 | if (start_pfn < end_pfn) { |
177 | ram_page = 1; | 197 | ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, |
178 | else | 198 | &state, pagerange_is_ram_callback); |
179 | not_rampage = 1; | ||
180 | |||
181 | if (ram_page == not_rampage) | ||
182 | return -1; | ||
183 | } | 199 | } |
184 | 200 | ||
185 | return ram_page; | 201 | return (ret > 0) ? -1 : (state.ram ? 1 : 0); |
186 | } | 202 | } |
187 | 203 | ||
188 | /* | 204 | /* |
@@ -209,9 +225,8 @@ static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, | |||
209 | page = pfn_to_page(pfn); | 225 | page = pfn_to_page(pfn); |
210 | type = get_page_memtype(page); | 226 | type = get_page_memtype(page); |
211 | if (type != -1) { | 227 | if (type != -1) { |
212 | printk(KERN_INFO "reserve_ram_pages_type failed " | 228 | printk(KERN_INFO "reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%lx, req 0x%lx\n", |
213 | "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", | 229 | start, end - 1, type, req_type); |
214 | start, end, type, req_type); | ||
215 | if (new_type) | 230 | if (new_type) |
216 | *new_type = type; | 231 | *new_type = type; |
217 | 232 | ||
@@ -314,9 +329,9 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
314 | 329 | ||
315 | err = rbt_memtype_check_insert(new, new_type); | 330 | err = rbt_memtype_check_insert(new, new_type); |
316 | if (err) { | 331 | if (err) { |
317 | printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " | 332 | printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", |
318 | "track %s, req %s\n", | 333 | start, end - 1, |
319 | start, end, cattr_name(new->type), cattr_name(req_type)); | 334 | cattr_name(new->type), cattr_name(req_type)); |
320 | kfree(new); | 335 | kfree(new); |
321 | spin_unlock(&memtype_lock); | 336 | spin_unlock(&memtype_lock); |
322 | 337 | ||
@@ -325,8 +340,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
325 | 340 | ||
326 | spin_unlock(&memtype_lock); | 341 | spin_unlock(&memtype_lock); |
327 | 342 | ||
328 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | 343 | dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n", |
329 | start, end, cattr_name(new->type), cattr_name(req_type), | 344 | start, end - 1, cattr_name(new->type), cattr_name(req_type), |
330 | new_type ? cattr_name(*new_type) : "-"); | 345 | new_type ? cattr_name(*new_type) : "-"); |
331 | 346 | ||
332 | return err; | 347 | return err; |
@@ -360,14 +375,14 @@ int free_memtype(u64 start, u64 end) | |||
360 | spin_unlock(&memtype_lock); | 375 | spin_unlock(&memtype_lock); |
361 | 376 | ||
362 | if (!entry) { | 377 | if (!entry) { |
363 | printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", | 378 | printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n", |
364 | current->comm, current->pid, start, end); | 379 | current->comm, current->pid, start, end - 1); |
365 | return -EINVAL; | 380 | return -EINVAL; |
366 | } | 381 | } |
367 | 382 | ||
368 | kfree(entry); | 383 | kfree(entry); |
369 | 384 | ||
370 | dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); | 385 | dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1); |
371 | 386 | ||
372 | return 0; | 387 | return 0; |
373 | } | 388 | } |
@@ -491,9 +506,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) | |||
491 | 506 | ||
492 | while (cursor < to) { | 507 | while (cursor < to) { |
493 | if (!devmem_is_allowed(pfn)) { | 508 | if (!devmem_is_allowed(pfn)) { |
494 | printk(KERN_INFO | 509 | printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n", |
495 | "Program %s tried to access /dev/mem between %Lx->%Lx.\n", | 510 | current->comm, from, to - 1); |
496 | current->comm, from, to); | ||
497 | return 0; | 511 | return 0; |
498 | } | 512 | } |
499 | cursor += PAGE_SIZE; | 513 | cursor += PAGE_SIZE; |
@@ -554,12 +568,11 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) | |||
554 | size; | 568 | size; |
555 | 569 | ||
556 | if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) { | 570 | if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) { |
557 | printk(KERN_INFO | 571 | printk(KERN_INFO "%s:%d ioremap_change_attr failed %s " |
558 | "%s:%d ioremap_change_attr failed %s " | 572 | "for [mem %#010Lx-%#010Lx]\n", |
559 | "for %Lx-%Lx\n", | ||
560 | current->comm, current->pid, | 573 | current->comm, current->pid, |
561 | cattr_name(flags), | 574 | cattr_name(flags), |
562 | base, (unsigned long long)(base + size)); | 575 | base, (unsigned long long)(base + size-1)); |
563 | return -EINVAL; | 576 | return -EINVAL; |
564 | } | 577 | } |
565 | return 0; | 578 | return 0; |
@@ -591,12 +604,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, | |||
591 | 604 | ||
592 | flags = lookup_memtype(paddr); | 605 | flags = lookup_memtype(paddr); |
593 | if (want_flags != flags) { | 606 | if (want_flags != flags) { |
594 | printk(KERN_WARNING | 607 | printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n", |
595 | "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", | ||
596 | current->comm, current->pid, | 608 | current->comm, current->pid, |
597 | cattr_name(want_flags), | 609 | cattr_name(want_flags), |
598 | (unsigned long long)paddr, | 610 | (unsigned long long)paddr, |
599 | (unsigned long long)(paddr + size), | 611 | (unsigned long long)(paddr + size - 1), |
600 | cattr_name(flags)); | 612 | cattr_name(flags)); |
601 | *vma_prot = __pgprot((pgprot_val(*vma_prot) & | 613 | *vma_prot = __pgprot((pgprot_val(*vma_prot) & |
602 | (~_PAGE_CACHE_MASK)) | | 614 | (~_PAGE_CACHE_MASK)) | |
@@ -614,11 +626,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, | |||
614 | !is_new_memtype_allowed(paddr, size, want_flags, flags)) { | 626 | !is_new_memtype_allowed(paddr, size, want_flags, flags)) { |
615 | free_memtype(paddr, paddr + size); | 627 | free_memtype(paddr, paddr + size); |
616 | printk(KERN_ERR "%s:%d map pfn expected mapping type %s" | 628 | printk(KERN_ERR "%s:%d map pfn expected mapping type %s" |
617 | " for %Lx-%Lx, got %s\n", | 629 | " for [mem %#010Lx-%#010Lx], got %s\n", |
618 | current->comm, current->pid, | 630 | current->comm, current->pid, |
619 | cattr_name(want_flags), | 631 | cattr_name(want_flags), |
620 | (unsigned long long)paddr, | 632 | (unsigned long long)paddr, |
621 | (unsigned long long)(paddr + size), | 633 | (unsigned long long)(paddr + size - 1), |
622 | cattr_name(flags)); | 634 | cattr_name(flags)); |
623 | return -EINVAL; | 635 | return -EINVAL; |
624 | } | 636 | } |
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index efb5b4b9371..4599c3e8bcb 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c | |||
@@ -176,8 +176,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
176 | return; | 176 | return; |
177 | } | 177 | } |
178 | 178 | ||
179 | printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, | 179 | node_set(node, numa_nodes_parsed); |
180 | start, end); | 180 | |
181 | printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", | ||
182 | node, pxm, | ||
183 | (unsigned long long) start, (unsigned long long) end - 1); | ||
181 | } | 184 | } |
182 | 185 | ||
183 | void __init acpi_numa_arch_fixup(void) {} | 186 | void __init acpi_numa_arch_fixup(void) {} |
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0597f95b6da..33643a8bcbb 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -309,6 +309,10 @@ void bpf_jit_compile(struct sk_filter *fp) | |||
309 | else | 309 | else |
310 | EMIT1_off32(0x0d, K); /* or imm32,%eax */ | 310 | EMIT1_off32(0x0d, K); /* or imm32,%eax */ |
311 | break; | 311 | break; |
312 | case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ | ||
313 | seen |= SEEN_XREG; | ||
314 | EMIT2(0x31, 0xd8); /* xor %ebx,%eax */ | ||
315 | break; | ||
312 | case BPF_S_ALU_LSH_X: /* A <<= X; */ | 316 | case BPF_S_ALU_LSH_X: /* A <<= X; */ |
313 | seen |= SEEN_XREG; | 317 | seen |= SEEN_XREG; |
314 | EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ | 318 | EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 303f0863782..b2b94438ff0 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -312,7 +312,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
312 | goto fail; | 312 | goto fail; |
313 | } | 313 | } |
314 | /* both registers must be reserved */ | 314 | /* both registers must be reserved */ |
315 | if (num_counters == AMD64_NUM_COUNTERS_F15H) { | 315 | if (num_counters == AMD64_NUM_COUNTERS_CORE) { |
316 | msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); | 316 | msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); |
317 | msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); | 317 | msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); |
318 | } else { | 318 | } else { |
@@ -514,7 +514,7 @@ static int op_amd_init(struct oprofile_operations *ops) | |||
514 | ops->create_files = setup_ibs_files; | 514 | ops->create_files = setup_ibs_files; |
515 | 515 | ||
516 | if (boot_cpu_data.x86 == 0x15) { | 516 | if (boot_cpu_data.x86 == 0x15) { |
517 | num_counters = AMD64_NUM_COUNTERS_F15H; | 517 | num_counters = AMD64_NUM_COUNTERS_CORE; |
518 | } else { | 518 | } else { |
519 | num_counters = AMD64_NUM_COUNTERS; | 519 | num_counters = AMD64_NUM_COUNTERS; |
520 | } | 520 | } |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index fc09c2754e0..505acdd6d60 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -12,8 +12,13 @@ struct pci_root_info { | |||
12 | char name[16]; | 12 | char name[16]; |
13 | unsigned int res_num; | 13 | unsigned int res_num; |
14 | struct resource *res; | 14 | struct resource *res; |
15 | int busnum; | ||
16 | struct pci_sysdata sd; | 15 | struct pci_sysdata sd; |
16 | #ifdef CONFIG_PCI_MMCONFIG | ||
17 | bool mcfg_added; | ||
18 | u16 segment; | ||
19 | u8 start_bus; | ||
20 | u8 end_bus; | ||
21 | #endif | ||
17 | }; | 22 | }; |
18 | 23 | ||
19 | static bool pci_use_crs = true; | 24 | static bool pci_use_crs = true; |
@@ -120,6 +125,81 @@ void __init pci_acpi_crs_quirks(void) | |||
120 | pci_use_crs ? "nocrs" : "use_crs"); | 125 | pci_use_crs ? "nocrs" : "use_crs"); |
121 | } | 126 | } |
122 | 127 | ||
128 | #ifdef CONFIG_PCI_MMCONFIG | ||
129 | static int __devinit check_segment(u16 seg, struct device *dev, char *estr) | ||
130 | { | ||
131 | if (seg) { | ||
132 | dev_err(dev, | ||
133 | "%s can't access PCI configuration " | ||
134 | "space under this host bridge.\n", | ||
135 | estr); | ||
136 | return -EIO; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Failure in adding MMCFG information is not fatal, | ||
141 | * just can't access extended configuration space of | ||
142 | * devices under this host bridge. | ||
143 | */ | ||
144 | dev_warn(dev, | ||
145 | "%s can't access extended PCI configuration " | ||
146 | "space under this bridge.\n", | ||
147 | estr); | ||
148 | |||
149 | return 0; | ||
150 | } | ||
151 | |||
152 | static int __devinit setup_mcfg_map(struct pci_root_info *info, | ||
153 | u16 seg, u8 start, u8 end, | ||
154 | phys_addr_t addr) | ||
155 | { | ||
156 | int result; | ||
157 | struct device *dev = &info->bridge->dev; | ||
158 | |||
159 | info->start_bus = start; | ||
160 | info->end_bus = end; | ||
161 | info->mcfg_added = false; | ||
162 | |||
163 | /* return success if MMCFG is not in use */ | ||
164 | if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg) | ||
165 | return 0; | ||
166 | |||
167 | if (!(pci_probe & PCI_PROBE_MMCONF)) | ||
168 | return check_segment(seg, dev, "MMCONFIG is disabled,"); | ||
169 | |||
170 | result = pci_mmconfig_insert(dev, seg, start, end, addr); | ||
171 | if (result == 0) { | ||
172 | /* enable MMCFG if it hasn't been enabled yet */ | ||
173 | if (raw_pci_ext_ops == NULL) | ||
174 | raw_pci_ext_ops = &pci_mmcfg; | ||
175 | info->mcfg_added = true; | ||
176 | } else if (result != -EEXIST) | ||
177 | return check_segment(seg, dev, | ||
178 | "fail to add MMCONFIG information,"); | ||
179 | |||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | static void teardown_mcfg_map(struct pci_root_info *info) | ||
184 | { | ||
185 | if (info->mcfg_added) { | ||
186 | pci_mmconfig_delete(info->segment, info->start_bus, | ||
187 | info->end_bus); | ||
188 | info->mcfg_added = false; | ||
189 | } | ||
190 | } | ||
191 | #else | ||
192 | static int __devinit setup_mcfg_map(struct pci_root_info *info, | ||
193 | u16 seg, u8 start, u8 end, | ||
194 | phys_addr_t addr) | ||
195 | { | ||
196 | return 0; | ||
197 | } | ||
198 | static void teardown_mcfg_map(struct pci_root_info *info) | ||
199 | { | ||
200 | } | ||
201 | #endif | ||
202 | |||
123 | static acpi_status | 203 | static acpi_status |
124 | resource_to_addr(struct acpi_resource *resource, | 204 | resource_to_addr(struct acpi_resource *resource, |
125 | struct acpi_resource_address64 *addr) | 205 | struct acpi_resource_address64 *addr) |
@@ -234,13 +314,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
234 | } | 314 | } |
235 | 315 | ||
236 | info->res_num++; | 316 | info->res_num++; |
237 | if (addr.translation_offset) | ||
238 | dev_info(&info->bridge->dev, "host bridge window %pR " | ||
239 | "(PCI address [%#llx-%#llx])\n", | ||
240 | res, res->start - addr.translation_offset, | ||
241 | res->end - addr.translation_offset); | ||
242 | else | ||
243 | dev_info(&info->bridge->dev, "host bridge window %pR\n", res); | ||
244 | 317 | ||
245 | return AE_OK; | 318 | return AE_OK; |
246 | } | 319 | } |
@@ -332,8 +405,11 @@ static void __release_pci_root_info(struct pci_root_info *info) | |||
332 | 405 | ||
333 | free_pci_root_info_res(info); | 406 | free_pci_root_info_res(info); |
334 | 407 | ||
408 | teardown_mcfg_map(info); | ||
409 | |||
335 | kfree(info); | 410 | kfree(info); |
336 | } | 411 | } |
412 | |||
337 | static void release_pci_root_info(struct pci_host_bridge *bridge) | 413 | static void release_pci_root_info(struct pci_host_bridge *bridge) |
338 | { | 414 | { |
339 | struct pci_root_info *info = bridge->release_data; | 415 | struct pci_root_info *info = bridge->release_data; |
@@ -347,7 +423,9 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, | |||
347 | { | 423 | { |
348 | size_t size; | 424 | size_t size; |
349 | 425 | ||
426 | sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); | ||
350 | info->bridge = device; | 427 | info->bridge = device; |
428 | |||
351 | info->res_num = 0; | 429 | info->res_num = 0; |
352 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, | 430 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, |
353 | info); | 431 | info); |
@@ -360,8 +438,6 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, | |||
360 | if (!info->res) | 438 | if (!info->res) |
361 | return; | 439 | return; |
362 | 440 | ||
363 | sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); | ||
364 | |||
365 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, | 441 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, |
366 | info); | 442 | info); |
367 | } | 443 | } |
@@ -373,7 +449,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
373 | int domain = root->segment; | 449 | int domain = root->segment; |
374 | int busnum = root->secondary.start; | 450 | int busnum = root->secondary.start; |
375 | LIST_HEAD(resources); | 451 | LIST_HEAD(resources); |
376 | struct pci_bus *bus; | 452 | struct pci_bus *bus = NULL; |
377 | struct pci_sysdata *sd; | 453 | struct pci_sysdata *sd; |
378 | int node; | 454 | int node; |
379 | #ifdef CONFIG_ACPI_NUMA | 455 | #ifdef CONFIG_ACPI_NUMA |
@@ -426,6 +502,8 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
426 | } else { | 502 | } else { |
427 | probe_pci_root_info(info, device, busnum, domain); | 503 | probe_pci_root_info(info, device, busnum, domain); |
428 | 504 | ||
505 | /* insert busn res at first */ | ||
506 | pci_add_resource(&resources, &root->secondary); | ||
429 | /* | 507 | /* |
430 | * _CRS with no apertures is normal, so only fall back to | 508 | * _CRS with no apertures is normal, so only fall back to |
431 | * defaults or native bridge info if we're ignoring _CRS. | 509 | * defaults or native bridge info if we're ignoring _CRS. |
@@ -437,10 +515,13 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
437 | x86_pci_root_bus_resources(busnum, &resources); | 515 | x86_pci_root_bus_resources(busnum, &resources); |
438 | } | 516 | } |
439 | 517 | ||
440 | bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, | 518 | if (!setup_mcfg_map(info, domain, (u8)root->secondary.start, |
441 | &resources); | 519 | (u8)root->secondary.end, root->mcfg_addr)) |
520 | bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, | ||
521 | sd, &resources); | ||
522 | |||
442 | if (bus) { | 523 | if (bus) { |
443 | bus->subordinate = pci_scan_child_bus(bus); | 524 | pci_scan_child_bus(bus); |
444 | pci_set_host_bridge_release( | 525 | pci_set_host_bridge_release( |
445 | to_pci_host_bridge(bus->bridge), | 526 | to_pci_host_bridge(bus->bridge), |
446 | release_pci_root_info, info); | 527 | release_pci_root_info, info); |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 5aed49bff05..e9e6ed5cdf9 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -121,7 +121,6 @@ static int __init early_fill_mp_bus_info(void) | |||
121 | link = (reg >> 8) & 0x03; | 121 | link = (reg >> 8) & 0x03; |
122 | 122 | ||
123 | info = alloc_pci_root_info(min_bus, max_bus, node, link); | 123 | info = alloc_pci_root_info(min_bus, max_bus, node, link); |
124 | sprintf(info->name, "PCI Bus #%02x", min_bus); | ||
125 | } | 124 | } |
126 | 125 | ||
127 | /* get the default node and link for left over res */ | 126 | /* get the default node and link for left over res */ |
@@ -300,9 +299,9 @@ static int __init early_fill_mp_bus_info(void) | |||
300 | int busnum; | 299 | int busnum; |
301 | struct pci_root_res *root_res; | 300 | struct pci_root_res *root_res; |
302 | 301 | ||
303 | busnum = info->bus_min; | 302 | busnum = info->busn.start; |
304 | printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", | 303 | printk(KERN_DEBUG "bus: %pR on node %x link %x\n", |
305 | info->bus_min, info->bus_max, info->node, info->link); | 304 | &info->busn, info->node, info->link); |
306 | list_for_each_entry(root_res, &info->resources, list) | 305 | list_for_each_entry(root_res, &info->resources, list) |
307 | printk(KERN_DEBUG "bus: %02x %pR\n", | 306 | printk(KERN_DEBUG "bus: %02x %pR\n", |
308 | busnum, &root_res->res); | 307 | busnum, &root_res->res); |
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 306579f7d0f..d37e2fec97e 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c | |||
@@ -14,7 +14,7 @@ static struct pci_root_info *x86_find_pci_root_info(int bus) | |||
14 | return NULL; | 14 | return NULL; |
15 | 15 | ||
16 | list_for_each_entry(info, &pci_root_infos, list) | 16 | list_for_each_entry(info, &pci_root_infos, list) |
17 | if (info->bus_min == bus) | 17 | if (info->busn.start == bus) |
18 | return info; | 18 | return info; |
19 | 19 | ||
20 | return NULL; | 20 | return NULL; |
@@ -24,6 +24,8 @@ void x86_pci_root_bus_resources(int bus, struct list_head *resources) | |||
24 | { | 24 | { |
25 | struct pci_root_info *info = x86_find_pci_root_info(bus); | 25 | struct pci_root_info *info = x86_find_pci_root_info(bus); |
26 | struct pci_root_res *root_res; | 26 | struct pci_root_res *root_res; |
27 | struct pci_host_bridge_window *window; | ||
28 | bool found = false; | ||
27 | 29 | ||
28 | if (!info) | 30 | if (!info) |
29 | goto default_resources; | 31 | goto default_resources; |
@@ -31,6 +33,16 @@ void x86_pci_root_bus_resources(int bus, struct list_head *resources) | |||
31 | printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", | 33 | printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", |
32 | bus); | 34 | bus); |
33 | 35 | ||
36 | /* already added by acpi ? */ | ||
37 | list_for_each_entry(window, resources, list) | ||
38 | if (window->res->flags & IORESOURCE_BUS) { | ||
39 | found = true; | ||
40 | break; | ||
41 | } | ||
42 | |||
43 | if (!found) | ||
44 | pci_add_resource(resources, &info->busn); | ||
45 | |||
34 | list_for_each_entry(root_res, &info->resources, list) { | 46 | list_for_each_entry(root_res, &info->resources, list) { |
35 | struct resource *res; | 47 | struct resource *res; |
36 | struct resource *root; | 48 | struct resource *root; |
@@ -66,9 +78,13 @@ struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max, | |||
66 | if (!info) | 78 | if (!info) |
67 | return info; | 79 | return info; |
68 | 80 | ||
81 | sprintf(info->name, "PCI Bus #%02x", bus_min); | ||
82 | |||
69 | INIT_LIST_HEAD(&info->resources); | 83 | INIT_LIST_HEAD(&info->resources); |
70 | info->bus_min = bus_min; | 84 | info->busn.name = info->name; |
71 | info->bus_max = bus_max; | 85 | info->busn.start = bus_min; |
86 | info->busn.end = bus_max; | ||
87 | info->busn.flags = IORESOURCE_BUS; | ||
72 | info->node = node; | 88 | info->node = node; |
73 | info->link = link; | 89 | info->link = link; |
74 | 90 | ||
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 226a466b2b2..ff8f65b0457 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h | |||
@@ -13,8 +13,7 @@ struct pci_root_info { | |||
13 | struct list_head list; | 13 | struct list_head list; |
14 | char name[12]; | 14 | char name[12]; |
15 | struct list_head resources; | 15 | struct list_head resources; |
16 | int bus_min; | 16 | struct resource busn; |
17 | int bus_max; | ||
18 | int node; | 17 | int node; |
19 | int link; | 18 | int link; |
20 | }; | 19 | }; |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 0ad990a20d4..720e973fc34 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -494,7 +494,7 @@ int __init pcibios_init(void) | |||
494 | return 0; | 494 | return 0; |
495 | } | 495 | } |
496 | 496 | ||
497 | char * __devinit pcibios_setup(char *str) | 497 | char * __init pcibios_setup(char *str) |
498 | { | 498 | { |
499 | if (!strcmp(str, "off")) { | 499 | if (!strcmp(str, "off")) { |
500 | pci_probe = 0; | 500 | pci_probe = 0; |
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 5dd467bd612..af8a224db21 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/dmi.h> | 6 | #include <linux/dmi.h> |
7 | #include <linux/pci.h> | 7 | #include <linux/pci.h> |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/vgaarb.h> | ||
9 | #include <asm/pci_x86.h> | 10 | #include <asm/pci_x86.h> |
10 | 11 | ||
11 | static void __devinit pci_fixup_i450nx(struct pci_dev *d) | 12 | static void __devinit pci_fixup_i450nx(struct pci_dev *d) |
@@ -348,6 +349,8 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev) | |||
348 | if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { | 349 | if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { |
349 | pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; | 350 | pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; |
350 | dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); | 351 | dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); |
352 | if (!vga_default_device()) | ||
353 | vga_set_default_device(pdev); | ||
351 | } | 354 | } |
352 | } | 355 | } |
353 | DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, | 356 | DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 301e325992f..937bcece700 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -17,6 +17,8 @@ | |||
17 | #include <linux/bitmap.h> | 17 | #include <linux/bitmap.h> |
18 | #include <linux/dmi.h> | 18 | #include <linux/dmi.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/mutex.h> | ||
21 | #include <linux/rculist.h> | ||
20 | #include <asm/e820.h> | 22 | #include <asm/e820.h> |
21 | #include <asm/pci_x86.h> | 23 | #include <asm/pci_x86.h> |
22 | #include <asm/acpi.h> | 24 | #include <asm/acpi.h> |
@@ -24,7 +26,9 @@ | |||
24 | #define PREFIX "PCI: " | 26 | #define PREFIX "PCI: " |
25 | 27 | ||
26 | /* Indicate if the mmcfg resources have been placed into the resource table. */ | 28 | /* Indicate if the mmcfg resources have been placed into the resource table. */ |
27 | static int __initdata pci_mmcfg_resources_inserted; | 29 | static bool pci_mmcfg_running_state; |
30 | static bool pci_mmcfg_arch_init_failed; | ||
31 | static DEFINE_MUTEX(pci_mmcfg_lock); | ||
28 | 32 | ||
29 | LIST_HEAD(pci_mmcfg_list); | 33 | LIST_HEAD(pci_mmcfg_list); |
30 | 34 | ||
@@ -45,24 +49,25 @@ static __init void free_all_mmcfg(void) | |||
45 | pci_mmconfig_remove(cfg); | 49 | pci_mmconfig_remove(cfg); |
46 | } | 50 | } |
47 | 51 | ||
48 | static __init void list_add_sorted(struct pci_mmcfg_region *new) | 52 | static __devinit void list_add_sorted(struct pci_mmcfg_region *new) |
49 | { | 53 | { |
50 | struct pci_mmcfg_region *cfg; | 54 | struct pci_mmcfg_region *cfg; |
51 | 55 | ||
52 | /* keep list sorted by segment and starting bus number */ | 56 | /* keep list sorted by segment and starting bus number */ |
53 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 57 | list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) { |
54 | if (cfg->segment > new->segment || | 58 | if (cfg->segment > new->segment || |
55 | (cfg->segment == new->segment && | 59 | (cfg->segment == new->segment && |
56 | cfg->start_bus >= new->start_bus)) { | 60 | cfg->start_bus >= new->start_bus)) { |
57 | list_add_tail(&new->list, &cfg->list); | 61 | list_add_tail_rcu(&new->list, &cfg->list); |
58 | return; | 62 | return; |
59 | } | 63 | } |
60 | } | 64 | } |
61 | list_add_tail(&new->list, &pci_mmcfg_list); | 65 | list_add_tail_rcu(&new->list, &pci_mmcfg_list); |
62 | } | 66 | } |
63 | 67 | ||
64 | static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, | 68 | static __devinit struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, |
65 | int end, u64 addr) | 69 | int start, |
70 | int end, u64 addr) | ||
66 | { | 71 | { |
67 | struct pci_mmcfg_region *new; | 72 | struct pci_mmcfg_region *new; |
68 | struct resource *res; | 73 | struct resource *res; |
@@ -79,8 +84,6 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, | |||
79 | new->start_bus = start; | 84 | new->start_bus = start; |
80 | new->end_bus = end; | 85 | new->end_bus = end; |
81 | 86 | ||
82 | list_add_sorted(new); | ||
83 | |||
84 | res = &new->res; | 87 | res = &new->res; |
85 | res->start = addr + PCI_MMCFG_BUS_OFFSET(start); | 88 | res->start = addr + PCI_MMCFG_BUS_OFFSET(start); |
86 | res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1; | 89 | res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1; |
@@ -89,9 +92,25 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, | |||
89 | "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); | 92 | "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); |
90 | res->name = new->name; | 93 | res->name = new->name; |
91 | 94 | ||
92 | printk(KERN_INFO PREFIX "MMCONFIG for domain %04x [bus %02x-%02x] at " | 95 | return new; |
93 | "%pR (base %#lx)\n", segment, start, end, &new->res, | 96 | } |
94 | (unsigned long) addr); | 97 | |
98 | static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, | ||
99 | int end, u64 addr) | ||
100 | { | ||
101 | struct pci_mmcfg_region *new; | ||
102 | |||
103 | new = pci_mmconfig_alloc(segment, start, end, addr); | ||
104 | if (new) { | ||
105 | mutex_lock(&pci_mmcfg_lock); | ||
106 | list_add_sorted(new); | ||
107 | mutex_unlock(&pci_mmcfg_lock); | ||
108 | |||
109 | pr_info(PREFIX | ||
110 | "MMCONFIG for domain %04x [bus %02x-%02x] at %pR " | ||
111 | "(base %#lx)\n", | ||
112 | segment, start, end, &new->res, (unsigned long)addr); | ||
113 | } | ||
95 | 114 | ||
96 | return new; | 115 | return new; |
97 | } | 116 | } |
@@ -100,7 +119,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus) | |||
100 | { | 119 | { |
101 | struct pci_mmcfg_region *cfg; | 120 | struct pci_mmcfg_region *cfg; |
102 | 121 | ||
103 | list_for_each_entry(cfg, &pci_mmcfg_list, list) | 122 | list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) |
104 | if (cfg->segment == segment && | 123 | if (cfg->segment == segment && |
105 | cfg->start_bus <= bus && bus <= cfg->end_bus) | 124 | cfg->start_bus <= bus && bus <= cfg->end_bus) |
106 | return cfg; | 125 | return cfg; |
@@ -343,8 +362,7 @@ static int __init pci_mmcfg_check_hostbridge(void) | |||
343 | name = pci_mmcfg_probes[i].probe(); | 362 | name = pci_mmcfg_probes[i].probe(); |
344 | 363 | ||
345 | if (name) | 364 | if (name) |
346 | printk(KERN_INFO PREFIX "%s with MMCONFIG support\n", | 365 | pr_info(PREFIX "%s with MMCONFIG support\n", name); |
347 | name); | ||
348 | } | 366 | } |
349 | 367 | ||
350 | /* some end_bus_number is crazy, fix it */ | 368 | /* some end_bus_number is crazy, fix it */ |
@@ -353,19 +371,8 @@ static int __init pci_mmcfg_check_hostbridge(void) | |||
353 | return !list_empty(&pci_mmcfg_list); | 371 | return !list_empty(&pci_mmcfg_list); |
354 | } | 372 | } |
355 | 373 | ||
356 | static void __init pci_mmcfg_insert_resources(void) | 374 | static acpi_status __devinit check_mcfg_resource(struct acpi_resource *res, |
357 | { | 375 | void *data) |
358 | struct pci_mmcfg_region *cfg; | ||
359 | |||
360 | list_for_each_entry(cfg, &pci_mmcfg_list, list) | ||
361 | insert_resource(&iomem_resource, &cfg->res); | ||
362 | |||
363 | /* Mark that the resources have been inserted. */ | ||
364 | pci_mmcfg_resources_inserted = 1; | ||
365 | } | ||
366 | |||
367 | static acpi_status __init check_mcfg_resource(struct acpi_resource *res, | ||
368 | void *data) | ||
369 | { | 376 | { |
370 | struct resource *mcfg_res = data; | 377 | struct resource *mcfg_res = data; |
371 | struct acpi_resource_address64 address; | 378 | struct acpi_resource_address64 address; |
@@ -401,8 +408,8 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res, | |||
401 | return AE_OK; | 408 | return AE_OK; |
402 | } | 409 | } |
403 | 410 | ||
404 | static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, | 411 | static acpi_status __devinit find_mboard_resource(acpi_handle handle, u32 lvl, |
405 | void *context, void **rv) | 412 | void *context, void **rv) |
406 | { | 413 | { |
407 | struct resource *mcfg_res = context; | 414 | struct resource *mcfg_res = context; |
408 | 415 | ||
@@ -415,7 +422,7 @@ static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, | |||
415 | return AE_OK; | 422 | return AE_OK; |
416 | } | 423 | } |
417 | 424 | ||
418 | static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) | 425 | static int __devinit is_acpi_reserved(u64 start, u64 end, unsigned not_used) |
419 | { | 426 | { |
420 | struct resource mcfg_res; | 427 | struct resource mcfg_res; |
421 | 428 | ||
@@ -434,13 +441,15 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) | |||
434 | 441 | ||
435 | typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); | 442 | typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); |
436 | 443 | ||
437 | static int __init is_mmconf_reserved(check_reserved_t is_reserved, | 444 | static int __ref is_mmconf_reserved(check_reserved_t is_reserved, |
438 | struct pci_mmcfg_region *cfg, int with_e820) | 445 | struct pci_mmcfg_region *cfg, |
446 | struct device *dev, int with_e820) | ||
439 | { | 447 | { |
440 | u64 addr = cfg->res.start; | 448 | u64 addr = cfg->res.start; |
441 | u64 size = resource_size(&cfg->res); | 449 | u64 size = resource_size(&cfg->res); |
442 | u64 old_size = size; | 450 | u64 old_size = size; |
443 | int valid = 0, num_buses; | 451 | int num_buses; |
452 | char *method = with_e820 ? "E820" : "ACPI motherboard resources"; | ||
444 | 453 | ||
445 | while (!is_reserved(addr, addr + size, E820_RESERVED)) { | 454 | while (!is_reserved(addr, addr + size, E820_RESERVED)) { |
446 | size >>= 1; | 455 | size >>= 1; |
@@ -448,30 +457,76 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, | |||
448 | break; | 457 | break; |
449 | } | 458 | } |
450 | 459 | ||
451 | if (size >= (16UL<<20) || size == old_size) { | 460 | if (size < (16UL<<20) && size != old_size) |
452 | printk(KERN_INFO PREFIX "MMCONFIG at %pR reserved in %s\n", | 461 | return 0; |
453 | &cfg->res, | 462 | |
454 | with_e820 ? "E820" : "ACPI motherboard resources"); | 463 | if (dev) |
455 | valid = 1; | 464 | dev_info(dev, "MMCONFIG at %pR reserved in %s\n", |
456 | 465 | &cfg->res, method); | |
457 | if (old_size != size) { | 466 | else |
458 | /* update end_bus */ | 467 | pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n", |
459 | cfg->end_bus = cfg->start_bus + ((size>>20) - 1); | 468 | &cfg->res, method); |
460 | num_buses = cfg->end_bus - cfg->start_bus + 1; | 469 | |
461 | cfg->res.end = cfg->res.start + | 470 | if (old_size != size) { |
462 | PCI_MMCFG_BUS_OFFSET(num_buses) - 1; | 471 | /* update end_bus */ |
463 | snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, | 472 | cfg->end_bus = cfg->start_bus + ((size>>20) - 1); |
464 | "PCI MMCONFIG %04x [bus %02x-%02x]", | 473 | num_buses = cfg->end_bus - cfg->start_bus + 1; |
465 | cfg->segment, cfg->start_bus, cfg->end_bus); | 474 | cfg->res.end = cfg->res.start + |
466 | printk(KERN_INFO PREFIX | 475 | PCI_MMCFG_BUS_OFFSET(num_buses) - 1; |
467 | "MMCONFIG for %04x [bus%02x-%02x] " | 476 | snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, |
468 | "at %pR (base %#lx) (size reduced!)\n", | 477 | "PCI MMCONFIG %04x [bus %02x-%02x]", |
469 | cfg->segment, cfg->start_bus, cfg->end_bus, | 478 | cfg->segment, cfg->start_bus, cfg->end_bus); |
470 | &cfg->res, (unsigned long) cfg->address); | 479 | |
471 | } | 480 | if (dev) |
481 | dev_info(dev, | ||
482 | "MMCONFIG " | ||
483 | "at %pR (base %#lx) (size reduced!)\n", | ||
484 | &cfg->res, (unsigned long) cfg->address); | ||
485 | else | ||
486 | pr_info(PREFIX | ||
487 | "MMCONFIG for %04x [bus%02x-%02x] " | ||
488 | "at %pR (base %#lx) (size reduced!)\n", | ||
489 | cfg->segment, cfg->start_bus, cfg->end_bus, | ||
490 | &cfg->res, (unsigned long) cfg->address); | ||
472 | } | 491 | } |
473 | 492 | ||
474 | return valid; | 493 | return 1; |
494 | } | ||
495 | |||
496 | static int __ref pci_mmcfg_check_reserved(struct device *dev, | ||
497 | struct pci_mmcfg_region *cfg, int early) | ||
498 | { | ||
499 | if (!early && !acpi_disabled) { | ||
500 | if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0)) | ||
501 | return 1; | ||
502 | |||
503 | if (dev) | ||
504 | dev_info(dev, FW_INFO | ||
505 | "MMCONFIG at %pR not reserved in " | ||
506 | "ACPI motherboard resources\n", | ||
507 | &cfg->res); | ||
508 | else | ||
509 | pr_info(FW_INFO PREFIX | ||
510 | "MMCONFIG at %pR not reserved in " | ||
511 | "ACPI motherboard resources\n", | ||
512 | &cfg->res); | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * e820_all_mapped() is marked as __init. | ||
517 | * All entries from ACPI MCFG table have been checked at boot time. | ||
518 | * For MCFG information constructed from hotpluggable host bridge's | ||
519 | * _CBA method, just assume it's reserved. | ||
520 | */ | ||
521 | if (pci_mmcfg_running_state) | ||
522 | return 1; | ||
523 | |||
524 | /* Don't try to do this check unless configuration | ||
525 | type 1 is available. how about type 2 ?*/ | ||
526 | if (raw_pci_ops) | ||
527 | return is_mmconf_reserved(e820_all_mapped, cfg, dev, 1); | ||
528 | |||
529 | return 0; | ||
475 | } | 530 | } |
476 | 531 | ||
477 | static void __init pci_mmcfg_reject_broken(int early) | 532 | static void __init pci_mmcfg_reject_broken(int early) |
@@ -479,38 +534,14 @@ static void __init pci_mmcfg_reject_broken(int early) | |||
479 | struct pci_mmcfg_region *cfg; | 534 | struct pci_mmcfg_region *cfg; |
480 | 535 | ||
481 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 536 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { |
482 | int valid = 0; | 537 | if (pci_mmcfg_check_reserved(NULL, cfg, early) == 0) { |
483 | 538 | pr_info(PREFIX "not using MMCONFIG\n"); | |
484 | if (!early && !acpi_disabled) { | 539 | free_all_mmcfg(); |
485 | valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); | 540 | return; |
486 | |||
487 | if (valid) | ||
488 | continue; | ||
489 | else | ||
490 | printk(KERN_ERR FW_BUG PREFIX | ||
491 | "MMCONFIG at %pR not reserved in " | ||
492 | "ACPI motherboard resources\n", | ||
493 | &cfg->res); | ||
494 | } | 541 | } |
495 | |||
496 | /* Don't try to do this check unless configuration | ||
497 | type 1 is available. how about type 2 ?*/ | ||
498 | if (raw_pci_ops) | ||
499 | valid = is_mmconf_reserved(e820_all_mapped, cfg, 1); | ||
500 | |||
501 | if (!valid) | ||
502 | goto reject; | ||
503 | } | 542 | } |
504 | |||
505 | return; | ||
506 | |||
507 | reject: | ||
508 | printk(KERN_INFO PREFIX "not using MMCONFIG\n"); | ||
509 | free_all_mmcfg(); | ||
510 | } | 543 | } |
511 | 544 | ||
512 | static int __initdata known_bridge; | ||
513 | |||
514 | static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, | 545 | static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, |
515 | struct acpi_mcfg_allocation *cfg) | 546 | struct acpi_mcfg_allocation *cfg) |
516 | { | 547 | { |
@@ -529,7 +560,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, | |||
529 | return 0; | 560 | return 0; |
530 | } | 561 | } |
531 | 562 | ||
532 | printk(KERN_ERR PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " | 563 | pr_err(PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " |
533 | "is above 4GB, ignored\n", cfg->pci_segment, | 564 | "is above 4GB, ignored\n", cfg->pci_segment, |
534 | cfg->start_bus_number, cfg->end_bus_number, cfg->address); | 565 | cfg->start_bus_number, cfg->end_bus_number, cfg->address); |
535 | return -EINVAL; | 566 | return -EINVAL; |
@@ -556,7 +587,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) | |||
556 | i -= sizeof(struct acpi_mcfg_allocation); | 587 | i -= sizeof(struct acpi_mcfg_allocation); |
557 | }; | 588 | }; |
558 | if (entries == 0) { | 589 | if (entries == 0) { |
559 | printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); | 590 | pr_err(PREFIX "MMCONFIG has no entries\n"); |
560 | return -ENODEV; | 591 | return -ENODEV; |
561 | } | 592 | } |
562 | 593 | ||
@@ -570,8 +601,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) | |||
570 | 601 | ||
571 | if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number, | 602 | if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number, |
572 | cfg->end_bus_number, cfg->address) == NULL) { | 603 | cfg->end_bus_number, cfg->address) == NULL) { |
573 | printk(KERN_WARNING PREFIX | 604 | pr_warn(PREFIX "no memory for MCFG entries\n"); |
574 | "no memory for MCFG entries\n"); | ||
575 | free_all_mmcfg(); | 605 | free_all_mmcfg(); |
576 | return -ENOMEM; | 606 | return -ENOMEM; |
577 | } | 607 | } |
@@ -582,28 +612,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) | |||
582 | 612 | ||
583 | static void __init __pci_mmcfg_init(int early) | 613 | static void __init __pci_mmcfg_init(int early) |
584 | { | 614 | { |
585 | /* MMCONFIG disabled */ | ||
586 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) | ||
587 | return; | ||
588 | |||
589 | /* MMCONFIG already enabled */ | ||
590 | if (!early && !(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF)) | ||
591 | return; | ||
592 | |||
593 | /* for late to exit */ | ||
594 | if (known_bridge) | ||
595 | return; | ||
596 | |||
597 | if (early) { | ||
598 | if (pci_mmcfg_check_hostbridge()) | ||
599 | known_bridge = 1; | ||
600 | } | ||
601 | |||
602 | if (!known_bridge) | ||
603 | acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); | ||
604 | |||
605 | pci_mmcfg_reject_broken(early); | 615 | pci_mmcfg_reject_broken(early); |
606 | |||
607 | if (list_empty(&pci_mmcfg_list)) | 616 | if (list_empty(&pci_mmcfg_list)) |
608 | return; | 617 | return; |
609 | 618 | ||
@@ -620,33 +629,48 @@ static void __init __pci_mmcfg_init(int early) | |||
620 | if (pci_mmcfg_arch_init()) | 629 | if (pci_mmcfg_arch_init()) |
621 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; | 630 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; |
622 | else { | 631 | else { |
623 | /* | 632 | free_all_mmcfg(); |
624 | * Signal not to attempt to insert mmcfg resources because | 633 | pci_mmcfg_arch_init_failed = true; |
625 | * the architecture mmcfg setup could not initialize. | ||
626 | */ | ||
627 | pci_mmcfg_resources_inserted = 1; | ||
628 | } | 634 | } |
629 | } | 635 | } |
630 | 636 | ||
637 | static int __initdata known_bridge; | ||
638 | |||
631 | void __init pci_mmcfg_early_init(void) | 639 | void __init pci_mmcfg_early_init(void) |
632 | { | 640 | { |
633 | __pci_mmcfg_init(1); | 641 | if (pci_probe & PCI_PROBE_MMCONF) { |
642 | if (pci_mmcfg_check_hostbridge()) | ||
643 | known_bridge = 1; | ||
644 | else | ||
645 | acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); | ||
646 | __pci_mmcfg_init(1); | ||
647 | } | ||
634 | } | 648 | } |
635 | 649 | ||
636 | void __init pci_mmcfg_late_init(void) | 650 | void __init pci_mmcfg_late_init(void) |
637 | { | 651 | { |
638 | __pci_mmcfg_init(0); | 652 | /* MMCONFIG disabled */ |
653 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) | ||
654 | return; | ||
655 | |||
656 | if (known_bridge) | ||
657 | return; | ||
658 | |||
659 | /* MMCONFIG hasn't been enabled yet, try again */ | ||
660 | if (pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF) { | ||
661 | acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); | ||
662 | __pci_mmcfg_init(0); | ||
663 | } | ||
639 | } | 664 | } |
640 | 665 | ||
641 | static int __init pci_mmcfg_late_insert_resources(void) | 666 | static int __init pci_mmcfg_late_insert_resources(void) |
642 | { | 667 | { |
643 | /* | 668 | struct pci_mmcfg_region *cfg; |
644 | * If resources are already inserted or we are not using MMCONFIG, | 669 | |
645 | * don't insert the resources. | 670 | pci_mmcfg_running_state = true; |
646 | */ | 671 | |
647 | if ((pci_mmcfg_resources_inserted == 1) || | 672 | /* If we are not using MMCONFIG, don't insert the resources. */ |
648 | (pci_probe & PCI_PROBE_MMCONF) == 0 || | 673 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) |
649 | list_empty(&pci_mmcfg_list)) | ||
650 | return 1; | 674 | return 1; |
651 | 675 | ||
652 | /* | 676 | /* |
@@ -654,7 +678,9 @@ static int __init pci_mmcfg_late_insert_resources(void) | |||
654 | * marked so it won't cause request errors when __request_region is | 678 | * marked so it won't cause request errors when __request_region is |
655 | * called. | 679 | * called. |
656 | */ | 680 | */ |
657 | pci_mmcfg_insert_resources(); | 681 | list_for_each_entry(cfg, &pci_mmcfg_list, list) |
682 | if (!cfg->res.parent) | ||
683 | insert_resource(&iomem_resource, &cfg->res); | ||
658 | 684 | ||
659 | return 0; | 685 | return 0; |
660 | } | 686 | } |
@@ -665,3 +691,101 @@ static int __init pci_mmcfg_late_insert_resources(void) | |||
665 | * with other system resources. | 691 | * with other system resources. |
666 | */ | 692 | */ |
667 | late_initcall(pci_mmcfg_late_insert_resources); | 693 | late_initcall(pci_mmcfg_late_insert_resources); |
694 | |||
695 | /* Add MMCFG information for host bridges */ | ||
696 | int __devinit pci_mmconfig_insert(struct device *dev, | ||
697 | u16 seg, u8 start, u8 end, | ||
698 | phys_addr_t addr) | ||
699 | { | ||
700 | int rc; | ||
701 | struct resource *tmp = NULL; | ||
702 | struct pci_mmcfg_region *cfg; | ||
703 | |||
704 | if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed) | ||
705 | return -ENODEV; | ||
706 | |||
707 | if (start > end) | ||
708 | return -EINVAL; | ||
709 | |||
710 | mutex_lock(&pci_mmcfg_lock); | ||
711 | cfg = pci_mmconfig_lookup(seg, start); | ||
712 | if (cfg) { | ||
713 | if (cfg->end_bus < end) | ||
714 | dev_info(dev, FW_INFO | ||
715 | "MMCONFIG for " | ||
716 | "domain %04x [bus %02x-%02x] " | ||
717 | "only partially covers this bridge\n", | ||
718 | cfg->segment, cfg->start_bus, cfg->end_bus); | ||
719 | mutex_unlock(&pci_mmcfg_lock); | ||
720 | return -EEXIST; | ||
721 | } | ||
722 | |||
723 | if (!addr) { | ||
724 | mutex_unlock(&pci_mmcfg_lock); | ||
725 | return -EINVAL; | ||
726 | } | ||
727 | |||
728 | rc = -EBUSY; | ||
729 | cfg = pci_mmconfig_alloc(seg, start, end, addr); | ||
730 | if (cfg == NULL) { | ||
731 | dev_warn(dev, "fail to add MMCONFIG (out of memory)\n"); | ||
732 | rc = -ENOMEM; | ||
733 | } else if (!pci_mmcfg_check_reserved(dev, cfg, 0)) { | ||
734 | dev_warn(dev, FW_BUG "MMCONFIG %pR isn't reserved\n", | ||
735 | &cfg->res); | ||
736 | } else { | ||
737 | /* Insert resource if it's not in boot stage */ | ||
738 | if (pci_mmcfg_running_state) | ||
739 | tmp = insert_resource_conflict(&iomem_resource, | ||
740 | &cfg->res); | ||
741 | |||
742 | if (tmp) { | ||
743 | dev_warn(dev, | ||
744 | "MMCONFIG %pR conflicts with " | ||
745 | "%s %pR\n", | ||
746 | &cfg->res, tmp->name, tmp); | ||
747 | } else if (pci_mmcfg_arch_map(cfg)) { | ||
748 | dev_warn(dev, "fail to map MMCONFIG %pR.\n", | ||
749 | &cfg->res); | ||
750 | } else { | ||
751 | list_add_sorted(cfg); | ||
752 | dev_info(dev, "MMCONFIG at %pR (base %#lx)\n", | ||
753 | &cfg->res, (unsigned long)addr); | ||
754 | cfg = NULL; | ||
755 | rc = 0; | ||
756 | } | ||
757 | } | ||
758 | |||
759 | if (cfg) { | ||
760 | if (cfg->res.parent) | ||
761 | release_resource(&cfg->res); | ||
762 | kfree(cfg); | ||
763 | } | ||
764 | |||
765 | mutex_unlock(&pci_mmcfg_lock); | ||
766 | |||
767 | return rc; | ||
768 | } | ||
769 | |||
770 | /* Delete MMCFG information for host bridges */ | ||
771 | int pci_mmconfig_delete(u16 seg, u8 start, u8 end) | ||
772 | { | ||
773 | struct pci_mmcfg_region *cfg; | ||
774 | |||
775 | mutex_lock(&pci_mmcfg_lock); | ||
776 | list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) | ||
777 | if (cfg->segment == seg && cfg->start_bus == start && | ||
778 | cfg->end_bus == end) { | ||
779 | list_del_rcu(&cfg->list); | ||
780 | synchronize_rcu(); | ||
781 | pci_mmcfg_arch_unmap(cfg); | ||
782 | if (cfg->res.parent) | ||
783 | release_resource(&cfg->res); | ||
784 | mutex_unlock(&pci_mmcfg_lock); | ||
785 | kfree(cfg); | ||
786 | return 0; | ||
787 | } | ||
788 | mutex_unlock(&pci_mmcfg_lock); | ||
789 | |||
790 | return -ENOENT; | ||
791 | } | ||
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 5372e86834c..db63ac23e3d 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/pci.h> | 12 | #include <linux/pci.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/rcupdate.h> | ||
14 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
15 | #include <asm/pci_x86.h> | 16 | #include <asm/pci_x86.h> |
16 | #include <acpi/acpi.h> | 17 | #include <acpi/acpi.h> |
@@ -60,9 +61,12 @@ err: *value = -1; | |||
60 | return -EINVAL; | 61 | return -EINVAL; |
61 | } | 62 | } |
62 | 63 | ||
64 | rcu_read_lock(); | ||
63 | base = get_base_addr(seg, bus, devfn); | 65 | base = get_base_addr(seg, bus, devfn); |
64 | if (!base) | 66 | if (!base) { |
67 | rcu_read_unlock(); | ||
65 | goto err; | 68 | goto err; |
69 | } | ||
66 | 70 | ||
67 | raw_spin_lock_irqsave(&pci_config_lock, flags); | 71 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
68 | 72 | ||
@@ -80,6 +84,7 @@ err: *value = -1; | |||
80 | break; | 84 | break; |
81 | } | 85 | } |
82 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); | 86 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
87 | rcu_read_unlock(); | ||
83 | 88 | ||
84 | return 0; | 89 | return 0; |
85 | } | 90 | } |
@@ -93,9 +98,12 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
93 | if ((bus > 255) || (devfn > 255) || (reg > 4095)) | 98 | if ((bus > 255) || (devfn > 255) || (reg > 4095)) |
94 | return -EINVAL; | 99 | return -EINVAL; |
95 | 100 | ||
101 | rcu_read_lock(); | ||
96 | base = get_base_addr(seg, bus, devfn); | 102 | base = get_base_addr(seg, bus, devfn); |
97 | if (!base) | 103 | if (!base) { |
104 | rcu_read_unlock(); | ||
98 | return -EINVAL; | 105 | return -EINVAL; |
106 | } | ||
99 | 107 | ||
100 | raw_spin_lock_irqsave(&pci_config_lock, flags); | 108 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
101 | 109 | ||
@@ -113,11 +121,12 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
113 | break; | 121 | break; |
114 | } | 122 | } |
115 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); | 123 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
124 | rcu_read_unlock(); | ||
116 | 125 | ||
117 | return 0; | 126 | return 0; |
118 | } | 127 | } |
119 | 128 | ||
120 | static const struct pci_raw_ops pci_mmcfg = { | 129 | const struct pci_raw_ops pci_mmcfg = { |
121 | .read = pci_mmcfg_read, | 130 | .read = pci_mmcfg_read, |
122 | .write = pci_mmcfg_write, | 131 | .write = pci_mmcfg_write, |
123 | }; | 132 | }; |
@@ -132,3 +141,18 @@ int __init pci_mmcfg_arch_init(void) | |||
132 | void __init pci_mmcfg_arch_free(void) | 141 | void __init pci_mmcfg_arch_free(void) |
133 | { | 142 | { |
134 | } | 143 | } |
144 | |||
145 | int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg) | ||
146 | { | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg) | ||
151 | { | ||
152 | unsigned long flags; | ||
153 | |||
154 | /* Invalidate the cached mmcfg map entry. */ | ||
155 | raw_spin_lock_irqsave(&pci_config_lock, flags); | ||
156 | mmcfg_last_accessed_device = 0; | ||
157 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); | ||
158 | } | ||
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 915a493502c..d4ebd07c306 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/init.h> | 9 | #include <linux/init.h> |
10 | #include <linux/acpi.h> | 10 | #include <linux/acpi.h> |
11 | #include <linux/bitmap.h> | 11 | #include <linux/bitmap.h> |
12 | #include <linux/rcupdate.h> | ||
12 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
13 | #include <asm/pci_x86.h> | 14 | #include <asm/pci_x86.h> |
14 | 15 | ||
@@ -34,9 +35,12 @@ err: *value = -1; | |||
34 | return -EINVAL; | 35 | return -EINVAL; |
35 | } | 36 | } |
36 | 37 | ||
38 | rcu_read_lock(); | ||
37 | addr = pci_dev_base(seg, bus, devfn); | 39 | addr = pci_dev_base(seg, bus, devfn); |
38 | if (!addr) | 40 | if (!addr) { |
41 | rcu_read_unlock(); | ||
39 | goto err; | 42 | goto err; |
43 | } | ||
40 | 44 | ||
41 | switch (len) { | 45 | switch (len) { |
42 | case 1: | 46 | case 1: |
@@ -49,6 +53,7 @@ err: *value = -1; | |||
49 | *value = mmio_config_readl(addr + reg); | 53 | *value = mmio_config_readl(addr + reg); |
50 | break; | 54 | break; |
51 | } | 55 | } |
56 | rcu_read_unlock(); | ||
52 | 57 | ||
53 | return 0; | 58 | return 0; |
54 | } | 59 | } |
@@ -62,9 +67,12 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
62 | if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) | 67 | if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) |
63 | return -EINVAL; | 68 | return -EINVAL; |
64 | 69 | ||
70 | rcu_read_lock(); | ||
65 | addr = pci_dev_base(seg, bus, devfn); | 71 | addr = pci_dev_base(seg, bus, devfn); |
66 | if (!addr) | 72 | if (!addr) { |
73 | rcu_read_unlock(); | ||
67 | return -EINVAL; | 74 | return -EINVAL; |
75 | } | ||
68 | 76 | ||
69 | switch (len) { | 77 | switch (len) { |
70 | case 1: | 78 | case 1: |
@@ -77,16 +85,17 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
77 | mmio_config_writel(addr + reg, value); | 85 | mmio_config_writel(addr + reg, value); |
78 | break; | 86 | break; |
79 | } | 87 | } |
88 | rcu_read_unlock(); | ||
80 | 89 | ||
81 | return 0; | 90 | return 0; |
82 | } | 91 | } |
83 | 92 | ||
84 | static const struct pci_raw_ops pci_mmcfg = { | 93 | const struct pci_raw_ops pci_mmcfg = { |
85 | .read = pci_mmcfg_read, | 94 | .read = pci_mmcfg_read, |
86 | .write = pci_mmcfg_write, | 95 | .write = pci_mmcfg_write, |
87 | }; | 96 | }; |
88 | 97 | ||
89 | static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) | 98 | static void __iomem * __devinit mcfg_ioremap(struct pci_mmcfg_region *cfg) |
90 | { | 99 | { |
91 | void __iomem *addr; | 100 | void __iomem *addr; |
92 | u64 start, size; | 101 | u64 start, size; |
@@ -105,16 +114,14 @@ int __init pci_mmcfg_arch_init(void) | |||
105 | { | 114 | { |
106 | struct pci_mmcfg_region *cfg; | 115 | struct pci_mmcfg_region *cfg; |
107 | 116 | ||
108 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 117 | list_for_each_entry(cfg, &pci_mmcfg_list, list) |
109 | cfg->virt = mcfg_ioremap(cfg); | 118 | if (pci_mmcfg_arch_map(cfg)) { |
110 | if (!cfg->virt) { | ||
111 | printk(KERN_ERR PREFIX "can't map MMCONFIG at %pR\n", | ||
112 | &cfg->res); | ||
113 | pci_mmcfg_arch_free(); | 119 | pci_mmcfg_arch_free(); |
114 | return 0; | 120 | return 0; |
115 | } | 121 | } |
116 | } | 122 | |
117 | raw_pci_ext_ops = &pci_mmcfg; | 123 | raw_pci_ext_ops = &pci_mmcfg; |
124 | |||
118 | return 1; | 125 | return 1; |
119 | } | 126 | } |
120 | 127 | ||
@@ -122,10 +129,25 @@ void __init pci_mmcfg_arch_free(void) | |||
122 | { | 129 | { |
123 | struct pci_mmcfg_region *cfg; | 130 | struct pci_mmcfg_region *cfg; |
124 | 131 | ||
125 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 132 | list_for_each_entry(cfg, &pci_mmcfg_list, list) |
126 | if (cfg->virt) { | 133 | pci_mmcfg_arch_unmap(cfg); |
127 | iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); | 134 | } |
128 | cfg->virt = NULL; | 135 | |
129 | } | 136 | int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg) |
137 | { | ||
138 | cfg->virt = mcfg_ioremap(cfg); | ||
139 | if (!cfg->virt) { | ||
140 | pr_err(PREFIX "can't map MMCONFIG at %pR\n", &cfg->res); | ||
141 | return -ENOMEM; | ||
142 | } | ||
143 | |||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg) | ||
148 | { | ||
149 | if (cfg && cfg->virt) { | ||
150 | iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); | ||
151 | cfg->virt = NULL; | ||
130 | } | 152 | } |
131 | } | 153 | } |
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 140942f66b3..e14a2ff708b 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c | |||
@@ -264,7 +264,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); | |||
264 | 264 | ||
265 | static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev) | 265 | static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev) |
266 | { | 266 | { |
267 | pci_set_power_state(dev, PCI_D3cold); | 267 | pci_set_power_state(dev, PCI_D3hot); |
268 | } | 268 | } |
269 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); | 269 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); |
270 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); | 270 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); |
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 7415aa92791..56ab74989cf 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c | |||
@@ -64,6 +64,10 @@ static int xen_register_pirq(u32 gsi, int gsi_override, int triggering, | |||
64 | int shareable = 0; | 64 | int shareable = 0; |
65 | char *name; | 65 | char *name; |
66 | 66 | ||
67 | irq = xen_irq_from_gsi(gsi); | ||
68 | if (irq > 0) | ||
69 | return irq; | ||
70 | |||
67 | if (set_pirq) | 71 | if (set_pirq) |
68 | pirq = gsi; | 72 | pirq = gsi; |
69 | 73 | ||
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c index 3c6e328483c..028454f0c3a 100644 --- a/arch/x86/platform/mrst/early_printk_mrst.c +++ b/arch/x86/platform/mrst/early_printk_mrst.c | |||
@@ -110,19 +110,16 @@ static struct kmsg_dumper dw_dumper; | |||
110 | static int dumper_registered; | 110 | static int dumper_registered; |
111 | 111 | ||
112 | static void dw_kmsg_dump(struct kmsg_dumper *dumper, | 112 | static void dw_kmsg_dump(struct kmsg_dumper *dumper, |
113 | enum kmsg_dump_reason reason, | 113 | enum kmsg_dump_reason reason) |
114 | const char *s1, unsigned long l1, | ||
115 | const char *s2, unsigned long l2) | ||
116 | { | 114 | { |
117 | int i; | 115 | static char line[1024]; |
116 | size_t len; | ||
118 | 117 | ||
119 | /* When run to this, we'd better re-init the HW */ | 118 | /* When run to this, we'd better re-init the HW */ |
120 | mrst_early_console_init(); | 119 | mrst_early_console_init(); |
121 | 120 | ||
122 | for (i = 0; i < l1; i++) | 121 | while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) |
123 | early_mrst_console.write(&early_mrst_console, s1 + i, 1); | 122 | early_mrst_console.write(&early_mrst_console, line, len); |
124 | for (i = 0; i < l2; i++) | ||
125 | early_mrst_console.write(&early_mrst_console, s2 + i, 1); | ||
126 | } | 123 | } |
127 | 124 | ||
128 | /* Set the ratio rate to 115200, 8n1, IRQ disabled */ | 125 | /* Set the ratio rate to 115200, 8n1, IRQ disabled */ |
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index e31bcd8f2ee..fd41a9262d6 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c | |||
@@ -782,7 +782,7 @@ BLOCKING_NOTIFIER_HEAD(intel_scu_notifier); | |||
782 | EXPORT_SYMBOL_GPL(intel_scu_notifier); | 782 | EXPORT_SYMBOL_GPL(intel_scu_notifier); |
783 | 783 | ||
784 | /* Called by IPC driver */ | 784 | /* Called by IPC driver */ |
785 | void intel_scu_devices_create(void) | 785 | void __devinit intel_scu_devices_create(void) |
786 | { | 786 | { |
787 | int i; | 787 | int i; |
788 | 788 | ||
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index 23e5b9d7977..599be499fdf 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c | |||
@@ -203,7 +203,7 @@ static int xo15_sci_remove(struct acpi_device *device, int type) | |||
203 | return 0; | 203 | return 0; |
204 | } | 204 | } |
205 | 205 | ||
206 | static int xo15_sci_resume(struct acpi_device *device) | 206 | static int xo15_sci_resume(struct device *dev) |
207 | { | 207 | { |
208 | /* Enable all EC events */ | 208 | /* Enable all EC events */ |
209 | olpc_ec_mask_write(EC_SCI_SRC_ALL); | 209 | olpc_ec_mask_write(EC_SCI_SRC_ALL); |
@@ -215,6 +215,8 @@ static int xo15_sci_resume(struct acpi_device *device) | |||
215 | return 0; | 215 | return 0; |
216 | } | 216 | } |
217 | 217 | ||
218 | static SIMPLE_DEV_PM_OPS(xo15_sci_pm, NULL, xo15_sci_resume); | ||
219 | |||
218 | static const struct acpi_device_id xo15_sci_device_ids[] = { | 220 | static const struct acpi_device_id xo15_sci_device_ids[] = { |
219 | {"XO15EC", 0}, | 221 | {"XO15EC", 0}, |
220 | {"", 0}, | 222 | {"", 0}, |
@@ -227,8 +229,8 @@ static struct acpi_driver xo15_sci_drv = { | |||
227 | .ops = { | 229 | .ops = { |
228 | .add = xo15_sci_add, | 230 | .add = xo15_sci_add, |
229 | .remove = xo15_sci_remove, | 231 | .remove = xo15_sci_remove, |
230 | .resume = xo15_sci_resume, | ||
231 | }, | 232 | }, |
233 | .drv.pm = &xo15_sci_pm, | ||
232 | }; | 234 | }; |
233 | 235 | ||
234 | static int __init xo15_sci_init(void) | 236 | static int __init xo15_sci_init(void) |
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 3ae0e61abd2..71b5d5a07d7 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * SGI UltraViolet TLB flush routines. | 2 | * SGI UltraViolet TLB flush routines. |
3 | * | 3 | * |
4 | * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI. | 4 | * (c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI. |
5 | * | 5 | * |
6 | * This code is released under the GNU General Public License version 2 or | 6 | * This code is released under the GNU General Public License version 2 or |
7 | * later. | 7 | * later. |
@@ -38,8 +38,7 @@ static int timeout_base_ns[] = { | |||
38 | 38 | ||
39 | static int timeout_us; | 39 | static int timeout_us; |
40 | static int nobau; | 40 | static int nobau; |
41 | static int baudisabled; | 41 | static int nobau_perm; |
42 | static spinlock_t disable_lock; | ||
43 | static cycles_t congested_cycles; | 42 | static cycles_t congested_cycles; |
44 | 43 | ||
45 | /* tunables: */ | 44 | /* tunables: */ |
@@ -47,12 +46,13 @@ static int max_concurr = MAX_BAU_CONCURRENT; | |||
47 | static int max_concurr_const = MAX_BAU_CONCURRENT; | 46 | static int max_concurr_const = MAX_BAU_CONCURRENT; |
48 | static int plugged_delay = PLUGGED_DELAY; | 47 | static int plugged_delay = PLUGGED_DELAY; |
49 | static int plugsb4reset = PLUGSB4RESET; | 48 | static int plugsb4reset = PLUGSB4RESET; |
49 | static int giveup_limit = GIVEUP_LIMIT; | ||
50 | static int timeoutsb4reset = TIMEOUTSB4RESET; | 50 | static int timeoutsb4reset = TIMEOUTSB4RESET; |
51 | static int ipi_reset_limit = IPI_RESET_LIMIT; | 51 | static int ipi_reset_limit = IPI_RESET_LIMIT; |
52 | static int complete_threshold = COMPLETE_THRESHOLD; | 52 | static int complete_threshold = COMPLETE_THRESHOLD; |
53 | static int congested_respns_us = CONGESTED_RESPONSE_US; | 53 | static int congested_respns_us = CONGESTED_RESPONSE_US; |
54 | static int congested_reps = CONGESTED_REPS; | 54 | static int congested_reps = CONGESTED_REPS; |
55 | static int congested_period = CONGESTED_PERIOD; | 55 | static int disabled_period = DISABLED_PERIOD; |
56 | 56 | ||
57 | static struct tunables tunables[] = { | 57 | static struct tunables tunables[] = { |
58 | {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ | 58 | {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ |
@@ -63,7 +63,8 @@ static struct tunables tunables[] = { | |||
63 | {&complete_threshold, COMPLETE_THRESHOLD}, | 63 | {&complete_threshold, COMPLETE_THRESHOLD}, |
64 | {&congested_respns_us, CONGESTED_RESPONSE_US}, | 64 | {&congested_respns_us, CONGESTED_RESPONSE_US}, |
65 | {&congested_reps, CONGESTED_REPS}, | 65 | {&congested_reps, CONGESTED_REPS}, |
66 | {&congested_period, CONGESTED_PERIOD} | 66 | {&disabled_period, DISABLED_PERIOD}, |
67 | {&giveup_limit, GIVEUP_LIMIT} | ||
67 | }; | 68 | }; |
68 | 69 | ||
69 | static struct dentry *tunables_dir; | 70 | static struct dentry *tunables_dir; |
@@ -120,6 +121,40 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats); | |||
120 | static DEFINE_PER_CPU(struct bau_control, bau_control); | 121 | static DEFINE_PER_CPU(struct bau_control, bau_control); |
121 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | 122 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); |
122 | 123 | ||
124 | static void | ||
125 | set_bau_on(void) | ||
126 | { | ||
127 | int cpu; | ||
128 | struct bau_control *bcp; | ||
129 | |||
130 | if (nobau_perm) { | ||
131 | pr_info("BAU not initialized; cannot be turned on\n"); | ||
132 | return; | ||
133 | } | ||
134 | nobau = 0; | ||
135 | for_each_present_cpu(cpu) { | ||
136 | bcp = &per_cpu(bau_control, cpu); | ||
137 | bcp->nobau = 0; | ||
138 | } | ||
139 | pr_info("BAU turned on\n"); | ||
140 | return; | ||
141 | } | ||
142 | |||
143 | static void | ||
144 | set_bau_off(void) | ||
145 | { | ||
146 | int cpu; | ||
147 | struct bau_control *bcp; | ||
148 | |||
149 | nobau = 1; | ||
150 | for_each_present_cpu(cpu) { | ||
151 | bcp = &per_cpu(bau_control, cpu); | ||
152 | bcp->nobau = 1; | ||
153 | } | ||
154 | pr_info("BAU turned off\n"); | ||
155 | return; | ||
156 | } | ||
157 | |||
123 | /* | 158 | /* |
124 | * Determine the first node on a uvhub. 'Nodes' are used for kernel | 159 | * Determine the first node on a uvhub. 'Nodes' are used for kernel |
125 | * memory allocation. | 160 | * memory allocation. |
@@ -278,7 +313,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, | |||
278 | * Both sockets dump their completed count total into | 313 | * Both sockets dump their completed count total into |
279 | * the message's count. | 314 | * the message's count. |
280 | */ | 315 | */ |
281 | smaster->socket_acknowledge_count[mdp->msg_slot] = 0; | 316 | *sp = 0; |
282 | asp = (struct atomic_short *)&msg->acknowledge_count; | 317 | asp = (struct atomic_short *)&msg->acknowledge_count; |
283 | msg_ack_count = atom_asr(socket_ack_count, asp); | 318 | msg_ack_count = atom_asr(socket_ack_count, asp); |
284 | 319 | ||
@@ -491,16 +526,15 @@ static int uv1_wait_completion(struct bau_desc *bau_desc, | |||
491 | } | 526 | } |
492 | 527 | ||
493 | /* | 528 | /* |
494 | * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. | 529 | * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register. |
530 | * But not currently used. | ||
495 | */ | 531 | */ |
496 | static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc) | 532 | static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc) |
497 | { | 533 | { |
498 | unsigned long descriptor_status; | 534 | unsigned long descriptor_status; |
499 | unsigned long descriptor_status2; | ||
500 | 535 | ||
501 | descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); | 536 | descriptor_status = |
502 | descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL; | 537 | ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1; |
503 | descriptor_status = (descriptor_status << 1) | descriptor_status2; | ||
504 | return descriptor_status; | 538 | return descriptor_status; |
505 | } | 539 | } |
506 | 540 | ||
@@ -531,87 +565,11 @@ int normal_busy(struct bau_control *bcp) | |||
531 | */ | 565 | */ |
532 | int handle_uv2_busy(struct bau_control *bcp) | 566 | int handle_uv2_busy(struct bau_control *bcp) |
533 | { | 567 | { |
534 | int busy_one = bcp->using_desc; | ||
535 | int normal = bcp->uvhub_cpu; | ||
536 | int selected = -1; | ||
537 | int i; | ||
538 | unsigned long descriptor_status; | ||
539 | unsigned long status; | ||
540 | int mmr_offset; | ||
541 | struct bau_desc *bau_desc_old; | ||
542 | struct bau_desc *bau_desc_new; | ||
543 | struct bau_control *hmaster = bcp->uvhub_master; | ||
544 | struct ptc_stats *stat = bcp->statp; | 568 | struct ptc_stats *stat = bcp->statp; |
545 | cycles_t ttm; | ||
546 | 569 | ||
547 | stat->s_uv2_wars++; | 570 | stat->s_uv2_wars++; |
548 | spin_lock(&hmaster->uvhub_lock); | 571 | bcp->busy = 1; |
549 | /* try for the original first */ | 572 | return FLUSH_GIVEUP; |
550 | if (busy_one != normal) { | ||
551 | if (!normal_busy(bcp)) | ||
552 | selected = normal; | ||
553 | } | ||
554 | if (selected < 0) { | ||
555 | /* can't use the normal, select an alternate */ | ||
556 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; | ||
557 | descriptor_status = read_lmmr(mmr_offset); | ||
558 | |||
559 | /* scan available descriptors 32-63 */ | ||
560 | for (i = 0; i < UV_CPUS_PER_AS; i++) { | ||
561 | if ((hmaster->inuse_map & (1 << i)) == 0) { | ||
562 | status = ((descriptor_status >> | ||
563 | (i * UV_ACT_STATUS_SIZE)) & | ||
564 | UV_ACT_STATUS_MASK) << 1; | ||
565 | if (status != UV2H_DESC_BUSY) { | ||
566 | selected = i + UV_CPUS_PER_AS; | ||
567 | break; | ||
568 | } | ||
569 | } | ||
570 | } | ||
571 | } | ||
572 | |||
573 | if (busy_one != normal) | ||
574 | /* mark the busy alternate as not in-use */ | ||
575 | hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS)); | ||
576 | |||
577 | if (selected >= 0) { | ||
578 | /* switch to the selected descriptor */ | ||
579 | if (selected != normal) { | ||
580 | /* set the selected alternate as in-use */ | ||
581 | hmaster->inuse_map |= | ||
582 | (1 << (selected - UV_CPUS_PER_AS)); | ||
583 | if (selected > stat->s_uv2_wars_hw) | ||
584 | stat->s_uv2_wars_hw = selected; | ||
585 | } | ||
586 | bau_desc_old = bcp->descriptor_base; | ||
587 | bau_desc_old += (ITEMS_PER_DESC * busy_one); | ||
588 | bcp->using_desc = selected; | ||
589 | bau_desc_new = bcp->descriptor_base; | ||
590 | bau_desc_new += (ITEMS_PER_DESC * selected); | ||
591 | *bau_desc_new = *bau_desc_old; | ||
592 | } else { | ||
593 | /* | ||
594 | * All are busy. Wait for the normal one for this cpu to | ||
595 | * free up. | ||
596 | */ | ||
597 | stat->s_uv2_war_waits++; | ||
598 | spin_unlock(&hmaster->uvhub_lock); | ||
599 | ttm = get_cycles(); | ||
600 | do { | ||
601 | cpu_relax(); | ||
602 | } while (normal_busy(bcp)); | ||
603 | spin_lock(&hmaster->uvhub_lock); | ||
604 | /* switch to the original descriptor */ | ||
605 | bcp->using_desc = normal; | ||
606 | bau_desc_old = bcp->descriptor_base; | ||
607 | bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc); | ||
608 | bcp->using_desc = (ITEMS_PER_DESC * normal); | ||
609 | bau_desc_new = bcp->descriptor_base; | ||
610 | bau_desc_new += (ITEMS_PER_DESC * normal); | ||
611 | *bau_desc_new = *bau_desc_old; /* copy the entire descriptor */ | ||
612 | } | ||
613 | spin_unlock(&hmaster->uvhub_lock); | ||
614 | return FLUSH_RETRY_BUSYBUG; | ||
615 | } | 573 | } |
616 | 574 | ||
617 | static int uv2_wait_completion(struct bau_desc *bau_desc, | 575 | static int uv2_wait_completion(struct bau_desc *bau_desc, |
@@ -620,7 +578,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, | |||
620 | { | 578 | { |
621 | unsigned long descriptor_stat; | 579 | unsigned long descriptor_stat; |
622 | cycles_t ttm; | 580 | cycles_t ttm; |
623 | int desc = bcp->using_desc; | 581 | int desc = bcp->uvhub_cpu; |
624 | long busy_reps = 0; | 582 | long busy_reps = 0; |
625 | struct ptc_stats *stat = bcp->statp; | 583 | struct ptc_stats *stat = bcp->statp; |
626 | 584 | ||
@@ -628,24 +586,38 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, | |||
628 | 586 | ||
629 | /* spin on the status MMR, waiting for it to go idle */ | 587 | /* spin on the status MMR, waiting for it to go idle */ |
630 | while (descriptor_stat != UV2H_DESC_IDLE) { | 588 | while (descriptor_stat != UV2H_DESC_IDLE) { |
631 | /* | 589 | if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) { |
632 | * Our software ack messages may be blocked because | 590 | /* |
633 | * there are no swack resources available. As long | 591 | * A h/w bug on the destination side may |
634 | * as none of them has timed out hardware will NACK | 592 | * have prevented the message being marked |
635 | * our message and its state will stay IDLE. | 593 | * pending, thus it doesn't get replied to |
636 | */ | 594 | * and gets continually nacked until it times |
637 | if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || | 595 | * out with a SOURCE_TIMEOUT. |
638 | (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { | 596 | */ |
639 | stat->s_stimeout++; | 597 | stat->s_stimeout++; |
640 | return FLUSH_GIVEUP; | 598 | return FLUSH_GIVEUP; |
641 | } else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) { | ||
642 | stat->s_strongnacks++; | ||
643 | bcp->conseccompletes = 0; | ||
644 | return FLUSH_GIVEUP; | ||
645 | } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { | 599 | } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { |
600 | ttm = get_cycles(); | ||
601 | |||
602 | /* | ||
603 | * Our retries may be blocked by all destination | ||
604 | * swack resources being consumed, and a timeout | ||
605 | * pending. In that case hardware returns the | ||
606 | * ERROR that looks like a destination timeout. | ||
607 | * Without using the extended status we have to | ||
608 | * deduce from the short time that this was a | ||
609 | * strong nack. | ||
610 | */ | ||
611 | if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { | ||
612 | bcp->conseccompletes = 0; | ||
613 | stat->s_plugged++; | ||
614 | /* FLUSH_RETRY_PLUGGED causes hang on boot */ | ||
615 | return FLUSH_GIVEUP; | ||
616 | } | ||
646 | stat->s_dtimeout++; | 617 | stat->s_dtimeout++; |
647 | bcp->conseccompletes = 0; | 618 | bcp->conseccompletes = 0; |
648 | return FLUSH_RETRY_TIMEOUT; | 619 | /* FLUSH_RETRY_TIMEOUT causes hang on boot */ |
620 | return FLUSH_GIVEUP; | ||
649 | } else { | 621 | } else { |
650 | busy_reps++; | 622 | busy_reps++; |
651 | if (busy_reps > 1000000) { | 623 | if (busy_reps > 1000000) { |
@@ -653,9 +625,8 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, | |||
653 | busy_reps = 0; | 625 | busy_reps = 0; |
654 | ttm = get_cycles(); | 626 | ttm = get_cycles(); |
655 | if ((ttm - bcp->send_message) > | 627 | if ((ttm - bcp->send_message) > |
656 | (bcp->clocks_per_100_usec)) { | 628 | bcp->timeout_interval) |
657 | return handle_uv2_busy(bcp); | 629 | return handle_uv2_busy(bcp); |
658 | } | ||
659 | } | 630 | } |
660 | /* | 631 | /* |
661 | * descriptor_stat is still BUSY | 632 | * descriptor_stat is still BUSY |
@@ -679,7 +650,7 @@ static int wait_completion(struct bau_desc *bau_desc, | |||
679 | { | 650 | { |
680 | int right_shift; | 651 | int right_shift; |
681 | unsigned long mmr_offset; | 652 | unsigned long mmr_offset; |
682 | int desc = bcp->using_desc; | 653 | int desc = bcp->uvhub_cpu; |
683 | 654 | ||
684 | if (desc < UV_CPUS_PER_AS) { | 655 | if (desc < UV_CPUS_PER_AS) { |
685 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; | 656 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; |
@@ -758,33 +729,31 @@ static void destination_timeout(struct bau_desc *bau_desc, | |||
758 | } | 729 | } |
759 | 730 | ||
760 | /* | 731 | /* |
761 | * Completions are taking a very long time due to a congested numalink | 732 | * Stop all cpus on a uvhub from using the BAU for a period of time. |
762 | * network. | 733 | * This is reversed by check_enable. |
763 | */ | 734 | */ |
764 | static void disable_for_congestion(struct bau_control *bcp, | 735 | static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) |
765 | struct ptc_stats *stat) | ||
766 | { | 736 | { |
767 | /* let only one cpu do this disabling */ | 737 | int tcpu; |
768 | spin_lock(&disable_lock); | 738 | struct bau_control *tbcp; |
769 | 739 | struct bau_control *hmaster; | |
770 | if (!baudisabled && bcp->period_requests && | 740 | cycles_t tm1; |
771 | ((bcp->period_time / bcp->period_requests) > congested_cycles)) { | 741 | |
772 | int tcpu; | 742 | hmaster = bcp->uvhub_master; |
773 | struct bau_control *tbcp; | 743 | spin_lock(&hmaster->disable_lock); |
774 | /* it becomes this cpu's job to turn on the use of the | 744 | if (!bcp->baudisabled) { |
775 | BAU again */ | ||
776 | baudisabled = 1; | ||
777 | bcp->set_bau_off = 1; | ||
778 | bcp->set_bau_on_time = get_cycles(); | ||
779 | bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period); | ||
780 | stat->s_bau_disabled++; | 745 | stat->s_bau_disabled++; |
746 | tm1 = get_cycles(); | ||
781 | for_each_present_cpu(tcpu) { | 747 | for_each_present_cpu(tcpu) { |
782 | tbcp = &per_cpu(bau_control, tcpu); | 748 | tbcp = &per_cpu(bau_control, tcpu); |
783 | tbcp->baudisabled = 1; | 749 | if (tbcp->uvhub_master == hmaster) { |
750 | tbcp->baudisabled = 1; | ||
751 | tbcp->set_bau_on_time = | ||
752 | tm1 + bcp->disabled_period; | ||
753 | } | ||
784 | } | 754 | } |
785 | } | 755 | } |
786 | 756 | spin_unlock(&hmaster->disable_lock); | |
787 | spin_unlock(&disable_lock); | ||
788 | } | 757 | } |
789 | 758 | ||
790 | static void count_max_concurr(int stat, struct bau_control *bcp, | 759 | static void count_max_concurr(int stat, struct bau_control *bcp, |
@@ -815,16 +784,30 @@ static void record_send_stats(cycles_t time1, cycles_t time2, | |||
815 | bcp->period_requests++; | 784 | bcp->period_requests++; |
816 | bcp->period_time += elapsed; | 785 | bcp->period_time += elapsed; |
817 | if ((elapsed > congested_cycles) && | 786 | if ((elapsed > congested_cycles) && |
818 | (bcp->period_requests > bcp->cong_reps)) | 787 | (bcp->period_requests > bcp->cong_reps) && |
819 | disable_for_congestion(bcp, stat); | 788 | ((bcp->period_time / bcp->period_requests) > |
789 | congested_cycles)) { | ||
790 | stat->s_congested++; | ||
791 | disable_for_period(bcp, stat); | ||
792 | } | ||
820 | } | 793 | } |
821 | } else | 794 | } else |
822 | stat->s_requestor--; | 795 | stat->s_requestor--; |
823 | 796 | ||
824 | if (completion_status == FLUSH_COMPLETE && try > 1) | 797 | if (completion_status == FLUSH_COMPLETE && try > 1) |
825 | stat->s_retriesok++; | 798 | stat->s_retriesok++; |
826 | else if (completion_status == FLUSH_GIVEUP) | 799 | else if (completion_status == FLUSH_GIVEUP) { |
827 | stat->s_giveup++; | 800 | stat->s_giveup++; |
801 | if (get_cycles() > bcp->period_end) | ||
802 | bcp->period_giveups = 0; | ||
803 | bcp->period_giveups++; | ||
804 | if (bcp->period_giveups == 1) | ||
805 | bcp->period_end = get_cycles() + bcp->disabled_period; | ||
806 | if (bcp->period_giveups > bcp->giveup_limit) { | ||
807 | disable_for_period(bcp, stat); | ||
808 | stat->s_giveuplimit++; | ||
809 | } | ||
810 | } | ||
828 | } | 811 | } |
829 | 812 | ||
830 | /* | 813 | /* |
@@ -868,7 +851,8 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc, | |||
868 | * Returns 1 if it gives up entirely and the original cpu mask is to be | 851 | * Returns 1 if it gives up entirely and the original cpu mask is to be |
869 | * returned to the kernel. | 852 | * returned to the kernel. |
870 | */ | 853 | */ |
871 | int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) | 854 | int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp, |
855 | struct bau_desc *bau_desc) | ||
872 | { | 856 | { |
873 | int seq_number = 0; | 857 | int seq_number = 0; |
874 | int completion_stat = 0; | 858 | int completion_stat = 0; |
@@ -881,24 +865,23 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) | |||
881 | struct bau_control *hmaster = bcp->uvhub_master; | 865 | struct bau_control *hmaster = bcp->uvhub_master; |
882 | struct uv1_bau_msg_header *uv1_hdr = NULL; | 866 | struct uv1_bau_msg_header *uv1_hdr = NULL; |
883 | struct uv2_bau_msg_header *uv2_hdr = NULL; | 867 | struct uv2_bau_msg_header *uv2_hdr = NULL; |
884 | struct bau_desc *bau_desc; | ||
885 | 868 | ||
886 | if (bcp->uvhub_version == 1) | 869 | if (bcp->uvhub_version == 1) { |
870 | uv1 = 1; | ||
887 | uv1_throttle(hmaster, stat); | 871 | uv1_throttle(hmaster, stat); |
872 | } | ||
888 | 873 | ||
889 | while (hmaster->uvhub_quiesce) | 874 | while (hmaster->uvhub_quiesce) |
890 | cpu_relax(); | 875 | cpu_relax(); |
891 | 876 | ||
892 | time1 = get_cycles(); | 877 | time1 = get_cycles(); |
878 | if (uv1) | ||
879 | uv1_hdr = &bau_desc->header.uv1_hdr; | ||
880 | else | ||
881 | uv2_hdr = &bau_desc->header.uv2_hdr; | ||
882 | |||
893 | do { | 883 | do { |
894 | bau_desc = bcp->descriptor_base; | 884 | if (try == 0) { |
895 | bau_desc += (ITEMS_PER_DESC * bcp->using_desc); | ||
896 | if (bcp->uvhub_version == 1) { | ||
897 | uv1 = 1; | ||
898 | uv1_hdr = &bau_desc->header.uv1_hdr; | ||
899 | } else | ||
900 | uv2_hdr = &bau_desc->header.uv2_hdr; | ||
901 | if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) { | ||
902 | if (uv1) | 885 | if (uv1) |
903 | uv1_hdr->msg_type = MSG_REGULAR; | 886 | uv1_hdr->msg_type = MSG_REGULAR; |
904 | else | 887 | else |
@@ -916,25 +899,24 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) | |||
916 | uv1_hdr->sequence = seq_number; | 899 | uv1_hdr->sequence = seq_number; |
917 | else | 900 | else |
918 | uv2_hdr->sequence = seq_number; | 901 | uv2_hdr->sequence = seq_number; |
919 | index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc; | 902 | index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu; |
920 | bcp->send_message = get_cycles(); | 903 | bcp->send_message = get_cycles(); |
921 | 904 | ||
922 | write_mmr_activation(index); | 905 | write_mmr_activation(index); |
923 | 906 | ||
924 | try++; | 907 | try++; |
925 | completion_stat = wait_completion(bau_desc, bcp, try); | 908 | completion_stat = wait_completion(bau_desc, bcp, try); |
926 | /* UV2: wait_completion() may change the bcp->using_desc */ | ||
927 | 909 | ||
928 | handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); | 910 | handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); |
929 | 911 | ||
930 | if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { | 912 | if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { |
931 | bcp->ipi_attempts = 0; | 913 | bcp->ipi_attempts = 0; |
914 | stat->s_overipilimit++; | ||
932 | completion_stat = FLUSH_GIVEUP; | 915 | completion_stat = FLUSH_GIVEUP; |
933 | break; | 916 | break; |
934 | } | 917 | } |
935 | cpu_relax(); | 918 | cpu_relax(); |
936 | } while ((completion_stat == FLUSH_RETRY_PLUGGED) || | 919 | } while ((completion_stat == FLUSH_RETRY_PLUGGED) || |
937 | (completion_stat == FLUSH_RETRY_BUSYBUG) || | ||
938 | (completion_stat == FLUSH_RETRY_TIMEOUT)); | 920 | (completion_stat == FLUSH_RETRY_TIMEOUT)); |
939 | 921 | ||
940 | time2 = get_cycles(); | 922 | time2 = get_cycles(); |
@@ -955,28 +937,33 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) | |||
955 | } | 937 | } |
956 | 938 | ||
957 | /* | 939 | /* |
958 | * The BAU is disabled. When the disabled time period has expired, the cpu | 940 | * The BAU is disabled for this uvhub. When the disabled time period has |
959 | * that disabled it must re-enable it. | 941 | * expired re-enable it. |
960 | * Return 0 if it is re-enabled for all cpus. | 942 | * Return 0 if it is re-enabled for all cpus on this uvhub. |
961 | */ | 943 | */ |
962 | static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) | 944 | static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) |
963 | { | 945 | { |
964 | int tcpu; | 946 | int tcpu; |
965 | struct bau_control *tbcp; | 947 | struct bau_control *tbcp; |
948 | struct bau_control *hmaster; | ||
966 | 949 | ||
967 | if (bcp->set_bau_off) { | 950 | hmaster = bcp->uvhub_master; |
968 | if (get_cycles() >= bcp->set_bau_on_time) { | 951 | spin_lock(&hmaster->disable_lock); |
969 | stat->s_bau_reenabled++; | 952 | if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { |
970 | baudisabled = 0; | 953 | stat->s_bau_reenabled++; |
971 | for_each_present_cpu(tcpu) { | 954 | for_each_present_cpu(tcpu) { |
972 | tbcp = &per_cpu(bau_control, tcpu); | 955 | tbcp = &per_cpu(bau_control, tcpu); |
956 | if (tbcp->uvhub_master == hmaster) { | ||
973 | tbcp->baudisabled = 0; | 957 | tbcp->baudisabled = 0; |
974 | tbcp->period_requests = 0; | 958 | tbcp->period_requests = 0; |
975 | tbcp->period_time = 0; | 959 | tbcp->period_time = 0; |
960 | tbcp->period_giveups = 0; | ||
976 | } | 961 | } |
977 | return 0; | ||
978 | } | 962 | } |
963 | spin_unlock(&hmaster->disable_lock); | ||
964 | return 0; | ||
979 | } | 965 | } |
966 | spin_unlock(&hmaster->disable_lock); | ||
980 | return -1; | 967 | return -1; |
981 | } | 968 | } |
982 | 969 | ||
@@ -1078,18 +1065,32 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
1078 | struct cpumask *flush_mask; | 1065 | struct cpumask *flush_mask; |
1079 | struct ptc_stats *stat; | 1066 | struct ptc_stats *stat; |
1080 | struct bau_control *bcp; | 1067 | struct bau_control *bcp; |
1081 | 1068 | unsigned long descriptor_status; | |
1082 | /* kernel was booted 'nobau' */ | 1069 | unsigned long status; |
1083 | if (nobau) | ||
1084 | return cpumask; | ||
1085 | 1070 | ||
1086 | bcp = &per_cpu(bau_control, cpu); | 1071 | bcp = &per_cpu(bau_control, cpu); |
1087 | stat = bcp->statp; | 1072 | stat = bcp->statp; |
1073 | stat->s_enters++; | ||
1074 | |||
1075 | if (bcp->nobau) | ||
1076 | return cpumask; | ||
1077 | |||
1078 | if (bcp->busy) { | ||
1079 | descriptor_status = | ||
1080 | read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0); | ||
1081 | status = ((descriptor_status >> (bcp->uvhub_cpu * | ||
1082 | UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1; | ||
1083 | if (status == UV2H_DESC_BUSY) | ||
1084 | return cpumask; | ||
1085 | bcp->busy = 0; | ||
1086 | } | ||
1088 | 1087 | ||
1089 | /* bau was disabled due to slow response */ | 1088 | /* bau was disabled due to slow response */ |
1090 | if (bcp->baudisabled) { | 1089 | if (bcp->baudisabled) { |
1091 | if (check_enable(bcp, stat)) | 1090 | if (check_enable(bcp, stat)) { |
1091 | stat->s_ipifordisabled++; | ||
1092 | return cpumask; | 1092 | return cpumask; |
1093 | } | ||
1093 | } | 1094 | } |
1094 | 1095 | ||
1095 | /* | 1096 | /* |
@@ -1105,7 +1106,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
1105 | stat->s_ntargself++; | 1106 | stat->s_ntargself++; |
1106 | 1107 | ||
1107 | bau_desc = bcp->descriptor_base; | 1108 | bau_desc = bcp->descriptor_base; |
1108 | bau_desc += (ITEMS_PER_DESC * bcp->using_desc); | 1109 | bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu); |
1109 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 1110 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
1110 | if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) | 1111 | if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) |
1111 | return NULL; | 1112 | return NULL; |
@@ -1118,25 +1119,27 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
1118 | * uv_flush_send_and_wait returns 0 if all cpu's were messaged, | 1119 | * uv_flush_send_and_wait returns 0 if all cpu's were messaged, |
1119 | * or 1 if it gave up and the original cpumask should be returned. | 1120 | * or 1 if it gave up and the original cpumask should be returned. |
1120 | */ | 1121 | */ |
1121 | if (!uv_flush_send_and_wait(flush_mask, bcp)) | 1122 | if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc)) |
1122 | return NULL; | 1123 | return NULL; |
1123 | else | 1124 | else |
1124 | return cpumask; | 1125 | return cpumask; |
1125 | } | 1126 | } |
1126 | 1127 | ||
1127 | /* | 1128 | /* |
1128 | * Search the message queue for any 'other' message with the same software | 1129 | * Search the message queue for any 'other' unprocessed message with the |
1129 | * acknowledge resource bit vector. | 1130 | * same software acknowledge resource bit vector as the 'msg' message. |
1130 | */ | 1131 | */ |
1131 | struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, | 1132 | struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, |
1132 | struct bau_control *bcp, unsigned char swack_vec) | 1133 | struct bau_control *bcp) |
1133 | { | 1134 | { |
1134 | struct bau_pq_entry *msg_next = msg + 1; | 1135 | struct bau_pq_entry *msg_next = msg + 1; |
1136 | unsigned char swack_vec = msg->swack_vec; | ||
1135 | 1137 | ||
1136 | if (msg_next > bcp->queue_last) | 1138 | if (msg_next > bcp->queue_last) |
1137 | msg_next = bcp->queue_first; | 1139 | msg_next = bcp->queue_first; |
1138 | while ((msg_next->swack_vec != 0) && (msg_next != msg)) { | 1140 | while (msg_next != msg) { |
1139 | if (msg_next->swack_vec == swack_vec) | 1141 | if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) && |
1142 | (msg_next->swack_vec == swack_vec)) | ||
1140 | return msg_next; | 1143 | return msg_next; |
1141 | msg_next++; | 1144 | msg_next++; |
1142 | if (msg_next > bcp->queue_last) | 1145 | if (msg_next > bcp->queue_last) |
@@ -1165,32 +1168,30 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) | |||
1165 | * This message was assigned a swack resource, but no | 1168 | * This message was assigned a swack resource, but no |
1166 | * reserved acknowlegment is pending. | 1169 | * reserved acknowlegment is pending. |
1167 | * The bug has prevented this message from setting the MMR. | 1170 | * The bug has prevented this message from setting the MMR. |
1168 | * And no other message has used the same sw_ack resource. | ||
1169 | * Do the requested shootdown but do not reply to the msg. | ||
1170 | * (the 0 means make no acknowledge) | ||
1171 | */ | 1171 | */ |
1172 | bau_process_message(mdp, bcp, 0); | ||
1173 | return; | ||
1174 | } | ||
1175 | |||
1176 | /* | ||
1177 | * Some message has set the MMR 'pending' bit; it might have been | ||
1178 | * another message. Look for that message. | ||
1179 | */ | ||
1180 | other_msg = find_another_by_swack(msg, bcp, msg->swack_vec); | ||
1181 | if (other_msg) { | ||
1182 | /* There is another. Do not ack the current one. */ | ||
1183 | bau_process_message(mdp, bcp, 0); | ||
1184 | /* | 1172 | /* |
1185 | * Let the natural processing of that message acknowledge | 1173 | * Some message has set the MMR 'pending' bit; it might have |
1186 | * it. Don't get the processing of sw_ack's out of order. | 1174 | * been another message. Look for that message. |
1187 | */ | 1175 | */ |
1188 | return; | 1176 | other_msg = find_another_by_swack(msg, bcp); |
1177 | if (other_msg) { | ||
1178 | /* | ||
1179 | * There is another. Process this one but do not | ||
1180 | * ack it. | ||
1181 | */ | ||
1182 | bau_process_message(mdp, bcp, 0); | ||
1183 | /* | ||
1184 | * Let the natural processing of that other message | ||
1185 | * acknowledge it. Don't get the processing of sw_ack's | ||
1186 | * out of order. | ||
1187 | */ | ||
1188 | return; | ||
1189 | } | ||
1189 | } | 1190 | } |
1190 | 1191 | ||
1191 | /* | 1192 | /* |
1192 | * There is no other message using this sw_ack, so it is safe to | 1193 | * Either the MMR shows this one pending a reply or there is no |
1193 | * acknowledge it. | 1194 | * other message using this sw_ack, so it is safe to acknowledge it. |
1194 | */ | 1195 | */ |
1195 | bau_process_message(mdp, bcp, 1); | 1196 | bau_process_message(mdp, bcp, 1); |
1196 | 1197 | ||
@@ -1295,8 +1296,8 @@ static void __init enable_timeouts(void) | |||
1295 | */ | 1296 | */ |
1296 | mmr_image |= (1L << SOFTACK_MSHIFT); | 1297 | mmr_image |= (1L << SOFTACK_MSHIFT); |
1297 | if (is_uv2_hub()) { | 1298 | if (is_uv2_hub()) { |
1298 | mmr_image &= ~(1L << UV2_LEG_SHFT); | 1299 | /* hw bug workaround; do not use extended status */ |
1299 | mmr_image |= (1L << UV2_EXT_SHFT); | 1300 | mmr_image &= ~(1L << UV2_EXT_SHFT); |
1300 | } | 1301 | } |
1301 | write_mmr_misc_control(pnode, mmr_image); | 1302 | write_mmr_misc_control(pnode, mmr_image); |
1302 | } | 1303 | } |
@@ -1339,29 +1340,34 @@ static inline unsigned long long usec_2_cycles(unsigned long microsec) | |||
1339 | static int ptc_seq_show(struct seq_file *file, void *data) | 1340 | static int ptc_seq_show(struct seq_file *file, void *data) |
1340 | { | 1341 | { |
1341 | struct ptc_stats *stat; | 1342 | struct ptc_stats *stat; |
1343 | struct bau_control *bcp; | ||
1342 | int cpu; | 1344 | int cpu; |
1343 | 1345 | ||
1344 | cpu = *(loff_t *)data; | 1346 | cpu = *(loff_t *)data; |
1345 | if (!cpu) { | 1347 | if (!cpu) { |
1346 | seq_printf(file, | 1348 | seq_printf(file, |
1347 | "# cpu sent stime self locals remotes ncpus localhub "); | 1349 | "# cpu bauoff sent stime self locals remotes ncpus localhub "); |
1348 | seq_printf(file, | 1350 | seq_printf(file, |
1349 | "remotehub numuvhubs numuvhubs16 numuvhubs8 "); | 1351 | "remotehub numuvhubs numuvhubs16 numuvhubs8 "); |
1350 | seq_printf(file, | 1352 | seq_printf(file, |
1351 | "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok "); | 1353 | "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries "); |
1354 | seq_printf(file, | ||
1355 | "rok resetp resett giveup sto bz throt disable "); | ||
1352 | seq_printf(file, | 1356 | seq_printf(file, |
1353 | "resetp resett giveup sto bz throt swack recv rtime "); | 1357 | "enable wars warshw warwaits enters ipidis plugged "); |
1354 | seq_printf(file, | 1358 | seq_printf(file, |
1355 | "all one mult none retry canc nocan reset rcan "); | 1359 | "ipiover glim cong swack recv rtime all one mult "); |
1356 | seq_printf(file, | 1360 | seq_printf(file, |
1357 | "disable enable wars warshw warwaits\n"); | 1361 | "none retry canc nocan reset rcan\n"); |
1358 | } | 1362 | } |
1359 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { | 1363 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { |
1360 | stat = &per_cpu(ptcstats, cpu); | 1364 | bcp = &per_cpu(bau_control, cpu); |
1365 | stat = bcp->statp; | ||
1361 | /* source side statistics */ | 1366 | /* source side statistics */ |
1362 | seq_printf(file, | 1367 | seq_printf(file, |
1363 | "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | 1368 | "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", |
1364 | cpu, stat->s_requestor, cycles_2_us(stat->s_time), | 1369 | cpu, bcp->nobau, stat->s_requestor, |
1370 | cycles_2_us(stat->s_time), | ||
1365 | stat->s_ntargself, stat->s_ntarglocals, | 1371 | stat->s_ntargself, stat->s_ntarglocals, |
1366 | stat->s_ntargremotes, stat->s_ntargcpu, | 1372 | stat->s_ntargremotes, stat->s_ntargcpu, |
1367 | stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, | 1373 | stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, |
@@ -1375,20 +1381,23 @@ static int ptc_seq_show(struct seq_file *file, void *data) | |||
1375 | stat->s_resets_plug, stat->s_resets_timeout, | 1381 | stat->s_resets_plug, stat->s_resets_timeout, |
1376 | stat->s_giveup, stat->s_stimeout, | 1382 | stat->s_giveup, stat->s_stimeout, |
1377 | stat->s_busy, stat->s_throttles); | 1383 | stat->s_busy, stat->s_throttles); |
1384 | seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | ||
1385 | stat->s_bau_disabled, stat->s_bau_reenabled, | ||
1386 | stat->s_uv2_wars, stat->s_uv2_wars_hw, | ||
1387 | stat->s_uv2_war_waits, stat->s_enters, | ||
1388 | stat->s_ipifordisabled, stat->s_plugged, | ||
1389 | stat->s_overipilimit, stat->s_giveuplimit, | ||
1390 | stat->s_congested); | ||
1378 | 1391 | ||
1379 | /* destination side statistics */ | 1392 | /* destination side statistics */ |
1380 | seq_printf(file, | 1393 | seq_printf(file, |
1381 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | 1394 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", |
1382 | read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), | 1395 | read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), |
1383 | stat->d_requestee, cycles_2_us(stat->d_time), | 1396 | stat->d_requestee, cycles_2_us(stat->d_time), |
1384 | stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, | 1397 | stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, |
1385 | stat->d_nomsg, stat->d_retries, stat->d_canceled, | 1398 | stat->d_nomsg, stat->d_retries, stat->d_canceled, |
1386 | stat->d_nocanceled, stat->d_resets, | 1399 | stat->d_nocanceled, stat->d_resets, |
1387 | stat->d_rcanceled); | 1400 | stat->d_rcanceled); |
1388 | seq_printf(file, "%ld %ld %ld %ld %ld\n", | ||
1389 | stat->s_bau_disabled, stat->s_bau_reenabled, | ||
1390 | stat->s_uv2_wars, stat->s_uv2_wars_hw, | ||
1391 | stat->s_uv2_war_waits); | ||
1392 | } | 1401 | } |
1393 | return 0; | 1402 | return 0; |
1394 | } | 1403 | } |
@@ -1402,13 +1411,14 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf, | |||
1402 | char *buf; | 1411 | char *buf; |
1403 | int ret; | 1412 | int ret; |
1404 | 1413 | ||
1405 | buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", | 1414 | buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n", |
1406 | "max_concur plugged_delay plugsb4reset", | 1415 | "max_concur plugged_delay plugsb4reset timeoutsb4reset", |
1407 | "timeoutsb4reset ipi_reset_limit complete_threshold", | 1416 | "ipi_reset_limit complete_threshold congested_response_us", |
1408 | "congested_response_us congested_reps congested_period", | 1417 | "congested_reps disabled_period giveup_limit", |
1409 | max_concurr, plugged_delay, plugsb4reset, | 1418 | max_concurr, plugged_delay, plugsb4reset, |
1410 | timeoutsb4reset, ipi_reset_limit, complete_threshold, | 1419 | timeoutsb4reset, ipi_reset_limit, complete_threshold, |
1411 | congested_respns_us, congested_reps, congested_period); | 1420 | congested_respns_us, congested_reps, disabled_period, |
1421 | giveup_limit); | ||
1412 | 1422 | ||
1413 | if (!buf) | 1423 | if (!buf) |
1414 | return -ENOMEM; | 1424 | return -ENOMEM; |
@@ -1439,6 +1449,14 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user, | |||
1439 | return -EFAULT; | 1449 | return -EFAULT; |
1440 | optstr[count - 1] = '\0'; | 1450 | optstr[count - 1] = '\0'; |
1441 | 1451 | ||
1452 | if (!strcmp(optstr, "on")) { | ||
1453 | set_bau_on(); | ||
1454 | return count; | ||
1455 | } else if (!strcmp(optstr, "off")) { | ||
1456 | set_bau_off(); | ||
1457 | return count; | ||
1458 | } | ||
1459 | |||
1442 | if (strict_strtol(optstr, 10, &input_arg) < 0) { | 1460 | if (strict_strtol(optstr, 10, &input_arg) < 0) { |
1443 | printk(KERN_DEBUG "%s is invalid\n", optstr); | 1461 | printk(KERN_DEBUG "%s is invalid\n", optstr); |
1444 | return -EINVAL; | 1462 | return -EINVAL; |
@@ -1571,7 +1589,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user, | |||
1571 | bcp->complete_threshold = complete_threshold; | 1589 | bcp->complete_threshold = complete_threshold; |
1572 | bcp->cong_response_us = congested_respns_us; | 1590 | bcp->cong_response_us = congested_respns_us; |
1573 | bcp->cong_reps = congested_reps; | 1591 | bcp->cong_reps = congested_reps; |
1574 | bcp->cong_period = congested_period; | 1592 | bcp->disabled_period = sec_2_cycles(disabled_period); |
1593 | bcp->giveup_limit = giveup_limit; | ||
1575 | } | 1594 | } |
1576 | return count; | 1595 | return count; |
1577 | } | 1596 | } |
@@ -1700,6 +1719,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) | |||
1700 | * fairness chaining multilevel count replied_to | 1719 | * fairness chaining multilevel count replied_to |
1701 | */ | 1720 | */ |
1702 | } else { | 1721 | } else { |
1722 | /* | ||
1723 | * BIOS uses legacy mode, but UV2 hardware always | ||
1724 | * uses native mode for selective broadcasts. | ||
1725 | */ | ||
1703 | uv2_hdr = &bd2->header.uv2_hdr; | 1726 | uv2_hdr = &bd2->header.uv2_hdr; |
1704 | uv2_hdr->swack_flag = 1; | 1727 | uv2_hdr->swack_flag = 1; |
1705 | uv2_hdr->base_dest_nasid = | 1728 | uv2_hdr->base_dest_nasid = |
@@ -1812,8 +1835,8 @@ static int calculate_destination_timeout(void) | |||
1812 | index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; | 1835 | index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; |
1813 | mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); | 1836 | mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); |
1814 | mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; | 1837 | mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; |
1815 | base = timeout_base_ns[index]; | 1838 | ts_ns = timeout_base_ns[index]; |
1816 | ts_ns = base * mult1 * mult2; | 1839 | ts_ns *= (mult1 * mult2); |
1817 | ret = ts_ns / 1000; | 1840 | ret = ts_ns / 1000; |
1818 | } else { | 1841 | } else { |
1819 | /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */ | 1842 | /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */ |
@@ -1837,6 +1860,8 @@ static void __init init_per_cpu_tunables(void) | |||
1837 | for_each_present_cpu(cpu) { | 1860 | for_each_present_cpu(cpu) { |
1838 | bcp = &per_cpu(bau_control, cpu); | 1861 | bcp = &per_cpu(bau_control, cpu); |
1839 | bcp->baudisabled = 0; | 1862 | bcp->baudisabled = 0; |
1863 | if (nobau) | ||
1864 | bcp->nobau = 1; | ||
1840 | bcp->statp = &per_cpu(ptcstats, cpu); | 1865 | bcp->statp = &per_cpu(ptcstats, cpu); |
1841 | /* time interval to catch a hardware stay-busy bug */ | 1866 | /* time interval to catch a hardware stay-busy bug */ |
1842 | bcp->timeout_interval = usec_2_cycles(2*timeout_us); | 1867 | bcp->timeout_interval = usec_2_cycles(2*timeout_us); |
@@ -1849,10 +1874,11 @@ static void __init init_per_cpu_tunables(void) | |||
1849 | bcp->complete_threshold = complete_threshold; | 1874 | bcp->complete_threshold = complete_threshold; |
1850 | bcp->cong_response_us = congested_respns_us; | 1875 | bcp->cong_response_us = congested_respns_us; |
1851 | bcp->cong_reps = congested_reps; | 1876 | bcp->cong_reps = congested_reps; |
1852 | bcp->cong_period = congested_period; | 1877 | bcp->disabled_period = sec_2_cycles(disabled_period); |
1853 | bcp->clocks_per_100_usec = usec_2_cycles(100); | 1878 | bcp->giveup_limit = giveup_limit; |
1854 | spin_lock_init(&bcp->queue_lock); | 1879 | spin_lock_init(&bcp->queue_lock); |
1855 | spin_lock_init(&bcp->uvhub_lock); | 1880 | spin_lock_init(&bcp->uvhub_lock); |
1881 | spin_lock_init(&bcp->disable_lock); | ||
1856 | } | 1882 | } |
1857 | } | 1883 | } |
1858 | 1884 | ||
@@ -1973,7 +1999,6 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, | |||
1973 | } | 1999 | } |
1974 | bcp->uvhub_master = *hmasterp; | 2000 | bcp->uvhub_master = *hmasterp; |
1975 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; | 2001 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; |
1976 | bcp->using_desc = bcp->uvhub_cpu; | ||
1977 | if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { | 2002 | if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { |
1978 | printk(KERN_EMERG "%d cpus per uvhub invalid\n", | 2003 | printk(KERN_EMERG "%d cpus per uvhub invalid\n", |
1979 | bcp->uvhub_cpu); | 2004 | bcp->uvhub_cpu); |
@@ -2070,16 +2095,12 @@ static int __init uv_bau_init(void) | |||
2070 | if (!is_uv_system()) | 2095 | if (!is_uv_system()) |
2071 | return 0; | 2096 | return 0; |
2072 | 2097 | ||
2073 | if (nobau) | ||
2074 | return 0; | ||
2075 | |||
2076 | for_each_possible_cpu(cur_cpu) { | 2098 | for_each_possible_cpu(cur_cpu) { |
2077 | mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); | 2099 | mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); |
2078 | zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); | 2100 | zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); |
2079 | } | 2101 | } |
2080 | 2102 | ||
2081 | nuvhubs = uv_num_possible_blades(); | 2103 | nuvhubs = uv_num_possible_blades(); |
2082 | spin_lock_init(&disable_lock); | ||
2083 | congested_cycles = usec_2_cycles(congested_respns_us); | 2104 | congested_cycles = usec_2_cycles(congested_respns_us); |
2084 | 2105 | ||
2085 | uv_base_pnode = 0x7fffffff; | 2106 | uv_base_pnode = 0x7fffffff; |
@@ -2092,7 +2113,8 @@ static int __init uv_bau_init(void) | |||
2092 | enable_timeouts(); | 2113 | enable_timeouts(); |
2093 | 2114 | ||
2094 | if (init_per_cpu(nuvhubs, uv_base_pnode)) { | 2115 | if (init_per_cpu(nuvhubs, uv_base_pnode)) { |
2095 | nobau = 1; | 2116 | set_bau_off(); |
2117 | nobau_perm = 1; | ||
2096 | return 0; | 2118 | return 0; |
2097 | } | 2119 | } |
2098 | 2120 | ||
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index a22c41656b5..acf7752da95 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c | |||
@@ -135,6 +135,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
135 | unsigned long mmr_value; | 135 | unsigned long mmr_value; |
136 | struct uv_IO_APIC_route_entry *entry; | 136 | struct uv_IO_APIC_route_entry *entry; |
137 | int mmr_pnode, err; | 137 | int mmr_pnode, err; |
138 | unsigned int dest; | ||
138 | 139 | ||
139 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | 140 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != |
140 | sizeof(unsigned long)); | 141 | sizeof(unsigned long)); |
@@ -143,6 +144,10 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
143 | if (err != 0) | 144 | if (err != 0) |
144 | return err; | 145 | return err; |
145 | 146 | ||
147 | err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest); | ||
148 | if (err != 0) | ||
149 | return err; | ||
150 | |||
146 | if (limit == UV_AFFINITY_CPU) | 151 | if (limit == UV_AFFINITY_CPU) |
147 | irq_set_status_flags(irq, IRQ_NO_BALANCING); | 152 | irq_set_status_flags(irq, IRQ_NO_BALANCING); |
148 | else | 153 | else |
@@ -159,7 +164,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
159 | entry->polarity = 0; | 164 | entry->polarity = 0; |
160 | entry->trigger = 0; | 165 | entry->trigger = 0; |
161 | entry->mask = 0; | 166 | entry->mask = 0; |
162 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | 167 | entry->dest = dest; |
163 | 168 | ||
164 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | 169 | mmr_pnode = uv_blade_to_pnode(mmr_blade); |
165 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 170 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile new file mode 100644 index 00000000000..94f7fbe97b0 --- /dev/null +++ b/arch/x86/realmode/Makefile | |||
@@ -0,0 +1,18 @@ | |||
1 | # | ||
2 | # arch/x86/realmode/Makefile | ||
3 | # | ||
4 | # This file is subject to the terms and conditions of the GNU General Public | ||
5 | # License. See the file "COPYING" in the main directory of this archive | ||
6 | # for more details. | ||
7 | # | ||
8 | # | ||
9 | |||
10 | subdir- := rm | ||
11 | |||
12 | obj-y += init.o | ||
13 | obj-y += rmpiggy.o | ||
14 | |||
15 | $(obj)/rmpiggy.o: $(obj)/rm/realmode.bin | ||
16 | |||
17 | $(obj)/rm/realmode.bin: FORCE | ||
18 | $(Q)$(MAKE) $(build)=$(obj)/rm $@ | ||
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c new file mode 100644 index 00000000000..cbca565af5b --- /dev/null +++ b/arch/x86/realmode/init.c | |||
@@ -0,0 +1,115 @@ | |||
1 | #include <linux/io.h> | ||
2 | #include <linux/memblock.h> | ||
3 | |||
4 | #include <asm/cacheflush.h> | ||
5 | #include <asm/pgtable.h> | ||
6 | #include <asm/realmode.h> | ||
7 | |||
8 | struct real_mode_header *real_mode_header; | ||
9 | u32 *trampoline_cr4_features; | ||
10 | |||
11 | void __init setup_real_mode(void) | ||
12 | { | ||
13 | phys_addr_t mem; | ||
14 | u16 real_mode_seg; | ||
15 | u32 *rel; | ||
16 | u32 count; | ||
17 | u32 *ptr; | ||
18 | u16 *seg; | ||
19 | int i; | ||
20 | unsigned char *base; | ||
21 | struct trampoline_header *trampoline_header; | ||
22 | size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); | ||
23 | #ifdef CONFIG_X86_64 | ||
24 | u64 *trampoline_pgd; | ||
25 | u64 efer; | ||
26 | #endif | ||
27 | |||
28 | /* Has to be in very low memory so we can execute real-mode AP code. */ | ||
29 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); | ||
30 | if (!mem) | ||
31 | panic("Cannot allocate trampoline\n"); | ||
32 | |||
33 | base = __va(mem); | ||
34 | memblock_reserve(mem, size); | ||
35 | real_mode_header = (struct real_mode_header *) base; | ||
36 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", | ||
37 | base, (unsigned long long)mem, size); | ||
38 | |||
39 | memcpy(base, real_mode_blob, size); | ||
40 | |||
41 | real_mode_seg = __pa(base) >> 4; | ||
42 | rel = (u32 *) real_mode_relocs; | ||
43 | |||
44 | /* 16-bit segment relocations. */ | ||
45 | count = rel[0]; | ||
46 | rel = &rel[1]; | ||
47 | for (i = 0; i < count; i++) { | ||
48 | seg = (u16 *) (base + rel[i]); | ||
49 | *seg = real_mode_seg; | ||
50 | } | ||
51 | |||
52 | /* 32-bit linear relocations. */ | ||
53 | count = rel[i]; | ||
54 | rel = &rel[i + 1]; | ||
55 | for (i = 0; i < count; i++) { | ||
56 | ptr = (u32 *) (base + rel[i]); | ||
57 | *ptr += __pa(base); | ||
58 | } | ||
59 | |||
60 | /* Must be perfomed *after* relocation. */ | ||
61 | trampoline_header = (struct trampoline_header *) | ||
62 | __va(real_mode_header->trampoline_header); | ||
63 | |||
64 | #ifdef CONFIG_X86_32 | ||
65 | trampoline_header->start = __pa(startup_32_smp); | ||
66 | trampoline_header->gdt_limit = __BOOT_DS + 7; | ||
67 | trampoline_header->gdt_base = __pa(boot_gdt); | ||
68 | #else | ||
69 | /* | ||
70 | * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR | ||
71 | * so we need to mask it out. | ||
72 | */ | ||
73 | rdmsrl(MSR_EFER, efer); | ||
74 | trampoline_header->efer = efer & ~EFER_LMA; | ||
75 | |||
76 | trampoline_header->start = (u64) secondary_startup_64; | ||
77 | trampoline_cr4_features = &trampoline_header->cr4; | ||
78 | *trampoline_cr4_features = read_cr4(); | ||
79 | |||
80 | trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); | ||
81 | trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE; | ||
82 | trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE; | ||
83 | #endif | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * set_real_mode_permissions() gets called very early, to guarantee the | ||
88 | * availability of low memory. This is before the proper kernel page | ||
89 | * tables are set up, so we cannot set page permissions in that | ||
90 | * function. Thus, we use an arch_initcall instead. | ||
91 | */ | ||
92 | static int __init set_real_mode_permissions(void) | ||
93 | { | ||
94 | unsigned char *base = (unsigned char *) real_mode_header; | ||
95 | size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); | ||
96 | |||
97 | size_t ro_size = | ||
98 | PAGE_ALIGN(real_mode_header->ro_end) - | ||
99 | __pa(base); | ||
100 | |||
101 | size_t text_size = | ||
102 | PAGE_ALIGN(real_mode_header->ro_end) - | ||
103 | real_mode_header->text_start; | ||
104 | |||
105 | unsigned long text_start = | ||
106 | (unsigned long) __va(real_mode_header->text_start); | ||
107 | |||
108 | set_memory_nx((unsigned long) base, size >> PAGE_SHIFT); | ||
109 | set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT); | ||
110 | set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT); | ||
111 | |||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | arch_initcall(set_real_mode_permissions); | ||
diff --git a/arch/x86/realmode/rm/.gitignore b/arch/x86/realmode/rm/.gitignore new file mode 100644 index 00000000000..b6ed3a2555c --- /dev/null +++ b/arch/x86/realmode/rm/.gitignore | |||
@@ -0,0 +1,3 @@ | |||
1 | pasyms.h | ||
2 | realmode.lds | ||
3 | realmode.relocs | ||
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile new file mode 100644 index 00000000000..b2d534cab25 --- /dev/null +++ b/arch/x86/realmode/rm/Makefile | |||
@@ -0,0 +1,82 @@ | |||
1 | # | ||
2 | # arch/x86/realmode/Makefile | ||
3 | # | ||
4 | # This file is subject to the terms and conditions of the GNU General Public | ||
5 | # License. See the file "COPYING" in the main directory of this archive | ||
6 | # for more details. | ||
7 | # | ||
8 | # | ||
9 | |||
10 | always := realmode.bin realmode.relocs | ||
11 | |||
12 | wakeup-objs := wakeup_asm.o wakemain.o video-mode.o | ||
13 | wakeup-objs += copy.o bioscall.o regs.o | ||
14 | # The link order of the video-*.o modules can matter. In particular, | ||
15 | # video-vga.o *must* be listed first, followed by video-vesa.o. | ||
16 | # Hardware-specific drivers should follow in the order they should be | ||
17 | # probed, and video-bios.o should typically be last. | ||
18 | wakeup-objs += video-vga.o | ||
19 | wakeup-objs += video-vesa.o | ||
20 | wakeup-objs += video-bios.o | ||
21 | |||
22 | realmode-y += header.o | ||
23 | realmode-y += trampoline_$(BITS).o | ||
24 | realmode-y += stack.o | ||
25 | realmode-y += reboot.o | ||
26 | realmode-$(CONFIG_ACPI_SLEEP) += $(wakeup-objs) | ||
27 | |||
28 | targets += $(realmode-y) | ||
29 | |||
30 | REALMODE_OBJS = $(addprefix $(obj)/,$(realmode-y)) | ||
31 | |||
32 | sed-pasyms := -n -r -e 's/^([0-9a-fA-F]+) [ABCDGRSTVW] (.+)$$/pa_\2 = \2;/p' | ||
33 | |||
34 | quiet_cmd_pasyms = PASYMS $@ | ||
35 | cmd_pasyms = $(NM) $(filter-out FORCE,$^) | \ | ||
36 | sed $(sed-pasyms) | sort | uniq > $@ | ||
37 | |||
38 | targets += pasyms.h | ||
39 | $(obj)/pasyms.h: $(REALMODE_OBJS) FORCE | ||
40 | $(call if_changed,pasyms) | ||
41 | |||
42 | targets += realmode.lds | ||
43 | $(obj)/realmode.lds: $(obj)/pasyms.h | ||
44 | |||
45 | LDFLAGS_realmode.elf := --emit-relocs -T | ||
46 | CPPFLAGS_realmode.lds += -P -C -I$(obj) | ||
47 | |||
48 | targets += realmode.elf | ||
49 | $(obj)/realmode.elf: $(obj)/realmode.lds $(REALMODE_OBJS) FORCE | ||
50 | $(call if_changed,ld) | ||
51 | |||
52 | OBJCOPYFLAGS_realmode.bin := -O binary | ||
53 | |||
54 | targets += realmode.bin | ||
55 | $(obj)/realmode.bin: $(obj)/realmode.elf $(obj)/realmode.relocs | ||
56 | $(call if_changed,objcopy) | ||
57 | |||
58 | quiet_cmd_relocs = RELOCS $@ | ||
59 | cmd_relocs = arch/x86/tools/relocs --realmode $< > $@ | ||
60 | |||
61 | targets += realmode.relocs | ||
62 | $(obj)/realmode.relocs: $(obj)/realmode.elf FORCE | ||
63 | $(call if_changed,relocs) | ||
64 | |||
65 | # --------------------------------------------------------------------------- | ||
66 | |||
67 | # How to compile the 16-bit code. Note we always compile for -march=i386, | ||
68 | # that way we can complain to the user if the CPU is insufficient. | ||
69 | KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \ | ||
70 | -I$(srctree)/arch/x86/boot \ | ||
71 | -DDISABLE_BRANCH_PROFILING \ | ||
72 | -Wall -Wstrict-prototypes \ | ||
73 | -march=i386 -mregparm=3 \ | ||
74 | -include $(srctree)/$(src)/../../boot/code16gcc.h \ | ||
75 | -fno-strict-aliasing -fomit-frame-pointer \ | ||
76 | $(call cc-option, -ffreestanding) \ | ||
77 | $(call cc-option, -fno-toplevel-reorder,\ | ||
78 | $(call cc-option, -fno-unit-at-a-time)) \ | ||
79 | $(call cc-option, -fno-stack-protector) \ | ||
80 | $(call cc-option, -mpreferred-stack-boundary=2) | ||
81 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ | ||
82 | GCOV_PROFILE := n | ||
diff --git a/arch/x86/realmode/rm/bioscall.S b/arch/x86/realmode/rm/bioscall.S new file mode 100644 index 00000000000..16162d19791 --- /dev/null +++ b/arch/x86/realmode/rm/bioscall.S | |||
@@ -0,0 +1 @@ | |||
#include "../../boot/bioscall.S" | |||
diff --git a/arch/x86/realmode/rm/copy.S b/arch/x86/realmode/rm/copy.S new file mode 100644 index 00000000000..b785e6f38fd --- /dev/null +++ b/arch/x86/realmode/rm/copy.S | |||
@@ -0,0 +1 @@ | |||
#include "../../boot/copy.S" | |||
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S new file mode 100644 index 00000000000..a28221d94e6 --- /dev/null +++ b/arch/x86/realmode/rm/header.S | |||
@@ -0,0 +1,43 @@ | |||
1 | /* | ||
2 | * Real-mode blob header; this should match realmode.h and be | ||
3 | * readonly; for mutable data instead add pointers into the .data | ||
4 | * or .bss sections as appropriate. | ||
5 | */ | ||
6 | |||
7 | #include <linux/linkage.h> | ||
8 | #include <asm/page_types.h> | ||
9 | #include <asm/segment.h> | ||
10 | |||
11 | #include "realmode.h" | ||
12 | |||
13 | .section ".header", "a" | ||
14 | |||
15 | .balign 16 | ||
16 | GLOBAL(real_mode_header) | ||
17 | .long pa_text_start | ||
18 | .long pa_ro_end | ||
19 | /* SMP trampoline */ | ||
20 | .long pa_trampoline_start | ||
21 | .long pa_trampoline_status | ||
22 | .long pa_trampoline_header | ||
23 | #ifdef CONFIG_X86_64 | ||
24 | .long pa_trampoline_pgd; | ||
25 | #endif | ||
26 | /* ACPI S3 wakeup */ | ||
27 | #ifdef CONFIG_ACPI_SLEEP | ||
28 | .long pa_wakeup_start | ||
29 | .long pa_wakeup_header | ||
30 | #endif | ||
31 | /* APM/BIOS reboot */ | ||
32 | .long pa_machine_real_restart_asm | ||
33 | #ifdef CONFIG_X86_64 | ||
34 | .long __KERNEL32_CS | ||
35 | #endif | ||
36 | END(real_mode_header) | ||
37 | |||
38 | /* End signature, used to verify integrity */ | ||
39 | .section ".signature","a" | ||
40 | .balign 4 | ||
41 | GLOBAL(end_signature) | ||
42 | .long REALMODE_END_SIGNATURE | ||
43 | END(end_signature) | ||
diff --git a/arch/x86/realmode/rm/realmode.h b/arch/x86/realmode/rm/realmode.h new file mode 100644 index 00000000000..d74cff6350e --- /dev/null +++ b/arch/x86/realmode/rm/realmode.h | |||
@@ -0,0 +1,21 @@ | |||
1 | #ifndef ARCH_X86_REALMODE_RM_REALMODE_H | ||
2 | #define ARCH_X86_REALMODE_RM_REALMODE_H | ||
3 | |||
4 | #ifdef __ASSEMBLY__ | ||
5 | |||
6 | /* | ||
7 | * 16-bit ljmpw to the real_mode_seg | ||
8 | * | ||
9 | * This must be open-coded since gas will choke on using a | ||
10 | * relocatable symbol for the segment portion. | ||
11 | */ | ||
12 | #define LJMPW_RM(to) .byte 0xea ; .word (to), real_mode_seg | ||
13 | |||
14 | #endif /* __ASSEMBLY__ */ | ||
15 | |||
16 | /* | ||
17 | * Signature at the end of the realmode region | ||
18 | */ | ||
19 | #define REALMODE_END_SIGNATURE 0x65a22c82 | ||
20 | |||
21 | #endif /* ARCH_X86_REALMODE_RM_REALMODE_H */ | ||
diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S new file mode 100644 index 00000000000..86b2e8d6b1f --- /dev/null +++ b/arch/x86/realmode/rm/realmode.lds.S | |||
@@ -0,0 +1,76 @@ | |||
1 | /* | ||
2 | * realmode.lds.S | ||
3 | * | ||
4 | * Linker script for the real-mode code | ||
5 | */ | ||
6 | |||
7 | #include <asm/page_types.h> | ||
8 | |||
9 | #undef i386 | ||
10 | |||
11 | OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") | ||
12 | OUTPUT_ARCH(i386) | ||
13 | |||
14 | SECTIONS | ||
15 | { | ||
16 | real_mode_seg = 0; | ||
17 | |||
18 | . = 0; | ||
19 | .header : { | ||
20 | pa_real_mode_base = .; | ||
21 | *(.header) | ||
22 | } | ||
23 | |||
24 | . = ALIGN(4); | ||
25 | .rodata : { | ||
26 | *(.rodata) | ||
27 | *(.rodata.*) | ||
28 | . = ALIGN(16); | ||
29 | video_cards = .; | ||
30 | *(.videocards) | ||
31 | video_cards_end = .; | ||
32 | } | ||
33 | |||
34 | . = ALIGN(PAGE_SIZE); | ||
35 | pa_text_start = .; | ||
36 | .text : { | ||
37 | *(.text) | ||
38 | *(.text.*) | ||
39 | } | ||
40 | |||
41 | .text32 : { | ||
42 | *(.text32) | ||
43 | *(.text32.*) | ||
44 | } | ||
45 | |||
46 | .text64 : { | ||
47 | *(.text64) | ||
48 | *(.text64.*) | ||
49 | } | ||
50 | pa_ro_end = .; | ||
51 | |||
52 | . = ALIGN(PAGE_SIZE); | ||
53 | .data : { | ||
54 | *(.data) | ||
55 | *(.data.*) | ||
56 | } | ||
57 | |||
58 | . = ALIGN(128); | ||
59 | .bss : { | ||
60 | *(.bss*) | ||
61 | } | ||
62 | |||
63 | /* End signature for integrity checking */ | ||
64 | . = ALIGN(4); | ||
65 | .signature : { | ||
66 | *(.signature) | ||
67 | } | ||
68 | |||
69 | /DISCARD/ : { | ||
70 | *(.note*) | ||
71 | *(.debug*) | ||
72 | *(.eh_frame*) | ||
73 | } | ||
74 | |||
75 | #include "pasyms.h" | ||
76 | } | ||
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/realmode/rm/reboot.S index 1d5c46df0d7..f932ea61d1c 100644 --- a/arch/x86/kernel/reboot_32.S +++ b/arch/x86/realmode/rm/reboot.S | |||
@@ -2,6 +2,9 @@ | |||
2 | #include <linux/init.h> | 2 | #include <linux/init.h> |
3 | #include <asm/segment.h> | 3 | #include <asm/segment.h> |
4 | #include <asm/page_types.h> | 4 | #include <asm/page_types.h> |
5 | #include <asm/processor-flags.h> | ||
6 | #include <asm/msr-index.h> | ||
7 | #include "realmode.h" | ||
5 | 8 | ||
6 | /* | 9 | /* |
7 | * The following code and data reboots the machine by switching to real | 10 | * The following code and data reboots the machine by switching to real |
@@ -11,36 +14,44 @@ | |||
11 | * doesn't work with at least one type of 486 motherboard. It is easy | 14 | * doesn't work with at least one type of 486 motherboard. It is easy |
12 | * to stop this code working; hence the copious comments. | 15 | * to stop this code working; hence the copious comments. |
13 | * | 16 | * |
14 | * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. | 17 | * This code is called with the restart type (0 = BIOS, 1 = APM) in |
18 | * the primary argument register (%eax for 32 bit, %edi for 64 bit). | ||
15 | */ | 19 | */ |
16 | .section ".x86_trampoline","a" | 20 | .section ".text32", "ax" |
17 | .balign 16 | ||
18 | .code32 | 21 | .code32 |
19 | ENTRY(machine_real_restart_asm) | 22 | ENTRY(machine_real_restart_asm) |
20 | r_base = . | ||
21 | /* Get our own relocated address */ | ||
22 | call 1f | ||
23 | 1: popl %ebx | ||
24 | subl $(1b - r_base), %ebx | ||
25 | |||
26 | /* Compute the equivalent real-mode segment */ | ||
27 | movl %ebx, %ecx | ||
28 | shrl $4, %ecx | ||
29 | |||
30 | /* Patch post-real-mode segment jump */ | ||
31 | movw (dispatch_table - r_base)(%ebx,%eax,2),%ax | ||
32 | movw %ax, (101f - r_base)(%ebx) | ||
33 | movw %cx, (102f - r_base)(%ebx) | ||
34 | 23 | ||
24 | #ifdef CONFIG_X86_64 | ||
25 | /* Switch to trampoline GDT as it is guaranteed < 4 GiB */ | ||
26 | movl $__KERNEL_DS, %eax | ||
27 | movl %eax, %ds | ||
28 | lgdtl pa_tr_gdt | ||
29 | |||
30 | /* Disable paging to drop us out of long mode */ | ||
31 | movl %cr0, %eax | ||
32 | andl $~X86_CR0_PG, %eax | ||
33 | movl %eax, %cr0 | ||
34 | ljmpl $__KERNEL32_CS, $pa_machine_real_restart_paging_off | ||
35 | |||
36 | GLOBAL(machine_real_restart_paging_off) | ||
37 | xorl %eax, %eax | ||
38 | xorl %edx, %edx | ||
39 | movl $MSR_EFER, %ecx | ||
40 | wrmsr | ||
41 | |||
42 | movl %edi, %eax | ||
43 | |||
44 | #endif /* CONFIG_X86_64 */ | ||
45 | |||
35 | /* Set up the IDT for real mode. */ | 46 | /* Set up the IDT for real mode. */ |
36 | lidtl (machine_real_restart_idt - r_base)(%ebx) | 47 | lidtl pa_machine_real_restart_idt |
37 | 48 | ||
38 | /* | 49 | /* |
39 | * Set up a GDT from which we can load segment descriptors for real | 50 | * Set up a GDT from which we can load segment descriptors for real |
40 | * mode. The GDT is not used in real mode; it is just needed here to | 51 | * mode. The GDT is not used in real mode; it is just needed here to |
41 | * prepare the descriptors. | 52 | * prepare the descriptors. |
42 | */ | 53 | */ |
43 | lgdtl (machine_real_restart_gdt - r_base)(%ebx) | 54 | lgdtl pa_machine_real_restart_gdt |
44 | 55 | ||
45 | /* | 56 | /* |
46 | * Load the data segment registers with 16-bit compatible values | 57 | * Load the data segment registers with 16-bit compatible values |
@@ -51,7 +62,7 @@ r_base = . | |||
51 | movl %ecx, %fs | 62 | movl %ecx, %fs |
52 | movl %ecx, %gs | 63 | movl %ecx, %gs |
53 | movl %ecx, %ss | 64 | movl %ecx, %ss |
54 | ljmpl $8, $1f - r_base | 65 | ljmpw $8, $1f |
55 | 66 | ||
56 | /* | 67 | /* |
57 | * This is 16-bit protected mode code to disable paging and the cache, | 68 | * This is 16-bit protected mode code to disable paging and the cache, |
@@ -76,27 +87,29 @@ r_base = . | |||
76 | * | 87 | * |
77 | * Most of this work is probably excessive, but it is what is tested. | 88 | * Most of this work is probably excessive, but it is what is tested. |
78 | */ | 89 | */ |
90 | .text | ||
79 | .code16 | 91 | .code16 |
92 | |||
93 | .balign 16 | ||
94 | machine_real_restart_asm16: | ||
80 | 1: | 95 | 1: |
81 | xorl %ecx, %ecx | 96 | xorl %ecx, %ecx |
82 | movl %cr0, %eax | 97 | movl %cr0, %edx |
83 | andl $0x00000011, %eax | 98 | andl $0x00000011, %edx |
84 | orl $0x60000000, %eax | 99 | orl $0x60000000, %edx |
85 | movl %eax, %cr0 | 100 | movl %edx, %cr0 |
86 | movl %ecx, %cr3 | 101 | movl %ecx, %cr3 |
87 | movl %cr0, %edx | 102 | movl %cr0, %edx |
88 | andl $0x60000000, %edx /* If no cache bits -> no wbinvd */ | 103 | testl $0x60000000, %edx /* If no cache bits -> no wbinvd */ |
89 | jz 2f | 104 | jz 2f |
90 | wbinvd | 105 | wbinvd |
91 | 2: | 106 | 2: |
92 | andb $0x10, %al | 107 | andb $0x10, %dl |
93 | movl %eax, %cr0 | 108 | movl %edx, %cr0 |
94 | .byte 0xea /* ljmpw */ | 109 | LJMPW_RM(3f) |
95 | 101: .word 0 /* Offset */ | 110 | 3: |
96 | 102: .word 0 /* Segment */ | 111 | andw %ax, %ax |
97 | 112 | jz bios | |
98 | bios: | ||
99 | ljmpw $0xf000, $0xfff0 | ||
100 | 113 | ||
101 | apm: | 114 | apm: |
102 | movw $0x1000, %ax | 115 | movw $0x1000, %ax |
@@ -106,26 +119,34 @@ apm: | |||
106 | movw $0x0001, %bx | 119 | movw $0x0001, %bx |
107 | movw $0x0003, %cx | 120 | movw $0x0003, %cx |
108 | int $0x15 | 121 | int $0x15 |
122 | /* This should never return... */ | ||
109 | 123 | ||
110 | END(machine_real_restart_asm) | 124 | bios: |
125 | ljmpw $0xf000, $0xfff0 | ||
111 | 126 | ||
112 | .balign 16 | 127 | .section ".rodata", "a" |
113 | /* These must match <asm/reboot.h */ | ||
114 | dispatch_table: | ||
115 | .word bios - r_base | ||
116 | .word apm - r_base | ||
117 | END(dispatch_table) | ||
118 | 128 | ||
119 | .balign 16 | 129 | .balign 16 |
120 | machine_real_restart_idt: | 130 | GLOBAL(machine_real_restart_idt) |
121 | .word 0xffff /* Length - real mode default value */ | 131 | .word 0xffff /* Length - real mode default value */ |
122 | .long 0 /* Base - real mode default value */ | 132 | .long 0 /* Base - real mode default value */ |
123 | END(machine_real_restart_idt) | 133 | END(machine_real_restart_idt) |
124 | 134 | ||
125 | .balign 16 | 135 | .balign 16 |
126 | ENTRY(machine_real_restart_gdt) | 136 | GLOBAL(machine_real_restart_gdt) |
127 | .quad 0 /* Self-pointer, filled in by PM code */ | 137 | /* Self-pointer */ |
128 | .quad 0 /* 16-bit code segment, filled in by PM code */ | 138 | .word 0xffff /* Length - real mode default value */ |
139 | .long pa_machine_real_restart_gdt | ||
140 | .word 0 | ||
141 | |||
142 | /* | ||
143 | * 16-bit code segment pointing to real_mode_seg | ||
144 | * Selector value 8 | ||
145 | */ | ||
146 | .word 0xffff /* Limit */ | ||
147 | .long 0x9b000000 + pa_real_mode_base | ||
148 | .word 0 | ||
149 | |||
129 | /* | 150 | /* |
130 | * 16-bit data segment with the selector value 16 = 0x10 and | 151 | * 16-bit data segment with the selector value 16 = 0x10 and |
131 | * base value 0x100; since this is consistent with real mode | 152 | * base value 0x100; since this is consistent with real mode |
diff --git a/arch/x86/realmode/rm/regs.c b/arch/x86/realmode/rm/regs.c new file mode 100644 index 00000000000..fbb15b9f9ca --- /dev/null +++ b/arch/x86/realmode/rm/regs.c | |||
@@ -0,0 +1 @@ | |||
#include "../../boot/regs.c" | |||
diff --git a/arch/x86/realmode/rm/stack.S b/arch/x86/realmode/rm/stack.S new file mode 100644 index 00000000000..867ae87adfa --- /dev/null +++ b/arch/x86/realmode/rm/stack.S | |||
@@ -0,0 +1,19 @@ | |||
1 | /* | ||
2 | * Common heap and stack allocations | ||
3 | */ | ||
4 | |||
5 | #include <linux/linkage.h> | ||
6 | |||
7 | .data | ||
8 | GLOBAL(HEAP) | ||
9 | .long rm_heap | ||
10 | GLOBAL(heap_end) | ||
11 | .long rm_stack | ||
12 | |||
13 | .bss | ||
14 | .balign 16 | ||
15 | GLOBAL(rm_heap) | ||
16 | .space 2048 | ||
17 | GLOBAL(rm_stack) | ||
18 | .space 2048 | ||
19 | GLOBAL(rm_stack_end) | ||
diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S new file mode 100644 index 00000000000..c1b2791183e --- /dev/null +++ b/arch/x86/realmode/rm/trampoline_32.S | |||
@@ -0,0 +1,74 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Trampoline.S Derived from Setup.S by Linus Torvalds | ||
4 | * | ||
5 | * 4 Jan 1997 Michael Chastain: changed to gnu as. | ||
6 | * | ||
7 | * This is only used for booting secondary CPUs in SMP machine | ||
8 | * | ||
9 | * Entry: CS:IP point to the start of our code, we are | ||
10 | * in real mode with no stack, but the rest of the | ||
11 | * trampoline page to make our stack and everything else | ||
12 | * is a mystery. | ||
13 | * | ||
14 | * We jump into arch/x86/kernel/head_32.S. | ||
15 | * | ||
16 | * On entry to trampoline_start, the processor is in real mode | ||
17 | * with 16-bit addressing and 16-bit data. CS has some value | ||
18 | * and IP is zero. Thus, we load CS to the physical segment | ||
19 | * of the real mode code before doing anything further. | ||
20 | */ | ||
21 | |||
22 | #include <linux/linkage.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <asm/segment.h> | ||
25 | #include <asm/page_types.h> | ||
26 | #include "realmode.h" | ||
27 | |||
28 | .text | ||
29 | .code16 | ||
30 | |||
31 | .balign PAGE_SIZE | ||
32 | ENTRY(trampoline_start) | ||
33 | wbinvd # Needed for NUMA-Q should be harmless for others | ||
34 | |||
35 | LJMPW_RM(1f) | ||
36 | 1: | ||
37 | mov %cs, %ax # Code and data in the same place | ||
38 | mov %ax, %ds | ||
39 | |||
40 | cli # We should be safe anyway | ||
41 | |||
42 | movl tr_start, %eax # where we need to go | ||
43 | |||
44 | movl $0xA5A5A5A5, trampoline_status | ||
45 | # write marker for master knows we're running | ||
46 | |||
47 | /* | ||
48 | * GDT tables in non default location kernel can be beyond 16MB and | ||
49 | * lgdt will not be able to load the address as in real mode default | ||
50 | * operand size is 16bit. Use lgdtl instead to force operand size | ||
51 | * to 32 bit. | ||
52 | */ | ||
53 | lidtl tr_idt # load idt with 0, 0 | ||
54 | lgdtl tr_gdt # load gdt with whatever is appropriate | ||
55 | |||
56 | movw $1, %dx # protected mode (PE) bit | ||
57 | lmsw %dx # into protected mode | ||
58 | |||
59 | ljmpl $__BOOT_CS, $pa_startup_32 | ||
60 | |||
61 | .section ".text32","ax" | ||
62 | .code32 | ||
63 | ENTRY(startup_32) # note: also used from wakeup_asm.S | ||
64 | jmp *%eax | ||
65 | |||
66 | .bss | ||
67 | .balign 8 | ||
68 | GLOBAL(trampoline_header) | ||
69 | tr_start: .space 4 | ||
70 | tr_gdt_pad: .space 2 | ||
71 | tr_gdt: .space 6 | ||
72 | END(trampoline_header) | ||
73 | |||
74 | #include "trampoline_common.S" | ||
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 09ff51799e9..bb360dc39d2 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S | |||
@@ -5,12 +5,12 @@ | |||
5 | * 4 Jan 1997 Michael Chastain: changed to gnu as. | 5 | * 4 Jan 1997 Michael Chastain: changed to gnu as. |
6 | * 15 Sept 2005 Eric Biederman: 64bit PIC support | 6 | * 15 Sept 2005 Eric Biederman: 64bit PIC support |
7 | * | 7 | * |
8 | * Entry: CS:IP point to the start of our code, we are | 8 | * Entry: CS:IP point to the start of our code, we are |
9 | * in real mode with no stack, but the rest of the | 9 | * in real mode with no stack, but the rest of the |
10 | * trampoline page to make our stack and everything else | 10 | * trampoline page to make our stack and everything else |
11 | * is a mystery. | 11 | * is a mystery. |
12 | * | 12 | * |
13 | * On entry to trampoline_data, the processor is in real mode | 13 | * On entry to trampoline_start, the processor is in real mode |
14 | * with 16-bit addressing and 16-bit data. CS has some value | 14 | * with 16-bit addressing and 16-bit data. CS has some value |
15 | * and IP is zero. Thus, data addresses need to be absolute | 15 | * and IP is zero. Thus, data addresses need to be absolute |
16 | * (no relocation) and are taken with regard to r_base. | 16 | * (no relocation) and are taken with regard to r_base. |
@@ -31,43 +31,33 @@ | |||
31 | #include <asm/msr.h> | 31 | #include <asm/msr.h> |
32 | #include <asm/segment.h> | 32 | #include <asm/segment.h> |
33 | #include <asm/processor-flags.h> | 33 | #include <asm/processor-flags.h> |
34 | #include "realmode.h" | ||
34 | 35 | ||
35 | .section ".x86_trampoline","a" | 36 | .text |
36 | .balign PAGE_SIZE | ||
37 | .code16 | 37 | .code16 |
38 | 38 | ||
39 | ENTRY(trampoline_data) | 39 | .balign PAGE_SIZE |
40 | r_base = . | 40 | ENTRY(trampoline_start) |
41 | cli # We should be safe anyway | 41 | cli # We should be safe anyway |
42 | wbinvd | 42 | wbinvd |
43 | |||
44 | LJMPW_RM(1f) | ||
45 | 1: | ||
43 | mov %cs, %ax # Code and data in the same place | 46 | mov %cs, %ax # Code and data in the same place |
44 | mov %ax, %ds | 47 | mov %ax, %ds |
45 | mov %ax, %es | 48 | mov %ax, %es |
46 | mov %ax, %ss | 49 | mov %ax, %ss |
47 | 50 | ||
51 | movl $0xA5A5A5A5, trampoline_status | ||
52 | # write marker for master knows we're running | ||
48 | 53 | ||
49 | movl $0xA5A5A5A5, trampoline_status - r_base | 54 | # Setup stack |
50 | # write marker for master knows we're running | 55 | movl $rm_stack_end, %esp |
51 | |||
52 | # Setup stack | ||
53 | movw $(trampoline_stack_end - r_base), %sp | ||
54 | 56 | ||
55 | call verify_cpu # Verify the cpu supports long mode | 57 | call verify_cpu # Verify the cpu supports long mode |
56 | testl %eax, %eax # Check for return code | 58 | testl %eax, %eax # Check for return code |
57 | jnz no_longmode | 59 | jnz no_longmode |
58 | 60 | ||
59 | mov %cs, %ax | ||
60 | movzx %ax, %esi # Find the 32bit trampoline location | ||
61 | shll $4, %esi | ||
62 | |||
63 | # Fixup the absolute vectors | ||
64 | leal (startup_32 - r_base)(%esi), %eax | ||
65 | movl %eax, startup_32_vector - r_base | ||
66 | leal (startup_64 - r_base)(%esi), %eax | ||
67 | movl %eax, startup_64_vector - r_base | ||
68 | leal (tgdt - r_base)(%esi), %eax | ||
69 | movl %eax, (tgdt + 2 - r_base) | ||
70 | |||
71 | /* | 61 | /* |
72 | * GDT tables in non default location kernel can be beyond 16MB and | 62 | * GDT tables in non default location kernel can be beyond 16MB and |
73 | * lgdt will not be able to load the address as in real mode default | 63 | * lgdt will not be able to load the address as in real mode default |
@@ -75,36 +65,49 @@ r_base = . | |||
75 | * to 32 bit. | 65 | * to 32 bit. |
76 | */ | 66 | */ |
77 | 67 | ||
78 | lidtl tidt - r_base # load idt with 0, 0 | 68 | lidtl tr_idt # load idt with 0, 0 |
79 | lgdtl tgdt - r_base # load gdt with whatever is appropriate | 69 | lgdtl tr_gdt # load gdt with whatever is appropriate |
70 | |||
71 | movw $__KERNEL_DS, %dx # Data segment descriptor | ||
80 | 72 | ||
81 | mov $X86_CR0_PE, %ax # protected mode (PE) bit | 73 | # Enable protected mode |
82 | lmsw %ax # into protected mode | 74 | movl $X86_CR0_PE, %eax # protected mode (PE) bit |
75 | movl %eax, %cr0 # into protected mode | ||
83 | 76 | ||
84 | # flush prefetch and jump to startup_32 | 77 | # flush prefetch and jump to startup_32 |
85 | ljmpl *(startup_32_vector - r_base) | 78 | ljmpl $__KERNEL32_CS, $pa_startup_32 |
86 | 79 | ||
80 | no_longmode: | ||
81 | hlt | ||
82 | jmp no_longmode | ||
83 | #include "../kernel/verify_cpu.S" | ||
84 | |||
85 | .section ".text32","ax" | ||
87 | .code32 | 86 | .code32 |
88 | .balign 4 | 87 | .balign 4 |
89 | startup_32: | 88 | ENTRY(startup_32) |
90 | movl $__KERNEL_DS, %eax # Initialize the %ds segment register | 89 | movl %edx, %ss |
91 | movl %eax, %ds | 90 | addl $pa_real_mode_base, %esp |
92 | 91 | movl %edx, %ds | |
93 | movl $X86_CR4_PAE, %eax | 92 | movl %edx, %es |
93 | movl %edx, %fs | ||
94 | movl %edx, %gs | ||
95 | |||
96 | movl pa_tr_cr4, %eax | ||
94 | movl %eax, %cr4 # Enable PAE mode | 97 | movl %eax, %cr4 # Enable PAE mode |
95 | 98 | ||
96 | # Setup trampoline 4 level pagetables | 99 | # Setup trampoline 4 level pagetables |
97 | leal (trampoline_level4_pgt - r_base)(%esi), %eax | 100 | movl $pa_trampoline_pgd, %eax |
98 | movl %eax, %cr3 | 101 | movl %eax, %cr3 |
99 | 102 | ||
103 | # Set up EFER | ||
104 | movl pa_tr_efer, %eax | ||
105 | movl pa_tr_efer + 4, %edx | ||
100 | movl $MSR_EFER, %ecx | 106 | movl $MSR_EFER, %ecx |
101 | movl $(1 << _EFER_LME), %eax # Enable Long Mode | ||
102 | xorl %edx, %edx | ||
103 | wrmsr | 107 | wrmsr |
104 | 108 | ||
105 | # Enable paging and in turn activate Long Mode | 109 | # Enable paging and in turn activate Long Mode |
106 | # Enable protected mode | 110 | movl $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE), %eax |
107 | movl $(X86_CR0_PG | X86_CR0_PE), %eax | ||
108 | movl %eax, %cr0 | 111 | movl %eax, %cr0 |
109 | 112 | ||
110 | /* | 113 | /* |
@@ -113,59 +116,38 @@ startup_32: | |||
113 | * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use | 116 | * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use |
114 | * the new gdt/idt that has __KERNEL_CS with CS.L = 1. | 117 | * the new gdt/idt that has __KERNEL_CS with CS.L = 1. |
115 | */ | 118 | */ |
116 | ljmp *(startup_64_vector - r_base)(%esi) | 119 | ljmpl $__KERNEL_CS, $pa_startup_64 |
117 | 120 | ||
121 | .section ".text64","ax" | ||
118 | .code64 | 122 | .code64 |
119 | .balign 4 | 123 | .balign 4 |
120 | startup_64: | 124 | ENTRY(startup_64) |
121 | # Now jump into the kernel using virtual addresses | 125 | # Now jump into the kernel using virtual addresses |
122 | movq $secondary_startup_64, %rax | 126 | jmpq *tr_start(%rip) |
123 | jmp *%rax | ||
124 | |||
125 | .code16 | ||
126 | no_longmode: | ||
127 | hlt | ||
128 | jmp no_longmode | ||
129 | #include "verify_cpu.S" | ||
130 | |||
131 | .balign 4 | ||
132 | # Careful these need to be in the same 64K segment as the above; | ||
133 | tidt: | ||
134 | .word 0 # idt limit = 0 | ||
135 | .word 0, 0 # idt base = 0L | ||
136 | 127 | ||
128 | .section ".rodata","a" | ||
137 | # Duplicate the global descriptor table | 129 | # Duplicate the global descriptor table |
138 | # so the kernel can live anywhere | 130 | # so the kernel can live anywhere |
139 | .balign 4 | 131 | .balign 16 |
140 | tgdt: | 132 | .globl tr_gdt |
141 | .short tgdt_end - tgdt # gdt limit | 133 | tr_gdt: |
142 | .long tgdt - r_base | 134 | .short tr_gdt_end - tr_gdt - 1 # gdt limit |
143 | .short 0 | 135 | .long pa_tr_gdt |
136 | .short 0 | ||
144 | .quad 0x00cf9b000000ffff # __KERNEL32_CS | 137 | .quad 0x00cf9b000000ffff # __KERNEL32_CS |
145 | .quad 0x00af9b000000ffff # __KERNEL_CS | 138 | .quad 0x00af9b000000ffff # __KERNEL_CS |
146 | .quad 0x00cf93000000ffff # __KERNEL_DS | 139 | .quad 0x00cf93000000ffff # __KERNEL_DS |
147 | tgdt_end: | 140 | tr_gdt_end: |
148 | 141 | ||
149 | .balign 4 | 142 | .bss |
150 | startup_32_vector: | 143 | .balign PAGE_SIZE |
151 | .long startup_32 - r_base | 144 | GLOBAL(trampoline_pgd) .space PAGE_SIZE |
152 | .word __KERNEL32_CS, 0 | ||
153 | 145 | ||
154 | .balign 4 | 146 | .balign 8 |
155 | startup_64_vector: | 147 | GLOBAL(trampoline_header) |
156 | .long startup_64 - r_base | 148 | tr_start: .space 8 |
157 | .word __KERNEL_CS, 0 | 149 | GLOBAL(tr_efer) .space 8 |
150 | GLOBAL(tr_cr4) .space 4 | ||
151 | END(trampoline_header) | ||
158 | 152 | ||
159 | .balign 4 | 153 | #include "trampoline_common.S" |
160 | ENTRY(trampoline_status) | ||
161 | .long 0 | ||
162 | |||
163 | trampoline_stack: | ||
164 | .org 0x1000 | ||
165 | trampoline_stack_end: | ||
166 | ENTRY(trampoline_level4_pgt) | ||
167 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | ||
168 | .fill 510,8,0 | ||
169 | .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE | ||
170 | |||
171 | ENTRY(trampoline_end) | ||
diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S new file mode 100644 index 00000000000..b1ecdb9692a --- /dev/null +++ b/arch/x86/realmode/rm/trampoline_common.S | |||
@@ -0,0 +1,7 @@ | |||
1 | .section ".rodata","a" | ||
2 | .balign 16 | ||
3 | tr_idt: .fill 1, 6, 0 | ||
4 | |||
5 | .bss | ||
6 | .balign 4 | ||
7 | GLOBAL(trampoline_status) .space 4 | ||
diff --git a/arch/x86/realmode/rm/video-bios.c b/arch/x86/realmode/rm/video-bios.c new file mode 100644 index 00000000000..848b25aaf11 --- /dev/null +++ b/arch/x86/realmode/rm/video-bios.c | |||
@@ -0,0 +1 @@ | |||
#include "../../boot/video-bios.c" | |||
diff --git a/arch/x86/realmode/rm/video-mode.c b/arch/x86/realmode/rm/video-mode.c new file mode 100644 index 00000000000..2a98b7e2368 --- /dev/null +++ b/arch/x86/realmode/rm/video-mode.c | |||
@@ -0,0 +1 @@ | |||
#include "../../boot/video-mode.c" | |||
diff --git a/arch/x86/realmode/rm/video-vesa.c b/arch/x86/realmode/rm/video-vesa.c new file mode 100644 index 00000000000..413edddb51e --- /dev/null +++ b/arch/x86/realmode/rm/video-vesa.c | |||
@@ -0,0 +1 @@ | |||
#include "../../boot/video-vesa.c" | |||
diff --git a/arch/x86/realmode/rm/video-vga.c b/arch/x86/realmode/rm/video-vga.c new file mode 100644 index 00000000000..3085f5c9d28 --- /dev/null +++ b/arch/x86/realmode/rm/video-vga.c | |||
@@ -0,0 +1 @@ | |||
#include "../../boot/video-vga.c" | |||
diff --git a/arch/x86/kernel/acpi/realmode/wakemain.c b/arch/x86/realmode/rm/wakemain.c index 883962d9eef..91405d515ec 100644 --- a/arch/x86/kernel/acpi/realmode/wakemain.c +++ b/arch/x86/realmode/rm/wakemain.c | |||
@@ -65,7 +65,8 @@ void main(void) | |||
65 | { | 65 | { |
66 | /* Kill machine if structures are wrong */ | 66 | /* Kill machine if structures are wrong */ |
67 | if (wakeup_header.real_magic != 0x12345678) | 67 | if (wakeup_header.real_magic != 0x12345678) |
68 | while (1); | 68 | while (1) |
69 | ; | ||
69 | 70 | ||
70 | if (wakeup_header.realmode_flags & 4) | 71 | if (wakeup_header.realmode_flags & 4) |
71 | send_morse("...-"); | 72 | send_morse("...-"); |
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/realmode/rm/wakeup.h index 97a29e1430e..9317e0042f2 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.h +++ b/arch/x86/realmode/rm/wakeup.h | |||
@@ -12,9 +12,8 @@ | |||
12 | /* This must match data at wakeup.S */ | 12 | /* This must match data at wakeup.S */ |
13 | struct wakeup_header { | 13 | struct wakeup_header { |
14 | u16 video_mode; /* Video mode number */ | 14 | u16 video_mode; /* Video mode number */ |
15 | u16 _jmp1; /* ljmpl opcode, 32-bit only */ | ||
16 | u32 pmode_entry; /* Protected mode resume point, 32-bit only */ | 15 | u32 pmode_entry; /* Protected mode resume point, 32-bit only */ |
17 | u16 _jmp2; /* CS value, 32-bit only */ | 16 | u16 pmode_cs; |
18 | u32 pmode_cr0; /* Protected mode cr0 */ | 17 | u32 pmode_cr0; /* Protected mode cr0 */ |
19 | u32 pmode_cr3; /* Protected mode cr3 */ | 18 | u32 pmode_cr3; /* Protected mode cr3 */ |
20 | u32 pmode_cr4; /* Protected mode cr4 */ | 19 | u32 pmode_cr4; /* Protected mode cr4 */ |
@@ -26,12 +25,6 @@ struct wakeup_header { | |||
26 | u32 pmode_behavior; /* Wakeup routine behavior flags */ | 25 | u32 pmode_behavior; /* Wakeup routine behavior flags */ |
27 | u32 realmode_flags; | 26 | u32 realmode_flags; |
28 | u32 real_magic; | 27 | u32 real_magic; |
29 | u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ | ||
30 | u8 _pad1; | ||
31 | u8 wakeup_jmp; | ||
32 | u16 wakeup_jmp_off; | ||
33 | u16 wakeup_jmp_seg; | ||
34 | u64 wakeup_gdt[3]; | ||
35 | u32 signature; /* To check we have correct structure */ | 28 | u32 signature; /* To check we have correct structure */ |
36 | } __attribute__((__packed__)); | 29 | } __attribute__((__packed__)); |
37 | 30 | ||
@@ -40,7 +33,6 @@ extern struct wakeup_header wakeup_header; | |||
40 | 33 | ||
41 | #define WAKEUP_HEADER_OFFSET 8 | 34 | #define WAKEUP_HEADER_OFFSET 8 |
42 | #define WAKEUP_HEADER_SIGNATURE 0x51ee1111 | 35 | #define WAKEUP_HEADER_SIGNATURE 0x51ee1111 |
43 | #define WAKEUP_END_SIGNATURE 0x65a22c82 | ||
44 | 36 | ||
45 | /* Wakeup behavior bits */ | 37 | /* Wakeup behavior bits */ |
46 | #define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 | 38 | #define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 |
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/realmode/rm/wakeup_asm.S index b4fd836e405..8905166b0bb 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/realmode/rm/wakeup_asm.S | |||
@@ -1,50 +1,47 @@ | |||
1 | /* | 1 | /* |
2 | * ACPI wakeup real mode startup stub | 2 | * ACPI wakeup real mode startup stub |
3 | */ | 3 | */ |
4 | #include <linux/linkage.h> | ||
4 | #include <asm/segment.h> | 5 | #include <asm/segment.h> |
5 | #include <asm/msr-index.h> | 6 | #include <asm/msr-index.h> |
6 | #include <asm/page_types.h> | 7 | #include <asm/page_types.h> |
7 | #include <asm/pgtable_types.h> | 8 | #include <asm/pgtable_types.h> |
8 | #include <asm/processor-flags.h> | 9 | #include <asm/processor-flags.h> |
10 | #include "realmode.h" | ||
9 | #include "wakeup.h" | 11 | #include "wakeup.h" |
10 | 12 | ||
11 | .code16 | 13 | .code16 |
12 | .section ".jump", "ax" | ||
13 | .globl _start | ||
14 | _start: | ||
15 | cli | ||
16 | jmp wakeup_code | ||
17 | 14 | ||
18 | /* This should match the structure in wakeup.h */ | 15 | /* This should match the structure in wakeup.h */ |
19 | .section ".header", "a" | 16 | .section ".data", "aw" |
20 | .globl wakeup_header | 17 | |
21 | wakeup_header: | 18 | .balign 16 |
22 | video_mode: .short 0 /* Video mode number */ | 19 | GLOBAL(wakeup_header) |
23 | pmode_return: .byte 0x66, 0xea /* ljmpl */ | 20 | video_mode: .short 0 /* Video mode number */ |
24 | .long 0 /* offset goes here */ | 21 | pmode_entry: .long 0 |
25 | .short __KERNEL_CS | 22 | pmode_cs: .short __KERNEL_CS |
26 | pmode_cr0: .long 0 /* Saved %cr0 */ | 23 | pmode_cr0: .long 0 /* Saved %cr0 */ |
27 | pmode_cr3: .long 0 /* Saved %cr3 */ | 24 | pmode_cr3: .long 0 /* Saved %cr3 */ |
28 | pmode_cr4: .long 0 /* Saved %cr4 */ | 25 | pmode_cr4: .long 0 /* Saved %cr4 */ |
29 | pmode_efer: .quad 0 /* Saved EFER */ | 26 | pmode_efer: .quad 0 /* Saved EFER */ |
30 | pmode_gdt: .quad 0 | 27 | pmode_gdt: .quad 0 |
31 | pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ | 28 | pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ |
32 | pmode_behavior: .long 0 /* Wakeup behavior flags */ | 29 | pmode_behavior: .long 0 /* Wakeup behavior flags */ |
33 | realmode_flags: .long 0 | 30 | realmode_flags: .long 0 |
34 | real_magic: .long 0 | 31 | real_magic: .long 0 |
35 | trampoline_segment: .word 0 | 32 | signature: .long WAKEUP_HEADER_SIGNATURE |
36 | _pad1: .byte 0 | 33 | END(wakeup_header) |
37 | wakeup_jmp: .byte 0xea /* ljmpw */ | ||
38 | wakeup_jmp_off: .word 3f | ||
39 | wakeup_jmp_seg: .word 0 | ||
40 | wakeup_gdt: .quad 0, 0, 0 | ||
41 | signature: .long WAKEUP_HEADER_SIGNATURE | ||
42 | 34 | ||
43 | .text | 35 | .text |
44 | .code16 | 36 | .code16 |
45 | wakeup_code: | 37 | |
38 | .balign 16 | ||
39 | ENTRY(wakeup_start) | ||
40 | cli | ||
46 | cld | 41 | cld |
47 | 42 | ||
43 | LJMPW_RM(3f) | ||
44 | 3: | ||
48 | /* Apparently some dimwit BIOS programmers don't know how to | 45 | /* Apparently some dimwit BIOS programmers don't know how to |
49 | program a PM to RM transition, and we might end up here with | 46 | program a PM to RM transition, and we might end up here with |
50 | junk in the data segment descriptor registers. The only way | 47 | junk in the data segment descriptor registers. The only way |
@@ -54,8 +51,7 @@ wakeup_code: | |||
54 | movl %cr0, %eax | 51 | movl %cr0, %eax |
55 | orb $X86_CR0_PE, %al | 52 | orb $X86_CR0_PE, %al |
56 | movl %eax, %cr0 | 53 | movl %eax, %cr0 |
57 | jmp 1f | 54 | ljmpw $8, $2f |
58 | 1: ljmpw $8, $2f | ||
59 | 2: | 55 | 2: |
60 | movw %cx, %ds | 56 | movw %cx, %ds |
61 | movw %cx, %es | 57 | movw %cx, %es |
@@ -65,16 +61,18 @@ wakeup_code: | |||
65 | 61 | ||
66 | andb $~X86_CR0_PE, %al | 62 | andb $~X86_CR0_PE, %al |
67 | movl %eax, %cr0 | 63 | movl %eax, %cr0 |
68 | jmp wakeup_jmp | 64 | LJMPW_RM(3f) |
69 | 3: | 65 | 3: |
70 | /* Set up segments */ | 66 | /* Set up segments */ |
71 | movw %cs, %ax | 67 | movw %cs, %ax |
68 | movw %ax, %ss | ||
69 | movl $rm_stack_end, %esp | ||
72 | movw %ax, %ds | 70 | movw %ax, %ds |
73 | movw %ax, %es | 71 | movw %ax, %es |
74 | movw %ax, %ss | 72 | movw %ax, %fs |
75 | lidtl wakeup_idt | 73 | movw %ax, %gs |
76 | 74 | ||
77 | movl $wakeup_stack_end, %esp | 75 | lidtl wakeup_idt |
78 | 76 | ||
79 | /* Clear the EFLAGS */ | 77 | /* Clear the EFLAGS */ |
80 | pushl $0 | 78 | pushl $0 |
@@ -87,7 +85,7 @@ wakeup_code: | |||
87 | 85 | ||
88 | /* Check we really have everything... */ | 86 | /* Check we really have everything... */ |
89 | movl end_signature, %eax | 87 | movl end_signature, %eax |
90 | cmpl $WAKEUP_END_SIGNATURE, %eax | 88 | cmpl $REALMODE_END_SIGNATURE, %eax |
91 | jne bogus_real_magic | 89 | jne bogus_real_magic |
92 | 90 | ||
93 | /* Call the C code */ | 91 | /* Call the C code */ |
@@ -128,14 +126,13 @@ wakeup_code: | |||
128 | lgdtl pmode_gdt | 126 | lgdtl pmode_gdt |
129 | 127 | ||
130 | /* This really couldn't... */ | 128 | /* This really couldn't... */ |
131 | movl pmode_cr0, %eax | 129 | movl pmode_entry, %eax |
132 | movl %eax, %cr0 | 130 | movl pmode_cr0, %ecx |
133 | jmp pmode_return | 131 | movl %ecx, %cr0 |
132 | ljmpl $__KERNEL_CS, $pa_startup_32 | ||
133 | /* -> jmp *%eax in trampoline_32.S */ | ||
134 | #else | 134 | #else |
135 | pushw $0 | 135 | jmp trampoline_start |
136 | pushw trampoline_segment | ||
137 | pushw $0 | ||
138 | lret | ||
139 | #endif | 136 | #endif |
140 | 137 | ||
141 | bogus_real_magic: | 138 | bogus_real_magic: |
@@ -143,28 +140,38 @@ bogus_real_magic: | |||
143 | hlt | 140 | hlt |
144 | jmp 1b | 141 | jmp 1b |
145 | 142 | ||
146 | .data | 143 | .section ".rodata","a" |
144 | |||
145 | /* | ||
146 | * Set up the wakeup GDT. We set these up as Big Real Mode, | ||
147 | * that is, with limits set to 4 GB. At least the Lenovo | ||
148 | * Thinkpad X61 is known to need this for the video BIOS | ||
149 | * initialization quirk to work; this is likely to also | ||
150 | * be the case for other laptops or integrated video devices. | ||
151 | */ | ||
152 | |||
153 | .balign 16 | ||
154 | GLOBAL(wakeup_gdt) | ||
155 | .word 3*8-1 /* Self-descriptor */ | ||
156 | .long pa_wakeup_gdt | ||
157 | .word 0 | ||
158 | |||
159 | .word 0xffff /* 16-bit code segment @ real_mode_base */ | ||
160 | .long 0x9b000000 + pa_real_mode_base | ||
161 | .word 0x008f /* big real mode */ | ||
162 | |||
163 | .word 0xffff /* 16-bit data segment @ real_mode_base */ | ||
164 | .long 0x93000000 + pa_real_mode_base | ||
165 | .word 0x008f /* big real mode */ | ||
166 | END(wakeup_gdt) | ||
167 | |||
168 | .section ".rodata","a" | ||
147 | .balign 8 | 169 | .balign 8 |
148 | 170 | ||
149 | /* This is the standard real-mode IDT */ | 171 | /* This is the standard real-mode IDT */ |
150 | wakeup_idt: | 172 | .balign 16 |
173 | GLOBAL(wakeup_idt) | ||
151 | .word 0xffff /* limit */ | 174 | .word 0xffff /* limit */ |
152 | .long 0 /* address */ | 175 | .long 0 /* address */ |
153 | .word 0 | 176 | .word 0 |
154 | 177 | END(wakeup_idt) | |
155 | .globl HEAP, heap_end | ||
156 | HEAP: | ||
157 | .long wakeup_heap | ||
158 | heap_end: | ||
159 | .long wakeup_stack | ||
160 | |||
161 | .bss | ||
162 | wakeup_heap: | ||
163 | .space 2048 | ||
164 | wakeup_stack: | ||
165 | .space 2048 | ||
166 | wakeup_stack_end: | ||
167 | |||
168 | .section ".signature","a" | ||
169 | end_signature: | ||
170 | .long WAKEUP_END_SIGNATURE | ||
diff --git a/arch/x86/realmode/rmpiggy.S b/arch/x86/realmode/rmpiggy.S new file mode 100644 index 00000000000..204c6ece0e9 --- /dev/null +++ b/arch/x86/realmode/rmpiggy.S | |||
@@ -0,0 +1,20 @@ | |||
1 | /* | ||
2 | * Wrapper script for the realmode binary as a transport object | ||
3 | * before copying to low memory. | ||
4 | */ | ||
5 | #include <linux/linkage.h> | ||
6 | #include <asm/page_types.h> | ||
7 | |||
8 | .section ".init.data","aw" | ||
9 | |||
10 | .balign PAGE_SIZE | ||
11 | |||
12 | GLOBAL(real_mode_blob) | ||
13 | .incbin "arch/x86/realmode/rm/realmode.bin" | ||
14 | END(real_mode_blob) | ||
15 | |||
16 | GLOBAL(real_mode_blob_end); | ||
17 | |||
18 | GLOBAL(real_mode_relocs) | ||
19 | .incbin "arch/x86/realmode/rm/realmode.relocs" | ||
20 | END(real_mode_relocs) | ||
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 29f9f0554f7..7a35a6e71d4 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl | |||
@@ -355,3 +355,4 @@ | |||
355 | 346 i386 setns sys_setns | 355 | 346 i386 setns sys_setns |
356 | 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv | 356 | 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv |
357 | 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev | 357 | 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev |
358 | 349 i386 kcmp sys_kcmp | ||
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index dd29a9ea27c..51171aeff0d 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl | |||
@@ -318,6 +318,8 @@ | |||
318 | 309 common getcpu sys_getcpu | 318 | 309 common getcpu sys_getcpu |
319 | 310 64 process_vm_readv sys_process_vm_readv | 319 | 310 64 process_vm_readv sys_process_vm_readv |
320 | 311 64 process_vm_writev sys_process_vm_writev | 320 | 311 64 process_vm_writev sys_process_vm_writev |
321 | 312 64 kcmp sys_kcmp | ||
322 | |||
321 | # | 323 | # |
322 | # x32-specific system call numbers start at 512 to avoid cache impact | 324 | # x32-specific system call numbers start at 512 to avoid cache impact |
323 | # for native 64-bit operation. | 325 | # for native 64-bit operation. |
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 5f6a5b6c3a1..ddcf39b1a18 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk | |||
@@ -66,9 +66,10 @@ BEGIN { | |||
66 | rex_expr = "^REX(\\.[XRWB]+)*" | 66 | rex_expr = "^REX(\\.[XRWB]+)*" |
67 | fpu_expr = "^ESC" # TODO | 67 | fpu_expr = "^ESC" # TODO |
68 | 68 | ||
69 | lprefix1_expr = "\\(66\\)" | 69 | lprefix1_expr = "\\((66|!F3)\\)" |
70 | lprefix2_expr = "\\(F3\\)" | 70 | lprefix2_expr = "\\(F3\\)" |
71 | lprefix3_expr = "\\(F2\\)" | 71 | lprefix3_expr = "\\((F2|!F3)\\)" |
72 | lprefix_expr = "\\((66|F2|F3)\\)" | ||
72 | max_lprefix = 4 | 73 | max_lprefix = 4 |
73 | 74 | ||
74 | # All opcodes starting with lower-case 'v' or with (v1) superscript | 75 | # All opcodes starting with lower-case 'v' or with (v1) superscript |
@@ -333,13 +334,16 @@ function convert_operands(count,opnd, i,j,imm,mod) | |||
333 | if (match(ext, lprefix1_expr)) { | 334 | if (match(ext, lprefix1_expr)) { |
334 | lptable1[idx] = add_flags(lptable1[idx],flags) | 335 | lptable1[idx] = add_flags(lptable1[idx],flags) |
335 | variant = "INAT_VARIANT" | 336 | variant = "INAT_VARIANT" |
336 | } else if (match(ext, lprefix2_expr)) { | 337 | } |
338 | if (match(ext, lprefix2_expr)) { | ||
337 | lptable2[idx] = add_flags(lptable2[idx],flags) | 339 | lptable2[idx] = add_flags(lptable2[idx],flags) |
338 | variant = "INAT_VARIANT" | 340 | variant = "INAT_VARIANT" |
339 | } else if (match(ext, lprefix3_expr)) { | 341 | } |
342 | if (match(ext, lprefix3_expr)) { | ||
340 | lptable3[idx] = add_flags(lptable3[idx],flags) | 343 | lptable3[idx] = add_flags(lptable3[idx],flags) |
341 | variant = "INAT_VARIANT" | 344 | variant = "INAT_VARIANT" |
342 | } else { | 345 | } |
346 | if (!match(ext, lprefix_expr)){ | ||
343 | table[idx] = add_flags(table[idx],flags) | 347 | table[idx] = add_flags(table[idx],flags) |
344 | } | 348 | } |
345 | } | 349 | } |
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index b43cfcd9bf4..5a1847d6193 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c | |||
@@ -60,12 +60,31 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { | |||
60 | "__x86_cpu_dev_(start|end)|" | 60 | "__x86_cpu_dev_(start|end)|" |
61 | "(__parainstructions|__alt_instructions)(|_end)|" | 61 | "(__parainstructions|__alt_instructions)(|_end)|" |
62 | "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|" | 62 | "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|" |
63 | "__(start|end)_pci_.*|" | ||
64 | "__(start|end)_builtin_fw|" | ||
65 | "__(start|stop)___ksymtab(|_gpl|_unused|_unused_gpl|_gpl_future)|" | ||
66 | "__(start|stop)___kcrctab(|_gpl|_unused|_unused_gpl|_gpl_future)|" | ||
67 | "__(start|stop)___param|" | ||
68 | "__(start|stop)___modver|" | ||
69 | "__(start|stop)___bug_table|" | ||
70 | "__tracedata_(start|end)|" | ||
71 | "__(start|stop)_notes|" | ||
72 | "__end_rodata|" | ||
73 | "__initramfs_start|" | ||
74 | "(jiffies|jiffies_64)|" | ||
63 | "_end)$" | 75 | "_end)$" |
64 | }; | 76 | }; |
65 | 77 | ||
66 | 78 | ||
67 | static const char * const sym_regex_realmode[S_NSYMTYPES] = { | 79 | static const char * const sym_regex_realmode[S_NSYMTYPES] = { |
68 | /* | 80 | /* |
81 | * These symbols are known to be relative, even if the linker marks them | ||
82 | * as absolute (typically defined outside any section in the linker script.) | ||
83 | */ | ||
84 | [S_REL] = | ||
85 | "^pa_", | ||
86 | |||
87 | /* | ||
69 | * These are 16-bit segment symbols when compiling 16-bit code. | 88 | * These are 16-bit segment symbols when compiling 16-bit code. |
70 | */ | 89 | */ |
71 | [S_SEG] = | 90 | [S_SEG] = |
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index bb0fb03b9f8..a508cea1350 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c | |||
@@ -486,7 +486,6 @@ long sys_sigreturn(struct pt_regs *regs) | |||
486 | copy_from_user(&set.sig[1], extramask, sig_size)) | 486 | copy_from_user(&set.sig[1], extramask, sig_size)) |
487 | goto segfault; | 487 | goto segfault; |
488 | 488 | ||
489 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
490 | set_current_blocked(&set); | 489 | set_current_blocked(&set); |
491 | 490 | ||
492 | if (copy_sc_from_user(¤t->thread.regs, sc)) | 491 | if (copy_sc_from_user(¤t->thread.regs, sc)) |
@@ -600,7 +599,6 @@ long sys_rt_sigreturn(struct pt_regs *regs) | |||
600 | if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) | 599 | if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) |
601 | goto segfault; | 600 | goto segfault; |
602 | 601 | ||
603 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
604 | set_current_blocked(&set); | 602 | set_current_blocked(&set); |
605 | 603 | ||
606 | if (copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext)) | 604 | if (copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext)) |
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 416bd40c0eb..68d1dc91b37 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c | |||
@@ -39,9 +39,9 @@ | |||
39 | #undef __SYSCALL_I386 | 39 | #undef __SYSCALL_I386 |
40 | #define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym, | 40 | #define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym, |
41 | 41 | ||
42 | typedef void (*sys_call_ptr_t)(void); | 42 | typedef asmlinkage void (*sys_call_ptr_t)(void); |
43 | 43 | ||
44 | extern void sys_ni_syscall(void); | 44 | extern asmlinkage void sys_ni_syscall(void); |
45 | 45 | ||
46 | const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { | 46 | const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { |
47 | /* | 47 | /* |
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 66e6d935982..0faad646f5f 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -205,9 +205,9 @@ void syscall32_cpu_init(void) | |||
205 | { | 205 | { |
206 | /* Load these always in case some future AMD CPU supports | 206 | /* Load these always in case some future AMD CPU supports |
207 | SYSENTER from compat mode too. */ | 207 | SYSENTER from compat mode too. */ |
208 | checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); | 208 | wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); |
209 | checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); | 209 | wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); |
210 | checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); | 210 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); |
211 | 211 | ||
212 | wrmsrl(MSR_CSTAR, ia32_cstar_target); | 212 | wrmsrl(MSR_CSTAR, ia32_cstar_target); |
213 | } | 213 | } |
diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index c5ffb6ac870..d5644bbe8cb 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c | |||
@@ -9,24 +9,34 @@ | |||
9 | #include <linux/fb.h> | 9 | #include <linux/fb.h> |
10 | #include <linux/pci.h> | 10 | #include <linux/pci.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/vgaarb.h> | ||
12 | 13 | ||
13 | int fb_is_primary_device(struct fb_info *info) | 14 | int fb_is_primary_device(struct fb_info *info) |
14 | { | 15 | { |
15 | struct device *device = info->device; | 16 | struct device *device = info->device; |
16 | struct pci_dev *pci_dev = NULL; | 17 | struct pci_dev *pci_dev = NULL; |
18 | struct pci_dev *default_device = vga_default_device(); | ||
17 | struct resource *res = NULL; | 19 | struct resource *res = NULL; |
18 | int retval = 0; | ||
19 | 20 | ||
20 | if (device) | 21 | if (device) |
21 | pci_dev = to_pci_dev(device); | 22 | pci_dev = to_pci_dev(device); |
22 | 23 | ||
23 | if (pci_dev) | 24 | if (!pci_dev) |
24 | res = &pci_dev->resource[PCI_ROM_RESOURCE]; | 25 | return 0; |
26 | |||
27 | if (default_device) { | ||
28 | if (pci_dev == default_device) | ||
29 | return 1; | ||
30 | else | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | res = &pci_dev->resource[PCI_ROM_RESOURCE]; | ||
25 | 35 | ||
26 | if (res && res->flags & IORESOURCE_ROM_SHADOW) | 36 | if (res && res->flags & IORESOURCE_ROM_SHADOW) |
27 | retval = 1; | 37 | return 1; |
28 | 38 | ||
29 | return retval; | 39 | return 0; |
30 | } | 40 | } |
31 | EXPORT_SYMBOL(fb_is_primary_device); | 41 | EXPORT_SYMBOL(fb_is_primary_device); |
32 | MODULE_LICENSE("GPL"); | 42 | MODULE_LICENSE("GPL"); |
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index ef1db1900d8..c8377fb26cd 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c | |||
@@ -19,107 +19,3 @@ struct dentry * __init xen_init_debugfs(void) | |||
19 | return d_xen_debug; | 19 | return d_xen_debug; |
20 | } | 20 | } |
21 | 21 | ||
22 | struct array_data | ||
23 | { | ||
24 | void *array; | ||
25 | unsigned elements; | ||
26 | }; | ||
27 | |||
28 | static int u32_array_open(struct inode *inode, struct file *file) | ||
29 | { | ||
30 | file->private_data = NULL; | ||
31 | return nonseekable_open(inode, file); | ||
32 | } | ||
33 | |||
34 | static size_t format_array(char *buf, size_t bufsize, const char *fmt, | ||
35 | u32 *array, unsigned array_size) | ||
36 | { | ||
37 | size_t ret = 0; | ||
38 | unsigned i; | ||
39 | |||
40 | for(i = 0; i < array_size; i++) { | ||
41 | size_t len; | ||
42 | |||
43 | len = snprintf(buf, bufsize, fmt, array[i]); | ||
44 | len++; /* ' ' or '\n' */ | ||
45 | ret += len; | ||
46 | |||
47 | if (buf) { | ||
48 | buf += len; | ||
49 | bufsize -= len; | ||
50 | buf[-1] = (i == array_size-1) ? '\n' : ' '; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | ret++; /* \0 */ | ||
55 | if (buf) | ||
56 | *buf = '\0'; | ||
57 | |||
58 | return ret; | ||
59 | } | ||
60 | |||
61 | static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size) | ||
62 | { | ||
63 | size_t len = format_array(NULL, 0, fmt, array, array_size); | ||
64 | char *ret; | ||
65 | |||
66 | ret = kmalloc(len, GFP_KERNEL); | ||
67 | if (ret == NULL) | ||
68 | return NULL; | ||
69 | |||
70 | format_array(ret, len, fmt, array, array_size); | ||
71 | return ret; | ||
72 | } | ||
73 | |||
74 | static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, | ||
75 | loff_t *ppos) | ||
76 | { | ||
77 | struct inode *inode = file->f_path.dentry->d_inode; | ||
78 | struct array_data *data = inode->i_private; | ||
79 | size_t size; | ||
80 | |||
81 | if (*ppos == 0) { | ||
82 | if (file->private_data) { | ||
83 | kfree(file->private_data); | ||
84 | file->private_data = NULL; | ||
85 | } | ||
86 | |||
87 | file->private_data = format_array_alloc("%u", data->array, data->elements); | ||
88 | } | ||
89 | |||
90 | size = 0; | ||
91 | if (file->private_data) | ||
92 | size = strlen(file->private_data); | ||
93 | |||
94 | return simple_read_from_buffer(buf, len, ppos, file->private_data, size); | ||
95 | } | ||
96 | |||
97 | static int xen_array_release(struct inode *inode, struct file *file) | ||
98 | { | ||
99 | kfree(file->private_data); | ||
100 | |||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static const struct file_operations u32_array_fops = { | ||
105 | .owner = THIS_MODULE, | ||
106 | .open = u32_array_open, | ||
107 | .release= xen_array_release, | ||
108 | .read = u32_array_read, | ||
109 | .llseek = no_llseek, | ||
110 | }; | ||
111 | |||
112 | struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode, | ||
113 | struct dentry *parent, | ||
114 | u32 *array, unsigned elements) | ||
115 | { | ||
116 | struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); | ||
117 | |||
118 | if (data == NULL) | ||
119 | return NULL; | ||
120 | |||
121 | data->array = array; | ||
122 | data->elements = elements; | ||
123 | |||
124 | return debugfs_create_file(name, mode, parent, data, &u32_array_fops); | ||
125 | } | ||
diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h index 78d25499be5..12ebf3325c7 100644 --- a/arch/x86/xen/debugfs.h +++ b/arch/x86/xen/debugfs.h | |||
@@ -3,8 +3,4 @@ | |||
3 | 3 | ||
4 | struct dentry * __init xen_init_debugfs(void); | 4 | struct dentry * __init xen_init_debugfs(void); |
5 | 5 | ||
6 | struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode, | ||
7 | struct dentry *parent, | ||
8 | u32 *array, unsigned elements); | ||
9 | |||
10 | #endif /* _XEN_DEBUGFS_H */ | 6 | #endif /* _XEN_DEBUGFS_H */ |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c0f5facdb10..bf4bda6d3e9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/pci.h> | 31 | #include <linux/pci.h> |
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/memblock.h> | 33 | #include <linux/memblock.h> |
34 | #include <linux/syscore_ops.h> | ||
34 | 35 | ||
35 | #include <xen/xen.h> | 36 | #include <xen/xen.h> |
36 | #include <xen/interface/xen.h> | 37 | #include <xen/interface/xen.h> |
@@ -38,10 +39,12 @@ | |||
38 | #include <xen/interface/physdev.h> | 39 | #include <xen/interface/physdev.h> |
39 | #include <xen/interface/vcpu.h> | 40 | #include <xen/interface/vcpu.h> |
40 | #include <xen/interface/memory.h> | 41 | #include <xen/interface/memory.h> |
42 | #include <xen/interface/xen-mca.h> | ||
41 | #include <xen/features.h> | 43 | #include <xen/features.h> |
42 | #include <xen/page.h> | 44 | #include <xen/page.h> |
43 | #include <xen/hvm.h> | 45 | #include <xen/hvm.h> |
44 | #include <xen/hvc-console.h> | 46 | #include <xen/hvc-console.h> |
47 | #include <xen/acpi.h> | ||
45 | 48 | ||
46 | #include <asm/paravirt.h> | 49 | #include <asm/paravirt.h> |
47 | #include <asm/apic.h> | 50 | #include <asm/apic.h> |
@@ -75,6 +78,7 @@ | |||
75 | 78 | ||
76 | #include "xen-ops.h" | 79 | #include "xen-ops.h" |
77 | #include "mmu.h" | 80 | #include "mmu.h" |
81 | #include "smp.h" | ||
78 | #include "multicalls.h" | 82 | #include "multicalls.h" |
79 | 83 | ||
80 | EXPORT_SYMBOL_GPL(hypercall_page); | 84 | EXPORT_SYMBOL_GPL(hypercall_page); |
@@ -105,7 +109,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback); | |||
105 | * Point at some empty memory to start with. We map the real shared_info | 109 | * Point at some empty memory to start with. We map the real shared_info |
106 | * page as soon as fixmap is up and running. | 110 | * page as soon as fixmap is up and running. |
107 | */ | 111 | */ |
108 | struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; | 112 | struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; |
109 | 113 | ||
110 | /* | 114 | /* |
111 | * Flag to determine whether vcpu info placement is available on all | 115 | * Flag to determine whether vcpu info placement is available on all |
@@ -122,6 +126,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; | |||
122 | */ | 126 | */ |
123 | static int have_vcpu_info_placement = 1; | 127 | static int have_vcpu_info_placement = 1; |
124 | 128 | ||
129 | struct tls_descs { | ||
130 | struct desc_struct desc[3]; | ||
131 | }; | ||
132 | |||
133 | /* | ||
134 | * Updating the 3 TLS descriptors in the GDT on every task switch is | ||
135 | * surprisingly expensive so we avoid updating them if they haven't | ||
136 | * changed. Since Xen writes different descriptors than the one | ||
137 | * passed in the update_descriptor hypercall we keep shadow copies to | ||
138 | * compare against. | ||
139 | */ | ||
140 | static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); | ||
141 | |||
125 | static void clamp_max_cpus(void) | 142 | static void clamp_max_cpus(void) |
126 | { | 143 | { |
127 | #ifdef CONFIG_SMP | 144 | #ifdef CONFIG_SMP |
@@ -207,6 +224,9 @@ static void __init xen_banner(void) | |||
207 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | 224 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); |
208 | } | 225 | } |
209 | 226 | ||
227 | #define CPUID_THERM_POWER_LEAF 6 | ||
228 | #define APERFMPERF_PRESENT 0 | ||
229 | |||
210 | static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; | 230 | static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; |
211 | static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; | 231 | static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; |
212 | 232 | ||
@@ -240,6 +260,11 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, | |||
240 | *dx = cpuid_leaf5_edx_val; | 260 | *dx = cpuid_leaf5_edx_val; |
241 | return; | 261 | return; |
242 | 262 | ||
263 | case CPUID_THERM_POWER_LEAF: | ||
264 | /* Disabling APERFMPERF for kernel usage */ | ||
265 | maskecx = ~(1 << APERFMPERF_PRESENT); | ||
266 | break; | ||
267 | |||
243 | case 0xb: | 268 | case 0xb: |
244 | /* Suppress extended topology stuff */ | 269 | /* Suppress extended topology stuff */ |
245 | maskebx = 0; | 270 | maskebx = 0; |
@@ -331,9 +356,7 @@ static void __init xen_init_cpuid_mask(void) | |||
331 | unsigned int xsave_mask; | 356 | unsigned int xsave_mask; |
332 | 357 | ||
333 | cpuid_leaf1_edx_mask = | 358 | cpuid_leaf1_edx_mask = |
334 | ~((1 << X86_FEATURE_MCE) | /* disable MCE */ | 359 | ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */ |
335 | (1 << X86_FEATURE_MCA) | /* disable MCA */ | ||
336 | (1 << X86_FEATURE_MTRR) | /* disable MTRR */ | ||
337 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | 360 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ |
338 | 361 | ||
339 | if (!xen_initial_domain()) | 362 | if (!xen_initial_domain()) |
@@ -530,12 +553,28 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) | |||
530 | BUG(); | 553 | BUG(); |
531 | } | 554 | } |
532 | 555 | ||
556 | static inline bool desc_equal(const struct desc_struct *d1, | ||
557 | const struct desc_struct *d2) | ||
558 | { | ||
559 | return d1->a == d2->a && d1->b == d2->b; | ||
560 | } | ||
561 | |||
533 | static void load_TLS_descriptor(struct thread_struct *t, | 562 | static void load_TLS_descriptor(struct thread_struct *t, |
534 | unsigned int cpu, unsigned int i) | 563 | unsigned int cpu, unsigned int i) |
535 | { | 564 | { |
536 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | 565 | struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i]; |
537 | xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); | 566 | struct desc_struct *gdt; |
538 | struct multicall_space mc = __xen_mc_entry(0); | 567 | xmaddr_t maddr; |
568 | struct multicall_space mc; | ||
569 | |||
570 | if (desc_equal(shadow, &t->tls_array[i])) | ||
571 | return; | ||
572 | |||
573 | *shadow = t->tls_array[i]; | ||
574 | |||
575 | gdt = get_cpu_gdt_table(cpu); | ||
576 | maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); | ||
577 | mc = __xen_mc_entry(0); | ||
539 | 578 | ||
540 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); | 579 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); |
541 | } | 580 | } |
@@ -617,8 +656,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
617 | /* | 656 | /* |
618 | * Look for known traps using IST, and substitute them | 657 | * Look for known traps using IST, and substitute them |
619 | * appropriately. The debugger ones are the only ones we care | 658 | * appropriately. The debugger ones are the only ones we care |
620 | * about. Xen will handle faults like double_fault and | 659 | * about. Xen will handle faults like double_fault, |
621 | * machine_check, so we should never see them. Warn if | 660 | * so we should never see them. Warn if |
622 | * there's an unexpected IST-using fault handler. | 661 | * there's an unexpected IST-using fault handler. |
623 | */ | 662 | */ |
624 | if (addr == (unsigned long)debug) | 663 | if (addr == (unsigned long)debug) |
@@ -633,7 +672,11 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
633 | return 0; | 672 | return 0; |
634 | #ifdef CONFIG_X86_MCE | 673 | #ifdef CONFIG_X86_MCE |
635 | } else if (addr == (unsigned long)machine_check) { | 674 | } else if (addr == (unsigned long)machine_check) { |
636 | return 0; | 675 | /* |
676 | * when xen hypervisor inject vMCE to guest, | ||
677 | * use native mce handler to handle it | ||
678 | */ | ||
679 | ; | ||
637 | #endif | 680 | #endif |
638 | } else { | 681 | } else { |
639 | /* Some other trap using IST? */ | 682 | /* Some other trap using IST? */ |
@@ -883,6 +926,14 @@ static void set_xen_basic_apic_ops(void) | |||
883 | apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; | 926 | apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; |
884 | apic->set_apic_id = xen_set_apic_id; | 927 | apic->set_apic_id = xen_set_apic_id; |
885 | apic->get_apic_id = xen_get_apic_id; | 928 | apic->get_apic_id = xen_get_apic_id; |
929 | |||
930 | #ifdef CONFIG_SMP | ||
931 | apic->send_IPI_allbutself = xen_send_IPI_allbutself; | ||
932 | apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself; | ||
933 | apic->send_IPI_mask = xen_send_IPI_mask; | ||
934 | apic->send_IPI_all = xen_send_IPI_all; | ||
935 | apic->send_IPI_self = xen_send_IPI_self; | ||
936 | #endif | ||
886 | } | 937 | } |
887 | 938 | ||
888 | #endif | 939 | #endif |
@@ -1107,6 +1158,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { | |||
1107 | 1158 | ||
1108 | .read_msr = native_read_msr_safe, | 1159 | .read_msr = native_read_msr_safe, |
1109 | .write_msr = xen_write_msr_safe, | 1160 | .write_msr = xen_write_msr_safe, |
1161 | |||
1110 | .read_tsc = native_read_tsc, | 1162 | .read_tsc = native_read_tsc, |
1111 | .read_pmc = native_read_pmc, | 1163 | .read_pmc = native_read_pmc, |
1112 | 1164 | ||
@@ -1340,7 +1392,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1340 | 1392 | ||
1341 | xen_raw_console_write("mapping kernel into physical memory\n"); | 1393 | xen_raw_console_write("mapping kernel into physical memory\n"); |
1342 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); | 1394 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); |
1343 | xen_ident_map_ISA(); | ||
1344 | 1395 | ||
1345 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | 1396 | /* Allocate and initialize top and mid mfn levels for p2m structure */ |
1346 | xen_build_mfn_list_list(); | 1397 | xen_build_mfn_list_list(); |
@@ -1400,6 +1451,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1400 | 1451 | ||
1401 | /* Make sure ACS will be enabled */ | 1452 | /* Make sure ACS will be enabled */ |
1402 | pci_request_acs(); | 1453 | pci_request_acs(); |
1454 | |||
1455 | xen_acpi_sleep_register(); | ||
1403 | } | 1456 | } |
1404 | #ifdef CONFIG_PCI | 1457 | #ifdef CONFIG_PCI |
1405 | /* PCI BIOS service won't work from a PV guest. */ | 1458 | /* PCI BIOS service won't work from a PV guest. */ |
@@ -1417,64 +1470,155 @@ asmlinkage void __init xen_start_kernel(void) | |||
1417 | #endif | 1470 | #endif |
1418 | } | 1471 | } |
1419 | 1472 | ||
1420 | static int init_hvm_pv_info(int *major, int *minor) | 1473 | #ifdef CONFIG_XEN_PVHVM |
1421 | { | 1474 | /* |
1422 | uint32_t eax, ebx, ecx, edx, pages, msr, base; | 1475 | * The pfn containing the shared_info is located somewhere in RAM. This |
1423 | u64 pfn; | 1476 | * will cause trouble if the current kernel is doing a kexec boot into a |
1424 | 1477 | * new kernel. The new kernel (and its startup code) can not know where | |
1425 | base = xen_cpuid_base(); | 1478 | * the pfn is, so it can not reserve the page. The hypervisor will |
1426 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | 1479 | * continue to update the pfn, and as a result memory corruption occours |
1427 | 1480 | * in the new kernel. | |
1428 | *major = eax >> 16; | 1481 | * |
1429 | *minor = eax & 0xffff; | 1482 | * One way to work around this issue is to allocate a page in the |
1430 | printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); | 1483 | * xen-platform pci device's BAR memory range. But pci init is done very |
1431 | 1484 | * late and the shared_info page is already in use very early to read | |
1432 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | 1485 | * the pvclock. So moving the pfn from RAM to MMIO is racy because some |
1433 | 1486 | * code paths on other vcpus could access the pfn during the small | |
1434 | pfn = __pa(hypercall_page); | 1487 | * window when the old pfn is moved to the new pfn. There is even a |
1435 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | 1488 | * small window were the old pfn is not backed by a mfn, and during that |
1436 | 1489 | * time all reads return -1. | |
1437 | xen_setup_features(); | 1490 | * |
1438 | 1491 | * Because it is not known upfront where the MMIO region is located it | |
1439 | pv_info.name = "Xen HVM"; | 1492 | * can not be used right from the start in xen_hvm_init_shared_info. |
1440 | 1493 | * | |
1441 | xen_domain_type = XEN_HVM_DOMAIN; | 1494 | * To minimise trouble the move of the pfn is done shortly before kexec. |
1495 | * This does not eliminate the race because all vcpus are still online | ||
1496 | * when the syscore_ops will be called. But hopefully there is no work | ||
1497 | * pending at this point in time. Also the syscore_op is run last which | ||
1498 | * reduces the risk further. | ||
1499 | */ | ||
1442 | 1500 | ||
1443 | return 0; | 1501 | static struct shared_info *xen_hvm_shared_info; |
1444 | } | ||
1445 | 1502 | ||
1446 | void __ref xen_hvm_init_shared_info(void) | 1503 | static void xen_hvm_connect_shared_info(unsigned long pfn) |
1447 | { | 1504 | { |
1448 | int cpu; | ||
1449 | struct xen_add_to_physmap xatp; | 1505 | struct xen_add_to_physmap xatp; |
1450 | static struct shared_info *shared_info_page = 0; | ||
1451 | 1506 | ||
1452 | if (!shared_info_page) | ||
1453 | shared_info_page = (struct shared_info *) | ||
1454 | extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1455 | xatp.domid = DOMID_SELF; | 1507 | xatp.domid = DOMID_SELF; |
1456 | xatp.idx = 0; | 1508 | xatp.idx = 0; |
1457 | xatp.space = XENMAPSPACE_shared_info; | 1509 | xatp.space = XENMAPSPACE_shared_info; |
1458 | xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; | 1510 | xatp.gpfn = pfn; |
1459 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | 1511 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) |
1460 | BUG(); | 1512 | BUG(); |
1461 | 1513 | ||
1462 | HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; | 1514 | } |
1515 | static void xen_hvm_set_shared_info(struct shared_info *sip) | ||
1516 | { | ||
1517 | int cpu; | ||
1518 | |||
1519 | HYPERVISOR_shared_info = sip; | ||
1463 | 1520 | ||
1464 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info | 1521 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info |
1465 | * page, we use it in the event channel upcall and in some pvclock | 1522 | * page, we use it in the event channel upcall and in some pvclock |
1466 | * related functions. We don't need the vcpu_info placement | 1523 | * related functions. We don't need the vcpu_info placement |
1467 | * optimizations because we don't use any pv_mmu or pv_irq op on | 1524 | * optimizations because we don't use any pv_mmu or pv_irq op on |
1468 | * HVM. | 1525 | * HVM. |
1469 | * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is | 1526 | * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is |
1470 | * online but xen_hvm_init_shared_info is run at resume time too and | 1527 | * online but xen_hvm_set_shared_info is run at resume time too and |
1471 | * in that case multiple vcpus might be online. */ | 1528 | * in that case multiple vcpus might be online. */ |
1472 | for_each_online_cpu(cpu) { | 1529 | for_each_online_cpu(cpu) { |
1473 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 1530 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
1474 | } | 1531 | } |
1475 | } | 1532 | } |
1476 | 1533 | ||
1477 | #ifdef CONFIG_XEN_PVHVM | 1534 | /* Reconnect the shared_info pfn to a mfn */ |
1535 | void xen_hvm_resume_shared_info(void) | ||
1536 | { | ||
1537 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1538 | } | ||
1539 | |||
1540 | #ifdef CONFIG_KEXEC | ||
1541 | static struct shared_info *xen_hvm_shared_info_kexec; | ||
1542 | static unsigned long xen_hvm_shared_info_pfn_kexec; | ||
1543 | |||
1544 | /* Remember a pfn in MMIO space for kexec reboot */ | ||
1545 | void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) | ||
1546 | { | ||
1547 | xen_hvm_shared_info_kexec = sip; | ||
1548 | xen_hvm_shared_info_pfn_kexec = pfn; | ||
1549 | } | ||
1550 | |||
1551 | static void xen_hvm_syscore_shutdown(void) | ||
1552 | { | ||
1553 | struct xen_memory_reservation reservation = { | ||
1554 | .domid = DOMID_SELF, | ||
1555 | .nr_extents = 1, | ||
1556 | }; | ||
1557 | unsigned long prev_pfn; | ||
1558 | int rc; | ||
1559 | |||
1560 | if (!xen_hvm_shared_info_kexec) | ||
1561 | return; | ||
1562 | |||
1563 | prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; | ||
1564 | set_xen_guest_handle(reservation.extent_start, &prev_pfn); | ||
1565 | |||
1566 | /* Move pfn to MMIO, disconnects previous pfn from mfn */ | ||
1567 | xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); | ||
1568 | |||
1569 | /* Update pointers, following hypercall is also a memory barrier */ | ||
1570 | xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); | ||
1571 | |||
1572 | /* Allocate new mfn for previous pfn */ | ||
1573 | do { | ||
1574 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); | ||
1575 | if (rc == 0) | ||
1576 | msleep(123); | ||
1577 | } while (rc == 0); | ||
1578 | |||
1579 | /* Make sure the previous pfn is really connected to a (new) mfn */ | ||
1580 | BUG_ON(rc != 1); | ||
1581 | } | ||
1582 | |||
1583 | static struct syscore_ops xen_hvm_syscore_ops = { | ||
1584 | .shutdown = xen_hvm_syscore_shutdown, | ||
1585 | }; | ||
1586 | #endif | ||
1587 | |||
1588 | /* Use a pfn in RAM, may move to MMIO before kexec. */ | ||
1589 | static void __init xen_hvm_init_shared_info(void) | ||
1590 | { | ||
1591 | /* Remember pointer for resume */ | ||
1592 | xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1593 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1594 | xen_hvm_set_shared_info(xen_hvm_shared_info); | ||
1595 | } | ||
1596 | |||
1597 | static void __init init_hvm_pv_info(void) | ||
1598 | { | ||
1599 | int major, minor; | ||
1600 | uint32_t eax, ebx, ecx, edx, pages, msr, base; | ||
1601 | u64 pfn; | ||
1602 | |||
1603 | base = xen_cpuid_base(); | ||
1604 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | ||
1605 | |||
1606 | major = eax >> 16; | ||
1607 | minor = eax & 0xffff; | ||
1608 | printk(KERN_INFO "Xen version %d.%d.\n", major, minor); | ||
1609 | |||
1610 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | ||
1611 | |||
1612 | pfn = __pa(hypercall_page); | ||
1613 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | ||
1614 | |||
1615 | xen_setup_features(); | ||
1616 | |||
1617 | pv_info.name = "Xen HVM"; | ||
1618 | |||
1619 | xen_domain_type = XEN_HVM_DOMAIN; | ||
1620 | } | ||
1621 | |||
1478 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, | 1622 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, |
1479 | unsigned long action, void *hcpu) | 1623 | unsigned long action, void *hcpu) |
1480 | { | 1624 | { |
@@ -1497,14 +1641,12 @@ static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { | |||
1497 | 1641 | ||
1498 | static void __init xen_hvm_guest_init(void) | 1642 | static void __init xen_hvm_guest_init(void) |
1499 | { | 1643 | { |
1500 | int r; | 1644 | init_hvm_pv_info(); |
1501 | int major, minor; | ||
1502 | |||
1503 | r = init_hvm_pv_info(&major, &minor); | ||
1504 | if (r < 0) | ||
1505 | return; | ||
1506 | 1645 | ||
1507 | xen_hvm_init_shared_info(); | 1646 | xen_hvm_init_shared_info(); |
1647 | #ifdef CONFIG_KEXEC | ||
1648 | register_syscore_ops(&xen_hvm_syscore_ops); | ||
1649 | #endif | ||
1508 | 1650 | ||
1509 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1651 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
1510 | xen_have_vector_callback = 1; | 1652 | xen_have_vector_callback = 1; |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3506cd4f9a4..27336dfcda8 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -308,8 +308,20 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) | |||
308 | 308 | ||
309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) | 309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) |
310 | { | 310 | { |
311 | if (!xen_batched_set_pte(ptep, pteval)) | 311 | if (!xen_batched_set_pte(ptep, pteval)) { |
312 | native_set_pte(ptep, pteval); | 312 | /* |
313 | * Could call native_set_pte() here and trap and | ||
314 | * emulate the PTE write but with 32-bit guests this | ||
315 | * needs two traps (one for each of the two 32-bit | ||
316 | * words in the PTE) so do one hypercall directly | ||
317 | * instead. | ||
318 | */ | ||
319 | struct mmu_update u; | ||
320 | |||
321 | u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; | ||
322 | u.val = pte_val_ma(pteval); | ||
323 | HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF); | ||
324 | } | ||
313 | } | 325 | } |
314 | 326 | ||
315 | static void xen_set_pte(pte_t *ptep, pte_t pteval) | 327 | static void xen_set_pte(pte_t *ptep, pte_t pteval) |
@@ -1416,13 +1428,28 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1416 | } | 1428 | } |
1417 | #endif /* CONFIG_X86_64 */ | 1429 | #endif /* CONFIG_X86_64 */ |
1418 | 1430 | ||
1419 | /* Init-time set_pte while constructing initial pagetables, which | 1431 | /* |
1420 | doesn't allow RO pagetable pages to be remapped RW */ | 1432 | * Init-time set_pte while constructing initial pagetables, which |
1433 | * doesn't allow RO page table pages to be remapped RW. | ||
1434 | * | ||
1435 | * If there is no MFN for this PFN then this page is initially | ||
1436 | * ballooned out so clear the PTE (as in decrease_reservation() in | ||
1437 | * drivers/xen/balloon.c). | ||
1438 | * | ||
1439 | * Many of these PTE updates are done on unpinned and writable pages | ||
1440 | * and doing a hypercall for these is unnecessary and expensive. At | ||
1441 | * this point it is not possible to tell if a page is pinned or not, | ||
1442 | * so always write the PTE directly and rely on Xen trapping and | ||
1443 | * emulating any updates as necessary. | ||
1444 | */ | ||
1421 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) | 1445 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) |
1422 | { | 1446 | { |
1423 | pte = mask_rw_pte(ptep, pte); | 1447 | if (pte_mfn(pte) != INVALID_P2M_ENTRY) |
1448 | pte = mask_rw_pte(ptep, pte); | ||
1449 | else | ||
1450 | pte = __pte_ma(0); | ||
1424 | 1451 | ||
1425 | xen_set_pte(ptep, pte); | 1452 | native_set_pte(ptep, pte); |
1426 | } | 1453 | } |
1427 | 1454 | ||
1428 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | 1455 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) |
@@ -1933,29 +1960,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
1933 | #endif | 1960 | #endif |
1934 | } | 1961 | } |
1935 | 1962 | ||
1936 | void __init xen_ident_map_ISA(void) | ||
1937 | { | ||
1938 | unsigned long pa; | ||
1939 | |||
1940 | /* | ||
1941 | * If we're dom0, then linear map the ISA machine addresses into | ||
1942 | * the kernel's address space. | ||
1943 | */ | ||
1944 | if (!xen_initial_domain()) | ||
1945 | return; | ||
1946 | |||
1947 | xen_raw_printk("Xen: setup ISA identity maps\n"); | ||
1948 | |||
1949 | for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) { | ||
1950 | pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO); | ||
1951 | |||
1952 | if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0)) | ||
1953 | BUG(); | ||
1954 | } | ||
1955 | |||
1956 | xen_flush_tlb(); | ||
1957 | } | ||
1958 | |||
1959 | static void __init xen_post_allocator_init(void) | 1963 | static void __init xen_post_allocator_init(void) |
1960 | { | 1964 | { |
1961 | pv_mmu_ops.set_pte = xen_set_pte; | 1965 | pv_mmu_ops.set_pte = xen_set_pte; |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 1b267e75158..64effdc6da9 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -499,16 +499,18 @@ static bool alloc_p2m(unsigned long pfn) | |||
499 | return true; | 499 | return true; |
500 | } | 500 | } |
501 | 501 | ||
502 | static bool __init __early_alloc_p2m(unsigned long pfn) | 502 | static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary) |
503 | { | 503 | { |
504 | unsigned topidx, mididx, idx; | 504 | unsigned topidx, mididx, idx; |
505 | unsigned long *p2m; | ||
506 | unsigned long *mid_mfn_p; | ||
505 | 507 | ||
506 | topidx = p2m_top_index(pfn); | 508 | topidx = p2m_top_index(pfn); |
507 | mididx = p2m_mid_index(pfn); | 509 | mididx = p2m_mid_index(pfn); |
508 | idx = p2m_index(pfn); | 510 | idx = p2m_index(pfn); |
509 | 511 | ||
510 | /* Pfff.. No boundary cross-over, lets get out. */ | 512 | /* Pfff.. No boundary cross-over, lets get out. */ |
511 | if (!idx) | 513 | if (!idx && check_boundary) |
512 | return false; | 514 | return false; |
513 | 515 | ||
514 | WARN(p2m_top[topidx][mididx] == p2m_identity, | 516 | WARN(p2m_top[topidx][mididx] == p2m_identity, |
@@ -522,24 +524,66 @@ static bool __init __early_alloc_p2m(unsigned long pfn) | |||
522 | return false; | 524 | return false; |
523 | 525 | ||
524 | /* Boundary cross-over for the edges: */ | 526 | /* Boundary cross-over for the edges: */ |
525 | if (idx) { | 527 | p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); |
526 | unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
527 | unsigned long *mid_mfn_p; | ||
528 | 528 | ||
529 | p2m_init(p2m); | 529 | p2m_init(p2m); |
530 | 530 | ||
531 | p2m_top[topidx][mididx] = p2m; | 531 | p2m_top[topidx][mididx] = p2m; |
532 | 532 | ||
533 | /* For save/restore we need to MFN of the P2M saved */ | 533 | /* For save/restore we need to MFN of the P2M saved */ |
534 | 534 | ||
535 | mid_mfn_p = p2m_top_mfn_p[topidx]; | 535 | mid_mfn_p = p2m_top_mfn_p[topidx]; |
536 | WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), | 536 | WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), |
537 | "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", | 537 | "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", |
538 | topidx, mididx); | 538 | topidx, mididx); |
539 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | 539 | mid_mfn_p[mididx] = virt_to_mfn(p2m); |
540 | |||
541 | return true; | ||
542 | } | ||
543 | |||
544 | static bool __init early_alloc_p2m(unsigned long pfn) | ||
545 | { | ||
546 | unsigned topidx = p2m_top_index(pfn); | ||
547 | unsigned long *mid_mfn_p; | ||
548 | unsigned long **mid; | ||
549 | |||
550 | mid = p2m_top[topidx]; | ||
551 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
552 | if (mid == p2m_mid_missing) { | ||
553 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
554 | |||
555 | p2m_mid_init(mid); | ||
540 | 556 | ||
557 | p2m_top[topidx] = mid; | ||
558 | |||
559 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
560 | } | ||
561 | /* And the save/restore P2M tables.. */ | ||
562 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
563 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
564 | p2m_mid_mfn_init(mid_mfn_p); | ||
565 | |||
566 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
567 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
568 | /* Note: we don't set mid_mfn_p[midix] here, | ||
569 | * look in early_alloc_p2m_middle */ | ||
541 | } | 570 | } |
542 | return idx != 0; | 571 | return true; |
572 | } | ||
573 | bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
574 | { | ||
575 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
576 | if (!early_alloc_p2m(pfn)) | ||
577 | return false; | ||
578 | |||
579 | if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) | ||
580 | return false; | ||
581 | |||
582 | if (!__set_phys_to_machine(pfn, mfn)) | ||
583 | return false; | ||
584 | } | ||
585 | |||
586 | return true; | ||
543 | } | 587 | } |
544 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, | 588 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, |
545 | unsigned long pfn_e) | 589 | unsigned long pfn_e) |
@@ -559,35 +603,11 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, | |||
559 | pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); | 603 | pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); |
560 | pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) | 604 | pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) |
561 | { | 605 | { |
562 | unsigned topidx = p2m_top_index(pfn); | 606 | WARN_ON(!early_alloc_p2m(pfn)); |
563 | unsigned long *mid_mfn_p; | ||
564 | unsigned long **mid; | ||
565 | |||
566 | mid = p2m_top[topidx]; | ||
567 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
568 | if (mid == p2m_mid_missing) { | ||
569 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
570 | |||
571 | p2m_mid_init(mid); | ||
572 | |||
573 | p2m_top[topidx] = mid; | ||
574 | |||
575 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
576 | } | ||
577 | /* And the save/restore P2M tables.. */ | ||
578 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
579 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
580 | p2m_mid_mfn_init(mid_mfn_p); | ||
581 | |||
582 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
583 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
584 | /* Note: we don't set mid_mfn_p[midix] here, | ||
585 | * look in __early_alloc_p2m */ | ||
586 | } | ||
587 | } | 607 | } |
588 | 608 | ||
589 | __early_alloc_p2m(pfn_s); | 609 | early_alloc_p2m_middle(pfn_s, true); |
590 | __early_alloc_p2m(pfn_e); | 610 | early_alloc_p2m_middle(pfn_e, true); |
591 | 611 | ||
592 | for (pfn = pfn_s; pfn < pfn_e; pfn++) | 612 | for (pfn = pfn_s; pfn < pfn_e; pfn++) |
593 | if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) | 613 | if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) |
@@ -686,6 +706,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, | |||
686 | unsigned long uninitialized_var(address); | 706 | unsigned long uninitialized_var(address); |
687 | unsigned level; | 707 | unsigned level; |
688 | pte_t *ptep = NULL; | 708 | pte_t *ptep = NULL; |
709 | int ret = 0; | ||
689 | 710 | ||
690 | pfn = page_to_pfn(page); | 711 | pfn = page_to_pfn(page); |
691 | if (!PageHighMem(page)) { | 712 | if (!PageHighMem(page)) { |
@@ -721,6 +742,24 @@ int m2p_add_override(unsigned long mfn, struct page *page, | |||
721 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); | 742 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); |
722 | spin_unlock_irqrestore(&m2p_override_lock, flags); | 743 | spin_unlock_irqrestore(&m2p_override_lock, flags); |
723 | 744 | ||
745 | /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in | ||
746 | * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other | ||
747 | * pfn so that the following mfn_to_pfn(mfn) calls will return the | ||
748 | * pfn from the m2p_override (the backend pfn) instead. | ||
749 | * We need to do this because the pages shared by the frontend | ||
750 | * (xen-blkfront) can be already locked (lock_page, called by | ||
751 | * do_read_cache_page); when the userspace backend tries to use them | ||
752 | * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so | ||
753 | * do_blockdev_direct_IO is going to try to lock the same pages | ||
754 | * again resulting in a deadlock. | ||
755 | * As a side effect get_user_pages_fast might not be safe on the | ||
756 | * frontend pages while they are being shared with the backend, | ||
757 | * because mfn_to_pfn (that ends up being called by GUPF) will | ||
758 | * return the backend pfn rather than the frontend pfn. */ | ||
759 | ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); | ||
760 | if (ret == 0 && get_phys_to_machine(pfn) == mfn) | ||
761 | set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); | ||
762 | |||
724 | return 0; | 763 | return 0; |
725 | } | 764 | } |
726 | EXPORT_SYMBOL_GPL(m2p_add_override); | 765 | EXPORT_SYMBOL_GPL(m2p_add_override); |
@@ -732,6 +771,7 @@ int m2p_remove_override(struct page *page, bool clear_pte) | |||
732 | unsigned long uninitialized_var(address); | 771 | unsigned long uninitialized_var(address); |
733 | unsigned level; | 772 | unsigned level; |
734 | pte_t *ptep = NULL; | 773 | pte_t *ptep = NULL; |
774 | int ret = 0; | ||
735 | 775 | ||
736 | pfn = page_to_pfn(page); | 776 | pfn = page_to_pfn(page); |
737 | mfn = get_phys_to_machine(pfn); | 777 | mfn = get_phys_to_machine(pfn); |
@@ -801,6 +841,22 @@ int m2p_remove_override(struct page *page, bool clear_pte) | |||
801 | } else | 841 | } else |
802 | set_phys_to_machine(pfn, page->index); | 842 | set_phys_to_machine(pfn, page->index); |
803 | 843 | ||
844 | /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present | ||
845 | * somewhere in this domain, even before being added to the | ||
846 | * m2p_override (see comment above in m2p_add_override). | ||
847 | * If there are no other entries in the m2p_override corresponding | ||
848 | * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for | ||
849 | * the original pfn (the one shared by the frontend): the backend | ||
850 | * cannot do any IO on this page anymore because it has been | ||
851 | * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of | ||
852 | * the original pfn causes mfn_to_pfn(mfn) to return the frontend | ||
853 | * pfn again. */ | ||
854 | mfn &= ~FOREIGN_FRAME_BIT; | ||
855 | ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); | ||
856 | if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && | ||
857 | m2p_find_override(mfn) == NULL) | ||
858 | set_phys_to_machine(pfn, mfn); | ||
859 | |||
804 | return 0; | 860 | return 0; |
805 | } | 861 | } |
806 | EXPORT_SYMBOL_GPL(m2p_remove_override); | 862 | EXPORT_SYMBOL_GPL(m2p_remove_override); |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1ba8dff2675..ead85576d54 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <xen/interface/memory.h> | 26 | #include <xen/interface/memory.h> |
27 | #include <xen/interface/physdev.h> | 27 | #include <xen/interface/physdev.h> |
28 | #include <xen/features.h> | 28 | #include <xen/features.h> |
29 | |||
30 | #include "xen-ops.h" | 29 | #include "xen-ops.h" |
31 | #include "vdso.h" | 30 | #include "vdso.h" |
32 | 31 | ||
@@ -84,8 +83,8 @@ static void __init xen_add_extra_mem(u64 start, u64 size) | |||
84 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 83 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
85 | } | 84 | } |
86 | 85 | ||
87 | static unsigned long __init xen_release_chunk(unsigned long start, | 86 | static unsigned long __init xen_do_chunk(unsigned long start, |
88 | unsigned long end) | 87 | unsigned long end, bool release) |
89 | { | 88 | { |
90 | struct xen_memory_reservation reservation = { | 89 | struct xen_memory_reservation reservation = { |
91 | .address_bits = 0, | 90 | .address_bits = 0, |
@@ -96,30 +95,133 @@ static unsigned long __init xen_release_chunk(unsigned long start, | |||
96 | unsigned long pfn; | 95 | unsigned long pfn; |
97 | int ret; | 96 | int ret; |
98 | 97 | ||
99 | for(pfn = start; pfn < end; pfn++) { | 98 | for (pfn = start; pfn < end; pfn++) { |
99 | unsigned long frame; | ||
100 | unsigned long mfn = pfn_to_mfn(pfn); | 100 | unsigned long mfn = pfn_to_mfn(pfn); |
101 | 101 | ||
102 | /* Make sure pfn exists to start with */ | 102 | if (release) { |
103 | if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) | 103 | /* Make sure pfn exists to start with */ |
104 | continue; | 104 | if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) |
105 | 105 | continue; | |
106 | set_xen_guest_handle(reservation.extent_start, &mfn); | 106 | frame = mfn; |
107 | } else { | ||
108 | if (mfn != INVALID_P2M_ENTRY) | ||
109 | continue; | ||
110 | frame = pfn; | ||
111 | } | ||
112 | set_xen_guest_handle(reservation.extent_start, &frame); | ||
107 | reservation.nr_extents = 1; | 113 | reservation.nr_extents = 1; |
108 | 114 | ||
109 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | 115 | ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap, |
110 | &reservation); | 116 | &reservation); |
111 | WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); | 117 | WARN(ret != 1, "Failed to %s pfn %lx err=%d\n", |
118 | release ? "release" : "populate", pfn, ret); | ||
119 | |||
112 | if (ret == 1) { | 120 | if (ret == 1) { |
113 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 121 | if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) { |
122 | if (release) | ||
123 | break; | ||
124 | set_xen_guest_handle(reservation.extent_start, &frame); | ||
125 | reservation.nr_extents = 1; | ||
126 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
127 | &reservation); | ||
128 | break; | ||
129 | } | ||
114 | len++; | 130 | len++; |
115 | } | 131 | } else |
132 | break; | ||
116 | } | 133 | } |
117 | printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n", | 134 | if (len) |
118 | start, end, len); | 135 | printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n", |
136 | release ? "Freeing" : "Populating", | ||
137 | start, end, len, | ||
138 | release ? "freed" : "added"); | ||
119 | 139 | ||
120 | return len; | 140 | return len; |
121 | } | 141 | } |
122 | 142 | ||
143 | static unsigned long __init xen_release_chunk(unsigned long start, | ||
144 | unsigned long end) | ||
145 | { | ||
146 | return xen_do_chunk(start, end, true); | ||
147 | } | ||
148 | |||
149 | static unsigned long __init xen_populate_chunk( | ||
150 | const struct e820entry *list, size_t map_size, | ||
151 | unsigned long max_pfn, unsigned long *last_pfn, | ||
152 | unsigned long credits_left) | ||
153 | { | ||
154 | const struct e820entry *entry; | ||
155 | unsigned int i; | ||
156 | unsigned long done = 0; | ||
157 | unsigned long dest_pfn; | ||
158 | |||
159 | for (i = 0, entry = list; i < map_size; i++, entry++) { | ||
160 | unsigned long s_pfn; | ||
161 | unsigned long e_pfn; | ||
162 | unsigned long pfns; | ||
163 | long capacity; | ||
164 | |||
165 | if (credits_left <= 0) | ||
166 | break; | ||
167 | |||
168 | if (entry->type != E820_RAM) | ||
169 | continue; | ||
170 | |||
171 | e_pfn = PFN_DOWN(entry->addr + entry->size); | ||
172 | |||
173 | /* We only care about E820 after the xen_start_info->nr_pages */ | ||
174 | if (e_pfn <= max_pfn) | ||
175 | continue; | ||
176 | |||
177 | s_pfn = PFN_UP(entry->addr); | ||
178 | /* If the E820 falls within the nr_pages, we want to start | ||
179 | * at the nr_pages PFN. | ||
180 | * If that would mean going past the E820 entry, skip it | ||
181 | */ | ||
182 | if (s_pfn <= max_pfn) { | ||
183 | capacity = e_pfn - max_pfn; | ||
184 | dest_pfn = max_pfn; | ||
185 | } else { | ||
186 | capacity = e_pfn - s_pfn; | ||
187 | dest_pfn = s_pfn; | ||
188 | } | ||
189 | |||
190 | if (credits_left < capacity) | ||
191 | capacity = credits_left; | ||
192 | |||
193 | pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false); | ||
194 | done += pfns; | ||
195 | *last_pfn = (dest_pfn + pfns); | ||
196 | if (pfns < capacity) | ||
197 | break; | ||
198 | credits_left -= pfns; | ||
199 | } | ||
200 | return done; | ||
201 | } | ||
202 | |||
203 | static void __init xen_set_identity_and_release_chunk( | ||
204 | unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, | ||
205 | unsigned long *released, unsigned long *identity) | ||
206 | { | ||
207 | unsigned long pfn; | ||
208 | |||
209 | /* | ||
210 | * If the PFNs are currently mapped, the VA mapping also needs | ||
211 | * to be updated to be 1:1. | ||
212 | */ | ||
213 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) | ||
214 | (void)HYPERVISOR_update_va_mapping( | ||
215 | (unsigned long)__va(pfn << PAGE_SHIFT), | ||
216 | mfn_pte(pfn, PAGE_KERNEL_IO), 0); | ||
217 | |||
218 | if (start_pfn < nr_pages) | ||
219 | *released += xen_release_chunk( | ||
220 | start_pfn, min(end_pfn, nr_pages)); | ||
221 | |||
222 | *identity += set_phys_range_identity(start_pfn, end_pfn); | ||
223 | } | ||
224 | |||
123 | static unsigned long __init xen_set_identity_and_release( | 225 | static unsigned long __init xen_set_identity_and_release( |
124 | const struct e820entry *list, size_t map_size, unsigned long nr_pages) | 226 | const struct e820entry *list, size_t map_size, unsigned long nr_pages) |
125 | { | 227 | { |
@@ -142,7 +244,6 @@ static unsigned long __init xen_set_identity_and_release( | |||
142 | */ | 244 | */ |
143 | for (i = 0, entry = list; i < map_size; i++, entry++) { | 245 | for (i = 0, entry = list; i < map_size; i++, entry++) { |
144 | phys_addr_t end = entry->addr + entry->size; | 246 | phys_addr_t end = entry->addr + entry->size; |
145 | |||
146 | if (entry->type == E820_RAM || i == map_size - 1) { | 247 | if (entry->type == E820_RAM || i == map_size - 1) { |
147 | unsigned long start_pfn = PFN_DOWN(start); | 248 | unsigned long start_pfn = PFN_DOWN(start); |
148 | unsigned long end_pfn = PFN_UP(end); | 249 | unsigned long end_pfn = PFN_UP(end); |
@@ -150,20 +251,19 @@ static unsigned long __init xen_set_identity_and_release( | |||
150 | if (entry->type == E820_RAM) | 251 | if (entry->type == E820_RAM) |
151 | end_pfn = PFN_UP(entry->addr); | 252 | end_pfn = PFN_UP(entry->addr); |
152 | 253 | ||
153 | if (start_pfn < end_pfn) { | 254 | if (start_pfn < end_pfn) |
154 | if (start_pfn < nr_pages) | 255 | xen_set_identity_and_release_chunk( |
155 | released += xen_release_chunk( | 256 | start_pfn, end_pfn, nr_pages, |
156 | start_pfn, min(end_pfn, nr_pages)); | 257 | &released, &identity); |
157 | 258 | ||
158 | identity += set_phys_range_identity( | ||
159 | start_pfn, end_pfn); | ||
160 | } | ||
161 | start = end; | 259 | start = end; |
162 | } | 260 | } |
163 | } | 261 | } |
164 | 262 | ||
165 | printk(KERN_INFO "Released %lu pages of unused memory\n", released); | 263 | if (released) |
166 | printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity); | 264 | printk(KERN_INFO "Released %lu pages of unused memory\n", released); |
265 | if (identity) | ||
266 | printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity); | ||
167 | 267 | ||
168 | return released; | 268 | return released; |
169 | } | 269 | } |
@@ -217,7 +317,9 @@ char * __init xen_memory_setup(void) | |||
217 | int rc; | 317 | int rc; |
218 | struct xen_memory_map memmap; | 318 | struct xen_memory_map memmap; |
219 | unsigned long max_pages; | 319 | unsigned long max_pages; |
320 | unsigned long last_pfn = 0; | ||
220 | unsigned long extra_pages = 0; | 321 | unsigned long extra_pages = 0; |
322 | unsigned long populated; | ||
221 | int i; | 323 | int i; |
222 | int op; | 324 | int op; |
223 | 325 | ||
@@ -257,8 +359,20 @@ char * __init xen_memory_setup(void) | |||
257 | */ | 359 | */ |
258 | xen_released_pages = xen_set_identity_and_release( | 360 | xen_released_pages = xen_set_identity_and_release( |
259 | map, memmap.nr_entries, max_pfn); | 361 | map, memmap.nr_entries, max_pfn); |
362 | |||
363 | /* | ||
364 | * Populate back the non-RAM pages and E820 gaps that had been | ||
365 | * released. */ | ||
366 | populated = xen_populate_chunk(map, memmap.nr_entries, | ||
367 | max_pfn, &last_pfn, xen_released_pages); | ||
368 | |||
369 | xen_released_pages -= populated; | ||
260 | extra_pages += xen_released_pages; | 370 | extra_pages += xen_released_pages; |
261 | 371 | ||
372 | if (last_pfn > max_pfn) { | ||
373 | max_pfn = min(MAX_DOMAIN_PAGES, last_pfn); | ||
374 | mem_end = PFN_PHYS(max_pfn); | ||
375 | } | ||
262 | /* | 376 | /* |
263 | * Clamp the amount of extra memory to a EXTRA_MEM_RATIO | 377 | * Clamp the amount of extra memory to a EXTRA_MEM_RATIO |
264 | * factor the base size. On non-highmem systems, the base | 378 | * factor the base size. On non-highmem systems, the base |
@@ -272,7 +386,6 @@ char * __init xen_memory_setup(void) | |||
272 | */ | 386 | */ |
273 | extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), | 387 | extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), |
274 | extra_pages); | 388 | extra_pages); |
275 | |||
276 | i = 0; | 389 | i = 0; |
277 | while (i < memmap.nr_entries) { | 390 | while (i < memmap.nr_entries) { |
278 | u64 addr = map[i].addr; | 391 | u64 addr = map[i].addr; |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 3700945ed0d..f58dca7a6e5 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/err.h> | 16 | #include <linux/err.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/irq_work.h> | ||
19 | 20 | ||
20 | #include <asm/paravirt.h> | 21 | #include <asm/paravirt.h> |
21 | #include <asm/desc.h> | 22 | #include <asm/desc.h> |
@@ -41,10 +42,12 @@ cpumask_var_t xen_cpu_initialized_map; | |||
41 | static DEFINE_PER_CPU(int, xen_resched_irq); | 42 | static DEFINE_PER_CPU(int, xen_resched_irq); |
42 | static DEFINE_PER_CPU(int, xen_callfunc_irq); | 43 | static DEFINE_PER_CPU(int, xen_callfunc_irq); |
43 | static DEFINE_PER_CPU(int, xen_callfuncsingle_irq); | 44 | static DEFINE_PER_CPU(int, xen_callfuncsingle_irq); |
45 | static DEFINE_PER_CPU(int, xen_irq_work); | ||
44 | static DEFINE_PER_CPU(int, xen_debug_irq) = -1; | 46 | static DEFINE_PER_CPU(int, xen_debug_irq) = -1; |
45 | 47 | ||
46 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); | 48 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); |
47 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); | 49 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); |
50 | static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id); | ||
48 | 51 | ||
49 | /* | 52 | /* |
50 | * Reschedule call back. | 53 | * Reschedule call back. |
@@ -77,9 +80,7 @@ static void __cpuinit cpu_bringup(void) | |||
77 | 80 | ||
78 | notify_cpu_starting(cpu); | 81 | notify_cpu_starting(cpu); |
79 | 82 | ||
80 | ipi_call_lock(); | ||
81 | set_cpu_online(cpu, true); | 83 | set_cpu_online(cpu, true); |
82 | ipi_call_unlock(); | ||
83 | 84 | ||
84 | this_cpu_write(cpu_state, CPU_ONLINE); | 85 | this_cpu_write(cpu_state, CPU_ONLINE); |
85 | 86 | ||
@@ -143,6 +144,17 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
143 | goto fail; | 144 | goto fail; |
144 | per_cpu(xen_callfuncsingle_irq, cpu) = rc; | 145 | per_cpu(xen_callfuncsingle_irq, cpu) = rc; |
145 | 146 | ||
147 | callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu); | ||
148 | rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR, | ||
149 | cpu, | ||
150 | xen_irq_work_interrupt, | ||
151 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
152 | callfunc_name, | ||
153 | NULL); | ||
154 | if (rc < 0) | ||
155 | goto fail; | ||
156 | per_cpu(xen_irq_work, cpu) = rc; | ||
157 | |||
146 | return 0; | 158 | return 0; |
147 | 159 | ||
148 | fail: | 160 | fail: |
@@ -155,6 +167,8 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
155 | if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0) | 167 | if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0) |
156 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), | 168 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), |
157 | NULL); | 169 | NULL); |
170 | if (per_cpu(xen_irq_work, cpu) >= 0) | ||
171 | unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); | ||
158 | 172 | ||
159 | return rc; | 173 | return rc; |
160 | } | 174 | } |
@@ -407,6 +421,7 @@ static void xen_cpu_die(unsigned int cpu) | |||
407 | unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); | 421 | unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); |
408 | unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); | 422 | unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); |
409 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); | 423 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); |
424 | unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); | ||
410 | xen_uninit_lock_cpu(cpu); | 425 | xen_uninit_lock_cpu(cpu); |
411 | xen_teardown_timer(cpu); | 426 | xen_teardown_timer(cpu); |
412 | 427 | ||
@@ -469,8 +484,8 @@ static void xen_smp_send_reschedule(int cpu) | |||
469 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); | 484 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); |
470 | } | 485 | } |
471 | 486 | ||
472 | static void xen_send_IPI_mask(const struct cpumask *mask, | 487 | static void __xen_send_IPI_mask(const struct cpumask *mask, |
473 | enum ipi_vector vector) | 488 | int vector) |
474 | { | 489 | { |
475 | unsigned cpu; | 490 | unsigned cpu; |
476 | 491 | ||
@@ -482,7 +497,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) | |||
482 | { | 497 | { |
483 | int cpu; | 498 | int cpu; |
484 | 499 | ||
485 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | 500 | __xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); |
486 | 501 | ||
487 | /* Make sure other vcpus get a chance to run if they need to. */ | 502 | /* Make sure other vcpus get a chance to run if they need to. */ |
488 | for_each_cpu(cpu, mask) { | 503 | for_each_cpu(cpu, mask) { |
@@ -495,10 +510,86 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) | |||
495 | 510 | ||
496 | static void xen_smp_send_call_function_single_ipi(int cpu) | 511 | static void xen_smp_send_call_function_single_ipi(int cpu) |
497 | { | 512 | { |
498 | xen_send_IPI_mask(cpumask_of(cpu), | 513 | __xen_send_IPI_mask(cpumask_of(cpu), |
499 | XEN_CALL_FUNCTION_SINGLE_VECTOR); | 514 | XEN_CALL_FUNCTION_SINGLE_VECTOR); |
500 | } | 515 | } |
501 | 516 | ||
517 | static inline int xen_map_vector(int vector) | ||
518 | { | ||
519 | int xen_vector; | ||
520 | |||
521 | switch (vector) { | ||
522 | case RESCHEDULE_VECTOR: | ||
523 | xen_vector = XEN_RESCHEDULE_VECTOR; | ||
524 | break; | ||
525 | case CALL_FUNCTION_VECTOR: | ||
526 | xen_vector = XEN_CALL_FUNCTION_VECTOR; | ||
527 | break; | ||
528 | case CALL_FUNCTION_SINGLE_VECTOR: | ||
529 | xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR; | ||
530 | break; | ||
531 | case IRQ_WORK_VECTOR: | ||
532 | xen_vector = XEN_IRQ_WORK_VECTOR; | ||
533 | break; | ||
534 | default: | ||
535 | xen_vector = -1; | ||
536 | printk(KERN_ERR "xen: vector 0x%x is not implemented\n", | ||
537 | vector); | ||
538 | } | ||
539 | |||
540 | return xen_vector; | ||
541 | } | ||
542 | |||
543 | void xen_send_IPI_mask(const struct cpumask *mask, | ||
544 | int vector) | ||
545 | { | ||
546 | int xen_vector = xen_map_vector(vector); | ||
547 | |||
548 | if (xen_vector >= 0) | ||
549 | __xen_send_IPI_mask(mask, xen_vector); | ||
550 | } | ||
551 | |||
552 | void xen_send_IPI_all(int vector) | ||
553 | { | ||
554 | int xen_vector = xen_map_vector(vector); | ||
555 | |||
556 | if (xen_vector >= 0) | ||
557 | __xen_send_IPI_mask(cpu_online_mask, xen_vector); | ||
558 | } | ||
559 | |||
560 | void xen_send_IPI_self(int vector) | ||
561 | { | ||
562 | int xen_vector = xen_map_vector(vector); | ||
563 | |||
564 | if (xen_vector >= 0) | ||
565 | xen_send_IPI_one(smp_processor_id(), xen_vector); | ||
566 | } | ||
567 | |||
568 | void xen_send_IPI_mask_allbutself(const struct cpumask *mask, | ||
569 | int vector) | ||
570 | { | ||
571 | unsigned cpu; | ||
572 | unsigned int this_cpu = smp_processor_id(); | ||
573 | |||
574 | if (!(num_online_cpus() > 1)) | ||
575 | return; | ||
576 | |||
577 | for_each_cpu_and(cpu, mask, cpu_online_mask) { | ||
578 | if (this_cpu == cpu) | ||
579 | continue; | ||
580 | |||
581 | xen_smp_send_call_function_single_ipi(cpu); | ||
582 | } | ||
583 | } | ||
584 | |||
585 | void xen_send_IPI_allbutself(int vector) | ||
586 | { | ||
587 | int xen_vector = xen_map_vector(vector); | ||
588 | |||
589 | if (xen_vector >= 0) | ||
590 | xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector); | ||
591 | } | ||
592 | |||
502 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) | 593 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) |
503 | { | 594 | { |
504 | irq_enter(); | 595 | irq_enter(); |
@@ -519,6 +610,16 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) | |||
519 | return IRQ_HANDLED; | 610 | return IRQ_HANDLED; |
520 | } | 611 | } |
521 | 612 | ||
613 | static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) | ||
614 | { | ||
615 | irq_enter(); | ||
616 | irq_work_run(); | ||
617 | inc_irq_stat(apic_irq_work_irqs); | ||
618 | irq_exit(); | ||
619 | |||
620 | return IRQ_HANDLED; | ||
621 | } | ||
622 | |||
522 | static const struct smp_ops xen_smp_ops __initconst = { | 623 | static const struct smp_ops xen_smp_ops __initconst = { |
523 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, | 624 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, |
524 | .smp_prepare_cpus = xen_smp_prepare_cpus, | 625 | .smp_prepare_cpus = xen_smp_prepare_cpus, |
@@ -565,6 +666,7 @@ static void xen_hvm_cpu_die(unsigned int cpu) | |||
565 | unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); | 666 | unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); |
566 | unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); | 667 | unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); |
567 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); | 668 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); |
669 | unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); | ||
568 | native_cpu_die(cpu); | 670 | native_cpu_die(cpu); |
569 | } | 671 | } |
570 | 672 | ||
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h new file mode 100644 index 00000000000..8981a76d081 --- /dev/null +++ b/arch/x86/xen/smp.h | |||
@@ -0,0 +1,12 @@ | |||
1 | #ifndef _XEN_SMP_H | ||
2 | |||
3 | extern void xen_send_IPI_mask(const struct cpumask *mask, | ||
4 | int vector); | ||
5 | extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask, | ||
6 | int vector); | ||
7 | extern void xen_send_IPI_allbutself(int vector); | ||
8 | extern void physflat_send_IPI_allbutself(int vector); | ||
9 | extern void xen_send_IPI_all(int vector); | ||
10 | extern void xen_send_IPI_self(int vector); | ||
11 | |||
12 | #endif | ||
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index d69cc6c3f80..83e866d714c 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -440,12 +440,12 @@ static int __init xen_spinlock_debugfs(void) | |||
440 | debugfs_create_u64("time_total", 0444, d_spin_debug, | 440 | debugfs_create_u64("time_total", 0444, d_spin_debug, |
441 | &spinlock_stats.time_total); | 441 | &spinlock_stats.time_total); |
442 | 442 | ||
443 | xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug, | 443 | debugfs_create_u32_array("histo_total", 0444, d_spin_debug, |
444 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); | 444 | spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); |
445 | xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, | 445 | debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, |
446 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); | 446 | spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); |
447 | xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, | 447 | debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, |
448 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); | 448 | spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); |
449 | 449 | ||
450 | return 0; | 450 | return 0; |
451 | } | 451 | } |
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 45329c8c226..ae8a00c39de 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) | |||
30 | { | 30 | { |
31 | #ifdef CONFIG_XEN_PVHVM | 31 | #ifdef CONFIG_XEN_PVHVM |
32 | int cpu; | 32 | int cpu; |
33 | xen_hvm_init_shared_info(); | 33 | xen_hvm_resume_shared_info(); |
34 | xen_callback_vector(); | 34 | xen_callback_vector(); |
35 | xen_unplug_emulated_devices(); | 35 | xen_unplug_emulated_devices(); |
36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { | 36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 45c0c0667bd..1e4329e04e0 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -28,7 +28,6 @@ void xen_setup_shared_info(void); | |||
28 | void xen_build_mfn_list_list(void); | 28 | void xen_build_mfn_list_list(void); |
29 | void xen_setup_machphys_mapping(void); | 29 | void xen_setup_machphys_mapping(void); |
30 | pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | 30 | pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); |
31 | void xen_ident_map_ISA(void); | ||
32 | void xen_reserve_top(void); | 31 | void xen_reserve_top(void); |
33 | extern unsigned long xen_max_p2m_pfn; | 32 | extern unsigned long xen_max_p2m_pfn; |
34 | 33 | ||
@@ -42,7 +41,7 @@ void xen_enable_syscall(void); | |||
42 | void xen_vcpu_restore(void); | 41 | void xen_vcpu_restore(void); |
43 | 42 | ||
44 | void xen_callback_vector(void); | 43 | void xen_callback_vector(void); |
45 | void xen_hvm_init_shared_info(void); | 44 | void xen_hvm_resume_shared_info(void); |
46 | void xen_unplug_emulated_devices(void); | 45 | void xen_unplug_emulated_devices(void); |
47 | 46 | ||
48 | void __init xen_build_dynamic_phys_to_machine(void); | 47 | void __init xen_build_dynamic_phys_to_machine(void); |