Diffstat (limited to 'arch/x86')
187 files changed, 11171 insertions, 3868 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c70684f859e1..ba2657c49217 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -70,6 +70,7 @@ config X86
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_TEXT_POKE_SMP
 	select HAVE_GENERIC_HARDIRQS
+	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select SPARSE_IRQ
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IRQ_PROBE
@@ -84,6 +85,7 @@ config X86
 	select GENERIC_IOMAP
 	select DCACHE_WORD_ACCESS
 	select GENERIC_SMP_IDLE_THREAD
+	select ARCH_WANT_IPC_PARSE_VERSION if X86_32
 	select HAVE_ARCH_SECCOMP_FILTER
 	select BUILDTIME_EXTABLE_SORT
 	select GENERIC_CMOS_UPDATE
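
The new ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select tells generic code that x86 now provides an architecture-optimized atomic64_dec_if_positive(). A hypothetical caller, for illustration only (not part of this series):

	/* Decrement a 64-bit budget only while the result stays
	 * non-negative; the helper returns the post-decrement value
	 * and skips the store once that value would be negative. */
	#include <linux/atomic.h>

	static atomic64_t budget = ATOMIC64_INIT(2);

	static bool consume_one(void)
	{
		return atomic64_dec_if_positive(&budget) >= 0;
	}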
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index e46c2147397f..b322f124ee3c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -129,6 +129,25 @@ config DOUBLEFAULT
 	  option saves about 4k and might cause you much additional grey
 	  hair.
 
+config DEBUG_TLBFLUSH
+	bool "Set upper limit of TLB entries to flush one-by-one"
+	depends on DEBUG_KERNEL && (X86_64 || X86_INVLPG)
+	---help---
+
+	X86-only for now.
+
+	This option allows the user to tune the amount of TLB entries the
+	kernel flushes one-by-one instead of doing a full TLB flush. In
+	certain situations, the former is cheaper. This is controlled by the
+	tlb_flushall_shift knob under /sys/kernel/debug/x86. If you set it
+	to -1, the code flushes the whole TLB unconditionally. Otherwise,
+	for positive values of it, the kernel will use single TLB entry
+	invalidating instructions according to the following formula:
+
+	flush_entries <= active_tlb_entries / 2^tlb_flushall_shift
+
+	If in doubt, say "N".
+
 config IOMMU_DEBUG
 	bool "Enable IOMMU debugging"
 	depends on GART_IOMMU && DEBUG_KERNEL
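
A minimal sketch of the heuristic the DEBUG_TLBFLUSH help text describes (the names are illustrative placeholders for the real tunables, not code from this series):

	static int tlb_flushall_shift = 1;	/* -1: always do a full flush */

	static bool flush_one_by_one(unsigned long flush_entries,
				     unsigned long active_tlb_entries)
	{
		if (tlb_flushall_shift < 0)
			return false;	/* flush the whole TLB */

		/* flush_entries <= active_tlb_entries / 2^tlb_flushall_shift */
		return flush_entries <=
			(active_tlb_entries >> tlb_flushall_shift);
	}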
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1f2521434554..b0c5276861ec 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -49,6 +49,9 @@ else
         KBUILD_AFLAGS += -m64
         KBUILD_CFLAGS += -m64
 
+        # Use -mpreferred-stack-boundary=3 if supported.
+        KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
+
         # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
         cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
         cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index cb62f786990d..10f6b1178c68 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,5 +1,7 @@
 #include "misc.h"
 
+#ifdef CONFIG_EARLY_PRINTK
+
 static unsigned long fs;
 static inline void set_fs(unsigned long seg)
 {
@@ -19,3 +21,5 @@ int cmdline_find_option_bool(const char *option)
 {
 	return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
 }
+
+#endif
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
index 261e81fb9582..d3d003cb5481 100644
--- a/arch/x86/boot/compressed/early_serial_console.c
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -1,5 +1,9 @@
 #include "misc.h"
 
+#ifdef CONFIG_EARLY_PRINTK
+
 int early_serial_base;
 
 #include "../early_serial_console.c"
+
+#endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 4e85f5f85837..b3e0227df2c9 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -729,32 +729,68 @@ fail:
  * need to create one ourselves (usually the bootloader would create
  * one for us).
  */
-static efi_status_t make_boot_params(struct boot_params *boot_params,
-				     efi_loaded_image_t *image,
-				     void *handle)
+struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
 {
-	struct efi_info *efi = &boot_params->efi_info;
-	struct apm_bios_info *bi = &boot_params->apm_bios_info;
-	struct sys_desc_table *sdt = &boot_params->sys_desc_table;
-	struct e820entry *e820_map = &boot_params->e820_map[0];
-	struct e820entry *prev = NULL;
-	struct setup_header *hdr = &boot_params->hdr;
-	unsigned long size, key, desc_size, _size;
-	efi_memory_desc_t *mem_map;
-	void *options = image->load_options;
-	u32 load_options_size = image->load_options_size / 2; /* ASCII */
+	struct boot_params *boot_params;
+	struct sys_desc_table *sdt;
+	struct apm_bios_info *bi;
+	struct setup_header *hdr;
+	struct efi_info *efi;
+	efi_loaded_image_t *image;
+	void *options;
+	u32 load_options_size;
+	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
 	int options_size = 0;
 	efi_status_t status;
-	__u32 desc_version;
 	unsigned long cmdline;
-	u8 nr_entries;
 	u16 *s2;
 	u8 *s1;
 	int i;
 
+	sys_table = _table;
+
+	/* Check if we were booted by the EFI firmware */
+	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		return NULL;
+
+	status = efi_call_phys3(sys_table->boottime->handle_protocol,
+				handle, &proto, (void *)&image);
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+		return NULL;
+	}
+
+	status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to alloc lowmem for boot params\n");
+		return NULL;
+	}
+
+	memset(boot_params, 0x0, 0x4000);
+
+	hdr = &boot_params->hdr;
+	efi = &boot_params->efi_info;
+	bi = &boot_params->apm_bios_info;
+	sdt = &boot_params->sys_desc_table;
+
+	/* Copy the second sector to boot_params */
+	memcpy(&hdr->jump, image->image_base + 512, 512);
+
+	/*
+	 * Fill out some of the header fields ourselves because the
+	 * EFI firmware loader doesn't load the first sector.
+	 */
+	hdr->root_flags = 1;
+	hdr->vid_mode = 0xffff;
+	hdr->boot_flag = 0xAA55;
+
+	hdr->code32_start = (__u64)(unsigned long)image->image_base;
+
 	hdr->type_of_loader = 0x21;
 
 	/* Convert unicode cmdline to ascii */
+	options = image->load_options;
+	load_options_size = image->load_options_size / 2; /* ASCII */
 	cmdline = 0;
 	s2 = (u16 *)options;
 
@@ -791,18 +827,36 @@ static efi_status_t make_boot_params(struct boot_params *boot_params,
 	hdr->ramdisk_image = 0;
 	hdr->ramdisk_size = 0;
 
-	status = handle_ramdisks(image, hdr);
-	if (status != EFI_SUCCESS)
-		goto free_cmdline;
-
-	setup_graphics(boot_params);
-
 	/* Clear APM BIOS info */
 	memset(bi, 0, sizeof(*bi));
 
 	memset(sdt, 0, sizeof(*sdt));
 
-	memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
+	status = handle_ramdisks(image, hdr);
+	if (status != EFI_SUCCESS)
+		goto fail2;
+
+	return boot_params;
+fail2:
+	if (options_size)
+		low_free(options_size, hdr->cmd_line_ptr);
+fail:
+	low_free(0x4000, (unsigned long)boot_params);
+	return NULL;
+}
+
+static efi_status_t exit_boot(struct boot_params *boot_params,
+			      void *handle)
+{
+	struct efi_info *efi = &boot_params->efi_info;
+	struct e820entry *e820_map = &boot_params->e820_map[0];
+	struct e820entry *prev = NULL;
+	unsigned long size, key, desc_size, _size;
+	efi_memory_desc_t *mem_map;
+	efi_status_t status;
+	__u32 desc_version;
+	u8 nr_entries;
+	int i;
 
 	size = sizeof(*mem_map) * 32;
 
@@ -811,7 +865,7 @@ again:
 	_size = size;
 	status = low_alloc(size, 1, (unsigned long *)&mem_map);
 	if (status != EFI_SUCCESS)
-		goto free_cmdline;
+		return status;
 
 	status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
 				mem_map, &key, &desc_size, &desc_version);
@@ -823,6 +877,7 @@ again:
 	if (status != EFI_SUCCESS)
 		goto free_mem_map;
 
+	memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
 	efi->efi_systab = (unsigned long)sys_table;
 	efi->efi_memdesc_size = desc_size;
 	efi->efi_memdesc_version = desc_version;
@@ -906,61 +961,13 @@ again:
 
 free_mem_map:
 	low_free(_size, (unsigned long)mem_map);
-free_cmdline:
-	if (options_size)
-		low_free(options_size, hdr->cmd_line_ptr);
-fail:
 	return status;
 }
 
-/*
- * On success we return a pointer to a boot_params structure, and NULL
- * on failure.
- */
-struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
+static efi_status_t relocate_kernel(struct setup_header *hdr)
 {
-	struct boot_params *boot_params;
 	unsigned long start, nr_pages;
-	struct desc_ptr *gdt, *idt;
-	efi_loaded_image_t *image;
-	struct setup_header *hdr;
 	efi_status_t status;
-	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
-	struct desc_struct *desc;
-
-	sys_table = _table;
-
-	/* Check if we were booted by the EFI firmware */
-	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
-		goto fail;
-
-	status = efi_call_phys3(sys_table->boottime->handle_protocol,
-				handle, &proto, (void *)&image);
-	if (status != EFI_SUCCESS) {
-		efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
-		goto fail;
-	}
-
-	status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
-	if (status != EFI_SUCCESS) {
-		efi_printk("Failed to alloc lowmem for boot params\n");
-		goto fail;
-	}
-
-	memset(boot_params, 0x0, 0x4000);
-
-	hdr = &boot_params->hdr;
-
-	/* Copy the second sector to boot_params */
-	memcpy(&hdr->jump, image->image_base + 512, 512);
-
-	/*
-	 * Fill out some of the header fields ourselves because the
-	 * EFI firmware loader doesn't load the first sector.
-	 */
-	hdr->root_flags = 1;
-	hdr->vid_mode = 0xffff;
-	hdr->boot_flag = 0xAA55;
 
 	/*
 	 * The EFI firmware loader could have placed the kernel image
@@ -978,16 +985,40 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 	if (status != EFI_SUCCESS) {
 		status = low_alloc(hdr->init_size, hdr->kernel_alignment,
 				   &start);
-		if (status != EFI_SUCCESS) {
+		if (status != EFI_SUCCESS)
 			efi_printk("Failed to alloc mem for kernel\n");
-			goto fail;
-		}
 	}
 
+	if (status == EFI_SUCCESS)
+		memcpy((void *)start, (void *)(unsigned long)hdr->code32_start,
+		       hdr->init_size);
+
+	hdr->pref_address = hdr->code32_start;
 	hdr->code32_start = (__u32)start;
-	hdr->pref_address = (__u64)(unsigned long)image->image_base;
 
-	memcpy((void *)start, image->image_base, image->image_size);
+	return status;
+}
+
+/*
+ * On success we return a pointer to a boot_params structure, and NULL
+ * on failure.
+ */
+struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
+			     struct boot_params *boot_params)
+{
+	struct desc_ptr *gdt, *idt;
+	efi_loaded_image_t *image;
+	struct setup_header *hdr = &boot_params->hdr;
+	efi_status_t status;
+	struct desc_struct *desc;
+
+	sys_table = _table;
+
+	/* Check if we were booted by the EFI firmware */
+	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		goto fail;
+
+	setup_graphics(boot_params);
 
 	status = efi_call_phys3(sys_table->boottime->allocate_pool,
 				EFI_LOADER_DATA, sizeof(*gdt),
@@ -1015,7 +1046,18 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 	idt->size = 0;
 	idt->address = 0;
 
-	status = make_boot_params(boot_params, image, handle);
+	/*
+	 * If the kernel isn't already loaded at the preferred load
+	 * address, relocate it.
+	 */
+	if (hdr->pref_address != hdr->code32_start) {
+		status = relocate_kernel(hdr);
+
+		if (status != EFI_SUCCESS)
+			goto fail;
+	}
+
+	status = exit_boot(boot_params, handle);
 	if (status != EFI_SUCCESS)
 		goto fail;
 
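
The eboot.c rework splits the old monolithic efi_main() into make_boot_params() (now called first, from the entry stubs below), relocate_kernel() and exit_boot(). A condensed sketch of the resulting control flow, with error handling elided (an illustration, not the literal code):

	struct boot_params *efi_main_flow(void *handle,
					  efi_system_table_t *table,
					  struct boot_params *bp)
	{
		struct setup_header *hdr = &bp->hdr;

		/* make_boot_params() already set hdr->code32_start to where
		 * the firmware loaded the image; hdr->pref_address holds the
		 * preferred load address from the copied setup header. */
		if (hdr->pref_address != hdr->code32_start)
			relocate_kernel(hdr);	/* copy the image into place */

		exit_boot(bp, handle);	/* fetch memory map, fill e820 */
		return bp;
	}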
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index c85e3ac99bba..aa4aaf1b2380 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -42,6 +42,16 @@ ENTRY(startup_32)
 	 */
 	add	$0x4, %esp
 
+	call	make_boot_params
+	cmpl	$0, %eax
+	je	1f
+	movl	0x4(%esp), %esi
+	movl	(%esp), %ecx
+	pushl	%eax
+	pushl	%esi
+	pushl	%ecx
+
+	.org 0x30,0x90
 	call	efi_main
 	cmpl	$0, %eax
 	movl	%eax, %esi
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 87e03a13d8e3..2c4b171eec33 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -209,6 +209,16 @@ ENTRY(startup_64)
 	.org 0x210
 	mov	%rcx, %rdi
 	mov	%rdx, %rsi
+	pushq	%rdi
+	pushq	%rsi
+	call	make_boot_params
+	cmpq	$0,%rax
+	je	1f
+	mov	%rax, %rdx
+	popq	%rsi
+	popq	%rdi
+
+	.org 0x230,0x90
 	call	efi_main
 	movq	%rax,%rsi
 	cmpq	$0,%rax
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 7116dcba0c9e..88f7ff6da404 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -108,8 +108,6 @@ static void error(char *m);
  * This is set up by the setup-routine at boot-time
  */
 struct boot_params *real_mode;		/* Pointer to real-mode data */
-static int quiet;
-static int debug;
 
 void *memset(void *s, int c, size_t n);
 void *memcpy(void *dest, const void *src, size_t n);
@@ -170,15 +168,11 @@ static void serial_putchar(int ch)
 	outb(ch, early_serial_base + TXR);
 }
 
-void __putstr(int error, const char *s)
+void __putstr(const char *s)
 {
 	int x, y, pos;
 	char c;
 
-#ifndef CONFIG_X86_VERBOSE_BOOTUP
-	if (!error)
-		return;
-#endif
 	if (early_serial_base) {
 		const char *str = s;
 		while (*str) {
@@ -265,9 +259,9 @@ void *memcpy(void *dest, const void *src, size_t n)
 
 static void error(char *x)
 {
-	__putstr(1, "\n\n");
-	__putstr(1, x);
-	__putstr(1, "\n\n -- System halted");
+	error_putstr("\n\n");
+	error_putstr(x);
+	error_putstr("\n\n -- System halted");
 
 	while (1)
 		asm("hlt");
@@ -294,8 +288,7 @@ static void parse_elf(void *output)
 		return;
 	}
 
-	if (!quiet)
-		putstr("Parsing ELF... ");
+	debug_putstr("Parsing ELF... ");
 
 	phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
 	if (!phdrs)
@@ -332,11 +325,6 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 {
 	real_mode = rmode;
 
-	if (cmdline_find_option_bool("quiet"))
-		quiet = 1;
-	if (cmdline_find_option_bool("debug"))
-		debug = 1;
-
 	if (real_mode->screen_info.orig_video_mode == 7) {
 		vidmem = (char *) 0xb0000;
 		vidport = 0x3b4;
@@ -349,8 +337,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	cols = real_mode->screen_info.orig_video_cols;
 
 	console_init();
-	if (debug)
-		putstr("early console in decompress_kernel\n");
+	debug_putstr("early console in decompress_kernel\n");
 
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
@@ -369,11 +356,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 		error("Wrong destination address");
 #endif
 
-	if (!quiet)
-		putstr("\nDecompressing Linux... ");
+	debug_putstr("\nDecompressing Linux... ");
 	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
 	parse_elf(output);
-	if (!quiet)
-		putstr("done.\nBooting the kernel.\n");
+	debug_putstr("done.\nBooting the kernel.\n");
 	return;
 }
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 3f19c81a6203..0e6dc0ee0eea 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -24,9 +24,21 @@
 
 /* misc.c */
 extern struct boot_params *real_mode;		/* Pointer to real-mode data */
-void __putstr(int error, const char *s);
-#define putstr(__x)  __putstr(0, __x)
-#define puts(__x)  __putstr(0, __x)
+void __putstr(const char *s);
+#define error_putstr(__x)  __putstr(__x)
+
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+
+#define debug_putstr(__x)  __putstr(__x)
+
+#else
+
+static inline void debug_putstr(const char *s)
+{ }
+
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
 
 /* cmdline.c */
 int cmdline_find_option(const char *option, char *buffer, int bufsize);
@@ -36,4 +48,13 @@ int cmdline_find_option_bool(const char *option);
 extern int early_serial_base;
 void console_init(void);
 
+#else
+
+/* early_serial_console.c */
+static const int early_serial_base;
+static inline void console_init(void)
+{ }
+
+#endif
+
 #endif
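
With these misc.h changes, error output is always printed, while debug chatter compiles away entirely unless CONFIG_X86_VERBOSE_BOOTUP is set. A hypothetical caller inside the decompressor (illustration only, not part of the patch):

	debug_putstr("stage: parsing ELF...\n");  /* no-op in quiet builds */
	error_putstr("fatal: bad magic\n");       /* always reaches the console */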
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index efe5acfc79c3..b4e15dd6786a 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -283,7 +283,7 @@ _start:
 # Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x020a		# header version number (>= 0x0105)
+		.word	0x020b		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -401,18 +401,13 @@ pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
 #define INIT_SIZE VO_INIT_SIZE
 #endif
 init_size:		.long INIT_SIZE		# kernel initialization size
+handover_offset:	.long 0x30		# offset to the handover
+						# protocol entry point
 
 # End of setup header #####################################################
 
 	.section ".entrytext", "ax"
start_of_setup:
-#ifdef SAFE_RESET_DISK_CONTROLLER
-# Reset the disk controller.
-	movw	$0x0000, %ax		# Reset disk controller
-	movb	$0x80, %dl		# All disks
-	int	$0x13
-#endif
-
 # Force %es = %ds
 	movw	%ds, %ax
 	movw	%ax, %es
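
The new handover_offset field (together with the header-version bump to 0x020b) advertises the EFI handover protocol entry points created by the .org 0x30 / .org 0x230 pads in head_32.S and head_64.S above. A loader-side sketch of how the entry might be computed; the +0x200 step for 64-bit is an assumption inferred from those offsets (0x230 = 0x200 + 0x30), not code from this series:

	typedef void (*handover_fn)(void *image, void *systab, void *bp);

	static void efi_handover(unsigned long kernel_base,
				 unsigned int handover_offset, int is64,
				 void *image, void *systab, void *bp)
	{
		unsigned long entry = kernel_base + handover_offset;

		if (is64)
			entry += 0x200;	/* 64-bit entry sits one sector later */

		((handover_fn)entry)(image, systab, bp);
	}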
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e191ac048b59..e908e5de82d3 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,9 @@
 # Arch-specific CryptoAPI modules.
 #
 
+obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
+obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
+
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -12,8 +15,10 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
+obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
+obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 
@@ -30,16 +35,11 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
+twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
+serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
-
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
-
-# enable AVX support only when $(AS) can actually assemble the instructions
-ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
-AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
-CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
-endif
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c
new file mode 100644
index 000000000000..43282fe04a8b
--- /dev/null
+++ b/arch/x86/crypto/ablk_helper.c
@@ -0,0 +1,149 @@
+/*
+ * Shared async block cipher helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Based on aesni-intel_glue.c by:
+ *  Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <crypto/algapi.h>
+#include <crypto/cryptd.h>
+#include <asm/i387.h>
+#include <asm/crypto/ablk_helper.h>
+
+int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+		 unsigned int key_len)
+{
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
+	int err;
+
+	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+				    & CRYPTO_TFM_REQ_MASK);
+	err = crypto_ablkcipher_setkey(child, key, key_len);
+	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+				    & CRYPTO_TFM_RES_MASK);
+	return err;
+}
+EXPORT_SYMBOL_GPL(ablk_set_key);
+
+int __ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct blkcipher_desc desc;
+
+	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+	desc.info = req->info;
+	desc.flags = 0;
+
+	return crypto_blkcipher_crt(desc.tfm)->encrypt(
+		&desc, req->dst, req->src, req->nbytes);
+}
+EXPORT_SYMBOL_GPL(__ablk_encrypt);
+
+int ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_encrypt(cryptd_req);
+	} else {
+		return __ablk_encrypt(req);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_encrypt);
+
+int ablk_decrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_decrypt(cryptd_req);
+	} else {
+		struct blkcipher_desc desc;
+
+		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+		desc.info = req->info;
+		desc.flags = 0;
+
+		return crypto_blkcipher_crt(desc.tfm)->decrypt(
+			&desc, req->dst, req->src, req->nbytes);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_decrypt);
+
+void ablk_exit(struct crypto_tfm *tfm)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+EXPORT_SYMBOL_GPL(ablk_exit);
+
+int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	ctx->cryptd_tfm = cryptd_tfm;
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ablk_init_common);
+
+int ablk_init(struct crypto_tfm *tfm)
+{
+	char drv_name[CRYPTO_MAX_ALG_NAME];
+
+	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+		 crypto_tfm_alg_driver_name(tfm));
+
+	return ablk_init_common(tfm, drv_name);
+}
+EXPORT_SYMBOL_GPL(ablk_init);
+
+MODULE_LICENSE("GPL");
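
A glue module adopts these shared helpers by pointing its crypto_alg at them; the field values here are illustrative only (compare the aesni-intel_glue.c hunks below, which switch their context size to struct async_helper_ctx):

	static struct crypto_alg example_ablk_alg = {
		.cra_name		= "ecb(example)",
		.cra_driver_name	= "ecb-example-async",
		.cra_priority		= 400,
		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
					  CRYPTO_ALG_ASYNC,
		.cra_blocksize		= 16,
		.cra_ctxsize		= sizeof(struct async_helper_ctx),
		.cra_type		= &crypto_ablkcipher_type,
		.cra_module		= THIS_MODULE,
		.cra_init		= ablk_init,	/* binds "__driver-<name>" via cryptd */
		.cra_exit		= ablk_exit,
		.cra_u = {
			.ablkcipher = {
				.setkey		= ablk_set_key,
				.encrypt	= ablk_encrypt,
				.decrypt	= ablk_decrypt,
			},
		},
	};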
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 8efcf42a9d7e..59b37deb8c8d 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -5,7 +5,7 @@
 
 #include <linux/module.h>
 #include <crypto/aes.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
 
 asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
 asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ac7f5cd019e8..34fdcff4d2c8 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -30,7 +30,8 @@
 #include <crypto/ctr.h>
 #include <asm/cpu_device_id.h>
 #include <asm/i387.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
+#include <asm/crypto/ablk_helper.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
 #include <linux/workqueue.h>
@@ -52,10 +53,6 @@
 #define HAS_XTS
 #endif
 
-struct async_aes_ctx {
-	struct cryptd_ablkcipher *cryptd_tfm;
-};
-
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
@@ -377,87 +374,6 @@ static int ctr_crypt(struct blkcipher_desc *desc,
 }
 #endif
 
-static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
-			unsigned int key_len)
-{
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
-	int err;
-
-	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
-				    & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ablkcipher_setkey(child, key, key_len);
-	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
-				    & CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_ablkcipher_encrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-		return crypto_blkcipher_crt(desc.tfm)->encrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static int ablk_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_ablkcipher_decrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-		return crypto_blkcipher_crt(desc.tfm)->decrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static void ablk_exit(struct crypto_tfm *tfm)
-{
-	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-
-static int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
-{
-	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-
-	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
-		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
-	return 0;
-}
-
 static int ablk_ecb_init(struct crypto_tfm *tfm)
 {
 	return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
@@ -613,7 +529,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 	struct aesni_rfc4106_gcm_ctx *child_ctx =
                                  aesni_rfc4106_gcm_ctx_get(cryptd_child);
-	u8 *new_key_mem = NULL;
+	u8 *new_key_align, *new_key_mem = NULL;
 
 	if (key_len < 4) {
 		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -637,9 +553,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 		if (!new_key_mem)
 			return -ENOMEM;
 
-		new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
-		memcpy(new_key_mem, key, key_len);
-		key = new_key_mem;
+		new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
+		memcpy(new_key_align, key, key_len);
+		key = new_key_align;
 	}
 
 	if (!irq_fpu_usable())
@@ -968,7 +884,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -989,7 +905,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1033,7 +949,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1098,7 +1014,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1126,7 +1042,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1150,7 +1066,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1174,7 +1090,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
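
The new_key_align change above also fixes a subtle bug: PTR_ALIGN()'s result used to overwrite new_key_mem, so a later kfree() could receive a pointer that kmalloc() never returned. The restored pattern as a standalone sketch (hypothetical helper, not from the patch):

	#include <linux/kernel.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	static int copy_key_aligned(const u8 *key, unsigned int key_len)
	{
		u8 *mem, *aligned;

		mem = kmalloc(key_len + 16, GFP_KERNEL);  /* 16 ~ AESNI_ALIGN */
		if (!mem)
			return -ENOMEM;

		aligned = PTR_ALIGN(mem, 16);	/* may differ from mem */
		memcpy(aligned, key, key_len);

		/* ... use 'aligned' ... */

		kfree(mem);	/* free the original pointer, not the alias */
		return 0;
	}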
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 3306dc0b139e..eeb2b3b743e9 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -5,10 +5,6 @@
  *
  * Camellia parts based on code by:
  *  Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,9 +30,9 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <crypto/algapi.h>
-#include <crypto/b128ops.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
+#include <asm/crypto/glue_helper.h>
 
 #define CAMELLIA_MIN_KEY_SIZE	16
 #define CAMELLIA_MAX_KEY_SIZE	32
@@ -1312,307 +1308,128 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 				 &tfm->crt_flags);
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
-		     void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
+static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
 {
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes;
-	int err;
-
-	err = blkcipher_walk_virt(desc, walk);
-
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
-
-		/* Process two block batch */
-		if (nbytes >= bsize * 2) {
-			do {
-				fn_2way(ctx, wdst, wsrc);
-
-				wsrc += bsize * 2;
-				wdst += bsize * 2;
-				nbytes -= bsize * 2;
-			} while (nbytes >= bsize * 2);
-
-			if (nbytes < bsize)
-				goto done;
-		}
-
-		/* Handle leftovers */
-		do {
-			fn(ctx, wdst, wsrc);
-
-			wsrc += bsize;
-			wdst += bsize;
-			nbytes -= bsize;
-		} while (nbytes >= bsize);
-
-done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
-	}
-
-	return err;
-}
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
-}
+	u128 iv = *src;
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
-}
+	camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
-{
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 *iv = (u128 *)walk->iv;
-
-	do {
-		u128_xor(dst, src, iv);
-		camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
-	return nbytes;
+	u128_xor(&dst[1], &dst[1], &iv);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
 {
-	struct blkcipher_walk walk;
-	int err;
+	be128 ctrblk;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	if (dst != src)
+		*dst = *src;
 
-	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
+	u128_to_be128(&ctrblk, iv);
+	u128_inc(iv);
 
-	return err;
+	camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
 }
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
+				    u128 *iv)
 {
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 ivs[2 - 1];
-	u128 last_iv;
+	be128 ctrblks[2];
 
-	/* Start of the last block. */
-	src += nbytes / bsize - 1;
-	dst += nbytes / bsize - 1;
-
-	last_iv = *src;
-
-	/* Process two block batch */
-	if (nbytes >= bsize * 2) {
-		do {
-			nbytes -= bsize * (2 - 1);
-			src -= 2 - 1;
-			dst -= 2 - 1;
-
-			ivs[0] = src[0];
-
-			camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
-
-			u128_xor(dst + 1, dst + 1, ivs + 0);
-
-			nbytes -= bsize;
-			if (nbytes < bsize)
-				goto done;
-
-			u128_xor(dst, dst, src - 1);
-			src -= 1;
-			dst -= 1;
-		} while (nbytes >= bsize * 2);
-
-		if (nbytes < bsize)
-			goto done;
+	if (dst != src) {
+		dst[0] = src[0];
+		dst[1] = src[1];
 	}
 
-	/* Handle leftovers */
-	for (;;) {
-		camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
-		nbytes -= bsize;
-		if (nbytes < bsize)
-			break;
+	u128_to_be128(&ctrblks[0], iv);
+	u128_inc(iv);
+	u128_to_be128(&ctrblks[1], iv);
+	u128_inc(iv);
 
-		u128_xor(dst, dst, src - 1);
-		src -= 1;
-		dst -= 1;
-	}
-
-done:
-	u128_xor(dst, dst, (u128 *)walk->iv);
-	*(u128 *)walk->iv = last_iv;
-
-	return nbytes;
+	camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+static const struct common_glue_ctx camellia_enc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+	} }
+};
 
-	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
+static const struct common_glue_ctx camellia_ctr = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+	} }
+};
 
-	return err;
-}
+static const struct common_glue_ctx camellia_dec = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+	} }
+};
 
-static inline void u128_to_be128(be128 *dst, const u128 *src)
-{
-	dst->a = cpu_to_be64(src->a);
-	dst->b = cpu_to_be64(src->b);
-}
+static const struct common_glue_ctx camellia_dec_cbc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+	} }
+};
 
-static inline void be128_to_u128(u128 *dst, const be128 *src)
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	dst->a = be64_to_cpu(src->a);
-	dst->b = be64_to_cpu(src->b);
+	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
 }
 
-static inline void u128_inc(u128 *i)
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	i->b++;
-	if (!i->b)
-		i->a++;
+	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
 }
 
-static void ctr_crypt_final(struct blkcipher_desc *desc,
-			    struct blkcipher_walk *walk)
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	u8 keystream[CAMELLIA_BLOCK_SIZE];
-	u8 *src = walk->src.virt.addr;
-	u8 *dst = walk->dst.virt.addr;
-	unsigned int nbytes = walk->nbytes;
-	u128 ctrblk;
-
-	memcpy(keystream, src, nbytes);
-	camellia_enc_blk_xor(ctx, keystream, walk->iv);
-	memcpy(dst, keystream, nbytes);
-
-	be128_to_u128(&ctrblk, (be128 *)walk->iv);
-	u128_inc(&ctrblk);
-	u128_to_be128((be128 *)walk->iv, &ctrblk);
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
+				       dst, src, nbytes);
 }
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
 {
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 ctrblk;
+	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
+				       nbytes);
1546 | be128 ctrblocks[2]; | ||
1547 | |||
1548 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
1549 | |||
1550 | /* Process two block batch */ | ||
1551 | if (nbytes >= bsize * 2) { | ||
1552 | do { | ||
1553 | if (dst != src) { | ||
1554 | dst[0] = src[0]; | ||
1555 | dst[1] = src[1]; | ||
1556 | } | ||
1557 | |||
1558 | /* create ctrblks for parallel encrypt */ | ||
1559 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
1560 | u128_inc(&ctrblk); | ||
1561 | u128_to_be128(&ctrblocks[1], &ctrblk); | ||
1562 | u128_inc(&ctrblk); | ||
1563 | |||
1564 | camellia_enc_blk_xor_2way(ctx, (u8 *)dst, | ||
1565 | (u8 *)ctrblocks); | ||
1566 | |||
1567 | src += 2; | ||
1568 | dst += 2; | ||
1569 | nbytes -= bsize * 2; | ||
1570 | } while (nbytes >= bsize * 2); | ||
1571 | |||
1572 | if (nbytes < bsize) | ||
1573 | goto done; | ||
1574 | } | ||
1575 | |||
1576 | /* Handle leftovers */ | ||
1577 | do { | ||
1578 | if (dst != src) | ||
1579 | *dst = *src; | ||
1580 | |||
1581 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
1582 | u128_inc(&ctrblk); | ||
1583 | |||
1584 | camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); | ||
1585 | |||
1586 | src += 1; | ||
1587 | dst += 1; | ||
1588 | nbytes -= bsize; | ||
1589 | } while (nbytes >= bsize); | ||
1590 | |||
1591 | done: | ||
1592 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
1593 | return nbytes; | ||
1594 | } | 1427 | } |
1595 | 1428 | ||
1596 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 1429 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1597 | struct scatterlist *src, unsigned int nbytes) | 1430 | struct scatterlist *src, unsigned int nbytes) |
1598 | { | 1431 | { |
1599 | struct blkcipher_walk walk; | 1432 | return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); |
1600 | int err; | ||
1601 | |||
1602 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
1603 | err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE); | ||
1604 | |||
1605 | while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) { | ||
1606 | nbytes = __ctr_crypt(desc, &walk); | ||
1607 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
1608 | } | ||
1609 | |||
1610 | if (walk.nbytes) { | ||
1611 | ctr_crypt_final(desc, &walk); | ||
1612 | err = blkcipher_walk_done(desc, &walk, 0); | ||
1613 | } | ||
1614 | |||
1615 | return err; | ||
1616 | } | 1433 | } |
1617 | 1434 | ||
1618 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | 1435 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) |
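The conversion above replaces camellia's hand-rolled ECB/CBC/CTR walkers with per-mode function tables (common_glue_ctx). The entries are ordered widest-first, and the helper repeatedly applies the widest function that still fits the remaining data before falling back to the one-block routine. A minimal sketch of that dispatch idea (types simplified; these are not the kernel's actual structures):

struct fn_entry {
	unsigned int blocks;				/* blocks handled per call */
	void (*fn)(void *ctx, unsigned char *dst, const unsigned char *src);
};

static unsigned int dispatch_widest_first(const struct fn_entry *fns,
					  unsigned int num_fns, void *ctx,
					  unsigned char *dst,
					  const unsigned char *src,
					  unsigned int nbytes,
					  unsigned int bsize)
{
	unsigned int i;

	for (i = 0; i < num_fns; i++) {
		unsigned int chunk = fns[i].blocks * bsize;

		while (nbytes >= chunk) {	/* batch as long as it fits */
			fns[i].fn(ctx, dst, src);
			src += chunk;
			dst += chunk;
			nbytes -= chunk;
		}
	}

	return nbytes;	/* leftover < bsize is handled by the walk machinery */
}

Because the table is scanned in order, wider entries must come first, which is why every common_glue_ctx below lists the multi-block function before the single-block one.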
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c new file mode 100644 index 000000000000..4854f0f31e4f --- /dev/null +++ b/arch/x86/crypto/glue_helper.c | |||
@@ -0,0 +1,307 @@ | |||
1 | /* | ||
2 | * Shared glue code for 128bit block ciphers | ||
3 | * | ||
4 | * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
24 | * USA | ||
25 | * | ||
26 | */ | ||
27 | |||
28 | #include <linux/module.h> | ||
29 | #include <crypto/b128ops.h> | ||
30 | #include <crypto/lrw.h> | ||
31 | #include <crypto/xts.h> | ||
32 | #include <asm/crypto/glue_helper.h> | ||
33 | #include <crypto/scatterwalk.h> | ||
34 | |||
35 | static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
36 | struct blkcipher_desc *desc, | ||
37 | struct blkcipher_walk *walk) | ||
38 | { | ||
39 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
40 | const unsigned int bsize = 128 / 8; | ||
41 | unsigned int nbytes, i, func_bytes; | ||
42 | bool fpu_enabled = false; | ||
43 | int err; | ||
44 | |||
45 | err = blkcipher_walk_virt(desc, walk); | ||
46 | |||
47 | while ((nbytes = walk->nbytes)) { | ||
48 | u8 *wsrc = walk->src.virt.addr; | ||
49 | u8 *wdst = walk->dst.virt.addr; | ||
50 | |||
51 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
52 | desc, fpu_enabled, nbytes); | ||
53 | |||
54 | for (i = 0; i < gctx->num_funcs; i++) { | ||
55 | func_bytes = bsize * gctx->funcs[i].num_blocks; | ||
56 | |||
57 | /* Process multi-block batch */ | ||
58 | if (nbytes >= func_bytes) { | ||
59 | do { | ||
60 | gctx->funcs[i].fn_u.ecb(ctx, wdst, | ||
61 | wsrc); | ||
62 | |||
63 | wsrc += func_bytes; | ||
64 | wdst += func_bytes; | ||
65 | nbytes -= func_bytes; | ||
66 | } while (nbytes >= func_bytes); | ||
67 | |||
68 | if (nbytes < bsize) | ||
69 | goto done; | ||
70 | } | ||
71 | } | ||
72 | |||
73 | done: | ||
74 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
75 | } | ||
76 | |||
77 | glue_fpu_end(fpu_enabled); | ||
78 | return err; | ||
79 | } | ||
80 | |||
81 | int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
82 | struct blkcipher_desc *desc, struct scatterlist *dst, | ||
83 | struct scatterlist *src, unsigned int nbytes) | ||
84 | { | ||
85 | struct blkcipher_walk walk; | ||
86 | |||
87 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
88 | return __glue_ecb_crypt_128bit(gctx, desc, &walk); | ||
89 | } | ||
90 | EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit); | ||
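The glue_fpu_begin()/glue_fpu_end() pair seen in the loop above lives in asm/crypto/glue_helper.h, which is outside this hunk, so the following is a sketch of its policy under that assumption, not a quote: the FPU is claimed only once enough blocks are queued to amortize kernel_fpu_begin(), then held for the rest of the walk. The camellia tables pass fpu_blocks_limit = -1 because camellia-x86_64 is integer-only; SIMD ciphers such as serpent-avx pass their parallel width instead.

static inline bool fpu_begin_sketch(unsigned int bsize, int fpu_blocks_limit,
				    bool fpu_enabled, unsigned int nbytes)
{
	if (fpu_blocks_limit < 0)	/* integer-only cipher, e.g. camellia */
		return false;

	if (fpu_enabled)		/* already claimed earlier in this walk */
		return true;

	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
		return false;		/* too little data to pay for the FPU save */

	kernel_fpu_begin();		/* kernel API; restored by glue_fpu_end() */
	return true;
}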
91 | |||
92 | static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
93 | struct blkcipher_desc *desc, | ||
94 | struct blkcipher_walk *walk) | ||
95 | { | ||
96 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
97 | const unsigned int bsize = 128 / 8; | ||
98 | unsigned int nbytes = walk->nbytes; | ||
99 | u128 *src = (u128 *)walk->src.virt.addr; | ||
100 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
101 | u128 *iv = (u128 *)walk->iv; | ||
102 | |||
103 | do { | ||
104 | u128_xor(dst, src, iv); | ||
105 | fn(ctx, (u8 *)dst, (u8 *)dst); | ||
106 | iv = dst; | ||
107 | |||
108 | src += 1; | ||
109 | dst += 1; | ||
110 | nbytes -= bsize; | ||
111 | } while (nbytes >= bsize); | ||
112 | |||
113 | *(u128 *)walk->iv = *iv; | ||
114 | return nbytes; | ||
115 | } | ||
116 | |||
117 | int glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
118 | struct blkcipher_desc *desc, | ||
119 | struct scatterlist *dst, | ||
120 | struct scatterlist *src, unsigned int nbytes) | ||
121 | { | ||
122 | struct blkcipher_walk walk; | ||
123 | int err; | ||
124 | |||
125 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
126 | err = blkcipher_walk_virt(desc, &walk); | ||
127 | |||
128 | while ((nbytes = walk.nbytes)) { | ||
129 | nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); | ||
130 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
131 | } | ||
132 | |||
133 | return err; | ||
134 | } | ||
135 | EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit); | ||
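Note that glue_cbc_encrypt_128bit() takes a single one-block function rather than a common_glue_ctx: CBC encryption is inherently serial, since C[i] = E(P[i] ^ C[i-1]) with C[-1] = IV, so block i cannot start before block i-1 is finished and there is nothing for a multi-block SIMD path to batch. A stand-alone illustration of the recurrence:

typedef unsigned char u8;

static void xor16(u8 *d, const u8 *a, const u8 *b)
{
	int i;

	for (i = 0; i < 16; i++)
		d[i] = a[i] ^ b[i];
}

static void cbc_encrypt_sketch(void (*enc)(void *ctx, u8 *dst, const u8 *src),
			       void *ctx, u8 *dst, const u8 *src,
			       unsigned int nblocks, u8 *iv)
{
	const u8 *prev = iv;			/* C[-1] = IV */

	while (nblocks--) {
		xor16(dst, src, prev);		/* P[i] ^ C[i-1] */
		enc(ctx, dst, dst);		/* C[i] = E(...) */
		prev = dst;			/* the serial dependency */
		src += 16;
		dst += 16;
	}
}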
136 | |||
137 | static unsigned int | ||
138 | __glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
139 | struct blkcipher_desc *desc, | ||
140 | struct blkcipher_walk *walk) | ||
141 | { | ||
142 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
143 | const unsigned int bsize = 128 / 8; | ||
144 | unsigned int nbytes = walk->nbytes; | ||
145 | u128 *src = (u128 *)walk->src.virt.addr; | ||
146 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
147 | u128 last_iv; | ||
148 | unsigned int num_blocks, func_bytes; | ||
149 | unsigned int i; | ||
150 | |||
151 | /* Start of the last block. */ | ||
152 | src += nbytes / bsize - 1; | ||
153 | dst += nbytes / bsize - 1; | ||
154 | |||
155 | last_iv = *src; | ||
156 | |||
157 | for (i = 0; i < gctx->num_funcs; i++) { | ||
158 | num_blocks = gctx->funcs[i].num_blocks; | ||
159 | func_bytes = bsize * num_blocks; | ||
160 | |||
161 | /* Process multi-block batch */ | ||
162 | if (nbytes >= func_bytes) { | ||
163 | do { | ||
164 | nbytes -= func_bytes - bsize; | ||
165 | src -= num_blocks - 1; | ||
166 | dst -= num_blocks - 1; | ||
167 | |||
168 | gctx->funcs[i].fn_u.cbc(ctx, dst, src); | ||
169 | |||
170 | nbytes -= bsize; | ||
171 | if (nbytes < bsize) | ||
172 | goto done; | ||
173 | |||
174 | u128_xor(dst, dst, src - 1); | ||
175 | src -= 1; | ||
176 | dst -= 1; | ||
177 | } while (nbytes >= func_bytes); | ||
178 | |||
179 | if (nbytes < bsize) | ||
180 | goto done; | ||
181 | } | ||
182 | } | ||
183 | |||
184 | done: | ||
185 | u128_xor(dst, dst, (u128 *)walk->iv); | ||
186 | *(u128 *)walk->iv = last_iv; | ||
187 | |||
188 | return nbytes; | ||
189 | } | ||
190 | |||
191 | int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
192 | struct blkcipher_desc *desc, | ||
193 | struct scatterlist *dst, | ||
194 | struct scatterlist *src, unsigned int nbytes) | ||
195 | { | ||
196 | const unsigned int bsize = 128 / 8; | ||
197 | bool fpu_enabled = false; | ||
198 | struct blkcipher_walk walk; | ||
199 | int err; | ||
200 | |||
201 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
202 | err = blkcipher_walk_virt(desc, &walk); | ||
203 | |||
204 | while ((nbytes = walk.nbytes)) { | ||
205 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
206 | desc, fpu_enabled, nbytes); | ||
207 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | ||
208 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
209 | } | ||
210 | |||
211 | glue_fpu_end(fpu_enabled); | ||
212 | return err; | ||
213 | } | ||
214 | EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); | ||
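CBC decryption, by contrast, parallelizes: P[i] = D(C[i]) ^ C[i-1] and every C[i] is already known, so the block decryptions are independent and only the final XORs chain. A sketch of one batch, reusing u8/xor16 from the CBC-encrypt sketch above; dst and src are assumed not to overlap here, which is exactly why the kernel's in-place version above walks the buffer backwards instead:

static void cbc_decrypt_batch_sketch(void (*dec)(void *ctx, u8 *dst,
						 const u8 *src),
				     void *ctx, u8 *dst, const u8 *src,
				     unsigned int nblocks, const u8 *iv)
{
	unsigned int i;

	for (i = 0; i < nblocks; i++)	/* independent: SIMD-friendly */
		dec(ctx, dst + 16 * i, src + 16 * i);

	xor16(dst, dst, iv);		/* P[0] = D(C[0]) ^ IV */
	for (i = 1; i < nblocks; i++)
		xor16(dst + 16 * i, dst + 16 * i, src + 16 * (i - 1));
}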
215 | |||
216 | static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, | ||
217 | struct blkcipher_desc *desc, | ||
218 | struct blkcipher_walk *walk) | ||
219 | { | ||
220 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
221 | u8 *src = (u8 *)walk->src.virt.addr; | ||
222 | u8 *dst = (u8 *)walk->dst.virt.addr; | ||
223 | unsigned int nbytes = walk->nbytes; | ||
224 | u128 ctrblk; | ||
225 | u128 tmp; | ||
226 | |||
227 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
228 | |||
229 | memcpy(&tmp, src, nbytes); | ||
230 | fn_ctr(ctx, &tmp, &tmp, &ctrblk); | ||
231 | memcpy(dst, &tmp, nbytes); | ||
232 | |||
233 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
234 | } | ||
235 | |||
236 | |||
237 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
238 | struct blkcipher_desc *desc, | ||
239 | struct blkcipher_walk *walk) | ||
240 | { | ||
241 | const unsigned int bsize = 128 / 8; | ||
242 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
243 | unsigned int nbytes = walk->nbytes; | ||
244 | u128 *src = (u128 *)walk->src.virt.addr; | ||
245 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
246 | u128 ctrblk; | ||
247 | unsigned int num_blocks, func_bytes; | ||
248 | unsigned int i; | ||
249 | |||
250 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
251 | |||
252 | /* Process multi-block batch */ | ||
253 | for (i = 0; i < gctx->num_funcs; i++) { | ||
254 | num_blocks = gctx->funcs[i].num_blocks; | ||
255 | func_bytes = bsize * num_blocks; | ||
256 | |||
257 | if (nbytes >= func_bytes) { | ||
258 | do { | ||
259 | gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); | ||
260 | |||
261 | src += num_blocks; | ||
262 | dst += num_blocks; | ||
263 | nbytes -= func_bytes; | ||
264 | } while (nbytes >= func_bytes); | ||
265 | |||
266 | if (nbytes < bsize) | ||
267 | goto done; | ||
268 | } | ||
269 | } | ||
270 | |||
271 | done: | ||
272 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
273 | return nbytes; | ||
274 | } | ||
275 | |||
276 | int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
277 | struct blkcipher_desc *desc, struct scatterlist *dst, | ||
278 | struct scatterlist *src, unsigned int nbytes) | ||
279 | { | ||
280 | const unsigned int bsize = 128 / 8; | ||
281 | bool fpu_enabled = false; | ||
282 | struct blkcipher_walk walk; | ||
283 | int err; | ||
284 | |||
285 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
286 | err = blkcipher_walk_virt_block(desc, &walk, bsize); | ||
287 | |||
288 | while ((nbytes = walk.nbytes) >= bsize) { | ||
289 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
290 | desc, fpu_enabled, nbytes); | ||
291 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | ||
292 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
293 | } | ||
294 | |||
295 | glue_fpu_end(fpu_enabled); | ||
296 | |||
297 | if (walk.nbytes) { | ||
298 | glue_ctr_crypt_final_128bit( | ||
299 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | ||
300 | err = blkcipher_walk_done(desc, &walk, 0); | ||
301 | } | ||
302 | |||
303 | return err; | ||
304 | } | ||
305 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); | ||
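The CTR counter is big-endian on the wire but is converted to a native u128 once per chunk, so stepping it costs one increment plus a carry check; this is the same u128_inc() the series removes from camellia_glue.c above (the shared inline presumably now lives in the glue helper headers, which are outside this hunk). Illustrative stand-alone form:

struct u128_sketch {
	unsigned long long a;	/* high 64 bits */
	unsigned long long b;	/* low 64 bits */
};

static inline void u128_inc_sketch(struct u128_sketch *i)
{
	i->b++;
	if (i->b == 0)		/* low word wrapped: carry into high word */
		i->a++;
}

The partial final block needs no padding: glue_ctr_crypt_final_128bit() encrypts the counter into a temporary and copies out only the nbytes that remain.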
306 | |||
307 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S new file mode 100644 index 000000000000..504106bf04a2 --- /dev/null +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S | |||
@@ -0,0 +1,704 @@ | |||
1 | /* | ||
2 | * Serpent Cipher 8-way parallel algorithm (x86_64/AVX) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by | ||
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | .file "serpent-avx-x86_64-asm_64.S" | ||
28 | .text | ||
29 | |||
30 | #define CTX %rdi | ||
31 | |||
32 | /********************************************************************** | ||
33 | 8-way AVX serpent | ||
34 | **********************************************************************/ | ||
35 | #define RA1 %xmm0 | ||
36 | #define RB1 %xmm1 | ||
37 | #define RC1 %xmm2 | ||
38 | #define RD1 %xmm3 | ||
39 | #define RE1 %xmm4 | ||
40 | |||
41 | #define tp %xmm5 | ||
42 | |||
43 | #define RA2 %xmm6 | ||
44 | #define RB2 %xmm7 | ||
45 | #define RC2 %xmm8 | ||
46 | #define RD2 %xmm9 | ||
47 | #define RE2 %xmm10 | ||
48 | |||
49 | #define RNOT %xmm11 | ||
50 | |||
51 | #define RK0 %xmm12 | ||
52 | #define RK1 %xmm13 | ||
53 | #define RK2 %xmm14 | ||
54 | #define RK3 %xmm15 | ||
55 | |||
56 | |||
57 | #define S0_1(x0, x1, x2, x3, x4) \ | ||
58 | vpor x0, x3, tp; \ | ||
59 | vpxor x3, x0, x0; \ | ||
60 | vpxor x2, x3, x4; \ | ||
61 | vpxor RNOT, x4, x4; \ | ||
62 | vpxor x1, tp, x3; \ | ||
63 | vpand x0, x1, x1; \ | ||
64 | vpxor x4, x1, x1; \ | ||
65 | vpxor x0, x2, x2; | ||
66 | #define S0_2(x0, x1, x2, x3, x4) \ | ||
67 | vpxor x3, x0, x0; \ | ||
68 | vpor x0, x4, x4; \ | ||
69 | vpxor x2, x0, x0; \ | ||
70 | vpand x1, x2, x2; \ | ||
71 | vpxor x2, x3, x3; \ | ||
72 | vpxor RNOT, x1, x1; \ | ||
73 | vpxor x4, x2, x2; \ | ||
74 | vpxor x2, x1, x1; | ||
75 | |||
76 | #define S1_1(x0, x1, x2, x3, x4) \ | ||
77 | vpxor x0, x1, tp; \ | ||
78 | vpxor x3, x0, x0; \ | ||
79 | vpxor RNOT, x3, x3; \ | ||
80 | vpand tp, x1, x4; \ | ||
81 | vpor tp, x0, x0; \ | ||
82 | vpxor x2, x3, x3; \ | ||
83 | vpxor x3, x0, x0; \ | ||
84 | vpxor x3, tp, x1; | ||
85 | #define S1_2(x0, x1, x2, x3, x4) \ | ||
86 | vpxor x4, x3, x3; \ | ||
87 | vpor x4, x1, x1; \ | ||
88 | vpxor x2, x4, x4; \ | ||
89 | vpand x0, x2, x2; \ | ||
90 | vpxor x1, x2, x2; \ | ||
91 | vpor x0, x1, x1; \ | ||
92 | vpxor RNOT, x0, x0; \ | ||
93 | vpxor x2, x0, x0; \ | ||
94 | vpxor x1, x4, x4; | ||
95 | |||
96 | #define S2_1(x0, x1, x2, x3, x4) \ | ||
97 | vpxor RNOT, x3, x3; \ | ||
98 | vpxor x0, x1, x1; \ | ||
99 | vpand x2, x0, tp; \ | ||
100 | vpxor x3, tp, tp; \ | ||
101 | vpor x0, x3, x3; \ | ||
102 | vpxor x1, x2, x2; \ | ||
103 | vpxor x1, x3, x3; \ | ||
104 | vpand tp, x1, x1; | ||
105 | #define S2_2(x0, x1, x2, x3, x4) \ | ||
106 | vpxor x2, tp, tp; \ | ||
107 | vpand x3, x2, x2; \ | ||
108 | vpor x1, x3, x3; \ | ||
109 | vpxor RNOT, tp, tp; \ | ||
110 | vpxor tp, x3, x3; \ | ||
111 | vpxor tp, x0, x4; \ | ||
112 | vpxor x2, tp, x0; \ | ||
113 | vpor x2, x1, x1; | ||
114 | |||
115 | #define S3_1(x0, x1, x2, x3, x4) \ | ||
116 | vpxor x3, x1, tp; \ | ||
117 | vpor x0, x3, x3; \ | ||
118 | vpand x0, x1, x4; \ | ||
119 | vpxor x2, x0, x0; \ | ||
120 | vpxor tp, x2, x2; \ | ||
121 | vpand x3, tp, x1; \ | ||
122 | vpxor x3, x2, x2; \ | ||
123 | vpor x4, x0, x0; \ | ||
124 | vpxor x3, x4, x4; | ||
125 | #define S3_2(x0, x1, x2, x3, x4) \ | ||
126 | vpxor x0, x1, x1; \ | ||
127 | vpand x3, x0, x0; \ | ||
128 | vpand x4, x3, x3; \ | ||
129 | vpxor x2, x3, x3; \ | ||
130 | vpor x1, x4, x4; \ | ||
131 | vpand x1, x2, x2; \ | ||
132 | vpxor x3, x4, x4; \ | ||
133 | vpxor x3, x0, x0; \ | ||
134 | vpxor x2, x3, x3; | ||
135 | |||
136 | #define S4_1(x0, x1, x2, x3, x4) \ | ||
137 | vpand x0, x3, tp; \ | ||
138 | vpxor x3, x0, x0; \ | ||
139 | vpxor x2, tp, tp; \ | ||
140 | vpor x3, x2, x2; \ | ||
141 | vpxor x1, x0, x0; \ | ||
142 | vpxor tp, x3, x4; \ | ||
143 | vpor x0, x2, x2; \ | ||
144 | vpxor x1, x2, x2; | ||
145 | #define S4_2(x0, x1, x2, x3, x4) \ | ||
146 | vpand x0, x1, x1; \ | ||
147 | vpxor x4, x1, x1; \ | ||
148 | vpand x2, x4, x4; \ | ||
149 | vpxor tp, x2, x2; \ | ||
150 | vpxor x0, x4, x4; \ | ||
151 | vpor x1, tp, x3; \ | ||
152 | vpxor RNOT, x1, x1; \ | ||
153 | vpxor x0, x3, x3; | ||
154 | |||
155 | #define S5_1(x0, x1, x2, x3, x4) \ | ||
156 | vpor x0, x1, tp; \ | ||
157 | vpxor tp, x2, x2; \ | ||
158 | vpxor RNOT, x3, x3; \ | ||
159 | vpxor x0, x1, x4; \ | ||
160 | vpxor x2, x0, x0; \ | ||
161 | vpand x4, tp, x1; \ | ||
162 | vpor x3, x4, x4; \ | ||
163 | vpxor x0, x4, x4; | ||
164 | #define S5_2(x0, x1, x2, x3, x4) \ | ||
165 | vpand x3, x0, x0; \ | ||
166 | vpxor x3, x1, x1; \ | ||
167 | vpxor x2, x3, x3; \ | ||
168 | vpxor x1, x0, x0; \ | ||
169 | vpand x4, x2, x2; \ | ||
170 | vpxor x2, x1, x1; \ | ||
171 | vpand x0, x2, x2; \ | ||
172 | vpxor x2, x3, x3; | ||
173 | |||
174 | #define S6_1(x0, x1, x2, x3, x4) \ | ||
175 | vpxor x0, x3, x3; \ | ||
176 | vpxor x2, x1, tp; \ | ||
177 | vpxor x0, x2, x2; \ | ||
178 | vpand x3, x0, x0; \ | ||
179 | vpor x3, tp, tp; \ | ||
180 | vpxor RNOT, x1, x4; \ | ||
181 | vpxor tp, x0, x0; \ | ||
182 | vpxor x2, tp, x1; | ||
183 | #define S6_2(x0, x1, x2, x3, x4) \ | ||
184 | vpxor x4, x3, x3; \ | ||
185 | vpxor x0, x4, x4; \ | ||
186 | vpand x0, x2, x2; \ | ||
187 | vpxor x1, x4, x4; \ | ||
188 | vpxor x3, x2, x2; \ | ||
189 | vpand x1, x3, x3; \ | ||
190 | vpxor x0, x3, x3; \ | ||
191 | vpxor x2, x1, x1; | ||
192 | |||
193 | #define S7_1(x0, x1, x2, x3, x4) \ | ||
194 | vpxor RNOT, x1, tp; \ | ||
195 | vpxor RNOT, x0, x0; \ | ||
196 | vpand x2, tp, x1; \ | ||
197 | vpxor x3, x1, x1; \ | ||
198 | vpor tp, x3, x3; \ | ||
199 | vpxor x2, tp, x4; \ | ||
200 | vpxor x3, x2, x2; \ | ||
201 | vpxor x0, x3, x3; \ | ||
202 | vpor x1, x0, x0; | ||
203 | #define S7_2(x0, x1, x2, x3, x4) \ | ||
204 | vpand x0, x2, x2; \ | ||
205 | vpxor x4, x0, x0; \ | ||
206 | vpxor x3, x4, x4; \ | ||
207 | vpand x0, x3, x3; \ | ||
208 | vpxor x1, x4, x4; \ | ||
209 | vpxor x4, x2, x2; \ | ||
210 | vpxor x1, x3, x3; \ | ||
211 | vpor x0, x4, x4; \ | ||
212 | vpxor x1, x4, x4; | ||
213 | |||
214 | #define SI0_1(x0, x1, x2, x3, x4) \ | ||
215 | vpxor x0, x1, x1; \ | ||
216 | vpor x1, x3, tp; \ | ||
217 | vpxor x1, x3, x4; \ | ||
218 | vpxor RNOT, x0, x0; \ | ||
219 | vpxor tp, x2, x2; \ | ||
220 | vpxor x0, tp, x3; \ | ||
221 | vpand x1, x0, x0; \ | ||
222 | vpxor x2, x0, x0; | ||
223 | #define SI0_2(x0, x1, x2, x3, x4) \ | ||
224 | vpand x3, x2, x2; \ | ||
225 | vpxor x4, x3, x3; \ | ||
226 | vpxor x3, x2, x2; \ | ||
227 | vpxor x3, x1, x1; \ | ||
228 | vpand x0, x3, x3; \ | ||
229 | vpxor x0, x1, x1; \ | ||
230 | vpxor x2, x0, x0; \ | ||
231 | vpxor x3, x4, x4; | ||
232 | |||
233 | #define SI1_1(x0, x1, x2, x3, x4) \ | ||
234 | vpxor x3, x1, x1; \ | ||
235 | vpxor x2, x0, tp; \ | ||
236 | vpxor RNOT, x2, x2; \ | ||
237 | vpor x1, x0, x4; \ | ||
238 | vpxor x3, x4, x4; \ | ||
239 | vpand x1, x3, x3; \ | ||
240 | vpxor x2, x1, x1; \ | ||
241 | vpand x4, x2, x2; | ||
242 | #define SI1_2(x0, x1, x2, x3, x4) \ | ||
243 | vpxor x1, x4, x4; \ | ||
244 | vpor x3, x1, x1; \ | ||
245 | vpxor tp, x3, x3; \ | ||
246 | vpxor tp, x2, x2; \ | ||
247 | vpor x4, tp, x0; \ | ||
248 | vpxor x4, x2, x2; \ | ||
249 | vpxor x0, x1, x1; \ | ||
250 | vpxor x1, x4, x4; | ||
251 | |||
252 | #define SI2_1(x0, x1, x2, x3, x4) \ | ||
253 | vpxor x1, x2, x2; \ | ||
254 | vpxor RNOT, x3, tp; \ | ||
255 | vpor x2, tp, tp; \ | ||
256 | vpxor x3, x2, x2; \ | ||
257 | vpxor x0, x3, x4; \ | ||
258 | vpxor x1, tp, x3; \ | ||
259 | vpor x2, x1, x1; \ | ||
260 | vpxor x0, x2, x2; | ||
261 | #define SI2_2(x0, x1, x2, x3, x4) \ | ||
262 | vpxor x4, x1, x1; \ | ||
263 | vpor x3, x4, x4; \ | ||
264 | vpxor x3, x2, x2; \ | ||
265 | vpxor x2, x4, x4; \ | ||
266 | vpand x1, x2, x2; \ | ||
267 | vpxor x3, x2, x2; \ | ||
268 | vpxor x4, x3, x3; \ | ||
269 | vpxor x0, x4, x4; | ||
270 | |||
271 | #define SI3_1(x0, x1, x2, x3, x4) \ | ||
272 | vpxor x1, x2, x2; \ | ||
273 | vpand x2, x1, tp; \ | ||
274 | vpxor x0, tp, tp; \ | ||
275 | vpor x1, x0, x0; \ | ||
276 | vpxor x3, x1, x4; \ | ||
277 | vpxor x3, x0, x0; \ | ||
278 | vpor tp, x3, x3; \ | ||
279 | vpxor x2, tp, x1; | ||
280 | #define SI3_2(x0, x1, x2, x3, x4) \ | ||
281 | vpxor x3, x1, x1; \ | ||
282 | vpxor x2, x0, x0; \ | ||
283 | vpxor x3, x2, x2; \ | ||
284 | vpand x1, x3, x3; \ | ||
285 | vpxor x0, x1, x1; \ | ||
286 | vpand x2, x0, x0; \ | ||
287 | vpxor x3, x4, x4; \ | ||
288 | vpxor x0, x3, x3; \ | ||
289 | vpxor x1, x0, x0; | ||
290 | |||
291 | #define SI4_1(x0, x1, x2, x3, x4) \ | ||
292 | vpxor x3, x2, x2; \ | ||
293 | vpand x1, x0, tp; \ | ||
294 | vpxor x2, tp, tp; \ | ||
295 | vpor x3, x2, x2; \ | ||
296 | vpxor RNOT, x0, x4; \ | ||
297 | vpxor tp, x1, x1; \ | ||
298 | vpxor x2, tp, x0; \ | ||
299 | vpand x4, x2, x2; | ||
300 | #define SI4_2(x0, x1, x2, x3, x4) \ | ||
301 | vpxor x0, x2, x2; \ | ||
302 | vpor x4, x0, x0; \ | ||
303 | vpxor x3, x0, x0; \ | ||
304 | vpand x2, x3, x3; \ | ||
305 | vpxor x3, x4, x4; \ | ||
306 | vpxor x1, x3, x3; \ | ||
307 | vpand x0, x1, x1; \ | ||
308 | vpxor x1, x4, x4; \ | ||
309 | vpxor x3, x0, x0; | ||
310 | |||
311 | #define SI5_1(x0, x1, x2, x3, x4) \ | ||
312 | vpor x2, x1, tp; \ | ||
313 | vpxor x1, x2, x2; \ | ||
314 | vpxor x3, tp, tp; \ | ||
315 | vpand x1, x3, x3; \ | ||
316 | vpxor x3, x2, x2; \ | ||
317 | vpor x0, x3, x3; \ | ||
318 | vpxor RNOT, x0, x0; \ | ||
319 | vpxor x2, x3, x3; \ | ||
320 | vpor x0, x2, x2; | ||
321 | #define SI5_2(x0, x1, x2, x3, x4) \ | ||
322 | vpxor tp, x1, x4; \ | ||
323 | vpxor x4, x2, x2; \ | ||
324 | vpand x0, x4, x4; \ | ||
325 | vpxor tp, x0, x0; \ | ||
326 | vpxor x3, tp, x1; \ | ||
327 | vpand x2, x0, x0; \ | ||
328 | vpxor x3, x2, x2; \ | ||
329 | vpxor x2, x0, x0; \ | ||
330 | vpxor x4, x2, x2; \ | ||
331 | vpxor x3, x4, x4; | ||
332 | |||
333 | #define SI6_1(x0, x1, x2, x3, x4) \ | ||
334 | vpxor x2, x0, x0; \ | ||
335 | vpand x3, x0, tp; \ | ||
336 | vpxor x3, x2, x2; \ | ||
337 | vpxor x2, tp, tp; \ | ||
338 | vpxor x1, x3, x3; \ | ||
339 | vpor x0, x2, x2; \ | ||
340 | vpxor x3, x2, x2; \ | ||
341 | vpand tp, x3, x3; | ||
342 | #define SI6_2(x0, x1, x2, x3, x4) \ | ||
343 | vpxor RNOT, tp, tp; \ | ||
344 | vpxor x1, x3, x3; \ | ||
345 | vpand x2, x1, x1; \ | ||
346 | vpxor tp, x0, x4; \ | ||
347 | vpxor x4, x3, x3; \ | ||
348 | vpxor x2, x4, x4; \ | ||
349 | vpxor x1, tp, x0; \ | ||
350 | vpxor x0, x2, x2; | ||
351 | |||
352 | #define SI7_1(x0, x1, x2, x3, x4) \ | ||
353 | vpand x0, x3, tp; \ | ||
354 | vpxor x2, x0, x0; \ | ||
355 | vpor x3, x2, x2; \ | ||
356 | vpxor x1, x3, x4; \ | ||
357 | vpxor RNOT, x0, x0; \ | ||
358 | vpor tp, x1, x1; \ | ||
359 | vpxor x0, x4, x4; \ | ||
360 | vpand x2, x0, x0; \ | ||
361 | vpxor x1, x0, x0; | ||
362 | #define SI7_2(x0, x1, x2, x3, x4) \ | ||
363 | vpand x2, x1, x1; \ | ||
364 | vpxor x2, tp, x3; \ | ||
365 | vpxor x3, x4, x4; \ | ||
366 | vpand x3, x2, x2; \ | ||
367 | vpor x0, x3, x3; \ | ||
368 | vpxor x4, x1, x1; \ | ||
369 | vpxor x4, x3, x3; \ | ||
370 | vpand x0, x4, x4; \ | ||
371 | vpxor x2, x4, x4; | ||
372 | |||
373 | #define get_key(i, j, t) \ | ||
374 | vbroadcastss (4*(i)+(j))*4(CTX), t; | ||
375 | |||
376 | #define K2(x0, x1, x2, x3, x4, i) \ | ||
377 | get_key(i, 0, RK0); \ | ||
378 | get_key(i, 1, RK1); \ | ||
379 | get_key(i, 2, RK2); \ | ||
380 | get_key(i, 3, RK3); \ | ||
381 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
382 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
383 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
384 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
385 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
386 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
387 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
388 | vpxor RK3, x3 ## 2, x3 ## 2; | ||
389 | |||
390 | #define LK2(x0, x1, x2, x3, x4, i) \ | ||
391 | vpslld $13, x0 ## 1, x4 ## 1; \ | ||
392 | vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
393 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
394 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
395 | vpslld $3, x2 ## 1, x4 ## 1; \ | ||
396 | vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
397 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
398 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
399 | vpslld $13, x0 ## 2, x4 ## 2; \ | ||
400 | vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
401 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
402 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
403 | vpslld $3, x2 ## 2, x4 ## 2; \ | ||
404 | vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
405 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
406 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
407 | vpslld $1, x1 ## 1, x4 ## 1; \ | ||
408 | vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
409 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
410 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
411 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
412 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
413 | get_key(i, 1, RK1); \ | ||
414 | vpslld $1, x1 ## 2, x4 ## 2; \ | ||
415 | vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
416 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
417 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
418 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
419 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
420 | get_key(i, 3, RK3); \ | ||
421 | vpslld $7, x3 ## 1, x4 ## 1; \ | ||
422 | vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
423 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
424 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
425 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
426 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
427 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
428 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
429 | get_key(i, 0, RK0); \ | ||
430 | vpslld $7, x3 ## 2, x4 ## 2; \ | ||
431 | vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
432 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
433 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
434 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
435 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
436 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
437 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
438 | get_key(i, 2, RK2); \ | ||
439 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
440 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
441 | vpslld $5, x0 ## 1, x4 ## 1; \ | ||
442 | vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
443 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
444 | vpslld $22, x2 ## 1, x4 ## 1; \ | ||
445 | vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
446 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
447 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
448 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
449 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
450 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
451 | vpslld $5, x0 ## 2, x4 ## 2; \ | ||
452 | vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
453 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
454 | vpslld $22, x2 ## 2, x4 ## 2; \ | ||
455 | vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
456 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
457 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
458 | vpxor RK2, x2 ## 2, x2 ## 2; | ||
459 | |||
460 | #define KL2(x0, x1, x2, x3, x4, i) \ | ||
461 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
462 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
463 | vpsrld $5, x0 ## 1, x4 ## 1; \ | ||
464 | vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
465 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
466 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
467 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
468 | vpsrld $22, x2 ## 1, x4 ## 1; \ | ||
469 | vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
470 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
471 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
472 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
473 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
474 | vpsrld $5, x0 ## 2, x4 ## 2; \ | ||
475 | vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
476 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
477 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
478 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
479 | vpsrld $22, x2 ## 2, x4 ## 2; \ | ||
480 | vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
481 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
482 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
483 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
484 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
485 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
486 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
487 | vpsrld $1, x1 ## 1, x4 ## 1; \ | ||
488 | vpslld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
489 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
490 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
491 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
492 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
493 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
494 | vpsrld $1, x1 ## 2, x4 ## 2; \ | ||
495 | vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
496 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
497 | vpsrld $7, x3 ## 1, x4 ## 1; \ | ||
498 | vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
499 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
500 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
501 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
502 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
503 | vpsrld $7, x3 ## 2, x4 ## 2; \ | ||
504 | vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
505 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
506 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
507 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
508 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
509 | vpsrld $13, x0 ## 1, x4 ## 1; \ | ||
510 | vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
511 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
512 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
513 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
514 | vpsrld $3, x2 ## 1, x4 ## 1; \ | ||
515 | vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
516 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
517 | vpsrld $13, x0 ## 2, x4 ## 2; \ | ||
518 | vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
519 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
520 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
521 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
522 | vpsrld $3, x2 ## 2, x4 ## 2; \ | ||
523 | vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
524 | vpor x4 ## 2, x2 ## 2, x2 ## 2; | ||
525 | |||
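Every rotate in the LK2/KL2 linear-transform macros above is a vpslld/vpsrld/vpor triple because AVX(1) has no packed-rotate instruction. The same trick expressed with C intrinsics (illustrative only; r must be a constant in 1..31 at each use):

#include <immintrin.h>

/* rot-left of each 32-bit lane, e.g. ROL32X4(x0, 13) mirrors the
 * vpslld $13 / vpsrld $(32-13) / vpor sequence in LK2 above */
#define ROL32X4(x, r) \
	_mm_or_si128(_mm_slli_epi32((x), (r)), _mm_srli_epi32((x), 32 - (r)))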
526 | #define S(SBOX, x0, x1, x2, x3, x4) \ | ||
527 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
528 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
529 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
530 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
531 | |||
532 | #define SP(SBOX, x0, x1, x2, x3, x4, i) \ | ||
533 | get_key(i, 0, RK0); \ | ||
534 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
535 | get_key(i, 2, RK2); \ | ||
536 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
537 | get_key(i, 3, RK3); \ | ||
538 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
539 | get_key(i, 1, RK1); \ | ||
540 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
541 | |||
542 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
543 | vpunpckldq x1, x0, t0; \ | ||
544 | vpunpckhdq x1, x0, t2; \ | ||
545 | vpunpckldq x3, x2, t1; \ | ||
546 | vpunpckhdq x3, x2, x3; \ | ||
547 | \ | ||
548 | vpunpcklqdq t1, t0, x0; \ | ||
549 | vpunpckhqdq t1, t0, x1; \ | ||
550 | vpunpcklqdq x3, t2, x2; \ | ||
551 | vpunpckhqdq x3, t2, x3; | ||
552 | |||
553 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | ||
554 | vmovdqu (0*4*4)(in), x0; \ | ||
555 | vmovdqu (1*4*4)(in), x1; \ | ||
556 | vmovdqu (2*4*4)(in), x2; \ | ||
557 | vmovdqu (3*4*4)(in), x3; \ | ||
558 | \ | ||
559 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
560 | |||
561 | #define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
562 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
563 | \ | ||
564 | vmovdqu x0, (0*4*4)(out); \ | ||
565 | vmovdqu x1, (1*4*4)(out); \ | ||
566 | vmovdqu x2, (2*4*4)(out); \ | ||
567 | vmovdqu x3, (3*4*4)(out); | ||
568 | |||
569 | #define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
570 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
571 | \ | ||
572 | vpxor (0*4*4)(out), x0, x0; \ | ||
573 | vmovdqu x0, (0*4*4)(out); \ | ||
574 | vpxor (1*4*4)(out), x1, x1; \ | ||
575 | vmovdqu x1, (1*4*4)(out); \ | ||
576 | vpxor (2*4*4)(out), x2, x2; \ | ||
577 | vmovdqu x2, (2*4*4)(out); \ | ||
578 | vpxor (3*4*4)(out), x3, x3; \ | ||
579 | vmovdqu x3, (3*4*4)(out); | ||
580 | |||
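read_blocks/write_blocks load four 16-byte blocks and transpose them so that register k holds 32-bit word k of all four blocks, which is the layout the bitsliced S-box macros above operate on. What transpose_4x4 computes, written with intrinsics (illustrative):

#include <immintrin.h>

static inline void transpose_4x4_sketch(__m128i *x0, __m128i *x1,
					__m128i *x2, __m128i *x3)
{
	__m128i t0 = _mm_unpacklo_epi32(*x0, *x1);	/* a0 b0 a1 b1 */
	__m128i t2 = _mm_unpackhi_epi32(*x0, *x1);	/* a2 b2 a3 b3 */
	__m128i t1 = _mm_unpacklo_epi32(*x2, *x3);	/* c0 d0 c1 d1 */
	__m128i t3 = _mm_unpackhi_epi32(*x2, *x3);	/* c2 d2 c3 d3 */

	*x0 = _mm_unpacklo_epi64(t0, t1);		/* a0 b0 c0 d0 */
	*x1 = _mm_unpackhi_epi64(t0, t1);		/* a1 b1 c1 d1 */
	*x2 = _mm_unpacklo_epi64(t2, t3);		/* a2 b2 c2 d2 */
	*x3 = _mm_unpackhi_epi64(t2, t3);		/* a3 b3 c3 d3 */
}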
581 | .align 8 | ||
582 | .global __serpent_enc_blk_8way_avx | ||
583 | .type __serpent_enc_blk_8way_avx,@function; | ||
584 | |||
585 | __serpent_enc_blk_8way_avx: | ||
586 | /* input: | ||
587 | * %rdi: ctx, CTX | ||
588 | * %rsi: dst | ||
589 | * %rdx: src | ||
590 | * %rcx: bool, if true: xor output | ||
591 | */ | ||
592 | |||
593 | vpcmpeqd RNOT, RNOT, RNOT; | ||
594 | |||
595 | leaq (4*4*4)(%rdx), %rax; | ||
596 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
597 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
598 | |||
599 | K2(RA, RB, RC, RD, RE, 0); | ||
600 | S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); | ||
601 | S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); | ||
602 | S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); | ||
603 | S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); | ||
604 | S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); | ||
605 | S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); | ||
606 | S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); | ||
607 | S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); | ||
608 | S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); | ||
609 | S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); | ||
610 | S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); | ||
611 | S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); | ||
612 | S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); | ||
613 | S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); | ||
614 | S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); | ||
615 | S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); | ||
616 | S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); | ||
617 | S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); | ||
618 | S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); | ||
619 | S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); | ||
620 | S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); | ||
621 | S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); | ||
622 | S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); | ||
623 | S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); | ||
624 | S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); | ||
625 | S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); | ||
626 | S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); | ||
627 | S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); | ||
628 | S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); | ||
629 | S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); | ||
630 | S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); | ||
631 | S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); | ||
632 | |||
633 | leaq (4*4*4)(%rsi), %rax; | ||
634 | |||
635 | testb %cl, %cl; | ||
636 | jnz __enc_xor8; | ||
637 | |||
638 | write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
639 | write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
640 | |||
641 | ret; | ||
642 | |||
643 | __enc_xor8: | ||
644 | xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
645 | xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
646 | |||
647 | ret; | ||
648 | |||
649 | .align 8 | ||
650 | .global serpent_dec_blk_8way_avx | ||
651 | .type serpent_dec_blk_8way_avx,@function; | ||
652 | |||
653 | serpent_dec_blk_8way_avx: | ||
654 | /* input: | ||
655 | * %rdi: ctx, CTX | ||
656 | * %rsi: dst | ||
657 | * %rdx: src | ||
658 | */ | ||
659 | |||
660 | vpcmpeqd RNOT, RNOT, RNOT; | ||
661 | |||
662 | leaq (4*4*4)(%rdx), %rax; | ||
663 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
664 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
665 | |||
666 | K2(RA, RB, RC, RD, RE, 32); | ||
667 | SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); | ||
668 | SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); | ||
669 | SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); | ||
670 | SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); | ||
671 | SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); | ||
672 | SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); | ||
673 | SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); | ||
674 | SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); | ||
675 | SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); | ||
676 | SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); | ||
677 | SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); | ||
678 | SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); | ||
679 | SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); | ||
680 | SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); | ||
681 | SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); | ||
682 | SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); | ||
683 | SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); | ||
684 | SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); | ||
685 | SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); | ||
686 | SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); | ||
687 | SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); | ||
688 | SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); | ||
689 | SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); | ||
690 | SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); | ||
691 | SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); | ||
692 | SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); | ||
693 | SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); | ||
694 | SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); | ||
695 | SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); | ||
696 | SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); | ||
697 | SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); | ||
698 | S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); | ||
699 | |||
700 | leaq (4*4*4)(%rsi), %rax; | ||
701 | write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); | ||
702 | write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); | ||
703 | |||
704 | ret; | ||
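The C glue code binds these entry points through ordinary prototypes. The declarations below are reconstructed from the register-usage comments above (%rdi/%rsi/%rdx/%rcx); the actual header, arch/x86/include/asm/crypto/serpent-avx.h, is not part of this hunk, so treat the exact wording as an assumption:

asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
					   const u8 *src, bool xor);
asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
					 const u8 *src);

The bool argument selects the __enc_xor8 path, which XORs the encryption result into dst instead of storing it; that is what lets CTR mode encrypt counter blocks and combine them with the data in a single pass.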
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c new file mode 100644 index 000000000000..b36bdac237eb --- /dev/null +++ b/arch/x86/crypto/serpent_avx_glue.c | |||
@@ -0,0 +1,636 @@ | |||
1 | /* | ||
2 | * Glue Code for AVX assembler versions of Serpent Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Glue code based on serpent_sse2_glue.c by: | ||
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/hardirq.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/crypto.h> | ||
31 | #include <linux/err.h> | ||
32 | #include <crypto/algapi.h> | ||
33 | #include <crypto/serpent.h> | ||
34 | #include <crypto/cryptd.h> | ||
35 | #include <crypto/b128ops.h> | ||
36 | #include <crypto/ctr.h> | ||
37 | #include <crypto/lrw.h> | ||
38 | #include <crypto/xts.h> | ||
39 | #include <asm/xcr.h> | ||
40 | #include <asm/xsave.h> | ||
41 | #include <asm/crypto/serpent-avx.h> | ||
42 | #include <asm/crypto/ablk_helper.h> | ||
43 | #include <asm/crypto/glue_helper.h> | ||
44 | |||
45 | static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) | ||
46 | { | ||
47 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
48 | unsigned int j; | ||
49 | |||
50 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
51 | ivs[j] = src[j]; | ||
52 | |||
53 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
54 | |||
55 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
56 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); | ||
57 | } | ||
58 | |||
59 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) | ||
60 | { | ||
61 | be128 ctrblk; | ||
62 | |||
63 | u128_to_be128(&ctrblk, iv); | ||
64 | u128_inc(iv); | ||
65 | |||
66 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | ||
67 | u128_xor(dst, src, (u128 *)&ctrblk); | ||
68 | } | ||
69 | |||
70 | static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, | ||
71 | u128 *iv) | ||
72 | { | ||
73 | be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; | ||
74 | unsigned int i; | ||
75 | |||
76 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
77 | if (dst != src) | ||
78 | dst[i] = src[i]; | ||
79 | |||
80 | u128_to_be128(&ctrblks[i], iv); | ||
81 | u128_inc(iv); | ||
82 | } | ||
83 | |||
84 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
85 | } | ||
86 | |||
87 | static const struct common_glue_ctx serpent_enc = { | ||
88 | .num_funcs = 2, | ||
89 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
90 | |||
91 | .funcs = { { | ||
92 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
93 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } | ||
94 | }, { | ||
95 | .num_blocks = 1, | ||
96 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } | ||
97 | } } | ||
98 | }; | ||
99 | |||
100 | static const struct common_glue_ctx serpent_ctr = { | ||
101 | .num_funcs = 2, | ||
102 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
103 | |||
104 | .funcs = { { | ||
105 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
106 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } | ||
107 | }, { | ||
108 | .num_blocks = 1, | ||
109 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | ||
110 | } } | ||
111 | }; | ||
112 | |||
113 | static const struct common_glue_ctx serpent_dec = { | ||
114 | .num_funcs = 2, | ||
115 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
116 | |||
117 | .funcs = { { | ||
118 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
119 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } | ||
120 | }, { | ||
121 | .num_blocks = 1, | ||
122 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
123 | } } | ||
124 | }; | ||
125 | |||
126 | static const struct common_glue_ctx serpent_dec_cbc = { | ||
127 | .num_funcs = 2, | ||
128 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
129 | |||
130 | .funcs = { { | ||
131 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
132 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } | ||
133 | }, { | ||
134 | .num_blocks = 1, | ||
135 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
136 | } } | ||
137 | }; | ||
138 | |||
139 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
140 | struct scatterlist *src, unsigned int nbytes) | ||
141 | { | ||
142 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); | ||
143 | } | ||
144 | |||
145 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
146 | struct scatterlist *src, unsigned int nbytes) | ||
147 | { | ||
148 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); | ||
149 | } | ||
150 | |||
151 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
152 | struct scatterlist *src, unsigned int nbytes) | ||
153 | { | ||
154 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, | ||
155 | dst, src, nbytes); | ||
156 | } | ||
157 | |||
158 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
159 | struct scatterlist *src, unsigned int nbytes) | ||
160 | { | ||
161 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, | ||
162 | nbytes); | ||
163 | } | ||
164 | |||
165 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
166 | struct scatterlist *src, unsigned int nbytes) | ||
167 | { | ||
168 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); | ||
169 | } | ||
170 | |||
171 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
172 | { | ||
173 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, | ||
174 | NULL, fpu_enabled, nbytes); | ||
175 | } | ||
176 | |||
177 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
178 | { | ||
179 | glue_fpu_end(fpu_enabled); | ||
180 | } | ||
181 | |||
182 | struct crypt_priv { | ||
183 | struct serpent_ctx *ctx; | ||
184 | bool fpu_enabled; | ||
185 | }; | ||
186 | |||
187 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
188 | { | ||
189 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
190 | struct crypt_priv *ctx = priv; | ||
191 | int i; | ||
192 | |||
193 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
194 | |||
195 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
196 | serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
197 | return; | ||
198 | } | ||
199 | |||
200 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
201 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | ||
202 | } | ||
203 | |||
204 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
205 | { | ||
206 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
207 | struct crypt_priv *ctx = priv; | ||
208 | int i; | ||
209 | |||
210 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
211 | |||
212 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
213 | serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
214 | return; | ||
215 | } | ||
216 | |||
217 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
218 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | ||
219 | } | ||
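These callbacks are driven by lrw_crypt()/xts_crypt() in crypto/lrw.c and crypto/xts.c (outside this hunk), so the sketch below of the calling pattern rests on that assumption: block-multiple chunks are handed back one at a time, and crypt_priv threads the FPU state through so kernel_fpu_begin() runs at most once per request.

static void for_each_chunk_sketch(void (*crypt_fn)(void *priv, u8 *srcdst,
						   unsigned int nbytes),
				  struct crypt_priv *priv, u8 *buf,
				  unsigned int total, unsigned int chunk)
{
	while (total) {
		unsigned int n = total < chunk ? total : chunk;

		crypt_fn(priv, buf, n);	/* may set priv->fpu_enabled once */
		buf += n;
		total -= n;
	}
	/* the caller finishes with serpent_fpu_end(priv->fpu_enabled) */
}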
220 | |||
221 | struct serpent_lrw_ctx { | ||
222 | struct lrw_table_ctx lrw_table; | ||
223 | struct serpent_ctx serpent_ctx; | ||
224 | }; | ||
225 | |||
226 | static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
227 | unsigned int keylen) | ||
228 | { | ||
229 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
230 | int err; | ||
231 | |||
232 | err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - | ||
233 | SERPENT_BLOCK_SIZE); | ||
234 | if (err) | ||
235 | return err; | ||
236 | |||
237 | return lrw_init_table(&ctx->lrw_table, key + keylen - | ||
238 | SERPENT_BLOCK_SIZE); | ||
239 | } | ||
240 | |||
241 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
242 | struct scatterlist *src, unsigned int nbytes) | ||
243 | { | ||
244 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
245 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
246 | struct crypt_priv crypt_ctx = { | ||
247 | .ctx = &ctx->serpent_ctx, | ||
248 | .fpu_enabled = false, | ||
249 | }; | ||
250 | struct lrw_crypt_req req = { | ||
251 | .tbuf = buf, | ||
252 | .tbuflen = sizeof(buf), | ||
253 | |||
254 | .table_ctx = &ctx->lrw_table, | ||
255 | .crypt_ctx = &crypt_ctx, | ||
256 | .crypt_fn = encrypt_callback, | ||
257 | }; | ||
258 | int ret; | ||
259 | |||
260 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
261 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
262 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
263 | |||
264 | return ret; | ||
265 | } | ||
266 | |||
267 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
268 | struct scatterlist *src, unsigned int nbytes) | ||
269 | { | ||
270 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
271 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
272 | struct crypt_priv crypt_ctx = { | ||
273 | .ctx = &ctx->serpent_ctx, | ||
274 | .fpu_enabled = false, | ||
275 | }; | ||
276 | struct lrw_crypt_req req = { | ||
277 | .tbuf = buf, | ||
278 | .tbuflen = sizeof(buf), | ||
279 | |||
280 | .table_ctx = &ctx->lrw_table, | ||
281 | .crypt_ctx = &crypt_ctx, | ||
282 | .crypt_fn = decrypt_callback, | ||
283 | }; | ||
284 | int ret; | ||
285 | |||
286 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
287 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
288 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
289 | |||
290 | return ret; | ||
291 | } | ||
292 | |||
293 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | ||
294 | { | ||
295 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
296 | |||
297 | lrw_free_table(&ctx->lrw_table); | ||
298 | } | ||
299 | |||
300 | struct serpent_xts_ctx { | ||
301 | struct serpent_ctx tweak_ctx; | ||
302 | struct serpent_ctx crypt_ctx; | ||
303 | }; | ||
304 | |||
305 | static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
306 | unsigned int keylen) | ||
307 | { | ||
308 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
309 | u32 *flags = &tfm->crt_flags; | ||
310 | int err; | ||
311 | |||
312 | /* key consists of keys of equal size concatenated, therefore | ||
313 | * the length must be even | ||
314 | */ | ||
315 | if (keylen % 2) { | ||
316 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
317 | return -EINVAL; | ||
318 | } | ||
319 | |||
320 | /* first half of xts-key is for crypt */ | ||
321 | err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); | ||
322 | if (err) | ||
323 | return err; | ||
324 | |||
325 | /* second half of xts-key is for tweak */ | ||
326 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); | ||
327 | } | ||
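For illustration, a 512-bit xts(serpent) key is simply two 256-bit Serpent keys concatenated, split exactly as xts_serpent_setkey() above does. make_xts_key() is a hypothetical helper for this example, not a kernel function:

#include <string.h>

typedef unsigned char u8;

static void make_xts_key(u8 xts_key[64], const u8 data_key[32],
			 const u8 tweak_key[32])
{
	memcpy(xts_key, data_key, 32);		/* first half  -> crypt_ctx */
	memcpy(xts_key + 32, tweak_key, 32);	/* second half -> tweak_ctx */
}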
328 | |||
329 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
330 | struct scatterlist *src, unsigned int nbytes) | ||
331 | { | ||
332 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
333 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
334 | struct crypt_priv crypt_ctx = { | ||
335 | .ctx = &ctx->crypt_ctx, | ||
336 | .fpu_enabled = false, | ||
337 | }; | ||
338 | struct xts_crypt_req req = { | ||
339 | .tbuf = buf, | ||
340 | .tbuflen = sizeof(buf), | ||
341 | |||
342 | .tweak_ctx = &ctx->tweak_ctx, | ||
343 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
344 | .crypt_ctx = &crypt_ctx, | ||
345 | .crypt_fn = encrypt_callback, | ||
346 | }; | ||
347 | int ret; | ||
348 | |||
349 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
350 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
351 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
352 | |||
353 | return ret; | ||
354 | } | ||
355 | |||
356 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
357 | struct scatterlist *src, unsigned int nbytes) | ||
358 | { | ||
359 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
360 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
361 | struct crypt_priv crypt_ctx = { | ||
362 | .ctx = &ctx->crypt_ctx, | ||
363 | .fpu_enabled = false, | ||
364 | }; | ||
365 | struct xts_crypt_req req = { | ||
366 | .tbuf = buf, | ||
367 | .tbuflen = sizeof(buf), | ||
368 | |||
369 | .tweak_ctx = &ctx->tweak_ctx, | ||
370 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
371 | .crypt_ctx = &crypt_ctx, | ||
372 | .crypt_fn = decrypt_callback, | ||
373 | }; | ||
374 | int ret; | ||
375 | |||
376 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
377 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
378 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
379 | |||
380 | return ret; | ||
381 | } | ||
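Both XTS paths delegate per-block tweak generation to xts_crypt(): the IV is encrypted once with tweak_ctx (via the tweak_fn above), then successive tweaks come from doubling in GF(2^128). A self-contained model of that doubling under XTS's little-endian block convention (mirroring the kernel's gf128mul_x_ble(), not code from this patch):

	#include <stdint.h>

	/* multiply a 16-byte XTS tweak by x in GF(2^128), LE block convention */
	static void xts_tweak_double(uint8_t t[16])
	{
		uint8_t carry = 0;
		int i;

		for (i = 0; i < 16; i++) {
			uint8_t c = t[i] >> 7;

			t[i] = (uint8_t)(t[i] << 1) | carry;
			carry = c;
		}
		if (carry)
			t[0] ^= 0x87;	/* x^128 + x^7 + x^2 + x + 1 */
	}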
382 | |||
383 | static struct crypto_alg serpent_algs[10] = { { | ||
384 | .cra_name = "__ecb-serpent-avx", | ||
385 | .cra_driver_name = "__driver-ecb-serpent-avx", | ||
386 | .cra_priority = 0, | ||
387 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
388 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
389 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
390 | .cra_alignmask = 0, | ||
391 | .cra_type = &crypto_blkcipher_type, | ||
392 | .cra_module = THIS_MODULE, | ||
393 | .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), | ||
394 | .cra_u = { | ||
395 | .blkcipher = { | ||
396 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
397 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
398 | .setkey = serpent_setkey, | ||
399 | .encrypt = ecb_encrypt, | ||
400 | .decrypt = ecb_decrypt, | ||
401 | }, | ||
402 | }, | ||
403 | }, { | ||
404 | .cra_name = "__cbc-serpent-avx", | ||
405 | .cra_driver_name = "__driver-cbc-serpent-avx", | ||
406 | .cra_priority = 0, | ||
407 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
408 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
409 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
410 | .cra_alignmask = 0, | ||
411 | .cra_type = &crypto_blkcipher_type, | ||
412 | .cra_module = THIS_MODULE, | ||
413 | .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), | ||
414 | .cra_u = { | ||
415 | .blkcipher = { | ||
416 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
417 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
418 | .setkey = serpent_setkey, | ||
419 | .encrypt = cbc_encrypt, | ||
420 | .decrypt = cbc_decrypt, | ||
421 | }, | ||
422 | }, | ||
423 | }, { | ||
424 | .cra_name = "__ctr-serpent-avx", | ||
425 | .cra_driver_name = "__driver-ctr-serpent-avx", | ||
426 | .cra_priority = 0, | ||
427 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
428 | .cra_blocksize = 1, | ||
429 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
430 | .cra_alignmask = 0, | ||
431 | .cra_type = &crypto_blkcipher_type, | ||
432 | .cra_module = THIS_MODULE, | ||
433 | .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), | ||
434 | .cra_u = { | ||
435 | .blkcipher = { | ||
436 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
437 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
438 | .ivsize = SERPENT_BLOCK_SIZE, | ||
439 | .setkey = serpent_setkey, | ||
440 | .encrypt = ctr_crypt, | ||
441 | .decrypt = ctr_crypt, | ||
442 | }, | ||
443 | }, | ||
444 | }, { | ||
445 | .cra_name = "__lrw-serpent-avx", | ||
446 | .cra_driver_name = "__driver-lrw-serpent-avx", | ||
447 | .cra_priority = 0, | ||
448 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
449 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
450 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | ||
451 | .cra_alignmask = 0, | ||
452 | .cra_type = &crypto_blkcipher_type, | ||
453 | .cra_module = THIS_MODULE, | ||
454 | .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), | ||
455 | .cra_exit = lrw_exit_tfm, | ||
456 | .cra_u = { | ||
457 | .blkcipher = { | ||
458 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
459 | SERPENT_BLOCK_SIZE, | ||
460 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
461 | SERPENT_BLOCK_SIZE, | ||
462 | .ivsize = SERPENT_BLOCK_SIZE, | ||
463 | .setkey = lrw_serpent_setkey, | ||
464 | .encrypt = lrw_encrypt, | ||
465 | .decrypt = lrw_decrypt, | ||
466 | }, | ||
467 | }, | ||
468 | }, { | ||
469 | .cra_name = "__xts-serpent-avx", | ||
470 | .cra_driver_name = "__driver-xts-serpent-avx", | ||
471 | .cra_priority = 0, | ||
472 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
473 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
474 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | ||
475 | .cra_alignmask = 0, | ||
476 | .cra_type = &crypto_blkcipher_type, | ||
477 | .cra_module = THIS_MODULE, | ||
478 | .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), | ||
479 | .cra_u = { | ||
480 | .blkcipher = { | ||
481 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
482 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
483 | .ivsize = SERPENT_BLOCK_SIZE, | ||
484 | .setkey = xts_serpent_setkey, | ||
485 | .encrypt = xts_encrypt, | ||
486 | .decrypt = xts_decrypt, | ||
487 | }, | ||
488 | }, | ||
489 | }, { | ||
490 | .cra_name = "ecb(serpent)", | ||
491 | .cra_driver_name = "ecb-serpent-avx", | ||
492 | .cra_priority = 500, | ||
493 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
494 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
495 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
496 | .cra_alignmask = 0, | ||
497 | .cra_type = &crypto_ablkcipher_type, | ||
498 | .cra_module = THIS_MODULE, | ||
499 | .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), | ||
500 | .cra_init = ablk_init, | ||
501 | .cra_exit = ablk_exit, | ||
502 | .cra_u = { | ||
503 | .ablkcipher = { | ||
504 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
505 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
506 | .setkey = ablk_set_key, | ||
507 | .encrypt = ablk_encrypt, | ||
508 | .decrypt = ablk_decrypt, | ||
509 | }, | ||
510 | }, | ||
511 | }, { | ||
512 | .cra_name = "cbc(serpent)", | ||
513 | .cra_driver_name = "cbc-serpent-avx", | ||
514 | .cra_priority = 500, | ||
515 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
516 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
517 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
518 | .cra_alignmask = 0, | ||
519 | .cra_type = &crypto_ablkcipher_type, | ||
520 | .cra_module = THIS_MODULE, | ||
521 | .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), | ||
522 | .cra_init = ablk_init, | ||
523 | .cra_exit = ablk_exit, | ||
524 | .cra_u = { | ||
525 | .ablkcipher = { | ||
526 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
527 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
528 | .ivsize = SERPENT_BLOCK_SIZE, | ||
529 | .setkey = ablk_set_key, | ||
530 | .encrypt = __ablk_encrypt, | ||
531 | .decrypt = ablk_decrypt, | ||
532 | }, | ||
533 | }, | ||
534 | }, { | ||
535 | .cra_name = "ctr(serpent)", | ||
536 | .cra_driver_name = "ctr-serpent-avx", | ||
537 | .cra_priority = 500, | ||
538 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
539 | .cra_blocksize = 1, | ||
540 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
541 | .cra_alignmask = 0, | ||
542 | .cra_type = &crypto_ablkcipher_type, | ||
543 | .cra_module = THIS_MODULE, | ||
544 | .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), | ||
545 | .cra_init = ablk_init, | ||
546 | .cra_exit = ablk_exit, | ||
547 | .cra_u = { | ||
548 | .ablkcipher = { | ||
549 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
550 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
551 | .ivsize = SERPENT_BLOCK_SIZE, | ||
552 | .setkey = ablk_set_key, | ||
553 | .encrypt = ablk_encrypt, | ||
554 | .decrypt = ablk_encrypt, | ||
555 | .geniv = "chainiv", | ||
556 | }, | ||
557 | }, | ||
558 | }, { | ||
559 | .cra_name = "lrw(serpent)", | ||
560 | .cra_driver_name = "lrw-serpent-avx", | ||
561 | .cra_priority = 500, | ||
562 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
563 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
564 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
565 | .cra_alignmask = 0, | ||
566 | .cra_type = &crypto_ablkcipher_type, | ||
567 | .cra_module = THIS_MODULE, | ||
568 | .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), | ||
569 | .cra_init = ablk_init, | ||
570 | .cra_exit = ablk_exit, | ||
571 | .cra_u = { | ||
572 | .ablkcipher = { | ||
573 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
574 | SERPENT_BLOCK_SIZE, | ||
575 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
576 | SERPENT_BLOCK_SIZE, | ||
577 | .ivsize = SERPENT_BLOCK_SIZE, | ||
578 | .setkey = ablk_set_key, | ||
579 | .encrypt = ablk_encrypt, | ||
580 | .decrypt = ablk_decrypt, | ||
581 | }, | ||
582 | }, | ||
583 | }, { | ||
584 | .cra_name = "xts(serpent)", | ||
585 | .cra_driver_name = "xts-serpent-avx", | ||
586 | .cra_priority = 500, | ||
587 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
588 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
589 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
590 | .cra_alignmask = 0, | ||
591 | .cra_type = &crypto_ablkcipher_type, | ||
592 | .cra_module = THIS_MODULE, | ||
593 | .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), | ||
594 | .cra_init = ablk_init, | ||
595 | .cra_exit = ablk_exit, | ||
596 | .cra_u = { | ||
597 | .ablkcipher = { | ||
598 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
599 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
600 | .ivsize = SERPENT_BLOCK_SIZE, | ||
601 | .setkey = ablk_set_key, | ||
602 | .encrypt = ablk_encrypt, | ||
603 | .decrypt = ablk_decrypt, | ||
604 | }, | ||
605 | }, | ||
606 | } }; | ||
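For context, a sketch of how a caller reaches these priority-500 algorithms through the ablkcipher API of this era; error handling is trimmed, and sg_src, sg_dst, nbytes and the key bytes are assumed to be prepared elsewhere:

	struct crypto_ablkcipher *tfm;
	struct ablkcipher_request *req;
	u8 key[SERPENT_MAX_KEY_SIZE * 2];	/* xts wants a double-length key */
	u8 iv[SERPENT_BLOCK_SIZE];
	int err;

	tfm = crypto_alloc_ablkcipher("xts(serpent)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	crypto_ablkcipher_setkey(tfm, key, sizeof(key));

	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
	/* a real async user must pass a completion callback here and
	 * handle -EINPROGRESS from the encrypt call below */
	ablkcipher_request_set_callback(req, 0, NULL, NULL);
	ablkcipher_request_set_crypt(req, sg_src, sg_dst, nbytes, iv);

	err = crypto_ablkcipher_encrypt(req);

	ablkcipher_request_free(req);
	crypto_free_ablkcipher(tfm);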
607 | |||
608 | static int __init serpent_init(void) | ||
609 | { | ||
610 | u64 xcr0; | ||
611 | |||
612 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
613 | printk(KERN_INFO "AVX instructions are not detected.\n"); | ||
614 | return -ENODEV; | ||
615 | } | ||
616 | |||
617 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
618 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
619 | printk(KERN_INFO "AVX detected but unusable.\n"); | ||
620 | return -ENODEV; | ||
621 | } | ||
622 | |||
623 | return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
624 | } | ||
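xgetbv() is the thin <asm/xcr.h> wrapper around the XGETBV instruction; XCR_XFEATURE_ENABLED_MASK is register 0. Roughly:

	static inline u64 xgetbv_sketch(u32 index)
	{
		u32 eax, edx;

		/* XGETBV: read extended control register %ecx into %edx:%eax;
		 * encoded by bytes since older assemblers lack the mnemonic */
		asm volatile(".byte 0x0f, 0x01, 0xd0"
			     : "=a" (eax), "=d" (edx)
			     : "c" (index));
		return eax + ((u64)edx << 32);
	}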
625 | |||
626 | static void __exit serpent_exit(void) | ||
627 | { | ||
628 | crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
629 | } | ||
630 | |||
631 | module_init(serpent_init); | ||
632 | module_exit(serpent_exit); | ||
633 | |||
634 | MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); | ||
635 | MODULE_LICENSE("GPL"); | ||
636 | MODULE_ALIAS("serpent"); | ||
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 4b21be85e0a1..d679c8675f4a 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c | |||
@@ -41,358 +41,145 @@ | |||
41 | #include <crypto/ctr.h> | 41 | #include <crypto/ctr.h> |
42 | #include <crypto/lrw.h> | 42 | #include <crypto/lrw.h> |
43 | #include <crypto/xts.h> | 43 | #include <crypto/xts.h> |
44 | #include <asm/i387.h> | 44 | #include <asm/crypto/serpent-sse2.h> |
45 | #include <asm/serpent.h> | 45 | #include <asm/crypto/ablk_helper.h> |
46 | #include <crypto/scatterwalk.h> | 46 | #include <asm/crypto/glue_helper.h> |
47 | #include <linux/workqueue.h> | ||
48 | #include <linux/spinlock.h> | ||
49 | |||
50 | struct async_serpent_ctx { | ||
51 | struct cryptd_ablkcipher *cryptd_tfm; | ||
52 | }; | ||
53 | 47 | ||
54 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | 48 | static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) |
55 | { | ||
56 | if (fpu_enabled) | ||
57 | return true; | ||
58 | |||
59 | /* SSE2 is only used when the chunk to be processed is large enough, so | ||
60 | * do not enable the FPU until it is necessary. | ||
61 | */ | ||
62 | if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) | ||
63 | return false; | ||
64 | |||
65 | kernel_fpu_begin(); | ||
66 | return true; | ||
67 | } | ||
68 | |||
69 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
70 | { | 49 | { |
71 | if (fpu_enabled) | 50 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; |
72 | kernel_fpu_end(); | 51 | unsigned int j; |
73 | } | ||
74 | |||
75 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
76 | bool enc) | ||
77 | { | ||
78 | bool fpu_enabled = false; | ||
79 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
80 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
81 | unsigned int nbytes; | ||
82 | int err; | ||
83 | |||
84 | err = blkcipher_walk_virt(desc, walk); | ||
85 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
86 | |||
87 | while ((nbytes = walk->nbytes)) { | ||
88 | u8 *wsrc = walk->src.virt.addr; | ||
89 | u8 *wdst = walk->dst.virt.addr; | ||
90 | |||
91 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
92 | |||
93 | /* Process multi-block batch */ | ||
94 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
95 | do { | ||
96 | if (enc) | ||
97 | serpent_enc_blk_xway(ctx, wdst, wsrc); | ||
98 | else | ||
99 | serpent_dec_blk_xway(ctx, wdst, wsrc); | ||
100 | |||
101 | wsrc += bsize * SERPENT_PARALLEL_BLOCKS; | ||
102 | wdst += bsize * SERPENT_PARALLEL_BLOCKS; | ||
103 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
104 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
105 | |||
106 | if (nbytes < bsize) | ||
107 | goto done; | ||
108 | } | ||
109 | |||
110 | /* Handle leftovers */ | ||
111 | do { | ||
112 | if (enc) | ||
113 | __serpent_encrypt(ctx, wdst, wsrc); | ||
114 | else | ||
115 | __serpent_decrypt(ctx, wdst, wsrc); | ||
116 | |||
117 | wsrc += bsize; | ||
118 | wdst += bsize; | ||
119 | nbytes -= bsize; | ||
120 | } while (nbytes >= bsize); | ||
121 | |||
122 | done: | ||
123 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
124 | } | ||
125 | 52 | ||
126 | serpent_fpu_end(fpu_enabled); | 53 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) |
127 | return err; | 54 | ivs[j] = src[j]; |
128 | } | ||
129 | 55 | ||
130 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 56 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); |
131 | struct scatterlist *src, unsigned int nbytes) | ||
132 | { | ||
133 | struct blkcipher_walk walk; | ||
134 | 57 | ||
135 | blkcipher_walk_init(&walk, dst, src, nbytes); | 58 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) |
136 | return ecb_crypt(desc, &walk, true); | 59 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); |
137 | } | 60 | } |
138 | 61 | ||
139 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 62 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) |
140 | struct scatterlist *src, unsigned int nbytes) | ||
141 | { | 63 | { |
142 | struct blkcipher_walk walk; | 64 | be128 ctrblk; |
143 | 65 | ||
144 | blkcipher_walk_init(&walk, dst, src, nbytes); | 66 | u128_to_be128(&ctrblk, iv); |
145 | return ecb_crypt(desc, &walk, false); | 67 | u128_inc(iv); |
146 | } | ||
147 | 68 | ||
148 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | 69 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
149 | struct blkcipher_walk *walk) | 70 | u128_xor(dst, src, (u128 *)&ctrblk); |
150 | { | ||
151 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
152 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
153 | unsigned int nbytes = walk->nbytes; | ||
154 | u128 *src = (u128 *)walk->src.virt.addr; | ||
155 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
156 | u128 *iv = (u128 *)walk->iv; | ||
157 | |||
158 | do { | ||
159 | u128_xor(dst, src, iv); | ||
160 | __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); | ||
161 | iv = dst; | ||
162 | |||
163 | src += 1; | ||
164 | dst += 1; | ||
165 | nbytes -= bsize; | ||
166 | } while (nbytes >= bsize); | ||
167 | |||
168 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
169 | return nbytes; | ||
170 | } | 71 | } |
171 | 72 | ||
172 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 73 | static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, |
173 | struct scatterlist *src, unsigned int nbytes) | 74 | u128 *iv) |
174 | { | 75 | { |
175 | struct blkcipher_walk walk; | 76 | be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; |
176 | int err; | 77 | unsigned int i; |
177 | 78 | ||
178 | blkcipher_walk_init(&walk, dst, src, nbytes); | 79 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { |
179 | err = blkcipher_walk_virt(desc, &walk); | 80 | if (dst != src) |
81 | dst[i] = src[i]; | ||
180 | 82 | ||
181 | while ((nbytes = walk.nbytes)) { | 83 | u128_to_be128(&ctrblks[i], iv); |
182 | nbytes = __cbc_encrypt(desc, &walk); | 84 | u128_inc(iv); |
183 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
184 | } | 85 | } |
185 | 86 | ||
186 | return err; | 87 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); |
187 | } | 88 | } |
188 | 89 | ||
189 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 90 | static const struct common_glue_ctx serpent_enc = { |
190 | struct blkcipher_walk *walk) | 91 | .num_funcs = 2, |
191 | { | 92 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
192 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
193 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
194 | unsigned int nbytes = walk->nbytes; | ||
195 | u128 *src = (u128 *)walk->src.virt.addr; | ||
196 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
197 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
198 | u128 last_iv; | ||
199 | int i; | ||
200 | |||
201 | /* Start of the last block. */ | ||
202 | src += nbytes / bsize - 1; | ||
203 | dst += nbytes / bsize - 1; | ||
204 | |||
205 | last_iv = *src; | ||
206 | |||
207 | /* Process multi-block batch */ | ||
208 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
209 | do { | ||
210 | nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); | ||
211 | src -= SERPENT_PARALLEL_BLOCKS - 1; | ||
212 | dst -= SERPENT_PARALLEL_BLOCKS - 1; | ||
213 | |||
214 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
215 | ivs[i] = src[i]; | ||
216 | |||
217 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
218 | |||
219 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
220 | u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); | ||
221 | |||
222 | nbytes -= bsize; | ||
223 | if (nbytes < bsize) | ||
224 | goto done; | ||
225 | 93 | ||
226 | u128_xor(dst, dst, src - 1); | 94 | .funcs = { { |
227 | src -= 1; | 95 | .num_blocks = SERPENT_PARALLEL_BLOCKS, |
228 | dst -= 1; | 96 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } |
229 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | 97 | }, { |
230 | 98 | .num_blocks = 1, | |
231 | if (nbytes < bsize) | 99 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } |
232 | goto done; | 100 | } } |
233 | } | 101 | }; |
234 | |||
235 | /* Handle leftovers */ | ||
236 | for (;;) { | ||
237 | __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); | ||
238 | |||
239 | nbytes -= bsize; | ||
240 | if (nbytes < bsize) | ||
241 | break; | ||
242 | 102 | ||
243 | u128_xor(dst, dst, src - 1); | 103 | static const struct common_glue_ctx serpent_ctr = { |
244 | src -= 1; | 104 | .num_funcs = 2, |
245 | dst -= 1; | 105 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
246 | } | 106 | |
107 | .funcs = { { | ||
108 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
109 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } | ||
110 | }, { | ||
111 | .num_blocks = 1, | ||
112 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | ||
113 | } } | ||
114 | }; | ||
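glue_helper walks these tables widest-first, falling through to the next entry when too few blocks remain and ending at the one-block scalar routine. A simplified model of the dispatch (the real loop in glue_helper.c additionally walks scatterlist segments and brackets the FPU using fpu_blocks_limit):

	/* simplified dispatch model; not the actual glue_helper.c code */
	static unsigned int ecb_dispatch(const struct common_glue_ctx *gctx,
					 void *ctx, u8 *dst, const u8 *src,
					 unsigned int nbytes, unsigned int bsize)
	{
		unsigned int i, func_bytes;

		while (nbytes >= bsize) {
			for (i = 0; i < gctx->num_funcs; i++) {
				func_bytes = bsize * gctx->funcs[i].num_blocks;
				if (nbytes < func_bytes)
					continue;	/* too few blocks, try narrower */

				gctx->funcs[i].fn_u.ecb(ctx, dst, src);
				src += func_bytes;
				dst += func_bytes;
				nbytes -= func_bytes;
				break;
			}
		}
		return nbytes;	/* sub-block leftover, if any */
	}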
247 | 115 | ||
248 | done: | 116 | static const struct common_glue_ctx serpent_dec = { |
249 | u128_xor(dst, dst, (u128 *)walk->iv); | 117 | .num_funcs = 2, |
250 | *(u128 *)walk->iv = last_iv; | 118 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
119 | |||
120 | .funcs = { { | ||
121 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
122 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } | ||
123 | }, { | ||
124 | .num_blocks = 1, | ||
125 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
126 | } } | ||
127 | }; | ||
251 | 128 | ||
252 | return nbytes; | 129 | static const struct common_glue_ctx serpent_dec_cbc = { |
253 | } | 130 | .num_funcs = 2, |
131 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
132 | |||
133 | .funcs = { { | ||
134 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
135 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } | ||
136 | }, { | ||
137 | .num_blocks = 1, | ||
138 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
139 | } } | ||
140 | }; | ||
254 | 141 | ||
255 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 142 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
256 | struct scatterlist *src, unsigned int nbytes) | 143 | struct scatterlist *src, unsigned int nbytes) |
257 | { | 144 | { |
258 | bool fpu_enabled = false; | 145 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); |
259 | struct blkcipher_walk walk; | ||
260 | int err; | ||
261 | |||
262 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
263 | err = blkcipher_walk_virt(desc, &walk); | ||
264 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
265 | |||
266 | while ((nbytes = walk.nbytes)) { | ||
267 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
268 | nbytes = __cbc_decrypt(desc, &walk); | ||
269 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
270 | } | ||
271 | |||
272 | serpent_fpu_end(fpu_enabled); | ||
273 | return err; | ||
274 | } | 146 | } |
275 | 147 | ||
276 | static inline void u128_to_be128(be128 *dst, const u128 *src) | 148 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
149 | struct scatterlist *src, unsigned int nbytes) | ||
277 | { | 150 | { |
278 | dst->a = cpu_to_be64(src->a); | 151 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); |
279 | dst->b = cpu_to_be64(src->b); | ||
280 | } | 152 | } |
281 | 153 | ||
282 | static inline void be128_to_u128(u128 *dst, const be128 *src) | 154 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
155 | struct scatterlist *src, unsigned int nbytes) | ||
283 | { | 156 | { |
284 | dst->a = be64_to_cpu(src->a); | 157 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, |
285 | dst->b = be64_to_cpu(src->b); | 158 | dst, src, nbytes); |
286 | } | 159 | } |
287 | 160 | ||
288 | static inline void u128_inc(u128 *i) | 161 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
162 | struct scatterlist *src, unsigned int nbytes) | ||
289 | { | 163 | { |
290 | i->b++; | 164 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, |
291 | if (!i->b) | 165 | nbytes); |
292 | i->a++; | ||
293 | } | 166 | } |
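The removed u128_inc() treats {a, b} as a single 128-bit counter with b as the low half; the carry into a fires only when b wraps:

	/* worked example of the carry path (values illustrative) */
	u128 ctr = { .a = 0, .b = 0xffffffffffffffffULL };

	u128_inc(&ctr);		/* b wraps to 0 and carries into a */
	/* now ctr.a == 1, ctr.b == 0 */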
294 | 167 | ||
295 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 168 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
296 | struct blkcipher_walk *walk) | 169 | struct scatterlist *src, unsigned int nbytes) |
297 | { | 170 | { |
298 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 171 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); |
299 | u8 *ctrblk = walk->iv; | ||
300 | u8 keystream[SERPENT_BLOCK_SIZE]; | ||
301 | u8 *src = walk->src.virt.addr; | ||
302 | u8 *dst = walk->dst.virt.addr; | ||
303 | unsigned int nbytes = walk->nbytes; | ||
304 | |||
305 | __serpent_encrypt(ctx, keystream, ctrblk); | ||
306 | crypto_xor(keystream, src, nbytes); | ||
307 | memcpy(dst, keystream, nbytes); | ||
308 | |||
309 | crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); | ||
310 | } | 172 | } |
311 | 173 | ||
312 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | 174 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) |
313 | struct blkcipher_walk *walk) | ||
314 | { | 175 | { |
315 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 176 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, |
316 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | 177 | NULL, fpu_enabled, nbytes); |
317 | unsigned int nbytes = walk->nbytes; | ||
318 | u128 *src = (u128 *)walk->src.virt.addr; | ||
319 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
320 | u128 ctrblk; | ||
321 | be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; | ||
322 | int i; | ||
323 | |||
324 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
325 | |||
326 | /* Process multi-block batch */ | ||
327 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
328 | do { | ||
329 | /* create ctrblks for parallel encrypt */ | ||
330 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
331 | if (dst != src) | ||
332 | dst[i] = src[i]; | ||
333 | |||
334 | u128_to_be128(&ctrblocks[i], &ctrblk); | ||
335 | u128_inc(&ctrblk); | ||
336 | } | ||
337 | |||
338 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, | ||
339 | (u8 *)ctrblocks); | ||
340 | |||
341 | src += SERPENT_PARALLEL_BLOCKS; | ||
342 | dst += SERPENT_PARALLEL_BLOCKS; | ||
343 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
344 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
345 | |||
346 | if (nbytes < bsize) | ||
347 | goto done; | ||
348 | } | ||
349 | |||
350 | /* Handle leftovers */ | ||
351 | do { | ||
352 | if (dst != src) | ||
353 | *dst = *src; | ||
354 | |||
355 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
356 | u128_inc(&ctrblk); | ||
357 | |||
358 | __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
359 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
360 | |||
361 | src += 1; | ||
362 | dst += 1; | ||
363 | nbytes -= bsize; | ||
364 | } while (nbytes >= bsize); | ||
365 | |||
366 | done: | ||
367 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
368 | return nbytes; | ||
369 | } | 178 | } |
370 | 179 | ||
371 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 180 | static inline void serpent_fpu_end(bool fpu_enabled) |
372 | struct scatterlist *src, unsigned int nbytes) | ||
373 | { | 181 | { |
374 | bool fpu_enabled = false; | 182 | glue_fpu_end(fpu_enabled); |
375 | struct blkcipher_walk walk; | ||
376 | int err; | ||
377 | |||
378 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
379 | err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); | ||
380 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
381 | |||
382 | while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { | ||
383 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
384 | nbytes = __ctr_crypt(desc, &walk); | ||
385 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
386 | } | ||
387 | |||
388 | serpent_fpu_end(fpu_enabled); | ||
389 | |||
390 | if (walk.nbytes) { | ||
391 | ctr_crypt_final(desc, &walk); | ||
392 | err = blkcipher_walk_done(desc, &walk, 0); | ||
393 | } | ||
394 | |||
395 | return err; | ||
396 | } | 183 | } |
397 | 184 | ||
398 | struct crypt_priv { | 185 | struct crypt_priv { |
@@ -596,106 +383,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
596 | return ret; | 383 | return ret; |
597 | } | 384 | } |
598 | 385 | ||
599 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
600 | unsigned int key_len) | ||
601 | { | ||
602 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
603 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; | ||
604 | int err; | ||
605 | |||
606 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
607 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) | ||
608 | & CRYPTO_TFM_REQ_MASK); | ||
609 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
610 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
611 | & CRYPTO_TFM_RES_MASK); | ||
612 | return err; | ||
613 | } | ||
614 | |||
615 | static int __ablk_encrypt(struct ablkcipher_request *req) | ||
616 | { | ||
617 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
618 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
619 | struct blkcipher_desc desc; | ||
620 | |||
621 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
622 | desc.info = req->info; | ||
623 | desc.flags = 0; | ||
624 | |||
625 | return crypto_blkcipher_crt(desc.tfm)->encrypt( | ||
626 | &desc, req->dst, req->src, req->nbytes); | ||
627 | } | ||
628 | |||
629 | static int ablk_encrypt(struct ablkcipher_request *req) | ||
630 | { | ||
631 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
632 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
633 | |||
634 | if (!irq_fpu_usable()) { | ||
635 | struct ablkcipher_request *cryptd_req = | ||
636 | ablkcipher_request_ctx(req); | ||
637 | |||
638 | memcpy(cryptd_req, req, sizeof(*req)); | ||
639 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
640 | |||
641 | return crypto_ablkcipher_encrypt(cryptd_req); | ||
642 | } else { | ||
643 | return __ablk_encrypt(req); | ||
644 | } | ||
645 | } | ||
646 | |||
647 | static int ablk_decrypt(struct ablkcipher_request *req) | ||
648 | { | ||
649 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
650 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
651 | |||
652 | if (!irq_fpu_usable()) { | ||
653 | struct ablkcipher_request *cryptd_req = | ||
654 | ablkcipher_request_ctx(req); | ||
655 | |||
656 | memcpy(cryptd_req, req, sizeof(*req)); | ||
657 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
658 | |||
659 | return crypto_ablkcipher_decrypt(cryptd_req); | ||
660 | } else { | ||
661 | struct blkcipher_desc desc; | ||
662 | |||
663 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
664 | desc.info = req->info; | ||
665 | desc.flags = 0; | ||
666 | |||
667 | return crypto_blkcipher_crt(desc.tfm)->decrypt( | ||
668 | &desc, req->dst, req->src, req->nbytes); | ||
669 | } | ||
670 | } | ||
671 | |||
672 | static void ablk_exit(struct crypto_tfm *tfm) | ||
673 | { | ||
674 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
675 | |||
676 | cryptd_free_ablkcipher(ctx->cryptd_tfm); | ||
677 | } | ||
678 | |||
679 | static int ablk_init(struct crypto_tfm *tfm) | ||
680 | { | ||
681 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
682 | struct cryptd_ablkcipher *cryptd_tfm; | ||
683 | char drv_name[CRYPTO_MAX_ALG_NAME]; | ||
684 | |||
685 | snprintf(drv_name, sizeof(drv_name), "__driver-%s", | ||
686 | crypto_tfm_alg_driver_name(tfm)); | ||
687 | |||
688 | cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); | ||
689 | if (IS_ERR(cryptd_tfm)) | ||
690 | return PTR_ERR(cryptd_tfm); | ||
691 | |||
692 | ctx->cryptd_tfm = cryptd_tfm; | ||
693 | tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + | ||
694 | crypto_ablkcipher_reqsize(&cryptd_tfm->base); | ||
695 | |||
696 | return 0; | ||
697 | } | ||
698 | |||
699 | static struct crypto_alg serpent_algs[10] = { { | 386 | static struct crypto_alg serpent_algs[10] = { { |
700 | .cra_name = "__ecb-serpent-sse2", | 387 | .cra_name = "__ecb-serpent-sse2", |
701 | .cra_driver_name = "__driver-ecb-serpent-sse2", | 388 | .cra_driver_name = "__driver-ecb-serpent-sse2", |
@@ -808,7 +495,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
808 | .cra_priority = 400, | 495 | .cra_priority = 400, |
809 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 496 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
810 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 497 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
811 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 498 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
812 | .cra_alignmask = 0, | 499 | .cra_alignmask = 0, |
813 | .cra_type = &crypto_ablkcipher_type, | 500 | .cra_type = &crypto_ablkcipher_type, |
814 | .cra_module = THIS_MODULE, | 501 | .cra_module = THIS_MODULE, |
@@ -830,7 +517,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
830 | .cra_priority = 400, | 517 | .cra_priority = 400, |
831 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 518 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
832 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 519 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
833 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 520 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
834 | .cra_alignmask = 0, | 521 | .cra_alignmask = 0, |
835 | .cra_type = &crypto_ablkcipher_type, | 522 | .cra_type = &crypto_ablkcipher_type, |
836 | .cra_module = THIS_MODULE, | 523 | .cra_module = THIS_MODULE, |
@@ -853,7 +540,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
853 | .cra_priority = 400, | 540 | .cra_priority = 400, |
854 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 541 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
855 | .cra_blocksize = 1, | 542 | .cra_blocksize = 1, |
856 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 543 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
857 | .cra_alignmask = 0, | 544 | .cra_alignmask = 0, |
858 | .cra_type = &crypto_ablkcipher_type, | 545 | .cra_type = &crypto_ablkcipher_type, |
859 | .cra_module = THIS_MODULE, | 546 | .cra_module = THIS_MODULE, |
@@ -877,7 +564,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
877 | .cra_priority = 400, | 564 | .cra_priority = 400, |
878 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 565 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
879 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 566 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
880 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 567 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
881 | .cra_alignmask = 0, | 568 | .cra_alignmask = 0, |
882 | .cra_type = &crypto_ablkcipher_type, | 569 | .cra_type = &crypto_ablkcipher_type, |
883 | .cra_module = THIS_MODULE, | 570 | .cra_module = THIS_MODULE, |
@@ -902,7 +589,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
902 | .cra_priority = 400, | 589 | .cra_priority = 400, |
903 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 590 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
904 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 591 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
905 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 592 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
906 | .cra_alignmask = 0, | 593 | .cra_alignmask = 0, |
907 | .cra_type = &crypto_ablkcipher_type, | 594 | .cra_type = &crypto_ablkcipher_type, |
908 | .cra_module = THIS_MODULE, | 595 | .cra_module = THIS_MODULE, |
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index b2c2f57d70e8..49d6987a73d9 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S | |||
@@ -468,7 +468,7 @@ W_PRECALC_SSSE3 | |||
468 | */ | 468 | */ |
469 | SHA1_VECTOR_ASM sha1_transform_ssse3 | 469 | SHA1_VECTOR_ASM sha1_transform_ssse3 |
470 | 470 | ||
471 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 471 | #ifdef CONFIG_AS_AVX |
472 | 472 | ||
473 | .macro W_PRECALC_AVX | 473 | .macro W_PRECALC_AVX |
474 | 474 | ||
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index f916499d0abe..4a11a9d72451 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c | |||
@@ -35,7 +35,7 @@ | |||
35 | 35 | ||
36 | asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, | 36 | asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, |
37 | unsigned int rounds); | 37 | unsigned int rounds); |
38 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 38 | #ifdef CONFIG_AS_AVX |
39 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, | 39 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, |
40 | unsigned int rounds); | 40 | unsigned int rounds); |
41 | #endif | 41 | #endif |
@@ -184,7 +184,7 @@ static struct shash_alg alg = { | |||
184 | } | 184 | } |
185 | }; | 185 | }; |
186 | 186 | ||
187 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 187 | #ifdef CONFIG_AS_AVX |
188 | static bool __init avx_usable(void) | 188 | static bool __init avx_usable(void) |
189 | { | 189 | { |
190 | u64 xcr0; | 190 | u64 xcr0; |
@@ -209,7 +209,7 @@ static int __init sha1_ssse3_mod_init(void) | |||
209 | if (cpu_has_ssse3) | 209 | if (cpu_has_ssse3) |
210 | sha1_transform_asm = sha1_transform_ssse3; | 210 | sha1_transform_asm = sha1_transform_ssse3; |
211 | 211 | ||
212 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 212 | #ifdef CONFIG_AS_AVX |
213 | /* allow AVX to override SSSE3, it's a little faster */ | 213 | /* allow AVX to override SSSE3, it's a little faster */ |
214 | if (avx_usable()) | 214 | if (avx_usable()) |
215 | sha1_transform_asm = sha1_transform_avx; | 215 | sha1_transform_asm = sha1_transform_avx; |
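The selection logic this hunk now gates on CONFIG_AS_AVX boils down to a function pointer chosen once at module init; condensed (the real sha1_ssse3_mod_init() goes on to register the shash):

	static void (*sha1_transform_asm)(u32 *, const char *, unsigned int);

	static int __init pick_transform(void)
	{
		if (cpu_has_ssse3)
			sha1_transform_asm = sha1_transform_ssse3;
	#ifdef CONFIG_AS_AVX
		if (avx_usable())	/* AVX overrides SSSE3: a little faster */
			sha1_transform_asm = sha1_transform_avx;
	#endif
		return sha1_transform_asm ? 0 : -ENODEV;
	}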
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S new file mode 100644 index 000000000000..35f45574390d --- /dev/null +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S | |||
@@ -0,0 +1,300 @@ | |||
1 | /* | ||
2 | * Twofish Cipher 8-way parallel algorithm (AVX/x86_64) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | .file "twofish-avx-x86_64-asm_64.S" | ||
25 | .text | ||
26 | |||
27 | /* structure of crypto context */ | ||
28 | #define s0 0 | ||
29 | #define s1 1024 | ||
30 | #define s2 2048 | ||
31 | #define s3 3072 | ||
32 | #define w 4096 | ||
33 | #define k 4128 | ||
34 | |||
35 | /********************************************************************** | ||
36 | 8-way AVX twofish | ||
37 | **********************************************************************/ | ||
38 | #define CTX %rdi | ||
39 | |||
40 | #define RA1 %xmm0 | ||
41 | #define RB1 %xmm1 | ||
42 | #define RC1 %xmm2 | ||
43 | #define RD1 %xmm3 | ||
44 | |||
45 | #define RA2 %xmm4 | ||
46 | #define RB2 %xmm5 | ||
47 | #define RC2 %xmm6 | ||
48 | #define RD2 %xmm7 | ||
49 | |||
50 | #define RX %xmm8 | ||
51 | #define RY %xmm9 | ||
52 | |||
53 | #define RK1 %xmm10 | ||
54 | #define RK2 %xmm11 | ||
55 | |||
56 | #define RID1 %rax | ||
57 | #define RID1b %al | ||
58 | #define RID2 %rbx | ||
59 | #define RID2b %bl | ||
60 | |||
61 | #define RGI1 %rdx | ||
62 | #define RGI1bl %dl | ||
63 | #define RGI1bh %dh | ||
64 | #define RGI2 %rcx | ||
65 | #define RGI2bl %cl | ||
66 | #define RGI2bh %ch | ||
67 | |||
68 | #define RGS1 %r8 | ||
69 | #define RGS1d %r8d | ||
70 | #define RGS2 %r9 | ||
71 | #define RGS2d %r9d | ||
72 | #define RGS3 %r10 | ||
73 | #define RGS3d %r10d | ||
74 | |||
75 | |||
76 | #define lookup_32bit(t0, t1, t2, t3, src, dst) \ | ||
77 | movb src ## bl, RID1b; \ | ||
78 | movb src ## bh, RID2b; \ | ||
79 | movl t0(CTX, RID1, 4), dst ## d; \ | ||
80 | xorl t1(CTX, RID2, 4), dst ## d; \ | ||
81 | shrq $16, src; \ | ||
82 | movb src ## bl, RID1b; \ | ||
83 | movb src ## bh, RID2b; \ | ||
84 | xorl t2(CTX, RID1, 4), dst ## d; \ | ||
85 | xorl t3(CTX, RID2, 4), dst ## d; | ||
86 | |||
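lookup_32bit consumes one 32-bit word a byte at a time through four 1024-byte tables, using the %al/%ah byte registers plus a single 16-bit shift so no masking is needed. Its net effect in plain C, assuming u32 tables of 256 entries each:

	/* C model of lookup_32bit: byte i of src indexes table ti, results XORed */
	static u32 lookup_32bit_model(const u32 t0[256], const u32 t1[256],
				      const u32 t2[256], const u32 t3[256],
				      u32 src)
	{
		return t0[src & 0xff] ^
		       t1[(src >> 8) & 0xff] ^
		       t2[(src >> 16) & 0xff] ^
		       t3[(src >> 24) & 0xff];
	}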
87 | #define G(a, x, t0, t1, t2, t3) \ | ||
88 | vmovq a, RGI1; \ | ||
89 | vpsrldq $8, a, x; \ | ||
90 | vmovq x, RGI2; \ | ||
91 | \ | ||
92 | lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \ | ||
93 | shrq $16, RGI1; \ | ||
94 | lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \ | ||
95 | shlq $32, RGS2; \ | ||
96 | orq RGS1, RGS2; \ | ||
97 | \ | ||
98 | lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \ | ||
99 | shrq $16, RGI2; \ | ||
100 | lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \ | ||
101 | shlq $32, RGS3; \ | ||
102 | orq RGS1, RGS3; \ | ||
103 | \ | ||
104 | vmovq RGS2, x; \ | ||
105 | vpinsrq $1, RGS3, x, x; | ||
106 | |||
107 | #define encround(a, b, c, d, x, y) \ | ||
108 | G(a, x, s0, s1, s2, s3); \ | ||
109 | G(b, y, s1, s2, s3, s0); \ | ||
110 | vpaddd x, y, x; \ | ||
111 | vpaddd y, x, y; \ | ||
112 | vpaddd x, RK1, x; \ | ||
113 | vpaddd y, RK2, y; \ | ||
114 | vpxor x, c, c; \ | ||
115 | vpsrld $1, c, x; \ | ||
116 | vpslld $(32 - 1), c, c; \ | ||
117 | vpor c, x, c; \ | ||
118 | vpslld $1, d, x; \ | ||
119 | vpsrld $(32 - 1), d, d; \ | ||
120 | vpor d, x, d; \ | ||
121 | vpxor d, y, d; | ||
122 | |||
123 | #define decround(a, b, c, d, x, y) \ | ||
124 | G(a, x, s0, s1, s2, s3); \ | ||
125 | G(b, y, s1, s2, s3, s0); \ | ||
126 | vpaddd x, y, x; \ | ||
127 | vpaddd y, x, y; \ | ||
128 | vpaddd y, RK2, y; \ | ||
129 | vpxor d, y, d; \ | ||
130 | vpsrld $1, d, y; \ | ||
131 | vpslld $(32 - 1), d, d; \ | ||
132 | vpor d, y, d; \ | ||
133 | vpslld $1, c, y; \ | ||
134 | vpsrld $(32 - 1), c, c; \ | ||
135 | vpor c, y, c; \ | ||
136 | vpaddd x, RK1, x; \ | ||
137 | vpxor x, c, c; | ||
138 | |||
139 | #define encrypt_round(n, a, b, c, d) \ | ||
140 | vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ | ||
141 | vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ | ||
142 | encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ | ||
143 | encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); | ||
144 | |||
145 | #define decrypt_round(n, a, b, c, d) \ | ||
146 | vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ | ||
147 | vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ | ||
148 | decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ | ||
149 | decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); | ||
150 | |||
151 | #define encrypt_cycle(n) \ | ||
152 | encrypt_round((2*n), RA, RB, RC, RD); \ | ||
153 | encrypt_round(((2*n) + 1), RC, RD, RA, RB); | ||
154 | |||
155 | #define decrypt_cycle(n) \ | ||
156 | decrypt_round(((2*n) + 1), RC, RD, RA, RB); \ | ||
157 | decrypt_round((2*n), RA, RB, RC, RD); | ||
158 | |||
159 | |||
160 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
161 | vpunpckldq x1, x0, t0; \ | ||
162 | vpunpckhdq x1, x0, t2; \ | ||
163 | vpunpckldq x3, x2, t1; \ | ||
164 | vpunpckhdq x3, x2, x3; \ | ||
165 | \ | ||
166 | vpunpcklqdq t1, t0, x0; \ | ||
167 | vpunpckhqdq t1, t0, x1; \ | ||
168 | vpunpcklqdq x3, t2, x2; \ | ||
169 | vpunpckhqdq x3, t2, x3; | ||
170 | |||
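transpose_4x4 is the standard unpack-based 4x4 transpose of 32-bit lanes, turning four row-wise blocks into the column-wise layout the round macros operate on. The same sequence modeled with SSE2 intrinsics (AT&T operand order reversed):

	#include <immintrin.h>

	/* intrinsics model of transpose_4x4 */
	static void transpose_4x4_model(__m128i *x0, __m128i *x1,
					__m128i *x2, __m128i *x3)
	{
		__m128i t0 = _mm_unpacklo_epi32(*x0, *x1);
		__m128i t2 = _mm_unpackhi_epi32(*x0, *x1);
		__m128i t1 = _mm_unpacklo_epi32(*x2, *x3);
		__m128i t3 = _mm_unpackhi_epi32(*x2, *x3);

		*x0 = _mm_unpacklo_epi64(t0, t1);
		*x1 = _mm_unpackhi_epi64(t0, t1);
		*x2 = _mm_unpacklo_epi64(t2, t3);
		*x3 = _mm_unpackhi_epi64(t2, t3);
	}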
171 | #define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
172 | vpxor (0*4*4)(in), wkey, x0; \ | ||
173 | vpxor (1*4*4)(in), wkey, x1; \ | ||
174 | vpxor (2*4*4)(in), wkey, x2; \ | ||
175 | vpxor (3*4*4)(in), wkey, x3; \ | ||
176 | \ | ||
177 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
178 | |||
179 | #define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
180 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
181 | \ | ||
182 | vpxor x0, wkey, x0; \ | ||
183 | vmovdqu x0, (0*4*4)(out); \ | ||
184 | vpxor x1, wkey, x1; \ | ||
185 | vmovdqu x1, (1*4*4)(out); \ | ||
186 | vpxor x2, wkey, x2; \ | ||
187 | vmovdqu x2, (2*4*4)(out); \ | ||
188 | vpxor x3, wkey, x3; \ | ||
189 | vmovdqu x3, (3*4*4)(out); | ||
190 | |||
191 | #define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
192 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
193 | \ | ||
194 | vpxor x0, wkey, x0; \ | ||
195 | vpxor (0*4*4)(out), x0, x0; \ | ||
196 | vmovdqu x0, (0*4*4)(out); \ | ||
197 | vpxor x1, wkey, x1; \ | ||
198 | vpxor (1*4*4)(out), x1, x1; \ | ||
199 | vmovdqu x1, (1*4*4)(out); \ | ||
200 | vpxor x2, wkey, x2; \ | ||
201 | vpxor (2*4*4)(out), x2, x2; \ | ||
202 | vmovdqu x2, (2*4*4)(out); \ | ||
203 | vpxor x3, wkey, x3; \ | ||
204 | vpxor (3*4*4)(out), x3, x3; \ | ||
205 | vmovdqu x3, (3*4*4)(out); | ||
206 | |||
207 | .align 8 | ||
208 | .global __twofish_enc_blk_8way | ||
209 | .type __twofish_enc_blk_8way,@function; | ||
210 | |||
211 | __twofish_enc_blk_8way: | ||
212 | /* input: | ||
213 | * %rdi: ctx, CTX | ||
214 | * %rsi: dst | ||
215 | * %rdx: src | ||
216 | * %rcx: bool, if true: xor output | ||
217 | */ | ||
218 | |||
219 | pushq %rbx; | ||
220 | pushq %rcx; | ||
221 | |||
222 | vmovdqu w(CTX), RK1; | ||
223 | |||
224 | leaq (4*4*4)(%rdx), %rax; | ||
225 | inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); | ||
226 | inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); | ||
227 | |||
228 | xorq RID1, RID1; | ||
229 | xorq RID2, RID2; | ||
230 | |||
231 | encrypt_cycle(0); | ||
232 | encrypt_cycle(1); | ||
233 | encrypt_cycle(2); | ||
234 | encrypt_cycle(3); | ||
235 | encrypt_cycle(4); | ||
236 | encrypt_cycle(5); | ||
237 | encrypt_cycle(6); | ||
238 | encrypt_cycle(7); | ||
239 | |||
240 | vmovdqu (w+4*4)(CTX), RK1; | ||
241 | |||
242 | popq %rcx; | ||
243 | popq %rbx; | ||
244 | |||
245 | leaq (4*4*4)(%rsi), %rax; | ||
246 | |||
247 | testb %cl, %cl; | ||
248 | jnz __enc_xor8; | ||
249 | |||
250 | outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
251 | outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
252 | |||
253 | ret; | ||
254 | |||
255 | __enc_xor8: | ||
256 | outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
257 | outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
258 | |||
259 | ret; | ||
260 | |||
261 | .align 8 | ||
262 | .global twofish_dec_blk_8way | ||
263 | .type twofish_dec_blk_8way,@function; | ||
264 | |||
265 | twofish_dec_blk_8way: | ||
266 | /* input: | ||
267 | * %rdi: ctx, CTX | ||
268 | * %rsi: dst | ||
269 | * %rdx: src | ||
270 | */ | ||
271 | |||
272 | pushq %rbx; | ||
273 | |||
274 | vmovdqu (w+4*4)(CTX), RK1; | ||
275 | |||
276 | leaq (4*4*4)(%rdx), %rax; | ||
277 | inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
278 | inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
279 | |||
280 | xorq RID1, RID1; | ||
281 | xorq RID2, RID2; | ||
282 | |||
283 | decrypt_cycle(7); | ||
284 | decrypt_cycle(6); | ||
285 | decrypt_cycle(5); | ||
286 | decrypt_cycle(4); | ||
287 | decrypt_cycle(3); | ||
288 | decrypt_cycle(2); | ||
289 | decrypt_cycle(1); | ||
290 | decrypt_cycle(0); | ||
291 | |||
292 | vmovdqu (w)(CTX), RK1; | ||
293 | |||
294 | popq %rbx; | ||
295 | |||
296 | leaq (4*4*4)(%rsi), %rax; | ||
297 | outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); | ||
298 | outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); | ||
299 | |||
300 | ret; | ||
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c new file mode 100644 index 000000000000..782b67ddaf6a --- /dev/null +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -0,0 +1,624 @@ | |||
1 | /* | ||
2 | * Glue Code for AVX assembler version of Twofish Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/crypto.h> | ||
28 | #include <linux/err.h> | ||
29 | #include <crypto/algapi.h> | ||
30 | #include <crypto/twofish.h> | ||
31 | #include <crypto/cryptd.h> | ||
32 | #include <crypto/b128ops.h> | ||
33 | #include <crypto/ctr.h> | ||
34 | #include <crypto/lrw.h> | ||
35 | #include <crypto/xts.h> | ||
36 | #include <asm/i387.h> | ||
37 | #include <asm/xcr.h> | ||
38 | #include <asm/xsave.h> | ||
39 | #include <asm/crypto/twofish.h> | ||
40 | #include <asm/crypto/ablk_helper.h> | ||
41 | #include <asm/crypto/glue_helper.h> | ||
42 | #include <crypto/scatterwalk.h> | ||
43 | #include <linux/workqueue.h> | ||
44 | #include <linux/spinlock.h> | ||
45 | |||
46 | #define TWOFISH_PARALLEL_BLOCKS 8 | ||
47 | |||
48 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
49 | const u8 *src) | ||
50 | { | ||
51 | __twofish_enc_blk_3way(ctx, dst, src, false); | ||
52 | } | ||
53 | |||
54 | /* 8-way parallel cipher functions */ | ||
55 | asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst, | ||
56 | const u8 *src, bool xor); | ||
57 | asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst, | ||
58 | const u8 *src); | ||
59 | |||
60 | static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst, | ||
61 | const u8 *src) | ||
62 | { | ||
63 | __twofish_enc_blk_8way(ctx, dst, src, false); | ||
64 | } | ||
65 | |||
66 | static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst, | ||
67 | const u8 *src) | ||
68 | { | ||
69 | __twofish_enc_blk_8way(ctx, dst, src, true); | ||
70 | } | ||
71 | |||
72 | static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst, | ||
73 | const u8 *src) | ||
74 | { | ||
75 | twofish_dec_blk_8way(ctx, dst, src); | ||
76 | } | ||
77 | |||
78 | static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src) | ||
79 | { | ||
80 | u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1]; | ||
81 | unsigned int j; | ||
82 | |||
83 | for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) | ||
84 | ivs[j] = src[j]; | ||
85 | |||
86 | twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
87 | |||
88 | for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) | ||
89 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); | ||
90 | } | ||
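The xway CBC helper must tolerate dst == src, so the ciphertext blocks still needed as chaining values are saved before the in-place 8-way decrypt; the first block's XOR against the carried-in IV is done by the generic glue caller. The same pattern in scalar form (decrypt_n_blocks() is a hypothetical n-way primitive, n <= TWOFISH_PARALLEL_BLOCKS):

	/* scalar model of n-way CBC decrypt with in-place (dst == src) safety */
	static void cbc_dec_nway_model(void *ctx, u128 *dst, const u128 *src, int n)
	{
		u128 prev[TWOFISH_PARALLEL_BLOCKS - 1];	/* saved chaining values */
		int j;

		for (j = 0; j < n - 1; j++)
			prev[j] = src[j];

		decrypt_n_blocks(ctx, dst, src, n);	/* hypothetical primitive */

		for (j = 0; j < n - 1; j++)
			u128_xor(&dst[j + 1], &dst[j + 1], &prev[j]);
		/* dst[0] is XORed with the carried-in IV by the glue caller */
	}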
91 | |||
92 | static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src, | ||
93 | u128 *iv) | ||
94 | { | ||
95 | be128 ctrblks[TWOFISH_PARALLEL_BLOCKS]; | ||
96 | unsigned int i; | ||
97 | |||
98 | for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) { | ||
99 | if (dst != src) | ||
100 | dst[i] = src[i]; | ||
101 | |||
102 | u128_to_be128(&ctrblks[i], iv); | ||
103 | u128_inc(iv); | ||
104 | } | ||
105 | |||
106 | twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
107 | } | ||
108 | |||
109 | static const struct common_glue_ctx twofish_enc = { | ||
110 | .num_funcs = 3, | ||
111 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
112 | |||
113 | .funcs = { { | ||
114 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
115 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) } | ||
116 | }, { | ||
117 | .num_blocks = 3, | ||
118 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
119 | }, { | ||
120 | .num_blocks = 1, | ||
121 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
122 | } } | ||
123 | }; | ||
124 | |||
125 | static const struct common_glue_ctx twofish_ctr = { | ||
126 | .num_funcs = 3, | ||
127 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
128 | |||
129 | .funcs = { { | ||
130 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
131 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) } | ||
132 | }, { | ||
133 | .num_blocks = 3, | ||
134 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } | ||
135 | }, { | ||
136 | .num_blocks = 1, | ||
137 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } | ||
138 | } } | ||
139 | }; | ||
140 | |||
141 | static const struct common_glue_ctx twofish_dec = { | ||
142 | .num_funcs = 3, | ||
143 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
144 | |||
145 | .funcs = { { | ||
146 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
147 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) } | ||
148 | }, { | ||
149 | .num_blocks = 3, | ||
150 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
151 | }, { | ||
152 | .num_blocks = 1, | ||
153 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
154 | } } | ||
155 | }; | ||
156 | |||
157 | static const struct common_glue_ctx twofish_dec_cbc = { | ||
158 | .num_funcs = 3, | ||
159 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
160 | |||
161 | .funcs = { { | ||
162 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
163 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) } | ||
164 | }, { | ||
165 | .num_blocks = 3, | ||
166 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
167 | }, { | ||
168 | .num_blocks = 1, | ||
169 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
170 | } } | ||
171 | }; | ||
172 | |||
173 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
174 | struct scatterlist *src, unsigned int nbytes) | ||
175 | { | ||
176 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); | ||
177 | } | ||
178 | |||
179 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
180 | struct scatterlist *src, unsigned int nbytes) | ||
181 | { | ||
182 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); | ||
183 | } | ||
184 | |||
185 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
186 | struct scatterlist *src, unsigned int nbytes) | ||
187 | { | ||
188 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, | ||
189 | dst, src, nbytes); | ||
190 | } | ||
191 | |||
192 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
193 | struct scatterlist *src, unsigned int nbytes) | ||
194 | { | ||
195 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, | ||
196 | nbytes); | ||
197 | } | ||
198 | |||
199 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
200 | struct scatterlist *src, unsigned int nbytes) | ||
201 | { | ||
202 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); | ||
203 | } | ||
204 | |||
205 | static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
206 | { | ||
207 | return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL, | ||
208 | fpu_enabled, nbytes); | ||
209 | } | ||
210 | |||
211 | static inline void twofish_fpu_end(bool fpu_enabled) | ||
212 | { | ||
213 | glue_fpu_end(fpu_enabled); | ||
214 | } | ||
215 | |||
216 | struct crypt_priv { | ||
217 | struct twofish_ctx *ctx; | ||
218 | bool fpu_enabled; | ||
219 | }; | ||
220 | |||
221 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
222 | { | ||
223 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
224 | struct crypt_priv *ctx = priv; | ||
225 | int i; | ||
226 | |||
227 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
228 | |||
229 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | ||
230 | twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
231 | return; | ||
232 | } | ||
233 | |||
234 | for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | ||
235 | twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); | ||
236 | |||
237 | nbytes %= bsize * 3; | ||
238 | |||
239 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
240 | twofish_enc_blk(ctx->ctx, srcdst, srcdst); | ||
241 | } | ||
242 | |||
243 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
244 | { | ||
245 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
246 | struct crypt_priv *ctx = priv; | ||
247 | int i; | ||
248 | |||
249 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
250 | |||
251 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | ||
252 | twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
253 | return; | ||
254 | } | ||
255 | |||
256 | for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | ||
257 | twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); | ||
258 | |||
259 | nbytes %= bsize * 3; | ||
260 | |||
261 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
262 | twofish_dec_blk(ctx->ctx, srcdst, srcdst); | ||
263 | } | ||
264 | |||
265 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
266 | struct scatterlist *src, unsigned int nbytes) | ||
267 | { | ||
268 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
269 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
270 | struct crypt_priv crypt_ctx = { | ||
271 | .ctx = &ctx->twofish_ctx, | ||
272 | .fpu_enabled = false, | ||
273 | }; | ||
274 | struct lrw_crypt_req req = { | ||
275 | .tbuf = buf, | ||
276 | .tbuflen = sizeof(buf), | ||
277 | |||
278 | .table_ctx = &ctx->lrw_table, | ||
279 | .crypt_ctx = &crypt_ctx, | ||
280 | .crypt_fn = encrypt_callback, | ||
281 | }; | ||
282 | int ret; | ||
283 | |||
284 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
285 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
286 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
287 | |||
288 | return ret; | ||
289 | } | ||
290 | |||
291 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
292 | struct scatterlist *src, unsigned int nbytes) | ||
293 | { | ||
294 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
295 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
296 | struct crypt_priv crypt_ctx = { | ||
297 | .ctx = &ctx->twofish_ctx, | ||
298 | .fpu_enabled = false, | ||
299 | }; | ||
300 | struct lrw_crypt_req req = { | ||
301 | .tbuf = buf, | ||
302 | .tbuflen = sizeof(buf), | ||
303 | |||
304 | .table_ctx = &ctx->lrw_table, | ||
305 | .crypt_ctx = &crypt_ctx, | ||
306 | .crypt_fn = decrypt_callback, | ||
307 | }; | ||
308 | int ret; | ||
309 | |||
310 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
311 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
312 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
313 | |||
314 | return ret; | ||
315 | } | ||
316 | |||
317 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
318 | struct scatterlist *src, unsigned int nbytes) | ||
319 | { | ||
320 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
321 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
322 | struct crypt_priv crypt_ctx = { | ||
323 | .ctx = &ctx->crypt_ctx, | ||
324 | .fpu_enabled = false, | ||
325 | }; | ||
326 | struct xts_crypt_req req = { | ||
327 | .tbuf = buf, | ||
328 | .tbuflen = sizeof(buf), | ||
329 | |||
330 | .tweak_ctx = &ctx->tweak_ctx, | ||
331 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
332 | .crypt_ctx = &crypt_ctx, | ||
333 | .crypt_fn = encrypt_callback, | ||
334 | }; | ||
335 | int ret; | ||
336 | |||
337 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
338 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
339 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
340 | |||
341 | return ret; | ||
342 | } | ||
343 | |||
344 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
345 | struct scatterlist *src, unsigned int nbytes) | ||
346 | { | ||
347 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
348 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
349 | struct crypt_priv crypt_ctx = { | ||
350 | .ctx = &ctx->crypt_ctx, | ||
351 | .fpu_enabled = false, | ||
352 | }; | ||
353 | struct xts_crypt_req req = { | ||
354 | .tbuf = buf, | ||
355 | .tbuflen = sizeof(buf), | ||
356 | |||
357 | .tweak_ctx = &ctx->tweak_ctx, | ||
358 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
359 | .crypt_ctx = &crypt_ctx, | ||
360 | .crypt_fn = decrypt_callback, | ||
361 | }; | ||
362 | int ret; | ||
363 | |||
364 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
365 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
366 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
367 | |||
368 | return ret; | ||
369 | } | ||
370 | |||
371 | static struct crypto_alg twofish_algs[10] = { { | ||
372 | .cra_name = "__ecb-twofish-avx", | ||
373 | .cra_driver_name = "__driver-ecb-twofish-avx", | ||
374 | .cra_priority = 0, | ||
375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
376 | .cra_blocksize = TF_BLOCK_SIZE, | ||
377 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
378 | .cra_alignmask = 0, | ||
379 | .cra_type = &crypto_blkcipher_type, | ||
380 | .cra_module = THIS_MODULE, | ||
381 | .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list), | ||
382 | .cra_u = { | ||
383 | .blkcipher = { | ||
384 | .min_keysize = TF_MIN_KEY_SIZE, | ||
385 | .max_keysize = TF_MAX_KEY_SIZE, | ||
386 | .setkey = twofish_setkey, | ||
387 | .encrypt = ecb_encrypt, | ||
388 | .decrypt = ecb_decrypt, | ||
389 | }, | ||
390 | }, | ||
391 | }, { | ||
392 | .cra_name = "__cbc-twofish-avx", | ||
393 | .cra_driver_name = "__driver-cbc-twofish-avx", | ||
394 | .cra_priority = 0, | ||
395 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
396 | .cra_blocksize = TF_BLOCK_SIZE, | ||
397 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
398 | .cra_alignmask = 0, | ||
399 | .cra_type = &crypto_blkcipher_type, | ||
400 | .cra_module = THIS_MODULE, | ||
401 | .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list), | ||
402 | .cra_u = { | ||
403 | .blkcipher = { | ||
404 | .min_keysize = TF_MIN_KEY_SIZE, | ||
405 | .max_keysize = TF_MAX_KEY_SIZE, | ||
406 | .setkey = twofish_setkey, | ||
407 | .encrypt = cbc_encrypt, | ||
408 | .decrypt = cbc_decrypt, | ||
409 | }, | ||
410 | }, | ||
411 | }, { | ||
412 | .cra_name = "__ctr-twofish-avx", | ||
413 | .cra_driver_name = "__driver-ctr-twofish-avx", | ||
414 | .cra_priority = 0, | ||
415 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
416 | .cra_blocksize = 1, | ||
417 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
418 | .cra_alignmask = 0, | ||
419 | .cra_type = &crypto_blkcipher_type, | ||
420 | .cra_module = THIS_MODULE, | ||
421 | .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list), | ||
422 | .cra_u = { | ||
423 | .blkcipher = { | ||
424 | .min_keysize = TF_MIN_KEY_SIZE, | ||
425 | .max_keysize = TF_MAX_KEY_SIZE, | ||
426 | .ivsize = TF_BLOCK_SIZE, | ||
427 | .setkey = twofish_setkey, | ||
428 | .encrypt = ctr_crypt, | ||
429 | .decrypt = ctr_crypt, | ||
430 | }, | ||
431 | }, | ||
432 | }, { | ||
433 | .cra_name = "__lrw-twofish-avx", | ||
434 | .cra_driver_name = "__driver-lrw-twofish-avx", | ||
435 | .cra_priority = 0, | ||
436 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
437 | .cra_blocksize = TF_BLOCK_SIZE, | ||
438 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | ||
439 | .cra_alignmask = 0, | ||
440 | .cra_type = &crypto_blkcipher_type, | ||
441 | .cra_module = THIS_MODULE, | ||
442 | .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list), | ||
443 | .cra_exit = lrw_twofish_exit_tfm, | ||
444 | .cra_u = { | ||
445 | .blkcipher = { | ||
446 | .min_keysize = TF_MIN_KEY_SIZE + | ||
447 | TF_BLOCK_SIZE, | ||
448 | .max_keysize = TF_MAX_KEY_SIZE + | ||
449 | TF_BLOCK_SIZE, | ||
450 | .ivsize = TF_BLOCK_SIZE, | ||
451 | .setkey = lrw_twofish_setkey, | ||
452 | .encrypt = lrw_encrypt, | ||
453 | .decrypt = lrw_decrypt, | ||
454 | }, | ||
455 | }, | ||
456 | }, { | ||
457 | .cra_name = "__xts-twofish-avx", | ||
458 | .cra_driver_name = "__driver-xts-twofish-avx", | ||
459 | .cra_priority = 0, | ||
460 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
461 | .cra_blocksize = TF_BLOCK_SIZE, | ||
462 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | ||
463 | .cra_alignmask = 0, | ||
464 | .cra_type = &crypto_blkcipher_type, | ||
465 | .cra_module = THIS_MODULE, | ||
466 | .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list), | ||
467 | .cra_u = { | ||
468 | .blkcipher = { | ||
469 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
470 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
471 | .ivsize = TF_BLOCK_SIZE, | ||
472 | .setkey = xts_twofish_setkey, | ||
473 | .encrypt = xts_encrypt, | ||
474 | .decrypt = xts_decrypt, | ||
475 | }, | ||
476 | }, | ||
477 | }, { | ||
478 | .cra_name = "ecb(twofish)", | ||
479 | .cra_driver_name = "ecb-twofish-avx", | ||
480 | .cra_priority = 400, | ||
481 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
482 | .cra_blocksize = TF_BLOCK_SIZE, | ||
483 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
484 | .cra_alignmask = 0, | ||
485 | .cra_type = &crypto_ablkcipher_type, | ||
486 | .cra_module = THIS_MODULE, | ||
487 | .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list), | ||
488 | .cra_init = ablk_init, | ||
489 | .cra_exit = ablk_exit, | ||
490 | .cra_u = { | ||
491 | .ablkcipher = { | ||
492 | .min_keysize = TF_MIN_KEY_SIZE, | ||
493 | .max_keysize = TF_MAX_KEY_SIZE, | ||
494 | .setkey = ablk_set_key, | ||
495 | .encrypt = ablk_encrypt, | ||
496 | .decrypt = ablk_decrypt, | ||
497 | }, | ||
498 | }, | ||
499 | }, { | ||
500 | .cra_name = "cbc(twofish)", | ||
501 | .cra_driver_name = "cbc-twofish-avx", | ||
502 | .cra_priority = 400, | ||
503 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
504 | .cra_blocksize = TF_BLOCK_SIZE, | ||
505 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
506 | .cra_alignmask = 0, | ||
507 | .cra_type = &crypto_ablkcipher_type, | ||
508 | .cra_module = THIS_MODULE, | ||
509 | .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list), | ||
510 | .cra_init = ablk_init, | ||
511 | .cra_exit = ablk_exit, | ||
512 | .cra_u = { | ||
513 | .ablkcipher = { | ||
514 | .min_keysize = TF_MIN_KEY_SIZE, | ||
515 | .max_keysize = TF_MAX_KEY_SIZE, | ||
516 | .ivsize = TF_BLOCK_SIZE, | ||
517 | .setkey = ablk_set_key, | ||
518 | .encrypt = __ablk_encrypt, | ||
519 | .decrypt = ablk_decrypt, | ||
520 | }, | ||
521 | }, | ||
522 | }, { | ||
523 | .cra_name = "ctr(twofish)", | ||
524 | .cra_driver_name = "ctr-twofish-avx", | ||
525 | .cra_priority = 400, | ||
526 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
527 | .cra_blocksize = 1, | ||
528 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
529 | .cra_alignmask = 0, | ||
530 | .cra_type = &crypto_ablkcipher_type, | ||
531 | .cra_module = THIS_MODULE, | ||
532 | .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list), | ||
533 | .cra_init = ablk_init, | ||
534 | .cra_exit = ablk_exit, | ||
535 | .cra_u = { | ||
536 | .ablkcipher = { | ||
537 | .min_keysize = TF_MIN_KEY_SIZE, | ||
538 | .max_keysize = TF_MAX_KEY_SIZE, | ||
539 | .ivsize = TF_BLOCK_SIZE, | ||
540 | .setkey = ablk_set_key, | ||
541 | .encrypt = ablk_encrypt, | ||
542 | .decrypt = ablk_encrypt, | ||
543 | .geniv = "chainiv", | ||
544 | }, | ||
545 | }, | ||
546 | }, { | ||
547 | .cra_name = "lrw(twofish)", | ||
548 | .cra_driver_name = "lrw-twofish-avx", | ||
549 | .cra_priority = 400, | ||
550 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
551 | .cra_blocksize = TF_BLOCK_SIZE, | ||
552 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
553 | .cra_alignmask = 0, | ||
554 | .cra_type = &crypto_ablkcipher_type, | ||
555 | .cra_module = THIS_MODULE, | ||
556 | .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list), | ||
557 | .cra_init = ablk_init, | ||
558 | .cra_exit = ablk_exit, | ||
559 | .cra_u = { | ||
560 | .ablkcipher = { | ||
561 | .min_keysize = TF_MIN_KEY_SIZE + | ||
562 | TF_BLOCK_SIZE, | ||
563 | .max_keysize = TF_MAX_KEY_SIZE + | ||
564 | TF_BLOCK_SIZE, | ||
565 | .ivsize = TF_BLOCK_SIZE, | ||
566 | .setkey = ablk_set_key, | ||
567 | .encrypt = ablk_encrypt, | ||
568 | .decrypt = ablk_decrypt, | ||
569 | }, | ||
570 | }, | ||
571 | }, { | ||
572 | .cra_name = "xts(twofish)", | ||
573 | .cra_driver_name = "xts-twofish-avx", | ||
574 | .cra_priority = 400, | ||
575 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
576 | .cra_blocksize = TF_BLOCK_SIZE, | ||
577 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
578 | .cra_alignmask = 0, | ||
579 | .cra_type = &crypto_ablkcipher_type, | ||
580 | .cra_module = THIS_MODULE, | ||
581 | .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list), | ||
582 | .cra_init = ablk_init, | ||
583 | .cra_exit = ablk_exit, | ||
584 | .cra_u = { | ||
585 | .ablkcipher = { | ||
586 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
587 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
588 | .ivsize = TF_BLOCK_SIZE, | ||
589 | .setkey = ablk_set_key, | ||
590 | .encrypt = ablk_encrypt, | ||
591 | .decrypt = ablk_decrypt, | ||
592 | }, | ||
593 | }, | ||
594 | } }; | ||
595 | |||
596 | static int __init twofish_init(void) | ||
597 | { | ||
598 | u64 xcr0; | ||
599 | |||
600 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
601 | printk(KERN_INFO "AVX instructions are not detected.\n"); | ||
602 | return -ENODEV; | ||
603 | } | ||
604 | |||
605 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
606 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
607 | printk(KERN_INFO "AVX detected but unusable.\n"); | ||
608 | return -ENODEV; | ||
609 | } | ||
610 | |||
611 | return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); | ||
612 | } | ||
613 | |||
614 | static void __exit twofish_exit(void) | ||
615 | { | ||
616 | crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); | ||
617 | } | ||
618 | |||
619 | module_init(twofish_init); | ||
620 | module_exit(twofish_exit); | ||
621 | |||
622 | MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); | ||
623 | MODULE_LICENSE("GPL"); | ||
624 | MODULE_ALIAS("twofish"); | ||
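A note on the registration scheme above: the "__"-prefixed blkcipher entries are registered at priority 0 so generic lookups never select them directly; they exist only as synchronous backends for cryptd. The user-visible "ecb(twofish)", "cbc(twofish)" and friends are the priority-400 async wrappers. A minimal sketch of how a caller of this era would drive one of them follows; the function and variable names are illustrative, and real code must supply a completion callback and wait when the request returns -EINPROGRESS or -EBUSY.

    #include <linux/crypto.h>
    #include <linux/scatterlist.h>
    #include <linux/err.h>
    #include <crypto/twofish.h>

    /* Illustrative sketch, not from this patch: encrypt one block in
     * place with the async "cbc(twofish)" implementation. */
    static int example_cbc_twofish_encrypt(u8 *buf, const u8 *key,
                                           unsigned int keylen, u8 *iv)
    {
            struct crypto_ablkcipher *tfm;
            struct ablkcipher_request *req;
            struct scatterlist sg;
            int err;

            tfm = crypto_alloc_ablkcipher("cbc(twofish)", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            err = crypto_ablkcipher_setkey(tfm, key, keylen);
            if (err)
                    goto out_free_tfm;

            req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
            if (!req) {
                    err = -ENOMEM;
                    goto out_free_tfm;
            }

            sg_init_one(&sg, buf, TF_BLOCK_SIZE);
            /* Real code passes a completion callback here and waits on
             * -EINPROGRESS/-EBUSY; that handling is elided. */
            ablkcipher_request_set_callback(req, 0, NULL, NULL);
            ablkcipher_request_set_crypt(req, &sg, &sg, TF_BLOCK_SIZE, iv);

            err = crypto_ablkcipher_encrypt(req);

            ablkcipher_request_free(req);
    out_free_tfm:
            crypto_free_ablkcipher(tfm);
            return err;
    }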
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 922ab24cce31..15f9347316c8 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c | |||
@@ -3,11 +3,6 @@ | |||
3 | * | 3 | * |
4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> |
5 | * | 5 | * |
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
12 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
13 | * the Free Software Foundation; either version 2 of the License, or | 8 | * the Free Software Foundation; either version 2 of the License, or |
@@ -33,20 +28,13 @@ | |||
33 | #include <crypto/algapi.h> | 28 | #include <crypto/algapi.h> |
34 | #include <crypto/twofish.h> | 29 | #include <crypto/twofish.h> |
35 | #include <crypto/b128ops.h> | 30 | #include <crypto/b128ops.h> |
31 | #include <asm/crypto/twofish.h> | ||
32 | #include <asm/crypto/glue_helper.h> | ||
36 | #include <crypto/lrw.h> | 33 | #include <crypto/lrw.h> |
37 | #include <crypto/xts.h> | 34 | #include <crypto/xts.h> |
38 | 35 | ||
39 | /* regular block cipher functions from twofish_x86_64 module */ | 36 | EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); |
40 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, | 37 | EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); |
41 | const u8 *src); | ||
42 | asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, | ||
43 | const u8 *src); | ||
44 | |||
45 | /* 3-way parallel cipher functions */ | ||
46 | asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
47 | const u8 *src, bool xor); | ||
48 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
49 | const u8 *src); | ||
50 | 38 | ||
51 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | 39 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
52 | const u8 *src) | 40 | const u8 *src) |
@@ -60,311 +48,139 @@ static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, | |||
60 | __twofish_enc_blk_3way(ctx, dst, src, true); | 48 | __twofish_enc_blk_3way(ctx, dst, src, true); |
61 | } | 49 | } |
62 | 50 | ||
63 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | 51 | void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) |
64 | void (*fn)(struct twofish_ctx *, u8 *, const u8 *), | ||
65 | void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *)) | ||
66 | { | ||
67 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
68 | unsigned int bsize = TF_BLOCK_SIZE; | ||
69 | unsigned int nbytes; | ||
70 | int err; | ||
71 | |||
72 | err = blkcipher_walk_virt(desc, walk); | ||
73 | |||
74 | while ((nbytes = walk->nbytes)) { | ||
75 | u8 *wsrc = walk->src.virt.addr; | ||
76 | u8 *wdst = walk->dst.virt.addr; | ||
77 | |||
78 | /* Process three block batch */ | ||
79 | if (nbytes >= bsize * 3) { | ||
80 | do { | ||
81 | fn_3way(ctx, wdst, wsrc); | ||
82 | |||
83 | wsrc += bsize * 3; | ||
84 | wdst += bsize * 3; | ||
85 | nbytes -= bsize * 3; | ||
86 | } while (nbytes >= bsize * 3); | ||
87 | |||
88 | if (nbytes < bsize) | ||
89 | goto done; | ||
90 | } | ||
91 | |||
92 | /* Handle leftovers */ | ||
93 | do { | ||
94 | fn(ctx, wdst, wsrc); | ||
95 | |||
96 | wsrc += bsize; | ||
97 | wdst += bsize; | ||
98 | nbytes -= bsize; | ||
99 | } while (nbytes >= bsize); | ||
100 | |||
101 | done: | ||
102 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
103 | } | ||
104 | |||
105 | return err; | ||
106 | } | ||
107 | |||
108 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
109 | struct scatterlist *src, unsigned int nbytes) | ||
110 | { | 52 | { |
111 | struct blkcipher_walk walk; | 53 | u128 ivs[2]; |
112 | 54 | ||
113 | blkcipher_walk_init(&walk, dst, src, nbytes); | 55 | ivs[0] = src[0]; |
114 | return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way); | 56 | ivs[1] = src[1]; |
115 | } | ||
116 | 57 | ||
117 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 58 | twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); |
118 | struct scatterlist *src, unsigned int nbytes) | ||
119 | { | ||
120 | struct blkcipher_walk walk; | ||
121 | 59 | ||
122 | blkcipher_walk_init(&walk, dst, src, nbytes); | 60 | u128_xor(&dst[1], &dst[1], &ivs[0]); |
123 | return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); | 61 | u128_xor(&dst[2], &dst[2], &ivs[1]); |
124 | } | 62 | } |
63 | EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); | ||
125 | 64 | ||
126 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | 65 | void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) |
127 | struct blkcipher_walk *walk) | ||
128 | { | ||
129 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
130 | unsigned int bsize = TF_BLOCK_SIZE; | ||
131 | unsigned int nbytes = walk->nbytes; | ||
132 | u128 *src = (u128 *)walk->src.virt.addr; | ||
133 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
134 | u128 *iv = (u128 *)walk->iv; | ||
135 | |||
136 | do { | ||
137 | u128_xor(dst, src, iv); | ||
138 | twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
139 | iv = dst; | ||
140 | |||
141 | src += 1; | ||
142 | dst += 1; | ||
143 | nbytes -= bsize; | ||
144 | } while (nbytes >= bsize); | ||
145 | |||
146 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
147 | return nbytes; | ||
148 | } | ||
149 | |||
150 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
151 | struct scatterlist *src, unsigned int nbytes) | ||
152 | { | 66 | { |
153 | struct blkcipher_walk walk; | 67 | be128 ctrblk; |
154 | int err; | ||
155 | 68 | ||
156 | blkcipher_walk_init(&walk, dst, src, nbytes); | 69 | if (dst != src) |
157 | err = blkcipher_walk_virt(desc, &walk); | 70 | *dst = *src; |
158 | 71 | ||
159 | while ((nbytes = walk.nbytes)) { | 72 | u128_to_be128(&ctrblk, iv); |
160 | nbytes = __cbc_encrypt(desc, &walk); | 73 | u128_inc(iv); |
161 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
162 | } | ||
163 | 74 | ||
164 | return err; | 75 | twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
76 | u128_xor(dst, dst, (u128 *)&ctrblk); | ||
165 | } | 77 | } |
78 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); | ||
166 | 79 | ||
167 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 80 | void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, |
168 | struct blkcipher_walk *walk) | 81 | u128 *iv) |
169 | { | 82 | { |
170 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 83 | be128 ctrblks[3]; |
171 | unsigned int bsize = TF_BLOCK_SIZE; | ||
172 | unsigned int nbytes = walk->nbytes; | ||
173 | u128 *src = (u128 *)walk->src.virt.addr; | ||
174 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
175 | u128 ivs[3 - 1]; | ||
176 | u128 last_iv; | ||
177 | |||
178 | /* Start of the last block. */ | ||
179 | src += nbytes / bsize - 1; | ||
180 | dst += nbytes / bsize - 1; | ||
181 | |||
182 | last_iv = *src; | ||
183 | |||
184 | /* Process three block batch */ | ||
185 | if (nbytes >= bsize * 3) { | ||
186 | do { | ||
187 | nbytes -= bsize * (3 - 1); | ||
188 | src -= 3 - 1; | ||
189 | dst -= 3 - 1; | ||
190 | |||
191 | ivs[0] = src[0]; | ||
192 | ivs[1] = src[1]; | ||
193 | |||
194 | twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); | ||
195 | |||
196 | u128_xor(dst + 1, dst + 1, ivs + 0); | ||
197 | u128_xor(dst + 2, dst + 2, ivs + 1); | ||
198 | |||
199 | nbytes -= bsize; | ||
200 | if (nbytes < bsize) | ||
201 | goto done; | ||
202 | |||
203 | u128_xor(dst, dst, src - 1); | ||
204 | src -= 1; | ||
205 | dst -= 1; | ||
206 | } while (nbytes >= bsize * 3); | ||
207 | |||
208 | if (nbytes < bsize) | ||
209 | goto done; | ||
210 | } | ||
211 | |||
212 | /* Handle leftovers */ | ||
213 | for (;;) { | ||
214 | twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); | ||
215 | |||
216 | nbytes -= bsize; | ||
217 | if (nbytes < bsize) | ||
218 | break; | ||
219 | 84 | ||
220 | u128_xor(dst, dst, src - 1); | 85 | if (dst != src) { |
221 | src -= 1; | 86 | dst[0] = src[0]; |
222 | dst -= 1; | 87 | dst[1] = src[1]; |
88 | dst[2] = src[2]; | ||
223 | } | 89 | } |
224 | 90 | ||
225 | done: | 91 | u128_to_be128(&ctrblks[0], iv); |
226 | u128_xor(dst, dst, (u128 *)walk->iv); | 92 | u128_inc(iv); |
227 | *(u128 *)walk->iv = last_iv; | 93 | u128_to_be128(&ctrblks[1], iv); |
94 | u128_inc(iv); | ||
95 | u128_to_be128(&ctrblks[2], iv); | ||
96 | u128_inc(iv); | ||
228 | 97 | ||
229 | return nbytes; | 98 | twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); |
230 | } | 99 | } |
100 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); | ||
101 | |||
102 | static const struct common_glue_ctx twofish_enc = { | ||
103 | .num_funcs = 2, | ||
104 | .fpu_blocks_limit = -1, | ||
105 | |||
106 | .funcs = { { | ||
107 | .num_blocks = 3, | ||
108 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
109 | }, { | ||
110 | .num_blocks = 1, | ||
111 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
112 | } } | ||
113 | }; | ||
231 | 114 | ||
232 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 115 | static const struct common_glue_ctx twofish_ctr = { |
233 | struct scatterlist *src, unsigned int nbytes) | 116 | .num_funcs = 2, |
234 | { | 117 | .fpu_blocks_limit = -1, |
235 | struct blkcipher_walk walk; | 118 | |
236 | int err; | 119 | .funcs = { { |
237 | 120 | .num_blocks = 3, | |
238 | blkcipher_walk_init(&walk, dst, src, nbytes); | 121 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } |
239 | err = blkcipher_walk_virt(desc, &walk); | 122 | }, { |
123 | .num_blocks = 1, | ||
124 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } | ||
125 | } } | ||
126 | }; | ||
240 | 127 | ||
241 | while ((nbytes = walk.nbytes)) { | 128 | static const struct common_glue_ctx twofish_dec = { |
242 | nbytes = __cbc_decrypt(desc, &walk); | 129 | .num_funcs = 2, |
243 | err = blkcipher_walk_done(desc, &walk, nbytes); | 130 | .fpu_blocks_limit = -1, |
244 | } | 131 | |
132 | .funcs = { { | ||
133 | .num_blocks = 3, | ||
134 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
135 | }, { | ||
136 | .num_blocks = 1, | ||
137 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
138 | } } | ||
139 | }; | ||
245 | 140 | ||
246 | return err; | 141 | static const struct common_glue_ctx twofish_dec_cbc = { |
247 | } | 142 | .num_funcs = 2, |
143 | .fpu_blocks_limit = -1, | ||
144 | |||
145 | .funcs = { { | ||
146 | .num_blocks = 3, | ||
147 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
148 | }, { | ||
149 | .num_blocks = 1, | ||
150 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
151 | } } | ||
152 | }; | ||
248 | 153 | ||
249 | static inline void u128_to_be128(be128 *dst, const u128 *src) | 154 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
155 | struct scatterlist *src, unsigned int nbytes) | ||
250 | { | 156 | { |
251 | dst->a = cpu_to_be64(src->a); | 157 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); |
252 | dst->b = cpu_to_be64(src->b); | ||
253 | } | 158 | } |
254 | 159 | ||
255 | static inline void be128_to_u128(u128 *dst, const be128 *src) | 160 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
161 | struct scatterlist *src, unsigned int nbytes) | ||
256 | { | 162 | { |
257 | dst->a = be64_to_cpu(src->a); | 163 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); |
258 | dst->b = be64_to_cpu(src->b); | ||
259 | } | 164 | } |
260 | 165 | ||
261 | static inline void u128_inc(u128 *i) | 166 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
167 | struct scatterlist *src, unsigned int nbytes) | ||
262 | { | 168 | { |
263 | i->b++; | 169 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, |
264 | if (!i->b) | 170 | dst, src, nbytes); |
265 | i->a++; | ||
266 | } | 171 | } |
267 | 172 | ||
268 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 173 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
269 | struct blkcipher_walk *walk) | 174 | struct scatterlist *src, unsigned int nbytes) |
270 | { | 175 | { |
271 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 176 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, |
272 | u8 *ctrblk = walk->iv; | 177 | nbytes); |
273 | u8 keystream[TF_BLOCK_SIZE]; | ||
274 | u8 *src = walk->src.virt.addr; | ||
275 | u8 *dst = walk->dst.virt.addr; | ||
276 | unsigned int nbytes = walk->nbytes; | ||
277 | |||
278 | twofish_enc_blk(ctx, keystream, ctrblk); | ||
279 | crypto_xor(keystream, src, nbytes); | ||
280 | memcpy(dst, keystream, nbytes); | ||
281 | |||
282 | crypto_inc(ctrblk, TF_BLOCK_SIZE); | ||
283 | } | ||
284 | |||
285 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
286 | struct blkcipher_walk *walk) | ||
287 | { | ||
288 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
289 | unsigned int bsize = TF_BLOCK_SIZE; | ||
290 | unsigned int nbytes = walk->nbytes; | ||
291 | u128 *src = (u128 *)walk->src.virt.addr; | ||
292 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
293 | u128 ctrblk; | ||
294 | be128 ctrblocks[3]; | ||
295 | |||
296 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
297 | |||
298 | /* Process three block batch */ | ||
299 | if (nbytes >= bsize * 3) { | ||
300 | do { | ||
301 | if (dst != src) { | ||
302 | dst[0] = src[0]; | ||
303 | dst[1] = src[1]; | ||
304 | dst[2] = src[2]; | ||
305 | } | ||
306 | |||
307 | /* create ctrblks for parallel encrypt */ | ||
308 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
309 | u128_inc(&ctrblk); | ||
310 | u128_to_be128(&ctrblocks[1], &ctrblk); | ||
311 | u128_inc(&ctrblk); | ||
312 | u128_to_be128(&ctrblocks[2], &ctrblk); | ||
313 | u128_inc(&ctrblk); | ||
314 | |||
315 | twofish_enc_blk_xor_3way(ctx, (u8 *)dst, | ||
316 | (u8 *)ctrblocks); | ||
317 | |||
318 | src += 3; | ||
319 | dst += 3; | ||
320 | nbytes -= bsize * 3; | ||
321 | } while (nbytes >= bsize * 3); | ||
322 | |||
323 | if (nbytes < bsize) | ||
324 | goto done; | ||
325 | } | ||
326 | |||
327 | /* Handle leftovers */ | ||
328 | do { | ||
329 | if (dst != src) | ||
330 | *dst = *src; | ||
331 | |||
332 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
333 | u128_inc(&ctrblk); | ||
334 | |||
335 | twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
336 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
337 | |||
338 | src += 1; | ||
339 | dst += 1; | ||
340 | nbytes -= bsize; | ||
341 | } while (nbytes >= bsize); | ||
342 | |||
343 | done: | ||
344 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
345 | return nbytes; | ||
346 | } | 178 | } |
347 | 179 | ||
348 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 180 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
349 | struct scatterlist *src, unsigned int nbytes) | 181 | struct scatterlist *src, unsigned int nbytes) |
350 | { | 182 | { |
351 | struct blkcipher_walk walk; | 183 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); |
352 | int err; | ||
353 | |||
354 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
355 | err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); | ||
356 | |||
357 | while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { | ||
358 | nbytes = __ctr_crypt(desc, &walk); | ||
359 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
360 | } | ||
361 | |||
362 | if (walk.nbytes) { | ||
363 | ctr_crypt_final(desc, &walk); | ||
364 | err = blkcipher_walk_done(desc, &walk, 0); | ||
365 | } | ||
366 | |||
367 | return err; | ||
368 | } | 184 | } |
369 | 185 | ||
370 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | 186 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) |
@@ -397,13 +213,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |||
397 | twofish_dec_blk(ctx, srcdst, srcdst); | 213 | twofish_dec_blk(ctx, srcdst, srcdst); |
398 | } | 214 | } |
399 | 215 | ||
400 | struct twofish_lrw_ctx { | 216 | int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
401 | struct lrw_table_ctx lrw_table; | 217 | unsigned int keylen) |
402 | struct twofish_ctx twofish_ctx; | ||
403 | }; | ||
404 | |||
405 | static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
406 | unsigned int keylen) | ||
407 | { | 218 | { |
408 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 219 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
409 | int err; | 220 | int err; |
@@ -415,6 +226,7 @@ static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
415 | 226 | ||
416 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); | 227 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); |
417 | } | 228 | } |
229 | EXPORT_SYMBOL_GPL(lrw_twofish_setkey); | ||
418 | 230 | ||
419 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 231 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
420 | struct scatterlist *src, unsigned int nbytes) | 232 | struct scatterlist *src, unsigned int nbytes) |
@@ -450,20 +262,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
450 | return lrw_crypt(desc, dst, src, nbytes, &req); | 262 | return lrw_crypt(desc, dst, src, nbytes, &req); |
451 | } | 263 | } |
452 | 264 | ||
453 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | 265 | void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) |
454 | { | 266 | { |
455 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 267 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
456 | 268 | ||
457 | lrw_free_table(&ctx->lrw_table); | 269 | lrw_free_table(&ctx->lrw_table); |
458 | } | 270 | } |
271 | EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm); | ||
459 | 272 | ||
460 | struct twofish_xts_ctx { | 273 | int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
461 | struct twofish_ctx tweak_ctx; | 274 | unsigned int keylen) |
462 | struct twofish_ctx crypt_ctx; | ||
463 | }; | ||
464 | |||
465 | static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
466 | unsigned int keylen) | ||
467 | { | 275 | { |
468 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); | 276 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); |
469 | u32 *flags = &tfm->crt_flags; | 277 | u32 *flags = &tfm->crt_flags; |
@@ -486,6 +294,7 @@ static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
486 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, | 294 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, |
487 | flags); | 295 | flags); |
488 | } | 296 | } |
297 | EXPORT_SYMBOL_GPL(xts_twofish_setkey); | ||
489 | 298 | ||
490 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 299 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
491 | struct scatterlist *src, unsigned int nbytes) | 300 | struct scatterlist *src, unsigned int nbytes) |
@@ -596,7 +405,7 @@ static struct crypto_alg tf_algs[5] = { { | |||
596 | .cra_type = &crypto_blkcipher_type, | 405 | .cra_type = &crypto_blkcipher_type, |
597 | .cra_module = THIS_MODULE, | 406 | .cra_module = THIS_MODULE, |
598 | .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), | 407 | .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), |
599 | .cra_exit = lrw_exit_tfm, | 408 | .cra_exit = lrw_twofish_exit_tfm, |
600 | .cra_u = { | 409 | .cra_u = { |
601 | .blkcipher = { | 410 | .blkcipher = { |
602 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, | 411 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, |
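The common_glue_ctx tables introduced above encode the dispatch policy: the shared glue code (glue_helper.c, not shown in this diff) walks funcs[] from the widest batch down, which is why each table lists the 3-way routine before the single-block one. A simplified sketch of that loop, with scatterlist walking and FPU management stripped out, might look like this:

    #include <asm/crypto/glue_helper.h>

    /* Simplified sketch of the loop inside glue_ecb_crypt_128bit():
     * consume the widest batches first, fall through to the mandatory
     * single-block entry, and return the sub-block residue. */
    static unsigned int glue_ecb_dispatch(const struct common_glue_ctx *gctx,
                                          void *ctx, u8 *dst, const u8 *src,
                                          unsigned int bsize,
                                          unsigned int nbytes)
    {
            unsigned int i;

            for (i = 0; i < gctx->num_funcs && nbytes; i++) {
                    unsigned int func_bytes =
                            bsize * gctx->funcs[i].num_blocks;

                    while (nbytes >= func_bytes) {
                            gctx->funcs[i].fn_u.ecb(ctx, dst, src);
                            src += func_bytes;
                            dst += func_bytes;
                            nbytes -= func_bytes;
                    }
            }
            return nbytes;
    }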
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 49331bedc158..70780689599a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -75,23 +75,54 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
75 | } | 75 | } |
76 | #endif /* CONFIG_SMP */ | 76 | #endif /* CONFIG_SMP */ |
77 | 77 | ||
78 | #define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n" | ||
79 | |||
80 | #define b_replacement(number) "663"#number | ||
81 | #define e_replacement(number) "664"#number | ||
82 | |||
83 | #define alt_slen "662b-661b" | ||
84 | #define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" | ||
85 | |||
86 | #define ALTINSTR_ENTRY(feature, number) \ | ||
87 | " .long 661b - .\n" /* label */ \ | ||
88 | " .long " b_replacement(number)"f - .\n" /* new instruction */ \ | ||
89 | " .word " __stringify(feature) "\n" /* feature bit */ \ | ||
90 | " .byte " alt_slen "\n" /* source len */ \ | ||
91 | " .byte " alt_rlen(number) "\n" /* replacement len */ | ||
92 | |||
93 | #define DISCARD_ENTRY(number) /* rlen <= slen */ \ | ||
94 | " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n" | ||
95 | |||
96 | #define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \ | ||
97 | b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t" | ||
98 | |||
78 | /* alternative assembly primitive: */ | 99 | /* alternative assembly primitive: */ |
79 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ | 100 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ |
80 | \ | 101 | OLDINSTR(oldinstr) \ |
81 | "661:\n\t" oldinstr "\n662:\n" \ | 102 | ".section .altinstructions,\"a\"\n" \ |
82 | ".section .altinstructions,\"a\"\n" \ | 103 | ALTINSTR_ENTRY(feature, 1) \ |
83 | " .long 661b - .\n" /* label */ \ | 104 | ".previous\n" \ |
84 | " .long 663f - .\n" /* new instruction */ \ | 105 | ".section .discard,\"aw\",@progbits\n" \ |
85 | " .word " __stringify(feature) "\n" /* feature bit */ \ | 106 | DISCARD_ENTRY(1) \ |
86 | " .byte 662b-661b\n" /* sourcelen */ \ | 107 | ".previous\n" \ |
87 | " .byte 664f-663f\n" /* replacementlen */ \ | 108 | ".section .altinstr_replacement, \"ax\"\n" \ |
88 | ".previous\n" \ | 109 | ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ |
89 | ".section .discard,\"aw\",@progbits\n" \ | 110 | ".previous" |
90 | " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ | 111 | |
91 | ".previous\n" \ | 112 | #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ |
92 | ".section .altinstr_replacement, \"ax\"\n" \ | 113 | OLDINSTR(oldinstr) \ |
93 | "663:\n\t" newinstr "\n664:\n" /* replacement */ \ | 114 | ".section .altinstructions,\"a\"\n" \ |
94 | ".previous" | 115 | ALTINSTR_ENTRY(feature1, 1) \ |
116 | ALTINSTR_ENTRY(feature2, 2) \ | ||
117 | ".previous\n" \ | ||
118 | ".section .discard,\"aw\",@progbits\n" \ | ||
119 | DISCARD_ENTRY(1) \ | ||
120 | DISCARD_ENTRY(2) \ | ||
121 | ".previous\n" \ | ||
122 | ".section .altinstr_replacement, \"ax\"\n" \ | ||
123 | ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ | ||
124 | ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ | ||
125 | ".previous" | ||
95 | 126 | ||
96 | /* | 127 | /* |
97 | * This must be included *after* the definition of ALTERNATIVE due to | 128 | * This must be included *after* the definition of ALTERNATIVE due to |
@@ -140,6 +171,19 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
140 | : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) | 171 | : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) |
141 | 172 | ||
142 | /* | 173 | /* |
174 | * Like alternative_call, but there are two features and respective functions. | ||
175 | * If CPU has feature2, function2 is used. | ||
176 | * Otherwise, if CPU has feature1, function1 is used. | ||
177 | * Otherwise, old function is used. | ||
178 | */ | ||
179 | #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ | ||
180 | output, input...) \ | ||
181 | asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ | ||
182 | "call %P[new2]", feature2) \ | ||
183 | : output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ | ||
184 | [new2] "i" (newfunc2), ## input) | ||
185 | |||
186 | /* | ||
143 | * use this macro(s) if you need more than one output parameter | 187 | * use this macro(s) if you need more than one output parameter |
144 | * in alternative_io | 188 | * in alternative_io |
145 | */ | 189 | */ |
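As a usage sketch of the new macro (hypothetical helpers, not part of this patch; the shape matches how clear_page() was later converted): alternative_call_2() lets a single call site resolve at boot-time patching to the best of three routines, with feature2 taking precedence over feature1.

    /* clear_orig/clear_rep/clear_erms are assumed asm helpers taking a
     * page pointer in %rdi; apply_alternatives() patches the call once
     * at boot according to the CPU's feature bits. */
    static inline void clear_block(void *page)
    {
            alternative_call_2(clear_orig,
                               clear_rep, X86_FEATURE_REP_GOOD,
                               clear_erms, X86_FEATURE_ERMS,
                               "=D" (page),
                               "0" (page)
                               : "cc", "memory", "rax", "rcx");
    }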
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 49ad773f4b9f..b3341e9cd8fd 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h | |||
@@ -26,10 +26,31 @@ struct amd_l3_cache { | |||
26 | u8 subcaches[4]; | 26 | u8 subcaches[4]; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | struct threshold_block { | ||
30 | unsigned int block; | ||
31 | unsigned int bank; | ||
32 | unsigned int cpu; | ||
33 | u32 address; | ||
34 | u16 interrupt_enable; | ||
35 | bool interrupt_capable; | ||
36 | u16 threshold_limit; | ||
37 | struct kobject kobj; | ||
38 | struct list_head miscj; | ||
39 | }; | ||
40 | |||
41 | struct threshold_bank { | ||
42 | struct kobject *kobj; | ||
43 | struct threshold_block *blocks; | ||
44 | |||
45 | /* initialized to the number of CPUs on the node sharing this bank */ | ||
46 | atomic_t cpus; | ||
47 | }; | ||
48 | |||
29 | struct amd_northbridge { | 49 | struct amd_northbridge { |
30 | struct pci_dev *misc; | 50 | struct pci_dev *misc; |
31 | struct pci_dev *link; | 51 | struct pci_dev *link; |
32 | struct amd_l3_cache l3_cache; | 52 | struct amd_l3_cache l3_cache; |
53 | struct threshold_bank *bank4; | ||
33 | }; | 54 | }; |
34 | 55 | ||
35 | struct amd_northbridge_info { | 56 | struct amd_northbridge_info { |
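The new bank4 pointer lets every core on a node share a single threshold_bank, with the cpus refcount deciding who performs the teardown. An abbreviated sketch of the offline path (not the literal mce_amd.c code) is:

    #include <linux/atomic.h>
    #include <linux/slab.h>
    #include <asm/amd_nb.h>

    /* The last CPU on the node to go offline frees the shared bank. */
    static void threshold_put_bank(struct amd_northbridge *nb)
    {
            struct threshold_bank *b = nb->bank4;

            if (b && atomic_dec_and_test(&b->cpus)) {
                    kfree(b->blocks);
                    kfree(b);
                    nb->bank4 = NULL;
            }
    }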
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index eaff4790ed96..f34261296ffb 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -306,7 +306,8 @@ struct apic { | |||
306 | unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); | 306 | unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); |
307 | unsigned long (*check_apicid_present)(int apicid); | 307 | unsigned long (*check_apicid_present)(int apicid); |
308 | 308 | ||
309 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); | 309 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, |
310 | const struct cpumask *mask); | ||
310 | void (*init_apic_ldr)(void); | 311 | void (*init_apic_ldr)(void); |
311 | 312 | ||
312 | void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); | 313 | void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); |
@@ -331,9 +332,9 @@ struct apic { | |||
331 | unsigned long (*set_apic_id)(unsigned int id); | 332 | unsigned long (*set_apic_id)(unsigned int id); |
332 | unsigned long apic_id_mask; | 333 | unsigned long apic_id_mask; |
333 | 334 | ||
334 | unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); | 335 | int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, |
335 | unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, | 336 | const struct cpumask *andmask, |
336 | const struct cpumask *andmask); | 337 | unsigned int *apicid); |
337 | 338 | ||
338 | /* ipi */ | 339 | /* ipi */ |
339 | void (*send_IPI_mask)(const struct cpumask *mask, int vector); | 340 | void (*send_IPI_mask)(const struct cpumask *mask, int vector); |
@@ -464,6 +465,8 @@ static inline u32 safe_apic_wait_icr_idle(void) | |||
464 | return apic->safe_wait_icr_idle(); | 465 | return apic->safe_wait_icr_idle(); |
465 | } | 466 | } |
466 | 467 | ||
468 | extern void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)); | ||
469 | |||
467 | #else /* CONFIG_X86_LOCAL_APIC */ | 470 | #else /* CONFIG_X86_LOCAL_APIC */ |
468 | 471 | ||
469 | static inline u32 apic_read(u32 reg) { return 0; } | 472 | static inline u32 apic_read(u32 reg) { return 0; } |
@@ -473,6 +476,7 @@ static inline u64 apic_icr_read(void) { return 0; } | |||
473 | static inline void apic_icr_write(u32 low, u32 high) { } | 476 | static inline void apic_icr_write(u32 low, u32 high) { } |
474 | static inline void apic_wait_icr_idle(void) { } | 477 | static inline void apic_wait_icr_idle(void) { } |
475 | static inline u32 safe_apic_wait_icr_idle(void) { return 0; } | 478 | static inline u32 safe_apic_wait_icr_idle(void) { return 0; } |
479 | static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {} | ||
476 | 480 | ||
477 | #endif /* CONFIG_X86_LOCAL_APIC */ | 481 | #endif /* CONFIG_X86_LOCAL_APIC */ |
478 | 482 | ||
@@ -537,7 +541,12 @@ static inline const struct cpumask *default_target_cpus(void) | |||
537 | #endif | 541 | #endif |
538 | } | 542 | } |
539 | 543 | ||
540 | DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); | 544 | static inline const struct cpumask *online_target_cpus(void) |
545 | { | ||
546 | return cpu_online_mask; | ||
547 | } | ||
548 | |||
549 | DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); | ||
541 | 550 | ||
542 | 551 | ||
543 | static inline unsigned int read_apic_id(void) | 552 | static inline unsigned int read_apic_id(void) |
@@ -586,21 +595,50 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) | |||
586 | 595 | ||
587 | #endif | 596 | #endif |
588 | 597 | ||
589 | static inline unsigned int | 598 | static inline int |
590 | default_cpu_mask_to_apicid(const struct cpumask *cpumask) | 599 | flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
600 | const struct cpumask *andmask, | ||
601 | unsigned int *apicid) | ||
591 | { | 602 | { |
592 | return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; | 603 | unsigned long cpu_mask = cpumask_bits(cpumask)[0] & |
604 | cpumask_bits(andmask)[0] & | ||
605 | cpumask_bits(cpu_online_mask)[0] & | ||
606 | APIC_ALL_CPUS; | ||
607 | |||
608 | if (likely(cpu_mask)) { | ||
609 | *apicid = (unsigned int)cpu_mask; | ||
610 | return 0; | ||
611 | } else { | ||
612 | return -EINVAL; | ||
613 | } | ||
593 | } | 614 | } |
594 | 615 | ||
595 | static inline unsigned int | 616 | extern int |
596 | default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 617 | default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
597 | const struct cpumask *andmask) | 618 | const struct cpumask *andmask, |
619 | unsigned int *apicid); | ||
620 | |||
621 | static inline void | ||
622 | flat_vector_allocation_domain(int cpu, struct cpumask *retmask, | ||
623 | const struct cpumask *mask) | ||
598 | { | 624 | { |
599 | unsigned long mask1 = cpumask_bits(cpumask)[0]; | 625 | /* Careful. Some cpus do not strictly honor the set of cpus |
600 | unsigned long mask2 = cpumask_bits(andmask)[0]; | 626 | * specified in the interrupt destination when using lowest |
601 | unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; | 627 | * priority interrupt delivery mode. |
628 | * | ||
629 | * In particular, a hyperthreading cpu was observed to | ||
630 | * deliver interrupts to the wrong hyperthread when only one | ||
631 | * hyperthread was specified in the interrupt destination. | ||
632 | */ | ||
633 | cpumask_clear(retmask); | ||
634 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
635 | } | ||
602 | 636 | ||
603 | return (unsigned int)(mask1 & mask2 & mask3); | 637 | static inline void |
638 | default_vector_allocation_domain(int cpu, struct cpumask *retmask, | ||
639 | const struct cpumask *mask) | ||
640 | { | ||
641 | cpumask_copy(retmask, cpumask_of(cpu)); | ||
604 | } | 642 | } |
605 | 643 | ||
606 | static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) | 644 | static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) |
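The reworked cpu_mask_to_apicid_and() hook now reports "no valid target" through an error code instead of a magic APIC ID, so callers must check before programming a destination. A sketch of the new convention (illustrative caller, not from this patch):

    /* A destination APIC ID is only valid when the hook returns 0. */
    static int example_program_destination(const struct cpumask *mask)
    {
            unsigned int dest;
            int err;

            err = apic->cpu_mask_to_apicid_and(mask, cpu_online_mask,
                                               &dest);
            if (err)
                    return err;     /* no usable CPU in the intersection */

            /* ... write 'dest' to the IO-APIC RTE or MSI address ... */
            return 0;
    }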
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index a6983b277220..72f5009deb5a 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
@@ -264,6 +264,13 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) | |||
264 | * This operation is non-atomic and can be reordered. | 264 | * This operation is non-atomic and can be reordered. |
265 | * If two examples of this operation race, one can appear to succeed | 265 | * If two examples of this operation race, one can appear to succeed |
266 | * but actually fail. You must protect multiple accesses with a lock. | 266 | * but actually fail. You must protect multiple accesses with a lock. |
267 | * | ||
268 | * Note: the operation is performed atomically with respect to | ||
269 | * the local CPU, but not other CPUs. Portable code should not | ||
270 | * rely on this behaviour. | ||
271 | * KVM relies on this behaviour on x86 for modifying memory that is also | ||
272 | * accessed from a hypervisor on the same CPU if running in a VM: don't change | ||
273 | * this without also updating arch/x86/kernel/kvm.c | ||
267 | */ | 274 | */ |
268 | static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) | 275 | static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) |
269 | { | 276 | { |
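The new comment codifies that __test_and_clear_bit() is atomic only with respect to the local CPU (a property KVM now depends on); code whose word can be touched by other CPUs should still use the locked variant. A small contrast, with made-up names:

    #include <linux/bitops.h>
    #include <linux/percpu.h>
    #include <linux/printk.h>

    static unsigned long flags_shared;               /* touched by several CPUs */
    static DEFINE_PER_CPU(unsigned long, flags_local); /* CPU-private word */

    static void bitop_example(void)
    {
            /* Cross-CPU visibility required: use the locked form. */
            if (test_and_clear_bit(0, &flags_shared))
                    pr_debug("shared flag was set\n");

            /* The word is only touched by this CPU (and, per the new
             * comment, possibly a hypervisor emulating it): the
             * unlocked form avoids the lock prefix. */
            if (__test_and_clear_bit(0, &get_cpu_var(flags_local)))
                    pr_debug("local flag was set\n");
            put_cpu_var(flags_local);
    }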
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index eb45aa6b1f27..2ad874cb661c 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h | |||
@@ -66,6 +66,7 @@ struct setup_header { | |||
66 | __u64 setup_data; | 66 | __u64 setup_data; |
67 | __u64 pref_address; | 67 | __u64 pref_address; |
68 | __u32 init_size; | 68 | __u32 init_size; |
69 | __u32 handover_offset; | ||
69 | } __attribute__((packed)); | 70 | } __attribute__((packed)); |
70 | 71 | ||
71 | struct sys_desc_table { | 72 | struct sys_desc_table { |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f91e80f4f180..6b7ee5ff6820 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -207,6 +207,8 @@ | |||
207 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | 207 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ |
208 | #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ | 208 | #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ |
209 | #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ | 209 | #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ |
210 | #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ | ||
211 | #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ | ||
210 | 212 | ||
211 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 213 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
212 | 214 | ||
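Once the CPUID leaf 7 bits are scanned into feature word 9, the new definitions become testable through the usual predicates; for example (sketch):

    #include <linux/types.h>
    #include <asm/cpufeature.h>

    static bool have_rdseed_and_adx(void)
    {
            return boot_cpu_has(X86_FEATURE_RDSEED) &&
                   boot_cpu_has(X86_FEATURE_ADX);
    }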
diff --git a/arch/x86/include/asm/crypto/ablk_helper.h b/arch/x86/include/asm/crypto/ablk_helper.h new file mode 100644 index 000000000000..4f93df50c23e --- /dev/null +++ b/arch/x86/include/asm/crypto/ablk_helper.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /* | ||
2 | * Shared async block cipher helpers | ||
3 | */ | ||
4 | |||
5 | #ifndef _CRYPTO_ABLK_HELPER_H | ||
6 | #define _CRYPTO_ABLK_HELPER_H | ||
7 | |||
8 | #include <linux/crypto.h> | ||
9 | #include <linux/kernel.h> | ||
10 | #include <crypto/cryptd.h> | ||
11 | |||
12 | struct async_helper_ctx { | ||
13 | struct cryptd_ablkcipher *cryptd_tfm; | ||
14 | }; | ||
15 | |||
16 | extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
17 | unsigned int key_len); | ||
18 | |||
19 | extern int __ablk_encrypt(struct ablkcipher_request *req); | ||
20 | |||
21 | extern int ablk_encrypt(struct ablkcipher_request *req); | ||
22 | |||
23 | extern int ablk_decrypt(struct ablkcipher_request *req); | ||
24 | |||
25 | extern void ablk_exit(struct crypto_tfm *tfm); | ||
26 | |||
27 | extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name); | ||
28 | |||
29 | extern int ablk_init(struct crypto_tfm *tfm); | ||
30 | |||
31 | #endif /* _CRYPTO_ABLK_HELPER_H */ | ||
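The header only declares the helpers; as a rough sketch of what ablk_init() does in ablk_helper.c (the implementation is not part of this hunk), it derives the internal backend name by prefixing the wrapper's driver name, so "ecb-twofish-avx" binds to "__driver-ecb-twofish-avx" via cryptd:

    #include <linux/crypto.h>
    #include <asm/crypto/ablk_helper.h>

    int ablk_init(struct crypto_tfm *tfm)
    {
            char drv_name[CRYPTO_MAX_ALG_NAME];

            snprintf(drv_name, sizeof(drv_name), "__driver-%s",
                     crypto_tfm_alg_driver_name(tfm));

            return ablk_init_common(tfm, drv_name);
    }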
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/crypto/aes.h index 80545a1cbe39..80545a1cbe39 100644 --- a/arch/x86/include/asm/aes.h +++ b/arch/x86/include/asm/crypto/aes.h | |||
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h new file mode 100644 index 000000000000..3e408bddc96f --- /dev/null +++ b/arch/x86/include/asm/crypto/glue_helper.h | |||
@@ -0,0 +1,115 @@ | |||
1 | /* | ||
2 | * Shared glue code for 128bit block ciphers | ||
3 | */ | ||
4 | |||
5 | #ifndef _CRYPTO_GLUE_HELPER_H | ||
6 | #define _CRYPTO_GLUE_HELPER_H | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/crypto.h> | ||
10 | #include <asm/i387.h> | ||
11 | #include <crypto/b128ops.h> | ||
12 | |||
13 | typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); | ||
14 | typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); | ||
15 | typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, | ||
16 | u128 *iv); | ||
17 | |||
18 | #define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) | ||
19 | #define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) | ||
20 | #define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) | ||
21 | |||
22 | struct common_glue_func_entry { | ||
23 | unsigned int num_blocks; /* number of blocks that @fn will process */ | ||
24 | union { | ||
25 | common_glue_func_t ecb; | ||
26 | common_glue_cbc_func_t cbc; | ||
27 | common_glue_ctr_func_t ctr; | ||
28 | } fn_u; | ||
29 | }; | ||
30 | |||
31 | struct common_glue_ctx { | ||
32 | unsigned int num_funcs; | ||
33 | int fpu_blocks_limit; /* -1 means fpu not needed at all */ | ||
34 | |||
35 | /* | ||
36 | * The first funcs entry must have the largest num_blocks and the | ||
37 | * last funcs entry must have num_blocks == 1! | ||
38 | */ | ||
39 | struct common_glue_func_entry funcs[]; | ||
40 | }; | ||
41 | |||
42 | static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, | ||
43 | struct blkcipher_desc *desc, | ||
44 | bool fpu_enabled, unsigned int nbytes) | ||
45 | { | ||
46 | if (likely(fpu_blocks_limit < 0)) | ||
47 | return false; | ||
48 | |||
49 | if (fpu_enabled) | ||
50 | return true; | ||
51 | |||
52 | /* | ||
53 | * Vector registers are only used when the chunk to be processed is | ||
54 | * large enough, so do not enable the FPU until it is necessary. | ||
55 | */ | ||
56 | if (nbytes < bsize * (unsigned int)fpu_blocks_limit) | ||
57 | return false; | ||
58 | |||
59 | if (desc) { | ||
60 | /* prevent sleeping if FPU is in use */ | ||
61 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
62 | } | ||
63 | |||
64 | kernel_fpu_begin(); | ||
65 | return true; | ||
66 | } | ||
67 | |||
68 | static inline void glue_fpu_end(bool fpu_enabled) | ||
69 | { | ||
70 | if (fpu_enabled) | ||
71 | kernel_fpu_end(); | ||
72 | } | ||
73 | |||
74 | static inline void u128_to_be128(be128 *dst, const u128 *src) | ||
75 | { | ||
76 | dst->a = cpu_to_be64(src->a); | ||
77 | dst->b = cpu_to_be64(src->b); | ||
78 | } | ||
79 | |||
80 | static inline void be128_to_u128(u128 *dst, const be128 *src) | ||
81 | { | ||
82 | dst->a = be64_to_cpu(src->a); | ||
83 | dst->b = be64_to_cpu(src->b); | ||
84 | } | ||
85 | |||
86 | static inline void u128_inc(u128 *i) | ||
87 | { | ||
88 | i->b++; | ||
89 | if (!i->b) | ||
90 | i->a++; | ||
91 | } | ||
92 | |||
93 | extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
94 | struct blkcipher_desc *desc, | ||
95 | struct scatterlist *dst, | ||
96 | struct scatterlist *src, unsigned int nbytes); | ||
97 | |||
98 | extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
99 | struct blkcipher_desc *desc, | ||
100 | struct scatterlist *dst, | ||
101 | struct scatterlist *src, | ||
102 | unsigned int nbytes); | ||
103 | |||
104 | extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
105 | struct blkcipher_desc *desc, | ||
106 | struct scatterlist *dst, | ||
107 | struct scatterlist *src, | ||
108 | unsigned int nbytes); | ||
109 | |||
110 | extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
111 | struct blkcipher_desc *desc, | ||
112 | struct scatterlist *dst, | ||
113 | struct scatterlist *src, unsigned int nbytes); | ||
114 | |||
115 | #endif /* _CRYPTO_GLUE_HELPER_H */ | ||
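The ordering invariant documented in common_glue_ctx matters because dispatch falls through descending batch sizes, and fpu_blocks_limit feeds glue_fpu_begin() so the FPU is never saved for short requests. A hypothetical registration (placeholder mycipher_* names) looks like:

    static const struct common_glue_ctx mycipher_enc = {
            .num_funcs = 2,
            .fpu_blocks_limit = 4,  /* enable the FPU only for >= 4 blocks */

            .funcs = { {
                    .num_blocks = 4,
                    .fn_u = { .ecb = GLUE_FUNC_CAST(mycipher_enc_blk_4way) }
            }, {
                    .num_blocks = 1,
                    .fn_u = { .ecb = GLUE_FUNC_CAST(mycipher_enc_blk) }
            } }
    };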
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h new file mode 100644 index 000000000000..432deedd2945 --- /dev/null +++ b/arch/x86/include/asm/crypto/serpent-avx.h | |||
@@ -0,0 +1,32 @@ | |||
1 | #ifndef ASM_X86_SERPENT_AVX_H | ||
2 | #define ASM_X86_SERPENT_AVX_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | #include <crypto/serpent.h> | ||
6 | |||
7 | #define SERPENT_PARALLEL_BLOCKS 8 | ||
8 | |||
9 | asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
10 | const u8 *src, bool xor); | ||
11 | asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
12 | const u8 *src); | ||
13 | |||
14 | static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
15 | const u8 *src) | ||
16 | { | ||
17 | __serpent_enc_blk_8way_avx(ctx, dst, src, false); | ||
18 | } | ||
19 | |||
20 | static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, | ||
21 | const u8 *src) | ||
22 | { | ||
23 | __serpent_enc_blk_8way_avx(ctx, dst, src, true); | ||
24 | } | ||
25 | |||
26 | static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
27 | const u8 *src) | ||
28 | { | ||
29 | serpent_dec_blk_8way_avx(ctx, dst, src); | ||
30 | } | ||
31 | |||
32 | #endif | ||
diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/crypto/serpent-sse2.h index d3ef63fe0c81..e6e77dffbdab 100644 --- a/arch/x86/include/asm/serpent.h +++ b/arch/x86/include/asm/crypto/serpent-sse2.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef ASM_X86_SERPENT_H | 1 | #ifndef ASM_X86_SERPENT_SSE2_H |
2 | #define ASM_X86_SERPENT_H | 2 | #define ASM_X86_SERPENT_SSE2_H |
3 | 3 | ||
4 | #include <linux/crypto.h> | 4 | #include <linux/crypto.h> |
5 | #include <crypto/serpent.h> | 5 | #include <crypto/serpent.h> |
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h new file mode 100644 index 000000000000..9d2c514bd5f9 --- /dev/null +++ b/arch/x86/include/asm/crypto/twofish.h | |||
@@ -0,0 +1,46 @@ | |||
1 | #ifndef ASM_X86_TWOFISH_H | ||
2 | #define ASM_X86_TWOFISH_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | #include <crypto/twofish.h> | ||
6 | #include <crypto/lrw.h> | ||
7 | #include <crypto/b128ops.h> | ||
8 | |||
9 | struct twofish_lrw_ctx { | ||
10 | struct lrw_table_ctx lrw_table; | ||
11 | struct twofish_ctx twofish_ctx; | ||
12 | }; | ||
13 | |||
14 | struct twofish_xts_ctx { | ||
15 | struct twofish_ctx tweak_ctx; | ||
16 | struct twofish_ctx crypt_ctx; | ||
17 | }; | ||
18 | |||
19 | /* regular block cipher functions from twofish_x86_64 module */ | ||
20 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, | ||
21 | const u8 *src); | ||
22 | asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, | ||
23 | const u8 *src); | ||
24 | |||
25 | /* 3-way parallel cipher functions */ | ||
26 | asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
27 | const u8 *src, bool xor); | ||
28 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
29 | const u8 *src); | ||
30 | |||
31 | /* helpers from twofish_x86_64-3way module */ | ||
32 | extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); | ||
33 | extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, | ||
34 | u128 *iv); | ||
35 | extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, | ||
36 | u128 *iv); | ||
37 | |||
38 | extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
39 | unsigned int keylen); | ||
40 | |||
41 | extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); | ||
42 | |||
43 | extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
44 | unsigned int keylen); | ||
45 | |||
46 | #endif /* ASM_X86_TWOFISH_H */ | ||
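
The 3-way CTR helpers declared above pair naturally with a loop that drains whole triples first. A minimal sketch, assuming the signatures above; example_twofish_ctr is hypothetical, and u128 comes from <crypto/b128ops.h>, already included by this header:

	static void example_twofish_ctr(struct twofish_ctx *ctx, u128 *dst,
					const u128 *src, unsigned int nblocks,
					u128 *iv)
	{
		while (nblocks >= 3) {			/* 3 blocks per call */
			twofish_enc_blk_ctr_3way(ctx, dst, src, iv);
			src += 3;
			dst += 3;
			nblocks -= 3;
		}
		while (nblocks--)			/* finish one at a time */
			twofish_enc_blk_ctr(ctx, dst++, src++, iv);
	}
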
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index cc70c1c78ca4..75ce3f47d204 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h | |||
@@ -4,9 +4,7 @@ | |||
4 | enum reboot_type { | 4 | enum reboot_type { |
5 | BOOT_TRIPLE = 't', | 5 | BOOT_TRIPLE = 't', |
6 | BOOT_KBD = 'k', | 6 | BOOT_KBD = 'k', |
7 | #ifdef CONFIG_X86_32 | ||
8 | BOOT_BIOS = 'b', | 7 | BOOT_BIOS = 'b', |
9 | #endif | ||
10 | BOOT_ACPI = 'a', | 8 | BOOT_ACPI = 'a', |
11 | BOOT_EFI = 'e', | 9 | BOOT_EFI = 'e', |
12 | BOOT_CF9 = 'p', | 10 | BOOT_CF9 = 'p', |
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 0baa628e330c..40afa0005c69 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -15,15 +15,6 @@ BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) | |||
15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) | 15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) |
16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) | 16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) |
17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) | 17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) |
18 | |||
19 | .irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | ||
20 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | ||
21 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | ||
22 | BUILD_INTERRUPT3(invalidate_interrupt\idx, | ||
23 | (INVALIDATE_TLB_VECTOR_START)+\idx, | ||
24 | smp_invalidate_interrupt) | ||
25 | .endif | ||
26 | .endr | ||
27 | #endif | 18 | #endif |
28 | 19 | ||
29 | BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) | 20 | BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) |
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index dbe82a5c5eac..d3d74698dce9 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h | |||
@@ -99,7 +99,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) | |||
99 | virtual_dma_residue += virtual_dma_count; | 99 | virtual_dma_residue += virtual_dma_count; |
100 | virtual_dma_count = 0; | 100 | virtual_dma_count = 0; |
101 | #ifdef TRACE_FLPY_INT | 101 | #ifdef TRACE_FLPY_INT |
102 | printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", | 102 | printk(KERN_DEBUG "count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", |
103 | virtual_dma_count, virtual_dma_residue, calls, bytes, | 103 | virtual_dma_count, virtual_dma_residue, calls, bytes, |
104 | dma_wait); | 104 | dma_wait); |
105 | calls = 0; | 105 | calls = 0; |
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 7a15153c675d..b518c7509933 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h | |||
@@ -49,6 +49,7 @@ extern const struct hypervisor_x86 *x86_hyper; | |||
49 | extern const struct hypervisor_x86 x86_hyper_vmware; | 49 | extern const struct hypervisor_x86 x86_hyper_vmware; |
50 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | 50 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; |
51 | extern const struct hypervisor_x86 x86_hyper_xen_hvm; | 51 | extern const struct hypervisor_x86 x86_hyper_xen_hvm; |
52 | extern const struct hypervisor_x86 x86_hyper_kvm; | ||
52 | 53 | ||
53 | static inline bool hypervisor_x2apic_available(void) | 54 | static inline bool hypervisor_x2apic_available(void) |
54 | { | 55 | { |
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index dffc38ee6255..345c99cef152 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -5,7 +5,6 @@ extern struct dma_map_ops nommu_dma_ops; | |||
5 | extern int force_iommu, no_iommu; | 5 | extern int force_iommu, no_iommu; |
6 | extern int iommu_detected; | 6 | extern int iommu_detected; |
7 | extern int iommu_pass_through; | 7 | extern int iommu_pass_through; |
8 | extern int iommu_group_mf; | ||
9 | 8 | ||
10 | /* 10 seconds */ | 9 | /* 10 seconds */ |
11 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) | 10 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4b4448761e88..1508e518c7e3 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -119,17 +119,6 @@ | |||
119 | */ | 119 | */ |
120 | #define LOCAL_TIMER_VECTOR 0xef | 120 | #define LOCAL_TIMER_VECTOR 0xef |
121 | 121 | ||
122 | /* up to 32 vectors used for spreading out TLB flushes: */ | ||
123 | #if NR_CPUS <= 32 | ||
124 | # define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS) | ||
125 | #else | ||
126 | # define NUM_INVALIDATE_TLB_VECTORS (32) | ||
127 | #endif | ||
128 | |||
129 | #define INVALIDATE_TLB_VECTOR_END (0xee) | ||
130 | #define INVALIDATE_TLB_VECTOR_START \ | ||
131 | (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1) | ||
132 | |||
133 | #define NR_VECTORS 256 | 122 | #define NR_VECTORS 256 |
134 | 123 | ||
135 | #define FPU_IRQ 13 | 124 | #define FPU_IRQ 13 |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index e7d1c194d272..246617efd67f 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -12,6 +12,7 @@ | |||
12 | /* Select x86 specific features in <linux/kvm.h> */ | 12 | /* Select x86 specific features in <linux/kvm.h> */ |
13 | #define __KVM_HAVE_PIT | 13 | #define __KVM_HAVE_PIT |
14 | #define __KVM_HAVE_IOAPIC | 14 | #define __KVM_HAVE_IOAPIC |
15 | #define __KVM_HAVE_IRQ_LINE | ||
15 | #define __KVM_HAVE_DEVICE_ASSIGNMENT | 16 | #define __KVM_HAVE_DEVICE_ASSIGNMENT |
16 | #define __KVM_HAVE_MSI | 17 | #define __KVM_HAVE_MSI |
17 | #define __KVM_HAVE_USER_NMI | 18 | #define __KVM_HAVE_USER_NMI |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 1ac46c22dd50..c764f43b71c5 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -192,8 +192,8 @@ struct x86_emulate_ops { | |||
192 | struct x86_instruction_info *info, | 192 | struct x86_instruction_info *info, |
193 | enum x86_intercept_stage stage); | 193 | enum x86_intercept_stage stage); |
194 | 194 | ||
195 | bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, | 195 | void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, |
196 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); | 196 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); |
197 | }; | 197 | }; |
198 | 198 | ||
199 | typedef u32 __attribute__((vector_size(16))) sse128_t; | 199 | typedef u32 __attribute__((vector_size(16))) sse128_t; |
@@ -280,9 +280,9 @@ struct x86_emulate_ctxt { | |||
280 | u8 modrm_seg; | 280 | u8 modrm_seg; |
281 | bool rip_relative; | 281 | bool rip_relative; |
282 | unsigned long _eip; | 282 | unsigned long _eip; |
283 | struct operand memop; | ||
283 | /* Fields above regs are cleared together. */ | 284 | /* Fields above regs are cleared together. */ |
284 | unsigned long regs[NR_VCPU_REGS]; | 285 | unsigned long regs[NR_VCPU_REGS]; |
285 | struct operand memop; | ||
286 | struct operand *memopp; | 286 | struct operand *memopp; |
287 | struct fetch_cache fetch; | 287 | struct fetch_cache fetch; |
288 | struct read_cache io_read; | 288 | struct read_cache io_read; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index db7c1f2709a2..09155d64cf7e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -48,12 +48,13 @@ | |||
48 | 48 | ||
49 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) | 49 | #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) |
50 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) | 50 | #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) |
51 | #define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL | ||
51 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ | 52 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ |
52 | 0xFFFFFF0000000000ULL) | 53 | 0xFFFFFF0000000000ULL) |
53 | #define CR4_RESERVED_BITS \ | 54 | #define CR4_RESERVED_BITS \ |
54 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | 55 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ |
55 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | 56 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ |
56 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 57 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ |
57 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \ | 58 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \ |
58 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | 59 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) |
59 | 60 | ||
@@ -175,6 +176,13 @@ enum { | |||
175 | 176 | ||
176 | /* apic attention bits */ | 177 | /* apic attention bits */ |
177 | #define KVM_APIC_CHECK_VAPIC 0 | 178 | #define KVM_APIC_CHECK_VAPIC 0 |
179 | /* | ||
180 | * The following bit is set with PV-EOI, unset on EOI. | ||
181 | * We detect PV-EOI changes made by the guest by comparing | ||
182 | * this bit with the PV-EOI value in guest memory. | ||
183 | * See the implementation in apic_update_pv_eoi. | ||
184 | */ | ||
185 | #define KVM_APIC_PV_EOI_PENDING 1 | ||
178 | 186 | ||
179 | /* | 187 | /* |
180 | * We don't want allocation failures within the mmu code, so we preallocate | 188 | * We don't want allocation failures within the mmu code, so we preallocate |
@@ -313,8 +321,8 @@ struct kvm_pmu { | |||
313 | u64 counter_bitmask[2]; | 321 | u64 counter_bitmask[2]; |
314 | u64 global_ctrl_mask; | 322 | u64 global_ctrl_mask; |
315 | u8 version; | 323 | u8 version; |
316 | struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC]; | 324 | struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; |
317 | struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED]; | 325 | struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; |
318 | struct irq_work irq_work; | 326 | struct irq_work irq_work; |
319 | u64 reprogram_pmi; | 327 | u64 reprogram_pmi; |
320 | }; | 328 | }; |
@@ -484,6 +492,11 @@ struct kvm_vcpu_arch { | |||
484 | u64 length; | 492 | u64 length; |
485 | u64 status; | 493 | u64 status; |
486 | } osvw; | 494 | } osvw; |
495 | |||
496 | struct { | ||
497 | u64 msr_val; | ||
498 | struct gfn_to_hva_cache data; | ||
499 | } pv_eoi; | ||
487 | }; | 500 | }; |
488 | 501 | ||
489 | struct kvm_lpage_info { | 502 | struct kvm_lpage_info { |
@@ -661,6 +674,7 @@ struct kvm_x86_ops { | |||
661 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 674 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
662 | int (*get_lpage_level)(void); | 675 | int (*get_lpage_level)(void); |
663 | bool (*rdtscp_supported)(void); | 676 | bool (*rdtscp_supported)(void); |
677 | bool (*invpcid_supported)(void); | ||
664 | void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); | 678 | void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); |
665 | 679 | ||
666 | void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 680 | void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
@@ -802,7 +816,20 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
802 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); | 816 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); |
803 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); | 817 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
804 | 818 | ||
805 | int kvm_pic_set_irq(void *opaque, int irq, int level); | 819 | static inline int __kvm_irq_line_state(unsigned long *irq_state, |
820 | int irq_source_id, int level) | ||
821 | { | ||
822 | /* Logical OR for level-triggered interrupts */ | ||
823 | if (level) | ||
824 | __set_bit(irq_source_id, irq_state); | ||
825 | else | ||
826 | __clear_bit(irq_source_id, irq_state); | ||
827 | |||
828 | return !!(*irq_state); | ||
829 | } | ||
830 | |||
831 | int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); | ||
832 | void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); | ||
806 | 833 | ||
807 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); | 834 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
808 | 835 | ||
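
Since __kvm_irq_line_state() is now defined inline above, its semantics can be shown directly: each pin keeps a bitmap with one bit per interrupt source, and the reported line level is the OR across sources, which is why a level-triggered line only drops once every source has cleared its bit. An illustration, where pin_state stands in for the per-pin word the PIC/IOAPIC keeps:

	static unsigned long pin_state;		/* one bit per irq_source_id */

	static void example_pin(void)
	{
		__kvm_irq_line_state(&pin_state, 0, 1);	/* src 0 raises -> returns 1 */
		__kvm_irq_line_state(&pin_state, 1, 1);	/* src 1 raises -> returns 1 */
		__kvm_irq_line_state(&pin_state, 0, 0);	/* src 0 drops -> still 1 */
		__kvm_irq_line_state(&pin_state, 1, 0);	/* src 1 drops -> now 0 */
	}
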
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 63ab1661d00e..2f7712e08b1e 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -22,6 +22,7 @@ | |||
22 | #define KVM_FEATURE_CLOCKSOURCE2 3 | 22 | #define KVM_FEATURE_CLOCKSOURCE2 3 |
23 | #define KVM_FEATURE_ASYNC_PF 4 | 23 | #define KVM_FEATURE_ASYNC_PF 4 |
24 | #define KVM_FEATURE_STEAL_TIME 5 | 24 | #define KVM_FEATURE_STEAL_TIME 5 |
25 | #define KVM_FEATURE_PV_EOI 6 | ||
25 | 26 | ||
26 | /* The last 8 bits are used to indicate how to interpret the flags field | 27 | /* The last 8 bits are used to indicate how to interpret the flags field |
27 | * in pvclock structure. If no bits are set, all flags are ignored. | 28 | * in pvclock structure. If no bits are set, all flags are ignored. |
@@ -37,6 +38,7 @@ | |||
37 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 | 38 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 |
38 | #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 | 39 | #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 |
39 | #define MSR_KVM_STEAL_TIME 0x4b564d03 | 40 | #define MSR_KVM_STEAL_TIME 0x4b564d03 |
41 | #define MSR_KVM_PV_EOI_EN 0x4b564d04 | ||
40 | 42 | ||
41 | struct kvm_steal_time { | 43 | struct kvm_steal_time { |
42 | __u64 steal; | 44 | __u64 steal; |
@@ -89,6 +91,11 @@ struct kvm_vcpu_pv_apf_data { | |||
89 | __u32 enabled; | 91 | __u32 enabled; |
90 | }; | 92 | }; |
91 | 93 | ||
94 | #define KVM_PV_EOI_BIT 0 | ||
95 | #define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT) | ||
96 | #define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK | ||
97 | #define KVM_PV_EOI_DISABLED 0x0 | ||
98 | |||
92 | #ifdef __KERNEL__ | 99 | #ifdef __KERNEL__ |
93 | #include <asm/processor.h> | 100 | #include <asm/processor.h> |
94 | 101 | ||
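
A guest would be expected to opt in by writing the guest-physical address of its flag byte, tagged with KVM_PV_EOI_ENABLED, to MSR_KVM_PV_EOI_EN, after checking the new feature bit. A sketch modeled on the other KVM PV MSRs; kvm_apic_eoi is a hypothetical per-cpu flag word:

	static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi);

	static void example_enable_pv_eoi(void)
	{
		unsigned long pa = __pa(this_cpu_ptr(&kvm_apic_eoi));

		if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
			wrmsrl(MSR_KVM_PV_EOI_EN, pa | KVM_PV_EOI_ENABLED);
	}
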
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 084ef95274cd..813ed103f45e 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -115,8 +115,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr, | |||
115 | 115 | ||
116 | extern unsigned long long native_read_tsc(void); | 116 | extern unsigned long long native_read_tsc(void); |
117 | 117 | ||
118 | extern int native_rdmsr_safe_regs(u32 regs[8]); | 118 | extern int rdmsr_safe_regs(u32 regs[8]); |
119 | extern int native_wrmsr_safe_regs(u32 regs[8]); | 119 | extern int wrmsr_safe_regs(u32 regs[8]); |
120 | 120 | ||
121 | static __always_inline unsigned long long __native_read_tsc(void) | 121 | static __always_inline unsigned long long __native_read_tsc(void) |
122 | { | 122 | { |
@@ -187,43 +187,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | |||
187 | return err; | 187 | return err; |
188 | } | 188 | } |
189 | 189 | ||
190 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | ||
191 | { | ||
192 | u32 gprs[8] = { 0 }; | ||
193 | int err; | ||
194 | |||
195 | gprs[1] = msr; | ||
196 | gprs[7] = 0x9c5a203a; | ||
197 | |||
198 | err = native_rdmsr_safe_regs(gprs); | ||
199 | |||
200 | *p = gprs[0] | ((u64)gprs[2] << 32); | ||
201 | |||
202 | return err; | ||
203 | } | ||
204 | |||
205 | static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | ||
206 | { | ||
207 | u32 gprs[8] = { 0 }; | ||
208 | |||
209 | gprs[0] = (u32)val; | ||
210 | gprs[1] = msr; | ||
211 | gprs[2] = val >> 32; | ||
212 | gprs[7] = 0x9c5a203a; | ||
213 | |||
214 | return native_wrmsr_safe_regs(gprs); | ||
215 | } | ||
216 | |||
217 | static inline int rdmsr_safe_regs(u32 regs[8]) | ||
218 | { | ||
219 | return native_rdmsr_safe_regs(regs); | ||
220 | } | ||
221 | |||
222 | static inline int wrmsr_safe_regs(u32 regs[8]) | ||
223 | { | ||
224 | return native_wrmsr_safe_regs(regs); | ||
225 | } | ||
226 | |||
227 | #define rdtscl(low) \ | 190 | #define rdtscl(low) \ |
228 | ((low) = (u32)__native_read_tsc()) | 191 | ((low) = (u32)__native_read_tsc()) |
229 | 192 | ||
@@ -237,6 +200,8 @@ do { \ | |||
237 | (high) = (u32)(_l >> 32); \ | 200 | (high) = (u32)(_l >> 32); \ |
238 | } while (0) | 201 | } while (0) |
239 | 202 | ||
203 | #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) | ||
204 | |||
240 | #define rdtscp(low, high, aux) \ | 205 | #define rdtscp(low, high, aux) \ |
241 | do { \ | 206 | do { \ |
242 | unsigned long long _val = native_read_tscp(&(aux)); \ | 207 | unsigned long long _val = native_read_tscp(&(aux)); \ |
@@ -248,8 +213,7 @@ do { \ | |||
248 | 213 | ||
249 | #endif /* !CONFIG_PARAVIRT */ | 214 | #endif /* !CONFIG_PARAVIRT */ |
250 | 215 | ||
251 | 216 | #define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val), \ | |
252 | #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ | ||
253 | (u32)((val) >> 32)) | 217 | (u32)((val) >> 32)) |
254 | 218 | ||
255 | #define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2)) | 219 | #define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2)) |
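
The msr.h changes amount to: the AMD regs-based helpers leave this header (their only user is the AMD CPU code), checking_wrmsrl() becomes wrmsrl_safe(), and rdpmcl() joins rdtscl() as a counter-read shorthand. A small usage sketch; the MSR choice is arbitrary:

	static void example_msr_helpers(void)
	{
		unsigned long long val;
		int err;

		rdpmcl(0, val);				/* read performance counter 0 */
		err = wrmsrl_safe(MSR_IA32_TSC, val);	/* formerly checking_wrmsrl() */
		if (err)
			pr_warn("MSR write faulted\n");
	}
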
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index dc580c42851c..c0fa356e90de 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -44,28 +44,14 @@ struct nmiaction { | |||
44 | const char *name; | 44 | const char *name; |
45 | }; | 45 | }; |
46 | 46 | ||
47 | #define register_nmi_handler(t, fn, fg, n) \ | 47 | #define register_nmi_handler(t, fn, fg, n, init...) \ |
48 | ({ \ | 48 | ({ \ |
49 | static struct nmiaction fn##_na = { \ | 49 | static struct nmiaction init fn##_na = { \ |
50 | .handler = (fn), \ | 50 | .handler = (fn), \ |
51 | .name = (n), \ | 51 | .name = (n), \ |
52 | .flags = (fg), \ | 52 | .flags = (fg), \ |
53 | }; \ | 53 | }; \ |
54 | __register_nmi_handler((t), &fn##_na); \ | 54 | __register_nmi_handler((t), &fn##_na); \ |
55 | }) | ||
56 | |||
57 | /* | ||
58 | * For special handlers that register/unregister in the | ||
59 | * init section only. This should be considered rare. | ||
60 | */ | ||
61 | #define register_nmi_handler_initonly(t, fn, fg, n) \ | ||
62 | ({ \ | ||
63 | static struct nmiaction fn##_na __initdata = { \ | ||
64 | .handler = (fn), \ | ||
65 | .name = (n), \ | ||
66 | .flags = (fg), \ | ||
67 | }; \ | ||
68 | __register_nmi_handler((t), &fn##_na); \ | ||
69 | }) | 55 | }) |
70 | 56 | ||
71 | int __register_nmi_handler(unsigned int, struct nmiaction *); | 57 | int __register_nmi_handler(unsigned int, struct nmiaction *); |
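
The new trailing init... argument is what replaces the removed register_nmi_handler_initonly(): a caller that registers only from __init code can pass __initdata so the generated nmiaction is discarded after boot. A sketch with a hypothetical handler:

	static int __init example_nmi(unsigned int type, struct pt_regs *regs)
	{
		return NMI_HANDLED;
	}

	static void __init example_setup(void)
	{
		/* The extra argument lands in front of the struct definition,
		 * placing example_nmi_na in the __initdata section: */
		register_nmi_handler(NMI_LOCAL, example_nmi, 0, "example",
				     __initdata);
	}
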
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 6cbbabf52707..a0facf3908d7 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -128,21 +128,11 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err) | |||
128 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); | 128 | return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); |
129 | } | 129 | } |
130 | 130 | ||
131 | static inline int paravirt_rdmsr_regs(u32 *regs) | ||
132 | { | ||
133 | return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs); | ||
134 | } | ||
135 | |||
136 | static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) | 131 | static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) |
137 | { | 132 | { |
138 | return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); | 133 | return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); |
139 | } | 134 | } |
140 | 135 | ||
141 | static inline int paravirt_wrmsr_regs(u32 *regs) | ||
142 | { | ||
143 | return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs); | ||
144 | } | ||
145 | |||
146 | /* These should all do BUG_ON(_err), but our headers are too tangled. */ | 136 | /* These should all do BUG_ON(_err), but our headers are too tangled. */ |
147 | #define rdmsr(msr, val1, val2) \ | 137 | #define rdmsr(msr, val1, val2) \ |
148 | do { \ | 138 | do { \ |
@@ -176,9 +166,6 @@ do { \ | |||
176 | _err; \ | 166 | _err; \ |
177 | }) | 167 | }) |
178 | 168 | ||
179 | #define rdmsr_safe_regs(regs) paravirt_rdmsr_regs(regs) | ||
180 | #define wrmsr_safe_regs(regs) paravirt_wrmsr_regs(regs) | ||
181 | |||
182 | static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | 169 | static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) |
183 | { | 170 | { |
184 | int err; | 171 | int err; |
@@ -186,32 +173,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) | |||
186 | *p = paravirt_read_msr(msr, &err); | 173 | *p = paravirt_read_msr(msr, &err); |
187 | return err; | 174 | return err; |
188 | } | 175 | } |
189 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | ||
190 | { | ||
191 | u32 gprs[8] = { 0 }; | ||
192 | int err; | ||
193 | |||
194 | gprs[1] = msr; | ||
195 | gprs[7] = 0x9c5a203a; | ||
196 | |||
197 | err = paravirt_rdmsr_regs(gprs); | ||
198 | |||
199 | *p = gprs[0] | ((u64)gprs[2] << 32); | ||
200 | |||
201 | return err; | ||
202 | } | ||
203 | |||
204 | static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | ||
205 | { | ||
206 | u32 gprs[8] = { 0 }; | ||
207 | |||
208 | gprs[0] = (u32)val; | ||
209 | gprs[1] = msr; | ||
210 | gprs[2] = val >> 32; | ||
211 | gprs[7] = 0x9c5a203a; | ||
212 | |||
213 | return paravirt_wrmsr_regs(gprs); | ||
214 | } | ||
215 | 176 | ||
216 | static inline u64 paravirt_read_tsc(void) | 177 | static inline u64 paravirt_read_tsc(void) |
217 | { | 178 | { |
@@ -252,6 +213,8 @@ do { \ | |||
252 | high = _l >> 32; \ | 213 | high = _l >> 32; \ |
253 | } while (0) | 214 | } while (0) |
254 | 215 | ||
216 | #define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) | ||
217 | |||
255 | static inline unsigned long long paravirt_rdtscp(unsigned int *aux) | 218 | static inline unsigned long long paravirt_rdtscp(unsigned int *aux) |
256 | { | 219 | { |
257 | return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); | 220 | return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux); |
@@ -397,9 +360,10 @@ static inline void __flush_tlb_single(unsigned long addr) | |||
397 | 360 | ||
398 | static inline void flush_tlb_others(const struct cpumask *cpumask, | 361 | static inline void flush_tlb_others(const struct cpumask *cpumask, |
399 | struct mm_struct *mm, | 362 | struct mm_struct *mm, |
400 | unsigned long va) | 363 | unsigned long start, |
364 | unsigned long end) | ||
401 | { | 365 | { |
402 | PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); | 366 | PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end); |
403 | } | 367 | } |
404 | 368 | ||
405 | static inline int paravirt_pgd_alloc(struct mm_struct *mm) | 369 | static inline int paravirt_pgd_alloc(struct mm_struct *mm) |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 8e8b9a4987ee..142236ed83af 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -153,9 +153,7 @@ struct pv_cpu_ops { | |||
153 | /* MSR, PMC and TSR operations. | 153 | /* MSR, PMC and TSR operations. |
154 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ | 154 | err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ |
155 | u64 (*read_msr)(unsigned int msr, int *err); | 155 | u64 (*read_msr)(unsigned int msr, int *err); |
156 | int (*rdmsr_regs)(u32 *regs); | ||
157 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); | 156 | int (*write_msr)(unsigned int msr, unsigned low, unsigned high); |
158 | int (*wrmsr_regs)(u32 *regs); | ||
159 | 157 | ||
160 | u64 (*read_tsc)(void); | 158 | u64 (*read_tsc)(void); |
161 | u64 (*read_pmc)(int counter); | 159 | u64 (*read_pmc)(int counter); |
@@ -250,7 +248,8 @@ struct pv_mmu_ops { | |||
250 | void (*flush_tlb_single)(unsigned long addr); | 248 | void (*flush_tlb_single)(unsigned long addr); |
251 | void (*flush_tlb_others)(const struct cpumask *cpus, | 249 | void (*flush_tlb_others)(const struct cpumask *cpus, |
252 | struct mm_struct *mm, | 250 | struct mm_struct *mm, |
253 | unsigned long va); | 251 | unsigned long start, |
252 | unsigned long end); | ||
254 | 253 | ||
255 | /* Hooks for allocating and freeing a pagetable top-level */ | 254 | /* Hooks for allocating and freeing a pagetable top-level */ |
256 | int (*pgd_alloc)(struct mm_struct *mm); | 255 | int (*pgd_alloc)(struct mm_struct *mm); |
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b3a531746026..73e8eeff22ee 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -7,9 +7,13 @@ | |||
7 | #undef DEBUG | 7 | #undef DEBUG |
8 | 8 | ||
9 | #ifdef DEBUG | 9 | #ifdef DEBUG |
10 | #define DBG(x...) printk(x) | 10 | #define DBG(fmt, ...) printk(fmt, ##__VA_ARGS__) |
11 | #else | 11 | #else |
12 | #define DBG(x...) | 12 | #define DBG(fmt, ...) \ |
13 | do { \ | ||
14 | if (0) \ | ||
15 | printk(fmt, ##__VA_ARGS__); \ | ||
16 | } while (0) | ||
13 | #endif | 17 | #endif |
14 | 18 | ||
15 | #define PCI_PROBE_BIOS 0x0001 | 19 | #define PCI_PROBE_BIOS 0x0001 |
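
The rewritten no-DEBUG branch is the usual if (0) idiom: unlike an empty macro, the compiler still type-checks the format string and arguments of every disabled DBG() call, and the arguments count as used, while the dead branch compiles to nothing. For instance:

	static void example(int bus)
	{
		/* Emits no code with DEBUG undefined, but a format or
		 * argument mismatch here is still caught at compile time: */
		DBG("PCI: probing bus %02x\n", bus);
	}
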
@@ -100,6 +104,7 @@ struct pci_raw_ops { | |||
100 | extern const struct pci_raw_ops *raw_pci_ops; | 104 | extern const struct pci_raw_ops *raw_pci_ops; |
101 | extern const struct pci_raw_ops *raw_pci_ext_ops; | 105 | extern const struct pci_raw_ops *raw_pci_ext_ops; |
102 | 106 | ||
107 | extern const struct pci_raw_ops pci_mmcfg; | ||
103 | extern const struct pci_raw_ops pci_direct_conf1; | 108 | extern const struct pci_raw_ops pci_direct_conf1; |
104 | extern bool port_cf9_safe; | 109 | extern bool port_cf9_safe; |
105 | 110 | ||
@@ -135,6 +140,12 @@ struct pci_mmcfg_region { | |||
135 | 140 | ||
136 | extern int __init pci_mmcfg_arch_init(void); | 141 | extern int __init pci_mmcfg_arch_init(void); |
137 | extern void __init pci_mmcfg_arch_free(void); | 142 | extern void __init pci_mmcfg_arch_free(void); |
143 | extern int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg); | ||
144 | extern void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg); | ||
145 | extern int __devinit pci_mmconfig_insert(struct device *dev, | ||
146 | u16 seg, u8 start, | ||
147 | u8 end, phys_addr_t addr); | ||
148 | extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end); | ||
138 | extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); | 149 | extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); |
139 | 150 | ||
140 | extern struct list_head pci_mmcfg_list; | 151 | extern struct list_head pci_mmcfg_list; |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index d9b8e3f7f42a..1104afaba52b 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -551,6 +551,12 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off); | |||
551 | { [0 ... NR_CPUS-1] = _initvalue }; \ | 551 | { [0 ... NR_CPUS-1] = _initvalue }; \ |
552 | __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map | 552 | __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map |
553 | 553 | ||
554 | #define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \ | ||
555 | DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue; \ | ||
556 | __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \ | ||
557 | { [0 ... NR_CPUS-1] = _initvalue }; \ | ||
558 | __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map | ||
559 | |||
554 | #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ | 560 | #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ |
555 | EXPORT_PER_CPU_SYMBOL(_name) | 561 | EXPORT_PER_CPU_SYMBOL(_name) |
556 | 562 | ||
@@ -559,6 +565,11 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off); | |||
559 | extern __typeof__(_type) *_name##_early_ptr; \ | 565 | extern __typeof__(_type) *_name##_early_ptr; \ |
560 | extern __typeof__(_type) _name##_early_map[] | 566 | extern __typeof__(_type) _name##_early_map[] |
561 | 567 | ||
568 | #define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ | ||
569 | DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \ | ||
570 | extern __typeof__(_type) *_name##_early_ptr; \ | ||
571 | extern __typeof__(_type) _name##_early_map[] | ||
572 | |||
562 | #define early_per_cpu_ptr(_name) (_name##_early_ptr) | 573 | #define early_per_cpu_ptr(_name) (_name##_early_ptr) |
563 | #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) | 574 | #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) |
564 | #define early_per_cpu(_name, _cpu) \ | 575 | #define early_per_cpu(_name, _cpu) \ |
@@ -570,12 +581,18 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off); | |||
570 | #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ | 581 | #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ |
571 | DEFINE_PER_CPU(_type, _name) = _initvalue | 582 | DEFINE_PER_CPU(_type, _name) = _initvalue |
572 | 583 | ||
584 | #define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \ | ||
585 | DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue | ||
586 | |||
573 | #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ | 587 | #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ |
574 | EXPORT_PER_CPU_SYMBOL(_name) | 588 | EXPORT_PER_CPU_SYMBOL(_name) |
575 | 589 | ||
576 | #define DECLARE_EARLY_PER_CPU(_type, _name) \ | 590 | #define DECLARE_EARLY_PER_CPU(_type, _name) \ |
577 | DECLARE_PER_CPU(_type, _name) | 591 | DECLARE_PER_CPU(_type, _name) |
578 | 592 | ||
593 | #define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ | ||
594 | DECLARE_PER_CPU_READ_MOSTLY(_type, _name) | ||
595 | |||
579 | #define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) | 596 | #define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) |
580 | #define early_per_cpu_ptr(_name) NULL | 597 | #define early_per_cpu_ptr(_name) NULL |
581 | /* no early_per_cpu_map() */ | 598 | /* no early_per_cpu_map() */ |
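
The READ_MOSTLY variants mirror the existing pair one-for-one; only the section placement differs. The smp.h conversions later in this series are the in-tree users, and the pattern is:

	/* In a header: */
	DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);

	/* In exactly one .c file: */
	DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
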
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 588f52ea810e..dab39350e51e 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -5,11 +5,10 @@ | |||
5 | * Performance event hw details: | 5 | * Performance event hw details: |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define X86_PMC_MAX_GENERIC 32 | 8 | #define INTEL_PMC_MAX_GENERIC 32 |
9 | #define X86_PMC_MAX_FIXED 3 | 9 | #define INTEL_PMC_MAX_FIXED 3 |
10 | #define INTEL_PMC_IDX_FIXED 32 | ||
10 | 11 | ||
11 | #define X86_PMC_IDX_GENERIC 0 | ||
12 | #define X86_PMC_IDX_FIXED 32 | ||
13 | #define X86_PMC_IDX_MAX 64 | 12 | #define X86_PMC_IDX_MAX 64 |
14 | 13 | ||
15 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 | 14 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 |
@@ -48,8 +47,7 @@ | |||
48 | (X86_RAW_EVENT_MASK | \ | 47 | (X86_RAW_EVENT_MASK | \ |
49 | AMD64_EVENTSEL_EVENT) | 48 | AMD64_EVENTSEL_EVENT) |
50 | #define AMD64_NUM_COUNTERS 4 | 49 | #define AMD64_NUM_COUNTERS 4 |
51 | #define AMD64_NUM_COUNTERS_F15H 6 | 50 | #define AMD64_NUM_COUNTERS_CORE 6 |
52 | #define AMD64_NUM_COUNTERS_MAX AMD64_NUM_COUNTERS_F15H | ||
53 | 51 | ||
54 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | 52 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c |
55 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | 53 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) |
@@ -121,16 +119,16 @@ struct x86_pmu_capability { | |||
121 | 119 | ||
122 | /* Instr_Retired.Any: */ | 120 | /* Instr_Retired.Any: */ |
123 | #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 | 121 | #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 |
124 | #define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) | 122 | #define INTEL_PMC_IDX_FIXED_INSTRUCTIONS (INTEL_PMC_IDX_FIXED + 0) |
125 | 123 | ||
126 | /* CPU_CLK_Unhalted.Core: */ | 124 | /* CPU_CLK_Unhalted.Core: */ |
127 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a | 125 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a |
128 | #define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) | 126 | #define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1) |
129 | 127 | ||
130 | /* CPU_CLK_Unhalted.Ref: */ | 128 | /* CPU_CLK_Unhalted.Ref: */ |
131 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | 129 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b |
132 | #define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2) | 130 | #define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2) |
133 | #define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES) | 131 | #define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES) |
134 | 132 | ||
135 | /* | 133 | /* |
136 | * We model BTS tracing as another fixed-mode PMC. | 134 | * We model BTS tracing as another fixed-mode PMC. |
@@ -139,7 +137,7 @@ struct x86_pmu_capability { | |||
139 | * values are used by actual fixed events and higher values are used | 137 | * values are used by actual fixed events and higher values are used |
140 | * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. | 138 | * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. |
141 | */ | 139 | */ |
142 | #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) | 140 | #define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16) |
143 | 141 | ||
144 | /* | 142 | /* |
145 | * IBS cpuid feature detection | 143 | * IBS cpuid feature detection |
@@ -234,8 +232,9 @@ struct perf_guest_switch_msr { | |||
234 | 232 | ||
235 | extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); | 233 | extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); |
236 | extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); | 234 | extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); |
235 | extern void perf_check_microcode(void); | ||
237 | #else | 236 | #else |
238 | static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) | 237 | static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) |
239 | { | 238 | { |
240 | *nr = 0; | 239 | *nr = 0; |
241 | return NULL; | 240 | return NULL; |
@@ -247,6 +246,7 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) | |||
247 | } | 246 | } |
248 | 247 | ||
249 | static inline void perf_events_lapic_init(void) { } | 248 | static inline void perf_events_lapic_init(void) { } |
249 | static inline void perf_check_microcode(void) { } | ||
250 | #endif | 250 | #endif |
251 | 251 | ||
252 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) | 252 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) |
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 98391db840c6..f2b489cf1602 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h | |||
@@ -2,9 +2,9 @@ | |||
2 | #define _ASM_X86_PGTABLE_2LEVEL_H | 2 | #define _ASM_X86_PGTABLE_2LEVEL_H |
3 | 3 | ||
4 | #define pte_ERROR(e) \ | 4 | #define pte_ERROR(e) \ |
5 | printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) | 5 | pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low) |
6 | #define pgd_ERROR(e) \ | 6 | #define pgd_ERROR(e) \ |
7 | printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) | 7 | pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e)) |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * Certain architectures need to do special things when PTEs | 10 | * Certain architectures need to do special things when PTEs |
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index cb00ccc7d571..4cc9f2b7cdc3 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h | |||
@@ -9,13 +9,13 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #define pte_ERROR(e) \ | 11 | #define pte_ERROR(e) \ |
12 | printk("%s:%d: bad pte %p(%08lx%08lx).\n", \ | 12 | pr_err("%s:%d: bad pte %p(%08lx%08lx)\n", \ |
13 | __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low) | 13 | __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low) |
14 | #define pmd_ERROR(e) \ | 14 | #define pmd_ERROR(e) \ |
15 | printk("%s:%d: bad pmd %p(%016Lx).\n", \ | 15 | pr_err("%s:%d: bad pmd %p(%016Lx)\n", \ |
16 | __FILE__, __LINE__, &(e), pmd_val(e)) | 16 | __FILE__, __LINE__, &(e), pmd_val(e)) |
17 | #define pgd_ERROR(e) \ | 17 | #define pgd_ERROR(e) \ |
18 | printk("%s:%d: bad pgd %p(%016Lx).\n", \ | 18 | pr_err("%s:%d: bad pgd %p(%016Lx)\n", \ |
19 | __FILE__, __LINE__, &(e), pgd_val(e)) | 19 | __FILE__, __LINE__, &(e), pgd_val(e)) |
20 | 20 | ||
21 | /* Rules for using set_pte: the pte being assigned *must* be | 21 | /* Rules for using set_pte: the pte being assigned *must* be |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 975f709e09ae..8251be02301e 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -26,16 +26,16 @@ extern pgd_t init_level4_pgt[]; | |||
26 | extern void paging_init(void); | 26 | extern void paging_init(void); |
27 | 27 | ||
28 | #define pte_ERROR(e) \ | 28 | #define pte_ERROR(e) \ |
29 | printk("%s:%d: bad pte %p(%016lx).\n", \ | 29 | pr_err("%s:%d: bad pte %p(%016lx)\n", \ |
30 | __FILE__, __LINE__, &(e), pte_val(e)) | 30 | __FILE__, __LINE__, &(e), pte_val(e)) |
31 | #define pmd_ERROR(e) \ | 31 | #define pmd_ERROR(e) \ |
32 | printk("%s:%d: bad pmd %p(%016lx).\n", \ | 32 | pr_err("%s:%d: bad pmd %p(%016lx)\n", \ |
33 | __FILE__, __LINE__, &(e), pmd_val(e)) | 33 | __FILE__, __LINE__, &(e), pmd_val(e)) |
34 | #define pud_ERROR(e) \ | 34 | #define pud_ERROR(e) \ |
35 | printk("%s:%d: bad pud %p(%016lx).\n", \ | 35 | pr_err("%s:%d: bad pud %p(%016lx)\n", \ |
36 | __FILE__, __LINE__, &(e), pud_val(e)) | 36 | __FILE__, __LINE__, &(e), pud_val(e)) |
37 | #define pgd_ERROR(e) \ | 37 | #define pgd_ERROR(e) \ |
38 | printk("%s:%d: bad pgd %p(%016lx).\n", \ | 38 | pr_err("%s:%d: bad pgd %p(%016lx)\n", \ |
39 | __FILE__, __LINE__, &(e), pgd_val(e)) | 39 | __FILE__, __LINE__, &(e), pgd_val(e)) |
40 | 40 | ||
41 | struct mm_struct; | 41 | struct mm_struct; |
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index f8ab3eaad128..aea1d1d848c7 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h | |||
@@ -44,6 +44,7 @@ | |||
44 | */ | 44 | */ |
45 | #define X86_CR3_PWT 0x00000008 /* Page Write Through */ | 45 | #define X86_CR3_PWT 0x00000008 /* Page Write Through */ |
46 | #define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ | 46 | #define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ |
47 | #define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */ | ||
47 | 48 | ||
48 | /* | 49 | /* |
49 | * Intel CPU features in CR4 | 50 | * Intel CPU features in CR4 |
@@ -61,6 +62,7 @@ | |||
61 | #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ | 62 | #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ |
62 | #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ | 63 | #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ |
63 | #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ | 64 | #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ |
65 | #define X86_CR4_PCIDE 0x00020000 /* enable PCID support */ | ||
64 | #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ | 66 | #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ |
65 | #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ | 67 | #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ |
66 | 68 | ||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 39bc5777211a..d048cad9bcad 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -61,6 +61,19 @@ static inline void *current_text_addr(void) | |||
61 | # define ARCH_MIN_MMSTRUCT_ALIGN 0 | 61 | # define ARCH_MIN_MMSTRUCT_ALIGN 0 |
62 | #endif | 62 | #endif |
63 | 63 | ||
64 | enum tlb_infos { | ||
65 | ENTRIES, | ||
66 | NR_INFO | ||
67 | }; | ||
68 | |||
69 | extern u16 __read_mostly tlb_lli_4k[NR_INFO]; | ||
70 | extern u16 __read_mostly tlb_lli_2m[NR_INFO]; | ||
71 | extern u16 __read_mostly tlb_lli_4m[NR_INFO]; | ||
72 | extern u16 __read_mostly tlb_lld_4k[NR_INFO]; | ||
73 | extern u16 __read_mostly tlb_lld_2m[NR_INFO]; | ||
74 | extern u16 __read_mostly tlb_lld_4m[NR_INFO]; | ||
75 | extern s8 __read_mostly tlb_flushall_shift; | ||
76 | |||
64 | /* | 77 | /* |
65 | * CPU type and hardware bug flags. Kept separately for each CPU. | 78 | * CPU type and hardware bug flags. Kept separately for each CPU. |
66 | * Members of this structure are referenced in head.S, so think twice | 79 | * Members of this structure are referenced in head.S, so think twice |
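
These arrays are indexed with the tlb_infos enum: CPU-detection code fills in one entry count per TLB kind and page size, and the flush path consults them when deciding between per-page and full flushes. A sketch of a consumer; the function is illustrative only:

	static unsigned int example_itlb_4k_entries(void)
	{
		return tlb_lli_4k[ENTRIES];	/* instruction-TLB 4K entry count */
	}
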
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index fce3f4ae5bd6..fe1ec5bcd846 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h | |||
@@ -21,8 +21,9 @@ struct real_mode_header { | |||
21 | u32 wakeup_header; | 21 | u32 wakeup_header; |
22 | #endif | 22 | #endif |
23 | /* APM/BIOS reboot */ | 23 | /* APM/BIOS reboot */ |
24 | #ifdef CONFIG_X86_32 | ||
25 | u32 machine_real_restart_asm; | 24 | u32 machine_real_restart_asm; |
25 | #ifdef CONFIG_X86_64 | ||
26 | u32 machine_real_restart_seg; | ||
26 | #endif | 27 | #endif |
27 | }; | 28 | }; |
28 | 29 | ||
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 92f297069e87..a82c4f1b4d83 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h | |||
@@ -18,8 +18,8 @@ extern struct machine_ops machine_ops; | |||
18 | 18 | ||
19 | void native_machine_crash_shutdown(struct pt_regs *regs); | 19 | void native_machine_crash_shutdown(struct pt_regs *regs); |
20 | void native_machine_shutdown(void); | 20 | void native_machine_shutdown(void); |
21 | void machine_real_restart(unsigned int type); | 21 | void __noreturn machine_real_restart(unsigned int type); |
22 | /* These must match dispatch_table in reboot_32.S */ | 22 | /* These must match dispatch in arch/x86/realmode/rm/reboot.S */ |
23 | #define MRR_BIOS 0 | 23 | #define MRR_BIOS 0 |
24 | #define MRR_APM 1 | 24 | #define MRR_APM 1 |
25 | 25 | ||
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f48394513c37..4f19a1526037 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -31,12 +31,12 @@ static inline bool cpu_has_ht_siblings(void) | |||
31 | return has_siblings; | 31 | return has_siblings; |
32 | } | 32 | } |
33 | 33 | ||
34 | DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); | 34 | DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); |
35 | DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); | 35 | DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); |
36 | /* cpus sharing the last level cache: */ | 36 | /* cpus sharing the last level cache: */ |
37 | DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); | 37 | DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); |
38 | DECLARE_PER_CPU(u16, cpu_llc_id); | 38 | DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); |
39 | DECLARE_PER_CPU(int, cpu_number); | 39 | DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); |
40 | 40 | ||
41 | static inline struct cpumask *cpu_sibling_mask(int cpu) | 41 | static inline struct cpumask *cpu_sibling_mask(int cpu) |
42 | { | 42 | { |
@@ -53,10 +53,10 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) | |||
53 | return per_cpu(cpu_llc_shared_map, cpu); | 53 | return per_cpu(cpu_llc_shared_map, cpu); |
54 | } | 54 | } |
55 | 55 | ||
56 | DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); | 56 | DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); |
57 | DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); | 57 | DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); |
58 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) | 58 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) |
59 | DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid); | 59 | DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); |
60 | #endif | 60 | #endif |
61 | 61 | ||
62 | /* Static state in head.S used to set up a CPU */ | 62 | /* Static state in head.S used to set up a CPU */ |
@@ -169,11 +169,6 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); | |||
169 | void smp_store_cpu_info(int id); | 169 | void smp_store_cpu_info(int id); |
170 | #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) | 170 | #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) |
171 | 171 | ||
172 | /* We don't mark CPUs online until __cpu_up(), so we need another measure */ | ||
173 | static inline int num_booting_cpus(void) | ||
174 | { | ||
175 | return cpumask_weight(cpu_callout_mask); | ||
176 | } | ||
177 | #else /* !CONFIG_SMP */ | 172 | #else /* !CONFIG_SMP */ |
178 | #define wbinvd_on_cpu(cpu) wbinvd() | 173 | #define wbinvd_on_cpu(cpu) wbinvd() |
179 | static inline int wbinvd_on_all_cpus(void) | 174 | static inline int wbinvd_on_all_cpus(void) |
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 829215fef9ee..4fef20773b8f 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h | |||
@@ -4,7 +4,14 @@ | |||
4 | #define tlb_start_vma(tlb, vma) do { } while (0) | 4 | #define tlb_start_vma(tlb, vma) do { } while (0) |
5 | #define tlb_end_vma(tlb, vma) do { } while (0) | 5 | #define tlb_end_vma(tlb, vma) do { } while (0) |
6 | #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) | 6 | #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) |
7 | #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) | 7 | |
8 | #define tlb_flush(tlb) \ | ||
9 | { \ | ||
10 | if (tlb->fullmm == 0) \ | ||
11 | flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \ | ||
12 | else \ | ||
13 | flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL); \ | ||
14 | } | ||
8 | 15 | ||
9 | #include <asm-generic/tlb.h> | 16 | #include <asm-generic/tlb.h> |
10 | 17 | ||
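
One caveat with the new macro's form: a bare brace block can mis-parse when the macro is used as the body of an if/else, since the trailing semicolon ends the if before the else. The conventional do { } while (0) wrapping, shown here as a behavior-preserving alternative, avoids that:

	#define tlb_flush(tlb)							\
	do {									\
		if ((tlb)->fullmm == 0)						\
			flush_tlb_mm_range((tlb)->mm, (tlb)->start,		\
					   (tlb)->end, 0UL);			\
		else								\
			flush_tlb_mm_range((tlb)->mm, 0UL, TLB_FLUSH_ALL, 0UL);	\
	} while (0)
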
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 36a1a2ab87d2..74a44333545a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h | |||
@@ -73,14 +73,10 @@ static inline void __flush_tlb_one(unsigned long addr) | |||
73 | * - flush_tlb_page(vma, vmaddr) flushes one page | 73 | * - flush_tlb_page(vma, vmaddr) flushes one page |
74 | * - flush_tlb_range(vma, start, end) flushes a range of pages | 74 | * - flush_tlb_range(vma, start, end) flushes a range of pages |
75 | * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages | 75 | * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages |
76 | * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus | 76 | * - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus |
77 | * | 77 | * |
78 | * ..but the i386 has somewhat limited tlb flushing capabilities, | 78 | * ..but the i386 has somewhat limited tlb flushing capabilities, |
79 | * and page-granular flushes are available only on i486 and up. | 79 | * and page-granular flushes are available only on i486 and up. |
80 | * | ||
81 | * x86-64 can only flush individual pages or full VMs. For a range flush | ||
82 | * we always do the full VM. Might be worth trying if for a small | ||
83 | * range a few INVLPGs in a row are a win. | ||
84 | */ | 80 | */ |
85 | 81 | ||
86 | #ifndef CONFIG_SMP | 82 | #ifndef CONFIG_SMP |
@@ -109,9 +105,17 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, | |||
109 | __flush_tlb(); | 105 | __flush_tlb(); |
110 | } | 106 | } |
111 | 107 | ||
108 | static inline void flush_tlb_mm_range(struct mm_struct *mm, | ||
109 | unsigned long start, unsigned long end, unsigned long vmflag) | ||
110 | { | ||
111 | if (mm == current->active_mm) | ||
112 | __flush_tlb(); | ||
113 | } | ||
114 | |||
112 | static inline void native_flush_tlb_others(const struct cpumask *cpumask, | 115 | static inline void native_flush_tlb_others(const struct cpumask *cpumask, |
113 | struct mm_struct *mm, | 116 | struct mm_struct *mm, |
114 | unsigned long va) | 117 | unsigned long start, |
118 | unsigned long end) | ||
115 | { | 119 | { |
116 | } | 120 | } |
117 | 121 | ||
@@ -119,27 +123,35 @@ static inline void reset_lazy_tlbstate(void) | |||
119 | { | 123 | { |
120 | } | 124 | } |
121 | 125 | ||
126 | static inline void flush_tlb_kernel_range(unsigned long start, | ||
127 | unsigned long end) | ||
128 | { | ||
129 | flush_tlb_all(); | ||
130 | } | ||
131 | |||
122 | #else /* SMP */ | 132 | #else /* SMP */ |
123 | 133 | ||
124 | #include <asm/smp.h> | 134 | #include <asm/smp.h> |
125 | 135 | ||
126 | #define local_flush_tlb() __flush_tlb() | 136 | #define local_flush_tlb() __flush_tlb() |
127 | 137 | ||
138 | #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL) | ||
139 | |||
140 | #define flush_tlb_range(vma, start, end) \ | ||
141 | flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) | ||
142 | |||
128 | extern void flush_tlb_all(void); | 143 | extern void flush_tlb_all(void); |
129 | extern void flush_tlb_current_task(void); | 144 | extern void flush_tlb_current_task(void); |
130 | extern void flush_tlb_mm(struct mm_struct *); | ||
131 | extern void flush_tlb_page(struct vm_area_struct *, unsigned long); | 145 | extern void flush_tlb_page(struct vm_area_struct *, unsigned long); |
146 | extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | ||
147 | unsigned long end, unsigned long vmflag); | ||
148 | extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); | ||
132 | 149 | ||
133 | #define flush_tlb() flush_tlb_current_task() | 150 | #define flush_tlb() flush_tlb_current_task() |
134 | 151 | ||
135 | static inline void flush_tlb_range(struct vm_area_struct *vma, | ||
136 | unsigned long start, unsigned long end) | ||
137 | { | ||
138 | flush_tlb_mm(vma->vm_mm); | ||
139 | } | ||
140 | |||
141 | void native_flush_tlb_others(const struct cpumask *cpumask, | 152 | void native_flush_tlb_others(const struct cpumask *cpumask, |
142 | struct mm_struct *mm, unsigned long va); | 153 | struct mm_struct *mm, |
154 | unsigned long start, unsigned long end); | ||
143 | 155 | ||
144 | #define TLBSTATE_OK 1 | 156 | #define TLBSTATE_OK 1 |
145 | #define TLBSTATE_LAZY 2 | 157 | #define TLBSTATE_LAZY 2 |
@@ -159,13 +171,8 @@ static inline void reset_lazy_tlbstate(void) | |||
159 | #endif /* SMP */ | 171 | #endif /* SMP */ |
160 | 172 | ||
161 | #ifndef CONFIG_PARAVIRT | 173 | #ifndef CONFIG_PARAVIRT |
162 | #define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) | 174 | #define flush_tlb_others(mask, mm, start, end) \ |
175 | native_flush_tlb_others(mask, mm, start, end) | ||
163 | #endif | 176 | #endif |
164 | 177 | ||
165 | static inline void flush_tlb_kernel_range(unsigned long start, | ||
166 | unsigned long end) | ||
167 | { | ||
168 | flush_tlb_all(); | ||
169 | } | ||
170 | |||
171 | #endif /* _ASM_X86_TLBFLUSH_H */ | 178 | #endif /* _ASM_X86_TLBFLUSH_H */ |
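
The net effect of this header rework is that range bounds survive from flush_tlb_range() all the way down to the IPI path instead of being widened to a full-mm flush. Concretely, on SMP:

	/* A one-page flush now expands with its bounds intact: */
	flush_tlb_range(vma, addr, addr + PAGE_SIZE);
	/* ...which is now... */
	flush_tlb_mm_range(vma->vm_mm, addr, addr + PAGE_SIZE, vma->vm_flags);
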
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 8e796fbbf9c6..d8def8b3dba0 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -17,6 +17,8 @@ | |||
17 | 17 | ||
18 | /* Handles exceptions in both to and from, but doesn't do access_ok */ | 18 | /* Handles exceptions in both to and from, but doesn't do access_ok */ |
19 | __must_check unsigned long | 19 | __must_check unsigned long |
20 | copy_user_enhanced_fast_string(void *to, const void *from, unsigned len); | ||
21 | __must_check unsigned long | ||
20 | copy_user_generic_string(void *to, const void *from, unsigned len); | 22 | copy_user_generic_string(void *to, const void *from, unsigned len); |
21 | __must_check unsigned long | 23 | __must_check unsigned long |
22 | copy_user_generic_unrolled(void *to, const void *from, unsigned len); | 24 | copy_user_generic_unrolled(void *to, const void *from, unsigned len); |
@@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len) | |||
26 | { | 28 | { |
27 | unsigned ret; | 29 | unsigned ret; |
28 | 30 | ||
29 | alternative_call(copy_user_generic_unrolled, | 31 | /* |
32 | * If CPU has ERMS feature, use copy_user_enhanced_fast_string. | ||
33 | * Otherwise, if CPU has rep_good feature, use copy_user_generic_string. | ||
34 | * Otherwise, use copy_user_generic_unrolled. | ||
35 | */ | ||
36 | alternative_call_2(copy_user_generic_unrolled, | ||
30 | copy_user_generic_string, | 37 | copy_user_generic_string, |
31 | X86_FEATURE_REP_GOOD, | 38 | X86_FEATURE_REP_GOOD, |
39 | copy_user_enhanced_fast_string, | ||
40 | X86_FEATURE_ERMS, | ||
32 | ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), | 41 | ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from), |
33 | "=d" (len)), | 42 | "=d" (len)), |
34 | "1" (to), "2" (from), "3" (len) | 43 | "1" (to), "2" (from), "3" (len) |
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 4437001d8e3d..0d9776e9e2dc 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h | |||
@@ -15,7 +15,6 @@ | |||
15 | # ifdef CONFIG_X86_32 | 15 | # ifdef CONFIG_X86_32 |
16 | 16 | ||
17 | # include <asm/unistd_32.h> | 17 | # include <asm/unistd_32.h> |
18 | # define __ARCH_WANT_IPC_PARSE_VERSION | ||
19 | # define __ARCH_WANT_STAT64 | 18 | # define __ARCH_WANT_STAT64 |
20 | # define __ARCH_WANT_SYS_IPC | 19 | # define __ARCH_WANT_SYS_IPC |
21 | # define __ARCH_WANT_SYS_OLD_MMAP | 20 | # define __ARCH_WANT_SYS_OLD_MMAP |
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 1e9bed14f7ae..f3971bbcd1de 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h | |||
@@ -48,7 +48,7 @@ struct arch_uprobe_task { | |||
48 | #endif | 48 | #endif |
49 | }; | 49 | }; |
50 | 50 | ||
51 | extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm); | 51 | extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); |
52 | extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); | 52 | extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); |
53 | extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); | 53 | extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); |
54 | extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); | 54 | extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); |
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 3bb9491b7659..b47c2a82ff15 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h | |||
@@ -15,7 +15,8 @@ extern void uv_nmi_init(void); | |||
15 | extern void uv_system_init(void); | 15 | extern void uv_system_init(void); |
16 | extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | 16 | extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, |
17 | struct mm_struct *mm, | 17 | struct mm_struct *mm, |
18 | unsigned long va, | 18 | unsigned long start, |
19 | unsigned long end, | ||
19 | unsigned int cpu); | 20 | unsigned int cpu); |
20 | 21 | ||
21 | #else /* X86_UV */ | 22 | #else /* X86_UV */ |
@@ -26,7 +27,7 @@ static inline void uv_cpu_init(void) { } | |||
26 | static inline void uv_system_init(void) { } | 27 | static inline void uv_system_init(void) { } |
27 | static inline const struct cpumask * | 28 | static inline const struct cpumask * |
28 | uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, | 29 | uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, |
29 | unsigned long va, unsigned int cpu) | 30 | unsigned long start, unsigned long end, unsigned int cpu) |
30 | { return cpumask; } | 31 | { return cpumask; } |
31 | 32 | ||
32 | #endif /* X86_UV */ | 33 | #endif /* X86_UV */ |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 6149b476d9df..a06983cdc125 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -140,6 +140,9 @@ | |||
140 | #define IPI_RESET_LIMIT 1 | 140 | #define IPI_RESET_LIMIT 1 |
141 | /* after this # consecutive successes, bump up the throttle if it was lowered */ | 141 | /* after this # consecutive successes, bump up the throttle if it was lowered */ |
142 | #define COMPLETE_THRESHOLD 5 | 142 | #define COMPLETE_THRESHOLD 5 |
143 | /* after this # of giveups (fall back to kernel IPIs) disable the use of | ||
144 | the BAU for a period of time */ | ||
145 | #define GIVEUP_LIMIT 100 | ||
143 | 146 | ||
144 | #define UV_LB_SUBNODEID 0x10 | 147 | #define UV_LB_SUBNODEID 0x10 |
145 | 148 | ||
@@ -166,7 +169,6 @@ | |||
166 | #define FLUSH_RETRY_TIMEOUT 2 | 169 | #define FLUSH_RETRY_TIMEOUT 2 |
167 | #define FLUSH_GIVEUP 3 | 170 | #define FLUSH_GIVEUP 3 |
168 | #define FLUSH_COMPLETE 4 | 171 | #define FLUSH_COMPLETE 4 |
169 | #define FLUSH_RETRY_BUSYBUG 5 | ||
170 | 172 | ||
171 | /* | 173 | /* |
172 | * tuning the action when the numalink network is extremely delayed | 174 | * tuning the action when the numalink network is extremely delayed |
@@ -175,7 +177,7 @@ | |||
175 | microseconds */ | 177 | microseconds */ |
176 | #define CONGESTED_REPS 10 /* long delays averaged over | 178 | #define CONGESTED_REPS 10 /* long delays averaged over |
177 | this many broadcasts */ | 179 | this many broadcasts */ |
178 | #define CONGESTED_PERIOD 30 /* time for the bau to be | 180 | #define DISABLED_PERIOD 10 /* time for the bau to be |
179 | disabled, in seconds */ | 181 | disabled, in seconds */ |
180 | /* see msg_type: */ | 182 | /* see msg_type: */ |
181 | #define MSG_NOOP 0 | 183 | #define MSG_NOOP 0 |
@@ -520,6 +522,12 @@ struct ptc_stats { | |||
520 | unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */ | 522 | unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */ |
521 | unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */ | 523 | unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */ |
522 | unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */ | 524 | unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */ |
525 | unsigned long s_overipilimit; /* over the ipi reset limit */ | ||
526 | unsigned long s_giveuplimit; /* disables, over giveup limit*/ | ||
527 | unsigned long s_enters; /* entries to the driver */ | ||
528 | unsigned long s_ipifordisabled; /* fall back to IPI; disabled */ | ||
529 | unsigned long s_plugged; /* plugged by h/w bug*/ | ||
530 | unsigned long s_congested; /* giveup on long wait */ | ||
523 | /* destination statistics */ | 531 | /* destination statistics */ |
524 | unsigned long d_alltlb; /* times all tlb's on this | 532 | unsigned long d_alltlb; /* times all tlb's on this |
525 | cpu were flushed */ | 533 | cpu were flushed */ |
@@ -586,8 +594,8 @@ struct bau_control { | |||
586 | int timeout_tries; | 594 | int timeout_tries; |
587 | int ipi_attempts; | 595 | int ipi_attempts; |
588 | int conseccompletes; | 596 | int conseccompletes; |
589 | int baudisabled; | 597 | short nobau; |
590 | int set_bau_off; | 598 | short baudisabled; |
591 | short cpu; | 599 | short cpu; |
592 | short osnode; | 600 | short osnode; |
593 | short uvhub_cpu; | 601 | short uvhub_cpu; |
@@ -596,14 +604,16 @@ struct bau_control { | |||
596 | short cpus_in_socket; | 604 | short cpus_in_socket; |
597 | short cpus_in_uvhub; | 605 | short cpus_in_uvhub; |
598 | short partition_base_pnode; | 606 | short partition_base_pnode; |
599 | short using_desc; /* an index, like uvhub_cpu */ | 607 | short busy; /* all were busy (war) */ |
600 | unsigned int inuse_map; | ||
601 | unsigned short message_number; | 608 | unsigned short message_number; |
602 | unsigned short uvhub_quiesce; | 609 | unsigned short uvhub_quiesce; |
603 | short socket_acknowledge_count[DEST_Q_SIZE]; | 610 | short socket_acknowledge_count[DEST_Q_SIZE]; |
604 | cycles_t send_message; | 611 | cycles_t send_message; |
612 | cycles_t period_end; | ||
613 | cycles_t period_time; | ||
605 | spinlock_t uvhub_lock; | 614 | spinlock_t uvhub_lock; |
606 | spinlock_t queue_lock; | 615 | spinlock_t queue_lock; |
616 | spinlock_t disable_lock; | ||
607 | /* tunables */ | 617 | /* tunables */ |
608 | int max_concurr; | 618 | int max_concurr; |
609 | int max_concurr_const; | 619 | int max_concurr_const; |
@@ -614,9 +624,9 @@ struct bau_control { | |||
614 | int complete_threshold; | 624 | int complete_threshold; |
615 | int cong_response_us; | 625 | int cong_response_us; |
616 | int cong_reps; | 626 | int cong_reps; |
617 | int cong_period; | 627 | cycles_t disabled_period; |
618 | unsigned long clocks_per_100_usec; | 628 | int period_giveups; |
619 | cycles_t period_time; | 629 | int giveup_limit; |
620 | long period_requests; | 630 | long period_requests; |
621 | struct hub_and_pnode *thp; | 631 | struct hub_and_pnode *thp; |
622 | }; | 632 | }; |
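Taken together, the new counters and the period_end/disabled_period fields imply a time-boxed fallback: once giveup_limit giveups accumulate, BAU use is suspended and flushes go out as IPIs until the penalty window expires. A rough sketch of the check these fields support (hypothetical helper; semantics inferred from the field comments above):

        static int bau_disabled_now(struct bau_control *bcp, cycles_t now)
        {
                /* baudisabled is set once period_giveups crosses
                 * giveup_limit; it holds until the window ends */
                return bcp->baudisabled && now < bcp->period_end;
        }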
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 31f180c21ce9..74fcb963595b 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -60,6 +60,7 @@ | |||
60 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 60 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
61 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | 61 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 |
62 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | 62 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 |
63 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | ||
63 | 64 | ||
64 | 65 | ||
65 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | 66 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
@@ -281,6 +282,7 @@ enum vmcs_field { | |||
281 | #define EXIT_REASON_EPT_MISCONFIG 49 | 282 | #define EXIT_REASON_EPT_MISCONFIG 49 |
282 | #define EXIT_REASON_WBINVD 54 | 283 | #define EXIT_REASON_WBINVD 54 |
283 | #define EXIT_REASON_XSETBV 55 | 284 | #define EXIT_REASON_XSETBV 55 |
285 | #define EXIT_REASON_INVPCID 58 | ||
284 | 286 | ||
285 | /* | 287 | /* |
286 | * Interruption-information format | 288 | * Interruption-information format |
@@ -404,6 +406,7 @@ enum vmcs_field { | |||
404 | #define VMX_EPTP_WB_BIT (1ull << 14) | 406 | #define VMX_EPTP_WB_BIT (1ull << 14) |
405 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) | 407 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) |
406 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) | 408 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) |
409 | #define VMX_EPT_AD_BIT (1ull << 21) | ||
407 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) | 410 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) |
408 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 411 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
409 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 412 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
@@ -415,11 +418,14 @@ enum vmcs_field { | |||
415 | #define VMX_EPT_MAX_GAW 0x4 | 418 | #define VMX_EPT_MAX_GAW 0x4 |
416 | #define VMX_EPT_MT_EPTE_SHIFT 3 | 419 | #define VMX_EPT_MT_EPTE_SHIFT 3 |
417 | #define VMX_EPT_GAW_EPTP_SHIFT 3 | 420 | #define VMX_EPT_GAW_EPTP_SHIFT 3 |
421 | #define VMX_EPT_AD_ENABLE_BIT (1ull << 6) | ||
418 | #define VMX_EPT_DEFAULT_MT 0x6ull | 422 | #define VMX_EPT_DEFAULT_MT 0x6ull |
419 | #define VMX_EPT_READABLE_MASK 0x1ull | 423 | #define VMX_EPT_READABLE_MASK 0x1ull |
420 | #define VMX_EPT_WRITABLE_MASK 0x2ull | 424 | #define VMX_EPT_WRITABLE_MASK 0x2ull |
421 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | 425 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull |
422 | #define VMX_EPT_IPAT_BIT (1ull << 6) | 426 | #define VMX_EPT_IPAT_BIT (1ull << 6) |
427 | #define VMX_EPT_ACCESS_BIT (1ull << 8) | ||
428 | #define VMX_EPT_DIRTY_BIT (1ull << 9) | ||
423 | 429 | ||
424 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | 430 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul |
425 | 431 | ||
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h index 92e54abf89e0..f90f0a587c66 100644 --- a/arch/x86/include/asm/x2apic.h +++ b/arch/x86/include/asm/x2apic.h | |||
@@ -9,15 +9,6 @@ | |||
9 | #include <asm/ipi.h> | 9 | #include <asm/ipi.h> |
10 | #include <linux/cpumask.h> | 10 | #include <linux/cpumask.h> |
11 | 11 | ||
12 | /* | ||
13 | * Need to use more than cpu 0, because we need more vectors | ||
14 | * when MSI-X are used. | ||
15 | */ | ||
16 | static const struct cpumask *x2apic_target_cpus(void) | ||
17 | { | ||
18 | return cpu_online_mask; | ||
19 | } | ||
20 | |||
21 | static int x2apic_apic_id_valid(int apicid) | 12 | static int x2apic_apic_id_valid(int apicid) |
22 | { | 13 | { |
23 | return 1; | 14 | return 1; |
@@ -28,15 +19,6 @@ static int x2apic_apic_id_registered(void) | |||
28 | return 1; | 19 | return 1; |
29 | } | 20 | } |
30 | 21 | ||
31 | /* | ||
32 | * For now each logical cpu is in its own vector allocation domain. | ||
33 | */ | ||
34 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
35 | { | ||
36 | cpumask_clear(retmask); | ||
37 | cpumask_set_cpu(cpu, retmask); | ||
38 | } | ||
39 | |||
40 | static void | 22 | static void |
41 | __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) | 23 | __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) |
42 | { | 24 | { |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c090af10ac7d..38155f667144 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -156,7 +156,6 @@ struct x86_cpuinit_ops { | |||
156 | /** | 156 | /** |
157 | * struct x86_platform_ops - platform specific runtime functions | 157 | * struct x86_platform_ops - platform specific runtime functions |
158 | * @calibrate_tsc: calibrate TSC | 158 | * @calibrate_tsc: calibrate TSC |
159 | * @wallclock_init: init the wallclock device | ||
160 | * @get_wallclock: get time from HW clock like RTC etc. | 159 | * @get_wallclock: get time from HW clock like RTC etc. |
161 | * @set_wallclock: set time back to HW clock | 160 | * @set_wallclock: set time back to HW clock |
162 | * @is_untracked_pat_range exclude from PAT logic | 161 | * @is_untracked_pat_range exclude from PAT logic |
@@ -164,10 +163,10 @@ struct x86_cpuinit_ops { | |||
164 | * @i8042_detect pre-detect if i8042 controller exists | 163 | * @i8042_detect pre-detect if i8042 controller exists |
165 | * @save_sched_clock_state: save state for sched_clock() on suspend | 164 | * @save_sched_clock_state: save state for sched_clock() on suspend |
166 | * @restore_sched_clock_state: restore state for sched_clock() on resume | 165 | * @restore_sched_clock_state: restore state for sched_clock() on resume |
166 | * @apic_post_init: adjust apic if needed | ||
167 | */ | 167 | */ |
168 | struct x86_platform_ops { | 168 | struct x86_platform_ops { |
169 | unsigned long (*calibrate_tsc)(void); | 169 | unsigned long (*calibrate_tsc)(void); |
170 | void (*wallclock_init)(void); | ||
171 | unsigned long (*get_wallclock)(void); | 170 | unsigned long (*get_wallclock)(void); |
172 | int (*set_wallclock)(unsigned long nowtime); | 171 | int (*set_wallclock)(unsigned long nowtime); |
173 | void (*iommu_shutdown)(void); | 172 | void (*iommu_shutdown)(void); |
@@ -177,6 +176,7 @@ struct x86_platform_ops { | |||
177 | int (*i8042_detect)(void); | 176 | int (*i8042_detect)(void); |
178 | void (*save_sched_clock_state)(void); | 177 | void (*save_sched_clock_state)(void); |
179 | void (*restore_sched_clock_state)(void); | 178 | void (*restore_sched_clock_state)(void); |
179 | void (*apic_post_init)(void); | ||
180 | }; | 180 | }; |
181 | 181 | ||
182 | struct pci_dev; | 182 | struct pci_dev; |
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 5728852fb90f..59c226d120cd 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <xen/interface/sched.h> | 48 | #include <xen/interface/sched.h> |
49 | #include <xen/interface/physdev.h> | 49 | #include <xen/interface/physdev.h> |
50 | #include <xen/interface/platform.h> | 50 | #include <xen/interface/platform.h> |
51 | #include <xen/interface/xen-mca.h> | ||
51 | 52 | ||
52 | /* | 53 | /* |
53 | * The hypercall asms have to meet several constraints: | 54 | * The hypercall asms have to meet several constraints: |
@@ -302,6 +303,13 @@ HYPERVISOR_set_timer_op(u64 timeout) | |||
302 | } | 303 | } |
303 | 304 | ||
304 | static inline int | 305 | static inline int |
306 | HYPERVISOR_mca(struct xen_mc *mc_op) | ||
307 | { | ||
308 | mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; | ||
309 | return _hypercall1(int, mca, mc_op); | ||
310 | } | ||
311 | |||
312 | static inline int | ||
305 | HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) | 313 | HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) |
306 | { | 314 | { |
307 | platform_op->interface_version = XENPF_INTERFACE_VERSION; | 315 | platform_op->interface_version = XENPF_INTERFACE_VERSION; |
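The new wrapper stamps the MCA interface version before issuing the hypercall, mirroring HYPERVISOR_dom0_op() below it. A hedged usage sketch: XEN_MC_fetch and the xen_mc layout come from the newly included xen-mca.h, and the field access here is only assumed:

        struct xen_mc mc = {
                .cmd = XEN_MC_fetch,    /* ask the hypervisor for MCA telemetry */
        };

        if (HYPERVISOR_mca(&mc) == 0) {
                /* records, if any, arrive in mc.u.mc_fetch */
        }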
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1f84794f0759..931280ff8299 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #define pr_fmt(fmt) "SMP alternatives: " fmt | ||
2 | |||
1 | #include <linux/module.h> | 3 | #include <linux/module.h> |
2 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
3 | #include <linux/mutex.h> | 5 | #include <linux/mutex.h> |
@@ -63,8 +65,11 @@ static int __init setup_noreplace_paravirt(char *str) | |||
63 | __setup("noreplace-paravirt", setup_noreplace_paravirt); | 65 | __setup("noreplace-paravirt", setup_noreplace_paravirt); |
64 | #endif | 66 | #endif |
65 | 67 | ||
66 | #define DPRINTK(fmt, args...) if (debug_alternative) \ | 68 | #define DPRINTK(fmt, ...) \ |
67 | printk(KERN_DEBUG fmt, args) | 69 | do { \ |
70 | if (debug_alternative) \ | ||
71 | printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ | ||
72 | } while (0) | ||
68 | 73 | ||
69 | /* | 74 | /* |
70 | * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes | 75 | * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes |
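The do { ... } while (0) wrapper is what makes DPRINTK() behave as a single statement; the old bare-if form misparses under an else. A short illustration with hypothetical caller-side names (patched, n):

        /*
         * With the old macro, the 'else' below would bind to the macro's
         * internal 'if (debug_alternative)' rather than to 'if (patched)',
         * silently inverting the logic. The do { } while (0) form cannot
         * be split that way.
         */
        if (patched)
                DPRINTK("patched %d call sites\n", n);
        else
                panic("SMP alternatives: nothing patched");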
@@ -428,7 +433,7 @@ void alternatives_smp_switch(int smp) | |||
428 | * If this still occurs then you should see a hang | 433 | * If this still occurs then you should see a hang |
429 | * or crash shortly after this line: | 434 | * or crash shortly after this line: |
430 | */ | 435 | */ |
431 | printk("lockdep: fixing up alternatives.\n"); | 436 | pr_info("lockdep: fixing up alternatives\n"); |
432 | #endif | 437 | #endif |
433 | 438 | ||
434 | if (noreplace_smp || smp_alt_once || skip_smp_alternatives) | 439 | if (noreplace_smp || smp_alt_once || skip_smp_alternatives) |
@@ -444,14 +449,14 @@ void alternatives_smp_switch(int smp) | |||
444 | if (smp == smp_mode) { | 449 | if (smp == smp_mode) { |
445 | /* nothing */ | 450 | /* nothing */ |
446 | } else if (smp) { | 451 | } else if (smp) { |
447 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); | 452 | pr_info("switching to SMP code\n"); |
448 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); | 453 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
449 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); | 454 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
450 | list_for_each_entry(mod, &smp_alt_modules, next) | 455 | list_for_each_entry(mod, &smp_alt_modules, next) |
451 | alternatives_smp_lock(mod->locks, mod->locks_end, | 456 | alternatives_smp_lock(mod->locks, mod->locks_end, |
452 | mod->text, mod->text_end); | 457 | mod->text, mod->text_end); |
453 | } else { | 458 | } else { |
454 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); | 459 | pr_info("switching to UP code\n"); |
455 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); | 460 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
456 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); | 461 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
457 | list_for_each_entry(mod, &smp_alt_modules, next) | 462 | list_for_each_entry(mod, &smp_alt_modules, next) |
@@ -546,7 +551,7 @@ void __init alternative_instructions(void) | |||
546 | #ifdef CONFIG_SMP | 551 | #ifdef CONFIG_SMP |
547 | if (smp_alt_once) { | 552 | if (smp_alt_once) { |
548 | if (1 == num_possible_cpus()) { | 553 | if (1 == num_possible_cpus()) { |
549 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); | 554 | pr_info("switching to UP code\n"); |
550 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); | 555 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); |
551 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); | 556 | set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); |
552 | 557 | ||
@@ -664,7 +669,7 @@ static int __kprobes stop_machine_text_poke(void *data) | |||
664 | struct text_poke_param *p; | 669 | struct text_poke_param *p; |
665 | int i; | 670 | int i; |
666 | 671 | ||
667 | if (atomic_dec_and_test(&stop_machine_first)) { | 672 | if (atomic_xchg(&stop_machine_first, 0)) { |
668 | for (i = 0; i < tpp->nparams; i++) { | 673 | for (i = 0; i < tpp->nparams; i++) { |
669 | p = &tpp->params[i]; | 674 | p = &tpp->params[i]; |
670 | text_poke(p->addr, p->opcode, p->len); | 675 | text_poke(p->addr, p->opcode, p->len); |
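atomic_xchg() returns the previous value, so exactly one caller ever sees the initial 1 no matter how many times the stop_machine callback runs, whereas atomic_dec_and_test() fires only on the single transition to zero and leaves the counter negative afterwards. A self-contained C11 sketch of the first-caller-wins idiom (names are illustrative):

        #include <stdatomic.h>

        static atomic_int first = ATOMIC_VAR_INIT(1);

        void per_cpu_callback(void)
        {
                /* atomic_exchange() hands back the old value: one winner */
                if (atomic_exchange(&first, 0)) {
                        /* one-time work, e.g. the text_poke() loop above */
                }
        }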
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index be16854591cc..aadf3359e2a7 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c | |||
@@ -2,6 +2,9 @@ | |||
2 | * Shared support code for AMD K8 northbridges and derivatives. | 2 | * Shared support code for AMD K8 northbridges and derivatives. |
3 | * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. | 3 | * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. |
4 | */ | 4 | */ |
5 | |||
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
7 | |||
5 | #include <linux/types.h> | 8 | #include <linux/types.h> |
6 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
7 | #include <linux/init.h> | 10 | #include <linux/init.h> |
@@ -16,6 +19,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { | |||
16 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, | 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
17 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, | 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
18 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, | 21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, |
22 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, | ||
19 | {} | 23 | {} |
20 | }; | 24 | }; |
21 | EXPORT_SYMBOL(amd_nb_misc_ids); | 25 | EXPORT_SYMBOL(amd_nb_misc_ids); |
@@ -258,7 +262,7 @@ void amd_flush_garts(void) | |||
258 | } | 262 | } |
259 | spin_unlock_irqrestore(&gart_lock, flags); | 263 | spin_unlock_irqrestore(&gart_lock, flags); |
260 | if (!flushed) | 264 | if (!flushed) |
261 | printk("nothing to flush?\n"); | 265 | pr_notice("nothing to flush?\n"); |
262 | } | 266 | } |
263 | EXPORT_SYMBOL_GPL(amd_flush_garts); | 267 | EXPORT_SYMBOL_GPL(amd_flush_garts); |
264 | 268 | ||
@@ -269,11 +273,10 @@ static __init int init_amd_nbs(void) | |||
269 | err = amd_cache_northbridges(); | 273 | err = amd_cache_northbridges(); |
270 | 274 | ||
271 | if (err < 0) | 275 | if (err < 0) |
272 | printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); | 276 | pr_notice("Cannot enumerate AMD northbridges\n"); |
273 | 277 | ||
274 | if (amd_cache_gart() < 0) | 278 | if (amd_cache_gart() < 0) |
275 | printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, " | 279 | pr_notice("Cannot initialize GART flush words, GART support disabled\n"); |
276 | "GART support disabled.\n"); | ||
277 | 280 | ||
278 | return err; | 281 | return err; |
279 | } | 282 | } |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 39a222e094af..24deb3082328 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -75,8 +75,8 @@ physid_mask_t phys_cpu_present_map; | |||
75 | /* | 75 | /* |
76 | * Map cpu index to physical APIC ID | 76 | * Map cpu index to physical APIC ID |
77 | */ | 77 | */ |
78 | DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); | 78 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); |
79 | DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); | 79 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); |
80 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); | 80 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); |
81 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | 81 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); |
82 | 82 | ||
@@ -88,7 +88,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | |||
88 | * used for the mapping. This is where the behaviors of x86_64 and 32 | 88 | * used for the mapping. This is where the behaviors of x86_64 and 32 |
89 | * actually diverge. Let's keep it ugly for now. | 89 | * actually diverge. Let's keep it ugly for now. |
90 | */ | 90 | */ |
91 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID); | 91 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); |
92 | 92 | ||
93 | /* | 93 | /* |
94 | * Knob to control our willingness to enable the local APIC. | 94 | * Knob to control our willingness to enable the local APIC. |
@@ -2123,6 +2123,42 @@ void default_init_apic_ldr(void) | |||
2123 | apic_write(APIC_LDR, val); | 2123 | apic_write(APIC_LDR, val); |
2124 | } | 2124 | } |
2125 | 2125 | ||
2126 | int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
2127 | const struct cpumask *andmask, | ||
2128 | unsigned int *apicid) | ||
2129 | { | ||
2130 | unsigned int cpu; | ||
2131 | |||
2132 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
2133 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
2134 | break; | ||
2135 | } | ||
2136 | |||
2137 | if (likely(cpu < nr_cpu_ids)) { | ||
2138 | *apicid = per_cpu(x86_cpu_to_apicid, cpu); | ||
2139 | return 0; | ||
2140 | } | ||
2141 | |||
2142 | return -EINVAL; | ||
2143 | } | ||
2144 | |||
2145 | /* | ||
2146 | * Override the generic EOI implementation with an optimized version. | ||
2147 | * Only called during early boot when only one CPU is active and with | ||
2148 | * interrupts disabled, so we know this does not race with actual APIC driver | ||
2149 | * use. | ||
2150 | */ | ||
2151 | void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) | ||
2152 | { | ||
2153 | struct apic **drv; | ||
2154 | |||
2155 | for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { | ||
2156 | /* Should happen once for each apic */ | ||
2157 | WARN_ON((*drv)->eoi_write == eoi_write); | ||
2158 | (*drv)->eoi_write = eoi_write; | ||
2159 | } | ||
2160 | } | ||
2161 | |||
2126 | /* | 2162 | /* |
2127 | * Power management | 2163 | * Power management |
2128 | */ | 2164 | */ |
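Because every entry in __apicdrivers is patched before a driver is chosen, the override installed by apic_set_eoi_write() survives whichever APIC implementation probing later selects. A hedged sketch of an early-boot caller installing a paravirtualized EOI fast path (the function and mechanism are illustrative, not part of this patch):

        static void pv_eoi_write(u32 reg, u32 val)
        {
                /* reg is always APIC_EOI here; acknowledge the interrupt
                 * through the paravirtual channel instead of MMIO */
        }

        static void __init pv_apic_setup(void)
        {
                apic_set_eoi_write(pv_eoi_write);
        }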
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 0e881c46e8c8..00c77cf78e9e 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -36,25 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
36 | return 1; | 36 | return 1; |
37 | } | 37 | } |
38 | 38 | ||
39 | static const struct cpumask *flat_target_cpus(void) | ||
40 | { | ||
41 | return cpu_online_mask; | ||
42 | } | ||
43 | |||
44 | static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
45 | { | ||
46 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
47 | * specified in the interrupt destination when using lowest | ||
48 | * priority interrupt delivery mode. | ||
49 | * | ||
50 | * In particular there was a hyperthreading cpu observed to | ||
51 | * deliver interrupts to the wrong hyperthread when only one | ||
52 | * hyperthread was specified in the interrupt destination. | ||
53 | */ | ||
54 | cpumask_clear(retmask); | ||
55 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
56 | } | ||
57 | |||
58 | /* | 39 | /* |
59 | * Set up the logical destination ID. | 40 | * Set up the logical destination ID. |
60 | * | 41 | * |
@@ -92,7 +73,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) | |||
92 | } | 73 | } |
93 | 74 | ||
94 | static void | 75 | static void |
95 | flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) | 76 | flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) |
96 | { | 77 | { |
97 | unsigned long mask = cpumask_bits(cpumask)[0]; | 78 | unsigned long mask = cpumask_bits(cpumask)[0]; |
98 | int cpu = smp_processor_id(); | 79 | int cpu = smp_processor_id(); |
@@ -186,7 +167,7 @@ static struct apic apic_flat = { | |||
186 | .irq_delivery_mode = dest_LowestPrio, | 167 | .irq_delivery_mode = dest_LowestPrio, |
187 | .irq_dest_mode = 1, /* logical */ | 168 | .irq_dest_mode = 1, /* logical */ |
188 | 169 | ||
189 | .target_cpus = flat_target_cpus, | 170 | .target_cpus = online_target_cpus, |
190 | .disable_esr = 0, | 171 | .disable_esr = 0, |
191 | .dest_logical = APIC_DEST_LOGICAL, | 172 | .dest_logical = APIC_DEST_LOGICAL, |
192 | .check_apicid_used = NULL, | 173 | .check_apicid_used = NULL, |
@@ -210,8 +191,7 @@ static struct apic apic_flat = { | |||
210 | .set_apic_id = set_apic_id, | 191 | .set_apic_id = set_apic_id, |
211 | .apic_id_mask = 0xFFu << 24, | 192 | .apic_id_mask = 0xFFu << 24, |
212 | 193 | ||
213 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | 194 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, |
214 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
215 | 195 | ||
216 | .send_IPI_mask = flat_send_IPI_mask, | 196 | .send_IPI_mask = flat_send_IPI_mask, |
217 | .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, | 197 | .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, |
@@ -262,17 +242,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
262 | return 0; | 242 | return 0; |
263 | } | 243 | } |
264 | 244 | ||
265 | static const struct cpumask *physflat_target_cpus(void) | ||
266 | { | ||
267 | return cpu_online_mask; | ||
268 | } | ||
269 | |||
270 | static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
271 | { | ||
272 | cpumask_clear(retmask); | ||
273 | cpumask_set_cpu(cpu, retmask); | ||
274 | } | ||
275 | |||
276 | static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) | 245 | static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) |
277 | { | 246 | { |
278 | default_send_IPI_mask_sequence_phys(cpumask, vector); | 247 | default_send_IPI_mask_sequence_phys(cpumask, vector); |
@@ -294,38 +263,6 @@ static void physflat_send_IPI_all(int vector) | |||
294 | physflat_send_IPI_mask(cpu_online_mask, vector); | 263 | physflat_send_IPI_mask(cpu_online_mask, vector); |
295 | } | 264 | } |
296 | 265 | ||
297 | static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
298 | { | ||
299 | int cpu; | ||
300 | |||
301 | /* | ||
302 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
303 | * May as well be the first. | ||
304 | */ | ||
305 | cpu = cpumask_first(cpumask); | ||
306 | if ((unsigned)cpu < nr_cpu_ids) | ||
307 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
308 | else | ||
309 | return BAD_APICID; | ||
310 | } | ||
311 | |||
312 | static unsigned int | ||
313 | physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
314 | const struct cpumask *andmask) | ||
315 | { | ||
316 | int cpu; | ||
317 | |||
318 | /* | ||
319 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
320 | * May as well be the first. | ||
321 | */ | ||
322 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
323 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
324 | break; | ||
325 | } | ||
326 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
327 | } | ||
328 | |||
329 | static int physflat_probe(void) | 266 | static int physflat_probe(void) |
330 | { | 267 | { |
331 | if (apic == &apic_physflat || num_possible_cpus() > 8) | 268 | if (apic == &apic_physflat || num_possible_cpus() > 8) |
@@ -345,13 +282,13 @@ static struct apic apic_physflat = { | |||
345 | .irq_delivery_mode = dest_Fixed, | 282 | .irq_delivery_mode = dest_Fixed, |
346 | .irq_dest_mode = 0, /* physical */ | 283 | .irq_dest_mode = 0, /* physical */ |
347 | 284 | ||
348 | .target_cpus = physflat_target_cpus, | 285 | .target_cpus = online_target_cpus, |
349 | .disable_esr = 0, | 286 | .disable_esr = 0, |
350 | .dest_logical = 0, | 287 | .dest_logical = 0, |
351 | .check_apicid_used = NULL, | 288 | .check_apicid_used = NULL, |
352 | .check_apicid_present = NULL, | 289 | .check_apicid_present = NULL, |
353 | 290 | ||
354 | .vector_allocation_domain = physflat_vector_allocation_domain, | 291 | .vector_allocation_domain = default_vector_allocation_domain, |
355 | /* not needed, but shouldn't hurt: */ | 292 | /* not needed, but shouldn't hurt: */ |
356 | .init_apic_ldr = flat_init_apic_ldr, | 293 | .init_apic_ldr = flat_init_apic_ldr, |
357 | 294 | ||
@@ -370,8 +307,7 @@ static struct apic apic_physflat = { | |||
370 | .set_apic_id = set_apic_id, | 307 | .set_apic_id = set_apic_id, |
371 | .apic_id_mask = 0xFFu << 24, | 308 | .apic_id_mask = 0xFFu << 24, |
372 | 309 | ||
373 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, | 310 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, |
374 | .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, | ||
375 | 311 | ||
376 | .send_IPI_mask = physflat_send_IPI_mask, | 312 | .send_IPI_mask = physflat_send_IPI_mask, |
377 | .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, | 313 | .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, |
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index a6e4c6e06c08..e145f28b4099 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -100,12 +100,12 @@ static unsigned long noop_check_apicid_present(int bit) | |||
100 | return physid_isset(bit, phys_cpu_present_map); | 100 | return physid_isset(bit, phys_cpu_present_map); |
101 | } | 101 | } |
102 | 102 | ||
103 | static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) | 103 | static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, |
104 | const struct cpumask *mask) | ||
104 | { | 105 | { |
105 | if (cpu != 0) | 106 | if (cpu != 0) |
106 | pr_warning("APIC: Vector allocated for non-BSP cpu\n"); | 107 | pr_warning("APIC: Vector allocated for non-BSP cpu\n"); |
107 | cpumask_clear(retmask); | 108 | cpumask_copy(retmask, cpumask_of(cpu)); |
108 | cpumask_set_cpu(cpu, retmask); | ||
109 | } | 109 | } |
110 | 110 | ||
111 | static u32 noop_apic_read(u32 reg) | 111 | static u32 noop_apic_read(u32 reg) |
@@ -159,8 +159,7 @@ struct apic apic_noop = { | |||
159 | .set_apic_id = NULL, | 159 | .set_apic_id = NULL, |
160 | .apic_id_mask = 0x0F << 24, | 160 | .apic_id_mask = 0x0F << 24, |
161 | 161 | ||
162 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | 162 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, |
163 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
164 | 163 | ||
165 | .send_IPI_mask = noop_send_IPI_mask, | 164 | .send_IPI_mask = noop_send_IPI_mask, |
166 | .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, | 165 | .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, |
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 6ec6d5d297c3..bc552cff2578 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c | |||
@@ -72,17 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) | |||
72 | return initial_apic_id >> index_msb; | 72 | return initial_apic_id >> index_msb; |
73 | } | 73 | } |
74 | 74 | ||
75 | static const struct cpumask *numachip_target_cpus(void) | ||
76 | { | ||
77 | return cpu_online_mask; | ||
78 | } | ||
79 | |||
80 | static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
81 | { | ||
82 | cpumask_clear(retmask); | ||
83 | cpumask_set_cpu(cpu, retmask); | ||
84 | } | ||
85 | |||
86 | static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) | 75 | static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) |
87 | { | 76 | { |
88 | union numachip_csr_g3_ext_irq_gen int_gen; | 77 | union numachip_csr_g3_ext_irq_gen int_gen; |
@@ -157,38 +146,6 @@ static void numachip_send_IPI_self(int vector) | |||
157 | __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); | 146 | __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); |
158 | } | 147 | } |
159 | 148 | ||
160 | static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
161 | { | ||
162 | int cpu; | ||
163 | |||
164 | /* | ||
165 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
166 | * May as well be the first. | ||
167 | */ | ||
168 | cpu = cpumask_first(cpumask); | ||
169 | if (likely((unsigned)cpu < nr_cpu_ids)) | ||
170 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
171 | |||
172 | return BAD_APICID; | ||
173 | } | ||
174 | |||
175 | static unsigned int | ||
176 | numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
177 | const struct cpumask *andmask) | ||
178 | { | ||
179 | int cpu; | ||
180 | |||
181 | /* | ||
182 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
183 | * May as well be the first. | ||
184 | */ | ||
185 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
186 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
187 | break; | ||
188 | } | ||
189 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
190 | } | ||
191 | |||
192 | static int __init numachip_probe(void) | 149 | static int __init numachip_probe(void) |
193 | { | 150 | { |
194 | return apic == &apic_numachip; | 151 | return apic == &apic_numachip; |
@@ -253,13 +210,13 @@ static struct apic apic_numachip __refconst = { | |||
253 | .irq_delivery_mode = dest_Fixed, | 210 | .irq_delivery_mode = dest_Fixed, |
254 | .irq_dest_mode = 0, /* physical */ | 211 | .irq_dest_mode = 0, /* physical */ |
255 | 212 | ||
256 | .target_cpus = numachip_target_cpus, | 213 | .target_cpus = online_target_cpus, |
257 | .disable_esr = 0, | 214 | .disable_esr = 0, |
258 | .dest_logical = 0, | 215 | .dest_logical = 0, |
259 | .check_apicid_used = NULL, | 216 | .check_apicid_used = NULL, |
260 | .check_apicid_present = NULL, | 217 | .check_apicid_present = NULL, |
261 | 218 | ||
262 | .vector_allocation_domain = numachip_vector_allocation_domain, | 219 | .vector_allocation_domain = default_vector_allocation_domain, |
263 | .init_apic_ldr = flat_init_apic_ldr, | 220 | .init_apic_ldr = flat_init_apic_ldr, |
264 | 221 | ||
265 | .ioapic_phys_id_map = NULL, | 222 | .ioapic_phys_id_map = NULL, |
@@ -277,8 +234,7 @@ static struct apic apic_numachip __refconst = { | |||
277 | .set_apic_id = set_apic_id, | 234 | .set_apic_id = set_apic_id, |
278 | .apic_id_mask = 0xffU << 24, | 235 | .apic_id_mask = 0xffU << 24, |
279 | 236 | ||
280 | .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid, | 237 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, |
281 | .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and, | ||
282 | 238 | ||
283 | .send_IPI_mask = numachip_send_IPI_mask, | 239 | .send_IPI_mask = numachip_send_IPI_mask, |
284 | .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, | 240 | .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, |
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 31fbdbfbf960..d50e3640d5ae 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void) | |||
26 | return 1; | 26 | return 1; |
27 | } | 27 | } |
28 | 28 | ||
29 | static const struct cpumask *bigsmp_target_cpus(void) | ||
30 | { | ||
31 | #ifdef CONFIG_SMP | ||
32 | return cpu_online_mask; | ||
33 | #else | ||
34 | return cpumask_of(0); | ||
35 | #endif | ||
36 | } | ||
37 | |||
38 | static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) | 29 | static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) |
39 | { | 30 | { |
40 | return 0; | 31 | return 0; |
@@ -105,32 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid) | |||
105 | return 1; | 96 | return 1; |
106 | } | 97 | } |
107 | 98 | ||
108 | /* As we are using a single CPU as the destination, pick only one CPU here */ | ||
109 | static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
110 | { | ||
111 | int cpu = cpumask_first(cpumask); | ||
112 | |||
113 | if (cpu < nr_cpu_ids) | ||
114 | return cpu_physical_id(cpu); | ||
115 | return BAD_APICID; | ||
116 | } | ||
117 | |||
118 | static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
119 | const struct cpumask *andmask) | ||
120 | { | ||
121 | int cpu; | ||
122 | |||
123 | /* | ||
124 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
125 | * May as well be the first. | ||
126 | */ | ||
127 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
128 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
129 | return cpu_physical_id(cpu); | ||
130 | } | ||
131 | return BAD_APICID; | ||
132 | } | ||
133 | |||
134 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) | 99 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) |
135 | { | 100 | { |
136 | return cpuid_apic >> index_msb; | 101 | return cpuid_apic >> index_msb; |
@@ -177,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { | |||
177 | { } /* NULL entry stops DMI scanning */ | 142 | { } /* NULL entry stops DMI scanning */ |
178 | }; | 143 | }; |
179 | 144 | ||
180 | static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
181 | { | ||
182 | cpumask_clear(retmask); | ||
183 | cpumask_set_cpu(cpu, retmask); | ||
184 | } | ||
185 | |||
186 | static int probe_bigsmp(void) | 145 | static int probe_bigsmp(void) |
187 | { | 146 | { |
188 | if (def_to_bigsmp) | 147 | if (def_to_bigsmp) |
@@ -205,13 +164,13 @@ static struct apic apic_bigsmp = { | |||
205 | /* phys delivery to target CPU: */ | 164 | /* phys delivery to target CPU: */ |
206 | .irq_dest_mode = 0, | 165 | .irq_dest_mode = 0, |
207 | 166 | ||
208 | .target_cpus = bigsmp_target_cpus, | 167 | .target_cpus = default_target_cpus, |
209 | .disable_esr = 1, | 168 | .disable_esr = 1, |
210 | .dest_logical = 0, | 169 | .dest_logical = 0, |
211 | .check_apicid_used = bigsmp_check_apicid_used, | 170 | .check_apicid_used = bigsmp_check_apicid_used, |
212 | .check_apicid_present = bigsmp_check_apicid_present, | 171 | .check_apicid_present = bigsmp_check_apicid_present, |
213 | 172 | ||
214 | .vector_allocation_domain = bigsmp_vector_allocation_domain, | 173 | .vector_allocation_domain = default_vector_allocation_domain, |
215 | .init_apic_ldr = bigsmp_init_apic_ldr, | 174 | .init_apic_ldr = bigsmp_init_apic_ldr, |
216 | 175 | ||
217 | .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, | 176 | .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, |
@@ -229,8 +188,7 @@ static struct apic apic_bigsmp = { | |||
229 | .set_apic_id = NULL, | 188 | .set_apic_id = NULL, |
230 | .apic_id_mask = 0xFF << 24, | 189 | .apic_id_mask = 0xFF << 24, |
231 | 190 | ||
232 | .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, | 191 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, |
233 | .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, | ||
234 | 192 | ||
235 | .send_IPI_mask = bigsmp_send_IPI_mask, | 193 | .send_IPI_mask = bigsmp_send_IPI_mask, |
236 | .send_IPI_mask_allbutself = NULL, | 194 | .send_IPI_mask_allbutself = NULL, |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index db4ab1be3c79..0874799a98c6 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -394,21 +394,6 @@ static void es7000_enable_apic_mode(void) | |||
394 | WARN(1, "Command failed, status = %x\n", mip_status); | 394 | WARN(1, "Command failed, status = %x\n", mip_status); |
395 | } | 395 | } |
396 | 396 | ||
397 | static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
398 | { | ||
399 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
400 | * specified in the interrupt destination when using lowest | ||
401 | * priority interrupt delivery mode. | ||
402 | * | ||
403 | * In particular there was a hyperthreading cpu observed to | ||
404 | * deliver interrupts to the wrong hyperthread when only one | ||
405 | * hyperthread was specified in the interrupt destination. | ||
406 | */ | ||
407 | cpumask_clear(retmask); | ||
408 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
409 | } | ||
410 | |||
411 | |||
412 | static void es7000_wait_for_init_deassert(atomic_t *deassert) | 397 | static void es7000_wait_for_init_deassert(atomic_t *deassert) |
413 | { | 398 | { |
414 | while (!atomic_read(deassert)) | 399 | while (!atomic_read(deassert)) |
@@ -540,45 +525,49 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid) | |||
540 | return 1; | 525 | return 1; |
541 | } | 526 | } |
542 | 527 | ||
543 | static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) | 528 | static inline int |
529 | es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) | ||
544 | { | 530 | { |
545 | unsigned int round = 0; | 531 | unsigned int round = 0; |
546 | int cpu, uninitialized_var(apicid); | 532 | unsigned int cpu, uninitialized_var(apicid); |
547 | 533 | ||
548 | /* | 534 | /* |
549 | * The cpus in the mask must all be on the apic cluster. | 535 | * The cpus in the mask must all be on the apic cluster. |
550 | */ | 536 | */ |
551 | for_each_cpu(cpu, cpumask) { | 537 | for_each_cpu_and(cpu, cpumask, cpu_online_mask) { |
552 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | 538 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); |
553 | 539 | ||
554 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { | 540 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { |
555 | WARN(1, "Not a valid mask!"); | 541 | WARN(1, "Not a valid mask!"); |
556 | 542 | ||
557 | return BAD_APICID; | 543 | return -EINVAL; |
558 | } | 544 | } |
559 | apicid = new_apicid; | 545 | apicid |= new_apicid; |
560 | round++; | 546 | round++; |
561 | } | 547 | } |
562 | return apicid; | 548 | if (!round) |
549 | return -EINVAL; | ||
550 | *dest_id = apicid; | ||
551 | return 0; | ||
563 | } | 552 | } |
564 | 553 | ||
565 | static unsigned int | 554 | static int |
566 | es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, | 555 | es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, |
567 | const struct cpumask *andmask) | 556 | const struct cpumask *andmask, |
557 | unsigned int *apicid) | ||
568 | { | 558 | { |
569 | int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); | ||
570 | cpumask_var_t cpumask; | 559 | cpumask_var_t cpumask; |
560 | *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); | ||
571 | 561 | ||
572 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | 562 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) |
573 | return apicid; | 563 | return 0; |
574 | 564 | ||
575 | cpumask_and(cpumask, inmask, andmask); | 565 | cpumask_and(cpumask, inmask, andmask); |
576 | cpumask_and(cpumask, cpumask, cpu_online_mask); | 566 | es7000_cpu_mask_to_apicid(cpumask, apicid); |
577 | apicid = es7000_cpu_mask_to_apicid(cpumask); | ||
578 | 567 | ||
579 | free_cpumask_var(cpumask); | 568 | free_cpumask_var(cpumask); |
580 | 569 | ||
581 | return apicid; | 570 | return 0; |
582 | } | 571 | } |
583 | 572 | ||
584 | static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) | 573 | static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) |
@@ -638,7 +627,7 @@ static struct apic __refdata apic_es7000_cluster = { | |||
638 | .check_apicid_used = es7000_check_apicid_used, | 627 | .check_apicid_used = es7000_check_apicid_used, |
639 | .check_apicid_present = es7000_check_apicid_present, | 628 | .check_apicid_present = es7000_check_apicid_present, |
640 | 629 | ||
641 | .vector_allocation_domain = es7000_vector_allocation_domain, | 630 | .vector_allocation_domain = flat_vector_allocation_domain, |
642 | .init_apic_ldr = es7000_init_apic_ldr_cluster, | 631 | .init_apic_ldr = es7000_init_apic_ldr_cluster, |
643 | 632 | ||
644 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | 633 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, |
@@ -656,7 +645,6 @@ static struct apic __refdata apic_es7000_cluster = { | |||
656 | .set_apic_id = NULL, | 645 | .set_apic_id = NULL, |
657 | .apic_id_mask = 0xFF << 24, | 646 | .apic_id_mask = 0xFF << 24, |
658 | 647 | ||
659 | .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, | ||
660 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, | 648 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, |
661 | 649 | ||
662 | .send_IPI_mask = es7000_send_IPI_mask, | 650 | .send_IPI_mask = es7000_send_IPI_mask, |
@@ -705,7 +693,7 @@ static struct apic __refdata apic_es7000 = { | |||
705 | .check_apicid_used = es7000_check_apicid_used, | 693 | .check_apicid_used = es7000_check_apicid_used, |
706 | .check_apicid_present = es7000_check_apicid_present, | 694 | .check_apicid_present = es7000_check_apicid_present, |
707 | 695 | ||
708 | .vector_allocation_domain = es7000_vector_allocation_domain, | 696 | .vector_allocation_domain = flat_vector_allocation_domain, |
709 | .init_apic_ldr = es7000_init_apic_ldr, | 697 | .init_apic_ldr = es7000_init_apic_ldr, |
710 | 698 | ||
711 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | 699 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, |
@@ -723,7 +711,6 @@ static struct apic __refdata apic_es7000 = { | |||
723 | .set_apic_id = NULL, | 711 | .set_apic_id = NULL, |
724 | .apic_id_mask = 0xFF << 24, | 712 | .apic_id_mask = 0xFF << 24, |
725 | 713 | ||
726 | .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, | ||
727 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, | 714 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, |
728 | 715 | ||
729 | .send_IPI_mask = es7000_send_IPI_mask, | 716 | .send_IPI_mask = es7000_send_IPI_mask, |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5f0ff597437c..406eee784684 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -448,8 +448,8 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi | |||
448 | 448 | ||
449 | entry = alloc_irq_pin_list(node); | 449 | entry = alloc_irq_pin_list(node); |
450 | if (!entry) { | 450 | if (!entry) { |
451 | printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", | 451 | pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", |
452 | node, apic, pin); | 452 | node, apic, pin); |
453 | return -ENOMEM; | 453 | return -ENOMEM; |
454 | } | 454 | } |
455 | entry->apic = apic; | 455 | entry->apic = apic; |
@@ -661,7 +661,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | |||
661 | ioapic_mask_entry(apic, pin); | 661 | ioapic_mask_entry(apic, pin); |
662 | entry = ioapic_read_entry(apic, pin); | 662 | entry = ioapic_read_entry(apic, pin); |
663 | if (entry.irr) | 663 | if (entry.irr) |
664 | printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n", | 664 | pr_err("Unable to reset IRR for apic: %d, pin :%d\n", |
665 | mpc_ioapic_id(apic), pin); | 665 | mpc_ioapic_id(apic), pin); |
666 | } | 666 | } |
667 | 667 | ||
@@ -895,7 +895,7 @@ static int irq_polarity(int idx) | |||
895 | } | 895 | } |
896 | case 2: /* reserved */ | 896 | case 2: /* reserved */ |
897 | { | 897 | { |
898 | printk(KERN_WARNING "broken BIOS!!\n"); | 898 | pr_warn("broken BIOS!!\n"); |
899 | polarity = 1; | 899 | polarity = 1; |
900 | break; | 900 | break; |
901 | } | 901 | } |
@@ -906,7 +906,7 @@ static int irq_polarity(int idx) | |||
906 | } | 906 | } |
907 | default: /* invalid */ | 907 | default: /* invalid */ |
908 | { | 908 | { |
909 | printk(KERN_WARNING "broken BIOS!!\n"); | 909 | pr_warn("broken BIOS!!\n"); |
910 | polarity = 1; | 910 | polarity = 1; |
911 | break; | 911 | break; |
912 | } | 912 | } |
@@ -948,7 +948,7 @@ static int irq_trigger(int idx) | |||
948 | } | 948 | } |
949 | default: | 949 | default: |
950 | { | 950 | { |
951 | printk(KERN_WARNING "broken BIOS!!\n"); | 951 | pr_warn("broken BIOS!!\n"); |
952 | trigger = 1; | 952 | trigger = 1; |
953 | break; | 953 | break; |
954 | } | 954 | } |
@@ -962,7 +962,7 @@ static int irq_trigger(int idx) | |||
962 | } | 962 | } |
963 | case 2: /* reserved */ | 963 | case 2: /* reserved */ |
964 | { | 964 | { |
965 | printk(KERN_WARNING "broken BIOS!!\n"); | 965 | pr_warn("broken BIOS!!\n"); |
966 | trigger = 1; | 966 | trigger = 1; |
967 | break; | 967 | break; |
968 | } | 968 | } |
@@ -973,7 +973,7 @@ static int irq_trigger(int idx) | |||
973 | } | 973 | } |
974 | default: /* invalid */ | 974 | default: /* invalid */ |
975 | { | 975 | { |
976 | printk(KERN_WARNING "broken BIOS!!\n"); | 976 | pr_warn("broken BIOS!!\n"); |
977 | trigger = 0; | 977 | trigger = 0; |
978 | break; | 978 | break; |
979 | } | 979 | } |
@@ -991,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
991 | * Debugging check, we are in big trouble if this message pops up! | 991 | * Debugging check, we are in big trouble if this message pops up! |
992 | */ | 992 | */ |
993 | if (mp_irqs[idx].dstirq != pin) | 993 | if (mp_irqs[idx].dstirq != pin) |
994 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | 994 | pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); |
995 | 995 | ||
996 | if (test_bit(bus, mp_bus_not_pci)) { | 996 | if (test_bit(bus, mp_bus_not_pci)) { |
997 | irq = mp_irqs[idx].srcbusirq; | 997 | irq = mp_irqs[idx].srcbusirq; |
@@ -1112,8 +1112,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1112 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 1112 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
1113 | */ | 1113 | */ |
1114 | static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; | 1114 | static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; |
1115 | static int current_offset = VECTOR_OFFSET_START % 8; | 1115 | static int current_offset = VECTOR_OFFSET_START % 16; |
1116 | unsigned int old_vector; | ||
1117 | int cpu, err; | 1116 | int cpu, err; |
1118 | cpumask_var_t tmp_mask; | 1117 | cpumask_var_t tmp_mask; |
1119 | 1118 | ||
@@ -1123,35 +1122,45 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1123 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) | 1122 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) |
1124 | return -ENOMEM; | 1123 | return -ENOMEM; |
1125 | 1124 | ||
1126 | old_vector = cfg->vector; | ||
1127 | if (old_vector) { | ||
1128 | cpumask_and(tmp_mask, mask, cpu_online_mask); | ||
1129 | cpumask_and(tmp_mask, cfg->domain, tmp_mask); | ||
1130 | if (!cpumask_empty(tmp_mask)) { | ||
1131 | free_cpumask_var(tmp_mask); | ||
1132 | return 0; | ||
1133 | } | ||
1134 | } | ||
1135 | |||
1136 | /* Only try and allocate irqs on cpus that are present */ | 1125 | /* Only try and allocate irqs on cpus that are present */ |
1137 | err = -ENOSPC; | 1126 | err = -ENOSPC; |
1138 | for_each_cpu_and(cpu, mask, cpu_online_mask) { | 1127 | cpumask_clear(cfg->old_domain); |
1139 | int new_cpu; | 1128 | cpu = cpumask_first_and(mask, cpu_online_mask); |
1140 | int vector, offset; | 1129 | while (cpu < nr_cpu_ids) { |
1130 | int new_cpu, vector, offset; | ||
1141 | 1131 | ||
1142 | apic->vector_allocation_domain(cpu, tmp_mask); | 1132 | apic->vector_allocation_domain(cpu, tmp_mask, mask); |
1133 | |||
1134 | if (cpumask_subset(tmp_mask, cfg->domain)) { | ||
1135 | err = 0; | ||
1136 | if (cpumask_equal(tmp_mask, cfg->domain)) | ||
1137 | break; | ||
1138 | /* | ||
1139 | * New cpumask using the vector is a proper subset of | ||
1140 | * the current in use mask. So cleanup the vector | ||
1141 | * allocation for the members that are not used anymore. | ||
1142 | */ | ||
1143 | cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); | ||
1144 | cfg->move_in_progress = 1; | ||
1145 | cpumask_and(cfg->domain, cfg->domain, tmp_mask); | ||
1146 | break; | ||
1147 | } | ||
1143 | 1148 | ||
1144 | vector = current_vector; | 1149 | vector = current_vector; |
1145 | offset = current_offset; | 1150 | offset = current_offset; |
1146 | next: | 1151 | next: |
1147 | vector += 8; | 1152 | vector += 16; |
1148 | if (vector >= first_system_vector) { | 1153 | if (vector >= first_system_vector) { |
1149 | /* If out of vectors on large boxen, must share them. */ | 1154 | offset = (offset + 1) % 16; |
1150 | offset = (offset + 1) % 8; | ||
1151 | vector = FIRST_EXTERNAL_VECTOR + offset; | 1155 | vector = FIRST_EXTERNAL_VECTOR + offset; |
1152 | } | 1156 | } |
1153 | if (unlikely(current_vector == vector)) | 1157 | |
1158 | if (unlikely(current_vector == vector)) { | ||
1159 | cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); | ||
1160 | cpumask_andnot(tmp_mask, mask, cfg->old_domain); | ||
1161 | cpu = cpumask_first_and(tmp_mask, cpu_online_mask); | ||
1154 | continue; | 1162 | continue; |
1163 | } | ||
1155 | 1164 | ||
1156 | if (test_bit(vector, used_vectors)) | 1165 | if (test_bit(vector, used_vectors)) |
1157 | goto next; | 1166 | goto next; |
@@ -1162,7 +1171,7 @@ next: | |||
1162 | /* Found one! */ | 1171 | /* Found one! */ |
1163 | current_vector = vector; | 1172 | current_vector = vector; |
1164 | current_offset = offset; | 1173 | current_offset = offset; |
1165 | if (old_vector) { | 1174 | if (cfg->vector) { |
1166 | cfg->move_in_progress = 1; | 1175 | cfg->move_in_progress = 1; |
1167 | cpumask_copy(cfg->old_domain, cfg->domain); | 1176 | cpumask_copy(cfg->old_domain, cfg->domain); |
1168 | } | 1177 | } |
@@ -1346,18 +1355,18 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, | |||
1346 | 1355 | ||
1347 | if (!IO_APIC_IRQ(irq)) | 1356 | if (!IO_APIC_IRQ(irq)) |
1348 | return; | 1357 | return; |
1349 | /* | ||
1350 | * For legacy irqs, cfg->domain starts with cpu 0 for legacy | ||
1351 | * controllers like 8259. Now that IO-APIC can handle this irq, update | ||
1352 | * the cfg->domain. | ||
1353 | */ | ||
1354 | if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) | ||
1355 | apic->vector_allocation_domain(0, cfg->domain); | ||
1356 | 1358 | ||
1357 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) | 1359 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) |
1358 | return; | 1360 | return; |
1359 | 1361 | ||
1360 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); | 1362 | if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(), |
1363 | &dest)) { | ||
1364 | pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", | ||
1365 | mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); | ||
1366 | __clear_irq_vector(irq, cfg); | ||
1367 | |||
1368 | return; | ||
1369 | } | ||
1361 | 1370 | ||
1362 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 1371 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
1363 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 1372 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
@@ -1366,7 +1375,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, | |||
1366 | cfg->vector, irq, attr->trigger, attr->polarity, dest); | 1375 | cfg->vector, irq, attr->trigger, attr->polarity, dest); |
1367 | 1376 | ||
1368 | if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { | 1377 | if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { |
1369 | pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", | 1378 | pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
1370 | mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); | 1379 | mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); |
1371 | __clear_irq_vector(irq, cfg); | 1380 | __clear_irq_vector(irq, cfg); |
1372 | 1381 | ||
@@ -1469,9 +1478,10 @@ void setup_IO_APIC_irq_extra(u32 gsi) | |||
1469 | * Set up the timer pin, possibly with the 8259A-master behind. | 1478 | * Set up the timer pin, possibly with the 8259A-master behind. |
1470 | */ | 1479 | */ |
1471 | static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, | 1480 | static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, |
1472 | unsigned int pin, int vector) | 1481 | unsigned int pin, int vector) |
1473 | { | 1482 | { |
1474 | struct IO_APIC_route_entry entry; | 1483 | struct IO_APIC_route_entry entry; |
1484 | unsigned int dest; | ||
1475 | 1485 | ||
1476 | if (irq_remapping_enabled) | 1486 | if (irq_remapping_enabled) |
1477 | return; | 1487 | return; |
@@ -1482,9 +1492,13 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, | |||
1482 | * We use logical delivery to get the timer IRQ | 1492 | * We use logical delivery to get the timer IRQ |
1483 | * to the first CPU. | 1493 | * to the first CPU. |
1484 | */ | 1494 | */ |
1495 | if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(), | ||
1496 | apic->target_cpus(), &dest))) | ||
1497 | dest = BAD_APICID; | ||
1498 | |||
1485 | entry.dest_mode = apic->irq_dest_mode; | 1499 | entry.dest_mode = apic->irq_dest_mode; |
1486 | entry.mask = 0; /* don't mask IRQ for edge */ | 1500 | entry.mask = 0; /* don't mask IRQ for edge */ |
1487 | entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); | 1501 | entry.dest = dest; |
1488 | entry.delivery_mode = apic->irq_delivery_mode; | 1502 | entry.delivery_mode = apic->irq_delivery_mode; |
1489 | entry.polarity = 0; | 1503 | entry.polarity = 0; |
1490 | entry.trigger = 0; | 1504 | entry.trigger = 0; |
@@ -1521,7 +1535,6 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | |||
1521 | reg_03.raw = io_apic_read(ioapic_idx, 3); | 1535 | reg_03.raw = io_apic_read(ioapic_idx, 3); |
1522 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 1536 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
1523 | 1537 | ||
1524 | printk("\n"); | ||
1525 | printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); | 1538 | printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); |
1526 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 1539 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
1527 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1540 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
@@ -1578,7 +1591,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | |||
1578 | i, | 1591 | i, |
1579 | ir_entry->index | 1592 | ir_entry->index |
1580 | ); | 1593 | ); |
1581 | printk("%1d %1d %1d %1d %1d " | 1594 | pr_cont("%1d %1d %1d %1d %1d " |
1582 | "%1d %1d %X %02X\n", | 1595 | "%1d %1d %X %02X\n", |
1583 | ir_entry->format, | 1596 | ir_entry->format, |
1584 | ir_entry->mask, | 1597 | ir_entry->mask, |
@@ -1598,7 +1611,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | |||
1598 | i, | 1611 | i, |
1599 | entry.dest | 1612 | entry.dest |
1600 | ); | 1613 | ); |
1601 | printk("%1d %1d %1d %1d %1d " | 1614 | pr_cont("%1d %1d %1d %1d %1d " |
1602 | "%1d %1d %02X\n", | 1615 | "%1d %1d %02X\n", |
1603 | entry.mask, | 1616 | entry.mask, |
1604 | entry.trigger, | 1617 | entry.trigger, |
@@ -1651,8 +1664,8 @@ __apicdebuginit(void) print_IO_APICs(void) | |||
1651 | continue; | 1664 | continue; |
1652 | printk(KERN_DEBUG "IRQ%d ", irq); | 1665 | printk(KERN_DEBUG "IRQ%d ", irq); |
1653 | for_each_irq_pin(entry, cfg->irq_2_pin) | 1666 | for_each_irq_pin(entry, cfg->irq_2_pin) |
1654 | printk("-> %d:%d", entry->apic, entry->pin); | 1667 | pr_cont("-> %d:%d", entry->apic, entry->pin); |
1655 | printk("\n"); | 1668 | pr_cont("\n"); |
1656 | } | 1669 | } |
1657 | 1670 | ||
1658 | printk(KERN_INFO ".................................... done.\n"); | 1671 | printk(KERN_INFO ".................................... done.\n"); |
@@ -1665,9 +1678,9 @@ __apicdebuginit(void) print_APIC_field(int base) | |||
1665 | printk(KERN_DEBUG); | 1678 | printk(KERN_DEBUG); |
1666 | 1679 | ||
1667 | for (i = 0; i < 8; i++) | 1680 | for (i = 0; i < 8; i++) |
1668 | printk(KERN_CONT "%08x", apic_read(base + i*0x10)); | 1681 | pr_cont("%08x", apic_read(base + i*0x10)); |
1669 | 1682 | ||
1670 | printk(KERN_CONT "\n"); | 1683 | pr_cont("\n"); |
1671 | } | 1684 | } |
1672 | 1685 | ||
1673 | __apicdebuginit(void) print_local_APIC(void *dummy) | 1686 | __apicdebuginit(void) print_local_APIC(void *dummy) |
@@ -1769,7 +1782,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1769 | printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); | 1782 | printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); |
1770 | } | 1783 | } |
1771 | } | 1784 | } |
1772 | printk("\n"); | 1785 | pr_cont("\n"); |
1773 | } | 1786 | } |
1774 | 1787 | ||
1775 | __apicdebuginit(void) print_local_APICs(int maxcpu) | 1788 | __apicdebuginit(void) print_local_APICs(int maxcpu) |
@@ -2065,7 +2078,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
2065 | reg_00.raw = io_apic_read(ioapic_idx, 0); | 2078 | reg_00.raw = io_apic_read(ioapic_idx, 0); |
2066 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2079 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2067 | if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) | 2080 | if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) |
2068 | printk("could not set ID!\n"); | 2081 | pr_cont("could not set ID!\n"); |
2069 | else | 2082 | else |
2070 | apic_printk(APIC_VERBOSE, " ok.\n"); | 2083 | apic_printk(APIC_VERBOSE, " ok.\n"); |
2071 | } | 2084 | } |
@@ -2210,71 +2223,6 @@ void send_cleanup_vector(struct irq_cfg *cfg) | |||
2210 | cfg->move_in_progress = 0; | 2223 | cfg->move_in_progress = 0; |
2211 | } | 2224 | } |
2212 | 2225 | ||
2213 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) | ||
2214 | { | ||
2215 | int apic, pin; | ||
2216 | struct irq_pin_list *entry; | ||
2217 | u8 vector = cfg->vector; | ||
2218 | |||
2219 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
2220 | unsigned int reg; | ||
2221 | |||
2222 | apic = entry->apic; | ||
2223 | pin = entry->pin; | ||
2224 | /* | ||
2225 | * With interrupt-remapping, destination information comes | ||
2226 | * from interrupt-remapping table entry. | ||
2227 | */ | ||
2228 | if (!irq_remapped(cfg)) | ||
2229 | io_apic_write(apic, 0x11 + pin*2, dest); | ||
2230 | reg = io_apic_read(apic, 0x10 + pin*2); | ||
2231 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | ||
2232 | reg |= vector; | ||
2233 | io_apic_modify(apic, 0x10 + pin*2, reg); | ||
2234 | } | ||
2235 | } | ||
2236 | |||
2237 | /* | ||
2238 | * Either sets data->affinity to a valid value, and returns | ||
2239 | * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and | ||
2240 | * leaves data->affinity untouched. | ||
2241 | */ | ||
2242 | int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
2243 | unsigned int *dest_id) | ||
2244 | { | ||
2245 | struct irq_cfg *cfg = data->chip_data; | ||
2246 | |||
2247 | if (!cpumask_intersects(mask, cpu_online_mask)) | ||
2248 | return -1; | ||
2249 | |||
2250 | if (assign_irq_vector(data->irq, data->chip_data, mask)) | ||
2251 | return -1; | ||
2252 | |||
2253 | cpumask_copy(data->affinity, mask); | ||
2254 | |||
2255 | *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain); | ||
2256 | return 0; | ||
2257 | } | ||
2258 | |||
2259 | static int | ||
2260 | ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
2261 | bool force) | ||
2262 | { | ||
2263 | unsigned int dest, irq = data->irq; | ||
2264 | unsigned long flags; | ||
2265 | int ret; | ||
2266 | |||
2267 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
2268 | ret = __ioapic_set_affinity(data, mask, &dest); | ||
2269 | if (!ret) { | ||
2270 | /* Only the high 8 bits are valid. */ | ||
2271 | dest = SET_APIC_LOGICAL_ID(dest); | ||
2272 | __target_IO_APIC_irq(irq, dest, data->chip_data); | ||
2273 | } | ||
2274 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2275 | return ret; | ||
2276 | } | ||
2277 | |||
2278 | asmlinkage void smp_irq_move_cleanup_interrupt(void) | 2226 | asmlinkage void smp_irq_move_cleanup_interrupt(void) |
2279 | { | 2227 | { |
2280 | unsigned vector, me; | 2228 | unsigned vector, me; |
@@ -2362,6 +2310,87 @@ void irq_force_complete_move(int irq) | |||
2362 | static inline void irq_complete_move(struct irq_cfg *cfg) { } | 2310 | static inline void irq_complete_move(struct irq_cfg *cfg) { } |
2363 | #endif | 2311 | #endif |
2364 | 2312 | ||
2313 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) | ||
2314 | { | ||
2315 | int apic, pin; | ||
2316 | struct irq_pin_list *entry; | ||
2317 | u8 vector = cfg->vector; | ||
2318 | |||
2319 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
2320 | unsigned int reg; | ||
2321 | |||
2322 | apic = entry->apic; | ||
2323 | pin = entry->pin; | ||
2324 | /* | ||
2325 | * With interrupt-remapping, destination information comes | ||
2326 | * from interrupt-remapping table entry. | ||
2327 | */ | ||
2328 | if (!irq_remapped(cfg)) | ||
2329 | io_apic_write(apic, 0x11 + pin*2, dest); | ||
2330 | reg = io_apic_read(apic, 0x10 + pin*2); | ||
2331 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | ||
2332 | reg |= vector; | ||
2333 | io_apic_modify(apic, 0x10 + pin*2, reg); | ||
2334 | } | ||
2335 | } | ||
2336 | |||
2337 | /* | ||
2338 | * Either sets data->affinity to a valid value, and returns | ||
2339 | * ->cpu_mask_to_apicid_and() of that in dest_id, or returns a | ||
2340 | * negative errno and leaves data->affinity untouched. | ||
2341 | */ | ||
2342 | int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
2343 | unsigned int *dest_id) | ||
2344 | { | ||
2345 | struct irq_cfg *cfg = data->chip_data; | ||
2346 | unsigned int irq = data->irq; | ||
2347 | int err; | ||
2348 | |||
2349 | if (!config_enabled(CONFIG_SMP)) | ||
2350 | return -1; | ||
2351 | |||
2352 | if (!cpumask_intersects(mask, cpu_online_mask)) | ||
2353 | return -EINVAL; | ||
2354 | |||
2355 | err = assign_irq_vector(irq, cfg, mask); | ||
2356 | if (err) | ||
2357 | return err; | ||
2358 | |||
2359 | err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); | ||
2360 | if (err) { | ||
2361 | if (assign_irq_vector(irq, cfg, data->affinity)) | ||
2362 | pr_err("Failed to recover vector for irq %d\n", irq); | ||
2363 | return err; | ||
2364 | } | ||
2365 | |||
2366 | cpumask_copy(data->affinity, mask); | ||
2367 | |||
2368 | return 0; | ||
2369 | } | ||
2370 | |||
2371 | static int | ||
2372 | ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
2373 | bool force) | ||
2374 | { | ||
2375 | unsigned int dest, irq = data->irq; | ||
2376 | unsigned long flags; | ||
2377 | int ret; | ||
2378 | |||
2379 | if (!config_enabled(CONFIG_SMP)) | ||
2380 | return -1; | ||
2381 | |||
2382 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
2383 | ret = __ioapic_set_affinity(data, mask, &dest); | ||
2384 | if (!ret) { | ||
2385 | /* Only the high 8 bits are valid. */ | ||
2386 | dest = SET_APIC_LOGICAL_ID(dest); | ||
2387 | __target_IO_APIC_irq(irq, dest, data->chip_data); | ||
2388 | ret = IRQ_SET_MASK_OK_NOCOPY; | ||
2389 | } | ||
2390 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2391 | return ret; | ||
2392 | } | ||
2393 | |||
2365 | static void ack_apic_edge(struct irq_data *data) | 2394 | static void ack_apic_edge(struct irq_data *data) |
2366 | { | 2395 | { |
2367 | irq_complete_move(data->chip_data); | 2396 | irq_complete_move(data->chip_data); |
@@ -2541,9 +2570,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip) | |||
2541 | chip->irq_ack = ir_ack_apic_edge; | 2570 | chip->irq_ack = ir_ack_apic_edge; |
2542 | chip->irq_eoi = ir_ack_apic_level; | 2571 | chip->irq_eoi = ir_ack_apic_level; |
2543 | 2572 | ||
2544 | #ifdef CONFIG_SMP | ||
2545 | chip->irq_set_affinity = set_remapped_irq_affinity; | 2573 | chip->irq_set_affinity = set_remapped_irq_affinity; |
2546 | #endif | ||
2547 | } | 2574 | } |
2548 | #endif /* CONFIG_IRQ_REMAP */ | 2575 | #endif /* CONFIG_IRQ_REMAP */ |
2549 | 2576 | ||
@@ -2554,9 +2581,7 @@ static struct irq_chip ioapic_chip __read_mostly = { | |||
2554 | .irq_unmask = unmask_ioapic_irq, | 2581 | .irq_unmask = unmask_ioapic_irq, |
2555 | .irq_ack = ack_apic_edge, | 2582 | .irq_ack = ack_apic_edge, |
2556 | .irq_eoi = ack_apic_level, | 2583 | .irq_eoi = ack_apic_level, |
2557 | #ifdef CONFIG_SMP | ||
2558 | .irq_set_affinity = ioapic_set_affinity, | 2584 | .irq_set_affinity = ioapic_set_affinity, |
2559 | #endif | ||
2560 | .irq_retrigger = ioapic_retrigger_irq, | 2585 | .irq_retrigger = ioapic_retrigger_irq, |
2561 | }; | 2586 | }; |
2562 | 2587 | ||
@@ -3038,7 +3063,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
3038 | if (err) | 3063 | if (err) |
3039 | return err; | 3064 | return err; |
3040 | 3065 | ||
3041 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); | 3066 | err = apic->cpu_mask_to_apicid_and(cfg->domain, |
3067 | apic->target_cpus(), &dest); | ||
3068 | if (err) | ||
3069 | return err; | ||
3042 | 3070 | ||
3043 | if (irq_remapped(cfg)) { | 3071 | if (irq_remapped(cfg)) { |
3044 | compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); | 3072 | compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); |
@@ -3072,7 +3100,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
3072 | return err; | 3100 | return err; |
3073 | } | 3101 | } |
3074 | 3102 | ||
3075 | #ifdef CONFIG_SMP | ||
3076 | static int | 3103 | static int |
3077 | msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | 3104 | msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) |
3078 | { | 3105 | { |
@@ -3092,9 +3119,8 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | |||
3092 | 3119 | ||
3093 | __write_msi_msg(data->msi_desc, &msg); | 3120 | __write_msi_msg(data->msi_desc, &msg); |
3094 | 3121 | ||
3095 | return 0; | 3122 | return IRQ_SET_MASK_OK_NOCOPY; |
3096 | } | 3123 | } |
3097 | #endif /* CONFIG_SMP */ | ||
3098 | 3124 | ||
3099 | /* | 3125 | /* |
3100 | * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, | 3126 | * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, |
@@ -3105,9 +3131,7 @@ static struct irq_chip msi_chip = { | |||
3105 | .irq_unmask = unmask_msi_irq, | 3131 | .irq_unmask = unmask_msi_irq, |
3106 | .irq_mask = mask_msi_irq, | 3132 | .irq_mask = mask_msi_irq, |
3107 | .irq_ack = ack_apic_edge, | 3133 | .irq_ack = ack_apic_edge, |
3108 | #ifdef CONFIG_SMP | ||
3109 | .irq_set_affinity = msi_set_affinity, | 3134 | .irq_set_affinity = msi_set_affinity, |
3110 | #endif | ||
3111 | .irq_retrigger = ioapic_retrigger_irq, | 3135 | .irq_retrigger = ioapic_retrigger_irq, |
3112 | }; | 3136 | }; |
3113 | 3137 | ||
@@ -3192,7 +3216,6 @@ void native_teardown_msi_irq(unsigned int irq) | |||
3192 | } | 3216 | } |
3193 | 3217 | ||
3194 | #ifdef CONFIG_DMAR_TABLE | 3218 | #ifdef CONFIG_DMAR_TABLE |
3195 | #ifdef CONFIG_SMP | ||
3196 | static int | 3219 | static int |
3197 | dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, | 3220 | dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, |
3198 | bool force) | 3221 | bool force) |
@@ -3214,19 +3237,15 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
3214 | 3237 | ||
3215 | dmar_msi_write(irq, &msg); | 3238 | dmar_msi_write(irq, &msg); |
3216 | 3239 | ||
3217 | return 0; | 3240 | return IRQ_SET_MASK_OK_NOCOPY; |
3218 | } | 3241 | } |
3219 | 3242 | ||
3220 | #endif /* CONFIG_SMP */ | ||
3221 | |||
3222 | static struct irq_chip dmar_msi_type = { | 3243 | static struct irq_chip dmar_msi_type = { |
3223 | .name = "DMAR_MSI", | 3244 | .name = "DMAR_MSI", |
3224 | .irq_unmask = dmar_msi_unmask, | 3245 | .irq_unmask = dmar_msi_unmask, |
3225 | .irq_mask = dmar_msi_mask, | 3246 | .irq_mask = dmar_msi_mask, |
3226 | .irq_ack = ack_apic_edge, | 3247 | .irq_ack = ack_apic_edge, |
3227 | #ifdef CONFIG_SMP | ||
3228 | .irq_set_affinity = dmar_msi_set_affinity, | 3248 | .irq_set_affinity = dmar_msi_set_affinity, |
3229 | #endif | ||
3230 | .irq_retrigger = ioapic_retrigger_irq, | 3249 | .irq_retrigger = ioapic_retrigger_irq, |
3231 | }; | 3250 | }; |
3232 | 3251 | ||
@@ -3247,7 +3266,6 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
3247 | 3266 | ||
3248 | #ifdef CONFIG_HPET_TIMER | 3267 | #ifdef CONFIG_HPET_TIMER |
3249 | 3268 | ||
3250 | #ifdef CONFIG_SMP | ||
3251 | static int hpet_msi_set_affinity(struct irq_data *data, | 3269 | static int hpet_msi_set_affinity(struct irq_data *data, |
3252 | const struct cpumask *mask, bool force) | 3270 | const struct cpumask *mask, bool force) |
3253 | { | 3271 | { |
@@ -3267,19 +3285,15 @@ static int hpet_msi_set_affinity(struct irq_data *data, | |||
3267 | 3285 | ||
3268 | hpet_msi_write(data->handler_data, &msg); | 3286 | hpet_msi_write(data->handler_data, &msg); |
3269 | 3287 | ||
3270 | return 0; | 3288 | return IRQ_SET_MASK_OK_NOCOPY; |
3271 | } | 3289 | } |
3272 | 3290 | ||
3273 | #endif /* CONFIG_SMP */ | ||
3274 | |||
3275 | static struct irq_chip hpet_msi_type = { | 3291 | static struct irq_chip hpet_msi_type = { |
3276 | .name = "HPET_MSI", | 3292 | .name = "HPET_MSI", |
3277 | .irq_unmask = hpet_msi_unmask, | 3293 | .irq_unmask = hpet_msi_unmask, |
3278 | .irq_mask = hpet_msi_mask, | 3294 | .irq_mask = hpet_msi_mask, |
3279 | .irq_ack = ack_apic_edge, | 3295 | .irq_ack = ack_apic_edge, |
3280 | #ifdef CONFIG_SMP | ||
3281 | .irq_set_affinity = hpet_msi_set_affinity, | 3296 | .irq_set_affinity = hpet_msi_set_affinity, |
3282 | #endif | ||
3283 | .irq_retrigger = ioapic_retrigger_irq, | 3297 | .irq_retrigger = ioapic_retrigger_irq, |
3284 | }; | 3298 | }; |
3285 | 3299 | ||
@@ -3314,8 +3328,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) | |||
3314 | */ | 3328 | */ |
3315 | #ifdef CONFIG_HT_IRQ | 3329 | #ifdef CONFIG_HT_IRQ |
3316 | 3330 | ||
3317 | #ifdef CONFIG_SMP | ||
3318 | |||
3319 | static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) | 3331 | static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) |
3320 | { | 3332 | { |
3321 | struct ht_irq_msg msg; | 3333 | struct ht_irq_msg msg; |
@@ -3340,25 +3352,23 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | |||
3340 | return -1; | 3352 | return -1; |
3341 | 3353 | ||
3342 | target_ht_irq(data->irq, dest, cfg->vector); | 3354 | target_ht_irq(data->irq, dest, cfg->vector); |
3343 | return 0; | 3355 | return IRQ_SET_MASK_OK_NOCOPY; |
3344 | } | 3356 | } |
3345 | 3357 | ||
3346 | #endif | ||
3347 | |||
3348 | static struct irq_chip ht_irq_chip = { | 3358 | static struct irq_chip ht_irq_chip = { |
3349 | .name = "PCI-HT", | 3359 | .name = "PCI-HT", |
3350 | .irq_mask = mask_ht_irq, | 3360 | .irq_mask = mask_ht_irq, |
3351 | .irq_unmask = unmask_ht_irq, | 3361 | .irq_unmask = unmask_ht_irq, |
3352 | .irq_ack = ack_apic_edge, | 3362 | .irq_ack = ack_apic_edge, |
3353 | #ifdef CONFIG_SMP | ||
3354 | .irq_set_affinity = ht_set_affinity, | 3363 | .irq_set_affinity = ht_set_affinity, |
3355 | #endif | ||
3356 | .irq_retrigger = ioapic_retrigger_irq, | 3364 | .irq_retrigger = ioapic_retrigger_irq, |
3357 | }; | 3365 | }; |
3358 | 3366 | ||
3359 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | 3367 | int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) |
3360 | { | 3368 | { |
3361 | struct irq_cfg *cfg; | 3369 | struct irq_cfg *cfg; |
3370 | struct ht_irq_msg msg; | ||
3371 | unsigned dest; | ||
3362 | int err; | 3372 | int err; |
3363 | 3373 | ||
3364 | if (disable_apic) | 3374 | if (disable_apic) |
@@ -3366,36 +3376,37 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3366 | 3376 | ||
3367 | cfg = irq_cfg(irq); | 3377 | cfg = irq_cfg(irq); |
3368 | err = assign_irq_vector(irq, cfg, apic->target_cpus()); | 3378 | err = assign_irq_vector(irq, cfg, apic->target_cpus()); |
3369 | if (!err) { | 3379 | if (err) |
3370 | struct ht_irq_msg msg; | 3380 | return err; |
3371 | unsigned dest; | 3381 | |
3382 | err = apic->cpu_mask_to_apicid_and(cfg->domain, | ||
3383 | apic->target_cpus(), &dest); | ||
3384 | if (err) | ||
3385 | return err; | ||
3372 | 3386 | ||
3373 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, | 3387 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); |
3374 | apic->target_cpus()); | ||
3375 | 3388 | ||
3376 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); | 3389 | msg.address_lo = |
3390 | HT_IRQ_LOW_BASE | | ||
3391 | HT_IRQ_LOW_DEST_ID(dest) | | ||
3392 | HT_IRQ_LOW_VECTOR(cfg->vector) | | ||
3393 | ((apic->irq_dest_mode == 0) ? | ||
3394 | HT_IRQ_LOW_DM_PHYSICAL : | ||
3395 | HT_IRQ_LOW_DM_LOGICAL) | | ||
3396 | HT_IRQ_LOW_RQEOI_EDGE | | ||
3397 | ((apic->irq_delivery_mode != dest_LowestPrio) ? | ||
3398 | HT_IRQ_LOW_MT_FIXED : | ||
3399 | HT_IRQ_LOW_MT_ARBITRATED) | | ||
3400 | HT_IRQ_LOW_IRQ_MASKED; | ||
3377 | 3401 | ||
3378 | msg.address_lo = | 3402 | write_ht_irq_msg(irq, &msg); |
3379 | HT_IRQ_LOW_BASE | | ||
3380 | HT_IRQ_LOW_DEST_ID(dest) | | ||
3381 | HT_IRQ_LOW_VECTOR(cfg->vector) | | ||
3382 | ((apic->irq_dest_mode == 0) ? | ||
3383 | HT_IRQ_LOW_DM_PHYSICAL : | ||
3384 | HT_IRQ_LOW_DM_LOGICAL) | | ||
3385 | HT_IRQ_LOW_RQEOI_EDGE | | ||
3386 | ((apic->irq_delivery_mode != dest_LowestPrio) ? | ||
3387 | HT_IRQ_LOW_MT_FIXED : | ||
3388 | HT_IRQ_LOW_MT_ARBITRATED) | | ||
3389 | HT_IRQ_LOW_IRQ_MASKED; | ||
3390 | 3403 | ||
3391 | write_ht_irq_msg(irq, &msg); | 3404 | irq_set_chip_and_handler_name(irq, &ht_irq_chip, |
3405 | handle_edge_irq, "edge"); | ||
3392 | 3406 | ||
3393 | irq_set_chip_and_handler_name(irq, &ht_irq_chip, | 3407 | dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); |
3394 | handle_edge_irq, "edge"); | ||
3395 | 3408 | ||
3396 | dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); | 3409 | return 0; |
3397 | } | ||
3398 | return err; | ||
3399 | } | 3410 | } |
3400 | #endif /* CONFIG_HT_IRQ */ | 3411 | #endif /* CONFIG_HT_IRQ */ |
3401 | 3412 | ||
@@ -3563,7 +3574,8 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3563 | 3574 | ||
3564 | /* Sanity check */ | 3575 | /* Sanity check */ |
3565 | if (reg_00.bits.ID != apic_id) { | 3576 | if (reg_00.bits.ID != apic_id) { |
3566 | printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); | 3577 | pr_err("IOAPIC[%d]: Unable to change apic_id!\n", |
3578 | ioapic); | ||
3567 | return -1; | 3579 | return -1; |
3568 | } | 3580 | } |
3569 | } | 3581 | } |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index f00a68cca37a..d661ee95cabf 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -406,16 +406,13 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid) | |||
406 | * We use physical apicids here, not logical, so just return the default | 406 | * We use physical apicids here, not logical, so just return the default |
407 | * physical broadcast to stop people from breaking us | 407 | * physical broadcast to stop people from breaking us |
408 | */ | 408 | */ |
409 | static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) | 409 | static int |
410 | { | ||
411 | return 0x0F; | ||
412 | } | ||
413 | |||
414 | static inline unsigned int | ||
415 | numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 410 | numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
416 | const struct cpumask *andmask) | 411 | const struct cpumask *andmask, |
412 | unsigned int *apicid) | ||
417 | { | 413 | { |
418 | return 0x0F; | 414 | *apicid = 0x0F; |
415 | return 0; | ||
419 | } | 416 | } |
420 | 417 | ||
421 | /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ | 418 | /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ |
@@ -441,20 +438,6 @@ static int probe_numaq(void) | |||
441 | return found_numaq; | 438 | return found_numaq; |
442 | } | 439 | } |
443 | 440 | ||
444 | static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
445 | { | ||
446 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
447 | * specified in the interrupt destination when using lowest | ||
448 | * priority interrupt delivery mode. | ||
449 | * | ||
450 | * In particular there was a hyperthreading cpu observed to | ||
451 | * deliver interrupts to the wrong hyperthread when only one | ||
452 | * hyperthread was specified in the interrupt destination. | ||
453 | */ | ||
454 | cpumask_clear(retmask); | ||
455 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
456 | } | ||
457 | |||
458 | static void numaq_setup_portio_remap(void) | 441 | static void numaq_setup_portio_remap(void) |
459 | { | 442 | { |
460 | int num_quads = num_online_nodes(); | 443 | int num_quads = num_online_nodes(); |
@@ -491,7 +474,7 @@ static struct apic __refdata apic_numaq = { | |||
491 | .check_apicid_used = numaq_check_apicid_used, | 474 | .check_apicid_used = numaq_check_apicid_used, |
492 | .check_apicid_present = numaq_check_apicid_present, | 475 | .check_apicid_present = numaq_check_apicid_present, |
493 | 476 | ||
494 | .vector_allocation_domain = numaq_vector_allocation_domain, | 477 | .vector_allocation_domain = flat_vector_allocation_domain, |
495 | .init_apic_ldr = numaq_init_apic_ldr, | 478 | .init_apic_ldr = numaq_init_apic_ldr, |
496 | 479 | ||
497 | .ioapic_phys_id_map = numaq_ioapic_phys_id_map, | 480 | .ioapic_phys_id_map = numaq_ioapic_phys_id_map, |
@@ -509,7 +492,6 @@ static struct apic __refdata apic_numaq = { | |||
509 | .set_apic_id = NULL, | 492 | .set_apic_id = NULL, |
510 | .apic_id_mask = 0x0F << 24, | 493 | .apic_id_mask = 0x0F << 24, |
511 | 494 | ||
512 | .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, | ||
513 | .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, | 495 | .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, |
514 | 496 | ||
515 | .send_IPI_mask = numaq_send_IPI_mask, | 497 | .send_IPI_mask = numaq_send_IPI_mask, |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1b291da09e60..eb35ef9ee63f 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -66,21 +66,6 @@ static void setup_apic_flat_routing(void) | |||
66 | #endif | 66 | #endif |
67 | } | 67 | } |
68 | 68 | ||
69 | static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
70 | { | ||
71 | /* | ||
72 | * Careful. Some cpus do not strictly honor the set of cpus | ||
73 | * specified in the interrupt destination when using lowest | ||
74 | * priority interrupt delivery mode. | ||
75 | * | ||
76 | * In particular there was a hyperthreading cpu observed to | ||
77 | * deliver interrupts to the wrong hyperthread when only one | ||
78 | * hyperthread was specified in the interrupt destination. | ||
79 | */ | ||
80 | cpumask_clear(retmask); | ||
81 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
82 | } | ||
83 | |||
84 | /* should be called last. */ | 69 | /* should be called last. */ |
85 | static int probe_default(void) | 70 | static int probe_default(void) |
86 | { | 71 | { |
@@ -105,7 +90,7 @@ static struct apic apic_default = { | |||
105 | .check_apicid_used = default_check_apicid_used, | 90 | .check_apicid_used = default_check_apicid_used, |
106 | .check_apicid_present = default_check_apicid_present, | 91 | .check_apicid_present = default_check_apicid_present, |
107 | 92 | ||
108 | .vector_allocation_domain = default_vector_allocation_domain, | 93 | .vector_allocation_domain = flat_vector_allocation_domain, |
109 | .init_apic_ldr = default_init_apic_ldr, | 94 | .init_apic_ldr = default_init_apic_ldr, |
110 | 95 | ||
111 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | 96 | .ioapic_phys_id_map = default_ioapic_phys_id_map, |
@@ -123,8 +108,7 @@ static struct apic apic_default = { | |||
123 | .set_apic_id = NULL, | 108 | .set_apic_id = NULL, |
124 | .apic_id_mask = 0x0F << 24, | 109 | .apic_id_mask = 0x0F << 24, |
125 | 110 | ||
126 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | 111 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, |
127 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
128 | 112 | ||
129 | .send_IPI_mask = default_send_IPI_mask_logical, | 113 | .send_IPI_mask = default_send_IPI_mask_logical, |
130 | .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, | 114 | .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, |
@@ -208,6 +192,9 @@ void __init default_setup_apic_routing(void) | |||
208 | 192 | ||
209 | if (apic->setup_apic_routing) | 193 | if (apic->setup_apic_routing) |
210 | apic->setup_apic_routing(); | 194 | apic->setup_apic_routing(); |
195 | |||
196 | if (x86_platform.apic_post_init) | ||
197 | x86_platform.apic_post_init(); | ||
211 | } | 198 | } |
212 | 199 | ||
213 | void __init generic_apic_probe(void) | 200 | void __init generic_apic_probe(void) |
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 3fe986698929..1793dba7a741 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
@@ -23,11 +23,6 @@ | |||
23 | #include <asm/ipi.h> | 23 | #include <asm/ipi.h> |
24 | #include <asm/setup.h> | 24 | #include <asm/setup.h> |
25 | 25 | ||
26 | static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | ||
27 | { | ||
28 | return hard_smp_processor_id() >> index_msb; | ||
29 | } | ||
30 | |||
31 | /* | 26 | /* |
32 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. | 27 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. |
33 | */ | 28 | */ |
@@ -48,10 +43,8 @@ void __init default_setup_apic_routing(void) | |||
48 | } | 43 | } |
49 | } | 44 | } |
50 | 45 | ||
51 | if (is_vsmp_box()) { | 46 | if (x86_platform.apic_post_init) |
52 | /* need to update phys_pkg_id */ | 47 | x86_platform.apic_post_init(); |
53 | apic->phys_pkg_id = apicid_phys_pkg_id; | ||
54 | } | ||
55 | } | 48 | } |
56 | 49 | ||
57 | /* Same for both flat and physical. */ | 50 | /* Same for both flat and physical. */ |
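
Both the 32-bit and 64-bit probe paths now finish by calling an optional x86_platform.apic_post_init() hook, which replaces the hard-coded is_vsmp_box() check removed above. A hedged sketch of how a platform could use the new hook (the names are hypothetical; the phys_pkg_id override mirrors what the vSMP special case used to do inline):

	static int myplat_phys_pkg_id(int initial_apic_id, int index_msb)
	{
		return hard_smp_processor_id() >> index_msb;
	}

	/* Runs from default_setup_apic_routing() once an apic is chosen. */
	static void __init myplat_apic_post_init(void)
	{
		apic->phys_pkg_id = myplat_phys_pkg_id;
	}

	static void __init myplat_early_setup(void)
	{
		x86_platform.apic_post_init = myplat_apic_post_init;
	}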
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 659897c00755..77c95c0e1bf7 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -26,6 +26,8 @@ | |||
26 | * | 26 | * |
27 | */ | 27 | */ |
28 | 28 | ||
29 | #define pr_fmt(fmt) "summit: %s: " fmt, __func__ | ||
30 | |||
29 | #include <linux/mm.h> | 31 | #include <linux/mm.h> |
30 | #include <linux/init.h> | 32 | #include <linux/init.h> |
31 | #include <asm/io.h> | 33 | #include <asm/io.h> |
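
The pr_fmt() definition added at the top of the file is what lets the conversions below drop their hand-rolled prefixes: every pr_*() call expands its format string through pr_fmt() first. For this file that means, roughly:

	/* With: #define pr_fmt(fmt) "summit: %s: " fmt, __func__ */
	pr_err("Not a valid mask!\n");
	/*
	 * expands to the equivalent of:
	 *   printk(KERN_ERR "summit: %s: Not a valid mask!\n", __func__);
	 * so the function name no longer has to be passed by hand.
	 */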
@@ -235,8 +237,8 @@ static int summit_apic_id_registered(void) | |||
235 | 237 | ||
236 | static void summit_setup_apic_routing(void) | 238 | static void summit_setup_apic_routing(void) |
237 | { | 239 | { |
238 | printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", | 240 | pr_info("Enabling APIC mode: Summit. Using %d I/O APICs\n", |
239 | nr_ioapics); | 241 | nr_ioapics); |
240 | } | 242 | } |
241 | 243 | ||
242 | static int summit_cpu_present_to_apicid(int mps_cpu) | 244 | static int summit_cpu_present_to_apicid(int mps_cpu) |
@@ -263,43 +265,48 @@ static int summit_check_phys_apicid_present(int physical_apicid) | |||
263 | return 1; | 265 | return 1; |
264 | } | 266 | } |
265 | 267 | ||
266 | static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) | 268 | static inline int |
269 | summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) | ||
267 | { | 270 | { |
268 | unsigned int round = 0; | 271 | unsigned int round = 0; |
269 | int cpu, apicid = 0; | 272 | unsigned int cpu, apicid = 0; |
270 | 273 | ||
271 | /* | 274 | /* |
272 | * The cpus in the mask must all be on the apic cluster. | 275 | * The cpus in the mask must all be on the apic cluster. |
273 | */ | 276 | */ |
274 | for_each_cpu(cpu, cpumask) { | 277 | for_each_cpu_and(cpu, cpumask, cpu_online_mask) { |
275 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | 278 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); |
276 | 279 | ||
277 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { | 280 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { |
278 | printk("%s: Not a valid mask!\n", __func__); | 281 | pr_err("Not a valid mask!\n"); |
279 | return BAD_APICID; | 282 | return -EINVAL; |
280 | } | 283 | } |
281 | apicid |= new_apicid; | 284 | apicid |= new_apicid; |
282 | round++; | 285 | round++; |
283 | } | 286 | } |
284 | return apicid; | 287 | if (!round) |
288 | return -EINVAL; | ||
289 | *dest_id = apicid; | ||
290 | return 0; | ||
285 | } | 291 | } |
286 | 292 | ||
287 | static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, | 293 | static int |
288 | const struct cpumask *andmask) | 294 | summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, |
295 | const struct cpumask *andmask, | ||
296 | unsigned int *apicid) | ||
289 | { | 297 | { |
290 | int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); | ||
291 | cpumask_var_t cpumask; | 298 | cpumask_var_t cpumask; |
299 | *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); | ||
292 | 300 | ||
293 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | 301 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) |
294 | return apicid; | 302 | return 0; |
295 | 303 | ||
296 | cpumask_and(cpumask, inmask, andmask); | 304 | cpumask_and(cpumask, inmask, andmask); |
297 | cpumask_and(cpumask, cpumask, cpu_online_mask); | 305 | summit_cpu_mask_to_apicid(cpumask, apicid); |
298 | apicid = summit_cpu_mask_to_apicid(cpumask); | ||
299 | 306 | ||
300 | free_cpumask_var(cpumask); | 307 | free_cpumask_var(cpumask); |
301 | 308 | ||
302 | return apicid; | 309 | return 0; |
303 | } | 310 | } |
304 | 311 | ||
305 | /* | 312 | /* |
@@ -320,20 +327,6 @@ static int probe_summit(void) | |||
320 | return 0; | 327 | return 0; |
321 | } | 328 | } |
322 | 329 | ||
323 | static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
324 | { | ||
325 | /* Careful. Some cpus do not strictly honor the set of cpus | ||
326 | * specified in the interrupt destination when using lowest | ||
327 | * priority interrupt delivery mode. | ||
328 | * | ||
329 | * In particular there was a hyperthreading cpu observed to | ||
330 | * deliver interrupts to the wrong hyperthread when only one | ||
331 | * hyperthread was specified in the interrupt destination. | ||
332 | */ | ||
333 | cpumask_clear(retmask); | ||
334 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; | ||
335 | } | ||
336 | |||
337 | #ifdef CONFIG_X86_SUMMIT_NUMA | 330 | #ifdef CONFIG_X86_SUMMIT_NUMA |
338 | static struct rio_table_hdr *rio_table_hdr; | 331 | static struct rio_table_hdr *rio_table_hdr; |
339 | static struct scal_detail *scal_devs[MAX_NUMNODES]; | 332 | static struct scal_detail *scal_devs[MAX_NUMNODES]; |
@@ -355,7 +348,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | |||
355 | } | 348 | } |
356 | } | 349 | } |
357 | if (i == rio_table_hdr->num_rio_dev) { | 350 | if (i == rio_table_hdr->num_rio_dev) { |
358 | printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); | 351 | pr_err("Couldn't find owner Cyclone for Winnipeg!\n"); |
359 | return last_bus; | 352 | return last_bus; |
360 | } | 353 | } |
361 | 354 | ||
@@ -366,7 +359,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | |||
366 | } | 359 | } |
367 | } | 360 | } |
368 | if (i == rio_table_hdr->num_scal_dev) { | 361 | if (i == rio_table_hdr->num_scal_dev) { |
369 | printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); | 362 | pr_err("Couldn't find owner Twister for Cyclone!\n"); |
370 | return last_bus; | 363 | return last_bus; |
371 | } | 364 | } |
372 | 365 | ||
@@ -396,7 +389,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | |||
396 | num_buses = 9; | 389 | num_buses = 9; |
397 | break; | 390 | break; |
398 | default: | 391 | default: |
399 | printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); | 392 | pr_info("Unsupported Winnipeg type!\n"); |
400 | return last_bus; | 393 | return last_bus; |
401 | } | 394 | } |
402 | 395 | ||
@@ -411,13 +404,15 @@ static int build_detail_arrays(void) | |||
411 | int i, scal_detail_size, rio_detail_size; | 404 | int i, scal_detail_size, rio_detail_size; |
412 | 405 | ||
413 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { | 406 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { |
414 | printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); | 407 | pr_warn("MAX_NUMNODES too low! Defined as %d, but system has %d nodes\n", |
408 | MAX_NUMNODES, rio_table_hdr->num_scal_dev); | ||
415 | return 0; | 409 | return 0; |
416 | } | 410 | } |
417 | 411 | ||
418 | switch (rio_table_hdr->version) { | 412 | switch (rio_table_hdr->version) { |
419 | default: | 413 | default: |
420 | printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); | 414 | pr_warn("Invalid Rio Grande Table Version: %d\n", |
415 | rio_table_hdr->version); | ||
421 | return 0; | 416 | return 0; |
422 | case 2: | 417 | case 2: |
423 | scal_detail_size = 11; | 418 | scal_detail_size = 11; |
@@ -462,7 +457,7 @@ void setup_summit(void) | |||
462 | offset = *((unsigned short *)(ptr + offset)); | 457 | offset = *((unsigned short *)(ptr + offset)); |
463 | } | 458 | } |
464 | if (!rio_table_hdr) { | 459 | if (!rio_table_hdr) { |
465 | printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); | 460 | pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n"); |
466 | return; | 461 | return; |
467 | } | 462 | } |
468 | 463 | ||
@@ -509,7 +504,7 @@ static struct apic apic_summit = { | |||
509 | .check_apicid_used = summit_check_apicid_used, | 504 | .check_apicid_used = summit_check_apicid_used, |
510 | .check_apicid_present = summit_check_apicid_present, | 505 | .check_apicid_present = summit_check_apicid_present, |
511 | 506 | ||
512 | .vector_allocation_domain = summit_vector_allocation_domain, | 507 | .vector_allocation_domain = flat_vector_allocation_domain, |
513 | .init_apic_ldr = summit_init_apic_ldr, | 508 | .init_apic_ldr = summit_init_apic_ldr, |
514 | 509 | ||
515 | .ioapic_phys_id_map = summit_ioapic_phys_id_map, | 510 | .ioapic_phys_id_map = summit_ioapic_phys_id_map, |
@@ -527,7 +522,6 @@ static struct apic apic_summit = { | |||
527 | .set_apic_id = NULL, | 522 | .set_apic_id = NULL, |
528 | .apic_id_mask = 0xFF << 24, | 523 | .apic_id_mask = 0xFF << 24, |
529 | 524 | ||
530 | .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, | ||
531 | .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, | 525 | .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, |
532 | 526 | ||
533 | .send_IPI_mask = summit_send_IPI_mask, | 527 | .send_IPI_mask = summit_send_IPI_mask, |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ff35cff0e1a7..c88baa4ff0e5 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -81,7 +81,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) | |||
81 | } | 81 | } |
82 | 82 | ||
83 | static void | 83 | static void |
84 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | 84 | x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) |
85 | { | 85 | { |
86 | __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); | 86 | __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); |
87 | } | 87 | } |
@@ -96,36 +96,37 @@ static void x2apic_send_IPI_all(int vector) | |||
96 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); | 96 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); |
97 | } | 97 | } |
98 | 98 | ||
99 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) | 99 | static int |
100 | x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
101 | const struct cpumask *andmask, | ||
102 | unsigned int *apicid) | ||
100 | { | 103 | { |
101 | /* | 104 | u32 dest = 0; |
102 | * We're using fixed IRQ delivery, can only return one logical APIC ID. | 105 | u16 cluster; |
103 | * May as well be the first. | 106 | int i; |
104 | */ | ||
105 | int cpu = cpumask_first(cpumask); | ||
106 | 107 | ||
107 | if ((unsigned)cpu < nr_cpu_ids) | 108 | for_each_cpu_and(i, cpumask, andmask) { |
108 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | 109 | if (!cpumask_test_cpu(i, cpu_online_mask)) |
109 | else | 110 | continue; |
110 | return BAD_APICID; | 111 | dest = per_cpu(x86_cpu_to_logical_apicid, i); |
111 | } | 112 | cluster = x2apic_cluster(i); |
113 | break; | ||
114 | } | ||
112 | 115 | ||
113 | static unsigned int | 116 | if (!dest) |
114 | x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 117 | return -EINVAL; |
115 | const struct cpumask *andmask) | ||
116 | { | ||
117 | int cpu; | ||
118 | 118 | ||
119 | /* | 119 | for_each_cpu_and(i, cpumask, andmask) { |
120 | * We're using fixed IRQ delivery, can only return one logical APIC ID. | 120 | if (!cpumask_test_cpu(i, cpu_online_mask)) |
121 | * May as well be the first. | 121 | continue; |
122 | */ | 122 | if (cluster != x2apic_cluster(i)) |
123 | for_each_cpu_and(cpu, cpumask, andmask) { | 123 | continue; |
124 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 124 | dest |= per_cpu(x86_cpu_to_logical_apicid, i); |
125 | break; | ||
126 | } | 125 | } |
127 | 126 | ||
128 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | 127 | *apicid = dest; |
128 | |||
129 | return 0; | ||
129 | } | 130 | } |
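
The rewritten helper builds a multi-cpu logical destination in two passes: the first online cpu found in the intersection nominates the cluster, then the logical ids of all requested cpus in that same cluster are OR-ed in, while cpus from other clusters are silently dropped. A worked example, assuming the usual x2apic logical-id layout (cluster number in bits 31:16, one-hot cpu bit in bits 15:0):

	/* Hypothetical per-cpu logical apicids, two cpus in cluster 1: */
	u32 dest = 0x00010001;		/* cpu A nominates the cluster  */
	dest    |= 0x00010002;		/* cpu B, same cluster, OR-ed in */
	/*
	 * dest == 0x00010003: one logical destination covering both.
	 * A cpu with id 0x00020001 (cluster 2) would be skipped.
	 */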
130 | 131 | ||
131 | static void init_x2apic_ldr(void) | 132 | static void init_x2apic_ldr(void) |
@@ -208,6 +209,32 @@ static int x2apic_cluster_probe(void) | |||
208 | return 0; | 209 | return 0; |
209 | } | 210 | } |
210 | 211 | ||
212 | static const struct cpumask *x2apic_cluster_target_cpus(void) | ||
213 | { | ||
214 | return cpu_all_mask; | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * Each x2apic cluster is an allocation domain. | ||
219 | */ | ||
220 | static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, | ||
221 | const struct cpumask *mask) | ||
222 | { | ||
223 | /* | ||
224 | * To minimize vector pressure, default case of boot, device bringup | ||
225 | * etc will use a single cpu for the interrupt destination. | ||
226 | * | ||
227 | * On explicit migration requests coming from irqbalance etc, | ||
228 | * interrupts will be routed to the x2apic cluster (cluster-id | ||
229 | * derived from the first cpu in the mask) members specified | ||
230 | * in the mask. | ||
231 | */ | ||
232 | if (mask == x2apic_cluster_target_cpus()) | ||
233 | cpumask_copy(retmask, cpumask_of(cpu)); | ||
234 | else | ||
235 | cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); | ||
236 | } | ||
237 | |||
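
The effect of the new allocation domain, sketched under the assumption that cpus 0-3 share one x2apic cluster (note the mask comparison above is a pointer check against the cpu_all_mask returned by x2apic_cluster_target_cpus()):

	/* mask is the default target mask => boot/bringup case */
	cluster_vector_allocation_domain(0, retmask, cpu_all_mask);
	/* retmask = { 0 }: a single cpu, minimal vector pressure */

	/* explicit affinity request, e.g. coming from irqbalance */
	cluster_vector_allocation_domain(0, retmask, some_user_mask);
	/* retmask = some_user_mask & cpus_in_cluster(0): the requested
	 * cpus that share cpu 0's cluster */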
211 | static struct apic apic_x2apic_cluster = { | 238 | static struct apic apic_x2apic_cluster = { |
212 | 239 | ||
213 | .name = "cluster x2apic", | 240 | .name = "cluster x2apic", |
@@ -219,13 +246,13 @@ static struct apic apic_x2apic_cluster = { | |||
219 | .irq_delivery_mode = dest_LowestPrio, | 246 | .irq_delivery_mode = dest_LowestPrio, |
220 | .irq_dest_mode = 1, /* logical */ | 247 | .irq_dest_mode = 1, /* logical */ |
221 | 248 | ||
222 | .target_cpus = x2apic_target_cpus, | 249 | .target_cpus = x2apic_cluster_target_cpus, |
223 | .disable_esr = 0, | 250 | .disable_esr = 0, |
224 | .dest_logical = APIC_DEST_LOGICAL, | 251 | .dest_logical = APIC_DEST_LOGICAL, |
225 | .check_apicid_used = NULL, | 252 | .check_apicid_used = NULL, |
226 | .check_apicid_present = NULL, | 253 | .check_apicid_present = NULL, |
227 | 254 | ||
228 | .vector_allocation_domain = x2apic_vector_allocation_domain, | 255 | .vector_allocation_domain = cluster_vector_allocation_domain, |
229 | .init_apic_ldr = init_x2apic_ldr, | 256 | .init_apic_ldr = init_x2apic_ldr, |
230 | 257 | ||
231 | .ioapic_phys_id_map = NULL, | 258 | .ioapic_phys_id_map = NULL, |
@@ -243,7 +270,6 @@ static struct apic apic_x2apic_cluster = { | |||
243 | .set_apic_id = x2apic_set_apic_id, | 270 | .set_apic_id = x2apic_set_apic_id, |
244 | .apic_id_mask = 0xFFFFFFFFu, | 271 | .apic_id_mask = 0xFFFFFFFFu, |
245 | 272 | ||
246 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | ||
247 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | 273 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, |
248 | 274 | ||
249 | .send_IPI_mask = x2apic_send_IPI_mask, | 275 | .send_IPI_mask = x2apic_send_IPI_mask, |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index c17e982db275..e03a1e180e81 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -76,38 +76,6 @@ static void x2apic_send_IPI_all(int vector) | |||
76 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); | 76 | __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); |
77 | } | 77 | } |
78 | 78 | ||
79 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) | ||
80 | { | ||
81 | /* | ||
82 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
83 | * May as well be the first. | ||
84 | */ | ||
85 | int cpu = cpumask_first(cpumask); | ||
86 | |||
87 | if ((unsigned)cpu < nr_cpu_ids) | ||
88 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
89 | else | ||
90 | return BAD_APICID; | ||
91 | } | ||
92 | |||
93 | static unsigned int | ||
94 | x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
95 | const struct cpumask *andmask) | ||
96 | { | ||
97 | int cpu; | ||
98 | |||
99 | /* | ||
100 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
101 | * May as well be the first. | ||
102 | */ | ||
103 | for_each_cpu_and(cpu, cpumask, andmask) { | ||
104 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
105 | break; | ||
106 | } | ||
107 | |||
108 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
109 | } | ||
110 | |||
111 | static void init_x2apic_ldr(void) | 79 | static void init_x2apic_ldr(void) |
112 | { | 80 | { |
113 | } | 81 | } |
@@ -131,13 +99,13 @@ static struct apic apic_x2apic_phys = { | |||
131 | .irq_delivery_mode = dest_Fixed, | 99 | .irq_delivery_mode = dest_Fixed, |
132 | .irq_dest_mode = 0, /* physical */ | 100 | .irq_dest_mode = 0, /* physical */ |
133 | 101 | ||
134 | .target_cpus = x2apic_target_cpus, | 102 | .target_cpus = online_target_cpus, |
135 | .disable_esr = 0, | 103 | .disable_esr = 0, |
136 | .dest_logical = 0, | 104 | .dest_logical = 0, |
137 | .check_apicid_used = NULL, | 105 | .check_apicid_used = NULL, |
138 | .check_apicid_present = NULL, | 106 | .check_apicid_present = NULL, |
139 | 107 | ||
140 | .vector_allocation_domain = x2apic_vector_allocation_domain, | 108 | .vector_allocation_domain = default_vector_allocation_domain, |
141 | .init_apic_ldr = init_x2apic_ldr, | 109 | .init_apic_ldr = init_x2apic_ldr, |
142 | 110 | ||
143 | .ioapic_phys_id_map = NULL, | 111 | .ioapic_phys_id_map = NULL, |
@@ -155,8 +123,7 @@ static struct apic apic_x2apic_phys = { | |||
155 | .set_apic_id = x2apic_set_apic_id, | 123 | .set_apic_id = x2apic_set_apic_id, |
156 | .apic_id_mask = 0xFFFFFFFFu, | 124 | .apic_id_mask = 0xFFFFFFFFu, |
157 | 125 | ||
158 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 126 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, |
159 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | ||
160 | 127 | ||
161 | .send_IPI_mask = x2apic_send_IPI_mask, | 128 | .send_IPI_mask = x2apic_send_IPI_mask, |
162 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | 129 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c6d03f7a4401..8cfade9510a4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -185,17 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); | |||
185 | unsigned long sn_rtc_cycles_per_second; | 185 | unsigned long sn_rtc_cycles_per_second; |
186 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); | 186 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); |
187 | 187 | ||
188 | static const struct cpumask *uv_target_cpus(void) | ||
189 | { | ||
190 | return cpu_online_mask; | ||
191 | } | ||
192 | |||
193 | static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) | ||
194 | { | ||
195 | cpumask_clear(retmask); | ||
196 | cpumask_set_cpu(cpu, retmask); | ||
197 | } | ||
198 | |||
199 | static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) | 188 | static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) |
200 | { | 189 | { |
201 | #ifdef CONFIG_SMP | 190 | #ifdef CONFIG_SMP |
@@ -280,25 +269,12 @@ static void uv_init_apic_ldr(void) | |||
280 | { | 269 | { |
281 | } | 270 | } |
282 | 271 | ||
283 | static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) | 272 | static int |
284 | { | ||
285 | /* | ||
286 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
287 | * May as well be the first. | ||
288 | */ | ||
289 | int cpu = cpumask_first(cpumask); | ||
290 | |||
291 | if ((unsigned)cpu < nr_cpu_ids) | ||
292 | return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; | ||
293 | else | ||
294 | return BAD_APICID; | ||
295 | } | ||
296 | |||
297 | static unsigned int | ||
298 | uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 273 | uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
299 | const struct cpumask *andmask) | 274 | const struct cpumask *andmask, |
275 | unsigned int *apicid) | ||
300 | { | 276 | { |
301 | int cpu; | 277 | unsigned int cpu; |
302 | 278 | ||
303 | /* | 279 | /* |
304 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 280 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
@@ -308,7 +284,13 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
308 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 284 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
309 | break; | 285 | break; |
310 | } | 286 | } |
311 | return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; | 287 | |
288 | if (likely(cpu < nr_cpu_ids)) { | ||
289 | *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; | ||
290 | return 0; | ||
291 | } | ||
292 | |||
293 | return -EINVAL; | ||
312 | } | 294 | } |
313 | 295 | ||
314 | static unsigned int x2apic_get_apic_id(unsigned long x) | 296 | static unsigned int x2apic_get_apic_id(unsigned long x) |
@@ -362,13 +344,13 @@ static struct apic __refdata apic_x2apic_uv_x = { | |||
362 | .irq_delivery_mode = dest_Fixed, | 344 | .irq_delivery_mode = dest_Fixed, |
363 | .irq_dest_mode = 0, /* physical */ | 345 | .irq_dest_mode = 0, /* physical */ |
364 | 346 | ||
365 | .target_cpus = uv_target_cpus, | 347 | .target_cpus = online_target_cpus, |
366 | .disable_esr = 0, | 348 | .disable_esr = 0, |
367 | .dest_logical = APIC_DEST_LOGICAL, | 349 | .dest_logical = APIC_DEST_LOGICAL, |
368 | .check_apicid_used = NULL, | 350 | .check_apicid_used = NULL, |
369 | .check_apicid_present = NULL, | 351 | .check_apicid_present = NULL, |
370 | 352 | ||
371 | .vector_allocation_domain = uv_vector_allocation_domain, | 353 | .vector_allocation_domain = default_vector_allocation_domain, |
372 | .init_apic_ldr = uv_init_apic_ldr, | 354 | .init_apic_ldr = uv_init_apic_ldr, |
373 | 355 | ||
374 | .ioapic_phys_id_map = NULL, | 356 | .ioapic_phys_id_map = NULL, |
@@ -386,7 +368,6 @@ static struct apic __refdata apic_x2apic_uv_x = { | |||
386 | .set_apic_id = set_apic_id, | 368 | .set_apic_id = set_apic_id, |
387 | .apic_id_mask = 0xFFFFFFFFu, | 369 | .apic_id_mask = 0xFFFFFFFFu, |
388 | 370 | ||
389 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, | ||
390 | .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, | 371 | .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, |
391 | 372 | ||
392 | .send_IPI_mask = uv_send_IPI_mask, | 373 | .send_IPI_mask = uv_send_IPI_mask, |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 07b0c0db466c..d65464e43503 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -201,6 +201,8 @@ | |||
201 | * http://www.microsoft.com/whdc/archive/amp_12.mspx] | 201 | * http://www.microsoft.com/whdc/archive/amp_12.mspx] |
202 | */ | 202 | */ |
203 | 203 | ||
204 | #define pr_fmt(fmt) "apm: " fmt | ||
205 | |||
204 | #include <linux/module.h> | 206 | #include <linux/module.h> |
205 | 207 | ||
206 | #include <linux/poll.h> | 208 | #include <linux/poll.h> |
@@ -485,11 +487,11 @@ static void apm_error(char *str, int err) | |||
485 | if (error_table[i].key == err) | 487 | if (error_table[i].key == err) |
486 | break; | 488 | break; |
487 | if (i < ERROR_COUNT) | 489 | if (i < ERROR_COUNT) |
488 | printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); | 490 | pr_notice("%s: %s\n", str, error_table[i].msg); |
489 | else if (err < 0) | 491 | else if (err < 0) |
490 | printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); | 492 | pr_notice("%s: linux error code %i\n", str, err); |
491 | else | 493 | else |
492 | printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", | 494 | pr_notice("%s: unknown error code %#2.2x\n", |
493 | str, err); | 495 | str, err); |
494 | } | 496 | } |
495 | 497 | ||
@@ -1184,7 +1186,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender) | |||
1184 | static int notified; | 1186 | static int notified; |
1185 | 1187 | ||
1186 | if (notified++ == 0) | 1188 | if (notified++ == 0) |
1187 | printk(KERN_ERR "apm: an event queue overflowed\n"); | 1189 | pr_err("an event queue overflowed\n"); |
1188 | if (++as->event_tail >= APM_MAX_EVENTS) | 1190 | if (++as->event_tail >= APM_MAX_EVENTS) |
1189 | as->event_tail = 0; | 1191 | as->event_tail = 0; |
1190 | } | 1192 | } |
@@ -1447,7 +1449,7 @@ static void apm_mainloop(void) | |||
1447 | static int check_apm_user(struct apm_user *as, const char *func) | 1449 | static int check_apm_user(struct apm_user *as, const char *func) |
1448 | { | 1450 | { |
1449 | if (as == NULL || as->magic != APM_BIOS_MAGIC) { | 1451 | if (as == NULL || as->magic != APM_BIOS_MAGIC) { |
1450 | printk(KERN_ERR "apm: %s passed bad filp\n", func); | 1452 | pr_err("%s passed bad filp\n", func); |
1451 | return 1; | 1453 | return 1; |
1452 | } | 1454 | } |
1453 | return 0; | 1455 | return 0; |
@@ -1586,7 +1588,7 @@ static int do_release(struct inode *inode, struct file *filp) | |||
1586 | as1 = as1->next) | 1588 | as1 = as1->next) |
1587 | ; | 1589 | ; |
1588 | if (as1 == NULL) | 1590 | if (as1 == NULL) |
1589 | printk(KERN_ERR "apm: filp not in user list\n"); | 1591 | pr_err("filp not in user list\n"); |
1590 | else | 1592 | else |
1591 | as1->next = as->next; | 1593 | as1->next = as->next; |
1592 | } | 1594 | } |
@@ -1600,11 +1602,9 @@ static int do_open(struct inode *inode, struct file *filp) | |||
1600 | struct apm_user *as; | 1602 | struct apm_user *as; |
1601 | 1603 | ||
1602 | as = kmalloc(sizeof(*as), GFP_KERNEL); | 1604 | as = kmalloc(sizeof(*as), GFP_KERNEL); |
1603 | if (as == NULL) { | 1605 | if (as == NULL) |
1604 | printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", | ||
1605 | sizeof(*as)); | ||
1606 | return -ENOMEM; | 1606 | return -ENOMEM; |
1607 | } | 1607 | |
1608 | as->magic = APM_BIOS_MAGIC; | 1608 | as->magic = APM_BIOS_MAGIC; |
1609 | as->event_tail = as->event_head = 0; | 1609 | as->event_tail = as->event_head = 0; |
1610 | as->suspends_pending = as->standbys_pending = 0; | 1610 | as->suspends_pending = as->standbys_pending = 0; |
@@ -2313,16 +2313,16 @@ static int __init apm_init(void) | |||
2313 | } | 2313 | } |
2314 | 2314 | ||
2315 | if (apm_info.disabled) { | 2315 | if (apm_info.disabled) { |
2316 | printk(KERN_NOTICE "apm: disabled on user request.\n"); | 2316 | pr_notice("disabled on user request.\n"); |
2317 | return -ENODEV; | 2317 | return -ENODEV; |
2318 | } | 2318 | } |
2319 | if ((num_online_cpus() > 1) && !power_off && !smp) { | 2319 | if ((num_online_cpus() > 1) && !power_off && !smp) { |
2320 | printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); | 2320 | pr_notice("disabled - APM is not SMP safe.\n"); |
2321 | apm_info.disabled = 1; | 2321 | apm_info.disabled = 1; |
2322 | return -ENODEV; | 2322 | return -ENODEV; |
2323 | } | 2323 | } |
2324 | if (!acpi_disabled) { | 2324 | if (!acpi_disabled) { |
2325 | printk(KERN_NOTICE "apm: overridden by ACPI.\n"); | 2325 | pr_notice("overridden by ACPI.\n"); |
2326 | apm_info.disabled = 1; | 2326 | apm_info.disabled = 1; |
2327 | return -ENODEV; | 2327 | return -ENODEV; |
2328 | } | 2328 | } |
@@ -2356,8 +2356,7 @@ static int __init apm_init(void) | |||
2356 | 2356 | ||
2357 | kapmd_task = kthread_create(apm, NULL, "kapmd"); | 2357 | kapmd_task = kthread_create(apm, NULL, "kapmd"); |
2358 | if (IS_ERR(kapmd_task)) { | 2358 | if (IS_ERR(kapmd_task)) { |
2359 | printk(KERN_ERR "apm: disabled - Unable to start kernel " | 2359 | pr_err("disabled - Unable to start kernel thread\n"); |
2360 | "thread.\n"); | ||
2361 | err = PTR_ERR(kapmd_task); | 2360 | err = PTR_ERR(kapmd_task); |
2362 | kapmd_task = NULL; | 2361 | kapmd_task = NULL; |
2363 | remove_proc_entry("apm", NULL); | 2362 | remove_proc_entry("apm", NULL); |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 6ab6aa2fdfdd..d30a6a9a0121 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp) | |||
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o scattered.o topology.o | 15 | obj-y := intel_cacheinfo.o scattered.o topology.o |
16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o | 17 | obj-y += vmware.o hypervisor.o mshyperv.o |
18 | obj-y += rdrand.o | 18 | obj-y += rdrand.o |
19 | obj-y += match.o | 19 | obj-y += match.o |
20 | 20 | ||
@@ -32,7 +32,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o | |||
32 | 32 | ||
33 | ifdef CONFIG_PERF_EVENTS | 33 | ifdef CONFIG_PERF_EVENTS |
34 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o | 34 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o |
35 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o | 35 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o |
36 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o | ||
37 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o | ||
36 | endif | 38 | endif |
37 | 39 | ||
38 | obj-$(CONFIG_X86_MCE) += mcheck/ | 40 | obj-$(CONFIG_X86_MCE) += mcheck/ |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 146bb6218eec..9d92e19039f0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -19,6 +19,39 @@ | |||
19 | 19 | ||
20 | #include "cpu.h" | 20 | #include "cpu.h" |
21 | 21 | ||
22 | static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | ||
23 | { | ||
24 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
25 | u32 gprs[8] = { 0 }; | ||
26 | int err; | ||
27 | |||
28 | WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__); | ||
29 | |||
30 | gprs[1] = msr; | ||
31 | gprs[7] = 0x9c5a203a; | ||
32 | |||
33 | err = rdmsr_safe_regs(gprs); | ||
34 | |||
35 | *p = gprs[0] | ((u64)gprs[2] << 32); | ||
36 | |||
37 | return err; | ||
38 | } | ||
39 | |||
40 | static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) | ||
41 | { | ||
42 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
43 | u32 gprs[8] = { 0 }; | ||
44 | |||
45 | WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__); | ||
46 | |||
47 | gprs[0] = (u32)val; | ||
48 | gprs[1] = msr; | ||
49 | gprs[2] = val >> 32; | ||
50 | gprs[7] = 0x9c5a203a; | ||
51 | |||
52 | return wrmsr_safe_regs(gprs); | ||
53 | } | ||
54 | |||
22 | #ifdef CONFIG_X86_32 | 55 | #ifdef CONFIG_X86_32 |
23 | /* | 56 | /* |
24 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause | 57 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause |
@@ -586,9 +619,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
586 | !cpu_has(c, X86_FEATURE_TOPOEXT)) { | 619 | !cpu_has(c, X86_FEATURE_TOPOEXT)) { |
587 | u64 val; | 620 | u64 val; |
588 | 621 | ||
589 | if (!rdmsrl_amd_safe(0xc0011005, &val)) { | 622 | if (!rdmsrl_safe(0xc0011005, &val)) { |
590 | val |= 1ULL << 54; | 623 | val |= 1ULL << 54; |
591 | wrmsrl_amd_safe(0xc0011005, val); | 624 | wrmsrl_safe(0xc0011005, val); |
592 | rdmsrl(0xc0011005, val); | 625 | rdmsrl(0xc0011005, val); |
593 | if (val & (1ULL << 54)) { | 626 | if (val & (1ULL << 54)) { |
594 | set_cpu_cap(c, X86_FEATURE_TOPOEXT); | 627 | set_cpu_cap(c, X86_FEATURE_TOPOEXT); |
@@ -679,7 +712,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
679 | err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); | 712 | err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); |
680 | if (err == 0) { | 713 | if (err == 0) { |
681 | mask |= (1 << 10); | 714 | mask |= (1 << 10); |
682 | checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask); | 715 | wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); |
683 | } | 716 | } |
684 | } | 717 | } |
685 | 718 | ||
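For context on the two K8-only helpers added above: rdmsr_safe_regs() and wrmsr_safe_regs() take all eight GPRs as a u32 array in the order eax, ecx, edx, ebx, esp, ebp, esi, edi, so gprs[1] (ecx) selects the MSR and gprs[7] (edi) carries 0x9c5a203a, the AMD passcode that unlocks certain vendor MSRs on K8. A sketch of how the 64-bit value is reassembled from eax/edx; the helper name is illustrative:

static inline u64 msr_val_from_gprs(const u32 gprs[8])
{
        /* eax (gprs[0]) holds the low 32 bits, edx (gprs[2]) the high */
        return (u64)gprs[0] | ((u64)gprs[2] << 32);
}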
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 46674fbb62ba..c97bb7b5a9f8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -55,8 +55,8 @@ static void __init check_fpu(void) | |||
55 | 55 | ||
56 | if (!boot_cpu_data.hard_math) { | 56 | if (!boot_cpu_data.hard_math) { |
57 | #ifndef CONFIG_MATH_EMULATION | 57 | #ifndef CONFIG_MATH_EMULATION |
58 | printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); | 58 | pr_emerg("No coprocessor found and no math emulation present\n"); |
59 | printk(KERN_EMERG "Giving up.\n"); | 59 | pr_emerg("Giving up\n"); |
60 | for (;;) ; | 60 | for (;;) ; |
61 | #endif | 61 | #endif |
62 | return; | 62 | return; |
@@ -86,7 +86,7 @@ static void __init check_fpu(void) | |||
86 | 86 | ||
87 | boot_cpu_data.fdiv_bug = fdiv_bug; | 87 | boot_cpu_data.fdiv_bug = fdiv_bug; |
88 | if (boot_cpu_data.fdiv_bug) | 88 | if (boot_cpu_data.fdiv_bug) |
89 | printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); | 89 | pr_warn("Hmm, FPU with FDIV bug\n"); |
90 | } | 90 | } |
91 | 91 | ||
92 | static void __init check_hlt(void) | 92 | static void __init check_hlt(void) |
@@ -94,16 +94,16 @@ static void __init check_hlt(void) | |||
94 | if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) | 94 | if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) |
95 | return; | 95 | return; |
96 | 96 | ||
97 | printk(KERN_INFO "Checking 'hlt' instruction... "); | 97 | pr_info("Checking 'hlt' instruction... "); |
98 | if (!boot_cpu_data.hlt_works_ok) { | 98 | if (!boot_cpu_data.hlt_works_ok) { |
99 | printk("disabled\n"); | 99 | pr_cont("disabled\n"); |
100 | return; | 100 | return; |
101 | } | 101 | } |
102 | halt(); | 102 | halt(); |
103 | halt(); | 103 | halt(); |
104 | halt(); | 104 | halt(); |
105 | halt(); | 105 | halt(); |
106 | printk(KERN_CONT "OK.\n"); | 106 | pr_cont("OK\n"); |
107 | } | 107 | } |
108 | 108 | ||
109 | /* | 109 | /* |
@@ -116,7 +116,7 @@ static void __init check_popad(void) | |||
116 | #ifndef CONFIG_X86_POPAD_OK | 116 | #ifndef CONFIG_X86_POPAD_OK |
117 | int res, inp = (int) &res; | 117 | int res, inp = (int) &res; |
118 | 118 | ||
119 | printk(KERN_INFO "Checking for popad bug... "); | 119 | pr_info("Checking for popad bug... "); |
120 | __asm__ __volatile__( | 120 | __asm__ __volatile__( |
121 | "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " | 121 | "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " |
122 | : "=&a" (res) | 122 | : "=&a" (res) |
@@ -127,9 +127,9 @@ static void __init check_popad(void) | |||
127 | * CPU hard. Too bad. | 127 | * CPU hard. Too bad. |
128 | */ | 128 | */ |
129 | if (res != 12345678) | 129 | if (res != 12345678) |
130 | printk(KERN_CONT "Buggy.\n"); | 130 | pr_cont("Buggy\n"); |
131 | else | 131 | else |
132 | printk(KERN_CONT "OK.\n"); | 132 | pr_cont("OK\n"); |
133 | #endif | 133 | #endif |
134 | } | 134 | } |
135 | 135 | ||
@@ -161,7 +161,7 @@ void __init check_bugs(void) | |||
161 | { | 161 | { |
162 | identify_boot_cpu(); | 162 | identify_boot_cpu(); |
163 | #ifndef CONFIG_SMP | 163 | #ifndef CONFIG_SMP |
164 | printk(KERN_INFO "CPU: "); | 164 | pr_info("CPU: "); |
165 | print_cpu_info(&boot_cpu_data); | 165 | print_cpu_info(&boot_cpu_data); |
166 | #endif | 166 | #endif |
167 | check_config(); | 167 | check_config(); |
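A note on the pr_cont() conversions above: pr_cont() appends to the most recently emitted message instead of starting a new log line, which is what the old bare printk() and KERN_CONT calls relied on implicitly. A minimal sketch of the pairing, assuming the surrounding definitions; report_hlt_check() is an illustrative name:

static void __init report_hlt_check(void)
{
        pr_info("Checking 'hlt' instruction... ");      /* line left open */
        halt();
        pr_cont("OK\n");        /* appended to the same console line */
}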
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6b9333b429ba..46d8786d655e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -452,6 +452,35 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) | |||
452 | c->x86_cache_size = l2size; | 452 | c->x86_cache_size = l2size; |
453 | } | 453 | } |
454 | 454 | ||
455 | u16 __read_mostly tlb_lli_4k[NR_INFO]; | ||
456 | u16 __read_mostly tlb_lli_2m[NR_INFO]; | ||
457 | u16 __read_mostly tlb_lli_4m[NR_INFO]; | ||
458 | u16 __read_mostly tlb_lld_4k[NR_INFO]; | ||
459 | u16 __read_mostly tlb_lld_2m[NR_INFO]; | ||
460 | u16 __read_mostly tlb_lld_4m[NR_INFO]; | ||
461 | |||
462 | /* | ||
463 | * tlb_flushall_shift sets the balance point for replacing a cr3 write | ||
464 | * with multiple 'invlpg' instructions. The replacement is done when | ||
465 | * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. | ||
466 | * If tlb_flushall_shift is -1, the replacement is disabled. | ||
467 | */ | ||
468 | s8 __read_mostly tlb_flushall_shift = -1; | ||
469 | |||
470 | void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) | ||
471 | { | ||
472 | if (this_cpu->c_detect_tlb) | ||
473 | this_cpu->c_detect_tlb(c); | ||
474 | |||
475 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" | ||
476 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" | ||
477 | "tlb_flushall_shift: %d\n", | ||
478 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], | ||
479 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], | ||
480 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], | ||
481 | tlb_flushall_shift); | ||
482 | } | ||
483 | |||
455 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 484 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
456 | { | 485 | { |
457 | #ifdef CONFIG_X86_HT | 486 | #ifdef CONFIG_X86_HT |
@@ -911,6 +940,8 @@ void __init identify_boot_cpu(void) | |||
911 | #else | 940 | #else |
912 | vgetcpu_set_mode(); | 941 | vgetcpu_set_mode(); |
913 | #endif | 942 | #endif |
943 | if (boot_cpu_data.cpuid_level >= 2) | ||
944 | cpu_detect_tlb(&boot_cpu_data); | ||
914 | } | 945 | } |
915 | 946 | ||
916 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 947 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
@@ -947,7 +978,7 @@ static void __cpuinit __print_cpu_msr(void) | |||
947 | index_max = msr_range_array[i].max; | 978 | index_max = msr_range_array[i].max; |
948 | 979 | ||
949 | for (index = index_min; index < index_max; index++) { | 980 | for (index = index_min; index < index_max; index++) { |
950 | if (rdmsrl_amd_safe(index, &val)) | 981 | if (rdmsrl_safe(index, &val)) |
951 | continue; | 982 | continue; |
952 | printk(KERN_INFO " MSR%08x: %016llx\n", index, val); | 983 | printk(KERN_INFO " MSR%08x: %016llx\n", index, val); |
953 | } | 984 | } |
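A sketch of the flush policy the new comment in cpu_detect_tlb() describes, with act_entries standing in for the detected last-level dTLB size; flush_one_by_one() is an illustrative name, not part of the patch:

static bool flush_one_by_one(long flush_entries, long act_entries, s8 shift)
{
        if (shift < 0)          /* -1: always write cr3, i.e. full flush */
                return false;
        return flush_entries <= (act_entries >> shift);
}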
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 8bacc7826fb3..4041c24ae7db 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
@@ -20,10 +20,19 @@ struct cpu_dev { | |||
20 | void (*c_bsp_init)(struct cpuinfo_x86 *); | 20 | void (*c_bsp_init)(struct cpuinfo_x86 *); |
21 | void (*c_init)(struct cpuinfo_x86 *); | 21 | void (*c_init)(struct cpuinfo_x86 *); |
22 | void (*c_identify)(struct cpuinfo_x86 *); | 22 | void (*c_identify)(struct cpuinfo_x86 *); |
23 | void (*c_detect_tlb)(struct cpuinfo_x86 *); | ||
23 | unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); | 24 | unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); |
24 | int c_x86_vendor; | 25 | int c_x86_vendor; |
25 | }; | 26 | }; |
26 | 27 | ||
28 | struct _tlb_table { | ||
29 | unsigned char descriptor; | ||
30 | char tlb_type; | ||
31 | unsigned int entries; | ||
32 | /* unsigned int ways; */ | ||
33 | char info[128]; | ||
34 | }; | ||
35 | |||
27 | #define cpu_dev_register(cpu_devX) \ | 36 | #define cpu_dev_register(cpu_devX) \ |
28 | static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ | 37 | static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ |
29 | __attribute__((__section__(".x86_cpu_dev.init"))) = \ | 38 | __attribute__((__section__(".x86_cpu_dev.init"))) = \ |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 755f64fb0743..a8f8fa9769d6 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -37,6 +37,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = | |||
37 | #endif | 37 | #endif |
38 | &x86_hyper_vmware, | 38 | &x86_hyper_vmware, |
39 | &x86_hyper_ms_hyperv, | 39 | &x86_hyper_ms_hyperv, |
40 | #ifdef CONFIG_KVM_GUEST | ||
41 | &x86_hyper_kvm, | ||
42 | #endif | ||
40 | }; | 43 | }; |
41 | 44 | ||
42 | const struct hypervisor_x86 *x86_hyper; | 45 | const struct hypervisor_x86 *x86_hyper; |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3e6ff6cbf42a..0a4ce2980a5a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -491,6 +491,181 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i | |||
491 | } | 491 | } |
492 | #endif | 492 | #endif |
493 | 493 | ||
494 | #define TLB_INST_4K 0x01 | ||
495 | #define TLB_INST_4M 0x02 | ||
496 | #define TLB_INST_2M_4M 0x03 | ||
497 | |||
498 | #define TLB_INST_ALL 0x05 | ||
499 | #define TLB_INST_1G 0x06 | ||
500 | |||
501 | #define TLB_DATA_4K 0x11 | ||
502 | #define TLB_DATA_4M 0x12 | ||
503 | #define TLB_DATA_2M_4M 0x13 | ||
504 | #define TLB_DATA_4K_4M 0x14 | ||
505 | |||
506 | #define TLB_DATA_1G 0x16 | ||
507 | |||
508 | #define TLB_DATA0_4K 0x21 | ||
509 | #define TLB_DATA0_4M 0x22 | ||
510 | #define TLB_DATA0_2M_4M 0x23 | ||
511 | |||
512 | #define STLB_4K 0x41 | ||
513 | |||
514 | static const struct _tlb_table intel_tlb_table[] __cpuinitconst = { | ||
515 | { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, | ||
516 | { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, fully associative" }, | ||
517 | { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" }, | ||
518 | { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" }, | ||
519 | { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" }, | ||
520 | { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" }, | ||
521 | { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" }, | ||
522 | { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, | ||
523 | { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, | ||
524 | { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, | ||
525 | { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, | ||
526 | { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" }, | ||
527 | { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" }, | ||
528 | { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" }, | ||
529 | { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" }, | ||
530 | { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, | ||
531 | { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, | ||
532 | { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, | ||
533 | { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, | ||
534 | { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries" }, | ||
535 | { 0xb2, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 4-way set associative" }, | ||
536 | { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, | ||
537 | { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, | ||
538 | { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, | ||
539 | { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, | ||
540 | { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, | ||
541 | { 0x00, 0, 0 } | ||
542 | }; | ||
543 | |||
544 | static void __cpuinit intel_tlb_lookup(const unsigned char desc) | ||
545 | { | ||
546 | unsigned char k; | ||
547 | if (desc == 0) | ||
548 | return; | ||
549 | |||
550 | /* look up this descriptor in the table */ | ||
551 | for (k = 0; intel_tlb_table[k].descriptor != desc && | ||
552 | intel_tlb_table[k].descriptor != 0; k++) | ||
553 | ; | ||
554 | |||
555 | if (intel_tlb_table[k].tlb_type == 0) | ||
556 | return; | ||
557 | |||
558 | switch (intel_tlb_table[k].tlb_type) { | ||
559 | case STLB_4K: | ||
560 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
561 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
562 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
563 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
564 | break; | ||
565 | case TLB_INST_ALL: | ||
566 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
567 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
568 | if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
569 | tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
570 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
571 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
572 | break; | ||
573 | case TLB_INST_4K: | ||
574 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
575 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
576 | break; | ||
577 | case TLB_INST_4M: | ||
578 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
579 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
580 | break; | ||
581 | case TLB_INST_2M_4M: | ||
582 | if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
583 | tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
584 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
585 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
586 | break; | ||
587 | case TLB_DATA_4K: | ||
588 | case TLB_DATA0_4K: | ||
589 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
590 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
591 | break; | ||
592 | case TLB_DATA_4M: | ||
593 | case TLB_DATA0_4M: | ||
594 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
595 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
596 | break; | ||
597 | case TLB_DATA_2M_4M: | ||
598 | case TLB_DATA0_2M_4M: | ||
599 | if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
600 | tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
601 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
602 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
603 | break; | ||
604 | case TLB_DATA_4K_4M: | ||
605 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
606 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
607 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
608 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
609 | break; | ||
610 | } | ||
611 | } | ||
612 | |||
613 | static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) | ||
614 | { | ||
615 | if (!cpu_has_invlpg) { | ||
616 | tlb_flushall_shift = -1; | ||
617 | return; | ||
618 | } | ||
619 | switch ((c->x86 << 8) + c->x86_model) { | ||
620 | case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
621 | case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
622 | case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
623 | case 0x61d: /* six-core 45 nm xeon "Dunnington" */ | ||
624 | tlb_flushall_shift = -1; | ||
625 | break; | ||
626 | case 0x61a: /* 45 nm nehalem, "Bloomfield" */ | ||
627 | case 0x61e: /* 45 nm nehalem, "Lynnfield" */ | ||
628 | case 0x625: /* 32 nm nehalem, "Clarkdale" */ | ||
629 | case 0x62c: /* 32 nm nehalem, "Gulftown" */ | ||
630 | case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ | ||
631 | case 0x62f: /* 32 nm Xeon E7 */ | ||
632 | tlb_flushall_shift = 6; | ||
633 | break; | ||
634 | case 0x62a: /* SandyBridge */ | ||
635 | case 0x62d: /* SandyBridge, "Romley-EP" */ | ||
636 | tlb_flushall_shift = 5; | ||
637 | break; | ||
638 | case 0x63a: /* Ivybridge */ | ||
639 | tlb_flushall_shift = 1; | ||
640 | break; | ||
641 | default: | ||
642 | tlb_flushall_shift = 6; | ||
643 | } | ||
644 | } | ||
645 | |||
646 | static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) | ||
647 | { | ||
648 | int i, j, n; | ||
649 | unsigned int regs[4]; | ||
650 | unsigned char *desc = (unsigned char *)regs; | ||
651 | /* Number of times to iterate */ | ||
652 | n = cpuid_eax(2) & 0xFF; | ||
653 | |||
654 | for (i = 0 ; i < n ; i++) { | ||
655 | cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); | ||
656 | |||
657 | /* If bit 31 is set, this is an unknown format */ | ||
658 | for (j = 0 ; j < 3 ; j++) | ||
659 | if (regs[j] & (1 << 31)) | ||
660 | regs[j] = 0; | ||
661 | |||
662 | /* Byte 0 is level count, not a descriptor */ | ||
663 | for (j = 1 ; j < 16 ; j++) | ||
664 | intel_tlb_lookup(desc[j]); | ||
665 | } | ||
666 | intel_tlb_flushall_shift_set(c); | ||
667 | } | ||
668 | |||
494 | static const struct cpu_dev __cpuinitconst intel_cpu_dev = { | 669 | static const struct cpu_dev __cpuinitconst intel_cpu_dev = { |
495 | .c_vendor = "Intel", | 670 | .c_vendor = "Intel", |
496 | .c_ident = { "GenuineIntel" }, | 671 | .c_ident = { "GenuineIntel" }, |
@@ -546,6 +721,7 @@ static const struct cpu_dev __cpuinitconst intel_cpu_dev = { | |||
546 | }, | 721 | }, |
547 | .c_size_cache = intel_size_cache, | 722 | .c_size_cache = intel_size_cache, |
548 | #endif | 723 | #endif |
724 | .c_detect_tlb = intel_detect_tlb, | ||
549 | .c_early_init = early_init_intel, | 725 | .c_early_init = early_init_intel, |
550 | .c_init = init_intel, | 726 | .c_init = init_intel, |
551 | .c_x86_vendor = X86_VENDOR_INTEL, | 727 | .c_x86_vendor = X86_VENDOR_INTEL, |
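A sketch of the CPUID leaf-2 walk intel_detect_tlb() performs above: the low byte of EAX gives the number of times CPUID(2) must be executed, bit 31 flags a register's contents as invalid, and every remaining byte is one descriptor to look up in the table. walk_leaf2() is an illustrative name; this variant checks bit 31 in all four registers, per the SDM convention, where the code above checks only the first three:

static void walk_leaf2(void (*lookup)(unsigned char))
{
        unsigned int regs[4];
        unsigned char *desc = (unsigned char *)regs;
        int i, j, n = cpuid_eax(2) & 0xFF;      /* iteration count */

        for (i = 0; i < n; i++) {
                cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
                for (j = 0; j < 4; j++)
                        if (regs[j] & (1u << 31))
                                regs[j] = 0;    /* register contents invalid */
                for (j = 1; j < 16; j++)        /* byte 0 is the count */
                        lookup(desc[j]);
        }
}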
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index da27c5d2168a..5e095f873e3e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -7,6 +7,9 @@ | |||
7 | * Copyright 2008 Intel Corporation | 7 | * Copyright 2008 Intel Corporation |
8 | * Author: Andi Kleen | 8 | * Author: Andi Kleen |
9 | */ | 9 | */ |
10 | |||
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
12 | |||
10 | #include <linux/thread_info.h> | 13 | #include <linux/thread_info.h> |
11 | #include <linux/capability.h> | 14 | #include <linux/capability.h> |
12 | #include <linux/miscdevice.h> | 15 | #include <linux/miscdevice.h> |
@@ -57,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); | |||
57 | 60 | ||
58 | int mce_disabled __read_mostly; | 61 | int mce_disabled __read_mostly; |
59 | 62 | ||
60 | #define MISC_MCELOG_MINOR 227 | ||
61 | |||
62 | #define SPINUNIT 100 /* 100ns */ | 63 | #define SPINUNIT 100 /* 100ns */ |
63 | 64 | ||
64 | atomic_t mce_entry; | 65 | atomic_t mce_entry; |
@@ -210,7 +211,7 @@ static void drain_mcelog_buffer(void) | |||
210 | cpu_relax(); | 211 | cpu_relax(); |
211 | 212 | ||
212 | if (!m->finished && retries >= 4) { | 213 | if (!m->finished && retries >= 4) { |
213 | pr_err("MCE: skipping error being logged currently!\n"); | 214 | pr_err("skipping error being logged currently!\n"); |
214 | break; | 215 | break; |
215 | } | 216 | } |
216 | } | 217 | } |
@@ -1167,8 +1168,9 @@ int memory_failure(unsigned long pfn, int vector, int flags) | |||
1167 | { | 1168 | { |
1168 | /* mce_severity() should not hand us an ACTION_REQUIRED error */ | 1169 | /* mce_severity() should not hand us an ACTION_REQUIRED error */ |
1169 | BUG_ON(flags & MF_ACTION_REQUIRED); | 1170 | BUG_ON(flags & MF_ACTION_REQUIRED); |
1170 | printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" | 1171 | pr_err("Uncorrected memory error in page 0x%lx ignored\n" |
1171 | "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); | 1172 | "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", |
1173 | pfn); | ||
1172 | 1174 | ||
1173 | return 0; | 1175 | return 0; |
1174 | } | 1176 | } |
@@ -1186,6 +1188,7 @@ void mce_notify_process(void) | |||
1186 | { | 1188 | { |
1187 | unsigned long pfn; | 1189 | unsigned long pfn; |
1188 | struct mce_info *mi = mce_find_info(); | 1190 | struct mce_info *mi = mce_find_info(); |
1191 | int flags = MF_ACTION_REQUIRED; | ||
1189 | 1192 | ||
1190 | if (!mi) | 1193 | if (!mi) |
1191 | mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); | 1194 | mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); |
@@ -1200,8 +1203,9 @@ void mce_notify_process(void) | |||
1200 | * doomed. We still need to mark the page as poisoned and alert any | 1203 | * doomed. We still need to mark the page as poisoned and alert any |
1201 | * other users of the page. | 1204 | * other users of the page. |
1202 | */ | 1205 | */ |
1203 | if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || | 1206 | if (!mi->restartable) |
1204 | mi->restartable == 0) { | 1207 | flags |= MF_MUST_KILL; |
1208 | if (memory_failure(pfn, MCE_VECTOR, flags) < 0) { | ||
1205 | pr_err("Memory error not recovered"); | 1209 | pr_err("Memory error not recovered"); |
1206 | force_sig(SIGBUS, current); | 1210 | force_sig(SIGBUS, current); |
1207 | } | 1211 | } |
@@ -1358,11 +1362,10 @@ static int __cpuinit __mcheck_cpu_cap_init(void) | |||
1358 | 1362 | ||
1359 | b = cap & MCG_BANKCNT_MASK; | 1363 | b = cap & MCG_BANKCNT_MASK; |
1360 | if (!banks) | 1364 | if (!banks) |
1361 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); | 1365 | pr_info("CPU supports %d MCE banks\n", b); |
1362 | 1366 | ||
1363 | if (b > MAX_NR_BANKS) { | 1367 | if (b > MAX_NR_BANKS) { |
1364 | printk(KERN_WARNING | 1368 | pr_warn("Using only %u machine check banks out of %u\n", |
1365 | "MCE: Using only %u machine check banks out of %u\n", | ||
1366 | MAX_NR_BANKS, b); | 1369 | MAX_NR_BANKS, b); |
1367 | b = MAX_NR_BANKS; | 1370 | b = MAX_NR_BANKS; |
1368 | } | 1371 | } |
@@ -1419,7 +1422,7 @@ static void __mcheck_cpu_init_generic(void) | |||
1419 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | 1422 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) |
1420 | { | 1423 | { |
1421 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1424 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1422 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | 1425 | pr_info("unknown CPU type - not enabling MCE support\n"); |
1423 | return -EOPNOTSUPP; | 1426 | return -EOPNOTSUPP; |
1424 | } | 1427 | } |
1425 | 1428 | ||
@@ -1574,7 +1577,7 @@ static void __mcheck_cpu_init_timer(void) | |||
1574 | /* Handle unconfigured int18 (should never happen) */ | 1577 | /* Handle unconfigured int18 (should never happen) */ |
1575 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | 1578 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) |
1576 | { | 1579 | { |
1577 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | 1580 | pr_err("CPU#%d: Unexpected int18 (Machine Check)\n", |
1578 | smp_processor_id()); | 1581 | smp_processor_id()); |
1579 | } | 1582 | } |
1580 | 1583 | ||
@@ -1893,8 +1896,7 @@ static int __init mcheck_enable(char *str) | |||
1893 | get_option(&str, &monarch_timeout); | 1896 | get_option(&str, &monarch_timeout); |
1894 | } | 1897 | } |
1895 | } else { | 1898 | } else { |
1896 | printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", | 1899 | pr_info("mce argument %s ignored. Please use /sys\n", str); |
1897 | str); | ||
1898 | return 0; | 1900 | return 0; |
1899 | } | 1901 | } |
1900 | return 1; | 1902 | return 1; |
@@ -2342,7 +2344,7 @@ static __init int mcheck_init_device(void) | |||
2342 | 2344 | ||
2343 | return err; | 2345 | return err; |
2344 | } | 2346 | } |
2345 | device_initcall(mcheck_init_device); | 2347 | device_initcall_sync(mcheck_init_device); |
2346 | 2348 | ||
2347 | /* | 2349 | /* |
2348 | * Old style boot options parsing. Only for compatibility. | 2350 | * Old style boot options parsing. Only for compatibility. |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f4873a64f46d..c4e916d77378 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -1,15 +1,17 @@ | |||
1 | /* | 1 | /* |
2 | * (c) 2005, 2006 Advanced Micro Devices, Inc. | 2 | * (c) 2005-2012 Advanced Micro Devices, Inc. |
3 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
4 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
6 | * | 6 | * |
7 | * Written by Jacob Shin - AMD, Inc. | 7 | * Written by Jacob Shin - AMD, Inc. |
8 | * | 8 | * |
9 | * Support : jacob.shin@amd.com | 9 | * Support: borislav.petkov@amd.com |
10 | * | 10 | * |
11 | * April 2006 | 11 | * April 2006 |
12 | * - added support for AMD Family 0x10 processors | 12 | * - added support for AMD Family 0x10 processors |
13 | * May 2012 | ||
14 | * - major scrubbing | ||
13 | * | 15 | * |
14 | * All MC4_MISCi registers are shared between multi-cores | 16 | * All MC4_MISCi registers are shared between multi-cores |
15 | */ | 17 | */ |
@@ -25,6 +27,7 @@ | |||
25 | #include <linux/cpu.h> | 27 | #include <linux/cpu.h> |
26 | #include <linux/smp.h> | 28 | #include <linux/smp.h> |
27 | 29 | ||
30 | #include <asm/amd_nb.h> | ||
28 | #include <asm/apic.h> | 31 | #include <asm/apic.h> |
29 | #include <asm/idle.h> | 32 | #include <asm/idle.h> |
30 | #include <asm/mce.h> | 33 | #include <asm/mce.h> |
@@ -45,23 +48,15 @@ | |||
45 | #define MASK_BLKPTR_LO 0xFF000000 | 48 | #define MASK_BLKPTR_LO 0xFF000000 |
46 | #define MCG_XBLK_ADDR 0xC0000400 | 49 | #define MCG_XBLK_ADDR 0xC0000400 |
47 | 50 | ||
48 | struct threshold_block { | 51 | static const char * const th_names[] = { |
49 | unsigned int block; | 52 | "load_store", |
50 | unsigned int bank; | 53 | "insn_fetch", |
51 | unsigned int cpu; | 54 | "combined_unit", |
52 | u32 address; | 55 | "", |
53 | u16 interrupt_enable; | 56 | "northbridge", |
54 | bool interrupt_capable; | 57 | "execution_unit", |
55 | u16 threshold_limit; | ||
56 | struct kobject kobj; | ||
57 | struct list_head miscj; | ||
58 | }; | 58 | }; |
59 | 59 | ||
60 | struct threshold_bank { | ||
61 | struct kobject *kobj; | ||
62 | struct threshold_block *blocks; | ||
63 | cpumask_var_t cpus; | ||
64 | }; | ||
65 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); | 60 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); |
66 | 61 | ||
67 | static unsigned char shared_bank[NR_BANKS] = { | 62 | static unsigned char shared_bank[NR_BANKS] = { |
@@ -84,6 +79,26 @@ struct thresh_restart { | |||
84 | u16 old_limit; | 79 | u16 old_limit; |
85 | }; | 80 | }; |
86 | 81 | ||
82 | static const char * const bank4_names(struct threshold_block *b) | ||
83 | { | ||
84 | switch (b->address) { | ||
85 | /* MSR4_MISC0 */ | ||
86 | case 0x00000413: | ||
87 | return "dram"; | ||
88 | |||
89 | case 0xc0000408: | ||
90 | return "ht_links"; | ||
91 | |||
92 | case 0xc0000409: | ||
93 | return "l3_cache"; | ||
94 | |||
95 | default: | ||
96 | WARN(1, "Funny MSR: 0x%08x\n", b->address); | ||
97 | return ""; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | |||
87 | static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) | 102 | static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) |
88 | { | 103 | { |
89 | /* | 104 | /* |
@@ -224,8 +239,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
224 | 239 | ||
225 | if (!block) | 240 | if (!block) |
226 | per_cpu(bank_map, cpu) |= (1 << bank); | 241 | per_cpu(bank_map, cpu) |= (1 << bank); |
227 | if (shared_bank[bank] && c->cpu_core_id) | ||
228 | break; | ||
229 | 242 | ||
230 | memset(&b, 0, sizeof(b)); | 243 | memset(&b, 0, sizeof(b)); |
231 | b.cpu = cpu; | 244 | b.cpu = cpu; |
@@ -326,7 +339,7 @@ struct threshold_attr { | |||
326 | #define SHOW_FIELDS(name) \ | 339 | #define SHOW_FIELDS(name) \ |
327 | static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ | 340 | static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ |
328 | { \ | 341 | { \ |
329 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ | 342 | return sprintf(buf, "%lu\n", (unsigned long) b->name); \ |
330 | } | 343 | } |
331 | SHOW_FIELDS(interrupt_enable) | 344 | SHOW_FIELDS(interrupt_enable) |
332 | SHOW_FIELDS(threshold_limit) | 345 | SHOW_FIELDS(threshold_limit) |
@@ -377,38 +390,21 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) | |||
377 | return size; | 390 | return size; |
378 | } | 391 | } |
379 | 392 | ||
380 | struct threshold_block_cross_cpu { | ||
381 | struct threshold_block *tb; | ||
382 | long retval; | ||
383 | }; | ||
384 | |||
385 | static void local_error_count_handler(void *_tbcc) | ||
386 | { | ||
387 | struct threshold_block_cross_cpu *tbcc = _tbcc; | ||
388 | struct threshold_block *b = tbcc->tb; | ||
389 | u32 low, high; | ||
390 | |||
391 | rdmsr(b->address, low, high); | ||
392 | tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); | ||
393 | } | ||
394 | |||
395 | static ssize_t show_error_count(struct threshold_block *b, char *buf) | 393 | static ssize_t show_error_count(struct threshold_block *b, char *buf) |
396 | { | 394 | { |
397 | struct threshold_block_cross_cpu tbcc = { .tb = b, }; | 395 | u32 lo, hi; |
398 | 396 | ||
399 | smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1); | 397 | rdmsr_on_cpu(b->cpu, b->address, &lo, &hi); |
400 | return sprintf(buf, "%lx\n", tbcc.retval); | ||
401 | } | ||
402 | |||
403 | static ssize_t store_error_count(struct threshold_block *b, | ||
404 | const char *buf, size_t count) | ||
405 | { | ||
406 | struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; | ||
407 | 398 | ||
408 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 399 | return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) - |
409 | return 1; | 400 | (THRESHOLD_MAX - b->threshold_limit))); |
410 | } | 401 | } |
411 | 402 | ||
403 | static struct threshold_attr error_count = { | ||
404 | .attr = {.name = __stringify(error_count), .mode = 0444 }, | ||
405 | .show = show_error_count, | ||
406 | }; | ||
407 | |||
412 | #define RW_ATTR(val) \ | 408 | #define RW_ATTR(val) \ |
413 | static struct threshold_attr val = { \ | 409 | static struct threshold_attr val = { \ |
414 | .attr = {.name = __stringify(val), .mode = 0644 }, \ | 410 | .attr = {.name = __stringify(val), .mode = 0644 }, \ |
@@ -418,7 +414,6 @@ static struct threshold_attr val = { \ | |||
418 | 414 | ||
419 | RW_ATTR(interrupt_enable); | 415 | RW_ATTR(interrupt_enable); |
420 | RW_ATTR(threshold_limit); | 416 | RW_ATTR(threshold_limit); |
421 | RW_ATTR(error_count); | ||
422 | 417 | ||
423 | static struct attribute *default_attrs[] = { | 418 | static struct attribute *default_attrs[] = { |
424 | &threshold_limit.attr, | 419 | &threshold_limit.attr, |
@@ -517,7 +512,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
517 | 512 | ||
518 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, | 513 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, |
519 | per_cpu(threshold_banks, cpu)[bank]->kobj, | 514 | per_cpu(threshold_banks, cpu)[bank]->kobj, |
520 | "misc%i", block); | 515 | (bank == 4 ? bank4_names(b) : th_names[bank])); |
521 | if (err) | 516 | if (err) |
522 | goto out_free; | 517 | goto out_free; |
523 | recurse: | 518 | recurse: |
@@ -548,98 +543,91 @@ out_free: | |||
548 | return err; | 543 | return err; |
549 | } | 544 | } |
550 | 545 | ||
551 | static __cpuinit long | 546 | static __cpuinit int __threshold_add_blocks(struct threshold_bank *b) |
552 | local_allocate_threshold_blocks(int cpu, unsigned int bank) | ||
553 | { | 547 | { |
554 | return allocate_threshold_blocks(cpu, bank, 0, | 548 | struct list_head *head = &b->blocks->miscj; |
555 | MSR_IA32_MC0_MISC + bank * 4); | 549 | struct threshold_block *pos = NULL; |
550 | struct threshold_block *tmp = NULL; | ||
551 | int err = 0; | ||
552 | |||
553 | err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name); | ||
554 | if (err) | ||
555 | return err; | ||
556 | |||
557 | list_for_each_entry_safe(pos, tmp, head, miscj) { | ||
558 | |||
559 | err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name); | ||
560 | if (err) { | ||
561 | list_for_each_entry_safe_reverse(pos, tmp, head, miscj) | ||
562 | kobject_del(&pos->kobj); | ||
563 | |||
564 | return err; | ||
565 | } | ||
566 | } | ||
567 | return err; | ||
556 | } | 568 | } |
557 | 569 | ||
558 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ | ||
559 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | 570 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) |
560 | { | 571 | { |
561 | int i, err = 0; | ||
562 | struct threshold_bank *b = NULL; | ||
563 | struct device *dev = per_cpu(mce_device, cpu); | 572 | struct device *dev = per_cpu(mce_device, cpu); |
564 | char name[32]; | 573 | struct amd_northbridge *nb = NULL; |
574 | struct threshold_bank *b = NULL; | ||
575 | const char *name = th_names[bank]; | ||
576 | int err = 0; | ||
565 | 577 | ||
566 | sprintf(name, "threshold_bank%i", bank); | 578 | if (shared_bank[bank]) { |
567 | 579 | ||
568 | #ifdef CONFIG_SMP | 580 | nb = node_to_amd_nb(amd_get_nb_id(cpu)); |
569 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ | 581 | WARN_ON(!nb); |
570 | i = cpumask_first(cpu_llc_shared_mask(cpu)); | ||
571 | 582 | ||
572 | /* first core not up yet */ | 583 | /* threshold descriptor already initialized on this node? */ |
573 | if (cpu_data(i).cpu_core_id) | 584 | if (nb->bank4) { |
574 | goto out; | 585 | /* yes, use it */ |
586 | b = nb->bank4; | ||
587 | err = kobject_add(b->kobj, &dev->kobj, name); | ||
588 | if (err) | ||
589 | goto out; | ||
575 | 590 | ||
576 | /* already linked */ | 591 | per_cpu(threshold_banks, cpu)[bank] = b; |
577 | if (per_cpu(threshold_banks, cpu)[bank]) | 592 | atomic_inc(&b->cpus); |
578 | goto out; | ||
579 | 593 | ||
580 | b = per_cpu(threshold_banks, i)[bank]; | 594 | err = __threshold_add_blocks(b); |
581 | 595 | ||
582 | if (!b) | ||
583 | goto out; | 596 | goto out; |
584 | 597 | } | |
585 | err = sysfs_create_link(&dev->kobj, b->kobj, name); | ||
586 | if (err) | ||
587 | goto out; | ||
588 | |||
589 | cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu)); | ||
590 | per_cpu(threshold_banks, cpu)[bank] = b; | ||
591 | |||
592 | goto out; | ||
593 | } | 598 | } |
594 | #endif | ||
595 | 599 | ||
596 | b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); | 600 | b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); |
597 | if (!b) { | 601 | if (!b) { |
598 | err = -ENOMEM; | 602 | err = -ENOMEM; |
599 | goto out; | 603 | goto out; |
600 | } | 604 | } |
601 | if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { | ||
602 | kfree(b); | ||
603 | err = -ENOMEM; | ||
604 | goto out; | ||
605 | } | ||
606 | 605 | ||
607 | b->kobj = kobject_create_and_add(name, &dev->kobj); | 606 | b->kobj = kobject_create_and_add(name, &dev->kobj); |
608 | if (!b->kobj) | 607 | if (!b->kobj) { |
608 | err = -EINVAL; | ||
609 | goto out_free; | 609 | goto out_free; |
610 | 610 | } | |
611 | #ifndef CONFIG_SMP | ||
612 | cpumask_setall(b->cpus); | ||
613 | #else | ||
614 | cpumask_set_cpu(cpu, b->cpus); | ||
615 | #endif | ||
616 | 611 | ||
617 | per_cpu(threshold_banks, cpu)[bank] = b; | 612 | per_cpu(threshold_banks, cpu)[bank] = b; |
618 | 613 | ||
619 | err = local_allocate_threshold_blocks(cpu, bank); | 614 | if (shared_bank[bank]) { |
620 | if (err) | 615 | atomic_set(&b->cpus, 1); |
621 | goto out_free; | ||
622 | |||
623 | for_each_cpu(i, b->cpus) { | ||
624 | if (i == cpu) | ||
625 | continue; | ||
626 | |||
627 | dev = per_cpu(mce_device, i); | ||
628 | if (dev) | ||
629 | err = sysfs_create_link(&dev->kobj,b->kobj, name); | ||
630 | if (err) | ||
631 | goto out; | ||
632 | 616 | ||
633 | per_cpu(threshold_banks, i)[bank] = b; | 617 | /* nb is already initialized, see above */ |
618 | WARN_ON(nb->bank4); | ||
619 | nb->bank4 = b; | ||
634 | } | 620 | } |
635 | 621 | ||
636 | goto out; | 622 | err = allocate_threshold_blocks(cpu, bank, 0, |
623 | MSR_IA32_MC0_MISC + bank * 4); | ||
624 | if (!err) | ||
625 | goto out; | ||
637 | 626 | ||
638 | out_free: | 627 | out_free: |
639 | per_cpu(threshold_banks, cpu)[bank] = NULL; | ||
640 | free_cpumask_var(b->cpus); | ||
641 | kfree(b); | 628 | kfree(b); |
642 | out: | 629 | |
630 | out: | ||
643 | return err; | 631 | return err; |
644 | } | 632 | } |
645 | 633 | ||
@@ -660,12 +648,6 @@ static __cpuinit int threshold_create_device(unsigned int cpu) | |||
660 | return err; | 648 | return err; |
661 | } | 649 | } |
662 | 650 | ||
663 | /* | ||
664 | * let's be hotplug friendly. | ||
665 | * in case of multiple core processors, the first core always takes ownership | ||
666 | * of shared sysfs dir/files, and rest of the cores will be symlinked to it. | ||
667 | */ | ||
668 | |||
669 | static void deallocate_threshold_block(unsigned int cpu, | 651 | static void deallocate_threshold_block(unsigned int cpu, |
670 | unsigned int bank) | 652 | unsigned int bank) |
671 | { | 653 | { |
@@ -686,41 +668,42 @@ static void deallocate_threshold_block(unsigned int cpu, | |||
686 | per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; | 668 | per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; |
687 | } | 669 | } |
688 | 670 | ||
671 | static void __threshold_remove_blocks(struct threshold_bank *b) | ||
672 | { | ||
673 | struct threshold_block *pos = NULL; | ||
674 | struct threshold_block *tmp = NULL; | ||
675 | |||
676 | kobject_del(b->kobj); | ||
677 | |||
678 | list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj) | ||
679 | kobject_del(&pos->kobj); | ||
680 | } | ||
681 | |||
689 | static void threshold_remove_bank(unsigned int cpu, int bank) | 682 | static void threshold_remove_bank(unsigned int cpu, int bank) |
690 | { | 683 | { |
684 | struct amd_northbridge *nb; | ||
691 | struct threshold_bank *b; | 685 | struct threshold_bank *b; |
692 | struct device *dev; | ||
693 | char name[32]; | ||
694 | int i = 0; | ||
695 | 686 | ||
696 | b = per_cpu(threshold_banks, cpu)[bank]; | 687 | b = per_cpu(threshold_banks, cpu)[bank]; |
697 | if (!b) | 688 | if (!b) |
698 | return; | 689 | return; |
690 | |||
699 | if (!b->blocks) | 691 | if (!b->blocks) |
700 | goto free_out; | 692 | goto free_out; |
701 | 693 | ||
702 | sprintf(name, "threshold_bank%i", bank); | 694 | if (shared_bank[bank]) { |
703 | 695 | if (!atomic_dec_and_test(&b->cpus)) { | |
704 | #ifdef CONFIG_SMP | 696 | __threshold_remove_blocks(b); |
705 | /* sibling symlink */ | 697 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
706 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | 698 | return; |
707 | dev = per_cpu(mce_device, cpu); | 699 | } else { |
708 | sysfs_remove_link(&dev->kobj, name); | 700 | /* |
709 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 701 | * the last CPU on this node using the shared bank is |
710 | 702 | * going away, remove that bank now. | |
711 | return; | 703 | */ |
712 | } | 704 | nb = node_to_amd_nb(amd_get_nb_id(cpu)); |
713 | #endif | 705 | nb->bank4 = NULL; |
714 | 706 | } | |
715 | /* remove all sibling symlinks before unregistering */ | ||
716 | for_each_cpu(i, b->cpus) { | ||
717 | if (i == cpu) | ||
718 | continue; | ||
719 | |||
720 | dev = per_cpu(mce_device, i); | ||
721 | if (dev) | ||
722 | sysfs_remove_link(&dev->kobj, name); | ||
723 | per_cpu(threshold_banks, i)[bank] = NULL; | ||
724 | } | 707 | } |
725 | 708 | ||
726 | deallocate_threshold_block(cpu, bank); | 709 | deallocate_threshold_block(cpu, bank); |
@@ -728,7 +711,6 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
728 | free_out: | 711 | free_out: |
729 | kobject_del(b->kobj); | 712 | kobject_del(b->kobj); |
730 | kobject_put(b->kobj); | 713 | kobject_put(b->kobj); |
731 | free_cpumask_var(b->cpus); | ||
732 | kfree(b); | 714 | kfree(b); |
733 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 715 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
734 | } | 716 | } |
@@ -777,4 +759,24 @@ static __init int threshold_init_device(void) | |||
777 | 759 | ||
778 | return 0; | 760 | return 0; |
779 | } | 761 | } |
780 | device_initcall(threshold_init_device); | 762 | /* |
763 | * There are three functions which need to be initcalled in sequence: | ||
764 | * 1. xen_late_init_mcelog | ||
765 | * 2. mcheck_init_device | ||
766 | * 3. threshold_init_device | ||
767 | * | ||
768 | * xen_late_init_mcelog must register xen_mce_chrdev_device before | ||
769 | * the native mce_chrdev_device registration when running under Xen; | ||
770 | * | ||
771 | * mcheck_init_device must run before threshold_init_device to | ||
772 | * initialize mce_device, otherwise a NULL pointer dereference panics. | ||
773 | * | ||
774 | * Hence the following initcalls: | ||
775 | * 1. device_initcall(xen_late_init_mcelog); | ||
776 | * 2. device_initcall_sync(mcheck_init_device); | ||
777 | * 3. late_initcall(threshold_init_device); | ||
778 | * | ||
779 | * When running under Xen, the initcall order is 1, 2, 3; | ||
780 | * on bare metal we skip 1 and do only 2 and 3. | ||
781 | */ | ||
782 | late_initcall(threshold_init_device); | ||
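A sketch of the lifetime rule this rework establishes for the shared bank 4: the first core on a node allocates the bank and parks it in the node's amd_northbridge; later cores only re-add kobjects and bump b->cpus; the last core to go away tears the bank down. put_shared_bank() is an illustrative condensation of the threshold_remove_bank() logic above:

static void put_shared_bank(struct threshold_bank *b,
                            struct amd_northbridge *nb)
{
        if (!atomic_dec_and_test(&b->cpus)) {
                __threshold_remove_blocks(b);   /* drop this CPU's kobjects */
                return;
        }
        nb->bank4 = NULL;       /* last user gone: full teardown follows */
}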
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index bdda2e6c673b..35ffda5d0727 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -258,11 +258,11 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, | |||
258 | 258 | ||
259 | /* Compute the maximum size with which we can make a range: */ | 259 | /* Compute the maximum size with which we can make a range: */ |
260 | if (range_startk) | 260 | if (range_startk) |
261 | max_align = ffs(range_startk) - 1; | 261 | max_align = __ffs(range_startk); |
262 | else | 262 | else |
263 | max_align = 32; | 263 | max_align = BITS_PER_LONG - 1; |
264 | 264 | ||
265 | align = fls(range_sizek) - 1; | 265 | align = __fls(range_sizek); |
266 | if (align > max_align) | 266 | if (align > max_align) |
267 | align = max_align; | 267 | align = max_align; |
268 | 268 | ||
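The two hunks above remove an off-by-one rather than change behavior: ffs() is 1-based (ffs(8) == 4) while __ffs()/__fls() return 0-based bit numbers, so ffs(x) - 1 equals __ffs(x) for any nonzero x and the subtraction disappears. The x == 0 fallback likewise moves from a hard-coded 32 to BITS_PER_LONG - 1 so the cap is also right on 64-bit. A sketch of the identity; lowest_set_bit() is an illustrative name:

static unsigned long lowest_set_bit(unsigned long x)
{
        /* valid only for x != 0, which the range_startk test guards */
        return __ffs(x);        /* == ffs(x) - 1 for nonzero x */
}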
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 75772ae6c65f..e9fe907cd249 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -361,11 +361,7 @@ static void __init print_mtrr_state(void) | |||
361 | } | 361 | } |
362 | pr_debug("MTRR variable ranges %sabled:\n", | 362 | pr_debug("MTRR variable ranges %sabled:\n", |
363 | mtrr_state.enabled & 2 ? "en" : "dis"); | 363 | mtrr_state.enabled & 2 ? "en" : "dis"); |
364 | if (size_or_mask & 0xffffffffUL) | 364 | high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4; |
365 | high_width = ffs(size_or_mask & 0xffffffffUL) - 1; | ||
366 | else | ||
367 | high_width = ffs(size_or_mask>>32) + 32 - 1; | ||
368 | high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; | ||
369 | 365 | ||
370 | for (i = 0; i < num_var_ranges; ++i) { | 366 | for (i = 0; i < num_var_ranges; ++i) { |
371 | if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) | 367 | if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c4706cf9c011..29557aa06dda 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -35,17 +35,6 @@ | |||
35 | 35 | ||
36 | #include "perf_event.h" | 36 | #include "perf_event.h" |
37 | 37 | ||
38 | #if 0 | ||
39 | #undef wrmsrl | ||
40 | #define wrmsrl(msr, val) \ | ||
41 | do { \ | ||
42 | trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ | ||
43 | (unsigned long)(val)); \ | ||
44 | native_write_msr((msr), (u32)((u64)(val)), \ | ||
45 | (u32)((u64)(val) >> 32)); \ | ||
46 | } while (0) | ||
47 | #endif | ||
48 | |||
49 | struct x86_pmu x86_pmu __read_mostly; | 38 | struct x86_pmu x86_pmu __read_mostly; |
50 | 39 | ||
51 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | 40 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
@@ -74,7 +63,7 @@ u64 x86_perf_event_update(struct perf_event *event) | |||
74 | int idx = hwc->idx; | 63 | int idx = hwc->idx; |
75 | s64 delta; | 64 | s64 delta; |
76 | 65 | ||
77 | if (idx == X86_PMC_IDX_FIXED_BTS) | 66 | if (idx == INTEL_PMC_IDX_FIXED_BTS) |
78 | return 0; | 67 | return 0; |
79 | 68 | ||
80 | /* | 69 | /* |
@@ -86,7 +75,7 @@ u64 x86_perf_event_update(struct perf_event *event) | |||
86 | */ | 75 | */ |
87 | again: | 76 | again: |
88 | prev_raw_count = local64_read(&hwc->prev_count); | 77 | prev_raw_count = local64_read(&hwc->prev_count); |
89 | rdmsrl(hwc->event_base, new_raw_count); | 78 | rdpmcl(hwc->event_base_rdpmc, new_raw_count); |
90 | 79 | ||
91 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | 80 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
92 | new_raw_count) != prev_raw_count) | 81 | new_raw_count) != prev_raw_count) |
@@ -189,7 +178,7 @@ static void release_pmc_hardware(void) {} | |||
189 | 178 | ||
190 | static bool check_hw_exists(void) | 179 | static bool check_hw_exists(void) |
191 | { | 180 | { |
192 | u64 val, val_new = 0; | 181 | u64 val, val_new = ~0; |
193 | int i, reg, ret = 0; | 182 | int i, reg, ret = 0; |
194 | 183 | ||
195 | /* | 184 | /* |
@@ -222,8 +211,9 @@ static bool check_hw_exists(void) | |||
222 | * that don't trap on the MSR access and always return 0s. | 211 | * that don't trap on the MSR access and always return 0s. |
223 | */ | 212 | */ |
224 | val = 0xabcdUL; | 213 | val = 0xabcdUL; |
225 | ret = checking_wrmsrl(x86_pmu_event_addr(0), val); | 214 | reg = x86_pmu_event_addr(0); |
226 | ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); | 215 | ret = wrmsrl_safe(reg, val); |
216 | ret |= rdmsrl_safe(reg, &val_new); | ||
227 | if (ret || val != val_new) | 217 | if (ret || val != val_new) |
228 | goto msr_fail; | 218 | goto msr_fail; |
229 | 219 | ||
@@ -240,6 +230,7 @@ bios_fail: | |||
240 | 230 | ||
241 | msr_fail: | 231 | msr_fail: |
242 | printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); | 232 | printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); |
233 | printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new); | ||
243 | 234 | ||
244 | return false; | 235 | return false; |
245 | } | 236 | } |
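A sketch of the probe pattern check_hw_exists() uses above: write a recognizable value through the fault-tolerant _safe accessors, read it back, and declare the PMU broken if either access faults or the value does not survive the round trip; initializing val_new to ~0 ensures a faulting read that leaves it untouched still fails the comparison. pmu_counter_works() is an illustrative name:

static bool pmu_counter_works(unsigned int reg)
{
        u64 val = 0xabcdUL, val_new = ~0ULL;
        int ret;

        ret  = wrmsrl_safe(reg, val);   /* nonzero if the write faults */
        ret |= rdmsrl_safe(reg, &val_new);
        return !ret && val == val_new;
}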
@@ -388,7 +379,7 @@ int x86_pmu_hw_config(struct perf_event *event) | |||
388 | int precise = 0; | 379 | int precise = 0; |
389 | 380 | ||
390 | /* Support for constant skid */ | 381 | /* Support for constant skid */ |
391 | if (x86_pmu.pebs_active) { | 382 | if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { |
392 | precise++; | 383 | precise++; |
393 | 384 | ||
394 | /* Support for IP fixup */ | 385 | /* Support for IP fixup */ |
@@ -637,8 +628,8 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) | |||
637 | c = sched->constraints[sched->state.event]; | 628 | c = sched->constraints[sched->state.event]; |
638 | 629 | ||
639 | /* Prefer fixed purpose counters */ | 630 | /* Prefer fixed purpose counters */ |
640 | if (x86_pmu.num_counters_fixed) { | 631 | if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { |
641 | idx = X86_PMC_IDX_FIXED; | 632 | idx = INTEL_PMC_IDX_FIXED; |
642 | for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { | 633 | for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { |
643 | if (!__test_and_set_bit(idx, sched->state.used)) | 634 | if (!__test_and_set_bit(idx, sched->state.used)) |
644 | goto done; | 635 | goto done; |
@@ -646,7 +637,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) | |||
646 | } | 637 | } |
647 | /* Grab the first unused counter starting with idx */ | 638 | /* Grab the first unused counter starting with idx */ |
648 | idx = sched->state.counter; | 639 | idx = sched->state.counter; |
649 | for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { | 640 | for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { |
650 | if (!__test_and_set_bit(idx, sched->state.used)) | 641 | if (!__test_and_set_bit(idx, sched->state.used)) |
651 | goto done; | 642 | goto done; |
652 | } | 643 | } |
@@ -704,8 +695,8 @@ static bool perf_sched_next_event(struct perf_sched *sched) | |||
704 | /* | 695 | /* |
705 | * Assign a counter for each event. | 696 | * Assign a counter for each event. |
706 | */ | 697 | */ |
707 | static int perf_assign_events(struct event_constraint **constraints, int n, | 698 | int perf_assign_events(struct event_constraint **constraints, int n, |
708 | int wmin, int wmax, int *assign) | 699 | int wmin, int wmax, int *assign) |
709 | { | 700 | { |
710 | struct perf_sched sched; | 701 | struct perf_sched sched; |
711 | 702 | ||
@@ -824,15 +815,17 @@ static inline void x86_assign_hw_event(struct perf_event *event, | |||
824 | hwc->last_cpu = smp_processor_id(); | 815 | hwc->last_cpu = smp_processor_id(); |
825 | hwc->last_tag = ++cpuc->tags[i]; | 816 | hwc->last_tag = ++cpuc->tags[i]; |
826 | 817 | ||
827 | if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { | 818 | if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) { |
828 | hwc->config_base = 0; | 819 | hwc->config_base = 0; |
829 | hwc->event_base = 0; | 820 | hwc->event_base = 0; |
830 | } else if (hwc->idx >= X86_PMC_IDX_FIXED) { | 821 | } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) { |
831 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | 822 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; |
832 | hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED); | 823 | hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED); |
824 | hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30; | ||
833 | } else { | 825 | } else { |
834 | hwc->config_base = x86_pmu_config_addr(hwc->idx); | 826 | hwc->config_base = x86_pmu_config_addr(hwc->idx); |
835 | hwc->event_base = x86_pmu_event_addr(hwc->idx); | 827 | hwc->event_base = x86_pmu_event_addr(hwc->idx); |
828 | hwc->event_base_rdpmc = hwc->idx; | ||
836 | } | 829 | } |
837 | } | 830 | } |
838 | 831 | ||
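The new event_base_rdpmc field encodes the counter index the way the RDPMC instruction expects it in ECX: fixed-function counters are selected by setting bit 30, with the low bits giving the fixed-counter number, while general-purpose counters use their index directly; rdpmcl() can then read either kind. A sketch covering the two counter kinds handled above; rdpmc_index() is an illustrative name:

static unsigned int rdpmc_index(int idx)
{
        if (idx >= INTEL_PMC_IDX_FIXED)         /* fixed-function counter */
                return (idx - INTEL_PMC_IDX_FIXED) | (1u << 30);
        return idx;                             /* general-purpose counter */
}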
@@ -930,7 +923,7 @@ int x86_perf_event_set_period(struct perf_event *event) | |||
930 | s64 period = hwc->sample_period; | 923 | s64 period = hwc->sample_period; |
931 | int ret = 0, idx = hwc->idx; | 924 | int ret = 0, idx = hwc->idx; |
932 | 925 | ||
933 | if (idx == X86_PMC_IDX_FIXED_BTS) | 926 | if (idx == INTEL_PMC_IDX_FIXED_BTS) |
934 | return 0; | 927 | return 0; |
935 | 928 | ||
936 | /* | 929 | /* |
@@ -1316,7 +1309,6 @@ static struct attribute_group x86_pmu_format_group = { | |||
1316 | static int __init init_hw_perf_events(void) | 1309 | static int __init init_hw_perf_events(void) |
1317 | { | 1310 | { |
1318 | struct x86_pmu_quirk *quirk; | 1311 | struct x86_pmu_quirk *quirk; |
1319 | struct event_constraint *c; | ||
1320 | int err; | 1312 | int err; |
1321 | 1313 | ||
1322 | pr_info("Performance Events: "); | 1314 | pr_info("Performance Events: "); |
@@ -1347,21 +1339,8 @@ static int __init init_hw_perf_events(void) | |||
1347 | for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) | 1339 | for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) |
1348 | quirk->func(); | 1340 | quirk->func(); |
1349 | 1341 | ||
1350 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | 1342 | if (!x86_pmu.intel_ctrl) |
1351 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1343 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
1352 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); | ||
1353 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1354 | } | ||
1355 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; | ||
1356 | |||
1357 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | ||
1358 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | ||
1359 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); | ||
1360 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1361 | } | ||
1362 | |||
1363 | x86_pmu.intel_ctrl |= | ||
1364 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; | ||
1365 | 1344 | ||
1366 | perf_events_lapic_init(); | 1345 | perf_events_lapic_init(); |
1367 | register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); | 1346 | register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); |
@@ -1370,22 +1349,6 @@ static int __init init_hw_perf_events(void) | |||
1370 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, | 1349 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
1371 | 0, x86_pmu.num_counters, 0); | 1350 | 0, x86_pmu.num_counters, 0); |
1372 | 1351 | ||
1373 | if (x86_pmu.event_constraints) { | ||
1374 | /* | ||
1375 | * event on fixed counter2 (REF_CYCLES) only works on this | ||
1376 | * counter, so do not extend mask to generic counters | ||
1377 | */ | ||
1378 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
1379 | if (c->cmask != X86_RAW_EVENT_MASK | ||
1380 | || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { | ||
1381 | continue; | ||
1382 | } | ||
1383 | |||
1384 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; | ||
1385 | c->weight += x86_pmu.num_counters; | ||
1386 | } | ||
1387 | } | ||
1388 | |||
1389 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | 1352 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ |
1390 | x86_pmu_format_group.attrs = x86_pmu.format_attrs; | 1353 | x86_pmu_format_group.attrs = x86_pmu.format_attrs; |
1391 | 1354 | ||
@@ -1620,8 +1583,8 @@ static int x86_pmu_event_idx(struct perf_event *event) | |||
1620 | if (!x86_pmu.attr_rdpmc) | 1583 | if (!x86_pmu.attr_rdpmc) |
1621 | return 0; | 1584 | return 0; |
1622 | 1585 | ||
1623 | if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { | 1586 | if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { |
1624 | idx -= X86_PMC_IDX_FIXED; | 1587 | idx -= INTEL_PMC_IDX_FIXED; |
1625 | idx |= 1 << 30; | 1588 | idx |= 1 << 30; |
1626 | } | 1589 | } |
1627 | 1590 | ||
@@ -1649,7 +1612,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev, | |||
1649 | struct device_attribute *attr, | 1612 | struct device_attribute *attr, |
1650 | const char *buf, size_t count) | 1613 | const char *buf, size_t count) |
1651 | { | 1614 | { |
1652 | unsigned long val = simple_strtoul(buf, NULL, 0); | 1615 | unsigned long val; |
1616 | ssize_t ret; | ||
1617 | |||
1618 | ret = kstrtoul(buf, 0, &val); | ||
1619 | if (ret) | ||
1620 | return ret; | ||
1653 | 1621 | ||
1654 | if (!!val != !!x86_pmu.attr_rdpmc) { | 1622 | if (!!val != !!x86_pmu.attr_rdpmc) { |
1655 | x86_pmu.attr_rdpmc = !!val; | 1623 | x86_pmu.attr_rdpmc = !!val; |
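Replacing simple_strtoul() with kstrtoul() turns malformed sysfs input into a reported error instead of a silent zero: kstrtoul() returns -EINVAL or -ERANGE and only writes the value on success (it also tolerates a trailing newline, which sysfs writes usually carry). A hedged user-space analogue of the checked conversion, with strtoul(3) standing in for the kernel helper:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Roughly what kstrtoul() guarantees: reject anything that is not a
 * clean unsigned number, and report -EINVAL instead of guessing. */
static int parse_ulong(const char *buf, unsigned long *val)
{
	char *end;

	errno = 0;
	*val = strtoul(buf, &end, 0);
	if (errno || end == buf || *end != '\0')
		return -EINVAL;
	return 0;
}

int main(void)
{
	unsigned long val;

	printf("\"42\"  -> %d\n", parse_ulong("42", &val));	/* 0 */
	printf("\"foo\" -> %d\n", parse_ulong("foo", &val));	/* -EINVAL */
	return 0;
}
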
@@ -1682,13 +1650,20 @@ static void x86_pmu_flush_branch_stack(void) | |||
1682 | x86_pmu.flush_branch_stack(); | 1650 | x86_pmu.flush_branch_stack(); |
1683 | } | 1651 | } |
1684 | 1652 | ||
1653 | void perf_check_microcode(void) | ||
1654 | { | ||
1655 | if (x86_pmu.check_microcode) | ||
1656 | x86_pmu.check_microcode(); | ||
1657 | } | ||
1658 | EXPORT_SYMBOL_GPL(perf_check_microcode); | ||
1659 | |||
1685 | static struct pmu pmu = { | 1660 | static struct pmu pmu = { |
1686 | .pmu_enable = x86_pmu_enable, | 1661 | .pmu_enable = x86_pmu_enable, |
1687 | .pmu_disable = x86_pmu_disable, | 1662 | .pmu_disable = x86_pmu_disable, |
1688 | 1663 | ||
1689 | .attr_groups = x86_pmu_attr_groups, | 1664 | .attr_groups = x86_pmu_attr_groups, |
1690 | 1665 | ||
1691 | .event_init = x86_pmu_event_init, | 1666 | .event_init = x86_pmu_event_init, |
1692 | 1667 | ||
1693 | .add = x86_pmu_add, | 1668 | .add = x86_pmu_add, |
1694 | .del = x86_pmu_del, | 1669 | .del = x86_pmu_del, |
@@ -1696,11 +1671,11 @@ static struct pmu pmu = { | |||
1696 | .stop = x86_pmu_stop, | 1671 | .stop = x86_pmu_stop, |
1697 | .read = x86_pmu_read, | 1672 | .read = x86_pmu_read, |
1698 | 1673 | ||
1699 | .start_txn = x86_pmu_start_txn, | 1674 | .start_txn = x86_pmu_start_txn, |
1700 | .cancel_txn = x86_pmu_cancel_txn, | 1675 | .cancel_txn = x86_pmu_cancel_txn, |
1701 | .commit_txn = x86_pmu_commit_txn, | 1676 | .commit_txn = x86_pmu_commit_txn, |
1702 | 1677 | ||
1703 | .event_idx = x86_pmu_event_idx, | 1678 | .event_idx = x86_pmu_event_idx, |
1704 | .flush_branch_stack = x86_pmu_flush_branch_stack, | 1679 | .flush_branch_stack = x86_pmu_flush_branch_stack, |
1705 | }; | 1680 | }; |
1706 | 1681 | ||
@@ -1863,7 +1838,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs) | |||
1863 | else | 1838 | else |
1864 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | 1839 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; |
1865 | } else { | 1840 | } else { |
1866 | if (user_mode(regs)) | 1841 | if (!kernel_ip(regs->ip)) |
1867 | misc |= PERF_RECORD_MISC_USER; | 1842 | misc |= PERF_RECORD_MISC_USER; |
1868 | else | 1843 | else |
1869 | misc |= PERF_RECORD_MISC_KERNEL; | 1844 | misc |= PERF_RECORD_MISC_KERNEL; |
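perf_misc_flags() now classifies samples by the instruction pointer itself rather than by user_mode(regs): with PEBS the reported IP comes from the hardware record, not from the register frame the NMI interrupted, so the segment state in regs can disagree with it. On x86-64 the test reduces to the sign bit of the address, since kernel text lives in the upper canonical half; the 32-bit build compares against PAGE_OFFSET instead. A standalone sketch of the 64-bit case, essentially what kernel_ip() in perf_event.h does:

#include <stdbool.h>
#include <stdio.h>

static bool kernel_ip(unsigned long ip)
{
	return (long)ip < 0;	/* upper canonical half => kernel text */
}

int main(void)
{
	/* A typical userspace address and a typical kernel text address. */
	printf("0x00007f0000001000 -> %d\n", kernel_ip(0x00007f0000001000UL));
	printf("0xffffffff81000000 -> %d\n", kernel_ip(0xffffffff81000000UL));
	return 0;
}
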
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 7241e2fc3c17..821d53b696d1 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -14,6 +14,18 @@ | |||
14 | 14 | ||
15 | #include <linux/perf_event.h> | 15 | #include <linux/perf_event.h> |
16 | 16 | ||
17 | #if 0 | ||
18 | #undef wrmsrl | ||
19 | #define wrmsrl(msr, val) \ | ||
20 | do { \ | ||
21 | unsigned int _msr = (msr); \ | ||
22 | u64 _val = (val); \ | ||
23 | trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr), \ | ||
24 | (unsigned long long)(_val)); \ | ||
25 | native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32)); \ | ||
26 | } while (0) | ||
27 | #endif | ||
28 | |||
17 | /* | 29 | /* |
18 | * | NHM/WSM | SNB | | 30 | * | NHM/WSM | SNB | |
19 | * register ------------------------------- | 31 | * register ------------------------------- |
@@ -57,7 +69,7 @@ struct amd_nb { | |||
57 | }; | 69 | }; |
58 | 70 | ||
59 | /* The maximal number of PEBS events: */ | 71 | /* The maximal number of PEBS events: */ |
60 | #define MAX_PEBS_EVENTS 4 | 72 | #define MAX_PEBS_EVENTS 8 |
61 | 73 | ||
62 | /* | 74 | /* |
63 | * A debug store configuration. | 75 | * A debug store configuration. |
@@ -349,6 +361,8 @@ struct x86_pmu { | |||
349 | void (*cpu_starting)(int cpu); | 361 | void (*cpu_starting)(int cpu); |
350 | void (*cpu_dying)(int cpu); | 362 | void (*cpu_dying)(int cpu); |
351 | void (*cpu_dead)(int cpu); | 363 | void (*cpu_dead)(int cpu); |
364 | |||
365 | void (*check_microcode)(void); | ||
352 | void (*flush_branch_stack)(void); | 366 | void (*flush_branch_stack)(void); |
353 | 367 | ||
354 | /* | 368 | /* |
@@ -360,12 +374,16 @@ struct x86_pmu { | |||
360 | /* | 374 | /* |
361 | * Intel DebugStore bits | 375 | * Intel DebugStore bits |
362 | */ | 376 | */ |
363 | int bts, pebs; | 377 | unsigned int bts :1, |
364 | int bts_active, pebs_active; | 378 | bts_active :1, |
379 | pebs :1, | ||
380 | pebs_active :1, | ||
381 | pebs_broken :1; | ||
365 | int pebs_record_size; | 382 | int pebs_record_size; |
366 | void (*drain_pebs)(struct pt_regs *regs); | 383 | void (*drain_pebs)(struct pt_regs *regs); |
367 | struct event_constraint *pebs_constraints; | 384 | struct event_constraint *pebs_constraints; |
368 | void (*pebs_aliases)(struct perf_event *event); | 385 | void (*pebs_aliases)(struct perf_event *event); |
386 | int max_pebs_events; | ||
369 | 387 | ||
370 | /* | 388 | /* |
371 | * Intel LBR | 389 | * Intel LBR |
@@ -468,6 +486,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | |||
468 | 486 | ||
469 | void x86_pmu_enable_all(int added); | 487 | void x86_pmu_enable_all(int added); |
470 | 488 | ||
489 | int perf_assign_events(struct event_constraint **constraints, int n, | ||
490 | int wmin, int wmax, int *assign); | ||
471 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); | 491 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); |
472 | 492 | ||
473 | void x86_pmu_stop(struct perf_event *event, int flags); | 493 | void x86_pmu_stop(struct perf_event *event, int flags); |
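The DebugStore flags in struct x86_pmu collapse from four ints into single-bit bitfields, which also makes room for the new pebs_broken bit that the SNB microcode check flips at runtime. A small sketch of the resulting layout, assuming the compiler packs the five bits into a single unsigned int (gcc on x86 does):

#include <stdio.h>

struct ds_flags {
	unsigned int bts	:1,
		     bts_active	:1,
		     pebs	:1,
		     pebs_active:1,
		     pebs_broken:1;
};

int main(void)
{
	struct ds_flags f = { .pebs = 1 };

	f.pebs_broken = 1;	/* e.g. pre-fix SNB microcode detected */
	printf("sizeof=%zu pebs=%u broken=%u\n",
	       sizeof(f), f.pebs, f.pebs_broken);
	return 0;
}
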
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 11a4eb9131d5..4528ae7b6ec4 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -366,7 +366,7 @@ static void amd_pmu_cpu_starting(int cpu) | |||
366 | 366 | ||
367 | cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; | 367 | cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; |
368 | 368 | ||
369 | if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15) | 369 | if (boot_cpu_data.x86_max_cores < 2) |
370 | return; | 370 | return; |
371 | 371 | ||
372 | nb_id = amd_get_nb_id(cpu); | 372 | nb_id = amd_get_nb_id(cpu); |
@@ -422,35 +422,6 @@ static struct attribute *amd_format_attr[] = { | |||
422 | NULL, | 422 | NULL, |
423 | }; | 423 | }; |
424 | 424 | ||
425 | static __initconst const struct x86_pmu amd_pmu = { | ||
426 | .name = "AMD", | ||
427 | .handle_irq = x86_pmu_handle_irq, | ||
428 | .disable_all = x86_pmu_disable_all, | ||
429 | .enable_all = x86_pmu_enable_all, | ||
430 | .enable = x86_pmu_enable_event, | ||
431 | .disable = x86_pmu_disable_event, | ||
432 | .hw_config = amd_pmu_hw_config, | ||
433 | .schedule_events = x86_schedule_events, | ||
434 | .eventsel = MSR_K7_EVNTSEL0, | ||
435 | .perfctr = MSR_K7_PERFCTR0, | ||
436 | .event_map = amd_pmu_event_map, | ||
437 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
438 | .num_counters = AMD64_NUM_COUNTERS, | ||
439 | .cntval_bits = 48, | ||
440 | .cntval_mask = (1ULL << 48) - 1, | ||
441 | .apic = 1, | ||
442 | /* use highest bit to detect overflow */ | ||
443 | .max_period = (1ULL << 47) - 1, | ||
444 | .get_event_constraints = amd_get_event_constraints, | ||
445 | .put_event_constraints = amd_put_event_constraints, | ||
446 | |||
447 | .format_attrs = amd_format_attr, | ||
448 | |||
449 | .cpu_prepare = amd_pmu_cpu_prepare, | ||
450 | .cpu_starting = amd_pmu_cpu_starting, | ||
451 | .cpu_dead = amd_pmu_cpu_dead, | ||
452 | }; | ||
453 | |||
454 | /* AMD Family 15h */ | 425 | /* AMD Family 15h */ |
455 | 426 | ||
456 | #define AMD_EVENT_TYPE_MASK 0x000000F0ULL | 427 | #define AMD_EVENT_TYPE_MASK 0x000000F0ULL |
@@ -597,8 +568,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev | |||
597 | } | 568 | } |
598 | } | 569 | } |
599 | 570 | ||
600 | static __initconst const struct x86_pmu amd_pmu_f15h = { | 571 | static __initconst const struct x86_pmu amd_pmu = { |
601 | .name = "AMD Family 15h", | 572 | .name = "AMD", |
602 | .handle_irq = x86_pmu_handle_irq, | 573 | .handle_irq = x86_pmu_handle_irq, |
603 | .disable_all = x86_pmu_disable_all, | 574 | .disable_all = x86_pmu_disable_all, |
604 | .enable_all = x86_pmu_enable_all, | 575 | .enable_all = x86_pmu_enable_all, |
@@ -606,50 +577,68 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { | |||
606 | .disable = x86_pmu_disable_event, | 577 | .disable = x86_pmu_disable_event, |
607 | .hw_config = amd_pmu_hw_config, | 578 | .hw_config = amd_pmu_hw_config, |
608 | .schedule_events = x86_schedule_events, | 579 | .schedule_events = x86_schedule_events, |
609 | .eventsel = MSR_F15H_PERF_CTL, | 580 | .eventsel = MSR_K7_EVNTSEL0, |
610 | .perfctr = MSR_F15H_PERF_CTR, | 581 | .perfctr = MSR_K7_PERFCTR0, |
611 | .event_map = amd_pmu_event_map, | 582 | .event_map = amd_pmu_event_map, |
612 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 583 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
613 | .num_counters = AMD64_NUM_COUNTERS_F15H, | 584 | .num_counters = AMD64_NUM_COUNTERS, |
614 | .cntval_bits = 48, | 585 | .cntval_bits = 48, |
615 | .cntval_mask = (1ULL << 48) - 1, | 586 | .cntval_mask = (1ULL << 48) - 1, |
616 | .apic = 1, | 587 | .apic = 1, |
617 | /* use highest bit to detect overflow */ | 588 | /* use highest bit to detect overflow */ |
618 | .max_period = (1ULL << 47) - 1, | 589 | .max_period = (1ULL << 47) - 1, |
619 | .get_event_constraints = amd_get_event_constraints_f15h, | 590 | .get_event_constraints = amd_get_event_constraints, |
620 | /* northbridge counters not yet implemented: */ | ||
621 | #if 0 | ||
622 | .put_event_constraints = amd_put_event_constraints, | 591 | .put_event_constraints = amd_put_event_constraints, |
623 | 592 | ||
593 | .format_attrs = amd_format_attr, | ||
594 | |||
624 | .cpu_prepare = amd_pmu_cpu_prepare, | 595 | .cpu_prepare = amd_pmu_cpu_prepare, |
625 | .cpu_dead = amd_pmu_cpu_dead, | ||
626 | #endif | ||
627 | .cpu_starting = amd_pmu_cpu_starting, | 596 | .cpu_starting = amd_pmu_cpu_starting, |
628 | .format_attrs = amd_format_attr, | 597 | .cpu_dead = amd_pmu_cpu_dead, |
629 | }; | 598 | }; |
630 | 599 | ||
600 | static int setup_event_constraints(void) | ||
601 | { | ||
602 | if (boot_cpu_data.x86 >= 0x15) | ||
603 | x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; | ||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | static int setup_perfctr_core(void) | ||
608 | { | ||
609 | if (!cpu_has_perfctr_core) { | ||
610 | WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h, | ||
611 | KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!"); | ||
612 | return -ENODEV; | ||
613 | } | ||
614 | |||
615 | WARN(x86_pmu.get_event_constraints == amd_get_event_constraints, | ||
616 | KERN_ERR "hw perf events core counters need constraints handler!"); | ||
617 | |||
618 | /* | ||
619 | * If core performance counter extensions exist, we must use | ||
620 | * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also | ||
621 | * x86_pmu_addr_offset(). | ||
622 | */ | ||
623 | x86_pmu.eventsel = MSR_F15H_PERF_CTL; | ||
624 | x86_pmu.perfctr = MSR_F15H_PERF_CTR; | ||
625 | x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; | ||
626 | |||
627 | printk(KERN_INFO "perf: AMD core performance counters detected\n"); | ||
628 | |||
629 | return 0; | ||
630 | } | ||
631 | |||
631 | __init int amd_pmu_init(void) | 632 | __init int amd_pmu_init(void) |
632 | { | 633 | { |
633 | /* Performance-monitoring supported from K7 and later: */ | 634 | /* Performance-monitoring supported from K7 and later: */ |
634 | if (boot_cpu_data.x86 < 6) | 635 | if (boot_cpu_data.x86 < 6) |
635 | return -ENODEV; | 636 | return -ENODEV; |
636 | 637 | ||
637 | /* | 638 | x86_pmu = amd_pmu; |
638 | * If core performance counter extensions exist, it must be | 639 | |
639 | * family 15h, otherwise fail. See x86_pmu_addr_offset(). | 640 | setup_event_constraints(); |
640 | */ | 641 | setup_perfctr_core(); |
641 | switch (boot_cpu_data.x86) { | ||
642 | case 0x15: | ||
643 | if (!cpu_has_perfctr_core) | ||
644 | return -ENODEV; | ||
645 | x86_pmu = amd_pmu_f15h; | ||
646 | break; | ||
647 | default: | ||
648 | if (cpu_has_perfctr_core) | ||
649 | return -ENODEV; | ||
650 | x86_pmu = amd_pmu; | ||
651 | break; | ||
652 | } | ||
653 | 642 | ||
654 | /* Events are common for all AMDs */ | 643 | /* Events are common for all AMDs */ |
655 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | 644 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, |
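amd_pmu_init() now starts from a single base amd_pmu and applies the family-15h constraints handler and the core-perfctr MSRs as independent, feature-gated fixups, instead of switching between two full pmu structs by family. A hedged user-space probe for the feature behind setup_perfctr_core(); the bit position (CPUID Fn8000_0001h ECX[23], i.e. X86_FEATURE_PERFCTR_CORE) is quoted from the cpufeature tables and should be treated as an assumption:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx = 0, edx;

	if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
		return 1;

	/* ECX[23]: core performance counter extensions, i.e. the
	 * MSR_F15H_PERF_CTL/CTR counters selected above. */
	printf("perfctr_core: %s\n", (ecx >> 23) & 1 ? "yes" : "no");
	return 0;
}
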
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 187c294bc658..382366977d4c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -5,6 +5,8 @@ | |||
5 | * among events on a single PMU. | 5 | * among events on a single PMU. |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
9 | |||
8 | #include <linux/stddef.h> | 10 | #include <linux/stddef.h> |
9 | #include <linux/types.h> | 11 | #include <linux/types.h> |
10 | #include <linux/init.h> | 12 | #include <linux/init.h> |
@@ -21,14 +23,14 @@ | |||
21 | */ | 23 | */ |
22 | static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = | 24 | static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = |
23 | { | 25 | { |
24 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | 26 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, |
25 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | 27 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
26 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | 28 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, |
27 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | 29 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, |
28 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | 30 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, |
29 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | 31 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
30 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 32 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
31 | [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ | 33 | [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ |
32 | }; | 34 | }; |
33 | 35 | ||
34 | static struct event_constraint intel_core_event_constraints[] __read_mostly = | 36 | static struct event_constraint intel_core_event_constraints[] __read_mostly = |
@@ -136,6 +138,84 @@ static u64 intel_pmu_event_map(int hw_event) | |||
136 | return intel_perfmon_event_map[hw_event]; | 138 | return intel_perfmon_event_map[hw_event]; |
137 | } | 139 | } |
138 | 140 | ||
141 | #define SNB_DMND_DATA_RD (1ULL << 0) | ||
142 | #define SNB_DMND_RFO (1ULL << 1) | ||
143 | #define SNB_DMND_IFETCH (1ULL << 2) | ||
144 | #define SNB_DMND_WB (1ULL << 3) | ||
145 | #define SNB_PF_DATA_RD (1ULL << 4) | ||
146 | #define SNB_PF_RFO (1ULL << 5) | ||
147 | #define SNB_PF_IFETCH (1ULL << 6) | ||
148 | #define SNB_LLC_DATA_RD (1ULL << 7) | ||
149 | #define SNB_LLC_RFO (1ULL << 8) | ||
150 | #define SNB_LLC_IFETCH (1ULL << 9) | ||
151 | #define SNB_BUS_LOCKS (1ULL << 10) | ||
152 | #define SNB_STRM_ST (1ULL << 11) | ||
153 | #define SNB_OTHER (1ULL << 15) | ||
154 | #define SNB_RESP_ANY (1ULL << 16) | ||
155 | #define SNB_NO_SUPP (1ULL << 17) | ||
156 | #define SNB_LLC_HITM (1ULL << 18) | ||
157 | #define SNB_LLC_HITE (1ULL << 19) | ||
158 | #define SNB_LLC_HITS (1ULL << 20) | ||
159 | #define SNB_LLC_HITF (1ULL << 21) | ||
160 | #define SNB_LOCAL (1ULL << 22) | ||
161 | #define SNB_REMOTE (0xffULL << 23) | ||
162 | #define SNB_SNP_NONE (1ULL << 31) | ||
163 | #define SNB_SNP_NOT_NEEDED (1ULL << 32) | ||
164 | #define SNB_SNP_MISS (1ULL << 33) | ||
165 | #define SNB_NO_FWD (1ULL << 34) | ||
166 | #define SNB_SNP_FWD (1ULL << 35) | ||
167 | #define SNB_HITM (1ULL << 36) | ||
168 | #define SNB_NON_DRAM (1ULL << 37) | ||
169 | |||
170 | #define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD) | ||
171 | #define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO) | ||
172 | #define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) | ||
173 | |||
174 | #define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \ | ||
175 | SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \ | ||
176 | SNB_HITM) | ||
177 | |||
178 | #define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY) | ||
179 | #define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY) | ||
180 | |||
181 | #define SNB_L3_ACCESS SNB_RESP_ANY | ||
182 | #define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM) | ||
183 | |||
184 | static __initconst const u64 snb_hw_cache_extra_regs | ||
185 | [PERF_COUNT_HW_CACHE_MAX] | ||
186 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
187 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
188 | { | ||
189 | [ C(LL ) ] = { | ||
190 | [ C(OP_READ) ] = { | ||
191 | [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS, | ||
192 | [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS, | ||
193 | }, | ||
194 | [ C(OP_WRITE) ] = { | ||
195 | [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS, | ||
196 | [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS, | ||
197 | }, | ||
198 | [ C(OP_PREFETCH) ] = { | ||
199 | [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS, | ||
200 | [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS, | ||
201 | }, | ||
202 | }, | ||
203 | [ C(NODE) ] = { | ||
204 | [ C(OP_READ) ] = { | ||
205 | [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY, | ||
206 | [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE, | ||
207 | }, | ||
208 | [ C(OP_WRITE) ] = { | ||
209 | [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY, | ||
210 | [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE, | ||
211 | }, | ||
212 | [ C(OP_PREFETCH) ] = { | ||
213 | [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY, | ||
214 | [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE, | ||
215 | }, | ||
216 | }, | ||
217 | }; | ||
218 | |||
139 | static __initconst const u64 snb_hw_cache_event_ids | 219 | static __initconst const u64 snb_hw_cache_event_ids |
140 | [PERF_COUNT_HW_CACHE_MAX] | 220 | [PERF_COUNT_HW_CACHE_MAX] |
141 | [PERF_COUNT_HW_CACHE_OP_MAX] | 221 | [PERF_COUNT_HW_CACHE_OP_MAX] |
@@ -233,16 +313,16 @@ static __initconst const u64 snb_hw_cache_event_ids | |||
233 | }, | 313 | }, |
234 | [ C(NODE) ] = { | 314 | [ C(NODE) ] = { |
235 | [ C(OP_READ) ] = { | 315 | [ C(OP_READ) ] = { |
236 | [ C(RESULT_ACCESS) ] = -1, | 316 | [ C(RESULT_ACCESS) ] = 0x01b7, |
237 | [ C(RESULT_MISS) ] = -1, | 317 | [ C(RESULT_MISS) ] = 0x01b7, |
238 | }, | 318 | }, |
239 | [ C(OP_WRITE) ] = { | 319 | [ C(OP_WRITE) ] = { |
240 | [ C(RESULT_ACCESS) ] = -1, | 320 | [ C(RESULT_ACCESS) ] = 0x01b7, |
241 | [ C(RESULT_MISS) ] = -1, | 321 | [ C(RESULT_MISS) ] = 0x01b7, |
242 | }, | 322 | }, |
243 | [ C(OP_PREFETCH) ] = { | 323 | [ C(OP_PREFETCH) ] = { |
244 | [ C(RESULT_ACCESS) ] = -1, | 324 | [ C(RESULT_ACCESS) ] = 0x01b7, |
245 | [ C(RESULT_MISS) ] = -1, | 325 | [ C(RESULT_MISS) ] = 0x01b7, |
246 | }, | 326 | }, |
247 | }, | 327 | }, |
248 | 328 | ||
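The NODE entries switch from -1 (unsupported) to 0x01b7, the raw encoding of OFFCORE_RESPONSE_0 (event 0xb7, umask 0x01). That encoding only selects the offcore-response machinery; what actually gets counted comes from the extra register, assembled from the SNB_* bits defined above. A small sketch composing the LL read access/miss pair, with the mask values copied from those defines:

#include <stdio.h>

#define SNB_DMND_DATA_RD	(1ULL << 0)
#define SNB_LLC_DATA_RD		(1ULL << 7)
#define SNB_RESP_ANY		(1ULL << 16)
#define SNB_LOCAL		(1ULL << 22)
#define SNB_REMOTE		(0xffULL << 23)
#define SNB_SNP_ANY		(0x3fULL << 31)	/* bits 31-36 above */
#define SNB_NON_DRAM		(1ULL << 37)

int main(void)
{
	unsigned long long config = 0x01 << 8 | 0xb7;	/* 0x01b7 */
	unsigned long long read = SNB_DMND_DATA_RD | SNB_LLC_DATA_RD;
	unsigned long long l3_miss = SNB_LOCAL | SNB_REMOTE |
				     SNB_SNP_ANY | SNB_NON_DRAM;

	printf("config = %#llx\n", config);
	printf("access = %#llx\n", read | SNB_RESP_ANY);	/* SNB_L3_ACCESS */
	printf("miss   = %#llx\n", read | l3_miss);		/* SNB_L3_MISS */
	return 0;
}
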
@@ -747,7 +827,7 @@ static void intel_pmu_disable_all(void) | |||
747 | 827 | ||
748 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | 828 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
749 | 829 | ||
750 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 830 | if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
751 | intel_pmu_disable_bts(); | 831 | intel_pmu_disable_bts(); |
752 | 832 | ||
753 | intel_pmu_pebs_disable_all(); | 833 | intel_pmu_pebs_disable_all(); |
@@ -763,9 +843,9 @@ static void intel_pmu_enable_all(int added) | |||
763 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, | 843 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, |
764 | x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); | 844 | x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); |
765 | 845 | ||
766 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 846 | if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
767 | struct perf_event *event = | 847 | struct perf_event *event = |
768 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | 848 | cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; |
769 | 849 | ||
770 | if (WARN_ON_ONCE(!event)) | 850 | if (WARN_ON_ONCE(!event)) |
771 | return; | 851 | return; |
@@ -871,7 +951,7 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
871 | 951 | ||
872 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) | 952 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
873 | { | 953 | { |
874 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 954 | int idx = hwc->idx - INTEL_PMC_IDX_FIXED; |
875 | u64 ctrl_val, mask; | 955 | u64 ctrl_val, mask; |
876 | 956 | ||
877 | mask = 0xfULL << (idx * 4); | 957 | mask = 0xfULL << (idx * 4); |
@@ -886,7 +966,7 @@ static void intel_pmu_disable_event(struct perf_event *event) | |||
886 | struct hw_perf_event *hwc = &event->hw; | 966 | struct hw_perf_event *hwc = &event->hw; |
887 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 967 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
888 | 968 | ||
889 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | 969 | if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { |
890 | intel_pmu_disable_bts(); | 970 | intel_pmu_disable_bts(); |
891 | intel_pmu_drain_bts_buffer(); | 971 | intel_pmu_drain_bts_buffer(); |
892 | return; | 972 | return; |
@@ -915,7 +995,7 @@ static void intel_pmu_disable_event(struct perf_event *event) | |||
915 | 995 | ||
916 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) | 996 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
917 | { | 997 | { |
918 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 998 | int idx = hwc->idx - INTEL_PMC_IDX_FIXED; |
919 | u64 ctrl_val, bits, mask; | 999 | u64 ctrl_val, bits, mask; |
920 | 1000 | ||
921 | /* | 1001 | /* |
@@ -949,7 +1029,7 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
949 | struct hw_perf_event *hwc = &event->hw; | 1029 | struct hw_perf_event *hwc = &event->hw; |
950 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1030 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
951 | 1031 | ||
952 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | 1032 | if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { |
953 | if (!__this_cpu_read(cpu_hw_events.enabled)) | 1033 | if (!__this_cpu_read(cpu_hw_events.enabled)) |
954 | return; | 1034 | return; |
955 | 1035 | ||
@@ -1000,14 +1080,14 @@ static void intel_pmu_reset(void) | |||
1000 | 1080 | ||
1001 | local_irq_save(flags); | 1081 | local_irq_save(flags); |
1002 | 1082 | ||
1003 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 1083 | pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); |
1004 | 1084 | ||
1005 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1085 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1006 | checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); | 1086 | wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); |
1007 | checking_wrmsrl(x86_pmu_event_addr(idx), 0ull); | 1087 | wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); |
1008 | } | 1088 | } |
1009 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) | 1089 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
1010 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 1090 | wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
1011 | 1091 | ||
1012 | if (ds) | 1092 | if (ds) |
1013 | ds->bts_index = ds->bts_buffer_base; | 1093 | ds->bts_index = ds->bts_buffer_base; |
@@ -1707,16 +1787,61 @@ static __init void intel_clovertown_quirk(void) | |||
1707 | * But taken together it might just make sense to not enable PEBS on | 1787 | * But taken together it might just make sense to not enable PEBS on |
1708 | * these chips. | 1788 | * these chips. |
1709 | */ | 1789 | */ |
1710 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | 1790 | pr_warn("PEBS disabled due to CPU errata\n"); |
1711 | x86_pmu.pebs = 0; | 1791 | x86_pmu.pebs = 0; |
1712 | x86_pmu.pebs_constraints = NULL; | 1792 | x86_pmu.pebs_constraints = NULL; |
1713 | } | 1793 | } |
1714 | 1794 | ||
1795 | static int intel_snb_pebs_broken(int cpu) | ||
1796 | { | ||
1797 | u32 rev = UINT_MAX; /* default to broken for unknown models */ | ||
1798 | |||
1799 | switch (cpu_data(cpu).x86_model) { | ||
1800 | case 42: /* SNB */ | ||
1801 | rev = 0x28; | ||
1802 | break; | ||
1803 | |||
1804 | case 45: /* SNB-EP */ | ||
1805 | switch (cpu_data(cpu).x86_mask) { | ||
1806 | case 6: rev = 0x618; break; | ||
1807 | case 7: rev = 0x70c; break; | ||
1808 | } | ||
1809 | } | ||
1810 | |||
1811 | return (cpu_data(cpu).microcode < rev); | ||
1812 | } | ||
1813 | |||
1814 | static void intel_snb_check_microcode(void) | ||
1815 | { | ||
1816 | int pebs_broken = 0; | ||
1817 | int cpu; | ||
1818 | |||
1819 | get_online_cpus(); | ||
1820 | for_each_online_cpu(cpu) { | ||
1821 | if ((pebs_broken = intel_snb_pebs_broken(cpu))) | ||
1822 | break; | ||
1823 | } | ||
1824 | put_online_cpus(); | ||
1825 | |||
1826 | if (pebs_broken == x86_pmu.pebs_broken) | ||
1827 | return; | ||
1828 | |||
1829 | /* | ||
1830 | * Serialized by the microcode lock. | ||
1831 | */ | ||
1832 | if (x86_pmu.pebs_broken) { | ||
1833 | pr_info("PEBS enabled due to microcode update\n"); | ||
1834 | x86_pmu.pebs_broken = 0; | ||
1835 | } else { | ||
1836 | pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); | ||
1837 | x86_pmu.pebs_broken = 1; | ||
1838 | } | ||
1839 | } | ||
1840 | |||
1715 | static __init void intel_sandybridge_quirk(void) | 1841 | static __init void intel_sandybridge_quirk(void) |
1716 | { | 1842 | { |
1717 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | 1843 | x86_pmu.check_microcode = intel_snb_check_microcode; |
1718 | x86_pmu.pebs = 0; | 1844 | intel_snb_check_microcode(); |
1719 | x86_pmu.pebs_constraints = NULL; | ||
1720 | } | 1845 | } |
1721 | 1846 | ||
1722 | static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { | 1847 | static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { |
@@ -1736,8 +1861,8 @@ static __init void intel_arch_events_quirk(void) | |||
1736 | /* disable events that are reported as not present by cpuid */ | 1861 | /* disable events that are reported as not present by cpuid */ |
1737 | for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { | 1862 | for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { |
1738 | intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; | 1863 | intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; |
1739 | printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", | 1864 | pr_warn("CPUID marked event: \'%s\' unavailable\n", |
1740 | intel_arch_events_map[bit].name); | 1865 | intel_arch_events_map[bit].name); |
1741 | } | 1866 | } |
1742 | } | 1867 | } |
1743 | 1868 | ||
@@ -1756,7 +1881,7 @@ static __init void intel_nehalem_quirk(void) | |||
1756 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | 1881 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; |
1757 | ebx.split.no_branch_misses_retired = 0; | 1882 | ebx.split.no_branch_misses_retired = 0; |
1758 | x86_pmu.events_maskl = ebx.full; | 1883 | x86_pmu.events_maskl = ebx.full; |
1759 | printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); | 1884 | pr_info("CPU erratum AAJ80 worked around\n"); |
1760 | } | 1885 | } |
1761 | } | 1886 | } |
1762 | 1887 | ||
@@ -1765,6 +1890,7 @@ __init int intel_pmu_init(void) | |||
1765 | union cpuid10_edx edx; | 1890 | union cpuid10_edx edx; |
1766 | union cpuid10_eax eax; | 1891 | union cpuid10_eax eax; |
1767 | union cpuid10_ebx ebx; | 1892 | union cpuid10_ebx ebx; |
1893 | struct event_constraint *c; | ||
1768 | unsigned int unused; | 1894 | unsigned int unused; |
1769 | int version; | 1895 | int version; |
1770 | 1896 | ||
@@ -1800,6 +1926,8 @@ __init int intel_pmu_init(void) | |||
1800 | x86_pmu.events_maskl = ebx.full; | 1926 | x86_pmu.events_maskl = ebx.full; |
1801 | x86_pmu.events_mask_len = eax.split.mask_length; | 1927 | x86_pmu.events_mask_len = eax.split.mask_length; |
1802 | 1928 | ||
1929 | x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); | ||
1930 | |||
1803 | /* | 1931 | /* |
1804 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 1932 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
1805 | * assume at least 3 events: | 1933 | * assume at least 3 events: |
@@ -1914,6 +2042,8 @@ __init int intel_pmu_init(void) | |||
1914 | case 58: /* IvyBridge */ | 2042 | case 58: /* IvyBridge */ |
1915 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 2043 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
1916 | sizeof(hw_cache_event_ids)); | 2044 | sizeof(hw_cache_event_ids)); |
2045 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, | ||
2046 | sizeof(hw_cache_extra_regs)); | ||
1917 | 2047 | ||
1918 | intel_pmu_lbr_init_snb(); | 2048 | intel_pmu_lbr_init_snb(); |
1919 | 2049 | ||
@@ -1951,5 +2081,37 @@ __init int intel_pmu_init(void) | |||
1951 | } | 2081 | } |
1952 | } | 2082 | } |
1953 | 2083 | ||
2084 | if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { | ||
2085 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | ||
2086 | x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); | ||
2087 | x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; | ||
2088 | } | ||
2089 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; | ||
2090 | |||
2091 | if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { | ||
2092 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | ||
2093 | x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED); | ||
2094 | x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; | ||
2095 | } | ||
2096 | |||
2097 | x86_pmu.intel_ctrl |= | ||
2098 | ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; | ||
2099 | |||
2100 | if (x86_pmu.event_constraints) { | ||
2101 | /* | ||
2102 | * event on fixed counter2 (REF_CYCLES) only works on this | ||
2103 | * counter, so do not extend mask to generic counters | ||
2104 | */ | ||
2105 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
2106 | if (c->cmask != X86_RAW_EVENT_MASK | ||
2107 | || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { | ||
2108 | continue; | ||
2109 | } | ||
2110 | |||
2111 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; | ||
2112 | c->weight += x86_pmu.num_counters; | ||
2113 | } | ||
2114 | } | ||
2115 | |||
1954 | return 0; | 2116 | return 0; |
1955 | } | 2117 | } |
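The generic-counter clipping and the intel_ctrl construction move here from the shared init path, since both limits are Intel-specific (the AMD side has no fixed counters and no global control MSR). As a worked check of the mask arithmetic: with 4 generic and 3 fixed counters and INTEL_PMC_IDX_FIXED equal to 32, the enable mask written to MSR_CORE_PERF_GLOBAL_CTRL comes out to 0x70000000f:

#include <stdio.h>

#define INTEL_PMC_IDX_FIXED	32

int main(void)
{
	int num_counters = 4, num_counters_fixed = 3;
	unsigned long long intel_ctrl;

	intel_ctrl  = (1ULL << num_counters) - 1;		/* 0xf */
	intel_ctrl |= ((1ULL << num_counters_fixed) - 1)
			<< INTEL_PMC_IDX_FIXED;			/* bits 32-34 */

	printf("intel_ctrl = %#llx\n", intel_ctrl);		/* 0x70000000f */
	return 0;
}
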
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 35e2192df9f4..629ae0b7ad90 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -248,7 +248,7 @@ void reserve_ds_buffers(void) | |||
248 | */ | 248 | */ |
249 | 249 | ||
250 | struct event_constraint bts_constraint = | 250 | struct event_constraint bts_constraint = |
251 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | 251 | EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0); |
252 | 252 | ||
253 | void intel_pmu_enable_bts(u64 config) | 253 | void intel_pmu_enable_bts(u64 config) |
254 | { | 254 | { |
@@ -295,7 +295,7 @@ int intel_pmu_drain_bts_buffer(void) | |||
295 | u64 to; | 295 | u64 to; |
296 | u64 flags; | 296 | u64 flags; |
297 | }; | 297 | }; |
298 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | 298 | struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; |
299 | struct bts_record *at, *top; | 299 | struct bts_record *at, *top; |
300 | struct perf_output_handle handle; | 300 | struct perf_output_handle handle; |
301 | struct perf_event_header header; | 301 | struct perf_event_header header; |
@@ -620,7 +620,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | |||
620 | * Should not happen, we program the threshold at 1 and do not | 620 | * Should not happen, we program the threshold at 1 and do not |
621 | * set a reset value. | 621 | * set a reset value. |
622 | */ | 622 | */ |
623 | WARN_ON_ONCE(n > 1); | 623 | WARN_ONCE(n > 1, "bad leftover pebs %d\n", n); |
624 | at += n - 1; | 624 | at += n - 1; |
625 | 625 | ||
626 | __intel_pmu_pebs_event(event, iregs, at); | 626 | __intel_pmu_pebs_event(event, iregs, at); |
@@ -651,10 +651,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | |||
651 | * Should not happen, we program the threshold at 1 and do not | 651 | * Should not happen, we program the threshold at 1 and do not |
652 | * set a reset value. | 652 | * set a reset value. |
653 | */ | 653 | */ |
654 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | 654 | WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n); |
655 | 655 | ||
656 | for ( ; at < top; at++) { | 656 | for ( ; at < top; at++) { |
657 | for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | 657 | for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) { |
658 | event = cpuc->events[bit]; | 658 | event = cpuc->events[bit]; |
659 | if (!test_bit(bit, cpuc->active_mask)) | 659 | if (!test_bit(bit, cpuc->active_mask)) |
660 | continue; | 660 | continue; |
@@ -670,7 +670,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | |||
670 | break; | 670 | break; |
671 | } | 671 | } |
672 | 672 | ||
673 | if (!event || bit >= MAX_PEBS_EVENTS) | 673 | if (!event || bit >= x86_pmu.max_pebs_events) |
674 | continue; | 674 | continue; |
675 | 675 | ||
676 | __intel_pmu_pebs_event(event, iregs, at); | 676 | __intel_pmu_pebs_event(event, iregs, at); |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c new file mode 100644 index 000000000000..7563fda9f033 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
@@ -0,0 +1,2900 @@ | |||
1 | #include "perf_event_intel_uncore.h" | ||
2 | |||
3 | static struct intel_uncore_type *empty_uncore[] = { NULL, }; | ||
4 | static struct intel_uncore_type **msr_uncores = empty_uncore; | ||
5 | static struct intel_uncore_type **pci_uncores = empty_uncore; | ||
6 | /* pci bus to socket mapping */ | ||
7 | static int pcibus_to_physid[256] = { [0 ... 255] = -1, }; | ||
8 | |||
9 | static DEFINE_RAW_SPINLOCK(uncore_box_lock); | ||
10 | |||
11 | /* mask of cpus that collect uncore events */ | ||
12 | static cpumask_t uncore_cpu_mask; | ||
13 | |||
14 | /* constraint for the fixed counter */ | ||
15 | static struct event_constraint constraint_fixed = | ||
16 | EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); | ||
17 | static struct event_constraint constraint_empty = | ||
18 | EVENT_CONSTRAINT(0, 0, 0); | ||
19 | |||
20 | DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); | ||
21 | DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); | ||
22 | DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); | ||
23 | DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); | ||
24 | DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); | ||
25 | DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); | ||
26 | DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); | ||
27 | DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); | ||
28 | DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); | ||
29 | DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); | ||
30 | DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); | ||
31 | DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); | ||
32 | DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); | ||
33 | DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); | ||
34 | DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); | ||
35 | DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); | ||
36 | DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); | ||
37 | DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7"); | ||
38 | DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15"); | ||
39 | DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23"); | ||
40 | DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31"); | ||
41 | |||
42 | static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) | ||
43 | { | ||
44 | u64 count; | ||
45 | |||
46 | rdmsrl(event->hw.event_base, count); | ||
47 | |||
48 | return count; | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * generic get constraint function for shared match/mask registers. | ||
53 | */ | ||
54 | static struct event_constraint * | ||
55 | uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
56 | { | ||
57 | struct intel_uncore_extra_reg *er; | ||
58 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
59 | struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; | ||
60 | unsigned long flags; | ||
61 | bool ok = false; | ||
62 | |||
63 | /* | ||
64 | * reg->alloc can be set due to existing state, so for a fake box we | ||
65 | * need to ignore this, otherwise we might fail to allocate proper | ||
66 | * fake state for this extra reg constraint. | ||
67 | */ | ||
68 | if (reg1->idx == EXTRA_REG_NONE || | ||
69 | (!uncore_box_is_fake(box) && reg1->alloc)) | ||
70 | return NULL; | ||
71 | |||
72 | er = &box->shared_regs[reg1->idx]; | ||
73 | raw_spin_lock_irqsave(&er->lock, flags); | ||
74 | if (!atomic_read(&er->ref) || | ||
75 | (er->config1 == reg1->config && er->config2 == reg2->config)) { | ||
76 | atomic_inc(&er->ref); | ||
77 | er->config1 = reg1->config; | ||
78 | er->config2 = reg2->config; | ||
79 | ok = true; | ||
80 | } | ||
81 | raw_spin_unlock_irqrestore(&er->lock, flags); | ||
82 | |||
83 | if (ok) { | ||
84 | if (!uncore_box_is_fake(box)) | ||
85 | reg1->alloc = 1; | ||
86 | return NULL; | ||
87 | } | ||
88 | |||
89 | return &constraint_empty; | ||
90 | } | ||
91 | |||
92 | static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
93 | { | ||
94 | struct intel_uncore_extra_reg *er; | ||
95 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
96 | |||
97 | /* | ||
98 | * Only put constraint if extra reg was actually allocated. Also | ||
99 | * takes care of events which do not use an extra shared reg. | ||
100 | * | ||
101 | * Also, if this is a fake box we shouldn't touch any event state | ||
102 | * (reg->alloc) and we don't care about leaving inconsistent box | ||
103 | * state either since it will be thrown out. | ||
104 | */ | ||
105 | if (uncore_box_is_fake(box) || !reg1->alloc) | ||
106 | return; | ||
107 | |||
108 | er = &box->shared_regs[reg1->idx]; | ||
109 | atomic_dec(&er->ref); | ||
110 | reg1->alloc = 0; | ||
111 | } | ||
112 | |||
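uncore_get_constraint() and uncore_put_constraint() above implement a refcounted claim on a box-wide extra register: any number of events may share it as long as they all program identical configs, the first user sets the config, and a conflicting request falls back to constraint_empty. Stripped of the locking and the fake-box special cases, the protocol is roughly:

#include <stdbool.h>
#include <stdio.h>

struct extra_reg {
	int ref;
	unsigned long long config;
};

static bool get_reg(struct extra_reg *er, unsigned long long config)
{
	if (er->ref && er->config != config)
		return false;	/* in use with a conflicting config */
	er->config = config;
	er->ref++;
	return true;
}

static void put_reg(struct extra_reg *er)
{
	er->ref--;
}

int main(void)
{
	struct extra_reg er = { 0, 0 };

	printf("%d\n", get_reg(&er, 0x10));	/* 1: first user */
	printf("%d\n", get_reg(&er, 0x10));	/* 1: same config, shared */
	printf("%d\n", get_reg(&er, 0x20));	/* 0: conflicting config */
	put_reg(&er);
	put_reg(&er);
	return 0;
}
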
113 | /* Sandy Bridge-EP uncore support */ | ||
114 | static struct intel_uncore_type snbep_uncore_cbox; | ||
115 | static struct intel_uncore_type snbep_uncore_pcu; | ||
116 | |||
117 | static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) | ||
118 | { | ||
119 | struct pci_dev *pdev = box->pci_dev; | ||
120 | int box_ctl = uncore_pci_box_ctl(box); | ||
121 | u32 config; | ||
122 | |||
123 | pci_read_config_dword(pdev, box_ctl, &config); | ||
124 | config |= SNBEP_PMON_BOX_CTL_FRZ; | ||
125 | pci_write_config_dword(pdev, box_ctl, config); | ||
126 | } | ||
127 | |||
128 | static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) | ||
129 | { | ||
130 | struct pci_dev *pdev = box->pci_dev; | ||
131 | int box_ctl = uncore_pci_box_ctl(box); | ||
132 | u32 config; | ||
133 | |||
134 | pci_read_config_dword(pdev, box_ctl, &config); | ||
135 | config &= ~SNBEP_PMON_BOX_CTL_FRZ; | ||
136 | pci_write_config_dword(pdev, box_ctl, config); | ||
137 | } | ||
138 | |||
139 | static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
140 | { | ||
141 | struct pci_dev *pdev = box->pci_dev; | ||
142 | struct hw_perf_event *hwc = &event->hw; | ||
143 | |||
144 | pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); | ||
145 | } | ||
146 | |||
147 | static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
148 | { | ||
149 | struct pci_dev *pdev = box->pci_dev; | ||
150 | struct hw_perf_event *hwc = &event->hw; | ||
151 | |||
152 | pci_write_config_dword(pdev, hwc->config_base, hwc->config); | ||
153 | } | ||
154 | |||
155 | static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event) | ||
156 | { | ||
157 | struct pci_dev *pdev = box->pci_dev; | ||
158 | struct hw_perf_event *hwc = &event->hw; | ||
159 | u64 count; | ||
160 | |||
161 | pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); | ||
162 | pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); | ||
163 | |||
164 | return count; | ||
165 | } | ||
166 | |||
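snbep_uncore_pci_read_counter() above has to stitch the 48-bit counter together from two dword reads, because PCI config space is accessed 32 bits at a time; the in-place (u32 *)&count stores rely on x86 being little-endian. A standalone sketch of the equivalent composition:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Stand-ins for the two pci_read_config_dword() results. */
	uint32_t lo = 0x89abcdef, hi = 0x00001234;

	/* The driver writes the halves in place through (u32 *)&count;
	 * on little-endian x86 that equals this shift composition. */
	uint64_t count = (uint64_t)hi << 32 | lo;

	printf("count = %#llx\n", (unsigned long long)count);
	return 0;
}
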
167 | static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) | ||
168 | { | ||
169 | struct pci_dev *pdev = box->pci_dev; | ||
170 | |||
171 | pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT); | ||
172 | } | ||
173 | |||
174 | static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) | ||
175 | { | ||
176 | u64 config; | ||
177 | unsigned msr; | ||
178 | |||
179 | msr = uncore_msr_box_ctl(box); | ||
180 | if (msr) { | ||
181 | rdmsrl(msr, config); | ||
182 | config |= SNBEP_PMON_BOX_CTL_FRZ; | ||
183 | wrmsrl(msr, config); | ||
184 | } | ||
185 | } | ||
186 | |||
187 | static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) | ||
188 | { | ||
189 | u64 config; | ||
190 | unsigned msr; | ||
191 | |||
192 | msr = uncore_msr_box_ctl(box); | ||
193 | if (msr) { | ||
194 | rdmsrl(msr, config); | ||
195 | config &= ~SNBEP_PMON_BOX_CTL_FRZ; | ||
196 | wrmsrl(msr, config); | ||
197 | } | ||
198 | } | ||
199 | |||
200 | static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
201 | { | ||
202 | struct hw_perf_event *hwc = &event->hw; | ||
203 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
204 | |||
205 | if (reg1->idx != EXTRA_REG_NONE) | ||
206 | wrmsrl(reg1->reg, reg1->config); | ||
207 | |||
208 | wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); | ||
209 | } | ||
210 | |||
211 | static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, | ||
212 | struct perf_event *event) | ||
213 | { | ||
214 | struct hw_perf_event *hwc = &event->hw; | ||
215 | |||
216 | wrmsrl(hwc->config_base, hwc->config); | ||
217 | } | ||
218 | |||
219 | static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) | ||
220 | { | ||
221 | unsigned msr = uncore_msr_box_ctl(box); | ||
222 | |||
223 | if (msr) | ||
224 | wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); | ||
225 | } | ||
226 | |||
227 | static int snbep_uncore_hw_config(struct intel_uncore_box *box, struct perf_event *event) | ||
228 | { | ||
229 | struct hw_perf_event *hwc = &event->hw; | ||
230 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
231 | |||
232 | if (box->pmu->type == &snbep_uncore_cbox) { | ||
233 | reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + | ||
234 | SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; | ||
235 | reg1->config = event->attr.config1 & | ||
236 | SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK; | ||
237 | } else { | ||
238 | if (box->pmu->type == &snbep_uncore_pcu) { | ||
239 | reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; | ||
240 | reg1->config = event->attr.config1 & SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK; | ||
241 | } else { | ||
242 | return 0; | ||
243 | } | ||
244 | } | ||
245 | reg1->idx = 0; | ||
246 | |||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | static struct attribute *snbep_uncore_formats_attr[] = { | ||
251 | &format_attr_event.attr, | ||
252 | &format_attr_umask.attr, | ||
253 | &format_attr_edge.attr, | ||
254 | &format_attr_inv.attr, | ||
255 | &format_attr_thresh8.attr, | ||
256 | NULL, | ||
257 | }; | ||
258 | |||
259 | static struct attribute *snbep_uncore_ubox_formats_attr[] = { | ||
260 | &format_attr_event.attr, | ||
261 | &format_attr_umask.attr, | ||
262 | &format_attr_edge.attr, | ||
263 | &format_attr_inv.attr, | ||
264 | &format_attr_thresh5.attr, | ||
265 | NULL, | ||
266 | }; | ||
267 | |||
268 | static struct attribute *snbep_uncore_cbox_formats_attr[] = { | ||
269 | &format_attr_event.attr, | ||
270 | &format_attr_umask.attr, | ||
271 | &format_attr_edge.attr, | ||
272 | &format_attr_tid_en.attr, | ||
273 | &format_attr_inv.attr, | ||
274 | &format_attr_thresh8.attr, | ||
275 | &format_attr_filter_tid.attr, | ||
276 | &format_attr_filter_nid.attr, | ||
277 | &format_attr_filter_state.attr, | ||
278 | &format_attr_filter_opc.attr, | ||
279 | NULL, | ||
280 | }; | ||
281 | |||
282 | static struct attribute *snbep_uncore_pcu_formats_attr[] = { | ||
283 | &format_attr_event.attr, | ||
284 | &format_attr_occ_sel.attr, | ||
285 | &format_attr_edge.attr, | ||
286 | &format_attr_inv.attr, | ||
287 | &format_attr_thresh5.attr, | ||
288 | &format_attr_occ_invert.attr, | ||
289 | &format_attr_occ_edge.attr, | ||
290 | &format_attr_filter_band0.attr, | ||
291 | &format_attr_filter_band1.attr, | ||
292 | &format_attr_filter_band2.attr, | ||
293 | &format_attr_filter_band3.attr, | ||
294 | NULL, | ||
295 | }; | ||
296 | |||
297 | static struct attribute *snbep_uncore_qpi_formats_attr[] = { | ||
298 | &format_attr_event_ext.attr, | ||
299 | &format_attr_umask.attr, | ||
300 | &format_attr_edge.attr, | ||
301 | &format_attr_inv.attr, | ||
302 | &format_attr_thresh8.attr, | ||
303 | NULL, | ||
304 | }; | ||
305 | |||
306 | static struct uncore_event_desc snbep_uncore_imc_events[] = { | ||
307 | INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), | ||
308 | INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"), | ||
309 | INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), | ||
310 | { /* end: all zeroes */ }, | ||
311 | }; | ||
312 | |||
313 | static struct uncore_event_desc snbep_uncore_qpi_events[] = { | ||
314 | INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"), | ||
315 | INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), | ||
316 | INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x02,umask=0x08"), | ||
317 | INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x03,umask=0x04"), | ||
318 | { /* end: all zeroes */ }, | ||
319 | }; | ||
320 | |||
321 | static struct attribute_group snbep_uncore_format_group = { | ||
322 | .name = "format", | ||
323 | .attrs = snbep_uncore_formats_attr, | ||
324 | }; | ||
325 | |||
326 | static struct attribute_group snbep_uncore_ubox_format_group = { | ||
327 | .name = "format", | ||
328 | .attrs = snbep_uncore_ubox_formats_attr, | ||
329 | }; | ||
330 | |||
331 | static struct attribute_group snbep_uncore_cbox_format_group = { | ||
332 | .name = "format", | ||
333 | .attrs = snbep_uncore_cbox_formats_attr, | ||
334 | }; | ||
335 | |||
336 | static struct attribute_group snbep_uncore_pcu_format_group = { | ||
337 | .name = "format", | ||
338 | .attrs = snbep_uncore_pcu_formats_attr, | ||
339 | }; | ||
340 | |||
341 | static struct attribute_group snbep_uncore_qpi_format_group = { | ||
342 | .name = "format", | ||
343 | .attrs = snbep_uncore_qpi_formats_attr, | ||
344 | }; | ||
345 | |||
346 | static struct intel_uncore_ops snbep_uncore_msr_ops = { | ||
347 | .init_box = snbep_uncore_msr_init_box, | ||
348 | .disable_box = snbep_uncore_msr_disable_box, | ||
349 | .enable_box = snbep_uncore_msr_enable_box, | ||
350 | .disable_event = snbep_uncore_msr_disable_event, | ||
351 | .enable_event = snbep_uncore_msr_enable_event, | ||
352 | .read_counter = uncore_msr_read_counter, | ||
353 | .get_constraint = uncore_get_constraint, | ||
354 | .put_constraint = uncore_put_constraint, | ||
355 | .hw_config = snbep_uncore_hw_config, | ||
356 | }; | ||
357 | |||
358 | static struct intel_uncore_ops snbep_uncore_pci_ops = { | ||
359 | .init_box = snbep_uncore_pci_init_box, | ||
360 | .disable_box = snbep_uncore_pci_disable_box, | ||
361 | .enable_box = snbep_uncore_pci_enable_box, | ||
362 | .disable_event = snbep_uncore_pci_disable_event, | ||
363 | .enable_event = snbep_uncore_pci_enable_event, | ||
364 | .read_counter = snbep_uncore_pci_read_counter, | ||
365 | }; | ||
366 | |||
367 | static struct event_constraint snbep_uncore_cbox_constraints[] = { | ||
368 | UNCORE_EVENT_CONSTRAINT(0x01, 0x1), | ||
369 | UNCORE_EVENT_CONSTRAINT(0x02, 0x3), | ||
370 | UNCORE_EVENT_CONSTRAINT(0x04, 0x3), | ||
371 | UNCORE_EVENT_CONSTRAINT(0x05, 0x3), | ||
372 | UNCORE_EVENT_CONSTRAINT(0x07, 0x3), | ||
373 | UNCORE_EVENT_CONSTRAINT(0x11, 0x1), | ||
374 | UNCORE_EVENT_CONSTRAINT(0x12, 0x3), | ||
375 | UNCORE_EVENT_CONSTRAINT(0x13, 0x3), | ||
376 | UNCORE_EVENT_CONSTRAINT(0x1b, 0xc), | ||
377 | UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), | ||
378 | UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), | ||
379 | UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), | ||
380 | EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), | ||
381 | UNCORE_EVENT_CONSTRAINT(0x21, 0x3), | ||
382 | UNCORE_EVENT_CONSTRAINT(0x23, 0x3), | ||
383 | UNCORE_EVENT_CONSTRAINT(0x31, 0x3), | ||
384 | UNCORE_EVENT_CONSTRAINT(0x32, 0x3), | ||
385 | UNCORE_EVENT_CONSTRAINT(0x33, 0x3), | ||
386 | UNCORE_EVENT_CONSTRAINT(0x34, 0x3), | ||
387 | UNCORE_EVENT_CONSTRAINT(0x35, 0x3), | ||
388 | UNCORE_EVENT_CONSTRAINT(0x36, 0x1), | ||
389 | UNCORE_EVENT_CONSTRAINT(0x37, 0x3), | ||
390 | UNCORE_EVENT_CONSTRAINT(0x38, 0x3), | ||
391 | UNCORE_EVENT_CONSTRAINT(0x39, 0x3), | ||
392 | UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), | ||
393 | EVENT_CONSTRAINT_END | ||
394 | }; | ||
395 | |||
396 | static struct event_constraint snbep_uncore_r2pcie_constraints[] = { | ||
397 | UNCORE_EVENT_CONSTRAINT(0x10, 0x3), | ||
398 | UNCORE_EVENT_CONSTRAINT(0x11, 0x3), | ||
399 | UNCORE_EVENT_CONSTRAINT(0x12, 0x1), | ||
400 | UNCORE_EVENT_CONSTRAINT(0x23, 0x3), | ||
401 | UNCORE_EVENT_CONSTRAINT(0x24, 0x3), | ||
402 | UNCORE_EVENT_CONSTRAINT(0x25, 0x3), | ||
403 | UNCORE_EVENT_CONSTRAINT(0x26, 0x3), | ||
404 | UNCORE_EVENT_CONSTRAINT(0x32, 0x3), | ||
405 | UNCORE_EVENT_CONSTRAINT(0x33, 0x3), | ||
406 | UNCORE_EVENT_CONSTRAINT(0x34, 0x3), | ||
407 | EVENT_CONSTRAINT_END | ||
408 | }; | ||
409 | |||
410 | static struct event_constraint snbep_uncore_r3qpi_constraints[] = { | ||
411 | UNCORE_EVENT_CONSTRAINT(0x10, 0x3), | ||
412 | UNCORE_EVENT_CONSTRAINT(0x11, 0x3), | ||
413 | UNCORE_EVENT_CONSTRAINT(0x12, 0x3), | ||
414 | UNCORE_EVENT_CONSTRAINT(0x13, 0x1), | ||
415 | UNCORE_EVENT_CONSTRAINT(0x20, 0x3), | ||
416 | UNCORE_EVENT_CONSTRAINT(0x21, 0x3), | ||
417 | UNCORE_EVENT_CONSTRAINT(0x22, 0x3), | ||
418 | UNCORE_EVENT_CONSTRAINT(0x23, 0x3), | ||
419 | UNCORE_EVENT_CONSTRAINT(0x24, 0x3), | ||
420 | UNCORE_EVENT_CONSTRAINT(0x25, 0x3), | ||
421 | UNCORE_EVENT_CONSTRAINT(0x26, 0x3), | ||
422 | UNCORE_EVENT_CONSTRAINT(0x30, 0x3), | ||
423 | UNCORE_EVENT_CONSTRAINT(0x31, 0x3), | ||
424 | UNCORE_EVENT_CONSTRAINT(0x32, 0x3), | ||
425 | UNCORE_EVENT_CONSTRAINT(0x33, 0x3), | ||
426 | UNCORE_EVENT_CONSTRAINT(0x34, 0x3), | ||
427 | UNCORE_EVENT_CONSTRAINT(0x36, 0x3), | ||
428 | UNCORE_EVENT_CONSTRAINT(0x37, 0x3), | ||
429 | EVENT_CONSTRAINT_END | ||
430 | }; | ||
431 | |||
432 | static struct intel_uncore_type snbep_uncore_ubox = { | ||
433 | .name = "ubox", | ||
434 | .num_counters = 2, | ||
435 | .num_boxes = 1, | ||
436 | .perf_ctr_bits = 44, | ||
437 | .fixed_ctr_bits = 48, | ||
438 | .perf_ctr = SNBEP_U_MSR_PMON_CTR0, | ||
439 | .event_ctl = SNBEP_U_MSR_PMON_CTL0, | ||
440 | .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK, | ||
441 | .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, | ||
442 | .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, | ||
443 | .ops = &snbep_uncore_msr_ops, | ||
444 | .format_group = &snbep_uncore_ubox_format_group, | ||
445 | }; | ||
446 | |||
447 | static struct intel_uncore_type snbep_uncore_cbox = { | ||
448 | .name = "cbox", | ||
449 | .num_counters = 4, | ||
450 | .num_boxes = 8, | ||
451 | .perf_ctr_bits = 44, | ||
452 | .event_ctl = SNBEP_C0_MSR_PMON_CTL0, | ||
453 | .perf_ctr = SNBEP_C0_MSR_PMON_CTR0, | ||
454 | .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, | ||
455 | .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL, | ||
456 | .msr_offset = SNBEP_CBO_MSR_OFFSET, | ||
457 | .num_shared_regs = 1, | ||
458 | .constraints = snbep_uncore_cbox_constraints, | ||
459 | .ops = &snbep_uncore_msr_ops, | ||
460 | .format_group = &snbep_uncore_cbox_format_group, | ||
461 | }; | ||
462 | |||
463 | static struct intel_uncore_type snbep_uncore_pcu = { | ||
464 | .name = "pcu", | ||
465 | .num_counters = 4, | ||
466 | .num_boxes = 1, | ||
467 | .perf_ctr_bits = 48, | ||
468 | .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0, | ||
469 | .event_ctl = SNBEP_PCU_MSR_PMON_CTL0, | ||
470 | .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, | ||
471 | .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL, | ||
472 | .num_shared_regs = 1, | ||
473 | .ops = &snbep_uncore_msr_ops, | ||
474 | .format_group = &snbep_uncore_pcu_format_group, | ||
475 | }; | ||
476 | |||
477 | static struct intel_uncore_type *snbep_msr_uncores[] = { | ||
478 | &snbep_uncore_ubox, | ||
479 | &snbep_uncore_cbox, | ||
480 | &snbep_uncore_pcu, | ||
481 | NULL, | ||
482 | }; | ||
483 | |||
484 | #define SNBEP_UNCORE_PCI_COMMON_INIT() \ | ||
485 | .perf_ctr = SNBEP_PCI_PMON_CTR0, \ | ||
486 | .event_ctl = SNBEP_PCI_PMON_CTL0, \ | ||
487 | .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \ | ||
488 | .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \ | ||
489 | .ops = &snbep_uncore_pci_ops, \ | ||
490 | .format_group = &snbep_uncore_format_group | ||
491 | |||
492 | static struct intel_uncore_type snbep_uncore_ha = { | ||
493 | .name = "ha", | ||
494 | .num_counters = 4, | ||
495 | .num_boxes = 1, | ||
496 | .perf_ctr_bits = 48, | ||
497 | SNBEP_UNCORE_PCI_COMMON_INIT(), | ||
498 | }; | ||
499 | |||
500 | static struct intel_uncore_type snbep_uncore_imc = { | ||
501 | .name = "imc", | ||
502 | .num_counters = 4, | ||
503 | .num_boxes = 4, | ||
504 | .perf_ctr_bits = 48, | ||
505 | .fixed_ctr_bits = 48, | ||
506 | .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, | ||
507 | .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, | ||
508 | .event_descs = snbep_uncore_imc_events, | ||
509 | SNBEP_UNCORE_PCI_COMMON_INIT(), | ||
510 | }; | ||
511 | |||
512 | static struct intel_uncore_type snbep_uncore_qpi = { | ||
513 | .name = "qpi", | ||
514 | .num_counters = 4, | ||
515 | .num_boxes = 2, | ||
516 | .perf_ctr_bits = 48, | ||
517 | .perf_ctr = SNBEP_PCI_PMON_CTR0, | ||
518 | .event_ctl = SNBEP_PCI_PMON_CTL0, | ||
519 | .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK, | ||
520 | .box_ctl = SNBEP_PCI_PMON_BOX_CTL, | ||
521 | .ops = &snbep_uncore_pci_ops, | ||
522 | .event_descs = snbep_uncore_qpi_events, | ||
523 | .format_group = &snbep_uncore_qpi_format_group, | ||
524 | }; | ||
525 | |||
526 | |||
527 | static struct intel_uncore_type snbep_uncore_r2pcie = { | ||
528 | .name = "r2pcie", | ||
529 | .num_counters = 4, | ||
530 | .num_boxes = 1, | ||
531 | .perf_ctr_bits = 44, | ||
532 | .constraints = snbep_uncore_r2pcie_constraints, | ||
533 | SNBEP_UNCORE_PCI_COMMON_INIT(), | ||
534 | }; | ||
535 | |||
536 | static struct intel_uncore_type snbep_uncore_r3qpi = { | ||
537 | .name = "r3qpi", | ||
538 | .num_counters = 3, | ||
539 | .num_boxes = 2, | ||
540 | .perf_ctr_bits = 44, | ||
541 | .constraints = snbep_uncore_r3qpi_constraints, | ||
542 | SNBEP_UNCORE_PCI_COMMON_INIT(), | ||
543 | }; | ||
544 | |||
545 | static struct intel_uncore_type *snbep_pci_uncores[] = { | ||
546 | &snbep_uncore_ha, | ||
547 | &snbep_uncore_imc, | ||
548 | &snbep_uncore_qpi, | ||
549 | &snbep_uncore_r2pcie, | ||
550 | &snbep_uncore_r3qpi, | ||
551 | NULL, | ||
552 | }; | ||
553 | |||
554 | static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { | ||
555 | { /* Home Agent */ | ||
556 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), | ||
557 | .driver_data = (unsigned long)&snbep_uncore_ha, | ||
558 | }, | ||
559 | { /* MC Channel 0 */ | ||
560 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), | ||
561 | .driver_data = (unsigned long)&snbep_uncore_imc, | ||
562 | }, | ||
563 | { /* MC Channel 1 */ | ||
564 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), | ||
565 | .driver_data = (unsigned long)&snbep_uncore_imc, | ||
566 | }, | ||
567 | { /* MC Channel 2 */ | ||
568 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), | ||
569 | .driver_data = (unsigned long)&snbep_uncore_imc, | ||
570 | }, | ||
571 | { /* MC Channel 3 */ | ||
572 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), | ||
573 | .driver_data = (unsigned long)&snbep_uncore_imc, | ||
574 | }, | ||
575 | { /* QPI Port 0 */ | ||
576 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), | ||
577 | .driver_data = (unsigned long)&snbep_uncore_qpi, | ||
578 | }, | ||
579 | { /* QPI Port 1 */ | ||
580 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), | ||
581 | .driver_data = (unsigned long)&snbep_uncore_qpi, | ||
582 | }, | ||
583 | { /* P2PCIe */ | ||
584 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), | ||
585 | .driver_data = (unsigned long)&snbep_uncore_r2pcie, | ||
586 | }, | ||
587 | { /* R3QPI Link 0 */ | ||
588 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), | ||
589 | .driver_data = (unsigned long)&snbep_uncore_r3qpi, | ||
590 | }, | ||
591 | { /* R3QPI Link 1 */ | ||
592 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), | ||
593 | .driver_data = (unsigned long)&snbep_uncore_r3qpi, | ||
594 | }, | ||
595 | { /* end: all zeroes */ } | ||
596 | }; | ||
597 | |||
598 | static struct pci_driver snbep_uncore_pci_driver = { | ||
599 | .name = "snbep_uncore", | ||
600 | .id_table = snbep_uncore_pci_ids, | ||
601 | }; | ||
602 | |||
603 | /* | ||
604 | * build pci bus to socket mapping | ||
605 | */ | ||
606 | static void snbep_pci2phy_map_init(void) | ||
607 | { | ||
608 | struct pci_dev *ubox_dev = NULL; | ||
609 | int i, bus, nodeid; | ||
610 | u32 config; | ||
611 | |||
612 | while (1) { | ||
613 | /* find the UBOX device */ | ||
614 | ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, | ||
615 | PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX, | ||
616 | ubox_dev); | ||
617 | if (!ubox_dev) | ||
618 | break; | ||
619 | bus = ubox_dev->bus->number; | ||
620 | /* get the Node ID of the local register */ | ||
621 | pci_read_config_dword(ubox_dev, 0x40, &config); | ||
622 | nodeid = config; | ||
623 | /* get the Node ID mapping */ | ||
624 | pci_read_config_dword(ubox_dev, 0x54, &config); | ||
625 | /* | ||
626 | * each three-bit field in the Node ID mapping register maps | ||
627 | * to a particular node. | ||
628 | */ | ||
629 | for (i = 0; i < 8; i++) { | ||
630 | if (nodeid == ((config >> (3 * i)) & 0x7)) { | ||
631 | pcibus_to_physid[bus] = i; | ||
632 | break; | ||
633 | } | ||
634 | } | ||
635 | } | ||
636 | return; | ||
637 | } | ||
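Editorial sketch (not part of the patch): the decode loop above, pulled out into a hypothetical pure helper. It assumes the local Node ID occupies the low 3 bits of config register 0x40 and that register 0x54 packs eight 3-bit Node IDs, one per physical package id.

	/* hypothetical helper; mirrors snbep_pci2phy_map_init() above */
	static int snbep_decode_physid(u32 nodeid, u32 mapping)
	{
		int i;

		for (i = 0; i < 8; i++) {
			/* compare against 3-bit field i of the mapping register */
			if ((nodeid & 0x7) == ((mapping >> (3 * i)) & 0x7))
				return i;	/* physical package id */
		}
		return -1;	/* no matching node found */
	}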
638 | /* end of Sandy Bridge-EP uncore support */ | ||
639 | |||
640 | /* Sandy Bridge uncore support */ | ||
641 | static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
642 | { | ||
643 | struct hw_perf_event *hwc = &event->hw; | ||
644 | |||
645 | if (hwc->idx < UNCORE_PMC_IDX_FIXED) | ||
646 | wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); | ||
647 | else | ||
648 | wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); | ||
649 | } | ||
650 | |||
651 | static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
652 | { | ||
653 | wrmsrl(event->hw.config_base, 0); | ||
654 | } | ||
655 | |||
656 | static void snb_uncore_msr_init_box(struct intel_uncore_box *box) | ||
657 | { | ||
658 | if (box->pmu->pmu_idx == 0) { | ||
659 | wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, | ||
660 | SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); | ||
661 | } | ||
662 | } | ||
663 | |||
664 | static struct attribute *snb_uncore_formats_attr[] = { | ||
665 | &format_attr_event.attr, | ||
666 | &format_attr_umask.attr, | ||
667 | &format_attr_edge.attr, | ||
668 | &format_attr_inv.attr, | ||
669 | &format_attr_cmask5.attr, | ||
670 | NULL, | ||
671 | }; | ||
672 | |||
673 | static struct attribute_group snb_uncore_format_group = { | ||
674 | .name = "format", | ||
675 | .attrs = snb_uncore_formats_attr, | ||
676 | }; | ||
677 | |||
678 | static struct intel_uncore_ops snb_uncore_msr_ops = { | ||
679 | .init_box = snb_uncore_msr_init_box, | ||
680 | .disable_event = snb_uncore_msr_disable_event, | ||
681 | .enable_event = snb_uncore_msr_enable_event, | ||
682 | .read_counter = uncore_msr_read_counter, | ||
683 | }; | ||
684 | |||
685 | static struct event_constraint snb_uncore_cbox_constraints[] = { | ||
686 | UNCORE_EVENT_CONSTRAINT(0x80, 0x1), | ||
687 | UNCORE_EVENT_CONSTRAINT(0x83, 0x1), | ||
688 | EVENT_CONSTRAINT_END | ||
689 | }; | ||
690 | |||
691 | static struct intel_uncore_type snb_uncore_cbox = { | ||
692 | .name = "cbox", | ||
693 | .num_counters = 2, | ||
694 | .num_boxes = 4, | ||
695 | .perf_ctr_bits = 44, | ||
696 | .fixed_ctr_bits = 48, | ||
697 | .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, | ||
698 | .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, | ||
699 | .fixed_ctr = SNB_UNC_FIXED_CTR, | ||
700 | .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, | ||
701 | .single_fixed = 1, | ||
702 | .event_mask = SNB_UNC_RAW_EVENT_MASK, | ||
703 | .msr_offset = SNB_UNC_CBO_MSR_OFFSET, | ||
704 | .constraints = snb_uncore_cbox_constraints, | ||
705 | .ops = &snb_uncore_msr_ops, | ||
706 | .format_group = &snb_uncore_format_group, | ||
707 | }; | ||
708 | |||
709 | static struct intel_uncore_type *snb_msr_uncores[] = { | ||
710 | &snb_uncore_cbox, | ||
711 | NULL, | ||
712 | }; | ||
713 | /* end of Sandy Bridge uncore support */ | ||
714 | |||
715 | /* Nehalem uncore support */ | ||
716 | static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) | ||
717 | { | ||
718 | wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); | ||
719 | } | ||
720 | |||
721 | static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) | ||
722 | { | ||
723 | wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); | ||
724 | } | ||
725 | |||
726 | static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
727 | { | ||
728 | struct hw_perf_event *hwc = &event->hw; | ||
729 | |||
730 | if (hwc->idx < UNCORE_PMC_IDX_FIXED) | ||
731 | wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); | ||
732 | else | ||
733 | wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); | ||
734 | } | ||
735 | |||
736 | static struct attribute *nhm_uncore_formats_attr[] = { | ||
737 | &format_attr_event.attr, | ||
738 | &format_attr_umask.attr, | ||
739 | &format_attr_edge.attr, | ||
740 | &format_attr_inv.attr, | ||
741 | &format_attr_cmask8.attr, | ||
742 | NULL, | ||
743 | }; | ||
744 | |||
745 | static struct attribute_group nhm_uncore_format_group = { | ||
746 | .name = "format", | ||
747 | .attrs = nhm_uncore_formats_attr, | ||
748 | }; | ||
749 | |||
750 | static struct uncore_event_desc nhm_uncore_events[] = { | ||
751 | INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), | ||
752 | INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"), | ||
753 | INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"), | ||
754 | INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"), | ||
755 | INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"), | ||
756 | INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"), | ||
757 | INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"), | ||
758 | INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"), | ||
759 | INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"), | ||
760 | { /* end: all zeroes */ }, | ||
761 | }; | ||
762 | |||
763 | static struct intel_uncore_ops nhm_uncore_msr_ops = { | ||
764 | .disable_box = nhm_uncore_msr_disable_box, | ||
765 | .enable_box = nhm_uncore_msr_enable_box, | ||
766 | .disable_event = snb_uncore_msr_disable_event, | ||
767 | .enable_event = nhm_uncore_msr_enable_event, | ||
768 | .read_counter = uncore_msr_read_counter, | ||
769 | }; | ||
770 | |||
771 | static struct intel_uncore_type nhm_uncore = { | ||
772 | .name = "", | ||
773 | .num_counters = 8, | ||
774 | .num_boxes = 1, | ||
775 | .perf_ctr_bits = 48, | ||
776 | .fixed_ctr_bits = 48, | ||
777 | .event_ctl = NHM_UNC_PERFEVTSEL0, | ||
778 | .perf_ctr = NHM_UNC_UNCORE_PMC0, | ||
779 | .fixed_ctr = NHM_UNC_FIXED_CTR, | ||
780 | .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL, | ||
781 | .event_mask = NHM_UNC_RAW_EVENT_MASK, | ||
782 | .event_descs = nhm_uncore_events, | ||
783 | .ops = &nhm_uncore_msr_ops, | ||
784 | .format_group = &nhm_uncore_format_group, | ||
785 | }; | ||
786 | |||
787 | static struct intel_uncore_type *nhm_msr_uncores[] = { | ||
788 | &nhm_uncore, | ||
789 | NULL, | ||
790 | }; | ||
791 | /* end of Nehalem uncore support */ | ||
792 | |||
793 | /* Nehalem-EX uncore support */ | ||
794 | #define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \ | ||
795 | ((1ULL << (n)) - 1))) | ||
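A few worked examples of __BITS_VALUE() (editorial; the macro simply extracts the i-th n-bit field of x):

	u64 x = 0xaabbccdd;

	__BITS_VALUE(x, 0, 8);	/* 0xdd - lowest byte */
	__BITS_VALUE(x, 1, 8);	/* 0xcc - second byte */
	__BITS_VALUE(x, 1, 16);	/* 0xaabb - upper half of the low 32 bits */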
796 | |||
797 | DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5"); | ||
798 | DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7"); | ||
799 | DEFINE_UNCORE_FORMAT_ATTR(mm_cfg, mm_cfg, "config:63"); | ||
800 | DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63"); | ||
801 | DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63"); | ||
802 | |||
803 | static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) | ||
804 | { | ||
805 | wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); | ||
806 | } | ||
807 | |||
808 | static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) | ||
809 | { | ||
810 | unsigned msr = uncore_msr_box_ctl(box); | ||
811 | u64 config; | ||
812 | |||
813 | if (msr) { | ||
814 | rdmsrl(msr, config); | ||
815 | config &= ~((1ULL << uncore_num_counters(box)) - 1); | ||
816 | /* WBox has a fixed counter */ | ||
817 | if (uncore_msr_fixed_ctl(box)) | ||
818 | config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN; | ||
819 | wrmsrl(msr, config); | ||
820 | } | ||
821 | } | ||
822 | |||
823 | static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box) | ||
824 | { | ||
825 | unsigned msr = uncore_msr_box_ctl(box); | ||
826 | u64 config; | ||
827 | |||
828 | if (msr) { | ||
829 | rdmsrl(msr, config); | ||
830 | config |= (1ULL << uncore_num_counters(box)) - 1; | ||
831 | /* WBox has a fixed counter */ | ||
832 | if (uncore_msr_fixed_ctl(box)) | ||
833 | config |= NHMEX_W_PMON_GLOBAL_FIXED_EN; | ||
834 | wrmsrl(msr, config); | ||
835 | } | ||
836 | } | ||
837 | |||
838 | static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
839 | { | ||
840 | wrmsrl(event->hw.config_base, 0); | ||
841 | } | ||
842 | |||
843 | static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
844 | { | ||
845 | struct hw_perf_event *hwc = &event->hw; | ||
846 | |||
847 | if (hwc->idx >= UNCORE_PMC_IDX_FIXED) | ||
848 | wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); | ||
849 | else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) | ||
850 | wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); | ||
851 | else | ||
852 | wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); | ||
853 | } | ||
854 | |||
855 | #define NHMEX_UNCORE_OPS_COMMON_INIT() \ | ||
856 | .init_box = nhmex_uncore_msr_init_box, \ | ||
857 | .disable_box = nhmex_uncore_msr_disable_box, \ | ||
858 | .enable_box = nhmex_uncore_msr_enable_box, \ | ||
859 | .disable_event = nhmex_uncore_msr_disable_event, \ | ||
860 | .read_counter = uncore_msr_read_counter | ||
861 | |||
862 | static struct intel_uncore_ops nhmex_uncore_ops = { | ||
863 | NHMEX_UNCORE_OPS_COMMON_INIT(), | ||
864 | .enable_event = nhmex_uncore_msr_enable_event, | ||
865 | }; | ||
866 | |||
867 | static struct attribute *nhmex_uncore_ubox_formats_attr[] = { | ||
868 | &format_attr_event.attr, | ||
869 | &format_attr_edge.attr, | ||
870 | NULL, | ||
871 | }; | ||
872 | |||
873 | static struct attribute_group nhmex_uncore_ubox_format_group = { | ||
874 | .name = "format", | ||
875 | .attrs = nhmex_uncore_ubox_formats_attr, | ||
876 | }; | ||
877 | |||
878 | static struct intel_uncore_type nhmex_uncore_ubox = { | ||
879 | .name = "ubox", | ||
880 | .num_counters = 1, | ||
881 | .num_boxes = 1, | ||
882 | .perf_ctr_bits = 48, | ||
883 | .event_ctl = NHMEX_U_MSR_PMON_EV_SEL, | ||
884 | .perf_ctr = NHMEX_U_MSR_PMON_CTR, | ||
885 | .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK, | ||
886 | .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL, | ||
887 | .ops = &nhmex_uncore_ops, | ||
888 | .format_group = &nhmex_uncore_ubox_format_group | ||
889 | }; | ||
890 | |||
891 | static struct attribute *nhmex_uncore_cbox_formats_attr[] = { | ||
892 | &format_attr_event.attr, | ||
893 | &format_attr_umask.attr, | ||
894 | &format_attr_edge.attr, | ||
895 | &format_attr_inv.attr, | ||
896 | &format_attr_thresh8.attr, | ||
897 | NULL, | ||
898 | }; | ||
899 | |||
900 | static struct attribute_group nhmex_uncore_cbox_format_group = { | ||
901 | .name = "format", | ||
902 | .attrs = nhmex_uncore_cbox_formats_attr, | ||
903 | }; | ||
904 | |||
905 | static struct intel_uncore_type nhmex_uncore_cbox = { | ||
906 | .name = "cbox", | ||
907 | .num_counters = 6, | ||
908 | .num_boxes = 8, | ||
909 | .perf_ctr_bits = 48, | ||
910 | .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0, | ||
911 | .perf_ctr = NHMEX_C0_MSR_PMON_CTR0, | ||
912 | .event_mask = NHMEX_PMON_RAW_EVENT_MASK, | ||
913 | .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL, | ||
914 | .msr_offset = NHMEX_C_MSR_OFFSET, | ||
915 | .pair_ctr_ctl = 1, | ||
916 | .ops = &nhmex_uncore_ops, | ||
917 | .format_group = &nhmex_uncore_cbox_format_group | ||
918 | }; | ||
919 | |||
920 | static struct uncore_event_desc nhmex_uncore_wbox_events[] = { | ||
921 | INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"), | ||
922 | { /* end: all zeroes */ }, | ||
923 | }; | ||
924 | |||
925 | static struct intel_uncore_type nhmex_uncore_wbox = { | ||
926 | .name = "wbox", | ||
927 | .num_counters = 4, | ||
928 | .num_boxes = 1, | ||
929 | .perf_ctr_bits = 48, | ||
930 | .event_ctl = NHMEX_W_MSR_PMON_CNT0, | ||
931 | .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0, | ||
932 | .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR, | ||
933 | .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL, | ||
934 | .event_mask = NHMEX_PMON_RAW_EVENT_MASK, | ||
935 | .box_ctl = NHMEX_W_MSR_GLOBAL_CTL, | ||
936 | .pair_ctr_ctl = 1, | ||
937 | .event_descs = nhmex_uncore_wbox_events, | ||
938 | .ops = &nhmex_uncore_ops, | ||
939 | .format_group = &nhmex_uncore_cbox_format_group | ||
940 | }; | ||
941 | |||
942 | static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) | ||
943 | { | ||
944 | struct hw_perf_event *hwc = &event->hw; | ||
945 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
946 | struct hw_perf_event_extra *reg2 = &hwc->branch_reg; | ||
947 | int ctr, ev_sel; | ||
948 | |||
949 | ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >> | ||
950 | NHMEX_B_PMON_CTR_SHIFT; | ||
951 | ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >> | ||
952 | NHMEX_B_PMON_CTL_EV_SEL_SHIFT; | ||
953 | |||
954 | /* events that do not use the match/mask registers */ | ||
955 | if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) || | ||
956 | (ctr == 2 && ev_sel != 0x4) || ctr == 3) | ||
957 | return 0; | ||
958 | |||
959 | if (box->pmu->pmu_idx == 0) | ||
960 | reg1->reg = NHMEX_B0_MSR_MATCH; | ||
961 | else | ||
962 | reg1->reg = NHMEX_B1_MSR_MATCH; | ||
963 | reg1->idx = 0; | ||
964 | reg1->config = event->attr.config1; | ||
965 | reg2->config = event->attr.config2; | ||
966 | return 0; | ||
967 | } | ||
968 | |||
969 | static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
970 | { | ||
971 | struct hw_perf_event *hwc = &event->hw; | ||
972 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
973 | struct hw_perf_event_extra *reg2 = &hwc->branch_reg; | ||
974 | |||
975 | if (reg1->idx != EXTRA_REG_NONE) { | ||
976 | wrmsrl(reg1->reg, reg1->config); | ||
977 | wrmsrl(reg1->reg + 1, reg2->config); | ||
978 | } | ||
979 | wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | | ||
980 | (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK)); | ||
981 | } | ||
982 | |||
983 | /* | ||
984 | * The Bbox has 4 counters, but each counter monitors different events. | ||
985 | * Use bits 6-7 in the event config to select the counter. | ||
986 | */ | ||
987 | static struct event_constraint nhmex_uncore_bbox_constraints[] = { | ||
988 | EVENT_CONSTRAINT(0, 1, 0xc0), | ||
989 | EVENT_CONSTRAINT(0x40, 2, 0xc0), | ||
990 | EVENT_CONSTRAINT(0x80, 4, 0xc0), | ||
991 | EVENT_CONSTRAINT(0xc0, 8, 0xc0), | ||
992 | EVENT_CONSTRAINT_END, | ||
993 | }; | ||
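Worked example (editorial): with cmask 0xc0 only bits 6-7 of the event config are compared, so a config of 0x47 matches EVENT_CONSTRAINT(0x40, 2, 0xc0) and is pinned to counter 1 (counter bitmask 0x2), while a config with 0xc0 in bits 6-7 is pinned to counter 3 (bitmask 0x8). This is the same ctr field that nhmex_bbox_hw_config() extracts via NHMEX_B_PMON_CTR_MASK/SHIFT above.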
994 | |||
995 | static struct attribute *nhmex_uncore_bbox_formats_attr[] = { | ||
996 | &format_attr_event5.attr, | ||
997 | &format_attr_counter.attr, | ||
998 | &format_attr_match.attr, | ||
999 | &format_attr_mask.attr, | ||
1000 | NULL, | ||
1001 | }; | ||
1002 | |||
1003 | static struct attribute_group nhmex_uncore_bbox_format_group = { | ||
1004 | .name = "format", | ||
1005 | .attrs = nhmex_uncore_bbox_formats_attr, | ||
1006 | }; | ||
1007 | |||
1008 | static struct intel_uncore_ops nhmex_uncore_bbox_ops = { | ||
1009 | NHMEX_UNCORE_OPS_COMMON_INIT(), | ||
1010 | .enable_event = nhmex_bbox_msr_enable_event, | ||
1011 | .hw_config = nhmex_bbox_hw_config, | ||
1012 | .get_constraint = uncore_get_constraint, | ||
1013 | .put_constraint = uncore_put_constraint, | ||
1014 | }; | ||
1015 | |||
1016 | static struct intel_uncore_type nhmex_uncore_bbox = { | ||
1017 | .name = "bbox", | ||
1018 | .num_counters = 4, | ||
1019 | .num_boxes = 2, | ||
1020 | .perf_ctr_bits = 48, | ||
1021 | .event_ctl = NHMEX_B0_MSR_PMON_CTL0, | ||
1022 | .perf_ctr = NHMEX_B0_MSR_PMON_CTR0, | ||
1023 | .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK, | ||
1024 | .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL, | ||
1025 | .msr_offset = NHMEX_B_MSR_OFFSET, | ||
1026 | .pair_ctr_ctl = 1, | ||
1027 | .num_shared_regs = 1, | ||
1028 | .constraints = nhmex_uncore_bbox_constraints, | ||
1029 | .ops = &nhmex_uncore_bbox_ops, | ||
1030 | .format_group = &nhmex_uncore_bbox_format_group | ||
1031 | }; | ||
1032 | |||
1033 | static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) | ||
1034 | { | ||
1035 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
1036 | struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; | ||
1037 | |||
1038 | if (event->attr.config & NHMEX_S_PMON_MM_CFG_EN) { | ||
1039 | reg1->config = event->attr.config1; | ||
1040 | reg2->config = event->attr.config2; | ||
1041 | } else { | ||
1042 | reg1->config = ~0ULL; | ||
1043 | reg2->config = ~0ULL; | ||
1044 | } | ||
1045 | |||
1046 | if (box->pmu->pmu_idx == 0) | ||
1047 | reg1->reg = NHMEX_S0_MSR_MM_CFG; | ||
1048 | else | ||
1049 | reg1->reg = NHMEX_S1_MSR_MM_CFG; | ||
1050 | |||
1051 | reg1->idx = 0; | ||
1052 | |||
1053 | return 0; | ||
1054 | } | ||
1055 | |||
1056 | static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
1057 | { | ||
1058 | struct hw_perf_event *hwc = &event->hw; | ||
1059 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
1060 | struct hw_perf_event_extra *reg2 = &hwc->branch_reg; | ||
1061 | |||
1062 | wrmsrl(reg1->reg, 0); | ||
1063 | if (reg1->config != ~0ULL || reg2->config != ~0ULL) { | ||
1064 | wrmsrl(reg1->reg + 1, reg1->config); | ||
1065 | wrmsrl(reg1->reg + 2, reg2->config); | ||
1066 | wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); | ||
1067 | } | ||
1068 | wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); | ||
1069 | } | ||
1070 | |||
1071 | static struct attribute *nhmex_uncore_sbox_formats_attr[] = { | ||
1072 | &format_attr_event.attr, | ||
1073 | &format_attr_umask.attr, | ||
1074 | &format_attr_edge.attr, | ||
1075 | &format_attr_inv.attr, | ||
1076 | &format_attr_thresh8.attr, | ||
1077 | &format_attr_mm_cfg.attr, | ||
1078 | &format_attr_match.attr, | ||
1079 | &format_attr_mask.attr, | ||
1080 | NULL, | ||
1081 | }; | ||
1082 | |||
1083 | static struct attribute_group nhmex_uncore_sbox_format_group = { | ||
1084 | .name = "format", | ||
1085 | .attrs = nhmex_uncore_sbox_formats_attr, | ||
1086 | }; | ||
1087 | |||
1088 | static struct intel_uncore_ops nhmex_uncore_sbox_ops = { | ||
1089 | NHMEX_UNCORE_OPS_COMMON_INIT(), | ||
1090 | .enable_event = nhmex_sbox_msr_enable_event, | ||
1091 | .hw_config = nhmex_sbox_hw_config, | ||
1092 | .get_constraint = uncore_get_constraint, | ||
1093 | .put_constraint = uncore_put_constraint, | ||
1094 | }; | ||
1095 | |||
1096 | static struct intel_uncore_type nhmex_uncore_sbox = { | ||
1097 | .name = "sbox", | ||
1098 | .num_counters = 4, | ||
1099 | .num_boxes = 2, | ||
1100 | .perf_ctr_bits = 48, | ||
1101 | .event_ctl = NHMEX_S0_MSR_PMON_CTL0, | ||
1102 | .perf_ctr = NHMEX_S0_MSR_PMON_CTR0, | ||
1103 | .event_mask = NHMEX_PMON_RAW_EVENT_MASK, | ||
1104 | .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL, | ||
1105 | .msr_offset = NHMEX_S_MSR_OFFSET, | ||
1106 | .pair_ctr_ctl = 1, | ||
1107 | .num_shared_regs = 1, | ||
1108 | .ops = &nhmex_uncore_sbox_ops, | ||
1109 | .format_group = &nhmex_uncore_sbox_format_group | ||
1110 | }; | ||
1111 | |||
1112 | enum { | ||
1113 | EXTRA_REG_NHMEX_M_FILTER, | ||
1114 | EXTRA_REG_NHMEX_M_DSP, | ||
1115 | EXTRA_REG_NHMEX_M_ISS, | ||
1116 | EXTRA_REG_NHMEX_M_MAP, | ||
1117 | EXTRA_REG_NHMEX_M_MSC_THR, | ||
1118 | EXTRA_REG_NHMEX_M_PGT, | ||
1119 | EXTRA_REG_NHMEX_M_PLD, | ||
1120 | EXTRA_REG_NHMEX_M_ZDP_CTL_FVC, | ||
1121 | }; | ||
1122 | |||
1123 | static struct extra_reg nhmex_uncore_mbox_extra_regs[] = { | ||
1124 | MBOX_INC_SEL_EXTAR_REG(0x0, DSP), | ||
1125 | MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR), | ||
1126 | MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR), | ||
1127 | MBOX_INC_SEL_EXTAR_REG(0x9, ISS), | ||
1128 | /* event 0xa uses two extra registers */ | ||
1129 | MBOX_INC_SEL_EXTAR_REG(0xa, ISS), | ||
1130 | MBOX_INC_SEL_EXTAR_REG(0xa, PLD), | ||
1131 | MBOX_INC_SEL_EXTAR_REG(0xb, PLD), | ||
1132 | /* events 0xd ~ 0x10 use the same extra register */ | ||
1133 | MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC), | ||
1134 | MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC), | ||
1135 | MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC), | ||
1136 | MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC), | ||
1137 | MBOX_INC_SEL_EXTAR_REG(0x16, PGT), | ||
1138 | MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP), | ||
1139 | MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS), | ||
1140 | MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT), | ||
1141 | MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP), | ||
1142 | EVENT_EXTRA_END | ||
1143 | }; | ||
1144 | |||
1145 | static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config) | ||
1146 | { | ||
1147 | struct intel_uncore_extra_reg *er; | ||
1148 | unsigned long flags; | ||
1149 | bool ret = false; | ||
1150 | u64 mask; | ||
1151 | |||
1152 | if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { | ||
1153 | er = &box->shared_regs[idx]; | ||
1154 | raw_spin_lock_irqsave(&er->lock, flags); | ||
1155 | if (!atomic_read(&er->ref) || er->config == config) { | ||
1156 | atomic_inc(&er->ref); | ||
1157 | er->config = config; | ||
1158 | ret = true; | ||
1159 | } | ||
1160 | raw_spin_unlock_irqrestore(&er->lock, flags); | ||
1161 | |||
1162 | return ret; | ||
1163 | } | ||
1164 | /* | ||
1165 | * The ZDP_CTL_FVC MSR has 4 fields which are used to control | ||
1166 | * events 0xd ~ 0x10. Besides these 4 fields, there are additional | ||
1167 | * fields which are shared. | ||
1168 | */ | ||
1169 | idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; | ||
1170 | if (WARN_ON_ONCE(idx >= 4)) | ||
1171 | return false; | ||
1172 | |||
1173 | /* mask of the shared fields */ | ||
1174 | mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK; | ||
1175 | er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; | ||
1176 | |||
1177 | raw_spin_lock_irqsave(&er->lock, flags); | ||
1178 | /* add mask of the non-shared field if it's in use */ | ||
1179 | if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) | ||
1180 | mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); | ||
1181 | |||
1182 | if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) { | ||
1183 | atomic_add(1 << (idx * 8), &er->ref); | ||
1184 | mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK | | ||
1185 | NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); | ||
1186 | er->config &= ~mask; | ||
1187 | er->config |= (config & mask); | ||
1188 | ret = true; | ||
1189 | } | ||
1190 | raw_spin_unlock_irqrestore(&er->lock, flags); | ||
1191 | |||
1192 | return ret; | ||
1193 | } | ||
1194 | |||
1195 | static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx) | ||
1196 | { | ||
1197 | struct intel_uncore_extra_reg *er; | ||
1198 | |||
1199 | if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { | ||
1200 | er = &box->shared_regs[idx]; | ||
1201 | atomic_dec(&er->ref); | ||
1202 | return; | ||
1203 | } | ||
1204 | |||
1205 | idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; | ||
1206 | er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; | ||
1207 | atomic_sub(1 << (idx * 8), &er->ref); | ||
1208 | } | ||
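Editorial sketch: the ZDP_CTL_FVC reference count above packs four 8-bit counters into a single atomic_t, one per FVC field. Hypothetical helpers showing the packing arithmetic, assuming no field is ever referenced more than 255 times:

	static inline void fvc_ref_get(atomic_t *ref, int idx)
	{
		atomic_add(1 << (idx * 8), ref);	/* ++refcount of field idx */
	}

	static inline void fvc_ref_put(atomic_t *ref, int idx)
	{
		atomic_sub(1 << (idx * 8), ref);	/* --refcount of field idx */
	}

	static inline int fvc_ref_read(atomic_t *ref, int idx)
	{
		/* 8-bit field idx of the packed counter word */
		return __BITS_VALUE(atomic_read(ref), idx, 8);
	}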
1209 | |||
1210 | static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify) | ||
1211 | { | ||
1212 | struct hw_perf_event *hwc = &event->hw; | ||
1213 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
1214 | int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); | ||
1215 | u64 config = reg1->config; | ||
1216 | |||
1217 | /* get the non-shared control bits and shift them */ | ||
1218 | idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; | ||
1219 | config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx); | ||
1220 | if (new_idx > orig_idx) { | ||
1221 | idx = new_idx - orig_idx; | ||
1222 | config <<= 3 * idx; | ||
1223 | } else { | ||
1224 | idx = orig_idx - new_idx; | ||
1225 | config >>= 3 * idx; | ||
1226 | } | ||
1227 | |||
1228 | /* add the shared control bits back */ | ||
1229 | config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config; | ||
1230 | if (modify) { | ||
1231 | /* adjust the main event selector */ | ||
1232 | if (new_idx > orig_idx) | ||
1233 | hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; | ||
1234 | else | ||
1235 | hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT; | ||
1236 | reg1->config = config; | ||
1237 | reg1->idx = ~0xff | new_idx; | ||
1238 | } | ||
1239 | return config; | ||
1240 | } | ||
1241 | |||
1242 | static struct event_constraint * | ||
1243 | nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
1244 | { | ||
1245 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
1246 | struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; | ||
1247 | int i, idx[2], alloc = 0; | ||
1248 | u64 config1 = reg1->config; | ||
1249 | |||
1250 | idx[0] = __BITS_VALUE(reg1->idx, 0, 8); | ||
1251 | idx[1] = __BITS_VALUE(reg1->idx, 1, 8); | ||
1252 | again: | ||
1253 | for (i = 0; i < 2; i++) { | ||
1254 | if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i))) | ||
1255 | idx[i] = 0xff; | ||
1256 | |||
1257 | if (idx[i] == 0xff) | ||
1258 | continue; | ||
1259 | |||
1260 | if (!nhmex_mbox_get_shared_reg(box, idx[i], | ||
1261 | __BITS_VALUE(config1, i, 32))) | ||
1262 | goto fail; | ||
1263 | alloc |= (0x1 << i); | ||
1264 | } | ||
1265 | |||
1266 | /* for the match/mask registers */ | ||
1267 | if ((uncore_box_is_fake(box) || !reg2->alloc) && | ||
1268 | !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config)) | ||
1269 | goto fail; | ||
1270 | |||
1271 | /* | ||
1272 | * If it's a fake box -- as per validate_{group,event}() -- we | ||
1273 | * shouldn't touch event state, and we can avoid doing so | ||
1274 | * since both will only call get_event_constraints() once | ||
1275 | * on each event; this avoids the need for reg->alloc. | ||
1276 | */ | ||
1277 | if (!uncore_box_is_fake(box)) { | ||
1278 | if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) | ||
1279 | nhmex_mbox_alter_er(event, idx[0], true); | ||
1280 | reg1->alloc |= alloc; | ||
1281 | reg2->alloc = 1; | ||
1282 | } | ||
1283 | return NULL; | ||
1284 | fail: | ||
1285 | if (idx[0] != 0xff && !(alloc & 0x1) && | ||
1286 | idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) { | ||
1287 | /* | ||
1288 | * events 0xd ~ 0x10 are functionally identical, but are | ||
1289 | * controlled by different fields in the ZDP_CTL_FVC | ||
1290 | * register. If we fail to take one field, try the | ||
1291 | * remaining 3 choices. | ||
1292 | */ | ||
1293 | BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff); | ||
1294 | idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; | ||
1295 | idx[0] = (idx[0] + 1) % 4; | ||
1296 | idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC; | ||
1297 | if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) { | ||
1298 | config1 = nhmex_mbox_alter_er(event, idx[0], false); | ||
1299 | goto again; | ||
1300 | } | ||
1301 | } | ||
1302 | |||
1303 | if (alloc & 0x1) | ||
1304 | nhmex_mbox_put_shared_reg(box, idx[0]); | ||
1305 | if (alloc & 0x2) | ||
1306 | nhmex_mbox_put_shared_reg(box, idx[1]); | ||
1307 | return &constraint_empty; | ||
1308 | } | ||
1309 | |||
1310 | static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
1311 | { | ||
1312 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
1313 | struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; | ||
1314 | |||
1315 | if (uncore_box_is_fake(box)) | ||
1316 | return; | ||
1317 | |||
1318 | if (reg1->alloc & 0x1) | ||
1319 | nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8)); | ||
1320 | if (reg1->alloc & 0x2) | ||
1321 | nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8)); | ||
1322 | reg1->alloc = 0; | ||
1323 | |||
1324 | if (reg2->alloc) { | ||
1325 | nhmex_mbox_put_shared_reg(box, reg2->idx); | ||
1326 | reg2->alloc = 0; | ||
1327 | } | ||
1328 | } | ||
1329 | |||
1330 | static int nhmex_mbox_extra_reg_idx(struct extra_reg *er) | ||
1331 | { | ||
1332 | if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) | ||
1333 | return er->idx; | ||
1334 | return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd; | ||
1335 | } | ||
1336 | |||
1337 | static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) | ||
1338 | { | ||
1339 | struct intel_uncore_type *type = box->pmu->type; | ||
1340 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
1341 | struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; | ||
1342 | struct extra_reg *er; | ||
1343 | unsigned msr; | ||
1344 | int reg_idx = 0; | ||
1345 | |||
1346 | if (WARN_ON_ONCE(reg1->idx != -1)) | ||
1347 | return -EINVAL; | ||
1348 | /* | ||
1349 | * The mbox events may require at most 2 extra MSRs. But only | ||
1350 | * the lower 32 bits in these MSRs are significant, so we can use | ||
1351 | * config1 to pass two MSRs' config. | ||
1352 | */ | ||
1353 | for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) { | ||
1354 | if (er->event != (event->hw.config & er->config_mask)) | ||
1355 | continue; | ||
1356 | if (event->attr.config1 & ~er->valid_mask) | ||
1357 | return -EINVAL; | ||
1358 | if (er->idx == __BITS_VALUE(reg1->idx, 0, 8) || | ||
1359 | er->idx == __BITS_VALUE(reg1->idx, 1, 8)) | ||
1360 | continue; | ||
1361 | if (WARN_ON_ONCE(reg_idx >= 2)) | ||
1362 | return -EINVAL; | ||
1363 | |||
1364 | msr = er->msr + type->msr_offset * box->pmu->pmu_idx; | ||
1365 | if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff)) | ||
1366 | return -EINVAL; | ||
1367 | |||
1368 | /* always use bits 32~63 to pass the PLD config */ | ||
1369 | if (er->idx == EXTRA_REG_NHMEX_M_PLD) | ||
1370 | reg_idx = 1; | ||
1371 | |||
1372 | reg1->idx &= ~(0xff << (reg_idx * 8)); | ||
1373 | reg1->reg &= ~(0xffff << (reg_idx * 16)); | ||
1374 | reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8); | ||
1375 | reg1->reg |= msr << (reg_idx * 16); | ||
1376 | reg1->config = event->attr.config1; | ||
1377 | reg_idx++; | ||
1378 | } | ||
1379 | /* use config2 to pass the filter config */ | ||
1380 | reg2->idx = EXTRA_REG_NHMEX_M_FILTER; | ||
1381 | if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN) | ||
1382 | reg2->config = event->attr.config2; | ||
1383 | else | ||
1384 | reg2->config = ~0ULL; | ||
1385 | if (box->pmu->pmu_idx == 0) | ||
1386 | reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG; | ||
1387 | else | ||
1388 | reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG; | ||
1389 | |||
1390 | return 0; | ||
1391 | } | ||
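Editorial sketch of the packing used above: each of the two slots in reg1 carries one extra register -- an 8-bit index in reg1->idx and a 16-bit MSR address in reg1->reg -- while config1 carries the two 32-bit MSR values. A hypothetical helper mirroring the shifts in nhmex_mbox_hw_config():

	static void mbox_pack_extra_reg(struct hw_perf_event_extra *reg1,
					int slot, int er_idx, unsigned msr)
	{
		reg1->idx &= ~(0xff << (slot * 8));	/* clear old index */
		reg1->reg &= ~(0xffff << (slot * 16));	/* clear old MSR   */
		reg1->idx |= er_idx << (slot * 8);
		reg1->reg |= msr << (slot * 16);
	}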
1392 | |||
1393 | static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx) | ||
1394 | { | ||
1395 | struct intel_uncore_extra_reg *er; | ||
1396 | unsigned long flags; | ||
1397 | u64 config; | ||
1398 | |||
1399 | if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) | ||
1400 | return box->shared_regs[idx].config; | ||
1401 | |||
1402 | er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC]; | ||
1403 | raw_spin_lock_irqsave(&er->lock, flags); | ||
1404 | config = er->config; | ||
1405 | raw_spin_unlock_irqrestore(&er->lock, flags); | ||
1406 | return config; | ||
1407 | } | ||
1408 | |||
1409 | static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
1410 | { | ||
1411 | struct hw_perf_event *hwc = &event->hw; | ||
1412 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
1413 | struct hw_perf_event_extra *reg2 = &hwc->branch_reg; | ||
1414 | int idx; | ||
1415 | |||
1416 | idx = __BITS_VALUE(reg1->idx, 0, 8); | ||
1417 | if (idx != 0xff) | ||
1418 | wrmsrl(__BITS_VALUE(reg1->reg, 0, 16), | ||
1419 | nhmex_mbox_shared_reg_config(box, idx)); | ||
1420 | idx = __BITS_VALUE(reg1->idx, 1, 8); | ||
1421 | if (idx != 0xff) | ||
1422 | wrmsrl(__BITS_VALUE(reg1->reg, 1, 16), | ||
1423 | nhmex_mbox_shared_reg_config(box, idx)); | ||
1424 | |||
1425 | wrmsrl(reg2->reg, 0); | ||
1426 | if (reg2->config != ~0ULL) { | ||
1427 | wrmsrl(reg2->reg + 1, | ||
1428 | reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK); | ||
1429 | wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & | ||
1430 | (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT)); | ||
1431 | wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); | ||
1432 | } | ||
1433 | |||
1434 | wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); | ||
1435 | } | ||
1436 | |||
1437 | DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3"); | ||
1438 | DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5"); | ||
1439 | DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6"); | ||
1440 | DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7"); | ||
1441 | DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13"); | ||
1442 | DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21"); | ||
1443 | DEFINE_UNCORE_FORMAT_ATTR(filter_cfg, filter_cfg, "config2:63"); | ||
1444 | DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33"); | ||
1445 | DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61"); | ||
1446 | DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31"); | ||
1447 | DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31"); | ||
1448 | DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31"); | ||
1449 | DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31"); | ||
1450 | DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31"); | ||
1451 | DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31"); | ||
1452 | DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63"); | ||
1453 | |||
1454 | static struct attribute *nhmex_uncore_mbox_formats_attr[] = { | ||
1455 | &format_attr_count_mode.attr, | ||
1456 | &format_attr_storage_mode.attr, | ||
1457 | &format_attr_wrap_mode.attr, | ||
1458 | &format_attr_flag_mode.attr, | ||
1459 | &format_attr_inc_sel.attr, | ||
1460 | &format_attr_set_flag_sel.attr, | ||
1461 | &format_attr_filter_cfg.attr, | ||
1462 | &format_attr_filter_match.attr, | ||
1463 | &format_attr_filter_mask.attr, | ||
1464 | &format_attr_dsp.attr, | ||
1465 | &format_attr_thr.attr, | ||
1466 | &format_attr_fvc.attr, | ||
1467 | &format_attr_pgt.attr, | ||
1468 | &format_attr_map.attr, | ||
1469 | &format_attr_iss.attr, | ||
1470 | &format_attr_pld.attr, | ||
1471 | NULL, | ||
1472 | }; | ||
1473 | |||
1474 | static struct attribute_group nhmex_uncore_mbox_format_group = { | ||
1475 | .name = "format", | ||
1476 | .attrs = nhmex_uncore_mbox_formats_attr, | ||
1477 | }; | ||
1478 | |||
1479 | static struct uncore_event_desc nhmex_uncore_mbox_events[] = { | ||
1480 | INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"), | ||
1481 | INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"), | ||
1482 | { /* end: all zeroes */ }, | ||
1483 | }; | ||
1484 | |||
1485 | static struct intel_uncore_ops nhmex_uncore_mbox_ops = { | ||
1486 | NHMEX_UNCORE_OPS_COMMON_INIT(), | ||
1487 | .enable_event = nhmex_mbox_msr_enable_event, | ||
1488 | .hw_config = nhmex_mbox_hw_config, | ||
1489 | .get_constraint = nhmex_mbox_get_constraint, | ||
1490 | .put_constraint = nhmex_mbox_put_constraint, | ||
1491 | }; | ||
1492 | |||
1493 | static struct intel_uncore_type nhmex_uncore_mbox = { | ||
1494 | .name = "mbox", | ||
1495 | .num_counters = 6, | ||
1496 | .num_boxes = 2, | ||
1497 | .perf_ctr_bits = 48, | ||
1498 | .event_ctl = NHMEX_M0_MSR_PMU_CTL0, | ||
1499 | .perf_ctr = NHMEX_M0_MSR_PMU_CNT0, | ||
1500 | .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK, | ||
1501 | .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL, | ||
1502 | .msr_offset = NHMEX_M_MSR_OFFSET, | ||
1503 | .pair_ctr_ctl = 1, | ||
1504 | .num_shared_regs = 8, | ||
1505 | .event_descs = nhmex_uncore_mbox_events, | ||
1506 | .ops = &nhmex_uncore_mbox_ops, | ||
1507 | .format_group = &nhmex_uncore_mbox_format_group, | ||
1508 | }; | ||
1509 | |||
1510 | static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) | ||
1511 | { | ||
1512 | struct hw_perf_event *hwc = &event->hw; | ||
1513 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
1514 | int port; | ||
1515 | |||
1516 | /* adjust the main event selector */ | ||
1517 | if (reg1->idx % 2) { | ||
1518 | reg1->idx--; | ||
1519 | hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; | ||
1520 | } else { | ||
1521 | reg1->idx++; | ||
1522 | hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT; | ||
1523 | } | ||
1524 | |||
1525 | /* adjust address or config of extra register */ | ||
1526 | port = reg1->idx / 6 + box->pmu->pmu_idx * 4; | ||
1527 | switch (reg1->idx % 6) { | ||
1528 | case 0: | ||
1529 | reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG0(port); | ||
1530 | break; | ||
1531 | case 1: | ||
1532 | reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG1(port); | ||
1533 | break; | ||
1534 | case 2: | ||
1535 | /* move bits 8~15 down to bits 0~7 */ | ||
1536 | reg1->config >>= 8; | ||
1537 | break; | ||
1538 | case 3: | ||
1539 | /* move bits 0~7 up to bits 8~15 */ | ||
1540 | reg1->config <<= 8; | ||
1541 | break; | ||
1542 | case 4: | ||
1543 | reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port); | ||
1544 | break; | ||
1545 | case 5: | ||
1546 | reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port); | ||
1547 | break; | ||
1548 | } | ||
1549 | } | ||
1550 | |||
1551 | /* | ||
1552 | * Each rbox has 4 event sets, which monitor PQI ports 0~3 or 4~7. | ||
1553 | * An event set consists of 6 events; the 3rd and 4th events in | ||
1554 | * an event set use the same extra register, so an event set uses | ||
1555 | * 5 extra registers. | ||
1556 | */ | ||
1557 | static struct event_constraint * | ||
1558 | nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
1559 | { | ||
1560 | struct hw_perf_event *hwc = &event->hw; | ||
1561 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
1562 | struct hw_perf_event_extra *reg2 = &hwc->branch_reg; | ||
1563 | struct intel_uncore_extra_reg *er; | ||
1564 | unsigned long flags; | ||
1565 | int idx, er_idx; | ||
1566 | u64 config1; | ||
1567 | bool ok = false; | ||
1568 | |||
1569 | if (!uncore_box_is_fake(box) && reg1->alloc) | ||
1570 | return NULL; | ||
1571 | |||
1572 | idx = reg1->idx % 6; | ||
1573 | config1 = reg1->config; | ||
1574 | again: | ||
1575 | er_idx = idx; | ||
1576 | /* the 3rd and 4th events use the same extra register */ | ||
1577 | if (er_idx > 2) | ||
1578 | er_idx--; | ||
1579 | er_idx += (reg1->idx / 6) * 5; | ||
1580 | |||
1581 | er = &box->shared_regs[er_idx]; | ||
1582 | raw_spin_lock_irqsave(&er->lock, flags); | ||
1583 | if (idx < 2) { | ||
1584 | if (!atomic_read(&er->ref) || er->config == reg1->config) { | ||
1585 | atomic_inc(&er->ref); | ||
1586 | er->config = reg1->config; | ||
1587 | ok = true; | ||
1588 | } | ||
1589 | } else if (idx == 2 || idx == 3) { | ||
1590 | /* | ||
1591 | * these two events use different fields in an extra register: | ||
1592 | * bits 0~7 and bits 8~15 respectively. | ||
1593 | */ | ||
1594 | u64 mask = 0xff << ((idx - 2) * 8); | ||
1595 | if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) || | ||
1596 | !((er->config ^ config1) & mask)) { | ||
1597 | atomic_add(1 << ((idx - 2) * 8), &er->ref); | ||
1598 | er->config &= ~mask; | ||
1599 | er->config |= config1 & mask; | ||
1600 | ok = true; | ||
1601 | } | ||
1602 | } else { | ||
1603 | if (!atomic_read(&er->ref) || | ||
1604 | (er->config == (hwc->config >> 32) && | ||
1605 | er->config1 == reg1->config && | ||
1606 | er->config2 == reg2->config)) { | ||
1607 | atomic_inc(&er->ref); | ||
1608 | er->config = (hwc->config >> 32); | ||
1609 | er->config1 = reg1->config; | ||
1610 | er->config2 = reg2->config; | ||
1611 | ok = true; | ||
1612 | } | ||
1613 | } | ||
1614 | raw_spin_unlock_irqrestore(&er->lock, flags); | ||
1615 | |||
1616 | if (!ok) { | ||
1617 | /* | ||
1618 | * The Rbox events are always in pairs. The paired | ||
1619 | * events are functionally identical, but use different | ||
1620 | * extra registers. If we fail to take an extra | ||
1621 | * register, try the alternative. | ||
1622 | */ | ||
1623 | if (idx % 2) | ||
1624 | idx--; | ||
1625 | else | ||
1626 | idx++; | ||
1627 | if (idx != reg1->idx % 6) { | ||
1628 | if (idx == 2) | ||
1629 | config1 >>= 8; | ||
1630 | else if (idx == 3) | ||
1631 | config1 <<= 8; | ||
1632 | goto again; | ||
1633 | } | ||
1634 | } else { | ||
1635 | if (!uncore_box_is_fake(box)) { | ||
1636 | if (idx != reg1->idx % 6) | ||
1637 | nhmex_rbox_alter_er(box, event); | ||
1638 | reg1->alloc = 1; | ||
1639 | } | ||
1640 | return NULL; | ||
1641 | } | ||
1642 | return &constraint_empty; | ||
1643 | } | ||
1644 | |||
1645 | static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
1646 | { | ||
1647 | struct intel_uncore_extra_reg *er; | ||
1648 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
1649 | int idx, er_idx; | ||
1650 | |||
1651 | if (uncore_box_is_fake(box) || !reg1->alloc) | ||
1652 | return; | ||
1653 | |||
1654 | idx = reg1->idx % 6; | ||
1655 | er_idx = idx; | ||
1656 | if (er_idx > 2) | ||
1657 | er_idx--; | ||
1658 | er_idx += (reg1->idx / 6) * 5; | ||
1659 | |||
1660 | er = &box->shared_regs[er_idx]; | ||
1661 | if (idx == 2 || idx == 3) | ||
1662 | atomic_sub(1 << ((idx - 2) * 8), &er->ref); | ||
1663 | else | ||
1664 | atomic_dec(&er->ref); | ||
1665 | |||
1666 | reg1->alloc = 0; | ||
1667 | } | ||
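Editorial sketch: the idx -> shared-register mapping repeated in the get/put paths above, as a hypothetical helper. Events 2 and 3 of a set share one register, so the six per-set event indices fold onto five extra registers.

	static int rbox_extra_reg_idx(int idx)
	{
		int er_idx = idx % 6;	/* position within the event set */

		if (er_idx > 2)
			er_idx--;	/* events 2 and 3 share a register */
		return er_idx + (idx / 6) * 5;	/* 5 extra regs per set */
	}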
1668 | |||
1669 | static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event) | ||
1670 | { | ||
1671 | struct hw_perf_event *hwc = &event->hw; | ||
1672 | struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; | ||
1673 | struct hw_perf_event_extra *reg2 = &event->hw.branch_reg; | ||
1674 | int port, idx; | ||
1675 | |||
1676 | idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >> | ||
1677 | NHMEX_R_PMON_CTL_EV_SEL_SHIFT; | ||
1678 | if (idx >= 0x18) | ||
1679 | return -EINVAL; | ||
1680 | |||
1681 | reg1->idx = idx; | ||
1682 | reg1->config = event->attr.config1; | ||
1683 | |||
1684 | port = idx / 6 + box->pmu->pmu_idx * 4; | ||
1685 | idx %= 6; | ||
1686 | switch (idx) { | ||
1687 | case 0: | ||
1688 | reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG0(port); | ||
1689 | break; | ||
1690 | case 1: | ||
1691 | reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG1(port); | ||
1692 | break; | ||
1693 | case 2: | ||
1694 | case 3: | ||
1695 | reg1->reg = NHMEX_R_MSR_PORTN_QLX_CFG(port); | ||
1696 | break; | ||
1697 | case 4: | ||
1698 | case 5: | ||
1699 | if (idx == 4) | ||
1700 | reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port); | ||
1701 | else | ||
1702 | reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port); | ||
1703 | reg2->config = event->attr.config2; | ||
1704 | hwc->config |= event->attr.config & (~0ULL << 32); | ||
1705 | break; | ||
1706 | } | ||
1707 | return 0; | ||
1708 | } | ||
1709 | |||
1710 | static u64 nhmex_rbox_shared_reg_config(struct intel_uncore_box *box, int idx) | ||
1711 | { | ||
1712 | struct intel_uncore_extra_reg *er; | ||
1713 | unsigned long flags; | ||
1714 | u64 config; | ||
1715 | |||
1716 | er = &box->shared_regs[idx]; | ||
1717 | |||
1718 | raw_spin_lock_irqsave(&er->lock, flags); | ||
1719 | config = er->config; | ||
1720 | raw_spin_unlock_irqrestore(&er->lock, flags); | ||
1721 | |||
1722 | return config; | ||
1723 | } | ||
1724 | |||
1725 | static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
1726 | { | ||
1727 | struct hw_perf_event *hwc = &event->hw; | ||
1728 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | ||
1729 | struct hw_perf_event_extra *reg2 = &hwc->branch_reg; | ||
1730 | int idx, er_idx; | ||
1731 | |||
1732 | idx = reg1->idx % 6; | ||
1733 | er_idx = idx; | ||
1734 | if (er_idx > 2) | ||
1735 | er_idx--; | ||
1736 | er_idx += (reg1->idx / 6) * 5; | ||
1737 | |||
1738 | switch (idx) { | ||
1739 | case 0: | ||
1740 | case 1: | ||
1741 | wrmsrl(reg1->reg, reg1->config); | ||
1742 | break; | ||
1743 | case 2: | ||
1744 | case 3: | ||
1745 | wrmsrl(reg1->reg, nhmex_rbox_shared_reg_config(box, er_idx)); | ||
1746 | break; | ||
1747 | case 4: | ||
1748 | case 5: | ||
1749 | wrmsrl(reg1->reg, reg1->config); | ||
1750 | wrmsrl(reg1->reg + 1, hwc->config >> 32); | ||
1751 | wrmsrl(reg1->reg + 2, reg2->config); | ||
1752 | break; | ||
1753 | } | ||
1754 | |||
1755 | wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | | ||
1756 | (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK)); | ||
1757 | } | ||
1758 | |||
1759 | DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config:32-63"); | ||
1760 | DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config1:0-63"); | ||
1761 | DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63"); | ||
1762 | DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15"); | ||
1763 | DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31"); | ||
1764 | |||
1765 | static struct attribute *nhmex_uncore_rbox_formats_attr[] = { | ||
1766 | &format_attr_event5.attr, | ||
1767 | &format_attr_xbr_mm_cfg.attr, | ||
1768 | &format_attr_xbr_match.attr, | ||
1769 | &format_attr_xbr_mask.attr, | ||
1770 | &format_attr_qlx_cfg.attr, | ||
1771 | &format_attr_iperf_cfg.attr, | ||
1772 | NULL, | ||
1773 | }; | ||
1774 | |||
1775 | static struct attribute_group nhmex_uncore_rbox_format_group = { | ||
1776 | .name = "format", | ||
1777 | .attrs = nhmex_uncore_rbox_formats_attr, | ||
1778 | }; | ||
1779 | |||
1780 | static struct uncore_event_desc nhmex_uncore_rbox_events[] = { | ||
1781 | INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"), | ||
1782 | INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"), | ||
1783 | INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"), | ||
1784 | INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"), | ||
1785 | INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"), | ||
1786 | INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"), | ||
1787 | { /* end: all zeroes */ }, | ||
1788 | }; | ||
1789 | |||
1790 | static struct intel_uncore_ops nhmex_uncore_rbox_ops = { | ||
1791 | NHMEX_UNCORE_OPS_COMMON_INIT(), | ||
1792 | .enable_event = nhmex_rbox_msr_enable_event, | ||
1793 | .hw_config = nhmex_rbox_hw_config, | ||
1794 | .get_constraint = nhmex_rbox_get_constraint, | ||
1795 | .put_constraint = nhmex_rbox_put_constraint, | ||
1796 | }; | ||
1797 | |||
1798 | static struct intel_uncore_type nhmex_uncore_rbox = { | ||
1799 | .name = "rbox", | ||
1800 | .num_counters = 8, | ||
1801 | .num_boxes = 2, | ||
1802 | .perf_ctr_bits = 48, | ||
1803 | .event_ctl = NHMEX_R_MSR_PMON_CTL0, | ||
1804 | .perf_ctr = NHMEX_R_MSR_PMON_CNT0, | ||
1805 | .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK, | ||
1806 | .box_ctl = NHMEX_R_MSR_GLOBAL_CTL, | ||
1807 | .msr_offset = NHMEX_R_MSR_OFFSET, | ||
1808 | .pair_ctr_ctl = 1, | ||
1809 | .num_shared_regs = 20, | ||
1810 | .event_descs = nhmex_uncore_rbox_events, | ||
1811 | .ops = &nhmex_uncore_rbox_ops, | ||
1812 | .format_group = &nhmex_uncore_rbox_format_group | ||
1813 | }; | ||
1814 | |||
1815 | static struct intel_uncore_type *nhmex_msr_uncores[] = { | ||
1816 | &nhmex_uncore_ubox, | ||
1817 | &nhmex_uncore_cbox, | ||
1818 | &nhmex_uncore_bbox, | ||
1819 | &nhmex_uncore_sbox, | ||
1820 | &nhmex_uncore_mbox, | ||
1821 | &nhmex_uncore_rbox, | ||
1822 | &nhmex_uncore_wbox, | ||
1823 | NULL, | ||
1824 | }; | ||
1825 | /* end of Nehalem-EX uncore support */ | ||
1826 | |||
1827 | static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx) | ||
1828 | { | ||
1829 | struct hw_perf_event *hwc = &event->hw; | ||
1830 | |||
1831 | hwc->idx = idx; | ||
1832 | hwc->last_tag = ++box->tags[idx]; | ||
1833 | |||
1834 | if (hwc->idx == UNCORE_PMC_IDX_FIXED) { | ||
1835 | hwc->event_base = uncore_fixed_ctr(box); | ||
1836 | hwc->config_base = uncore_fixed_ctl(box); | ||
1837 | return; | ||
1838 | } | ||
1839 | |||
1840 | hwc->config_base = uncore_event_ctl(box, hwc->idx); | ||
1841 | hwc->event_base = uncore_perf_ctr(box, hwc->idx); | ||
1842 | } | ||
1843 | |||
1844 | static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event) | ||
1845 | { | ||
1846 | u64 prev_count, new_count, delta; | ||
1847 | int shift; | ||
1848 | |||
1849 | if (event->hw.idx >= UNCORE_PMC_IDX_FIXED) | ||
1850 | shift = 64 - uncore_fixed_ctr_bits(box); | ||
1851 | else | ||
1852 | shift = 64 - uncore_perf_ctr_bits(box); | ||
1853 | |||
1854 | /* the hrtimer might modify the previous event value */ | ||
1855 | again: | ||
1856 | prev_count = local64_read(&event->hw.prev_count); | ||
1857 | new_count = uncore_read_counter(box, event); | ||
1858 | if (local64_xchg(&event->hw.prev_count, new_count) != prev_count) | ||
1859 | goto again; | ||
1860 | |||
1861 | delta = (new_count << shift) - (prev_count << shift); | ||
1862 | delta >>= shift; | ||
1863 | |||
1864 | local64_add(delta, &event->count); | ||
1865 | } | ||
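Editorial note on the shift trick above: for a w-bit counter, both samples are placed in the top w bits, subtracted modulo 2^64, and shifted back down, which yields the delta modulo 2^w even across a wrap. E.g. with w = 44 (shift = 20), prev = 0xfffffffffff and new = 0x5 give ((new << 20) - (prev << 20)) >> 20 = 0x6.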
1866 | |||
1867 | /* | ||
1868 | * The overflow interrupt is unavailable on SandyBridge-EP and broken | ||
1869 | * on SandyBridge, so we use an hrtimer to periodically poll the | ||
1870 | * counters before they can overflow unnoticed. | ||
1871 | */ | ||
1872 | static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) | ||
1873 | { | ||
1874 | struct intel_uncore_box *box; | ||
1875 | unsigned long flags; | ||
1876 | int bit; | ||
1877 | |||
1878 | box = container_of(hrtimer, struct intel_uncore_box, hrtimer); | ||
1879 | if (!box->n_active || box->cpu != smp_processor_id()) | ||
1880 | return HRTIMER_NORESTART; | ||
1881 | /* | ||
1882 | * disable local interrupts to prevent uncore_pmu_event_start/stop | ||
1883 | * from interrupting the update process | ||
1884 | */ | ||
1885 | local_irq_save(flags); | ||
1886 | |||
1887 | for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) | ||
1888 | uncore_perf_event_update(box, box->events[bit]); | ||
1889 | |||
1890 | local_irq_restore(flags); | ||
1891 | |||
1892 | hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL)); | ||
1893 | return HRTIMER_RESTART; | ||
1894 | } | ||
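Back-of-the-envelope bound (editorial, assuming an uncore clock around 3 GHz): the narrowest counters here are 44 bits, so a clockticks-style event wraps after roughly 2^44 / 3e9 ~= 5.9e3 seconds (about 98 minutes). Any polling period comfortably below that bound lets the update above observe every counter less than one full wrap apart.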
1895 | |||
1896 | static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) | ||
1897 | { | ||
1898 | __hrtimer_start_range_ns(&box->hrtimer, | ||
1899 | ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0, | ||
1900 | HRTIMER_MODE_REL_PINNED, 0); | ||
1901 | } | ||
1902 | |||
1903 | static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) | ||
1904 | { | ||
1905 | hrtimer_cancel(&box->hrtimer); | ||
1906 | } | ||
1907 | |||
1908 | static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) | ||
1909 | { | ||
1910 | hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
1911 | box->hrtimer.function = uncore_pmu_hrtimer; | ||
1912 | } | ||
1913 | |||
1914 | struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu) | ||
1915 | { | ||
1916 | struct intel_uncore_box *box; | ||
1917 | int i, size; | ||
1918 | |||
1919 | size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); | ||
1920 | |||
1921 | box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); | ||
1922 | if (!box) | ||
1923 | return NULL; | ||
1924 | |||
1925 | for (i = 0; i < type->num_shared_regs; i++) | ||
1926 | raw_spin_lock_init(&box->shared_regs[i].lock); | ||
1927 | |||
1928 | uncore_pmu_init_hrtimer(box); | ||
1929 | atomic_set(&box->refcnt, 1); | ||
1930 | box->cpu = -1; | ||
1931 | box->phys_id = -1; | ||
1932 | |||
1933 | return box; | ||
1934 | } | ||
1935 | |||
1936 | static struct intel_uncore_box * | ||
1937 | uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) | ||
1938 | { | ||
1939 | struct intel_uncore_box *box; | ||
1940 | |||
1941 | box = *per_cpu_ptr(pmu->box, cpu); | ||
1942 | if (box) | ||
1943 | return box; | ||
1944 | |||
1945 | raw_spin_lock(&uncore_box_lock); | ||
1946 | list_for_each_entry(box, &pmu->box_list, list) { | ||
1947 | if (box->phys_id == topology_physical_package_id(cpu)) { | ||
1948 | atomic_inc(&box->refcnt); | ||
1949 | *per_cpu_ptr(pmu->box, cpu) = box; | ||
1950 | break; | ||
1951 | } | ||
1952 | } | ||
1953 | raw_spin_unlock(&uncore_box_lock); | ||
1954 | |||
1955 | return *per_cpu_ptr(pmu->box, cpu); | ||
1956 | } | ||
1957 | |||
1958 | static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) | ||
1959 | { | ||
1960 | return container_of(event->pmu, struct intel_uncore_pmu, pmu); | ||
1961 | } | ||
1962 | |||
1963 | static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) | ||
1964 | { | ||
1965 | /* | ||
1966 | * perf core schedules events on a per-cpu basis; uncore events are | ||
1967 | * collected by one of the cpus inside a physical package. | ||
1968 | */ | ||
1969 | return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); | ||
1970 | } | ||
1971 | |||
1972 | static int | ||
1973 | uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) | ||
1974 | { | ||
1975 | struct perf_event *event; | ||
1976 | int n, max_count; | ||
1977 | |||
1978 | max_count = box->pmu->type->num_counters; | ||
1979 | if (box->pmu->type->fixed_ctl) | ||
1980 | max_count++; | ||
1981 | |||
1982 | if (box->n_events >= max_count) | ||
1983 | return -EINVAL; | ||
1984 | |||
1985 | n = box->n_events; | ||
1986 | box->event_list[n] = leader; | ||
1987 | n++; | ||
1988 | if (!dogrp) | ||
1989 | return n; | ||
1990 | |||
1991 | list_for_each_entry(event, &leader->sibling_list, group_entry) { | ||
1992 | if (event->state <= PERF_EVENT_STATE_OFF) | ||
1993 | continue; | ||
1994 | |||
1995 | if (n >= max_count) | ||
1996 | return -EINVAL; | ||
1997 | |||
1998 | box->event_list[n] = event; | ||
1999 | n++; | ||
2000 | } | ||
2001 | return n; | ||
2002 | } | ||
2003 | |||
2004 | static struct event_constraint * | ||
2005 | uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
2006 | { | ||
2007 | struct intel_uncore_type *type = box->pmu->type; | ||
2008 | struct event_constraint *c; | ||
2009 | |||
2010 | if (type->ops->get_constraint) { | ||
2011 | c = type->ops->get_constraint(box, event); | ||
2012 | if (c) | ||
2013 | return c; | ||
2014 | } | ||
2015 | |||
2016 | if (event->hw.config == ~0ULL) | ||
2017 | return &constraint_fixed; | ||
2018 | |||
2019 | if (type->constraints) { | ||
2020 | for_each_event_constraint(c, type->constraints) { | ||
2021 | if ((event->hw.config & c->cmask) == c->code) | ||
2022 | return c; | ||
2023 | } | ||
2024 | } | ||
2025 | |||
2026 | return &type->unconstrainted; | ||
2027 | } | ||
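The lookup order above is fixed: the type's get_constraint hook first, then the fixed-counter constraint (hw.config == ~0ULL), then a static per-type constraint table, and finally the catch-all unconstrained mask. As a sketch of what such a table looks like (the event codes and counter masks here are made up; UNCORE_EVENT_CONSTRAINT and EVENT_CONSTRAINT_END come from the headers included below):

	/* illustrative only: event 0x80 may use counters 0-1, 0x81 counters 2-3 */
	static struct event_constraint fake_uncore_constraints[] = {
		UNCORE_EVENT_CONSTRAINT(0x80, 0x3),
		UNCORE_EVENT_CONSTRAINT(0x81, 0xc),
		EVENT_CONSTRAINT_END
	};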
2028 | |||
2029 | static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event) | ||
2030 | { | ||
2031 | if (box->pmu->type->ops->put_constraint) | ||
2032 | box->pmu->type->ops->put_constraint(box, event); | ||
2033 | } | ||
2034 | |||
2035 | static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) | ||
2036 | { | ||
2037 | unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; | ||
2038 | struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; | ||
2039 | int i, wmin, wmax, ret = 0; | ||
2040 | struct hw_perf_event *hwc; | ||
2041 | |||
2042 | bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); | ||
2043 | |||
2044 | for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { | ||
2045 | c = uncore_get_event_constraint(box, box->event_list[i]); | ||
2046 | constraints[i] = c; | ||
2047 | wmin = min(wmin, c->weight); | ||
2048 | wmax = max(wmax, c->weight); | ||
2049 | } | ||
2050 | |||
2051 | /* fastpath, try to reuse previous register */ | ||
2052 | for (i = 0; i < n; i++) { | ||
2053 | hwc = &box->event_list[i]->hw; | ||
2054 | c = constraints[i]; | ||
2055 | |||
2056 | /* never assigned */ | ||
2057 | if (hwc->idx == -1) | ||
2058 | break; | ||
2059 | |||
2060 | /* constraint still honored */ | ||
2061 | if (!test_bit(hwc->idx, c->idxmsk)) | ||
2062 | break; | ||
2063 | |||
2064 | /* not already used */ | ||
2065 | if (test_bit(hwc->idx, used_mask)) | ||
2066 | break; | ||
2067 | |||
2068 | __set_bit(hwc->idx, used_mask); | ||
2069 | if (assign) | ||
2070 | assign[i] = hwc->idx; | ||
2071 | } | ||
2072 | /* slow path */ | ||
2073 | if (i != n) | ||
2074 | ret = perf_assign_events(constraints, n, wmin, wmax, assign); | ||
2075 | |||
2076 | if (!assign || ret) { | ||
2077 | for (i = 0; i < n; i++) | ||
2078 | uncore_put_event_constraint(box, box->event_list[i]); | ||
2079 | } | ||
2080 | return ret ? -EINVAL : 0; | ||
2081 | } | ||
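In other words: the fastpath keeps every event on its previous counter as long as each one still satisfies its constraint and no counter is claimed twice; the moment any event fails that check, the generic perf_assign_events() solver reassigns the whole set, working from the most constrained (lowest weight) events upward. With assign == NULL (the validation path) the constraints are only checked and then released again.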
2082 | |||
2083 | static void uncore_pmu_event_start(struct perf_event *event, int flags) | ||
2084 | { | ||
2085 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
2086 | int idx = event->hw.idx; | ||
2087 | |||
2088 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | ||
2089 | return; | ||
2090 | |||
2091 | if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) | ||
2092 | return; | ||
2093 | |||
2094 | event->hw.state = 0; | ||
2095 | box->events[idx] = event; | ||
2096 | box->n_active++; | ||
2097 | __set_bit(idx, box->active_mask); | ||
2098 | |||
2099 | local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); | ||
2100 | uncore_enable_event(box, event); | ||
2101 | |||
2102 | if (box->n_active == 1) { | ||
2103 | uncore_enable_box(box); | ||
2104 | uncore_pmu_start_hrtimer(box); | ||
2105 | } | ||
2106 | } | ||
2107 | |||
2108 | static void uncore_pmu_event_stop(struct perf_event *event, int flags) | ||
2109 | { | ||
2110 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
2111 | struct hw_perf_event *hwc = &event->hw; | ||
2112 | |||
2113 | if (__test_and_clear_bit(hwc->idx, box->active_mask)) { | ||
2114 | uncore_disable_event(box, event); | ||
2115 | box->n_active--; | ||
2116 | box->events[hwc->idx] = NULL; | ||
2117 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
2118 | hwc->state |= PERF_HES_STOPPED; | ||
2119 | |||
2120 | if (box->n_active == 0) { | ||
2121 | uncore_disable_box(box); | ||
2122 | uncore_pmu_cancel_hrtimer(box); | ||
2123 | } | ||
2124 | } | ||
2125 | |||
2126 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
2127 | /* | ||
2128 | * Drain the remaining delta count out of an event | ||
2129 | * that we are disabling: | ||
2130 | */ | ||
2131 | uncore_perf_event_update(box, event); | ||
2132 | hwc->state |= PERF_HES_UPTODATE; | ||
2133 | } | ||
2134 | } | ||
2135 | |||
2136 | static int uncore_pmu_event_add(struct perf_event *event, int flags) | ||
2137 | { | ||
2138 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
2139 | struct hw_perf_event *hwc = &event->hw; | ||
2140 | int assign[UNCORE_PMC_IDX_MAX]; | ||
2141 | int i, n, ret; | ||
2142 | |||
2143 | if (!box) | ||
2144 | return -ENODEV; | ||
2145 | |||
2146 | ret = n = uncore_collect_events(box, event, false); | ||
2147 | if (ret < 0) | ||
2148 | return ret; | ||
2149 | |||
2150 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
2151 | if (!(flags & PERF_EF_START)) | ||
2152 | hwc->state |= PERF_HES_ARCH; | ||
2153 | |||
2154 | ret = uncore_assign_events(box, assign, n); | ||
2155 | if (ret) | ||
2156 | return ret; | ||
2157 | |||
2158 | /* save events moving to new counters */ | ||
2159 | for (i = 0; i < box->n_events; i++) { | ||
2160 | event = box->event_list[i]; | ||
2161 | hwc = &event->hw; | ||
2162 | |||
2163 | if (hwc->idx == assign[i] && | ||
2164 | hwc->last_tag == box->tags[assign[i]]) | ||
2165 | continue; | ||
2166 | /* | ||
2167 | * Ensure we don't accidentally enable a stopped | ||
2168 | * counter simply because we rescheduled. | ||
2169 | */ | ||
2170 | if (hwc->state & PERF_HES_STOPPED) | ||
2171 | hwc->state |= PERF_HES_ARCH; | ||
2172 | |||
2173 | uncore_pmu_event_stop(event, PERF_EF_UPDATE); | ||
2174 | } | ||
2175 | |||
2176 | /* reprogram moved events into new counters */ | ||
2177 | for (i = 0; i < n; i++) { | ||
2178 | event = box->event_list[i]; | ||
2179 | hwc = &event->hw; | ||
2180 | |||
2181 | if (hwc->idx != assign[i] || | ||
2182 | hwc->last_tag != box->tags[assign[i]]) | ||
2183 | uncore_assign_hw_event(box, event, assign[i]); | ||
2184 | else if (i < box->n_events) | ||
2185 | continue; | ||
2186 | |||
2187 | if (hwc->state & PERF_HES_ARCH) | ||
2188 | continue; | ||
2189 | |||
2190 | uncore_pmu_event_start(event, 0); | ||
2191 | } | ||
2192 | box->n_events = n; | ||
2193 | |||
2194 | return 0; | ||
2195 | } | ||
2196 | |||
2197 | static void uncore_pmu_event_del(struct perf_event *event, int flags) | ||
2198 | { | ||
2199 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
2200 | int i; | ||
2201 | |||
2202 | uncore_pmu_event_stop(event, PERF_EF_UPDATE); | ||
2203 | |||
2204 | for (i = 0; i < box->n_events; i++) { | ||
2205 | if (event == box->event_list[i]) { | ||
2206 | uncore_put_event_constraint(box, event); | ||
2207 | |||
2208 | while (++i < box->n_events) | ||
2209 | box->event_list[i - 1] = box->event_list[i]; | ||
2210 | |||
2211 | --box->n_events; | ||
2212 | break; | ||
2213 | } | ||
2214 | } | ||
2215 | |||
2216 | event->hw.idx = -1; | ||
2217 | event->hw.last_tag = ~0ULL; | ||
2218 | } | ||
2219 | |||
2220 | static void uncore_pmu_event_read(struct perf_event *event) | ||
2221 | { | ||
2222 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
2223 | uncore_perf_event_update(box, event); | ||
2224 | } | ||
2225 | |||
2226 | /* | ||
2227 | * validation ensures the group can be loaded onto the | ||
2228 | * PMU if it was the only group available. | ||
2229 | */ | ||
2230 | static int uncore_validate_group(struct intel_uncore_pmu *pmu, | ||
2231 | struct perf_event *event) | ||
2232 | { | ||
2233 | struct perf_event *leader = event->group_leader; | ||
2234 | struct intel_uncore_box *fake_box; | ||
2235 | int ret = -EINVAL, n; | ||
2236 | |||
2237 | fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); | ||
2238 | if (!fake_box) | ||
2239 | return -ENOMEM; | ||
2240 | |||
2241 | fake_box->pmu = pmu; | ||
2242 | /* | ||
2243 | * the event is not yet connected with its | ||
2244 | * siblings; therefore we must first collect | ||
2245 | * the existing siblings, then add the new event | ||
2246 | * before we can simulate the scheduling. | ||
2247 | */ | ||
2248 | n = uncore_collect_events(fake_box, leader, true); | ||
2249 | if (n < 0) | ||
2250 | goto out; | ||
2251 | |||
2252 | fake_box->n_events = n; | ||
2253 | n = uncore_collect_events(fake_box, event, false); | ||
2254 | if (n < 0) | ||
2255 | goto out; | ||
2256 | |||
2257 | fake_box->n_events = n; | ||
2258 | |||
2259 | ret = uncore_assign_events(fake_box, NULL, n); | ||
2260 | out: | ||
2261 | kfree(fake_box); | ||
2262 | return ret; | ||
2263 | } | ||
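Note that uncore_alloc_box() leaves phys_id at -1, so uncore_box_is_fake() (defined in the header below) reports fake_box as fake; get_constraint/put_constraint implementations can rely on that to avoid mutating real shared-register state while a group is merely being validated.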
2264 | |||
2265 | int uncore_pmu_event_init(struct perf_event *event) | ||
2266 | { | ||
2267 | struct intel_uncore_pmu *pmu; | ||
2268 | struct intel_uncore_box *box; | ||
2269 | struct hw_perf_event *hwc = &event->hw; | ||
2270 | int ret; | ||
2271 | |||
2272 | if (event->attr.type != event->pmu->type) | ||
2273 | return -ENOENT; | ||
2274 | |||
2275 | pmu = uncore_event_to_pmu(event); | ||
2276 | /* no device found for this pmu */ | ||
2277 | if (pmu->func_id < 0) | ||
2278 | return -ENOENT; | ||
2279 | |||
2280 | /* | ||
2281 | * The uncore PMU measures at all privilege levels all the time, | ||
2282 | * so it doesn't make sense to specify any exclude bits. | ||
2283 | */ | ||
2284 | if (event->attr.exclude_user || event->attr.exclude_kernel || | ||
2285 | event->attr.exclude_hv || event->attr.exclude_idle) | ||
2286 | return -EINVAL; | ||
2287 | |||
2288 | /* Sampling not supported yet */ | ||
2289 | if (hwc->sample_period) | ||
2290 | return -EINVAL; | ||
2291 | |||
2292 | /* | ||
2293 | * Place all uncore events for a particular physical package | ||
2294 | * onto a single cpu | ||
2295 | */ | ||
2296 | if (event->cpu < 0) | ||
2297 | return -EINVAL; | ||
2298 | box = uncore_pmu_to_box(pmu, event->cpu); | ||
2299 | if (!box || box->cpu < 0) | ||
2300 | return -EINVAL; | ||
2301 | event->cpu = box->cpu; | ||
2302 | |||
2303 | event->hw.idx = -1; | ||
2304 | event->hw.last_tag = ~0ULL; | ||
2305 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
2306 | |||
2307 | if (event->attr.config == UNCORE_FIXED_EVENT) { | ||
2308 | /* no fixed counter */ | ||
2309 | if (!pmu->type->fixed_ctl) | ||
2310 | return -EINVAL; | ||
2311 | /* | ||
2312 | * if there is only one fixed counter, only the first pmu | ||
2313 | * can access the fixed counter | ||
2314 | */ | ||
2315 | if (pmu->type->single_fixed && pmu->pmu_idx > 0) | ||
2316 | return -EINVAL; | ||
2317 | hwc->config = ~0ULL; | ||
2318 | } else { | ||
2319 | hwc->config = event->attr.config & pmu->type->event_mask; | ||
2320 | if (pmu->type->ops->hw_config) { | ||
2321 | ret = pmu->type->ops->hw_config(box, event); | ||
2322 | if (ret) | ||
2323 | return ret; | ||
2324 | } | ||
2325 | } | ||
2326 | |||
2327 | if (event->group_leader != event) | ||
2328 | ret = uncore_validate_group(pmu, event); | ||
2329 | else | ||
2330 | ret = 0; | ||
2331 | |||
2332 | return ret; | ||
2333 | } | ||
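Since uncore_pmu_register() below sets task_ctx_nr to perf_invalid_context and event->cpu < 0 is rejected here, these events can only be opened as per-cpu, system-wide counters. A minimal user-space sketch, assuming the PMU's dynamic type number has already been read from /sys/bus/event_source/devices/<name>/type (error handling omitted):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int open_uncore_event(int pmu_type, __u64 config, int cpu)
	{
		struct perf_event_attr attr = {
			.type	= pmu_type,	/* dynamic type from sysfs */
			.size	= sizeof(attr),
			.config	= config,
		};
		/* pid == -1, cpu >= 0: counting event bound to one cpu */
		return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
	}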
2334 | |||
2335 | static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) | ||
2336 | { | ||
2337 | int ret; | ||
2338 | |||
2339 | pmu->pmu = (struct pmu) { | ||
2340 | .attr_groups = pmu->type->attr_groups, | ||
2341 | .task_ctx_nr = perf_invalid_context, | ||
2342 | .event_init = uncore_pmu_event_init, | ||
2343 | .add = uncore_pmu_event_add, | ||
2344 | .del = uncore_pmu_event_del, | ||
2345 | .start = uncore_pmu_event_start, | ||
2346 | .stop = uncore_pmu_event_stop, | ||
2347 | .read = uncore_pmu_event_read, | ||
2348 | }; | ||
2349 | |||
2350 | if (pmu->type->num_boxes == 1) { | ||
2351 | if (strlen(pmu->type->name) > 0) | ||
2352 | sprintf(pmu->name, "uncore_%s", pmu->type->name); | ||
2353 | else | ||
2354 | sprintf(pmu->name, "uncore"); | ||
2355 | } else { | ||
2356 | sprintf(pmu->name, "uncore_%s_%d", pmu->type->name, | ||
2357 | pmu->pmu_idx); | ||
2358 | } | ||
2359 | |||
2360 | ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); | ||
2361 | return ret; | ||
2362 | } | ||
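With this naming scheme a multi-box type such as the SNB-EP Cbox shows up as uncore_cbox_0, uncore_cbox_1, ... under /sys/bus/event_source/devices/, while a single-box type registers as uncore_<name>, or plain "uncore" if the type name is empty.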
2363 | |||
2364 | static void __init uncore_type_exit(struct intel_uncore_type *type) | ||
2365 | { | ||
2366 | int i; | ||
2367 | |||
2368 | for (i = 0; i < type->num_boxes; i++) | ||
2369 | free_percpu(type->pmus[i].box); | ||
2370 | kfree(type->pmus); | ||
2371 | type->pmus = NULL; | ||
2372 | kfree(type->attr_groups[1]); | ||
2373 | type->attr_groups[1] = NULL; | ||
2374 | } | ||
2375 | |||
2376 | static void uncore_types_exit(struct intel_uncore_type **types) | ||
2377 | { | ||
2378 | int i; | ||
2379 | for (i = 0; types[i]; i++) | ||
2380 | uncore_type_exit(types[i]); | ||
2381 | } | ||
2382 | |||
2383 | static int __init uncore_type_init(struct intel_uncore_type *type) | ||
2384 | { | ||
2385 | struct intel_uncore_pmu *pmus; | ||
2386 | struct attribute_group *events_group; | ||
2387 | struct attribute **attrs; | ||
2388 | int i, j; | ||
2389 | |||
2390 | pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL); | ||
2391 | if (!pmus) | ||
2392 | return -ENOMEM; | ||
2393 | |||
2394 | type->unconstrainted = (struct event_constraint) | ||
2395 | __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, | ||
2396 | 0, type->num_counters, 0); | ||
2397 | |||
2398 | for (i = 0; i < type->num_boxes; i++) { | ||
2399 | pmus[i].func_id = -1; | ||
2400 | pmus[i].pmu_idx = i; | ||
2401 | pmus[i].type = type; | ||
2402 | INIT_LIST_HEAD(&pmus[i].box_list); | ||
2403 | pmus[i].box = alloc_percpu(struct intel_uncore_box *); | ||
2404 | if (!pmus[i].box) | ||
2405 | goto fail; | ||
2406 | } | ||
2407 | |||
2408 | if (type->event_descs) { | ||
2409 | i = 0; | ||
2410 | while (type->event_descs[i].attr.attr.name) | ||
2411 | i++; | ||
2412 | |||
2413 | events_group = kzalloc(sizeof(struct attribute *) * (i + 1) + | ||
2414 | sizeof(*events_group), GFP_KERNEL); | ||
2415 | if (!events_group) | ||
2416 | goto fail; | ||
2417 | |||
2418 | attrs = (struct attribute **)(events_group + 1); | ||
2419 | events_group->name = "events"; | ||
2420 | events_group->attrs = attrs; | ||
2421 | |||
2422 | for (j = 0; j < i; j++) | ||
2423 | attrs[j] = &type->event_descs[j].attr.attr; | ||
2424 | |||
2425 | type->attr_groups[1] = events_group; | ||
2426 | } | ||
2427 | |||
2428 | type->pmus = pmus; | ||
2429 | return 0; | ||
2430 | fail: | ||
2431 | uncore_type_exit(type); | ||
2432 | return -ENOMEM; | ||
2433 | } | ||
2434 | |||
2435 | static int __init uncore_types_init(struct intel_uncore_type **types) | ||
2436 | { | ||
2437 | int i, ret; | ||
2438 | |||
2439 | for (i = 0; types[i]; i++) { | ||
2440 | ret = uncore_type_init(types[i]); | ||
2441 | if (ret) | ||
2442 | goto fail; | ||
2443 | } | ||
2444 | return 0; | ||
2445 | fail: | ||
2446 | while (--i >= 0) | ||
2447 | uncore_type_exit(types[i]); | ||
2448 | return ret; | ||
2449 | } | ||
2450 | |||
2451 | static struct pci_driver *uncore_pci_driver; | ||
2452 | static bool pcidrv_registered; | ||
2453 | |||
2454 | /* | ||
2455 | * add a pci uncore device | ||
2456 | */ | ||
2457 | static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev) | ||
2458 | { | ||
2459 | struct intel_uncore_pmu *pmu; | ||
2460 | struct intel_uncore_box *box; | ||
2461 | int i, phys_id; | ||
2462 | |||
2463 | phys_id = pcibus_to_physid[pdev->bus->number]; | ||
2464 | if (phys_id < 0) | ||
2465 | return -ENODEV; | ||
2466 | |||
2467 | box = uncore_alloc_box(type, 0); | ||
2468 | if (!box) | ||
2469 | return -ENOMEM; | ||
2470 | |||
2471 | /* | ||
2472 | * for performance monitoring units with multiple boxes, | ||
2473 | * each box has a different function id. | ||
2474 | */ | ||
2475 | for (i = 0; i < type->num_boxes; i++) { | ||
2476 | pmu = &type->pmus[i]; | ||
2477 | if (pmu->func_id == pdev->devfn) | ||
2478 | break; | ||
2479 | if (pmu->func_id < 0) { | ||
2480 | pmu->func_id = pdev->devfn; | ||
2481 | break; | ||
2482 | } | ||
2483 | pmu = NULL; | ||
2484 | } | ||
2485 | |||
2486 | if (!pmu) { | ||
2487 | kfree(box); | ||
2488 | return -EINVAL; | ||
2489 | } | ||
2490 | |||
2491 | box->phys_id = phys_id; | ||
2492 | box->pci_dev = pdev; | ||
2493 | box->pmu = pmu; | ||
2494 | uncore_box_init(box); | ||
2495 | pci_set_drvdata(pdev, box); | ||
2496 | |||
2497 | raw_spin_lock(&uncore_box_lock); | ||
2498 | list_add_tail(&box->list, &pmu->box_list); | ||
2499 | raw_spin_unlock(&uncore_box_lock); | ||
2500 | |||
2501 | return 0; | ||
2502 | } | ||
2503 | |||
2504 | static void uncore_pci_remove(struct pci_dev *pdev) | ||
2505 | { | ||
2506 | struct intel_uncore_box *box = pci_get_drvdata(pdev); | ||
2507 | struct intel_uncore_pmu *pmu = box->pmu; | ||
2508 | int cpu, phys_id = pcibus_to_physid[pdev->bus->number]; | ||
2509 | |||
2510 | if (WARN_ON_ONCE(phys_id != box->phys_id)) | ||
2511 | return; | ||
2512 | |||
2513 | raw_spin_lock(&uncore_box_lock); | ||
2514 | list_del(&box->list); | ||
2515 | raw_spin_unlock(&uncore_box_lock); | ||
2516 | |||
2517 | for_each_possible_cpu(cpu) { | ||
2518 | if (*per_cpu_ptr(pmu->box, cpu) == box) { | ||
2519 | *per_cpu_ptr(pmu->box, cpu) = NULL; | ||
2520 | atomic_dec(&box->refcnt); | ||
2521 | } | ||
2522 | } | ||
2523 | |||
2524 | WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); | ||
2525 | kfree(box); | ||
2526 | } | ||
2527 | |||
2528 | static int __devinit uncore_pci_probe(struct pci_dev *pdev, | ||
2529 | const struct pci_device_id *id) | ||
2530 | { | ||
2531 | struct intel_uncore_type *type; | ||
2532 | |||
2533 | type = (struct intel_uncore_type *)id->driver_data; | ||
2534 | |||
2535 | return uncore_pci_add(type, pdev); | ||
2536 | } | ||
2537 | |||
2538 | static int __init uncore_pci_init(void) | ||
2539 | { | ||
2540 | int ret; | ||
2541 | |||
2542 | switch (boot_cpu_data.x86_model) { | ||
2543 | case 45: /* Sandy Bridge-EP */ | ||
2544 | pci_uncores = snbep_pci_uncores; | ||
2545 | uncore_pci_driver = &snbep_uncore_pci_driver; | ||
2546 | snbep_pci2phy_map_init(); | ||
2547 | break; | ||
2548 | default: | ||
2549 | return 0; | ||
2550 | } | ||
2551 | |||
2552 | ret = uncore_types_init(pci_uncores); | ||
2553 | if (ret) | ||
2554 | return ret; | ||
2555 | |||
2556 | uncore_pci_driver->probe = uncore_pci_probe; | ||
2557 | uncore_pci_driver->remove = uncore_pci_remove; | ||
2558 | |||
2559 | ret = pci_register_driver(uncore_pci_driver); | ||
2560 | if (ret == 0) | ||
2561 | pcidrv_registered = true; | ||
2562 | else | ||
2563 | uncore_types_exit(pci_uncores); | ||
2564 | |||
2565 | return ret; | ||
2566 | } | ||
2567 | |||
2568 | static void __init uncore_pci_exit(void) | ||
2569 | { | ||
2570 | if (pcidrv_registered) { | ||
2571 | pcidrv_registered = false; | ||
2572 | pci_unregister_driver(uncore_pci_driver); | ||
2573 | uncore_types_exit(pci_uncores); | ||
2574 | } | ||
2575 | } | ||
2576 | |||
2577 | static void __cpuinit uncore_cpu_dying(int cpu) | ||
2578 | { | ||
2579 | struct intel_uncore_type *type; | ||
2580 | struct intel_uncore_pmu *pmu; | ||
2581 | struct intel_uncore_box *box; | ||
2582 | int i, j; | ||
2583 | |||
2584 | for (i = 0; msr_uncores[i]; i++) { | ||
2585 | type = msr_uncores[i]; | ||
2586 | for (j = 0; j < type->num_boxes; j++) { | ||
2587 | pmu = &type->pmus[j]; | ||
2588 | box = *per_cpu_ptr(pmu->box, cpu); | ||
2589 | *per_cpu_ptr(pmu->box, cpu) = NULL; | ||
2590 | if (box && atomic_dec_and_test(&box->refcnt)) | ||
2591 | kfree(box); | ||
2592 | } | ||
2593 | } | ||
2594 | } | ||
2595 | |||
2596 | static int __cpuinit uncore_cpu_starting(int cpu) | ||
2597 | { | ||
2598 | struct intel_uncore_type *type; | ||
2599 | struct intel_uncore_pmu *pmu; | ||
2600 | struct intel_uncore_box *box, *exist; | ||
2601 | int i, j, k, phys_id; | ||
2602 | |||
2603 | phys_id = topology_physical_package_id(cpu); | ||
2604 | |||
2605 | for (i = 0; msr_uncores[i]; i++) { | ||
2606 | type = msr_uncores[i]; | ||
2607 | for (j = 0; j < type->num_boxes; j++) { | ||
2608 | pmu = &type->pmus[j]; | ||
2609 | box = *per_cpu_ptr(pmu->box, cpu); | ||
2610 | /* called by uncore_cpu_init? */ | ||
2611 | if (box && box->phys_id >= 0) { | ||
2612 | uncore_box_init(box); | ||
2613 | continue; | ||
2614 | } | ||
2615 | |||
2616 | for_each_online_cpu(k) { | ||
2617 | exist = *per_cpu_ptr(pmu->box, k); | ||
2618 | if (exist && exist->phys_id == phys_id) { | ||
2619 | atomic_inc(&exist->refcnt); | ||
2620 | *per_cpu_ptr(pmu->box, cpu) = exist; | ||
2621 | kfree(box); | ||
2622 | box = NULL; | ||
2623 | break; | ||
2624 | } | ||
2625 | } | ||
2626 | |||
2627 | if (box) { | ||
2628 | box->phys_id = phys_id; | ||
2629 | uncore_box_init(box); | ||
2630 | } | ||
2631 | } | ||
2632 | } | ||
2633 | return 0; | ||
2634 | } | ||
2635 | |||
2636 | static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id) | ||
2637 | { | ||
2638 | struct intel_uncore_type *type; | ||
2639 | struct intel_uncore_pmu *pmu; | ||
2640 | struct intel_uncore_box *box; | ||
2641 | int i, j; | ||
2642 | |||
2643 | for (i = 0; msr_uncores[i]; i++) { | ||
2644 | type = msr_uncores[i]; | ||
2645 | for (j = 0; j < type->num_boxes; j++) { | ||
2646 | pmu = &type->pmus[j]; | ||
2647 | if (pmu->func_id < 0) | ||
2648 | pmu->func_id = j; | ||
2649 | |||
2650 | box = uncore_alloc_box(type, cpu); | ||
2651 | if (!box) | ||
2652 | return -ENOMEM; | ||
2653 | |||
2654 | box->pmu = pmu; | ||
2655 | box->phys_id = phys_id; | ||
2656 | *per_cpu_ptr(pmu->box, cpu) = box; | ||
2657 | } | ||
2658 | } | ||
2659 | return 0; | ||
2660 | } | ||
2661 | |||
2662 | static void __cpuinit | ||
2663 | uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu) | ||
2664 | { | ||
2665 | struct intel_uncore_type *type; | ||
2666 | struct intel_uncore_pmu *pmu; | ||
2667 | struct intel_uncore_box *box; | ||
2668 | int i, j; | ||
2669 | |||
2670 | for (i = 0; uncores[i]; i++) { | ||
2671 | type = uncores[i]; | ||
2672 | for (j = 0; j < type->num_boxes; j++) { | ||
2673 | pmu = &type->pmus[j]; | ||
2674 | if (old_cpu < 0) | ||
2675 | box = uncore_pmu_to_box(pmu, new_cpu); | ||
2676 | else | ||
2677 | box = uncore_pmu_to_box(pmu, old_cpu); | ||
2678 | if (!box) | ||
2679 | continue; | ||
2680 | |||
2681 | if (old_cpu < 0) { | ||
2682 | WARN_ON_ONCE(box->cpu != -1); | ||
2683 | box->cpu = new_cpu; | ||
2684 | continue; | ||
2685 | } | ||
2686 | |||
2687 | WARN_ON_ONCE(box->cpu != old_cpu); | ||
2688 | if (new_cpu >= 0) { | ||
2689 | uncore_pmu_cancel_hrtimer(box); | ||
2690 | perf_pmu_migrate_context(&pmu->pmu, | ||
2691 | old_cpu, new_cpu); | ||
2692 | box->cpu = new_cpu; | ||
2693 | } else { | ||
2694 | box->cpu = -1; | ||
2695 | } | ||
2696 | } | ||
2697 | } | ||
2698 | } | ||
2699 | |||
2700 | static void __cpuinit uncore_event_exit_cpu(int cpu) | ||
2701 | { | ||
2702 | int i, phys_id, target; | ||
2703 | |||
2704 | /* if the exiting cpu is used for collecting uncore events */ | ||
2705 | if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) | ||
2706 | return; | ||
2707 | |||
2708 | /* find a new cpu to collect uncore events */ | ||
2709 | phys_id = topology_physical_package_id(cpu); | ||
2710 | target = -1; | ||
2711 | for_each_online_cpu(i) { | ||
2712 | if (i == cpu) | ||
2713 | continue; | ||
2714 | if (phys_id == topology_physical_package_id(i)) { | ||
2715 | target = i; | ||
2716 | break; | ||
2717 | } | ||
2718 | } | ||
2719 | |||
2720 | /* migrate uncore events to the new cpu */ | ||
2721 | if (target >= 0) | ||
2722 | cpumask_set_cpu(target, &uncore_cpu_mask); | ||
2723 | |||
2724 | uncore_change_context(msr_uncores, cpu, target); | ||
2725 | uncore_change_context(pci_uncores, cpu, target); | ||
2726 | } | ||
2727 | |||
2728 | static void __cpuinit uncore_event_init_cpu(int cpu) | ||
2729 | { | ||
2730 | int i, phys_id; | ||
2731 | |||
2732 | phys_id = topology_physical_package_id(cpu); | ||
2733 | for_each_cpu(i, &uncore_cpu_mask) { | ||
2734 | if (phys_id == topology_physical_package_id(i)) | ||
2735 | return; | ||
2736 | } | ||
2737 | |||
2738 | cpumask_set_cpu(cpu, &uncore_cpu_mask); | ||
2739 | |||
2740 | uncore_change_context(msr_uncores, -1, cpu); | ||
2741 | uncore_change_context(pci_uncores, -1, cpu); | ||
2742 | } | ||
2743 | |||
2744 | static int | ||
2745 | __cpuinit uncore_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | ||
2746 | { | ||
2747 | unsigned int cpu = (long)hcpu; | ||
2748 | |||
2749 | /* allocate/free data structure for uncore box */ | ||
2750 | switch (action & ~CPU_TASKS_FROZEN) { | ||
2751 | case CPU_UP_PREPARE: | ||
2752 | uncore_cpu_prepare(cpu, -1); | ||
2753 | break; | ||
2754 | case CPU_STARTING: | ||
2755 | uncore_cpu_starting(cpu); | ||
2756 | break; | ||
2757 | case CPU_UP_CANCELED: | ||
2758 | case CPU_DYING: | ||
2759 | uncore_cpu_dying(cpu); | ||
2760 | break; | ||
2761 | default: | ||
2762 | break; | ||
2763 | } | ||
2764 | |||
2765 | /* select the cpu that collects uncore events */ | ||
2766 | switch (action & ~CPU_TASKS_FROZEN) { | ||
2767 | case CPU_DOWN_FAILED: | ||
2768 | case CPU_STARTING: | ||
2769 | uncore_event_init_cpu(cpu); | ||
2770 | break; | ||
2771 | case CPU_DOWN_PREPARE: | ||
2772 | uncore_event_exit_cpu(cpu); | ||
2773 | break; | ||
2774 | default: | ||
2775 | break; | ||
2776 | } | ||
2777 | |||
2778 | return NOTIFY_OK; | ||
2779 | } | ||
2780 | |||
2781 | static struct notifier_block uncore_cpu_nb __cpuinitdata = { | ||
2782 | .notifier_call = uncore_cpu_notifier, | ||
2783 | /* | ||
2784 | * to migrate uncore events, our notifier should be executed | ||
2785 | * before perf core's notifier. | ||
2786 | */ | ||
2787 | .priority = CPU_PRI_PERF + 1, | ||
2788 | }; | ||
2789 | |||
2790 | static void __init uncore_cpu_setup(void *dummy) | ||
2791 | { | ||
2792 | uncore_cpu_starting(smp_processor_id()); | ||
2793 | } | ||
2794 | |||
2795 | static int __init uncore_cpu_init(void) | ||
2796 | { | ||
2797 | int ret, cpu, max_cores; | ||
2798 | |||
2799 | max_cores = boot_cpu_data.x86_max_cores; | ||
2800 | switch (boot_cpu_data.x86_model) { | ||
2801 | case 26: /* Nehalem */ | ||
2802 | case 30: | ||
2803 | case 37: /* Westmere */ | ||
2804 | case 44: | ||
2805 | msr_uncores = nhm_msr_uncores; | ||
2806 | break; | ||
2807 | case 42: /* Sandy Bridge */ | ||
2808 | if (snb_uncore_cbox.num_boxes > max_cores) | ||
2809 | snb_uncore_cbox.num_boxes = max_cores; | ||
2810 | msr_uncores = snb_msr_uncores; | ||
2811 | break; | ||
2812 | case 45: /* Sandy Bridge-EP */ | ||
2813 | if (snbep_uncore_cbox.num_boxes > max_cores) | ||
2814 | snbep_uncore_cbox.num_boxes = max_cores; | ||
2815 | msr_uncores = snbep_msr_uncores; | ||
2816 | break; | ||
2817 | case 46: | ||
2818 | msr_uncores = nhmex_msr_uncores; | ||
2819 | break; | ||
2820 | default: | ||
2821 | return 0; | ||
2822 | } | ||
2823 | |||
2824 | ret = uncore_types_init(msr_uncores); | ||
2825 | if (ret) | ||
2826 | return ret; | ||
2827 | |||
2828 | get_online_cpus(); | ||
2829 | |||
2830 | for_each_online_cpu(cpu) { | ||
2831 | int i, phys_id = topology_physical_package_id(cpu); | ||
2832 | |||
2833 | for_each_cpu(i, &uncore_cpu_mask) { | ||
2834 | if (phys_id == topology_physical_package_id(i)) { | ||
2835 | phys_id = -1; | ||
2836 | break; | ||
2837 | } | ||
2838 | } | ||
2839 | if (phys_id < 0) | ||
2840 | continue; | ||
2841 | |||
2842 | uncore_cpu_prepare(cpu, phys_id); | ||
2843 | uncore_event_init_cpu(cpu); | ||
2844 | } | ||
2845 | on_each_cpu(uncore_cpu_setup, NULL, 1); | ||
2846 | |||
2847 | register_cpu_notifier(&uncore_cpu_nb); | ||
2848 | |||
2849 | put_online_cpus(); | ||
2850 | |||
2851 | return 0; | ||
2852 | } | ||
2853 | |||
2854 | static int __init uncore_pmus_register(void) | ||
2855 | { | ||
2856 | struct intel_uncore_pmu *pmu; | ||
2857 | struct intel_uncore_type *type; | ||
2858 | int i, j; | ||
2859 | |||
2860 | for (i = 0; msr_uncores[i]; i++) { | ||
2861 | type = msr_uncores[i]; | ||
2862 | for (j = 0; j < type->num_boxes; j++) { | ||
2863 | pmu = &type->pmus[j]; | ||
2864 | uncore_pmu_register(pmu); | ||
2865 | } | ||
2866 | } | ||
2867 | |||
2868 | for (i = 0; pci_uncores[i]; i++) { | ||
2869 | type = pci_uncores[i]; | ||
2870 | for (j = 0; j < type->num_boxes; j++) { | ||
2871 | pmu = &type->pmus[j]; | ||
2872 | uncore_pmu_register(pmu); | ||
2873 | } | ||
2874 | } | ||
2875 | |||
2876 | return 0; | ||
2877 | } | ||
2878 | |||
2879 | static int __init intel_uncore_init(void) | ||
2880 | { | ||
2881 | int ret; | ||
2882 | |||
2883 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
2884 | return -ENODEV; | ||
2885 | |||
2886 | ret = uncore_pci_init(); | ||
2887 | if (ret) | ||
2888 | goto fail; | ||
2889 | ret = uncore_cpu_init(); | ||
2890 | if (ret) { | ||
2891 | uncore_pci_exit(); | ||
2892 | goto fail; | ||
2893 | } | ||
2894 | |||
2895 | uncore_pmus_register(); | ||
2896 | return 0; | ||
2897 | fail: | ||
2898 | return ret; | ||
2899 | } | ||
2900 | device_initcall(intel_uncore_init); | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h new file mode 100644 index 000000000000..f3851892e077 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h | |||
@@ -0,0 +1,621 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/slab.h> | ||
3 | #include <linux/pci.h> | ||
4 | #include <linux/perf_event.h> | ||
5 | #include "perf_event.h" | ||
6 | |||
7 | #define UNCORE_PMU_NAME_LEN 32 | ||
8 | #define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC) | ||
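(The hrtimer exists because uncore counters raise no overflow interrupt; firing every 60 s while a box has active events, it folds the typically 48-bit wide hardware counts into the 64-bit software totals long before they can wrap.)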
9 | |||
10 | #define UNCORE_FIXED_EVENT 0xff | ||
11 | #define UNCORE_PMC_IDX_MAX_GENERIC 8 | ||
12 | #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC | ||
13 | #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) | ||
14 | |||
15 | #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) | ||
16 | |||
17 | /* SNB event control */ | ||
18 | #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff | ||
19 | #define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 | ||
20 | #define SNB_UNC_CTL_EDGE_DET (1 << 18) | ||
21 | #define SNB_UNC_CTL_EN (1 << 22) | ||
22 | #define SNB_UNC_CTL_INVERT (1 << 23) | ||
23 | #define SNB_UNC_CTL_CMASK_MASK 0x1f000000 | ||
24 | #define NHM_UNC_CTL_CMASK_MASK 0xff000000 | ||
25 | #define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0) | ||
26 | |||
27 | #define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ | ||
28 | SNB_UNC_CTL_UMASK_MASK | \ | ||
29 | SNB_UNC_CTL_EDGE_DET | \ | ||
30 | SNB_UNC_CTL_INVERT | \ | ||
31 | SNB_UNC_CTL_CMASK_MASK) | ||
32 | |||
33 | #define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ | ||
34 | SNB_UNC_CTL_UMASK_MASK | \ | ||
35 | SNB_UNC_CTL_EDGE_DET | \ | ||
36 | SNB_UNC_CTL_INVERT | \ | ||
37 | NHM_UNC_CTL_CMASK_MASK) | ||
38 | |||
39 | /* SNB global control register */ | ||
40 | #define SNB_UNC_PERF_GLOBAL_CTL 0x391 | ||
41 | #define SNB_UNC_FIXED_CTR_CTRL 0x394 | ||
42 | #define SNB_UNC_FIXED_CTR 0x395 | ||
43 | |||
44 | /* SNB uncore global control */ | ||
45 | #define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1) | ||
46 | #define SNB_UNC_GLOBAL_CTL_EN (1 << 29) | ||
47 | |||
48 | /* SNB Cbo register */ | ||
49 | #define SNB_UNC_CBO_0_PERFEVTSEL0 0x700 | ||
50 | #define SNB_UNC_CBO_0_PER_CTR0 0x706 | ||
51 | #define SNB_UNC_CBO_MSR_OFFSET 0x10 | ||
52 | |||
53 | /* NHM global control register */ | ||
54 | #define NHM_UNC_PERF_GLOBAL_CTL 0x391 | ||
55 | #define NHM_UNC_FIXED_CTR 0x394 | ||
56 | #define NHM_UNC_FIXED_CTR_CTRL 0x395 | ||
57 | |||
58 | /* NHM uncore global control */ | ||
59 | #define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1) | ||
60 | #define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32) | ||
61 | |||
62 | /* NHM uncore register */ | ||
63 | #define NHM_UNC_PERFEVTSEL0 0x3c0 | ||
64 | #define NHM_UNC_UNCORE_PMC0 0x3b0 | ||
65 | |||
66 | /* SNB-EP Box level control */ | ||
67 | #define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) | ||
68 | #define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1) | ||
69 | #define SNBEP_PMON_BOX_CTL_FRZ (1 << 8) | ||
70 | #define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16) | ||
71 | #define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \ | ||
72 | SNBEP_PMON_BOX_CTL_RST_CTRS | \ | ||
73 | SNBEP_PMON_BOX_CTL_FRZ_EN) | ||
74 | /* SNB-EP event control */ | ||
75 | #define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff | ||
76 | #define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00 | ||
77 | #define SNBEP_PMON_CTL_RST (1 << 17) | ||
78 | #define SNBEP_PMON_CTL_EDGE_DET (1 << 18) | ||
79 | #define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) /* only for QPI */ | ||
80 | #define SNBEP_PMON_CTL_EN (1 << 22) | ||
81 | #define SNBEP_PMON_CTL_INVERT (1 << 23) | ||
82 | #define SNBEP_PMON_CTL_TRESH_MASK 0xff000000 | ||
83 | #define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \ | ||
84 | SNBEP_PMON_CTL_UMASK_MASK | \ | ||
85 | SNBEP_PMON_CTL_EDGE_DET | \ | ||
86 | SNBEP_PMON_CTL_INVERT | \ | ||
87 | SNBEP_PMON_CTL_TRESH_MASK) | ||
88 | |||
89 | /* SNB-EP Ubox event control */ | ||
90 | #define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000 | ||
91 | #define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \ | ||
92 | (SNBEP_PMON_CTL_EV_SEL_MASK | \ | ||
93 | SNBEP_PMON_CTL_UMASK_MASK | \ | ||
94 | SNBEP_PMON_CTL_EDGE_DET | \ | ||
95 | SNBEP_PMON_CTL_INVERT | \ | ||
96 | SNBEP_U_MSR_PMON_CTL_TRESH_MASK) | ||
97 | |||
98 | #define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19) | ||
99 | #define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ | ||
100 | SNBEP_CBO_PMON_CTL_TID_EN) | ||
101 | |||
102 | /* SNB-EP PCU event control */ | ||
103 | #define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000 | ||
104 | #define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000 | ||
105 | #define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30) | ||
106 | #define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31) | ||
107 | #define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ | ||
108 | (SNBEP_PMON_CTL_EV_SEL_MASK | \ | ||
109 | SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ | ||
110 | SNBEP_PMON_CTL_EDGE_DET | \ | ||
111 | SNBEP_PMON_CTL_INVERT | \ | ||
112 | SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ | ||
113 | SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ | ||
114 | SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) | ||
115 | |||
116 | #define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \ | ||
117 | (SNBEP_PMON_RAW_EVENT_MASK | \ | ||
118 | SNBEP_PMON_CTL_EV_SEL_EXT) | ||
119 | |||
120 | /* SNB-EP pci control register */ | ||
121 | #define SNBEP_PCI_PMON_BOX_CTL 0xf4 | ||
122 | #define SNBEP_PCI_PMON_CTL0 0xd8 | ||
123 | /* SNB-EP pci counter register */ | ||
124 | #define SNBEP_PCI_PMON_CTR0 0xa0 | ||
125 | |||
126 | /* SNB-EP home agent register */ | ||
127 | #define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40 | ||
128 | #define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44 | ||
129 | #define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48 | ||
130 | /* SNB-EP memory controller register */ | ||
131 | #define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0 | ||
132 | #define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0 | ||
133 | /* SNB-EP QPI register */ | ||
134 | #define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228 | ||
135 | #define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c | ||
136 | #define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238 | ||
137 | #define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c | ||
138 | |||
139 | /* SNB-EP Ubox register */ | ||
140 | #define SNBEP_U_MSR_PMON_CTR0 0xc16 | ||
141 | #define SNBEP_U_MSR_PMON_CTL0 0xc10 | ||
142 | |||
143 | #define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08 | ||
144 | #define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09 | ||
145 | |||
146 | /* SNB-EP Cbo register */ | ||
147 | #define SNBEP_C0_MSR_PMON_CTR0 0xd16 | ||
148 | #define SNBEP_C0_MSR_PMON_CTL0 0xd10 | ||
149 | #define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04 | ||
150 | #define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14 | ||
151 | #define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK 0xfffffc1f | ||
152 | #define SNBEP_CBO_MSR_OFFSET 0x20 | ||
153 | |||
154 | /* SNB-EP PCU register */ | ||
155 | #define SNBEP_PCU_MSR_PMON_CTR0 0xc36 | ||
156 | #define SNBEP_PCU_MSR_PMON_CTL0 0xc30 | ||
157 | #define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24 | ||
158 | #define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34 | ||
159 | #define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff | ||
160 | #define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc | ||
161 | #define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd | ||
162 | |||
163 | /* NHM-EX event control */ | ||
164 | #define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff | ||
165 | #define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00 | ||
166 | #define NHMEX_PMON_CTL_EN_BIT0 (1 << 0) | ||
167 | #define NHMEX_PMON_CTL_EDGE_DET (1 << 18) | ||
168 | #define NHMEX_PMON_CTL_PMI_EN (1 << 20) | ||
169 | #define NHMEX_PMON_CTL_EN_BIT22 (1 << 22) | ||
170 | #define NHMEX_PMON_CTL_INVERT (1 << 23) | ||
171 | #define NHMEX_PMON_CTL_TRESH_MASK 0xff000000 | ||
172 | #define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \ | ||
173 | NHMEX_PMON_CTL_UMASK_MASK | \ | ||
174 | NHMEX_PMON_CTL_EDGE_DET | \ | ||
175 | NHMEX_PMON_CTL_INVERT | \ | ||
176 | NHMEX_PMON_CTL_TRESH_MASK) | ||
177 | |||
178 | /* NHM-EX Ubox */ | ||
179 | #define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00 | ||
180 | #define NHMEX_U_MSR_PMON_CTR 0xc11 | ||
181 | #define NHMEX_U_MSR_PMON_EV_SEL 0xc10 | ||
182 | |||
183 | #define NHMEX_U_PMON_GLOBAL_EN (1 << 0) | ||
184 | #define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e | ||
185 | #define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28) | ||
186 | #define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29) | ||
187 | #define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31) | ||
188 | |||
189 | #define NHMEX_U_PMON_RAW_EVENT_MASK \ | ||
190 | (NHMEX_PMON_CTL_EV_SEL_MASK | \ | ||
191 | NHMEX_PMON_CTL_EDGE_DET) | ||
192 | |||
193 | /* NHM-EX Cbox */ | ||
194 | #define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00 | ||
195 | #define NHMEX_C0_MSR_PMON_CTR0 0xd11 | ||
196 | #define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10 | ||
197 | #define NHMEX_C_MSR_OFFSET 0x20 | ||
198 | |||
199 | /* NHM-EX Bbox */ | ||
200 | #define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20 | ||
201 | #define NHMEX_B0_MSR_PMON_CTR0 0xc31 | ||
202 | #define NHMEX_B0_MSR_PMON_CTL0 0xc30 | ||
203 | #define NHMEX_B_MSR_OFFSET 0x40 | ||
204 | #define NHMEX_B0_MSR_MATCH 0xe45 | ||
205 | #define NHMEX_B0_MSR_MASK 0xe46 | ||
206 | #define NHMEX_B1_MSR_MATCH 0xe4d | ||
207 | #define NHMEX_B1_MSR_MASK 0xe4e | ||
208 | |||
209 | #define NHMEX_B_PMON_CTL_EN (1 << 0) | ||
210 | #define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1 | ||
211 | #define NHMEX_B_PMON_CTL_EV_SEL_MASK \ | ||
212 | (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT) | ||
213 | #define NHMEX_B_PMON_CTR_SHIFT 6 | ||
214 | #define NHMEX_B_PMON_CTR_MASK \ | ||
215 | (0x3 << NHMEX_B_PMON_CTR_SHIFT) | ||
216 | #define NHMEX_B_PMON_RAW_EVENT_MASK \ | ||
217 | (NHMEX_B_PMON_CTL_EV_SEL_MASK | \ | ||
218 | NHMEX_B_PMON_CTR_MASK) | ||
219 | |||
220 | /* NHM-EX Sbox */ | ||
221 | #define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40 | ||
222 | #define NHMEX_S0_MSR_PMON_CTR0 0xc51 | ||
223 | #define NHMEX_S0_MSR_PMON_CTL0 0xc50 | ||
224 | #define NHMEX_S_MSR_OFFSET 0x80 | ||
225 | #define NHMEX_S0_MSR_MM_CFG 0xe48 | ||
226 | #define NHMEX_S0_MSR_MATCH 0xe49 | ||
227 | #define NHMEX_S0_MSR_MASK 0xe4a | ||
228 | #define NHMEX_S1_MSR_MM_CFG 0xe58 | ||
229 | #define NHMEX_S1_MSR_MATCH 0xe59 | ||
230 | #define NHMEX_S1_MSR_MASK 0xe5a | ||
231 | |||
232 | #define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63) | ||
233 | |||
234 | /* NHM-EX Mbox */ | ||
235 | #define NHMEX_M0_MSR_GLOBAL_CTL 0xca0 | ||
236 | #define NHMEX_M0_MSR_PMU_DSP 0xca5 | ||
237 | #define NHMEX_M0_MSR_PMU_ISS 0xca6 | ||
238 | #define NHMEX_M0_MSR_PMU_MAP 0xca7 | ||
239 | #define NHMEX_M0_MSR_PMU_MSC_THR 0xca8 | ||
240 | #define NHMEX_M0_MSR_PMU_PGT 0xca9 | ||
241 | #define NHMEX_M0_MSR_PMU_PLD 0xcaa | ||
242 | #define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab | ||
243 | #define NHMEX_M0_MSR_PMU_CTL0 0xcb0 | ||
244 | #define NHMEX_M0_MSR_PMU_CNT0 0xcb1 | ||
245 | #define NHMEX_M_MSR_OFFSET 0x40 | ||
246 | #define NHMEX_M0_MSR_PMU_MM_CFG 0xe54 | ||
247 | #define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c | ||
248 | |||
249 | #define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63) | ||
250 | #define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL | ||
251 | #define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL | ||
252 | #define NHMEX_M_PMON_ADDR_MASK_SHIFT 34 | ||
253 | |||
254 | #define NHMEX_M_PMON_CTL_EN (1 << 0) | ||
255 | #define NHMEX_M_PMON_CTL_PMI_EN (1 << 1) | ||
256 | #define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2 | ||
257 | #define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \ | ||
258 | (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT) | ||
259 | #define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4 | ||
260 | #define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \ | ||
261 | (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT) | ||
262 | #define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6) | ||
263 | #define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7) | ||
264 | #define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9 | ||
265 | #define NHMEX_M_PMON_CTL_INC_SEL_MASK \ | ||
266 | (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) | ||
267 | #define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19 | ||
268 | #define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \ | ||
269 | (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | ||
270 | #define NHMEX_M_PMON_RAW_EVENT_MASK \ | ||
271 | (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \ | ||
272 | NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \ | ||
273 | NHMEX_M_PMON_CTL_WRAP_MODE | \ | ||
274 | NHMEX_M_PMON_CTL_FLAG_MODE | \ | ||
275 | NHMEX_M_PMON_CTL_INC_SEL_MASK | \ | ||
276 | NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) | ||
277 | |||
278 | |||
279 | #define NHMEX_M_PMON_ZDP_CTL_FVC_FVID_MASK 0x1f | ||
280 | #define NHMEX_M_PMON_ZDP_CTL_FVC_BCMD_MASK (0x7 << 5) | ||
281 | #define NHMEX_M_PMON_ZDP_CTL_FVC_RSP_MASK (0x7 << 8) | ||
282 | #define NHMEX_M_PMON_ZDP_CTL_FVC_PBOX_INIT_ERR (1 << 23) | ||
283 | #define NHMEX_M_PMON_ZDP_CTL_FVC_MASK \ | ||
284 | (NHMEX_M_PMON_ZDP_CTL_FVC_FVID_MASK | \ | ||
285 | NHMEX_M_PMON_ZDP_CTL_FVC_BCMD_MASK | \ | ||
286 | NHMEX_M_PMON_ZDP_CTL_FVC_RSP_MASK | \ | ||
287 | NHMEX_M_PMON_ZDP_CTL_FVC_PBOX_INIT_ERR) | ||
288 | #define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n))) | ||
289 | |||
290 | /* | ||
291 | * use bits 9~13 to select the event if the 7th bit is not set; | ||
292 | * otherwise use bits 19~21 to select the event. | ||
293 | */ | ||
294 | #define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT) | ||
295 | #define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \ | ||
296 | NHMEX_M_PMON_CTL_FLAG_MODE) | ||
297 | #define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \ | ||
298 | NHMEX_M_PMON_CTL_FLAG_MODE) | ||
299 | #define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \ | ||
300 | NHMEX_M_PMON_CTL_FLAG_MODE) | ||
301 | #define MBOX_INC_SEL_EXTAR_REG(c, r) \ | ||
302 | EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \ | ||
303 | MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r) | ||
304 | #define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \ | ||
305 | EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \ | ||
306 | MBOX_SET_FLAG_SEL_MASK, \ | ||
307 | (u64)-1, NHMEX_M_##r) | ||
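A worked expansion of the two encodings, with an illustrative select value of 0x3:

	/*
	 * MBOX_INC_SEL(0x3)      = 0x3 << 9               = 0x00000600 (bit 7 clear)
	 * MBOX_SET_FLAG_SEL(0x3) = (0x3 << 19) | (1 << 7) = 0x00180080 (bit 7 set)
	 */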
308 | |||
309 | /* NHM-EX Rbox */ | ||
310 | #define NHMEX_R_MSR_GLOBAL_CTL 0xe00 | ||
311 | #define NHMEX_R_MSR_PMON_CTL0 0xe10 | ||
312 | #define NHMEX_R_MSR_PMON_CNT0 0xe11 | ||
313 | #define NHMEX_R_MSR_OFFSET 0x20 | ||
314 | |||
315 | #define NHMEX_R_MSR_PORTN_QLX_CFG(n) \ | ||
316 | ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4)) | ||
317 | #define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n)) | ||
318 | #define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n)) | ||
319 | #define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \ | ||
320 | (((n) < 4 ? 0 : 0x10) + (n) * 4) | ||
321 | #define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \ | ||
322 | (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) | ||
323 | #define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \ | ||
324 | (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1) | ||
325 | #define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \ | ||
326 | (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2) | ||
327 | #define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \ | ||
328 | (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n)) | ||
329 | #define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \ | ||
330 | (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1) | ||
331 | #define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \ | ||
332 | (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2) | ||
333 | |||
334 | #define NHMEX_R_PMON_CTL_EN (1 << 0) | ||
335 | #define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1 | ||
336 | #define NHMEX_R_PMON_CTL_EV_SEL_MASK \ | ||
337 | (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT) | ||
338 | #define NHMEX_R_PMON_CTL_PMI_EN (1 << 6) | ||
339 | #define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK | ||
340 | |||
341 | /* NHM-EX Wbox */ | ||
342 | #define NHMEX_W_MSR_GLOBAL_CTL 0xc80 | ||
343 | #define NHMEX_W_MSR_PMON_CNT0 0xc90 | ||
344 | #define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91 | ||
345 | #define NHMEX_W_MSR_PMON_FIXED_CTR 0x394 | ||
346 | #define NHMEX_W_MSR_PMON_FIXED_CTL 0x395 | ||
347 | |||
348 | #define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31) | ||
349 | |||
350 | struct intel_uncore_ops; | ||
351 | struct intel_uncore_pmu; | ||
352 | struct intel_uncore_box; | ||
353 | struct uncore_event_desc; | ||
354 | |||
355 | struct intel_uncore_type { | ||
356 | const char *name; | ||
357 | int num_counters; | ||
358 | int num_boxes; | ||
359 | int perf_ctr_bits; | ||
360 | int fixed_ctr_bits; | ||
361 | unsigned perf_ctr; | ||
362 | unsigned event_ctl; | ||
363 | unsigned event_mask; | ||
364 | unsigned fixed_ctr; | ||
365 | unsigned fixed_ctl; | ||
366 | unsigned box_ctl; | ||
367 | unsigned msr_offset; | ||
368 | unsigned num_shared_regs:8; | ||
369 | unsigned single_fixed:1; | ||
370 | unsigned pair_ctr_ctl:1; | ||
371 | struct event_constraint unconstrainted; | ||
372 | struct event_constraint *constraints; | ||
373 | struct intel_uncore_pmu *pmus; | ||
374 | struct intel_uncore_ops *ops; | ||
375 | struct uncore_event_desc *event_descs; | ||
376 | const struct attribute_group *attr_groups[3]; | ||
377 | }; | ||
378 | |||
379 | #define format_group attr_groups[0] | ||
380 | |||
381 | struct intel_uncore_ops { | ||
382 | void (*init_box)(struct intel_uncore_box *); | ||
383 | void (*disable_box)(struct intel_uncore_box *); | ||
384 | void (*enable_box)(struct intel_uncore_box *); | ||
385 | void (*disable_event)(struct intel_uncore_box *, struct perf_event *); | ||
386 | void (*enable_event)(struct intel_uncore_box *, struct perf_event *); | ||
387 | u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *); | ||
388 | int (*hw_config)(struct intel_uncore_box *, struct perf_event *); | ||
389 | struct event_constraint *(*get_constraint)(struct intel_uncore_box *, | ||
390 | struct perf_event *); | ||
391 | void (*put_constraint)(struct intel_uncore_box *, struct perf_event *); | ||
392 | }; | ||
393 | |||
394 | struct intel_uncore_pmu { | ||
395 | struct pmu pmu; | ||
396 | char name[UNCORE_PMU_NAME_LEN]; | ||
397 | int pmu_idx; | ||
398 | int func_id; | ||
399 | struct intel_uncore_type *type; | ||
400 | struct intel_uncore_box ** __percpu box; | ||
401 | struct list_head box_list; | ||
402 | }; | ||
403 | |||
404 | struct intel_uncore_extra_reg { | ||
405 | raw_spinlock_t lock; | ||
406 | u64 config, config1, config2; | ||
407 | atomic_t ref; | ||
408 | }; | ||
409 | |||
410 | struct intel_uncore_box { | ||
411 | int phys_id; | ||
412 | int n_active; /* number of active events */ | ||
413 | int n_events; | ||
414 | int cpu; /* cpu to collect events */ | ||
415 | unsigned long flags; | ||
416 | atomic_t refcnt; | ||
417 | struct perf_event *events[UNCORE_PMC_IDX_MAX]; | ||
418 | struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; | ||
419 | unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; | ||
420 | u64 tags[UNCORE_PMC_IDX_MAX]; | ||
421 | struct pci_dev *pci_dev; | ||
422 | struct intel_uncore_pmu *pmu; | ||
423 | struct hrtimer hrtimer; | ||
424 | struct list_head list; | ||
425 | struct intel_uncore_extra_reg shared_regs[0]; | ||
426 | }; | ||
427 | |||
428 | #define UNCORE_BOX_FLAG_INITIATED 0 | ||
429 | |||
430 | struct uncore_event_desc { | ||
431 | struct kobj_attribute attr; | ||
432 | const char *config; | ||
433 | }; | ||
434 | |||
435 | #define INTEL_UNCORE_EVENT_DESC(_name, _config) \ | ||
436 | { \ | ||
437 | .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \ | ||
438 | .config = _config, \ | ||
439 | } | ||
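Typical use follows the pattern in the .c file; the clockticks encoding below is the SNB-EP IMC one, cited from memory, so treat it as illustrative:

	static struct uncore_event_desc snbep_uncore_imc_events[] = {
		INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
		{ /* end: all zeroes */ },
	};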
440 | |||
441 | #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ | ||
442 | static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ | ||
443 | struct kobj_attribute *attr, \ | ||
444 | char *page) \ | ||
445 | { \ | ||
446 | BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ | ||
447 | return sprintf(page, _format "\n"); \ | ||
448 | } \ | ||
449 | static struct kobj_attribute format_attr_##_var = \ | ||
450 | __ATTR(_name, 0444, __uncore_##_var##_show, NULL) | ||
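For example (the field ranges shown are the SNB generic ones, given for illustration):

	DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
	DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");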
451 | |||
452 | |||
453 | static ssize_t uncore_event_show(struct kobject *kobj, | ||
454 | struct kobj_attribute *attr, char *buf) | ||
455 | { | ||
456 | struct uncore_event_desc *event = | ||
457 | container_of(attr, struct uncore_event_desc, attr); | ||
458 | return sprintf(buf, "%s", event->config); | ||
459 | } | ||
460 | |||
461 | static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) | ||
462 | { | ||
463 | return box->pmu->type->box_ctl; | ||
464 | } | ||
465 | |||
466 | static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box) | ||
467 | { | ||
468 | return box->pmu->type->fixed_ctl; | ||
469 | } | ||
470 | |||
471 | static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box) | ||
472 | { | ||
473 | return box->pmu->type->fixed_ctr; | ||
474 | } | ||
475 | |||
476 | static inline | ||
477 | unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx) | ||
478 | { | ||
479 | return idx * 4 + box->pmu->type->event_ctl; | ||
480 | } | ||
481 | |||
482 | static inline | ||
483 | unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx) | ||
484 | { | ||
485 | return idx * 8 + box->pmu->type->perf_ctr; | ||
486 | } | ||
487 | |||
488 | static inline | ||
489 | unsigned uncore_msr_box_ctl(struct intel_uncore_box *box) | ||
490 | { | ||
491 | if (!box->pmu->type->box_ctl) | ||
492 | return 0; | ||
493 | return box->pmu->type->box_ctl + | ||
494 | box->pmu->type->msr_offset * box->pmu->pmu_idx; | ||
495 | } | ||
496 | |||
497 | static inline | ||
498 | unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box) | ||
499 | { | ||
500 | if (!box->pmu->type->fixed_ctl) | ||
501 | return 0; | ||
502 | return box->pmu->type->fixed_ctl + | ||
503 | box->pmu->type->msr_offset * box->pmu->pmu_idx; | ||
504 | } | ||
505 | |||
506 | static inline | ||
507 | unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box) | ||
508 | { | ||
509 | return box->pmu->type->fixed_ctr + | ||
510 | box->pmu->type->msr_offset * box->pmu->pmu_idx; | ||
511 | } | ||
512 | |||
513 | static inline | ||
514 | unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) | ||
515 | { | ||
516 | return box->pmu->type->event_ctl + | ||
517 | (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + | ||
518 | box->pmu->type->msr_offset * box->pmu->pmu_idx; | ||
519 | } | ||
520 | |||
521 | static inline | ||
522 | unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) | ||
523 | { | ||
524 | return box->pmu->type->perf_ctr + | ||
525 | (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + | ||
526 | box->pmu->type->msr_offset * box->pmu->pmu_idx; | ||
527 | } | ||
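A worked address, using the SNB-EP Cbox values defined above (perf_ctr = SNBEP_C0_MSR_PMON_CTR0 = 0xd16, msr_offset = SNBEP_CBO_MSR_OFFSET = 0x20) and assuming pair_ctr_ctl is clear: counter 2 of box 1 is MSR 0xd16 + 2 + 0x20 * 1 = 0xd38.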
528 | |||
529 | static inline | ||
530 | unsigned uncore_fixed_ctl(struct intel_uncore_box *box) | ||
531 | { | ||
532 | if (box->pci_dev) | ||
533 | return uncore_pci_fixed_ctl(box); | ||
534 | else | ||
535 | return uncore_msr_fixed_ctl(box); | ||
536 | } | ||
537 | |||
538 | static inline | ||
539 | unsigned uncore_fixed_ctr(struct intel_uncore_box *box) | ||
540 | { | ||
541 | if (box->pci_dev) | ||
542 | return uncore_pci_fixed_ctr(box); | ||
543 | else | ||
544 | return uncore_msr_fixed_ctr(box); | ||
545 | } | ||
546 | |||
547 | static inline | ||
548 | unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) | ||
549 | { | ||
550 | if (box->pci_dev) | ||
551 | return uncore_pci_event_ctl(box, idx); | ||
552 | else | ||
553 | return uncore_msr_event_ctl(box, idx); | ||
554 | } | ||
555 | |||
556 | static inline | ||
557 | unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx) | ||
558 | { | ||
559 | if (box->pci_dev) | ||
560 | return uncore_pci_perf_ctr(box, idx); | ||
561 | else | ||
562 | return uncore_msr_perf_ctr(box, idx); | ||
563 | } | ||
564 | |||
565 | static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box) | ||
566 | { | ||
567 | return box->pmu->type->perf_ctr_bits; | ||
568 | } | ||
569 | |||
570 | static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box) | ||
571 | { | ||
572 | return box->pmu->type->fixed_ctr_bits; | ||
573 | } | ||
574 | |||
575 | static inline int uncore_num_counters(struct intel_uncore_box *box) | ||
576 | { | ||
577 | return box->pmu->type->num_counters; | ||
578 | } | ||
579 | |||
580 | static inline void uncore_disable_box(struct intel_uncore_box *box) | ||
581 | { | ||
582 | if (box->pmu->type->ops->disable_box) | ||
583 | box->pmu->type->ops->disable_box(box); | ||
584 | } | ||
585 | |||
586 | static inline void uncore_enable_box(struct intel_uncore_box *box) | ||
587 | { | ||
588 | if (box->pmu->type->ops->enable_box) | ||
589 | box->pmu->type->ops->enable_box(box); | ||
590 | } | ||
591 | |||
592 | static inline void uncore_disable_event(struct intel_uncore_box *box, | ||
593 | struct perf_event *event) | ||
594 | { | ||
595 | box->pmu->type->ops->disable_event(box, event); | ||
596 | } | ||
597 | |||
598 | static inline void uncore_enable_event(struct intel_uncore_box *box, | ||
599 | struct perf_event *event) | ||
600 | { | ||
601 | box->pmu->type->ops->enable_event(box, event); | ||
602 | } | ||
603 | |||
604 | static inline u64 uncore_read_counter(struct intel_uncore_box *box, | ||
605 | struct perf_event *event) | ||
606 | { | ||
607 | return box->pmu->type->ops->read_counter(box, event); | ||
608 | } | ||
609 | |||
610 | static inline void uncore_box_init(struct intel_uncore_box *box) | ||
611 | { | ||
612 | if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { | ||
613 | if (box->pmu->type->ops->init_box) | ||
614 | box->pmu->type->ops->init_box(box); | ||
615 | } | ||
616 | } | ||
617 | |||
618 | static inline bool uncore_box_is_fake(struct intel_uncore_box *box) | ||
619 | { | ||
620 | return (box->phys_id < 0); | ||
621 | } | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 47124a73dd73..92c7e39a079f 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void) | |||
895 | * So at moment let leave metrics turned on forever -- it's | 895 | * So at moment let leave metrics turned on forever -- it's |
896 | * ok for now but need to be revisited! | 896 | * ok for now but need to be revisited! |
897 | * | 897 | * |
898 | * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); | 898 | * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0); |
899 | * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); | 899 | * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0); |
900 | */ | 900 | */ |
901 | } | 901 | } |
902 | 902 | ||
@@ -909,7 +909,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) | |||
909 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get | 909 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get |
910 | * asserted again and again | 910 | * asserted again and again |
911 | */ | 911 | */ |
912 | (void)checking_wrmsrl(hwc->config_base, | 912 | (void)wrmsrl_safe(hwc->config_base, |
913 | (u64)(p4_config_unpack_cccr(hwc->config)) & | 913 | (u64)(p4_config_unpack_cccr(hwc->config)) & |
914 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); | 914 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); |
915 | } | 915 | } |
@@ -943,8 +943,8 @@ static void p4_pmu_enable_pebs(u64 config) | |||
943 | 943 | ||
944 | bind = &p4_pebs_bind_map[idx]; | 944 | bind = &p4_pebs_bind_map[idx]; |
945 | 945 | ||
946 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); | 946 | (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); |
947 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); | 947 | (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); |
948 | } | 948 | } |
949 | 949 | ||
950 | static void p4_pmu_enable_event(struct perf_event *event) | 950 | static void p4_pmu_enable_event(struct perf_event *event) |
@@ -978,8 +978,8 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
978 | */ | 978 | */ |
979 | p4_pmu_enable_pebs(hwc->config); | 979 | p4_pmu_enable_pebs(hwc->config); |
980 | 980 | ||
981 | (void)checking_wrmsrl(escr_addr, escr_conf); | 981 | (void)wrmsrl_safe(escr_addr, escr_conf); |
982 | (void)checking_wrmsrl(hwc->config_base, | 982 | (void)wrmsrl_safe(hwc->config_base, |
983 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); | 983 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); |
984 | } | 984 | } |
985 | 985 | ||
@@ -1325,7 +1325,7 @@ __init int p4_pmu_init(void) | |||
1325 | unsigned int low, high; | 1325 | unsigned int low, high; |
1326 | 1326 | ||
1327 | /* If we get stripped -- indexing fails */ | 1327 | /* If we get stripped -- indexing fails */ |
1328 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | 1328 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); |
1329 | 1329 | ||
1330 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | 1330 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); |
1331 | if (!(low & (1 << 7))) { | 1331 | if (!(low & (1 << 7))) { |
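
The checking_wrmsrl() -> wrmsrl_safe() conversion above is a pure rename; the function still returns non-zero if the MSR write faults. A hedged sketch of a caller that checks the result instead of casting it to void as the PMU code does (the warning text is illustrative):

#include <asm/msr.h>
#include <linux/printk.h>

static void example_disable_pebs(void)
{
	/* wrmsrl_safe() catches the #GP a missing MSR would raise and
	 * reports it as an error return instead of an oops. */
	if (wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0ULL))
		pr_warn("PEBS disable faulted; MSR not implemented?\n");
}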
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 32bcfc7dd230..e4dd0f7a0453 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -71,7 +71,7 @@ p6_pmu_disable_event(struct perf_event *event) | |||
71 | if (cpuc->enabled) | 71 | if (cpuc->enabled) |
72 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | 72 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
73 | 73 | ||
74 | (void)checking_wrmsrl(hwc->config_base, val); | 74 | (void)wrmsrl_safe(hwc->config_base, val); |
75 | } | 75 | } |
76 | 76 | ||
77 | static void p6_pmu_enable_event(struct perf_event *event) | 77 | static void p6_pmu_enable_event(struct perf_event *event) |
@@ -84,7 +84,7 @@ static void p6_pmu_enable_event(struct perf_event *event) | |||
84 | if (cpuc->enabled) | 84 | if (cpuc->enabled) |
85 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | 85 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
86 | 86 | ||
87 | (void)checking_wrmsrl(hwc->config_base, val); | 87 | (void)wrmsrl_safe(hwc->config_base, val); |
88 | } | 88 | } |
89 | 89 | ||
90 | PMU_FORMAT_ATTR(event, "config:0-7" ); | 90 | PMU_FORMAT_ATTR(event, "config:0-7" ); |
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c deleted file mode 100644 index a640ae5ad201..000000000000 --- a/arch/x86/kernel/cpu/sched.c +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/math64.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/irqflags.h> | ||
5 | |||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/processor.h> | ||
8 | |||
9 | #ifdef CONFIG_SMP | ||
10 | |||
11 | static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); | ||
12 | |||
13 | static unsigned long scale_aperfmperf(void) | ||
14 | { | ||
15 | struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); | ||
16 | unsigned long ratio, flags; | ||
17 | |||
18 | local_irq_save(flags); | ||
19 | get_aperfmperf(&val); | ||
20 | local_irq_restore(flags); | ||
21 | |||
22 | ratio = calc_aperfmperf_ratio(old, &val); | ||
23 | *old = val; | ||
24 | |||
25 | return ratio; | ||
26 | } | ||
27 | |||
28 | unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
29 | { | ||
30 | /* | ||
31 | * do aperf/mperf on the cpu level because it includes things | ||
32 | * like turbo mode, which are relevant to full cores. | ||
33 | */ | ||
34 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
35 | return scale_aperfmperf(); | ||
36 | |||
37 | /* | ||
38 | * maybe have something cpufreq here | ||
39 | */ | ||
40 | |||
41 | return default_scale_freq_power(sd, cpu); | ||
42 | } | ||
43 | |||
44 | unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
45 | { | ||
46 | /* | ||
47 | * aperf/mperf already includes the smt gain | ||
48 | */ | ||
49 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
50 | return SCHED_LOAD_SCALE; | ||
51 | |||
52 | return default_scale_smt_power(sd, cpu); | ||
53 | } | ||
54 | |||
55 | #endif | ||
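
For context, the core of the deleted file is the delta-APERF over delta-MPERF ratio used to scale scheduler capacity. A minimal sketch under stated assumptions: field names follow struct aperfmperf, and the shift handling of the real calc_aperfmperf_ratio() is omitted:

#include <linux/math64.h>
#include <linux/sched.h>

static unsigned long example_aperfmperf_ratio(const struct aperfmperf *old,
					      const struct aperfmperf *cur)
{
	u64 daperf = cur->aperf - old->aperf;	/* delivered cycles */
	u64 dmperf = cur->mperf - old->mperf;	/* reference cycles */

	if (!dmperf)
		return SCHED_LOAD_SCALE;	/* no new reference ticks */

	return div64_u64(daperf * SCHED_LOAD_SCALE, dmperf);
}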
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 571246d81edf..ae42418bc50f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -27,8 +27,8 @@ static int die_counter; | |||
27 | 27 | ||
28 | void printk_address(unsigned long address, int reliable) | 28 | void printk_address(unsigned long address, int reliable) |
29 | { | 29 | { |
30 | printk(" [<%p>] %s%pB\n", (void *) address, | 30 | pr_cont(" [<%p>] %s%pB\n", |
31 | reliable ? "" : "? ", (void *) address); | 31 | (void *)address, reliable ? "" : "? ", (void *)address); |
32 | } | 32 | } |
33 | 33 | ||
34 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 34 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
@@ -271,6 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
271 | current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) | 271 | current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) |
272 | return 1; | 272 | return 1; |
273 | 273 | ||
274 | print_modules(); | ||
274 | show_regs(regs); | 275 | show_regs(regs); |
275 | #ifdef CONFIG_X86_32 | 276 | #ifdef CONFIG_X86_32 |
276 | if (user_mode_vm(regs)) { | 277 | if (user_mode_vm(regs)) { |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index e0b1d783daab..1038a417ea53 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -73,11 +73,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
73 | if (kstack_end(stack)) | 73 | if (kstack_end(stack)) |
74 | break; | 74 | break; |
75 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) | 75 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) |
76 | printk(KERN_CONT "\n"); | 76 | pr_cont("\n"); |
77 | printk(KERN_CONT " %08lx", *stack++); | 77 | pr_cont(" %08lx", *stack++); |
78 | touch_nmi_watchdog(); | 78 | touch_nmi_watchdog(); |
79 | } | 79 | } |
80 | printk(KERN_CONT "\n"); | 80 | pr_cont("\n"); |
81 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 81 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
82 | } | 82 | } |
83 | 83 | ||
@@ -86,12 +86,11 @@ void show_regs(struct pt_regs *regs) | |||
86 | { | 86 | { |
87 | int i; | 87 | int i; |
88 | 88 | ||
89 | print_modules(); | ||
90 | __show_regs(regs, !user_mode_vm(regs)); | 89 | __show_regs(regs, !user_mode_vm(regs)); |
91 | 90 | ||
92 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", | 91 | pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", |
93 | TASK_COMM_LEN, current->comm, task_pid_nr(current), | 92 | TASK_COMM_LEN, current->comm, task_pid_nr(current), |
94 | current_thread_info(), current, task_thread_info(current)); | 93 | current_thread_info(), current, task_thread_info(current)); |
95 | /* | 94 | /* |
96 | * When in-kernel, we also print out the stack and code at the | 95 | * When in-kernel, we also print out the stack and code at the |
97 | * time of the fault.. | 96 | * time of the fault.. |
@@ -102,10 +101,10 @@ void show_regs(struct pt_regs *regs) | |||
102 | unsigned char c; | 101 | unsigned char c; |
103 | u8 *ip; | 102 | u8 *ip; |
104 | 103 | ||
105 | printk(KERN_EMERG "Stack:\n"); | 104 | pr_emerg("Stack:\n"); |
106 | show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG); | 105 | show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG); |
107 | 106 | ||
108 | printk(KERN_EMERG "Code: "); | 107 | pr_emerg("Code:"); |
109 | 108 | ||
110 | ip = (u8 *)regs->ip - code_prologue; | 109 | ip = (u8 *)regs->ip - code_prologue; |
111 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | 110 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { |
@@ -116,16 +115,16 @@ void show_regs(struct pt_regs *regs) | |||
116 | for (i = 0; i < code_len; i++, ip++) { | 115 | for (i = 0; i < code_len; i++, ip++) { |
117 | if (ip < (u8 *)PAGE_OFFSET || | 116 | if (ip < (u8 *)PAGE_OFFSET || |
118 | probe_kernel_address(ip, c)) { | 117 | probe_kernel_address(ip, c)) { |
119 | printk(KERN_CONT " Bad EIP value."); | 118 | pr_cont(" Bad EIP value."); |
120 | break; | 119 | break; |
121 | } | 120 | } |
122 | if (ip == (u8 *)regs->ip) | 121 | if (ip == (u8 *)regs->ip) |
123 | printk(KERN_CONT "<%02x> ", c); | 122 | pr_cont(" <%02x>", c); |
124 | else | 123 | else |
125 | printk(KERN_CONT "%02x ", c); | 124 | pr_cont(" %02x", c); |
126 | } | 125 | } |
127 | } | 126 | } |
128 | printk(KERN_CONT "\n"); | 127 | pr_cont("\n"); |
129 | } | 128 | } |
130 | 129 | ||
131 | int is_valid_bugaddr(unsigned long ip) | 130 | int is_valid_bugaddr(unsigned long ip) |
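
The printk(KERN_CONT ...) -> pr_cont() conversions above rely on one rule: pr_cont() appends to the record opened by the last level-bearing printk. A hedged sketch of the idiom (function and buffer names are illustrative):

#include <linux/printk.h>

static void example_dump_words(const unsigned long *buf, int n)
{
	int i;

	pr_emerg("Stack:");			/* opens a KERN_EMERG record */
	for (i = 0; i < n; i++)
		pr_cont(" %08lx", buf[i]);	/* appends, no new record */
	pr_cont("\n");				/* terminates the line */
}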
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 791b76122aa8..b653675d5288 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -228,20 +228,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
228 | if (stack >= irq_stack && stack <= irq_stack_end) { | 228 | if (stack >= irq_stack && stack <= irq_stack_end) { |
229 | if (stack == irq_stack_end) { | 229 | if (stack == irq_stack_end) { |
230 | stack = (unsigned long *) (irq_stack_end[-1]); | 230 | stack = (unsigned long *) (irq_stack_end[-1]); |
231 | printk(KERN_CONT " <EOI> "); | 231 | pr_cont(" <EOI> "); |
232 | } | 232 | } |
233 | } else { | 233 | } else { |
234 | if (((long) stack & (THREAD_SIZE-1)) == 0) | 234 | if (((long) stack & (THREAD_SIZE-1)) == 0) |
235 | break; | 235 | break; |
236 | } | 236 | } |
237 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) | 237 | if (i && ((i % STACKSLOTS_PER_LINE) == 0)) |
238 | printk(KERN_CONT "\n"); | 238 | pr_cont("\n"); |
239 | printk(KERN_CONT " %016lx", *stack++); | 239 | pr_cont(" %016lx", *stack++); |
240 | touch_nmi_watchdog(); | 240 | touch_nmi_watchdog(); |
241 | } | 241 | } |
242 | preempt_enable(); | 242 | preempt_enable(); |
243 | 243 | ||
244 | printk(KERN_CONT "\n"); | 244 | pr_cont("\n"); |
245 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 245 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
246 | } | 246 | } |
247 | 247 | ||
@@ -254,10 +254,9 @@ void show_regs(struct pt_regs *regs) | |||
254 | 254 | ||
255 | sp = regs->sp; | 255 | sp = regs->sp; |
256 | printk("CPU %d ", cpu); | 256 | printk("CPU %d ", cpu); |
257 | print_modules(); | ||
258 | __show_regs(regs, 1); | 257 | __show_regs(regs, 1); |
259 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | 258 | printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n", |
260 | cur->comm, cur->pid, task_thread_info(cur), cur); | 259 | cur->comm, cur->pid, task_thread_info(cur), cur); |
261 | 260 | ||
262 | /* | 261 | /* |
263 | * When in-kernel, we also print out the stack and code at the | 262 | * When in-kernel, we also print out the stack and code at the |
@@ -284,16 +283,16 @@ void show_regs(struct pt_regs *regs) | |||
284 | for (i = 0; i < code_len; i++, ip++) { | 283 | for (i = 0; i < code_len; i++, ip++) { |
285 | if (ip < (u8 *)PAGE_OFFSET || | 284 | if (ip < (u8 *)PAGE_OFFSET || |
286 | probe_kernel_address(ip, c)) { | 285 | probe_kernel_address(ip, c)) { |
287 | printk(KERN_CONT " Bad RIP value."); | 286 | pr_cont(" Bad RIP value."); |
288 | break; | 287 | break; |
289 | } | 288 | } |
290 | if (ip == (u8 *)regs->ip) | 289 | if (ip == (u8 *)regs->ip) |
291 | printk(KERN_CONT "<%02x> ", c); | 290 | pr_cont("<%02x> ", c); |
292 | else | 291 | else |
293 | printk(KERN_CONT "%02x ", c); | 292 | pr_cont("%02x ", c); |
294 | } | 293 | } |
295 | } | 294 | } |
296 | printk(KERN_CONT "\n"); | 295 | pr_cont("\n"); |
297 | } | 296 | } |
298 | 297 | ||
299 | int is_valid_bugaddr(unsigned long ip) | 298 | int is_valid_bugaddr(unsigned long ip) |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 41857970517f..ed858e9e9a74 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -944,7 +944,7 @@ void __init e820_reserve_resources(void) | |||
944 | for (i = 0; i < e820_saved.nr_map; i++) { | 944 | for (i = 0; i < e820_saved.nr_map; i++) { |
945 | struct e820entry *entry = &e820_saved.map[i]; | 945 | struct e820entry *entry = &e820_saved.map[i]; |
946 | firmware_map_add_early(entry->addr, | 946 | firmware_map_add_early(entry->addr, |
947 | entry->addr + entry->size - 1, | 947 | entry->addr + entry->size, |
948 | e820_type_to_string(entry->type)); | 948 | e820_type_to_string(entry->type)); |
949 | } | 949 | } |
950 | } | 950 | } |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7d65133b51be..69babd8c834f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1048,24 +1048,6 @@ apicinterrupt LOCAL_TIMER_VECTOR \ | |||
1048 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ | 1048 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
1049 | x86_platform_ipi smp_x86_platform_ipi | 1049 | x86_platform_ipi smp_x86_platform_ipi |
1050 | 1050 | ||
1051 | #ifdef CONFIG_SMP | ||
1052 | ALIGN | ||
1053 | INTR_FRAME | ||
1054 | .irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | ||
1055 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | ||
1056 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | ||
1057 | ENTRY(invalidate_interrupt\idx) | ||
1058 | pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) | ||
1059 | jmp .Lcommon_invalidate_interrupt0 | ||
1060 | CFI_ADJUST_CFA_OFFSET -8 | ||
1061 | END(invalidate_interrupt\idx) | ||
1062 | .endif | ||
1063 | .endr | ||
1064 | CFI_ENDPROC | ||
1065 | apicinterrupt INVALIDATE_TLB_VECTOR_START, \ | ||
1066 | invalidate_interrupt0, smp_invalidate_interrupt | ||
1067 | #endif | ||
1068 | |||
1069 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 1051 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
1070 | threshold_interrupt smp_threshold_interrupt | 1052 | threshold_interrupt smp_threshold_interrupt |
1071 | apicinterrupt THERMAL_APIC_VECTOR \ | 1053 | apicinterrupt THERMAL_APIC_VECTOR \ |
@@ -1758,10 +1740,30 @@ end_repeat_nmi: | |||
1758 | */ | 1740 | */ |
1759 | call save_paranoid | 1741 | call save_paranoid |
1760 | DEFAULT_FRAME 0 | 1742 | DEFAULT_FRAME 0 |
1743 | |||
1744 | /* | ||
1745 | * Save off the CR2 register. If we take a page fault in the NMI then | ||
1746 | * it could corrupt the CR2 value. If the NMI preempts a page fault | ||
1747 | * handler before it was able to read the CR2 register, and then the | ||
1748 | * NMI itself takes a page fault, the page fault that was preempted | ||
1749 | * will read the information from the NMI page fault and not the | ||
1750 | * original fault. Save it off and restore it if it changes. | ||
1751 | * Use the r12 callee-saved register. | ||
1752 | */ | ||
1753 | movq %cr2, %r12 | ||
1754 | |||
1761 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | 1755 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
1762 | movq %rsp,%rdi | 1756 | movq %rsp,%rdi |
1763 | movq $-1,%rsi | 1757 | movq $-1,%rsi |
1764 | call do_nmi | 1758 | call do_nmi |
1759 | |||
1760 | /* Did the NMI take a page fault? Restore cr2 if it did */ | ||
1761 | movq %cr2, %rcx | ||
1762 | cmpq %rcx, %r12 | ||
1763 | je 1f | ||
1764 | movq %r12, %cr2 | ||
1765 | 1: | ||
1766 | |||
1765 | testl %ebx,%ebx /* swapgs needed? */ | 1767 | testl %ebx,%ebx /* swapgs needed? */ |
1766 | jnz nmi_restore | 1768 | jnz nmi_restore |
1767 | nmi_swapgs: | 1769 | nmi_swapgs: |
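
In C terms, the CR2 guard added to the NMI path above behaves like the following. A hedged rendering (the wrapper name is illustrative; the asm keeps the saved value in %r12 because it is callee-saved across the do_nmi call):

static void example_nmi_with_cr2_guard(struct pt_regs *regs)
{
	unsigned long saved_cr2 = read_cr2();

	do_nmi(regs, -1);

	/* If the NMI handler faulted, CR2 now holds its fault address;
	 * restore what the interrupted page-fault handler expects. */
	if (read_cr2() != saved_cr2)
		write_cr2(saved_cr2);
}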
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3dafc6003b7c..1f5f1d5d2a02 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -294,9 +294,9 @@ void fixup_irqs(void) | |||
294 | raw_spin_unlock(&desc->lock); | 294 | raw_spin_unlock(&desc->lock); |
295 | 295 | ||
296 | if (break_affinity && set_affinity) | 296 | if (break_affinity && set_affinity) |
297 | printk("Broke affinity for irq %i\n", irq); | 297 | pr_notice("Broke affinity for irq %i\n", irq); |
298 | else if (!set_affinity) | 298 | else if (!set_affinity) |
299 | printk("Cannot set affinity for irq %i\n", irq); | 299 | pr_notice("Cannot set affinity for irq %i\n", irq); |
300 | } | 300 | } |
301 | 301 | ||
302 | /* | 302 | /* |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 252981afd6c4..6e03b0d69138 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -171,79 +171,6 @@ static void __init smp_intr_init(void) | |||
171 | */ | 171 | */ |
172 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | 172 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); |
173 | 173 | ||
174 | /* IPIs for invalidation */ | ||
175 | #define ALLOC_INVTLB_VEC(NR) \ | ||
176 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \ | ||
177 | invalidate_interrupt##NR) | ||
178 | |||
179 | switch (NUM_INVALIDATE_TLB_VECTORS) { | ||
180 | default: | ||
181 | ALLOC_INVTLB_VEC(31); | ||
182 | case 31: | ||
183 | ALLOC_INVTLB_VEC(30); | ||
184 | case 30: | ||
185 | ALLOC_INVTLB_VEC(29); | ||
186 | case 29: | ||
187 | ALLOC_INVTLB_VEC(28); | ||
188 | case 28: | ||
189 | ALLOC_INVTLB_VEC(27); | ||
190 | case 27: | ||
191 | ALLOC_INVTLB_VEC(26); | ||
192 | case 26: | ||
193 | ALLOC_INVTLB_VEC(25); | ||
194 | case 25: | ||
195 | ALLOC_INVTLB_VEC(24); | ||
196 | case 24: | ||
197 | ALLOC_INVTLB_VEC(23); | ||
198 | case 23: | ||
199 | ALLOC_INVTLB_VEC(22); | ||
200 | case 22: | ||
201 | ALLOC_INVTLB_VEC(21); | ||
202 | case 21: | ||
203 | ALLOC_INVTLB_VEC(20); | ||
204 | case 20: | ||
205 | ALLOC_INVTLB_VEC(19); | ||
206 | case 19: | ||
207 | ALLOC_INVTLB_VEC(18); | ||
208 | case 18: | ||
209 | ALLOC_INVTLB_VEC(17); | ||
210 | case 17: | ||
211 | ALLOC_INVTLB_VEC(16); | ||
212 | case 16: | ||
213 | ALLOC_INVTLB_VEC(15); | ||
214 | case 15: | ||
215 | ALLOC_INVTLB_VEC(14); | ||
216 | case 14: | ||
217 | ALLOC_INVTLB_VEC(13); | ||
218 | case 13: | ||
219 | ALLOC_INVTLB_VEC(12); | ||
220 | case 12: | ||
221 | ALLOC_INVTLB_VEC(11); | ||
222 | case 11: | ||
223 | ALLOC_INVTLB_VEC(10); | ||
224 | case 10: | ||
225 | ALLOC_INVTLB_VEC(9); | ||
226 | case 9: | ||
227 | ALLOC_INVTLB_VEC(8); | ||
228 | case 8: | ||
229 | ALLOC_INVTLB_VEC(7); | ||
230 | case 7: | ||
231 | ALLOC_INVTLB_VEC(6); | ||
232 | case 6: | ||
233 | ALLOC_INVTLB_VEC(5); | ||
234 | case 5: | ||
235 | ALLOC_INVTLB_VEC(4); | ||
236 | case 4: | ||
237 | ALLOC_INVTLB_VEC(3); | ||
238 | case 3: | ||
239 | ALLOC_INVTLB_VEC(2); | ||
240 | case 2: | ||
241 | ALLOC_INVTLB_VEC(1); | ||
242 | case 1: | ||
243 | ALLOC_INVTLB_VEC(0); | ||
244 | break; | ||
245 | } | ||
246 | |||
247 | /* IPI for generic function call */ | 174 | /* IPI for generic function call */ |
248 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 175 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
249 | 176 | ||
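
The deleted fall-through switch above existed only because each invalidate vector has its own handler symbol, which the macro pasted together; with a table of handlers it would collapse to a loop. A purely hypothetical sketch (invalidate_handlers[] does not exist, which is precisely why the macro form was used):

/* Hypothetical table of the per-vector stubs. */
extern void (*invalidate_handlers[NUM_INVALIDATE_TLB_VECTORS])(void);

static void example_alloc_invtlb_gates(void)
{
	int i;

	for (i = 0; i < NUM_INVALIDATE_TLB_VECTORS; i++)
		alloc_intr_gate(INVALIDATE_TLB_VECTOR_START + i,
				invalidate_handlers[i]);
}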
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e554e5ad2fe8..c1d61ee4b4f1 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -39,6 +39,9 @@ | |||
39 | #include <asm/desc.h> | 39 | #include <asm/desc.h> |
40 | #include <asm/tlbflush.h> | 40 | #include <asm/tlbflush.h> |
41 | #include <asm/idle.h> | 41 | #include <asm/idle.h> |
42 | #include <asm/apic.h> | ||
43 | #include <asm/apicdef.h> | ||
44 | #include <asm/hypervisor.h> | ||
42 | 45 | ||
43 | static int kvmapf = 1; | 46 | static int kvmapf = 1; |
44 | 47 | ||
@@ -283,6 +286,22 @@ static void kvm_register_steal_time(void) | |||
283 | cpu, __pa(st)); | 286 | cpu, __pa(st)); |
284 | } | 287 | } |
285 | 288 | ||
289 | static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; | ||
290 | |||
291 | static void kvm_guest_apic_eoi_write(u32 reg, u32 val) | ||
292 | { | ||
293 | /** | ||
294 | * This relies on __test_and_clear_bit to modify the memory | ||
295 | * in a way that is atomic with respect to the local CPU. | ||
296 | * The hypervisor only accesses this memory from the local CPU so | ||
297 | * there's no need for lock or memory barriers. | ||
298 | * An optimization barrier is implied in apic write. | ||
299 | */ | ||
300 | if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi))) | ||
301 | return; | ||
302 | apic_write(APIC_EOI, APIC_EOI_ACK); | ||
303 | } | ||
304 | |||
286 | void __cpuinit kvm_guest_cpu_init(void) | 305 | void __cpuinit kvm_guest_cpu_init(void) |
287 | { | 306 | { |
288 | if (!kvm_para_available()) | 307 | if (!kvm_para_available()) |
@@ -300,11 +319,20 @@ void __cpuinit kvm_guest_cpu_init(void) | |||
300 | smp_processor_id()); | 319 | smp_processor_id()); |
301 | } | 320 | } |
302 | 321 | ||
322 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { | ||
323 | unsigned long pa; | ||
324 | /* Size alignment is implied but just to make it explicit. */ | ||
325 | BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); | ||
326 | __get_cpu_var(kvm_apic_eoi) = 0; | ||
327 | pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; | ||
328 | wrmsrl(MSR_KVM_PV_EOI_EN, pa); | ||
329 | } | ||
330 | |||
303 | if (has_steal_clock) | 331 | if (has_steal_clock) |
304 | kvm_register_steal_time(); | 332 | kvm_register_steal_time(); |
305 | } | 333 | } |
306 | 334 | ||
307 | static void kvm_pv_disable_apf(void *unused) | 335 | static void kvm_pv_disable_apf(void) |
308 | { | 336 | { |
309 | if (!__get_cpu_var(apf_reason).enabled) | 337 | if (!__get_cpu_var(apf_reason).enabled) |
310 | return; | 338 | return; |
@@ -316,11 +344,23 @@ static void kvm_pv_disable_apf(void *unused) | |||
316 | smp_processor_id()); | 344 | smp_processor_id()); |
317 | } | 345 | } |
318 | 346 | ||
347 | static void kvm_pv_guest_cpu_reboot(void *unused) | ||
348 | { | ||
349 | /* | ||
350 | * We disable PV EOI before we load a new kernel by kexec, | ||
351 | * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory. | ||
352 | * The new kernel can re-enable it when it boots. | ||
353 | */ | ||
354 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | ||
355 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); | ||
356 | kvm_pv_disable_apf(); | ||
357 | } | ||
358 | |||
319 | static int kvm_pv_reboot_notify(struct notifier_block *nb, | 359 | static int kvm_pv_reboot_notify(struct notifier_block *nb, |
320 | unsigned long code, void *unused) | 360 | unsigned long code, void *unused) |
321 | { | 361 | { |
322 | if (code == SYS_RESTART) | 362 | if (code == SYS_RESTART) |
323 | on_each_cpu(kvm_pv_disable_apf, NULL, 1); | 363 | on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); |
324 | return NOTIFY_DONE; | 364 | return NOTIFY_DONE; |
325 | } | 365 | } |
326 | 366 | ||
@@ -371,7 +411,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) | |||
371 | static void kvm_guest_cpu_offline(void *dummy) | 411 | static void kvm_guest_cpu_offline(void *dummy) |
372 | { | 412 | { |
373 | kvm_disable_steal_time(); | 413 | kvm_disable_steal_time(); |
374 | kvm_pv_disable_apf(NULL); | 414 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) |
415 | wrmsrl(MSR_KVM_PV_EOI_EN, 0); | ||
416 | kvm_pv_disable_apf(); | ||
375 | apf_task_wake_all(); | 417 | apf_task_wake_all(); |
376 | } | 418 | } |
377 | 419 | ||
@@ -424,6 +466,9 @@ void __init kvm_guest_init(void) | |||
424 | pv_time_ops.steal_clock = kvm_steal_clock; | 466 | pv_time_ops.steal_clock = kvm_steal_clock; |
425 | } | 467 | } |
426 | 468 | ||
469 | if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) | ||
470 | apic_set_eoi_write(kvm_guest_apic_eoi_write); | ||
471 | |||
427 | #ifdef CONFIG_SMP | 472 | #ifdef CONFIG_SMP |
428 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 473 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
429 | register_cpu_notifier(&kvm_cpu_notifier); | 474 | register_cpu_notifier(&kvm_cpu_notifier); |
@@ -432,6 +477,19 @@ void __init kvm_guest_init(void) | |||
432 | #endif | 477 | #endif |
433 | } | 478 | } |
434 | 479 | ||
480 | static bool __init kvm_detect(void) | ||
481 | { | ||
482 | if (!kvm_para_available()) | ||
483 | return false; | ||
484 | return true; | ||
485 | } | ||
486 | |||
487 | const struct hypervisor_x86 x86_hyper_kvm __refconst = { | ||
488 | .name = "KVM", | ||
489 | .detect = kvm_detect, | ||
490 | }; | ||
491 | EXPORT_SYMBOL_GPL(x86_hyper_kvm); | ||
492 | |||
435 | static __init int activate_jump_labels(void) | 493 | static __init int activate_jump_labels(void) |
436 | { | 494 | { |
437 | if (has_steal_clock) { | 495 | if (has_steal_clock) { |
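
The PV EOI support above pairs an enable on CPU bring-up with a disable on offline and reboot/kexec, since MSR_KVM_PV_EOI_EN holds a physical pointer into this kernel's per-cpu data. A minimal sketch of that pairing (the function names are illustrative; the MSR and flag come from the hunks):

static void example_pv_eoi_enable(void)
{
	unsigned long pa = __pa(&__get_cpu_var(kvm_apic_eoi));

	/* The low bit doubles as the enable flag, which is why the
	 * word must be at least 4-byte aligned. */
	wrmsrl(MSR_KVM_PV_EOI_EN, pa | KVM_MSR_ENABLED);
}

static void example_pv_eoi_disable(void)
{
	/* Must happen before the backing memory goes away (kexec). */
	wrmsrl(MSR_KVM_PV_EOI_EN, 0);
}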
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index fbdfc6917180..4873e62db6a1 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -87,6 +87,7 @@ | |||
87 | #include <asm/microcode.h> | 87 | #include <asm/microcode.h> |
88 | #include <asm/processor.h> | 88 | #include <asm/processor.h> |
89 | #include <asm/cpu_device_id.h> | 89 | #include <asm/cpu_device_id.h> |
90 | #include <asm/perf_event.h> | ||
90 | 91 | ||
91 | MODULE_DESCRIPTION("Microcode Update Driver"); | 92 | MODULE_DESCRIPTION("Microcode Update Driver"); |
92 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | 93 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); |
@@ -277,7 +278,6 @@ static int reload_for_cpu(int cpu) | |||
277 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 278 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
278 | int err = 0; | 279 | int err = 0; |
279 | 280 | ||
280 | mutex_lock(&microcode_mutex); | ||
281 | if (uci->valid) { | 281 | if (uci->valid) { |
282 | enum ucode_state ustate; | 282 | enum ucode_state ustate; |
283 | 283 | ||
@@ -288,7 +288,6 @@ static int reload_for_cpu(int cpu) | |||
288 | if (ustate == UCODE_ERROR) | 288 | if (ustate == UCODE_ERROR) |
289 | err = -EINVAL; | 289 | err = -EINVAL; |
290 | } | 290 | } |
291 | mutex_unlock(&microcode_mutex); | ||
292 | 291 | ||
293 | return err; | 292 | return err; |
294 | } | 293 | } |
@@ -298,19 +297,31 @@ static ssize_t reload_store(struct device *dev, | |||
298 | const char *buf, size_t size) | 297 | const char *buf, size_t size) |
299 | { | 298 | { |
300 | unsigned long val; | 299 | unsigned long val; |
301 | int cpu = dev->id; | 300 | int cpu; |
302 | ssize_t ret = 0; | 301 | ssize_t ret = 0, tmp_ret; |
303 | 302 | ||
304 | ret = kstrtoul(buf, 0, &val); | 303 | ret = kstrtoul(buf, 0, &val); |
305 | if (ret) | 304 | if (ret) |
306 | return ret; | 305 | return ret; |
307 | 306 | ||
308 | if (val == 1) { | 307 | if (val != 1) |
309 | get_online_cpus(); | 308 | return size; |
310 | if (cpu_online(cpu)) | 309 | |
311 | ret = reload_for_cpu(cpu); | 310 | get_online_cpus(); |
312 | put_online_cpus(); | 311 | mutex_lock(&microcode_mutex); |
312 | for_each_online_cpu(cpu) { | ||
313 | tmp_ret = reload_for_cpu(cpu); | ||
314 | if (tmp_ret != 0) | ||
315 | pr_warn("Error reloading microcode on CPU %d\n", cpu); | ||
316 | |||
317 | /* save retval of the first encountered reload error */ | ||
318 | if (!ret) | ||
319 | ret = tmp_ret; | ||
313 | } | 320 | } |
321 | if (!ret) | ||
322 | perf_check_microcode(); | ||
323 | mutex_unlock(&microcode_mutex); | ||
324 | put_online_cpus(); | ||
314 | 325 | ||
315 | if (!ret) | 326 | if (!ret) |
316 | ret = size; | 327 | ret = size; |
@@ -339,7 +350,6 @@ static DEVICE_ATTR(version, 0400, version_show, NULL); | |||
339 | static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); | 350 | static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); |
340 | 351 | ||
341 | static struct attribute *mc_default_attrs[] = { | 352 | static struct attribute *mc_default_attrs[] = { |
342 | &dev_attr_reload.attr, | ||
343 | &dev_attr_version.attr, | 353 | &dev_attr_version.attr, |
344 | &dev_attr_processor_flags.attr, | 354 | &dev_attr_processor_flags.attr, |
345 | NULL | 355 | NULL |
@@ -504,7 +514,7 @@ static struct notifier_block __refdata mc_cpu_notifier = { | |||
504 | 514 | ||
505 | #ifdef MODULE | 515 | #ifdef MODULE |
506 | /* Autoload on Intel and AMD systems */ | 516 | /* Autoload on Intel and AMD systems */ |
507 | static const struct x86_cpu_id microcode_id[] = { | 517 | static const struct x86_cpu_id __initconst microcode_id[] = { |
508 | #ifdef CONFIG_MICROCODE_INTEL | 518 | #ifdef CONFIG_MICROCODE_INTEL |
509 | { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, | 519 | { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, |
510 | #endif | 520 | #endif |
@@ -516,6 +526,16 @@ static const struct x86_cpu_id microcode_id[] = { | |||
516 | MODULE_DEVICE_TABLE(x86cpu, microcode_id); | 526 | MODULE_DEVICE_TABLE(x86cpu, microcode_id); |
517 | #endif | 527 | #endif |
518 | 528 | ||
529 | static struct attribute *cpu_root_microcode_attrs[] = { | ||
530 | &dev_attr_reload.attr, | ||
531 | NULL | ||
532 | }; | ||
533 | |||
534 | static struct attribute_group cpu_root_microcode_group = { | ||
535 | .name = "microcode", | ||
536 | .attrs = cpu_root_microcode_attrs, | ||
537 | }; | ||
538 | |||
519 | static int __init microcode_init(void) | 539 | static int __init microcode_init(void) |
520 | { | 540 | { |
521 | struct cpuinfo_x86 *c = &cpu_data(0); | 541 | struct cpuinfo_x86 *c = &cpu_data(0); |
@@ -540,16 +560,25 @@ static int __init microcode_init(void) | |||
540 | mutex_lock(&microcode_mutex); | 560 | mutex_lock(&microcode_mutex); |
541 | 561 | ||
542 | error = subsys_interface_register(&mc_cpu_interface); | 562 | error = subsys_interface_register(&mc_cpu_interface); |
543 | 563 | if (!error) | |
564 | perf_check_microcode(); | ||
544 | mutex_unlock(&microcode_mutex); | 565 | mutex_unlock(&microcode_mutex); |
545 | put_online_cpus(); | 566 | put_online_cpus(); |
546 | 567 | ||
547 | if (error) | 568 | if (error) |
548 | goto out_pdev; | 569 | goto out_pdev; |
549 | 570 | ||
571 | error = sysfs_create_group(&cpu_subsys.dev_root->kobj, | ||
572 | &cpu_root_microcode_group); | ||
573 | |||
574 | if (error) { | ||
575 | pr_err("Error creating microcode group!\n"); | ||
576 | goto out_driver; | ||
577 | } | ||
578 | |||
550 | error = microcode_dev_init(); | 579 | error = microcode_dev_init(); |
551 | if (error) | 580 | if (error) |
552 | goto out_driver; | 581 | goto out_ucode_group; |
553 | 582 | ||
554 | register_syscore_ops(&mc_syscore_ops); | 583 | register_syscore_ops(&mc_syscore_ops); |
555 | register_hotcpu_notifier(&mc_cpu_notifier); | 584 | register_hotcpu_notifier(&mc_cpu_notifier); |
@@ -559,7 +588,11 @@ static int __init microcode_init(void) | |||
559 | 588 | ||
560 | return 0; | 589 | return 0; |
561 | 590 | ||
562 | out_driver: | 591 | out_ucode_group: |
592 | sysfs_remove_group(&cpu_subsys.dev_root->kobj, | ||
593 | &cpu_root_microcode_group); | ||
594 | |||
595 | out_driver: | ||
563 | get_online_cpus(); | 596 | get_online_cpus(); |
564 | mutex_lock(&microcode_mutex); | 597 | mutex_lock(&microcode_mutex); |
565 | 598 | ||
@@ -568,7 +601,7 @@ out_driver: | |||
568 | mutex_unlock(&microcode_mutex); | 601 | mutex_unlock(&microcode_mutex); |
569 | put_online_cpus(); | 602 | put_online_cpus(); |
570 | 603 | ||
571 | out_pdev: | 604 | out_pdev: |
572 | platform_device_unregister(microcode_pdev); | 605 | platform_device_unregister(microcode_pdev); |
573 | return error; | 606 | return error; |
574 | 607 | ||
@@ -584,6 +617,9 @@ static void __exit microcode_exit(void) | |||
584 | unregister_hotcpu_notifier(&mc_cpu_notifier); | 617 | unregister_hotcpu_notifier(&mc_cpu_notifier); |
585 | unregister_syscore_ops(&mc_syscore_ops); | 618 | unregister_syscore_ops(&mc_syscore_ops); |
586 | 619 | ||
620 | sysfs_remove_group(&cpu_subsys.dev_root->kobj, | ||
621 | &cpu_root_microcode_group); | ||
622 | |||
587 | get_online_cpus(); | 623 | get_online_cpus(); |
588 | mutex_lock(&microcode_mutex); | 624 | mutex_lock(&microcode_mutex); |
589 | 625 | ||
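
With the reload attribute moved into a cpu-subsystem-wide "microcode" group, a single write now refreshes every online CPU. A hedged userspace sketch of triggering it (the sysfs path follows the group name registered above):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/microcode/reload", "w");

	if (!f)
		return 1;
	fputs("1", f);		/* reload_store() only acts on the value 1 */
	return fclose(f) ? 1 : 0;
}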
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index f21fd94ac897..216a4d754b0c 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -15,6 +15,9 @@ | |||
15 | along with this program; if not, write to the Free Software | 15 | along with this program; if not, write to the Free Software |
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | |||
19 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
20 | |||
18 | #include <linux/moduleloader.h> | 21 | #include <linux/moduleloader.h> |
19 | #include <linux/elf.h> | 22 | #include <linux/elf.h> |
20 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
@@ -30,9 +33,14 @@ | |||
30 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
31 | 34 | ||
32 | #if 0 | 35 | #if 0 |
33 | #define DEBUGP printk | 36 | #define DEBUGP(fmt, ...) \ |
37 | printk(KERN_DEBUG fmt, ##__VA_ARGS__) | ||
34 | #else | 38 | #else |
35 | #define DEBUGP(fmt...) | 39 | #define DEBUGP(fmt, ...) \ |
40 | do { \ | ||
41 | if (0) \ | ||
42 | printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ | ||
43 | } while (0) | ||
36 | #endif | 44 | #endif |
37 | 45 | ||
38 | void *module_alloc(unsigned long size) | 46 | void *module_alloc(unsigned long size) |
@@ -56,8 +64,8 @@ int apply_relocate(Elf32_Shdr *sechdrs, | |||
56 | Elf32_Sym *sym; | 64 | Elf32_Sym *sym; |
57 | uint32_t *location; | 65 | uint32_t *location; |
58 | 66 | ||
59 | DEBUGP("Applying relocate section %u to %u\n", relsec, | 67 | DEBUGP("Applying relocate section %u to %u\n", |
60 | sechdrs[relsec].sh_info); | 68 | relsec, sechdrs[relsec].sh_info); |
61 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { | 69 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { |
62 | /* This is where to make the change */ | 70 | /* This is where to make the change */ |
63 | location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr | 71 | location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr |
@@ -73,11 +81,11 @@ int apply_relocate(Elf32_Shdr *sechdrs, | |||
73 | *location += sym->st_value; | 81 | *location += sym->st_value; |
74 | break; | 82 | break; |
75 | case R_386_PC32: | 83 | case R_386_PC32: |
76 | /* Add the value, subtract its postition */ | 84 | /* Add the value, subtract its position */ |
77 | *location += sym->st_value - (uint32_t)location; | 85 | *location += sym->st_value - (uint32_t)location; |
78 | break; | 86 | break; |
79 | default: | 87 | default: |
80 | printk(KERN_ERR "module %s: Unknown relocation: %u\n", | 88 | pr_err("%s: Unknown relocation: %u\n", |
81 | me->name, ELF32_R_TYPE(rel[i].r_info)); | 89 | me->name, ELF32_R_TYPE(rel[i].r_info)); |
82 | return -ENOEXEC; | 90 | return -ENOEXEC; |
83 | } | 91 | } |
@@ -97,8 +105,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
97 | void *loc; | 105 | void *loc; |
98 | u64 val; | 106 | u64 val; |
99 | 107 | ||
100 | DEBUGP("Applying relocate section %u to %u\n", relsec, | 108 | DEBUGP("Applying relocate section %u to %u\n", |
101 | sechdrs[relsec].sh_info); | 109 | relsec, sechdrs[relsec].sh_info); |
102 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { | 110 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { |
103 | /* This is where to make the change */ | 111 | /* This is where to make the change */ |
104 | loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr | 112 | loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr |
@@ -110,8 +118,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
110 | + ELF64_R_SYM(rel[i].r_info); | 118 | + ELF64_R_SYM(rel[i].r_info); |
111 | 119 | ||
112 | DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", | 120 | DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", |
113 | (int)ELF64_R_TYPE(rel[i].r_info), | 121 | (int)ELF64_R_TYPE(rel[i].r_info), |
114 | sym->st_value, rel[i].r_addend, (u64)loc); | 122 | sym->st_value, rel[i].r_addend, (u64)loc); |
115 | 123 | ||
116 | val = sym->st_value + rel[i].r_addend; | 124 | val = sym->st_value + rel[i].r_addend; |
117 | 125 | ||
@@ -140,7 +148,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
140 | #endif | 148 | #endif |
141 | break; | 149 | break; |
142 | default: | 150 | default: |
143 | printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", | 151 | pr_err("%s: Unknown rela relocation: %llu\n", |
144 | me->name, ELF64_R_TYPE(rel[i].r_info)); | 152 | me->name, ELF64_R_TYPE(rel[i].r_info)); |
145 | return -ENOEXEC; | 153 | return -ENOEXEC; |
146 | } | 154 | } |
@@ -148,9 +156,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | |||
148 | return 0; | 156 | return 0; |
149 | 157 | ||
150 | overflow: | 158 | overflow: |
151 | printk(KERN_ERR "overflow in relocation type %d val %Lx\n", | 159 | pr_err("overflow in relocation type %d val %Lx\n", |
152 | (int)ELF64_R_TYPE(rel[i].r_info), val); | 160 | (int)ELF64_R_TYPE(rel[i].r_info), val); |
153 | printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", | 161 | pr_err("`%s' likely not compiled with -mcmodel=kernel\n", |
154 | me->name); | 162 | me->name); |
155 | return -ENOEXEC; | 163 | return -ENOEXEC; |
156 | } | 164 | } |
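
Several files in this series adopt the pr_fmt prefix seen at the top of module.c. A minimal sketch of the mechanism: pr_err() and friends expand pr_fmt(fmt) at each call site, so the define must precede the printk.h include (the sample message is illustrative):

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>

static void example(void)
{
	/* Logged as "module: Unknown relocation: 7" -- the prefix is
	 * pasted in at compile time. */
	pr_err("Unknown relocation: %u\n", 7);
}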
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index a0b2f84457be..f84f5c57de35 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) | |||
365 | #ifdef CONFIG_X86_32 | 365 | #ifdef CONFIG_X86_32 |
366 | /* | 366 | /* |
367 | * For i386, NMIs use the same stack as the kernel, and we can | 367 | * For i386, NMIs use the same stack as the kernel, and we can |
368 | * add a workaround to the iret problem in C. Simply have 3 states | 368 | * add a workaround to the iret problem in C (preventing nested |
369 | * the NMI can be in. | 369 | * NMIs if an NMI takes a trap). Simply have 3 states the NMI |
370 | * can be in: | ||
370 | * | 371 | * |
371 | * 1) not running | 372 | * 1) not running |
372 | * 2) executing | 373 | * 2) executing |
@@ -383,32 +384,50 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) | |||
383 | * If an NMI hits a breakpoint that executes an iret, another | 384 | * If an NMI hits a breakpoint that executes an iret, another |
384 | * NMI can preempt it. We do not want to allow this new NMI | 385 | * NMI can preempt it. We do not want to allow this new NMI |
385 | * to run, but we want to execute it when the first one finishes. | 386 | * to run, but we want to execute it when the first one finishes. |
386 | * We set the state to "latched", and the first NMI will perform | 387 | * We set the state to "latched", and the exit of the first NMI will |
387 | * an cmpxchg on the state, and if it doesn't successfully | 388 | * perform a dec_return, if the result is zero (NOT_RUNNING), then |
388 | * reset the state to "not running" it will restart the next | 389 | * it will simply exit the NMI handler. If not, the dec_return |
389 | * NMI. | 390 | * would have set the state to NMI_EXECUTING (what we want it to |
391 | * be when we are running). In this case, we simply jump back | ||
392 | * to rerun the NMI handler again, and restart the 'latched' NMI. | ||
393 | * | ||
394 | * No trap (breakpoint or page fault) should be hit before nmi_restart, | ||
395 | * thus there is no race between the first check of state for NOT_RUNNING | ||
396 | * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs | ||
397 | * at this point. | ||
398 | * | ||
399 | * In case the NMI takes a page fault, we need to save off the CR2 | ||
400 | * because the NMI could have preempted another page fault and corrupt | ||
401 | * the CR2 that is about to be read. As nested NMIs must be restarted | ||
402 | * and they cannot take breakpoints or page faults, the update of the | ||
403 | * CR2 must be done before converting the nmi state back to NOT_RUNNING. | ||
404 | * Otherwise, there would be a race of another nested NMI coming in | ||
405 | * after setting state to NOT_RUNNING but before updating the nmi_cr2. | ||
390 | */ | 406 | */ |
391 | enum nmi_states { | 407 | enum nmi_states { |
392 | NMI_NOT_RUNNING, | 408 | NMI_NOT_RUNNING = 0, |
393 | NMI_EXECUTING, | 409 | NMI_EXECUTING, |
394 | NMI_LATCHED, | 410 | NMI_LATCHED, |
395 | }; | 411 | }; |
396 | static DEFINE_PER_CPU(enum nmi_states, nmi_state); | 412 | static DEFINE_PER_CPU(enum nmi_states, nmi_state); |
413 | static DEFINE_PER_CPU(unsigned long, nmi_cr2); | ||
397 | 414 | ||
398 | #define nmi_nesting_preprocess(regs) \ | 415 | #define nmi_nesting_preprocess(regs) \ |
399 | do { \ | 416 | do { \ |
400 | if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ | 417 | if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ |
401 | __get_cpu_var(nmi_state) = NMI_LATCHED; \ | 418 | this_cpu_write(nmi_state, NMI_LATCHED); \ |
402 | return; \ | 419 | return; \ |
403 | } \ | 420 | } \ |
404 | nmi_restart: \ | 421 | this_cpu_write(nmi_state, NMI_EXECUTING); \ |
405 | __get_cpu_var(nmi_state) = NMI_EXECUTING; \ | 422 | this_cpu_write(nmi_cr2, read_cr2()); \ |
406 | } while (0) | 423 | } while (0); \ |
424 | nmi_restart: | ||
407 | 425 | ||
408 | #define nmi_nesting_postprocess() \ | 426 | #define nmi_nesting_postprocess() \ |
409 | do { \ | 427 | do { \ |
410 | if (cmpxchg(&__get_cpu_var(nmi_state), \ | 428 | if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ |
411 | NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ | 429 | write_cr2(this_cpu_read(nmi_cr2)); \ |
430 | if (this_cpu_dec_return(nmi_state)) \ | ||
412 | goto nmi_restart; \ | 431 | goto nmi_restart; \ |
413 | } while (0) | 432 | } while (0) |
414 | #else /* x86_64 */ | 433 | #else /* x86_64 */ |
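
The reworked 32-bit nesting logic swaps the cmpxchg exit for a dec_return, exactly as the expanded comment describes. A compact sketch of the three-state protocol, lifted out of the macros (names are illustrative):

static DEFINE_PER_CPU(int, demo_nmi_state);  /* 0 idle, 1 running, 2 latched */

static bool demo_nmi_enter(void)
{
	if (this_cpu_read(demo_nmi_state)) {
		/* Nested NMI: latch a rerun for the running instance. */
		this_cpu_write(demo_nmi_state, 2);
		return false;
	}
	this_cpu_write(demo_nmi_state, 1);
	return true;
}

static bool demo_nmi_exit(void)
{
	/* 2 -> 1: an NMI was latched while we ran, so rerun the handler;
	 * 1 -> 0: we are done. */
	return this_cpu_dec_return(demo_nmi_state) != 0;
}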
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index 149b8d9c6ad4..6d9582ec0324 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c | |||
@@ -42,7 +42,8 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs) | |||
42 | static void __init init_nmi_testsuite(void) | 42 | static void __init init_nmi_testsuite(void) |
43 | { | 43 | { |
44 | /* trap all the unknown NMIs we may generate */ | 44 | /* trap all the unknown NMIs we may generate */ |
45 | register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); | 45 | register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk", |
46 | __initdata); | ||
46 | } | 47 | } |
47 | 48 | ||
48 | static void __init cleanup_nmi_testsuite(void) | 49 | static void __init cleanup_nmi_testsuite(void) |
@@ -64,8 +65,8 @@ static void __init test_nmi_ipi(struct cpumask *mask) | |||
64 | { | 65 | { |
65 | unsigned long timeout; | 66 | unsigned long timeout; |
66 | 67 | ||
67 | if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback, | 68 | if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, |
68 | NMI_FLAG_FIRST, "nmi_selftest")) { | 69 | NMI_FLAG_FIRST, "nmi_selftest", __initdata)) { |
69 | nmi_fail = FAILURE; | 70 | nmi_fail = FAILURE; |
70 | return; | 71 | return; |
71 | } | 72 | } |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 9ce885996fd7..17fff18a1031 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -352,9 +352,7 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
352 | #endif | 352 | #endif |
353 | .wbinvd = native_wbinvd, | 353 | .wbinvd = native_wbinvd, |
354 | .read_msr = native_read_msr_safe, | 354 | .read_msr = native_read_msr_safe, |
355 | .rdmsr_regs = native_rdmsr_safe_regs, | ||
356 | .write_msr = native_write_msr_safe, | 355 | .write_msr = native_write_msr_safe, |
357 | .wrmsr_regs = native_wrmsr_safe_regs, | ||
358 | .read_tsc = native_read_tsc, | 356 | .read_tsc = native_read_tsc, |
359 | .read_pmc = native_read_pmc, | 357 | .read_pmc = native_read_pmc, |
360 | .read_tscp = native_read_tscp, | 358 | .read_tscp = native_read_tscp, |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index b72838bae64a..299d49302e7d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -22,6 +22,8 @@ | |||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #define pr_fmt(fmt) "Calgary: " fmt | ||
26 | |||
25 | #include <linux/kernel.h> | 27 | #include <linux/kernel.h> |
26 | #include <linux/init.h> | 28 | #include <linux/init.h> |
27 | #include <linux/types.h> | 29 | #include <linux/types.h> |
@@ -245,7 +247,7 @@ static unsigned long iommu_range_alloc(struct device *dev, | |||
245 | offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, | 247 | offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, |
246 | npages, 0, boundary_size, 0); | 248 | npages, 0, boundary_size, 0); |
247 | if (offset == ~0UL) { | 249 | if (offset == ~0UL) { |
248 | printk(KERN_WARNING "Calgary: IOMMU full.\n"); | 250 | pr_warn("IOMMU full\n"); |
249 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 251 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
250 | if (panic_on_overflow) | 252 | if (panic_on_overflow) |
251 | panic("Calgary: fix the allocator.\n"); | 253 | panic("Calgary: fix the allocator.\n"); |
@@ -271,8 +273,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, | |||
271 | entry = iommu_range_alloc(dev, tbl, npages); | 273 | entry = iommu_range_alloc(dev, tbl, npages); |
272 | 274 | ||
273 | if (unlikely(entry == DMA_ERROR_CODE)) { | 275 | if (unlikely(entry == DMA_ERROR_CODE)) { |
274 | printk(KERN_WARNING "Calgary: failed to allocate %u pages in " | 276 | pr_warn("failed to allocate %u pages in iommu %p\n", |
275 | "iommu %p\n", npages, tbl); | 277 | npages, tbl); |
276 | return DMA_ERROR_CODE; | 278 | return DMA_ERROR_CODE; |
277 | } | 279 | } |
278 | 280 | ||
@@ -561,8 +563,7 @@ static void calgary_tce_cache_blast(struct iommu_table *tbl) | |||
561 | i++; | 563 | i++; |
562 | } while ((val & 0xff) != 0xff && i < 100); | 564 | } while ((val & 0xff) != 0xff && i < 100); |
563 | if (i == 100) | 565 | if (i == 100) |
564 | printk(KERN_WARNING "Calgary: PCI bus not quiesced, " | 566 | pr_warn("PCI bus not quiesced, continuing anyway\n"); |
565 | "continuing anyway\n"); | ||
566 | 567 | ||
567 | /* invalidate TCE cache */ | 568 | /* invalidate TCE cache */ |
568 | target = calgary_reg(bbar, tar_offset(tbl->it_busno)); | 569 | target = calgary_reg(bbar, tar_offset(tbl->it_busno)); |
@@ -604,8 +605,7 @@ begin: | |||
604 | i++; | 605 | i++; |
605 | } while ((val64 & 0xff) != 0xff && i < 100); | 606 | } while ((val64 & 0xff) != 0xff && i < 100); |
606 | if (i == 100) | 607 | if (i == 100) |
607 | printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, " | 608 | pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n"); |
608 | "continuing anyway\n"); | ||
609 | 609 | ||
610 | /* 3. poll Page Migration DEBUG for SoftStopFault */ | 610 | /* 3. poll Page Migration DEBUG for SoftStopFault */ |
611 | target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); | 611 | target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); |
@@ -617,8 +617,7 @@ begin: | |||
617 | if (++count < 100) | 617 | if (++count < 100) |
618 | goto begin; | 618 | goto begin; |
619 | else { | 619 | else { |
620 | printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, " | 620 | pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n"); |
621 | "aborting TCE cache flush sequence!\n"); | ||
622 | return; /* pray for the best */ | 621 | return; /* pray for the best */ |
623 | } | 622 | } |
624 | } | 623 | } |
@@ -840,8 +839,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl) | |||
840 | plssr = be32_to_cpu(readl(target)); | 839 | plssr = be32_to_cpu(readl(target)); |
841 | 840 | ||
842 | /* If no error, the agent ID in the CSR is not valid */ | 841 | /* If no error, the agent ID in the CSR is not valid */ |
843 | printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, " | 842 | pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n", |
844 | "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr); | 843 | tbl->it_busno, csr, plssr); |
845 | } | 844 | } |
846 | 845 | ||
847 | static void calioc2_dump_error_regs(struct iommu_table *tbl) | 846 | static void calioc2_dump_error_regs(struct iommu_table *tbl) |
@@ -867,22 +866,21 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl) | |||
867 | target = calgary_reg(bbar, phboff | 0x800); | 866 | target = calgary_reg(bbar, phboff | 0x800); |
868 | mck = be32_to_cpu(readl(target)); | 867 | mck = be32_to_cpu(readl(target)); |
869 | 868 | ||
870 | printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n", | 869 | pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno); |
871 | tbl->it_busno); | ||
872 | 870 | ||
873 | printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", | 871 | pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", |
874 | csr, plssr, csmr, mck); | 872 | csr, plssr, csmr, mck); |
875 | 873 | ||
876 | /* dump rest of error regs */ | 874 | /* dump rest of error regs */ |
877 | printk(KERN_EMERG "Calgary: "); | 875 | pr_emerg(""); |
878 | for (i = 0; i < ARRAY_SIZE(errregs); i++) { | 876 | for (i = 0; i < ARRAY_SIZE(errregs); i++) { |
879 | /* err regs are at 0x810 - 0x870 */ | 877 | /* err regs are at 0x810 - 0x870 */ |
880 | erroff = (0x810 + (i * 0x10)); | 878 | erroff = (0x810 + (i * 0x10)); |
881 | target = calgary_reg(bbar, phboff | erroff); | 879 | target = calgary_reg(bbar, phboff | erroff); |
882 | errregs[i] = be32_to_cpu(readl(target)); | 880 | errregs[i] = be32_to_cpu(readl(target)); |
883 | printk("0x%08x@0x%lx ", errregs[i], erroff); | 881 | pr_cont("0x%08x@0x%lx ", errregs[i], erroff); |
884 | } | 882 | } |
885 | printk("\n"); | 883 | pr_cont("\n"); |
886 | 884 | ||
887 | /* root complex status */ | 885 | /* root complex status */ |
888 | target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); | 886 | target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index c0f420f76cd3..de2b7ad70273 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -45,15 +45,6 @@ int iommu_detected __read_mostly = 0; | |||
45 | */ | 45 | */ |
46 | int iommu_pass_through __read_mostly; | 46 | int iommu_pass_through __read_mostly; |
47 | 47 | ||
48 | /* | ||
49 | * Group multi-function PCI devices into a single device-group for the | ||
50 | * iommu_device_group interface. This tells the iommu driver to pretend | ||
51 | * it cannot distinguish between functions of a device, exposing only one | ||
52 | * group for the device. Useful for disallowing use of individual PCI | ||
53 | * functions from userspace drivers. | ||
54 | */ | ||
55 | int iommu_group_mf __read_mostly; | ||
56 | |||
57 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; | 48 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; |
58 | 49 | ||
59 | /* Dummy device used for NULL arguments (normally ISA). */ | 50 | /* Dummy device used for NULL arguments (normally ISA). */ |
@@ -194,8 +185,6 @@ static __init int iommu_setup(char *p) | |||
194 | #endif | 185 | #endif |
195 | if (!strncmp(p, "pt", 2)) | 186 | if (!strncmp(p, "pt", 2)) |
196 | iommu_pass_through = 1; | 187 | iommu_pass_through = 1; |
197 | if (!strncmp(p, "group_mf", 8)) | ||
198 | iommu_group_mf = 1; | ||
199 | 188 | ||
200 | gart_parse_options(p); | 189 | gart_parse_options(p); |
201 | 190 | ||
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 735279e54e59..ef6a8456f719 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
2 | |||
1 | #include <linux/errno.h> | 3 | #include <linux/errno.h> |
2 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
3 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
@@ -145,16 +147,14 @@ void show_regs_common(void) | |||
145 | /* Board Name is optional */ | 147 | /* Board Name is optional */ |
146 | board = dmi_get_system_info(DMI_BOARD_NAME); | 148 | board = dmi_get_system_info(DMI_BOARD_NAME); |
147 | 149 | ||
148 | printk(KERN_CONT "\n"); | 150 | printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n", |
149 | printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s", | 151 | current->pid, current->comm, print_tainted(), |
150 | current->pid, current->comm, print_tainted(), | 152 | init_utsname()->release, |
151 | init_utsname()->release, | 153 | (int)strcspn(init_utsname()->version, " "), |
152 | (int)strcspn(init_utsname()->version, " "), | 154 | init_utsname()->version, |
153 | init_utsname()->version); | 155 | vendor, product, |
154 | printk(KERN_CONT " %s %s", vendor, product); | 156 | board ? "/" : "", |
155 | if (board) | 157 | board ? board : ""); |
156 | printk(KERN_CONT "/%s", board); | ||
157 | printk(KERN_CONT "\n"); | ||
158 | } | 158 | } |
159 | 159 | ||
160 | void flush_thread(void) | 160 | void flush_thread(void) |
@@ -645,7 +645,7 @@ static void amd_e400_idle(void) | |||
645 | amd_e400_c1e_detected = true; | 645 | amd_e400_c1e_detected = true; |
646 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | 646 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) |
647 | mark_tsc_unstable("TSC halt in AMD C1E"); | 647 | mark_tsc_unstable("TSC halt in AMD C1E"); |
648 | printk(KERN_INFO "System has AMD C1E enabled\n"); | 648 | pr_info("System has AMD C1E enabled\n"); |
649 | } | 649 | } |
650 | } | 650 | } |
651 | 651 | ||
@@ -659,8 +659,7 @@ static void amd_e400_idle(void) | |||
659 | */ | 659 | */ |
660 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, | 660 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, |
661 | &cpu); | 661 | &cpu); |
662 | printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", | 662 | pr_info("Switch to broadcast mode on CPU%d\n", cpu); |
663 | cpu); | ||
664 | } | 663 | } |
665 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); | 664 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); |
666 | 665 | ||
@@ -681,8 +680,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
681 | { | 680 | { |
682 | #ifdef CONFIG_SMP | 681 | #ifdef CONFIG_SMP |
683 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | 682 | if (pm_idle == poll_idle && smp_num_siblings > 1) { |
684 | printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," | 683 | pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); |
685 | " performance may degrade.\n"); | ||
686 | } | 684 | } |
687 | #endif | 685 | #endif |
688 | if (pm_idle) | 686 | if (pm_idle) |
@@ -692,11 +690,11 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
692 | /* | 690 | /* |
693 | * One CPU supports mwait => all CPUs support mwait | 691 | * One CPU supports mwait => all CPUs support mwait |
694 | */ | 692 | */ |
695 | printk(KERN_INFO "using mwait in idle threads.\n"); | 693 | pr_info("using mwait in idle threads\n"); |
696 | pm_idle = mwait_idle; | 694 | pm_idle = mwait_idle; |
697 | } else if (cpu_has_amd_erratum(amd_erratum_400)) { | 695 | } else if (cpu_has_amd_erratum(amd_erratum_400)) { |
698 | /* E400: APIC timer interrupt does not wake up CPU from C1e */ | 696 | /* E400: APIC timer interrupt does not wake up CPU from C1e */ |
699 | printk(KERN_INFO "using AMD E400 aware idle routine\n"); | 697 | pr_info("using AMD E400 aware idle routine\n"); |
700 | pm_idle = amd_e400_idle; | 698 | pm_idle = amd_e400_idle; |
701 | } else | 699 | } else |
702 | pm_idle = default_idle; | 700 | pm_idle = default_idle; |
@@ -715,7 +713,7 @@ static int __init idle_setup(char *str) | |||
715 | return -EINVAL; | 713 | return -EINVAL; |
716 | 714 | ||
717 | if (!strcmp(str, "poll")) { | 715 | if (!strcmp(str, "poll")) { |
718 | printk("using polling idle threads.\n"); | 716 | pr_info("using polling idle threads\n"); |
719 | pm_idle = poll_idle; | 717 | pm_idle = poll_idle; |
720 | boot_option_idle_override = IDLE_POLL; | 718 | boot_option_idle_override = IDLE_POLL; |
721 | } else if (!strcmp(str, "mwait")) { | 719 | } else if (!strcmp(str, "mwait")) { |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 61cdf7fdf099..0a980c9d7cb8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -117,10 +117,10 @@ void release_thread(struct task_struct *dead_task) | |||
117 | { | 117 | { |
118 | if (dead_task->mm) { | 118 | if (dead_task->mm) { |
119 | if (dead_task->mm->context.size) { | 119 | if (dead_task->mm->context.size) { |
120 | printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", | 120 | pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n", |
121 | dead_task->comm, | 121 | dead_task->comm, |
122 | dead_task->mm->context.ldt, | 122 | dead_task->mm->context.ldt, |
123 | dead_task->mm->context.size); | 123 | dead_task->mm->context.size); |
124 | BUG(); | 124 | BUG(); |
125 | } | 125 | } |
126 | } | 126 | } |
@@ -466,7 +466,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
466 | task->thread.gs = addr; | 466 | task->thread.gs = addr; |
467 | if (doit) { | 467 | if (doit) { |
468 | load_gs_index(0); | 468 | load_gs_index(0); |
469 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); | 469 | ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr); |
470 | } | 470 | } |
471 | } | 471 | } |
472 | put_cpu(); | 472 | put_cpu(); |
@@ -494,7 +494,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
494 | /* set the selector to 0 to not confuse | 494 | /* set the selector to 0 to not confuse |
495 | __switch_to */ | 495 | __switch_to */ |
496 | loadsegment(fs, 0); | 496 | loadsegment(fs, 0); |
497 | ret = checking_wrmsrl(MSR_FS_BASE, addr); | 497 | ret = wrmsrl_safe(MSR_FS_BASE, addr); |
498 | } | 498 | } |
499 | } | 499 | } |
500 | put_cpu(); | 500 | put_cpu(); |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 03920a15a632..1b27de563561 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | |||
512 | 512 | ||
513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) | 513 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) |
514 | /* Set correct numa_node information for AMD NB functions */ | 514 | /* Set correct numa_node information for AMD NB functions */ |
515 | static void __init quirk_amd_nb_node(struct pci_dev *dev) | 515 | static void __devinit quirk_amd_nb_node(struct pci_dev *dev) |
516 | { | 516 | { |
517 | struct pci_dev *nb_ht; | 517 | struct pci_dev *nb_ht; |
518 | unsigned int devfn; | 518 | unsigned int devfn; |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 5de92f1abd76..52190a938b4a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
2 | |||
1 | #include <linux/module.h> | 3 | #include <linux/module.h> |
2 | #include <linux/reboot.h> | 4 | #include <linux/reboot.h> |
3 | #include <linux/init.h> | 5 | #include <linux/init.h> |
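Note: several files in this series gain a pr_fmt() define before the first include, so every pr_*() call in the file is prefixed with the module name at compile time via string-literal concatenation. A self-contained userspace sketch of the same trick (MODNAME stands in for KBUILD_MODNAME):

    #include <stdio.h>

    #define MODNAME "reboot"                  /* stand-in for KBUILD_MODNAME */
    #define pr_fmt(fmt) MODNAME ": " fmt      /* same shape as the kernel macro */
    #define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

    int main(void)
    {
            pr_info("machine restart\n");     /* prints "reboot: machine restart" */
            return 0;
    }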
@@ -20,14 +22,12 @@ | |||
20 | #include <asm/virtext.h> | 22 | #include <asm/virtext.h> |
21 | #include <asm/cpu.h> | 23 | #include <asm/cpu.h> |
22 | #include <asm/nmi.h> | 24 | #include <asm/nmi.h> |
25 | #include <asm/smp.h> | ||
23 | 26 | ||
24 | #ifdef CONFIG_X86_32 | 27 | #include <linux/ctype.h> |
25 | # include <linux/ctype.h> | 28 | #include <linux/mc146818rtc.h> |
26 | # include <linux/mc146818rtc.h> | 29 | #include <asm/realmode.h> |
27 | # include <asm/realmode.h> | 30 | #include <asm/x86_init.h> |
28 | #else | ||
29 | # include <asm/x86_init.h> | ||
30 | #endif | ||
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Power off function, if any | 33 | * Power off function, if any |
@@ -49,7 +49,7 @@ int reboot_force; | |||
49 | */ | 49 | */ |
50 | static int reboot_default = 1; | 50 | static int reboot_default = 1; |
51 | 51 | ||
52 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | 52 | #ifdef CONFIG_SMP |
53 | static int reboot_cpu = -1; | 53 | static int reboot_cpu = -1; |
54 | #endif | 54 | #endif |
55 | 55 | ||
@@ -67,8 +67,8 @@ bool port_cf9_safe = false; | |||
67 | * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] | 67 | * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] |
68 | * warm Don't set the cold reboot flag | 68 | * warm Don't set the cold reboot flag |
69 | * cold Set the cold reboot flag | 69 | * cold Set the cold reboot flag |
70 | * bios Reboot by jumping through the BIOS (only for X86_32) | 70 | * bios Reboot by jumping through the BIOS |
71 | * smp Reboot by executing reset on BSP or other CPU (only for X86_32) | 71 | * smp Reboot by executing reset on BSP or other CPU |
72 | * triple Force a triple fault (init) | 72 | * triple Force a triple fault (init) |
73 | * kbd Use the keyboard controller. cold reset (default) | 73 | * kbd Use the keyboard controller. cold reset (default) |
74 | * acpi Use the RESET_REG in the FADT | 74 | * acpi Use the RESET_REG in the FADT |
@@ -95,7 +95,6 @@ static int __init reboot_setup(char *str) | |||
95 | reboot_mode = 0; | 95 | reboot_mode = 0; |
96 | break; | 96 | break; |
97 | 97 | ||
98 | #ifdef CONFIG_X86_32 | ||
99 | #ifdef CONFIG_SMP | 98 | #ifdef CONFIG_SMP |
100 | case 's': | 99 | case 's': |
101 | if (isdigit(*(str+1))) { | 100 | if (isdigit(*(str+1))) { |
@@ -112,7 +111,6 @@ static int __init reboot_setup(char *str) | |||
112 | #endif /* CONFIG_SMP */ | 111 | #endif /* CONFIG_SMP */ |
113 | 112 | ||
114 | case 'b': | 113 | case 'b': |
115 | #endif | ||
116 | case 'a': | 114 | case 'a': |
117 | case 'k': | 115 | case 'k': |
118 | case 't': | 116 | case 't': |
@@ -138,7 +136,6 @@ static int __init reboot_setup(char *str) | |||
138 | __setup("reboot=", reboot_setup); | 136 | __setup("reboot=", reboot_setup); |
139 | 137 | ||
140 | 138 | ||
141 | #ifdef CONFIG_X86_32 | ||
142 | /* | 139 | /* |
143 | * Reboot options and system auto-detection code provided by | 140 | * Reboot options and system auto-detection code provided by |
144 | * Dell Inc. so their systems "just work". :-) | 141 | * Dell Inc. so their systems "just work". :-) |
@@ -152,16 +149,14 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) | |||
152 | { | 149 | { |
153 | if (reboot_type != BOOT_BIOS) { | 150 | if (reboot_type != BOOT_BIOS) { |
154 | reboot_type = BOOT_BIOS; | 151 | reboot_type = BOOT_BIOS; |
155 | printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); | 152 | pr_info("%s series board detected. Selecting %s-method for reboots.\n", |
153 | "BIOS", d->ident); | ||
156 | } | 154 | } |
157 | return 0; | 155 | return 0; |
158 | } | 156 | } |
159 | 157 | ||
160 | void machine_real_restart(unsigned int type) | 158 | void __noreturn machine_real_restart(unsigned int type) |
161 | { | 159 | { |
162 | void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int)) | ||
163 | real_mode_header->machine_real_restart_asm; | ||
164 | |||
165 | local_irq_disable(); | 160 | local_irq_disable(); |
166 | 161 | ||
167 | /* | 162 | /* |
@@ -181,25 +176,28 @@ void machine_real_restart(unsigned int type) | |||
181 | /* | 176 | /* |
182 | * Switch back to the initial page table. | 177 | * Switch back to the initial page table. |
183 | */ | 178 | */ |
179 | #ifdef CONFIG_X86_32 | ||
184 | load_cr3(initial_page_table); | 180 | load_cr3(initial_page_table); |
185 | 181 | #else | |
186 | /* | 182 | write_cr3(real_mode_header->trampoline_pgd); |
187 | * Write 0x1234 to absolute memory location 0x472. The BIOS reads | 183 | #endif |
188 | * this on booting to tell it to "Bypass memory test (also warm | ||
189 | * boot)". This seems like a fairly standard thing that gets set by | ||
190 | * REBOOT.COM programs, and the previous reset routine did this | ||
191 | * too. */ | ||
192 | *((unsigned short *)0x472) = reboot_mode; | ||
193 | 184 | ||
194 | /* Jump to the identity-mapped low memory code */ | 185 | /* Jump to the identity-mapped low memory code */ |
195 | restart_lowmem(type); | 186 | #ifdef CONFIG_X86_32 |
187 | asm volatile("jmpl *%0" : : | ||
188 | "rm" (real_mode_header->machine_real_restart_asm), | ||
189 | "a" (type)); | ||
190 | #else | ||
191 | asm volatile("ljmpl *%0" : : | ||
192 | "m" (real_mode_header->machine_real_restart_asm), | ||
193 | "D" (type)); | ||
194 | #endif | ||
195 | unreachable(); | ||
196 | } | 196 | } |
197 | #ifdef CONFIG_APM_MODULE | 197 | #ifdef CONFIG_APM_MODULE |
198 | EXPORT_SYMBOL(machine_real_restart); | 198 | EXPORT_SYMBOL(machine_real_restart); |
199 | #endif | 199 | #endif |
200 | 200 | ||
201 | #endif /* CONFIG_X86_32 */ | ||
202 | |||
203 | /* | 201 | /* |
204 | * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot | 202 | * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot |
205 | */ | 203 | */ |
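Note on the hunk above: with machine_real_restart() now shared between 32-bit and 64-bit, the old C-level call through a cast function pointer is replaced by an explicit asm jump, since the real-mode trampoline is not a C function the compiler may assume returns (hence the new __noreturn and unreachable()). The removed pattern looked roughly like this cast-and-call (userspace illustration, not the kernel path):

    #include <stdio.h>

    static void trampoline(unsigned int type)
    {
            printf("restarting, type=%u\n", type);
            /* the real trampoline never returns */
    }

    int main(void)
    {
            /* the removed code cast a raw low-memory entry address to
             * a pointer of this type and simply called it */
            void (*restart_lowmem)(unsigned int) = &trampoline;

            restart_lowmem(0);
            return 0;
    }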
@@ -207,8 +205,8 @@ static int __init set_pci_reboot(const struct dmi_system_id *d) | |||
207 | { | 205 | { |
208 | if (reboot_type != BOOT_CF9) { | 206 | if (reboot_type != BOOT_CF9) { |
209 | reboot_type = BOOT_CF9; | 207 | reboot_type = BOOT_CF9; |
210 | printk(KERN_INFO "%s series board detected. " | 208 | pr_info("%s series board detected. Selecting %s-method for reboots.\n", |
211 | "Selecting PCI-method for reboots.\n", d->ident); | 209 | "PCI", d->ident); |
212 | } | 210 | } |
213 | return 0; | 211 | return 0; |
214 | } | 212 | } |
@@ -217,17 +215,16 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d) | |||
217 | { | 215 | { |
218 | if (reboot_type != BOOT_KBD) { | 216 | if (reboot_type != BOOT_KBD) { |
219 | reboot_type = BOOT_KBD; | 217 | reboot_type = BOOT_KBD; |
220 | printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident); | 218 | pr_info("%s series board detected. Selecting %s-method for reboot.\n", |
219 | "KBD", d->ident); | ||
221 | } | 220 | } |
222 | return 0; | 221 | return 0; |
223 | } | 222 | } |
224 | 223 | ||
225 | /* | 224 | /* |
226 | * This is a single dmi_table handling all reboot quirks. Note that | 225 | * This is a single dmi_table handling all reboot quirks. |
227 | * REBOOT_BIOS is only available for 32bit | ||
228 | */ | 226 | */ |
229 | static struct dmi_system_id __initdata reboot_dmi_table[] = { | 227 | static struct dmi_system_id __initdata reboot_dmi_table[] = { |
230 | #ifdef CONFIG_X86_32 | ||
231 | { /* Handle problems with rebooting on Dell E520's */ | 228 | { /* Handle problems with rebooting on Dell E520's */ |
232 | .callback = set_bios_reboot, | 229 | .callback = set_bios_reboot, |
233 | .ident = "Dell E520", | 230 | .ident = "Dell E520", |
@@ -377,7 +374,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
377 | DMI_MATCH(DMI_BOARD_NAME, "P4S800"), | 374 | DMI_MATCH(DMI_BOARD_NAME, "P4S800"), |
378 | }, | 375 | }, |
379 | }, | 376 | }, |
380 | #endif /* CONFIG_X86_32 */ | ||
381 | 377 | ||
382 | { /* Handle reboot issue on Acer Aspire one */ | 378 | { /* Handle reboot issue on Acer Aspire one */ |
383 | .callback = set_kbd_reboot, | 379 | .callback = set_kbd_reboot, |
@@ -584,13 +580,11 @@ static void native_machine_emergency_restart(void) | |||
584 | reboot_type = BOOT_KBD; | 580 | reboot_type = BOOT_KBD; |
585 | break; | 581 | break; |
586 | 582 | ||
587 | #ifdef CONFIG_X86_32 | ||
588 | case BOOT_BIOS: | 583 | case BOOT_BIOS: |
589 | machine_real_restart(MRR_BIOS); | 584 | machine_real_restart(MRR_BIOS); |
590 | 585 | ||
591 | reboot_type = BOOT_KBD; | 586 | reboot_type = BOOT_KBD; |
592 | break; | 587 | break; |
593 | #endif | ||
594 | 588 | ||
595 | case BOOT_ACPI: | 589 | case BOOT_ACPI: |
596 | acpi_reboot(); | 590 | acpi_reboot(); |
@@ -632,12 +626,10 @@ void native_machine_shutdown(void) | |||
632 | /* The boot cpu is always logical cpu 0 */ | 626 | /* The boot cpu is always logical cpu 0 */ |
633 | int reboot_cpu_id = 0; | 627 | int reboot_cpu_id = 0; |
634 | 628 | ||
635 | #ifdef CONFIG_X86_32 | ||
636 | /* See if a command line override has been given */ | 629 | /* See if a command line override has been given */ |
637 | if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && | 630 | if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && |
638 | cpu_online(reboot_cpu)) | 631 | cpu_online(reboot_cpu)) |
639 | reboot_cpu_id = reboot_cpu; | 632 | reboot_cpu_id = reboot_cpu; |
640 | #endif | ||
641 | 633 | ||
642 | /* Make certain the cpu I'm about to reboot on is online */ | 634 | /* Make certain the cpu I'm about to reboot on is online */ |
643 | if (!cpu_online(reboot_cpu_id)) | 635 | if (!cpu_online(reboot_cpu_id)) |
@@ -678,7 +670,7 @@ static void __machine_emergency_restart(int emergency) | |||
678 | 670 | ||
679 | static void native_machine_restart(char *__unused) | 671 | static void native_machine_restart(char *__unused) |
680 | { | 672 | { |
681 | printk("machine restart\n"); | 673 | pr_notice("machine restart\n"); |
682 | 674 | ||
683 | if (!reboot_force) | 675 | if (!reboot_force) |
684 | machine_shutdown(); | 676 | machine_shutdown(); |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 16be6dc14db1..f4b9b80e1b95 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -1031,8 +1031,6 @@ void __init setup_arch(char **cmdline_p) | |||
1031 | 1031 | ||
1032 | x86_init.timers.wallclock_init(); | 1032 | x86_init.timers.wallclock_init(); |
1033 | 1033 | ||
1034 | x86_platform.wallclock_init(); | ||
1035 | |||
1036 | mcheck_init(); | 1034 | mcheck_init(); |
1037 | 1035 | ||
1038 | arch_init_ideal_nops(); | 1036 | arch_init_ideal_nops(); |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5a98aa272184..5cdff0357746 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <asm/cpu.h> | 21 | #include <asm/cpu.h> |
22 | #include <asm/stackprotector.h> | 22 | #include <asm/stackprotector.h> |
23 | 23 | ||
24 | DEFINE_PER_CPU(int, cpu_number); | 24 | DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); |
25 | EXPORT_PER_CPU_SYMBOL(cpu_number); | 25 | EXPORT_PER_CPU_SYMBOL(cpu_number); |
26 | 26 | ||
27 | #ifdef CONFIG_X86_64 | 27 | #ifdef CONFIG_X86_64 |
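Note: DEFINE_PER_CPU_READ_MOSTLY (used here and in the smpboot.c conversions below) places the variable in a per-CPU section reserved for rarely written data, keeping it off cachelines that also hold frequently written per-CPU state. A kernel-context sketch of declaration and access (illustrative only):

    /* Kernel context; cpu_number is written once at boot, read constantly. */
    DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);

    static int query_cpu_number(void)
    {
            return this_cpu_read(cpu_number);   /* cheap read, no line bouncing */
    }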
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 21af737053aa..b280908a376e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -6,6 +6,9 @@ | |||
6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
7 | * 2000-2002 x86-64 support by Andi Kleen | 7 | * 2000-2002 x86-64 support by Andi Kleen |
8 | */ | 8 | */ |
9 | |||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
9 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
10 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
11 | #include <linux/smp.h> | 14 | #include <linux/smp.h> |
@@ -814,7 +817,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | |||
814 | me->comm, me->pid, where, frame, | 817 | me->comm, me->pid, where, frame, |
815 | regs->ip, regs->sp, regs->orig_ax); | 818 | regs->ip, regs->sp, regs->orig_ax); |
816 | print_vma_addr(" in ", regs->ip); | 819 | print_vma_addr(" in ", regs->ip); |
817 | printk(KERN_CONT "\n"); | 820 | pr_cont("\n"); |
818 | } | 821 | } |
819 | 822 | ||
820 | force_sig(SIGSEGV, me); | 823 | force_sig(SIGSEGV, me); |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7bd8a0823654..7c5a8c314c02 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * x86 SMP booting functions | 2 | * x86 SMP booting functions |
3 | * | 3 | * |
4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> | 4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> |
@@ -39,6 +39,8 @@ | |||
39 | * Glauber Costa : i386 and x86_64 integration | 39 | * Glauber Costa : i386 and x86_64 integration |
40 | */ | 40 | */ |
41 | 41 | ||
42 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
43 | |||
42 | #include <linux/init.h> | 44 | #include <linux/init.h> |
43 | #include <linux/smp.h> | 45 | #include <linux/smp.h> |
44 | #include <linux/module.h> | 46 | #include <linux/module.h> |
@@ -104,17 +106,17 @@ int smp_num_siblings = 1; | |||
104 | EXPORT_SYMBOL(smp_num_siblings); | 106 | EXPORT_SYMBOL(smp_num_siblings); |
105 | 107 | ||
106 | /* Last level cache ID of each logical CPU */ | 108 | /* Last level cache ID of each logical CPU */ |
107 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; | 109 | DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; |
108 | 110 | ||
109 | /* representing HT siblings of each logical CPU */ | 111 | /* representing HT siblings of each logical CPU */ |
110 | DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); | 112 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); |
111 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); | 113 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); |
112 | 114 | ||
113 | /* representing HT and core siblings of each logical CPU */ | 115 | /* representing HT and core siblings of each logical CPU */ |
114 | DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); | 116 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); |
115 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); | 117 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); |
116 | 118 | ||
117 | DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); | 119 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); |
118 | 120 | ||
119 | /* Per CPU bogomips and other parameters */ | 121 | /* Per CPU bogomips and other parameters */ |
120 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); | 122 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |
@@ -184,7 +186,7 @@ static void __cpuinit smp_callin(void) | |||
184 | * boards) | 186 | * boards) |
185 | */ | 187 | */ |
186 | 188 | ||
187 | pr_debug("CALLIN, before setup_local_APIC().\n"); | 189 | pr_debug("CALLIN, before setup_local_APIC()\n"); |
188 | if (apic->smp_callin_clear_local_apic) | 190 | if (apic->smp_callin_clear_local_apic) |
189 | apic->smp_callin_clear_local_apic(); | 191 | apic->smp_callin_clear_local_apic(); |
190 | setup_local_APIC(); | 192 | setup_local_APIC(); |
@@ -255,22 +257,13 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
255 | check_tsc_sync_target(); | 257 | check_tsc_sync_target(); |
256 | 258 | ||
257 | /* | 259 | /* |
258 | * We need to hold call_lock, so there is no inconsistency | ||
259 | * between the time smp_call_function() determines number of | ||
260 | * IPI recipients, and the time when the determination is made | ||
261 | * for which cpus receive the IPI. Holding this | ||
262 | * lock helps us to not include this cpu in a currently in progress | ||
263 | * smp_call_function(). | ||
264 | * | ||
265 | * We need to hold vector_lock so that the set of online cpus | 260 | * We need to hold vector_lock so that the set of online cpus |
266 | * does not change while we are assigning vectors to cpus. Holding | 261 | * does not change while we are assigning vectors to cpus. Holding |
267 | * this lock ensures we don't half assign or remove an irq from a cpu. | 262 | * this lock ensures we don't half assign or remove an irq from a cpu. |
268 | */ | 263 | */ |
269 | ipi_call_lock(); | ||
270 | lock_vector_lock(); | 264 | lock_vector_lock(); |
271 | set_cpu_online(smp_processor_id(), true); | 265 | set_cpu_online(smp_processor_id(), true); |
272 | unlock_vector_lock(); | 266 | unlock_vector_lock(); |
273 | ipi_call_unlock(); | ||
274 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 267 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
275 | x86_platform.nmi_init(); | 268 | x86_platform.nmi_init(); |
276 | 269 | ||
@@ -432,17 +425,16 @@ static void impress_friends(void) | |||
432 | /* | 425 | /* |
433 | * Allow the user to impress friends. | 426 | * Allow the user to impress friends. |
434 | */ | 427 | */ |
435 | pr_debug("Before bogomips.\n"); | 428 | pr_debug("Before bogomips\n"); |
436 | for_each_possible_cpu(cpu) | 429 | for_each_possible_cpu(cpu) |
437 | if (cpumask_test_cpu(cpu, cpu_callout_mask)) | 430 | if (cpumask_test_cpu(cpu, cpu_callout_mask)) |
438 | bogosum += cpu_data(cpu).loops_per_jiffy; | 431 | bogosum += cpu_data(cpu).loops_per_jiffy; |
439 | printk(KERN_INFO | 432 | pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n", |
440 | "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", | ||
441 | num_online_cpus(), | 433 | num_online_cpus(), |
442 | bogosum/(500000/HZ), | 434 | bogosum/(500000/HZ), |
443 | (bogosum/(5000/HZ))%100); | 435 | (bogosum/(5000/HZ))%100); |
444 | 436 | ||
445 | pr_debug("Before bogocount - setting activated=1.\n"); | 437 | pr_debug("Before bogocount - setting activated=1\n"); |
446 | } | 438 | } |
447 | 439 | ||
448 | void __inquire_remote_apic(int apicid) | 440 | void __inquire_remote_apic(int apicid) |
@@ -452,18 +444,17 @@ void __inquire_remote_apic(int apicid) | |||
452 | int timeout; | 444 | int timeout; |
453 | u32 status; | 445 | u32 status; |
454 | 446 | ||
455 | printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); | 447 | pr_info("Inquiring remote APIC 0x%x...\n", apicid); |
456 | 448 | ||
457 | for (i = 0; i < ARRAY_SIZE(regs); i++) { | 449 | for (i = 0; i < ARRAY_SIZE(regs); i++) { |
458 | printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); | 450 | pr_info("... APIC 0x%x %s: ", apicid, names[i]); |
459 | 451 | ||
460 | /* | 452 | /* |
461 | * Wait for idle. | 453 | * Wait for idle. |
462 | */ | 454 | */ |
463 | status = safe_apic_wait_icr_idle(); | 455 | status = safe_apic_wait_icr_idle(); |
464 | if (status) | 456 | if (status) |
465 | printk(KERN_CONT | 457 | pr_cont("a previous APIC delivery may have failed\n"); |
466 | "a previous APIC delivery may have failed\n"); | ||
467 | 458 | ||
468 | apic_icr_write(APIC_DM_REMRD | regs[i], apicid); | 459 | apic_icr_write(APIC_DM_REMRD | regs[i], apicid); |
469 | 460 | ||
@@ -476,10 +467,10 @@ void __inquire_remote_apic(int apicid) | |||
476 | switch (status) { | 467 | switch (status) { |
477 | case APIC_ICR_RR_VALID: | 468 | case APIC_ICR_RR_VALID: |
478 | status = apic_read(APIC_RRR); | 469 | status = apic_read(APIC_RRR); |
479 | printk(KERN_CONT "%08x\n", status); | 470 | pr_cont("%08x\n", status); |
480 | break; | 471 | break; |
481 | default: | 472 | default: |
482 | printk(KERN_CONT "failed\n"); | 473 | pr_cont("failed\n"); |
483 | } | 474 | } |
484 | } | 475 | } |
485 | } | 476 | } |
@@ -513,12 +504,12 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) | |||
513 | apic_write(APIC_ESR, 0); | 504 | apic_write(APIC_ESR, 0); |
514 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 505 | accept_status = (apic_read(APIC_ESR) & 0xEF); |
515 | } | 506 | } |
516 | pr_debug("NMI sent.\n"); | 507 | pr_debug("NMI sent\n"); |
517 | 508 | ||
518 | if (send_status) | 509 | if (send_status) |
519 | printk(KERN_ERR "APIC never delivered???\n"); | 510 | pr_err("APIC never delivered???\n"); |
520 | if (accept_status) | 511 | if (accept_status) |
521 | printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); | 512 | pr_err("APIC delivery error (%lx)\n", accept_status); |
522 | 513 | ||
523 | return (send_status | accept_status); | 514 | return (send_status | accept_status); |
524 | } | 515 | } |
@@ -540,7 +531,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
540 | apic_read(APIC_ESR); | 531 | apic_read(APIC_ESR); |
541 | } | 532 | } |
542 | 533 | ||
543 | pr_debug("Asserting INIT.\n"); | 534 | pr_debug("Asserting INIT\n"); |
544 | 535 | ||
545 | /* | 536 | /* |
546 | * Turn INIT on target chip | 537 | * Turn INIT on target chip |
@@ -556,7 +547,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
556 | 547 | ||
557 | mdelay(10); | 548 | mdelay(10); |
558 | 549 | ||
559 | pr_debug("Deasserting INIT.\n"); | 550 | pr_debug("Deasserting INIT\n"); |
560 | 551 | ||
561 | /* Target chip */ | 552 | /* Target chip */ |
562 | /* Send IPI */ | 553 | /* Send IPI */ |
@@ -589,14 +580,14 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
589 | /* | 580 | /* |
590 | * Run STARTUP IPI loop. | 581 | * Run STARTUP IPI loop. |
591 | */ | 582 | */ |
592 | pr_debug("#startup loops: %d.\n", num_starts); | 583 | pr_debug("#startup loops: %d\n", num_starts); |
593 | 584 | ||
594 | for (j = 1; j <= num_starts; j++) { | 585 | for (j = 1; j <= num_starts; j++) { |
595 | pr_debug("Sending STARTUP #%d.\n", j); | 586 | pr_debug("Sending STARTUP #%d\n", j); |
596 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 587 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
597 | apic_write(APIC_ESR, 0); | 588 | apic_write(APIC_ESR, 0); |
598 | apic_read(APIC_ESR); | 589 | apic_read(APIC_ESR); |
599 | pr_debug("After apic_write.\n"); | 590 | pr_debug("After apic_write\n"); |
600 | 591 | ||
601 | /* | 592 | /* |
602 | * STARTUP IPI | 593 | * STARTUP IPI |
@@ -613,7 +604,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
613 | */ | 604 | */ |
614 | udelay(300); | 605 | udelay(300); |
615 | 606 | ||
616 | pr_debug("Startup point 1.\n"); | 607 | pr_debug("Startup point 1\n"); |
617 | 608 | ||
618 | pr_debug("Waiting for send to finish...\n"); | 609 | pr_debug("Waiting for send to finish...\n"); |
619 | send_status = safe_apic_wait_icr_idle(); | 610 | send_status = safe_apic_wait_icr_idle(); |
@@ -628,12 +619,12 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
628 | if (send_status || accept_status) | 619 | if (send_status || accept_status) |
629 | break; | 620 | break; |
630 | } | 621 | } |
631 | pr_debug("After Startup.\n"); | 622 | pr_debug("After Startup\n"); |
632 | 623 | ||
633 | if (send_status) | 624 | if (send_status) |
634 | printk(KERN_ERR "APIC never delivered???\n"); | 625 | pr_err("APIC never delivered???\n"); |
635 | if (accept_status) | 626 | if (accept_status) |
636 | printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); | 627 | pr_err("APIC delivery error (%lx)\n", accept_status); |
637 | 628 | ||
638 | return (send_status | accept_status); | 629 | return (send_status | accept_status); |
639 | } | 630 | } |
@@ -647,11 +638,11 @@ static void __cpuinit announce_cpu(int cpu, int apicid) | |||
647 | if (system_state == SYSTEM_BOOTING) { | 638 | if (system_state == SYSTEM_BOOTING) { |
648 | if (node != current_node) { | 639 | if (node != current_node) { |
649 | if (current_node > (-1)) | 640 | if (current_node > (-1)) |
650 | pr_cont(" Ok.\n"); | 641 | pr_cont(" OK\n"); |
651 | current_node = node; | 642 | current_node = node; |
652 | pr_info("Booting Node %3d, Processors ", node); | 643 | pr_info("Booting Node %3d, Processors ", node); |
653 | } | 644 | } |
654 | pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); | 645 | pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : ""); |
655 | return; | 646 | return; |
656 | } else | 647 | } else |
657 | pr_info("Booting Node %d Processor %d APIC 0x%x\n", | 648 | pr_info("Booting Node %d Processor %d APIC 0x%x\n", |
@@ -731,9 +722,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
731 | /* | 722 | /* |
732 | * allow APs to start initializing. | 723 | * allow APs to start initializing. |
733 | */ | 724 | */ |
734 | pr_debug("Before Callout %d.\n", cpu); | 725 | pr_debug("Before Callout %d\n", cpu); |
735 | cpumask_set_cpu(cpu, cpu_callout_mask); | 726 | cpumask_set_cpu(cpu, cpu_callout_mask); |
736 | pr_debug("After Callout %d.\n", cpu); | 727 | pr_debug("After Callout %d\n", cpu); |
737 | 728 | ||
738 | /* | 729 | /* |
739 | * Wait 5s total for a response | 730 | * Wait 5s total for a response |
@@ -761,7 +752,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
761 | pr_err("CPU%d: Stuck ??\n", cpu); | 752 | pr_err("CPU%d: Stuck ??\n", cpu); |
762 | else | 753 | else |
763 | /* trampoline code not run */ | 754 | /* trampoline code not run */ |
764 | pr_err("CPU%d: Not responding.\n", cpu); | 755 | pr_err("CPU%d: Not responding\n", cpu); |
765 | if (apic->inquire_remote_apic) | 756 | if (apic->inquire_remote_apic) |
766 | apic->inquire_remote_apic(apicid); | 757 | apic->inquire_remote_apic(apicid); |
767 | } | 758 | } |
@@ -806,7 +797,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
806 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || | 797 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || |
807 | !physid_isset(apicid, phys_cpu_present_map) || | 798 | !physid_isset(apicid, phys_cpu_present_map) || |
808 | !apic->apic_id_valid(apicid)) { | 799 | !apic->apic_id_valid(apicid)) { |
809 | printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); | 800 | pr_err("%s: bad cpu %d\n", __func__, cpu); |
810 | return -EINVAL; | 801 | return -EINVAL; |
811 | } | 802 | } |
812 | 803 | ||
@@ -887,9 +878,8 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
887 | unsigned int cpu; | 878 | unsigned int cpu; |
888 | unsigned nr; | 879 | unsigned nr; |
889 | 880 | ||
890 | printk(KERN_WARNING | 881 | pr_warn("More than 8 CPUs detected - skipping them\n" |
891 | "More than 8 CPUs detected - skipping them.\n" | 882 | "Use CONFIG_X86_BIGSMP\n"); |
892 | "Use CONFIG_X86_BIGSMP.\n"); | ||
893 | 883 | ||
894 | nr = 0; | 884 | nr = 0; |
895 | for_each_present_cpu(cpu) { | 885 | for_each_present_cpu(cpu) { |
@@ -910,8 +900,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
910 | #endif | 900 | #endif |
911 | 901 | ||
912 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | 902 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { |
913 | printk(KERN_WARNING | 903 | pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n", |
914 | "weird, boot CPU (#%d) not listed by the BIOS.\n", | ||
915 | hard_smp_processor_id()); | 904 | hard_smp_processor_id()); |
916 | 905 | ||
917 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | 906 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); |
@@ -923,11 +912,10 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
923 | */ | 912 | */ |
924 | if (!smp_found_config && !acpi_lapic) { | 913 | if (!smp_found_config && !acpi_lapic) { |
925 | preempt_enable(); | 914 | preempt_enable(); |
926 | printk(KERN_NOTICE "SMP motherboard not detected.\n"); | 915 | pr_notice("SMP motherboard not detected\n"); |
927 | disable_smp(); | 916 | disable_smp(); |
928 | if (APIC_init_uniprocessor()) | 917 | if (APIC_init_uniprocessor()) |
929 | printk(KERN_NOTICE "Local APIC not detected." | 918 | pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); |
930 | " Using dummy APIC emulation.\n"); | ||
931 | return -1; | 919 | return -1; |
932 | } | 920 | } |
933 | 921 | ||
@@ -936,9 +924,8 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
936 | * CPU too, but we do it for the sake of robustness anyway. | 924 | * CPU too, but we do it for the sake of robustness anyway. |
937 | */ | 925 | */ |
938 | if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { | 926 | if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { |
939 | printk(KERN_NOTICE | 927 | pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n", |
940 | "weird, boot CPU (#%d) not listed by the BIOS.\n", | 928 | boot_cpu_physical_apicid); |
941 | boot_cpu_physical_apicid); | ||
942 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | 929 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); |
943 | } | 930 | } |
944 | preempt_enable(); | 931 | preempt_enable(); |
@@ -951,8 +938,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
951 | if (!disable_apic) { | 938 | if (!disable_apic) { |
952 | pr_err("BIOS bug, local APIC #%d not detected!...\n", | 939 | pr_err("BIOS bug, local APIC #%d not detected!...\n", |
953 | boot_cpu_physical_apicid); | 940 | boot_cpu_physical_apicid); |
954 | pr_err("... forcing use of dummy APIC emulation." | 941 | pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); |
955 | "(tell your hw vendor)\n"); | ||
956 | } | 942 | } |
957 | smpboot_clear_io_apic(); | 943 | smpboot_clear_io_apic(); |
958 | disable_ioapic_support(); | 944 | disable_ioapic_support(); |
@@ -965,7 +951,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
965 | * If SMP should be disabled, then really disable it! | 951 | * If SMP should be disabled, then really disable it! |
966 | */ | 952 | */ |
967 | if (!max_cpus) { | 953 | if (!max_cpus) { |
968 | printk(KERN_INFO "SMP mode deactivated.\n"); | 954 | pr_info("SMP mode deactivated\n"); |
969 | smpboot_clear_io_apic(); | 955 | smpboot_clear_io_apic(); |
970 | 956 | ||
971 | connect_bsp_APIC(); | 957 | connect_bsp_APIC(); |
@@ -1017,7 +1003,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1017 | 1003 | ||
1018 | 1004 | ||
1019 | if (smp_sanity_check(max_cpus) < 0) { | 1005 | if (smp_sanity_check(max_cpus) < 0) { |
1020 | printk(KERN_INFO "SMP disabled\n"); | 1006 | pr_info("SMP disabled\n"); |
1021 | disable_smp(); | 1007 | disable_smp(); |
1022 | goto out; | 1008 | goto out; |
1023 | } | 1009 | } |
@@ -1055,7 +1041,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1055 | * Set up local APIC timer on boot CPU. | 1041 | * Set up local APIC timer on boot CPU. |
1056 | */ | 1042 | */ |
1057 | 1043 | ||
1058 | printk(KERN_INFO "CPU%d: ", 0); | 1044 | pr_info("CPU%d: ", 0); |
1059 | print_cpu_info(&cpu_data(0)); | 1045 | print_cpu_info(&cpu_data(0)); |
1060 | x86_init.timers.setup_percpu_clockev(); | 1046 | x86_init.timers.setup_percpu_clockev(); |
1061 | 1047 | ||
@@ -1105,7 +1091,7 @@ void __init native_smp_prepare_boot_cpu(void) | |||
1105 | 1091 | ||
1106 | void __init native_smp_cpus_done(unsigned int max_cpus) | 1092 | void __init native_smp_cpus_done(unsigned int max_cpus) |
1107 | { | 1093 | { |
1108 | pr_debug("Boot done.\n"); | 1094 | pr_debug("Boot done\n"); |
1109 | 1095 | ||
1110 | nmi_selftest(); | 1096 | nmi_selftest(); |
1111 | impress_friends(); | 1097 | impress_friends(); |
@@ -1166,8 +1152,7 @@ __init void prefill_possible_map(void) | |||
1166 | 1152 | ||
1167 | /* nr_cpu_ids could be reduced via nr_cpus= */ | 1153 | /* nr_cpu_ids could be reduced via nr_cpus= */ |
1168 | if (possible > nr_cpu_ids) { | 1154 | if (possible > nr_cpu_ids) { |
1169 | printk(KERN_WARNING | 1155 | pr_warn("%d Processors exceeds NR_CPUS limit of %d\n", |
1170 | "%d Processors exceeds NR_CPUS limit of %d\n", | ||
1171 | possible, nr_cpu_ids); | 1156 | possible, nr_cpu_ids); |
1172 | possible = nr_cpu_ids; | 1157 | possible = nr_cpu_ids; |
1173 | } | 1158 | } |
@@ -1176,13 +1161,12 @@ __init void prefill_possible_map(void) | |||
1176 | if (!setup_max_cpus) | 1161 | if (!setup_max_cpus) |
1177 | #endif | 1162 | #endif |
1178 | if (possible > i) { | 1163 | if (possible > i) { |
1179 | printk(KERN_WARNING | 1164 | pr_warn("%d Processors exceeds max_cpus limit of %u\n", |
1180 | "%d Processors exceeds max_cpus limit of %u\n", | ||
1181 | possible, setup_max_cpus); | 1165 | possible, setup_max_cpus); |
1182 | possible = i; | 1166 | possible = i; |
1183 | } | 1167 | } |
1184 | 1168 | ||
1185 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 1169 | pr_info("Allowing %d CPUs, %d hotplug CPUs\n", |
1186 | possible, max_t(int, possible - num_processors, 0)); | 1170 | possible, max_t(int, possible - num_processors, 0)); |
1187 | 1171 | ||
1188 | for (i = 0; i < possible; i++) | 1172 | for (i = 0; i < possible; i++) |
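Note on prefill_possible_map() above: the possible-CPU count is clamped twice, first against nr_cpu_ids (which nr_cpus= may have lowered), then against a maxcpus= style limit when hotplug is not requested. The arithmetic in isolation (stand-alone sketch, limits invented):

    #include <stdio.h>

    static int clamp_possible(int possible, int nr_cpu_ids, int max_cpus)
    {
            if (possible > nr_cpu_ids)      /* nr_cpus= may have shrunk this */
                    possible = nr_cpu_ids;
            if (max_cpus && possible > max_cpus)
                    possible = max_cpus;
            return possible;
    }

    int main(void)
    {
            printf("%d\n", clamp_possible(16, 8, 4));   /* prints 4 */
            return 0;
    }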
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 05b31d92f69c..b481341c9369 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -9,6 +9,9 @@ | |||
9 | /* | 9 | /* |
10 | * Handle hardware traps and faults. | 10 | * Handle hardware traps and faults. |
11 | */ | 11 | */ |
12 | |||
13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
14 | |||
12 | #include <linux/interrupt.h> | 15 | #include <linux/interrupt.h> |
13 | #include <linux/kallsyms.h> | 16 | #include <linux/kallsyms.h> |
14 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
@@ -143,12 +146,11 @@ trap_signal: | |||
143 | #ifdef CONFIG_X86_64 | 146 | #ifdef CONFIG_X86_64 |
144 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | 147 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && |
145 | printk_ratelimit()) { | 148 | printk_ratelimit()) { |
146 | printk(KERN_INFO | 149 | pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", |
147 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | 150 | tsk->comm, tsk->pid, str, |
148 | tsk->comm, tsk->pid, str, | 151 | regs->ip, regs->sp, error_code); |
149 | regs->ip, regs->sp, error_code); | ||
150 | print_vma_addr(" in ", regs->ip); | 152 | print_vma_addr(" in ", regs->ip); |
151 | printk("\n"); | 153 | pr_cont("\n"); |
152 | } | 154 | } |
153 | #endif | 155 | #endif |
154 | 156 | ||
@@ -269,12 +271,11 @@ do_general_protection(struct pt_regs *regs, long error_code) | |||
269 | 271 | ||
270 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | 272 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && |
271 | printk_ratelimit()) { | 273 | printk_ratelimit()) { |
272 | printk(KERN_INFO | 274 | pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx", |
273 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", | ||
274 | tsk->comm, task_pid_nr(tsk), | 275 | tsk->comm, task_pid_nr(tsk), |
275 | regs->ip, regs->sp, error_code); | 276 | regs->ip, regs->sp, error_code); |
276 | print_vma_addr(" in ", regs->ip); | 277 | print_vma_addr(" in ", regs->ip); |
277 | printk("\n"); | 278 | pr_cont("\n"); |
278 | } | 279 | } |
279 | 280 | ||
280 | force_sig(SIGSEGV, tsk); | 281 | force_sig(SIGSEGV, tsk); |
@@ -570,7 +571,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | |||
570 | conditional_sti(regs); | 571 | conditional_sti(regs); |
571 | #if 0 | 572 | #if 0 |
572 | /* No need to warn about this any longer. */ | 573 | /* No need to warn about this any longer. */ |
573 | printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); | 574 | pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); |
574 | #endif | 575 | #endif |
575 | } | 576 | } |
576 | 577 | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index fc0a147e3727..cfa5d4f7ca56 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
2 | |||
1 | #include <linux/kernel.h> | 3 | #include <linux/kernel.h> |
2 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
3 | #include <linux/init.h> | 5 | #include <linux/init.h> |
@@ -84,8 +86,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable); | |||
84 | #ifdef CONFIG_X86_TSC | 86 | #ifdef CONFIG_X86_TSC |
85 | int __init notsc_setup(char *str) | 87 | int __init notsc_setup(char *str) |
86 | { | 88 | { |
87 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | 89 | pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n"); |
88 | "cannot disable TSC completely.\n"); | ||
89 | tsc_disabled = 1; | 90 | tsc_disabled = 1; |
90 | return 1; | 91 | return 1; |
91 | } | 92 | } |
@@ -373,7 +374,7 @@ static unsigned long quick_pit_calibrate(void) | |||
373 | goto success; | 374 | goto success; |
374 | } | 375 | } |
375 | } | 376 | } |
376 | printk("Fast TSC calibration failed\n"); | 377 | pr_err("Fast TSC calibration failed\n"); |
377 | return 0; | 378 | return 0; |
378 | 379 | ||
379 | success: | 380 | success: |
@@ -392,7 +393,7 @@ success: | |||
392 | */ | 393 | */ |
393 | delta *= PIT_TICK_RATE; | 394 | delta *= PIT_TICK_RATE; |
394 | do_div(delta, i*256*1000); | 395 | do_div(delta, i*256*1000); |
395 | printk("Fast TSC calibration using PIT\n"); | 396 | pr_info("Fast TSC calibration using PIT\n"); |
396 | return delta; | 397 | return delta; |
397 | } | 398 | } |
398 | 399 | ||
@@ -487,9 +488,8 @@ unsigned long native_calibrate_tsc(void) | |||
487 | * use the reference value, as it is more precise. | 488 | * use the reference value, as it is more precise. |
488 | */ | 489 | */ |
489 | if (delta >= 90 && delta <= 110) { | 490 | if (delta >= 90 && delta <= 110) { |
490 | printk(KERN_INFO | 491 | pr_info("PIT calibration matches %s. %d loops\n", |
491 | "TSC: PIT calibration matches %s. %d loops\n", | 492 | hpet ? "HPET" : "PMTIMER", i + 1); |
492 | hpet ? "HPET" : "PMTIMER", i + 1); | ||
493 | return tsc_ref_min; | 493 | return tsc_ref_min; |
494 | } | 494 | } |
495 | 495 | ||
@@ -511,38 +511,36 @@ unsigned long native_calibrate_tsc(void) | |||
511 | */ | 511 | */ |
512 | if (tsc_pit_min == ULONG_MAX) { | 512 | if (tsc_pit_min == ULONG_MAX) { |
513 | /* PIT gave no useful value */ | 513 | /* PIT gave no useful value */ |
514 | printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); | 514 | pr_warn("Unable to calibrate against PIT\n"); |
515 | 515 | ||
516 | /* We don't have an alternative source, disable TSC */ | 516 | /* We don't have an alternative source, disable TSC */ |
517 | if (!hpet && !ref1 && !ref2) { | 517 | if (!hpet && !ref1 && !ref2) { |
518 | printk("TSC: No reference (HPET/PMTIMER) available\n"); | 518 | pr_notice("No reference (HPET/PMTIMER) available\n"); |
519 | return 0; | 519 | return 0; |
520 | } | 520 | } |
521 | 521 | ||
522 | /* The alternative source failed as well, disable TSC */ | 522 | /* The alternative source failed as well, disable TSC */ |
523 | if (tsc_ref_min == ULONG_MAX) { | 523 | if (tsc_ref_min == ULONG_MAX) { |
524 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " | 524 | pr_warn("HPET/PMTIMER calibration failed\n"); |
525 | "failed.\n"); | ||
526 | return 0; | 525 | return 0; |
527 | } | 526 | } |
528 | 527 | ||
529 | /* Use the alternative source */ | 528 | /* Use the alternative source */ |
530 | printk(KERN_INFO "TSC: using %s reference calibration\n", | 529 | pr_info("using %s reference calibration\n", |
531 | hpet ? "HPET" : "PMTIMER"); | 530 | hpet ? "HPET" : "PMTIMER"); |
532 | 531 | ||
533 | return tsc_ref_min; | 532 | return tsc_ref_min; |
534 | } | 533 | } |
535 | 534 | ||
536 | /* We don't have an alternative source, use the PIT calibration value */ | 535 | /* We don't have an alternative source, use the PIT calibration value */ |
537 | if (!hpet && !ref1 && !ref2) { | 536 | if (!hpet && !ref1 && !ref2) { |
538 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | 537 | pr_info("Using PIT calibration value\n"); |
539 | return tsc_pit_min; | 538 | return tsc_pit_min; |
540 | } | 539 | } |
541 | 540 | ||
542 | /* The alternative source failed, use the PIT calibration value */ | 541 | /* The alternative source failed, use the PIT calibration value */ |
543 | if (tsc_ref_min == ULONG_MAX) { | 542 | if (tsc_ref_min == ULONG_MAX) { |
544 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " | 543 | pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n"); |
545 | "Using PIT calibration\n"); | ||
546 | return tsc_pit_min; | 544 | return tsc_pit_min; |
547 | } | 545 | } |
548 | 546 | ||
@@ -551,9 +549,9 @@ unsigned long native_calibrate_tsc(void) | |||
551 | * the PIT value as we know that there are PMTIMERs around | 549 | * the PIT value as we know that there are PMTIMERs around |
552 | * running at double speed. At least we let the user know: | 550 | * running at double speed. At least we let the user know: |
553 | */ | 551 | */ |
554 | printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", | 552 | pr_warn("PIT calibration deviates from %s: %lu %lu\n", |
555 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); | 553 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); |
556 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | 554 | pr_info("Using PIT calibration value\n"); |
557 | return tsc_pit_min; | 555 | return tsc_pit_min; |
558 | } | 556 | } |
559 | 557 | ||
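Note on the calibration policy behind these messages: when both a PIT result and an HPET/PMTIMER reference result exist, the reference value wins only if the PIT agrees with it to within about 10%; otherwise the PIT value is trusted. A stand-alone sketch of that decision (numbers illustrative):

    #include <stdio.h>

    static unsigned long pick_tsc_khz(unsigned long pit, unsigned long ref)
    {
            unsigned long delta = pit * 100 / ref;  /* PIT/reference ratio, percent */

            if (delta >= 90 && delta <= 110)
                    return ref;     /* they agree; the reference is more precise */
            return pit;             /* deviation too large; fall back to the PIT */
    }

    int main(void)
    {
            printf("%lu kHz\n", pick_tsc_khz(2400000, 2398000));  /* 2398000 */
            return 0;
    }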
@@ -785,7 +783,7 @@ void mark_tsc_unstable(char *reason) | |||
785 | tsc_unstable = 1; | 783 | tsc_unstable = 1; |
786 | sched_clock_stable = 0; | 784 | sched_clock_stable = 0; |
787 | disable_sched_clock_irqtime(); | 785 | disable_sched_clock_irqtime(); |
788 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); | 786 | pr_info("Marking TSC unstable due to %s\n", reason); |
789 | /* Change only the rating, when not registered */ | 787 | /* Change only the rating, when not registered */ |
790 | if (clocksource_tsc.mult) | 788 | if (clocksource_tsc.mult) |
791 | clocksource_mark_unstable(&clocksource_tsc); | 789 | clocksource_mark_unstable(&clocksource_tsc); |
@@ -912,9 +910,9 @@ static void tsc_refine_calibration_work(struct work_struct *work) | |||
912 | goto out; | 910 | goto out; |
913 | 911 | ||
914 | tsc_khz = freq; | 912 | tsc_khz = freq; |
915 | printk(KERN_INFO "Refined TSC clocksource calibration: " | 913 | pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n", |
916 | "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, | 914 | (unsigned long)tsc_khz / 1000, |
917 | (unsigned long)tsc_khz % 1000); | 915 | (unsigned long)tsc_khz % 1000); |
918 | 916 | ||
919 | out: | 917 | out: |
920 | clocksource_register_khz(&clocksource_tsc, tsc_khz); | 918 | clocksource_register_khz(&clocksource_tsc, tsc_khz); |
@@ -970,9 +968,9 @@ void __init tsc_init(void) | |||
970 | return; | 968 | return; |
971 | } | 969 | } |
972 | 970 | ||
973 | printk("Detected %lu.%03lu MHz processor.\n", | 971 | pr_info("Detected %lu.%03lu MHz processor\n", |
974 | (unsigned long)cpu_khz / 1000, | 972 | (unsigned long)cpu_khz / 1000, |
975 | (unsigned long)cpu_khz % 1000); | 973 | (unsigned long)cpu_khz % 1000); |
976 | 974 | ||
977 | /* | 975 | /* |
978 | * Secondary CPUs do not run through tsc_init(), so set up | 976 | * Secondary CPUs do not run through tsc_init(), so set up |
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index dc4e910a7d96..36fd42091fa7 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
@@ -409,9 +409,10 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
409 | * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. | 409 | * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. |
410 | * @mm: the probed address space. | 410 | * @mm: the probed address space. |
411 | * @arch_uprobe: the probepoint information. | 411 | * @arch_uprobe: the probepoint information. |
412 | * @addr: virtual address at which to install the probepoint | ||
412 | * Return 0 on success or a -ve number on error. | 413 | * Return 0 on success or a -ve number on error. |
413 | */ | 414 | */ |
414 | int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm) | 415 | int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) |
415 | { | 416 | { |
416 | int ret; | 417 | int ret; |
417 | struct insn insn; | 418 | struct insn insn; |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 255f58ae71e8..54abcc0baf23 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -28,6 +28,8 @@ | |||
28 | * | 28 | * |
29 | */ | 29 | */ |
30 | 30 | ||
31 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
32 | |||
31 | #include <linux/capability.h> | 33 | #include <linux/capability.h> |
32 | #include <linux/errno.h> | 34 | #include <linux/errno.h> |
33 | #include <linux/interrupt.h> | 35 | #include <linux/interrupt.h> |
@@ -137,14 +139,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) | |||
137 | local_irq_enable(); | 139 | local_irq_enable(); |
138 | 140 | ||
139 | if (!current->thread.vm86_info) { | 141 | if (!current->thread.vm86_info) { |
140 | printk("no vm86_info: BAD\n"); | 142 | pr_alert("no vm86_info: BAD\n"); |
141 | do_exit(SIGSEGV); | 143 | do_exit(SIGSEGV); |
142 | } | 144 | } |
143 | set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); | 145 | set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); |
144 | tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); | 146 | tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs, regs); |
145 | tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); | 147 | tmp += put_user(current->thread.screen_bitmap, ¤t->thread.vm86_info->screen_bitmap); |
146 | if (tmp) { | 148 | if (tmp) { |
147 | printk("vm86: could not access userspace vm86_info\n"); | 149 | pr_alert("could not access userspace vm86_info\n"); |
148 | do_exit(SIGSEGV); | 150 | do_exit(SIGSEGV); |
149 | } | 151 | } |
150 | 152 | ||
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 8eeb55a551b4..992f890283e9 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/pci_ids.h> | 16 | #include <linux/pci_ids.h> |
17 | #include <linux/pci_regs.h> | 17 | #include <linux/pci_regs.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/irq.h> | ||
19 | 20 | ||
20 | #include <asm/apic.h> | 21 | #include <asm/apic.h> |
21 | #include <asm/pci-direct.h> | 22 | #include <asm/pci-direct.h> |
@@ -95,6 +96,18 @@ static void __init set_vsmp_pv_ops(void) | |||
95 | ctl = readl(address + 4); | 96 | ctl = readl(address + 4); |
96 | printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", | 97 | printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", |
97 | cap, ctl); | 98 | cap, ctl); |
99 | |||
100 | /* If possible, let the vSMP foundation route the interrupt optimally */ | ||
101 | #ifdef CONFIG_SMP | ||
102 | if (cap & ctl & BIT(8)) { | ||
103 | ctl &= ~BIT(8); | ||
104 | #ifdef CONFIG_PROC_FS | ||
105 | /* Don't let users change irq affinity via procfs */ | ||
106 | no_irq_affinity = 1; | ||
107 | #endif | ||
108 | } | ||
109 | #endif | ||
110 | |||
98 | if (cap & ctl & (1 << 4)) { | 111 | if (cap & ctl & (1 << 4)) { |
99 | /* Setup irq ops and turn on vSMP IRQ fastpath handling */ | 112 | /* Setup irq ops and turn on vSMP IRQ fastpath handling */ |
100 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); | 113 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); |
@@ -102,12 +115,11 @@ static void __init set_vsmp_pv_ops(void) | |||
102 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); | 115 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); |
103 | pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); | 116 | pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); |
104 | pv_init_ops.patch = vsmp_patch; | 117 | pv_init_ops.patch = vsmp_patch; |
105 | |||
106 | ctl &= ~(1 << 4); | 118 | ctl &= ~(1 << 4); |
107 | writel(ctl, address + 4); | ||
108 | ctl = readl(address + 4); | ||
109 | printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl); | ||
110 | } | 119 | } |
120 | writel(ctl, address + 4); | ||
121 | ctl = readl(address + 4); | ||
122 | pr_info("vSMP CTL: control set to:0x%08x\n", ctl); | ||
111 | 123 | ||
112 | early_iounmap(address, 8); | 124 | early_iounmap(address, 8); |
113 | } | 125 | } |
@@ -187,12 +199,36 @@ static void __init vsmp_cap_cpus(void) | |||
187 | #endif | 199 | #endif |
188 | } | 200 | } |
189 | 201 | ||
202 | static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | ||
203 | { | ||
204 | return hard_smp_processor_id() >> index_msb; | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * In vSMP, all cpus should be capable of handling interrupts, regardless of | ||
209 | * the APIC used. | ||
210 | */ | ||
211 | static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask, | ||
212 | const struct cpumask *mask) | ||
213 | { | ||
214 | cpumask_setall(retmask); | ||
215 | } | ||
216 | |||
217 | static void vsmp_apic_post_init(void) | ||
218 | { | ||
219 | /* need to update phys_pkg_id */ | ||
220 | apic->phys_pkg_id = apicid_phys_pkg_id; | ||
221 | apic->vector_allocation_domain = fill_vector_allocation_domain; | ||
222 | } | ||
223 | |||
190 | void __init vsmp_init(void) | 224 | void __init vsmp_init(void) |
191 | { | 225 | { |
192 | detect_vsmp_box(); | 226 | detect_vsmp_box(); |
193 | if (!is_vsmp_box()) | 227 | if (!is_vsmp_box()) |
194 | return; | 228 | return; |
195 | 229 | ||
230 | x86_platform.apic_post_init = vsmp_apic_post_init; | ||
231 | |||
196 | vsmp_cap_cpus(); | 232 | vsmp_cap_cpus(); |
197 | 233 | ||
198 | set_vsmp_pv_ops(); | 234 | set_vsmp_pv_ops(); |
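Note: both the new BIT(8) block and the pre-existing bit-4 fastpath follow the same negotiation — a feature is taken only when the capability register advertises it and the control register still has it set; clearing the control bit and writing the register back activates it. Stand-alone sketch (bit meanings taken from the hunk, treated as assumptions):

    #include <stdint.h>
    #include <stdio.h>

    #define VSMP_IRQ_ROUTING   (1u << 8)   /* assumed: platform IRQ routing */
    #define VSMP_IRQ_FASTPATH  (1u << 4)   /* assumed: PV IRQ fastpath */

    static uint32_t negotiate(uint32_t cap, uint32_t ctl)
    {
            if (cap & ctl & VSMP_IRQ_ROUTING)
                    ctl &= ~VSMP_IRQ_ROUTING;   /* let the platform route IRQs */
            if (cap & ctl & VSMP_IRQ_FASTPATH)
                    ctl &= ~VSMP_IRQ_FASTPATH;  /* enable the fastpath */
            return ctl;                         /* would be written back to CTL */
    }

    int main(void)
    {
            printf("ctl -> 0x%08x\n", negotiate(0x110, 0x110));  /* 0x00000000 */
            return 0;
    }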
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 5db36caf4289..8d141b309046 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -18,6 +18,8 @@ | |||
18 | * use the vDSO. | 18 | * use the vDSO. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
22 | |||
21 | #include <linux/time.h> | 23 | #include <linux/time.h> |
22 | #include <linux/init.h> | 24 | #include <linux/init.h> |
23 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
@@ -111,18 +113,13 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | |||
111 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, | 113 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, |
112 | const char *message) | 114 | const char *message) |
113 | { | 115 | { |
114 | static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); | 116 | if (!show_unhandled_signals) |
115 | struct task_struct *tsk; | ||
116 | |||
117 | if (!show_unhandled_signals || !__ratelimit(&rs)) | ||
118 | return; | 117 | return; |
119 | 118 | ||
120 | tsk = current; | 119 | pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", |
121 | 120 | level, current->comm, task_pid_nr(current), | |
122 | printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", | 121 | message, regs->ip, regs->cs, |
123 | level, tsk->comm, task_pid_nr(tsk), | 122 | regs->sp, regs->ax, regs->si, regs->di); |
124 | message, regs->ip, regs->cs, | ||
125 | regs->sp, regs->ax, regs->si, regs->di); | ||
126 | } | 123 | } |
127 | 124 | ||
128 | static int addr_to_vsyscall_nr(unsigned long addr) | 125 | static int addr_to_vsyscall_nr(unsigned long addr) |
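Note: pr_notice_ratelimited() absorbs the open-coded rate-limit state the old code carried. Its expansion is roughly the following (simplified sketch of the kernel macro, shape assumed; the real definition lives in <linux/printk.h>):

    #define my_notice_ratelimited(fmt, ...)                              \
    ({                                                                   \
            static DEFINE_RATELIMIT_STATE(_rs,                           \
                                          DEFAULT_RATELIMIT_INTERVAL,    \
                                          DEFAULT_RATELIMIT_BURST);      \
            if (__ratelimit(&_rs))                                       \
                    pr_notice(fmt, ##__VA_ARGS__);                       \
    })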
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 9796c2f3d074..6020f6f5927c 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -28,6 +28,7 @@ EXPORT_SYMBOL(__put_user_8); | |||
28 | 28 | ||
29 | EXPORT_SYMBOL(copy_user_generic_string); | 29 | EXPORT_SYMBOL(copy_user_generic_string); |
30 | EXPORT_SYMBOL(copy_user_generic_unrolled); | 30 | EXPORT_SYMBOL(copy_user_generic_unrolled); |
31 | EXPORT_SYMBOL(copy_user_enhanced_fast_string); | ||
31 | EXPORT_SYMBOL(__copy_user_nocache); | 32 | EXPORT_SYMBOL(__copy_user_nocache); |
32 | EXPORT_SYMBOL(_copy_from_user); | 33 | EXPORT_SYMBOL(_copy_from_user); |
33 | EXPORT_SYMBOL(_copy_to_user); | 34 | EXPORT_SYMBOL(_copy_to_user); |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 35c5e543f550..9f3167e891ef 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -29,7 +29,6 @@ void __init x86_init_uint_noop(unsigned int unused) { } | |||
29 | void __init x86_init_pgd_noop(pgd_t *unused) { } | 29 | void __init x86_init_pgd_noop(pgd_t *unused) { } |
30 | int __init iommu_init_noop(void) { return 0; } | 30 | int __init iommu_init_noop(void) { return 0; } |
31 | void iommu_shutdown_noop(void) { } | 31 | void iommu_shutdown_noop(void) { } |
32 | void wallclock_init_noop(void) { } | ||
33 | 32 | ||
34 | /* | 33 | /* |
35 | * The platform setup functions are preset with the default functions | 34 | * The platform setup functions are preset with the default functions |
@@ -101,7 +100,6 @@ static int default_i8042_detect(void) { return 1; }; | |||
101 | 100 | ||
102 | struct x86_platform_ops x86_platform = { | 101 | struct x86_platform_ops x86_platform = { |
103 | .calibrate_tsc = native_calibrate_tsc, | 102 | .calibrate_tsc = native_calibrate_tsc, |
104 | .wallclock_init = wallclock_init_noop, | ||
105 | .get_wallclock = mach_get_cmos_time, | 103 | .get_wallclock = mach_get_cmos_time, |
106 | .set_wallclock = mach_set_rtc_mmss, | 104 | .set_wallclock = mach_set_rtc_mmss, |
107 | .iommu_shutdown = iommu_shutdown_noop, | 105 | .iommu_shutdown = iommu_shutdown_noop, |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index bd18149b2b0f..3d3e20709119 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -3,6 +3,9 @@ | |||
3 | * | 3 | * |
4 | * Author: Suresh Siddha <suresh.b.siddha@intel.com> | 4 | * Author: Suresh Siddha <suresh.b.siddha@intel.com> |
5 | */ | 5 | */ |
6 | |||
7 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
8 | |||
6 | #include <linux/bootmem.h> | 9 | #include <linux/bootmem.h> |
7 | #include <linux/compat.h> | 10 | #include <linux/compat.h> |
8 | #include <asm/i387.h> | 11 | #include <asm/i387.h> |
@@ -162,7 +165,7 @@ int save_i387_xstate(void __user *buf) | |||
162 | BUG_ON(sig_xstate_size < xstate_size); | 165 | BUG_ON(sig_xstate_size < xstate_size); |
163 | 166 | ||
164 | if ((unsigned long)buf % 64) | 167 | if ((unsigned long)buf % 64) |
165 | printk("save_i387_xstate: bad fpstate %p\n", buf); | 168 | pr_err("%s: bad fpstate %p\n", __func__, buf); |
166 | 169 | ||
167 | if (!used_math()) | 170 | if (!used_math()) |
168 | return 0; | 171 | return 0; |
@@ -422,7 +425,7 @@ static void __init xstate_enable_boot_cpu(void) | |||
422 | pcntxt_mask = eax + ((u64)edx << 32); | 425 | pcntxt_mask = eax + ((u64)edx << 32); |
423 | 426 | ||
424 | if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { | 427 | if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { |
425 | printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", | 428 | pr_err("FP/SSE not shown under xsave features 0x%llx\n", |
426 | pcntxt_mask); | 429 | pcntxt_mask); |
427 | BUG(); | 430 | BUG(); |
428 | } | 431 | } |
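Note: the check above demands that the x87 and SSE bits are both present in the xsave feature mask before going further, since every xsave implementation must support them. The subset test in isolation (bit positions are the architectural XCR0 ones):

    #include <assert.h>
    #include <stdint.h>

    #define XSTATE_FP    (1ull << 0)   /* x87 state, XCR0 bit 0 */
    #define XSTATE_SSE   (1ull << 1)   /* SSE state, XCR0 bit 1 */
    #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)

    int main(void)
    {
            uint64_t pcntxt_mask = 0x7;   /* example: FP, SSE and AVX advertised */

            /* both required bits must be present, not just one of them */
            assert((pcntxt_mask & XSTATE_FPSSE) == XSTATE_FPSSE);
            return 0;
    }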
@@ -445,9 +448,8 @@ static void __init xstate_enable_boot_cpu(void) | |||
445 | 448 | ||
446 | setup_xstate_init(); | 449 | setup_xstate_init(); |
447 | 450 | ||
448 | printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " | 451 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", |
449 | "cntxt size 0x%x\n", | 452 | pcntxt_mask, xstate_size); |
450 | pcntxt_mask, xstate_size); | ||
451 | } | 453 | } |
452 | 454 | ||
453 | /* | 455 | /* |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7df1c6d839fb..0595f1397b7c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -201,6 +201,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
201 | unsigned f_lm = 0; | 201 | unsigned f_lm = 0; |
202 | #endif | 202 | #endif |
203 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | 203 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; |
204 | unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; | ||
204 | 205 | ||
205 | /* cpuid 1.edx */ | 206 | /* cpuid 1.edx */ |
206 | const u32 kvm_supported_word0_x86_features = | 207 | const u32 kvm_supported_word0_x86_features = |
@@ -228,7 +229,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
228 | 0 /* DS-CPL, VMX, SMX, EST */ | | 229 | 0 /* DS-CPL, VMX, SMX, EST */ | |
229 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | 230 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | |
230 | F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | | 231 | F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | |
231 | 0 /* Reserved, DCA */ | F(XMM4_1) | | 232 | F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | |
232 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | 233 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | |
233 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | | 234 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | |
234 | F(F16C) | F(RDRAND); | 235 | F(F16C) | F(RDRAND); |
@@ -248,7 +249,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
248 | /* cpuid 7.0.ebx */ | 249 | /* cpuid 7.0.ebx */ |
249 | const u32 kvm_supported_word9_x86_features = | 250 | const u32 kvm_supported_word9_x86_features = |
250 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | | 251 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | |
251 | F(BMI2) | F(ERMS) | F(RTM); | 252 | F(BMI2) | F(ERMS) | f_invpcid | F(RTM); |
252 | 253 | ||
253 | /* all calls to cpuid_count() should be made on the same cpu */ | 254 | /* all calls to cpuid_count() should be made on the same cpu */ |
254 | get_cpu(); | 255 | get_cpu(); |
@@ -409,6 +410,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
409 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | 410 | (1 << KVM_FEATURE_NOP_IO_DELAY) | |
410 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | 411 | (1 << KVM_FEATURE_CLOCKSOURCE2) | |
411 | (1 << KVM_FEATURE_ASYNC_PF) | | 412 | (1 << KVM_FEATURE_ASYNC_PF) | |
413 | (1 << KVM_FEATURE_PV_EOI) | | ||
412 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | 414 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); |
413 | 415 | ||
414 | if (sched_info_on()) | 416 | if (sched_info_on()) |
@@ -639,33 +641,37 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | |||
639 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | 641 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); |
640 | } | 642 | } |
641 | 643 | ||
642 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | 644 | void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) |
643 | { | 645 | { |
644 | u32 function, index; | 646 | u32 function = *eax, index = *ecx; |
645 | struct kvm_cpuid_entry2 *best; | 647 | struct kvm_cpuid_entry2 *best; |
646 | 648 | ||
647 | function = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
648 | index = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
649 | kvm_register_write(vcpu, VCPU_REGS_RAX, 0); | ||
650 | kvm_register_write(vcpu, VCPU_REGS_RBX, 0); | ||
651 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); | ||
652 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); | ||
653 | best = kvm_find_cpuid_entry(vcpu, function, index); | 649 | best = kvm_find_cpuid_entry(vcpu, function, index); |
654 | 650 | ||
655 | if (!best) | 651 | if (!best) |
656 | best = check_cpuid_limit(vcpu, function, index); | 652 | best = check_cpuid_limit(vcpu, function, index); |
657 | 653 | ||
658 | if (best) { | 654 | if (best) { |
659 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); | 655 | *eax = best->eax; |
660 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); | 656 | *ebx = best->ebx; |
661 | kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); | 657 | *ecx = best->ecx; |
662 | kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); | 658 | *edx = best->edx; |
663 | } | 659 | } else |
660 | *eax = *ebx = *ecx = *edx = 0; | ||
661 | } | ||
662 | |||
663 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | ||
664 | { | ||
665 | u32 function, eax, ebx, ecx, edx; | ||
666 | |||
667 | function = eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
668 | ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
669 | kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx); | ||
670 | kvm_register_write(vcpu, VCPU_REGS_RAX, eax); | ||
671 | kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); | ||
672 | kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); | ||
673 | kvm_register_write(vcpu, VCPU_REGS_RDX, edx); | ||
664 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 674 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
665 | trace_kvm_cpuid(function, | 675 | trace_kvm_cpuid(function, eax, ebx, ecx, edx); |
666 | kvm_register_read(vcpu, VCPU_REGS_RAX), | ||
667 | kvm_register_read(vcpu, VCPU_REGS_RBX), | ||
668 | kvm_register_read(vcpu, VCPU_REGS_RCX), | ||
669 | kvm_register_read(vcpu, VCPU_REGS_RDX)); | ||
670 | } | 676 | } |
671 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | 677 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); |
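The cpuid.c refactoring above moves the register plumbing out of kvm_emulate_cpuid() into kvm_cpuid(), whose contract is: the leaf arrives in *eax and the subleaf in *ecx, and either all four outputs are filled from the cached entries or all four are zeroed. A self-contained userspace analog of that lookup contract (table contents are invented, and the subleaf handling is elided for brevity):

    #include <stdint.h>
    #include <stdio.h>

    struct cpuid_entry { uint32_t function, eax, ebx, ecx, edx; };

    static const struct cpuid_entry table[] = {
        { .function = 0, .eax = 0xd, .ebx = 0x756e6547 },  /* made-up data */
    };

    /* Analog of kvm_cpuid(): *eax selects the leaf; the outputs are
     * overwritten on a hit and zeroed on a miss, as in the new code. */
    static void cpuid_lookup(uint32_t *eax, uint32_t *ebx,
                             uint32_t *ecx, uint32_t *edx)
    {
        size_t i;

        for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
            if (table[i].function == *eax) {
                *eax = table[i].eax; *ebx = table[i].ebx;
                *ecx = table[i].ecx; *edx = table[i].edx;
                return;
            }
        }
        *eax = *ebx = *ecx = *edx = 0;
    }

    int main(void)
    {
        uint32_t a = 0, b = 0, c = 0, d = 0;

        cpuid_lookup(&a, &b, &c, &d);
        printf("eax=%#x ebx=%#x\n", a, b);
        return 0;
    }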
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 26d1fb437eb5..a10e46016851 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -17,6 +17,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
17 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | 17 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, |
18 | struct kvm_cpuid2 *cpuid, | 18 | struct kvm_cpuid2 *cpuid, |
19 | struct kvm_cpuid_entry2 __user *entries); | 19 | struct kvm_cpuid_entry2 __user *entries); |
20 | void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); | ||
20 | 21 | ||
21 | 22 | ||
22 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | 23 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) |
@@ -51,4 +52,12 @@ static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) | |||
51 | return best && (best->ecx & bit(X86_FEATURE_OSVW)); | 52 | return best && (best->ecx & bit(X86_FEATURE_OSVW)); |
52 | } | 53 | } |
53 | 54 | ||
55 | static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) | ||
56 | { | ||
57 | struct kvm_cpuid_entry2 *best; | ||
58 | |||
59 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
60 | return best && (best->ecx & bit(X86_FEATURE_PCID)); | ||
61 | } | ||
62 | |||
54 | #endif | 63 | #endif |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f95d242ee9f7..97d9a9914ba8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -433,11 +433,32 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, | |||
433 | return ctxt->ops->intercept(ctxt, &info, stage); | 433 | return ctxt->ops->intercept(ctxt, &info, stage); |
434 | } | 434 | } |
435 | 435 | ||
436 | static void assign_masked(ulong *dest, ulong src, ulong mask) | ||
437 | { | ||
438 | *dest = (*dest & ~mask) | (src & mask); | ||
439 | } | ||
440 | |||
436 | static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt) | 441 | static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt) |
437 | { | 442 | { |
438 | return (1UL << (ctxt->ad_bytes << 3)) - 1; | 443 | return (1UL << (ctxt->ad_bytes << 3)) - 1; |
439 | } | 444 | } |
440 | 445 | ||
446 | static ulong stack_mask(struct x86_emulate_ctxt *ctxt) | ||
447 | { | ||
448 | u16 sel; | ||
449 | struct desc_struct ss; | ||
450 | |||
451 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
452 | return ~0UL; | ||
453 | ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS); | ||
454 | return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */ | ||
455 | } | ||
456 | |||
457 | static int stack_size(struct x86_emulate_ctxt *ctxt) | ||
458 | { | ||
459 | return (__fls(stack_mask(ctxt)) + 1) >> 3; | ||
460 | } | ||
461 | |||
441 | /* Access/update address held in a register, based on addressing mode. */ | 462 | /* Access/update address held in a register, based on addressing mode. */ |
442 | static inline unsigned long | 463 | static inline unsigned long |
443 | address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg) | 464 | address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg) |
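assign_masked() and stack_mask() above let the emulator update RSP/RBP at the width implied by SS.D: 16 bits when ss.d is clear, 32 bits when set, and the full register in long mode. A standalone check of the bit arithmetic:

    #include <assert.h>
    #include <stdio.h>

    typedef unsigned long ulong;

    /* Same helper as in the hunk above: update only the bits in mask. */
    static void assign_masked(ulong *dest, ulong src, ulong mask)
    {
        *dest = (*dest & ~mask) | (src & mask);
    }

    int main(void)
    {
        /* The stack_mask() trick: ~0U >> ((d ^ 1) * 16). */
        assert((~0U >> ((0 ^ 1) * 16)) == 0xffff);      /* ss.d = 0 */
        assert((~0U >> ((1 ^ 1) * 16)) == 0xffffffff);  /* ss.d = 1 */

        /* A 16-bit stack-pointer update leaves the upper bits alone. */
        ulong rsp = 0x12345678;
        assign_masked(&rsp, rsp - 4, 0xffff);
        printf("%#lx\n", rsp);  /* 0x12345674 */
        return 0;
    }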
@@ -958,6 +979,12 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | |||
958 | op->orig_val = op->val; | 979 | op->orig_val = op->val; |
959 | } | 980 | } |
960 | 981 | ||
982 | static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg) | ||
983 | { | ||
984 | if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP) | ||
985 | ctxt->modrm_seg = VCPU_SREG_SS; | ||
986 | } | ||
987 | |||
961 | static int decode_modrm(struct x86_emulate_ctxt *ctxt, | 988 | static int decode_modrm(struct x86_emulate_ctxt *ctxt, |
962 | struct operand *op) | 989 | struct operand *op) |
963 | { | 990 | { |
@@ -1061,15 +1088,20 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
1061 | 1088 | ||
1062 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) | 1089 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) |
1063 | modrm_ea += insn_fetch(s32, ctxt); | 1090 | modrm_ea += insn_fetch(s32, ctxt); |
1064 | else | 1091 | else { |
1065 | modrm_ea += ctxt->regs[base_reg]; | 1092 | modrm_ea += ctxt->regs[base_reg]; |
1093 | adjust_modrm_seg(ctxt, base_reg); | ||
1094 | } | ||
1066 | if (index_reg != 4) | 1095 | if (index_reg != 4) |
1067 | modrm_ea += ctxt->regs[index_reg] << scale; | 1096 | modrm_ea += ctxt->regs[index_reg] << scale; |
1068 | } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { | 1097 | } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { |
1069 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1098 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
1070 | ctxt->rip_relative = 1; | 1099 | ctxt->rip_relative = 1; |
1071 | } else | 1100 | } else { |
1072 | modrm_ea += ctxt->regs[ctxt->modrm_rm]; | 1101 | base_reg = ctxt->modrm_rm; |
1102 | modrm_ea += ctxt->regs[base_reg]; | ||
1103 | adjust_modrm_seg(ctxt, base_reg); | ||
1104 | } | ||
1073 | switch (ctxt->modrm_mod) { | 1105 | switch (ctxt->modrm_mod) { |
1074 | case 0: | 1106 | case 0: |
1075 | if (ctxt->modrm_rm == 5) | 1107 | if (ctxt->modrm_rm == 5) |
@@ -1264,7 +1296,8 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | |||
1264 | 1296 | ||
1265 | /* allowed only for 8-byte segment descriptors */ | 1297 | /* allowed only for 8-byte segment descriptors */ |
1266 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1298 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
1267 | u16 selector, struct desc_struct *desc) | 1299 | u16 selector, struct desc_struct *desc, |
1300 | ulong *desc_addr_p) | ||
1268 | { | 1301 | { |
1269 | struct desc_ptr dt; | 1302 | struct desc_ptr dt; |
1270 | u16 index = selector >> 3; | 1303 | u16 index = selector >> 3; |
@@ -1275,7 +1308,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1275 | if (dt.size < index * 8 + 7) | 1308 | if (dt.size < index * 8 + 7) |
1276 | return emulate_gp(ctxt, selector & 0xfffc); | 1309 | return emulate_gp(ctxt, selector & 0xfffc); |
1277 | 1310 | ||
1278 | addr = dt.address + index * 8; | 1311 | *desc_addr_p = addr = dt.address + index * 8; |
1279 | return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, | 1312 | return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, |
1280 | &ctxt->exception); | 1313 | &ctxt->exception); |
1281 | } | 1314 | } |
@@ -1302,11 +1335,12 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1302 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1335 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
1303 | u16 selector, int seg) | 1336 | u16 selector, int seg) |
1304 | { | 1337 | { |
1305 | struct desc_struct seg_desc; | 1338 | struct desc_struct seg_desc, old_desc; |
1306 | u8 dpl, rpl, cpl; | 1339 | u8 dpl, rpl, cpl; |
1307 | unsigned err_vec = GP_VECTOR; | 1340 | unsigned err_vec = GP_VECTOR; |
1308 | u32 err_code = 0; | 1341 | u32 err_code = 0; |
1309 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | 1342 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ |
1343 | ulong desc_addr; | ||
1310 | int ret; | 1344 | int ret; |
1311 | 1345 | ||
1312 | memset(&seg_desc, 0, sizeof seg_desc); | 1346 | memset(&seg_desc, 0, sizeof seg_desc); |
@@ -1324,8 +1358,14 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1324 | goto load; | 1358 | goto load; |
1325 | } | 1359 | } |
1326 | 1360 | ||
1327 | /* NULL selector is not valid for TR, CS and SS */ | 1361 | rpl = selector & 3; |
1328 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | 1362 | cpl = ctxt->ops->cpl(ctxt); |
1363 | |||
1364 | /* NULL selector is not valid for TR, CS and SS (except for long mode) */ | ||
1365 | if ((seg == VCPU_SREG_CS | ||
1366 | || (seg == VCPU_SREG_SS | ||
1367 | && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)) | ||
1368 | || seg == VCPU_SREG_TR) | ||
1329 | && null_selector) | 1369 | && null_selector) |
1330 | goto exception; | 1370 | goto exception; |
1331 | 1371 | ||
@@ -1336,7 +1376,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1336 | if (null_selector) /* for NULL selector skip all following checks */ | 1376 | if (null_selector) /* for NULL selector skip all following checks */ |
1337 | goto load; | 1377 | goto load; |
1338 | 1378 | ||
1339 | ret = read_segment_descriptor(ctxt, selector, &seg_desc); | 1379 | ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr); |
1340 | if (ret != X86EMUL_CONTINUE) | 1380 | if (ret != X86EMUL_CONTINUE) |
1341 | return ret; | 1381 | return ret; |
1342 | 1382 | ||
@@ -1352,9 +1392,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1352 | goto exception; | 1392 | goto exception; |
1353 | } | 1393 | } |
1354 | 1394 | ||
1355 | rpl = selector & 3; | ||
1356 | dpl = seg_desc.dpl; | 1395 | dpl = seg_desc.dpl; |
1357 | cpl = ctxt->ops->cpl(ctxt); | ||
1358 | 1396 | ||
1359 | switch (seg) { | 1397 | switch (seg) { |
1360 | case VCPU_SREG_SS: | 1398 | case VCPU_SREG_SS: |
@@ -1384,6 +1422,12 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1384 | case VCPU_SREG_TR: | 1422 | case VCPU_SREG_TR: |
1385 | if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) | 1423 | if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) |
1386 | goto exception; | 1424 | goto exception; |
1425 | old_desc = seg_desc; | ||
1426 | seg_desc.type |= 2; /* busy */ | ||
1427 | ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, | ||
1428 | sizeof(seg_desc), &ctxt->exception); | ||
1429 | if (ret != X86EMUL_CONTINUE) | ||
1430 | return ret; | ||
1387 | break; | 1431 | break; |
1388 | case VCPU_SREG_LDTR: | 1432 | case VCPU_SREG_LDTR: |
1389 | if (seg_desc.s || seg_desc.type != 2) | 1433 | if (seg_desc.s || seg_desc.type != 2) |
@@ -1474,17 +1518,22 @@ static int writeback(struct x86_emulate_ctxt *ctxt) | |||
1474 | return X86EMUL_CONTINUE; | 1518 | return X86EMUL_CONTINUE; |
1475 | } | 1519 | } |
1476 | 1520 | ||
1477 | static int em_push(struct x86_emulate_ctxt *ctxt) | 1521 | static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes) |
1478 | { | 1522 | { |
1479 | struct segmented_address addr; | 1523 | struct segmented_address addr; |
1480 | 1524 | ||
1481 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -ctxt->op_bytes); | 1525 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -bytes); |
1482 | addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); | 1526 | addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); |
1483 | addr.seg = VCPU_SREG_SS; | 1527 | addr.seg = VCPU_SREG_SS; |
1484 | 1528 | ||
1529 | return segmented_write(ctxt, addr, data, bytes); | ||
1530 | } | ||
1531 | |||
1532 | static int em_push(struct x86_emulate_ctxt *ctxt) | ||
1533 | { | ||
1485 | /* Disable writeback. */ | 1534 | /* Disable writeback. */ |
1486 | ctxt->dst.type = OP_NONE; | 1535 | ctxt->dst.type = OP_NONE; |
1487 | return segmented_write(ctxt, addr, &ctxt->src.val, ctxt->op_bytes); | 1536 | return push(ctxt, &ctxt->src.val, ctxt->op_bytes); |
1488 | } | 1537 | } |
1489 | 1538 | ||
1490 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, | 1539 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, |
@@ -1556,6 +1605,33 @@ static int em_popf(struct x86_emulate_ctxt *ctxt) | |||
1556 | return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes); | 1605 | return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes); |
1557 | } | 1606 | } |
1558 | 1607 | ||
1608 | static int em_enter(struct x86_emulate_ctxt *ctxt) | ||
1609 | { | ||
1610 | int rc; | ||
1611 | unsigned frame_size = ctxt->src.val; | ||
1612 | unsigned nesting_level = ctxt->src2.val & 31; | ||
1613 | |||
1614 | if (nesting_level) | ||
1615 | return X86EMUL_UNHANDLEABLE; | ||
1616 | |||
1617 | rc = push(ctxt, &ctxt->regs[VCPU_REGS_RBP], stack_size(ctxt)); | ||
1618 | if (rc != X86EMUL_CONTINUE) | ||
1619 | return rc; | ||
1620 | assign_masked(&ctxt->regs[VCPU_REGS_RBP], ctxt->regs[VCPU_REGS_RSP], | ||
1621 | stack_mask(ctxt)); | ||
1622 | assign_masked(&ctxt->regs[VCPU_REGS_RSP], | ||
1623 | ctxt->regs[VCPU_REGS_RSP] - frame_size, | ||
1624 | stack_mask(ctxt)); | ||
1625 | return X86EMUL_CONTINUE; | ||
1626 | } | ||
1627 | |||
1628 | static int em_leave(struct x86_emulate_ctxt *ctxt) | ||
1629 | { | ||
1630 | assign_masked(&ctxt->regs[VCPU_REGS_RSP], ctxt->regs[VCPU_REGS_RBP], | ||
1631 | stack_mask(ctxt)); | ||
1632 | return emulate_pop(ctxt, &ctxt->regs[VCPU_REGS_RBP], ctxt->op_bytes); | ||
1633 | } | ||
1634 | |||
1559 | static int em_push_sreg(struct x86_emulate_ctxt *ctxt) | 1635 | static int em_push_sreg(struct x86_emulate_ctxt *ctxt) |
1560 | { | 1636 | { |
1561 | int seg = ctxt->src2.val; | 1637 | int seg = ctxt->src2.val; |
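em_enter() and em_leave() above emulate ENTER with nesting level 0 (deeper nesting is punted as unhandleable) and LEAVE. A compact userspace model of that stack discipline, with a flat array standing in for guest memory and the operand-size masking omitted:

    #include <assert.h>

    typedef unsigned long ulong;

    /* ENTER frame_size, 0: push RBP; RBP = RSP; RSP -= frame_size. */
    static void enter0(ulong *rsp, ulong *rbp, ulong frame_size, ulong *mem)
    {
        *rsp -= sizeof(ulong);
        mem[*rsp / sizeof(ulong)] = *rbp;  /* push the old frame pointer */
        *rbp = *rsp;
        *rsp -= frame_size;
    }

    /* LEAVE: RSP = RBP; pop RBP. */
    static void leave(ulong *rsp, ulong *rbp, ulong *mem)
    {
        *rsp = *rbp;
        *rbp = mem[*rsp / sizeof(ulong)];
        *rsp += sizeof(ulong);
    }

    int main(void)
    {
        ulong mem[64], rsp = sizeof(mem), rbp = 0xdead;
        ulong rsp0 = rsp, rbp0 = rbp;

        enter0(&rsp, &rbp, 16, mem);
        assert(rbp == rsp0 - sizeof(ulong));
        assert(rsp == rsp0 - sizeof(ulong) - 16);
        leave(&rsp, &rbp, mem);
        assert(rsp == rsp0 && rbp == rbp0);
        return 0;
    }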
@@ -1993,8 +2069,8 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt) | |||
1993 | u32 eax, ebx, ecx, edx; | 2069 | u32 eax, ebx, ecx, edx; |
1994 | 2070 | ||
1995 | eax = ecx = 0; | 2071 | eax = ecx = 0; |
1996 | return ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx) | 2072 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); |
1997 | && ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx | 2073 | return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx |
1998 | && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx | 2074 | && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx |
1999 | && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; | 2075 | && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; |
2000 | } | 2076 | } |
@@ -2013,32 +2089,31 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) | |||
2013 | 2089 | ||
2014 | eax = 0x00000000; | 2090 | eax = 0x00000000; |
2015 | ecx = 0x00000000; | 2091 | ecx = 0x00000000; |
2016 | if (ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)) { | 2092 | ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); |
2017 | /* | 2093 | /* |
2018 | * Intel ("GenuineIntel") | 2094 | * Intel ("GenuineIntel") |
2019 | * remark: Intel CPUs only support "syscall" in 64bit | 2095 | * remark: Intel CPUs only support "syscall" in 64bit |
2020 | * longmode. Also, a 64bit guest with a | 2096 | * longmode. Also, a 64bit guest with a |
2021 | * 32bit compat-app running will #UD !! While this | 2097 | * 32bit compat-app running will #UD !! While this |
2022 | * behaviour can be fixed (by emulating) into AMD | 2098 | * behaviour can be fixed (by emulating) into AMD |
2023 | * response - CPUs of AMD can't behave like Intel. | 2099 | * response - CPUs of AMD can't behave like Intel. |
2024 | */ | 2100 | */ |
2025 | if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx && | 2101 | if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx && |
2026 | ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx && | 2102 | ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx && |
2027 | edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx) | 2103 | edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx) |
2028 | return false; | 2104 | return false; |
2029 | 2105 | ||
2030 | /* AMD ("AuthenticAMD") */ | 2106 | /* AMD ("AuthenticAMD") */ |
2031 | if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx && | 2107 | if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx && |
2032 | ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx && | 2108 | ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx && |
2033 | edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) | 2109 | edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) |
2034 | return true; | 2110 | return true; |
2035 | 2111 | ||
2036 | /* AMD ("AMDisbetter!") */ | 2112 | /* AMD ("AMDisbetter!") */ |
2037 | if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx && | 2113 | if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx && |
2038 | ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx && | 2114 | ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx && |
2039 | edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx) | 2115 | edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx) |
2040 | return true; | 2116 | return true; |
2041 | } | ||
2042 | 2117 | ||
2043 | /* default: (not Intel, not AMD), apply Intel's stricter rules... */ | 2118 | /* default: (not Intel, not AMD), apply Intel's stricter rules... */ |
2044 | return false; | 2119 | return false; |
@@ -2547,13 +2622,14 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2547 | ulong old_tss_base = | 2622 | ulong old_tss_base = |
2548 | ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); | 2623 | ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); |
2549 | u32 desc_limit; | 2624 | u32 desc_limit; |
2625 | ulong desc_addr; | ||
2550 | 2626 | ||
2551 | /* FIXME: old_tss_base == ~0 ? */ | 2627 | /* FIXME: old_tss_base == ~0 ? */ |
2552 | 2628 | ||
2553 | ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc); | 2629 | ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr); |
2554 | if (ret != X86EMUL_CONTINUE) | 2630 | if (ret != X86EMUL_CONTINUE) |
2555 | return ret; | 2631 | return ret; |
2556 | ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc); | 2632 | ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr); |
2557 | if (ret != X86EMUL_CONTINUE) | 2633 | if (ret != X86EMUL_CONTINUE) |
2558 | return ret; | 2634 | return ret; |
2559 | 2635 | ||
@@ -2948,6 +3024,24 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt) | |||
2948 | return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg); | 3024 | return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg); |
2949 | } | 3025 | } |
2950 | 3026 | ||
3027 | static int em_lldt(struct x86_emulate_ctxt *ctxt) | ||
3028 | { | ||
3029 | u16 sel = ctxt->src.val; | ||
3030 | |||
3031 | /* Disable writeback. */ | ||
3032 | ctxt->dst.type = OP_NONE; | ||
3033 | return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR); | ||
3034 | } | ||
3035 | |||
3036 | static int em_ltr(struct x86_emulate_ctxt *ctxt) | ||
3037 | { | ||
3038 | u16 sel = ctxt->src.val; | ||
3039 | |||
3040 | /* Disable writeback. */ | ||
3041 | ctxt->dst.type = OP_NONE; | ||
3042 | return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR); | ||
3043 | } | ||
3044 | |||
2951 | static int em_invlpg(struct x86_emulate_ctxt *ctxt) | 3045 | static int em_invlpg(struct x86_emulate_ctxt *ctxt) |
2952 | { | 3046 | { |
2953 | int rc; | 3047 | int rc; |
@@ -2989,11 +3083,42 @@ static int em_vmcall(struct x86_emulate_ctxt *ctxt) | |||
2989 | return X86EMUL_CONTINUE; | 3083 | return X86EMUL_CONTINUE; |
2990 | } | 3084 | } |
2991 | 3085 | ||
3086 | static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt, | ||
3087 | void (*get)(struct x86_emulate_ctxt *ctxt, | ||
3088 | struct desc_ptr *ptr)) | ||
3089 | { | ||
3090 | struct desc_ptr desc_ptr; | ||
3091 | |||
3092 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
3093 | ctxt->op_bytes = 8; | ||
3094 | get(ctxt, &desc_ptr); | ||
3095 | if (ctxt->op_bytes == 2) { | ||
3096 | ctxt->op_bytes = 4; | ||
3097 | desc_ptr.address &= 0x00ffffff; | ||
3098 | } | ||
3099 | /* Disable writeback. */ | ||
3100 | ctxt->dst.type = OP_NONE; | ||
3101 | return segmented_write(ctxt, ctxt->dst.addr.mem, | ||
3102 | &desc_ptr, 2 + ctxt->op_bytes); | ||
3103 | } | ||
3104 | |||
3105 | static int em_sgdt(struct x86_emulate_ctxt *ctxt) | ||
3106 | { | ||
3107 | return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt); | ||
3108 | } | ||
3109 | |||
3110 | static int em_sidt(struct x86_emulate_ctxt *ctxt) | ||
3111 | { | ||
3112 | return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt); | ||
3113 | } | ||
3114 | |||
2992 | static int em_lgdt(struct x86_emulate_ctxt *ctxt) | 3115 | static int em_lgdt(struct x86_emulate_ctxt *ctxt) |
2993 | { | 3116 | { |
2994 | struct desc_ptr desc_ptr; | 3117 | struct desc_ptr desc_ptr; |
2995 | int rc; | 3118 | int rc; |
2996 | 3119 | ||
3120 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
3121 | ctxt->op_bytes = 8; | ||
2997 | rc = read_descriptor(ctxt, ctxt->src.addr.mem, | 3122 | rc = read_descriptor(ctxt, ctxt->src.addr.mem, |
2998 | &desc_ptr.size, &desc_ptr.address, | 3123 | &desc_ptr.size, &desc_ptr.address, |
2999 | ctxt->op_bytes); | 3124 | ctxt->op_bytes); |
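emulate_store_desc_ptr() above always writes 2 + op_bytes bytes and, for a 16-bit operand size, widens the write to 4 bytes while truncating the stored base to 24 bits, matching SGDT/SIDT behavior outside long mode. The size and masking logic, checked in isolation:

    #include <assert.h>
    #include <stdint.h>

    struct desc_ptr { uint16_t size; uint64_t address; };

    /* Mirrors the op_bytes fixup: a 16-bit SGDT/SIDT still stores
     * 2 + 4 bytes, but only 24 bits of the base survive. */
    static unsigned store_bytes(int op_bytes, struct desc_ptr *p)
    {
        if (op_bytes == 2) {
            op_bytes = 4;
            p->address &= 0x00ffffff;
        }
        return 2 + op_bytes;
    }

    int main(void)
    {
        struct desc_ptr p = { .size = 0x3f, .address = 0x12345678 };

        assert(store_bytes(2, &p) == 6 && p.address == 0x345678);
        p.address = 0x12345678;
        assert(store_bytes(4, &p) == 6 && p.address == 0x12345678);
        return 0;
    }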
@@ -3021,6 +3146,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt) | |||
3021 | struct desc_ptr desc_ptr; | 3146 | struct desc_ptr desc_ptr; |
3022 | int rc; | 3147 | int rc; |
3023 | 3148 | ||
3149 | if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
3150 | ctxt->op_bytes = 8; | ||
3024 | rc = read_descriptor(ctxt, ctxt->src.addr.mem, | 3151 | rc = read_descriptor(ctxt, ctxt->src.addr.mem, |
3025 | &desc_ptr.size, &desc_ptr.address, | 3152 | &desc_ptr.size, &desc_ptr.address, |
3026 | ctxt->op_bytes); | 3153 | ctxt->op_bytes); |
@@ -3143,6 +3270,42 @@ static int em_bsr(struct x86_emulate_ctxt *ctxt) | |||
3143 | return X86EMUL_CONTINUE; | 3270 | return X86EMUL_CONTINUE; |
3144 | } | 3271 | } |
3145 | 3272 | ||
3273 | static int em_cpuid(struct x86_emulate_ctxt *ctxt) | ||
3274 | { | ||
3275 | u32 eax, ebx, ecx, edx; | ||
3276 | |||
3277 | eax = ctxt->regs[VCPU_REGS_RAX]; | ||
3278 | ecx = ctxt->regs[VCPU_REGS_RCX]; | ||
3279 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | ||
3280 | ctxt->regs[VCPU_REGS_RAX] = eax; | ||
3281 | ctxt->regs[VCPU_REGS_RBX] = ebx; | ||
3282 | ctxt->regs[VCPU_REGS_RCX] = ecx; | ||
3283 | ctxt->regs[VCPU_REGS_RDX] = edx; | ||
3284 | return X86EMUL_CONTINUE; | ||
3285 | } | ||
3286 | |||
3287 | static int em_lahf(struct x86_emulate_ctxt *ctxt) | ||
3288 | { | ||
3289 | ctxt->regs[VCPU_REGS_RAX] &= ~0xff00UL; | ||
3290 | ctxt->regs[VCPU_REGS_RAX] |= (ctxt->eflags & 0xff) << 8; | ||
3291 | return X86EMUL_CONTINUE; | ||
3292 | } | ||
3293 | |||
3294 | static int em_bswap(struct x86_emulate_ctxt *ctxt) | ||
3295 | { | ||
3296 | switch (ctxt->op_bytes) { | ||
3297 | #ifdef CONFIG_X86_64 | ||
3298 | case 8: | ||
3299 | asm("bswap %0" : "+r"(ctxt->dst.val)); | ||
3300 | break; | ||
3301 | #endif | ||
3302 | default: | ||
3303 | asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val)); | ||
3304 | break; | ||
3305 | } | ||
3306 | return X86EMUL_CONTINUE; | ||
3307 | } | ||
3308 | |||
3146 | static bool valid_cr(int nr) | 3309 | static bool valid_cr(int nr) |
3147 | { | 3310 | { |
3148 | switch (nr) { | 3311 | switch (nr) { |
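em_bswap() above executes the bswap instruction on the destination, using the 64-bit form only when op_bytes is 8 on a CONFIG_X86_64 build. The byte reversal itself can be sanity-checked in userspace with the GCC/Clang builtins, which compile to the same instruction on x86:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t v32 = 0x11223344;
        uint64_t v64 = 0x1122334455667788ULL;

        assert(__builtin_bswap32(v32) == 0x44332211);
        assert(__builtin_bswap64(v64) == 0x8877665544332211ULL);
        return 0;
    }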
@@ -3424,14 +3587,14 @@ static struct opcode group5[] = { | |||
3424 | static struct opcode group6[] = { | 3587 | static struct opcode group6[] = { |
3425 | DI(Prot, sldt), | 3588 | DI(Prot, sldt), |
3426 | DI(Prot, str), | 3589 | DI(Prot, str), |
3427 | DI(Prot | Priv, lldt), | 3590 | II(Prot | Priv | SrcMem16, em_lldt, lldt), |
3428 | DI(Prot | Priv, ltr), | 3591 | II(Prot | Priv | SrcMem16, em_ltr, ltr), |
3429 | N, N, N, N, | 3592 | N, N, N, N, |
3430 | }; | 3593 | }; |
3431 | 3594 | ||
3432 | static struct group_dual group7 = { { | 3595 | static struct group_dual group7 = { { |
3433 | DI(Mov | DstMem | Priv, sgdt), | 3596 | II(Mov | DstMem | Priv, em_sgdt, sgdt), |
3434 | DI(Mov | DstMem | Priv, sidt), | 3597 | II(Mov | DstMem | Priv, em_sidt, sidt), |
3435 | II(SrcMem | Priv, em_lgdt, lgdt), | 3598 | II(SrcMem | Priv, em_lgdt, lgdt), |
3436 | II(SrcMem | Priv, em_lidt, lidt), | 3599 | II(SrcMem | Priv, em_lidt, lidt), |
3437 | II(SrcNone | DstMem | Mov, em_smsw, smsw), N, | 3600 | II(SrcNone | DstMem | Mov, em_smsw, smsw), N, |
@@ -3538,7 +3701,7 @@ static struct opcode opcode_table[256] = { | |||
3538 | D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), | 3701 | D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), |
3539 | I(SrcImmFAddr | No64, em_call_far), N, | 3702 | I(SrcImmFAddr | No64, em_call_far), N, |
3540 | II(ImplicitOps | Stack, em_pushf, pushf), | 3703 | II(ImplicitOps | Stack, em_pushf, pushf), |
3541 | II(ImplicitOps | Stack, em_popf, popf), N, N, | 3704 | II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf), |
3542 | /* 0xA0 - 0xA7 */ | 3705 | /* 0xA0 - 0xA7 */ |
3543 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), | 3706 | I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), |
3544 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), | 3707 | I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), |
@@ -3561,7 +3724,8 @@ static struct opcode opcode_table[256] = { | |||
3561 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), | 3724 | I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), |
3562 | G(ByteOp, group11), G(0, group11), | 3725 | G(ByteOp, group11), G(0, group11), |
3563 | /* 0xC8 - 0xCF */ | 3726 | /* 0xC8 - 0xCF */ |
3564 | N, N, N, I(ImplicitOps | Stack, em_ret_far), | 3727 | I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave), |
3728 | N, I(ImplicitOps | Stack, em_ret_far), | ||
3565 | D(ImplicitOps), DI(SrcImmByte, intn), | 3729 | D(ImplicitOps), DI(SrcImmByte, intn), |
3566 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), | 3730 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), |
3567 | /* 0xD0 - 0xD7 */ | 3731 | /* 0xD0 - 0xD7 */ |
@@ -3635,7 +3799,7 @@ static struct opcode twobyte_table[256] = { | |||
3635 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), | 3799 | X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), |
3636 | /* 0xA0 - 0xA7 */ | 3800 | /* 0xA0 - 0xA7 */ |
3637 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), | 3801 | I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), |
3638 | DI(ImplicitOps, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), | 3802 | II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), |
3639 | D(DstMem | SrcReg | Src2ImmByte | ModRM), | 3803 | D(DstMem | SrcReg | Src2ImmByte | ModRM), |
3640 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, | 3804 | D(DstMem | SrcReg | Src2CL | ModRM), N, N, |
3641 | /* 0xA8 - 0xAF */ | 3805 | /* 0xA8 - 0xAF */ |
@@ -3658,11 +3822,12 @@ static struct opcode twobyte_table[256] = { | |||
3658 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), | 3822 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), |
3659 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), | 3823 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), |
3660 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3824 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3661 | /* 0xC0 - 0xCF */ | 3825 | /* 0xC0 - 0xC7 */ |
3662 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3826 | D2bv(DstMem | SrcReg | ModRM | Lock), |
3663 | N, D(DstMem | SrcReg | ModRM | Mov), | 3827 | N, D(DstMem | SrcReg | ModRM | Mov), |
3664 | N, N, N, GD(0, &group9), | 3828 | N, N, N, GD(0, &group9), |
3665 | N, N, N, N, N, N, N, N, | 3829 | /* 0xC8 - 0xCF */ |
3830 | X8(I(DstReg, em_bswap)), | ||
3666 | /* 0xD0 - 0xDF */ | 3831 | /* 0xD0 - 0xDF */ |
3667 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, | 3832 | N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, |
3668 | /* 0xE0 - 0xEF */ | 3833 | /* 0xE0 - 0xEF */ |
@@ -4426,12 +4591,12 @@ twobyte_insn: | |||
4426 | break; | 4591 | break; |
4427 | case 0xb6 ... 0xb7: /* movzx */ | 4592 | case 0xb6 ... 0xb7: /* movzx */ |
4428 | ctxt->dst.bytes = ctxt->op_bytes; | 4593 | ctxt->dst.bytes = ctxt->op_bytes; |
4429 | ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val | 4594 | ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val |
4430 | : (u16) ctxt->src.val; | 4595 | : (u16) ctxt->src.val; |
4431 | break; | 4596 | break; |
4432 | case 0xbe ... 0xbf: /* movsx */ | 4597 | case 0xbe ... 0xbf: /* movsx */ |
4433 | ctxt->dst.bytes = ctxt->op_bytes; | 4598 | ctxt->dst.bytes = ctxt->op_bytes; |
4434 | ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val : | 4599 | ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val : |
4435 | (s16) ctxt->src.val; | 4600 | (s16) ctxt->src.val; |
4436 | break; | 4601 | break; |
4437 | case 0xc0 ... 0xc1: /* xadd */ | 4602 | case 0xc0 ... 0xc1: /* xadd */ |
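The movzx/movsx change at the end of the emulate.c diff keys the truncation off the actual source size (src.bytes == 1) instead of the ByteOp decode flag. The widening semantics being emulated, expressed with plain C casts:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t src = 0xffffff80;  /* low byte 0x80, low word 0xff80 */

        /* movzx: zero-extends from the source width */
        assert((uint8_t)src == 0x80);
        assert((uint16_t)src == 0xff80);

        /* movsx: sign-extends from the source width */
        assert((int8_t)src == -128);
        assert((int16_t)src == -128);
        return 0;
    }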
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 81cf4fa4a2be..1df8fb9e1d5d 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -188,14 +188,15 @@ void kvm_pic_update_irq(struct kvm_pic *s) | |||
188 | pic_unlock(s); | 188 | pic_unlock(s); |
189 | } | 189 | } |
190 | 190 | ||
191 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 191 | int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level) |
192 | { | 192 | { |
193 | struct kvm_pic *s = opaque; | ||
194 | int ret = -1; | 193 | int ret = -1; |
195 | 194 | ||
196 | pic_lock(s); | 195 | pic_lock(s); |
197 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 196 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
198 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 197 | int irq_level = __kvm_irq_line_state(&s->irq_states[irq], |
198 | irq_source_id, level); | ||
199 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level); | ||
199 | pic_update_irq(s); | 200 | pic_update_irq(s); |
200 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 201 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
201 | s->pics[irq >> 3].imr, ret == 0); | 202 | s->pics[irq >> 3].imr, ret == 0); |
@@ -205,6 +206,16 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
205 | return ret; | 206 | return ret; |
206 | } | 207 | } |
207 | 208 | ||
209 | void kvm_pic_clear_all(struct kvm_pic *s, int irq_source_id) | ||
210 | { | ||
211 | int i; | ||
212 | |||
213 | pic_lock(s); | ||
214 | for (i = 0; i < PIC_NUM_PINS; i++) | ||
215 | __clear_bit(irq_source_id, &s->irq_states[i]); | ||
216 | pic_unlock(s); | ||
217 | } | ||
218 | |||
208 | /* | 219 | /* |
209 | * acknowledge interrupt 'irq' | 220 | * acknowledge interrupt 'irq' |
210 | */ | 221 | */ |
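kvm_pic_set_irq() now folds per-source line states into a single level per pin, and kvm_pic_clear_all() drops one source's contribution from every pin. A sketch of the presumed bookkeeping (one bit per irq_source_id, the pin asserted while any source holds it high; the kernel's __kvm_irq_line_state may differ in detail):

    #include <assert.h>

    static int irq_line_state(unsigned long *irq_state, int source_id, int level)
    {
        if (level)
            *irq_state |= 1UL << source_id;
        else
            *irq_state &= ~(1UL << source_id);
        return !!*irq_state;  /* effective level seen by the pin */
    }

    int main(void)
    {
        unsigned long pin = 0;

        assert(irq_line_state(&pin, 0, 1) == 1);  /* source 0 raises */
        assert(irq_line_state(&pin, 1, 1) == 1);  /* source 1 raises */
        assert(irq_line_state(&pin, 0, 0) == 1);  /* still held by source 1 */
        assert(irq_line_state(&pin, 1, 0) == 0);  /* now fully deasserted */
        return 0;
    }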
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 93c15743f1ee..ce878788a39f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -107,6 +107,16 @@ static inline void apic_clear_vector(int vec, void *bitmap) | |||
107 | clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 107 | clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
108 | } | 108 | } |
109 | 109 | ||
110 | static inline int __apic_test_and_set_vector(int vec, void *bitmap) | ||
111 | { | ||
112 | return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
113 | } | ||
114 | |||
115 | static inline int __apic_test_and_clear_vector(int vec, void *bitmap) | ||
116 | { | ||
117 | return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | ||
118 | } | ||
119 | |||
110 | static inline int apic_hw_enabled(struct kvm_lapic *apic) | 120 | static inline int apic_hw_enabled(struct kvm_lapic *apic) |
111 | { | 121 | { |
112 | return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; | 122 | return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; |
@@ -210,6 +220,16 @@ static int find_highest_vector(void *bitmap) | |||
210 | return fls(word[word_offset << 2]) - 1 + (word_offset << 5); | 220 | return fls(word[word_offset << 2]) - 1 + (word_offset << 5); |
211 | } | 221 | } |
212 | 222 | ||
223 | static u8 count_vectors(void *bitmap) | ||
224 | { | ||
225 | u32 *word = bitmap; | ||
226 | int word_offset; | ||
227 | u8 count = 0; | ||
228 | for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset) | ||
229 | count += hweight32(word[word_offset << 2]); | ||
230 | return count; | ||
231 | } | ||
232 | |||
213 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) | 233 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) |
214 | { | 234 | { |
215 | apic->irr_pending = true; | 235 | apic->irr_pending = true; |
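count_vectors() above counts the set bits of the APIC ISR, whose 256 bits live in eight 32-bit words spaced 16 bytes apart; that spacing is what the word_offset << 2 index on a u32 pointer encodes. A standalone version, substituting the GCC/Clang __builtin_popcount for the kernel's hweight32():

    #include <assert.h>
    #include <stdint.h>

    #define MAX_APIC_VECTOR 256

    static uint8_t count_vectors(void *bitmap)
    {
        uint32_t *word = bitmap;
        int word_offset;
        uint8_t count = 0;

        for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
            count += __builtin_popcount(word[word_offset << 2]);
        return count;
    }

    int main(void)
    {
        uint32_t regs[32] = { 0 };  /* eight words at 16-byte spacing */

        regs[0 << 2] = 1u << 3;     /* vector 3 */
        regs[7 << 2] = 1u << 31;    /* vector 255 */
        assert(count_vectors(regs) == 2);
        return 0;
    }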
@@ -242,6 +262,27 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) | |||
242 | apic->irr_pending = true; | 262 | apic->irr_pending = true; |
243 | } | 263 | } |
244 | 264 | ||
265 | static inline void apic_set_isr(int vec, struct kvm_lapic *apic) | ||
266 | { | ||
267 | if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) | ||
268 | ++apic->isr_count; | ||
269 | BUG_ON(apic->isr_count > MAX_APIC_VECTOR); | ||
270 | /* | ||
271 | * An ISR (in-service register) bit is set when an interrupt is | ||
272 | * injected, and the highest vector is always injected, so the most | ||
273 | * recently set bit matches the highest bit in the ISR. | ||
274 | */ | ||
275 | apic->highest_isr_cache = vec; | ||
276 | } | ||
277 | |||
278 | static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) | ||
279 | { | ||
280 | if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) | ||
281 | --apic->isr_count; | ||
282 | BUG_ON(apic->isr_count < 0); | ||
283 | apic->highest_isr_cache = -1; | ||
284 | } | ||
285 | |||
245 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | 286 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) |
246 | { | 287 | { |
247 | struct kvm_lapic *apic = vcpu->arch.apic; | 288 | struct kvm_lapic *apic = vcpu->arch.apic; |
@@ -270,9 +311,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) | |||
270 | irq->level, irq->trig_mode); | 311 | irq->level, irq->trig_mode); |
271 | } | 312 | } |
272 | 313 | ||
314 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) | ||
315 | { | ||
316 | |||
317 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, | ||
318 | sizeof(val)); | ||
319 | } | ||
320 | |||
321 | static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) | ||
322 | { | ||
323 | |||
324 | return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, | ||
325 | sizeof(*val)); | ||
326 | } | ||
327 | |||
328 | static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) | ||
329 | { | ||
330 | return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; | ||
331 | } | ||
332 | |||
333 | static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) | ||
334 | { | ||
335 | u8 val; | ||
336 | if (pv_eoi_get_user(vcpu, &val) < 0) | ||
337 | apic_debug("Can't read EOI MSR value: 0x%llx\n", | ||
338 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | ||
339 | return val & 0x1; | ||
340 | } | ||
341 | |||
342 | static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) | ||
343 | { | ||
344 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { | ||
345 | apic_debug("Can't set EOI MSR value: 0x%llx\n", | ||
346 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); | ||
347 | return; | ||
348 | } | ||
349 | __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | ||
350 | } | ||
351 | |||
352 | static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | ||
353 | { | ||
354 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { | ||
355 | apic_debug("Can't clear EOI MSR value: 0x%llx\n", | ||
356 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); | ||
357 | return; | ||
358 | } | ||
359 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | ||
360 | } | ||
361 | |||
273 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) | 362 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) |
274 | { | 363 | { |
275 | int result; | 364 | int result; |
365 | if (!apic->isr_count) | ||
366 | return -1; | ||
367 | if (likely(apic->highest_isr_cache != -1)) | ||
368 | return apic->highest_isr_cache; | ||
276 | 369 | ||
277 | result = find_highest_vector(apic->regs + APIC_ISR); | 370 | result = find_highest_vector(apic->regs + APIC_ISR); |
278 | ASSERT(result == -1 || result >= 16); | 371 | ASSERT(result == -1 || result >= 16); |
@@ -482,17 +575,20 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) | |||
482 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; | 575 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
483 | } | 576 | } |
484 | 577 | ||
485 | static void apic_set_eoi(struct kvm_lapic *apic) | 578 | static int apic_set_eoi(struct kvm_lapic *apic) |
486 | { | 579 | { |
487 | int vector = apic_find_highest_isr(apic); | 580 | int vector = apic_find_highest_isr(apic); |
581 | |||
582 | trace_kvm_eoi(apic, vector); | ||
583 | |||
488 | /* | 584 | /* |
489 | * Not every EOI write has a corresponding ISR bit set; | 585 | * Not every EOI write has a corresponding ISR bit set; |
490 | * one example is when the kernel checks the timer in setup_IO_APIC | 586 | * one example is when the kernel checks the timer in setup_IO_APIC |
491 | */ | 587 | */ |
492 | if (vector == -1) | 588 | if (vector == -1) |
493 | return; | 589 | return vector; |
494 | 590 | ||
495 | apic_clear_vector(vector, apic->regs + APIC_ISR); | 591 | apic_clear_isr(vector, apic); |
496 | apic_update_ppr(apic); | 592 | apic_update_ppr(apic); |
497 | 593 | ||
498 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && | 594 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && |
@@ -505,6 +601,7 @@ static void apic_set_eoi(struct kvm_lapic *apic) | |||
505 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | 601 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); |
506 | } | 602 | } |
507 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | 603 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); |
604 | return vector; | ||
508 | } | 605 | } |
509 | 606 | ||
510 | static void apic_send_ipi(struct kvm_lapic *apic) | 607 | static void apic_send_ipi(struct kvm_lapic *apic) |
@@ -1081,10 +1178,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1081 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 1178 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
1082 | } | 1179 | } |
1083 | apic->irr_pending = false; | 1180 | apic->irr_pending = false; |
1181 | apic->isr_count = 0; | ||
1182 | apic->highest_isr_cache = -1; | ||
1084 | update_divide_count(apic); | 1183 | update_divide_count(apic); |
1085 | atomic_set(&apic->lapic_timer.pending, 0); | 1184 | atomic_set(&apic->lapic_timer.pending, 0); |
1086 | if (kvm_vcpu_is_bsp(vcpu)) | 1185 | if (kvm_vcpu_is_bsp(vcpu)) |
1087 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1186 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; |
1187 | vcpu->arch.pv_eoi.msr_val = 0; | ||
1088 | apic_update_ppr(apic); | 1188 | apic_update_ppr(apic); |
1089 | 1189 | ||
1090 | vcpu->arch.apic_arb_prio = 0; | 1190 | vcpu->arch.apic_arb_prio = 0; |
@@ -1248,7 +1348,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) | |||
1248 | if (vector == -1) | 1348 | if (vector == -1) |
1249 | return -1; | 1349 | return -1; |
1250 | 1350 | ||
1251 | apic_set_vector(vector, apic->regs + APIC_ISR); | 1351 | apic_set_isr(vector, apic); |
1252 | apic_update_ppr(apic); | 1352 | apic_update_ppr(apic); |
1253 | apic_clear_irr(vector, apic); | 1353 | apic_clear_irr(vector, apic); |
1254 | return vector; | 1354 | return vector; |
@@ -1267,6 +1367,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | |||
1267 | update_divide_count(apic); | 1367 | update_divide_count(apic); |
1268 | start_apic_timer(apic); | 1368 | start_apic_timer(apic); |
1269 | apic->irr_pending = true; | 1369 | apic->irr_pending = true; |
1370 | apic->isr_count = count_vectors(apic->regs + APIC_ISR); | ||
1371 | apic->highest_isr_cache = -1; | ||
1270 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1372 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
1271 | } | 1373 | } |
1272 | 1374 | ||
@@ -1283,11 +1385,51 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | |||
1283 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 1385 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
1284 | } | 1386 | } |
1285 | 1387 | ||
1388 | /* | ||
1389 | * apic_sync_pv_eoi_from_guest - called on vmexit or interrupt cancellation | ||
1390 | * | ||
1391 | * Detect whether the guest triggered PV EOI since the | ||
1392 | * last entry. If yes, set EOI on the guest's behalf. | ||
1393 | * Clear PV EOI in guest memory in any case. | ||
1394 | */ | ||
1395 | static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, | ||
1396 | struct kvm_lapic *apic) | ||
1397 | { | ||
1398 | bool pending; | ||
1399 | int vector; | ||
1400 | /* | ||
1401 | * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host | ||
1402 | * and KVM_PV_EOI_ENABLED in guest memory as follows: | ||
1403 | * | ||
1404 | * KVM_APIC_PV_EOI_PENDING is unset: | ||
1405 | * -> host disabled PV EOI. | ||
1406 | * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set: | ||
1407 | * -> host enabled PV EOI, guest did not execute EOI yet. | ||
1408 | * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset: | ||
1409 | * -> host enabled PV EOI, guest executed EOI. | ||
1410 | */ | ||
1411 | BUG_ON(!pv_eoi_enabled(vcpu)); | ||
1412 | pending = pv_eoi_get_pending(vcpu); | ||
1413 | /* | ||
1414 | * Clear pending bit in any case: it will be set again on vmentry. | ||
1415 | * While this might not be ideal from a performance point of view, | ||
1416 | * this makes sure pv eoi is only enabled when we know it's safe. | ||
1417 | */ | ||
1418 | pv_eoi_clr_pending(vcpu); | ||
1419 | if (pending) | ||
1420 | return; | ||
1421 | vector = apic_set_eoi(apic); | ||
1422 | trace_kvm_pv_eoi(apic, vector); | ||
1423 | } | ||
1424 | |||
1286 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) | 1425 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) |
1287 | { | 1426 | { |
1288 | u32 data; | 1427 | u32 data; |
1289 | void *vapic; | 1428 | void *vapic; |
1290 | 1429 | ||
1430 | if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention)) | ||
1431 | apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic); | ||
1432 | |||
1291 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | 1433 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) |
1292 | return; | 1434 | return; |
1293 | 1435 | ||
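The state table in the apic_sync_pv_eoi_from_guest() comment reduces to a three-way classification of the host-side pending bit against the flag in guest memory. A tiny model of that table, with invented enum names for illustration:

    #include <assert.h>
    #include <stdbool.h>

    enum pv_eoi_state { PV_EOI_DISABLED, PV_EOI_ARMED, PV_EOI_DONE };

    /* host_pending = KVM_APIC_PV_EOI_PENDING in apic_attention;
     * guest_enabled = KVM_PV_EOI_ENABLED bit in guest memory. */
    static enum pv_eoi_state classify(bool host_pending, bool guest_enabled)
    {
        if (!host_pending)
            return PV_EOI_DISABLED;          /* host disabled PV EOI */
        return guest_enabled ? PV_EOI_ARMED  /* guest has not EOI'd yet */
                             : PV_EOI_DONE;  /* guest executed EOI */
    }

    int main(void)
    {
        assert(classify(false, false) == PV_EOI_DISABLED);
        assert(classify(true, true) == PV_EOI_ARMED);
        assert(classify(true, false) == PV_EOI_DONE);  /* EOI on guest's behalf */
        return 0;
    }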
@@ -1298,17 +1440,44 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) | |||
1298 | apic_set_tpr(vcpu->arch.apic, data & 0xff); | 1440 | apic_set_tpr(vcpu->arch.apic, data & 0xff); |
1299 | } | 1441 | } |
1300 | 1442 | ||
1443 | /* | ||
1444 | * apic_sync_pv_eoi_to_guest - called before vmentry | ||
1445 | * | ||
1446 | * Detect whether it's safe to enable PV EOI and | ||
1447 | * if yes do so. | ||
1448 | */ | ||
1449 | static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, | ||
1450 | struct kvm_lapic *apic) | ||
1451 | { | ||
1452 | if (!pv_eoi_enabled(vcpu) || | ||
1453 | /* IRR set or many bits in ISR: could be nested. */ | ||
1454 | apic->irr_pending || | ||
1455 | /* Cache not set: could be safe but we don't bother. */ | ||
1456 | apic->highest_isr_cache == -1 || | ||
1457 | /* Need EOI to update ioapic. */ | ||
1458 | kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { | ||
1459 | /* | ||
1460 | * PV EOI was disabled by apic_sync_pv_eoi_from_guest | ||
1461 | * so we need not do anything here. | ||
1462 | */ | ||
1463 | return; | ||
1464 | } | ||
1465 | |||
1466 | pv_eoi_set_pending(apic->vcpu); | ||
1467 | } | ||
1468 | |||
1301 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) | 1469 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) |
1302 | { | 1470 | { |
1303 | u32 data, tpr; | 1471 | u32 data, tpr; |
1304 | int max_irr, max_isr; | 1472 | int max_irr, max_isr; |
1305 | struct kvm_lapic *apic; | 1473 | struct kvm_lapic *apic = vcpu->arch.apic; |
1306 | void *vapic; | 1474 | void *vapic; |
1307 | 1475 | ||
1476 | apic_sync_pv_eoi_to_guest(vcpu, apic); | ||
1477 | |||
1308 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | 1478 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) |
1309 | return; | 1479 | return; |
1310 | 1480 | ||
1311 | apic = vcpu->arch.apic; | ||
1312 | tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; | 1481 | tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; |
1313 | max_irr = apic_find_highest_irr(apic); | 1482 | max_irr = apic_find_highest_irr(apic); |
1314 | if (max_irr < 0) | 1483 | if (max_irr < 0) |
@@ -1394,3 +1563,16 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) | |||
1394 | 1563 | ||
1395 | return 0; | 1564 | return 0; |
1396 | } | 1565 | } |
1566 | |||
1567 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) | ||
1568 | { | ||
1569 | u64 addr = data & ~KVM_MSR_ENABLED; | ||
1570 | if (!IS_ALIGNED(addr, 4)) | ||
1571 | return 1; | ||
1572 | |||
1573 | vcpu->arch.pv_eoi.msr_val = data; | ||
1574 | if (!pv_eoi_enabled(vcpu)) | ||
1575 | return 0; | ||
1576 | return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, | ||
1577 | addr); | ||
1578 | } | ||
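kvm_lapic_enable_pv_eoi() above treats the MSR value as a guest address with the enable flag packed into bit 0, and rejects misaligned addresses before caching the mapping. The validation, mirrored in isolation:

    #include <assert.h>
    #include <stdint.h>

    #define KVM_MSR_ENABLED 1ULL
    #define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

    /* Returns 1 on error, as the MSR handler does. */
    static int check_pv_eoi_msr(uint64_t data)
    {
        uint64_t addr = data & ~KVM_MSR_ENABLED;

        return IS_ALIGNED(addr, 4) ? 0 : 1;
    }

    int main(void)
    {
        assert(check_pv_eoi_msr(0x1000 | KVM_MSR_ENABLED) == 0);
        assert(check_pv_eoi_msr(0x1002) == 1);  /* misaligned address */
        return 0;
    }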
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 6f4ce2575d09..4af5405ae1e2 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -13,6 +13,15 @@ struct kvm_lapic { | |||
13 | u32 divide_count; | 13 | u32 divide_count; |
14 | struct kvm_vcpu *vcpu; | 14 | struct kvm_vcpu *vcpu; |
15 | bool irr_pending; | 15 | bool irr_pending; |
16 | /* Number of bits set in ISR. */ | ||
17 | s16 isr_count; | ||
18 | /* Highest vector set in ISR; -1 means invalid, the ISR must be scanned. */ | ||
19 | int highest_isr_cache; | ||
20 | /** | ||
21 | * APIC register page. The layout matches the register layout seen by | ||
22 | * the guest 1:1, because it is accessed by the vmx microcode. | ||
23 | * Note: Only one register, the TPR, is used by the microcode. | ||
24 | */ | ||
16 | void *regs; | 25 | void *regs; |
17 | gpa_t vapic_addr; | 26 | gpa_t vapic_addr; |
18 | struct page *vapic_page; | 27 | struct page *vapic_page; |
@@ -60,4 +69,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) | |||
60 | { | 69 | { |
61 | return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; | 70 | return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; |
62 | } | 71 | } |
72 | |||
73 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); | ||
63 | #endif | 74 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 57e168e27b5b..01ca00423938 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -90,7 +90,7 @@ module_param(dbg, bool, 0644); | |||
90 | 90 | ||
91 | #define PTE_PREFETCH_NUM 8 | 91 | #define PTE_PREFETCH_NUM 8 |
92 | 92 | ||
93 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 | 93 | #define PT_FIRST_AVAIL_BITS_SHIFT 10 |
94 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 | 94 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 |
95 | 95 | ||
96 | #define PT64_LEVEL_BITS 9 | 96 | #define PT64_LEVEL_BITS 9 |
@@ -145,7 +145,8 @@ module_param(dbg, bool, 0644); | |||
145 | #define CREATE_TRACE_POINTS | 145 | #define CREATE_TRACE_POINTS |
146 | #include "mmutrace.h" | 146 | #include "mmutrace.h" |
147 | 147 | ||
148 | #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) | 148 | #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) |
149 | #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) | ||
149 | 150 | ||
150 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | 151 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
151 | 152 | ||
@@ -188,6 +189,7 @@ static u64 __read_mostly shadow_dirty_mask; | |||
188 | static u64 __read_mostly shadow_mmio_mask; | 189 | static u64 __read_mostly shadow_mmio_mask; |
189 | 190 | ||
190 | static void mmu_spte_set(u64 *sptep, u64 spte); | 191 | static void mmu_spte_set(u64 *sptep, u64 spte); |
192 | static void mmu_free_roots(struct kvm_vcpu *vcpu); | ||
191 | 193 | ||
192 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) | 194 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) |
193 | { | 195 | { |
@@ -444,8 +446,22 @@ static bool __check_direct_spte_mmio_pf(u64 spte) | |||
444 | } | 446 | } |
445 | #endif | 447 | #endif |
446 | 448 | ||
449 | static bool spte_is_locklessly_modifiable(u64 spte) | ||
450 | { | ||
451 | return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)); | ||
452 | } | ||
453 | |||
447 | static bool spte_has_volatile_bits(u64 spte) | 454 | static bool spte_has_volatile_bits(u64 spte) |
448 | { | 455 | { |
456 | /* | ||
457 | * Always update the spte atomically if it can be updated | ||
458 | * out of mmu-lock: this ensures the dirty bit is not lost, | ||
459 | * and gives us a stable is_writable_pte() so that no | ||
460 | * tlb flush is missed. | ||
461 | */ | ||
462 | if (spte_is_locklessly_modifiable(spte)) | ||
463 | return true; | ||
464 | |||
449 | if (!shadow_accessed_mask) | 465 | if (!shadow_accessed_mask) |
450 | return false; | 466 | return false; |
451 | 467 | ||
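spte_is_locklessly_modifiable() above is true only when both software-writable bits are set; with PT_FIRST_AVAIL_BITS_SHIFT raised to 10 in this patch, those are bits 10 and 11. The !(~spte & mask) idiom, checked standalone:

    #include <assert.h>
    #include <stdint.h>

    #define SPTE_HOST_WRITEABLE (1ULL << 10)
    #define SPTE_MMU_WRITEABLE  (1ULL << 11)

    /* True only when *both* bits are set, i.e. ~spte contains neither. */
    static int locklessly_modifiable(uint64_t spte)
    {
        return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
    }

    int main(void)
    {
        assert(locklessly_modifiable(SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
        assert(!locklessly_modifiable(SPTE_HOST_WRITEABLE));  /* one bit only */
        assert(!locklessly_modifiable(0));
        return 0;
    }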
@@ -478,34 +494,47 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte) | |||
478 | 494 | ||
479 | /* Rules for using mmu_spte_update: | 495 | /* Rules for using mmu_spte_update: |
480 | * Update the state bits; this means the mapped pfn is not changed. | 496 | * Update the state bits; this means the mapped pfn is not changed. |
497 | * | ||
498 | * Whenever we overwrite a writable spte with a read-only one we | ||
499 | * should flush remote TLBs. Otherwise rmap_write_protect | ||
500 | * will find a read-only spte, even though the writable spte | ||
501 | * might be cached on a CPU's TLB, the return value indicates this | ||
502 | * case. | ||
481 | */ | 503 | */ |
482 | static void mmu_spte_update(u64 *sptep, u64 new_spte) | 504 | static bool mmu_spte_update(u64 *sptep, u64 new_spte) |
483 | { | 505 | { |
484 | u64 mask, old_spte = *sptep; | 506 | u64 old_spte = *sptep; |
507 | bool ret = false; | ||
485 | 508 | ||
486 | WARN_ON(!is_rmap_spte(new_spte)); | 509 | WARN_ON(!is_rmap_spte(new_spte)); |
487 | 510 | ||
488 | if (!is_shadow_present_pte(old_spte)) | 511 | if (!is_shadow_present_pte(old_spte)) { |
489 | return mmu_spte_set(sptep, new_spte); | 512 | mmu_spte_set(sptep, new_spte); |
490 | 513 | return ret; | |
491 | new_spte |= old_spte & shadow_dirty_mask; | 514 | } |
492 | |||
493 | mask = shadow_accessed_mask; | ||
494 | if (is_writable_pte(old_spte)) | ||
495 | mask |= shadow_dirty_mask; | ||
496 | 515 | ||
497 | if (!spte_has_volatile_bits(old_spte) || (new_spte & mask) == mask) | 516 | if (!spte_has_volatile_bits(old_spte)) |
498 | __update_clear_spte_fast(sptep, new_spte); | 517 | __update_clear_spte_fast(sptep, new_spte); |
499 | else | 518 | else |
500 | old_spte = __update_clear_spte_slow(sptep, new_spte); | 519 | old_spte = __update_clear_spte_slow(sptep, new_spte); |
501 | 520 | ||
521 | /* | ||
522 | * Updating the spte out of mmu-lock is safe, since | ||
523 | * we always update it atomically; see the comments in | ||
524 | * spte_has_volatile_bits(). | ||
525 | */ | ||
526 | if (is_writable_pte(old_spte) && !is_writable_pte(new_spte)) | ||
527 | ret = true; | ||
528 | |||
502 | if (!shadow_accessed_mask) | 529 | if (!shadow_accessed_mask) |
503 | return; | 530 | return ret; |
504 | 531 | ||
505 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask)) | 532 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask)) |
506 | kvm_set_pfn_accessed(spte_to_pfn(old_spte)); | 533 | kvm_set_pfn_accessed(spte_to_pfn(old_spte)); |
507 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask)) | 534 | if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask)) |
508 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); | 535 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); |
536 | |||
537 | return ret; | ||
509 | } | 538 | } |
510 | 539 | ||
511 | /* | 540 | /* |
@@ -652,8 +681,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) | |||
652 | mmu_page_header_cache); | 681 | mmu_page_header_cache); |
653 | } | 682 | } |
654 | 683 | ||
655 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, | 684 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) |
656 | size_t size) | ||
657 | { | 685 | { |
658 | void *p; | 686 | void *p; |
659 | 687 | ||
@@ -664,8 +692,7 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, | |||
664 | 692 | ||
665 | static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu) | 693 | static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu) |
666 | { | 694 | { |
667 | return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache, | 695 | return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache); |
668 | sizeof(struct pte_list_desc)); | ||
669 | } | 696 | } |
670 | 697 | ||
671 | static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) | 698 | static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) |
@@ -1051,35 +1078,82 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) | |||
1051 | rmap_remove(kvm, sptep); | 1078 | rmap_remove(kvm, sptep); |
1052 | } | 1079 | } |
1053 | 1080 | ||
1054 | static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level) | 1081 | |
1082 | static bool __drop_large_spte(struct kvm *kvm, u64 *sptep) | ||
1083 | { | ||
1084 | if (is_large_pte(*sptep)) { | ||
1085 | WARN_ON(page_header(__pa(sptep))->role.level == | ||
1086 | PT_PAGE_TABLE_LEVEL); | ||
1087 | drop_spte(kvm, sptep); | ||
1088 | --kvm->stat.lpages; | ||
1089 | return true; | ||
1090 | } | ||
1091 | |||
1092 | return false; | ||
1093 | } | ||
1094 | |||
1095 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | ||
1096 | { | ||
1097 | if (__drop_large_spte(vcpu->kvm, sptep)) | ||
1098 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1099 | } | ||
1100 | |||
1101 | /* | ||
1102 | * Write-protect the specified @sptep; @pt_protect indicates whether the | ||
1103 | * spte write-protection is caused by protecting the shadow page table. | ||
1104 | * @flush indicates whether the TLB needs to be flushed. | ||
1105 | * | ||
1106 | * Note: write protection differs between dirty logging and spte | ||
1107 | * protection: | ||
1108 | * - for dirty logging, the spte can be set to writable at any time if | ||
1109 | * its dirty bitmap is properly set. | ||
1110 | * - for spte protection, the spte can be writable only after unsync-ing | ||
1111 | * the shadow page. | ||
1112 | * | ||
1113 | * Return true if the spte is dropped. | ||
1114 | */ | ||
1115 | static bool | ||
1116 | spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) | ||
1117 | { | ||
1118 | u64 spte = *sptep; | ||
1119 | |||
1120 | if (!is_writable_pte(spte) && | ||
1121 | !(pt_protect && spte_is_locklessly_modifiable(spte))) | ||
1122 | return false; | ||
1123 | |||
1124 | rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); | ||
1125 | |||
1126 | if (__drop_large_spte(kvm, sptep)) { | ||
1127 | *flush |= true; | ||
1128 | return true; | ||
1129 | } | ||
1130 | |||
1131 | if (pt_protect) | ||
1132 | spte &= ~SPTE_MMU_WRITEABLE; | ||
1133 | spte = spte & ~PT_WRITABLE_MASK; | ||
1134 | |||
1135 | *flush |= mmu_spte_update(sptep, spte); | ||
1136 | return false; | ||
1137 | } | ||
1138 | |||
1139 | static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, | ||
1140 | int level, bool pt_protect) | ||
1055 | { | 1141 | { |
1056 | u64 *sptep; | 1142 | u64 *sptep; |
1057 | struct rmap_iterator iter; | 1143 | struct rmap_iterator iter; |
1058 | int write_protected = 0; | 1144 | bool flush = false; |
1059 | 1145 | ||
1060 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { | 1146 | for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { |
1061 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | 1147 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); |
1062 | rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); | 1148 | if (spte_write_protect(kvm, sptep, &flush, pt_protect)) { |
1063 | |||
1064 | if (!is_writable_pte(*sptep)) { | ||
1065 | sptep = rmap_get_next(&iter); | ||
1066 | continue; | ||
1067 | } | ||
1068 | |||
1069 | if (level == PT_PAGE_TABLE_LEVEL) { | ||
1070 | mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK); | ||
1071 | sptep = rmap_get_next(&iter); | ||
1072 | } else { | ||
1073 | BUG_ON(!is_large_pte(*sptep)); | ||
1074 | drop_spte(kvm, sptep); | ||
1075 | --kvm->stat.lpages; | ||
1076 | sptep = rmap_get_first(*rmapp, &iter); | 1149 | sptep = rmap_get_first(*rmapp, &iter); |
1150 | continue; | ||
1077 | } | 1151 | } |
1078 | 1152 | ||
1079 | write_protected = 1; | 1153 | sptep = rmap_get_next(&iter); |
1080 | } | 1154 | } |
1081 | 1155 | ||
1082 | return write_protected; | 1156 | return flush; |
1083 | } | 1157 | } |
1084 | 1158 | ||
1085 | /** | 1159 | /** |
@@ -1100,26 +1174,26 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | |||
1100 | 1174 | ||
1101 | while (mask) { | 1175 | while (mask) { |
1102 | rmapp = &slot->rmap[gfn_offset + __ffs(mask)]; | 1176 | rmapp = &slot->rmap[gfn_offset + __ffs(mask)]; |
1103 | __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL); | 1177 | __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); |
1104 | 1178 | ||
1105 | /* clear the first set bit */ | 1179 | /* clear the first set bit */ |
1106 | mask &= mask - 1; | 1180 | mask &= mask - 1; |
1107 | } | 1181 | } |
1108 | } | 1182 | } |
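The loop above walks the set bits of @mask with a classic pair of bit tricks: __ffs finds the index of the lowest set bit, and mask &= mask - 1 clears exactly that bit. A tiny stand-alone illustration, using __builtin_ctzl in place of the kernel's __ffs:

    #include <stdio.h>

    int main(void)
    {
            unsigned long mask = 0x2c; /* bits 2, 3 and 5 set */

            while (mask) {
                    /* __builtin_ctzl(mask) == index of the lowest set bit */
                    printf("write-protect gfn_offset + %d\n",
                           __builtin_ctzl(mask));
                    mask &= mask - 1; /* clear the lowest set bit */
            }
            return 0;
    }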
1109 | 1183 | ||
1110 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | 1184 | static bool rmap_write_protect(struct kvm *kvm, u64 gfn) |
1111 | { | 1185 | { |
1112 | struct kvm_memory_slot *slot; | 1186 | struct kvm_memory_slot *slot; |
1113 | unsigned long *rmapp; | 1187 | unsigned long *rmapp; |
1114 | int i; | 1188 | int i; |
1115 | int write_protected = 0; | 1189 | bool write_protected = false; |
1116 | 1190 | ||
1117 | slot = gfn_to_memslot(kvm, gfn); | 1191 | slot = gfn_to_memslot(kvm, gfn); |
1118 | 1192 | ||
1119 | for (i = PT_PAGE_TABLE_LEVEL; | 1193 | for (i = PT_PAGE_TABLE_LEVEL; |
1120 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 1194 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
1121 | rmapp = __gfn_to_rmap(gfn, i, slot); | 1195 | rmapp = __gfn_to_rmap(gfn, i, slot); |
1122 | write_protected |= __rmap_write_protect(kvm, rmapp, i); | 1196 | write_protected |= __rmap_write_protect(kvm, rmapp, i, true); |
1123 | } | 1197 | } |
1124 | 1198 | ||
1125 | return write_protected; | 1199 | return write_protected; |
@@ -1238,11 +1312,12 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1238 | unsigned long data) | 1312 | unsigned long data) |
1239 | { | 1313 | { |
1240 | u64 *sptep; | 1314 | u64 *sptep; |
1241 | struct rmap_iterator iter; | 1315 | struct rmap_iterator uninitialized_var(iter); |
1242 | int young = 0; | 1316 | int young = 0; |
1243 | 1317 | ||
1244 | /* | 1318 | /* |
1245 | * Emulate the accessed bit for EPT, by checking if this page has | 1319 | * In the absence of EPT Access and Dirty bit support, |
1320 | * emulate the accessed bit for EPT, by checking if this page has | ||
1246 | * an EPT mapping, and clearing it if it does. On the next access, | 1321 | * an EPT mapping, and clearing it if it does. On the next access, |
1247 | * a new EPT mapping will be established. | 1322 | * a new EPT mapping will be established. |
1248 | * This has some overhead, but not as much as the cost of swapping | 1323 | * This has some overhead, but not as much as the cost of swapping |
@@ -1253,11 +1328,12 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1253 | 1328 | ||
1254 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; | 1329 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; |
1255 | sptep = rmap_get_next(&iter)) { | 1330 | sptep = rmap_get_next(&iter)) { |
1256 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | 1331 | BUG_ON(!is_shadow_present_pte(*sptep)); |
1257 | 1332 | ||
1258 | if (*sptep & PT_ACCESSED_MASK) { | 1333 | if (*sptep & shadow_accessed_mask) { |
1259 | young = 1; | 1334 | young = 1; |
1260 | clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)sptep); | 1335 | clear_bit((ffs(shadow_accessed_mask) - 1), |
1336 | (unsigned long *)sptep); | ||
1261 | } | 1337 | } |
1262 | } | 1338 | } |
1263 | 1339 | ||
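Replacing the hard-coded PT_ACCESSED_SHIFT with ffs(shadow_accessed_mask) - 1 lets the same clear_bit() work wherever an architecture puts its accessed bit. A sketch of the index computation, assuming the classic x86 A bit at bit 5 and the EPT A bit at bit 8 (both bit positions are assumptions here):

    #include <stdio.h>
    #include <strings.h> /* ffs() */

    int main(void)
    {
            unsigned int x86_accessed = 1U << 5; /* PTE A bit (assumed) */
            unsigned int ept_accessed = 1U << 8; /* EPT A bit (assumed) */

            /* ffs() is 1-based, so ffs(mask) - 1 is the bit index. */
            printf("x86: clear_bit(%d)\n", ffs(x86_accessed) - 1);
            printf("ept: clear_bit(%d)\n", ffs(ept_accessed) - 1);
            return 0;
    }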
@@ -1281,9 +1357,9 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1281 | 1357 | ||
1282 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; | 1358 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; |
1283 | sptep = rmap_get_next(&iter)) { | 1359 | sptep = rmap_get_next(&iter)) { |
1284 | BUG_ON(!(*sptep & PT_PRESENT_MASK)); | 1360 | BUG_ON(!is_shadow_present_pte(*sptep)); |
1285 | 1361 | ||
1286 | if (*sptep & PT_ACCESSED_MASK) { | 1362 | if (*sptep & shadow_accessed_mask) { |
1287 | young = 1; | 1363 | young = 1; |
1288 | break; | 1364 | break; |
1289 | } | 1365 | } |
@@ -1401,12 +1477,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
1401 | u64 *parent_pte, int direct) | 1477 | u64 *parent_pte, int direct) |
1402 | { | 1478 | { |
1403 | struct kvm_mmu_page *sp; | 1479 | struct kvm_mmu_page *sp; |
1404 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, | 1480 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); |
1405 | sizeof *sp); | 1481 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
1406 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | ||
1407 | if (!direct) | 1482 | if (!direct) |
1408 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, | 1483 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
1409 | PAGE_SIZE); | ||
1410 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 1484 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
1411 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 1485 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
1412 | bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM); | 1486 | bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM); |
@@ -1701,7 +1775,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
1701 | 1775 | ||
1702 | kvm_mmu_pages_init(parent, &parents, &pages); | 1776 | kvm_mmu_pages_init(parent, &parents, &pages); |
1703 | while (mmu_unsync_walk(parent, &pages)) { | 1777 | while (mmu_unsync_walk(parent, &pages)) { |
1704 | int protected = 0; | 1778 | bool protected = false; |
1705 | 1779 | ||
1706 | for_each_sp(pages, sp, parents, i) | 1780 | for_each_sp(pages, sp, parents, i) |
1707 | protected |= rmap_write_protect(vcpu->kvm, sp->gfn); | 1781 | protected |= rmap_write_protect(vcpu->kvm, sp->gfn); |
@@ -1866,15 +1940,6 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | |||
1866 | mmu_spte_set(sptep, spte); | 1940 | mmu_spte_set(sptep, spte); |
1867 | } | 1941 | } |
1868 | 1942 | ||
1869 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | ||
1870 | { | ||
1871 | if (is_large_pte(*sptep)) { | ||
1872 | drop_spte(vcpu->kvm, sptep); | ||
1873 | --vcpu->kvm->stat.lpages; | ||
1874 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1875 | } | ||
1876 | } | ||
1877 | |||
1878 | static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, | 1943 | static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, |
1879 | unsigned direct_access) | 1944 | unsigned direct_access) |
1880 | { | 1945 | { |
@@ -2243,7 +2308,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2243 | gfn_t gfn, pfn_t pfn, bool speculative, | 2308 | gfn_t gfn, pfn_t pfn, bool speculative, |
2244 | bool can_unsync, bool host_writable) | 2309 | bool can_unsync, bool host_writable) |
2245 | { | 2310 | { |
2246 | u64 spte, entry = *sptep; | 2311 | u64 spte; |
2247 | int ret = 0; | 2312 | int ret = 0; |
2248 | 2313 | ||
2249 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) | 2314 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) |
@@ -2257,8 +2322,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2257 | spte |= shadow_x_mask; | 2322 | spte |= shadow_x_mask; |
2258 | else | 2323 | else |
2259 | spte |= shadow_nx_mask; | 2324 | spte |= shadow_nx_mask; |
2325 | |||
2260 | if (pte_access & ACC_USER_MASK) | 2326 | if (pte_access & ACC_USER_MASK) |
2261 | spte |= shadow_user_mask; | 2327 | spte |= shadow_user_mask; |
2328 | |||
2262 | if (level > PT_PAGE_TABLE_LEVEL) | 2329 | if (level > PT_PAGE_TABLE_LEVEL) |
2263 | spte |= PT_PAGE_SIZE_MASK; | 2330 | spte |= PT_PAGE_SIZE_MASK; |
2264 | if (tdp_enabled) | 2331 | if (tdp_enabled) |
@@ -2283,7 +2350,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2283 | goto done; | 2350 | goto done; |
2284 | } | 2351 | } |
2285 | 2352 | ||
2286 | spte |= PT_WRITABLE_MASK; | 2353 | spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; |
2287 | 2354 | ||
2288 | if (!vcpu->arch.mmu.direct_map | 2355 | if (!vcpu->arch.mmu.direct_map |
2289 | && !(pte_access & ACC_WRITE_MASK)) { | 2356 | && !(pte_access & ACC_WRITE_MASK)) { |
@@ -2312,8 +2379,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2312 | __func__, gfn); | 2379 | __func__, gfn); |
2313 | ret = 1; | 2380 | ret = 1; |
2314 | pte_access &= ~ACC_WRITE_MASK; | 2381 | pte_access &= ~ACC_WRITE_MASK; |
2315 | if (is_writable_pte(spte)) | 2382 | spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE); |
2316 | spte &= ~PT_WRITABLE_MASK; | ||
2317 | } | 2383 | } |
2318 | } | 2384 | } |
2319 | 2385 | ||
@@ -2321,14 +2387,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2321 | mark_page_dirty(vcpu->kvm, gfn); | 2387 | mark_page_dirty(vcpu->kvm, gfn); |
2322 | 2388 | ||
2323 | set_pte: | 2389 | set_pte: |
2324 | mmu_spte_update(sptep, spte); | 2390 | if (mmu_spte_update(sptep, spte)) |
2325 | /* | ||
2326 | * If we overwrite a writable spte with a read-only one we | ||
2327 | * should flush remote TLBs. Otherwise rmap_write_protect | ||
2328 | * will find a read-only spte, even though the writable spte | ||
2329 | * might be cached on a CPU's TLB. | ||
2330 | */ | ||
2331 | if (is_writable_pte(entry) && !is_writable_pte(*sptep)) | ||
2332 | kvm_flush_remote_tlbs(vcpu->kvm); | 2391 | kvm_flush_remote_tlbs(vcpu->kvm); |
2333 | done: | 2392 | done: |
2334 | return ret; | 2393 | return ret; |
@@ -2403,6 +2462,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2403 | 2462 | ||
2404 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | 2463 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) |
2405 | { | 2464 | { |
2465 | mmu_free_roots(vcpu); | ||
2406 | } | 2466 | } |
2407 | 2467 | ||
2408 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | 2468 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, |
@@ -2625,18 +2685,116 @@ exit: | |||
2625 | return ret; | 2685 | return ret; |
2626 | } | 2686 | } |
2627 | 2687 | ||
2688 | static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code) | ||
2689 | { | ||
2690 | /* | ||
2691 | * #PF can be fast only if the shadow page table is present and it | ||
2692 | * is caused by write-protect; that means we just need to change the | ||
2693 | * W bit of the spte, which can be done out of mmu-lock. | ||
2694 | */ | ||
2695 | if (!(error_code & PFERR_PRESENT_MASK) || | ||
2696 | !(error_code & PFERR_WRITE_MASK)) | ||
2697 | return false; | ||
2698 | |||
2699 | return true; | ||
2700 | } | ||
2701 | |||
2702 | static bool | ||
2703 | fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 spte) | ||
2704 | { | ||
2705 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
2706 | gfn_t gfn; | ||
2707 | |||
2708 | WARN_ON(!sp->role.direct); | ||
2709 | |||
2710 | /* | ||
2711 | * The gfn of a direct spte is stable since it is calculated | ||
2712 | * from sp->gfn. | ||
2713 | */ | ||
2714 | gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); | ||
2715 | |||
2716 | if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte) | ||
2717 | mark_page_dirty(vcpu->kvm, gfn); | ||
2718 | |||
2719 | return true; | ||
2720 | } | ||
2721 | |||
2722 | /* | ||
2723 | * Return value: | ||
2724 | * - true: let the vcpu access the same address again. | ||
2725 | * - false: let the real page fault path fix it. | ||
2726 | */ | ||
2727 | static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | ||
2728 | u32 error_code) | ||
2729 | { | ||
2730 | struct kvm_shadow_walk_iterator iterator; | ||
2731 | bool ret = false; | ||
2732 | u64 spte = 0ull; | ||
2733 | |||
2734 | if (!page_fault_can_be_fast(vcpu, error_code)) | ||
2735 | return false; | ||
2736 | |||
2737 | walk_shadow_page_lockless_begin(vcpu); | ||
2738 | for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) | ||
2739 | if (!is_shadow_present_pte(spte) || iterator.level < level) | ||
2740 | break; | ||
2741 | |||
2742 | /* | ||
2743 | * If the mapping has been changed, let the vcpu fault on the | ||
2744 | * same address again. | ||
2745 | */ | ||
2746 | if (!is_rmap_spte(spte)) { | ||
2747 | ret = true; | ||
2748 | goto exit; | ||
2749 | } | ||
2750 | |||
2751 | if (!is_last_spte(spte, level)) | ||
2752 | goto exit; | ||
2753 | |||
2754 | /* | ||
2755 | * Check if it is a spurious fault caused by a lazily flushed TLB. | ||
2756 | * | ||
2757 | * Need not check the access of upper level table entries since | ||
2758 | * they are always ACC_ALL. | ||
2759 | */ | ||
2760 | if (is_writable_pte(spte)) { | ||
2761 | ret = true; | ||
2762 | goto exit; | ||
2763 | } | ||
2764 | |||
2765 | /* | ||
2766 | * Currently, to simplify the code, only an spte write-protected | ||
2767 | * by dirty logging can be fixed on the fast path. | ||
2768 | */ | ||
2769 | if (!spte_is_locklessly_modifiable(spte)) | ||
2770 | goto exit; | ||
2771 | |||
2772 | /* | ||
2773 | * Currently, fast page fault only works for direct mappings since | ||
2774 | * the gfn is not stable for indirect shadow pages. | ||
2775 | * See Documentation/virtual/kvm/locking.txt to get more detail. | ||
2776 | */ | ||
2777 | ret = fast_pf_fix_direct_spte(vcpu, iterator.sptep, spte); | ||
2778 | exit: | ||
2779 | trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, | ||
2780 | spte, ret); | ||
2781 | walk_shadow_page_lockless_end(vcpu); | ||
2782 | |||
2783 | return ret; | ||
2784 | } | ||
2785 | |||
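The heart of the fast path above is a compare-and-swap: set only the W bit, and only if the spte is still exactly the value read during the lockless walk; any concurrent change makes the cmpxchg fail and the fault falls back to the slow path. A minimal sketch of that discipline (illustrative bit value, standing in for the kernel's cmpxchg64):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define WRITABLE_BIT (1ULL << 1) /* illustrative */

    static bool fix_spte(uint64_t *sptep, uint64_t expected)
    {
            uint64_t desired = expected | WRITABLE_BIT;

            /* Succeeds only if *sptep still holds the locklessly read value. */
            return __atomic_compare_exchange_n(sptep, &expected, desired,
                                               false, __ATOMIC_SEQ_CST,
                                               __ATOMIC_SEQ_CST);
    }

    int main(void)
    {
            uint64_t spte = 0x1000; /* pretend-spte from the lockless walk */

            printf("fixed = %d\n", fix_spte(&spte, 0x1000)); /* 1: unchanged */
            printf("fixed = %d\n", fix_spte(&spte, 0x1000)); /* 0: W bit set */
            return 0;
    }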
2628 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 2786 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
2629 | gva_t gva, pfn_t *pfn, bool write, bool *writable); | 2787 | gva_t gva, pfn_t *pfn, bool write, bool *writable); |
2630 | 2788 | ||
2631 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn, | 2789 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, |
2632 | bool prefault) | 2790 | gfn_t gfn, bool prefault) |
2633 | { | 2791 | { |
2634 | int r; | 2792 | int r; |
2635 | int level; | 2793 | int level; |
2636 | int force_pt_level; | 2794 | int force_pt_level; |
2637 | pfn_t pfn; | 2795 | pfn_t pfn; |
2638 | unsigned long mmu_seq; | 2796 | unsigned long mmu_seq; |
2639 | bool map_writable; | 2797 | bool map_writable, write = error_code & PFERR_WRITE_MASK; |
2640 | 2798 | ||
2641 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); | 2799 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); |
2642 | if (likely(!force_pt_level)) { | 2800 | if (likely(!force_pt_level)) { |
@@ -2653,6 +2811,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn, | |||
2653 | } else | 2811 | } else |
2654 | level = PT_PAGE_TABLE_LEVEL; | 2812 | level = PT_PAGE_TABLE_LEVEL; |
2655 | 2813 | ||
2814 | if (fast_page_fault(vcpu, v, level, error_code)) | ||
2815 | return 0; | ||
2816 | |||
2656 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2817 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2657 | smp_rmb(); | 2818 | smp_rmb(); |
2658 | 2819 | ||
@@ -3041,7 +3202,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
3041 | gfn = gva >> PAGE_SHIFT; | 3202 | gfn = gva >> PAGE_SHIFT; |
3042 | 3203 | ||
3043 | return nonpaging_map(vcpu, gva & PAGE_MASK, | 3204 | return nonpaging_map(vcpu, gva & PAGE_MASK, |
3044 | error_code & PFERR_WRITE_MASK, gfn, prefault); | 3205 | error_code, gfn, prefault); |
3045 | } | 3206 | } |
3046 | 3207 | ||
3047 | static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) | 3208 | static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) |
@@ -3121,6 +3282,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3121 | } else | 3282 | } else |
3122 | level = PT_PAGE_TABLE_LEVEL; | 3283 | level = PT_PAGE_TABLE_LEVEL; |
3123 | 3284 | ||
3285 | if (fast_page_fault(vcpu, gpa, level, error_code)) | ||
3286 | return 0; | ||
3287 | |||
3124 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 3288 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
3125 | smp_rmb(); | 3289 | smp_rmb(); |
3126 | 3290 | ||
@@ -3885,6 +4049,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu) | |||
3885 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | 4049 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) |
3886 | { | 4050 | { |
3887 | struct kvm_mmu_page *sp; | 4051 | struct kvm_mmu_page *sp; |
4052 | bool flush = false; | ||
3888 | 4053 | ||
3889 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { | 4054 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { |
3890 | int i; | 4055 | int i; |
@@ -3899,16 +4064,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
3899 | !is_last_spte(pt[i], sp->role.level)) | 4064 | !is_last_spte(pt[i], sp->role.level)) |
3900 | continue; | 4065 | continue; |
3901 | 4066 | ||
3902 | if (is_large_pte(pt[i])) { | 4067 | spte_write_protect(kvm, &pt[i], &flush, false); |
3903 | drop_spte(kvm, &pt[i]); | ||
3904 | --kvm->stat.lpages; | ||
3905 | continue; | ||
3906 | } | ||
3907 | |||
3908 | /* avoid RMW */ | ||
3909 | if (is_writable_pte(pt[i])) | ||
3910 | mmu_spte_update(&pt[i], | ||
3911 | pt[i] & ~PT_WRITABLE_MASK); | ||
3912 | } | 4068 | } |
3913 | } | 4069 | } |
3914 | kvm_flush_remote_tlbs(kvm); | 4070 | kvm_flush_remote_tlbs(kvm); |
@@ -3945,7 +4101,6 @@ static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | |||
3945 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 4101 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) |
3946 | { | 4102 | { |
3947 | struct kvm *kvm; | 4103 | struct kvm *kvm; |
3948 | struct kvm *kvm_freed = NULL; | ||
3949 | int nr_to_scan = sc->nr_to_scan; | 4104 | int nr_to_scan = sc->nr_to_scan; |
3950 | 4105 | ||
3951 | if (nr_to_scan == 0) | 4106 | if (nr_to_scan == 0) |
@@ -3957,22 +4112,30 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
3957 | int idx; | 4112 | int idx; |
3958 | LIST_HEAD(invalid_list); | 4113 | LIST_HEAD(invalid_list); |
3959 | 4114 | ||
4115 | /* | ||
4116 | * n_used_mmu_pages is accessed without holding kvm->mmu_lock | ||
4117 | * here. We may skip a VM instance erroneously, but we do not | ||
4118 | * want to shrink a VM that only started to populate its MMU | ||
4119 | * anyway. | ||
4120 | */ | ||
4121 | if (kvm->arch.n_used_mmu_pages > 0) { | ||
4122 | if (!nr_to_scan--) | ||
4123 | break; | ||
4124 | continue; | ||
4125 | } | ||
4126 | |||
3960 | idx = srcu_read_lock(&kvm->srcu); | 4127 | idx = srcu_read_lock(&kvm->srcu); |
3961 | spin_lock(&kvm->mmu_lock); | 4128 | spin_lock(&kvm->mmu_lock); |
3962 | if (!kvm_freed && nr_to_scan > 0 && | ||
3963 | kvm->arch.n_used_mmu_pages > 0) { | ||
3964 | kvm_mmu_remove_some_alloc_mmu_pages(kvm, | ||
3965 | &invalid_list); | ||
3966 | kvm_freed = kvm; | ||
3967 | } | ||
3968 | nr_to_scan--; | ||
3969 | 4129 | ||
4130 | kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list); | ||
3970 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 4131 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
4132 | |||
3971 | spin_unlock(&kvm->mmu_lock); | 4133 | spin_unlock(&kvm->mmu_lock); |
3972 | srcu_read_unlock(&kvm->srcu, idx); | 4134 | srcu_read_unlock(&kvm->srcu, idx); |
4135 | |||
4136 | list_move_tail(&kvm->vm_list, &vm_list); | ||
4137 | break; | ||
3973 | } | 4138 | } |
3974 | if (kvm_freed) | ||
3975 | list_move_tail(&kvm_freed->vm_list, &vm_list); | ||
3976 | 4139 | ||
3977 | raw_spin_unlock(&kvm_lock); | 4140 | raw_spin_unlock(&kvm_lock); |
3978 | 4141 | ||
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 89fb0e81322a..cd6e98333ba3 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -54,8 +54,8 @@ | |||
54 | */ | 54 | */ |
55 | TRACE_EVENT( | 55 | TRACE_EVENT( |
56 | kvm_mmu_pagetable_walk, | 56 | kvm_mmu_pagetable_walk, |
57 | TP_PROTO(u64 addr, int write_fault, int user_fault, int fetch_fault), | 57 | TP_PROTO(u64 addr, u32 pferr), |
58 | TP_ARGS(addr, write_fault, user_fault, fetch_fault), | 58 | TP_ARGS(addr, pferr), |
59 | 59 | ||
60 | TP_STRUCT__entry( | 60 | TP_STRUCT__entry( |
61 | __field(__u64, addr) | 61 | __field(__u64, addr) |
@@ -64,8 +64,7 @@ TRACE_EVENT( | |||
64 | 64 | ||
65 | TP_fast_assign( | 65 | TP_fast_assign( |
66 | __entry->addr = addr; | 66 | __entry->addr = addr; |
67 | __entry->pferr = (!!write_fault << 1) | (!!user_fault << 2) | 67 | __entry->pferr = pferr; |
68 | | (!!fetch_fault << 4); | ||
69 | ), | 68 | ), |
70 | 69 | ||
71 | TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr, | 70 | TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr, |
@@ -243,6 +242,44 @@ TRACE_EVENT( | |||
243 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, | 242 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, |
244 | __entry->access) | 243 | __entry->access) |
245 | ); | 244 | ); |
245 | |||
246 | #define __spte_satisfied(__spte) \ | ||
247 | (__entry->retry && is_writable_pte(__entry->__spte)) | ||
248 | |||
249 | TRACE_EVENT( | ||
250 | fast_page_fault, | ||
251 | TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, | ||
252 | u64 *sptep, u64 old_spte, bool retry), | ||
253 | TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry), | ||
254 | |||
255 | TP_STRUCT__entry( | ||
256 | __field(int, vcpu_id) | ||
257 | __field(gva_t, gva) | ||
258 | __field(u32, error_code) | ||
259 | __field(u64 *, sptep) | ||
260 | __field(u64, old_spte) | ||
261 | __field(u64, new_spte) | ||
262 | __field(bool, retry) | ||
263 | ), | ||
264 | |||
265 | TP_fast_assign( | ||
266 | __entry->vcpu_id = vcpu->vcpu_id; | ||
267 | __entry->gva = gva; | ||
268 | __entry->error_code = error_code; | ||
269 | __entry->sptep = sptep; | ||
270 | __entry->old_spte = old_spte; | ||
271 | __entry->new_spte = *sptep; | ||
272 | __entry->retry = retry; | ||
273 | ), | ||
274 | |||
275 | TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx" | ||
276 | " new %llx spurious %d fixed %d", __entry->vcpu_id, | ||
277 | __entry->gva, __print_flags(__entry->error_code, "|", | ||
278 | kvm_mmu_trace_pferr_flags), __entry->sptep, | ||
279 | __entry->old_spte, __entry->new_spte, | ||
280 | __spte_satisfied(old_spte), __spte_satisfied(new_spte) | ||
281 | ) | ||
282 | ); | ||
246 | #endif /* _TRACE_KVMMMU_H */ | 283 | #endif /* _TRACE_KVMMMU_H */ |
247 | 284 | ||
248 | #undef TRACE_INCLUDE_PATH | 285 | #undef TRACE_INCLUDE_PATH |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 34f970937ef1..bb7cf01cae76 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -154,8 +154,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
154 | const int fetch_fault = access & PFERR_FETCH_MASK; | 154 | const int fetch_fault = access & PFERR_FETCH_MASK; |
155 | u16 errcode = 0; | 155 | u16 errcode = 0; |
156 | 156 | ||
157 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, | 157 | trace_kvm_mmu_pagetable_walk(addr, access); |
158 | fetch_fault); | ||
159 | retry_walk: | 158 | retry_walk: |
160 | eperm = false; | 159 | eperm = false; |
161 | walker->level = mmu->root_level; | 160 | walker->level = mmu->root_level; |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 2e88438ffd83..9b7ec1150ab0 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -80,10 +80,10 @@ static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx) | |||
80 | 80 | ||
81 | static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx) | 81 | static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx) |
82 | { | 82 | { |
83 | if (idx < X86_PMC_IDX_FIXED) | 83 | if (idx < INTEL_PMC_IDX_FIXED) |
84 | return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0); | 84 | return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0); |
85 | else | 85 | else |
86 | return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED); | 86 | return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED); |
87 | } | 87 | } |
88 | 88 | ||
89 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu) | 89 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu) |
@@ -291,7 +291,7 @@ static void reprogram_idx(struct kvm_pmu *pmu, int idx) | |||
291 | if (pmc_is_gp(pmc)) | 291 | if (pmc_is_gp(pmc)) |
292 | reprogram_gp_counter(pmc, pmc->eventsel); | 292 | reprogram_gp_counter(pmc, pmc->eventsel); |
293 | else { | 293 | else { |
294 | int fidx = idx - X86_PMC_IDX_FIXED; | 294 | int fidx = idx - INTEL_PMC_IDX_FIXED; |
295 | reprogram_fixed_counter(pmc, | 295 | reprogram_fixed_counter(pmc, |
296 | fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx); | 296 | fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx); |
297 | } | 297 | } |
@@ -452,7 +452,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
452 | return; | 452 | return; |
453 | 453 | ||
454 | pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, | 454 | pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, |
455 | X86_PMC_MAX_GENERIC); | 455 | INTEL_PMC_MAX_GENERIC); |
456 | pmu->counter_bitmask[KVM_PMC_GP] = | 456 | pmu->counter_bitmask[KVM_PMC_GP] = |
457 | ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; | 457 | ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; |
458 | bitmap_len = (entry->eax >> 24) & 0xff; | 458 | bitmap_len = (entry->eax >> 24) & 0xff; |
@@ -462,13 +462,13 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) | |||
462 | pmu->nr_arch_fixed_counters = 0; | 462 | pmu->nr_arch_fixed_counters = 0; |
463 | } else { | 463 | } else { |
464 | pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), | 464 | pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), |
465 | X86_PMC_MAX_FIXED); | 465 | INTEL_PMC_MAX_FIXED); |
466 | pmu->counter_bitmask[KVM_PMC_FIXED] = | 466 | pmu->counter_bitmask[KVM_PMC_FIXED] = |
467 | ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; | 467 | ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; |
468 | } | 468 | } |
469 | 469 | ||
470 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | | 470 | pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | |
471 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED); | 471 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); |
472 | pmu->global_ctrl_mask = ~pmu->global_ctrl; | 472 | pmu->global_ctrl_mask = ~pmu->global_ctrl; |
473 | } | 473 | } |
474 | 474 | ||
@@ -478,15 +478,15 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) | |||
478 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 478 | struct kvm_pmu *pmu = &vcpu->arch.pmu; |
479 | 479 | ||
480 | memset(pmu, 0, sizeof(*pmu)); | 480 | memset(pmu, 0, sizeof(*pmu)); |
481 | for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { | 481 | for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { |
482 | pmu->gp_counters[i].type = KVM_PMC_GP; | 482 | pmu->gp_counters[i].type = KVM_PMC_GP; |
483 | pmu->gp_counters[i].vcpu = vcpu; | 483 | pmu->gp_counters[i].vcpu = vcpu; |
484 | pmu->gp_counters[i].idx = i; | 484 | pmu->gp_counters[i].idx = i; |
485 | } | 485 | } |
486 | for (i = 0; i < X86_PMC_MAX_FIXED; i++) { | 486 | for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { |
487 | pmu->fixed_counters[i].type = KVM_PMC_FIXED; | 487 | pmu->fixed_counters[i].type = KVM_PMC_FIXED; |
488 | pmu->fixed_counters[i].vcpu = vcpu; | 488 | pmu->fixed_counters[i].vcpu = vcpu; |
489 | pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED; | 489 | pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; |
490 | } | 490 | } |
491 | init_irq_work(&pmu->irq_work, trigger_pmi); | 491 | init_irq_work(&pmu->irq_work, trigger_pmi); |
492 | kvm_pmu_cpuid_update(vcpu); | 492 | kvm_pmu_cpuid_update(vcpu); |
@@ -498,13 +498,13 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu) | |||
498 | int i; | 498 | int i; |
499 | 499 | ||
500 | irq_work_sync(&pmu->irq_work); | 500 | irq_work_sync(&pmu->irq_work); |
501 | for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { | 501 | for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { |
502 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; | 502 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; |
503 | stop_counter(pmc); | 503 | stop_counter(pmc); |
504 | pmc->counter = pmc->eventsel = 0; | 504 | pmc->counter = pmc->eventsel = 0; |
505 | } | 505 | } |
506 | 506 | ||
507 | for (i = 0; i < X86_PMC_MAX_FIXED; i++) | 507 | for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) |
508 | stop_counter(&pmu->fixed_counters[i]); | 508 | stop_counter(&pmu->fixed_counters[i]); |
509 | 509 | ||
510 | pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = | 510 | pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f75af406b268..baead950d6c8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -3185,8 +3185,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
3185 | break; | 3185 | break; |
3186 | case MSR_IA32_DEBUGCTLMSR: | 3186 | case MSR_IA32_DEBUGCTLMSR: |
3187 | if (!boot_cpu_has(X86_FEATURE_LBRV)) { | 3187 | if (!boot_cpu_has(X86_FEATURE_LBRV)) { |
3188 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", | 3188 | vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", |
3189 | __func__, data); | 3189 | __func__, data); |
3190 | break; | 3190 | break; |
3191 | } | 3191 | } |
3192 | if (data & DEBUGCTL_RESERVED_BITS) | 3192 | if (data & DEBUGCTL_RESERVED_BITS) |
@@ -3205,7 +3205,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
3205 | case MSR_VM_CR: | 3205 | case MSR_VM_CR: |
3206 | return svm_set_vm_cr(vcpu, data); | 3206 | return svm_set_vm_cr(vcpu, data); |
3207 | case MSR_VM_IGNNE: | 3207 | case MSR_VM_IGNNE: |
3208 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); | 3208 | vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); |
3209 | break; | 3209 | break; |
3210 | default: | 3210 | default: |
3211 | return kvm_set_msr_common(vcpu, ecx, data); | 3211 | return kvm_set_msr_common(vcpu, ecx, data); |
@@ -4044,6 +4044,11 @@ static bool svm_rdtscp_supported(void) | |||
4044 | return false; | 4044 | return false; |
4045 | } | 4045 | } |
4046 | 4046 | ||
4047 | static bool svm_invpcid_supported(void) | ||
4048 | { | ||
4049 | return false; | ||
4050 | } | ||
4051 | |||
4047 | static bool svm_has_wbinvd_exit(void) | 4052 | static bool svm_has_wbinvd_exit(void) |
4048 | { | 4053 | { |
4049 | return true; | 4054 | return true; |
@@ -4312,6 +4317,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4312 | .cpuid_update = svm_cpuid_update, | 4317 | .cpuid_update = svm_cpuid_update, |
4313 | 4318 | ||
4314 | .rdtscp_supported = svm_rdtscp_supported, | 4319 | .rdtscp_supported = svm_rdtscp_supported, |
4320 | .invpcid_supported = svm_invpcid_supported, | ||
4315 | 4321 | ||
4316 | .set_supported_cpuid = svm_set_supported_cpuid, | 4322 | .set_supported_cpuid = svm_set_supported_cpuid, |
4317 | 4323 | ||
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 911d2641f14c..a71faf727ff3 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -517,6 +517,40 @@ TRACE_EVENT(kvm_apic_accept_irq, | |||
517 | __entry->coalesced ? " (coalesced)" : "") | 517 | __entry->coalesced ? " (coalesced)" : "") |
518 | ); | 518 | ); |
519 | 519 | ||
520 | TRACE_EVENT(kvm_eoi, | ||
521 | TP_PROTO(struct kvm_lapic *apic, int vector), | ||
522 | TP_ARGS(apic, vector), | ||
523 | |||
524 | TP_STRUCT__entry( | ||
525 | __field( __u32, apicid ) | ||
526 | __field( int, vector ) | ||
527 | ), | ||
528 | |||
529 | TP_fast_assign( | ||
530 | __entry->apicid = apic->vcpu->vcpu_id; | ||
531 | __entry->vector = vector; | ||
532 | ), | ||
533 | |||
534 | TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) | ||
535 | ); | ||
536 | |||
537 | TRACE_EVENT(kvm_pv_eoi, | ||
538 | TP_PROTO(struct kvm_lapic *apic, int vector), | ||
539 | TP_ARGS(apic, vector), | ||
540 | |||
541 | TP_STRUCT__entry( | ||
542 | __field( __u32, apicid ) | ||
543 | __field( int, vector ) | ||
544 | ), | ||
545 | |||
546 | TP_fast_assign( | ||
547 | __entry->apicid = apic->vcpu->vcpu_id; | ||
548 | __entry->vector = vector; | ||
549 | ), | ||
550 | |||
551 | TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) | ||
552 | ); | ||
553 | |||
520 | /* | 554 | /* |
521 | * Tracepoint for nested VMRUN | 555 | * Tracepoint for nested VMRUN |
522 | */ | 556 | */ |
@@ -710,16 +744,6 @@ TRACE_EVENT(kvm_skinit, | |||
710 | __entry->rip, __entry->slb) | 744 | __entry->rip, __entry->slb) |
711 | ); | 745 | ); |
712 | 746 | ||
713 | #define __print_insn(insn, ilen) ({ \ | ||
714 | int i; \ | ||
715 | const char *ret = p->buffer + p->len; \ | ||
716 | \ | ||
717 | for (i = 0; i < ilen; ++i) \ | ||
718 | trace_seq_printf(p, " %02x", insn[i]); \ | ||
719 | trace_seq_printf(p, "%c", 0); \ | ||
720 | ret; \ | ||
721 | }) | ||
722 | |||
723 | #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) | 747 | #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) |
724 | #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) | 748 | #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) |
725 | #define KVM_EMUL_INSN_F_CS_D (1 << 2) | 749 | #define KVM_EMUL_INSN_F_CS_D (1 << 2) |
@@ -786,7 +810,7 @@ TRACE_EVENT(kvm_emulate_insn, | |||
786 | 810 | ||
787 | TP_printk("%x:%llx:%s (%s)%s", | 811 | TP_printk("%x:%llx:%s (%s)%s", |
788 | __entry->csbase, __entry->rip, | 812 | __entry->csbase, __entry->rip, |
789 | __print_insn(__entry->insn, __entry->len), | 813 | __print_hex(__entry->insn, __entry->len), |
790 | __print_symbolic(__entry->flags, | 814 | __print_symbolic(__entry->flags, |
791 | kvm_trace_symbol_emul_flags), | 815 | kvm_trace_symbol_emul_flags), |
792 | __entry->failed ? " failed" : "" | 816 | __entry->failed ? " failed" : "" |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 32eb58866292..c39b60707e02 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -71,7 +71,10 @@ static bool __read_mostly enable_unrestricted_guest = 1; | |||
71 | module_param_named(unrestricted_guest, | 71 | module_param_named(unrestricted_guest, |
72 | enable_unrestricted_guest, bool, S_IRUGO); | 72 | enable_unrestricted_guest, bool, S_IRUGO); |
73 | 73 | ||
74 | static bool __read_mostly emulate_invalid_guest_state = 0; | 74 | static bool __read_mostly enable_ept_ad_bits = 1; |
75 | module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); | ||
76 | |||
77 | static bool __read_mostly emulate_invalid_guest_state = true; | ||
75 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 78 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
76 | 79 | ||
77 | static bool __read_mostly vmm_exclusive = 1; | 80 | static bool __read_mostly vmm_exclusive = 1; |
@@ -615,6 +618,10 @@ static void kvm_cpu_vmxon(u64 addr); | |||
615 | static void kvm_cpu_vmxoff(void); | 618 | static void kvm_cpu_vmxoff(void); |
616 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 619 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
617 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 620 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
621 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | ||
622 | struct kvm_segment *var, int seg); | ||
623 | static void vmx_get_segment(struct kvm_vcpu *vcpu, | ||
624 | struct kvm_segment *var, int seg); | ||
618 | 625 | ||
619 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 626 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
620 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 627 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -789,6 +796,11 @@ static inline bool cpu_has_vmx_ept_4levels(void) | |||
789 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; | 796 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; |
790 | } | 797 | } |
791 | 798 | ||
799 | static inline bool cpu_has_vmx_ept_ad_bits(void) | ||
800 | { | ||
801 | return vmx_capability.ept & VMX_EPT_AD_BIT; | ||
802 | } | ||
803 | |||
792 | static inline bool cpu_has_vmx_invept_individual_addr(void) | 804 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
793 | { | 805 | { |
794 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; | 806 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
@@ -849,6 +861,12 @@ static inline bool cpu_has_vmx_rdtscp(void) | |||
849 | SECONDARY_EXEC_RDTSCP; | 861 | SECONDARY_EXEC_RDTSCP; |
850 | } | 862 | } |
851 | 863 | ||
864 | static inline bool cpu_has_vmx_invpcid(void) | ||
865 | { | ||
866 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
867 | SECONDARY_EXEC_ENABLE_INVPCID; | ||
868 | } | ||
869 | |||
852 | static inline bool cpu_has_virtual_nmis(void) | 870 | static inline bool cpu_has_virtual_nmis(void) |
853 | { | 871 | { |
854 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 872 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
@@ -1739,6 +1757,11 @@ static bool vmx_rdtscp_supported(void) | |||
1739 | return cpu_has_vmx_rdtscp(); | 1757 | return cpu_has_vmx_rdtscp(); |
1740 | } | 1758 | } |
1741 | 1759 | ||
1760 | static bool vmx_invpcid_supported(void) | ||
1761 | { | ||
1762 | return cpu_has_vmx_invpcid() && enable_ept; | ||
1763 | } | ||
1764 | |||
1742 | /* | 1765 | /* |
1743 | * Swap MSR entry in host/guest MSR entry array. | 1766 | * Swap MSR entry in host/guest MSR entry array. |
1744 | */ | 1767 | */ |
@@ -2458,7 +2481,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2458 | SECONDARY_EXEC_ENABLE_EPT | | 2481 | SECONDARY_EXEC_ENABLE_EPT | |
2459 | SECONDARY_EXEC_UNRESTRICTED_GUEST | | 2482 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
2460 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | | 2483 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | |
2461 | SECONDARY_EXEC_RDTSCP; | 2484 | SECONDARY_EXEC_RDTSCP | |
2485 | SECONDARY_EXEC_ENABLE_INVPCID; | ||
2462 | if (adjust_vmx_controls(min2, opt2, | 2486 | if (adjust_vmx_controls(min2, opt2, |
2463 | MSR_IA32_VMX_PROCBASED_CTLS2, | 2487 | MSR_IA32_VMX_PROCBASED_CTLS2, |
2464 | &_cpu_based_2nd_exec_control) < 0) | 2488 | &_cpu_based_2nd_exec_control) < 0) |
@@ -2645,8 +2669,12 @@ static __init int hardware_setup(void) | |||
2645 | !cpu_has_vmx_ept_4levels()) { | 2669 | !cpu_has_vmx_ept_4levels()) { |
2646 | enable_ept = 0; | 2670 | enable_ept = 0; |
2647 | enable_unrestricted_guest = 0; | 2671 | enable_unrestricted_guest = 0; |
2672 | enable_ept_ad_bits = 0; | ||
2648 | } | 2673 | } |
2649 | 2674 | ||
2675 | if (!cpu_has_vmx_ept_ad_bits()) | ||
2676 | enable_ept_ad_bits = 0; | ||
2677 | |||
2650 | if (!cpu_has_vmx_unrestricted_guest()) | 2678 | if (!cpu_has_vmx_unrestricted_guest()) |
2651 | enable_unrestricted_guest = 0; | 2679 | enable_unrestricted_guest = 0; |
2652 | 2680 | ||
@@ -2770,6 +2798,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
2770 | { | 2798 | { |
2771 | unsigned long flags; | 2799 | unsigned long flags; |
2772 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2800 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2801 | struct kvm_segment var; | ||
2773 | 2802 | ||
2774 | if (enable_unrestricted_guest) | 2803 | if (enable_unrestricted_guest) |
2775 | return; | 2804 | return; |
@@ -2813,20 +2842,23 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
2813 | if (emulate_invalid_guest_state) | 2842 | if (emulate_invalid_guest_state) |
2814 | goto continue_rmode; | 2843 | goto continue_rmode; |
2815 | 2844 | ||
2816 | vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); | 2845 | vmx_get_segment(vcpu, &var, VCPU_SREG_SS); |
2817 | vmcs_write32(GUEST_SS_LIMIT, 0xffff); | 2846 | vmx_set_segment(vcpu, &var, VCPU_SREG_SS); |
2818 | vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); | 2847 | |
2848 | vmx_get_segment(vcpu, &var, VCPU_SREG_CS); | ||
2849 | vmx_set_segment(vcpu, &var, VCPU_SREG_CS); | ||
2850 | |||
2851 | vmx_get_segment(vcpu, &var, VCPU_SREG_ES); | ||
2852 | vmx_set_segment(vcpu, &var, VCPU_SREG_ES); | ||
2853 | |||
2854 | vmx_get_segment(vcpu, &var, VCPU_SREG_DS); | ||
2855 | vmx_set_segment(vcpu, &var, VCPU_SREG_DS); | ||
2819 | 2856 | ||
2820 | vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); | 2857 | vmx_get_segment(vcpu, &var, VCPU_SREG_GS); |
2821 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); | 2858 | vmx_set_segment(vcpu, &var, VCPU_SREG_GS); |
2822 | if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) | ||
2823 | vmcs_writel(GUEST_CS_BASE, 0xf0000); | ||
2824 | vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); | ||
2825 | 2859 | ||
2826 | fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es); | 2860 | vmx_get_segment(vcpu, &var, VCPU_SREG_FS); |
2827 | fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds); | 2861 | vmx_set_segment(vcpu, &var, VCPU_SREG_FS); |
2828 | fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs); | ||
2829 | fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs); | ||
2830 | 2862 | ||
2831 | continue_rmode: | 2863 | continue_rmode: |
2832 | kvm_mmu_reset_context(vcpu); | 2864 | kvm_mmu_reset_context(vcpu); |
@@ -3027,6 +3059,8 @@ static u64 construct_eptp(unsigned long root_hpa) | |||
3027 | /* TODO write the value reading from MSR */ | 3059 | /* TODO write the value reading from MSR */ |
3028 | eptp = VMX_EPT_DEFAULT_MT | | 3060 | eptp = VMX_EPT_DEFAULT_MT | |
3029 | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; | 3061 | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; |
3062 | if (enable_ept_ad_bits) | ||
3063 | eptp |= VMX_EPT_AD_ENABLE_BIT; | ||
3030 | eptp |= (root_hpa & PAGE_MASK); | 3064 | eptp |= (root_hpa & PAGE_MASK); |
3031 | 3065 | ||
3032 | return eptp; | 3066 | return eptp; |
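For reference, a worked example of the EPTP layout the function assembles: bits 2:0 hold the memory type (6 = write-back), bits 5:3 the page-walk length minus one (3 = four levels), and bit 6 enables A/D tracking; the field values below are assumptions taken from the SDM rather than kernel symbols.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t root_hpa = 0x12345000ULL; /* made-up, page-aligned */
            uint64_t eptp;

            eptp = 6;            /* memory type: write-back    */
            eptp |= 3 << 3;      /* 4-level walk (length - 1)  */
            eptp |= 1ULL << 6;   /* enable accessed/dirty bits */
            eptp |= root_hpa & ~0xfffULL;

            printf("eptp = %#llx\n", (unsigned long long)eptp);
            return 0;
    }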
@@ -3153,11 +3187,22 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu) | |||
3153 | 3187 | ||
3154 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 3188 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
3155 | { | 3189 | { |
3190 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
3191 | |||
3192 | /* | ||
3193 | * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations | ||
3194 | * fail; use the cache instead. | ||
3195 | */ | ||
3196 | if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) { | ||
3197 | return vmx->cpl; | ||
3198 | } | ||
3199 | |||
3156 | if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { | 3200 | if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { |
3157 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | 3201 | __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); |
3158 | to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu); | 3202 | vmx->cpl = __vmx_get_cpl(vcpu); |
3159 | } | 3203 | } |
3160 | return to_vmx(vcpu)->cpl; | 3204 | |
3205 | return vmx->cpl; | ||
3161 | } | 3206 | } |
3162 | 3207 | ||
3163 | 3208 | ||
@@ -3165,7 +3210,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) | |||
3165 | { | 3210 | { |
3166 | u32 ar; | 3211 | u32 ar; |
3167 | 3212 | ||
3168 | if (var->unusable) | 3213 | if (var->unusable || !var->present) |
3169 | ar = 1 << 16; | 3214 | ar = 1 << 16; |
3170 | else { | 3215 | else { |
3171 | ar = var->type & 15; | 3216 | ar = var->type & 15; |
@@ -3177,8 +3222,6 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) | |||
3177 | ar |= (var->db & 1) << 14; | 3222 | ar |= (var->db & 1) << 14; |
3178 | ar |= (var->g & 1) << 15; | 3223 | ar |= (var->g & 1) << 15; |
3179 | } | 3224 | } |
3180 | if (ar == 0) /* a 0 value means unusable */ | ||
3181 | ar = AR_UNUSABLE_MASK; | ||
3182 | 3225 | ||
3183 | return ar; | 3226 | return ar; |
3184 | } | 3227 | } |
@@ -3229,6 +3272,44 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
3229 | 3272 | ||
3230 | vmcs_write32(sf->ar_bytes, ar); | 3273 | vmcs_write32(sf->ar_bytes, ar); |
3231 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); | 3274 | __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); |
3275 | |||
3276 | /* | ||
3277 | * Fix segments for a real mode guest on hosts that don't have | ||
3278 | * "unrestricted guest" support, or where it was disabled. | ||
3279 | * This is done to allow migration of guests from hosts with | ||
3280 | * unrestricted guest support, like Westmere, to older hosts | ||
3281 | * that don't have it, like Nehalem. | ||
3282 | */ | ||
3283 | if (!enable_unrestricted_guest && vmx->rmode.vm86_active) { | ||
3284 | switch (seg) { | ||
3285 | case VCPU_SREG_CS: | ||
3286 | vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); | ||
3287 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); | ||
3288 | if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) | ||
3289 | vmcs_writel(GUEST_CS_BASE, 0xf0000); | ||
3290 | vmcs_write16(GUEST_CS_SELECTOR, | ||
3291 | vmcs_readl(GUEST_CS_BASE) >> 4); | ||
3292 | break; | ||
3293 | case VCPU_SREG_ES: | ||
3294 | fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es); | ||
3295 | break; | ||
3296 | case VCPU_SREG_DS: | ||
3297 | fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds); | ||
3298 | break; | ||
3299 | case VCPU_SREG_GS: | ||
3300 | fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs); | ||
3301 | break; | ||
3302 | case VCPU_SREG_FS: | ||
3303 | fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs); | ||
3304 | break; | ||
3305 | case VCPU_SREG_SS: | ||
3306 | vmcs_write16(GUEST_SS_SELECTOR, | ||
3307 | vmcs_readl(GUEST_SS_BASE) >> 4); | ||
3308 | vmcs_write32(GUEST_SS_LIMIT, 0xffff); | ||
3309 | vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); | ||
3310 | break; | ||
3311 | } | ||
3312 | } | ||
3232 | } | 3313 | } |
3233 | 3314 | ||
3234 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 3315 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
@@ -3731,6 +3812,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
3731 | if (!enable_ept) { | 3812 | if (!enable_ept) { |
3732 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 3813 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
3733 | enable_unrestricted_guest = 0; | 3814 | enable_unrestricted_guest = 0; |
3815 | /* Enabling INVPCID for non-EPT guests may cause a performance regression. */ | ||
3816 | exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
3734 | } | 3817 | } |
3735 | if (!enable_unrestricted_guest) | 3818 | if (!enable_unrestricted_guest) |
3736 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 3819 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
@@ -4489,7 +4572,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
4489 | break; | 4572 | break; |
4490 | } | 4573 | } |
4491 | vcpu->run->exit_reason = 0; | 4574 | vcpu->run->exit_reason = 0; |
4492 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", | 4575 | vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n", |
4493 | (int)(exit_qualification >> 4) & 3, cr); | 4576 | (int)(exit_qualification >> 4) & 3, cr); |
4494 | return 0; | 4577 | return 0; |
4495 | } | 4578 | } |
@@ -4769,6 +4852,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
4769 | { | 4852 | { |
4770 | unsigned long exit_qualification; | 4853 | unsigned long exit_qualification; |
4771 | gpa_t gpa; | 4854 | gpa_t gpa; |
4855 | u32 error_code; | ||
4772 | int gla_validity; | 4856 | int gla_validity; |
4773 | 4857 | ||
4774 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 4858 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
@@ -4793,7 +4877,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
4793 | 4877 | ||
4794 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 4878 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
4795 | trace_kvm_page_fault(gpa, exit_qualification); | 4879 | trace_kvm_page_fault(gpa, exit_qualification); |
4796 | return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0); | 4880 | |
4881 | /* Is it a write fault? */ | ||
4882 | error_code = exit_qualification & (1U << 1); | ||
4883 | /* Is the EPT page table present? */ | ||
4884 | error_code |= (exit_qualification >> 3) & 0x1; | ||
4885 | |||
4886 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); | ||
4797 | } | 4887 | } |
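The error code derived above packs two exit-qualification bits into the #PF-style error code the MMU expects: bit 1 of the qualification reports a write access, and bit 3 reports whether the address was readable (taken as "present"). A stand-alone sketch of the extraction, with a made-up qualification value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* made-up: write access to a readable (present) translation */
            uint64_t exit_qualification = (1U << 1) | (1U << 3);
            uint32_t error_code;

            error_code = exit_qualification & (1U << 1);   /* write fault? */
            error_code |= (exit_qualification >> 3) & 0x1; /* present?     */

            printf("error_code = %#x\n", error_code);
            return 0;
    }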
4798 | 4888 | ||
4799 | static u64 ept_rsvd_mask(u64 spte, int level) | 4889 | static u64 ept_rsvd_mask(u64 spte, int level) |
@@ -4908,15 +4998,18 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
4908 | int ret = 1; | 4998 | int ret = 1; |
4909 | u32 cpu_exec_ctrl; | 4999 | u32 cpu_exec_ctrl; |
4910 | bool intr_window_requested; | 5000 | bool intr_window_requested; |
5001 | unsigned count = 130; | ||
4911 | 5002 | ||
4912 | cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 5003 | cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
4913 | intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; | 5004 | intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; |
4914 | 5005 | ||
4915 | while (!guest_state_valid(vcpu)) { | 5006 | while (!guest_state_valid(vcpu) && count-- != 0) { |
4916 | if (intr_window_requested | 5007 | if (intr_window_requested && vmx_interrupt_allowed(vcpu)) |
4917 | && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF)) | ||
4918 | return handle_interrupt_window(&vmx->vcpu); | 5008 | return handle_interrupt_window(&vmx->vcpu); |
4919 | 5009 | ||
5010 | if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) | ||
5011 | return 1; | ||
5012 | |||
4920 | err = emulate_instruction(vcpu, 0); | 5013 | err = emulate_instruction(vcpu, 0); |
4921 | 5014 | ||
4922 | if (err == EMULATE_DO_MMIO) { | 5015 | if (err == EMULATE_DO_MMIO) { |
@@ -4924,8 +5017,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
4924 | goto out; | 5017 | goto out; |
4925 | } | 5018 | } |
4926 | 5019 | ||
4927 | if (err != EMULATE_DONE) | 5020 | if (err != EMULATE_DONE) { |
5021 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
5022 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
5023 | vcpu->run->internal.ndata = 0; | ||
4928 | return 0; | 5024 | return 0; |
5025 | } | ||
4929 | 5026 | ||
4930 | if (signal_pending(current)) | 5027 | if (signal_pending(current)) |
4931 | goto out; | 5028 | goto out; |
@@ -4933,7 +5030,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
4933 | schedule(); | 5030 | schedule(); |
4934 | } | 5031 | } |
4935 | 5032 | ||
4936 | vmx->emulation_required = 0; | 5033 | vmx->emulation_required = !guest_state_valid(vcpu); |
4937 | out: | 5034 | out: |
4938 | return ret; | 5035 | return ret; |
4939 | } | 5036 | } |
@@ -6467,6 +6564,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | |||
6467 | } | 6564 | } |
6468 | } | 6565 | } |
6469 | } | 6566 | } |
6567 | |||
6568 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
6569 | /* Expose INVPCID only when PCID is exposed. */ | ||
6570 | best = kvm_find_cpuid_entry(vcpu, 0x7, 0); | ||
6571 | if (vmx_invpcid_supported() && | ||
6572 | best && (best->ecx & bit(X86_FEATURE_INVPCID)) && | ||
6573 | guest_cpuid_has_pcid(vcpu)) { | ||
6574 | exec_control |= SECONDARY_EXEC_ENABLE_INVPCID; | ||
6575 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
6576 | exec_control); | ||
6577 | } else { | ||
6578 | exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
6579 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
6580 | exec_control); | ||
6581 | if (best) | ||
6582 | best->ecx &= ~bit(X86_FEATURE_INVPCID); | ||
6583 | } | ||
6470 | } | 6584 | } |
6471 | 6585 | ||
6472 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 6586 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
@@ -7201,6 +7315,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7201 | .cpuid_update = vmx_cpuid_update, | 7315 | .cpuid_update = vmx_cpuid_update, |
7202 | 7316 | ||
7203 | .rdtscp_supported = vmx_rdtscp_supported, | 7317 | .rdtscp_supported = vmx_rdtscp_supported, |
7318 | .invpcid_supported = vmx_invpcid_supported, | ||
7204 | 7319 | ||
7205 | .set_supported_cpuid = vmx_set_supported_cpuid, | 7320 | .set_supported_cpuid = vmx_set_supported_cpuid, |
7206 | 7321 | ||
@@ -7230,23 +7345,21 @@ static int __init vmx_init(void) | |||
7230 | if (!vmx_io_bitmap_a) | 7345 | if (!vmx_io_bitmap_a) |
7231 | return -ENOMEM; | 7346 | return -ENOMEM; |
7232 | 7347 | ||
7348 | r = -ENOMEM; | ||
7349 | |||
7233 | vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); | 7350 | vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); |
7234 | if (!vmx_io_bitmap_b) { | 7351 | if (!vmx_io_bitmap_b) |
7235 | r = -ENOMEM; | ||
7236 | goto out; | 7352 | goto out; |
7237 | } | ||
7238 | 7353 | ||
7239 | vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); | 7354 | vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); |
7240 | if (!vmx_msr_bitmap_legacy) { | 7355 | if (!vmx_msr_bitmap_legacy) |
7241 | r = -ENOMEM; | ||
7242 | goto out1; | 7356 | goto out1; |
7243 | } | 7357 | |
7244 | 7358 | ||
7245 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); | 7359 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); |
7246 | if (!vmx_msr_bitmap_longmode) { | 7360 | if (!vmx_msr_bitmap_longmode) |
7247 | r = -ENOMEM; | ||
7248 | goto out2; | 7361 | goto out2; |
7249 | } | 7362 | |
7250 | 7363 | ||
7251 | /* | 7364 | /* |
7252 | * Allow direct access to the PC debug port (it is often used for I/O | 7365 | * Allow direct access to the PC debug port (it is often used for I/O |
@@ -7275,8 +7388,10 @@ static int __init vmx_init(void) | |||
7275 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | 7388 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
7276 | 7389 | ||
7277 | if (enable_ept) { | 7390 | if (enable_ept) { |
7278 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | 7391 | kvm_mmu_set_mask_ptes(0ull, |
7279 | VMX_EPT_EXECUTABLE_MASK); | 7392 | (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, |
7393 | (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull, | ||
7394 | 0ull, VMX_EPT_EXECUTABLE_MASK); | ||
7280 | ept_set_mmio_spte_mask(); | 7395 | ept_set_mmio_spte_mask(); |
7281 | kvm_enable_tdp(); | 7396 | kvm_enable_tdp(); |
7282 | } else | 7397 | } else |
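
The kvm_mmu_set_mask_ptes() change hands the EPT accessed and dirty bit positions down to the generic MMU when enable_ept_ad_bits is set; per the SDM these are bits 8 and 9 of an EPT entry, and a zero mask signals that the hardware cannot track that state. A tiny self-contained illustration of how such masks are consumed (the helper name is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define VMX_EPT_ACCESS_BIT (1ULL << 8)	/* EPT PTE bit 8 (SDM) */
#define VMX_EPT_DIRTY_BIT  (1ULL << 9)	/* EPT PTE bit 9 (SDM) */

/* A zero mask means A/D tracking is unavailable and must be emulated. */
static int spte_is_dirty(uint64_t spte, uint64_t dirty_mask)
{
	return dirty_mask && (spte & dirty_mask);
}

int main(void)
{
	uint64_t spte = VMX_EPT_ACCESS_BIT | VMX_EPT_DIRTY_BIT;

	printf("A/D on : dirty=%d\n", spte_is_dirty(spte, VMX_EPT_DIRTY_BIT));
	printf("A/D off: dirty=%d\n", spte_is_dirty(spte, 0));
	return 0;
}
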
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index be6d54929fa7..59b59508ff07 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -528,6 +528,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
528 | return 1; | 528 | return 1; |
529 | } | 529 | } |
530 | 530 | ||
531 | if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) | ||
532 | return 1; | ||
533 | |||
531 | kvm_x86_ops->set_cr0(vcpu, cr0); | 534 | kvm_x86_ops->set_cr0(vcpu, cr0); |
532 | 535 | ||
533 | if ((cr0 ^ old_cr0) & X86_CR0_PG) { | 536 | if ((cr0 ^ old_cr0) & X86_CR0_PG) { |
@@ -604,10 +607,20 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
604 | kvm_read_cr3(vcpu))) | 607 | kvm_read_cr3(vcpu))) |
605 | return 1; | 608 | return 1; |
606 | 609 | ||
610 | if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { | ||
611 | if (!guest_cpuid_has_pcid(vcpu)) | ||
612 | return 1; | ||
613 | |||
614 | /* PCID cannot be enabled when CR3[11:0] != 000H or EFER.LMA = 0 */ | ||
615 | if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu)) | ||
616 | return 1; | ||
617 | } | ||
618 | |||
607 | if (kvm_x86_ops->set_cr4(vcpu, cr4)) | 619 | if (kvm_x86_ops->set_cr4(vcpu, cr4)) |
608 | return 1; | 620 | return 1; |
609 | 621 | ||
610 | if ((cr4 ^ old_cr4) & pdptr_bits) | 622 | if (((cr4 ^ old_cr4) & pdptr_bits) || |
623 | (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) | ||
611 | kvm_mmu_reset_context(vcpu); | 624 | kvm_mmu_reset_context(vcpu); |
612 | 625 | ||
613 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) | 626 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) |
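
The new checks mirror the SDM rules for CR4.PCIDE: it may be set only when CPUID exposes PCID, CR3[11:0] is zero (those bits become the current PCID once enabled), and EFER.LMA = 1. The kvm_set_cr0 hunk earlier likewise refuses to clear CR0.PG while PCIDE is set, and kvm_set_cr3 below switches to a different reserved-bit mask. A compact userspace model of the enable predicate:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define X86_CR3_PCID_MASK 0xFFFull	/* CR3[11:0] */

/* Model of the #GP conditions checked above, not kernel code. */
static bool may_set_pcide(uint64_t cr3, bool efer_lma, bool cpuid_has_pcid)
{
	if (!cpuid_has_pcid)
		return false;		/* feature not exposed  */
	if (cr3 & X86_CR3_PCID_MASK)
		return false;		/* CR3[11:0] must be 0  */
	return efer_lma;		/* 64-bit mode required */
}

int main(void)
{
	printf("%d\n", may_set_pcide(0x1000, true, true));	/* 1 */
	printf("%d\n", may_set_pcide(0x1001, true, true));	/* 0 */
	return 0;
}
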
@@ -626,8 +639,12 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
626 | } | 639 | } |
627 | 640 | ||
628 | if (is_long_mode(vcpu)) { | 641 | if (is_long_mode(vcpu)) { |
629 | if (cr3 & CR3_L_MODE_RESERVED_BITS) | 642 | if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) { |
630 | return 1; | 643 | if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS) |
644 | return 1; | ||
645 | } else | ||
646 | if (cr3 & CR3_L_MODE_RESERVED_BITS) | ||
647 | return 1; | ||
631 | } else { | 648 | } else { |
632 | if (is_pae(vcpu)) { | 649 | if (is_pae(vcpu)) { |
633 | if (cr3 & CR3_PAE_RESERVED_BITS) | 650 | if (cr3 & CR3_PAE_RESERVED_BITS) |
@@ -795,6 +812,7 @@ static u32 msrs_to_save[] = { | |||
795 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 812 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
796 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 813 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
797 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | 814 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, |
815 | MSR_KVM_PV_EOI_EN, | ||
798 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 816 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
799 | MSR_STAR, | 817 | MSR_STAR, |
800 | #ifdef CONFIG_X86_64 | 818 | #ifdef CONFIG_X86_64 |
@@ -1437,8 +1455,8 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1437 | break; | 1455 | break; |
1438 | } | 1456 | } |
1439 | default: | 1457 | default: |
1440 | pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | 1458 | vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " |
1441 | "data 0x%llx\n", msr, data); | 1459 | "data 0x%llx\n", msr, data); |
1442 | return 1; | 1460 | return 1; |
1443 | } | 1461 | } |
1444 | return 0; | 1462 | return 0; |
@@ -1470,8 +1488,8 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1470 | case HV_X64_MSR_TPR: | 1488 | case HV_X64_MSR_TPR: |
1471 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); | 1489 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); |
1472 | default: | 1490 | default: |
1473 | pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | 1491 | vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " |
1474 | "data 0x%llx\n", msr, data); | 1492 | "data 0x%llx\n", msr, data); |
1475 | return 1; | 1493 | return 1; |
1476 | } | 1494 | } |
1477 | 1495 | ||
@@ -1551,15 +1569,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1551 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | 1569 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ |
1552 | data &= ~(u64)0x8; /* ignore TLB cache disable */ | 1570 | data &= ~(u64)0x8; /* ignore TLB cache disable */ |
1553 | if (data != 0) { | 1571 | if (data != 0) { |
1554 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 1572 | vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
1555 | data); | 1573 | data); |
1556 | return 1; | 1574 | return 1; |
1557 | } | 1575 | } |
1558 | break; | 1576 | break; |
1559 | case MSR_FAM10H_MMIO_CONF_BASE: | 1577 | case MSR_FAM10H_MMIO_CONF_BASE: |
1560 | if (data != 0) { | 1578 | if (data != 0) { |
1561 | pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: " | 1579 | vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: " |
1562 | "0x%llx\n", data); | 1580 | "0x%llx\n", data); |
1563 | return 1; | 1581 | return 1; |
1564 | } | 1582 | } |
1565 | break; | 1583 | break; |
@@ -1574,8 +1592,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1574 | thus reserved and should throw a #GP */ | 1592 | thus reserved and should throw a #GP */ |
1575 | return 1; | 1593 | return 1; |
1576 | } | 1594 | } |
1577 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", | 1595 | vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", |
1578 | __func__, data); | 1596 | __func__, data); |
1579 | break; | 1597 | break; |
1580 | case MSR_IA32_UCODE_REV: | 1598 | case MSR_IA32_UCODE_REV: |
1581 | case MSR_IA32_UCODE_WRITE: | 1599 | case MSR_IA32_UCODE_WRITE: |
@@ -1653,6 +1671,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1653 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | 1671 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); |
1654 | 1672 | ||
1655 | break; | 1673 | break; |
1674 | case MSR_KVM_PV_EOI_EN: | ||
1675 | if (kvm_lapic_enable_pv_eoi(vcpu, data)) | ||
1676 | return 1; | ||
1677 | break; | ||
1656 | 1678 | ||
1657 | case MSR_IA32_MCG_CTL: | 1679 | case MSR_IA32_MCG_CTL: |
1658 | case MSR_IA32_MCG_STATUS: | 1680 | case MSR_IA32_MCG_STATUS: |
@@ -1671,8 +1693,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1671 | case MSR_K7_EVNTSEL2: | 1693 | case MSR_K7_EVNTSEL2: |
1672 | case MSR_K7_EVNTSEL3: | 1694 | case MSR_K7_EVNTSEL3: |
1673 | if (data != 0) | 1695 | if (data != 0) |
1674 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " | 1696 | vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: " |
1675 | "0x%x data 0x%llx\n", msr, data); | 1697 | "0x%x data 0x%llx\n", msr, data); |
1676 | break; | 1698 | break; |
1677 | /* at least RHEL 4 unconditionally writes to the perfctr registers, | 1699 | /* at least RHEL 4 unconditionally writes to the perfctr registers, |
1678 | * so we ignore writes to make it happy. | 1700 | * so we ignore writes to make it happy. |
@@ -1681,8 +1703,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1681 | case MSR_K7_PERFCTR1: | 1703 | case MSR_K7_PERFCTR1: |
1682 | case MSR_K7_PERFCTR2: | 1704 | case MSR_K7_PERFCTR2: |
1683 | case MSR_K7_PERFCTR3: | 1705 | case MSR_K7_PERFCTR3: |
1684 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " | 1706 | vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: " |
1685 | "0x%x data 0x%llx\n", msr, data); | 1707 | "0x%x data 0x%llx\n", msr, data); |
1686 | break; | 1708 | break; |
1687 | case MSR_P6_PERFCTR0: | 1709 | case MSR_P6_PERFCTR0: |
1688 | case MSR_P6_PERFCTR1: | 1710 | case MSR_P6_PERFCTR1: |
@@ -1693,8 +1715,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1693 | return kvm_pmu_set_msr(vcpu, msr, data); | 1715 | return kvm_pmu_set_msr(vcpu, msr, data); |
1694 | 1716 | ||
1695 | if (pr || data != 0) | 1717 | if (pr || data != 0) |
1696 | pr_unimpl(vcpu, "disabled perfctr wrmsr: " | 1718 | vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " |
1697 | "0x%x data 0x%llx\n", msr, data); | 1719 | "0x%x data 0x%llx\n", msr, data); |
1698 | break; | 1720 | break; |
1699 | case MSR_K7_CLK_CTL: | 1721 | case MSR_K7_CLK_CTL: |
1700 | /* | 1722 | /* |
@@ -1720,7 +1742,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1720 | /* Drop writes to this legacy MSR -- see rdmsr | 1742 | /* Drop writes to this legacy MSR -- see rdmsr |
1721 | * counterpart for further detail. | 1743 | * counterpart for further detail. |
1722 | */ | 1744 | */ |
1723 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); | 1745 | vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); |
1724 | break; | 1746 | break; |
1725 | case MSR_AMD64_OSVW_ID_LENGTH: | 1747 | case MSR_AMD64_OSVW_ID_LENGTH: |
1726 | if (!guest_cpuid_has_osvw(vcpu)) | 1748 | if (!guest_cpuid_has_osvw(vcpu)) |
@@ -1738,12 +1760,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1738 | if (kvm_pmu_msr(vcpu, msr)) | 1760 | if (kvm_pmu_msr(vcpu, msr)) |
1739 | return kvm_pmu_set_msr(vcpu, msr, data); | 1761 | return kvm_pmu_set_msr(vcpu, msr, data); |
1740 | if (!ignore_msrs) { | 1762 | if (!ignore_msrs) { |
1741 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 1763 | vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
1742 | msr, data); | 1764 | msr, data); |
1743 | return 1; | 1765 | return 1; |
1744 | } else { | 1766 | } else { |
1745 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", | 1767 | vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", |
1746 | msr, data); | 1768 | msr, data); |
1747 | break; | 1769 | break; |
1748 | } | 1770 | } |
1749 | } | 1771 | } |
@@ -1846,7 +1868,7 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1846 | data = kvm->arch.hv_hypercall; | 1868 | data = kvm->arch.hv_hypercall; |
1847 | break; | 1869 | break; |
1848 | default: | 1870 | default: |
1849 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 1871 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
1850 | return 1; | 1872 | return 1; |
1851 | } | 1873 | } |
1852 | 1874 | ||
@@ -1877,7 +1899,7 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1877 | data = vcpu->arch.hv_vapic; | 1899 | data = vcpu->arch.hv_vapic; |
1878 | break; | 1900 | break; |
1879 | default: | 1901 | default: |
1880 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 1902 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
1881 | return 1; | 1903 | return 1; |
1882 | } | 1904 | } |
1883 | *pdata = data; | 1905 | *pdata = data; |
@@ -2030,10 +2052,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2030 | if (kvm_pmu_msr(vcpu, msr)) | 2052 | if (kvm_pmu_msr(vcpu, msr)) |
2031 | return kvm_pmu_get_msr(vcpu, msr, pdata); | 2053 | return kvm_pmu_get_msr(vcpu, msr, pdata); |
2032 | if (!ignore_msrs) { | 2054 | if (!ignore_msrs) { |
2033 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 2055 | vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
2034 | return 1; | 2056 | return 1; |
2035 | } else { | 2057 | } else { |
2036 | pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr); | 2058 | vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr); |
2037 | data = 0; | 2059 | data = 0; |
2038 | } | 2060 | } |
2039 | break; | 2061 | break; |
@@ -4116,7 +4138,7 @@ static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) | |||
4116 | value = kvm_get_cr8(vcpu); | 4138 | value = kvm_get_cr8(vcpu); |
4117 | break; | 4139 | break; |
4118 | default: | 4140 | default: |
4119 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 4141 | kvm_err("%s: unexpected cr %u\n", __func__, cr); |
4120 | return 0; | 4142 | return 0; |
4121 | } | 4143 | } |
4122 | 4144 | ||
@@ -4145,7 +4167,7 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) | |||
4145 | res = kvm_set_cr8(vcpu, val); | 4167 | res = kvm_set_cr8(vcpu, val); |
4146 | break; | 4168 | break; |
4147 | default: | 4169 | default: |
4148 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 4170 | kvm_err("%s: unexpected cr %u\n", __func__, cr); |
4149 | res = -1; | 4171 | res = -1; |
4150 | } | 4172 | } |
4151 | 4173 | ||
@@ -4297,26 +4319,10 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt, | |||
4297 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); | 4319 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); |
4298 | } | 4320 | } |
4299 | 4321 | ||
4300 | static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, | 4322 | static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, |
4301 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) | 4323 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) |
4302 | { | 4324 | { |
4303 | struct kvm_cpuid_entry2 *cpuid = NULL; | 4325 | kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); |
4304 | |||
4305 | if (eax && ecx) | ||
4306 | cpuid = kvm_find_cpuid_entry(emul_to_vcpu(ctxt), | ||
4307 | *eax, *ecx); | ||
4308 | |||
4309 | if (cpuid) { | ||
4310 | *eax = cpuid->eax; | ||
4311 | *ecx = cpuid->ecx; | ||
4312 | if (ebx) | ||
4313 | *ebx = cpuid->ebx; | ||
4314 | if (edx) | ||
4315 | *edx = cpuid->edx; | ||
4316 | return true; | ||
4317 | } | ||
4318 | |||
4319 | return false; | ||
4320 | } | 4326 | } |
4321 | 4327 | ||
4322 | static struct x86_emulate_ops emulate_ops = { | 4328 | static struct x86_emulate_ops emulate_ops = { |
@@ -5296,8 +5302,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5296 | 5302 | ||
5297 | r = kvm_mmu_reload(vcpu); | 5303 | r = kvm_mmu_reload(vcpu); |
5298 | if (unlikely(r)) { | 5304 | if (unlikely(r)) { |
5299 | kvm_x86_ops->cancel_injection(vcpu); | 5305 | goto cancel_injection; |
5300 | goto out; | ||
5301 | } | 5306 | } |
5302 | 5307 | ||
5303 | preempt_disable(); | 5308 | preempt_disable(); |
@@ -5322,9 +5327,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5322 | smp_wmb(); | 5327 | smp_wmb(); |
5323 | local_irq_enable(); | 5328 | local_irq_enable(); |
5324 | preempt_enable(); | 5329 | preempt_enable(); |
5325 | kvm_x86_ops->cancel_injection(vcpu); | ||
5326 | r = 1; | 5330 | r = 1; |
5327 | goto out; | 5331 | goto cancel_injection; |
5328 | } | 5332 | } |
5329 | 5333 | ||
5330 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 5334 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
@@ -5388,9 +5392,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5388 | if (unlikely(vcpu->arch.tsc_always_catchup)) | 5392 | if (unlikely(vcpu->arch.tsc_always_catchup)) |
5389 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 5393 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
5390 | 5394 | ||
5391 | kvm_lapic_sync_from_vapic(vcpu); | 5395 | if (vcpu->arch.apic_attention) |
5396 | kvm_lapic_sync_from_vapic(vcpu); | ||
5392 | 5397 | ||
5393 | r = kvm_x86_ops->handle_exit(vcpu); | 5398 | r = kvm_x86_ops->handle_exit(vcpu); |
5399 | return r; | ||
5400 | |||
5401 | cancel_injection: | ||
5402 | kvm_x86_ops->cancel_injection(vcpu); | ||
5403 | if (unlikely(vcpu->arch.apic_attention)) | ||
5404 | kvm_lapic_sync_from_vapic(vcpu); | ||
5394 | out: | 5405 | out: |
5395 | return r; | 5406 | return r; |
5396 | } | 5407 | } |
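
The vcpu_enter_guest() rework replaces two inline cancel_injection calls with a single label, so every late bail-out undoes the pending event injection in one place and also picks up the new conditional vapic sync. The shape of the pattern, as a standalone model with stubbed-out steps:

#include <stdio.h>

static int fail_step = 2;	/* which step fails in this model */

static int reload_mmu(void)      { return fail_step == 1; }
static int must_exit_early(void) { return fail_step == 2; }
static void cancel_pending(void) { puts("cancel pending injection"); }
static void sync_vapic(void)     { puts("sync vapic state"); }

static int enter_guest_model(void)
{
	int r = 1;

	if (reload_mmu())
		goto cancel_injection;	/* every late failure ...        */
	if (must_exit_early())
		goto cancel_injection;	/* ... unwinds through one label */

	/* guest entry and exit handling would run here */
	return 0;

cancel_injection:
	cancel_pending();	/* undo the earlier event injection  */
	sync_vapic();		/* and keep the vapic state coherent */
	return r;
}

int main(void)
{
	return enter_guest_model();
}
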
@@ -6304,7 +6315,7 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, | |||
6304 | 6315 | ||
6305 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 6316 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
6306 | if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { | 6317 | if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { |
6307 | vfree(free->arch.lpage_info[i]); | 6318 | kvm_kvfree(free->arch.lpage_info[i]); |
6308 | free->arch.lpage_info[i] = NULL; | 6319 | free->arch.lpage_info[i] = NULL; |
6309 | } | 6320 | } |
6310 | } | 6321 | } |
@@ -6323,7 +6334,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
6323 | slot->base_gfn, level) + 1; | 6334 | slot->base_gfn, level) + 1; |
6324 | 6335 | ||
6325 | slot->arch.lpage_info[i] = | 6336 | slot->arch.lpage_info[i] = |
6326 | vzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); | 6337 | kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); |
6327 | if (!slot->arch.lpage_info[i]) | 6338 | if (!slot->arch.lpage_info[i]) |
6328 | goto out_free; | 6339 | goto out_free; |
6329 | 6340 | ||
@@ -6350,7 +6361,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
6350 | 6361 | ||
6351 | out_free: | 6362 | out_free: |
6352 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 6363 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
6353 | vfree(slot->arch.lpage_info[i]); | 6364 | kvm_kvfree(slot->arch.lpage_info[i]); |
6354 | slot->arch.lpage_info[i] = NULL; | 6365 | slot->arch.lpage_info[i] = NULL; |
6355 | } | 6366 | } |
6356 | return -ENOMEM; | 6367 | return -ENOMEM; |
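
kvm_kvzalloc()/kvm_kvfree() are not shown in this hunk; presumably they serve small lpage_info arrays from the slab and fall back to vmalloc for large ones, freeing by address kind. A sketch of such a hybrid pair under that assumption (the real helpers live in KVM common code and may differ):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Sketch only -- assumed shape of kvm_kvzalloc()/kvm_kvfree(). */
static void *kvzalloc_sketch(unsigned long size)
{
	if (size > PAGE_SIZE)		/* too big for comfort in the slab */
		return vzalloc(size);
	return kzalloc(size, GFP_KERNEL);
}

static void kvfree_sketch(const void *addr)
{
	if (is_vmalloc_addr(addr))	/* free by address kind */
		vfree(addr);
	else
		kfree(addr);
}
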
diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c index a311cc59b65d..8d6ef78b5d01 100644 --- a/arch/x86/lib/msr-reg-export.c +++ b/arch/x86/lib/msr-reg-export.c | |||
@@ -1,5 +1,5 @@ | |||
1 | #include <linux/module.h> | 1 | #include <linux/module.h> |
2 | #include <asm/msr.h> | 2 | #include <asm/msr.h> |
3 | 3 | ||
4 | EXPORT_SYMBOL(native_rdmsr_safe_regs); | 4 | EXPORT_SYMBOL(rdmsr_safe_regs); |
5 | EXPORT_SYMBOL(native_wrmsr_safe_regs); | 5 | EXPORT_SYMBOL(wrmsr_safe_regs); |
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 69fa10623f21..f6d13eefad10 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S | |||
@@ -6,13 +6,13 @@ | |||
6 | 6 | ||
7 | #ifdef CONFIG_X86_64 | 7 | #ifdef CONFIG_X86_64 |
8 | /* | 8 | /* |
9 | * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]); | 9 | * int {rdmsr,wrmsr}_safe_regs(u32 gprs[8]); |
10 | * | 10 | * |
11 | * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] | 11 | * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] |
12 | * | 12 | * |
13 | */ | 13 | */ |
14 | .macro op_safe_regs op | 14 | .macro op_safe_regs op |
15 | ENTRY(native_\op\()_safe_regs) | 15 | ENTRY(\op\()_safe_regs) |
16 | CFI_STARTPROC | 16 | CFI_STARTPROC |
17 | pushq_cfi %rbx | 17 | pushq_cfi %rbx |
18 | pushq_cfi %rbp | 18 | pushq_cfi %rbp |
@@ -45,13 +45,13 @@ ENTRY(native_\op\()_safe_regs) | |||
45 | 45 | ||
46 | _ASM_EXTABLE(1b, 3b) | 46 | _ASM_EXTABLE(1b, 3b) |
47 | CFI_ENDPROC | 47 | CFI_ENDPROC |
48 | ENDPROC(native_\op\()_safe_regs) | 48 | ENDPROC(\op\()_safe_regs) |
49 | .endm | 49 | .endm |
50 | 50 | ||
51 | #else /* X86_32 */ | 51 | #else /* X86_32 */ |
52 | 52 | ||
53 | .macro op_safe_regs op | 53 | .macro op_safe_regs op |
54 | ENTRY(native_\op\()_safe_regs) | 54 | ENTRY(\op\()_safe_regs) |
55 | CFI_STARTPROC | 55 | CFI_STARTPROC |
56 | pushl_cfi %ebx | 56 | pushl_cfi %ebx |
57 | pushl_cfi %ebp | 57 | pushl_cfi %ebp |
@@ -92,7 +92,7 @@ ENTRY(native_\op\()_safe_regs) | |||
92 | 92 | ||
93 | _ASM_EXTABLE(1b, 3b) | 93 | _ASM_EXTABLE(1b, 3b) |
94 | CFI_ENDPROC | 94 | CFI_ENDPROC |
95 | ENDPROC(native_\op\()_safe_regs) | 95 | ENDPROC(\op\()_safe_regs) |
96 | .endm | 96 | .endm |
97 | 97 | ||
98 | #endif | 98 | #endif |
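
After the rename, rdmsr_safe_regs()/wrmsr_safe_regs() are the exported entry points. The gprs[8] array follows the layout in the comment above (index 0 = eax, 1 = ecx, 2 = edx, ...), the MSR index goes in the ecx slot, and — per the usual safe-MSR convention — a nonzero return means the access faulted. A hedged kernel-side usage sketch reading IA32_TSC (MSR 0x10):

#include <linux/types.h>
#include <asm/msr.h>

/* Sketch: read an MSR through the regs-based safe API. */
static int read_tsc_regs(u64 *val)
{
	u32 gprs[8] = { 0 };
	int err;

	gprs[1] = 0x10;			/* MSR_IA32_TSC in the %ecx slot */
	err = rdmsr_safe_regs(gprs);	/* nonzero if the rdmsr faulted  */
	if (!err)
		*val = ((u64)gprs[2] << 32) | gprs[0];	/* %edx:%eax */
	return err;
}
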
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index bc4e9d84157f..e0e6990723e9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -385,7 +385,7 @@ void free_initmem(void) | |||
385 | } | 385 | } |
386 | 386 | ||
387 | #ifdef CONFIG_BLK_DEV_INITRD | 387 | #ifdef CONFIG_BLK_DEV_INITRD |
388 | void free_initrd_mem(unsigned long start, unsigned long end) | 388 | void __init free_initrd_mem(unsigned long start, unsigned long end) |
389 | { | 389 | { |
390 | /* | 390 | /* |
391 | * end could be unaligned, and we cannot align it, | 391 | * end could be unaligned, and we cannot align it, |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a718e0d23503..931930a96160 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -919,11 +919,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
919 | 919 | ||
920 | /* | 920 | /* |
921 | * On success we use clflush, when the CPU supports it to | 921 | * On success we use clflush, when the CPU supports it to |
922 | * avoid the wbindv. If the CPU does not support it and in the | 922 | * avoid the wbindv. If the CPU does not support it, in the |
923 | * error case we fall back to cpa_flush_all (which uses | 923 | * error case, and during early boot (for EFI) we fall back |
924 | * wbindv): | 924 | * to cpa_flush_all (which uses wbinvd): |
925 | */ | 925 | */ |
926 | if (!ret && cpu_has_clflush) { | 926 | if (early_boot_irqs_disabled) |
927 | __cpa_flush_all((void *)(long)cache); | ||
928 | else if (!ret && cpu_has_clflush) { | ||
927 | if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { | 929 | if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { |
928 | cpa_flush_array(addr, numpages, cache, | 930 | cpa_flush_array(addr, numpages, cache, |
929 | cpa.flags, pages); | 931 | cpa.flags, pages); |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5e57e113b72c..613cd83e8c0c 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/cache.h> | 12 | #include <asm/cache.h> |
13 | #include <asm/apic.h> | 13 | #include <asm/apic.h> |
14 | #include <asm/uv/uv.h> | 14 | #include <asm/uv/uv.h> |
15 | #include <linux/debugfs.h> | ||
15 | 16 | ||
16 | DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) | 17 | DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) |
17 | = { &init_mm, 0, }; | 18 | = { &init_mm, 0, }; |
@@ -27,33 +28,14 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) | |||
27 | * | 28 | * |
28 | * More scalable flush, from Andi Kleen | 29 | * More scalable flush, from Andi Kleen |
29 | * | 30 | * |
30 | * To avoid global state use 8 different call vectors. | 31 | * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi |
31 | * Each CPU uses a specific vector to trigger flushes on other | ||
32 | * CPUs. Depending on the received vector the target CPUs look into | ||
33 | * the right array slot for the flush data. | ||
34 | * | ||
35 | * With more than 8 CPUs they are hashed to the 8 available | ||
36 | * vectors. The limited global vector space forces us to this right now. | ||
37 | * In future when interrupts are split into per CPU domains this could be | ||
38 | * fixed, at the cost of triggering multiple IPIs in some cases. | ||
39 | */ | 32 | */ |
40 | 33 | ||
41 | union smp_flush_state { | 34 | struct flush_tlb_info { |
42 | struct { | 35 | struct mm_struct *flush_mm; |
43 | struct mm_struct *flush_mm; | 36 | unsigned long flush_start; |
44 | unsigned long flush_va; | 37 | unsigned long flush_end; |
45 | raw_spinlock_t tlbstate_lock; | 38 | }; |
46 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); | ||
47 | }; | ||
48 | char pad[INTERNODE_CACHE_BYTES]; | ||
49 | } ____cacheline_internodealigned_in_smp; | ||
50 | |||
51 | /* State is put into the per CPU data section, but padded | ||
52 | to a full cache line because other CPUs can access it and we don't | ||
53 | want false sharing in the per cpu data segment. */ | ||
54 | static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; | ||
55 | |||
56 | static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset); | ||
57 | 39 | ||
58 | /* | 40 | /* |
59 | * We cannot call mmdrop() because we are in interrupt context, | 41 | * We cannot call mmdrop() because we are in interrupt context, |
@@ -72,28 +54,25 @@ void leave_mm(int cpu) | |||
72 | EXPORT_SYMBOL_GPL(leave_mm); | 54 | EXPORT_SYMBOL_GPL(leave_mm); |
73 | 55 | ||
74 | /* | 56 | /* |
75 | * | ||
76 | * The flush IPI assumes that a thread switch happens in this order: | 57 | * The flush IPI assumes that a thread switch happens in this order: |
77 | * [cpu0: the cpu that switches] | 58 | * [cpu0: the cpu that switches] |
78 | * 1) switch_mm() either 1a) or 1b) | 59 | * 1) switch_mm() either 1a) or 1b) |
79 | * 1a) thread switch to a different mm | 60 | * 1a) thread switch to a different mm |
80 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); | 61 | * 1a1) set cpu_tlbstate to TLBSTATE_OK |
81 | * Stop ipi delivery for the old mm. This is not synchronized with | 62 | * Now the tlb flush NMI handler flush_tlb_func won't call leave_mm |
82 | * the other cpus, but smp_invalidate_interrupt ignore flush ipis | 63 | * if cpu0 was in lazy tlb mode. |
83 | * for the wrong mm, and in the worst case we perform a superfluous | 64 | * 1a2) update cpu active_mm |
84 | * tlb flush. | ||
85 | * 1a2) set cpu mmu_state to TLBSTATE_OK | ||
86 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | ||
87 | * was in lazy tlb mode. | ||
88 | * 1a3) update cpu active_mm | ||
89 | * Now cpu0 accepts tlb flushes for the new mm. | 65 | * Now cpu0 accepts tlb flushes for the new mm. |
90 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); | 66 | * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask); |
91 | * Now the other cpus will send tlb flush ipis. | 67 | * Now the other cpus will send tlb flush ipis. |
92 | * 1a4) change cr3. | 68 | * 1a4) change cr3. |
69 | * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask); | ||
70 | * Stop ipi delivery for the old mm. This is not synchronized with | ||
71 | * the other cpus, but flush_tlb_func ignore flush ipis for the wrong | ||
72 | * mm, and in the worst case we perform a superfluous tlb flush. | ||
93 | * 1b) thread switch without mm change | 73 | * 1b) thread switch without mm change |
94 | * cpu active_mm is correct, cpu0 already handles | 74 | * cpu active_mm is correct, cpu0 already handles flush ipis. |
95 | * flush ipis. | 75 | * 1b1) set cpu_tlbstate to TLBSTATE_OK |
96 | * 1b1) set cpu mmu_state to TLBSTATE_OK | ||
97 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | 76 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. |
98 | * Atomically set the bit [other cpus will start sending flush ipis], | 77 | * Atomically set the bit [other cpus will start sending flush ipis], |
99 | * and test the bit. | 78 | * and test the bit. |
@@ -106,174 +85,62 @@ EXPORT_SYMBOL_GPL(leave_mm); | |||
106 | * runs in kernel space, the cpu could load tlb entries for user space | 85 | * runs in kernel space, the cpu could load tlb entries for user space |
107 | * pages. | 86 | * pages. |
108 | * | 87 | * |
109 | * The good news is that cpu mmu_state is local to each cpu, no | 88 | * The good news is that cpu_tlbstate is local to each cpu, no |
110 | * write/read ordering problems. | 89 | * write/read ordering problems. |
111 | */ | 90 | */ |
112 | 91 | ||
113 | /* | 92 | /* |
114 | * TLB flush IPI: | 93 | * TLB flush function: |
115 | * | ||
116 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. | 94 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. |
117 | * 2) Leave the mm if we are in the lazy tlb mode. | 95 | * 2) Leave the mm if we are in the lazy tlb mode. |
118 | * | ||
119 | * Interrupts are disabled. | ||
120 | */ | ||
121 | |||
122 | /* | ||
123 | * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop | ||
124 | * but still used for documentation purpose but the usage is slightly | ||
125 | * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt | ||
126 | * entry calls in with the first parameter in %eax. Maybe define | ||
127 | * intrlinkage? | ||
128 | */ | 96 | */ |
129 | #ifdef CONFIG_X86_64 | 97 | static void flush_tlb_func(void *info) |
130 | asmlinkage | ||
131 | #endif | ||
132 | void smp_invalidate_interrupt(struct pt_regs *regs) | ||
133 | { | 98 | { |
134 | unsigned int cpu; | 99 | struct flush_tlb_info *f = info; |
135 | unsigned int sender; | ||
136 | union smp_flush_state *f; | ||
137 | |||
138 | cpu = smp_processor_id(); | ||
139 | /* | ||
140 | * orig_rax contains the negated interrupt vector. | ||
141 | * Use that to determine where the sender put the data. | ||
142 | */ | ||
143 | sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; | ||
144 | f = &flush_state[sender]; | ||
145 | |||
146 | if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) | ||
147 | goto out; | ||
148 | /* | ||
149 | * This was a BUG() but until someone can quote me the | ||
150 | * line from the intel manual that guarantees an IPI to | ||
151 | * multiple CPUs is retried _only_ on the erroring CPUs | ||
152 | * its staying as a return | ||
153 | * | ||
154 | * BUG(); | ||
155 | */ | ||
156 | |||
157 | if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) { | ||
158 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { | ||
159 | if (f->flush_va == TLB_FLUSH_ALL) | ||
160 | local_flush_tlb(); | ||
161 | else | ||
162 | __flush_tlb_one(f->flush_va); | ||
163 | } else | ||
164 | leave_mm(cpu); | ||
165 | } | ||
166 | out: | ||
167 | ack_APIC_irq(); | ||
168 | smp_mb__before_clear_bit(); | ||
169 | cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); | ||
170 | smp_mb__after_clear_bit(); | ||
171 | inc_irq_stat(irq_tlb_count); | ||
172 | } | ||
173 | 100 | ||
174 | static void flush_tlb_others_ipi(const struct cpumask *cpumask, | 101 | if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) |
175 | struct mm_struct *mm, unsigned long va) | 102 | return; |
176 | { | 103 | |
177 | unsigned int sender; | 104 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { |
178 | union smp_flush_state *f; | 105 | if (f->flush_end == TLB_FLUSH_ALL || !cpu_has_invlpg) |
179 | 106 | local_flush_tlb(); | |
180 | /* Caller has disabled preemption */ | 107 | else if (!f->flush_end) |
181 | sender = this_cpu_read(tlb_vector_offset); | 108 | __flush_tlb_single(f->flush_start); |
182 | f = &flush_state[sender]; | 109 | else { |
183 | 110 | unsigned long addr; | |
184 | if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) | 111 | addr = f->flush_start; |
185 | raw_spin_lock(&f->tlbstate_lock); | 112 | while (addr < f->flush_end) { |
186 | 113 | __flush_tlb_single(addr); | |
187 | f->flush_mm = mm; | 114 | addr += PAGE_SIZE; |
188 | f->flush_va = va; | 115 | } |
189 | if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) { | 116 | } |
190 | /* | 117 | } else |
191 | * We have to send the IPI only to | 118 | leave_mm(smp_processor_id()); |
192 | * CPUs affected. | ||
193 | */ | ||
194 | apic->send_IPI_mask(to_cpumask(f->flush_cpumask), | ||
195 | INVALIDATE_TLB_VECTOR_START + sender); | ||
196 | |||
197 | while (!cpumask_empty(to_cpumask(f->flush_cpumask))) | ||
198 | cpu_relax(); | ||
199 | } | ||
200 | 119 | ||
201 | f->flush_mm = NULL; | ||
202 | f->flush_va = 0; | ||
203 | if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) | ||
204 | raw_spin_unlock(&f->tlbstate_lock); | ||
205 | } | 120 | } |
206 | 121 | ||
207 | void native_flush_tlb_others(const struct cpumask *cpumask, | 122 | void native_flush_tlb_others(const struct cpumask *cpumask, |
208 | struct mm_struct *mm, unsigned long va) | 123 | struct mm_struct *mm, unsigned long start, |
124 | unsigned long end) | ||
209 | { | 125 | { |
126 | struct flush_tlb_info info; | ||
127 | info.flush_mm = mm; | ||
128 | info.flush_start = start; | ||
129 | info.flush_end = end; | ||
130 | |||
210 | if (is_uv_system()) { | 131 | if (is_uv_system()) { |
211 | unsigned int cpu; | 132 | unsigned int cpu; |
212 | 133 | ||
213 | cpu = smp_processor_id(); | 134 | cpu = smp_processor_id(); |
214 | cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); | 135 | cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu); |
215 | if (cpumask) | 136 | if (cpumask) |
216 | flush_tlb_others_ipi(cpumask, mm, va); | 137 | smp_call_function_many(cpumask, flush_tlb_func, |
138 | &info, 1); | ||
217 | return; | 139 | return; |
218 | } | 140 | } |
219 | flush_tlb_others_ipi(cpumask, mm, va); | 141 | smp_call_function_many(cpumask, flush_tlb_func, &info, 1); |
220 | } | 142 | } |
221 | 143 | ||
222 | static void __cpuinit calculate_tlb_offset(void) | ||
223 | { | ||
224 | int cpu, node, nr_node_vecs, idx = 0; | ||
225 | /* | ||
226 | * we are changing tlb_vector_offset for each CPU in runtime, but this | ||
227 | * will not cause inconsistency, as the write is atomic under X86. we | ||
228 | * might see more lock contentions in a short time, but after all CPU's | ||
229 | * tlb_vector_offset are changed, everything should go normal | ||
230 | * | ||
231 | * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might | ||
232 | * waste some vectors. | ||
233 | **/ | ||
234 | if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS) | ||
235 | nr_node_vecs = 1; | ||
236 | else | ||
237 | nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; | ||
238 | |||
239 | for_each_online_node(node) { | ||
240 | int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * | ||
241 | nr_node_vecs; | ||
242 | int cpu_offset = 0; | ||
243 | for_each_cpu(cpu, cpumask_of_node(node)) { | ||
244 | per_cpu(tlb_vector_offset, cpu) = node_offset + | ||
245 | cpu_offset; | ||
246 | cpu_offset++; | ||
247 | cpu_offset = cpu_offset % nr_node_vecs; | ||
248 | } | ||
249 | idx++; | ||
250 | } | ||
251 | } | ||
252 | |||
253 | static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n, | ||
254 | unsigned long action, void *hcpu) | ||
255 | { | ||
256 | switch (action & 0xf) { | ||
257 | case CPU_ONLINE: | ||
258 | case CPU_DEAD: | ||
259 | calculate_tlb_offset(); | ||
260 | } | ||
261 | return NOTIFY_OK; | ||
262 | } | ||
263 | |||
264 | static int __cpuinit init_smp_flush(void) | ||
265 | { | ||
266 | int i; | ||
267 | |||
268 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) | ||
269 | raw_spin_lock_init(&flush_state[i].tlbstate_lock); | ||
270 | |||
271 | calculate_tlb_offset(); | ||
272 | hotcpu_notifier(tlb_cpuhp_notify, 0); | ||
273 | return 0; | ||
274 | } | ||
275 | core_initcall(init_smp_flush); | ||
276 | |||
277 | void flush_tlb_current_task(void) | 144 | void flush_tlb_current_task(void) |
278 | { | 145 | { |
279 | struct mm_struct *mm = current->mm; | 146 | struct mm_struct *mm = current->mm; |
@@ -282,27 +149,91 @@ void flush_tlb_current_task(void) | |||
282 | 149 | ||
283 | local_flush_tlb(); | 150 | local_flush_tlb(); |
284 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | 151 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
285 | flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); | 152 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); |
286 | preempt_enable(); | 153 | preempt_enable(); |
287 | } | 154 | } |
288 | 155 | ||
289 | void flush_tlb_mm(struct mm_struct *mm) | 156 | /* |
157 | * Find a THP or HUGETLB large page anywhere in the | ||
158 | * flush range; works even when THP is disabled. | ||
159 | */ | ||
160 | static inline unsigned long has_large_page(struct mm_struct *mm, | ||
161 | unsigned long start, unsigned long end) | ||
162 | { | ||
163 | pgd_t *pgd; | ||
164 | pud_t *pud; | ||
165 | pmd_t *pmd; | ||
166 | unsigned long addr = ALIGN(start, HPAGE_SIZE); | ||
167 | for (; addr < end; addr += HPAGE_SIZE) { | ||
168 | pgd = pgd_offset(mm, addr); | ||
169 | if (likely(!pgd_none(*pgd))) { | ||
170 | pud = pud_offset(pgd, addr); | ||
171 | if (likely(!pud_none(*pud))) { | ||
172 | pmd = pmd_offset(pud, addr); | ||
173 | if (likely(!pmd_none(*pmd))) | ||
174 | if (pmd_large(*pmd)) | ||
175 | return addr; | ||
176 | } | ||
177 | } | ||
178 | } | ||
179 | return 0; | ||
180 | } | ||
181 | |||
182 | void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | ||
183 | unsigned long end, unsigned long vmflag) | ||
290 | { | 184 | { |
185 | unsigned long addr; | ||
186 | unsigned act_entries, tlb_entries = 0; | ||
187 | |||
291 | preempt_disable(); | 188 | preempt_disable(); |
189 | if (current->active_mm != mm) | ||
190 | goto flush_all; | ||
292 | 191 | ||
293 | if (current->active_mm == mm) { | 192 | if (!current->mm) { |
294 | if (current->mm) | 193 | leave_mm(smp_processor_id()); |
194 | goto flush_all; | ||
195 | } | ||
196 | |||
197 | if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 | ||
198 | || vmflag == VM_HUGETLB) { | ||
199 | local_flush_tlb(); | ||
200 | goto flush_all; | ||
201 | } | ||
202 | |||
203 | /* On modern CPUs the last-level TLB is shared by data and instructions */ | ||
204 | if (vmflag & VM_EXEC) | ||
205 | tlb_entries = tlb_lli_4k[ENTRIES]; | ||
206 | else | ||
207 | tlb_entries = tlb_lld_4k[ENTRIES]; | ||
208 | /* Assume all TLB entries were occupied by this task */ | ||
209 | act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; | ||
210 | |||
211 | /* tlb_flushall_shift sets the balance point; details in the commit log */ | ||
212 | if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) | ||
213 | local_flush_tlb(); | ||
214 | else { | ||
215 | if (has_large_page(mm, start, end)) { | ||
295 | local_flush_tlb(); | 216 | local_flush_tlb(); |
296 | else | 217 | goto flush_all; |
297 | leave_mm(smp_processor_id()); | 218 | } |
219 | /* flush the range one page at a time with 'invlpg' */ | ||
220 | for (addr = start; addr < end; addr += PAGE_SIZE) | ||
221 | __flush_tlb_single(addr); | ||
222 | |||
223 | if (cpumask_any_but(mm_cpumask(mm), | ||
224 | smp_processor_id()) < nr_cpu_ids) | ||
225 | flush_tlb_others(mm_cpumask(mm), mm, start, end); | ||
226 | preempt_enable(); | ||
227 | return; | ||
298 | } | 228 | } |
299 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | ||
300 | flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); | ||
301 | 229 | ||
230 | flush_all: | ||
231 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | ||
232 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); | ||
302 | preempt_enable(); | 233 | preempt_enable(); |
303 | } | 234 | } |
304 | 235 | ||
305 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) | 236 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) |
306 | { | 237 | { |
307 | struct mm_struct *mm = vma->vm_mm; | 238 | struct mm_struct *mm = vma->vm_mm; |
308 | 239 | ||
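
The core of the flush_tlb_mm_range() logic above is a cost decision: invalidate one page at a time with invlpg only while the page count stays within act_entries >> tlb_flushall_shift — i.e. flush_entries <= active_tlb_entries / 2^tlb_flushall_shift — and fall back to a full flush otherwise, or whenever the shift is -1, the range is huge-page backed, or end == TLB_FLUSH_ALL. The decision in isolation:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Model of the heuristic: shift < 0 forces a full flush. */
static bool full_flush(unsigned long start, unsigned long end,
		       unsigned long act_entries, int shift)
{
	if (shift < 0)
		return true;
	return ((end - start) >> PAGE_SHIFT) > (act_entries >> shift);
}

int main(void)
{
	/* 64-entry TLB, shift 2 => flush up to 16 pages one by one. */
	printf("%d\n", full_flush(0, 16UL << PAGE_SHIFT, 64, 2));	/* 0 */
	printf("%d\n", full_flush(0, 17UL << PAGE_SHIFT, 64, 2));	/* 1 */
	return 0;
}
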
@@ -310,13 +241,13 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) | |||
310 | 241 | ||
311 | if (current->active_mm == mm) { | 242 | if (current->active_mm == mm) { |
312 | if (current->mm) | 243 | if (current->mm) |
313 | __flush_tlb_one(va); | 244 | __flush_tlb_one(start); |
314 | else | 245 | else |
315 | leave_mm(smp_processor_id()); | 246 | leave_mm(smp_processor_id()); |
316 | } | 247 | } |
317 | 248 | ||
318 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | 249 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
319 | flush_tlb_others(mm_cpumask(mm), mm, va); | 250 | flush_tlb_others(mm_cpumask(mm), mm, start, 0UL); |
320 | 251 | ||
321 | preempt_enable(); | 252 | preempt_enable(); |
322 | } | 253 | } |
@@ -332,3 +263,83 @@ void flush_tlb_all(void) | |||
332 | { | 263 | { |
333 | on_each_cpu(do_flush_tlb_all, NULL, 1); | 264 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
334 | } | 265 | } |
266 | |||
267 | static void do_kernel_range_flush(void *info) | ||
268 | { | ||
269 | struct flush_tlb_info *f = info; | ||
270 | unsigned long addr; | ||
271 | |||
272 | /* flush the range one page at a time with 'invlpg' */ | ||
273 | for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE) | ||
274 | __flush_tlb_single(addr); | ||
275 | } | ||
276 | |||
277 | void flush_tlb_kernel_range(unsigned long start, unsigned long end) | ||
278 | { | ||
279 | unsigned act_entries; | ||
280 | struct flush_tlb_info info; | ||
281 | |||
282 | /* On modern CPUs the last-level TLB is shared by data and instructions */ | ||
283 | act_entries = tlb_lld_4k[ENTRIES]; | ||
284 | |||
285 | /* Same balance point as a user-space task's flush, a bit conservative */ | ||
286 | if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 || | ||
287 | (end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) | ||
288 | |||
289 | on_each_cpu(do_flush_tlb_all, NULL, 1); | ||
290 | else { | ||
291 | info.flush_start = start; | ||
292 | info.flush_end = end; | ||
293 | on_each_cpu(do_kernel_range_flush, &info, 1); | ||
294 | } | ||
295 | } | ||
296 | |||
297 | #ifdef CONFIG_DEBUG_TLBFLUSH | ||
298 | static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf, | ||
299 | size_t count, loff_t *ppos) | ||
300 | { | ||
301 | char buf[32]; | ||
302 | unsigned int len; | ||
303 | |||
304 | len = sprintf(buf, "%hd\n", tlb_flushall_shift); | ||
305 | return simple_read_from_buffer(user_buf, count, ppos, buf, len); | ||
306 | } | ||
307 | |||
308 | static ssize_t tlbflush_write_file(struct file *file, | ||
309 | const char __user *user_buf, size_t count, loff_t *ppos) | ||
310 | { | ||
311 | char buf[32]; | ||
312 | ssize_t len; | ||
313 | s8 shift; | ||
314 | |||
315 | len = min(count, sizeof(buf) - 1); | ||
316 | if (copy_from_user(buf, user_buf, len)) | ||
317 | return -EFAULT; | ||
318 | |||
319 | buf[len] = '\0'; | ||
320 | if (kstrtos8(buf, 0, &shift)) | ||
321 | return -EINVAL; | ||
322 | |||
323 | if (shift > 64) | ||
324 | return -EINVAL; | ||
325 | |||
326 | tlb_flushall_shift = shift; | ||
327 | return count; | ||
328 | } | ||
329 | |||
330 | static const struct file_operations fops_tlbflush = { | ||
331 | .read = tlbflush_read_file, | ||
332 | .write = tlbflush_write_file, | ||
333 | .llseek = default_llseek, | ||
334 | }; | ||
335 | |||
336 | static int __cpuinit create_tlb_flushall_shift(void) | ||
337 | { | ||
338 | if (cpu_has_invlpg) { | ||
339 | debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, | ||
340 | arch_debugfs_dir, NULL, &fops_tlbflush); | ||
341 | } | ||
342 | return 0; | ||
343 | } | ||
344 | late_initcall(create_tlb_flushall_shift); | ||
345 | #endif | ||
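
With CONFIG_DEBUG_TLBFLUSH, the shift is tunable at runtime through the debugfs file registered above. A small userspace program exercising it, assuming debugfs is mounted at /sys/kernel/debug and the program runs as root:

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/x86/tlb_flushall_shift";
	char buf[32];
	FILE *f = fopen(path, "r");

	if (!f || !fgets(buf, sizeof(buf), f))
		return 1;
	printf("current shift: %s", buf);
	fclose(f);

	f = fopen(path, "w");		/* -1 disables per-page flushing */
	if (!f || fputs("-1\n", f) == EOF)
		return 1;
	return fclose(f) ? 1 : 0;
}
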
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0597f95b6da6..33643a8bcbbb 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -309,6 +309,10 @@ void bpf_jit_compile(struct sk_filter *fp) | |||
309 | else | 309 | else |
310 | EMIT1_off32(0x0d, K); /* or imm32,%eax */ | 310 | EMIT1_off32(0x0d, K); /* or imm32,%eax */ |
311 | break; | 311 | break; |
312 | case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ | ||
313 | seen |= SEEN_XREG; | ||
314 | EMIT2(0x31, 0xd8); /* xor %ebx,%eax */ | ||
315 | break; | ||
312 | case BPF_S_ALU_LSH_X: /* A <<= X; */ | 316 | case BPF_S_ALU_LSH_X: /* A <<= X; */ |
313 | seen |= SEEN_XREG; | 317 | seen |= SEEN_XREG; |
314 | EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ | 318 | EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ |
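
For reference, the two bytes emitted for the new BPF_S_ANC_ALU_XOR_X case decode as follows: 0x31 is the XOR r/m32,r32 opcode, and 0xd8 is a register-direct ModRM byte — mod = 11, reg = 011 (%ebx, the source), rm = 000 (%eax, the destination) — giving xor %ebx,%eax. A self-checking helper:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Register numbers in x86 ModRM encoding order. */
enum { EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI };

/* Build a mod=11 (register-direct) ModRM byte: 11 reg rm. */
static uint8_t modrm_direct(uint8_t reg, uint8_t rm)
{
	return 0xC0 | (reg << 3) | rm;
}

int main(void)
{
	/* xor %ebx,%eax == opcode 0x31 + ModRM(reg=EBX, rm=EAX) */
	assert(modrm_direct(EBX, EAX) == 0xd8);
	printf("0x31 0x%02x\n", modrm_direct(EBX, EAX));
	return 0;
}
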
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 303f08637826..b2b94438ff05 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -312,7 +312,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs) | |||
312 | goto fail; | 312 | goto fail; |
313 | } | 313 | } |
314 | /* both registers must be reserved */ | 314 | /* both registers must be reserved */ |
315 | if (num_counters == AMD64_NUM_COUNTERS_F15H) { | 315 | if (num_counters == AMD64_NUM_COUNTERS_CORE) { |
316 | msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); | 316 | msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); |
317 | msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); | 317 | msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); |
318 | } else { | 318 | } else { |
@@ -514,7 +514,7 @@ static int op_amd_init(struct oprofile_operations *ops) | |||
514 | ops->create_files = setup_ibs_files; | 514 | ops->create_files = setup_ibs_files; |
515 | 515 | ||
516 | if (boot_cpu_data.x86 == 0x15) { | 516 | if (boot_cpu_data.x86 == 0x15) { |
517 | num_counters = AMD64_NUM_COUNTERS_F15H; | 517 | num_counters = AMD64_NUM_COUNTERS_CORE; |
518 | } else { | 518 | } else { |
519 | num_counters = AMD64_NUM_COUNTERS; | 519 | num_counters = AMD64_NUM_COUNTERS; |
520 | } | 520 | } |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index fc09c2754e08..505acdd6d600 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -12,8 +12,13 @@ struct pci_root_info { | |||
12 | char name[16]; | 12 | char name[16]; |
13 | unsigned int res_num; | 13 | unsigned int res_num; |
14 | struct resource *res; | 14 | struct resource *res; |
15 | int busnum; | ||
16 | struct pci_sysdata sd; | 15 | struct pci_sysdata sd; |
16 | #ifdef CONFIG_PCI_MMCONFIG | ||
17 | bool mcfg_added; | ||
18 | u16 segment; | ||
19 | u8 start_bus; | ||
20 | u8 end_bus; | ||
21 | #endif | ||
17 | }; | 22 | }; |
18 | 23 | ||
19 | static bool pci_use_crs = true; | 24 | static bool pci_use_crs = true; |
@@ -120,6 +125,81 @@ void __init pci_acpi_crs_quirks(void) | |||
120 | pci_use_crs ? "nocrs" : "use_crs"); | 125 | pci_use_crs ? "nocrs" : "use_crs"); |
121 | } | 126 | } |
122 | 127 | ||
128 | #ifdef CONFIG_PCI_MMCONFIG | ||
129 | static int __devinit check_segment(u16 seg, struct device *dev, char *estr) | ||
130 | { | ||
131 | if (seg) { | ||
132 | dev_err(dev, | ||
133 | "%s can't access PCI configuration " | ||
134 | "space under this host bridge.\n", | ||
135 | estr); | ||
136 | return -EIO; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Failure to add MMCFG information is not fatal; we | ||
141 | * just cannot access the extended configuration space | ||
142 | * of devices under this host bridge. | ||
143 | */ | ||
144 | dev_warn(dev, | ||
145 | "%s can't access extended PCI configuration " | ||
146 | "space under this bridge.\n", | ||
147 | estr); | ||
148 | |||
149 | return 0; | ||
150 | } | ||
151 | |||
152 | static int __devinit setup_mcfg_map(struct pci_root_info *info, | ||
153 | u16 seg, u8 start, u8 end, | ||
154 | phys_addr_t addr) | ||
155 | { | ||
156 | int result; | ||
157 | struct device *dev = &info->bridge->dev; | ||
158 | |||
159 | info->start_bus = start; | ||
160 | info->end_bus = end; | ||
161 | info->mcfg_added = false; | ||
162 | |||
163 | /* return success if MMCFG is not in use */ | ||
164 | if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg) | ||
165 | return 0; | ||
166 | |||
167 | if (!(pci_probe & PCI_PROBE_MMCONF)) | ||
168 | return check_segment(seg, dev, "MMCONFIG is disabled,"); | ||
169 | |||
170 | result = pci_mmconfig_insert(dev, seg, start, end, addr); | ||
171 | if (result == 0) { | ||
172 | /* enable MMCFG if it hasn't been enabled yet */ | ||
173 | if (raw_pci_ext_ops == NULL) | ||
174 | raw_pci_ext_ops = &pci_mmcfg; | ||
175 | info->mcfg_added = true; | ||
176 | } else if (result != -EEXIST) | ||
177 | return check_segment(seg, dev, | ||
178 | "failed to add MMCONFIG information,"); | ||
179 | |||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | static void teardown_mcfg_map(struct pci_root_info *info) | ||
184 | { | ||
185 | if (info->mcfg_added) { | ||
186 | pci_mmconfig_delete(info->segment, info->start_bus, | ||
187 | info->end_bus); | ||
188 | info->mcfg_added = false; | ||
189 | } | ||
190 | } | ||
191 | #else | ||
192 | static int __devinit setup_mcfg_map(struct pci_root_info *info, | ||
193 | u16 seg, u8 start, u8 end, | ||
194 | phys_addr_t addr) | ||
195 | { | ||
196 | return 0; | ||
197 | } | ||
198 | static void teardown_mcfg_map(struct pci_root_info *info) | ||
199 | { | ||
200 | } | ||
201 | #endif | ||
202 | |||
123 | static acpi_status | 203 | static acpi_status |
124 | resource_to_addr(struct acpi_resource *resource, | 204 | resource_to_addr(struct acpi_resource *resource, |
125 | struct acpi_resource_address64 *addr) | 205 | struct acpi_resource_address64 *addr) |
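
setup_mcfg_map() above degrades deliberately: on anything but success or -EEXIST it falls back to check_segment(), which hard-fails only for nonzero segments, where legacy port 0xCF8/0xCFC config access cannot reach, and otherwise just warns and gives up extended config space. A standalone model of that decision (the helper names are invented for illustration):

#include <stdio.h>

#define EEXIST 17

/* Nonzero segment => fatal; else warn and limp along with legacy
 * config access (no extended space). */
static int mcfg_fallback(int seg)
{
	if (seg) {
		puts("error: no config access under this bridge");
		return -5;	/* -EIO */
	}
	puts("warning: no *extended* config space under this bridge");
	return 0;
}

static int setup_mcfg_model(int seg, int insert_result)
{
	if (insert_result == 0 || insert_result == -EEXIST)
		return 0;	/* mapped, or an identical region already exists */
	return mcfg_fallback(seg);
}

int main(void)
{
	printf("%d\n", setup_mcfg_model(1, -22));	/* fatal  */
	printf("%d\n", setup_mcfg_model(0, -22));	/* warned */
	return 0;
}
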
@@ -234,13 +314,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
234 | } | 314 | } |
235 | 315 | ||
236 | info->res_num++; | 316 | info->res_num++; |
237 | if (addr.translation_offset) | ||
238 | dev_info(&info->bridge->dev, "host bridge window %pR " | ||
239 | "(PCI address [%#llx-%#llx])\n", | ||
240 | res, res->start - addr.translation_offset, | ||
241 | res->end - addr.translation_offset); | ||
242 | else | ||
243 | dev_info(&info->bridge->dev, "host bridge window %pR\n", res); | ||
244 | 317 | ||
245 | return AE_OK; | 318 | return AE_OK; |
246 | } | 319 | } |
@@ -332,8 +405,11 @@ static void __release_pci_root_info(struct pci_root_info *info) | |||
332 | 405 | ||
333 | free_pci_root_info_res(info); | 406 | free_pci_root_info_res(info); |
334 | 407 | ||
408 | teardown_mcfg_map(info); | ||
409 | |||
335 | kfree(info); | 410 | kfree(info); |
336 | } | 411 | } |
412 | |||
337 | static void release_pci_root_info(struct pci_host_bridge *bridge) | 413 | static void release_pci_root_info(struct pci_host_bridge *bridge) |
338 | { | 414 | { |
339 | struct pci_root_info *info = bridge->release_data; | 415 | struct pci_root_info *info = bridge->release_data; |
@@ -347,7 +423,9 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, | |||
347 | { | 423 | { |
348 | size_t size; | 424 | size_t size; |
349 | 425 | ||
426 | sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); | ||
350 | info->bridge = device; | 427 | info->bridge = device; |
428 | |||
351 | info->res_num = 0; | 429 | info->res_num = 0; |
352 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, | 430 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, |
353 | info); | 431 | info); |
@@ -360,8 +438,6 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, | |||
360 | if (!info->res) | 438 | if (!info->res) |
361 | return; | 439 | return; |
362 | 440 | ||
363 | sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); | ||
364 | |||
365 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, | 441 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, |
366 | info); | 442 | info); |
367 | } | 443 | } |
@@ -373,7 +449,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
373 | int domain = root->segment; | 449 | int domain = root->segment; |
374 | int busnum = root->secondary.start; | 450 | int busnum = root->secondary.start; |
375 | LIST_HEAD(resources); | 451 | LIST_HEAD(resources); |
376 | struct pci_bus *bus; | 452 | struct pci_bus *bus = NULL; |
377 | struct pci_sysdata *sd; | 453 | struct pci_sysdata *sd; |
378 | int node; | 454 | int node; |
379 | #ifdef CONFIG_ACPI_NUMA | 455 | #ifdef CONFIG_ACPI_NUMA |
@@ -426,6 +502,8 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
426 | } else { | 502 | } else { |
427 | probe_pci_root_info(info, device, busnum, domain); | 503 | probe_pci_root_info(info, device, busnum, domain); |
428 | 504 | ||
505 | /* insert the bus-number resource first */ | ||
506 | pci_add_resource(&resources, &root->secondary); | ||
429 | /* | 507 | /* |
430 | * _CRS with no apertures is normal, so only fall back to | 508 | * _CRS with no apertures is normal, so only fall back to |
431 | * defaults or native bridge info if we're ignoring _CRS. | 509 | * defaults or native bridge info if we're ignoring _CRS. |
@@ -437,10 +515,13 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) | |||
437 | x86_pci_root_bus_resources(busnum, &resources); | 515 | x86_pci_root_bus_resources(busnum, &resources); |
438 | } | 516 | } |
439 | 517 | ||
440 | bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, | 518 | if (!setup_mcfg_map(info, domain, (u8)root->secondary.start, |
441 | &resources); | 519 | (u8)root->secondary.end, root->mcfg_addr)) |
520 | bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, | ||
521 | sd, &resources); | ||
522 | |||
442 | if (bus) { | 523 | if (bus) { |
443 | bus->subordinate = pci_scan_child_bus(bus); | 524 | pci_scan_child_bus(bus); |
444 | pci_set_host_bridge_release( | 525 | pci_set_host_bridge_release( |
445 | to_pci_host_bridge(bus->bridge), | 526 | to_pci_host_bridge(bus->bridge), |
446 | release_pci_root_info, info); | 527 | release_pci_root_info, info); |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 5aed49bff058..e9e6ed5cdf94 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -121,7 +121,6 @@ static int __init early_fill_mp_bus_info(void) | |||
121 | link = (reg >> 8) & 0x03; | 121 | link = (reg >> 8) & 0x03; |
122 | 122 | ||
123 | info = alloc_pci_root_info(min_bus, max_bus, node, link); | 123 | info = alloc_pci_root_info(min_bus, max_bus, node, link); |
124 | sprintf(info->name, "PCI Bus #%02x", min_bus); | ||
125 | } | 124 | } |
126 | 125 | ||
127 | /* get the default node and link for left over res */ | 126 | /* get the default node and link for left over res */ |
@@ -300,9 +299,9 @@ static int __init early_fill_mp_bus_info(void) | |||
300 | int busnum; | 299 | int busnum; |
301 | struct pci_root_res *root_res; | 300 | struct pci_root_res *root_res; |
302 | 301 | ||
303 | busnum = info->bus_min; | 302 | busnum = info->busn.start; |
304 | printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", | 303 | printk(KERN_DEBUG "bus: %pR on node %x link %x\n", |
305 | info->bus_min, info->bus_max, info->node, info->link); | 304 | &info->busn, info->node, info->link); |
306 | list_for_each_entry(root_res, &info->resources, list) | 305 | list_for_each_entry(root_res, &info->resources, list) |
307 | printk(KERN_DEBUG "bus: %02x %pR\n", | 306 | printk(KERN_DEBUG "bus: %02x %pR\n", |
308 | busnum, &root_res->res); | 307 | busnum, &root_res->res); |
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 306579f7d0fd..d37e2fec97e5 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c | |||
@@ -14,7 +14,7 @@ static struct pci_root_info *x86_find_pci_root_info(int bus) | |||
14 | return NULL; | 14 | return NULL; |
15 | 15 | ||
16 | list_for_each_entry(info, &pci_root_infos, list) | 16 | list_for_each_entry(info, &pci_root_infos, list) |
17 | if (info->bus_min == bus) | 17 | if (info->busn.start == bus) |
18 | return info; | 18 | return info; |
19 | 19 | ||
20 | return NULL; | 20 | return NULL; |
@@ -24,6 +24,8 @@ void x86_pci_root_bus_resources(int bus, struct list_head *resources) | |||
24 | { | 24 | { |
25 | struct pci_root_info *info = x86_find_pci_root_info(bus); | 25 | struct pci_root_info *info = x86_find_pci_root_info(bus); |
26 | struct pci_root_res *root_res; | 26 | struct pci_root_res *root_res; |
27 | struct pci_host_bridge_window *window; | ||
28 | bool found = false; | ||
27 | 29 | ||
28 | if (!info) | 30 | if (!info) |
29 | goto default_resources; | 31 | goto default_resources; |
@@ -31,6 +33,16 @@ void x86_pci_root_bus_resources(int bus, struct list_head *resources) | |||
31 | printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", | 33 | printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", |
32 | bus); | 34 | bus); |
33 | 35 | ||
36 | /* already added by ACPI? */ | ||
37 | list_for_each_entry(window, resources, list) | ||
38 | if (window->res->flags & IORESOURCE_BUS) { | ||
39 | found = true; | ||
40 | break; | ||
41 | } | ||
42 | |||
43 | if (!found) | ||
44 | pci_add_resource(resources, &info->busn); | ||
45 | |||
34 | list_for_each_entry(root_res, &info->resources, list) { | 46 | list_for_each_entry(root_res, &info->resources, list) { |
35 | struct resource *res; | 47 | struct resource *res; |
36 | struct resource *root; | 48 | struct resource *root; |
@@ -66,9 +78,13 @@ struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max, | |||
66 | if (!info) | 78 | if (!info) |
67 | return info; | 79 | return info; |
68 | 80 | ||
81 | sprintf(info->name, "PCI Bus #%02x", bus_min); | ||
82 | |||
69 | INIT_LIST_HEAD(&info->resources); | 83 | INIT_LIST_HEAD(&info->resources); |
70 | info->bus_min = bus_min; | 84 | info->busn.name = info->name; |
71 | info->bus_max = bus_max; | 85 | info->busn.start = bus_min; |
86 | info->busn.end = bus_max; | ||
87 | info->busn.flags = IORESOURCE_BUS; | ||
72 | info->node = node; | 88 | info->node = node; |
73 | info->link = link; | 89 | info->link = link; |
74 | 90 | ||
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 226a466b2b2b..ff8f65b04574 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h | |||
@@ -13,8 +13,7 @@ struct pci_root_info { | |||
13 | struct list_head list; | 13 | struct list_head list; |
14 | char name[12]; | 14 | char name[12]; |
15 | struct list_head resources; | 15 | struct list_head resources; |
16 | int bus_min; | 16 | struct resource busn; |
17 | int bus_max; | ||
18 | int node; | 17 | int node; |
19 | int link; | 18 | int link; |
20 | }; | 19 | }; |
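
The bus_numa changes above fold the old bus_min/bus_max pair into a single struct resource flagged IORESOURCE_BUS, so the root bus number range travels through the same host-bridge window list as the I/O and memory apertures. A minimal sketch of the idea, with illustrative values (the field layout follows struct pci_root_info above; the range [0x00, 0x3f] is not from the patch):

    /* Sketch: a bus-number range expressed as a resource. */
    static struct resource example_busn = {
            .name  = "PCI Bus #00",
            .start = 0x00,                  /* was bus_min */
            .end   = 0x3f,                  /* was bus_max */
            .flags = IORESOURCE_BUS,
    };

    static void example_add_windows(struct list_head *resources)
    {
            /* Queue the range as a host bridge window alongside MMIO/IO;
             * the PCI core then derives the root bus span from it. */
            pci_add_resource(resources, &example_busn);
    }

This is also why x86_pci_root_bus_resources() now scans for an existing IORESOURCE_BUS window first: if ACPI already supplied one, adding info->busn again would duplicate it.
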
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 0ad990a20d4a..720e973fc34a 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -494,7 +494,7 @@ int __init pcibios_init(void) | |||
494 | return 0; | 494 | return 0; |
495 | } | 495 | } |
496 | 496 | ||
497 | char * __devinit pcibios_setup(char *str) | 497 | char * __init pcibios_setup(char *str) |
498 | { | 498 | { |
499 | if (!strcmp(str, "off")) { | 499 | if (!strcmp(str, "off")) { |
500 | pci_probe = 0; | 500 | pci_probe = 0; |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 301e325992f6..937bcece7006 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -17,6 +17,8 @@ | |||
17 | #include <linux/bitmap.h> | 17 | #include <linux/bitmap.h> |
18 | #include <linux/dmi.h> | 18 | #include <linux/dmi.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/mutex.h> | ||
21 | #include <linux/rculist.h> | ||
20 | #include <asm/e820.h> | 22 | #include <asm/e820.h> |
21 | #include <asm/pci_x86.h> | 23 | #include <asm/pci_x86.h> |
22 | #include <asm/acpi.h> | 24 | #include <asm/acpi.h> |
@@ -24,7 +26,9 @@ | |||
24 | #define PREFIX "PCI: " | 26 | #define PREFIX "PCI: " |
25 | 27 | ||
26 | /* Indicate if the mmcfg resources have been placed into the resource table. */ | 28 | /* Indicate if the mmcfg resources have been placed into the resource table. */ |
27 | static int __initdata pci_mmcfg_resources_inserted; | 29 | static bool pci_mmcfg_running_state; |
30 | static bool pci_mmcfg_arch_init_failed; | ||
31 | static DEFINE_MUTEX(pci_mmcfg_lock); | ||
28 | 32 | ||
29 | LIST_HEAD(pci_mmcfg_list); | 33 | LIST_HEAD(pci_mmcfg_list); |
30 | 34 | ||
@@ -45,24 +49,25 @@ static __init void free_all_mmcfg(void) | |||
45 | pci_mmconfig_remove(cfg); | 49 | pci_mmconfig_remove(cfg); |
46 | } | 50 | } |
47 | 51 | ||
48 | static __init void list_add_sorted(struct pci_mmcfg_region *new) | 52 | static __devinit void list_add_sorted(struct pci_mmcfg_region *new) |
49 | { | 53 | { |
50 | struct pci_mmcfg_region *cfg; | 54 | struct pci_mmcfg_region *cfg; |
51 | 55 | ||
52 | /* keep list sorted by segment and starting bus number */ | 56 | /* keep list sorted by segment and starting bus number */ |
53 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 57 | list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) { |
54 | if (cfg->segment > new->segment || | 58 | if (cfg->segment > new->segment || |
55 | (cfg->segment == new->segment && | 59 | (cfg->segment == new->segment && |
56 | cfg->start_bus >= new->start_bus)) { | 60 | cfg->start_bus >= new->start_bus)) { |
57 | list_add_tail(&new->list, &cfg->list); | 61 | list_add_tail_rcu(&new->list, &cfg->list); |
58 | return; | 62 | return; |
59 | } | 63 | } |
60 | } | 64 | } |
61 | list_add_tail(&new->list, &pci_mmcfg_list); | 65 | list_add_tail_rcu(&new->list, &pci_mmcfg_list); |
62 | } | 66 | } |
63 | 67 | ||
64 | static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, | 68 | static __devinit struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, |
65 | int end, u64 addr) | 69 | int start, |
70 | int end, u64 addr) | ||
66 | { | 71 | { |
67 | struct pci_mmcfg_region *new; | 72 | struct pci_mmcfg_region *new; |
68 | struct resource *res; | 73 | struct resource *res; |
@@ -79,8 +84,6 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, | |||
79 | new->start_bus = start; | 84 | new->start_bus = start; |
80 | new->end_bus = end; | 85 | new->end_bus = end; |
81 | 86 | ||
82 | list_add_sorted(new); | ||
83 | |||
84 | res = &new->res; | 87 | res = &new->res; |
85 | res->start = addr + PCI_MMCFG_BUS_OFFSET(start); | 88 | res->start = addr + PCI_MMCFG_BUS_OFFSET(start); |
86 | res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1; | 89 | res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1; |
@@ -89,9 +92,25 @@ static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, | |||
89 | "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); | 92 | "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); |
90 | res->name = new->name; | 93 | res->name = new->name; |
91 | 94 | ||
92 | printk(KERN_INFO PREFIX "MMCONFIG for domain %04x [bus %02x-%02x] at " | 95 | return new; |
93 | "%pR (base %#lx)\n", segment, start, end, &new->res, | 96 | } |
94 | (unsigned long) addr); | 97 | |
98 | static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, | ||
99 | int end, u64 addr) | ||
100 | { | ||
101 | struct pci_mmcfg_region *new; | ||
102 | |||
103 | new = pci_mmconfig_alloc(segment, start, end, addr); | ||
104 | if (new) { | ||
105 | mutex_lock(&pci_mmcfg_lock); | ||
106 | list_add_sorted(new); | ||
107 | mutex_unlock(&pci_mmcfg_lock); | ||
108 | |||
109 | pr_info(PREFIX | ||
110 | "MMCONFIG for domain %04x [bus %02x-%02x] at %pR " | ||
111 | "(base %#lx)\n", | ||
112 | segment, start, end, &new->res, (unsigned long)addr); | ||
113 | } | ||
95 | 114 | ||
96 | return new; | 115 | return new; |
97 | } | 116 | } |
@@ -100,7 +119,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus) | |||
100 | { | 119 | { |
101 | struct pci_mmcfg_region *cfg; | 120 | struct pci_mmcfg_region *cfg; |
102 | 121 | ||
103 | list_for_each_entry(cfg, &pci_mmcfg_list, list) | 122 | list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) |
104 | if (cfg->segment == segment && | 123 | if (cfg->segment == segment && |
105 | cfg->start_bus <= bus && bus <= cfg->end_bus) | 124 | cfg->start_bus <= bus && bus <= cfg->end_bus) |
106 | return cfg; | 125 | return cfg; |
@@ -343,8 +362,7 @@ static int __init pci_mmcfg_check_hostbridge(void) | |||
343 | name = pci_mmcfg_probes[i].probe(); | 362 | name = pci_mmcfg_probes[i].probe(); |
344 | 363 | ||
345 | if (name) | 364 | if (name) |
346 | printk(KERN_INFO PREFIX "%s with MMCONFIG support\n", | 365 | pr_info(PREFIX "%s with MMCONFIG support\n", name); |
347 | name); | ||
348 | } | 366 | } |
349 | 367 | ||
350 | /* some end_bus_number is crazy, fix it */ | 368 | /* some end_bus_number is crazy, fix it */ |
@@ -353,19 +371,8 @@ static int __init pci_mmcfg_check_hostbridge(void) | |||
353 | return !list_empty(&pci_mmcfg_list); | 371 | return !list_empty(&pci_mmcfg_list); |
354 | } | 372 | } |
355 | 373 | ||
356 | static void __init pci_mmcfg_insert_resources(void) | 374 | static acpi_status __devinit check_mcfg_resource(struct acpi_resource *res, |
357 | { | 375 | void *data) |
358 | struct pci_mmcfg_region *cfg; | ||
359 | |||
360 | list_for_each_entry(cfg, &pci_mmcfg_list, list) | ||
361 | insert_resource(&iomem_resource, &cfg->res); | ||
362 | |||
363 | /* Mark that the resources have been inserted. */ | ||
364 | pci_mmcfg_resources_inserted = 1; | ||
365 | } | ||
366 | |||
367 | static acpi_status __init check_mcfg_resource(struct acpi_resource *res, | ||
368 | void *data) | ||
369 | { | 376 | { |
370 | struct resource *mcfg_res = data; | 377 | struct resource *mcfg_res = data; |
371 | struct acpi_resource_address64 address; | 378 | struct acpi_resource_address64 address; |
@@ -401,8 +408,8 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res, | |||
401 | return AE_OK; | 408 | return AE_OK; |
402 | } | 409 | } |
403 | 410 | ||
404 | static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, | 411 | static acpi_status __devinit find_mboard_resource(acpi_handle handle, u32 lvl, |
405 | void *context, void **rv) | 412 | void *context, void **rv) |
406 | { | 413 | { |
407 | struct resource *mcfg_res = context; | 414 | struct resource *mcfg_res = context; |
408 | 415 | ||
@@ -415,7 +422,7 @@ static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, | |||
415 | return AE_OK; | 422 | return AE_OK; |
416 | } | 423 | } |
417 | 424 | ||
418 | static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) | 425 | static int __devinit is_acpi_reserved(u64 start, u64 end, unsigned not_used) |
419 | { | 426 | { |
420 | struct resource mcfg_res; | 427 | struct resource mcfg_res; |
421 | 428 | ||
@@ -434,13 +441,15 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) | |||
434 | 441 | ||
435 | typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); | 442 | typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); |
436 | 443 | ||
437 | static int __init is_mmconf_reserved(check_reserved_t is_reserved, | 444 | static int __ref is_mmconf_reserved(check_reserved_t is_reserved, |
438 | struct pci_mmcfg_region *cfg, int with_e820) | 445 | struct pci_mmcfg_region *cfg, |
446 | struct device *dev, int with_e820) | ||
439 | { | 447 | { |
440 | u64 addr = cfg->res.start; | 448 | u64 addr = cfg->res.start; |
441 | u64 size = resource_size(&cfg->res); | 449 | u64 size = resource_size(&cfg->res); |
442 | u64 old_size = size; | 450 | u64 old_size = size; |
443 | int valid = 0, num_buses; | 451 | int num_buses; |
452 | char *method = with_e820 ? "E820" : "ACPI motherboard resources"; | ||
444 | 453 | ||
445 | while (!is_reserved(addr, addr + size, E820_RESERVED)) { | 454 | while (!is_reserved(addr, addr + size, E820_RESERVED)) { |
446 | size >>= 1; | 455 | size >>= 1; |
@@ -448,30 +457,76 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, | |||
448 | break; | 457 | break; |
449 | } | 458 | } |
450 | 459 | ||
451 | if (size >= (16UL<<20) || size == old_size) { | 460 | if (size < (16UL<<20) && size != old_size) |
452 | printk(KERN_INFO PREFIX "MMCONFIG at %pR reserved in %s\n", | 461 | return 0; |
453 | &cfg->res, | 462 | |
454 | with_e820 ? "E820" : "ACPI motherboard resources"); | 463 | if (dev) |
455 | valid = 1; | 464 | dev_info(dev, "MMCONFIG at %pR reserved in %s\n", |
456 | 465 | &cfg->res, method); | |
457 | if (old_size != size) { | 466 | else |
458 | /* update end_bus */ | 467 | pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n", |
459 | cfg->end_bus = cfg->start_bus + ((size>>20) - 1); | 468 | &cfg->res, method); |
460 | num_buses = cfg->end_bus - cfg->start_bus + 1; | 469 | |
461 | cfg->res.end = cfg->res.start + | 470 | if (old_size != size) { |
462 | PCI_MMCFG_BUS_OFFSET(num_buses) - 1; | 471 | /* update end_bus */ |
463 | snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, | 472 | cfg->end_bus = cfg->start_bus + ((size>>20) - 1); |
464 | "PCI MMCONFIG %04x [bus %02x-%02x]", | 473 | num_buses = cfg->end_bus - cfg->start_bus + 1; |
465 | cfg->segment, cfg->start_bus, cfg->end_bus); | 474 | cfg->res.end = cfg->res.start + |
466 | printk(KERN_INFO PREFIX | 475 | PCI_MMCFG_BUS_OFFSET(num_buses) - 1; |
467 | "MMCONFIG for %04x [bus%02x-%02x] " | 476 | snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, |
468 | "at %pR (base %#lx) (size reduced!)\n", | 477 | "PCI MMCONFIG %04x [bus %02x-%02x]", |
469 | cfg->segment, cfg->start_bus, cfg->end_bus, | 478 | cfg->segment, cfg->start_bus, cfg->end_bus); |
470 | &cfg->res, (unsigned long) cfg->address); | 479 | |
471 | } | 480 | if (dev) |
481 | dev_info(dev, | ||
482 | "MMCONFIG " | ||
483 | "at %pR (base %#lx) (size reduced!)\n", | ||
484 | &cfg->res, (unsigned long) cfg->address); | ||
485 | else | ||
486 | pr_info(PREFIX | ||
487 | "MMCONFIG for %04x [bus%02x-%02x] " | ||
488 | "at %pR (base %#lx) (size reduced!)\n", | ||
489 | cfg->segment, cfg->start_bus, cfg->end_bus, | ||
490 | &cfg->res, (unsigned long) cfg->address); | ||
472 | } | 491 | } |
473 | 492 | ||
474 | return valid; | 493 | return 1; |
494 | } | ||
495 | |||
496 | static int __ref pci_mmcfg_check_reserved(struct device *dev, | ||
497 | struct pci_mmcfg_region *cfg, int early) | ||
498 | { | ||
499 | if (!early && !acpi_disabled) { | ||
500 | if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0)) | ||
501 | return 1; | ||
502 | |||
503 | if (dev) | ||
504 | dev_info(dev, FW_INFO | ||
505 | "MMCONFIG at %pR not reserved in " | ||
506 | "ACPI motherboard resources\n", | ||
507 | &cfg->res); | ||
508 | else | ||
509 | pr_info(FW_INFO PREFIX | ||
510 | "MMCONFIG at %pR not reserved in " | ||
511 | "ACPI motherboard resources\n", | ||
512 | &cfg->res); | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * e820_all_mapped() is marked as __init. | ||
517 | * All entries from the ACPI MCFG table have been checked at boot time. | ||
518 | * For MCFG information constructed from hotpluggable host bridge's | ||
519 | * _CBA method, just assume it's reserved. | ||
520 | */ | ||
521 | if (pci_mmcfg_running_state) | ||
522 | return 1; | ||
523 | |||
524 | /* Don't try to do this check unless configuration | ||
525 | type 1 is available. How about type 2? */ | ||
526 | if (raw_pci_ops) | ||
527 | return is_mmconf_reserved(e820_all_mapped, cfg, dev, 1); | ||
528 | |||
529 | return 0; | ||
475 | } | 530 | } |
476 | 531 | ||
477 | static void __init pci_mmcfg_reject_broken(int early) | 532 | static void __init pci_mmcfg_reject_broken(int early) |
@@ -479,38 +534,14 @@ static void __init pci_mmcfg_reject_broken(int early) | |||
479 | struct pci_mmcfg_region *cfg; | 534 | struct pci_mmcfg_region *cfg; |
480 | 535 | ||
481 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 536 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { |
482 | int valid = 0; | 537 | if (pci_mmcfg_check_reserved(NULL, cfg, early) == 0) { |
483 | 538 | pr_info(PREFIX "not using MMCONFIG\n"); | |
484 | if (!early && !acpi_disabled) { | 539 | free_all_mmcfg(); |
485 | valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); | 540 | return; |
486 | |||
487 | if (valid) | ||
488 | continue; | ||
489 | else | ||
490 | printk(KERN_ERR FW_BUG PREFIX | ||
491 | "MMCONFIG at %pR not reserved in " | ||
492 | "ACPI motherboard resources\n", | ||
493 | &cfg->res); | ||
494 | } | 541 | } |
495 | |||
496 | /* Don't try to do this check unless configuration | ||
497 | type 1 is available. how about type 2 ?*/ | ||
498 | if (raw_pci_ops) | ||
499 | valid = is_mmconf_reserved(e820_all_mapped, cfg, 1); | ||
500 | |||
501 | if (!valid) | ||
502 | goto reject; | ||
503 | } | 542 | } |
504 | |||
505 | return; | ||
506 | |||
507 | reject: | ||
508 | printk(KERN_INFO PREFIX "not using MMCONFIG\n"); | ||
509 | free_all_mmcfg(); | ||
510 | } | 543 | } |
511 | 544 | ||
512 | static int __initdata known_bridge; | ||
513 | |||
514 | static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, | 545 | static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, |
515 | struct acpi_mcfg_allocation *cfg) | 546 | struct acpi_mcfg_allocation *cfg) |
516 | { | 547 | { |
@@ -529,7 +560,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, | |||
529 | return 0; | 560 | return 0; |
530 | } | 561 | } |
531 | 562 | ||
532 | printk(KERN_ERR PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " | 563 | pr_err(PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " |
533 | "is above 4GB, ignored\n", cfg->pci_segment, | 564 | "is above 4GB, ignored\n", cfg->pci_segment, |
534 | cfg->start_bus_number, cfg->end_bus_number, cfg->address); | 565 | cfg->start_bus_number, cfg->end_bus_number, cfg->address); |
535 | return -EINVAL; | 566 | return -EINVAL; |
@@ -556,7 +587,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) | |||
556 | i -= sizeof(struct acpi_mcfg_allocation); | 587 | i -= sizeof(struct acpi_mcfg_allocation); |
557 | }; | 588 | }; |
558 | if (entries == 0) { | 589 | if (entries == 0) { |
559 | printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); | 590 | pr_err(PREFIX "MMCONFIG has no entries\n"); |
560 | return -ENODEV; | 591 | return -ENODEV; |
561 | } | 592 | } |
562 | 593 | ||
@@ -570,8 +601,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) | |||
570 | 601 | ||
571 | if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number, | 602 | if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number, |
572 | cfg->end_bus_number, cfg->address) == NULL) { | 603 | cfg->end_bus_number, cfg->address) == NULL) { |
573 | printk(KERN_WARNING PREFIX | 604 | pr_warn(PREFIX "no memory for MCFG entries\n"); |
574 | "no memory for MCFG entries\n"); | ||
575 | free_all_mmcfg(); | 605 | free_all_mmcfg(); |
576 | return -ENOMEM; | 606 | return -ENOMEM; |
577 | } | 607 | } |
@@ -582,28 +612,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) | |||
582 | 612 | ||
583 | static void __init __pci_mmcfg_init(int early) | 613 | static void __init __pci_mmcfg_init(int early) |
584 | { | 614 | { |
585 | /* MMCONFIG disabled */ | ||
586 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) | ||
587 | return; | ||
588 | |||
589 | /* MMCONFIG already enabled */ | ||
590 | if (!early && !(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF)) | ||
591 | return; | ||
592 | |||
593 | /* for late to exit */ | ||
594 | if (known_bridge) | ||
595 | return; | ||
596 | |||
597 | if (early) { | ||
598 | if (pci_mmcfg_check_hostbridge()) | ||
599 | known_bridge = 1; | ||
600 | } | ||
601 | |||
602 | if (!known_bridge) | ||
603 | acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); | ||
604 | |||
605 | pci_mmcfg_reject_broken(early); | 615 | pci_mmcfg_reject_broken(early); |
606 | |||
607 | if (list_empty(&pci_mmcfg_list)) | 616 | if (list_empty(&pci_mmcfg_list)) |
608 | return; | 617 | return; |
609 | 618 | ||
@@ -620,33 +629,48 @@ static void __init __pci_mmcfg_init(int early) | |||
620 | if (pci_mmcfg_arch_init()) | 629 | if (pci_mmcfg_arch_init()) |
621 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; | 630 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; |
622 | else { | 631 | else { |
623 | /* | 632 | free_all_mmcfg(); |
624 | * Signal not to attempt to insert mmcfg resources because | 633 | pci_mmcfg_arch_init_failed = true; |
625 | * the architecture mmcfg setup could not initialize. | ||
626 | */ | ||
627 | pci_mmcfg_resources_inserted = 1; | ||
628 | } | 634 | } |
629 | } | 635 | } |
630 | 636 | ||
637 | static int __initdata known_bridge; | ||
638 | |||
631 | void __init pci_mmcfg_early_init(void) | 639 | void __init pci_mmcfg_early_init(void) |
632 | { | 640 | { |
633 | __pci_mmcfg_init(1); | 641 | if (pci_probe & PCI_PROBE_MMCONF) { |
642 | if (pci_mmcfg_check_hostbridge()) | ||
643 | known_bridge = 1; | ||
644 | else | ||
645 | acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); | ||
646 | __pci_mmcfg_init(1); | ||
647 | } | ||
634 | } | 648 | } |
635 | 649 | ||
636 | void __init pci_mmcfg_late_init(void) | 650 | void __init pci_mmcfg_late_init(void) |
637 | { | 651 | { |
638 | __pci_mmcfg_init(0); | 652 | /* MMCONFIG disabled */ |
653 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) | ||
654 | return; | ||
655 | |||
656 | if (known_bridge) | ||
657 | return; | ||
658 | |||
659 | /* MMCONFIG hasn't been enabled yet, try again */ | ||
660 | if (pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF) { | ||
661 | acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); | ||
662 | __pci_mmcfg_init(0); | ||
663 | } | ||
639 | } | 664 | } |
640 | 665 | ||
641 | static int __init pci_mmcfg_late_insert_resources(void) | 666 | static int __init pci_mmcfg_late_insert_resources(void) |
642 | { | 667 | { |
643 | /* | 668 | struct pci_mmcfg_region *cfg; |
644 | * If resources are already inserted or we are not using MMCONFIG, | 669 | |
645 | * don't insert the resources. | 670 | pci_mmcfg_running_state = true; |
646 | */ | 671 | |
647 | if ((pci_mmcfg_resources_inserted == 1) || | 672 | /* If we are not using MMCONFIG, don't insert the resources. */ |
648 | (pci_probe & PCI_PROBE_MMCONF) == 0 || | 673 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) |
649 | list_empty(&pci_mmcfg_list)) | ||
650 | return 1; | 674 | return 1; |
651 | 675 | ||
652 | /* | 676 | /* |
@@ -654,7 +678,9 @@ static int __init pci_mmcfg_late_insert_resources(void) | |||
654 | * marked so it won't cause request errors when __request_region is | 678 | * marked so it won't cause request errors when __request_region is |
655 | * called. | 679 | * called. |
656 | */ | 680 | */ |
657 | pci_mmcfg_insert_resources(); | 681 | list_for_each_entry(cfg, &pci_mmcfg_list, list) |
682 | if (!cfg->res.parent) | ||
683 | insert_resource(&iomem_resource, &cfg->res); | ||
658 | 684 | ||
659 | return 0; | 685 | return 0; |
660 | } | 686 | } |
@@ -665,3 +691,101 @@ static int __init pci_mmcfg_late_insert_resources(void) | |||
665 | * with other system resources. | 691 | * with other system resources. |
666 | */ | 692 | */ |
667 | late_initcall(pci_mmcfg_late_insert_resources); | 693 | late_initcall(pci_mmcfg_late_insert_resources); |
694 | |||
695 | /* Add MMCFG information for host bridges */ | ||
696 | int __devinit pci_mmconfig_insert(struct device *dev, | ||
697 | u16 seg, u8 start, u8 end, | ||
698 | phys_addr_t addr) | ||
699 | { | ||
700 | int rc; | ||
701 | struct resource *tmp = NULL; | ||
702 | struct pci_mmcfg_region *cfg; | ||
703 | |||
704 | if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed) | ||
705 | return -ENODEV; | ||
706 | |||
707 | if (start > end) | ||
708 | return -EINVAL; | ||
709 | |||
710 | mutex_lock(&pci_mmcfg_lock); | ||
711 | cfg = pci_mmconfig_lookup(seg, start); | ||
712 | if (cfg) { | ||
713 | if (cfg->end_bus < end) | ||
714 | dev_info(dev, FW_INFO | ||
715 | "MMCONFIG for " | ||
716 | "domain %04x [bus %02x-%02x] " | ||
717 | "only partially covers this bridge\n", | ||
718 | cfg->segment, cfg->start_bus, cfg->end_bus); | ||
719 | mutex_unlock(&pci_mmcfg_lock); | ||
720 | return -EEXIST; | ||
721 | } | ||
722 | |||
723 | if (!addr) { | ||
724 | mutex_unlock(&pci_mmcfg_lock); | ||
725 | return -EINVAL; | ||
726 | } | ||
727 | |||
728 | rc = -EBUSY; | ||
729 | cfg = pci_mmconfig_alloc(seg, start, end, addr); | ||
730 | if (cfg == NULL) { | ||
731 | dev_warn(dev, "fail to add MMCONFIG (out of memory)\n"); | ||
732 | rc = -ENOMEM; | ||
733 | } else if (!pci_mmcfg_check_reserved(dev, cfg, 0)) { | ||
734 | dev_warn(dev, FW_BUG "MMCONFIG %pR isn't reserved\n", | ||
735 | &cfg->res); | ||
736 | } else { | ||
737 | /* Insert resource if it's not in boot stage */ | ||
738 | if (pci_mmcfg_running_state) | ||
739 | tmp = insert_resource_conflict(&iomem_resource, | ||
740 | &cfg->res); | ||
741 | |||
742 | if (tmp) { | ||
743 | dev_warn(dev, | ||
744 | "MMCONFIG %pR conflicts with " | ||
745 | "%s %pR\n", | ||
746 | &cfg->res, tmp->name, tmp); | ||
747 | } else if (pci_mmcfg_arch_map(cfg)) { | ||
748 | dev_warn(dev, "fail to map MMCONFIG %pR.\n", | ||
749 | &cfg->res); | ||
750 | } else { | ||
751 | list_add_sorted(cfg); | ||
752 | dev_info(dev, "MMCONFIG at %pR (base %#lx)\n", | ||
753 | &cfg->res, (unsigned long)addr); | ||
754 | cfg = NULL; | ||
755 | rc = 0; | ||
756 | } | ||
757 | } | ||
758 | |||
759 | if (cfg) { | ||
760 | if (cfg->res.parent) | ||
761 | release_resource(&cfg->res); | ||
762 | kfree(cfg); | ||
763 | } | ||
764 | |||
765 | mutex_unlock(&pci_mmcfg_lock); | ||
766 | |||
767 | return rc; | ||
768 | } | ||
769 | |||
770 | /* Delete MMCFG information for host bridges */ | ||
771 | int pci_mmconfig_delete(u16 seg, u8 start, u8 end) | ||
772 | { | ||
773 | struct pci_mmcfg_region *cfg; | ||
774 | |||
775 | mutex_lock(&pci_mmcfg_lock); | ||
776 | list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) | ||
777 | if (cfg->segment == seg && cfg->start_bus == start && | ||
778 | cfg->end_bus == end) { | ||
779 | list_del_rcu(&cfg->list); | ||
780 | synchronize_rcu(); | ||
781 | pci_mmcfg_arch_unmap(cfg); | ||
782 | if (cfg->res.parent) | ||
783 | release_resource(&cfg->res); | ||
784 | mutex_unlock(&pci_mmcfg_lock); | ||
785 | kfree(cfg); | ||
786 | return 0; | ||
787 | } | ||
788 | mutex_unlock(&pci_mmcfg_lock); | ||
789 | |||
790 | return -ENOENT; | ||
791 | } | ||
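
With the hotplug entry points above, pci_mmcfg_list is walked locklessly on the config-access fast path and mutated only under pci_mmcfg_lock: the standard RCU-protected list pattern. A condensed sketch of that pattern with the PCI specifics stripped out (struct region and the function names are illustrative):

    struct region {
            struct list_head list;
            int seg, start, end;
    };

    static LIST_HEAD(region_list);
    static DEFINE_MUTEX(region_lock);

    /* Reader: may run concurrently with add/delete; the caller holds
     * rcu_read_lock() across the lookup and any use of the result. */
    static struct region *region_lookup(int seg, int bus)
    {
            struct region *r;

            list_for_each_entry_rcu(r, &region_list, list)
                    if (r->seg == seg && r->start <= bus && bus <= r->end)
                            return r;
            return NULL;
    }

    /* Updater: serialized by the mutex; never blocks readers. */
    static int region_delete(int seg, int start)
    {
            struct region *r;

            mutex_lock(&region_lock);
            list_for_each_entry(r, &region_list, list)
                    if (r->seg == seg && r->start == start) {
                            list_del_rcu(&r->list);
                            mutex_unlock(&region_lock);
                            synchronize_rcu();  /* wait out current readers */
                            kfree(r);
                            return 0;
                    }
            mutex_unlock(&region_lock);
            return -ENOENT;
    }

synchronize_rcu() before kfree() is the load-bearing step: it guarantees no CPU is still inside a read-side critical section holding a pointer to the dying region.
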
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 5372e86834c0..db63ac23e3d9 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/pci.h> | 12 | #include <linux/pci.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/rcupdate.h> | ||
14 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
15 | #include <asm/pci_x86.h> | 16 | #include <asm/pci_x86.h> |
16 | #include <acpi/acpi.h> | 17 | #include <acpi/acpi.h> |
@@ -60,9 +61,12 @@ err: *value = -1; | |||
60 | return -EINVAL; | 61 | return -EINVAL; |
61 | } | 62 | } |
62 | 63 | ||
64 | rcu_read_lock(); | ||
63 | base = get_base_addr(seg, bus, devfn); | 65 | base = get_base_addr(seg, bus, devfn); |
64 | if (!base) | 66 | if (!base) { |
67 | rcu_read_unlock(); | ||
65 | goto err; | 68 | goto err; |
69 | } | ||
66 | 70 | ||
67 | raw_spin_lock_irqsave(&pci_config_lock, flags); | 71 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
68 | 72 | ||
@@ -80,6 +84,7 @@ err: *value = -1; | |||
80 | break; | 84 | break; |
81 | } | 85 | } |
82 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); | 86 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
87 | rcu_read_unlock(); | ||
83 | 88 | ||
84 | return 0; | 89 | return 0; |
85 | } | 90 | } |
@@ -93,9 +98,12 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
93 | if ((bus > 255) || (devfn > 255) || (reg > 4095)) | 98 | if ((bus > 255) || (devfn > 255) || (reg > 4095)) |
94 | return -EINVAL; | 99 | return -EINVAL; |
95 | 100 | ||
101 | rcu_read_lock(); | ||
96 | base = get_base_addr(seg, bus, devfn); | 102 | base = get_base_addr(seg, bus, devfn); |
97 | if (!base) | 103 | if (!base) { |
104 | rcu_read_unlock(); | ||
98 | return -EINVAL; | 105 | return -EINVAL; |
106 | } | ||
99 | 107 | ||
100 | raw_spin_lock_irqsave(&pci_config_lock, flags); | 108 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
101 | 109 | ||
@@ -113,11 +121,12 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
113 | break; | 121 | break; |
114 | } | 122 | } |
115 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); | 123 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
124 | rcu_read_unlock(); | ||
116 | 125 | ||
117 | return 0; | 126 | return 0; |
118 | } | 127 | } |
119 | 128 | ||
120 | static const struct pci_raw_ops pci_mmcfg = { | 129 | const struct pci_raw_ops pci_mmcfg = { |
121 | .read = pci_mmcfg_read, | 130 | .read = pci_mmcfg_read, |
122 | .write = pci_mmcfg_write, | 131 | .write = pci_mmcfg_write, |
123 | }; | 132 | }; |
@@ -132,3 +141,18 @@ int __init pci_mmcfg_arch_init(void) | |||
132 | void __init pci_mmcfg_arch_free(void) | 141 | void __init pci_mmcfg_arch_free(void) |
133 | { | 142 | { |
134 | } | 143 | } |
144 | |||
145 | int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg) | ||
146 | { | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg) | ||
151 | { | ||
152 | unsigned long flags; | ||
153 | |||
154 | /* Invalidate the cached mmcfg map entry. */ | ||
155 | raw_spin_lock_irqsave(&pci_config_lock, flags); | ||
156 | mmcfg_last_accessed_device = 0; | ||
157 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); | ||
158 | } | ||
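
The reader side of the same scheme: the rcu_read_lock()/rcu_read_unlock() pairs added above pin the looked-up region so a concurrent pci_mmconfig_delete() cannot free it mid-access. A minimal sketch of the bracket, assuming a pci_dev_base()-style helper that walks the RCU list and returns an __iomem pointer (as the 64-bit version does):

    static int example_read_dword(int seg, int bus, int devfn,
                                  int reg, u32 *value)
    {
            char __iomem *base;
            int ret = 0;

            rcu_read_lock();                        /* pin the region */
            base = pci_dev_base(seg, bus, devfn);   /* RCU list walk inside */
            if (!base)
                    ret = -EINVAL;
            else
                    *value = mmio_config_readl(base + reg);
            rcu_read_unlock();      /* region may be freed after this point */

            return ret;
    }
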
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 915a493502cb..d4ebd07c306d 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/init.h> | 9 | #include <linux/init.h> |
10 | #include <linux/acpi.h> | 10 | #include <linux/acpi.h> |
11 | #include <linux/bitmap.h> | 11 | #include <linux/bitmap.h> |
12 | #include <linux/rcupdate.h> | ||
12 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
13 | #include <asm/pci_x86.h> | 14 | #include <asm/pci_x86.h> |
14 | 15 | ||
@@ -34,9 +35,12 @@ err: *value = -1; | |||
34 | return -EINVAL; | 35 | return -EINVAL; |
35 | } | 36 | } |
36 | 37 | ||
38 | rcu_read_lock(); | ||
37 | addr = pci_dev_base(seg, bus, devfn); | 39 | addr = pci_dev_base(seg, bus, devfn); |
38 | if (!addr) | 40 | if (!addr) { |
41 | rcu_read_unlock(); | ||
39 | goto err; | 42 | goto err; |
43 | } | ||
40 | 44 | ||
41 | switch (len) { | 45 | switch (len) { |
42 | case 1: | 46 | case 1: |
@@ -49,6 +53,7 @@ err: *value = -1; | |||
49 | *value = mmio_config_readl(addr + reg); | 53 | *value = mmio_config_readl(addr + reg); |
50 | break; | 54 | break; |
51 | } | 55 | } |
56 | rcu_read_unlock(); | ||
52 | 57 | ||
53 | return 0; | 58 | return 0; |
54 | } | 59 | } |
@@ -62,9 +67,12 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
62 | if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) | 67 | if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) |
63 | return -EINVAL; | 68 | return -EINVAL; |
64 | 69 | ||
70 | rcu_read_lock(); | ||
65 | addr = pci_dev_base(seg, bus, devfn); | 71 | addr = pci_dev_base(seg, bus, devfn); |
66 | if (!addr) | 72 | if (!addr) { |
73 | rcu_read_unlock(); | ||
67 | return -EINVAL; | 74 | return -EINVAL; |
75 | } | ||
68 | 76 | ||
69 | switch (len) { | 77 | switch (len) { |
70 | case 1: | 78 | case 1: |
@@ -77,16 +85,17 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
77 | mmio_config_writel(addr + reg, value); | 85 | mmio_config_writel(addr + reg, value); |
78 | break; | 86 | break; |
79 | } | 87 | } |
88 | rcu_read_unlock(); | ||
80 | 89 | ||
81 | return 0; | 90 | return 0; |
82 | } | 91 | } |
83 | 92 | ||
84 | static const struct pci_raw_ops pci_mmcfg = { | 93 | const struct pci_raw_ops pci_mmcfg = { |
85 | .read = pci_mmcfg_read, | 94 | .read = pci_mmcfg_read, |
86 | .write = pci_mmcfg_write, | 95 | .write = pci_mmcfg_write, |
87 | }; | 96 | }; |
88 | 97 | ||
89 | static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) | 98 | static void __iomem * __devinit mcfg_ioremap(struct pci_mmcfg_region *cfg) |
90 | { | 99 | { |
91 | void __iomem *addr; | 100 | void __iomem *addr; |
92 | u64 start, size; | 101 | u64 start, size; |
@@ -105,16 +114,14 @@ int __init pci_mmcfg_arch_init(void) | |||
105 | { | 114 | { |
106 | struct pci_mmcfg_region *cfg; | 115 | struct pci_mmcfg_region *cfg; |
107 | 116 | ||
108 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 117 | list_for_each_entry(cfg, &pci_mmcfg_list, list) |
109 | cfg->virt = mcfg_ioremap(cfg); | 118 | if (pci_mmcfg_arch_map(cfg)) { |
110 | if (!cfg->virt) { | ||
111 | printk(KERN_ERR PREFIX "can't map MMCONFIG at %pR\n", | ||
112 | &cfg->res); | ||
113 | pci_mmcfg_arch_free(); | 119 | pci_mmcfg_arch_free(); |
114 | return 0; | 120 | return 0; |
115 | } | 121 | } |
116 | } | 122 | |
117 | raw_pci_ext_ops = &pci_mmcfg; | 123 | raw_pci_ext_ops = &pci_mmcfg; |
124 | |||
118 | return 1; | 125 | return 1; |
119 | } | 126 | } |
120 | 127 | ||
@@ -122,10 +129,25 @@ void __init pci_mmcfg_arch_free(void) | |||
122 | { | 129 | { |
123 | struct pci_mmcfg_region *cfg; | 130 | struct pci_mmcfg_region *cfg; |
124 | 131 | ||
125 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 132 | list_for_each_entry(cfg, &pci_mmcfg_list, list) |
126 | if (cfg->virt) { | 133 | pci_mmcfg_arch_unmap(cfg); |
127 | iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); | 134 | } |
128 | cfg->virt = NULL; | 135 | |
129 | } | 136 | int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg) |
137 | { | ||
138 | cfg->virt = mcfg_ioremap(cfg); | ||
139 | if (!cfg->virt) { | ||
140 | pr_err(PREFIX "can't map MMCONFIG at %pR\n", &cfg->res); | ||
141 | return -ENOMEM; | ||
142 | } | ||
143 | |||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg) | ||
148 | { | ||
149 | if (cfg && cfg->virt) { | ||
150 | iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); | ||
151 | cfg->virt = NULL; | ||
130 | } | 152 | } |
131 | } | 153 | } |
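
Splitting pci_mmcfg_arch_init()/pci_mmcfg_arch_free() into per-region pci_mmcfg_arch_map()/pci_mmcfg_arch_unmap() is what lets a hot-added bridge's region be mapped on its own. On 64-bit the core of it is an ioremap biased by the starting bus, sketched below under simplified names (PCI_MMCFG_BUS_OFFSET(b) is b << 20, one megabyte of config space per bus; struct ecam_region is illustrative):

    struct ecam_region {
            char __iomem *virt;
            u64 addr;                       /* ECAM base from MCFG or _CBA */
            int start_bus, end_bus;
    };

    #define BUS_OFFSET(bus) ((u64)(bus) << 20)

    static int map_region(struct ecam_region *r)
    {
            u64 start = r->addr + BUS_OFFSET(r->start_bus);
            u64 size  = BUS_OFFSET(r->end_bus - r->start_bus + 1);
            char __iomem *v = ioremap_nocache(start, size);

            if (!v)
                    return -ENOMEM;
            /* bias so virt + BUS_OFFSET(bus) is valid for any covered bus */
            r->virt = v - BUS_OFFSET(r->start_bus);
            return 0;
    }

    static void unmap_region(struct ecam_region *r)
    {
            if (r->virt) {
                    /* undo the bias before handing the mapping back */
                    iounmap(r->virt + BUS_OFFSET(r->start_bus));
                    r->virt = NULL;
            }
    }
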
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 140942f66b31..e14a2ff708b5 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c | |||
@@ -264,7 +264,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); | |||
264 | 264 | ||
265 | static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev) | 265 | static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev) |
266 | { | 266 | { |
267 | pci_set_power_state(dev, PCI_D3cold); | 267 | pci_set_power_state(dev, PCI_D3hot); |
268 | } | 268 | } |
269 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); | 269 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); |
270 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); | 270 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); |
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 92660edaa1e7..2dc29f51e75a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -234,22 +234,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map( | |||
234 | return status; | 234 | return status; |
235 | } | 235 | } |
236 | 236 | ||
237 | static efi_status_t __init phys_efi_get_time(efi_time_t *tm, | 237 | static int efi_set_rtc_mmss(unsigned long nowtime) |
238 | efi_time_cap_t *tc) | ||
239 | { | ||
240 | unsigned long flags; | ||
241 | efi_status_t status; | ||
242 | |||
243 | spin_lock_irqsave(&rtc_lock, flags); | ||
244 | efi_call_phys_prelog(); | ||
245 | status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), | ||
246 | virt_to_phys(tc)); | ||
247 | efi_call_phys_epilog(); | ||
248 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
249 | return status; | ||
250 | } | ||
251 | |||
252 | int efi_set_rtc_mmss(unsigned long nowtime) | ||
253 | { | 238 | { |
254 | int real_seconds, real_minutes; | 239 | int real_seconds, real_minutes; |
255 | efi_status_t status; | 240 | efi_status_t status; |
@@ -278,7 +263,7 @@ int efi_set_rtc_mmss(unsigned long nowtime) | |||
278 | return 0; | 263 | return 0; |
279 | } | 264 | } |
280 | 265 | ||
281 | unsigned long efi_get_time(void) | 266 | static unsigned long efi_get_time(void) |
282 | { | 267 | { |
283 | efi_status_t status; | 268 | efi_status_t status; |
284 | efi_time_t eft; | 269 | efi_time_t eft; |
@@ -621,18 +606,13 @@ static int __init efi_runtime_init(void) | |||
621 | } | 606 | } |
622 | /* | 607 | /* |
623 | * We will only need *early* access to the following | 608 | * We will only need *early* access to the following |
624 | * two EFI runtime services before set_virtual_address_map | 609 | * EFI runtime service before set_virtual_address_map |
625 | * is invoked. | 610 | * is invoked. |
626 | */ | 611 | */ |
627 | efi_phys.get_time = (efi_get_time_t *)runtime->get_time; | ||
628 | efi_phys.set_virtual_address_map = | 612 | efi_phys.set_virtual_address_map = |
629 | (efi_set_virtual_address_map_t *) | 613 | (efi_set_virtual_address_map_t *) |
630 | runtime->set_virtual_address_map; | 614 | runtime->set_virtual_address_map; |
631 | /* | 615 | |
632 | * Make efi_get_time can be called before entering | ||
633 | * virtual mode. | ||
634 | */ | ||
635 | efi.get_time = phys_efi_get_time; | ||
636 | early_iounmap(runtime, sizeof(efi_runtime_services_t)); | 616 | early_iounmap(runtime, sizeof(efi_runtime_services_t)); |
637 | 617 | ||
638 | return 0; | 618 | return 0; |
@@ -720,12 +700,10 @@ void __init efi_init(void) | |||
720 | efi_enabled = 0; | 700 | efi_enabled = 0; |
721 | return; | 701 | return; |
722 | } | 702 | } |
723 | #ifdef CONFIG_X86_32 | ||
724 | if (efi_native) { | 703 | if (efi_native) { |
725 | x86_platform.get_wallclock = efi_get_time; | 704 | x86_platform.get_wallclock = efi_get_time; |
726 | x86_platform.set_wallclock = efi_set_rtc_mmss; | 705 | x86_platform.set_wallclock = efi_set_rtc_mmss; |
727 | } | 706 | } |
728 | #endif | ||
729 | 707 | ||
730 | #if EFI_DEBUG | 708 | #if EFI_DEBUG |
731 | print_efi_memmap(); | 709 | print_efi_memmap(); |
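
With the physical-mode get_time shim gone, native EFI on both 32- and 64-bit supplies the wallclock through the x86_platform hooks, the generic override point for firmware-backed RTC access. A hypothetical sketch of the same wiring (the myfw_* names are invented for illustration):

    static unsigned long myfw_get_wallclock(void)
    {
            /* query the firmware RTC; seconds since the epoch */
            return 0;       /* stub */
    }

    static int myfw_set_wallclock(unsigned long nowtime)
    {
            /* push minutes/seconds back to the firmware RTC */
            return 0;       /* stub */
    }

    static void __init myfw_platform_setup(void)
    {
            x86_platform.get_wallclock = myfw_get_wallclock;
            x86_platform.set_wallclock = myfw_set_wallclock;
    }
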
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index 23e5b9d7977b..599be499fdf7 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c | |||
@@ -203,7 +203,7 @@ static int xo15_sci_remove(struct acpi_device *device, int type) | |||
203 | return 0; | 203 | return 0; |
204 | } | 204 | } |
205 | 205 | ||
206 | static int xo15_sci_resume(struct acpi_device *device) | 206 | static int xo15_sci_resume(struct device *dev) |
207 | { | 207 | { |
208 | /* Enable all EC events */ | 208 | /* Enable all EC events */ |
209 | olpc_ec_mask_write(EC_SCI_SRC_ALL); | 209 | olpc_ec_mask_write(EC_SCI_SRC_ALL); |
@@ -215,6 +215,8 @@ static int xo15_sci_resume(struct acpi_device *device) | |||
215 | return 0; | 215 | return 0; |
216 | } | 216 | } |
217 | 217 | ||
218 | static SIMPLE_DEV_PM_OPS(xo15_sci_pm, NULL, xo15_sci_resume); | ||
219 | |||
218 | static const struct acpi_device_id xo15_sci_device_ids[] = { | 220 | static const struct acpi_device_id xo15_sci_device_ids[] = { |
219 | {"XO15EC", 0}, | 221 | {"XO15EC", 0}, |
220 | {"", 0}, | 222 | {"", 0}, |
@@ -227,8 +229,8 @@ static struct acpi_driver xo15_sci_drv = { | |||
227 | .ops = { | 229 | .ops = { |
228 | .add = xo15_sci_add, | 230 | .add = xo15_sci_add, |
229 | .remove = xo15_sci_remove, | 231 | .remove = xo15_sci_remove, |
230 | .resume = xo15_sci_resume, | ||
231 | }, | 232 | }, |
233 | .drv.pm = &xo15_sci_pm, | ||
232 | }; | 234 | }; |
233 | 235 | ||
234 | static int __init xo15_sci_init(void) | 236 | static int __init xo15_sci_init(void) |
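
The XO-1.5 change above is the standard migration from the legacy acpi_driver .resume callback to dev_pm_ops: SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) builds a struct dev_pm_ops whose system-sleep callbacks (suspend/resume plus the hibernation phases) point at the given functions. A sketch of the same shape for a hypothetical driver (all foo_* names invented):

    static const struct acpi_device_id foo_ids[] = {
            {"FOO0001", 0},
            {"", 0},
    };

    static int foo_add(struct acpi_device *device) { return 0; }
    static int foo_remove(struct acpi_device *device, int type) { return 0; }

    static int foo_resume(struct device *dev)
    {
            /* restore device state lost across suspend */
            return 0;
    }

    /* NULL suspend hook: nothing needs saving on the way down */
    static SIMPLE_DEV_PM_OPS(foo_pm, NULL, foo_resume);

    static struct acpi_driver foo_drv = {
            .name = "foo",
            .ids  = foo_ids,
            .ops = {
                    .add    = foo_add,
                    .remove = foo_remove,
            },
            .drv.pm = &foo_pm,
    };

Note the resume callback's signature changes from taking an acpi_device to taking a struct device, as in xo15_sci_resume() above.
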
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 59880afa851f..b8b3a37c80cd 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * SGI UltraViolet TLB flush routines. | 2 | * SGI UltraViolet TLB flush routines. |
3 | * | 3 | * |
4 | * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI. | 4 | * (c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI. |
5 | * | 5 | * |
6 | * This code is released under the GNU General Public License version 2 or | 6 | * This code is released under the GNU General Public License version 2 or |
7 | * later. | 7 | * later. |
@@ -38,8 +38,7 @@ static int timeout_base_ns[] = { | |||
38 | 38 | ||
39 | static int timeout_us; | 39 | static int timeout_us; |
40 | static int nobau; | 40 | static int nobau; |
41 | static int baudisabled; | 41 | static int nobau_perm; |
42 | static spinlock_t disable_lock; | ||
43 | static cycles_t congested_cycles; | 42 | static cycles_t congested_cycles; |
44 | 43 | ||
45 | /* tunables: */ | 44 | /* tunables: */ |
@@ -47,12 +46,13 @@ static int max_concurr = MAX_BAU_CONCURRENT; | |||
47 | static int max_concurr_const = MAX_BAU_CONCURRENT; | 46 | static int max_concurr_const = MAX_BAU_CONCURRENT; |
48 | static int plugged_delay = PLUGGED_DELAY; | 47 | static int plugged_delay = PLUGGED_DELAY; |
49 | static int plugsb4reset = PLUGSB4RESET; | 48 | static int plugsb4reset = PLUGSB4RESET; |
49 | static int giveup_limit = GIVEUP_LIMIT; | ||
50 | static int timeoutsb4reset = TIMEOUTSB4RESET; | 50 | static int timeoutsb4reset = TIMEOUTSB4RESET; |
51 | static int ipi_reset_limit = IPI_RESET_LIMIT; | 51 | static int ipi_reset_limit = IPI_RESET_LIMIT; |
52 | static int complete_threshold = COMPLETE_THRESHOLD; | 52 | static int complete_threshold = COMPLETE_THRESHOLD; |
53 | static int congested_respns_us = CONGESTED_RESPONSE_US; | 53 | static int congested_respns_us = CONGESTED_RESPONSE_US; |
54 | static int congested_reps = CONGESTED_REPS; | 54 | static int congested_reps = CONGESTED_REPS; |
55 | static int congested_period = CONGESTED_PERIOD; | 55 | static int disabled_period = DISABLED_PERIOD; |
56 | 56 | ||
57 | static struct tunables tunables[] = { | 57 | static struct tunables tunables[] = { |
58 | {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ | 58 | {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ |
@@ -63,7 +63,8 @@ static struct tunables tunables[] = { | |||
63 | {&complete_threshold, COMPLETE_THRESHOLD}, | 63 | {&complete_threshold, COMPLETE_THRESHOLD}, |
64 | {&congested_respns_us, CONGESTED_RESPONSE_US}, | 64 | {&congested_respns_us, CONGESTED_RESPONSE_US}, |
65 | {&congested_reps, CONGESTED_REPS}, | 65 | {&congested_reps, CONGESTED_REPS}, |
66 | {&congested_period, CONGESTED_PERIOD} | 66 | {&disabled_period, DISABLED_PERIOD}, |
67 | {&giveup_limit, GIVEUP_LIMIT} | ||
67 | }; | 68 | }; |
68 | 69 | ||
69 | static struct dentry *tunables_dir; | 70 | static struct dentry *tunables_dir; |
@@ -120,6 +121,40 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats); | |||
120 | static DEFINE_PER_CPU(struct bau_control, bau_control); | 121 | static DEFINE_PER_CPU(struct bau_control, bau_control); |
121 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | 122 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); |
122 | 123 | ||
124 | static void | ||
125 | set_bau_on(void) | ||
126 | { | ||
127 | int cpu; | ||
128 | struct bau_control *bcp; | ||
129 | |||
130 | if (nobau_perm) { | ||
131 | pr_info("BAU not initialized; cannot be turned on\n"); | ||
132 | return; | ||
133 | } | ||
134 | nobau = 0; | ||
135 | for_each_present_cpu(cpu) { | ||
136 | bcp = &per_cpu(bau_control, cpu); | ||
137 | bcp->nobau = 0; | ||
138 | } | ||
139 | pr_info("BAU turned on\n"); | ||
140 | return; | ||
141 | } | ||
142 | |||
143 | static void | ||
144 | set_bau_off(void) | ||
145 | { | ||
146 | int cpu; | ||
147 | struct bau_control *bcp; | ||
148 | |||
149 | nobau = 1; | ||
150 | for_each_present_cpu(cpu) { | ||
151 | bcp = &per_cpu(bau_control, cpu); | ||
152 | bcp->nobau = 1; | ||
153 | } | ||
154 | pr_info("BAU turned off\n"); | ||
155 | return; | ||
156 | } | ||
157 | |||
123 | /* | 158 | /* |
124 | * Determine the first node on a uvhub. 'Nodes' are used for kernel | 159 | * Determine the first node on a uvhub. 'Nodes' are used for kernel |
125 | * memory allocation. | 160 | * memory allocation. |
@@ -278,7 +313,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, | |||
278 | * Both sockets dump their completed count total into | 313 | * Both sockets dump their completed count total into |
279 | * the message's count. | 314 | * the message's count. |
280 | */ | 315 | */ |
281 | smaster->socket_acknowledge_count[mdp->msg_slot] = 0; | 316 | *sp = 0; |
282 | asp = (struct atomic_short *)&msg->acknowledge_count; | 317 | asp = (struct atomic_short *)&msg->acknowledge_count; |
283 | msg_ack_count = atom_asr(socket_ack_count, asp); | 318 | msg_ack_count = atom_asr(socket_ack_count, asp); |
284 | 319 | ||
@@ -491,16 +526,15 @@ static int uv1_wait_completion(struct bau_desc *bau_desc, | |||
491 | } | 526 | } |
492 | 527 | ||
493 | /* | 528 | /* |
494 | * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. | 529 | * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register. |
530 | * But not currently used. | ||
495 | */ | 531 | */ |
496 | static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc) | 532 | static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc) |
497 | { | 533 | { |
498 | unsigned long descriptor_status; | 534 | unsigned long descriptor_status; |
499 | unsigned long descriptor_status2; | ||
500 | 535 | ||
501 | descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); | 536 | descriptor_status = |
502 | descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL; | 537 | ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1; |
503 | descriptor_status = (descriptor_status << 1) | descriptor_status2; | ||
504 | return descriptor_status; | 538 | return descriptor_status; |
505 | } | 539 | } |
506 | 540 | ||
@@ -531,87 +565,11 @@ int normal_busy(struct bau_control *bcp) | |||
531 | */ | 565 | */ |
532 | int handle_uv2_busy(struct bau_control *bcp) | 566 | int handle_uv2_busy(struct bau_control *bcp) |
533 | { | 567 | { |
534 | int busy_one = bcp->using_desc; | ||
535 | int normal = bcp->uvhub_cpu; | ||
536 | int selected = -1; | ||
537 | int i; | ||
538 | unsigned long descriptor_status; | ||
539 | unsigned long status; | ||
540 | int mmr_offset; | ||
541 | struct bau_desc *bau_desc_old; | ||
542 | struct bau_desc *bau_desc_new; | ||
543 | struct bau_control *hmaster = bcp->uvhub_master; | ||
544 | struct ptc_stats *stat = bcp->statp; | 568 | struct ptc_stats *stat = bcp->statp; |
545 | cycles_t ttm; | ||
546 | 569 | ||
547 | stat->s_uv2_wars++; | 570 | stat->s_uv2_wars++; |
548 | spin_lock(&hmaster->uvhub_lock); | 571 | bcp->busy = 1; |
549 | /* try for the original first */ | 572 | return FLUSH_GIVEUP; |
550 | if (busy_one != normal) { | ||
551 | if (!normal_busy(bcp)) | ||
552 | selected = normal; | ||
553 | } | ||
554 | if (selected < 0) { | ||
555 | /* can't use the normal, select an alternate */ | ||
556 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; | ||
557 | descriptor_status = read_lmmr(mmr_offset); | ||
558 | |||
559 | /* scan available descriptors 32-63 */ | ||
560 | for (i = 0; i < UV_CPUS_PER_AS; i++) { | ||
561 | if ((hmaster->inuse_map & (1 << i)) == 0) { | ||
562 | status = ((descriptor_status >> | ||
563 | (i * UV_ACT_STATUS_SIZE)) & | ||
564 | UV_ACT_STATUS_MASK) << 1; | ||
565 | if (status != UV2H_DESC_BUSY) { | ||
566 | selected = i + UV_CPUS_PER_AS; | ||
567 | break; | ||
568 | } | ||
569 | } | ||
570 | } | ||
571 | } | ||
572 | |||
573 | if (busy_one != normal) | ||
574 | /* mark the busy alternate as not in-use */ | ||
575 | hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS)); | ||
576 | |||
577 | if (selected >= 0) { | ||
578 | /* switch to the selected descriptor */ | ||
579 | if (selected != normal) { | ||
580 | /* set the selected alternate as in-use */ | ||
581 | hmaster->inuse_map |= | ||
582 | (1 << (selected - UV_CPUS_PER_AS)); | ||
583 | if (selected > stat->s_uv2_wars_hw) | ||
584 | stat->s_uv2_wars_hw = selected; | ||
585 | } | ||
586 | bau_desc_old = bcp->descriptor_base; | ||
587 | bau_desc_old += (ITEMS_PER_DESC * busy_one); | ||
588 | bcp->using_desc = selected; | ||
589 | bau_desc_new = bcp->descriptor_base; | ||
590 | bau_desc_new += (ITEMS_PER_DESC * selected); | ||
591 | *bau_desc_new = *bau_desc_old; | ||
592 | } else { | ||
593 | /* | ||
594 | * All are busy. Wait for the normal one for this cpu to | ||
595 | * free up. | ||
596 | */ | ||
597 | stat->s_uv2_war_waits++; | ||
598 | spin_unlock(&hmaster->uvhub_lock); | ||
599 | ttm = get_cycles(); | ||
600 | do { | ||
601 | cpu_relax(); | ||
602 | } while (normal_busy(bcp)); | ||
603 | spin_lock(&hmaster->uvhub_lock); | ||
604 | /* switch to the original descriptor */ | ||
605 | bcp->using_desc = normal; | ||
606 | bau_desc_old = bcp->descriptor_base; | ||
607 | bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc); | ||
608 | bcp->using_desc = (ITEMS_PER_DESC * normal); | ||
609 | bau_desc_new = bcp->descriptor_base; | ||
610 | bau_desc_new += (ITEMS_PER_DESC * normal); | ||
611 | *bau_desc_new = *bau_desc_old; /* copy the entire descriptor */ | ||
612 | } | ||
613 | spin_unlock(&hmaster->uvhub_lock); | ||
614 | return FLUSH_RETRY_BUSYBUG; | ||
615 | } | 573 | } |
616 | 574 | ||
617 | static int uv2_wait_completion(struct bau_desc *bau_desc, | 575 | static int uv2_wait_completion(struct bau_desc *bau_desc, |
@@ -620,7 +578,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, | |||
620 | { | 578 | { |
621 | unsigned long descriptor_stat; | 579 | unsigned long descriptor_stat; |
622 | cycles_t ttm; | 580 | cycles_t ttm; |
623 | int desc = bcp->using_desc; | 581 | int desc = bcp->uvhub_cpu; |
624 | long busy_reps = 0; | 582 | long busy_reps = 0; |
625 | struct ptc_stats *stat = bcp->statp; | 583 | struct ptc_stats *stat = bcp->statp; |
626 | 584 | ||
@@ -628,24 +586,38 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, | |||
628 | 586 | ||
629 | /* spin on the status MMR, waiting for it to go idle */ | 587 | /* spin on the status MMR, waiting for it to go idle */ |
630 | while (descriptor_stat != UV2H_DESC_IDLE) { | 588 | while (descriptor_stat != UV2H_DESC_IDLE) { |
631 | /* | 589 | if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) { |
632 | * Our software ack messages may be blocked because | 590 | /* |
633 | * there are no swack resources available. As long | 591 | * A h/w bug on the destination side may |
634 | * as none of them has timed out hardware will NACK | 592 | * have prevented the message from being marked |
635 | * our message and its state will stay IDLE. | 593 | * pending, thus it doesn't get replied to |
636 | */ | 594 | * and gets continually nacked until it times |
637 | if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || | 595 | * out with a SOURCE_TIMEOUT. |
638 | (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { | 596 | */ |
639 | stat->s_stimeout++; | 597 | stat->s_stimeout++; |
640 | return FLUSH_GIVEUP; | 598 | return FLUSH_GIVEUP; |
641 | } else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) { | ||
642 | stat->s_strongnacks++; | ||
643 | bcp->conseccompletes = 0; | ||
644 | return FLUSH_GIVEUP; | ||
645 | } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { | 599 | } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { |
600 | ttm = get_cycles(); | ||
601 | |||
602 | /* | ||
603 | * Our retries may be blocked by all destination | ||
604 | * swack resources being consumed, and a timeout | ||
605 | * pending. In that case hardware returns the | ||
606 | * ERROR that looks like a destination timeout. | ||
607 | * Without using the extended status we have to | ||
608 | * deduce from the short time that this was a | ||
609 | * strong nack. | ||
610 | */ | ||
611 | if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { | ||
612 | bcp->conseccompletes = 0; | ||
613 | stat->s_plugged++; | ||
614 | /* FLUSH_RETRY_PLUGGED causes hang on boot */ | ||
615 | return FLUSH_GIVEUP; | ||
616 | } | ||
646 | stat->s_dtimeout++; | 617 | stat->s_dtimeout++; |
647 | bcp->conseccompletes = 0; | 618 | bcp->conseccompletes = 0; |
648 | return FLUSH_RETRY_TIMEOUT; | 619 | /* FLUSH_RETRY_TIMEOUT causes hang on boot */ |
620 | return FLUSH_GIVEUP; | ||
649 | } else { | 621 | } else { |
650 | busy_reps++; | 622 | busy_reps++; |
651 | if (busy_reps > 1000000) { | 623 | if (busy_reps > 1000000) { |
@@ -653,9 +625,8 @@ static int uv2_wait_completion(struct bau_desc *bau_desc, | |||
653 | busy_reps = 0; | 625 | busy_reps = 0; |
654 | ttm = get_cycles(); | 626 | ttm = get_cycles(); |
655 | if ((ttm - bcp->send_message) > | 627 | if ((ttm - bcp->send_message) > |
656 | (bcp->clocks_per_100_usec)) { | 628 | bcp->timeout_interval) |
657 | return handle_uv2_busy(bcp); | 629 | return handle_uv2_busy(bcp); |
658 | } | ||
659 | } | 630 | } |
660 | /* | 631 | /* |
661 | * descriptor_stat is still BUSY | 632 | * descriptor_stat is still BUSY |
@@ -679,7 +650,7 @@ static int wait_completion(struct bau_desc *bau_desc, | |||
679 | { | 650 | { |
680 | int right_shift; | 651 | int right_shift; |
681 | unsigned long mmr_offset; | 652 | unsigned long mmr_offset; |
682 | int desc = bcp->using_desc; | 653 | int desc = bcp->uvhub_cpu; |
683 | 654 | ||
684 | if (desc < UV_CPUS_PER_AS) { | 655 | if (desc < UV_CPUS_PER_AS) { |
685 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; | 656 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; |
@@ -758,33 +729,31 @@ static void destination_timeout(struct bau_desc *bau_desc, | |||
758 | } | 729 | } |
759 | 730 | ||
760 | /* | 731 | /* |
761 | * Completions are taking a very long time due to a congested numalink | 732 | * Stop all cpus on a uvhub from using the BAU for a period of time. |
762 | * network. | 733 | * This is reversed by check_enable. |
763 | */ | 734 | */ |
764 | static void disable_for_congestion(struct bau_control *bcp, | 735 | static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) |
765 | struct ptc_stats *stat) | ||
766 | { | 736 | { |
767 | /* let only one cpu do this disabling */ | 737 | int tcpu; |
768 | spin_lock(&disable_lock); | 738 | struct bau_control *tbcp; |
769 | 739 | struct bau_control *hmaster; | |
770 | if (!baudisabled && bcp->period_requests && | 740 | cycles_t tm1; |
771 | ((bcp->period_time / bcp->period_requests) > congested_cycles)) { | 741 | |
772 | int tcpu; | 742 | hmaster = bcp->uvhub_master; |
773 | struct bau_control *tbcp; | 743 | spin_lock(&hmaster->disable_lock); |
774 | /* it becomes this cpu's job to turn on the use of the | 744 | if (!bcp->baudisabled) { |
775 | BAU again */ | ||
776 | baudisabled = 1; | ||
777 | bcp->set_bau_off = 1; | ||
778 | bcp->set_bau_on_time = get_cycles(); | ||
779 | bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period); | ||
780 | stat->s_bau_disabled++; | 745 | stat->s_bau_disabled++; |
746 | tm1 = get_cycles(); | ||
781 | for_each_present_cpu(tcpu) { | 747 | for_each_present_cpu(tcpu) { |
782 | tbcp = &per_cpu(bau_control, tcpu); | 748 | tbcp = &per_cpu(bau_control, tcpu); |
783 | tbcp->baudisabled = 1; | 749 | if (tbcp->uvhub_master == hmaster) { |
750 | tbcp->baudisabled = 1; | ||
751 | tbcp->set_bau_on_time = | ||
752 | tm1 + bcp->disabled_period; | ||
753 | } | ||
784 | } | 754 | } |
785 | } | 755 | } |
786 | 756 | spin_unlock(&hmaster->disable_lock); | |
787 | spin_unlock(&disable_lock); | ||
788 | } | 757 | } |
789 | 758 | ||
790 | static void count_max_concurr(int stat, struct bau_control *bcp, | 759 | static void count_max_concurr(int stat, struct bau_control *bcp, |
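
disable_for_period() above and check_enable() further down form a per-uvhub circuit breaker: the hub master's disable_lock guards a shared disabled flag and a common re-enable deadline for every cpu on the hub, and whichever sender first crosses the deadline re-arms the whole hub. The pattern in isolation (struct hub_ctl and the function names are illustrative):

    struct hub_ctl {
            spinlock_t disable_lock;
            int        disabled;
            cycles_t   reenable_time;
    };

    static void hub_disable(struct hub_ctl *hub, cycles_t period)
    {
            spin_lock(&hub->disable_lock);
            if (!hub->disabled) {       /* first loser trips the breaker */
                    hub->disabled = 1;
                    hub->reenable_time = get_cycles() + period;
            }
            spin_unlock(&hub->disable_lock);
    }

    /* Called on the send path; true once the window has expired. */
    static bool hub_try_reenable(struct hub_ctl *hub)
    {
            bool reenabled = false;

            spin_lock(&hub->disable_lock);
            if (hub->disabled && get_cycles() >= hub->reenable_time) {
                    hub->disabled = 0;
                    reenabled = true;
            }
            spin_unlock(&hub->disable_lock);
            return reenabled;
    }
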
@@ -815,16 +784,30 @@ static void record_send_stats(cycles_t time1, cycles_t time2, | |||
815 | bcp->period_requests++; | 784 | bcp->period_requests++; |
816 | bcp->period_time += elapsed; | 785 | bcp->period_time += elapsed; |
817 | if ((elapsed > congested_cycles) && | 786 | if ((elapsed > congested_cycles) && |
818 | (bcp->period_requests > bcp->cong_reps)) | 787 | (bcp->period_requests > bcp->cong_reps) && |
819 | disable_for_congestion(bcp, stat); | 788 | ((bcp->period_time / bcp->period_requests) > |
789 | congested_cycles)) { | ||
790 | stat->s_congested++; | ||
791 | disable_for_period(bcp, stat); | ||
792 | } | ||
820 | } | 793 | } |
821 | } else | 794 | } else |
822 | stat->s_requestor--; | 795 | stat->s_requestor--; |
823 | 796 | ||
824 | if (completion_status == FLUSH_COMPLETE && try > 1) | 797 | if (completion_status == FLUSH_COMPLETE && try > 1) |
825 | stat->s_retriesok++; | 798 | stat->s_retriesok++; |
826 | else if (completion_status == FLUSH_GIVEUP) | 799 | else if (completion_status == FLUSH_GIVEUP) { |
827 | stat->s_giveup++; | 800 | stat->s_giveup++; |
801 | if (get_cycles() > bcp->period_end) | ||
802 | bcp->period_giveups = 0; | ||
803 | bcp->period_giveups++; | ||
804 | if (bcp->period_giveups == 1) | ||
805 | bcp->period_end = get_cycles() + bcp->disabled_period; | ||
806 | if (bcp->period_giveups > bcp->giveup_limit) { | ||
807 | disable_for_period(bcp, stat); | ||
808 | stat->s_giveuplimit++; | ||
809 | } | ||
810 | } | ||
828 | } | 811 | } |
829 | 812 | ||
830 | /* | 813 | /* |
@@ -868,7 +851,8 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc, | |||
868 | * Returns 1 if it gives up entirely and the original cpu mask is to be | 851 | * Returns 1 if it gives up entirely and the original cpu mask is to be |
869 | * returned to the kernel. | 852 | * returned to the kernel. |
870 | */ | 853 | */ |
871 | int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) | 854 | int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp, |
855 | struct bau_desc *bau_desc) | ||
872 | { | 856 | { |
873 | int seq_number = 0; | 857 | int seq_number = 0; |
874 | int completion_stat = 0; | 858 | int completion_stat = 0; |
@@ -881,24 +865,23 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) | |||
881 | struct bau_control *hmaster = bcp->uvhub_master; | 865 | struct bau_control *hmaster = bcp->uvhub_master; |
882 | struct uv1_bau_msg_header *uv1_hdr = NULL; | 866 | struct uv1_bau_msg_header *uv1_hdr = NULL; |
883 | struct uv2_bau_msg_header *uv2_hdr = NULL; | 867 | struct uv2_bau_msg_header *uv2_hdr = NULL; |
884 | struct bau_desc *bau_desc; | ||
885 | 868 | ||
886 | if (bcp->uvhub_version == 1) | 869 | if (bcp->uvhub_version == 1) { |
870 | uv1 = 1; | ||
887 | uv1_throttle(hmaster, stat); | 871 | uv1_throttle(hmaster, stat); |
872 | } | ||
888 | 873 | ||
889 | while (hmaster->uvhub_quiesce) | 874 | while (hmaster->uvhub_quiesce) |
890 | cpu_relax(); | 875 | cpu_relax(); |
891 | 876 | ||
892 | time1 = get_cycles(); | 877 | time1 = get_cycles(); |
878 | if (uv1) | ||
879 | uv1_hdr = &bau_desc->header.uv1_hdr; | ||
880 | else | ||
881 | uv2_hdr = &bau_desc->header.uv2_hdr; | ||
882 | |||
893 | do { | 883 | do { |
894 | bau_desc = bcp->descriptor_base; | 884 | if (try == 0) { |
895 | bau_desc += (ITEMS_PER_DESC * bcp->using_desc); | ||
896 | if (bcp->uvhub_version == 1) { | ||
897 | uv1 = 1; | ||
898 | uv1_hdr = &bau_desc->header.uv1_hdr; | ||
899 | } else | ||
900 | uv2_hdr = &bau_desc->header.uv2_hdr; | ||
901 | if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) { | ||
902 | if (uv1) | 885 | if (uv1) |
903 | uv1_hdr->msg_type = MSG_REGULAR; | 886 | uv1_hdr->msg_type = MSG_REGULAR; |
904 | else | 887 | else |
@@ -916,25 +899,24 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) | |||
916 | uv1_hdr->sequence = seq_number; | 899 | uv1_hdr->sequence = seq_number; |
917 | else | 900 | else |
918 | uv2_hdr->sequence = seq_number; | 901 | uv2_hdr->sequence = seq_number; |
919 | index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc; | 902 | index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu; |
920 | bcp->send_message = get_cycles(); | 903 | bcp->send_message = get_cycles(); |
921 | 904 | ||
922 | write_mmr_activation(index); | 905 | write_mmr_activation(index); |
923 | 906 | ||
924 | try++; | 907 | try++; |
925 | completion_stat = wait_completion(bau_desc, bcp, try); | 908 | completion_stat = wait_completion(bau_desc, bcp, try); |
926 | /* UV2: wait_completion() may change the bcp->using_desc */ | ||
927 | 909 | ||
928 | handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); | 910 | handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); |
929 | 911 | ||
930 | if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { | 912 | if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { |
931 | bcp->ipi_attempts = 0; | 913 | bcp->ipi_attempts = 0; |
914 | stat->s_overipilimit++; | ||
932 | completion_stat = FLUSH_GIVEUP; | 915 | completion_stat = FLUSH_GIVEUP; |
933 | break; | 916 | break; |
934 | } | 917 | } |
935 | cpu_relax(); | 918 | cpu_relax(); |
936 | } while ((completion_stat == FLUSH_RETRY_PLUGGED) || | 919 | } while ((completion_stat == FLUSH_RETRY_PLUGGED) || |
937 | (completion_stat == FLUSH_RETRY_BUSYBUG) || | ||
938 | (completion_stat == FLUSH_RETRY_TIMEOUT)); | 920 | (completion_stat == FLUSH_RETRY_TIMEOUT)); |
939 | 921 | ||
940 | time2 = get_cycles(); | 922 | time2 = get_cycles(); |
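The send loop above has a bounded-retry shape: resend while wait_completion() reports a retryable status (FLUSH_RETRY_PLUGGED or FLUSH_RETRY_TIMEOUT), but convert to FLUSH_GIVEUP once the IPI reset limit is hit. A compilable sketch of that control flow follows; the attempt bookkeeping is folded inline (the kernel tracks it in bcp->ipi_attempts) and the send step is stubbed.

#include <stdio.h>

enum status { OK, RETRY_PLUGGED, RETRY_TIMEOUT, GIVEUP };

/* Stand-in for the send + wait_completion() step: pretend the first
 * two attempts hit a plugged destination, then succeed. */
static enum status send_once(int try)
{
	return (try < 3) ? RETRY_PLUGGED : OK;
}

static enum status send_and_wait(int reset_limit)
{
	enum status s;
	int attempts = 0, try = 0;

	do {
		try++;
		s = send_once(try);
		if (s == OK)
			break;
		if (++attempts >= reset_limit) {
			s = GIVEUP;    /* over the IPI reset limit: give up */
			break;
		}
	} while (s == RETRY_PLUGGED || s == RETRY_TIMEOUT);
	return s;
}

int main(void)
{
	printf("%d\n", send_and_wait(5)); /* prints 0 (OK) after two retries */
	return 0;
}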
@@ -955,28 +937,33 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp) | |||
955 | } | 937 | } |
956 | 938 | ||
957 | /* | 939 | /* |
958 | * The BAU is disabled. When the disabled time period has expired, the cpu | 940 | * The BAU is disabled for this uvhub. When the disabled time period has |
959 | * that disabled it must re-enable it. | 941 | * expired, re-enable it. |
960 | * Return 0 if it is re-enabled for all cpus. | 942 | * Return 0 if it is re-enabled for all cpus on this uvhub. |
961 | */ | 943 | */ |
962 | static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) | 944 | static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) |
963 | { | 945 | { |
964 | int tcpu; | 946 | int tcpu; |
965 | struct bau_control *tbcp; | 947 | struct bau_control *tbcp; |
948 | struct bau_control *hmaster; | ||
966 | 949 | ||
967 | if (bcp->set_bau_off) { | 950 | hmaster = bcp->uvhub_master; |
968 | if (get_cycles() >= bcp->set_bau_on_time) { | 951 | spin_lock(&hmaster->disable_lock); |
969 | stat->s_bau_reenabled++; | 952 | if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { |
970 | baudisabled = 0; | 953 | stat->s_bau_reenabled++; |
971 | for_each_present_cpu(tcpu) { | 954 | for_each_present_cpu(tcpu) { |
972 | tbcp = &per_cpu(bau_control, tcpu); | 955 | tbcp = &per_cpu(bau_control, tcpu); |
956 | if (tbcp->uvhub_master == hmaster) { | ||
973 | tbcp->baudisabled = 0; | 957 | tbcp->baudisabled = 0; |
974 | tbcp->period_requests = 0; | 958 | tbcp->period_requests = 0; |
975 | tbcp->period_time = 0; | 959 | tbcp->period_time = 0; |
960 | tbcp->period_giveups = 0; | ||
976 | } | 961 | } |
977 | return 0; | ||
978 | } | 962 | } |
963 | spin_unlock(&hmaster->disable_lock); | ||
964 | return 0; | ||
979 | } | 965 | } |
966 | spin_unlock(&hmaster->disable_lock); | ||
980 | return -1; | 967 | return -1; |
981 | } | 968 | } |
982 | 969 | ||
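check_enable() above re-enables the BAU hub-wide: under the hub master's disable_lock it clears the disabled flag and the period counters on every cpu sharing that master. Here is a self-contained user-space model of that pattern, with a pthread mutex standing in for the kernel spinlock.

#include <pthread.h>
#include <stdio.h>

#define NCPUS 4

struct ctl {
	struct ctl *hub_master;
	pthread_mutex_t disable_lock;   /* meaningful on the master only */
	int disabled;
	long period_requests, period_time, period_giveups;
};

static struct ctl cpus[NCPUS];

static void reenable_hub(struct ctl *bcp)
{
	struct ctl *hmaster = bcp->hub_master;
	int i;

	pthread_mutex_lock(&hmaster->disable_lock);
	for (i = 0; i < NCPUS; i++) {
		if (cpus[i].hub_master == hmaster) {
			cpus[i].disabled = 0;
			cpus[i].period_requests = 0;
			cpus[i].period_time = 0;
			cpus[i].period_giveups = 0;
		}
	}
	pthread_mutex_unlock(&hmaster->disable_lock);
}

int main(void)
{
	int i;

	for (i = 0; i < NCPUS; i++) {
		cpus[i].hub_master = &cpus[0];   /* one hub, cpu 0 is master */
		cpus[i].disabled = 1;
		pthread_mutex_init(&cpus[i].disable_lock, NULL);
	}
	reenable_hub(&cpus[2]);
	printf("cpu3 disabled=%d\n", cpus[3].disabled);  /* prints 0 */
	return 0;
}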
@@ -1068,8 +1055,8 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, | |||
1068 | * done. The returned pointer is valid till preemption is re-enabled. | 1055 | * done. The returned pointer is valid till preemption is re-enabled. |
1069 | */ | 1056 | */ |
1070 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | 1057 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, |
1071 | struct mm_struct *mm, unsigned long va, | 1058 | struct mm_struct *mm, unsigned long start, |
1072 | unsigned int cpu) | 1059 | unsigned end, unsigned int cpu) |
1073 | { | 1060 | { |
1074 | int locals = 0; | 1061 | int locals = 0; |
1075 | int remotes = 0; | 1062 | int remotes = 0; |
@@ -1078,18 +1065,32 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
1078 | struct cpumask *flush_mask; | 1065 | struct cpumask *flush_mask; |
1079 | struct ptc_stats *stat; | 1066 | struct ptc_stats *stat; |
1080 | struct bau_control *bcp; | 1067 | struct bau_control *bcp; |
1081 | 1068 | unsigned long descriptor_status; | |
1082 | /* kernel was booted 'nobau' */ | 1069 | unsigned long status; |
1083 | if (nobau) | ||
1084 | return cpumask; | ||
1085 | 1070 | ||
1086 | bcp = &per_cpu(bau_control, cpu); | 1071 | bcp = &per_cpu(bau_control, cpu); |
1087 | stat = bcp->statp; | 1072 | stat = bcp->statp; |
1073 | stat->s_enters++; | ||
1074 | |||
1075 | if (bcp->nobau) | ||
1076 | return cpumask; | ||
1077 | |||
1078 | if (bcp->busy) { | ||
1079 | descriptor_status = | ||
1080 | read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0); | ||
1081 | status = ((descriptor_status >> (bcp->uvhub_cpu * | ||
1082 | UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1; | ||
1083 | if (status == UV2H_DESC_BUSY) | ||
1084 | return cpumask; | ||
1085 | bcp->busy = 0; | ||
1086 | } | ||
1088 | 1087 | ||
1089 | /* bau was disabled due to slow response */ | 1088 | /* bau was disabled due to slow response */ |
1090 | if (bcp->baudisabled) { | 1089 | if (bcp->baudisabled) { |
1091 | if (check_enable(bcp, stat)) | 1090 | if (check_enable(bcp, stat)) { |
1091 | stat->s_ipifordisabled++; | ||
1092 | return cpumask; | 1092 | return cpumask; |
1093 | } | ||
1093 | } | 1094 | } |
1094 | 1095 | ||
1095 | /* | 1096 | /* |
@@ -1105,38 +1106,40 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
1105 | stat->s_ntargself++; | 1106 | stat->s_ntargself++; |
1106 | 1107 | ||
1107 | bau_desc = bcp->descriptor_base; | 1108 | bau_desc = bcp->descriptor_base; |
1108 | bau_desc += (ITEMS_PER_DESC * bcp->using_desc); | 1109 | bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu); |
1109 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 1110 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
1110 | if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) | 1111 | if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) |
1111 | return NULL; | 1112 | return NULL; |
1112 | 1113 | ||
1113 | record_send_statistics(stat, locals, hubs, remotes, bau_desc); | 1114 | record_send_statistics(stat, locals, hubs, remotes, bau_desc); |
1114 | 1115 | ||
1115 | bau_desc->payload.address = va; | 1116 | bau_desc->payload.address = start; |
1116 | bau_desc->payload.sending_cpu = cpu; | 1117 | bau_desc->payload.sending_cpu = cpu; |
1117 | /* | 1118 | /* |
1118 | * uv_flush_send_and_wait returns 0 if all cpu's were messaged, | 1119 | * uv_flush_send_and_wait returns 0 if all cpu's were messaged, |
1119 | * or 1 if it gave up and the original cpumask should be returned. | 1120 | * or 1 if it gave up and the original cpumask should be returned. |
1120 | */ | 1121 | */ |
1121 | if (!uv_flush_send_and_wait(flush_mask, bcp)) | 1122 | if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc)) |
1122 | return NULL; | 1123 | return NULL; |
1123 | else | 1124 | else |
1124 | return cpumask; | 1125 | return cpumask; |
1125 | } | 1126 | } |
1126 | 1127 | ||
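The busy check added to uv_flush_tlb_others() recovers one cpu's state from the packed activation-status MMR with a shift and a mask. A small model of that extraction is shown below; the field width, mask, and trailing shift are illustrative, not the real UV register layout.

#include <stdio.h>

#define ACT_STATUS_SIZE 2               /* bits per cpu (illustrative) */
#define ACT_STATUS_MASK 0x3UL

static unsigned long desc_status(unsigned long mmr, int uvhub_cpu)
{
	/* Mirrors the shift/mask/<<1 shape used in the busy check above. */
	return ((mmr >> (uvhub_cpu * ACT_STATUS_SIZE)) & ACT_STATUS_MASK) << 1;
}

int main(void)
{
	unsigned long mmr = 0x2UL << (3 * ACT_STATUS_SIZE); /* cpu 3's field */

	printf("cpu 3 status = %lx\n", desc_status(mmr, 3)); /* prints 4 */
	return 0;
}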
1127 | /* | 1128 | /* |
1128 | * Search the message queue for any 'other' message with the same software | 1129 | * Search the message queue for any 'other' unprocessed message with the |
1129 | * acknowledge resource bit vector. | 1130 | * same software acknowledge resource bit vector as the 'msg' message. |
1130 | */ | 1131 | */ |
1131 | struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, | 1132 | struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, |
1132 | struct bau_control *bcp, unsigned char swack_vec) | 1133 | struct bau_control *bcp) |
1133 | { | 1134 | { |
1134 | struct bau_pq_entry *msg_next = msg + 1; | 1135 | struct bau_pq_entry *msg_next = msg + 1; |
1136 | unsigned char swack_vec = msg->swack_vec; | ||
1135 | 1137 | ||
1136 | if (msg_next > bcp->queue_last) | 1138 | if (msg_next > bcp->queue_last) |
1137 | msg_next = bcp->queue_first; | 1139 | msg_next = bcp->queue_first; |
1138 | while ((msg_next->swack_vec != 0) && (msg_next != msg)) { | 1140 | while (msg_next != msg) { |
1139 | if (msg_next->swack_vec == swack_vec) | 1141 | if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) && |
1142 | (msg_next->swack_vec == swack_vec)) | ||
1140 | return msg_next; | 1143 | return msg_next; |
1141 | msg_next++; | 1144 | msg_next++; |
1142 | if (msg_next > bcp->queue_last) | 1145 | if (msg_next > bcp->queue_last) |
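find_another_by_swack() above is a circular scan: walk forward from msg + 1, wrapping from queue_last back to queue_first, and stop at either a live (not canceled, not replied-to) message with the same swack vector or at msg itself. A self-contained model with a simplified entry layout:

#include <stddef.h>
#include <stdio.h>

struct entry {
	unsigned char swack_vec;
	int canceled, replied_to;
};

static struct entry *find_by_swack(struct entry *msg,
				   struct entry *first, struct entry *last)
{
	struct entry *next = msg + 1;
	unsigned char vec = msg->swack_vec;

	if (next > last)
		next = first;                   /* wrap around the queue */
	while (next != msg) {
		if (!next->canceled && !next->replied_to &&
		    next->swack_vec == vec)
			return next;
		if (++next > last)
			next = first;
	}
	return NULL;                            /* no other live user */
}

int main(void)
{
	struct entry q[4] = { {1,0,0}, {2,0,0}, {1,1,0}, {1,0,0} };
	struct entry *hit = find_by_swack(&q[0], &q[0], &q[3]);

	/* prints 3: q[1] has a different vector, q[2] is canceled */
	printf("match at index %ld\n", hit ? (long)(hit - q) : -1L);
	return 0;
}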
@@ -1165,32 +1168,30 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) | |||
1165 | * This message was assigned a swack resource, but no | 1168 | * This message was assigned a swack resource, but no |
1166 | * reserved acknowledgment is pending. | 1169 | * reserved acknowledgment is pending. |
1167 | * The bug has prevented this message from setting the MMR. | 1170 | * The bug has prevented this message from setting the MMR. |
1168 | * And no other message has used the same sw_ack resource. | ||
1169 | * Do the requested shootdown but do not reply to the msg. | ||
1170 | * (the 0 means make no acknowledge) | ||
1171 | */ | 1171 | */ |
1172 | bau_process_message(mdp, bcp, 0); | ||
1173 | return; | ||
1174 | } | ||
1175 | |||
1176 | /* | ||
1177 | * Some message has set the MMR 'pending' bit; it might have been | ||
1178 | * another message. Look for that message. | ||
1179 | */ | ||
1180 | other_msg = find_another_by_swack(msg, bcp, msg->swack_vec); | ||
1181 | if (other_msg) { | ||
1182 | /* There is another. Do not ack the current one. */ | ||
1183 | bau_process_message(mdp, bcp, 0); | ||
1184 | /* | 1172 | /* |
1185 | * Let the natural processing of that message acknowledge | 1173 | * Some message has set the MMR 'pending' bit; it might have |
1186 | * it. Don't get the processing of sw_ack's out of order. | 1174 | * been another message. Look for that message. |
1187 | */ | 1175 | */ |
1188 | return; | 1176 | other_msg = find_another_by_swack(msg, bcp); |
1177 | if (other_msg) { | ||
1178 | /* | ||
1179 | * There is another. Process this one but do not | ||
1180 | * ack it. | ||
1181 | */ | ||
1182 | bau_process_message(mdp, bcp, 0); | ||
1183 | /* | ||
1184 | * Let the natural processing of that other message | ||
1185 | * acknowledge it. Don't get the processing of sw_ack's | ||
1186 | * out of order. | ||
1187 | */ | ||
1188 | return; | ||
1189 | } | ||
1189 | } | 1190 | } |
1190 | 1191 | ||
1191 | /* | 1192 | /* |
1192 | * There is no other message using this sw_ack, so it is safe to | 1193 | * Either the MMR shows this one pending a reply or there is no |
1193 | * acknowledge it. | 1194 | * other message using this sw_ack, so it is safe to acknowledge it. |
1194 | */ | 1195 | */ |
1195 | bau_process_message(mdp, bcp, 1); | 1196 | bau_process_message(mdp, bcp, 1); |
1196 | 1197 | ||
@@ -1295,7 +1296,8 @@ static void __init enable_timeouts(void) | |||
1295 | */ | 1296 | */ |
1296 | mmr_image |= (1L << SOFTACK_MSHIFT); | 1297 | mmr_image |= (1L << SOFTACK_MSHIFT); |
1297 | if (is_uv2_hub()) { | 1298 | if (is_uv2_hub()) { |
1298 | mmr_image |= (1L << UV2_EXT_SHFT); | 1299 | /* hw bug workaround; do not use extended status */ |
1300 | mmr_image &= ~(1L << UV2_EXT_SHFT); | ||
1299 | } | 1301 | } |
1300 | write_mmr_misc_control(pnode, mmr_image); | 1302 | write_mmr_misc_control(pnode, mmr_image); |
1301 | } | 1303 | } |
@@ -1338,29 +1340,34 @@ static inline unsigned long long usec_2_cycles(unsigned long microsec) | |||
1338 | static int ptc_seq_show(struct seq_file *file, void *data) | 1340 | static int ptc_seq_show(struct seq_file *file, void *data) |
1339 | { | 1341 | { |
1340 | struct ptc_stats *stat; | 1342 | struct ptc_stats *stat; |
1343 | struct bau_control *bcp; | ||
1341 | int cpu; | 1344 | int cpu; |
1342 | 1345 | ||
1343 | cpu = *(loff_t *)data; | 1346 | cpu = *(loff_t *)data; |
1344 | if (!cpu) { | 1347 | if (!cpu) { |
1345 | seq_printf(file, | 1348 | seq_printf(file, |
1346 | "# cpu sent stime self locals remotes ncpus localhub "); | 1349 | "# cpu bauoff sent stime self locals remotes ncpus localhub "); |
1347 | seq_printf(file, | 1350 | seq_printf(file, |
1348 | "remotehub numuvhubs numuvhubs16 numuvhubs8 "); | 1351 | "remotehub numuvhubs numuvhubs16 numuvhubs8 "); |
1349 | seq_printf(file, | 1352 | seq_printf(file, |
1350 | "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok "); | 1353 | "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries "); |
1354 | seq_printf(file, | ||
1355 | "rok resetp resett giveup sto bz throt disable "); | ||
1351 | seq_printf(file, | 1356 | seq_printf(file, |
1352 | "resetp resett giveup sto bz throt swack recv rtime "); | 1357 | "enable wars warshw warwaits enters ipidis plugged "); |
1353 | seq_printf(file, | 1358 | seq_printf(file, |
1354 | "all one mult none retry canc nocan reset rcan "); | 1359 | "ipiover glim cong swack recv rtime all one mult "); |
1355 | seq_printf(file, | 1360 | seq_printf(file, |
1356 | "disable enable wars warshw warwaits\n"); | 1361 | "none retry canc nocan reset rcan\n"); |
1357 | } | 1362 | } |
1358 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { | 1363 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { |
1359 | stat = &per_cpu(ptcstats, cpu); | 1364 | bcp = &per_cpu(bau_control, cpu); |
1365 | stat = bcp->statp; | ||
1360 | /* source side statistics */ | 1366 | /* source side statistics */ |
1361 | seq_printf(file, | 1367 | seq_printf(file, |
1362 | "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | 1368 | "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", |
1363 | cpu, stat->s_requestor, cycles_2_us(stat->s_time), | 1369 | cpu, bcp->nobau, stat->s_requestor, |
1370 | cycles_2_us(stat->s_time), | ||
1364 | stat->s_ntargself, stat->s_ntarglocals, | 1371 | stat->s_ntargself, stat->s_ntarglocals, |
1365 | stat->s_ntargremotes, stat->s_ntargcpu, | 1372 | stat->s_ntargremotes, stat->s_ntargcpu, |
1366 | stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, | 1373 | stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, |
@@ -1374,20 +1381,23 @@ static int ptc_seq_show(struct seq_file *file, void *data) | |||
1374 | stat->s_resets_plug, stat->s_resets_timeout, | 1381 | stat->s_resets_plug, stat->s_resets_timeout, |
1375 | stat->s_giveup, stat->s_stimeout, | 1382 | stat->s_giveup, stat->s_stimeout, |
1376 | stat->s_busy, stat->s_throttles); | 1383 | stat->s_busy, stat->s_throttles); |
1384 | seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | ||
1385 | stat->s_bau_disabled, stat->s_bau_reenabled, | ||
1386 | stat->s_uv2_wars, stat->s_uv2_wars_hw, | ||
1387 | stat->s_uv2_war_waits, stat->s_enters, | ||
1388 | stat->s_ipifordisabled, stat->s_plugged, | ||
1389 | stat->s_overipilimit, stat->s_giveuplimit, | ||
1390 | stat->s_congested); | ||
1377 | 1391 | ||
1378 | /* destination side statistics */ | 1392 | /* destination side statistics */ |
1379 | seq_printf(file, | 1393 | seq_printf(file, |
1380 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | 1394 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", |
1381 | read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), | 1395 | read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), |
1382 | stat->d_requestee, cycles_2_us(stat->d_time), | 1396 | stat->d_requestee, cycles_2_us(stat->d_time), |
1383 | stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, | 1397 | stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, |
1384 | stat->d_nomsg, stat->d_retries, stat->d_canceled, | 1398 | stat->d_nomsg, stat->d_retries, stat->d_canceled, |
1385 | stat->d_nocanceled, stat->d_resets, | 1399 | stat->d_nocanceled, stat->d_resets, |
1386 | stat->d_rcanceled); | 1400 | stat->d_rcanceled); |
1387 | seq_printf(file, "%ld %ld %ld %ld %ld\n", | ||
1388 | stat->s_bau_disabled, stat->s_bau_reenabled, | ||
1389 | stat->s_uv2_wars, stat->s_uv2_wars_hw, | ||
1390 | stat->s_uv2_war_waits); | ||
1391 | } | 1401 | } |
1392 | return 0; | 1402 | return 0; |
1393 | } | 1403 | } |
@@ -1401,13 +1411,14 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf, | |||
1401 | char *buf; | 1411 | char *buf; |
1402 | int ret; | 1412 | int ret; |
1403 | 1413 | ||
1404 | buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", | 1414 | buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n", |
1405 | "max_concur plugged_delay plugsb4reset", | 1415 | "max_concur plugged_delay plugsb4reset timeoutsb4reset", |
1406 | "timeoutsb4reset ipi_reset_limit complete_threshold", | 1416 | "ipi_reset_limit complete_threshold congested_response_us", |
1407 | "congested_response_us congested_reps congested_period", | 1417 | "congested_reps disabled_period giveup_limit", |
1408 | max_concurr, plugged_delay, plugsb4reset, | 1418 | max_concurr, plugged_delay, plugsb4reset, |
1409 | timeoutsb4reset, ipi_reset_limit, complete_threshold, | 1419 | timeoutsb4reset, ipi_reset_limit, complete_threshold, |
1410 | congested_respns_us, congested_reps, congested_period); | 1420 | congested_respns_us, congested_reps, disabled_period, |
1421 | giveup_limit); | ||
1411 | 1422 | ||
1412 | if (!buf) | 1423 | if (!buf) |
1413 | return -ENOMEM; | 1424 | return -ENOMEM; |
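The tunables_read() path above follows a common debugfs read idiom: format everything into a kmalloc'd buffer with kasprintf(), copy it out with simple_read_from_buffer(), and free the buffer. A minimal kernel-style sketch of the same shape, with made-up sample values; it assumes the usual kernel build context and is not tied to the actual BAU tunables.

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/string.h>

static ssize_t example_tunables_read(struct file *file, char __user *userbuf,
				     size_t count, loff_t *ppos)
{
	char *buf;
	ssize_t ret;

	buf = kasprintf(GFP_KERNEL, "%d %d\n", 16, 65);  /* sample values */
	if (!buf)
		return -ENOMEM;

	/* Handles partial reads and *ppos bookkeeping for us. */
	ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
	kfree(buf);
	return ret;
}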
@@ -1438,6 +1449,14 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user, | |||
1438 | return -EFAULT; | 1449 | return -EFAULT; |
1439 | optstr[count - 1] = '\0'; | 1450 | optstr[count - 1] = '\0'; |
1440 | 1451 | ||
1452 | if (!strcmp(optstr, "on")) { | ||
1453 | set_bau_on(); | ||
1454 | return count; | ||
1455 | } else if (!strcmp(optstr, "off")) { | ||
1456 | set_bau_off(); | ||
1457 | return count; | ||
1458 | } | ||
1459 | |||
1441 | if (strict_strtol(optstr, 10, &input_arg) < 0) { | 1460 | if (strict_strtol(optstr, 10, &input_arg) < 0) { |
1442 | printk(KERN_DEBUG "%s is invalid\n", optstr); | 1461 | printk(KERN_DEBUG "%s is invalid\n", optstr); |
1443 | return -EINVAL; | 1462 | return -EINVAL; |
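The new write handler accepts the literal strings "on" and "off" before falling back to numeric parsing. A user-space sketch of that two-stage parse, with strtol() standing in for the kernel's strict_strtol():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_ctl(const char *optstr)
{
	char *end;
	long val;

	if (!strcmp(optstr, "on"))
		return 1;               /* set_bau_on() in the real code */
	if (!strcmp(optstr, "off"))
		return 0;               /* set_bau_off() */

	val = strtol(optstr, &end, 10);
	if (*end != '\0')
		return -1;              /* -EINVAL in the real code */
	return (int)val;
}

int main(void)
{
	/* prints: 1 7 -1 */
	printf("%d %d %d\n", parse_ctl("on"), parse_ctl("7"), parse_ctl("x"));
	return 0;
}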
@@ -1570,7 +1589,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user, | |||
1570 | bcp->complete_threshold = complete_threshold; | 1589 | bcp->complete_threshold = complete_threshold; |
1571 | bcp->cong_response_us = congested_respns_us; | 1590 | bcp->cong_response_us = congested_respns_us; |
1572 | bcp->cong_reps = congested_reps; | 1591 | bcp->cong_reps = congested_reps; |
1573 | bcp->cong_period = congested_period; | 1592 | bcp->disabled_period = sec_2_cycles(disabled_period); |
1593 | bcp->giveup_limit = giveup_limit; | ||
1574 | } | 1594 | } |
1575 | return count; | 1595 | return count; |
1576 | } | 1596 | } |
@@ -1699,6 +1719,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) | |||
1699 | * fairness chaining multilevel count replied_to | 1719 | * fairness chaining multilevel count replied_to |
1700 | */ | 1720 | */ |
1701 | } else { | 1721 | } else { |
1722 | /* | ||
1723 | * BIOS uses legacy mode, but UV2 hardware always | ||
1724 | * uses native mode for selective broadcasts. | ||
1725 | */ | ||
1702 | uv2_hdr = &bd2->header.uv2_hdr; | 1726 | uv2_hdr = &bd2->header.uv2_hdr; |
1703 | uv2_hdr->swack_flag = 1; | 1727 | uv2_hdr->swack_flag = 1; |
1704 | uv2_hdr->base_dest_nasid = | 1728 | uv2_hdr->base_dest_nasid = |
@@ -1811,8 +1835,8 @@ static int calculate_destination_timeout(void) | |||
1811 | index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; | 1835 | index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; |
1812 | mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); | 1836 | mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); |
1813 | mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; | 1837 | mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; |
1814 | base = timeout_base_ns[index]; | 1838 | ts_ns = timeout_base_ns[index]; |
1815 | ts_ns = base * mult1 * mult2; | 1839 | ts_ns *= (mult1 * mult2); |
1816 | ret = ts_ns / 1000; | 1840 | ret = ts_ns / 1000; |
1817 | } else { | 1841 | } else { |
1818 | /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */ | 1842 | /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */ |
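calculate_destination_timeout() above multiplies a table-selected base timeout by two multipliers read from MMR fields, then converts nanoseconds to microseconds. A worked model of just the arithmetic; the table contents and field values are illustrative, not the real timeout_base_ns[] or UV register values.

#include <stdio.h>

int main(void)
{
	unsigned long long timeout_base_ns[] = { 0, 160, 2560, 40960 };
	int index = 2;                        /* from the urgency MMR field */
	unsigned long long mult1 = 10, mult2 = 12; /* from the transaction MMR */
	unsigned long long ts_ns = timeout_base_ns[index] * (mult1 * mult2);

	printf("timeout = %llu us\n", ts_ns / 1000); /* 2560*120/1000 = 307 */
	return 0;
}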
@@ -1836,6 +1860,8 @@ static void __init init_per_cpu_tunables(void) | |||
1836 | for_each_present_cpu(cpu) { | 1860 | for_each_present_cpu(cpu) { |
1837 | bcp = &per_cpu(bau_control, cpu); | 1861 | bcp = &per_cpu(bau_control, cpu); |
1838 | bcp->baudisabled = 0; | 1862 | bcp->baudisabled = 0; |
1863 | if (nobau) | ||
1864 | bcp->nobau = 1; | ||
1839 | bcp->statp = &per_cpu(ptcstats, cpu); | 1865 | bcp->statp = &per_cpu(ptcstats, cpu); |
1840 | /* time interval to catch a hardware stay-busy bug */ | 1866 | /* time interval to catch a hardware stay-busy bug */ |
1841 | bcp->timeout_interval = usec_2_cycles(2*timeout_us); | 1867 | bcp->timeout_interval = usec_2_cycles(2*timeout_us); |
@@ -1848,10 +1874,11 @@ static void __init init_per_cpu_tunables(void) | |||
1848 | bcp->complete_threshold = complete_threshold; | 1874 | bcp->complete_threshold = complete_threshold; |
1849 | bcp->cong_response_us = congested_respns_us; | 1875 | bcp->cong_response_us = congested_respns_us; |
1850 | bcp->cong_reps = congested_reps; | 1876 | bcp->cong_reps = congested_reps; |
1851 | bcp->cong_period = congested_period; | 1877 | bcp->disabled_period = sec_2_cycles(disabled_period); |
1852 | bcp->clocks_per_100_usec = usec_2_cycles(100); | 1878 | bcp->giveup_limit = giveup_limit; |
1853 | spin_lock_init(&bcp->queue_lock); | 1879 | spin_lock_init(&bcp->queue_lock); |
1854 | spin_lock_init(&bcp->uvhub_lock); | 1880 | spin_lock_init(&bcp->uvhub_lock); |
1881 | spin_lock_init(&bcp->disable_lock); | ||
1855 | } | 1882 | } |
1856 | } | 1883 | } |
1857 | 1884 | ||
@@ -1972,7 +1999,6 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, | |||
1972 | } | 1999 | } |
1973 | bcp->uvhub_master = *hmasterp; | 2000 | bcp->uvhub_master = *hmasterp; |
1974 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; | 2001 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; |
1975 | bcp->using_desc = bcp->uvhub_cpu; | ||
1976 | if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { | 2002 | if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { |
1977 | printk(KERN_EMERG "%d cpus per uvhub invalid\n", | 2003 | printk(KERN_EMERG "%d cpus per uvhub invalid\n", |
1978 | bcp->uvhub_cpu); | 2004 | bcp->uvhub_cpu); |
@@ -2069,16 +2095,12 @@ static int __init uv_bau_init(void) | |||
2069 | if (!is_uv_system()) | 2095 | if (!is_uv_system()) |
2070 | return 0; | 2096 | return 0; |
2071 | 2097 | ||
2072 | if (nobau) | ||
2073 | return 0; | ||
2074 | |||
2075 | for_each_possible_cpu(cur_cpu) { | 2098 | for_each_possible_cpu(cur_cpu) { |
2076 | mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); | 2099 | mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); |
2077 | zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); | 2100 | zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); |
2078 | } | 2101 | } |
2079 | 2102 | ||
2080 | nuvhubs = uv_num_possible_blades(); | 2103 | nuvhubs = uv_num_possible_blades(); |
2081 | spin_lock_init(&disable_lock); | ||
2082 | congested_cycles = usec_2_cycles(congested_respns_us); | 2104 | congested_cycles = usec_2_cycles(congested_respns_us); |
2083 | 2105 | ||
2084 | uv_base_pnode = 0x7fffffff; | 2106 | uv_base_pnode = 0x7fffffff; |
@@ -2091,7 +2113,8 @@ static int __init uv_bau_init(void) | |||
2091 | enable_timeouts(); | 2113 | enable_timeouts(); |
2092 | 2114 | ||
2093 | if (init_per_cpu(nuvhubs, uv_base_pnode)) { | 2115 | if (init_per_cpu(nuvhubs, uv_base_pnode)) { |
2094 | nobau = 1; | 2116 | set_bau_off(); |
2117 | nobau_perm = 1; | ||
2095 | return 0; | 2118 | return 0; |
2096 | } | 2119 | } |
2097 | 2120 | ||
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index f25c2765a5c9..acf7752da952 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c | |||
@@ -135,6 +135,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
135 | unsigned long mmr_value; | 135 | unsigned long mmr_value; |
136 | struct uv_IO_APIC_route_entry *entry; | 136 | struct uv_IO_APIC_route_entry *entry; |
137 | int mmr_pnode, err; | 137 | int mmr_pnode, err; |
138 | unsigned int dest; | ||
138 | 139 | ||
139 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | 140 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != |
140 | sizeof(unsigned long)); | 141 | sizeof(unsigned long)); |
@@ -143,6 +144,10 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
143 | if (err != 0) | 144 | if (err != 0) |
144 | return err; | 145 | return err; |
145 | 146 | ||
147 | err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest); | ||
148 | if (err != 0) | ||
149 | return err; | ||
150 | |||
146 | if (limit == UV_AFFINITY_CPU) | 151 | if (limit == UV_AFFINITY_CPU) |
147 | irq_set_status_flags(irq, IRQ_NO_BALANCING); | 152 | irq_set_status_flags(irq, IRQ_NO_BALANCING); |
148 | else | 153 | else |
@@ -159,7 +164,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
159 | entry->polarity = 0; | 164 | entry->polarity = 0; |
160 | entry->trigger = 0; | 165 | entry->trigger = 0; |
161 | entry->mask = 0; | 166 | entry->mask = 0; |
162 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | 167 | entry->dest = dest; |
163 | 168 | ||
164 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | 169 | mmr_pnode = uv_blade_to_pnode(mmr_blade); |
165 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 170 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
@@ -222,7 +227,7 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, | |||
222 | if (cfg->move_in_progress) | 227 | if (cfg->move_in_progress) |
223 | send_cleanup_vector(cfg); | 228 | send_cleanup_vector(cfg); |
224 | 229 | ||
225 | return 0; | 230 | return IRQ_SET_MASK_OK_NOCOPY; |
226 | } | 231 | } |
227 | 232 | ||
228 | /* | 233 | /* |
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 5b84a2d30888..b2d534cab25f 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile | |||
@@ -22,7 +22,7 @@ wakeup-objs += video-bios.o | |||
22 | realmode-y += header.o | 22 | realmode-y += header.o |
23 | realmode-y += trampoline_$(BITS).o | 23 | realmode-y += trampoline_$(BITS).o |
24 | realmode-y += stack.o | 24 | realmode-y += stack.o |
25 | realmode-$(CONFIG_X86_32) += reboot_32.o | 25 | realmode-y += reboot.o |
26 | realmode-$(CONFIG_ACPI_SLEEP) += $(wakeup-objs) | 26 | realmode-$(CONFIG_ACPI_SLEEP) += $(wakeup-objs) |
27 | 27 | ||
28 | targets += $(realmode-y) | 28 | targets += $(realmode-y) |
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index fadf48378ada..a28221d94e69 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S | |||
@@ -6,6 +6,7 @@ | |||
6 | 6 | ||
7 | #include <linux/linkage.h> | 7 | #include <linux/linkage.h> |
8 | #include <asm/page_types.h> | 8 | #include <asm/page_types.h> |
9 | #include <asm/segment.h> | ||
9 | 10 | ||
10 | #include "realmode.h" | 11 | #include "realmode.h" |
11 | 12 | ||
@@ -28,8 +29,9 @@ GLOBAL(real_mode_header) | |||
28 | .long pa_wakeup_header | 29 | .long pa_wakeup_header |
29 | #endif | 30 | #endif |
30 | /* APM/BIOS reboot */ | 31 | /* APM/BIOS reboot */ |
31 | #ifdef CONFIG_X86_32 | ||
32 | .long pa_machine_real_restart_asm | 32 | .long pa_machine_real_restart_asm |
33 | #ifdef CONFIG_X86_64 | ||
34 | .long __KERNEL32_CS | ||
33 | #endif | 35 | #endif |
34 | END(real_mode_header) | 36 | END(real_mode_header) |
35 | 37 | ||
diff --git a/arch/x86/realmode/rm/reboot_32.S b/arch/x86/realmode/rm/reboot.S index 114044876b3d..f932ea61d1c8 100644 --- a/arch/x86/realmode/rm/reboot_32.S +++ b/arch/x86/realmode/rm/reboot.S | |||
@@ -2,6 +2,8 @@ | |||
2 | #include <linux/init.h> | 2 | #include <linux/init.h> |
3 | #include <asm/segment.h> | 3 | #include <asm/segment.h> |
4 | #include <asm/page_types.h> | 4 | #include <asm/page_types.h> |
5 | #include <asm/processor-flags.h> | ||
6 | #include <asm/msr-index.h> | ||
5 | #include "realmode.h" | 7 | #include "realmode.h" |
6 | 8 | ||
7 | /* | 9 | /* |
@@ -12,13 +14,35 @@ | |||
12 | * doesn't work with at least one type of 486 motherboard. It is easy | 14 | * doesn't work with at least one type of 486 motherboard. It is easy |
13 | * to stop this code working; hence the copious comments. | 15 | * to stop this code working; hence the copious comments. |
14 | * | 16 | * |
15 | * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. | 17 | * This code is called with the restart type (0 = BIOS, 1 = APM) in |
18 | * the primary argument register (%eax for 32-bit, %edi for 64-bit). | ||
16 | */ | 19 | */ |
17 | .section ".text32", "ax" | 20 | .section ".text32", "ax" |
18 | .code32 | 21 | .code32 |
19 | |||
20 | .balign 16 | ||
21 | ENTRY(machine_real_restart_asm) | 22 | ENTRY(machine_real_restart_asm) |
23 | |||
24 | #ifdef CONFIG_X86_64 | ||
25 | /* Switch to trampoline GDT as it is guaranteed < 4 GiB */ | ||
26 | movl $__KERNEL_DS, %eax | ||
27 | movl %eax, %ds | ||
28 | lgdtl pa_tr_gdt | ||
29 | |||
30 | /* Disable paging to drop us out of long mode */ | ||
31 | movl %cr0, %eax | ||
32 | andl $~X86_CR0_PG, %eax | ||
33 | movl %eax, %cr0 | ||
34 | ljmpl $__KERNEL32_CS, $pa_machine_real_restart_paging_off | ||
35 | |||
36 | GLOBAL(machine_real_restart_paging_off) | ||
37 | xorl %eax, %eax | ||
38 | xorl %edx, %edx | ||
39 | movl $MSR_EFER, %ecx | ||
40 | wrmsr | ||
41 | |||
42 | movl %edi, %eax | ||
43 | |||
44 | #endif /* CONFIG_X86_64 */ | ||
45 | |||
22 | /* Set up the IDT for real mode. */ | 46 | /* Set up the IDT for real mode. */ |
23 | lidtl pa_machine_real_restart_idt | 47 | lidtl pa_machine_real_restart_idt |
24 | 48 | ||
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 66e6d9359826..0faad646f5fd 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -205,9 +205,9 @@ void syscall32_cpu_init(void) | |||
205 | { | 205 | { |
206 | /* Load these always in case some future AMD CPU supports | 206 | /* Load these always in case some future AMD CPU supports |
207 | SYSENTER from compat mode too. */ | 207 | SYSENTER from compat mode too. */ |
208 | checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); | 208 | wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); |
209 | checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); | 209 | wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); |
210 | checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); | 210 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); |
211 | 211 | ||
212 | wrmsrl(MSR_CSTAR, ia32_cstar_target); | 212 | wrmsrl(MSR_CSTAR, ia32_cstar_target); |
213 | } | 213 | } |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ff962d4b821e..bf4bda6d3e9a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/pci.h> | 31 | #include <linux/pci.h> |
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/memblock.h> | 33 | #include <linux/memblock.h> |
34 | #include <linux/syscore_ops.h> | ||
34 | 35 | ||
35 | #include <xen/xen.h> | 36 | #include <xen/xen.h> |
36 | #include <xen/interface/xen.h> | 37 | #include <xen/interface/xen.h> |
@@ -38,6 +39,7 @@ | |||
38 | #include <xen/interface/physdev.h> | 39 | #include <xen/interface/physdev.h> |
39 | #include <xen/interface/vcpu.h> | 40 | #include <xen/interface/vcpu.h> |
40 | #include <xen/interface/memory.h> | 41 | #include <xen/interface/memory.h> |
42 | #include <xen/interface/xen-mca.h> | ||
41 | #include <xen/features.h> | 43 | #include <xen/features.h> |
42 | #include <xen/page.h> | 44 | #include <xen/page.h> |
43 | #include <xen/hvm.h> | 45 | #include <xen/hvm.h> |
@@ -107,7 +109,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback); | |||
107 | * Point at some empty memory to start with. We map the real shared_info | 109 | * Point at some empty memory to start with. We map the real shared_info |
108 | * page as soon as fixmap is up and running. | 110 | * page as soon as fixmap is up and running. |
109 | */ | 111 | */ |
110 | struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; | 112 | struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; |
111 | 113 | ||
112 | /* | 114 | /* |
113 | * Flag to determine whether vcpu info placement is available on all | 115 | * Flag to determine whether vcpu info placement is available on all |
@@ -124,6 +126,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; | |||
124 | */ | 126 | */ |
125 | static int have_vcpu_info_placement = 1; | 127 | static int have_vcpu_info_placement = 1; |
126 | 128 | ||
129 | struct tls_descs { | ||
130 | struct desc_struct desc[3]; | ||
131 | }; | ||
132 | |||
133 | /* | ||
134 | * Updating the 3 TLS descriptors in the GDT on every task switch is | ||
135 | * surprisingly expensive, so we avoid updating them if they haven't | ||
136 | * changed. Since Xen writes different descriptors than the one | ||
137 | * passed in the update_descriptor hypercall, we keep shadow copies to | ||
138 | * compare against. | ||
139 | */ | ||
140 | static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); | ||
141 | |||
127 | static void clamp_max_cpus(void) | 142 | static void clamp_max_cpus(void) |
128 | { | 143 | { |
129 | #ifdef CONFIG_SMP | 144 | #ifdef CONFIG_SMP |
@@ -341,9 +356,7 @@ static void __init xen_init_cpuid_mask(void) | |||
341 | unsigned int xsave_mask; | 356 | unsigned int xsave_mask; |
342 | 357 | ||
343 | cpuid_leaf1_edx_mask = | 358 | cpuid_leaf1_edx_mask = |
344 | ~((1 << X86_FEATURE_MCE) | /* disable MCE */ | 359 | ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */ |
345 | (1 << X86_FEATURE_MCA) | /* disable MCA */ | ||
346 | (1 << X86_FEATURE_MTRR) | /* disable MTRR */ | ||
347 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | 360 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ |
348 | 361 | ||
349 | if (!xen_initial_domain()) | 362 | if (!xen_initial_domain()) |
@@ -540,12 +553,28 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) | |||
540 | BUG(); | 553 | BUG(); |
541 | } | 554 | } |
542 | 555 | ||
556 | static inline bool desc_equal(const struct desc_struct *d1, | ||
557 | const struct desc_struct *d2) | ||
558 | { | ||
559 | return d1->a == d2->a && d1->b == d2->b; | ||
560 | } | ||
561 | |||
543 | static void load_TLS_descriptor(struct thread_struct *t, | 562 | static void load_TLS_descriptor(struct thread_struct *t, |
544 | unsigned int cpu, unsigned int i) | 563 | unsigned int cpu, unsigned int i) |
545 | { | 564 | { |
546 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | 565 | struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i]; |
547 | xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); | 566 | struct desc_struct *gdt; |
548 | struct multicall_space mc = __xen_mc_entry(0); | 567 | xmaddr_t maddr; |
568 | struct multicall_space mc; | ||
569 | |||
570 | if (desc_equal(shadow, &t->tls_array[i])) | ||
571 | return; | ||
572 | |||
573 | *shadow = t->tls_array[i]; | ||
574 | |||
575 | gdt = get_cpu_gdt_table(cpu); | ||
576 | maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); | ||
577 | mc = __xen_mc_entry(0); | ||
549 | 578 | ||
550 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); | 579 | MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); |
551 | } | 580 | } |
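The shadow_tls_desc logic above is a compare-before-write cache: remember the last value written and skip the expensive update when nothing changed. Below is a user-space model of the same pattern, with a counter standing in for the MULTI_update_descriptor() hypercall.

#include <stdbool.h>
#include <stdio.h>

struct desc { unsigned int a, b; };

static struct desc shadow[3];           /* per-cpu in the real code */
static int expensive_updates;

static bool desc_equal(const struct desc *d1, const struct desc *d2)
{
	return d1->a == d2->a && d1->b == d2->b;
}

static void load_descriptor(int i, const struct desc *d)
{
	if (desc_equal(&shadow[i], d))
		return;                 /* unchanged: no hypercall needed */
	shadow[i] = *d;
	expensive_updates++;            /* MULTI_update_descriptor() here */
}

int main(void)
{
	struct desc d = { 0x12345678, 0x9abcdef0 };

	load_descriptor(0, &d);
	load_descriptor(0, &d);         /* second call is skipped */
	printf("updates = %d\n", expensive_updates);  /* prints 1 */
	return 0;
}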
@@ -627,8 +656,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
627 | /* | 656 | /* |
628 | * Look for known traps using IST, and substitute them | 657 | * Look for known traps using IST, and substitute them |
629 | * appropriately. The debugger ones are the only ones we care | 658 | * appropriately. The debugger ones are the only ones we care |
630 | * about. Xen will handle faults like double_fault and | 659 | * about. Xen will handle faults like double_fault, |
631 | * machine_check, so we should never see them. Warn if | 660 | * so we should never see them. Warn if |
632 | * there's an unexpected IST-using fault handler. | 661 | * there's an unexpected IST-using fault handler. |
633 | */ | 662 | */ |
634 | if (addr == (unsigned long)debug) | 663 | if (addr == (unsigned long)debug) |
@@ -643,7 +672,11 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, | |||
643 | return 0; | 672 | return 0; |
644 | #ifdef CONFIG_X86_MCE | 673 | #ifdef CONFIG_X86_MCE |
645 | } else if (addr == (unsigned long)machine_check) { | 674 | } else if (addr == (unsigned long)machine_check) { |
646 | return 0; | 675 | /* |
676 | * when xen hypervisor inject vMCE to guest, | ||
677 | * use native mce handler to handle it | ||
678 | */ | ||
679 | ; | ||
647 | #endif | 680 | #endif |
648 | } else { | 681 | } else { |
649 | /* Some other trap using IST? */ | 682 | /* Some other trap using IST? */ |
@@ -1124,9 +1157,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { | |||
1124 | .wbinvd = native_wbinvd, | 1157 | .wbinvd = native_wbinvd, |
1125 | 1158 | ||
1126 | .read_msr = native_read_msr_safe, | 1159 | .read_msr = native_read_msr_safe, |
1127 | .rdmsr_regs = native_rdmsr_safe_regs, | ||
1128 | .write_msr = xen_write_msr_safe, | 1160 | .write_msr = xen_write_msr_safe, |
1129 | .wrmsr_regs = native_wrmsr_safe_regs, | ||
1130 | 1161 | ||
1131 | .read_tsc = native_read_tsc, | 1162 | .read_tsc = native_read_tsc, |
1132 | .read_pmc = native_read_pmc, | 1163 | .read_pmc = native_read_pmc, |
@@ -1439,64 +1470,155 @@ asmlinkage void __init xen_start_kernel(void) | |||
1439 | #endif | 1470 | #endif |
1440 | } | 1471 | } |
1441 | 1472 | ||
1442 | static int init_hvm_pv_info(int *major, int *minor) | 1473 | #ifdef CONFIG_XEN_PVHVM |
1443 | { | 1474 | /* |
1444 | uint32_t eax, ebx, ecx, edx, pages, msr, base; | 1475 | * The pfn containing the shared_info is located somewhere in RAM. This |
1445 | u64 pfn; | 1476 | * will cause trouble if the current kernel is doing a kexec boot into a |
1446 | 1477 | * new kernel. The new kernel (and its startup code) cannot know where | |
1447 | base = xen_cpuid_base(); | 1478 | * the pfn is, so it cannot reserve the page. The hypervisor will |
1448 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | 1479 | * continue to update the pfn, and as a result memory corruption occurs |
1449 | 1480 | * in the new kernel. | |
1450 | *major = eax >> 16; | 1481 | * |
1451 | *minor = eax & 0xffff; | 1482 | * One way to work around this issue is to allocate a page in the |
1452 | printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor); | 1483 | * xen-platform pci device's BAR memory range. But pci init is done very |
1453 | 1484 | * late and the shared_info page is already in use very early to read | |
1454 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | 1485 | * the pvclock. So moving the pfn from RAM to MMIO is racy because some |
1455 | 1486 | * code paths on other vcpus could access the pfn during the small | |
1456 | pfn = __pa(hypercall_page); | 1487 | * window when the old pfn is moved to the new pfn. There is even a |
1457 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | 1488 | * small window where the old pfn is not backed by an mfn, and during that |
1458 | 1489 | * time all reads return -1. | |
1459 | xen_setup_features(); | 1490 | * |
1460 | 1491 | * Because it is not known upfront where the MMIO region is located, it | |
1461 | pv_info.name = "Xen HVM"; | 1492 | * cannot be used right from the start in xen_hvm_init_shared_info. |
1462 | 1493 | * | |
1463 | xen_domain_type = XEN_HVM_DOMAIN; | 1494 | * To minimise trouble, the move of the pfn is done shortly before kexec. |
1495 | * This does not eliminate the race because all vcpus are still online | ||
1496 | * when the syscore_ops are called. But hopefully there is no work | ||
1497 | * pending at this point in time. Also the syscore_op is run last, which | ||
1498 | * reduces the risk further. | ||
1499 | */ | ||
1464 | 1500 | ||
1465 | return 0; | 1501 | static struct shared_info *xen_hvm_shared_info; |
1466 | } | ||
1467 | 1502 | ||
1468 | void __ref xen_hvm_init_shared_info(void) | 1503 | static void xen_hvm_connect_shared_info(unsigned long pfn) |
1469 | { | 1504 | { |
1470 | int cpu; | ||
1471 | struct xen_add_to_physmap xatp; | 1505 | struct xen_add_to_physmap xatp; |
1472 | static struct shared_info *shared_info_page = 0; | ||
1473 | 1506 | ||
1474 | if (!shared_info_page) | ||
1475 | shared_info_page = (struct shared_info *) | ||
1476 | extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1477 | xatp.domid = DOMID_SELF; | 1507 | xatp.domid = DOMID_SELF; |
1478 | xatp.idx = 0; | 1508 | xatp.idx = 0; |
1479 | xatp.space = XENMAPSPACE_shared_info; | 1509 | xatp.space = XENMAPSPACE_shared_info; |
1480 | xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; | 1510 | xatp.gpfn = pfn; |
1481 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | 1511 | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) |
1482 | BUG(); | 1512 | BUG(); |
1483 | 1513 | ||
1484 | HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; | 1514 | } |
1515 | static void xen_hvm_set_shared_info(struct shared_info *sip) | ||
1516 | { | ||
1517 | int cpu; | ||
1518 | |||
1519 | HYPERVISOR_shared_info = sip; | ||
1485 | 1520 | ||
1486 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info | 1521 | /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info |
1487 | * page, we use it in the event channel upcall and in some pvclock | 1522 | * page, we use it in the event channel upcall and in some pvclock |
1488 | * related functions. We don't need the vcpu_info placement | 1523 | * related functions. We don't need the vcpu_info placement |
1489 | * optimizations because we don't use any pv_mmu or pv_irq op on | 1524 | * optimizations because we don't use any pv_mmu or pv_irq op on |
1490 | * HVM. | 1525 | * HVM. |
1491 | * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is | 1526 | * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is |
1492 | * online but xen_hvm_init_shared_info is run at resume time too and | 1527 | * online but xen_hvm_set_shared_info is run at resume time too and |
1493 | * in that case multiple vcpus might be online. */ | 1528 | * in that case multiple vcpus might be online. */ |
1494 | for_each_online_cpu(cpu) { | 1529 | for_each_online_cpu(cpu) { |
1495 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 1530 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
1496 | } | 1531 | } |
1497 | } | 1532 | } |
1498 | 1533 | ||
1499 | #ifdef CONFIG_XEN_PVHVM | 1534 | /* Reconnect the shared_info pfn to a mfn */ |
1535 | void xen_hvm_resume_shared_info(void) | ||
1536 | { | ||
1537 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1538 | } | ||
1539 | |||
1540 | #ifdef CONFIG_KEXEC | ||
1541 | static struct shared_info *xen_hvm_shared_info_kexec; | ||
1542 | static unsigned long xen_hvm_shared_info_pfn_kexec; | ||
1543 | |||
1544 | /* Remember a pfn in MMIO space for kexec reboot */ | ||
1545 | void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) | ||
1546 | { | ||
1547 | xen_hvm_shared_info_kexec = sip; | ||
1548 | xen_hvm_shared_info_pfn_kexec = pfn; | ||
1549 | } | ||
1550 | |||
1551 | static void xen_hvm_syscore_shutdown(void) | ||
1552 | { | ||
1553 | struct xen_memory_reservation reservation = { | ||
1554 | .domid = DOMID_SELF, | ||
1555 | .nr_extents = 1, | ||
1556 | }; | ||
1557 | unsigned long prev_pfn; | ||
1558 | int rc; | ||
1559 | |||
1560 | if (!xen_hvm_shared_info_kexec) | ||
1561 | return; | ||
1562 | |||
1563 | prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; | ||
1564 | set_xen_guest_handle(reservation.extent_start, &prev_pfn); | ||
1565 | |||
1566 | /* Move pfn to MMIO, disconnects previous pfn from mfn */ | ||
1567 | xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); | ||
1568 | |||
1569 | /* Update pointers, following hypercall is also a memory barrier */ | ||
1570 | xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); | ||
1571 | |||
1572 | /* Allocate new mfn for previous pfn */ | ||
1573 | do { | ||
1574 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); | ||
1575 | if (rc == 0) | ||
1576 | msleep(123); | ||
1577 | } while (rc == 0); | ||
1578 | |||
1579 | /* Make sure the previous pfn is really connected to a (new) mfn */ | ||
1580 | BUG_ON(rc != 1); | ||
1581 | } | ||
1582 | |||
1583 | static struct syscore_ops xen_hvm_syscore_ops = { | ||
1584 | .shutdown = xen_hvm_syscore_shutdown, | ||
1585 | }; | ||
1586 | #endif | ||
1587 | |||
1588 | /* Use a pfn in RAM, may move to MMIO before kexec. */ | ||
1589 | static void __init xen_hvm_init_shared_info(void) | ||
1590 | { | ||
1591 | /* Remember pointer for resume */ | ||
1592 | xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
1593 | xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); | ||
1594 | xen_hvm_set_shared_info(xen_hvm_shared_info); | ||
1595 | } | ||
1596 | |||
1597 | static void __init init_hvm_pv_info(void) | ||
1598 | { | ||
1599 | int major, minor; | ||
1600 | uint32_t eax, ebx, ecx, edx, pages, msr, base; | ||
1601 | u64 pfn; | ||
1602 | |||
1603 | base = xen_cpuid_base(); | ||
1604 | cpuid(base + 1, &eax, &ebx, &ecx, &edx); | ||
1605 | |||
1606 | major = eax >> 16; | ||
1607 | minor = eax & 0xffff; | ||
1608 | printk(KERN_INFO "Xen version %d.%d.\n", major, minor); | ||
1609 | |||
1610 | cpuid(base + 2, &pages, &msr, &ecx, &edx); | ||
1611 | |||
1612 | pfn = __pa(hypercall_page); | ||
1613 | wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); | ||
1614 | |||
1615 | xen_setup_features(); | ||
1616 | |||
1617 | pv_info.name = "Xen HVM"; | ||
1618 | |||
1619 | xen_domain_type = XEN_HVM_DOMAIN; | ||
1620 | } | ||
1621 | |||
1500 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, | 1622 | static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, |
1501 | unsigned long action, void *hcpu) | 1623 | unsigned long action, void *hcpu) |
1502 | { | 1624 | { |
@@ -1519,14 +1641,12 @@ static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = { | |||
1519 | 1641 | ||
1520 | static void __init xen_hvm_guest_init(void) | 1642 | static void __init xen_hvm_guest_init(void) |
1521 | { | 1643 | { |
1522 | int r; | 1644 | init_hvm_pv_info(); |
1523 | int major, minor; | ||
1524 | |||
1525 | r = init_hvm_pv_info(&major, &minor); | ||
1526 | if (r < 0) | ||
1527 | return; | ||
1528 | 1645 | ||
1529 | xen_hvm_init_shared_info(); | 1646 | xen_hvm_init_shared_info(); |
1647 | #ifdef CONFIG_KEXEC | ||
1648 | register_syscore_ops(&xen_hvm_syscore_ops); | ||
1649 | #endif | ||
1530 | 1650 | ||
1531 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1651 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
1532 | xen_have_vector_callback = 1; | 1652 | xen_have_vector_callback = 1; |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3a73785631ce..b65a76133f4f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -308,8 +308,20 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) | |||
308 | 308 | ||
309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) | 309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) |
310 | { | 310 | { |
311 | if (!xen_batched_set_pte(ptep, pteval)) | 311 | if (!xen_batched_set_pte(ptep, pteval)) { |
312 | native_set_pte(ptep, pteval); | 312 | /* |
313 | * Could call native_set_pte() here and trap and | ||
314 | * emulate the PTE write but with 32-bit guests this | ||
315 | * needs two traps (one for each of the two 32-bit | ||
316 | * words in the PTE) so do one hypercall directly | ||
317 | * instead. | ||
318 | */ | ||
319 | struct mmu_update u; | ||
320 | |||
321 | u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; | ||
322 | u.val = pte_val_ma(pteval); | ||
323 | HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF); | ||
324 | } | ||
313 | } | 325 | } |
314 | 326 | ||
315 | static void xen_set_pte(pte_t *ptep, pte_t pteval) | 327 | static void xen_set_pte(pte_t *ptep, pte_t pteval) |
@@ -1244,7 +1256,8 @@ static void xen_flush_tlb_single(unsigned long addr) | |||
1244 | } | 1256 | } |
1245 | 1257 | ||
1246 | static void xen_flush_tlb_others(const struct cpumask *cpus, | 1258 | static void xen_flush_tlb_others(const struct cpumask *cpus, |
1247 | struct mm_struct *mm, unsigned long va) | 1259 | struct mm_struct *mm, unsigned long start, |
1260 | unsigned long end) | ||
1248 | { | 1261 | { |
1249 | struct { | 1262 | struct { |
1250 | struct mmuext_op op; | 1263 | struct mmuext_op op; |
@@ -1256,7 +1269,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1256 | } *args; | 1269 | } *args; |
1257 | struct multicall_space mcs; | 1270 | struct multicall_space mcs; |
1258 | 1271 | ||
1259 | trace_xen_mmu_flush_tlb_others(cpus, mm, va); | 1272 | trace_xen_mmu_flush_tlb_others(cpus, mm, start, end); |
1260 | 1273 | ||
1261 | if (cpumask_empty(cpus)) | 1274 | if (cpumask_empty(cpus)) |
1262 | return; /* nothing to do */ | 1275 | return; /* nothing to do */ |
@@ -1269,11 +1282,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1269 | cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); | 1282 | cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); |
1270 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); | 1283 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); |
1271 | 1284 | ||
1272 | if (va == TLB_FLUSH_ALL) { | 1285 | args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; |
1273 | args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; | 1286 | if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { |
1274 | } else { | ||
1275 | args->op.cmd = MMUEXT_INVLPG_MULTI; | 1287 | args->op.cmd = MMUEXT_INVLPG_MULTI; |
1276 | args->op.arg1.linear_addr = va; | 1288 | args->op.arg1.linear_addr = start; |
1277 | } | 1289 | } |
1278 | 1290 | ||
1279 | MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); | 1291 | MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); |
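The rewritten xen_flush_tlb_others() defaults to a full flush (MMUEXT_TLB_FLUSH_MULTI) and downgrades to a single-page invalidation (MMUEXT_INVLPG_MULTI) only when the range covers at most one page. A compilable sketch of just that decision; the constants are illustrative stand-ins for the Xen definitions.

#include <stdio.h>

#define TLB_FLUSH_ALL (~0UL)
#define PAGE_SIZE 4096UL

enum cmd { TLB_FLUSH_MULTI, INVLPG_MULTI };

static enum cmd pick_flush(unsigned long start, unsigned long end)
{
	if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE)
		return INVLPG_MULTI;   /* one page: invalidate just that address */
	return TLB_FLUSH_MULTI;        /* larger range: flush everything */
}

int main(void)
{
	printf("%d %d\n",
	       pick_flush(0x1000, 0x2000),       /* 1: single page */
	       pick_flush(0x1000, 0x10000));     /* 0: full flush */
	return 0;
}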
@@ -1416,13 +1428,28 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1416 | } | 1428 | } |
1417 | #endif /* CONFIG_X86_64 */ | 1429 | #endif /* CONFIG_X86_64 */ |
1418 | 1430 | ||
1419 | /* Init-time set_pte while constructing initial pagetables, which | 1431 | /* |
1420 | doesn't allow RO pagetable pages to be remapped RW */ | 1432 | * Init-time set_pte while constructing initial pagetables, which |
1433 | * doesn't allow RO page table pages to be remapped RW. | ||
1434 | * | ||
1435 | * If there is no MFN for this PFN then this page is initially | ||
1436 | * ballooned out so clear the PTE (as in decrease_reservation() in | ||
1437 | * drivers/xen/balloon.c). | ||
1438 | * | ||
1439 | * Many of these PTE updates are done on unpinned and writable pages | ||
1440 | * and doing a hypercall for these is unnecessary and expensive. At | ||
1441 | * this point it is not possible to tell if a page is pinned or not, | ||
1442 | * so always write the PTE directly and rely on Xen trapping and | ||
1443 | * emulating any updates as necessary. | ||
1444 | */ | ||
1421 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) | 1445 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) |
1422 | { | 1446 | { |
1423 | pte = mask_rw_pte(ptep, pte); | 1447 | if (pte_mfn(pte) != INVALID_P2M_ENTRY) |
1448 | pte = mask_rw_pte(ptep, pte); | ||
1449 | else | ||
1450 | pte = __pte_ma(0); | ||
1424 | 1451 | ||
1425 | xen_set_pte(ptep, pte); | 1452 | native_set_pte(ptep, pte); |
1426 | } | 1453 | } |
1427 | 1454 | ||
1428 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | 1455 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a4790bf22c59..ead85576d54a 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -157,25 +157,24 @@ static unsigned long __init xen_populate_chunk( | |||
157 | unsigned long dest_pfn; | 157 | unsigned long dest_pfn; |
158 | 158 | ||
159 | for (i = 0, entry = list; i < map_size; i++, entry++) { | 159 | for (i = 0, entry = list; i < map_size; i++, entry++) { |
160 | unsigned long credits = credits_left; | ||
161 | unsigned long s_pfn; | 160 | unsigned long s_pfn; |
162 | unsigned long e_pfn; | 161 | unsigned long e_pfn; |
163 | unsigned long pfns; | 162 | unsigned long pfns; |
164 | long capacity; | 163 | long capacity; |
165 | 164 | ||
166 | if (credits <= 0) | 165 | if (credits_left <= 0) |
167 | break; | 166 | break; |
168 | 167 | ||
169 | if (entry->type != E820_RAM) | 168 | if (entry->type != E820_RAM) |
170 | continue; | 169 | continue; |
171 | 170 | ||
172 | e_pfn = PFN_UP(entry->addr + entry->size); | 171 | e_pfn = PFN_DOWN(entry->addr + entry->size); |
173 | 172 | ||
174 | /* We only care about E820 after the xen_start_info->nr_pages */ | 173 | /* We only care about E820 after the xen_start_info->nr_pages */ |
175 | if (e_pfn <= max_pfn) | 174 | if (e_pfn <= max_pfn) |
176 | continue; | 175 | continue; |
177 | 176 | ||
178 | s_pfn = PFN_DOWN(entry->addr); | 177 | s_pfn = PFN_UP(entry->addr); |
179 | /* If the E820 falls within the nr_pages, we want to start | 178 | /* If the E820 falls within the nr_pages, we want to start |
180 | * at the nr_pages PFN. | 179 | * at the nr_pages PFN. |
181 | * If that would mean going past the E820 entry, skip it | 180 | * If that would mean going past the E820 entry, skip it |
@@ -184,23 +183,19 @@ static unsigned long __init xen_populate_chunk( | |||
184 | capacity = e_pfn - max_pfn; | 183 | capacity = e_pfn - max_pfn; |
185 | dest_pfn = max_pfn; | 184 | dest_pfn = max_pfn; |
186 | } else { | 185 | } else { |
187 | /* last_pfn MUST be within E820_RAM regions */ | ||
188 | if (*last_pfn && e_pfn >= *last_pfn) | ||
189 | s_pfn = *last_pfn; | ||
190 | capacity = e_pfn - s_pfn; | 186 | capacity = e_pfn - s_pfn; |
191 | dest_pfn = s_pfn; | 187 | dest_pfn = s_pfn; |
192 | } | 188 | } |
193 | /* If we had filled this E820_RAM entry, go to the next one. */ | ||
194 | if (capacity <= 0) | ||
195 | continue; | ||
196 | 189 | ||
197 | if (credits > capacity) | 190 | if (credits_left < capacity) |
198 | credits = capacity; | 191 | capacity = credits_left; |
199 | 192 | ||
200 | pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false); | 193 | pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false); |
201 | done += pfns; | 194 | done += pfns; |
202 | credits_left -= pfns; | ||
203 | *last_pfn = (dest_pfn + pfns); | 195 | *last_pfn = (dest_pfn + pfns); |
196 | if (pfns < capacity) | ||
197 | break; | ||
198 | credits_left -= pfns; | ||
204 | } | 199 | } |
205 | return done; | 200 | return done; |
206 | } | 201 | } |
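The PFN_UP/PFN_DOWN swap above is the heart of this fix: to stay strictly inside an E820 RAM entry, the start address must round up to the next page boundary and the end address must round down, otherwise the chunk can spill past the entry's edges. A worked example, with the macros re-derived from a 4 KiB page size:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

int main(void)
{
	unsigned long addr = 0x1234, size = 0x5000;  /* unaligned RAM entry */
	unsigned long s_pfn = PFN_UP(addr);          /* first whole page: 2 */
	unsigned long e_pfn = PFN_DOWN(addr + size); /* end of last whole page: 6 */

	printf("usable pfns [%lu, %lu)\n", s_pfn, e_pfn);
	return 0;
}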
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index afb250d22a6b..f58dca7a6e52 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -80,9 +80,7 @@ static void __cpuinit cpu_bringup(void) | |||
80 | 80 | ||
81 | notify_cpu_starting(cpu); | 81 | notify_cpu_starting(cpu); |
82 | 82 | ||
83 | ipi_call_lock(); | ||
84 | set_cpu_online(cpu, true); | 83 | set_cpu_online(cpu, true); |
85 | ipi_call_unlock(); | ||
86 | 84 | ||
87 | this_cpu_write(cpu_state, CPU_ONLINE); | 85 | this_cpu_write(cpu_state, CPU_ONLINE); |
88 | 86 | ||
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 45329c8c226e..ae8a00c39de4 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) | |||
30 | { | 30 | { |
31 | #ifdef CONFIG_XEN_PVHVM | 31 | #ifdef CONFIG_XEN_PVHVM |
32 | int cpu; | 32 | int cpu; |
33 | xen_hvm_init_shared_info(); | 33 | xen_hvm_resume_shared_info(); |
34 | xen_callback_vector(); | 34 | xen_callback_vector(); |
35 | xen_unplug_emulated_devices(); | 35 | xen_unplug_emulated_devices(); |
36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { | 36 | if (xen_feature(XENFEAT_hvm_safe_pvclock)) { |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 202d4c150154..1e4329e04e0f 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -41,7 +41,7 @@ void xen_enable_syscall(void); | |||
41 | void xen_vcpu_restore(void); | 41 | void xen_vcpu_restore(void); |
42 | 42 | ||
43 | void xen_callback_vector(void); | 43 | void xen_callback_vector(void); |
44 | void xen_hvm_init_shared_info(void); | 44 | void xen_hvm_resume_shared_info(void); |
45 | void xen_unplug_emulated_devices(void); | 45 | void xen_unplug_emulated_devices(void); |
46 | 46 | ||
47 | void __init xen_build_dynamic_phys_to_machine(void); | 47 | void __init xen_build_dynamic_phys_to_machine(void); |