Diffstat (limited to 'arch/x86')
50 files changed, 1288 insertions, 396 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 685692c94f05..c49b4dc8ffe0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1725,7 +1725,7 @@ config PHYSICAL_ALIGN
1725 | 1725 | ||
1726 | config HOTPLUG_CPU | 1726 | config HOTPLUG_CPU |
1727 | bool "Support for hot-pluggable CPUs" | 1727 | bool "Support for hot-pluggable CPUs" |
1728 | depends on SMP && HOTPLUG | 1728 | depends on SMP |
1729 | ---help--- | 1729 | ---help--- |
1730 | Say Y here to allow turning CPUs off and on. CPUs can be | 1730 | Say Y here to allow turning CPUs off and on. CPUs can be |
1731 | controlled through /sys/devices/system/cpu. | 1731 | controlled through /sys/devices/system/cpu. |
@@ -2265,6 +2265,7 @@ source "fs/Kconfig.binfmt"
2265 | config IA32_EMULATION | 2265 | config IA32_EMULATION |
2266 | bool "IA32 Emulation" | 2266 | bool "IA32 Emulation" |
2267 | depends on X86_64 | 2267 | depends on X86_64 |
2268 | select BINFMT_ELF | ||
2268 | select COMPAT_BINFMT_ELF | 2269 | select COMPAT_BINFMT_ELF |
2269 | select HAVE_UID16 | 2270 | select HAVE_UID16 |
2270 | ---help--- | 2271 | ---help--- |
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 35ee62fccf98..c205035a6b96 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -251,51 +251,6 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size)
251 | *size = len; | 251 | *size = len; |
252 | } | 252 | } |
253 | 253 | ||
254 | static efi_status_t setup_efi_vars(struct boot_params *params) | ||
255 | { | ||
256 | struct setup_data *data; | ||
257 | struct efi_var_bootdata *efidata; | ||
258 | u64 store_size, remaining_size, var_size; | ||
259 | efi_status_t status; | ||
260 | |||
261 | if (sys_table->runtime->hdr.revision < EFI_2_00_SYSTEM_TABLE_REVISION) | ||
262 | return EFI_UNSUPPORTED; | ||
263 | |||
264 | data = (struct setup_data *)(unsigned long)params->hdr.setup_data; | ||
265 | |||
266 | while (data && data->next) | ||
267 | data = (struct setup_data *)(unsigned long)data->next; | ||
268 | |||
269 | status = efi_call_phys4((void *)sys_table->runtime->query_variable_info, | ||
270 | EFI_VARIABLE_NON_VOLATILE | | ||
271 | EFI_VARIABLE_BOOTSERVICE_ACCESS | | ||
272 | EFI_VARIABLE_RUNTIME_ACCESS, &store_size, | ||
273 | &remaining_size, &var_size); | ||
274 | |||
275 | if (status != EFI_SUCCESS) | ||
276 | return status; | ||
277 | |||
278 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | ||
279 | EFI_LOADER_DATA, sizeof(*efidata), &efidata); | ||
280 | |||
281 | if (status != EFI_SUCCESS) | ||
282 | return status; | ||
283 | |||
284 | efidata->data.type = SETUP_EFI_VARS; | ||
285 | efidata->data.len = sizeof(struct efi_var_bootdata) - | ||
286 | sizeof(struct setup_data); | ||
287 | efidata->data.next = 0; | ||
288 | efidata->store_size = store_size; | ||
289 | efidata->remaining_size = remaining_size; | ||
290 | efidata->max_var_size = var_size; | ||
291 | |||
292 | if (data) | ||
293 | data->next = (unsigned long)efidata; | ||
294 | else | ||
295 | params->hdr.setup_data = (unsigned long)efidata; | ||
296 | |||
297 | } | ||
298 | |||
299 | static efi_status_t setup_efi_pci(struct boot_params *params) | 254 | static efi_status_t setup_efi_pci(struct boot_params *params) |
300 | { | 255 | { |
301 | efi_pci_io_protocol *pci; | 256 | efi_pci_io_protocol *pci; |
@@ -1202,8 +1157,6 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
1202 | 1157 | ||
1203 | setup_graphics(boot_params); | 1158 | setup_graphics(boot_params); |
1204 | 1159 | ||
1205 | setup_efi_vars(boot_params); | ||
1206 | |||
1207 | setup_efi_pci(boot_params); | 1160 | setup_efi_pci(boot_params); |
1208 | 1161 | ||
1209 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 1162 | status = efi_call_phys3(sys_table->boottime->allocate_pool, |
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 62fe22cd4cba..477e9d75149b 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2681,56 +2681,68 @@ ENTRY(aesni_xts_crypt8)
2681 | addq %rcx, KEYP | 2681 | addq %rcx, KEYP |
2682 | 2682 | ||
2683 | movdqa IV, STATE1 | 2683 | movdqa IV, STATE1 |
2684 | pxor 0x00(INP), STATE1 | 2684 | movdqu 0x00(INP), INC |
2685 | pxor INC, STATE1 | ||
2685 | movdqu IV, 0x00(OUTP) | 2686 | movdqu IV, 0x00(OUTP) |
2686 | 2687 | ||
2687 | _aesni_gf128mul_x_ble() | 2688 | _aesni_gf128mul_x_ble() |
2688 | movdqa IV, STATE2 | 2689 | movdqa IV, STATE2 |
2689 | pxor 0x10(INP), STATE2 | 2690 | movdqu 0x10(INP), INC |
2691 | pxor INC, STATE2 | ||
2690 | movdqu IV, 0x10(OUTP) | 2692 | movdqu IV, 0x10(OUTP) |
2691 | 2693 | ||
2692 | _aesni_gf128mul_x_ble() | 2694 | _aesni_gf128mul_x_ble() |
2693 | movdqa IV, STATE3 | 2695 | movdqa IV, STATE3 |
2694 | pxor 0x20(INP), STATE3 | 2696 | movdqu 0x20(INP), INC |
2697 | pxor INC, STATE3 | ||
2695 | movdqu IV, 0x20(OUTP) | 2698 | movdqu IV, 0x20(OUTP) |
2696 | 2699 | ||
2697 | _aesni_gf128mul_x_ble() | 2700 | _aesni_gf128mul_x_ble() |
2698 | movdqa IV, STATE4 | 2701 | movdqa IV, STATE4 |
2699 | pxor 0x30(INP), STATE4 | 2702 | movdqu 0x30(INP), INC |
2703 | pxor INC, STATE4 | ||
2700 | movdqu IV, 0x30(OUTP) | 2704 | movdqu IV, 0x30(OUTP) |
2701 | 2705 | ||
2702 | call *%r11 | 2706 | call *%r11 |
2703 | 2707 | ||
2704 | pxor 0x00(OUTP), STATE1 | 2708 | movdqu 0x00(OUTP), INC |
2709 | pxor INC, STATE1 | ||
2705 | movdqu STATE1, 0x00(OUTP) | 2710 | movdqu STATE1, 0x00(OUTP) |
2706 | 2711 | ||
2707 | _aesni_gf128mul_x_ble() | 2712 | _aesni_gf128mul_x_ble() |
2708 | movdqa IV, STATE1 | 2713 | movdqa IV, STATE1 |
2709 | pxor 0x40(INP), STATE1 | 2714 | movdqu 0x40(INP), INC |
2715 | pxor INC, STATE1 | ||
2710 | movdqu IV, 0x40(OUTP) | 2716 | movdqu IV, 0x40(OUTP) |
2711 | 2717 | ||
2712 | pxor 0x10(OUTP), STATE2 | 2718 | movdqu 0x10(OUTP), INC |
2719 | pxor INC, STATE2 | ||
2713 | movdqu STATE2, 0x10(OUTP) | 2720 | movdqu STATE2, 0x10(OUTP) |
2714 | 2721 | ||
2715 | _aesni_gf128mul_x_ble() | 2722 | _aesni_gf128mul_x_ble() |
2716 | movdqa IV, STATE2 | 2723 | movdqa IV, STATE2 |
2717 | pxor 0x50(INP), STATE2 | 2724 | movdqu 0x50(INP), INC |
2725 | pxor INC, STATE2 | ||
2718 | movdqu IV, 0x50(OUTP) | 2726 | movdqu IV, 0x50(OUTP) |
2719 | 2727 | ||
2720 | pxor 0x20(OUTP), STATE3 | 2728 | movdqu 0x20(OUTP), INC |
2729 | pxor INC, STATE3 | ||
2721 | movdqu STATE3, 0x20(OUTP) | 2730 | movdqu STATE3, 0x20(OUTP) |
2722 | 2731 | ||
2723 | _aesni_gf128mul_x_ble() | 2732 | _aesni_gf128mul_x_ble() |
2724 | movdqa IV, STATE3 | 2733 | movdqa IV, STATE3 |
2725 | pxor 0x60(INP), STATE3 | 2734 | movdqu 0x60(INP), INC |
2735 | pxor INC, STATE3 | ||
2726 | movdqu IV, 0x60(OUTP) | 2736 | movdqu IV, 0x60(OUTP) |
2727 | 2737 | ||
2728 | pxor 0x30(OUTP), STATE4 | 2738 | movdqu 0x30(OUTP), INC |
2739 | pxor INC, STATE4 | ||
2729 | movdqu STATE4, 0x30(OUTP) | 2740 | movdqu STATE4, 0x30(OUTP) |
2730 | 2741 | ||
2731 | _aesni_gf128mul_x_ble() | 2742 | _aesni_gf128mul_x_ble() |
2732 | movdqa IV, STATE4 | 2743 | movdqa IV, STATE4 |
2733 | pxor 0x70(INP), STATE4 | 2744 | movdqu 0x70(INP), INC |
2745 | pxor INC, STATE4 | ||
2734 | movdqu IV, 0x70(OUTP) | 2746 | movdqu IV, 0x70(OUTP) |
2735 | 2747 | ||
2736 | _aesni_gf128mul_x_ble() | 2748 | _aesni_gf128mul_x_ble() |
@@ -2738,16 +2750,20 @@ ENTRY(aesni_xts_crypt8)
2738 | 2750 | ||
2739 | call *%r11 | 2751 | call *%r11 |
2740 | 2752 | ||
2741 | pxor 0x40(OUTP), STATE1 | 2753 | movdqu 0x40(OUTP), INC |
2754 | pxor INC, STATE1 | ||
2742 | movdqu STATE1, 0x40(OUTP) | 2755 | movdqu STATE1, 0x40(OUTP) |
2743 | 2756 | ||
2744 | pxor 0x50(OUTP), STATE2 | 2757 | movdqu 0x50(OUTP), INC |
2758 | pxor INC, STATE2 | ||
2745 | movdqu STATE2, 0x50(OUTP) | 2759 | movdqu STATE2, 0x50(OUTP) |
2746 | 2760 | ||
2747 | pxor 0x60(OUTP), STATE3 | 2761 | movdqu 0x60(OUTP), INC |
2762 | pxor INC, STATE3 | ||
2748 | movdqu STATE3, 0x60(OUTP) | 2763 | movdqu STATE3, 0x60(OUTP) |
2749 | 2764 | ||
2750 | pxor 0x70(OUTP), STATE4 | 2765 | movdqu 0x70(OUTP), INC |
2766 | pxor INC, STATE4 | ||
2751 | movdqu STATE4, 0x70(OUTP) | 2767 | movdqu STATE4, 0x70(OUTP) |
2752 | 2768 | ||
2753 | ret | 2769 | ret |
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
index 94c27df8a549..f247304299a2 100644
--- a/arch/x86/crypto/crc32-pclmul_asm.S
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -240,7 +240,7 @@ fold_64:
240 | pand %xmm3, %xmm1 | 240 | pand %xmm3, %xmm1 |
241 | PCLMULQDQ 0x00, CONSTANT, %xmm1 | 241 | PCLMULQDQ 0x00, CONSTANT, %xmm1 |
242 | pxor %xmm2, %xmm1 | 242 | pxor %xmm2, %xmm1 |
243 | pextrd $0x01, %xmm1, %eax | 243 | PEXTRD 0x01, %xmm1, %eax |
244 | 244 | ||
245 | ret | 245 | ret |
246 | ENDPROC(crc32_pclmul_le_16) | 246 | ENDPROC(crc32_pclmul_le_16) |
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 805078e08013..52ff81cce008 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -192,7 +192,7 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
192 | /* struct user */ | 192 | /* struct user */ |
193 | DUMP_WRITE(&dump, sizeof(dump)); | 193 | DUMP_WRITE(&dump, sizeof(dump)); |
194 | /* Now dump all of the user data. Include malloced stuff as well */ | 194 | /* Now dump all of the user data. Include malloced stuff as well */ |
195 | DUMP_SEEK(PAGE_SIZE); | 195 | DUMP_SEEK(PAGE_SIZE - sizeof(dump)); |
196 | /* now we start writing out the user space info */ | 196 | /* now we start writing out the user space info */ |
197 | set_fs(USER_DS); | 197 | set_fs(USER_DS); |
198 | /* Dump the data area */ | 198 | /* Dump the data area */ |
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index cf1a471a18a2..bccfca68430e 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -34,8 +34,6 @@
34 | #include <asm/sys_ia32.h> | 34 | #include <asm/sys_ia32.h> |
35 | #include <asm/smap.h> | 35 | #include <asm/smap.h> |
36 | 36 | ||
37 | #define FIX_EFLAGS __FIX_EFLAGS | ||
38 | |||
39 | int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) | 37 | int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) |
40 | { | 38 | { |
41 | int err = 0; | 39 | int err = 0; |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 2fb5d5884e23..60c89f30c727 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -102,13 +102,6 @@ extern void efi_call_phys_epilog(void);
102 | extern void efi_unmap_memmap(void); | 102 | extern void efi_unmap_memmap(void); |
103 | extern void efi_memory_uc(u64 addr, unsigned long size); | 103 | extern void efi_memory_uc(u64 addr, unsigned long size); |
104 | 104 | ||
105 | struct efi_var_bootdata { | ||
106 | struct setup_data data; | ||
107 | u64 store_size; | ||
108 | u64 remaining_size; | ||
109 | u64 max_var_size; | ||
110 | }; | ||
111 | |||
112 | #ifdef CONFIG_EFI | 105 | #ifdef CONFIG_EFI |
113 | 106 | ||
114 | static inline bool efi_is_native(void) | 107 | static inline bool efi_is_native(void) |
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index 280bf7fb6aba..3e115273ed88 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -9,12 +9,68 @@
9 | 9 | ||
10 | #define REG_NUM_INVALID 100 | 10 | #define REG_NUM_INVALID 100 |
11 | 11 | ||
12 | #define REG_TYPE_R64 0 | 12 | #define REG_TYPE_R32 0 |
13 | #define REG_TYPE_XMM 1 | 13 | #define REG_TYPE_R64 1 |
14 | #define REG_TYPE_XMM 2 | ||
14 | #define REG_TYPE_INVALID 100 | 15 | #define REG_TYPE_INVALID 100 |
15 | 16 | ||
17 | .macro R32_NUM opd r32 | ||
18 | \opd = REG_NUM_INVALID | ||
19 | .ifc \r32,%eax | ||
20 | \opd = 0 | ||
21 | .endif | ||
22 | .ifc \r32,%ecx | ||
23 | \opd = 1 | ||
24 | .endif | ||
25 | .ifc \r32,%edx | ||
26 | \opd = 2 | ||
27 | .endif | ||
28 | .ifc \r32,%ebx | ||
29 | \opd = 3 | ||
30 | .endif | ||
31 | .ifc \r32,%esp | ||
32 | \opd = 4 | ||
33 | .endif | ||
34 | .ifc \r32,%ebp | ||
35 | \opd = 5 | ||
36 | .endif | ||
37 | .ifc \r32,%esi | ||
38 | \opd = 6 | ||
39 | .endif | ||
40 | .ifc \r32,%edi | ||
41 | \opd = 7 | ||
42 | .endif | ||
43 | #ifdef CONFIG_X86_64 | ||
44 | .ifc \r32,%r8d | ||
45 | \opd = 8 | ||
46 | .endif | ||
47 | .ifc \r32,%r9d | ||
48 | \opd = 9 | ||
49 | .endif | ||
50 | .ifc \r32,%r10d | ||
51 | \opd = 10 | ||
52 | .endif | ||
53 | .ifc \r32,%r11d | ||
54 | \opd = 11 | ||
55 | .endif | ||
56 | .ifc \r32,%r12d | ||
57 | \opd = 12 | ||
58 | .endif | ||
59 | .ifc \r32,%r13d | ||
60 | \opd = 13 | ||
61 | .endif | ||
62 | .ifc \r32,%r14d | ||
63 | \opd = 14 | ||
64 | .endif | ||
65 | .ifc \r32,%r15d | ||
66 | \opd = 15 | ||
67 | .endif | ||
68 | #endif | ||
69 | .endm | ||
70 | |||
16 | .macro R64_NUM opd r64 | 71 | .macro R64_NUM opd r64 |
17 | \opd = REG_NUM_INVALID | 72 | \opd = REG_NUM_INVALID |
73 | #ifdef CONFIG_X86_64 | ||
18 | .ifc \r64,%rax | 74 | .ifc \r64,%rax |
19 | \opd = 0 | 75 | \opd = 0 |
20 | .endif | 76 | .endif |
@@ -63,6 +119,7 @@
63 | .ifc \r64,%r15 | 119 | .ifc \r64,%r15 |
64 | \opd = 15 | 120 | \opd = 15 |
65 | .endif | 121 | .endif |
122 | #endif | ||
66 | .endm | 123 | .endm |
67 | 124 | ||
68 | .macro XMM_NUM opd xmm | 125 | .macro XMM_NUM opd xmm |
@@ -118,10 +175,13 @@
118 | .endm | 175 | .endm |
119 | 176 | ||
120 | .macro REG_TYPE type reg | 177 | .macro REG_TYPE type reg |
178 | R32_NUM reg_type_r32 \reg | ||
121 | R64_NUM reg_type_r64 \reg | 179 | R64_NUM reg_type_r64 \reg |
122 | XMM_NUM reg_type_xmm \reg | 180 | XMM_NUM reg_type_xmm \reg |
123 | .if reg_type_r64 <> REG_NUM_INVALID | 181 | .if reg_type_r64 <> REG_NUM_INVALID |
124 | \type = REG_TYPE_R64 | 182 | \type = REG_TYPE_R64 |
183 | .elseif reg_type_r32 <> REG_NUM_INVALID | ||
184 | \type = REG_TYPE_R32 | ||
125 | .elseif reg_type_xmm <> REG_NUM_INVALID | 185 | .elseif reg_type_xmm <> REG_NUM_INVALID |
126 | \type = REG_TYPE_XMM | 186 | \type = REG_TYPE_XMM |
127 | .else | 187 | .else |
@@ -162,6 +222,16 @@
162 | .byte \imm8 | 222 | .byte \imm8 |
163 | .endm | 223 | .endm |
164 | 224 | ||
225 | .macro PEXTRD imm8 xmm gpr | ||
226 | R32_NUM extrd_opd1 \gpr | ||
227 | XMM_NUM extrd_opd2 \xmm | ||
228 | PFX_OPD_SIZE | ||
229 | PFX_REX extrd_opd1 extrd_opd2 | ||
230 | .byte 0x0f, 0x3a, 0x16 | ||
231 | MODRM 0xc0 extrd_opd1 extrd_opd2 | ||
232 | .byte \imm8 | ||
233 | .endm | ||
234 | |||
165 | .macro AESKEYGENASSIST rcon xmm1 xmm2 | 235 | .macro AESKEYGENASSIST rcon xmm1 xmm2 |
166 | XMM_NUM aeskeygen_opd1 \xmm1 | 236 | XMM_NUM aeskeygen_opd1 \xmm1 |
167 | XMM_NUM aeskeygen_opd2 \xmm2 | 237 | XMM_NUM aeskeygen_opd2 \xmm2 |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index ba870bb6dd8e..57873beb3292 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -41,4 +41,9 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
41 | 41 | ||
42 | extern void init_ISA_irqs(void); | 42 | extern void init_ISA_irqs(void); |
43 | 43 | ||
44 | #ifdef CONFIG_X86_LOCAL_APIC | ||
45 | void arch_trigger_all_cpu_backtrace(void); | ||
46 | #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace | ||
47 | #endif | ||
48 | |||
44 | #endif /* _ASM_X86_IRQ_H */ | 49 | #endif /* _ASM_X86_IRQ_H */ |
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 6825e2efd1b4..6bc3985ee473 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -60,11 +60,11 @@ static inline void __exit exit_amd_microcode(void) {}
60 | #ifdef CONFIG_MICROCODE_EARLY | 60 | #ifdef CONFIG_MICROCODE_EARLY |
61 | #define MAX_UCODE_COUNT 128 | 61 | #define MAX_UCODE_COUNT 128 |
62 | extern void __init load_ucode_bsp(void); | 62 | extern void __init load_ucode_bsp(void); |
63 | extern __init void load_ucode_ap(void); | 63 | extern void __cpuinit load_ucode_ap(void); |
64 | extern int __init save_microcode_in_initrd(void); | 64 | extern int __init save_microcode_in_initrd(void); |
65 | #else | 65 | #else |
66 | static inline void __init load_ucode_bsp(void) {} | 66 | static inline void __init load_ucode_bsp(void) {} |
67 | static inline __init void load_ucode_ap(void) {} | 67 | static inline void __cpuinit load_ucode_ap(void) {} |
68 | static inline int __init save_microcode_in_initrd(void) | 68 | static inline int __init save_microcode_in_initrd(void) |
69 | { | 69 | { |
70 | return 0; | 70 | return 0; |
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 03f90c8a5a7c..0208c3c2cbc6 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -42,17 +42,14 @@ do { \
42 | * __mutex_fastpath_lock_retval - try to take the lock by moving the count | 42 | * __mutex_fastpath_lock_retval - try to take the lock by moving the count |
43 | * from 1 to a 0 value | 43 | * from 1 to a 0 value |
44 | * @count: pointer of type atomic_t | 44 | * @count: pointer of type atomic_t |
45 | * @fail_fn: function to call if the original value was not 1 | ||
46 | * | 45 | * |
47 | * Change the count from 1 to a value lower than 1, and call <fail_fn> if it | 46 | * Change the count from 1 to a value lower than 1. This function returns 0 |
48 | * wasn't 1 originally. This function returns 0 if the fastpath succeeds, | 47 | * if the fastpath succeeds, or -1 otherwise. |
49 | * or anything the slow path function returns | ||
50 | */ | 48 | */ |
51 | static inline int __mutex_fastpath_lock_retval(atomic_t *count, | 49 | static inline int __mutex_fastpath_lock_retval(atomic_t *count) |
52 | int (*fail_fn)(atomic_t *)) | ||
53 | { | 50 | { |
54 | if (unlikely(atomic_dec_return(count) < 0)) | 51 | if (unlikely(atomic_dec_return(count) < 0)) |
55 | return fail_fn(count); | 52 | return -1; |
56 | else | 53 | else |
57 | return 0; | 54 | return 0; |
58 | } | 55 | } |
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 68a87b0f8e29..2c543fff241b 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -37,17 +37,14 @@ do { \
37 | * __mutex_fastpath_lock_retval - try to take the lock by moving the count | 37 | * __mutex_fastpath_lock_retval - try to take the lock by moving the count |
38 | * from 1 to a 0 value | 38 | * from 1 to a 0 value |
39 | * @count: pointer of type atomic_t | 39 | * @count: pointer of type atomic_t |
40 | * @fail_fn: function to call if the original value was not 1 | ||
41 | * | 40 | * |
42 | * Change the count from 1 to a value lower than 1, and call <fail_fn> if | 41 | * Change the count from 1 to a value lower than 1. This function returns 0 |
43 | * it wasn't 1 originally. This function returns 0 if the fastpath succeeds, | 42 | * if the fastpath succeeds, or -1 otherwise. |
44 | * or anything the slow path function returns | ||
45 | */ | 43 | */ |
46 | static inline int __mutex_fastpath_lock_retval(atomic_t *count, | 44 | static inline int __mutex_fastpath_lock_retval(atomic_t *count) |
47 | int (*fail_fn)(atomic_t *)) | ||
48 | { | 45 | { |
49 | if (unlikely(atomic_dec_return(count) < 0)) | 46 | if (unlikely(atomic_dec_return(count) < 0)) |
50 | return fail_fn(count); | 47 | return -1; |
51 | else | 48 | else |
52 | return 0; | 49 | return 0; |
53 | } | 50 | } |
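Reviewer note (not part of the patch): the two mutex_*.h hunks above drop the fail_fn callback from __mutex_fastpath_lock_retval() and make the fastpath simply return -1 when the atomic decrement loses. A rough sketch of how a caller adapts, modelled on kernel/mutex.c of this era (illustrative, not taken from this series):

int __sched mutex_lock_interruptible(struct mutex *lock)
{
	int ret;

	might_sleep();
	/* fastpath: try to move count from 1 to 0 */
	ret = __mutex_fastpath_lock_retval(&lock->count);
	if (likely(!ret)) {
		mutex_set_owner(lock);
		return 0;
	}
	/* fastpath returned -1: the caller picks its own slowpath now */
	return __mutex_lock_interruptible_slowpath(lock);
}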
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c0fa356e90de..86f9301903c8 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -18,9 +18,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int ,
18 | void __user *, size_t *, loff_t *); | 18 | void __user *, size_t *, loff_t *); |
19 | extern int unknown_nmi_panic; | 19 | extern int unknown_nmi_panic; |
20 | 20 | ||
21 | void arch_trigger_all_cpu_backtrace(void); | 21 | #endif /* CONFIG_X86_LOCAL_APIC */ |
22 | #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace | ||
23 | #endif | ||
24 | 22 | ||
25 | #define NMI_FLAG_FIRST 1 | 23 | #define NMI_FLAG_FIRST 1 |
26 | 24 | ||
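Reviewer note (not part of the patch): arch_trigger_all_cpu_backtrace() moves from <asm/nmi.h> (removed just above) to <asm/irq.h> (added earlier in this diff), so the define is visible wherever <asm/irq.h> is pulled in. Generic code keeps reaching it through the linux/nmi.h wrapper; an illustrative caller (hypothetical function name):

#include <linux/nmi.h>
#include <linux/printk.h>

static void example_dump_all_cpus(void)
{
	/* trigger_all_cpu_backtrace() is a stub returning false when the
	 * architecture does not define arch_trigger_all_cpu_backtrace */
	if (!trigger_all_cpu_backtrace())
		pr_info("all-CPU backtrace not supported here\n");
}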
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 57cb63402213..8249df45d2f2 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,6 +29,9 @@
29 | #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) | 29 | #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) |
30 | #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL | 30 | #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL |
31 | 31 | ||
32 | #define HSW_IN_TX (1ULL << 32) | ||
33 | #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) | ||
34 | |||
32 | #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) | 35 | #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) |
33 | #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) | 36 | #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) |
34 | #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) | 37 | #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) |
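Reviewer note (not part of the patch): HSW_IN_TX and HSW_IN_TX_CHECKPOINTED are bits 32 and 33 of the Haswell event selection and qualify an event to count only inside (or checkpointed by) a TSX transaction. A hedged user-space illustration of setting the in-TX qualifier on a raw event (helper name and usage are hypothetical):

#include <string.h>
#include <linux/perf_event.h>

/* hypothetical helper: build a raw Haswell event qualified with IN_TX */
static void setup_in_tx_event(struct perf_event_attr *attr, __u64 raw_event)
{
	memset(attr, 0, sizeof(*attr));
	attr->type   = PERF_TYPE_RAW;
	attr->size   = sizeof(*attr);
	attr->config = raw_event | (1ULL << 32);	/* HSW_IN_TX */
}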
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1e672234c4ff..5b0818bc8963 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -506,9 +506,6 @@ static inline unsigned long pages_to_mb(unsigned long npg)
506 | return npg >> (20 - PAGE_SHIFT); | 506 | return npg >> (20 - PAGE_SHIFT); |
507 | } | 507 | } |
508 | 508 | ||
509 | #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ | ||
510 | remap_pfn_range(vma, vaddr, pfn, size, prot) | ||
511 | |||
512 | #if PAGETABLE_LEVELS > 2 | 509 | #if PAGETABLE_LEVELS > 2 |
513 | static inline int pud_none(pud_t pud) | 510 | static inline int pud_none(pud_t pud) |
514 | { | 511 | { |
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index beff97f7df37..7a958164088c 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -7,10 +7,10 @@
7 | 7 | ||
8 | #include <asm/processor-flags.h> | 8 | #include <asm/processor-flags.h> |
9 | 9 | ||
10 | #define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ | 10 | #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ |
11 | X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ | 11 | X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ |
12 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ | 12 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ |
13 | X86_EFLAGS_CF) | 13 | X86_EFLAGS_CF | X86_EFLAGS_RF) |
14 | 14 | ||
15 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); | 15 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); |
16 | 16 | ||
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 142810c457dc..4f7923dd0007 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -235,7 +235,7 @@ extern long __copy_user_nocache(void *dst, const void __user *src,
235 | static inline int | 235 | static inline int |
236 | __copy_from_user_nocache(void *dst, const void __user *src, unsigned size) | 236 | __copy_from_user_nocache(void *dst, const void __user *src, unsigned size) |
237 | { | 237 | { |
238 | might_sleep(); | 238 | might_fault(); |
239 | return __copy_user_nocache(dst, src, size, 1); | 239 | return __copy_user_nocache(dst, src, size, 1); |
240 | } | 240 | } |
241 | 241 | ||
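Reviewer note (not part of the patch): the might_sleep() -> might_fault() change keeps the may-sleep annotation but also tells the debugging machinery that the access can take a page fault. An illustrative caller relying on that contract (hypothetical helper; error handling simplified):

static int example_read_from_user(void *dst, const void __user *src, unsigned size)
{
	if (!access_ok(VERIFY_READ, src, size))
		return -EFAULT;
	/* may fault and therefore sleep; not safe under a spinlock */
	if (__copy_from_user_nocache(dst, src, size))
		return -EFAULT;
	return 0;
}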
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 08744242b8d2..c15ddaf90710 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -6,7 +6,6 @@
6 | #define SETUP_E820_EXT 1 | 6 | #define SETUP_E820_EXT 1 |
7 | #define SETUP_DTB 2 | 7 | #define SETUP_DTB 2 |
8 | #define SETUP_PCI 3 | 8 | #define SETUP_PCI 3 |
9 | #define SETUP_EFI_VARS 4 | ||
10 | 9 | ||
11 | /* ram_size flags */ | 10 | /* ram_size flags */ |
12 | #define RAMDISK_IMAGE_START_MASK 0x07FF | 11 | #define RAMDISK_IMAGE_START_MASK 0x07FF |
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 2af848dfa754..bb0465090ae5 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -170,6 +170,9 @@
170 | #define MSR_KNC_EVNTSEL0 0x00000028 | 170 | #define MSR_KNC_EVNTSEL0 0x00000028 |
171 | #define MSR_KNC_EVNTSEL1 0x00000029 | 171 | #define MSR_KNC_EVNTSEL1 0x00000029 |
172 | 172 | ||
173 | /* Alternative perfctr range with full access. */ | ||
174 | #define MSR_IA32_PMC0 0x000004c1 | ||
175 | |||
173 | /* AMD64 MSRs. Not complete. See the architecture manual for a more | 176 | /* AMD64 MSRs. Not complete. See the architecture manual for a more |
174 | complete list. */ | 177 | complete list. */ |
175 | 178 | ||
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 31cb9ae992b7..a698d7165c96 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -9,6 +9,7 @@
9 | * | 9 | * |
10 | */ | 10 | */ |
11 | #include <asm/apic.h> | 11 | #include <asm/apic.h> |
12 | #include <asm/nmi.h> | ||
12 | 13 | ||
13 | #include <linux/cpumask.h> | 14 | #include <linux/cpumask.h> |
14 | #include <linux/kdebug.h> | 15 | #include <linux/kdebug.h> |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b0684e4a73aa..47b56a7e99cb 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
31 | 31 | ||
32 | ifdef CONFIG_PERF_EVENTS | 32 | ifdef CONFIG_PERF_EVENTS |
33 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o | 33 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o |
34 | ifdef CONFIG_AMD_IOMMU | ||
35 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o | ||
36 | endif | ||
34 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o | 37 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o |
35 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o | 38 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o |
36 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o | 39 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o |
37 | endif | 40 | endif |
38 | 41 | ||
42 | |||
39 | obj-$(CONFIG_X86_MCE) += mcheck/ | 43 | obj-$(CONFIG_X86_MCE) += mcheck/ |
40 | obj-$(CONFIG_MTRR) += mtrr/ | 44 | obj-$(CONFIG_MTRR) += mtrr/ |
41 | 45 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 35ffda5d0727..5f90b85ff22e 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -714,15 +714,15 @@ int __init mtrr_cleanup(unsigned address_bits)
714 | if (mtrr_tom2) | 714 | if (mtrr_tom2) |
715 | x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base; | 715 | x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base; |
716 | 716 | ||
717 | nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size); | ||
718 | /* | 717 | /* |
719 | * [0, 1M) should always be covered by var mtrr with WB | 718 | * [0, 1M) should always be covered by var mtrr with WB |
720 | * and fixed mtrrs should take effect before var mtrr for it: | 719 | * and fixed mtrrs should take effect before var mtrr for it: |
721 | */ | 720 | */ |
722 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0, | 721 | nr_range = add_range_with_merge(range, RANGE_NUM, 0, 0, |
723 | 1ULL<<(20 - PAGE_SHIFT)); | 722 | 1ULL<<(20 - PAGE_SHIFT)); |
724 | /* Sort the ranges: */ | 723 | /* add from var mtrr at last */ |
725 | sort_range(range, nr_range); | 724 | nr_range = x86_get_mtrr_mem_range(range, nr_range, |
725 | x_remove_base, x_remove_size); | ||
726 | 726 | ||
727 | range_sums = sum_ranges(range, nr_range); | 727 | range_sums = sum_ranges(range, nr_range); |
728 | printk(KERN_INFO "total RAM covered: %ldM\n", | 728 | printk(KERN_INFO "total RAM covered: %ldM\n", |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c99d20..9e581c5cf6d0 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
403 | * check that PEBS LBR correction does not conflict with | 403 | * check that PEBS LBR correction does not conflict with |
404 | * whatever the user is asking with attr->branch_sample_type | 404 | * whatever the user is asking with attr->branch_sample_type |
405 | */ | 405 | */ |
406 | if (event->attr.precise_ip > 1) { | 406 | if (event->attr.precise_ip > 1 && |
407 | x86_pmu.intel_cap.pebs_format < 2) { | ||
407 | u64 *br_type = &event->attr.branch_sample_type; | 408 | u64 *br_type = &event->attr.branch_sample_type; |
408 | 409 | ||
409 | if (has_branch_stack(event)) { | 410 | if (has_branch_stack(event)) { |
@@ -568,7 +569,7 @@ struct sched_state {
568 | struct perf_sched { | 569 | struct perf_sched { |
569 | int max_weight; | 570 | int max_weight; |
570 | int max_events; | 571 | int max_events; |
571 | struct event_constraint **constraints; | 572 | struct perf_event **events; |
572 | struct sched_state state; | 573 | struct sched_state state; |
573 | int saved_states; | 574 | int saved_states; |
574 | struct sched_state saved[SCHED_STATES_MAX]; | 575 | struct sched_state saved[SCHED_STATES_MAX]; |
@@ -577,7 +578,7 @@ struct perf_sched {
577 | /* | 578 | /* |
578 | * Initialize interator that runs through all events and counters. | 579 | * Initialize interator that runs through all events and counters. |
579 | */ | 580 | */ |
580 | static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, | 581 | static void perf_sched_init(struct perf_sched *sched, struct perf_event **events, |
581 | int num, int wmin, int wmax) | 582 | int num, int wmin, int wmax) |
582 | { | 583 | { |
583 | int idx; | 584 | int idx; |
@@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
585 | memset(sched, 0, sizeof(*sched)); | 586 | memset(sched, 0, sizeof(*sched)); |
586 | sched->max_events = num; | 587 | sched->max_events = num; |
587 | sched->max_weight = wmax; | 588 | sched->max_weight = wmax; |
588 | sched->constraints = c; | 589 | sched->events = events; |
589 | 590 | ||
590 | for (idx = 0; idx < num; idx++) { | 591 | for (idx = 0; idx < num; idx++) { |
591 | if (c[idx]->weight == wmin) | 592 | if (events[idx]->hw.constraint->weight == wmin) |
592 | break; | 593 | break; |
593 | } | 594 | } |
594 | 595 | ||
@@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
635 | if (sched->state.event >= sched->max_events) | 636 | if (sched->state.event >= sched->max_events) |
636 | return false; | 637 | return false; |
637 | 638 | ||
638 | c = sched->constraints[sched->state.event]; | 639 | c = sched->events[sched->state.event]->hw.constraint; |
639 | |||
640 | /* Prefer fixed purpose counters */ | 640 | /* Prefer fixed purpose counters */ |
641 | if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { | 641 | if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { |
642 | idx = INTEL_PMC_IDX_FIXED; | 642 | idx = INTEL_PMC_IDX_FIXED; |
@@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
694 | if (sched->state.weight > sched->max_weight) | 694 | if (sched->state.weight > sched->max_weight) |
695 | return false; | 695 | return false; |
696 | } | 696 | } |
697 | c = sched->constraints[sched->state.event]; | 697 | c = sched->events[sched->state.event]->hw.constraint; |
698 | } while (c->weight != sched->state.weight); | 698 | } while (c->weight != sched->state.weight); |
699 | 699 | ||
700 | sched->state.counter = 0; /* start with first counter */ | 700 | sched->state.counter = 0; /* start with first counter */ |
@@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
705 | /* | 705 | /* |
706 | * Assign a counter for each event. | 706 | * Assign a counter for each event. |
707 | */ | 707 | */ |
708 | int perf_assign_events(struct event_constraint **constraints, int n, | 708 | int perf_assign_events(struct perf_event **events, int n, |
709 | int wmin, int wmax, int *assign) | 709 | int wmin, int wmax, int *assign) |
710 | { | 710 | { |
711 | struct perf_sched sched; | 711 | struct perf_sched sched; |
712 | 712 | ||
713 | perf_sched_init(&sched, constraints, n, wmin, wmax); | 713 | perf_sched_init(&sched, events, n, wmin, wmax); |
714 | 714 | ||
715 | do { | 715 | do { |
716 | if (!perf_sched_find_counter(&sched)) | 716 | if (!perf_sched_find_counter(&sched)) |
@@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n,
724 | 724 | ||
725 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | 725 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
726 | { | 726 | { |
727 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; | 727 | struct event_constraint *c; |
728 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 728 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
729 | struct perf_event *e; | ||
729 | int i, wmin, wmax, num = 0; | 730 | int i, wmin, wmax, num = 0; |
730 | struct hw_perf_event *hwc; | 731 | struct hw_perf_event *hwc; |
731 | 732 | ||
732 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | 733 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
733 | 734 | ||
734 | for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { | 735 | for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { |
736 | hwc = &cpuc->event_list[i]->hw; | ||
735 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); | 737 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); |
736 | constraints[i] = c; | 738 | hwc->constraint = c; |
739 | |||
737 | wmin = min(wmin, c->weight); | 740 | wmin = min(wmin, c->weight); |
738 | wmax = max(wmax, c->weight); | 741 | wmax = max(wmax, c->weight); |
739 | } | 742 | } |
@@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
743 | */ | 746 | */ |
744 | for (i = 0; i < n; i++) { | 747 | for (i = 0; i < n; i++) { |
745 | hwc = &cpuc->event_list[i]->hw; | 748 | hwc = &cpuc->event_list[i]->hw; |
746 | c = constraints[i]; | 749 | c = hwc->constraint; |
747 | 750 | ||
748 | /* never assigned */ | 751 | /* never assigned */ |
749 | if (hwc->idx == -1) | 752 | if (hwc->idx == -1) |
@@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
764 | 767 | ||
765 | /* slow path */ | 768 | /* slow path */ |
766 | if (i != n) | 769 | if (i != n) |
767 | num = perf_assign_events(constraints, n, wmin, wmax, assign); | 770 | num = perf_assign_events(cpuc->event_list, n, wmin, |
771 | wmax, assign); | ||
768 | 772 | ||
769 | /* | 773 | /* |
774 | * Mark the event as committed, so we do not put_constraint() | ||
775 | * in case new events are added and fail scheduling. | ||
776 | */ | ||
777 | if (!num && assign) { | ||
778 | for (i = 0; i < n; i++) { | ||
779 | e = cpuc->event_list[i]; | ||
780 | e->hw.flags |= PERF_X86_EVENT_COMMITTED; | ||
781 | } | ||
782 | } | ||
783 | /* | ||
770 | * scheduling failed or is just a simulation, | 784 | * scheduling failed or is just a simulation, |
771 | * free resources if necessary | 785 | * free resources if necessary |
772 | */ | 786 | */ |
773 | if (!assign || num) { | 787 | if (!assign || num) { |
774 | for (i = 0; i < n; i++) { | 788 | for (i = 0; i < n; i++) { |
789 | e = cpuc->event_list[i]; | ||
790 | /* | ||
791 | * do not put_constraint() on comitted events, | ||
792 | * because they are good to go | ||
793 | */ | ||
794 | if ((e->hw.flags & PERF_X86_EVENT_COMMITTED)) | ||
795 | continue; | ||
796 | |||
775 | if (x86_pmu.put_event_constraints) | 797 | if (x86_pmu.put_event_constraints) |
776 | x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); | 798 | x86_pmu.put_event_constraints(cpuc, e); |
777 | } | 799 | } |
778 | } | 800 | } |
779 | return num ? -EINVAL : 0; | 801 | return num ? -EINVAL : 0; |
@@ -1153,6 +1175,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
1153 | int i; | 1175 | int i; |
1154 | 1176 | ||
1155 | /* | 1177 | /* |
1178 | * event is descheduled | ||
1179 | */ | ||
1180 | event->hw.flags &= ~PERF_X86_EVENT_COMMITTED; | ||
1181 | |||
1182 | /* | ||
1156 | * If we're called during a txn, we don't need to do anything. | 1183 | * If we're called during a txn, we don't need to do anything. |
1157 | * The events never got scheduled and ->cancel_txn will truncate | 1184 | * The events never got scheduled and ->cancel_txn will truncate |
1158 | * the event_list. | 1185 | * the event_list. |
@@ -1249,10 +1276,20 @@ void perf_events_lapic_init(void)
1249 | static int __kprobes | 1276 | static int __kprobes |
1250 | perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) | 1277 | perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) |
1251 | { | 1278 | { |
1279 | int ret; | ||
1280 | u64 start_clock; | ||
1281 | u64 finish_clock; | ||
1282 | |||
1252 | if (!atomic_read(&active_events)) | 1283 | if (!atomic_read(&active_events)) |
1253 | return NMI_DONE; | 1284 | return NMI_DONE; |
1254 | 1285 | ||
1255 | return x86_pmu.handle_irq(regs); | 1286 | start_clock = local_clock(); |
1287 | ret = x86_pmu.handle_irq(regs); | ||
1288 | finish_clock = local_clock(); | ||
1289 | |||
1290 | perf_sample_event_took(finish_clock - start_clock); | ||
1291 | |||
1292 | return ret; | ||
1256 | } | 1293 | } |
1257 | 1294 | ||
1258 | struct event_constraint emptyconstraint; | 1295 | struct event_constraint emptyconstraint; |
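Reviewer note (not part of the patch): the perf_event_nmi_handler() hunk above times each PMI and reports the duration through perf_sample_event_took(); the generic perf core uses that feedback to lower the maximum sampling rate when handlers consistently run long. A hedged sketch of the kind of check done on the consumer side (names and threshold handling are illustrative):

/* illustrative consumer: warn when a PMI exceeds its time budget
 * (the real code also adjusts perf_event_max_sample_rate) */
static void example_check_nmi_duration(u64 took_ns, u64 allowed_ns)
{
	if (took_ns > allowed_ns)
		pr_warn_ratelimited("perf: NMI handler took %llu ns\n", took_ns);
}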
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ba9aadfa683b..97e557bc4c91 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -63,10 +63,12 @@ struct event_constraint {
63 | int flags; | 63 | int flags; |
64 | }; | 64 | }; |
65 | /* | 65 | /* |
66 | * struct event_constraint flags | 66 | * struct hw_perf_event.flags flags |
67 | */ | 67 | */ |
68 | #define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ | 68 | #define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ |
69 | #define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ | 69 | #define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ |
70 | #define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */ | ||
71 | #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */ | ||
70 | 72 | ||
71 | struct amd_nb { | 73 | struct amd_nb { |
72 | int nb_id; /* NorthBridge id */ | 74 | int nb_id; /* NorthBridge id */ |
@@ -227,11 +229,14 @@ struct cpu_hw_events {
227 | * - inv | 229 | * - inv |
228 | * - edge | 230 | * - edge |
229 | * - cnt-mask | 231 | * - cnt-mask |
232 | * - in_tx | ||
233 | * - in_tx_checkpointed | ||
230 | * The other filters are supported by fixed counters. | 234 | * The other filters are supported by fixed counters. |
231 | * The any-thread option is supported starting with v3. | 235 | * The any-thread option is supported starting with v3. |
232 | */ | 236 | */ |
237 | #define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED) | ||
233 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | 238 | #define FIXED_EVENT_CONSTRAINT(c, n) \ |
234 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) | 239 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS) |
235 | 240 | ||
236 | /* | 241 | /* |
237 | * Constraint on the Event code + UMask | 242 | * Constraint on the Event code + UMask |
@@ -247,6 +252,11 @@ struct cpu_hw_events {
247 | __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ | 252 | __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ |
248 | HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) | 253 | HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) |
249 | 254 | ||
255 | /* DataLA version of store sampling without extra enable bit. */ | ||
256 | #define INTEL_PST_HSW_CONSTRAINT(c, n) \ | ||
257 | __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ | ||
258 | HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) | ||
259 | |||
250 | #define EVENT_CONSTRAINT_END \ | 260 | #define EVENT_CONSTRAINT_END \ |
251 | EVENT_CONSTRAINT(0, 0, 0) | 261 | EVENT_CONSTRAINT(0, 0, 0) |
252 | 262 | ||
@@ -301,6 +311,11 @@ union perf_capabilities {
301 | u64 pebs_arch_reg:1; | 311 | u64 pebs_arch_reg:1; |
302 | u64 pebs_format:4; | 312 | u64 pebs_format:4; |
303 | u64 smm_freeze:1; | 313 | u64 smm_freeze:1; |
314 | /* | ||
315 | * PMU supports separate counter range for writing | ||
316 | * values > 32bit. | ||
317 | */ | ||
318 | u64 full_width_write:1; | ||
304 | }; | 319 | }; |
305 | u64 capabilities; | 320 | u64 capabilities; |
306 | }; | 321 | }; |
@@ -375,6 +390,7 @@ struct x86_pmu {
375 | struct event_constraint *event_constraints; | 390 | struct event_constraint *event_constraints; |
376 | struct x86_pmu_quirk *quirks; | 391 | struct x86_pmu_quirk *quirks; |
377 | int perfctr_second_write; | 392 | int perfctr_second_write; |
393 | bool late_ack; | ||
378 | 394 | ||
379 | /* | 395 | /* |
380 | * sysfs attrs | 396 | * sysfs attrs |
@@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
528 | 544 | ||
529 | void x86_pmu_enable_all(int added); | 545 | void x86_pmu_enable_all(int added); |
530 | 546 | ||
531 | int perf_assign_events(struct event_constraint **constraints, int n, | 547 | int perf_assign_events(struct perf_event **events, int n, |
532 | int wmin, int wmax, int *assign); | 548 | int wmin, int wmax, int *assign); |
533 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); | 549 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); |
534 | 550 | ||
@@ -633,6 +649,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
633 | 649 | ||
634 | extern struct event_constraint intel_ivb_pebs_event_constraints[]; | 650 | extern struct event_constraint intel_ivb_pebs_event_constraints[]; |
635 | 651 | ||
652 | extern struct event_constraint intel_hsw_pebs_event_constraints[]; | ||
653 | |||
636 | struct event_constraint *intel_pebs_constraints(struct perf_event *event); | 654 | struct event_constraint *intel_pebs_constraints(struct perf_event *event); |
637 | 655 | ||
638 | void intel_pmu_pebs_enable(struct perf_event *event); | 656 | void intel_pmu_pebs_enable(struct perf_event *event); |
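Reviewer note (not part of the patch): the new full_width_write capability bit pairs with MSR_IA32_PMC0 from the msr-index.h hunk earlier in this diff: when the bit is set, general-purpose counters accept writes at their full bit width through the alternative MSR range instead of the 32-bit sign-extended legacy registers. A rough sketch of the idea (hypothetical helper; the real wiring is done once at Intel PMU init time):

/* illustrative only: write a counter using the widest path available */
static void example_write_counter(int idx, u64 value)
{
	if (x86_pmu.intel_cap.full_width_write)
		wrmsrl(MSR_IA32_PMC0 + idx, value);
	else
		wrmsrl(x86_pmu_event_addr(idx), value & x86_pmu.cntval_mask);
}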
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 7e28d9467bb4..4cbe03287b08 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -648,48 +648,48 @@ static __initconst const struct x86_pmu amd_pmu = {
648 | .cpu_dead = amd_pmu_cpu_dead, | 648 | .cpu_dead = amd_pmu_cpu_dead, |
649 | }; | 649 | }; |
650 | 650 | ||
651 | static int setup_event_constraints(void) | 651 | static int __init amd_core_pmu_init(void) |
652 | { | 652 | { |
653 | if (boot_cpu_data.x86 == 0x15) | 653 | if (!cpu_has_perfctr_core) |
654 | return 0; | ||
655 | |||
656 | switch (boot_cpu_data.x86) { | ||
657 | case 0x15: | ||
658 | pr_cont("Fam15h "); | ||
654 | x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; | 659 | x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; |
655 | return 0; | 660 | break; |
656 | } | ||
657 | 661 | ||
658 | static int setup_perfctr_core(void) | 662 | default: |
659 | { | 663 | pr_err("core perfctr but no constraints; unknown hardware!\n"); |
660 | if (!cpu_has_perfctr_core) { | ||
661 | WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h, | ||
662 | KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!"); | ||
663 | return -ENODEV; | 664 | return -ENODEV; |
664 | } | 665 | } |
665 | 666 | ||
666 | WARN(x86_pmu.get_event_constraints == amd_get_event_constraints, | ||
667 | KERN_ERR "hw perf events core counters need constraints handler!"); | ||
668 | |||
669 | /* | 667 | /* |
670 | * If core performance counter extensions exists, we must use | 668 | * If core performance counter extensions exists, we must use |
671 | * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also | 669 | * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also |
672 | * x86_pmu_addr_offset(). | 670 | * amd_pmu_addr_offset(). |
673 | */ | 671 | */ |
674 | x86_pmu.eventsel = MSR_F15H_PERF_CTL; | 672 | x86_pmu.eventsel = MSR_F15H_PERF_CTL; |
675 | x86_pmu.perfctr = MSR_F15H_PERF_CTR; | 673 | x86_pmu.perfctr = MSR_F15H_PERF_CTR; |
676 | x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; | 674 | x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; |
677 | 675 | ||
678 | printk(KERN_INFO "perf: AMD core performance counters detected\n"); | 676 | pr_cont("core perfctr, "); |
679 | |||
680 | return 0; | 677 | return 0; |
681 | } | 678 | } |
682 | 679 | ||
683 | __init int amd_pmu_init(void) | 680 | __init int amd_pmu_init(void) |
684 | { | 681 | { |
682 | int ret; | ||
683 | |||
685 | /* Performance-monitoring supported from K7 and later: */ | 684 | /* Performance-monitoring supported from K7 and later: */ |
686 | if (boot_cpu_data.x86 < 6) | 685 | if (boot_cpu_data.x86 < 6) |
687 | return -ENODEV; | 686 | return -ENODEV; |
688 | 687 | ||
689 | x86_pmu = amd_pmu; | 688 | x86_pmu = amd_pmu; |
690 | 689 | ||
691 | setup_event_constraints(); | 690 | ret = amd_core_pmu_init(); |
692 | setup_perfctr_core(); | 691 | if (ret) |
692 | return ret; | ||
693 | 693 | ||
694 | /* Events are common for all AMDs */ | 694 | /* Events are common for all AMDs */ |
695 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | 695 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, |
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
new file mode 100644
index 000000000000..0db655ef3918
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -0,0 +1,504 @@
1 | /* | ||
2 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Author: Steven Kinney <Steven.Kinney@amd.com> | ||
5 | * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com> | ||
6 | * | ||
7 | * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | */ | ||
13 | |||
14 | #include <linux/perf_event.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/cpumask.h> | ||
17 | #include <linux/slab.h> | ||
18 | |||
19 | #include "perf_event.h" | ||
20 | #include "perf_event_amd_iommu.h" | ||
21 | |||
22 | #define COUNTER_SHIFT 16 | ||
23 | |||
24 | #define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8)) | ||
25 | #define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg)) | ||
26 | |||
27 | /* iommu pmu config masks */ | ||
28 | #define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL)) | ||
29 | #define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL) | ||
30 | #define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL) | ||
31 | #define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL) | ||
32 | #define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL) | ||
33 | #define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL) | ||
34 | #define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL) | ||
35 | |||
36 | static struct perf_amd_iommu __perf_iommu; | ||
37 | |||
38 | struct perf_amd_iommu { | ||
39 | struct pmu pmu; | ||
40 | u8 max_banks; | ||
41 | u8 max_counters; | ||
42 | u64 cntr_assign_mask; | ||
43 | raw_spinlock_t lock; | ||
44 | const struct attribute_group *attr_groups[4]; | ||
45 | }; | ||
46 | |||
47 | #define format_group attr_groups[0] | ||
48 | #define cpumask_group attr_groups[1] | ||
49 | #define events_group attr_groups[2] | ||
50 | #define null_group attr_groups[3] | ||
51 | |||
52 | /*--------------------------------------------- | ||
53 | * sysfs format attributes | ||
54 | *---------------------------------------------*/ | ||
55 | PMU_FORMAT_ATTR(csource, "config:0-7"); | ||
56 | PMU_FORMAT_ATTR(devid, "config:8-23"); | ||
57 | PMU_FORMAT_ATTR(pasid, "config:24-39"); | ||
58 | PMU_FORMAT_ATTR(domid, "config:40-55"); | ||
59 | PMU_FORMAT_ATTR(devid_mask, "config1:0-15"); | ||
60 | PMU_FORMAT_ATTR(pasid_mask, "config1:16-31"); | ||
61 | PMU_FORMAT_ATTR(domid_mask, "config1:32-47"); | ||
62 | |||
63 | static struct attribute *iommu_format_attrs[] = { | ||
64 | &format_attr_csource.attr, | ||
65 | &format_attr_devid.attr, | ||
66 | &format_attr_pasid.attr, | ||
67 | &format_attr_domid.attr, | ||
68 | &format_attr_devid_mask.attr, | ||
69 | &format_attr_pasid_mask.attr, | ||
70 | &format_attr_domid_mask.attr, | ||
71 | NULL, | ||
72 | }; | ||
73 | |||
74 | static struct attribute_group amd_iommu_format_group = { | ||
75 | .name = "format", | ||
76 | .attrs = iommu_format_attrs, | ||
77 | }; | ||
78 | |||
79 | /*--------------------------------------------- | ||
80 | * sysfs events attributes | ||
81 | *---------------------------------------------*/ | ||
82 | struct amd_iommu_event_desc { | ||
83 | struct kobj_attribute attr; | ||
84 | const char *event; | ||
85 | }; | ||
86 | |||
87 | static ssize_t _iommu_event_show(struct kobject *kobj, | ||
88 | struct kobj_attribute *attr, char *buf) | ||
89 | { | ||
90 | struct amd_iommu_event_desc *event = | ||
91 | container_of(attr, struct amd_iommu_event_desc, attr); | ||
92 | return sprintf(buf, "%s\n", event->event); | ||
93 | } | ||
94 | |||
95 | #define AMD_IOMMU_EVENT_DESC(_name, _event) \ | ||
96 | { \ | ||
97 | .attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \ | ||
98 | .event = _event, \ | ||
99 | } | ||
100 | |||
101 | static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = { | ||
102 | AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"), | ||
103 | AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"), | ||
104 | AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"), | ||
105 | AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"), | ||
106 | AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"), | ||
107 | AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"), | ||
108 | AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"), | ||
109 | AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"), | ||
110 | AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"), | ||
111 | AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"), | ||
112 | AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"), | ||
113 | AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"), | ||
114 | AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"), | ||
115 | AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"), | ||
116 | AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"), | ||
117 | AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"), | ||
118 | AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"), | ||
119 | AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"), | ||
120 | AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"), | ||
121 | { /* end: all zeroes */ }, | ||
122 | }; | ||
123 | |||
124 | /*--------------------------------------------- | ||
125 | * sysfs cpumask attributes | ||
126 | *---------------------------------------------*/ | ||
127 | static cpumask_t iommu_cpumask; | ||
128 | |||
129 | static ssize_t _iommu_cpumask_show(struct device *dev, | ||
130 | struct device_attribute *attr, | ||
131 | char *buf) | ||
132 | { | ||
133 | int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask); | ||
134 | buf[n++] = '\n'; | ||
135 | buf[n] = '\0'; | ||
136 | return n; | ||
137 | } | ||
138 | static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL); | ||
139 | |||
140 | static struct attribute *iommu_cpumask_attrs[] = { | ||
141 | &dev_attr_cpumask.attr, | ||
142 | NULL, | ||
143 | }; | ||
144 | |||
145 | static struct attribute_group amd_iommu_cpumask_group = { | ||
146 | .attrs = iommu_cpumask_attrs, | ||
147 | }; | ||
148 | |||
149 | /*---------------------------------------------*/ | ||
150 | |||
151 | static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu) | ||
152 | { | ||
153 | unsigned long flags; | ||
154 | int shift, bank, cntr, retval; | ||
155 | int max_banks = perf_iommu->max_banks; | ||
156 | int max_cntrs = perf_iommu->max_counters; | ||
157 | |||
158 | raw_spin_lock_irqsave(&perf_iommu->lock, flags); | ||
159 | |||
160 | for (bank = 0, shift = 0; bank < max_banks; bank++) { | ||
161 | for (cntr = 0; cntr < max_cntrs; cntr++) { | ||
162 | shift = bank + (bank*3) + cntr; | ||
163 | if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) { | ||
164 | continue; | ||
165 | } else { | ||
166 | perf_iommu->cntr_assign_mask |= (1ULL<<shift); | ||
167 | retval = ((u16)((u16)bank<<8) | (u8)(cntr)); | ||
168 | goto out; | ||
169 | } | ||
170 | } | ||
171 | } | ||
172 | retval = -ENOSPC; | ||
173 | out: | ||
174 | raw_spin_unlock_irqrestore(&perf_iommu->lock, flags); | ||
175 | return retval; | ||
176 | } | ||
177 | |||
178 | static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu, | ||
179 | u8 bank, u8 cntr) | ||
180 | { | ||
181 | unsigned long flags; | ||
182 | int max_banks, max_cntrs; | ||
183 | int shift = 0; | ||
184 | |||
185 | max_banks = perf_iommu->max_banks; | ||
186 | max_cntrs = perf_iommu->max_counters; | ||
187 | |||
188 | if ((bank > max_banks) || (cntr > max_cntrs)) | ||
189 | return -EINVAL; | ||
190 | |||
191 | shift = bank + cntr + (bank*3); | ||
192 | |||
193 | raw_spin_lock_irqsave(&perf_iommu->lock, flags); | ||
194 | perf_iommu->cntr_assign_mask &= ~(1ULL<<shift); | ||
195 | raw_spin_unlock_irqrestore(&perf_iommu->lock, flags); | ||
196 | |||
197 | return 0; | ||
198 | } | ||
199 | |||
200 | static int perf_iommu_event_init(struct perf_event *event) | ||
201 | { | ||
202 | struct hw_perf_event *hwc = &event->hw; | ||
203 | struct perf_amd_iommu *perf_iommu; | ||
204 | u64 config, config1; | ||
205 | |||
206 | /* test the event attr type check for PMU enumeration */ | ||
207 | if (event->attr.type != event->pmu->type) | ||
208 | return -ENOENT; | ||
209 | |||
210 | /* | ||
211 | * IOMMU counters are shared across all cores. | ||
212 | * Therefore, it does not support per-process mode. | ||
213 | * Also, it does not support event sampling mode. | ||
214 | */ | ||
215 | if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) | ||
216 | return -EINVAL; | ||
217 | |||
218 | /* IOMMU counters do not have usr/os/guest/host bits */ | ||
219 | if (event->attr.exclude_user || event->attr.exclude_kernel || | ||
220 | event->attr.exclude_host || event->attr.exclude_guest) | ||
221 | return -EINVAL; | ||
222 | |||
223 | if (event->cpu < 0) | ||
224 | return -EINVAL; | ||
225 | |||
226 | perf_iommu = &__perf_iommu; | ||
227 | |||
228 | if (event->pmu != &perf_iommu->pmu) | ||
229 | return -ENOENT; | ||
230 | |||
231 | if (perf_iommu) { | ||
232 | config = event->attr.config; | ||
233 | config1 = event->attr.config1; | ||
234 | } else { | ||
235 | return -EINVAL; | ||
236 | } | ||
237 | |||
238 | /* integrate with iommu base devid (0000), assume one iommu */ | ||
239 | perf_iommu->max_banks = | ||
240 | amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID); | ||
241 | perf_iommu->max_counters = | ||
242 | amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID); | ||
243 | if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0)) | ||
244 | return -EINVAL; | ||
245 | |||
246 | /* update the hw_perf_event struct with the iommu config data */ | ||
247 | hwc->config = config; | ||
248 | hwc->extra_reg.config = config1; | ||
249 | |||
250 | return 0; | ||
251 | } | ||
252 | |||
253 | static void perf_iommu_enable_event(struct perf_event *ev) | ||
254 | { | ||
255 | u8 csource = _GET_CSOURCE(ev); | ||
256 | u16 devid = _GET_DEVID(ev); | ||
257 | u64 reg = 0ULL; | ||
258 | |||
259 | reg = csource; | ||
260 | amd_iommu_pc_get_set_reg_val(devid, | ||
261 | _GET_BANK(ev), _GET_CNTR(ev) , | ||
262 | IOMMU_PC_COUNTER_SRC_REG, ®, true); | ||
263 | |||
264 | reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32); | ||
265 | if (reg) | ||
266 | reg |= (1UL << 31); | ||
267 | amd_iommu_pc_get_set_reg_val(devid, | ||
268 | _GET_BANK(ev), _GET_CNTR(ev) , | ||
269 | IOMMU_PC_DEVID_MATCH_REG, ®, true); | ||
270 | |||
271 | reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); | ||
272 | if (reg) | ||
273 | reg |= (1UL << 31); | ||
274 | amd_iommu_pc_get_set_reg_val(devid, | ||
275 | _GET_BANK(ev), _GET_CNTR(ev) , | ||
276 | IOMMU_PC_PASID_MATCH_REG, ®, true); | ||
277 | |||
278 | reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); | ||
279 | if (reg) | ||
280 | reg |= (1UL << 31); | ||
281 | amd_iommu_pc_get_set_reg_val(devid, | ||
282 | _GET_BANK(ev), _GET_CNTR(ev) , | ||
283 | IOMMU_PC_DOMID_MATCH_REG, ®, true); | ||
284 | } | ||
285 | |||
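The four match registers above are all packed the same way; a small sketch of that packing follows. Treating bit 31 as a match-enable flag is an assumption for illustration — the patch itself does not spell out the hardware meaning.

/* Match value in the low 32 bits, match mask in the high 32 bits,
 * bit 31 set whenever any non-zero match is requested. */
#include <stdint.h>

static uint64_t pack_match(uint32_t value, uint32_t mask)
{
	uint64_t reg = (uint64_t)value | ((uint64_t)mask << 32);

	if (reg)
		reg |= 1ULL << 31;
	return reg;
}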
286 | static void perf_iommu_disable_event(struct perf_event *event) | ||
287 | { | ||
288 | u64 reg = 0ULL; | ||
289 | |||
290 | amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), | ||
291 | _GET_BANK(event), _GET_CNTR(event), | ||
292 | IOMMU_PC_COUNTER_SRC_REG, ®, true); | ||
293 | } | ||
294 | |||
295 | static void perf_iommu_start(struct perf_event *event, int flags) | ||
296 | { | ||
297 | struct hw_perf_event *hwc = &event->hw; | ||
298 | |||
299 | pr_debug("perf: amd_iommu:perf_iommu_start\n"); | ||
300 | if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) | ||
301 | return; | ||
302 | |||
303 | WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); | ||
304 | hwc->state = 0; | ||
305 | |||
306 | if (flags & PERF_EF_RELOAD) { | ||
307 | u64 prev_raw_count = local64_read(&hwc->prev_count); | ||
308 | amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), | ||
309 | _GET_BANK(event), _GET_CNTR(event), | ||
310 | IOMMU_PC_COUNTER_REG, &prev_raw_count, true); | ||
311 | } | ||
312 | |||
313 | perf_iommu_enable_event(event); | ||
314 | perf_event_update_userpage(event); | ||
315 | |||
316 | } | ||
317 | |||
318 | static void perf_iommu_read(struct perf_event *event) | ||
319 | { | ||
320 | u64 count = 0ULL; | ||
321 | u64 prev_raw_count = 0ULL; | ||
322 | u64 delta = 0ULL; | ||
323 | struct hw_perf_event *hwc = &event->hw; | ||
324 | pr_debug("perf: amd_iommu:perf_iommu_read\n"); | ||
325 | |||
326 | amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), | ||
327 | _GET_BANK(event), _GET_CNTR(event), | ||
328 | IOMMU_PC_COUNTER_REG, &count, false); | ||
329 | |||
330 | /* IOMMU pc counter register is only 48 bits */ | ||
331 | count &= 0xFFFFFFFFFFFFULL; | ||
332 | |||
333 | prev_raw_count = local64_read(&hwc->prev_count); | ||
334 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
335 | count) != prev_raw_count) | ||
336 | return; | ||
337 | |||
338 | /* Handling 48-bit counter overflowing */ | ||
339 | delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT); | ||
340 | delta >>= COUNTER_SHIFT; | ||
341 | local64_add(delta, &event->count); | ||
342 | |||
343 | } | ||
344 | |||
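A worked example of the wrap-around arithmetic in perf_iommu_read(), assuming COUNTER_SHIFT is 64 - 48 = 16 (it is defined earlier in this file, outside the hunk shown). Shifting both values up by 16 before subtracting makes a 48-bit wrap behave like an ordinary 64-bit unsigned wrap:

#include <stdio.h>
#include <stdint.h>

#define COUNTER_SHIFT 16

int main(void)
{
	uint64_t prev  = 0xFFFFFFFFFFF0ULL;	/* close to the 48-bit limit */
	uint64_t count = 0x000000000010ULL;	/* counter has wrapped around */
	uint64_t delta;

	delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
	delta >>= COUNTER_SHIFT;

	printf("delta = %llu\n", (unsigned long long)delta);	/* 32 */
	return 0;
}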
345 | static void perf_iommu_stop(struct perf_event *event, int flags) | ||
346 | { | ||
347 | struct hw_perf_event *hwc = &event->hw; | ||
348 | u64 config; | ||
349 | |||
350 | pr_debug("perf: amd_iommu:perf_iommu_stop\n"); | ||
351 | |||
352 | if (hwc->state & PERF_HES_UPTODATE) | ||
353 | return; | ||
354 | |||
355 | perf_iommu_disable_event(event); | ||
356 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
357 | hwc->state |= PERF_HES_STOPPED; | ||
358 | |||
359 | if (hwc->state & PERF_HES_UPTODATE) | ||
360 | return; | ||
361 | |||
362 | config = hwc->config; | ||
363 | perf_iommu_read(event); | ||
364 | hwc->state |= PERF_HES_UPTODATE; | ||
365 | } | ||
366 | |||
367 | static int perf_iommu_add(struct perf_event *event, int flags) | ||
368 | { | ||
369 | int retval; | ||
370 | struct perf_amd_iommu *perf_iommu = | ||
371 | container_of(event->pmu, struct perf_amd_iommu, pmu); | ||
372 | |||
373 | pr_debug("perf: amd_iommu:perf_iommu_add\n"); | ||
374 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
375 | |||
376 | /* request an iommu bank/counter */ | ||
377 | retval = get_next_avail_iommu_bnk_cntr(perf_iommu); | ||
378 | if (retval != -ENOSPC) | ||
379 | event->hw.extra_reg.reg = (u16)retval; | ||
380 | else | ||
381 | return retval; | ||
382 | |||
383 | if (flags & PERF_EF_START) | ||
384 | perf_iommu_start(event, PERF_EF_RELOAD); | ||
385 | |||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | static void perf_iommu_del(struct perf_event *event, int flags) | ||
390 | { | ||
391 | struct perf_amd_iommu *perf_iommu = | ||
392 | container_of(event->pmu, struct perf_amd_iommu, pmu); | ||
393 | |||
394 | pr_debug("perf: amd_iommu:perf_iommu_del\n"); | ||
395 | perf_iommu_stop(event, PERF_EF_UPDATE); | ||
396 | |||
397 | /* clear the assigned iommu bank/counter */ | ||
398 | clear_avail_iommu_bnk_cntr(perf_iommu, | ||
399 | _GET_BANK(event), | ||
400 | _GET_CNTR(event)); | ||
401 | |||
402 | perf_event_update_userpage(event); | ||
403 | } | ||
404 | |||
405 | static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu) | ||
406 | { | ||
407 | struct attribute **attrs; | ||
408 | struct attribute_group *attr_group; | ||
409 | int i = 0, j; | ||
410 | |||
411 | while (amd_iommu_v2_event_descs[i].attr.attr.name) | ||
412 | i++; | ||
413 | |||
414 | attr_group = kzalloc(sizeof(struct attribute *) | ||
415 | * (i + 1) + sizeof(*attr_group), GFP_KERNEL); | ||
416 | if (!attr_group) | ||
417 | return -ENOMEM; | ||
418 | |||
419 | attrs = (struct attribute **)(attr_group + 1); | ||
420 | for (j = 0; j < i; j++) | ||
421 | attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr; | ||
422 | |||
423 | attr_group->name = "events"; | ||
424 | attr_group->attrs = attrs; | ||
425 | perf_iommu->events_group = attr_group; | ||
426 | |||
427 | return 0; | ||
428 | } | ||
429 | |||
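The kzalloc() above carves the attribute_group header and its NULL-terminated pointer array out of one zeroed allocation; a simplified user-space sketch of the same layout (struct definitions stripped down for illustration):

#include <stdlib.h>

struct attribute { const char *name; };
struct attribute_group {
	const char *name;
	struct attribute **attrs;
};

static struct attribute_group *alloc_group(int nr_events)
{
	struct attribute_group *grp;

	grp = calloc(1, sizeof(*grp) +
			sizeof(struct attribute *) * (nr_events + 1));
	if (!grp)
		return NULL;
	grp->attrs = (struct attribute **)(grp + 1);	/* array follows the header */
	/* calloc() zeroes the block, so attrs[nr_events] is already NULL */
	return grp;
}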
430 | static __init void amd_iommu_pc_exit(void) | ||
431 | { | ||
432 | if (__perf_iommu.events_group != NULL) { | ||
433 | kfree(__perf_iommu.events_group); | ||
434 | __perf_iommu.events_group = NULL; | ||
435 | } | ||
436 | } | ||
437 | |||
438 | static __init int _init_perf_amd_iommu( | ||
439 | struct perf_amd_iommu *perf_iommu, char *name) | ||
440 | { | ||
441 | int ret; | ||
442 | |||
443 | raw_spin_lock_init(&perf_iommu->lock); | ||
444 | |||
445 | /* Init format attributes */ | ||
446 | perf_iommu->format_group = &amd_iommu_format_group; | ||
447 | |||
448 | /* Init cpumask attributes to only core 0 */ | ||
449 | cpumask_set_cpu(0, &iommu_cpumask); | ||
450 | perf_iommu->cpumask_group = &amd_iommu_cpumask_group; | ||
451 | |||
452 | /* Init events attributes */ | ||
453 | if (_init_events_attrs(perf_iommu) != 0) | ||
454 | pr_err("perf: amd_iommu: Only support raw events.\n"); | ||
455 | |||
456 | /* Init null attributes */ | ||
457 | perf_iommu->null_group = NULL; | ||
458 | perf_iommu->pmu.attr_groups = perf_iommu->attr_groups; | ||
459 | |||
460 | ret = perf_pmu_register(&perf_iommu->pmu, name, -1); | ||
461 | if (ret) { | ||
462 | pr_err("perf: amd_iommu: Failed to initialized.\n"); | ||
463 | amd_iommu_pc_exit(); | ||
464 | } else { | ||
465 | pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n", | ||
466 | amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID), | ||
467 | amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID)); | ||
468 | } | ||
469 | |||
470 | return ret; | ||
471 | } | ||
472 | |||
473 | static struct perf_amd_iommu __perf_iommu = { | ||
474 | .pmu = { | ||
475 | .event_init = perf_iommu_event_init, | ||
476 | .add = perf_iommu_add, | ||
477 | .del = perf_iommu_del, | ||
478 | .start = perf_iommu_start, | ||
479 | .stop = perf_iommu_stop, | ||
480 | .read = perf_iommu_read, | ||
481 | }, | ||
482 | .max_banks = 0x00, | ||
483 | .max_counters = 0x00, | ||
484 | .cntr_assign_mask = 0ULL, | ||
485 | .format_group = NULL, | ||
486 | .cpumask_group = NULL, | ||
487 | .events_group = NULL, | ||
488 | .null_group = NULL, | ||
489 | }; | ||
490 | |||
491 | static __init int amd_iommu_pc_init(void) | ||
492 | { | ||
493 | /* Make sure the IOMMU PC resource is available */ | ||
494 | if (!amd_iommu_pc_supported()) { | ||
495 | pr_err("perf: amd_iommu PMU not installed. No support!\n"); | ||
496 | return -ENODEV; | ||
497 | } | ||
498 | |||
499 | _init_perf_amd_iommu(&__perf_iommu, "amd_iommu"); | ||
500 | |||
501 | return 0; | ||
502 | } | ||
503 | |||
504 | device_initcall(amd_iommu_pc_init); | ||
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h new file mode 100644 index 000000000000..845d173278e3 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.h | |||
@@ -0,0 +1,40 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Author: Steven Kinney <Steven.Kinney@amd.com> | ||
5 | * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #ifndef _PERF_EVENT_AMD_IOMMU_H_ | ||
13 | #define _PERF_EVENT_AMD_IOMMU_H_ | ||
14 | |||
15 | /* iommu pc mmio region register indexes */ | ||
16 | #define IOMMU_PC_COUNTER_REG 0x00 | ||
17 | #define IOMMU_PC_COUNTER_SRC_REG 0x08 | ||
18 | #define IOMMU_PC_PASID_MATCH_REG 0x10 | ||
19 | #define IOMMU_PC_DOMID_MATCH_REG 0x18 | ||
20 | #define IOMMU_PC_DEVID_MATCH_REG 0x20 | ||
21 | #define IOMMU_PC_COUNTER_REPORT_REG 0x28 | ||
22 | |||
23 | /* maximum specified banks/counters */ | ||
24 | #define PC_MAX_SPEC_BNKS 64 | ||
25 | #define PC_MAX_SPEC_CNTRS 16 | ||
26 | |||
27 | /* iommu pc reg masks*/ | ||
28 | #define IOMMU_BASE_DEVID 0x0000 | ||
29 | |||
30 | /* amd_iommu_init.c external support functions */ | ||
31 | extern bool amd_iommu_pc_supported(void); | ||
32 | |||
33 | extern u8 amd_iommu_pc_get_max_banks(u16 devid); | ||
34 | |||
35 | extern u8 amd_iommu_pc_get_max_counters(u16 devid); | ||
36 | |||
37 | extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, | ||
38 | u8 fxn, u64 *value, bool is_write); | ||
39 | |||
40 | #endif /*_PERF_EVENT_AMD_IOMMU_H_*/ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index f60d41ff9a97..fbc9210b45bc 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/export.h> | 14 | #include <linux/export.h> |
15 | 15 | ||
16 | #include <asm/cpufeature.h> | ||
16 | #include <asm/hardirq.h> | 17 | #include <asm/hardirq.h> |
17 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
18 | 19 | ||
@@ -165,13 +166,13 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = { | |||
165 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), | 166 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), |
166 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), | 167 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), |
167 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), | 168 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), |
168 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), | ||
169 | EVENT_EXTRA_END | 169 | EVENT_EXTRA_END |
170 | }; | 170 | }; |
171 | 171 | ||
172 | static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { | 172 | static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { |
173 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), | 173 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), |
174 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), | 174 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), |
175 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), | ||
175 | EVENT_EXTRA_END | 176 | EVENT_EXTRA_END |
176 | }; | 177 | }; |
177 | 178 | ||
@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = { | |||
190 | NULL, | 191 | NULL, |
191 | }; | 192 | }; |
192 | 193 | ||
194 | static struct event_constraint intel_hsw_event_constraints[] = { | ||
195 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
196 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
197 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ | ||
198 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */ | ||
199 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ | ||
200 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ | ||
201 | /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ | ||
202 | INTEL_EVENT_CONSTRAINT(0x08a3, 0x4), | ||
203 | /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ | ||
204 | INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4), | ||
205 | /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ | ||
206 | INTEL_EVENT_CONSTRAINT(0x04a3, 0xf), | ||
207 | EVENT_CONSTRAINT_END | ||
208 | }; | ||
209 | |||
193 | static u64 intel_pmu_event_map(int hw_event) | 210 | static u64 intel_pmu_event_map(int hw_event) |
194 | { | 211 | { |
195 | return intel_perfmon_event_map[hw_event]; | 212 | return intel_perfmon_event_map[hw_event]; |
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) | |||
872 | return true; | 889 | return true; |
873 | 890 | ||
874 | /* implicit branch sampling to correct PEBS skid */ | 891 | /* implicit branch sampling to correct PEBS skid */ |
875 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | 892 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 && |
893 | x86_pmu.intel_cap.pebs_format < 2) | ||
876 | return true; | 894 | return true; |
877 | 895 | ||
878 | return false; | 896 | return false; |
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1167 | cpuc = &__get_cpu_var(cpu_hw_events); | 1185 | cpuc = &__get_cpu_var(cpu_hw_events); |
1168 | 1186 | ||
1169 | /* | 1187 | /* |
1170 | * Some chipsets need to unmask the LVTPC in a particular spot | 1188 | * No known reason to not always do late ACK, |
1171 | * inside the nmi handler. As a result, the unmasking was pushed | 1189 | * but just in case do it opt-in. |
1172 | * into all the nmi handlers. | ||
1173 | * | ||
1174 | * This handler doesn't seem to have any issues with the unmasking | ||
1175 | * so it was left at the top. | ||
1176 | */ | 1190 | */ |
1177 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1191 | if (!x86_pmu.late_ack) |
1178 | 1192 | apic_write(APIC_LVTPC, APIC_DM_NMI); | |
1179 | intel_pmu_disable_all(); | 1193 | intel_pmu_disable_all(); |
1180 | handled = intel_pmu_drain_bts_buffer(); | 1194 | handled = intel_pmu_drain_bts_buffer(); |
1181 | status = intel_pmu_get_status(); | 1195 | status = intel_pmu_get_status(); |
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1188 | again: | 1202 | again: |
1189 | intel_pmu_ack_status(status); | 1203 | intel_pmu_ack_status(status); |
1190 | if (++loops > 100) { | 1204 | if (++loops > 100) { |
1191 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | 1205 | static bool warned = false; |
1192 | perf_event_print_debug(); | 1206 | if (!warned) { |
1207 | WARN(1, "perfevents: irq loop stuck!\n"); | ||
1208 | perf_event_print_debug(); | ||
1209 | warned = true; | ||
1210 | } | ||
1193 | intel_pmu_reset(); | 1211 | intel_pmu_reset(); |
1194 | goto done; | 1212 | goto done; |
1195 | } | 1213 | } |
@@ -1235,6 +1253,13 @@ again: | |||
1235 | 1253 | ||
1236 | done: | 1254 | done: |
1237 | intel_pmu_enable_all(0); | 1255 | intel_pmu_enable_all(0); |
1256 | /* | ||
1257 | * Only unmask the NMI after the overflow counters | ||
1258 | * have been reset. This avoids spurious NMIs on | ||
1259 | * Haswell CPUs. | ||
1260 | */ | ||
1261 | if (x86_pmu.late_ack) | ||
1262 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1238 | return handled; | 1263 | return handled; |
1239 | } | 1264 | } |
1240 | 1265 | ||
@@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
1425 | if (x86_pmu.event_constraints) { | 1450 | if (x86_pmu.event_constraints) { |
1426 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1451 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
1427 | if ((event->hw.config & c->cmask) == c->code) { | 1452 | if ((event->hw.config & c->cmask) == c->code) { |
1428 | /* hw.flags zeroed at initialization */ | ||
1429 | event->hw.flags |= c->flags; | 1453 | event->hw.flags |= c->flags; |
1430 | return c; | 1454 | return c; |
1431 | } | 1455 | } |
@@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, | |||
1473 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | 1497 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, |
1474 | struct perf_event *event) | 1498 | struct perf_event *event) |
1475 | { | 1499 | { |
1476 | event->hw.flags = 0; | ||
1477 | intel_put_shared_regs_event_constraints(cpuc, event); | 1500 | intel_put_shared_regs_event_constraints(cpuc, event); |
1478 | } | 1501 | } |
1479 | 1502 | ||
@@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added) | |||
1646 | } | 1669 | } |
1647 | } | 1670 | } |
1648 | 1671 | ||
1672 | static int hsw_hw_config(struct perf_event *event) | ||
1673 | { | ||
1674 | int ret = intel_pmu_hw_config(event); | ||
1675 | |||
1676 | if (ret) | ||
1677 | return ret; | ||
1678 | if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE)) | ||
1679 | return 0; | ||
1680 | event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); | ||
1681 | |||
1682 | /* | ||
1683 | * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with | ||
1684 | * PEBS or in ANY thread mode. Since the results are non-sensical forbid | ||
1685 | * this combination. | ||
1686 | */ | ||
1687 | if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) && | ||
1688 | ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) || | ||
1689 | event->attr.precise_ip > 0)) | ||
1690 | return -EOPNOTSUPP; | ||
1691 | |||
1692 | return 0; | ||
1693 | } | ||
1694 | |||
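For reference, a sketch of how the IN_TX/IN_TX_CHECKPOINTED bits accepted by hsw_hw_config() map onto config:32 and config:33 (the in_tx/in_tx_cp format attributes added further down). The raw event value is a placeholder, and the macro definitions mirror the kernel-internal ones:

#include <stdint.h>
#include <string.h>
#include <linux/perf_event.h>

#define HSW_IN_TX		(1ULL << 32)	/* count only inside a transaction */
#define HSW_IN_TX_CHECKPOINTED	(1ULL << 33)	/* checkpointed-counting quirk bit */

static void setup_tx_event(struct perf_event_attr *attr, uint64_t raw_event)
{
	memset(attr, 0, sizeof(*attr));
	attr->size = sizeof(*attr);
	attr->type = PERF_TYPE_RAW;
	attr->config = raw_event | HSW_IN_TX;	/* restrict to transactional work */
	/* Note: combining these bits with .any or precise_ip > 0 is rejected
	 * by hsw_hw_config() above with -EOPNOTSUPP. */
}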
1695 | static struct event_constraint counter2_constraint = | ||
1696 | EVENT_CONSTRAINT(0, 0x4, 0); | ||
1697 | |||
1698 | static struct event_constraint * | ||
1699 | hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
1700 | { | ||
1701 | struct event_constraint *c = intel_get_event_constraints(cpuc, event); | ||
1702 | |||
1703 | /* Handle special quirk on in_tx_checkpointed only in counter 2 */ | ||
1704 | if (event->hw.config & HSW_IN_TX_CHECKPOINTED) { | ||
1705 | if (c->idxmsk64 & (1U << 2)) | ||
1706 | return &counter2_constraint; | ||
1707 | return &emptyconstraint; | ||
1708 | } | ||
1709 | |||
1710 | return c; | ||
1711 | } | ||
1712 | |||
1649 | PMU_FORMAT_ATTR(event, "config:0-7" ); | 1713 | PMU_FORMAT_ATTR(event, "config:0-7" ); |
1650 | PMU_FORMAT_ATTR(umask, "config:8-15" ); | 1714 | PMU_FORMAT_ATTR(umask, "config:8-15" ); |
1651 | PMU_FORMAT_ATTR(edge, "config:18" ); | 1715 | PMU_FORMAT_ATTR(edge, "config:18" ); |
@@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc, "config:19" ); | |||
1653 | PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ | 1717 | PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ |
1654 | PMU_FORMAT_ATTR(inv, "config:23" ); | 1718 | PMU_FORMAT_ATTR(inv, "config:23" ); |
1655 | PMU_FORMAT_ATTR(cmask, "config:24-31" ); | 1719 | PMU_FORMAT_ATTR(cmask, "config:24-31" ); |
1720 | PMU_FORMAT_ATTR(in_tx, "config:32"); | ||
1721 | PMU_FORMAT_ATTR(in_tx_cp, "config:33"); | ||
1656 | 1722 | ||
1657 | static struct attribute *intel_arch_formats_attr[] = { | 1723 | static struct attribute *intel_arch_formats_attr[] = { |
1658 | &format_attr_event.attr, | 1724 | &format_attr_event.attr, |
@@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = { | |||
1807 | &format_attr_any.attr, | 1873 | &format_attr_any.attr, |
1808 | &format_attr_inv.attr, | 1874 | &format_attr_inv.attr, |
1809 | &format_attr_cmask.attr, | 1875 | &format_attr_cmask.attr, |
1876 | &format_attr_in_tx.attr, | ||
1877 | &format_attr_in_tx_cp.attr, | ||
1810 | 1878 | ||
1811 | &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ | 1879 | &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ |
1812 | &format_attr_ldlat.attr, /* PEBS load latency */ | 1880 | &format_attr_ldlat.attr, /* PEBS load latency */ |
@@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void) | |||
1966 | } | 2034 | } |
1967 | } | 2035 | } |
1968 | 2036 | ||
2037 | EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); | ||
2038 | EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") | ||
2039 | |||
2040 | static struct attribute *hsw_events_attrs[] = { | ||
2041 | EVENT_PTR(mem_ld_hsw), | ||
2042 | EVENT_PTR(mem_st_hsw), | ||
2043 | NULL | ||
2044 | }; | ||
2045 | |||
1969 | __init int intel_pmu_init(void) | 2046 | __init int intel_pmu_init(void) |
1970 | { | 2047 | { |
1971 | union cpuid10_edx edx; | 2048 | union cpuid10_edx edx; |
@@ -2189,6 +2266,30 @@ __init int intel_pmu_init(void) | |||
2189 | break; | 2266 | break; |
2190 | 2267 | ||
2191 | 2268 | ||
2269 | case 60: /* Haswell Client */ | ||
2270 | case 70: | ||
2271 | case 71: | ||
2272 | case 63: | ||
2273 | x86_pmu.late_ack = true; | ||
2274 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); | ||
2275 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); | ||
2276 | |||
2277 | intel_pmu_lbr_init_snb(); | ||
2278 | |||
2279 | x86_pmu.event_constraints = intel_hsw_event_constraints; | ||
2280 | x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; | ||
2281 | x86_pmu.extra_regs = intel_snb_extra_regs; | ||
2282 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; | ||
2283 | /* all extra regs are per-cpu when HT is on */ | ||
2284 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | ||
2285 | x86_pmu.er_flags |= ERF_NO_HT_SHARING; | ||
2286 | |||
2287 | x86_pmu.hw_config = hsw_hw_config; | ||
2288 | x86_pmu.get_event_constraints = hsw_get_event_constraints; | ||
2289 | x86_pmu.cpu_events = hsw_events_attrs; | ||
2290 | pr_cont("Haswell events, "); | ||
2291 | break; | ||
2292 | |||
2192 | default: | 2293 | default: |
2193 | switch (x86_pmu.version) { | 2294 | switch (x86_pmu.version) { |
2194 | case 1: | 2295 | case 1: |
@@ -2227,7 +2328,7 @@ __init int intel_pmu_init(void) | |||
2227 | * counter, so do not extend mask to generic counters | 2328 | * counter, so do not extend mask to generic counters |
2228 | */ | 2329 | */ |
2229 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 2330 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
2230 | if (c->cmask != X86_RAW_EVENT_MASK | 2331 | if (c->cmask != FIXED_EVENT_FLAGS |
2231 | || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { | 2332 | || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { |
2232 | continue; | 2333 | continue; |
2233 | } | 2334 | } |
@@ -2237,5 +2338,12 @@ __init int intel_pmu_init(void) | |||
2237 | } | 2338 | } |
2238 | } | 2339 | } |
2239 | 2340 | ||
2341 | /* Support full width counters using alternative MSR range */ | ||
2342 | if (x86_pmu.intel_cap.full_width_write) { | ||
2343 | x86_pmu.max_period = x86_pmu.cntval_mask; | ||
2344 | x86_pmu.perfctr = MSR_IA32_PMC0; | ||
2345 | pr_cont("full-width counters, "); | ||
2346 | } | ||
2347 | |||
2240 | return 0; | 2348 | return 0; |
2241 | } | 2349 | } |
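A sketch of what the full-width switch at the end of intel_pmu_init() changes: counter reloads move from the legacy 0xc1-based counters, whose writes are sign-extended from bit 31, to the 0x4c1 alias MSRs that accept the full counter width. MSR numbers are from the Intel SDM; the wrmsrl() stub below only stands in for the kernel helper.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define MSR_ARCH_PERFMON_PERFCTR0	0x0c1	/* legacy: writes sign-extended from bit 31 */
#define MSR_IA32_PMC0			0x4c1	/* alias: accepts full-width writes */

static void wrmsrl(unsigned int msr, uint64_t val)	/* stand-in for the kernel helper */
{
	printf("wrmsr 0x%x <- %#llx\n", msr, (unsigned long long)val);
}

static void write_counter(int idx, uint64_t val, bool full_width_write)
{
	unsigned int msr = full_width_write ? MSR_IA32_PMC0 + idx
					    : MSR_ARCH_PERFMON_PERFCTR0 + idx;
	wrmsrl(msr, val);
}

int main(void)
{
	write_counter(0, -1000ULL & ((1ULL << 48) - 1), true);	/* program a 48-bit period */
	return 0;
}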
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 60250f687052..3065c57a63c1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status) | |||
107 | return val; | 107 | return val; |
108 | } | 108 | } |
109 | 109 | ||
110 | static u64 precise_store_data_hsw(u64 status) | ||
111 | { | ||
112 | union perf_mem_data_src dse; | ||
113 | |||
114 | dse.val = 0; | ||
115 | dse.mem_op = PERF_MEM_OP_STORE; | ||
116 | dse.mem_lvl = PERF_MEM_LVL_NA; | ||
117 | if (status & 1) | ||
118 | dse.mem_lvl = PERF_MEM_LVL_L1; | ||
119 | /* Nothing else supported. Sorry. */ | ||
120 | return dse.val; | ||
121 | } | ||
122 | |||
110 | static u64 load_latency_data(u64 status) | 123 | static u64 load_latency_data(u64 status) |
111 | { | 124 | { |
112 | union intel_x86_pebs_dse dse; | 125 | union intel_x86_pebs_dse dse; |
@@ -165,6 +178,22 @@ struct pebs_record_nhm { | |||
165 | u64 status, dla, dse, lat; | 178 | u64 status, dla, dse, lat; |
166 | }; | 179 | }; |
167 | 180 | ||
181 | /* | ||
182 | * Same as pebs_record_nhm, with two additional fields. | ||
183 | */ | ||
184 | struct pebs_record_hsw { | ||
185 | struct pebs_record_nhm nhm; | ||
186 | /* | ||
187 | * Real IP of the event. In the Intel documentation this | ||
188 | * is called eventingrip. | ||
189 | */ | ||
190 | u64 real_ip; | ||
191 | /* | ||
192 | * TSX tuning information field: abort cycles and abort flags. | ||
193 | */ | ||
194 | u64 tsx_tuning; | ||
195 | }; | ||
196 | |||
168 | void init_debug_store_on_cpu(int cpu) | 197 | void init_debug_store_on_cpu(int cpu) |
169 | { | 198 | { |
170 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 199 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; |
@@ -548,6 +577,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { | |||
548 | EVENT_CONSTRAINT_END | 577 | EVENT_CONSTRAINT_END |
549 | }; | 578 | }; |
550 | 579 | ||
580 | struct event_constraint intel_hsw_pebs_event_constraints[] = { | ||
581 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ | ||
582 | INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ | ||
583 | INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ | ||
584 | INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ | ||
585 | INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */ | ||
586 | INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */ | ||
587 | INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */ | ||
588 | INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */ | ||
589 | /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ | ||
590 | INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), | ||
591 | /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ | ||
592 | INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), | ||
593 | INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ | ||
594 | INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ | ||
595 | /* MEM_UOPS_RETIRED.SPLIT_STORES */ | ||
596 | INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), | ||
597 | INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ | ||
598 | INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ | ||
599 | INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */ | ||
600 | INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */ | ||
601 | INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */ | ||
602 | /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */ | ||
603 | INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf), | ||
604 | /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */ | ||
605 | INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf), | ||
606 | /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */ | ||
607 | INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf), | ||
608 | /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */ | ||
609 | INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf), | ||
610 | INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */ | ||
611 | INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */ | ||
612 | |||
613 | EVENT_CONSTRAINT_END | ||
614 | }; | ||
615 | |||
551 | struct event_constraint *intel_pebs_constraints(struct perf_event *event) | 616 | struct event_constraint *intel_pebs_constraints(struct perf_event *event) |
552 | { | 617 | { |
553 | struct event_constraint *c; | 618 | struct event_constraint *c; |
@@ -588,6 +653,12 @@ void intel_pmu_pebs_disable(struct perf_event *event) | |||
588 | struct hw_perf_event *hwc = &event->hw; | 653 | struct hw_perf_event *hwc = &event->hw; |
589 | 654 | ||
590 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); | 655 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); |
656 | |||
657 | if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT) | ||
658 | cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); | ||
659 | else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST) | ||
660 | cpuc->pebs_enabled &= ~(1ULL << 63); | ||
661 | |||
591 | if (cpuc->enabled) | 662 | if (cpuc->enabled) |
592 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | 663 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); |
593 | 664 | ||
@@ -697,6 +768,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
697 | */ | 768 | */ |
698 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 769 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
699 | struct pebs_record_nhm *pebs = __pebs; | 770 | struct pebs_record_nhm *pebs = __pebs; |
771 | struct pebs_record_hsw *pebs_hsw = __pebs; | ||
700 | struct perf_sample_data data; | 772 | struct perf_sample_data data; |
701 | struct pt_regs regs; | 773 | struct pt_regs regs; |
702 | u64 sample_type; | 774 | u64 sample_type; |
@@ -706,7 +778,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
706 | return; | 778 | return; |
707 | 779 | ||
708 | fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; | 780 | fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; |
709 | fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST; | 781 | fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST | |
782 | PERF_X86_EVENT_PEBS_ST_HSW); | ||
710 | 783 | ||
711 | perf_sample_data_init(&data, 0, event->hw.last_period); | 784 | perf_sample_data_init(&data, 0, event->hw.last_period); |
712 | 785 | ||
@@ -717,9 +790,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
717 | * if PEBS-LL or PreciseStore | 790 | * if PEBS-LL or PreciseStore |
718 | */ | 791 | */ |
719 | if (fll || fst) { | 792 | if (fll || fst) { |
720 | if (sample_type & PERF_SAMPLE_ADDR) | ||
721 | data.addr = pebs->dla; | ||
722 | |||
723 | /* | 793 | /* |
724 | * Use latency for weight (only avail with PEBS-LL) | 794 | * Use latency for weight (only avail with PEBS-LL) |
725 | */ | 795 | */ |
@@ -732,6 +802,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
732 | if (sample_type & PERF_SAMPLE_DATA_SRC) { | 802 | if (sample_type & PERF_SAMPLE_DATA_SRC) { |
733 | if (fll) | 803 | if (fll) |
734 | data.data_src.val = load_latency_data(pebs->dse); | 804 | data.data_src.val = load_latency_data(pebs->dse); |
805 | else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) | ||
806 | data.data_src.val = | ||
807 | precise_store_data_hsw(pebs->dse); | ||
735 | else | 808 | else |
736 | data.data_src.val = precise_store_data(pebs->dse); | 809 | data.data_src.val = precise_store_data(pebs->dse); |
737 | } | 810 | } |
@@ -753,11 +826,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
753 | regs.bp = pebs->bp; | 826 | regs.bp = pebs->bp; |
754 | regs.sp = pebs->sp; | 827 | regs.sp = pebs->sp; |
755 | 828 | ||
756 | if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) | 829 | if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { |
830 | regs.ip = pebs_hsw->real_ip; | ||
831 | regs.flags |= PERF_EFLAGS_EXACT; | ||
832 | } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) | ||
757 | regs.flags |= PERF_EFLAGS_EXACT; | 833 | regs.flags |= PERF_EFLAGS_EXACT; |
758 | else | 834 | else |
759 | regs.flags &= ~PERF_EFLAGS_EXACT; | 835 | regs.flags &= ~PERF_EFLAGS_EXACT; |
760 | 836 | ||
837 | if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && | ||
838 | x86_pmu.intel_cap.pebs_format >= 1) | ||
839 | data.addr = pebs->dla; | ||
840 | |||
761 | if (has_branch_stack(event)) | 841 | if (has_branch_stack(event)) |
762 | data.br_stack = &cpuc->lbr_stack; | 842 | data.br_stack = &cpuc->lbr_stack; |
763 | 843 | ||
@@ -806,35 +886,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | |||
806 | __intel_pmu_pebs_event(event, iregs, at); | 886 | __intel_pmu_pebs_event(event, iregs, at); |
807 | } | 887 | } |
808 | 888 | ||
809 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | 889 | static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, |
890 | void *top) | ||
810 | { | 891 | { |
811 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 892 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
812 | struct debug_store *ds = cpuc->ds; | 893 | struct debug_store *ds = cpuc->ds; |
813 | struct pebs_record_nhm *at, *top; | ||
814 | struct perf_event *event = NULL; | 894 | struct perf_event *event = NULL; |
815 | u64 status = 0; | 895 | u64 status = 0; |
816 | int bit, n; | 896 | int bit; |
817 | |||
818 | if (!x86_pmu.pebs_active) | ||
819 | return; | ||
820 | |||
821 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
822 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
823 | 897 | ||
824 | ds->pebs_index = ds->pebs_buffer_base; | 898 | ds->pebs_index = ds->pebs_buffer_base; |
825 | 899 | ||
826 | n = top - at; | 900 | for (; at < top; at += x86_pmu.pebs_record_size) { |
827 | if (n <= 0) | 901 | struct pebs_record_nhm *p = at; |
828 | return; | ||
829 | |||
830 | /* | ||
831 | * Should not happen, we program the threshold at 1 and do not | ||
832 | * set a reset value. | ||
833 | */ | ||
834 | WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n); | ||
835 | 902 | ||
836 | for ( ; at < top; at++) { | 903 | for_each_set_bit(bit, (unsigned long *)&p->status, |
837 | for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) { | 904 | x86_pmu.max_pebs_events) { |
838 | event = cpuc->events[bit]; | 905 | event = cpuc->events[bit]; |
839 | if (!test_bit(bit, cpuc->active_mask)) | 906 | if (!test_bit(bit, cpuc->active_mask)) |
840 | continue; | 907 | continue; |
@@ -857,6 +924,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | |||
857 | } | 924 | } |
858 | } | 925 | } |
859 | 926 | ||
927 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
928 | { | ||
929 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
930 | struct debug_store *ds = cpuc->ds; | ||
931 | struct pebs_record_nhm *at, *top; | ||
932 | int n; | ||
933 | |||
934 | if (!x86_pmu.pebs_active) | ||
935 | return; | ||
936 | |||
937 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
938 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
939 | |||
940 | ds->pebs_index = ds->pebs_buffer_base; | ||
941 | |||
942 | n = top - at; | ||
943 | if (n <= 0) | ||
944 | return; | ||
945 | |||
946 | /* | ||
947 | * Should not happen, we program the threshold at 1 and do not | ||
948 | * set a reset value. | ||
949 | */ | ||
950 | WARN_ONCE(n > x86_pmu.max_pebs_events, | ||
951 | "Unexpected number of pebs records %d\n", n); | ||
952 | |||
953 | return __intel_pmu_drain_pebs_nhm(iregs, at, top); | ||
954 | } | ||
955 | |||
956 | static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs) | ||
957 | { | ||
958 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
959 | struct debug_store *ds = cpuc->ds; | ||
960 | struct pebs_record_hsw *at, *top; | ||
961 | int n; | ||
962 | |||
963 | if (!x86_pmu.pebs_active) | ||
964 | return; | ||
965 | |||
966 | at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base; | ||
967 | top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index; | ||
968 | |||
969 | n = top - at; | ||
970 | if (n <= 0) | ||
971 | return; | ||
972 | /* | ||
973 | * Should not happen, we program the threshold at 1 and do not | ||
974 | * set a reset value. | ||
975 | */ | ||
976 | WARN_ONCE(n > x86_pmu.max_pebs_events, | ||
977 | "Unexpected number of pebs records %d\n", n); | ||
978 | |||
979 | return __intel_pmu_drain_pebs_nhm(iregs, at, top); | ||
980 | } | ||
981 | |||
860 | /* | 982 | /* |
861 | * BTS, PEBS probe and setup | 983 | * BTS, PEBS probe and setup |
862 | */ | 984 | */ |
@@ -888,6 +1010,12 @@ void intel_ds_init(void) | |||
888 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | 1010 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; |
889 | break; | 1011 | break; |
890 | 1012 | ||
1013 | case 2: | ||
1014 | pr_cont("PEBS fmt2%c, ", pebs_type); | ||
1015 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); | ||
1016 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw; | ||
1017 | break; | ||
1018 | |||
891 | default: | 1019 | default: |
892 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); | 1020 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); |
893 | x86_pmu.pebs = 0; | 1021 | x86_pmu.pebs = 0; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d978353c939b..d5be06a5005e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -12,6 +12,16 @@ enum { | |||
12 | LBR_FORMAT_LIP = 0x01, | 12 | LBR_FORMAT_LIP = 0x01, |
13 | LBR_FORMAT_EIP = 0x02, | 13 | LBR_FORMAT_EIP = 0x02, |
14 | LBR_FORMAT_EIP_FLAGS = 0x03, | 14 | LBR_FORMAT_EIP_FLAGS = 0x03, |
15 | LBR_FORMAT_EIP_FLAGS2 = 0x04, | ||
16 | LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2, | ||
17 | }; | ||
18 | |||
19 | static enum { | ||
20 | LBR_EIP_FLAGS = 1, | ||
21 | LBR_TSX = 2, | ||
22 | } lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = { | ||
23 | [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS, | ||
24 | [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX, | ||
15 | }; | 25 | }; |
16 | 26 | ||
17 | /* | 27 | /* |
@@ -56,6 +66,8 @@ enum { | |||
56 | LBR_FAR) | 66 | LBR_FAR) |
57 | 67 | ||
58 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | 68 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) |
69 | #define LBR_FROM_FLAG_IN_TX (1ULL << 62) | ||
70 | #define LBR_FROM_FLAG_ABORT (1ULL << 61) | ||
59 | 71 | ||
60 | #define for_each_branch_sample_type(x) \ | 72 | #define for_each_branch_sample_type(x) \ |
61 | for ((x) = PERF_SAMPLE_BRANCH_USER; \ | 73 | for ((x) = PERF_SAMPLE_BRANCH_USER; \ |
@@ -81,9 +93,13 @@ enum { | |||
81 | X86_BR_JMP = 1 << 9, /* jump */ | 93 | X86_BR_JMP = 1 << 9, /* jump */ |
82 | X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ | 94 | X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ |
83 | X86_BR_IND_CALL = 1 << 11,/* indirect calls */ | 95 | X86_BR_IND_CALL = 1 << 11,/* indirect calls */ |
96 | X86_BR_ABORT = 1 << 12,/* transaction abort */ | ||
97 | X86_BR_IN_TX = 1 << 13,/* in transaction */ | ||
98 | X86_BR_NO_TX = 1 << 14,/* not in transaction */ | ||
84 | }; | 99 | }; |
85 | 100 | ||
86 | #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) | 101 | #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) |
102 | #define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX) | ||
87 | 103 | ||
88 | #define X86_BR_ANY \ | 104 | #define X86_BR_ANY \ |
89 | (X86_BR_CALL |\ | 105 | (X86_BR_CALL |\ |
@@ -95,6 +111,7 @@ enum { | |||
95 | X86_BR_JCC |\ | 111 | X86_BR_JCC |\ |
96 | X86_BR_JMP |\ | 112 | X86_BR_JMP |\ |
97 | X86_BR_IRQ |\ | 113 | X86_BR_IRQ |\ |
114 | X86_BR_ABORT |\ | ||
98 | X86_BR_IND_CALL) | 115 | X86_BR_IND_CALL) |
99 | 116 | ||
100 | #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) | 117 | #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) |
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | |||
270 | 287 | ||
271 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | 288 | for (i = 0; i < x86_pmu.lbr_nr; i++) { |
272 | unsigned long lbr_idx = (tos - i) & mask; | 289 | unsigned long lbr_idx = (tos - i) & mask; |
273 | u64 from, to, mis = 0, pred = 0; | 290 | u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; |
291 | int skip = 0; | ||
292 | int lbr_flags = lbr_desc[lbr_format]; | ||
274 | 293 | ||
275 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | 294 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); |
276 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | 295 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); |
277 | 296 | ||
278 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | 297 | if (lbr_flags & LBR_EIP_FLAGS) { |
279 | mis = !!(from & LBR_FROM_FLAG_MISPRED); | 298 | mis = !!(from & LBR_FROM_FLAG_MISPRED); |
280 | pred = !mis; | 299 | pred = !mis; |
281 | from = (u64)((((s64)from) << 1) >> 1); | 300 | skip = 1; |
301 | } | ||
302 | if (lbr_flags & LBR_TSX) { | ||
303 | in_tx = !!(from & LBR_FROM_FLAG_IN_TX); | ||
304 | abort = !!(from & LBR_FROM_FLAG_ABORT); | ||
305 | skip = 3; | ||
282 | } | 306 | } |
307 | from = (u64)((((s64)from) << skip) >> skip); | ||
283 | 308 | ||
284 | cpuc->lbr_entries[i].from = from; | 309 | cpuc->lbr_entries[i].from = from; |
285 | cpuc->lbr_entries[i].to = to; | 310 | cpuc->lbr_entries[i].to = to; |
286 | cpuc->lbr_entries[i].mispred = mis; | 311 | cpuc->lbr_entries[i].mispred = mis; |
287 | cpuc->lbr_entries[i].predicted = pred; | 312 | cpuc->lbr_entries[i].predicted = pred; |
313 | cpuc->lbr_entries[i].in_tx = in_tx; | ||
314 | cpuc->lbr_entries[i].abort = abort; | ||
288 | cpuc->lbr_entries[i].reserved = 0; | 315 | cpuc->lbr_entries[i].reserved = 0; |
289 | } | 316 | } |
290 | cpuc->lbr_stack.nr = i; | 317 | cpuc->lbr_stack.nr = i; |
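A worked example of the flag extraction and sign-extension above, for the TSX-aware format where the top three bits of LBR_FROM carry mispred/in_tx/abort and the branch source address is recovered by shifting by `skip`:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* user-space branch source with MISPRED (bit 63) and IN_TX (bit 62) set */
	uint64_t from = (1ULL << 63) | (1ULL << 62) | 0x00007f1234567890ULL;
	int mis     = !!(from & (1ULL << 63));
	int in_tx   = !!(from & (1ULL << 62));
	int aborted = !!(from & (1ULL << 61));
	int skip    = 3;	/* LBR_FORMAT_EIP_FLAGS2: three flag bits to strip */

	from = (uint64_t)((int64_t)(from << skip) >> skip);	/* sign-extend from the kept bits */

	/* prints: mis=1 in_tx=1 abort=0 from=0x7f1234567890 */
	printf("mis=%d in_tx=%d abort=%d from=%#llx\n",
	       mis, in_tx, aborted, (unsigned long long)from);
	return 0;
}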
@@ -310,7 +337,7 @@ void intel_pmu_lbr_read(void) | |||
310 | * - in case there is no HW filter | 337 | * - in case there is no HW filter |
311 | * - in case the HW filter has errata or limitations | 338 | * - in case the HW filter has errata or limitations |
312 | */ | 339 | */ |
313 | static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) | 340 | static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) |
314 | { | 341 | { |
315 | u64 br_type = event->attr.branch_sample_type; | 342 | u64 br_type = event->attr.branch_sample_type; |
316 | int mask = 0; | 343 | int mask = 0; |
@@ -318,11 +345,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) | |||
318 | if (br_type & PERF_SAMPLE_BRANCH_USER) | 345 | if (br_type & PERF_SAMPLE_BRANCH_USER) |
319 | mask |= X86_BR_USER; | 346 | mask |= X86_BR_USER; |
320 | 347 | ||
321 | if (br_type & PERF_SAMPLE_BRANCH_KERNEL) { | 348 | if (br_type & PERF_SAMPLE_BRANCH_KERNEL) |
322 | if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) | ||
323 | return -EACCES; | ||
324 | mask |= X86_BR_KERNEL; | 349 | mask |= X86_BR_KERNEL; |
325 | } | ||
326 | 350 | ||
327 | /* we ignore BRANCH_HV here */ | 351 | /* we ignore BRANCH_HV here */ |
328 | 352 | ||
@@ -337,13 +361,21 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) | |||
337 | 361 | ||
338 | if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) | 362 | if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) |
339 | mask |= X86_BR_IND_CALL; | 363 | mask |= X86_BR_IND_CALL; |
364 | |||
365 | if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX) | ||
366 | mask |= X86_BR_ABORT; | ||
367 | |||
368 | if (br_type & PERF_SAMPLE_BRANCH_IN_TX) | ||
369 | mask |= X86_BR_IN_TX; | ||
370 | |||
371 | if (br_type & PERF_SAMPLE_BRANCH_NO_TX) | ||
372 | mask |= X86_BR_NO_TX; | ||
373 | |||
340 | /* | 374 | /* |
341 | * stash actual user request into reg, it may | 375 | * stash actual user request into reg, it may |
342 | * be used by fixup code for some CPU | 376 | * be used by fixup code for some CPU |
343 | */ | 377 | */ |
344 | event->hw.branch_reg.reg = mask; | 378 | event->hw.branch_reg.reg = mask; |
345 | |||
346 | return 0; | ||
347 | } | 379 | } |
348 | 380 | ||
349 | /* | 381 | /* |
@@ -391,9 +423,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) | |||
391 | /* | 423 | /* |
392 | * setup SW LBR filter | 424 | * setup SW LBR filter |
393 | */ | 425 | */ |
394 | ret = intel_pmu_setup_sw_lbr_filter(event); | 426 | intel_pmu_setup_sw_lbr_filter(event); |
395 | if (ret) | ||
396 | return ret; | ||
397 | 427 | ||
398 | /* | 428 | /* |
399 | * setup HW LBR filter, if any | 429 | * setup HW LBR filter, if any |
@@ -415,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) | |||
415 | * decoded (e.g., text page not present), then X86_BR_NONE is | 445 | * decoded (e.g., text page not present), then X86_BR_NONE is |
416 | * returned. | 446 | * returned. |
417 | */ | 447 | */ |
418 | static int branch_type(unsigned long from, unsigned long to) | 448 | static int branch_type(unsigned long from, unsigned long to, int abort) |
419 | { | 449 | { |
420 | struct insn insn; | 450 | struct insn insn; |
421 | void *addr; | 451 | void *addr; |
@@ -435,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to) | |||
435 | if (from == 0 || to == 0) | 465 | if (from == 0 || to == 0) |
436 | return X86_BR_NONE; | 466 | return X86_BR_NONE; |
437 | 467 | ||
468 | if (abort) | ||
469 | return X86_BR_ABORT | to_plm; | ||
470 | |||
438 | if (from_plm == X86_BR_USER) { | 471 | if (from_plm == X86_BR_USER) { |
439 | /* | 472 | /* |
440 | * can happen if measuring at the user level only | 473 | * can happen if measuring at the user level only |
@@ -581,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) | |||
581 | from = cpuc->lbr_entries[i].from; | 614 | from = cpuc->lbr_entries[i].from; |
582 | to = cpuc->lbr_entries[i].to; | 615 | to = cpuc->lbr_entries[i].to; |
583 | 616 | ||
584 | type = branch_type(from, to); | 617 | type = branch_type(from, to, cpuc->lbr_entries[i].abort); |
618 | if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) { | ||
619 | if (cpuc->lbr_entries[i].in_tx) | ||
620 | type |= X86_BR_IN_TX; | ||
621 | else | ||
622 | type |= X86_BR_NO_TX; | ||
623 | } | ||
585 | 624 | ||
586 | /* if type does not correspond, then discard */ | 625 | /* if type does not correspond, then discard */ |
587 | if (type == X86_BR_NONE || (br_sel & type) != type) { | 626 | if (type == X86_BR_NONE || (br_sel & type) != type) { |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 52441a2af538..9dd99751ccf9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
@@ -536,7 +536,7 @@ __snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *eve | |||
536 | if (!uncore_box_is_fake(box)) | 536 | if (!uncore_box_is_fake(box)) |
537 | reg1->alloc |= alloc; | 537 | reg1->alloc |= alloc; |
538 | 538 | ||
539 | return 0; | 539 | return NULL; |
540 | fail: | 540 | fail: |
541 | for (; i >= 0; i--) { | 541 | for (; i >= 0; i--) { |
542 | if (alloc & (0x1 << i)) | 542 | if (alloc & (0x1 << i)) |
@@ -644,7 +644,7 @@ snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event) | |||
644 | (!uncore_box_is_fake(box) && reg1->alloc)) | 644 | (!uncore_box_is_fake(box) && reg1->alloc)) |
645 | return NULL; | 645 | return NULL; |
646 | again: | 646 | again: |
647 | mask = 0xff << (idx * 8); | 647 | mask = 0xffULL << (idx * 8); |
648 | raw_spin_lock_irqsave(&er->lock, flags); | 648 | raw_spin_lock_irqsave(&er->lock, flags); |
649 | if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || | 649 | if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || |
650 | !((config1 ^ er->config) & mask)) { | 650 | !((config1 ^ er->config) & mask)) { |
@@ -1923,7 +1923,7 @@ static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modif | |||
1923 | { | 1923 | { |
1924 | struct hw_perf_event *hwc = &event->hw; | 1924 | struct hw_perf_event *hwc = &event->hw; |
1925 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; | 1925 | struct hw_perf_event_extra *reg1 = &hwc->extra_reg; |
1926 | int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); | 1926 | u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); |
1927 | u64 config = reg1->config; | 1927 | u64 config = reg1->config; |
1928 | 1928 | ||
1929 | /* get the non-shared control bits and shift them */ | 1929 | /* get the non-shared control bits and shift them */ |
@@ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per | |||
2723 | static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) | 2723 | static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) |
2724 | { | 2724 | { |
2725 | unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; | 2725 | unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; |
2726 | struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; | 2726 | struct event_constraint *c; |
2727 | int i, wmin, wmax, ret = 0; | 2727 | int i, wmin, wmax, ret = 0; |
2728 | struct hw_perf_event *hwc; | 2728 | struct hw_perf_event *hwc; |
2729 | 2729 | ||
2730 | bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); | 2730 | bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); |
2731 | 2731 | ||
2732 | for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { | 2732 | for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { |
2733 | hwc = &box->event_list[i]->hw; | ||
2733 | c = uncore_get_event_constraint(box, box->event_list[i]); | 2734 | c = uncore_get_event_constraint(box, box->event_list[i]); |
2734 | constraints[i] = c; | 2735 | hwc->constraint = c; |
2735 | wmin = min(wmin, c->weight); | 2736 | wmin = min(wmin, c->weight); |
2736 | wmax = max(wmax, c->weight); | 2737 | wmax = max(wmax, c->weight); |
2737 | } | 2738 | } |
@@ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int | |||
2739 | /* fastpath, try to reuse previous register */ | 2740 | /* fastpath, try to reuse previous register */ |
2740 | for (i = 0; i < n; i++) { | 2741 | for (i = 0; i < n; i++) { |
2741 | hwc = &box->event_list[i]->hw; | 2742 | hwc = &box->event_list[i]->hw; |
2742 | c = constraints[i]; | 2743 | c = hwc->constraint; |
2743 | 2744 | ||
2744 | /* never assigned */ | 2745 | /* never assigned */ |
2745 | if (hwc->idx == -1) | 2746 | if (hwc->idx == -1) |
@@ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int | |||
2759 | } | 2760 | } |
2760 | /* slow path */ | 2761 | /* slow path */ |
2761 | if (i != n) | 2762 | if (i != n) |
2762 | ret = perf_assign_events(constraints, n, wmin, wmax, assign); | 2763 | ret = perf_assign_events(box->event_list, n, |
2764 | wmin, wmax, assign); | ||
2763 | 2765 | ||
2764 | if (!assign || ret) { | 2766 | if (!assign || ret) { |
2765 | for (i = 0; i < n; i++) | 2767 | for (i = 0; i < n; i++) |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index f9528917f6e8..47b3d00c9d89 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h | |||
@@ -337,10 +337,10 @@ | |||
337 | NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) | 337 | NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) |
338 | 338 | ||
339 | #define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) | 339 | #define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) |
340 | #define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n))) | 340 | #define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n))) |
341 | 341 | ||
342 | #define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) | 342 | #define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) |
343 | #define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n))) | 343 | #define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n))) |
344 | 344 | ||
345 | /* | 345 | /* |
346 | * use the 9~13 bits to select event If the 7th bit is not set, | 346 | * use the 9~13 bits to select event If the 7th bit is not set, |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 08f7e8039099..321d65ebaffe 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -115,8 +115,10 @@ startup_64: | |||
115 | movq %rdi, %rax | 115 | movq %rdi, %rax |
116 | shrq $PUD_SHIFT, %rax | 116 | shrq $PUD_SHIFT, %rax |
117 | andl $(PTRS_PER_PUD-1), %eax | 117 | andl $(PTRS_PER_PUD-1), %eax |
118 | movq %rdx, (4096+0)(%rbx,%rax,8) | 118 | movq %rdx, 4096(%rbx,%rax,8) |
119 | movq %rdx, (4096+8)(%rbx,%rax,8) | 119 | incl %eax |
120 | andl $(PTRS_PER_PUD-1), %eax | ||
121 | movq %rdx, 4096(%rbx,%rax,8) | ||
120 | 122 | ||
121 | addq $8192, %rbx | 123 | addq $8192, %rbx |
122 | movq %rdi, %rax | 124 | movq %rdi, %rax |
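A simplified C rendering of the head_64.S fix above (ignoring that the PUD page actually sits 4096 bytes into the early page-table block): the second mapping now wraps its index back into the table instead of writing one slot past the end when the first index is 511.

#include <stdint.h>

#define PTRS_PER_PUD	512
#define PUD_SHIFT	30	/* each PUD entry covers 1 GiB on x86-64 */

static void map_kernel_two_puds(uint64_t *pud, uint64_t phys, uint64_t entry)
{
	unsigned int idx = (phys >> PUD_SHIFT) & (PTRS_PER_PUD - 1);

	pud[idx] = entry;
	idx = (idx + 1) & (PTRS_PER_PUD - 1);	/* wrap, don't overflow the table */
	pud[idx] = entry;
}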
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 245a71db401a..cb339097b9ea 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -22,23 +22,19 @@ | |||
22 | /* | 22 | /* |
23 | * Were we in an interrupt that interrupted kernel mode? | 23 | * Were we in an interrupt that interrupted kernel mode? |
24 | * | 24 | * |
25 | * For now, with eagerfpu we will return interrupted kernel FPU | ||
26 | * state as not-idle. TBD: Ideally we can change the return value | ||
27 | * to something like __thread_has_fpu(current). But we need to | ||
28 | * be careful of doing __thread_clear_has_fpu() before saving | ||
29 | * the FPU etc for supporting nested uses etc. For now, take | ||
30 | * the simple route! | ||
31 | * | ||
32 | * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that | 25 | * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that |
33 | * pair does nothing at all: the thread must not have fpu (so | 26 | * pair does nothing at all: the thread must not have fpu (so |
34 | * that we don't try to save the FPU state), and TS must | 27 | * that we don't try to save the FPU state), and TS must |
35 | * be set (so that the clts/stts pair does nothing that is | 28 | * be set (so that the clts/stts pair does nothing that is |
36 | * visible in the interrupted kernel thread). | 29 | * visible in the interrupted kernel thread). |
30 | * | ||
31 | * Except for the eagerfpu case when we return 1 unless we've already | ||
32 | * been eager and saved the state in kernel_fpu_begin(). | ||
37 | */ | 33 | */ |
38 | static inline bool interrupted_kernel_fpu_idle(void) | 34 | static inline bool interrupted_kernel_fpu_idle(void) |
39 | { | 35 | { |
40 | if (use_eager_fpu()) | 36 | if (use_eager_fpu()) |
41 | return 0; | 37 | return __thread_has_fpu(current); |
42 | 38 | ||
43 | return !__thread_has_fpu(current) && | 39 | return !__thread_has_fpu(current) && |
44 | (read_cr0() & X86_CR0_TS); | 40 | (read_cr0() & X86_CR0_TS); |
@@ -78,8 +74,8 @@ void __kernel_fpu_begin(void) | |||
78 | struct task_struct *me = current; | 74 | struct task_struct *me = current; |
79 | 75 | ||
80 | if (__thread_has_fpu(me)) { | 76 | if (__thread_has_fpu(me)) { |
81 | __save_init_fpu(me); | ||
82 | __thread_clear_has_fpu(me); | 77 | __thread_clear_has_fpu(me); |
78 | __save_init_fpu(me); | ||
83 | /* We do 'stts()' in __kernel_fpu_end() */ | 79 | /* We do 'stts()' in __kernel_fpu_end() */ |
84 | } else if (!use_eager_fpu()) { | 80 | } else if (!use_eager_fpu()) { |
85 | this_cpu_write(fpu_owner_task, NULL); | 81 | this_cpu_write(fpu_owner_task, NULL); |
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 9895a9a41380..211bce445522 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c | |||
@@ -365,10 +365,14 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src) | |||
365 | return insn.length; | 365 | return insn.length; |
366 | } | 366 | } |
367 | 367 | ||
368 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | 368 | static int __kprobes arch_copy_kprobe(struct kprobe *p) |
369 | { | 369 | { |
370 | int ret; | ||
371 | |||
370 | /* Copy an instruction with recovering if other optprobe modifies it.*/ | 372 | /* Copy an instruction with recovering if other optprobe modifies it.*/ |
371 | __copy_instruction(p->ainsn.insn, p->addr); | 373 | ret = __copy_instruction(p->ainsn.insn, p->addr); |
374 | if (!ret) | ||
375 | return -EINVAL; | ||
372 | 376 | ||
373 | /* | 377 | /* |
374 | * __copy_instruction can modify the displacement of the instruction, | 378 | * __copy_instruction can modify the displacement of the instruction, |
@@ -384,6 +388,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) | |||
384 | 388 | ||
385 | /* Also, displacement change doesn't affect the first byte */ | 389 | /* Also, displacement change doesn't affect the first byte */ |
386 | p->opcode = p->ainsn.insn[0]; | 390 | p->opcode = p->ainsn.insn[0]; |
391 | |||
392 | return 0; | ||
387 | } | 393 | } |
388 | 394 | ||
389 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 395 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
@@ -397,8 +403,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) | |||
397 | p->ainsn.insn = get_insn_slot(); | 403 | p->ainsn.insn = get_insn_slot(); |
398 | if (!p->ainsn.insn) | 404 | if (!p->ainsn.insn) |
399 | return -ENOMEM; | 405 | return -ENOMEM; |
400 | arch_copy_kprobe(p); | 406 | |
401 | return 0; | 407 | return arch_copy_kprobe(p); |
402 | } | 408 | } |
403 | 409 | ||
404 | void __kprobes arch_arm_kprobe(struct kprobe *p) | 410 | void __kprobes arch_arm_kprobe(struct kprobe *p) |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d2c381280e3c..3dd37ebd591b 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -242,6 +242,7 @@ void __init kvmclock_init(void) | |||
242 | if (!mem) | 242 | if (!mem) |
243 | return; | 243 | return; |
244 | hv_clock = __va(mem); | 244 | hv_clock = __va(mem); |
245 | memset(hv_clock, 0, size); | ||
245 | 246 | ||
246 | if (kvm_register_clock("boot clock")) { | 247 | if (kvm_register_clock("boot clock")) { |
247 | hv_clock = NULL; | 248 | hv_clock = NULL; |
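The kvmclock hunk zeroes the freshly reserved pvclock area before it is registered with the hypervisor, since memblock hands back memory with unspecified contents. A sketch of that allocate-then-zero pattern; the helper name is illustrative:

/* Sketch: reserve a boot-time area and clear it before publishing it. */
#include <linux/memblock.h>
#include <linux/string.h>
#include <asm/page.h>

static void *alloc_zeroed_boot_area(unsigned long size)
{
        phys_addr_t mem = memblock_alloc(size, PAGE_SIZE);

        if (!mem)
                return NULL;

        return memset(__va(mem), 0, size);      /* no stale data visible later */
}
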
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 60308053fdb2..0920212e6159 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/kprobes.h> | 14 | #include <linux/kprobes.h> |
15 | #include <linux/kdebug.h> | 15 | #include <linux/kdebug.h> |
16 | #include <linux/nmi.h> | 16 | #include <linux/nmi.h> |
17 | #include <linux/debugfs.h> | ||
17 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
18 | #include <linux/hardirq.h> | 19 | #include <linux/hardirq.h> |
19 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
@@ -29,6 +30,9 @@ | |||
29 | #include <asm/nmi.h> | 30 | #include <asm/nmi.h> |
30 | #include <asm/x86_init.h> | 31 | #include <asm/x86_init.h> |
31 | 32 | ||
33 | #define CREATE_TRACE_POINTS | ||
34 | #include <trace/events/nmi.h> | ||
35 | |||
32 | struct nmi_desc { | 36 | struct nmi_desc { |
33 | spinlock_t lock; | 37 | spinlock_t lock; |
34 | struct list_head head; | 38 | struct list_head head; |
@@ -82,6 +86,15 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | |||
82 | 86 | ||
83 | #define nmi_to_desc(type) (&nmi_desc[type]) | 87 | #define nmi_to_desc(type) (&nmi_desc[type]) |
84 | 88 | ||
89 | static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC; | ||
90 | static int __init nmi_warning_debugfs(void) | ||
91 | { | ||
92 | debugfs_create_u64("nmi_longest_ns", 0644, | ||
93 | arch_debugfs_dir, &nmi_longest_ns); | ||
94 | return 0; | ||
95 | } | ||
96 | fs_initcall(nmi_warning_debugfs); | ||
97 | |||
85 | static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) | 98 | static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) |
86 | { | 99 | { |
87 | struct nmi_desc *desc = nmi_to_desc(type); | 100 | struct nmi_desc *desc = nmi_to_desc(type); |
@@ -96,8 +109,27 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 | |||
96 | * can be latched at any given time. Walk the whole list | 109 | * can be latched at any given time. Walk the whole list |
97 | * to handle those situations. | 110 | * to handle those situations. |
98 | */ | 111 | */ |
99 | list_for_each_entry_rcu(a, &desc->head, list) | 112 | list_for_each_entry_rcu(a, &desc->head, list) { |
100 | handled += a->handler(type, regs); | 113 | u64 before, delta, whole_msecs; |
114 | int decimal_msecs, thishandled; | ||
115 | |||
116 | before = local_clock(); | ||
117 | thishandled = a->handler(type, regs); | ||
118 | handled += thishandled; | ||
119 | delta = local_clock() - before; | ||
120 | trace_nmi_handler(a->handler, (int)delta, thishandled); | ||
121 | |||
122 | if (delta < nmi_longest_ns) | ||
123 | continue; | ||
124 | |||
125 | nmi_longest_ns = delta; | ||
126 | whole_msecs = do_div(delta, (1000 * 1000)); | ||
127 | decimal_msecs = do_div(delta, 1000) % 1000; | ||
128 | printk_ratelimited(KERN_INFO | ||
129 | "INFO: NMI handler (%ps) took too long to run: " | ||
130 | "%lld.%03d msecs\n", a->handler, whole_msecs, | ||
131 | decimal_msecs); | ||
132 | } | ||
101 | 133 | ||
102 | rcu_read_unlock(); | 134 | rcu_read_unlock(); |
103 | 135 | ||
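The new timing code measures each NMI handler with local_clock() and prints a ratelimited warning when one exceeds nmi_longest_ns (tunable through the debugfs file added above). One note on the helper it uses: do_div(n, base) divides the 64-bit n in place and returns the 32-bit remainder, so splitting a nanosecond delta into whole and fractional milliseconds looks like the following sketch; this is an illustrative formatting helper, not code taken from the patch:

/* Sketch: ns -> "X.YYY msecs" with do_div() semantics. */
#include <linux/kernel.h>
#include <asm/div64.h>

static void report_duration(u64 delta_ns)
{
        u64 whole_msecs = delta_ns;
        u32 remainder_ns;
        int decimal_msecs;

        remainder_ns = do_div(whole_msecs, 1000 * 1000); /* whole_msecs = delta/1e6 */
        decimal_msecs = remainder_ns / 1000;             /* leftover ns -> usecs */

        printk(KERN_INFO "NMI handler took %llu.%03d msecs\n",
               (unsigned long long)whole_msecs, decimal_msecs);
}
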
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4e7a37ff03ab..81a5f5e8f142 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -277,18 +277,6 @@ void exit_idle(void) | |||
277 | } | 277 | } |
278 | #endif | 278 | #endif |
279 | 279 | ||
280 | void arch_cpu_idle_prepare(void) | ||
281 | { | ||
282 | /* | ||
283 | * If we're the non-boot CPU, nothing set the stack canary up | ||
284 | * for us. CPU0 already has it initialized but no harm in | ||
285 | * doing it again. This is a good place for updating it, as | ||
286 | * we wont ever return from this function (so the invalid | ||
287 | * canaries already on the stack wont ever trigger). | ||
288 | */ | ||
289 | boot_init_stack_canary(); | ||
290 | } | ||
291 | |||
292 | void arch_cpu_idle_enter(void) | 280 | void arch_cpu_idle_enter(void) |
293 | { | 281 | { |
294 | local_touch_nmi(); | 282 | local_touch_nmi(); |
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 7a6f3b3be3cf..f2bb9c96720a 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S | |||
@@ -160,7 +160,7 @@ identity_mapped: | |||
160 | xorq %rbp, %rbp | 160 | xorq %rbp, %rbp |
161 | xorq %r8, %r8 | 161 | xorq %r8, %r8 |
162 | xorq %r9, %r9 | 162 | xorq %r9, %r9 |
163 | xorq %r10, %r9 | 163 | xorq %r10, %r10 |
164 | xorq %r11, %r11 | 164 | xorq %r11, %r11 |
165 | xorq %r12, %r12 | 165 | xorq %r12, %r12 |
166 | xorq %r13, %r13 | 166 | xorq %r13, %r13 |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 69562992e457..cf913587d4dd 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -43,12 +43,6 @@ | |||
43 | 43 | ||
44 | #include <asm/sigframe.h> | 44 | #include <asm/sigframe.h> |
45 | 45 | ||
46 | #ifdef CONFIG_X86_32 | ||
47 | # define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF) | ||
48 | #else | ||
49 | # define FIX_EFLAGS __FIX_EFLAGS | ||
50 | #endif | ||
51 | |||
52 | #define COPY(x) do { \ | 46 | #define COPY(x) do { \ |
53 | get_user_ex(regs->x, &sc->x); \ | 47 | get_user_ex(regs->x, &sc->x); \ |
54 | } while (0) | 48 | } while (0) |
@@ -668,15 +662,17 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) | |||
668 | if (!failed) { | 662 | if (!failed) { |
669 | /* | 663 | /* |
670 | * Clear the direction flag as per the ABI for function entry. | 664 | * Clear the direction flag as per the ABI for function entry. |
671 | */ | 665 | * |
672 | regs->flags &= ~X86_EFLAGS_DF; | 666 | * Clear RF when entering the signal handler, because |
673 | /* | 667 | * it might disable possible debug exception from the |
668 | * signal handler. | ||
669 | * | ||
674 | * Clear TF when entering the signal handler, but | 670 | * Clear TF when entering the signal handler, but |
675 | * notify any tracer that was single-stepping it. | 671 | * notify any tracer that was single-stepping it. |
676 | * The tracer may want to single-step inside the | 672 | * The tracer may want to single-step inside the |
677 | * handler too. | 673 | * handler too. |
678 | */ | 674 | */ |
679 | regs->flags &= ~X86_EFLAGS_TF; | 675 | regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF); |
680 | } | 676 | } |
681 | signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); | 677 | signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); |
682 | } | 678 | } |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9c73b51817e4..bfd348e99369 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -372,15 +372,15 @@ static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) | |||
372 | 372 | ||
373 | void __cpuinit set_cpu_sibling_map(int cpu) | 373 | void __cpuinit set_cpu_sibling_map(int cpu) |
374 | { | 374 | { |
375 | bool has_mc = boot_cpu_data.x86_max_cores > 1; | ||
376 | bool has_smt = smp_num_siblings > 1; | 375 | bool has_smt = smp_num_siblings > 1; |
376 | bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; | ||
377 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 377 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
378 | struct cpuinfo_x86 *o; | 378 | struct cpuinfo_x86 *o; |
379 | int i; | 379 | int i; |
380 | 380 | ||
381 | cpumask_set_cpu(cpu, cpu_sibling_setup_mask); | 381 | cpumask_set_cpu(cpu, cpu_sibling_setup_mask); |
382 | 382 | ||
383 | if (!has_smt && !has_mc) { | 383 | if (!has_mp) { |
384 | cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); | 384 | cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); |
385 | cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); | 385 | cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); |
386 | cpumask_set_cpu(cpu, cpu_core_mask(cpu)); | 386 | cpumask_set_cpu(cpu, cpu_core_mask(cpu)); |
@@ -394,7 +394,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
394 | if ((i == cpu) || (has_smt && match_smt(c, o))) | 394 | if ((i == cpu) || (has_smt && match_smt(c, o))) |
395 | link_mask(sibling, cpu, i); | 395 | link_mask(sibling, cpu, i); |
396 | 396 | ||
397 | if ((i == cpu) || (has_mc && match_llc(c, o))) | 397 | if ((i == cpu) || (has_mp && match_llc(c, o))) |
398 | link_mask(llc_shared, cpu, i); | 398 | link_mask(llc_shared, cpu, i); |
399 | 399 | ||
400 | } | 400 | } |
@@ -406,7 +406,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
406 | for_each_cpu(i, cpu_sibling_setup_mask) { | 406 | for_each_cpu(i, cpu_sibling_setup_mask) { |
407 | o = &cpu_data(i); | 407 | o = &cpu_data(i); |
408 | 408 | ||
409 | if ((i == cpu) || (has_mc && match_mc(c, o))) { | 409 | if ((i == cpu) || (has_mp && match_mc(c, o))) { |
410 | link_mask(core, cpu, i); | 410 | link_mask(core, cpu, i); |
411 | 411 | ||
412 | /* | 412 | /* |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8db0010ed150..5953dcea752d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -1240,9 +1240,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
1240 | ctxt->modrm_seg = VCPU_SREG_DS; | 1240 | ctxt->modrm_seg = VCPU_SREG_DS; |
1241 | 1241 | ||
1242 | if (ctxt->modrm_mod == 3) { | 1242 | if (ctxt->modrm_mod == 3) { |
1243 | int highbyte_regs = ctxt->rex_prefix == 0; | ||
1244 | |||
1243 | op->type = OP_REG; | 1245 | op->type = OP_REG; |
1244 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; | 1246 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
1245 | op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp); | 1247 | op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, |
1248 | highbyte_regs && (ctxt->d & ByteOp)); | ||
1246 | if (ctxt->d & Sse) { | 1249 | if (ctxt->d & Sse) { |
1247 | op->type = OP_XMM; | 1250 | op->type = OP_XMM; |
1248 | op->bytes = 16; | 1251 | op->bytes = 16; |
@@ -3997,7 +4000,8 @@ static const struct opcode twobyte_table[256] = { | |||
3997 | DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, | 4000 | DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, |
3998 | N, D(ImplicitOps | ModRM), N, N, | 4001 | N, D(ImplicitOps | ModRM), N, N, |
3999 | /* 0x10 - 0x1F */ | 4002 | /* 0x10 - 0x1F */ |
4000 | N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, | 4003 | N, N, N, N, N, N, N, N, |
4004 | D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM), | ||
4001 | /* 0x20 - 0x2F */ | 4005 | /* 0x20 - 0x2F */ |
4002 | DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), | 4006 | DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), |
4003 | DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), | 4007 | DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), |
@@ -4836,6 +4840,7 @@ twobyte_insn: | |||
4836 | case 0x08: /* invd */ | 4840 | case 0x08: /* invd */ |
4837 | case 0x0d: /* GrpP (prefetch) */ | 4841 | case 0x0d: /* GrpP (prefetch) */ |
4838 | case 0x18: /* Grp16 (prefetch/nop) */ | 4842 | case 0x18: /* Grp16 (prefetch/nop) */ |
4843 | case 0x1f: /* nop */ | ||
4839 | break; | 4844 | break; |
4840 | case 0x20: /* mov cr, reg */ | 4845 | case 0x20: /* mov cr, reg */ |
4841 | ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg); | 4846 | ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg); |
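The decode_modrm() change only treats byte-sized register operands as legacy high-byte registers when no REX prefix is present; with any REX prefix, encodings 4-7 select SPL/BPL/SIL/DIL rather than AH/CH/DH/BH. A standalone sketch of that decode rule, illustrative only and not the emulator's decode_register():

/*
 * Sketch of the x86-64 byte-register rule the hunk above encodes:
 * without REX, r/m 4-7 name AH/CH/DH/BH; with any REX prefix they name
 * SPL/BPL/SIL/DIL (and REX.B extends the range to R8B-R15B).
 */
static const char *byte_reg_name(unsigned int reg, int rex_prefix)
{
        static const char * const no_rex[8] = {
                "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"
        };
        static const char * const with_rex[16] = {
                "al",  "cl",  "dl",   "bl",   "spl",  "bpl",  "sil",  "dil",
                "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"
        };

        if (!rex_prefix)
                return no_rex[reg & 7];
        return with_rex[reg & 15];
}
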
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e1adbb4aca75..0eee2c8b64d1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -1861,11 +1861,14 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) | |||
1861 | { | 1861 | { |
1862 | struct kvm_lapic *apic = vcpu->arch.apic; | 1862 | struct kvm_lapic *apic = vcpu->arch.apic; |
1863 | unsigned int sipi_vector; | 1863 | unsigned int sipi_vector; |
1864 | unsigned long pe; | ||
1864 | 1865 | ||
1865 | if (!kvm_vcpu_has_lapic(vcpu)) | 1866 | if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) |
1866 | return; | 1867 | return; |
1867 | 1868 | ||
1868 | if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { | 1869 | pe = xchg(&apic->pending_events, 0); |
1870 | |||
1871 | if (test_bit(KVM_APIC_INIT, &pe)) { | ||
1869 | kvm_lapic_reset(vcpu); | 1872 | kvm_lapic_reset(vcpu); |
1870 | kvm_vcpu_reset(vcpu); | 1873 | kvm_vcpu_reset(vcpu); |
1871 | if (kvm_vcpu_is_bsp(apic->vcpu)) | 1874 | if (kvm_vcpu_is_bsp(apic->vcpu)) |
@@ -1873,7 +1876,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) | |||
1873 | else | 1876 | else |
1874 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 1877 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; |
1875 | } | 1878 | } |
1876 | if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) && | 1879 | if (test_bit(KVM_APIC_SIPI, &pe) && |
1877 | vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | 1880 | vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { |
1878 | /* evaluate pending_events before reading the vector */ | 1881 | /* evaluate pending_events before reading the vector */ |
1879 | smp_rmb(); | 1882 | smp_rmb(); |
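kvm_apic_accept_events() now snapshots pending_events with xchg(), so INIT and SIPI are consumed in one atomic step instead of two separate test_and_clear_bit() calls, closing the window where a second event could arrive between them. A minimal sketch of that consume-all-pending-bits pattern; the event names and handler bodies are placeholders:

/* Sketch: grab and clear every pending event bit atomically. */
#include <linux/kernel.h>
#include <linux/bitops.h>

#define EV_INIT 0
#define EV_SIPI 1

static void handle_pending(unsigned long *pending)
{
        unsigned long pe = xchg(pending, 0);    /* fetch and clear in one step */

        if (test_bit(EV_INIT, &pe)) {
                /* ... reset-style handling ... */
        }
        if (test_bit(EV_SIPI, &pe)) {
                /* ... startup-vector handling ... */
        }
}
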
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6b941b4fe13c..292e6ca89f42 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -582,8 +582,6 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | |||
582 | if (index != XCR_XFEATURE_ENABLED_MASK) | 582 | if (index != XCR_XFEATURE_ENABLED_MASK) |
583 | return 1; | 583 | return 1; |
584 | xcr0 = xcr; | 584 | xcr0 = xcr; |
585 | if (kvm_x86_ops->get_cpl(vcpu) != 0) | ||
586 | return 1; | ||
587 | if (!(xcr0 & XSTATE_FP)) | 585 | if (!(xcr0 & XSTATE_FP)) |
588 | return 1; | 586 | return 1; |
589 | if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) | 587 | if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) |
@@ -597,7 +595,8 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | |||
597 | 595 | ||
598 | int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | 596 | int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) |
599 | { | 597 | { |
600 | if (__kvm_set_xcr(vcpu, index, xcr)) { | 598 | if (kvm_x86_ops->get_cpl(vcpu) != 0 || |
599 | __kvm_set_xcr(vcpu, index, xcr)) { | ||
601 | kvm_inject_gp(vcpu, 0); | 600 | kvm_inject_gp(vcpu, 0); |
602 | return 1; | 601 | return 1; |
603 | } | 602 | } |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index eaac1743def7..1f34e9219775 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -277,6 +277,9 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, | |||
277 | end_pfn = limit_pfn; | 277 | end_pfn = limit_pfn; |
278 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | 278 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); |
279 | 279 | ||
280 | if (!after_bootmem) | ||
281 | adjust_range_page_size_mask(mr, nr_range); | ||
282 | |||
280 | /* try to merge same page size and continuous */ | 283 | /* try to merge same page size and continuous */ |
281 | for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { | 284 | for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { |
282 | unsigned long old_start; | 285 | unsigned long old_start; |
@@ -291,9 +294,6 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, | |||
291 | nr_range--; | 294 | nr_range--; |
292 | } | 295 | } |
293 | 296 | ||
294 | if (!after_bootmem) | ||
295 | adjust_range_page_size_mask(mr, nr_range); | ||
296 | |||
297 | for (i = 0; i < nr_range; i++) | 297 | for (i = 0; i < nr_range; i++) |
298 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", | 298 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", |
299 | mr[i].start, mr[i].end - 1, | 299 | mr[i].start, mr[i].end - 1, |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 305c68b8d538..981c2dbd72cc 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -628,7 +628,9 @@ int pcibios_add_device(struct pci_dev *dev) | |||
628 | 628 | ||
629 | pa_data = boot_params.hdr.setup_data; | 629 | pa_data = boot_params.hdr.setup_data; |
630 | while (pa_data) { | 630 | while (pa_data) { |
631 | data = phys_to_virt(pa_data); | 631 | data = ioremap(pa_data, sizeof(*rom)); |
632 | if (!data) | ||
633 | return -ENOMEM; | ||
632 | 634 | ||
633 | if (data->type == SETUP_PCI) { | 635 | if (data->type == SETUP_PCI) { |
634 | rom = (struct pci_setup_rom *)data; | 636 | rom = (struct pci_setup_rom *)data; |
@@ -645,6 +647,7 @@ int pcibios_add_device(struct pci_dev *dev) | |||
645 | } | 647 | } |
646 | } | 648 | } |
647 | pa_data = data->next; | 649 | pa_data = data->next; |
650 | iounmap(data); | ||
648 | } | 651 | } |
649 | return 0; | 652 | return 0; |
650 | } | 653 | } |
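pcibios_add_device() now maps each setup_data entry with ioremap() instead of assuming phys_to_virt() is valid, because the setup_data chain can sit outside the kernel's direct mapping; note that data->next has to be read before the mapping is torn down. The walk pattern in isolation, as a sketch trimmed of the SETUP_PCI handling:

/* Sketch: map each setup_data entry only while it is being inspected. */
#include <linux/io.h>
#include <asm/bootparam.h>

static void walk_setup_data(u64 pa_data)
{
        struct setup_data *data;

        while (pa_data) {
                data = ioremap(pa_data, sizeof(*data));
                if (!data)
                        break;
                /* ... inspect data->type / data->len here ... */
                pa_data = data->next;   /* read the link before unmapping */
                iounmap(data);
        }
}
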
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 55856b2310d3..d2fbcedcf6ea 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -42,7 +42,6 @@ | |||
42 | #include <linux/io.h> | 42 | #include <linux/io.h> |
43 | #include <linux/reboot.h> | 43 | #include <linux/reboot.h> |
44 | #include <linux/bcd.h> | 44 | #include <linux/bcd.h> |
45 | #include <linux/ucs2_string.h> | ||
46 | 45 | ||
47 | #include <asm/setup.h> | 46 | #include <asm/setup.h> |
48 | #include <asm/efi.h> | 47 | #include <asm/efi.h> |
@@ -54,12 +53,12 @@ | |||
54 | 53 | ||
55 | #define EFI_DEBUG 1 | 54 | #define EFI_DEBUG 1 |
56 | 55 | ||
57 | /* | 56 | #define EFI_MIN_RESERVE 5120 |
58 | * There's some additional metadata associated with each | 57 | |
59 | * variable. Intel's reference implementation is 60 bytes - bump that | 58 | #define EFI_DUMMY_GUID \ |
60 | * to account for potential alignment constraints | 59 | EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) |
61 | */ | 60 | |
62 | #define VAR_METADATA_SIZE 64 | 61 | static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; |
63 | 62 | ||
64 | struct efi __read_mostly efi = { | 63 | struct efi __read_mostly efi = { |
65 | .mps = EFI_INVALID_TABLE_ADDR, | 64 | .mps = EFI_INVALID_TABLE_ADDR, |
@@ -79,13 +78,6 @@ struct efi_memory_map memmap; | |||
79 | static struct efi efi_phys __initdata; | 78 | static struct efi efi_phys __initdata; |
80 | static efi_system_table_t efi_systab __initdata; | 79 | static efi_system_table_t efi_systab __initdata; |
81 | 80 | ||
82 | static u64 efi_var_store_size; | ||
83 | static u64 efi_var_remaining_size; | ||
84 | static u64 efi_var_max_var_size; | ||
85 | static u64 boot_used_size; | ||
86 | static u64 boot_var_size; | ||
87 | static u64 active_size; | ||
88 | |||
89 | unsigned long x86_efi_facility; | 81 | unsigned long x86_efi_facility; |
90 | 82 | ||
91 | /* | 83 | /* |
@@ -188,53 +180,8 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, | |||
188 | efi_char16_t *name, | 180 | efi_char16_t *name, |
189 | efi_guid_t *vendor) | 181 | efi_guid_t *vendor) |
190 | { | 182 | { |
191 | efi_status_t status; | 183 | return efi_call_virt3(get_next_variable, |
192 | static bool finished = false; | 184 | name_size, name, vendor); |
193 | static u64 var_size; | ||
194 | |||
195 | status = efi_call_virt3(get_next_variable, | ||
196 | name_size, name, vendor); | ||
197 | |||
198 | if (status == EFI_NOT_FOUND) { | ||
199 | finished = true; | ||
200 | if (var_size < boot_used_size) { | ||
201 | boot_var_size = boot_used_size - var_size; | ||
202 | active_size += boot_var_size; | ||
203 | } else { | ||
204 | printk(KERN_WARNING FW_BUG "efi: Inconsistent initial sizes\n"); | ||
205 | } | ||
206 | } | ||
207 | |||
208 | if (boot_used_size && !finished) { | ||
209 | unsigned long size; | ||
210 | u32 attr; | ||
211 | efi_status_t s; | ||
212 | void *tmp; | ||
213 | |||
214 | s = virt_efi_get_variable(name, vendor, &attr, &size, NULL); | ||
215 | |||
216 | if (s != EFI_BUFFER_TOO_SMALL || !size) | ||
217 | return status; | ||
218 | |||
219 | tmp = kmalloc(size, GFP_ATOMIC); | ||
220 | |||
221 | if (!tmp) | ||
222 | return status; | ||
223 | |||
224 | s = virt_efi_get_variable(name, vendor, &attr, &size, tmp); | ||
225 | |||
226 | if (s == EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) { | ||
227 | var_size += size; | ||
228 | var_size += ucs2_strsize(name, 1024); | ||
229 | active_size += size; | ||
230 | active_size += VAR_METADATA_SIZE; | ||
231 | active_size += ucs2_strsize(name, 1024); | ||
232 | } | ||
233 | |||
234 | kfree(tmp); | ||
235 | } | ||
236 | |||
237 | return status; | ||
238 | } | 185 | } |
239 | 186 | ||
240 | static efi_status_t virt_efi_set_variable(efi_char16_t *name, | 187 | static efi_status_t virt_efi_set_variable(efi_char16_t *name, |
@@ -243,34 +190,9 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, | |||
243 | unsigned long data_size, | 190 | unsigned long data_size, |
244 | void *data) | 191 | void *data) |
245 | { | 192 | { |
246 | efi_status_t status; | 193 | return efi_call_virt5(set_variable, |
247 | u32 orig_attr = 0; | 194 | name, vendor, attr, |
248 | unsigned long orig_size = 0; | 195 | data_size, data); |
249 | |||
250 | status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size, | ||
251 | NULL); | ||
252 | |||
253 | if (status != EFI_BUFFER_TOO_SMALL) | ||
254 | orig_size = 0; | ||
255 | |||
256 | status = efi_call_virt5(set_variable, | ||
257 | name, vendor, attr, | ||
258 | data_size, data); | ||
259 | |||
260 | if (status == EFI_SUCCESS) { | ||
261 | if (orig_size) { | ||
262 | active_size -= orig_size; | ||
263 | active_size -= ucs2_strsize(name, 1024); | ||
264 | active_size -= VAR_METADATA_SIZE; | ||
265 | } | ||
266 | if (data_size) { | ||
267 | active_size += data_size; | ||
268 | active_size += ucs2_strsize(name, 1024); | ||
269 | active_size += VAR_METADATA_SIZE; | ||
270 | } | ||
271 | } | ||
272 | |||
273 | return status; | ||
274 | } | 196 | } |
275 | 197 | ||
276 | static efi_status_t virt_efi_query_variable_info(u32 attr, | 198 | static efi_status_t virt_efi_query_variable_info(u32 attr, |
@@ -786,9 +708,6 @@ void __init efi_init(void) | |||
786 | char vendor[100] = "unknown"; | 708 | char vendor[100] = "unknown"; |
787 | int i = 0; | 709 | int i = 0; |
788 | void *tmp; | 710 | void *tmp; |
789 | struct setup_data *data; | ||
790 | struct efi_var_bootdata *efi_var_data; | ||
791 | u64 pa_data; | ||
792 | 711 | ||
793 | #ifdef CONFIG_X86_32 | 712 | #ifdef CONFIG_X86_32 |
794 | if (boot_params.efi_info.efi_systab_hi || | 713 | if (boot_params.efi_info.efi_systab_hi || |
@@ -806,22 +725,6 @@ void __init efi_init(void) | |||
806 | if (efi_systab_init(efi_phys.systab)) | 725 | if (efi_systab_init(efi_phys.systab)) |
807 | return; | 726 | return; |
808 | 727 | ||
809 | pa_data = boot_params.hdr.setup_data; | ||
810 | while (pa_data) { | ||
811 | data = early_ioremap(pa_data, sizeof(*efi_var_data)); | ||
812 | if (data->type == SETUP_EFI_VARS) { | ||
813 | efi_var_data = (struct efi_var_bootdata *)data; | ||
814 | |||
815 | efi_var_store_size = efi_var_data->store_size; | ||
816 | efi_var_remaining_size = efi_var_data->remaining_size; | ||
817 | efi_var_max_var_size = efi_var_data->max_var_size; | ||
818 | } | ||
819 | pa_data = data->next; | ||
820 | early_iounmap(data, sizeof(*efi_var_data)); | ||
821 | } | ||
822 | |||
823 | boot_used_size = efi_var_store_size - efi_var_remaining_size; | ||
824 | |||
825 | set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); | 728 | set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); |
826 | 729 | ||
827 | /* | 730 | /* |
@@ -1085,6 +988,13 @@ void __init efi_enter_virtual_mode(void) | |||
1085 | runtime_code_page_mkexec(); | 988 | runtime_code_page_mkexec(); |
1086 | 989 | ||
1087 | kfree(new_memmap); | 990 | kfree(new_memmap); |
991 | |||
992 | /* clean DUMMY object */ | ||
993 | efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, | ||
994 | EFI_VARIABLE_NON_VOLATILE | | ||
995 | EFI_VARIABLE_BOOTSERVICE_ACCESS | | ||
996 | EFI_VARIABLE_RUNTIME_ACCESS, | ||
997 | 0, NULL); | ||
1088 | } | 998 | } |
1089 | 999 | ||
1090 | /* | 1000 | /* |
@@ -1136,33 +1046,70 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) | |||
1136 | efi_status_t status; | 1046 | efi_status_t status; |
1137 | u64 storage_size, remaining_size, max_size; | 1047 | u64 storage_size, remaining_size, max_size; |
1138 | 1048 | ||
1049 | if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) | ||
1050 | return 0; | ||
1051 | |||
1139 | status = efi.query_variable_info(attributes, &storage_size, | 1052 | status = efi.query_variable_info(attributes, &storage_size, |
1140 | &remaining_size, &max_size); | 1053 | &remaining_size, &max_size); |
1141 | if (status != EFI_SUCCESS) | 1054 | if (status != EFI_SUCCESS) |
1142 | return status; | 1055 | return status; |
1143 | 1056 | ||
1144 | if (!max_size && remaining_size > size) | ||
1145 | printk_once(KERN_ERR FW_BUG "Broken EFI implementation" | ||
1146 | " is returning MaxVariableSize=0\n"); | ||
1147 | /* | 1057 | /* |
1148 | * Some firmware implementations refuse to boot if there's insufficient | 1058 | * Some firmware implementations refuse to boot if there's insufficient |
1149 | * space in the variable store. We account for that by refusing the | 1059 | * space in the variable store. We account for that by refusing the |
1150 | * write if permitting it would reduce the available space to under | 1060 | * write if permitting it would reduce the available space to under |
1151 | * 50%. However, some firmware won't reclaim variable space until | 1061 | * 5KB. This figure was provided by Samsung, so should be safe. |
1152 | * after the used (not merely the actively used) space drops below | ||
1153 | * a threshold. We can approximate that case with the value calculated | ||
1154 | * above. If both the firmware and our calculations indicate that the | ||
1155 | * available space would drop below 50%, refuse the write. | ||
1156 | */ | 1062 | */ |
1063 | if ((remaining_size - size < EFI_MIN_RESERVE) && | ||
1064 | !efi_no_storage_paranoia) { | ||
1065 | |||
1066 | /* | ||
1067 | * Triggering garbage collection may require that the firmware | ||
1068 | * generate a real EFI_OUT_OF_RESOURCES error. We can force | ||
1069 | * that by attempting to use more space than is available. | ||
1070 | */ | ||
1071 | unsigned long dummy_size = remaining_size + 1024; | ||
1072 | void *dummy = kzalloc(dummy_size, GFP_ATOMIC); | ||
1073 | |||
1074 | if (!dummy) | ||
1075 | return EFI_OUT_OF_RESOURCES; | ||
1076 | |||
1077 | status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, | ||
1078 | EFI_VARIABLE_NON_VOLATILE | | ||
1079 | EFI_VARIABLE_BOOTSERVICE_ACCESS | | ||
1080 | EFI_VARIABLE_RUNTIME_ACCESS, | ||
1081 | dummy_size, dummy); | ||
1082 | |||
1083 | if (status == EFI_SUCCESS) { | ||
1084 | /* | ||
1085 | * This should have failed, so if it didn't make sure | ||
1086 | * that we delete it... | ||
1087 | */ | ||
1088 | efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, | ||
1089 | EFI_VARIABLE_NON_VOLATILE | | ||
1090 | EFI_VARIABLE_BOOTSERVICE_ACCESS | | ||
1091 | EFI_VARIABLE_RUNTIME_ACCESS, | ||
1092 | 0, dummy); | ||
1093 | } | ||
1094 | |||
1095 | kfree(dummy); | ||
1157 | 1096 | ||
1158 | if (!storage_size || size > remaining_size || | 1097 | /* |
1159 | (max_size && size > max_size)) | 1098 | * The runtime code may now have triggered a garbage collection |
1160 | return EFI_OUT_OF_RESOURCES; | 1099 | * run, so check the variable info again |
1100 | */ | ||
1101 | status = efi.query_variable_info(attributes, &storage_size, | ||
1102 | &remaining_size, &max_size); | ||
1161 | 1103 | ||
1162 | if (!efi_no_storage_paranoia && | 1104 | if (status != EFI_SUCCESS) |
1163 | ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) && | 1105 | return status; |
1164 | (remaining_size - size < storage_size / 2))) | 1106 | |
1165 | return EFI_OUT_OF_RESOURCES; | 1107 | /* |
1108 | * There still isn't enough room, so return an error | ||
1109 | */ | ||
1110 | if (remaining_size - size < EFI_MIN_RESERVE) | ||
1111 | return EFI_OUT_OF_RESOURCES; | ||
1112 | } | ||
1166 | 1113 | ||
1167 | return EFI_SUCCESS; | 1114 | return EFI_SUCCESS; |
1168 | } | 1115 | } |
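The reworked efi_query_variable_store() replaces the old 50% heuristic with a fixed 5 KiB floor (EFI_MIN_RESERVE) and, when a write would dip below it, provokes the firmware's garbage collector by attempting an oversized dummy SetVariable() before re-querying the store. From a caller's point of view the check simply gates SetVariable(); a hedged sketch of that usage follows, with the helper name being illustrative rather than the efivars implementation:

/* Sketch: gate a variable write on the reserve check added above. */
#include <linux/efi.h>

static efi_status_t checked_set_variable(efi_char16_t *name, efi_guid_t *guid,
                                         u32 attr, unsigned long size,
                                         void *data)
{
        efi_status_t status;

        /* Refuse writes that would leave fewer than EFI_MIN_RESERVE bytes free. */
        status = efi_query_variable_store(attr, size);
        if (status != EFI_SUCCESS)
                return status;

        return efi.set_variable(name, guid, attr, size, data);
}
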
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 590be1090892..f7bab68a4b83 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c | |||
@@ -42,9 +42,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { | |||
42 | "^(xen_irq_disable_direct_reloc$|" | 42 | "^(xen_irq_disable_direct_reloc$|" |
43 | "xen_save_fl_direct_reloc$|" | 43 | "xen_save_fl_direct_reloc$|" |
44 | "VDSO|" | 44 | "VDSO|" |
45 | #if ELF_BITS == 64 | ||
46 | "__vvar_page|" | ||
47 | #endif | ||
48 | "__crc_)", | 45 | "__crc_)", |
49 | 46 | ||
50 | /* | 47 | /* |
@@ -72,6 +69,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { | |||
72 | "__per_cpu_load|" | 69 | "__per_cpu_load|" |
73 | "init_per_cpu__.*|" | 70 | "init_per_cpu__.*|" |
74 | "__end_rodata_hpage_align|" | 71 | "__end_rodata_hpage_align|" |
72 | "__vvar_page|" | ||
75 | #endif | 73 | #endif |
76 | "_end)$" | 74 | "_end)$" |
77 | }; | 75 | }; |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 8ff37995d54e..d99cae8147d1 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/irq_work.h> | 19 | #include <linux/irq_work.h> |
20 | #include <linux/tick.h> | ||
20 | 21 | ||
21 | #include <asm/paravirt.h> | 22 | #include <asm/paravirt.h> |
22 | #include <asm/desc.h> | 23 | #include <asm/desc.h> |
@@ -447,6 +448,13 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ | |||
447 | play_dead_common(); | 448 | play_dead_common(); |
448 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); | 449 | HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); |
449 | cpu_bringup(); | 450 | cpu_bringup(); |
451 | /* | ||
452 | * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down) | ||
453 | * clears certain data that the cpu_idle loop (which called us | ||
454 | * and that we return from) expects. The only way to get that | ||
455 | * data back is to call: | ||
456 | */ | ||
457 | tick_nohz_idle_enter(); | ||
450 | } | 458 | } |
451 | 459 | ||
452 | #else /* !CONFIG_HOTPLUG_CPU */ | 460 | #else /* !CONFIG_HOTPLUG_CPU */ |
@@ -576,24 +584,22 @@ void xen_send_IPI_mask_allbutself(const struct cpumask *mask, | |||
576 | { | 584 | { |
577 | unsigned cpu; | 585 | unsigned cpu; |
578 | unsigned int this_cpu = smp_processor_id(); | 586 | unsigned int this_cpu = smp_processor_id(); |
587 | int xen_vector = xen_map_vector(vector); | ||
579 | 588 | ||
580 | if (!(num_online_cpus() > 1)) | 589 | if (!(num_online_cpus() > 1) || (xen_vector < 0)) |
581 | return; | 590 | return; |
582 | 591 | ||
583 | for_each_cpu_and(cpu, mask, cpu_online_mask) { | 592 | for_each_cpu_and(cpu, mask, cpu_online_mask) { |
584 | if (this_cpu == cpu) | 593 | if (this_cpu == cpu) |
585 | continue; | 594 | continue; |
586 | 595 | ||
587 | xen_smp_send_call_function_single_ipi(cpu); | 596 | xen_send_IPI_one(cpu, xen_vector); |
588 | } | 597 | } |
589 | } | 598 | } |
590 | 599 | ||
591 | void xen_send_IPI_allbutself(int vector) | 600 | void xen_send_IPI_allbutself(int vector) |
592 | { | 601 | { |
593 | int xen_vector = xen_map_vector(vector); | 602 | xen_send_IPI_mask_allbutself(cpu_online_mask, vector); |
594 | |||
595 | if (xen_vector >= 0) | ||
596 | xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector); | ||
597 | } | 603 | } |
598 | 604 | ||
599 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) | 605 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) |
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index 8981a76d081a..c7c2d89efd76 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h | |||
@@ -5,7 +5,6 @@ extern void xen_send_IPI_mask(const struct cpumask *mask, | |||
5 | extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask, | 5 | extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask, |
6 | int vector); | 6 | int vector); |
7 | extern void xen_send_IPI_allbutself(int vector); | 7 | extern void xen_send_IPI_allbutself(int vector); |
8 | extern void physflat_send_IPI_allbutself(int vector); | ||
9 | extern void xen_send_IPI_all(int vector); | 8 | extern void xen_send_IPI_all(int vector); |
10 | extern void xen_send_IPI_self(int vector); | 9 | extern void xen_send_IPI_self(int vector); |
11 | 10 | ||