Diffstat (limited to 'arch/x86')

-rw-r--r--  arch/x86/Kconfig                                1
-rw-r--r--  arch/x86/boot/header.S                         26
-rw-r--r--  arch/x86/boot/tools/build.c                    38
-rw-r--r--  arch/x86/include/asm/irqflags.h                 2
-rw-r--r--  arch/x86/kernel/apm_32.c                        1
-rw-r--r--  arch/x86/kernel/cpu/intel.c                    22
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c          12
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c               10
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c                3
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h               12
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c         78
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c       6
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c  11
-rw-r--r--  arch/x86/kernel/entry_32.S                      9
-rw-r--r--  arch/x86/kernel/entry_64.S                     28
-rw-r--r--  arch/x86/kernel/espfix_64.c                     5
-rw-r--r--  arch/x86/kernel/kprobes/core.c                  3
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c             2
-rw-r--r--  arch/x86/kernel/tsc.c                           4
-rw-r--r--  arch/x86/kvm/x86.c                             12
-rw-r--r--  arch/x86/xen/grant-table.c                    148

21 files changed, 305 insertions, 128 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a8f749ef0fdc..d24887b645dc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -131,6 +131,7 @@ config X86
 	select HAVE_CC_STACKPROTECTOR
 	select GENERIC_CPU_AUTOPROBE
 	select HAVE_ARCH_AUDITSYSCALL
+	select ARCH_SUPPORTS_ATOMIC_RMW
 
 config INSTRUCTION_DECODER
 	def_bool y
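The new ARCH_SUPPORTS_ATOMIC_RMW select advertises that x86 has cheap atomic read-modify-write operations, which is what the optimistic-spinning lock paths key off. A hedged sketch of the kind of consumer this unlocks, modeled on kernel/Kconfig.locks of this era (the exact symbols below are assumptions, not part of this diff):

config MUTEX_SPIN_ON_OWNER
	def_bool y
	depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW

config RWSEM_SPIN_ON_OWNER
	def_bool y
	depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW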
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 84c223479e3c..7a6d43a554d7 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -91,10 +91,9 @@ bs_die:
 
 	.section ".bsdata", "a"
 bugger_off_msg:
-	.ascii	"Direct floppy boot is not supported. "
-	.ascii	"Use a boot loader program instead.\r\n"
+	.ascii	"Use a boot loader.\r\n"
 	.ascii	"\n"
-	.ascii	"Remove disk and press any key to reboot ...\r\n"
+	.ascii	"Remove disk and press any key to reboot...\r\n"
 	.byte	0
 
 #ifdef CONFIG_EFI_STUB
@@ -108,7 +107,7 @@ coff_header:
 #else
 	.word	0x8664				# x86-64
 #endif
-	.word	3				# nr_sections
+	.word	4				# nr_sections
 	.long	0 				# TimeDateStamp
 	.long	0				# PointerToSymbolTable
 	.long	1				# NumberOfSymbols
@@ -250,6 +249,25 @@ section_table:
 	.word	0				# NumberOfLineNumbers
 	.long	0x60500020			# Characteristics (section flags)
 
+	#
+	# The offset & size fields are filled in by build.c.
+	#
+	.ascii	".bss"
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.long	0
+	.long	0x0
+	.long	0				# Size of initialized data
+						# on disk
+	.long	0x0
+	.long	0				# PointerToRelocations
+	.long	0				# PointerToLineNumbers
+	.word	0				# NumberOfRelocations
+	.word	0				# NumberOfLineNumbers
+	.long	0xc8000080			# Characteristics (section flags)
+
 #endif /* CONFIG_EFI_STUB */
 
 # Kernel attributes; used by setup.  This is part 1 of the
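For orientation, the lines added above hand-assemble one 40-byte COFF section header for the new .bss entry. A sketch of the standard PE/COFF layout, with the field offsets that build.c (next diff) pokes; the struct itself is illustrative and not from the kernel tree:

/* Illustrative only: standard COFF section header layout (40 bytes).
 * Offsets match the ones patched by build.c in the following diff. */
struct pe_section_header {
	char		name[8];	 /* 0x00: ".bss", zero-padded        */
	uint32_t	virtual_size;	 /* 0x08: size of section in memory  */
	uint32_t	virtual_address; /* 0x0c: section VMA                */
	uint32_t	raw_data_size;	 /* 0x10: initialized data on disk   */
	uint32_t	raw_data_ptr;	 /* 0x14: file offset of raw data    */
	uint32_t	reloc_ptr;	 /* 0x18: PointerToRelocations       */
	uint32_t	line_ptr;	 /* 0x1c: PointerToLineNumbers       */
	uint16_t	num_relocs;	 /* 0x20: NumberOfRelocations        */
	uint16_t	num_lines;	 /* 0x22: NumberOfLineNumbers        */
	uint32_t	characteristics; /* 0x24: 0xc8000080 = uninitialized
					  * data, readable, writable        */
} __attribute__((packed));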
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 1a2f2121cada..a7661c430cd9 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -143,7 +143,7 @@ static void usage(void)
 
 #ifdef CONFIG_EFI_STUB
 
-static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
+static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset)
 {
 	unsigned int pe_header;
 	unsigned short num_sections;
@@ -164,10 +164,10 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
 			put_unaligned_le32(size, section + 0x8);
 
 			/* section header vma field */
-			put_unaligned_le32(offset, section + 0xc);
+			put_unaligned_le32(vma, section + 0xc);
 
 			/* section header 'size of initialised data' field */
-			put_unaligned_le32(size, section + 0x10);
+			put_unaligned_le32(datasz, section + 0x10);
 
 			/* section header 'file offset' field */
 			put_unaligned_le32(offset, section + 0x14);
@@ -179,6 +179,11 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
 	}
 }
 
+static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
+{
+	update_pecoff_section_header_fields(section_name, offset, size, size, offset);
+}
+
 static void update_pecoff_setup_and_reloc(unsigned int size)
 {
 	u32 setup_offset = 0x200;
@@ -203,9 +208,6 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
 
 	pe_header = get_unaligned_le32(&buf[0x3c]);
 
-	/* Size of image */
-	put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
-
 	/*
 	 * Size of code: Subtract the size of the first sector (512 bytes)
 	 * which includes the header.
@@ -220,6 +222,22 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
 	update_pecoff_section_header(".text", text_start, text_sz);
 }
 
+static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz)
+{
+	unsigned int pe_header;
+	unsigned int bss_sz = init_sz - file_sz;
+
+	pe_header = get_unaligned_le32(&buf[0x3c]);
+
+	/* Size of uninitialized data */
+	put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]);
+
+	/* Size of image */
+	put_unaligned_le32(init_sz, &buf[pe_header + 0x50]);
+
+	update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0);
+}
+
 static int reserve_pecoff_reloc_section(int c)
 {
 	/* Reserve 0x20 bytes for .reloc section */
@@ -259,6 +277,8 @@ static void efi_stub_entry_update(void)
 static inline void update_pecoff_setup_and_reloc(unsigned int size) {}
 static inline void update_pecoff_text(unsigned int text_start,
 				      unsigned int file_sz) {}
+static inline void update_pecoff_bss(unsigned int file_sz,
+				     unsigned int init_sz) {}
 static inline void efi_stub_defaults(void) {}
 static inline void efi_stub_entry_update(void) {}
 
@@ -310,7 +330,7 @@ static void parse_zoffset(char *fname)
 
 int main(int argc, char ** argv)
 {
-	unsigned int i, sz, setup_sectors;
+	unsigned int i, sz, setup_sectors, init_sz;
 	int c;
 	u32 sys_size;
 	struct stat sb;
@@ -376,7 +396,9 @@ int main(int argc, char ** argv)
 	buf[0x1f1] = setup_sectors-1;
 	put_unaligned_le32(sys_size, &buf[0x1f4]);
 
-	update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz));
+	update_pecoff_text(setup_sectors * 512, i + (sys_size * 16));
+	init_sz = get_unaligned_le32(&buf[0x260]);
+	update_pecoff_bss(i + (sys_size * 16), init_sz);
 
 	efi_stub_entry_update();
 
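To make the new bookkeeping concrete: init_sz is read back from the setup header's init_size field at offset 0x260 (per the x86 boot protocol), and update_pecoff_bss() reduces to two header stores plus a subtraction. A standalone sketch of that arithmetic, with invented sizes for illustration:

/* Standalone sketch of update_pecoff_bss()'s arithmetic; the sizes
 * below are made up for illustration. */
#include <stdio.h>

int main(void)
{
	unsigned int file_sz = 5u << 20;   /* bzImage bytes on disk        */
	unsigned int init_sz = 24u << 20;  /* init_size: runtime footprint */
	unsigned int bss_sz  = init_sz - file_sz;

	/* PE fields the EFI loader consumes:
	 *   pe_header + 0x24: SizeOfUninitializedData = bss_sz
	 *   pe_header + 0x50: SizeOfImage             = init_sz
	 * so firmware reserves the full runtime footprint, not just
	 * the bytes present in the file. */
	printf("bss_sz = %u bytes\n", bss_sz);
	return 0;
}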
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index bba3cf88e624..0a8b519226b8 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void)
 
 #define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */
 
-#define INTERRUPT_RETURN	iretq
+#define INTERRUPT_RETURN	jmp native_iret
 #define USERGS_SYSRET64				\
 	swapgs;					\
 	sysretq;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f3a1f04ed4cb..584874451414 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -841,7 +841,6 @@ static int apm_do_idle(void)
 	u32 eax;
 	u8 ret = 0;
 	int idled = 0;
-	int polling;
 	int err = 0;
 
 	if (!need_resched()) {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a80029035bf2..f9e4fdd3b877 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -370,6 +370,17 @@ static void init_intel(struct cpuinfo_x86 *c)
 	 */
 	detect_extended_topology(c);
 
+	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
+		/*
+		 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
+		 * detection.
+		 */
+		c->x86_max_cores = intel_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+		detect_ht(c);
+#endif
+	}
+
 	l2 = init_intel_cacheinfo(c);
 	if (c->cpuid_level > 9) {
 		unsigned eax = cpuid_eax(10);
@@ -438,17 +449,6 @@ static void init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_P3);
 #endif
 
-	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
-		/*
-		 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
-		 * detection.
-		 */
-		c->x86_max_cores = intel_num_cpu_cores(c);
-#ifdef CONFIG_X86_32
-		detect_ht(c);
-#endif
-	}
-
 	/* Work around errata */
 	srat_detect_node(c);
 
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index a952e9c85b6f..9c8f7394c612 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -730,6 +730,18 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
 #endif
 	}
 
+#ifdef CONFIG_X86_HT
+	/*
+	 * If cpu_llc_id is not yet set, this means cpuid_level < 4, which in
+	 * turn means that the only possibility is SMT (as indicated in
+	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
+	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
+	 * c->phys_proc_id.
+	 */
+	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
+		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
+#endif
+
 	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 
 	return l2;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index bb92f38153b2..9a79c8dbd8e8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2451,6 +2451,12 @@ static __init int mcheck_init_device(void)
 	for_each_online_cpu(i) {
 		err = mce_device_create(i);
 		if (err) {
+			/*
+			 * Register notifier anyway (and do not unreg it) so
+			 * that we don't leave undeleted timers, see notifier
+			 * callback above.
+			 */
+			__register_hotcpu_notifier(&mce_cpu_notifier);
 			cpu_notifier_register_done();
 			goto err_device_create;
 		}
@@ -2471,10 +2477,6 @@ static __init int mcheck_init_device(void)
 err_register:
 	unregister_syscore_ops(&mce_syscore_ops);
 
-	cpu_notifier_register_begin();
-	__unregister_hotcpu_notifier(&mce_cpu_notifier);
-	cpu_notifier_register_done();
-
 err_device_create:
 	/*
 	 * We didn't keep track of which devices were created above, but
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bdfbff8a4f6..2879ecdaac43 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 			continue;
 		if (event->attr.config1 & ~er->valid_mask)
 			return -EINVAL;
+		/* Check if the extra MSRs can be safely accessed */
+		if (!er->extra_msr_access)
+			return -ENXIO;
 
 		reg->idx = er->idx;
 		reg->config = event->attr.config1;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3b2f9bdd974b..8ade93111e03 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -295,14 +295,16 @@ struct extra_reg {
 	u64			config_mask;
 	u64			valid_mask;
 	int			idx;  /* per_xxx->regs[] reg index */
+	bool			extra_msr_access;
 };
 
 #define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
 	.event = (e),			\
 	.msr = (ms),			\
 	.config_mask = (m),		\
 	.valid_mask = (vm),		\
 	.idx = EXTRA_REG_##i,		\
+	.extra_msr_access = true,	\
 	}
 
 #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index adb02aa62af5..2502d0d9d246 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1382,6 +1382,15 @@ again:
 	intel_pmu_lbr_read();
 
 	/*
+	 * The CondChgd bit (63) doesn't indicate any counter overflow;
+	 * ignore and clear it.
+	 */
+	if (__test_and_clear_bit(63, (unsigned long *)&status)) {
+		if (!status)
+			goto done;
+	}
+
+	/*
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
 	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
@@ -2173,6 +2182,41 @@ static void intel_snb_check_microcode(void)
 	}
 }
 
+/*
+ * Under certain circumstances, accessing certain MSRs may cause a #GP.
+ * This function tests whether the input MSR can be safely accessed.
+ */
+static bool check_msr(unsigned long msr, u64 mask)
+{
+	u64 val_old, val_new, val_tmp;
+
+	/*
+	 * Read the current value, change it and read it back to see if it
+	 * matches; this is needed to detect certain hardware emulators
+	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+	 */
+	if (rdmsrl_safe(msr, &val_old))
+		return false;
+
+	/*
+	 * Only change the bits which can be updated by wrmsrl.
+	 */
+	val_tmp = val_old ^ mask;
+	if (wrmsrl_safe(msr, val_tmp) ||
+	    rdmsrl_safe(msr, &val_new))
+		return false;
+
+	if (val_new != val_tmp)
+		return false;
+
+	/* The MSR is now known to be safely accessible.
+	 * Restore the old value and return.
+	 */
+	wrmsrl(msr, val_old);
+
+	return true;
+}
+
 static __init void intel_sandybridge_quirk(void)
 {
 	x86_pmu.check_microcode = intel_snb_check_microcode;
@@ -2262,7 +2306,8 @@ __init int intel_pmu_init(void)
 	union cpuid10_ebx ebx;
 	struct event_constraint *c;
 	unsigned int unused;
-	int version;
+	struct extra_reg *er;
+	int version, i;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
 		switch (boot_cpu_data.x86) {
@@ -2465,6 +2510,9 @@ __init int intel_pmu_init(void)
 	case 62: /* IvyBridge EP */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		/* dTLB-load-misses on IVB differs from SNB */
+		hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
+
 		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));
 
@@ -2565,6 +2613,34 @@ __init int intel_pmu_init(void)
 		}
 	}
 
+	/*
+	 * Accessing LBR MSRs may cause a #GP under certain circumstances,
+	 * e.g. KVM doesn't support the LBR MSRs.
+	 * Check all LBR MSRs here.
+	 * Disable LBR access if any LBR MSR cannot be accessed.
+	 */
+	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+		x86_pmu.lbr_nr = 0;
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
+		      check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
+			x86_pmu.lbr_nr = 0;
+	}
+
+	/*
+	 * Accessing extra MSRs may cause a #GP under certain circumstances,
+	 * e.g. KVM doesn't support offcore events.
+	 * Check all extra_regs here.
+	 */
+	if (x86_pmu.extra_regs) {
+		for (er = x86_pmu.extra_regs; er->msr; er++) {
+			er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
+			/* Disable LBR select mapping */
+			if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
+				x86_pmu.lbr_sel_map = NULL;
+		}
+	}
+
 	/* Support full width counters using alternative MSR range */
 	if (x86_pmu.intel_cap.full_width_write) {
 		x86_pmu.max_period = x86_pmu.cntval_mask;
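The essential trick in check_msr() above is that a faulting access alone isn't a sufficient test: emulators may silently accept reads and writes of MSRs they don't implement and return zeroes, so the probe must verify that a toggled value survives a write/read round trip. The same idiom restated in isolation, as a minimal sketch (function name and structure are illustrative):

/* Restatement of the probe idiom from check_msr() above: an MSR is
 * trusted only if a toggled value survives a write/read round trip,
 * which filters out emulators that silently accept any MSR access. */
static bool msr_round_trips(unsigned long msr, u64 toggle_mask)
{
	u64 old, probe, readback;

	if (rdmsrl_safe(msr, &old))
		return false;			/* read faulted (#GP) */

	probe = old ^ toggle_mask;		/* flip writable bits only */
	if (wrmsrl_safe(msr, probe) || rdmsrl_safe(msr, &readback))
		return false;			/* write or readback faulted */

	if (readback != probe)
		return false;			/* emulator dropped the write */

	wrmsrl(msr, old);			/* restore the original value */
	return true;
}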
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970cb744d..696ade311ded 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -311,9 +311,11 @@ static int alloc_bts_buffer(int cpu)
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node);
-	if (unlikely(!buffer))
+	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	if (unlikely(!buffer)) {
+		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
+	}
 
 	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
 	thresh = max / 16;
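The change above trades the page allocator's noisy, stack-dumping failure report for a single identifiable warning. A minimal sketch of the same idiom applied to a hypothetical helper (the function name is invented for illustration):

/* Hypothetical helper using the idiom above: __GFP_NOWARN suppresses
 * the generic allocation-failure splat, and WARN_ONCE emits exactly
 * one targeted diagnostic instead. */
static void *alloc_quiet_buffer(size_t size, int node)
{
	void *buf = kzalloc_node(size, GFP_KERNEL | __GFP_NOWARN, node);

	if (unlikely(!buf))
		WARN_ONCE(1, "%s: buffer allocation failure\n", __func__);
	return buf;
}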
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 65bbbea38b9c..ae6552a0701f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -550,16 +550,16 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xc),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xc),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xc),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xc),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
@@ -1222,6 +1222,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
 	SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
 				  SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
+
 	SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
@@ -1245,7 +1246,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
 	SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index dbaa23e78b36..0d0c9d4ab6d5 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -425,8 +425,8 @@ sysenter_do_call:
 	cmpl $(NR_syscalls), %eax
 	jae sysenter_badsys
 	call *sys_call_table(,%eax,4)
-	movl %eax,PT_EAX(%esp)
 sysenter_after_call:
+	movl %eax,PT_EAX(%esp)
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
@@ -502,6 +502,7 @@ ENTRY(system_call)
 	jae syscall_badsys
 syscall_call:
 	call *sys_call_table(,%eax,4)
+syscall_after_call:
 	movl %eax,PT_EAX(%esp)		# store the return value
 syscall_exit:
 	LOCKDEP_SYS_EXIT
@@ -675,12 +676,12 @@ syscall_fault:
 END(syscall_fault)
 
 syscall_badsys:
-	movl $-ENOSYS,PT_EAX(%esp)
-	jmp syscall_exit
+	movl $-ENOSYS,%eax
+	jmp syscall_after_call
 END(syscall_badsys)
 
 sysenter_badsys:
-	movl $-ENOSYS,PT_EAX(%esp)
+	movl $-ENOSYS,%eax
 	jmp sysenter_after_call
 END(syscall_badsys)
 	CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b25ca969edd2..c844f0816ab8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -830,27 +830,24 @@ restore_args:
 	RESTORE_ARGS 1,8,1
 
 irq_return:
+	INTERRUPT_RETURN
+
+ENTRY(native_iret)
 	/*
 	 * Are we returning to a stack segment from the LDT?  Note: in
 	 * 64-bit mode SS:RSP on the exception stack is always valid.
 	 */
 #ifdef CONFIG_X86_ESPFIX64
 	testb $4,(SS-RIP)(%rsp)
-	jnz irq_return_ldt
+	jnz native_irq_return_ldt
 #endif
 
-irq_return_iret:
-	INTERRUPT_RETURN
-	_ASM_EXTABLE(irq_return_iret, bad_iret)
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
+native_irq_return_iret:
 	iretq
-	_ASM_EXTABLE(native_iret, bad_iret)
-#endif
+	_ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
-irq_return_ldt:
+native_irq_return_ldt:
 	pushq_cfi %rax
 	pushq_cfi %rdi
 	SWAPGS
@@ -872,7 +869,7 @@ native_irq_return_ldt:
 	SWAPGS
 	movq %rax,%rsp
 	popq_cfi %rax
-	jmp irq_return_iret
+	jmp native_irq_return_iret
 #endif
 
 .section .fixup,"ax"
@@ -956,13 +953,8 @@ __do_double_fault:
 	cmpl $__KERNEL_CS,CS(%rdi)
 	jne do_double_fault
 	movq RIP(%rdi),%rax
-	cmpq $irq_return_iret,%rax
-#ifdef CONFIG_PARAVIRT
-	je 1f
-	cmpq $native_iret,%rax
-#endif
+	cmpq $native_irq_return_iret,%rax
 	jne do_double_fault		/* This shouldn't happen... */
-1:
 	movq PER_CPU_VAR(kernel_stack),%rax
 	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
 	movq %rax,RSP(%rdi)
@@ -1428,7 +1420,7 @@ error_sti:
 	 */
 error_kernelspace:
 	incl %ebx
-	leaq irq_return_iret(%rip),%rcx
+	leaq native_irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
 	je error_swapgs
 	movl %ecx,%eax		/* zero extend */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 6afbb16e9b79..94d857fb1033 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -175,7 +175,7 @@ void init_espfix_ap(void)
 	if (!pud_present(pud)) {
 		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
 		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
-		paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
+		paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
 			set_pud(&pud_p[n], pud);
 	}
@@ -185,7 +185,7 @@ void init_espfix_ap(void)
 	if (!pmd_present(pmd)) {
 		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
 		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
-		paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
+		paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
 			set_pmd(&pmd_p[n], pmd);
 	}
@@ -193,7 +193,6 @@ void init_espfix_ap(void)
 	pte_p = pte_offset_kernel(&pmd, addr);
 	stack_page = (void *)__get_free_page(GFP_KERNEL);
 	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
-	paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT);
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
 
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 7596df664901..67e6d19ef1be 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -574,6 +574,9 @@ int kprobe_int3_handler(struct pt_regs *regs)
 	struct kprobe *p;
 	struct kprobe_ctlblk *kcb;
 
+	if (user_mode_vm(regs))
+		return 0;
+
 	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
 	/*
 	 * We don't want to be preempted for the entire
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34f93eb..a1da6737ba5b 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
 DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(pv_cpu_ops, iret, "iretq");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, save_fl);
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
-		PATCH_SITE(pv_cpu_ops, iret);
 		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
 		PATCH_SITE(pv_cpu_ops, usergs_sysret32);
 		PATCH_SITE(pv_cpu_ops, usergs_sysret64);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 57e5ce126d5a..ea030319b321 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -920,9 +920,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 			mark_tsc_unstable("cpufreq changes");
-	}
 
-	set_cyc2ns_scale(tsc_khz, freq->cpu);
+		set_cyc2ns_scale(tsc_khz, freq->cpu);
+	}
 
 	return 0;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f6449334ec45..ef432f891d30 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5887,6 +5887,18 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 			kvm_x86_ops->set_nmi(vcpu);
 		}
 	} else if (kvm_cpu_has_injectable_intr(vcpu)) {
+		/*
+		 * Because interrupts can be injected asynchronously, we are
+		 * calling check_nested_events again here to avoid a race condition.
+		 * See https://lkml.org/lkml/2014/7/2/60 for discussion about this
+		 * proposal and current concerns.  Perhaps we should be setting
+		 * KVM_REQ_EVENT only on certain events and not unconditionally?
+		 */
+		if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+			r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+			if (r != 0)
+				return r;
+		}
 		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
 					    false);
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index c98583588580..ebfa9b2c871d 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -36,99 +36,133 @@
 
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
 #include <linux/vmalloc.h>
 
 #include <xen/interface/xen.h>
 #include <xen/page.h>
 #include <xen/grant_table.h>
+#include <xen/xen.h>
 
 #include <asm/pgtable.h>
 
-static int map_pte_fn(pte_t *pte, struct page *pmd_page,
-		      unsigned long addr, void *data)
+static struct gnttab_vm_area {
+	struct vm_struct *area;
+	pte_t **ptes;
+} gnttab_shared_vm_area, gnttab_status_vm_area;
+
+int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   void **__shared)
 {
-	unsigned long **frames = (unsigned long **)data;
+	void *shared = *__shared;
+	unsigned long addr;
+	unsigned long i;
 
-	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
-	(*frames)++;
-	return 0;
-}
+	if (shared == NULL)
+		*__shared = shared = gnttab_shared_vm_area.area->addr;
 
-/*
- * This function is used to map shared frames to store grant status. It is
- * different from map_pte_fn above, the frames type here is uint64_t.
- */
-static int map_pte_fn_status(pte_t *pte, struct page *pmd_page,
-			     unsigned long addr, void *data)
-{
-	uint64_t **frames = (uint64_t **)data;
+	addr = (unsigned long)shared;
+
+	for (i = 0; i < nr_gframes; i++) {
+		set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i],
+			   mfn_pte(frames[i], PAGE_KERNEL));
+		addr += PAGE_SIZE;
+	}
 
-	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
-	(*frames)++;
 	return 0;
 }
 
-static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
-			unsigned long addr, void *data)
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   grant_status_t **__shared)
 {
+	grant_status_t *shared = *__shared;
+	unsigned long addr;
+	unsigned long i;
+
+	if (shared == NULL)
+		*__shared = shared = gnttab_status_vm_area.area->addr;
+
+	addr = (unsigned long)shared;
+
+	for (i = 0; i < nr_gframes; i++) {
+		set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i],
+			   mfn_pte(frames[i], PAGE_KERNEL));
+		addr += PAGE_SIZE;
+	}
 
-	set_pte_at(&init_mm, addr, pte, __pte(0));
 	return 0;
 }
 
-int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
-			   unsigned long max_nr_gframes,
-			   void **__shared)
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
 {
-	int rc;
-	void *shared = *__shared;
+	pte_t **ptes;
+	unsigned long addr;
+	unsigned long i;
 
-	if (shared == NULL) {
-		struct vm_struct *area =
-			alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
-		BUG_ON(area == NULL);
-		shared = area->addr;
-		*__shared = shared;
-	}
+	if (shared == gnttab_status_vm_area.area->addr)
+		ptes = gnttab_status_vm_area.ptes;
+	else
+		ptes = gnttab_shared_vm_area.ptes;
 
-	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
-				 PAGE_SIZE * nr_gframes,
-				 map_pte_fn, &frames);
-	return rc;
+	addr = (unsigned long)shared;
+
+	for (i = 0; i < nr_gframes; i++) {
+		set_pte_at(&init_mm, addr, ptes[i], __pte(0));
+		addr += PAGE_SIZE;
+	}
 }
 
-int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
-			   unsigned long max_nr_gframes,
-			   grant_status_t **__shared)
+static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
 {
-	int rc;
-	grant_status_t *shared = *__shared;
+	area->ptes = kmalloc(sizeof(pte_t *) * nr_frames, GFP_KERNEL);
+	if (area->ptes == NULL)
+		return -ENOMEM;
 
-	if (shared == NULL) {
-		/* No need to pass in PTE as we are going to do it
-		 * in apply_to_page_range anyhow. */
-		struct vm_struct *area =
-			alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
-		BUG_ON(area == NULL);
-		shared = area->addr;
-		*__shared = shared;
+	area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes);
+	if (area->area == NULL) {
+		kfree(area->ptes);
+		return -ENOMEM;
 	}
 
-	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
-				 PAGE_SIZE * nr_gframes,
-				 map_pte_fn_status, &frames);
-	return rc;
+	return 0;
 }
 
-void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
+static void arch_gnttab_vfree(struct gnttab_vm_area *area)
+{
+	free_vm_area(area->area);
+	kfree(area->ptes);
+}
+
+int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
 {
-	apply_to_page_range(&init_mm, (unsigned long)shared,
-			    PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
+	int ret;
+
+	if (!xen_pv_domain())
+		return 0;
+
+	ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Always allocate the space for the status frames in case
+	 * we're migrated to a host with V2 support.
+	 */
+	ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status);
+	if (ret < 0)
+		goto err;
+
+	return 0;
+err:
+	arch_gnttab_vfree(&gnttab_shared_vm_area);
+	return -ENOMEM;
 }
+
 #ifdef CONFIG_XEN_PVH
 #include <xen/balloon.h>
 #include <xen/events.h>
-#include <xen/xen.h>
 #include <linux/slab.h>
 static int __init xlated_setup_gnttab_pages(void)
 {