Diffstat (limited to 'arch/x86')
 arch/x86/Kconfig                                   |   4
 arch/x86/boot/compressed/eboot.c                   |  32
 arch/x86/ia32/ia32entry.S                          |  14
 arch/x86/include/asm/efi.h                         |  31
 arch/x86/include/asm/kvm_host.h                    |  16
 arch/x86/include/asm/preempt.h                     |   1
 arch/x86/include/uapi/asm/vmx.h                    |   2
 arch/x86/kernel/acpi/boot.c                        |  16
 arch/x86/kernel/apb_timer.c                        |   2
 arch/x86/kernel/apic/apic.c                        |   4
 arch/x86/kernel/cpu/Makefile                       |   7
 arch/x86/kernel/cpu/intel.c                        |   5
 arch/x86/kernel/cpu/perf_event.c                   |  14
 arch/x86/kernel/cpu/perf_event.h                   |   1
 arch/x86/kernel/cpu/perf_event_intel.c             | 173
 arch/x86/kernel/entry_32.S                         |  16
 arch/x86/kernel/i8259.c                            |   3
 arch/x86/kernel/irqinit.c                          |   3
 arch/x86/kernel/ptrace.c                           |   4
 arch/x86/kernel/setup.c                            |   2
 arch/x86/kernel/smpboot.c                          |   4
 arch/x86/kernel/tsc.c                              |   5
 arch/x86/kvm/emulate.c                             | 250
 arch/x86/kvm/i8254.c                               |   2
 arch/x86/kvm/paging_tmpl.h                         |   2
 arch/x86/kvm/svm.c                                 |   8
 arch/x86/kvm/vmx.c                                 |  24
 arch/x86/kvm/x86.c                                 |  38
 arch/x86/mm/pageattr.c                             |   2
 arch/x86/platform/efi/efi-bgrt.c                   |  36
 arch/x86/platform/efi/efi.c                        |  52
 arch/x86/platform/efi/efi_32.c                     |  12
 arch/x86/platform/efi/efi_64.c                     |   6
 arch/x86/platform/efi/efi_stub_32.S                |   4
 arch/x86/platform/intel-mid/intel_mid_weak_decls.h |   7
 arch/x86/platform/intel-mid/sfi.c                  |   2
 arch/x86/um/asm/ptrace.h                           |   4
 arch/x86/um/asm/syscall.h                          |  15
 arch/x86/xen/enlighten.c                           |   3
 arch/x86/xen/mmu.c                                 |   5
 arch/x86/xen/p2m.c                                 |  83
 arch/x86/xen/setup.c                               |   1
 arch/x86/xen/time.c                                |   2
 43 files changed, 468 insertions(+), 449 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f2327e88e07c..ded8a6774ac9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -142,6 +142,10 @@ config INSTRUCTION_DECODER
 	def_bool y
 	depends on KPROBES || PERF_EVENTS || UPROBES
 
+config PERF_EVENTS_INTEL_UNCORE
+	def_bool y
+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+
 config OUTPUT_FORMAT
 	string
 	default "elf32-i386" if X86_32
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index de8eebd6f67c..1acf605a646d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -330,8 +330,10 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
 	size = pci->romsize + sizeof(*rom);
 
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for rom\n");
 		return status;
+	}
 
 	memset(rom, 0, sizeof(*rom));
 
@@ -344,14 +346,18 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_VENDOR_ID, 1, &(rom->vendor));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->vendor\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_DEVICE_ID, 1, &(rom->devid));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->devid\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->get_location, pci, &(rom->segment),
 				 &(rom->bus), &(rom->device), &(rom->function));
@@ -432,8 +438,10 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
 	size = pci->romsize + sizeof(*rom);
 
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for rom\n");
 		return status;
+	}
 
 	rom->data.type = SETUP_PCI;
 	rom->data.len = size - sizeof(struct setup_data);
@@ -444,14 +452,18 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_VENDOR_ID, 1, &(rom->vendor));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->vendor\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_DEVICE_ID, 1, &(rom->devid));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->devid\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->get_location, pci, &(rom->segment),
 				 &(rom->bus), &(rom->device), &(rom->function));
@@ -538,8 +550,10 @@ static void setup_efi_pci(struct boot_params *params)
 				EFI_LOADER_DATA,
 				size, (void **)&pci_handle);
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for pci_handle\n");
 		return;
+	}
 
 	status = efi_call_early(locate_handle,
 				EFI_LOCATE_BY_PROTOCOL, &pci_proto,
@@ -1105,6 +1119,10 @@ struct boot_params *make_boot_params(struct efi_config *c)
 
 	memset(sdt, 0, sizeof(*sdt));
 
+	status = efi_parse_options(cmdline_ptr);
+	if (status != EFI_SUCCESS)
+		goto fail2;
+
 	status = handle_cmdline_files(sys_table, image,
 				      (char *)(unsigned long)hdr->cmd_line_ptr,
 				      "initrd=", hdr->initrd_addr_max,
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 711de084ab57..ffe71228fc10 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -157,7 +157,7 @@ ENTRY(ia32_sysenter_target)
 	 * ourselves. To save a few cycles, we can check whether
 	 * NT was set instead of doing an unconditional popfq.
 	 */
-	testl $X86_EFLAGS_NT,EFLAGS(%rsp)	/* saved EFLAGS match cpu */
+	testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
 	jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
@@ -198,12 +198,12 @@ sysexit_from_sys_call:
 
 #ifdef CONFIG_AUDITSYSCALL
 	.macro auditsys_entry_common
-	movl %esi,%r9d			/* 6th arg: 4th syscall arg */
-	movl %edx,%r8d			/* 5th arg: 3rd syscall arg */
-	/* (already in %ecx)		   4th arg: 2nd syscall arg */
-	movl %ebx,%edx			/* 3rd arg: 1st syscall arg */
-	movl %eax,%esi			/* 2nd arg: syscall number */
-	movl $AUDIT_ARCH_I386,%edi	/* 1st arg: audit arch */
+	movl %esi,%r8d			/* 5th arg: 4th syscall arg */
+	movl %ecx,%r9d			/*swap with edx*/
+	movl %edx,%ecx			/* 4th arg: 3rd syscall arg */
+	movl %r9d,%edx			/* 3rd arg: 2nd syscall arg */
+	movl %ebx,%esi			/* 2nd arg: 1st syscall arg */
+	movl %eax,%edi			/* 1st arg: syscall number */
 	call __audit_syscall_entry
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0ec241ede5a2..9b11757975d0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -81,24 +81,23 @@ extern u64 asmlinkage efi_call(void *fp, ...);
  */
 #define __efi_call_virt(f, args...) efi_call_virt(f, args)
 
-extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
+extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 				 u32 type, u64 attribute);
 
 #endif /* CONFIG_X86_32 */
 
-extern int add_efi_memmap;
 extern struct efi_scratch efi_scratch;
-extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
-extern int efi_memblock_x86_reserve_range(void);
-extern void efi_call_phys_prelog(void);
-extern void efi_call_phys_epilog(void);
-extern void efi_unmap_memmap(void);
-extern void efi_memory_uc(u64 addr, unsigned long size);
+extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
+extern int __init efi_memblock_x86_reserve_range(void);
+extern void __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(void);
+extern void __init efi_unmap_memmap(void);
+extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
 extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
-extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
-extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
 extern void __init old_map_region(efi_memory_desc_t *md);
 extern void __init runtime_code_page_mkexec(void);
 extern void __init efi_runtime_mkexec(void);
@@ -162,16 +161,6 @@ static inline efi_status_t efi_thunk_set_virtual_address_map(
 extern bool efi_reboot_required(void);
 
 #else
-/*
- * IF EFI is not configured, have the EFI calls return -ENOSYS.
- */
-#define efi_call0(_f) (-ENOSYS)
-#define efi_call1(_f, _a1) (-ENOSYS)
-#define efi_call2(_f, _a1, _a2) (-ENOSYS)
-#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS)
-#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS)
-#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS)
-#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS)
 static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
 static inline bool efi_reboot_required(void)
 {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7d603a71ab3a..6ed0c30d6a0c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
+static inline u64 get_canonical(u64 la)
+{
+	return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+	return get_canonical(la) != la;
+#else
+	return false;
+#endif
+}
+
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					   unsigned long address);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 7024c12f7bfe..400873450e33 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -105,6 +105,7 @@ static __always_inline bool should_resched(void)
 # ifdef CONFIG_CONTEXT_TRACKING
   extern asmlinkage void ___preempt_schedule_context(void);
 # define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
+  extern asmlinkage void preempt_schedule_context(void);
 # endif
 #endif
 
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420376eb..990a2fe1588d 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_INVEPT              50
 #define EXIT_REASON_PREEMPTION_TIMER    52
+#define EXIT_REASON_INVVPID             53
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
@@ -114,6 +115,7 @@
 	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
+	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }
 
 #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b436fc735aa4..a142e77693e1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -397,7 +397,7 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
 
 	/* Don't set up the ACPI SCI because it's already set up */
 	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return gsi;
+		return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
 
 	trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
 	polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
@@ -604,14 +604,18 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
 
 int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 {
-	int irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
+	int irq;
 
-	if (irq >= 0) {
+	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+		*irqp = gsi;
+	} else {
+		irq = mp_map_gsi_to_irq(gsi,
+					IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
+		if (irq < 0)
+			return -1;
 		*irqp = irq;
-		return 0;
 	}
-
-	return -1;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
 
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 5972b108f15a..b708738d016e 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -185,8 +185,6 @@ static void apbt_setup_irq(struct apbt_dev *adev)
 
 	irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
 	irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
-	/* APB timer irqs are set up as mp_irqs, timer is edge type */
-	__irq_set_handler(adev->irq, handle_edge_irq, 0, "edge");
 }
 
 /* Should be called with per cpu */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 00853b254ab0..ba6cc041edb1 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1297,7 +1297,7 @@ void setup_local_APIC(void)
 	unsigned int value, queued;
 	int i, j, acked = 0;
 	unsigned long long tsc = 0, ntsc;
-	long long max_loops = cpu_khz;
+	long long max_loops = cpu_khz ? cpu_khz : 1000000;
 
 	if (cpu_has_tsc)
 		rdtscll(tsc);
@@ -1383,7 +1383,7 @@ void setup_local_APIC(void)
 			break;
 		}
 		if (queued) {
-			if (cpu_has_tsc) {
+			if (cpu_has_tsc && cpu_khz) {
 				rdtscll(ntsc);
 				max_loops = (cpu_khz << 10) - (ntsc - tsc);
 			} else
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 01d5453b5502..e27b49d7c922 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,9 +39,12 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
 endif
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o perf_event_intel_uncore_snb.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore_snbep.o perf_event_intel_uncore_nhmex.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_rapl.o
+
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= perf_event_intel_uncore.o \
+					   perf_event_intel_uncore_snb.o \
+					   perf_event_intel_uncore_snbep.o \
+					   perf_event_intel_uncore_nhmex.o
 endif
 
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1ef456273172..9cc6b6f25f42 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -213,12 +213,13 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_F00F_BUG
 	/*
-	 * All current models of Pentium and Pentium with MMX technology CPUs
+	 * All models of Pentium and Pentium with MMX technology CPUs
 	 * have the F0 0F bug, which lets nonprivileged users lock up the
 	 * system. Announce that the fault handler will be checking for it.
+	 * The Quark is also family 5, but does not have the same bug.
 	 */
 	clear_cpu_bug(c, X86_BUG_F00F);
-	if (!paravirt_enabled() && c->x86 == 5) {
+	if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) {
 		static int f00f_workaround_enabled;
 
 		set_cpu_bug(c, X86_BUG_F00F);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1b8299dd3d91..143e5f5dc855 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -243,8 +243,9 @@ static bool check_hw_exists(void)
 
 msr_fail:
 	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
-	printk(boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR
-	       "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new);
+	printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+	       boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
+	       reg, val_new);
 
 	return false;
 }
@@ -444,12 +445,6 @@ int x86_pmu_hw_config(struct perf_event *event)
 	if (event->attr.type == PERF_TYPE_RAW)
 		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 
-	if (event->attr.sample_period && x86_pmu.limit_period) {
-		if (x86_pmu.limit_period(event, event->attr.sample_period) >
-				event->attr.sample_period)
-			return -EINVAL;
-	}
-
 	return x86_setup_perfctr(event);
 }
 
@@ -987,9 +982,6 @@ int x86_perf_event_set_period(struct perf_event *event)
 	if (left > x86_pmu.max_period)
 		left = x86_pmu.max_period;
 
-	if (x86_pmu.limit_period)
-		left = x86_pmu.limit_period(event, left);
-
 	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 
 	/*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index d98a34d435d7..fc5eb390b368 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -445,7 +445,6 @@ struct x86_pmu {
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
 	bool		late_ack;
-	unsigned	(*limit_period)(struct perf_event *event, unsigned l);
 
 	/*
 	 * sysfs attrs
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a73947c53b65..944bf019b74f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -220,15 +220,6 @@ static struct event_constraint intel_hsw_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_bdw_event_constraints[] = {
-	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
-	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
-	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
-	INTEL_UEVENT_CONSTRAINT(0x148, 0x4),	/* L1D_PEND_MISS.PENDING */
-	INTEL_EVENT_CONSTRAINT(0xa3, 0x4),	/* CYCLE_ACTIVITY.* */
-	EVENT_CONSTRAINT_END
-};
-
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -424,126 +415,6 @@ static __initconst const u64 snb_hw_cache_event_ids
 
 };
 
-static __initconst const u64 hsw_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
-		[ C(RESULT_MISS)   ] = 0x151,	/* L1D.REPLACEMENT */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x280,	/* ICACHE.MISSES */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(LL ) ] = {
-	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
-		[ C(RESULT_ACCESS) ] = 0x1b7,
-		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
-		   L3_MISS|ANY_SNOOP */
-		[ C(RESULT_MISS)   ] = 0x1b7,
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE:ALL_RFO */
-		/* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
-		[ C(RESULT_MISS)   ] = 0x1b7,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
-		[ C(RESULT_MISS)   ] = 0x108,	/* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
-		[ C(RESULT_MISS)   ] = 0x149,	/* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x6085,	/* ITLB_MISSES.STLB_HIT */
-		[ C(RESULT_MISS)   ] = 0x185,	/* ITLB_MISSES.MISS_CAUSES_A_WALK */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0xc4,	/* BR_INST_RETIRED.ALL_BRANCHES */
-		[ C(RESULT_MISS)   ] = 0xc5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS)   ] = -1,
-	},
- },
-};
-
-static __initconst const u64 hsw_hw_cache_extra_regs
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL ) ] = {
-	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
-		[ C(RESULT_ACCESS) ] = 0x2d5,
-		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
-		   L3_MISS|ANY_SNOOP */
-		[ C(RESULT_MISS)   ] = 0x3fbc0202d5ull,
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x122,	/* OFFCORE_RESPONSE:ALL_RFO */
-		/* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
-		[ C(RESULT_MISS)   ] = 0x3fbc020122ull,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS)   ] = 0x0,
-	},
- },
-};
-
 static __initconst const u64 westmere_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -2034,24 +1905,6 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	return c;
 }
 
-/*
- * Broadwell:
- * The INST_RETIRED.ALL period always needs to have lowest
- * 6bits cleared (BDM57). It shall not use a period smaller
- * than 100 (BDM11). We combine the two to enforce
- * a min-period of 128.
- */
-static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
-{
-	if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
-			X86_CONFIG(.event=0xc0, .umask=0x01)) {
-		if (left < 128)
-			left = 128;
-		left &= ~0x3fu;
-	}
-	return left;
-}
-
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
 PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -2692,8 +2545,8 @@ __init int intel_pmu_init(void)
 	case 69: /* 22nm Haswell ULT */
 	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
 		x86_pmu.late_ack = true;
-		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
 
 		intel_pmu_lbr_init_snb();
 
@@ -2712,28 +2565,6 @@ __init int intel_pmu_init(void)
 		pr_cont("Haswell events, ");
 		break;
 
-	case 61: /* 14nm Broadwell Core-M */
-		x86_pmu.late_ack = true;
-		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-
-		intel_pmu_lbr_init_snb();
-
-		x86_pmu.event_constraints = intel_bdw_event_constraints;
-		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
-		x86_pmu.extra_regs = intel_snbep_extra_regs;
-		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
-		/* all extra regs are per-cpu when HT is on */
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
-		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
-
-		x86_pmu.hw_config = hsw_hw_config;
-		x86_pmu.get_event_constraints = hsw_get_event_constraints;
-		x86_pmu.cpu_events = hsw_events_attrs;
-		x86_pmu.limit_period = bdw_limit_period;
-		pr_cont("Broadwell events, ");
-		break;
-
 	default:
 		switch (x86_pmu.version) {
 		case 1:
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4b0e1dfa2226..344b63f18d14 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -447,16 +447,14 @@ sysenter_exit:
 sysenter_audit:
 	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 	jnz syscall_trace_entry
-	addl $4,%esp
-	CFI_ADJUST_CFA_OFFSET -4
-	/* %esi already in 8(%esp)	   6th arg: 4th syscall arg */
-	/* %edx already in 4(%esp)	   5th arg: 3rd syscall arg */
-	/* %ecx already in 0(%esp)	   4th arg: 2nd syscall arg */
-	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
-	movl %eax,%edx			/* 2nd arg: syscall number */
-	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
+	/* movl PT_EAX(%esp), %eax	already set, syscall number: 1st arg to audit */
+	movl PT_EBX(%esp), %edx		/* ebx/a0: 2nd arg to audit */
+	/* movl PT_ECX(%esp), %ecx	already set, a1: 3nd arg to audit */
+	pushl_cfi PT_ESI(%esp)		/* a3: 5th arg */
+	pushl_cfi PT_EDX+4(%esp)	/* a2: 4th arg */
 	call __audit_syscall_entry
-	pushl_cfi %ebx
+	popl_cfi %ecx			/* get that remapped edx off the stack */
+	popl_cfi %ecx			/* get that remapped esi off the stack */
 	movl PT_EAX(%esp),%eax		/* reload syscall number */
 	jmp sysenter_do_call
 
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 8af817105e29..e7cc5370cd2f 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -111,8 +111,7 @@ static void make_8259A_irq(unsigned int irq)
 {
 	disable_irq_nosync(irq);
 	io_apic_irqs &= ~(1<<irq);
-	irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
-				      i8259A_chip.name);
+	irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
 	enable_irq(irq);
 }
 
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 44f1ed42fdf2..4de73ee78361 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -70,7 +70,6 @@ int vector_used_by_percpu_irq(unsigned int vector)
 void __init init_ISA_irqs(void)
 {
 	struct irq_chip *chip = legacy_pic->chip;
-	const char *name = chip->name;
 	int i;
 
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
@@ -79,7 +78,7 @@ void __init init_ISA_irqs(void)
 	legacy_pic->init(0);
 
 	for (i = 0; i < nr_legacy_irqs(); i++)
-		irq_set_chip_and_handler_name(i, chip, handle_level_irq, name);
+		irq_set_chip_and_handler(i, chip, handle_level_irq);
 }
 
 void __init init_IRQ(void)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 29576c244699..749b0e423419 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1445,12 +1445,12 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
 {
 #ifdef CONFIG_X86_64
 	if (arch == AUDIT_ARCH_X86_64) {
-		audit_syscall_entry(arch, regs->orig_ax, regs->di,
+		audit_syscall_entry(regs->orig_ax, regs->di,
 				    regs->si, regs->dx, regs->r10);
 	} else
 #endif
 	{
-		audit_syscall_entry(arch, regs->orig_ax, regs->bx,
+		audit_syscall_entry(regs->orig_ax, regs->bx,
 				    regs->cx, regs->dx, regs->si);
 	}
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 235cfd39e0d7..ab08aa2276fb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1128,7 +1128,6 @@ void __init setup_arch(char **cmdline_p)
 	setup_real_mode();
 
 	memblock_set_current_limit(get_max_mapped());
-	dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
 
 	/*
 	 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -1159,6 +1158,7 @@ void __init setup_arch(char **cmdline_p)
 	early_acpi_boot_init();
 
 	initmem_init();
+	dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
 
 	/*
 	 * Reserve memory for crash kernel after SRAT is parsed so that it
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2d5200e56357..4d2128ac70bd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,8 +102,6 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
-static DEFINE_PER_CPU(struct completion, die_complete);
-
 atomic_t init_deasserted;
 
 /*
@@ -1318,6 +1316,8 @@ void cpu_disable_common(void)
 	fixup_irqs();
 }
 
+static DEFINE_PER_CPU(struct completion, die_complete);
+
 int native_cpu_disable(void)
 {
 	int ret;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b6025f9e36c6..b7e50bba3bbb 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1166,14 +1166,17 @@ void __init tsc_init(void)
 
 	x86_init.timers.tsc_pre_init();
 
-	if (!cpu_has_tsc)
+	if (!cpu_has_tsc) {
+		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 		return;
+	}
 
 	tsc_khz = x86_platform.calibrate_tsc();
 	cpu_khz = tsc_khz;
 
 	if (!tsc_khz) {
 		mark_tsc_unstable("could not calculate TSC khz");
+		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 		return;
 	}
 
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a46207a05835..749f9fa38254 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }
 
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
 	u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+				 int cs_l)
+{
+	switch (ctxt->op_bytes) {
+	case 2:
+		ctxt->_eip = (u16)dst;
+		break;
+	case 4:
+		ctxt->_eip = (u32)dst;
+		break;
+	case 8:
+		if ((cs_l && is_noncanonical_address(dst)) ||
+		    (!cs_l && (dst & ~(u32)-1)))
+			return emulate_gp(ctxt, 0);
+		ctxt->_eip = dst;
+		break;
+	default:
+		WARN(1, "unsupported eip assignment size\n");
+	}
+	return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
 					       unsigned size)
 {
-	if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
-		return __do_insn_fetch_bytes(ctxt, size);
+	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
+
+	if (unlikely(done_size < size))
+		return __do_insn_fetch_bytes(ctxt, size - done_size);
 	else
 		return X86EMUL_CONTINUE;
 }
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				     u16 selector, int seg, u8 cpl, bool in_task_switch)
+				     u16 selector, int seg, u8 cpl,
+				     bool in_task_switch,
+				     struct desc_struct *desc)
 {
 	struct desc_struct seg_desc, old_desc;
 	u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+	if (desc)
+		*desc = seg_desc;
 	return X86EMUL_CONTINUE;
 exception:
 	return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				   u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
-	return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
 }
 
 static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel;
+	unsigned short sel, old_sel;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	u8 cpl = ctxt->ops->cpl(ctxt);
+
+	/* Assignment of RIP may only fail in 64-bit mode */
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+				 VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-	return X86EMUL_CONTINUE;
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		/* assigning eip failed; restore the old cs */
+		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+		return rc;
+	}
+	return rc;
 }
 
 static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	case 2: /* call near abs */ {
 		long int old_eip;
 		old_eip = ctxt->_eip;
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
+		if (rc != X86EMUL_CONTINUE)
+			break;
 		ctxt->src.val = old_eip;
 		rc = em_push(ctxt);
 		break;
 	}
 	case 4: /* jmp abs */
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
 		break;
 	case 5: /* jmp far */
 		rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 
 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	return em_pop(ctxt);
+	int rc;
+	unsigned long eip;
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return assign_eip_near(ctxt, eip);
 }
 
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned long cs;
+	unsigned long eip, cs;
+	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 
-	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+				 VCPU_SREG_CS);
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	if (ctxt->op_bytes == 4)
-		ctxt->_eip = (u32)ctxt->_eip;
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	}
 	return rc;
 }
 
@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
-	u64 msr_data;
+	u64 msr_data, rcx, rdx;
 	int usermode;
 	u16 cs_sel = 0, ss_sel = 0;
 
@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	else
 		usermode = X86EMUL_MODE_PROT32;
 
+	rcx = reg_read(ctxt, VCPU_REGS_RCX);
+	rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
 	cs.dpl = 3;
 	ss.dpl = 3;
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
+		if (is_noncanonical_address(rcx) ||
+		    is_noncanonical_address(rdx))
+			return emulate_gp(ctxt, 0);
 		break;
 	}
 	cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
-	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+	ctxt->_eip = rdx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
 
 	return X86EMUL_CONTINUE;
 }
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happenes at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+					cpl, true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
 
 static int em_call(struct x86_emulate_ctxt *ctxt)
 {
+	int rc;
 	long rel = ctxt->src.val;
 
 	ctxt->src.val = (unsigned long)ctxt->_eip;
-	jmp_rel(ctxt, rel);
+	rc = jmp_rel(ctxt, rel);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 	return em_push(ctxt);
 }
 
@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	u16 sel, old_cs;
 	ulong old_eip;
 	int rc;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	int cpl = ctxt->ops->cpl(ctxt);
 
-	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
 	old_eip = ctxt->_eip;
+	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-	if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
 
 	ctxt->src.val = old_cs;
 	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
-		return rc;
+		goto fail;
 
 	ctxt->src.val = old_eip;
-	return em_push(ctxt);
+	rc = em_push(ctxt);
+	/* If we failed, we tainted the memory, but the very least we should
+	   restore cs */
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
+	return rc;
+fail:
+	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	return rc;
+
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
+	unsigned long eip;
 
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_near(ctxt, eip);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
 static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_clflush(struct x86_emulate_ctxt *ctxt)
+{
+	/* emulating clflush regardless of cpuid */
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
 	switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
3693 X7(D(Undefined)), 3806 X7(D(Undefined)),
3694}; 3807};
3695 3808
3809static const struct gprefix pfx_0f_ae_7 = {
3810 I(SrcMem | ByteOp, em_clflush), N, N, N,
3811};
3812
3813static const struct group_dual group15 = { {
3814 N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
3815}, {
3816 N, N, N, N, N, N, N, N,
3817} };
3818
3696static const struct gprefix pfx_0f_6f_0f_7f = { 3819static const struct gprefix pfx_0f_6f_0f_7f = {
3697 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), 3820 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
3698}; 3821};
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
3901 N, I(ImplicitOps | EmulateOnUD, em_syscall), 4024 N, I(ImplicitOps | EmulateOnUD, em_syscall),
3902 II(ImplicitOps | Priv, em_clts, clts), N, 4025 II(ImplicitOps | Priv, em_clts, clts), N,
3903 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, 4026 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
3904 N, D(ImplicitOps | ModRM), N, N, 4027 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
3905 /* 0x10 - 0x1F */ 4028 /* 0x10 - 0x1F */
3906 N, N, N, N, N, N, N, N, 4029 N, N, N, N, N, N, N, N,
3907 D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM), 4030 D(ImplicitOps | ModRM | SrcMem | NoAccess),
4031 N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
3908 /* 0x20 - 0x2F */ 4032 /* 0x20 - 0x2F */
3909 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read), 4033 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
3910 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read), 4034 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
3956 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), 4080 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
3957 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), 4081 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
3958 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), 4082 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
3959 D(ModRM), F(DstReg | SrcMem | ModRM, em_imul), 4083 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
3960 /* 0xB0 - 0xB7 */ 4084 /* 0xB0 - 0xB7 */
3961 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), 4085 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
3962 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), 4086 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ done_prefixes:
4473 /* Decode and fetch the destination operand: register or memory. */ 4597 /* Decode and fetch the destination operand: register or memory. */
4474 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); 4598 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
4475 4599
4476done:
4477 if (ctxt->rip_relative) 4600 if (ctxt->rip_relative)
4478 ctxt->memopp->addr.mem.ea += ctxt->_eip; 4601 ctxt->memopp->addr.mem.ea += ctxt->_eip;
4479 4602
4603done:
4480 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; 4604 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
4481} 4605}
4482 4606
@@ -4726,7 +4850,7 @@ special_insn:
4726 break; 4850 break;
4727 case 0x70 ... 0x7f: /* jcc (short) */ 4851 case 0x70 ... 0x7f: /* jcc (short) */
4728 if (test_cc(ctxt->b, ctxt->eflags)) 4852 if (test_cc(ctxt->b, ctxt->eflags))
4729 jmp_rel(ctxt, ctxt->src.val); 4853 rc = jmp_rel(ctxt, ctxt->src.val);
4730 break; 4854 break;
4731 case 0x8d: /* lea r16/r32, m */ 4855 case 0x8d: /* lea r16/r32, m */
4732 ctxt->dst.val = ctxt->src.addr.mem.ea; 4856 ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ special_insn:
4756 break; 4880 break;
4757 case 0xe9: /* jmp rel */ 4881 case 0xe9: /* jmp rel */
4758 case 0xeb: /* jmp rel short */ 4882 case 0xeb: /* jmp rel short */
4759 jmp_rel(ctxt, ctxt->src.val); 4883 rc = jmp_rel(ctxt, ctxt->src.val);
4760 ctxt->dst.type = OP_NONE; /* Disable writeback. */ 4884 ctxt->dst.type = OP_NONE; /* Disable writeback. */
4761 break; 4885 break;
4762 case 0xf4: /* hlt */ 4886 case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ twobyte_insn:
4881 break; 5005 break;
4882 case 0x80 ... 0x8f: /* jnz rel, etc*/ 5006 case 0x80 ... 0x8f: /* jnz rel, etc*/
4883 if (test_cc(ctxt->b, ctxt->eflags)) 5007 if (test_cc(ctxt->b, ctxt->eflags))
4884 jmp_rel(ctxt, ctxt->src.val); 5008 rc = jmp_rel(ctxt, ctxt->src.val);
4885 break; 5009 break;
4886 case 0x90 ... 0x9f: /* setcc r/m8 */ 5010 case 0x90 ... 0x9f: /* setcc r/m8 */
4887 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); 5011 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
4888 break; 5012 break;
4889 case 0xae: /* clflush */
4890 break;
4891 case 0xb6 ... 0xb7: /* movzx */ 5013 case 0xb6 ... 0xb7: /* movzx */
4892 ctxt->dst.bytes = ctxt->op_bytes; 5014 ctxt->dst.bytes = ctxt->op_bytes;
4893 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val 5015 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
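The em_call_far and em_ret_near_imm hunks above share one shape: the new CS and RIP now go through helpers that can fail (__load_segment_descriptor, assign_eip_far, assign_eip_near), and when a later step fails the code unwinds what was already committed instead of leaving the emulated CPU half switched. A minimal standalone sketch of that save/attempt/rollback pattern, with hypothetical types and helpers rather than the emulator's real API:

    #include <stdio.h>

    /* Hypothetical stand-ins for the emulated CPU state and its helpers. */
    struct cpu_state { unsigned short cs; unsigned long ip; };

    static int load_segment(struct cpu_state *s, unsigned short sel)
    {
        if (sel == 0)
            return -1;              /* reject a null selector before touching state */
        s->cs = sel;
        return 0;
    }

    static int assign_ip(struct cpu_state *s, unsigned long ip)
    {
        if (ip > 0xffffUL)
            return -1;              /* stand-in for the segment limit / canonical check */
        s->ip = ip;
        return 0;
    }

    static int call_far(struct cpu_state *s, unsigned short sel, unsigned long ip)
    {
        struct cpu_state old = *s;  /* save everything a later failure must restore */
        int rc;

        rc = load_segment(s, sel);
        if (rc)
            return rc;              /* nothing was committed yet */

        rc = assign_ip(s, ip);
        if (rc)
            goto fail;              /* CS already switched, roll it back */
        return 0;

    fail:
        *s = old;                   /* restore the saved CS:IP on failure */
        return rc;
    }

    int main(void)
    {
        struct cpu_state s = { .cs = 0x10, .ip = 0x100 };

        if (call_far(&s, 0x20, 0x123456))   /* out-of-range target forces the rollback */
            printf("rolled back: cs=%#x ip=%#lx\n", (unsigned)s.cs, s.ip);
        return 0;
    }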
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d86471b76..298781d4cfb4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
262 return; 262 return;
263 263
264 timer = &pit->pit_state.timer; 264 timer = &pit->pit_state.timer;
265 mutex_lock(&pit->pit_state.lock);
265 if (hrtimer_cancel(timer)) 266 if (hrtimer_cancel(timer))
266 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 267 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
268 mutex_unlock(&pit->pit_state.lock);
267} 269}
268 270
269static void destroy_pit_timer(struct kvm_pit *pit) 271static void destroy_pit_timer(struct kvm_pit *pit)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 806d58e3c320..fd49c867b25a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,7 +298,7 @@ retry_walk:
298 } 298 }
299#endif 299#endif
300 walker->max_level = walker->level; 300 walker->max_level = walker->level;
301 ASSERT(!is_long_mode(vcpu) && is_pae(vcpu)); 301 ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
302 302
303 accessed_dirty = PT_GUEST_ACCESSED_MASK; 303 accessed_dirty = PT_GUEST_ACCESSED_MASK;
304 pt_access = pte_access = ACC_ALL; 304 pt_access = pte_access = ACC_ALL;
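The corrected ASSERT in the walker encodes the invariant that a long-mode guest must have PAE enabled. The old expression !is_long_mode(vcpu) && is_pae(vcpu) only held for 32-bit PAE guests, so it fired for perfectly legal configurations; the new one rejects only the impossible long-mode-without-PAE case. A small check of both forms over all four flag combinations (plain C, hypothetical flags rather than the KVM helpers):

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        /* Walk all (long mode, PAE) combinations and show what each assertion accepts.
         * Only long mode without PAE should be rejected. */
        for (int lm = 0; lm <= 1; lm++) {
            for (int pae = 0; pae <= 1; pae++) {
                bool old_ok = !lm && pae;        /* old: only 32-bit PAE guests pass */
                bool new_ok = !(lm && !pae);     /* new: "long mode implies PAE" */
                printf("long_mode=%d pae=%d old_ok=%d new_ok=%d\n", lm, pae, old_ok, new_ok);
            }
        }
        return 0;
    }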
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 65510f624dfe..7527cefc5a43 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
3251 msr.host_initiated = false; 3251 msr.host_initiated = false;
3252 3252
3253 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 3253 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3254 if (svm_set_msr(&svm->vcpu, &msr)) { 3254 if (kvm_set_msr(&svm->vcpu, &msr)) {
3255 trace_kvm_msr_write_ex(ecx, data); 3255 trace_kvm_msr_write_ex(ecx, data);
3256 kvm_inject_gp(&svm->vcpu, 0); 3256 kvm_inject_gp(&svm->vcpu, 0);
3257 } else { 3257 } else {
@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)
3551 3551
3552 if (exit_code >= ARRAY_SIZE(svm_exit_handlers) 3552 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
3553 || !svm_exit_handlers[exit_code]) { 3553 || !svm_exit_handlers[exit_code]) {
 3554 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 3554 WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
3555 kvm_run->hw.hardware_exit_reason = exit_code; 3555 kvm_queue_exception(vcpu, UD_VECTOR);
3556 return 0; 3556 return 1;
3557 } 3557 }
3558 3558
3559 return svm_exit_handlers[exit_code](svm); 3559 return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0acac81f198b..a8b76c4c95e2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2659 default: 2659 default:
2660 msr = find_msr_entry(vmx, msr_index); 2660 msr = find_msr_entry(vmx, msr_index);
2661 if (msr) { 2661 if (msr) {
2662 u64 old_msr_data = msr->data;
2662 msr->data = data; 2663 msr->data = data;
2663 if (msr - vmx->guest_msrs < vmx->save_nmsrs) { 2664 if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
2664 preempt_disable(); 2665 preempt_disable();
2665 kvm_set_shared_msr(msr->index, msr->data, 2666 ret = kvm_set_shared_msr(msr->index, msr->data,
2666 msr->mask); 2667 msr->mask);
2667 preempt_enable(); 2668 preempt_enable();
2669 if (ret)
2670 msr->data = old_msr_data;
2668 } 2671 }
2669 break; 2672 break;
2670 } 2673 }
@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
5291 msr.data = data; 5294 msr.data = data;
5292 msr.index = ecx; 5295 msr.index = ecx;
5293 msr.host_initiated = false; 5296 msr.host_initiated = false;
5294 if (vmx_set_msr(vcpu, &msr) != 0) { 5297 if (kvm_set_msr(vcpu, &msr) != 0) {
5295 trace_kvm_msr_write_ex(ecx, data); 5298 trace_kvm_msr_write_ex(ecx, data);
5296 kvm_inject_gp(vcpu, 0); 5299 kvm_inject_gp(vcpu, 0);
5297 return 1; 5300 return 1;
@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
6743 return 1; 6746 return 1;
6744} 6747}
6745 6748
6749static int handle_invvpid(struct kvm_vcpu *vcpu)
6750{
6751 kvm_queue_exception(vcpu, UD_VECTOR);
6752 return 1;
6753}
6754
6746/* 6755/*
6747 * The exit handlers return 1 if the exit was handled fully and guest execution 6756 * The exit handlers return 1 if the exit was handled fully and guest execution
6748 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 6757 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
6788 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, 6797 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
6789 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, 6798 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
6790 [EXIT_REASON_INVEPT] = handle_invept, 6799 [EXIT_REASON_INVEPT] = handle_invept,
6800 [EXIT_REASON_INVVPID] = handle_invvpid,
6791}; 6801};
6792 6802
6793static const int kvm_vmx_max_exit_handlers = 6803static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
7023 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: 7033 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
7024 case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: 7034 case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
7025 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: 7035 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
7026 case EXIT_REASON_INVEPT: 7036 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
7027 /* 7037 /*
7028 * VMX instructions trap unconditionally. This allows L1 to 7038 * VMX instructions trap unconditionally. This allows L1 to
7029 * emulate them for its L2 guest, i.e., allows 3-level nesting! 7039 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
7164 && kvm_vmx_exit_handlers[exit_reason]) 7174 && kvm_vmx_exit_handlers[exit_reason])
7165 return kvm_vmx_exit_handlers[exit_reason](vcpu); 7175 return kvm_vmx_exit_handlers[exit_reason](vcpu);
7166 else { 7176 else {
7167 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; 7177 WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
7168 vcpu->run->hw.hardware_exit_reason = exit_reason; 7178 kvm_queue_exception(vcpu, UD_VECTOR);
7179 return 1;
7169 } 7180 }
7170 return 0;
7171} 7181}
7172 7182
7173static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 7183static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c8f94331f8..0033df32a745 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
229 shared_msr_update(i, shared_msrs_global.msrs[i]); 229 shared_msr_update(i, shared_msrs_global.msrs[i]);
230} 230}
231 231
232void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) 232int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
233{ 233{
234 unsigned int cpu = smp_processor_id(); 234 unsigned int cpu = smp_processor_id();
235 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); 235 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
236 int err;
236 237
237 if (((value ^ smsr->values[slot].curr) & mask) == 0) 238 if (((value ^ smsr->values[slot].curr) & mask) == 0)
238 return; 239 return 0;
239 smsr->values[slot].curr = value; 240 smsr->values[slot].curr = value;
240 wrmsrl(shared_msrs_global.msrs[slot], value); 241 err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
242 if (err)
243 return 1;
244
241 if (!smsr->registered) { 245 if (!smsr->registered) {
242 smsr->urn.on_user_return = kvm_on_user_return; 246 smsr->urn.on_user_return = kvm_on_user_return;
243 user_return_notifier_register(&smsr->urn); 247 user_return_notifier_register(&smsr->urn);
244 smsr->registered = true; 248 smsr->registered = true;
245 } 249 }
250 return 0;
246} 251}
247EXPORT_SYMBOL_GPL(kvm_set_shared_msr); 252EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
248 253
@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
987} 992}
988EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); 993EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
989 994
990
991/* 995/*
 992 * Writes msr value into the appropriate "register". 996 * Writes msr value into the appropriate "register".
993 * Returns 0 on success, non-0 otherwise. 997 * Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
995 */ 999 */
996int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) 1000int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
997{ 1001{
1002 switch (msr->index) {
1003 case MSR_FS_BASE:
1004 case MSR_GS_BASE:
1005 case MSR_KERNEL_GS_BASE:
1006 case MSR_CSTAR:
1007 case MSR_LSTAR:
1008 if (is_noncanonical_address(msr->data))
1009 return 1;
1010 break;
1011 case MSR_IA32_SYSENTER_EIP:
1012 case MSR_IA32_SYSENTER_ESP:
1013 /*
 1014 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if a
1015 * non-canonical address is written on Intel but not on
1016 * AMD (which ignores the top 32-bits, because it does
1017 * not implement 64-bit SYSENTER).
1018 *
1019 * 64-bit code should hence be able to write a non-canonical
1020 * value on AMD. Making the address canonical ensures that
1021 * vmentry does not fail on Intel after writing a non-canonical
1022 * value, and that something deterministic happens if the guest
1023 * invokes 64-bit SYSENTER.
1024 */
1025 msr->data = get_canonical(msr->data);
1026 }
998 return kvm_x86_ops->set_msr(vcpu, msr); 1027 return kvm_x86_ops->set_msr(vcpu, msr);
999} 1028}
1029EXPORT_SYMBOL_GPL(kvm_set_msr);
1000 1030
1001/* 1031/*
1002 * Adapt set_msr() to msr_io()'s calling convention 1032 * Adapt set_msr() to msr_io()'s calling convention
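The new checks in kvm_set_msr() use the x86-64 canonical-address rule: with 48 implemented virtual-address bits, bits 63:47 of a canonical address all equal bit 47, so canonicalizing is a sign extension from bit 47. A self-contained sketch of the two helpers the hunk relies on, hard-coding 48 bits for illustration (the kernel derives the width from the CPU instead):

    #include <stdint.h>
    #include <stdio.h>

    #define VADDR_BITS 48   /* assumption for this sketch: 48 virtual-address bits */

    /* Sign-extend from bit (VADDR_BITS - 1); relies on arithmetic right shift. */
    static uint64_t get_canonical(uint64_t la)
    {
        return (uint64_t)(((int64_t)(la << (64 - VADDR_BITS))) >> (64 - VADDR_BITS));
    }

    static int is_noncanonical_address(uint64_t la)
    {
        return get_canonical(la) != la;
    }

    int main(void)
    {
        uint64_t ok  = 0xffff800000000000ULL;   /* canonical upper-half address */
        uint64_t bad = 0x0000800000000000ULL;   /* bit 47 set, bits 63:48 clear */

        printf("%#llx noncanonical? %d\n", (unsigned long long)ok,  is_noncanonical_address(ok));
        printf("%#llx noncanonical? %d\n", (unsigned long long)bad, is_noncanonical_address(bad));
        printf("canonicalized: %#llx\n", (unsigned long long)get_canonical(bad));
        return 0;
    }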
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index ae242a7c11c7..36de293caf25 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -409,7 +409,7 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
409 psize = page_level_size(level); 409 psize = page_level_size(level);
410 pmask = page_level_mask(level); 410 pmask = page_level_mask(level);
411 offset = virt_addr & ~pmask; 411 offset = virt_addr & ~pmask;
412 phys_addr = pte_pfn(*pte) << PAGE_SHIFT; 412 phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
413 return (phys_addr | offset); 413 return (phys_addr | offset);
414} 414}
415EXPORT_SYMBOL_GPL(slow_virt_to_phys); 415EXPORT_SYMBOL_GPL(slow_virt_to_phys);
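The slow_virt_to_phys() cast matters on 32-bit kernels with PAE, where physical addresses can exceed 4 GiB: shifting the page frame number left inside an unsigned long silently drops the upper bits, while widening to phys_addr_t first keeps them. A small demonstration of the truncation, simulating the 32-bit case with an explicit uint32_t:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        /* A page frame number for a physical page above 4 GiB (reachable with PAE). */
        uint32_t pfn = 0x140000;                           /* 0x140000 << 12 = 0x140000000 */

        uint32_t truncated = pfn << PAGE_SHIFT;            /* 32-bit shift: upper bits lost */
        uint64_t widened   = (uint64_t)pfn << PAGE_SHIFT;  /* cast before shifting, as the fix does */

        printf("truncated: %#x\n",   truncated);                    /* 0x40000000 */
        printf("widened:   %#llx\n", (unsigned long long)widened);  /* 0x140000000 */
        return 0;
    }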
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index f15103dff4b4..d143d216d52b 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -40,20 +40,40 @@ void __init efi_bgrt_init(void)
40 if (ACPI_FAILURE(status)) 40 if (ACPI_FAILURE(status))
41 return; 41 return;
42 42
43 if (bgrt_tab->header.length < sizeof(*bgrt_tab)) 43 if (bgrt_tab->header.length < sizeof(*bgrt_tab)) {
44 pr_err("Ignoring BGRT: invalid length %u (expected %zu)\n",
45 bgrt_tab->header.length, sizeof(*bgrt_tab));
44 return; 46 return;
45 if (bgrt_tab->version != 1 || bgrt_tab->status != 1) 47 }
48 if (bgrt_tab->version != 1) {
49 pr_err("Ignoring BGRT: invalid version %u (expected 1)\n",
50 bgrt_tab->version);
51 return;
52 }
53 if (bgrt_tab->status != 1) {
54 pr_err("Ignoring BGRT: invalid status %u (expected 1)\n",
55 bgrt_tab->status);
56 return;
57 }
58 if (bgrt_tab->image_type != 0) {
59 pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
60 bgrt_tab->image_type);
46 return; 61 return;
47 if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) 62 }
63 if (!bgrt_tab->image_address) {
64 pr_err("Ignoring BGRT: null image address\n");
48 return; 65 return;
66 }
49 67
50 image = efi_lookup_mapped_addr(bgrt_tab->image_address); 68 image = efi_lookup_mapped_addr(bgrt_tab->image_address);
51 if (!image) { 69 if (!image) {
52 image = early_memremap(bgrt_tab->image_address, 70 image = early_memremap(bgrt_tab->image_address,
53 sizeof(bmp_header)); 71 sizeof(bmp_header));
54 ioremapped = true; 72 ioremapped = true;
55 if (!image) 73 if (!image) {
74 pr_err("Ignoring BGRT: failed to map image header memory\n");
56 return; 75 return;
76 }
57 } 77 }
58 78
59 memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); 79 memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
@@ -61,14 +81,18 @@ void __init efi_bgrt_init(void)
61 early_iounmap(image, sizeof(bmp_header)); 81 early_iounmap(image, sizeof(bmp_header));
62 bgrt_image_size = bmp_header.size; 82 bgrt_image_size = bmp_header.size;
63 83
64 bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); 84 bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
65 if (!bgrt_image) 85 if (!bgrt_image) {
86 pr_err("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n",
87 bgrt_image_size);
66 return; 88 return;
89 }
67 90
68 if (ioremapped) { 91 if (ioremapped) {
69 image = early_memremap(bgrt_tab->image_address, 92 image = early_memremap(bgrt_tab->image_address,
70 bmp_header.size); 93 bmp_header.size);
71 if (!image) { 94 if (!image) {
95 pr_err("Ignoring BGRT: failed to map image memory\n");
72 kfree(bgrt_image); 96 kfree(bgrt_image);
73 bgrt_image = NULL; 97 bgrt_image = NULL;
74 return; 98 return;
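The efi-bgrt.c changes split one combined validity test into per-field checks so a rejected table says exactly why, and the kmalloc() adds __GFP_NOWARN because the code now prints its own size-specific message on failure. A plain C sketch of the same validate-and-report order, against a hypothetical stand-in for the BGRT table layout:

    #include <stdio.h>

    /* Hypothetical stand-in for the BGRT table fields checked above. */
    struct bgrt_table {
        unsigned int       length;
        unsigned int       version;
        unsigned int       status;
        unsigned int       image_type;
        unsigned long long image_address;
    };

    /* Return 0 if usable; otherwise report one specific reason and stop. */
    static int bgrt_table_ok(const struct bgrt_table *t)
    {
        if (t->length < sizeof(*t)) {
            printf("Ignoring BGRT: invalid length %u (expected %zu)\n", t->length, sizeof(*t));
            return -1;
        }
        if (t->version != 1) {
            printf("Ignoring BGRT: invalid version %u (expected 1)\n", t->version);
            return -1;
        }
        if (t->status != 1) {
            printf("Ignoring BGRT: invalid status %u (expected 1)\n", t->status);
            return -1;
        }
        if (t->image_type != 0) {
            printf("Ignoring BGRT: invalid image type %u (expected 0)\n", t->image_type);
            return -1;
        }
        if (!t->image_address) {
            printf("Ignoring BGRT: null image address\n");
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        struct bgrt_table t = { sizeof(t), 1, 1, 0, 0 };   /* valid except for the address */
        return bgrt_table_ok(&t) ? 1 : 0;
    }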
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 850da94fef30..dbc8627a5cdf 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -70,17 +70,7 @@ static efi_config_table_type_t arch_tables[] __initdata = {
70 70
71u64 efi_setup; /* efi setup_data physical address */ 71u64 efi_setup; /* efi setup_data physical address */
72 72
73static bool disable_runtime __initdata = false; 73static int add_efi_memmap __initdata;
74static int __init setup_noefi(char *arg)
75{
76 disable_runtime = true;
77 return 0;
78}
79early_param("noefi", setup_noefi);
80
81int add_efi_memmap;
82EXPORT_SYMBOL(add_efi_memmap);
83
84static int __init setup_add_efi_memmap(char *arg) 74static int __init setup_add_efi_memmap(char *arg)
85{ 75{
86 add_efi_memmap = 1; 76 add_efi_memmap = 1;
@@ -96,7 +86,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
96{ 86{
97 efi_status_t status; 87 efi_status_t status;
98 88
99 efi_call_phys_prelog(); 89 efi_call_phys_prolog();
100 status = efi_call_phys(efi_phys.set_virtual_address_map, 90 status = efi_call_phys(efi_phys.set_virtual_address_map,
101 memory_map_size, descriptor_size, 91 memory_map_size, descriptor_size,
102 descriptor_version, virtual_map); 92 descriptor_version, virtual_map);
@@ -210,9 +200,12 @@ static void __init print_efi_memmap(void)
210 for (p = memmap.map, i = 0; 200 for (p = memmap.map, i = 0;
211 p < memmap.map_end; 201 p < memmap.map_end;
212 p += memmap.desc_size, i++) { 202 p += memmap.desc_size, i++) {
203 char buf[64];
204
213 md = p; 205 md = p;
214 pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n", 206 pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
215 i, md->type, md->attribute, md->phys_addr, 207 i, efi_md_typeattr_format(buf, sizeof(buf), md),
208 md->phys_addr,
216 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), 209 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
217 (md->num_pages >> (20 - EFI_PAGE_SHIFT))); 210 (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
218 } 211 }
@@ -344,9 +337,9 @@ static int __init efi_runtime_init32(void)
344 } 337 }
345 338
346 /* 339 /*
347 * We will only need *early* access to the following two 340 * We will only need *early* access to the SetVirtualAddressMap
348 * EFI runtime services before set_virtual_address_map 341 * EFI runtime service. All other runtime services will be called
349 * is invoked. 342 * via the virtual mapping.
350 */ 343 */
351 efi_phys.set_virtual_address_map = 344 efi_phys.set_virtual_address_map =
352 (efi_set_virtual_address_map_t *) 345 (efi_set_virtual_address_map_t *)
@@ -368,9 +361,9 @@ static int __init efi_runtime_init64(void)
368 } 361 }
369 362
370 /* 363 /*
371 * We will only need *early* access to the following two 364 * We will only need *early* access to the SetVirtualAddressMap
372 * EFI runtime services before set_virtual_address_map 365 * EFI runtime service. All other runtime services will be called
373 * is invoked. 366 * via the virtual mapping.
374 */ 367 */
375 efi_phys.set_virtual_address_map = 368 efi_phys.set_virtual_address_map =
376 (efi_set_virtual_address_map_t *) 369 (efi_set_virtual_address_map_t *)
@@ -492,7 +485,7 @@ void __init efi_init(void)
492 if (!efi_runtime_supported()) 485 if (!efi_runtime_supported())
493 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); 486 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
494 else { 487 else {
495 if (disable_runtime || efi_runtime_init()) 488 if (efi_runtime_disabled() || efi_runtime_init())
496 return; 489 return;
497 } 490 }
498 if (efi_memmap_init()) 491 if (efi_memmap_init())
@@ -537,7 +530,7 @@ void __init runtime_code_page_mkexec(void)
537 } 530 }
538} 531}
539 532
540void efi_memory_uc(u64 addr, unsigned long size) 533void __init efi_memory_uc(u64 addr, unsigned long size)
541{ 534{
542 unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; 535 unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
543 u64 npages; 536 u64 npages;
@@ -732,6 +725,7 @@ static void __init kexec_enter_virtual_mode(void)
732 */ 725 */
733 if (!efi_is_native()) { 726 if (!efi_is_native()) {
734 efi_unmap_memmap(); 727 efi_unmap_memmap();
728 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
735 return; 729 return;
736 } 730 }
737 731
@@ -805,6 +799,7 @@ static void __init __efi_enter_virtual_mode(void)
805 new_memmap = efi_map_regions(&count, &pg_shift); 799 new_memmap = efi_map_regions(&count, &pg_shift);
806 if (!new_memmap) { 800 if (!new_memmap) {
807 pr_err("Error reallocating memory, EFI runtime non-functional!\n"); 801 pr_err("Error reallocating memory, EFI runtime non-functional!\n");
802 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
808 return; 803 return;
809 } 804 }
810 805
@@ -812,8 +807,10 @@ static void __init __efi_enter_virtual_mode(void)
812 807
813 BUG_ON(!efi.systab); 808 BUG_ON(!efi.systab);
814 809
815 if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) 810 if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) {
811 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
816 return; 812 return;
813 }
817 814
818 efi_sync_low_kernel_mappings(); 815 efi_sync_low_kernel_mappings();
819 efi_dump_pagetable(); 816 efi_dump_pagetable();
@@ -938,14 +935,11 @@ u64 efi_mem_attributes(unsigned long phys_addr)
938 return 0; 935 return 0;
939} 936}
940 937
941static int __init parse_efi_cmdline(char *str) 938static int __init arch_parse_efi_cmdline(char *str)
942{ 939{
943 if (*str == '=') 940 if (parse_option_str(str, "old_map"))
944 str++;
945
946 if (!strncmp(str, "old_map", 7))
947 set_bit(EFI_OLD_MEMMAP, &efi.flags); 941 set_bit(EFI_OLD_MEMMAP, &efi.flags);
948 942
949 return 0; 943 return 0;
950} 944}
951early_param("efi", parse_efi_cmdline); 945early_param("efi", arch_parse_efi_cmdline);
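arch_parse_efi_cmdline() now goes through parse_option_str() instead of an open-coded strncmp(), so "efi=old_map" is matched as a whole token: it still works as one entry in a comma-separated list, and a longer word such as "old_mapx" no longer matches by prefix. A simplified userspace approximation of that matcher (the kernel's real parse_option_str() lives in lib/cmdline.c):

    #include <stdio.h>
    #include <string.h>

    /* Return 1 if 'option' appears as one complete comma-separated entry in 'str'. */
    static int option_in_list(const char *str, const char *option)
    {
        size_t optlen = strlen(option);

        while (*str) {
            const char *end = strchr(str, ',');
            size_t len = end ? (size_t)(end - str) : strlen(str);

            if (len == optlen && !strncmp(str, option, len))
                return 1;
            if (!end)
                break;
            str = end + 1;
        }
        return 0;
    }

    int main(void)
    {
        printf("%d\n", option_in_list("old_map,nochunk", "old_map"));  /* 1 */
        printf("%d\n", option_in_list("noruntime", "old_map"));        /* 0 */
        printf("%d\n", option_in_list("old_mapx", "old_map"));         /* 0: whole-token match */
        return 0;
    }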
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 9ee3491e31fb..40e7cda52936 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -33,7 +33,7 @@
33 33
34/* 34/*
35 * To make EFI call EFI runtime service in physical addressing mode we need 35 * To make EFI call EFI runtime service in physical addressing mode we need
36 * prelog/epilog before/after the invocation to disable interrupt, to 36 * prolog/epilog before/after the invocation to disable interrupt, to
37 * claim EFI runtime service handler exclusively and to duplicate a memory in 37 * claim EFI runtime service handler exclusively and to duplicate a memory in
38 * low memory space say 0 - 3G. 38 * low memory space say 0 - 3G.
39 */ 39 */
@@ -41,11 +41,13 @@ static unsigned long efi_rt_eflags;
41 41
42void efi_sync_low_kernel_mappings(void) {} 42void efi_sync_low_kernel_mappings(void) {}
43void __init efi_dump_pagetable(void) {} 43void __init efi_dump_pagetable(void) {}
44int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) 44int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
45{ 45{
46 return 0; 46 return 0;
47} 47}
48void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {} 48void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
49{
50}
49 51
50void __init efi_map_region(efi_memory_desc_t *md) 52void __init efi_map_region(efi_memory_desc_t *md)
51{ 53{
@@ -55,7 +57,7 @@ void __init efi_map_region(efi_memory_desc_t *md)
55void __init efi_map_region_fixed(efi_memory_desc_t *md) {} 57void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
56void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} 58void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
57 59
58void efi_call_phys_prelog(void) 60void __init efi_call_phys_prolog(void)
59{ 61{
60 struct desc_ptr gdt_descr; 62 struct desc_ptr gdt_descr;
61 63
@@ -69,7 +71,7 @@ void efi_call_phys_prelog(void)
69 load_gdt(&gdt_descr); 71 load_gdt(&gdt_descr);
70} 72}
71 73
72void efi_call_phys_epilog(void) 74void __init efi_call_phys_epilog(void)
73{ 75{
74 struct desc_ptr gdt_descr; 76 struct desc_ptr gdt_descr;
75 77
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 290d397e1dd9..35aecb6042fb 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -79,7 +79,7 @@ static void __init early_code_mapping_set_exec(int executable)
79 } 79 }
80} 80}
81 81
82void __init efi_call_phys_prelog(void) 82void __init efi_call_phys_prolog(void)
83{ 83{
84 unsigned long vaddress; 84 unsigned long vaddress;
85 int pgd; 85 int pgd;
@@ -139,7 +139,7 @@ void efi_sync_low_kernel_mappings(void)
139 sizeof(pgd_t) * num_pgds); 139 sizeof(pgd_t) * num_pgds);
140} 140}
141 141
142int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) 142int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
143{ 143{
144 unsigned long text; 144 unsigned long text;
145 struct page *page; 145 struct page *page;
@@ -192,7 +192,7 @@ int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
192 return 0; 192 return 0;
193} 193}
194 194
195void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) 195void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
196{ 196{
197 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); 197 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
198 198
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index fbe66e626c09..040192b50d02 100644
--- a/arch/x86/platform/efi/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -27,13 +27,13 @@ ENTRY(efi_call_phys)
27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found 27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
28 * the values of these registers are the same. And, the corresponding 28 * the values of these registers are the same. And, the corresponding
29 * GDT entries are identical. So I will do nothing about segment reg 29 * GDT entries are identical. So I will do nothing about segment reg
30 * and GDT, but change GDT base register in prelog and epilog. 30 * and GDT, but change GDT base register in prolog and epilog.
31 */ 31 */
32 32
33 /* 33 /*
34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. 34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
35 * But to make it smoothly switch from virtual mode to flat mode. 35 * But to make it smoothly switch from virtual mode to flat mode.
36 * The mapping of lower virtual memory has been created in prelog and 36 * The mapping of lower virtual memory has been created in prolog and
37 * epilog. 37 * epilog.
38 */ 38 */
39 movl $1f, %edx 39 movl $1f, %edx
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
index 46aa25c8ce06..3c1c3866d82b 100644
--- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
+++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
@@ -10,10 +10,9 @@
10 */ 10 */
11 11
12 12
13/* __attribute__((weak)) makes these declarations overridable */
14/* For every CPU addition a new get_<cpuname>_ops interface needs 13/* For every CPU addition a new get_<cpuname>_ops interface needs
15 * to be added. 14 * to be added.
16 */ 15 */
17extern void *get_penwell_ops(void) __attribute__((weak)); 16extern void *get_penwell_ops(void);
18extern void *get_cloverview_ops(void) __attribute__((weak)); 17extern void *get_cloverview_ops(void);
19extern void *get_tangier_ops(void) __attribute__((weak)); 18extern void *get_tangier_ops(void);
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 3c53a90fdb18..c14ad34776c4 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -106,6 +106,7 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table)
106 mp_irq.dstapic = MP_APIC_ALL; 106 mp_irq.dstapic = MP_APIC_ALL;
107 mp_irq.dstirq = pentry->irq; 107 mp_irq.dstirq = pentry->irq;
108 mp_save_irq(&mp_irq); 108 mp_save_irq(&mp_irq);
109 mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC);
109 } 110 }
110 111
111 return 0; 112 return 0;
@@ -176,6 +177,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
176 mp_irq.dstapic = MP_APIC_ALL; 177 mp_irq.dstapic = MP_APIC_ALL;
177 mp_irq.dstirq = pentry->irq; 178 mp_irq.dstirq = pentry->irq;
178 mp_save_irq(&mp_irq); 179 mp_save_irq(&mp_irq);
180 mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC);
179 } 181 }
180 return 0; 182 return 0;
181} 183}
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index 54f8102ccde5..e59eef20647b 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -47,8 +47,6 @@ struct user_desc;
47 47
48#ifdef CONFIG_X86_32 48#ifdef CONFIG_X86_32
49 49
50#define HOST_AUDIT_ARCH AUDIT_ARCH_I386
51
52extern int ptrace_get_thread_area(struct task_struct *child, int idx, 50extern int ptrace_get_thread_area(struct task_struct *child, int idx,
53 struct user_desc __user *user_desc); 51 struct user_desc __user *user_desc);
54 52
@@ -57,8 +55,6 @@ extern int ptrace_set_thread_area(struct task_struct *child, int idx,
57 55
58#else 56#else
59 57
60#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
61
62#define PT_REGS_R8(r) UPT_R8(&(r)->regs) 58#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
63#define PT_REGS_R9(r) UPT_R9(&(r)->regs) 59#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
64#define PT_REGS_R10(r) UPT_R10(&(r)->regs) 60#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h
new file mode 100644
index 000000000000..9fe77b7b5a0e
--- /dev/null
+++ b/arch/x86/um/asm/syscall.h
@@ -0,0 +1,15 @@
1#ifndef __UM_ASM_SYSCALL_H
2#define __UM_ASM_SYSCALL_H
3
4#include <uapi/linux/audit.h>
5
6static inline int syscall_get_arch(void)
7{
8#ifdef CONFIG_X86_32
9 return AUDIT_ARCH_I386;
10#else
11 return AUDIT_ARCH_X86_64;
12#endif
13}
14
15#endif /* __UM_ASM_SYSCALL_H */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1a3f0445432a..fac5e4f9607c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1636,9 +1636,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
1636 xen_raw_console_write("mapping kernel into physical memory\n"); 1636 xen_raw_console_write("mapping kernel into physical memory\n");
1637 xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); 1637 xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
1638 1638
1639 /* Allocate and initialize top and mid mfn levels for p2m structure */
1640 xen_build_mfn_list_list();
1641
1642 /* keep using Xen gdt for now; no urgent need to change it */ 1639 /* keep using Xen gdt for now; no urgent need to change it */
1643 1640
1644#ifdef CONFIG_X86_32 1641#ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f62af7647ec9..a8a1a3d08d4d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1217,10 +1217,13 @@ static void __init xen_pagetable_p2m_copy(void)
1217static void __init xen_pagetable_init(void) 1217static void __init xen_pagetable_init(void)
1218{ 1218{
1219 paging_init(); 1219 paging_init();
1220 xen_setup_shared_info();
1221#ifdef CONFIG_X86_64 1220#ifdef CONFIG_X86_64
1222 xen_pagetable_p2m_copy(); 1221 xen_pagetable_p2m_copy();
1223#endif 1222#endif
1223 /* Allocate and initialize top and mid mfn levels for p2m structure */
1224 xen_build_mfn_list_list();
1225
1226 xen_setup_shared_info();
1224 xen_post_allocator_init(); 1227 xen_post_allocator_init();
1225} 1228}
1226static void xen_write_cr2(unsigned long cr2) 1229static void xen_write_cr2(unsigned long cr2)
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 9f5983b01ed9..b456b048eca9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -163,6 +163,7 @@
163#include <linux/hash.h> 163#include <linux/hash.h>
164#include <linux/sched.h> 164#include <linux/sched.h>
165#include <linux/seq_file.h> 165#include <linux/seq_file.h>
166#include <linux/bootmem.h>
166 167
167#include <asm/cache.h> 168#include <asm/cache.h>
168#include <asm/setup.h> 169#include <asm/setup.h>
@@ -181,21 +182,20 @@ static void __init m2p_override_init(void);
181 182
182unsigned long xen_max_p2m_pfn __read_mostly; 183unsigned long xen_max_p2m_pfn __read_mostly;
183 184
185static unsigned long *p2m_mid_missing_mfn;
186static unsigned long *p2m_top_mfn;
187static unsigned long **p2m_top_mfn_p;
188
184/* Placeholders for holes in the address space */ 189/* Placeholders for holes in the address space */
185static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); 190static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
186static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); 191static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
187static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
188 192
189static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); 193static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
190static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
191static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
192 194
193static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); 195static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
194static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); 196static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
195static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
196 197
197RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 198RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
198RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
199 199
200/* For each I/O range remapped we may lose up to two leaf pages for the boundary 200/* For each I/O range remapped we may lose up to two leaf pages for the boundary
201 * violations and three mid pages to cover up to 3GB. With 201 * violations and three mid pages to cover up to 3GB. With
@@ -272,11 +272,11 @@ static void p2m_init(unsigned long *p2m)
272 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures 272 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
273 * 273 *
274 * This is called both at boot time, and after resuming from suspend: 274 * This is called both at boot time, and after resuming from suspend:
275 * - At boot time we're called very early, and must use extend_brk() 275 * - At boot time we're called rather early, and must use alloc_bootmem*()
276 * to allocate memory. 276 * to allocate memory.
277 * 277 *
278 * - After resume we're called from within stop_machine, but the mfn 278 * - After resume we're called from within stop_machine, but the mfn
279 * tree should alreay be completely allocated. 279 * tree should already be completely allocated.
280 */ 280 */
281void __ref xen_build_mfn_list_list(void) 281void __ref xen_build_mfn_list_list(void)
282{ 282{
@@ -287,20 +287,17 @@ void __ref xen_build_mfn_list_list(void)
287 287
288 /* Pre-initialize p2m_top_mfn to be completely missing */ 288 /* Pre-initialize p2m_top_mfn to be completely missing */
289 if (p2m_top_mfn == NULL) { 289 if (p2m_top_mfn == NULL) {
290 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 290 p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
291 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 291 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
292 p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
293 p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
294 292
295 p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 293 p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
296 p2m_top_mfn_p_init(p2m_top_mfn_p); 294 p2m_top_mfn_p_init(p2m_top_mfn_p);
297 295
298 p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 296 p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
299 p2m_top_mfn_init(p2m_top_mfn); 297 p2m_top_mfn_init(p2m_top_mfn);
300 } else { 298 } else {
301 /* Reinitialise, mfn's all change after migration */ 299 /* Reinitialise, mfn's all change after migration */
302 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 300 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
303 p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
304 } 301 }
305 302
306 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { 303 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
@@ -328,10 +325,9 @@ void __ref xen_build_mfn_list_list(void)
328 /* 325 /*
329 * XXX boot-time only! We should never find 326 * XXX boot-time only! We should never find
330 * missing parts of the mfn tree after 327 * missing parts of the mfn tree after
331 * runtime. extend_brk() will BUG if we call 328 * runtime.
332 * it too late.
333 */ 329 */
334 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 330 mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
335 p2m_mid_mfn_init(mid_mfn_p, p2m_missing); 331 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
336 332
337 p2m_top_mfn_p[topidx] = mid_mfn_p; 333 p2m_top_mfn_p[topidx] = mid_mfn_p;
@@ -415,7 +411,6 @@ void __init xen_build_dynamic_phys_to_machine(void)
415 m2p_override_init(); 411 m2p_override_init();
416} 412}
417#ifdef CONFIG_X86_64 413#ifdef CONFIG_X86_64
418#include <linux/bootmem.h>
419unsigned long __init xen_revector_p2m_tree(void) 414unsigned long __init xen_revector_p2m_tree(void)
420{ 415{
421 unsigned long va_start; 416 unsigned long va_start;
@@ -477,7 +472,6 @@ unsigned long __init xen_revector_p2m_tree(void)
477 472
478 copy_page(new, mid_p); 473 copy_page(new, mid_p);
479 p2m_top[topidx][mididx] = &mfn_list[pfn_free]; 474 p2m_top[topidx][mididx] = &mfn_list[pfn_free];
480 p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
481 475
482 pfn_free += P2M_PER_PAGE; 476 pfn_free += P2M_PER_PAGE;
483 477
@@ -538,12 +532,13 @@ static bool alloc_p2m(unsigned long pfn)
538 unsigned topidx, mididx; 532 unsigned topidx, mididx;
539 unsigned long ***top_p, **mid; 533 unsigned long ***top_p, **mid;
540 unsigned long *top_mfn_p, *mid_mfn; 534 unsigned long *top_mfn_p, *mid_mfn;
535 unsigned long *p2m_orig;
541 536
542 topidx = p2m_top_index(pfn); 537 topidx = p2m_top_index(pfn);
543 mididx = p2m_mid_index(pfn); 538 mididx = p2m_mid_index(pfn);
544 539
545 top_p = &p2m_top[topidx]; 540 top_p = &p2m_top[topidx];
546 mid = *top_p; 541 mid = ACCESS_ONCE(*top_p);
547 542
548 if (mid == p2m_mid_missing) { 543 if (mid == p2m_mid_missing) {
549 /* Mid level is missing, allocate a new one */ 544 /* Mid level is missing, allocate a new one */
@@ -558,7 +553,7 @@ static bool alloc_p2m(unsigned long pfn)
558 } 553 }
559 554
560 top_mfn_p = &p2m_top_mfn[topidx]; 555 top_mfn_p = &p2m_top_mfn[topidx];
561 mid_mfn = p2m_top_mfn_p[topidx]; 556 mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
562 557
563 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); 558 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
564 559
@@ -566,6 +561,7 @@ static bool alloc_p2m(unsigned long pfn)
566 /* Separately check the mid mfn level */ 561 /* Separately check the mid mfn level */
567 unsigned long missing_mfn; 562 unsigned long missing_mfn;
568 unsigned long mid_mfn_mfn; 563 unsigned long mid_mfn_mfn;
564 unsigned long old_mfn;
569 565
570 mid_mfn = alloc_p2m_page(); 566 mid_mfn = alloc_p2m_page();
571 if (!mid_mfn) 567 if (!mid_mfn)
@@ -575,17 +571,19 @@ static bool alloc_p2m(unsigned long pfn)
575 571
576 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); 572 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
577 mid_mfn_mfn = virt_to_mfn(mid_mfn); 573 mid_mfn_mfn = virt_to_mfn(mid_mfn);
578 if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) 574 old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
575 if (old_mfn != missing_mfn) {
579 free_p2m_page(mid_mfn); 576 free_p2m_page(mid_mfn);
580 else 577 mid_mfn = mfn_to_virt(old_mfn);
578 } else {
581 p2m_top_mfn_p[topidx] = mid_mfn; 579 p2m_top_mfn_p[topidx] = mid_mfn;
580 }
582 } 581 }
583 582
584 if (p2m_top[topidx][mididx] == p2m_identity || 583 p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]);
585 p2m_top[topidx][mididx] == p2m_missing) { 584 if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) {
586 /* p2m leaf page is missing */ 585 /* p2m leaf page is missing */
587 unsigned long *p2m; 586 unsigned long *p2m;
588 unsigned long *p2m_orig = p2m_top[topidx][mididx];
589 587
590 p2m = alloc_p2m_page(); 588 p2m = alloc_p2m_page();
591 if (!p2m) 589 if (!p2m)
@@ -606,7 +604,6 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
606{ 604{
607 unsigned topidx, mididx, idx; 605 unsigned topidx, mididx, idx;
608 unsigned long *p2m; 606 unsigned long *p2m;
609 unsigned long *mid_mfn_p;
610 607
611 topidx = p2m_top_index(pfn); 608 topidx = p2m_top_index(pfn);
612 mididx = p2m_mid_index(pfn); 609 mididx = p2m_mid_index(pfn);
@@ -633,43 +630,21 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
633 630
634 p2m_top[topidx][mididx] = p2m; 631 p2m_top[topidx][mididx] = p2m;
635 632
636 /* For save/restore we need to MFN of the P2M saved */
637
638 mid_mfn_p = p2m_top_mfn_p[topidx];
639 WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
640 "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
641 topidx, mididx);
642 mid_mfn_p[mididx] = virt_to_mfn(p2m);
643
644 return true; 633 return true;
645} 634}
646 635
647static bool __init early_alloc_p2m_middle(unsigned long pfn) 636static bool __init early_alloc_p2m_middle(unsigned long pfn)
648{ 637{
649 unsigned topidx = p2m_top_index(pfn); 638 unsigned topidx = p2m_top_index(pfn);
650 unsigned long *mid_mfn_p;
651 unsigned long **mid; 639 unsigned long **mid;
652 640
653 mid = p2m_top[topidx]; 641 mid = p2m_top[topidx];
654 mid_mfn_p = p2m_top_mfn_p[topidx];
655 if (mid == p2m_mid_missing) { 642 if (mid == p2m_mid_missing) {
656 mid = extend_brk(PAGE_SIZE, PAGE_SIZE); 643 mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
657 644
658 p2m_mid_init(mid, p2m_missing); 645 p2m_mid_init(mid, p2m_missing);
659 646
660 p2m_top[topidx] = mid; 647 p2m_top[topidx] = mid;
661
662 BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
663 }
664 /* And the save/restore P2M tables.. */
665 if (mid_mfn_p == p2m_mid_missing_mfn) {
666 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
667 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
668
669 p2m_top_mfn_p[topidx] = mid_mfn_p;
670 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
671 /* Note: we don't set mid_mfn_p[midix] here,
672 * look in early_alloc_p2m() */
673 } 648 }
674 return true; 649 return true;
675} 650}
@@ -680,14 +655,13 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn)
680 * replace the P2M leaf with a p2m_missing or p2m_identity. 655 * replace the P2M leaf with a p2m_missing or p2m_identity.
681 * Stick the old page in the new P2M tree location. 656 * Stick the old page in the new P2M tree location.
682 */ 657 */
683bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) 658static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn)
684{ 659{
685 unsigned topidx; 660 unsigned topidx;
686 unsigned mididx; 661 unsigned mididx;
687 unsigned ident_pfns; 662 unsigned ident_pfns;
688 unsigned inv_pfns; 663 unsigned inv_pfns;
689 unsigned long *p2m; 664 unsigned long *p2m;
690 unsigned long *mid_mfn_p;
691 unsigned idx; 665 unsigned idx;
692 unsigned long pfn; 666 unsigned long pfn;
693 667
@@ -733,11 +707,6 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_
733found: 707found:
734 /* Found one, replace old with p2m_identity or p2m_missing */ 708 /* Found one, replace old with p2m_identity or p2m_missing */
735 p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); 709 p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
736 /* And the other for save/restore.. */
737 mid_mfn_p = p2m_top_mfn_p[topidx];
738 /* NOTE: Even if it is a p2m_identity it should still be point to
739 * a page filled with INVALID_P2M_ENTRY entries. */
740 mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);
741 710
742 /* Reset where we want to stick the old page in. */ 711 /* Reset where we want to stick the old page in. */
743 topidx = p2m_top_index(set_pfn); 712 topidx = p2m_top_index(set_pfn);
@@ -752,8 +721,6 @@ found:
752 721
753 p2m_init(p2m); 722 p2m_init(p2m);
754 p2m_top[topidx][mididx] = p2m; 723 p2m_top[topidx][mididx] = p2m;
755 mid_mfn_p = p2m_top_mfn_p[topidx];
756 mid_mfn_p[mididx] = virt_to_mfn(p2m);
757 724
758 return true; 725 return true;
759} 726}
@@ -763,7 +730,7 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
763 if (!early_alloc_p2m_middle(pfn)) 730 if (!early_alloc_p2m_middle(pfn))
764 return false; 731 return false;
765 732
766 if (early_can_reuse_p2m_middle(pfn, mfn)) 733 if (early_can_reuse_p2m_middle(pfn))
767 return __set_phys_to_machine(pfn, mfn); 734 return __set_phys_to_machine(pfn, mfn);
768 735
769 if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) 736 if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
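The alloc_p2m() rework above is the usual lockless install-or-adopt idiom: read the slot once (ACCESS_ONCE), allocate a candidate page when it still holds the missing placeholder, publish it with cmpxchg, and on a lost race free the candidate and keep using whatever the winner installed, which is what the new old_mfn handling does for the mid-mfn level. A minimal userspace sketch of the idiom with GCC/Clang atomic builtins and a hypothetical node type, not the p2m structures:

    #include <stdio.h>
    #include <stdlib.h>

    struct level { int filled; };

    static struct level *shared_slot;    /* one slot of a sparse tree, initially empty */

    /* Return the node installed in *slot, allocating and publishing one if needed. */
    static struct level *get_or_install(struct level **slot)
    {
        struct level *cur = __atomic_load_n(slot, __ATOMIC_ACQUIRE);
        if (cur)
            return cur;                   /* fast path: someone already installed it */

        struct level *cand = calloc(1, sizeof(*cand));
        if (!cand)
            return NULL;

        struct level *expected = NULL;
        if (__atomic_compare_exchange_n(slot, &expected, cand, 0,
                                        __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
            return cand;                  /* we won the race: our node is published */

        free(cand);                       /* we lost: drop ours and adopt the winner */
        return expected;                  /* the failed CAS stored the current value here */
    }

    int main(void)
    {
        struct level *a = get_or_install(&shared_slot);
        struct level *b = get_or_install(&shared_slot);

        printf("same node: %d\n", a == b);   /* 1: the second call adopts the first node */
        return 0;
    }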
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index af7216128d93..29834b3fd87f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -595,6 +595,7 @@ char * __init xen_memory_setup(void)
595 rc = 0; 595 rc = 0;
596 } 596 }
597 BUG_ON(rc); 597 BUG_ON(rc);
598 BUG_ON(memmap.nr_entries == 0);
598 599
599 /* 600 /*
600 * Xen won't allow a 1:1 mapping to be created to UNUSABLE 601 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a1d430b112b3..f473d268d387 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -158,7 +158,7 @@ cycle_t xen_clocksource_read(void)
158 cycle_t ret; 158 cycle_t ret;
159 159
160 preempt_disable_notrace(); 160 preempt_disable_notrace();
161 src = this_cpu_ptr(&xen_vcpu->time); 161 src = &__this_cpu_read(xen_vcpu)->time;
162 ret = pvclock_clocksource_read(src); 162 ret = pvclock_clocksource_read(src);
163 preempt_enable_notrace(); 163 preempt_enable_notrace();
164 return ret; 164 return ret;