diff options
author		Olof Johansson <olof@lixom.net>	2014-11-02 16:36:05 -0500
committer	Olof Johansson <olof@lixom.net>	2014-11-02 16:37:07 -0500
commit		4257412db57900e43716d0b7ddd4f4a51e6ed2f4 (patch)
tree		759963245a484422e9ad2639cb223b53f844ff15 /arch/x86
parent		cc040ba269ae6972face1dc7376ab3eaab9f64c8 (diff)
parent		4b91f7f3c8b20e073b7bfc098625b37f99789508 (diff)
Merge tag 'fixes-against-v3.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes
Merge "omap fixes against v3.18-rc2" from Tony Lindgren:
A few fixes for omaps to enable NAND BCH so devices won't
produce errors when booted with omap2plus_defconfig, and
to reduce bloat by making IPV6 a loadable module.
Also let's add a warning about legacy boot being deprecated
for omap3.
We now have things working with device tree, and only omap3 is
still booting in legacy mode. So hopefully this warning will
help move the remaining legacy mode users to boot with device
tree.
As removing omap3 legacy mode booting will drop somewhere
around 20000 lines of code and static data, we really do
want to move omap3 to device tree only booting over the
next few merge cycles.
* tag 'fixes-against-v3.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap: (407 commits)
ARM: OMAP2+: Warn about deprecated legacy booting mode
ARM: omap2plus_defconfig: Fix errors with NAND BCH
ARM: omap2plus_defconfig: Fix bloat caused by having ipv6 built-in
Linux 3.18-rc2
...
Signed-off-by: Olof Johansson <olof@lixom.net>
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/boot/compressed/eboot.c			32
-rw-r--r--	arch/x86/include/asm/efi.h				31
-rw-r--r--	arch/x86/include/asm/kvm_host.h				16
-rw-r--r--	arch/x86/include/uapi/asm/vmx.h				2
-rw-r--r--	arch/x86/kvm/emulate.c					250
-rw-r--r--	arch/x86/kvm/i8254.c					2
-rw-r--r--	arch/x86/kvm/paging_tmpl.h				2
-rw-r--r--	arch/x86/kvm/svm.c					8
-rw-r--r--	arch/x86/kvm/vmx.c					24
-rw-r--r--	arch/x86/kvm/x86.c					38
-rw-r--r--	arch/x86/platform/efi/efi-bgrt.c			36
-rw-r--r--	arch/x86/platform/efi/efi.c				52
-rw-r--r--	arch/x86/platform/efi/efi_32.c				12
-rw-r--r--	arch/x86/platform/efi/efi_64.c				6
-rw-r--r--	arch/x86/platform/efi/efi_stub_32.S			4
-rw-r--r--	arch/x86/platform/intel-mid/intel_mid_weak_decls.h	7
-rw-r--r--	arch/x86/xen/enlighten.c				3
-rw-r--r--	arch/x86/xen/mmu.c					5
-rw-r--r--	arch/x86/xen/p2m.c					83
-rw-r--r--	arch/x86/xen/setup.c					1
-rw-r--r--	arch/x86/xen/time.c					2
21 files changed, 395 insertions, 221 deletions
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index de8eebd6f67c..1acf605a646d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -330,8 +330,10 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
 	size = pci->romsize + sizeof(*rom);
 
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for rom\n");
 		return status;
+	}
 
 	memset(rom, 0, sizeof(*rom));
 
@@ -344,14 +346,18 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_VENDOR_ID, 1, &(rom->vendor));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->vendor\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_DEVICE_ID, 1, &(rom->devid));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->devid\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->get_location, pci, &(rom->segment),
 				 &(rom->bus), &(rom->device), &(rom->function));
@@ -432,8 +438,10 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
 	size = pci->romsize + sizeof(*rom);
 
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for rom\n");
 		return status;
+	}
 
 	rom->data.type = SETUP_PCI;
 	rom->data.len = size - sizeof(struct setup_data);
@@ -444,14 +452,18 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_VENDOR_ID, 1, &(rom->vendor));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->vendor\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_DEVICE_ID, 1, &(rom->devid));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->devid\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->get_location, pci, &(rom->segment),
 				 &(rom->bus), &(rom->device), &(rom->function));
@@ -538,8 +550,10 @@ static void setup_efi_pci(struct boot_params *params)
 				EFI_LOADER_DATA,
 				size, (void **)&pci_handle);
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for pci_handle\n");
 		return;
+	}
 
 	status = efi_call_early(locate_handle,
 				EFI_LOCATE_BY_PROTOCOL, &pci_proto,
@@ -1105,6 +1119,10 @@ struct boot_params *make_boot_params(struct efi_config *c)
 
 	memset(sdt, 0, sizeof(*sdt));
 
+	status = efi_parse_options(cmdline_ptr);
+	if (status != EFI_SUCCESS)
+		goto fail2;
+
 	status = handle_cmdline_files(sys_table, image,
 				      (char *)(unsigned long)hdr->cmd_line_ptr,
 				      "initrd=", hdr->initrd_addr_max,
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0ec241ede5a2..9b11757975d0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -81,24 +81,23 @@ extern u64 asmlinkage efi_call(void *fp, ...);
  */
 #define __efi_call_virt(f, args...) efi_call_virt(f, args)
 
-extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
-				 u32 type, u64 attribute);
+extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
+					u32 type, u64 attribute);
 
 #endif /* CONFIG_X86_32 */
 
-extern int add_efi_memmap;
 extern struct efi_scratch efi_scratch;
-extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
-extern int efi_memblock_x86_reserve_range(void);
-extern void efi_call_phys_prelog(void);
-extern void efi_call_phys_epilog(void);
-extern void efi_unmap_memmap(void);
-extern void efi_memory_uc(u64 addr, unsigned long size);
+extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
+extern int __init efi_memblock_x86_reserve_range(void);
+extern void __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(void);
+extern void __init efi_unmap_memmap(void);
+extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
 extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
-extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
-extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
 extern void __init old_map_region(efi_memory_desc_t *md);
 extern void __init runtime_code_page_mkexec(void);
 extern void __init efi_runtime_mkexec(void);
@@ -162,16 +161,6 @@ static inline efi_status_t efi_thunk_set_virtual_address_map(
 extern bool efi_reboot_required(void);
 
 #else
-/*
- * IF EFI is not configured, have the EFI calls return -ENOSYS.
- */
-#define efi_call0(_f) (-ENOSYS)
-#define efi_call1(_f, _a1) (-ENOSYS)
-#define efi_call2(_f, _a1, _a2) (-ENOSYS)
-#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS)
-#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS)
-#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS)
-#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS)
 static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
 static inline bool efi_reboot_required(void)
 {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7d603a71ab3a..6ed0c30d6a0c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
+static inline u64 get_canonical(u64 la)
+{
+	return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+	return get_canonical(la) != la;
+#else
+	return false;
+#endif
+}
+
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					   unsigned long address);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
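The get_canonical()/is_noncanonical_address() helpers added above encode the x86-64 rule that bits 63:48 of a virtual address must be copies of bit 47. A minimal standalone sketch of the same check (plain user-space C for illustration, not the kernel code; like the kernel helper it relies on gcc's arithmetic right shift of signed values):

    #include <stdint.h>
    #include <stdio.h>

    /* Sign-extend bit 47 through bits 63:48, as get_canonical() does. */
    static uint64_t canon(uint64_t la)
    {
            return (uint64_t)(((int64_t)la << 16) >> 16);
    }

    int main(void)
    {
            /* prints 0 (canonical), 1 (non-canonical), 0 (canonical) */
            printf("%d\n", canon(0x00007fffffffffffULL) != 0x00007fffffffffffULL);
            printf("%d\n", canon(0x0000800000000000ULL) != 0x0000800000000000ULL);
            printf("%d\n", canon(0xffff800000000000ULL) != 0xffff800000000000ULL);
            return 0;
    }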
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420376eb..990a2fe1588d 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_INVEPT              50
 #define EXIT_REASON_PREEMPTION_TIMER    52
+#define EXIT_REASON_INVVPID             53
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
@@ -114,6 +115,7 @@
 	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
+	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }
 
 #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a46207a05835..749f9fa38254 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }
 
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
 	u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+				 int cs_l)
+{
+	switch (ctxt->op_bytes) {
+	case 2:
+		ctxt->_eip = (u16)dst;
+		break;
+	case 4:
+		ctxt->_eip = (u32)dst;
+		break;
+	case 8:
+		if ((cs_l && is_noncanonical_address(dst)) ||
+		    (!cs_l && (dst & ~(u32)-1)))
+			return emulate_gp(ctxt, 0);
+		ctxt->_eip = dst;
+		break;
+	default:
+		WARN(1, "unsupported eip assignment size\n");
+	}
+	return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
 					       unsigned size)
 {
-	if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
-		return __do_insn_fetch_bytes(ctxt, size);
+	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
+
+	if (unlikely(done_size < size))
+		return __do_insn_fetch_bytes(ctxt, size - done_size);
 	else
 		return X86EMUL_CONTINUE;
 }
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				     u16 selector, int seg, u8 cpl, bool in_task_switch)
+				     u16 selector, int seg, u8 cpl,
+				     bool in_task_switch,
+				     struct desc_struct *desc)
 {
 	struct desc_struct seg_desc, old_desc;
 	u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+	if (desc)
+		*desc = seg_desc;
 	return X86EMUL_CONTINUE;
 exception:
 	return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 			 u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
-	return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
 }
 
 static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel;
+	unsigned short sel, old_sel;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	u8 cpl = ctxt->ops->cpl(ctxt);
+
+	/* Assignment of RIP may only fail in 64-bit mode */
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+				 VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-	return X86EMUL_CONTINUE;
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		/* assigning eip failed; restore the old cs */
+		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+		return rc;
+	}
+	return rc;
 }
 
 static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	case 2: /* call near abs */ {
 		long int old_eip;
 		old_eip = ctxt->_eip;
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
+		if (rc != X86EMUL_CONTINUE)
+			break;
 		ctxt->src.val = old_eip;
 		rc = em_push(ctxt);
 		break;
 	}
 	case 4: /* jmp abs */
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
 		break;
 	case 5: /* jmp far */
 		rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 
 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	return em_pop(ctxt);
+	int rc;
+	unsigned long eip;
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return assign_eip_near(ctxt, eip);
 }
 
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned long cs;
+	unsigned long eip, cs;
+	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 
-	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+				 VCPU_SREG_CS);
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	if (ctxt->op_bytes == 4)
-		ctxt->_eip = (u32)ctxt->_eip;
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	}
 	return rc;
 }
 
@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
-	u64 msr_data;
+	u64 msr_data, rcx, rdx;
 	int usermode;
 	u16 cs_sel = 0, ss_sel = 0;
 
@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	else
 		usermode = X86EMUL_MODE_PROT32;
 
+	rcx = reg_read(ctxt, VCPU_REGS_RCX);
+	rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
 	cs.dpl = 3;
 	ss.dpl = 3;
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
+		if (is_noncanonical_address(rcx) ||
+		    is_noncanonical_address(rdx))
+			return emulate_gp(ctxt, 0);
 		break;
 	}
 	cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
-	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+	ctxt->_eip = rdx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
 
 	return X86EMUL_CONTINUE;
 }
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happenes at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+					cpl, true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
 
 static int em_call(struct x86_emulate_ctxt *ctxt)
 {
+	int rc;
 	long rel = ctxt->src.val;
 
 	ctxt->src.val = (unsigned long)ctxt->_eip;
-	jmp_rel(ctxt, rel);
+	rc = jmp_rel(ctxt, rel);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 	return em_push(ctxt);
 }
 
@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	u16 sel, old_cs;
 	ulong old_eip;
 	int rc;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	int cpl = ctxt->ops->cpl(ctxt);
 
-	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
 	old_eip = ctxt->_eip;
+	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-	if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
 
 	ctxt->src.val = old_cs;
 	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
-		return rc;
+		goto fail;
 
 	ctxt->src.val = old_eip;
-	return em_push(ctxt);
+	rc = em_push(ctxt);
+	/* If we failed, we tainted the memory, but the very least we should
+	   restore cs */
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
+	return rc;
+fail:
+	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	return rc;
+
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
+	unsigned long eip;
 
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_near(ctxt, eip);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
 static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_clflush(struct x86_emulate_ctxt *ctxt)
+{
+	/* emulating clflush regardless of cpuid */
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
 	switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
 	X7(D(Undefined)),
 };
 
+static const struct gprefix pfx_0f_ae_7 = {
+	I(SrcMem | ByteOp, em_clflush), N, N, N,
+};
+
+static const struct group_dual group15 = { {
+	N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+}, {
+	N, N, N, N, N, N, N, N,
+} };
+
 static const struct gprefix pfx_0f_6f_0f_7f = {
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
 	N, I(ImplicitOps | EmulateOnUD, em_syscall),
 	II(ImplicitOps | Priv, em_clts, clts), N,
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
-	N, D(ImplicitOps | ModRM), N, N,
+	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
 	/* 0x10 - 0x1F */
 	N, N, N, N, N, N, N, N,
-	D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
+	D(ImplicitOps | ModRM | SrcMem | NoAccess),
+	N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
 	/* 0x20 - 0x2F */
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
-	D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
+	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
 	/* 0xB0 - 0xB7 */
 	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ done_prefixes:
 	/* Decode and fetch the destination operand: register or memory. */
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
-done:
 	if (ctxt->rip_relative)
 		ctxt->memopp->addr.mem.ea += ctxt->_eip;
 
+done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
@@ -4726,7 +4850,7 @@ special_insn:
 		break;
 	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x8d: /* lea r16/r32, m */
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ special_insn:
 		break;
 	case 0xe9: /* jmp rel */
 	case 0xeb: /* jmp rel short */
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 		ctxt->dst.type = OP_NONE; /* Disable writeback. */
 		break;
 	case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ twobyte_insn:
 		break;
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x90 ... 0x9f: /* setcc r/m8 */
 		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
 		break;
-	case 0xae: /* clflush */
-		break;
 	case 0xb6 ... 0xb7: /* movzx */
 		ctxt->dst.bytes = ctxt->op_bytes;
 		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
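With the emulate.c changes above, every RIP update in the emulator now funnels through assign_eip_near()/assign_eip_far(), so a branch target is either truncated to the operand size or rejected with #GP(0) before a bogus 64-bit value can be installed. A reduced model of that path (standalone C with stand-in type and constant names; the real code returns X86EMUL_* codes and calls emulate_gp()):

    #include <stdbool.h>
    #include <stdint.h>

    enum { EMUL_CONTINUE, EMUL_FAULT };     /* stand-ins for X86EMUL_* */

    struct mini_ctxt { int op_bytes; bool cs_l; uint64_t eip; };

    static bool noncanonical(uint64_t la)
    {
            return (uint64_t)(((int64_t)la << 16) >> 16) != la;
    }

    /* Mirrors the assign_eip_far() logic: truncate to the operand size,
     * or fault rather than install a non-canonical 64-bit RIP. */
    static int mini_assign_eip(struct mini_ctxt *c, uint64_t dst)
    {
            switch (c->op_bytes) {
            case 2: c->eip = (uint16_t)dst; break;
            case 4: c->eip = (uint32_t)dst; break;
            case 8:
                    if ((c->cs_l && noncanonical(dst)) ||
                        (!c->cs_l && (dst >> 32)))
                            return EMUL_FAULT;      /* emulate_gp(ctxt, 0) */
                    c->eip = dst;
                    break;
            }
            return EMUL_CONTINUE;
    }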
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d86471b76..298781d4cfb4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 		return;
 
 	timer = &pit->pit_state.timer;
+	mutex_lock(&pit->pit_state.lock);
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+	mutex_unlock(&pit->pit_state.lock);
 }
 
 static void destroy_pit_timer(struct kvm_pit *pit)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 806d58e3c320..fd49c867b25a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,7 +298,7 @@ retry_walk:
 	}
 #endif
 	walker->max_level = walker->level;
-	ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
+	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
 	accessed_dirty = PT_GUEST_ACCESSED_MASK;
 	pt_access = pte_access = ACC_ALL;
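The one-line paging_tmpl.h fix restructures the assertion: the old form claimed "not long mode, and PAE", which is false for every long-mode guest, while the new form asserts the intended invariant that long mode implies PAE. The same invariant as a tiny standalone C check (illustrative only):

    #include <assert.h>
    #include <stdbool.h>

    /* Long mode implies PAE, i.e. assert !(long_mode && !pae). */
    static void check_walker_mode(bool long_mode, bool pae)
    {
            assert(!(long_mode && !pae));
    }

    int main(void)
    {
            check_walker_mode(true, true);   /* long-mode guest with PAE: ok */
            check_walker_mode(false, false); /* legacy non-PAE guest: also ok */
            return 0;
    }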
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 65510f624dfe..7527cefc5a43 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
 	msr.host_initiated = false;
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-	if (svm_set_msr(&svm->vcpu, &msr)) {
+	if (kvm_set_msr(&svm->vcpu, &msr)) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(&svm->vcpu, 0);
 	} else {
@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = exit_code;
-		return 0;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}
 
 	return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0acac81f198b..a8b76c4c95e2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	default:
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
+			u64 old_msr_data = msr->data;
 			msr->data = data;
 			if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
 				preempt_disable();
-				kvm_set_shared_msr(msr->index, msr->data,
-						   msr->mask);
+				ret = kvm_set_shared_msr(msr->index, msr->data,
+							 msr->mask);
 				preempt_enable();
+				if (ret)
+					msr->data = old_msr_data;
 			}
 			break;
 		}
@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
 	msr.data = data;
 	msr.index = ecx;
 	msr.host_initiated = false;
-	if (vmx_set_msr(vcpu, &msr) != 0) {
+	if (kvm_set_msr(vcpu, &msr) != 0) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_MWAIT_INSTRUCTION]       = handle_mwait,
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
+	[EXIT_REASON_INVVPID]                 = handle_invvpid,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
 	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
-	case EXIT_REASON_INVEPT:
+	case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
 		/*
 		 * VMX instructions trap unconditionally. This allows L1 to
 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu);
 	else {
-		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-		vcpu->run->hw.hardware_exit_reason = exit_reason;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}
-	return 0;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c8f94331f8..0033df32a745 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
 		shared_msr_update(i, shared_msrs_global.msrs[i]);
 }
 
-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+	int err;
 
 	if (((value ^ smsr->values[slot].curr) & mask) == 0)
-		return;
+		return 0;
 	smsr->values[slot].curr = value;
-	wrmsrl(shared_msrs_global.msrs[slot], value);
+	err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+	if (err)
+		return 1;
+
 	if (!smsr->registered) {
 		smsr->urn.on_user_return = kvm_on_user_return;
 		user_return_notifier_register(&smsr->urn);
 		smsr->registered = true;
 	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
 
@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
-
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 */
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
+	switch (msr->index) {
+	case MSR_FS_BASE:
+	case MSR_GS_BASE:
+	case MSR_KERNEL_GS_BASE:
+	case MSR_CSTAR:
+	case MSR_LSTAR:
+		if (is_noncanonical_address(msr->data))
+			return 1;
+		break;
+	case MSR_IA32_SYSENTER_EIP:
+	case MSR_IA32_SYSENTER_ESP:
+		/*
+		 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+		 * non-canonical address is written on Intel but not on
+		 * AMD (which ignores the top 32-bits, because it does
+		 * not implement 64-bit SYSENTER).
+		 *
+		 * 64-bit code should hence be able to write a non-canonical
+		 * value on AMD. Making the address canonical ensures that
+		 * vmentry does not fail on Intel after writing a non-canonical
+		 * value, and that something deterministic happens if the guest
+		 * invokes 64-bit SYSENTER.
+		 */
+		msr->data = get_canonical(msr->data);
+	}
 	return kvm_x86_ops->set_msr(vcpu, msr);
 }
+EXPORT_SYMBOL_GPL(kvm_set_msr);
 
 /*
  * Adapt set_msr() to msr_io()'s calling convention
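The x86.c change is what lets the svm.c and vmx.c hunks above route guest WRMSR through kvm_set_msr(): kvm_set_shared_msr() now issues the write with wrmsrl_safe(), so a #GP from the hardware is reported to the caller instead of crashing the host, and vmx_set_msr() rolls its cached value back on failure. A sketch of that rollback pattern (standalone C; try_write_msr() is a made-up stand-in for the wrmsrl_safe()-backed helper, here pretending that non-canonical values fault):

    #include <stdint.h>

    struct cached_msr { uint32_t index; uint64_t data; };

    /* Stand-in for wrmsrl_safe(): nonzero means the write faulted. */
    static int try_write_msr(uint32_t index, uint64_t value)
    {
            (void)index;
            return (uint64_t)(((int64_t)value << 16) >> 16) != value;
    }

    static int set_cached_msr(struct cached_msr *m, uint64_t value)
    {
            uint64_t old = m->data;

            m->data = value;
            if (try_write_msr(m->index, m->data)) {
                    m->data = old;  /* keep the cache consistent with hardware */
                    return 1;
            }
            return 0;
    }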
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index f15103dff4b4..d143d216d52b 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c | |||
@@ -40,20 +40,40 @@ void __init efi_bgrt_init(void) | |||
40 | if (ACPI_FAILURE(status)) | 40 | if (ACPI_FAILURE(status)) |
41 | return; | 41 | return; |
42 | 42 | ||
43 | if (bgrt_tab->header.length < sizeof(*bgrt_tab)) | 43 | if (bgrt_tab->header.length < sizeof(*bgrt_tab)) { |
44 | pr_err("Ignoring BGRT: invalid length %u (expected %zu)\n", | ||
45 | bgrt_tab->header.length, sizeof(*bgrt_tab)); | ||
44 | return; | 46 | return; |
45 | if (bgrt_tab->version != 1 || bgrt_tab->status != 1) | 47 | } |
48 | if (bgrt_tab->version != 1) { | ||
49 | pr_err("Ignoring BGRT: invalid version %u (expected 1)\n", | ||
50 | bgrt_tab->version); | ||
51 | return; | ||
52 | } | ||
53 | if (bgrt_tab->status != 1) { | ||
54 | pr_err("Ignoring BGRT: invalid status %u (expected 1)\n", | ||
55 | bgrt_tab->status); | ||
56 | return; | ||
57 | } | ||
58 | if (bgrt_tab->image_type != 0) { | ||
59 | pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n", | ||
60 | bgrt_tab->image_type); | ||
46 | return; | 61 | return; |
47 | if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) | 62 | } |
63 | if (!bgrt_tab->image_address) { | ||
64 | pr_err("Ignoring BGRT: null image address\n"); | ||
48 | return; | 65 | return; |
66 | } | ||
49 | 67 | ||
50 | image = efi_lookup_mapped_addr(bgrt_tab->image_address); | 68 | image = efi_lookup_mapped_addr(bgrt_tab->image_address); |
51 | if (!image) { | 69 | if (!image) { |
52 | image = early_memremap(bgrt_tab->image_address, | 70 | image = early_memremap(bgrt_tab->image_address, |
53 | sizeof(bmp_header)); | 71 | sizeof(bmp_header)); |
54 | ioremapped = true; | 72 | ioremapped = true; |
55 | if (!image) | 73 | if (!image) { |
74 | pr_err("Ignoring BGRT: failed to map image header memory\n"); | ||
56 | return; | 75 | return; |
76 | } | ||
57 | } | 77 | } |
58 | 78 | ||
59 | memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); | 79 | memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); |
@@ -61,14 +81,18 @@ void __init efi_bgrt_init(void) | |||
61 | early_iounmap(image, sizeof(bmp_header)); | 81 | early_iounmap(image, sizeof(bmp_header)); |
62 | bgrt_image_size = bmp_header.size; | 82 | bgrt_image_size = bmp_header.size; |
63 | 83 | ||
64 | bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); | 84 | bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); |
65 | if (!bgrt_image) | 85 | if (!bgrt_image) { |
86 | pr_err("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n", | ||
87 | bgrt_image_size); | ||
66 | return; | 88 | return; |
89 | } | ||
67 | 90 | ||
68 | if (ioremapped) { | 91 | if (ioremapped) { |
69 | image = early_memremap(bgrt_tab->image_address, | 92 | image = early_memremap(bgrt_tab->image_address, |
70 | bmp_header.size); | 93 | bmp_header.size); |
71 | if (!image) { | 94 | if (!image) { |
95 | pr_err("Ignoring BGRT: failed to map image memory\n"); | ||
72 | kfree(bgrt_image); | 96 | kfree(bgrt_image); |
73 | bgrt_image = NULL; | 97 | bgrt_image = NULL; |
74 | return; | 98 | return; |
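The efi-bgrt.c change above replaces two compound checks with one test
per field so every rejected table says exactly why, and it adds
__GFP_NOWARN to the kmalloc() because the failure path now logs its
own, more specific message. A userspace sketch of the same fail-fast
validation pattern (the struct below only mimics the ACPI BGRT layout
and is illustrative):

#include <stdint.h>
#include <stdio.h>

struct bgrt_like {
        uint32_t length;
        uint8_t  version;
        uint8_t  status;
        uint8_t  image_type;
        uint64_t image_address;
};

/* Check each field separately and report the specific reason before
 * bailing out, instead of one compound condition with a silent return. */
static int bgrt_like_valid(const struct bgrt_like *t)
{
        if (t->length < sizeof(*t)) {
                fprintf(stderr, "invalid length %u (expected %zu)\n",
                        (unsigned)t->length, sizeof(*t));
                return 0;
        }
        if (t->version != 1) {
                fprintf(stderr, "invalid version %u (expected 1)\n",
                        (unsigned)t->version);
                return 0;
        }
        if (t->status != 1) {
                fprintf(stderr, "invalid status %u (expected 1)\n",
                        (unsigned)t->status);
                return 0;
        }
        if (t->image_type != 0) {
                fprintf(stderr, "invalid image type %u (expected 0)\n",
                        (unsigned)t->image_type);
                return 0;
        }
        if (!t->image_address) {
                fprintf(stderr, "null image address\n");
                return 0;
        }
        return 1;
}

int main(void)
{
        struct bgrt_like bad = {
                .length        = sizeof(struct bgrt_like),
                .version       = 2,     /* wrong on purpose */
                .status        = 1,
                .image_address = 0x1000,
        };

        return bgrt_like_valid(&bad) ? 0 : 1;   /* prints the version error */
}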
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 850da94fef30..dbc8627a5cdf 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -70,17 +70,7 @@ static efi_config_table_type_t arch_tables[] __initdata = { | |||
70 | 70 | ||
71 | u64 efi_setup; /* efi setup_data physical address */ | 71 | u64 efi_setup; /* efi setup_data physical address */ |
72 | 72 | ||
73 | static bool disable_runtime __initdata = false; | 73 | static int add_efi_memmap __initdata; |
74 | static int __init setup_noefi(char *arg) | ||
75 | { | ||
76 | disable_runtime = true; | ||
77 | return 0; | ||
78 | } | ||
79 | early_param("noefi", setup_noefi); | ||
80 | |||
81 | int add_efi_memmap; | ||
82 | EXPORT_SYMBOL(add_efi_memmap); | ||
83 | |||
84 | static int __init setup_add_efi_memmap(char *arg) | 74 | static int __init setup_add_efi_memmap(char *arg) |
85 | { | 75 | { |
86 | add_efi_memmap = 1; | 76 | add_efi_memmap = 1; |
@@ -96,7 +86,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map( | |||
96 | { | 86 | { |
97 | efi_status_t status; | 87 | efi_status_t status; |
98 | 88 | ||
99 | efi_call_phys_prelog(); | 89 | efi_call_phys_prolog(); |
100 | status = efi_call_phys(efi_phys.set_virtual_address_map, | 90 | status = efi_call_phys(efi_phys.set_virtual_address_map, |
101 | memory_map_size, descriptor_size, | 91 | memory_map_size, descriptor_size, |
102 | descriptor_version, virtual_map); | 92 | descriptor_version, virtual_map); |
@@ -210,9 +200,12 @@ static void __init print_efi_memmap(void) | |||
210 | for (p = memmap.map, i = 0; | 200 | for (p = memmap.map, i = 0; |
211 | p < memmap.map_end; | 201 | p < memmap.map_end; |
212 | p += memmap.desc_size, i++) { | 202 | p += memmap.desc_size, i++) { |
203 | char buf[64]; | ||
204 | |||
213 | md = p; | 205 | md = p; |
214 | pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n", | 206 | pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n", |
215 | i, md->type, md->attribute, md->phys_addr, | 207 | i, efi_md_typeattr_format(buf, sizeof(buf), md), |
208 | md->phys_addr, | ||
216 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | 209 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), |
217 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); | 210 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); |
218 | } | 211 | } |
@@ -344,9 +337,9 @@ static int __init efi_runtime_init32(void) | |||
344 | } | 337 | } |
345 | 338 | ||
346 | /* | 339 | /* |
347 | * We will only need *early* access to the following two | 340 | * We will only need *early* access to the SetVirtualAddressMap |
348 | * EFI runtime services before set_virtual_address_map | 341 | * EFI runtime service. All other runtime services will be called |
349 | * is invoked. | 342 | * via the virtual mapping. |
350 | */ | 343 | */ |
351 | efi_phys.set_virtual_address_map = | 344 | efi_phys.set_virtual_address_map = |
352 | (efi_set_virtual_address_map_t *) | 345 | (efi_set_virtual_address_map_t *) |
@@ -368,9 +361,9 @@ static int __init efi_runtime_init64(void) | |||
368 | } | 361 | } |
369 | 362 | ||
370 | /* | 363 | /* |
371 | * We will only need *early* access to the following two | 364 | * We will only need *early* access to the SetVirtualAddressMap |
372 | * EFI runtime services before set_virtual_address_map | 365 | * EFI runtime service. All other runtime services will be called |
373 | * is invoked. | 366 | * via the virtual mapping. |
374 | */ | 367 | */ |
375 | efi_phys.set_virtual_address_map = | 368 | efi_phys.set_virtual_address_map = |
376 | (efi_set_virtual_address_map_t *) | 369 | (efi_set_virtual_address_map_t *) |
@@ -492,7 +485,7 @@ void __init efi_init(void) | |||
492 | if (!efi_runtime_supported()) | 485 | if (!efi_runtime_supported()) |
493 | pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); | 486 | pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); |
494 | else { | 487 | else { |
495 | if (disable_runtime || efi_runtime_init()) | 488 | if (efi_runtime_disabled() || efi_runtime_init()) |
496 | return; | 489 | return; |
497 | } | 490 | } |
498 | if (efi_memmap_init()) | 491 | if (efi_memmap_init()) |
@@ -537,7 +530,7 @@ void __init runtime_code_page_mkexec(void) | |||
537 | } | 530 | } |
538 | } | 531 | } |
539 | 532 | ||
540 | void efi_memory_uc(u64 addr, unsigned long size) | 533 | void __init efi_memory_uc(u64 addr, unsigned long size) |
541 | { | 534 | { |
542 | unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; | 535 | unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; |
543 | u64 npages; | 536 | u64 npages; |
@@ -732,6 +725,7 @@ static void __init kexec_enter_virtual_mode(void) | |||
732 | */ | 725 | */ |
733 | if (!efi_is_native()) { | 726 | if (!efi_is_native()) { |
734 | efi_unmap_memmap(); | 727 | efi_unmap_memmap(); |
728 | clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); | ||
735 | return; | 729 | return; |
736 | } | 730 | } |
737 | 731 | ||
@@ -805,6 +799,7 @@ static void __init __efi_enter_virtual_mode(void) | |||
805 | new_memmap = efi_map_regions(&count, &pg_shift); | 799 | new_memmap = efi_map_regions(&count, &pg_shift); |
806 | if (!new_memmap) { | 800 | if (!new_memmap) { |
807 | pr_err("Error reallocating memory, EFI runtime non-functional!\n"); | 801 | pr_err("Error reallocating memory, EFI runtime non-functional!\n"); |
802 | clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); | ||
808 | return; | 803 | return; |
809 | } | 804 | } |
810 | 805 | ||
@@ -812,8 +807,10 @@ static void __init __efi_enter_virtual_mode(void) | |||
812 | 807 | ||
813 | BUG_ON(!efi.systab); | 808 | BUG_ON(!efi.systab); |
814 | 809 | ||
815 | if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) | 810 | if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) { |
811 | clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); | ||
816 | return; | 812 | return; |
813 | } | ||
817 | 814 | ||
818 | efi_sync_low_kernel_mappings(); | 815 | efi_sync_low_kernel_mappings(); |
819 | efi_dump_pagetable(); | 816 | efi_dump_pagetable(); |
@@ -938,14 +935,11 @@ u64 efi_mem_attributes(unsigned long phys_addr) | |||
938 | return 0; | 935 | return 0; |
939 | } | 936 | } |
940 | 937 | ||
941 | static int __init parse_efi_cmdline(char *str) | 938 | static int __init arch_parse_efi_cmdline(char *str) |
942 | { | 939 | { |
943 | if (*str == '=') | 940 | if (parse_option_str(str, "old_map")) |
944 | str++; | ||
945 | |||
946 | if (!strncmp(str, "old_map", 7)) | ||
947 | set_bit(EFI_OLD_MEMMAP, &efi.flags); | 941 | set_bit(EFI_OLD_MEMMAP, &efi.flags); |
948 | 942 | ||
949 | return 0; | 943 | return 0; |
950 | } | 944 | } |
951 | early_param("efi", parse_efi_cmdline); | 945 | early_param("efi", arch_parse_efi_cmdline); |
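Two things happen in the efi.c diff above: the "noefi" handling moves
behind the shared efi_runtime_disabled() helper, and the hand-rolled
strncmp() parsing of "efi=" gives way to parse_option_str(), which
matches whole comma-separated tokens, so "efi=debug,old_map" works and
a stray prefix such as "old_mapX" no longer matches. A minimal
reimplementation sketch of such a token matcher (the real helper lives
in the kernel's lib/ code and may differ in detail):

#include <stdio.h>
#include <string.h>

/* Return 1 if 'option' appears as a whole comma-separated token in 'str'. */
static int option_str_has(const char *str, const char *option)
{
        size_t optlen = strlen(option);

        while (*str) {
                const char *comma = strchr(str, ',');
                size_t len = comma ? (size_t)(comma - str) : strlen(str);

                if (len == optlen && !memcmp(str, option, optlen))
                        return 1;
                if (!comma)
                        break;
                str = comma + 1;
        }
        return 0;
}

int main(void)
{
        printf("%d\n", option_str_has("debug,old_map", "old_map")); /* 1 */
        printf("%d\n", option_str_has("old_mapX", "old_map"));      /* 0 */
        return 0;
}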
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 9ee3491e31fb..40e7cda52936 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c | |||
@@ -33,7 +33,7 @@ | |||
33 | 33 | ||
34 | /* | 34 | /* |
35 | * To make EFI call EFI runtime service in physical addressing mode we need | 35 | * To make EFI call EFI runtime service in physical addressing mode we need |
36 | * prelog/epilog before/after the invocation to disable interrupt, to | 36 | * prolog/epilog before/after the invocation to disable interrupt, to |
37 | * claim EFI runtime service handler exclusively and to duplicate a memory in | 37 | * claim EFI runtime service handler exclusively and to duplicate a memory in |
38 | * low memory space say 0 - 3G. | 38 | * low memory space say 0 - 3G. |
39 | */ | 39 | */ |
@@ -41,11 +41,13 @@ static unsigned long efi_rt_eflags; | |||
41 | 41 | ||
42 | void efi_sync_low_kernel_mappings(void) {} | 42 | void efi_sync_low_kernel_mappings(void) {} |
43 | void __init efi_dump_pagetable(void) {} | 43 | void __init efi_dump_pagetable(void) {} |
44 | int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | 44 | int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) |
45 | { | 45 | { |
46 | return 0; | 46 | return 0; |
47 | } | 47 | } |
48 | void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {} | 48 | void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) |
49 | { | ||
50 | } | ||
49 | 51 | ||
50 | void __init efi_map_region(efi_memory_desc_t *md) | 52 | void __init efi_map_region(efi_memory_desc_t *md) |
51 | { | 53 | { |
@@ -55,7 +57,7 @@ void __init efi_map_region(efi_memory_desc_t *md) | |||
55 | void __init efi_map_region_fixed(efi_memory_desc_t *md) {} | 57 | void __init efi_map_region_fixed(efi_memory_desc_t *md) {} |
56 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} | 58 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} |
57 | 59 | ||
58 | void efi_call_phys_prelog(void) | 60 | void __init efi_call_phys_prolog(void) |
59 | { | 61 | { |
60 | struct desc_ptr gdt_descr; | 62 | struct desc_ptr gdt_descr; |
61 | 63 | ||
@@ -69,7 +71,7 @@ void efi_call_phys_prelog(void) | |||
69 | load_gdt(&gdt_descr); | 71 | load_gdt(&gdt_descr); |
70 | } | 72 | } |
71 | 73 | ||
72 | void efi_call_phys_epilog(void) | 74 | void __init efi_call_phys_epilog(void) |
73 | { | 75 | { |
74 | struct desc_ptr gdt_descr; | 76 | struct desc_ptr gdt_descr; |
75 | 77 | ||
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 290d397e1dd9..35aecb6042fb 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -79,7 +79,7 @@ static void __init early_code_mapping_set_exec(int executable) | |||
79 | } | 79 | } |
80 | } | 80 | } |
81 | 81 | ||
82 | void __init efi_call_phys_prelog(void) | 82 | void __init efi_call_phys_prolog(void) |
83 | { | 83 | { |
84 | unsigned long vaddress; | 84 | unsigned long vaddress; |
85 | int pgd; | 85 | int pgd; |
@@ -139,7 +139,7 @@ void efi_sync_low_kernel_mappings(void) | |||
139 | sizeof(pgd_t) * num_pgds); | 139 | sizeof(pgd_t) * num_pgds); |
140 | } | 140 | } |
141 | 141 | ||
142 | int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | 142 | int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) |
143 | { | 143 | { |
144 | unsigned long text; | 144 | unsigned long text; |
145 | struct page *page; | 145 | struct page *page; |
@@ -192,7 +192,7 @@ int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | |||
192 | return 0; | 192 | return 0; |
193 | } | 193 | } |
194 | 194 | ||
195 | void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) | 195 | void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) |
196 | { | 196 | { |
197 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | 197 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); |
198 | 198 | ||
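The efi_32.c and efi_64.c hunks above are mostly annotation work: the
prolog/epilog helpers and the page-table setup/cleanup routines are
only used while switching into virtual mode, so tagging them __init
lets the kernel discard their text with the rest of init memory after
boot. A userspace sketch of the underlying mechanism, a named code
section (the section name here is invented; the kernel uses .init.text
and frees it wholesale):

#include <stdio.h>

/* Hypothetical analogue of __init: park one-shot setup code in its
 * own linker section so it can be dropped once bring-up is done. */
static void __attribute__((section(".text.boot_only")))
one_time_setup(void)
{
        puts("runs once during bring-up");
}

int main(void)
{
        one_time_setup();
        return 0;
}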
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index fbe66e626c09..040192b50d02 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S | |||
@@ -27,13 +27,13 @@ ENTRY(efi_call_phys) | |||
27 | * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found | 27 | * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found |
28 | * the values of these registers are the same. And, the corresponding | 28 | * the values of these registers are the same. And, the corresponding |
29 | * GDT entries are identical. So I will do nothing about segment reg | 29 | * GDT entries are identical. So I will do nothing about segment reg |
30 | * and GDT, but change GDT base register in prelog and epilog. | 30 | * and GDT, but change GDT base register in prolog and epilog. |
31 | */ | 31 | */ |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. | 34 | * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. |
35 | * But to make it smoothly switch from virtual mode to flat mode. | 35 | * But to make it smoothly switch from virtual mode to flat mode. |
36 | * The mapping of lower virtual memory has been created in prelog and | 36 | * The mapping of lower virtual memory has been created in prolog and |
37 | * epilog. | 37 | * epilog. |
38 | */ | 38 | */ |
39 | movl $1f, %edx | 39 | movl $1f, %edx |
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h index 46aa25c8ce06..3c1c3866d82b 100644 --- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h +++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h | |||
@@ -10,10 +10,9 @@ | |||
10 | */ | 10 | */ |
11 | 11 | ||
12 | 12 | ||
13 | /* __attribute__((weak)) makes these declarations overridable */ | ||
14 | /* For every CPU addition a new get_<cpuname>_ops interface needs | 13 | /* For every CPU addition a new get_<cpuname>_ops interface needs |
15 | * to be added. | 14 | * to be added. |
16 | */ | 15 | */ |
17 | extern void *get_penwell_ops(void) __attribute__((weak)); | 16 | extern void *get_penwell_ops(void); |
18 | extern void *get_cloverview_ops(void) __attribute__((weak)); | 17 | extern void *get_cloverview_ops(void); |
19 | extern void *get_tangier_ops(void) __attribute__((weak)); | 18 | extern void *get_tangier_ops(void); |
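The intel_mid_weak_decls.h cleanup above drops __attribute__((weak))
from extern declarations: weak is meaningful on a definition that a
platform may override, while on a declaration it only weakens every
reference, which the linker may then silently resolve to NULL. A
single-file sketch of the intended usage (the fallback body is
hypothetical):

#include <stdio.h>

/* Overridable default: a board file providing a strong
 * get_penwell_ops() definition would win at link time. */
__attribute__((weak)) void *get_penwell_ops(void)
{
        return NULL;    /* generic fallback */
}

int main(void)
{
        printf("ops = %p\n", get_penwell_ops());
        return 0;
}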
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1a3f0445432a..fac5e4f9607c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1636,9 +1636,6 @@ asmlinkage __visible void __init xen_start_kernel(void) | |||
1636 | xen_raw_console_write("mapping kernel into physical memory\n"); | 1636 | xen_raw_console_write("mapping kernel into physical memory\n"); |
1637 | xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); | 1637 | xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); |
1638 | 1638 | ||
1639 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | ||
1640 | xen_build_mfn_list_list(); | ||
1641 | |||
1642 | /* keep using Xen gdt for now; no urgent need to change it */ | 1639 | /* keep using Xen gdt for now; no urgent need to change it */ |
1643 | 1640 | ||
1644 | #ifdef CONFIG_X86_32 | 1641 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f62af7647ec9..a8a1a3d08d4d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1217,10 +1217,13 @@ static void __init xen_pagetable_p2m_copy(void) | |||
1217 | static void __init xen_pagetable_init(void) | 1217 | static void __init xen_pagetable_init(void) |
1218 | { | 1218 | { |
1219 | paging_init(); | 1219 | paging_init(); |
1220 | xen_setup_shared_info(); | ||
1221 | #ifdef CONFIG_X86_64 | 1220 | #ifdef CONFIG_X86_64 |
1222 | xen_pagetable_p2m_copy(); | 1221 | xen_pagetable_p2m_copy(); |
1223 | #endif | 1222 | #endif |
1223 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | ||
1224 | xen_build_mfn_list_list(); | ||
1225 | |||
1226 | xen_setup_shared_info(); | ||
1224 | xen_post_allocator_init(); | 1227 | xen_post_allocator_init(); |
1225 | } | 1228 | } |
1226 | static void xen_write_cr2(unsigned long cr2) | 1229 | static void xen_write_cr2(unsigned long cr2) |
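The enlighten.c and mmu.c hunks above move xen_build_mfn_list_list()
out of xen_start_kernel() and into xen_pagetable_init(), after
paging_init(); that reordering is what lets the p2m.c hunks below
trade the link-time-sized extend_brk() reservation for
alloc_bootmem_align(), which hands out page-aligned pages from the
early allocator. A userspace stand-in for the alignment contract only
(aligned_alloc models nothing about boot phases):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

int main(void)
{
        /* one page, page-aligned, like alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE) */
        unsigned long *p = aligned_alloc(PAGE_SIZE, PAGE_SIZE);

        if (!p)
                return 1;
        printf("page at %p, aligned: %s\n", (void *)p,
               ((unsigned long)p % PAGE_SIZE) == 0 ? "yes" : "no");
        free(p);
        return 0;
}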
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 9f5983b01ed9..b456b048eca9 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -163,6 +163,7 @@ | |||
163 | #include <linux/hash.h> | 163 | #include <linux/hash.h> |
164 | #include <linux/sched.h> | 164 | #include <linux/sched.h> |
165 | #include <linux/seq_file.h> | 165 | #include <linux/seq_file.h> |
166 | #include <linux/bootmem.h> | ||
166 | 167 | ||
167 | #include <asm/cache.h> | 168 | #include <asm/cache.h> |
168 | #include <asm/setup.h> | 169 | #include <asm/setup.h> |
@@ -181,21 +182,20 @@ static void __init m2p_override_init(void); | |||
181 | 182 | ||
182 | unsigned long xen_max_p2m_pfn __read_mostly; | 183 | unsigned long xen_max_p2m_pfn __read_mostly; |
183 | 184 | ||
185 | static unsigned long *p2m_mid_missing_mfn; | ||
186 | static unsigned long *p2m_top_mfn; | ||
187 | static unsigned long **p2m_top_mfn_p; | ||
188 | |||
184 | /* Placeholders for holes in the address space */ | 189 | /* Placeholders for holes in the address space */ |
185 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | 190 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); |
186 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | 191 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); |
187 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); | ||
188 | 192 | ||
189 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | 193 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); |
190 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); | ||
191 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | ||
192 | 194 | ||
193 | static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); | 195 | static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); |
194 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); | 196 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); |
195 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE); | ||
196 | 197 | ||
197 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | 198 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); |
198 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
199 | 199 | ||
200 | /* For each I/O range remapped we may lose up to two leaf pages for the boundary | 200 | /* For each I/O range remapped we may lose up to two leaf pages for the boundary |
201 | * violations and three mid pages to cover up to 3GB. With | 201 | * violations and three mid pages to cover up to 3GB. With |
@@ -272,11 +272,11 @@ static void p2m_init(unsigned long *p2m) | |||
272 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | 272 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures |
273 | * | 273 | * |
274 | * This is called both at boot time, and after resuming from suspend: | 274 | * This is called both at boot time, and after resuming from suspend: |
275 | * - At boot time we're called very early, and must use extend_brk() | 275 | * - At boot time we're called rather early, and must use alloc_bootmem*() |
276 | * to allocate memory. | 276 | * to allocate memory. |
277 | * | 277 | * |
278 | * - After resume we're called from within stop_machine, but the mfn | 278 | * - After resume we're called from within stop_machine, but the mfn |
279 | * tree should alreay be completely allocated. | 279 | * tree should already be completely allocated. |
280 | */ | 280 | */ |
281 | void __ref xen_build_mfn_list_list(void) | 281 | void __ref xen_build_mfn_list_list(void) |
282 | { | 282 | { |
@@ -287,20 +287,17 @@ void __ref xen_build_mfn_list_list(void) | |||
287 | 287 | ||
288 | /* Pre-initialize p2m_top_mfn to be completely missing */ | 288 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
289 | if (p2m_top_mfn == NULL) { | 289 | if (p2m_top_mfn == NULL) { |
290 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | 290 | p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); |
291 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); | 291 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
292 | p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
293 | p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity); | ||
294 | 292 | ||
295 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | 293 | p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); |
296 | p2m_top_mfn_p_init(p2m_top_mfn_p); | 294 | p2m_top_mfn_p_init(p2m_top_mfn_p); |
297 | 295 | ||
298 | p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | 296 | p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); |
299 | p2m_top_mfn_init(p2m_top_mfn); | 297 | p2m_top_mfn_init(p2m_top_mfn); |
300 | } else { | 298 | } else { |
301 | /* Reinitialise, mfn's all change after migration */ | 299 | /* Reinitialise, mfn's all change after migration */ |
302 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); | 300 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
303 | p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity); | ||
304 | } | 301 | } |
305 | 302 | ||
306 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | 303 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { |
@@ -328,10 +325,9 @@ void __ref xen_build_mfn_list_list(void) | |||
328 | /* | 325 | /* |
329 | * XXX boot-time only! We should never find | 326 | * XXX boot-time only! We should never find |
330 | * missing parts of the mfn tree after | 327 | * missing parts of the mfn tree after |
331 | * runtime. extend_brk() will BUG if we call | 328 | * runtime. |
332 | * it too late. | ||
333 | */ | 329 | */ |
334 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | 330 | mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); |
335 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); | 331 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); |
336 | 332 | ||
337 | p2m_top_mfn_p[topidx] = mid_mfn_p; | 333 | p2m_top_mfn_p[topidx] = mid_mfn_p; |
@@ -415,7 +411,6 @@ void __init xen_build_dynamic_phys_to_machine(void) | |||
415 | m2p_override_init(); | 411 | m2p_override_init(); |
416 | } | 412 | } |
417 | #ifdef CONFIG_X86_64 | 413 | #ifdef CONFIG_X86_64 |
418 | #include <linux/bootmem.h> | ||
419 | unsigned long __init xen_revector_p2m_tree(void) | 414 | unsigned long __init xen_revector_p2m_tree(void) |
420 | { | 415 | { |
421 | unsigned long va_start; | 416 | unsigned long va_start; |
@@ -477,7 +472,6 @@ unsigned long __init xen_revector_p2m_tree(void) | |||
477 | 472 | ||
478 | copy_page(new, mid_p); | 473 | copy_page(new, mid_p); |
479 | p2m_top[topidx][mididx] = &mfn_list[pfn_free]; | 474 | p2m_top[topidx][mididx] = &mfn_list[pfn_free]; |
480 | p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]); | ||
481 | 475 | ||
482 | pfn_free += P2M_PER_PAGE; | 476 | pfn_free += P2M_PER_PAGE; |
483 | 477 | ||
@@ -538,12 +532,13 @@ static bool alloc_p2m(unsigned long pfn) | |||
538 | unsigned topidx, mididx; | 532 | unsigned topidx, mididx; |
539 | unsigned long ***top_p, **mid; | 533 | unsigned long ***top_p, **mid; |
540 | unsigned long *top_mfn_p, *mid_mfn; | 534 | unsigned long *top_mfn_p, *mid_mfn; |
535 | unsigned long *p2m_orig; | ||
541 | 536 | ||
542 | topidx = p2m_top_index(pfn); | 537 | topidx = p2m_top_index(pfn); |
543 | mididx = p2m_mid_index(pfn); | 538 | mididx = p2m_mid_index(pfn); |
544 | 539 | ||
545 | top_p = &p2m_top[topidx]; | 540 | top_p = &p2m_top[topidx]; |
546 | mid = *top_p; | 541 | mid = ACCESS_ONCE(*top_p); |
547 | 542 | ||
548 | if (mid == p2m_mid_missing) { | 543 | if (mid == p2m_mid_missing) { |
549 | /* Mid level is missing, allocate a new one */ | 544 | /* Mid level is missing, allocate a new one */ |
@@ -558,7 +553,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
558 | } | 553 | } |
559 | 554 | ||
560 | top_mfn_p = &p2m_top_mfn[topidx]; | 555 | top_mfn_p = &p2m_top_mfn[topidx]; |
561 | mid_mfn = p2m_top_mfn_p[topidx]; | 556 | mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); |
562 | 557 | ||
563 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | 558 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); |
564 | 559 | ||
@@ -566,6 +561,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
566 | /* Separately check the mid mfn level */ | 561 | /* Separately check the mid mfn level */ |
567 | unsigned long missing_mfn; | 562 | unsigned long missing_mfn; |
568 | unsigned long mid_mfn_mfn; | 563 | unsigned long mid_mfn_mfn; |
564 | unsigned long old_mfn; | ||
569 | 565 | ||
570 | mid_mfn = alloc_p2m_page(); | 566 | mid_mfn = alloc_p2m_page(); |
571 | if (!mid_mfn) | 567 | if (!mid_mfn) |
@@ -575,17 +571,19 @@ static bool alloc_p2m(unsigned long pfn) | |||
575 | 571 | ||
576 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | 572 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); |
577 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | 573 | mid_mfn_mfn = virt_to_mfn(mid_mfn); |
578 | if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) | 574 | old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); |
575 | if (old_mfn != missing_mfn) { | ||
579 | free_p2m_page(mid_mfn); | 576 | free_p2m_page(mid_mfn); |
580 | else | 577 | mid_mfn = mfn_to_virt(old_mfn); |
578 | } else { | ||
581 | p2m_top_mfn_p[topidx] = mid_mfn; | 579 | p2m_top_mfn_p[topidx] = mid_mfn; |
580 | } | ||
582 | } | 581 | } |
583 | 582 | ||
584 | if (p2m_top[topidx][mididx] == p2m_identity || | 583 | p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]); |
585 | p2m_top[topidx][mididx] == p2m_missing) { | 584 | if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) { |
586 | /* p2m leaf page is missing */ | 585 | /* p2m leaf page is missing */ |
587 | unsigned long *p2m; | 586 | unsigned long *p2m; |
588 | unsigned long *p2m_orig = p2m_top[topidx][mididx]; | ||
589 | 587 | ||
590 | p2m = alloc_p2m_page(); | 588 | p2m = alloc_p2m_page(); |
591 | if (!p2m) | 589 | if (!p2m) |
@@ -606,7 +604,6 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) | |||
606 | { | 604 | { |
607 | unsigned topidx, mididx, idx; | 605 | unsigned topidx, mididx, idx; |
608 | unsigned long *p2m; | 606 | unsigned long *p2m; |
609 | unsigned long *mid_mfn_p; | ||
610 | 607 | ||
611 | topidx = p2m_top_index(pfn); | 608 | topidx = p2m_top_index(pfn); |
612 | mididx = p2m_mid_index(pfn); | 609 | mididx = p2m_mid_index(pfn); |
@@ -633,43 +630,21 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) | |||
633 | 630 | ||
634 | p2m_top[topidx][mididx] = p2m; | 631 | p2m_top[topidx][mididx] = p2m; |
635 | 632 | ||
636 | /* For save/restore we need to MFN of the P2M saved */ | ||
637 | |||
638 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
639 | WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), | ||
640 | "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", | ||
641 | topidx, mididx); | ||
642 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | ||
643 | |||
644 | return true; | 633 | return true; |
645 | } | 634 | } |
646 | 635 | ||
647 | static bool __init early_alloc_p2m_middle(unsigned long pfn) | 636 | static bool __init early_alloc_p2m_middle(unsigned long pfn) |
648 | { | 637 | { |
649 | unsigned topidx = p2m_top_index(pfn); | 638 | unsigned topidx = p2m_top_index(pfn); |
650 | unsigned long *mid_mfn_p; | ||
651 | unsigned long **mid; | 639 | unsigned long **mid; |
652 | 640 | ||
653 | mid = p2m_top[topidx]; | 641 | mid = p2m_top[topidx]; |
654 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
655 | if (mid == p2m_mid_missing) { | 642 | if (mid == p2m_mid_missing) { |
656 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | 643 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); |
657 | 644 | ||
658 | p2m_mid_init(mid, p2m_missing); | 645 | p2m_mid_init(mid, p2m_missing); |
659 | 646 | ||
660 | p2m_top[topidx] = mid; | 647 | p2m_top[topidx] = mid; |
661 | |||
662 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
663 | } | ||
664 | /* And the save/restore P2M tables.. */ | ||
665 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
666 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
667 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); | ||
668 | |||
669 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
670 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
671 | /* Note: we don't set mid_mfn_p[midix] here, | ||
672 | * look in early_alloc_p2m() */ | ||
673 | } | 648 | } |
674 | return true; | 649 | return true; |
675 | } | 650 | } |
@@ -680,14 +655,13 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn) | |||
680 | * replace the P2M leaf with a p2m_missing or p2m_identity. | 655 | * replace the P2M leaf with a p2m_missing or p2m_identity. |
681 | * Stick the old page in the new P2M tree location. | 656 | * Stick the old page in the new P2M tree location. |
682 | */ | 657 | */ |
683 | bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) | 658 | static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn) |
684 | { | 659 | { |
685 | unsigned topidx; | 660 | unsigned topidx; |
686 | unsigned mididx; | 661 | unsigned mididx; |
687 | unsigned ident_pfns; | 662 | unsigned ident_pfns; |
688 | unsigned inv_pfns; | 663 | unsigned inv_pfns; |
689 | unsigned long *p2m; | 664 | unsigned long *p2m; |
690 | unsigned long *mid_mfn_p; | ||
691 | unsigned idx; | 665 | unsigned idx; |
692 | unsigned long pfn; | 666 | unsigned long pfn; |
693 | 667 | ||
@@ -733,11 +707,6 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_ | |||
733 | found: | 707 | found: |
734 | /* Found one, replace old with p2m_identity or p2m_missing */ | 708 | /* Found one, replace old with p2m_identity or p2m_missing */ |
735 | p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); | 709 | p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); |
736 | /* And the other for save/restore.. */ | ||
737 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
738 | /* NOTE: Even if it is a p2m_identity it should still be point to | ||
739 | * a page filled with INVALID_P2M_ENTRY entries. */ | ||
740 | mid_mfn_p[mididx] = virt_to_mfn(p2m_missing); | ||
741 | 710 | ||
742 | /* Reset where we want to stick the old page in. */ | 711 | /* Reset where we want to stick the old page in. */ |
743 | topidx = p2m_top_index(set_pfn); | 712 | topidx = p2m_top_index(set_pfn); |
@@ -752,8 +721,6 @@ found: | |||
752 | 721 | ||
753 | p2m_init(p2m); | 722 | p2m_init(p2m); |
754 | p2m_top[topidx][mididx] = p2m; | 723 | p2m_top[topidx][mididx] = p2m; |
755 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
756 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | ||
757 | 724 | ||
758 | return true; | 725 | return true; |
759 | } | 726 | } |
@@ -763,7 +730,7 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
763 | if (!early_alloc_p2m_middle(pfn)) | 730 | if (!early_alloc_p2m_middle(pfn)) |
764 | return false; | 731 | return false; |
765 | 732 | ||
766 | if (early_can_reuse_p2m_middle(pfn, mfn)) | 733 | if (early_can_reuse_p2m_middle(pfn)) |
767 | return __set_phys_to_machine(pfn, mfn); | 734 | return __set_phys_to_machine(pfn, mfn); |
768 | 735 | ||
769 | if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) | 736 | if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) |
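The alloc_p2m() rework above is the interesting part of the p2m.c
diff: each level is read once (ACCESS_ONCE) and a freshly allocated
page is published with cmpxchg(); on a lost race the local page is
freed and the winner's page is adopted via mfn_to_virt(old_mfn)
instead of being left dangling. A C11-atomics sketch of that
install-or-adopt pattern (illustrative; the kernel operates on MFNs,
not raw pointers):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static _Atomic(void *) slot;    /* NULL plays the role of "missing" */

static void *install_or_adopt(void)
{
        void *cur = atomic_load(&slot);

        if (cur)                        /* already populated */
                return cur;

        void *mine = malloc(64);        /* candidate page */
        if (!mine)
                return NULL;

        void *expected = NULL;
        if (!atomic_compare_exchange_strong(&slot, &expected, mine)) {
                free(mine);             /* lost the race: adopt the winner's page */
                return expected;        /* CAS wrote the current value back */
        }
        return mine;
}

int main(void)
{
        void *a = install_or_adopt();
        void *b = install_or_adopt();

        printf("same page: %s\n", a == b ? "yes" : "no");
        return 0;
}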
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index af7216128d93..29834b3fd87f 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -595,6 +595,7 @@ char * __init xen_memory_setup(void) | |||
595 | rc = 0; | 595 | rc = 0; |
596 | } | 596 | } |
597 | BUG_ON(rc); | 597 | BUG_ON(rc); |
598 | BUG_ON(memmap.nr_entries == 0); | ||
598 | 599 | ||
599 | /* | 600 | /* |
600 | * Xen won't allow a 1:1 mapping to be created to UNUSABLE | 601 | * Xen won't allow a 1:1 mapping to be created to UNUSABLE |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index a1d430b112b3..f473d268d387 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -158,7 +158,7 @@ cycle_t xen_clocksource_read(void) | |||
158 | cycle_t ret; | 158 | cycle_t ret; |
159 | 159 | ||
160 | preempt_disable_notrace(); | 160 | preempt_disable_notrace(); |
161 | src = this_cpu_ptr(&xen_vcpu->time); | 161 | src = &__this_cpu_read(xen_vcpu)->time; |
162 | ret = pvclock_clocksource_read(src); | 162 | ret = pvclock_clocksource_read(src); |
163 | preempt_enable_notrace(); | 163 | preempt_enable_notrace(); |
164 | return ret; | 164 | return ret; |
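The xen/time.c one-liner is subtler than it looks: xen_vcpu is a
per-cpu *pointer*, so the member address must be formed from the value
read out of this CPU's slot (__this_cpu_read(xen_vcpu)), whereas the
old this_cpu_ptr(&xen_vcpu->time) applied the per-cpu offset to the
wrong base. A sketch with a plain array standing in for per-cpu
storage (names and layout are illustrative):

#include <stdio.h>

struct vcpu_info { long time; };

#define NR_CPUS 2
static struct vcpu_info shared_page[NR_CPUS];           /* hypervisor-shared data */
static struct vcpu_info *xen_vcpu_slot[NR_CPUS] = {     /* the per-cpu pointer */
        &shared_page[0], &shared_page[1],
};

/* Right: read this CPU's pointer first, then take the member address. */
static long *time_ptr(int cpu)
{
        return &xen_vcpu_slot[cpu]->time;
}

int main(void)
{
        shared_page[1].time = 42;
        printf("cpu1 time = %ld\n", *time_ptr(1));
        return 0;
}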