Diffstat (limited to 'arch/x86')

 arch/x86/boot/compressed/eboot.c                   |  32
 arch/x86/include/asm/efi.h                         |  31
 arch/x86/include/asm/kvm_host.h                    |  16
 arch/x86/include/uapi/asm/vmx.h                    |   2
 arch/x86/kvm/emulate.c                             | 250
 arch/x86/kvm/i8254.c                               |   2
 arch/x86/kvm/paging_tmpl.h                         |   2
 arch/x86/kvm/svm.c                                 |   8
 arch/x86/kvm/vmx.c                                 |  24
 arch/x86/kvm/x86.c                                 |  38
 arch/x86/platform/efi/efi-bgrt.c                   |  36
 arch/x86/platform/efi/efi.c                        |  52
 arch/x86/platform/efi/efi_32.c                     |  12
 arch/x86/platform/efi/efi_64.c                     |   6
 arch/x86/platform/efi/efi_stub_32.S                |   4
 arch/x86/platform/intel-mid/intel_mid_weak_decls.h |   7
 arch/x86/xen/enlighten.c                           |   3
 arch/x86/xen/mmu.c                                 |   5
 arch/x86/xen/p2m.c                                 |  83
 arch/x86/xen/setup.c                               |   1
 arch/x86/xen/time.c                                |   2
 21 files changed, 395 insertions(+), 221 deletions(-)
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index de8eebd6f67c..1acf605a646d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -330,8 +330,10 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
         size = pci->romsize + sizeof(*rom);
 
         status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to alloc mem for rom\n");
                 return status;
+        }
 
         memset(rom, 0, sizeof(*rom));
 
@@ -344,14 +346,18 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
         status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
                                  PCI_VENDOR_ID, 1, &(rom->vendor));
 
-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to read rom->vendor\n");
                 goto free_struct;
+        }
 
         status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
                                  PCI_DEVICE_ID, 1, &(rom->devid));
 
-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to read rom->devid\n");
                 goto free_struct;
+        }
 
         status = efi_early->call(pci->get_location, pci, &(rom->segment),
                                  &(rom->bus), &(rom->device), &(rom->function));
@@ -432,8 +438,10 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
         size = pci->romsize + sizeof(*rom);
 
         status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to alloc mem for rom\n");
                 return status;
+        }
 
         rom->data.type = SETUP_PCI;
         rom->data.len = size - sizeof(struct setup_data);
@@ -444,14 +452,18 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
         status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
                                  PCI_VENDOR_ID, 1, &(rom->vendor));
 
-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to read rom->vendor\n");
                 goto free_struct;
+        }
 
         status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
                                  PCI_DEVICE_ID, 1, &(rom->devid));
 
-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to read rom->devid\n");
                 goto free_struct;
+        }
 
         status = efi_early->call(pci->get_location, pci, &(rom->segment),
                                  &(rom->bus), &(rom->device), &(rom->function));
@@ -538,8 +550,10 @@ static void setup_efi_pci(struct boot_params *params)
                                 EFI_LOADER_DATA,
                                 size, (void **)&pci_handle);
 
-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to alloc mem for pci_handle\n");
                 return;
+        }
 
         status = efi_call_early(locate_handle,
                                 EFI_LOCATE_BY_PROTOCOL, &pci_proto,
@@ -1105,6 +1119,10 @@ struct boot_params *make_boot_params(struct efi_config *c)
 
         memset(sdt, 0, sizeof(*sdt));
 
+        status = efi_parse_options(cmdline_ptr);
+        if (status != EFI_SUCCESS)
+                goto fail2;
+
         status = handle_cmdline_files(sys_table, image,
                                       (char *)(unsigned long)hdr->cmd_line_ptr,
                                       "initrd=", hdr->initrd_addr_max,
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0ec241ede5a2..9b11757975d0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -81,24 +81,23 @@ extern u64 asmlinkage efi_call(void *fp, ...);
  */
 #define __efi_call_virt(f, args...) efi_call_virt(f, args)
 
-extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
-                                 u32 type, u64 attribute);
+extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
+                                        u32 type, u64 attribute);
 
 #endif /* CONFIG_X86_32 */
 
-extern int add_efi_memmap;
 extern struct efi_scratch efi_scratch;
-extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
-extern int efi_memblock_x86_reserve_range(void);
-extern void efi_call_phys_prelog(void);
-extern void efi_call_phys_epilog(void);
-extern void efi_unmap_memmap(void);
-extern void efi_memory_uc(u64 addr, unsigned long size);
+extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
+extern int __init efi_memblock_x86_reserve_range(void);
+extern void __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(void);
+extern void __init efi_unmap_memmap(void);
+extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
 extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
-extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
-extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
 extern void __init old_map_region(efi_memory_desc_t *md);
 extern void __init runtime_code_page_mkexec(void);
 extern void __init efi_runtime_mkexec(void);
@@ -162,16 +161,6 @@ static inline efi_status_t efi_thunk_set_virtual_address_map(
 extern bool efi_reboot_required(void);
 
 #else
-/*
- * IF EFI is not configured, have the EFI calls return -ENOSYS.
- */
-#define efi_call0(_f) (-ENOSYS)
-#define efi_call1(_f, _a1) (-ENOSYS)
-#define efi_call2(_f, _a1, _a2) (-ENOSYS)
-#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS)
-#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS)
-#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS)
-#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS)
 static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
 static inline bool efi_reboot_required(void)
 {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7d603a71ab3a..6ed0c30d6a0c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
         kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
+static inline u64 get_canonical(u64 la)
+{
+        return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+        return get_canonical(la) != la;
+#else
+        return false;
+#endif
+}
+
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
                                            unsigned long address);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
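The two helpers added above carry most of this series: get_canonical() sign-extends bit 47 into bits 63:48, so a linear address is canonical exactly when the helper maps it onto itself. A quick user-space sketch of the arithmetic (a hedged illustration, not kernel code; stdint types stand in for the kernel's u64/int64_t, and the 48-bit width matches the hardware this code targets):

#include <stdint.h>
#include <stdio.h>

/* same expression as the kernel helper: sign-extend bit 47 into bits 63:48 */
static uint64_t get_canonical(uint64_t la)
{
        return (uint64_t)((int64_t)(la << 16) >> 16);
}

int main(void)
{
        const uint64_t samples[] = {
                0x00007fffffffffffULL,  /* top of the low canonical half */
                0xffff800000000000ULL,  /* bottom of the high canonical half */
                0x0000800000000000ULL,  /* first non-canonical address */
        };

        for (int i = 0; i < 3; i++)
                printf("%#018llx canonical? %s\n",
                       (unsigned long long)samples[i],
                       get_canonical(samples[i]) == samples[i] ? "yes" : "no");
        return 0;
}

The first two samples are the ends of the lower and upper canonical halves and map to themselves; only the third, inside the gap between the halves, is rejected.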
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420376eb..990a2fe1588d 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_INVEPT              50
 #define EXIT_REASON_PREEMPTION_TIMER    52
+#define EXIT_REASON_INVVPID             53
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
@@ -114,6 +115,7 @@
         { EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
         { EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
         { EXIT_REASON_INVD,                  "INVD" }, \
+        { EXIT_REASON_INVVPID,               "INVVPID" }, \
         { EXIT_REASON_INVPCID,               "INVPCID" }
 
 #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a46207a05835..749f9fa38254 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
         masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }
 
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-        register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
         u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
         return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+                                 int cs_l)
+{
+        switch (ctxt->op_bytes) {
+        case 2:
+                ctxt->_eip = (u16)dst;
+                break;
+        case 4:
+                ctxt->_eip = (u32)dst;
+                break;
+        case 8:
+                if ((cs_l && is_noncanonical_address(dst)) ||
+                    (!cs_l && (dst & ~(u32)-1)))
+                        return emulate_gp(ctxt, 0);
+                ctxt->_eip = dst;
+                break;
+        default:
+                WARN(1, "unsupported eip assignment size\n");
+        }
+        return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+        return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+        return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
         u16 selector;
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
                                                unsigned size)
 {
-        if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
-                return __do_insn_fetch_bytes(ctxt, size);
+        unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
+
+        if (unlikely(done_size < size))
+                return __do_insn_fetch_bytes(ctxt, size - done_size);
         else
                 return X86EMUL_CONTINUE;
 }
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-                                     u16 selector, int seg, u8 cpl, bool in_task_switch)
+                                     u16 selector, int seg, u8 cpl,
+                                     bool in_task_switch,
+                                     struct desc_struct *desc)
 {
         struct desc_struct seg_desc, old_desc;
         u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
         }
 load:
         ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+        if (desc)
+                *desc = seg_desc;
         return X86EMUL_CONTINUE;
 exception:
         return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                    u16 selector, int seg)
 {
         u8 cpl = ctxt->ops->cpl(ctxt);
-        return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+        return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
 }
 
 static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
         int rc;
-        unsigned short sel;
+        unsigned short sel, old_sel;
+        struct desc_struct old_desc, new_desc;
+        const struct x86_emulate_ops *ops = ctxt->ops;
+        u8 cpl = ctxt->ops->cpl(ctxt);
+
+        /* Assignment of RIP may only fail in 64-bit mode */
+        if (ctxt->mode == X86EMUL_MODE_PROT64)
+                ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+                                 VCPU_SREG_CS);
 
         memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-        rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+                                       &new_desc);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
 
-        ctxt->_eip = 0;
-        memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-        return X86EMUL_CONTINUE;
+        rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+        if (rc != X86EMUL_CONTINUE) {
+                WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+                /* assigning eip failed; restore the old cs */
+                ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+                return rc;
+        }
+        return rc;
 }
 
 static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
         case 2: /* call near abs */ {
                 long int old_eip;
                 old_eip = ctxt->_eip;
-                ctxt->_eip = ctxt->src.val;
+                rc = assign_eip_near(ctxt, ctxt->src.val);
+                if (rc != X86EMUL_CONTINUE)
+                        break;
                 ctxt->src.val = old_eip;
                 rc = em_push(ctxt);
                 break;
         }
         case 4: /* jmp abs */
-                ctxt->_eip = ctxt->src.val;
+                rc = assign_eip_near(ctxt, ctxt->src.val);
                 break;
         case 5: /* jmp far */
                 rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 
 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-        ctxt->dst.type = OP_REG;
-        ctxt->dst.addr.reg = &ctxt->_eip;
-        ctxt->dst.bytes = ctxt->op_bytes;
-        return em_pop(ctxt);
+        int rc;
+        unsigned long eip;
+
+        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+        if (rc != X86EMUL_CONTINUE)
+                return rc;
+
+        return assign_eip_near(ctxt, eip);
 }
 
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
         int rc;
-        unsigned long cs;
+        unsigned long eip, cs;
+        u16 old_cs;
         int cpl = ctxt->ops->cpl(ctxt);
+        struct desc_struct old_desc, new_desc;
+        const struct x86_emulate_ops *ops = ctxt->ops;
 
-        rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+        if (ctxt->mode == X86EMUL_MODE_PROT64)
+                ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+                                 VCPU_SREG_CS);
+
+        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
-        if (ctxt->op_bytes == 4)
-                ctxt->_eip = (u32)ctxt->_eip;
         rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
         /* Outer-privilege level return is not implemented */
         if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
                 return X86EMUL_UNHANDLEABLE;
-        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+        rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+                                       &new_desc);
+        if (rc != X86EMUL_CONTINUE)
+                return rc;
+        rc = assign_eip_far(ctxt, eip, new_desc.l);
+        if (rc != X86EMUL_CONTINUE) {
+                WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+                ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+        }
         return rc;
 }
 
@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
         const struct x86_emulate_ops *ops = ctxt->ops;
         struct desc_struct cs, ss;
-        u64 msr_data;
+        u64 msr_data, rcx, rdx;
         int usermode;
         u16 cs_sel = 0, ss_sel = 0;
 
@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
         else
                 usermode = X86EMUL_MODE_PROT32;
 
+        rcx = reg_read(ctxt, VCPU_REGS_RCX);
+        rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
         cs.dpl = 3;
         ss.dpl = 3;
         ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
                 ss_sel = cs_sel + 8;
                 cs.d = 0;
                 cs.l = 1;
+                if (is_noncanonical_address(rcx) ||
+                    is_noncanonical_address(rdx))
+                        return emulate_gp(ctxt, 0);
                 break;
         }
         cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
         ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
         ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-        ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
-        *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+        ctxt->_eip = rdx;
+        *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
 
         return X86EMUL_CONTINUE;
 }
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
          * Now load segment descriptors. If fault happens at this stage
          * it is handled in a context of new task
          */
-        ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
 
@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
          * Now load segment descriptors. If fault happenes at this stage
          * it is handled in a context of new task
          */
-        ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+                                        cpl, true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
 
@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
 
 static int em_call(struct x86_emulate_ctxt *ctxt)
 {
+        int rc;
         long rel = ctxt->src.val;
 
         ctxt->src.val = (unsigned long)ctxt->_eip;
-        jmp_rel(ctxt, rel);
+        rc = jmp_rel(ctxt, rel);
+        if (rc != X86EMUL_CONTINUE)
+                return rc;
         return em_push(ctxt);
 }
 
@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
         u16 sel, old_cs;
         ulong old_eip;
         int rc;
+        struct desc_struct old_desc, new_desc;
+        const struct x86_emulate_ops *ops = ctxt->ops;
+        int cpl = ctxt->ops->cpl(ctxt);
 
-        old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
         old_eip = ctxt->_eip;
+        ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
         memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-        if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+                                       &new_desc);
+        if (rc != X86EMUL_CONTINUE)
                 return X86EMUL_CONTINUE;
 
-        ctxt->_eip = 0;
-        memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+        rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+        if (rc != X86EMUL_CONTINUE)
+                goto fail;
 
         ctxt->src.val = old_cs;
         rc = em_push(ctxt);
         if (rc != X86EMUL_CONTINUE)
-                return rc;
+                goto fail;
 
         ctxt->src.val = old_eip;
-        return em_push(ctxt);
+        rc = em_push(ctxt);
+        /* If we failed, we tainted the memory, but the very least we should
+           restore cs */
+        if (rc != X86EMUL_CONTINUE)
+                goto fail;
+        return rc;
+fail:
+        ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+        return rc;
+
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
         int rc;
+        unsigned long eip;
 
-        ctxt->dst.type = OP_REG;
-        ctxt->dst.addr.reg = &ctxt->_eip;
-        ctxt->dst.bytes = ctxt->op_bytes;
-        rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+        if (rc != X86EMUL_CONTINUE)
+                return rc;
+        rc = assign_eip_near(ctxt, eip);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
         rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+        int rc = X86EMUL_CONTINUE;
+
         register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
         if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
             (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-                jmp_rel(ctxt, ctxt->src.val);
+                rc = jmp_rel(ctxt, ctxt->src.val);
 
-        return X86EMUL_CONTINUE;
+        return rc;
 }
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
+        int rc = X86EMUL_CONTINUE;
+
         if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
-                jmp_rel(ctxt, ctxt->src.val);
+                rc = jmp_rel(ctxt, ctxt->src.val);
 
-        return X86EMUL_CONTINUE;
+        return rc;
 }
 
 static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
         return X86EMUL_CONTINUE;
 }
 
+static int em_clflush(struct x86_emulate_ctxt *ctxt)
+{
+        /* emulating clflush regardless of cpuid */
+        return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
         switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
         X7(D(Undefined)),
 };
 
+static const struct gprefix pfx_0f_ae_7 = {
+        I(SrcMem | ByteOp, em_clflush), N, N, N,
+};
+
+static const struct group_dual group15 = { {
+        N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+}, {
+        N, N, N, N, N, N, N, N,
+} };
+
 static const struct gprefix pfx_0f_6f_0f_7f = {
         I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
         N, I(ImplicitOps | EmulateOnUD, em_syscall),
         II(ImplicitOps | Priv, em_clts, clts), N,
         DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
-        N, D(ImplicitOps | ModRM), N, N,
+        N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
         /* 0x10 - 0x1F */
         N, N, N, N, N, N, N, N,
-        D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
+        D(ImplicitOps | ModRM | SrcMem | NoAccess),
+        N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
         /* 0x20 - 0x2F */
         DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
         DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
         F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
         F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
         F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
-        D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
+        GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
         /* 0xB0 - 0xB7 */
         I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
         I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ done_prefixes:
         /* Decode and fetch the destination operand: register or memory. */
         rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
-done:
         if (ctxt->rip_relative)
                 ctxt->memopp->addr.mem.ea += ctxt->_eip;
 
+done:
         return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
@@ -4726,7 +4850,7 @@ special_insn:
                 break;
         case 0x70 ... 0x7f: /* jcc (short) */
                 if (test_cc(ctxt->b, ctxt->eflags))
-                        jmp_rel(ctxt, ctxt->src.val);
+                        rc = jmp_rel(ctxt, ctxt->src.val);
                 break;
         case 0x8d: /* lea r16/r32, m */
                 ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ special_insn:
                 break;
         case 0xe9: /* jmp rel */
         case 0xeb: /* jmp rel short */
-                jmp_rel(ctxt, ctxt->src.val);
+                rc = jmp_rel(ctxt, ctxt->src.val);
                 ctxt->dst.type = OP_NONE; /* Disable writeback. */
                 break;
         case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ twobyte_insn:
                 break;
         case 0x80 ... 0x8f: /* jnz rel, etc*/
                 if (test_cc(ctxt->b, ctxt->eflags))
-                        jmp_rel(ctxt, ctxt->src.val);
+                        rc = jmp_rel(ctxt, ctxt->src.val);
                 break;
         case 0x90 ... 0x9f: /* setcc r/m8 */
                 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
                 break;
-        case 0xae: /* clflush */
-                break;
         case 0xb6 ... 0xb7: /* movzx */
                 ctxt->dst.bytes = ctxt->op_bytes;
                 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
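Taken together, the emulator changes above route every branch target through assign_eip_near()/assign_eip_far(), so a jump, call, or return to a non-canonical RIP now raises #GP(0) inside the emulator instead of being loaded and later failing VM entry. A hedged sketch of the wrap-around case the check catches (toy user-space harness, not kernel code; assign_eip() mirrors only the 8-byte, cs_l == 1 arm of assign_eip_far()):

#include <stdint.h>
#include <stdio.h>

static uint64_t get_canonical(uint64_t la)
{
        return (uint64_t)((int64_t)(la << 16) >> 16);
}

/* mirrors only the 8-byte, cs_l == 1 arm of assign_eip_far() */
static int assign_eip(uint64_t *eip, uint64_t dst)
{
        if (get_canonical(dst) != dst)
                return -1;      /* the emulator returns emulate_gp(ctxt, 0) */
        *eip = dst;
        return 0;
}

int main(void)
{
        uint64_t eip = 0x00007ffffffffff0ULL;   /* just below the canonical gap */
        int64_t rel = 0x100;                    /* relative branch crossing it */

        if (assign_eip(&eip, eip + rel))
                printf("branch rejected with #GP(0), RIP stays %#llx\n",
                       (unsigned long long)eip);
        else
                printf("new RIP %#llx\n", (unsigned long long)eip);
        return 0;
}

In the far-transfer paths the same failure also restores the previously saved CS, as em_jmp_far(), em_call_far() and em_ret_far() show above.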
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d86471b76..298781d4cfb4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
                 return;
 
         timer = &pit->pit_state.timer;
+        mutex_lock(&pit->pit_state.lock);
         if (hrtimer_cancel(timer))
                 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+        mutex_unlock(&pit->pit_state.lock);
 }
 
 static void destroy_pit_timer(struct kvm_pit *pit)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 806d58e3c320..fd49c867b25a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,7 +298,7 @@ retry_walk:
         }
 #endif
         walker->max_level = walker->level;
-        ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
+        ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
         accessed_dirty = PT_GUEST_ACCESSED_MASK;
         pt_access = pte_access = ACC_ALL;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 65510f624dfe..7527cefc5a43 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
         msr.host_initiated = false;
 
         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-        if (svm_set_msr(&svm->vcpu, &msr)) {
+        if (kvm_set_msr(&svm->vcpu, &msr)) {
                 trace_kvm_msr_write_ex(ecx, data);
                 kvm_inject_gp(&svm->vcpu, 0);
         } else {
@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
         if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
             || !svm_exit_handlers[exit_code]) {
-                kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-                kvm_run->hw.hardware_exit_reason = exit_code;
-                return 0;
+                WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+                kvm_queue_exception(vcpu, UD_VECTOR);
+                return 1;
         }
 
         return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0acac81f198b..a8b76c4c95e2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         default:
                 msr = find_msr_entry(vmx, msr_index);
                 if (msr) {
+                        u64 old_msr_data = msr->data;
                         msr->data = data;
                         if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
                                 preempt_disable();
-                                kvm_set_shared_msr(msr->index, msr->data,
-                                                   msr->mask);
+                                ret = kvm_set_shared_msr(msr->index, msr->data,
+                                                         msr->mask);
                                 preempt_enable();
+                                if (ret)
+                                        msr->data = old_msr_data;
                         }
                         break;
                 }
@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
         msr.data = data;
         msr.index = ecx;
         msr.host_initiated = false;
-        if (vmx_set_msr(vcpu, &msr) != 0) {
+        if (kvm_set_msr(vcpu, &msr) != 0) {
                 trace_kvm_msr_write_ex(ecx, data);
                 kvm_inject_gp(vcpu, 0);
                 return 1;
@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
         return 1;
 }
 
+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+        kvm_queue_exception(vcpu, UD_VECTOR);
+        return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
         [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_mwait,
         [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
         [EXIT_REASON_INVEPT]                  = handle_invept,
+        [EXIT_REASON_INVVPID]                 = handle_invvpid,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
         case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
         case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
         case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
-        case EXIT_REASON_INVEPT:
+        case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
                 /*
                  * VMX instructions trap unconditionally. This allows L1 to
                  * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
             && kvm_vmx_exit_handlers[exit_reason])
                 return kvm_vmx_exit_handlers[exit_reason](vcpu);
         else {
-                vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-                vcpu->run->hw.hardware_exit_reason = exit_reason;
+                WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+                kvm_queue_exception(vcpu, UD_VECTOR);
+                return 1;
         }
-        return 0;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c8f94331f8..0033df32a745 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
                 shared_msr_update(i, shared_msrs_global.msrs[i]);
 }
 
-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
         unsigned int cpu = smp_processor_id();
         struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+        int err;
 
         if (((value ^ smsr->values[slot].curr) & mask) == 0)
-                return;
+                return 0;
         smsr->values[slot].curr = value;
-        wrmsrl(shared_msrs_global.msrs[slot], value);
+        err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+        if (err)
+                return 1;
+
         if (!smsr->registered) {
                 smsr->urn.on_user_return = kvm_on_user_return;
                 user_return_notifier_register(&smsr->urn);
                 smsr->registered = true;
         }
+        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
 
@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
-
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  */
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
+        switch (msr->index) {
+        case MSR_FS_BASE:
+        case MSR_GS_BASE:
+        case MSR_KERNEL_GS_BASE:
+        case MSR_CSTAR:
+        case MSR_LSTAR:
+                if (is_noncanonical_address(msr->data))
+                        return 1;
+                break;
+        case MSR_IA32_SYSENTER_EIP:
+        case MSR_IA32_SYSENTER_ESP:
+                /*
+                 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+                 * non-canonical address is written on Intel but not on
+                 * AMD (which ignores the top 32-bits, because it does
+                 * not implement 64-bit SYSENTER).
+                 *
+                 * 64-bit code should hence be able to write a non-canonical
+                 * value on AMD. Making the address canonical ensures that
+                 * vmentry does not fail on Intel after writing a non-canonical
+                 * value, and that something deterministic happens if the guest
+                 * invokes 64-bit SYSENTER.
+                 */
+                msr->data = get_canonical(msr->data);
+        }
         return kvm_x86_ops->set_msr(vcpu, msr);
 }
+EXPORT_SYMBOL_GPL(kvm_set_msr);
 
 /*
  * Adapt set_msr() to msr_io()'s calling convention
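To make the two arms of the new switch in kvm_set_msr() concrete: the same non-canonical value is refused outright for the segment-base and SYSCALL MSRs, but silently canonicalized for the SYSENTER pair, for the Intel/AMD difference described in the comment. A small user-space sketch of the dispatch (a hedged illustration; the enum values are stand-ins, not the architectural MSR indices):

#include <stdint.h>
#include <stdio.h>

static uint64_t get_canonical(uint64_t la)
{
        return (uint64_t)((int64_t)(la << 16) >> 16);
}

static int is_noncanonical(uint64_t la)
{
        return get_canonical(la) != la;
}

/* stand-in indices for the sketch, not the real MSR numbers */
enum { MSR_LSTAR_DEMO, MSR_SYSENTER_EIP_DEMO };

/* returns 1 (-> #GP into the guest) or 0, possibly canonicalizing *data */
static int set_msr(int index, uint64_t *data)
{
        switch (index) {
        case MSR_LSTAR_DEMO:            /* also FS/GS/KERNEL_GS_BASE, CSTAR */
                if (is_noncanonical(*data))
                        return 1;
                break;
        case MSR_SYSENTER_EIP_DEMO:     /* also SYSENTER_ESP */
                *data = get_canonical(*data);
                break;
        }
        return 0;
}

int main(void)
{
        uint64_t v1 = 0x0000900000000000ULL;    /* non-canonical */
        uint64_t v2 = v1;

        printf("LSTAR write        -> %s\n",
               set_msr(MSR_LSTAR_DEMO, &v1) ? "#GP" : "ok");
        set_msr(MSR_SYSENTER_EIP_DEMO, &v2);
        printf("SYSENTER_EIP write -> stored as %#llx\n",
               (unsigned long long)v2);
        return 0;
}

Returning 1 here is what ultimately becomes the #GP injected by the wrmsr intercepts in svm.c and vmx.c above.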
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index f15103dff4b4..d143d216d52b 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -40,20 +40,40 @@ void __init efi_bgrt_init(void)
         if (ACPI_FAILURE(status))
                 return;
 
-        if (bgrt_tab->header.length < sizeof(*bgrt_tab))
+        if (bgrt_tab->header.length < sizeof(*bgrt_tab)) {
+                pr_err("Ignoring BGRT: invalid length %u (expected %zu)\n",
+                       bgrt_tab->header.length, sizeof(*bgrt_tab));
                 return;
-        if (bgrt_tab->version != 1 || bgrt_tab->status != 1)
+        }
+        if (bgrt_tab->version != 1) {
+                pr_err("Ignoring BGRT: invalid version %u (expected 1)\n",
+                       bgrt_tab->version);
+                return;
+        }
+        if (bgrt_tab->status != 1) {
+                pr_err("Ignoring BGRT: invalid status %u (expected 1)\n",
+                       bgrt_tab->status);
+                return;
+        }
+        if (bgrt_tab->image_type != 0) {
+                pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
+                       bgrt_tab->image_type);
                 return;
-        if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
+        }
+        if (!bgrt_tab->image_address) {
+                pr_err("Ignoring BGRT: null image address\n");
                 return;
+        }
 
         image = efi_lookup_mapped_addr(bgrt_tab->image_address);
         if (!image) {
                 image = early_memremap(bgrt_tab->image_address,
                                        sizeof(bmp_header));
                 ioremapped = true;
-                if (!image)
+                if (!image) {
+                        pr_err("Ignoring BGRT: failed to map image header memory\n");
                         return;
+                }
         }
 
         memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
@@ -61,14 +81,18 @@ void __init efi_bgrt_init(void)
         early_iounmap(image, sizeof(bmp_header));
         bgrt_image_size = bmp_header.size;
 
-        bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
-        if (!bgrt_image)
+        bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
+        if (!bgrt_image) {
+                pr_err("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n",
+                       bgrt_image_size);
                 return;
+        }
 
         if (ioremapped) {
                 image = early_memremap(bgrt_tab->image_address,
                                        bmp_header.size);
                 if (!image) {
+                        pr_err("Ignoring BGRT: failed to map image memory\n");
                         kfree(bgrt_image);
                         bgrt_image = NULL;
                         return;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 850da94fef30..dbc8627a5cdf 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -70,17 +70,7 @@ static efi_config_table_type_t arch_tables[] __initdata = { | |||
70 | 70 | ||
71 | u64 efi_setup; /* efi setup_data physical address */ | 71 | u64 efi_setup; /* efi setup_data physical address */ |
72 | 72 | ||
73 | static bool disable_runtime __initdata = false; | 73 | static int add_efi_memmap __initdata; |
74 | static int __init setup_noefi(char *arg) | ||
75 | { | ||
76 | disable_runtime = true; | ||
77 | return 0; | ||
78 | } | ||
79 | early_param("noefi", setup_noefi); | ||
80 | |||
81 | int add_efi_memmap; | ||
82 | EXPORT_SYMBOL(add_efi_memmap); | ||
83 | |||
84 | static int __init setup_add_efi_memmap(char *arg) | 74 | static int __init setup_add_efi_memmap(char *arg) |
85 | { | 75 | { |
86 | add_efi_memmap = 1; | 76 | add_efi_memmap = 1; |
@@ -96,7 +86,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map( | |||
96 | { | 86 | { |
97 | efi_status_t status; | 87 | efi_status_t status; |
98 | 88 | ||
99 | efi_call_phys_prelog(); | 89 | efi_call_phys_prolog(); |
100 | status = efi_call_phys(efi_phys.set_virtual_address_map, | 90 | status = efi_call_phys(efi_phys.set_virtual_address_map, |
101 | memory_map_size, descriptor_size, | 91 | memory_map_size, descriptor_size, |
102 | descriptor_version, virtual_map); | 92 | descriptor_version, virtual_map); |
@@ -210,9 +200,12 @@ static void __init print_efi_memmap(void) | |||
210 | for (p = memmap.map, i = 0; | 200 | for (p = memmap.map, i = 0; |
211 | p < memmap.map_end; | 201 | p < memmap.map_end; |
212 | p += memmap.desc_size, i++) { | 202 | p += memmap.desc_size, i++) { |
203 | char buf[64]; | ||
204 | |||
213 | md = p; | 205 | md = p; |
214 | pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n", | 206 | pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n", |
215 | i, md->type, md->attribute, md->phys_addr, | 207 | i, efi_md_typeattr_format(buf, sizeof(buf), md), |
208 | md->phys_addr, | ||
216 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | 209 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), |
217 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); | 210 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); |
218 | } | 211 | } |
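print_efi_memmap() now renders each descriptor's type and attributes through efi_md_typeattr_format() into a 64-byte stack buffer, called per the hunk as (buf, sizeof(buf), md), instead of printing raw type=%u/attr=0x%llx numbers. A toy version of that decode-into-caller-buffer idiom (the helper and flag bits below are illustrative, not the real formatter):

#include <linux/kernel.h>

/* Illustrative only: decode a couple of made-up attribute bits
 * into the caller's buffer and hand the buffer back for printk. */
static char *fmt_attrs(char *buf, size_t size, u64 attr)
{
	snprintf(buf, size, "%s%s%s",
		 (attr & 0x1) ? "UC " : "",
		 (attr & 0x2) ? "WC " : "",
		 (attr & 0x4) ? "WT " : "");
	return buf;
}

/* usage: char buf[64]; pr_info("attr: %s\n", fmt_attrs(buf, sizeof(buf), attr)); */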
@@ -344,9 +337,9 @@ static int __init efi_runtime_init32(void) | |||
344 | } | 337 | } |
345 | 338 | ||
346 | /* | 339 | /* |
347 | * We will only need *early* access to the following two | 340 | * We will only need *early* access to the SetVirtualAddressMap |
348 | * EFI runtime services before set_virtual_address_map | 341 | * EFI runtime service. All other runtime services will be called |
349 | * is invoked. | 342 | * via the virtual mapping. |
350 | */ | 343 | */ |
351 | efi_phys.set_virtual_address_map = | 344 | efi_phys.set_virtual_address_map = |
352 | (efi_set_virtual_address_map_t *) | 345 | (efi_set_virtual_address_map_t *) |
@@ -368,9 +361,9 @@ static int __init efi_runtime_init64(void) | |||
368 | } | 361 | } |
369 | 362 | ||
370 | /* | 363 | /* |
371 | * We will only need *early* access to the following two | 364 | * We will only need *early* access to the SetVirtualAddressMap |
372 | * EFI runtime services before set_virtual_address_map | 365 | * EFI runtime service. All other runtime services will be called |
373 | * is invoked. | 366 | * via the virtual mapping. |
374 | */ | 367 | */ |
375 | efi_phys.set_virtual_address_map = | 368 | efi_phys.set_virtual_address_map = |
376 | (efi_set_virtual_address_map_t *) | 369 | (efi_set_virtual_address_map_t *) |
@@ -492,7 +485,7 @@ void __init efi_init(void) | |||
492 | if (!efi_runtime_supported()) | 485 | if (!efi_runtime_supported()) |
493 | pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); | 486 | pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); |
494 | else { | 487 | else { |
495 | if (disable_runtime || efi_runtime_init()) | 488 | if (efi_runtime_disabled() || efi_runtime_init()) |
496 | return; | 489 | return; |
497 | } | 490 | } |
498 | if (efi_memmap_init()) | 491 | if (efi_memmap_init()) |
@@ -537,7 +530,7 @@ void __init runtime_code_page_mkexec(void) | |||
537 | } | 530 | } |
538 | } | 531 | } |
539 | 532 | ||
540 | void efi_memory_uc(u64 addr, unsigned long size) | 533 | void __init efi_memory_uc(u64 addr, unsigned long size) |
541 | { | 534 | { |
542 | unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; | 535 | unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; |
543 | u64 npages; | 536 | u64 npages; |
@@ -732,6 +725,7 @@ static void __init kexec_enter_virtual_mode(void) | |||
732 | */ | 725 | */ |
733 | if (!efi_is_native()) { | 726 | if (!efi_is_native()) { |
734 | efi_unmap_memmap(); | 727 | efi_unmap_memmap(); |
728 | clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); | ||
735 | return; | 729 | return; |
736 | } | 730 | } |
737 | 731 | ||
@@ -805,6 +799,7 @@ static void __init __efi_enter_virtual_mode(void) | |||
805 | new_memmap = efi_map_regions(&count, &pg_shift); | 799 | new_memmap = efi_map_regions(&count, &pg_shift); |
806 | if (!new_memmap) { | 800 | if (!new_memmap) { |
807 | pr_err("Error reallocating memory, EFI runtime non-functional!\n"); | 801 | pr_err("Error reallocating memory, EFI runtime non-functional!\n"); |
802 | clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); | ||
808 | return; | 803 | return; |
809 | } | 804 | } |
810 | 805 | ||
@@ -812,8 +807,10 @@ static void __init __efi_enter_virtual_mode(void) | |||
812 | 807 | ||
813 | BUG_ON(!efi.systab); | 808 | BUG_ON(!efi.systab); |
814 | 809 | ||
815 | if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) | 810 | if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) { |
811 | clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); | ||
816 | return; | 812 | return; |
813 | } | ||
817 | 814 | ||
818 | efi_sync_low_kernel_mappings(); | 815 | efi_sync_low_kernel_mappings(); |
819 | efi_dump_pagetable(); | 816 | efi_dump_pagetable(); |
@@ -938,14 +935,11 @@ u64 efi_mem_attributes(unsigned long phys_addr) | |||
938 | return 0; | 935 | return 0; |
939 | } | 936 | } |
940 | 937 | ||
941 | static int __init parse_efi_cmdline(char *str) | 938 | static int __init arch_parse_efi_cmdline(char *str) |
942 | { | 939 | { |
943 | if (*str == '=') | 940 | if (parse_option_str(str, "old_map")) |
944 | str++; | ||
945 | |||
946 | if (!strncmp(str, "old_map", 7)) | ||
947 | set_bit(EFI_OLD_MEMMAP, &efi.flags); | 941 | set_bit(EFI_OLD_MEMMAP, &efi.flags); |
948 | 942 | ||
949 | return 0; | 943 | return 0; |
950 | } | 944 | } |
951 | early_param("efi", parse_efi_cmdline); | 945 | early_param("efi", arch_parse_efi_cmdline); |
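Two threads run through the rest of the efi.c diff. First, every failure path on the way into virtual mode (non-native kexec, memmap reallocation, page-table setup) now does clear_bit(EFI_RUNTIME_SERVICES, &efi.flags), so consumers testing the flag see a consistent "no runtime" state instead of half-initialized services; the local noefi early_param and its disable_runtime flag likewise give way to the generic efi_runtime_disabled() helper. Second, the hand-rolled "efi=" parser becomes parse_option_str(), which walks a comma-separated value and matches whole tokens only, where the old strncmp(str, "old_map", 7) would also have accepted any longer token starting with "old_map". A sketch of an early_param handler in the new style (the parameter and option names are invented):

#include <linux/kernel.h>
#include <linux/init.h>

static bool feat_debug __initdata;
static bool feat_nomap __initdata;

/* Handles e.g. "myfeat=debug,nomap"; each token must match exactly,
 * so "debugx" is rejected rather than silently accepted. */
static int __init parse_myfeat_cmdline(char *str)
{
	if (parse_option_str(str, "debug"))
		feat_debug = true;
	if (parse_option_str(str, "nomap"))
		feat_nomap = true;
	return 0;
}
early_param("myfeat", parse_myfeat_cmdline);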
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 9ee3491e31fb..40e7cda52936 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c | |||
@@ -33,7 +33,7 @@ | |||
33 | 33 | ||
34 | /* | 34 | /* |
35 | * To make EFI call EFI runtime service in physical addressing mode we need | 35 | * To make EFI call EFI runtime service in physical addressing mode we need |
36 | * prelog/epilog before/after the invocation to disable interrupt, to | 36 | * prolog/epilog before/after the invocation to disable interrupt, to |
37 | * claim EFI runtime service handler exclusively and to duplicate a memory in | 37 | * claim EFI runtime service handler exclusively and to duplicate a memory in |
38 | * low memory space say 0 - 3G. | 38 | * low memory space say 0 - 3G. |
39 | */ | 39 | */ |
@@ -41,11 +41,13 @@ static unsigned long efi_rt_eflags; | |||
41 | 41 | ||
42 | void efi_sync_low_kernel_mappings(void) {} | 42 | void efi_sync_low_kernel_mappings(void) {} |
43 | void __init efi_dump_pagetable(void) {} | 43 | void __init efi_dump_pagetable(void) {} |
44 | int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | 44 | int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) |
45 | { | 45 | { |
46 | return 0; | 46 | return 0; |
47 | } | 47 | } |
48 | void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {} | 48 | void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) |
49 | { | ||
50 | } | ||
49 | 51 | ||
50 | void __init efi_map_region(efi_memory_desc_t *md) | 52 | void __init efi_map_region(efi_memory_desc_t *md) |
51 | { | 53 | { |
@@ -55,7 +57,7 @@ void __init efi_map_region(efi_memory_desc_t *md) | |||
55 | void __init efi_map_region_fixed(efi_memory_desc_t *md) {} | 57 | void __init efi_map_region_fixed(efi_memory_desc_t *md) {} |
56 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} | 58 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} |
57 | 59 | ||
58 | void efi_call_phys_prelog(void) | 60 | void __init efi_call_phys_prolog(void) |
59 | { | 61 | { |
60 | struct desc_ptr gdt_descr; | 62 | struct desc_ptr gdt_descr; |
61 | 63 | ||
@@ -69,7 +71,7 @@ void efi_call_phys_prelog(void) | |||
69 | load_gdt(&gdt_descr); | 71 | load_gdt(&gdt_descr); |
70 | } | 72 | } |
71 | 73 | ||
72 | void efi_call_phys_epilog(void) | 74 | void __init efi_call_phys_epilog(void) |
73 | { | 75 | { |
74 | struct desc_ptr gdt_descr; | 76 | struct desc_ptr gdt_descr; |
75 | 77 | ||
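Beyond the prelog→prolog spelling fix that also touches efi_64.c and efi_stub_32.S, this file (like efi.c's efi_memory_uc() and the efi_64.c page-table helpers) marks boot-only functions __init. That places their text in .init.text, which the kernel discards once boot finishes, and the build's section-mismatch checks will then flag any call from code that could run later. A toy example of the annotation (names invented):

#include <linux/init.h>

static int boot_quirk __initdata;	/* discarded with .init.data */

/* Reachable only from boot-time setup, so the body lives in
 * .init.text and is freed after init; calling it later would be
 * a use-after-free, which modpost warns about. */
static int __init detect_boot_quirk(void)
{
	boot_quirk = 1;
	return 0;
}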
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 290d397e1dd9..35aecb6042fb 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -79,7 +79,7 @@ static void __init early_code_mapping_set_exec(int executable) | |||
79 | } | 79 | } |
80 | } | 80 | } |
81 | 81 | ||
82 | void __init efi_call_phys_prelog(void) | 82 | void __init efi_call_phys_prolog(void) |
83 | { | 83 | { |
84 | unsigned long vaddress; | 84 | unsigned long vaddress; |
85 | int pgd; | 85 | int pgd; |
@@ -139,7 +139,7 @@ void efi_sync_low_kernel_mappings(void) | |||
139 | sizeof(pgd_t) * num_pgds); | 139 | sizeof(pgd_t) * num_pgds); |
140 | } | 140 | } |
141 | 141 | ||
142 | int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | 142 | int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) |
143 | { | 143 | { |
144 | unsigned long text; | 144 | unsigned long text; |
145 | struct page *page; | 145 | struct page *page; |
@@ -192,7 +192,7 @@ int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | |||
192 | return 0; | 192 | return 0; |
193 | } | 193 | } |
194 | 194 | ||
195 | void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) | 195 | void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) |
196 | { | 196 | { |
197 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | 197 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); |
198 | 198 | ||
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index fbe66e626c09..040192b50d02 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S | |||
@@ -27,13 +27,13 @@ ENTRY(efi_call_phys) | |||
27 | * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found | 27 | * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found |
28 | * the values of these registers are the same. And, the corresponding | 28 | * the values of these registers are the same. And, the corresponding |
29 | * GDT entries are identical. So I will do nothing about segment reg | 29 | * GDT entries are identical. So I will do nothing about segment reg |
30 | * and GDT, but change GDT base register in prelog and epilog. | 30 | * and GDT, but change GDT base register in prolog and epilog. |
31 | */ | 31 | */ |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. | 34 | * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. |
35 | * But to make it smoothly switch from virtual mode to flat mode. | 35 | * But to make it smoothly switch from virtual mode to flat mode. |
36 | * The mapping of lower virtual memory has been created in prelog and | 36 | * The mapping of lower virtual memory has been created in prolog and |
37 | * epilog. | 37 | * epilog. |
38 | */ | 38 | */ |
39 | movl $1f, %edx | 39 | movl $1f, %edx |
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h index 46aa25c8ce06..3c1c3866d82b 100644 --- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h +++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h | |||
@@ -10,10 +10,9 @@ | |||
10 | */ | 10 | */ |
11 | 11 | ||
12 | 12 | ||
13 | /* __attribute__((weak)) makes these declarations overridable */ | ||
14 | /* For every CPU addition a new get_<cpuname>_ops interface needs | 13 | /* For every CPU addition a new get_<cpuname>_ops interface needs |
15 | * to be added. | 14 | * to be added. |
16 | */ | 15 | */ |
17 | extern void *get_penwell_ops(void) __attribute__((weak)); | 16 | extern void *get_penwell_ops(void); |
18 | extern void *get_cloverview_ops(void) __attribute__((weak)); | 17 | extern void *get_cloverview_ops(void); |
19 | extern void *get_tangier_ops(void) __attribute__((weak)); | 18 | extern void *get_tangier_ops(void); |
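Dropping __attribute__((weak)) from these prototypes follows the usual kernel convention: weakness belongs on a definition, not on a declaration in a shared header. With a weak prototype in scope, every definition that includes the header is itself compiled weak, so two conflicting implementations would link silently with an arbitrary winner instead of failing with a duplicate-symbol error. A sketch of the safer split (names invented):

#include <linux/compiler.h>

/* header: plain prototype, no attribute */
void *get_soc_ops(void);

/* one file supplies a weak default definition ... */
void * __weak get_soc_ops(void)
{
	return NULL;
}

/* ... and a platform file may override it with an ordinary
 * (strong) definition of the same name. */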
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1a3f0445432a..fac5e4f9607c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1636,9 +1636,6 @@ asmlinkage __visible void __init xen_start_kernel(void) | |||
1636 | xen_raw_console_write("mapping kernel into physical memory\n"); | 1636 | xen_raw_console_write("mapping kernel into physical memory\n"); |
1637 | xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); | 1637 | xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); |
1638 | 1638 | ||
1639 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | ||
1640 | xen_build_mfn_list_list(); | ||
1641 | |||
1642 | /* keep using Xen gdt for now; no urgent need to change it */ | 1639 | /* keep using Xen gdt for now; no urgent need to change it */ |
1643 | 1640 | ||
1644 | #ifdef CONFIG_X86_32 | 1641 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f62af7647ec9..a8a1a3d08d4d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1217,10 +1217,13 @@ static void __init xen_pagetable_p2m_copy(void) | |||
1217 | static void __init xen_pagetable_init(void) | 1217 | static void __init xen_pagetable_init(void) |
1218 | { | 1218 | { |
1219 | paging_init(); | 1219 | paging_init(); |
1220 | xen_setup_shared_info(); | ||
1221 | #ifdef CONFIG_X86_64 | 1220 | #ifdef CONFIG_X86_64 |
1222 | xen_pagetable_p2m_copy(); | 1221 | xen_pagetable_p2m_copy(); |
1223 | #endif | 1222 | #endif |
1223 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | ||
1224 | xen_build_mfn_list_list(); | ||
1225 | |||
1226 | xen_setup_shared_info(); | ||
1224 | xen_post_allocator_init(); | 1227 | xen_post_allocator_init(); |
1225 | } | 1228 | } |
1226 | static void xen_write_cr2(unsigned long cr2) | 1229 | static void xen_write_cr2(unsigned long cr2) |
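The enlighten.c and mmu.c hunks move xen_build_mfn_list_list() out of xen_start_kernel() and into xen_pagetable_init(), after paging_init(); xen_setup_shared_info() is pushed after the rebuild so it sees the finished tree. The point, visible in the p2m.c diff below, is the allocator: before paging_init() only the brk pool is usable, and extend_brk() draws on a fixed compile-time RESERVE_BRK() reservation that BUGs when exhausted, whereas afterwards the mfn-list pages can come from alloc_bootmem_align(), letting the p2m_mid_mfn brk reservation go away entirely. A sketch of the split (the helper is hypothetical):

#include <linux/bootmem.h>
#include <asm/setup.h>

/* Hypothetical: choose a boot-time allocator for one p2m page. */
static void * __init alloc_p2m_node(bool paging_up)
{
	if (!paging_up)		/* pre-paging_init(): fixed brk pool */
		return extend_brk(PAGE_SIZE, PAGE_SIZE);
	/* post-paging_init(): bootmem, sized at run time */
	return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
}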
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 9f5983b01ed9..b456b048eca9 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -163,6 +163,7 @@ | |||
163 | #include <linux/hash.h> | 163 | #include <linux/hash.h> |
164 | #include <linux/sched.h> | 164 | #include <linux/sched.h> |
165 | #include <linux/seq_file.h> | 165 | #include <linux/seq_file.h> |
166 | #include <linux/bootmem.h> | ||
166 | 167 | ||
167 | #include <asm/cache.h> | 168 | #include <asm/cache.h> |
168 | #include <asm/setup.h> | 169 | #include <asm/setup.h> |
@@ -181,21 +182,20 @@ static void __init m2p_override_init(void); | |||
181 | 182 | ||
182 | unsigned long xen_max_p2m_pfn __read_mostly; | 183 | unsigned long xen_max_p2m_pfn __read_mostly; |
183 | 184 | ||
185 | static unsigned long *p2m_mid_missing_mfn; | ||
186 | static unsigned long *p2m_top_mfn; | ||
187 | static unsigned long **p2m_top_mfn_p; | ||
188 | |||
184 | /* Placeholders for holes in the address space */ | 189 | /* Placeholders for holes in the address space */ |
185 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | 190 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); |
186 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | 191 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); |
187 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); | ||
188 | 192 | ||
189 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | 193 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); |
190 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); | ||
191 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | ||
192 | 194 | ||
193 | static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); | 195 | static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); |
194 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); | 196 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); |
195 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE); | ||
196 | 197 | ||
197 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | 198 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); |
198 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
199 | 199 | ||
200 | /* For each I/O range remapped we may lose up to two leaf pages for the boundary | 200 | /* For each I/O range remapped we may lose up to two leaf pages for the boundary |
201 | * violations and three mid pages to cover up to 3GB. With | 201 | * violations and three mid pages to cover up to 3GB. With |
@@ -272,11 +272,11 @@ static void p2m_init(unsigned long *p2m) | |||
272 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | 272 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures |
273 | * | 273 | * |
274 | * This is called both at boot time, and after resuming from suspend: | 274 | * This is called both at boot time, and after resuming from suspend: |
275 | * - At boot time we're called very early, and must use extend_brk() | 275 | * - At boot time we're called rather early, and must use alloc_bootmem*() |
276 | * to allocate memory. | 276 | * to allocate memory. |
277 | * | 277 | * |
278 | * - After resume we're called from within stop_machine, but the mfn | 278 | * - After resume we're called from within stop_machine, but the mfn |
279 | * tree should alreay be completely allocated. | 279 | * tree should already be completely allocated. |
280 | */ | 280 | */ |
281 | void __ref xen_build_mfn_list_list(void) | 281 | void __ref xen_build_mfn_list_list(void) |
282 | { | 282 | { |
@@ -287,20 +287,17 @@ void __ref xen_build_mfn_list_list(void) | |||
287 | 287 | ||
288 | /* Pre-initialize p2m_top_mfn to be completely missing */ | 288 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
289 | if (p2m_top_mfn == NULL) { | 289 | if (p2m_top_mfn == NULL) { |
290 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | 290 | p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); |
291 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); | 291 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
292 | p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
293 | p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity); | ||
294 | 292 | ||
295 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | 293 | p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); |
296 | p2m_top_mfn_p_init(p2m_top_mfn_p); | 294 | p2m_top_mfn_p_init(p2m_top_mfn_p); |
297 | 295 | ||
298 | p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | 296 | p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); |
299 | p2m_top_mfn_init(p2m_top_mfn); | 297 | p2m_top_mfn_init(p2m_top_mfn); |
300 | } else { | 298 | } else { |
301 | /* Reinitialise, mfn's all change after migration */ | 299 | /* Reinitialise, mfn's all change after migration */ |
302 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); | 300 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
303 | p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity); | ||
304 | } | 301 | } |
305 | 302 | ||
306 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | 303 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { |
@@ -328,10 +325,9 @@ void __ref xen_build_mfn_list_list(void) | |||
328 | /* | 325 | /* |
329 | * XXX boot-time only! We should never find | 326 | * XXX boot-time only! We should never find |
330 | * missing parts of the mfn tree after | 327 | * missing parts of the mfn tree after |
331 | * runtime. extend_brk() will BUG if we call | 328 | * runtime. |
332 | * it too late. | ||
333 | */ | 329 | */ |
334 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | 330 | mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); |
335 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); | 331 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); |
336 | 332 | ||
337 | p2m_top_mfn_p[topidx] = mid_mfn_p; | 333 | p2m_top_mfn_p[topidx] = mid_mfn_p; |
@@ -415,7 +411,6 @@ void __init xen_build_dynamic_phys_to_machine(void) | |||
415 | m2p_override_init(); | 411 | m2p_override_init(); |
416 | } | 412 | } |
417 | #ifdef CONFIG_X86_64 | 413 | #ifdef CONFIG_X86_64 |
418 | #include <linux/bootmem.h> | ||
419 | unsigned long __init xen_revector_p2m_tree(void) | 414 | unsigned long __init xen_revector_p2m_tree(void) |
420 | { | 415 | { |
421 | unsigned long va_start; | 416 | unsigned long va_start; |
@@ -477,7 +472,6 @@ unsigned long __init xen_revector_p2m_tree(void) | |||
477 | 472 | ||
478 | copy_page(new, mid_p); | 473 | copy_page(new, mid_p); |
479 | p2m_top[topidx][mididx] = &mfn_list[pfn_free]; | 474 | p2m_top[topidx][mididx] = &mfn_list[pfn_free]; |
480 | p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]); | ||
481 | 475 | ||
482 | pfn_free += P2M_PER_PAGE; | 476 | pfn_free += P2M_PER_PAGE; |
483 | 477 | ||
@@ -538,12 +532,13 @@ static bool alloc_p2m(unsigned long pfn) | |||
538 | unsigned topidx, mididx; | 532 | unsigned topidx, mididx; |
539 | unsigned long ***top_p, **mid; | 533 | unsigned long ***top_p, **mid; |
540 | unsigned long *top_mfn_p, *mid_mfn; | 534 | unsigned long *top_mfn_p, *mid_mfn; |
535 | unsigned long *p2m_orig; | ||
541 | 536 | ||
542 | topidx = p2m_top_index(pfn); | 537 | topidx = p2m_top_index(pfn); |
543 | mididx = p2m_mid_index(pfn); | 538 | mididx = p2m_mid_index(pfn); |
544 | 539 | ||
545 | top_p = &p2m_top[topidx]; | 540 | top_p = &p2m_top[topidx]; |
546 | mid = *top_p; | 541 | mid = ACCESS_ONCE(*top_p); |
547 | 542 | ||
548 | if (mid == p2m_mid_missing) { | 543 | if (mid == p2m_mid_missing) { |
549 | /* Mid level is missing, allocate a new one */ | 544 | /* Mid level is missing, allocate a new one */ |
@@ -558,7 +553,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
558 | } | 553 | } |
559 | 554 | ||
560 | top_mfn_p = &p2m_top_mfn[topidx]; | 555 | top_mfn_p = &p2m_top_mfn[topidx]; |
561 | mid_mfn = p2m_top_mfn_p[topidx]; | 556 | mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); |
562 | 557 | ||
563 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | 558 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); |
564 | 559 | ||
@@ -566,6 +561,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
566 | /* Separately check the mid mfn level */ | 561 | /* Separately check the mid mfn level */ |
567 | unsigned long missing_mfn; | 562 | unsigned long missing_mfn; |
568 | unsigned long mid_mfn_mfn; | 563 | unsigned long mid_mfn_mfn; |
564 | unsigned long old_mfn; | ||
569 | 565 | ||
570 | mid_mfn = alloc_p2m_page(); | 566 | mid_mfn = alloc_p2m_page(); |
571 | if (!mid_mfn) | 567 | if (!mid_mfn) |
@@ -575,17 +571,19 @@ static bool alloc_p2m(unsigned long pfn) | |||
575 | 571 | ||
576 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | 572 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); |
577 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | 573 | mid_mfn_mfn = virt_to_mfn(mid_mfn); |
578 | if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) | 574 | old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); |
575 | if (old_mfn != missing_mfn) { | ||
579 | free_p2m_page(mid_mfn); | 576 | free_p2m_page(mid_mfn); |
580 | else | 577 | mid_mfn = mfn_to_virt(old_mfn); |
578 | } else { | ||
581 | p2m_top_mfn_p[topidx] = mid_mfn; | 579 | p2m_top_mfn_p[topidx] = mid_mfn; |
580 | } | ||
582 | } | 581 | } |
583 | 582 | ||
584 | if (p2m_top[topidx][mididx] == p2m_identity || | 583 | p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]); |
585 | p2m_top[topidx][mididx] == p2m_missing) { | 584 | if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) { |
586 | /* p2m leaf page is missing */ | 585 | /* p2m leaf page is missing */ |
587 | unsigned long *p2m; | 586 | unsigned long *p2m; |
588 | unsigned long *p2m_orig = p2m_top[topidx][mididx]; | ||
589 | 587 | ||
590 | p2m = alloc_p2m_page(); | 588 | p2m = alloc_p2m_page(); |
591 | if (!p2m) | 589 | if (!p2m) |
@@ -606,7 +604,6 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) | |||
606 | { | 604 | { |
607 | unsigned topidx, mididx, idx; | 605 | unsigned topidx, mididx, idx; |
608 | unsigned long *p2m; | 606 | unsigned long *p2m; |
609 | unsigned long *mid_mfn_p; | ||
610 | 607 | ||
611 | topidx = p2m_top_index(pfn); | 608 | topidx = p2m_top_index(pfn); |
612 | mididx = p2m_mid_index(pfn); | 609 | mididx = p2m_mid_index(pfn); |
@@ -633,43 +630,21 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) | |||
633 | 630 | ||
634 | p2m_top[topidx][mididx] = p2m; | 631 | p2m_top[topidx][mididx] = p2m; |
635 | 632 | ||
636 | /* For save/restore we need to MFN of the P2M saved */ | ||
637 | |||
638 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
639 | WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), | ||
640 | "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", | ||
641 | topidx, mididx); | ||
642 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | ||
643 | |||
644 | return true; | 633 | return true; |
645 | } | 634 | } |
646 | 635 | ||
647 | static bool __init early_alloc_p2m_middle(unsigned long pfn) | 636 | static bool __init early_alloc_p2m_middle(unsigned long pfn) |
648 | { | 637 | { |
649 | unsigned topidx = p2m_top_index(pfn); | 638 | unsigned topidx = p2m_top_index(pfn); |
650 | unsigned long *mid_mfn_p; | ||
651 | unsigned long **mid; | 639 | unsigned long **mid; |
652 | 640 | ||
653 | mid = p2m_top[topidx]; | 641 | mid = p2m_top[topidx]; |
654 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
655 | if (mid == p2m_mid_missing) { | 642 | if (mid == p2m_mid_missing) { |
656 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | 643 | mid = extend_brk(PAGE_SIZE, PAGE_SIZE); |
657 | 644 | ||
658 | p2m_mid_init(mid, p2m_missing); | 645 | p2m_mid_init(mid, p2m_missing); |
659 | 646 | ||
660 | p2m_top[topidx] = mid; | 647 | p2m_top[topidx] = mid; |
661 | |||
662 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
663 | } | ||
664 | /* And the save/restore P2M tables.. */ | ||
665 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
666 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
667 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); | ||
668 | |||
669 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
670 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
671 | /* Note: we don't set mid_mfn_p[midix] here, | ||
672 | * look in early_alloc_p2m() */ | ||
673 | } | 648 | } |
674 | return true; | 649 | return true; |
675 | } | 650 | } |
@@ -680,14 +655,13 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn) | |||
680 | * replace the P2M leaf with a p2m_missing or p2m_identity. | 655 | * replace the P2M leaf with a p2m_missing or p2m_identity. |
681 | * Stick the old page in the new P2M tree location. | 656 | * Stick the old page in the new P2M tree location. |
682 | */ | 657 | */ |
683 | bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) | 658 | static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn) |
684 | { | 659 | { |
685 | unsigned topidx; | 660 | unsigned topidx; |
686 | unsigned mididx; | 661 | unsigned mididx; |
687 | unsigned ident_pfns; | 662 | unsigned ident_pfns; |
688 | unsigned inv_pfns; | 663 | unsigned inv_pfns; |
689 | unsigned long *p2m; | 664 | unsigned long *p2m; |
690 | unsigned long *mid_mfn_p; | ||
691 | unsigned idx; | 665 | unsigned idx; |
692 | unsigned long pfn; | 666 | unsigned long pfn; |
693 | 667 | ||
@@ -733,11 +707,6 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) | |||
733 | found: | 707 | found: |
734 | /* Found one, replace old with p2m_identity or p2m_missing */ | 708 | /* Found one, replace old with p2m_identity or p2m_missing */ |
735 | p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); | 709 | p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); |
736 | /* And the other for save/restore.. */ | ||
737 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
738 | /* NOTE: Even if it is a p2m_identity it should still be point to | ||
739 | * a page filled with INVALID_P2M_ENTRY entries. */ | ||
740 | mid_mfn_p[mididx] = virt_to_mfn(p2m_missing); | ||
741 | 710 | ||
742 | /* Reset where we want to stick the old page in. */ | 711 | /* Reset where we want to stick the old page in. */ |
743 | topidx = p2m_top_index(set_pfn); | 712 | topidx = p2m_top_index(set_pfn); |
@@ -752,8 +721,6 @@ found: | |||
752 | 721 | ||
753 | p2m_init(p2m); | 722 | p2m_init(p2m); |
754 | p2m_top[topidx][mididx] = p2m; | 723 | p2m_top[topidx][mididx] = p2m; |
755 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
756 | mid_mfn_p[mididx] = virt_to_mfn(p2m); | ||
757 | 724 | ||
758 | return true; | 725 | return true; |
759 | } | 726 | } |
@@ -763,7 +730,7 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
763 | if (!early_alloc_p2m_middle(pfn)) | 730 | if (!early_alloc_p2m_middle(pfn)) |
764 | return false; | 731 | return false; |
765 | 732 | ||
766 | if (early_can_reuse_p2m_middle(pfn, mfn)) | 733 | if (early_can_reuse_p2m_middle(pfn)) |
767 | return __set_phys_to_machine(pfn, mfn); | 734 | return __set_phys_to_machine(pfn, mfn); |
768 | 735 | ||
769 | if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) | 736 | if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) |
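The core of the p2m.c rework is in alloc_p2m(): tree slots are sampled once through ACCESS_ONCE() and populated with an install-or-adopt cmpxchg, so two CPUs racing to fill the same mid level no longer both believe they won; the loser frees its freshly allocated page and adopts the winner's via mid_mfn = mfn_to_virt(old_mfn). Most of the remaining deletions drop the early bookkeeping of the parallel *_mfn tree, which xen_build_mfn_list_list() now rebuilds wholesale after paging_init(). The race-handling pattern, reduced to a toy with a single shared placeholder:

#include <linux/compiler.h>
#include <linux/slab.h>
#include <linux/atomic.h>

struct node { int val; };

/* Toy install-or-adopt: many CPUs may race to replace the shared
 * placeholder in *slot; exactly one allocation survives and every
 * caller ends up using the same node. */
static struct node *get_or_install(struct node **slot,
				   struct node *placeholder)
{
	struct node *cur = ACCESS_ONCE(*slot);
	struct node *new, *old;

	if (cur != placeholder)
		return cur;		/* already populated */

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	old = cmpxchg(slot, placeholder, new);
	if (old != placeholder) {
		kfree(new);		/* lost the race ... */
		return old;		/* ... adopt the winner */
	}
	return new;
}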
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index af7216128d93..29834b3fd87f 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -595,6 +595,7 @@ char * __init xen_memory_setup(void) | |||
595 | rc = 0; | 595 | rc = 0; |
596 | } | 596 | } |
597 | BUG_ON(rc); | 597 | BUG_ON(rc); |
598 | BUG_ON(memmap.nr_entries == 0); | ||
598 | 599 | ||
599 | /* | 600 | /* |
600 | * Xen won't allow a 1:1 mapping to be created to UNUSABLE | 601 | * Xen won't allow a 1:1 mapping to be created to UNUSABLE |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index a1d430b112b3..f473d268d387 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -158,7 +158,7 @@ cycle_t xen_clocksource_read(void) | |||
158 | cycle_t ret; | 158 | cycle_t ret; |
159 | 159 | ||
160 | preempt_disable_notrace(); | 160 | preempt_disable_notrace(); |
161 | src = this_cpu_ptr(&xen_vcpu->time); | 161 | src = &__this_cpu_read(xen_vcpu)->time; |
162 | ret = pvclock_clocksource_read(src); | 162 | ret = pvclock_clocksource_read(src); |
163 | preempt_enable_notrace(); | 163 | preempt_enable_notrace(); |
164 | return ret; | 164 | return ret; |
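The xen/time.c one-liner untangles a per-cpu accessor mix-up: xen_vcpu is a per-CPU pointer, so the code must first read this CPU's copy of the pointer (__this_cpu_read) and only then take the address of its time field. The old this_cpu_ptr(&xen_vcpu->time) instead formed a field address from the raw per-cpu symbol and applied the per-cpu offset to that, which is not the address the pointer actually stores. A sketch of the two forms (types and names invented):

#include <linux/percpu.h>
#include <linux/types.h>

struct vtime { u64 ns; };
struct vcpu  { struct vtime time; };

/* A per-CPU pointer to a possibly-shared structure, as xen_vcpu is. */
static DEFINE_PER_CPU(struct vcpu *, my_vcpu);

static u64 read_time_ns(void)
{
	/* Correct: fetch this CPU's pointer, then follow it. */
	struct vtime *t = &__this_cpu_read(my_vcpu)->time;

	/* Incorrect for a per-cpu *pointer*:
	 *	t = this_cpu_ptr(&my_vcpu->time);
	 * which offsets a field address derived from the per-cpu
	 * symbol itself rather than from the stored pointer. */
	return t->ns;
}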