author     Olof Johansson <olof@lixom.net>   2014-11-02 16:36:05 -0500
committer  Olof Johansson <olof@lixom.net>   2014-11-02 16:37:07 -0500
commit     4257412db57900e43716d0b7ddd4f4a51e6ed2f4 (patch)
tree       759963245a484422e9ad2639cb223b53f844ff15 /arch/x86
parent     cc040ba269ae6972face1dc7376ab3eaab9f64c8 (diff)
parent     4b91f7f3c8b20e073b7bfc098625b37f99789508 (diff)
Merge tag 'fixes-against-v3.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes

Merge "omap fixes against v3.18-rc2" from Tony Lindgren:

Few fixes for omaps to enable NAND BCH so devices won't produce
errors when booted with omap2plus_defconfig, and reduce bloat
by making IPV6 a loadable module.

Also let's add a warning about legacy boot being deprecated for
omap3. We now have things working with device tree, and only omap3
is still booting in legacy mode. So hopefully this warning will
help move the remaining legacy mode users to boot with device tree.

As the total reduction of code and static data is somewhere around
20000 lines of code once we remove omap3 legacy mode booting, we
really do want to make omap3 to boot also in device tree mode only
over the next few merge cycles.

* tag 'fixes-against-v3.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap: (407 commits)
  ARM: OMAP2+: Warn about deprecated legacy booting mode
  ARM: omap2plus_defconfig: Fix errors with NAND BCH
  ARM: omap2plus_defconfig: Fix bloat caused by having ipv6 built-in
  + Linux 3.18-rc2

Signed-off-by: Olof Johansson <olof@lixom.net>
Diffstat (limited to 'arch/x86')
-rw-r--r--   arch/x86/boot/compressed/eboot.c                     32
-rw-r--r--   arch/x86/include/asm/efi.h                           31
-rw-r--r--   arch/x86/include/asm/kvm_host.h                      16
-rw-r--r--   arch/x86/include/uapi/asm/vmx.h                       2
-rw-r--r--   arch/x86/kvm/emulate.c                              250
-rw-r--r--   arch/x86/kvm/i8254.c                                  2
-rw-r--r--   arch/x86/kvm/paging_tmpl.h                            2
-rw-r--r--   arch/x86/kvm/svm.c                                    8
-rw-r--r--   arch/x86/kvm/vmx.c                                   24
-rw-r--r--   arch/x86/kvm/x86.c                                   38
-rw-r--r--   arch/x86/platform/efi/efi-bgrt.c                     36
-rw-r--r--   arch/x86/platform/efi/efi.c                          52
-rw-r--r--   arch/x86/platform/efi/efi_32.c                       12
-rw-r--r--   arch/x86/platform/efi/efi_64.c                        6
-rw-r--r--   arch/x86/platform/efi/efi_stub_32.S                   4
-rw-r--r--   arch/x86/platform/intel-mid/intel_mid_weak_decls.h    7
-rw-r--r--   arch/x86/xen/enlighten.c                              3
-rw-r--r--   arch/x86/xen/mmu.c                                    5
-rw-r--r--   arch/x86/xen/p2m.c                                   83
-rw-r--r--   arch/x86/xen/setup.c                                  1
-rw-r--r--   arch/x86/xen/time.c                                   2
21 files changed, 395 insertions, 221 deletions
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index de8eebd6f67c..1acf605a646d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -330,8 +330,10 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
         size = pci->romsize + sizeof(*rom);

         status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to alloc mem for rom\n");
                 return status;
+        }

         memset(rom, 0, sizeof(*rom));

@@ -344,14 +346,18 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
         status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
                                  PCI_VENDOR_ID, 1, &(rom->vendor));

-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to read rom->vendor\n");
                 goto free_struct;
+        }

         status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
                                  PCI_DEVICE_ID, 1, &(rom->devid));

-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to read rom->devid\n");
                 goto free_struct;
+        }

         status = efi_early->call(pci->get_location, pci, &(rom->segment),
                                  &(rom->bus), &(rom->device), &(rom->function));
@@ -432,8 +438,10 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
         size = pci->romsize + sizeof(*rom);

         status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to alloc mem for rom\n");
                 return status;
+        }

         rom->data.type = SETUP_PCI;
         rom->data.len = size - sizeof(struct setup_data);
@@ -444,14 +452,18 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
         status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
                                  PCI_VENDOR_ID, 1, &(rom->vendor));

-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to read rom->vendor\n");
                 goto free_struct;
+        }

         status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
                                  PCI_DEVICE_ID, 1, &(rom->devid));

-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to read rom->devid\n");
                 goto free_struct;
+        }

         status = efi_early->call(pci->get_location, pci, &(rom->segment),
                                  &(rom->bus), &(rom->device), &(rom->function));
@@ -538,8 +550,10 @@ static void setup_efi_pci(struct boot_params *params)
                                 EFI_LOADER_DATA,
                                 size, (void **)&pci_handle);

-        if (status != EFI_SUCCESS)
+        if (status != EFI_SUCCESS) {
+                efi_printk(sys_table, "Failed to alloc mem for pci_handle\n");
                 return;
+        }

         status = efi_call_early(locate_handle,
                                 EFI_LOCATE_BY_PROTOCOL, &pci_proto,
@@ -1105,6 +1119,10 @@ struct boot_params *make_boot_params(struct efi_config *c)

         memset(sdt, 0, sizeof(*sdt));

+        status = efi_parse_options(cmdline_ptr);
+        if (status != EFI_SUCCESS)
+                goto fail2;
+
         status = handle_cmdline_files(sys_table, image,
                         (char *)(unsigned long)hdr->cmd_line_ptr,
                         "initrd=", hdr->initrd_addr_max,
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0ec241ede5a2..9b11757975d0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -81,24 +81,23 @@ extern u64 asmlinkage efi_call(void *fp, ...);
  */
 #define __efi_call_virt(f, args...) efi_call_virt(f, args)

-extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
-                                 u32 type, u64 attribute);
+extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
+                                        u32 type, u64 attribute);

 #endif /* CONFIG_X86_32 */

-extern int add_efi_memmap;
 extern struct efi_scratch efi_scratch;
-extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
-extern int efi_memblock_x86_reserve_range(void);
-extern void efi_call_phys_prelog(void);
-extern void efi_call_phys_epilog(void);
-extern void efi_unmap_memmap(void);
-extern void efi_memory_uc(u64 addr, unsigned long size);
+extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
+extern int __init efi_memblock_x86_reserve_range(void);
+extern void __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(void);
+extern void __init efi_unmap_memmap(void);
+extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
 extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
-extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
-extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
 extern void __init old_map_region(efi_memory_desc_t *md);
 extern void __init runtime_code_page_mkexec(void);
 extern void __init efi_runtime_mkexec(void);
@@ -162,16 +161,6 @@ static inline efi_status_t efi_thunk_set_virtual_address_map(
 extern bool efi_reboot_required(void);

 #else
-/*
- * IF EFI is not configured, have the EFI calls return -ENOSYS.
- */
-#define efi_call0(_f) (-ENOSYS)
-#define efi_call1(_f, _a1) (-ENOSYS)
-#define efi_call2(_f, _a1, _a2) (-ENOSYS)
-#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS)
-#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS)
-#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS)
-#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS)
 static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
 static inline bool efi_reboot_required(void)
 {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7d603a71ab3a..6ed0c30d6a0c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
         kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }

+static inline u64 get_canonical(u64 la)
+{
+        return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+        return get_canonical(la) != la;
+#else
+        return false;
+#endif
+}
+
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
                                            unsigned long address);

 void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);

 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);

diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420376eb..990a2fe1588d 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_INVEPT              50
 #define EXIT_REASON_PREEMPTION_TIMER    52
+#define EXIT_REASON_INVVPID             53
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
@@ -114,6 +115,7 @@
         { EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
         { EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
         { EXIT_REASON_INVD,                  "INVD" }, \
+        { EXIT_REASON_INVVPID,               "INVVPID" }, \
         { EXIT_REASON_INVPCID,               "INVPCID" }

 #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a46207a05835..749f9fa38254 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
         masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }

-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-        register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
         u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
         return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }

+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+                                 int cs_l)
+{
+        switch (ctxt->op_bytes) {
+        case 2:
+                ctxt->_eip = (u16)dst;
+                break;
+        case 4:
+                ctxt->_eip = (u32)dst;
+                break;
+        case 8:
+                if ((cs_l && is_noncanonical_address(dst)) ||
+                    (!cs_l && (dst & ~(u32)-1)))
+                        return emulate_gp(ctxt, 0);
+                ctxt->_eip = dst;
+                break;
+        default:
+                WARN(1, "unsupported eip assignment size\n");
+        }
+        return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+        return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+        return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
         u16 selector;
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
                                                unsigned size)
 {
-        if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
-                return __do_insn_fetch_bytes(ctxt, size);
+        unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
+
+        if (unlikely(done_size < size))
+                return __do_insn_fetch_bytes(ctxt, size - done_size);
         else
                 return X86EMUL_CONTINUE;
 }
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,

 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-                                     u16 selector, int seg, u8 cpl, bool in_task_switch)
+                                     u16 selector, int seg, u8 cpl,
+                                     bool in_task_switch,
+                                     struct desc_struct *desc)
 {
         struct desc_struct seg_desc, old_desc;
         u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
         }
 load:
         ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+        if (desc)
+                *desc = seg_desc;
         return X86EMUL_CONTINUE;
 exception:
         return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                    u16 selector, int seg)
 {
         u8 cpl = ctxt->ops->cpl(ctxt);
-        return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+        return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
 }

 static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
         int rc;
-        unsigned short sel;
+        unsigned short sel, old_sel;
+        struct desc_struct old_desc, new_desc;
+        const struct x86_emulate_ops *ops = ctxt->ops;
+        u8 cpl = ctxt->ops->cpl(ctxt);
+
+        /* Assignment of RIP may only fail in 64-bit mode */
+        if (ctxt->mode == X86EMUL_MODE_PROT64)
+                ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+                                 VCPU_SREG_CS);

         memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

-        rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+                                       &new_desc);
         if (rc != X86EMUL_CONTINUE)
                 return rc;

-        ctxt->_eip = 0;
-        memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-        return X86EMUL_CONTINUE;
+        rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+        if (rc != X86EMUL_CONTINUE) {
+                WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+                /* assigning eip failed; restore the old cs */
+                ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+                return rc;
+        }
+        return rc;
 }

 static int em_grp45(struct x86_emulate_ctxt *ctxt)
1976static int em_grp45(struct x86_emulate_ctxt *ctxt) 2023static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
         case 2: /* call near abs */ {
                 long int old_eip;
                 old_eip = ctxt->_eip;
-                ctxt->_eip = ctxt->src.val;
+                rc = assign_eip_near(ctxt, ctxt->src.val);
+                if (rc != X86EMUL_CONTINUE)
+                        break;
                 ctxt->src.val = old_eip;
                 rc = em_push(ctxt);
                 break;
         }
         case 4: /* jmp abs */
-                ctxt->_eip = ctxt->src.val;
+                rc = assign_eip_near(ctxt, ctxt->src.val);
                 break;
         case 5: /* jmp far */
                 rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)

 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-        ctxt->dst.type = OP_REG;
-        ctxt->dst.addr.reg = &ctxt->_eip;
-        ctxt->dst.bytes = ctxt->op_bytes;
-        return em_pop(ctxt);
+        int rc;
+        unsigned long eip;
+
+        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+        if (rc != X86EMUL_CONTINUE)
+                return rc;
+
+        return assign_eip_near(ctxt, eip);
 }

 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
         int rc;
-        unsigned long cs;
+        unsigned long eip, cs;
+        u16 old_cs;
         int cpl = ctxt->ops->cpl(ctxt);
+        struct desc_struct old_desc, new_desc;
+        const struct x86_emulate_ops *ops = ctxt->ops;

-        rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+        if (ctxt->mode == X86EMUL_MODE_PROT64)
+                ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+                                 VCPU_SREG_CS);
+
+        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
-        if (ctxt->op_bytes == 4)
-                ctxt->_eip = (u32)ctxt->_eip;
         rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
         /* Outer-privilege level return is not implemented */
         if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
                 return X86EMUL_UNHANDLEABLE;
-        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+        rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+                                       &new_desc);
+        if (rc != X86EMUL_CONTINUE)
+                return rc;
+        rc = assign_eip_far(ctxt, eip, new_desc.l);
+        if (rc != X86EMUL_CONTINUE) {
+                WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+                ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+        }
         return rc;
 }

2051 2117
@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
         const struct x86_emulate_ops *ops = ctxt->ops;
         struct desc_struct cs, ss;
-        u64 msr_data;
+        u64 msr_data, rcx, rdx;
         int usermode;
         u16 cs_sel = 0, ss_sel = 0;

@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
         else
                 usermode = X86EMUL_MODE_PROT32;

+        rcx = reg_read(ctxt, VCPU_REGS_RCX);
+        rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
         cs.dpl = 3;
         ss.dpl = 3;
         ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
                 ss_sel = cs_sel + 8;
                 cs.d = 0;
                 cs.l = 1;
+                if (is_noncanonical_address(rcx) ||
+                    is_noncanonical_address(rdx))
+                        return emulate_gp(ctxt, 0);
                 break;
         }
         cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
         ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
         ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

-        ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
-        *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+        ctxt->_eip = rdx;
+        *reg_write(ctxt, VCPU_REGS_RSP) = rcx;

         return X86EMUL_CONTINUE;
 }
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
          * Now load segment descriptors. If fault happens at this stage
          * it is handled in a context of new task
          */
-        ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;

@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
          * Now load segment descriptors. If fault happenes at this stage
          * it is handled in a context of new task
          */
-        ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+                                        cpl, true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
+        ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+                                        true, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;

@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)

 static int em_call(struct x86_emulate_ctxt *ctxt)
 {
+        int rc;
         long rel = ctxt->src.val;

         ctxt->src.val = (unsigned long)ctxt->_eip;
-        jmp_rel(ctxt, rel);
+        rc = jmp_rel(ctxt, rel);
+        if (rc != X86EMUL_CONTINUE)
+                return rc;
         return em_push(ctxt);
 }

2897 2984
@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
         u16 sel, old_cs;
         ulong old_eip;
         int rc;
+        struct desc_struct old_desc, new_desc;
+        const struct x86_emulate_ops *ops = ctxt->ops;
+        int cpl = ctxt->ops->cpl(ctxt);

-        old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
         old_eip = ctxt->_eip;
+        ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);

         memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-        if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+                                       &new_desc);
+        if (rc != X86EMUL_CONTINUE)
                 return X86EMUL_CONTINUE;

-        ctxt->_eip = 0;
-        memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+        rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+        if (rc != X86EMUL_CONTINUE)
+                goto fail;

         ctxt->src.val = old_cs;
         rc = em_push(ctxt);
         if (rc != X86EMUL_CONTINUE)
-                return rc;
+                goto fail;

         ctxt->src.val = old_eip;
-        return em_push(ctxt);
+        rc = em_push(ctxt);
+        /* If we failed, we tainted the memory, but the very least we should
+           restore cs */
+        if (rc != X86EMUL_CONTINUE)
+                goto fail;
+        return rc;
+fail:
+        ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+        return rc;
+
 }

 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
         int rc;
+        unsigned long eip;

-        ctxt->dst.type = OP_REG;
-        ctxt->dst.addr.reg = &ctxt->_eip;
-        ctxt->dst.bytes = ctxt->op_bytes;
-        rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+        if (rc != X86EMUL_CONTINUE)
+                return rc;
+        rc = assign_eip_near(ctxt, eip);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
         rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)

 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+        int rc = X86EMUL_CONTINUE;
+
         register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
         if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
             (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-                jmp_rel(ctxt, ctxt->src.val);
+                rc = jmp_rel(ctxt, ctxt->src.val);

-        return X86EMUL_CONTINUE;
+        return rc;
 }

 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
+        int rc = X86EMUL_CONTINUE;
+
         if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
-                jmp_rel(ctxt, ctxt->src.val);
+                rc = jmp_rel(ctxt, ctxt->src.val);

-        return X86EMUL_CONTINUE;
+        return rc;
 }

 static int em_in(struct x86_emulate_ctxt *ctxt)
3273static int em_in(struct x86_emulate_ctxt *ctxt) 3380static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
         return X86EMUL_CONTINUE;
 }

+static int em_clflush(struct x86_emulate_ctxt *ctxt)
+{
+        /* emulating clflush regardless of cpuid */
+        return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
         switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
         X7(D(Undefined)),
 };

+static const struct gprefix pfx_0f_ae_7 = {
+        I(SrcMem | ByteOp, em_clflush), N, N, N,
+};
+
+static const struct group_dual group15 = { {
+        N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+}, {
+        N, N, N, N, N, N, N, N,
+} };
+
 static const struct gprefix pfx_0f_6f_0f_7f = {
         I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
         N, I(ImplicitOps | EmulateOnUD, em_syscall),
         II(ImplicitOps | Priv, em_clts, clts), N,
         DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
-        N, D(ImplicitOps | ModRM), N, N,
+        N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
         /* 0x10 - 0x1F */
         N, N, N, N, N, N, N, N,
-        D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
+        D(ImplicitOps | ModRM | SrcMem | NoAccess),
+        N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
         /* 0x20 - 0x2F */
         DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
         DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
         F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
         F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
         F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
-        D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
+        GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
         /* 0xB0 - 0xB7 */
         I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
         I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ done_prefixes:
         /* Decode and fetch the destination operand: register or memory. */
         rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

-done:
         if (ctxt->rip_relative)
                 ctxt->memopp->addr.mem.ea += ctxt->_eip;

+done:
         return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }

4482 4606
@@ -4726,7 +4850,7 @@ special_insn:
                 break;
         case 0x70 ... 0x7f: /* jcc (short) */
                 if (test_cc(ctxt->b, ctxt->eflags))
-                        jmp_rel(ctxt, ctxt->src.val);
+                        rc = jmp_rel(ctxt, ctxt->src.val);
                 break;
         case 0x8d: /* lea r16/r32, m */
                 ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ special_insn:
                 break;
         case 0xe9: /* jmp rel */
         case 0xeb: /* jmp rel short */
-                jmp_rel(ctxt, ctxt->src.val);
+                rc = jmp_rel(ctxt, ctxt->src.val);
                 ctxt->dst.type = OP_NONE; /* Disable writeback. */
                 break;
         case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ twobyte_insn:
                 break;
         case 0x80 ... 0x8f: /* jnz rel, etc*/
                 if (test_cc(ctxt->b, ctxt->eflags))
-                        jmp_rel(ctxt, ctxt->src.val);
+                        rc = jmp_rel(ctxt, ctxt->src.val);
                 break;
         case 0x90 ... 0x9f: /* setcc r/m8 */
                 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
                 break;
-        case 0xae: /* clflush */
-                break;
         case 0xb6 ... 0xb7: /* movzx */
                 ctxt->dst.bytes = ctxt->op_bytes;
                 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
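
The recurring pattern across the emulate.c hunks above is that jmp_rel(),
assign_eip_near() and assign_eip_far() now return a status, so a branch to
an invalid target raises #GP instead of silently loading a bad RIP. A
standalone sketch of that control flow (simplified context struct and
names; not kernel code):

#include <stdint.h>
#include <stdio.h>

enum { EMUL_CONTINUE = 0, EMUL_GP = 1 };

struct emu_ctxt {
        uint64_t eip;
        int op_bytes;   /* 2, 4 or 8 */
        int cs_l;       /* 64-bit code segment? */
};

static int is_noncanonical(uint64_t la)
{
        return (uint64_t)(((int64_t)la << 16) >> 16) != la;
}

/* mirrors assign_eip_far(): truncate by operand size, then validate */
static int assign_eip(struct emu_ctxt *c, uint64_t dst)
{
        switch (c->op_bytes) {
        case 2: c->eip = (uint16_t)dst; break;
        case 4: c->eip = (uint32_t)dst; break;
        case 8:
                if (c->cs_l && is_noncanonical(dst))
                        return EMUL_GP; /* caller must not commit state */
                c->eip = dst;
                break;
        }
        return EMUL_CONTINUE;
}

static int jmp_rel(struct emu_ctxt *c, int rel)
{
        return assign_eip(c, c->eip + rel);
}

int main(void)
{
        struct emu_ctxt c = { .eip = 0x7fffffffff00, .op_bytes = 8, .cs_l = 1 };

        /* jumping across the canonical boundary now reports a fault */
        printf("rc=%d eip=%#llx\n", jmp_rel(&c, 0x200),
               (unsigned long long)c.eip);
        return 0;
}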
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d86471b76..298781d4cfb4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
                 return;

         timer = &pit->pit_state.timer;
+        mutex_lock(&pit->pit_state.lock);
         if (hrtimer_cancel(timer))
                 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+        mutex_unlock(&pit->pit_state.lock);
 }

 static void destroy_pit_timer(struct kvm_pit *pit)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 806d58e3c320..fd49c867b25a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,7 +298,7 @@ retry_walk:
         }
 #endif
         walker->max_level = walker->level;
-        ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
+        ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));

         accessed_dirty = PT_GUEST_ACCESSED_MASK;
         pt_access = pte_access = ACC_ALL;
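
The ASSERT fix above is a pure boolean correction: the old expression fired
for every long-mode guest, while the intended invariant is "long mode
implies PAE". An illustrative-only enumeration of the four cases:

#include <stdio.h>

int main(void)
{
        for (int lm = 0; lm <= 1; lm++)
                for (int pae = 0; pae <= 1; pae++)
                        printf("long_mode=%d pae=%d  old=%d fixed=%d\n",
                               lm, pae,
                               !lm && pae,     /* old: fails whenever lm=1 */
                               !(lm && !pae)); /* fixed: only lm && !pae fails */
        return 0;
}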
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 65510f624dfe..7527cefc5a43 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
         msr.host_initiated = false;

         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-        if (svm_set_msr(&svm->vcpu, &msr)) {
+        if (kvm_set_msr(&svm->vcpu, &msr)) {
                 trace_kvm_msr_write_ex(ecx, data);
                 kvm_inject_gp(&svm->vcpu, 0);
         } else {
@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)

         if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
             || !svm_exit_handlers[exit_code]) {
-                kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-                kvm_run->hw.hardware_exit_reason = exit_code;
-                return 0;
+                WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+                kvm_queue_exception(vcpu, UD_VECTOR);
+                return 1;
         }

         return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0acac81f198b..a8b76c4c95e2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         default:
                 msr = find_msr_entry(vmx, msr_index);
                 if (msr) {
+                        u64 old_msr_data = msr->data;
                         msr->data = data;
                         if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
                                 preempt_disable();
-                                kvm_set_shared_msr(msr->index, msr->data,
-                                                   msr->mask);
+                                ret = kvm_set_shared_msr(msr->index, msr->data,
+                                                         msr->mask);
                                 preempt_enable();
+                                if (ret)
+                                        msr->data = old_msr_data;
                         }
                         break;
                 }
@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
         msr.data = data;
         msr.index = ecx;
         msr.host_initiated = false;
-        if (vmx_set_msr(vcpu, &msr) != 0) {
+        if (kvm_set_msr(vcpu, &msr) != 0) {
                 trace_kvm_msr_write_ex(ecx, data);
                 kvm_inject_gp(vcpu, 0);
                 return 1;
@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
         return 1;
 }

+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+        kvm_queue_exception(vcpu, UD_VECTOR);
+        return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
         [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_mwait,
         [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
         [EXIT_REASON_INVEPT]                  = handle_invept,
+        [EXIT_REASON_INVVPID]                 = handle_invvpid,
 };

 static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
         case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
         case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
         case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
-        case EXIT_REASON_INVEPT:
+        case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
                 /*
                  * VMX instructions trap unconditionally. This allows L1 to
                  * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
             && kvm_vmx_exit_handlers[exit_reason])
                 return kvm_vmx_exit_handlers[exit_reason](vcpu);
         else {
-                vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-                vcpu->run->hw.hardware_exit_reason = exit_reason;
+                WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+                kvm_queue_exception(vcpu, UD_VECTOR);
+                return 1;
         }
-        return 0;
 }

 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
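
The vmx_set_msr() hunk above pairs with the kvm_set_shared_msr() change in
x86.c below: the MSR write now goes through a checked primitive, and the
cached value is rolled back if the write faults. A standalone sketch of
that save/attempt/rollback pattern (checked_wrmsr() below is a hypothetical
stand-in for wrmsrl_safe(); not kernel code):

#include <stdint.h>
#include <stdio.h>

struct msr_slot {
        uint32_t index;
        uint64_t data;  /* cached guest view of the MSR */
};

/* stand-in for wrmsrl_safe(): 0 on success, nonzero if the write #GPs */
static int checked_wrmsr(uint32_t index, uint64_t value)
{
        /* pretend the hardware rejects non-canonical addresses */
        return (uint64_t)(((int64_t)value << 16) >> 16) != value;
}

static int set_msr(struct msr_slot *msr, uint64_t data)
{
        uint64_t old_msr_data = msr->data;
        int ret;

        msr->data = data;
        ret = checked_wrmsr(msr->index, msr->data);
        if (ret)
                msr->data = old_msr_data; /* keep the cache consistent */
        return ret;
}

int main(void)
{
        struct msr_slot msr = { .index = 0xc0000082, .data = 0 }; /* LSTAR */

        if (set_msr(&msr, 0x8000000000000000ull))
                printf("write faulted, cache restored to %#llx\n",
                       (unsigned long long)msr.data);
        return 0;
}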
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c8f94331f8..0033df32a745 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
                 shared_msr_update(i, shared_msrs_global.msrs[i]);
 }

-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
         unsigned int cpu = smp_processor_id();
         struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+        int err;

         if (((value ^ smsr->values[slot].curr) & mask) == 0)
-                return;
+                return 0;
         smsr->values[slot].curr = value;
-        wrmsrl(shared_msrs_global.msrs[slot], value);
+        err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+        if (err)
+                return 1;
+
         if (!smsr->registered) {
                 smsr->urn.on_user_return = kvm_on_user_return;
                 user_return_notifier_register(&smsr->urn);
                 smsr->registered = true;
         }
+        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);

@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

-
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  */
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
+        switch (msr->index) {
+        case MSR_FS_BASE:
+        case MSR_GS_BASE:
+        case MSR_KERNEL_GS_BASE:
+        case MSR_CSTAR:
+        case MSR_LSTAR:
+                if (is_noncanonical_address(msr->data))
+                        return 1;
+                break;
+        case MSR_IA32_SYSENTER_EIP:
+        case MSR_IA32_SYSENTER_ESP:
+                /*
+                 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+                 * non-canonical address is written on Intel but not on
+                 * AMD (which ignores the top 32-bits, because it does
+                 * not implement 64-bit SYSENTER).
+                 *
+                 * 64-bit code should hence be able to write a non-canonical
+                 * value on AMD. Making the address canonical ensures that
+                 * vmentry does not fail on Intel after writing a non-canonical
+                 * value, and that something deterministic happens if the guest
+                 * invokes 64-bit SYSENTER.
+                 */
+                msr->data = get_canonical(msr->data);
+        }
         return kvm_x86_ops->set_msr(vcpu, msr);
 }
+EXPORT_SYMBOL_GPL(kvm_set_msr);

 /*
  * Adapt set_msr() to msr_io()'s calling convention
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index f15103dff4b4..d143d216d52b 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -40,20 +40,40 @@ void __init efi_bgrt_init(void)
         if (ACPI_FAILURE(status))
                 return;

-        if (bgrt_tab->header.length < sizeof(*bgrt_tab))
+        if (bgrt_tab->header.length < sizeof(*bgrt_tab)) {
+                pr_err("Ignoring BGRT: invalid length %u (expected %zu)\n",
+                       bgrt_tab->header.length, sizeof(*bgrt_tab));
                 return;
-        if (bgrt_tab->version != 1 || bgrt_tab->status != 1)
+        }
+        if (bgrt_tab->version != 1) {
+                pr_err("Ignoring BGRT: invalid version %u (expected 1)\n",
+                       bgrt_tab->version);
+                return;
+        }
+        if (bgrt_tab->status != 1) {
+                pr_err("Ignoring BGRT: invalid status %u (expected 1)\n",
+                       bgrt_tab->status);
+                return;
+        }
+        if (bgrt_tab->image_type != 0) {
+                pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
+                       bgrt_tab->image_type);
                 return;
-        if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
+        }
+        if (!bgrt_tab->image_address) {
+                pr_err("Ignoring BGRT: null image address\n");
                 return;
+        }

         image = efi_lookup_mapped_addr(bgrt_tab->image_address);
         if (!image) {
                 image = early_memremap(bgrt_tab->image_address,
                                        sizeof(bmp_header));
                 ioremapped = true;
-                if (!image)
+                if (!image) {
+                        pr_err("Ignoring BGRT: failed to map image header memory\n");
                         return;
+                }
         }

         memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
@@ -61,14 +81,18 @@ void __init efi_bgrt_init(void)
                 early_iounmap(image, sizeof(bmp_header));
         bgrt_image_size = bmp_header.size;

-        bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
-        if (!bgrt_image)
+        bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
+        if (!bgrt_image) {
+                pr_err("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n",
+                       bgrt_image_size);
                 return;
+        }

         if (ioremapped) {
                 image = early_memremap(bgrt_tab->image_address,
                                        bmp_header.size);
                 if (!image) {
+                        pr_err("Ignoring BGRT: failed to map image memory\n");
                         kfree(bgrt_image);
                         bgrt_image = NULL;
                         return;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 850da94fef30..dbc8627a5cdf 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -70,17 +70,7 @@ static efi_config_table_type_t arch_tables[] __initdata = {

 u64 efi_setup;          /* efi setup_data physical address */

-static bool disable_runtime __initdata = false;
-static int __init setup_noefi(char *arg)
-{
-        disable_runtime = true;
-        return 0;
-}
-early_param("noefi", setup_noefi);
-
-int add_efi_memmap;
-EXPORT_SYMBOL(add_efi_memmap);
-
+static int add_efi_memmap __initdata;
 static int __init setup_add_efi_memmap(char *arg)
 {
         add_efi_memmap = 1;
@@ -96,7 +86,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
 {
         efi_status_t status;

-        efi_call_phys_prelog();
+        efi_call_phys_prolog();
         status = efi_call_phys(efi_phys.set_virtual_address_map,
                                memory_map_size, descriptor_size,
                                descriptor_version, virtual_map);
@@ -210,9 +200,12 @@ static void __init print_efi_memmap(void)
         for (p = memmap.map, i = 0;
              p < memmap.map_end;
              p += memmap.desc_size, i++) {
+                char buf[64];
+
                 md = p;
-                pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n",
-                        i, md->type, md->attribute, md->phys_addr,
+                pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
+                        i, efi_md_typeattr_format(buf, sizeof(buf), md),
+                        md->phys_addr,
                         md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
                         (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
         }
@@ -344,9 +337,9 @@ static int __init efi_runtime_init32(void)
         }

         /*
-         * We will only need *early* access to the following two
-         * EFI runtime services before set_virtual_address_map
-         * is invoked.
+         * We will only need *early* access to the SetVirtualAddressMap
+         * EFI runtime service. All other runtime services will be called
+         * via the virtual mapping.
          */
         efi_phys.set_virtual_address_map =
                 (efi_set_virtual_address_map_t *)
@@ -368,9 +361,9 @@ static int __init efi_runtime_init64(void)
         }

         /*
-         * We will only need *early* access to the following two
-         * EFI runtime services before set_virtual_address_map
-         * is invoked.
+         * We will only need *early* access to the SetVirtualAddressMap
+         * EFI runtime service. All other runtime services will be called
+         * via the virtual mapping.
          */
         efi_phys.set_virtual_address_map =
                 (efi_set_virtual_address_map_t *)
@@ -492,7 +485,7 @@ void __init efi_init(void)
         if (!efi_runtime_supported())
                 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
         else {
-                if (disable_runtime || efi_runtime_init())
+                if (efi_runtime_disabled() || efi_runtime_init())
                         return;
         }
         if (efi_memmap_init())
@@ -537,7 +530,7 @@ void __init runtime_code_page_mkexec(void)
         }
 }

-void efi_memory_uc(u64 addr, unsigned long size)
+void __init efi_memory_uc(u64 addr, unsigned long size)
 {
         unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
         u64 npages;
@@ -732,6 +725,7 @@ static void __init kexec_enter_virtual_mode(void)
          */
         if (!efi_is_native()) {
                 efi_unmap_memmap();
+                clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
                 return;
         }

@@ -805,6 +799,7 @@ static void __init __efi_enter_virtual_mode(void)
         new_memmap = efi_map_regions(&count, &pg_shift);
         if (!new_memmap) {
                 pr_err("Error reallocating memory, EFI runtime non-functional!\n");
+                clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
                 return;
         }

@@ -812,8 +807,10 @@ static void __init __efi_enter_virtual_mode(void)

         BUG_ON(!efi.systab);

-        if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
+        if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) {
+                clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
                 return;
+        }

         efi_sync_low_kernel_mappings();
         efi_dump_pagetable();
@@ -938,14 +935,11 @@ u64 efi_mem_attributes(unsigned long phys_addr)
         return 0;
 }

-static int __init parse_efi_cmdline(char *str)
+static int __init arch_parse_efi_cmdline(char *str)
 {
-        if (*str == '=')
-                str++;
-
-        if (!strncmp(str, "old_map", 7))
+        if (parse_option_str(str, "old_map"))
                 set_bit(EFI_OLD_MEMMAP, &efi.flags);

         return 0;
 }
-early_param("efi", parse_efi_cmdline);
+early_param("efi", arch_parse_efi_cmdline);
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 9ee3491e31fb..40e7cda52936 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -33,7 +33,7 @@

 /*
  * To make EFI call EFI runtime service in physical addressing mode we need
- * prelog/epilog before/after the invocation to disable interrupt, to
+ * prolog/epilog before/after the invocation to disable interrupt, to
  * claim EFI runtime service handler exclusively and to duplicate a memory in
  * low memory space say 0 - 3G.
  */
@@ -41,11 +41,13 @@ static unsigned long efi_rt_eflags;
41 41
42void efi_sync_low_kernel_mappings(void) {} 42void efi_sync_low_kernel_mappings(void) {}
43void __init efi_dump_pagetable(void) {} 43void __init efi_dump_pagetable(void) {}
44int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) 44int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
45{ 45{
46 return 0; 46 return 0;
47} 47}
48void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {} 48void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
49{
50}
49 51
50void __init efi_map_region(efi_memory_desc_t *md) 52void __init efi_map_region(efi_memory_desc_t *md)
51{ 53{
@@ -55,7 +57,7 @@ void __init efi_map_region(efi_memory_desc_t *md)
55void __init efi_map_region_fixed(efi_memory_desc_t *md) {} 57void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
56void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} 58void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
57 59
58void efi_call_phys_prelog(void) 60void __init efi_call_phys_prolog(void)
59{ 61{
60 struct desc_ptr gdt_descr; 62 struct desc_ptr gdt_descr;
61 63
@@ -69,7 +71,7 @@ void efi_call_phys_prelog(void)
69 load_gdt(&gdt_descr); 71 load_gdt(&gdt_descr);
70} 72}
71 73
72void efi_call_phys_epilog(void) 74void __init efi_call_phys_epilog(void)
73{ 75{
74 struct desc_ptr gdt_descr; 76 struct desc_ptr gdt_descr;
75 77
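[Editor's note] Several of the stubs above gain __init, which places boot-only code in a section the kernel frees after init, so accidental late callers also become visible. A rough userspace analog showing only the section placement; the discard step is kernel-specific and the section name is illustrative:

#include <stdio.h>

#define my_init __attribute__((section(".init.demo")))

static my_init void boot_only_setup(void)
{
        puts("runs once at startup");
}

int main(void)
{
        boot_only_setup(); /* in the kernel, calling an __init function after boot is a bug */
        return 0;
}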
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 290d397e1dd9..35aecb6042fb 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -79,7 +79,7 @@ static void __init early_code_mapping_set_exec(int executable)
79 } 79 }
80} 80}
81 81
82void __init efi_call_phys_prelog(void) 82void __init efi_call_phys_prolog(void)
83{ 83{
84 unsigned long vaddress; 84 unsigned long vaddress;
85 int pgd; 85 int pgd;
@@ -139,7 +139,7 @@ void efi_sync_low_kernel_mappings(void)
139 sizeof(pgd_t) * num_pgds); 139 sizeof(pgd_t) * num_pgds);
140} 140}
141 141
142int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) 142int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
143{ 143{
144 unsigned long text; 144 unsigned long text;
145 struct page *page; 145 struct page *page;
@@ -192,7 +192,7 @@ int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
192 return 0; 192 return 0;
193} 193}
194 194
195void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) 195void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
196{ 196{
197 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); 197 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
198 198
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index fbe66e626c09..040192b50d02 100644
--- a/arch/x86/platform/efi/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -27,13 +27,13 @@ ENTRY(efi_call_phys)
27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found 27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
28 * the values of these registers are the same. And, the corresponding 28 * the values of these registers are the same. And, the corresponding
29 * GDT entries are identical. So I will do nothing about segment reg 29 * GDT entries are identical. So I will do nothing about segment reg
30 * and GDT, but change GDT base register in prelog and epilog. 30 * and GDT, but change GDT base register in prolog and epilog.
31 */ 31 */
32 32
33 /* 33 /*
34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. 34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
35 * But to make it smoothly switch from virtual mode to flat mode. 35 * But to make it smoothly switch from virtual mode to flat mode.
36 * The mapping of lower virtual memory has been created in prelog and 36 * The mapping of lower virtual memory has been created in prolog and
37 * epilog. 37 * epilog.
38 */ 38 */
39 movl $1f, %edx 39 movl $1f, %edx
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
index 46aa25c8ce06..3c1c3866d82b 100644
--- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
+++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
@@ -10,10 +10,9 @@
10 */ 10 */
11 11
12 12
13/* __attribute__((weak)) makes these declarations overridable */
14/* For every CPU addition a new get_<cpuname>_ops interface needs 13/* For every CPU addition a new get_<cpuname>_ops interface needs
15 * to be added. 14 * to be added.
16 */ 15 */
17extern void *get_penwell_ops(void) __attribute__((weak)); 16extern void *get_penwell_ops(void);
18extern void *get_cloverview_ops(void) __attribute__((weak)); 17extern void *get_cloverview_ops(void);
19extern void *get_tangier_ops(void) __attribute__((weak)); 18extern void *get_tangier_ops(void);
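[Editor's note] The hunk above drops __attribute__((weak)) from extern declarations. Weak on a *declaration* makes the reference weak, so a missing definition links silently and the symbol resolves to NULL, a crash waiting to happen. A small sketch of that failure mode; get_demo_ops() is illustrative:

#include <stdio.h>

extern void *get_demo_ops(void) __attribute__((weak));

int main(void)
{
        if (get_demo_ops)                  /* NULL if no definition was linked */
                printf("ops: %p\n", get_demo_ops());
        else
                puts("get_demo_ops not linked; weak reference is NULL");
        return 0;
}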
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1a3f0445432a..fac5e4f9607c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1636,9 +1636,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
1636 xen_raw_console_write("mapping kernel into physical memory\n"); 1636 xen_raw_console_write("mapping kernel into physical memory\n");
1637 xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); 1637 xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
1638 1638
1639 /* Allocate and initialize top and mid mfn levels for p2m structure */
1640 xen_build_mfn_list_list();
1641
1642 /* keep using Xen gdt for now; no urgent need to change it */ 1639 /* keep using Xen gdt for now; no urgent need to change it */
1643 1640
1644#ifdef CONFIG_X86_32 1641#ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f62af7647ec9..a8a1a3d08d4d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1217,10 +1217,13 @@ static void __init xen_pagetable_p2m_copy(void)
1217static void __init xen_pagetable_init(void) 1217static void __init xen_pagetable_init(void)
1218{ 1218{
1219 paging_init(); 1219 paging_init();
1220 xen_setup_shared_info();
1221#ifdef CONFIG_X86_64 1220#ifdef CONFIG_X86_64
1222 xen_pagetable_p2m_copy(); 1221 xen_pagetable_p2m_copy();
1223#endif 1222#endif
1223 /* Allocate and initialize top and mid mfn levels for p2m structure */
1224 xen_build_mfn_list_list();
1225
1226 xen_setup_shared_info();
1224 xen_post_allocator_init(); 1227 xen_post_allocator_init();
1225} 1228}
1226static void xen_write_cr2(unsigned long cr2) 1229static void xen_write_cr2(unsigned long cr2)
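[Editor's note] Together with the enlighten.c hunk above, this moves xen_build_mfn_list_list() from xen_start_kernel() into xen_pagetable_init(), after paging_init(), presumably so its alloc_bootmem*() calls run once that allocator is usable. A toy sketch of the ordering constraint, with all names illustrative:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static bool bootmem_ready;

static void paging_init_demo(void)
{
        bootmem_ready = true;           /* allocator becomes usable */
}

static void build_mfn_list_list_demo(void)
{
        assert(bootmem_ready);          /* calling earlier would fail */
        puts("mfn list built from bootmem");
}

int main(void)
{
        paging_init_demo();
        build_mfn_list_list_demo();
        return 0;
}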
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 9f5983b01ed9..b456b048eca9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -163,6 +163,7 @@
163#include <linux/hash.h> 163#include <linux/hash.h>
164#include <linux/sched.h> 164#include <linux/sched.h>
165#include <linux/seq_file.h> 165#include <linux/seq_file.h>
166#include <linux/bootmem.h>
166 167
167#include <asm/cache.h> 168#include <asm/cache.h>
168#include <asm/setup.h> 169#include <asm/setup.h>
@@ -181,21 +182,20 @@ static void __init m2p_override_init(void);
181 182
182unsigned long xen_max_p2m_pfn __read_mostly; 183unsigned long xen_max_p2m_pfn __read_mostly;
183 184
185static unsigned long *p2m_mid_missing_mfn;
186static unsigned long *p2m_top_mfn;
187static unsigned long **p2m_top_mfn_p;
188
184/* Placeholders for holes in the address space */ 189/* Placeholders for holes in the address space */
185static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); 190static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
186static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); 191static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
187static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
188 192
189static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); 193static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
190static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
191static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
192 194
193static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); 195static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
194static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); 196static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
195static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
196 197
197RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 198RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
198RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
199 199
200/* For each I/O range remapped we may lose up to two leaf pages for the boundary 200/* For each I/O range remapped we may lose up to two leaf pages for the boundary
201 * violations and three mid pages to cover up to 3GB. With 201 * violations and three mid pages to cover up to 3GB. With
@@ -272,11 +272,11 @@ static void p2m_init(unsigned long *p2m)
272 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures 272 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
273 * 273 *
274 * This is called both at boot time, and after resuming from suspend: 274 * This is called both at boot time, and after resuming from suspend:
275 * - At boot time we're called very early, and must use extend_brk() 275 * - At boot time we're called rather early, and must use alloc_bootmem*()
276 * to allocate memory. 276 * to allocate memory.
277 * 277 *
278 * - After resume we're called from within stop_machine, but the mfn 278 * - After resume we're called from within stop_machine, but the mfn
279 * tree should alreay be completely allocated. 279 * tree should already be completely allocated.
280 */ 280 */
281void __ref xen_build_mfn_list_list(void) 281void __ref xen_build_mfn_list_list(void)
282{ 282{
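[Editor's note] The comment change above matches the switch from extend_brk() to alloc_bootmem_align(): brk space is a small fixed reservation bumped by a pointer and exhausted early, while bootmem serves later boot-time callers. A hedged userspace contrast; sizes and stand-ins are illustrative only:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

static char brk_area[4096];             /* like RESERVE_BRK() space */
static size_t brk_off;

static void *demo_extend_brk(size_t size, size_t align)
{
        brk_off = (brk_off + align - 1) & ~(align - 1);
        assert(brk_off + size <= sizeof(brk_area)); /* extend_brk() BUGs here */
        void *p = brk_area + brk_off;
        brk_off += size;
        return p;
}

int main(void)
{
        void *early = demo_extend_brk(64, 8);
        void *later = aligned_alloc(64, 4096); /* stands in for alloc_bootmem_align() */
        printf("%p %p\n", early, later);
        free(later);
        return 0;
}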
@@ -287,20 +287,17 @@ void __ref xen_build_mfn_list_list(void)
287 287
288 /* Pre-initialize p2m_top_mfn to be completely missing */ 288 /* Pre-initialize p2m_top_mfn to be completely missing */
289 if (p2m_top_mfn == NULL) { 289 if (p2m_top_mfn == NULL) {
290 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 290 p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
291 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 291 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
292 p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
293 p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
294 292
295 p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 293 p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
296 p2m_top_mfn_p_init(p2m_top_mfn_p); 294 p2m_top_mfn_p_init(p2m_top_mfn_p);
297 295
298 p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 296 p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
299 p2m_top_mfn_init(p2m_top_mfn); 297 p2m_top_mfn_init(p2m_top_mfn);
300 } else { 298 } else {
301 /* Reinitialise, mfn's all change after migration */ 299 /* Reinitialise, mfn's all change after migration */
302 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 300 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
303 p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
304 } 301 }
305 302
306 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { 303 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
@@ -328,10 +325,9 @@ void __ref xen_build_mfn_list_list(void)
328 /* 325 /*
329 * XXX boot-time only! We should never find 326 * XXX boot-time only! We should never find
330 * missing parts of the mfn tree after 327 * missing parts of the mfn tree after
331 * runtime. extend_brk() will BUG if we call 328 * runtime.
332 * it too late.
333 */ 329 */
334 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 330 mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
335 p2m_mid_mfn_init(mid_mfn_p, p2m_missing); 331 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
336 332
337 p2m_top_mfn_p[topidx] = mid_mfn_p; 333 p2m_top_mfn_p[topidx] = mid_mfn_p;
@@ -415,7 +411,6 @@ void __init xen_build_dynamic_phys_to_machine(void)
415 m2p_override_init(); 411 m2p_override_init();
416} 412}
417#ifdef CONFIG_X86_64 413#ifdef CONFIG_X86_64
418#include <linux/bootmem.h>
419unsigned long __init xen_revector_p2m_tree(void) 414unsigned long __init xen_revector_p2m_tree(void)
420{ 415{
421 unsigned long va_start; 416 unsigned long va_start;
@@ -477,7 +472,6 @@ unsigned long __init xen_revector_p2m_tree(void)
477 472
478 copy_page(new, mid_p); 473 copy_page(new, mid_p);
479 p2m_top[topidx][mididx] = &mfn_list[pfn_free]; 474 p2m_top[topidx][mididx] = &mfn_list[pfn_free];
480 p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
481 475
482 pfn_free += P2M_PER_PAGE; 476 pfn_free += P2M_PER_PAGE;
483 477
@@ -538,12 +532,13 @@ static bool alloc_p2m(unsigned long pfn)
538 unsigned topidx, mididx; 532 unsigned topidx, mididx;
539 unsigned long ***top_p, **mid; 533 unsigned long ***top_p, **mid;
540 unsigned long *top_mfn_p, *mid_mfn; 534 unsigned long *top_mfn_p, *mid_mfn;
535 unsigned long *p2m_orig;
541 536
542 topidx = p2m_top_index(pfn); 537 topidx = p2m_top_index(pfn);
543 mididx = p2m_mid_index(pfn); 538 mididx = p2m_mid_index(pfn);
544 539
545 top_p = &p2m_top[topidx]; 540 top_p = &p2m_top[topidx];
546 mid = *top_p; 541 mid = ACCESS_ONCE(*top_p);
547 542
548 if (mid == p2m_mid_missing) { 543 if (mid == p2m_mid_missing) {
549 /* Mid level is missing, allocate a new one */ 544 /* Mid level is missing, allocate a new one */
@@ -558,7 +553,7 @@ static bool alloc_p2m(unsigned long pfn)
558 } 553 }
559 554
560 top_mfn_p = &p2m_top_mfn[topidx]; 555 top_mfn_p = &p2m_top_mfn[topidx];
561 mid_mfn = p2m_top_mfn_p[topidx]; 556 mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
562 557
563 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); 558 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
564 559
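[Editor's note] The ACCESS_ONCE() additions above force a single load of a pointer another CPU may be updating concurrently, so the compiler cannot re-fetch it between checks. ACCESS_ONCE() is essentially a volatile cast; a minimal sketch, with DEMO_ACCESS_ONCE mirroring the kernel macro:

#include <stdio.h>

#define DEMO_ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

static int value = 42;
static int *shared_ptr = &value;        /* may be swapped by another CPU */

int main(void)
{
        int *snap = DEMO_ACCESS_ONCE(shared_ptr); /* one load, then reuse */
        printf("%d\n", *snap);
        return 0;
}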
@@ -566,6 +561,7 @@ static bool alloc_p2m(unsigned long pfn)
566 /* Separately check the mid mfn level */ 561 /* Separately check the mid mfn level */
567 unsigned long missing_mfn; 562 unsigned long missing_mfn;
568 unsigned long mid_mfn_mfn; 563 unsigned long mid_mfn_mfn;
564 unsigned long old_mfn;
569 565
570 mid_mfn = alloc_p2m_page(); 566 mid_mfn = alloc_p2m_page();
571 if (!mid_mfn) 567 if (!mid_mfn)
@@ -575,17 +571,19 @@ static bool alloc_p2m(unsigned long pfn)
575 571
576 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); 572 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
577 mid_mfn_mfn = virt_to_mfn(mid_mfn); 573 mid_mfn_mfn = virt_to_mfn(mid_mfn);
578 if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) 574 old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
575 if (old_mfn != missing_mfn) {
579 free_p2m_page(mid_mfn); 576 free_p2m_page(mid_mfn);
580 else 577 mid_mfn = mfn_to_virt(old_mfn);
578 } else {
581 p2m_top_mfn_p[topidx] = mid_mfn; 579 p2m_top_mfn_p[topidx] = mid_mfn;
580 }
582 } 581 }
583 582
584 if (p2m_top[topidx][mididx] == p2m_identity || 583 p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]);
585 p2m_top[topidx][mididx] == p2m_missing) { 584 if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) {
586 /* p2m leaf page is missing */ 585 /* p2m leaf page is missing */
587 unsigned long *p2m; 586 unsigned long *p2m;
588 unsigned long *p2m_orig = p2m_top[topidx][mididx];
589 587
590 p2m = alloc_p2m_page(); 588 p2m = alloc_p2m_page();
591 if (!p2m) 589 if (!p2m)
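[Editor's note] The cmpxchg hunk above makes the loser of the race adopt the winner's page (mid_mfn = mfn_to_virt(old_mfn)) instead of continuing with a stale pointer after freeing its own copy. A hedged single-threaded sketch of that install-or-adopt pattern; __sync_val_compare_and_swap() stands in for the kernel's cmpxchg():

#include <stdio.h>
#include <stdlib.h>

static long *slot;                      /* shared tree slot, NULL == missing */

static long *install_or_adopt(long *mine)
{
        long *old = __sync_val_compare_and_swap(&slot, NULL, mine);
        if (old != NULL) {              /* lost the race */
                free(mine);
                return old;             /* adopt the winner's page */
        }
        return mine;                    /* we installed it */
}

int main(void)
{
        long *page = install_or_adopt(calloc(512, sizeof(long)));
        printf("using %p\n", (void *)page);
        free(page);
        return 0;
}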
@@ -606,7 +604,6 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
606{ 604{
607 unsigned topidx, mididx, idx; 605 unsigned topidx, mididx, idx;
608 unsigned long *p2m; 606 unsigned long *p2m;
609 unsigned long *mid_mfn_p;
610 607
611 topidx = p2m_top_index(pfn); 608 topidx = p2m_top_index(pfn);
612 mididx = p2m_mid_index(pfn); 609 mididx = p2m_mid_index(pfn);
@@ -633,43 +630,21 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
633 630
634 p2m_top[topidx][mididx] = p2m; 631 p2m_top[topidx][mididx] = p2m;
635 632
636 /* For save/restore we need to MFN of the P2M saved */
637
638 mid_mfn_p = p2m_top_mfn_p[topidx];
639 WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
640 "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
641 topidx, mididx);
642 mid_mfn_p[mididx] = virt_to_mfn(p2m);
643
644 return true; 633 return true;
645} 634}
646 635
647static bool __init early_alloc_p2m_middle(unsigned long pfn) 636static bool __init early_alloc_p2m_middle(unsigned long pfn)
648{ 637{
649 unsigned topidx = p2m_top_index(pfn); 638 unsigned topidx = p2m_top_index(pfn);
650 unsigned long *mid_mfn_p;
651 unsigned long **mid; 639 unsigned long **mid;
652 640
653 mid = p2m_top[topidx]; 641 mid = p2m_top[topidx];
654 mid_mfn_p = p2m_top_mfn_p[topidx];
655 if (mid == p2m_mid_missing) { 642 if (mid == p2m_mid_missing) {
656 mid = extend_brk(PAGE_SIZE, PAGE_SIZE); 643 mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
657 644
658 p2m_mid_init(mid, p2m_missing); 645 p2m_mid_init(mid, p2m_missing);
659 646
660 p2m_top[topidx] = mid; 647 p2m_top[topidx] = mid;
661
662 BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
663 }
664 /* And the save/restore P2M tables.. */
665 if (mid_mfn_p == p2m_mid_missing_mfn) {
666 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
667 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
668
669 p2m_top_mfn_p[topidx] = mid_mfn_p;
670 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
671 /* Note: we don't set mid_mfn_p[midix] here,
672 * look in early_alloc_p2m() */
673 } 648 }
674 return true; 649 return true;
675} 650}
@@ -680,14 +655,13 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn)
680 * replace the P2M leaf with a p2m_missing or p2m_identity. 655 * replace the P2M leaf with a p2m_missing or p2m_identity.
681 * Stick the old page in the new P2M tree location. 656 * Stick the old page in the new P2M tree location.
682 */ 657 */
683bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) 658static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn)
684{ 659{
685 unsigned topidx; 660 unsigned topidx;
686 unsigned mididx; 661 unsigned mididx;
687 unsigned ident_pfns; 662 unsigned ident_pfns;
688 unsigned inv_pfns; 663 unsigned inv_pfns;
689 unsigned long *p2m; 664 unsigned long *p2m;
690 unsigned long *mid_mfn_p;
691 unsigned idx; 665 unsigned idx;
692 unsigned long pfn; 666 unsigned long pfn;
693 667
@@ -733,11 +707,6 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_
733found: 707found:
734 /* Found one, replace old with p2m_identity or p2m_missing */ 708 /* Found one, replace old with p2m_identity or p2m_missing */
735 p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); 709 p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
736 /* And the other for save/restore.. */
737 mid_mfn_p = p2m_top_mfn_p[topidx];
738 /* NOTE: Even if it is a p2m_identity it should still be point to
739 * a page filled with INVALID_P2M_ENTRY entries. */
740 mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);
741 710
742 /* Reset where we want to stick the old page in. */ 711 /* Reset where we want to stick the old page in. */
743 topidx = p2m_top_index(set_pfn); 712 topidx = p2m_top_index(set_pfn);
@@ -752,8 +721,6 @@ found:
752 721
753 p2m_init(p2m); 722 p2m_init(p2m);
754 p2m_top[topidx][mididx] = p2m; 723 p2m_top[topidx][mididx] = p2m;
755 mid_mfn_p = p2m_top_mfn_p[topidx];
756 mid_mfn_p[mididx] = virt_to_mfn(p2m);
757 724
758 return true; 725 return true;
759} 726}
@@ -763,7 +730,7 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
763 if (!early_alloc_p2m_middle(pfn)) 730 if (!early_alloc_p2m_middle(pfn))
764 return false; 731 return false;
765 732
766 if (early_can_reuse_p2m_middle(pfn, mfn)) 733 if (early_can_reuse_p2m_middle(pfn))
767 return __set_phys_to_machine(pfn, mfn); 734 return __set_phys_to_machine(pfn, mfn);
768 735
769 if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) 736 if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index af7216128d93..29834b3fd87f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -595,6 +595,7 @@ char * __init xen_memory_setup(void)
595 rc = 0; 595 rc = 0;
596 } 596 }
597 BUG_ON(rc); 597 BUG_ON(rc);
598 BUG_ON(memmap.nr_entries == 0);
598 599
599 /* 600 /*
600 * Xen won't allow a 1:1 mapping to be created to UNUSABLE 601 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a1d430b112b3..f473d268d387 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -158,7 +158,7 @@ cycle_t xen_clocksource_read(void)
158 cycle_t ret; 158 cycle_t ret;
159 159
160 preempt_disable_notrace(); 160 preempt_disable_notrace();
161 src = this_cpu_ptr(&xen_vcpu->time); 161 src = &__this_cpu_read(xen_vcpu)->time;
162 ret = pvclock_clocksource_read(src); 162 ret = pvclock_clocksource_read(src);
163 preempt_enable_notrace(); 163 preempt_enable_notrace();
164 return ret; 164 return ret;
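[Editor's note] The final hunk reads the per-cpu pointer first and only then takes the field address: this_cpu_ptr() applies the per-cpu offset to an address, which is wrong when the per-cpu variable is itself a pointer into shared data. A loose userspace analog using TLS in place of per-cpu storage; all names are illustrative:

#include <stdio.h>

struct vcpu_info { long time; };

static struct vcpu_info global_info = { 1234 };
static __thread struct vcpu_info *vcpu = &global_info; /* like a DEFINE_PER_CPU pointer */

int main(void)
{
        long *src = &vcpu->time;        /* read the TLS pointer, then offset: correct */
        printf("%ld\n", *src);
        return 0;
}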