Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/boot/compressed/eboot.c                     |  32
-rw-r--r--  arch/x86/include/asm/efi.h                           |  31
-rw-r--r--  arch/x86/include/asm/kvm_host.h                      |  16
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h                      |   2
-rw-r--r--  arch/x86/kvm/emulate.c                               | 250
-rw-r--r--  arch/x86/kvm/i8254.c                                 |   2
-rw-r--r--  arch/x86/kvm/paging_tmpl.h                           |   2
-rw-r--r--  arch/x86/kvm/svm.c                                   |   8
-rw-r--r--  arch/x86/kvm/vmx.c                                   |  24
-rw-r--r--  arch/x86/kvm/x86.c                                   |  38
-rw-r--r--  arch/x86/platform/efi/efi-bgrt.c                     |  36
-rw-r--r--  arch/x86/platform/efi/efi.c                          |  52
-rw-r--r--  arch/x86/platform/efi/efi_32.c                       |  12
-rw-r--r--  arch/x86/platform/efi/efi_64.c                       |   6
-rw-r--r--  arch/x86/platform/efi/efi_stub_32.S                  |   4
-rw-r--r--  arch/x86/platform/intel-mid/intel_mid_weak_decls.h   |   7
-rw-r--r--  arch/x86/xen/enlighten.c                             |   3
-rw-r--r--  arch/x86/xen/mmu.c                                   |   5
-rw-r--r--  arch/x86/xen/p2m.c                                   |  83
-rw-r--r--  arch/x86/xen/setup.c                                 |   1
-rw-r--r--  arch/x86/xen/time.c                                  |   2
21 files changed, 395 insertions(+), 221 deletions(-)
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index de8eebd6f67c..1acf605a646d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -330,8 +330,10 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
 	size = pci->romsize + sizeof(*rom);
 
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for rom\n");
 		return status;
+	}
 
 	memset(rom, 0, sizeof(*rom));
 
@@ -344,14 +346,18 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_VENDOR_ID, 1, &(rom->vendor));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->vendor\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_DEVICE_ID, 1, &(rom->devid));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->devid\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->get_location, pci, &(rom->segment),
 				 &(rom->bus), &(rom->device), &(rom->function));
@@ -432,8 +438,10 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
 	size = pci->romsize + sizeof(*rom);
 
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for rom\n");
 		return status;
+	}
 
 	rom->data.type = SETUP_PCI;
 	rom->data.len = size - sizeof(struct setup_data);
@@ -444,14 +452,18 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_VENDOR_ID, 1, &(rom->vendor));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->vendor\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
 				 PCI_DEVICE_ID, 1, &(rom->devid));
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to read rom->devid\n");
 		goto free_struct;
+	}
 
 	status = efi_early->call(pci->get_location, pci, &(rom->segment),
 				 &(rom->bus), &(rom->device), &(rom->function));
@@ -538,8 +550,10 @@ static void setup_efi_pci(struct boot_params *params)
 				EFI_LOADER_DATA,
 				size, (void **)&pci_handle);
 
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for pci_handle\n");
 		return;
+	}
 
 	status = efi_call_early(locate_handle,
 				EFI_LOCATE_BY_PROTOCOL, &pci_proto,
@@ -1105,6 +1119,10 @@ struct boot_params *make_boot_params(struct efi_config *c)
 
 	memset(sdt, 0, sizeof(*sdt));
 
+	status = efi_parse_options(cmdline_ptr);
+	if (status != EFI_SUCCESS)
+		goto fail2;
+
 	status = handle_cmdline_files(sys_table, image,
 				      (char *)(unsigned long)hdr->cmd_line_ptr,
 				      "initrd=", hdr->initrd_addr_max,
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0ec241ede5a2..9b11757975d0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -81,24 +81,23 @@ extern u64 asmlinkage efi_call(void *fp, ...);
  */
 #define __efi_call_virt(f, args...) efi_call_virt(f, args)
 
-extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
+extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 				 u32 type, u64 attribute);
 
 #endif /* CONFIG_X86_32 */
 
-extern int add_efi_memmap;
 extern struct efi_scratch efi_scratch;
-extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
-extern int efi_memblock_x86_reserve_range(void);
-extern void efi_call_phys_prelog(void);
-extern void efi_call_phys_epilog(void);
-extern void efi_unmap_memmap(void);
-extern void efi_memory_uc(u64 addr, unsigned long size);
+extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
+extern int __init efi_memblock_x86_reserve_range(void);
+extern void __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(void);
+extern void __init efi_unmap_memmap(void);
+extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
 extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
-extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
-extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
 extern void __init old_map_region(efi_memory_desc_t *md);
 extern void __init runtime_code_page_mkexec(void);
 extern void __init efi_runtime_mkexec(void);
@@ -162,16 +161,6 @@ static inline efi_status_t efi_thunk_set_virtual_address_map(
 extern bool efi_reboot_required(void);
 
 #else
-/*
- * IF EFI is not configured, have the EFI calls return -ENOSYS.
- */
-#define efi_call0(_f) (-ENOSYS)
-#define efi_call1(_f, _a1) (-ENOSYS)
-#define efi_call2(_f, _a1, _a2) (-ENOSYS)
-#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS)
-#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS)
-#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS)
-#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS)
 static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
 static inline bool efi_reboot_required(void)
 {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7d603a71ab3a..6ed0c30d6a0c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
+static inline u64 get_canonical(u64 la)
+{
+	return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+	return get_canonical(la) != la;
+#else
+	return false;
+#endif
+}
+
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					   unsigned long address);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420376eb..990a2fe1588d 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_INVEPT              50
 #define EXIT_REASON_PREEMPTION_TIMER    52
+#define EXIT_REASON_INVVPID             53
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
@@ -114,6 +115,7 @@
 	{ EXIT_REASON_EOI_INDUCED,           "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
+	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }
 
 #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a46207a05835..749f9fa38254 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }
 
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
 	u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+				 int cs_l)
+{
+	switch (ctxt->op_bytes) {
+	case 2:
+		ctxt->_eip = (u16)dst;
+		break;
+	case 4:
+		ctxt->_eip = (u32)dst;
+		break;
+	case 8:
+		if ((cs_l && is_noncanonical_address(dst)) ||
+		    (!cs_l && (dst & ~(u32)-1)))
+			return emulate_gp(ctxt, 0);
+		ctxt->_eip = dst;
+		break;
+	default:
+		WARN(1, "unsupported eip assignment size\n");
+	}
+	return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
 					       unsigned size)
 {
-	if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
-		return __do_insn_fetch_bytes(ctxt, size);
+	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
+
+	if (unlikely(done_size < size))
+		return __do_insn_fetch_bytes(ctxt, size - done_size);
 	else
 		return X86EMUL_CONTINUE;
 }
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				     u16 selector, int seg, u8 cpl, bool in_task_switch)
+				     u16 selector, int seg, u8 cpl,
+				     bool in_task_switch,
+				     struct desc_struct *desc)
 {
 	struct desc_struct seg_desc, old_desc;
 	u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+	if (desc)
+		*desc = seg_desc;
 	return X86EMUL_CONTINUE;
 exception:
 	return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				   u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
-	return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
 }
 
 static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel;
+	unsigned short sel, old_sel;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	u8 cpl = ctxt->ops->cpl(ctxt);
+
+	/* Assignment of RIP may only fail in 64-bit mode */
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+				 VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-	return X86EMUL_CONTINUE;
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		/* assigning eip failed; restore the old cs */
+		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+		return rc;
+	}
+	return rc;
 }
 
 static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	case 2: /* call near abs */ {
 		long int old_eip;
 		old_eip = ctxt->_eip;
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
+		if (rc != X86EMUL_CONTINUE)
+			break;
 		ctxt->src.val = old_eip;
 		rc = em_push(ctxt);
 		break;
 	}
 	case 4: /* jmp abs */
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
 		break;
 	case 5: /* jmp far */
 		rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 
 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	return em_pop(ctxt);
+	int rc;
+	unsigned long eip;
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return assign_eip_near(ctxt, eip);
 }
 
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned long cs;
+	unsigned long eip, cs;
+	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 
-	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+				 VCPU_SREG_CS);
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	if (ctxt->op_bytes == 4)
-		ctxt->_eip = (u32)ctxt->_eip;
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	}
 	return rc;
 }
 
@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
-	u64 msr_data;
+	u64 msr_data, rcx, rdx;
 	int usermode;
 	u16 cs_sel = 0, ss_sel = 0;
 
@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	else
 		usermode = X86EMUL_MODE_PROT32;
 
+	rcx = reg_read(ctxt, VCPU_REGS_RCX);
+	rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
 	cs.dpl = 3;
 	ss.dpl = 3;
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
+		if (is_noncanonical_address(rcx) ||
+		    is_noncanonical_address(rdx))
+			return emulate_gp(ctxt, 0);
 		break;
 	}
 	cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
-	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+	ctxt->_eip = rdx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
 
 	return X86EMUL_CONTINUE;
 }
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happenes at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+					cpl, true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
 
 static int em_call(struct x86_emulate_ctxt *ctxt)
 {
+	int rc;
 	long rel = ctxt->src.val;
 
 	ctxt->src.val = (unsigned long)ctxt->_eip;
-	jmp_rel(ctxt, rel);
+	rc = jmp_rel(ctxt, rel);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 	return em_push(ctxt);
 }
 
@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	u16 sel, old_cs;
 	ulong old_eip;
 	int rc;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	int cpl = ctxt->ops->cpl(ctxt);
 
-	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
 	old_eip = ctxt->_eip;
+	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-	if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
 
 	ctxt->src.val = old_cs;
 	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
-		return rc;
+		goto fail;
 
 	ctxt->src.val = old_eip;
-	return em_push(ctxt);
+	rc = em_push(ctxt);
+	/* If we failed, we tainted the memory, but the very least we should
+	   restore cs */
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
+	return rc;
+fail:
+	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	return rc;
+
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
+	unsigned long eip;
 
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_near(ctxt, eip);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 
-	return X86EMUL_CONTINUE;
+	return rc;
 }
 
 static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_clflush(struct x86_emulate_ctxt *ctxt)
+{
+	/* emulating clflush regardless of cpuid */
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
 	switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
 	X7(D(Undefined)),
 };
 
+static const struct gprefix pfx_0f_ae_7 = {
+	I(SrcMem | ByteOp, em_clflush), N, N, N,
+};
+
+static const struct group_dual group15 = { {
+	N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+}, {
+	N, N, N, N, N, N, N, N,
+} };
+
 static const struct gprefix pfx_0f_6f_0f_7f = {
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
 	N, I(ImplicitOps | EmulateOnUD, em_syscall),
 	II(ImplicitOps | Priv, em_clts, clts), N,
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
-	N, D(ImplicitOps | ModRM), N, N,
+	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
 	/* 0x10 - 0x1F */
 	N, N, N, N, N, N, N, N,
-	D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
+	D(ImplicitOps | ModRM | SrcMem | NoAccess),
+	N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
 	/* 0x20 - 0x2F */
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
-	D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
+	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
 	/* 0xB0 - 0xB7 */
 	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ done_prefixes:
 	/* Decode and fetch the destination operand: register or memory. */
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
-done:
 	if (ctxt->rip_relative)
 		ctxt->memopp->addr.mem.ea += ctxt->_eip;
 
+done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
@@ -4726,7 +4850,7 @@ special_insn:
 		break;
 	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x8d: /* lea r16/r32, m */
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ special_insn:
 		break;
 	case 0xe9: /* jmp rel */
 	case 0xeb: /* jmp rel short */
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 		ctxt->dst.type = OP_NONE; /* Disable writeback. */
 		break;
 	case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ twobyte_insn:
 		break;
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x90 ... 0x9f: /* setcc r/m8 */
 		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
 		break;
-	case 0xae: /* clflush */
-		break;
 	case 0xb6 ... 0xb7: /* movzx */
 		ctxt->dst.bytes = ctxt->op_bytes;
 		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d86471b76..298781d4cfb4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 		return;
 
 	timer = &pit->pit_state.timer;
+	mutex_lock(&pit->pit_state.lock);
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+	mutex_unlock(&pit->pit_state.lock);
 }
 
 static void destroy_pit_timer(struct kvm_pit *pit)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 806d58e3c320..fd49c867b25a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,7 +298,7 @@ retry_walk:
 	}
 #endif
 	walker->max_level = walker->level;
-	ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
+	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
 	accessed_dirty = PT_GUEST_ACCESSED_MASK;
 	pt_access = pte_access = ACC_ALL;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 65510f624dfe..7527cefc5a43 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
 	msr.host_initiated = false;
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-	if (svm_set_msr(&svm->vcpu, &msr)) {
+	if (kvm_set_msr(&svm->vcpu, &msr)) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(&svm->vcpu, 0);
 	} else {
@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = exit_code;
-		return 0;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}
 
 	return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0acac81f198b..a8b76c4c95e2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	default:
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
+			u64 old_msr_data = msr->data;
 			msr->data = data;
 			if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
 				preempt_disable();
-				kvm_set_shared_msr(msr->index, msr->data,
+				ret = kvm_set_shared_msr(msr->index, msr->data,
 						   msr->mask);
 				preempt_enable();
+				if (ret)
+					msr->data = old_msr_data;
 			}
 			break;
 		}
@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
 	msr.data = data;
 	msr.index = ecx;
 	msr.host_initiated = false;
-	if (vmx_set_msr(vcpu, &msr) != 0) {
+	if (kvm_set_msr(vcpu, &msr) != 0) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_MWAIT_INSTRUCTION]       = handle_mwait,
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
+	[EXIT_REASON_INVVPID]                 = handle_invvpid,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
 	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
-	case EXIT_REASON_INVEPT:
+	case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
 		/*
 		 * VMX instructions trap unconditionally. This allows L1 to
 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu);
 	else {
-		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-		vcpu->run->hw.hardware_exit_reason = exit_reason;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}
-	return 0;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c8f94331f8..0033df32a745 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
 		shared_msr_update(i, shared_msrs_global.msrs[i]);
 }
 
-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+	int err;
 
 	if (((value ^ smsr->values[slot].curr) & mask) == 0)
-		return;
+		return 0;
 	smsr->values[slot].curr = value;
-	wrmsrl(shared_msrs_global.msrs[slot], value);
+	err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+	if (err)
+		return 1;
+
 	if (!smsr->registered) {
 		smsr->urn.on_user_return = kvm_on_user_return;
 		user_return_notifier_register(&smsr->urn);
 		smsr->registered = true;
 	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
 
@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
-
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  */
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
+	switch (msr->index) {
+	case MSR_FS_BASE:
+	case MSR_GS_BASE:
+	case MSR_KERNEL_GS_BASE:
+	case MSR_CSTAR:
+	case MSR_LSTAR:
+		if (is_noncanonical_address(msr->data))
+			return 1;
+		break;
+	case MSR_IA32_SYSENTER_EIP:
+	case MSR_IA32_SYSENTER_ESP:
+		/*
+		 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+		 * non-canonical address is written on Intel but not on
+		 * AMD (which ignores the top 32-bits, because it does
+		 * not implement 64-bit SYSENTER).
+		 *
+		 * 64-bit code should hence be able to write a non-canonical
+		 * value on AMD. Making the address canonical ensures that
+		 * vmentry does not fail on Intel after writing a non-canonical
+		 * value, and that something deterministic happens if the guest
+		 * invokes 64-bit SYSENTER.
+		 */
+		msr->data = get_canonical(msr->data);
+	}
 	return kvm_x86_ops->set_msr(vcpu, msr);
 }
+EXPORT_SYMBOL_GPL(kvm_set_msr);
 
 /*
  * Adapt set_msr() to msr_io()'s calling convention
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index f15103dff4b4..d143d216d52b 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -40,20 +40,40 @@ void __init efi_bgrt_init(void)
 	if (ACPI_FAILURE(status))
 		return;
 
-	if (bgrt_tab->header.length < sizeof(*bgrt_tab))
+	if (bgrt_tab->header.length < sizeof(*bgrt_tab)) {
+		pr_err("Ignoring BGRT: invalid length %u (expected %zu)\n",
+		       bgrt_tab->header.length, sizeof(*bgrt_tab));
 		return;
-	if (bgrt_tab->version != 1 || bgrt_tab->status != 1)
+	}
+	if (bgrt_tab->version != 1) {
+		pr_err("Ignoring BGRT: invalid version %u (expected 1)\n",
+		       bgrt_tab->version);
+		return;
+	}
+	if (bgrt_tab->status != 1) {
+		pr_err("Ignoring BGRT: invalid status %u (expected 1)\n",
+		       bgrt_tab->status);
+		return;
+	}
+	if (bgrt_tab->image_type != 0) {
+		pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
+		       bgrt_tab->image_type);
 		return;
-	if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
+	}
+	if (!bgrt_tab->image_address) {
+		pr_err("Ignoring BGRT: null image address\n");
 		return;
+	}
 
 	image = efi_lookup_mapped_addr(bgrt_tab->image_address);
 	if (!image) {
 		image = early_memremap(bgrt_tab->image_address,
 				       sizeof(bmp_header));
 		ioremapped = true;
-		if (!image)
+		if (!image) {
+			pr_err("Ignoring BGRT: failed to map image header memory\n");
 			return;
+		}
 	}
 
 	memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
@@ -61,14 +81,18 @@ void __init efi_bgrt_init(void)
 	early_iounmap(image, sizeof(bmp_header));
 	bgrt_image_size = bmp_header.size;
 
-	bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
-	if (!bgrt_image)
+	bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
+	if (!bgrt_image) {
+		pr_err("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n",
+		       bgrt_image_size);
 		return;
+	}
 
 	if (ioremapped) {
 		image = early_memremap(bgrt_tab->image_address,
 				       bmp_header.size);
 		if (!image) {
+			pr_err("Ignoring BGRT: failed to map image memory\n");
 			kfree(bgrt_image);
 			bgrt_image = NULL;
 			return;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 850da94fef30..dbc8627a5cdf 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -70,17 +70,7 @@ static efi_config_table_type_t arch_tables[] __initdata = {
 
 u64 efi_setup;		/* efi setup_data physical address */
 
-static bool disable_runtime __initdata = false;
-static int __init setup_noefi(char *arg)
-{
-	disable_runtime = true;
-	return 0;
-}
-early_param("noefi", setup_noefi);
-
-int add_efi_memmap;
-EXPORT_SYMBOL(add_efi_memmap);
-
+static int add_efi_memmap __initdata;
 static int __init setup_add_efi_memmap(char *arg)
 {
 	add_efi_memmap = 1;
@@ -96,7 +86,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
 {
 	efi_status_t status;
 
-	efi_call_phys_prelog();
+	efi_call_phys_prolog();
 	status = efi_call_phys(efi_phys.set_virtual_address_map,
 			       memory_map_size, descriptor_size,
 			       descriptor_version, virtual_map);
@@ -210,9 +200,12 @@ static void __init print_efi_memmap(void)
 	for (p = memmap.map, i = 0;
 	     p < memmap.map_end;
 	     p += memmap.desc_size, i++) {
+		char buf[64];
+
 		md = p;
-		pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n",
-			i, md->type, md->attribute, md->phys_addr,
+		pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
+			i, efi_md_typeattr_format(buf, sizeof(buf), md),
+			md->phys_addr,
 			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
 			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
 	}
@@ -344,9 +337,9 @@ static int __init efi_runtime_init32(void)
 	}
 
 	/*
-	 * We will only need *early* access to the following two
-	 * EFI runtime services before set_virtual_address_map
-	 * is invoked.
+	 * We will only need *early* access to the SetVirtualAddressMap
+	 * EFI runtime service. All other runtime services will be called
+	 * via the virtual mapping.
 	 */
 	efi_phys.set_virtual_address_map =
 		(efi_set_virtual_address_map_t *)
@@ -368,9 +361,9 @@ static int __init efi_runtime_init64(void)
 	}
 
 	/*
-	 * We will only need *early* access to the following two
-	 * EFI runtime services before set_virtual_address_map
-	 * is invoked.
+	 * We will only need *early* access to the SetVirtualAddressMap
+	 * EFI runtime service. All other runtime services will be called
+	 * via the virtual mapping.
 	 */
 	efi_phys.set_virtual_address_map =
 		(efi_set_virtual_address_map_t *)
@@ -492,7 +485,7 @@ void __init efi_init(void)
 	if (!efi_runtime_supported())
 		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
 	else {
-		if (disable_runtime || efi_runtime_init())
+		if (efi_runtime_disabled() || efi_runtime_init())
 			return;
 	}
 	if (efi_memmap_init())
@@ -537,7 +530,7 @@ void __init runtime_code_page_mkexec(void)
 	}
 }
 
-void efi_memory_uc(u64 addr, unsigned long size)
+void __init efi_memory_uc(u64 addr, unsigned long size)
 {
 	unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
 	u64 npages;
@@ -732,6 +725,7 @@ static void __init kexec_enter_virtual_mode(void)
 	 */
 	if (!efi_is_native()) {
 		efi_unmap_memmap();
+		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 		return;
 	}
 
@@ -805,6 +799,7 @@ static void __init __efi_enter_virtual_mode(void)
 	new_memmap = efi_map_regions(&count, &pg_shift);
 	if (!new_memmap) {
 		pr_err("Error reallocating memory, EFI runtime non-functional!\n");
+		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 		return;
 	}
 
@@ -812,8 +807,10 @@ static void __init __efi_enter_virtual_mode(void)
 
 	BUG_ON(!efi.systab);
 
-	if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
+	if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) {
+		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 		return;
+	}
 
 	efi_sync_low_kernel_mappings();
 	efi_dump_pagetable();
@@ -938,14 +935,11 @@ u64 efi_mem_attributes(unsigned long phys_addr)
 	return 0;
 }
 
-static int __init parse_efi_cmdline(char *str)
+static int __init arch_parse_efi_cmdline(char *str)
 {
-	if (*str == '=')
-		str++;
-
-	if (!strncmp(str, "old_map", 7))
+	if (parse_option_str(str, "old_map"))
 		set_bit(EFI_OLD_MEMMAP, &efi.flags);
 
 	return 0;
 }
-early_param("efi", parse_efi_cmdline);
+early_param("efi", arch_parse_efi_cmdline);
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 9ee3491e31fb..40e7cda52936 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -33,7 +33,7 @@
 
 /*
  * To make EFI call EFI runtime service in physical addressing mode we need
- * prelog/epilog before/after the invocation to disable interrupt, to
+ * prolog/epilog before/after the invocation to disable interrupt, to
  * claim EFI runtime service handler exclusively and to duplicate a memory in
  * low memory space say 0 - 3G.
  */
@@ -41,11 +41,13 @@ static unsigned long efi_rt_eflags;
 
 void efi_sync_low_kernel_mappings(void) {}
 void __init efi_dump_pagetable(void) {}
-int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
 	return 0;
 }
-void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {}
+void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+{
+}
 
 void __init efi_map_region(efi_memory_desc_t *md)
 {
@@ -55,7 +57,7 @@ void __init efi_map_region(efi_memory_desc_t *md)
 void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
 void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
 
-void efi_call_phys_prelog(void)
+void __init efi_call_phys_prolog(void)
 {
 	struct desc_ptr gdt_descr;
 
@@ -69,7 +71,7 @@ void efi_call_phys_prelog(void)
 	load_gdt(&gdt_descr);
 }
 
-void efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(void)
 {
 	struct desc_ptr gdt_descr;
 
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 290d397e1dd9..35aecb6042fb 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -79,7 +79,7 @@ static void __init early_code_mapping_set_exec(int executable)
79 } 79 }
80} 80}
81 81
82void __init efi_call_phys_prelog(void) 82void __init efi_call_phys_prolog(void)
83{ 83{
84 unsigned long vaddress; 84 unsigned long vaddress;
85 int pgd; 85 int pgd;
@@ -139,7 +139,7 @@ void efi_sync_low_kernel_mappings(void)
139 sizeof(pgd_t) * num_pgds); 139 sizeof(pgd_t) * num_pgds);
140} 140}
141 141
142int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) 142int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
143{ 143{
144 unsigned long text; 144 unsigned long text;
145 struct page *page; 145 struct page *page;
@@ -192,7 +192,7 @@ int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
192 return 0; 192 return 0;
193} 193}
194 194
195void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) 195void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
196{ 196{
197 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); 197 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
198 198
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index fbe66e626c09..040192b50d02 100644
--- a/arch/x86/platform/efi/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -27,13 +27,13 @@ ENTRY(efi_call_phys)
27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found 27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
28 * the values of these registers are the same. And, the corresponding 28 * the values of these registers are the same. And, the corresponding
29 * GDT entries are identical. So I will do nothing about segment reg 29 * GDT entries are identical. So I will do nothing about segment reg
30 * and GDT, but change GDT base register in prelog and epilog. 30 * and GDT, but change GDT base register in prolog and epilog.
31 */ 31 */
32 32
33 /* 33 /*
34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. 34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
35 * But to make it smoothly switch from virtual mode to flat mode. 35 * But to make it smoothly switch from virtual mode to flat mode.
36 * The mapping of lower virtual memory has been created in prelog and 36 * The mapping of lower virtual memory has been created in prolog and
37 * epilog. 37 * epilog.
38 */ 38 */
39 movl $1f, %edx 39 movl $1f, %edx
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
index 46aa25c8ce06..3c1c3866d82b 100644
--- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
+++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
@@ -10,10 +10,9 @@
10 */ 10 */
11 11
12 12
13/* __attribute__((weak)) makes these declarations overridable */
14/* For every CPU addition a new get_<cpuname>_ops interface needs 13/* For every CPU addition a new get_<cpuname>_ops interface needs
15 * to be added. 14 * to be added.
16 */ 15 */
17extern void *get_penwell_ops(void) __attribute__((weak)); 16extern void *get_penwell_ops(void);
18extern void *get_cloverview_ops(void) __attribute__((weak)); 17extern void *get_cloverview_ops(void);
19extern void *get_tangier_ops(void) __attribute__((weak)); 18extern void *get_tangier_ops(void);
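
The hunk above drops the __attribute__((weak)) markers from the get_*_ops() declarations. The attribute is meaningful on a definition intended as an overridable default; on an extern declaration of a function that always has a strong definition it adds nothing and can mask a missing symbol at link time. A small self-contained illustration of where weak normally belongs (the function name is hypothetical, not Intel MID code):

#include <stdio.h>

/* Header-style declaration: no attribute needed. */
int get_example_value(void);

/* Overridable default: weak belongs on the definition, and another
 * object file may provide a strong definition that replaces it. */
__attribute__((weak)) int get_example_value(void)
{
	return 0;
}

int main(void)
{
	printf("%d\n", get_example_value());
	return 0;
}
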
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1a3f0445432a..fac5e4f9607c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1636,9 +1636,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
1636 xen_raw_console_write("mapping kernel into physical memory\n"); 1636 xen_raw_console_write("mapping kernel into physical memory\n");
1637 xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); 1637 xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
1638 1638
1639 /* Allocate and initialize top and mid mfn levels for p2m structure */
1640 xen_build_mfn_list_list();
1641
1642 /* keep using Xen gdt for now; no urgent need to change it */ 1639 /* keep using Xen gdt for now; no urgent need to change it */
1643 1640
1644#ifdef CONFIG_X86_32 1641#ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f62af7647ec9..a8a1a3d08d4d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1217,10 +1217,13 @@ static void __init xen_pagetable_p2m_copy(void)
1217static void __init xen_pagetable_init(void) 1217static void __init xen_pagetable_init(void)
1218{ 1218{
1219 paging_init(); 1219 paging_init();
1220 xen_setup_shared_info();
1221#ifdef CONFIG_X86_64 1220#ifdef CONFIG_X86_64
1222 xen_pagetable_p2m_copy(); 1221 xen_pagetable_p2m_copy();
1223#endif 1222#endif
1223 /* Allocate and initialize top and mid mfn levels for p2m structure */
1224 xen_build_mfn_list_list();
1225
1226 xen_setup_shared_info();
1224 xen_post_allocator_init(); 1227 xen_post_allocator_init();
1225} 1228}
1226static void xen_write_cr2(unsigned long cr2) 1229static void xen_write_cr2(unsigned long cr2)
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 9f5983b01ed9..b456b048eca9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -163,6 +163,7 @@
163#include <linux/hash.h> 163#include <linux/hash.h>
164#include <linux/sched.h> 164#include <linux/sched.h>
165#include <linux/seq_file.h> 165#include <linux/seq_file.h>
166#include <linux/bootmem.h>
166 167
167#include <asm/cache.h> 168#include <asm/cache.h>
168#include <asm/setup.h> 169#include <asm/setup.h>
@@ -181,21 +182,20 @@ static void __init m2p_override_init(void);
181 182
182unsigned long xen_max_p2m_pfn __read_mostly; 183unsigned long xen_max_p2m_pfn __read_mostly;
183 184
185static unsigned long *p2m_mid_missing_mfn;
186static unsigned long *p2m_top_mfn;
187static unsigned long **p2m_top_mfn_p;
188
184/* Placeholders for holes in the address space */ 189/* Placeholders for holes in the address space */
185static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); 190static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
186static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); 191static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
187static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
188 192
189static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); 193static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
190static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
191static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
192 194
193static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); 195static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
194static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); 196static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
195static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
196 197
197RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 198RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
198RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
199 199
200/* For each I/O range remapped we may lose up to two leaf pages for the boundary 200/* For each I/O range remapped we may lose up to two leaf pages for the boundary
201 * violations and three mid pages to cover up to 3GB. With 201 * violations and three mid pages to cover up to 3GB. With
@@ -272,11 +272,11 @@ static void p2m_init(unsigned long *p2m)
272 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures 272 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
273 * 273 *
274 * This is called both at boot time, and after resuming from suspend: 274 * This is called both at boot time, and after resuming from suspend:
275 * - At boot time we're called very early, and must use extend_brk() 275 * - At boot time we're called rather early, and must use alloc_bootmem*()
276 * to allocate memory. 276 * to allocate memory.
277 * 277 *
278 * - After resume we're called from within stop_machine, but the mfn 278 * - After resume we're called from within stop_machine, but the mfn
279 * tree should alreay be completely allocated. 279 * tree should already be completely allocated.
280 */ 280 */
281void __ref xen_build_mfn_list_list(void) 281void __ref xen_build_mfn_list_list(void)
282{ 282{
@@ -287,20 +287,17 @@ void __ref xen_build_mfn_list_list(void)
287 287
288 /* Pre-initialize p2m_top_mfn to be completely missing */ 288 /* Pre-initialize p2m_top_mfn to be completely missing */
289 if (p2m_top_mfn == NULL) { 289 if (p2m_top_mfn == NULL) {
290 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 290 p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
291 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 291 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
292 p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
293 p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
294 292
295 p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 293 p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
296 p2m_top_mfn_p_init(p2m_top_mfn_p); 294 p2m_top_mfn_p_init(p2m_top_mfn_p);
297 295
298 p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 296 p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
299 p2m_top_mfn_init(p2m_top_mfn); 297 p2m_top_mfn_init(p2m_top_mfn);
300 } else { 298 } else {
301 /* Reinitialise, mfn's all change after migration */ 299 /* Reinitialise, mfn's all change after migration */
302 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 300 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
303 p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
304 } 301 }
305 302
306 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { 303 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
@@ -328,10 +325,9 @@ void __ref xen_build_mfn_list_list(void)
328 /* 325 /*
329 * XXX boot-time only! We should never find 326 * XXX boot-time only! We should never find
330 * missing parts of the mfn tree after 327 * missing parts of the mfn tree after
331 * runtime. extend_brk() will BUG if we call 328 * runtime.
332 * it too late.
333 */ 329 */
334 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 330 mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
335 p2m_mid_mfn_init(mid_mfn_p, p2m_missing); 331 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
336 332
337 p2m_top_mfn_p[topidx] = mid_mfn_p; 333 p2m_top_mfn_p[topidx] = mid_mfn_p;
@@ -415,7 +411,6 @@ void __init xen_build_dynamic_phys_to_machine(void)
415 m2p_override_init(); 411 m2p_override_init();
416} 412}
417#ifdef CONFIG_X86_64 413#ifdef CONFIG_X86_64
418#include <linux/bootmem.h>
419unsigned long __init xen_revector_p2m_tree(void) 414unsigned long __init xen_revector_p2m_tree(void)
420{ 415{
421 unsigned long va_start; 416 unsigned long va_start;
@@ -477,7 +472,6 @@ unsigned long __init xen_revector_p2m_tree(void)
477 472
478 copy_page(new, mid_p); 473 copy_page(new, mid_p);
479 p2m_top[topidx][mididx] = &mfn_list[pfn_free]; 474 p2m_top[topidx][mididx] = &mfn_list[pfn_free];
480 p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
481 475
482 pfn_free += P2M_PER_PAGE; 476 pfn_free += P2M_PER_PAGE;
483 477
@@ -538,12 +532,13 @@ static bool alloc_p2m(unsigned long pfn)
538 unsigned topidx, mididx; 532 unsigned topidx, mididx;
539 unsigned long ***top_p, **mid; 533 unsigned long ***top_p, **mid;
540 unsigned long *top_mfn_p, *mid_mfn; 534 unsigned long *top_mfn_p, *mid_mfn;
535 unsigned long *p2m_orig;
541 536
542 topidx = p2m_top_index(pfn); 537 topidx = p2m_top_index(pfn);
543 mididx = p2m_mid_index(pfn); 538 mididx = p2m_mid_index(pfn);
544 539
545 top_p = &p2m_top[topidx]; 540 top_p = &p2m_top[topidx];
546 mid = *top_p; 541 mid = ACCESS_ONCE(*top_p);
547 542
548 if (mid == p2m_mid_missing) { 543 if (mid == p2m_mid_missing) {
549 /* Mid level is missing, allocate a new one */ 544 /* Mid level is missing, allocate a new one */
@@ -558,7 +553,7 @@ static bool alloc_p2m(unsigned long pfn)
558 } 553 }
559 554
560 top_mfn_p = &p2m_top_mfn[topidx]; 555 top_mfn_p = &p2m_top_mfn[topidx];
561 mid_mfn = p2m_top_mfn_p[topidx]; 556 mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
562 557
563 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); 558 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
564 559
@@ -566,6 +561,7 @@ static bool alloc_p2m(unsigned long pfn)
566 /* Separately check the mid mfn level */ 561 /* Separately check the mid mfn level */
567 unsigned long missing_mfn; 562 unsigned long missing_mfn;
568 unsigned long mid_mfn_mfn; 563 unsigned long mid_mfn_mfn;
564 unsigned long old_mfn;
569 565
570 mid_mfn = alloc_p2m_page(); 566 mid_mfn = alloc_p2m_page();
571 if (!mid_mfn) 567 if (!mid_mfn)
@@ -575,17 +571,19 @@ static bool alloc_p2m(unsigned long pfn)
575 571
576 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); 572 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
577 mid_mfn_mfn = virt_to_mfn(mid_mfn); 573 mid_mfn_mfn = virt_to_mfn(mid_mfn);
578 if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) 574 old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
575 if (old_mfn != missing_mfn) {
579 free_p2m_page(mid_mfn); 576 free_p2m_page(mid_mfn);
580 else 577 mid_mfn = mfn_to_virt(old_mfn);
578 } else {
581 p2m_top_mfn_p[topidx] = mid_mfn; 579 p2m_top_mfn_p[topidx] = mid_mfn;
580 }
582 } 581 }
583 582
584 if (p2m_top[topidx][mididx] == p2m_identity || 583 p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]);
585 p2m_top[topidx][mididx] == p2m_missing) { 584 if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) {
586 /* p2m leaf page is missing */ 585 /* p2m leaf page is missing */
587 unsigned long *p2m; 586 unsigned long *p2m;
588 unsigned long *p2m_orig = p2m_top[topidx][mididx];
589 587
590 p2m = alloc_p2m_page(); 588 p2m = alloc_p2m_page();
591 if (!p2m) 589 if (!p2m)
@@ -606,7 +604,6 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
606{ 604{
607 unsigned topidx, mididx, idx; 605 unsigned topidx, mididx, idx;
608 unsigned long *p2m; 606 unsigned long *p2m;
609 unsigned long *mid_mfn_p;
610 607
611 topidx = p2m_top_index(pfn); 608 topidx = p2m_top_index(pfn);
612 mididx = p2m_mid_index(pfn); 609 mididx = p2m_mid_index(pfn);
@@ -633,43 +630,21 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
633 630
634 p2m_top[topidx][mididx] = p2m; 631 p2m_top[topidx][mididx] = p2m;
635 632
636 /* For save/restore we need to MFN of the P2M saved */
637
638 mid_mfn_p = p2m_top_mfn_p[topidx];
639 WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
640 "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
641 topidx, mididx);
642 mid_mfn_p[mididx] = virt_to_mfn(p2m);
643
644 return true; 633 return true;
645} 634}
646 635
647static bool __init early_alloc_p2m_middle(unsigned long pfn) 636static bool __init early_alloc_p2m_middle(unsigned long pfn)
648{ 637{
649 unsigned topidx = p2m_top_index(pfn); 638 unsigned topidx = p2m_top_index(pfn);
650 unsigned long *mid_mfn_p;
651 unsigned long **mid; 639 unsigned long **mid;
652 640
653 mid = p2m_top[topidx]; 641 mid = p2m_top[topidx];
654 mid_mfn_p = p2m_top_mfn_p[topidx];
655 if (mid == p2m_mid_missing) { 642 if (mid == p2m_mid_missing) {
656 mid = extend_brk(PAGE_SIZE, PAGE_SIZE); 643 mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
657 644
658 p2m_mid_init(mid, p2m_missing); 645 p2m_mid_init(mid, p2m_missing);
659 646
660 p2m_top[topidx] = mid; 647 p2m_top[topidx] = mid;
661
662 BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
663 }
664 /* And the save/restore P2M tables.. */
665 if (mid_mfn_p == p2m_mid_missing_mfn) {
666 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
667 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
668
669 p2m_top_mfn_p[topidx] = mid_mfn_p;
670 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
671 /* Note: we don't set mid_mfn_p[midix] here,
672 * look in early_alloc_p2m() */
673 } 648 }
674 return true; 649 return true;
675} 650}
@@ -680,14 +655,13 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn)
680 * replace the P2M leaf with a p2m_missing or p2m_identity. 655 * replace the P2M leaf with a p2m_missing or p2m_identity.
681 * Stick the old page in the new P2M tree location. 656 * Stick the old page in the new P2M tree location.
682 */ 657 */
683bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) 658static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn)
684{ 659{
685 unsigned topidx; 660 unsigned topidx;
686 unsigned mididx; 661 unsigned mididx;
687 unsigned ident_pfns; 662 unsigned ident_pfns;
688 unsigned inv_pfns; 663 unsigned inv_pfns;
689 unsigned long *p2m; 664 unsigned long *p2m;
690 unsigned long *mid_mfn_p;
691 unsigned idx; 665 unsigned idx;
692 unsigned long pfn; 666 unsigned long pfn;
693 667
@@ -733,11 +707,6 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_
733found: 707found:
734 /* Found one, replace old with p2m_identity or p2m_missing */ 708 /* Found one, replace old with p2m_identity or p2m_missing */
735 p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); 709 p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
736 /* And the other for save/restore.. */
737 mid_mfn_p = p2m_top_mfn_p[topidx];
738 /* NOTE: Even if it is a p2m_identity it should still be point to
739 * a page filled with INVALID_P2M_ENTRY entries. */
740 mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);
741 710
742 /* Reset where we want to stick the old page in. */ 711 /* Reset where we want to stick the old page in. */
743 topidx = p2m_top_index(set_pfn); 712 topidx = p2m_top_index(set_pfn);
@@ -752,8 +721,6 @@ found:
752 721
753 p2m_init(p2m); 722 p2m_init(p2m);
754 p2m_top[topidx][mididx] = p2m; 723 p2m_top[topidx][mididx] = p2m;
755 mid_mfn_p = p2m_top_mfn_p[topidx];
756 mid_mfn_p[mididx] = virt_to_mfn(p2m);
757 724
758 return true; 725 return true;
759} 726}
@@ -763,7 +730,7 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
763 if (!early_alloc_p2m_middle(pfn)) 730 if (!early_alloc_p2m_middle(pfn))
764 return false; 731 return false;
765 732
766 if (early_can_reuse_p2m_middle(pfn, mfn)) 733 if (early_can_reuse_p2m_middle(pfn))
767 return __set_phys_to_machine(pfn, mfn); 734 return __set_phys_to_machine(pfn, mfn);
768 735
769 if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) 736 if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
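
The alloc_p2m() hunks above read the top-level entries once through ACCESS_ONCE() and, when the cmpxchg() on the mid-mfn slot is lost, free the local page and adopt the value the winning CPU installed instead of leaving mid_mfn pointing at the freed copy. A userspace sketch of that install-once pattern, with C11 atomics standing in for the kernel's cmpxchg(); the names and types are illustrative, not the Xen code:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static _Atomic(long *) slot;	/* NULL plays the role of "missing" */

static long *install_page(void)
{
	long *expected = NULL;
	long *newpage = calloc(1, sizeof(long));

	if (!newpage)
		return NULL;

	if (!atomic_compare_exchange_strong(&slot, &expected, newpage)) {
		/* Lost the race: drop our copy, use what the winner installed. */
		free(newpage);
		return expected;
	}
	return newpage;
}

int main(void)
{
	long *a = install_page();
	long *b = install_page();	/* sees the already-installed page */

	printf("%s\n", a == b ? "same page reused" : "bug");
	return 0;
}
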
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index af7216128d93..29834b3fd87f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -595,6 +595,7 @@ char * __init xen_memory_setup(void)
595 rc = 0; 595 rc = 0;
596 } 596 }
597 BUG_ON(rc); 597 BUG_ON(rc);
598 BUG_ON(memmap.nr_entries == 0);
598 599
599 /* 600 /*
600 * Xen won't allow a 1:1 mapping to be created to UNUSABLE 601 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a1d430b112b3..f473d268d387 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -158,7 +158,7 @@ cycle_t xen_clocksource_read(void)
158 cycle_t ret; 158 cycle_t ret;
159 159
160 preempt_disable_notrace(); 160 preempt_disable_notrace();
161 src = this_cpu_ptr(&xen_vcpu->time); 161 src = &__this_cpu_read(xen_vcpu)->time;
162 ret = pvclock_clocksource_read(src); 162 ret = pvclock_clocksource_read(src);
163 preempt_enable_notrace(); 163 preempt_enable_notrace();
164 return ret; 164 return ret;
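
The xen_clocksource_read() change reflects that xen_vcpu is a per-CPU pointer: this CPU's pointer has to be read first (__this_cpu_read(xen_vcpu)) and only then dereferenced to reach ->time, rather than applying the per-CPU offset to an already dereferenced member. A userspace analogy, with a small array standing in for per-CPU storage; all names here are illustrative, not the kernel's per-CPU API:

#include <stdio.h>

struct vcpu_time { unsigned long long version, tsc; };
struct vcpu_info { struct vcpu_time time; };

static struct vcpu_info shared[2];		/* hypervisor-provided blocks */
static struct vcpu_info *percpu_xen_vcpu[2];	/* the "per-CPU" pointer */

static struct vcpu_time *read_time(int cpu)
{
	/* Pick this CPU's pointer first, then take the member, mirroring
	 * &__this_cpu_read(xen_vcpu)->time. */
	return &percpu_xen_vcpu[cpu]->time;
}

int main(void)
{
	percpu_xen_vcpu[0] = &shared[0];
	percpu_xen_vcpu[1] = &shared[1];
	shared[1].time.tsc = 42;

	printf("%llu\n", read_time(1)->tsc);	/* 42 */
	return 0;
}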