author    Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>  2012-09-12 11:14:33 -0400
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>  2012-09-12 11:14:33 -0400
commit    25a765b7f05cb8460fa01b54568894b20e184862 (patch)
tree      0b56db57b4d9f912393ab303c269e0fe6cdf8635 /arch/x86/xen
parent    9d2be9287107695708e6aae5105a8a518a6cb4d0 (diff)
parent    64282278989d5b0398dcb3ba7904cb00c621dc35 (diff)
Merge branch 'x86/platform' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into stable/for-linus-3.7
* 'x86/platform' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (9690 commits)
  x86: Document x86_init.paging.pagetable_init()
  x86: xen: Cleanup and remove x86_init.paging.pagetable_setup_done()
  x86: Move paging_init() call to x86_init.paging.pagetable_init()
  x86: Rename pagetable_setup_start() to pagetable_init()
  x86: Remove base argument from x86_init.paging.pagetable_setup_start
  Linux 3.6-rc5
  HID: tpkbd: work even if the new Lenovo Keyboard driver is not configured
  Remove user-triggerable BUG from mpol_to_str
  xen/pciback: Fix proper FLR steps.
  uml: fix compile error in deliver_alarm()
  dj: memory scribble in logi_dj
  Fix order of arguments to compat_put_time[spec|val]
  xen: Use correct masking in xen_swiotlb_alloc_coherent.
  xen: fix logical error in tlb flushing
  xen/p2m: Fix one-off error in checking the P2M tree directory.
  powerpc: Don't use __put_user() in patch_instruction
  powerpc: Make sure IPI handlers see data written by IPI senders
  powerpc: Restore correct DSCR in context switch
  powerpc: Fix DSCR inheritance in copy_thread()
  powerpc: Keep thread.dscr and thread.dscr_inherit in sync
  ...
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--  arch/x86/xen/enlighten.c  112
-rw-r--r--  arch/x86/xen/mmu.c         69
-rw-r--r--  arch/x86/xen/p2m.c         94
-rw-r--r--  arch/x86/xen/setup.c       32
-rw-r--r--  arch/x86/xen/smp.c          2
5 files changed, 225 insertions, 84 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index cb1b1914dbd3..2766746de274 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -38,6 +38,7 @@
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
 #include <xen/interface/memory.h>
+#include <xen/interface/xen-mca.h>
 #include <xen/features.h>
 #include <xen/page.h>
 #include <xen/hvm.h>
@@ -109,7 +110,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback);
  * Point at some empty memory to start with. We map the real shared_info
  * page as soon as fixmap is up and running.
  */
-struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
+struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
 
 /*
  * Flag to determine whether vcpu info placement is available on all
@@ -126,6 +127,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
  */
 static int have_vcpu_info_placement = 1;
 
+struct tls_descs {
+        struct desc_struct desc[3];
+};
+
+/*
+ * Updating the 3 TLS descriptors in the GDT on every task switch is
+ * surprisingly expensive so we avoid updating them if they haven't
+ * changed. Since Xen writes different descriptors than the one
+ * passed in the update_descriptor hypercall we keep shadow copies to
+ * compare against.
+ */
+static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
+
 static void clamp_max_cpus(void)
 {
 #ifdef CONFIG_SMP
@@ -343,9 +357,7 @@ static void __init xen_init_cpuid_mask(void)
         unsigned int xsave_mask;
 
         cpuid_leaf1_edx_mask =
-                ~((1 << X86_FEATURE_MCE)  | /* disable MCE */
-                  (1 << X86_FEATURE_MCA)  | /* disable MCA */
-                  (1 << X86_FEATURE_MTRR) | /* disable MTRR */
+                ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
                   (1 << X86_FEATURE_ACC)); /* thermal monitoring */
 
         if (!xen_initial_domain())
@@ -542,12 +554,28 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
         BUG();
 }
 
+static inline bool desc_equal(const struct desc_struct *d1,
+                              const struct desc_struct *d2)
+{
+        return d1->a == d2->a && d1->b == d2->b;
+}
+
 static void load_TLS_descriptor(struct thread_struct *t,
                                 unsigned int cpu, unsigned int i)
 {
-        struct desc_struct *gdt = get_cpu_gdt_table(cpu);
-        xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
-        struct multicall_space mc = __xen_mc_entry(0);
+        struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
+        struct desc_struct *gdt;
+        xmaddr_t maddr;
+        struct multicall_space mc;
+
+        if (desc_equal(shadow, &t->tls_array[i]))
+                return;
+
+        *shadow = t->tls_array[i];
+
+        gdt = get_cpu_gdt_table(cpu);
+        maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
+        mc = __xen_mc_entry(0);
 
         MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
 }
@@ -629,8 +657,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
         /*
          * Look for known traps using IST, and substitute them
          * appropriately. The debugger ones are the only ones we care
-         * about. Xen will handle faults like double_fault and
-         * machine_check, so we should never see them. Warn if
+         * about. Xen will handle faults like double_fault,
+         * so we should never see them. Warn if
          * there's an unexpected IST-using fault handler.
          */
         if (addr == (unsigned long)debug)
@@ -645,7 +673,11 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
                 return 0;
 #ifdef CONFIG_X86_MCE
         } else if (addr == (unsigned long)machine_check) {
-                return 0;
+                /*
+                 * when xen hypervisor inject vMCE to guest,
+                 * use native mce handler to handle it
+                 */
+                ;
 #endif
         } else {
                 /* Some other trap using IST? */
@@ -1126,9 +1158,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
         .wbinvd = native_wbinvd,
 
         .read_msr = native_read_msr_safe,
-        .rdmsr_regs = native_rdmsr_safe_regs,
         .write_msr = xen_write_msr_safe,
-        .wrmsr_regs = native_wrmsr_safe_regs,
 
         .read_tsc = native_read_tsc,
         .read_pmc = native_read_pmc,
@@ -1441,32 +1471,6 @@ asmlinkage void __init xen_start_kernel(void)
 #endif
 }
 
-static int init_hvm_pv_info(int *major, int *minor)
-{
-        uint32_t eax, ebx, ecx, edx, pages, msr, base;
-        u64 pfn;
-
-        base = xen_cpuid_base();
-        cpuid(base + 1, &eax, &ebx, &ecx, &edx);
-
-        *major = eax >> 16;
-        *minor = eax & 0xffff;
-        printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor);
-
-        cpuid(base + 2, &pages, &msr, &ecx, &edx);
-
-        pfn = __pa(hypercall_page);
-        wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-
-        xen_setup_features();
-
-        pv_info.name = "Xen HVM";
-
-        xen_domain_type = XEN_HVM_DOMAIN;
-
-        return 0;
-}
-
 void __ref xen_hvm_init_shared_info(void)
 {
         int cpu;
@@ -1499,6 +1503,31 @@ void __ref xen_hvm_init_shared_info(void)
 }
 
 #ifdef CONFIG_XEN_PVHVM
+static void __init init_hvm_pv_info(void)
+{
+        int major, minor;
+        uint32_t eax, ebx, ecx, edx, pages, msr, base;
+        u64 pfn;
+
+        base = xen_cpuid_base();
+        cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+
+        major = eax >> 16;
+        minor = eax & 0xffff;
+        printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
+
+        cpuid(base + 2, &pages, &msr, &ecx, &edx);
+
+        pfn = __pa(hypercall_page);
+        wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+        xen_setup_features();
+
+        pv_info.name = "Xen HVM";
+
+        xen_domain_type = XEN_HVM_DOMAIN;
+}
+
 static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
                                         unsigned long action, void *hcpu)
 {
@@ -1521,12 +1550,7 @@ static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
 
 static void __init xen_hvm_guest_init(void)
 {
-        int r;
-        int major, minor;
-
-        r = init_hvm_pv_info(&major, &minor);
-        if (r < 0)
-                return;
+        init_hvm_pv_info();
 
         xen_hvm_init_shared_info();
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2d9e7c9c0e7b..dfc900471aef 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -308,8 +308,20 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
 
 static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
 {
-        if (!xen_batched_set_pte(ptep, pteval))
-                native_set_pte(ptep, pteval);
+        if (!xen_batched_set_pte(ptep, pteval)) {
+                /*
+                 * Could call native_set_pte() here and trap and
+                 * emulate the PTE write but with 32-bit guests this
+                 * needs two traps (one for each of the two 32-bit
+                 * words in the PTE) so do one hypercall directly
+                 * instead.
+                 */
+                struct mmu_update u;
+
+                u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
+                u.val = pte_val_ma(pteval);
+                HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
+        }
 }
 
 static void xen_set_pte(pte_t *ptep, pte_t pteval)
@@ -1162,8 +1174,13 @@ static void xen_exit_mmap(struct mm_struct *mm)
         spin_unlock(&mm->page_table_lock);
 }
 
-static void __init xen_pagetable_setup_start(pgd_t *base)
+static void xen_post_allocator_init(void);
+
+static void __init xen_pagetable_init(void)
 {
+        paging_init();
+        xen_setup_shared_info();
+        xen_post_allocator_init();
 }
 
 static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
@@ -1180,14 +1197,6 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
         }
 }
 
-static void xen_post_allocator_init(void);
-
-static void __init xen_pagetable_setup_done(pgd_t *base)
-{
-        xen_setup_shared_info();
-        xen_post_allocator_init();
-}
-
 static void xen_write_cr2(unsigned long cr2)
 {
         this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -1244,7 +1253,8 @@ static void xen_flush_tlb_single(unsigned long addr)
 }
 
 static void xen_flush_tlb_others(const struct cpumask *cpus,
-                                 struct mm_struct *mm, unsigned long va)
+                                 struct mm_struct *mm, unsigned long start,
+                                 unsigned long end)
 {
         struct {
                 struct mmuext_op op;
@@ -1256,7 +1266,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
         } *args;
         struct multicall_space mcs;
 
-        trace_xen_mmu_flush_tlb_others(cpus, mm, va);
+        trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
 
         if (cpumask_empty(cpus))
                 return;         /* nothing to do */
@@ -1269,11 +1279,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
         cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
         cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
 
-        if (va == TLB_FLUSH_ALL) {
-                args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-        } else {
+        args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
+        if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
                 args->op.cmd = MMUEXT_INVLPG_MULTI;
-                args->op.arg1.linear_addr = va;
+                args->op.arg1.linear_addr = start;
         }
 
         MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
@@ -1416,13 +1425,28 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
 }
 #endif /* CONFIG_X86_64 */
 
-/* Init-time set_pte while constructing initial pagetables, which
-   doesn't allow RO pagetable pages to be remapped RW */
+/*
+ * Init-time set_pte while constructing initial pagetables, which
+ * doesn't allow RO page table pages to be remapped RW.
+ *
+ * If there is no MFN for this PFN then this page is initially
+ * ballooned out so clear the PTE (as in decrease_reservation() in
+ * drivers/xen/balloon.c).
+ *
+ * Many of these PTE updates are done on unpinned and writable pages
+ * and doing a hypercall for these is unnecessary and expensive. At
+ * this point it is not possible to tell if a page is pinned or not,
+ * so always write the PTE directly and rely on Xen trapping and
+ * emulating any updates as necessary.
+ */
 static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
 {
-        pte = mask_rw_pte(ptep, pte);
+        if (pte_mfn(pte) != INVALID_P2M_ENTRY)
+                pte = mask_rw_pte(ptep, pte);
+        else
+                pte = __pte_ma(0);
 
-        xen_set_pte(ptep, pte);
+        native_set_pte(ptep, pte);
 }
 
 static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
@@ -2041,8 +2065,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 void __init xen_init_mmu_ops(void)
 {
         x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
-        x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
-        x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
+        x86_init.paging.pagetable_init = xen_pagetable_init;
         pv_mmu_ops = xen_mmu_ops;
 
         memset(dummy_mapping, 0xff, PAGE_SIZE);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 64effdc6da94..76ba0e97e530 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -194,6 +194,13 @@ RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID
  * boundary violation will require three middle nodes. */
 RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
 
+/* When we populate back during bootup, the amount of pages can vary. The
+ * max we have is seen is 395979, but that does not mean it can't be more.
+ * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
+ * it can re-use Xen provided mfn_list array, so we only need to allocate at
+ * most three P2M top nodes. */
+RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
+
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
         BUG_ON(pfn >= MAX_P2M_PFN);
@@ -570,12 +577,99 @@ static bool __init early_alloc_p2m(unsigned long pfn)
         }
         return true;
 }
+
+/*
+ * Skim over the P2M tree looking at pages that are either filled with
+ * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and
+ * replace the P2M leaf with a p2m_missing or p2m_identity.
+ * Stick the old page in the new P2M tree location.
+ */
+bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn)
+{
+        unsigned topidx;
+        unsigned mididx;
+        unsigned ident_pfns;
+        unsigned inv_pfns;
+        unsigned long *p2m;
+        unsigned long *mid_mfn_p;
+        unsigned idx;
+        unsigned long pfn;
+
+        /* We only look when this entails a P2M middle layer */
+        if (p2m_index(set_pfn))
+                return false;
+
+        for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
+                topidx = p2m_top_index(pfn);
+
+                if (!p2m_top[topidx])
+                        continue;
+
+                if (p2m_top[topidx] == p2m_mid_missing)
+                        continue;
+
+                mididx = p2m_mid_index(pfn);
+                p2m = p2m_top[topidx][mididx];
+                if (!p2m)
+                        continue;
+
+                if ((p2m == p2m_missing) || (p2m == p2m_identity))
+                        continue;
+
+                if ((unsigned long)p2m == INVALID_P2M_ENTRY)
+                        continue;
+
+                ident_pfns = 0;
+                inv_pfns = 0;
+                for (idx = 0; idx < P2M_PER_PAGE; idx++) {
+                        /* IDENTITY_PFNs are 1:1 */
+                        if (p2m[idx] == IDENTITY_FRAME(pfn + idx))
+                                ident_pfns++;
+                        else if (p2m[idx] == INVALID_P2M_ENTRY)
+                                inv_pfns++;
+                        else
+                                break;
+                }
+                if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE))
+                        goto found;
+        }
+        return false;
+found:
+        /* Found one, replace old with p2m_identity or p2m_missing */
+        p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
+        /* And the other for save/restore.. */
+        mid_mfn_p = p2m_top_mfn_p[topidx];
+        /* NOTE: Even if it is a p2m_identity it should still be point to
+         * a page filled with INVALID_P2M_ENTRY entries. */
+        mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);
+
+        /* Reset where we want to stick the old page in. */
+        topidx = p2m_top_index(set_pfn);
+        mididx = p2m_mid_index(set_pfn);
+
+        /* This shouldn't happen */
+        if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
+                early_alloc_p2m(set_pfn);
+
+        if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
+                return false;
+
+        p2m_init(p2m);
+        p2m_top[topidx][mididx] = p2m;
+        mid_mfn_p = p2m_top_mfn_p[topidx];
+        mid_mfn_p[mididx] = virt_to_mfn(p2m);
+
+        return true;
+}
 bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
         if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
                 if (!early_alloc_p2m(pfn))
                         return false;
 
+                if (early_can_reuse_p2m_middle(pfn, mfn))
+                        return __set_phys_to_machine(pfn, mfn);
+
                 if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/))
                         return false;
 
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a4790bf22c59..d11ca11d14fc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -78,9 +78,16 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
         memblock_reserve(start, size);
 
         xen_max_p2m_pfn = PFN_DOWN(start + size);
+        for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
+                unsigned long mfn = pfn_to_mfn(pfn);
+
+                if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
+                        continue;
+                WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
+                        pfn, mfn);
 
-        for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
                 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+        }
 }
 
 static unsigned long __init xen_do_chunk(unsigned long start,
@@ -157,25 +164,24 @@ static unsigned long __init xen_populate_chunk(
         unsigned long dest_pfn;
 
         for (i = 0, entry = list; i < map_size; i++, entry++) {
-                unsigned long credits = credits_left;
                 unsigned long s_pfn;
                 unsigned long e_pfn;
                 unsigned long pfns;
                 long capacity;
 
-                if (credits <= 0)
+                if (credits_left <= 0)
                         break;
 
                 if (entry->type != E820_RAM)
                         continue;
 
-                e_pfn = PFN_UP(entry->addr + entry->size);
+                e_pfn = PFN_DOWN(entry->addr + entry->size);
 
                 /* We only care about E820 after the xen_start_info->nr_pages */
                 if (e_pfn <= max_pfn)
                         continue;
 
-                s_pfn = PFN_DOWN(entry->addr);
+                s_pfn = PFN_UP(entry->addr);
                 /* If the E820 falls within the nr_pages, we want to start
                  * at the nr_pages PFN.
                  * If that would mean going past the E820 entry, skip it
@@ -184,23 +190,19 @@ static unsigned long __init xen_populate_chunk(
                         capacity = e_pfn - max_pfn;
                         dest_pfn = max_pfn;
                 } else {
-                        /* last_pfn MUST be within E820_RAM regions */
-                        if (*last_pfn && e_pfn >= *last_pfn)
-                                s_pfn = *last_pfn;
                         capacity = e_pfn - s_pfn;
                         dest_pfn = s_pfn;
                 }
-                /* If we had filled this E820_RAM entry, go to the next one. */
-                if (capacity <= 0)
-                        continue;
 
-                if (credits > capacity)
-                        credits = capacity;
+                if (credits_left < capacity)
+                        capacity = credits_left;
 
-                pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false);
+                pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
                 done += pfns;
-                credits_left -= pfns;
                 *last_pfn = (dest_pfn + pfns);
+                if (pfns < capacity)
+                        break;
+                credits_left -= pfns;
         }
         return done;
 }
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index afb250d22a6b..f58dca7a6e52 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -80,9 +80,7 @@ static void __cpuinit cpu_bringup(void)
 
         notify_cpu_starting(cpu);
 
-        ipi_call_lock();
         set_cpu_online(cpu, true);
-        ipi_call_unlock();
 
         this_cpu_write(cpu_state, CPU_ONLINE);
 