commit 25a765b7f05cb8460fa01b54568894b20e184862
tree 0b56db57b4d9f912393ab303c269e0fe6cdf8635 /arch/x86/xen
parent 9d2be9287107695708e6aae5105a8a518a6cb4d0
parent 64282278989d5b0398dcb3ba7904cb00c621dc35
author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 2012-09-12 11:14:33 -0400
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 2012-09-12 11:14:33 -0400
Merge branch 'x86/platform' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into stable/for-linus-3.7
* 'x86/platform' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (9690 commits)
x86: Document x86_init.paging.pagetable_init()
x86: xen: Cleanup and remove x86_init.paging.pagetable_setup_done()
x86: Move paging_init() call to x86_init.paging.pagetable_init()
x86: Rename pagetable_setup_start() to pagetable_init()
x86: Remove base argument from x86_init.paging.pagetable_setup_start
Linux 3.6-rc5
HID: tpkbd: work even if the new Lenovo Keyboard driver is not configured
Remove user-triggerable BUG from mpol_to_str
xen/pciback: Fix proper FLR steps.
uml: fix compile error in deliver_alarm()
dj: memory scribble in logi_dj
Fix order of arguments to compat_put_time[spec|val]
xen: Use correct masking in xen_swiotlb_alloc_coherent.
xen: fix logical error in tlb flushing
xen/p2m: Fix one-off error in checking the P2M tree directory.
powerpc: Don't use __put_user() in patch_instruction
powerpc: Make sure IPI handlers see data written by IPI senders
powerpc: Restore correct DSCR in context switch
powerpc: Fix DSCR inheritance in copy_thread()
powerpc: Keep thread.dscr and thread.dscr_inherit in sync
...
Diffstat (limited to 'arch/x86/xen')
 arch/x86/xen/enlighten.c | 112
 arch/x86/xen/mmu.c       |  69
 arch/x86/xen/p2m.c       |  94
 arch/x86/xen/setup.c     |  32
 arch/x86/xen/smp.c       |   2
 5 files changed, 225 insertions(+), 84 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index cb1b1914dbd3..2766746de274 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -38,6 +38,7 @@
 #include <xen/interface/physdev.h>
 #include <xen/interface/vcpu.h>
 #include <xen/interface/memory.h>
+#include <xen/interface/xen-mca.h>
 #include <xen/features.h>
 #include <xen/page.h>
 #include <xen/hvm.h>
@@ -109,7 +110,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback);
  * Point at some empty memory to start with. We map the real shared_info
  * page as soon as fixmap is up and running.
  */
-struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
+struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
 
 /*
  * Flag to determine whether vcpu info placement is available on all
@@ -126,6 +127,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
  */
 static int have_vcpu_info_placement = 1;
 
+struct tls_descs {
+	struct desc_struct desc[3];
+};
+
+/*
+ * Updating the 3 TLS descriptors in the GDT on every task switch is
+ * surprisingly expensive so we avoid updating them if they haven't
+ * changed. Since Xen writes different descriptors than the one
+ * passed in the update_descriptor hypercall we keep shadow copies to
+ * compare against.
+ */
+static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
+
 static void clamp_max_cpus(void)
 {
 #ifdef CONFIG_SMP
@@ -343,9 +357,7 @@ static void __init xen_init_cpuid_mask(void)
 	unsigned int xsave_mask;
 
 	cpuid_leaf1_edx_mask =
-		~((1 << X86_FEATURE_MCE) | /* disable MCE */
-		  (1 << X86_FEATURE_MCA) | /* disable MCA */
-		  (1 << X86_FEATURE_MTRR) | /* disable MTRR */
+		~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
 		  (1 << X86_FEATURE_ACC)); /* thermal monitoring */
 
 	if (!xen_initial_domain())
@@ -542,12 +554,28 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
 		BUG();
 }
 
+static inline bool desc_equal(const struct desc_struct *d1,
+			      const struct desc_struct *d2)
+{
+	return d1->a == d2->a && d1->b == d2->b;
+}
+
 static void load_TLS_descriptor(struct thread_struct *t,
 				unsigned int cpu, unsigned int i)
 {
-	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
-	xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
-	struct multicall_space mc = __xen_mc_entry(0);
+	struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
+	struct desc_struct *gdt;
+	xmaddr_t maddr;
+	struct multicall_space mc;
+
+	if (desc_equal(shadow, &t->tls_array[i]))
+		return;
+
+	*shadow = t->tls_array[i];
+
+	gdt = get_cpu_gdt_table(cpu);
+	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
+	mc = __xen_mc_entry(0);
 
 	MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
 }
@@ -629,8 +657,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 	/*
	 * Look for known traps using IST, and substitute them
	 * appropriately. The debugger ones are the only ones we care
-	 * about. Xen will handle faults like double_fault and
-	 * machine_check, so we should never see them. Warn if
+	 * about. Xen will handle faults like double_fault,
+	 * so we should never see them. Warn if
	 * there's an unexpected IST-using fault handler.
	 */
	if (addr == (unsigned long)debug)
@@ -645,7 +673,11 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 		return 0;
 #ifdef CONFIG_X86_MCE
 	} else if (addr == (unsigned long)machine_check) {
-		return 0;
+		/*
+		 * when xen hypervisor inject vMCE to guest,
+		 * use native mce handler to handle it
+		 */
+		;
 #endif
 	} else {
 		/* Some other trap using IST? */
@@ -1126,9 +1158,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.wbinvd = native_wbinvd,
 
 	.read_msr = native_read_msr_safe,
-	.rdmsr_regs = native_rdmsr_safe_regs,
 	.write_msr = xen_write_msr_safe,
-	.wrmsr_regs = native_wrmsr_safe_regs,
 
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
@@ -1441,32 +1471,6 @@ asmlinkage void __init xen_start_kernel(void)
 #endif
 }
 
-static int init_hvm_pv_info(int *major, int *minor)
-{
-	uint32_t eax, ebx, ecx, edx, pages, msr, base;
-	u64 pfn;
-
-	base = xen_cpuid_base();
-	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
-
-	*major = eax >> 16;
-	*minor = eax & 0xffff;
-	printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor);
-
-	cpuid(base + 2, &pages, &msr, &ecx, &edx);
-
-	pfn = __pa(hypercall_page);
-	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
-
-	xen_setup_features();
-
-	pv_info.name = "Xen HVM";
-
-	xen_domain_type = XEN_HVM_DOMAIN;
-
-	return 0;
-}
-
 void __ref xen_hvm_init_shared_info(void)
 {
 	int cpu;
@@ -1499,6 +1503,31 @@ void __ref xen_hvm_init_shared_info(void)
 }
 
 #ifdef CONFIG_XEN_PVHVM
+static void __init init_hvm_pv_info(void)
+{
+	int major, minor;
+	uint32_t eax, ebx, ecx, edx, pages, msr, base;
+	u64 pfn;
+
+	base = xen_cpuid_base();
+	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
+
+	major = eax >> 16;
+	minor = eax & 0xffff;
+	printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
+
+	cpuid(base + 2, &pages, &msr, &ecx, &edx);
+
+	pfn = __pa(hypercall_page);
+	wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
+
+	xen_setup_features();
+
+	pv_info.name = "Xen HVM";
+
+	xen_domain_type = XEN_HVM_DOMAIN;
+}
+
 static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
 					unsigned long action, void *hcpu)
 {
@@ -1521,12 +1550,7 @@ static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
 
 static void __init xen_hvm_guest_init(void)
 {
-	int r;
-	int major, minor;
-
-	r = init_hvm_pv_info(&major, &minor);
-	if (r < 0)
-		return;
+	init_hvm_pv_info();
 
 	xen_hvm_init_shared_info();
 
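Note on the TLS change above: load_TLS_descriptor() now keeps a per-CPU shadow copy of the three TLS descriptors and only issues the update_descriptor multicall when a descriptor actually changed. The following stand-alone C sketch illustrates the same compare-before-update pattern; expensive_update() and the counter are illustrative stand-ins, not kernel interfaces.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Two-word descriptor, standing in for the kernel's struct desc_struct. */
struct desc { uint32_t a, b; };

#define NR_TLS 3

static struct desc shadow_tls[NR_TLS];  /* last descriptors actually sent */
static unsigned long expensive_updates; /* stands in for the hypercall count */

static bool desc_equal(const struct desc *d1, const struct desc *d2)
{
	return d1->a == d2->a && d1->b == d2->b;
}

/* Hypothetical stand-in for the MULTI_update_descriptor hypercall. */
static void expensive_update(unsigned int i, const struct desc *d)
{
	(void)i;
	(void)d;
	expensive_updates++;
}

static void load_tls_entry(unsigned int i, const struct desc *new_desc)
{
	if (desc_equal(&shadow_tls[i], new_desc))
		return;                  /* unchanged: skip the costly path */

	shadow_tls[i] = *new_desc;       /* remember what we sent */
	expensive_update(i, new_desc);
}

int main(void)
{
	struct desc d = { 0x1234, 0x5678 };

	load_tls_entry(0, &d);  /* first task switch: must update */
	load_tls_entry(0, &d);  /* same descriptors again: skipped */
	printf("expensive updates: %lu\n", expensive_updates); /* prints 1 */
	return 0;
}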
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2d9e7c9c0e7b..dfc900471aef 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -308,8 +308,20 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
 
 static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
 {
-	if (!xen_batched_set_pte(ptep, pteval))
-		native_set_pte(ptep, pteval);
+	if (!xen_batched_set_pte(ptep, pteval)) {
+		/*
+		 * Could call native_set_pte() here and trap and
+		 * emulate the PTE write but with 32-bit guests this
+		 * needs two traps (one for each of the two 32-bit
+		 * words in the PTE) so do one hypercall directly
+		 * instead.
+		 */
+		struct mmu_update u;
+
+		u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
+		u.val = pte_val_ma(pteval);
+		HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
+	}
 }
 
 static void xen_set_pte(pte_t *ptep, pte_t pteval)
@@ -1162,8 +1174,13 @@ static void xen_exit_mmap(struct mm_struct *mm)
 	spin_unlock(&mm->page_table_lock);
 }
 
-static void __init xen_pagetable_setup_start(pgd_t *base)
+static void xen_post_allocator_init(void);
+
+static void __init xen_pagetable_init(void)
 {
+	paging_init();
+	xen_setup_shared_info();
+	xen_post_allocator_init();
 }
 
 static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
@@ -1180,14 +1197,6 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
 	}
 }
 
-static void xen_post_allocator_init(void);
-
-static void __init xen_pagetable_setup_done(pgd_t *base)
-{
-	xen_setup_shared_info();
-	xen_post_allocator_init();
-}
-
 static void xen_write_cr2(unsigned long cr2)
 {
 	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -1244,7 +1253,8 @@ static void xen_flush_tlb_single(unsigned long addr)
 }
 
 static void xen_flush_tlb_others(const struct cpumask *cpus,
-				 struct mm_struct *mm, unsigned long va)
+				 struct mm_struct *mm, unsigned long start,
+				 unsigned long end)
 {
 	struct {
 		struct mmuext_op op;
@@ -1256,7 +1266,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 	} *args;
 	struct multicall_space mcs;
 
-	trace_xen_mmu_flush_tlb_others(cpus, mm, va);
+	trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
 
 	if (cpumask_empty(cpus))
 		return;		/* nothing to do */
@@ -1269,11 +1279,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 	cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
 	cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
 
-	if (va == TLB_FLUSH_ALL) {
-		args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-	} else {
+	args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
+	if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
 		args->op.cmd = MMUEXT_INVLPG_MULTI;
-		args->op.arg1.linear_addr = va;
+		args->op.arg1.linear_addr = start;
 	}
 
 	MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
@@ -1416,13 +1425,28 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
 }
 #endif /* CONFIG_X86_64 */
 
-/* Init-time set_pte while constructing initial pagetables, which
-   doesn't allow RO pagetable pages to be remapped RW */
+/*
+ * Init-time set_pte while constructing initial pagetables, which
+ * doesn't allow RO page table pages to be remapped RW.
+ *
+ * If there is no MFN for this PFN then this page is initially
+ * ballooned out so clear the PTE (as in decrease_reservation() in
+ * drivers/xen/balloon.c).
+ *
+ * Many of these PTE updates are done on unpinned and writable pages
+ * and doing a hypercall for these is unnecessary and expensive. At
+ * this point it is not possible to tell if a page is pinned or not,
+ * so always write the PTE directly and rely on Xen trapping and
+ * emulating any updates as necessary.
+ */
 static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
 {
-	pte = mask_rw_pte(ptep, pte);
+	if (pte_mfn(pte) != INVALID_P2M_ENTRY)
+		pte = mask_rw_pte(ptep, pte);
+	else
+		pte = __pte_ma(0);
 
-	xen_set_pte(ptep, pte);
+	native_set_pte(ptep, pte);
 }
 
 static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
@@ -2041,8 +2065,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 void __init xen_init_mmu_ops(void)
 {
 	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
-	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
-	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
+	x86_init.paging.pagetable_init = xen_pagetable_init;
 	pv_mmu_ops = xen_mmu_ops;
 
 	memset(dummy_mapping, 0xff, PAGE_SIZE);
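Note on the TLB change above: xen_flush_tlb_others() now takes a (start, end) range instead of a single address. It defaults to a full MMUEXT_TLB_FLUSH_MULTI and downgrades to a single-page MMUEXT_INVLPG_MULTI only when the range spans at most one page. A minimal sketch of that decision, with local stand-in constants rather than the kernel's headers:

#include <stdio.h>

#define PAGE_SIZE      4096UL
#define TLB_FLUSH_ALL  (~0UL)   /* sentinel value, as in the kernel */

enum flush_cmd { FLUSH_MULTI, INVLPG_MULTI };

/* Mirrors the hunk above: full flush by default, single-page
 * invalidation only for a range of at most one page. */
static enum flush_cmd pick_flush_cmd(unsigned long start, unsigned long end)
{
	enum flush_cmd cmd = FLUSH_MULTI;

	if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE)
		cmd = INVLPG_MULTI;  /* arg1.linear_addr would be 'start' */
	return cmd;
}

int main(void)
{
	printf("%d\n", pick_flush_cmd(0x1000, 0x2000));   /* one page -> INVLPG_MULTI */
	printf("%d\n", pick_flush_cmd(0x1000, 0x9000));   /* large range -> full flush */
	printf("%d\n", pick_flush_cmd(0, TLB_FLUSH_ALL)); /* flush-all -> full flush */
	return 0;
}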
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 64effdc6da94..76ba0e97e530 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -194,6 +194,13 @@ RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID
  * boundary violation will require three middle nodes. */
 RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
 
+/* When we populate back during bootup, the amount of pages can vary. The
+ * max we have is seen is 395979, but that does not mean it can't be more.
+ * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
+ * it can re-use Xen provided mfn_list array, so we only need to allocate at
+ * most three P2M top nodes. */
+RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
+
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
 	BUG_ON(pfn >= MAX_P2M_PFN);
@@ -570,12 +577,99 @@ static bool __init early_alloc_p2m(unsigned long pfn)
 	}
 	return true;
 }
+
+/*
+ * Skim over the P2M tree looking at pages that are either filled with
+ * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and
+ * replace the P2M leaf with a p2m_missing or p2m_identity.
+ * Stick the old page in the new P2M tree location.
+ */
+bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn)
+{
+	unsigned topidx;
+	unsigned mididx;
+	unsigned ident_pfns;
+	unsigned inv_pfns;
+	unsigned long *p2m;
+	unsigned long *mid_mfn_p;
+	unsigned idx;
+	unsigned long pfn;
+
+	/* We only look when this entails a P2M middle layer */
+	if (p2m_index(set_pfn))
+		return false;
+
+	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
+		topidx = p2m_top_index(pfn);
+
+		if (!p2m_top[topidx])
+			continue;
+
+		if (p2m_top[topidx] == p2m_mid_missing)
+			continue;
+
+		mididx = p2m_mid_index(pfn);
+		p2m = p2m_top[topidx][mididx];
+		if (!p2m)
+			continue;
+
+		if ((p2m == p2m_missing) || (p2m == p2m_identity))
+			continue;
+
+		if ((unsigned long)p2m == INVALID_P2M_ENTRY)
+			continue;
+
+		ident_pfns = 0;
+		inv_pfns = 0;
+		for (idx = 0; idx < P2M_PER_PAGE; idx++) {
+			/* IDENTITY_PFNs are 1:1 */
+			if (p2m[idx] == IDENTITY_FRAME(pfn + idx))
+				ident_pfns++;
+			else if (p2m[idx] == INVALID_P2M_ENTRY)
+				inv_pfns++;
+			else
+				break;
+		}
+		if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE))
+			goto found;
+	}
+	return false;
+found:
+	/* Found one, replace old with p2m_identity or p2m_missing */
+	p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
+	/* And the other for save/restore.. */
+	mid_mfn_p = p2m_top_mfn_p[topidx];
+	/* NOTE: Even if it is a p2m_identity it should still be point to
+	 * a page filled with INVALID_P2M_ENTRY entries. */
+	mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);
+
+	/* Reset where we want to stick the old page in. */
+	topidx = p2m_top_index(set_pfn);
+	mididx = p2m_mid_index(set_pfn);
+
+	/* This shouldn't happen */
+	if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
+		early_alloc_p2m(set_pfn);
+
+	if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
+		return false;
+
+	p2m_init(p2m);
+	p2m_top[topidx][mididx] = p2m;
+	mid_mfn_p = p2m_top_mfn_p[topidx];
+	mid_mfn_p[mididx] = virt_to_mfn(p2m);
+
+	return true;
+}
 bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
 	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
 		if (!early_alloc_p2m(pfn))
 			return false;
 
+		if (early_can_reuse_p2m_middle(pfn, mfn))
+			return __set_phys_to_machine(pfn, mfn);
+
 		if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/))
 			return false;
 
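Note on early_can_reuse_p2m_middle() above: a P2M leaf page whose P2M_PER_PAGE entries are all INVALID_P2M_ENTRY or all 1:1 identity frames carries no information of its own, so it can be replaced by the shared p2m_missing or p2m_identity page and the backing page recycled. The uniformity test at the heart of the scan looks like this stand-alone sketch (P2M_PER_PAGE is shrunk and the identity tag bit chosen arbitrarily for illustration; both are assumptions, not the kernel's values):

#include <stdbool.h>
#include <stdio.h>

#define P2M_PER_PAGE        8    /* tiny for illustration; the real value is larger */
#define INVALID_P2M_ENTRY   (~0UL)
#define IDENTITY_FRAME(pfn) ((pfn) | (1UL << 62))   /* illustrative tag bit */

/* Returns true if every entry of one P2M leaf page is all-invalid or
 * all-identity, i.e. the page can be replaced by the shared
 * p2m_missing/p2m_identity page. */
static bool leaf_is_reusable(const unsigned long *p2m, unsigned long base_pfn,
			     bool *all_identity)
{
	unsigned ident = 0, inv = 0, idx;

	for (idx = 0; idx < P2M_PER_PAGE; idx++) {
		if (p2m[idx] == IDENTITY_FRAME(base_pfn + idx))
			ident++;
		else if (p2m[idx] == INVALID_P2M_ENTRY)
			inv++;
		else
			break;   /* mixed contents: keep this page */
	}
	*all_identity = (ident == P2M_PER_PAGE);  /* picks p2m_identity vs p2m_missing */
	return ident == P2M_PER_PAGE || inv == P2M_PER_PAGE;
}

int main(void)
{
	unsigned long page[P2M_PER_PAGE];
	bool ident;
	unsigned i;

	for (i = 0; i < P2M_PER_PAGE; i++)
		page[i] = INVALID_P2M_ENTRY;
	printf("all-invalid page reusable: %d\n", leaf_is_reusable(page, 0, &ident));

	page[3] = 42;   /* one real mapping makes the page non-uniform */
	printf("mixed page reusable: %d\n", leaf_is_reusable(page, 0, &ident));
	return 0;
}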
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a4790bf22c59..d11ca11d14fc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -78,9 +78,16 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
 	memblock_reserve(start, size);
 
 	xen_max_p2m_pfn = PFN_DOWN(start + size);
+	for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
+		unsigned long mfn = pfn_to_mfn(pfn);
+
+		if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
+			continue;
+		WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
+			pfn, mfn);
 
-	for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
 		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+	}
 }
 
 static unsigned long __init xen_do_chunk(unsigned long start,
@@ -157,25 +164,24 @@ static unsigned long __init xen_populate_chunk(
 	unsigned long dest_pfn;
 
 	for (i = 0, entry = list; i < map_size; i++, entry++) {
-		unsigned long credits = credits_left;
 		unsigned long s_pfn;
 		unsigned long e_pfn;
 		unsigned long pfns;
 		long capacity;
 
-		if (credits <= 0)
+		if (credits_left <= 0)
 			break;
 
 		if (entry->type != E820_RAM)
 			continue;
 
-		e_pfn = PFN_UP(entry->addr + entry->size);
+		e_pfn = PFN_DOWN(entry->addr + entry->size);
 
 		/* We only care about E820 after the xen_start_info->nr_pages */
 		if (e_pfn <= max_pfn)
 			continue;
 
-		s_pfn = PFN_DOWN(entry->addr);
+		s_pfn = PFN_UP(entry->addr);
 		/* If the E820 falls within the nr_pages, we want to start
 		 * at the nr_pages PFN.
 		 * If that would mean going past the E820 entry, skip it
@@ -184,23 +190,19 @@ static unsigned long __init xen_populate_chunk(
 			capacity = e_pfn - max_pfn;
 			dest_pfn = max_pfn;
 		} else {
-			/* last_pfn MUST be within E820_RAM regions */
-			if (*last_pfn && e_pfn >= *last_pfn)
-				s_pfn = *last_pfn;
 			capacity = e_pfn - s_pfn;
 			dest_pfn = s_pfn;
 		}
-		/* If we had filled this E820_RAM entry, go to the next one. */
-		if (capacity <= 0)
-			continue;
 
-		if (credits > capacity)
-			credits = capacity;
+		if (credits_left < capacity)
+			capacity = credits_left;
 
-		pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false);
+		pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
 		done += pfns;
-		credits_left -= pfns;
 		*last_pfn = (dest_pfn + pfns);
+		if (pfns < capacity)
+			break;
+		credits_left -= pfns;
 	}
 	return done;
 }
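Note on the PFN_UP/PFN_DOWN swap in xen_populate_chunk() above: for an E820 region that is not page-aligned, the first whole page starts at PFN_UP(addr) (round up) and the whole pages end at PFN_DOWN(addr + size) (round down); the old rounding did the opposite and counted partial pages outside the region. A small demonstration using the same rounding as the kernel's PFN_UP/PFN_DOWN helpers (the sample addresses are made up):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Same rounding as the kernel's PFN_UP/PFN_DOWN. */
#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

int main(void)
{
	/* A RAM entry unaligned at both ends: [0x1800, 0x4800). */
	unsigned long addr = 0x1800;   /* middle of pfn 1 */
	unsigned long size = 0x3000;   /* ends in the middle of pfn 4 */

	/* Fixed rounding: only pfns fully inside the region (2 and 3). */
	printf("s_pfn=%lu e_pfn=%lu\n", PFN_UP(addr), PFN_DOWN(addr + size));

	/* Old rounding claimed pfns 1..4, including the partial pages. */
	printf("old s_pfn=%lu old e_pfn=%lu\n", PFN_DOWN(addr), PFN_UP(addr + size));
	return 0;
}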
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index afb250d22a6b..f58dca7a6e52 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -80,9 +80,7 @@ static void __cpuinit cpu_bringup(void)
 
 	notify_cpu_starting(cpu);
 
-	ipi_call_lock();
 	set_cpu_online(cpu, true);
-	ipi_call_unlock();
 
 	this_cpu_write(cpu_state, CPU_ONLINE);
 