aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2012-07-19 13:52:29 -0400
committerKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2012-08-23 11:52:13 -0400
commit357a3cfb147ee8e97c6f9cdc51e9a33aa56f7d99 (patch)
tree9eb96fb1775f81380de2c0cbab6798daf6b46155
parent488f046df922af992c1a718eff276529c0510885 (diff)
xen/p2m: Add logic to revector a P2M tree to use __va leafs.
During bootup Xen supplies us with a P2M array. It sticks it right after the ramdisk, as can be seen with a 128GB PV guest: (certain parts removed for clarity): xc_dom_build_image: called xc_dom_alloc_segment: kernel : 0xffffffff81000000 -> 0xffffffff81e43000 (pfn 0x1000 + 0xe43 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x1000+0xe43 at 0x7f097d8bf000 xc_dom_alloc_segment: ramdisk : 0xffffffff81e43000 -> 0xffffffff925c7000 (pfn 0x1e43 + 0x10784 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x1e43+0x10784 at 0x7f0952dd2000 xc_dom_alloc_segment: phys2mach : 0xffffffff925c7000 -> 0xffffffffa25c7000 (pfn 0x125c7 + 0x10000 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x125c7+0x10000 at 0x7f0942dd2000 xc_dom_alloc_page : start info : 0xffffffffa25c7000 (pfn 0x225c7) xc_dom_alloc_page : xenstore : 0xffffffffa25c8000 (pfn 0x225c8) xc_dom_alloc_page : console : 0xffffffffa25c9000 (pfn 0x225c9) nr_page_tables: 0x0000ffffffffffff/48: 0xffff000000000000 -> 0xffffffffffffffff, 1 table(s) nr_page_tables: 0x0000007fffffffff/39: 0xffffff8000000000 -> 0xffffffffffffffff, 1 table(s) nr_page_tables: 0x000000003fffffff/30: 0xffffffff80000000 -> 0xffffffffbfffffff, 1 table(s) nr_page_tables: 0x00000000001fffff/21: 0xffffffff80000000 -> 0xffffffffa27fffff, 276 table(s) xc_dom_alloc_segment: page tables : 0xffffffffa25ca000 -> 0xffffffffa26e1000 (pfn 0x225ca + 0x117 pages) xc_dom_pfn_to_ptr: domU mapping: pfn 0x225ca+0x117 at 0x7f097d7a8000 xc_dom_alloc_page : boot stack : 0xffffffffa26e1000 (pfn 0x226e1) xc_dom_build_image : virt_alloc_end : 0xffffffffa26e2000 xc_dom_build_image : virt_pgtab_end : 0xffffffffa2800000 So the physical memory and virtual (using __START_KERNEL_map addresses) layout looks as so: phys __ka /------------\ /-------------------\ | 0 | empty | 0xffffffff80000000| | .. | | .. | | 16MB | <= kernel starts | 0xffffffff81000000| | .. | | | | 30MB | <= kernel ends => | 0xffffffff81e43000| | .. | & ramdisk starts | .. 
| | 293MB | <= ramdisk ends=> | 0xffffffff925c7000| | .. | & P2M starts | .. | | .. | | .. | | 549MB | <= P2M ends => | 0xffffffffa25c7000| | .. | start_info | 0xffffffffa25c7000| | .. | xenstore | 0xffffffffa25c8000| | .. | console | 0xffffffffa25c9000| | 549MB | <= page tables => | 0xffffffffa25ca000| | .. | | | | 550MB | <= PGT end => | 0xffffffffa26e1000| | .. | boot stack | | \------------/ \-------------------/ As can be seen, the ramdisk, P2M and pagetables are taking a bit of __ka address space. Which is a problem since the MODULES_VADDR starts at 0xffffffffa0000000 - and P2M sits right in there! This results in the inability to load modules during bootup, with this error: ------------[ cut here ]------------ WARNING: at /home/konrad/ssd/linux/mm/vmalloc.c:106 vmap_page_range_noflush+0x2d9/0x370() Call Trace: [<ffffffff810719fa>] warn_slowpath_common+0x7a/0xb0 [<ffffffff81030279>] ? __raw_callee_save_xen_pmd_val+0x11/0x1e [<ffffffff81071a45>] warn_slowpath_null+0x15/0x20 [<ffffffff81130b89>] vmap_page_range_noflush+0x2d9/0x370 [<ffffffff81130c4d>] map_vm_area+0x2d/0x50 [<ffffffff811326d0>] __vmalloc_node_range+0x160/0x250 [<ffffffff810c5369>] ? module_alloc_update_bounds+0x19/0x80 [<ffffffff810c6186>] ? load_module+0x66/0x19c0 [<ffffffff8105cadc>] module_alloc+0x5c/0x60 [<ffffffff810c5369>] ? module_alloc_update_bounds+0x19/0x80 [<ffffffff810c5369>] module_alloc_update_bounds+0x19/0x80 [<ffffffff810c70c3>] load_module+0xfa3/0x19c0 [<ffffffff812491f6>] ? security_file_permission+0x86/0x90 [<ffffffff810c7b3a>] sys_init_module+0x5a/0x220 [<ffffffff815ce339>] system_call_fastpath+0x16/0x1b ---[ end trace fd8f7704fdea0291 ]--- vmalloc: allocation failure, allocated 16384 of 20480 bytes modprobe: page allocation failure: order:0, mode:0xd2 Since the __va and __ka are 1:1 up to MODULES_VADDR and cleanup_highmap rids __ka of the ramdisk mapping, what we want to do is similar - get rid of the P2M in the __ka address space. 
There are two ways of fixing this: 1) All P2M lookups instead of using the __ka address would use the __va address. This means we can safely erase from __ka space the PMD pointers that point to the PFNs for P2M array and be OK. 2) Allocate a new array, copy the existing P2M into it, revector the P2M tree to use that, and return the old P2M to the memory allocator. This has the advantage that it sets the stage for using XEN_ELF_NOTE_INIT_P2M feature. That feature allows us to set the exact virtual address space we want for the P2M - and allows us to boot as initial domain on large machines. So we pick option 2). This patch only lays the groundwork in the P2M code. The patch that modifies the MMU is called "xen/mmu: Copy and revector the P2M tree." Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-rw-r--r--arch/x86/xen/p2m.c70
-rw-r--r--arch/x86/xen/xen-ops.h1
2 files changed, 71 insertions, 0 deletions
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index e4adbfbdfada..996ee2bf7bdb 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -389,7 +389,77 @@ void __init xen_build_dynamic_phys_to_machine(void)
389 389
390 m2p_override_init(); 390 m2p_override_init();
391} 391}
392#ifdef CONFIG_X86_64
393#include <linux/bootmem.h>
394unsigned long __init xen_revector_p2m_tree(void)
395{
396 unsigned long va_start;
397 unsigned long va_end;
398 unsigned long pfn;
399 unsigned long *mfn_list = NULL;
400 unsigned long size;
401
402 va_start = xen_start_info->mfn_list;
403 /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
404 * so make sure it is rounded up to that */
405 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
406 va_end = va_start + size;
407
408 /* If we were revectored already, don't do it again. */
409 if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
410 return 0;
411
412 mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
413 if (!mfn_list) {
414 pr_warn("Could not allocate space for a new P2M tree!\n");
415 return xen_start_info->mfn_list;
416 }
417 /* Fill it out with INVALID_P2M_ENTRY value */
418 memset(mfn_list, 0xFF, size);
419
420 for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
421 unsigned topidx = p2m_top_index(pfn);
422 unsigned mididx;
423 unsigned long *mid_p;
424
425 if (!p2m_top[topidx])
426 continue;
427
428 if (p2m_top[topidx] == p2m_mid_missing)
429 continue;
430
431 mididx = p2m_mid_index(pfn);
432 mid_p = p2m_top[topidx][mididx];
433 if (!mid_p)
434 continue;
435 if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
436 continue;
437
438 if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
439 continue;
440
441 /* The old va. Rebase it on mfn_list */
442 if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
443 unsigned long *new;
444
445 new = &mfn_list[pfn];
446
447 copy_page(new, mid_p);
448 p2m_top[topidx][mididx] = &mfn_list[pfn];
449 p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn]);
392 450
451 }
452 /* This should be the leafs allocated for identity from _brk. */
453 }
454 return (unsigned long)mfn_list;
455
456}
457#else
458unsigned long __init xen_revector_p2m_tree(void)
459{
460 return 0;
461}
462#endif
393unsigned long get_phys_to_machine(unsigned long pfn) 463unsigned long get_phys_to_machine(unsigned long pfn)
394{ 464{
395 unsigned topidx, mididx, idx; 465 unsigned topidx, mididx, idx;
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2230f57a6ebe..bb5a8105ea86 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void);
45void xen_unplug_emulated_devices(void); 45void xen_unplug_emulated_devices(void);
46 46
47void __init xen_build_dynamic_phys_to_machine(void); 47void __init xen_build_dynamic_phys_to_machine(void);
48unsigned long __init xen_revector_p2m_tree(void);
48 49
49void xen_init_irq_ops(void); 50void xen_init_irq_ops(void);
50void xen_setup_timer(int cpu); 51void xen_setup_timer(int cpu);