author		Jeremy Fitzhardinge <jeremy@goop.org>	2008-07-08 18:07:06 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-07-16 05:03:59 -0400
commit		5deb30d194d28b6bf7dacfb758267a51bf7c5b78
tree		fab33e09b8ea65a6e7144cf8b487dba539fdc15d /arch/x86/xen/mmu.c
parent		a8fc1089e49caa5dca346dfacb5c84abf9a22a0c
xen: rework pgd_walk to deal with 32/64 bit
Rewrite pgd_walk to deal with 64-bit address spaces. There are two notable features of 64-bit address spaces:

1. The virtual address is only 48 bits wide, with the upper 16 bits being sign extension; kernel addresses are negative, and userspace is positive.

2. The Xen hypervisor mapping is at the negative-most address, just above the sign-extension hole.

Point 1 means that we can't easily use addresses when traversing the space, since we must deal with sign extension. This rewrite expresses everything in terms of pgd/pud/pmd indices, which means we don't need to worry about the exact configuration of the virtual memory space. This approach works equally well on 32-bit.

To deal with point 2, assume the hole is between the uppermost userspace address and PAGE_OFFSET. For 64-bit this skips the Xen mapping hole. For 32-bit, the hole is zero-sized.

In all cases, the uppermost kernel address is FIXADDR_TOP.

A side-effect of this patch is that the upper boundary is now handled properly, exposing a long-standing bug in the 32-bit code, which failed to pin the kernel pmd page. The kernel pmd is not shared, and so must be explicitly pinned, even though the kernel ptes are shared and don't need pinning.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
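[Editorial illustration, not part of the commit.] A minimal standalone sketch of the hole_low/hole_high arithmetic the patch introduces, showing why the skipped pgd range collapses to zero entries on 32-bit and covers the Xen area on 64-bit. The layout constants (3G/1G split, 4 MiB pgd entries, 47-bit user space, PAGE_OFFSET = 0xffff880000000000) are illustrative assumptions only, not taken from this kernel version.

#include <stdio.h>
#include <stdint.h>

/* Simplified model of pgd_index(): index of the pgd entry covering addr. */
static unsigned pgd_index(uint64_t addr, unsigned shift, unsigned entries)
{
	return (unsigned)((addr >> shift) & (entries - 1));
}

static void show_hole(const char *name, uint64_t stack_top_max,
		      uint64_t page_offset, unsigned shift, unsigned entries)
{
	uint64_t pgdir_size = 1ULL << shift;
	/* Same arithmetic as the patch: the walker skips pgdidx in [hole_low, hole_high). */
	unsigned hole_low  = pgd_index(stack_top_max + pgdir_size - 1, shift, entries);
	unsigned hole_high = pgd_index(page_offset, shift, entries);

	printf("%s: hole_low=%u hole_high=%u -> %u pgd entries skipped\n",
	       name, hole_low, hole_high,
	       hole_high > hole_low ? hole_high - hole_low : 0);
}

int main(void)
{
	/* Assumed 32-bit non-PAE layout: 4 MiB pgd entries,
	 * STACK_TOP_MAX == TASK_SIZE == PAGE_OFFSET == 0xC0000000. */
	show_hole("32-bit", 0xC0000000ULL, 0xC0000000ULL, 22, 1024);

	/* Assumed x86-64 layout: 512 GiB pgd entries, user space ending just
	 * below 2^47, PAGE_OFFSET = 0xffff880000000000 (illustrative values). */
	show_hole("64-bit", 0x00007ffffffff000ULL, 0xffff880000000000ULL, 39, 512);

	return 0;
}

With these assumed constants the 32-bit case prints hole_low == hole_high (no entries skipped), while the 64-bit case skips the 16 pgd entries between the top of user space and PAGE_OFFSET, which is where the Xen mappings live.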
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--	arch/x86/xen/mmu.c	115
1 file changed, 75 insertions(+), 40 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index eb31ed291b93..046c1f23dd6e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -44,6 +44,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#include <asm/fixmap.h>
 #include <asm/mmu_context.h>
 #include <asm/paravirt.h>
 #include <asm/linkage.h>
@@ -491,77 +492,103 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
 #endif	/* PAGETABLE_LEVELS == 4 */
 
 /*
-  (Yet another) pagetable walker. This one is intended for pinning a
-  pagetable. This means that it walks a pagetable and calls the
-  callback function on each page it finds making up the page table,
-  at every level. It walks the entire pagetable, but it only bothers
-  pinning pte pages which are below pte_limit. In the normal case
-  this will be TASK_SIZE, but at boot we need to pin up to
-  FIXADDR_TOP. But the important bit is that we don't pin beyond
-  there, because then we start getting into Xen's ptes.
-*/
-static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
+ * (Yet another) pagetable walker. This one is intended for pinning a
+ * pagetable. This means that it walks a pagetable and calls the
+ * callback function on each page it finds making up the page table,
+ * at every level. It walks the entire pagetable, but it only bothers
+ * pinning pte pages which are below limit. In the normal case this
+ * will be STACK_TOP_MAX, but at boot we need to pin up to
+ * FIXADDR_TOP.
+ *
+ * For 32-bit the important bit is that we don't pin beyond there,
+ * because then we start getting into Xen's ptes.
+ *
+ * For 64-bit, we must skip the Xen hole in the middle of the address
+ * space, just after the big x86-64 virtual hole.
+ */
+static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 		    unsigned long limit)
 {
-	pgd_t *pgd = pgd_base;
 	int flush = 0;
-	unsigned long addr = 0;
-	unsigned long pgd_next;
+	unsigned hole_low, hole_high;
+	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
+	unsigned pgdidx, pudidx, pmdidx;
 
-	BUG_ON(limit > FIXADDR_TOP);
+	/* The limit is the last byte to be touched */
+	limit--;
+	BUG_ON(limit >= FIXADDR_TOP);
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 0;
 
-	for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
+	/*
+	 * 64-bit has a great big hole in the middle of the address
+	 * space, which contains the Xen mappings. On 32-bit these
+	 * will end up making a zero-sized hole and so is a no-op.
+	 */
+	hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1);
+	hole_high = pgd_index(PAGE_OFFSET);
+
+	pgdidx_limit = pgd_index(limit);
+#if PTRS_PER_PUD > 1
+	pudidx_limit = pud_index(limit);
+#else
+	pudidx_limit = 0;
+#endif
+#if PTRS_PER_PMD > 1
+	pmdidx_limit = pmd_index(limit);
+#else
+	pmdidx_limit = 0;
+#endif
+
+	flush |= (*func)(virt_to_page(pgd), PT_PGD);
+
+	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
 		pud_t *pud;
-		unsigned long pud_limit, pud_next;
 
-		pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);
+		if (pgdidx >= hole_low && pgdidx < hole_high)
+			continue;
 
-		if (!pgd_val(*pgd))
+		if (!pgd_val(pgd[pgdidx]))
 			continue;
 
-		pud = pud_offset(pgd, 0);
+		pud = pud_offset(&pgd[pgdidx], 0);
 
 		if (PTRS_PER_PUD > 1) /* not folded */
 			flush |= (*func)(virt_to_page(pud), PT_PUD);
 
-		for (; addr != pud_limit; pud++, addr = pud_next) {
+		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
 			pmd_t *pmd;
-			unsigned long pmd_limit;
 
-			pud_next = pud_addr_end(addr, pud_limit);
-
-			if (pud_next < limit)
-				pmd_limit = pud_next;
-			else
-				pmd_limit = limit;
+			if (pgdidx == pgdidx_limit &&
+			    pudidx > pudidx_limit)
+				goto out;
 
-			if (pud_none(*pud))
+			if (pud_none(pud[pudidx]))
 				continue;
 
-			pmd = pmd_offset(pud, 0);
+			pmd = pmd_offset(&pud[pudidx], 0);
 
 			if (PTRS_PER_PMD > 1) /* not folded */
 				flush |= (*func)(virt_to_page(pmd), PT_PMD);
 
-			for (; addr != pmd_limit; pmd++) {
-				addr += (PAGE_SIZE * PTRS_PER_PTE);
-				if ((pmd_limit-1) < (addr-1)) {
-					addr = pmd_limit;
-					break;
-				}
+			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
+				struct page *pte;
+
+				if (pgdidx == pgdidx_limit &&
+				    pudidx == pudidx_limit &&
+				    pmdidx > pmdidx_limit)
+					goto out;
 
-				if (pmd_none(*pmd))
+				if (pmd_none(pmd[pmdidx]))
 					continue;
 
-				flush |= (*func)(pmd_page(*pmd), PT_PTE);
+				pte = pmd_page(pmd[pmdidx]);
+				flush |= (*func)(pte, PT_PTE);
 			}
 		}
 	}
-
-	flush |= (*func)(virt_to_page(pgd_base), PT_PGD);
+out:
 
 	return flush;
 }
@@ -650,6 +677,11 @@ void xen_pgd_pin(pgd_t *pgd)
 		xen_mc_batch();
 	}
 
+#ifdef CONFIG_X86_PAE
+	/* Need to make sure unshared kernel PMD is pinnable */
+	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
+
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 	xen_mc_issue(0);
 }
@@ -731,6 +763,10 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
+#ifdef CONFIG_X86_PAE
+	/* Need to make sure unshared kernel PMD is unpinned */
+	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
 	pgd_walk(pgd, unpin_page, TASK_SIZE);
 
 	xen_mc_issue(0);
@@ -750,7 +786,6 @@ void xen_mm_unpin_all(void)
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
 			BUG_ON(!PagePinned(page));
-			printk("unpinning pinned %p\n", page_address(page));
 			xen_pgd_unpin((pgd_t *)page_address(page));
 			ClearPageSavePinned(page);
 		}