-rw-r--r--	arch/x86/xen/mmu.c	115
1 file changed, 75 insertions, 40 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index eb31ed291b93..046c1f23dd6e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -44,6 +44,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#include <asm/fixmap.h>
 #include <asm/mmu_context.h>
 #include <asm/paravirt.h>
 #include <asm/linkage.h>
@@ -491,77 +492,103 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
 #endif	/* PAGETABLE_LEVELS == 4 */
 
 /*
-  (Yet another) pagetable walker. This one is intended for pinning a
-  pagetable. This means that it walks a pagetable and calls the
-  callback function on each page it finds making up the page table,
-  at every level. It walks the entire pagetable, but it only bothers
-  pinning pte pages which are below pte_limit. In the normal case
-  this will be TASK_SIZE, but at boot we need to pin up to
-  FIXADDR_TOP. But the important bit is that we don't pin beyond
-  there, because then we start getting into Xen's ptes.
-*/
-static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
+ * (Yet another) pagetable walker. This one is intended for pinning a
+ * pagetable. This means that it walks a pagetable and calls the
+ * callback function on each page it finds making up the page table,
+ * at every level. It walks the entire pagetable, but it only bothers
+ * pinning pte pages which are below limit. In the normal case this
+ * will be STACK_TOP_MAX, but at boot we need to pin up to
+ * FIXADDR_TOP.
+ *
+ * For 32-bit the important bit is that we don't pin beyond there,
+ * because then we start getting into Xen's ptes.
+ *
+ * For 64-bit, we must skip the Xen hole in the middle of the address
+ * space, just after the big x86-64 virtual hole.
+ */
+static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 		    unsigned long limit)
 {
-	pgd_t *pgd = pgd_base;
 	int flush = 0;
-	unsigned long addr = 0;
-	unsigned long pgd_next;
+	unsigned hole_low, hole_high;
+	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
+	unsigned pgdidx, pudidx, pmdidx;
 
-	BUG_ON(limit > FIXADDR_TOP);
+	/* The limit is the last byte to be touched */
+	limit--;
+	BUG_ON(limit >= FIXADDR_TOP);
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 0;
 
-	for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
+	/*
+	 * 64-bit has a great big hole in the middle of the address
+	 * space, which contains the Xen mappings.  On 32-bit these
+	 * will end up making a zero-sized hole and so is a no-op.
+	 */
+	hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1);
+	hole_high = pgd_index(PAGE_OFFSET);
+
+	pgdidx_limit = pgd_index(limit);
+#if PTRS_PER_PUD > 1
+	pudidx_limit = pud_index(limit);
+#else
+	pudidx_limit = 0;
+#endif
+#if PTRS_PER_PMD > 1
+	pmdidx_limit = pmd_index(limit);
+#else
+	pmdidx_limit = 0;
+#endif
+
+	flush |= (*func)(virt_to_page(pgd), PT_PGD);
+
+	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
 		pud_t *pud;
-		unsigned long pud_limit, pud_next;
 
-		pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);
+		if (pgdidx >= hole_low && pgdidx < hole_high)
+			continue;
 
-		if (!pgd_val(*pgd))
+		if (!pgd_val(pgd[pgdidx]))
 			continue;
 
-		pud = pud_offset(pgd, 0);
+		pud = pud_offset(&pgd[pgdidx], 0);
 
 		if (PTRS_PER_PUD > 1) /* not folded */
 			flush |= (*func)(virt_to_page(pud), PT_PUD);
 
-		for (; addr != pud_limit; pud++, addr = pud_next) {
+		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
 			pmd_t *pmd;
-			unsigned long pmd_limit;
 
-			pud_next = pud_addr_end(addr, pud_limit);
-
-			if (pud_next < limit)
-				pmd_limit = pud_next;
-			else
-				pmd_limit = limit;
+			if (pgdidx == pgdidx_limit &&
+			    pudidx > pudidx_limit)
+				goto out;
 
-			if (pud_none(*pud))
+			if (pud_none(pud[pudidx]))
 				continue;
 
-			pmd = pmd_offset(pud, 0);
+			pmd = pmd_offset(&pud[pudidx], 0);
 
 			if (PTRS_PER_PMD > 1) /* not folded */
 				flush |= (*func)(virt_to_page(pmd), PT_PMD);
 
-			for (; addr != pmd_limit; pmd++) {
-				addr += (PAGE_SIZE * PTRS_PER_PTE);
-				if ((pmd_limit-1) < (addr-1)) {
-					addr = pmd_limit;
-					break;
-				}
+			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
+				struct page *pte;
+
+				if (pgdidx == pgdidx_limit &&
+				    pudidx == pudidx_limit &&
+				    pmdidx > pmdidx_limit)
+					goto out;
 
-				if (pmd_none(*pmd))
+				if (pmd_none(pmd[pmdidx]))
 					continue;
 
-				flush |= (*func)(pmd_page(*pmd), PT_PTE);
+				pte = pmd_page(pmd[pmdidx]);
+				flush |= (*func)(pte, PT_PTE);
 			}
 		}
 	}
-
-	flush |= (*func)(virt_to_page(pgd_base), PT_PGD);
+out:
 
 	return flush;
 }
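
The reworked walker above replaces the old address-based loop with per-level index loops and skips the PGD slots covering the hypervisor hole. To get a feel for the hole_low/hole_high arithmetic, here is a minimal standalone sketch (not part of this patch): PGDIR_SHIFT, STACK_TOP_MAX and PAGE_OFFSET below are stand-ins assuming an era-typical x86-64 layout and a 64-bit host, and the program simply evaluates the same two expressions the patch adds.

#include <stdio.h>

#define PGDIR_SHIFT	39
#define PTRS_PER_PGD	512ULL
#define PGDIR_SIZE	(1ULL << PGDIR_SHIFT)

#define STACK_TOP_MAX	0x00007ffffffff000ULL	/* top of the 47-bit user range (assumed) */
#define PAGE_OFFSET	0xffff810000000000ULL	/* direct-map base typical for that era (assumed) */

static unsigned long long pgd_index(unsigned long long addr)
{
	return (addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}

int main(void)
{
	unsigned long long hole_low  = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1);
	unsigned long long hole_high = pgd_index(PAGE_OFFSET);

	/* pgd_walk() skips pgd indices in [hole_low, hole_high): with these
	 * numbers that is slots 256 and 257, the entries covering the
	 * hypervisor hole.  With 32-bit values both expressions evaluate to
	 * the same index, so the range is empty and nothing is skipped. */
	printf("hole_low=%llu hole_high=%llu\n", hole_low, hole_high);
	return 0;
}

Note also that the patch now treats limit as the last byte to be touched (hence the limit-- before the BUG_ON), so pgdidx_limit, pudidx_limit and pmdidx_limit are inclusive bounds checked by the goto out tests.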
@@ -650,6 +677,11 @@ void xen_pgd_pin(pgd_t *pgd)
 		xen_mc_batch();
 	}
 
+#ifdef CONFIG_X86_PAE
+	/* Need to make sure unshared kernel PMD is pinnable */
+	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
+
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 	xen_mc_issue(0);
 }
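
The new CONFIG_X86_PAE block pins the kernel PMD by hand: with an unshared kernel PMD, a walk bounded by the user limit never reaches the entry at pgd_index(TASK_SIZE). As a rough illustration of which slot that is, here is a standalone sketch (illustrative only, assuming the common 3GB/1GB split; TASK_SIZE and the PAE constants are stand-ins for the kernel's definitions).

#include <stdio.h>

#define PGDIR_SHIFT	30		/* PAE: four 1GB page-directory entries */
#define PTRS_PER_PGD	4u
#define TASK_SIZE	0xC0000000u	/* assumed 3GB/1GB split */

int main(void)
{
	unsigned idx = (TASK_SIZE >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);

	/* idx is 3: the last PAE pgd entry, i.e. the kernel PMD that a walk
	 * limited to TASK_SIZE never visits, hence the explicit pin above. */
	printf("kernel pmd sits at pgd[%u]\n", idx);
	return 0;
}

The unpin path in the next hunk adds the mirror-image block around its pgd_walk(pgd, unpin_page, TASK_SIZE) call.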
@@ -731,6 +763,10 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
+#ifdef CONFIG_X86_PAE
+	/* Need to make sure unshared kernel PMD is unpinned */
+	pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
 	pgd_walk(pgd, unpin_page, TASK_SIZE);
 
 	xen_mc_issue(0);
@@ -750,7 +786,6 @@ void xen_mm_unpin_all(void)
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
 			BUG_ON(!PagePinned(page));
-			printk("unpinning pinned %p\n", page_address(page));
 			xen_pgd_unpin((pgd_t *)page_address(page));
 			ClearPageSavePinned(page);
 		}
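
Finally, a stub of the callback shape pgd_walk() drives, distilled from the hunks above rather than copied from mmu.c (the real callbacks are pin_page() and unpin_page(), and the pt_level ordering here is an assumption). It only illustrates the convention visible in the walker: the return values are ORed into flush and the aggregate is handed back to the caller.

/* Illustrative stub only; not taken from arch/x86/xen/mmu.c. */
struct page;					/* opaque; provided by the kernel */
enum pt_level { PT_PGD, PT_PUD, PT_PMD, PT_PTE };

/* Invoked once per pagetable page at each level; pgd_walk() does
 * "flush |= (*func)(page, level)" and returns the aggregate, so a
 * non-zero return is how a callback signals that a flush is needed. */
int example_pin_callback(struct page *page, enum pt_level level)
{
	(void)page;
	(void)level;
	return 0;				/* nothing to flush in this stub */
}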