Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--  arch/x86/xen/mmu.c  316
1 file changed, 227 insertions, 89 deletions

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ff0aa74afaa..a44d56e38bd 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -44,8 +44,10 @@
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#include <asm/fixmap.h>
 #include <asm/mmu_context.h>
 #include <asm/paravirt.h>
+#include <asm/linkage.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -56,26 +58,29 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+/*
+ * Just beyond the highest usermode address.  STACK_TOP_MAX has a
+ * redzone above it, so round it up to a PGD boundary.
+ */
+#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
+
+
 #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
 #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
 
 /* Placeholder for holes in the address space */
-static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
-        __attribute__((section(".data.page_aligned"))) =
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
                 { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
 
 /* Array of pointers to pages containing p2m entries */
-static unsigned long *p2m_top[TOP_ENTRIES]
-        __attribute__((section(".data.page_aligned"))) =
+static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
                 { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
 
 /* Arrays of p2m arrays expressed in mfns used for save/restore */
-static unsigned long p2m_top_mfn[TOP_ENTRIES]
-        __attribute__((section(".bss.page_aligned")));
+static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
 
-static unsigned long p2m_top_mfn_list[
-                PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
-        __attribute__((section(".bss.page_aligned")));
+static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
+        __page_aligned_bss;
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
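
Note (illustrative, not part of the patch): the two p2m arrays above form a simple two-level lookup. A pfn's upper bits pick one of the TOP_ENTRIES pages via p2m_top_index(), shown in context above, and the low bits pick a slot inside that page; unpopulated slots all point at p2m_missing, whose entries are ~0UL. A minimal sketch of that lookup, with the p2m_index() helper body assumed here for illustration:

/* Sketch only -- mirrors the two-level p2m layout declared above.
 * p2m_index() is assumed; p2m_top_index() appears in the hunk above. */
static inline unsigned p2m_index(unsigned long pfn)
{
        return pfn % P2M_ENTRIES_PER_PAGE;      /* slot within one p2m page */
}

static unsigned long sketch_pfn_to_mfn(unsigned long pfn)
{
        unsigned topidx = p2m_top_index(pfn);   /* which p2m page */

        /* Holes resolve through p2m_missing and so return ~0UL. */
        return p2m_top[topidx][p2m_index(pfn)];
}
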
@@ -181,15 +186,16 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
         p2m_top[topidx][idx] = mfn;
 }
 
-xmaddr_t arbitrary_virt_to_machine(unsigned long address)
+xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 {
+        unsigned long address = (unsigned long)vaddr;
         unsigned int level;
         pte_t *pte = lookup_address(address, &level);
         unsigned offset = address & ~PAGE_MASK;
 
         BUG_ON(pte == NULL);
 
-        return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
+        return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
 }
 
 void make_lowmem_page_readonly(void *vaddr)
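
Note (illustrative, not part of the patch): the new (phys_addr_t) cast in arbitrary_virt_to_machine() matters on 32-bit PAE, where unsigned long is 32 bits wide but a machine frame can sit above 4GB; without the cast, the shifted mfn would be truncated before XMADDR() ever sees it. A hedged sketch of the effect:

/* Sketch only: why the cast is needed on 32-bit PAE, where unsigned long
 * is 32 bits wide but machine addresses are not. */
static phys_addr_t sketch_mfn_to_maddr(unsigned long mfn, unsigned offset)
{
        /* Without the cast, mfn << PAGE_SHIFT is computed in 32 bits and
         * wraps for any mfn >= (1UL << (32 - PAGE_SHIFT)). */
        return ((phys_addr_t)mfn << PAGE_SHIFT) + offset;
}
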
@@ -256,7 +262,8 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
 
         xen_mc_batch();
 
-        u.ptr = virt_to_machine(ptr).maddr;
+        /* ptr may be ioremapped for 64-bit pagetable setup */
+        u.ptr = arbitrary_virt_to_machine(ptr).maddr;
         u.val = pmd_val_ma(val);
         extend_mmu_update(&u);
 
@@ -283,35 +290,7 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
  */
 void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 {
-        pgd_t *pgd;
-        pud_t *pud;
-        pmd_t *pmd;
-        pte_t *pte;
-
-        pgd = swapper_pg_dir + pgd_index(vaddr);
-        if (pgd_none(*pgd)) {
-                BUG();
-                return;
-        }
-        pud = pud_offset(pgd, vaddr);
-        if (pud_none(*pud)) {
-                BUG();
-                return;
-        }
-        pmd = pmd_offset(pud, vaddr);
-        if (pmd_none(*pmd)) {
-                BUG();
-                return;
-        }
-        pte = pte_offset_kernel(pmd, vaddr);
-        /* <mfn,flags> stored as-is, to permit clearing entries */
-        xen_set_pte(pte, mfn_pte(mfn, flags));
-
-        /*
-         * It's enough to flush this one mapping.
-         * (PGE mappings get flushed as well)
-         */
-        __flush_tlb_one(vaddr);
+        set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
 }
 
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -418,7 +397,8 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 
         xen_mc_batch();
 
-        u.ptr = virt_to_machine(ptr).maddr;
+        /* ptr may be ioremapped for 64-bit pagetable setup */
+        u.ptr = arbitrary_virt_to_machine(ptr).maddr;
         u.val = pud_val_ma(val);
         extend_mmu_update(&u);
 
@@ -441,14 +421,19 @@ void xen_set_pud(pud_t *ptr, pud_t val)
 
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
+#ifdef CONFIG_X86_PAE
         ptep->pte_high = pte.pte_high;
         smp_wmb();
         ptep->pte_low = pte.pte_low;
+#else
+        *ptep = pte;
+#endif
 }
 
+#ifdef CONFIG_X86_PAE
 void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
-        set_64bit((u64 *)ptep, pte_val_ma(pte));
+        set_64bit((u64 *)ptep, native_pte_val(pte));
 }
 
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
@@ -462,6 +447,7 @@ void xen_pmd_clear(pmd_t *pmdp)
 {
         set_pmd(pmdp, __pmd(0));
 }
+#endif  /* CONFIG_X86_PAE */
 
 pmd_t xen_make_pmd(pmdval_t pmd)
 {
@@ -469,78 +455,189 @@ pmd_t xen_make_pmd(pmdval_t pmd)
         return native_make_pmd(pmd);
 }
 
+#if PAGETABLE_LEVELS == 4
+pudval_t xen_pud_val(pud_t pud)
+{
+        return pte_mfn_to_pfn(pud.pud);
+}
+
+pud_t xen_make_pud(pudval_t pud)
+{
+        pud = pte_pfn_to_mfn(pud);
+
+        return native_make_pud(pud);
+}
+
+pgd_t *xen_get_user_pgd(pgd_t *pgd)
+{
+        pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
+        unsigned offset = pgd - pgd_page;
+        pgd_t *user_ptr = NULL;
+
+        if (offset < pgd_index(USER_LIMIT)) {
+                struct page *page = virt_to_page(pgd_page);
+                user_ptr = (pgd_t *)page->private;
+                if (user_ptr)
+                        user_ptr += offset;
+        }
+
+        return user_ptr;
+}
+
+static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+        struct mmu_update u;
+
+        u.ptr = virt_to_machine(ptr).maddr;
+        u.val = pgd_val_ma(val);
+        extend_mmu_update(&u);
+}
+
+/*
+ * Raw hypercall-based set_pgd, intended for in early boot before
+ * there's a page structure.  This implies:
+ *  1. The only existing pagetable is the kernel's
+ *  2. It is always pinned
+ *  3. It has no user pagetable attached to it
+ */
+void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+{
+        preempt_disable();
+
+        xen_mc_batch();
+
+        __xen_set_pgd_hyper(ptr, val);
+
+        xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+        preempt_enable();
+}
+
+void xen_set_pgd(pgd_t *ptr, pgd_t val)
+{
+        pgd_t *user_ptr = xen_get_user_pgd(ptr);
+
+        /* If page is not pinned, we can just update the entry
+           directly */
+        if (!page_pinned(ptr)) {
+                *ptr = val;
+                if (user_ptr) {
+                        WARN_ON(page_pinned(user_ptr));
+                        *user_ptr = val;
+                }
+                return;
+        }
+
+        /* If it's pinned, then we can at least batch the kernel and
+           user updates together. */
+        xen_mc_batch();
+
+        __xen_set_pgd_hyper(ptr, val);
+        if (user_ptr)
+                __xen_set_pgd_hyper(user_ptr, val);
+
+        xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+#endif  /* PAGETABLE_LEVELS == 4 */
+
 /*
- (Yet another) pagetable walker.  This one is intended for pinning a
- pagetable.  This means that it walks a pagetable and calls the
- callback function on each page it finds making up the page table,
- at every level.  It walks the entire pagetable, but it only bothers
- pinning pte pages which are below pte_limit.  In the normal case
- this will be TASK_SIZE, but at boot we need to pin up to
- FIXADDR_TOP.  But the important bit is that we don't pin beyond
- there, because then we start getting into Xen's ptes.
- */
-static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
+ * (Yet another) pagetable walker.  This one is intended for pinning a
+ * pagetable.  This means that it walks a pagetable and calls the
+ * callback function on each page it finds making up the page table,
+ * at every level.  It walks the entire pagetable, but it only bothers
+ * pinning pte pages which are below limit.  In the normal case this
+ * will be STACK_TOP_MAX, but at boot we need to pin up to
+ * FIXADDR_TOP.
+ *
+ * For 32-bit the important bit is that we don't pin beyond there,
+ * because then we start getting into Xen's ptes.
+ *
+ * For 64-bit, we must skip the Xen hole in the middle of the address
+ * space, just after the big x86-64 virtual hole.
+ */
+static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
                     unsigned long limit)
 {
-        pgd_t *pgd = pgd_base;
         int flush = 0;
-        unsigned long addr = 0;
-        unsigned long pgd_next;
+        unsigned hole_low, hole_high;
+        unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
+        unsigned pgdidx, pudidx, pmdidx;
 
-        BUG_ON(limit > FIXADDR_TOP);
+        /* The limit is the last byte to be touched */
+        limit--;
+        BUG_ON(limit >= FIXADDR_TOP);
 
         if (xen_feature(XENFEAT_auto_translated_physmap))
                 return 0;
 
-        for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
+        /*
+         * 64-bit has a great big hole in the middle of the address
+         * space, which contains the Xen mappings.  On 32-bit these
+         * will end up making a zero-sized hole and so is a no-op.
+         */
+        hole_low = pgd_index(USER_LIMIT);
+        hole_high = pgd_index(PAGE_OFFSET);
+
+        pgdidx_limit = pgd_index(limit);
+#if PTRS_PER_PUD > 1
+        pudidx_limit = pud_index(limit);
+#else
+        pudidx_limit = 0;
+#endif
+#if PTRS_PER_PMD > 1
+        pmdidx_limit = pmd_index(limit);
+#else
+        pmdidx_limit = 0;
+#endif
+
+        flush |= (*func)(virt_to_page(pgd), PT_PGD);
+
+        for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
                 pud_t *pud;
-                unsigned long pud_limit, pud_next;
 
-                pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);
+                if (pgdidx >= hole_low && pgdidx < hole_high)
+                        continue;
 
-                if (!pgd_val(*pgd))
+                if (!pgd_val(pgd[pgdidx]))
                         continue;
 
-                pud = pud_offset(pgd, 0);
+                pud = pud_offset(&pgd[pgdidx], 0);
 
                 if (PTRS_PER_PUD > 1) /* not folded */
                         flush |= (*func)(virt_to_page(pud), PT_PUD);
 
-                for (; addr != pud_limit; pud++, addr = pud_next) {
+                for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
                         pmd_t *pmd;
-                        unsigned long pmd_limit;
 
-                        pud_next = pud_addr_end(addr, pud_limit);
-
-                        if (pud_next < limit)
-                                pmd_limit = pud_next;
-                        else
-                                pmd_limit = limit;
+                        if (pgdidx == pgdidx_limit &&
+                            pudidx > pudidx_limit)
+                                goto out;
 
-                        if (pud_none(*pud))
+                        if (pud_none(pud[pudidx]))
                                 continue;
 
-                        pmd = pmd_offset(pud, 0);
+                        pmd = pmd_offset(&pud[pudidx], 0);
 
                         if (PTRS_PER_PMD > 1) /* not folded */
                                 flush |= (*func)(virt_to_page(pmd), PT_PMD);
 
-                        for (; addr != pmd_limit; pmd++) {
-                                addr += (PAGE_SIZE * PTRS_PER_PTE);
-                                if ((pmd_limit-1) < (addr-1)) {
-                                        addr = pmd_limit;
-                                        break;
-                                }
+                        for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
+                                struct page *pte;
+
+                                if (pgdidx == pgdidx_limit &&
+                                    pudidx == pudidx_limit &&
+                                    pmdidx > pmdidx_limit)
+                                        goto out;
 
-                                if (pmd_none(*pmd))
+                                if (pmd_none(pmd[pmdidx]))
                                         continue;
 
-                                flush |= (*func)(pmd_page(*pmd), PT_PTE);
+                                pte = pmd_page(pmd[pmdidx]);
+                                flush |= (*func)(pte, PT_PTE);
                         }
                 }
         }
-
-        flush |= (*func)(virt_to_page(pgd_base), PT_PGD);
+out:
 
         return flush;
 }
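
Note (illustrative, not part of the patch): the hole skipped by the rewritten pgd_walk() is the pgd-index range [pgd_index(USER_LIMIT), pgd_index(PAGE_OFFSET)), which on x86-64 covers the Xen-owned mappings between the end of user space and the start of the kernel mappings; on 32-bit the two indices coincide, so the range is empty and nothing is skipped, as the new comment says. A minimal sketch of that per-slot guard:

/* Sketch only: the per-slot guard applied inside pgd_walk() above. */
static int sketch_in_xen_hole(unsigned pgdidx)
{
        unsigned hole_low  = pgd_index(USER_LIMIT);     /* first slot past user space */
        unsigned hole_high = pgd_index(PAGE_OFFSET);    /* first kernel-mapping slot */

        /* Zero-sized on 32-bit, so this never fires there. */
        return pgdidx >= hole_low && pgdidx < hole_high;
}
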
@@ -622,14 +719,31 @@ void xen_pgd_pin(pgd_t *pgd)
 {
         xen_mc_batch();
 
-        if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
+        if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
                 /* re-enable interrupts for kmap_flush_unused */
                 xen_mc_issue(0);
                 kmap_flush_unused();
                 xen_mc_batch();
         }
 
+#ifdef CONFIG_X86_64
+        {
+                pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+                xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
+
+                if (user_pgd) {
+                        pin_page(virt_to_page(user_pgd), PT_PGD);
+                        xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
+                }
+        }
+#else /* CONFIG_X86_32 */
+#ifdef CONFIG_X86_PAE
+        /* Need to make sure unshared kernel PMD is pinnable */
+        pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
         xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
+#endif /* CONFIG_X86_64 */
         xen_mc_issue(0);
 }
 
@@ -656,9 +770,11 @@ void xen_mm_pin_all(void)
         spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-/* The init_mm pagetable is really pinned as soon as its created, but
-   that's before we have page structures to store the bits.  So do all
-   the book-keeping now. */
+/*
+ * The init_mm pagetable is really pinned as soon as its created, but
+ * that's before we have page structures to store the bits.  So do all
+ * the book-keeping now.
+ */
 static __init int mark_pinned(struct page *page, enum pt_level level)
 {
         SetPagePinned(page);
@@ -708,7 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
         xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-        pgd_walk(pgd, unpin_page, TASK_SIZE);
+#ifdef CONFIG_X86_64
+        {
+                pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
+                if (user_pgd) {
+                        xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
+                        unpin_page(virt_to_page(user_pgd), PT_PGD);
+                }
+        }
+#endif
+
+#ifdef CONFIG_X86_PAE
+        /* Need to make sure unshared kernel PMD is unpinned */
+        pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+#endif
+
+        pgd_walk(pgd, unpin_page, USER_LIMIT);
 
         xen_mc_issue(0);
 }
@@ -727,7 +859,6 @@ void xen_mm_unpin_all(void)
         list_for_each_entry(page, &pgd_list, lru) {
                 if (PageSavePinned(page)) {
                         BUG_ON(!PagePinned(page));
-                        printk("unpinning pinned %p\n", page_address(page));
                         xen_pgd_unpin((pgd_t *)page_address(page));
                         ClearPageSavePinned(page);
                 }
@@ -757,8 +888,15 @@ void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 static void drop_other_mm_ref(void *info)
 {
         struct mm_struct *mm = info;
+        struct mm_struct *active_mm;
+
+#ifdef CONFIG_X86_64
+        active_mm = read_pda(active_mm);
+#else
+        active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
+#endif
 
-        if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
+        if (active_mm == mm)
                 leave_mm(smp_processor_id());
 
         /* If this cpu still has a stale cr3 reference, then make sure
