diff options
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r-- | arch/x86/xen/mmu.c | 276 |
1 files changed, 257 insertions, 19 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index df40bf74ea75..ff0aa74afaa1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -56,6 +56,131 @@ | |||
56 | #include "multicalls.h" | 56 | #include "multicalls.h" |
57 | #include "mmu.h" | 57 | #include "mmu.h" |
58 | 58 | ||
59 | #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
60 | #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) | ||
61 | |||
62 | /* Placeholder for holes in the address space */ | ||
63 | static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] | ||
64 | __attribute__((section(".data.page_aligned"))) = | ||
65 | { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; | ||
66 | |||
67 | /* Array of pointers to pages containing p2m entries */ | ||
68 | static unsigned long *p2m_top[TOP_ENTRIES] | ||
69 | __attribute__((section(".data.page_aligned"))) = | ||
70 | { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; | ||
71 | |||
72 | /* Arrays of p2m arrays expressed in mfns used for save/restore */ | ||
73 | static unsigned long p2m_top_mfn[TOP_ENTRIES] | ||
74 | __attribute__((section(".bss.page_aligned"))); | ||
75 | |||
76 | static unsigned long p2m_top_mfn_list[ | ||
77 | PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)] | ||
78 | __attribute__((section(".bss.page_aligned"))); | ||
79 | |||
80 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
81 | { | ||
82 | BUG_ON(pfn >= MAX_DOMAIN_PAGES); | ||
83 | return pfn / P2M_ENTRIES_PER_PAGE; | ||
84 | } | ||
85 | |||
86 | static inline unsigned p2m_index(unsigned long pfn) | ||
87 | { | ||
88 | return pfn % P2M_ENTRIES_PER_PAGE; | ||
89 | } | ||
90 | |||
91 | /* Build the parallel p2m_top_mfn structures */ | ||
92 | void xen_setup_mfn_list_list(void) | ||
93 | { | ||
94 | unsigned pfn, idx; | ||
95 | |||
96 | for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { | ||
97 | unsigned topidx = p2m_top_index(pfn); | ||
98 | |||
99 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); | ||
100 | } | ||
101 | |||
102 | for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { | ||
103 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; | ||
104 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); | ||
105 | } | ||
106 | |||
107 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
108 | |||
109 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
110 | virt_to_mfn(p2m_top_mfn_list); | ||
111 | HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; | ||
112 | } | ||
113 | |||
114 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
115 | void __init xen_build_dynamic_phys_to_machine(void) | ||
116 | { | ||
117 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
118 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
119 | unsigned pfn; | ||
120 | |||
121 | for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { | ||
122 | unsigned topidx = p2m_top_index(pfn); | ||
123 | |||
124 | p2m_top[topidx] = &mfn_list[pfn]; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
129 | { | ||
130 | unsigned topidx, idx; | ||
131 | |||
132 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) | ||
133 | return INVALID_P2M_ENTRY; | ||
134 | |||
135 | topidx = p2m_top_index(pfn); | ||
136 | idx = p2m_index(pfn); | ||
137 | return p2m_top[topidx][idx]; | ||
138 | } | ||
139 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
140 | |||
141 | static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) | ||
142 | { | ||
143 | unsigned long *p; | ||
144 | unsigned i; | ||
145 | |||
146 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); | ||
147 | BUG_ON(p == NULL); | ||
148 | |||
149 | for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) | ||
150 | p[i] = INVALID_P2M_ENTRY; | ||
151 | |||
152 | if (cmpxchg(pp, p2m_missing, p) != p2m_missing) | ||
153 | free_page((unsigned long)p); | ||
154 | else | ||
155 | *mfnp = virt_to_mfn(p); | ||
156 | } | ||
157 | |||
158 | void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
159 | { | ||
160 | unsigned topidx, idx; | ||
161 | |||
162 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
163 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
164 | return; | ||
165 | } | ||
166 | |||
167 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { | ||
168 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
169 | return; | ||
170 | } | ||
171 | |||
172 | topidx = p2m_top_index(pfn); | ||
173 | if (p2m_top[topidx] == p2m_missing) { | ||
174 | /* no need to allocate a page to store an invalid entry */ | ||
175 | if (mfn == INVALID_P2M_ENTRY) | ||
176 | return; | ||
177 | alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); | ||
178 | } | ||
179 | |||
180 | idx = p2m_index(pfn); | ||
181 | p2m_top[topidx][idx] = mfn; | ||
182 | } | ||
183 | |||
59 | xmaddr_t arbitrary_virt_to_machine(unsigned long address) | 184 | xmaddr_t arbitrary_virt_to_machine(unsigned long address) |
60 | { | 185 | { |
61 | unsigned int level; | 186 | unsigned int level; |
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr) | |||
98 | } | 223 | } |
99 | 224 | ||
100 | 225 | ||
101 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | 226 | static bool page_pinned(void *ptr) |
227 | { | ||
228 | struct page *page = virt_to_page(ptr); | ||
229 | |||
230 | return PagePinned(page); | ||
231 | } | ||
232 | |||
233 | static void extend_mmu_update(const struct mmu_update *update) | ||
102 | { | 234 | { |
103 | struct multicall_space mcs; | 235 | struct multicall_space mcs; |
104 | struct mmu_update *u; | 236 | struct mmu_update *u; |
105 | 237 | ||
106 | preempt_disable(); | 238 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); |
239 | |||
240 | if (mcs.mc != NULL) | ||
241 | mcs.mc->args[1]++; | ||
242 | else { | ||
243 | mcs = __xen_mc_entry(sizeof(*u)); | ||
244 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | ||
245 | } | ||
107 | 246 | ||
108 | mcs = xen_mc_entry(sizeof(*u)); | ||
109 | u = mcs.args; | 247 | u = mcs.args; |
110 | u->ptr = virt_to_machine(ptr).maddr; | 248 | *u = *update; |
111 | u->val = pmd_val_ma(val); | 249 | } |
112 | MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); | 250 | |
251 | void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | ||
252 | { | ||
253 | struct mmu_update u; | ||
254 | |||
255 | preempt_disable(); | ||
256 | |||
257 | xen_mc_batch(); | ||
258 | |||
259 | u.ptr = virt_to_machine(ptr).maddr; | ||
260 | u.val = pmd_val_ma(val); | ||
261 | extend_mmu_update(&u); | ||
113 | 262 | ||
114 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 263 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
115 | 264 | ||
116 | preempt_enable(); | 265 | preempt_enable(); |
117 | } | 266 | } |
118 | 267 | ||
268 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | ||
269 | { | ||
270 | /* If page is not pinned, we can just update the entry | ||
271 | directly */ | ||
272 | if (!page_pinned(ptr)) { | ||
273 | *ptr = val; | ||
274 | return; | ||
275 | } | ||
276 | |||
277 | xen_set_pmd_hyper(ptr, val); | ||
278 | } | ||
279 | |||
119 | /* | 280 | /* |
120 | * Associate a virtual page frame with a given physical page frame | 281 | * Associate a virtual page frame with a given physical page frame |
121 | * and protection flags for that frame. | 282 | * and protection flags for that frame. |
@@ -179,13 +340,33 @@ out: | |||
179 | preempt_enable(); | 340 | preempt_enable(); |
180 | } | 341 | } |
181 | 342 | ||
343 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
344 | { | ||
345 | /* Just return the pte as-is. We preserve the bits on commit */ | ||
346 | return *ptep; | ||
347 | } | ||
348 | |||
349 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | ||
350 | pte_t *ptep, pte_t pte) | ||
351 | { | ||
352 | struct mmu_update u; | ||
353 | |||
354 | xen_mc_batch(); | ||
355 | |||
356 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; | ||
357 | u.val = pte_val_ma(pte); | ||
358 | extend_mmu_update(&u); | ||
359 | |||
360 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
361 | } | ||
362 | |||
182 | /* Assume pteval_t is equivalent to all the other *val_t types. */ | 363 | /* Assume pteval_t is equivalent to all the other *val_t types. */ |
183 | static pteval_t pte_mfn_to_pfn(pteval_t val) | 364 | static pteval_t pte_mfn_to_pfn(pteval_t val) |
184 | { | 365 | { |
185 | if (val & _PAGE_PRESENT) { | 366 | if (val & _PAGE_PRESENT) { |
186 | unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; | 367 | unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; |
187 | pteval_t flags = val & ~PTE_MASK; | 368 | pteval_t flags = val & ~PTE_MASK; |
188 | val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; | 369 | val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; |
189 | } | 370 | } |
190 | 371 | ||
191 | return val; | 372 | return val; |
@@ -196,7 +377,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) | |||
196 | if (val & _PAGE_PRESENT) { | 377 | if (val & _PAGE_PRESENT) { |
197 | unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; | 378 | unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; |
198 | pteval_t flags = val & ~PTE_MASK; | 379 | pteval_t flags = val & ~PTE_MASK; |
199 | val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; | 380 | val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; |
200 | } | 381 | } |
201 | 382 | ||
202 | return val; | 383 | return val; |
@@ -229,24 +410,35 @@ pmdval_t xen_pmd_val(pmd_t pmd) | |||
229 | return pte_mfn_to_pfn(pmd.pmd); | 410 | return pte_mfn_to_pfn(pmd.pmd); |
230 | } | 411 | } |
231 | 412 | ||
232 | void xen_set_pud(pud_t *ptr, pud_t val) | 413 | void xen_set_pud_hyper(pud_t *ptr, pud_t val) |
233 | { | 414 | { |
234 | struct multicall_space mcs; | 415 | struct mmu_update u; |
235 | struct mmu_update *u; | ||
236 | 416 | ||
237 | preempt_disable(); | 417 | preempt_disable(); |
238 | 418 | ||
239 | mcs = xen_mc_entry(sizeof(*u)); | 419 | xen_mc_batch(); |
240 | u = mcs.args; | 420 | |
241 | u->ptr = virt_to_machine(ptr).maddr; | 421 | u.ptr = virt_to_machine(ptr).maddr; |
242 | u->val = pud_val_ma(val); | 422 | u.val = pud_val_ma(val); |
243 | MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); | 423 | extend_mmu_update(&u); |
244 | 424 | ||
245 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 425 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
246 | 426 | ||
247 | preempt_enable(); | 427 | preempt_enable(); |
248 | } | 428 | } |
249 | 429 | ||
430 | void xen_set_pud(pud_t *ptr, pud_t val) | ||
431 | { | ||
432 | /* If page is not pinned, we can just update the entry | ||
433 | directly */ | ||
434 | if (!page_pinned(ptr)) { | ||
435 | *ptr = val; | ||
436 | return; | ||
437 | } | ||
438 | |||
439 | xen_set_pud_hyper(ptr, val); | ||
440 | } | ||
441 | |||
250 | void xen_set_pte(pte_t *ptep, pte_t pte) | 442 | void xen_set_pte(pte_t *ptep, pte_t pte) |
251 | { | 443 | { |
252 | ptep->pte_high = pte.pte_high; | 444 | ptep->pte_high = pte.pte_high; |
@@ -268,7 +460,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |||
268 | 460 | ||
269 | void xen_pmd_clear(pmd_t *pmdp) | 461 | void xen_pmd_clear(pmd_t *pmdp) |
270 | { | 462 | { |
271 | xen_set_pmd(pmdp, __pmd(0)); | 463 | set_pmd(pmdp, __pmd(0)); |
272 | } | 464 | } |
273 | 465 | ||
274 | pmd_t xen_make_pmd(pmdval_t pmd) | 466 | pmd_t xen_make_pmd(pmdval_t pmd) |
@@ -441,6 +633,29 @@ void xen_pgd_pin(pgd_t *pgd) | |||
441 | xen_mc_issue(0); | 633 | xen_mc_issue(0); |
442 | } | 634 | } |
443 | 635 | ||
636 | /* | ||
637 | * On save, we need to pin all pagetables to make sure they get their | ||
638 | * mfns turned into pfns. Search the list for any unpinned pgds and pin | ||
639 | * them (unpinned pgds are not currently in use, probably because the | ||
640 | * process is under construction or destruction). | ||
641 | */ | ||
642 | void xen_mm_pin_all(void) | ||
643 | { | ||
644 | unsigned long flags; | ||
645 | struct page *page; | ||
646 | |||
647 | spin_lock_irqsave(&pgd_lock, flags); | ||
648 | |||
649 | list_for_each_entry(page, &pgd_list, lru) { | ||
650 | if (!PagePinned(page)) { | ||
651 | xen_pgd_pin((pgd_t *)page_address(page)); | ||
652 | SetPageSavePinned(page); | ||
653 | } | ||
654 | } | ||
655 | |||
656 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
657 | } | ||
658 | |||
444 | /* The init_mm pagetable is really pinned as soon as its created, but | 659 | /* The init_mm pagetable is really pinned as soon as its created, but |
445 | that's before we have page structures to store the bits. So do all | 660 | that's before we have page structures to store the bits. So do all |
446 | the book-keeping now. */ | 661 | the book-keeping now. */ |
@@ -498,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd) | |||
498 | xen_mc_issue(0); | 713 | xen_mc_issue(0); |
499 | } | 714 | } |
500 | 715 | ||
716 | /* | ||
717 | * On resume, undo any pinning done at save, so that the rest of the | ||
718 | * kernel doesn't see any unexpected pinned pagetables. | ||
719 | */ | ||
720 | void xen_mm_unpin_all(void) | ||
721 | { | ||
722 | unsigned long flags; | ||
723 | struct page *page; | ||
724 | |||
725 | spin_lock_irqsave(&pgd_lock, flags); | ||
726 | |||
727 | list_for_each_entry(page, &pgd_list, lru) { | ||
728 | if (PageSavePinned(page)) { | ||
729 | BUG_ON(!PagePinned(page)); | ||
730 | printk("unpinning pinned %p\n", page_address(page)); | ||
731 | xen_pgd_unpin((pgd_t *)page_address(page)); | ||
732 | ClearPageSavePinned(page); | ||
733 | } | ||
734 | } | ||
735 | |||
736 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
737 | } | ||
738 | |||
501 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) | 739 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) |
502 | { | 740 | { |
503 | spin_lock(&next->page_table_lock); | 741 | spin_lock(&next->page_table_lock); |
@@ -558,7 +796,7 @@ static void drop_mm_ref(struct mm_struct *mm) | |||
558 | } | 796 | } |
559 | 797 | ||
560 | if (!cpus_empty(mask)) | 798 | if (!cpus_empty(mask)) |
561 | xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); | 799 | smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); |
562 | } | 800 | } |
563 | #else | 801 | #else |
564 | static void drop_mm_ref(struct mm_struct *mm) | 802 | static void drop_mm_ref(struct mm_struct *mm) |
@@ -591,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm) | |||
591 | spin_lock(&mm->page_table_lock); | 829 | spin_lock(&mm->page_table_lock); |
592 | 830 | ||
593 | /* pgd may not be pinned in the error exit path of execve */ | 831 | /* pgd may not be pinned in the error exit path of execve */ |
594 | if (PagePinned(virt_to_page(mm->pgd))) | 832 | if (page_pinned(mm->pgd)) |
595 | xen_pgd_unpin(mm->pgd); | 833 | xen_pgd_unpin(mm->pgd); |
596 | 834 | ||
597 | spin_unlock(&mm->page_table_lock); | 835 | spin_unlock(&mm->page_table_lock); |