Diffstat (limited to 'arch/x86/xen/mmu.c')
 -rw-r--r--  arch/x86/xen/mmu.c | 345
 1 files changed, 286 insertions, 59 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 126766d43aea..42b3b9ed641d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,11 +56,136 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+#define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
+#define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
+
+/* Placeholder for holes in the address space */
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
+	__attribute__((section(".data.page_aligned"))) =
+		{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+
+/* Array of pointers to pages containing p2m entries */
+static unsigned long *p2m_top[TOP_ENTRIES]
+	__attribute__((section(".data.page_aligned"))) =
+		{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+
+/* Arrays of p2m arrays expressed in mfns used for save/restore */
+static unsigned long p2m_top_mfn[TOP_ENTRIES]
+	__attribute__((section(".bss.page_aligned")));
+
+static unsigned long p2m_top_mfn_list[
+			PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
+	__attribute__((section(".bss.page_aligned")));
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{
+	BUG_ON(pfn >= MAX_DOMAIN_PAGES);
+	return pfn / P2M_ENTRIES_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{
+	return pfn % P2M_ENTRIES_PER_PAGE;
+}
+
+/* Build the parallel p2m_top_mfn structures */
+void xen_setup_mfn_list_list(void)
+{
+	unsigned pfn, idx;
+
+	for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+
+		p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
+	}
+
+	for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+		unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
+		p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
+	}
+
+	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+		virt_to_mfn(p2m_top_mfn_list);
+	HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
+}
+
+/* Set up p2m_top to point to the domain-builder provided p2m pages */
+void __init xen_build_dynamic_phys_to_machine(void)
+{
+	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+	unsigned pfn;
+
+	for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+
+		p2m_top[topidx] = &mfn_list[pfn];
+	}
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{
+	unsigned topidx, idx;
+
+	if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+		return INVALID_P2M_ENTRY;
+
+	topidx = p2m_top_index(pfn);
+	idx = p2m_index(pfn);
+	return p2m_top[topidx][idx];
+}
+EXPORT_SYMBOL_GPL(get_phys_to_machine);
+
+static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+{
+	unsigned long *p;
+	unsigned i;
+
+	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+	BUG_ON(p == NULL);
+
+	for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+		p[i] = INVALID_P2M_ENTRY;
+
+	if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
+		free_page((unsigned long)p);
+	else
+		*mfnp = virt_to_mfn(p);
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	unsigned topidx, idx;
+
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
+	if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+		BUG_ON(mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
+	topidx = p2m_top_index(pfn);
+	if (p2m_top[topidx] == p2m_missing) {
+		/* no need to allocate a page to store an invalid entry */
+		if (mfn == INVALID_P2M_ENTRY)
+			return;
+		alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+	}
+
+	idx = p2m_index(pfn);
+	p2m_top[topidx][idx] = mfn;
+}
+
 xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 {
 	unsigned int level;
 	pte_t *pte = lookup_address(address, &level);
-	unsigned offset = address & PAGE_MASK;
+	unsigned offset = address & ~PAGE_MASK;
 
 	BUG_ON(pte == NULL);
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
+static bool page_pinned(void *ptr)
+{
+	struct page *page = virt_to_page(ptr);
+
+	return PagePinned(page);
+}
+
+static void extend_mmu_update(const struct mmu_update *update)
 {
 	struct multicall_space mcs;
 	struct mmu_update *u;
 
-	preempt_disable();
+	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
+
+	if (mcs.mc != NULL)
+		mcs.mc->args[1]++;
+	else {
+		mcs = __xen_mc_entry(sizeof(*u));
+		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+	}
 
-	mcs = xen_mc_entry(sizeof(*u));
 	u = mcs.args;
-	u->ptr = virt_to_machine(ptr).maddr;
-	u->val = pmd_val_ma(val);
-	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+	*u = *update;
+}
+
+void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
+{
+	struct mmu_update u;
+
+	preempt_disable();
+
+	xen_mc_batch();
+
+	u.ptr = virt_to_machine(ptr).maddr;
+	u.val = pmd_val_ma(val);
+	extend_mmu_update(&u);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
 }
 
+void xen_set_pmd(pmd_t *ptr, pmd_t val)
+{
+	/* If page is not pinned, we can just update the entry
+	   directly */
+	if (!page_pinned(ptr)) {
+		*ptr = val;
+		return;
+	}
+
+	xen_set_pmd_hyper(ptr, val);
+}
+
 /*
  * Associate a virtual page frame with a given physical page frame
  * and protection flags for that frame.
@@ -179,68 +340,105 @@ out:
 	preempt_enable();
 }
 
-pteval_t xen_pte_val(pte_t pte)
+pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	pteval_t ret = pte.pte;
+	/* Just return the pte as-is. We preserve the bits on commit */
+	return *ptep;
+}
+
+void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+				 pte_t *ptep, pte_t pte)
+{
+	struct mmu_update u;
+
+	xen_mc_batch();
 
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+	u.val = pte_val_ma(pte);
+	extend_mmu_update(&u);
 
-	return ret;
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 
-pgdval_t xen_pgd_val(pgd_t pgd)
+/* Assume pteval_t is equivalent to all the other *val_t types. */
+static pteval_t pte_mfn_to_pfn(pteval_t val)
 {
-	pgdval_t ret = pgd.pgd;
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-	return ret;
+	if (val & _PAGE_PRESENT) {
+		unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
+		pteval_t flags = val & ~PTE_MASK;
+		val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+	}
+
+	return val;
 }
 
-pte_t xen_make_pte(pteval_t pte)
+static pteval_t pte_pfn_to_mfn(pteval_t val)
 {
-	if (pte & _PAGE_PRESENT) {
-		pte = phys_to_machine(XPADDR(pte)).maddr;
-		pte &= ~(_PAGE_PCD | _PAGE_PWT);
+	if (val & _PAGE_PRESENT) {
+		unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
+		pteval_t flags = val & ~PTE_MASK;
+		val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
 	}
 
-	return (pte_t){ .pte = pte };
+	return val;
 }
 
-pgd_t xen_make_pgd(pgdval_t pgd)
+pteval_t xen_pte_val(pte_t pte)
 {
-	if (pgd & _PAGE_PRESENT)
-		pgd = phys_to_machine(XPADDR(pgd)).maddr;
+	return pte_mfn_to_pfn(pte.pte);
+}
 
-	return (pgd_t){ pgd };
+pgdval_t xen_pgd_val(pgd_t pgd)
+{
+	return pte_mfn_to_pfn(pgd.pgd);
+}
+
+pte_t xen_make_pte(pteval_t pte)
+{
+	pte = pte_pfn_to_mfn(pte);
+	return native_make_pte(pte);
+}
+
+pgd_t xen_make_pgd(pgdval_t pgd)
+{
+	pgd = pte_pfn_to_mfn(pgd);
+	return native_make_pgd(pgd);
 }
 
 pmdval_t xen_pmd_val(pmd_t pmd)
 {
-	pmdval_t ret = native_pmd_val(pmd);
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-	return ret;
+	return pte_mfn_to_pfn(pmd.pmd);
 }
-#ifdef CONFIG_X86_PAE
-void xen_set_pud(pud_t *ptr, pud_t val)
+
+void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 {
-	struct multicall_space mcs;
-	struct mmu_update *u;
+	struct mmu_update u;
 
 	preempt_disable();
 
-	mcs = xen_mc_entry(sizeof(*u));
-	u = mcs.args;
-	u->ptr = virt_to_machine(ptr).maddr;
-	u->val = pud_val_ma(val);
-	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+	xen_mc_batch();
+
+	u.ptr = virt_to_machine(ptr).maddr;
+	u.val = pud_val_ma(val);
+	extend_mmu_update(&u);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
 }
 
+void xen_set_pud(pud_t *ptr, pud_t val)
+{
+	/* If page is not pinned, we can just update the entry
+	   directly */
+	if (!page_pinned(ptr)) {
+		*ptr = val;
+		return;
+	}
+
+	xen_set_pud_hyper(ptr, val);
+}
+
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
 	ptep->pte_high = pte.pte_high;
@@ -262,22 +460,14 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
 void xen_pmd_clear(pmd_t *pmdp)
 {
-	xen_set_pmd(pmdp, __pmd(0));
+	set_pmd(pmdp, __pmd(0));
 }
 
 pmd_t xen_make_pmd(pmdval_t pmd)
 {
-	if (pmd & _PAGE_PRESENT)
-		pmd = phys_to_machine(XPADDR(pmd)).maddr;
-
+	pmd = pte_pfn_to_mfn(pmd);
 	return native_make_pmd(pmd);
 }
-#else /* !PAE */
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
-	*ptep = pte;
-}
-#endif /* CONFIG_X86_PAE */
 
 /*
   (Yet another) pagetable walker. This one is intended for pinning a
@@ -430,8 +620,6 @@ static int pin_page(struct page *page, enum pt_level level)
    read-only, and can be pinned. */
 void xen_pgd_pin(pgd_t *pgd)
 {
-	unsigned level;
-
 	xen_mc_batch();
 
 	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
@@ -441,15 +629,31 @@ void xen_pgd_pin(pgd_t *pgd)
 		xen_mc_batch();
 	}
 
-#ifdef CONFIG_X86_PAE
-	level = MMUEXT_PIN_L3_TABLE;
-#else
-	level = MMUEXT_PIN_L2_TABLE;
-#endif
+	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
+	xen_mc_issue(0);
+}
+
+/*
+ * On save, we need to pin all pagetables to make sure they get their
+ * mfns turned into pfns. Search the list for any unpinned pgds and pin
+ * them (unpinned pgds are not currently in use, probably because the
+ * process is under construction or destruction).
+ */
+void xen_mm_pin_all(void)
+{
+	unsigned long flags;
+	struct page *page;
 
-	xen_do_pin(level, PFN_DOWN(__pa(pgd)));
+	spin_lock_irqsave(&pgd_lock, flags);
 
-	xen_mc_issue(0);
+	list_for_each_entry(page, &pgd_list, lru) {
+		if (!PagePinned(page)) {
+			xen_pgd_pin((pgd_t *)page_address(page));
+			SetPageSavePinned(page);
+		}
+	}
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 /* The init_mm pagetable is really pinned as soon as its created, but
@@ -509,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd)
 	xen_mc_issue(0);
 }
 
+/*
+ * On resume, undo any pinning done at save, so that the rest of the
+ * kernel doesn't see any unexpected pinned pagetables.
+ */
+void xen_mm_unpin_all(void)
+{
+	unsigned long flags;
+	struct page *page;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+
+	list_for_each_entry(page, &pgd_list, lru) {
+		if (PageSavePinned(page)) {
+			BUG_ON(!PagePinned(page));
+			printk("unpinning pinned %p\n", page_address(page));
+			xen_pgd_unpin((pgd_t *)page_address(page));
+			ClearPageSavePinned(page);
+		}
+	}
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
 {
 	spin_lock(&next->page_table_lock);
@@ -602,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm)
 	spin_lock(&mm->page_table_lock);
 
 	/* pgd may not be pinned in the error exit path of execve */
-	if (PagePinned(virt_to_page(mm->pgd)))
+	if (page_pinned(mm->pgd))
 		xen_pgd_unpin(mm->pgd);
 
 	spin_unlock(&mm->page_table_lock);
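
Not part of the commit above: a minimal, self-contained user-space sketch of the two-level pfn-to-mfn table the patch introduces (a top-level array of leaf pointers, a shared "missing" leaf for holes, and lazy leaf allocation on the first real write). Names and sizes here are illustrative stand-ins, not the kernel's; the hypervisor bookkeeping (p2m_top_mfn, cmpxchg, page alignment) is omitted.

/* p2m_sketch.c -- compile with gcc; mirrors get/set_phys_to_machine logic */
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES_PER_PAGE 8	/* stand-in for PAGE_SIZE / sizeof(unsigned long) */
#define MAX_PAGES        64	/* stand-in for MAX_DOMAIN_PAGES */
#define TOP_ENTRIES      (MAX_PAGES / ENTRIES_PER_PAGE)
#define INVALID_ENTRY    (~0UL)

/* Shared placeholder leaf standing in for holes in the address space */
static unsigned long missing[ENTRIES_PER_PAGE] =
	{ [0 ... ENTRIES_PER_PAGE - 1] = INVALID_ENTRY };

/* Top level: one pointer per leaf page, all initially "missing" */
static unsigned long *top[TOP_ENTRIES] =
	{ [0 ... TOP_ENTRIES - 1] = missing };

static unsigned long get_entry(unsigned long pfn)
{
	if (pfn >= MAX_PAGES)
		return INVALID_ENTRY;
	return top[pfn / ENTRIES_PER_PAGE][pfn % ENTRIES_PER_PAGE];
}

static void set_entry(unsigned long pfn, unsigned long mfn)
{
	unsigned topidx = pfn / ENTRIES_PER_PAGE;
	unsigned long *leaf;
	unsigned i;

	if (pfn >= MAX_PAGES)
		return;
	if (top[topidx] == missing) {
		/* writing an invalid entry into a missing leaf is a no-op */
		if (mfn == INVALID_ENTRY)
			return;
		/* populate the leaf on first real write (alloc_p2m in the patch) */
		leaf = malloc(ENTRIES_PER_PAGE * sizeof(*leaf));
		for (i = 0; i < ENTRIES_PER_PAGE; i++)
			leaf[i] = INVALID_ENTRY;
		top[topidx] = leaf;
	}
	top[topidx][pfn % ENTRIES_PER_PAGE] = mfn;
}

int main(void)
{
	printf("pfn 10 before: %#lx\n", get_entry(10));	/* hole: invalid    */
	set_entry(10, 0x1234);				/* allocates a leaf */
	printf("pfn 10 after:  %#lx\n", get_entry(10));	/* 0x1234           */
	printf("pfn 11:        %#lx\n", get_entry(11));	/* still invalid    */
	return 0;
}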