Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--	arch/x86/xen/mmu.c	274
1 file changed, 256 insertions, 18 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index df40bf74ea75..42b3b9ed641d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,6 +56,131 @@
 #include "multicalls.h"
 #include "mmu.h"
 
+#define P2M_ENTRIES_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long))
+#define TOP_ENTRIES		(MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
+
+/* Placeholder for holes in the address space */
+static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
+	__attribute__((section(".data.page_aligned"))) =
+		{ [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
+
+ /* Array of pointers to pages containing p2m entries */
+static unsigned long *p2m_top[TOP_ENTRIES]
+	__attribute__((section(".data.page_aligned"))) =
+		{ [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
+
+/* Arrays of p2m arrays expressed in mfns used for save/restore */
+static unsigned long p2m_top_mfn[TOP_ENTRIES]
+	__attribute__((section(".bss.page_aligned")));
+
+static unsigned long p2m_top_mfn_list[
+			PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
+	__attribute__((section(".bss.page_aligned")));
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{
+	BUG_ON(pfn >= MAX_DOMAIN_PAGES);
+	return pfn / P2M_ENTRIES_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{
+	return pfn % P2M_ENTRIES_PER_PAGE;
+}
+
+/* Build the parallel p2m_top_mfn structures */
+void xen_setup_mfn_list_list(void)
+{
+	unsigned pfn, idx;
+
+	for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+
+		p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
+	}
+
+	for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
+		unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
+		p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
+	}
+
+	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+		virt_to_mfn(p2m_top_mfn_list);
+	HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
+}
+
+/* Set up p2m_top to point to the domain-builder provided p2m pages */
+void __init xen_build_dynamic_phys_to_machine(void)
+{
+	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+	unsigned pfn;
+
+	for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+
+		p2m_top[topidx] = &mfn_list[pfn];
+	}
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{
+	unsigned topidx, idx;
+
+	if (unlikely(pfn >= MAX_DOMAIN_PAGES))
+		return INVALID_P2M_ENTRY;
+
+	topidx = p2m_top_index(pfn);
+	idx = p2m_index(pfn);
+	return p2m_top[topidx][idx];
+}
+EXPORT_SYMBOL_GPL(get_phys_to_machine);
+
+static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+{
+	unsigned long *p;
+	unsigned i;
+
+	p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+	BUG_ON(p == NULL);
+
+	for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
+		p[i] = INVALID_P2M_ENTRY;
+
+	if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
+		free_page((unsigned long)p);
+	else
+		*mfnp = virt_to_mfn(p);
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	unsigned topidx, idx;
+
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
+	if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
+		BUG_ON(mfn != INVALID_P2M_ENTRY);
+		return;
+	}
+
+	topidx = p2m_top_index(pfn);
+	if (p2m_top[topidx] == p2m_missing) {
+		/* no need to allocate a page to store an invalid entry */
+		if (mfn == INVALID_P2M_ENTRY)
+			return;
+		alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+	}
+
+	idx = p2m_index(pfn);
+	p2m_top[topidx][idx] = mfn;
+}
+
 xmaddr_t arbitrary_virt_to_machine(unsigned long address)
 {
 	unsigned int level;
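
As an aside, the two-level p2m lookup added above is plain index arithmetic:
a pfn picks a leaf page via p2m_top_index() and an entry within it via
p2m_index(). Below is a minimal standalone sketch of that arithmetic,
assuming a 4 KiB PAGE_SIZE and an 8-byte unsigned long (512 entries per
leaf page); the EX_-prefixed names are hypothetical stand-ins for the
kernel macros, not part of the patch.

#include <stdio.h>

#define EX_PAGE_SIZE            4096UL
#define EX_P2M_ENTRIES_PER_PAGE (EX_PAGE_SIZE / sizeof(unsigned long))

int main(void)
{
	unsigned long pfn = 0x12345;

	/* Mirrors p2m_top_index()/p2m_index(): divide and take the
	   remainder by the number of entries per leaf page. */
	printf("pfn %#lx -> topidx %lu, idx %lu\n", pfn,
	       (unsigned long)(pfn / EX_P2M_ENTRIES_PER_PAGE),
	       (unsigned long)(pfn % EX_P2M_ENTRIES_PER_PAGE));
	return 0;
}
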
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
+static bool page_pinned(void *ptr)
+{
+	struct page *page = virt_to_page(ptr);
+
+	return PagePinned(page);
+}
+
+static void extend_mmu_update(const struct mmu_update *update)
 {
 	struct multicall_space mcs;
 	struct mmu_update *u;
 
-	preempt_disable();
+	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
+
+	if (mcs.mc != NULL)
+		mcs.mc->args[1]++;
+	else {
+		mcs = __xen_mc_entry(sizeof(*u));
+		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+	}
 
-	mcs = xen_mc_entry(sizeof(*u));
 	u = mcs.args;
-	u->ptr = virt_to_machine(ptr).maddr;
-	u->val = pmd_val_ma(val);
-	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+	*u = *update;
+}
+
+void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
+{
+	struct mmu_update u;
+
+	preempt_disable();
+
+	xen_mc_batch();
+
+	u.ptr = virt_to_machine(ptr).maddr;
+	u.val = pmd_val_ma(val);
+	extend_mmu_update(&u);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
 }
 
+void xen_set_pmd(pmd_t *ptr, pmd_t val)
+{
+	/* If page is not pinned, we can just update the entry
+	   directly */
+	if (!page_pinned(ptr)) {
+		*ptr = val;
+		return;
+	}
+
+	xen_set_pmd_hyper(ptr, val);
+}
+
 /*
  * Associate a virtual page frame with a given physical page frame
  * and protection flags for that frame.
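
The point of extend_mmu_update() above is that consecutive page-table
updates issued inside one batch are folded into a single
__HYPERVISOR_mmu_update multicall: the first call sets the multicall up,
later calls just bump its argument count. The sketch below is only an
illustration of that calling pattern, not code from the patch; the
example_batched_updates() helper is hypothetical and merely strings
together calls that appear in this file.

/* Illustration only: queue two updates in one batch.  The second
 * extend_mmu_update() finds the in-flight mmu_update multicall and
 * extends it, so both updates leave in a single hypercall when the
 * batch is issued. */
static void example_batched_updates(pmd_t *pmdp, pmd_t pmdval,
				    pud_t *pudp, pud_t pudval)
{
	struct mmu_update u;

	preempt_disable();

	xen_mc_batch();

	u.ptr = virt_to_machine(pmdp).maddr;
	u.val = pmd_val_ma(pmdval);
	extend_mmu_update(&u);		/* starts the mmu_update multicall */

	u.ptr = virt_to_machine(pudp).maddr;
	u.val = pud_val_ma(pudval);
	extend_mmu_update(&u);		/* appends to the same multicall */

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}
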
@@ -179,13 +340,33 @@ out:
 	preempt_enable();
 }
 
+pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	/* Just return the pte as-is.  We preserve the bits on commit */
+	return *ptep;
+}
+
+void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+				 pte_t *ptep, pte_t pte)
+{
+	struct mmu_update u;
+
+	xen_mc_batch();
+
+	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+	u.val = pte_val_ma(pte);
+	extend_mmu_update(&u);
+
+	xen_mc_issue(PARAVIRT_LAZY_MMU);
+}
+
 /* Assume pteval_t is equivalent to all the other *val_t types. */
 static pteval_t pte_mfn_to_pfn(pteval_t val)
 {
 	if (val & _PAGE_PRESENT) {
 		unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
 		pteval_t flags = val & ~PTE_MASK;
-		val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+		val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
 	}
 
 	return val;
@@ -196,7 +377,7 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
 	if (val & _PAGE_PRESENT) {
 		unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
 		pteval_t flags = val & ~PTE_MASK;
-		val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
+		val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
 	}
 
 	return val;
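
The (pteval_t) casts added in the two hunks above matter on 32-bit PAE,
where unsigned long is 32 bits but pteval_t is 64 bits: without widening
first, the shift by PAGE_SHIFT is done in 32-bit arithmetic and frame
numbers for memory above 4 GiB lose their high bits. A small standalone
sketch of that truncation follows, assuming a 12-bit page shift and using
fixed-width stand-ins rather than the kernel types; it is illustrative
only.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t frame = 0x123456;              /* frame number above the 4 GiB mark */
	uint64_t bad   = frame << 12;           /* shifted in 32 bits first: truncated */
	uint64_t good  = (uint64_t)frame << 12; /* widened first, as the patch now does */

	printf("truncated: %#llx  correct: %#llx\n",
	       (unsigned long long)bad, (unsigned long long)good);
	return 0;
}
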
@@ -229,24 +410,35 @@ pmdval_t xen_pmd_val(pmd_t pmd)
 	return pte_mfn_to_pfn(pmd.pmd);
 }
 
-void xen_set_pud(pud_t *ptr, pud_t val)
+void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 {
-	struct multicall_space mcs;
-	struct mmu_update *u;
+	struct mmu_update u;
 
 	preempt_disable();
 
-	mcs = xen_mc_entry(sizeof(*u));
-	u = mcs.args;
-	u->ptr = virt_to_machine(ptr).maddr;
-	u->val = pud_val_ma(val);
-	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);
+	xen_mc_batch();
+
+	u.ptr = virt_to_machine(ptr).maddr;
+	u.val = pud_val_ma(val);
+	extend_mmu_update(&u);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 
 	preempt_enable();
 }
 
+void xen_set_pud(pud_t *ptr, pud_t val)
+{
+	/* If page is not pinned, we can just update the entry
+	   directly */
+	if (!page_pinned(ptr)) {
+		*ptr = val;
+		return;
+	}
+
+	xen_set_pud_hyper(ptr, val);
+}
+
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
 	ptep->pte_high = pte.pte_high;
@@ -268,7 +460,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
 void xen_pmd_clear(pmd_t *pmdp)
 {
-	xen_set_pmd(pmdp, __pmd(0));
+	set_pmd(pmdp, __pmd(0));
 }
 
 pmd_t xen_make_pmd(pmdval_t pmd)
@@ -441,6 +633,29 @@ void xen_pgd_pin(pgd_t *pgd)
 	xen_mc_issue(0);
 }
 
+/*
+ * On save, we need to pin all pagetables to make sure they get their
+ * mfns turned into pfns.  Search the list for any unpinned pgds and pin
+ * them (unpinned pgds are not currently in use, probably because the
+ * process is under construction or destruction).
+ */
+void xen_mm_pin_all(void)
+{
+	unsigned long flags;
+	struct page *page;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+
+	list_for_each_entry(page, &pgd_list, lru) {
+		if (!PagePinned(page)) {
+			xen_pgd_pin((pgd_t *)page_address(page));
+			SetPageSavePinned(page);
+		}
+	}
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 /* The init_mm pagetable is really pinned as soon as its created, but
    that's before we have page structures to store the bits.  So do all
    the book-keeping now. */
@@ -498,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd)
 	xen_mc_issue(0);
 }
 
+/*
+ * On resume, undo any pinning done at save, so that the rest of the
+ * kernel doesn't see any unexpected pinned pagetables.
+ */
+void xen_mm_unpin_all(void)
+{
+	unsigned long flags;
+	struct page *page;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+
+	list_for_each_entry(page, &pgd_list, lru) {
+		if (PageSavePinned(page)) {
+			BUG_ON(!PagePinned(page));
+			printk("unpinning pinned %p\n", page_address(page));
+			xen_pgd_unpin((pgd_t *)page_address(page));
+			ClearPageSavePinned(page);
+		}
+	}
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
 {
 	spin_lock(&next->page_table_lock);
@@ -591,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm)
 	spin_lock(&mm->page_table_lock);
 
 	/* pgd may not be pinned in the error exit path of execve */
-	if (PagePinned(virt_to_page(mm->pgd)))
+	if (page_pinned(mm->pgd))
 		xen_pgd_unpin(mm->pgd);
 
 	spin_unlock(&mm->page_table_lock);