author     Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
commit     c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree       ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/xen/mmu.c
parent     ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent     6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--   arch/x86/xen/mmu.c   870
1 file changed, 438 insertions, 432 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 42086ac406af..0ccccb67a993 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -45,6 +45,8 @@ | |||
45 | #include <linux/vmalloc.h> | 45 | #include <linux/vmalloc.h> |
46 | #include <linux/module.h> | 46 | #include <linux/module.h> |
47 | #include <linux/gfp.h> | 47 | #include <linux/gfp.h> |
48 | #include <linux/memblock.h> | ||
49 | #include <linux/seq_file.h> | ||
48 | 50 | ||
49 | #include <asm/pgtable.h> | 51 | #include <asm/pgtable.h> |
50 | #include <asm/tlbflush.h> | 52 | #include <asm/tlbflush.h> |
@@ -55,6 +57,9 @@ | |||
55 | #include <asm/e820.h> | 57 | #include <asm/e820.h> |
56 | #include <asm/linkage.h> | 58 | #include <asm/linkage.h> |
57 | #include <asm/page.h> | 59 | #include <asm/page.h> |
60 | #include <asm/init.h> | ||
61 | #include <asm/pat.h> | ||
62 | #include <asm/smp.h> | ||
58 | 63 | ||
59 | #include <asm/xen/hypercall.h> | 64 | #include <asm/xen/hypercall.h> |
60 | #include <asm/xen/hypervisor.h> | 65 | #include <asm/xen/hypervisor.h> |
@@ -71,74 +76,19 @@ | |||
71 | #include "mmu.h" | 76 | #include "mmu.h" |
72 | #include "debugfs.h" | 77 | #include "debugfs.h" |
73 | 78 | ||
74 | #define MMU_UPDATE_HISTO 30 | ||
75 | |||
76 | /* | 79 | /* |
77 | * Protects atomic reservation decrease/increase against concurrent increases. | 80 | * Protects atomic reservation decrease/increase against concurrent increases. |
78 | * Also protects non-atomic updates of current_pages and driver_pages, and | 81 | * Also protects non-atomic updates of current_pages and balloon lists. |
79 | * balloon lists. | ||
80 | */ | 82 | */ |
81 | DEFINE_SPINLOCK(xen_reservation_lock); | 83 | DEFINE_SPINLOCK(xen_reservation_lock); |
82 | 84 | ||
83 | #ifdef CONFIG_XEN_DEBUG_FS | ||
84 | |||
85 | static struct { | ||
86 | u32 pgd_update; | ||
87 | u32 pgd_update_pinned; | ||
88 | u32 pgd_update_batched; | ||
89 | |||
90 | u32 pud_update; | ||
91 | u32 pud_update_pinned; | ||
92 | u32 pud_update_batched; | ||
93 | |||
94 | u32 pmd_update; | ||
95 | u32 pmd_update_pinned; | ||
96 | u32 pmd_update_batched; | ||
97 | |||
98 | u32 pte_update; | ||
99 | u32 pte_update_pinned; | ||
100 | u32 pte_update_batched; | ||
101 | |||
102 | u32 mmu_update; | ||
103 | u32 mmu_update_extended; | ||
104 | u32 mmu_update_histo[MMU_UPDATE_HISTO]; | ||
105 | |||
106 | u32 prot_commit; | ||
107 | u32 prot_commit_batched; | ||
108 | |||
109 | u32 set_pte_at; | ||
110 | u32 set_pte_at_batched; | ||
111 | u32 set_pte_at_pinned; | ||
112 | u32 set_pte_at_current; | ||
113 | u32 set_pte_at_kernel; | ||
114 | } mmu_stats; | ||
115 | |||
116 | static u8 zero_stats; | ||
117 | |||
118 | static inline void check_zero(void) | ||
119 | { | ||
120 | if (unlikely(zero_stats)) { | ||
121 | memset(&mmu_stats, 0, sizeof(mmu_stats)); | ||
122 | zero_stats = 0; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | #define ADD_STATS(elem, val) \ | ||
127 | do { check_zero(); mmu_stats.elem += (val); } while(0) | ||
128 | |||
129 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
130 | |||
131 | #define ADD_STATS(elem, val) do { (void)(val); } while(0) | ||
132 | |||
133 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
134 | |||
135 | |||
136 | /* | 85 | /* |
137 | * Identity map, in addition to plain kernel map. This needs to be | 86 | * Identity map, in addition to plain kernel map. This needs to be |
138 | * large enough to allocate page table pages to allocate the rest. | 87 | * large enough to allocate page table pages to allocate the rest. |
139 | * Each page can map 2MB. | 88 | * Each page can map 2MB. |
140 | */ | 89 | */ |
141 | static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; | 90 | #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) |
91 | static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); | ||
142 | 92 | ||
143 | #ifdef CONFIG_X86_64 | 93 | #ifdef CONFIG_X86_64 |
144 | /* l3 pud for userspace vsyscall mapping */ | 94 | /* l3 pud for userspace vsyscall mapping */ |
@@ -169,160 +119,6 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ | |||
169 | */ | 119 | */ |
170 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) | 120 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) |
171 | 121 | ||
172 | |||
173 | #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
174 | #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) | ||
175 | |||
176 | /* Placeholder for holes in the address space */ | ||
177 | static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data = | ||
178 | { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; | ||
179 | |||
180 | /* Array of pointers to pages containing p2m entries */ | ||
181 | static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data = | ||
182 | { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; | ||
183 | |||
184 | /* Arrays of p2m arrays expressed in mfns used for save/restore */ | ||
185 | static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss; | ||
186 | |||
187 | static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE] | ||
188 | __page_aligned_bss; | ||
189 | |||
190 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
191 | { | ||
192 | BUG_ON(pfn >= MAX_DOMAIN_PAGES); | ||
193 | return pfn / P2M_ENTRIES_PER_PAGE; | ||
194 | } | ||
195 | |||
196 | static inline unsigned p2m_index(unsigned long pfn) | ||
197 | { | ||
198 | return pfn % P2M_ENTRIES_PER_PAGE; | ||
199 | } | ||
200 | |||
201 | /* Build the parallel p2m_top_mfn structures */ | ||
202 | void xen_build_mfn_list_list(void) | ||
203 | { | ||
204 | unsigned pfn, idx; | ||
205 | |||
206 | for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { | ||
207 | unsigned topidx = p2m_top_index(pfn); | ||
208 | |||
209 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); | ||
210 | } | ||
211 | |||
212 | for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { | ||
213 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; | ||
214 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); | ||
215 | } | ||
216 | } | ||
217 | |||
218 | void xen_setup_mfn_list_list(void) | ||
219 | { | ||
220 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
221 | |||
222 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
223 | virt_to_mfn(p2m_top_mfn_list); | ||
224 | HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; | ||
225 | } | ||
226 | |||
227 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
228 | void __init xen_build_dynamic_phys_to_machine(void) | ||
229 | { | ||
230 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
231 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
232 | unsigned pfn; | ||
233 | |||
234 | for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { | ||
235 | unsigned topidx = p2m_top_index(pfn); | ||
236 | |||
237 | p2m_top[topidx] = &mfn_list[pfn]; | ||
238 | } | ||
239 | |||
240 | xen_build_mfn_list_list(); | ||
241 | } | ||
242 | |||
243 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
244 | { | ||
245 | unsigned topidx, idx; | ||
246 | |||
247 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) | ||
248 | return INVALID_P2M_ENTRY; | ||
249 | |||
250 | topidx = p2m_top_index(pfn); | ||
251 | idx = p2m_index(pfn); | ||
252 | return p2m_top[topidx][idx]; | ||
253 | } | ||
254 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
255 | |||
256 | /* install a new p2m_top page */ | ||
257 | bool install_p2mtop_page(unsigned long pfn, unsigned long *p) | ||
258 | { | ||
259 | unsigned topidx = p2m_top_index(pfn); | ||
260 | unsigned long **pfnp, *mfnp; | ||
261 | unsigned i; | ||
262 | |||
263 | pfnp = &p2m_top[topidx]; | ||
264 | mfnp = &p2m_top_mfn[topidx]; | ||
265 | |||
266 | for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) | ||
267 | p[i] = INVALID_P2M_ENTRY; | ||
268 | |||
269 | if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) { | ||
270 | *mfnp = virt_to_mfn(p); | ||
271 | return true; | ||
272 | } | ||
273 | |||
274 | return false; | ||
275 | } | ||
276 | |||
277 | static void alloc_p2m(unsigned long pfn) | ||
278 | { | ||
279 | unsigned long *p; | ||
280 | |||
281 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); | ||
282 | BUG_ON(p == NULL); | ||
283 | |||
284 | if (!install_p2mtop_page(pfn, p)) | ||
285 | free_page((unsigned long)p); | ||
286 | } | ||
287 | |||
288 | /* Try to install p2m mapping; fail if intermediate bits missing */ | ||
289 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
290 | { | ||
291 | unsigned topidx, idx; | ||
292 | |||
293 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { | ||
294 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
295 | return true; | ||
296 | } | ||
297 | |||
298 | topidx = p2m_top_index(pfn); | ||
299 | if (p2m_top[topidx] == p2m_missing) { | ||
300 | if (mfn == INVALID_P2M_ENTRY) | ||
301 | return true; | ||
302 | return false; | ||
303 | } | ||
304 | |||
305 | idx = p2m_index(pfn); | ||
306 | p2m_top[topidx][idx] = mfn; | ||
307 | |||
308 | return true; | ||
309 | } | ||
310 | |||
311 | void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
312 | { | ||
313 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
314 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
315 | return; | ||
316 | } | ||
317 | |||
318 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
319 | alloc_p2m(pfn); | ||
320 | |||
321 | if (!__set_phys_to_machine(pfn, mfn)) | ||
322 | BUG(); | ||
323 | } | ||
324 | } | ||
325 | |||
326 | unsigned long arbitrary_virt_to_mfn(void *vaddr) | 122 | unsigned long arbitrary_virt_to_mfn(void *vaddr) |
327 | { | 123 | { |
328 | xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); | 124 | xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); |
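The large removal above takes the phys-to-machine (p2m) handling out of mmu.c; upstream moved it into its own file rather than deleting the mechanism. What the removed code implemented is a two-level lookup: a top-level array whose slots point either at a page of mfn entries or at a shared "missing" page of INVALID entries, with leaves installed on demand. A minimal user-space model of that structure, with illustrative sizes and without the cmpxchg-based install or the save/restore mfn lists:

```c
#include <stdio.h>
#include <stdlib.h>

/* Two-level p2m model. Sizes are illustrative, not the kernel's
 * MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE values. */
#define ENTRIES_PER_LEAF 1024
#define TOP_ENTRIES      64
#define INVALID_ENTRY    (~0UL)

/* Shared "missing" leaf: every slot reads back as invalid
 * (GCC range initializers, as in the removed kernel code). */
static unsigned long missing_leaf[ENTRIES_PER_LEAF] = {
	[0 ... ENTRIES_PER_LEAF - 1] = INVALID_ENTRY,
};

/* Top level starts with every slot pointing at the missing leaf. */
static unsigned long *top[TOP_ENTRIES] = {
	[0 ... TOP_ENTRIES - 1] = missing_leaf,
};

/* get_phys_to_machine(): split the pfn into (top index, leaf index). */
static unsigned long lookup(unsigned long pfn)
{
	unsigned long topidx = pfn / ENTRIES_PER_LEAF;

	if (topidx >= TOP_ENTRIES)
		return INVALID_ENTRY;
	return top[topidx][pfn % ENTRIES_PER_LEAF];
}

/* set_phys_to_machine(): install a fresh leaf on demand, as alloc_p2m()
 * did (minus the cmpxchg race handling and the mfn-list bookkeeping). */
static void set_entry(unsigned long pfn, unsigned long mfn)
{
	unsigned long topidx = pfn / ENTRIES_PER_LEAF;

	if (top[topidx] == missing_leaf) {
		unsigned long *leaf = malloc(sizeof(missing_leaf));

		if (!leaf)
			abort();
		for (int i = 0; i < ENTRIES_PER_LEAF; i++)
			leaf[i] = INVALID_ENTRY;
		top[topidx] = leaf;
	}
	top[topidx][pfn % ENTRIES_PER_LEAF] = mfn;
}

int main(void)
{
	printf("before: %lx\n", lookup(1234));  /* ffffffffffffffff */
	set_entry(1234, 0xabcd);
	printf("after:  %lx\n", lookup(1234));  /* abcd */
	return 0;
}
```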
@@ -351,6 +147,7 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) | |||
351 | offset = address & ~PAGE_MASK; | 147 | offset = address & ~PAGE_MASK; |
352 | return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); | 148 | return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); |
353 | } | 149 | } |
150 | EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); | ||
354 | 151 | ||
355 | void make_lowmem_page_readonly(void *vaddr) | 152 | void make_lowmem_page_readonly(void *vaddr) |
356 | { | 153 | { |
@@ -359,7 +156,8 @@ void make_lowmem_page_readonly(void *vaddr) | |||
359 | unsigned int level; | 156 | unsigned int level; |
360 | 157 | ||
361 | pte = lookup_address(address, &level); | 158 | pte = lookup_address(address, &level); |
362 | BUG_ON(pte == NULL); | 159 | if (pte == NULL) |
160 | return; /* vaddr missing */ | ||
363 | 161 | ||
364 | ptev = pte_wrprotect(*pte); | 162 | ptev = pte_wrprotect(*pte); |
365 | 163 | ||
@@ -374,7 +172,8 @@ void make_lowmem_page_readwrite(void *vaddr) | |||
374 | unsigned int level; | 172 | unsigned int level; |
375 | 173 | ||
376 | pte = lookup_address(address, &level); | 174 | pte = lookup_address(address, &level); |
377 | BUG_ON(pte == NULL); | 175 | if (pte == NULL) |
176 | return; /* vaddr missing */ | ||
378 | 177 | ||
379 | ptev = pte_mkwrite(*pte); | 178 | ptev = pte_mkwrite(*pte); |
380 | 179 | ||
@@ -390,12 +189,7 @@ static bool xen_page_pinned(void *ptr) | |||
390 | return PagePinned(page); | 189 | return PagePinned(page); |
391 | } | 190 | } |
392 | 191 | ||
393 | static bool xen_iomap_pte(pte_t pte) | 192 | void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid) |
394 | { | ||
395 | return pte_flags(pte) & _PAGE_IOMAP; | ||
396 | } | ||
397 | |||
398 | static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval) | ||
399 | { | 193 | { |
400 | struct multicall_space mcs; | 194 | struct multicall_space mcs; |
401 | struct mmu_update *u; | 195 | struct mmu_update *u; |
@@ -404,13 +198,14 @@ static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval) | |||
404 | u = mcs.args; | 198 | u = mcs.args; |
405 | 199 | ||
406 | /* ptep might be kmapped when using 32-bit HIGHPTE */ | 200 | /* ptep might be kmapped when using 32-bit HIGHPTE */ |
407 | u->ptr = arbitrary_virt_to_machine(ptep).maddr; | 201 | u->ptr = virt_to_machine(ptep).maddr; |
408 | u->val = pte_val_ma(pteval); | 202 | u->val = pte_val_ma(pteval); |
409 | 203 | ||
410 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO); | 204 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid); |
411 | 205 | ||
412 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 206 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
413 | } | 207 | } |
208 | EXPORT_SYMBOL_GPL(xen_set_domain_pte); | ||
414 | 209 | ||
415 | static void xen_extend_mmu_update(const struct mmu_update *update) | 210 | static void xen_extend_mmu_update(const struct mmu_update *update) |
416 | { | 211 | { |
@@ -420,27 +215,17 @@ static void xen_extend_mmu_update(const struct mmu_update *update) | |||
420 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); | 215 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); |
421 | 216 | ||
422 | if (mcs.mc != NULL) { | 217 | if (mcs.mc != NULL) { |
423 | ADD_STATS(mmu_update_extended, 1); | ||
424 | ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1); | ||
425 | |||
426 | mcs.mc->args[1]++; | 218 | mcs.mc->args[1]++; |
427 | |||
428 | if (mcs.mc->args[1] < MMU_UPDATE_HISTO) | ||
429 | ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1); | ||
430 | else | ||
431 | ADD_STATS(mmu_update_histo[0], 1); | ||
432 | } else { | 219 | } else { |
433 | ADD_STATS(mmu_update, 1); | ||
434 | mcs = __xen_mc_entry(sizeof(*u)); | 220 | mcs = __xen_mc_entry(sizeof(*u)); |
435 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | 221 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); |
436 | ADD_STATS(mmu_update_histo[1], 1); | ||
437 | } | 222 | } |
438 | 223 | ||
439 | u = mcs.args; | 224 | u = mcs.args; |
440 | *u = *update; | 225 | *u = *update; |
441 | } | 226 | } |
442 | 227 | ||
443 | void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | 228 | static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) |
444 | { | 229 | { |
445 | struct mmu_update u; | 230 | struct mmu_update u; |
446 | 231 | ||
@@ -453,17 +238,13 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
453 | u.val = pmd_val_ma(val); | 238 | u.val = pmd_val_ma(val); |
454 | xen_extend_mmu_update(&u); | 239 | xen_extend_mmu_update(&u); |
455 | 240 | ||
456 | ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
457 | |||
458 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 241 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
459 | 242 | ||
460 | preempt_enable(); | 243 | preempt_enable(); |
461 | } | 244 | } |
462 | 245 | ||
463 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | 246 | static void xen_set_pmd(pmd_t *ptr, pmd_t val) |
464 | { | 247 | { |
465 | ADD_STATS(pmd_update, 1); | ||
466 | |||
467 | /* If page is not pinned, we can just update the entry | 248 | /* If page is not pinned, we can just update the entry |
468 | directly */ | 249 | directly */ |
469 | if (!xen_page_pinned(ptr)) { | 250 | if (!xen_page_pinned(ptr)) { |
@@ -471,8 +252,6 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val) | |||
471 | return; | 252 | return; |
472 | } | 253 | } |
473 | 254 | ||
474 | ADD_STATS(pmd_update_pinned, 1); | ||
475 | |||
476 | xen_set_pmd_hyper(ptr, val); | 255 | xen_set_pmd_hyper(ptr, val); |
477 | } | 256 | } |
478 | 257 | ||
@@ -485,35 +264,34 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) | |||
485 | set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); | 264 | set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); |
486 | } | 265 | } |
487 | 266 | ||
488 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 267 | static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) |
489 | pte_t *ptep, pte_t pteval) | ||
490 | { | 268 | { |
491 | if (xen_iomap_pte(pteval)) { | 269 | struct mmu_update u; |
492 | xen_set_iomap_pte(ptep, pteval); | 270 | |
493 | goto out; | 271 | if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) |
494 | } | 272 | return false; |
495 | 273 | ||
496 | ADD_STATS(set_pte_at, 1); | 274 | xen_mc_batch(); |
497 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); | ||
498 | ADD_STATS(set_pte_at_current, mm == current->mm); | ||
499 | ADD_STATS(set_pte_at_kernel, mm == &init_mm); | ||
500 | 275 | ||
501 | if (mm == current->mm || mm == &init_mm) { | 276 | u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; |
502 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | 277 | u.val = pte_val_ma(pteval); |
503 | struct multicall_space mcs; | 278 | xen_extend_mmu_update(&u); |
504 | mcs = xen_mc_entry(0); | ||
505 | 279 | ||
506 | MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); | 280 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
507 | ADD_STATS(set_pte_at_batched, 1); | 281 | |
508 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 282 | return true; |
509 | goto out; | 283 | } |
510 | } else | ||
511 | if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0) | ||
512 | goto out; | ||
513 | } | ||
514 | xen_set_pte(ptep, pteval); | ||
515 | 284 | ||
516 | out: return; | 285 | static void xen_set_pte(pte_t *ptep, pte_t pteval) |
286 | { | ||
287 | if (!xen_batched_set_pte(ptep, pteval)) | ||
288 | native_set_pte(ptep, pteval); | ||
289 | } | ||
290 | |||
291 | static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
292 | pte_t *ptep, pte_t pteval) | ||
293 | { | ||
294 | xen_set_pte(ptep, pteval); | ||
517 | } | 295 | } |
518 | 296 | ||
519 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, | 297 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, |
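The hunk above collapses the old set_pte_at() special cases into xen_batched_set_pte(): while the CPU is in lazy-MMU mode the PTE write is queued as an mmu_update for a single multicall, and otherwise it falls through to a native store. A rough user-space model of that decision, with the multicall and hypercall plumbing stubbed out and all names illustrative:

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel/Xen types and plumbing. */
typedef unsigned long pte_t;

enum lazy_mode { LAZY_NONE, LAZY_MMU };
static enum lazy_mode lazy_mode = LAZY_NONE;

struct mmu_update { unsigned long ptr; unsigned long val; };

static struct mmu_update queue[64];
static int queued;

/* In the kernel, xen_mc_issue() turns the queue into one multicall. */
static void flush_queue(void)
{
	printf("flushing %d queued mmu_update(s)\n", queued);
	queued = 0;
}

/* Bare-metal path: a plain store into the page table. */
static void native_set_pte(pte_t *ptep, pte_t val)
{
	*ptep = val;
}

/* Mirrors xen_batched_set_pte(): only batch while in lazy-MMU mode. */
static bool batched_set_pte(pte_t *ptep, pte_t val)
{
	if (lazy_mode != LAZY_MMU)
		return false;

	queue[queued].ptr = (unsigned long)ptep;  /* machine address in the real code */
	queue[queued].val = val;
	queued++;
	return true;
}

static void set_pte(pte_t *ptep, pte_t val)
{
	if (!batched_set_pte(ptep, val))
		native_set_pte(ptep, val);
}

int main(void)
{
	pte_t pte = 0;

	set_pte(&pte, 0x1);      /* not lazy: written directly */

	lazy_mode = LAZY_MMU;
	set_pte(&pte, 0x2);      /* lazy: queued for the hypervisor */
	set_pte(&pte, 0x3);
	flush_queue();           /* kernel: xen_mc_issue(PARAVIRT_LAZY_MMU) */
	return 0;
}
```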
@@ -530,13 +308,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |||
530 | 308 | ||
531 | xen_mc_batch(); | 309 | xen_mc_batch(); |
532 | 310 | ||
533 | u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; | 311 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; |
534 | u.val = pte_val_ma(pte); | 312 | u.val = pte_val_ma(pte); |
535 | xen_extend_mmu_update(&u); | 313 | xen_extend_mmu_update(&u); |
536 | 314 | ||
537 | ADD_STATS(prot_commit, 1); | ||
538 | ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
539 | |||
540 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 315 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
541 | } | 316 | } |
542 | 317 | ||
@@ -557,7 +332,34 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) | |||
557 | if (val & _PAGE_PRESENT) { | 332 | if (val & _PAGE_PRESENT) { |
558 | unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; | 333 | unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; |
559 | pteval_t flags = val & PTE_FLAGS_MASK; | 334 | pteval_t flags = val & PTE_FLAGS_MASK; |
560 | val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; | 335 | unsigned long mfn; |
336 | |||
337 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
338 | mfn = get_phys_to_machine(pfn); | ||
339 | else | ||
340 | mfn = pfn; | ||
341 | /* | ||
342 | * If there's no mfn for the pfn, then just create an | ||
343 | * empty non-present pte. Unfortunately this loses | ||
344 | * information about the original pfn, so | ||
345 | * pte_mfn_to_pfn is asymmetric. | ||
346 | */ | ||
347 | if (unlikely(mfn == INVALID_P2M_ENTRY)) { | ||
348 | mfn = 0; | ||
349 | flags = 0; | ||
350 | } else { | ||
351 | /* | ||
352 | * Paramount to do this test _after_ the | ||
353 | * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY & | ||
354 | * IDENTITY_FRAME_BIT resolves to true. | ||
355 | */ | ||
356 | mfn &= ~FOREIGN_FRAME_BIT; | ||
357 | if (mfn & IDENTITY_FRAME_BIT) { | ||
358 | mfn &= ~IDENTITY_FRAME_BIT; | ||
359 | flags |= _PAGE_IOMAP; | ||
360 | } | ||
361 | } | ||
362 | val = ((pteval_t)mfn << PAGE_SHIFT) | flags; | ||
561 | } | 363 | } |
562 | 364 | ||
563 | return val; | 365 | return val; |
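The rewritten pte_pfn_to_mfn() above spells out what happens when the p2m lookup fails or returns a tagged frame: a missing mfn yields an empty, non-present pte (so the conversion is deliberately lossy), a foreign tag is stripped, and an identity tag turns into _PAGE_IOMAP. A condensed, standalone model of that decision follows; the tag bits are assumed to be the top two bits of the word, as in the kernel's FOREIGN_FRAME_BIT/IDENTITY_FRAME_BIT, and the flag values are illustrative:

```c
#include <stdio.h>

#define INVALID_ENTRY      (~0UL)
#define FOREIGN_FRAME_BIT  (1UL << 63)   /* assumed: bit BITS_PER_LONG - 1 */
#define IDENTITY_FRAME_BIT (1UL << 62)   /* assumed: bit BITS_PER_LONG - 2 */
#define PAGE_PRESENT       0x001UL
#define PAGE_IOMAP         0x400UL       /* illustrative flag value */

struct pte { unsigned long mfn, flags; };

static struct pte translate(unsigned long looked_up_mfn, unsigned long flags)
{
	struct pte out = { .mfn = looked_up_mfn, .flags = flags };

	if (out.mfn == INVALID_ENTRY) {
		/* No backing machine frame: emit an empty, non-present pte.
		 * The original pfn is lost, so pte_mfn_to_pfn() can't undo this. */
		out.mfn = 0;
		out.flags = 0;
		return out;
	}

	/* Must test INVALID first: ~0 has the identity bit set too. */
	out.mfn &= ~FOREIGN_FRAME_BIT;
	if (out.mfn & IDENTITY_FRAME_BIT) {
		out.mfn &= ~IDENTITY_FRAME_BIT;
		out.flags |= PAGE_IOMAP;      /* identity range => I/O mapping */
	}
	return out;
}

int main(void)
{
	struct pte a = translate(INVALID_ENTRY, PAGE_PRESENT);
	struct pte b = translate(0x1234 | IDENTITY_FRAME_BIT, PAGE_PRESENT);

	printf("missing:  mfn=%lx flags=%lx\n", a.mfn, a.flags);
	printf("identity: mfn=%lx flags=%lx\n", b.mfn, b.flags);
	return 0;
}
```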
@@ -577,25 +379,71 @@ static pteval_t iomap_pte(pteval_t val) | |||
577 | return val; | 379 | return val; |
578 | } | 380 | } |
579 | 381 | ||
580 | pteval_t xen_pte_val(pte_t pte) | 382 | static pteval_t xen_pte_val(pte_t pte) |
581 | { | 383 | { |
582 | if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP)) | 384 | pteval_t pteval = pte.pte; |
583 | return pte.pte; | ||
584 | 385 | ||
585 | return pte_mfn_to_pfn(pte.pte); | 386 | /* If this is a WC pte, convert back from Xen WC to Linux WC */ |
387 | if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) { | ||
388 | WARN_ON(!pat_enabled); | ||
389 | pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; | ||
390 | } | ||
391 | |||
392 | if (xen_initial_domain() && (pteval & _PAGE_IOMAP)) | ||
393 | return pteval; | ||
394 | |||
395 | return pte_mfn_to_pfn(pteval); | ||
586 | } | 396 | } |
587 | PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); | 397 | PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); |
588 | 398 | ||
589 | pgdval_t xen_pgd_val(pgd_t pgd) | 399 | static pgdval_t xen_pgd_val(pgd_t pgd) |
590 | { | 400 | { |
591 | return pte_mfn_to_pfn(pgd.pgd); | 401 | return pte_mfn_to_pfn(pgd.pgd); |
592 | } | 402 | } |
593 | PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); | 403 | PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); |
594 | 404 | ||
595 | pte_t xen_make_pte(pteval_t pte) | 405 | /* |
406 | * Xen's PAT setup is part of its ABI, though I assume entries 6 & 7 | ||
407 | * are reserved for now, to correspond to the Intel-reserved PAT | ||
408 | * types. | ||
409 | * | ||
410 | * We expect Linux's PAT set as follows: | ||
411 | * | ||
412 | * Idx PTE flags Linux Xen Default | ||
413 | * 0 WB WB WB | ||
414 | * 1 PWT WC WT WT | ||
415 | * 2 PCD UC- UC- UC- | ||
416 | * 3 PCD PWT UC UC UC | ||
417 | * 4 PAT WB WC WB | ||
418 | * 5 PAT PWT WC WP WT | ||
419 | * 6 PAT PCD UC- UC UC- | ||
420 | * 7 PAT PCD PWT UC UC UC | ||
421 | */ | ||
422 | |||
423 | void xen_set_pat(u64 pat) | ||
424 | { | ||
425 | /* We expect Linux to use a PAT setting of | ||
426 | * UC UC- WC WB (ignoring the PAT flag) */ | ||
427 | WARN_ON(pat != 0x0007010600070106ull); | ||
428 | } | ||
429 | |||
430 | static pte_t xen_make_pte(pteval_t pte) | ||
596 | { | 431 | { |
597 | phys_addr_t addr = (pte & PTE_PFN_MASK); | 432 | phys_addr_t addr = (pte & PTE_PFN_MASK); |
598 | 433 | ||
434 | /* If Linux is trying to set a WC pte, then map to the Xen WC. | ||
435 | * If _PAGE_PAT is set, then it probably means it is really | ||
436 | * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope | ||
437 | * things work out OK... | ||
438 | * | ||
439 | * (We should never see kernel mappings with _PAGE_PSE set, | ||
440 | * but we could see hugetlbfs mappings, I think.). | ||
441 | */ | ||
442 | if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) { | ||
443 | if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT) | ||
444 | pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; | ||
445 | } | ||
446 | |||
599 | /* | 447 | /* |
600 | * Unprivileged domains are allowed to do IOMAPpings for | 448 | * Unprivileged domains are allowed to do IOMAPpings for |
601 | * PCI passthrough, but not map ISA space. The ISA | 449 | * PCI passthrough, but not map ISA space. The ISA |
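The PAT table in the new comment is the key to the write-combining handling added in this hunk: Linux selects WC through PAT index 1 (PWT set), while Xen's fixed PAT has WC at index 4 (PAT bit set), so xen_make_pte() and xen_pte_val() translate between the two encodings. The helpers below model just that flag translation; the PTE bit values match the x86 encoding, everything else is a standalone sketch:

```c
#include <assert.h>
#include <stdio.h>

/* x86 PTE cache-control bits (same encoding as the kernel's _PAGE_*). */
#define PAGE_PWT 0x008UL   /* bit 3 */
#define PAGE_PCD 0x010UL   /* bit 4 */
#define PAGE_PAT 0x080UL   /* bit 7 */

/* Linux -> Xen: a Linux WC pte (PWT only, PAT clear) becomes Xen WC (PAT only). */
static unsigned long to_xen_pte(unsigned long pte)
{
	if ((pte & (PAGE_PCD | PAGE_PWT)) == PAGE_PWT && !(pte & PAGE_PAT))
		pte = (pte & ~(PAGE_PCD | PAGE_PWT)) | PAGE_PAT;
	return pte;
}

/* Xen -> Linux: PAT set with PCD/PWT clear reads back as Linux WC (PWT). */
static unsigned long from_xen_pte(unsigned long pte)
{
	if ((pte & (PAGE_PAT | PAGE_PCD | PAGE_PWT)) == PAGE_PAT)
		pte = (pte & ~PAGE_PAT) | PAGE_PWT;
	return pte;
}

int main(void)
{
	unsigned long linux_wc = PAGE_PWT;           /* PAT index 1 */
	unsigned long xen_wc = to_xen_pte(linux_wc); /* PAT index 4 */

	assert(xen_wc == PAGE_PAT);
	assert(from_xen_pte(xen_wc) == linux_wc);    /* round-trips cleanly */
	printf("linux 0x%lx <-> xen 0x%lx\n", linux_wc, xen_wc);
	return 0;
}
```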
@@ -614,20 +462,55 @@ pte_t xen_make_pte(pteval_t pte) | |||
614 | } | 462 | } |
615 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); | 463 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); |
616 | 464 | ||
617 | pgd_t xen_make_pgd(pgdval_t pgd) | 465 | #ifdef CONFIG_XEN_DEBUG |
466 | pte_t xen_make_pte_debug(pteval_t pte) | ||
467 | { | ||
468 | phys_addr_t addr = (pte & PTE_PFN_MASK); | ||
469 | phys_addr_t other_addr; | ||
470 | bool io_page = false; | ||
471 | pte_t _pte; | ||
472 | |||
473 | if (pte & _PAGE_IOMAP) | ||
474 | io_page = true; | ||
475 | |||
476 | _pte = xen_make_pte(pte); | ||
477 | |||
478 | if (!addr) | ||
479 | return _pte; | ||
480 | |||
481 | if (io_page && | ||
482 | (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { | ||
483 | other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; | ||
484 | WARN_ONCE(addr != other_addr, | ||
485 | "0x%lx is using VM_IO, but it is 0x%lx!\n", | ||
486 | (unsigned long)addr, (unsigned long)other_addr); | ||
487 | } else { | ||
488 | pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; | ||
489 | other_addr = (_pte.pte & PTE_PFN_MASK); | ||
490 | WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set), | ||
491 | "0x%lx is missing VM_IO (and wasn't fixed)!\n", | ||
492 | (unsigned long)addr); | ||
493 | } | ||
494 | |||
495 | return _pte; | ||
496 | } | ||
497 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug); | ||
498 | #endif | ||
499 | |||
500 | static pgd_t xen_make_pgd(pgdval_t pgd) | ||
618 | { | 501 | { |
619 | pgd = pte_pfn_to_mfn(pgd); | 502 | pgd = pte_pfn_to_mfn(pgd); |
620 | return native_make_pgd(pgd); | 503 | return native_make_pgd(pgd); |
621 | } | 504 | } |
622 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); | 505 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); |
623 | 506 | ||
624 | pmdval_t xen_pmd_val(pmd_t pmd) | 507 | static pmdval_t xen_pmd_val(pmd_t pmd) |
625 | { | 508 | { |
626 | return pte_mfn_to_pfn(pmd.pmd); | 509 | return pte_mfn_to_pfn(pmd.pmd); |
627 | } | 510 | } |
628 | PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); | 511 | PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); |
629 | 512 | ||
630 | void xen_set_pud_hyper(pud_t *ptr, pud_t val) | 513 | static void xen_set_pud_hyper(pud_t *ptr, pud_t val) |
631 | { | 514 | { |
632 | struct mmu_update u; | 515 | struct mmu_update u; |
633 | 516 | ||
@@ -640,17 +523,13 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
640 | u.val = pud_val_ma(val); | 523 | u.val = pud_val_ma(val); |
641 | xen_extend_mmu_update(&u); | 524 | xen_extend_mmu_update(&u); |
642 | 525 | ||
643 | ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
644 | |||
645 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 526 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
646 | 527 | ||
647 | preempt_enable(); | 528 | preempt_enable(); |
648 | } | 529 | } |
649 | 530 | ||
650 | void xen_set_pud(pud_t *ptr, pud_t val) | 531 | static void xen_set_pud(pud_t *ptr, pud_t val) |
651 | { | 532 | { |
652 | ADD_STATS(pud_update, 1); | ||
653 | |||
654 | /* If page is not pinned, we can just update the entry | 533 | /* If page is not pinned, we can just update the entry |
655 | directly */ | 534 | directly */ |
656 | if (!xen_page_pinned(ptr)) { | 535 | if (!xen_page_pinned(ptr)) { |
@@ -658,56 +537,28 @@ void xen_set_pud(pud_t *ptr, pud_t val) | |||
658 | return; | 537 | return; |
659 | } | 538 | } |
660 | 539 | ||
661 | ADD_STATS(pud_update_pinned, 1); | ||
662 | |||
663 | xen_set_pud_hyper(ptr, val); | 540 | xen_set_pud_hyper(ptr, val); |
664 | } | 541 | } |
665 | 542 | ||
666 | void xen_set_pte(pte_t *ptep, pte_t pte) | ||
667 | { | ||
668 | if (xen_iomap_pte(pte)) { | ||
669 | xen_set_iomap_pte(ptep, pte); | ||
670 | return; | ||
671 | } | ||
672 | |||
673 | ADD_STATS(pte_update, 1); | ||
674 | // ADD_STATS(pte_update_pinned, xen_page_pinned(ptep)); | ||
675 | ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
676 | |||
677 | #ifdef CONFIG_X86_PAE | 543 | #ifdef CONFIG_X86_PAE |
678 | ptep->pte_high = pte.pte_high; | 544 | static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) |
679 | smp_wmb(); | ||
680 | ptep->pte_low = pte.pte_low; | ||
681 | #else | ||
682 | *ptep = pte; | ||
683 | #endif | ||
684 | } | ||
685 | |||
686 | #ifdef CONFIG_X86_PAE | ||
687 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
688 | { | 545 | { |
689 | if (xen_iomap_pte(pte)) { | ||
690 | xen_set_iomap_pte(ptep, pte); | ||
691 | return; | ||
692 | } | ||
693 | |||
694 | set_64bit((u64 *)ptep, native_pte_val(pte)); | 546 | set_64bit((u64 *)ptep, native_pte_val(pte)); |
695 | } | 547 | } |
696 | 548 | ||
697 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 549 | static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
698 | { | 550 | { |
699 | ptep->pte_low = 0; | 551 | if (!xen_batched_set_pte(ptep, native_make_pte(0))) |
700 | smp_wmb(); /* make sure low gets written first */ | 552 | native_pte_clear(mm, addr, ptep); |
701 | ptep->pte_high = 0; | ||
702 | } | 553 | } |
703 | 554 | ||
704 | void xen_pmd_clear(pmd_t *pmdp) | 555 | static void xen_pmd_clear(pmd_t *pmdp) |
705 | { | 556 | { |
706 | set_pmd(pmdp, __pmd(0)); | 557 | set_pmd(pmdp, __pmd(0)); |
707 | } | 558 | } |
708 | #endif /* CONFIG_X86_PAE */ | 559 | #endif /* CONFIG_X86_PAE */ |
709 | 560 | ||
710 | pmd_t xen_make_pmd(pmdval_t pmd) | 561 | static pmd_t xen_make_pmd(pmdval_t pmd) |
711 | { | 562 | { |
712 | pmd = pte_pfn_to_mfn(pmd); | 563 | pmd = pte_pfn_to_mfn(pmd); |
713 | return native_make_pmd(pmd); | 564 | return native_make_pmd(pmd); |
@@ -715,13 +566,13 @@ pmd_t xen_make_pmd(pmdval_t pmd) | |||
715 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); | 566 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); |
716 | 567 | ||
717 | #if PAGETABLE_LEVELS == 4 | 568 | #if PAGETABLE_LEVELS == 4 |
718 | pudval_t xen_pud_val(pud_t pud) | 569 | static pudval_t xen_pud_val(pud_t pud) |
719 | { | 570 | { |
720 | return pte_mfn_to_pfn(pud.pud); | 571 | return pte_mfn_to_pfn(pud.pud); |
721 | } | 572 | } |
722 | PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); | 573 | PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); |
723 | 574 | ||
724 | pud_t xen_make_pud(pudval_t pud) | 575 | static pud_t xen_make_pud(pudval_t pud) |
725 | { | 576 | { |
726 | pud = pte_pfn_to_mfn(pud); | 577 | pud = pte_pfn_to_mfn(pud); |
727 | 578 | ||
@@ -729,7 +580,7 @@ pud_t xen_make_pud(pudval_t pud) | |||
729 | } | 580 | } |
730 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); | 581 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); |
731 | 582 | ||
732 | pgd_t *xen_get_user_pgd(pgd_t *pgd) | 583 | static pgd_t *xen_get_user_pgd(pgd_t *pgd) |
733 | { | 584 | { |
734 | pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); | 585 | pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); |
735 | unsigned offset = pgd - pgd_page; | 586 | unsigned offset = pgd - pgd_page; |
@@ -761,7 +612,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | |||
761 | * 2. It is always pinned | 612 | * 2. It is always pinned |
762 | * 3. It has no user pagetable attached to it | 613 | * 3. It has no user pagetable attached to it |
763 | */ | 614 | */ |
764 | void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | 615 | static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) |
765 | { | 616 | { |
766 | preempt_disable(); | 617 | preempt_disable(); |
767 | 618 | ||
@@ -774,12 +625,10 @@ void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | |||
774 | preempt_enable(); | 625 | preempt_enable(); |
775 | } | 626 | } |
776 | 627 | ||
777 | void xen_set_pgd(pgd_t *ptr, pgd_t val) | 628 | static void xen_set_pgd(pgd_t *ptr, pgd_t val) |
778 | { | 629 | { |
779 | pgd_t *user_ptr = xen_get_user_pgd(ptr); | 630 | pgd_t *user_ptr = xen_get_user_pgd(ptr); |
780 | 631 | ||
781 | ADD_STATS(pgd_update, 1); | ||
782 | |||
783 | /* If page is not pinned, we can just update the entry | 632 | /* If page is not pinned, we can just update the entry |
784 | directly */ | 633 | directly */ |
785 | if (!xen_page_pinned(ptr)) { | 634 | if (!xen_page_pinned(ptr)) { |
@@ -791,9 +640,6 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
791 | return; | 640 | return; |
792 | } | 641 | } |
793 | 642 | ||
794 | ADD_STATS(pgd_update_pinned, 1); | ||
795 | ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU); | ||
796 | |||
797 | /* If it's pinned, then we can at least batch the kernel and | 643 | /* If it's pinned, then we can at least batch the kernel and |
798 | user updates together. */ | 644 | user updates together. */ |
799 | xen_mc_batch(); | 645 | xen_mc_batch(); |
@@ -1068,10 +914,9 @@ static void xen_pgd_pin(struct mm_struct *mm) | |||
1068 | */ | 914 | */ |
1069 | void xen_mm_pin_all(void) | 915 | void xen_mm_pin_all(void) |
1070 | { | 916 | { |
1071 | unsigned long flags; | ||
1072 | struct page *page; | 917 | struct page *page; |
1073 | 918 | ||
1074 | spin_lock_irqsave(&pgd_lock, flags); | 919 | spin_lock(&pgd_lock); |
1075 | 920 | ||
1076 | list_for_each_entry(page, &pgd_list, lru) { | 921 | list_for_each_entry(page, &pgd_list, lru) { |
1077 | if (!PagePinned(page)) { | 922 | if (!PagePinned(page)) { |
@@ -1080,7 +925,7 @@ void xen_mm_pin_all(void) | |||
1080 | } | 925 | } |
1081 | } | 926 | } |
1082 | 927 | ||
1083 | spin_unlock_irqrestore(&pgd_lock, flags); | 928 | spin_unlock(&pgd_lock); |
1084 | } | 929 | } |
1085 | 930 | ||
1086 | /* | 931 | /* |
@@ -1088,7 +933,7 @@ void xen_mm_pin_all(void) | |||
1088 | * that's before we have page structures to store the bits. So do all | 933 | * that's before we have page structures to store the bits. So do all |
1089 | * the book-keeping now. | 934 | * the book-keeping now. |
1090 | */ | 935 | */ |
1091 | static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, | 936 | static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, |
1092 | enum pt_level level) | 937 | enum pt_level level) |
1093 | { | 938 | { |
1094 | SetPagePinned(page); | 939 | SetPagePinned(page); |
@@ -1181,10 +1026,9 @@ static void xen_pgd_unpin(struct mm_struct *mm) | |||
1181 | */ | 1026 | */ |
1182 | void xen_mm_unpin_all(void) | 1027 | void xen_mm_unpin_all(void) |
1183 | { | 1028 | { |
1184 | unsigned long flags; | ||
1185 | struct page *page; | 1029 | struct page *page; |
1186 | 1030 | ||
1187 | spin_lock_irqsave(&pgd_lock, flags); | 1031 | spin_lock(&pgd_lock); |
1188 | 1032 | ||
1189 | list_for_each_entry(page, &pgd_list, lru) { | 1033 | list_for_each_entry(page, &pgd_list, lru) { |
1190 | if (PageSavePinned(page)) { | 1034 | if (PageSavePinned(page)) { |
@@ -1194,17 +1038,17 @@ void xen_mm_unpin_all(void) | |||
1194 | } | 1038 | } |
1195 | } | 1039 | } |
1196 | 1040 | ||
1197 | spin_unlock_irqrestore(&pgd_lock, flags); | 1041 | spin_unlock(&pgd_lock); |
1198 | } | 1042 | } |
1199 | 1043 | ||
1200 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) | 1044 | static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) |
1201 | { | 1045 | { |
1202 | spin_lock(&next->page_table_lock); | 1046 | spin_lock(&next->page_table_lock); |
1203 | xen_pgd_pin(next); | 1047 | xen_pgd_pin(next); |
1204 | spin_unlock(&next->page_table_lock); | 1048 | spin_unlock(&next->page_table_lock); |
1205 | } | 1049 | } |
1206 | 1050 | ||
1207 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | 1051 | static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) |
1208 | { | 1052 | { |
1209 | spin_lock(&mm->page_table_lock); | 1053 | spin_lock(&mm->page_table_lock); |
1210 | xen_pgd_pin(mm); | 1054 | xen_pgd_pin(mm); |
@@ -1222,7 +1066,7 @@ static void drop_other_mm_ref(void *info) | |||
1222 | 1066 | ||
1223 | active_mm = percpu_read(cpu_tlbstate.active_mm); | 1067 | active_mm = percpu_read(cpu_tlbstate.active_mm); |
1224 | 1068 | ||
1225 | if (active_mm == mm) | 1069 | if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK) |
1226 | leave_mm(smp_processor_id()); | 1070 | leave_mm(smp_processor_id()); |
1227 | 1071 | ||
1228 | /* If this cpu still has a stale cr3 reference, then make sure | 1072 | /* If this cpu still has a stale cr3 reference, then make sure |
@@ -1291,7 +1135,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1291 | * pagetable because of lazy tlb flushing. This means we need need to | 1135 | * pagetable because of lazy tlb flushing. This means we need need to |
1292 | * switch all CPUs off this pagetable before we can unpin it. | 1136 | * switch all CPUs off this pagetable before we can unpin it. |
1293 | */ | 1137 | */ |
1294 | void xen_exit_mmap(struct mm_struct *mm) | 1138 | static void xen_exit_mmap(struct mm_struct *mm) |
1295 | { | 1139 | { |
1296 | get_cpu(); /* make sure we don't move around */ | 1140 | get_cpu(); /* make sure we don't move around */ |
1297 | xen_drop_mm_ref(mm); | 1141 | xen_drop_mm_ref(mm); |
@@ -1306,13 +1150,27 @@ void xen_exit_mmap(struct mm_struct *mm) | |||
1306 | spin_unlock(&mm->page_table_lock); | 1150 | spin_unlock(&mm->page_table_lock); |
1307 | } | 1151 | } |
1308 | 1152 | ||
1309 | static __init void xen_pagetable_setup_start(pgd_t *base) | 1153 | static void __init xen_pagetable_setup_start(pgd_t *base) |
1310 | { | 1154 | { |
1311 | } | 1155 | } |
1312 | 1156 | ||
1157 | static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) | ||
1158 | { | ||
1159 | /* reserve the range used */ | ||
1160 | native_pagetable_reserve(start, end); | ||
1161 | |||
1162 | /* set as RW the rest */ | ||
1163 | printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, | ||
1164 | PFN_PHYS(pgt_buf_top)); | ||
1165 | while (end < PFN_PHYS(pgt_buf_top)) { | ||
1166 | make_lowmem_page_readwrite(__va(end)); | ||
1167 | end += PAGE_SIZE; | ||
1168 | } | ||
1169 | } | ||
1170 | |||
1313 | static void xen_post_allocator_init(void); | 1171 | static void xen_post_allocator_init(void); |
1314 | 1172 | ||
1315 | static __init void xen_pagetable_setup_done(pgd_t *base) | 1173 | static void __init xen_pagetable_setup_done(pgd_t *base) |
1316 | { | 1174 | { |
1317 | xen_setup_shared_info(); | 1175 | xen_setup_shared_info(); |
1318 | xen_post_allocator_init(); | 1176 | xen_post_allocator_init(); |
@@ -1374,7 +1232,11 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1374 | { | 1232 | { |
1375 | struct { | 1233 | struct { |
1376 | struct mmuext_op op; | 1234 | struct mmuext_op op; |
1235 | #ifdef CONFIG_SMP | ||
1236 | DECLARE_BITMAP(mask, num_processors); | ||
1237 | #else | ||
1377 | DECLARE_BITMAP(mask, NR_CPUS); | 1238 | DECLARE_BITMAP(mask, NR_CPUS); |
1239 | #endif | ||
1378 | } *args; | 1240 | } *args; |
1379 | struct multicall_space mcs; | 1241 | struct multicall_space mcs; |
1380 | 1242 | ||
@@ -1509,7 +1371,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
1509 | } | 1371 | } |
1510 | 1372 | ||
1511 | #ifdef CONFIG_X86_32 | 1373 | #ifdef CONFIG_X86_32 |
1512 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | 1374 | static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) |
1513 | { | 1375 | { |
1514 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ | 1376 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ |
1515 | if (pte_val_ma(*ptep) & _PAGE_PRESENT) | 1377 | if (pte_val_ma(*ptep) & _PAGE_PRESENT) |
@@ -1518,16 +1380,34 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1518 | 1380 | ||
1519 | return pte; | 1381 | return pte; |
1520 | } | 1382 | } |
1383 | #else /* CONFIG_X86_64 */ | ||
1384 | static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | ||
1385 | { | ||
1386 | unsigned long pfn = pte_pfn(pte); | ||
1387 | |||
1388 | /* | ||
1389 | * If the new pfn is within the range of the newly allocated | ||
1390 | * kernel pagetable, and it isn't being mapped into an | ||
1391 | * early_ioremap fixmap slot as a freshly allocated page, make sure | ||
1392 | * it is RO. | ||
1393 | */ | ||
1394 | if (((!is_early_ioremap_ptep(ptep) && | ||
1395 | pfn >= pgt_buf_start && pfn < pgt_buf_top)) || | ||
1396 | (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) | ||
1397 | pte = pte_wrprotect(pte); | ||
1398 | |||
1399 | return pte; | ||
1400 | } | ||
1401 | #endif /* CONFIG_X86_64 */ | ||
1521 | 1402 | ||
1522 | /* Init-time set_pte while constructing initial pagetables, which | 1403 | /* Init-time set_pte while constructing initial pagetables, which |
1523 | doesn't allow RO pagetable pages to be remapped RW */ | 1404 | doesn't allow RO pagetable pages to be remapped RW */ |
1524 | static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | 1405 | static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) |
1525 | { | 1406 | { |
1526 | pte = mask_rw_pte(ptep, pte); | 1407 | pte = mask_rw_pte(ptep, pte); |
1527 | 1408 | ||
1528 | xen_set_pte(ptep, pte); | 1409 | xen_set_pte(ptep, pte); |
1529 | } | 1410 | } |
1530 | #endif | ||
1531 | 1411 | ||
1532 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | 1412 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) |
1533 | { | 1413 | { |
@@ -1540,7 +1420,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | |||
1540 | 1420 | ||
1541 | /* Early in boot, while setting up the initial pagetable, assume | 1421 | /* Early in boot, while setting up the initial pagetable, assume |
1542 | everything is pinned. */ | 1422 | everything is pinned. */ |
1543 | static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) | 1423 | static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) |
1544 | { | 1424 | { |
1545 | #ifdef CONFIG_FLATMEM | 1425 | #ifdef CONFIG_FLATMEM |
1546 | BUG_ON(mem_map); /* should only be used early */ | 1426 | BUG_ON(mem_map); /* should only be used early */ |
@@ -1550,7 +1430,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) | |||
1550 | } | 1430 | } |
1551 | 1431 | ||
1552 | /* Used for pmd and pud */ | 1432 | /* Used for pmd and pud */ |
1553 | static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) | 1433 | static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) |
1554 | { | 1434 | { |
1555 | #ifdef CONFIG_FLATMEM | 1435 | #ifdef CONFIG_FLATMEM |
1556 | BUG_ON(mem_map); /* should only be used early */ | 1436 | BUG_ON(mem_map); /* should only be used early */ |
@@ -1560,13 +1440,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) | |||
1560 | 1440 | ||
1561 | /* Early release_pte assumes that all pts are pinned, since there's | 1441 | /* Early release_pte assumes that all pts are pinned, since there's |
1562 | only init_mm and anything attached to that is pinned. */ | 1442 | only init_mm and anything attached to that is pinned. */ |
1563 | static __init void xen_release_pte_init(unsigned long pfn) | 1443 | static void __init xen_release_pte_init(unsigned long pfn) |
1564 | { | 1444 | { |
1565 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | 1445 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); |
1566 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1446 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
1567 | } | 1447 | } |
1568 | 1448 | ||
1569 | static __init void xen_release_pmd_init(unsigned long pfn) | 1449 | static void __init xen_release_pmd_init(unsigned long pfn) |
1570 | { | 1450 | { |
1571 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1451 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
1572 | } | 1452 | } |
@@ -1682,6 +1562,7 @@ static void *m2v(phys_addr_t maddr) | |||
1682 | return __ka(m2p(maddr)); | 1562 | return __ka(m2p(maddr)); |
1683 | } | 1563 | } |
1684 | 1564 | ||
1565 | /* Set the page permissions on an identity-mapped pages */ | ||
1685 | static void set_page_prot(void *addr, pgprot_t prot) | 1566 | static void set_page_prot(void *addr, pgprot_t prot) |
1686 | { | 1567 | { |
1687 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; | 1568 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; |
@@ -1691,12 +1572,15 @@ static void set_page_prot(void *addr, pgprot_t prot) | |||
1691 | BUG(); | 1572 | BUG(); |
1692 | } | 1573 | } |
1693 | 1574 | ||
1694 | static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | 1575 | static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) |
1695 | { | 1576 | { |
1696 | unsigned pmdidx, pteidx; | 1577 | unsigned pmdidx, pteidx; |
1697 | unsigned ident_pte; | 1578 | unsigned ident_pte; |
1698 | unsigned long pfn; | 1579 | unsigned long pfn; |
1699 | 1580 | ||
1581 | level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES, | ||
1582 | PAGE_SIZE); | ||
1583 | |||
1700 | ident_pte = 0; | 1584 | ident_pte = 0; |
1701 | pfn = 0; | 1585 | pfn = 0; |
1702 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { | 1586 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { |
@@ -1707,7 +1591,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1707 | pte_page = m2v(pmd[pmdidx].pmd); | 1591 | pte_page = m2v(pmd[pmdidx].pmd); |
1708 | else { | 1592 | else { |
1709 | /* Check for free pte pages */ | 1593 | /* Check for free pte pages */ |
1710 | if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) | 1594 | if (ident_pte == LEVEL1_IDENT_ENTRIES) |
1711 | break; | 1595 | break; |
1712 | 1596 | ||
1713 | pte_page = &level1_ident_pgt[ident_pte]; | 1597 | pte_page = &level1_ident_pgt[ident_pte]; |
@@ -1720,8 +1604,10 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1720 | for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { | 1604 | for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { |
1721 | pte_t pte; | 1605 | pte_t pte; |
1722 | 1606 | ||
1607 | #ifdef CONFIG_X86_32 | ||
1723 | if (pfn > max_pfn_mapped) | 1608 | if (pfn > max_pfn_mapped) |
1724 | max_pfn_mapped = pfn; | 1609 | max_pfn_mapped = pfn; |
1610 | #endif | ||
1725 | 1611 | ||
1726 | if (!pte_none(pte_page[pteidx])) | 1612 | if (!pte_none(pte_page[pteidx])) |
1727 | continue; | 1613 | continue; |
@@ -1737,6 +1623,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1737 | set_page_prot(pmd, PAGE_KERNEL_RO); | 1623 | set_page_prot(pmd, PAGE_KERNEL_RO); |
1738 | } | 1624 | } |
1739 | 1625 | ||
1626 | void __init xen_setup_machphys_mapping(void) | ||
1627 | { | ||
1628 | struct xen_machphys_mapping mapping; | ||
1629 | unsigned long machine_to_phys_nr_ents; | ||
1630 | |||
1631 | if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { | ||
1632 | machine_to_phys_mapping = (unsigned long *)mapping.v_start; | ||
1633 | machine_to_phys_nr_ents = mapping.max_mfn + 1; | ||
1634 | } else { | ||
1635 | machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; | ||
1636 | } | ||
1637 | machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); | ||
1638 | } | ||
1639 | |||
1740 | #ifdef CONFIG_X86_64 | 1640 | #ifdef CONFIG_X86_64 |
1741 | static void convert_pfn_mfn(void *v) | 1641 | static void convert_pfn_mfn(void *v) |
1742 | { | 1642 | { |
@@ -1750,7 +1650,7 @@ static void convert_pfn_mfn(void *v) | |||
1750 | } | 1650 | } |
1751 | 1651 | ||
1752 | /* | 1652 | /* |
1753 | * Set up the inital kernel pagetable. | 1653 | * Set up the initial kernel pagetable. |
1754 | * | 1654 | * |
1755 | * We can construct this by grafting the Xen provided pagetable into | 1655 | * We can construct this by grafting the Xen provided pagetable into |
1756 | * head_64.S's preconstructed pagetables. We copy the Xen L2's into | 1656 | * head_64.S's preconstructed pagetables. We copy the Xen L2's into |
@@ -1760,12 +1660,18 @@ static void convert_pfn_mfn(void *v) | |||
1760 | * of the physical mapping once some sort of allocator has been set | 1660 | * of the physical mapping once some sort of allocator has been set |
1761 | * up. | 1661 | * up. |
1762 | */ | 1662 | */ |
1763 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 1663 | pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, |
1764 | unsigned long max_pfn) | 1664 | unsigned long max_pfn) |
1765 | { | 1665 | { |
1766 | pud_t *l3; | 1666 | pud_t *l3; |
1767 | pmd_t *l2; | 1667 | pmd_t *l2; |
1768 | 1668 | ||
1669 | /* max_pfn_mapped is the last pfn mapped in the initial memory | ||
1670 | * mappings. Considering that on Xen after the kernel mappings we | ||
1671 | * have the mappings of some pages that don't exist in pfn space, we | ||
1672 | * set max_pfn_mapped to the last real pfn mapped. */ | ||
1673 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); | ||
1674 | |||
1769 | /* Zap identity mapping */ | 1675 | /* Zap identity mapping */ |
1770 | init_level4_pgt[0] = __pgd(0); | 1676 | init_level4_pgt[0] = __pgd(0); |
1771 | 1677 | ||
@@ -1814,7 +1720,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1814 | __xen_write_cr3(true, __pa(pgd)); | 1720 | __xen_write_cr3(true, __pa(pgd)); |
1815 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 1721 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
1816 | 1722 | ||
1817 | reserve_early(__pa(xen_start_info->pt_base), | 1723 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), |
1818 | __pa(xen_start_info->pt_base + | 1724 | __pa(xen_start_info->pt_base + |
1819 | xen_start_info->nr_pt_frames * PAGE_SIZE), | 1725 | xen_start_info->nr_pt_frames * PAGE_SIZE), |
1820 | "XEN PAGETABLES"); | 1726 | "XEN PAGETABLES"); |
@@ -1822,45 +1728,88 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1822 | return pgd; | 1728 | return pgd; |
1823 | } | 1729 | } |
1824 | #else /* !CONFIG_X86_64 */ | 1730 | #else /* !CONFIG_X86_64 */ |
1825 | static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; | 1731 | static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); |
1732 | static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); | ||
1733 | |||
1734 | static void __init xen_write_cr3_init(unsigned long cr3) | ||
1735 | { | ||
1736 | unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); | ||
1737 | |||
1738 | BUG_ON(read_cr3() != __pa(initial_page_table)); | ||
1739 | BUG_ON(cr3 != __pa(swapper_pg_dir)); | ||
1740 | |||
1741 | /* | ||
1742 | * We are switching to swapper_pg_dir for the first time (from | ||
1743 | * initial_page_table) and therefore need to mark that page | ||
1744 | * read-only and then pin it. | ||
1745 | * | ||
1746 | * Xen disallows sharing of kernel PMDs for PAE | ||
1747 | * guests. Therefore we must copy the kernel PMD from | ||
1748 | * initial_page_table into a new kernel PMD to be used in | ||
1749 | * swapper_pg_dir. | ||
1750 | */ | ||
1751 | swapper_kernel_pmd = | ||
1752 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | ||
1753 | memcpy(swapper_kernel_pmd, initial_kernel_pmd, | ||
1754 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
1755 | swapper_pg_dir[KERNEL_PGD_BOUNDARY] = | ||
1756 | __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); | ||
1757 | set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); | ||
1758 | |||
1759 | set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); | ||
1760 | xen_write_cr3(cr3); | ||
1761 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); | ||
1762 | |||
1763 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, | ||
1764 | PFN_DOWN(__pa(initial_page_table))); | ||
1765 | set_page_prot(initial_page_table, PAGE_KERNEL); | ||
1766 | set_page_prot(initial_kernel_pmd, PAGE_KERNEL); | ||
1826 | 1767 | ||
1827 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 1768 | pv_mmu_ops.write_cr3 = &xen_write_cr3; |
1769 | } | ||
1770 | |||
1771 | pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, | ||
1828 | unsigned long max_pfn) | 1772 | unsigned long max_pfn) |
1829 | { | 1773 | { |
1830 | pmd_t *kernel_pmd; | 1774 | pmd_t *kernel_pmd; |
1831 | 1775 | ||
1776 | initial_kernel_pmd = | ||
1777 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | ||
1778 | |||
1832 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + | 1779 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + |
1833 | xen_start_info->nr_pt_frames * PAGE_SIZE + | 1780 | xen_start_info->nr_pt_frames * PAGE_SIZE + |
1834 | 512*1024); | 1781 | 512*1024); |
1835 | 1782 | ||
1836 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); | 1783 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); |
1837 | memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); | 1784 | memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); |
1838 | 1785 | ||
1839 | xen_map_identity_early(level2_kernel_pgt, max_pfn); | 1786 | xen_map_identity_early(initial_kernel_pmd, max_pfn); |
1840 | 1787 | ||
1841 | memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); | 1788 | memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); |
1842 | set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], | 1789 | initial_page_table[KERNEL_PGD_BOUNDARY] = |
1843 | __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); | 1790 | __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); |
1844 | 1791 | ||
1845 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 1792 | set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); |
1846 | set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); | 1793 | set_page_prot(initial_page_table, PAGE_KERNEL_RO); |
1847 | set_page_prot(empty_zero_page, PAGE_KERNEL_RO); | 1794 | set_page_prot(empty_zero_page, PAGE_KERNEL_RO); |
1848 | 1795 | ||
1849 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | 1796 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
1850 | 1797 | ||
1851 | xen_write_cr3(__pa(swapper_pg_dir)); | 1798 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, |
1799 | PFN_DOWN(__pa(initial_page_table))); | ||
1800 | xen_write_cr3(__pa(initial_page_table)); | ||
1852 | 1801 | ||
1853 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); | 1802 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), |
1854 | |||
1855 | reserve_early(__pa(xen_start_info->pt_base), | ||
1856 | __pa(xen_start_info->pt_base + | 1803 | __pa(xen_start_info->pt_base + |
1857 | xen_start_info->nr_pt_frames * PAGE_SIZE), | 1804 | xen_start_info->nr_pt_frames * PAGE_SIZE), |
1858 | "XEN PAGETABLES"); | 1805 | "XEN PAGETABLES"); |
1859 | 1806 | ||
1860 | return swapper_pg_dir; | 1807 | return initial_page_table; |
1861 | } | 1808 | } |
1862 | #endif /* CONFIG_X86_64 */ | 1809 | #endif /* CONFIG_X86_64 */ |
1863 | 1810 | ||
1811 | static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; | ||
1812 | |||
1864 | static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | 1813 | static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) |
1865 | { | 1814 | { |
1866 | pte_t pte; | 1815 | pte_t pte; |
@@ -1881,15 +1830,28 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
1881 | #else | 1830 | #else |
1882 | case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: | 1831 | case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: |
1883 | #endif | 1832 | #endif |
1884 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1885 | case FIX_APIC_BASE: /* maps dummy local APIC */ | ||
1886 | #endif | ||
1887 | case FIX_TEXT_POKE0: | 1833 | case FIX_TEXT_POKE0: |
1888 | case FIX_TEXT_POKE1: | 1834 | case FIX_TEXT_POKE1: |
1889 | /* All local page mappings */ | 1835 | /* All local page mappings */ |
1890 | pte = pfn_pte(phys, prot); | 1836 | pte = pfn_pte(phys, prot); |
1891 | break; | 1837 | break; |
1892 | 1838 | ||
1839 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1840 | case FIX_APIC_BASE: /* maps dummy local APIC */ | ||
1841 | pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); | ||
1842 | break; | ||
1843 | #endif | ||
1844 | |||
1845 | #ifdef CONFIG_X86_IO_APIC | ||
1846 | case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END: | ||
1847 | /* | ||
1848 | * We just don't map the IO APIC - all access is via | ||
1849 | * hypercalls. Keep the address in the pte for reference. | ||
1850 | */ | ||
1851 | pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); | ||
1852 | break; | ||
1853 | #endif | ||
1854 | |||
1893 | case FIX_PARAVIRT_BOOTMAP: | 1855 | case FIX_PARAVIRT_BOOTMAP: |
1894 | /* This is an MFN, but it isn't an IO mapping from the | 1856 | /* This is an MFN, but it isn't an IO mapping from the |
1895 | IO domain */ | 1857 | IO domain */ |
@@ -1914,8 +1876,34 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
1914 | #endif | 1876 | #endif |
1915 | } | 1877 | } |
1916 | 1878 | ||
1917 | static __init void xen_post_allocator_init(void) | 1879 | void __init xen_ident_map_ISA(void) |
1880 | { | ||
1881 | unsigned long pa; | ||
1882 | |||
1883 | /* | ||
1884 | * If we're dom0, then linear map the ISA machine addresses into | ||
1885 | * the kernel's address space. | ||
1886 | */ | ||
1887 | if (!xen_initial_domain()) | ||
1888 | return; | ||
1889 | |||
1890 | xen_raw_printk("Xen: setup ISA identity maps\n"); | ||
1891 | |||
1892 | for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) { | ||
1893 | pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO); | ||
1894 | |||
1895 | if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0)) | ||
1896 | BUG(); | ||
1897 | } | ||
1898 | |||
1899 | xen_flush_tlb(); | ||
1900 | } | ||
1901 | |||
1902 | static void __init xen_post_allocator_init(void) | ||
1918 | { | 1903 | { |
1904 | #ifdef CONFIG_XEN_DEBUG | ||
1905 | pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); | ||
1906 | #endif | ||
1919 | pv_mmu_ops.set_pte = xen_set_pte; | 1907 | pv_mmu_ops.set_pte = xen_set_pte; |
1920 | pv_mmu_ops.set_pmd = xen_set_pmd; | 1908 | pv_mmu_ops.set_pmd = xen_set_pmd; |
1921 | pv_mmu_ops.set_pud = xen_set_pud; | 1909 | pv_mmu_ops.set_pud = xen_set_pud; |
@@ -1948,12 +1936,16 @@ static void xen_leave_lazy_mmu(void) | |||
1948 | preempt_enable(); | 1936 | preempt_enable(); |
1949 | } | 1937 | } |
1950 | 1938 | ||
1951 | static const struct pv_mmu_ops xen_mmu_ops __initdata = { | 1939 | static const struct pv_mmu_ops xen_mmu_ops __initconst = { |
1952 | .read_cr2 = xen_read_cr2, | 1940 | .read_cr2 = xen_read_cr2, |
1953 | .write_cr2 = xen_write_cr2, | 1941 | .write_cr2 = xen_write_cr2, |
1954 | 1942 | ||
1955 | .read_cr3 = xen_read_cr3, | 1943 | .read_cr3 = xen_read_cr3, |
1944 | #ifdef CONFIG_X86_32 | ||
1945 | .write_cr3 = xen_write_cr3_init, | ||
1946 | #else | ||
1956 | .write_cr3 = xen_write_cr3, | 1947 | .write_cr3 = xen_write_cr3, |
1948 | #endif | ||
1957 | 1949 | ||
1958 | .flush_tlb_user = xen_flush_tlb, | 1950 | .flush_tlb_user = xen_flush_tlb, |
1959 | .flush_tlb_kernel = xen_flush_tlb, | 1951 | .flush_tlb_kernel = xen_flush_tlb, |
@@ -1969,14 +1961,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1969 | .alloc_pte = xen_alloc_pte_init, | 1961 | .alloc_pte = xen_alloc_pte_init, |
1970 | .release_pte = xen_release_pte_init, | 1962 | .release_pte = xen_release_pte_init, |
1971 | .alloc_pmd = xen_alloc_pmd_init, | 1963 | .alloc_pmd = xen_alloc_pmd_init, |
1972 | .alloc_pmd_clone = paravirt_nop, | ||
1973 | .release_pmd = xen_release_pmd_init, | 1964 | .release_pmd = xen_release_pmd_init, |
1974 | 1965 | ||
1975 | #ifdef CONFIG_X86_64 | ||
1976 | .set_pte = xen_set_pte, | ||
1977 | #else | ||
1978 | .set_pte = xen_set_pte_init, | 1966 | .set_pte = xen_set_pte_init, |
1979 | #endif | ||
1980 | .set_pte_at = xen_set_pte_at, | 1967 | .set_pte_at = xen_set_pte_at, |
1981 | .set_pmd = xen_set_pmd_hyper, | 1968 | .set_pmd = xen_set_pmd_hyper, |
1982 | 1969 | ||
@@ -2022,11 +2009,12 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
2022 | 2009 | ||
2023 | void __init xen_init_mmu_ops(void) | 2010 | void __init xen_init_mmu_ops(void) |
2024 | { | 2011 | { |
2012 | x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; | ||
2025 | x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; | 2013 | x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; |
2026 | x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; | 2014 | x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; |
2027 | pv_mmu_ops = xen_mmu_ops; | 2015 | pv_mmu_ops = xen_mmu_ops; |
2028 | 2016 | ||
2029 | vmap_lazy_unmap = false; | 2017 | memset(dummy_mapping, 0xff, PAGE_SIZE); |
2030 | } | 2018 | } |
2031 | 2019 | ||
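The newly hooked x86_init.mapping.pagetable_reserve lets Xen override how the early page-table area is reserved; xen_mapping_pagetable_reserve itself lies outside the hunks shown here. A minimal sketch of a hook with that signature (the function name is hypothetical, and the body mirrors what the native implementation of this era does with memblock):

	/*
	 * Hypothetical pagetable_reserve-style hook, for illustration only:
	 * keep the whole [start, end) boot page-table range via memblock.
	 */
	static void __init example_pagetable_reserve(u64 start, u64 end)
	{
		memblock_x86_reserve_range(start, end, "PGTABLE");
	}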
2032 | /* Protected by xen_reservation_lock. */ | 2020 | /* Protected by xen_reservation_lock. */ |
@@ -2049,7 +2037,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, | |||
2049 | in_frames[i] = virt_to_mfn(vaddr); | 2037 | in_frames[i] = virt_to_mfn(vaddr); |
2050 | 2038 | ||
2051 | MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); | 2039 | MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); |
2052 | set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); | 2040 | __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); |
2053 | 2041 | ||
2054 | if (out_frames) | 2042 | if (out_frames) |
2055 | out_frames[i] = virt_to_pfn(vaddr); | 2043 | out_frames[i] = virt_to_pfn(vaddr); |
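The switch from set_phys_to_machine() to the raw __set_phys_to_machine() above assumes the p2m tree already has a leaf for this pfn; the raw setter reports failure through its return value rather than allocating missing p2m pages. A hedged two-line illustration of that calling convention (an assumption about the p2m code of this era, not something shown in this hunk):

	/* Illustration only: a caller that must not fail checks the result. */
	if (!__set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY))
		BUG();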
@@ -2259,65 +2247,83 @@ void __init xen_hvm_init_mmu_ops(void) | |||
2259 | } | 2247 | } |
2260 | #endif | 2248 | #endif |
2261 | 2249 | ||
2262 | #ifdef CONFIG_XEN_DEBUG_FS | 2250 | #define REMAP_BATCH_SIZE 16 |
2263 | 2251 | ||
2264 | static struct dentry *d_mmu_debug; | 2252 | struct remap_data { |
2253 | unsigned long mfn; | ||
2254 | pgprot_t prot; | ||
2255 | struct mmu_update *mmu_update; | ||
2256 | }; | ||
2265 | 2257 | ||
2266 | static int __init xen_mmu_debugfs(void) | 2258 | static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, |
2259 | unsigned long addr, void *data) | ||
2267 | { | 2260 | { |
2268 | struct dentry *d_xen = xen_init_debugfs(); | 2261 | struct remap_data *rmd = data; |
2269 | 2262 | pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); | |
2270 | if (d_xen == NULL) | ||
2271 | return -ENOMEM; | ||
2272 | 2263 | ||
2273 | d_mmu_debug = debugfs_create_dir("mmu", d_xen); | 2264 | rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; |
2274 | 2265 | rmd->mmu_update->val = pte_val_ma(pte); | |
2275 | debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats); | 2266 | rmd->mmu_update++; |
2276 | |||
2277 | debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update); | ||
2278 | debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug, | ||
2279 | &mmu_stats.pgd_update_pinned); | ||
2280 | debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug, | ||
2281 | &mmu_stats.pgd_update_pinned); | ||
2282 | |||
2283 | debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update); | ||
2284 | debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug, | ||
2285 | &mmu_stats.pud_update_pinned); | ||
2286 | debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug, | ||
2287 | &mmu_stats.pud_update_pinned); | ||
2288 | |||
2289 | debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update); | ||
2290 | debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug, | ||
2291 | &mmu_stats.pmd_update_pinned); | ||
2292 | debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug, | ||
2293 | &mmu_stats.pmd_update_pinned); | ||
2294 | |||
2295 | debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update); | ||
2296 | // debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug, | ||
2297 | // &mmu_stats.pte_update_pinned); | ||
2298 | debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug, | ||
2299 | &mmu_stats.pte_update_pinned); | ||
2300 | |||
2301 | debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update); | ||
2302 | debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug, | ||
2303 | &mmu_stats.mmu_update_extended); | ||
2304 | xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug, | ||
2305 | mmu_stats.mmu_update_histo, 20); | ||
2306 | |||
2307 | debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at); | ||
2308 | debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug, | ||
2309 | &mmu_stats.set_pte_at_batched); | ||
2310 | debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug, | ||
2311 | &mmu_stats.set_pte_at_current); | ||
2312 | debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug, | ||
2313 | &mmu_stats.set_pte_at_kernel); | ||
2314 | |||
2315 | debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit); | ||
2316 | debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, | ||
2317 | &mmu_stats.prot_commit_batched); | ||
2318 | 2267 | ||
2319 | return 0; | 2268 | return 0; |
2320 | } | 2269 | } |
2321 | fs_initcall(xen_mmu_debugfs); | ||
2322 | 2270 | ||
2323 | #endif /* CONFIG_XEN_DEBUG_FS */ | 2271 | int xen_remap_domain_mfn_range(struct vm_area_struct *vma, |
2272 | unsigned long addr, | ||
2273 | unsigned long mfn, int nr, | ||
2274 | pgprot_t prot, unsigned domid) | ||
2275 | { | ||
2276 | struct remap_data rmd; | ||
2277 | struct mmu_update mmu_update[REMAP_BATCH_SIZE]; | ||
2278 | int batch; | ||
2279 | unsigned long range; | ||
2280 | int err = 0; | ||
2281 | |||
2282 | prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); | ||
2283 | |||
2284 | BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == | ||
2285 | (VM_PFNMAP | VM_RESERVED | VM_IO))); | ||
2286 | |||
2287 | rmd.mfn = mfn; | ||
2288 | rmd.prot = prot; | ||
2289 | |||
2290 | while (nr) { | ||
2291 | batch = min(REMAP_BATCH_SIZE, nr); | ||
2292 | range = (unsigned long)batch << PAGE_SHIFT; | ||
2293 | |||
2294 | rmd.mmu_update = mmu_update; | ||
2295 | err = apply_to_page_range(vma->vm_mm, addr, range, | ||
2296 | remap_area_mfn_pte_fn, &rmd); | ||
2297 | if (err) | ||
2298 | goto out; | ||
2299 | |||
2300 | err = -EFAULT; | ||
2301 | if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) | ||
2302 | goto out; | ||
2303 | |||
2304 | nr -= batch; | ||
2305 | addr += range; | ||
2306 | } | ||
2307 | |||
2308 | err = 0; | ||
2309 | out: | ||
2310 | |||
2311 | flush_tlb_all(); | ||
2312 | |||
2313 | return err; | ||
2314 | } | ||
2315 | EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); | ||
2316 | |||
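xen_remap_domain_mfn_range() is exported for callers such as the privcmd mmap path; a hedged sketch of a caller follows. The function name and the VMA handling are assumptions for illustration, not taken from this patch, but the flags shown satisfy the BUG_ON() in the function above.

	/*
	 * Hypothetical caller, for illustration only: map nr machine frames
	 * starting at mfn from domain domid into a userspace VMA.
	 */
	static int example_mmap_mfn_range(struct vm_area_struct *vma,
					  unsigned long mfn, int nr,
					  unsigned domid)
	{
		vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;

		return xen_remap_domain_mfn_range(vma, vma->vm_start, mfn, nr,
						  vma->vm_page_prot, domid);
	}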
2317 | #ifdef CONFIG_XEN_DEBUG_FS | ||
2318 | static int p2m_dump_open(struct inode *inode, struct file *filp) | ||
2319 | { | ||
2320 | return single_open(filp, p2m_dump_show, NULL); | ||
2321 | } | ||
2322 | |||
2323 | static const struct file_operations p2m_dump_fops = { | ||
2324 | .open = p2m_dump_open, | ||
2325 | .read = seq_read, | ||
2326 | .llseek = seq_lseek, | ||
2327 | .release = single_release, | ||
2328 | }; | ||
2329 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
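p2m_dump_fops above only wires p2m_dump_show() into the seq_file interface; the debugfs file itself is created elsewhere. A sketch of how such a read-only entry is typically registered -- the directory name and init function are assumptions, and this only applies under CONFIG_XEN_DEBUG_FS:

	/* Hypothetical registration, for illustration only. */
	static int __init example_p2m_debugfs(void)
	{
		struct dentry *d_xen = debugfs_create_dir("xen", NULL);

		if (!d_xen)
			return -ENOMEM;

		debugfs_create_file("p2m", 0444, d_xen, NULL, &p2m_dump_fops);
		return 0;
	}
	fs_initcall(example_p2m_debugfs);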