author    Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
commit    c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree      ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/xen/mmu.c
parent    ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent    6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--  arch/x86/xen/mmu.c  870
1 file changed, 438 insertions(+), 432 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 42086ac406af..0ccccb67a993 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -45,6 +45,8 @@
45#include <linux/vmalloc.h> 45#include <linux/vmalloc.h>
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/gfp.h> 47#include <linux/gfp.h>
48#include <linux/memblock.h>
49#include <linux/seq_file.h>
48 50
49#include <asm/pgtable.h> 51#include <asm/pgtable.h>
50#include <asm/tlbflush.h> 52#include <asm/tlbflush.h>
@@ -55,6 +57,9 @@
55#include <asm/e820.h> 57#include <asm/e820.h>
56#include <asm/linkage.h> 58#include <asm/linkage.h>
57#include <asm/page.h> 59#include <asm/page.h>
60#include <asm/init.h>
61#include <asm/pat.h>
62#include <asm/smp.h>
58 63
59#include <asm/xen/hypercall.h> 64#include <asm/xen/hypercall.h>
60#include <asm/xen/hypervisor.h> 65#include <asm/xen/hypervisor.h>
@@ -71,74 +76,19 @@
71#include "mmu.h" 76#include "mmu.h"
72#include "debugfs.h" 77#include "debugfs.h"
73 78
74#define MMU_UPDATE_HISTO 30
75
76/* 79/*
77 * Protects atomic reservation decrease/increase against concurrent increases. 80 * Protects atomic reservation decrease/increase against concurrent increases.
78 * Also protects non-atomic updates of current_pages and driver_pages, and 81 * Also protects non-atomic updates of current_pages and balloon lists.
79 * balloon lists.
80 */ 82 */
81DEFINE_SPINLOCK(xen_reservation_lock); 83DEFINE_SPINLOCK(xen_reservation_lock);
82 84
83#ifdef CONFIG_XEN_DEBUG_FS
84
85static struct {
86 u32 pgd_update;
87 u32 pgd_update_pinned;
88 u32 pgd_update_batched;
89
90 u32 pud_update;
91 u32 pud_update_pinned;
92 u32 pud_update_batched;
93
94 u32 pmd_update;
95 u32 pmd_update_pinned;
96 u32 pmd_update_batched;
97
98 u32 pte_update;
99 u32 pte_update_pinned;
100 u32 pte_update_batched;
101
102 u32 mmu_update;
103 u32 mmu_update_extended;
104 u32 mmu_update_histo[MMU_UPDATE_HISTO];
105
106 u32 prot_commit;
107 u32 prot_commit_batched;
108
109 u32 set_pte_at;
110 u32 set_pte_at_batched;
111 u32 set_pte_at_pinned;
112 u32 set_pte_at_current;
113 u32 set_pte_at_kernel;
114} mmu_stats;
115
116static u8 zero_stats;
117
118static inline void check_zero(void)
119{
120 if (unlikely(zero_stats)) {
121 memset(&mmu_stats, 0, sizeof(mmu_stats));
122 zero_stats = 0;
123 }
124}
125
126#define ADD_STATS(elem, val) \
127 do { check_zero(); mmu_stats.elem += (val); } while(0)
128
129#else /* !CONFIG_XEN_DEBUG_FS */
130
131#define ADD_STATS(elem, val) do { (void)(val); } while(0)
132
133#endif /* CONFIG_XEN_DEBUG_FS */
134
135
136/* 85/*
137 * Identity map, in addition to plain kernel map. This needs to be 86 * Identity map, in addition to plain kernel map. This needs to be
138 * large enough to allocate page table pages to allocate the rest. 87 * large enough to allocate page table pages to allocate the rest.
139 * Each page can map 2MB. 88 * Each page can map 2MB.
140 */ 89 */
141static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; 90#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
91static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
142 92
143#ifdef CONFIG_X86_64 93#ifdef CONFIG_X86_64
144/* l3 pud for userspace vsyscall mapping */ 94/* l3 pud for userspace vsyscall mapping */
@@ -169,160 +119,6 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
169 */ 119 */
170#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) 120#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
171 121
172
173#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
174#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
175
176/* Placeholder for holes in the address space */
177static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data =
178 { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
179
180 /* Array of pointers to pages containing p2m entries */
181static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data =
182 { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
183
184/* Arrays of p2m arrays expressed in mfns used for save/restore */
185static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss;
186
187static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE]
188 __page_aligned_bss;
189
190static inline unsigned p2m_top_index(unsigned long pfn)
191{
192 BUG_ON(pfn >= MAX_DOMAIN_PAGES);
193 return pfn / P2M_ENTRIES_PER_PAGE;
194}
195
196static inline unsigned p2m_index(unsigned long pfn)
197{
198 return pfn % P2M_ENTRIES_PER_PAGE;
199}
200
201/* Build the parallel p2m_top_mfn structures */
202void xen_build_mfn_list_list(void)
203{
204 unsigned pfn, idx;
205
206 for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
207 unsigned topidx = p2m_top_index(pfn);
208
209 p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
210 }
211
212 for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
213 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
214 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
215 }
216}
217
218void xen_setup_mfn_list_list(void)
219{
220 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
221
222 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
223 virt_to_mfn(p2m_top_mfn_list);
224 HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
225}
226
227/* Set up p2m_top to point to the domain-builder provided p2m pages */
228void __init xen_build_dynamic_phys_to_machine(void)
229{
230 unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
231 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
232 unsigned pfn;
233
234 for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
235 unsigned topidx = p2m_top_index(pfn);
236
237 p2m_top[topidx] = &mfn_list[pfn];
238 }
239
240 xen_build_mfn_list_list();
241}
242
243unsigned long get_phys_to_machine(unsigned long pfn)
244{
245 unsigned topidx, idx;
246
247 if (unlikely(pfn >= MAX_DOMAIN_PAGES))
248 return INVALID_P2M_ENTRY;
249
250 topidx = p2m_top_index(pfn);
251 idx = p2m_index(pfn);
252 return p2m_top[topidx][idx];
253}
254EXPORT_SYMBOL_GPL(get_phys_to_machine);
255
256/* install a new p2m_top page */
257bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
258{
259 unsigned topidx = p2m_top_index(pfn);
260 unsigned long **pfnp, *mfnp;
261 unsigned i;
262
263 pfnp = &p2m_top[topidx];
264 mfnp = &p2m_top_mfn[topidx];
265
266 for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
267 p[i] = INVALID_P2M_ENTRY;
268
269 if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
270 *mfnp = virt_to_mfn(p);
271 return true;
272 }
273
274 return false;
275}
276
277static void alloc_p2m(unsigned long pfn)
278{
279 unsigned long *p;
280
281 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
282 BUG_ON(p == NULL);
283
284 if (!install_p2mtop_page(pfn, p))
285 free_page((unsigned long)p);
286}
287
288/* Try to install p2m mapping; fail if intermediate bits missing */
289bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
290{
291 unsigned topidx, idx;
292
293 if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
294 BUG_ON(mfn != INVALID_P2M_ENTRY);
295 return true;
296 }
297
298 topidx = p2m_top_index(pfn);
299 if (p2m_top[topidx] == p2m_missing) {
300 if (mfn == INVALID_P2M_ENTRY)
301 return true;
302 return false;
303 }
304
305 idx = p2m_index(pfn);
306 p2m_top[topidx][idx] = mfn;
307
308 return true;
309}
310
311void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
312{
313 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
314 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
315 return;
316 }
317
318 if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
319 alloc_p2m(pfn);
320
321 if (!__set_phys_to_machine(pfn, mfn))
322 BUG();
323 }
324}
325
326unsigned long arbitrary_virt_to_mfn(void *vaddr) 122unsigned long arbitrary_virt_to_mfn(void *vaddr)
327{ 123{
328 xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); 124 xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
@@ -351,6 +147,7 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
351 offset = address & ~PAGE_MASK; 147 offset = address & ~PAGE_MASK;
352 return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); 148 return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
353} 149}
150EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
354 151
355void make_lowmem_page_readonly(void *vaddr) 152void make_lowmem_page_readonly(void *vaddr)
356{ 153{
@@ -359,7 +156,8 @@ void make_lowmem_page_readonly(void *vaddr)
359 unsigned int level; 156 unsigned int level;
360 157
361 pte = lookup_address(address, &level); 158 pte = lookup_address(address, &level);
362 BUG_ON(pte == NULL); 159 if (pte == NULL)
160 return; /* vaddr missing */
363 161
364 ptev = pte_wrprotect(*pte); 162 ptev = pte_wrprotect(*pte);
365 163
@@ -374,7 +172,8 @@ void make_lowmem_page_readwrite(void *vaddr)
374 unsigned int level; 172 unsigned int level;
375 173
376 pte = lookup_address(address, &level); 174 pte = lookup_address(address, &level);
377 BUG_ON(pte == NULL); 175 if (pte == NULL)
176 return; /* vaddr missing */
378 177
379 ptev = pte_mkwrite(*pte); 178 ptev = pte_mkwrite(*pte);
380 179
@@ -390,12 +189,7 @@ static bool xen_page_pinned(void *ptr)
390 return PagePinned(page); 189 return PagePinned(page);
391} 190}
392 191
393static bool xen_iomap_pte(pte_t pte) 192void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
394{
395 return pte_flags(pte) & _PAGE_IOMAP;
396}
397
398static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
399{ 193{
400 struct multicall_space mcs; 194 struct multicall_space mcs;
401 struct mmu_update *u; 195 struct mmu_update *u;
@@ -404,13 +198,14 @@ static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
404 u = mcs.args; 198 u = mcs.args;
405 199
406 /* ptep might be kmapped when using 32-bit HIGHPTE */ 200 /* ptep might be kmapped when using 32-bit HIGHPTE */
407 u->ptr = arbitrary_virt_to_machine(ptep).maddr; 201 u->ptr = virt_to_machine(ptep).maddr;
408 u->val = pte_val_ma(pteval); 202 u->val = pte_val_ma(pteval);
409 203
410 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_IO); 204 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
411 205
412 xen_mc_issue(PARAVIRT_LAZY_MMU); 206 xen_mc_issue(PARAVIRT_LAZY_MMU);
413} 207}
208EXPORT_SYMBOL_GPL(xen_set_domain_pte);
414 209
415static void xen_extend_mmu_update(const struct mmu_update *update) 210static void xen_extend_mmu_update(const struct mmu_update *update)
416{ 211{
@@ -420,27 +215,17 @@ static void xen_extend_mmu_update(const struct mmu_update *update)
420 mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); 215 mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
421 216
422 if (mcs.mc != NULL) { 217 if (mcs.mc != NULL) {
423 ADD_STATS(mmu_update_extended, 1);
424 ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1);
425
426 mcs.mc->args[1]++; 218 mcs.mc->args[1]++;
427
428 if (mcs.mc->args[1] < MMU_UPDATE_HISTO)
429 ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1);
430 else
431 ADD_STATS(mmu_update_histo[0], 1);
432 } else { 219 } else {
433 ADD_STATS(mmu_update, 1);
434 mcs = __xen_mc_entry(sizeof(*u)); 220 mcs = __xen_mc_entry(sizeof(*u));
435 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); 221 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
436 ADD_STATS(mmu_update_histo[1], 1);
437 } 222 }
438 223
439 u = mcs.args; 224 u = mcs.args;
440 *u = *update; 225 *u = *update;
441} 226}
442 227
443void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) 228static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
444{ 229{
445 struct mmu_update u; 230 struct mmu_update u;
446 231
@@ -453,17 +238,13 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
453 u.val = pmd_val_ma(val); 238 u.val = pmd_val_ma(val);
454 xen_extend_mmu_update(&u); 239 xen_extend_mmu_update(&u);
455 240
456 ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
457
458 xen_mc_issue(PARAVIRT_LAZY_MMU); 241 xen_mc_issue(PARAVIRT_LAZY_MMU);
459 242
460 preempt_enable(); 243 preempt_enable();
461} 244}
462 245
463void xen_set_pmd(pmd_t *ptr, pmd_t val) 246static void xen_set_pmd(pmd_t *ptr, pmd_t val)
464{ 247{
465 ADD_STATS(pmd_update, 1);
466
467 /* If page is not pinned, we can just update the entry 248 /* If page is not pinned, we can just update the entry
468 directly */ 249 directly */
469 if (!xen_page_pinned(ptr)) { 250 if (!xen_page_pinned(ptr)) {
@@ -471,8 +252,6 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
471 return; 252 return;
472 } 253 }
473 254
474 ADD_STATS(pmd_update_pinned, 1);
475
476 xen_set_pmd_hyper(ptr, val); 255 xen_set_pmd_hyper(ptr, val);
477} 256}
478 257
@@ -485,35 +264,34 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
485 set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); 264 set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
486} 265}
487 266
488void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 267static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
489 pte_t *ptep, pte_t pteval)
490{ 268{
491 if (xen_iomap_pte(pteval)) { 269 struct mmu_update u;
492 xen_set_iomap_pte(ptep, pteval); 270
493 goto out; 271 if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
494 } 272 return false;
495 273
496 ADD_STATS(set_pte_at, 1); 274 xen_mc_batch();
497// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
498 ADD_STATS(set_pte_at_current, mm == current->mm);
499 ADD_STATS(set_pte_at_kernel, mm == &init_mm);
500 275
501 if (mm == current->mm || mm == &init_mm) { 276 u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
502 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { 277 u.val = pte_val_ma(pteval);
503 struct multicall_space mcs; 278 xen_extend_mmu_update(&u);
504 mcs = xen_mc_entry(0);
505 279
506 MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); 280 xen_mc_issue(PARAVIRT_LAZY_MMU);
507 ADD_STATS(set_pte_at_batched, 1); 281
508 xen_mc_issue(PARAVIRT_LAZY_MMU); 282 return true;
509 goto out; 283}
510 } else
511 if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
512 goto out;
513 }
514 xen_set_pte(ptep, pteval);
515 284
516out: return; 285static void xen_set_pte(pte_t *ptep, pte_t pteval)
286{
287 if (!xen_batched_set_pte(ptep, pteval))
288 native_set_pte(ptep, pteval);
289}
290
291static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
292 pte_t *ptep, pte_t pteval)
293{
294 xen_set_pte(ptep, pteval);
517} 295}
518 296
519pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, 297pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
@@ -530,13 +308,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
530 308
531 xen_mc_batch(); 309 xen_mc_batch();
532 310
533 u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; 311 u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
534 u.val = pte_val_ma(pte); 312 u.val = pte_val_ma(pte);
535 xen_extend_mmu_update(&u); 313 xen_extend_mmu_update(&u);
536 314
537 ADD_STATS(prot_commit, 1);
538 ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
539
540 xen_mc_issue(PARAVIRT_LAZY_MMU); 315 xen_mc_issue(PARAVIRT_LAZY_MMU);
541} 316}
542 317
@@ -557,7 +332,34 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
557 if (val & _PAGE_PRESENT) { 332 if (val & _PAGE_PRESENT) {
558 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; 333 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
559 pteval_t flags = val & PTE_FLAGS_MASK; 334 pteval_t flags = val & PTE_FLAGS_MASK;
560 val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; 335 unsigned long mfn;
336
337 if (!xen_feature(XENFEAT_auto_translated_physmap))
338 mfn = get_phys_to_machine(pfn);
339 else
340 mfn = pfn;
341 /*
342 * If there's no mfn for the pfn, then just create an
343 * empty non-present pte. Unfortunately this loses
344 * information about the original pfn, so
345 * pte_mfn_to_pfn is asymmetric.
346 */
347 if (unlikely(mfn == INVALID_P2M_ENTRY)) {
348 mfn = 0;
349 flags = 0;
350 } else {
351 /*
352 * Paramount to do this test _after_ the
353 * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
354 * IDENTITY_FRAME_BIT resolves to true.
355 */
356 mfn &= ~FOREIGN_FRAME_BIT;
357 if (mfn & IDENTITY_FRAME_BIT) {
358 mfn &= ~IDENTITY_FRAME_BIT;
359 flags |= _PAGE_IOMAP;
360 }
361 }
362 val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
561 } 363 }
562 364
563 return val; 365 return val;
@@ -577,25 +379,71 @@ static pteval_t iomap_pte(pteval_t val)
577 return val; 379 return val;
578} 380}
579 381
580pteval_t xen_pte_val(pte_t pte) 382static pteval_t xen_pte_val(pte_t pte)
581{ 383{
582 if (xen_initial_domain() && (pte.pte & _PAGE_IOMAP)) 384 pteval_t pteval = pte.pte;
583 return pte.pte;
584 385
585 return pte_mfn_to_pfn(pte.pte); 386 /* If this is a WC pte, convert back from Xen WC to Linux WC */
387 if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
388 WARN_ON(!pat_enabled);
389 pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
390 }
391
392 if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
393 return pteval;
394
395 return pte_mfn_to_pfn(pteval);
586} 396}
587PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); 397PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
588 398
589pgdval_t xen_pgd_val(pgd_t pgd) 399static pgdval_t xen_pgd_val(pgd_t pgd)
590{ 400{
591 return pte_mfn_to_pfn(pgd.pgd); 401 return pte_mfn_to_pfn(pgd.pgd);
592} 402}
593PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); 403PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
594 404
595pte_t xen_make_pte(pteval_t pte) 405/*
406 * Xen's PAT setup is part of its ABI, though I assume entries 6 & 7
407 * are reserved for now, to correspond to the Intel-reserved PAT
408 * types.
409 *
410 * We expect Linux's PAT set as follows:
411 *
412 * Idx PTE flags Linux Xen Default
413 * 0 WB WB WB
414 * 1 PWT WC WT WT
415 * 2 PCD UC- UC- UC-
416 * 3 PCD PWT UC UC UC
417 * 4 PAT WB WC WB
418 * 5 PAT PWT WC WP WT
419 * 6 PAT PCD UC- UC UC-
420 * 7 PAT PCD PWT UC UC UC
421 */
422
423void xen_set_pat(u64 pat)
424{
425 /* We expect Linux to use a PAT setting of
426 * UC UC- WC WB (ignoring the PAT flag) */
427 WARN_ON(pat != 0x0007010600070106ull);
428}
429
430static pte_t xen_make_pte(pteval_t pte)
596{ 431{
597 phys_addr_t addr = (pte & PTE_PFN_MASK); 432 phys_addr_t addr = (pte & PTE_PFN_MASK);
598 433
434 /* If Linux is trying to set a WC pte, then map to the Xen WC.
435 * If _PAGE_PAT is set, then it probably means it is really
436 * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
437 * things work out OK...
438 *
439 * (We should never see kernel mappings with _PAGE_PSE set,
440 * but we could see hugetlbfs mappings, I think.).
441 */
442 if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) {
443 if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
444 pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
445 }
446
599 /* 447 /*
600 * Unprivileged domains are allowed to do IOMAPpings for 448 * Unprivileged domains are allowed to do IOMAPpings for
601 * PCI passthrough, but not map ISA space. The ISA 449 * PCI passthrough, but not map ISA space. The ISA
@@ -614,20 +462,55 @@ pte_t xen_make_pte(pteval_t pte)
614} 462}
615PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); 463PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
616 464
617pgd_t xen_make_pgd(pgdval_t pgd) 465#ifdef CONFIG_XEN_DEBUG
466pte_t xen_make_pte_debug(pteval_t pte)
467{
468 phys_addr_t addr = (pte & PTE_PFN_MASK);
469 phys_addr_t other_addr;
470 bool io_page = false;
471 pte_t _pte;
472
473 if (pte & _PAGE_IOMAP)
474 io_page = true;
475
476 _pte = xen_make_pte(pte);
477
478 if (!addr)
479 return _pte;
480
481 if (io_page &&
482 (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
483 other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
484 WARN_ONCE(addr != other_addr,
485 "0x%lx is using VM_IO, but it is 0x%lx!\n",
486 (unsigned long)addr, (unsigned long)other_addr);
487 } else {
488 pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
489 other_addr = (_pte.pte & PTE_PFN_MASK);
490 WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
491 "0x%lx is missing VM_IO (and wasn't fixed)!\n",
492 (unsigned long)addr);
493 }
494
495 return _pte;
496}
497PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
498#endif
499
500static pgd_t xen_make_pgd(pgdval_t pgd)
618{ 501{
619 pgd = pte_pfn_to_mfn(pgd); 502 pgd = pte_pfn_to_mfn(pgd);
620 return native_make_pgd(pgd); 503 return native_make_pgd(pgd);
621} 504}
622PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); 505PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
623 506
624pmdval_t xen_pmd_val(pmd_t pmd) 507static pmdval_t xen_pmd_val(pmd_t pmd)
625{ 508{
626 return pte_mfn_to_pfn(pmd.pmd); 509 return pte_mfn_to_pfn(pmd.pmd);
627} 510}
628PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); 511PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
629 512
630void xen_set_pud_hyper(pud_t *ptr, pud_t val) 513static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
631{ 514{
632 struct mmu_update u; 515 struct mmu_update u;
633 516
@@ -640,17 +523,13 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
640 u.val = pud_val_ma(val); 523 u.val = pud_val_ma(val);
641 xen_extend_mmu_update(&u); 524 xen_extend_mmu_update(&u);
642 525
643 ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
644
645 xen_mc_issue(PARAVIRT_LAZY_MMU); 526 xen_mc_issue(PARAVIRT_LAZY_MMU);
646 527
647 preempt_enable(); 528 preempt_enable();
648} 529}
649 530
650void xen_set_pud(pud_t *ptr, pud_t val) 531static void xen_set_pud(pud_t *ptr, pud_t val)
651{ 532{
652 ADD_STATS(pud_update, 1);
653
654 /* If page is not pinned, we can just update the entry 533 /* If page is not pinned, we can just update the entry
655 directly */ 534 directly */
656 if (!xen_page_pinned(ptr)) { 535 if (!xen_page_pinned(ptr)) {
@@ -658,56 +537,28 @@ void xen_set_pud(pud_t *ptr, pud_t val)
658 return; 537 return;
659 } 538 }
660 539
661 ADD_STATS(pud_update_pinned, 1);
662
663 xen_set_pud_hyper(ptr, val); 540 xen_set_pud_hyper(ptr, val);
664} 541}
665 542
666void xen_set_pte(pte_t *ptep, pte_t pte)
667{
668 if (xen_iomap_pte(pte)) {
669 xen_set_iomap_pte(ptep, pte);
670 return;
671 }
672
673 ADD_STATS(pte_update, 1);
674// ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
675 ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
676
677#ifdef CONFIG_X86_PAE 543#ifdef CONFIG_X86_PAE
678 ptep->pte_high = pte.pte_high; 544static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
679 smp_wmb();
680 ptep->pte_low = pte.pte_low;
681#else
682 *ptep = pte;
683#endif
684}
685
686#ifdef CONFIG_X86_PAE
687void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
688{ 545{
689 if (xen_iomap_pte(pte)) {
690 xen_set_iomap_pte(ptep, pte);
691 return;
692 }
693
694 set_64bit((u64 *)ptep, native_pte_val(pte)); 546 set_64bit((u64 *)ptep, native_pte_val(pte));
695} 547}
696 548
697void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 549static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
698{ 550{
699 ptep->pte_low = 0; 551 if (!xen_batched_set_pte(ptep, native_make_pte(0)))
700 smp_wmb(); /* make sure low gets written first */ 552 native_pte_clear(mm, addr, ptep);
701 ptep->pte_high = 0;
702} 553}
703 554
704void xen_pmd_clear(pmd_t *pmdp) 555static void xen_pmd_clear(pmd_t *pmdp)
705{ 556{
706 set_pmd(pmdp, __pmd(0)); 557 set_pmd(pmdp, __pmd(0));
707} 558}
708#endif /* CONFIG_X86_PAE */ 559#endif /* CONFIG_X86_PAE */
709 560
710pmd_t xen_make_pmd(pmdval_t pmd) 561static pmd_t xen_make_pmd(pmdval_t pmd)
711{ 562{
712 pmd = pte_pfn_to_mfn(pmd); 563 pmd = pte_pfn_to_mfn(pmd);
713 return native_make_pmd(pmd); 564 return native_make_pmd(pmd);
@@ -715,13 +566,13 @@ pmd_t xen_make_pmd(pmdval_t pmd)
715PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); 566PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
716 567
717#if PAGETABLE_LEVELS == 4 568#if PAGETABLE_LEVELS == 4
718pudval_t xen_pud_val(pud_t pud) 569static pudval_t xen_pud_val(pud_t pud)
719{ 570{
720 return pte_mfn_to_pfn(pud.pud); 571 return pte_mfn_to_pfn(pud.pud);
721} 572}
722PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); 573PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
723 574
724pud_t xen_make_pud(pudval_t pud) 575static pud_t xen_make_pud(pudval_t pud)
725{ 576{
726 pud = pte_pfn_to_mfn(pud); 577 pud = pte_pfn_to_mfn(pud);
727 578
@@ -729,7 +580,7 @@ pud_t xen_make_pud(pudval_t pud)
729} 580}
730PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); 581PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
731 582
732pgd_t *xen_get_user_pgd(pgd_t *pgd) 583static pgd_t *xen_get_user_pgd(pgd_t *pgd)
733{ 584{
734 pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); 585 pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
735 unsigned offset = pgd - pgd_page; 586 unsigned offset = pgd - pgd_page;
@@ -761,7 +612,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
761 * 2. It is always pinned 612 * 2. It is always pinned
762 * 3. It has no user pagetable attached to it 613 * 3. It has no user pagetable attached to it
763 */ 614 */
764void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) 615static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
765{ 616{
766 preempt_disable(); 617 preempt_disable();
767 618
@@ -774,12 +625,10 @@ void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
774 preempt_enable(); 625 preempt_enable();
775} 626}
776 627
777void xen_set_pgd(pgd_t *ptr, pgd_t val) 628static void xen_set_pgd(pgd_t *ptr, pgd_t val)
778{ 629{
779 pgd_t *user_ptr = xen_get_user_pgd(ptr); 630 pgd_t *user_ptr = xen_get_user_pgd(ptr);
780 631
781 ADD_STATS(pgd_update, 1);
782
783 /* If page is not pinned, we can just update the entry 632 /* If page is not pinned, we can just update the entry
784 directly */ 633 directly */
785 if (!xen_page_pinned(ptr)) { 634 if (!xen_page_pinned(ptr)) {
@@ -791,9 +640,6 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
791 return; 640 return;
792 } 641 }
793 642
794 ADD_STATS(pgd_update_pinned, 1);
795 ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
796
797 /* If it's pinned, then we can at least batch the kernel and 643 /* If it's pinned, then we can at least batch the kernel and
798 user updates together. */ 644 user updates together. */
799 xen_mc_batch(); 645 xen_mc_batch();
@@ -1068,10 +914,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
1068 */ 914 */
1069void xen_mm_pin_all(void) 915void xen_mm_pin_all(void)
1070{ 916{
1071 unsigned long flags;
1072 struct page *page; 917 struct page *page;
1073 918
1074 spin_lock_irqsave(&pgd_lock, flags); 919 spin_lock(&pgd_lock);
1075 920
1076 list_for_each_entry(page, &pgd_list, lru) { 921 list_for_each_entry(page, &pgd_list, lru) {
1077 if (!PagePinned(page)) { 922 if (!PagePinned(page)) {
@@ -1080,7 +925,7 @@ void xen_mm_pin_all(void)
1080 } 925 }
1081 } 926 }
1082 927
1083 spin_unlock_irqrestore(&pgd_lock, flags); 928 spin_unlock(&pgd_lock);
1084} 929}
1085 930
1086/* 931/*
@@ -1088,7 +933,7 @@ void xen_mm_pin_all(void)
1088 * that's before we have page structures to store the bits. So do all 933 * that's before we have page structures to store the bits. So do all
1089 * the book-keeping now. 934 * the book-keeping now.
1090 */ 935 */
1091static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, 936static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
1092 enum pt_level level) 937 enum pt_level level)
1093{ 938{
1094 SetPagePinned(page); 939 SetPagePinned(page);
@@ -1181,10 +1026,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
1181 */ 1026 */
1182void xen_mm_unpin_all(void) 1027void xen_mm_unpin_all(void)
1183{ 1028{
1184 unsigned long flags;
1185 struct page *page; 1029 struct page *page;
1186 1030
1187 spin_lock_irqsave(&pgd_lock, flags); 1031 spin_lock(&pgd_lock);
1188 1032
1189 list_for_each_entry(page, &pgd_list, lru) { 1033 list_for_each_entry(page, &pgd_list, lru) {
1190 if (PageSavePinned(page)) { 1034 if (PageSavePinned(page)) {
@@ -1194,17 +1038,17 @@ void xen_mm_unpin_all(void)
1194 } 1038 }
1195 } 1039 }
1196 1040
1197 spin_unlock_irqrestore(&pgd_lock, flags); 1041 spin_unlock(&pgd_lock);
1198} 1042}
1199 1043
1200void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) 1044static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
1201{ 1045{
1202 spin_lock(&next->page_table_lock); 1046 spin_lock(&next->page_table_lock);
1203 xen_pgd_pin(next); 1047 xen_pgd_pin(next);
1204 spin_unlock(&next->page_table_lock); 1048 spin_unlock(&next->page_table_lock);
1205} 1049}
1206 1050
1207void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) 1051static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
1208{ 1052{
1209 spin_lock(&mm->page_table_lock); 1053 spin_lock(&mm->page_table_lock);
1210 xen_pgd_pin(mm); 1054 xen_pgd_pin(mm);
@@ -1222,7 +1066,7 @@ static void drop_other_mm_ref(void *info)
1222 1066
1223 active_mm = percpu_read(cpu_tlbstate.active_mm); 1067 active_mm = percpu_read(cpu_tlbstate.active_mm);
1224 1068
1225 if (active_mm == mm) 1069 if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
1226 leave_mm(smp_processor_id()); 1070 leave_mm(smp_processor_id());
1227 1071
1228 /* If this cpu still has a stale cr3 reference, then make sure 1072 /* If this cpu still has a stale cr3 reference, then make sure
@@ -1291,7 +1135,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
1291 * pagetable because of lazy tlb flushing. This means we need need to 1135 * pagetable because of lazy tlb flushing. This means we need need to
1292 * switch all CPUs off this pagetable before we can unpin it. 1136 * switch all CPUs off this pagetable before we can unpin it.
1293 */ 1137 */
1294void xen_exit_mmap(struct mm_struct *mm) 1138static void xen_exit_mmap(struct mm_struct *mm)
1295{ 1139{
1296 get_cpu(); /* make sure we don't move around */ 1140 get_cpu(); /* make sure we don't move around */
1297 xen_drop_mm_ref(mm); 1141 xen_drop_mm_ref(mm);
@@ -1306,13 +1150,27 @@ void xen_exit_mmap(struct mm_struct *mm)
1306 spin_unlock(&mm->page_table_lock); 1150 spin_unlock(&mm->page_table_lock);
1307} 1151}
1308 1152
1309static __init void xen_pagetable_setup_start(pgd_t *base) 1153static void __init xen_pagetable_setup_start(pgd_t *base)
1310{ 1154{
1311} 1155}
1312 1156
1157static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
1158{
1159 /* reserve the range used */
1160 native_pagetable_reserve(start, end);
1161
1162 /* set as RW the rest */
1163 printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
1164 PFN_PHYS(pgt_buf_top));
1165 while (end < PFN_PHYS(pgt_buf_top)) {
1166 make_lowmem_page_readwrite(__va(end));
1167 end += PAGE_SIZE;
1168 }
1169}
1170
1313static void xen_post_allocator_init(void); 1171static void xen_post_allocator_init(void);
1314 1172
1315static __init void xen_pagetable_setup_done(pgd_t *base) 1173static void __init xen_pagetable_setup_done(pgd_t *base)
1316{ 1174{
1317 xen_setup_shared_info(); 1175 xen_setup_shared_info();
1318 xen_post_allocator_init(); 1176 xen_post_allocator_init();
@@ -1374,7 +1232,11 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
1374{ 1232{
1375 struct { 1233 struct {
1376 struct mmuext_op op; 1234 struct mmuext_op op;
1235#ifdef CONFIG_SMP
1236 DECLARE_BITMAP(mask, num_processors);
1237#else
1377 DECLARE_BITMAP(mask, NR_CPUS); 1238 DECLARE_BITMAP(mask, NR_CPUS);
1239#endif
1378 } *args; 1240 } *args;
1379 struct multicall_space mcs; 1241 struct multicall_space mcs;
1380 1242
@@ -1509,7 +1371,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1509} 1371}
1510 1372
1511#ifdef CONFIG_X86_32 1373#ifdef CONFIG_X86_32
1512static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) 1374static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1513{ 1375{
1514 /* If there's an existing pte, then don't allow _PAGE_RW to be set */ 1376 /* If there's an existing pte, then don't allow _PAGE_RW to be set */
1515 if (pte_val_ma(*ptep) & _PAGE_PRESENT) 1377 if (pte_val_ma(*ptep) & _PAGE_PRESENT)
@@ -1518,16 +1380,34 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1518 1380
1519 return pte; 1381 return pte;
1520} 1382}
1383#else /* CONFIG_X86_64 */
1384static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1385{
1386 unsigned long pfn = pte_pfn(pte);
1387
1388 /*
1389 * If the new pfn is within the range of the newly allocated
1390 * kernel pagetable, and it isn't being mapped into an
1391 * early_ioremap fixmap slot as a freshly allocated page, make sure
1392 * it is RO.
1393 */
1394 if (((!is_early_ioremap_ptep(ptep) &&
1395 pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
1396 (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
1397 pte = pte_wrprotect(pte);
1398
1399 return pte;
1400}
1401#endif /* CONFIG_X86_64 */
1521 1402
1522/* Init-time set_pte while constructing initial pagetables, which 1403/* Init-time set_pte while constructing initial pagetables, which
1523 doesn't allow RO pagetable pages to be remapped RW */ 1404 doesn't allow RO pagetable pages to be remapped RW */
1524static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) 1405static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
1525{ 1406{
1526 pte = mask_rw_pte(ptep, pte); 1407 pte = mask_rw_pte(ptep, pte);
1527 1408
1528 xen_set_pte(ptep, pte); 1409 xen_set_pte(ptep, pte);
1529} 1410}
1530#endif
1531 1411
1532static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) 1412static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1533{ 1413{
@@ -1540,7 +1420,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1540 1420
1541/* Early in boot, while setting up the initial pagetable, assume 1421/* Early in boot, while setting up the initial pagetable, assume
1542 everything is pinned. */ 1422 everything is pinned. */
1543static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) 1423static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1544{ 1424{
1545#ifdef CONFIG_FLATMEM 1425#ifdef CONFIG_FLATMEM
1546 BUG_ON(mem_map); /* should only be used early */ 1426 BUG_ON(mem_map); /* should only be used early */
@@ -1550,7 +1430,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1550} 1430}
1551 1431
1552/* Used for pmd and pud */ 1432/* Used for pmd and pud */
1553static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) 1433static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1554{ 1434{
1555#ifdef CONFIG_FLATMEM 1435#ifdef CONFIG_FLATMEM
1556 BUG_ON(mem_map); /* should only be used early */ 1436 BUG_ON(mem_map); /* should only be used early */
@@ -1560,13 +1440,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1560 1440
1561/* Early release_pte assumes that all pts are pinned, since there's 1441/* Early release_pte assumes that all pts are pinned, since there's
1562 only init_mm and anything attached to that is pinned. */ 1442 only init_mm and anything attached to that is pinned. */
1563static __init void xen_release_pte_init(unsigned long pfn) 1443static void __init xen_release_pte_init(unsigned long pfn)
1564{ 1444{
1565 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); 1445 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1566 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 1446 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1567} 1447}
1568 1448
1569static __init void xen_release_pmd_init(unsigned long pfn) 1449static void __init xen_release_pmd_init(unsigned long pfn)
1570{ 1450{
1571 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); 1451 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1572} 1452}
@@ -1682,6 +1562,7 @@ static void *m2v(phys_addr_t maddr)
1682 return __ka(m2p(maddr)); 1562 return __ka(m2p(maddr));
1683} 1563}
1684 1564
1565/* Set the page permissions on an identity-mapped pages */
1685static void set_page_prot(void *addr, pgprot_t prot) 1566static void set_page_prot(void *addr, pgprot_t prot)
1686{ 1567{
1687 unsigned long pfn = __pa(addr) >> PAGE_SHIFT; 1568 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
@@ -1691,12 +1572,15 @@ static void set_page_prot(void *addr, pgprot_t prot)
1691 BUG(); 1572 BUG();
1692} 1573}
1693 1574
1694static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) 1575static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1695{ 1576{
1696 unsigned pmdidx, pteidx; 1577 unsigned pmdidx, pteidx;
1697 unsigned ident_pte; 1578 unsigned ident_pte;
1698 unsigned long pfn; 1579 unsigned long pfn;
1699 1580
1581 level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
1582 PAGE_SIZE);
1583
1700 ident_pte = 0; 1584 ident_pte = 0;
1701 pfn = 0; 1585 pfn = 0;
1702 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { 1586 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
@@ -1707,7 +1591,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1707 pte_page = m2v(pmd[pmdidx].pmd); 1591 pte_page = m2v(pmd[pmdidx].pmd);
1708 else { 1592 else {
1709 /* Check for free pte pages */ 1593 /* Check for free pte pages */
1710 if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) 1594 if (ident_pte == LEVEL1_IDENT_ENTRIES)
1711 break; 1595 break;
1712 1596
1713 pte_page = &level1_ident_pgt[ident_pte]; 1597 pte_page = &level1_ident_pgt[ident_pte];
@@ -1720,8 +1604,10 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1720 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { 1604 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
1721 pte_t pte; 1605 pte_t pte;
1722 1606
1607#ifdef CONFIG_X86_32
1723 if (pfn > max_pfn_mapped) 1608 if (pfn > max_pfn_mapped)
1724 max_pfn_mapped = pfn; 1609 max_pfn_mapped = pfn;
1610#endif
1725 1611
1726 if (!pte_none(pte_page[pteidx])) 1612 if (!pte_none(pte_page[pteidx]))
1727 continue; 1613 continue;
@@ -1737,6 +1623,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1737 set_page_prot(pmd, PAGE_KERNEL_RO); 1623 set_page_prot(pmd, PAGE_KERNEL_RO);
1738} 1624}
1739 1625
1626void __init xen_setup_machphys_mapping(void)
1627{
1628 struct xen_machphys_mapping mapping;
1629 unsigned long machine_to_phys_nr_ents;
1630
1631 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
1632 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
1633 machine_to_phys_nr_ents = mapping.max_mfn + 1;
1634 } else {
1635 machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
1636 }
1637 machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
1638}
1639
1740#ifdef CONFIG_X86_64 1640#ifdef CONFIG_X86_64
1741static void convert_pfn_mfn(void *v) 1641static void convert_pfn_mfn(void *v)
1742{ 1642{
@@ -1750,7 +1650,7 @@ static void convert_pfn_mfn(void *v)
1750} 1650}
1751 1651
1752/* 1652/*
1753 * Set up the inital kernel pagetable. 1653 * Set up the initial kernel pagetable.
1754 * 1654 *
1755 * We can construct this by grafting the Xen provided pagetable into 1655 * We can construct this by grafting the Xen provided pagetable into
1756 * head_64.S's preconstructed pagetables. We copy the Xen L2's into 1656 * head_64.S's preconstructed pagetables. We copy the Xen L2's into
@@ -1760,12 +1660,18 @@ static void convert_pfn_mfn(void *v)
1760 * of the physical mapping once some sort of allocator has been set 1660 * of the physical mapping once some sort of allocator has been set
1761 * up. 1661 * up.
1762 */ 1662 */
1763__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, 1663pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1764 unsigned long max_pfn) 1664 unsigned long max_pfn)
1765{ 1665{
1766 pud_t *l3; 1666 pud_t *l3;
1767 pmd_t *l2; 1667 pmd_t *l2;
1768 1668
1669 /* max_pfn_mapped is the last pfn mapped in the initial memory
1670 * mappings. Considering that on Xen after the kernel mappings we
1671 * have the mappings of some pages that don't exist in pfn space, we
1672 * set max_pfn_mapped to the last real pfn mapped. */
1673 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
1674
1769 /* Zap identity mapping */ 1675 /* Zap identity mapping */
1770 init_level4_pgt[0] = __pgd(0); 1676 init_level4_pgt[0] = __pgd(0);
1771 1677
@@ -1814,7 +1720,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1814 __xen_write_cr3(true, __pa(pgd)); 1720 __xen_write_cr3(true, __pa(pgd));
1815 xen_mc_issue(PARAVIRT_LAZY_CPU); 1721 xen_mc_issue(PARAVIRT_LAZY_CPU);
1816 1722
1817 reserve_early(__pa(xen_start_info->pt_base), 1723 memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
1818 __pa(xen_start_info->pt_base + 1724 __pa(xen_start_info->pt_base +
1819 xen_start_info->nr_pt_frames * PAGE_SIZE), 1725 xen_start_info->nr_pt_frames * PAGE_SIZE),
1820 "XEN PAGETABLES"); 1726 "XEN PAGETABLES");
@@ -1822,45 +1728,88 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1822 return pgd; 1728 return pgd;
1823} 1729}
1824#else /* !CONFIG_X86_64 */ 1730#else /* !CONFIG_X86_64 */
1825static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; 1731static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
1732static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
1733
1734static void __init xen_write_cr3_init(unsigned long cr3)
1735{
1736 unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
1737
1738 BUG_ON(read_cr3() != __pa(initial_page_table));
1739 BUG_ON(cr3 != __pa(swapper_pg_dir));
1740
1741 /*
1742 * We are switching to swapper_pg_dir for the first time (from
1743 * initial_page_table) and therefore need to mark that page
1744 * read-only and then pin it.
1745 *
1746 * Xen disallows sharing of kernel PMDs for PAE
1747 * guests. Therefore we must copy the kernel PMD from
1748 * initial_page_table into a new kernel PMD to be used in
1749 * swapper_pg_dir.
1750 */
1751 swapper_kernel_pmd =
1752 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
1753 memcpy(swapper_kernel_pmd, initial_kernel_pmd,
1754 sizeof(pmd_t) * PTRS_PER_PMD);
1755 swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
1756 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
1757 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
1758
1759 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
1760 xen_write_cr3(cr3);
1761 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
1762
1763 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
1764 PFN_DOWN(__pa(initial_page_table)));
1765 set_page_prot(initial_page_table, PAGE_KERNEL);
1766 set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
1826 1767
1827__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, 1768 pv_mmu_ops.write_cr3 = &xen_write_cr3;
1769}
1770
1771pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1828 unsigned long max_pfn) 1772 unsigned long max_pfn)
1829{ 1773{
1830 pmd_t *kernel_pmd; 1774 pmd_t *kernel_pmd;
1831 1775
1776 initial_kernel_pmd =
1777 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
1778
1832 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + 1779 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
1833 xen_start_info->nr_pt_frames * PAGE_SIZE + 1780 xen_start_info->nr_pt_frames * PAGE_SIZE +
1834 512*1024); 1781 512*1024);
1835 1782
1836 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 1783 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
1837 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 1784 memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
1838 1785
1839 xen_map_identity_early(level2_kernel_pgt, max_pfn); 1786 xen_map_identity_early(initial_kernel_pmd, max_pfn);
1840 1787
1841 memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); 1788 memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
1842 set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], 1789 initial_page_table[KERNEL_PGD_BOUNDARY] =
1843 __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); 1790 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
1844 1791
1845 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); 1792 set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
1846 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); 1793 set_page_prot(initial_page_table, PAGE_KERNEL_RO);
1847 set_page_prot(empty_zero_page, PAGE_KERNEL_RO); 1794 set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
1848 1795
1849 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); 1796 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1850 1797
1851 xen_write_cr3(__pa(swapper_pg_dir)); 1798 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
1799 PFN_DOWN(__pa(initial_page_table)));
1800 xen_write_cr3(__pa(initial_page_table));
1852 1801
1853 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); 1802 memblock_x86_reserve_range(__pa(xen_start_info->pt_base),
1854
1855 reserve_early(__pa(xen_start_info->pt_base),
1856 __pa(xen_start_info->pt_base + 1803 __pa(xen_start_info->pt_base +
1857 xen_start_info->nr_pt_frames * PAGE_SIZE), 1804 xen_start_info->nr_pt_frames * PAGE_SIZE),
1858 "XEN PAGETABLES"); 1805 "XEN PAGETABLES");
1859 1806
1860 return swapper_pg_dir; 1807 return initial_page_table;
1861} 1808}
1862#endif /* CONFIG_X86_64 */ 1809#endif /* CONFIG_X86_64 */
1863 1810
1811static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
1812
1864static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) 1813static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1865{ 1814{
1866 pte_t pte; 1815 pte_t pte;
@@ -1881,15 +1830,28 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1881#else 1830#else
1882 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: 1831 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
1883#endif 1832#endif
1884#ifdef CONFIG_X86_LOCAL_APIC
1885 case FIX_APIC_BASE: /* maps dummy local APIC */
1886#endif
1887 case FIX_TEXT_POKE0: 1833 case FIX_TEXT_POKE0:
1888 case FIX_TEXT_POKE1: 1834 case FIX_TEXT_POKE1:
1889 /* All local page mappings */ 1835 /* All local page mappings */
1890 pte = pfn_pte(phys, prot); 1836 pte = pfn_pte(phys, prot);
1891 break; 1837 break;
1892 1838
1839#ifdef CONFIG_X86_LOCAL_APIC
1840 case FIX_APIC_BASE: /* maps dummy local APIC */
1841 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
1842 break;
1843#endif
1844
1845#ifdef CONFIG_X86_IO_APIC
1846 case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
1847 /*
1848 * We just don't map the IO APIC - all access is via
1849 * hypercalls. Keep the address in the pte for reference.
1850 */
1851 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
1852 break;
1853#endif
1854
1893 case FIX_PARAVIRT_BOOTMAP: 1855 case FIX_PARAVIRT_BOOTMAP:
1894 /* This is an MFN, but it isn't an IO mapping from the 1856 /* This is an MFN, but it isn't an IO mapping from the
1895 IO domain */ 1857 IO domain */
@@ -1914,8 +1876,34 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
1914#endif 1876#endif
1915} 1877}
1916 1878
1917static __init void xen_post_allocator_init(void) 1879void __init xen_ident_map_ISA(void)
1880{
1881 unsigned long pa;
1882
1883 /*
1884 * If we're dom0, then linear map the ISA machine addresses into
1885 * the kernel's address space.
1886 */
1887 if (!xen_initial_domain())
1888 return;
1889
1890 xen_raw_printk("Xen: setup ISA identity maps\n");
1891
1892 for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) {
1893 pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO);
1894
1895 if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0))
1896 BUG();
1897 }
1898
1899 xen_flush_tlb();
1900}
1901
1902static void __init xen_post_allocator_init(void)
1918{ 1903{
1904#ifdef CONFIG_XEN_DEBUG
1905 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
1906#endif
1919 pv_mmu_ops.set_pte = xen_set_pte; 1907 pv_mmu_ops.set_pte = xen_set_pte;
1920 pv_mmu_ops.set_pmd = xen_set_pmd; 1908 pv_mmu_ops.set_pmd = xen_set_pmd;
1921 pv_mmu_ops.set_pud = xen_set_pud; 1909 pv_mmu_ops.set_pud = xen_set_pud;
@@ -1948,12 +1936,16 @@ static void xen_leave_lazy_mmu(void)
1948 preempt_enable(); 1936 preempt_enable();
1949} 1937}
1950 1938
1951static const struct pv_mmu_ops xen_mmu_ops __initdata = { 1939static const struct pv_mmu_ops xen_mmu_ops __initconst = {
1952 .read_cr2 = xen_read_cr2, 1940 .read_cr2 = xen_read_cr2,
1953 .write_cr2 = xen_write_cr2, 1941 .write_cr2 = xen_write_cr2,
1954 1942
1955 .read_cr3 = xen_read_cr3, 1943 .read_cr3 = xen_read_cr3,
1944#ifdef CONFIG_X86_32
1945 .write_cr3 = xen_write_cr3_init,
1946#else
1956 .write_cr3 = xen_write_cr3, 1947 .write_cr3 = xen_write_cr3,
1948#endif
1957 1949
1958 .flush_tlb_user = xen_flush_tlb, 1950 .flush_tlb_user = xen_flush_tlb,
1959 .flush_tlb_kernel = xen_flush_tlb, 1951 .flush_tlb_kernel = xen_flush_tlb,
@@ -1969,14 +1961,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1969 .alloc_pte = xen_alloc_pte_init, 1961 .alloc_pte = xen_alloc_pte_init,
1970 .release_pte = xen_release_pte_init, 1962 .release_pte = xen_release_pte_init,
1971 .alloc_pmd = xen_alloc_pmd_init, 1963 .alloc_pmd = xen_alloc_pmd_init,
1972 .alloc_pmd_clone = paravirt_nop,
1973 .release_pmd = xen_release_pmd_init, 1964 .release_pmd = xen_release_pmd_init,
1974 1965
1975#ifdef CONFIG_X86_64
1976 .set_pte = xen_set_pte,
1977#else
1978 .set_pte = xen_set_pte_init, 1966 .set_pte = xen_set_pte_init,
1979#endif
1980 .set_pte_at = xen_set_pte_at, 1967 .set_pte_at = xen_set_pte_at,
1981 .set_pmd = xen_set_pmd_hyper, 1968 .set_pmd = xen_set_pmd_hyper,
1982 1969
@@ -2022,11 +2009,12 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
2022 2009
2023void __init xen_init_mmu_ops(void) 2010void __init xen_init_mmu_ops(void)
2024{ 2011{
2012 x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
2025 x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; 2013 x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
2026 x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; 2014 x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
2027 pv_mmu_ops = xen_mmu_ops; 2015 pv_mmu_ops = xen_mmu_ops;
2028 2016
2029 vmap_lazy_unmap = false; 2017 memset(dummy_mapping, 0xff, PAGE_SIZE);
2030} 2018}
2031 2019
2032/* Protected by xen_reservation_lock. */ 2020/* Protected by xen_reservation_lock. */
@@ -2049,7 +2037,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2049 in_frames[i] = virt_to_mfn(vaddr); 2037 in_frames[i] = virt_to_mfn(vaddr);
2050 2038
2051 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); 2039 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2052 set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); 2040 __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
2053 2041
2054 if (out_frames) 2042 if (out_frames)
2055 out_frames[i] = virt_to_pfn(vaddr); 2043 out_frames[i] = virt_to_pfn(vaddr);
@@ -2259,65 +2247,83 @@ void __init xen_hvm_init_mmu_ops(void)
2259} 2247}
2260#endif 2248#endif
2261 2249
2262#ifdef CONFIG_XEN_DEBUG_FS 2250#define REMAP_BATCH_SIZE 16
2263 2251
2264static struct dentry *d_mmu_debug; 2252struct remap_data {
2253 unsigned long mfn;
2254 pgprot_t prot;
2255 struct mmu_update *mmu_update;
2256};
2265 2257
2266static int __init xen_mmu_debugfs(void) 2258static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
2259 unsigned long addr, void *data)
2267{ 2260{
2268 struct dentry *d_xen = xen_init_debugfs(); 2261 struct remap_data *rmd = data;
2269 2262 pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
2270 if (d_xen == NULL)
2271 return -ENOMEM;
2272 2263
2273 d_mmu_debug = debugfs_create_dir("mmu", d_xen); 2264 rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
2274 2265 rmd->mmu_update->val = pte_val_ma(pte);
2275 debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats); 2266 rmd->mmu_update++;
2276
2277 debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update);
2278 debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug,
2279 &mmu_stats.pgd_update_pinned);
2280 debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug,
2281 &mmu_stats.pgd_update_pinned);
2282
2283 debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update);
2284 debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug,
2285 &mmu_stats.pud_update_pinned);
2286 debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug,
2287 &mmu_stats.pud_update_pinned);
2288
2289 debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update);
2290 debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug,
2291 &mmu_stats.pmd_update_pinned);
2292 debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug,
2293 &mmu_stats.pmd_update_pinned);
2294
2295 debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update);
2296// debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug,
2297// &mmu_stats.pte_update_pinned);
2298 debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug,
2299 &mmu_stats.pte_update_pinned);
2300
2301 debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update);
2302 debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug,
2303 &mmu_stats.mmu_update_extended);
2304 xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
2305 mmu_stats.mmu_update_histo, 20);
2306
2307 debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at);
2308 debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug,
2309 &mmu_stats.set_pte_at_batched);
2310 debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug,
2311 &mmu_stats.set_pte_at_current);
2312 debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug,
2313 &mmu_stats.set_pte_at_kernel);
2314
2315 debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
2316 debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
2317 &mmu_stats.prot_commit_batched);
2318 2267
2319 return 0; 2268 return 0;
2320} 2269}
2321fs_initcall(xen_mmu_debugfs);
2322 2270
2323#endif /* CONFIG_XEN_DEBUG_FS */ 2271int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
2272 unsigned long addr,
2273 unsigned long mfn, int nr,
2274 pgprot_t prot, unsigned domid)
2275{
2276 struct remap_data rmd;
2277 struct mmu_update mmu_update[REMAP_BATCH_SIZE];
2278 int batch;
2279 unsigned long range;
2280 int err = 0;
2281
2282 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
2283
2284 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
2285 (VM_PFNMAP | VM_RESERVED | VM_IO)));
2286
2287 rmd.mfn = mfn;
2288 rmd.prot = prot;
2289
2290 while (nr) {
2291 batch = min(REMAP_BATCH_SIZE, nr);
2292 range = (unsigned long)batch << PAGE_SHIFT;
2293
2294 rmd.mmu_update = mmu_update;
2295 err = apply_to_page_range(vma->vm_mm, addr, range,
2296 remap_area_mfn_pte_fn, &rmd);
2297 if (err)
2298 goto out;
2299
2300 err = -EFAULT;
2301 if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
2302 goto out;
2303
2304 nr -= batch;
2305 addr += range;
2306 }
2307
2308 err = 0;
2309out:
2310
2311 flush_tlb_all();
2312
2313 return err;
2314}
2315EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
2316
2317#ifdef CONFIG_XEN_DEBUG_FS
2318static int p2m_dump_open(struct inode *inode, struct file *filp)
2319{
2320 return single_open(filp, p2m_dump_show, NULL);
2321}
2322
2323static const struct file_operations p2m_dump_fops = {
2324 .open = p2m_dump_open,
2325 .read = seq_read,
2326 .llseek = seq_lseek,
2327 .release = single_release,
2328};
2329#endif /* CONFIG_XEN_DEBUG_FS */