author		Linus Torvalds <torvalds@linux-foundation.org>	2011-01-13 21:46:48 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-01-13 21:46:48 -0500
commit		9c4bc1c2befbbdce4b9fd526e67a7a2ea143ffa2 (patch)
tree		1b08c1bb00ce477ca947461777cd7e24de72c148 /arch/x86
parent		2c0076d8c7eb1dafa03d0a792444862080b34106 (diff)
parent		e1b478e4ec4477520767d1a920433626263a2a6b (diff)
Merge branch 'stable/gntdev' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
* 'stable/gntdev' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
xen/p2m: Fix module linking error.
xen p2m: clear the old pte when adding a page to m2p_override
xen gntdev: use gnttab_map_refs and gnttab_unmap_refs
xen: introduce gnttab_map_refs and gnttab_unmap_refs
xen p2m: transparently change the p2m mappings in the m2p override
xen/gntdev: Fix circular locking dependency
xen/gntdev: stop using "token" argument
xen: gntdev: move use of GNTMAP_contains_pte next to the map_op
xen: add m2p override mechanism
xen: move p2m handling to separate file
xen/gntdev: add VM_PFNMAP to vma
xen/gntdev: allow usermode to map granted pages
xen: define gnttab_set_map_op/unmap_op
Fix up trivial conflict in drivers/xen/Kconfig
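[Editor's note: the gntdev patches in this list hang off a batched grant-mapping helper. A caller-side sketch follows; the signatures of gnttab_set_map_op() and gnttab_map_refs() are assumed from this branch's "define gnttab_set_map_op/unmap_op" and "introduce gnttab_map_refs and gnttab_unmap_refs" patches (verify against include/xen/grant_table.h), and map_one_grant() itself is purely illustrative.]

/*
 * Illustrative only: map one grant reference from 'domid' into our
 * address space and let the m2p override machinery (added below in
 * arch/x86) resolve the foreign frame back to our local page.
 */
static int map_one_grant(grant_ref_t gref, domid_t domid,
			 unsigned long vaddr, struct page *page)
{
	struct gnttab_map_grant_ref map_op;
	int err;

	/* Fill in the GNTTABOP_map_grant_ref argument structure. */
	gnttab_set_map_op(&map_op, vaddr, GNTMAP_host_map, gref, domid);

	/*
	 * One hypercall for the whole batch (of one, here); on success
	 * this also records an m2p override so mfn_to_pfn() resolves
	 * the foreign frame to our local page.
	 */
	err = gnttab_map_refs(&map_op, &page, 1);
	if (err || map_op.status != GNTST_okay)
		return err ?: -EINVAL;

	return 0;
}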
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/include/asm/xen/page.h	16
-rw-r--r--	arch/x86/xen/Makefile	3
-rw-r--r--	arch/x86/xen/mmu.c	365
-rw-r--r--	arch/x86/xen/p2m.c	510
4 files changed, 525 insertions(+), 369 deletions(-)
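[Editor's note: the bulk of this merge moves the p2m machinery into its own file and bolts an m2p override table onto it. As a quick sanity check of the tree geometry described in the code comments below (512-entry levels on 64-bit), the index arithmetic can be reproduced in plain userspace C; this block is an editorial addition, not part of the commit.]

#include <stdio.h>

#define PAGE_SIZE		4096UL
#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))	/* 512 on 64-bit */
#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))	/* 512 on 64-bit */
#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))	/* 512 on 64-bit */
#define MAX_P2M_PFN	(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)

int main(void)
{
	unsigned long pfn = 0x12345;

	/* Same arithmetic as p2m_top_index()/p2m_mid_index()/p2m_index(). */
	unsigned topidx = pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
	unsigned mididx = (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
	unsigned idx    = pfn % P2M_PER_PAGE;

	/* 512^3 = 134217728 pfns of 4 KiB each: a 512 GiB pseudo-physical space. */
	printf("MAX_P2M_PFN = %zu\n", (size_t)MAX_P2M_PFN);
	printf("pfn %#lx -> top %u, mid %u, leaf %u\n", pfn, topidx, mididx, idx);
	return 0;
}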
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 8760cc60a21c..f25bdf238a33 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -42,6 +42,11 @@ extern unsigned int machine_to_phys_order;
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 
+extern int m2p_add_override(unsigned long mfn, struct page *page);
+extern int m2p_remove_override(struct page *page);
+extern struct page *m2p_find_override(unsigned long mfn);
+extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
+
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
 	unsigned long mfn;
@@ -72,9 +77,6 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return mfn;
 
-	if (unlikely((mfn >> machine_to_phys_order) != 0))
-		return ~0;
-
 	pfn = 0;
 	/*
 	 * The array access can fail (e.g., device space beyond end of RAM).
@@ -83,6 +85,14 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 	 */
 	__get_user(pfn, &machine_to_phys_mapping[mfn]);
 
+	/*
+	 * If this appears to be a foreign mfn (because the pfn
+	 * doesn't map back to the mfn), then check the local override
+	 * table to see if there's a better pfn to use.
+	 */
+	if (get_phys_to_machine(pfn) != mfn)
+		pfn = m2p_find_override_pfn(mfn, pfn);
+
 	return pfn;
 }
 
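[Editor's note: a minimal sketch of how a driver might consume the override hooks just declared. Only the four m2p_* functions come from the patch; the two wrappers and their names are hypothetical, and 'foreign_mfn' and 'page' are assumed to come from a successful grant-map operation.]

/* Record that this local page currently backs a foreign mfn. */
static int track_foreign_page(unsigned long foreign_mfn, struct page *page)
{
	int ret;

	ret = m2p_add_override(foreign_mfn, page);
	if (ret < 0)
		return ret;

	/*
	 * From here on, mfn_to_pfn(foreign_mfn) resolves to this page's
	 * pfn via m2p_find_override_pfn() instead of the stale value in
	 * the hypervisor's machine_to_phys table.
	 */
	return 0;
}

/* Restore the original p2m entry and drop the hash-table entry. */
static int untrack_foreign_page(struct page *page)
{
	return m2p_remove_override(page);
}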
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 779385158915..17c565de3d64 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -12,7 +12,8 @@ CFLAGS_mmu.o := $(nostackp)
 
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
-			grant-table.o suspend.o platform-pci-unplug.o
+			grant-table.o suspend.o platform-pci-unplug.o \
+			p2m.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 44924e551fde..7575e55cd52e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -173,371 +173,6 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3);	 /* actual vcpu cr3 */
  */
 #define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
 
-/*
- * Xen leaves the responsibility for maintaining p2m mappings to the
- * guests themselves, but it must also access and update the p2m array
- * during suspend/resume when all the pages are reallocated.
- *
- * The p2m table is logically a flat array, but we implement it as a
- * three-level tree to allow the address space to be sparse.
- *
- *                    Xen
- *                     |
- *     p2m_top              p2m_top_mfn
- *        /  \                   /   \
- * p2m_mid p2m_mid        p2m_mid_mfn p2m_mid_mfn
- *    / \    / \          /           /
- *  p2m p2m p2m p2m p2m p2m p2m ...
- *
- * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
- *
- * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
- * maximum representable pseudo-physical address space is:
- *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
- *
- * P2M_PER_PAGE depends on the architecture, as a mfn is always
- * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
- * 512 and 1024 entries respectively.
- */
-
-unsigned long xen_max_p2m_pfn __read_mostly;
-
-#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
-#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
-#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
-
-#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
-/* Placeholders for holes in the address space */
-static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
-
-static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
-
-RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-
-static inline unsigned p2m_top_index(unsigned long pfn)
-{
-	BUG_ON(pfn >= MAX_P2M_PFN);
-	return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
-}
-
-static inline unsigned p2m_mid_index(unsigned long pfn)
-{
-	return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
-}
-
-static inline unsigned p2m_index(unsigned long pfn)
-{
-	return pfn % P2M_PER_PAGE;
-}
-
-static void p2m_top_init(unsigned long ***top)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-		top[i] = p2m_mid_missing;
-}
-
-static void p2m_top_mfn_init(unsigned long *top)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-		top[i] = virt_to_mfn(p2m_mid_missing_mfn);
-}
-
-static void p2m_top_mfn_p_init(unsigned long **top)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-		top[i] = p2m_mid_missing_mfn;
-}
-
-static void p2m_mid_init(unsigned long **mid)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_MID_PER_PAGE; i++)
-		mid[i] = p2m_missing;
-}
-
-static void p2m_mid_mfn_init(unsigned long *mid)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_MID_PER_PAGE; i++)
-		mid[i] = virt_to_mfn(p2m_missing);
-}
-
-static void p2m_init(unsigned long *p2m)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_MID_PER_PAGE; i++)
-		p2m[i] = INVALID_P2M_ENTRY;
-}
-
-/*
- * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
- *
- * This is called both at boot time, and after resuming from suspend:
- * - At boot time we're called very early, and must use extend_brk()
- *   to allocate memory.
- *
- * - After resume we're called from within stop_machine, but the mfn
- *   tree should alreay be completely allocated.
- */
-void xen_build_mfn_list_list(void)
-{
-	unsigned long pfn;
-
-	/* Pre-initialize p2m_top_mfn to be completely missing */
-	if (p2m_top_mfn == NULL) {
-		p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
-		p2m_mid_mfn_init(p2m_mid_missing_mfn);
-
-		p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
-		p2m_top_mfn_p_init(p2m_top_mfn_p);
-
-		p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
-		p2m_top_mfn_init(p2m_top_mfn);
-	} else {
-		/* Reinitialise, mfn's all change after migration */
-		p2m_mid_mfn_init(p2m_mid_missing_mfn);
-	}
-
-	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
-		unsigned topidx = p2m_top_index(pfn);
-		unsigned mididx = p2m_mid_index(pfn);
-		unsigned long **mid;
-		unsigned long *mid_mfn_p;
-
-		mid = p2m_top[topidx];
-		mid_mfn_p = p2m_top_mfn_p[topidx];
-
-		/* Don't bother allocating any mfn mid levels if
-		 * they're just missing, just update the stored mfn,
-		 * since all could have changed over a migrate.
-		 */
-		if (mid == p2m_mid_missing) {
-			BUG_ON(mididx);
-			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
-			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
-			pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
-			continue;
-		}
-
-		if (mid_mfn_p == p2m_mid_missing_mfn) {
-			/*
-			 * XXX boot-time only!  We should never find
-			 * missing parts of the mfn tree after
-			 * runtime.  extend_brk() will BUG if we call
-			 * it too late.
-			 */
-			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
-			p2m_mid_mfn_init(mid_mfn_p);
-
-			p2m_top_mfn_p[topidx] = mid_mfn_p;
-		}
-
-		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
-		mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
-	}
-}
-
-void xen_setup_mfn_list_list(void)
-{
-	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
-
-	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
-		virt_to_mfn(p2m_top_mfn);
-	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
-}
-
-/* Set up p2m_top to point to the domain-builder provided p2m pages */
-void __init xen_build_dynamic_phys_to_machine(void)
-{
-	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
-	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
-	unsigned long pfn;
-
-	xen_max_p2m_pfn = max_pfn;
-
-	p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_init(p2m_missing);
-
-	p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_mid_init(p2m_mid_missing);
-
-	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_top_init(p2m_top);
-
-	/*
-	 * The domain builder gives us a pre-constructed p2m array in
-	 * mfn_list for all the pages initially given to us, so we just
-	 * need to graft that into our tree structure.
-	 */
-	for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
-		unsigned topidx = p2m_top_index(pfn);
-		unsigned mididx = p2m_mid_index(pfn);
-
-		if (p2m_top[topidx] == p2m_mid_missing) {
-			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
-			p2m_mid_init(mid);
-
-			p2m_top[topidx] = mid;
-		}
-
-		p2m_top[topidx][mididx] = &mfn_list[pfn];
-	}
-}
-
-unsigned long get_phys_to_machine(unsigned long pfn)
-{
-	unsigned topidx, mididx, idx;
-
-	if (unlikely(pfn >= MAX_P2M_PFN))
-		return INVALID_P2M_ENTRY;
-
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-	idx = p2m_index(pfn);
-
-	return p2m_top[topidx][mididx][idx];
-}
-EXPORT_SYMBOL_GPL(get_phys_to_machine);
-
-static void *alloc_p2m_page(void)
-{
-	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
-}
-
-static void free_p2m_page(void *p)
-{
-	free_page((unsigned long)p);
-}
-
-/*
- * Fully allocate the p2m structure for a given pfn.  We need to check
- * that both the top and mid levels are allocated, and make sure the
- * parallel mfn tree is kept in sync.  We may race with other cpus, so
- * the new pages are installed with cmpxchg; if we lose the race then
- * simply free the page we allocated and use the one that's there.
- */
-static bool alloc_p2m(unsigned long pfn)
-{
-	unsigned topidx, mididx;
-	unsigned long ***top_p, **mid;
-	unsigned long *top_mfn_p, *mid_mfn;
-
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-
-	top_p = &p2m_top[topidx];
-	mid = *top_p;
-
-	if (mid == p2m_mid_missing) {
-		/* Mid level is missing, allocate a new one */
-		mid = alloc_p2m_page();
-		if (!mid)
-			return false;
-
-		p2m_mid_init(mid);
-
-		if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
-			free_p2m_page(mid);
-	}
-
-	top_mfn_p = &p2m_top_mfn[topidx];
-	mid_mfn = p2m_top_mfn_p[topidx];
-
-	BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
-
-	if (mid_mfn == p2m_mid_missing_mfn) {
-		/* Separately check the mid mfn level */
-		unsigned long missing_mfn;
-		unsigned long mid_mfn_mfn;
-
-		mid_mfn = alloc_p2m_page();
-		if (!mid_mfn)
-			return false;
-
-		p2m_mid_mfn_init(mid_mfn);
-
-		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
-		mid_mfn_mfn = virt_to_mfn(mid_mfn);
-		if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
-			free_p2m_page(mid_mfn);
-		else
-			p2m_top_mfn_p[topidx] = mid_mfn;
-	}
-
-	if (p2m_top[topidx][mididx] == p2m_missing) {
-		/* p2m leaf page is missing */
-		unsigned long *p2m;
-
-		p2m = alloc_p2m_page();
-		if (!p2m)
-			return false;
-
-		p2m_init(p2m);
-
-		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
-			free_p2m_page(p2m);
-		else
-			mid_mfn[mididx] = virt_to_mfn(p2m);
-	}
-
-	return true;
-}
-
-/* Try to install p2m mapping; fail if intermediate bits missing */
-bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-	unsigned topidx, mididx, idx;
-
-	if (unlikely(pfn >= MAX_P2M_PFN)) {
-		BUG_ON(mfn != INVALID_P2M_ENTRY);
-		return true;
-	}
-
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-	idx = p2m_index(pfn);
-
-	if (p2m_top[topidx][mididx] == p2m_missing)
-		return mfn == INVALID_P2M_ENTRY;
-
-	p2m_top[topidx][mididx][idx] = mfn;
-
-	return true;
-}
-
-bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
-		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-		return true;
-	}
-
-	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
-		if (!alloc_p2m(pfn))
-			return false;
-
-		if (!__set_phys_to_machine(pfn, mfn))
-			return false;
-	}
-
-	return true;
-}
-
 unsigned long arbitrary_virt_to_mfn(void *vaddr)
 {
 	xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
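[Editor's note: everything removed from mmu.c above reappears unchanged in the new arch/x86/xen/p2m.c below, so this part of the merge is pure code movement. One idiom worth isolating is alloc_p2m()'s lock-free installation of new tree levels: allocate a candidate page, publish it with a compare-and-swap, and discard it if another CPU won the race. A userspace model follows (illustrative only; C11 atomics stand in for the kernel's cmpxchg(), and all names are hypothetical).]

#include <stdatomic.h>
#include <stdlib.h>

/* Sentinel playing the role of p2m_mid_missing. */
static long missing_level[512];

/* One slot of the top level, held as an atomic pointer. */
static _Atomic(long *) slot = missing_level;

/*
 * Install-or-discard, as in alloc_p2m(): allocate and initialize a
 * candidate page, then publish it with a compare-and-swap.  If another
 * thread won the race, free our copy and use the winner's page.
 */
static long *get_or_alloc_level(void)
{
	long *cur = atomic_load(&slot);
	long *expected = missing_level;
	long *fresh;

	if (cur != missing_level)
		return cur;

	fresh = calloc(512, sizeof(*fresh));
	if (!fresh)
		return NULL;

	if (!atomic_compare_exchange_strong(&slot, &expected, fresh)) {
		free(fresh);	/* lost the race; 'expected' now holds the winner */
		fresh = expected;
	}
	return fresh;
}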
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
new file mode 100644
index 000000000000..8f2251d2a3f8
--- /dev/null
+++ b/arch/x86/xen/p2m.c
@@ -0,0 +1,510 @@
+/*
+ * Xen leaves the responsibility for maintaining p2m mappings to the
+ * guests themselves, but it must also access and update the p2m array
+ * during suspend/resume when all the pages are reallocated.
+ *
+ * The p2m table is logically a flat array, but we implement it as a
+ * three-level tree to allow the address space to be sparse.
+ *
+ *                    Xen
+ *                     |
+ *     p2m_top              p2m_top_mfn
+ *        /  \                   /   \
+ * p2m_mid p2m_mid        p2m_mid_mfn p2m_mid_mfn
+ *    / \    / \          /           /
+ *  p2m p2m p2m p2m p2m p2m p2m ...
+ *
+ * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
+ *
+ * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
+ * maximum representable pseudo-physical address space is:
+ *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
+ *
+ * P2M_PER_PAGE depends on the architecture, as a mfn is always
+ * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
+ * 512 and 1024 entries respectively.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/sched.h>
+
+#include <asm/cache.h>
+#include <asm/setup.h>
+
+#include <asm/xen/page.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include "xen-ops.h"
+
+static void __init m2p_override_init(void);
+
+unsigned long xen_max_p2m_pfn __read_mostly;
+
+#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
+#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
+#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
+
+#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
+
+/* Placeholders for holes in the address space */
+static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
+
+static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
+
+RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{
+	BUG_ON(pfn >= MAX_P2M_PFN);
+	return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
+}
+
+static inline unsigned p2m_mid_index(unsigned long pfn)
+{
+	return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{
+	return pfn % P2M_PER_PAGE;
+}
+
+static void p2m_top_init(unsigned long ***top)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
+		top[i] = p2m_mid_missing;
+}
+
+static void p2m_top_mfn_init(unsigned long *top)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
+		top[i] = virt_to_mfn(p2m_mid_missing_mfn);
+}
+
+static void p2m_top_mfn_p_init(unsigned long **top)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
+		top[i] = p2m_mid_missing_mfn;
+}
+
+static void p2m_mid_init(unsigned long **mid)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_MID_PER_PAGE; i++)
+		mid[i] = p2m_missing;
+}
+
+static void p2m_mid_mfn_init(unsigned long *mid)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_MID_PER_PAGE; i++)
+		mid[i] = virt_to_mfn(p2m_missing);
+}
+
+static void p2m_init(unsigned long *p2m)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_MID_PER_PAGE; i++)
+		p2m[i] = INVALID_P2M_ENTRY;
+}
+
+/*
+ * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
+ *
+ * This is called both at boot time, and after resuming from suspend:
+ * - At boot time we're called very early, and must use extend_brk()
+ *   to allocate memory.
+ *
+ * - After resume we're called from within stop_machine, but the mfn
+ *   tree should alreay be completely allocated.
+ */
+void xen_build_mfn_list_list(void)
+{
+	unsigned long pfn;
+
+	/* Pre-initialize p2m_top_mfn to be completely missing */
+	if (p2m_top_mfn == NULL) {
+		p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		p2m_mid_mfn_init(p2m_mid_missing_mfn);
+
+		p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		p2m_top_mfn_p_init(p2m_top_mfn_p);
+
+		p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		p2m_top_mfn_init(p2m_top_mfn);
+	} else {
+		/* Reinitialise, mfn's all change after migration */
+		p2m_mid_mfn_init(p2m_mid_missing_mfn);
+	}
+
+	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+		unsigned mididx = p2m_mid_index(pfn);
+		unsigned long **mid;
+		unsigned long *mid_mfn_p;
+
+		mid = p2m_top[topidx];
+		mid_mfn_p = p2m_top_mfn_p[topidx];
+
+		/* Don't bother allocating any mfn mid levels if
+		 * they're just missing, just update the stored mfn,
+		 * since all could have changed over a migrate.
+		 */
+		if (mid == p2m_mid_missing) {
+			BUG_ON(mididx);
+			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
+			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
+			pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
+			continue;
+		}
+
+		if (mid_mfn_p == p2m_mid_missing_mfn) {
+			/*
+			 * XXX boot-time only!  We should never find
+			 * missing parts of the mfn tree after
+			 * runtime.  extend_brk() will BUG if we call
+			 * it too late.
+			 */
+			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+			p2m_mid_mfn_init(mid_mfn_p);
+
+			p2m_top_mfn_p[topidx] = mid_mfn_p;
+		}
+
+		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
+		mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
+	}
+}
+
+void xen_setup_mfn_list_list(void)
+{
+	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+		virt_to_mfn(p2m_top_mfn);
+	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
+}
+
+/* Set up p2m_top to point to the domain-builder provided p2m pages */
+void __init xen_build_dynamic_phys_to_machine(void)
+{
+	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+	unsigned long pfn;
+
+	xen_max_p2m_pfn = max_pfn;
+
+	p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
+	p2m_init(p2m_missing);
+
+	p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
+	p2m_mid_init(p2m_mid_missing);
+
+	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
+	p2m_top_init(p2m_top);
+
+	/*
+	 * The domain builder gives us a pre-constructed p2m array in
+	 * mfn_list for all the pages initially given to us, so we just
+	 * need to graft that into our tree structure.
+	 */
+	for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+		unsigned mididx = p2m_mid_index(pfn);
+
+		if (p2m_top[topidx] == p2m_mid_missing) {
+			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+			p2m_mid_init(mid);
+
+			p2m_top[topidx] = mid;
+		}
+
+		p2m_top[topidx][mididx] = &mfn_list[pfn];
+	}
+
+	m2p_override_init();
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{
+	unsigned topidx, mididx, idx;
+
+	if (unlikely(pfn >= MAX_P2M_PFN))
+		return INVALID_P2M_ENTRY;
+
+	topidx = p2m_top_index(pfn);
+	mididx = p2m_mid_index(pfn);
+	idx = p2m_index(pfn);
+
+	return p2m_top[topidx][mididx][idx];
+}
+EXPORT_SYMBOL_GPL(get_phys_to_machine);
+
+static void *alloc_p2m_page(void)
+{
+	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
+}
+
+static void free_p2m_page(void *p)
+{
+	free_page((unsigned long)p);
+}
+
+/*
+ * Fully allocate the p2m structure for a given pfn.  We need to check
+ * that both the top and mid levels are allocated, and make sure the
+ * parallel mfn tree is kept in sync.  We may race with other cpus, so
+ * the new pages are installed with cmpxchg; if we lose the race then
+ * simply free the page we allocated and use the one that's there.
+ */
+static bool alloc_p2m(unsigned long pfn)
+{
+	unsigned topidx, mididx;
+	unsigned long ***top_p, **mid;
+	unsigned long *top_mfn_p, *mid_mfn;
+
+	topidx = p2m_top_index(pfn);
+	mididx = p2m_mid_index(pfn);
+
+	top_p = &p2m_top[topidx];
+	mid = *top_p;
+
+	if (mid == p2m_mid_missing) {
+		/* Mid level is missing, allocate a new one */
+		mid = alloc_p2m_page();
+		if (!mid)
+			return false;
+
+		p2m_mid_init(mid);
+
+		if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
+			free_p2m_page(mid);
+	}
+
+	top_mfn_p = &p2m_top_mfn[topidx];
+	mid_mfn = p2m_top_mfn_p[topidx];
+
+	BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
+
+	if (mid_mfn == p2m_mid_missing_mfn) {
+		/* Separately check the mid mfn level */
+		unsigned long missing_mfn;
+		unsigned long mid_mfn_mfn;
+
+		mid_mfn = alloc_p2m_page();
+		if (!mid_mfn)
+			return false;
+
+		p2m_mid_mfn_init(mid_mfn);
+
+		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
+		mid_mfn_mfn = virt_to_mfn(mid_mfn);
+		if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
+			free_p2m_page(mid_mfn);
+		else
+			p2m_top_mfn_p[topidx] = mid_mfn;
+	}
+
+	if (p2m_top[topidx][mididx] == p2m_missing) {
+		/* p2m leaf page is missing */
+		unsigned long *p2m;
+
+		p2m = alloc_p2m_page();
+		if (!p2m)
+			return false;
+
+		p2m_init(p2m);
+
+		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
+			free_p2m_page(p2m);
+		else
+			mid_mfn[mididx] = virt_to_mfn(p2m);
+	}
+
+	return true;
+}
+
+/* Try to install p2m mapping; fail if intermediate bits missing */
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	unsigned topidx, mididx, idx;
+
+	if (unlikely(pfn >= MAX_P2M_PFN)) {
+		BUG_ON(mfn != INVALID_P2M_ENTRY);
+		return true;
+	}
+
+	topidx = p2m_top_index(pfn);
+	mididx = p2m_mid_index(pfn);
+	idx = p2m_index(pfn);
+
+	if (p2m_top[topidx][mididx] == p2m_missing)
+		return mfn == INVALID_P2M_ENTRY;
+
+	p2m_top[topidx][mididx][idx] = mfn;
+
+	return true;
+}
+
+bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return true;
+	}
+
+	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
+		if (!alloc_p2m(pfn))
+			return false;
+
+		if (!__set_phys_to_machine(pfn, mfn))
+			return false;
+	}
+
+	return true;
+}
+
+#define M2P_OVERRIDE_HASH_SHIFT	10
+#define M2P_OVERRIDE_HASH	(1 << M2P_OVERRIDE_HASH_SHIFT)
+
+static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH);
+static DEFINE_SPINLOCK(m2p_override_lock);
+
+static void __init m2p_override_init(void)
+{
+	unsigned i;
+
+	m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
+				   sizeof(unsigned long));
+
+	for (i = 0; i < M2P_OVERRIDE_HASH; i++)
+		INIT_LIST_HEAD(&m2p_overrides[i]);
+}
+
+static unsigned long mfn_hash(unsigned long mfn)
+{
+	return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
+}
+
+/* Add an MFN override for a particular page */
+int m2p_add_override(unsigned long mfn, struct page *page)
+{
+	unsigned long flags;
+	unsigned long pfn;
+	unsigned long address;
+	unsigned level;
+	pte_t *ptep = NULL;
+
+	pfn = page_to_pfn(page);
+	if (!PageHighMem(page)) {
+		address = (unsigned long)__va(pfn << PAGE_SHIFT);
+		ptep = lookup_address(address, &level);
+
+		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
+					"m2p_add_override: pfn %lx not mapped", pfn))
+			return -EINVAL;
+	}
+
+	page->private = mfn;
+	page->index = pfn_to_mfn(pfn);
+
+	__set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
+	if (!PageHighMem(page))
+		/* Just zap old mapping for now */
+		pte_clear(&init_mm, address, ptep);
+
+	spin_lock_irqsave(&m2p_override_lock, flags);
+	list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
+	spin_unlock_irqrestore(&m2p_override_lock, flags);
+
+	return 0;
+}
+
+int m2p_remove_override(struct page *page)
+{
+	unsigned long flags;
+	unsigned long mfn;
+	unsigned long pfn;
+	unsigned long address;
+	unsigned level;
+	pte_t *ptep = NULL;
+
+	pfn = page_to_pfn(page);
+	mfn = get_phys_to_machine(pfn);
+	if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
+		return -EINVAL;
+
+	if (!PageHighMem(page)) {
+		address = (unsigned long)__va(pfn << PAGE_SHIFT);
+		ptep = lookup_address(address, &level);
+
+		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
+					"m2p_remove_override: pfn %lx not mapped", pfn))
+			return -EINVAL;
+	}
+
+	spin_lock_irqsave(&m2p_override_lock, flags);
+	list_del(&page->lru);
+	spin_unlock_irqrestore(&m2p_override_lock, flags);
+	__set_phys_to_machine(pfn, page->index);
+
+	if (!PageHighMem(page))
+		set_pte_at(&init_mm, address, ptep,
+				pfn_pte(pfn, PAGE_KERNEL));
+		/* No tlb flush necessary because the caller already
+		 * left the pte unmapped. */
+
+	return 0;
+}
+
+struct page *m2p_find_override(unsigned long mfn)
+{
+	unsigned long flags;
+	struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)];
+	struct page *p, *ret;
+
+	ret = NULL;
+
+	spin_lock_irqsave(&m2p_override_lock, flags);
+
+	list_for_each_entry(p, bucket, lru) {
+		if (p->private == mfn) {
+			ret = p;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&m2p_override_lock, flags);
+
+	return ret;
+}
+
+unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
+{
+	struct page *p = m2p_find_override(mfn);
+	unsigned long ret = pfn;
+
+	if (p)
+		ret = page_to_pfn(p);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
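[Editor's note: for readers unfamiliar with the pattern, a small userspace model of the override table's shape. The kernel version above keys each bucket entry by page->private, links pages via page->lru, hashes with hash_long(), and guards the buckets with a spinlock; everything below, including the hash multiplier standing in for hash_long(), is hypothetical and for illustration only.]

#include <stdio.h>
#include <stdlib.h>

#define HASH_SHIFT	10
#define NBUCKETS	(1 << HASH_SHIFT)	/* 1024, as M2P_OVERRIDE_HASH above */

struct override {
	unsigned long mfn;
	unsigned long pfn;
	struct override *next;
};

static struct override *buckets[NBUCKETS];

static unsigned bucket_of(unsigned long mfn)
{
	/* stand-in for the kernel's hash_long(mfn, HASH_SHIFT) */
	return ((unsigned long long)mfn * 0x9e3779b97f4a7c15ULL) >> (64 - HASH_SHIFT);
}

static void override_add(unsigned long mfn, unsigned long pfn)
{
	struct override *o = malloc(sizeof(*o));

	o->mfn = mfn;
	o->pfn = pfn;
	o->next = buckets[bucket_of(mfn)];
	buckets[bucket_of(mfn)] = o;
}

/* Same contract as m2p_find_override_pfn(): fall back to 'pfn'. */
static unsigned long override_find_pfn(unsigned long mfn, unsigned long pfn)
{
	struct override *o;

	for (o = buckets[bucket_of(mfn)]; o; o = o->next)
		if (o->mfn == mfn)
			return o->pfn;
	return pfn;
}

int main(void)
{
	override_add(0xabcde, 42);
	printf("%lu\n", override_find_pfn(0xabcde, 0));	/* 42 */
	printf("%lu\n", override_find_pfn(0x12345, 7));	/* 7: no override */
	return 0;
}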