author	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>	2010-12-06 19:29:22 -0500
committer	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>	2011-01-11 14:31:07 -0500
commit	b5eafe924bb054d7c56e6ebd18106352e8a3f916 (patch)
tree	b7e88c3259df6f5a57d7b04225a053ac435c1a9d
parent	8d3eaea24609c7cd6fb0e6471f46a52f9e5d0202 (diff)
xen: move p2m handling to separate file
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-rw-r--r--	arch/x86/xen/Makefile	  3
-rw-r--r--	arch/x86/xen/mmu.c	365
-rw-r--r--	arch/x86/xen/p2m.c	376
3 files changed, 378 insertions(+), 366 deletions(-)
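
For reference, the code being moved resolves a pfn through three page-sized levels (p2m_top -> p2m_mid -> p2m leaf). Below is a minimal standalone sketch of that index arithmetic; it is not part of the patch, and it assumes a 64-bit build with 4096-byte pages, so every level holds 512 entries and the tree covers 512^3 pfns (512 GiB of pseudo-physical address space):

	#include <stdio.h>

	#define P2M_PER_PAGE		512	/* PAGE_SIZE / sizeof(unsigned long) on 64-bit */
	#define P2M_MID_PER_PAGE	512	/* PAGE_SIZE / sizeof(unsigned long *) on 64-bit */

	int main(void)
	{
		unsigned long pfn = 0x12345;	/* hypothetical example pfn */

		/* Same decomposition as p2m_top_index()/p2m_mid_index()/p2m_index() */
		unsigned topidx = pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);	/* 74565 / 262144 = 0 */
		unsigned mididx = (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;	/* (74565 / 512) % 512 = 145 */
		unsigned idx    = pfn % P2M_PER_PAGE;				/* 74565 % 512 = 325 */

		/* The mfn would then be read from p2m_top[topidx][mididx][idx]. */
		printf("topidx=%u mididx=%u idx=%u\n", topidx, mididx, idx);
		return 0;
	}
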
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 779385158915..17c565de3d64 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -12,7 +12,8 @@ CFLAGS_mmu.o := $(nostackp)
 
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			time.o xen-asm.o xen-asm_$(BITS).o \
-			grant-table.o suspend.o platform-pci-unplug.o
+			grant-table.o suspend.o platform-pci-unplug.o \
+			p2m.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 44924e551fde..7575e55cd52e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -173,371 +173,6 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
  */
 #define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
 
-/*
- * Xen leaves the responsibility for maintaining p2m mappings to the
- * guests themselves, but it must also access and update the p2m array
- * during suspend/resume when all the pages are reallocated.
- *
- * The p2m table is logically a flat array, but we implement it as a
- * three-level tree to allow the address space to be sparse.
- *
- *                               Xen
- *                                |
- *     p2m_top              p2m_top_mfn
- *       /  \                   /   \
- * p2m_mid p2m_mid      p2m_mid_mfn p2m_mid_mfn
- *    / \      / \         /           /
- *  p2m p2m p2m p2m p2m p2m p2m ...
- *
- * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
- *
- * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
- * maximum representable pseudo-physical address space is:
- *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
- *
- * P2M_PER_PAGE depends on the architecture, as a mfn is always
- * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
- * 512 and 1024 entries respectively.
- */
-
-unsigned long xen_max_p2m_pfn __read_mostly;
-
-#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
-#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
-#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
-
-#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
-
-/* Placeholders for holes in the address space */
-static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
-
-static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
-static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
-
-RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
-
-static inline unsigned p2m_top_index(unsigned long pfn)
-{
-	BUG_ON(pfn >= MAX_P2M_PFN);
-	return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
-}
-
-static inline unsigned p2m_mid_index(unsigned long pfn)
-{
-	return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
-}
-
-static inline unsigned p2m_index(unsigned long pfn)
-{
-	return pfn % P2M_PER_PAGE;
-}
-
-static void p2m_top_init(unsigned long ***top)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-		top[i] = p2m_mid_missing;
-}
-
-static void p2m_top_mfn_init(unsigned long *top)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-		top[i] = virt_to_mfn(p2m_mid_missing_mfn);
-}
-
-static void p2m_top_mfn_p_init(unsigned long **top)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
-		top[i] = p2m_mid_missing_mfn;
-}
-
-static void p2m_mid_init(unsigned long **mid)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_MID_PER_PAGE; i++)
-		mid[i] = p2m_missing;
-}
-
-static void p2m_mid_mfn_init(unsigned long *mid)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_MID_PER_PAGE; i++)
-		mid[i] = virt_to_mfn(p2m_missing);
-}
-
-static void p2m_init(unsigned long *p2m)
-{
-	unsigned i;
-
-	for (i = 0; i < P2M_MID_PER_PAGE; i++)
-		p2m[i] = INVALID_P2M_ENTRY;
-}
-
-/*
- * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
- *
- * This is called both at boot time, and after resuming from suspend:
- * - At boot time we're called very early, and must use extend_brk()
-　*   to allocate memory.
- *
- * - After resume we're called from within stop_machine, but the mfn
- *   tree should already be completely allocated.
- */
-void xen_build_mfn_list_list(void)
-{
-	unsigned long pfn;
-
-	/* Pre-initialize p2m_top_mfn to be completely missing */
-	if (p2m_top_mfn == NULL) {
-		p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
-		p2m_mid_mfn_init(p2m_mid_missing_mfn);
-
-		p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
-		p2m_top_mfn_p_init(p2m_top_mfn_p);
-
-		p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
-		p2m_top_mfn_init(p2m_top_mfn);
-	} else {
-		/* Reinitialise, mfn's all change after migration */
-		p2m_mid_mfn_init(p2m_mid_missing_mfn);
-	}
-
-	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
-		unsigned topidx = p2m_top_index(pfn);
-		unsigned mididx = p2m_mid_index(pfn);
-		unsigned long **mid;
-		unsigned long *mid_mfn_p;
-
-		mid = p2m_top[topidx];
-		mid_mfn_p = p2m_top_mfn_p[topidx];
-
-		/* Don't bother allocating any mfn mid levels if
-		 * they're just missing, just update the stored mfn,
-		 * since all could have changed over a migrate.
-		 */
-		if (mid == p2m_mid_missing) {
-			BUG_ON(mididx);
-			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
-			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
-			pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
-			continue;
-		}
-
-		if (mid_mfn_p == p2m_mid_missing_mfn) {
-			/*
-			 * XXX boot-time only!  We should never find
-			 * missing parts of the mfn tree after
-			 * runtime.  extend_brk() will BUG if we call
-			 * it too late.
-			 */
-			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
-			p2m_mid_mfn_init(mid_mfn_p);
-
-			p2m_top_mfn_p[topidx] = mid_mfn_p;
-		}
-
-		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
-		mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
-	}
-}
-
-void xen_setup_mfn_list_list(void)
-{
-	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
-
-	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
-		virt_to_mfn(p2m_top_mfn);
-	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
-}
-
-/* Set up p2m_top to point to the domain-builder provided p2m pages */
-void __init xen_build_dynamic_phys_to_machine(void)
-{
-	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
-	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
-	unsigned long pfn;
-
-	xen_max_p2m_pfn = max_pfn;
-
-	p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_init(p2m_missing);
-
-	p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_mid_init(p2m_mid_missing);
-
-	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	p2m_top_init(p2m_top);
-
-	/*
-	 * The domain builder gives us a pre-constructed p2m array in
-	 * mfn_list for all the pages initially given to us, so we just
-	 * need to graft that into our tree structure.
-	 */
-	for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
-		unsigned topidx = p2m_top_index(pfn);
-		unsigned mididx = p2m_mid_index(pfn);
-
-		if (p2m_top[topidx] == p2m_mid_missing) {
-			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
-			p2m_mid_init(mid);
-
-			p2m_top[topidx] = mid;
-		}
-
-		p2m_top[topidx][mididx] = &mfn_list[pfn];
-	}
-}
-
-unsigned long get_phys_to_machine(unsigned long pfn)
-{
-	unsigned topidx, mididx, idx;
-
-	if (unlikely(pfn >= MAX_P2M_PFN))
-		return INVALID_P2M_ENTRY;
-
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-	idx = p2m_index(pfn);
-
-	return p2m_top[topidx][mididx][idx];
-}
-EXPORT_SYMBOL_GPL(get_phys_to_machine);
-
-static void *alloc_p2m_page(void)
-{
-	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
-}
-
-static void free_p2m_page(void *p)
-{
-	free_page((unsigned long)p);
-}
-
-/*
- * Fully allocate the p2m structure for a given pfn.  We need to check
- * that both the top and mid levels are allocated, and make sure the
- * parallel mfn tree is kept in sync.  We may race with other cpus, so
- * the new pages are installed with cmpxchg; if we lose the race then
- * simply free the page we allocated and use the one that's there.
- */
-static bool alloc_p2m(unsigned long pfn)
-{
-	unsigned topidx, mididx;
-	unsigned long ***top_p, **mid;
-	unsigned long *top_mfn_p, *mid_mfn;
-
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-
-	top_p = &p2m_top[topidx];
-	mid = *top_p;
-
-	if (mid == p2m_mid_missing) {
-		/* Mid level is missing, allocate a new one */
-		mid = alloc_p2m_page();
-		if (!mid)
-			return false;
-
-		p2m_mid_init(mid);
-
-		if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
-			free_p2m_page(mid);
-	}
-
-	top_mfn_p = &p2m_top_mfn[topidx];
-	mid_mfn = p2m_top_mfn_p[topidx];
-
-	BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
-
-	if (mid_mfn == p2m_mid_missing_mfn) {
-		/* Separately check the mid mfn level */
-		unsigned long missing_mfn;
-		unsigned long mid_mfn_mfn;
-
-		mid_mfn = alloc_p2m_page();
-		if (!mid_mfn)
-			return false;
-
-		p2m_mid_mfn_init(mid_mfn);
-
-		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
-		mid_mfn_mfn = virt_to_mfn(mid_mfn);
-		if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
-			free_p2m_page(mid_mfn);
-		else
-			p2m_top_mfn_p[topidx] = mid_mfn;
-	}
-
-	if (p2m_top[topidx][mididx] == p2m_missing) {
-		/* p2m leaf page is missing */
-		unsigned long *p2m;
-
-		p2m = alloc_p2m_page();
-		if (!p2m)
-			return false;
-
-		p2m_init(p2m);
-
-		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
-			free_p2m_page(p2m);
-		else
-			mid_mfn[mididx] = virt_to_mfn(p2m);
-	}
-
-	return true;
-}
-
-/* Try to install p2m mapping; fail if intermediate bits missing */
-bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-	unsigned topidx, mididx, idx;
-
-	if (unlikely(pfn >= MAX_P2M_PFN)) {
-		BUG_ON(mfn != INVALID_P2M_ENTRY);
-		return true;
-	}
-
-	topidx = p2m_top_index(pfn);
-	mididx = p2m_mid_index(pfn);
-	idx = p2m_index(pfn);
-
-	if (p2m_top[topidx][mididx] == p2m_missing)
-		return mfn == INVALID_P2M_ENTRY;
-
-	p2m_top[topidx][mididx][idx] = mfn;
-
-	return true;
-}
-
-bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
-		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-		return true;
-	}
-
-	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
-		if (!alloc_p2m(pfn))
-			return false;
-
-		if (!__set_phys_to_machine(pfn, mfn))
-			return false;
-	}
-
-	return true;
-}
-
 unsigned long arbitrary_virt_to_mfn(void *vaddr)
 {
 	xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
new file mode 100644
index 000000000000..259ec3bb8b6f
--- /dev/null
+++ b/arch/x86/xen/p2m.c
@@ -0,0 +1,376 @@
+/*
+ * Xen leaves the responsibility for maintaining p2m mappings to the
+ * guests themselves, but it must also access and update the p2m array
+ * during suspend/resume when all the pages are reallocated.
+ *
+ * The p2m table is logically a flat array, but we implement it as a
+ * three-level tree to allow the address space to be sparse.
+ *
+ *                               Xen
+ *                                |
+ *     p2m_top              p2m_top_mfn
+ *       /  \                   /   \
+ * p2m_mid p2m_mid      p2m_mid_mfn p2m_mid_mfn
+ *    / \      / \         /           /
+ *  p2m p2m p2m p2m p2m p2m p2m ...
+ *
+ * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
+ *
+ * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
+ * maximum representable pseudo-physical address space is:
+ *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
+ *
+ * P2M_PER_PAGE depends on the architecture, as a mfn is always
+ * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
+ * 512 and 1024 entries respectively.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/cache.h>
+#include <asm/setup.h>
+
+#include <asm/xen/page.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include "xen-ops.h"
+
+unsigned long xen_max_p2m_pfn __read_mostly;
+
+#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
+#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
+#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))
+
+#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
+
+/* Placeholders for holes in the address space */
+static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
+
+static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
+
+RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+
+static inline unsigned p2m_top_index(unsigned long pfn)
+{
+	BUG_ON(pfn >= MAX_P2M_PFN);
+	return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
+}
+
+static inline unsigned p2m_mid_index(unsigned long pfn)
+{
+	return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
+}
+
+static inline unsigned p2m_index(unsigned long pfn)
+{
+	return pfn % P2M_PER_PAGE;
+}
+
+static void p2m_top_init(unsigned long ***top)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
+		top[i] = p2m_mid_missing;
+}
+
+static void p2m_top_mfn_init(unsigned long *top)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
+		top[i] = virt_to_mfn(p2m_mid_missing_mfn);
+}
+
+static void p2m_top_mfn_p_init(unsigned long **top)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
+		top[i] = p2m_mid_missing_mfn;
+}
+
+static void p2m_mid_init(unsigned long **mid)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_MID_PER_PAGE; i++)
+		mid[i] = p2m_missing;
+}
+
+static void p2m_mid_mfn_init(unsigned long *mid)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_MID_PER_PAGE; i++)
+		mid[i] = virt_to_mfn(p2m_missing);
+}
+
+static void p2m_init(unsigned long *p2m)
+{
+	unsigned i;
+
+	for (i = 0; i < P2M_MID_PER_PAGE; i++)
+		p2m[i] = INVALID_P2M_ENTRY;
+}
+
+/*
+ * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
+ *
+ * This is called both at boot time, and after resuming from suspend:
+ * - At boot time we're called very early, and must use extend_brk()
+ *   to allocate memory.
+ *
+ * - After resume we're called from within stop_machine, but the mfn
+ *   tree should already be completely allocated.
+ */
+void xen_build_mfn_list_list(void)
+{
+	unsigned long pfn;
+
+	/* Pre-initialize p2m_top_mfn to be completely missing */
+	if (p2m_top_mfn == NULL) {
+		p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		p2m_mid_mfn_init(p2m_mid_missing_mfn);
+
+		p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		p2m_top_mfn_p_init(p2m_top_mfn_p);
+
+		p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		p2m_top_mfn_init(p2m_top_mfn);
+	} else {
+		/* Reinitialise, mfn's all change after migration */
+		p2m_mid_mfn_init(p2m_mid_missing_mfn);
+	}
+
+	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+		unsigned mididx = p2m_mid_index(pfn);
+		unsigned long **mid;
+		unsigned long *mid_mfn_p;
+
+		mid = p2m_top[topidx];
+		mid_mfn_p = p2m_top_mfn_p[topidx];
+
+		/* Don't bother allocating any mfn mid levels if
+		 * they're just missing, just update the stored mfn,
+		 * since all could have changed over a migrate.
+		 */
+		if (mid == p2m_mid_missing) {
+			BUG_ON(mididx);
+			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
+			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
+			pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
+			continue;
+		}
+
+		if (mid_mfn_p == p2m_mid_missing_mfn) {
+			/*
+			 * XXX boot-time only!  We should never find
+			 * missing parts of the mfn tree after
+			 * runtime.  extend_brk() will BUG if we call
+			 * it too late.
+			 */
+			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+			p2m_mid_mfn_init(mid_mfn_p);
+
+			p2m_top_mfn_p[topidx] = mid_mfn_p;
+		}
+
+		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
+		mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
+	}
+}
+
+void xen_setup_mfn_list_list(void)
+{
+	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+
+	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+		virt_to_mfn(p2m_top_mfn);
+	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
+}
+
+/* Set up p2m_top to point to the domain-builder provided p2m pages */
+void __init xen_build_dynamic_phys_to_machine(void)
+{
+	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
+	unsigned long pfn;
+
+	xen_max_p2m_pfn = max_pfn;
+
+	p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
+	p2m_init(p2m_missing);
+
+	p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
+	p2m_mid_init(p2m_mid_missing);
+
+	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
+	p2m_top_init(p2m_top);
+
+	/*
+	 * The domain builder gives us a pre-constructed p2m array in
+	 * mfn_list for all the pages initially given to us, so we just
+	 * need to graft that into our tree structure.
+	 */
+	for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+		unsigned mididx = p2m_mid_index(pfn);
+
+		if (p2m_top[topidx] == p2m_mid_missing) {
+			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+			p2m_mid_init(mid);
+
+			p2m_top[topidx] = mid;
+		}
+
+		p2m_top[topidx][mididx] = &mfn_list[pfn];
+	}
+}
+
+unsigned long get_phys_to_machine(unsigned long pfn)
+{
+	unsigned topidx, mididx, idx;
+
+	if (unlikely(pfn >= MAX_P2M_PFN))
+		return INVALID_P2M_ENTRY;
+
+	topidx = p2m_top_index(pfn);
+	mididx = p2m_mid_index(pfn);
+	idx = p2m_index(pfn);
+
+	return p2m_top[topidx][mididx][idx];
+}
+EXPORT_SYMBOL_GPL(get_phys_to_machine);
+
+static void *alloc_p2m_page(void)
+{
+	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
+}
+
+static void free_p2m_page(void *p)
+{
+	free_page((unsigned long)p);
+}
+
+/*
+ * Fully allocate the p2m structure for a given pfn.  We need to check
+ * that both the top and mid levels are allocated, and make sure the
+ * parallel mfn tree is kept in sync.  We may race with other cpus, so
+ * the new pages are installed with cmpxchg; if we lose the race then
+ * simply free the page we allocated and use the one that's there.
+ */
+static bool alloc_p2m(unsigned long pfn)
+{
+	unsigned topidx, mididx;
+	unsigned long ***top_p, **mid;
+	unsigned long *top_mfn_p, *mid_mfn;
+
+	topidx = p2m_top_index(pfn);
+	mididx = p2m_mid_index(pfn);
+
+	top_p = &p2m_top[topidx];
+	mid = *top_p;
+
+	if (mid == p2m_mid_missing) {
+		/* Mid level is missing, allocate a new one */
+		mid = alloc_p2m_page();
+		if (!mid)
+			return false;
+
+		p2m_mid_init(mid);
+
+		if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
+			free_p2m_page(mid);
+	}
+
+	top_mfn_p = &p2m_top_mfn[topidx];
+	mid_mfn = p2m_top_mfn_p[topidx];
+
+	BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
+
+	if (mid_mfn == p2m_mid_missing_mfn) {
+		/* Separately check the mid mfn level */
+		unsigned long missing_mfn;
+		unsigned long mid_mfn_mfn;
+
+		mid_mfn = alloc_p2m_page();
+		if (!mid_mfn)
+			return false;
+
+		p2m_mid_mfn_init(mid_mfn);
+
+		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
+		mid_mfn_mfn = virt_to_mfn(mid_mfn);
+		if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
+			free_p2m_page(mid_mfn);
+		else
+			p2m_top_mfn_p[topidx] = mid_mfn;
+	}
+
+	if (p2m_top[topidx][mididx] == p2m_missing) {
+		/* p2m leaf page is missing */
+		unsigned long *p2m;
+
+		p2m = alloc_p2m_page();
+		if (!p2m)
+			return false;
+
+		p2m_init(p2m);
+
+		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
+			free_p2m_page(p2m);
+		else
+			mid_mfn[mididx] = virt_to_mfn(p2m);
+	}
+
+	return true;
+}
+
+/* Try to install p2m mapping; fail if intermediate bits missing */
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	unsigned topidx, mididx, idx;
+
+	if (unlikely(pfn >= MAX_P2M_PFN)) {
+		BUG_ON(mfn != INVALID_P2M_ENTRY);
+		return true;
+	}
+
+	topidx = p2m_top_index(pfn);
+	mididx = p2m_mid_index(pfn);
+	idx = p2m_index(pfn);
+
+	if (p2m_top[topidx][mididx] == p2m_missing)
+		return mfn == INVALID_P2M_ENTRY;
+
+	p2m_top[topidx][mididx][idx] = mfn;
+
+	return true;
+}
+
+bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+		return true;
+	}
+
+	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
+		if (!alloc_p2m(pfn))
+			return false;
+
+		if (!__set_phys_to_machine(pfn, mfn))
+			return false;
+	}
+
+	return true;
+}
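
The alloc_p2m() path above publishes each freshly allocated level with cmpxchg, so CPUs racing to populate the same slot stay consistent: the loser frees its page and uses the winner's. Below is a minimal userspace sketch of that install-or-discard pattern; it is not part of the patch, and it substitutes GCC's __sync_val_compare_and_swap for the kernel's cmpxchg and calloc/free for alloc_p2m_page()/free_p2m_page():

	#include <stdlib.h>

	#define ENTRIES 512

	static long *table[ENTRIES];	/* one shared tree level; NULL plays the role of p2m_missing */

	/* Return the leaf page for slot i, allocating and installing it if absent. */
	static long *install_leaf(unsigned i)
	{
		long *leaf = table[i];

		if (leaf == NULL) {
			long *fresh = calloc(ENTRIES, sizeof(*fresh));
			if (fresh == NULL)
				return NULL;

			/* Atomically swap our page in only if the slot is still empty. */
			leaf = __sync_val_compare_and_swap(&table[i], (long *)NULL, fresh);
			if (leaf == NULL)
				leaf = fresh;	/* we won the race; our page is installed */
			else
				free(fresh);	/* we lost; discard ours, use the winner's */
		}
		return leaf;
	}

Re-reading the winner's pointer from the compare-and-swap return value is what lets the losing thread continue with the installed page rather than its own discarded one.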