Diffstat (limited to 'arch/x86/xen/p2m.c')
 -rw-r--r--   arch/x86/xen/p2m.c   510
 1 file changed, 510 insertions, 0 deletions
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
new file mode 100644
index 000000000000..8f2251d2a3f8
--- /dev/null
+++ b/arch/x86/xen/p2m.c
@@ -0,0 +1,510 @@
/*
 * Xen leaves the responsibility for maintaining p2m mappings to the
 * guests themselves, but it must also access and update the p2m array
 * during suspend/resume when all the pages are reallocated.
 *
 * The p2m table is logically a flat array, but we implement it as a
 * three-level tree to allow the address space to be sparse.
 *
 *                               Xen
 *                                |
 *      p2m_top              p2m_top_mfn
 *        /   \                  /    \
 * p2m_mid p2m_mid    p2m_mid_mfn p2m_mid_mfn
 *    / \     / \          /            /
 *  p2m p2m p2m p2m p2m p2m p2m ...
 *
 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
 *
 * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the
 * maximum representable pseudo-physical address space is:
 *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
 *
 * P2M_PER_PAGE depends on the architecture, as an mfn is always an
 * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to
 * 512 and 1024 entries respectively.
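 *
 * On 64-bit, for example, each level then holds 512 entries, so the
 * tree covers up to 512 * 512 * 512 = 2^27 pages, i.e. 512GB of
 * pseudo-physical address space with 4K pages (on 32-bit each level
 * holds 1024 entries, giving 4TB).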
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>

#include <asm/cache.h>
#include <asm/setup.h>

#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include "xen-ops.h"

static void __init m2p_override_init(void);

unsigned long xen_max_p2m_pfn __read_mostly;

#define P2M_PER_PAGE		(PAGE_SIZE / sizeof(unsigned long))
#define P2M_MID_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long *))
#define P2M_TOP_PER_PAGE	(PAGE_SIZE / sizeof(unsigned long **))

#define MAX_P2M_PFN		(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)

/* Placeholders for holes in the address space */
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);

static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
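
/*
 * The sharing this buys: every unpopulated top entry points at the one
 * p2m_mid_missing page and every unpopulated mid entry at the one
 * p2m_missing page, so an almost-empty tree costs a few shared pages
 * rather than one page per potential entry.
 */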

RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));

static inline unsigned p2m_top_index(unsigned long pfn)
{
	BUG_ON(pfn >= MAX_P2M_PFN);
	return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
}

static inline unsigned p2m_mid_index(unsigned long pfn)
{
	return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
	return pfn % P2M_PER_PAGE;
}
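
/*
 * Worked example (64-bit, so 512 entries per level): pfn 0x12345 gives
 * topidx = 0x12345 / (512 * 512) = 0, mididx = (0x12345 / 512) % 512 =
 * 145 and idx = 0x12345 % 512 = 325, so its entry lives at
 * p2m_top[0][145][325].
 */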

static void p2m_top_init(unsigned long ***top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = p2m_mid_missing;
}

static void p2m_top_mfn_init(unsigned long *top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = virt_to_mfn(p2m_mid_missing_mfn);
}

static void p2m_top_mfn_p_init(unsigned long **top)
{
	unsigned i;

	for (i = 0; i < P2M_TOP_PER_PAGE; i++)
		top[i] = p2m_mid_missing_mfn;
}

static void p2m_mid_init(unsigned long **mid)
{
	unsigned i;

	for (i = 0; i < P2M_MID_PER_PAGE; i++)
		mid[i] = p2m_missing;
}

static void p2m_mid_mfn_init(unsigned long *mid)
{
	unsigned i;

	for (i = 0; i < P2M_MID_PER_PAGE; i++)
		mid[i] = virt_to_mfn(p2m_missing);
}

static void p2m_init(unsigned long *p2m)
{
	unsigned i;

	for (i = 0; i < P2M_PER_PAGE; i++)
		p2m[i] = INVALID_P2M_ENTRY;
}

/*
 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
 *
 * This is called both at boot time, and after resuming from suspend:
 * - At boot time we're called very early, and must use extend_brk()
 *   to allocate memory.
 *
 * - After resume we're called from within stop_machine, but the mfn
 *   tree should already be completely allocated.
 */
void xen_build_mfn_list_list(void)
{
	unsigned long pfn;

	/* Pre-initialize p2m_top_mfn to be completely missing */
	if (p2m_top_mfn == NULL) {
		p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_mid_mfn_init(p2m_mid_missing_mfn);

		p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_mfn_p_init(p2m_top_mfn_p);

		p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
		p2m_top_mfn_init(p2m_top_mfn);
	} else {
		/* Reinitialise; the mfns all change after migration */
		p2m_mid_mfn_init(p2m_mid_missing_mfn);
	}

	for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx = p2m_mid_index(pfn);
		unsigned long **mid;
		unsigned long *mid_mfn_p;

		mid = p2m_top[topidx];
		mid_mfn_p = p2m_top_mfn_p[topidx];

		/* Don't bother allocating any mfn mid levels if
		 * they're just missing; just update the stored mfn,
		 * since all of them could have changed over a migrate.
		 */
		if (mid == p2m_mid_missing) {
			BUG_ON(mididx);
			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
			p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
			pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
			continue;
		}

		if (mid_mfn_p == p2m_mid_missing_mfn) {
			/*
			 * XXX boot-time only!  We should never find
			 * missing parts of the mfn tree after
			 * runtime.  extend_brk() will BUG if we call
			 * it too late.
			 */
			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
			p2m_mid_mfn_init(mid_mfn_p);

			p2m_top_mfn_p[topidx] = mid_mfn_p;
		}

		p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
		mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]);
	}
}

void xen_setup_mfn_list_list(void)
{
	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);

	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		virt_to_mfn(p2m_top_mfn);
	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
}

/* Set up p2m_top to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
	unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
	unsigned long pfn;

	xen_max_p2m_pfn = max_pfn;

	p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_init(p2m_missing);

	p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_mid_init(p2m_mid_missing);

	p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
	p2m_top_init(p2m_top);

	/*
	 * The domain builder gives us a pre-constructed p2m array in
	 * mfn_list for all the pages initially given to us, so we just
	 * need to graft that into our tree structure.
	 */
	for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx = p2m_mid_index(pfn);

		if (p2m_top[topidx] == p2m_mid_missing) {
			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
			p2m_mid_init(mid);

			p2m_top[topidx] = mid;
		}

		p2m_top[topidx][mididx] = &mfn_list[pfn];
	}

	m2p_override_init();
}
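
/*
 * To make the grafting concrete: assuming a 64-bit domain booted with
 * nr_pages = 1048576 (4GB of RAM), the loop above installs
 * 1048576 / 512 = 2048 leaf pointers spanning 2048 / 512 = 4 mid
 * pages, so only p2m_top[0..3] end up pointing at real mid levels;
 * the rest still point at p2m_mid_missing.
 */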

unsigned long get_phys_to_machine(unsigned long pfn)
{
	unsigned topidx, mididx, idx;

	if (unlikely(pfn >= MAX_P2M_PFN))
		return INVALID_P2M_ENTRY;

	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);
	idx = p2m_index(pfn);

	return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
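
/*
 * A caller-side sketch (wrappers such as pfn_to_mfn() in
 * asm/xen/page.h sit on top of this):
 *
 *	unsigned long mfn = get_phys_to_machine(pfn);
 *	if (mfn == INVALID_P2M_ENTRY)
 *		return ...;	// a hole: no machine frame backs this pfn
 */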

static void *alloc_p2m_page(void)
{
	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
}

static void free_p2m_page(void *p)
{
	free_page((unsigned long)p);
}

/*
 * Fully allocate the p2m structure for a given pfn.  We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync.  We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
 */
static bool alloc_p2m(unsigned long pfn)
{
	unsigned topidx, mididx;
	unsigned long ***top_p, **mid;
	unsigned long *top_mfn_p, *mid_mfn;

	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);

	top_p = &p2m_top[topidx];
	mid = *top_p;

	if (mid == p2m_mid_missing) {
		/* Mid level is missing, allocate a new one */
		mid = alloc_p2m_page();
		if (!mid)
			return false;

		p2m_mid_init(mid);

		if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing)
			free_p2m_page(mid);
	}

	top_mfn_p = &p2m_top_mfn[topidx];
	mid_mfn = p2m_top_mfn_p[topidx];

	BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

	if (mid_mfn == p2m_mid_missing_mfn) {
		/* Separately check the mid mfn level */
		unsigned long missing_mfn;
		unsigned long mid_mfn_mfn;

		mid_mfn = alloc_p2m_page();
		if (!mid_mfn)
			return false;

		p2m_mid_mfn_init(mid_mfn);

		missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
		mid_mfn_mfn = virt_to_mfn(mid_mfn);
		if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn)
			free_p2m_page(mid_mfn);
		else
			p2m_top_mfn_p[topidx] = mid_mfn;
	}

	if (p2m_top[topidx][mididx] == p2m_missing) {
		/* p2m leaf page is missing */
		unsigned long *p2m;

		p2m = alloc_p2m_page();
		if (!p2m)
			return false;

		p2m_init(p2m);

		if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
			free_p2m_page(p2m);
		else
			mid_mfn[mididx] = virt_to_mfn(p2m);
	}

	return true;
}
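
/*
 * Each level above repeats the same lock-free install pattern; in
 * isolation it is (sketch):
 *
 *	new = alloc_p2m_page();
 *	init(new);
 *	if (cmpxchg(slot, missing, new) != missing)
 *		free_p2m_page(new);	// lost the race; keep the winner's page
 */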

/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	unsigned topidx, mididx, idx;

	if (unlikely(pfn >= MAX_P2M_PFN)) {
		BUG_ON(mfn != INVALID_P2M_ENTRY);
		return true;
	}

	topidx = p2m_top_index(pfn);
	mididx = p2m_mid_index(pfn);
	idx = p2m_index(pfn);

	if (p2m_top[topidx][mididx] == p2m_missing)
		return mfn == INVALID_P2M_ENTRY;

	p2m_top[topidx][mididx][idx] = mfn;

	return true;
}

bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
	if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
		return true;
	}

	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
		if (!alloc_p2m(pfn))
			return false;

		if (!__set_phys_to_machine(pfn, mfn))
			return false;
	}

	return true;
}

#define M2P_OVERRIDE_HASH_SHIFT	10
#define M2P_OVERRIDE_HASH	(1 << M2P_OVERRIDE_HASH_SHIFT)

static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH);
static DEFINE_SPINLOCK(m2p_override_lock);

static void __init m2p_override_init(void)
{
	unsigned i;

	m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
				   sizeof(unsigned long));

	for (i = 0; i < M2P_OVERRIDE_HASH; i++)
		INIT_LIST_HEAD(&m2p_overrides[i]);
}

static unsigned long mfn_hash(unsigned long mfn)
{
	return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
}

/* Add an MFN override for a particular page */
int m2p_add_override(unsigned long mfn, struct page *page)
{
	unsigned long flags;
	unsigned long pfn;
	unsigned long address;
	unsigned level;
	pte_t *ptep = NULL;

	pfn = page_to_pfn(page);
	if (!PageHighMem(page)) {
		address = (unsigned long)__va(pfn << PAGE_SHIFT);
		ptep = lookup_address(address, &level);

		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
			 "m2p_add_override: pfn %lx not mapped", pfn))
			return -EINVAL;
	}

	page->private = mfn;
	page->index = pfn_to_mfn(pfn);

	__set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
	if (!PageHighMem(page))
		/* Just zap old mapping for now */
		pte_clear(&init_mm, address, ptep);

	spin_lock_irqsave(&m2p_override_lock, flags);
	list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
	spin_unlock_irqrestore(&m2p_override_lock, flags);

	return 0;
}
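
/*
 * Overrides exist for foreign frames mapped over local pages (grant
 * mappings being the expected user): for such an mfn, Xen's global m2p
 * table names a pfn in the owning domain, so reverse lookups must
 * consult this hash to find the local page instead.
 */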

int m2p_remove_override(struct page *page)
{
	unsigned long flags;
	unsigned long mfn;
	unsigned long pfn;
	unsigned long address;
	unsigned level;
	pte_t *ptep = NULL;

	pfn = page_to_pfn(page);
	mfn = get_phys_to_machine(pfn);
	if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
		return -EINVAL;

	if (!PageHighMem(page)) {
		address = (unsigned long)__va(pfn << PAGE_SHIFT);
		ptep = lookup_address(address, &level);

		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
			 "m2p_remove_override: pfn %lx not mapped", pfn))
			return -EINVAL;
	}

	spin_lock_irqsave(&m2p_override_lock, flags);
	list_del(&page->lru);
	spin_unlock_irqrestore(&m2p_override_lock, flags);
	__set_phys_to_machine(pfn, page->index);

	if (!PageHighMem(page))
		set_pte_at(&init_mm, address, ptep,
			   pfn_pte(pfn, PAGE_KERNEL));
	/* No tlb flush necessary because the caller already
	 * left the pte unmapped. */

	return 0;
}

struct page *m2p_find_override(unsigned long mfn)
{
	unsigned long flags;
	struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)];
	struct page *p, *ret;

	ret = NULL;

	spin_lock_irqsave(&m2p_override_lock, flags);

	list_for_each_entry(p, bucket, lru) {
		if (p->private == mfn) {
			ret = p;
			break;
		}
	}

	spin_unlock_irqrestore(&m2p_override_lock, flags);

	return ret;
}

unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
{
	struct page *p = m2p_find_override(mfn);
	unsigned long ret = pfn;

	if (p)
		ret = page_to_pfn(p);

	return ret;
}
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
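
/*
 * Intended use, roughly: an mfn_to_pfn()-style reverse translation
 * first tries Xen's m2p table and, when the result doesn't map back to
 * the same mfn through the p2m, falls back on
 * m2p_find_override_pfn(mfn, pfn) to pick up any registered override.
 */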