diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 21:46:48 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 21:46:48 -0500 |
commit | 9c4bc1c2befbbdce4b9fd526e67a7a2ea143ffa2 (patch) | |
tree | 1b08c1bb00ce477ca947461777cd7e24de72c148 | |
parent | 2c0076d8c7eb1dafa03d0a792444862080b34106 (diff) | |
parent | e1b478e4ec4477520767d1a920433626263a2a6b (diff) |
Merge branch 'stable/gntdev' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
* 'stable/gntdev' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
xen/p2m: Fix module linking error.
xen p2m: clear the old pte when adding a page to m2p_override
xen gntdev: use gnttab_map_refs and gnttab_unmap_refs
xen: introduce gnttab_map_refs and gnttab_unmap_refs
xen p2m: transparently change the p2m mappings in the m2p override
xen/gntdev: Fix circular locking dependency
xen/gntdev: stop using "token" argument
xen: gntdev: move use of GNTMAP_contains_pte next to the map_op
xen: add m2p override mechanism
xen: move p2m handling to separate file
xen/gntdev: add VM_PFNMAP to vma
xen/gntdev: allow usermode to map granted pages
xen: define gnttab_set_map_op/unmap_op
Fix up trivial conflict in drivers/xen/Kconfig
-rw-r--r-- | arch/x86/include/asm/xen/page.h | 16 | ||||
-rw-r--r-- | arch/x86/xen/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 365 | ||||
-rw-r--r-- | arch/x86/xen/p2m.c | 510 | ||||
-rw-r--r-- | drivers/xen/Kconfig | 9 | ||||
-rw-r--r-- | drivers/xen/Makefile | 2 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 665 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 46 | ||||
-rw-r--r-- | include/xen/gntdev.h | 119 | ||||
-rw-r--r-- | include/xen/grant_table.h | 44 |
10 files changed, 1408 insertions, 371 deletions
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 8760cc60a21c..f25bdf238a33 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -42,6 +42,11 @@ extern unsigned int machine_to_phys_order; | |||
42 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 42 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
43 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 43 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
44 | 44 | ||
45 | extern int m2p_add_override(unsigned long mfn, struct page *page); | ||
46 | extern int m2p_remove_override(struct page *page); | ||
47 | extern struct page *m2p_find_override(unsigned long mfn); | ||
48 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); | ||
49 | |||
45 | static inline unsigned long pfn_to_mfn(unsigned long pfn) | 50 | static inline unsigned long pfn_to_mfn(unsigned long pfn) |
46 | { | 51 | { |
47 | unsigned long mfn; | 52 | unsigned long mfn; |
@@ -72,9 +77,6 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
72 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 77 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
73 | return mfn; | 78 | return mfn; |
74 | 79 | ||
75 | if (unlikely((mfn >> machine_to_phys_order) != 0)) | ||
76 | return ~0; | ||
77 | |||
78 | pfn = 0; | 80 | pfn = 0; |
79 | /* | 81 | /* |
80 | * The array access can fail (e.g., device space beyond end of RAM). | 82 | * The array access can fail (e.g., device space beyond end of RAM). |
@@ -83,6 +85,14 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
83 | */ | 85 | */ |
84 | __get_user(pfn, &machine_to_phys_mapping[mfn]); | 86 | __get_user(pfn, &machine_to_phys_mapping[mfn]); |
85 | 87 | ||
88 | /* | ||
89 | * If this appears to be a foreign mfn (because the pfn | ||
90 | * doesn't map back to the mfn), then check the local override | ||
91 | * table to see if there's a better pfn to use. | ||
92 | */ | ||
93 | if (get_phys_to_machine(pfn) != mfn) | ||
94 | pfn = m2p_find_override_pfn(mfn, pfn); | ||
95 | |||
86 | return pfn; | 96 | return pfn; |
87 | } | 97 | } |
88 | 98 | ||
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 779385158915..17c565de3d64 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -12,7 +12,8 @@ CFLAGS_mmu.o := $(nostackp) | |||
12 | 12 | ||
13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | 13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ |
14 | time.o xen-asm.o xen-asm_$(BITS).o \ | 14 | time.o xen-asm.o xen-asm_$(BITS).o \ |
15 | grant-table.o suspend.o platform-pci-unplug.o | 15 | grant-table.o suspend.o platform-pci-unplug.o \ |
16 | p2m.o | ||
16 | 17 | ||
17 | obj-$(CONFIG_SMP) += smp.o | 18 | obj-$(CONFIG_SMP) += smp.o |
18 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o | 19 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 44924e551fde..7575e55cd52e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -173,371 +173,6 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ | |||
173 | */ | 173 | */ |
174 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) | 174 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) |
175 | 175 | ||
176 | /* | ||
177 | * Xen leaves the responsibility for maintaining p2m mappings to the | ||
178 | * guests themselves, but it must also access and update the p2m array | ||
179 | * during suspend/resume when all the pages are reallocated. | ||
180 | * | ||
181 | * The p2m table is logically a flat array, but we implement it as a | ||
182 | * three-level tree to allow the address space to be sparse. | ||
183 | * | ||
184 | * Xen | ||
185 | * | | ||
186 | * p2m_top p2m_top_mfn | ||
187 | * / \ / \ | ||
188 | * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn | ||
189 | * / \ / \ / / | ||
190 | * p2m p2m p2m p2m p2m p2m p2m ... | ||
191 | * | ||
192 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | ||
193 | * | ||
194 | * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the | ||
195 | * maximum representable pseudo-physical address space is: | ||
196 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages | ||
197 | * | ||
198 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | ||
199 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | ||
200 | * 512 and 1024 entries respectively. | ||
201 | */ | ||
202 | |||
203 | unsigned long xen_max_p2m_pfn __read_mostly; | ||
204 | |||
205 | #define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
206 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) | ||
207 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | ||
208 | |||
209 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) | ||
210 | |||
211 | /* Placeholders for holes in the address space */ | ||
212 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | ||
213 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | ||
214 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); | ||
215 | |||
216 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | ||
217 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); | ||
218 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | ||
219 | |||
220 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
221 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
222 | |||
223 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
224 | { | ||
225 | BUG_ON(pfn >= MAX_P2M_PFN); | ||
226 | return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); | ||
227 | } | ||
228 | |||
229 | static inline unsigned p2m_mid_index(unsigned long pfn) | ||
230 | { | ||
231 | return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; | ||
232 | } | ||
233 | |||
234 | static inline unsigned p2m_index(unsigned long pfn) | ||
235 | { | ||
236 | return pfn % P2M_PER_PAGE; | ||
237 | } | ||
238 | |||
239 | static void p2m_top_init(unsigned long ***top) | ||
240 | { | ||
241 | unsigned i; | ||
242 | |||
243 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
244 | top[i] = p2m_mid_missing; | ||
245 | } | ||
246 | |||
247 | static void p2m_top_mfn_init(unsigned long *top) | ||
248 | { | ||
249 | unsigned i; | ||
250 | |||
251 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
252 | top[i] = virt_to_mfn(p2m_mid_missing_mfn); | ||
253 | } | ||
254 | |||
255 | static void p2m_top_mfn_p_init(unsigned long **top) | ||
256 | { | ||
257 | unsigned i; | ||
258 | |||
259 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
260 | top[i] = p2m_mid_missing_mfn; | ||
261 | } | ||
262 | |||
263 | static void p2m_mid_init(unsigned long **mid) | ||
264 | { | ||
265 | unsigned i; | ||
266 | |||
267 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
268 | mid[i] = p2m_missing; | ||
269 | } | ||
270 | |||
271 | static void p2m_mid_mfn_init(unsigned long *mid) | ||
272 | { | ||
273 | unsigned i; | ||
274 | |||
275 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
276 | mid[i] = virt_to_mfn(p2m_missing); | ||
277 | } | ||
278 | |||
279 | static void p2m_init(unsigned long *p2m) | ||
280 | { | ||
281 | unsigned i; | ||
282 | |||
283 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
284 | p2m[i] = INVALID_P2M_ENTRY; | ||
285 | } | ||
286 | |||
287 | /* | ||
288 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | ||
289 | * | ||
290 | * This is called both at boot time, and after resuming from suspend: | ||
291 | * - At boot time we're called very early, and must use extend_brk() | ||
292 | * to allocate memory. | ||
293 | * | ||
294 | * - After resume we're called from within stop_machine, but the mfn | ||
295 | * tree should already be completely allocated. | ||
296 | */ | ||
297 | void xen_build_mfn_list_list(void) | ||
298 | { | ||
299 | unsigned long pfn; | ||
300 | |||
301 | /* Pre-initialize p2m_top_mfn to be completely missing */ | ||
302 | if (p2m_top_mfn == NULL) { | ||
303 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
304 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
305 | |||
306 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
307 | p2m_top_mfn_p_init(p2m_top_mfn_p); | ||
308 | |||
309 | p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
310 | p2m_top_mfn_init(p2m_top_mfn); | ||
311 | } else { | ||
312 | /* Reinitialise, mfn's all change after migration */ | ||
313 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
314 | } | ||
315 | |||
316 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | ||
317 | unsigned topidx = p2m_top_index(pfn); | ||
318 | unsigned mididx = p2m_mid_index(pfn); | ||
319 | unsigned long **mid; | ||
320 | unsigned long *mid_mfn_p; | ||
321 | |||
322 | mid = p2m_top[topidx]; | ||
323 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
324 | |||
325 | /* Don't bother allocating any mfn mid levels if | ||
326 | * they're just missing, just update the stored mfn, | ||
327 | * since all could have changed over a migrate. | ||
328 | */ | ||
329 | if (mid == p2m_mid_missing) { | ||
330 | BUG_ON(mididx); | ||
331 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
332 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | ||
333 | pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; | ||
334 | continue; | ||
335 | } | ||
336 | |||
337 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
338 | /* | ||
339 | * XXX boot-time only! We should never find | ||
340 | * missing parts of the mfn tree after | ||
341 | * runtime. extend_brk() will BUG if we call | ||
342 | * it too late. | ||
343 | */ | ||
344 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
345 | p2m_mid_mfn_init(mid_mfn_p); | ||
346 | |||
347 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
348 | } | ||
349 | |||
350 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
351 | mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); | ||
352 | } | ||
353 | } | ||
354 | |||
355 | void xen_setup_mfn_list_list(void) | ||
356 | { | ||
357 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
358 | |||
359 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
360 | virt_to_mfn(p2m_top_mfn); | ||
361 | HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; | ||
362 | } | ||
363 | |||
364 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
365 | void __init xen_build_dynamic_phys_to_machine(void) | ||
366 | { | ||
367 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
368 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
369 | unsigned long pfn; | ||
370 | |||
371 | xen_max_p2m_pfn = max_pfn; | ||
372 | |||
373 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
374 | p2m_init(p2m_missing); | ||
375 | |||
376 | p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
377 | p2m_mid_init(p2m_mid_missing); | ||
378 | |||
379 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
380 | p2m_top_init(p2m_top); | ||
381 | |||
382 | /* | ||
383 | * The domain builder gives us a pre-constructed p2m array in | ||
384 | * mfn_list for all the pages initially given to us, so we just | ||
385 | * need to graft that into our tree structure. | ||
386 | */ | ||
387 | for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { | ||
388 | unsigned topidx = p2m_top_index(pfn); | ||
389 | unsigned mididx = p2m_mid_index(pfn); | ||
390 | |||
391 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
392 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
393 | p2m_mid_init(mid); | ||
394 | |||
395 | p2m_top[topidx] = mid; | ||
396 | } | ||
397 | |||
398 | p2m_top[topidx][mididx] = &mfn_list[pfn]; | ||
399 | } | ||
400 | } | ||
401 | |||
402 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
403 | { | ||
404 | unsigned topidx, mididx, idx; | ||
405 | |||
406 | if (unlikely(pfn >= MAX_P2M_PFN)) | ||
407 | return INVALID_P2M_ENTRY; | ||
408 | |||
409 | topidx = p2m_top_index(pfn); | ||
410 | mididx = p2m_mid_index(pfn); | ||
411 | idx = p2m_index(pfn); | ||
412 | |||
413 | return p2m_top[topidx][mididx][idx]; | ||
414 | } | ||
415 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
416 | |||
417 | static void *alloc_p2m_page(void) | ||
418 | { | ||
419 | return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | ||
420 | } | ||
421 | |||
422 | static void free_p2m_page(void *p) | ||
423 | { | ||
424 | free_page((unsigned long)p); | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * Fully allocate the p2m structure for a given pfn. We need to check | ||
429 | * that both the top and mid levels are allocated, and make sure the | ||
430 | * parallel mfn tree is kept in sync. We may race with other cpus, so | ||
431 | * the new pages are installed with cmpxchg; if we lose the race then | ||
432 | * simply free the page we allocated and use the one that's there. | ||
433 | */ | ||
434 | static bool alloc_p2m(unsigned long pfn) | ||
435 | { | ||
436 | unsigned topidx, mididx; | ||
437 | unsigned long ***top_p, **mid; | ||
438 | unsigned long *top_mfn_p, *mid_mfn; | ||
439 | |||
440 | topidx = p2m_top_index(pfn); | ||
441 | mididx = p2m_mid_index(pfn); | ||
442 | |||
443 | top_p = &p2m_top[topidx]; | ||
444 | mid = *top_p; | ||
445 | |||
446 | if (mid == p2m_mid_missing) { | ||
447 | /* Mid level is missing, allocate a new one */ | ||
448 | mid = alloc_p2m_page(); | ||
449 | if (!mid) | ||
450 | return false; | ||
451 | |||
452 | p2m_mid_init(mid); | ||
453 | |||
454 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | ||
455 | free_p2m_page(mid); | ||
456 | } | ||
457 | |||
458 | top_mfn_p = &p2m_top_mfn[topidx]; | ||
459 | mid_mfn = p2m_top_mfn_p[topidx]; | ||
460 | |||
461 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | ||
462 | |||
463 | if (mid_mfn == p2m_mid_missing_mfn) { | ||
464 | /* Separately check the mid mfn level */ | ||
465 | unsigned long missing_mfn; | ||
466 | unsigned long mid_mfn_mfn; | ||
467 | |||
468 | mid_mfn = alloc_p2m_page(); | ||
469 | if (!mid_mfn) | ||
470 | return false; | ||
471 | |||
472 | p2m_mid_mfn_init(mid_mfn); | ||
473 | |||
474 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | ||
475 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | ||
476 | if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) | ||
477 | free_p2m_page(mid_mfn); | ||
478 | else | ||
479 | p2m_top_mfn_p[topidx] = mid_mfn; | ||
480 | } | ||
481 | |||
482 | if (p2m_top[topidx][mididx] == p2m_missing) { | ||
483 | /* p2m leaf page is missing */ | ||
484 | unsigned long *p2m; | ||
485 | |||
486 | p2m = alloc_p2m_page(); | ||
487 | if (!p2m) | ||
488 | return false; | ||
489 | |||
490 | p2m_init(p2m); | ||
491 | |||
492 | if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) | ||
493 | free_p2m_page(p2m); | ||
494 | else | ||
495 | mid_mfn[mididx] = virt_to_mfn(p2m); | ||
496 | } | ||
497 | |||
498 | return true; | ||
499 | } | ||
500 | |||
501 | /* Try to install p2m mapping; fail if intermediate bits missing */ | ||
502 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
503 | { | ||
504 | unsigned topidx, mididx, idx; | ||
505 | |||
506 | if (unlikely(pfn >= MAX_P2M_PFN)) { | ||
507 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
508 | return true; | ||
509 | } | ||
510 | |||
511 | topidx = p2m_top_index(pfn); | ||
512 | mididx = p2m_mid_index(pfn); | ||
513 | idx = p2m_index(pfn); | ||
514 | |||
515 | if (p2m_top[topidx][mididx] == p2m_missing) | ||
516 | return mfn == INVALID_P2M_ENTRY; | ||
517 | |||
518 | p2m_top[topidx][mididx][idx] = mfn; | ||
519 | |||
520 | return true; | ||
521 | } | ||
522 | |||
523 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
524 | { | ||
525 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
526 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
527 | return true; | ||
528 | } | ||
529 | |||
530 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
531 | if (!alloc_p2m(pfn)) | ||
532 | return false; | ||
533 | |||
534 | if (!__set_phys_to_machine(pfn, mfn)) | ||
535 | return false; | ||
536 | } | ||
537 | |||
538 | return true; | ||
539 | } | ||
540 | |||
541 | unsigned long arbitrary_virt_to_mfn(void *vaddr) | 176 | unsigned long arbitrary_virt_to_mfn(void *vaddr) |
542 | { | 177 | { |
543 | xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); | 178 | xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c new file mode 100644 index 000000000000..8f2251d2a3f8 --- /dev/null +++ b/arch/x86/xen/p2m.c | |||
@@ -0,0 +1,510 @@ | |||
1 | /* | ||
2 | * Xen leaves the responsibility for maintaining p2m mappings to the | ||
3 | * guests themselves, but it must also access and update the p2m array | ||
4 | * during suspend/resume when all the pages are reallocated. | ||
5 | * | ||
6 | * The p2m table is logically a flat array, but we implement it as a | ||
7 | * three-level tree to allow the address space to be sparse. | ||
8 | * | ||
9 | * Xen | ||
10 | * | | ||
11 | * p2m_top p2m_top_mfn | ||
12 | * / \ / \ | ||
13 | * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn | ||
14 | * / \ / \ / / | ||
15 | * p2m p2m p2m p2m p2m p2m p2m ... | ||
16 | * | ||
17 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | ||
18 | * | ||
19 | * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the | ||
20 | * maximum representable pseudo-physical address space is: | ||
21 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages | ||
22 | * | ||
23 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | ||
24 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | ||
25 | * 512 and 1024 entries respectively. | ||
26 | */ | ||
27 | |||
28 | #include <linux/init.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/list.h> | ||
31 | #include <linux/hash.h> | ||
32 | #include <linux/sched.h> | ||
33 | |||
34 | #include <asm/cache.h> | ||
35 | #include <asm/setup.h> | ||
36 | |||
37 | #include <asm/xen/page.h> | ||
38 | #include <asm/xen/hypercall.h> | ||
39 | #include <asm/xen/hypervisor.h> | ||
40 | |||
41 | #include "xen-ops.h" | ||
42 | |||
43 | static void __init m2p_override_init(void); | ||
44 | |||
45 | unsigned long xen_max_p2m_pfn __read_mostly; | ||
46 | |||
47 | #define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
48 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) | ||
49 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | ||
50 | |||
51 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) | ||
52 | |||
53 | /* Placeholders for holes in the address space */ | ||
54 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | ||
55 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | ||
56 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); | ||
57 | |||
58 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | ||
59 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); | ||
60 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | ||
61 | |||
62 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
63 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
64 | |||
65 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
66 | { | ||
67 | BUG_ON(pfn >= MAX_P2M_PFN); | ||
68 | return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); | ||
69 | } | ||
70 | |||
71 | static inline unsigned p2m_mid_index(unsigned long pfn) | ||
72 | { | ||
73 | return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; | ||
74 | } | ||
75 | |||
76 | static inline unsigned p2m_index(unsigned long pfn) | ||
77 | { | ||
78 | return pfn % P2M_PER_PAGE; | ||
79 | } | ||
80 | |||
81 | static void p2m_top_init(unsigned long ***top) | ||
82 | { | ||
83 | unsigned i; | ||
84 | |||
85 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
86 | top[i] = p2m_mid_missing; | ||
87 | } | ||
88 | |||
89 | static void p2m_top_mfn_init(unsigned long *top) | ||
90 | { | ||
91 | unsigned i; | ||
92 | |||
93 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
94 | top[i] = virt_to_mfn(p2m_mid_missing_mfn); | ||
95 | } | ||
96 | |||
97 | static void p2m_top_mfn_p_init(unsigned long **top) | ||
98 | { | ||
99 | unsigned i; | ||
100 | |||
101 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
102 | top[i] = p2m_mid_missing_mfn; | ||
103 | } | ||
104 | |||
105 | static void p2m_mid_init(unsigned long **mid) | ||
106 | { | ||
107 | unsigned i; | ||
108 | |||
109 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
110 | mid[i] = p2m_missing; | ||
111 | } | ||
112 | |||
113 | static void p2m_mid_mfn_init(unsigned long *mid) | ||
114 | { | ||
115 | unsigned i; | ||
116 | |||
117 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
118 | mid[i] = virt_to_mfn(p2m_missing); | ||
119 | } | ||
120 | |||
121 | static void p2m_init(unsigned long *p2m) | ||
122 | { | ||
123 | unsigned i; | ||
124 | |||
125 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
126 | p2m[i] = INVALID_P2M_ENTRY; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | ||
131 | * | ||
132 | * This is called both at boot time, and after resuming from suspend: | ||
133 | * - At boot time we're called very early, and must use extend_brk() | ||
134 | * to allocate memory. | ||
135 | * | ||
136 | * - After resume we're called from within stop_machine, but the mfn | ||
137 | * tree should already be completely allocated. | ||
138 | */ | ||
139 | void xen_build_mfn_list_list(void) | ||
140 | { | ||
141 | unsigned long pfn; | ||
142 | |||
143 | /* Pre-initialize p2m_top_mfn to be completely missing */ | ||
144 | if (p2m_top_mfn == NULL) { | ||
145 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
146 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
147 | |||
148 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
149 | p2m_top_mfn_p_init(p2m_top_mfn_p); | ||
150 | |||
151 | p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
152 | p2m_top_mfn_init(p2m_top_mfn); | ||
153 | } else { | ||
154 | /* Reinitialise, mfn's all change after migration */ | ||
155 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
156 | } | ||
157 | |||
158 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | ||
159 | unsigned topidx = p2m_top_index(pfn); | ||
160 | unsigned mididx = p2m_mid_index(pfn); | ||
161 | unsigned long **mid; | ||
162 | unsigned long *mid_mfn_p; | ||
163 | |||
164 | mid = p2m_top[topidx]; | ||
165 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
166 | |||
167 | /* Don't bother allocating any mfn mid levels if | ||
168 | * they're just missing, just update the stored mfn, | ||
169 | * since all could have changed over a migrate. | ||
170 | */ | ||
171 | if (mid == p2m_mid_missing) { | ||
172 | BUG_ON(mididx); | ||
173 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
174 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | ||
175 | pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; | ||
176 | continue; | ||
177 | } | ||
178 | |||
179 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
180 | /* | ||
181 | * XXX boot-time only! We should never find | ||
182 | * missing parts of the mfn tree after | ||
183 | * runtime. extend_brk() will BUG if we call | ||
184 | * it too late. | ||
185 | */ | ||
186 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
187 | p2m_mid_mfn_init(mid_mfn_p); | ||
188 | |||
189 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
190 | } | ||
191 | |||
192 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
193 | mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); | ||
194 | } | ||
195 | } | ||
196 | |||
197 | void xen_setup_mfn_list_list(void) | ||
198 | { | ||
199 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
200 | |||
201 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
202 | virt_to_mfn(p2m_top_mfn); | ||
203 | HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; | ||
204 | } | ||
205 | |||
206 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
207 | void __init xen_build_dynamic_phys_to_machine(void) | ||
208 | { | ||
209 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
210 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
211 | unsigned long pfn; | ||
212 | |||
213 | xen_max_p2m_pfn = max_pfn; | ||
214 | |||
215 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
216 | p2m_init(p2m_missing); | ||
217 | |||
218 | p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
219 | p2m_mid_init(p2m_mid_missing); | ||
220 | |||
221 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
222 | p2m_top_init(p2m_top); | ||
223 | |||
224 | /* | ||
225 | * The domain builder gives us a pre-constructed p2m array in | ||
226 | * mfn_list for all the pages initially given to us, so we just | ||
227 | * need to graft that into our tree structure. | ||
228 | */ | ||
229 | for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { | ||
230 | unsigned topidx = p2m_top_index(pfn); | ||
231 | unsigned mididx = p2m_mid_index(pfn); | ||
232 | |||
233 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
234 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
235 | p2m_mid_init(mid); | ||
236 | |||
237 | p2m_top[topidx] = mid; | ||
238 | } | ||
239 | |||
240 | p2m_top[topidx][mididx] = &mfn_list[pfn]; | ||
241 | } | ||
242 | |||
243 | m2p_override_init(); | ||
244 | } | ||
245 | |||
246 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
247 | { | ||
248 | unsigned topidx, mididx, idx; | ||
249 | |||
250 | if (unlikely(pfn >= MAX_P2M_PFN)) | ||
251 | return INVALID_P2M_ENTRY; | ||
252 | |||
253 | topidx = p2m_top_index(pfn); | ||
254 | mididx = p2m_mid_index(pfn); | ||
255 | idx = p2m_index(pfn); | ||
256 | |||
257 | return p2m_top[topidx][mididx][idx]; | ||
258 | } | ||
259 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
260 | |||
261 | static void *alloc_p2m_page(void) | ||
262 | { | ||
263 | return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | ||
264 | } | ||
265 | |||
266 | static void free_p2m_page(void *p) | ||
267 | { | ||
268 | free_page((unsigned long)p); | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Fully allocate the p2m structure for a given pfn. We need to check | ||
273 | * that both the top and mid levels are allocated, and make sure the | ||
274 | * parallel mfn tree is kept in sync. We may race with other cpus, so | ||
275 | * the new pages are installed with cmpxchg; if we lose the race then | ||
276 | * simply free the page we allocated and use the one that's there. | ||
277 | */ | ||
278 | static bool alloc_p2m(unsigned long pfn) | ||
279 | { | ||
280 | unsigned topidx, mididx; | ||
281 | unsigned long ***top_p, **mid; | ||
282 | unsigned long *top_mfn_p, *mid_mfn; | ||
283 | |||
284 | topidx = p2m_top_index(pfn); | ||
285 | mididx = p2m_mid_index(pfn); | ||
286 | |||
287 | top_p = &p2m_top[topidx]; | ||
288 | mid = *top_p; | ||
289 | |||
290 | if (mid == p2m_mid_missing) { | ||
291 | /* Mid level is missing, allocate a new one */ | ||
292 | mid = alloc_p2m_page(); | ||
293 | if (!mid) | ||
294 | return false; | ||
295 | |||
296 | p2m_mid_init(mid); | ||
297 | |||
298 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | ||
299 | free_p2m_page(mid); | ||
300 | } | ||
301 | |||
302 | top_mfn_p = &p2m_top_mfn[topidx]; | ||
303 | mid_mfn = p2m_top_mfn_p[topidx]; | ||
304 | |||
305 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | ||
306 | |||
307 | if (mid_mfn == p2m_mid_missing_mfn) { | ||
308 | /* Separately check the mid mfn level */ | ||
309 | unsigned long missing_mfn; | ||
310 | unsigned long mid_mfn_mfn; | ||
311 | |||
312 | mid_mfn = alloc_p2m_page(); | ||
313 | if (!mid_mfn) | ||
314 | return false; | ||
315 | |||
316 | p2m_mid_mfn_init(mid_mfn); | ||
317 | |||
318 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | ||
319 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | ||
320 | if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) | ||
321 | free_p2m_page(mid_mfn); | ||
322 | else | ||
323 | p2m_top_mfn_p[topidx] = mid_mfn; | ||
324 | } | ||
325 | |||
326 | if (p2m_top[topidx][mididx] == p2m_missing) { | ||
327 | /* p2m leaf page is missing */ | ||
328 | unsigned long *p2m; | ||
329 | |||
330 | p2m = alloc_p2m_page(); | ||
331 | if (!p2m) | ||
332 | return false; | ||
333 | |||
334 | p2m_init(p2m); | ||
335 | |||
336 | if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) | ||
337 | free_p2m_page(p2m); | ||
338 | else | ||
339 | mid_mfn[mididx] = virt_to_mfn(p2m); | ||
340 | } | ||
341 | |||
342 | return true; | ||
343 | } | ||
344 | |||
345 | /* Try to install p2m mapping; fail if intermediate bits missing */ | ||
346 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
347 | { | ||
348 | unsigned topidx, mididx, idx; | ||
349 | |||
350 | if (unlikely(pfn >= MAX_P2M_PFN)) { | ||
351 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
352 | return true; | ||
353 | } | ||
354 | |||
355 | topidx = p2m_top_index(pfn); | ||
356 | mididx = p2m_mid_index(pfn); | ||
357 | idx = p2m_index(pfn); | ||
358 | |||
359 | if (p2m_top[topidx][mididx] == p2m_missing) | ||
360 | return mfn == INVALID_P2M_ENTRY; | ||
361 | |||
362 | p2m_top[topidx][mididx][idx] = mfn; | ||
363 | |||
364 | return true; | ||
365 | } | ||
366 | |||
367 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
368 | { | ||
369 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
370 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
371 | return true; | ||
372 | } | ||
373 | |||
374 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
375 | if (!alloc_p2m(pfn)) | ||
376 | return false; | ||
377 | |||
378 | if (!__set_phys_to_machine(pfn, mfn)) | ||
379 | return false; | ||
380 | } | ||
381 | |||
382 | return true; | ||
383 | } | ||
384 | |||
385 | #define M2P_OVERRIDE_HASH_SHIFT 10 | ||
386 | #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) | ||
387 | |||
388 | static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH); | ||
389 | static DEFINE_SPINLOCK(m2p_override_lock); | ||
390 | |||
391 | static void __init m2p_override_init(void) | ||
392 | { | ||
393 | unsigned i; | ||
394 | |||
395 | m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, | ||
396 | sizeof(unsigned long)); | ||
397 | |||
398 | for (i = 0; i < M2P_OVERRIDE_HASH; i++) | ||
399 | INIT_LIST_HEAD(&m2p_overrides[i]); | ||
400 | } | ||
401 | |||
402 | static unsigned long mfn_hash(unsigned long mfn) | ||
403 | { | ||
404 | return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); | ||
405 | } | ||
406 | |||
407 | /* Add an MFN override for a particular page */ | ||
408 | int m2p_add_override(unsigned long mfn, struct page *page) | ||
409 | { | ||
410 | unsigned long flags; | ||
411 | unsigned long pfn; | ||
412 | unsigned long address; | ||
413 | unsigned level; | ||
414 | pte_t *ptep = NULL; | ||
415 | |||
416 | pfn = page_to_pfn(page); | ||
417 | if (!PageHighMem(page)) { | ||
418 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | ||
419 | ptep = lookup_address(address, &level); | ||
420 | |||
421 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, | ||
422 | "m2p_add_override: pfn %lx not mapped", pfn)) | ||
423 | return -EINVAL; | ||
424 | } | ||
425 | |||
426 | page->private = mfn; | ||
427 | page->index = pfn_to_mfn(pfn); | ||
428 | |||
429 | __set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); | ||
430 | if (!PageHighMem(page)) | ||
431 | /* Just zap old mapping for now */ | ||
432 | pte_clear(&init_mm, address, ptep); | ||
433 | |||
434 | spin_lock_irqsave(&m2p_override_lock, flags); | ||
435 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); | ||
436 | spin_unlock_irqrestore(&m2p_override_lock, flags); | ||
437 | |||
438 | return 0; | ||
439 | } | ||
440 | |||
441 | int m2p_remove_override(struct page *page) | ||
442 | { | ||
443 | unsigned long flags; | ||
444 | unsigned long mfn; | ||
445 | unsigned long pfn; | ||
446 | unsigned long address; | ||
447 | unsigned level; | ||
448 | pte_t *ptep = NULL; | ||
449 | |||
450 | pfn = page_to_pfn(page); | ||
451 | mfn = get_phys_to_machine(pfn); | ||
452 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) | ||
453 | return -EINVAL; | ||
454 | |||
455 | if (!PageHighMem(page)) { | ||
456 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | ||
457 | ptep = lookup_address(address, &level); | ||
458 | |||
459 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, | ||
460 | "m2p_remove_override: pfn %lx not mapped", pfn)) | ||
461 | return -EINVAL; | ||
462 | } | ||
463 | |||
464 | spin_lock_irqsave(&m2p_override_lock, flags); | ||
465 | list_del(&page->lru); | ||
466 | spin_unlock_irqrestore(&m2p_override_lock, flags); | ||
467 | __set_phys_to_machine(pfn, page->index); | ||
468 | |||
469 | if (!PageHighMem(page)) | ||
470 | set_pte_at(&init_mm, address, ptep, | ||
471 | pfn_pte(pfn, PAGE_KERNEL)); | ||
472 | /* No tlb flush necessary because the caller already | ||
473 | * left the pte unmapped. */ | ||
474 | |||
475 | return 0; | ||
476 | } | ||
477 | |||
478 | struct page *m2p_find_override(unsigned long mfn) | ||
479 | { | ||
480 | unsigned long flags; | ||
481 | struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; | ||
482 | struct page *p, *ret; | ||
483 | |||
484 | ret = NULL; | ||
485 | |||
486 | spin_lock_irqsave(&m2p_override_lock, flags); | ||
487 | |||
488 | list_for_each_entry(p, bucket, lru) { | ||
489 | if (p->private == mfn) { | ||
490 | ret = p; | ||
491 | break; | ||
492 | } | ||
493 | } | ||
494 | |||
495 | spin_unlock_irqrestore(&m2p_override_lock, flags); | ||
496 | |||
497 | return ret; | ||
498 | } | ||
499 | |||
/*
 * Return the pfn of the page overriding @mfn, or the caller-supplied
 * fallback @pfn when no override exists.
 */
unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
{
	struct page *override = m2p_find_override(mfn);

	return override ? page_to_pfn(override) : pfn;
}
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 464d8935ad4e..07bec09d1dad 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig | |||
@@ -71,7 +71,14 @@ config XEN_SYS_HYPERVISOR | |||
71 | but will have no xen contents. | 71 | but will have no xen contents. |
72 | 72 | ||
73 | config XEN_XENBUS_FRONTEND | 73 | config XEN_XENBUS_FRONTEND |
74 | tristate | 74 | tristate |
75 | |||
76 | config XEN_GNTDEV | ||
77 | tristate "userspace grant access device driver" | ||
78 | depends on XEN | ||
79 | select MMU_NOTIFIER | ||
80 | help | ||
81 | Allows userspace processes to use grants. | ||
75 | 82 | ||
76 | config XEN_PLATFORM_PCI | 83 | config XEN_PLATFORM_PCI |
77 | tristate "xen platform pci device driver" | 84 | tristate "xen platform pci device driver" |
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index f81819b0f916..5088cc2e6fe2 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -9,6 +9,7 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o | |||
9 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | 9 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o |
10 | obj-$(CONFIG_XEN_BALLOON) += balloon.o | 10 | obj-$(CONFIG_XEN_BALLOON) += balloon.o |
11 | obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o | 11 | obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o |
12 | obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o | ||
12 | obj-$(CONFIG_XENFS) += xenfs/ | 13 | obj-$(CONFIG_XENFS) += xenfs/ |
13 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o | 14 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o |
14 | obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o | 15 | obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o |
@@ -16,5 +17,6 @@ obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o | |||
16 | obj-$(CONFIG_XEN_DOM0) += pci.o | 17 | obj-$(CONFIG_XEN_DOM0) += pci.o |
17 | 18 | ||
18 | xen-evtchn-y := evtchn.o | 19 | xen-evtchn-y := evtchn.o |
20 | xen-gntdev-y := gntdev.o | ||
19 | 21 | ||
20 | xen-platform-pci-y := platform-pci.o | 22 | xen-platform-pci-y := platform-pci.o |
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c new file mode 100644 index 000000000000..1e31cdcdae1e --- /dev/null +++ b/drivers/xen/gntdev.c | |||
@@ -0,0 +1,665 @@ | |||
1 | /****************************************************************************** | ||
2 | * gntdev.c | ||
3 | * | ||
4 | * Device for accessing (in user-space) pages that have been granted by other | ||
5 | * domains. | ||
6 | * | ||
7 | * Copyright (c) 2006-2007, D G Murray. | ||
8 | * (c) 2009 Gerd Hoffmann <kraxel@redhat.com> | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #undef DEBUG | ||
21 | |||
22 | #include <linux/module.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/miscdevice.h> | ||
26 | #include <linux/fs.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/mman.h> | ||
29 | #include <linux/mmu_notifier.h> | ||
30 | #include <linux/types.h> | ||
31 | #include <linux/uaccess.h> | ||
32 | #include <linux/sched.h> | ||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/slab.h> | ||
35 | |||
36 | #include <xen/xen.h> | ||
37 | #include <xen/grant_table.h> | ||
38 | #include <xen/gntdev.h> | ||
39 | #include <asm/xen/hypervisor.h> | ||
40 | #include <asm/xen/hypercall.h> | ||
41 | #include <asm/xen/page.h> | ||
42 | |||
43 | MODULE_LICENSE("GPL"); | ||
44 | MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, " | ||
45 | "Gerd Hoffmann <kraxel@redhat.com>"); | ||
46 | MODULE_DESCRIPTION("User-space granted page access driver"); | ||
47 | |||
48 | static int limit = 1024; | ||
49 | module_param(limit, int, 0644); | ||
50 | MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at " | ||
51 | "once by a gntdev instance"); | ||
52 | |||
/* Per-open-file state, allocated in gntdev_open(). */
struct gntdev_priv {
	/* grant_map entries, linked through grant_map.next */
	struct list_head maps;
	/* sum of ->count over all entries on @maps */
	uint32_t used;
	/* cap on @used; starts at the "limit" module parameter */
	uint32_t limit;
	/* lock protects maps from concurrent changes */
	spinlock_t lock;
	/* mm of the task that opened the device */
	struct mm_struct *mm;
	/* notifier registered on @mm, see gntdev_mmu_ops */
	struct mmu_notifier mn;
};
62 | |||
/* One contiguous group of grant references handed in by a map ioctl. */
struct grant_map {
	/* link in gntdev_priv.maps */
	struct list_head next;
	/* owning device instance */
	struct gntdev_priv *priv;
	/* set by gntdev_mmap(), cleared by gntdev_vma_close() */
	struct vm_area_struct *vma;
	/* first page slot; matched against vm_pgoff in gntdev_mmap() */
	int index;
	/* number of grants, and of entries in each array below */
	int count;
	/* GNTMAP_* flags chosen in gntdev_mmap() */
	int flags;
	/* set to 1 once map_grant_pages() has succeeded */
	int is_mapped;
	/* grant references copied in from userspace */
	struct ioctl_gntdev_grant_ref *grants;
	/* per-page hypercall arguments, filled by find_grant_ptes() */
	struct gnttab_map_grant_ref *map_ops;
	struct gnttab_unmap_grant_ref *unmap_ops;
	/* pages allocated in gntdev_alloc_map(), one per grant */
	struct page **pages;
};
76 | |||
77 | /* ------------------------------------------------------------------ */ | ||
78 | |||
/*
 * Debug helper: dump every map of @priv, appending @text to the line
 * of the map whose index equals @text_index.  Compiles to nothing
 * unless DEBUG is defined.
 */
static void gntdev_print_maps(struct gntdev_priv *priv,
			      char *text, int text_index)
{
#ifdef DEBUG
	struct grant_map *cur;

	pr_debug("maps list (priv %p, usage %d/%d)\n",
		 priv, priv->used, priv->limit);

	list_for_each_entry(cur, &priv->maps, next) {
		const char *tag = "";

		if (cur->index == text_index && text)
			tag = text;
		pr_debug(" index %2d, count %2d %s\n",
			 cur->index, cur->count, tag);
	}
#endif
}
94 | |||
95 | static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) | ||
96 | { | ||
97 | struct grant_map *add; | ||
98 | int i; | ||
99 | |||
100 | add = kzalloc(sizeof(struct grant_map), GFP_KERNEL); | ||
101 | if (NULL == add) | ||
102 | return NULL; | ||
103 | |||
104 | add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL); | ||
105 | add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL); | ||
106 | add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL); | ||
107 | add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL); | ||
108 | if (NULL == add->grants || | ||
109 | NULL == add->map_ops || | ||
110 | NULL == add->unmap_ops || | ||
111 | NULL == add->pages) | ||
112 | goto err; | ||
113 | |||
114 | for (i = 0; i < count; i++) { | ||
115 | add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | ||
116 | if (add->pages[i] == NULL) | ||
117 | goto err; | ||
118 | } | ||
119 | |||
120 | add->index = 0; | ||
121 | add->count = count; | ||
122 | add->priv = priv; | ||
123 | |||
124 | if (add->count + priv->used > priv->limit) | ||
125 | goto err; | ||
126 | |||
127 | return add; | ||
128 | |||
129 | err: | ||
130 | if (add->pages) | ||
131 | for (i = 0; i < count; i++) { | ||
132 | if (add->pages[i]) | ||
133 | __free_page(add->pages[i]); | ||
134 | } | ||
135 | kfree(add->pages); | ||
136 | kfree(add->grants); | ||
137 | kfree(add->map_ops); | ||
138 | kfree(add->unmap_ops); | ||
139 | kfree(add); | ||
140 | return NULL; | ||
141 | } | ||
142 | |||
143 | static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add) | ||
144 | { | ||
145 | struct grant_map *map; | ||
146 | |||
147 | list_for_each_entry(map, &priv->maps, next) { | ||
148 | if (add->index + add->count < map->index) { | ||
149 | list_add_tail(&add->next, &map->next); | ||
150 | goto done; | ||
151 | } | ||
152 | add->index = map->index + map->count; | ||
153 | } | ||
154 | list_add_tail(&add->next, &priv->maps); | ||
155 | |||
156 | done: | ||
157 | priv->used += add->count; | ||
158 | gntdev_print_maps(priv, "[new]", add->index); | ||
159 | } | ||
160 | |||
161 | static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, | ||
162 | int index, int count) | ||
163 | { | ||
164 | struct grant_map *map; | ||
165 | |||
166 | list_for_each_entry(map, &priv->maps, next) { | ||
167 | if (map->index != index) | ||
168 | continue; | ||
169 | if (map->count != count) | ||
170 | continue; | ||
171 | return map; | ||
172 | } | ||
173 | return NULL; | ||
174 | } | ||
175 | |||
176 | static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv, | ||
177 | unsigned long vaddr) | ||
178 | { | ||
179 | struct grant_map *map; | ||
180 | |||
181 | list_for_each_entry(map, &priv->maps, next) { | ||
182 | if (!map->vma) | ||
183 | continue; | ||
184 | if (vaddr < map->vma->vm_start) | ||
185 | continue; | ||
186 | if (vaddr >= map->vma->vm_end) | ||
187 | continue; | ||
188 | return map; | ||
189 | } | ||
190 | return NULL; | ||
191 | } | ||
192 | |||
193 | static int gntdev_del_map(struct grant_map *map) | ||
194 | { | ||
195 | int i; | ||
196 | |||
197 | if (map->vma) | ||
198 | return -EBUSY; | ||
199 | for (i = 0; i < map->count; i++) | ||
200 | if (map->unmap_ops[i].handle) | ||
201 | return -EBUSY; | ||
202 | |||
203 | map->priv->used -= map->count; | ||
204 | list_del(&map->next); | ||
205 | return 0; | ||
206 | } | ||
207 | |||
208 | static void gntdev_free_map(struct grant_map *map) | ||
209 | { | ||
210 | int i; | ||
211 | |||
212 | if (!map) | ||
213 | return; | ||
214 | |||
215 | if (map->pages) | ||
216 | for (i = 0; i < map->count; i++) { | ||
217 | if (map->pages[i]) | ||
218 | __free_page(map->pages[i]); | ||
219 | } | ||
220 | kfree(map->pages); | ||
221 | kfree(map->grants); | ||
222 | kfree(map->map_ops); | ||
223 | kfree(map->unmap_ops); | ||
224 | kfree(map); | ||
225 | } | ||
226 | |||
227 | /* ------------------------------------------------------------------ */ | ||
228 | |||
229 | static int find_grant_ptes(pte_t *pte, pgtable_t token, | ||
230 | unsigned long addr, void *data) | ||
231 | { | ||
232 | struct grant_map *map = data; | ||
233 | unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; | ||
234 | u64 pte_maddr; | ||
235 | |||
236 | BUG_ON(pgnr >= map->count); | ||
237 | pte_maddr = arbitrary_virt_to_machine(pte).maddr; | ||
238 | |||
239 | gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, | ||
240 | GNTMAP_contains_pte | map->flags, | ||
241 | map->grants[pgnr].ref, | ||
242 | map->grants[pgnr].domid); | ||
243 | gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, | ||
244 | GNTMAP_contains_pte | map->flags, | ||
245 | 0 /* handle */); | ||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | static int map_grant_pages(struct grant_map *map) | ||
250 | { | ||
251 | int i, err = 0; | ||
252 | |||
253 | pr_debug("map %d+%d\n", map->index, map->count); | ||
254 | err = gnttab_map_refs(map->map_ops, map->pages, map->count); | ||
255 | if (err) | ||
256 | return err; | ||
257 | |||
258 | for (i = 0; i < map->count; i++) { | ||
259 | if (map->map_ops[i].status) | ||
260 | err = -EINVAL; | ||
261 | map->unmap_ops[i].handle = map->map_ops[i].handle; | ||
262 | } | ||
263 | return err; | ||
264 | } | ||
265 | |||
266 | static int unmap_grant_pages(struct grant_map *map, int offset, int pages) | ||
267 | { | ||
268 | int i, err = 0; | ||
269 | |||
270 | pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages); | ||
271 | err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages); | ||
272 | if (err) | ||
273 | return err; | ||
274 | |||
275 | for (i = 0; i < pages; i++) { | ||
276 | if (map->unmap_ops[offset+i].status) | ||
277 | err = -EINVAL; | ||
278 | map->unmap_ops[offset+i].handle = 0; | ||
279 | } | ||
280 | return err; | ||
281 | } | ||
282 | |||
283 | /* ------------------------------------------------------------------ */ | ||
284 | |||
285 | static void gntdev_vma_close(struct vm_area_struct *vma) | ||
286 | { | ||
287 | struct grant_map *map = vma->vm_private_data; | ||
288 | |||
289 | pr_debug("close %p\n", vma); | ||
290 | map->is_mapped = 0; | ||
291 | map->vma = NULL; | ||
292 | vma->vm_private_data = NULL; | ||
293 | } | ||
294 | |||
295 | static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
296 | { | ||
297 | pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n", | ||
298 | vmf->virtual_address, vmf->pgoff); | ||
299 | vmf->flags = VM_FAULT_ERROR; | ||
300 | return 0; | ||
301 | } | ||
302 | |||
/* VMA callbacks installed on each mapping by gntdev_mmap(). */
static struct vm_operations_struct gntdev_vmops = {
	.close = gntdev_vma_close,
	.fault = gntdev_vma_fault,
};
307 | |||
308 | /* ------------------------------------------------------------------ */ | ||
309 | |||
310 | static void mn_invl_range_start(struct mmu_notifier *mn, | ||
311 | struct mm_struct *mm, | ||
312 | unsigned long start, unsigned long end) | ||
313 | { | ||
314 | struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); | ||
315 | struct grant_map *map; | ||
316 | unsigned long mstart, mend; | ||
317 | int err; | ||
318 | |||
319 | spin_lock(&priv->lock); | ||
320 | list_for_each_entry(map, &priv->maps, next) { | ||
321 | if (!map->vma) | ||
322 | continue; | ||
323 | if (!map->is_mapped) | ||
324 | continue; | ||
325 | if (map->vma->vm_start >= end) | ||
326 | continue; | ||
327 | if (map->vma->vm_end <= start) | ||
328 | continue; | ||
329 | mstart = max(start, map->vma->vm_start); | ||
330 | mend = min(end, map->vma->vm_end); | ||
331 | pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", | ||
332 | map->index, map->count, | ||
333 | map->vma->vm_start, map->vma->vm_end, | ||
334 | start, end, mstart, mend); | ||
335 | err = unmap_grant_pages(map, | ||
336 | (mstart - map->vma->vm_start) >> PAGE_SHIFT, | ||
337 | (mend - mstart) >> PAGE_SHIFT); | ||
338 | WARN_ON(err); | ||
339 | } | ||
340 | spin_unlock(&priv->lock); | ||
341 | } | ||
342 | |||
343 | static void mn_invl_page(struct mmu_notifier *mn, | ||
344 | struct mm_struct *mm, | ||
345 | unsigned long address) | ||
346 | { | ||
347 | mn_invl_range_start(mn, mm, address, address + PAGE_SIZE); | ||
348 | } | ||
349 | |||
350 | static void mn_release(struct mmu_notifier *mn, | ||
351 | struct mm_struct *mm) | ||
352 | { | ||
353 | struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); | ||
354 | struct grant_map *map; | ||
355 | int err; | ||
356 | |||
357 | spin_lock(&priv->lock); | ||
358 | list_for_each_entry(map, &priv->maps, next) { | ||
359 | if (!map->vma) | ||
360 | continue; | ||
361 | pr_debug("map %d+%d (%lx %lx)\n", | ||
362 | map->index, map->count, | ||
363 | map->vma->vm_start, map->vma->vm_end); | ||
364 | err = unmap_grant_pages(map, /* offset */ 0, map->count); | ||
365 | WARN_ON(err); | ||
366 | } | ||
367 | spin_unlock(&priv->lock); | ||
368 | } | ||
369 | |||
370 | struct mmu_notifier_ops gntdev_mmu_ops = { | ||
371 | .release = mn_release, | ||
372 | .invalidate_page = mn_invl_page, | ||
373 | .invalidate_range_start = mn_invl_range_start, | ||
374 | }; | ||
375 | |||
376 | /* ------------------------------------------------------------------ */ | ||
377 | |||
378 | static int gntdev_open(struct inode *inode, struct file *flip) | ||
379 | { | ||
380 | struct gntdev_priv *priv; | ||
381 | int ret = 0; | ||
382 | |||
383 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
384 | if (!priv) | ||
385 | return -ENOMEM; | ||
386 | |||
387 | INIT_LIST_HEAD(&priv->maps); | ||
388 | spin_lock_init(&priv->lock); | ||
389 | priv->limit = limit; | ||
390 | |||
391 | priv->mm = get_task_mm(current); | ||
392 | if (!priv->mm) { | ||
393 | kfree(priv); | ||
394 | return -ENOMEM; | ||
395 | } | ||
396 | priv->mn.ops = &gntdev_mmu_ops; | ||
397 | ret = mmu_notifier_register(&priv->mn, priv->mm); | ||
398 | mmput(priv->mm); | ||
399 | |||
400 | if (ret) { | ||
401 | kfree(priv); | ||
402 | return ret; | ||
403 | } | ||
404 | |||
405 | flip->private_data = priv; | ||
406 | pr_debug("priv %p\n", priv); | ||
407 | |||
408 | return 0; | ||
409 | } | ||
410 | |||
411 | static int gntdev_release(struct inode *inode, struct file *flip) | ||
412 | { | ||
413 | struct gntdev_priv *priv = flip->private_data; | ||
414 | struct grant_map *map; | ||
415 | int err; | ||
416 | |||
417 | pr_debug("priv %p\n", priv); | ||
418 | |||
419 | spin_lock(&priv->lock); | ||
420 | while (!list_empty(&priv->maps)) { | ||
421 | map = list_entry(priv->maps.next, struct grant_map, next); | ||
422 | err = gntdev_del_map(map); | ||
423 | if (WARN_ON(err)) | ||
424 | gntdev_free_map(map); | ||
425 | |||
426 | } | ||
427 | spin_unlock(&priv->lock); | ||
428 | |||
429 | mmu_notifier_unregister(&priv->mn, priv->mm); | ||
430 | kfree(priv); | ||
431 | return 0; | ||
432 | } | ||
433 | |||
434 | static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, | ||
435 | struct ioctl_gntdev_map_grant_ref __user *u) | ||
436 | { | ||
437 | struct ioctl_gntdev_map_grant_ref op; | ||
438 | struct grant_map *map; | ||
439 | int err; | ||
440 | |||
441 | if (copy_from_user(&op, u, sizeof(op)) != 0) | ||
442 | return -EFAULT; | ||
443 | pr_debug("priv %p, add %d\n", priv, op.count); | ||
444 | if (unlikely(op.count <= 0)) | ||
445 | return -EINVAL; | ||
446 | if (unlikely(op.count > priv->limit)) | ||
447 | return -EINVAL; | ||
448 | |||
449 | err = -ENOMEM; | ||
450 | map = gntdev_alloc_map(priv, op.count); | ||
451 | if (!map) | ||
452 | return err; | ||
453 | if (copy_from_user(map->grants, &u->refs, | ||
454 | sizeof(map->grants[0]) * op.count) != 0) { | ||
455 | gntdev_free_map(map); | ||
456 | return err; | ||
457 | } | ||
458 | |||
459 | spin_lock(&priv->lock); | ||
460 | gntdev_add_map(priv, map); | ||
461 | op.index = map->index << PAGE_SHIFT; | ||
462 | spin_unlock(&priv->lock); | ||
463 | |||
464 | if (copy_to_user(u, &op, sizeof(op)) != 0) { | ||
465 | spin_lock(&priv->lock); | ||
466 | gntdev_del_map(map); | ||
467 | spin_unlock(&priv->lock); | ||
468 | gntdev_free_map(map); | ||
469 | return err; | ||
470 | } | ||
471 | return 0; | ||
472 | } | ||
473 | |||
474 | static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, | ||
475 | struct ioctl_gntdev_unmap_grant_ref __user *u) | ||
476 | { | ||
477 | struct ioctl_gntdev_unmap_grant_ref op; | ||
478 | struct grant_map *map; | ||
479 | int err = -ENOENT; | ||
480 | |||
481 | if (copy_from_user(&op, u, sizeof(op)) != 0) | ||
482 | return -EFAULT; | ||
483 | pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count); | ||
484 | |||
485 | spin_lock(&priv->lock); | ||
486 | map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); | ||
487 | if (map) | ||
488 | err = gntdev_del_map(map); | ||
489 | spin_unlock(&priv->lock); | ||
490 | if (!err) | ||
491 | gntdev_free_map(map); | ||
492 | return err; | ||
493 | } | ||
494 | |||
495 | static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, | ||
496 | struct ioctl_gntdev_get_offset_for_vaddr __user *u) | ||
497 | { | ||
498 | struct ioctl_gntdev_get_offset_for_vaddr op; | ||
499 | struct grant_map *map; | ||
500 | |||
501 | if (copy_from_user(&op, u, sizeof(op)) != 0) | ||
502 | return -EFAULT; | ||
503 | pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr); | ||
504 | |||
505 | spin_lock(&priv->lock); | ||
506 | map = gntdev_find_map_vaddr(priv, op.vaddr); | ||
507 | if (map == NULL || | ||
508 | map->vma->vm_start != op.vaddr) { | ||
509 | spin_unlock(&priv->lock); | ||
510 | return -EINVAL; | ||
511 | } | ||
512 | op.offset = map->index << PAGE_SHIFT; | ||
513 | op.count = map->count; | ||
514 | spin_unlock(&priv->lock); | ||
515 | |||
516 | if (copy_to_user(u, &op, sizeof(op)) != 0) | ||
517 | return -EFAULT; | ||
518 | return 0; | ||
519 | } | ||
520 | |||
521 | static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv, | ||
522 | struct ioctl_gntdev_set_max_grants __user *u) | ||
523 | { | ||
524 | struct ioctl_gntdev_set_max_grants op; | ||
525 | |||
526 | if (copy_from_user(&op, u, sizeof(op)) != 0) | ||
527 | return -EFAULT; | ||
528 | pr_debug("priv %p, limit %d\n", priv, op.count); | ||
529 | if (op.count > limit) | ||
530 | return -E2BIG; | ||
531 | |||
532 | spin_lock(&priv->lock); | ||
533 | priv->limit = op.count; | ||
534 | spin_unlock(&priv->lock); | ||
535 | return 0; | ||
536 | } | ||
537 | |||
538 | static long gntdev_ioctl(struct file *flip, | ||
539 | unsigned int cmd, unsigned long arg) | ||
540 | { | ||
541 | struct gntdev_priv *priv = flip->private_data; | ||
542 | void __user *ptr = (void __user *)arg; | ||
543 | |||
544 | switch (cmd) { | ||
545 | case IOCTL_GNTDEV_MAP_GRANT_REF: | ||
546 | return gntdev_ioctl_map_grant_ref(priv, ptr); | ||
547 | |||
548 | case IOCTL_GNTDEV_UNMAP_GRANT_REF: | ||
549 | return gntdev_ioctl_unmap_grant_ref(priv, ptr); | ||
550 | |||
551 | case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: | ||
552 | return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); | ||
553 | |||
554 | case IOCTL_GNTDEV_SET_MAX_GRANTS: | ||
555 | return gntdev_ioctl_set_max_grants(priv, ptr); | ||
556 | |||
557 | default: | ||
558 | pr_debug("priv %p, unknown cmd %x\n", priv, cmd); | ||
559 | return -ENOIOCTLCMD; | ||
560 | } | ||
561 | |||
562 | return 0; | ||
563 | } | ||
564 | |||
565 | static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) | ||
566 | { | ||
567 | struct gntdev_priv *priv = flip->private_data; | ||
568 | int index = vma->vm_pgoff; | ||
569 | int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | ||
570 | struct grant_map *map; | ||
571 | int err = -EINVAL; | ||
572 | |||
573 | if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) | ||
574 | return -EINVAL; | ||
575 | |||
576 | pr_debug("map %d+%d at %lx (pgoff %lx)\n", | ||
577 | index, count, vma->vm_start, vma->vm_pgoff); | ||
578 | |||
579 | spin_lock(&priv->lock); | ||
580 | map = gntdev_find_map_index(priv, index, count); | ||
581 | if (!map) | ||
582 | goto unlock_out; | ||
583 | if (map->vma) | ||
584 | goto unlock_out; | ||
585 | if (priv->mm != vma->vm_mm) { | ||
586 | printk(KERN_WARNING "Huh? Other mm?\n"); | ||
587 | goto unlock_out; | ||
588 | } | ||
589 | |||
590 | vma->vm_ops = &gntdev_vmops; | ||
591 | |||
592 | vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; | ||
593 | |||
594 | vma->vm_private_data = map; | ||
595 | map->vma = vma; | ||
596 | |||
597 | map->flags = GNTMAP_host_map | GNTMAP_application_map; | ||
598 | if (!(vma->vm_flags & VM_WRITE)) | ||
599 | map->flags |= GNTMAP_readonly; | ||
600 | |||
601 | spin_unlock(&priv->lock); | ||
602 | |||
603 | err = apply_to_page_range(vma->vm_mm, vma->vm_start, | ||
604 | vma->vm_end - vma->vm_start, | ||
605 | find_grant_ptes, map); | ||
606 | if (err) { | ||
607 | printk(KERN_WARNING "find_grant_ptes() failure.\n"); | ||
608 | return err; | ||
609 | } | ||
610 | |||
611 | err = map_grant_pages(map); | ||
612 | if (err) { | ||
613 | printk(KERN_WARNING "map_grant_pages() failure.\n"); | ||
614 | return err; | ||
615 | } | ||
616 | |||
617 | map->is_mapped = 1; | ||
618 | |||
619 | return 0; | ||
620 | |||
621 | unlock_out: | ||
622 | spin_unlock(&priv->lock); | ||
623 | return err; | ||
624 | } | ||
625 | |||
/* File operations of the gntdev misc device. */
static const struct file_operations gntdev_fops = {
	.owner = THIS_MODULE,
	.open = gntdev_open,
	.release = gntdev_release,
	.mmap = gntdev_mmap,
	.unlocked_ioctl = gntdev_ioctl
};
633 | |||
/* Misc device registered under the name "xen/gntdev" with a dynamic minor. */
static struct miscdevice gntdev_miscdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/gntdev",
	.fops = &gntdev_fops,
};
639 | |||
640 | /* ------------------------------------------------------------------ */ | ||
641 | |||
642 | static int __init gntdev_init(void) | ||
643 | { | ||
644 | int err; | ||
645 | |||
646 | if (!xen_domain()) | ||
647 | return -ENODEV; | ||
648 | |||
649 | err = misc_register(&gntdev_miscdev); | ||
650 | if (err != 0) { | ||
651 | printk(KERN_ERR "Could not register gntdev device\n"); | ||
652 | return err; | ||
653 | } | ||
654 | return 0; | ||
655 | } | ||
656 | |||
/* Module exit: unregister the misc device registered in gntdev_init(). */
static void __exit gntdev_exit(void)
{
	misc_deregister(&gntdev_miscdev);
}
661 | |||
662 | module_init(gntdev_init); | ||
663 | module_exit(gntdev_exit); | ||
664 | |||
665 | /* ------------------------------------------------------------------ */ | ||
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 6c4531816496..9ef54ebc1194 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c | |||
@@ -447,6 +447,52 @@ unsigned int gnttab_max_grant_frames(void) | |||
447 | } | 447 | } |
448 | EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); | 448 | EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); |
449 | 449 | ||
450 | int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, | ||
451 | struct page **pages, unsigned int count) | ||
452 | { | ||
453 | int i, ret; | ||
454 | pte_t *pte; | ||
455 | unsigned long mfn; | ||
456 | |||
457 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count); | ||
458 | if (ret) | ||
459 | return ret; | ||
460 | |||
461 | for (i = 0; i < count; i++) { | ||
462 | /* m2p override only supported for GNTMAP_contains_pte mappings */ | ||
463 | if (!(map_ops[i].flags & GNTMAP_contains_pte)) | ||
464 | continue; | ||
465 | pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + | ||
466 | (map_ops[i].host_addr & ~PAGE_MASK)); | ||
467 | mfn = pte_mfn(*pte); | ||
468 | ret = m2p_add_override(mfn, pages[i]); | ||
469 | if (ret) | ||
470 | return ret; | ||
471 | } | ||
472 | |||
473 | return ret; | ||
474 | } | ||
475 | EXPORT_SYMBOL_GPL(gnttab_map_refs); | ||
476 | |||
477 | int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, | ||
478 | struct page **pages, unsigned int count) | ||
479 | { | ||
480 | int i, ret; | ||
481 | |||
482 | ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); | ||
483 | if (ret) | ||
484 | return ret; | ||
485 | |||
486 | for (i = 0; i < count; i++) { | ||
487 | ret = m2p_remove_override(pages[i]); | ||
488 | if (ret) | ||
489 | return ret; | ||
490 | } | ||
491 | |||
492 | return ret; | ||
493 | } | ||
494 | EXPORT_SYMBOL_GPL(gnttab_unmap_refs); | ||
495 | |||
450 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | 496 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) |
451 | { | 497 | { |
452 | struct gnttab_setup_table setup; | 498 | struct gnttab_setup_table setup; |
diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h new file mode 100644 index 000000000000..eb23f4188f5a --- /dev/null +++ b/include/xen/gntdev.h | |||
@@ -0,0 +1,119 @@ | |||
1 | /****************************************************************************** | ||
2 | * gntdev.h | ||
3 | * | ||
4 | * Interface to /dev/xen/gntdev. | ||
5 | * | ||
6 | * Copyright (c) 2007, D G Murray | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License version 2 | ||
10 | * as published by the Free Software Foundation; or, when distributed | ||
11 | * separately from the Linux kernel or incorporated into other | ||
12 | * software packages, subject to the following license: | ||
13 | * | ||
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
15 | * of this source file (the "Software"), to deal in the Software without | ||
16 | * restriction, including without limitation the rights to use, copy, modify, | ||
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
18 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
19 | * the following conditions: | ||
20 | * | ||
21 | * The above copyright notice and this permission notice shall be included in | ||
22 | * all copies or substantial portions of the Software. | ||
23 | * | ||
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
30 | * IN THE SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef __LINUX_PUBLIC_GNTDEV_H__ | ||
34 | #define __LINUX_PUBLIC_GNTDEV_H__ | ||
35 | |||
36 | struct ioctl_gntdev_grant_ref { | ||
37 | /* The domain ID of the grant to be mapped. */ | ||
38 | uint32_t domid; | ||
39 | /* The grant reference of the grant to be mapped. */ | ||
40 | uint32_t ref; | ||
41 | }; | ||
42 | |||
43 | /* | ||
44 | * Inserts the grant references into the mapping table of an instance | ||
45 | * of gntdev. N.B. This does not perform the mapping, which is deferred | ||
46 | * until mmap() is called with @index as the offset. | ||
47 | */ | ||
48 | #define IOCTL_GNTDEV_MAP_GRANT_REF \ | ||
49 | _IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref)) | ||
50 | struct ioctl_gntdev_map_grant_ref { | ||
51 | /* IN parameters */ | ||
52 | /* The number of grants to be mapped. */ | ||
53 | uint32_t count; | ||
54 | uint32_t pad; | ||
55 | /* OUT parameters */ | ||
56 | /* The offset to be used on a subsequent call to mmap(). */ | ||
57 | uint64_t index; | ||
58 | /* Variable IN parameter. */ | ||
59 | /* Array of grant references, of size @count. */ | ||
60 | struct ioctl_gntdev_grant_ref refs[1]; | ||
61 | }; | ||
62 | |||
63 | /* | ||
64 | * Removes the grant references from the mapping table of an instance of | ||
65 | * gntdev. N.B. munmap() must be called on the relevant virtual address(es) | ||
66 | * before this ioctl is called, or an error will result. | ||
67 | */ | ||
68 | #define IOCTL_GNTDEV_UNMAP_GRANT_REF \ | ||
69 | _IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref)) | ||
70 | struct ioctl_gntdev_unmap_grant_ref { | ||
71 | /* IN parameters */ | ||
72 | /* The offset returned by the corresponding map operation. */ | ||
73 | uint64_t index; | ||
74 | /* The number of pages to be unmapped. */ | ||
75 | uint32_t count; | ||
76 | uint32_t pad; | ||
77 | }; | ||
78 | |||
79 | /* | ||
80 | * Returns the offset in the driver's address space that corresponds | ||
81 | * to @vaddr. This can be used to perform a munmap(), followed by an | ||
82 | * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by | ||
83 | * the caller. The number of pages that were allocated at the same time as | ||
84 | * @vaddr is returned in @count. | ||
85 | * | ||
86 | * N.B. Where more than one page has been mapped into a contiguous range, the | ||
87 | * supplied @vaddr must correspond to the start of the range; otherwise | ||
88 | * an error will result. It is only possible to munmap() the entire | ||
89 | * contiguously-allocated range at once, and not any subrange thereof. | ||
90 | */ | ||
91 | #define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \ | ||
92 | _IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr)) | ||
93 | struct ioctl_gntdev_get_offset_for_vaddr { | ||
94 | /* IN parameters */ | ||
95 | /* The virtual address of the first mapped page in a range. */ | ||
96 | uint64_t vaddr; | ||
97 | /* OUT parameters */ | ||
98 | /* The offset that was used in the initial mmap() operation. */ | ||
99 | uint64_t offset; | ||
100 | /* The number of pages mapped in the VM area that begins at @vaddr. */ | ||
101 | uint32_t count; | ||
102 | uint32_t pad; | ||
103 | }; | ||
104 | |||
105 | /* | ||
106 | * Sets the maximum number of grants that may be mapped at once by this gntdev | ||
107 | * instance. | ||
108 | * | ||
109 | * N.B. This must be called before any other ioctl is performed on the device. | ||
110 | */ | ||
111 | #define IOCTL_GNTDEV_SET_MAX_GRANTS \ | ||
112 | _IOC(_IOC_NONE, 'G', 3, sizeof(struct ioctl_gntdev_set_max_grants)) | ||
113 | struct ioctl_gntdev_set_max_grants { | ||
114 | /* IN parameter */ | ||
115 | /* The maximum number of grants that may be mapped at once. */ | ||
116 | uint32_t count; | ||
117 | }; | ||
118 | |||
119 | #endif /* __LINUX_PUBLIC_GNTDEV_H__ */ | ||
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 9a731706a016..b1fab6b5b3ef 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h | |||
@@ -37,10 +37,16 @@ | |||
37 | #ifndef __ASM_GNTTAB_H__ | 37 | #ifndef __ASM_GNTTAB_H__ |
38 | #define __ASM_GNTTAB_H__ | 38 | #define __ASM_GNTTAB_H__ |
39 | 39 | ||
40 | #include <asm/xen/hypervisor.h> | 40 | #include <asm/page.h> |
41 | |||
42 | #include <xen/interface/xen.h> | ||
41 | #include <xen/interface/grant_table.h> | 43 | #include <xen/interface/grant_table.h> |
44 | |||
45 | #include <asm/xen/hypervisor.h> | ||
42 | #include <asm/xen/grant_table.h> | 46 | #include <asm/xen/grant_table.h> |
43 | 47 | ||
48 | #include <xen/features.h> | ||
49 | |||
44 | /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ | 50 | /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ |
45 | #define NR_GRANT_FRAMES 4 | 51 | #define NR_GRANT_FRAMES 4 |
46 | 52 | ||
@@ -107,6 +113,37 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, | |||
107 | void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, | 113 | void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, |
108 | unsigned long pfn); | 114 | unsigned long pfn); |
109 | 115 | ||
116 | static inline void | ||
117 | gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr, | ||
118 | uint32_t flags, grant_ref_t ref, domid_t domid) | ||
119 | { | ||
120 | if (flags & GNTMAP_contains_pte) | ||
121 | map->host_addr = addr; | ||
122 | else if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
123 | map->host_addr = __pa(addr); | ||
124 | else | ||
125 | map->host_addr = addr; | ||
126 | |||
127 | map->flags = flags; | ||
128 | map->ref = ref; | ||
129 | map->dom = domid; | ||
130 | } | ||
131 | |||
132 | static inline void | ||
133 | gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr, | ||
134 | uint32_t flags, grant_handle_t handle) | ||
135 | { | ||
136 | if (flags & GNTMAP_contains_pte) | ||
137 | unmap->host_addr = addr; | ||
138 | else if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
139 | unmap->host_addr = __pa(addr); | ||
140 | else | ||
141 | unmap->host_addr = addr; | ||
142 | |||
143 | unmap->handle = handle; | ||
144 | unmap->dev_bus_addr = 0; | ||
145 | } | ||
146 | |||
110 | int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, | 147 | int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, |
111 | unsigned long max_nr_gframes, | 148 | unsigned long max_nr_gframes, |
112 | struct grant_entry **__shared); | 149 | struct grant_entry **__shared); |
@@ -118,4 +155,9 @@ unsigned int gnttab_max_grant_frames(void); | |||
118 | 155 | ||
119 | #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) | 156 | #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) |
120 | 157 | ||
158 | int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, | ||
159 | struct page **pages, unsigned int count); | ||
160 | int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, | ||
161 | struct page **pages, unsigned int count); | ||
162 | |||
121 | #endif /* __ASM_GNTTAB_H__ */ | 163 | #endif /* __ASM_GNTTAB_H__ */ |