Diffstat (limited to 'drivers/xen')
-rw-r--r--  drivers/xen/Kconfig        10
-rw-r--r--  drivers/xen/Makefile        6
-rw-r--r--  drivers/xen/balloon.c     359
-rw-r--r--  drivers/xen/gntalloc.c    545
-rw-r--r--  drivers/xen/gntdev.c      382
-rw-r--r--  drivers/xen/grant-table.c  10
-rw-r--r--  drivers/xen/xen-balloon.c 256
7 files changed, 1184 insertions, 384 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 07bec09d1dad..a59638b37c1a 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -76,10 +76,20 @@ config XEN_XENBUS_FRONTEND
 config XEN_GNTDEV
 	tristate "userspace grant access device driver"
 	depends on XEN
+	default m
 	select MMU_NOTIFIER
 	help
 	  Allows userspace processes to use grants.
 
+config XEN_GRANT_DEV_ALLOC
+	tristate "User-space grant reference allocator driver"
+	depends on XEN
+	default m
+	help
+	  Allows userspace processes to create pages with access granted
+	  to other domains. This can be used to implement frontend drivers
+	  or as part of an inter-domain shared memory channel.
+
 config XEN_PLATFORM_PCI
 	tristate "xen platform pci device driver"
 	depends on XEN_PVHVM && PCI
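
Both userspace devices default to m, so a kernel built from this tree will normally carry them as modules; an illustrative .config fragment (the exact values naturally depend on the rest of the configuration):

CONFIG_XEN_GNTDEV=m
CONFIG_XEN_GRANT_DEV_ALLOC=m
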
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 5088cc2e6fe2..f420f1ff7f13 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,4 +1,4 @@
-obj-y	+= grant-table.o features.o events.o manage.o
+obj-y	+= grant-table.o features.o events.o manage.o balloon.o
 obj-y	+= xenbus/
 
 nostackp := $(call cc-option, -fno-stack-protector)
@@ -7,9 +7,10 @@ CFLAGS_features.o := $(nostackp)
 obj-$(CONFIG_BLOCK)		+= biomerge.o
 obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
-obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
+obj-$(CONFIG_XEN_BALLOON)	+= xen-balloon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
 obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XENFS)		+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
 obj-$(CONFIG_XEN_PLATFORM_PCI)	+= xen-platform-pci.o
@@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
 
 xen-evtchn-y			:= evtchn.o
 xen-gntdev-y			:= gntdev.o
+xen-gntalloc-y			:= gntalloc.o
 
 xen-platform-pci-y		:= platform-pci.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 718050ace08f..043af8ad6b60 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -1,6 +1,4 @@
1/****************************************************************************** 1/******************************************************************************
2 * balloon.c
3 *
4 * Xen balloon driver - enables returning/claiming memory to/from Xen. 2 * Xen balloon driver - enables returning/claiming memory to/from Xen.
5 * 3 *
6 * Copyright (c) 2003, B Dragovic 4 * Copyright (c) 2003, B Dragovic
@@ -33,7 +31,6 @@
33 */ 31 */
34 32
35#include <linux/kernel.h> 33#include <linux/kernel.h>
36#include <linux/module.h>
37#include <linux/sched.h> 34#include <linux/sched.h>
38#include <linux/errno.h> 35#include <linux/errno.h>
39#include <linux/mm.h> 36#include <linux/mm.h>
@@ -42,13 +39,11 @@
42#include <linux/highmem.h> 39#include <linux/highmem.h>
43#include <linux/mutex.h> 40#include <linux/mutex.h>
44#include <linux/list.h> 41#include <linux/list.h>
45#include <linux/sysdev.h>
46#include <linux/gfp.h> 42#include <linux/gfp.h>
47 43
48#include <asm/page.h> 44#include <asm/page.h>
49#include <asm/pgalloc.h> 45#include <asm/pgalloc.h>
50#include <asm/pgtable.h> 46#include <asm/pgtable.h>
51#include <asm/uaccess.h>
52#include <asm/tlb.h> 47#include <asm/tlb.h>
53#include <asm/e820.h> 48#include <asm/e820.h>
54 49
@@ -58,35 +53,29 @@
58#include <xen/xen.h> 53#include <xen/xen.h>
59#include <xen/interface/xen.h> 54#include <xen/interface/xen.h>
60#include <xen/interface/memory.h> 55#include <xen/interface/memory.h>
61#include <xen/xenbus.h> 56#include <xen/balloon.h>
62#include <xen/features.h> 57#include <xen/features.h>
63#include <xen/page.h> 58#include <xen/page.h>
64 59
65#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 60/*
66 61 * balloon_process() state:
67#define BALLOON_CLASS_NAME "xen_memory" 62 *
63 * BP_DONE: done or nothing to do,
64 * BP_EAGAIN: error, go to sleep,
65 * BP_ECANCELED: error, balloon operation canceled.
66 */
68 67
69struct balloon_stats { 68enum bp_state {
70 /* We aim for 'current allocation' == 'target allocation'. */ 69 BP_DONE,
71 unsigned long current_pages; 70 BP_EAGAIN,
72 unsigned long target_pages; 71 BP_ECANCELED
73 /*
74 * Drivers may alter the memory reservation independently, but they
75 * must inform the balloon driver so we avoid hitting the hard limit.
76 */
77 unsigned long driver_pages;
78 /* Number of pages in high- and low-memory balloons. */
79 unsigned long balloon_low;
80 unsigned long balloon_high;
81}; 72};
82 73
83static DEFINE_MUTEX(balloon_mutex);
84
85static struct sys_device balloon_sysdev;
86 74
87static int register_balloon(struct sys_device *sysdev); 75static DEFINE_MUTEX(balloon_mutex);
88 76
89static struct balloon_stats balloon_stats; 77struct balloon_stats balloon_stats;
78EXPORT_SYMBOL_GPL(balloon_stats);
90 79
91/* We increase/decrease in batches which fit in a page */ 80/* We increase/decrease in batches which fit in a page */
92static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 81static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
@@ -104,8 +93,7 @@ static LIST_HEAD(ballooned_pages);
104 93
105/* Main work function, always executed in process context. */ 94/* Main work function, always executed in process context. */
106static void balloon_process(struct work_struct *work); 95static void balloon_process(struct work_struct *work);
107static DECLARE_WORK(balloon_worker, balloon_process); 96static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
108static struct timer_list balloon_timer;
109 97
110/* When ballooning out (allocating memory to return to Xen) we don't really 98/* When ballooning out (allocating memory to return to Xen) we don't really
111 want the kernel to try too hard since that can trigger the oom killer. */ 99 want the kernel to try too hard since that can trigger the oom killer. */
@@ -140,14 +128,17 @@ static void balloon_append(struct page *page)
140} 128}
141 129
142/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ 130/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
143static struct page *balloon_retrieve(void) 131static struct page *balloon_retrieve(bool prefer_highmem)
144{ 132{
145 struct page *page; 133 struct page *page;
146 134
147 if (list_empty(&ballooned_pages)) 135 if (list_empty(&ballooned_pages))
148 return NULL; 136 return NULL;
149 137
150 page = list_entry(ballooned_pages.next, struct page, lru); 138 if (prefer_highmem)
139 page = list_entry(ballooned_pages.prev, struct page, lru);
140 else
141 page = list_entry(ballooned_pages.next, struct page, lru);
151 list_del(&page->lru); 142 list_del(&page->lru);
152 143
153 if (PageHighMem(page)) { 144 if (PageHighMem(page)) {
@@ -177,9 +168,29 @@ static struct page *balloon_next_page(struct page *page)
177 return list_entry(next, struct page, lru); 168 return list_entry(next, struct page, lru);
178} 169}
179 170
180static void balloon_alarm(unsigned long unused) 171static enum bp_state update_schedule(enum bp_state state)
181{ 172{
182 schedule_work(&balloon_worker); 173 if (state == BP_DONE) {
174 balloon_stats.schedule_delay = 1;
175 balloon_stats.retry_count = 1;
176 return BP_DONE;
177 }
178
179 ++balloon_stats.retry_count;
180
181 if (balloon_stats.max_retry_count != RETRY_UNLIMITED &&
182 balloon_stats.retry_count > balloon_stats.max_retry_count) {
183 balloon_stats.schedule_delay = 1;
184 balloon_stats.retry_count = 1;
185 return BP_ECANCELED;
186 }
187
188 balloon_stats.schedule_delay <<= 1;
189
190 if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
191 balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;
192
193 return BP_EAGAIN;
183} 194}
184 195
185static unsigned long current_target(void) 196static unsigned long current_target(void)
@@ -194,11 +205,11 @@ static unsigned long current_target(void)
194 return target; 205 return target;
195} 206}
196 207
197static int increase_reservation(unsigned long nr_pages) 208static enum bp_state increase_reservation(unsigned long nr_pages)
198{ 209{
210 int rc;
199 unsigned long pfn, i; 211 unsigned long pfn, i;
200 struct page *page; 212 struct page *page;
201 long rc;
202 struct xen_memory_reservation reservation = { 213 struct xen_memory_reservation reservation = {
203 .address_bits = 0, 214 .address_bits = 0,
204 .extent_order = 0, 215 .extent_order = 0,
@@ -210,7 +221,10 @@ static int increase_reservation(unsigned long nr_pages)
210 221
211 page = balloon_first_page(); 222 page = balloon_first_page();
212 for (i = 0; i < nr_pages; i++) { 223 for (i = 0; i < nr_pages; i++) {
213 BUG_ON(page == NULL); 224 if (!page) {
225 nr_pages = i;
226 break;
227 }
214 frame_list[i] = page_to_pfn(page); 228 frame_list[i] = page_to_pfn(page);
215 page = balloon_next_page(page); 229 page = balloon_next_page(page);
216 } 230 }
@@ -218,11 +232,11 @@ static int increase_reservation(unsigned long nr_pages)
218 set_xen_guest_handle(reservation.extent_start, frame_list); 232 set_xen_guest_handle(reservation.extent_start, frame_list);
219 reservation.nr_extents = nr_pages; 233 reservation.nr_extents = nr_pages;
220 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); 234 rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
221 if (rc < 0) 235 if (rc <= 0)
222 goto out; 236 return BP_EAGAIN;
223 237
224 for (i = 0; i < rc; i++) { 238 for (i = 0; i < rc; i++) {
225 page = balloon_retrieve(); 239 page = balloon_retrieve(false);
226 BUG_ON(page == NULL); 240 BUG_ON(page == NULL);
227 241
228 pfn = page_to_pfn(page); 242 pfn = page_to_pfn(page);
@@ -249,15 +263,14 @@ static int increase_reservation(unsigned long nr_pages)
249 263
250 balloon_stats.current_pages += rc; 264 balloon_stats.current_pages += rc;
251 265
252 out: 266 return BP_DONE;
253 return rc < 0 ? rc : rc != nr_pages;
254} 267}
255 268
256static int decrease_reservation(unsigned long nr_pages) 269static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
257{ 270{
271 enum bp_state state = BP_DONE;
258 unsigned long pfn, i; 272 unsigned long pfn, i;
259 struct page *page; 273 struct page *page;
260 int need_sleep = 0;
261 int ret; 274 int ret;
262 struct xen_memory_reservation reservation = { 275 struct xen_memory_reservation reservation = {
263 .address_bits = 0, 276 .address_bits = 0,
@@ -269,9 +282,9 @@ static int decrease_reservation(unsigned long nr_pages)
269 nr_pages = ARRAY_SIZE(frame_list); 282 nr_pages = ARRAY_SIZE(frame_list);
270 283
271 for (i = 0; i < nr_pages; i++) { 284 for (i = 0; i < nr_pages; i++) {
272 if ((page = alloc_page(GFP_BALLOON)) == NULL) { 285 if ((page = alloc_page(gfp)) == NULL) {
273 nr_pages = i; 286 nr_pages = i;
274 need_sleep = 1; 287 state = BP_EAGAIN;
275 break; 288 break;
276 } 289 }
277 290
@@ -307,7 +320,7 @@ static int decrease_reservation(unsigned long nr_pages)
307 320
308 balloon_stats.current_pages -= nr_pages; 321 balloon_stats.current_pages -= nr_pages;
309 322
310 return need_sleep; 323 return state;
311} 324}
312 325
313/* 326/*
@@ -318,77 +331,101 @@ static int decrease_reservation(unsigned long nr_pages)
318 */ 331 */
319static void balloon_process(struct work_struct *work) 332static void balloon_process(struct work_struct *work)
320{ 333{
321 int need_sleep = 0; 334 enum bp_state state = BP_DONE;
322 long credit; 335 long credit;
323 336
324 mutex_lock(&balloon_mutex); 337 mutex_lock(&balloon_mutex);
325 338
326 do { 339 do {
327 credit = current_target() - balloon_stats.current_pages; 340 credit = current_target() - balloon_stats.current_pages;
341
328 if (credit > 0) 342 if (credit > 0)
329 need_sleep = (increase_reservation(credit) != 0); 343 state = increase_reservation(credit);
344
330 if (credit < 0) 345 if (credit < 0)
331 need_sleep = (decrease_reservation(-credit) != 0); 346 state = decrease_reservation(-credit, GFP_BALLOON);
347
348 state = update_schedule(state);
332 349
333#ifndef CONFIG_PREEMPT 350#ifndef CONFIG_PREEMPT
334 if (need_resched()) 351 if (need_resched())
335 schedule(); 352 schedule();
336#endif 353#endif
337 } while ((credit != 0) && !need_sleep); 354 } while (credit && state == BP_DONE);
338 355
339 /* Schedule more work if there is some still to be done. */ 356 /* Schedule more work if there is some still to be done. */
340 if (current_target() != balloon_stats.current_pages) 357 if (state == BP_EAGAIN)
341 mod_timer(&balloon_timer, jiffies + HZ); 358 schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
342 359
343 mutex_unlock(&balloon_mutex); 360 mutex_unlock(&balloon_mutex);
344} 361}
345 362
346/* Resets the Xen limit, sets new target, and kicks off processing. */ 363/* Resets the Xen limit, sets new target, and kicks off processing. */
347static void balloon_set_new_target(unsigned long target) 364void balloon_set_new_target(unsigned long target)
348{ 365{
349 /* No need for lock. Not read-modify-write updates. */ 366 /* No need for lock. Not read-modify-write updates. */
350 balloon_stats.target_pages = target; 367 balloon_stats.target_pages = target;
351 schedule_work(&balloon_worker); 368 schedule_delayed_work(&balloon_worker, 0);
352} 369}
370EXPORT_SYMBOL_GPL(balloon_set_new_target);
353 371
354static struct xenbus_watch target_watch = 372/**
355{ 373 * alloc_xenballooned_pages - get pages that have been ballooned out
356 .node = "memory/target" 374 * @nr_pages: Number of pages to get
357}; 375 * @pages: pages returned
358 376 * @return 0 on success, error otherwise
359/* React to a change in the target key */ 377 */
360static void watch_target(struct xenbus_watch *watch, 378int alloc_xenballooned_pages(int nr_pages, struct page** pages)
361 const char **vec, unsigned int len)
362{ 379{
363 unsigned long long new_target; 380 int pgno = 0;
364 int err; 381 struct page* page;
365 382 mutex_lock(&balloon_mutex);
366 err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); 383 while (pgno < nr_pages) {
367 if (err != 1) { 384 page = balloon_retrieve(true);
368 /* This is ok (for domain0 at least) - so just return */ 385 if (page) {
369 return; 386 pages[pgno++] = page;
387 } else {
388 enum bp_state st;
389 st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
390 if (st != BP_DONE)
391 goto out_undo;
392 }
370 } 393 }
371 394 mutex_unlock(&balloon_mutex);
372 /* The given memory/target value is in KiB, so it needs converting to 395 return 0;
373 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 396 out_undo:
374 */ 397 while (pgno)
375 balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); 398 balloon_append(pages[--pgno]);
399 /* Free the memory back to the kernel soon */
400 schedule_delayed_work(&balloon_worker, 0);
401 mutex_unlock(&balloon_mutex);
402 return -ENOMEM;
376} 403}
404EXPORT_SYMBOL(alloc_xenballooned_pages);
377 405
378static int balloon_init_watcher(struct notifier_block *notifier, 406/**
379 unsigned long event, 407 * free_xenballooned_pages - return pages retrieved with get_ballooned_pages
380 void *data) 408 * @nr_pages: Number of pages
409 * @pages: pages to return
410 */
411void free_xenballooned_pages(int nr_pages, struct page** pages)
381{ 412{
382 int err; 413 int i;
383 414
384 err = register_xenbus_watch(&target_watch); 415 mutex_lock(&balloon_mutex);
385 if (err)
386 printk(KERN_ERR "Failed to set balloon watcher\n");
387 416
388 return NOTIFY_DONE; 417 for (i = 0; i < nr_pages; i++) {
389} 418 if (pages[i])
419 balloon_append(pages[i]);
420 }
421
422 /* The balloon may be too large now. Shrink it if needed. */
423 if (current_target() != balloon_stats.current_pages)
424 schedule_delayed_work(&balloon_worker, 0);
390 425
391static struct notifier_block xenstore_notifier; 426 mutex_unlock(&balloon_mutex);
427}
428EXPORT_SYMBOL(free_xenballooned_pages);
392 429
393static int __init balloon_init(void) 430static int __init balloon_init(void)
394{ 431{
@@ -398,7 +435,7 @@ static int __init balloon_init(void)
398 if (!xen_domain()) 435 if (!xen_domain())
399 return -ENODEV; 436 return -ENODEV;
400 437
401 pr_info("xen_balloon: Initialising balloon driver.\n"); 438 pr_info("xen/balloon: Initialising balloon driver.\n");
402 439
403 if (xen_pv_domain()) 440 if (xen_pv_domain())
404 nr_pages = xen_start_info->nr_pages; 441 nr_pages = xen_start_info->nr_pages;
@@ -408,13 +445,11 @@ static int __init balloon_init(void)
408 balloon_stats.target_pages = balloon_stats.current_pages; 445 balloon_stats.target_pages = balloon_stats.current_pages;
409 balloon_stats.balloon_low = 0; 446 balloon_stats.balloon_low = 0;
410 balloon_stats.balloon_high = 0; 447 balloon_stats.balloon_high = 0;
411 balloon_stats.driver_pages = 0UL;
412
413 init_timer(&balloon_timer);
414 balloon_timer.data = 0;
415 balloon_timer.function = balloon_alarm;
416 448
417 register_balloon(&balloon_sysdev); 449 balloon_stats.schedule_delay = 1;
450 balloon_stats.max_schedule_delay = 32;
451 balloon_stats.retry_count = 1;
452 balloon_stats.max_retry_count = RETRY_UNLIMITED;
418 453
419 /* 454 /*
420 * Initialise the balloon with excess memory space. We need 455 * Initialise the balloon with excess memory space. We need
@@ -436,153 +471,9 @@ static int __init balloon_init(void)
436 __balloon_append(page); 471 __balloon_append(page);
437 } 472 }
438 473
439 target_watch.callback = watch_target;
440 xenstore_notifier.notifier_call = balloon_init_watcher;
441
442 register_xenstore_notifier(&xenstore_notifier);
443
444 return 0; 474 return 0;
445} 475}
446 476
447subsys_initcall(balloon_init); 477subsys_initcall(balloon_init);
448 478
449static void balloon_exit(void)
450{
451 /* XXX - release balloon here */
452 return;
453}
454
455module_exit(balloon_exit);
456
457#define BALLOON_SHOW(name, format, args...) \
458 static ssize_t show_##name(struct sys_device *dev, \
459 struct sysdev_attribute *attr, \
460 char *buf) \
461 { \
462 return sprintf(buf, format, ##args); \
463 } \
464 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
465
466BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
467BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
468BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
469BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
470
471static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
472 char *buf)
473{
474 return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
475}
476
477static ssize_t store_target_kb(struct sys_device *dev,
478 struct sysdev_attribute *attr,
479 const char *buf,
480 size_t count)
481{
482 char *endchar;
483 unsigned long long target_bytes;
484
485 if (!capable(CAP_SYS_ADMIN))
486 return -EPERM;
487
488 target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
489
490 balloon_set_new_target(target_bytes >> PAGE_SHIFT);
491
492 return count;
493}
494
495static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
496 show_target_kb, store_target_kb);
497
498
499static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr,
500 char *buf)
501{
502 return sprintf(buf, "%llu\n",
503 (unsigned long long)balloon_stats.target_pages
504 << PAGE_SHIFT);
505}
506
507static ssize_t store_target(struct sys_device *dev,
508 struct sysdev_attribute *attr,
509 const char *buf,
510 size_t count)
511{
512 char *endchar;
513 unsigned long long target_bytes;
514
515 if (!capable(CAP_SYS_ADMIN))
516 return -EPERM;
517
518 target_bytes = memparse(buf, &endchar);
519
520 balloon_set_new_target(target_bytes >> PAGE_SHIFT);
521
522 return count;
523}
524
525static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
526 show_target, store_target);
527
528
529static struct sysdev_attribute *balloon_attrs[] = {
530 &attr_target_kb,
531 &attr_target,
532};
533
534static struct attribute *balloon_info_attrs[] = {
535 &attr_current_kb.attr,
536 &attr_low_kb.attr,
537 &attr_high_kb.attr,
538 &attr_driver_kb.attr,
539 NULL
540};
541
542static struct attribute_group balloon_info_group = {
543 .name = "info",
544 .attrs = balloon_info_attrs,
545};
546
547static struct sysdev_class balloon_sysdev_class = {
548 .name = BALLOON_CLASS_NAME,
549};
550
551static int register_balloon(struct sys_device *sysdev)
552{
553 int i, error;
554
555 error = sysdev_class_register(&balloon_sysdev_class);
556 if (error)
557 return error;
558
559 sysdev->id = 0;
560 sysdev->cls = &balloon_sysdev_class;
561
562 error = sysdev_register(sysdev);
563 if (error) {
564 sysdev_class_unregister(&balloon_sysdev_class);
565 return error;
566 }
567
568 for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
569 error = sysdev_create_file(sysdev, balloon_attrs[i]);
570 if (error)
571 goto fail;
572 }
573
574 error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
575 if (error)
576 goto fail;
577
578 return 0;
579
580 fail:
581 while (--i >= 0)
582 sysdev_remove_file(sysdev, balloon_attrs[i]);
583 sysdev_unregister(sysdev);
584 sysdev_class_unregister(&balloon_sysdev_class);
585 return error;
586}
587
588MODULE_LICENSE("GPL"); 479MODULE_LICENSE("GPL");
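
The balloon driver proper now exports balloon_stats, balloon_set_new_target() and the new alloc_xenballooned_pages()/free_xenballooned_pages() helpers, so other drivers can borrow frames that are already ballooned out instead of allocating fresh pages; the sysfs and xenbus-watch plumbing removed above moves into the new xen-balloon.c listed in the diffstat. The gntdev changes further down switch to exactly this interface. A minimal sketch of a kernel-side caller, assuming only the two prototypes exported here via <xen/balloon.h> (the demo_* names are invented for illustration):

#include <linux/slab.h>
#include <xen/balloon.h>

static struct page **demo_pages;	/* illustrative state, not part of this patch */
static int demo_count;

static int demo_get_ballooned_frames(int count)
{
	int ret;

	demo_pages = kcalloc(count, sizeof(demo_pages[0]), GFP_KERNEL);
	if (!demo_pages)
		return -ENOMEM;

	/* Pull 'count' frames out of the balloon, ballooning out more if needed. */
	ret = alloc_xenballooned_pages(count, demo_pages);
	if (ret) {
		kfree(demo_pages);
		return ret;
	}
	demo_count = count;
	return 0;
}

static void demo_put_ballooned_frames(void)
{
	/* Give the frames back; the worker shrinks the balloon if it is now too big. */
	free_xenballooned_pages(demo_count, demo_pages);
	kfree(demo_pages);
	demo_pages = NULL;
	demo_count = 0;
}
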
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
new file mode 100644
index 000000000000..a7ffdfe19fc9
--- /dev/null
+++ b/drivers/xen/gntalloc.c
@@ -0,0 +1,545 @@
1/******************************************************************************
2 * gntalloc.c
3 *
4 * Device for creating grant references (in user-space) that may be shared
5 * with other domains.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 */
16
17/*
18 * This driver exists to allow userspace programs in Linux to allocate kernel
19 * memory that will later be shared with another domain. Without this device,
20 * Linux userspace programs cannot create grant references.
21 *
22 * How this stuff works:
23 * X -> granting a page to Y
24 * Y -> mapping the grant from X
25 *
26 * 1. X uses the gntalloc device to allocate a page of kernel memory, P.
27 * 2. X creates an entry in the grant table that says domid(Y) can access P.
28 * This is done without a hypercall unless the grant table needs expansion.
29 * 3. X gives the grant reference identifier, GREF, to Y.
30 * 4. Y maps the page, either directly into kernel memory for use in a backend
31 * driver, or via the gntdev device to map into the address space of an
32 * application running in Y. This is the first point at which Xen does any
33 * tracking of the page.
34 * 5. A program in X mmap()s a segment of the gntalloc device that corresponds
35 * to the shared page, and can now communicate with Y over the shared page.
36 *
37 *
38 * NOTE TO USERSPACE LIBRARIES:
39 * The grant allocation and mmap()ing are, naturally, two separate operations.
40 * You set up the sharing by calling the create ioctl() and then the mmap().
41 * Teardown requires munmap() and either close() or ioctl().
42 *
43 * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
44 * reference, this device can be used to consume kernel memory by leaving grant
45 * references mapped by another domain when an application exits. Therefore,
46 * there is a global limit on the number of pages that can be allocated. When
47 * all references to the page are unmapped, it will be freed during the next
48 * grant operation.
49 */
50
51#include <linux/atomic.h>
52#include <linux/module.h>
53#include <linux/miscdevice.h>
54#include <linux/kernel.h>
55#include <linux/init.h>
56#include <linux/slab.h>
57#include <linux/fs.h>
58#include <linux/device.h>
59#include <linux/mm.h>
60#include <linux/uaccess.h>
61#include <linux/types.h>
62#include <linux/list.h>
63#include <linux/highmem.h>
64
65#include <xen/xen.h>
66#include <xen/page.h>
67#include <xen/grant_table.h>
68#include <xen/gntalloc.h>
69#include <xen/events.h>
70
71static int limit = 1024;
72module_param(limit, int, 0644);
73MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
74 "the gntalloc device");
75
76static LIST_HEAD(gref_list);
77static DEFINE_SPINLOCK(gref_lock);
78static int gref_size;
79
80struct notify_info {
81 uint16_t pgoff:12; /* Bits 0-11: Offset of the byte to clear */
82 uint16_t flags:2; /* Bits 12-13: Unmap notification flags */
83 int event; /* Port (event channel) to notify */
84};
85
86/* Metadata on a grant reference. */
87struct gntalloc_gref {
88 struct list_head next_gref; /* list entry gref_list */
89 struct list_head next_file; /* list entry file->list, if open */
90 struct page *page; /* The shared page */
91 uint64_t file_index; /* File offset for mmap() */
92 unsigned int users; /* Use count - when zero, waiting on Xen */
93 grant_ref_t gref_id; /* The grant reference number */
94 struct notify_info notify; /* Unmap notification */
95};
96
97struct gntalloc_file_private_data {
98 struct list_head list;
99 uint64_t index;
100};
101
102static void __del_gref(struct gntalloc_gref *gref);
103
104static void do_cleanup(void)
105{
106 struct gntalloc_gref *gref, *n;
107 list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
108 if (!gref->users)
109 __del_gref(gref);
110 }
111}
112
113static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
114 uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
115{
116 int i, rc, readonly;
117 LIST_HEAD(queue_gref);
118 LIST_HEAD(queue_file);
119 struct gntalloc_gref *gref;
120
121 readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
122 rc = -ENOMEM;
123 for (i = 0; i < op->count; i++) {
124 gref = kzalloc(sizeof(*gref), GFP_KERNEL);
125 if (!gref)
126 goto undo;
127 list_add_tail(&gref->next_gref, &queue_gref);
128 list_add_tail(&gref->next_file, &queue_file);
129 gref->users = 1;
130 gref->file_index = op->index + i * PAGE_SIZE;
131 gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
132 if (!gref->page)
133 goto undo;
134
135 /* Grant foreign access to the page. */
136 gref->gref_id = gnttab_grant_foreign_access(op->domid,
137 pfn_to_mfn(page_to_pfn(gref->page)), readonly);
138 if (gref->gref_id < 0) {
139 rc = gref->gref_id;
140 goto undo;
141 }
142 gref_ids[i] = gref->gref_id;
143 }
144
145 /* Add to gref lists. */
146 spin_lock(&gref_lock);
147 list_splice_tail(&queue_gref, &gref_list);
148 list_splice_tail(&queue_file, &priv->list);
149 spin_unlock(&gref_lock);
150
151 return 0;
152
153undo:
154 spin_lock(&gref_lock);
155 gref_size -= (op->count - i);
156
157 list_for_each_entry(gref, &queue_file, next_file) {
158 /* __del_gref does not remove from queue_file */
159 __del_gref(gref);
160 }
161
162 /* It's possible for the target domain to map the just-allocated grant
163 * references by blindly guessing their IDs; if this is done, then
164 * __del_gref will leave them in the queue_gref list. They need to be
165 * added to the global list so that we can free them when they are no
166 * longer referenced.
167 */
168 if (unlikely(!list_empty(&queue_gref)))
169 list_splice_tail(&queue_gref, &gref_list);
170 spin_unlock(&gref_lock);
171 return rc;
172}
173
174static void __del_gref(struct gntalloc_gref *gref)
175{
176 if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
177 uint8_t *tmp = kmap(gref->page);
178 tmp[gref->notify.pgoff] = 0;
179 kunmap(gref->page);
180 }
181 if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
182 notify_remote_via_evtchn(gref->notify.event);
183
184 gref->notify.flags = 0;
185
186 if (gref->gref_id > 0) {
187 if (gnttab_query_foreign_access(gref->gref_id))
188 return;
189
190 if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
191 return;
192 }
193
194 gref_size--;
195 list_del(&gref->next_gref);
196
197 if (gref->page)
198 __free_page(gref->page);
199
200 kfree(gref);
201}
202
203/* finds contiguous grant references in a file, returns the first */
204static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
205 uint64_t index, uint32_t count)
206{
207 struct gntalloc_gref *rv = NULL, *gref;
208 list_for_each_entry(gref, &priv->list, next_file) {
209 if (gref->file_index == index && !rv)
210 rv = gref;
211 if (rv) {
212 if (gref->file_index != index)
213 return NULL;
214 index += PAGE_SIZE;
215 count--;
216 if (count == 0)
217 return rv;
218 }
219 }
220 return NULL;
221}
222
223/*
224 * -------------------------------------
225 * File operations.
226 * -------------------------------------
227 */
228static int gntalloc_open(struct inode *inode, struct file *filp)
229{
230 struct gntalloc_file_private_data *priv;
231
232 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
233 if (!priv)
234 goto out_nomem;
235 INIT_LIST_HEAD(&priv->list);
236
237 filp->private_data = priv;
238
239 pr_debug("%s: priv %p\n", __func__, priv);
240
241 return 0;
242
243out_nomem:
244 return -ENOMEM;
245}
246
247static int gntalloc_release(struct inode *inode, struct file *filp)
248{
249 struct gntalloc_file_private_data *priv = filp->private_data;
250 struct gntalloc_gref *gref;
251
252 pr_debug("%s: priv %p\n", __func__, priv);
253
254 spin_lock(&gref_lock);
255 while (!list_empty(&priv->list)) {
256 gref = list_entry(priv->list.next,
257 struct gntalloc_gref, next_file);
258 list_del(&gref->next_file);
259 gref->users--;
260 if (gref->users == 0)
261 __del_gref(gref);
262 }
263 kfree(priv);
264 spin_unlock(&gref_lock);
265
266 return 0;
267}
268
269static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
270 struct ioctl_gntalloc_alloc_gref __user *arg)
271{
272 int rc = 0;
273 struct ioctl_gntalloc_alloc_gref op;
274 uint32_t *gref_ids;
275
276 pr_debug("%s: priv %p\n", __func__, priv);
277
278 if (copy_from_user(&op, arg, sizeof(op))) {
279 rc = -EFAULT;
280 goto out;
281 }
282
283 gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
284 if (!gref_ids) {
285 rc = -ENOMEM;
286 goto out;
287 }
288
289 spin_lock(&gref_lock);
290 /* Clean up pages that were at zero (local) users but were still mapped
291 * by remote domains. Since those pages count towards the limit that we
292 * are about to enforce, removing them here is a good idea.
293 */
294 do_cleanup();
295 if (gref_size + op.count > limit) {
296 spin_unlock(&gref_lock);
297 rc = -ENOSPC;
298 goto out_free;
299 }
300 gref_size += op.count;
301 op.index = priv->index;
302 priv->index += op.count * PAGE_SIZE;
303 spin_unlock(&gref_lock);
304
305 rc = add_grefs(&op, gref_ids, priv);
306 if (rc < 0)
307 goto out_free;
308
309 /* Once we finish add_grefs, it is unsafe to touch the new reference,
310 * since it is possible for a concurrent ioctl to remove it (by guessing
311 * its index). If the userspace application doesn't provide valid memory
312 * to write the IDs to, then it will need to close the file in order to
313 * release - which it will do by segfaulting when it tries to access the
314 * IDs to close them.
315 */
316 if (copy_to_user(arg, &op, sizeof(op))) {
317 rc = -EFAULT;
318 goto out_free;
319 }
320 if (copy_to_user(arg->gref_ids, gref_ids,
321 sizeof(gref_ids[0]) * op.count)) {
322 rc = -EFAULT;
323 goto out_free;
324 }
325
326out_free:
327 kfree(gref_ids);
328out:
329 return rc;
330}
331
332static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
333 void __user *arg)
334{
335 int i, rc = 0;
336 struct ioctl_gntalloc_dealloc_gref op;
337 struct gntalloc_gref *gref, *n;
338
339 pr_debug("%s: priv %p\n", __func__, priv);
340
341 if (copy_from_user(&op, arg, sizeof(op))) {
342 rc = -EFAULT;
343 goto dealloc_grant_out;
344 }
345
346 spin_lock(&gref_lock);
347 gref = find_grefs(priv, op.index, op.count);
348 if (gref) {
349 /* Remove from the file list only, and decrease reference count.
350 * The later call to do_cleanup() will remove from gref_list and
351 * free the memory if the pages aren't mapped anywhere.
352 */
353 for (i = 0; i < op.count; i++) {
354 n = list_entry(gref->next_file.next,
355 struct gntalloc_gref, next_file);
356 list_del(&gref->next_file);
357 gref->users--;
358 gref = n;
359 }
360 } else {
361 rc = -EINVAL;
362 }
363
364 do_cleanup();
365
366 spin_unlock(&gref_lock);
367dealloc_grant_out:
368 return rc;
369}
370
371static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
372 void __user *arg)
373{
374 struct ioctl_gntalloc_unmap_notify op;
375 struct gntalloc_gref *gref;
376 uint64_t index;
377 int pgoff;
378 int rc;
379
380 if (copy_from_user(&op, arg, sizeof(op)))
381 return -EFAULT;
382
383 index = op.index & ~(PAGE_SIZE - 1);
384 pgoff = op.index & (PAGE_SIZE - 1);
385
386 spin_lock(&gref_lock);
387
388 gref = find_grefs(priv, index, 1);
389 if (!gref) {
390 rc = -ENOENT;
391 goto unlock_out;
392 }
393
394 if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
395 rc = -EINVAL;
396 goto unlock_out;
397 }
398
399 gref->notify.flags = op.action;
400 gref->notify.pgoff = pgoff;
401 gref->notify.event = op.event_channel_port;
402 rc = 0;
403 unlock_out:
404 spin_unlock(&gref_lock);
405 return rc;
406}
407
408static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
409 unsigned long arg)
410{
411 struct gntalloc_file_private_data *priv = filp->private_data;
412
413 switch (cmd) {
414 case IOCTL_GNTALLOC_ALLOC_GREF:
415 return gntalloc_ioctl_alloc(priv, (void __user *)arg);
416
417 case IOCTL_GNTALLOC_DEALLOC_GREF:
418 return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
419
420 case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
421 return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg);
422
423 default:
424 return -ENOIOCTLCMD;
425 }
426
427 return 0;
428}
429
430static void gntalloc_vma_close(struct vm_area_struct *vma)
431{
432 struct gntalloc_gref *gref = vma->vm_private_data;
433 if (!gref)
434 return;
435
436 spin_lock(&gref_lock);
437 gref->users--;
438 if (gref->users == 0)
439 __del_gref(gref);
440 spin_unlock(&gref_lock);
441}
442
443static struct vm_operations_struct gntalloc_vmops = {
444 .close = gntalloc_vma_close,
445};
446
447static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
448{
449 struct gntalloc_file_private_data *priv = filp->private_data;
450 struct gntalloc_gref *gref;
451 int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
452 int rv, i;
453
454 pr_debug("%s: priv %p, page %lu+%d\n", __func__,
455 priv, vma->vm_pgoff, count);
456
457 if (!(vma->vm_flags & VM_SHARED)) {
458 printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
459 return -EINVAL;
460 }
461
462 spin_lock(&gref_lock);
463 gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
464 if (gref == NULL) {
465 rv = -ENOENT;
466 pr_debug("%s: Could not find grant reference",
467 __func__);
468 goto out_unlock;
469 }
470
471 vma->vm_private_data = gref;
472
473 vma->vm_flags |= VM_RESERVED;
474 vma->vm_flags |= VM_DONTCOPY;
475 vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
476
477 vma->vm_ops = &gntalloc_vmops;
478
479 for (i = 0; i < count; i++) {
480 gref->users++;
481 rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
482 gref->page);
483 if (rv)
484 goto out_unlock;
485
486 gref = list_entry(gref->next_file.next,
487 struct gntalloc_gref, next_file);
488 }
489 rv = 0;
490
491out_unlock:
492 spin_unlock(&gref_lock);
493 return rv;
494}
495
496static const struct file_operations gntalloc_fops = {
497 .owner = THIS_MODULE,
498 .open = gntalloc_open,
499 .release = gntalloc_release,
500 .unlocked_ioctl = gntalloc_ioctl,
501 .mmap = gntalloc_mmap
502};
503
504/*
505 * -------------------------------------
506 * Module creation/destruction.
507 * -------------------------------------
508 */
509static struct miscdevice gntalloc_miscdev = {
510 .minor = MISC_DYNAMIC_MINOR,
511 .name = "xen/gntalloc",
512 .fops = &gntalloc_fops,
513};
514
515static int __init gntalloc_init(void)
516{
517 int err;
518
519 if (!xen_domain())
520 return -ENODEV;
521
522 err = misc_register(&gntalloc_miscdev);
523 if (err != 0) {
524 printk(KERN_ERR "Could not register misc gntalloc device\n");
525 return err;
526 }
527
528 pr_debug("Created grant allocation device at %d,%d\n",
529 MISC_MAJOR, gntalloc_miscdev.minor);
530
531 return 0;
532}
533
534static void __exit gntalloc_exit(void)
535{
536 misc_deregister(&gntalloc_miscdev);
537}
538
539module_init(gntalloc_init);
540module_exit(gntalloc_exit);
541
542MODULE_LICENSE("GPL");
543MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
544 "Daniel De Graaf <dgdegra@tycho.nsa.gov>");
545MODULE_DESCRIPTION("User-space grant reference allocator driver");
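
From a userspace process in domain X, the allocate/grant/mmap flow described in the file-header comment above looks roughly like the sketch below. The ioctl name, GNTALLOC_FLAG_WRITABLE and the /dev/xen/gntalloc node (the miscdevice registered above) are taken from this file; the layout of struct ioctl_gntalloc_alloc_gref comes from the xen/gntalloc.h UAPI header added alongside this driver, which is not part of this diff, so the field names here are assumptions rather than a definitive reference:

#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/gntalloc.h>	/* IOCTL_GNTALLOC_ALLOC_GREF, GNTALLOC_FLAG_WRITABLE */

/* Returns the open fd on success (keep it open while sharing), -1 on error. */
static int share_one_page(uint16_t remote_domid, void **shared_out, uint32_t *gref_out)
{
	struct ioctl_gntalloc_alloc_gref op;
	void *shared;
	int fd;

	fd = open("/dev/xen/gntalloc", O_RDWR);
	if (fd < 0)
		return -1;

	memset(&op, 0, sizeof(op));
	op.domid = remote_domid;		/* domid(Y) from the comment above */
	op.flags = GNTALLOC_FLAG_WRITABLE;	/* let Y write to the page */
	op.count = 1;

	if (ioctl(fd, IOCTL_GNTALLOC_ALLOC_GREF, &op) < 0) {
		close(fd);
		return -1;
	}

	/* Step 5: mmap() the page this process just granted; one page, assuming
	 * 4 KiB pages, at the file offset the driver returned in op.index. */
	shared = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, op.index);
	if (shared == MAP_FAILED) {
		close(fd);
		return -1;
	}

	*shared_out = shared;
	*gref_out = op.gref_ids[0];	/* the GREF to hand to Y (step 3) */
	return fd;			/* munmap() + close() tears the sharing down */
}
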
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1e31cdcdae1e..017ce600fbc6 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -32,10 +32,13 @@
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/highmem.h>
35 36
36#include <xen/xen.h> 37#include <xen/xen.h>
37#include <xen/grant_table.h> 38#include <xen/grant_table.h>
39#include <xen/balloon.h>
38#include <xen/gntdev.h> 40#include <xen/gntdev.h>
41#include <xen/events.h>
39#include <asm/xen/hypervisor.h> 42#include <asm/xen/hypervisor.h>
40#include <asm/xen/hypercall.h> 43#include <asm/xen/hypercall.h>
41#include <asm/xen/page.h> 44#include <asm/xen/page.h>
@@ -45,35 +48,46 @@ MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
45 "Gerd Hoffmann <kraxel@redhat.com>"); 48 "Gerd Hoffmann <kraxel@redhat.com>");
46MODULE_DESCRIPTION("User-space granted page access driver"); 49MODULE_DESCRIPTION("User-space granted page access driver");
47 50
48static int limit = 1024; 51static int limit = 1024*1024;
49module_param(limit, int, 0644); 52module_param(limit, int, 0644);
50MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at " 53MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
51 "once by a gntdev instance"); 54 "the gntdev device");
55
56static atomic_t pages_mapped = ATOMIC_INIT(0);
57
58static int use_ptemod;
52 59
53struct gntdev_priv { 60struct gntdev_priv {
54 struct list_head maps; 61 struct list_head maps;
55 uint32_t used;
56 uint32_t limit;
57 /* lock protects maps from concurrent changes */ 62 /* lock protects maps from concurrent changes */
58 spinlock_t lock; 63 spinlock_t lock;
59 struct mm_struct *mm; 64 struct mm_struct *mm;
60 struct mmu_notifier mn; 65 struct mmu_notifier mn;
61}; 66};
62 67
68struct unmap_notify {
69 int flags;
70 /* Address relative to the start of the grant_map */
71 int addr;
72 int event;
73};
74
63struct grant_map { 75struct grant_map {
64 struct list_head next; 76 struct list_head next;
65 struct gntdev_priv *priv;
66 struct vm_area_struct *vma; 77 struct vm_area_struct *vma;
67 int index; 78 int index;
68 int count; 79 int count;
69 int flags; 80 int flags;
70 int is_mapped; 81 atomic_t users;
82 struct unmap_notify notify;
71 struct ioctl_gntdev_grant_ref *grants; 83 struct ioctl_gntdev_grant_ref *grants;
72 struct gnttab_map_grant_ref *map_ops; 84 struct gnttab_map_grant_ref *map_ops;
73 struct gnttab_unmap_grant_ref *unmap_ops; 85 struct gnttab_unmap_grant_ref *unmap_ops;
74 struct page **pages; 86 struct page **pages;
75}; 87};
76 88
89static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
90
77/* ------------------------------------------------------------------ */ 91/* ------------------------------------------------------------------ */
78 92
79static void gntdev_print_maps(struct gntdev_priv *priv, 93static void gntdev_print_maps(struct gntdev_priv *priv,
@@ -82,9 +96,7 @@ static void gntdev_print_maps(struct gntdev_priv *priv,
82#ifdef DEBUG 96#ifdef DEBUG
83 struct grant_map *map; 97 struct grant_map *map;
84 98
85 pr_debug("maps list (priv %p, usage %d/%d)\n", 99 pr_debug("%s: maps list (priv %p)\n", __func__, priv);
86 priv, priv->used, priv->limit);
87
88 list_for_each_entry(map, &priv->maps, next) 100 list_for_each_entry(map, &priv->maps, next)
89 pr_debug(" index %2d, count %2d %s\n", 101 pr_debug(" index %2d, count %2d %s\n",
90 map->index, map->count, 102 map->index, map->count,
@@ -111,27 +123,21 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
111 NULL == add->pages) 123 NULL == add->pages)
112 goto err; 124 goto err;
113 125
126 if (alloc_xenballooned_pages(count, add->pages))
127 goto err;
128
114 for (i = 0; i < count; i++) { 129 for (i = 0; i < count; i++) {
115 add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 130 add->map_ops[i].handle = -1;
116 if (add->pages[i] == NULL) 131 add->unmap_ops[i].handle = -1;
117 goto err;
118 } 132 }
119 133
120 add->index = 0; 134 add->index = 0;
121 add->count = count; 135 add->count = count;
122 add->priv = priv; 136 atomic_set(&add->users, 1);
123
124 if (add->count + priv->used > priv->limit)
125 goto err;
126 137
127 return add; 138 return add;
128 139
129err: 140err:
130 if (add->pages)
131 for (i = 0; i < count; i++) {
132 if (add->pages[i])
133 __free_page(add->pages[i]);
134 }
135 kfree(add->pages); 141 kfree(add->pages);
136 kfree(add->grants); 142 kfree(add->grants);
137 kfree(add->map_ops); 143 kfree(add->map_ops);
@@ -154,7 +160,6 @@ static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add)
154 list_add_tail(&add->next, &priv->maps); 160 list_add_tail(&add->next, &priv->maps);
155 161
156done: 162done:
157 priv->used += add->count;
158 gntdev_print_maps(priv, "[new]", add->index); 163 gntdev_print_maps(priv, "[new]", add->index);
159} 164}
160 165
@@ -166,57 +171,33 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
166 list_for_each_entry(map, &priv->maps, next) { 171 list_for_each_entry(map, &priv->maps, next) {
167 if (map->index != index) 172 if (map->index != index)
168 continue; 173 continue;
169 if (map->count != count) 174 if (count && map->count != count)
170 continue; 175 continue;
171 return map; 176 return map;
172 } 177 }
173 return NULL; 178 return NULL;
174} 179}
175 180
176static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv, 181static void gntdev_put_map(struct grant_map *map)
177 unsigned long vaddr)
178{ 182{
179 struct grant_map *map; 183 if (!map)
180 184 return;
181 list_for_each_entry(map, &priv->maps, next) {
182 if (!map->vma)
183 continue;
184 if (vaddr < map->vma->vm_start)
185 continue;
186 if (vaddr >= map->vma->vm_end)
187 continue;
188 return map;
189 }
190 return NULL;
191}
192
193static int gntdev_del_map(struct grant_map *map)
194{
195 int i;
196 185
197 if (map->vma) 186 if (!atomic_dec_and_test(&map->users))
198 return -EBUSY; 187 return;
199 for (i = 0; i < map->count; i++)
200 if (map->unmap_ops[i].handle)
201 return -EBUSY;
202 188
203 map->priv->used -= map->count; 189 atomic_sub(map->count, &pages_mapped);
204 list_del(&map->next);
205 return 0;
206}
207 190
208static void gntdev_free_map(struct grant_map *map) 191 if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
209{ 192 notify_remote_via_evtchn(map->notify.event);
210 int i; 193 }
211 194
212 if (!map) 195 if (map->pages) {
213 return; 196 if (!use_ptemod)
197 unmap_grant_pages(map, 0, map->count);
214 198
215 if (map->pages) 199 free_xenballooned_pages(map->count, map->pages);
216 for (i = 0; i < map->count; i++) { 200 }
217 if (map->pages[i])
218 __free_page(map->pages[i]);
219 }
220 kfree(map->pages); 201 kfree(map->pages);
221 kfree(map->grants); 202 kfree(map->grants);
222 kfree(map->map_ops); 203 kfree(map->map_ops);
@@ -231,18 +212,17 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
231{ 212{
232 struct grant_map *map = data; 213 struct grant_map *map = data;
233 unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; 214 unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
215 int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
234 u64 pte_maddr; 216 u64 pte_maddr;
235 217
236 BUG_ON(pgnr >= map->count); 218 BUG_ON(pgnr >= map->count);
237 pte_maddr = arbitrary_virt_to_machine(pte).maddr; 219 pte_maddr = arbitrary_virt_to_machine(pte).maddr;
238 220
239 gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, 221 gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
240 GNTMAP_contains_pte | map->flags,
241 map->grants[pgnr].ref, 222 map->grants[pgnr].ref,
242 map->grants[pgnr].domid); 223 map->grants[pgnr].domid);
243 gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, 224 gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
244 GNTMAP_contains_pte | map->flags, 225 -1 /* handle */);
245 0 /* handle */);
246 return 0; 226 return 0;
247} 227}
248 228
@@ -250,6 +230,21 @@ static int map_grant_pages(struct grant_map *map)
250{ 230{
251 int i, err = 0; 231 int i, err = 0;
252 232
233 if (!use_ptemod) {
234 /* Note: it could already be mapped */
235 if (map->map_ops[0].handle != -1)
236 return 0;
237 for (i = 0; i < map->count; i++) {
238 unsigned long addr = (unsigned long)
239 pfn_to_kaddr(page_to_pfn(map->pages[i]));
240 gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
241 map->grants[i].ref,
242 map->grants[i].domid);
243 gnttab_set_unmap_op(&map->unmap_ops[i], addr,
244 map->flags, -1 /* handle */);
245 }
246 }
247
253 pr_debug("map %d+%d\n", map->index, map->count); 248 pr_debug("map %d+%d\n", map->index, map->count);
254 err = gnttab_map_refs(map->map_ops, map->pages, map->count); 249 err = gnttab_map_refs(map->map_ops, map->pages, map->count);
255 if (err) 250 if (err)
@@ -258,28 +253,81 @@ static int map_grant_pages(struct grant_map *map)
258 for (i = 0; i < map->count; i++) { 253 for (i = 0; i < map->count; i++) {
259 if (map->map_ops[i].status) 254 if (map->map_ops[i].status)
260 err = -EINVAL; 255 err = -EINVAL;
261 map->unmap_ops[i].handle = map->map_ops[i].handle; 256 else {
257 BUG_ON(map->map_ops[i].handle == -1);
258 map->unmap_ops[i].handle = map->map_ops[i].handle;
259 pr_debug("map handle=%d\n", map->map_ops[i].handle);
260 }
262 } 261 }
263 return err; 262 return err;
264} 263}
265 264
266static int unmap_grant_pages(struct grant_map *map, int offset, int pages) 265static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
267{ 266{
268 int i, err = 0; 267 int i, err = 0;
269 268
270 pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages); 269 if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
271 err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages); 270 int pgno = (map->notify.addr >> PAGE_SHIFT);
271 if (pgno >= offset && pgno < offset + pages && use_ptemod) {
272 void __user *tmp = (void __user *)
273 map->vma->vm_start + map->notify.addr;
274 err = copy_to_user(tmp, &err, 1);
275 if (err)
276 return err;
277 map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
278 } else if (pgno >= offset && pgno < offset + pages) {
279 uint8_t *tmp = kmap(map->pages[pgno]);
280 tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
281 kunmap(map->pages[pgno]);
282 map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
283 }
284 }
285
286 err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
272 if (err) 287 if (err)
273 return err; 288 return err;
274 289
275 for (i = 0; i < pages; i++) { 290 for (i = 0; i < pages; i++) {
276 if (map->unmap_ops[offset+i].status) 291 if (map->unmap_ops[offset+i].status)
277 err = -EINVAL; 292 err = -EINVAL;
278 map->unmap_ops[offset+i].handle = 0; 293 pr_debug("unmap handle=%d st=%d\n",
294 map->unmap_ops[offset+i].handle,
295 map->unmap_ops[offset+i].status);
296 map->unmap_ops[offset+i].handle = -1;
279 } 297 }
280 return err; 298 return err;
281} 299}
282 300
301static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
302{
303 int range, err = 0;
304
305 pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
306
307 /* It is possible the requested range will have a "hole" where we
308 * already unmapped some of the grants. Only unmap valid ranges.
309 */
310 while (pages && !err) {
311 while (pages && map->unmap_ops[offset].handle == -1) {
312 offset++;
313 pages--;
314 }
315 range = 0;
316 while (range < pages) {
317 if (map->unmap_ops[offset+range].handle == -1) {
318 range--;
319 break;
320 }
321 range++;
322 }
323 err = __unmap_grant_pages(map, offset, range);
324 offset += range;
325 pages -= range;
326 }
327
328 return err;
329}
330
283/* ------------------------------------------------------------------ */ 331/* ------------------------------------------------------------------ */
284 332
285static void gntdev_vma_close(struct vm_area_struct *vma) 333static void gntdev_vma_close(struct vm_area_struct *vma)
@@ -287,22 +335,13 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
287 struct grant_map *map = vma->vm_private_data; 335 struct grant_map *map = vma->vm_private_data;
288 336
289 pr_debug("close %p\n", vma); 337 pr_debug("close %p\n", vma);
290 map->is_mapped = 0;
291 map->vma = NULL; 338 map->vma = NULL;
292 vma->vm_private_data = NULL; 339 vma->vm_private_data = NULL;
293} 340 gntdev_put_map(map);
294
295static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
296{
297 pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n",
298 vmf->virtual_address, vmf->pgoff);
299 vmf->flags = VM_FAULT_ERROR;
300 return 0;
301} 341}
302 342
303static struct vm_operations_struct gntdev_vmops = { 343static struct vm_operations_struct gntdev_vmops = {
304 .close = gntdev_vma_close, 344 .close = gntdev_vma_close,
305 .fault = gntdev_vma_fault,
306}; 345};
307 346
308/* ------------------------------------------------------------------ */ 347/* ------------------------------------------------------------------ */
@@ -320,8 +359,6 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
320 list_for_each_entry(map, &priv->maps, next) { 359 list_for_each_entry(map, &priv->maps, next) {
321 if (!map->vma) 360 if (!map->vma)
322 continue; 361 continue;
323 if (!map->is_mapped)
324 continue;
325 if (map->vma->vm_start >= end) 362 if (map->vma->vm_start >= end)
326 continue; 363 continue;
327 if (map->vma->vm_end <= start) 364 if (map->vma->vm_end <= start)
@@ -386,16 +423,17 @@ static int gntdev_open(struct inode *inode, struct file *flip)
386 423
387 INIT_LIST_HEAD(&priv->maps); 424 INIT_LIST_HEAD(&priv->maps);
388 spin_lock_init(&priv->lock); 425 spin_lock_init(&priv->lock);
389 priv->limit = limit;
390 426
391 priv->mm = get_task_mm(current); 427 if (use_ptemod) {
392 if (!priv->mm) { 428 priv->mm = get_task_mm(current);
393 kfree(priv); 429 if (!priv->mm) {
394 return -ENOMEM; 430 kfree(priv);
431 return -ENOMEM;
432 }
433 priv->mn.ops = &gntdev_mmu_ops;
434 ret = mmu_notifier_register(&priv->mn, priv->mm);
435 mmput(priv->mm);
395 } 436 }
396 priv->mn.ops = &gntdev_mmu_ops;
397 ret = mmu_notifier_register(&priv->mn, priv->mm);
398 mmput(priv->mm);
399 437
400 if (ret) { 438 if (ret) {
401 kfree(priv); 439 kfree(priv);
@@ -412,21 +450,19 @@ static int gntdev_release(struct inode *inode, struct file *flip)
412{ 450{
413 struct gntdev_priv *priv = flip->private_data; 451 struct gntdev_priv *priv = flip->private_data;
414 struct grant_map *map; 452 struct grant_map *map;
415 int err;
416 453
417 pr_debug("priv %p\n", priv); 454 pr_debug("priv %p\n", priv);
418 455
419 spin_lock(&priv->lock); 456 spin_lock(&priv->lock);
420 while (!list_empty(&priv->maps)) { 457 while (!list_empty(&priv->maps)) {
421 map = list_entry(priv->maps.next, struct grant_map, next); 458 map = list_entry(priv->maps.next, struct grant_map, next);
422 err = gntdev_del_map(map); 459 list_del(&map->next);
423 if (WARN_ON(err)) 460 gntdev_put_map(map);
424 gntdev_free_map(map);
425
426 } 461 }
427 spin_unlock(&priv->lock); 462 spin_unlock(&priv->lock);
428 463
429 mmu_notifier_unregister(&priv->mn, priv->mm); 464 if (use_ptemod)
465 mmu_notifier_unregister(&priv->mn, priv->mm);
430 kfree(priv); 466 kfree(priv);
431 return 0; 467 return 0;
432} 468}
@@ -443,16 +479,21 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
443 pr_debug("priv %p, add %d\n", priv, op.count); 479 pr_debug("priv %p, add %d\n", priv, op.count);
444 if (unlikely(op.count <= 0)) 480 if (unlikely(op.count <= 0))
445 return -EINVAL; 481 return -EINVAL;
446 if (unlikely(op.count > priv->limit))
447 return -EINVAL;
448 482
449 err = -ENOMEM; 483 err = -ENOMEM;
450 map = gntdev_alloc_map(priv, op.count); 484 map = gntdev_alloc_map(priv, op.count);
451 if (!map) 485 if (!map)
452 return err; 486 return err;
487
488 if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
489 pr_debug("can't map: over limit\n");
490 gntdev_put_map(map);
491 return err;
492 }
493
453 if (copy_from_user(map->grants, &u->refs, 494 if (copy_from_user(map->grants, &u->refs,
454 sizeof(map->grants[0]) * op.count) != 0) { 495 sizeof(map->grants[0]) * op.count) != 0) {
455 gntdev_free_map(map); 496 gntdev_put_map(map);
456 return err; 497 return err;
457 } 498 }
458 499
@@ -461,13 +502,9 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
461 op.index = map->index << PAGE_SHIFT; 502 op.index = map->index << PAGE_SHIFT;
462 spin_unlock(&priv->lock); 503 spin_unlock(&priv->lock);
463 504
464 if (copy_to_user(u, &op, sizeof(op)) != 0) { 505 if (copy_to_user(u, &op, sizeof(op)) != 0)
465 spin_lock(&priv->lock); 506 return -EFAULT;
466 gntdev_del_map(map); 507
467 spin_unlock(&priv->lock);
468 gntdev_free_map(map);
469 return err;
470 }
471 return 0; 508 return 0;
472} 509}
473 510
@@ -484,11 +521,12 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
484 521
485 spin_lock(&priv->lock); 522 spin_lock(&priv->lock);
486 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); 523 map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
487 if (map) 524 if (map) {
488 err = gntdev_del_map(map); 525 list_del(&map->next);
526 gntdev_put_map(map);
527 err = 0;
528 }
489 spin_unlock(&priv->lock); 529 spin_unlock(&priv->lock);
490 if (!err)
491 gntdev_free_map(map);
492 return err; 530 return err;
493} 531}
494 532
@@ -496,43 +534,66 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
496 struct ioctl_gntdev_get_offset_for_vaddr __user *u) 534 struct ioctl_gntdev_get_offset_for_vaddr __user *u)
497{ 535{
498 struct ioctl_gntdev_get_offset_for_vaddr op; 536 struct ioctl_gntdev_get_offset_for_vaddr op;
537 struct vm_area_struct *vma;
499 struct grant_map *map; 538 struct grant_map *map;
500 539
501 if (copy_from_user(&op, u, sizeof(op)) != 0) 540 if (copy_from_user(&op, u, sizeof(op)) != 0)
502 return -EFAULT; 541 return -EFAULT;
503 pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr); 542 pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);
504 543
505 spin_lock(&priv->lock); 544 vma = find_vma(current->mm, op.vaddr);
506 map = gntdev_find_map_vaddr(priv, op.vaddr); 545 if (!vma || vma->vm_ops != &gntdev_vmops)
507 if (map == NULL ||
508 map->vma->vm_start != op.vaddr) {
509 spin_unlock(&priv->lock);
510 return -EINVAL; 546 return -EINVAL;
511 } 547
548 map = vma->vm_private_data;
549 if (!map)
550 return -EINVAL;
551
512 op.offset = map->index << PAGE_SHIFT; 552 op.offset = map->index << PAGE_SHIFT;
513 op.count = map->count; 553 op.count = map->count;
514 spin_unlock(&priv->lock);
515 554
516 if (copy_to_user(u, &op, sizeof(op)) != 0) 555 if (copy_to_user(u, &op, sizeof(op)) != 0)
517 return -EFAULT; 556 return -EFAULT;
518 return 0; 557 return 0;
519} 558}
520 559
521static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv,
522 struct ioctl_gntdev_set_max_grants __user *u)
523{
524 struct ioctl_gntdev_set_max_grants op;
525
526 if (copy_from_user(&op, u, sizeof(op)) != 0)
527 return -EFAULT;
528 pr_debug("priv %p, limit %d\n", priv, op.count);
529 if (op.count > limit)
530 return -E2BIG;
531
532 spin_lock(&priv->lock);
533 priv->limit = op.count;
560static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
561{
562 struct ioctl_gntdev_unmap_notify op;
563 struct grant_map *map;
564 int rc;
565
566 if (copy_from_user(&op, u, sizeof(op)))
567 return -EFAULT;
568
569 if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
570 return -EINVAL;
571
572 spin_lock(&priv->lock);
573
574 list_for_each_entry(map, &priv->maps, next) {
575 uint64_t begin = map->index << PAGE_SHIFT;
576 uint64_t end = (map->index + map->count) << PAGE_SHIFT;
577 if (op.index >= begin && op.index < end)
578 goto found;
579 }
580 rc = -ENOENT;
581 goto unlock_out;
582
583 found:
584 if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
585 (map->flags & GNTMAP_readonly)) {
586 rc = -EINVAL;
587 goto unlock_out;
588 }
589
590 map->notify.flags = op.action;
591 map->notify.addr = op.index - (map->index << PAGE_SHIFT);
592 map->notify.event = op.event_channel_port;
593 rc = 0;
594 unlock_out:
534 spin_unlock(&priv->lock);
535 return 0;
536}
595 spin_unlock(&priv->lock);
596 return rc;
597}
537 598
538static long gntdev_ioctl(struct file *flip, 599static long gntdev_ioctl(struct file *flip,
@@ -551,8 +612,8 @@ static long gntdev_ioctl(struct file *flip,
551 case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: 612 case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
552 return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); 613 return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
553 614
554 case IOCTL_GNTDEV_SET_MAX_GRANTS:
555 return gntdev_ioctl_set_max_grants(priv, ptr);
615 case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
616 return gntdev_ioctl_notify(priv, ptr);
556 617
557 default: 618 default:
558 pr_debug("priv %p, unknown cmd %x\n", priv, cmd); 619 pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
@@ -568,7 +629,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
568 int index = vma->vm_pgoff; 629 int index = vma->vm_pgoff;
569 int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 630 int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
570 struct grant_map *map; 631 struct grant_map *map;
571 int err = -EINVAL;
632 int i, err = -EINVAL;
572 633
573 if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) 634 if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
574 return -EINVAL; 635 return -EINVAL;
@@ -580,47 +641,70 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
580 map = gntdev_find_map_index(priv, index, count); 641 map = gntdev_find_map_index(priv, index, count);
581 if (!map) 642 if (!map)
582 goto unlock_out; 643 goto unlock_out;
583 if (map->vma)
584 goto unlock_out;
585 if (priv->mm != vma->vm_mm) {
586 printk(KERN_WARNING "Huh? Other mm?\n");
587 goto unlock_out;
588 }
644 if (use_ptemod && map->vma)
645 goto unlock_out;
646 if (use_ptemod && priv->mm != vma->vm_mm) {
647 printk(KERN_WARNING "Huh? Other mm?\n");
648 goto unlock_out;
649 }
589 650
651 atomic_inc(&map->users);
652
590 vma->vm_ops = &gntdev_vmops; 653 vma->vm_ops = &gntdev_vmops;
591 654
592 vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; 655 vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
593 656
594 vma->vm_private_data = map; 657 vma->vm_private_data = map;
595 map->vma = vma;
596 658
597 map->flags = GNTMAP_host_map | GNTMAP_application_map;
598 if (!(vma->vm_flags & VM_WRITE))
599 map->flags |= GNTMAP_readonly;
659 if (use_ptemod)
660 map->vma = vma;
661
662 if (map->flags) {
663 if ((vma->vm_flags & VM_WRITE) &&
664 (map->flags & GNTMAP_readonly))
665 return -EINVAL;
666 } else {
667 map->flags = GNTMAP_host_map;
668 if (!(vma->vm_flags & VM_WRITE))
669 map->flags |= GNTMAP_readonly;
670 }
600 671
601 spin_unlock(&priv->lock); 672 spin_unlock(&priv->lock);
602 673
603 err = apply_to_page_range(vma->vm_mm, vma->vm_start,
604 vma->vm_end - vma->vm_start,
605 find_grant_ptes, map);
606 if (err) {
607 printk(KERN_WARNING "find_grant_ptes() failure.\n");
608 return err;
609 }
674 if (use_ptemod) {
675 err = apply_to_page_range(vma->vm_mm, vma->vm_start,
676 vma->vm_end - vma->vm_start,
677 find_grant_ptes, map);
678 if (err) {
679 printk(KERN_WARNING "find_grant_ptes() failure.\n");
680 goto out_put_map;
681 }
682 }
610 683
611 err = map_grant_pages(map); 684 err = map_grant_pages(map);
612 if (err) {
613 printk(KERN_WARNING "map_grant_pages() failure.\n");
614 return err;
615 }
685 if (err)
686 goto out_put_map;
616 687
617 map->is_mapped = 1;
688 if (!use_ptemod) {
689 for (i = 0; i < count; i++) {
690 err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
691 map->pages[i]);
692 if (err)
693 goto out_put_map;
694 }
695 }
618 696
619 return 0; 697 return 0;
620 698
621unlock_out: 699unlock_out:
622 spin_unlock(&priv->lock); 700 spin_unlock(&priv->lock);
623 return err; 701 return err;
702
703out_put_map:
704 if (use_ptemod)
705 map->vma = NULL;
706 gntdev_put_map(map);
707 return err;
624} 708}
625 709
626static const struct file_operations gntdev_fops = { 710static const struct file_operations gntdev_fops = {
@@ -646,6 +730,8 @@ static int __init gntdev_init(void)
646 if (!xen_domain()) 730 if (!xen_domain())
647 return -ENODEV; 731 return -ENODEV;
648 732
733 use_ptemod = xen_pv_domain();
734
649 err = misc_register(&gntdev_miscdev); 735 err = misc_register(&gntdev_miscdev);
650 if (err != 0) { 736 if (err != 0) {
651 printk(KERN_ERR "Could not register gntdev device\n"); 737 printk(KERN_ERR "Could not register gntdev device\n");
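Note on the new gntdev ioctl: IOCTL_GNTDEV_SET_UNMAP_NOTIFY stores the requested action relative to the start of the matching mapping (op.index - (map->index << PAGE_SHIFT)), so the clear-byte write and/or event-channel signal can fire when the grant is finally unmapped. A minimal user-space sketch of arming it follows; the struct layout, the ioctl number and the /dev/xen/gntdev node come from include/xen/gntdev.h rather than from this diff, so treat them as assumptions.

	/*
	 * Hedged sketch: arm the unmap notification on an existing gntdev
	 * mapping.  "gntdev_fd" is an open /dev/xen/gntdev descriptor and
	 * "offset" is the byte offset returned by IOCTL_GNTDEV_MAP_GRANT_REF
	 * (op.index above); both names are illustrative.
	 */
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <xen/gntdev.h>	/* struct ioctl_gntdev_unmap_notify, UNMAP_NOTIFY_* */

	static int arm_unmap_notify(int gntdev_fd, uint64_t offset, uint32_t port)
	{
		struct ioctl_gntdev_unmap_notify notify = {
			.index = offset,	/* must fall inside a mapping, else -ENOENT */
			.action = UNMAP_NOTIFY_CLEAR_BYTE | UNMAP_NOTIFY_SEND_EVENT,
			.event_channel_port = port,
		};

		/* Returns -EINVAL if the page is mapped read-only and
		 * UNMAP_NOTIFY_CLEAR_BYTE was requested. */
		return ioctl(gntdev_fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);
	}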
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 9ef54ebc1194..3745a318defc 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -458,7 +458,14 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
458 if (ret) 458 if (ret)
459 return ret; 459 return ret;
460 460
461 if (xen_feature(XENFEAT_auto_translated_physmap))
462 return ret;
463
461 for (i = 0; i < count; i++) { 464 for (i = 0; i < count; i++) {
465 /* Do not add to override if the map failed. */
466 if (map_ops[i].status)
467 continue;
468
462 /* m2p override only supported for GNTMAP_contains_pte mappings */ 469 /* m2p override only supported for GNTMAP_contains_pte mappings */
463 if (!(map_ops[i].flags & GNTMAP_contains_pte)) 470 if (!(map_ops[i].flags & GNTMAP_contains_pte))
464 continue; 471 continue;
@@ -483,6 +490,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
483 if (ret) 490 if (ret)
484 return ret; 491 return ret;
485 492
493 if (xen_feature(XENFEAT_auto_translated_physmap))
494 return ret;
495
486 for (i = 0; i < count; i++) { 496 for (i = 0; i < count; i++) {
487 ret = m2p_remove_override(pages[i]); 497 ret = m2p_remove_override(pages[i]);
488 if (ret) 498 if (ret)
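Note on the grant-table change: with the XENFEAT_auto_translated_physmap check, gnttab_map_refs() and gnttab_unmap_refs() return straight after the hypercall on auto-translated (HVM) guests, where there is no m2p table to override, and failed map operations are no longer added to the override on PV either. The batching pattern a caller is expected to use looks roughly like the sketch below; it mirrors what gntdev's map_grant_pages() does, but the function and variable names here are illustrative only.

	#include <linux/mm.h>
	#include <xen/grant_table.h>
	#include <xen/page.h>

	/* Hedged sketch of a caller batching grant maps for "count" pages. */
	static int example_map_batch(struct gnttab_map_grant_ref *ops,
				     struct page **pages, grant_ref_t *refs,
				     domid_t domid, int count)
	{
		int i, err;

		for (i = 0; i < count; i++) {
			unsigned long addr =
				(unsigned long)pfn_to_kaddr(page_to_pfn(pages[i]));
			gnttab_set_map_op(&ops[i], addr, GNTMAP_host_map,
					  refs[i], domid);
		}

		/* Auto-translated guests return right after the hypercall;
		 * only PV guests go on to set up the m2p override. */
		err = gnttab_map_refs(ops, pages, count);
		if (err)
			return err;

		for (i = 0; i < count; i++)
			if (ops[i].status != GNTST_okay)
				return -EINVAL;	/* failed ops were skipped above */
		return 0;
	}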
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
new file mode 100644
index 000000000000..a4ff225ee868
--- /dev/null
+++ b/drivers/xen/xen-balloon.c
@@ -0,0 +1,256 @@
1/******************************************************************************
2 * Xen balloon driver - enables returning/claiming memory to/from Xen.
3 *
4 * Copyright (c) 2003, B Dragovic
5 * Copyright (c) 2003-2004, M Williamson, K Fraser
6 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/sysdev.h>
36#include <linux/capability.h>
37
38#include <xen/xen.h>
39#include <xen/interface/xen.h>
40#include <xen/balloon.h>
41#include <xen/xenbus.h>
42#include <xen/features.h>
43#include <xen/page.h>
44
45#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
46
47#define BALLOON_CLASS_NAME "xen_memory"
48
49static struct sys_device balloon_sysdev;
50
51static int register_balloon(struct sys_device *sysdev);
52
53static struct xenbus_watch target_watch =
54{
55 .node = "memory/target"
56};
57
58/* React to a change in the target key */
59static void watch_target(struct xenbus_watch *watch,
60 const char **vec, unsigned int len)
61{
62 unsigned long long new_target;
63 int err;
64
65 err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
66 if (err != 1) {
67 /* This is ok (for domain0 at least) - so just return */
68 return;
69 }
70
71 /* The given memory/target value is in KiB, so it needs converting to
72 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
73 */
74 balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
75}
76
77static int balloon_init_watcher(struct notifier_block *notifier,
78 unsigned long event,
79 void *data)
80{
81 int err;
82
83 err = register_xenbus_watch(&target_watch);
84 if (err)
85 printk(KERN_ERR "Failed to set balloon watcher\n");
86
87 return NOTIFY_DONE;
88}
89
90static struct notifier_block xenstore_notifier;
91
92static int __init balloon_init(void)
93{
94 if (!xen_domain())
95 return -ENODEV;
96
97 pr_info("xen-balloon: Initialising balloon driver.\n");
98
99 register_balloon(&balloon_sysdev);
100
101 target_watch.callback = watch_target;
102 xenstore_notifier.notifier_call = balloon_init_watcher;
103
104 register_xenstore_notifier(&xenstore_notifier);
105
106 return 0;
107}
108subsys_initcall(balloon_init);
109
110static void balloon_exit(void)
111{
112 /* XXX - release balloon here */
113 return;
114}
115
116module_exit(balloon_exit);
117
118#define BALLOON_SHOW(name, format, args...) \
119 static ssize_t show_##name(struct sys_device *dev, \
120 struct sysdev_attribute *attr, \
121 char *buf) \
122 { \
123 return sprintf(buf, format, ##args); \
124 } \
125 static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
126
127BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
128BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
129BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
130
131static SYSDEV_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay);
132static SYSDEV_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay);
133static SYSDEV_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count);
134static SYSDEV_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count);
135
136static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
137 char *buf)
138{
139 return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
140}
141
142static ssize_t store_target_kb(struct sys_device *dev,
143 struct sysdev_attribute *attr,
144 const char *buf,
145 size_t count)
146{
147 char *endchar;
148 unsigned long long target_bytes;
149
150 if (!capable(CAP_SYS_ADMIN))
151 return -EPERM;
152
153 target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
154
155 balloon_set_new_target(target_bytes >> PAGE_SHIFT);
156
157 return count;
158}
159
160static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
161 show_target_kb, store_target_kb);
162
163
164static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr,
165 char *buf)
166{
167 return sprintf(buf, "%llu\n",
168 (unsigned long long)balloon_stats.target_pages
169 << PAGE_SHIFT);
170}
171
172static ssize_t store_target(struct sys_device *dev,
173 struct sysdev_attribute *attr,
174 const char *buf,
175 size_t count)
176{
177 char *endchar;
178 unsigned long long target_bytes;
179
180 if (!capable(CAP_SYS_ADMIN))
181 return -EPERM;
182
183 target_bytes = memparse(buf, &endchar);
184
185 balloon_set_new_target(target_bytes >> PAGE_SHIFT);
186
187 return count;
188}
189
190static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
191 show_target, store_target);
192
193
194static struct sysdev_attribute *balloon_attrs[] = {
195 &attr_target_kb,
196 &attr_target,
197 &attr_schedule_delay.attr,
198 &attr_max_schedule_delay.attr,
199 &attr_retry_count.attr,
200 &attr_max_retry_count.attr
201};
202
203static struct attribute *balloon_info_attrs[] = {
204 &attr_current_kb.attr,
205 &attr_low_kb.attr,
206 &attr_high_kb.attr,
207 NULL
208};
209
210static struct attribute_group balloon_info_group = {
211 .name = "info",
212 .attrs = balloon_info_attrs
213};
214
215static struct sysdev_class balloon_sysdev_class = {
216 .name = BALLOON_CLASS_NAME
217};
218
219static int register_balloon(struct sys_device *sysdev)
220{
221 int i, error;
222
223 error = sysdev_class_register(&balloon_sysdev_class);
224 if (error)
225 return error;
226
227 sysdev->id = 0;
228 sysdev->cls = &balloon_sysdev_class;
229
230 error = sysdev_register(sysdev);
231 if (error) {
232 sysdev_class_unregister(&balloon_sysdev_class);
233 return error;
234 }
235
236 for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
237 error = sysdev_create_file(sysdev, balloon_attrs[i]);
238 if (error)
239 goto fail;
240 }
241
242 error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
243 if (error)
244 goto fail;
245
246 return 0;
247
248 fail:
249 while (--i >= 0)
250 sysdev_remove_file(sysdev, balloon_attrs[i]);
251 sysdev_unregister(sysdev);
252 sysdev_class_unregister(&balloon_sysdev_class);
253 return error;
254}
255
256MODULE_LICENSE("GPL");
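Note on the sysfs interface kept in xen-balloon.c: the attributes registered above land under the sysdev class "xen_memory" with id 0, conventionally /sys/devices/system/xen_memory/xen_memory0/. target_kb takes a plain KiB value (simple_strtoull * 1024), while target accepts memparse() suffixes such as 512M; both require CAP_SYS_ADMIN. A minimal sketch of driving the balloon target from user space, with the sysfs path assumed from that layout rather than stated in this file:

	#include <stdio.h>
	#include <stdlib.h>

	/* Hedged sketch: ask the guest to balloon to the given size in KiB. */
	static int set_balloon_target_kb(unsigned long long kib)
	{
		const char *path =
			"/sys/devices/system/xen_memory/xen_memory0/target_kb";
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fprintf(f, "%llu\n", kib);
		return fclose(f);
	}

	int main(void)
	{
		/* 512 MiB; needs CAP_SYS_ADMIN, enforced by store_target_kb(). */
		return set_balloon_target_kb(512ULL * 1024) ? EXIT_FAILURE : EXIT_SUCCESS;
	}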