diff options
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/Kconfig | 10 | ||||
-rw-r--r-- | drivers/xen/Makefile | 6 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 359 | ||||
-rw-r--r-- | drivers/xen/gntalloc.c | 545 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 382 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 10 | ||||
-rw-r--r-- | drivers/xen/xen-balloon.c | 256 |
7 files changed, 1184 insertions, 384 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 07bec09d1dad..a59638b37c1a 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig | |||
@@ -76,10 +76,20 @@ config XEN_XENBUS_FRONTEND | |||
76 | config XEN_GNTDEV | 76 | config XEN_GNTDEV |
77 | tristate "userspace grant access device driver" | 77 | tristate "userspace grant access device driver" |
78 | depends on XEN | 78 | depends on XEN |
79 | default m | ||
79 | select MMU_NOTIFIER | 80 | select MMU_NOTIFIER |
80 | help | 81 | help |
81 | Allows userspace processes to use grants. | 82 | Allows userspace processes to use grants. |
82 | 83 | ||
84 | config XEN_GRANT_DEV_ALLOC | ||
85 | tristate "User-space grant reference allocator driver" | ||
86 | depends on XEN | ||
87 | default m | ||
88 | help | ||
89 | Allows userspace processes to create pages with access granted | ||
90 | to other domains. This can be used to implement frontend drivers | ||
91 | or as part of an inter-domain shared memory channel. | ||
92 | |||
83 | config XEN_PLATFORM_PCI | 93 | config XEN_PLATFORM_PCI |
84 | tristate "xen platform pci device driver" | 94 | tristate "xen platform pci device driver" |
85 | depends on XEN_PVHVM && PCI | 95 | depends on XEN_PVHVM && PCI |
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 5088cc2e6fe2..f420f1ff7f13 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-y += grant-table.o features.o events.o manage.o | 1 | obj-y += grant-table.o features.o events.o manage.o balloon.o |
2 | obj-y += xenbus/ | 2 | obj-y += xenbus/ |
3 | 3 | ||
4 | nostackp := $(call cc-option, -fno-stack-protector) | 4 | nostackp := $(call cc-option, -fno-stack-protector) |
@@ -7,9 +7,10 @@ CFLAGS_features.o := $(nostackp) | |||
7 | obj-$(CONFIG_BLOCK) += biomerge.o | 7 | obj-$(CONFIG_BLOCK) += biomerge.o |
8 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o | 8 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o |
9 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | 9 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o |
10 | obj-$(CONFIG_XEN_BALLOON) += balloon.o | 10 | obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o |
11 | obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o | 11 | obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o |
12 | obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o | 12 | obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o |
13 | obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o | ||
13 | obj-$(CONFIG_XENFS) += xenfs/ | 14 | obj-$(CONFIG_XENFS) += xenfs/ |
14 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o | 15 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o |
15 | obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o | 16 | obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o |
@@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0) += pci.o | |||
18 | 19 | ||
19 | xen-evtchn-y := evtchn.o | 20 | xen-evtchn-y := evtchn.o |
20 | xen-gntdev-y := gntdev.o | 21 | xen-gntdev-y := gntdev.o |
22 | xen-gntalloc-y := gntalloc.o | ||
21 | 23 | ||
22 | xen-platform-pci-y := platform-pci.o | 24 | xen-platform-pci-y := platform-pci.o |
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 718050ace08f..043af8ad6b60 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c | |||
@@ -1,6 +1,4 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * balloon.c | ||
3 | * | ||
4 | * Xen balloon driver - enables returning/claiming memory to/from Xen. | 2 | * Xen balloon driver - enables returning/claiming memory to/from Xen. |
5 | * | 3 | * |
6 | * Copyright (c) 2003, B Dragovic | 4 | * Copyright (c) 2003, B Dragovic |
@@ -33,7 +31,6 @@ | |||
33 | */ | 31 | */ |
34 | 32 | ||
35 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
36 | #include <linux/module.h> | ||
37 | #include <linux/sched.h> | 34 | #include <linux/sched.h> |
38 | #include <linux/errno.h> | 35 | #include <linux/errno.h> |
39 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
@@ -42,13 +39,11 @@ | |||
42 | #include <linux/highmem.h> | 39 | #include <linux/highmem.h> |
43 | #include <linux/mutex.h> | 40 | #include <linux/mutex.h> |
44 | #include <linux/list.h> | 41 | #include <linux/list.h> |
45 | #include <linux/sysdev.h> | ||
46 | #include <linux/gfp.h> | 42 | #include <linux/gfp.h> |
47 | 43 | ||
48 | #include <asm/page.h> | 44 | #include <asm/page.h> |
49 | #include <asm/pgalloc.h> | 45 | #include <asm/pgalloc.h> |
50 | #include <asm/pgtable.h> | 46 | #include <asm/pgtable.h> |
51 | #include <asm/uaccess.h> | ||
52 | #include <asm/tlb.h> | 47 | #include <asm/tlb.h> |
53 | #include <asm/e820.h> | 48 | #include <asm/e820.h> |
54 | 49 | ||
@@ -58,35 +53,29 @@ | |||
58 | #include <xen/xen.h> | 53 | #include <xen/xen.h> |
59 | #include <xen/interface/xen.h> | 54 | #include <xen/interface/xen.h> |
60 | #include <xen/interface/memory.h> | 55 | #include <xen/interface/memory.h> |
61 | #include <xen/xenbus.h> | 56 | #include <xen/balloon.h> |
62 | #include <xen/features.h> | 57 | #include <xen/features.h> |
63 | #include <xen/page.h> | 58 | #include <xen/page.h> |
64 | 59 | ||
65 | #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) | 60 | /* |
66 | 61 | * balloon_process() state: | |
67 | #define BALLOON_CLASS_NAME "xen_memory" | 62 | * |
63 | * BP_DONE: done or nothing to do, | ||
64 | * BP_EAGAIN: error, go to sleep, | ||
65 | * BP_ECANCELED: error, balloon operation canceled. | ||
66 | */ | ||
68 | 67 | ||
69 | struct balloon_stats { | 68 | enum bp_state { |
70 | /* We aim for 'current allocation' == 'target allocation'. */ | 69 | BP_DONE, |
71 | unsigned long current_pages; | 70 | BP_EAGAIN, |
72 | unsigned long target_pages; | 71 | BP_ECANCELED |
73 | /* | ||
74 | * Drivers may alter the memory reservation independently, but they | ||
75 | * must inform the balloon driver so we avoid hitting the hard limit. | ||
76 | */ | ||
77 | unsigned long driver_pages; | ||
78 | /* Number of pages in high- and low-memory balloons. */ | ||
79 | unsigned long balloon_low; | ||
80 | unsigned long balloon_high; | ||
81 | }; | 72 | }; |
82 | 73 | ||
83 | static DEFINE_MUTEX(balloon_mutex); | ||
84 | |||
85 | static struct sys_device balloon_sysdev; | ||
86 | 74 | ||
87 | static int register_balloon(struct sys_device *sysdev); | 75 | static DEFINE_MUTEX(balloon_mutex); |
88 | 76 | ||
89 | static struct balloon_stats balloon_stats; | 77 | struct balloon_stats balloon_stats; |
78 | EXPORT_SYMBOL_GPL(balloon_stats); | ||
90 | 79 | ||
91 | /* We increase/decrease in batches which fit in a page */ | 80 | /* We increase/decrease in batches which fit in a page */ |
92 | static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; | 81 | static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; |
@@ -104,8 +93,7 @@ static LIST_HEAD(ballooned_pages); | |||
104 | 93 | ||
105 | /* Main work function, always executed in process context. */ | 94 | /* Main work function, always executed in process context. */ |
106 | static void balloon_process(struct work_struct *work); | 95 | static void balloon_process(struct work_struct *work); |
107 | static DECLARE_WORK(balloon_worker, balloon_process); | 96 | static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); |
108 | static struct timer_list balloon_timer; | ||
109 | 97 | ||
110 | /* When ballooning out (allocating memory to return to Xen) we don't really | 98 | /* When ballooning out (allocating memory to return to Xen) we don't really |
111 | want the kernel to try too hard since that can trigger the oom killer. */ | 99 | want the kernel to try too hard since that can trigger the oom killer. */ |
@@ -140,14 +128,17 @@ static void balloon_append(struct page *page) | |||
140 | } | 128 | } |
141 | 129 | ||
142 | /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ | 130 | /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ |
143 | static struct page *balloon_retrieve(void) | 131 | static struct page *balloon_retrieve(bool prefer_highmem) |
144 | { | 132 | { |
145 | struct page *page; | 133 | struct page *page; |
146 | 134 | ||
147 | if (list_empty(&ballooned_pages)) | 135 | if (list_empty(&ballooned_pages)) |
148 | return NULL; | 136 | return NULL; |
149 | 137 | ||
150 | page = list_entry(ballooned_pages.next, struct page, lru); | 138 | if (prefer_highmem) |
139 | page = list_entry(ballooned_pages.prev, struct page, lru); | ||
140 | else | ||
141 | page = list_entry(ballooned_pages.next, struct page, lru); | ||
151 | list_del(&page->lru); | 142 | list_del(&page->lru); |
152 | 143 | ||
153 | if (PageHighMem(page)) { | 144 | if (PageHighMem(page)) { |
@@ -177,9 +168,29 @@ static struct page *balloon_next_page(struct page *page) | |||
177 | return list_entry(next, struct page, lru); | 168 | return list_entry(next, struct page, lru); |
178 | } | 169 | } |
179 | 170 | ||
180 | static void balloon_alarm(unsigned long unused) | 171 | static enum bp_state update_schedule(enum bp_state state) |
181 | { | 172 | { |
182 | schedule_work(&balloon_worker); | 173 | if (state == BP_DONE) { |
174 | balloon_stats.schedule_delay = 1; | ||
175 | balloon_stats.retry_count = 1; | ||
176 | return BP_DONE; | ||
177 | } | ||
178 | |||
179 | ++balloon_stats.retry_count; | ||
180 | |||
181 | if (balloon_stats.max_retry_count != RETRY_UNLIMITED && | ||
182 | balloon_stats.retry_count > balloon_stats.max_retry_count) { | ||
183 | balloon_stats.schedule_delay = 1; | ||
184 | balloon_stats.retry_count = 1; | ||
185 | return BP_ECANCELED; | ||
186 | } | ||
187 | |||
188 | balloon_stats.schedule_delay <<= 1; | ||
189 | |||
190 | if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay) | ||
191 | balloon_stats.schedule_delay = balloon_stats.max_schedule_delay; | ||
192 | |||
193 | return BP_EAGAIN; | ||
183 | } | 194 | } |
184 | 195 | ||
185 | static unsigned long current_target(void) | 196 | static unsigned long current_target(void) |
@@ -194,11 +205,11 @@ static unsigned long current_target(void) | |||
194 | return target; | 205 | return target; |
195 | } | 206 | } |
196 | 207 | ||
197 | static int increase_reservation(unsigned long nr_pages) | 208 | static enum bp_state increase_reservation(unsigned long nr_pages) |
198 | { | 209 | { |
210 | int rc; | ||
199 | unsigned long pfn, i; | 211 | unsigned long pfn, i; |
200 | struct page *page; | 212 | struct page *page; |
201 | long rc; | ||
202 | struct xen_memory_reservation reservation = { | 213 | struct xen_memory_reservation reservation = { |
203 | .address_bits = 0, | 214 | .address_bits = 0, |
204 | .extent_order = 0, | 215 | .extent_order = 0, |
@@ -210,7 +221,10 @@ static int increase_reservation(unsigned long nr_pages) | |||
210 | 221 | ||
211 | page = balloon_first_page(); | 222 | page = balloon_first_page(); |
212 | for (i = 0; i < nr_pages; i++) { | 223 | for (i = 0; i < nr_pages; i++) { |
213 | BUG_ON(page == NULL); | 224 | if (!page) { |
225 | nr_pages = i; | ||
226 | break; | ||
227 | } | ||
214 | frame_list[i] = page_to_pfn(page); | 228 | frame_list[i] = page_to_pfn(page); |
215 | page = balloon_next_page(page); | 229 | page = balloon_next_page(page); |
216 | } | 230 | } |
@@ -218,11 +232,11 @@ static int increase_reservation(unsigned long nr_pages) | |||
218 | set_xen_guest_handle(reservation.extent_start, frame_list); | 232 | set_xen_guest_handle(reservation.extent_start, frame_list); |
219 | reservation.nr_extents = nr_pages; | 233 | reservation.nr_extents = nr_pages; |
220 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); | 234 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); |
221 | if (rc < 0) | 235 | if (rc <= 0) |
222 | goto out; | 236 | return BP_EAGAIN; |
223 | 237 | ||
224 | for (i = 0; i < rc; i++) { | 238 | for (i = 0; i < rc; i++) { |
225 | page = balloon_retrieve(); | 239 | page = balloon_retrieve(false); |
226 | BUG_ON(page == NULL); | 240 | BUG_ON(page == NULL); |
227 | 241 | ||
228 | pfn = page_to_pfn(page); | 242 | pfn = page_to_pfn(page); |
@@ -249,15 +263,14 @@ static int increase_reservation(unsigned long nr_pages) | |||
249 | 263 | ||
250 | balloon_stats.current_pages += rc; | 264 | balloon_stats.current_pages += rc; |
251 | 265 | ||
252 | out: | 266 | return BP_DONE; |
253 | return rc < 0 ? rc : rc != nr_pages; | ||
254 | } | 267 | } |
255 | 268 | ||
256 | static int decrease_reservation(unsigned long nr_pages) | 269 | static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) |
257 | { | 270 | { |
271 | enum bp_state state = BP_DONE; | ||
258 | unsigned long pfn, i; | 272 | unsigned long pfn, i; |
259 | struct page *page; | 273 | struct page *page; |
260 | int need_sleep = 0; | ||
261 | int ret; | 274 | int ret; |
262 | struct xen_memory_reservation reservation = { | 275 | struct xen_memory_reservation reservation = { |
263 | .address_bits = 0, | 276 | .address_bits = 0, |
@@ -269,9 +282,9 @@ static int decrease_reservation(unsigned long nr_pages) | |||
269 | nr_pages = ARRAY_SIZE(frame_list); | 282 | nr_pages = ARRAY_SIZE(frame_list); |
270 | 283 | ||
271 | for (i = 0; i < nr_pages; i++) { | 284 | for (i = 0; i < nr_pages; i++) { |
272 | if ((page = alloc_page(GFP_BALLOON)) == NULL) { | 285 | if ((page = alloc_page(gfp)) == NULL) { |
273 | nr_pages = i; | 286 | nr_pages = i; |
274 | need_sleep = 1; | 287 | state = BP_EAGAIN; |
275 | break; | 288 | break; |
276 | } | 289 | } |
277 | 290 | ||
@@ -307,7 +320,7 @@ static int decrease_reservation(unsigned long nr_pages) | |||
307 | 320 | ||
308 | balloon_stats.current_pages -= nr_pages; | 321 | balloon_stats.current_pages -= nr_pages; |
309 | 322 | ||
310 | return need_sleep; | 323 | return state; |
311 | } | 324 | } |
312 | 325 | ||
313 | /* | 326 | /* |
@@ -318,77 +331,101 @@ static int decrease_reservation(unsigned long nr_pages) | |||
318 | */ | 331 | */ |
319 | static void balloon_process(struct work_struct *work) | 332 | static void balloon_process(struct work_struct *work) |
320 | { | 333 | { |
321 | int need_sleep = 0; | 334 | enum bp_state state = BP_DONE; |
322 | long credit; | 335 | long credit; |
323 | 336 | ||
324 | mutex_lock(&balloon_mutex); | 337 | mutex_lock(&balloon_mutex); |
325 | 338 | ||
326 | do { | 339 | do { |
327 | credit = current_target() - balloon_stats.current_pages; | 340 | credit = current_target() - balloon_stats.current_pages; |
341 | |||
328 | if (credit > 0) | 342 | if (credit > 0) |
329 | need_sleep = (increase_reservation(credit) != 0); | 343 | state = increase_reservation(credit); |
344 | |||
330 | if (credit < 0) | 345 | if (credit < 0) |
331 | need_sleep = (decrease_reservation(-credit) != 0); | 346 | state = decrease_reservation(-credit, GFP_BALLOON); |
347 | |||
348 | state = update_schedule(state); | ||
332 | 349 | ||
333 | #ifndef CONFIG_PREEMPT | 350 | #ifndef CONFIG_PREEMPT |
334 | if (need_resched()) | 351 | if (need_resched()) |
335 | schedule(); | 352 | schedule(); |
336 | #endif | 353 | #endif |
337 | } while ((credit != 0) && !need_sleep); | 354 | } while (credit && state == BP_DONE); |
338 | 355 | ||
339 | /* Schedule more work if there is some still to be done. */ | 356 | /* Schedule more work if there is some still to be done. */ |
340 | if (current_target() != balloon_stats.current_pages) | 357 | if (state == BP_EAGAIN) |
341 | mod_timer(&balloon_timer, jiffies + HZ); | 358 | schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ); |
342 | 359 | ||
343 | mutex_unlock(&balloon_mutex); | 360 | mutex_unlock(&balloon_mutex); |
344 | } | 361 | } |
345 | 362 | ||
346 | /* Resets the Xen limit, sets new target, and kicks off processing. */ | 363 | /* Resets the Xen limit, sets new target, and kicks off processing. */ |
347 | static void balloon_set_new_target(unsigned long target) | 364 | void balloon_set_new_target(unsigned long target) |
348 | { | 365 | { |
349 | /* No need for lock. Not read-modify-write updates. */ | 366 | /* No need for lock. Not read-modify-write updates. */ |
350 | balloon_stats.target_pages = target; | 367 | balloon_stats.target_pages = target; |
351 | schedule_work(&balloon_worker); | 368 | schedule_delayed_work(&balloon_worker, 0); |
352 | } | 369 | } |
370 | EXPORT_SYMBOL_GPL(balloon_set_new_target); | ||
353 | 371 | ||
354 | static struct xenbus_watch target_watch = | 372 | /** |
355 | { | 373 | * alloc_xenballooned_pages - get pages that have been ballooned out |
356 | .node = "memory/target" | 374 | * @nr_pages: Number of pages to get |
357 | }; | 375 | * @pages: pages returned |
358 | 376 | * @return 0 on success, error otherwise | |
359 | /* React to a change in the target key */ | 377 | */ |
360 | static void watch_target(struct xenbus_watch *watch, | 378 | int alloc_xenballooned_pages(int nr_pages, struct page** pages) |
361 | const char **vec, unsigned int len) | ||
362 | { | 379 | { |
363 | unsigned long long new_target; | 380 | int pgno = 0; |
364 | int err; | 381 | struct page* page; |
365 | 382 | mutex_lock(&balloon_mutex); | |
366 | err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); | 383 | while (pgno < nr_pages) { |
367 | if (err != 1) { | 384 | page = balloon_retrieve(true); |
368 | /* This is ok (for domain0 at least) - so just return */ | 385 | if (page) { |
369 | return; | 386 | pages[pgno++] = page; |
387 | } else { | ||
388 | enum bp_state st; | ||
389 | st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER); | ||
390 | if (st != BP_DONE) | ||
391 | goto out_undo; | ||
392 | } | ||
370 | } | 393 | } |
371 | 394 | mutex_unlock(&balloon_mutex); | |
372 | /* The given memory/target value is in KiB, so it needs converting to | 395 | return 0; |
373 | * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. | 396 | out_undo: |
374 | */ | 397 | while (pgno) |
375 | balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); | 398 | balloon_append(pages[--pgno]); |
399 | /* Free the memory back to the kernel soon */ | ||
400 | schedule_delayed_work(&balloon_worker, 0); | ||
401 | mutex_unlock(&balloon_mutex); | ||
402 | return -ENOMEM; | ||
376 | } | 403 | } |
404 | EXPORT_SYMBOL(alloc_xenballooned_pages); | ||
377 | 405 | ||
378 | static int balloon_init_watcher(struct notifier_block *notifier, | 406 | /** |
379 | unsigned long event, | 407 | * free_xenballooned_pages - return pages retrieved with alloc_xenballooned_pages |
380 | void *data) | 408 | * @nr_pages: Number of pages |
409 | * @pages: pages to return | ||
410 | */ | ||
411 | void free_xenballooned_pages(int nr_pages, struct page** pages) | ||
381 | { | 412 | { |
382 | int err; | 413 | int i; |
383 | 414 | ||
384 | err = register_xenbus_watch(&target_watch); | 415 | mutex_lock(&balloon_mutex); |
385 | if (err) | ||
386 | printk(KERN_ERR "Failed to set balloon watcher\n"); | ||
387 | 416 | ||
388 | return NOTIFY_DONE; | 417 | for (i = 0; i < nr_pages; i++) { |
389 | } | 418 | if (pages[i]) |
419 | balloon_append(pages[i]); | ||
420 | } | ||
421 | |||
422 | /* The balloon may be too large now. Shrink it if needed. */ | ||
423 | if (current_target() != balloon_stats.current_pages) | ||
424 | schedule_delayed_work(&balloon_worker, 0); | ||
390 | 425 | ||
391 | static struct notifier_block xenstore_notifier; | 426 | mutex_unlock(&balloon_mutex); |
427 | } | ||
428 | EXPORT_SYMBOL(free_xenballooned_pages); | ||
392 | 429 | ||
393 | static int __init balloon_init(void) | 430 | static int __init balloon_init(void) |
394 | { | 431 | { |
@@ -398,7 +435,7 @@ static int __init balloon_init(void) | |||
398 | if (!xen_domain()) | 435 | if (!xen_domain()) |
399 | return -ENODEV; | 436 | return -ENODEV; |
400 | 437 | ||
401 | pr_info("xen_balloon: Initialising balloon driver.\n"); | 438 | pr_info("xen/balloon: Initialising balloon driver.\n"); |
402 | 439 | ||
403 | if (xen_pv_domain()) | 440 | if (xen_pv_domain()) |
404 | nr_pages = xen_start_info->nr_pages; | 441 | nr_pages = xen_start_info->nr_pages; |
@@ -408,13 +445,11 @@ static int __init balloon_init(void) | |||
408 | balloon_stats.target_pages = balloon_stats.current_pages; | 445 | balloon_stats.target_pages = balloon_stats.current_pages; |
409 | balloon_stats.balloon_low = 0; | 446 | balloon_stats.balloon_low = 0; |
410 | balloon_stats.balloon_high = 0; | 447 | balloon_stats.balloon_high = 0; |
411 | balloon_stats.driver_pages = 0UL; | ||
412 | |||
413 | init_timer(&balloon_timer); | ||
414 | balloon_timer.data = 0; | ||
415 | balloon_timer.function = balloon_alarm; | ||
416 | 448 | ||
417 | register_balloon(&balloon_sysdev); | 449 | balloon_stats.schedule_delay = 1; |
450 | balloon_stats.max_schedule_delay = 32; | ||
451 | balloon_stats.retry_count = 1; | ||
452 | balloon_stats.max_retry_count = RETRY_UNLIMITED; | ||
418 | 453 | ||
419 | /* | 454 | /* |
420 | * Initialise the balloon with excess memory space. We need | 455 | * Initialise the balloon with excess memory space. We need |
@@ -436,153 +471,9 @@ static int __init balloon_init(void) | |||
436 | __balloon_append(page); | 471 | __balloon_append(page); |
437 | } | 472 | } |
438 | 473 | ||
439 | target_watch.callback = watch_target; | ||
440 | xenstore_notifier.notifier_call = balloon_init_watcher; | ||
441 | |||
442 | register_xenstore_notifier(&xenstore_notifier); | ||
443 | |||
444 | return 0; | 474 | return 0; |
445 | } | 475 | } |
446 | 476 | ||
447 | subsys_initcall(balloon_init); | 477 | subsys_initcall(balloon_init); |
448 | 478 | ||
449 | static void balloon_exit(void) | ||
450 | { | ||
451 | /* XXX - release balloon here */ | ||
452 | return; | ||
453 | } | ||
454 | |||
455 | module_exit(balloon_exit); | ||
456 | |||
457 | #define BALLOON_SHOW(name, format, args...) \ | ||
458 | static ssize_t show_##name(struct sys_device *dev, \ | ||
459 | struct sysdev_attribute *attr, \ | ||
460 | char *buf) \ | ||
461 | { \ | ||
462 | return sprintf(buf, format, ##args); \ | ||
463 | } \ | ||
464 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) | ||
465 | |||
466 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); | ||
467 | BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); | ||
468 | BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); | ||
469 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); | ||
470 | |||
471 | static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, | ||
472 | char *buf) | ||
473 | { | ||
474 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); | ||
475 | } | ||
476 | |||
477 | static ssize_t store_target_kb(struct sys_device *dev, | ||
478 | struct sysdev_attribute *attr, | ||
479 | const char *buf, | ||
480 | size_t count) | ||
481 | { | ||
482 | char *endchar; | ||
483 | unsigned long long target_bytes; | ||
484 | |||
485 | if (!capable(CAP_SYS_ADMIN)) | ||
486 | return -EPERM; | ||
487 | |||
488 | target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; | ||
489 | |||
490 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | ||
491 | |||
492 | return count; | ||
493 | } | ||
494 | |||
495 | static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, | ||
496 | show_target_kb, store_target_kb); | ||
497 | |||
498 | |||
499 | static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr, | ||
500 | char *buf) | ||
501 | { | ||
502 | return sprintf(buf, "%llu\n", | ||
503 | (unsigned long long)balloon_stats.target_pages | ||
504 | << PAGE_SHIFT); | ||
505 | } | ||
506 | |||
507 | static ssize_t store_target(struct sys_device *dev, | ||
508 | struct sysdev_attribute *attr, | ||
509 | const char *buf, | ||
510 | size_t count) | ||
511 | { | ||
512 | char *endchar; | ||
513 | unsigned long long target_bytes; | ||
514 | |||
515 | if (!capable(CAP_SYS_ADMIN)) | ||
516 | return -EPERM; | ||
517 | |||
518 | target_bytes = memparse(buf, &endchar); | ||
519 | |||
520 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | ||
521 | |||
522 | return count; | ||
523 | } | ||
524 | |||
525 | static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, | ||
526 | show_target, store_target); | ||
527 | |||
528 | |||
529 | static struct sysdev_attribute *balloon_attrs[] = { | ||
530 | &attr_target_kb, | ||
531 | &attr_target, | ||
532 | }; | ||
533 | |||
534 | static struct attribute *balloon_info_attrs[] = { | ||
535 | &attr_current_kb.attr, | ||
536 | &attr_low_kb.attr, | ||
537 | &attr_high_kb.attr, | ||
538 | &attr_driver_kb.attr, | ||
539 | NULL | ||
540 | }; | ||
541 | |||
542 | static struct attribute_group balloon_info_group = { | ||
543 | .name = "info", | ||
544 | .attrs = balloon_info_attrs, | ||
545 | }; | ||
546 | |||
547 | static struct sysdev_class balloon_sysdev_class = { | ||
548 | .name = BALLOON_CLASS_NAME, | ||
549 | }; | ||
550 | |||
551 | static int register_balloon(struct sys_device *sysdev) | ||
552 | { | ||
553 | int i, error; | ||
554 | |||
555 | error = sysdev_class_register(&balloon_sysdev_class); | ||
556 | if (error) | ||
557 | return error; | ||
558 | |||
559 | sysdev->id = 0; | ||
560 | sysdev->cls = &balloon_sysdev_class; | ||
561 | |||
562 | error = sysdev_register(sysdev); | ||
563 | if (error) { | ||
564 | sysdev_class_unregister(&balloon_sysdev_class); | ||
565 | return error; | ||
566 | } | ||
567 | |||
568 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { | ||
569 | error = sysdev_create_file(sysdev, balloon_attrs[i]); | ||
570 | if (error) | ||
571 | goto fail; | ||
572 | } | ||
573 | |||
574 | error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); | ||
575 | if (error) | ||
576 | goto fail; | ||
577 | |||
578 | return 0; | ||
579 | |||
580 | fail: | ||
581 | while (--i >= 0) | ||
582 | sysdev_remove_file(sysdev, balloon_attrs[i]); | ||
583 | sysdev_unregister(sysdev); | ||
584 | sysdev_class_unregister(&balloon_sysdev_class); | ||
585 | return error; | ||
586 | } | ||
587 | |||
588 | MODULE_LICENSE("GPL"); | 479 | MODULE_LICENSE("GPL"); |
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c new file mode 100644 index 000000000000..a7ffdfe19fc9 --- /dev/null +++ b/drivers/xen/gntalloc.c | |||
@@ -0,0 +1,545 @@ | |||
1 | /****************************************************************************** | ||
2 | * gntalloc.c | ||
3 | * | ||
4 | * Device for creating grant references (in user-space) that may be shared | ||
5 | * with other domains. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * This driver exists to allow userspace programs in Linux to allocate kernel | ||
19 | * memory that will later be shared with another domain. Without this device, | ||
20 | * Linux userspace programs cannot create grant references. | ||
21 | * | ||
22 | * How this stuff works: | ||
23 | * X -> granting a page to Y | ||
24 | * Y -> mapping the grant from X | ||
25 | * | ||
26 | * 1. X uses the gntalloc device to allocate a page of kernel memory, P. | ||
27 | * 2. X creates an entry in the grant table that says domid(Y) can access P. | ||
28 | * This is done without a hypercall unless the grant table needs expansion. | ||
29 | * 3. X gives the grant reference identifier, GREF, to Y. | ||
30 | * 4. Y maps the page, either directly into kernel memory for use in a backend | ||
31 | * driver, or via the gntdev device to map into the address space of an | ||
32 | * application running in Y. This is the first point at which Xen does any | ||
33 | * tracking of the page. | ||
34 | * 5. A program in X mmap()s a segment of the gntalloc device that corresponds | ||
35 | * to the shared page, and can now communicate with Y over the shared page. | ||
36 | * | ||
37 | * | ||
38 | * NOTE TO USERSPACE LIBRARIES: | ||
39 | * The grant allocation and mmap()ing are, naturally, two separate operations. | ||
40 | * You set up the sharing by calling the create ioctl() and then the mmap(). | ||
41 | * Teardown requires munmap() and either close() or ioctl(). | ||
42 | * | ||
43 | * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant | ||
44 | * reference, this device can be used to consume kernel memory by leaving grant | ||
45 | * references mapped by another domain when an application exits. Therefore, | ||
46 | * there is a global limit on the number of pages that can be allocated. When | ||
47 | * all references to the page are unmapped, it will be freed during the next | ||
48 | * grant operation. | ||
49 | */ | ||
50 | |||
51 | #include <linux/atomic.h> | ||
52 | #include <linux/module.h> | ||
53 | #include <linux/miscdevice.h> | ||
54 | #include <linux/kernel.h> | ||
55 | #include <linux/init.h> | ||
56 | #include <linux/slab.h> | ||
57 | #include <linux/fs.h> | ||
58 | #include <linux/device.h> | ||
59 | #include <linux/mm.h> | ||
60 | #include <linux/uaccess.h> | ||
61 | #include <linux/types.h> | ||
62 | #include <linux/list.h> | ||
63 | #include <linux/highmem.h> | ||
64 | |||
65 | #include <xen/xen.h> | ||
66 | #include <xen/page.h> | ||
67 | #include <xen/grant_table.h> | ||
68 | #include <xen/gntalloc.h> | ||
69 | #include <xen/events.h> | ||
70 | |||
71 | static int limit = 1024; | ||
72 | module_param(limit, int, 0644); | ||
73 | MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " | ||
74 | "the gntalloc device"); | ||
75 | |||
76 | static LIST_HEAD(gref_list); | ||
77 | static DEFINE_SPINLOCK(gref_lock); | ||
78 | static int gref_size; | ||
79 | |||
80 | struct notify_info { | ||
81 | uint16_t pgoff:12; /* Bits 0-11: Offset of the byte to clear */ | ||
82 | uint16_t flags:2; /* Bits 12-13: Unmap notification flags */ | ||
83 | int event; /* Port (event channel) to notify */ | ||
84 | }; | ||
85 | |||
86 | /* Metadata on a grant reference. */ | ||
87 | struct gntalloc_gref { | ||
88 | struct list_head next_gref; /* list entry gref_list */ | ||
89 | struct list_head next_file; /* list entry file->list, if open */ | ||
90 | struct page *page; /* The shared page */ | ||
91 | uint64_t file_index; /* File offset for mmap() */ | ||
92 | unsigned int users; /* Use count - when zero, waiting on Xen */ | ||
93 | grant_ref_t gref_id; /* The grant reference number */ | ||
94 | struct notify_info notify; /* Unmap notification */ | ||
95 | }; | ||
96 | |||
97 | struct gntalloc_file_private_data { | ||
98 | struct list_head list; | ||
99 | uint64_t index; | ||
100 | }; | ||
101 | |||
102 | static void __del_gref(struct gntalloc_gref *gref); | ||
103 | |||
104 | static void do_cleanup(void) | ||
105 | { | ||
106 | struct gntalloc_gref *gref, *n; | ||
107 | list_for_each_entry_safe(gref, n, &gref_list, next_gref) { | ||
108 | if (!gref->users) | ||
109 | __del_gref(gref); | ||
110 | } | ||
111 | } | ||
112 | |||
113 | static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, | ||
114 | uint32_t *gref_ids, struct gntalloc_file_private_data *priv) | ||
115 | { | ||
116 | int i, rc, readonly; | ||
117 | LIST_HEAD(queue_gref); | ||
118 | LIST_HEAD(queue_file); | ||
119 | struct gntalloc_gref *gref; | ||
120 | |||
121 | readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); | ||
122 | rc = -ENOMEM; | ||
123 | for (i = 0; i < op->count; i++) { | ||
124 | gref = kzalloc(sizeof(*gref), GFP_KERNEL); | ||
125 | if (!gref) | ||
126 | goto undo; | ||
127 | list_add_tail(&gref->next_gref, &queue_gref); | ||
128 | list_add_tail(&gref->next_file, &queue_file); | ||
129 | gref->users = 1; | ||
130 | gref->file_index = op->index + i * PAGE_SIZE; | ||
131 | gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); | ||
132 | if (!gref->page) | ||
133 | goto undo; | ||
134 | |||
135 | /* Grant foreign access to the page. */ | ||
136 | gref->gref_id = gnttab_grant_foreign_access(op->domid, | ||
137 | pfn_to_mfn(page_to_pfn(gref->page)), readonly); | ||
138 | if (gref->gref_id < 0) { | ||
139 | rc = gref->gref_id; | ||
140 | goto undo; | ||
141 | } | ||
142 | gref_ids[i] = gref->gref_id; | ||
143 | } | ||
144 | |||
145 | /* Add to gref lists. */ | ||
146 | spin_lock(&gref_lock); | ||
147 | list_splice_tail(&queue_gref, &gref_list); | ||
148 | list_splice_tail(&queue_file, &priv->list); | ||
149 | spin_unlock(&gref_lock); | ||
150 | |||
151 | return 0; | ||
152 | |||
153 | undo: | ||
154 | spin_lock(&gref_lock); | ||
155 | gref_size -= (op->count - i); | ||
156 | |||
157 | list_for_each_entry(gref, &queue_file, next_file) { | ||
158 | /* __del_gref does not remove from queue_file */ | ||
159 | __del_gref(gref); | ||
160 | } | ||
161 | |||
162 | /* It's possible for the target domain to map the just-allocated grant | ||
163 | * references by blindly guessing their IDs; if this is done, then | ||
164 | * __del_gref will leave them in the queue_gref list. They need to be | ||
165 | * added to the global list so that we can free them when they are no | ||
166 | * longer referenced. | ||
167 | */ | ||
168 | if (unlikely(!list_empty(&queue_gref))) | ||
169 | list_splice_tail(&queue_gref, &gref_list); | ||
170 | spin_unlock(&gref_lock); | ||
171 | return rc; | ||
172 | } | ||
173 | |||
174 | static void __del_gref(struct gntalloc_gref *gref) | ||
175 | { | ||
176 | if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { | ||
177 | uint8_t *tmp = kmap(gref->page); | ||
178 | tmp[gref->notify.pgoff] = 0; | ||
179 | kunmap(gref->page); | ||
180 | } | ||
181 | if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) | ||
182 | notify_remote_via_evtchn(gref->notify.event); | ||
183 | |||
184 | gref->notify.flags = 0; | ||
185 | |||
186 | if (gref->gref_id > 0) { | ||
187 | if (gnttab_query_foreign_access(gref->gref_id)) | ||
188 | return; | ||
189 | |||
190 | if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) | ||
191 | return; | ||
192 | } | ||
193 | |||
194 | gref_size--; | ||
195 | list_del(&gref->next_gref); | ||
196 | |||
197 | if (gref->page) | ||
198 | __free_page(gref->page); | ||
199 | |||
200 | kfree(gref); | ||
201 | } | ||
202 | |||
203 | /* finds contiguous grant references in a file, returns the first */ | ||
204 | static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv, | ||
205 | uint64_t index, uint32_t count) | ||
206 | { | ||
207 | struct gntalloc_gref *rv = NULL, *gref; | ||
208 | list_for_each_entry(gref, &priv->list, next_file) { | ||
209 | if (gref->file_index == index && !rv) | ||
210 | rv = gref; | ||
211 | if (rv) { | ||
212 | if (gref->file_index != index) | ||
213 | return NULL; | ||
214 | index += PAGE_SIZE; | ||
215 | count--; | ||
216 | if (count == 0) | ||
217 | return rv; | ||
218 | } | ||
219 | } | ||
220 | return NULL; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * ------------------------------------- | ||
225 | * File operations. | ||
226 | * ------------------------------------- | ||
227 | */ | ||
228 | static int gntalloc_open(struct inode *inode, struct file *filp) | ||
229 | { | ||
230 | struct gntalloc_file_private_data *priv; | ||
231 | |||
232 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
233 | if (!priv) | ||
234 | goto out_nomem; | ||
235 | INIT_LIST_HEAD(&priv->list); | ||
236 | |||
237 | filp->private_data = priv; | ||
238 | |||
239 | pr_debug("%s: priv %p\n", __func__, priv); | ||
240 | |||
241 | return 0; | ||
242 | |||
243 | out_nomem: | ||
244 | return -ENOMEM; | ||
245 | } | ||
246 | |||
247 | static int gntalloc_release(struct inode *inode, struct file *filp) | ||
248 | { | ||
249 | struct gntalloc_file_private_data *priv = filp->private_data; | ||
250 | struct gntalloc_gref *gref; | ||
251 | |||
252 | pr_debug("%s: priv %p\n", __func__, priv); | ||
253 | |||
254 | spin_lock(&gref_lock); | ||
255 | while (!list_empty(&priv->list)) { | ||
256 | gref = list_entry(priv->list.next, | ||
257 | struct gntalloc_gref, next_file); | ||
258 | list_del(&gref->next_file); | ||
259 | gref->users--; | ||
260 | if (gref->users == 0) | ||
261 | __del_gref(gref); | ||
262 | } | ||
263 | kfree(priv); | ||
264 | spin_unlock(&gref_lock); | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, | ||
270 | struct ioctl_gntalloc_alloc_gref __user *arg) | ||
271 | { | ||
272 | int rc = 0; | ||
273 | struct ioctl_gntalloc_alloc_gref op; | ||
274 | uint32_t *gref_ids; | ||
275 | |||
276 | pr_debug("%s: priv %p\n", __func__, priv); | ||
277 | |||
278 | if (copy_from_user(&op, arg, sizeof(op))) { | ||
279 | rc = -EFAULT; | ||
280 | goto out; | ||
281 | } | ||
282 | |||
283 | gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); | ||
284 | if (!gref_ids) { | ||
285 | rc = -ENOMEM; | ||
286 | goto out; | ||
287 | } | ||
288 | |||
289 | spin_lock(&gref_lock); | ||
290 | /* Clean up pages that were at zero (local) users but were still mapped | ||
291 | * by remote domains. Since those pages count towards the limit that we | ||
292 | * are about to enforce, removing them here is a good idea. | ||
293 | */ | ||
294 | do_cleanup(); | ||
295 | if (gref_size + op.count > limit) { | ||
296 | spin_unlock(&gref_lock); | ||
297 | rc = -ENOSPC; | ||
298 | goto out_free; | ||
299 | } | ||
300 | gref_size += op.count; | ||
301 | op.index = priv->index; | ||
302 | priv->index += op.count * PAGE_SIZE; | ||
303 | spin_unlock(&gref_lock); | ||
304 | |||
305 | rc = add_grefs(&op, gref_ids, priv); | ||
306 | if (rc < 0) | ||
307 | goto out_free; | ||
308 | |||
309 | /* Once we finish add_grefs, it is unsafe to touch the new reference, | ||
310 | * since it is possible for a concurrent ioctl to remove it (by guessing | ||
311 | * its index). If the userspace application doesn't provide valid memory | ||
312 | * to write the IDs to, then it will need to close the file in order to | ||
313 | * release - which it will do by segfaulting when it tries to access the | ||
314 | * IDs to close them. | ||
315 | */ | ||
316 | if (copy_to_user(arg, &op, sizeof(op))) { | ||
317 | rc = -EFAULT; | ||
318 | goto out_free; | ||
319 | } | ||
320 | if (copy_to_user(arg->gref_ids, gref_ids, | ||
321 | sizeof(gref_ids[0]) * op.count)) { | ||
322 | rc = -EFAULT; | ||
323 | goto out_free; | ||
324 | } | ||
325 | |||
326 | out_free: | ||
327 | kfree(gref_ids); | ||
328 | out: | ||
329 | return rc; | ||
330 | } | ||
331 | |||
332 | static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, | ||
333 | void __user *arg) | ||
334 | { | ||
335 | int i, rc = 0; | ||
336 | struct ioctl_gntalloc_dealloc_gref op; | ||
337 | struct gntalloc_gref *gref, *n; | ||
338 | |||
339 | pr_debug("%s: priv %p\n", __func__, priv); | ||
340 | |||
341 | if (copy_from_user(&op, arg, sizeof(op))) { | ||
342 | rc = -EFAULT; | ||
343 | goto dealloc_grant_out; | ||
344 | } | ||
345 | |||
346 | spin_lock(&gref_lock); | ||
347 | gref = find_grefs(priv, op.index, op.count); | ||
348 | if (gref) { | ||
349 | /* Remove from the file list only, and decrease reference count. | ||
350 | * The later call to do_cleanup() will remove from gref_list and | ||
351 | * free the memory if the pages aren't mapped anywhere. | ||
352 | */ | ||
353 | for (i = 0; i < op.count; i++) { | ||
354 | n = list_entry(gref->next_file.next, | ||
355 | struct gntalloc_gref, next_file); | ||
356 | list_del(&gref->next_file); | ||
357 | gref->users--; | ||
358 | gref = n; | ||
359 | } | ||
360 | } else { | ||
361 | rc = -EINVAL; | ||
362 | } | ||
363 | |||
364 | do_cleanup(); | ||
365 | |||
366 | spin_unlock(&gref_lock); | ||
367 | dealloc_grant_out: | ||
368 | return rc; | ||
369 | } | ||
370 | |||
371 | static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv, | ||
372 | void __user *arg) | ||
373 | { | ||
374 | struct ioctl_gntalloc_unmap_notify op; | ||
375 | struct gntalloc_gref *gref; | ||
376 | uint64_t index; | ||
377 | int pgoff; | ||
378 | int rc; | ||
379 | |||
380 | if (copy_from_user(&op, arg, sizeof(op))) | ||
381 | return -EFAULT; | ||
382 | |||
383 | index = op.index & ~(PAGE_SIZE - 1); | ||
384 | pgoff = op.index & (PAGE_SIZE - 1); | ||
385 | |||
386 | spin_lock(&gref_lock); | ||
387 | |||
388 | gref = find_grefs(priv, index, 1); | ||
389 | if (!gref) { | ||
390 | rc = -ENOENT; | ||
391 | goto unlock_out; | ||
392 | } | ||
393 | |||
394 | if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) { | ||
395 | rc = -EINVAL; | ||
396 | goto unlock_out; | ||
397 | } | ||
398 | |||
399 | gref->notify.flags = op.action; | ||
400 | gref->notify.pgoff = pgoff; | ||
401 | gref->notify.event = op.event_channel_port; | ||
402 | rc = 0; | ||
403 | unlock_out: | ||
404 | spin_unlock(&gref_lock); | ||
405 | return rc; | ||
406 | } | ||
407 | |||
408 | static long gntalloc_ioctl(struct file *filp, unsigned int cmd, | ||
409 | unsigned long arg) | ||
410 | { | ||
411 | struct gntalloc_file_private_data *priv = filp->private_data; | ||
412 | |||
413 | switch (cmd) { | ||
414 | case IOCTL_GNTALLOC_ALLOC_GREF: | ||
415 | return gntalloc_ioctl_alloc(priv, (void __user *)arg); | ||
416 | |||
417 | case IOCTL_GNTALLOC_DEALLOC_GREF: | ||
418 | return gntalloc_ioctl_dealloc(priv, (void __user *)arg); | ||
419 | |||
420 | case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY: | ||
421 | return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg); | ||
422 | |||
423 | default: | ||
424 | return -ENOIOCTLCMD; | ||
425 | } | ||
426 | |||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | static void gntalloc_vma_close(struct vm_area_struct *vma) | ||
431 | { | ||
432 | struct gntalloc_gref *gref = vma->vm_private_data; | ||
433 | if (!gref) | ||
434 | return; | ||
435 | |||
436 | spin_lock(&gref_lock); | ||
437 | gref->users--; | ||
438 | if (gref->users == 0) | ||
439 | __del_gref(gref); | ||
440 | spin_unlock(&gref_lock); | ||
441 | } | ||
442 | |||
443 | static struct vm_operations_struct gntalloc_vmops = { | ||
444 | .close = gntalloc_vma_close, | ||
445 | }; | ||
446 | |||
447 | static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) | ||
448 | { | ||
449 | struct gntalloc_file_private_data *priv = filp->private_data; | ||
450 | struct gntalloc_gref *gref; | ||
451 | int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | ||
452 | int rv, i; | ||
453 | |||
454 | pr_debug("%s: priv %p, page %lu+%d\n", __func__, | ||
455 | priv, vma->vm_pgoff, count); | ||
456 | |||
457 | if (!(vma->vm_flags & VM_SHARED)) { | ||
458 | printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); | ||
459 | return -EINVAL; | ||
460 | } | ||
461 | |||
462 | spin_lock(&gref_lock); | ||
463 | gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); | ||
464 | if (gref == NULL) { | ||
465 | rv = -ENOENT; | ||
466 | pr_debug("%s: Could not find grant reference", | ||
467 | __func__); | ||
468 | goto out_unlock; | ||
469 | } | ||
470 | |||
471 | vma->vm_private_data = gref; | ||
472 | |||
473 | vma->vm_flags |= VM_RESERVED; | ||
474 | vma->vm_flags |= VM_DONTCOPY; | ||
475 | vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; | ||
476 | |||
477 | vma->vm_ops = &gntalloc_vmops; | ||
478 | |||
479 | for (i = 0; i < count; i++) { | ||
480 | gref->users++; | ||
481 | rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE, | ||
482 | gref->page); | ||
483 | if (rv) | ||
484 | goto out_unlock; | ||
485 | |||
486 | gref = list_entry(gref->next_file.next, | ||
487 | struct gntalloc_gref, next_file); | ||
488 | } | ||
489 | rv = 0; | ||
490 | |||
491 | out_unlock: | ||
492 | spin_unlock(&gref_lock); | ||
493 | return rv; | ||
494 | } | ||
495 | |||
496 | static const struct file_operations gntalloc_fops = { | ||
497 | .owner = THIS_MODULE, | ||
498 | .open = gntalloc_open, | ||
499 | .release = gntalloc_release, | ||
500 | .unlocked_ioctl = gntalloc_ioctl, | ||
501 | .mmap = gntalloc_mmap | ||
502 | }; | ||
503 | |||
504 | /* | ||
505 | * ------------------------------------- | ||
506 | * Module creation/destruction. | ||
507 | * ------------------------------------- | ||
508 | */ | ||
509 | static struct miscdevice gntalloc_miscdev = { | ||
510 | .minor = MISC_DYNAMIC_MINOR, | ||
511 | .name = "xen/gntalloc", | ||
512 | .fops = &gntalloc_fops, | ||
513 | }; | ||
514 | |||
515 | static int __init gntalloc_init(void) | ||
516 | { | ||
517 | int err; | ||
518 | |||
519 | if (!xen_domain()) | ||
520 | return -ENODEV; | ||
521 | |||
522 | err = misc_register(&gntalloc_miscdev); | ||
523 | if (err != 0) { | ||
524 | printk(KERN_ERR "Could not register misc gntalloc device\n"); | ||
525 | return err; | ||
526 | } | ||
527 | |||
528 | pr_debug("Created grant allocation device at %d,%d\n", | ||
529 | MISC_MAJOR, gntalloc_miscdev.minor); | ||
530 | |||
531 | return 0; | ||
532 | } | ||
533 | |||
534 | static void __exit gntalloc_exit(void) | ||
535 | { | ||
536 | misc_deregister(&gntalloc_miscdev); | ||
537 | } | ||
538 | |||
539 | module_init(gntalloc_init); | ||
540 | module_exit(gntalloc_exit); | ||
541 | |||
542 | MODULE_LICENSE("GPL"); | ||
543 | MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, " | ||
544 | "Daniel De Graaf <dgdegra@tycho.nsa.gov>"); | ||
545 | MODULE_DESCRIPTION("User-space grant reference allocator driver"); | ||
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1e31cdcdae1e..017ce600fbc6 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c | |||
@@ -32,10 +32,13 @@ | |||
32 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
33 | #include <linux/spinlock.h> | 33 | #include <linux/spinlock.h> |
34 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
35 | #include <linux/highmem.h> | ||
35 | 36 | ||
36 | #include <xen/xen.h> | 37 | #include <xen/xen.h> |
37 | #include <xen/grant_table.h> | 38 | #include <xen/grant_table.h> |
39 | #include <xen/balloon.h> | ||
38 | #include <xen/gntdev.h> | 40 | #include <xen/gntdev.h> |
41 | #include <xen/events.h> | ||
39 | #include <asm/xen/hypervisor.h> | 42 | #include <asm/xen/hypervisor.h> |
40 | #include <asm/xen/hypercall.h> | 43 | #include <asm/xen/hypercall.h> |
41 | #include <asm/xen/page.h> | 44 | #include <asm/xen/page.h> |
@@ -45,35 +48,46 @@ MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, " | |||
45 | "Gerd Hoffmann <kraxel@redhat.com>"); | 48 | "Gerd Hoffmann <kraxel@redhat.com>"); |
46 | MODULE_DESCRIPTION("User-space granted page access driver"); | 49 | MODULE_DESCRIPTION("User-space granted page access driver"); |
47 | 50 | ||
48 | static int limit = 1024; | 51 | static int limit = 1024*1024; |
49 | module_param(limit, int, 0644); | 52 | module_param(limit, int, 0644); |
50 | MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at " | 53 | MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " |
51 | "once by a gntdev instance"); | 54 | "the gntdev device"); |
55 | |||
56 | static atomic_t pages_mapped = ATOMIC_INIT(0); | ||
57 | |||
58 | static int use_ptemod; | ||
52 | 59 | ||
53 | struct gntdev_priv { | 60 | struct gntdev_priv { |
54 | struct list_head maps; | 61 | struct list_head maps; |
55 | uint32_t used; | ||
56 | uint32_t limit; | ||
57 | /* lock protects maps from concurrent changes */ | 62 | /* lock protects maps from concurrent changes */ |
58 | spinlock_t lock; | 63 | spinlock_t lock; |
59 | struct mm_struct *mm; | 64 | struct mm_struct *mm; |
60 | struct mmu_notifier mn; | 65 | struct mmu_notifier mn; |
61 | }; | 66 | }; |
62 | 67 | ||
68 | struct unmap_notify { | ||
69 | int flags; | ||
70 | /* Address relative to the start of the grant_map */ | ||
71 | int addr; | ||
72 | int event; | ||
73 | }; | ||
74 | |||
63 | struct grant_map { | 75 | struct grant_map { |
64 | struct list_head next; | 76 | struct list_head next; |
65 | struct gntdev_priv *priv; | ||
66 | struct vm_area_struct *vma; | 77 | struct vm_area_struct *vma; |
67 | int index; | 78 | int index; |
68 | int count; | 79 | int count; |
69 | int flags; | 80 | int flags; |
70 | int is_mapped; | 81 | atomic_t users; |
82 | struct unmap_notify notify; | ||
71 | struct ioctl_gntdev_grant_ref *grants; | 83 | struct ioctl_gntdev_grant_ref *grants; |
72 | struct gnttab_map_grant_ref *map_ops; | 84 | struct gnttab_map_grant_ref *map_ops; |
73 | struct gnttab_unmap_grant_ref *unmap_ops; | 85 | struct gnttab_unmap_grant_ref *unmap_ops; |
74 | struct page **pages; | 86 | struct page **pages; |
75 | }; | 87 | }; |
76 | 88 | ||
89 | static int unmap_grant_pages(struct grant_map *map, int offset, int pages); | ||
90 | |||
77 | /* ------------------------------------------------------------------ */ | 91 | /* ------------------------------------------------------------------ */ |
78 | 92 | ||
79 | static void gntdev_print_maps(struct gntdev_priv *priv, | 93 | static void gntdev_print_maps(struct gntdev_priv *priv, |
@@ -82,9 +96,7 @@ static void gntdev_print_maps(struct gntdev_priv *priv, | |||
82 | #ifdef DEBUG | 96 | #ifdef DEBUG |
83 | struct grant_map *map; | 97 | struct grant_map *map; |
84 | 98 | ||
85 | pr_debug("maps list (priv %p, usage %d/%d)\n", | 99 | pr_debug("%s: maps list (priv %p)\n", __func__, priv); |
86 | priv, priv->used, priv->limit); | ||
87 | |||
88 | list_for_each_entry(map, &priv->maps, next) | 100 | list_for_each_entry(map, &priv->maps, next) |
89 | pr_debug(" index %2d, count %2d %s\n", | 101 | pr_debug(" index %2d, count %2d %s\n", |
90 | map->index, map->count, | 102 | map->index, map->count, |
@@ -111,27 +123,21 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) | |||
111 | NULL == add->pages) | 123 | NULL == add->pages) |
112 | goto err; | 124 | goto err; |
113 | 125 | ||
126 | if (alloc_xenballooned_pages(count, add->pages)) | ||
127 | goto err; | ||
128 | |||
114 | for (i = 0; i < count; i++) { | 129 | for (i = 0; i < count; i++) { |
115 | add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | 130 | add->map_ops[i].handle = -1; |
116 | if (add->pages[i] == NULL) | 131 | add->unmap_ops[i].handle = -1; |
117 | goto err; | ||
118 | } | 132 | } |
119 | 133 | ||
120 | add->index = 0; | 134 | add->index = 0; |
121 | add->count = count; | 135 | add->count = count; |
122 | add->priv = priv; | 136 | atomic_set(&add->users, 1); |
123 | |||
124 | if (add->count + priv->used > priv->limit) | ||
125 | goto err; | ||
126 | 137 | ||
127 | return add; | 138 | return add; |
128 | 139 | ||
129 | err: | 140 | err: |
130 | if (add->pages) | ||
131 | for (i = 0; i < count; i++) { | ||
132 | if (add->pages[i]) | ||
133 | __free_page(add->pages[i]); | ||
134 | } | ||
135 | kfree(add->pages); | 141 | kfree(add->pages); |
136 | kfree(add->grants); | 142 | kfree(add->grants); |
137 | kfree(add->map_ops); | 143 | kfree(add->map_ops); |
@@ -154,7 +160,6 @@ static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add) | |||
154 | list_add_tail(&add->next, &priv->maps); | 160 | list_add_tail(&add->next, &priv->maps); |
155 | 161 | ||
156 | done: | 162 | done: |
157 | priv->used += add->count; | ||
158 | gntdev_print_maps(priv, "[new]", add->index); | 163 | gntdev_print_maps(priv, "[new]", add->index); |
159 | } | 164 | } |
160 | 165 | ||
@@ -166,57 +171,33 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, | |||
166 | list_for_each_entry(map, &priv->maps, next) { | 171 | list_for_each_entry(map, &priv->maps, next) { |
167 | if (map->index != index) | 172 | if (map->index != index) |
168 | continue; | 173 | continue; |
169 | if (map->count != count) | 174 | if (count && map->count != count) |
170 | continue; | 175 | continue; |
171 | return map; | 176 | return map; |
172 | } | 177 | } |
173 | return NULL; | 178 | return NULL; |
174 | } | 179 | } |
175 | 180 | ||
176 | static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv, | 181 | static void gntdev_put_map(struct grant_map *map) |
177 | unsigned long vaddr) | ||
178 | { | 182 | { |
179 | struct grant_map *map; | 183 | if (!map) |
180 | 184 | return; | |
181 | list_for_each_entry(map, &priv->maps, next) { | ||
182 | if (!map->vma) | ||
183 | continue; | ||
184 | if (vaddr < map->vma->vm_start) | ||
185 | continue; | ||
186 | if (vaddr >= map->vma->vm_end) | ||
187 | continue; | ||
188 | return map; | ||
189 | } | ||
190 | return NULL; | ||
191 | } | ||
192 | |||
193 | static int gntdev_del_map(struct grant_map *map) | ||
194 | { | ||
195 | int i; | ||
196 | 185 | ||
197 | if (map->vma) | 186 | if (!atomic_dec_and_test(&map->users)) |
198 | return -EBUSY; | 187 | return; |
199 | for (i = 0; i < map->count; i++) | ||
200 | if (map->unmap_ops[i].handle) | ||
201 | return -EBUSY; | ||
202 | 188 | ||
203 | map->priv->used -= map->count; | 189 | atomic_sub(map->count, &pages_mapped); |
204 | list_del(&map->next); | ||
205 | return 0; | ||
206 | } | ||
207 | 190 | ||
208 | static void gntdev_free_map(struct grant_map *map) | 191 | if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { |
209 | { | 192 | notify_remote_via_evtchn(map->notify.event); |
210 | int i; | 193 | } |
211 | 194 | ||
212 | if (!map) | 195 | if (map->pages) { |
213 | return; | 196 | if (!use_ptemod) |
197 | unmap_grant_pages(map, 0, map->count); | ||
214 | 198 | ||
215 | if (map->pages) | 199 | free_xenballooned_pages(map->count, map->pages); |
216 | for (i = 0; i < map->count; i++) { | 200 | } |
217 | if (map->pages[i]) | ||
218 | __free_page(map->pages[i]); | ||
219 | } | ||
220 | kfree(map->pages); | 201 | kfree(map->pages); |
221 | kfree(map->grants); | 202 | kfree(map->grants); |
222 | kfree(map->map_ops); | 203 | kfree(map->map_ops); |
@@ -231,18 +212,17 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, | |||
231 | { | 212 | { |
232 | struct grant_map *map = data; | 213 | struct grant_map *map = data; |
233 | unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; | 214 | unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; |
215 | int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; | ||
234 | u64 pte_maddr; | 216 | u64 pte_maddr; |
235 | 217 | ||
236 | BUG_ON(pgnr >= map->count); | 218 | BUG_ON(pgnr >= map->count); |
237 | pte_maddr = arbitrary_virt_to_machine(pte).maddr; | 219 | pte_maddr = arbitrary_virt_to_machine(pte).maddr; |
238 | 220 | ||
239 | gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, | 221 | gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, |
240 | GNTMAP_contains_pte | map->flags, | ||
241 | map->grants[pgnr].ref, | 222 | map->grants[pgnr].ref, |
242 | map->grants[pgnr].domid); | 223 | map->grants[pgnr].domid); |
243 | gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, | 224 | gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags, |
244 | GNTMAP_contains_pte | map->flags, | 225 | -1 /* handle */); |
245 | 0 /* handle */); | ||
246 | return 0; | 226 | return 0; |
247 | } | 227 | } |
248 | 228 | ||
@@ -250,6 +230,21 @@ static int map_grant_pages(struct grant_map *map) | |||
250 | { | 230 | { |
251 | int i, err = 0; | 231 | int i, err = 0; |
252 | 232 | ||
233 | if (!use_ptemod) { | ||
234 | /* Note: it could already be mapped */ | ||
235 | if (map->map_ops[0].handle != -1) | ||
236 | return 0; | ||
237 | for (i = 0; i < map->count; i++) { | ||
238 | unsigned long addr = (unsigned long) | ||
239 | pfn_to_kaddr(page_to_pfn(map->pages[i])); | ||
240 | gnttab_set_map_op(&map->map_ops[i], addr, map->flags, | ||
241 | map->grants[i].ref, | ||
242 | map->grants[i].domid); | ||
243 | gnttab_set_unmap_op(&map->unmap_ops[i], addr, | ||
244 | map->flags, -1 /* handle */); | ||
245 | } | ||
246 | } | ||
247 | |||
253 | pr_debug("map %d+%d\n", map->index, map->count); | 248 | pr_debug("map %d+%d\n", map->index, map->count); |
254 | err = gnttab_map_refs(map->map_ops, map->pages, map->count); | 249 | err = gnttab_map_refs(map->map_ops, map->pages, map->count); |
255 | if (err) | 250 | if (err) |
@@ -258,28 +253,81 @@ static int map_grant_pages(struct grant_map *map) | |||
258 | for (i = 0; i < map->count; i++) { | 253 | for (i = 0; i < map->count; i++) { |
259 | if (map->map_ops[i].status) | 254 | if (map->map_ops[i].status) |
260 | err = -EINVAL; | 255 | err = -EINVAL; |
261 | map->unmap_ops[i].handle = map->map_ops[i].handle; | 256 | else { |
257 | BUG_ON(map->map_ops[i].handle == -1); | ||
258 | map->unmap_ops[i].handle = map->map_ops[i].handle; | ||
259 | pr_debug("map handle=%d\n", map->map_ops[i].handle); | ||
260 | } | ||
262 | } | 261 | } |
263 | return err; | 262 | return err; |
264 | } | 263 | } |
265 | 264 | ||
266 | static int unmap_grant_pages(struct grant_map *map, int offset, int pages) | 265 | static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) |
267 | { | 266 | { |
268 | int i, err = 0; | 267 | int i, err = 0; |
269 | 268 | ||
270 | pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages); | 269 | if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { |
271 | err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages); | 270 | int pgno = (map->notify.addr >> PAGE_SHIFT); |
271 | if (pgno >= offset && pgno < offset + pages && use_ptemod) { | ||
272 | void __user *tmp = (void __user *) | ||
273 | map->vma->vm_start + map->notify.addr; | ||
274 | err = copy_to_user(tmp, &err, 1); | ||
275 | if (err) | ||
276 | return err; | ||
277 | map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; | ||
278 | } else if (pgno >= offset && pgno < offset + pages) { | ||
279 | uint8_t *tmp = kmap(map->pages[pgno]); | ||
280 | tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; | ||
281 | kunmap(map->pages[pgno]); | ||
282 | map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages); | ||
272 | if (err) | 287 | if (err) |
273 | return err; | 288 | return err; |
274 | 289 | ||
275 | for (i = 0; i < pages; i++) { | 290 | for (i = 0; i < pages; i++) { |
276 | if (map->unmap_ops[offset+i].status) | 291 | if (map->unmap_ops[offset+i].status) |
277 | err = -EINVAL; | 292 | err = -EINVAL; |
278 | map->unmap_ops[offset+i].handle = 0; | 293 | pr_debug("unmap handle=%d st=%d\n", |
294 | map->unmap_ops[offset+i].handle, | ||
295 | map->unmap_ops[offset+i].status); | ||
296 | map->unmap_ops[offset+i].handle = -1; | ||
279 | } | 297 | } |
280 | return err; | 298 | return err; |
281 | } | 299 | } |
282 | 300 | ||
301 | static int unmap_grant_pages(struct grant_map *map, int offset, int pages) | ||
302 | { | ||
303 | int range, err = 0; | ||
304 | |||
305 | pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); | ||
306 | |||
307 | /* It is possible the requested range will have a "hole" where we | ||
308 | * already unmapped some of the grants. Only unmap valid ranges. | ||
309 | */ | ||
310 | while (pages && !err) { | ||
311 | while (pages && map->unmap_ops[offset].handle == -1) { | ||
312 | offset++; | ||
313 | pages--; | ||
314 | } | ||
315 | range = 0; | ||
316 | while (range < pages) { | ||
317 | if (map->unmap_ops[offset+range].handle == -1) { | ||
318 | range--; | ||
319 | break; | ||
320 | } | ||
321 | range++; | ||
322 | } | ||
323 | err = __unmap_grant_pages(map, offset, range); | ||
324 | offset += range; | ||
325 | pages -= range; | ||
326 | } | ||
327 | |||
328 | return err; | ||
329 | } | ||
330 | |||
283 | /* ------------------------------------------------------------------ */ | 331 | /* ------------------------------------------------------------------ */ |
284 | 332 | ||
285 | static void gntdev_vma_close(struct vm_area_struct *vma) | 333 | static void gntdev_vma_close(struct vm_area_struct *vma) |
@@ -287,22 +335,13 @@ static void gntdev_vma_close(struct vm_area_struct *vma) | |||
287 | struct grant_map *map = vma->vm_private_data; | 335 | struct grant_map *map = vma->vm_private_data; |
288 | 336 | ||
289 | pr_debug("close %p\n", vma); | 337 | pr_debug("close %p\n", vma); |
290 | map->is_mapped = 0; | ||
291 | map->vma = NULL; | 338 | map->vma = NULL; |
292 | vma->vm_private_data = NULL; | 339 | vma->vm_private_data = NULL; |
293 | } | 340 | gntdev_put_map(map); |
294 | |||
295 | static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
296 | { | ||
297 | pr_debug("vaddr %p, pgoff %ld (shouldn't happen)\n", | ||
298 | vmf->virtual_address, vmf->pgoff); | ||
299 | vmf->flags = VM_FAULT_ERROR; | ||
300 | return 0; | ||
301 | } | 341 | } |
302 | 342 | ||
303 | static struct vm_operations_struct gntdev_vmops = { | 343 | static struct vm_operations_struct gntdev_vmops = { |
304 | .close = gntdev_vma_close, | 344 | .close = gntdev_vma_close, |
305 | .fault = gntdev_vma_fault, | ||
306 | }; | 345 | }; |
307 | 346 | ||
308 | /* ------------------------------------------------------------------ */ | 347 | /* ------------------------------------------------------------------ */ |
@@ -320,8 +359,6 @@ static void mn_invl_range_start(struct mmu_notifier *mn, | |||
320 | list_for_each_entry(map, &priv->maps, next) { | 359 | list_for_each_entry(map, &priv->maps, next) { |
321 | if (!map->vma) | 360 | if (!map->vma) |
322 | continue; | 361 | continue; |
323 | if (!map->is_mapped) | ||
324 | continue; | ||
325 | if (map->vma->vm_start >= end) | 362 | if (map->vma->vm_start >= end) |
326 | continue; | 363 | continue; |
327 | if (map->vma->vm_end <= start) | 364 | if (map->vma->vm_end <= start) |
@@ -386,16 +423,17 @@ static int gntdev_open(struct inode *inode, struct file *flip) | |||
386 | 423 | ||
387 | INIT_LIST_HEAD(&priv->maps); | 424 | INIT_LIST_HEAD(&priv->maps); |
388 | spin_lock_init(&priv->lock); | 425 | spin_lock_init(&priv->lock); |
389 | priv->limit = limit; | ||
390 | 426 | ||
391 | priv->mm = get_task_mm(current); | 427 | if (use_ptemod) { |
392 | if (!priv->mm) { | 428 | priv->mm = get_task_mm(current); |
393 | kfree(priv); | 429 | if (!priv->mm) { |
394 | return -ENOMEM; | 430 | kfree(priv); |
431 | return -ENOMEM; | ||
432 | } | ||
433 | priv->mn.ops = &gntdev_mmu_ops; | ||
434 | ret = mmu_notifier_register(&priv->mn, priv->mm); | ||
435 | mmput(priv->mm); | ||
395 | } | 436 | } |
396 | priv->mn.ops = &gntdev_mmu_ops; | ||
397 | ret = mmu_notifier_register(&priv->mn, priv->mm); | ||
398 | mmput(priv->mm); | ||
399 | 437 | ||
400 | if (ret) { | 438 | if (ret) { |
401 | kfree(priv); | 439 | kfree(priv); |
@@ -412,21 +450,19 @@ static int gntdev_release(struct inode *inode, struct file *flip) | |||
412 | { | 450 | { |
413 | struct gntdev_priv *priv = flip->private_data; | 451 | struct gntdev_priv *priv = flip->private_data; |
414 | struct grant_map *map; | 452 | struct grant_map *map; |
415 | int err; | ||
416 | 453 | ||
417 | pr_debug("priv %p\n", priv); | 454 | pr_debug("priv %p\n", priv); |
418 | 455 | ||
419 | spin_lock(&priv->lock); | 456 | spin_lock(&priv->lock); |
420 | while (!list_empty(&priv->maps)) { | 457 | while (!list_empty(&priv->maps)) { |
421 | map = list_entry(priv->maps.next, struct grant_map, next); | 458 | map = list_entry(priv->maps.next, struct grant_map, next); |
422 | err = gntdev_del_map(map); | 459 | list_del(&map->next); |
423 | if (WARN_ON(err)) | 460 | gntdev_put_map(map); |
424 | gntdev_free_map(map); | ||
425 | |||
426 | } | 461 | } |
427 | spin_unlock(&priv->lock); | 462 | spin_unlock(&priv->lock); |
428 | 463 | ||
429 | mmu_notifier_unregister(&priv->mn, priv->mm); | 464 | if (use_ptemod) |
465 | mmu_notifier_unregister(&priv->mn, priv->mm); | ||
430 | kfree(priv); | 466 | kfree(priv); |
431 | return 0; | 467 | return 0; |
432 | } | 468 | } |
@@ -443,16 +479,21 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, | |||
443 | pr_debug("priv %p, add %d\n", priv, op.count); | 479 | pr_debug("priv %p, add %d\n", priv, op.count); |
444 | if (unlikely(op.count <= 0)) | 480 | if (unlikely(op.count <= 0)) |
445 | return -EINVAL; | 481 | return -EINVAL; |
446 | if (unlikely(op.count > priv->limit)) | ||
447 | return -EINVAL; | ||
448 | 482 | ||
449 | err = -ENOMEM; | 483 | err = -ENOMEM; |
450 | map = gntdev_alloc_map(priv, op.count); | 484 | map = gntdev_alloc_map(priv, op.count); |
451 | if (!map) | 485 | if (!map) |
452 | return err; | 486 | return err; |
487 | |||
488 | if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) { | ||
489 | pr_debug("can't map: over limit\n"); | ||
490 | gntdev_put_map(map); | ||
491 | return err; | ||
492 | } | ||
493 | |||
453 | if (copy_from_user(map->grants, &u->refs, | 494 | if (copy_from_user(map->grants, &u->refs, |
454 | sizeof(map->grants[0]) * op.count) != 0) { | 495 | sizeof(map->grants[0]) * op.count) != 0) { |
455 | gntdev_free_map(map); | 496 | gntdev_put_map(map); |
456 | return err; | 497 | return err; |
457 | } | 498 | } |
458 | 499 | ||
@@ -461,13 +502,9 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, | |||
461 | op.index = map->index << PAGE_SHIFT; | 502 | op.index = map->index << PAGE_SHIFT; |
462 | spin_unlock(&priv->lock); | 503 | spin_unlock(&priv->lock); |
463 | 504 | ||
464 | if (copy_to_user(u, &op, sizeof(op)) != 0) { | 505 | if (copy_to_user(u, &op, sizeof(op)) != 0) |
465 | spin_lock(&priv->lock); | 506 | return -EFAULT; |
466 | gntdev_del_map(map); | 507 | |
467 | spin_unlock(&priv->lock); | ||
468 | gntdev_free_map(map); | ||
469 | return err; | ||
470 | } | ||
471 | return 0; | 508 | return 0; |
472 | } | 509 | } |
473 | 510 | ||
@@ -484,11 +521,12 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, | |||
484 | 521 | ||
485 | spin_lock(&priv->lock); | 522 | spin_lock(&priv->lock); |
486 | map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); | 523 | map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); |
487 | if (map) | 524 | if (map) { |
488 | err = gntdev_del_map(map); | 525 | list_del(&map->next); |
526 | gntdev_put_map(map); | ||
527 | err = 0; | ||
528 | } | ||
489 | spin_unlock(&priv->lock); | 529 | spin_unlock(&priv->lock); |
490 | if (!err) | ||
491 | gntdev_free_map(map); | ||
492 | return err; | 530 | return err; |
493 | } | 531 | } |
494 | 532 | ||
@@ -496,43 +534,66 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, | |||
496 | struct ioctl_gntdev_get_offset_for_vaddr __user *u) | 534 | struct ioctl_gntdev_get_offset_for_vaddr __user *u) |
497 | { | 535 | { |
498 | struct ioctl_gntdev_get_offset_for_vaddr op; | 536 | struct ioctl_gntdev_get_offset_for_vaddr op; |
537 | struct vm_area_struct *vma; | ||
499 | struct grant_map *map; | 538 | struct grant_map *map; |
500 | 539 | ||
501 | if (copy_from_user(&op, u, sizeof(op)) != 0) | 540 | if (copy_from_user(&op, u, sizeof(op)) != 0) |
502 | return -EFAULT; | 541 | return -EFAULT; |
503 | pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr); | 542 | pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr); |
504 | 543 | ||
505 | spin_lock(&priv->lock); | 544 | vma = find_vma(current->mm, op.vaddr); |
506 | map = gntdev_find_map_vaddr(priv, op.vaddr); | 545 | if (!vma || vma->vm_ops != &gntdev_vmops) |
507 | if (map == NULL || | ||
508 | map->vma->vm_start != op.vaddr) { | ||
509 | spin_unlock(&priv->lock); | ||
510 | return -EINVAL; | 546 | return -EINVAL; |
511 | } | 547 | |
548 | map = vma->vm_private_data; | ||
549 | if (!map) | ||
550 | return -EINVAL; | ||
551 | |||
512 | op.offset = map->index << PAGE_SHIFT; | 552 | op.offset = map->index << PAGE_SHIFT; |
513 | op.count = map->count; | 553 | op.count = map->count; |
514 | spin_unlock(&priv->lock); | ||
515 | 554 | ||
516 | if (copy_to_user(u, &op, sizeof(op)) != 0) | 555 | if (copy_to_user(u, &op, sizeof(op)) != 0) |
517 | return -EFAULT; | 556 | return -EFAULT; |
518 | return 0; | 557 | return 0; |
519 | } | 558 | } |
520 | 559 | ||
521 | static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv, | 560 | static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) |
522 | struct ioctl_gntdev_set_max_grants __user *u) | ||
523 | { | 561 | { |
524 | struct ioctl_gntdev_set_max_grants op; | 562 | struct ioctl_gntdev_unmap_notify op; |
563 | struct grant_map *map; | ||
564 | int rc; | ||
525 | 565 | ||
526 | if (copy_from_user(&op, u, sizeof(op)) != 0) | 566 | if (copy_from_user(&op, u, sizeof(op))) |
527 | return -EFAULT; | 567 | return -EFAULT; |
528 | pr_debug("priv %p, limit %d\n", priv, op.count); | 568 | |
529 | if (op.count > limit) | 569 | if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) |
530 | return -E2BIG; | 570 | return -EINVAL; |
531 | 571 | ||
532 | spin_lock(&priv->lock); | 572 | spin_lock(&priv->lock); |
533 | priv->limit = op.count; | 573 | |
574 | list_for_each_entry(map, &priv->maps, next) { | ||
575 | uint64_t begin = map->index << PAGE_SHIFT; | ||
576 | uint64_t end = (map->index + map->count) << PAGE_SHIFT; | ||
577 | if (op.index >= begin && op.index < end) | ||
578 | goto found; | ||
579 | } | ||
580 | rc = -ENOENT; | ||
581 | goto unlock_out; | ||
582 | |||
583 | found: | ||
584 | if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) && | ||
585 | (map->flags & GNTMAP_readonly)) { | ||
586 | rc = -EINVAL; | ||
587 | goto unlock_out; | ||
588 | } | ||
589 | |||
590 | map->notify.flags = op.action; | ||
591 | map->notify.addr = op.index - (map->index << PAGE_SHIFT); | ||
592 | map->notify.event = op.event_channel_port; | ||
593 | rc = 0; | ||
594 | unlock_out: | ||
534 | spin_unlock(&priv->lock); | 595 | spin_unlock(&priv->lock); |
535 | return 0; | 596 | return rc; |
536 | } | 597 | } |
537 | 598 | ||
538 | static long gntdev_ioctl(struct file *flip, | 599 | static long gntdev_ioctl(struct file *flip, |
@@ -551,8 +612,8 @@ static long gntdev_ioctl(struct file *flip, | |||
551 | case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: | 612 | case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: |
552 | return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); | 613 | return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); |
553 | 614 | ||
554 | case IOCTL_GNTDEV_SET_MAX_GRANTS: | 615 | case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: |
555 | return gntdev_ioctl_set_max_grants(priv, ptr); | 616 | return gntdev_ioctl_notify(priv, ptr); |
556 | 617 | ||
557 | default: | 618 | default: |
558 | pr_debug("priv %p, unknown cmd %x\n", priv, cmd); | 619 | pr_debug("priv %p, unknown cmd %x\n", priv, cmd); |
@@ -568,7 +629,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) | |||
568 | int index = vma->vm_pgoff; | 629 | int index = vma->vm_pgoff; |
569 | int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | 630 | int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; |
570 | struct grant_map *map; | 631 | struct grant_map *map; |
571 | int err = -EINVAL; | 632 | int i, err = -EINVAL; |
572 | 633 | ||
573 | if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) | 634 | if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) |
574 | return -EINVAL; | 635 | return -EINVAL; |
@@ -580,47 +641,70 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) | |||
580 | map = gntdev_find_map_index(priv, index, count); | 641 | map = gntdev_find_map_index(priv, index, count); |
581 | if (!map) | 642 | if (!map) |
582 | goto unlock_out; | 643 | goto unlock_out; |
583 | if (map->vma) | 644 | if (use_ptemod && map->vma) |
584 | goto unlock_out; | 645 | goto unlock_out; |
585 | if (priv->mm != vma->vm_mm) { | 646 | if (use_ptemod && priv->mm != vma->vm_mm) { |
586 | printk(KERN_WARNING "Huh? Other mm?\n"); | 647 | printk(KERN_WARNING "Huh? Other mm?\n"); |
587 | goto unlock_out; | 648 | goto unlock_out; |
588 | } | 649 | } |
589 | 650 | ||
651 | atomic_inc(&map->users); | ||
652 | |||
590 | vma->vm_ops = &gntdev_vmops; | 653 | vma->vm_ops = &gntdev_vmops; |
591 | 654 | ||
592 | vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; | 655 | vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; |
593 | 656 | ||
594 | vma->vm_private_data = map; | 657 | vma->vm_private_data = map; |
595 | map->vma = vma; | ||
596 | 658 | ||
597 | map->flags = GNTMAP_host_map | GNTMAP_application_map; | 659 | if (use_ptemod) |
598 | if (!(vma->vm_flags & VM_WRITE)) | 660 | map->vma = vma; |
599 | map->flags |= GNTMAP_readonly; | 661 | |
662 | if (map->flags) { | ||
663 | if ((vma->vm_flags & VM_WRITE) && | ||
664 | (map->flags & GNTMAP_readonly)) | ||
665 | return -EINVAL; | ||
666 | } else { | ||
667 | map->flags = GNTMAP_host_map; | ||
668 | if (!(vma->vm_flags & VM_WRITE)) | ||
669 | map->flags |= GNTMAP_readonly; | ||
670 | } | ||
600 | 671 | ||
601 | spin_unlock(&priv->lock); | 672 | spin_unlock(&priv->lock); |
602 | 673 | ||
603 | err = apply_to_page_range(vma->vm_mm, vma->vm_start, | 674 | if (use_ptemod) { |
604 | vma->vm_end - vma->vm_start, | 675 | err = apply_to_page_range(vma->vm_mm, vma->vm_start, |
605 | find_grant_ptes, map); | 676 | vma->vm_end - vma->vm_start, |
606 | if (err) { | 677 | find_grant_ptes, map); |
607 | printk(KERN_WARNING "find_grant_ptes() failure.\n"); | 678 | if (err) { |
608 | return err; | 679 | printk(KERN_WARNING "find_grant_ptes() failure.\n"); |
680 | goto out_put_map; | ||
681 | } | ||
609 | } | 682 | } |
610 | 683 | ||
611 | err = map_grant_pages(map); | 684 | err = map_grant_pages(map); |
612 | if (err) { | 685 | if (err) |
613 | printk(KERN_WARNING "map_grant_pages() failure.\n"); | 686 | goto out_put_map; |
614 | return err; | ||
615 | } | ||
616 | 687 | ||
617 | map->is_mapped = 1; | 688 | if (!use_ptemod) { |
689 | for (i = 0; i < count; i++) { | ||
690 | err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE, | ||
691 | map->pages[i]); | ||
692 | if (err) | ||
693 | goto out_put_map; | ||
694 | } | ||
695 | } | ||
618 | 696 | ||
619 | return 0; | 697 | return 0; |
620 | 698 | ||
621 | unlock_out: | 699 | unlock_out: |
622 | spin_unlock(&priv->lock); | 700 | spin_unlock(&priv->lock); |
623 | return err; | 701 | return err; |
702 | |||
703 | out_put_map: | ||
704 | if (use_ptemod) | ||
705 | map->vma = NULL; | ||
706 | gntdev_put_map(map); | ||
707 | return err; | ||
624 | } | 708 | } |
625 | 709 | ||
626 | static const struct file_operations gntdev_fops = { | 710 | static const struct file_operations gntdev_fops = { |
@@ -646,6 +730,8 @@ static int __init gntdev_init(void) | |||
646 | if (!xen_domain()) | 730 | if (!xen_domain()) |
647 | return -ENODEV; | 731 | return -ENODEV; |
648 | 732 | ||
733 | use_ptemod = xen_pv_domain(); | ||
734 | |||
649 | err = misc_register(&gntdev_miscdev); | 735 | err = misc_register(&gntdev_miscdev); |
650 | if (err != 0) { | 736 | if (err != 0) { |
651 | printk(KERN_ERR "Could not register gntdev device\n"); | 737 | printk(KERN_ERR "Could not register gntdev device\n"); |
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 9ef54ebc1194..3745a318defc 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c | |||
@@ -458,7 +458,14 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, | |||
458 | if (ret) | 458 | if (ret) |
459 | return ret; | 459 | return ret; |
460 | 460 | ||
461 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
462 | return ret; | ||
463 | |||
461 | for (i = 0; i < count; i++) { | 464 | for (i = 0; i < count; i++) { |
465 | /* Do not add to override if the map failed. */ | ||
466 | if (map_ops[i].status) | ||
467 | continue; | ||
468 | |||
462 | /* m2p override only supported for GNTMAP_contains_pte mappings */ | 469 | /* m2p override only supported for GNTMAP_contains_pte mappings */ |
463 | if (!(map_ops[i].flags & GNTMAP_contains_pte)) | 470 | if (!(map_ops[i].flags & GNTMAP_contains_pte)) |
464 | continue; | 471 | continue; |
@@ -483,6 +490,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, | |||
483 | if (ret) | 490 | if (ret) |
484 | return ret; | 491 | return ret; |
485 | 492 | ||
493 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
494 | return ret; | ||
495 | |||
486 | for (i = 0; i < count; i++) { | 496 | for (i = 0; i < count; i++) { |
487 | ret = m2p_remove_override(pages[i]); | 497 | ret = m2p_remove_override(pages[i]); |
488 | if (ret) | 498 | if (ret) |
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c new file mode 100644 index 000000000000..a4ff225ee868 --- /dev/null +++ b/drivers/xen/xen-balloon.c | |||
@@ -0,0 +1,256 @@ | |||
1 | /****************************************************************************** | ||
2 | * Xen balloon driver - enables returning/claiming memory to/from Xen. | ||
3 | * | ||
4 | * Copyright (c) 2003, B Dragovic | ||
5 | * Copyright (c) 2003-2004, M Williamson, K Fraser | ||
6 | * Copyright (c) 2005 Dan M. Smith, IBM Corporation | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License version 2 | ||
10 | * as published by the Free Software Foundation; or, when distributed | ||
11 | * separately from the Linux kernel or incorporated into other | ||
12 | * software packages, subject to the following license: | ||
13 | * | ||
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
15 | * of this source file (the "Software"), to deal in the Software without | ||
16 | * restriction, including without limitation the rights to use, copy, modify, | ||
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
18 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
19 | * the following conditions: | ||
20 | * | ||
21 | * The above copyright notice and this permission notice shall be included in | ||
22 | * all copies or substantial portions of the Software. | ||
23 | * | ||
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
30 | * IN THE SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/module.h> | ||
35 | #include <linux/sysdev.h> | ||
36 | #include <linux/capability.h> | ||
37 | |||
38 | #include <xen/xen.h> | ||
39 | #include <xen/interface/xen.h> | ||
40 | #include <xen/balloon.h> | ||
41 | #include <xen/xenbus.h> | ||
42 | #include <xen/features.h> | ||
43 | #include <xen/page.h> | ||
44 | |||
45 | #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) | ||
46 | |||
47 | #define BALLOON_CLASS_NAME "xen_memory" | ||
48 | |||
49 | static struct sys_device balloon_sysdev; | ||
50 | |||
51 | static int register_balloon(struct sys_device *sysdev); | ||
52 | |||
53 | static struct xenbus_watch target_watch = | ||
54 | { | ||
55 | .node = "memory/target" | ||
56 | }; | ||
57 | |||
58 | /* React to a change in the target key */ | ||
59 | static void watch_target(struct xenbus_watch *watch, | ||
60 | const char **vec, unsigned int len) | ||
61 | { | ||
62 | unsigned long long new_target; | ||
63 | int err; | ||
64 | |||
65 | err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); | ||
66 | if (err != 1) { | ||
67 | /* This is ok (for domain0 at least) - so just return */ | ||
68 | return; | ||
69 | } | ||
70 | |||
71 | /* The given memory/target value is in KiB, so it needs converting to | ||
72 | * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. | ||
73 | */ | ||
74 | balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); | ||
75 | } | ||
76 | |||
77 | static int balloon_init_watcher(struct notifier_block *notifier, | ||
78 | unsigned long event, | ||
79 | void *data) | ||
80 | { | ||
81 | int err; | ||
82 | |||
83 | err = register_xenbus_watch(&target_watch); | ||
84 | if (err) | ||
85 | printk(KERN_ERR "Failed to set balloon watcher\n"); | ||
86 | |||
87 | return NOTIFY_DONE; | ||
88 | } | ||
89 | |||
90 | static struct notifier_block xenstore_notifier; | ||
91 | |||
92 | static int __init balloon_init(void) | ||
93 | { | ||
94 | if (!xen_domain()) | ||
95 | return -ENODEV; | ||
96 | |||
97 | pr_info("xen-balloon: Initialising balloon driver.\n"); | ||
98 | |||
99 | register_balloon(&balloon_sysdev); | ||
100 | |||
101 | target_watch.callback = watch_target; | ||
102 | xenstore_notifier.notifier_call = balloon_init_watcher; | ||
103 | |||
104 | register_xenstore_notifier(&xenstore_notifier); | ||
105 | |||
106 | return 0; | ||
107 | } | ||
108 | subsys_initcall(balloon_init); | ||
109 | |||
110 | static void balloon_exit(void) | ||
111 | { | ||
112 | /* XXX - release balloon here */ | ||
113 | return; | ||
114 | } | ||
115 | |||
116 | module_exit(balloon_exit); | ||
117 | |||
118 | #define BALLOON_SHOW(name, format, args...) \ | ||
119 | static ssize_t show_##name(struct sys_device *dev, \ | ||
120 | struct sysdev_attribute *attr, \ | ||
121 | char *buf) \ | ||
122 | { \ | ||
123 | return sprintf(buf, format, ##args); \ | ||
124 | } \ | ||
125 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) | ||
126 | |||
127 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); | ||
128 | BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); | ||
129 | BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); | ||
130 | |||
131 | static SYSDEV_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay); | ||
132 | static SYSDEV_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay); | ||
133 | static SYSDEV_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count); | ||
134 | static SYSDEV_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count); | ||
135 | |||
136 | static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, | ||
137 | char *buf) | ||
138 | { | ||
139 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); | ||
140 | } | ||
141 | |||
142 | static ssize_t store_target_kb(struct sys_device *dev, | ||
143 | struct sysdev_attribute *attr, | ||
144 | const char *buf, | ||
145 | size_t count) | ||
146 | { | ||
147 | char *endchar; | ||
148 | unsigned long long target_bytes; | ||
149 | |||
150 | if (!capable(CAP_SYS_ADMIN)) | ||
151 | return -EPERM; | ||
152 | |||
153 | target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; | ||
154 | |||
155 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | ||
156 | |||
157 | return count; | ||
158 | } | ||
159 | |||
160 | static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, | ||
161 | show_target_kb, store_target_kb); | ||
162 | |||
163 | |||
164 | static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr, | ||
165 | char *buf) | ||
166 | { | ||
167 | return sprintf(buf, "%llu\n", | ||
168 | (unsigned long long)balloon_stats.target_pages | ||
169 | << PAGE_SHIFT); | ||
170 | } | ||
171 | |||
172 | static ssize_t store_target(struct sys_device *dev, | ||
173 | struct sysdev_attribute *attr, | ||
174 | const char *buf, | ||
175 | size_t count) | ||
176 | { | ||
177 | char *endchar; | ||
178 | unsigned long long target_bytes; | ||
179 | |||
180 | if (!capable(CAP_SYS_ADMIN)) | ||
181 | return -EPERM; | ||
182 | |||
183 | target_bytes = memparse(buf, &endchar); | ||
184 | |||
185 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | ||
186 | |||
187 | return count; | ||
188 | } | ||
189 | |||
190 | static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, | ||
191 | show_target, store_target); | ||
192 | |||
193 | |||
194 | static struct sysdev_attribute *balloon_attrs[] = { | ||
195 | &attr_target_kb, | ||
196 | &attr_target, | ||
197 | &attr_schedule_delay.attr, | ||
198 | &attr_max_schedule_delay.attr, | ||
199 | &attr_retry_count.attr, | ||
200 | &attr_max_retry_count.attr | ||
201 | }; | ||
202 | |||
203 | static struct attribute *balloon_info_attrs[] = { | ||
204 | &attr_current_kb.attr, | ||
205 | &attr_low_kb.attr, | ||
206 | &attr_high_kb.attr, | ||
207 | NULL | ||
208 | }; | ||
209 | |||
210 | static struct attribute_group balloon_info_group = { | ||
211 | .name = "info", | ||
212 | .attrs = balloon_info_attrs | ||
213 | }; | ||
214 | |||
215 | static struct sysdev_class balloon_sysdev_class = { | ||
216 | .name = BALLOON_CLASS_NAME | ||
217 | }; | ||
218 | |||
219 | static int register_balloon(struct sys_device *sysdev) | ||
220 | { | ||
221 | int i, error; | ||
222 | |||
223 | error = sysdev_class_register(&balloon_sysdev_class); | ||
224 | if (error) | ||
225 | return error; | ||
226 | |||
227 | sysdev->id = 0; | ||
228 | sysdev->cls = &balloon_sysdev_class; | ||
229 | |||
230 | error = sysdev_register(sysdev); | ||
231 | if (error) { | ||
232 | sysdev_class_unregister(&balloon_sysdev_class); | ||
233 | return error; | ||
234 | } | ||
235 | |||
236 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { | ||
237 | error = sysdev_create_file(sysdev, balloon_attrs[i]); | ||
238 | if (error) | ||
239 | goto fail; | ||
240 | } | ||
241 | |||
242 | error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); | ||
243 | if (error) | ||
244 | goto fail; | ||
245 | |||
246 | return 0; | ||
247 | |||
248 | fail: | ||
249 | while (--i >= 0) | ||
250 | sysdev_remove_file(sysdev, balloon_attrs[i]); | ||
251 | sysdev_unregister(sysdev); | ||
252 | sysdev_class_unregister(&balloon_sysdev_class); | ||
253 | return error; | ||
254 | } | ||
255 | |||
256 | MODULE_LICENSE("GPL"); | ||