diff options
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/Kconfig | 19 | ||||
-rw-r--r-- | drivers/xen/Makefile | 4 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 712 | ||||
-rw-r--r-- | drivers/xen/events.c | 674 | ||||
-rw-r--r-- | drivers/xen/features.c | 29 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 37 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_client.c | 6 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 32 | ||||
-rw-r--r-- | drivers/xen/xencomm.c | 232 |
9 files changed, 1705 insertions, 40 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig new file mode 100644 index 000000000000..4b75a16de009 --- /dev/null +++ b/drivers/xen/Kconfig | |||
@@ -0,0 +1,19 @@ | |||
1 | config XEN_BALLOON | ||
2 | bool "Xen memory balloon driver" | ||
3 | depends on XEN | ||
4 | default y | ||
5 | help | ||
6 | The balloon driver allows the Xen domain to request more memory from | ||
7 | the system to expand the domain's memory allocation, or alternatively | ||
8 | return unneeded memory to the system. | ||
9 | |||
10 | config XEN_SCRUB_PAGES | ||
11 | bool "Scrub pages before returning them to system" | ||
12 | depends on XEN_BALLOON | ||
13 | default y | ||
14 | help | ||
15 | Scrub pages before returning them to the system for reuse by | ||
16 | other domains. This makes sure that any confidential data | ||
17 | is not accidentally visible to other domains. It is more | ||
18 | secure, but slightly less efficient. | ||
19 | If in doubt, say yes. | ||
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 56592f0d6cef..37af04f1ffd9 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -1,2 +1,4 @@ | |||
1 | obj-y += grant-table.o | 1 | obj-y += grant-table.o features.o events.o |
2 | obj-y += xenbus/ | 2 | obj-y += xenbus/ |
3 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | ||
4 | obj-$(CONFIG_XEN_BALLOON) += balloon.o | ||
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c new file mode 100644 index 000000000000..ab25ba6cbbb9 --- /dev/null +++ b/drivers/xen/balloon.c | |||
@@ -0,0 +1,712 @@ | |||
1 | /****************************************************************************** | ||
2 | * balloon.c | ||
3 | * | ||
4 | * Xen balloon driver - enables returning/claiming memory to/from Xen. | ||
5 | * | ||
6 | * Copyright (c) 2003, B Dragovic | ||
7 | * Copyright (c) 2003-2004, M Williamson, K Fraser | ||
8 | * Copyright (c) 2005 Dan M. Smith, IBM Corporation | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License version 2 | ||
12 | * as published by the Free Software Foundation; or, when distributed | ||
13 | * separately from the Linux kernel or incorporated into other | ||
14 | * software packages, subject to the following license: | ||
15 | * | ||
16 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
17 | * of this source file (the "Software"), to deal in the Software without | ||
18 | * restriction, including without limitation the rights to use, copy, modify, | ||
19 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
20 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
21 | * the following conditions: | ||
22 | * | ||
23 | * The above copyright notice and this permission notice shall be included in | ||
24 | * all copies or substantial portions of the Software. | ||
25 | * | ||
26 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
27 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
28 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
29 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
30 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
31 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
32 | * IN THE SOFTWARE. | ||
33 | */ | ||
34 | |||
35 | #include <linux/kernel.h> | ||
36 | #include <linux/module.h> | ||
37 | #include <linux/sched.h> | ||
38 | #include <linux/errno.h> | ||
39 | #include <linux/mm.h> | ||
40 | #include <linux/bootmem.h> | ||
41 | #include <linux/pagemap.h> | ||
42 | #include <linux/highmem.h> | ||
43 | #include <linux/mutex.h> | ||
44 | #include <linux/highmem.h> | ||
45 | #include <linux/list.h> | ||
46 | #include <linux/sysdev.h> | ||
47 | |||
48 | #include <asm/xen/hypervisor.h> | ||
49 | #include <asm/page.h> | ||
50 | #include <asm/pgalloc.h> | ||
51 | #include <asm/pgtable.h> | ||
52 | #include <asm/uaccess.h> | ||
53 | #include <asm/tlb.h> | ||
54 | |||
55 | #include <xen/interface/memory.h> | ||
56 | #include <xen/balloon.h> | ||
57 | #include <xen/xenbus.h> | ||
58 | #include <xen/features.h> | ||
59 | #include <xen/page.h> | ||
60 | |||
/* Convert a page count to KiB. */
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))

/* Name used for the balloon's sysdev class. */
#define BALLOON_CLASS_NAME "memory"

/* Book-keeping for the balloon; all counts are in pages. */
struct balloon_stats {
	/* The driver works to make current_pages equal target_pages. */
	unsigned long current_pages;
	unsigned long target_pages;
	/* Remembered Xen hard limit, once we have run into it. */
	unsigned long hard_limit;
	/*
	 * Pages other drivers have taken out of (or put into) the
	 * reservation on their own; they must report these so that the
	 * hard limit is not hit.
	 */
	unsigned long driver_pages;
	/* Pages currently held in the low- and high-memory balloons. */
	unsigned long balloon_low;
	unsigned long balloon_high;
};
80 | |||
81 | static DEFINE_MUTEX(balloon_mutex); | ||
82 | |||
83 | static struct sys_device balloon_sysdev; | ||
84 | |||
85 | static int register_balloon(struct sys_device *sysdev); | ||
86 | |||
87 | /* | ||
88 | * Protects atomic reservation decrease/increase against concurrent increases. | ||
89 | * Also protects non-atomic updates of current_pages and driver_pages, and | ||
90 | * balloon lists. | ||
91 | */ | ||
92 | static DEFINE_SPINLOCK(balloon_lock); | ||
93 | |||
94 | static struct balloon_stats balloon_stats; | ||
95 | |||
96 | /* We increase/decrease in batches which fit in a page */ | ||
97 | static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; | ||
98 | |||
/* VM accounting counters (reported through /proc) that the balloon adjusts. */
extern unsigned long totalram_pages;

/* The highmem counter only exists on CONFIG_HIGHMEM kernels. */
#ifdef CONFIG_HIGHMEM
extern unsigned long totalhigh_pages;
#define inc_totalhigh_pages() (totalhigh_pages++)
#define dec_totalhigh_pages() (totalhigh_pages--)
#else
#define inc_totalhigh_pages() do {} while (0)
#define dec_totalhigh_pages() do {} while (0)
#endif
110 | |||
111 | /* List of ballooned pages, threaded through the mem_map array. */ | ||
112 | static LIST_HEAD(ballooned_pages); | ||
113 | |||
114 | /* Main work function, always executed in process context. */ | ||
115 | static void balloon_process(struct work_struct *work); | ||
116 | static DECLARE_WORK(balloon_worker, balloon_process); | ||
117 | static struct timer_list balloon_timer; | ||
118 | |||
119 | /* When ballooning out (allocating memory to return to Xen) we don't really | ||
120 | want the kernel to try too hard since that can trigger the oom killer. */ | ||
121 | #define GFP_BALLOON \ | ||
122 | (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC) | ||
123 | |||
/*
 * scrub_page: zero the contents of a page.
 *
 * Does nothing unless CONFIG_XEN_SCRUB_PAGES is enabled.  Highmem pages
 * are temporarily mapped with kmap() so they can be cleared.
 */
static void scrub_page(struct page *page)
{
#ifdef CONFIG_XEN_SCRUB_PAGES
	if (PageHighMem(page)) {
		void *v = kmap(page);
		clear_page(v);
		/* kunmap() takes the page that was mapped, not its address. */
		kunmap(page);
	} else {
		void *v = page_address(page);
		clear_page(v);
	}
#endif
}
137 | |||
138 | /* balloon_append: add the given page to the balloon. */ | ||
139 | static void balloon_append(struct page *page) | ||
140 | { | ||
141 | /* Lowmem is re-populated first, so highmem pages go at list tail. */ | ||
142 | if (PageHighMem(page)) { | ||
143 | list_add_tail(&page->lru, &ballooned_pages); | ||
144 | balloon_stats.balloon_high++; | ||
145 | dec_totalhigh_pages(); | ||
146 | } else { | ||
147 | list_add(&page->lru, &ballooned_pages); | ||
148 | balloon_stats.balloon_low++; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ | ||
153 | static struct page *balloon_retrieve(void) | ||
154 | { | ||
155 | struct page *page; | ||
156 | |||
157 | if (list_empty(&ballooned_pages)) | ||
158 | return NULL; | ||
159 | |||
160 | page = list_entry(ballooned_pages.next, struct page, lru); | ||
161 | list_del(&page->lru); | ||
162 | |||
163 | if (PageHighMem(page)) { | ||
164 | balloon_stats.balloon_high--; | ||
165 | inc_totalhigh_pages(); | ||
166 | } | ||
167 | else | ||
168 | balloon_stats.balloon_low--; | ||
169 | |||
170 | return page; | ||
171 | } | ||
172 | |||
173 | static struct page *balloon_first_page(void) | ||
174 | { | ||
175 | if (list_empty(&ballooned_pages)) | ||
176 | return NULL; | ||
177 | return list_entry(ballooned_pages.next, struct page, lru); | ||
178 | } | ||
179 | |||
180 | static struct page *balloon_next_page(struct page *page) | ||
181 | { | ||
182 | struct list_head *next = page->lru.next; | ||
183 | if (next == &ballooned_pages) | ||
184 | return NULL; | ||
185 | return list_entry(next, struct page, lru); | ||
186 | } | ||
187 | |||
188 | static void balloon_alarm(unsigned long unused) | ||
189 | { | ||
190 | schedule_work(&balloon_worker); | ||
191 | } | ||
192 | |||
193 | static unsigned long current_target(void) | ||
194 | { | ||
195 | unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit); | ||
196 | |||
197 | target = min(target, | ||
198 | balloon_stats.current_pages + | ||
199 | balloon_stats.balloon_low + | ||
200 | balloon_stats.balloon_high); | ||
201 | |||
202 | return target; | ||
203 | } | ||
204 | |||
205 | static int increase_reservation(unsigned long nr_pages) | ||
206 | { | ||
207 | unsigned long pfn, i, flags; | ||
208 | struct page *page; | ||
209 | long rc; | ||
210 | struct xen_memory_reservation reservation = { | ||
211 | .address_bits = 0, | ||
212 | .extent_order = 0, | ||
213 | .domid = DOMID_SELF | ||
214 | }; | ||
215 | |||
216 | if (nr_pages > ARRAY_SIZE(frame_list)) | ||
217 | nr_pages = ARRAY_SIZE(frame_list); | ||
218 | |||
219 | spin_lock_irqsave(&balloon_lock, flags); | ||
220 | |||
221 | page = balloon_first_page(); | ||
222 | for (i = 0; i < nr_pages; i++) { | ||
223 | BUG_ON(page == NULL); | ||
224 | frame_list[i] = page_to_pfn(page);; | ||
225 | page = balloon_next_page(page); | ||
226 | } | ||
227 | |||
228 | reservation.extent_start = (unsigned long)frame_list; | ||
229 | reservation.nr_extents = nr_pages; | ||
230 | rc = HYPERVISOR_memory_op( | ||
231 | XENMEM_populate_physmap, &reservation); | ||
232 | if (rc < nr_pages) { | ||
233 | if (rc > 0) { | ||
234 | int ret; | ||
235 | |||
236 | /* We hit the Xen hard limit: reprobe. */ | ||
237 | reservation.nr_extents = rc; | ||
238 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
239 | &reservation); | ||
240 | BUG_ON(ret != rc); | ||
241 | } | ||
242 | if (rc >= 0) | ||
243 | balloon_stats.hard_limit = (balloon_stats.current_pages + rc - | ||
244 | balloon_stats.driver_pages); | ||
245 | goto out; | ||
246 | } | ||
247 | |||
248 | for (i = 0; i < nr_pages; i++) { | ||
249 | page = balloon_retrieve(); | ||
250 | BUG_ON(page == NULL); | ||
251 | |||
252 | pfn = page_to_pfn(page); | ||
253 | BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && | ||
254 | phys_to_machine_mapping_valid(pfn)); | ||
255 | |||
256 | set_phys_to_machine(pfn, frame_list[i]); | ||
257 | |||
258 | /* Link back into the page tables if not highmem. */ | ||
259 | if (pfn < max_low_pfn) { | ||
260 | int ret; | ||
261 | ret = HYPERVISOR_update_va_mapping( | ||
262 | (unsigned long)__va(pfn << PAGE_SHIFT), | ||
263 | mfn_pte(frame_list[i], PAGE_KERNEL), | ||
264 | 0); | ||
265 | BUG_ON(ret); | ||
266 | } | ||
267 | |||
268 | /* Relinquish the page back to the allocator. */ | ||
269 | ClearPageReserved(page); | ||
270 | init_page_count(page); | ||
271 | __free_page(page); | ||
272 | } | ||
273 | |||
274 | balloon_stats.current_pages += nr_pages; | ||
275 | totalram_pages = balloon_stats.current_pages; | ||
276 | |||
277 | out: | ||
278 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
279 | |||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | static int decrease_reservation(unsigned long nr_pages) | ||
284 | { | ||
285 | unsigned long pfn, i, flags; | ||
286 | struct page *page; | ||
287 | int need_sleep = 0; | ||
288 | int ret; | ||
289 | struct xen_memory_reservation reservation = { | ||
290 | .address_bits = 0, | ||
291 | .extent_order = 0, | ||
292 | .domid = DOMID_SELF | ||
293 | }; | ||
294 | |||
295 | if (nr_pages > ARRAY_SIZE(frame_list)) | ||
296 | nr_pages = ARRAY_SIZE(frame_list); | ||
297 | |||
298 | for (i = 0; i < nr_pages; i++) { | ||
299 | if ((page = alloc_page(GFP_BALLOON)) == NULL) { | ||
300 | nr_pages = i; | ||
301 | need_sleep = 1; | ||
302 | break; | ||
303 | } | ||
304 | |||
305 | pfn = page_to_pfn(page); | ||
306 | frame_list[i] = pfn_to_mfn(pfn); | ||
307 | |||
308 | scrub_page(page); | ||
309 | } | ||
310 | |||
311 | /* Ensure that ballooned highmem pages don't have kmaps. */ | ||
312 | kmap_flush_unused(); | ||
313 | flush_tlb_all(); | ||
314 | |||
315 | spin_lock_irqsave(&balloon_lock, flags); | ||
316 | |||
317 | /* No more mappings: invalidate P2M and add to balloon. */ | ||
318 | for (i = 0; i < nr_pages; i++) { | ||
319 | pfn = mfn_to_pfn(frame_list[i]); | ||
320 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | ||
321 | balloon_append(pfn_to_page(pfn)); | ||
322 | } | ||
323 | |||
324 | reservation.extent_start = (unsigned long)frame_list; | ||
325 | reservation.nr_extents = nr_pages; | ||
326 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | ||
327 | BUG_ON(ret != nr_pages); | ||
328 | |||
329 | balloon_stats.current_pages -= nr_pages; | ||
330 | totalram_pages = balloon_stats.current_pages; | ||
331 | |||
332 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
333 | |||
334 | return need_sleep; | ||
335 | } | ||
336 | |||
337 | /* | ||
338 | * We avoid multiple worker processes conflicting via the balloon mutex. | ||
339 | * We may of course race updates of the target counts (which are protected | ||
340 | * by the balloon lock), or with changes to the Xen hard limit, but we will | ||
341 | * recover from these in time. | ||
342 | */ | ||
343 | static void balloon_process(struct work_struct *work) | ||
344 | { | ||
345 | int need_sleep = 0; | ||
346 | long credit; | ||
347 | |||
348 | mutex_lock(&balloon_mutex); | ||
349 | |||
350 | do { | ||
351 | credit = current_target() - balloon_stats.current_pages; | ||
352 | if (credit > 0) | ||
353 | need_sleep = (increase_reservation(credit) != 0); | ||
354 | if (credit < 0) | ||
355 | need_sleep = (decrease_reservation(-credit) != 0); | ||
356 | |||
357 | #ifndef CONFIG_PREEMPT | ||
358 | if (need_resched()) | ||
359 | schedule(); | ||
360 | #endif | ||
361 | } while ((credit != 0) && !need_sleep); | ||
362 | |||
363 | /* Schedule more work if there is some still to be done. */ | ||
364 | if (current_target() != balloon_stats.current_pages) | ||
365 | mod_timer(&balloon_timer, jiffies + HZ); | ||
366 | |||
367 | mutex_unlock(&balloon_mutex); | ||
368 | } | ||
369 | |||
370 | /* Resets the Xen limit, sets new target, and kicks off processing. */ | ||
371 | void balloon_set_new_target(unsigned long target) | ||
372 | { | ||
373 | /* No need for lock. Not read-modify-write updates. */ | ||
374 | balloon_stats.hard_limit = ~0UL; | ||
375 | balloon_stats.target_pages = target; | ||
376 | schedule_work(&balloon_worker); | ||
377 | } | ||
378 | |||
379 | static struct xenbus_watch target_watch = | ||
380 | { | ||
381 | .node = "memory/target" | ||
382 | }; | ||
383 | |||
384 | /* React to a change in the target key */ | ||
385 | static void watch_target(struct xenbus_watch *watch, | ||
386 | const char **vec, unsigned int len) | ||
387 | { | ||
388 | unsigned long long new_target; | ||
389 | int err; | ||
390 | |||
391 | err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); | ||
392 | if (err != 1) { | ||
393 | /* This is ok (for domain0 at least) - so just return */ | ||
394 | return; | ||
395 | } | ||
396 | |||
397 | /* The given memory/target value is in KiB, so it needs converting to | ||
398 | * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. | ||
399 | */ | ||
400 | balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); | ||
401 | } | ||
402 | |||
403 | static int balloon_init_watcher(struct notifier_block *notifier, | ||
404 | unsigned long event, | ||
405 | void *data) | ||
406 | { | ||
407 | int err; | ||
408 | |||
409 | err = register_xenbus_watch(&target_watch); | ||
410 | if (err) | ||
411 | printk(KERN_ERR "Failed to set balloon watcher\n"); | ||
412 | |||
413 | return NOTIFY_DONE; | ||
414 | } | ||
415 | |||
416 | static struct notifier_block xenstore_notifier; | ||
417 | |||
418 | static int __init balloon_init(void) | ||
419 | { | ||
420 | unsigned long pfn; | ||
421 | struct page *page; | ||
422 | |||
423 | if (!is_running_on_xen()) | ||
424 | return -ENODEV; | ||
425 | |||
426 | pr_info("xen_balloon: Initialising balloon driver.\n"); | ||
427 | |||
428 | balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); | ||
429 | totalram_pages = balloon_stats.current_pages; | ||
430 | balloon_stats.target_pages = balloon_stats.current_pages; | ||
431 | balloon_stats.balloon_low = 0; | ||
432 | balloon_stats.balloon_high = 0; | ||
433 | balloon_stats.driver_pages = 0UL; | ||
434 | balloon_stats.hard_limit = ~0UL; | ||
435 | |||
436 | init_timer(&balloon_timer); | ||
437 | balloon_timer.data = 0; | ||
438 | balloon_timer.function = balloon_alarm; | ||
439 | |||
440 | register_balloon(&balloon_sysdev); | ||
441 | |||
442 | /* Initialise the balloon with excess memory space. */ | ||
443 | for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { | ||
444 | page = pfn_to_page(pfn); | ||
445 | if (!PageReserved(page)) | ||
446 | balloon_append(page); | ||
447 | } | ||
448 | |||
449 | target_watch.callback = watch_target; | ||
450 | xenstore_notifier.notifier_call = balloon_init_watcher; | ||
451 | |||
452 | register_xenstore_notifier(&xenstore_notifier); | ||
453 | |||
454 | return 0; | ||
455 | } | ||
456 | |||
457 | subsys_initcall(balloon_init); | ||
458 | |||
/* Module exit hook.  Releasing the balloon is not implemented yet. */
static void balloon_exit(void)
{
	/* XXX - release balloon here */
}
464 | |||
465 | module_exit(balloon_exit); | ||
466 | |||
467 | static void balloon_update_driver_allowance(long delta) | ||
468 | { | ||
469 | unsigned long flags; | ||
470 | |||
471 | spin_lock_irqsave(&balloon_lock, flags); | ||
472 | balloon_stats.driver_pages += delta; | ||
473 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
474 | } | ||
475 | |||
476 | static int dealloc_pte_fn( | ||
477 | pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) | ||
478 | { | ||
479 | unsigned long mfn = pte_mfn(*pte); | ||
480 | int ret; | ||
481 | struct xen_memory_reservation reservation = { | ||
482 | .nr_extents = 1, | ||
483 | .extent_order = 0, | ||
484 | .domid = DOMID_SELF | ||
485 | }; | ||
486 | reservation.extent_start = (unsigned long)&mfn; | ||
487 | set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); | ||
488 | set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); | ||
489 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); | ||
490 | BUG_ON(ret != 1); | ||
491 | return 0; | ||
492 | } | ||
493 | |||
494 | static struct page **alloc_empty_pages_and_pagevec(int nr_pages) | ||
495 | { | ||
496 | unsigned long vaddr, flags; | ||
497 | struct page *page, **pagevec; | ||
498 | int i, ret; | ||
499 | |||
500 | pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); | ||
501 | if (pagevec == NULL) | ||
502 | return NULL; | ||
503 | |||
504 | for (i = 0; i < nr_pages; i++) { | ||
505 | page = pagevec[i] = alloc_page(GFP_KERNEL); | ||
506 | if (page == NULL) | ||
507 | goto err; | ||
508 | |||
509 | vaddr = (unsigned long)page_address(page); | ||
510 | |||
511 | scrub_page(page); | ||
512 | |||
513 | spin_lock_irqsave(&balloon_lock, flags); | ||
514 | |||
515 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
516 | unsigned long gmfn = page_to_pfn(page); | ||
517 | struct xen_memory_reservation reservation = { | ||
518 | .nr_extents = 1, | ||
519 | .extent_order = 0, | ||
520 | .domid = DOMID_SELF | ||
521 | }; | ||
522 | reservation.extent_start = (unsigned long)&gmfn; | ||
523 | ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
524 | &reservation); | ||
525 | if (ret == 1) | ||
526 | ret = 0; /* success */ | ||
527 | } else { | ||
528 | ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, | ||
529 | dealloc_pte_fn, NULL); | ||
530 | } | ||
531 | |||
532 | if (ret != 0) { | ||
533 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
534 | __free_page(page); | ||
535 | goto err; | ||
536 | } | ||
537 | |||
538 | totalram_pages = --balloon_stats.current_pages; | ||
539 | |||
540 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
541 | } | ||
542 | |||
543 | out: | ||
544 | schedule_work(&balloon_worker); | ||
545 | flush_tlb_all(); | ||
546 | return pagevec; | ||
547 | |||
548 | err: | ||
549 | spin_lock_irqsave(&balloon_lock, flags); | ||
550 | while (--i >= 0) | ||
551 | balloon_append(pagevec[i]); | ||
552 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
553 | kfree(pagevec); | ||
554 | pagevec = NULL; | ||
555 | goto out; | ||
556 | } | ||
557 | |||
558 | static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) | ||
559 | { | ||
560 | unsigned long flags; | ||
561 | int i; | ||
562 | |||
563 | if (pagevec == NULL) | ||
564 | return; | ||
565 | |||
566 | spin_lock_irqsave(&balloon_lock, flags); | ||
567 | for (i = 0; i < nr_pages; i++) { | ||
568 | BUG_ON(page_count(pagevec[i]) != 1); | ||
569 | balloon_append(pagevec[i]); | ||
570 | } | ||
571 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
572 | |||
573 | kfree(pagevec); | ||
574 | |||
575 | schedule_work(&balloon_worker); | ||
576 | } | ||
577 | |||
578 | static void balloon_release_driver_page(struct page *page) | ||
579 | { | ||
580 | unsigned long flags; | ||
581 | |||
582 | spin_lock_irqsave(&balloon_lock, flags); | ||
583 | balloon_append(page); | ||
584 | balloon_stats.driver_pages--; | ||
585 | spin_unlock_irqrestore(&balloon_lock, flags); | ||
586 | |||
587 | schedule_work(&balloon_worker); | ||
588 | } | ||
589 | |||
590 | |||
591 | #define BALLOON_SHOW(name, format, args...) \ | ||
592 | static ssize_t show_##name(struct sys_device *dev, \ | ||
593 | char *buf) \ | ||
594 | { \ | ||
595 | return sprintf(buf, format, ##args); \ | ||
596 | } \ | ||
597 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) | ||
598 | |||
599 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); | ||
600 | BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); | ||
601 | BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); | ||
602 | BALLOON_SHOW(hard_limit_kb, | ||
603 | (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n", | ||
604 | (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); | ||
605 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); | ||
606 | |||
607 | static ssize_t show_target_kb(struct sys_device *dev, char *buf) | ||
608 | { | ||
609 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); | ||
610 | } | ||
611 | |||
612 | static ssize_t store_target_kb(struct sys_device *dev, | ||
613 | const char *buf, | ||
614 | size_t count) | ||
615 | { | ||
616 | char memstring[64], *endchar; | ||
617 | unsigned long long target_bytes; | ||
618 | |||
619 | if (!capable(CAP_SYS_ADMIN)) | ||
620 | return -EPERM; | ||
621 | |||
622 | if (count <= 1) | ||
623 | return -EBADMSG; /* runt */ | ||
624 | if (count > sizeof(memstring)) | ||
625 | return -EFBIG; /* too long */ | ||
626 | strcpy(memstring, buf); | ||
627 | |||
628 | target_bytes = memparse(memstring, &endchar); | ||
629 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | ||
630 | |||
631 | return count; | ||
632 | } | ||
633 | |||
634 | static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, | ||
635 | show_target_kb, store_target_kb); | ||
636 | |||
637 | static struct sysdev_attribute *balloon_attrs[] = { | ||
638 | &attr_target_kb, | ||
639 | }; | ||
640 | |||
641 | static struct attribute *balloon_info_attrs[] = { | ||
642 | &attr_current_kb.attr, | ||
643 | &attr_low_kb.attr, | ||
644 | &attr_high_kb.attr, | ||
645 | &attr_hard_limit_kb.attr, | ||
646 | &attr_driver_kb.attr, | ||
647 | NULL | ||
648 | }; | ||
649 | |||
650 | static struct attribute_group balloon_info_group = { | ||
651 | .name = "info", | ||
652 | .attrs = balloon_info_attrs, | ||
653 | }; | ||
654 | |||
655 | static struct sysdev_class balloon_sysdev_class = { | ||
656 | .name = BALLOON_CLASS_NAME, | ||
657 | }; | ||
658 | |||
659 | static int register_balloon(struct sys_device *sysdev) | ||
660 | { | ||
661 | int i, error; | ||
662 | |||
663 | error = sysdev_class_register(&balloon_sysdev_class); | ||
664 | if (error) | ||
665 | return error; | ||
666 | |||
667 | sysdev->id = 0; | ||
668 | sysdev->cls = &balloon_sysdev_class; | ||
669 | |||
670 | error = sysdev_register(sysdev); | ||
671 | if (error) { | ||
672 | sysdev_class_unregister(&balloon_sysdev_class); | ||
673 | return error; | ||
674 | } | ||
675 | |||
676 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { | ||
677 | error = sysdev_create_file(sysdev, balloon_attrs[i]); | ||
678 | if (error) | ||
679 | goto fail; | ||
680 | } | ||
681 | |||
682 | error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); | ||
683 | if (error) | ||
684 | goto fail; | ||
685 | |||
686 | return 0; | ||
687 | |||
688 | fail: | ||
689 | while (--i >= 0) | ||
690 | sysdev_remove_file(sysdev, balloon_attrs[i]); | ||
691 | sysdev_unregister(sysdev); | ||
692 | sysdev_class_unregister(&balloon_sysdev_class); | ||
693 | return error; | ||
694 | } | ||
695 | |||
696 | static void unregister_balloon(struct sys_device *sysdev) | ||
697 | { | ||
698 | int i; | ||
699 | |||
700 | sysfs_remove_group(&sysdev->kobj, &balloon_info_group); | ||
701 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) | ||
702 | sysdev_remove_file(sysdev, balloon_attrs[i]); | ||
703 | sysdev_unregister(sysdev); | ||
704 | sysdev_class_unregister(&balloon_sysdev_class); | ||
705 | } | ||
706 | |||
707 | static void balloon_sysfs_exit(void) | ||
708 | { | ||
709 | unregister_balloon(&balloon_sysdev); | ||
710 | } | ||
711 | |||
712 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/xen/events.c b/drivers/xen/events.c new file mode 100644 index 000000000000..4f0f22b020ea --- /dev/null +++ b/drivers/xen/events.c | |||
@@ -0,0 +1,674 @@ | |||
1 | /* | ||
2 | * Xen event channels | ||
3 | * | ||
4 | * Xen models interrupts with abstract event channels. Because each | ||
5 | * domain gets 1024 event channels, but NR_IRQ is not that large, we | ||
6 | * must dynamically map irqs<->event channels. The event channels | ||
7 | * interface with the rest of the kernel by defining a xen interrupt | ||
8 | * chip. When an event is received, it is mapped to an irq and sent | ||
9 | * through the normal interrupt processing path. | ||
10 | * | ||
11 | * There are four kinds of events which can be mapped to an event | ||
12 | * channel: | ||
13 | * | ||
14 | * 1. Inter-domain notifications. This includes all the virtual | ||
15 | * device events, since they're driven by front-ends in another domain | ||
16 | * (typically dom0). | ||
17 | * 2. VIRQs, typically used for timers. These are per-cpu events. | ||
18 | * 3. IPIs. | ||
19 | * 4. Hardware interrupts. Not supported at present. | ||
20 | * | ||
21 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | ||
22 | */ | ||
23 | |||
24 | #include <linux/linkage.h> | ||
25 | #include <linux/interrupt.h> | ||
26 | #include <linux/irq.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/string.h> | ||
29 | |||
30 | #include <asm/ptrace.h> | ||
31 | #include <asm/irq.h> | ||
32 | #include <asm/sync_bitops.h> | ||
33 | #include <asm/xen/hypercall.h> | ||
34 | #include <asm/xen/hypervisor.h> | ||
35 | |||
36 | #include <xen/xen-ops.h> | ||
37 | #include <xen/events.h> | ||
38 | #include <xen/interface/xen.h> | ||
39 | #include <xen/interface/event_channel.h> | ||
40 | |||
41 | /* | ||
42 | * This lock protects updates to the following mapping and reference-count | ||
43 | * arrays. The lock does not need to be acquired to read the mapping tables. | ||
44 | */ | ||
45 | static DEFINE_SPINLOCK(irq_mapping_update_lock); | ||
46 | |||
47 | /* IRQ <-> VIRQ mapping. */ | ||
48 | static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; | ||
49 | |||
50 | /* IRQ <-> IPI mapping */ | ||
51 | static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; | ||
52 | |||
53 | /* Packed IRQ information: binding type, sub-type index, and event channel. */ | ||
54 | struct packed_irq | ||
55 | { | ||
56 | unsigned short evtchn; | ||
57 | unsigned char index; | ||
58 | unsigned char type; | ||
59 | }; | ||
60 | |||
61 | static struct packed_irq irq_info[NR_IRQS]; | ||
62 | |||
63 | /* Binding types. */ | ||
64 | enum { | ||
65 | IRQT_UNBOUND, | ||
66 | IRQT_PIRQ, | ||
67 | IRQT_VIRQ, | ||
68 | IRQT_IPI, | ||
69 | IRQT_EVTCHN | ||
70 | }; | ||
71 | |||
72 | /* Convenient shorthand for packed representation of an unbound IRQ. */ | ||
73 | #define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) | ||
74 | |||
75 | static int evtchn_to_irq[NR_EVENT_CHANNELS] = { | ||
76 | [0 ... NR_EVENT_CHANNELS-1] = -1 | ||
77 | }; | ||
78 | static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; | ||
79 | static u8 cpu_evtchn[NR_EVENT_CHANNELS]; | ||
80 | |||
81 | /* Reference counts for bindings to IRQs. */ | ||
82 | static int irq_bindcount[NR_IRQS]; | ||
83 | |||
84 | /* Xen will never allocate port zero for any purpose. */ | ||
85 | #define VALID_EVTCHN(chn) ((chn) != 0) | ||
86 | |||
87 | /* | ||
88 | * Force a proper event-channel callback from Xen after clearing the | ||
89 | * callback mask. We do this in a very simple manner, by making a call | ||
90 | * down into Xen. The pending flag will be checked by Xen on return. | ||
91 | */ | ||
92 | void force_evtchn_callback(void) | ||
93 | { | ||
94 | (void)HYPERVISOR_xen_version(0, NULL); | ||
95 | } | ||
96 | EXPORT_SYMBOL_GPL(force_evtchn_callback); | ||
97 | |||
98 | static struct irq_chip xen_dynamic_chip; | ||
99 | |||
100 | /* Constructor for packed IRQ information. */ | ||
101 | static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) | ||
102 | { | ||
103 | return (struct packed_irq) { evtchn, index, type }; | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * Accessors for packed IRQ information. | ||
108 | */ | ||
109 | static inline unsigned int evtchn_from_irq(int irq) | ||
110 | { | ||
111 | return irq_info[irq].evtchn; | ||
112 | } | ||
113 | |||
114 | static inline unsigned int index_from_irq(int irq) | ||
115 | { | ||
116 | return irq_info[irq].index; | ||
117 | } | ||
118 | |||
119 | static inline unsigned int type_from_irq(int irq) | ||
120 | { | ||
121 | return irq_info[irq].type; | ||
122 | } | ||
123 | |||
124 | static inline unsigned long active_evtchns(unsigned int cpu, | ||
125 | struct shared_info *sh, | ||
126 | unsigned int idx) | ||
127 | { | ||
128 | return (sh->evtchn_pending[idx] & | ||
129 | cpu_evtchn_mask[cpu][idx] & | ||
130 | ~sh->evtchn_mask[idx]); | ||
131 | } | ||
132 | |||
133 | static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) | ||
134 | { | ||
135 | int irq = evtchn_to_irq[chn]; | ||
136 | |||
137 | BUG_ON(irq == -1); | ||
138 | #ifdef CONFIG_SMP | ||
139 | irq_desc[irq].affinity = cpumask_of_cpu(cpu); | ||
140 | #endif | ||
141 | |||
142 | __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); | ||
143 | __set_bit(chn, cpu_evtchn_mask[cpu]); | ||
144 | |||
145 | cpu_evtchn[chn] = cpu; | ||
146 | } | ||
147 | |||
148 | static void init_evtchn_cpu_bindings(void) | ||
149 | { | ||
150 | #ifdef CONFIG_SMP | ||
151 | int i; | ||
152 | /* By default all event channels notify CPU#0. */ | ||
153 | for (i = 0; i < NR_IRQS; i++) | ||
154 | irq_desc[i].affinity = cpumask_of_cpu(0); | ||
155 | #endif | ||
156 | |||
157 | memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); | ||
158 | memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); | ||
159 | } | ||
160 | |||
161 | static inline unsigned int cpu_from_evtchn(unsigned int evtchn) | ||
162 | { | ||
163 | return cpu_evtchn[evtchn]; | ||
164 | } | ||
165 | |||
166 | static inline void clear_evtchn(int port) | ||
167 | { | ||
168 | struct shared_info *s = HYPERVISOR_shared_info; | ||
169 | sync_clear_bit(port, &s->evtchn_pending[0]); | ||
170 | } | ||
171 | |||
172 | static inline void set_evtchn(int port) | ||
173 | { | ||
174 | struct shared_info *s = HYPERVISOR_shared_info; | ||
175 | sync_set_bit(port, &s->evtchn_pending[0]); | ||
176 | } | ||
177 | |||
178 | |||
179 | /** | ||
180 | * notify_remote_via_irq - send event to remote end of event channel via irq | ||
181 | * @irq: irq of event channel to send event to | ||
182 | * | ||
183 | * Unlike notify_remote_via_evtchn(), this is safe to use across | ||
184 | * save/restore. Notifications on a broken connection are silently | ||
185 | * dropped. | ||
186 | */ | ||
void notify_remote_via_irq(int irq)
{
	int evtchn = evtchn_from_irq(irq);

	/* No channel behind this irq: drop the notification. */
	if (!VALID_EVTCHN(evtchn))
		return;

	notify_remote_via_evtchn(evtchn);
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
195 | |||
196 | static void mask_evtchn(int port) | ||
197 | { | ||
198 | struct shared_info *s = HYPERVISOR_shared_info; | ||
199 | sync_set_bit(port, &s->evtchn_mask[0]); | ||
200 | } | ||
201 | |||
202 | static void unmask_evtchn(int port) | ||
203 | { | ||
204 | struct shared_info *s = HYPERVISOR_shared_info; | ||
205 | unsigned int cpu = get_cpu(); | ||
206 | |||
207 | BUG_ON(!irqs_disabled()); | ||
208 | |||
209 | /* Slow path (hypercall) if this is a non-local port. */ | ||
210 | if (unlikely(cpu != cpu_from_evtchn(port))) { | ||
211 | struct evtchn_unmask unmask = { .port = port }; | ||
212 | (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); | ||
213 | } else { | ||
214 | struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); | ||
215 | |||
216 | sync_clear_bit(port, &s->evtchn_mask[0]); | ||
217 | |||
218 | /* | ||
219 | * The following is basically the equivalent of | ||
220 | * 'hw_resend_irq'. Just like a real IO-APIC we 'lose | ||
221 | * the interrupt edge' if the channel is masked. | ||
222 | */ | ||
223 | if (sync_test_bit(port, &s->evtchn_pending[0]) && | ||
224 | !sync_test_and_set_bit(port / BITS_PER_LONG, | ||
225 | &vcpu_info->evtchn_pending_sel)) | ||
226 | vcpu_info->evtchn_upcall_pending = 1; | ||
227 | } | ||
228 | |||
229 | put_cpu(); | ||
230 | } | ||
231 | |||
232 | static int find_unbound_irq(void) | ||
233 | { | ||
234 | int irq; | ||
235 | |||
236 | /* Only allocate from dynirq range */ | ||
237 | for (irq = 0; irq < NR_IRQS; irq++) | ||
238 | if (irq_bindcount[irq] == 0) | ||
239 | break; | ||
240 | |||
241 | if (irq == NR_IRQS) | ||
242 | panic("No available IRQ to bind to: increase NR_IRQS!\n"); | ||
243 | |||
244 | return irq; | ||
245 | } | ||
246 | |||
247 | int bind_evtchn_to_irq(unsigned int evtchn) | ||
248 | { | ||
249 | int irq; | ||
250 | |||
251 | spin_lock(&irq_mapping_update_lock); | ||
252 | |||
253 | irq = evtchn_to_irq[evtchn]; | ||
254 | |||
255 | if (irq == -1) { | ||
256 | irq = find_unbound_irq(); | ||
257 | |||
258 | dynamic_irq_init(irq); | ||
259 | set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, | ||
260 | handle_level_irq, "event"); | ||
261 | |||
262 | evtchn_to_irq[evtchn] = irq; | ||
263 | irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); | ||
264 | } | ||
265 | |||
266 | irq_bindcount[irq]++; | ||
267 | |||
268 | spin_unlock(&irq_mapping_update_lock); | ||
269 | |||
270 | return irq; | ||
271 | } | ||
272 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); | ||
273 | |||
274 | static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) | ||
275 | { | ||
276 | struct evtchn_bind_ipi bind_ipi; | ||
277 | int evtchn, irq; | ||
278 | |||
279 | spin_lock(&irq_mapping_update_lock); | ||
280 | |||
281 | irq = per_cpu(ipi_to_irq, cpu)[ipi]; | ||
282 | if (irq == -1) { | ||
283 | irq = find_unbound_irq(); | ||
284 | if (irq < 0) | ||
285 | goto out; | ||
286 | |||
287 | dynamic_irq_init(irq); | ||
288 | set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, | ||
289 | handle_level_irq, "ipi"); | ||
290 | |||
291 | bind_ipi.vcpu = cpu; | ||
292 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, | ||
293 | &bind_ipi) != 0) | ||
294 | BUG(); | ||
295 | evtchn = bind_ipi.port; | ||
296 | |||
297 | evtchn_to_irq[evtchn] = irq; | ||
298 | irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); | ||
299 | |||
300 | per_cpu(ipi_to_irq, cpu)[ipi] = irq; | ||
301 | |||
302 | bind_evtchn_to_cpu(evtchn, cpu); | ||
303 | } | ||
304 | |||
305 | irq_bindcount[irq]++; | ||
306 | |||
307 | out: | ||
308 | spin_unlock(&irq_mapping_update_lock); | ||
309 | return irq; | ||
310 | } | ||
311 | |||
312 | |||
313 | static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) | ||
314 | { | ||
315 | struct evtchn_bind_virq bind_virq; | ||
316 | int evtchn, irq; | ||
317 | |||
318 | spin_lock(&irq_mapping_update_lock); | ||
319 | |||
320 | irq = per_cpu(virq_to_irq, cpu)[virq]; | ||
321 | |||
322 | if (irq == -1) { | ||
323 | bind_virq.virq = virq; | ||
324 | bind_virq.vcpu = cpu; | ||
325 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | ||
326 | &bind_virq) != 0) | ||
327 | BUG(); | ||
328 | evtchn = bind_virq.port; | ||
329 | |||
330 | irq = find_unbound_irq(); | ||
331 | |||
332 | dynamic_irq_init(irq); | ||
333 | set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, | ||
334 | handle_level_irq, "virq"); | ||
335 | |||
336 | evtchn_to_irq[evtchn] = irq; | ||
337 | irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); | ||
338 | |||
339 | per_cpu(virq_to_irq, cpu)[virq] = irq; | ||
340 | |||
341 | bind_evtchn_to_cpu(evtchn, cpu); | ||
342 | } | ||
343 | |||
344 | irq_bindcount[irq]++; | ||
345 | |||
346 | spin_unlock(&irq_mapping_update_lock); | ||
347 | |||
348 | return irq; | ||
349 | } | ||
350 | |||
351 | static void unbind_from_irq(unsigned int irq) | ||
352 | { | ||
353 | struct evtchn_close close; | ||
354 | int evtchn = evtchn_from_irq(irq); | ||
355 | |||
356 | spin_lock(&irq_mapping_update_lock); | ||
357 | |||
358 | if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { | ||
359 | close.port = evtchn; | ||
360 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) | ||
361 | BUG(); | ||
362 | |||
363 | switch (type_from_irq(irq)) { | ||
364 | case IRQT_VIRQ: | ||
365 | per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) | ||
366 | [index_from_irq(irq)] = -1; | ||
367 | break; | ||
368 | default: | ||
369 | break; | ||
370 | } | ||
371 | |||
372 | /* Closed ports are implicitly re-bound to VCPU0. */ | ||
373 | bind_evtchn_to_cpu(evtchn, 0); | ||
374 | |||
375 | evtchn_to_irq[evtchn] = -1; | ||
376 | irq_info[irq] = IRQ_UNBOUND; | ||
377 | |||
378 | dynamic_irq_init(irq); | ||
379 | } | ||
380 | |||
381 | spin_unlock(&irq_mapping_update_lock); | ||
382 | } | ||
383 | |||
384 | int bind_evtchn_to_irqhandler(unsigned int evtchn, | ||
385 | irq_handler_t handler, | ||
386 | unsigned long irqflags, | ||
387 | const char *devname, void *dev_id) | ||
388 | { | ||
389 | unsigned int irq; | ||
390 | int retval; | ||
391 | |||
392 | irq = bind_evtchn_to_irq(evtchn); | ||
393 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | ||
394 | if (retval != 0) { | ||
395 | unbind_from_irq(irq); | ||
396 | return retval; | ||
397 | } | ||
398 | |||
399 | return irq; | ||
400 | } | ||
401 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); | ||
402 | |||
403 | int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, | ||
404 | irq_handler_t handler, | ||
405 | unsigned long irqflags, const char *devname, void *dev_id) | ||
406 | { | ||
407 | unsigned int irq; | ||
408 | int retval; | ||
409 | |||
410 | irq = bind_virq_to_irq(virq, cpu); | ||
411 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | ||
412 | if (retval != 0) { | ||
413 | unbind_from_irq(irq); | ||
414 | return retval; | ||
415 | } | ||
416 | |||
417 | return irq; | ||
418 | } | ||
419 | EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); | ||
420 | |||
421 | int bind_ipi_to_irqhandler(enum ipi_vector ipi, | ||
422 | unsigned int cpu, | ||
423 | irq_handler_t handler, | ||
424 | unsigned long irqflags, | ||
425 | const char *devname, | ||
426 | void *dev_id) | ||
427 | { | ||
428 | int irq, retval; | ||
429 | |||
430 | irq = bind_ipi_to_irq(ipi, cpu); | ||
431 | if (irq < 0) | ||
432 | return irq; | ||
433 | |||
434 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | ||
435 | if (retval != 0) { | ||
436 | unbind_from_irq(irq); | ||
437 | return retval; | ||
438 | } | ||
439 | |||
440 | return irq; | ||
441 | } | ||
442 | |||
/*
 * Counterpart of the bind_*_to_irqhandler() helpers: remove the handler
 * registered with @dev_id, then drop the binding reference on @irq.
 */
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
	free_irq(irq, dev_id);
	unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
449 | |||
450 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) | ||
451 | { | ||
452 | int irq = per_cpu(ipi_to_irq, cpu)[vector]; | ||
453 | BUG_ON(irq < 0); | ||
454 | notify_remote_via_irq(irq); | ||
455 | } | ||
456 | |||
457 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id) | ||
458 | { | ||
459 | struct shared_info *sh = HYPERVISOR_shared_info; | ||
460 | int cpu = smp_processor_id(); | ||
461 | int i; | ||
462 | unsigned long flags; | ||
463 | static DEFINE_SPINLOCK(debug_lock); | ||
464 | |||
465 | spin_lock_irqsave(&debug_lock, flags); | ||
466 | |||
467 | printk("vcpu %d\n ", cpu); | ||
468 | |||
469 | for_each_online_cpu(i) { | ||
470 | struct vcpu_info *v = per_cpu(xen_vcpu, i); | ||
471 | printk("%d: masked=%d pending=%d event_sel %08lx\n ", i, | ||
472 | (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask, | ||
473 | v->evtchn_upcall_pending, | ||
474 | v->evtchn_pending_sel); | ||
475 | } | ||
476 | printk("pending:\n "); | ||
477 | for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) | ||
478 | printk("%08lx%s", sh->evtchn_pending[i], | ||
479 | i % 8 == 0 ? "\n " : " "); | ||
480 | printk("\nmasks:\n "); | ||
481 | for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) | ||
482 | printk("%08lx%s", sh->evtchn_mask[i], | ||
483 | i % 8 == 0 ? "\n " : " "); | ||
484 | |||
485 | printk("\nunmasked:\n "); | ||
486 | for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) | ||
487 | printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i], | ||
488 | i % 8 == 0 ? "\n " : " "); | ||
489 | |||
490 | printk("\npending list:\n"); | ||
491 | for(i = 0; i < NR_EVENT_CHANNELS; i++) { | ||
492 | if (sync_test_bit(i, sh->evtchn_pending)) { | ||
493 | printk(" %d: event %d -> irq %d\n", | ||
494 | cpu_evtchn[i], i, | ||
495 | evtchn_to_irq[i]); | ||
496 | } | ||
497 | } | ||
498 | |||
499 | spin_unlock_irqrestore(&debug_lock, flags); | ||
500 | |||
501 | return IRQ_HANDLED; | ||
502 | } | ||
503 | |||
504 | |||
505 | /* | ||
506 | * Search the CPU's pending events bitmasks. For each one found, map | ||
507 | * the event number to an irq, and feed it into do_IRQ() for | ||
508 | * handling. | ||
509 | * | ||
510 | * Xen uses a two-level bitmap to speed searching. The first level is | ||
511 | * a bitset of words which contain pending event bits. The second | ||
512 | * level is a bitset of pending events themselves. | ||
513 | */ | ||
514 | void xen_evtchn_do_upcall(struct pt_regs *regs) | ||
515 | { | ||
516 | int cpu = get_cpu(); | ||
517 | struct shared_info *s = HYPERVISOR_shared_info; | ||
518 | struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); | ||
519 | static DEFINE_PER_CPU(unsigned, nesting_count); | ||
520 | unsigned count; | ||
521 | |||
522 | do { | ||
523 | unsigned long pending_words; | ||
524 | |||
525 | vcpu_info->evtchn_upcall_pending = 0; | ||
526 | |||
527 | if (__get_cpu_var(nesting_count)++) | ||
528 | goto out; | ||
529 | |||
530 | #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ | ||
531 | /* Clear master flag /before/ clearing selector flag. */ | ||
532 | rmb(); | ||
533 | #endif | ||
534 | pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); | ||
535 | while (pending_words != 0) { | ||
536 | unsigned long pending_bits; | ||
537 | int word_idx = __ffs(pending_words); | ||
538 | pending_words &= ~(1UL << word_idx); | ||
539 | |||
540 | while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { | ||
541 | int bit_idx = __ffs(pending_bits); | ||
542 | int port = (word_idx * BITS_PER_LONG) + bit_idx; | ||
543 | int irq = evtchn_to_irq[port]; | ||
544 | |||
545 | if (irq != -1) | ||
546 | xen_do_IRQ(irq, regs); | ||
547 | } | ||
548 | } | ||
549 | |||
550 | BUG_ON(!irqs_disabled()); | ||
551 | |||
552 | count = __get_cpu_var(nesting_count); | ||
553 | __get_cpu_var(nesting_count) = 0; | ||
554 | } while(count != 1); | ||
555 | |||
556 | out: | ||
557 | put_cpu(); | ||
558 | } | ||
559 | |||
560 | /* Rebind an evtchn so that it gets delivered to a specific cpu */ | ||
561 | static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) | ||
562 | { | ||
563 | struct evtchn_bind_vcpu bind_vcpu; | ||
564 | int evtchn = evtchn_from_irq(irq); | ||
565 | |||
566 | if (!VALID_EVTCHN(evtchn)) | ||
567 | return; | ||
568 | |||
569 | /* Send future instances of this interrupt to other vcpu. */ | ||
570 | bind_vcpu.port = evtchn; | ||
571 | bind_vcpu.vcpu = tcpu; | ||
572 | |||
573 | /* | ||
574 | * If this fails, it usually just indicates that we're dealing with a | ||
575 | * virq or IPI channel, which don't actually need to be rebound. Ignore | ||
576 | * it, but don't do the xenlinux-level rebind in that case. | ||
577 | */ | ||
578 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) | ||
579 | bind_evtchn_to_cpu(evtchn, tcpu); | ||
580 | } | ||
581 | |||
582 | |||
583 | static void set_affinity_irq(unsigned irq, cpumask_t dest) | ||
584 | { | ||
585 | unsigned tcpu = first_cpu(dest); | ||
586 | rebind_irq_to_cpu(irq, tcpu); | ||
587 | } | ||
588 | |||
589 | int resend_irq_on_evtchn(unsigned int irq) | ||
590 | { | ||
591 | int masked, evtchn = evtchn_from_irq(irq); | ||
592 | struct shared_info *s = HYPERVISOR_shared_info; | ||
593 | |||
594 | if (!VALID_EVTCHN(evtchn)) | ||
595 | return 1; | ||
596 | |||
597 | masked = sync_test_and_set_bit(evtchn, s->evtchn_mask); | ||
598 | sync_set_bit(evtchn, s->evtchn_pending); | ||
599 | if (!masked) | ||
600 | unmask_evtchn(evtchn); | ||
601 | |||
602 | return 1; | ||
603 | } | ||
604 | |||
/* irq_chip .unmask hook: unmask the event channel behind @irq, if any. */
static void enable_dynirq(unsigned int irq)
{
	int evtchn = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(evtchn))
		return;

	unmask_evtchn(evtchn);
}
612 | |||
/* irq_chip .mask hook: mask the event channel behind @irq, if any. */
static void disable_dynirq(unsigned int irq)
{
	int evtchn = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(evtchn))
		return;

	mask_evtchn(evtchn);
}
620 | |||
/*
 * irq_chip .ack hook: let the core migrate the irq if requested, then clear
 * the pending bit of the event channel behind @irq, if any.
 */
static void ack_dynirq(unsigned int irq)
{
	int evtchn = evtchn_from_irq(irq);

	move_native_irq(irq);

	if (!VALID_EVTCHN(evtchn))
		return;

	clear_evtchn(evtchn);
}
630 | |||
631 | static int retrigger_dynirq(unsigned int irq) | ||
632 | { | ||
633 | int evtchn = evtchn_from_irq(irq); | ||
634 | struct shared_info *sh = HYPERVISOR_shared_info; | ||
635 | int ret = 0; | ||
636 | |||
637 | if (VALID_EVTCHN(evtchn)) { | ||
638 | int masked; | ||
639 | |||
640 | masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask); | ||
641 | sync_set_bit(evtchn, sh->evtchn_pending); | ||
642 | if (!masked) | ||
643 | unmask_evtchn(evtchn); | ||
644 | ret = 1; | ||
645 | } | ||
646 | |||
647 | return ret; | ||
648 | } | ||
649 | |||
650 | static struct irq_chip xen_dynamic_chip __read_mostly = { | ||
651 | .name = "xen-dyn", | ||
652 | .mask = disable_dynirq, | ||
653 | .unmask = enable_dynirq, | ||
654 | .ack = ack_dynirq, | ||
655 | .set_affinity = set_affinity_irq, | ||
656 | .retrigger = retrigger_dynirq, | ||
657 | }; | ||
658 | |||
659 | void __init xen_init_IRQ(void) | ||
660 | { | ||
661 | int i; | ||
662 | |||
663 | init_evtchn_cpu_bindings(); | ||
664 | |||
665 | /* No event channels are 'live' right now. */ | ||
666 | for (i = 0; i < NR_EVENT_CHANNELS; i++) | ||
667 | mask_evtchn(i); | ||
668 | |||
669 | /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ | ||
670 | for (i = 0; i < NR_IRQS; i++) | ||
671 | irq_bindcount[i] = 0; | ||
672 | |||
673 | irq_ctx_init(smp_processor_id()); | ||
674 | } | ||
diff --git a/drivers/xen/features.c b/drivers/xen/features.c new file mode 100644 index 000000000000..0707714e40d6 --- /dev/null +++ b/drivers/xen/features.c | |||
@@ -0,0 +1,29 @@ | |||
1 | /****************************************************************************** | ||
2 | * features.c | ||
3 | * | ||
4 | * Xen feature flags. | ||
5 | * | ||
6 | * Copyright (c) 2006, Ian Campbell, XenSource Inc. | ||
7 | */ | ||
8 | #include <linux/types.h> | ||
9 | #include <linux/cache.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <asm/xen/hypervisor.h> | ||
12 | #include <xen/features.h> | ||
13 | |||
14 | u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; | ||
15 | EXPORT_SYMBOL_GPL(xen_features); | ||
16 | |||
17 | void xen_setup_features(void) | ||
18 | { | ||
19 | struct xen_feature_info fi; | ||
20 | int i, j; | ||
21 | |||
22 | for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { | ||
23 | fi.submap_idx = i; | ||
24 | if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) | ||
25 | break; | ||
26 | for (j = 0; j < 32; j++) | ||
27 | xen_features[i * 32 + j] = !!(fi.submap & 1<<j); | ||
28 | } | ||
29 | } | ||
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index d85dc6d41c2a..52b6b41b909d 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c | |||
@@ -439,24 +439,6 @@ static inline unsigned int max_nr_grant_frames(void) | |||
439 | return xen_max; | 439 | return xen_max; |
440 | } | 440 | } |
441 | 441 | ||
442 | static int map_pte_fn(pte_t *pte, struct page *pmd_page, | ||
443 | unsigned long addr, void *data) | ||
444 | { | ||
445 | unsigned long **frames = (unsigned long **)data; | ||
446 | |||
447 | set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); | ||
448 | (*frames)++; | ||
449 | return 0; | ||
450 | } | ||
451 | |||
452 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | ||
453 | unsigned long addr, void *data) | ||
454 | { | ||
455 | |||
456 | set_pte_at(&init_mm, addr, pte, __pte(0)); | ||
457 | return 0; | ||
458 | } | ||
459 | |||
460 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | 442 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) |
461 | { | 443 | { |
462 | struct gnttab_setup_table setup; | 444 | struct gnttab_setup_table setup; |
@@ -470,7 +452,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | |||
470 | 452 | ||
471 | setup.dom = DOMID_SELF; | 453 | setup.dom = DOMID_SELF; |
472 | setup.nr_frames = nr_gframes; | 454 | setup.nr_frames = nr_gframes; |
473 | setup.frame_list = frames; | 455 | set_xen_guest_handle(setup.frame_list, frames); |
474 | 456 | ||
475 | rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); | 457 | rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); |
476 | if (rc == -ENOSYS) { | 458 | if (rc == -ENOSYS) { |
@@ -480,17 +462,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | |||
480 | 462 | ||
481 | BUG_ON(rc || setup.status); | 463 | BUG_ON(rc || setup.status); |
482 | 464 | ||
483 | if (shared == NULL) { | 465 | rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(), |
484 | struct vm_struct *area; | 466 | &shared); |
485 | area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames()); | ||
486 | BUG_ON(area == NULL); | ||
487 | shared = area->addr; | ||
488 | } | ||
489 | rc = apply_to_page_range(&init_mm, (unsigned long)shared, | ||
490 | PAGE_SIZE * nr_gframes, | ||
491 | map_pte_fn, &frames); | ||
492 | BUG_ON(rc); | 467 | BUG_ON(rc); |
493 | frames -= nr_gframes; /* adjust after map_pte_fn() */ | ||
494 | 468 | ||
495 | kfree(frames); | 469 | kfree(frames); |
496 | 470 | ||
@@ -506,10 +480,7 @@ static int gnttab_resume(void) | |||
506 | 480 | ||
507 | static int gnttab_suspend(void) | 481 | static int gnttab_suspend(void) |
508 | { | 482 | { |
509 | apply_to_page_range(&init_mm, (unsigned long)shared, | 483 | arch_gnttab_unmap_shared(shared, nr_grant_frames); |
510 | PAGE_SIZE * nr_grant_frames, | ||
511 | unmap_pte_fn, NULL); | ||
512 | |||
513 | return 0; | 484 | return 0; |
514 | } | 485 | } |
515 | 486 | ||
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 9fd2f70ab46d..0f86b0ff7879 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c | |||
@@ -399,7 +399,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) | |||
399 | 399 | ||
400 | *vaddr = NULL; | 400 | *vaddr = NULL; |
401 | 401 | ||
402 | area = alloc_vm_area(PAGE_SIZE); | 402 | area = xen_alloc_vm_area(PAGE_SIZE); |
403 | if (!area) | 403 | if (!area) |
404 | return -ENOMEM; | 404 | return -ENOMEM; |
405 | 405 | ||
@@ -409,7 +409,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) | |||
409 | BUG(); | 409 | BUG(); |
410 | 410 | ||
411 | if (op.status != GNTST_okay) { | 411 | if (op.status != GNTST_okay) { |
412 | free_vm_area(area); | 412 | xen_free_vm_area(area); |
413 | xenbus_dev_fatal(dev, op.status, | 413 | xenbus_dev_fatal(dev, op.status, |
414 | "mapping in shared page %d from domain %d", | 414 | "mapping in shared page %d from domain %d", |
415 | gnt_ref, dev->otherend_id); | 415 | gnt_ref, dev->otherend_id); |
@@ -508,7 +508,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) | |||
508 | BUG(); | 508 | BUG(); |
509 | 509 | ||
510 | if (op.status == GNTST_okay) | 510 | if (op.status == GNTST_okay) |
511 | free_vm_area(area); | 511 | xen_free_vm_area(area); |
512 | else | 512 | else |
513 | xenbus_dev_error(dev, op.status, | 513 | xenbus_dev_error(dev, op.status, |
514 | "unmapping page at handle %d error %d", | 514 | "unmapping page at handle %d error %d", |
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 4750de316ad3..57ceb5346b74 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c | |||
@@ -88,6 +88,16 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv) | |||
88 | return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; | 88 | return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; |
89 | } | 89 | } |
90 | 90 | ||
91 | static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env) | ||
92 | { | ||
93 | struct xenbus_device *dev = to_xenbus_device(_dev); | ||
94 | |||
95 | if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) | ||
96 | return -ENOMEM; | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
91 | /* device/<type>/<id> => <type>-<id> */ | 101 | /* device/<type>/<id> => <type>-<id> */ |
92 | static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) | 102 | static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) |
93 | { | 103 | { |
@@ -166,6 +176,7 @@ static struct xen_bus_type xenbus_frontend = { | |||
166 | .bus = { | 176 | .bus = { |
167 | .name = "xen", | 177 | .name = "xen", |
168 | .match = xenbus_match, | 178 | .match = xenbus_match, |
179 | .uevent = xenbus_uevent, | ||
169 | .probe = xenbus_dev_probe, | 180 | .probe = xenbus_dev_probe, |
170 | .remove = xenbus_dev_remove, | 181 | .remove = xenbus_dev_remove, |
171 | .shutdown = xenbus_dev_shutdown, | 182 | .shutdown = xenbus_dev_shutdown, |
@@ -438,6 +449,12 @@ static ssize_t xendev_show_devtype(struct device *dev, | |||
438 | } | 449 | } |
439 | DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); | 450 | DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); |
440 | 451 | ||
452 | static ssize_t xendev_show_modalias(struct device *dev, | ||
453 | struct device_attribute *attr, char *buf) | ||
454 | { | ||
455 | return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); | ||
456 | } | ||
457 | DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); | ||
441 | 458 | ||
442 | int xenbus_probe_node(struct xen_bus_type *bus, | 459 | int xenbus_probe_node(struct xen_bus_type *bus, |
443 | const char *type, | 460 | const char *type, |
@@ -492,10 +509,16 @@ int xenbus_probe_node(struct xen_bus_type *bus, | |||
492 | 509 | ||
493 | err = device_create_file(&xendev->dev, &dev_attr_devtype); | 510 | err = device_create_file(&xendev->dev, &dev_attr_devtype); |
494 | if (err) | 511 | if (err) |
495 | goto fail_remove_file; | 512 | goto fail_remove_nodename; |
513 | |||
514 | err = device_create_file(&xendev->dev, &dev_attr_modalias); | ||
515 | if (err) | ||
516 | goto fail_remove_devtype; | ||
496 | 517 | ||
497 | return 0; | 518 | return 0; |
498 | fail_remove_file: | 519 | fail_remove_devtype: |
520 | device_remove_file(&xendev->dev, &dev_attr_devtype); | ||
521 | fail_remove_nodename: | ||
499 | device_remove_file(&xendev->dev, &dev_attr_nodename); | 522 | device_remove_file(&xendev->dev, &dev_attr_nodename); |
500 | fail_unregister: | 523 | fail_unregister: |
501 | device_unregister(&xendev->dev); | 524 | device_unregister(&xendev->dev); |
@@ -846,6 +869,7 @@ static int is_disconnected_device(struct device *dev, void *data) | |||
846 | { | 869 | { |
847 | struct xenbus_device *xendev = to_xenbus_device(dev); | 870 | struct xenbus_device *xendev = to_xenbus_device(dev); |
848 | struct device_driver *drv = data; | 871 | struct device_driver *drv = data; |
872 | struct xenbus_driver *xendrv; | ||
849 | 873 | ||
850 | /* | 874 | /* |
851 | * A device with no driver will never connect. We care only about | 875 | * A device with no driver will never connect. We care only about |
@@ -858,7 +882,9 @@ static int is_disconnected_device(struct device *dev, void *data) | |||
858 | if (drv && (dev->driver != drv)) | 882 | if (drv && (dev->driver != drv)) |
859 | return 0; | 883 | return 0; |
860 | 884 | ||
861 | return (xendev->state != XenbusStateConnected); | 885 | xendrv = to_xenbus_driver(dev->driver); |
886 | return (xendev->state != XenbusStateConnected || | ||
887 | (xendrv->is_ready && !xendrv->is_ready(xendev))); | ||
862 | } | 888 | } |
863 | 889 | ||
864 | static int exists_disconnected_device(struct device_driver *drv) | 890 | static int exists_disconnected_device(struct device_driver *drv) |
diff --git a/drivers/xen/xencomm.c b/drivers/xen/xencomm.c new file mode 100644 index 000000000000..797cb4e31f07 --- /dev/null +++ b/drivers/xen/xencomm.c | |||
@@ -0,0 +1,232 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
15 | * | ||
16 | * Copyright (C) IBM Corp. 2006 | ||
17 | * | ||
18 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | ||
19 | */ | ||
20 | |||
21 | #include <linux/gfp.h> | ||
22 | #include <linux/mm.h> | ||
23 | #include <asm/page.h> | ||
24 | #include <xen/xencomm.h> | ||
25 | #include <xen/interface/xen.h> | ||
26 | #ifdef __ia64__ | ||
27 | #include <asm/xen/xencomm.h> /* for is_kern_addr() */ | ||
28 | #endif | ||
29 | |||
30 | #ifdef HAVE_XEN_PLATFORM_COMPAT_H | ||
31 | #include <xen/platform-compat.h> | ||
32 | #endif | ||
33 | |||
34 | static int xencomm_init(struct xencomm_desc *desc, | ||
35 | void *buffer, unsigned long bytes) | ||
36 | { | ||
37 | unsigned long recorded = 0; | ||
38 | int i = 0; | ||
39 | |||
40 | while ((recorded < bytes) && (i < desc->nr_addrs)) { | ||
41 | unsigned long vaddr = (unsigned long)buffer + recorded; | ||
42 | unsigned long paddr; | ||
43 | int offset; | ||
44 | int chunksz; | ||
45 | |||
46 | offset = vaddr % PAGE_SIZE; /* handle partial pages */ | ||
47 | chunksz = min(PAGE_SIZE - offset, bytes - recorded); | ||
48 | |||
49 | paddr = xencomm_vtop(vaddr); | ||
50 | if (paddr == ~0UL) { | ||
51 | printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n", | ||
52 | __func__, vaddr); | ||
53 | return -EINVAL; | ||
54 | } | ||
55 | |||
56 | desc->address[i++] = paddr; | ||
57 | recorded += chunksz; | ||
58 | } | ||
59 | |||
60 | if (recorded < bytes) { | ||
61 | printk(KERN_DEBUG | ||
62 | "%s: could only translate %ld of %ld bytes\n", | ||
63 | __func__, recorded, bytes); | ||
64 | return -ENOSPC; | ||
65 | } | ||
66 | |||
67 | /* mark remaining addresses invalid (just for safety) */ | ||
68 | while (i < desc->nr_addrs) | ||
69 | desc->address[i++] = XENCOMM_INVALID; | ||
70 | |||
71 | desc->magic = XENCOMM_MAGIC; | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask, | ||
77 | void *buffer, unsigned long bytes) | ||
78 | { | ||
79 | struct xencomm_desc *desc; | ||
80 | unsigned long buffer_ulong = (unsigned long)buffer; | ||
81 | unsigned long start = buffer_ulong & PAGE_MASK; | ||
82 | unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK; | ||
83 | unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT; | ||
84 | unsigned long size = sizeof(*desc) + | ||
85 | sizeof(desc->address[0]) * nr_addrs; | ||
86 | |||
87 | /* | ||
88 | * slab allocator returns at least sizeof(void*) aligned pointer. | ||
89 | * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might | ||
90 | * cross page boundary. | ||
91 | */ | ||
92 | if (sizeof(*desc) > sizeof(void *)) { | ||
93 | unsigned long order = get_order(size); | ||
94 | desc = (struct xencomm_desc *)__get_free_pages(gfp_mask, | ||
95 | order); | ||
96 | if (desc == NULL) | ||
97 | return NULL; | ||
98 | |||
99 | desc->nr_addrs = | ||
100 | ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) / | ||
101 | sizeof(*desc->address); | ||
102 | } else { | ||
103 | desc = kmalloc(size, gfp_mask); | ||
104 | if (desc == NULL) | ||
105 | return NULL; | ||
106 | |||
107 | desc->nr_addrs = nr_addrs; | ||
108 | } | ||
109 | return desc; | ||
110 | } | ||
111 | |||
112 | void xencomm_free(struct xencomm_handle *desc) | ||
113 | { | ||
114 | if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) { | ||
115 | struct xencomm_desc *desc__ = (struct xencomm_desc *)desc; | ||
116 | if (sizeof(*desc__) > sizeof(void *)) { | ||
117 | unsigned long size = sizeof(*desc__) + | ||
118 | sizeof(desc__->address[0]) * desc__->nr_addrs; | ||
119 | unsigned long order = get_order(size); | ||
120 | free_pages((unsigned long)__va(desc), order); | ||
121 | } else | ||
122 | kfree(__va(desc)); | ||
123 | } | ||
124 | } | ||
125 | |||
126 | static int xencomm_create(void *buffer, unsigned long bytes, | ||
127 | struct xencomm_desc **ret, gfp_t gfp_mask) | ||
128 | { | ||
129 | struct xencomm_desc *desc; | ||
130 | int rc; | ||
131 | |||
132 | pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes); | ||
133 | |||
134 | if (bytes == 0) { | ||
135 | /* don't create a descriptor; Xen recognizes NULL. */ | ||
136 | BUG_ON(buffer != NULL); | ||
137 | *ret = NULL; | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | BUG_ON(buffer == NULL); /* 'bytes' is non-zero */ | ||
142 | |||
143 | desc = xencomm_alloc(gfp_mask, buffer, bytes); | ||
144 | if (!desc) { | ||
145 | printk(KERN_DEBUG "%s failure\n", "xencomm_alloc"); | ||
146 | return -ENOMEM; | ||
147 | } | ||
148 | |||
149 | rc = xencomm_init(desc, buffer, bytes); | ||
150 | if (rc) { | ||
151 | printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc); | ||
152 | xencomm_free((struct xencomm_handle *)__pa(desc)); | ||
153 | return rc; | ||
154 | } | ||
155 | |||
156 | *ret = desc; | ||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | /* check if memory address is within VMALLOC region */ | ||
161 | static int is_phys_contiguous(unsigned long addr) | ||
162 | { | ||
163 | if (!is_kernel_addr(addr)) | ||
164 | return 0; | ||
165 | |||
166 | return (addr < VMALLOC_START) || (addr >= VMALLOC_END); | ||
167 | } | ||
168 | |||
169 | static struct xencomm_handle *xencomm_create_inline(void *ptr) | ||
170 | { | ||
171 | unsigned long paddr; | ||
172 | |||
173 | BUG_ON(!is_phys_contiguous((unsigned long)ptr)); | ||
174 | |||
175 | paddr = (unsigned long)xencomm_pa(ptr); | ||
176 | BUG_ON(paddr & XENCOMM_INLINE_FLAG); | ||
177 | return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG); | ||
178 | } | ||
179 | |||
180 | /* "mini" routine, for stack-based communications: */ | ||
181 | static int xencomm_create_mini(void *buffer, | ||
182 | unsigned long bytes, struct xencomm_mini *xc_desc, | ||
183 | struct xencomm_desc **ret) | ||
184 | { | ||
185 | int rc = 0; | ||
186 | struct xencomm_desc *desc; | ||
187 | BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0); | ||
188 | |||
189 | desc = (void *)xc_desc; | ||
190 | |||
191 | desc->nr_addrs = XENCOMM_MINI_ADDRS; | ||
192 | |||
193 | rc = xencomm_init(desc, buffer, bytes); | ||
194 | if (!rc) | ||
195 | *ret = desc; | ||
196 | |||
197 | return rc; | ||
198 | } | ||
199 | |||
200 | struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes) | ||
201 | { | ||
202 | int rc; | ||
203 | struct xencomm_desc *desc; | ||
204 | |||
205 | if (is_phys_contiguous((unsigned long)ptr)) | ||
206 | return xencomm_create_inline(ptr); | ||
207 | |||
208 | rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL); | ||
209 | |||
210 | if (rc || desc == NULL) | ||
211 | return NULL; | ||
212 | |||
213 | return xencomm_pa(desc); | ||
214 | } | ||
215 | |||
216 | struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes, | ||
217 | struct xencomm_mini *xc_desc) | ||
218 | { | ||
219 | int rc; | ||
220 | struct xencomm_desc *desc = NULL; | ||
221 | |||
222 | if (is_phys_contiguous((unsigned long)ptr)) | ||
223 | return xencomm_create_inline(ptr); | ||
224 | |||
225 | rc = xencomm_create_mini(ptr, bytes, xc_desc, | ||
226 | &desc); | ||
227 | |||
228 | if (rc) | ||
229 | return NULL; | ||
230 | |||
231 | return xencomm_pa(desc); | ||
232 | } | ||