Diffstat (limited to 'drivers/xen')
28 files changed, 4538 insertions, 907 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 60d71e9abe9f..a59638b37c1a 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -29,6 +29,14 @@ config XEN_DEV_EVTCHN | |||
29 | firing. | 29 | firing. |
30 | If in doubt, say yes. | 30 | If in doubt, say yes. |
31 | 31 | ||
32 | config XEN_BACKEND | ||
33 | bool "Backend driver support" | ||
34 | depends on XEN_DOM0 | ||
35 | default y | ||
36 | help | ||
37 | Support for backend device drivers that provide I/O services | ||
38 | to other virtual machines. | ||
39 | |||
32 | config XENFS | 40 | config XENFS |
33 | tristate "Xen filesystem" | 41 | tristate "Xen filesystem" |
34 | default y | 42 | default y |
@@ -62,9 +70,29 @@ config XEN_SYS_HYPERVISOR | |||
62 | virtual environment, /sys/hypervisor will still be present, | 70 | virtual environment, /sys/hypervisor will still be present, |
63 | but will have no xen contents. | 71 | but will have no xen contents. |
64 | 72 | ||
73 | config XEN_XENBUS_FRONTEND | ||
74 | tristate | ||
75 | |||
76 | config XEN_GNTDEV | ||
77 | tristate "userspace grant access device driver" | ||
78 | depends on XEN | ||
79 | default m | ||
80 | select MMU_NOTIFIER | ||
81 | help | ||
82 | Allows userspace processes to use grants. | ||
83 | |||
84 | config XEN_GRANT_DEV_ALLOC | ||
85 | tristate "User-space grant reference allocator driver" | ||
86 | depends on XEN | ||
87 | default m | ||
88 | help | ||
89 | Allows userspace processes to create pages with access granted | ||
90 | to other domains. This can be used to implement frontend drivers | ||
91 | or as part of an inter-domain shared memory channel. | ||
92 | |||
65 | config XEN_PLATFORM_PCI | 93 | config XEN_PLATFORM_PCI |
66 | tristate "xen platform pci device driver" | 94 | tristate "xen platform pci device driver" |
67 | depends on XEN_PVHVM | 95 | depends on XEN_PVHVM && PCI |
68 | default m | 96 | default m |
69 | help | 97 | help |
70 | Driver for the Xen PCI Platform device: it is responsible for | 98 | Driver for the Xen PCI Platform device: it is responsible for |
@@ -74,6 +102,7 @@ config XEN_PLATFORM_PCI | |||
74 | 102 | ||
75 | config SWIOTLB_XEN | 103 | config SWIOTLB_XEN |
76 | def_bool y | 104 | def_bool y |
77 | depends on SWIOTLB | 105 | depends on PCI |
106 | select SWIOTLB | ||
78 | 107 | ||
79 | endmenu | 108 | endmenu |
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index fcaf838f54be..bbc18258ecc5 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,14 +1,25 @@ | |||
1 | obj-y += grant-table.o features.o events.o manage.o | 1 | obj-y += grant-table.o features.o events.o manage.o balloon.o |
2 | obj-y += xenbus/ | 2 | obj-y += xenbus/ |
3 | obj-y += tmem.o | ||
3 | 4 | ||
4 | nostackp := $(call cc-option, -fno-stack-protector) | 5 | nostackp := $(call cc-option, -fno-stack-protector) |
5 | CFLAGS_features.o := $(nostackp) | 6 | CFLAGS_features.o := $(nostackp) |
6 | 7 | ||
7 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o | 8 | obj-$(CONFIG_BLOCK) += biomerge.o |
8 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o | 9 | obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o |
9 | obj-$(CONFIG_XEN_BALLOON) += balloon.o | 10 | obj-$(CONFIG_XEN_XENCOMM) += xencomm.o |
10 | obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o | 11 | obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o |
11 | obj-$(CONFIG_XENFS) += xenfs/ | 12 | obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o |
13 | obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o | ||
14 | obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o | ||
15 | obj-$(CONFIG_XENFS) += xenfs/ | ||
12 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o | 16 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o |
13 | obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o | 17 | obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o |
14 | obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o | 18 | obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o |
19 | obj-$(CONFIG_XEN_DOM0) += pci.o | ||
20 | |||
21 | xen-evtchn-y := evtchn.o | ||
22 | xen-gntdev-y := gntdev.o | ||
23 | xen-gntalloc-y := gntalloc.o | ||
24 | |||
25 | xen-platform-pci-y := platform-pci.o | ||
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 500290b150bb..f54290baa3db 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -1,6 +1,4 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * balloon.c | ||
3 | * | ||
4 | * Xen balloon driver - enables returning/claiming memory to/from Xen. | 2 | * Xen balloon driver - enables returning/claiming memory to/from Xen. |
5 | * | 3 | * |
6 | * Copyright (c) 2003, B Dragovic | 4 | * Copyright (c) 2003, B Dragovic |
@@ -33,7 +31,6 @@ | |||
33 | */ | 31 | */ |
34 | 32 | ||
35 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
36 | #include <linux/module.h> | ||
37 | #include <linux/sched.h> | 34 | #include <linux/sched.h> |
38 | #include <linux/errno.h> | 35 | #include <linux/errno.h> |
39 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
@@ -42,14 +39,13 @@ | |||
42 | #include <linux/highmem.h> | 39 | #include <linux/highmem.h> |
43 | #include <linux/mutex.h> | 40 | #include <linux/mutex.h> |
44 | #include <linux/list.h> | 41 | #include <linux/list.h> |
45 | #include <linux/sysdev.h> | ||
46 | #include <linux/gfp.h> | 42 | #include <linux/gfp.h> |
47 | 43 | ||
48 | #include <asm/page.h> | 44 | #include <asm/page.h> |
49 | #include <asm/pgalloc.h> | 45 | #include <asm/pgalloc.h> |
50 | #include <asm/pgtable.h> | 46 | #include <asm/pgtable.h> |
51 | #include <asm/uaccess.h> | ||
52 | #include <asm/tlb.h> | 47 | #include <asm/tlb.h> |
48 | #include <asm/e820.h> | ||
53 | 49 | ||
54 | #include <asm/xen/hypervisor.h> | 50 | #include <asm/xen/hypervisor.h> |
55 | #include <asm/xen/hypercall.h> | 51 | #include <asm/xen/hypercall.h> |
@@ -57,35 +53,29 @@ | |||
57 | #include <xen/xen.h> | 53 | #include <xen/xen.h> |
58 | #include <xen/interface/xen.h> | 54 | #include <xen/interface/xen.h> |
59 | #include <xen/interface/memory.h> | 55 | #include <xen/interface/memory.h> |
60 | #include <xen/xenbus.h> | 56 | #include <xen/balloon.h> |
61 | #include <xen/features.h> | 57 | #include <xen/features.h> |
62 | #include <xen/page.h> | 58 | #include <xen/page.h> |
63 | 59 | ||
64 | #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) | 60 | /* |
65 | 61 | * balloon_process() state: | |
66 | #define BALLOON_CLASS_NAME "xen_memory" | 62 | * |
63 | * BP_DONE: done or nothing to do, | ||
64 | * BP_EAGAIN: error, go to sleep, | ||
65 | * BP_ECANCELED: error, balloon operation canceled. | ||
66 | */ | ||
67 | 67 | ||
68 | struct balloon_stats { | 68 | enum bp_state { |
69 | /* We aim for 'current allocation' == 'target allocation'. */ | 69 | BP_DONE, |
70 | unsigned long current_pages; | 70 | BP_EAGAIN, |
71 | unsigned long target_pages; | 71 | BP_ECANCELED |
72 | /* | ||
73 | * Drivers may alter the memory reservation independently, but they | ||
74 | * must inform the balloon driver so we avoid hitting the hard limit. | ||
75 | */ | ||
76 | unsigned long driver_pages; | ||
77 | /* Number of pages in high- and low-memory balloons. */ | ||
78 | unsigned long balloon_low; | ||
79 | unsigned long balloon_high; | ||
80 | }; | 72 | }; |
81 | 73 | ||
82 | static DEFINE_MUTEX(balloon_mutex); | ||
83 | |||
84 | static struct sys_device balloon_sysdev; | ||
85 | 74 | ||
86 | static int register_balloon(struct sys_device *sysdev); | 75 | static DEFINE_MUTEX(balloon_mutex); |
87 | 76 | ||
88 | static struct balloon_stats balloon_stats; | 77 | struct balloon_stats balloon_stats; |
78 | EXPORT_SYMBOL_GPL(balloon_stats); | ||
89 | 79 | ||
90 | /* We increase/decrease in batches which fit in a page */ | 80 | /* We increase/decrease in batches which fit in a page */ |
91 | static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; | 81 | static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; |
@@ -103,8 +93,7 @@ static LIST_HEAD(ballooned_pages); | |||
103 | 93 | ||
104 | /* Main work function, always executed in process context. */ | 94 | /* Main work function, always executed in process context. */ |
105 | static void balloon_process(struct work_struct *work); | 95 | static void balloon_process(struct work_struct *work); |
106 | static DECLARE_WORK(balloon_worker, balloon_process); | 96 | static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); |
107 | static struct timer_list balloon_timer; | ||
108 | 97 | ||
109 | /* When ballooning out (allocating memory to return to Xen) we don't really | 98 | /* When ballooning out (allocating memory to return to Xen) we don't really |
110 | want the kernel to try too hard since that can trigger the oom killer. */ | 99 | want the kernel to try too hard since that can trigger the oom killer. */ |
@@ -119,30 +108,38 @@ static void scrub_page(struct page *page) | |||
119 | } | 108 | } |
120 | 109 | ||
121 | /* balloon_append: add the given page to the balloon. */ | 110 | /* balloon_append: add the given page to the balloon. */ |
122 | static void balloon_append(struct page *page) | 111 | static void __balloon_append(struct page *page) |
123 | { | 112 | { |
124 | /* Lowmem is re-populated first, so highmem pages go at list tail. */ | 113 | /* Lowmem is re-populated first, so highmem pages go at list tail. */ |
125 | if (PageHighMem(page)) { | 114 | if (PageHighMem(page)) { |
126 | list_add_tail(&page->lru, &ballooned_pages); | 115 | list_add_tail(&page->lru, &ballooned_pages); |
127 | balloon_stats.balloon_high++; | 116 | balloon_stats.balloon_high++; |
128 | dec_totalhigh_pages(); | ||
129 | } else { | 117 | } else { |
130 | list_add(&page->lru, &ballooned_pages); | 118 | list_add(&page->lru, &ballooned_pages); |
131 | balloon_stats.balloon_low++; | 119 | balloon_stats.balloon_low++; |
132 | } | 120 | } |
121 | } | ||
133 | 122 | ||
123 | static void balloon_append(struct page *page) | ||
124 | { | ||
125 | __balloon_append(page); | ||
126 | if (PageHighMem(page)) | ||
127 | dec_totalhigh_pages(); | ||
134 | totalram_pages--; | 128 | totalram_pages--; |
135 | } | 129 | } |
136 | 130 | ||
137 | /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ | 131 | /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ |
138 | static struct page *balloon_retrieve(void) | 132 | static struct page *balloon_retrieve(bool prefer_highmem) |
139 | { | 133 | { |
140 | struct page *page; | 134 | struct page *page; |
141 | 135 | ||
142 | if (list_empty(&ballooned_pages)) | 136 | if (list_empty(&ballooned_pages)) |
143 | return NULL; | 137 | return NULL; |
144 | 138 | ||
145 | page = list_entry(ballooned_pages.next, struct page, lru); | 139 | if (prefer_highmem) |
140 | page = list_entry(ballooned_pages.prev, struct page, lru); | ||
141 | else | ||
142 | page = list_entry(ballooned_pages.next, struct page, lru); | ||
146 | list_del(&page->lru); | 143 | list_del(&page->lru); |
147 | 144 | ||
148 | if (PageHighMem(page)) { | 145 | if (PageHighMem(page)) { |
@@ -172,12 +169,32 @@ static struct page *balloon_next_page(struct page *page) | |||
172 | return list_entry(next, struct page, lru); | 169 | return list_entry(next, struct page, lru); |
173 | } | 170 | } |
174 | 171 | ||
175 | static void balloon_alarm(unsigned long unused) | 172 | static enum bp_state update_schedule(enum bp_state state) |
176 | { | 173 | { |
177 | schedule_work(&balloon_worker); | 174 | if (state == BP_DONE) { |
175 | balloon_stats.schedule_delay = 1; | ||
176 | balloon_stats.retry_count = 1; | ||
177 | return BP_DONE; | ||
178 | } | ||
179 | |||
180 | ++balloon_stats.retry_count; | ||
181 | |||
182 | if (balloon_stats.max_retry_count != RETRY_UNLIMITED && | ||
183 | balloon_stats.retry_count > balloon_stats.max_retry_count) { | ||
184 | balloon_stats.schedule_delay = 1; | ||
185 | balloon_stats.retry_count = 1; | ||
186 | return BP_ECANCELED; | ||
187 | } | ||
188 | |||
189 | balloon_stats.schedule_delay <<= 1; | ||
190 | |||
191 | if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay) | ||
192 | balloon_stats.schedule_delay = balloon_stats.max_schedule_delay; | ||
193 | |||
194 | return BP_EAGAIN; | ||
178 | } | 195 | } |
179 | 196 | ||
180 | static unsigned long current_target(void) | 197 | static long current_credit(void) |
181 | { | 198 | { |
182 | unsigned long target = balloon_stats.target_pages; | 199 | unsigned long target = balloon_stats.target_pages; |
183 | 200 | ||
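
For reference, the update_schedule() helper added above gives the balloon worker a capped exponential backoff: a successful pass (BP_DONE) resets the delay and retry counter to 1, a failed pass doubles the delay up to max_schedule_delay, and the operation is cancelled (BP_ECANCELED) once retry_count exceeds max_retry_count unless retries are unlimited. A standalone C sketch of that policy, with the balloon_stats fields modelled as plain variables and RETRY_UNLIMITED assumed to be the driver's "retry forever" sentinel:

#include <stdbool.h>

#define RETRY_UNLIMITED 0UL	/* assumption: sentinel meaning "never cancel" */

struct backoff {
	unsigned long schedule_delay;	  /* seconds until the next attempt */
	unsigned long max_schedule_delay; /* cap, e.g. 32 as set in balloon_init() */
	unsigned long retry_count;
	unsigned long max_retry_count;	  /* RETRY_UNLIMITED or a hard limit */
};

/* Mirrors update_schedule(): returns true to reschedule, false to give up. */
static bool backoff_after_failure(struct backoff *b)
{
	b->retry_count++;

	if (b->max_retry_count != RETRY_UNLIMITED &&
	    b->retry_count > b->max_retry_count) {
		b->schedule_delay = 1;
		b->retry_count = 1;
		return false;			/* BP_ECANCELED */
	}

	b->schedule_delay <<= 1;		/* 1, 2, 4, 8, ... seconds */
	if (b->schedule_delay > b->max_schedule_delay)
		b->schedule_delay = b->max_schedule_delay;

	return true;				/* BP_EAGAIN: sleep schedule_delay */
}

balloon_process() feeds the resulting state back into schedule_delayed_work(), which is why this series converts the worker from a plain work item plus timer into a delayed work item.
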
@@ -186,14 +203,14 @@ static unsigned long current_target(void) | |||
186 | balloon_stats.balloon_low + | 203 | balloon_stats.balloon_low + |
187 | balloon_stats.balloon_high); | 204 | balloon_stats.balloon_high); |
188 | 205 | ||
189 | return target; | 206 | return target - balloon_stats.current_pages; |
190 | } | 207 | } |
191 | 208 | ||
192 | static int increase_reservation(unsigned long nr_pages) | 209 | static enum bp_state increase_reservation(unsigned long nr_pages) |
193 | { | 210 | { |
194 | unsigned long pfn, i, flags; | 211 | int rc; |
212 | unsigned long pfn, i; | ||
195 | struct page *page; | 213 | struct page *page; |
196 | long rc; | ||
197 | struct xen_memory_reservation reservation = { | 214 | struct xen_memory_reservation reservation = { |
198 | .address_bits = 0, | 215 | .address_bits = 0, |
199 | .extent_order = 0, | 216 | .extent_order = 0, |
@@ -203,11 +220,12 @@ static int increase_reservation(unsigned long nr_pages) | |||
203 | if (nr_pages > ARRAY_SIZE(frame_list)) | 220 | if (nr_pages > ARRAY_SIZE(frame_list)) |
204 | nr_pages = ARRAY_SIZE(frame_list); | 221 | nr_pages = ARRAY_SIZE(frame_list); |
205 | 222 | ||
206 | spin_lock_irqsave(&xen_reservation_lock, flags); | ||
207 | |||
208 | page = balloon_first_page(); | 223 | page = balloon_first_page(); |
209 | for (i = 0; i < nr_pages; i++) { | 224 | for (i = 0; i < nr_pages; i++) { |
210 | BUG_ON(page == NULL); | 225 | if (!page) { |
226 | nr_pages = i; | ||
227 | break; | ||
228 | } | ||
211 | frame_list[i] = page_to_pfn(page); | 229 | frame_list[i] = page_to_pfn(page); |
212 | page = balloon_next_page(page); | 230 | page = balloon_next_page(page); |
213 | } | 231 | } |
@@ -215,11 +233,11 @@ static int increase_reservation(unsigned long nr_pages) | |||
215 | set_xen_guest_handle(reservation.extent_start, frame_list); | 233 | set_xen_guest_handle(reservation.extent_start, frame_list); |
216 | reservation.nr_extents = nr_pages; | 234 | reservation.nr_extents = nr_pages; |
217 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); | 235 | rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); |
218 | if (rc < 0) | 236 | if (rc <= 0) |
219 | goto out; | 237 | return BP_EAGAIN; |
220 | 238 | ||
221 | for (i = 0; i < rc; i++) { | 239 | for (i = 0; i < rc; i++) { |
222 | page = balloon_retrieve(); | 240 | page = balloon_retrieve(false); |
223 | BUG_ON(page == NULL); | 241 | BUG_ON(page == NULL); |
224 | 242 | ||
225 | pfn = page_to_pfn(page); | 243 | pfn = page_to_pfn(page); |
@@ -229,7 +247,7 @@ static int increase_reservation(unsigned long nr_pages) | |||
229 | set_phys_to_machine(pfn, frame_list[i]); | 247 | set_phys_to_machine(pfn, frame_list[i]); |
230 | 248 | ||
231 | /* Link back into the page tables if not highmem. */ | 249 | /* Link back into the page tables if not highmem. */ |
232 | if (pfn < max_low_pfn) { | 250 | if (xen_pv_domain() && !PageHighMem(page)) { |
233 | int ret; | 251 | int ret; |
234 | ret = HYPERVISOR_update_va_mapping( | 252 | ret = HYPERVISOR_update_va_mapping( |
235 | (unsigned long)__va(pfn << PAGE_SHIFT), | 253 | (unsigned long)__va(pfn << PAGE_SHIFT), |
@@ -246,17 +264,14 @@ static int increase_reservation(unsigned long nr_pages) | |||
246 | 264 | ||
247 | balloon_stats.current_pages += rc; | 265 | balloon_stats.current_pages += rc; |
248 | 266 | ||
249 | out: | 267 | return BP_DONE; |
250 | spin_unlock_irqrestore(&xen_reservation_lock, flags); | ||
251 | |||
252 | return rc < 0 ? rc : rc != nr_pages; | ||
253 | } | 268 | } |
254 | 269 | ||
255 | static int decrease_reservation(unsigned long nr_pages) | 270 | static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) |
256 | { | 271 | { |
257 | unsigned long pfn, i, flags; | 272 | enum bp_state state = BP_DONE; |
273 | unsigned long pfn, i; | ||
258 | struct page *page; | 274 | struct page *page; |
259 | int need_sleep = 0; | ||
260 | int ret; | 275 | int ret; |
261 | struct xen_memory_reservation reservation = { | 276 | struct xen_memory_reservation reservation = { |
262 | .address_bits = 0, | 277 | .address_bits = 0, |
@@ -268,9 +283,9 @@ static int decrease_reservation(unsigned long nr_pages) | |||
268 | nr_pages = ARRAY_SIZE(frame_list); | 283 | nr_pages = ARRAY_SIZE(frame_list); |
269 | 284 | ||
270 | for (i = 0; i < nr_pages; i++) { | 285 | for (i = 0; i < nr_pages; i++) { |
271 | if ((page = alloc_page(GFP_BALLOON)) == NULL) { | 286 | if ((page = alloc_page(gfp)) == NULL) { |
272 | nr_pages = i; | 287 | nr_pages = i; |
273 | need_sleep = 1; | 288 | state = BP_EAGAIN; |
274 | break; | 289 | break; |
275 | } | 290 | } |
276 | 291 | ||
@@ -279,7 +294,7 @@ static int decrease_reservation(unsigned long nr_pages) | |||
279 | 294 | ||
280 | scrub_page(page); | 295 | scrub_page(page); |
281 | 296 | ||
282 | if (!PageHighMem(page)) { | 297 | if (xen_pv_domain() && !PageHighMem(page)) { |
283 | ret = HYPERVISOR_update_va_mapping( | 298 | ret = HYPERVISOR_update_va_mapping( |
284 | (unsigned long)__va(pfn << PAGE_SHIFT), | 299 | (unsigned long)__va(pfn << PAGE_SHIFT), |
285 | __pte_ma(0), 0); | 300 | __pte_ma(0), 0); |
@@ -292,12 +307,10 @@ static int decrease_reservation(unsigned long nr_pages) | |||
292 | kmap_flush_unused(); | 307 | kmap_flush_unused(); |
293 | flush_tlb_all(); | 308 | flush_tlb_all(); |
294 | 309 | ||
295 | spin_lock_irqsave(&xen_reservation_lock, flags); | ||
296 | |||
297 | /* No more mappings: invalidate P2M and add to balloon. */ | 310 | /* No more mappings: invalidate P2M and add to balloon. */ |
298 | for (i = 0; i < nr_pages; i++) { | 311 | for (i = 0; i < nr_pages; i++) { |
299 | pfn = mfn_to_pfn(frame_list[i]); | 312 | pfn = mfn_to_pfn(frame_list[i]); |
300 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 313 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
301 | balloon_append(pfn_to_page(pfn)); | 314 | balloon_append(pfn_to_page(pfn)); |
302 | } | 315 | } |
303 | 316 | ||
@@ -308,9 +321,7 @@ static int decrease_reservation(unsigned long nr_pages) | |||
308 | 321 | ||
309 | balloon_stats.current_pages -= nr_pages; | 322 | balloon_stats.current_pages -= nr_pages; |
310 | 323 | ||
311 | spin_unlock_irqrestore(&xen_reservation_lock, flags); | 324 | return state; |
312 | |||
313 | return need_sleep; | ||
314 | } | 325 | } |
315 | 326 | ||
316 | /* | 327 | /* |
@@ -321,254 +332,145 @@ static int decrease_reservation(unsigned long nr_pages) | |||
321 | */ | 332 | */ |
322 | static void balloon_process(struct work_struct *work) | 333 | static void balloon_process(struct work_struct *work) |
323 | { | 334 | { |
324 | int need_sleep = 0; | 335 | enum bp_state state = BP_DONE; |
325 | long credit; | 336 | long credit; |
326 | 337 | ||
327 | mutex_lock(&balloon_mutex); | 338 | mutex_lock(&balloon_mutex); |
328 | 339 | ||
329 | do { | 340 | do { |
330 | credit = current_target() - balloon_stats.current_pages; | 341 | credit = current_credit(); |
342 | |||
331 | if (credit > 0) | 343 | if (credit > 0) |
332 | need_sleep = (increase_reservation(credit) != 0); | 344 | state = increase_reservation(credit); |
345 | |||
333 | if (credit < 0) | 346 | if (credit < 0) |
334 | need_sleep = (decrease_reservation(-credit) != 0); | 347 | state = decrease_reservation(-credit, GFP_BALLOON); |
348 | |||
349 | state = update_schedule(state); | ||
335 | 350 | ||
336 | #ifndef CONFIG_PREEMPT | 351 | #ifndef CONFIG_PREEMPT |
337 | if (need_resched()) | 352 | if (need_resched()) |
338 | schedule(); | 353 | schedule(); |
339 | #endif | 354 | #endif |
340 | } while ((credit != 0) && !need_sleep); | 355 | } while (credit && state == BP_DONE); |
341 | 356 | ||
342 | /* Schedule more work if there is some still to be done. */ | 357 | /* Schedule more work if there is some still to be done. */ |
343 | if (current_target() != balloon_stats.current_pages) | 358 | if (state == BP_EAGAIN) |
344 | mod_timer(&balloon_timer, jiffies + HZ); | 359 | schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ); |
345 | 360 | ||
346 | mutex_unlock(&balloon_mutex); | 361 | mutex_unlock(&balloon_mutex); |
347 | } | 362 | } |
348 | 363 | ||
349 | /* Resets the Xen limit, sets new target, and kicks off processing. */ | 364 | /* Resets the Xen limit, sets new target, and kicks off processing. */ |
350 | static void balloon_set_new_target(unsigned long target) | 365 | void balloon_set_new_target(unsigned long target) |
351 | { | 366 | { |
352 | /* No need for lock. Not read-modify-write updates. */ | 367 | /* No need for lock. Not read-modify-write updates. */ |
353 | balloon_stats.target_pages = target; | 368 | balloon_stats.target_pages = target; |
354 | schedule_work(&balloon_worker); | 369 | schedule_delayed_work(&balloon_worker, 0); |
355 | } | 370 | } |
371 | EXPORT_SYMBOL_GPL(balloon_set_new_target); | ||
356 | 372 | ||
357 | static struct xenbus_watch target_watch = | 373 | /** |
358 | { | 374 | * alloc_xenballooned_pages - get pages that have been ballooned out |
359 | .node = "memory/target" | 375 | * @nr_pages: Number of pages to get |
360 | }; | 376 | * @pages: pages returned |
361 | 377 | * @return 0 on success, error otherwise | |
362 | /* React to a change in the target key */ | 378 | */ |
363 | static void watch_target(struct xenbus_watch *watch, | 379 | int alloc_xenballooned_pages(int nr_pages, struct page** pages) |
364 | const char **vec, unsigned int len) | ||
365 | { | 380 | { |
366 | unsigned long long new_target; | 381 | int pgno = 0; |
367 | int err; | 382 | struct page* page; |
368 | 383 | mutex_lock(&balloon_mutex); | |
369 | err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); | 384 | while (pgno < nr_pages) { |
370 | if (err != 1) { | 385 | page = balloon_retrieve(true); |
371 | /* This is ok (for domain0 at least) - so just return */ | 386 | if (page) { |
372 | return; | 387 | pages[pgno++] = page; |
388 | } else { | ||
389 | enum bp_state st; | ||
390 | st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER); | ||
391 | if (st != BP_DONE) | ||
392 | goto out_undo; | ||
393 | } | ||
373 | } | 394 | } |
374 | 395 | mutex_unlock(&balloon_mutex); | |
375 | /* The given memory/target value is in KiB, so it needs converting to | 396 | return 0; |
376 | * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. | 397 | out_undo: |
377 | */ | 398 | while (pgno) |
378 | balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); | 399 | balloon_append(pages[--pgno]); |
400 | /* Free the memory back to the kernel soon */ | ||
401 | schedule_delayed_work(&balloon_worker, 0); | ||
402 | mutex_unlock(&balloon_mutex); | ||
403 | return -ENOMEM; | ||
379 | } | 404 | } |
405 | EXPORT_SYMBOL(alloc_xenballooned_pages); | ||
380 | 406 | ||
381 | static int balloon_init_watcher(struct notifier_block *notifier, | 407 | /** |
382 | unsigned long event, | 408 | * free_xenballooned_pages - return pages retrieved with get_ballooned_pages |
383 | void *data) | 409 | * @nr_pages: Number of pages |
410 | * @pages: pages to return | ||
411 | */ | ||
412 | void free_xenballooned_pages(int nr_pages, struct page** pages) | ||
384 | { | 413 | { |
385 | int err; | 414 | int i; |
415 | |||
416 | mutex_lock(&balloon_mutex); | ||
386 | 417 | ||
387 | err = register_xenbus_watch(&target_watch); | 418 | for (i = 0; i < nr_pages; i++) { |
388 | if (err) | 419 | if (pages[i]) |
389 | printk(KERN_ERR "Failed to set balloon watcher\n"); | 420 | balloon_append(pages[i]); |
421 | } | ||
390 | 422 | ||
391 | return NOTIFY_DONE; | 423 | /* The balloon may be too large now. Shrink it if needed. */ |
392 | } | 424 | if (current_credit()) |
425 | schedule_delayed_work(&balloon_worker, 0); | ||
393 | 426 | ||
394 | static struct notifier_block xenstore_notifier; | 427 | mutex_unlock(&balloon_mutex); |
428 | } | ||
429 | EXPORT_SYMBOL(free_xenballooned_pages); | ||
395 | 430 | ||
396 | static int __init balloon_init(void) | 431 | static int __init balloon_init(void) |
397 | { | 432 | { |
398 | unsigned long pfn; | 433 | unsigned long pfn, extra_pfn_end; |
399 | struct page *page; | 434 | struct page *page; |
400 | 435 | ||
401 | if (!xen_pv_domain()) | 436 | if (!xen_domain()) |
402 | return -ENODEV; | 437 | return -ENODEV; |
403 | 438 | ||
404 | pr_info("xen_balloon: Initialising balloon driver.\n"); | 439 | pr_info("xen/balloon: Initialising balloon driver.\n"); |
405 | 440 | ||
406 | balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); | 441 | balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn; |
407 | balloon_stats.target_pages = balloon_stats.current_pages; | 442 | balloon_stats.target_pages = balloon_stats.current_pages; |
408 | balloon_stats.balloon_low = 0; | 443 | balloon_stats.balloon_low = 0; |
409 | balloon_stats.balloon_high = 0; | 444 | balloon_stats.balloon_high = 0; |
410 | balloon_stats.driver_pages = 0UL; | ||
411 | |||
412 | init_timer(&balloon_timer); | ||
413 | balloon_timer.data = 0; | ||
414 | balloon_timer.function = balloon_alarm; | ||
415 | 445 | ||
416 | register_balloon(&balloon_sysdev); | 446 | balloon_stats.schedule_delay = 1; |
447 | balloon_stats.max_schedule_delay = 32; | ||
448 | balloon_stats.retry_count = 1; | ||
449 | balloon_stats.max_retry_count = RETRY_UNLIMITED; | ||
417 | 450 | ||
418 | /* Initialise the balloon with excess memory space. */ | 451 | /* |
419 | for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { | 452 | * Initialise the balloon with excess memory space. We need |
453 | * to make sure we don't add memory which doesn't exist or | ||
454 | * logically exist. The E820 map can be trimmed to be smaller | ||
455 | * than the amount of physical memory due to the mem= command | ||
456 | * line parameter. And if this is a 32-bit non-HIGHMEM kernel | ||
457 | * on a system with memory which requires highmem to access, | ||
458 | * don't try to use it. | ||
459 | */ | ||
460 | extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()), | ||
461 | (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size)); | ||
462 | for (pfn = PFN_UP(xen_extra_mem_start); | ||
463 | pfn < extra_pfn_end; | ||
464 | pfn++) { | ||
420 | page = pfn_to_page(pfn); | 465 | page = pfn_to_page(pfn); |
421 | if (!PageReserved(page)) | 466 | /* totalram_pages and totalhigh_pages do not include the boot-time |
422 | balloon_append(page); | 467 | balloon extension, so don't subtract from it. */ |
468 | __balloon_append(page); | ||
423 | } | 469 | } |
424 | 470 | ||
425 | target_watch.callback = watch_target; | ||
426 | xenstore_notifier.notifier_call = balloon_init_watcher; | ||
427 | |||
428 | register_xenstore_notifier(&xenstore_notifier); | ||
429 | |||
430 | return 0; | 471 | return 0; |
431 | } | 472 | } |
432 | 473 | ||
433 | subsys_initcall(balloon_init); | 474 | subsys_initcall(balloon_init); |
434 | 475 | ||
435 | static void balloon_exit(void) | ||
436 | { | ||
437 | /* XXX - release balloon here */ | ||
438 | return; | ||
439 | } | ||
440 | |||
441 | module_exit(balloon_exit); | ||
442 | |||
443 | #define BALLOON_SHOW(name, format, args...) \ | ||
444 | static ssize_t show_##name(struct sys_device *dev, \ | ||
445 | struct sysdev_attribute *attr, \ | ||
446 | char *buf) \ | ||
447 | { \ | ||
448 | return sprintf(buf, format, ##args); \ | ||
449 | } \ | ||
450 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) | ||
451 | |||
452 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); | ||
453 | BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); | ||
454 | BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); | ||
455 | BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); | ||
456 | |||
457 | static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, | ||
458 | char *buf) | ||
459 | { | ||
460 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); | ||
461 | } | ||
462 | |||
463 | static ssize_t store_target_kb(struct sys_device *dev, | ||
464 | struct sysdev_attribute *attr, | ||
465 | const char *buf, | ||
466 | size_t count) | ||
467 | { | ||
468 | char *endchar; | ||
469 | unsigned long long target_bytes; | ||
470 | |||
471 | if (!capable(CAP_SYS_ADMIN)) | ||
472 | return -EPERM; | ||
473 | |||
474 | target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; | ||
475 | |||
476 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | ||
477 | |||
478 | return count; | ||
479 | } | ||
480 | |||
481 | static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, | ||
482 | show_target_kb, store_target_kb); | ||
483 | |||
484 | |||
485 | static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr, | ||
486 | char *buf) | ||
487 | { | ||
488 | return sprintf(buf, "%llu\n", | ||
489 | (unsigned long long)balloon_stats.target_pages | ||
490 | << PAGE_SHIFT); | ||
491 | } | ||
492 | |||
493 | static ssize_t store_target(struct sys_device *dev, | ||
494 | struct sysdev_attribute *attr, | ||
495 | const char *buf, | ||
496 | size_t count) | ||
497 | { | ||
498 | char *endchar; | ||
499 | unsigned long long target_bytes; | ||
500 | |||
501 | if (!capable(CAP_SYS_ADMIN)) | ||
502 | return -EPERM; | ||
503 | |||
504 | target_bytes = memparse(buf, &endchar); | ||
505 | |||
506 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | ||
507 | |||
508 | return count; | ||
509 | } | ||
510 | |||
511 | static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, | ||
512 | show_target, store_target); | ||
513 | |||
514 | |||
515 | static struct sysdev_attribute *balloon_attrs[] = { | ||
516 | &attr_target_kb, | ||
517 | &attr_target, | ||
518 | }; | ||
519 | |||
520 | static struct attribute *balloon_info_attrs[] = { | ||
521 | &attr_current_kb.attr, | ||
522 | &attr_low_kb.attr, | ||
523 | &attr_high_kb.attr, | ||
524 | &attr_driver_kb.attr, | ||
525 | NULL | ||
526 | }; | ||
527 | |||
528 | static struct attribute_group balloon_info_group = { | ||
529 | .name = "info", | ||
530 | .attrs = balloon_info_attrs, | ||
531 | }; | ||
532 | |||
533 | static struct sysdev_class balloon_sysdev_class = { | ||
534 | .name = BALLOON_CLASS_NAME, | ||
535 | }; | ||
536 | |||
537 | static int register_balloon(struct sys_device *sysdev) | ||
538 | { | ||
539 | int i, error; | ||
540 | |||
541 | error = sysdev_class_register(&balloon_sysdev_class); | ||
542 | if (error) | ||
543 | return error; | ||
544 | |||
545 | sysdev->id = 0; | ||
546 | sysdev->cls = &balloon_sysdev_class; | ||
547 | |||
548 | error = sysdev_register(sysdev); | ||
549 | if (error) { | ||
550 | sysdev_class_unregister(&balloon_sysdev_class); | ||
551 | return error; | ||
552 | } | ||
553 | |||
554 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { | ||
555 | error = sysdev_create_file(sysdev, balloon_attrs[i]); | ||
556 | if (error) | ||
557 | goto fail; | ||
558 | } | ||
559 | |||
560 | error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); | ||
561 | if (error) | ||
562 | goto fail; | ||
563 | |||
564 | return 0; | ||
565 | |||
566 | fail: | ||
567 | while (--i >= 0) | ||
568 | sysdev_remove_file(sysdev, balloon_attrs[i]); | ||
569 | sysdev_unregister(sysdev); | ||
570 | sysdev_class_unregister(&balloon_sysdev_class); | ||
571 | return error; | ||
572 | } | ||
573 | |||
574 | MODULE_LICENSE("GPL"); | 476 | MODULE_LICENSE("GPL"); |
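
The alloc_xenballooned_pages()/free_xenballooned_pages() pair exported above lets other Xen drivers borrow page structs whose backing memory has been returned to Xen, use them (for example as targets for grant mappings), and hand them back afterwards. A minimal caller sketch, using only the signatures visible in this hunk and assuming the declarations live in xen/balloon.h:

#include <linux/slab.h>
#include <linux/mm.h>
#include <xen/balloon.h>	/* assumed home of the new declarations */

/* Hypothetical helper: borrow nr ballooned-out pages, then return them. */
static int example_with_ballooned_pages(int nr)
{
	struct page **pages;
	int rc;

	pages = kcalloc(nr, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	rc = alloc_xenballooned_pages(nr, pages);	/* 0 on success */
	if (rc) {
		kfree(pages);
		return rc;
	}

	/* ... the pages have no real memory behind them; map something
	 * into them before touching their contents ... */

	free_xenballooned_pages(nr, pages);		/* give them back */
	kfree(pages);
	return 0;
}

Note that on the failure path alloc_xenballooned_pages() undoes any partial allocation itself (the out_undo loop above), so a caller only ever sees 0 or -ENOMEM.
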
diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c
new file mode 100644
index 000000000000..ba6eda4b5143
--- /dev/null
+++ b/drivers/xen/biomerge.c
@@ -0,0 +1,13 @@ | |||
1 | #include <linux/bio.h> | ||
2 | #include <linux/io.h> | ||
3 | #include <xen/page.h> | ||
4 | |||
5 | bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, | ||
6 | const struct bio_vec *vec2) | ||
7 | { | ||
8 | unsigned long mfn1 = pfn_to_mfn(page_to_pfn(vec1->bv_page)); | ||
9 | unsigned long mfn2 = pfn_to_mfn(page_to_pfn(vec2->bv_page)); | ||
10 | |||
11 | return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && | ||
12 | ((mfn1 == mfn2) || ((mfn1+1) == mfn2)); | ||
13 | } | ||
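
The new xen_biovec_phys_mergeable() exists because, under Xen PV, pseudo-physical frames (pfns) that are adjacent need not map to adjacent machine frames (mfns), so a bio segment that looks contiguous to the kernel may not be contiguous for real DMA. A toy illustration of the extra check, with a small array standing in for the pfn_to_mfn() lookup:

/* Toy model, not kernel code: an arbitrary pfn -> mfn mapping. */
static const unsigned long toy_p2m[] = { 0x100, 0x2a7, 0x2a8, 0x050 };

/* pfns 1 and 2 are mergeable (mfns 0x2a7 and 0x2a8 are adjacent);
 * pfns 0 and 1 are not, even though the pfns themselves are adjacent. */
static int toy_mfn_contiguous(unsigned long pfn1, unsigned long pfn2)
{
	unsigned long mfn1 = toy_p2m[pfn1];
	unsigned long mfn2 = toy_p2m[pfn2];

	/* same condition as in xen_biovec_phys_mergeable() above */
	return (mfn1 == mfn2) || ((mfn1 + 1) == mfn2);
}
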
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 13365ba35218..30df85d8fca8 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -5,7 +5,7 @@ | |||
5 | * domain gets 1024 event channels, but NR_IRQ is not that large, we | 5 | * domain gets 1024 event channels, but NR_IRQ is not that large, we |
6 | * must dynamically map irqs<->event channels. The event channels | 6 | * must dynamically map irqs<->event channels. The event channels |
7 | * interface with the rest of the kernel by defining a xen interrupt | 7 | * interface with the rest of the kernel by defining a xen interrupt |
8 | * chip. When an event is recieved, it is mapped to an irq and sent | 8 | * chip. When an event is received, it is mapped to an irq and sent |
9 | * through the normal interrupt processing path. | 9 | * through the normal interrupt processing path. |
10 | * | 10 | * |
11 | * There are four kinds of events which can be mapped to an event | 11 | * There are four kinds of events which can be mapped to an event |
@@ -16,7 +16,7 @@ | |||
16 | * (typically dom0). | 16 | * (typically dom0). |
17 | * 2. VIRQs, typically used for timers. These are per-cpu events. | 17 | * 2. VIRQs, typically used for timers. These are per-cpu events. |
18 | * 3. IPIs. | 18 | * 3. IPIs. |
19 | * 4. Hardware interrupts. Not supported at present. | 19 | * 4. PIRQs - Hardware interrupts. |
20 | * | 20 | * |
21 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 | 21 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 |
22 | */ | 22 | */ |
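
In practice, consumers of this file rarely handle event channel numbers directly; they bind a channel (or VIRQ, IPI, PIRQ) to a Linux irq and register an ordinary handler. A minimal sketch using bind_evtchn_to_irqhandler(), one of the helpers events.c exports (its signature is assumed from contemporaneous callers and is not shown in this hunk):

#include <linux/interrupt.h>
#include <xen/events.h>

static irqreturn_t example_evtchn_interrupt(int irq, void *dev_id)
{
	/* runs through the normal interrupt path once the event fires */
	return IRQ_HANDLED;
}

/* Hypothetical: 'evtchn' was obtained from a remote domain, e.g. via xenbus. */
static int example_bind(unsigned int evtchn, void *ctx)
{
	int irq;

	irq = bind_evtchn_to_irqhandler(evtchn, example_evtchn_interrupt,
					0, "example-evtchn", ctx);
	if (irq < 0)
		return irq;

	/* ... later: unbind_from_irqhandler(irq, ctx); */
	return irq;
}
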
@@ -28,12 +28,16 @@ | |||
28 | #include <linux/string.h> | 28 | #include <linux/string.h> |
29 | #include <linux/bootmem.h> | 29 | #include <linux/bootmem.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/irqnr.h> | ||
32 | #include <linux/pci.h> | ||
31 | 33 | ||
32 | #include <asm/desc.h> | 34 | #include <asm/desc.h> |
33 | #include <asm/ptrace.h> | 35 | #include <asm/ptrace.h> |
34 | #include <asm/irq.h> | 36 | #include <asm/irq.h> |
35 | #include <asm/idle.h> | 37 | #include <asm/idle.h> |
38 | #include <asm/io_apic.h> | ||
36 | #include <asm/sync_bitops.h> | 39 | #include <asm/sync_bitops.h> |
40 | #include <asm/xen/pci.h> | ||
37 | #include <asm/xen/hypercall.h> | 41 | #include <asm/xen/hypercall.h> |
38 | #include <asm/xen/hypervisor.h> | 42 | #include <asm/xen/hypervisor.h> |
39 | 43 | ||
@@ -52,6 +56,8 @@ | |||
52 | */ | 56 | */ |
53 | static DEFINE_SPINLOCK(irq_mapping_update_lock); | 57 | static DEFINE_SPINLOCK(irq_mapping_update_lock); |
54 | 58 | ||
59 | static LIST_HEAD(xen_irq_list_head); | ||
60 | |||
55 | /* IRQ <-> VIRQ mapping. */ | 61 | /* IRQ <-> VIRQ mapping. */ |
56 | static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; | 62 | static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; |
57 | 63 | ||
@@ -73,14 +79,17 @@ enum xen_irq_type { | |||
73 | * event channel - irq->event channel mapping | 79 | * event channel - irq->event channel mapping |
74 | * cpu - cpu this event channel is bound to | 80 | * cpu - cpu this event channel is bound to |
75 | * index - type-specific information: | 81 | * index - type-specific information: |
76 | * PIRQ - vector, with MSB being "needs EIO" | 82 | * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM |
83 | * guest, or GSI (real passthrough IRQ) of the device. | ||
77 | * VIRQ - virq number | 84 | * VIRQ - virq number |
78 | * IPI - IPI vector | 85 | * IPI - IPI vector |
79 | * EVTCHN - | 86 | * EVTCHN - |
80 | */ | 87 | */ |
81 | struct irq_info | 88 | struct irq_info |
82 | { | 89 | { |
90 | struct list_head list; | ||
83 | enum xen_irq_type type; /* type */ | 91 | enum xen_irq_type type; /* type */ |
92 | unsigned irq; | ||
84 | unsigned short evtchn; /* event channel */ | 93 | unsigned short evtchn; /* event channel */ |
85 | unsigned short cpu; /* cpu bound */ | 94 | unsigned short cpu; /* cpu bound */ |
86 | 95 | ||
@@ -88,73 +97,118 @@ struct irq_info | |||
88 | unsigned short virq; | 97 | unsigned short virq; |
89 | enum ipi_vector ipi; | 98 | enum ipi_vector ipi; |
90 | struct { | 99 | struct { |
100 | unsigned short pirq; | ||
91 | unsigned short gsi; | 101 | unsigned short gsi; |
92 | unsigned short vector; | 102 | unsigned char vector; |
103 | unsigned char flags; | ||
104 | uint16_t domid; | ||
93 | } pirq; | 105 | } pirq; |
94 | } u; | 106 | } u; |
95 | }; | 107 | }; |
108 | #define PIRQ_NEEDS_EOI (1 << 0) | ||
109 | #define PIRQ_SHAREABLE (1 << 1) | ||
96 | 110 | ||
97 | static struct irq_info irq_info[NR_IRQS]; | 111 | static int *evtchn_to_irq; |
98 | 112 | ||
99 | static int evtchn_to_irq[NR_EVENT_CHANNELS] = { | 113 | static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG], |
100 | [0 ... NR_EVENT_CHANNELS-1] = -1 | 114 | cpu_evtchn_mask); |
101 | }; | ||
102 | struct cpu_evtchn_s { | ||
103 | unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG]; | ||
104 | }; | ||
105 | static struct cpu_evtchn_s *cpu_evtchn_mask_p; | ||
106 | static inline unsigned long *cpu_evtchn_mask(int cpu) | ||
107 | { | ||
108 | return cpu_evtchn_mask_p[cpu].bits; | ||
109 | } | ||
110 | 115 | ||
111 | /* Xen will never allocate port zero for any purpose. */ | 116 | /* Xen will never allocate port zero for any purpose. */ |
112 | #define VALID_EVTCHN(chn) ((chn) != 0) | 117 | #define VALID_EVTCHN(chn) ((chn) != 0) |
113 | 118 | ||
114 | static struct irq_chip xen_dynamic_chip; | 119 | static struct irq_chip xen_dynamic_chip; |
115 | static struct irq_chip xen_percpu_chip; | 120 | static struct irq_chip xen_percpu_chip; |
121 | static struct irq_chip xen_pirq_chip; | ||
122 | static void enable_dynirq(struct irq_data *data); | ||
123 | static void disable_dynirq(struct irq_data *data); | ||
116 | 124 | ||
117 | /* Constructor for packed IRQ information. */ | 125 | /* Get info for IRQ */ |
118 | static struct irq_info mk_unbound_info(void) | 126 | static struct irq_info *info_for_irq(unsigned irq) |
119 | { | 127 | { |
120 | return (struct irq_info) { .type = IRQT_UNBOUND }; | 128 | return irq_get_handler_data(irq); |
121 | } | 129 | } |
122 | 130 | ||
123 | static struct irq_info mk_evtchn_info(unsigned short evtchn) | 131 | /* Constructors for packed IRQ information. */ |
132 | static void xen_irq_info_common_init(struct irq_info *info, | ||
133 | unsigned irq, | ||
134 | enum xen_irq_type type, | ||
135 | unsigned short evtchn, | ||
136 | unsigned short cpu) | ||
124 | { | 137 | { |
125 | return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn, | 138 | |
126 | .cpu = 0 }; | 139 | BUG_ON(info->type != IRQT_UNBOUND && info->type != type); |
140 | |||
141 | info->type = type; | ||
142 | info->irq = irq; | ||
143 | info->evtchn = evtchn; | ||
144 | info->cpu = cpu; | ||
145 | |||
146 | evtchn_to_irq[evtchn] = irq; | ||
127 | } | 147 | } |
128 | 148 | ||
129 | static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi) | 149 | static void xen_irq_info_evtchn_init(unsigned irq, |
150 | unsigned short evtchn) | ||
130 | { | 151 | { |
131 | return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn, | 152 | struct irq_info *info = info_for_irq(irq); |
132 | .cpu = 0, .u.ipi = ipi }; | 153 | |
154 | xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0); | ||
133 | } | 155 | } |
134 | 156 | ||
135 | static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq) | 157 | static void xen_irq_info_ipi_init(unsigned cpu, |
158 | unsigned irq, | ||
159 | unsigned short evtchn, | ||
160 | enum ipi_vector ipi) | ||
136 | { | 161 | { |
137 | return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn, | 162 | struct irq_info *info = info_for_irq(irq); |
138 | .cpu = 0, .u.virq = virq }; | 163 | |
164 | xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0); | ||
165 | |||
166 | info->u.ipi = ipi; | ||
167 | |||
168 | per_cpu(ipi_to_irq, cpu)[ipi] = irq; | ||
139 | } | 169 | } |
140 | 170 | ||
141 | static struct irq_info mk_pirq_info(unsigned short evtchn, | 171 | static void xen_irq_info_virq_init(unsigned cpu, |
142 | unsigned short gsi, unsigned short vector) | 172 | unsigned irq, |
173 | unsigned short evtchn, | ||
174 | unsigned short virq) | ||
143 | { | 175 | { |
144 | return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, | 176 | struct irq_info *info = info_for_irq(irq); |
145 | .cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } }; | 177 | |
178 | xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0); | ||
179 | |||
180 | info->u.virq = virq; | ||
181 | |||
182 | per_cpu(virq_to_irq, cpu)[virq] = irq; | ||
146 | } | 183 | } |
147 | 184 | ||
148 | /* | 185 | static void xen_irq_info_pirq_init(unsigned irq, |
149 | * Accessors for packed IRQ information. | 186 | unsigned short evtchn, |
150 | */ | 187 | unsigned short pirq, |
151 | static struct irq_info *info_for_irq(unsigned irq) | 188 | unsigned short gsi, |
189 | unsigned short vector, | ||
190 | uint16_t domid, | ||
191 | unsigned char flags) | ||
152 | { | 192 | { |
153 | return &irq_info[irq]; | 193 | struct irq_info *info = info_for_irq(irq); |
194 | |||
195 | xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0); | ||
196 | |||
197 | info->u.pirq.pirq = pirq; | ||
198 | info->u.pirq.gsi = gsi; | ||
199 | info->u.pirq.vector = vector; | ||
200 | info->u.pirq.domid = domid; | ||
201 | info->u.pirq.flags = flags; | ||
154 | } | 202 | } |
155 | 203 | ||
204 | /* | ||
205 | * Accessors for packed IRQ information. | ||
206 | */ | ||
156 | static unsigned int evtchn_from_irq(unsigned irq) | 207 | static unsigned int evtchn_from_irq(unsigned irq) |
157 | { | 208 | { |
209 | if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq))) | ||
210 | return 0; | ||
211 | |||
158 | return info_for_irq(irq)->evtchn; | 212 | return info_for_irq(irq)->evtchn; |
159 | } | 213 | } |
160 | 214 | ||
@@ -184,24 +238,14 @@ static unsigned virq_from_irq(unsigned irq) | |||
184 | return info->u.virq; | 238 | return info->u.virq; |
185 | } | 239 | } |
186 | 240 | ||
187 | static unsigned gsi_from_irq(unsigned irq) | 241 | static unsigned pirq_from_irq(unsigned irq) |
188 | { | ||
189 | struct irq_info *info = info_for_irq(irq); | ||
190 | |||
191 | BUG_ON(info == NULL); | ||
192 | BUG_ON(info->type != IRQT_PIRQ); | ||
193 | |||
194 | return info->u.pirq.gsi; | ||
195 | } | ||
196 | |||
197 | static unsigned vector_from_irq(unsigned irq) | ||
198 | { | 242 | { |
199 | struct irq_info *info = info_for_irq(irq); | 243 | struct irq_info *info = info_for_irq(irq); |
200 | 244 | ||
201 | BUG_ON(info == NULL); | 245 | BUG_ON(info == NULL); |
202 | BUG_ON(info->type != IRQT_PIRQ); | 246 | BUG_ON(info->type != IRQT_PIRQ); |
203 | 247 | ||
204 | return info->u.pirq.vector; | 248 | return info->u.pirq.pirq; |
205 | } | 249 | } |
206 | 250 | ||
207 | static enum xen_irq_type type_from_irq(unsigned irq) | 251 | static enum xen_irq_type type_from_irq(unsigned irq) |
@@ -225,12 +269,21 @@ static unsigned int cpu_from_evtchn(unsigned int evtchn) | |||
225 | return ret; | 269 | return ret; |
226 | } | 270 | } |
227 | 271 | ||
272 | static bool pirq_needs_eoi(unsigned irq) | ||
273 | { | ||
274 | struct irq_info *info = info_for_irq(irq); | ||
275 | |||
276 | BUG_ON(info->type != IRQT_PIRQ); | ||
277 | |||
278 | return info->u.pirq.flags & PIRQ_NEEDS_EOI; | ||
279 | } | ||
280 | |||
228 | static inline unsigned long active_evtchns(unsigned int cpu, | 281 | static inline unsigned long active_evtchns(unsigned int cpu, |
229 | struct shared_info *sh, | 282 | struct shared_info *sh, |
230 | unsigned int idx) | 283 | unsigned int idx) |
231 | { | 284 | { |
232 | return (sh->evtchn_pending[idx] & | 285 | return (sh->evtchn_pending[idx] & |
233 | cpu_evtchn_mask(cpu)[idx] & | 286 | per_cpu(cpu_evtchn_mask, cpu)[idx] & |
234 | ~sh->evtchn_mask[idx]); | 287 | ~sh->evtchn_mask[idx]); |
235 | } | 288 | } |
236 | 289 | ||
@@ -240,28 +293,31 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) | |||
240 | 293 | ||
241 | BUG_ON(irq == -1); | 294 | BUG_ON(irq == -1); |
242 | #ifdef CONFIG_SMP | 295 | #ifdef CONFIG_SMP |
243 | cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); | 296 | cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); |
244 | #endif | 297 | #endif |
245 | 298 | ||
246 | __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); | 299 | clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq))); |
247 | __set_bit(chn, cpu_evtchn_mask(cpu)); | 300 | set_bit(chn, per_cpu(cpu_evtchn_mask, cpu)); |
248 | 301 | ||
249 | irq_info[irq].cpu = cpu; | 302 | info_for_irq(irq)->cpu = cpu; |
250 | } | 303 | } |
251 | 304 | ||
252 | static void init_evtchn_cpu_bindings(void) | 305 | static void init_evtchn_cpu_bindings(void) |
253 | { | 306 | { |
254 | #ifdef CONFIG_SMP | ||
255 | struct irq_desc *desc; | ||
256 | int i; | 307 | int i; |
308 | #ifdef CONFIG_SMP | ||
309 | struct irq_info *info; | ||
257 | 310 | ||
258 | /* By default all event channels notify CPU#0. */ | 311 | /* By default all event channels notify CPU#0. */ |
259 | for_each_irq_desc(i, desc) { | 312 | list_for_each_entry(info, &xen_irq_list_head, list) { |
260 | cpumask_copy(desc->affinity, cpumask_of(0)); | 313 | struct irq_desc *desc = irq_to_desc(info->irq); |
314 | cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); | ||
261 | } | 315 | } |
262 | #endif | 316 | #endif |
263 | 317 | ||
264 | memset(cpu_evtchn_mask(0), ~0, sizeof(cpu_evtchn_mask(0))); | 318 | for_each_possible_cpu(i) |
319 | memset(per_cpu(cpu_evtchn_mask, i), | ||
320 | (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i))); | ||
265 | } | 321 | } |
266 | 322 | ||
267 | static inline void clear_evtchn(int port) | 323 | static inline void clear_evtchn(int port) |
@@ -318,7 +374,7 @@ static void unmask_evtchn(int port) | |||
318 | struct evtchn_unmask unmask = { .port = port }; | 374 | struct evtchn_unmask unmask = { .port = port }; |
319 | (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); | 375 | (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); |
320 | } else { | 376 | } else { |
321 | struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); | 377 | struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); |
322 | 378 | ||
323 | sync_clear_bit(port, &s->evtchn_mask[0]); | 379 | sync_clear_bit(port, &s->evtchn_mask[0]); |
324 | 380 | ||
@@ -336,36 +392,417 @@ static void unmask_evtchn(int port) | |||
336 | put_cpu(); | 392 | put_cpu(); |
337 | } | 393 | } |
338 | 394 | ||
339 | static int find_unbound_irq(void) | 395 | static void xen_irq_init(unsigned irq) |
396 | { | ||
397 | struct irq_info *info; | ||
398 | #ifdef CONFIG_SMP | ||
399 | struct irq_desc *desc = irq_to_desc(irq); | ||
400 | |||
401 | /* By default all event channels notify CPU#0. */ | ||
402 | cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); | ||
403 | #endif | ||
404 | |||
405 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
406 | if (info == NULL) | ||
407 | panic("Unable to allocate metadata for IRQ%d\n", irq); | ||
408 | |||
409 | info->type = IRQT_UNBOUND; | ||
410 | |||
411 | irq_set_handler_data(irq, info); | ||
412 | |||
413 | list_add_tail(&info->list, &xen_irq_list_head); | ||
414 | } | ||
415 | |||
416 | static int __must_check xen_allocate_irq_dynamic(void) | ||
340 | { | 417 | { |
418 | int first = 0; | ||
341 | int irq; | 419 | int irq; |
342 | struct irq_desc *desc; | ||
343 | 420 | ||
344 | for (irq = 0; irq < nr_irqs; irq++) { | 421 | #ifdef CONFIG_X86_IO_APIC |
345 | desc = irq_to_desc(irq); | 422 | /* |
346 | /* only 0->15 have init'd desc; handle irq > 16 */ | 423 | * For an HVM guest or domain 0 which see "real" (emulated or |
347 | if (desc == NULL) | 424 | * actual respectively) GSIs we allocate dynamic IRQs |
348 | break; | 425 | * e.g. those corresponding to event channels or MSIs |
349 | if (desc->chip == &no_irq_chip) | 426 | * etc. from the range above those "real" GSIs to avoid |
350 | break; | 427 | * collisions. |
351 | if (desc->chip != &xen_dynamic_chip) | 428 | */ |
429 | if (xen_initial_domain() || xen_hvm_domain()) | ||
430 | first = get_nr_irqs_gsi(); | ||
431 | #endif | ||
432 | |||
433 | irq = irq_alloc_desc_from(first, -1); | ||
434 | |||
435 | xen_irq_init(irq); | ||
436 | |||
437 | return irq; | ||
438 | } | ||
439 | |||
440 | static int __must_check xen_allocate_irq_gsi(unsigned gsi) | ||
441 | { | ||
442 | int irq; | ||
443 | |||
444 | /* | ||
445 | * A PV guest has no concept of a GSI (since it has no ACPI | ||
446 | * nor access to/knowledge of the physical APICs). Therefore | ||
447 | * all IRQs are dynamically allocated from the entire IRQ | ||
448 | * space. | ||
449 | */ | ||
450 | if (xen_pv_domain() && !xen_initial_domain()) | ||
451 | return xen_allocate_irq_dynamic(); | ||
452 | |||
453 | /* Legacy IRQ descriptors are already allocated by the arch. */ | ||
454 | if (gsi < NR_IRQS_LEGACY) | ||
455 | irq = gsi; | ||
456 | else | ||
457 | irq = irq_alloc_desc_at(gsi, -1); | ||
458 | |||
459 | xen_irq_init(irq); | ||
460 | |||
461 | return irq; | ||
462 | } | ||
463 | |||
464 | static void xen_free_irq(unsigned irq) | ||
465 | { | ||
466 | struct irq_info *info = irq_get_handler_data(irq); | ||
467 | |||
468 | list_del(&info->list); | ||
469 | |||
470 | irq_set_handler_data(irq, NULL); | ||
471 | |||
472 | kfree(info); | ||
473 | |||
474 | /* Legacy IRQ descriptors are managed by the arch. */ | ||
475 | if (irq < NR_IRQS_LEGACY) | ||
476 | return; | ||
477 | |||
478 | irq_free_desc(irq); | ||
479 | } | ||
480 | |||
481 | static void pirq_query_unmask(int irq) | ||
482 | { | ||
483 | struct physdev_irq_status_query irq_status; | ||
484 | struct irq_info *info = info_for_irq(irq); | ||
485 | |||
486 | BUG_ON(info->type != IRQT_PIRQ); | ||
487 | |||
488 | irq_status.irq = pirq_from_irq(irq); | ||
489 | if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) | ||
490 | irq_status.flags = 0; | ||
491 | |||
492 | info->u.pirq.flags &= ~PIRQ_NEEDS_EOI; | ||
493 | if (irq_status.flags & XENIRQSTAT_needs_eoi) | ||
494 | info->u.pirq.flags |= PIRQ_NEEDS_EOI; | ||
495 | } | ||
496 | |||
497 | static bool probing_irq(int irq) | ||
498 | { | ||
499 | struct irq_desc *desc = irq_to_desc(irq); | ||
500 | |||
501 | return desc && desc->action == NULL; | ||
502 | } | ||
503 | |||
504 | static void eoi_pirq(struct irq_data *data) | ||
505 | { | ||
506 | int evtchn = evtchn_from_irq(data->irq); | ||
507 | struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; | ||
508 | int rc = 0; | ||
509 | |||
510 | irq_move_irq(data); | ||
511 | |||
512 | if (VALID_EVTCHN(evtchn)) | ||
513 | clear_evtchn(evtchn); | ||
514 | |||
515 | if (pirq_needs_eoi(data->irq)) { | ||
516 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); | ||
517 | WARN_ON(rc); | ||
518 | } | ||
519 | } | ||
520 | |||
521 | static void mask_ack_pirq(struct irq_data *data) | ||
522 | { | ||
523 | disable_dynirq(data); | ||
524 | eoi_pirq(data); | ||
525 | } | ||
526 | |||
527 | static unsigned int __startup_pirq(unsigned int irq) | ||
528 | { | ||
529 | struct evtchn_bind_pirq bind_pirq; | ||
530 | struct irq_info *info = info_for_irq(irq); | ||
531 | int evtchn = evtchn_from_irq(irq); | ||
532 | int rc; | ||
533 | |||
534 | BUG_ON(info->type != IRQT_PIRQ); | ||
535 | |||
536 | if (VALID_EVTCHN(evtchn)) | ||
537 | goto out; | ||
538 | |||
539 | bind_pirq.pirq = pirq_from_irq(irq); | ||
540 | /* NB. We are happy to share unless we are probing. */ | ||
541 | bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? | ||
542 | BIND_PIRQ__WILL_SHARE : 0; | ||
543 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); | ||
544 | if (rc != 0) { | ||
545 | if (!probing_irq(irq)) | ||
546 | printk(KERN_INFO "Failed to obtain physical IRQ %d\n", | ||
547 | irq); | ||
548 | return 0; | ||
549 | } | ||
550 | evtchn = bind_pirq.port; | ||
551 | |||
552 | pirq_query_unmask(irq); | ||
553 | |||
554 | evtchn_to_irq[evtchn] = irq; | ||
555 | bind_evtchn_to_cpu(evtchn, 0); | ||
556 | info->evtchn = evtchn; | ||
557 | |||
558 | out: | ||
559 | unmask_evtchn(evtchn); | ||
560 | eoi_pirq(irq_get_irq_data(irq)); | ||
561 | |||
562 | return 0; | ||
563 | } | ||
564 | |||
565 | static unsigned int startup_pirq(struct irq_data *data) | ||
566 | { | ||
567 | return __startup_pirq(data->irq); | ||
568 | } | ||
569 | |||
570 | static void shutdown_pirq(struct irq_data *data) | ||
571 | { | ||
572 | struct evtchn_close close; | ||
573 | unsigned int irq = data->irq; | ||
574 | struct irq_info *info = info_for_irq(irq); | ||
575 | int evtchn = evtchn_from_irq(irq); | ||
576 | |||
577 | BUG_ON(info->type != IRQT_PIRQ); | ||
578 | |||
579 | if (!VALID_EVTCHN(evtchn)) | ||
580 | return; | ||
581 | |||
582 | mask_evtchn(evtchn); | ||
583 | |||
584 | close.port = evtchn; | ||
585 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) | ||
586 | BUG(); | ||
587 | |||
588 | bind_evtchn_to_cpu(evtchn, 0); | ||
589 | evtchn_to_irq[evtchn] = -1; | ||
590 | info->evtchn = 0; | ||
591 | } | ||
592 | |||
593 | static void enable_pirq(struct irq_data *data) | ||
594 | { | ||
595 | startup_pirq(data); | ||
596 | } | ||
597 | |||
598 | static void disable_pirq(struct irq_data *data) | ||
599 | { | ||
600 | disable_dynirq(data); | ||
601 | } | ||
602 | |||
603 | static int find_irq_by_gsi(unsigned gsi) | ||
604 | { | ||
605 | struct irq_info *info; | ||
606 | |||
607 | list_for_each_entry(info, &xen_irq_list_head, list) { | ||
608 | if (info->type != IRQT_PIRQ) | ||
352 | continue; | 609 | continue; |
353 | if (irq_info[irq].type == IRQT_UNBOUND) | 610 | |
354 | break; | 611 | if (info->u.pirq.gsi == gsi) |
612 | return info->irq; | ||
355 | } | 613 | } |
356 | 614 | ||
357 | if (irq == nr_irqs) | 615 | return -1; |
358 | panic("No available IRQ to bind to: increase nr_irqs!\n"); | 616 | } |
359 | 617 | ||
360 | desc = irq_to_desc_alloc_node(irq, 0); | 618 | int xen_allocate_pirq_gsi(unsigned gsi) |
361 | if (WARN_ON(desc == NULL)) | 619 | { |
362 | return -1; | 620 | return gsi; |
621 | } | ||
622 | |||
623 | /* | ||
624 | * Do not make any assumptions regarding the relationship between the | ||
625 | * IRQ number returned here and the Xen pirq argument. | ||
626 | * | ||
627 | * Note: We don't assign an event channel until the irq actually started | ||
628 | * up. Return an existing irq if we've already got one for the gsi. | ||
629 | * | ||
630 | * Shareable implies level triggered, not shareable implies edge | ||
631 | * triggered here. | ||
632 | */ | ||
633 | int xen_bind_pirq_gsi_to_irq(unsigned gsi, | ||
634 | unsigned pirq, int shareable, char *name) | ||
635 | { | ||
636 | int irq = -1; | ||
637 | struct physdev_irq irq_op; | ||
638 | |||
639 | spin_lock(&irq_mapping_update_lock); | ||
640 | |||
641 | irq = find_irq_by_gsi(gsi); | ||
642 | if (irq != -1) { | ||
643 | printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", | ||
644 | irq, gsi); | ||
645 | goto out; /* XXX need refcount? */ | ||
646 | } | ||
647 | |||
648 | irq = xen_allocate_irq_gsi(gsi); | ||
649 | if (irq < 0) | ||
650 | goto out; | ||
651 | |||
652 | irq_op.irq = irq; | ||
653 | irq_op.vector = 0; | ||
654 | |||
655 | /* Only the privileged domain can do this. For non-priv, the pcifront | ||
656 | * driver provides a PCI bus that does the call to do exactly | ||
657 | * this in the priv domain. */ | ||
658 | if (xen_initial_domain() && | ||
659 | HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { | ||
660 | xen_free_irq(irq); | ||
661 | irq = -ENOSPC; | ||
662 | goto out; | ||
663 | } | ||
664 | |||
665 | xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF, | ||
666 | shareable ? PIRQ_SHAREABLE : 0); | ||
667 | |||
668 | pirq_query_unmask(irq); | ||
669 | /* We try to use the handler with the appropriate semantic for the | ||
670 | * type of interrupt: if the interrupt is an edge triggered | ||
671 | * interrupt we use handle_edge_irq. | ||
672 | * | ||
673 | * On the other hand if the interrupt is level triggered we use | ||
674 | * handle_fasteoi_irq like the native code does for this kind of | ||
675 | * interrupts. | ||
676 | * | ||
677 | * Depending on the Xen version, pirq_needs_eoi might return true | ||
678 | * not only for level triggered interrupts but for edge triggered | ||
679 | * interrupts too. In any case Xen always honors the eoi mechanism, | ||
680 | * not injecting any more pirqs of the same kind if the first one | ||
681 | * hasn't received an eoi yet. Therefore using the fasteoi handler | ||
682 | * is the right choice either way. | ||
683 | */ | ||
684 | if (shareable) | ||
685 | irq_set_chip_and_handler_name(irq, &xen_pirq_chip, | ||
686 | handle_fasteoi_irq, name); | ||
687 | else | ||
688 | irq_set_chip_and_handler_name(irq, &xen_pirq_chip, | ||
689 | handle_edge_irq, name); | ||
690 | |||
691 | out: | ||
692 | spin_unlock(&irq_mapping_update_lock); | ||
693 | |||
694 | return irq; | ||
695 | } | ||
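A rough sketch of how dom0 code could consume this interface; the handler, the "example-gsi" name and the cookie are invented here, and a real caller (the x86 GSI registration path) adds more plumbing and teardown:

#include <linux/interrupt.h>
#include <xen/events.h>

static irqreturn_t example_gsi_handler(int irq, void *dev_id)
{
	/* acknowledge/handle the device here */
	return IRQ_HANDLED;
}

static int example_cookie;

static int example_bind_gsi(unsigned gsi, int shareable)
{
	int irq, rc;

	/* pirq is identity-mapped to the gsi, see xen_allocate_pirq_gsi() */
	irq = xen_bind_pirq_gsi_to_irq(gsi, xen_allocate_pirq_gsi(gsi),
				       shareable, "example-gsi");
	if (irq < 0)
		return irq;

	rc = request_irq(irq, example_gsi_handler,
			 shareable ? IRQF_SHARED : 0, "example-gsi",
			 &example_cookie);
	if (rc)
		return rc;	/* teardown of the pirq binding omitted for brevity */

	return irq;
}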
696 | |||
697 | #ifdef CONFIG_PCI_MSI | ||
698 | int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) | ||
699 | { | ||
700 | int rc; | ||
701 | struct physdev_get_free_pirq op_get_free_pirq; | ||
702 | |||
703 | op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI; | ||
704 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); | ||
705 | |||
706 | WARN_ONCE(rc == -ENOSYS, | ||
707 | "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n"); | ||
708 | |||
709 | return rc ? -1 : op_get_free_pirq.pirq; | ||
710 | } | ||
711 | |||
712 | int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, | ||
713 | int pirq, int vector, const char *name, | ||
714 | domid_t domid) | ||
715 | { | ||
716 | int irq, ret; | ||
717 | |||
718 | spin_lock(&irq_mapping_update_lock); | ||
719 | |||
720 | irq = xen_allocate_irq_dynamic(); | ||
721 | if (irq == -1) | ||
722 | goto out; | ||
723 | |||
724 | irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, | ||
725 | name); | ||
726 | |||
727 | xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0); | ||
728 | ret = irq_set_msi_desc(irq, msidesc); | ||
729 | if (ret < 0) | ||
730 | goto error_irq; | ||
731 | out: | ||
732 | spin_unlock(&irq_mapping_update_lock); | ||
733 | return irq; | ||
734 | error_irq: | ||
735 | spin_unlock(&irq_mapping_update_lock); | ||
736 | xen_free_irq(irq); | ||
737 | return -1; | ||
738 | } | ||
739 | #endif | ||
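The intended calling sequence (used by the x86 Xen PCI code) is allocate-then-bind; below is a minimal hedged sketch for a single MSI descriptor, with the "example-msi" name invented here:

#include <linux/pci.h>
#include <linux/msi.h>
#include <xen/events.h>

static int example_setup_one_msi(struct pci_dev *dev, struct msi_desc *msidesc)
{
	int pirq, irq;

	pirq = xen_allocate_pirq_msi(dev, msidesc);	/* ask Xen for a free pirq */
	if (pirq < 0)
		return pirq;

	irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0 /* vector */,
				       "example-msi", DOMID_SELF);
	if (irq < 0)
		return irq;

	return irq;	/* the caller then request_irq()s it as usual */
}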
740 | |||
741 | int xen_destroy_irq(int irq) | ||
742 | { | ||
743 | struct irq_desc *desc; | ||
744 | struct physdev_unmap_pirq unmap_irq; | ||
745 | struct irq_info *info = info_for_irq(irq); | ||
746 | int rc = -ENOENT; | ||
747 | |||
748 | spin_lock(&irq_mapping_update_lock); | ||
749 | |||
750 | desc = irq_to_desc(irq); | ||
751 | if (!desc) | ||
752 | goto out; | ||
753 | |||
754 | if (xen_initial_domain()) { | ||
755 | unmap_irq.pirq = info->u.pirq.pirq; | ||
756 | unmap_irq.domid = info->u.pirq.domid; | ||
757 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); | ||
758 | /* If another domain quits without making the pci_disable_msix | ||
759 | * call, the Xen hypervisor takes care of freeing the PIRQs | ||
760 | * (free_domain_pirqs). | ||
761 | */ | ||
762 | if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF)) | ||
763 | printk(KERN_INFO "domain %d does not have pirq %d anymore\n", | ||
764 | info->u.pirq.domid, info->u.pirq.pirq); | ||
765 | else if (rc) { | ||
766 | printk(KERN_WARNING "unmap irq failed %d\n", rc); | ||
767 | goto out; | ||
768 | } | ||
769 | } | ||
770 | |||
771 | xen_free_irq(irq); | ||
772 | |||
773 | out: | ||
774 | spin_unlock(&irq_mapping_update_lock); | ||
775 | return rc; | ||
776 | } | ||
777 | |||
778 | int xen_irq_from_pirq(unsigned pirq) | ||
779 | { | ||
780 | int irq; | ||
781 | |||
782 | struct irq_info *info; | ||
363 | 783 | ||
364 | dynamic_irq_init_keep_chip_data(irq); | 784 | spin_lock(&irq_mapping_update_lock); |
785 | |||
786 | list_for_each_entry(info, &xen_irq_list_head, list) { | ||
787 | if (info == NULL || info->type != IRQT_PIRQ) | ||
788 | continue; | ||
789 | irq = info->irq; | ||
790 | if (info->u.pirq.pirq == pirq) | ||
791 | goto out; | ||
792 | } | ||
793 | irq = -1; | ||
794 | out: | ||
795 | spin_unlock(&irq_mapping_update_lock); | ||
365 | 796 | ||
366 | return irq; | 797 | return irq; |
367 | } | 798 | } |
368 | 799 | ||
800 | |||
801 | int xen_pirq_from_irq(unsigned irq) | ||
802 | { | ||
803 | return pirq_from_irq(irq); | ||
804 | } | ||
805 | EXPORT_SYMBOL_GPL(xen_pirq_from_irq); | ||
369 | int bind_evtchn_to_irq(unsigned int evtchn) | 806 | int bind_evtchn_to_irq(unsigned int evtchn) |
370 | { | 807 | { |
371 | int irq; | 808 | int irq; |
@@ -375,15 +812,17 @@ int bind_evtchn_to_irq(unsigned int evtchn) | |||
375 | irq = evtchn_to_irq[evtchn]; | 812 | irq = evtchn_to_irq[evtchn]; |
376 | 813 | ||
377 | if (irq == -1) { | 814 | if (irq == -1) { |
378 | irq = find_unbound_irq(); | 815 | irq = xen_allocate_irq_dynamic(); |
816 | if (irq == -1) | ||
817 | goto out; | ||
379 | 818 | ||
380 | set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, | 819 | irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, |
381 | handle_edge_irq, "event"); | 820 | handle_edge_irq, "event"); |
382 | 821 | ||
383 | evtchn_to_irq[evtchn] = irq; | 822 | xen_irq_info_evtchn_init(irq, evtchn); |
384 | irq_info[irq] = mk_evtchn_info(evtchn); | ||
385 | } | 823 | } |
386 | 824 | ||
825 | out: | ||
387 | spin_unlock(&irq_mapping_update_lock); | 826 | spin_unlock(&irq_mapping_update_lock); |
388 | 827 | ||
389 | return irq; | 828 | return irq; |
@@ -400,11 +839,11 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) | |||
400 | irq = per_cpu(ipi_to_irq, cpu)[ipi]; | 839 | irq = per_cpu(ipi_to_irq, cpu)[ipi]; |
401 | 840 | ||
402 | if (irq == -1) { | 841 | if (irq == -1) { |
403 | irq = find_unbound_irq(); | 842 | irq = xen_allocate_irq_dynamic(); |
404 | if (irq < 0) | 843 | if (irq < 0) |
405 | goto out; | 844 | goto out; |
406 | 845 | ||
407 | set_irq_chip_and_handler_name(irq, &xen_percpu_chip, | 846 | irq_set_chip_and_handler_name(irq, &xen_percpu_chip, |
408 | handle_percpu_irq, "ipi"); | 847 | handle_percpu_irq, "ipi"); |
409 | 848 | ||
410 | bind_ipi.vcpu = cpu; | 849 | bind_ipi.vcpu = cpu; |
@@ -413,9 +852,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) | |||
413 | BUG(); | 852 | BUG(); |
414 | evtchn = bind_ipi.port; | 853 | evtchn = bind_ipi.port; |
415 | 854 | ||
416 | evtchn_to_irq[evtchn] = irq; | 855 | xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); |
417 | irq_info[irq] = mk_ipi_info(evtchn, ipi); | ||
418 | per_cpu(ipi_to_irq, cpu)[ipi] = irq; | ||
419 | 856 | ||
420 | bind_evtchn_to_cpu(evtchn, cpu); | 857 | bind_evtchn_to_cpu(evtchn, cpu); |
421 | } | 858 | } |
@@ -425,8 +862,23 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) | |||
425 | return irq; | 862 | return irq; |
426 | } | 863 | } |
427 | 864 | ||
865 | static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, | ||
866 | unsigned int remote_port) | ||
867 | { | ||
868 | struct evtchn_bind_interdomain bind_interdomain; | ||
869 | int err; | ||
870 | |||
871 | bind_interdomain.remote_dom = remote_domain; | ||
872 | bind_interdomain.remote_port = remote_port; | ||
873 | |||
874 | err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, | ||
875 | &bind_interdomain); | ||
876 | |||
877 | return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); | ||
878 | } | ||
879 | |||
428 | 880 | ||
429 | static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) | 881 | int bind_virq_to_irq(unsigned int virq, unsigned int cpu) |
430 | { | 882 | { |
431 | struct evtchn_bind_virq bind_virq; | 883 | struct evtchn_bind_virq bind_virq; |
432 | int evtchn, irq; | 884 | int evtchn, irq; |
@@ -436,6 +888,13 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) | |||
436 | irq = per_cpu(virq_to_irq, cpu)[virq]; | 888 | irq = per_cpu(virq_to_irq, cpu)[virq]; |
437 | 889 | ||
438 | if (irq == -1) { | 890 | if (irq == -1) { |
891 | irq = xen_allocate_irq_dynamic(); | ||
892 | if (irq == -1) | ||
893 | goto out; | ||
894 | |||
895 | irq_set_chip_and_handler_name(irq, &xen_percpu_chip, | ||
896 | handle_percpu_irq, "virq"); | ||
897 | |||
439 | bind_virq.virq = virq; | 898 | bind_virq.virq = virq; |
440 | bind_virq.vcpu = cpu; | 899 | bind_virq.vcpu = cpu; |
441 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | 900 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, |
@@ -443,19 +902,12 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) | |||
443 | BUG(); | 902 | BUG(); |
444 | evtchn = bind_virq.port; | 903 | evtchn = bind_virq.port; |
445 | 904 | ||
446 | irq = find_unbound_irq(); | 905 | xen_irq_info_virq_init(cpu, irq, evtchn, virq); |
447 | |||
448 | set_irq_chip_and_handler_name(irq, &xen_percpu_chip, | ||
449 | handle_percpu_irq, "virq"); | ||
450 | |||
451 | evtchn_to_irq[evtchn] = irq; | ||
452 | irq_info[irq] = mk_virq_info(evtchn, virq); | ||
453 | |||
454 | per_cpu(virq_to_irq, cpu)[virq] = irq; | ||
455 | 906 | ||
456 | bind_evtchn_to_cpu(evtchn, cpu); | 907 | bind_evtchn_to_cpu(evtchn, cpu); |
457 | } | 908 | } |
458 | 909 | ||
910 | out: | ||
459 | spin_unlock(&irq_mapping_update_lock); | 911 | spin_unlock(&irq_mapping_update_lock); |
460 | 912 | ||
461 | return irq; | 913 | return irq; |
@@ -492,11 +944,9 @@ static void unbind_from_irq(unsigned int irq) | |||
492 | evtchn_to_irq[evtchn] = -1; | 944 | evtchn_to_irq[evtchn] = -1; |
493 | } | 945 | } |
494 | 946 | ||
495 | if (irq_info[irq].type != IRQT_UNBOUND) { | 947 | BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND); |
496 | irq_info[irq] = mk_unbound_info(); | ||
497 | 948 | ||
498 | dynamic_irq_cleanup(irq); | 949 | xen_free_irq(irq); |
499 | } | ||
500 | 950 | ||
501 | spin_unlock(&irq_mapping_update_lock); | 951 | spin_unlock(&irq_mapping_update_lock); |
502 | } | 952 | } |
@@ -506,10 +956,11 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, | |||
506 | unsigned long irqflags, | 956 | unsigned long irqflags, |
507 | const char *devname, void *dev_id) | 957 | const char *devname, void *dev_id) |
508 | { | 958 | { |
509 | unsigned int irq; | 959 | int irq, retval; |
510 | int retval; | ||
511 | 960 | ||
512 | irq = bind_evtchn_to_irq(evtchn); | 961 | irq = bind_evtchn_to_irq(evtchn); |
962 | if (irq < 0) | ||
963 | return irq; | ||
513 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | 964 | retval = request_irq(irq, handler, irqflags, devname, dev_id); |
514 | if (retval != 0) { | 965 | if (retval != 0) { |
515 | unbind_from_irq(irq); | 966 | unbind_from_irq(irq); |
@@ -520,14 +971,38 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, | |||
520 | } | 971 | } |
521 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); | 972 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); |
522 | 973 | ||
974 | int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, | ||
975 | unsigned int remote_port, | ||
976 | irq_handler_t handler, | ||
977 | unsigned long irqflags, | ||
978 | const char *devname, | ||
979 | void *dev_id) | ||
980 | { | ||
981 | int irq, retval; | ||
982 | |||
983 | irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); | ||
984 | if (irq < 0) | ||
985 | return irq; | ||
986 | |||
987 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | ||
988 | if (retval != 0) { | ||
989 | unbind_from_irq(irq); | ||
990 | return retval; | ||
991 | } | ||
992 | |||
993 | return irq; | ||
994 | } | ||
995 | EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); | ||
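For backend drivers the usual pattern is to bind the frontend's event channel (normally read from xenstore) straight to a handler; a hedged sketch with invented names:

#include <linux/interrupt.h>
#include <xen/events.h>

static irqreturn_t example_backend_interrupt(int irq, void *dev_id)
{
	/* kick the request-processing work for this connection */
	return IRQ_HANDLED;
}

static int example_connect_ring(domid_t otherend_id, unsigned int evtchn)
{
	int irq;

	irq = bind_interdomain_evtchn_to_irqhandler(otherend_id, evtchn,
						    example_backend_interrupt,
						    0, "example-backend", NULL);
	if (irq < 0)
		return irq;

	/* remember irq; on disconnect: unbind_from_irqhandler(irq, NULL); */
	return 0;
}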
996 | |||
523 | int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, | 997 | int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, |
524 | irq_handler_t handler, | 998 | irq_handler_t handler, |
525 | unsigned long irqflags, const char *devname, void *dev_id) | 999 | unsigned long irqflags, const char *devname, void *dev_id) |
526 | { | 1000 | { |
527 | unsigned int irq; | 1001 | int irq, retval; |
528 | int retval; | ||
529 | 1002 | ||
530 | irq = bind_virq_to_irq(virq, cpu); | 1003 | irq = bind_virq_to_irq(virq, cpu); |
1004 | if (irq < 0) | ||
1005 | return irq; | ||
531 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | 1006 | retval = request_irq(irq, handler, irqflags, devname, dev_id); |
532 | if (retval != 0) { | 1007 | if (retval != 0) { |
533 | unbind_from_irq(irq); | 1008 | unbind_from_irq(irq); |
@@ -551,7 +1026,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, | |||
551 | if (irq < 0) | 1026 | if (irq < 0) |
552 | return irq; | 1027 | return irq; |
553 | 1028 | ||
554 | irqflags |= IRQF_NO_SUSPEND; | 1029 | irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME; |
555 | retval = request_irq(irq, handler, irqflags, devname, dev_id); | 1030 | retval = request_irq(irq, handler, irqflags, devname, dev_id); |
556 | if (retval != 0) { | 1031 | if (retval != 0) { |
557 | unbind_from_irq(irq); | 1032 | unbind_from_irq(irq); |
@@ -579,41 +1054,75 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) | |||
579 | { | 1054 | { |
580 | struct shared_info *sh = HYPERVISOR_shared_info; | 1055 | struct shared_info *sh = HYPERVISOR_shared_info; |
581 | int cpu = smp_processor_id(); | 1056 | int cpu = smp_processor_id(); |
1057 | unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); | ||
582 | int i; | 1058 | int i; |
583 | unsigned long flags; | 1059 | unsigned long flags; |
584 | static DEFINE_SPINLOCK(debug_lock); | 1060 | static DEFINE_SPINLOCK(debug_lock); |
1061 | struct vcpu_info *v; | ||
585 | 1062 | ||
586 | spin_lock_irqsave(&debug_lock, flags); | 1063 | spin_lock_irqsave(&debug_lock, flags); |
587 | 1064 | ||
588 | printk("vcpu %d\n ", cpu); | 1065 | printk("\nvcpu %d\n ", cpu); |
589 | 1066 | ||
590 | for_each_online_cpu(i) { | 1067 | for_each_online_cpu(i) { |
591 | struct vcpu_info *v = per_cpu(xen_vcpu, i); | 1068 | int pending; |
592 | printk("%d: masked=%d pending=%d event_sel %08lx\n ", i, | 1069 | v = per_cpu(xen_vcpu, i); |
593 | (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask, | 1070 | pending = (get_irq_regs() && i == cpu) |
594 | v->evtchn_upcall_pending, | 1071 | ? xen_irqs_disabled(get_irq_regs()) |
595 | v->evtchn_pending_sel); | 1072 | : v->evtchn_upcall_mask; |
1073 | printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i, | ||
1074 | pending, v->evtchn_upcall_pending, | ||
1075 | (int)(sizeof(v->evtchn_pending_sel)*2), | ||
1076 | v->evtchn_pending_sel); | ||
1077 | } | ||
1078 | v = per_cpu(xen_vcpu, cpu); | ||
1079 | |||
1080 | printk("\npending:\n "); | ||
1081 | for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) | ||
1082 | printk("%0*lx%s", (int)sizeof(sh->evtchn_pending[0])*2, | ||
1083 | sh->evtchn_pending[i], | ||
1084 | i % 8 == 0 ? "\n " : " "); | ||
1085 | printk("\nglobal mask:\n "); | ||
1086 | for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) | ||
1087 | printk("%0*lx%s", | ||
1088 | (int)(sizeof(sh->evtchn_mask[0])*2), | ||
1089 | sh->evtchn_mask[i], | ||
1090 | i % 8 == 0 ? "\n " : " "); | ||
1091 | |||
1092 | printk("\nglobally unmasked:\n "); | ||
1093 | for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) | ||
1094 | printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2), | ||
1095 | sh->evtchn_pending[i] & ~sh->evtchn_mask[i], | ||
1096 | i % 8 == 0 ? "\n " : " "); | ||
1097 | |||
1098 | printk("\nlocal cpu%d mask:\n ", cpu); | ||
1099 | for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--) | ||
1100 | printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2), | ||
1101 | cpu_evtchn[i], | ||
1102 | i % 8 == 0 ? "\n " : " "); | ||
1103 | |||
1104 | printk("\nlocally unmasked:\n "); | ||
1105 | for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) { | ||
1106 | unsigned long pending = sh->evtchn_pending[i] | ||
1107 | & ~sh->evtchn_mask[i] | ||
1108 | & cpu_evtchn[i]; | ||
1109 | printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2), | ||
1110 | pending, i % 8 == 0 ? "\n " : " "); | ||
596 | } | 1111 | } |
597 | printk("pending:\n "); | ||
598 | for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) | ||
599 | printk("%08lx%s", sh->evtchn_pending[i], | ||
600 | i % 8 == 0 ? "\n " : " "); | ||
601 | printk("\nmasks:\n "); | ||
602 | for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) | ||
603 | printk("%08lx%s", sh->evtchn_mask[i], | ||
604 | i % 8 == 0 ? "\n " : " "); | ||
605 | |||
606 | printk("\nunmasked:\n "); | ||
607 | for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) | ||
608 | printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i], | ||
609 | i % 8 == 0 ? "\n " : " "); | ||
610 | 1112 | ||
611 | printk("\npending list:\n"); | 1113 | printk("\npending list:\n"); |
612 | for(i = 0; i < NR_EVENT_CHANNELS; i++) { | 1114 | for (i = 0; i < NR_EVENT_CHANNELS; i++) { |
613 | if (sync_test_bit(i, sh->evtchn_pending)) { | 1115 | if (sync_test_bit(i, sh->evtchn_pending)) { |
614 | printk(" %d: event %d -> irq %d\n", | 1116 | int word_idx = i / BITS_PER_LONG; |
1117 | printk(" %d: event %d -> irq %d%s%s%s\n", | ||
615 | cpu_from_evtchn(i), i, | 1118 | cpu_from_evtchn(i), i, |
616 | evtchn_to_irq[i]); | 1119 | evtchn_to_irq[i], |
1120 | sync_test_bit(word_idx, &v->evtchn_pending_sel) | ||
1121 | ? "" : " l2-clear", | ||
1122 | !sync_test_bit(i, sh->evtchn_mask) | ||
1123 | ? "" : " globally-masked", | ||
1124 | sync_test_bit(i, cpu_evtchn) | ||
1125 | ? "" : " locally-masked"); | ||
617 | } | 1126 | } |
618 | } | 1127 | } |
619 | 1128 | ||
@@ -623,6 +1132,13 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) | |||
623 | } | 1132 | } |
624 | 1133 | ||
625 | static DEFINE_PER_CPU(unsigned, xed_nesting_count); | 1134 | static DEFINE_PER_CPU(unsigned, xed_nesting_count); |
1135 | static DEFINE_PER_CPU(unsigned int, current_word_idx); | ||
1136 | static DEFINE_PER_CPU(unsigned int, current_bit_idx); | ||
1137 | |||
1138 | /* | ||
1139 | * Mask out the i least significant bits of w | ||
1140 | */ | ||
1141 | #define MASK_LSBS(w, i) (w & ((~0UL) << i)) | ||
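In other words, MASK_LSBS(w, i) clears bits 0..i-1 of w and keeps the rest; a stand-alone illustration:

#include <stdio.h>

#define MASK_LSBS(w, i) (w & ((~0UL) << i))

int main(void)
{
	unsigned long w = 0xb6UL;		/* 1011 0110 */
	printf("%#lx\n", MASK_LSBS(w, 4));	/* 0xb0: bits 0-3 cleared */
	return 0;
}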
626 | 1142 | ||
627 | /* | 1143 | /* |
628 | * Search the CPUs pending events bitmasks. For each one found, map | 1144 | * Search the CPUs pending events bitmasks. For each one found, map |
@@ -635,9 +1151,12 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count); | |||
635 | */ | 1151 | */ |
636 | static void __xen_evtchn_do_upcall(void) | 1152 | static void __xen_evtchn_do_upcall(void) |
637 | { | 1153 | { |
1154 | int start_word_idx, start_bit_idx; | ||
1155 | int word_idx, bit_idx; | ||
1156 | int i; | ||
638 | int cpu = get_cpu(); | 1157 | int cpu = get_cpu(); |
639 | struct shared_info *s = HYPERVISOR_shared_info; | 1158 | struct shared_info *s = HYPERVISOR_shared_info; |
640 | struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); | 1159 | struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); |
641 | unsigned count; | 1160 | unsigned count; |
642 | 1161 | ||
643 | do { | 1162 | do { |
@@ -645,7 +1164,7 @@ static void __xen_evtchn_do_upcall(void) | |||
645 | 1164 | ||
646 | vcpu_info->evtchn_upcall_pending = 0; | 1165 | vcpu_info->evtchn_upcall_pending = 0; |
647 | 1166 | ||
648 | if (__get_cpu_var(xed_nesting_count)++) | 1167 | if (__this_cpu_inc_return(xed_nesting_count) - 1) |
649 | goto out; | 1168 | goto out; |
650 | 1169 | ||
651 | #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ | 1170 | #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ |
@@ -653,29 +1172,83 @@ static void __xen_evtchn_do_upcall(void) | |||
653 | wmb(); | 1172 | wmb(); |
654 | #endif | 1173 | #endif |
655 | pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); | 1174 | pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); |
656 | while (pending_words != 0) { | 1175 | |
1176 | start_word_idx = __this_cpu_read(current_word_idx); | ||
1177 | start_bit_idx = __this_cpu_read(current_bit_idx); | ||
1178 | |||
1179 | word_idx = start_word_idx; | ||
1180 | |||
1181 | for (i = 0; pending_words != 0; i++) { | ||
657 | unsigned long pending_bits; | 1182 | unsigned long pending_bits; |
658 | int word_idx = __ffs(pending_words); | 1183 | unsigned long words; |
659 | pending_words &= ~(1UL << word_idx); | 1184 | |
1185 | words = MASK_LSBS(pending_words, word_idx); | ||
1186 | |||
1187 | /* | ||
1188 | * If we masked out all events, wrap to beginning. | ||
1189 | */ | ||
1190 | if (words == 0) { | ||
1191 | word_idx = 0; | ||
1192 | bit_idx = 0; | ||
1193 | continue; | ||
1194 | } | ||
1195 | word_idx = __ffs(words); | ||
1196 | |||
1197 | pending_bits = active_evtchns(cpu, s, word_idx); | ||
1198 | bit_idx = 0; /* usually scan entire word from start */ | ||
1199 | if (word_idx == start_word_idx) { | ||
1200 | /* We scan the starting word in two parts */ | ||
1201 | if (i == 0) | ||
1202 | /* 1st time: start in the middle */ | ||
1203 | bit_idx = start_bit_idx; | ||
1204 | else | ||
1205 | /* 2nd time: mask bits done already */ | ||
1206 | bit_idx &= (1UL << start_bit_idx) - 1; | ||
1207 | } | ||
660 | 1208 | ||
661 | while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { | 1209 | do { |
662 | int bit_idx = __ffs(pending_bits); | 1210 | unsigned long bits; |
663 | int port = (word_idx * BITS_PER_LONG) + bit_idx; | 1211 | int port, irq; |
664 | int irq = evtchn_to_irq[port]; | ||
665 | struct irq_desc *desc; | 1212 | struct irq_desc *desc; |
666 | 1213 | ||
1214 | bits = MASK_LSBS(pending_bits, bit_idx); | ||
1215 | |||
1216 | /* If we masked out all events, move on. */ | ||
1217 | if (bits == 0) | ||
1218 | break; | ||
1219 | |||
1220 | bit_idx = __ffs(bits); | ||
1221 | |||
1222 | /* Process port. */ | ||
1223 | port = (word_idx * BITS_PER_LONG) + bit_idx; | ||
1224 | irq = evtchn_to_irq[port]; | ||
1225 | |||
667 | if (irq != -1) { | 1226 | if (irq != -1) { |
668 | desc = irq_to_desc(irq); | 1227 | desc = irq_to_desc(irq); |
669 | if (desc) | 1228 | if (desc) |
670 | generic_handle_irq_desc(irq, desc); | 1229 | generic_handle_irq_desc(irq, desc); |
671 | } | 1230 | } |
672 | } | 1231 | |
1232 | bit_idx = (bit_idx + 1) % BITS_PER_LONG; | ||
1233 | |||
1234 | /* Next caller starts at last processed + 1 */ | ||
1235 | __this_cpu_write(current_word_idx, | ||
1236 | bit_idx ? word_idx : | ||
1237 | (word_idx+1) % BITS_PER_LONG); | ||
1238 | __this_cpu_write(current_bit_idx, bit_idx); | ||
1239 | } while (bit_idx != 0); | ||
1240 | |||
1241 | /* Scan start_word_idx twice; all others once. */ | ||
1242 | if ((word_idx != start_word_idx) || (i != 0)) | ||
1243 | pending_words &= ~(1UL << word_idx); | ||
1244 | |||
1245 | word_idx = (word_idx + 1) % BITS_PER_LONG; | ||
673 | } | 1246 | } |
674 | 1247 | ||
675 | BUG_ON(!irqs_disabled()); | 1248 | BUG_ON(!irqs_disabled()); |
676 | 1249 | ||
677 | count = __get_cpu_var(xed_nesting_count); | 1250 | count = __this_cpu_read(xed_nesting_count); |
678 | __get_cpu_var(xed_nesting_count) = 0; | 1251 | __this_cpu_write(xed_nesting_count, 0); |
679 | } while (count != 1 || vcpu_info->evtchn_upcall_pending); | 1252 | } while (count != 1 || vcpu_info->evtchn_upcall_pending); |
680 | 1253 | ||
681 | out: | 1254 | out: |
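The point of the current_word_idx/current_bit_idx state is fairness: the next scan resumes just after the last port handled, so a busy low-numbered port cannot starve higher ones. A simplified, single-level user-space model of that resume-point idea (the kernel loop above additionally handles the two-level bitmap and the partial rescan of the starting word):

#include <stdio.h>

#define BITS 32

static unsigned int last_bit;	/* resume point, like current_bit_idx */

/* Handle every set bit in @pending once, starting just after the bit we
 * stopped at last time and wrapping around. */
static void scan_pending(unsigned int pending)
{
	unsigned int start = last_bit;
	unsigned int i;

	for (i = 1; i <= BITS; i++) {
		unsigned int bit = (start + i) % BITS;

		if (pending & (1u << bit)) {
			printf("handling port %u\n", bit);	/* handle_port(bit) */
			last_bit = bit;
		}
	}
}

int main(void)
{
	scan_pending(0x21u);	/* ports 0 and 5 pending */
	scan_pending(0x21u);	/* port 0 still doesn't jump the queue */
	return 0;
}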
@@ -719,8 +1292,7 @@ void rebind_evtchn_irq(int evtchn, int irq) | |||
719 | so there should be a proper type */ | 1292 | so there should be a proper type */ |
720 | BUG_ON(info->type == IRQT_UNBOUND); | 1293 | BUG_ON(info->type == IRQT_UNBOUND); |
721 | 1294 | ||
722 | evtchn_to_irq[evtchn] = irq; | 1295 | xen_irq_info_evtchn_init(irq, evtchn); |
723 | irq_info[irq] = mk_evtchn_info(evtchn); | ||
724 | 1296 | ||
725 | spin_unlock(&irq_mapping_update_lock); | 1297 | spin_unlock(&irq_mapping_update_lock); |
726 | 1298 | ||
@@ -737,10 +1309,14 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) | |||
737 | struct evtchn_bind_vcpu bind_vcpu; | 1309 | struct evtchn_bind_vcpu bind_vcpu; |
738 | int evtchn = evtchn_from_irq(irq); | 1310 | int evtchn = evtchn_from_irq(irq); |
739 | 1311 | ||
740 | /* events delivered via platform PCI interrupts are always | 1312 | if (!VALID_EVTCHN(evtchn)) |
741 | * routed to vcpu 0 */ | 1313 | return -1; |
742 | if (!VALID_EVTCHN(evtchn) || | 1314 | |
743 | (xen_hvm_domain() && !xen_have_vector_callback)) | 1315 | /* |
1316 | * Events delivered via platform PCI interrupts are always | ||
1317 | * routed to vcpu 0 and hence cannot be rebound. | ||
1318 | */ | ||
1319 | if (xen_hvm_domain() && !xen_have_vector_callback) | ||
744 | return -1; | 1320 | return -1; |
745 | 1321 | ||
746 | /* Send future instances of this interrupt to other vcpu. */ | 1322 | /* Send future instances of this interrupt to other vcpu. */ |
@@ -758,11 +1334,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) | |||
758 | return 0; | 1334 | return 0; |
759 | } | 1335 | } |
760 | 1336 | ||
761 | static int set_affinity_irq(unsigned irq, const struct cpumask *dest) | 1337 | static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, |
1338 | bool force) | ||
762 | { | 1339 | { |
763 | unsigned tcpu = cpumask_first(dest); | 1340 | unsigned tcpu = cpumask_first(dest); |
764 | 1341 | ||
765 | return rebind_irq_to_cpu(irq, tcpu); | 1342 | return rebind_irq_to_cpu(data->irq, tcpu); |
766 | } | 1343 | } |
767 | 1344 | ||
768 | int resend_irq_on_evtchn(unsigned int irq) | 1345 | int resend_irq_on_evtchn(unsigned int irq) |
@@ -781,35 +1358,41 @@ int resend_irq_on_evtchn(unsigned int irq) | |||
781 | return 1; | 1358 | return 1; |
782 | } | 1359 | } |
783 | 1360 | ||
784 | static void enable_dynirq(unsigned int irq) | 1361 | static void enable_dynirq(struct irq_data *data) |
785 | { | 1362 | { |
786 | int evtchn = evtchn_from_irq(irq); | 1363 | int evtchn = evtchn_from_irq(data->irq); |
787 | 1364 | ||
788 | if (VALID_EVTCHN(evtchn)) | 1365 | if (VALID_EVTCHN(evtchn)) |
789 | unmask_evtchn(evtchn); | 1366 | unmask_evtchn(evtchn); |
790 | } | 1367 | } |
791 | 1368 | ||
792 | static void disable_dynirq(unsigned int irq) | 1369 | static void disable_dynirq(struct irq_data *data) |
793 | { | 1370 | { |
794 | int evtchn = evtchn_from_irq(irq); | 1371 | int evtchn = evtchn_from_irq(data->irq); |
795 | 1372 | ||
796 | if (VALID_EVTCHN(evtchn)) | 1373 | if (VALID_EVTCHN(evtchn)) |
797 | mask_evtchn(evtchn); | 1374 | mask_evtchn(evtchn); |
798 | } | 1375 | } |
799 | 1376 | ||
800 | static void ack_dynirq(unsigned int irq) | 1377 | static void ack_dynirq(struct irq_data *data) |
801 | { | 1378 | { |
802 | int evtchn = evtchn_from_irq(irq); | 1379 | int evtchn = evtchn_from_irq(data->irq); |
803 | 1380 | ||
804 | move_native_irq(irq); | 1381 | irq_move_irq(data); |
805 | 1382 | ||
806 | if (VALID_EVTCHN(evtchn)) | 1383 | if (VALID_EVTCHN(evtchn)) |
807 | clear_evtchn(evtchn); | 1384 | clear_evtchn(evtchn); |
808 | } | 1385 | } |
809 | 1386 | ||
810 | static int retrigger_dynirq(unsigned int irq) | 1387 | static void mask_ack_dynirq(struct irq_data *data) |
811 | { | 1388 | { |
812 | int evtchn = evtchn_from_irq(irq); | 1389 | disable_dynirq(data); |
1390 | ack_dynirq(data); | ||
1391 | } | ||
1392 | |||
1393 | static int retrigger_dynirq(struct irq_data *data) | ||
1394 | { | ||
1395 | int evtchn = evtchn_from_irq(data->irq); | ||
813 | struct shared_info *sh = HYPERVISOR_shared_info; | 1396 | struct shared_info *sh = HYPERVISOR_shared_info; |
814 | int ret = 0; | 1397 | int ret = 0; |
815 | 1398 | ||
@@ -826,6 +1409,44 @@ static int retrigger_dynirq(unsigned int irq) | |||
826 | return ret; | 1409 | return ret; |
827 | } | 1410 | } |
828 | 1411 | ||
1412 | static void restore_pirqs(void) | ||
1413 | { | ||
1414 | int pirq, rc, irq, gsi; | ||
1415 | struct physdev_map_pirq map_irq; | ||
1416 | struct irq_info *info; | ||
1417 | |||
1418 | list_for_each_entry(info, &xen_irq_list_head, list) { | ||
1419 | if (info->type != IRQT_PIRQ) | ||
1420 | continue; | ||
1421 | |||
1422 | pirq = info->u.pirq.pirq; | ||
1423 | gsi = info->u.pirq.gsi; | ||
1424 | irq = info->irq; | ||
1425 | |||
1426 | /* save/restore of PT devices doesn't work, so at this point the | ||
1427 | * only devices present are GSI based emulated devices */ | ||
1428 | if (!gsi) | ||
1429 | continue; | ||
1430 | |||
1431 | map_irq.domid = DOMID_SELF; | ||
1432 | map_irq.type = MAP_PIRQ_TYPE_GSI; | ||
1433 | map_irq.index = gsi; | ||
1434 | map_irq.pirq = pirq; | ||
1435 | |||
1436 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); | ||
1437 | if (rc) { | ||
1438 | printk(KERN_WARNING "xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n", | ||
1439 | gsi, irq, pirq, rc); | ||
1440 | xen_free_irq(irq); | ||
1441 | continue; | ||
1442 | } | ||
1443 | |||
1444 | printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); | ||
1445 | |||
1446 | __startup_pirq(irq); | ||
1447 | } | ||
1448 | } | ||
1449 | |||
829 | static void restore_cpu_virqs(unsigned int cpu) | 1450 | static void restore_cpu_virqs(unsigned int cpu) |
830 | { | 1451 | { |
831 | struct evtchn_bind_virq bind_virq; | 1452 | struct evtchn_bind_virq bind_virq; |
@@ -846,12 +1467,8 @@ static void restore_cpu_virqs(unsigned int cpu) | |||
846 | evtchn = bind_virq.port; | 1467 | evtchn = bind_virq.port; |
847 | 1468 | ||
848 | /* Record the new mapping. */ | 1469 | /* Record the new mapping. */ |
849 | evtchn_to_irq[evtchn] = irq; | 1470 | xen_irq_info_virq_init(cpu, irq, evtchn, virq); |
850 | irq_info[irq] = mk_virq_info(evtchn, virq); | ||
851 | bind_evtchn_to_cpu(evtchn, cpu); | 1471 | bind_evtchn_to_cpu(evtchn, cpu); |
852 | |||
853 | /* Ready for use. */ | ||
854 | unmask_evtchn(evtchn); | ||
855 | } | 1472 | } |
856 | } | 1473 | } |
857 | 1474 | ||
@@ -874,13 +1491,8 @@ static void restore_cpu_ipis(unsigned int cpu) | |||
874 | evtchn = bind_ipi.port; | 1491 | evtchn = bind_ipi.port; |
875 | 1492 | ||
876 | /* Record the new mapping. */ | 1493 | /* Record the new mapping. */ |
877 | evtchn_to_irq[evtchn] = irq; | 1494 | xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); |
878 | irq_info[irq] = mk_ipi_info(evtchn, ipi); | ||
879 | bind_evtchn_to_cpu(evtchn, cpu); | 1495 | bind_evtchn_to_cpu(evtchn, cpu); |
880 | |||
881 | /* Ready for use. */ | ||
882 | unmask_evtchn(evtchn); | ||
883 | |||
884 | } | 1496 | } |
885 | } | 1497 | } |
886 | 1498 | ||
@@ -892,7 +1504,7 @@ void xen_clear_irq_pending(int irq) | |||
892 | if (VALID_EVTCHN(evtchn)) | 1504 | if (VALID_EVTCHN(evtchn)) |
893 | clear_evtchn(evtchn); | 1505 | clear_evtchn(evtchn); |
894 | } | 1506 | } |
895 | 1507 | EXPORT_SYMBOL(xen_clear_irq_pending); | |
896 | void xen_set_irq_pending(int irq) | 1508 | void xen_set_irq_pending(int irq) |
897 | { | 1509 | { |
898 | int evtchn = evtchn_from_irq(irq); | 1510 | int evtchn = evtchn_from_irq(irq); |
@@ -912,9 +1524,9 @@ bool xen_test_irq_pending(int irq) | |||
912 | return ret; | 1524 | return ret; |
913 | } | 1525 | } |
914 | 1526 | ||
915 | /* Poll waiting for an irq to become pending. In the usual case, the | 1527 | /* Poll waiting for an irq to become pending with timeout. In the usual case, |
916 | irq will be disabled so it won't deliver an interrupt. */ | 1528 | * the irq will be disabled so it won't deliver an interrupt. */ |
917 | void xen_poll_irq(int irq) | 1529 | void xen_poll_irq_timeout(int irq, u64 timeout) |
918 | { | 1530 | { |
919 | evtchn_port_t evtchn = evtchn_from_irq(irq); | 1531 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
920 | 1532 | ||
@@ -922,17 +1534,37 @@ void xen_poll_irq(int irq) | |||
922 | struct sched_poll poll; | 1534 | struct sched_poll poll; |
923 | 1535 | ||
924 | poll.nr_ports = 1; | 1536 | poll.nr_ports = 1; |
925 | poll.timeout = 0; | 1537 | poll.timeout = timeout; |
926 | set_xen_guest_handle(poll.ports, &evtchn); | 1538 | set_xen_guest_handle(poll.ports, &evtchn); |
927 | 1539 | ||
928 | if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0) | 1540 | if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0) |
929 | BUG(); | 1541 | BUG(); |
930 | } | 1542 | } |
931 | } | 1543 | } |
1544 | EXPORT_SYMBOL(xen_poll_irq_timeout); | ||
1545 | /* Poll waiting for an irq to become pending. In the usual case, the | ||
1546 | * irq will be disabled so it won't deliver an interrupt. */ | ||
1547 | void xen_poll_irq(int irq) | ||
1548 | { | ||
1549 | xen_poll_irq_timeout(irq, 0 /* no timeout */); | ||
1550 | } | ||
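These helpers exist for busy-wait style callers (the Xen spinlock slow path is the in-tree user); a hedged sketch of the pattern, where poll_irq is an irq the caller has already bound and lock_is_free is a made-up predicate:

#include <linux/types.h>
#include <xen/events.h>

static void example_wait_for_kick(int poll_irq, bool (*lock_is_free)(void))
{
	/* Consume any stale kick first. */
	xen_clear_irq_pending(poll_irq);

	while (!lock_is_free()) {
		/* Blocks in the hypervisor until the event becomes pending;
		 * the irq itself stays disabled, so no interrupt is taken. */
		xen_poll_irq(poll_irq);
		xen_clear_irq_pending(poll_irq);
	}
}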
1551 | |||
1552 | /* Check whether the IRQ line is shared with other guests. */ | ||
1553 | int xen_test_irq_shared(int irq) | ||
1554 | { | ||
1555 | struct irq_info *info = info_for_irq(irq); | ||
1556 | struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq }; | ||
1557 | |||
1558 | if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status)) | ||
1559 | return 0; | ||
1560 | return !(irq_status.flags & XENIRQSTAT_shared); | ||
1561 | } | ||
1562 | EXPORT_SYMBOL_GPL(xen_test_irq_shared); | ||
932 | 1563 | ||
933 | void xen_irq_resume(void) | 1564 | void xen_irq_resume(void) |
934 | { | 1565 | { |
935 | unsigned int cpu, irq, evtchn; | 1566 | unsigned int cpu, evtchn; |
1567 | struct irq_info *info; | ||
936 | 1568 | ||
937 | init_evtchn_cpu_bindings(); | 1569 | init_evtchn_cpu_bindings(); |
938 | 1570 | ||
@@ -941,8 +1573,8 @@ void xen_irq_resume(void) | |||
941 | mask_evtchn(evtchn); | 1573 | mask_evtchn(evtchn); |
942 | 1574 | ||
943 | /* No IRQ <-> event-channel mappings. */ | 1575 | /* No IRQ <-> event-channel mappings. */ |
944 | for (irq = 0; irq < nr_irqs; irq++) | 1576 | list_for_each_entry(info, &xen_irq_list_head, list) |
945 | irq_info[irq].evtchn = 0; /* zap event-channel binding */ | 1577 | info->evtchn = 0; /* zap event-channel binding */ |
946 | 1578 | ||
947 | for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) | 1579 | for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) |
948 | evtchn_to_irq[evtchn] = -1; | 1580 | evtchn_to_irq[evtchn] = -1; |
@@ -951,28 +1583,52 @@ void xen_irq_resume(void) | |||
951 | restore_cpu_virqs(cpu); | 1583 | restore_cpu_virqs(cpu); |
952 | restore_cpu_ipis(cpu); | 1584 | restore_cpu_ipis(cpu); |
953 | } | 1585 | } |
1586 | |||
1587 | restore_pirqs(); | ||
954 | } | 1588 | } |
955 | 1589 | ||
956 | static struct irq_chip xen_dynamic_chip __read_mostly = { | 1590 | static struct irq_chip xen_dynamic_chip __read_mostly = { |
957 | .name = "xen-dyn", | 1591 | .name = "xen-dyn", |
1592 | |||
1593 | .irq_disable = disable_dynirq, | ||
1594 | .irq_mask = disable_dynirq, | ||
1595 | .irq_unmask = enable_dynirq, | ||
1596 | |||
1597 | .irq_ack = ack_dynirq, | ||
1598 | .irq_mask_ack = mask_ack_dynirq, | ||
1599 | |||
1600 | .irq_set_affinity = set_affinity_irq, | ||
1601 | .irq_retrigger = retrigger_dynirq, | ||
1602 | }; | ||
958 | 1603 | ||
959 | .disable = disable_dynirq, | 1604 | static struct irq_chip xen_pirq_chip __read_mostly = { |
960 | .mask = disable_dynirq, | 1605 | .name = "xen-pirq", |
961 | .unmask = enable_dynirq, | ||
962 | 1606 | ||
963 | .ack = ack_dynirq, | 1607 | .irq_startup = startup_pirq, |
964 | .set_affinity = set_affinity_irq, | 1608 | .irq_shutdown = shutdown_pirq, |
965 | .retrigger = retrigger_dynirq, | 1609 | .irq_enable = enable_pirq, |
1610 | .irq_disable = disable_pirq, | ||
1611 | |||
1612 | .irq_mask = disable_dynirq, | ||
1613 | .irq_unmask = enable_dynirq, | ||
1614 | |||
1615 | .irq_ack = eoi_pirq, | ||
1616 | .irq_eoi = eoi_pirq, | ||
1617 | .irq_mask_ack = mask_ack_pirq, | ||
1618 | |||
1619 | .irq_set_affinity = set_affinity_irq, | ||
1620 | |||
1621 | .irq_retrigger = retrigger_dynirq, | ||
966 | }; | 1622 | }; |
967 | 1623 | ||
968 | static struct irq_chip xen_percpu_chip __read_mostly = { | 1624 | static struct irq_chip xen_percpu_chip __read_mostly = { |
969 | .name = "xen-percpu", | 1625 | .name = "xen-percpu", |
970 | 1626 | ||
971 | .disable = disable_dynirq, | 1627 | .irq_disable = disable_dynirq, |
972 | .mask = disable_dynirq, | 1628 | .irq_mask = disable_dynirq, |
973 | .unmask = enable_dynirq, | 1629 | .irq_unmask = enable_dynirq, |
974 | 1630 | ||
975 | .ack = ack_dynirq, | 1631 | .irq_ack = ack_dynirq, |
976 | }; | 1632 | }; |
977 | 1633 | ||
978 | int xen_set_callback_via(uint64_t via) | 1634 | int xen_set_callback_via(uint64_t via) |
@@ -1017,9 +1673,10 @@ void __init xen_init_IRQ(void) | |||
1017 | { | 1673 | { |
1018 | int i; | 1674 | int i; |
1019 | 1675 | ||
1020 | cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s), | 1676 | evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), |
1021 | GFP_KERNEL); | 1677 | GFP_KERNEL); |
1022 | BUG_ON(cpu_evtchn_mask_p == NULL); | 1678 | for (i = 0; i < NR_EVENT_CHANNELS; i++) |
1679 | evtchn_to_irq[i] = -1; | ||
1023 | 1680 | ||
1024 | init_evtchn_cpu_bindings(); | 1681 | init_evtchn_cpu_bindings(); |
1025 | 1682 | ||
@@ -1030,7 +1687,12 @@ void __init xen_init_IRQ(void) | |||
1030 | if (xen_hvm_domain()) { | 1687 | if (xen_hvm_domain()) { |
1031 | xen_callback_vector(); | 1688 | xen_callback_vector(); |
1032 | native_init_IRQ(); | 1689 | native_init_IRQ(); |
1690 | /* pci_xen_hvm_init must be called after native_init_IRQ so that | ||
1691 | * __acpi_register_gsi can point at the right function */ | ||
1692 | pci_xen_hvm_init(); | ||
1033 | } else { | 1693 | } else { |
1034 | irq_ctx_init(smp_processor_id()); | 1694 | irq_ctx_init(smp_processor_id()); |
1695 | if (xen_initial_domain()) | ||
1696 | xen_setup_pirqs(); | ||
1035 | } | 1697 | } |
1036 | } | 1698 | } |
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 66e185cfe92f..dbc13e94b612 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c | |||
@@ -69,20 +69,51 @@ struct per_user_data { | |||
69 | const char *name; | 69 | const char *name; |
70 | }; | 70 | }; |
71 | 71 | ||
72 | /* Who's bound to each port? */ | 72 | /* |
73 | static struct per_user_data *port_user[NR_EVENT_CHANNELS]; | 73 | * Who's bound to each port? This is logically an array of struct |
74 | * per_user_data *, but we encode the current enabled-state in bit 0. | ||
75 | */ | ||
76 | static unsigned long *port_user; | ||
74 | static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ | 77 | static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ |
75 | 78 | ||
76 | irqreturn_t evtchn_interrupt(int irq, void *data) | 79 | static inline struct per_user_data *get_port_user(unsigned port) |
80 | { | ||
81 | return (struct per_user_data *)(port_user[port] & ~1); | ||
82 | } | ||
83 | |||
84 | static inline void set_port_user(unsigned port, struct per_user_data *u) | ||
85 | { | ||
86 | port_user[port] = (unsigned long)u; | ||
87 | } | ||
88 | |||
89 | static inline bool get_port_enabled(unsigned port) | ||
90 | { | ||
91 | return port_user[port] & 1; | ||
92 | } | ||
93 | |||
94 | static inline void set_port_enabled(unsigned port, bool enabled) | ||
95 | { | ||
96 | if (enabled) | ||
97 | port_user[port] |= 1; | ||
98 | else | ||
99 | port_user[port] &= ~1; | ||
100 | } | ||
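This works because per_user_data structures come from the slab allocator and are at least word aligned, leaving bit 0 of the pointer value free; a small user-space illustration of the same packing trick:

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>

struct per_user_data;		/* opaque here; only the pointer value matters */

static unsigned long slot;	/* pointer and enabled-flag packed together */

static void set_user(struct per_user_data *u)
{
	slot = (unsigned long)u;
}

static struct per_user_data *get_user(void)
{
	return (struct per_user_data *)(slot & ~1UL);
}

static void set_enabled(bool enabled)
{
	if (enabled)
		slot |= 1;
	else
		slot &= ~1UL;
}

static bool get_enabled(void)
{
	return slot & 1;
}

int main(void)
{
	struct per_user_data *u = malloc(64);

	assert(((unsigned long)u & 1) == 0);	/* allocations are aligned */
	set_user(u);
	set_enabled(true);
	assert(get_user() == u && get_enabled());
	free(u);
	return 0;
}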
101 | |||
102 | static irqreturn_t evtchn_interrupt(int irq, void *data) | ||
77 | { | 103 | { |
78 | unsigned int port = (unsigned long)data; | 104 | unsigned int port = (unsigned long)data; |
79 | struct per_user_data *u; | 105 | struct per_user_data *u; |
80 | 106 | ||
81 | spin_lock(&port_user_lock); | 107 | spin_lock(&port_user_lock); |
82 | 108 | ||
83 | u = port_user[port]; | 109 | u = get_port_user(port); |
110 | |||
111 | WARN(!get_port_enabled(port), | ||
112 | "Interrupt for port %d, but apparently not enabled; per-user %p\n", | ||
113 | port, u); | ||
84 | 114 | ||
85 | disable_irq_nosync(irq); | 115 | disable_irq_nosync(irq); |
116 | set_port_enabled(port, false); | ||
86 | 117 | ||
87 | if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { | 118 | if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { |
88 | u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; | 119 | u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; |
@@ -92,9 +123,8 @@ irqreturn_t evtchn_interrupt(int irq, void *data) | |||
92 | kill_fasync(&u->evtchn_async_queue, | 123 | kill_fasync(&u->evtchn_async_queue, |
93 | SIGIO, POLL_IN); | 124 | SIGIO, POLL_IN); |
94 | } | 125 | } |
95 | } else { | 126 | } else |
96 | u->ring_overflow = 1; | 127 | u->ring_overflow = 1; |
97 | } | ||
98 | 128 | ||
99 | spin_unlock(&port_user_lock); | 129 | spin_unlock(&port_user_lock); |
100 | 130 | ||
@@ -198,9 +228,18 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, | |||
198 | goto out; | 228 | goto out; |
199 | 229 | ||
200 | spin_lock_irq(&port_user_lock); | 230 | spin_lock_irq(&port_user_lock); |
201 | for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) | 231 | |
202 | if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) | 232 | for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { |
203 | enable_irq(irq_from_evtchn(kbuf[i])); | 233 | unsigned port = kbuf[i]; |
234 | |||
235 | if (port < NR_EVENT_CHANNELS && | ||
236 | get_port_user(port) == u && | ||
237 | !get_port_enabled(port)) { | ||
238 | set_port_enabled(port, true); | ||
239 | enable_irq(irq_from_evtchn(port)); | ||
240 | } | ||
241 | } | ||
242 | |||
204 | spin_unlock_irq(&port_user_lock); | 243 | spin_unlock_irq(&port_user_lock); |
205 | 244 | ||
206 | rc = count; | 245 | rc = count; |
@@ -222,8 +261,9 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) | |||
222 | * interrupt handler yet, and our caller has already | 261 | * interrupt handler yet, and our caller has already |
223 | * serialized bind operations.) | 262 | * serialized bind operations.) |
224 | */ | 263 | */ |
225 | BUG_ON(port_user[port] != NULL); | 264 | BUG_ON(get_port_user(port) != NULL); |
226 | port_user[port] = u; | 265 | set_port_user(port, u); |
266 | set_port_enabled(port, true); /* start enabled */ | ||
227 | 267 | ||
228 | rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, | 268 | rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, |
229 | u->name, (void *)(unsigned long)port); | 269 | u->name, (void *)(unsigned long)port); |
@@ -239,10 +279,7 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port) | |||
239 | 279 | ||
240 | unbind_from_irqhandler(irq, (void *)(unsigned long)port); | 280 | unbind_from_irqhandler(irq, (void *)(unsigned long)port); |
241 | 281 | ||
242 | /* make sure we unbind the irq handler before clearing the port */ | 282 | set_port_user(port, NULL); |
243 | barrier(); | ||
244 | |||
245 | port_user[port] = NULL; | ||
246 | } | 283 | } |
247 | 284 | ||
248 | static long evtchn_ioctl(struct file *file, | 285 | static long evtchn_ioctl(struct file *file, |
@@ -333,15 +370,17 @@ static long evtchn_ioctl(struct file *file, | |||
333 | spin_lock_irq(&port_user_lock); | 370 | spin_lock_irq(&port_user_lock); |
334 | 371 | ||
335 | rc = -ENOTCONN; | 372 | rc = -ENOTCONN; |
336 | if (port_user[unbind.port] != u) { | 373 | if (get_port_user(unbind.port) != u) { |
337 | spin_unlock_irq(&port_user_lock); | 374 | spin_unlock_irq(&port_user_lock); |
338 | break; | 375 | break; |
339 | } | 376 | } |
340 | 377 | ||
341 | evtchn_unbind_from_user(u, unbind.port); | 378 | disable_irq(irq_from_evtchn(unbind.port)); |
342 | 379 | ||
343 | spin_unlock_irq(&port_user_lock); | 380 | spin_unlock_irq(&port_user_lock); |
344 | 381 | ||
382 | evtchn_unbind_from_user(u, unbind.port); | ||
383 | |||
345 | rc = 0; | 384 | rc = 0; |
346 | break; | 385 | break; |
347 | } | 386 | } |
@@ -355,7 +394,7 @@ static long evtchn_ioctl(struct file *file, | |||
355 | 394 | ||
356 | if (notify.port >= NR_EVENT_CHANNELS) { | 395 | if (notify.port >= NR_EVENT_CHANNELS) { |
357 | rc = -EINVAL; | 396 | rc = -EINVAL; |
358 | } else if (port_user[notify.port] != u) { | 397 | } else if (get_port_user(notify.port) != u) { |
359 | rc = -ENOTCONN; | 398 | rc = -ENOTCONN; |
360 | } else { | 399 | } else { |
361 | notify_remote_via_evtchn(notify.port); | 400 | notify_remote_via_evtchn(notify.port); |
@@ -431,7 +470,7 @@ static int evtchn_open(struct inode *inode, struct file *filp) | |||
431 | 470 | ||
432 | filp->private_data = u; | 471 | filp->private_data = u; |
433 | 472 | ||
434 | return 0; | 473 | return nonseekable_open(inode, filp); |
435 | } | 474 | } |
436 | 475 | ||
437 | static int evtchn_release(struct inode *inode, struct file *filp) | 476 | static int evtchn_release(struct inode *inode, struct file *filp) |
@@ -444,14 +483,21 @@ static int evtchn_release(struct inode *inode, struct file *filp) | |||
444 | free_page((unsigned long)u->ring); | 483 | free_page((unsigned long)u->ring); |
445 | 484 | ||
446 | for (i = 0; i < NR_EVENT_CHANNELS; i++) { | 485 | for (i = 0; i < NR_EVENT_CHANNELS; i++) { |
447 | if (port_user[i] != u) | 486 | if (get_port_user(i) != u) |
448 | continue; | 487 | continue; |
449 | 488 | ||
450 | evtchn_unbind_from_user(port_user[i], i); | 489 | disable_irq(irq_from_evtchn(i)); |
451 | } | 490 | } |
452 | 491 | ||
453 | spin_unlock_irq(&port_user_lock); | 492 | spin_unlock_irq(&port_user_lock); |
454 | 493 | ||
494 | for (i = 0; i < NR_EVENT_CHANNELS; i++) { | ||
495 | if (get_port_user(i) != u) | ||
496 | continue; | ||
497 | |||
498 | evtchn_unbind_from_user(get_port_user(i), i); | ||
499 | } | ||
500 | |||
455 | kfree(u->name); | 501 | kfree(u->name); |
456 | kfree(u); | 502 | kfree(u); |
457 | 503 | ||
@@ -467,11 +513,12 @@ static const struct file_operations evtchn_fops = { | |||
467 | .fasync = evtchn_fasync, | 513 | .fasync = evtchn_fasync, |
468 | .open = evtchn_open, | 514 | .open = evtchn_open, |
469 | .release = evtchn_release, | 515 | .release = evtchn_release, |
516 | .llseek = no_llseek, | ||
470 | }; | 517 | }; |
471 | 518 | ||
472 | static struct miscdevice evtchn_miscdev = { | 519 | static struct miscdevice evtchn_miscdev = { |
473 | .minor = MISC_DYNAMIC_MINOR, | 520 | .minor = MISC_DYNAMIC_MINOR, |
474 | .name = "evtchn", | 521 | .name = "xen/evtchn", |
475 | .fops = &evtchn_fops, | 522 | .fops = &evtchn_fops, |
476 | }; | 523 | }; |
477 | static int __init evtchn_init(void) | 524 | static int __init evtchn_init(void) |
@@ -481,8 +528,11 @@ static int __init evtchn_init(void) | |||
481 | if (!xen_domain()) | 528 | if (!xen_domain()) |
482 | return -ENODEV; | 529 | return -ENODEV; |
483 | 530 | ||
531 | port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); | ||
532 | if (port_user == NULL) | ||
533 | return -ENOMEM; | ||
534 | |||
484 | spin_lock_init(&port_user_lock); | 535 | spin_lock_init(&port_user_lock); |
485 | memset(port_user, 0, sizeof(port_user)); | ||
486 | 536 | ||
487 | /* Create '/dev/misc/evtchn'. */ | 537 | /* Create '/dev/misc/evtchn'. */ |
488 | err = misc_register(&evtchn_miscdev); | 538 | err = misc_register(&evtchn_miscdev); |
@@ -498,6 +548,9 @@ static int __init evtchn_init(void) | |||
498 | 548 | ||
499 | static void __exit evtchn_cleanup(void) | 549 | static void __exit evtchn_cleanup(void) |
500 | { | 550 | { |
551 | kfree(port_user); | ||
552 | port_user = NULL; | ||
553 | |||
501 | misc_deregister(&evtchn_miscdev); | 554 | misc_deregister(&evtchn_miscdev); |
502 | } | 555 | } |
503 | 556 | ||
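From user space the flow this driver enables looks roughly like the sketch below; the header name and the /dev/xen/evtchn path are assumptions (the miscdevice is registered as "xen/evtchn" above), and the peer domain id is invented:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xen/evtchn.h>		/* assumed header carrying the evtchn ioctl ABI */

int main(void)
{
	struct ioctl_evtchn_bind_unbound_port bind = { .remote_domain = 1 };
	uint32_t port;
	int fd, rc;

	fd = open("/dev/xen/evtchn", O_RDWR);	/* device path assumed */
	if (fd < 0) { perror("open"); return 1; }

	rc = ioctl(fd, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind);
	if (rc < 0) { perror("ioctl"); return 1; }
	printf("bound local port %d\n", rc);

	/* Each read() returns ports with pending events; delivery on a port
	 * stays disabled until the port is written back (see evtchn_write). */
	if (read(fd, &port, sizeof(port)) == sizeof(port)) {
		printf("event on port %u\n", port);
		write(fd, &port, sizeof(port));	/* re-enable the port */
	}
	return 0;
}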
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c new file mode 100644 index 000000000000..f6832f46aea4 --- /dev/null +++ b/drivers/xen/gntalloc.c | |||
@@ -0,0 +1,555 @@ | |||
1 | /****************************************************************************** | ||
2 | * gntalloc.c | ||
3 | * | ||
4 | * Device for creating grant references (in user-space) that may be shared | ||
5 | * with other domains. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * This driver exists to allow userspace programs in Linux to allocate kernel | ||
19 | * memory that will later be shared with another domain. Without this device, | ||
20 | * Linux userspace programs cannot create grant references. | ||
21 | * | ||
22 | * How this stuff works: | ||
23 | * X -> granting a page to Y | ||
24 | * Y -> mapping the grant from X | ||
25 | * | ||
26 | * 1. X uses the gntalloc device to allocate a page of kernel memory, P. | ||
27 | * 2. X creates an entry in the grant table that says domid(Y) can access P. | ||
28 | * This is done without a hypercall unless the grant table needs expansion. | ||
29 | * 3. X gives the grant reference identifier, GREF, to Y. | ||
30 | * 4. Y maps the page, either directly into kernel memory for use in a backend | ||
31 | * driver, or via the gntdev device to map into the address space of an | ||
32 | * application running in Y. This is the first point at which Xen does any | ||
33 | * tracking of the page. | ||
34 | * 5. A program in X mmap()s a segment of the gntalloc device that corresponds | ||
35 | * to the shared page, and can now communicate with Y over the shared page. | ||
36 | * | ||
37 | * | ||
38 | * NOTE TO USERSPACE LIBRARIES: | ||
39 | * The grant allocation and mmap()ing are, naturally, two separate operations. | ||
40 | * You set up the sharing by calling the create ioctl() and then the mmap(). | ||
41 | * Teardown requires munmap() and either close() or ioctl(). | ||
42 | * | ||
43 | * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant | ||
44 | * reference, this device can be used to consume kernel memory by leaving grant | ||
45 | * references mapped by another domain when an application exits. Therefore, | ||
46 | * there is a global limit on the number of pages that can be allocated. When | ||
47 | * all references to the page are unmapped, it will be freed during the next | ||
48 | * grant operation. | ||
49 | */ | ||
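A hedged user-space sketch of steps 1, 2 and 5 above; the header location, the /dev/xen/gntalloc node and the peer domain id are assumptions, and error handling is minimal:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/gntalloc.h>	/* assumed install location of the ioctl ABI */

int main(void)
{
	struct ioctl_gntalloc_alloc_gref op;
	void *shared;
	int fd;

	fd = open("/dev/xen/gntalloc", O_RDWR);	/* device path assumed */
	if (fd < 0) { perror("open"); return 1; }

	memset(&op, 0, sizeof(op));
	op.domid = 1;				/* example peer domain (Y) */
	op.flags = GNTALLOC_FLAG_WRITABLE;
	op.count = 1;
	if (ioctl(fd, IOCTL_GNTALLOC_ALLOC_GREF, &op)) { perror("ioctl"); return 1; }

	printf("grant ref %u, mmap offset %llu\n",
	       op.gref_ids[0], (unsigned long long)op.index);

	shared = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
		      (off_t)op.index);
	if (shared == MAP_FAILED) { perror("mmap"); return 1; }

	strcpy(shared, "hello from the granting domain");
	/* hand op.gref_ids[0] to the peer (e.g. via xenstore), then munmap/close */
	return 0;
}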
50 | |||
51 | #include <linux/atomic.h> | ||
52 | #include <linux/module.h> | ||
53 | #include <linux/miscdevice.h> | ||
54 | #include <linux/kernel.h> | ||
55 | #include <linux/init.h> | ||
56 | #include <linux/slab.h> | ||
57 | #include <linux/fs.h> | ||
58 | #include <linux/device.h> | ||
59 | #include <linux/mm.h> | ||
60 | #include <linux/uaccess.h> | ||
61 | #include <linux/types.h> | ||
62 | #include <linux/list.h> | ||
63 | #include <linux/highmem.h> | ||
64 | |||
65 | #include <xen/xen.h> | ||
66 | #include <xen/page.h> | ||
67 | #include <xen/grant_table.h> | ||
68 | #include <xen/gntalloc.h> | ||
69 | #include <xen/events.h> | ||
70 | |||
71 | static int limit = 1024; | ||
72 | module_param(limit, int, 0644); | ||
73 | MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " | ||
74 | "the gntalloc device"); | ||
75 | |||
76 | static LIST_HEAD(gref_list); | ||
77 | static DEFINE_SPINLOCK(gref_lock); | ||
78 | static int gref_size; | ||
79 | |||
80 | struct notify_info { | ||
81 | uint16_t pgoff:12; /* Bits 0-11: Offset of the byte to clear */ | ||
82 | uint16_t flags:2; /* Bits 12-13: Unmap notification flags */ | ||
83 | int event; /* Port (event channel) to notify */ | ||
84 | }; | ||
85 | |||
86 | /* Metadata on a grant reference. */ | ||
87 | struct gntalloc_gref { | ||
88 | struct list_head next_gref; /* list entry gref_list */ | ||
89 | struct list_head next_file; /* list entry file->list, if open */ | ||
90 | struct page *page; /* The shared page */ | ||
91 | uint64_t file_index; /* File offset for mmap() */ | ||
92 | unsigned int users; /* Use count - when zero, waiting on Xen */ | ||
93 | grant_ref_t gref_id; /* The grant reference number */ | ||
94 | struct notify_info notify; /* Unmap notification */ | ||
95 | }; | ||
96 | |||
97 | struct gntalloc_file_private_data { | ||
98 | struct list_head list; | ||
99 | uint64_t index; | ||
100 | }; | ||
101 | |||
102 | static void __del_gref(struct gntalloc_gref *gref); | ||
103 | |||
104 | static void do_cleanup(void) | ||
105 | { | ||
106 | struct gntalloc_gref *gref, *n; | ||
107 | list_for_each_entry_safe(gref, n, &gref_list, next_gref) { | ||
108 | if (!gref->users) | ||
109 | __del_gref(gref); | ||
110 | } | ||
111 | } | ||
112 | |||
113 | static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, | ||
114 | uint32_t *gref_ids, struct gntalloc_file_private_data *priv) | ||
115 | { | ||
116 | int i, rc, readonly; | ||
117 | LIST_HEAD(queue_gref); | ||
118 | LIST_HEAD(queue_file); | ||
119 | struct gntalloc_gref *gref; | ||
120 | |||
121 | readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); | ||
122 | rc = -ENOMEM; | ||
123 | for (i = 0; i < op->count; i++) { | ||
124 | gref = kzalloc(sizeof(*gref), GFP_KERNEL); | ||
125 | if (!gref) | ||
126 | goto undo; | ||
127 | list_add_tail(&gref->next_gref, &queue_gref); | ||
128 | list_add_tail(&gref->next_file, &queue_file); | ||
129 | gref->users = 1; | ||
130 | gref->file_index = op->index + i * PAGE_SIZE; | ||
131 | gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); | ||
132 | if (!gref->page) | ||
133 | goto undo; | ||
134 | |||
135 | /* Grant foreign access to the page. */ | ||
136 | gref->gref_id = gnttab_grant_foreign_access(op->domid, | ||
137 | pfn_to_mfn(page_to_pfn(gref->page)), readonly); | ||
138 | if (gref->gref_id < 0) { | ||
139 | rc = gref->gref_id; | ||
140 | goto undo; | ||
141 | } | ||
142 | gref_ids[i] = gref->gref_id; | ||
143 | } | ||
144 | |||
145 | /* Add to gref lists. */ | ||
146 | spin_lock(&gref_lock); | ||
147 | list_splice_tail(&queue_gref, &gref_list); | ||
148 | list_splice_tail(&queue_file, &priv->list); | ||
149 | spin_unlock(&gref_lock); | ||
150 | |||
151 | return 0; | ||
152 | |||
153 | undo: | ||
154 | spin_lock(&gref_lock); | ||
155 | gref_size -= (op->count - i); | ||
156 | |||
157 | list_for_each_entry(gref, &queue_file, next_file) { | ||
158 | /* __del_gref does not remove from queue_file */ | ||
159 | __del_gref(gref); | ||
160 | } | ||
161 | |||
162 | /* It's possible for the target domain to map the just-allocated grant | ||
163 | * references by blindly guessing their IDs; if this is done, then | ||
164 | * __del_gref will leave them in the queue_gref list. They need to be | ||
165 | * added to the global list so that we can free them when they are no | ||
166 | * longer referenced. | ||
167 | */ | ||
168 | if (unlikely(!list_empty(&queue_gref))) | ||
169 | list_splice_tail(&queue_gref, &gref_list); | ||
170 | spin_unlock(&gref_lock); | ||
171 | return rc; | ||
172 | } | ||
173 | |||
174 | static void __del_gref(struct gntalloc_gref *gref) | ||
175 | { | ||
176 | if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { | ||
177 | uint8_t *tmp = kmap(gref->page); | ||
178 | tmp[gref->notify.pgoff] = 0; | ||
179 | kunmap(gref->page); | ||
180 | } | ||
181 | if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) | ||
182 | notify_remote_via_evtchn(gref->notify.event); | ||
183 | |||
184 | gref->notify.flags = 0; | ||
185 | |||
186 | if (gref->gref_id > 0) { | ||
187 | if (gnttab_query_foreign_access(gref->gref_id)) | ||
188 | return; | ||
189 | |||
190 | if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) | ||
191 | return; | ||
192 | } | ||
193 | |||
194 | gref_size--; | ||
195 | list_del(&gref->next_gref); | ||
196 | |||
197 | if (gref->page) | ||
198 | __free_page(gref->page); | ||
199 | |||
200 | kfree(gref); | ||
201 | } | ||
202 | |||
203 | /* finds contiguous grant references in a file, returns the first */ | ||
204 | static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv, | ||
205 | uint64_t index, uint32_t count) | ||
206 | { | ||
207 | struct gntalloc_gref *rv = NULL, *gref; | ||
208 | list_for_each_entry(gref, &priv->list, next_file) { | ||
209 | if (gref->file_index == index && !rv) | ||
210 | rv = gref; | ||
211 | if (rv) { | ||
212 | if (gref->file_index != index) | ||
213 | return NULL; | ||
214 | index += PAGE_SIZE; | ||
215 | count--; | ||
216 | if (count == 0) | ||
217 | return rv; | ||
218 | } | ||
219 | } | ||
220 | return NULL; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * ------------------------------------- | ||
225 | * File operations. | ||
226 | * ------------------------------------- | ||
227 | */ | ||
228 | static int gntalloc_open(struct inode *inode, struct file *filp) | ||
229 | { | ||
230 | struct gntalloc_file_private_data *priv; | ||
231 | |||
232 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
233 | if (!priv) | ||
234 | goto out_nomem; | ||
235 | INIT_LIST_HEAD(&priv->list); | ||
236 | |||
237 | filp->private_data = priv; | ||
238 | |||
239 | pr_debug("%s: priv %p\n", __func__, priv); | ||
240 | |||
241 | return 0; | ||
242 | |||
243 | out_nomem: | ||
244 | return -ENOMEM; | ||
245 | } | ||
246 | |||
247 | static int gntalloc_release(struct inode *inode, struct file *filp) | ||
248 | { | ||
249 | struct gntalloc_file_private_data *priv = filp->private_data; | ||
250 | struct gntalloc_gref *gref; | ||
251 | |||
252 | pr_debug("%s: priv %p\n", __func__, priv); | ||
253 | |||
254 | spin_lock(&gref_lock); | ||
255 | while (!list_empty(&priv->list)) { | ||
256 | gref = list_entry(priv->list.next, | ||
257 | struct gntalloc_gref, next_file); | ||
258 | list_del(&gref->next_file); | ||
259 | gref->users--; | ||
260 | if (gref->users == 0) | ||
261 | __del_gref(gref); | ||
262 | } | ||
263 | kfree(priv); | ||
264 | spin_unlock(&gref_lock); | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, | ||
270 | struct ioctl_gntalloc_alloc_gref __user *arg) | ||
271 | { | ||
272 | int rc = 0; | ||
273 | struct ioctl_gntalloc_alloc_gref op; | ||
274 | uint32_t *gref_ids; | ||
275 | |||
276 | pr_debug("%s: priv %p\n", __func__, priv); | ||
277 | |||
278 | if (copy_from_user(&op, arg, sizeof(op))) { | ||
279 | rc = -EFAULT; | ||
280 | goto out; | ||
281 | } | ||
282 | |||
283 | gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); | ||
284 | if (!gref_ids) { | ||
285 | rc = -ENOMEM; | ||
286 | goto out; | ||
287 | } | ||
288 | |||
289 | spin_lock(&gref_lock); | ||
290 | 	/* Clean up pages that have dropped to zero (local) users but are still mapped | ||
291 | * by remote domains. Since those pages count towards the limit that we | ||
292 | * are about to enforce, removing them here is a good idea. | ||
293 | */ | ||
294 | do_cleanup(); | ||
295 | if (gref_size + op.count > limit) { | ||
296 | spin_unlock(&gref_lock); | ||
297 | rc = -ENOSPC; | ||
298 | goto out_free; | ||
299 | } | ||
300 | gref_size += op.count; | ||
301 | op.index = priv->index; | ||
302 | priv->index += op.count * PAGE_SIZE; | ||
303 | spin_unlock(&gref_lock); | ||
304 | |||
305 | rc = add_grefs(&op, gref_ids, priv); | ||
306 | if (rc < 0) | ||
307 | goto out_free; | ||
308 | |||
309 | /* Once we finish add_grefs, it is unsafe to touch the new reference, | ||
310 | * since it is possible for a concurrent ioctl to remove it (by guessing | ||
311 | * its index). If the userspace application doesn't provide valid memory | ||
312 | 	 * to write the IDs to, the only way for it to release the grants is to | ||
313 | 	 * close the file - which it will do implicitly, by segfaulting when it | ||
314 | 	 * tries to read the IDs back from that invalid memory. | ||
315 | */ | ||
316 | if (copy_to_user(arg, &op, sizeof(op))) { | ||
317 | rc = -EFAULT; | ||
318 | goto out_free; | ||
319 | } | ||
320 | if (copy_to_user(arg->gref_ids, gref_ids, | ||
321 | sizeof(gref_ids[0]) * op.count)) { | ||
322 | rc = -EFAULT; | ||
323 | goto out_free; | ||
324 | } | ||
325 | |||
326 | out_free: | ||
327 | kfree(gref_ids); | ||
328 | out: | ||
329 | return rc; | ||
330 | } | ||
331 | |||
332 | static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, | ||
333 | void __user *arg) | ||
334 | { | ||
335 | int i, rc = 0; | ||
336 | struct ioctl_gntalloc_dealloc_gref op; | ||
337 | struct gntalloc_gref *gref, *n; | ||
338 | |||
339 | pr_debug("%s: priv %p\n", __func__, priv); | ||
340 | |||
341 | if (copy_from_user(&op, arg, sizeof(op))) { | ||
342 | rc = -EFAULT; | ||
343 | goto dealloc_grant_out; | ||
344 | } | ||
345 | |||
346 | spin_lock(&gref_lock); | ||
347 | gref = find_grefs(priv, op.index, op.count); | ||
348 | if (gref) { | ||
349 | /* Remove from the file list only, and decrease reference count. | ||
350 | * The later call to do_cleanup() will remove from gref_list and | ||
351 | * free the memory if the pages aren't mapped anywhere. | ||
352 | */ | ||
353 | for (i = 0; i < op.count; i++) { | ||
354 | n = list_entry(gref->next_file.next, | ||
355 | struct gntalloc_gref, next_file); | ||
356 | list_del(&gref->next_file); | ||
357 | gref->users--; | ||
358 | gref = n; | ||
359 | } | ||
360 | } else { | ||
361 | rc = -EINVAL; | ||
362 | } | ||
363 | |||
364 | do_cleanup(); | ||
365 | |||
366 | spin_unlock(&gref_lock); | ||
367 | dealloc_grant_out: | ||
368 | return rc; | ||
369 | } | ||
370 | |||
371 | static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv, | ||
372 | void __user *arg) | ||
373 | { | ||
374 | struct ioctl_gntalloc_unmap_notify op; | ||
375 | struct gntalloc_gref *gref; | ||
376 | uint64_t index; | ||
377 | int pgoff; | ||
378 | int rc; | ||
379 | |||
380 | if (copy_from_user(&op, arg, sizeof(op))) | ||
381 | return -EFAULT; | ||
382 | |||
383 | index = op.index & ~(PAGE_SIZE - 1); | ||
384 | pgoff = op.index & (PAGE_SIZE - 1); | ||
385 | |||
386 | spin_lock(&gref_lock); | ||
387 | |||
388 | gref = find_grefs(priv, index, 1); | ||
389 | if (!gref) { | ||
390 | rc = -ENOENT; | ||
391 | goto unlock_out; | ||
392 | } | ||
393 | |||
394 | if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) { | ||
395 | rc = -EINVAL; | ||
396 | goto unlock_out; | ||
397 | } | ||
398 | |||
399 | gref->notify.flags = op.action; | ||
400 | gref->notify.pgoff = pgoff; | ||
401 | gref->notify.event = op.event_channel_port; | ||
402 | rc = 0; | ||
403 | unlock_out: | ||
404 | spin_unlock(&gref_lock); | ||
405 | return rc; | ||
406 | } | ||
407 | |||
408 | static long gntalloc_ioctl(struct file *filp, unsigned int cmd, | ||
409 | unsigned long arg) | ||
410 | { | ||
411 | struct gntalloc_file_private_data *priv = filp->private_data; | ||
412 | |||
413 | switch (cmd) { | ||
414 | case IOCTL_GNTALLOC_ALLOC_GREF: | ||
415 | return gntalloc_ioctl_alloc(priv, (void __user *)arg); | ||
416 | |||
417 | case IOCTL_GNTALLOC_DEALLOC_GREF: | ||
418 | return gntalloc_ioctl_dealloc(priv, (void __user *)arg); | ||
419 | |||
420 | case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY: | ||
421 | return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg); | ||
422 | |||
423 | default: | ||
424 | return -ENOIOCTLCMD; | ||
425 | } | ||
426 | |||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | static void gntalloc_vma_open(struct vm_area_struct *vma) | ||
431 | { | ||
432 | struct gntalloc_gref *gref = vma->vm_private_data; | ||
433 | if (!gref) | ||
434 | return; | ||
435 | |||
436 | spin_lock(&gref_lock); | ||
437 | gref->users++; | ||
438 | spin_unlock(&gref_lock); | ||
439 | } | ||
440 | |||
441 | static void gntalloc_vma_close(struct vm_area_struct *vma) | ||
442 | { | ||
443 | struct gntalloc_gref *gref = vma->vm_private_data; | ||
444 | if (!gref) | ||
445 | return; | ||
446 | |||
447 | spin_lock(&gref_lock); | ||
448 | gref->users--; | ||
449 | if (gref->users == 0) | ||
450 | __del_gref(gref); | ||
451 | spin_unlock(&gref_lock); | ||
452 | } | ||
453 | |||
454 | static struct vm_operations_struct gntalloc_vmops = { | ||
455 | .open = gntalloc_vma_open, | ||
456 | .close = gntalloc_vma_close, | ||
457 | }; | ||
458 | |||
459 | static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) | ||
460 | { | ||
461 | struct gntalloc_file_private_data *priv = filp->private_data; | ||
462 | struct gntalloc_gref *gref; | ||
463 | int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | ||
464 | int rv, i; | ||
465 | |||
466 | pr_debug("%s: priv %p, page %lu+%d\n", __func__, | ||
467 | priv, vma->vm_pgoff, count); | ||
468 | |||
469 | if (!(vma->vm_flags & VM_SHARED)) { | ||
470 | printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); | ||
471 | return -EINVAL; | ||
472 | } | ||
473 | |||
474 | spin_lock(&gref_lock); | ||
475 | gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); | ||
476 | if (gref == NULL) { | ||
477 | rv = -ENOENT; | ||
478 | 		pr_debug("%s: Could not find grant reference\n", | ||
479 | __func__); | ||
480 | goto out_unlock; | ||
481 | } | ||
482 | |||
483 | vma->vm_private_data = gref; | ||
484 | |||
485 | vma->vm_flags |= VM_RESERVED; | ||
486 | |||
487 | vma->vm_ops = &gntalloc_vmops; | ||
488 | |||
489 | for (i = 0; i < count; i++) { | ||
490 | gref->users++; | ||
491 | rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE, | ||
492 | gref->page); | ||
493 | if (rv) | ||
494 | goto out_unlock; | ||
495 | |||
496 | gref = list_entry(gref->next_file.next, | ||
497 | struct gntalloc_gref, next_file); | ||
498 | } | ||
499 | rv = 0; | ||
500 | |||
501 | out_unlock: | ||
502 | spin_unlock(&gref_lock); | ||
503 | return rv; | ||
504 | } | ||
505 | |||
506 | static const struct file_operations gntalloc_fops = { | ||
507 | .owner = THIS_MODULE, | ||
508 | .open = gntalloc_open, | ||
509 | .release = gntalloc_release, | ||
510 | .unlocked_ioctl = gntalloc_ioctl, | ||
511 | .mmap = gntalloc_mmap | ||
512 | }; | ||
513 | |||
514 | /* | ||
515 | * ------------------------------------- | ||
516 | * Module creation/destruction. | ||
517 | * ------------------------------------- | ||
518 | */ | ||
519 | static struct miscdevice gntalloc_miscdev = { | ||
520 | .minor = MISC_DYNAMIC_MINOR, | ||
521 | .name = "xen/gntalloc", | ||
522 | .fops = &gntalloc_fops, | ||
523 | }; | ||
524 | |||
525 | static int __init gntalloc_init(void) | ||
526 | { | ||
527 | int err; | ||
528 | |||
529 | if (!xen_domain()) | ||
530 | return -ENODEV; | ||
531 | |||
532 | err = misc_register(&gntalloc_miscdev); | ||
533 | if (err != 0) { | ||
534 | printk(KERN_ERR "Could not register misc gntalloc device\n"); | ||
535 | return err; | ||
536 | } | ||
537 | |||
538 | pr_debug("Created grant allocation device at %d,%d\n", | ||
539 | MISC_MAJOR, gntalloc_miscdev.minor); | ||
540 | |||
541 | return 0; | ||
542 | } | ||
543 | |||
544 | static void __exit gntalloc_exit(void) | ||
545 | { | ||
546 | misc_deregister(&gntalloc_miscdev); | ||
547 | } | ||
548 | |||
549 | module_init(gntalloc_init); | ||
550 | module_exit(gntalloc_exit); | ||
551 | |||
552 | MODULE_LICENSE("GPL"); | ||
553 | MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, " | ||
554 | "Daniel De Graaf <dgdegra@tycho.nsa.gov>"); | ||
555 | MODULE_DESCRIPTION("User-space grant reference allocator driver"); | ||
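For orientation, here is a minimal sketch of how a user-space program is expected to drive /dev/xen/gntalloc: allocate a grant with IOCTL_GNTALLOC_ALLOC_GREF, mmap() the returned index, and let the final close() drop the local reference. The structure layout and GNTALLOC_FLAG_WRITABLE are assumed from the xen/gntalloc.h ioctl header that accompanies this driver (not part of this hunk), and REMOTE_DOMID is a placeholder, so treat this as an illustration rather than code from the patch.

/* Hypothetical gntalloc client; error handling omitted.  Structure and flag
 * names are assumed from the xen/gntalloc.h ioctl header. */
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/gntalloc.h>

#define REMOTE_DOMID 1          /* placeholder: domain that will map the page */

int main(void)
{
        int fd = open("/dev/xen/gntalloc", O_RDWR);
        struct ioctl_gntalloc_alloc_gref op = {
                .domid = REMOTE_DOMID,
                .flags = GNTALLOC_FLAG_WRITABLE,
                .count = 1,             /* one page, one id in op.gref_ids[] */
        };

        ioctl(fd, IOCTL_GNTALLOC_ALLOC_GREF, &op);

        /* op.index is the mmap offset; op.gref_ids[0] is what the peer needs. */
        char *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                          fd, op.index);
        strcpy(page, "hello from the granting domain");

        munmap(page, 4096);
        close(fd);                      /* last local user, see __del_gref() */
        return 0;
}

The IOCTL_GNTALLOC_SET_UNMAP_NOTIFY call handled in the ioctl switch above can additionally arrange for a byte clear or event-channel notification when the grant is finally destroyed in __del_gref().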
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c new file mode 100644 index 000000000000..f914b26cf0c2 --- /dev/null +++ b/drivers/xen/gntdev.c | |||
@@ -0,0 +1,765 @@ | |||
1 | /****************************************************************************** | ||
2 | * gntdev.c | ||
3 | * | ||
4 | * Device for accessing (in user-space) pages that have been granted by other | ||
5 | * domains. | ||
6 | * | ||
7 | * Copyright (c) 2006-2007, D G Murray. | ||
8 | * (c) 2009 Gerd Hoffmann <kraxel@redhat.com> | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #undef DEBUG | ||
21 | |||
22 | #include <linux/module.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/miscdevice.h> | ||
26 | #include <linux/fs.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/mman.h> | ||
29 | #include <linux/mmu_notifier.h> | ||
30 | #include <linux/types.h> | ||
31 | #include <linux/uaccess.h> | ||
32 | #include <linux/sched.h> | ||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/slab.h> | ||
35 | #include <linux/highmem.h> | ||
36 | |||
37 | #include <xen/xen.h> | ||
38 | #include <xen/grant_table.h> | ||
39 | #include <xen/balloon.h> | ||
40 | #include <xen/gntdev.h> | ||
41 | #include <xen/events.h> | ||
42 | #include <asm/xen/hypervisor.h> | ||
43 | #include <asm/xen/hypercall.h> | ||
44 | #include <asm/xen/page.h> | ||
45 | |||
46 | MODULE_LICENSE("GPL"); | ||
47 | MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, " | ||
48 | "Gerd Hoffmann <kraxel@redhat.com>"); | ||
49 | MODULE_DESCRIPTION("User-space granted page access driver"); | ||
50 | |||
51 | static int limit = 1024*1024; | ||
52 | module_param(limit, int, 0644); | ||
53 | MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " | ||
54 | "the gntdev device"); | ||
55 | |||
56 | static atomic_t pages_mapped = ATOMIC_INIT(0); | ||
57 | |||
58 | static int use_ptemod; | ||
59 | |||
60 | struct gntdev_priv { | ||
61 | struct list_head maps; | ||
62 | /* lock protects maps from concurrent changes */ | ||
63 | spinlock_t lock; | ||
64 | struct mm_struct *mm; | ||
65 | struct mmu_notifier mn; | ||
66 | }; | ||
67 | |||
68 | struct unmap_notify { | ||
69 | int flags; | ||
70 | /* Address relative to the start of the grant_map */ | ||
71 | int addr; | ||
72 | int event; | ||
73 | }; | ||
74 | |||
75 | struct grant_map { | ||
76 | struct list_head next; | ||
77 | struct vm_area_struct *vma; | ||
78 | int index; | ||
79 | int count; | ||
80 | int flags; | ||
81 | atomic_t users; | ||
82 | struct unmap_notify notify; | ||
83 | struct ioctl_gntdev_grant_ref *grants; | ||
84 | struct gnttab_map_grant_ref *map_ops; | ||
85 | struct gnttab_unmap_grant_ref *unmap_ops; | ||
86 | struct page **pages; | ||
87 | }; | ||
88 | |||
89 | static int unmap_grant_pages(struct grant_map *map, int offset, int pages); | ||
90 | |||
91 | /* ------------------------------------------------------------------ */ | ||
92 | |||
93 | static void gntdev_print_maps(struct gntdev_priv *priv, | ||
94 | char *text, int text_index) | ||
95 | { | ||
96 | #ifdef DEBUG | ||
97 | struct grant_map *map; | ||
98 | |||
99 | pr_debug("%s: maps list (priv %p)\n", __func__, priv); | ||
100 | list_for_each_entry(map, &priv->maps, next) | ||
101 | pr_debug(" index %2d, count %2d %s\n", | ||
102 | map->index, map->count, | ||
103 | map->index == text_index && text ? text : ""); | ||
104 | #endif | ||
105 | } | ||
106 | |||
107 | static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) | ||
108 | { | ||
109 | struct grant_map *add; | ||
110 | int i; | ||
111 | |||
112 | add = kzalloc(sizeof(struct grant_map), GFP_KERNEL); | ||
113 | if (NULL == add) | ||
114 | return NULL; | ||
115 | |||
116 | add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL); | ||
117 | add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL); | ||
118 | add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL); | ||
119 | add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL); | ||
120 | if (NULL == add->grants || | ||
121 | NULL == add->map_ops || | ||
122 | NULL == add->unmap_ops || | ||
123 | NULL == add->pages) | ||
124 | goto err; | ||
125 | |||
126 | if (alloc_xenballooned_pages(count, add->pages)) | ||
127 | goto err; | ||
128 | |||
129 | for (i = 0; i < count; i++) { | ||
130 | add->map_ops[i].handle = -1; | ||
131 | add->unmap_ops[i].handle = -1; | ||
132 | } | ||
133 | |||
134 | add->index = 0; | ||
135 | add->count = count; | ||
136 | atomic_set(&add->users, 1); | ||
137 | |||
138 | return add; | ||
139 | |||
140 | err: | ||
141 | kfree(add->pages); | ||
142 | kfree(add->grants); | ||
143 | kfree(add->map_ops); | ||
144 | kfree(add->unmap_ops); | ||
145 | kfree(add); | ||
146 | return NULL; | ||
147 | } | ||
148 | |||
149 | static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add) | ||
150 | { | ||
151 | struct grant_map *map; | ||
152 | |||
153 | list_for_each_entry(map, &priv->maps, next) { | ||
154 | if (add->index + add->count < map->index) { | ||
155 | list_add_tail(&add->next, &map->next); | ||
156 | goto done; | ||
157 | } | ||
158 | add->index = map->index + map->count; | ||
159 | } | ||
160 | list_add_tail(&add->next, &priv->maps); | ||
161 | |||
162 | done: | ||
163 | gntdev_print_maps(priv, "[new]", add->index); | ||
164 | } | ||
165 | |||
166 | static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, | ||
167 | int index, int count) | ||
168 | { | ||
169 | struct grant_map *map; | ||
170 | |||
171 | list_for_each_entry(map, &priv->maps, next) { | ||
172 | if (map->index != index) | ||
173 | continue; | ||
174 | if (count && map->count != count) | ||
175 | continue; | ||
176 | return map; | ||
177 | } | ||
178 | return NULL; | ||
179 | } | ||
180 | |||
181 | static void gntdev_put_map(struct grant_map *map) | ||
182 | { | ||
183 | if (!map) | ||
184 | return; | ||
185 | |||
186 | if (!atomic_dec_and_test(&map->users)) | ||
187 | return; | ||
188 | |||
189 | atomic_sub(map->count, &pages_mapped); | ||
190 | |||
191 | if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { | ||
192 | notify_remote_via_evtchn(map->notify.event); | ||
193 | } | ||
194 | |||
195 | if (map->pages) { | ||
196 | if (!use_ptemod) | ||
197 | unmap_grant_pages(map, 0, map->count); | ||
198 | |||
199 | free_xenballooned_pages(map->count, map->pages); | ||
200 | } | ||
201 | kfree(map->pages); | ||
202 | kfree(map->grants); | ||
203 | kfree(map->map_ops); | ||
204 | kfree(map->unmap_ops); | ||
205 | kfree(map); | ||
206 | } | ||
207 | |||
208 | /* ------------------------------------------------------------------ */ | ||
209 | |||
210 | static int find_grant_ptes(pte_t *pte, pgtable_t token, | ||
211 | unsigned long addr, void *data) | ||
212 | { | ||
213 | struct grant_map *map = data; | ||
214 | unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; | ||
215 | int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; | ||
216 | u64 pte_maddr; | ||
217 | |||
218 | BUG_ON(pgnr >= map->count); | ||
219 | pte_maddr = arbitrary_virt_to_machine(pte).maddr; | ||
220 | |||
221 | gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, | ||
222 | map->grants[pgnr].ref, | ||
223 | map->grants[pgnr].domid); | ||
224 | gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags, | ||
225 | -1 /* handle */); | ||
226 | return 0; | ||
227 | } | ||
228 | |||
229 | static int map_grant_pages(struct grant_map *map) | ||
230 | { | ||
231 | int i, err = 0; | ||
232 | |||
233 | if (!use_ptemod) { | ||
234 | /* Note: it could already be mapped */ | ||
235 | if (map->map_ops[0].handle != -1) | ||
236 | return 0; | ||
237 | for (i = 0; i < map->count; i++) { | ||
238 | unsigned long addr = (unsigned long) | ||
239 | pfn_to_kaddr(page_to_pfn(map->pages[i])); | ||
240 | gnttab_set_map_op(&map->map_ops[i], addr, map->flags, | ||
241 | map->grants[i].ref, | ||
242 | map->grants[i].domid); | ||
243 | gnttab_set_unmap_op(&map->unmap_ops[i], addr, | ||
244 | map->flags, -1 /* handle */); | ||
245 | } | ||
246 | } | ||
247 | |||
248 | pr_debug("map %d+%d\n", map->index, map->count); | ||
249 | err = gnttab_map_refs(map->map_ops, map->pages, map->count); | ||
250 | if (err) | ||
251 | return err; | ||
252 | |||
253 | for (i = 0; i < map->count; i++) { | ||
254 | if (map->map_ops[i].status) | ||
255 | err = -EINVAL; | ||
256 | else { | ||
257 | BUG_ON(map->map_ops[i].handle == -1); | ||
258 | map->unmap_ops[i].handle = map->map_ops[i].handle; | ||
259 | pr_debug("map handle=%d\n", map->map_ops[i].handle); | ||
260 | } | ||
261 | } | ||
262 | return err; | ||
263 | } | ||
264 | |||
265 | static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) | ||
266 | { | ||
267 | int i, err = 0; | ||
268 | |||
269 | if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { | ||
270 | int pgno = (map->notify.addr >> PAGE_SHIFT); | ||
271 | if (pgno >= offset && pgno < offset + pages && use_ptemod) { | ||
272 | void __user *tmp = (void __user *) | ||
273 | map->vma->vm_start + map->notify.addr; | ||
274 | err = copy_to_user(tmp, &err, 1); | ||
275 | if (err) | ||
276 | return -EFAULT; | ||
277 | map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; | ||
278 | } else if (pgno >= offset && pgno < offset + pages) { | ||
279 | uint8_t *tmp = kmap(map->pages[pgno]); | ||
280 | tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; | ||
281 | kunmap(map->pages[pgno]); | ||
282 | map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages); | ||
287 | if (err) | ||
288 | return err; | ||
289 | |||
290 | for (i = 0; i < pages; i++) { | ||
291 | if (map->unmap_ops[offset+i].status) | ||
292 | err = -EINVAL; | ||
293 | pr_debug("unmap handle=%d st=%d\n", | ||
294 | map->unmap_ops[offset+i].handle, | ||
295 | map->unmap_ops[offset+i].status); | ||
296 | map->unmap_ops[offset+i].handle = -1; | ||
297 | } | ||
298 | return err; | ||
299 | } | ||
300 | |||
301 | static int unmap_grant_pages(struct grant_map *map, int offset, int pages) | ||
302 | { | ||
303 | int range, err = 0; | ||
304 | |||
305 | pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages); | ||
306 | |||
307 | /* It is possible the requested range will have a "hole" where we | ||
308 | * already unmapped some of the grants. Only unmap valid ranges. | ||
309 | */ | ||
310 | while (pages && !err) { | ||
311 | while (pages && map->unmap_ops[offset].handle == -1) { | ||
312 | offset++; | ||
313 | pages--; | ||
314 | } | ||
315 | range = 0; | ||
316 | while (range < pages) { | ||
317 | if (map->unmap_ops[offset+range].handle == -1) { | ||
318 | range--; | ||
319 | break; | ||
320 | } | ||
321 | range++; | ||
322 | } | ||
323 | err = __unmap_grant_pages(map, offset, range); | ||
324 | offset += range; | ||
325 | pages -= range; | ||
326 | } | ||
327 | |||
328 | return err; | ||
329 | } | ||
330 | |||
331 | /* ------------------------------------------------------------------ */ | ||
332 | |||
333 | static void gntdev_vma_open(struct vm_area_struct *vma) | ||
334 | { | ||
335 | struct grant_map *map = vma->vm_private_data; | ||
336 | |||
337 | pr_debug("gntdev_vma_open %p\n", vma); | ||
338 | atomic_inc(&map->users); | ||
339 | } | ||
340 | |||
341 | static void gntdev_vma_close(struct vm_area_struct *vma) | ||
342 | { | ||
343 | struct grant_map *map = vma->vm_private_data; | ||
344 | |||
345 | pr_debug("gntdev_vma_close %p\n", vma); | ||
346 | map->vma = NULL; | ||
347 | vma->vm_private_data = NULL; | ||
348 | gntdev_put_map(map); | ||
349 | } | ||
350 | |||
351 | static struct vm_operations_struct gntdev_vmops = { | ||
352 | .open = gntdev_vma_open, | ||
353 | .close = gntdev_vma_close, | ||
354 | }; | ||
355 | |||
356 | /* ------------------------------------------------------------------ */ | ||
357 | |||
358 | static void mn_invl_range_start(struct mmu_notifier *mn, | ||
359 | struct mm_struct *mm, | ||
360 | unsigned long start, unsigned long end) | ||
361 | { | ||
362 | struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); | ||
363 | struct grant_map *map; | ||
364 | unsigned long mstart, mend; | ||
365 | int err; | ||
366 | |||
367 | spin_lock(&priv->lock); | ||
368 | list_for_each_entry(map, &priv->maps, next) { | ||
369 | if (!map->vma) | ||
370 | continue; | ||
371 | if (map->vma->vm_start >= end) | ||
372 | continue; | ||
373 | if (map->vma->vm_end <= start) | ||
374 | continue; | ||
375 | mstart = max(start, map->vma->vm_start); | ||
376 | mend = min(end, map->vma->vm_end); | ||
377 | pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", | ||
378 | map->index, map->count, | ||
379 | map->vma->vm_start, map->vma->vm_end, | ||
380 | start, end, mstart, mend); | ||
381 | err = unmap_grant_pages(map, | ||
382 | (mstart - map->vma->vm_start) >> PAGE_SHIFT, | ||
383 | (mend - mstart) >> PAGE_SHIFT); | ||
384 | WARN_ON(err); | ||
385 | } | ||
386 | spin_unlock(&priv->lock); | ||
387 | } | ||
388 | |||
389 | static void mn_invl_page(struct mmu_notifier *mn, | ||
390 | struct mm_struct *mm, | ||
391 | unsigned long address) | ||
392 | { | ||
393 | mn_invl_range_start(mn, mm, address, address + PAGE_SIZE); | ||
394 | } | ||
395 | |||
396 | static void mn_release(struct mmu_notifier *mn, | ||
397 | struct mm_struct *mm) | ||
398 | { | ||
399 | struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); | ||
400 | struct grant_map *map; | ||
401 | int err; | ||
402 | |||
403 | spin_lock(&priv->lock); | ||
404 | list_for_each_entry(map, &priv->maps, next) { | ||
405 | if (!map->vma) | ||
406 | continue; | ||
407 | pr_debug("map %d+%d (%lx %lx)\n", | ||
408 | map->index, map->count, | ||
409 | map->vma->vm_start, map->vma->vm_end); | ||
410 | err = unmap_grant_pages(map, /* offset */ 0, map->count); | ||
411 | WARN_ON(err); | ||
412 | } | ||
413 | spin_unlock(&priv->lock); | ||
414 | } | ||
415 | |||
416 | struct mmu_notifier_ops gntdev_mmu_ops = { | ||
417 | .release = mn_release, | ||
418 | .invalidate_page = mn_invl_page, | ||
419 | .invalidate_range_start = mn_invl_range_start, | ||
420 | }; | ||
421 | |||
422 | /* ------------------------------------------------------------------ */ | ||
423 | |||
424 | static int gntdev_open(struct inode *inode, struct file *flip) | ||
425 | { | ||
426 | struct gntdev_priv *priv; | ||
427 | int ret = 0; | ||
428 | |||
429 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
430 | if (!priv) | ||
431 | return -ENOMEM; | ||
432 | |||
433 | INIT_LIST_HEAD(&priv->maps); | ||
434 | spin_lock_init(&priv->lock); | ||
435 | |||
436 | if (use_ptemod) { | ||
437 | priv->mm = get_task_mm(current); | ||
438 | if (!priv->mm) { | ||
439 | kfree(priv); | ||
440 | return -ENOMEM; | ||
441 | } | ||
442 | priv->mn.ops = &gntdev_mmu_ops; | ||
443 | ret = mmu_notifier_register(&priv->mn, priv->mm); | ||
444 | mmput(priv->mm); | ||
445 | } | ||
446 | |||
447 | if (ret) { | ||
448 | kfree(priv); | ||
449 | return ret; | ||
450 | } | ||
451 | |||
452 | flip->private_data = priv; | ||
453 | pr_debug("priv %p\n", priv); | ||
454 | |||
455 | return 0; | ||
456 | } | ||
457 | |||
458 | static int gntdev_release(struct inode *inode, struct file *flip) | ||
459 | { | ||
460 | struct gntdev_priv *priv = flip->private_data; | ||
461 | struct grant_map *map; | ||
462 | |||
463 | pr_debug("priv %p\n", priv); | ||
464 | |||
465 | spin_lock(&priv->lock); | ||
466 | while (!list_empty(&priv->maps)) { | ||
467 | map = list_entry(priv->maps.next, struct grant_map, next); | ||
468 | list_del(&map->next); | ||
469 | gntdev_put_map(map); | ||
470 | } | ||
471 | spin_unlock(&priv->lock); | ||
472 | |||
473 | if (use_ptemod) | ||
474 | mmu_notifier_unregister(&priv->mn, priv->mm); | ||
475 | kfree(priv); | ||
476 | return 0; | ||
477 | } | ||
478 | |||
479 | static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, | ||
480 | struct ioctl_gntdev_map_grant_ref __user *u) | ||
481 | { | ||
482 | struct ioctl_gntdev_map_grant_ref op; | ||
483 | struct grant_map *map; | ||
484 | int err; | ||
485 | |||
486 | if (copy_from_user(&op, u, sizeof(op)) != 0) | ||
487 | return -EFAULT; | ||
488 | pr_debug("priv %p, add %d\n", priv, op.count); | ||
489 | if (unlikely(op.count <= 0)) | ||
490 | return -EINVAL; | ||
491 | |||
492 | err = -ENOMEM; | ||
493 | map = gntdev_alloc_map(priv, op.count); | ||
494 | if (!map) | ||
495 | return err; | ||
496 | |||
497 | if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) { | ||
498 | pr_debug("can't map: over limit\n"); | ||
499 | gntdev_put_map(map); | ||
500 | return err; | ||
501 | } | ||
502 | |||
503 | if (copy_from_user(map->grants, &u->refs, | ||
504 | sizeof(map->grants[0]) * op.count) != 0) { | ||
505 | gntdev_put_map(map); | ||
506 | return err; | ||
507 | } | ||
508 | |||
509 | spin_lock(&priv->lock); | ||
510 | gntdev_add_map(priv, map); | ||
511 | op.index = map->index << PAGE_SHIFT; | ||
512 | spin_unlock(&priv->lock); | ||
513 | |||
514 | if (copy_to_user(u, &op, sizeof(op)) != 0) | ||
515 | return -EFAULT; | ||
516 | |||
517 | return 0; | ||
518 | } | ||
519 | |||
520 | static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, | ||
521 | struct ioctl_gntdev_unmap_grant_ref __user *u) | ||
522 | { | ||
523 | struct ioctl_gntdev_unmap_grant_ref op; | ||
524 | struct grant_map *map; | ||
525 | int err = -ENOENT; | ||
526 | |||
527 | if (copy_from_user(&op, u, sizeof(op)) != 0) | ||
528 | return -EFAULT; | ||
529 | pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count); | ||
530 | |||
531 | spin_lock(&priv->lock); | ||
532 | map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); | ||
533 | if (map) { | ||
534 | list_del(&map->next); | ||
535 | gntdev_put_map(map); | ||
536 | err = 0; | ||
537 | } | ||
538 | spin_unlock(&priv->lock); | ||
539 | return err; | ||
540 | } | ||
541 | |||
542 | static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, | ||
543 | struct ioctl_gntdev_get_offset_for_vaddr __user *u) | ||
544 | { | ||
545 | struct ioctl_gntdev_get_offset_for_vaddr op; | ||
546 | struct vm_area_struct *vma; | ||
547 | struct grant_map *map; | ||
548 | |||
549 | if (copy_from_user(&op, u, sizeof(op)) != 0) | ||
550 | return -EFAULT; | ||
551 | pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr); | ||
552 | |||
553 | vma = find_vma(current->mm, op.vaddr); | ||
554 | if (!vma || vma->vm_ops != &gntdev_vmops) | ||
555 | return -EINVAL; | ||
556 | |||
557 | map = vma->vm_private_data; | ||
558 | if (!map) | ||
559 | return -EINVAL; | ||
560 | |||
561 | op.offset = map->index << PAGE_SHIFT; | ||
562 | op.count = map->count; | ||
563 | |||
564 | if (copy_to_user(u, &op, sizeof(op)) != 0) | ||
565 | return -EFAULT; | ||
566 | return 0; | ||
567 | } | ||
568 | |||
569 | static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) | ||
570 | { | ||
571 | struct ioctl_gntdev_unmap_notify op; | ||
572 | struct grant_map *map; | ||
573 | int rc; | ||
574 | |||
575 | if (copy_from_user(&op, u, sizeof(op))) | ||
576 | return -EFAULT; | ||
577 | |||
578 | if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) | ||
579 | return -EINVAL; | ||
580 | |||
581 | spin_lock(&priv->lock); | ||
582 | |||
583 | list_for_each_entry(map, &priv->maps, next) { | ||
584 | uint64_t begin = map->index << PAGE_SHIFT; | ||
585 | uint64_t end = (map->index + map->count) << PAGE_SHIFT; | ||
586 | if (op.index >= begin && op.index < end) | ||
587 | goto found; | ||
588 | } | ||
589 | rc = -ENOENT; | ||
590 | goto unlock_out; | ||
591 | |||
592 | found: | ||
593 | if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) && | ||
594 | (map->flags & GNTMAP_readonly)) { | ||
595 | rc = -EINVAL; | ||
596 | goto unlock_out; | ||
597 | } | ||
598 | |||
599 | map->notify.flags = op.action; | ||
600 | map->notify.addr = op.index - (map->index << PAGE_SHIFT); | ||
601 | map->notify.event = op.event_channel_port; | ||
602 | rc = 0; | ||
603 | unlock_out: | ||
604 | spin_unlock(&priv->lock); | ||
605 | return rc; | ||
606 | } | ||
607 | |||
608 | static long gntdev_ioctl(struct file *flip, | ||
609 | unsigned int cmd, unsigned long arg) | ||
610 | { | ||
611 | struct gntdev_priv *priv = flip->private_data; | ||
612 | void __user *ptr = (void __user *)arg; | ||
613 | |||
614 | switch (cmd) { | ||
615 | case IOCTL_GNTDEV_MAP_GRANT_REF: | ||
616 | return gntdev_ioctl_map_grant_ref(priv, ptr); | ||
617 | |||
618 | case IOCTL_GNTDEV_UNMAP_GRANT_REF: | ||
619 | return gntdev_ioctl_unmap_grant_ref(priv, ptr); | ||
620 | |||
621 | case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: | ||
622 | return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); | ||
623 | |||
624 | case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: | ||
625 | return gntdev_ioctl_notify(priv, ptr); | ||
626 | |||
627 | default: | ||
628 | pr_debug("priv %p, unknown cmd %x\n", priv, cmd); | ||
629 | return -ENOIOCTLCMD; | ||
630 | } | ||
631 | |||
632 | return 0; | ||
633 | } | ||
634 | |||
635 | static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) | ||
636 | { | ||
637 | struct gntdev_priv *priv = flip->private_data; | ||
638 | int index = vma->vm_pgoff; | ||
639 | int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | ||
640 | struct grant_map *map; | ||
641 | int i, err = -EINVAL; | ||
642 | |||
643 | if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) | ||
644 | return -EINVAL; | ||
645 | |||
646 | pr_debug("map %d+%d at %lx (pgoff %lx)\n", | ||
647 | index, count, vma->vm_start, vma->vm_pgoff); | ||
648 | |||
649 | spin_lock(&priv->lock); | ||
650 | map = gntdev_find_map_index(priv, index, count); | ||
651 | if (!map) | ||
652 | goto unlock_out; | ||
653 | if (use_ptemod && map->vma) | ||
654 | goto unlock_out; | ||
655 | if (use_ptemod && priv->mm != vma->vm_mm) { | ||
656 | printk(KERN_WARNING "Huh? Other mm?\n"); | ||
657 | goto unlock_out; | ||
658 | } | ||
659 | |||
660 | atomic_inc(&map->users); | ||
661 | |||
662 | vma->vm_ops = &gntdev_vmops; | ||
663 | |||
664 | vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND; | ||
665 | |||
666 | if (use_ptemod) | ||
667 | vma->vm_flags |= VM_DONTCOPY|VM_PFNMAP; | ||
668 | |||
669 | vma->vm_private_data = map; | ||
670 | |||
671 | if (use_ptemod) | ||
672 | map->vma = vma; | ||
673 | |||
674 | if (map->flags) { | ||
675 | if ((vma->vm_flags & VM_WRITE) && | ||
676 | (map->flags & GNTMAP_readonly)) | ||
677 | goto out_unlock_put; | ||
678 | } else { | ||
679 | map->flags = GNTMAP_host_map; | ||
680 | if (!(vma->vm_flags & VM_WRITE)) | ||
681 | map->flags |= GNTMAP_readonly; | ||
682 | } | ||
683 | |||
684 | spin_unlock(&priv->lock); | ||
685 | |||
686 | if (use_ptemod) { | ||
687 | err = apply_to_page_range(vma->vm_mm, vma->vm_start, | ||
688 | vma->vm_end - vma->vm_start, | ||
689 | find_grant_ptes, map); | ||
690 | if (err) { | ||
691 | printk(KERN_WARNING "find_grant_ptes() failure.\n"); | ||
692 | goto out_put_map; | ||
693 | } | ||
694 | } | ||
695 | |||
696 | err = map_grant_pages(map); | ||
697 | if (err) | ||
698 | goto out_put_map; | ||
699 | |||
700 | if (!use_ptemod) { | ||
701 | for (i = 0; i < count; i++) { | ||
702 | err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE, | ||
703 | map->pages[i]); | ||
704 | if (err) | ||
705 | goto out_put_map; | ||
706 | } | ||
707 | } | ||
708 | |||
709 | return 0; | ||
710 | |||
711 | unlock_out: | ||
712 | spin_unlock(&priv->lock); | ||
713 | return err; | ||
714 | |||
715 | out_unlock_put: | ||
716 | spin_unlock(&priv->lock); | ||
717 | out_put_map: | ||
718 | if (use_ptemod) | ||
719 | map->vma = NULL; | ||
720 | gntdev_put_map(map); | ||
721 | return err; | ||
722 | } | ||
723 | |||
724 | static const struct file_operations gntdev_fops = { | ||
725 | .owner = THIS_MODULE, | ||
726 | .open = gntdev_open, | ||
727 | .release = gntdev_release, | ||
728 | .mmap = gntdev_mmap, | ||
729 | .unlocked_ioctl = gntdev_ioctl | ||
730 | }; | ||
731 | |||
732 | static struct miscdevice gntdev_miscdev = { | ||
733 | .minor = MISC_DYNAMIC_MINOR, | ||
734 | .name = "xen/gntdev", | ||
735 | .fops = &gntdev_fops, | ||
736 | }; | ||
737 | |||
738 | /* ------------------------------------------------------------------ */ | ||
739 | |||
740 | static int __init gntdev_init(void) | ||
741 | { | ||
742 | int err; | ||
743 | |||
744 | if (!xen_domain()) | ||
745 | return -ENODEV; | ||
746 | |||
747 | use_ptemod = xen_pv_domain(); | ||
748 | |||
749 | err = misc_register(&gntdev_miscdev); | ||
750 | if (err != 0) { | ||
751 | printk(KERN_ERR "Could not register gntdev device\n"); | ||
752 | return err; | ||
753 | } | ||
754 | return 0; | ||
755 | } | ||
756 | |||
757 | static void __exit gntdev_exit(void) | ||
758 | { | ||
759 | misc_deregister(&gntdev_miscdev); | ||
760 | } | ||
761 | |||
762 | module_init(gntdev_init); | ||
763 | module_exit(gntdev_exit); | ||
764 | |||
765 | /* ------------------------------------------------------------------ */ | ||
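On the consuming side, gntdev maps a reference that another domain has granted. Below is a minimal user-space sketch under the same caveats (structure layout assumed from the xen/gntdev.h ioctl header, REMOTE_DOMID/REMOTE_GREF obtained out of band, no error handling); the actual grant mapping is performed in gntdev_mmap()/map_grant_pages() above.

/* Hypothetical gntdev consumer; error handling omitted. */
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/gntdev.h>

#define REMOTE_DOMID 1          /* placeholder: granting domain */
#define REMOTE_GREF  42         /* placeholder: grant reference it published */

int main(void)
{
        int fd = open("/dev/xen/gntdev", O_RDWR);
        struct ioctl_gntdev_map_grant_ref map = {
                .count = 1,
                .refs[0] = { .domid = REMOTE_DOMID, .ref = REMOTE_GREF },
        };
        struct ioctl_gntdev_unmap_grant_ref unmap;

        /* Reserves a slot in priv->maps; the driver fills in map.index. */
        ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map);

        /* mmap() at that index triggers the actual GNTTABOP_map_grant_ref. */
        char *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                          fd, map.index);

        /* ... use the shared page ... */

        munmap(page, 4096);
        unmap.index = map.index;
        unmap.count = 1;
        ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap);
        close(fd);
        return 0;
}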
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 6c4531816496..fd725cde6ad1 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c | |||
@@ -447,6 +447,79 @@ unsigned int gnttab_max_grant_frames(void) | |||
447 | } | 447 | } |
448 | EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); | 448 | EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); |
449 | 449 | ||
450 | int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, | ||
451 | struct page **pages, unsigned int count) | ||
452 | { | ||
453 | int i, ret; | ||
454 | pte_t *pte; | ||
455 | unsigned long mfn; | ||
456 | |||
457 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count); | ||
458 | if (ret) | ||
459 | return ret; | ||
460 | |||
461 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
462 | return ret; | ||
463 | |||
464 | for (i = 0; i < count; i++) { | ||
465 | /* Do not add to override if the map failed. */ | ||
466 | if (map_ops[i].status) | ||
467 | continue; | ||
468 | |||
469 | if (map_ops[i].flags & GNTMAP_contains_pte) { | ||
470 | pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + | ||
471 | (map_ops[i].host_addr & ~PAGE_MASK)); | ||
472 | mfn = pte_mfn(*pte); | ||
473 | } else { | ||
474 | /* If you really wanted to do this: | ||
475 | * mfn = PFN_DOWN(map_ops[i].dev_bus_addr); | ||
476 | * | ||
477 | 			 * The reason we do not implement it is because on the | ||
478 | * unmap path (gnttab_unmap_refs) we have no means of | ||
479 | * checking whether the page is !GNTMAP_contains_pte. | ||
480 | * | ||
481 | * That is without some extra data-structure to carry | ||
482 | * the struct page, bool clear_pte, and list_head next | ||
483 | 			 * tuples and deal with allocation/deallocation, etc. | ||
484 | * | ||
485 | * The users of this API set the GNTMAP_contains_pte | ||
486 | 			 * flag, so let's just return not supported until it | ||
487 | 			 * becomes necessary to implement. | ||
488 | */ | ||
489 | return -EOPNOTSUPP; | ||
490 | } | ||
491 | ret = m2p_add_override(mfn, pages[i], | ||
492 | map_ops[i].flags & GNTMAP_contains_pte); | ||
493 | if (ret) | ||
494 | return ret; | ||
495 | } | ||
496 | |||
497 | return ret; | ||
498 | } | ||
499 | EXPORT_SYMBOL_GPL(gnttab_map_refs); | ||
500 | |||
501 | int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, | ||
502 | struct page **pages, unsigned int count) | ||
503 | { | ||
504 | int i, ret; | ||
505 | |||
506 | ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); | ||
507 | if (ret) | ||
508 | return ret; | ||
509 | |||
510 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
511 | return ret; | ||
512 | |||
513 | for (i = 0; i < count; i++) { | ||
514 | ret = m2p_remove_override(pages[i], true /* clear the PTE */); | ||
515 | if (ret) | ||
516 | return ret; | ||
517 | } | ||
518 | |||
519 | return ret; | ||
520 | } | ||
521 | EXPORT_SYMBOL_GPL(gnttab_unmap_refs); | ||
522 | |||
450 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | 523 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) |
451 | { | 524 | { |
452 | struct gnttab_setup_table setup; | 525 | struct gnttab_setup_table setup; |
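The gnttab_map_refs()/gnttab_unmap_refs() pair added above bundles the GNTTABOP hypercall with the m2p override bookkeeping a PV kernel needs. As a rough illustration of the intended calling pattern, modelled on gntdev's !use_ptemod path (an illustrative sketch, not code from this patch):

/* Hedged in-kernel sketch: map a single page granted by 'domid' via 'ref',
 * backing it with a ballooned-out page as gntdev does. */
#include <linux/mm.h>
#include <xen/grant_table.h>
#include <xen/balloon.h>
#include <asm/xen/page.h>

static int map_one_foreign_page(domid_t domid, grant_ref_t ref,
                                struct page **page_out)
{
        struct gnttab_map_grant_ref map_op;
        struct page *page;
        unsigned long addr;
        int err;

        if (alloc_xenballooned_pages(1, &page))
                return -ENOMEM;

        addr = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
        gnttab_set_map_op(&map_op, addr, GNTMAP_host_map, ref, domid);

        err = gnttab_map_refs(&map_op, &page, 1);
        if (err || map_op.status) {
                free_xenballooned_pages(1, &page);
                return err ? err : -EINVAL;
        }

        /* Tear-down would use gnttab_set_unmap_op() + gnttab_unmap_refs(). */
        *page_out = page;
        return 0;
}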
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index ef9c7db52077..0b5366b5be20 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/sysrq.h> | 8 | #include <linux/sysrq.h> |
9 | #include <linux/stop_machine.h> | 9 | #include <linux/stop_machine.h> |
10 | #include <linux/freezer.h> | 10 | #include <linux/freezer.h> |
11 | #include <linux/syscore_ops.h> | ||
11 | 12 | ||
12 | #include <xen/xen.h> | 13 | #include <xen/xen.h> |
13 | #include <xen/xenbus.h> | 14 | #include <xen/xenbus.h> |
@@ -34,63 +35,68 @@ enum shutdown_state { | |||
34 | /* Ignore multiple shutdown requests. */ | 35 | /* Ignore multiple shutdown requests. */ |
35 | static enum shutdown_state shutting_down = SHUTDOWN_INVALID; | 36 | static enum shutdown_state shutting_down = SHUTDOWN_INVALID; |
36 | 37 | ||
37 | #ifdef CONFIG_PM_SLEEP | 38 | struct suspend_info { |
38 | static int xen_hvm_suspend(void *data) | 39 | int cancelled; |
39 | { | 40 | unsigned long arg; /* extra hypercall argument */ |
40 | struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; | 41 | void (*pre)(void); |
41 | int *cancelled = data; | 42 | void (*post)(int cancelled); |
42 | 43 | }; | |
43 | BUG_ON(!irqs_disabled()); | ||
44 | |||
45 | *cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r); | ||
46 | 44 | ||
47 | xen_hvm_post_suspend(*cancelled); | 45 | static void xen_hvm_post_suspend(int cancelled) |
46 | { | ||
47 | xen_arch_hvm_post_suspend(cancelled); | ||
48 | gnttab_resume(); | 48 | gnttab_resume(); |
49 | } | ||
49 | 50 | ||
50 | if (!*cancelled) { | 51 | static void xen_pre_suspend(void) |
51 | xen_irq_resume(); | 52 | { |
52 | xen_timer_resume(); | 53 | xen_mm_pin_all(); |
53 | } | 54 | gnttab_suspend(); |
55 | xen_arch_pre_suspend(); | ||
56 | } | ||
54 | 57 | ||
55 | return 0; | 58 | static void xen_post_suspend(int cancelled) |
59 | { | ||
60 | xen_arch_post_suspend(cancelled); | ||
61 | gnttab_resume(); | ||
62 | xen_mm_unpin_all(); | ||
56 | } | 63 | } |
57 | 64 | ||
65 | #ifdef CONFIG_HIBERNATE_CALLBACKS | ||
58 | static int xen_suspend(void *data) | 66 | static int xen_suspend(void *data) |
59 | { | 67 | { |
68 | struct suspend_info *si = data; | ||
60 | int err; | 69 | int err; |
61 | int *cancelled = data; | ||
62 | 70 | ||
63 | BUG_ON(!irqs_disabled()); | 71 | BUG_ON(!irqs_disabled()); |
64 | 72 | ||
65 | err = sysdev_suspend(PMSG_SUSPEND); | 73 | err = syscore_suspend(); |
66 | if (err) { | 74 | if (err) { |
67 | printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n", | 75 | printk(KERN_ERR "xen_suspend: system core suspend failed: %d\n", |
68 | err); | 76 | err); |
69 | return err; | 77 | return err; |
70 | } | 78 | } |
71 | 79 | ||
72 | xen_mm_pin_all(); | 80 | if (si->pre) |
73 | gnttab_suspend(); | 81 | si->pre(); |
74 | xen_pre_suspend(); | ||
75 | 82 | ||
76 | /* | 83 | /* |
77 | * This hypercall returns 1 if suspend was cancelled | 84 | * This hypercall returns 1 if suspend was cancelled |
78 | * or the domain was merely checkpointed, and 0 if it | 85 | * or the domain was merely checkpointed, and 0 if it |
79 | * is resuming in a new domain. | 86 | * is resuming in a new domain. |
80 | */ | 87 | */ |
81 | *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); | 88 | si->cancelled = HYPERVISOR_suspend(si->arg); |
82 | 89 | ||
83 | xen_post_suspend(*cancelled); | 90 | if (si->post) |
84 | gnttab_resume(); | 91 | si->post(si->cancelled); |
85 | xen_mm_unpin_all(); | ||
86 | 92 | ||
87 | if (!*cancelled) { | 93 | if (!si->cancelled) { |
88 | xen_irq_resume(); | 94 | xen_irq_resume(); |
89 | xen_console_resume(); | 95 | xen_console_resume(); |
90 | xen_timer_resume(); | 96 | xen_timer_resume(); |
91 | } | 97 | } |
92 | 98 | ||
93 | sysdev_resume(); | 99 | syscore_resume(); |
94 | 100 | ||
95 | return 0; | 101 | return 0; |
96 | } | 102 | } |
@@ -98,7 +104,7 @@ static int xen_suspend(void *data) | |||
98 | static void do_suspend(void) | 104 | static void do_suspend(void) |
99 | { | 105 | { |
100 | int err; | 106 | int err; |
101 | int cancelled = 1; | 107 | struct suspend_info si; |
102 | 108 | ||
103 | shutting_down = SHUTDOWN_SUSPEND; | 109 | shutting_down = SHUTDOWN_SUSPEND; |
104 | 110 | ||
@@ -113,7 +119,7 @@ static void do_suspend(void) | |||
113 | } | 119 | } |
114 | #endif | 120 | #endif |
115 | 121 | ||
116 | err = dpm_suspend_start(PMSG_SUSPEND); | 122 | err = dpm_suspend_start(PMSG_FREEZE); |
117 | if (err) { | 123 | if (err) { |
118 | printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err); | 124 | printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err); |
119 | goto out_thaw; | 125 | goto out_thaw; |
@@ -122,32 +128,41 @@ static void do_suspend(void) | |||
122 | printk(KERN_DEBUG "suspending xenstore...\n"); | 128 | printk(KERN_DEBUG "suspending xenstore...\n"); |
123 | xs_suspend(); | 129 | xs_suspend(); |
124 | 130 | ||
125 | err = dpm_suspend_noirq(PMSG_SUSPEND); | 131 | err = dpm_suspend_noirq(PMSG_FREEZE); |
126 | if (err) { | 132 | if (err) { |
127 | printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err); | 133 | printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err); |
128 | goto out_resume; | 134 | goto out_resume; |
129 | } | 135 | } |
130 | 136 | ||
131 | if (xen_hvm_domain()) | 137 | si.cancelled = 1; |
132 | err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0)); | ||
133 | else | ||
134 | err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); | ||
135 | 138 | ||
136 | dpm_resume_noirq(PMSG_RESUME); | 139 | if (xen_hvm_domain()) { |
140 | si.arg = 0UL; | ||
141 | si.pre = NULL; | ||
142 | si.post = &xen_hvm_post_suspend; | ||
143 | } else { | ||
144 | si.arg = virt_to_mfn(xen_start_info); | ||
145 | si.pre = &xen_pre_suspend; | ||
146 | si.post = &xen_post_suspend; | ||
147 | } | ||
148 | |||
149 | err = stop_machine(xen_suspend, &si, cpumask_of(0)); | ||
150 | |||
151 | dpm_resume_noirq(si.cancelled ? PMSG_THAW : PMSG_RESTORE); | ||
137 | 152 | ||
138 | if (err) { | 153 | if (err) { |
139 | printk(KERN_ERR "failed to start xen_suspend: %d\n", err); | 154 | printk(KERN_ERR "failed to start xen_suspend: %d\n", err); |
140 | cancelled = 1; | 155 | si.cancelled = 1; |
141 | } | 156 | } |
142 | 157 | ||
143 | out_resume: | 158 | out_resume: |
144 | if (!cancelled) { | 159 | if (!si.cancelled) { |
145 | xen_arch_resume(); | 160 | xen_arch_resume(); |
146 | xs_resume(); | 161 | xs_resume(); |
147 | } else | 162 | } else |
148 | xs_suspend_cancel(); | 163 | xs_suspend_cancel(); |
149 | 164 | ||
150 | dpm_resume_end(PMSG_RESUME); | 165 | dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE); |
151 | 166 | ||
152 | /* Make sure timer events get retriggered on all CPUs */ | 167 | /* Make sure timer events get retriggered on all CPUs */ |
153 | clock_was_set(); | 168 | clock_was_set(); |
@@ -159,7 +174,24 @@ out: | |||
159 | #endif | 174 | #endif |
160 | shutting_down = SHUTDOWN_INVALID; | 175 | shutting_down = SHUTDOWN_INVALID; |
161 | } | 176 | } |
162 | #endif /* CONFIG_PM_SLEEP */ | 177 | #endif /* CONFIG_HIBERNATE_CALLBACKS */ |
178 | |||
179 | struct shutdown_handler { | ||
180 | const char *command; | ||
181 | void (*cb)(void); | ||
182 | }; | ||
183 | |||
184 | static void do_poweroff(void) | ||
185 | { | ||
186 | shutting_down = SHUTDOWN_POWEROFF; | ||
187 | orderly_poweroff(false); | ||
188 | } | ||
189 | |||
190 | static void do_reboot(void) | ||
191 | { | ||
192 | shutting_down = SHUTDOWN_POWEROFF; /* ? */ | ||
193 | ctrl_alt_del(); | ||
194 | } | ||
163 | 195 | ||
164 | static void shutdown_handler(struct xenbus_watch *watch, | 196 | static void shutdown_handler(struct xenbus_watch *watch, |
165 | const char **vec, unsigned int len) | 197 | const char **vec, unsigned int len) |
@@ -167,6 +199,16 @@ static void shutdown_handler(struct xenbus_watch *watch, | |||
167 | char *str; | 199 | char *str; |
168 | struct xenbus_transaction xbt; | 200 | struct xenbus_transaction xbt; |
169 | int err; | 201 | int err; |
202 | static struct shutdown_handler handlers[] = { | ||
203 | { "poweroff", do_poweroff }, | ||
204 | { "halt", do_poweroff }, | ||
205 | { "reboot", do_reboot }, | ||
206 | #ifdef CONFIG_HIBERNATE_CALLBACKS | ||
207 | { "suspend", do_suspend }, | ||
208 | #endif | ||
209 | {NULL, NULL}, | ||
210 | }; | ||
211 | static struct shutdown_handler *handler; | ||
170 | 212 | ||
171 | if (shutting_down != SHUTDOWN_INVALID) | 213 | if (shutting_down != SHUTDOWN_INVALID) |
172 | return; | 214 | return; |
@@ -183,7 +225,14 @@ static void shutdown_handler(struct xenbus_watch *watch, | |||
183 | return; | 225 | return; |
184 | } | 226 | } |
185 | 227 | ||
186 | xenbus_write(xbt, "control", "shutdown", ""); | 228 | for (handler = &handlers[0]; handler->command; handler++) { |
229 | if (strcmp(str, handler->command) == 0) | ||
230 | break; | ||
231 | } | ||
232 | |||
233 | /* Only acknowledge commands which we are prepared to handle. */ | ||
234 | if (handler->cb) | ||
235 | xenbus_write(xbt, "control", "shutdown", ""); | ||
187 | 236 | ||
188 | err = xenbus_transaction_end(xbt, 0); | 237 | err = xenbus_transaction_end(xbt, 0); |
189 | if (err == -EAGAIN) { | 238 | if (err == -EAGAIN) { |
@@ -191,17 +240,8 @@ static void shutdown_handler(struct xenbus_watch *watch, | |||
191 | goto again; | 240 | goto again; |
192 | } | 241 | } |
193 | 242 | ||
194 | if (strcmp(str, "poweroff") == 0 || | 243 | if (handler->cb) { |
195 | strcmp(str, "halt") == 0) { | 244 | handler->cb(); |
196 | shutting_down = SHUTDOWN_POWEROFF; | ||
197 | orderly_poweroff(false); | ||
198 | } else if (strcmp(str, "reboot") == 0) { | ||
199 | shutting_down = SHUTDOWN_POWEROFF; /* ? */ | ||
200 | ctrl_alt_del(); | ||
201 | #ifdef CONFIG_PM_SLEEP | ||
202 | } else if (strcmp(str, "suspend") == 0) { | ||
203 | do_suspend(); | ||
204 | #endif | ||
205 | } else { | 245 | } else { |
206 | printk(KERN_INFO "Ignoring shutdown request: %s\n", str); | 246 | printk(KERN_INFO "Ignoring shutdown request: %s\n", str); |
207 | shutting_down = SHUTDOWN_INVALID; | 247 | shutting_down = SHUTDOWN_INVALID; |
@@ -280,27 +320,18 @@ static int shutdown_event(struct notifier_block *notifier, | |||
280 | return NOTIFY_DONE; | 320 | return NOTIFY_DONE; |
281 | } | 321 | } |
282 | 322 | ||
283 | static int __init __setup_shutdown_event(void) | ||
284 | { | ||
285 | /* Delay initialization in the PV on HVM case */ | ||
286 | if (xen_hvm_domain()) | ||
287 | return 0; | ||
288 | |||
289 | if (!xen_pv_domain()) | ||
290 | return -ENODEV; | ||
291 | |||
292 | return xen_setup_shutdown_event(); | ||
293 | } | ||
294 | |||
295 | int xen_setup_shutdown_event(void) | 323 | int xen_setup_shutdown_event(void) |
296 | { | 324 | { |
297 | static struct notifier_block xenstore_notifier = { | 325 | static struct notifier_block xenstore_notifier = { |
298 | .notifier_call = shutdown_event | 326 | .notifier_call = shutdown_event |
299 | }; | 327 | }; |
328 | |||
329 | if (!xen_domain()) | ||
330 | return -ENODEV; | ||
300 | register_xenstore_notifier(&xenstore_notifier); | 331 | register_xenstore_notifier(&xenstore_notifier); |
301 | 332 | ||
302 | return 0; | 333 | return 0; |
303 | } | 334 | } |
304 | EXPORT_SYMBOL_GPL(xen_setup_shutdown_event); | 335 | EXPORT_SYMBOL_GPL(xen_setup_shutdown_event); |
305 | 336 | ||
306 | subsys_initcall(__setup_shutdown_event); | 337 | subsys_initcall(xen_setup_shutdown_event); |
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c new file mode 100644 index 000000000000..cef4bafc07dc --- /dev/null +++ b/drivers/xen/pci.c | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2009, Intel Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License along with | ||
14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
16 | * | ||
17 | * Author: Weidong Han <weidong.han@intel.com> | ||
18 | */ | ||
19 | |||
20 | #include <linux/pci.h> | ||
21 | #include <xen/xen.h> | ||
22 | #include <xen/interface/physdev.h> | ||
23 | #include <xen/interface/xen.h> | ||
24 | |||
25 | #include <asm/xen/hypervisor.h> | ||
26 | #include <asm/xen/hypercall.h> | ||
27 | #include "../pci/pci.h" | ||
28 | |||
29 | static int xen_add_device(struct device *dev) | ||
30 | { | ||
31 | int r; | ||
32 | struct pci_dev *pci_dev = to_pci_dev(dev); | ||
33 | |||
34 | #ifdef CONFIG_PCI_IOV | ||
35 | if (pci_dev->is_virtfn) { | ||
36 | struct physdev_manage_pci_ext manage_pci_ext = { | ||
37 | .bus = pci_dev->bus->number, | ||
38 | .devfn = pci_dev->devfn, | ||
39 | .is_virtfn = 1, | ||
40 | .physfn.bus = pci_dev->physfn->bus->number, | ||
41 | .physfn.devfn = pci_dev->physfn->devfn, | ||
42 | }; | ||
43 | |||
44 | r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, | ||
45 | &manage_pci_ext); | ||
46 | } else | ||
47 | #endif | ||
48 | if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { | ||
49 | struct physdev_manage_pci_ext manage_pci_ext = { | ||
50 | .bus = pci_dev->bus->number, | ||
51 | .devfn = pci_dev->devfn, | ||
52 | .is_extfn = 1, | ||
53 | }; | ||
54 | |||
55 | r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, | ||
56 | &manage_pci_ext); | ||
57 | } else { | ||
58 | struct physdev_manage_pci manage_pci = { | ||
59 | .bus = pci_dev->bus->number, | ||
60 | .devfn = pci_dev->devfn, | ||
61 | }; | ||
62 | |||
63 | r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, | ||
64 | &manage_pci); | ||
65 | } | ||
66 | |||
67 | return r; | ||
68 | } | ||
69 | |||
70 | static int xen_remove_device(struct device *dev) | ||
71 | { | ||
72 | int r; | ||
73 | struct pci_dev *pci_dev = to_pci_dev(dev); | ||
74 | struct physdev_manage_pci manage_pci; | ||
75 | |||
76 | manage_pci.bus = pci_dev->bus->number; | ||
77 | manage_pci.devfn = pci_dev->devfn; | ||
78 | |||
79 | r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, | ||
80 | &manage_pci); | ||
81 | |||
82 | return r; | ||
83 | } | ||
84 | |||
85 | static int xen_pci_notifier(struct notifier_block *nb, | ||
86 | unsigned long action, void *data) | ||
87 | { | ||
88 | struct device *dev = data; | ||
89 | int r = 0; | ||
90 | |||
91 | switch (action) { | ||
92 | case BUS_NOTIFY_ADD_DEVICE: | ||
93 | r = xen_add_device(dev); | ||
94 | break; | ||
95 | case BUS_NOTIFY_DEL_DEVICE: | ||
96 | r = xen_remove_device(dev); | ||
97 | break; | ||
98 | default: | ||
99 | break; | ||
100 | } | ||
101 | |||
102 | return r; | ||
103 | } | ||
104 | |||
105 | struct notifier_block device_nb = { | ||
106 | .notifier_call = xen_pci_notifier, | ||
107 | }; | ||
108 | |||
109 | static int __init register_xen_pci_notifier(void) | ||
110 | { | ||
111 | if (!xen_initial_domain()) | ||
112 | return 0; | ||
113 | |||
114 | return bus_register_notifier(&pci_bus_type, &device_nb); | ||
115 | } | ||
116 | |||
117 | arch_initcall(register_xen_pci_notifier); | ||
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index c01b5ddce529..319dd0a94d51 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c | |||
@@ -105,7 +105,7 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, | |||
105 | const struct pci_device_id *ent) | 105 | const struct pci_device_id *ent) |
106 | { | 106 | { |
107 | int i, ret; | 107 | int i, ret; |
108 | long ioaddr, iolen; | 108 | long ioaddr; |
109 | long mmio_addr, mmio_len; | 109 | long mmio_addr, mmio_len; |
110 | unsigned int max_nr_gframes; | 110 | unsigned int max_nr_gframes; |
111 | 111 | ||
@@ -114,7 +114,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, | |||
114 | return i; | 114 | return i; |
115 | 115 | ||
116 | ioaddr = pci_resource_start(pdev, 0); | 116 | ioaddr = pci_resource_start(pdev, 0); |
117 | iolen = pci_resource_len(pdev, 0); | ||
118 | 117 | ||
119 | mmio_addr = pci_resource_start(pdev, 1); | 118 | mmio_addr = pci_resource_start(pdev, 1); |
120 | mmio_len = pci_resource_len(pdev, 1); | 119 | mmio_len = pci_resource_len(pdev, 1); |
@@ -125,19 +124,13 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, | |||
125 | goto pci_out; | 124 | goto pci_out; |
126 | } | 125 | } |
127 | 126 | ||
128 | if (request_mem_region(mmio_addr, mmio_len, DRV_NAME) == NULL) { | 127 | ret = pci_request_region(pdev, 1, DRV_NAME); |
129 | dev_err(&pdev->dev, "MEM I/O resource 0x%lx @ 0x%lx busy\n", | 128 | if (ret < 0) |
130 | mmio_addr, mmio_len); | ||
131 | ret = -EBUSY; | ||
132 | goto pci_out; | 129 | goto pci_out; |
133 | } | ||
134 | 130 | ||
135 | if (request_region(ioaddr, iolen, DRV_NAME) == NULL) { | 131 | ret = pci_request_region(pdev, 0, DRV_NAME); |
136 | dev_err(&pdev->dev, "I/O resource 0x%lx @ 0x%lx busy\n", | 132 | if (ret < 0) |
137 | iolen, ioaddr); | ||
138 | ret = -EBUSY; | ||
139 | goto mem_out; | 133 | goto mem_out; |
140 | } | ||
141 | 134 | ||
142 | platform_mmio = mmio_addr; | 135 | platform_mmio = mmio_addr; |
143 | platform_mmiolen = mmio_len; | 136 | platform_mmiolen = mmio_len; |
@@ -163,15 +156,12 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, | |||
163 | if (ret) | 156 | if (ret) |
164 | goto out; | 157 | goto out; |
165 | xenbus_probe(NULL); | 158 | xenbus_probe(NULL); |
166 | ret = xen_setup_shutdown_event(); | ||
167 | if (ret) | ||
168 | goto out; | ||
169 | return 0; | 159 | return 0; |
170 | 160 | ||
171 | out: | 161 | out: |
172 | release_region(ioaddr, iolen); | 162 | pci_release_region(pdev, 0); |
173 | mem_out: | 163 | mem_out: |
174 | release_mem_region(mmio_addr, mmio_len); | 164 | pci_release_region(pdev, 1); |
175 | pci_out: | 165 | pci_out: |
176 | pci_disable_device(pdev); | 166 | pci_disable_device(pdev); |
177 | return ret; | 167 | return ret; |
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 54469c3eeacd..6e8c15a23201 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c | |||
@@ -54,7 +54,7 @@ u64 start_dma_addr; | |||
54 | 54 | ||
55 | static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) | 55 | static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) |
56 | { | 56 | { |
57 | return phys_to_machine(XPADDR(paddr)).maddr;; | 57 | return phys_to_machine(XPADDR(paddr)).maddr; |
58 | } | 58 | } |
59 | 59 | ||
60 | static phys_addr_t xen_bus_to_phys(dma_addr_t baddr) | 60 | static phys_addr_t xen_bus_to_phys(dma_addr_t baddr) |
@@ -147,9 +147,15 @@ void __init xen_swiotlb_init(int verbose) | |||
147 | { | 147 | { |
148 | unsigned long bytes; | 148 | unsigned long bytes; |
149 | int rc; | 149 | int rc; |
150 | 150 | unsigned long nr_tbl; | |
151 | xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); | 151 | |
152 | xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); | 152 | nr_tbl = swioltb_nr_tbl(); |
153 | if (nr_tbl) | ||
154 | xen_io_tlb_nslabs = nr_tbl; | ||
155 | else { | ||
156 | xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); | ||
157 | xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); | ||
158 | } | ||
153 | 159 | ||
154 | bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; | 160 | bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; |
155 | 161 | ||
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index 60f1827a32cb..1e0fe01eb670 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c | |||
@@ -215,7 +215,7 @@ static struct attribute_group xen_compilation_group = { | |||
215 | .attrs = xen_compile_attrs, | 215 | .attrs = xen_compile_attrs, |
216 | }; | 216 | }; |
217 | 217 | ||
218 | int __init static xen_compilation_init(void) | 218 | static int __init xen_compilation_init(void) |
219 | { | 219 | { |
220 | return sysfs_create_group(hypervisor_kobj, &xen_compilation_group); | 220 | return sysfs_create_group(hypervisor_kobj, &xen_compilation_group); |
221 | } | 221 | } |
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c new file mode 100644 index 000000000000..816a44959ef0 --- /dev/null +++ b/drivers/xen/tmem.c | |||
@@ -0,0 +1,264 @@ | |||
1 | /* | ||
2 | * Xen implementation for transcendent memory (tmem) | ||
3 | * | ||
4 | * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. | ||
5 | * Author: Dan Magenheimer | ||
6 | */ | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/pagemap.h> | ||
12 | #include <linux/cleancache.h> | ||
13 | |||
14 | #include <xen/xen.h> | ||
15 | #include <xen/interface/xen.h> | ||
16 | #include <asm/xen/hypercall.h> | ||
17 | #include <asm/xen/page.h> | ||
18 | #include <asm/xen/hypervisor.h> | ||
19 | |||
20 | #define TMEM_CONTROL 0 | ||
21 | #define TMEM_NEW_POOL 1 | ||
22 | #define TMEM_DESTROY_POOL 2 | ||
23 | #define TMEM_NEW_PAGE 3 | ||
24 | #define TMEM_PUT_PAGE 4 | ||
25 | #define TMEM_GET_PAGE 5 | ||
26 | #define TMEM_FLUSH_PAGE 6 | ||
27 | #define TMEM_FLUSH_OBJECT 7 | ||
28 | #define TMEM_READ 8 | ||
29 | #define TMEM_WRITE 9 | ||
30 | #define TMEM_XCHG 10 | ||
31 | |||
32 | /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ | ||
33 | #define TMEM_POOL_PERSIST 1 | ||
34 | #define TMEM_POOL_SHARED 2 | ||
35 | #define TMEM_POOL_PAGESIZE_SHIFT 4 | ||
36 | #define TMEM_VERSION_SHIFT 24 | ||
37 | #define TMEM_SPEC_VERSION 1 | ||
38 | |||
39 | struct tmem_pool_uuid { | ||
40 | u64 uuid_lo; | ||
41 | u64 uuid_hi; | ||
42 | }; | ||
43 | |||
44 | struct tmem_oid { | ||
45 | u64 oid[3]; | ||
46 | }; | ||
47 | |||
48 | #define TMEM_POOL_PRIVATE_UUID { 0, 0 } | ||
49 | |||
50 | /* flags for tmem_ops.new_pool */ | ||
51 | #define TMEM_POOL_PERSIST 1 | ||
52 | #define TMEM_POOL_SHARED 2 | ||
53 | |||
54 | /* xen tmem foundation ops/hypercalls */ | ||
55 | |||
56 | static inline int xen_tmem_op(u32 tmem_cmd, u32 tmem_pool, struct tmem_oid oid, | ||
57 | u32 index, unsigned long gmfn, u32 tmem_offset, u32 pfn_offset, u32 len) | ||
58 | { | ||
59 | struct tmem_op op; | ||
60 | int rc = 0; | ||
61 | |||
62 | op.cmd = tmem_cmd; | ||
63 | op.pool_id = tmem_pool; | ||
64 | op.u.gen.oid[0] = oid.oid[0]; | ||
65 | op.u.gen.oid[1] = oid.oid[1]; | ||
66 | op.u.gen.oid[2] = oid.oid[2]; | ||
67 | op.u.gen.index = index; | ||
68 | op.u.gen.tmem_offset = tmem_offset; | ||
69 | op.u.gen.pfn_offset = pfn_offset; | ||
70 | op.u.gen.len = len; | ||
71 | set_xen_guest_handle(op.u.gen.gmfn, (void *)gmfn); | ||
72 | rc = HYPERVISOR_tmem_op(&op); | ||
73 | return rc; | ||
74 | } | ||
75 | |||
76 | static int xen_tmem_new_pool(struct tmem_pool_uuid uuid, | ||
77 | u32 flags, unsigned long pagesize) | ||
78 | { | ||
79 | struct tmem_op op; | ||
80 | int rc = 0, pageshift; | ||
81 | |||
82 | for (pageshift = 0; pagesize != 1; pageshift++) | ||
83 | pagesize >>= 1; | ||
84 | flags |= (pageshift - 12) << TMEM_POOL_PAGESIZE_SHIFT; | ||
85 | flags |= TMEM_SPEC_VERSION << TMEM_VERSION_SHIFT; | ||
86 | op.cmd = TMEM_NEW_POOL; | ||
87 | op.u.new.uuid[0] = uuid.uuid_lo; | ||
88 | op.u.new.uuid[1] = uuid.uuid_hi; | ||
89 | op.u.new.flags = flags; | ||
90 | rc = HYPERVISOR_tmem_op(&op); | ||
91 | return rc; | ||
92 | } | ||
93 | |||
94 | /* xen generic tmem ops */ | ||
95 | |||
96 | static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid, | ||
97 | u32 index, unsigned long pfn) | ||
98 | { | ||
99 | unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; | ||
100 | |||
101 | return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index, | ||
102 | gmfn, 0, 0, 0); | ||
103 | } | ||
104 | |||
105 | static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid, | ||
106 | u32 index, unsigned long pfn) | ||
107 | { | ||
108 | unsigned long gmfn = xen_pv_domain() ? pfn_to_mfn(pfn) : pfn; | ||
109 | |||
110 | return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index, | ||
111 | gmfn, 0, 0, 0); | ||
112 | } | ||
113 | |||
114 | static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index) | ||
115 | { | ||
116 | return xen_tmem_op(TMEM_FLUSH_PAGE, pool_id, oid, index, | ||
117 | 0, 0, 0, 0); | ||
118 | } | ||
119 | |||
120 | static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) | ||
121 | { | ||
122 | return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); | ||
123 | } | ||
124 | |||
125 | static int xen_tmem_destroy_pool(u32 pool_id) | ||
126 | { | ||
127 | struct tmem_oid oid = { { 0 } }; | ||
128 | |||
129 | return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); | ||
130 | } | ||
131 | |||
132 | int tmem_enabled; | ||
133 | |||
134 | static int __init enable_tmem(char *s) | ||
135 | { | ||
136 | tmem_enabled = 1; | ||
137 | return 1; | ||
138 | } | ||
139 | |||
140 | __setup("tmem", enable_tmem); | ||
141 | |||
142 | /* cleancache ops */ | ||
143 | |||
144 | static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, | ||
145 | pgoff_t index, struct page *page) | ||
146 | { | ||
147 | u32 ind = (u32) index; | ||
148 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
149 | unsigned long pfn = page_to_pfn(page); | ||
150 | |||
151 | if (pool < 0) | ||
152 | return; | ||
153 | if (ind != index) | ||
154 | return; | ||
155 | mb(); /* ensure page is quiescent; tmem may address it with an alias */ | ||
156 | (void)xen_tmem_put_page((u32)pool, oid, ind, pfn); | ||
157 | } | ||
158 | |||
159 | static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key, | ||
160 | pgoff_t index, struct page *page) | ||
161 | { | ||
162 | u32 ind = (u32) index; | ||
163 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
164 | unsigned long pfn = page_to_pfn(page); | ||
165 | int ret; | ||
166 | |||
167 | /* translate return values to linux semantics */ | ||
168 | if (pool < 0) | ||
169 | return -1; | ||
170 | if (ind != index) | ||
171 | return -1; | ||
172 | ret = xen_tmem_get_page((u32)pool, oid, ind, pfn); | ||
173 | if (ret == 1) | ||
174 | return 0; | ||
175 | else | ||
176 | return -1; | ||
177 | } | ||
178 | |||
179 | static void tmem_cleancache_flush_page(int pool, struct cleancache_filekey key, | ||
180 | pgoff_t index) | ||
181 | { | ||
182 | u32 ind = (u32) index; | ||
183 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
184 | |||
185 | if (pool < 0) | ||
186 | return; | ||
187 | if (ind != index) | ||
188 | return; | ||
189 | (void)xen_tmem_flush_page((u32)pool, oid, ind); | ||
190 | } | ||
191 | |||
192 | static void tmem_cleancache_flush_inode(int pool, struct cleancache_filekey key) | ||
193 | { | ||
194 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
195 | |||
196 | if (pool < 0) | ||
197 | return; | ||
198 | (void)xen_tmem_flush_object((u32)pool, oid); | ||
199 | } | ||
200 | |||
201 | static void tmem_cleancache_flush_fs(int pool) | ||
202 | { | ||
203 | if (pool < 0) | ||
204 | return; | ||
205 | (void)xen_tmem_destroy_pool((u32)pool); | ||
206 | } | ||
207 | |||
208 | static int tmem_cleancache_init_fs(size_t pagesize) | ||
209 | { | ||
210 | struct tmem_pool_uuid uuid_private = TMEM_POOL_PRIVATE_UUID; | ||
211 | |||
212 | return xen_tmem_new_pool(uuid_private, 0, pagesize); | ||
213 | } | ||
214 | |||
215 | static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize) | ||
216 | { | ||
217 | struct tmem_pool_uuid shared_uuid; | ||
218 | |||
219 | shared_uuid.uuid_lo = *(u64 *)uuid; | ||
220 | shared_uuid.uuid_hi = *(u64 *)(&uuid[8]); | ||
221 | return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); | ||
222 | } | ||
223 | |||
224 | static int use_cleancache = 1; | ||
225 | |||
226 | static int __init no_cleancache(char *s) | ||
227 | { | ||
228 | use_cleancache = 0; | ||
229 | return 1; | ||
230 | } | ||
231 | |||
232 | __setup("nocleancache", no_cleancache); | ||
233 | |||
234 | static struct cleancache_ops tmem_cleancache_ops = { | ||
235 | .put_page = tmem_cleancache_put_page, | ||
236 | .get_page = tmem_cleancache_get_page, | ||
237 | .flush_page = tmem_cleancache_flush_page, | ||
238 | .flush_inode = tmem_cleancache_flush_inode, | ||
239 | .flush_fs = tmem_cleancache_flush_fs, | ||
240 | .init_shared_fs = tmem_cleancache_init_shared_fs, | ||
241 | .init_fs = tmem_cleancache_init_fs | ||
242 | }; | ||
243 | |||
244 | static int __init xen_tmem_init(void) | ||
245 | { | ||
246 | struct cleancache_ops old_ops; | ||
247 | |||
248 | if (!xen_domain()) | ||
249 | return 0; | ||
250 | #ifdef CONFIG_CLEANCACHE | ||
251 | BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); | ||
252 | if (tmem_enabled && use_cleancache) { | ||
253 | char *s = ""; | ||
254 | old_ops = cleancache_register_ops(&tmem_cleancache_ops); | ||
255 | if (old_ops.init_fs != NULL) | ||
256 | s = " (WARNING: cleancache_ops overridden)"; | ||
257 | printk(KERN_INFO "cleancache enabled, RAM provided by " | ||
258 | "Xen Transcendent Memory%s\n", s); | ||
259 | } | ||
260 | #endif | ||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | module_init(xen_tmem_init); | ||
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c new file mode 100644 index 000000000000..a4ff225ee868 --- /dev/null +++ b/drivers/xen/xen-balloon.c | |||
@@ -0,0 +1,256 @@ | |||
1 | /****************************************************************************** | ||
2 | * Xen balloon driver - enables returning/claiming memory to/from Xen. | ||
3 | * | ||
4 | * Copyright (c) 2003, B Dragovic | ||
5 | * Copyright (c) 2003-2004, M Williamson, K Fraser | ||
6 | * Copyright (c) 2005 Dan M. Smith, IBM Corporation | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License version 2 | ||
10 | * as published by the Free Software Foundation; or, when distributed | ||
11 | * separately from the Linux kernel or incorporated into other | ||
12 | * software packages, subject to the following license: | ||
13 | * | ||
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
15 | * of this source file (the "Software"), to deal in the Software without | ||
16 | * restriction, including without limitation the rights to use, copy, modify, | ||
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
18 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
19 | * the following conditions: | ||
20 | * | ||
21 | * The above copyright notice and this permission notice shall be included in | ||
22 | * all copies or substantial portions of the Software. | ||
23 | * | ||
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
30 | * IN THE SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/module.h> | ||
35 | #include <linux/sysdev.h> | ||
36 | #include <linux/capability.h> | ||
37 | |||
38 | #include <xen/xen.h> | ||
39 | #include <xen/interface/xen.h> | ||
40 | #include <xen/balloon.h> | ||
41 | #include <xen/xenbus.h> | ||
42 | #include <xen/features.h> | ||
43 | #include <xen/page.h> | ||
44 | |||
45 | #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) | ||
46 | |||
47 | #define BALLOON_CLASS_NAME "xen_memory" | ||
48 | |||
49 | static struct sys_device balloon_sysdev; | ||
50 | |||
51 | static int register_balloon(struct sys_device *sysdev); | ||
52 | |||
53 | static struct xenbus_watch target_watch = | ||
54 | { | ||
55 | .node = "memory/target" | ||
56 | }; | ||
57 | |||
58 | /* React to a change in the target key */ | ||
59 | static void watch_target(struct xenbus_watch *watch, | ||
60 | const char **vec, unsigned int len) | ||
61 | { | ||
62 | unsigned long long new_target; | ||
63 | int err; | ||
64 | |||
65 | err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); | ||
66 | if (err != 1) { | ||
67 | /* This is ok (for domain0 at least) - so just return */ | ||
68 | return; | ||
69 | } | ||
70 | |||
71 | /* The given memory/target value is in KiB, so it needs converting to | ||
72 | * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. | ||
73 | */ | ||
74 | balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); | ||
75 | } | ||
76 | |||
77 | static int balloon_init_watcher(struct notifier_block *notifier, | ||
78 | unsigned long event, | ||
79 | void *data) | ||
80 | { | ||
81 | int err; | ||
82 | |||
83 | err = register_xenbus_watch(&target_watch); | ||
84 | if (err) | ||
85 | printk(KERN_ERR "Failed to set balloon watcher\n"); | ||
86 | |||
87 | return NOTIFY_DONE; | ||
88 | } | ||
89 | |||
90 | static struct notifier_block xenstore_notifier; | ||
91 | |||
92 | static int __init balloon_init(void) | ||
93 | { | ||
94 | if (!xen_domain()) | ||
95 | return -ENODEV; | ||
96 | |||
97 | pr_info("xen-balloon: Initialising balloon driver.\n"); | ||
98 | |||
99 | register_balloon(&balloon_sysdev); | ||
100 | |||
101 | target_watch.callback = watch_target; | ||
102 | xenstore_notifier.notifier_call = balloon_init_watcher; | ||
103 | |||
104 | register_xenstore_notifier(&xenstore_notifier); | ||
105 | |||
106 | return 0; | ||
107 | } | ||
108 | subsys_initcall(balloon_init); | ||
109 | |||
110 | static void balloon_exit(void) | ||
111 | { | ||
112 | /* XXX - release balloon here */ | ||
113 | return; | ||
114 | } | ||
115 | |||
116 | module_exit(balloon_exit); | ||
117 | |||
118 | #define BALLOON_SHOW(name, format, args...) \ | ||
119 | static ssize_t show_##name(struct sys_device *dev, \ | ||
120 | struct sysdev_attribute *attr, \ | ||
121 | char *buf) \ | ||
122 | { \ | ||
123 | return sprintf(buf, format, ##args); \ | ||
124 | } \ | ||
125 | static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) | ||
126 | |||
127 | BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); | ||
128 | BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); | ||
129 | BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); | ||
130 | |||
131 | static SYSDEV_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay); | ||
132 | static SYSDEV_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay); | ||
133 | static SYSDEV_ULONG_ATTR(retry_count, 0444, balloon_stats.retry_count); | ||
134 | static SYSDEV_ULONG_ATTR(max_retry_count, 0644, balloon_stats.max_retry_count); | ||
135 | |||
136 | static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, | ||
137 | char *buf) | ||
138 | { | ||
139 | return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); | ||
140 | } | ||
141 | |||
142 | static ssize_t store_target_kb(struct sys_device *dev, | ||
143 | struct sysdev_attribute *attr, | ||
144 | const char *buf, | ||
145 | size_t count) | ||
146 | { | ||
147 | char *endchar; | ||
148 | unsigned long long target_bytes; | ||
149 | |||
150 | if (!capable(CAP_SYS_ADMIN)) | ||
151 | return -EPERM; | ||
152 | |||
153 | target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; | ||
154 | |||
155 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | ||
156 | |||
157 | return count; | ||
158 | } | ||
159 | |||
160 | static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, | ||
161 | show_target_kb, store_target_kb); | ||
162 | |||
163 | |||
164 | static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr, | ||
165 | char *buf) | ||
166 | { | ||
167 | return sprintf(buf, "%llu\n", | ||
168 | (unsigned long long)balloon_stats.target_pages | ||
169 | << PAGE_SHIFT); | ||
170 | } | ||
171 | |||
172 | static ssize_t store_target(struct sys_device *dev, | ||
173 | struct sysdev_attribute *attr, | ||
174 | const char *buf, | ||
175 | size_t count) | ||
176 | { | ||
177 | char *endchar; | ||
178 | unsigned long long target_bytes; | ||
179 | |||
180 | if (!capable(CAP_SYS_ADMIN)) | ||
181 | return -EPERM; | ||
182 | |||
183 | target_bytes = memparse(buf, &endchar); | ||
184 | |||
185 | balloon_set_new_target(target_bytes >> PAGE_SHIFT); | ||
186 | |||
187 | return count; | ||
188 | } | ||
189 | |||
190 | static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, | ||
191 | show_target, store_target); | ||
192 | |||
193 | |||
194 | static struct sysdev_attribute *balloon_attrs[] = { | ||
195 | &attr_target_kb, | ||
196 | &attr_target, | ||
197 | &attr_schedule_delay.attr, | ||
198 | &attr_max_schedule_delay.attr, | ||
199 | &attr_retry_count.attr, | ||
200 | &attr_max_retry_count.attr | ||
201 | }; | ||
202 | |||
203 | static struct attribute *balloon_info_attrs[] = { | ||
204 | &attr_current_kb.attr, | ||
205 | &attr_low_kb.attr, | ||
206 | &attr_high_kb.attr, | ||
207 | NULL | ||
208 | }; | ||
209 | |||
210 | static struct attribute_group balloon_info_group = { | ||
211 | .name = "info", | ||
212 | .attrs = balloon_info_attrs | ||
213 | }; | ||
214 | |||
215 | static struct sysdev_class balloon_sysdev_class = { | ||
216 | .name = BALLOON_CLASS_NAME | ||
217 | }; | ||
218 | |||
219 | static int register_balloon(struct sys_device *sysdev) | ||
220 | { | ||
221 | int i, error; | ||
222 | |||
223 | error = sysdev_class_register(&balloon_sysdev_class); | ||
224 | if (error) | ||
225 | return error; | ||
226 | |||
227 | sysdev->id = 0; | ||
228 | sysdev->cls = &balloon_sysdev_class; | ||
229 | |||
230 | error = sysdev_register(sysdev); | ||
231 | if (error) { | ||
232 | sysdev_class_unregister(&balloon_sysdev_class); | ||
233 | return error; | ||
234 | } | ||
235 | |||
236 | for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { | ||
237 | error = sysdev_create_file(sysdev, balloon_attrs[i]); | ||
238 | if (error) | ||
239 | goto fail; | ||
240 | } | ||
241 | |||
242 | error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); | ||
243 | if (error) | ||
244 | goto fail; | ||
245 | |||
246 | return 0; | ||
247 | |||
248 | fail: | ||
249 | while (--i >= 0) | ||
250 | sysdev_remove_file(sysdev, balloon_attrs[i]); | ||
251 | sysdev_unregister(sysdev); | ||
252 | sysdev_class_unregister(&balloon_sysdev_class); | ||
253 | return error; | ||
254 | } | ||
255 | |||
256 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile index 5571f5b84223..8dca685358b4 100644 --- a/drivers/xen/xenbus/Makefile +++ b/drivers/xen/xenbus/Makefile | |||
@@ -5,3 +5,8 @@ xenbus-objs += xenbus_client.o | |||
5 | xenbus-objs += xenbus_comms.o | 5 | xenbus-objs += xenbus_comms.o |
6 | xenbus-objs += xenbus_xs.o | 6 | xenbus-objs += xenbus_xs.o |
7 | xenbus-objs += xenbus_probe.o | 7 | xenbus-objs += xenbus_probe.o |
8 | |||
9 | xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o | ||
10 | xenbus-objs += $(xenbus-be-objs-y) | ||
11 | |||
12 | obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o | ||
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 7e49527189b6..cdacf923e073 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c | |||
@@ -50,6 +50,8 @@ const char *xenbus_strstate(enum xenbus_state state) | |||
50 | [ XenbusStateConnected ] = "Connected", | 50 | [ XenbusStateConnected ] = "Connected", |
51 | [ XenbusStateClosing ] = "Closing", | 51 | [ XenbusStateClosing ] = "Closing", |
52 | [ XenbusStateClosed ] = "Closed", | 52 | [ XenbusStateClosed ] = "Closed", |
53 | [XenbusStateReconfiguring] = "Reconfiguring", | ||
54 | [XenbusStateReconfigured] = "Reconfigured", | ||
53 | }; | 55 | }; |
54 | return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; | 56 | return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; |
55 | } | 57 | } |
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index d409495876f1..739769551e33 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c | |||
@@ -56,7 +56,6 @@ | |||
56 | #include <xen/events.h> | 56 | #include <xen/events.h> |
57 | #include <xen/page.h> | 57 | #include <xen/page.h> |
58 | 58 | ||
59 | #include <xen/platform_pci.h> | ||
60 | #include <xen/hvm.h> | 59 | #include <xen/hvm.h> |
61 | 60 | ||
62 | #include "xenbus_comms.h" | 61 | #include "xenbus_comms.h" |
@@ -64,22 +63,15 @@ | |||
64 | 63 | ||
65 | 64 | ||
66 | int xen_store_evtchn; | 65 | int xen_store_evtchn; |
67 | EXPORT_SYMBOL(xen_store_evtchn); | 66 | EXPORT_SYMBOL_GPL(xen_store_evtchn); |
68 | 67 | ||
69 | struct xenstore_domain_interface *xen_store_interface; | 68 | struct xenstore_domain_interface *xen_store_interface; |
69 | EXPORT_SYMBOL_GPL(xen_store_interface); | ||
70 | |||
70 | static unsigned long xen_store_mfn; | 71 | static unsigned long xen_store_mfn; |
71 | 72 | ||
72 | static BLOCKING_NOTIFIER_HEAD(xenstore_chain); | 73 | static BLOCKING_NOTIFIER_HEAD(xenstore_chain); |
73 | 74 | ||
74 | static void wait_for_devices(struct xenbus_driver *xendrv); | ||
75 | |||
76 | static int xenbus_probe_frontend(const char *type, const char *name); | ||
77 | |||
78 | static void xenbus_dev_shutdown(struct device *_dev); | ||
79 | |||
80 | static int xenbus_dev_suspend(struct device *dev, pm_message_t state); | ||
81 | static int xenbus_dev_resume(struct device *dev); | ||
82 | |||
83 | /* If something in array of ids matches this device, return it. */ | 75 | /* If something in array of ids matches this device, return it. */ |
84 | static const struct xenbus_device_id * | 76 | static const struct xenbus_device_id * |
85 | match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) | 77 | match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) |
@@ -100,34 +92,7 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv) | |||
100 | 92 | ||
101 | return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; | 93 | return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; |
102 | } | 94 | } |
103 | 95 | EXPORT_SYMBOL_GPL(xenbus_match); | |
104 | static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env) | ||
105 | { | ||
106 | struct xenbus_device *dev = to_xenbus_device(_dev); | ||
107 | |||
108 | if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) | ||
109 | return -ENOMEM; | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | /* device/<type>/<id> => <type>-<id> */ | ||
115 | static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) | ||
116 | { | ||
117 | nodename = strchr(nodename, '/'); | ||
118 | if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { | ||
119 | printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); | ||
120 | return -EINVAL; | ||
121 | } | ||
122 | |||
123 | strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); | ||
124 | if (!strchr(bus_id, '/')) { | ||
125 | printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); | ||
126 | return -EINVAL; | ||
127 | } | ||
128 | *strchr(bus_id, '/') = '-'; | ||
129 | return 0; | ||
130 | } | ||
131 | 96 | ||
132 | 97 | ||
133 | static void free_otherend_details(struct xenbus_device *dev) | 98 | static void free_otherend_details(struct xenbus_device *dev) |
@@ -147,7 +112,30 @@ static void free_otherend_watch(struct xenbus_device *dev) | |||
147 | } | 112 | } |
148 | 113 | ||
149 | 114 | ||
150 | int read_otherend_details(struct xenbus_device *xendev, | 115 | static int talk_to_otherend(struct xenbus_device *dev) |
116 | { | ||
117 | struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); | ||
118 | |||
119 | free_otherend_watch(dev); | ||
120 | free_otherend_details(dev); | ||
121 | |||
122 | return drv->read_otherend_details(dev); | ||
123 | } | ||
124 | |||
125 | |||
126 | |||
127 | static int watch_otherend(struct xenbus_device *dev) | ||
128 | { | ||
129 | struct xen_bus_type *bus = | ||
130 | container_of(dev->dev.bus, struct xen_bus_type, bus); | ||
131 | |||
132 | return xenbus_watch_pathfmt(dev, &dev->otherend_watch, | ||
133 | bus->otherend_changed, | ||
134 | "%s/%s", dev->otherend, "state"); | ||
135 | } | ||
136 | |||
137 | |||
138 | int xenbus_read_otherend_details(struct xenbus_device *xendev, | ||
151 | char *id_node, char *path_node) | 139 | char *id_node, char *path_node) |
152 | { | 140 | { |
153 | int err = xenbus_gather(XBT_NIL, xendev->nodename, | 141 | int err = xenbus_gather(XBT_NIL, xendev->nodename, |
@@ -172,39 +160,11 @@ int read_otherend_details(struct xenbus_device *xendev, | |||
172 | 160 | ||
173 | return 0; | 161 | return 0; |
174 | } | 162 | } |
163 | EXPORT_SYMBOL_GPL(xenbus_read_otherend_details); | ||
175 | 164 | ||
176 | 165 | void xenbus_otherend_changed(struct xenbus_watch *watch, | |
177 | static int read_backend_details(struct xenbus_device *xendev) | 166 | const char **vec, unsigned int len, |
178 | { | 167 | int ignore_on_shutdown) |
179 | return read_otherend_details(xendev, "backend-id", "backend"); | ||
180 | } | ||
181 | |||
182 | static struct device_attribute xenbus_dev_attrs[] = { | ||
183 | __ATTR_NULL | ||
184 | }; | ||
185 | |||
186 | /* Bus type for frontend drivers. */ | ||
187 | static struct xen_bus_type xenbus_frontend = { | ||
188 | .root = "device", | ||
189 | .levels = 2, /* device/type/<id> */ | ||
190 | .get_bus_id = frontend_bus_id, | ||
191 | .probe = xenbus_probe_frontend, | ||
192 | .bus = { | ||
193 | .name = "xen", | ||
194 | .match = xenbus_match, | ||
195 | .uevent = xenbus_uevent, | ||
196 | .probe = xenbus_dev_probe, | ||
197 | .remove = xenbus_dev_remove, | ||
198 | .shutdown = xenbus_dev_shutdown, | ||
199 | .dev_attrs = xenbus_dev_attrs, | ||
200 | |||
201 | .suspend = xenbus_dev_suspend, | ||
202 | .resume = xenbus_dev_resume, | ||
203 | }, | ||
204 | }; | ||
205 | |||
206 | static void otherend_changed(struct xenbus_watch *watch, | ||
207 | const char **vec, unsigned int len) | ||
208 | { | 168 | { |
209 | struct xenbus_device *dev = | 169 | struct xenbus_device *dev = |
210 | container_of(watch, struct xenbus_device, otherend_watch); | 170 | container_of(watch, struct xenbus_device, otherend_watch); |
@@ -232,11 +192,7 @@ static void otherend_changed(struct xenbus_watch *watch, | |||
232 | * work that can fail e.g., when the rootfs is gone. | 192 | * work that can fail e.g., when the rootfs is gone. |
233 | */ | 193 | */ |
234 | if (system_state > SYSTEM_RUNNING) { | 194 | if (system_state > SYSTEM_RUNNING) { |
235 | struct xen_bus_type *bus = bus; | 195 | if (ignore_on_shutdown && (state == XenbusStateClosing)) |
236 | bus = container_of(dev->dev.bus, struct xen_bus_type, bus); | ||
237 | /* If we're frontend, drive the state machine to Closed. */ | ||
238 | /* This should cause the backend to release our resources. */ | ||
239 | if ((bus == &xenbus_frontend) && (state == XenbusStateClosing)) | ||
240 | xenbus_frontend_closed(dev); | 196 | xenbus_frontend_closed(dev); |
241 | return; | 197 | return; |
242 | } | 198 | } |
@@ -244,25 +200,7 @@ static void otherend_changed(struct xenbus_watch *watch, | |||
244 | if (drv->otherend_changed) | 200 | if (drv->otherend_changed) |
245 | drv->otherend_changed(dev, state); | 201 | drv->otherend_changed(dev, state); |
246 | } | 202 | } |
247 | 203 | EXPORT_SYMBOL_GPL(xenbus_otherend_changed); | |
248 | |||
249 | static int talk_to_otherend(struct xenbus_device *dev) | ||
250 | { | ||
251 | struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); | ||
252 | |||
253 | free_otherend_watch(dev); | ||
254 | free_otherend_details(dev); | ||
255 | |||
256 | return drv->read_otherend_details(dev); | ||
257 | } | ||
258 | |||
259 | |||
260 | static int watch_otherend(struct xenbus_device *dev) | ||
261 | { | ||
262 | return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, | ||
263 | "%s/%s", dev->otherend, "state"); | ||
264 | } | ||
265 | |||
266 | 204 | ||
267 | int xenbus_dev_probe(struct device *_dev) | 205 | int xenbus_dev_probe(struct device *_dev) |
268 | { | 206 | { |
@@ -306,8 +244,9 @@ int xenbus_dev_probe(struct device *_dev) | |||
306 | fail: | 244 | fail: |
307 | xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); | 245 | xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); |
308 | xenbus_switch_state(dev, XenbusStateClosed); | 246 | xenbus_switch_state(dev, XenbusStateClosed); |
309 | return -ENODEV; | 247 | return err; |
310 | } | 248 | } |
249 | EXPORT_SYMBOL_GPL(xenbus_dev_probe); | ||
311 | 250 | ||
312 | int xenbus_dev_remove(struct device *_dev) | 251 | int xenbus_dev_remove(struct device *_dev) |
313 | { | 252 | { |
@@ -325,8 +264,9 @@ int xenbus_dev_remove(struct device *_dev) | |||
325 | xenbus_switch_state(dev, XenbusStateClosed); | 264 | xenbus_switch_state(dev, XenbusStateClosed); |
326 | return 0; | 265 | return 0; |
327 | } | 266 | } |
267 | EXPORT_SYMBOL_GPL(xenbus_dev_remove); | ||
328 | 268 | ||
329 | static void xenbus_dev_shutdown(struct device *_dev) | 269 | void xenbus_dev_shutdown(struct device *_dev) |
330 | { | 270 | { |
331 | struct xenbus_device *dev = to_xenbus_device(_dev); | 271 | struct xenbus_device *dev = to_xenbus_device(_dev); |
332 | unsigned long timeout = 5*HZ; | 272 | unsigned long timeout = 5*HZ; |
@@ -347,6 +287,7 @@ static void xenbus_dev_shutdown(struct device *_dev) | |||
347 | out: | 287 | out: |
348 | put_device(&dev->dev); | 288 | put_device(&dev->dev); |
349 | } | 289 | } |
290 | EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); | ||
350 | 291 | ||
351 | int xenbus_register_driver_common(struct xenbus_driver *drv, | 292 | int xenbus_register_driver_common(struct xenbus_driver *drv, |
352 | struct xen_bus_type *bus, | 293 | struct xen_bus_type *bus, |
@@ -360,25 +301,7 @@ int xenbus_register_driver_common(struct xenbus_driver *drv, | |||
360 | 301 | ||
361 | return driver_register(&drv->driver); | 302 | return driver_register(&drv->driver); |
362 | } | 303 | } |
363 | 304 | EXPORT_SYMBOL_GPL(xenbus_register_driver_common); | |
364 | int __xenbus_register_frontend(struct xenbus_driver *drv, | ||
365 | struct module *owner, const char *mod_name) | ||
366 | { | ||
367 | int ret; | ||
368 | |||
369 | drv->read_otherend_details = read_backend_details; | ||
370 | |||
371 | ret = xenbus_register_driver_common(drv, &xenbus_frontend, | ||
372 | owner, mod_name); | ||
373 | if (ret) | ||
374 | return ret; | ||
375 | |||
376 | /* If this driver is loaded as a module wait for devices to attach. */ | ||
377 | wait_for_devices(drv); | ||
378 | |||
379 | return 0; | ||
380 | } | ||
381 | EXPORT_SYMBOL_GPL(__xenbus_register_frontend); | ||
382 | 305 | ||
383 | void xenbus_unregister_driver(struct xenbus_driver *drv) | 306 | void xenbus_unregister_driver(struct xenbus_driver *drv) |
384 | { | 307 | { |
@@ -549,24 +472,7 @@ fail: | |||
549 | kfree(xendev); | 472 | kfree(xendev); |
550 | return err; | 473 | return err; |
551 | } | 474 | } |
552 | 475 | EXPORT_SYMBOL_GPL(xenbus_probe_node); | |
553 | /* device/<typename>/<name> */ | ||
554 | static int xenbus_probe_frontend(const char *type, const char *name) | ||
555 | { | ||
556 | char *nodename; | ||
557 | int err; | ||
558 | |||
559 | nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", | ||
560 | xenbus_frontend.root, type, name); | ||
561 | if (!nodename) | ||
562 | return -ENOMEM; | ||
563 | |||
564 | DPRINTK("%s", nodename); | ||
565 | |||
566 | err = xenbus_probe_node(&xenbus_frontend, type, nodename); | ||
567 | kfree(nodename); | ||
568 | return err; | ||
569 | } | ||
570 | 476 | ||
571 | static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) | 477 | static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) |
572 | { | 478 | { |
@@ -580,10 +486,11 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) | |||
580 | return PTR_ERR(dir); | 486 | return PTR_ERR(dir); |
581 | 487 | ||
582 | for (i = 0; i < dir_n; i++) { | 488 | for (i = 0; i < dir_n; i++) { |
583 | err = bus->probe(type, dir[i]); | 489 | err = bus->probe(bus, type, dir[i]); |
584 | if (err) | 490 | if (err) |
585 | break; | 491 | break; |
586 | } | 492 | } |
493 | |||
587 | kfree(dir); | 494 | kfree(dir); |
588 | return err; | 495 | return err; |
589 | } | 496 | } |
@@ -603,9 +510,11 @@ int xenbus_probe_devices(struct xen_bus_type *bus) | |||
603 | if (err) | 510 | if (err) |
604 | break; | 511 | break; |
605 | } | 512 | } |
513 | |||
606 | kfree(dir); | 514 | kfree(dir); |
607 | return err; | 515 | return err; |
608 | } | 516 | } |
517 | EXPORT_SYMBOL_GPL(xenbus_probe_devices); | ||
609 | 518 | ||
610 | static unsigned int char_count(const char *str, char c) | 519 | static unsigned int char_count(const char *str, char c) |
611 | { | 520 | { |
@@ -668,54 +577,39 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) | |||
668 | } | 577 | } |
669 | EXPORT_SYMBOL_GPL(xenbus_dev_changed); | 578 | EXPORT_SYMBOL_GPL(xenbus_dev_changed); |
670 | 579 | ||
671 | static void frontend_changed(struct xenbus_watch *watch, | 580 | int xenbus_dev_suspend(struct device *dev) |
672 | const char **vec, unsigned int len) | ||
673 | { | ||
674 | DPRINTK(""); | ||
675 | |||
676 | xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); | ||
677 | } | ||
678 | |||
679 | /* We watch for devices appearing and vanishing. */ | ||
680 | static struct xenbus_watch fe_watch = { | ||
681 | .node = "device", | ||
682 | .callback = frontend_changed, | ||
683 | }; | ||
684 | |||
685 | static int xenbus_dev_suspend(struct device *dev, pm_message_t state) | ||
686 | { | 581 | { |
687 | int err = 0; | 582 | int err = 0; |
688 | struct xenbus_driver *drv; | 583 | struct xenbus_driver *drv; |
689 | struct xenbus_device *xdev; | 584 | struct xenbus_device *xdev |
585 | = container_of(dev, struct xenbus_device, dev); | ||
690 | 586 | ||
691 | DPRINTK(""); | 587 | DPRINTK("%s", xdev->nodename); |
692 | 588 | ||
693 | if (dev->driver == NULL) | 589 | if (dev->driver == NULL) |
694 | return 0; | 590 | return 0; |
695 | drv = to_xenbus_driver(dev->driver); | 591 | drv = to_xenbus_driver(dev->driver); |
696 | xdev = container_of(dev, struct xenbus_device, dev); | ||
697 | if (drv->suspend) | 592 | if (drv->suspend) |
698 | err = drv->suspend(xdev, state); | 593 | err = drv->suspend(xdev); |
699 | if (err) | 594 | if (err) |
700 | printk(KERN_WARNING | 595 | printk(KERN_WARNING |
701 | "xenbus: suspend %s failed: %i\n", dev_name(dev), err); | 596 | "xenbus: suspend %s failed: %i\n", dev_name(dev), err); |
702 | return 0; | 597 | return 0; |
703 | } | 598 | } |
599 | EXPORT_SYMBOL_GPL(xenbus_dev_suspend); | ||
704 | 600 | ||
705 | static int xenbus_dev_resume(struct device *dev) | 601 | int xenbus_dev_resume(struct device *dev) |
706 | { | 602 | { |
707 | int err; | 603 | int err; |
708 | struct xenbus_driver *drv; | 604 | struct xenbus_driver *drv; |
709 | struct xenbus_device *xdev; | 605 | struct xenbus_device *xdev |
606 | = container_of(dev, struct xenbus_device, dev); | ||
710 | 607 | ||
711 | DPRINTK(""); | 608 | DPRINTK("%s", xdev->nodename); |
712 | 609 | ||
713 | if (dev->driver == NULL) | 610 | if (dev->driver == NULL) |
714 | return 0; | 611 | return 0; |
715 | |||
716 | drv = to_xenbus_driver(dev->driver); | 612 | drv = to_xenbus_driver(dev->driver); |
717 | xdev = container_of(dev, struct xenbus_device, dev); | ||
718 | |||
719 | err = talk_to_otherend(xdev); | 613 | err = talk_to_otherend(xdev); |
720 | if (err) { | 614 | if (err) { |
721 | printk(KERN_WARNING | 615 | printk(KERN_WARNING |
@@ -746,6 +640,15 @@ static int xenbus_dev_resume(struct device *dev) | |||
746 | 640 | ||
747 | return 0; | 641 | return 0; |
748 | } | 642 | } |
643 | EXPORT_SYMBOL_GPL(xenbus_dev_resume); | ||
644 | |||
645 | int xenbus_dev_cancel(struct device *dev) | ||
646 | { | ||
647 | /* Do nothing */ | ||
648 | DPRINTK("cancel"); | ||
649 | return 0; | ||
650 | } | ||
651 | EXPORT_SYMBOL_GPL(xenbus_dev_cancel); | ||
749 | 652 | ||
750 | /* A flag to determine if xenstored is 'ready' (i.e. has started) */ | 653 | /* A flag to determine if xenstored is 'ready' (i.e. has started) */ |
751 | int xenstored_ready = 0; | 654 | int xenstored_ready = 0; |
@@ -774,11 +677,6 @@ void xenbus_probe(struct work_struct *unused) | |||
774 | { | 677 | { |
775 | xenstored_ready = 1; | 678 | xenstored_ready = 1; |
776 | 679 | ||
777 | /* Enumerate devices in xenstore and watch for changes. */ | ||
778 | xenbus_probe_devices(&xenbus_frontend); | ||
779 | register_xenbus_watch(&fe_watch); | ||
780 | xenbus_backend_probe_and_watch(); | ||
781 | |||
782 | /* Notify others that xenstore is up */ | 680 | /* Notify others that xenstore is up */ |
783 | blocking_notifier_call_chain(&xenstore_chain, 0, NULL); | 681 | blocking_notifier_call_chain(&xenstore_chain, 0, NULL); |
784 | } | 682 | } |
@@ -801,27 +699,43 @@ device_initcall(xenbus_probe_initcall); | |||
801 | static int __init xenbus_init(void) | 699 | static int __init xenbus_init(void) |
802 | { | 700 | { |
803 | int err = 0; | 701 | int err = 0; |
702 | unsigned long page = 0; | ||
804 | 703 | ||
805 | DPRINTK(""); | 704 | DPRINTK(""); |
806 | 705 | ||
807 | err = -ENODEV; | 706 | err = -ENODEV; |
808 | if (!xen_domain()) | 707 | if (!xen_domain()) |
809 | goto out_error; | 708 | return err; |
810 | |||
811 | /* Register ourselves with the kernel bus subsystem */ | ||
812 | err = bus_register(&xenbus_frontend.bus); | ||
813 | if (err) | ||
814 | goto out_error; | ||
815 | |||
816 | err = xenbus_backend_bus_register(); | ||
817 | if (err) | ||
818 | goto out_unreg_front; | ||
819 | 709 | ||
820 | /* | 710 | /* |
821 | * Domain0 doesn't have a store_evtchn or store_mfn yet. | 711 | * Domain0 doesn't have a store_evtchn or store_mfn yet. |
822 | */ | 712 | */ |
823 | if (xen_initial_domain()) { | 713 | if (xen_initial_domain()) { |
824 | /* dom0 not yet supported */ | 714 | struct evtchn_alloc_unbound alloc_unbound; |
715 | |||
716 | /* Allocate Xenstore page */ | ||
717 | page = get_zeroed_page(GFP_KERNEL); | ||
718 | if (!page) | ||
719 | goto out_error; | ||
720 | |||
721 | xen_store_mfn = xen_start_info->store_mfn = | ||
722 | pfn_to_mfn(virt_to_phys((void *)page) >> | ||
723 | PAGE_SHIFT); | ||
724 | |||
725 | /* Next allocate a local port which xenstored can bind to */ | ||
726 | alloc_unbound.dom = DOMID_SELF; | ||
727 | alloc_unbound.remote_dom = 0; | ||
728 | |||
729 | err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, | ||
730 | &alloc_unbound); | ||
731 | if (err == -ENOSYS) | ||
732 | goto out_error; | ||
733 | |||
734 | BUG_ON(err); | ||
735 | xen_store_evtchn = xen_start_info->store_evtchn = | ||
736 | alloc_unbound.port; | ||
737 | |||
738 | xen_store_interface = mfn_to_virt(xen_store_mfn); | ||
825 | } else { | 739 | } else { |
826 | if (xen_hvm_domain()) { | 740 | if (xen_hvm_domain()) { |
827 | uint64_t v = 0; | 741 | uint64_t v = 0; |
@@ -847,7 +761,7 @@ static int __init xenbus_init(void) | |||
847 | if (err) { | 761 | if (err) { |
848 | printk(KERN_WARNING | 762 | printk(KERN_WARNING |
849 | "XENBUS: Error initializing xenstore comms: %i\n", err); | 763 | "XENBUS: Error initializing xenstore comms: %i\n", err); |
850 | goto out_unreg_back; | 764 | goto out_error; |
851 | } | 765 | } |
852 | 766 | ||
853 | #ifdef CONFIG_XEN_COMPAT_XENFS | 767 | #ifdef CONFIG_XEN_COMPAT_XENFS |
@@ -860,131 +774,13 @@ static int __init xenbus_init(void) | |||
860 | 774 | ||
861 | return 0; | 775 | return 0; |
862 | 776 | ||
863 | out_unreg_back: | ||
864 | xenbus_backend_bus_unregister(); | ||
865 | |||
866 | out_unreg_front: | ||
867 | bus_unregister(&xenbus_frontend.bus); | ||
868 | |||
869 | out_error: | 777 | out_error: |
778 | if (page != 0) | ||
779 | free_page(page); | ||
780 | |||
870 | return err; | 781 | return err; |
871 | } | 782 | } |
872 | 783 | ||
873 | postcore_initcall(xenbus_init); | 784 | postcore_initcall(xenbus_init); |
874 | 785 | ||
875 | MODULE_LICENSE("GPL"); | 786 | MODULE_LICENSE("GPL"); |
876 | |||
877 | static int is_device_connecting(struct device *dev, void *data) | ||
878 | { | ||
879 | struct xenbus_device *xendev = to_xenbus_device(dev); | ||
880 | struct device_driver *drv = data; | ||
881 | struct xenbus_driver *xendrv; | ||
882 | |||
883 | /* | ||
884 | * A device with no driver will never connect. We care only about | ||
885 | * devices which should currently be in the process of connecting. | ||
886 | */ | ||
887 | if (!dev->driver) | ||
888 | return 0; | ||
889 | |||
890 | /* Is this search limited to a particular driver? */ | ||
891 | if (drv && (dev->driver != drv)) | ||
892 | return 0; | ||
893 | |||
894 | xendrv = to_xenbus_driver(dev->driver); | ||
895 | return (xendev->state < XenbusStateConnected || | ||
896 | (xendev->state == XenbusStateConnected && | ||
897 | xendrv->is_ready && !xendrv->is_ready(xendev))); | ||
898 | } | ||
899 | |||
900 | static int exists_connecting_device(struct device_driver *drv) | ||
901 | { | ||
902 | return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, | ||
903 | is_device_connecting); | ||
904 | } | ||
905 | |||
906 | static int print_device_status(struct device *dev, void *data) | ||
907 | { | ||
908 | struct xenbus_device *xendev = to_xenbus_device(dev); | ||
909 | struct device_driver *drv = data; | ||
910 | |||
911 | /* Is this operation limited to a particular driver? */ | ||
912 | if (drv && (dev->driver != drv)) | ||
913 | return 0; | ||
914 | |||
915 | if (!dev->driver) { | ||
916 | /* Information only: is this too noisy? */ | ||
917 | printk(KERN_INFO "XENBUS: Device with no driver: %s\n", | ||
918 | xendev->nodename); | ||
919 | } else if (xendev->state < XenbusStateConnected) { | ||
920 | enum xenbus_state rstate = XenbusStateUnknown; | ||
921 | if (xendev->otherend) | ||
922 | rstate = xenbus_read_driver_state(xendev->otherend); | ||
923 | printk(KERN_WARNING "XENBUS: Timeout connecting " | ||
924 | "to device: %s (local state %d, remote state %d)\n", | ||
925 | xendev->nodename, xendev->state, rstate); | ||
926 | } | ||
927 | |||
928 | return 0; | ||
929 | } | ||
930 | |||
931 | /* We only wait for device setup after most initcalls have run. */ | ||
932 | static int ready_to_wait_for_devices; | ||
933 | |||
934 | /* | ||
935 | * On a 5-minute timeout, wait for all devices currently configured. We need | ||
936 | * to do this to guarantee that the filesystems and / or network devices | ||
937 | * needed for boot are available, before we can allow the boot to proceed. | ||
938 | * | ||
939 | * This needs to be on a late_initcall, to happen after the frontend device | ||
940 | * drivers have been initialised, but before the root fs is mounted. | ||
941 | * | ||
942 | * A possible improvement here would be to have the tools add a per-device | ||
943 | * flag to the store entry, indicating whether it is needed at boot time. | ||
944 | * This would allow people who knew what they were doing to accelerate their | ||
945 | * boot slightly, but of course needs tools or manual intervention to set up | ||
946 | * those flags correctly. | ||
947 | */ | ||
948 | static void wait_for_devices(struct xenbus_driver *xendrv) | ||
949 | { | ||
950 | unsigned long start = jiffies; | ||
951 | struct device_driver *drv = xendrv ? &xendrv->driver : NULL; | ||
952 | unsigned int seconds_waited = 0; | ||
953 | |||
954 | if (!ready_to_wait_for_devices || !xen_domain()) | ||
955 | return; | ||
956 | |||
957 | while (exists_connecting_device(drv)) { | ||
958 | if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { | ||
959 | if (!seconds_waited) | ||
960 | printk(KERN_WARNING "XENBUS: Waiting for " | ||
961 | "devices to initialise: "); | ||
962 | seconds_waited += 5; | ||
963 | printk("%us...", 300 - seconds_waited); | ||
964 | if (seconds_waited == 300) | ||
965 | break; | ||
966 | } | ||
967 | |||
968 | schedule_timeout_interruptible(HZ/10); | ||
969 | } | ||
970 | |||
971 | if (seconds_waited) | ||
972 | printk("\n"); | ||
973 | |||
974 | bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, | ||
975 | print_device_status); | ||
976 | } | ||
977 | |||
978 | #ifndef MODULE | ||
979 | static int __init boot_wait_for_devices(void) | ||
980 | { | ||
981 | if (xen_hvm_domain() && !xen_platform_pci_unplug) | ||
982 | return -ENODEV; | ||
983 | |||
984 | ready_to_wait_for_devices = 1; | ||
985 | wait_for_devices(NULL); | ||
986 | return 0; | ||
987 | } | ||
988 | |||
989 | late_initcall(boot_wait_for_devices); | ||
990 | #endif | ||
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 6c5e3185a6a2..888b9900ca08 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h | |||
@@ -36,26 +36,15 @@ | |||
36 | 36 | ||
37 | #define XEN_BUS_ID_SIZE 20 | 37 | #define XEN_BUS_ID_SIZE 20 |
38 | 38 | ||
39 | #ifdef CONFIG_XEN_BACKEND | ||
40 | extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); | ||
41 | extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); | ||
42 | extern void xenbus_backend_probe_and_watch(void); | ||
43 | extern int xenbus_backend_bus_register(void); | ||
44 | extern void xenbus_backend_bus_unregister(void); | ||
45 | #else | ||
46 | static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {} | ||
47 | static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {} | ||
48 | static inline void xenbus_backend_probe_and_watch(void) {} | ||
49 | static inline int xenbus_backend_bus_register(void) { return 0; } | ||
50 | static inline void xenbus_backend_bus_unregister(void) {} | ||
51 | #endif | ||
52 | |||
53 | struct xen_bus_type | 39 | struct xen_bus_type |
54 | { | 40 | { |
55 | char *root; | 41 | char *root; |
56 | unsigned int levels; | 42 | unsigned int levels; |
57 | int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); | 43 | int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); |
58 | int (*probe)(const char *type, const char *dir); | 44 | int (*probe)(struct xen_bus_type *bus, const char *type, |
45 | const char *dir); | ||
46 | void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, | ||
47 | unsigned int len); | ||
59 | struct bus_type bus; | 48 | struct bus_type bus; |
60 | }; | 49 | }; |
61 | 50 | ||
@@ -73,4 +62,17 @@ extern int xenbus_probe_devices(struct xen_bus_type *bus); | |||
73 | 62 | ||
74 | extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); | 63 | extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); |
75 | 64 | ||
65 | extern void xenbus_dev_shutdown(struct device *_dev); | ||
66 | |||
67 | extern int xenbus_dev_suspend(struct device *dev); | ||
68 | extern int xenbus_dev_resume(struct device *dev); | ||
69 | extern int xenbus_dev_cancel(struct device *dev); | ||
70 | |||
71 | extern void xenbus_otherend_changed(struct xenbus_watch *watch, | ||
72 | const char **vec, unsigned int len, | ||
73 | int ignore_on_shutdown); | ||
74 | |||
75 | extern int xenbus_read_otherend_details(struct xenbus_device *xendev, | ||
76 | char *id_node, char *path_node); | ||
77 | |||
76 | #endif | 78 | #endif |
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c new file mode 100644 index 000000000000..6cf467bf63ec --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe_backend.c | |||
@@ -0,0 +1,276 @@ | |||
1 | /****************************************************************************** | ||
2 | * Talks to Xen Store to figure out what devices we have (backend half). | ||
3 | * | ||
4 | * Copyright (C) 2005 Rusty Russell, IBM Corporation | ||
5 | * Copyright (C) 2005 Mike Wray, Hewlett-Packard | ||
6 | * Copyright (C) 2005, 2006 XenSource Ltd | ||
7 | * Copyright (C) 2007 Solarflare Communications, Inc. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation; or, when distributed | ||
12 | * separately from the Linux kernel or incorporated into other | ||
13 | * software packages, subject to the following license: | ||
14 | * | ||
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
16 | * of this source file (the "Software"), to deal in the Software without | ||
17 | * restriction, including without limitation the rights to use, copy, modify, | ||
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
19 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
20 | * the following conditions: | ||
21 | * | ||
22 | * The above copyright notice and this permission notice shall be included in | ||
23 | * all copies or substantial portions of the Software. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
31 | * IN THE SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #define DPRINTK(fmt, args...) \ | ||
35 | pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ | ||
36 | __func__, __LINE__, ##args) | ||
37 | |||
38 | #include <linux/kernel.h> | ||
39 | #include <linux/err.h> | ||
40 | #include <linux/string.h> | ||
41 | #include <linux/ctype.h> | ||
42 | #include <linux/fcntl.h> | ||
43 | #include <linux/mm.h> | ||
44 | #include <linux/notifier.h> | ||
45 | |||
46 | #include <asm/page.h> | ||
47 | #include <asm/pgtable.h> | ||
48 | #include <asm/xen/hypervisor.h> | ||
49 | #include <asm/hypervisor.h> | ||
50 | #include <xen/xenbus.h> | ||
51 | #include <xen/features.h> | ||
52 | |||
53 | #include "xenbus_comms.h" | ||
54 | #include "xenbus_probe.h" | ||
55 | |||
56 | /* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */ | ||
57 | static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) | ||
58 | { | ||
59 | int domid, err; | ||
60 | const char *devid, *type, *frontend; | ||
61 | unsigned int typelen; | ||
62 | |||
63 | type = strchr(nodename, '/'); | ||
64 | if (!type) | ||
65 | return -EINVAL; | ||
66 | type++; | ||
67 | typelen = strcspn(type, "/"); | ||
68 | if (!typelen || type[typelen] != '/') | ||
69 | return -EINVAL; | ||
70 | |||
71 | devid = strrchr(nodename, '/') + 1; | ||
72 | |||
73 | err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid, | ||
74 | "frontend", NULL, &frontend, | ||
75 | NULL); | ||
76 | if (err) | ||
77 | return err; | ||
78 | if (strlen(frontend) == 0) | ||
79 | err = -ERANGE; | ||
80 | if (!err && !xenbus_exists(XBT_NIL, frontend, "")) | ||
81 | err = -ENOENT; | ||
82 | kfree(frontend); | ||
83 | |||
84 | if (err) | ||
85 | return err; | ||
86 | |||
87 | if (snprintf(bus_id, XEN_BUS_ID_SIZE, "%.*s-%i-%s", | ||
88 | typelen, type, domid, devid) >= XEN_BUS_ID_SIZE) | ||
89 | return -ENOSPC; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | static int xenbus_uevent_backend(struct device *dev, | ||
94 | struct kobj_uevent_env *env) | ||
95 | { | ||
96 | struct xenbus_device *xdev; | ||
97 | struct xenbus_driver *drv; | ||
98 | struct xen_bus_type *bus; | ||
99 | |||
100 | DPRINTK(""); | ||
101 | |||
102 | if (dev == NULL) | ||
103 | return -ENODEV; | ||
104 | |||
105 | xdev = to_xenbus_device(dev); | ||
106 | bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); | ||
107 | if (xdev == NULL) | ||
108 | return -ENODEV; | ||
109 | |||
110 | /* stuff we want to pass to /sbin/hotplug */ | ||
111 | if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) | ||
112 | return -ENOMEM; | ||
113 | |||
114 | if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename)) | ||
115 | return -ENOMEM; | ||
116 | |||
117 | if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root)) | ||
118 | return -ENOMEM; | ||
119 | |||
120 | if (dev->driver) { | ||
121 | drv = to_xenbus_driver(dev->driver); | ||
122 | if (drv && drv->uevent) | ||
123 | return drv->uevent(xdev, env); | ||
124 | } | ||
125 | |||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | /* backend/<typename>/<frontend-uuid>/<name> */ | ||
130 | static int xenbus_probe_backend_unit(struct xen_bus_type *bus, | ||
131 | const char *dir, | ||
132 | const char *type, | ||
133 | const char *name) | ||
134 | { | ||
135 | char *nodename; | ||
136 | int err; | ||
137 | |||
138 | nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name); | ||
139 | if (!nodename) | ||
140 | return -ENOMEM; | ||
141 | |||
142 | DPRINTK("%s\n", nodename); | ||
143 | |||
144 | err = xenbus_probe_node(bus, type, nodename); | ||
145 | kfree(nodename); | ||
146 | return err; | ||
147 | } | ||
148 | |||
149 | /* backend/<typename>/<frontend-domid> */ | ||
150 | static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, | ||
151 | const char *domid) | ||
152 | { | ||
153 | char *nodename; | ||
154 | int err = 0; | ||
155 | char **dir; | ||
156 | unsigned int i, dir_n = 0; | ||
157 | |||
158 | DPRINTK(""); | ||
159 | |||
160 | nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid); | ||
161 | if (!nodename) | ||
162 | return -ENOMEM; | ||
163 | |||
164 | dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n); | ||
165 | if (IS_ERR(dir)) { | ||
166 | kfree(nodename); | ||
167 | return PTR_ERR(dir); | ||
168 | } | ||
169 | |||
170 | for (i = 0; i < dir_n; i++) { | ||
171 | err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]); | ||
172 | if (err) | ||
173 | break; | ||
174 | } | ||
175 | kfree(dir); | ||
176 | kfree(nodename); | ||
177 | return err; | ||
178 | } | ||
179 | |||
180 | static void frontend_changed(struct xenbus_watch *watch, | ||
181 | const char **vec, unsigned int len) | ||
182 | { | ||
183 | xenbus_otherend_changed(watch, vec, len, 0); | ||
184 | } | ||
185 | |||
186 | static struct device_attribute xenbus_backend_dev_attrs[] = { | ||
187 | __ATTR_NULL | ||
188 | }; | ||
189 | |||
190 | static struct xen_bus_type xenbus_backend = { | ||
191 | .root = "backend", | ||
192 | .levels = 3, /* backend/type/<frontend>/<id> */ | ||
193 | .get_bus_id = backend_bus_id, | ||
194 | .probe = xenbus_probe_backend, | ||
195 | .otherend_changed = frontend_changed, | ||
196 | .bus = { | ||
197 | .name = "xen-backend", | ||
198 | .match = xenbus_match, | ||
199 | .uevent = xenbus_uevent_backend, | ||
200 | .probe = xenbus_dev_probe, | ||
201 | .remove = xenbus_dev_remove, | ||
202 | .shutdown = xenbus_dev_shutdown, | ||
203 | .dev_attrs = xenbus_backend_dev_attrs, | ||
204 | }, | ||
205 | }; | ||
206 | |||
207 | static void backend_changed(struct xenbus_watch *watch, | ||
208 | const char **vec, unsigned int len) | ||
209 | { | ||
210 | DPRINTK(""); | ||
211 | |||
212 | xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); | ||
213 | } | ||
214 | |||
215 | static struct xenbus_watch be_watch = { | ||
216 | .node = "backend", | ||
217 | .callback = backend_changed, | ||
218 | }; | ||
219 | |||
220 | static int read_frontend_details(struct xenbus_device *xendev) | ||
221 | { | ||
222 | return xenbus_read_otherend_details(xendev, "frontend-id", "frontend"); | ||
223 | } | ||
224 | |||
225 | int xenbus_dev_is_online(struct xenbus_device *dev) | ||
226 | { | ||
227 | int rc, val; | ||
228 | |||
229 | rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); | ||
230 | if (rc != 1) | ||
231 | val = 0; /* no online node present */ | ||
232 | |||
233 | return val; | ||
234 | } | ||
235 | EXPORT_SYMBOL_GPL(xenbus_dev_is_online); | ||
236 | |||
237 | int __xenbus_register_backend(struct xenbus_driver *drv, | ||
238 | struct module *owner, const char *mod_name) | ||
239 | { | ||
240 | drv->read_otherend_details = read_frontend_details; | ||
241 | |||
242 | return xenbus_register_driver_common(drv, &xenbus_backend, | ||
243 | owner, mod_name); | ||
244 | } | ||
245 | EXPORT_SYMBOL_GPL(__xenbus_register_backend); | ||
246 | |||
247 | static int backend_probe_and_watch(struct notifier_block *notifier, | ||
248 | unsigned long event, | ||
249 | void *data) | ||
250 | { | ||
251 | /* Enumerate devices in xenstore and watch for changes. */ | ||
252 | xenbus_probe_devices(&xenbus_backend); | ||
253 | register_xenbus_watch(&be_watch); | ||
254 | |||
255 | return NOTIFY_DONE; | ||
256 | } | ||
257 | |||
258 | static int __init xenbus_probe_backend_init(void) | ||
259 | { | ||
260 | static struct notifier_block xenstore_notifier = { | ||
261 | .notifier_call = backend_probe_and_watch | ||
262 | }; | ||
263 | int err; | ||
264 | |||
265 | DPRINTK(""); | ||
266 | |||
267 | /* Register ourselves with the kernel bus subsystem */ | ||
268 | err = bus_register(&xenbus_backend.bus); | ||
269 | if (err) | ||
270 | return err; | ||
271 | |||
272 | register_xenstore_notifier(&xenstore_notifier); | ||
273 | |||
274 | return 0; | ||
275 | } | ||
276 | subsys_initcall(xenbus_probe_backend_init); | ||
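For reference, a backend driver plugs into the "xen-backend" bus type above by filling in a struct xenbus_driver and calling the registration wrapper that lands in __xenbus_register_backend(). The sketch below is illustrative only: "demoback" and its callbacks are invented, and the struct layout plus the xenbus_register_backend(), xenbus_switch_state() and xenbus_dev_is_online() helpers are assumed from <xen/xenbus.h> as used by existing backends.

/* Hypothetical backend driver ("demoback"); layout and helpers assumed
 * from <xen/xenbus.h>, not part of this patch. */
#include <linux/init.h>
#include <linux/module.h>
#include <xen/xen.h>
#include <xen/xenbus.h>

static const struct xenbus_device_id demoback_ids[] = {
	{ "demo" },
	{ "" }
};

static int demoback_probe(struct xenbus_device *dev,
			  const struct xenbus_device_id *id)
{
	/* Frontend details were filled in by read_frontend_details()
	 * before this runs; allocate per-device state here. */
	return 0;
}

static void demoback_frontend_changed(struct xenbus_device *dev,
				      enum xenbus_state frontend_state)
{
	switch (frontend_state) {
	case XenbusStateInitialised:
	case XenbusStateConnected:
		xenbus_switch_state(dev, XenbusStateConnected);
		break;
	case XenbusStateClosed:
		/* Stay around while the toolstack still marks the device
		 * online, as reported by xenbus_dev_is_online() above. */
		if (!xenbus_dev_is_online(dev))
			xenbus_switch_state(dev, XenbusStateClosed);
		break;
	default:
		break;
	}
}

static struct xenbus_driver demoback_driver = {
	.name = "demoback",
	.owner = THIS_MODULE,
	.ids = demoback_ids,
	.probe = demoback_probe,
	.otherend_changed = demoback_frontend_changed,
};

static int __init demoback_init(void)
{
	if (!xen_initial_domain())
		return -ENODEV;
	/* Wraps __xenbus_register_backend(&demoback_driver, THIS_MODULE,
	 * KBUILD_MODNAME), exported above. */
	return xenbus_register_backend(&demoback_driver);
}
module_init(demoback_init);
MODULE_LICENSE("GPL");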
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c new file mode 100644 index 000000000000..b6a2690c9d49 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c | |||
@@ -0,0 +1,301 @@ | |||
1 | #define DPRINTK(fmt, args...) \ | ||
2 | pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ | ||
3 | __func__, __LINE__, ##args) | ||
4 | |||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/err.h> | ||
7 | #include <linux/string.h> | ||
8 | #include <linux/ctype.h> | ||
9 | #include <linux/fcntl.h> | ||
10 | #include <linux/mm.h> | ||
11 | #include <linux/proc_fs.h> | ||
12 | #include <linux/notifier.h> | ||
13 | #include <linux/kthread.h> | ||
14 | #include <linux/mutex.h> | ||
15 | #include <linux/io.h> | ||
16 | |||
17 | #include <asm/page.h> | ||
18 | #include <asm/pgtable.h> | ||
19 | #include <asm/xen/hypervisor.h> | ||
20 | #include <xen/xenbus.h> | ||
21 | #include <xen/events.h> | ||
22 | #include <xen/page.h> | ||
23 | |||
24 | #include <xen/platform_pci.h> | ||
25 | |||
26 | #include "xenbus_comms.h" | ||
27 | #include "xenbus_probe.h" | ||
28 | |||
29 | |||
30 | /* device/<type>/<id> => <type>-<id> */ | ||
31 | static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) | ||
32 | { | ||
33 | nodename = strchr(nodename, '/'); | ||
34 | if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { | ||
35 | printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); | ||
36 | return -EINVAL; | ||
37 | } | ||
38 | |||
39 | strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); | ||
40 | if (!strchr(bus_id, '/')) { | ||
41 | printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); | ||
42 | return -EINVAL; | ||
43 | } | ||
44 | *strchr(bus_id, '/') = '-'; | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | /* device/<typename>/<name> */ | ||
49 | static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type, | ||
50 | const char *name) | ||
51 | { | ||
52 | char *nodename; | ||
53 | int err; | ||
54 | |||
55 | nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name); | ||
56 | if (!nodename) | ||
57 | return -ENOMEM; | ||
58 | |||
59 | DPRINTK("%s", nodename); | ||
60 | |||
61 | err = xenbus_probe_node(bus, type, nodename); | ||
62 | kfree(nodename); | ||
63 | return err; | ||
64 | } | ||
65 | |||
66 | static int xenbus_uevent_frontend(struct device *_dev, | ||
67 | struct kobj_uevent_env *env) | ||
68 | { | ||
69 | struct xenbus_device *dev = to_xenbus_device(_dev); | ||
70 | |||
71 | if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) | ||
72 | return -ENOMEM; | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | |||
78 | static void backend_changed(struct xenbus_watch *watch, | ||
79 | const char **vec, unsigned int len) | ||
80 | { | ||
81 | xenbus_otherend_changed(watch, vec, len, 1); | ||
82 | } | ||
83 | |||
84 | static struct device_attribute xenbus_frontend_dev_attrs[] = { | ||
85 | __ATTR_NULL | ||
86 | }; | ||
87 | |||
88 | static const struct dev_pm_ops xenbus_pm_ops = { | ||
89 | .suspend = xenbus_dev_suspend, | ||
90 | .resume = xenbus_dev_resume, | ||
91 | .freeze = xenbus_dev_suspend, | ||
92 | .thaw = xenbus_dev_cancel, | ||
93 | .restore = xenbus_dev_resume, | ||
94 | }; | ||
95 | |||
96 | static struct xen_bus_type xenbus_frontend = { | ||
97 | .root = "device", | ||
98 | .levels = 2, /* device/type/<id> */ | ||
99 | .get_bus_id = frontend_bus_id, | ||
100 | .probe = xenbus_probe_frontend, | ||
101 | .otherend_changed = backend_changed, | ||
102 | .bus = { | ||
103 | .name = "xen", | ||
104 | .match = xenbus_match, | ||
105 | .uevent = xenbus_uevent_frontend, | ||
106 | .probe = xenbus_dev_probe, | ||
107 | .remove = xenbus_dev_remove, | ||
108 | .shutdown = xenbus_dev_shutdown, | ||
109 | .dev_attrs = xenbus_frontend_dev_attrs, | ||
110 | |||
111 | .pm = &xenbus_pm_ops, | ||
112 | }, | ||
113 | }; | ||
114 | |||
115 | static void frontend_changed(struct xenbus_watch *watch, | ||
116 | const char **vec, unsigned int len) | ||
117 | { | ||
118 | DPRINTK(""); | ||
119 | |||
120 | xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); | ||
121 | } | ||
122 | |||
123 | |||
124 | /* We watch for devices appearing and vanishing. */ | ||
125 | static struct xenbus_watch fe_watch = { | ||
126 | .node = "device", | ||
127 | .callback = frontend_changed, | ||
128 | }; | ||
129 | |||
130 | static int read_backend_details(struct xenbus_device *xendev) | ||
131 | { | ||
132 | return xenbus_read_otherend_details(xendev, "backend-id", "backend"); | ||
133 | } | ||
134 | |||
135 | static int is_device_connecting(struct device *dev, void *data) | ||
136 | { | ||
137 | struct xenbus_device *xendev = to_xenbus_device(dev); | ||
138 | struct device_driver *drv = data; | ||
139 | struct xenbus_driver *xendrv; | ||
140 | |||
141 | /* | ||
142 | * A device with no driver will never connect. We care only about | ||
143 | * devices which should currently be in the process of connecting. | ||
144 | */ | ||
145 | if (!dev->driver) | ||
146 | return 0; | ||
147 | |||
148 | /* Is this search limited to a particular driver? */ | ||
149 | if (drv && (dev->driver != drv)) | ||
150 | return 0; | ||
151 | |||
152 | xendrv = to_xenbus_driver(dev->driver); | ||
153 | return (xendev->state < XenbusStateConnected || | ||
154 | (xendev->state == XenbusStateConnected && | ||
155 | xendrv->is_ready && !xendrv->is_ready(xendev))); | ||
156 | } | ||
157 | |||
158 | static int exists_connecting_device(struct device_driver *drv) | ||
159 | { | ||
160 | return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, | ||
161 | is_device_connecting); | ||
162 | } | ||
163 | |||
164 | static int print_device_status(struct device *dev, void *data) | ||
165 | { | ||
166 | struct xenbus_device *xendev = to_xenbus_device(dev); | ||
167 | struct device_driver *drv = data; | ||
168 | |||
169 | /* Is this operation limited to a particular driver? */ | ||
170 | if (drv && (dev->driver != drv)) | ||
171 | return 0; | ||
172 | |||
173 | if (!dev->driver) { | ||
174 | /* Information only: is this too noisy? */ | ||
175 | printk(KERN_INFO "XENBUS: Device with no driver: %s\n", | ||
176 | xendev->nodename); | ||
177 | } else if (xendev->state < XenbusStateConnected) { | ||
178 | enum xenbus_state rstate = XenbusStateUnknown; | ||
179 | if (xendev->otherend) | ||
180 | rstate = xenbus_read_driver_state(xendev->otherend); | ||
181 | printk(KERN_WARNING "XENBUS: Timeout connecting " | ||
182 | "to device: %s (local state %d, remote state %d)\n", | ||
183 | xendev->nodename, xendev->state, rstate); | ||
184 | } | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | /* We only wait for device setup after most initcalls have run. */ | ||
190 | static int ready_to_wait_for_devices; | ||
191 | |||
192 | /* | ||
193 | * On a 5-minute timeout, wait for all devices currently configured. We need | ||
194 | * to do this to guarantee that the filesystems and / or network devices | ||
195 | * needed for boot are available, before we can allow the boot to proceed. | ||
196 | * | ||
197 | * This needs to be on a late_initcall, to happen after the frontend device | ||
198 | * drivers have been initialised, but before the root fs is mounted. | ||
199 | * | ||
200 | * A possible improvement here would be to have the tools add a per-device | ||
201 | * flag to the store entry, indicating whether it is needed at boot time. | ||
202 | * This would allow people who knew what they were doing to accelerate their | ||
203 | * boot slightly, but of course needs tools or manual intervention to set up | ||
204 | * those flags correctly. | ||
205 | */ | ||
206 | static void wait_for_devices(struct xenbus_driver *xendrv) | ||
207 | { | ||
208 | unsigned long start = jiffies; | ||
209 | struct device_driver *drv = xendrv ? &xendrv->driver : NULL; | ||
210 | unsigned int seconds_waited = 0; | ||
211 | |||
212 | if (!ready_to_wait_for_devices || !xen_domain()) | ||
213 | return; | ||
214 | |||
215 | while (exists_connecting_device(drv)) { | ||
216 | if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { | ||
217 | if (!seconds_waited) | ||
218 | printk(KERN_WARNING "XENBUS: Waiting for " | ||
219 | "devices to initialise: "); | ||
220 | seconds_waited += 5; | ||
221 | printk("%us...", 300 - seconds_waited); | ||
222 | if (seconds_waited == 300) | ||
223 | break; | ||
224 | } | ||
225 | |||
226 | schedule_timeout_interruptible(HZ/10); | ||
227 | } | ||
228 | |||
229 | if (seconds_waited) | ||
230 | printk("\n"); | ||
231 | |||
232 | bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, | ||
233 | print_device_status); | ||
234 | } | ||
235 | |||
236 | int __xenbus_register_frontend(struct xenbus_driver *drv, | ||
237 | struct module *owner, const char *mod_name) | ||
238 | { | ||
239 | int ret; | ||
240 | |||
241 | drv->read_otherend_details = read_backend_details; | ||
242 | |||
243 | ret = xenbus_register_driver_common(drv, &xenbus_frontend, | ||
244 | owner, mod_name); | ||
245 | if (ret) | ||
246 | return ret; | ||
247 | |||
248 | /* If this driver is loaded as a module wait for devices to attach. */ | ||
249 | wait_for_devices(drv); | ||
250 | |||
251 | return 0; | ||
252 | } | ||
253 | EXPORT_SYMBOL_GPL(__xenbus_register_frontend); | ||
254 | |||
255 | static int frontend_probe_and_watch(struct notifier_block *notifier, | ||
256 | unsigned long event, | ||
257 | void *data) | ||
258 | { | ||
259 | /* Enumerate devices in xenstore and watch for changes. */ | ||
260 | xenbus_probe_devices(&xenbus_frontend); | ||
261 | register_xenbus_watch(&fe_watch); | ||
262 | |||
263 | return NOTIFY_DONE; | ||
264 | } | ||
265 | |||
266 | |||
267 | static int __init xenbus_probe_frontend_init(void) | ||
268 | { | ||
269 | static struct notifier_block xenstore_notifier = { | ||
270 | .notifier_call = frontend_probe_and_watch | ||
271 | }; | ||
272 | int err; | ||
273 | |||
274 | DPRINTK(""); | ||
275 | |||
276 | /* Register ourselves with the kernel bus subsystem */ | ||
277 | err = bus_register(&xenbus_frontend.bus); | ||
278 | if (err) | ||
279 | return err; | ||
280 | |||
281 | register_xenstore_notifier(&xenstore_notifier); | ||
282 | |||
283 | return 0; | ||
284 | } | ||
285 | subsys_initcall(xenbus_probe_frontend_init); | ||
286 | |||
287 | #ifndef MODULE | ||
288 | static int __init boot_wait_for_devices(void) | ||
289 | { | ||
290 | if (xen_hvm_domain() && !xen_platform_pci_unplug) | ||
291 | return -ENODEV; | ||
292 | |||
293 | ready_to_wait_for_devices = 1; | ||
294 | wait_for_devices(NULL); | ||
295 | return 0; | ||
296 | } | ||
297 | |||
298 | late_initcall(boot_wait_for_devices); | ||
299 | #endif | ||
300 | |||
301 | MODULE_LICENSE("GPL"); | ||
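A frontend driver registers against the "xen" bus in the same fashion; its otherend_changed callback is what backend_changed()/xenbus_otherend_changed() above end up invoking, and a modular driver going through __xenbus_register_frontend() also pauses in wait_for_devices() until its already-configured devices connect. The sketch below is illustrative only ("demofront" is invented); the helpers are assumed from <xen/xenbus.h>.

/* Hypothetical frontend driver ("demofront"); helpers and struct layout
 * assumed from <xen/xenbus.h>, not part of this patch. */
#include <linux/init.h>
#include <linux/module.h>
#include <xen/xen.h>
#include <xen/xenbus.h>

static const struct xenbus_device_id demofront_ids[] = {
	{ "demo" },
	{ "" }
};

static int demofront_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	/* Set up rings and event channels, then advertise readiness. */
	return xenbus_switch_state(dev, XenbusStateInitialised);
}

/* Invoked via backend_changed() -> xenbus_otherend_changed() above. */
static void demofront_backend_changed(struct xenbus_device *dev,
				      enum xenbus_state backend_state)
{
	switch (backend_state) {
	case XenbusStateConnected:
		xenbus_switch_state(dev, XenbusStateConnected);
		break;
	case XenbusStateClosing:
	case XenbusStateClosed:
		xenbus_switch_state(dev, XenbusStateClosed);
		break;
	default:
		break;
	}
}

static struct xenbus_driver demofront_driver = {
	.name = "demofront",
	.owner = THIS_MODULE,
	.ids = demofront_ids,
	.probe = demofront_probe,
	.otherend_changed = demofront_backend_changed,
};

static int __init demofront_init(void)
{
	if (!xen_domain())
		return -ENODEV;
	/* When built as a module, registration also waits (via
	 * wait_for_devices()) for existing devices to connect. */
	return xenbus_register_frontend(&demofront_driver);
}
module_init(demofront_init);
MODULE_LICENSE("GPL");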
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile index 25275c3bbdff..4fde9440fe1f 100644 --- a/drivers/xen/xenfs/Makefile +++ b/drivers/xen/xenfs/Makefile | |||
@@ -1,3 +1,4 @@ | |||
1 | obj-$(CONFIG_XENFS) += xenfs.o | 1 | obj-$(CONFIG_XENFS) += xenfs.o |
2 | 2 | ||
3 | xenfs-objs = super.o xenbus.o \ No newline at end of file | 3 | xenfs-y = super.o xenbus.o privcmd.o |
4 | xenfs-$(CONFIG_XEN_DOM0) += xenstored.o | ||
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c new file mode 100644 index 000000000000..dbd3b16fd131 --- /dev/null +++ b/drivers/xen/xenfs/privcmd.c | |||
@@ -0,0 +1,400 @@ | |||
1 | /****************************************************************************** | ||
2 | * privcmd.c | ||
3 | * | ||
4 | * Interface to privileged domain-0 commands. | ||
5 | * | ||
6 | * Copyright (c) 2002-2004, K A Fraser, B Dragovic | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/mm.h> | ||
15 | #include <linux/mman.h> | ||
16 | #include <linux/uaccess.h> | ||
17 | #include <linux/swap.h> | ||
18 | #include <linux/highmem.h> | ||
19 | #include <linux/pagemap.h> | ||
20 | #include <linux/seq_file.h> | ||
21 | |||
22 | #include <asm/pgalloc.h> | ||
23 | #include <asm/pgtable.h> | ||
24 | #include <asm/tlb.h> | ||
25 | #include <asm/xen/hypervisor.h> | ||
26 | #include <asm/xen/hypercall.h> | ||
27 | |||
28 | #include <xen/xen.h> | ||
29 | #include <xen/privcmd.h> | ||
30 | #include <xen/interface/xen.h> | ||
31 | #include <xen/features.h> | ||
32 | #include <xen/page.h> | ||
33 | #include <xen/xen-ops.h> | ||
34 | |||
35 | #ifndef HAVE_ARCH_PRIVCMD_MMAP | ||
36 | static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); | ||
37 | #endif | ||
38 | |||
39 | static long privcmd_ioctl_hypercall(void __user *udata) | ||
40 | { | ||
41 | struct privcmd_hypercall hypercall; | ||
42 | long ret; | ||
43 | |||
44 | if (copy_from_user(&hypercall, udata, sizeof(hypercall))) | ||
45 | return -EFAULT; | ||
46 | |||
47 | ret = privcmd_call(hypercall.op, | ||
48 | hypercall.arg[0], hypercall.arg[1], | ||
49 | hypercall.arg[2], hypercall.arg[3], | ||
50 | hypercall.arg[4]); | ||
51 | |||
52 | return ret; | ||
53 | } | ||
54 | |||
55 | static void free_page_list(struct list_head *pages) | ||
56 | { | ||
57 | struct page *p, *n; | ||
58 | |||
59 | list_for_each_entry_safe(p, n, pages, lru) | ||
60 | __free_page(p); | ||
61 | |||
62 | INIT_LIST_HEAD(pages); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Given an array of items in userspace, return a list of pages | ||
67 | * containing the data. If copying fails, either because of memory | ||
68 | * allocation failure or a problem reading user memory, return an | ||
69 | * error code; it's up to the caller to dispose of any partial list. | ||
70 | */ | ||
71 | static int gather_array(struct list_head *pagelist, | ||
72 | unsigned nelem, size_t size, | ||
73 | void __user *data) | ||
74 | { | ||
75 | unsigned pageidx; | ||
76 | void *pagedata; | ||
77 | int ret; | ||
78 | |||
79 | if (size > PAGE_SIZE) | ||
80 | return 0; | ||
81 | |||
82 | pageidx = PAGE_SIZE; | ||
83 | pagedata = NULL; /* quiet, gcc */ | ||
84 | while (nelem--) { | ||
85 | if (pageidx > PAGE_SIZE-size) { | ||
86 | struct page *page = alloc_page(GFP_KERNEL); | ||
87 | |||
88 | ret = -ENOMEM; | ||
89 | if (page == NULL) | ||
90 | goto fail; | ||
91 | |||
92 | pagedata = page_address(page); | ||
93 | |||
94 | list_add_tail(&page->lru, pagelist); | ||
95 | pageidx = 0; | ||
96 | } | ||
97 | |||
98 | ret = -EFAULT; | ||
99 | if (copy_from_user(pagedata + pageidx, data, size)) | ||
100 | goto fail; | ||
101 | |||
102 | data += size; | ||
103 | pageidx += size; | ||
104 | } | ||
105 | |||
106 | ret = 0; | ||
107 | |||
108 | fail: | ||
109 | return ret; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * Call function "fn" on each element of the array fragmented | ||
114 | * over a list of pages. | ||
115 | */ | ||
116 | static int traverse_pages(unsigned nelem, size_t size, | ||
117 | struct list_head *pos, | ||
118 | int (*fn)(void *data, void *state), | ||
119 | void *state) | ||
120 | { | ||
121 | void *pagedata; | ||
122 | unsigned pageidx; | ||
123 | int ret = 0; | ||
124 | |||
125 | BUG_ON(size > PAGE_SIZE); | ||
126 | |||
127 | pageidx = PAGE_SIZE; | ||
128 | pagedata = NULL; /* hush, gcc */ | ||
129 | |||
130 | while (nelem--) { | ||
131 | if (pageidx > PAGE_SIZE-size) { | ||
132 | struct page *page; | ||
133 | pos = pos->next; | ||
134 | page = list_entry(pos, struct page, lru); | ||
135 | pagedata = page_address(page); | ||
136 | pageidx = 0; | ||
137 | } | ||
138 | |||
139 | ret = (*fn)(pagedata + pageidx, state); | ||
140 | if (ret) | ||
141 | break; | ||
142 | pageidx += size; | ||
143 | } | ||
144 | |||
145 | return ret; | ||
146 | } | ||
147 | |||
148 | struct mmap_mfn_state { | ||
149 | unsigned long va; | ||
150 | struct vm_area_struct *vma; | ||
151 | domid_t domain; | ||
152 | }; | ||
153 | |||
154 | static int mmap_mfn_range(void *data, void *state) | ||
155 | { | ||
156 | struct privcmd_mmap_entry *msg = data; | ||
157 | struct mmap_mfn_state *st = state; | ||
158 | struct vm_area_struct *vma = st->vma; | ||
159 | int rc; | ||
160 | |||
161 | /* Do not allow range to wrap the address space. */ | ||
162 | if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || | ||
163 | ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) | ||
164 | return -EINVAL; | ||
165 | |||
166 | /* Range chunks must be contiguous in va space. */ | ||
167 | if ((msg->va != st->va) || | ||
168 | ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) | ||
169 | return -EINVAL; | ||
170 | |||
171 | rc = xen_remap_domain_mfn_range(vma, | ||
172 | msg->va & PAGE_MASK, | ||
173 | msg->mfn, msg->npages, | ||
174 | vma->vm_page_prot, | ||
175 | st->domain); | ||
176 | if (rc < 0) | ||
177 | return rc; | ||
178 | |||
179 | st->va += msg->npages << PAGE_SHIFT; | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | static long privcmd_ioctl_mmap(void __user *udata) | ||
185 | { | ||
186 | struct privcmd_mmap mmapcmd; | ||
187 | struct mm_struct *mm = current->mm; | ||
188 | struct vm_area_struct *vma; | ||
189 | int rc; | ||
190 | LIST_HEAD(pagelist); | ||
191 | struct mmap_mfn_state state; | ||
192 | |||
193 | if (!xen_initial_domain()) | ||
194 | return -EPERM; | ||
195 | |||
196 | if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) | ||
197 | return -EFAULT; | ||
198 | |||
199 | rc = gather_array(&pagelist, | ||
200 | mmapcmd.num, sizeof(struct privcmd_mmap_entry), | ||
201 | mmapcmd.entry); | ||
202 | |||
203 | if (rc || list_empty(&pagelist)) | ||
204 | goto out; | ||
205 | |||
206 | down_write(&mm->mmap_sem); | ||
207 | |||
208 | { | ||
209 | struct page *page = list_first_entry(&pagelist, | ||
210 | struct page, lru); | ||
211 | struct privcmd_mmap_entry *msg = page_address(page); | ||
212 | |||
213 | vma = find_vma(mm, msg->va); | ||
214 | rc = -EINVAL; | ||
215 | |||
216 | if (!vma || (msg->va != vma->vm_start) || | ||
217 | !privcmd_enforce_singleshot_mapping(vma)) | ||
218 | goto out_up; | ||
219 | } | ||
220 | |||
221 | state.va = vma->vm_start; | ||
222 | state.vma = vma; | ||
223 | state.domain = mmapcmd.dom; | ||
224 | |||
225 | rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), | ||
226 | &pagelist, | ||
227 | mmap_mfn_range, &state); | ||
228 | |||
229 | |||
230 | out_up: | ||
231 | up_write(&mm->mmap_sem); | ||
232 | |||
233 | out: | ||
234 | free_page_list(&pagelist); | ||
235 | |||
236 | return rc; | ||
237 | } | ||
238 | |||
239 | struct mmap_batch_state { | ||
240 | domid_t domain; | ||
241 | unsigned long va; | ||
242 | struct vm_area_struct *vma; | ||
243 | int err; | ||
244 | |||
245 | xen_pfn_t __user *user; | ||
246 | }; | ||
247 | |||
248 | static int mmap_batch_fn(void *data, void *state) | ||
249 | { | ||
250 | xen_pfn_t *mfnp = data; | ||
251 | struct mmap_batch_state *st = state; | ||
252 | |||
253 | if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, | ||
254 | st->vma->vm_page_prot, st->domain) < 0) { | ||
255 | *mfnp |= 0xf0000000U; | ||
256 | st->err++; | ||
257 | } | ||
258 | st->va += PAGE_SIZE; | ||
259 | |||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | static int mmap_return_errors(void *data, void *state) | ||
264 | { | ||
265 | xen_pfn_t *mfnp = data; | ||
266 | struct mmap_batch_state *st = state; | ||
267 | |||
268 | return put_user(*mfnp, st->user++); | ||
269 | } | ||
270 | |||
271 | static struct vm_operations_struct privcmd_vm_ops; | ||
272 | |||
273 | static long privcmd_ioctl_mmap_batch(void __user *udata) | ||
274 | { | ||
275 | int ret; | ||
276 | struct privcmd_mmapbatch m; | ||
277 | struct mm_struct *mm = current->mm; | ||
278 | struct vm_area_struct *vma; | ||
279 | unsigned long nr_pages; | ||
280 | LIST_HEAD(pagelist); | ||
281 | struct mmap_batch_state state; | ||
282 | |||
283 | if (!xen_initial_domain()) | ||
284 | return -EPERM; | ||
285 | |||
286 | if (copy_from_user(&m, udata, sizeof(m))) | ||
287 | return -EFAULT; | ||
288 | |||
289 | nr_pages = m.num; | ||
290 | if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) | ||
291 | return -EINVAL; | ||
292 | |||
293 | ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), | ||
294 | m.arr); | ||
295 | |||
296 | if (ret || list_empty(&pagelist)) | ||
297 | goto out; | ||
298 | |||
299 | down_write(&mm->mmap_sem); | ||
300 | |||
301 | vma = find_vma(mm, m.addr); | ||
302 | ret = -EINVAL; | ||
303 | if (!vma || | ||
304 | vma->vm_ops != &privcmd_vm_ops || | ||
305 | (m.addr != vma->vm_start) || | ||
306 | ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || | ||
307 | !privcmd_enforce_singleshot_mapping(vma)) { | ||
308 | up_write(&mm->mmap_sem); | ||
309 | goto out; | ||
310 | } | ||
311 | |||
312 | state.domain = m.dom; | ||
313 | state.vma = vma; | ||
314 | state.va = m.addr; | ||
315 | state.err = 0; | ||
316 | |||
317 | ret = traverse_pages(m.num, sizeof(xen_pfn_t), | ||
318 | &pagelist, mmap_batch_fn, &state); | ||
319 | |||
320 | up_write(&mm->mmap_sem); | ||
321 | |||
322 | if (state.err > 0) { | ||
323 | state.user = m.arr; | ||
324 | ret = traverse_pages(m.num, sizeof(xen_pfn_t), | ||
325 | &pagelist, | ||
326 | mmap_return_errors, &state); | ||
327 | } | ||
328 | |||
329 | out: | ||
330 | free_page_list(&pagelist); | ||
331 | |||
332 | return ret; | ||
333 | } | ||
334 | |||
335 | static long privcmd_ioctl(struct file *file, | ||
336 | unsigned int cmd, unsigned long data) | ||
337 | { | ||
338 | int ret = -ENOSYS; | ||
339 | void __user *udata = (void __user *) data; | ||
340 | |||
341 | switch (cmd) { | ||
342 | case IOCTL_PRIVCMD_HYPERCALL: | ||
343 | ret = privcmd_ioctl_hypercall(udata); | ||
344 | break; | ||
345 | |||
346 | case IOCTL_PRIVCMD_MMAP: | ||
347 | ret = privcmd_ioctl_mmap(udata); | ||
348 | break; | ||
349 | |||
350 | case IOCTL_PRIVCMD_MMAPBATCH: | ||
351 | ret = privcmd_ioctl_mmap_batch(udata); | ||
352 | break; | ||
353 | |||
354 | default: | ||
355 | ret = -EINVAL; | ||
356 | break; | ||
357 | } | ||
358 | |||
359 | return ret; | ||
360 | } | ||
361 | |||
362 | #ifndef HAVE_ARCH_PRIVCMD_MMAP | ||
363 | static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
364 | { | ||
365 | printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", | ||
366 | vma, vma->vm_start, vma->vm_end, | ||
367 | vmf->pgoff, vmf->virtual_address); | ||
368 | |||
369 | return VM_FAULT_SIGBUS; | ||
370 | } | ||
371 | |||
372 | static struct vm_operations_struct privcmd_vm_ops = { | ||
373 | .fault = privcmd_fault | ||
374 | }; | ||
375 | |||
376 | static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) | ||
377 | { | ||
378 | /* Unsupported for auto-translate guests. */ | ||
379 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
380 | return -ENOSYS; | ||
381 | |||
382 | /* DONTCOPY is essential for Xen because copy_page_range doesn't know | ||
383 | * how to recreate these mappings */ | ||
384 | vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; | ||
385 | vma->vm_ops = &privcmd_vm_ops; | ||
386 | vma->vm_private_data = NULL; | ||
387 | |||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) | ||
392 | { | ||
393 | return (xchg(&vma->vm_private_data, (void *)1) == NULL); | ||
394 | } | ||
395 | #endif | ||
396 | |||
397 | const struct file_operations privcmd_file_ops = { | ||
398 | .unlocked_ioctl = privcmd_ioctl, | ||
399 | .mmap = privcmd_mmap, | ||
400 | }; | ||
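Userspace reaches this driver through the privcmd node that the xenfs change below exposes (conventionally mounted at /proc/xen). A minimal, illustrative sketch of issuing a hypercall follows; it assumes the struct privcmd_hypercall layout and IOCTL_PRIVCMD_HYPERCALL macro from <xen/privcmd.h> are visible to the userspace build (the Xen tools normally carry their own copies), and the hypercall numbers quoted in the comments come from xen/interface/xen.h.

/* Userspace sketch, not part of the patch: ask Xen for its version
 * through IOCTL_PRIVCMD_HYPERCALL.  Assumes xenfs is mounted at
 * /proc/xen and the privcmd definitions are available to the build. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xen/privcmd.h>

#define __HYPERVISOR_xen_version 17	/* from xen/interface/xen.h */
#define XENVER_version 0

int main(void)
{
	struct privcmd_hypercall call = {
		.op = __HYPERVISOR_xen_version,
		.arg = { XENVER_version, 0, 0, 0, 0 },
	};
	long ver;
	int fd = open("/proc/xen/privcmd", O_RDWR);

	if (fd < 0)
		return 1;
	/* The hypercall's return value is passed back as the ioctl result. */
	ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
	if (ver >= 0)
		printf("Xen %ld.%ld\n", ver >> 16, ver & 0xffff);
	close(fd);
	return 0;
}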
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 78bfab0700ba..1aa389719846 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c | |||
@@ -22,6 +22,46 @@ | |||
22 | MODULE_DESCRIPTION("Xen filesystem"); | 22 | MODULE_DESCRIPTION("Xen filesystem"); |
23 | MODULE_LICENSE("GPL"); | 23 | MODULE_LICENSE("GPL"); |
24 | 24 | ||
25 | static struct inode *xenfs_make_inode(struct super_block *sb, int mode) | ||
26 | { | ||
27 | struct inode *ret = new_inode(sb); | ||
28 | |||
29 | if (ret) { | ||
30 | ret->i_mode = mode; | ||
31 | ret->i_uid = ret->i_gid = 0; | ||
32 | ret->i_blocks = 0; | ||
33 | ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; | ||
34 | } | ||
35 | return ret; | ||
36 | } | ||
37 | |||
38 | static struct dentry *xenfs_create_file(struct super_block *sb, | ||
39 | struct dentry *parent, | ||
40 | const char *name, | ||
41 | const struct file_operations *fops, | ||
42 | void *data, | ||
43 | int mode) | ||
44 | { | ||
45 | struct dentry *dentry; | ||
46 | struct inode *inode; | ||
47 | |||
48 | dentry = d_alloc_name(parent, name); | ||
49 | if (!dentry) | ||
50 | return NULL; | ||
51 | |||
52 | inode = xenfs_make_inode(sb, S_IFREG | mode); | ||
53 | if (!inode) { | ||
54 | dput(dentry); | ||
55 | return NULL; | ||
56 | } | ||
57 | |||
58 | inode->i_fop = fops; | ||
59 | inode->i_private = data; | ||
60 | |||
61 | d_add(dentry, inode); | ||
62 | return dentry; | ||
63 | } | ||
64 | |||
25 | static ssize_t capabilities_read(struct file *file, char __user *buf, | 65 | static ssize_t capabilities_read(struct file *file, char __user *buf, |
26 | size_t size, loff_t *off) | 66 | size_t size, loff_t *off) |
27 | { | 67 | { |
@@ -35,6 +75,7 @@ static ssize_t capabilities_read(struct file *file, char __user *buf, | |||
35 | 75 | ||
36 | static const struct file_operations capabilities_file_ops = { | 76 | static const struct file_operations capabilities_file_ops = { |
37 | .read = capabilities_read, | 77 | .read = capabilities_read, |
78 | .llseek = default_llseek, | ||
38 | }; | 79 | }; |
39 | 80 | ||
40 | static int xenfs_fill_super(struct super_block *sb, void *data, int silent) | 81 | static int xenfs_fill_super(struct super_block *sb, void *data, int silent) |
@@ -43,23 +84,36 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) | |||
43 | [1] = {}, | 84 | [1] = {}, |
44 | { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR }, | 85 | { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR }, |
45 | { "capabilities", &capabilities_file_ops, S_IRUGO }, | 86 | { "capabilities", &capabilities_file_ops, S_IRUGO }, |
87 | { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR }, | ||
46 | {""}, | 88 | {""}, |
47 | }; | 89 | }; |
90 | int rc; | ||
91 | |||
92 | rc = simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files); | ||
93 | if (rc < 0) | ||
94 | return rc; | ||
95 | |||
96 | if (xen_initial_domain()) { | ||
97 | xenfs_create_file(sb, sb->s_root, "xsd_kva", | ||
98 | &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR); | ||
99 | xenfs_create_file(sb, sb->s_root, "xsd_port", | ||
100 | &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR); | ||
101 | } | ||
48 | 102 | ||
49 | return simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files); | 103 | return rc; |
50 | } | 104 | } |
51 | 105 | ||
52 | static int xenfs_get_sb(struct file_system_type *fs_type, | 106 | static struct dentry *xenfs_mount(struct file_system_type *fs_type, |
53 | int flags, const char *dev_name, | 107 | int flags, const char *dev_name, |
54 | void *data, struct vfsmount *mnt) | 108 | void *data) |
55 | { | 109 | { |
56 | return get_sb_single(fs_type, flags, data, xenfs_fill_super, mnt); | 110 | return mount_single(fs_type, flags, data, xenfs_fill_super); |
57 | } | 111 | } |
58 | 112 | ||
59 | static struct file_system_type xenfs_type = { | 113 | static struct file_system_type xenfs_type = { |
60 | .owner = THIS_MODULE, | 114 | .owner = THIS_MODULE, |
61 | .name = "xenfs", | 115 | .name = "xenfs", |
62 | .get_sb = xenfs_get_sb, | 116 | .mount = xenfs_mount, |
63 | .kill_sb = kill_litter_super, | 117 | .kill_sb = kill_litter_super, |
64 | }; | 118 | }; |
65 | 119 | ||
diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenfs/xenbus.c index 3b39c3752e21..bbd000f88af7 100644 --- a/drivers/xen/xenfs/xenbus.c +++ b/drivers/xen/xenfs/xenbus.c | |||
@@ -122,6 +122,7 @@ static ssize_t xenbus_file_read(struct file *filp, | |||
122 | int ret; | 122 | int ret; |
123 | 123 | ||
124 | mutex_lock(&u->reply_mutex); | 124 | mutex_lock(&u->reply_mutex); |
125 | again: | ||
125 | while (list_empty(&u->read_buffers)) { | 126 | while (list_empty(&u->read_buffers)) { |
126 | mutex_unlock(&u->reply_mutex); | 127 | mutex_unlock(&u->reply_mutex); |
127 | if (filp->f_flags & O_NONBLOCK) | 128 | if (filp->f_flags & O_NONBLOCK) |
@@ -144,7 +145,7 @@ static ssize_t xenbus_file_read(struct file *filp, | |||
144 | i += sz - ret; | 145 | i += sz - ret; |
145 | rb->cons += sz - ret; | 146 | rb->cons += sz - ret; |
146 | 147 | ||
147 | if (ret != sz) { | 148 | if (ret != 0) { |
148 | if (i == 0) | 149 | if (i == 0) |
149 | i = -EFAULT; | 150 | i = -EFAULT; |
150 | goto out; | 151 | goto out; |
@@ -160,6 +161,8 @@ static ssize_t xenbus_file_read(struct file *filp, | |||
160 | struct read_buffer, list); | 161 | struct read_buffer, list); |
161 | } | 162 | } |
162 | } | 163 | } |
164 | if (i == 0) | ||
165 | goto again; | ||
163 | 166 | ||
164 | out: | 167 | out: |
165 | mutex_unlock(&u->reply_mutex); | 168 | mutex_unlock(&u->reply_mutex); |
@@ -407,6 +410,7 @@ static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) | |||
407 | 410 | ||
408 | mutex_lock(&u->reply_mutex); | 411 | mutex_lock(&u->reply_mutex); |
409 | rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); | 412 | rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); |
413 | wake_up(&u->read_waitq); | ||
410 | mutex_unlock(&u->reply_mutex); | 414 | mutex_unlock(&u->reply_mutex); |
411 | } | 415 | } |
412 | 416 | ||
@@ -455,7 +459,7 @@ static ssize_t xenbus_file_write(struct file *filp, | |||
455 | 459 | ||
456 | ret = copy_from_user(u->u.buffer + u->len, ubuf, len); | 460 | ret = copy_from_user(u->u.buffer + u->len, ubuf, len); |
457 | 461 | ||
458 | if (ret == len) { | 462 | if (ret != 0) { |
459 | rc = -EFAULT; | 463 | rc = -EFAULT; |
460 | goto out; | 464 | goto out; |
461 | } | 465 | } |
@@ -488,21 +492,6 @@ static ssize_t xenbus_file_write(struct file *filp, | |||
488 | msg_type = u->u.msg.type; | 492 | msg_type = u->u.msg.type; |
489 | 493 | ||
490 | switch (msg_type) { | 494 | switch (msg_type) { |
491 | case XS_TRANSACTION_START: | ||
492 | case XS_TRANSACTION_END: | ||
493 | case XS_DIRECTORY: | ||
494 | case XS_READ: | ||
495 | case XS_GET_PERMS: | ||
496 | case XS_RELEASE: | ||
497 | case XS_GET_DOMAIN_PATH: | ||
498 | case XS_WRITE: | ||
499 | case XS_MKDIR: | ||
500 | case XS_RM: | ||
501 | case XS_SET_PERMS: | ||
502 | /* Send out a transaction */ | ||
503 | ret = xenbus_write_transaction(msg_type, u); | ||
504 | break; | ||
505 | |||
506 | case XS_WATCH: | 495 | case XS_WATCH: |
507 | case XS_UNWATCH: | 496 | case XS_UNWATCH: |
508 | /* (Un)Ask for some path to be watched for changes */ | 497 | /* (Un)Ask for some path to be watched for changes */ |
@@ -510,7 +499,8 @@ static ssize_t xenbus_file_write(struct file *filp, | |||
510 | break; | 499 | break; |
511 | 500 | ||
512 | default: | 501 | default: |
513 | ret = -EINVAL; | 502 | /* Send out a transaction */ |
503 | ret = xenbus_write_transaction(msg_type, u); | ||
514 | break; | 504 | break; |
515 | } | 505 | } |
516 | if (ret != 0) | 506 | if (ret != 0) |
@@ -555,6 +545,7 @@ static int xenbus_file_release(struct inode *inode, struct file *filp) | |||
555 | struct xenbus_file_priv *u = filp->private_data; | 545 | struct xenbus_file_priv *u = filp->private_data; |
556 | struct xenbus_transaction_holder *trans, *tmp; | 546 | struct xenbus_transaction_holder *trans, *tmp; |
557 | struct watch_adapter *watch, *tmp_watch; | 547 | struct watch_adapter *watch, *tmp_watch; |
548 | struct read_buffer *rb, *tmp_rb; | ||
558 | 549 | ||
559 | /* | 550 | /* |
560 | * No need for locking here because there are no other users, | 551 | * No need for locking here because there are no other users, |
@@ -573,6 +564,10 @@ static int xenbus_file_release(struct inode *inode, struct file *filp) | |||
573 | free_watch_adapter(watch); | 564 | free_watch_adapter(watch); |
574 | } | 565 | } |
575 | 566 | ||
567 | list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { | ||
568 | list_del(&rb->list); | ||
569 | kfree(rb); | ||
570 | } | ||
576 | kfree(u); | 571 | kfree(u); |
577 | 572 | ||
578 | return 0; | 573 | return 0; |
@@ -594,4 +589,5 @@ const struct file_operations xenbus_file_ops = { | |||
594 | .open = xenbus_file_open, | 589 | .open = xenbus_file_open, |
595 | .release = xenbus_file_release, | 590 | .release = xenbus_file_release, |
596 | .poll = xenbus_file_poll, | 591 | .poll = xenbus_file_poll, |
592 | .llseek = no_llseek, | ||
597 | }; | 593 | }; |
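With the hunks above, any xenstore message type other than watch requests is forwarded as a transaction, short user copies now fail cleanly with -EFAULT, and readers are woken when watch events are queued. A request written to the xenbus file is an xsd_sockmsg header followed by its payload; the sketch below is illustrative only and assumes the xs_wire.h definitions are available to the userspace build and that xenfs is mounted at /proc/xen.

/* Userspace sketch, not part of the patch: a single XS_READ request and
 * its reply on the xenbus file.  The path is just an example; error
 * handling is omitted. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <xen/interface/io/xs_wire.h>

int main(void)
{
	const char path[] = "domid";		/* example node */
	struct xsd_sockmsg hdr = {
		.type = XS_READ,
		.req_id = 1,
		.tx_id = 0,			/* no transaction */
		.len = sizeof(path),		/* includes trailing NUL */
	};
	char reply[256];
	ssize_t n;
	int fd = open("/proc/xen/xenbus", O_RDWR);

	if (fd < 0)
		return 1;
	/* The driver buffers writes until header + payload are complete. */
	write(fd, &hdr, sizeof(hdr));
	write(fd, path, sizeof(path));
	/* The reply comes back with its own xsd_sockmsg header prepended. */
	n = read(fd, reply, sizeof(reply));
	if (n > (ssize_t)sizeof(hdr))
		printf("reply: %.*s\n", (int)(n - sizeof(hdr)),
		       reply + sizeof(hdr));
	close(fd);
	return 0;
}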
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h index 51f08b2d0bf1..b68aa6200003 100644 --- a/drivers/xen/xenfs/xenfs.h +++ b/drivers/xen/xenfs/xenfs.h | |||
@@ -2,5 +2,8 @@ | |||
2 | #define _XENFS_XENBUS_H | 2 | #define _XENFS_XENBUS_H |
3 | 3 | ||
4 | extern const struct file_operations xenbus_file_ops; | 4 | extern const struct file_operations xenbus_file_ops; |
5 | extern const struct file_operations privcmd_file_ops; | ||
6 | extern const struct file_operations xsd_kva_file_ops; | ||
7 | extern const struct file_operations xsd_port_file_ops; | ||
5 | 8 | ||
6 | #endif /* _XENFS_XENBUS_H */ | 9 | #endif /* _XENFS_XENBUS_H */ |
diff --git a/drivers/xen/xenfs/xenstored.c b/drivers/xen/xenfs/xenstored.c new file mode 100644 index 000000000000..fef20dbc6a5c --- /dev/null +++ b/drivers/xen/xenfs/xenstored.c | |||
@@ -0,0 +1,68 @@ | |||
1 | #include <linux/slab.h> | ||
2 | #include <linux/types.h> | ||
3 | #include <linux/mm.h> | ||
4 | #include <linux/fs.h> | ||
5 | |||
6 | #include <xen/page.h> | ||
7 | |||
8 | #include "xenfs.h" | ||
9 | #include "../xenbus/xenbus_comms.h" | ||
10 | |||
11 | static ssize_t xsd_read(struct file *file, char __user *buf, | ||
12 | size_t size, loff_t *off) | ||
13 | { | ||
14 | const char *str = (const char *)file->private_data; | ||
15 | return simple_read_from_buffer(buf, size, off, str, strlen(str)); | ||
16 | } | ||
17 | |||
18 | static int xsd_release(struct inode *inode, struct file *file) | ||
19 | { | ||
20 | kfree(file->private_data); | ||
21 | return 0; | ||
22 | } | ||
23 | |||
24 | static int xsd_kva_open(struct inode *inode, struct file *file) | ||
25 | { | ||
26 | file->private_data = (void *)kasprintf(GFP_KERNEL, "0x%p", | ||
27 | xen_store_interface); | ||
28 | if (!file->private_data) | ||
29 | return -ENOMEM; | ||
30 | return 0; | ||
31 | } | ||
32 | |||
33 | static int xsd_kva_mmap(struct file *file, struct vm_area_struct *vma) | ||
34 | { | ||
35 | size_t size = vma->vm_end - vma->vm_start; | ||
36 | |||
37 | if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) | ||
38 | return -EINVAL; | ||
39 | |||
40 | if (remap_pfn_range(vma, vma->vm_start, | ||
41 | virt_to_pfn(xen_store_interface), | ||
42 | size, vma->vm_page_prot)) | ||
43 | return -EAGAIN; | ||
44 | |||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | const struct file_operations xsd_kva_file_ops = { | ||
49 | .open = xsd_kva_open, | ||
50 | .mmap = xsd_kva_mmap, | ||
51 | .read = xsd_read, | ||
52 | .release = xsd_release, | ||
53 | }; | ||
54 | |||
55 | static int xsd_port_open(struct inode *inode, struct file *file) | ||
56 | { | ||
57 | file->private_data = (void *)kasprintf(GFP_KERNEL, "%d", | ||
58 | xen_store_evtchn); | ||
59 | if (!file->private_data) | ||
60 | return -ENOMEM; | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | const struct file_operations xsd_port_file_ops = { | ||
65 | .open = xsd_port_open, | ||
66 | .read = xsd_read, | ||
67 | .release = xsd_release, | ||
68 | }; | ||
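These two files exist so that a userspace xenstore daemon running in dom0 can locate the shared ring page and its event channel. A rough sketch of how such a daemon might consume them follows (paths assume xenfs is mounted at /proc/xen; error handling is largely omitted).

/* Userspace sketch, not part of the patch: read the store event channel
 * from xsd_port and map the ring page exposed through xsd_kva. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	char buf[32];
	ssize_t n;
	int port;
	void *ring;
	int port_fd = open("/proc/xen/xsd_port", O_RDONLY);
	int kva_fd = open("/proc/xen/xsd_kva", O_RDWR);

	if (port_fd < 0 || kva_fd < 0)
		return 1;

	/* xsd_port holds the store event-channel number as text. */
	n = read(port_fd, buf, sizeof(buf) - 1);
	buf[n > 0 ? n : 0] = '\0';
	port = atoi(buf);

	/* xsd_kva maps the single shared ring page (at most PAGE_SIZE). */
	ring = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
		    MAP_SHARED, kva_fd, 0);

	printf("event channel %d, ring mapped at %p\n", port, ring);
	return 0;
}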