Diffstat (limited to 'drivers'):
 26 files changed, 6366 insertions(+), 34 deletions(-)
diff --git a/drivers/Makefile b/drivers/Makefile
index 503d82569449..6d9d7fab77f5 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -15,6 +15,8 @@ obj-$(CONFIG_ACPI) += acpi/
15 | obj-$(CONFIG_PNP) += pnp/ | 15 | obj-$(CONFIG_PNP) += pnp/ |
16 | obj-$(CONFIG_ARM_AMBA) += amba/ | 16 | obj-$(CONFIG_ARM_AMBA) += amba/ |
17 | 17 | ||
18 | obj-$(CONFIG_XEN) += xen/ | ||
19 | |||
18 | # char/ comes before serial/ etc so that the VT console is the boot-time | 20 | # char/ comes before serial/ etc so that the VT console is the boot-time |
19 | # default. | 21 | # default. |
20 | obj-y += char/ | 22 | obj-y += char/ |
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 88a6fc7fd271..58f1338981bc 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -40,6 +40,7 @@
40 | #include <linux/jiffies.h> | 40 | #include <linux/jiffies.h> |
41 | #include <linux/kmod.h> | 41 | #include <linux/kmod.h> |
42 | #include <linux/seq_file.h> | 42 | #include <linux/seq_file.h> |
43 | #include <linux/reboot.h> | ||
43 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
44 | 45 | ||
45 | #include <acpi/acpi_bus.h> | 46 | #include <acpi/acpi_bus.h> |
@@ -59,7 +60,6 @@
59 | #define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0 | 60 | #define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0 |
60 | #define ACPI_THERMAL_NOTIFY_HOT 0xF1 | 61 | #define ACPI_THERMAL_NOTIFY_HOT 0xF1 |
61 | #define ACPI_THERMAL_MODE_ACTIVE 0x00 | 62 | #define ACPI_THERMAL_MODE_ACTIVE 0x00 |
62 | #define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff" | ||
63 | 63 | ||
64 | #define ACPI_THERMAL_MAX_ACTIVE 10 | 64 | #define ACPI_THERMAL_MAX_ACTIVE 10 |
65 | #define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65 | 65 | #define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65 |
@@ -419,26 +419,6 @@ static int acpi_thermal_get_devices(struct acpi_thermal *tz)
419 | return 0; | 419 | return 0; |
420 | } | 420 | } |
421 | 421 | ||
422 | static int acpi_thermal_call_usermode(char *path) | ||
423 | { | ||
424 | char *argv[2] = { NULL, NULL }; | ||
425 | char *envp[3] = { NULL, NULL, NULL }; | ||
426 | |||
427 | |||
428 | if (!path) | ||
429 | return -EINVAL; | ||
430 | |||
431 | argv[0] = path; | ||
432 | |||
433 | /* minimal command environment */ | ||
434 | envp[0] = "HOME=/"; | ||
435 | envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; | ||
436 | |||
437 | call_usermodehelper(argv[0], argv, envp, 0); | ||
438 | |||
439 | return 0; | ||
440 | } | ||
441 | |||
442 | static int acpi_thermal_critical(struct acpi_thermal *tz) | 422 | static int acpi_thermal_critical(struct acpi_thermal *tz) |
443 | { | 423 | { |
444 | if (!tz || !tz->trips.critical.flags.valid) | 424 | if (!tz || !tz->trips.critical.flags.valid) |
@@ -456,7 +436,7 @@ static int acpi_thermal_critical(struct acpi_thermal *tz)
456 | acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL, | 436 | acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL, |
457 | tz->trips.critical.flags.enabled); | 437 | tz->trips.critical.flags.enabled); |
458 | 438 | ||
459 | acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF); | 439 | orderly_poweroff(true); |
460 | 440 | ||
461 | return 0; | 441 | return 0; |
462 | } | 442 | } |
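The one-line replacement above delegates the deleted open-coded helper to the generic orderly_poweroff() merged alongside this series. A minimal sketch of what that call amounts to in this era's kernel/sys.c (illustrative only; the _sketch name is hypothetical, not driver code):

#include <linux/kmod.h>
#include <linux/reboot.h>

/* Sketch: run /sbin/poweroff, and if that fails while 'force' is set,
 * fall back to an immediate kernel-initiated power-off. */
static int orderly_poweroff_sketch(bool force)
{
	static char *argv[] = { "/sbin/poweroff", NULL };
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};
	int ret;

	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
	if (ret && force) {
		emergency_sync();	/* flush what we can */
		kernel_power_off();	/* does not return */
	}
	return ret;
}

This is why the thermal driver can drop both ACPI_THERMAL_PATH_POWEROFF and its private environment setup: the common helper carries them.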
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 8f65b88cf711..a4a311992408 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -427,4 +427,13 @@ config XILINX_SYSACE
427 | help | 427 | help |
428 | Include support for the Xilinx SystemACE CompactFlash interface | 428 | Include support for the Xilinx SystemACE CompactFlash interface |
429 | 429 | ||
430 | config XEN_BLKDEV_FRONTEND | ||
431 | tristate "Xen virtual block device support" | ||
432 | depends on XEN | ||
433 | default y | ||
434 | help | ||
435 | This driver implements the front-end of the Xen virtual | ||
436 | block device driver. It communicates with a back-end driver | ||
437 | in another domain which drives the actual block device. | ||
438 | |||
430 | endif # BLK_DEV | 439 | endif # BLK_DEV |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 9ee08ab4ffa8..3e31532df0ed 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_VIODASD) += viodasd.o
29 | obj-$(CONFIG_BLK_DEV_SX8) += sx8.o | 29 | obj-$(CONFIG_BLK_DEV_SX8) += sx8.o |
30 | obj-$(CONFIG_BLK_DEV_UB) += ub.o | 30 | obj-$(CONFIG_BLK_DEV_UB) += ub.o |
31 | 31 | ||
32 | obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o | ||
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
new file mode 100644
index 000000000000..6746c29181f8
--- /dev/null
+++ b/drivers/block/xen-blkfront.c
@@ -0,0 +1,988 @@
1 | /* | ||
2 | * blkfront.c | ||
3 | * | ||
4 | * XenLinux virtual block device driver. | ||
5 | * | ||
6 | * Copyright (c) 2003-2004, Keir Fraser & Steve Hand | ||
7 | * Modifications by Mark A. Williamson are (c) Intel Research Cambridge | ||
8 | * Copyright (c) 2004, Christian Limpach | ||
9 | * Copyright (c) 2004, Andrew Warfield | ||
10 | * Copyright (c) 2005, Christopher Clark | ||
11 | * Copyright (c) 2005, XenSource Ltd | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public License version 2 | ||
15 | * as published by the Free Software Foundation; or, when distributed | ||
16 | * separately from the Linux kernel or incorporated into other | ||
17 | * software packages, subject to the following license: | ||
18 | * | ||
19 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
20 | * of this source file (the "Software"), to deal in the Software without | ||
21 | * restriction, including without limitation the rights to use, copy, modify, | ||
22 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
23 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
24 | * the following conditions: | ||
25 | * | ||
26 | * The above copyright notice and this permission notice shall be included in | ||
27 | * all copies or substantial portions of the Software. | ||
28 | * | ||
29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
30 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
31 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
32 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
33 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
34 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
35 | * IN THE SOFTWARE. | ||
36 | */ | ||
37 | |||
38 | #include <linux/interrupt.h> | ||
39 | #include <linux/blkdev.h> | ||
40 | #include <linux/module.h> | ||
41 | |||
42 | #include <xen/xenbus.h> | ||
43 | #include <xen/grant_table.h> | ||
44 | #include <xen/events.h> | ||
45 | #include <xen/page.h> | ||
46 | |||
47 | #include <xen/interface/grant_table.h> | ||
48 | #include <xen/interface/io/blkif.h> | ||
49 | |||
50 | #include <asm/xen/hypervisor.h> | ||
51 | |||
52 | enum blkif_state { | ||
53 | BLKIF_STATE_DISCONNECTED, | ||
54 | BLKIF_STATE_CONNECTED, | ||
55 | BLKIF_STATE_SUSPENDED, | ||
56 | }; | ||
57 | |||
58 | struct blk_shadow { | ||
59 | struct blkif_request req; | ||
60 | unsigned long request; | ||
61 | unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
62 | }; | ||
63 | |||
64 | static struct block_device_operations xlvbd_block_fops; | ||
65 | |||
66 | #define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) | ||
67 | |||
68 | /* | ||
69 | * We have one of these per vbd, whether ide, scsi or 'other'. They | ||
70 | * hang in private_data off the gendisk structure. We may end up | ||
71 | * putting all kinds of interesting stuff here :-) | ||
72 | */ | ||
73 | struct blkfront_info | ||
74 | { | ||
75 | struct xenbus_device *xbdev; | ||
76 | dev_t dev; | ||
77 | struct gendisk *gd; | ||
78 | int vdevice; | ||
79 | blkif_vdev_t handle; | ||
80 | enum blkif_state connected; | ||
81 | int ring_ref; | ||
82 | struct blkif_front_ring ring; | ||
83 | unsigned int evtchn, irq; | ||
84 | struct request_queue *rq; | ||
85 | struct work_struct work; | ||
86 | struct gnttab_free_callback callback; | ||
87 | struct blk_shadow shadow[BLK_RING_SIZE]; | ||
88 | unsigned long shadow_free; | ||
89 | int feature_barrier; | ||
90 | |||
91 | /** | ||
92 | * The number of people holding this device open. We won't allow a | ||
93 | * hot-unplug unless this is 0. | ||
94 | */ | ||
95 | int users; | ||
96 | }; | ||
97 | |||
98 | static DEFINE_SPINLOCK(blkif_io_lock); | ||
99 | |||
100 | #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ | ||
101 | (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) | ||
102 | #define GRANT_INVALID_REF 0 | ||
103 | |||
104 | #define PARTS_PER_DISK 16 | ||
105 | |||
106 | #define BLKIF_MAJOR(dev) ((dev)>>8) | ||
107 | #define BLKIF_MINOR(dev) ((dev) & 0xff) | ||
108 | |||
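For concreteness, the sizes these macros imply, assuming 4 KiB pages and the classic 11-segment blkif request layout (illustrative numbers, not part of the patch):

/*
 * __RING_SIZE rounds the per-page entry count down to a power of two,
 * giving BLK_RING_SIZE = 32 slots, so MAXIMUM_OUTSTANDING_BLOCK_REQS
 * works out to 11 * 32 = 352 in-flight segments.  BLKIF_MAJOR/BLKIF_MINOR
 * simply unpack the 16-bit vdevice id: e.g. vdevice 0x0801 -> major 8,
 * minor 1.
 */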
109 | #define DEV_NAME "xvd" /* name in /dev */ | ||
110 | |||
111 | /* Information about our VBDs. */ | ||
112 | #define MAX_VBDS 64 | ||
113 | static LIST_HEAD(vbds_list); | ||
114 | |||
115 | static int get_id_from_freelist(struct blkfront_info *info) | ||
116 | { | ||
117 | unsigned long free = info->shadow_free; | ||
118 | BUG_ON(free > BLK_RING_SIZE); | ||
119 | info->shadow_free = info->shadow[free].req.id; | ||
120 | info->shadow[free].req.id = 0x0fffffee; /* debug */ | ||
121 | return free; | ||
122 | } | ||
123 | |||
124 | static void add_id_to_freelist(struct blkfront_info *info, | ||
125 | unsigned long id) | ||
126 | { | ||
127 | info->shadow[id].req.id = info->shadow_free; | ||
128 | info->shadow[id].request = 0; | ||
129 | info->shadow_free = id; | ||
130 | } | ||
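The free list above threads unused shadow slots through the otherwise-idle req.id field. A short trace, assuming the ids were initialised to i + 1 as blkfront_probe() does further down:

/* shadow_free = 0; shadow[i].req.id = i + 1 for all i  (probe-time state)
 *
 * id = get_id_from_freelist(info);   // returns 0, shadow_free becomes 1
 * ...slot 0 now tracks an in-flight request...
 * add_id_to_freelist(info, 0);       // slot 0 becomes the list head again
 */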
131 | |||
132 | static void blkif_restart_queue_callback(void *arg) | ||
133 | { | ||
134 | struct blkfront_info *info = (struct blkfront_info *)arg; | ||
135 | schedule_work(&info->work); | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * blkif_queue_request | ||
140 | * | ||
141 | * request block io | ||
142 | * | ||
143 | * id: for guest use only. | ||
144 | * operation: BLKIF_OP_{READ,WRITE,PROBE} | ||
145 | * buffer: buffer to read/write into. this should be a | ||
146 | * virtual address in the guest os. | ||
147 | */ | ||
148 | static int blkif_queue_request(struct request *req) | ||
149 | { | ||
150 | struct blkfront_info *info = req->rq_disk->private_data; | ||
151 | unsigned long buffer_mfn; | ||
152 | struct blkif_request *ring_req; | ||
153 | struct bio *bio; | ||
154 | struct bio_vec *bvec; | ||
155 | int idx; | ||
156 | unsigned long id; | ||
157 | unsigned int fsect, lsect; | ||
158 | int ref; | ||
159 | grant_ref_t gref_head; | ||
160 | |||
161 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) | ||
162 | return 1; | ||
163 | |||
164 | if (gnttab_alloc_grant_references( | ||
165 | BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { | ||
166 | gnttab_request_free_callback( | ||
167 | &info->callback, | ||
168 | blkif_restart_queue_callback, | ||
169 | info, | ||
170 | BLKIF_MAX_SEGMENTS_PER_REQUEST); | ||
171 | return 1; | ||
172 | } | ||
173 | |||
174 | /* Fill out a communications ring structure. */ | ||
175 | ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); | ||
176 | id = get_id_from_freelist(info); | ||
177 | info->shadow[id].request = (unsigned long)req; | ||
178 | |||
179 | ring_req->id = id; | ||
180 | ring_req->sector_number = (blkif_sector_t)req->sector; | ||
181 | ring_req->handle = info->handle; | ||
182 | |||
183 | ring_req->operation = rq_data_dir(req) ? | ||
184 | BLKIF_OP_WRITE : BLKIF_OP_READ; | ||
185 | if (blk_barrier_rq(req)) | ||
186 | ring_req->operation = BLKIF_OP_WRITE_BARRIER; | ||
187 | |||
188 | ring_req->nr_segments = 0; | ||
189 | rq_for_each_bio (bio, req) { | ||
190 | bio_for_each_segment (bvec, bio, idx) { | ||
191 | BUG_ON(ring_req->nr_segments | ||
192 | == BLKIF_MAX_SEGMENTS_PER_REQUEST); | ||
193 | buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page)); | ||
194 | fsect = bvec->bv_offset >> 9; | ||
195 | lsect = fsect + (bvec->bv_len >> 9) - 1; | ||
196 | /* install a grant reference. */ | ||
197 | ref = gnttab_claim_grant_reference(&gref_head); | ||
198 | BUG_ON(ref == -ENOSPC); | ||
199 | |||
200 | gnttab_grant_foreign_access_ref( | ||
201 | ref, | ||
202 | info->xbdev->otherend_id, | ||
203 | buffer_mfn, | ||
204 | rq_data_dir(req) ); | ||
205 | |||
206 | info->shadow[id].frame[ring_req->nr_segments] = | ||
207 | mfn_to_pfn(buffer_mfn); | ||
208 | |||
209 | ring_req->seg[ring_req->nr_segments] = | ||
210 | (struct blkif_request_segment) { | ||
211 | .gref = ref, | ||
212 | .first_sect = fsect, | ||
213 | .last_sect = lsect }; | ||
214 | |||
215 | ring_req->nr_segments++; | ||
216 | } | ||
217 | } | ||
218 | |||
219 | info->ring.req_prod_pvt++; | ||
220 | |||
221 | /* Keep a private copy so we can reissue requests when recovering. */ | ||
222 | info->shadow[id].req = *ring_req; | ||
223 | |||
224 | gnttab_free_grant_references(gref_head); | ||
225 | |||
226 | return 0; | ||
227 | } | ||
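The first/last-sector arithmetic in the segment loop above is easy to sanity-check with hypothetical values:

/* A bio_vec with bv_offset = 1024 and bv_len = 2048 inside its page:
 *   fsect = 1024 >> 9           = 2   (first 512-byte sector used)
 *   lsect = 2 + (2048 >> 9) - 1 = 5   (last sector used, inclusive)
 * so the backend touches sectors 2..5 of the granted page.
 */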
228 | |||
229 | |||
230 | static inline void flush_requests(struct blkfront_info *info) | ||
231 | { | ||
232 | int notify; | ||
233 | |||
234 | RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); | ||
235 | |||
236 | if (notify) | ||
237 | notify_remote_via_irq(info->irq); | ||
238 | } | ||
239 | |||
240 | /* | ||
241 | * do_blkif_request | ||
242 | * read a block; request is in a request queue | ||
243 | */ | ||
244 | static void do_blkif_request(request_queue_t *rq) | ||
245 | { | ||
246 | struct blkfront_info *info = NULL; | ||
247 | struct request *req; | ||
248 | int queued; | ||
249 | |||
250 | pr_debug("Entered do_blkif_request\n"); | ||
251 | |||
252 | queued = 0; | ||
253 | |||
254 | while ((req = elv_next_request(rq)) != NULL) { | ||
255 | info = req->rq_disk->private_data; | ||
256 | if (!blk_fs_request(req)) { | ||
257 | end_request(req, 0); | ||
258 | continue; | ||
259 | } | ||
260 | |||
261 | if (RING_FULL(&info->ring)) | ||
262 | goto wait; | ||
263 | |||
264 | pr_debug("do_blk_req %p: cmd %p, sec %lx, " | ||
265 | "(%u/%li) buffer:%p [%s]\n", | ||
266 | req, req->cmd, (unsigned long)req->sector, | ||
267 | req->current_nr_sectors, | ||
268 | req->nr_sectors, req->buffer, | ||
269 | rq_data_dir(req) ? "write" : "read"); | ||
270 | |||
271 | |||
272 | blkdev_dequeue_request(req); | ||
273 | if (blkif_queue_request(req)) { | ||
274 | blk_requeue_request(rq, req); | ||
275 | wait: | ||
276 | /* Avoid pointless unplugs. */ | ||
277 | blk_stop_queue(rq); | ||
278 | break; | ||
279 | } | ||
280 | |||
281 | queued++; | ||
282 | } | ||
283 | |||
284 | if (queued != 0) | ||
285 | flush_requests(info); | ||
286 | } | ||
287 | |||
288 | static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) | ||
289 | { | ||
290 | request_queue_t *rq; | ||
291 | |||
292 | rq = blk_init_queue(do_blkif_request, &blkif_io_lock); | ||
293 | if (rq == NULL) | ||
294 | return -1; | ||
295 | |||
296 | elevator_init(rq, "noop"); | ||
297 | |||
298 | /* Hard sector size and max sectors impersonate the equiv. hardware. */ | ||
299 | blk_queue_hardsect_size(rq, sector_size); | ||
300 | blk_queue_max_sectors(rq, 512); | ||
301 | |||
302 | /* Each segment in a request is up to an aligned page in size. */ | ||
303 | blk_queue_segment_boundary(rq, PAGE_SIZE - 1); | ||
304 | blk_queue_max_segment_size(rq, PAGE_SIZE); | ||
305 | |||
306 | /* Ensure a merged request will fit in a single I/O ring slot. */ | ||
307 | blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); | ||
308 | blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); | ||
309 | |||
310 | /* Make sure buffer addresses are sector-aligned. */ | ||
311 | blk_queue_dma_alignment(rq, 511); | ||
312 | |||
313 | gd->queue = rq; | ||
314 | |||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | |||
319 | static int xlvbd_barrier(struct blkfront_info *info) | ||
320 | { | ||
321 | int err; | ||
322 | |||
323 | err = blk_queue_ordered(info->rq, | ||
324 | info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, | ||
325 | NULL); | ||
326 | |||
327 | if (err) | ||
328 | return err; | ||
329 | |||
330 | printk(KERN_INFO "blkfront: %s: barriers %s\n", | ||
331 | info->gd->disk_name, | ||
332 | info->feature_barrier ? "enabled" : "disabled"); | ||
333 | return 0; | ||
334 | } | ||
335 | |||
336 | |||
337 | static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, | ||
338 | int vdevice, u16 vdisk_info, u16 sector_size, | ||
339 | struct blkfront_info *info) | ||
340 | { | ||
341 | struct gendisk *gd; | ||
342 | int nr_minors = 1; | ||
343 | int err = -ENODEV; | ||
344 | |||
345 | BUG_ON(info->gd != NULL); | ||
346 | BUG_ON(info->rq != NULL); | ||
347 | |||
348 | if ((minor % PARTS_PER_DISK) == 0) | ||
349 | nr_minors = PARTS_PER_DISK; | ||
350 | |||
351 | gd = alloc_disk(nr_minors); | ||
352 | if (gd == NULL) | ||
353 | goto out; | ||
354 | |||
355 | if (nr_minors > 1) | ||
356 | sprintf(gd->disk_name, "%s%c", DEV_NAME, | ||
357 | 'a' + minor / PARTS_PER_DISK); | ||
358 | else | ||
359 | sprintf(gd->disk_name, "%s%c%d", DEV_NAME, | ||
360 | 'a' + minor / PARTS_PER_DISK, | ||
361 | minor % PARTS_PER_DISK); | ||
362 | |||
363 | gd->major = XENVBD_MAJOR; | ||
364 | gd->first_minor = minor; | ||
365 | gd->fops = &xlvbd_block_fops; | ||
366 | gd->private_data = info; | ||
367 | gd->driverfs_dev = &(info->xbdev->dev); | ||
368 | set_capacity(gd, capacity); | ||
369 | |||
370 | if (xlvbd_init_blk_queue(gd, sector_size)) { | ||
371 | del_gendisk(gd); | ||
372 | goto out; | ||
373 | } | ||
374 | |||
375 | info->rq = gd->queue; | ||
376 | info->gd = gd; | ||
377 | |||
378 | if (info->feature_barrier) | ||
379 | xlvbd_barrier(info); | ||
380 | |||
381 | if (vdisk_info & VDISK_READONLY) | ||
382 | set_disk_ro(gd, 1); | ||
383 | |||
384 | if (vdisk_info & VDISK_REMOVABLE) | ||
385 | gd->flags |= GENHD_FL_REMOVABLE; | ||
386 | |||
387 | if (vdisk_info & VDISK_CDROM) | ||
388 | gd->flags |= GENHD_FL_CD; | ||
389 | |||
390 | return 0; | ||
391 | |||
392 | out: | ||
393 | return err; | ||
394 | } | ||
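The naming logic above maps minors onto /dev/xvd* names; a few hypothetical values make the two sprintf branches concrete:

/* minor 0  -> "xvda"   (minor % PARTS_PER_DISK == 0: whole disk, with
 *                       PARTS_PER_DISK minors reserved behind it)
 * minor 16 -> "xvdb"   (second whole disk)
 * minor 17 -> "xvdb1"  (partition 1 of the second disk)
 */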
395 | |||
396 | static void kick_pending_request_queues(struct blkfront_info *info) | ||
397 | { | ||
398 | if (!RING_FULL(&info->ring)) { | ||
399 | /* Re-enable calldowns. */ | ||
400 | blk_start_queue(info->rq); | ||
401 | /* Kick things off immediately. */ | ||
402 | do_blkif_request(info->rq); | ||
403 | } | ||
404 | } | ||
405 | |||
406 | static void blkif_restart_queue(struct work_struct *work) | ||
407 | { | ||
408 | struct blkfront_info *info = container_of(work, struct blkfront_info, work); | ||
409 | |||
410 | spin_lock_irq(&blkif_io_lock); | ||
411 | if (info->connected == BLKIF_STATE_CONNECTED) | ||
412 | kick_pending_request_queues(info); | ||
413 | spin_unlock_irq(&blkif_io_lock); | ||
414 | } | ||
415 | |||
416 | static void blkif_free(struct blkfront_info *info, int suspend) | ||
417 | { | ||
418 | /* Prevent new requests being issued until we fix things up. */ | ||
419 | spin_lock_irq(&blkif_io_lock); | ||
420 | info->connected = suspend ? | ||
421 | BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; | ||
422 | /* No more blkif_request(). */ | ||
423 | if (info->rq) | ||
424 | blk_stop_queue(info->rq); | ||
425 | /* No more gnttab callback work. */ | ||
426 | gnttab_cancel_free_callback(&info->callback); | ||
427 | spin_unlock_irq(&blkif_io_lock); | ||
428 | |||
429 | /* Flush gnttab callback work. Must be done with no locks held. */ | ||
430 | flush_scheduled_work(); | ||
431 | |||
432 | /* Free resources associated with old device channel. */ | ||
433 | if (info->ring_ref != GRANT_INVALID_REF) { | ||
434 | gnttab_end_foreign_access(info->ring_ref, 0, | ||
435 | (unsigned long)info->ring.sring); | ||
436 | info->ring_ref = GRANT_INVALID_REF; | ||
437 | info->ring.sring = NULL; | ||
438 | } | ||
439 | if (info->irq) | ||
440 | unbind_from_irqhandler(info->irq, info); | ||
441 | info->evtchn = info->irq = 0; | ||
442 | |||
443 | } | ||
444 | |||
445 | static void blkif_completion(struct blk_shadow *s) | ||
446 | { | ||
447 | int i; | ||
448 | for (i = 0; i < s->req.nr_segments; i++) | ||
449 | gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); | ||
450 | } | ||
451 | |||
452 | static irqreturn_t blkif_interrupt(int irq, void *dev_id) | ||
453 | { | ||
454 | struct request *req; | ||
455 | struct blkif_response *bret; | ||
456 | RING_IDX i, rp; | ||
457 | unsigned long flags; | ||
458 | struct blkfront_info *info = (struct blkfront_info *)dev_id; | ||
459 | int uptodate; | ||
460 | |||
461 | spin_lock_irqsave(&blkif_io_lock, flags); | ||
462 | |||
463 | if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { | ||
464 | spin_unlock_irqrestore(&blkif_io_lock, flags); | ||
465 | return IRQ_HANDLED; | ||
466 | } | ||
467 | |||
468 | again: | ||
469 | rp = info->ring.sring->rsp_prod; | ||
470 | rmb(); /* Ensure we see queued responses up to 'rp'. */ | ||
471 | |||
472 | for (i = info->ring.rsp_cons; i != rp; i++) { | ||
473 | unsigned long id; | ||
474 | int ret; | ||
475 | |||
476 | bret = RING_GET_RESPONSE(&info->ring, i); | ||
477 | id = bret->id; | ||
478 | req = (struct request *)info->shadow[id].request; | ||
479 | |||
480 | blkif_completion(&info->shadow[id]); | ||
481 | |||
482 | add_id_to_freelist(info, id); | ||
483 | |||
484 | uptodate = (bret->status == BLKIF_RSP_OKAY); | ||
485 | switch (bret->operation) { | ||
486 | case BLKIF_OP_WRITE_BARRIER: | ||
487 | if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { | ||
488 | printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", | ||
489 | info->gd->disk_name); | ||
490 | uptodate = -EOPNOTSUPP; | ||
491 | info->feature_barrier = 0; | ||
492 | xlvbd_barrier(info); | ||
493 | } | ||
494 | /* fall through */ | ||
495 | case BLKIF_OP_READ: | ||
496 | case BLKIF_OP_WRITE: | ||
497 | if (unlikely(bret->status != BLKIF_RSP_OKAY)) | ||
498 | dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " | ||
499 | "request: %x\n", bret->status); | ||
500 | |||
501 | ret = end_that_request_first(req, uptodate, | ||
502 | req->hard_nr_sectors); | ||
503 | BUG_ON(ret); | ||
504 | end_that_request_last(req, uptodate); | ||
505 | break; | ||
506 | default: | ||
507 | BUG(); | ||
508 | } | ||
509 | } | ||
510 | |||
511 | info->ring.rsp_cons = i; | ||
512 | |||
513 | if (i != info->ring.req_prod_pvt) { | ||
514 | int more_to_do; | ||
515 | RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); | ||
516 | if (more_to_do) | ||
517 | goto again; | ||
518 | } else | ||
519 | info->ring.sring->rsp_event = i + 1; | ||
520 | |||
521 | kick_pending_request_queues(info); | ||
522 | |||
523 | spin_unlock_irqrestore(&blkif_io_lock, flags); | ||
524 | |||
525 | return IRQ_HANDLED; | ||
526 | } | ||
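The re-arm logic at the end of the handler is the standard Xen shared-ring idiom, stated in one place:

/* Setting rsp_event = i + 1 asks the backend to raise the next event only
 * once a response beyond slot i exists.  When requests are still pending,
 * RING_FINAL_CHECK_FOR_RESPONSES re-arms and then re-checks, closing the
 * race where a response slipped in between draining the ring and
 * re-arming; hence the 'goto again'.
 */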
527 | |||
528 | |||
529 | static int setup_blkring(struct xenbus_device *dev, | ||
530 | struct blkfront_info *info) | ||
531 | { | ||
532 | struct blkif_sring *sring; | ||
533 | int err; | ||
534 | |||
535 | info->ring_ref = GRANT_INVALID_REF; | ||
536 | |||
537 | sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL); | ||
538 | if (!sring) { | ||
539 | xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); | ||
540 | return -ENOMEM; | ||
541 | } | ||
542 | SHARED_RING_INIT(sring); | ||
543 | FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); | ||
544 | |||
545 | err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); | ||
546 | if (err < 0) { | ||
547 | free_page((unsigned long)sring); | ||
548 | info->ring.sring = NULL; | ||
549 | goto fail; | ||
550 | } | ||
551 | info->ring_ref = err; | ||
552 | |||
553 | err = xenbus_alloc_evtchn(dev, &info->evtchn); | ||
554 | if (err) | ||
555 | goto fail; | ||
556 | |||
557 | err = bind_evtchn_to_irqhandler(info->evtchn, | ||
558 | blkif_interrupt, | ||
559 | IRQF_SAMPLE_RANDOM, "blkif", info); | ||
560 | if (err <= 0) { | ||
561 | xenbus_dev_fatal(dev, err, | ||
562 | "bind_evtchn_to_irqhandler failed"); | ||
563 | goto fail; | ||
564 | } | ||
565 | info->irq = err; | ||
566 | |||
567 | return 0; | ||
568 | fail: | ||
569 | blkif_free(info, 0); | ||
570 | return err; | ||
571 | } | ||
572 | |||
573 | |||
574 | /* Common code used when first setting up, and when resuming. */ | ||
575 | static int talk_to_backend(struct xenbus_device *dev, | ||
576 | struct blkfront_info *info) | ||
577 | { | ||
578 | const char *message = NULL; | ||
579 | struct xenbus_transaction xbt; | ||
580 | int err; | ||
581 | |||
582 | /* Create shared ring, alloc event channel. */ | ||
583 | err = setup_blkring(dev, info); | ||
584 | if (err) | ||
585 | goto out; | ||
586 | |||
587 | again: | ||
588 | err = xenbus_transaction_start(&xbt); | ||
589 | if (err) { | ||
590 | xenbus_dev_fatal(dev, err, "starting transaction"); | ||
591 | goto destroy_blkring; | ||
592 | } | ||
593 | |||
594 | err = xenbus_printf(xbt, dev->nodename, | ||
595 | "ring-ref", "%u", info->ring_ref); | ||
596 | if (err) { | ||
597 | message = "writing ring-ref"; | ||
598 | goto abort_transaction; | ||
599 | } | ||
600 | err = xenbus_printf(xbt, dev->nodename, | ||
601 | "event-channel", "%u", info->evtchn); | ||
602 | if (err) { | ||
603 | message = "writing event-channel"; | ||
604 | goto abort_transaction; | ||
605 | } | ||
606 | |||
607 | err = xenbus_transaction_end(xbt, 0); | ||
608 | if (err) { | ||
609 | if (err == -EAGAIN) | ||
610 | goto again; | ||
611 | xenbus_dev_fatal(dev, err, "completing transaction"); | ||
612 | goto destroy_blkring; | ||
613 | } | ||
614 | |||
615 | xenbus_switch_state(dev, XenbusStateInitialised); | ||
616 | |||
617 | return 0; | ||
618 | |||
619 | abort_transaction: | ||
620 | xenbus_transaction_end(xbt, 1); | ||
621 | if (message) | ||
622 | xenbus_dev_fatal(dev, err, "%s", message); | ||
623 | destroy_blkring: | ||
624 | blkif_free(info, 0); | ||
625 | out: | ||
626 | return err; | ||
627 | } | ||
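talk_to_backend() uses the standard xenstore transaction pattern: redo the whole read-modify-write when the commit returns -EAGAIN. The pattern in isolation (a sketch with a hypothetical key, not driver code):

static int write_one_key_sketch(struct xenbus_device *dev, const char *val)
{
	struct xenbus_transaction xbt;
	int err;

again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return err;

	err = xenbus_printf(xbt, dev->nodename, "example-key", "%s", val);
	if (err) {
		xenbus_transaction_end(xbt, 1);	/* abort */
		return err;
	}

	err = xenbus_transaction_end(xbt, 0);	/* commit */
	if (err == -EAGAIN)			/* lost a race: redo it all */
		goto again;
	return err;
}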
628 | |||
629 | |||
630 | /** | ||
631 | * Entry point to this code when a new device is created. Allocate the basic | ||
632 | * structures and the ring buffer for communication with the backend, and | ||
633 | * inform the backend of the appropriate details for those. Switch to | ||
634 | * Initialised state. | ||
635 | */ | ||
636 | static int blkfront_probe(struct xenbus_device *dev, | ||
637 | const struct xenbus_device_id *id) | ||
638 | { | ||
639 | int err, vdevice, i; | ||
640 | struct blkfront_info *info; | ||
641 | |||
642 | /* FIXME: Use dynamic device id if this is not set. */ | ||
643 | err = xenbus_scanf(XBT_NIL, dev->nodename, | ||
644 | "virtual-device", "%i", &vdevice); | ||
645 | if (err != 1) { | ||
646 | xenbus_dev_fatal(dev, err, "reading virtual-device"); | ||
647 | return err; | ||
648 | } | ||
649 | |||
650 | info = kzalloc(sizeof(*info), GFP_KERNEL); | ||
651 | if (!info) { | ||
652 | xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); | ||
653 | return -ENOMEM; | ||
654 | } | ||
655 | |||
656 | info->xbdev = dev; | ||
657 | info->vdevice = vdevice; | ||
658 | info->connected = BLKIF_STATE_DISCONNECTED; | ||
659 | INIT_WORK(&info->work, blkif_restart_queue); | ||
660 | |||
661 | for (i = 0; i < BLK_RING_SIZE; i++) | ||
662 | info->shadow[i].req.id = i+1; | ||
663 | info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; | ||
664 | |||
665 | /* Front end dir is a number, which is used as the id. */ | ||
666 | info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); | ||
667 | dev->dev.driver_data = info; | ||
668 | |||
669 | err = talk_to_backend(dev, info); | ||
670 | if (err) { | ||
671 | kfree(info); | ||
672 | dev->dev.driver_data = NULL; | ||
673 | return err; | ||
674 | } | ||
675 | |||
676 | return 0; | ||
677 | } | ||
678 | |||
679 | |||
680 | static int blkif_recover(struct blkfront_info *info) | ||
681 | { | ||
682 | int i; | ||
683 | struct blkif_request *req; | ||
684 | struct blk_shadow *copy; | ||
685 | int j; | ||
686 | |||
687 | /* Stage 1: Make a safe copy of the shadow state. */ | ||
688 | copy = kmalloc(sizeof(info->shadow), GFP_KERNEL); | ||
689 | if (!copy) | ||
690 | return -ENOMEM; | ||
691 | memcpy(copy, info->shadow, sizeof(info->shadow)); | ||
692 | |||
693 | /* Stage 2: Set up free list. */ | ||
694 | memset(&info->shadow, 0, sizeof(info->shadow)); | ||
695 | for (i = 0; i < BLK_RING_SIZE; i++) | ||
696 | info->shadow[i].req.id = i+1; | ||
697 | info->shadow_free = info->ring.req_prod_pvt; | ||
698 | info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; | ||
699 | |||
700 | /* Stage 3: Find pending requests and requeue them. */ | ||
701 | for (i = 0; i < BLK_RING_SIZE; i++) { | ||
702 | /* Not in use? */ | ||
703 | if (copy[i].request == 0) | ||
704 | continue; | ||
705 | |||
706 | /* Grab a request slot and copy shadow state into it. */ | ||
707 | req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); | ||
708 | *req = copy[i].req; | ||
709 | |||
710 | /* We get a new request id, and must reset the shadow state. */ | ||
711 | req->id = get_id_from_freelist(info); | ||
712 | memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); | ||
713 | |||
714 | /* Rewrite any grant references invalidated by susp/resume. */ | ||
715 | for (j = 0; j < req->nr_segments; j++) | ||
716 | gnttab_grant_foreign_access_ref( | ||
717 | req->seg[j].gref, | ||
718 | info->xbdev->otherend_id, | ||
719 | pfn_to_mfn(info->shadow[req->id].frame[j]), | ||
720 | rq_data_dir( | ||
721 | (struct request *) | ||
722 | info->shadow[req->id].request)); | ||
723 | info->shadow[req->id].req = *req; | ||
724 | |||
725 | info->ring.req_prod_pvt++; | ||
726 | } | ||
727 | |||
728 | kfree(copy); | ||
729 | |||
730 | xenbus_switch_state(info->xbdev, XenbusStateConnected); | ||
731 | |||
732 | spin_lock_irq(&blkif_io_lock); | ||
733 | |||
734 | /* Now safe for us to use the shared ring */ | ||
735 | info->connected = BLKIF_STATE_CONNECTED; | ||
736 | |||
737 | /* Send off requeued requests */ | ||
738 | flush_requests(info); | ||
739 | |||
740 | /* Kick any other new requests queued since we resumed */ | ||
741 | kick_pending_request_queues(info); | ||
742 | |||
743 | spin_unlock_irq(&blkif_io_lock); | ||
744 | |||
745 | return 0; | ||
746 | } | ||
747 | |||
748 | /** | ||
749 | * We are reconnecting to the backend, due to a suspend/resume, or a backend | ||
750 | * driver restart. We tear down our blkif structure and recreate it, but | ||
751 | * leave the device-layer structures intact so that this is transparent to the | ||
752 | * rest of the kernel. | ||
753 | */ | ||
754 | static int blkfront_resume(struct xenbus_device *dev) | ||
755 | { | ||
756 | struct blkfront_info *info = dev->dev.driver_data; | ||
757 | int err; | ||
758 | |||
759 | dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); | ||
760 | |||
761 | blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); | ||
762 | |||
763 | err = talk_to_backend(dev, info); | ||
764 | if (info->connected == BLKIF_STATE_SUSPENDED && !err) | ||
765 | err = blkif_recover(info); | ||
766 | |||
767 | return err; | ||
768 | } | ||
769 | |||
770 | |||
771 | /* | ||
772 | * Invoked when the backend is finally 'ready' (and has produced | ||
773 | * the details about the physical device - #sectors, size, etc). | ||
774 | */ | ||
775 | static void blkfront_connect(struct blkfront_info *info) | ||
776 | { | ||
777 | unsigned long long sectors; | ||
778 | unsigned long sector_size; | ||
779 | unsigned int binfo; | ||
780 | int err; | ||
781 | |||
782 | if ((info->connected == BLKIF_STATE_CONNECTED) || | ||
783 | (info->connected == BLKIF_STATE_SUSPENDED) ) | ||
784 | return; | ||
785 | |||
786 | dev_dbg(&info->xbdev->dev, "%s:%s.\n", | ||
787 | __func__, info->xbdev->otherend); | ||
788 | |||
789 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | ||
790 | "sectors", "%llu", &sectors, | ||
791 | "info", "%u", &binfo, | ||
792 | "sector-size", "%lu", &sector_size, | ||
793 | NULL); | ||
794 | if (err) { | ||
795 | xenbus_dev_fatal(info->xbdev, err, | ||
796 | "reading backend fields at %s", | ||
797 | info->xbdev->otherend); | ||
798 | return; | ||
799 | } | ||
800 | |||
801 | err = xenbus_gather(XBT_NIL, info->xbdev->otherend, | ||
802 | "feature-barrier", "%lu", &info->feature_barrier, | ||
803 | NULL); | ||
804 | if (err) | ||
805 | info->feature_barrier = 0; | ||
806 | |||
807 | err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice), | ||
808 | sectors, info->vdevice, | ||
809 | binfo, sector_size, info); | ||
810 | if (err) { | ||
811 | xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", | ||
812 | info->xbdev->otherend); | ||
813 | return; | ||
814 | } | ||
815 | |||
816 | xenbus_switch_state(info->xbdev, XenbusStateConnected); | ||
817 | |||
818 | /* Kick pending requests. */ | ||
819 | spin_lock_irq(&blkif_io_lock); | ||
820 | info->connected = BLKIF_STATE_CONNECTED; | ||
821 | kick_pending_request_queues(info); | ||
822 | spin_unlock_irq(&blkif_io_lock); | ||
823 | |||
824 | add_disk(info->gd); | ||
825 | } | ||
826 | |||
827 | /** | ||
828 | * Handle the change of state of the backend to Closing. We must delete our | ||
829 | * device-layer structures now, to ensure that writes are flushed through to | ||
830 | * the backend. Once this is done, we can switch to Closed in | ||
831 | * acknowledgement. | ||
832 | */ | ||
833 | static void blkfront_closing(struct xenbus_device *dev) | ||
834 | { | ||
835 | struct blkfront_info *info = dev->dev.driver_data; | ||
836 | unsigned long flags; | ||
837 | |||
838 | dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename); | ||
839 | |||
840 | if (info->rq == NULL) | ||
841 | goto out; | ||
842 | |||
843 | spin_lock_irqsave(&blkif_io_lock, flags); | ||
844 | |||
845 | del_gendisk(info->gd); | ||
846 | |||
847 | /* No more blkif_request(). */ | ||
848 | blk_stop_queue(info->rq); | ||
849 | |||
850 | /* No more gnttab callback work. */ | ||
851 | gnttab_cancel_free_callback(&info->callback); | ||
852 | spin_unlock_irqrestore(&blkif_io_lock, flags); | ||
853 | |||
854 | /* Flush gnttab callback work. Must be done with no locks held. */ | ||
855 | flush_scheduled_work(); | ||
856 | |||
857 | blk_cleanup_queue(info->rq); | ||
858 | info->rq = NULL; | ||
859 | |||
860 | out: | ||
861 | xenbus_frontend_closed(dev); | ||
862 | } | ||
863 | |||
864 | /** | ||
865 | * Callback received when the backend's state changes. | ||
866 | */ | ||
867 | static void backend_changed(struct xenbus_device *dev, | ||
868 | enum xenbus_state backend_state) | ||
869 | { | ||
870 | struct blkfront_info *info = dev->dev.driver_data; | ||
871 | struct block_device *bd; | ||
872 | |||
873 | dev_dbg(&dev->dev, "blkfront:backend_changed.\n"); | ||
874 | |||
875 | switch (backend_state) { | ||
876 | case XenbusStateInitialising: | ||
877 | case XenbusStateInitWait: | ||
878 | case XenbusStateInitialised: | ||
879 | case XenbusStateUnknown: | ||
880 | case XenbusStateClosed: | ||
881 | break; | ||
882 | |||
883 | case XenbusStateConnected: | ||
884 | blkfront_connect(info); | ||
885 | break; | ||
886 | |||
887 | case XenbusStateClosing: | ||
888 | bd = bdget(info->dev); | ||
889 | if (bd == NULL) | ||
890 | xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); | ||
891 | |||
892 | mutex_lock(&bd->bd_mutex); | ||
893 | if (info->users > 0) | ||
894 | xenbus_dev_error(dev, -EBUSY, | ||
895 | "Device in use; refusing to close"); | ||
896 | else | ||
897 | blkfront_closing(dev); | ||
898 | mutex_unlock(&bd->bd_mutex); | ||
899 | bdput(bd); | ||
900 | break; | ||
901 | } | ||
902 | } | ||
903 | |||
904 | static int blkfront_remove(struct xenbus_device *dev) | ||
905 | { | ||
906 | struct blkfront_info *info = dev->dev.driver_data; | ||
907 | |||
908 | dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename); | ||
909 | |||
910 | blkif_free(info, 0); | ||
911 | |||
912 | kfree(info); | ||
913 | |||
914 | return 0; | ||
915 | } | ||
916 | |||
917 | static int blkif_open(struct inode *inode, struct file *filep) | ||
918 | { | ||
919 | struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; | ||
920 | info->users++; | ||
921 | return 0; | ||
922 | } | ||
923 | |||
924 | static int blkif_release(struct inode *inode, struct file *filep) | ||
925 | { | ||
926 | struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; | ||
927 | info->users--; | ||
928 | if (info->users == 0) { | ||
929 | /* Check whether we have been instructed to close. We will | ||
930 | have ignored this request initially, as the device was | ||
931 | still mounted. */ | ||
932 | struct xenbus_device *dev = info->xbdev; | ||
933 | enum xenbus_state state = xenbus_read_driver_state(dev->otherend); | ||
934 | |||
935 | if (state == XenbusStateClosing) | ||
936 | blkfront_closing(dev); | ||
937 | } | ||
938 | return 0; | ||
939 | } | ||
940 | |||
941 | static struct block_device_operations xlvbd_block_fops = | ||
942 | { | ||
943 | .owner = THIS_MODULE, | ||
944 | .open = blkif_open, | ||
945 | .release = blkif_release, | ||
946 | }; | ||
947 | |||
948 | |||
949 | static struct xenbus_device_id blkfront_ids[] = { | ||
950 | { "vbd" }, | ||
951 | { "" } | ||
952 | }; | ||
953 | |||
954 | static struct xenbus_driver blkfront = { | ||
955 | .name = "vbd", | ||
956 | .owner = THIS_MODULE, | ||
957 | .ids = blkfront_ids, | ||
958 | .probe = blkfront_probe, | ||
959 | .remove = blkfront_remove, | ||
960 | .resume = blkfront_resume, | ||
961 | .otherend_changed = backend_changed, | ||
962 | }; | ||
963 | |||
964 | static int __init xlblk_init(void) | ||
965 | { | ||
966 | if (!is_running_on_xen()) | ||
967 | return -ENODEV; | ||
968 | |||
969 | if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { | ||
970 | printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", | ||
971 | XENVBD_MAJOR, DEV_NAME); | ||
972 | return -ENODEV; | ||
973 | } | ||
974 | |||
975 | return xenbus_register_frontend(&blkfront); | ||
976 | } | ||
977 | module_init(xlblk_init); | ||
978 | |||
979 | |||
980 | static void xlblk_exit(void) | ||
981 | { | ||
982 | return xenbus_unregister_driver(&blkfront); | ||
983 | } | ||
984 | module_exit(xlblk_exit); | ||
985 | |||
986 | MODULE_DESCRIPTION("Xen virtual block device frontend"); | ||
987 | MODULE_LICENSE("GPL"); | ||
988 | MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); | ||
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 97bd71bc3aea..9e8f21410d2d 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -604,6 +604,14 @@ config HVC_BEAT
604 | help | 604 | help |
605 | Toshiba's Cell Reference Set Beat Console device driver | 605 | Toshiba's Cell Reference Set Beat Console device driver |
606 | 606 | ||
607 | config HVC_XEN | ||
608 | bool "Xen Hypervisor Console support" | ||
609 | depends on XEN | ||
610 | select HVC_DRIVER | ||
611 | default y | ||
612 | help | ||
613 | Xen virtual console device driver | ||
614 | |||
607 | config HVCS | 615 | config HVCS |
608 | tristate "IBM Hypervisor Virtual Console Server support" | 616 | tristate "IBM Hypervisor Virtual Console Server support" |
609 | depends on PPC_PSERIES | 617 | depends on PPC_PSERIES |
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index f2996a95eb07..8852b8d643cf 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o
48 | obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o | 48 | obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o |
49 | obj-$(CONFIG_HVC_BEAT) += hvc_beat.o | 49 | obj-$(CONFIG_HVC_BEAT) += hvc_beat.o |
50 | obj-$(CONFIG_HVC_DRIVER) += hvc_console.o | 50 | obj-$(CONFIG_HVC_DRIVER) += hvc_console.o |
51 | obj-$(CONFIG_HVC_XEN) += hvc_xen.o | ||
51 | obj-$(CONFIG_RAW_DRIVER) += raw.o | 52 | obj-$(CONFIG_RAW_DRIVER) += raw.o |
52 | obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o | 53 | obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o |
53 | obj-$(CONFIG_MSPEC) += mspec.o | 54 | obj-$(CONFIG_MSPEC) += mspec.o |
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
new file mode 100644
index 000000000000..dd68f8541c2d
--- /dev/null
+++ b/drivers/char/hvc_xen.c
@@ -0,0 +1,159 @@
1 | /* | ||
2 | * xen console driver interface to hvc_console.c | ||
3 | * | ||
4 | * (c) 2007 Gerd Hoffmann <kraxel@suse.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
19 | */ | ||
20 | |||
21 | #include <linux/console.h> | ||
22 | #include <linux/delay.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/types.h> | ||
26 | |||
27 | #include <asm/xen/hypervisor.h> | ||
28 | #include <xen/page.h> | ||
29 | #include <xen/events.h> | ||
30 | #include <xen/interface/io/console.h> | ||
31 | #include <xen/hvc-console.h> | ||
32 | |||
33 | #include "hvc_console.h" | ||
34 | |||
35 | #define HVC_COOKIE 0x58656e /* "Xen" in hex */ | ||
36 | |||
37 | static struct hvc_struct *hvc; | ||
38 | static int xencons_irq; | ||
39 | |||
40 | /* ------------------------------------------------------------------ */ | ||
41 | |||
42 | static inline struct xencons_interface *xencons_interface(void) | ||
43 | { | ||
44 | return mfn_to_virt(xen_start_info->console.domU.mfn); | ||
45 | } | ||
46 | |||
47 | static inline void notify_daemon(void) | ||
48 | { | ||
49 | /* Use evtchn: this is called early, before irq is set up. */ | ||
50 | notify_remote_via_evtchn(xen_start_info->console.domU.evtchn); | ||
51 | } | ||
52 | |||
53 | static int write_console(uint32_t vtermno, const char *data, int len) | ||
54 | { | ||
55 | struct xencons_interface *intf = xencons_interface(); | ||
56 | XENCONS_RING_IDX cons, prod; | ||
57 | int sent = 0; | ||
58 | |||
59 | cons = intf->out_cons; | ||
60 | prod = intf->out_prod; | ||
61 | mb(); /* update queue values before going on */ | ||
62 | BUG_ON((prod - cons) > sizeof(intf->out)); | ||
63 | |||
64 | while ((sent < len) && ((prod - cons) < sizeof(intf->out))) | ||
65 | intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; | ||
66 | |||
67 | wmb(); /* write ring before updating pointer */ | ||
68 | intf->out_prod = prod; | ||
69 | |||
70 | notify_daemon(); | ||
71 | return sent; | ||
72 | } | ||
73 | |||
74 | static int read_console(uint32_t vtermno, char *buf, int len) | ||
75 | { | ||
76 | struct xencons_interface *intf = xencons_interface(); | ||
77 | XENCONS_RING_IDX cons, prod; | ||
78 | int recv = 0; | ||
79 | |||
80 | cons = intf->in_cons; | ||
81 | prod = intf->in_prod; | ||
82 | mb(); /* get pointers before reading ring */ | ||
83 | BUG_ON((prod - cons) > sizeof(intf->in)); | ||
84 | |||
85 | while (cons != prod && recv < len) | ||
86 | buf[recv++] = intf->in[MASK_XENCONS_IDX(cons++, intf->in)]; | ||
87 | |||
88 | mb(); /* read ring before consuming */ | ||
89 | intf->in_cons = cons; | ||
90 | |||
91 | notify_daemon(); | ||
92 | return recv; | ||
93 | } | ||
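Both console paths rely on free-running unsigned indices, the same idiom the BUG_ON checks assume. Reduced to its essentials (the size and names here are hypothetical):

#define RING_BYTES 2048	/* hypothetical power-of-two buffer size */

static inline unsigned int ring_pending(uint32_t prod, uint32_t cons)
{
	return prod - cons;	/* valid even after the counters wrap */
}

static inline unsigned int ring_slot(uint32_t idx)
{
	return idx & (RING_BYTES - 1);	/* what MASK_XENCONS_IDX boils down to */
}

Because the indices only ever increase, prod - cons is the number of bytes in flight across wraparound, and masking picks the slot within the buffer.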
94 | |||
95 | static struct hv_ops hvc_ops = { | ||
96 | .get_chars = read_console, | ||
97 | .put_chars = write_console, | ||
98 | }; | ||
99 | |||
100 | static int __init xen_init(void) | ||
101 | { | ||
102 | struct hvc_struct *hp; | ||
103 | |||
104 | if (!is_running_on_xen()) | ||
105 | return 0; | ||
106 | |||
107 | xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); | ||
108 | if (xencons_irq < 0) | ||
109 | xencons_irq = 0 /* NO_IRQ */; | ||
110 | hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); | ||
111 | if (IS_ERR(hp)) | ||
112 | return PTR_ERR(hp); | ||
113 | |||
114 | hvc = hp; | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | static void __exit xen_fini(void) | ||
119 | { | ||
120 | if (hvc) | ||
121 | hvc_remove(hvc); | ||
122 | } | ||
123 | |||
124 | static int xen_cons_init(void) | ||
125 | { | ||
126 | if (!is_running_on_xen()) | ||
127 | return 0; | ||
128 | |||
129 | hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | module_init(xen_init); | ||
134 | module_exit(xen_fini); | ||
135 | console_initcall(xen_cons_init); | ||
136 | |||
137 | static void xenboot_write_console(struct console *console, const char *string, | ||
138 | unsigned len) | ||
139 | { | ||
140 | unsigned int linelen, off = 0; | ||
141 | const char *pos; | ||
142 | |||
143 | while (off < len && NULL != (pos = strchr(string+off, '\n'))) { | ||
144 | linelen = pos - (string+off); | ||
145 | if (off + linelen > len) | ||
146 | break; | ||
147 | write_console(0, string+off, linelen); | ||
148 | write_console(0, "\r\n", 2); | ||
149 | off += linelen + 1; | ||
150 | } | ||
151 | if (off < len) | ||
152 | write_console(0, string+off, len-off); | ||
153 | } | ||
154 | |||
155 | struct console xenboot_console = { | ||
156 | .name = "xenboot", | ||
157 | .write = xenboot_write_console, | ||
158 | .flags = CON_PRINTBUFFER | CON_BOOT, | ||
159 | }; | ||
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index dbb22403979f..3d90fc002097 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -1770,7 +1770,8 @@ static int call_critical_overtemp(void)
1770 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | 1770 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", |
1771 | NULL }; | 1771 | NULL }; |
1772 | 1772 | ||
1773 | return call_usermodehelper(critical_overtemp_path, argv, envp, 0); | 1773 | return call_usermodehelper(critical_overtemp_path, |
1774 | argv, envp, UMH_WAIT_EXEC); | ||
1774 | } | 1775 | } |
1775 | 1776 | ||
1776 | 1777 | ||
diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c
index e18d265d5d33..516d943227e2 100644
--- a/drivers/macintosh/windfarm_core.c
+++ b/drivers/macintosh/windfarm_core.c
@@ -80,7 +80,8 @@ int wf_critical_overtemp(void)
80 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", | 80 | "PATH=/sbin:/usr/sbin:/bin:/usr/bin", |
81 | NULL }; | 81 | NULL }; |
82 | 82 | ||
83 | return call_usermodehelper(critical_overtemp_path, argv, envp, 0); | 83 | return call_usermodehelper(critical_overtemp_path, |
84 | argv, envp, UMH_WAIT_EXEC); | ||
84 | } | 85 | } |
85 | EXPORT_SYMBOL_GPL(wf_critical_overtemp); | 86 | EXPORT_SYMBOL_GPL(wf_critical_overtemp); |
86 | 87 | ||
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 43d03178064d..5fb659f8b20e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2486,6 +2486,18 @@ source "drivers/atm/Kconfig"
2486 | 2486 | ||
2487 | source "drivers/s390/net/Kconfig" | 2487 | source "drivers/s390/net/Kconfig" |
2488 | 2488 | ||
2489 | config XEN_NETDEV_FRONTEND | ||
2490 | tristate "Xen network device frontend driver" | ||
2491 | depends on XEN | ||
2492 | default y | ||
2493 | help | ||
2494 | The network device frontend driver allows the kernel to | ||
2495 | access network devices exported by a virtual | ||
2496 | machine containing a physical network device driver. The | ||
2497 | frontend driver is intended for unprivileged guest domains; | ||
2498 | if you are compiling a kernel for a Xen guest, you almost | ||
2499 | certainly want to enable this. | ||
2500 | |||
2489 | config ISERIES_VETH | 2501 | config ISERIES_VETH |
2490 | tristate "iSeries Virtual Ethernet driver support" | 2502 | tristate "iSeries Virtual Ethernet driver support" |
2491 | depends on PPC_ISERIES | 2503 | depends on PPC_ISERIES |
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index eb4167622a6a..0e286ab8855a 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -127,6 +127,8 @@ obj-$(CONFIG_PPPOL2TP) += pppox.o pppol2tp.o
127 | obj-$(CONFIG_SLIP) += slip.o | 127 | obj-$(CONFIG_SLIP) += slip.o |
128 | obj-$(CONFIG_SLHC) += slhc.o | 128 | obj-$(CONFIG_SLHC) += slhc.o |
129 | 129 | ||
130 | obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o | ||
131 | |||
130 | obj-$(CONFIG_DUMMY) += dummy.o | 132 | obj-$(CONFIG_DUMMY) += dummy.o |
131 | obj-$(CONFIG_IFB) += ifb.o | 133 | obj-$(CONFIG_IFB) += ifb.o |
132 | obj-$(CONFIG_MACVLAN) += macvlan.o | 134 | obj-$(CONFIG_MACVLAN) += macvlan.o |
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 84aa2117c0ee..355c6cf3d112 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -320,7 +320,7 @@ static int eppconfig(struct baycom_state *bc)
320 | sprintf(portarg, "%ld", bc->pdev->port->base); | 320 | sprintf(portarg, "%ld", bc->pdev->port->base); |
321 | printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg); | 321 | printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg); |
322 | 322 | ||
323 | return call_usermodehelper(eppconfig_path, argv, envp, 1); | 323 | return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC); |
324 | } | 324 | } |
325 | 325 | ||
326 | /* ---------------------------------------------------------------------- */ | 326 | /* ---------------------------------------------------------------------- */ |
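The three call_usermodehelper() conversions in this series are mechanical: the old magic int becomes the named enum with the same value.

/* Old 'wait' argument -> enum umh_wait (values as introduced in kmod.h):
 *   -1 -> UMH_NO_WAIT    (schedule the helper, don't wait at all)
 *    0 -> UMH_WAIT_EXEC  (wait until the exec has succeeded)
 *    1 -> UMH_WAIT_PROC  (wait until the helper process has exited)
 * Hence 0 becomes UMH_WAIT_EXEC in the two macintosh drivers and
 * 1 becomes UMH_WAIT_PROC in baycom_epp above.
 */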
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
new file mode 100644
index 000000000000..489f69c5d6ca
--- /dev/null
+++ b/drivers/net/xen-netfront.c
@@ -0,0 +1,1863 @@
1 | /* | ||
2 | * Virtual network driver for conversing with remote driver backends. | ||
3 | * | ||
4 | * Copyright (c) 2002-2005, K A Fraser | ||
5 | * Copyright (c) 2005, XenSource Ltd | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License version 2 | ||
9 | * as published by the Free Software Foundation; or, when distributed | ||
10 | * separately from the Linux kernel or incorporated into other | ||
11 | * software packages, subject to the following license: | ||
12 | * | ||
13 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
14 | * of this source file (the "Software"), to deal in the Software without | ||
15 | * restriction, including without limitation the rights to use, copy, modify, | ||
16 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
17 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
18 | * the following conditions: | ||
19 | * | ||
20 | * The above copyright notice and this permission notice shall be included in | ||
21 | * all copies or substantial portions of the Software. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
24 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
25 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
26 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
27 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
28 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
29 | * IN THE SOFTWARE. | ||
30 | */ | ||
31 | |||
32 | #include <linux/module.h> | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/netdevice.h> | ||
35 | #include <linux/etherdevice.h> | ||
36 | #include <linux/skbuff.h> | ||
37 | #include <linux/ethtool.h> | ||
38 | #include <linux/if_ether.h> | ||
39 | #include <linux/tcp.h> | ||
40 | #include <linux/udp.h> | ||
41 | #include <linux/moduleparam.h> | ||
42 | #include <linux/mm.h> | ||
43 | #include <net/ip.h> | ||
44 | |||
45 | #include <xen/xenbus.h> | ||
46 | #include <xen/events.h> | ||
47 | #include <xen/page.h> | ||
48 | #include <xen/grant_table.h> | ||
49 | |||
50 | #include <xen/interface/io/netif.h> | ||
51 | #include <xen/interface/memory.h> | ||
52 | #include <xen/interface/grant_table.h> | ||
53 | |||
54 | static struct ethtool_ops xennet_ethtool_ops; | ||
55 | |||
56 | struct netfront_cb { | ||
57 | struct page *page; | ||
58 | unsigned offset; | ||
59 | }; | ||
60 | |||
61 | #define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb)) | ||
62 | |||
63 | #define RX_COPY_THRESHOLD 256 | ||
64 | |||
65 | #define GRANT_INVALID_REF 0 | ||
66 | |||
67 | #define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) | ||
68 | #define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) | ||
69 | #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) | ||
70 | |||
71 | struct netfront_info { | ||
72 | struct list_head list; | ||
73 | struct net_device *netdev; | ||
74 | |||
75 | struct net_device_stats stats; | ||
76 | |||
77 | struct xen_netif_tx_front_ring tx; | ||
78 | struct xen_netif_rx_front_ring rx; | ||
79 | |||
80 | spinlock_t tx_lock; | ||
81 | spinlock_t rx_lock; | ||
82 | |||
83 | unsigned int evtchn; | ||
84 | |||
85 | /* Receive-ring batched refills. */ | ||
86 | #define RX_MIN_TARGET 8 | ||
87 | #define RX_DFL_MIN_TARGET 64 | ||
88 | #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) | ||
89 | unsigned rx_min_target, rx_max_target, rx_target; | ||
90 | struct sk_buff_head rx_batch; | ||
91 | |||
92 | struct timer_list rx_refill_timer; | ||
93 | |||
94 | /* | ||
95 | * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries | ||
96 | * are linked from tx_skb_freelist through skb_entry.link. | ||
97 | * | ||
98 | * NB. Freelist index entries are always going to be less than | ||
99 | * PAGE_OFFSET, whereas pointers to skbs will always be equal or | ||
100 | * greater than PAGE_OFFSET: we use this property to distinguish | ||
101 | * them. | ||
102 | */ | ||
103 | union skb_entry { | ||
104 | struct sk_buff *skb; | ||
105 | unsigned link; | ||
106 | } tx_skbs[NET_TX_RING_SIZE]; | ||
107 | grant_ref_t gref_tx_head; | ||
108 | grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; | ||
109 | unsigned tx_skb_freelist; | ||
110 | |||
111 | struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; | ||
112 | grant_ref_t gref_rx_head; | ||
113 | grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; | ||
114 | |||
115 | struct xenbus_device *xbdev; | ||
116 | int tx_ring_ref; | ||
117 | int rx_ring_ref; | ||
118 | |||
119 | unsigned long rx_pfn_array[NET_RX_RING_SIZE]; | ||
120 | struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; | ||
121 | struct mmu_update rx_mmu[NET_RX_RING_SIZE]; | ||
122 | }; | ||
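The NB comment inside the structure describes a pointer/index pun; the test it implies looks like this (a sketch, assuming the union were hoisted to file scope; the helper name is hypothetical):

static inline int skb_entry_is_link(const union skb_entry *e)
{
	/* Free-list links are small ring indices; real skbs are kernel
	 * pointers, which always sit at or above PAGE_OFFSET. */
	return (unsigned long)e->skb < PAGE_OFFSET;
}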
123 | |||
124 | struct netfront_rx_info { | ||
125 | struct xen_netif_rx_response rx; | ||
126 | struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; | ||
127 | }; | ||
128 | |||
129 | /* | ||
130 | * Access macros for acquiring and freeing slots in tx_skbs[]. | ||
131 | */ | ||
132 | |||
133 | static void add_id_to_freelist(unsigned *head, union skb_entry *list, | ||
134 | unsigned short id) | ||
135 | { | ||
136 | list[id].link = *head; | ||
137 | *head = id; | ||
138 | } | ||
139 | |||
140 | static unsigned short get_id_from_freelist(unsigned *head, | ||
141 | union skb_entry *list) | ||
142 | { | ||
143 | unsigned int id = *head; | ||
144 | *head = list[id].link; | ||
145 | return id; | ||
146 | } | ||
147 | |||
148 | static int xennet_rxidx(RING_IDX idx) | ||
149 | { | ||
150 | return idx & (NET_RX_RING_SIZE - 1); | ||
151 | } | ||
152 | |||
153 | static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np, | ||
154 | RING_IDX ri) | ||
155 | { | ||
156 | int i = xennet_rxidx(ri); | ||
157 | struct sk_buff *skb = np->rx_skbs[i]; | ||
158 | np->rx_skbs[i] = NULL; | ||
159 | return skb; | ||
160 | } | ||
161 | |||
162 | static grant_ref_t xennet_get_rx_ref(struct netfront_info *np, | ||
163 | RING_IDX ri) | ||
164 | { | ||
165 | int i = xennet_rxidx(ri); | ||
166 | grant_ref_t ref = np->grant_rx_ref[i]; | ||
167 | np->grant_rx_ref[i] = GRANT_INVALID_REF; | ||
168 | return ref; | ||
169 | } | ||
170 | |||
171 | #ifdef CONFIG_SYSFS | ||
172 | static int xennet_sysfs_addif(struct net_device *netdev); | ||
173 | static void xennet_sysfs_delif(struct net_device *netdev); | ||
174 | #else /* !CONFIG_SYSFS */ | ||
175 | #define xennet_sysfs_addif(dev) (0) | ||
176 | #define xennet_sysfs_delif(dev) do { } while (0) | ||
177 | #endif | ||
178 | |||
179 | static int xennet_can_sg(struct net_device *dev) | ||
180 | { | ||
181 | return dev->features & NETIF_F_SG; | ||
182 | } | ||
183 | |||
184 | |||
185 | static void rx_refill_timeout(unsigned long data) | ||
186 | { | ||
187 | struct net_device *dev = (struct net_device *)data; | ||
188 | netif_rx_schedule(dev); | ||
189 | } | ||
190 | |||
191 | static int netfront_tx_slot_available(struct netfront_info *np) | ||
192 | { | ||
193 | return ((np->tx.req_prod_pvt - np->tx.rsp_cons) < | ||
194 | (TX_MAX_TARGET - MAX_SKB_FRAGS - 2)); | ||
195 | } | ||
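| /* | ||
|  * Added commentary: the MAX_SKB_FRAGS + 2 headroom kept below | ||
|  * TX_MAX_TARGET appears to cover the worst-case packet accepted by | ||
|  * xennet_start_xmit(): up to MAX_SKB_FRAGS + 1 data slots (enforced by | ||
|  * the "rides the rocket" check there) plus one extra_info slot for GSO. | ||
|  */ | ||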
196 | |||
197 | static void xennet_maybe_wake_tx(struct net_device *dev) | ||
198 | { | ||
199 | struct netfront_info *np = netdev_priv(dev); | ||
200 | |||
201 | if (unlikely(netif_queue_stopped(dev)) && | ||
202 | netfront_tx_slot_available(np) && | ||
203 | likely(netif_running(dev))) | ||
204 | netif_wake_queue(dev); | ||
205 | } | ||
206 | |||
207 | static void xennet_alloc_rx_buffers(struct net_device *dev) | ||
208 | { | ||
209 | unsigned short id; | ||
210 | struct netfront_info *np = netdev_priv(dev); | ||
211 | struct sk_buff *skb; | ||
212 | struct page *page; | ||
213 | int i, batch_target, notify; | ||
214 | RING_IDX req_prod = np->rx.req_prod_pvt; | ||
215 | struct xen_memory_reservation reservation; | ||
216 | grant_ref_t ref; | ||
217 | unsigned long pfn; | ||
218 | void *vaddr; | ||
219 | int nr_flips; | ||
220 | struct xen_netif_rx_request *req; | ||
221 | |||
222 | if (unlikely(!netif_carrier_ok(dev))) | ||
223 | return; | ||
224 | |||
225 | /* | ||
226 | * Allocate skbuffs greedily, even though we batch updates to the | ||
227 | * receive ring. This creates a less bursty demand on the memory | ||
228 | * allocator, so should reduce the chance of failed allocation requests | ||
229 | * both for ourselves and for other kernel subsystems. | ||
230 | */ | ||
231 | batch_target = np->rx_target - (req_prod - np->rx.rsp_cons); | ||
232 | for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) { | ||
233 | skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD, | ||
234 | GFP_ATOMIC | __GFP_NOWARN); | ||
235 | if (unlikely(!skb)) | ||
236 | goto no_skb; | ||
237 | |||
238 | page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); | ||
239 | if (!page) { | ||
240 | kfree_skb(skb); | ||
241 | no_skb: | ||
242 | /* Any skbuffs queued for refill? Force them out. */ | ||
243 | if (i != 0) | ||
244 | goto refill; | ||
245 | /* Could not allocate any skbuffs. Try again later. */ | ||
246 | mod_timer(&np->rx_refill_timer, | ||
247 | jiffies + (HZ/10)); | ||
248 | break; | ||
249 | } | ||
250 | |||
251 | skb_shinfo(skb)->frags[0].page = page; | ||
252 | skb_shinfo(skb)->nr_frags = 1; | ||
253 | __skb_queue_tail(&np->rx_batch, skb); | ||
254 | } | ||
255 | |||
256 | /* Is the batch large enough to be worthwhile? */ | ||
257 | if (i < (np->rx_target/2)) { | ||
258 | if (req_prod > np->rx.sring->req_prod) | ||
259 | goto push; | ||
260 | return; | ||
261 | } | ||
262 | |||
263 | /* Adjust our fill target if we risked running out of buffers. */ | ||
264 | if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) && | ||
265 | ((np->rx_target *= 2) > np->rx_max_target)) | ||
266 | np->rx_target = np->rx_max_target; | ||
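| 	/* | ||
| 	 * NB (added): the np->rx_target *= 2 inside the condition above | ||
| 	 * doubles the fill target as a side effect whenever the ring ran | ||
| 	 * low; the assignment in the body merely clamps the doubled value | ||
| 	 * to rx_max_target. | ||
| 	 */ | ||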
267 | |||
268 | refill: | ||
269 | for (nr_flips = i = 0; ; i++) { | ||
270 | skb = __skb_dequeue(&np->rx_batch); | ||
271 | if (skb == NULL) | ||
272 | break; | ||
273 | |||
274 | skb->dev = dev; | ||
275 | |||
276 | id = xennet_rxidx(req_prod + i); | ||
277 | |||
278 | BUG_ON(np->rx_skbs[id]); | ||
279 | np->rx_skbs[id] = skb; | ||
280 | |||
281 | ref = gnttab_claim_grant_reference(&np->gref_rx_head); | ||
282 | BUG_ON((signed short)ref < 0); | ||
283 | np->grant_rx_ref[id] = ref; | ||
284 | |||
285 | pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page); | ||
286 | vaddr = page_address(skb_shinfo(skb)->frags[0].page); | ||
287 | |||
288 | req = RING_GET_REQUEST(&np->rx, req_prod + i); | ||
289 | gnttab_grant_foreign_access_ref(ref, | ||
290 | np->xbdev->otherend_id, | ||
291 | pfn_to_mfn(pfn), | ||
292 | 0); | ||
293 | |||
294 | req->id = id; | ||
295 | req->gref = ref; | ||
296 | } | ||
297 | |||
298 | if (nr_flips != 0) { | ||
299 | reservation.extent_start = np->rx_pfn_array; | ||
300 | reservation.nr_extents = nr_flips; | ||
301 | reservation.extent_order = 0; | ||
302 | reservation.address_bits = 0; | ||
303 | reservation.domid = DOMID_SELF; | ||
304 | |||
305 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | ||
306 | /* After all PTEs have been zapped, flush the TLB. */ | ||
307 | np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = | ||
308 | UVMF_TLB_FLUSH|UVMF_ALL; | ||
309 | |||
310 | /* Give away a batch of pages. */ | ||
311 | np->rx_mcl[i].op = __HYPERVISOR_memory_op; | ||
312 | np->rx_mcl[i].args[0] = XENMEM_decrease_reservation; | ||
313 | np->rx_mcl[i].args[1] = (unsigned long)&reservation; | ||
314 | |||
315 | /* Zap PTEs and give away pages in one big | ||
316 | * multicall. */ | ||
317 | (void)HYPERVISOR_multicall(np->rx_mcl, i+1); | ||
318 | |||
319 | /* Check return status of HYPERVISOR_memory_op(). */ | ||
320 | if (unlikely(np->rx_mcl[i].result != i)) | ||
321 | panic("Unable to reduce memory reservation\n"); | ||
322 | } else { | ||
323 | if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, | ||
324 | &reservation) != i) | ||
325 | panic("Unable to reduce memory reservation\n"); | ||
326 | } | ||
327 | } else { | ||
328 | wmb(); /* barrier so backend sees requests */ | ||
329 | } | ||
330 | |||
331 | /* Above is a suitable barrier to ensure backend will see requests. */ | ||
332 | np->rx.req_prod_pvt = req_prod + i; | ||
333 | push: | ||
334 | RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify); | ||
335 | if (notify) | ||
336 | notify_remote_via_irq(np->netdev->irq); | ||
337 | } | ||
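| /* | ||
|  * Added note: this frontend always negotiates request-rx-copy (see | ||
|  * talk_to_backend() below), and nr_flips is never incremented in the | ||
|  * refill loop above, so the XENMEM_decrease_reservation branch appears | ||
|  * to be dead code retained from the older page-flipping receive path. | ||
|  */ | ||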
338 | |||
339 | static int xennet_open(struct net_device *dev) | ||
340 | { | ||
341 | struct netfront_info *np = netdev_priv(dev); | ||
342 | |||
343 | memset(&np->stats, 0, sizeof(np->stats)); | ||
344 | |||
345 | spin_lock_bh(&np->rx_lock); | ||
346 | if (netif_carrier_ok(dev)) { | ||
347 | xennet_alloc_rx_buffers(dev); | ||
348 | np->rx.sring->rsp_event = np->rx.rsp_cons + 1; | ||
349 | if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) | ||
350 | netif_rx_schedule(dev); | ||
351 | } | ||
352 | spin_unlock_bh(&np->rx_lock); | ||
353 | |||
354 | xennet_maybe_wake_tx(dev); | ||
355 | |||
356 | return 0; | ||
357 | } | ||
358 | |||
359 | static void xennet_tx_buf_gc(struct net_device *dev) | ||
360 | { | ||
361 | RING_IDX cons, prod; | ||
362 | unsigned short id; | ||
363 | struct netfront_info *np = netdev_priv(dev); | ||
364 | struct sk_buff *skb; | ||
365 | |||
366 | BUG_ON(!netif_carrier_ok(dev)); | ||
367 | |||
368 | do { | ||
369 | prod = np->tx.sring->rsp_prod; | ||
370 | rmb(); /* Ensure we see responses up to 'rp'. */ | ||
371 | |||
372 | for (cons = np->tx.rsp_cons; cons != prod; cons++) { | ||
373 | struct xen_netif_tx_response *txrsp; | ||
374 | |||
375 | txrsp = RING_GET_RESPONSE(&np->tx, cons); | ||
376 | if (txrsp->status == NETIF_RSP_NULL) | ||
377 | continue; | ||
378 | |||
379 | id = txrsp->id; | ||
380 | skb = np->tx_skbs[id].skb; | ||
381 | if (unlikely(gnttab_query_foreign_access( | ||
382 | np->grant_tx_ref[id]) != 0)) { | ||
383 | printk(KERN_ALERT "xennet_tx_buf_gc: warning " | ||
384 | "-- grant still in use by backend " | ||
385 | "domain.\n"); | ||
386 | BUG(); | ||
387 | } | ||
388 | gnttab_end_foreign_access_ref( | ||
389 | np->grant_tx_ref[id], GNTMAP_readonly); | ||
390 | gnttab_release_grant_reference( | ||
391 | &np->gref_tx_head, np->grant_tx_ref[id]); | ||
392 | np->grant_tx_ref[id] = GRANT_INVALID_REF; | ||
393 | add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id); | ||
394 | dev_kfree_skb_irq(skb); | ||
395 | } | ||
396 | |||
397 | np->tx.rsp_cons = prod; | ||
398 | |||
399 | /* | ||
400 | * Set a new event, then check for race with update of tx_cons. | ||
401 | * Note that it is essential to schedule a callback, no matter | ||
402 | * how few buffers are pending. Even if there is space in the | ||
403 | * transmit ring, higher layers may be blocked because too much | ||
404 | * data is outstanding: in such cases notification from Xen is | ||
405 | * likely to be the only kick that we'll get. | ||
406 | */ | ||
407 | np->tx.sring->rsp_event = | ||
408 | prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; | ||
409 | mb(); /* update shared area */ | ||
410 | } while ((cons == prod) && (prod != np->tx.sring->rsp_prod)); | ||
411 | |||
412 | xennet_maybe_wake_tx(dev); | ||
413 | } | ||
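| /* | ||
|  * Worked example (added) for the rsp_event formula above: with | ||
|  * rsp_prod = 10 and req_prod = 20 there are 10 requests in flight, so | ||
|  * rsp_event becomes 10 + (10 >> 1) + 1 = 16 and the backend raises the | ||
|  * next event once roughly half of them have completed. | ||
|  */ | ||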
414 | |||
415 | static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, | ||
416 | struct xen_netif_tx_request *tx) | ||
417 | { | ||
418 | struct netfront_info *np = netdev_priv(dev); | ||
419 | char *data = skb->data; | ||
420 | unsigned long mfn; | ||
421 | RING_IDX prod = np->tx.req_prod_pvt; | ||
422 | int frags = skb_shinfo(skb)->nr_frags; | ||
423 | unsigned int offset = offset_in_page(data); | ||
424 | unsigned int len = skb_headlen(skb); | ||
425 | unsigned int id; | ||
426 | grant_ref_t ref; | ||
427 | int i; | ||
428 | |||
429 | /* While the header overlaps a page boundary (including being | ||
430 | larger than a page), split it into page-sized chunks. */ | ||
431 | while (len > PAGE_SIZE - offset) { | ||
432 | tx->size = PAGE_SIZE - offset; | ||
433 | tx->flags |= NETTXF_more_data; | ||
434 | len -= tx->size; | ||
435 | data += tx->size; | ||
436 | offset = 0; | ||
437 | |||
438 | id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); | ||
439 | np->tx_skbs[id].skb = skb_get(skb); | ||
440 | tx = RING_GET_REQUEST(&np->tx, prod++); | ||
441 | tx->id = id; | ||
442 | ref = gnttab_claim_grant_reference(&np->gref_tx_head); | ||
443 | BUG_ON((signed short)ref < 0); | ||
444 | |||
445 | mfn = virt_to_mfn(data); | ||
446 | gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, | ||
447 | mfn, GNTMAP_readonly); | ||
448 | |||
449 | tx->gref = np->grant_tx_ref[id] = ref; | ||
450 | tx->offset = offset; | ||
451 | tx->size = len; | ||
452 | tx->flags = 0; | ||
453 | } | ||
454 | |||
455 | /* Grant backend access to each skb fragment page. */ | ||
456 | for (i = 0; i < frags; i++) { | ||
457 | skb_frag_t *frag = skb_shinfo(skb)->frags + i; | ||
458 | |||
459 | tx->flags |= NETTXF_more_data; | ||
460 | |||
461 | id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); | ||
462 | np->tx_skbs[id].skb = skb_get(skb); | ||
463 | tx = RING_GET_REQUEST(&np->tx, prod++); | ||
464 | tx->id = id; | ||
465 | ref = gnttab_claim_grant_reference(&np->gref_tx_head); | ||
466 | BUG_ON((signed short)ref < 0); | ||
467 | |||
468 | mfn = pfn_to_mfn(page_to_pfn(frag->page)); | ||
469 | gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, | ||
470 | mfn, GNTMAP_readonly); | ||
471 | |||
472 | tx->gref = np->grant_tx_ref[id] = ref; | ||
473 | tx->offset = frag->page_offset; | ||
474 | tx->size = frag->size; | ||
475 | tx->flags = 0; | ||
476 | } | ||
477 | |||
478 | np->tx.req_prod_pvt = prod; | ||
479 | } | ||
480 | |||
481 | static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) | ||
482 | { | ||
483 | unsigned short id; | ||
484 | struct netfront_info *np = netdev_priv(dev); | ||
485 | struct xen_netif_tx_request *tx; | ||
486 | struct xen_netif_extra_info *extra; | ||
487 | char *data = skb->data; | ||
488 | RING_IDX i; | ||
489 | grant_ref_t ref; | ||
490 | unsigned long mfn; | ||
491 | int notify; | ||
492 | int frags = skb_shinfo(skb)->nr_frags; | ||
493 | unsigned int offset = offset_in_page(data); | ||
494 | unsigned int len = skb_headlen(skb); | ||
495 | |||
496 | frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; | ||
497 | if (unlikely(frags > MAX_SKB_FRAGS + 1)) { | ||
498 | printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", | ||
499 | frags); | ||
500 | dump_stack(); | ||
501 | goto drop; | ||
502 | } | ||
503 | |||
504 | spin_lock_irq(&np->tx_lock); | ||
505 | |||
506 | if (unlikely(!netif_carrier_ok(dev) || | ||
507 | (frags > 1 && !xennet_can_sg(dev)) || | ||
508 | netif_needs_gso(dev, skb))) { | ||
509 | spin_unlock_irq(&np->tx_lock); | ||
510 | goto drop; | ||
511 | } | ||
512 | |||
513 | i = np->tx.req_prod_pvt; | ||
514 | |||
515 | id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); | ||
516 | np->tx_skbs[id].skb = skb; | ||
517 | |||
518 | tx = RING_GET_REQUEST(&np->tx, i); | ||
519 | |||
520 | tx->id = id; | ||
521 | ref = gnttab_claim_grant_reference(&np->gref_tx_head); | ||
522 | BUG_ON((signed short)ref < 0); | ||
523 | mfn = virt_to_mfn(data); | ||
524 | gnttab_grant_foreign_access_ref( | ||
525 | ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); | ||
526 | tx->gref = np->grant_tx_ref[id] = ref; | ||
527 | tx->offset = offset; | ||
528 | tx->size = len; | ||
529 | extra = NULL; | ||
530 | |||
531 | tx->flags = 0; | ||
532 | if (skb->ip_summed == CHECKSUM_PARTIAL) | ||
533 | /* local packet? */ | ||
534 | tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; | ||
535 | else if (skb->ip_summed == CHECKSUM_UNNECESSARY) | ||
536 | /* remote but checksummed. */ | ||
537 | tx->flags |= NETTXF_data_validated; | ||
538 | |||
539 | if (skb_shinfo(skb)->gso_size) { | ||
540 | struct xen_netif_extra_info *gso; | ||
541 | |||
542 | gso = (struct xen_netif_extra_info *) | ||
543 | RING_GET_REQUEST(&np->tx, ++i); | ||
544 | |||
545 | if (extra) | ||
546 | extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; | ||
547 | else | ||
548 | tx->flags |= NETTXF_extra_info; | ||
549 | |||
550 | gso->u.gso.size = skb_shinfo(skb)->gso_size; | ||
551 | gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; | ||
552 | gso->u.gso.pad = 0; | ||
553 | gso->u.gso.features = 0; | ||
554 | |||
555 | gso->type = XEN_NETIF_EXTRA_TYPE_GSO; | ||
556 | gso->flags = 0; | ||
557 | extra = gso; | ||
558 | } | ||
559 | |||
560 | np->tx.req_prod_pvt = i + 1; | ||
561 | |||
562 | xennet_make_frags(skb, dev, tx); | ||
563 | tx->size = skb->len; | ||
564 | |||
565 | RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); | ||
566 | if (notify) | ||
567 | notify_remote_via_irq(np->netdev->irq); | ||
568 | |||
569 | xennet_tx_buf_gc(dev); | ||
570 | |||
571 | if (!netfront_tx_slot_available(np)) | ||
572 | netif_stop_queue(dev); | ||
573 | |||
574 | spin_unlock_irq(&np->tx_lock); | ||
575 | |||
576 | np->stats.tx_bytes += skb->len; | ||
577 | np->stats.tx_packets++; | ||
578 | |||
579 | return 0; | ||
580 | |||
581 | drop: | ||
582 | np->stats.tx_dropped++; | ||
583 | dev_kfree_skb(skb); | ||
584 | return 0; | ||
585 | } | ||
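| /* | ||
|  * Worked example (added) for the slot estimate in xennet_start_xmit(): | ||
|  * with a 4096-byte page, a 100-byte linear header starting at page | ||
|  * offset 4000 spans two pages, contributing | ||
|  * (4000 + 100 + 4095) / 4096 = 2 slots on top of nr_frags; packets | ||
|  * needing more than MAX_SKB_FRAGS + 1 slots are dropped. | ||
|  */ | ||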
586 | |||
587 | static int xennet_close(struct net_device *dev) | ||
588 | { | ||
589 | struct netfront_info *np = netdev_priv(dev); | ||
590 | netif_stop_queue(np->netdev); | ||
591 | return 0; | ||
592 | } | ||
593 | |||
594 | static struct net_device_stats *xennet_get_stats(struct net_device *dev) | ||
595 | { | ||
596 | struct netfront_info *np = netdev_priv(dev); | ||
597 | return &np->stats; | ||
598 | } | ||
599 | |||
600 | static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb, | ||
601 | grant_ref_t ref) | ||
602 | { | ||
603 | int new = xennet_rxidx(np->rx.req_prod_pvt); | ||
604 | |||
605 | BUG_ON(np->rx_skbs[new]); | ||
606 | np->rx_skbs[new] = skb; | ||
607 | np->grant_rx_ref[new] = ref; | ||
608 | RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; | ||
609 | RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; | ||
610 | np->rx.req_prod_pvt++; | ||
611 | } | ||
612 | |||
613 | static int xennet_get_extras(struct netfront_info *np, | ||
614 | struct xen_netif_extra_info *extras, | ||
615 | RING_IDX rp) | ||
616 | |||
617 | { | ||
618 | struct xen_netif_extra_info *extra; | ||
619 | struct device *dev = &np->netdev->dev; | ||
620 | RING_IDX cons = np->rx.rsp_cons; | ||
621 | int err = 0; | ||
622 | |||
623 | do { | ||
624 | struct sk_buff *skb; | ||
625 | grant_ref_t ref; | ||
626 | |||
627 | if (unlikely(cons + 1 == rp)) { | ||
628 | if (net_ratelimit()) | ||
629 | dev_warn(dev, "Missing extra info\n"); | ||
630 | err = -EBADR; | ||
631 | break; | ||
632 | } | ||
633 | |||
634 | extra = (struct xen_netif_extra_info *) | ||
635 | RING_GET_RESPONSE(&np->rx, ++cons); | ||
636 | |||
637 | if (unlikely(!extra->type || | ||
638 | extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { | ||
639 | if (net_ratelimit()) | ||
640 | dev_warn(dev, "Invalid extra type: %d\n", | ||
641 | extra->type); | ||
642 | err = -EINVAL; | ||
643 | } else { | ||
644 | memcpy(&extras[extra->type - 1], extra, | ||
645 | sizeof(*extra)); | ||
646 | } | ||
647 | |||
648 | skb = xennet_get_rx_skb(np, cons); | ||
649 | ref = xennet_get_rx_ref(np, cons); | ||
650 | xennet_move_rx_slot(np, skb, ref); | ||
651 | } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); | ||
652 | |||
653 | np->rx.rsp_cons = cons; | ||
654 | return err; | ||
655 | } | ||
656 | |||
657 | static int xennet_get_responses(struct netfront_info *np, | ||
658 | struct netfront_rx_info *rinfo, RING_IDX rp, | ||
659 | struct sk_buff_head *list) | ||
660 | { | ||
661 | struct xen_netif_rx_response *rx = &rinfo->rx; | ||
662 | struct xen_netif_extra_info *extras = rinfo->extras; | ||
663 | struct device *dev = &np->netdev->dev; | ||
664 | RING_IDX cons = np->rx.rsp_cons; | ||
665 | struct sk_buff *skb = xennet_get_rx_skb(np, cons); | ||
666 | grant_ref_t ref = xennet_get_rx_ref(np, cons); | ||
667 | int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); | ||
668 | int frags = 1; | ||
669 | int err = 0; | ||
670 | unsigned long ret; | ||
671 | |||
672 | if (rx->flags & NETRXF_extra_info) { | ||
673 | err = xennet_get_extras(np, extras, rp); | ||
674 | cons = np->rx.rsp_cons; | ||
675 | } | ||
676 | |||
677 | for (;;) { | ||
678 | if (unlikely(rx->status < 0 || | ||
679 | rx->offset + rx->status > PAGE_SIZE)) { | ||
680 | if (net_ratelimit()) | ||
681 | dev_warn(dev, "rx->offset: %x, size: %u\n", | ||
682 | rx->offset, rx->status); | ||
683 | xennet_move_rx_slot(np, skb, ref); | ||
684 | err = -EINVAL; | ||
685 | goto next; | ||
686 | } | ||
687 | |||
688 | /* | ||
689 | * This definitely indicates a bug, either in this driver or in | ||
690 | * the backend driver. In future this should flag the bad | ||
691 | * situation to the system controller to reboot the backend. | ||
692 | */ | ||
693 | if (ref == GRANT_INVALID_REF) { | ||
694 | if (net_ratelimit()) | ||
695 | dev_warn(dev, "Bad rx response id %d.\n", | ||
696 | rx->id); | ||
697 | err = -EINVAL; | ||
698 | goto next; | ||
699 | } | ||
700 | |||
701 | ret = gnttab_end_foreign_access_ref(ref, 0); | ||
702 | BUG_ON(!ret); | ||
703 | |||
704 | gnttab_release_grant_reference(&np->gref_rx_head, ref); | ||
705 | |||
706 | __skb_queue_tail(list, skb); | ||
707 | |||
708 | next: | ||
709 | if (!(rx->flags & NETRXF_more_data)) | ||
710 | break; | ||
711 | |||
712 | if (cons + frags == rp) { | ||
713 | if (net_ratelimit()) | ||
714 | dev_warn(dev, "Need more frags\n"); | ||
715 | err = -ENOENT; | ||
716 | break; | ||
717 | } | ||
718 | |||
719 | rx = RING_GET_RESPONSE(&np->rx, cons + frags); | ||
720 | skb = xennet_get_rx_skb(np, cons + frags); | ||
721 | ref = xennet_get_rx_ref(np, cons + frags); | ||
722 | frags++; | ||
723 | } | ||
724 | |||
725 | if (unlikely(frags > max)) { | ||
726 | if (net_ratelimit()) | ||
727 | dev_warn(dev, "Too many frags\n"); | ||
728 | err = -E2BIG; | ||
729 | } | ||
730 | |||
731 | if (unlikely(err)) | ||
732 | np->rx.rsp_cons = cons + frags; | ||
733 | |||
734 | return err; | ||
735 | } | ||
736 | |||
737 | static int xennet_set_skb_gso(struct sk_buff *skb, | ||
738 | struct xen_netif_extra_info *gso) | ||
739 | { | ||
740 | if (!gso->u.gso.size) { | ||
741 | if (net_ratelimit()) | ||
742 | printk(KERN_WARNING "GSO size must not be zero.\n"); | ||
743 | return -EINVAL; | ||
744 | } | ||
745 | |||
746 | /* Currently only TCPv4 segmentation offload (TSO) is supported. */ | ||
747 | if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { | ||
748 | if (net_ratelimit()) | ||
749 | printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type); | ||
750 | return -EINVAL; | ||
751 | } | ||
752 | |||
753 | skb_shinfo(skb)->gso_size = gso->u.gso.size; | ||
754 | skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; | ||
755 | |||
756 | /* Header must be checked, and gso_segs computed. */ | ||
757 | skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; | ||
758 | skb_shinfo(skb)->gso_segs = 0; | ||
759 | |||
760 | return 0; | ||
761 | } | ||
762 | |||
763 | static RING_IDX xennet_fill_frags(struct netfront_info *np, | ||
764 | struct sk_buff *skb, | ||
765 | struct sk_buff_head *list) | ||
766 | { | ||
767 | struct skb_shared_info *shinfo = skb_shinfo(skb); | ||
768 | int nr_frags = shinfo->nr_frags; | ||
769 | RING_IDX cons = np->rx.rsp_cons; | ||
770 | skb_frag_t *frag = shinfo->frags + nr_frags; | ||
771 | struct sk_buff *nskb; | ||
772 | |||
773 | while ((nskb = __skb_dequeue(list))) { | ||
774 | struct xen_netif_rx_response *rx = | ||
775 | RING_GET_RESPONSE(&np->rx, ++cons); | ||
776 | |||
777 | frag->page = skb_shinfo(nskb)->frags[0].page; | ||
778 | frag->page_offset = rx->offset; | ||
779 | frag->size = rx->status; | ||
780 | |||
781 | skb->data_len += rx->status; | ||
782 | |||
783 | skb_shinfo(nskb)->nr_frags = 0; | ||
784 | kfree_skb(nskb); | ||
785 | |||
786 | frag++; | ||
787 | nr_frags++; | ||
788 | } | ||
789 | |||
790 | shinfo->nr_frags = nr_frags; | ||
791 | return cons; | ||
792 | } | ||
793 | |||
794 | static int skb_checksum_setup(struct sk_buff *skb) | ||
795 | { | ||
796 | struct iphdr *iph; | ||
797 | unsigned char *th; | ||
798 | int err = -EPROTO; | ||
799 | |||
800 | if (skb->protocol != htons(ETH_P_IP)) | ||
801 | goto out; | ||
802 | |||
803 | iph = (void *)skb->data; | ||
804 | th = skb->data + 4 * iph->ihl; | ||
805 | if (th >= skb_tail_pointer(skb)) | ||
806 | goto out; | ||
807 | |||
808 | skb->csum_start = th - skb->head; | ||
809 | switch (iph->protocol) { | ||
810 | case IPPROTO_TCP: | ||
811 | skb->csum_offset = offsetof(struct tcphdr, check); | ||
812 | break; | ||
813 | case IPPROTO_UDP: | ||
814 | skb->csum_offset = offsetof(struct udphdr, check); | ||
815 | break; | ||
816 | default: | ||
817 | if (net_ratelimit()) | ||
818 | printk(KERN_ERR "Attempting to checksum a non-" | ||
819 | "TCP/UDP packet, dropping a protocol" | ||
820 | " %d packet", iph->protocol); | ||
821 | goto out; | ||
822 | } | ||
823 | |||
824 | if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) | ||
825 | goto out; | ||
826 | |||
827 | err = 0; | ||
828 | |||
829 | out: | ||
830 | return err; | ||
831 | } | ||
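| /* | ||
|  * Worked example (added): for a TCP packet with a 20-byte IP header, | ||
|  * th points 20 bytes past skb->data, csum_start is that offset from | ||
|  * skb->head, and csum_offset is offsetof(struct tcphdr, check) = 16, | ||
|  * so later checksum completion writes the final sum into bytes 16-17 | ||
|  * of the TCP header. | ||
|  */ | ||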
832 | |||
833 | static int handle_incoming_queue(struct net_device *dev, | ||
834 | struct sk_buff_head *rxq) | ||
835 | { | ||
836 | struct netfront_info *np = netdev_priv(dev); | ||
837 | int packets_dropped = 0; | ||
838 | struct sk_buff *skb; | ||
839 | |||
840 | while ((skb = __skb_dequeue(rxq)) != NULL) { | ||
841 | struct page *page = NETFRONT_SKB_CB(skb)->page; | ||
842 | void *vaddr = page_address(page); | ||
843 | unsigned offset = NETFRONT_SKB_CB(skb)->offset; | ||
844 | |||
845 | memcpy(skb->data, vaddr + offset, | ||
846 | skb_headlen(skb)); | ||
847 | |||
848 | if (page != skb_shinfo(skb)->frags[0].page) | ||
849 | __free_page(page); | ||
850 | |||
851 | /* Ethernet work: Delayed to here as it peeks at the header. */ | ||
852 | skb->protocol = eth_type_trans(skb, dev); | ||
853 | |||
854 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
855 | if (skb_checksum_setup(skb)) { | ||
856 | kfree_skb(skb); | ||
857 | packets_dropped++; | ||
858 | np->stats.rx_errors++; | ||
859 | continue; | ||
860 | } | ||
861 | } | ||
862 | |||
863 | np->stats.rx_packets++; | ||
864 | np->stats.rx_bytes += skb->len; | ||
865 | |||
866 | /* Pass it up. */ | ||
867 | netif_receive_skb(skb); | ||
868 | dev->last_rx = jiffies; | ||
869 | } | ||
870 | |||
871 | return packets_dropped; | ||
872 | } | ||
873 | |||
874 | static int xennet_poll(struct net_device *dev, int *pbudget) | ||
875 | { | ||
876 | struct netfront_info *np = netdev_priv(dev); | ||
877 | struct sk_buff *skb; | ||
878 | struct netfront_rx_info rinfo; | ||
879 | struct xen_netif_rx_response *rx = &rinfo.rx; | ||
880 | struct xen_netif_extra_info *extras = rinfo.extras; | ||
881 | RING_IDX i, rp; | ||
882 | int work_done, budget, more_to_do = 1; | ||
883 | struct sk_buff_head rxq; | ||
884 | struct sk_buff_head errq; | ||
885 | struct sk_buff_head tmpq; | ||
886 | unsigned long flags; | ||
887 | unsigned int len; | ||
888 | int err; | ||
889 | |||
890 | spin_lock(&np->rx_lock); | ||
891 | |||
892 | if (unlikely(!netif_carrier_ok(dev))) { | ||
893 | spin_unlock(&np->rx_lock); | ||
894 | return 0; | ||
895 | } | ||
896 | |||
897 | skb_queue_head_init(&rxq); | ||
898 | skb_queue_head_init(&errq); | ||
899 | skb_queue_head_init(&tmpq); | ||
900 | |||
901 | budget = *pbudget; | ||
902 | if (budget > dev->quota) | ||
903 | budget = dev->quota; | ||
904 | rp = np->rx.sring->rsp_prod; | ||
905 | rmb(); /* Ensure we see queued responses up to 'rp'. */ | ||
906 | |||
907 | i = np->rx.rsp_cons; | ||
908 | work_done = 0; | ||
909 | while ((i != rp) && (work_done < budget)) { | ||
910 | memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); | ||
911 | memset(extras, 0, sizeof(rinfo.extras)); | ||
912 | |||
913 | err = xennet_get_responses(np, &rinfo, rp, &tmpq); | ||
914 | |||
915 | if (unlikely(err)) { | ||
916 | err: | ||
917 | while ((skb = __skb_dequeue(&tmpq))) | ||
918 | __skb_queue_tail(&errq, skb); | ||
919 | np->stats.rx_errors++; | ||
920 | i = np->rx.rsp_cons; | ||
921 | continue; | ||
922 | } | ||
923 | |||
924 | skb = __skb_dequeue(&tmpq); | ||
925 | |||
926 | if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { | ||
927 | struct xen_netif_extra_info *gso; | ||
928 | gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; | ||
929 | |||
930 | if (unlikely(xennet_set_skb_gso(skb, gso))) { | ||
931 | __skb_queue_head(&tmpq, skb); | ||
932 | np->rx.rsp_cons += skb_queue_len(&tmpq); | ||
933 | goto err; | ||
934 | } | ||
935 | } | ||
936 | |||
937 | NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page; | ||
938 | NETFRONT_SKB_CB(skb)->offset = rx->offset; | ||
939 | |||
940 | len = rx->status; | ||
941 | if (len > RX_COPY_THRESHOLD) | ||
942 | len = RX_COPY_THRESHOLD; | ||
943 | skb_put(skb, len); | ||
944 | |||
945 | if (rx->status > len) { | ||
946 | skb_shinfo(skb)->frags[0].page_offset = | ||
947 | rx->offset + len; | ||
948 | skb_shinfo(skb)->frags[0].size = rx->status - len; | ||
949 | skb->data_len = rx->status - len; | ||
950 | } else { | ||
951 | skb_shinfo(skb)->frags[0].page = NULL; | ||
952 | skb_shinfo(skb)->nr_frags = 0; | ||
953 | } | ||
954 | |||
955 | i = xennet_fill_frags(np, skb, &tmpq); | ||
956 | |||
957 | /* | ||
958 | * Truesize approximates the size of true data plus | ||
959 | * any supervisor overheads. Adding hypervisor | ||
960 | * overheads has been shown to significantly reduce | ||
961 | * achievable bandwidth with the default receive | ||
962 | * buffer size. It is therefore not wise to account | ||
963 | * for it here. | ||
964 | * | ||
965 | * After alloc_skb(RX_COPY_THRESHOLD), truesize is set | ||
966 | * to RX_COPY_THRESHOLD + the supervisor | ||
967 | * overheads. Here, we add the size of the data pulled | ||
968 | * in xennet_fill_frags(). | ||
969 | * | ||
970 | * We also adjust for any unused space in the main | ||
971 | * data area by subtracting (RX_COPY_THRESHOLD - | ||
972 | * len). This is especially important with drivers | ||
973 | * which split incoming packets into header and data, | ||
974 | * using only 66 bytes of the main data area (see the | ||
975 | * e1000 driver, for example). On such systems, | ||
976 | * without this last adjustment, our achievable | ||
977 | * receive throughput using the standard receive | ||
978 | * buffer size was cut by 25%(!!!). | ||
979 | */ | ||
980 | skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len); | ||
981 | skb->len += skb->data_len; | ||
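| 		/* | ||
| 		 * Arithmetic sketch (added; assumes RX_COPY_THRESHOLD is | ||
| 		 * 256 as defined earlier in this file): a 1500-byte packet | ||
| 		 * delivered as a 66-byte first slot plus fragments gives | ||
| 		 * len = 66 and data_len = 1434, so truesize grows by | ||
| 		 * 1434 - (256 - 66) = 1244 rather than the full 1434. | ||
| 		 */ | ||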
982 | |||
983 | if (rx->flags & NETRXF_csum_blank) | ||
984 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
985 | else if (rx->flags & NETRXF_data_validated) | ||
986 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
987 | |||
988 | __skb_queue_tail(&rxq, skb); | ||
989 | |||
990 | np->rx.rsp_cons = ++i; | ||
991 | work_done++; | ||
992 | } | ||
993 | |||
994 | while ((skb = __skb_dequeue(&errq))) | ||
995 | kfree_skb(skb); | ||
996 | |||
997 | work_done -= handle_incoming_queue(dev, &rxq); | ||
998 | |||
999 | /* If we get a callback with very few responses, reduce fill target. */ | ||
1000 | /* NB. Note exponential increase, linear decrease. */ | ||
1001 | if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > | ||
1002 | ((3*np->rx_target) / 4)) && | ||
1003 | (--np->rx_target < np->rx_min_target)) | ||
1004 | np->rx_target = np->rx_min_target; | ||
1005 | |||
1006 | xennet_alloc_rx_buffers(dev); | ||
1007 | |||
1008 | *pbudget -= work_done; | ||
1009 | dev->quota -= work_done; | ||
1010 | |||
1011 | if (work_done < budget) { | ||
1012 | local_irq_save(flags); | ||
1013 | |||
1014 | RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do); | ||
1015 | if (!more_to_do) | ||
1016 | __netif_rx_complete(dev); | ||
1017 | |||
1018 | local_irq_restore(flags); | ||
1019 | } | ||
1020 | |||
1021 | spin_unlock(&np->rx_lock); | ||
1022 | |||
1023 | return more_to_do; | ||
1024 | } | ||
1025 | |||
1026 | static int xennet_change_mtu(struct net_device *dev, int mtu) | ||
1027 | { | ||
1028 | int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; | ||
1029 | |||
1030 | if (mtu > max) | ||
1031 | return -EINVAL; | ||
1032 | dev->mtu = mtu; | ||
1033 | return 0; | ||
1034 | } | ||
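| /* | ||
|  * Note (added): with scatter-gather the MTU ceiling above works out to | ||
|  * 65535 - ETH_HLEN = 65521 bytes; without it the device falls back to | ||
|  * the standard ETH_DATA_LEN (1500). | ||
|  */ | ||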
1035 | |||
1036 | static void xennet_release_tx_bufs(struct netfront_info *np) | ||
1037 | { | ||
1038 | struct sk_buff *skb; | ||
1039 | int i; | ||
1040 | |||
1041 | for (i = 0; i < NET_TX_RING_SIZE; i++) { | ||
1042 | /* Skip over entries which are actually freelist references */ | ||
1043 | if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET) | ||
1044 | continue; | ||
1045 | |||
1046 | skb = np->tx_skbs[i].skb; | ||
1047 | gnttab_end_foreign_access_ref(np->grant_tx_ref[i], | ||
1048 | GNTMAP_readonly); | ||
1049 | gnttab_release_grant_reference(&np->gref_tx_head, | ||
1050 | np->grant_tx_ref[i]); | ||
1051 | np->grant_tx_ref[i] = GRANT_INVALID_REF; | ||
1052 | add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i); | ||
1053 | dev_kfree_skb_irq(skb); | ||
1054 | } | ||
1055 | } | ||
1056 | |||
1057 | static void xennet_release_rx_bufs(struct netfront_info *np) | ||
1058 | { | ||
1059 | struct mmu_update *mmu = np->rx_mmu; | ||
1060 | struct multicall_entry *mcl = np->rx_mcl; | ||
1061 | struct sk_buff_head free_list; | ||
1062 | struct sk_buff *skb; | ||
1063 | unsigned long mfn; | ||
1064 | int xfer = 0, noxfer = 0, unused = 0; | ||
1065 | int id, ref; | ||
1066 | |||
1067 | dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n", | ||
1068 | __func__); | ||
1069 | return; | ||
1070 | |||
1071 | skb_queue_head_init(&free_list); | ||
1072 | |||
1073 | spin_lock_bh(&np->rx_lock); | ||
1074 | |||
1075 | for (id = 0; id < NET_RX_RING_SIZE; id++) { | ||
1076 | ref = np->grant_rx_ref[id]; | ||
1077 | if (ref == GRANT_INVALID_REF) { | ||
1078 | unused++; | ||
1079 | continue; | ||
1080 | } | ||
1081 | |||
1082 | skb = np->rx_skbs[id]; | ||
1083 | mfn = gnttab_end_foreign_transfer_ref(ref); | ||
1084 | gnttab_release_grant_reference(&np->gref_rx_head, ref); | ||
1085 | np->grant_rx_ref[id] = GRANT_INVALID_REF; | ||
1086 | |||
1087 | if (0 == mfn) { | ||
1088 | skb_shinfo(skb)->nr_frags = 0; | ||
1089 | dev_kfree_skb(skb); | ||
1090 | noxfer++; | ||
1091 | continue; | ||
1092 | } | ||
1093 | |||
1094 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1095 | /* Remap the page. */ | ||
1096 | struct page *page = skb_shinfo(skb)->frags[0].page; | ||
1097 | unsigned long pfn = page_to_pfn(page); | ||
1098 | void *vaddr = page_address(page); | ||
1099 | |||
1100 | MULTI_update_va_mapping(mcl, (unsigned long)vaddr, | ||
1101 | mfn_pte(mfn, PAGE_KERNEL), | ||
1102 | 0); | ||
1103 | mcl++; | ||
1104 | mmu->ptr = ((u64)mfn << PAGE_SHIFT) | ||
1105 | | MMU_MACHPHYS_UPDATE; | ||
1106 | mmu->val = pfn; | ||
1107 | mmu++; | ||
1108 | |||
1109 | set_phys_to_machine(pfn, mfn); | ||
1110 | } | ||
1111 | __skb_queue_tail(&free_list, skb); | ||
1112 | xfer++; | ||
1113 | } | ||
1114 | |||
1115 | dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n", | ||
1116 | __func__, xfer, noxfer, unused); | ||
1117 | |||
1118 | if (xfer) { | ||
1119 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1120 | /* Do all the remapping work and M2P updates. */ | ||
1121 | MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu, | ||
1122 | 0, DOMID_SELF); | ||
1123 | mcl++; | ||
1124 | HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); | ||
1125 | } | ||
1126 | } | ||
1127 | |||
1128 | while ((skb = __skb_dequeue(&free_list)) != NULL) | ||
1129 | dev_kfree_skb(skb); | ||
1130 | |||
1131 | spin_unlock_bh(&np->rx_lock); | ||
1132 | } | ||
1133 | |||
1134 | static void xennet_uninit(struct net_device *dev) | ||
1135 | { | ||
1136 | struct netfront_info *np = netdev_priv(dev); | ||
1137 | xennet_release_tx_bufs(np); | ||
1138 | xennet_release_rx_bufs(np); | ||
1139 | gnttab_free_grant_references(np->gref_tx_head); | ||
1140 | gnttab_free_grant_references(np->gref_rx_head); | ||
1141 | } | ||
1142 | |||
1143 | static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev) | ||
1144 | { | ||
1145 | int i, err; | ||
1146 | struct net_device *netdev; | ||
1147 | struct netfront_info *np; | ||
1148 | |||
1149 | netdev = alloc_etherdev(sizeof(struct netfront_info)); | ||
1150 | if (!netdev) { | ||
1151 | printk(KERN_WARNING "%s> alloc_etherdev failed.\n", | ||
1152 | __func__); | ||
1153 | return ERR_PTR(-ENOMEM); | ||
1154 | } | ||
1155 | |||
1156 | np = netdev_priv(netdev); | ||
1157 | np->xbdev = dev; | ||
1158 | |||
1159 | spin_lock_init(&np->tx_lock); | ||
1160 | spin_lock_init(&np->rx_lock); | ||
1161 | |||
1162 | skb_queue_head_init(&np->rx_batch); | ||
1163 | np->rx_target = RX_DFL_MIN_TARGET; | ||
1164 | np->rx_min_target = RX_DFL_MIN_TARGET; | ||
1165 | np->rx_max_target = RX_MAX_TARGET; | ||
1166 | |||
1167 | init_timer(&np->rx_refill_timer); | ||
1168 | np->rx_refill_timer.data = (unsigned long)netdev; | ||
1169 | np->rx_refill_timer.function = rx_refill_timeout; | ||
1170 | |||
1171 | /* Initialise tx_skbs as a free chain containing every entry. */ | ||
1172 | np->tx_skb_freelist = 0; | ||
1173 | for (i = 0; i < NET_TX_RING_SIZE; i++) { | ||
1174 | np->tx_skbs[i].link = i+1; | ||
1175 | np->grant_tx_ref[i] = GRANT_INVALID_REF; | ||
1176 | } | ||
1177 | |||
1178 | /* Clear out rx_skbs */ | ||
1179 | for (i = 0; i < NET_RX_RING_SIZE; i++) { | ||
1180 | np->rx_skbs[i] = NULL; | ||
1181 | np->grant_rx_ref[i] = GRANT_INVALID_REF; | ||
1182 | } | ||
1183 | |||
1184 | /* A grant for every tx ring slot */ | ||
1185 | if (gnttab_alloc_grant_references(TX_MAX_TARGET, | ||
1186 | &np->gref_tx_head) < 0) { | ||
1187 | printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); | ||
1188 | err = -ENOMEM; | ||
1189 | goto exit; | ||
1190 | } | ||
1191 | /* A grant for every rx ring slot */ | ||
1192 | if (gnttab_alloc_grant_references(RX_MAX_TARGET, | ||
1193 | &np->gref_rx_head) < 0) { | ||
1194 | printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); | ||
1195 | err = -ENOMEM; | ||
1196 | goto exit_free_tx; | ||
1197 | } | ||
1198 | |||
1199 | netdev->open = xennet_open; | ||
1200 | netdev->hard_start_xmit = xennet_start_xmit; | ||
1201 | netdev->stop = xennet_close; | ||
1202 | netdev->get_stats = xennet_get_stats; | ||
1203 | netdev->poll = xennet_poll; | ||
1204 | netdev->uninit = xennet_uninit; | ||
1205 | netdev->change_mtu = xennet_change_mtu; | ||
1206 | netdev->weight = 64; | ||
1207 | netdev->features = NETIF_F_IP_CSUM; | ||
1208 | |||
1209 | SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops); | ||
1210 | SET_MODULE_OWNER(netdev); | ||
1211 | SET_NETDEV_DEV(netdev, &dev->dev); | ||
1212 | |||
1213 | np->netdev = netdev; | ||
1214 | |||
1215 | netif_carrier_off(netdev); | ||
1216 | |||
1217 | return netdev; | ||
1218 | |||
1219 | exit_free_tx: | ||
1220 | gnttab_free_grant_references(np->gref_tx_head); | ||
1221 | exit: | ||
1222 | free_netdev(netdev); | ||
1223 | return ERR_PTR(err); | ||
1224 | } | ||
1225 | |||
1226 | /** | ||
1227 | * Entry point to this code when a new device is created. Allocate the basic | ||
1228 | * structures and the ring buffers for communication with the backend, and | ||
1229 | * inform the backend of the appropriate details for those. | ||
1230 | */ | ||
1231 | static int __devinit netfront_probe(struct xenbus_device *dev, | ||
1232 | const struct xenbus_device_id *id) | ||
1233 | { | ||
1234 | int err; | ||
1235 | struct net_device *netdev; | ||
1236 | struct netfront_info *info; | ||
1237 | |||
1238 | netdev = xennet_create_dev(dev); | ||
1239 | if (IS_ERR(netdev)) { | ||
1240 | err = PTR_ERR(netdev); | ||
1241 | xenbus_dev_fatal(dev, err, "creating netdev"); | ||
1242 | return err; | ||
1243 | } | ||
1244 | |||
1245 | info = netdev_priv(netdev); | ||
1246 | dev->dev.driver_data = info; | ||
1247 | |||
1248 | err = register_netdev(info->netdev); | ||
1249 | if (err) { | ||
1250 | printk(KERN_WARNING "%s: register_netdev err=%d\n", | ||
1251 | __func__, err); | ||
1252 | goto fail; | ||
1253 | } | ||
1254 | |||
1255 | err = xennet_sysfs_addif(info->netdev); | ||
1256 | if (err) { | ||
1257 | unregister_netdev(info->netdev); | ||
1258 | printk(KERN_WARNING "%s: add sysfs failed err=%d\n", | ||
1259 | __func__, err); | ||
1260 | goto fail; | ||
1261 | } | ||
1262 | |||
1263 | return 0; | ||
1264 | |||
1265 | fail: | ||
1266 | free_netdev(netdev); | ||
1267 | dev->dev.driver_data = NULL; | ||
1268 | return err; | ||
1269 | } | ||
1270 | |||
1271 | static void xennet_end_access(int ref, void *page) | ||
1272 | { | ||
1273 | /* This frees the page as a side-effect */ | ||
1274 | if (ref != GRANT_INVALID_REF) | ||
1275 | gnttab_end_foreign_access(ref, 0, (unsigned long)page); | ||
1276 | } | ||
1277 | |||
1278 | static void xennet_disconnect_backend(struct netfront_info *info) | ||
1279 | { | ||
1280 | /* Stop old i/f to prevent errors whilst we rebuild the state. */ | ||
1281 | spin_lock_bh(&info->rx_lock); | ||
1282 | spin_lock_irq(&info->tx_lock); | ||
1283 | netif_carrier_off(info->netdev); | ||
1284 | spin_unlock_irq(&info->tx_lock); | ||
1285 | spin_unlock_bh(&info->rx_lock); | ||
1286 | |||
1287 | if (info->netdev->irq) | ||
1288 | unbind_from_irqhandler(info->netdev->irq, info->netdev); | ||
1289 | info->evtchn = info->netdev->irq = 0; | ||
1290 | |||
1291 | /* End access and free the pages */ | ||
1292 | xennet_end_access(info->tx_ring_ref, info->tx.sring); | ||
1293 | xennet_end_access(info->rx_ring_ref, info->rx.sring); | ||
1294 | |||
1295 | info->tx_ring_ref = GRANT_INVALID_REF; | ||
1296 | info->rx_ring_ref = GRANT_INVALID_REF; | ||
1297 | info->tx.sring = NULL; | ||
1298 | info->rx.sring = NULL; | ||
1299 | } | ||
1300 | |||
1301 | /** | ||
1302 | * We are reconnecting to the backend, due to a suspend/resume, or a backend | ||
1303 | * driver restart. We tear down our netif structure and recreate it, but | ||
1304 | * leave the device-layer structures intact so that this is transparent to the | ||
1305 | * rest of the kernel. | ||
1306 | */ | ||
1307 | static int netfront_resume(struct xenbus_device *dev) | ||
1308 | { | ||
1309 | struct netfront_info *info = dev->dev.driver_data; | ||
1310 | |||
1311 | dev_dbg(&dev->dev, "%s\n", dev->nodename); | ||
1312 | |||
1313 | xennet_disconnect_backend(info); | ||
1314 | return 0; | ||
1315 | } | ||
1316 | |||
1317 | static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) | ||
1318 | { | ||
1319 | char *s, *e, *macstr; | ||
1320 | int i; | ||
1321 | |||
1322 | macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); | ||
1323 | if (IS_ERR(macstr)) | ||
1324 | return PTR_ERR(macstr); | ||
1325 | |||
1326 | for (i = 0; i < ETH_ALEN; i++) { | ||
1327 | mac[i] = simple_strtoul(s, &e, 16); | ||
1328 | if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { | ||
1329 | kfree(macstr); | ||
1330 | return -ENOENT; | ||
1331 | } | ||
1332 | s = e+1; | ||
1333 | } | ||
1334 | |||
1335 | kfree(macstr); | ||
1336 | return 0; | ||
1337 | } | ||
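| /* | ||
|  * Illustrative example (value hypothetical): a xenstore node such as | ||
|  * mac = "00:16:3e:12:34:56" parses one colon-separated hex octet at a | ||
|  * time into dev_addr; Xen toolstacks conventionally assign MACs from | ||
|  * the 00:16:3e OUI. Malformed strings fail with -ENOENT. | ||
|  */ | ||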
1338 | |||
1339 | static irqreturn_t xennet_interrupt(int irq, void *dev_id) | ||
1340 | { | ||
1341 | struct net_device *dev = dev_id; | ||
1342 | struct netfront_info *np = netdev_priv(dev); | ||
1343 | unsigned long flags; | ||
1344 | |||
1345 | spin_lock_irqsave(&np->tx_lock, flags); | ||
1346 | |||
1347 | if (likely(netif_carrier_ok(dev))) { | ||
1348 | xennet_tx_buf_gc(dev); | ||
1349 | /* Under tx_lock: protects access to rx shared-ring indexes. */ | ||
1350 | if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) | ||
1351 | netif_rx_schedule(dev); | ||
1352 | } | ||
1353 | |||
1354 | spin_unlock_irqrestore(&np->tx_lock, flags); | ||
1355 | |||
1356 | return IRQ_HANDLED; | ||
1357 | } | ||
1358 | |||
1359 | static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) | ||
1360 | { | ||
1361 | struct xen_netif_tx_sring *txs; | ||
1362 | struct xen_netif_rx_sring *rxs; | ||
1363 | int err; | ||
1364 | struct net_device *netdev = info->netdev; | ||
1365 | |||
1366 | info->tx_ring_ref = GRANT_INVALID_REF; | ||
1367 | info->rx_ring_ref = GRANT_INVALID_REF; | ||
1368 | info->rx.sring = NULL; | ||
1369 | info->tx.sring = NULL; | ||
1370 | netdev->irq = 0; | ||
1371 | |||
1372 | err = xen_net_read_mac(dev, netdev->dev_addr); | ||
1373 | if (err) { | ||
1374 | xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); | ||
1375 | goto fail; | ||
1376 | } | ||
1377 | |||
1378 | txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL); | ||
1379 | if (!txs) { | ||
1380 | err = -ENOMEM; | ||
1381 | xenbus_dev_fatal(dev, err, "allocating tx ring page"); | ||
1382 | goto fail; | ||
1383 | } | ||
1384 | SHARED_RING_INIT(txs); | ||
1385 | FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); | ||
1386 | |||
1387 | err = xenbus_grant_ring(dev, virt_to_mfn(txs)); | ||
1388 | if (err < 0) { | ||
1389 | free_page((unsigned long)txs); | ||
1390 | goto fail; | ||
1391 | } | ||
1392 | |||
1393 | info->tx_ring_ref = err; | ||
1394 | rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL); | ||
1395 | if (!rxs) { | ||
1396 | err = -ENOMEM; | ||
1397 | xenbus_dev_fatal(dev, err, "allocating rx ring page"); | ||
1398 | goto fail; | ||
1399 | } | ||
1400 | SHARED_RING_INIT(rxs); | ||
1401 | FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); | ||
1402 | |||
1403 | err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); | ||
1404 | if (err < 0) { | ||
1405 | free_page((unsigned long)rxs); | ||
1406 | goto fail; | ||
1407 | } | ||
1408 | info->rx_ring_ref = err; | ||
1409 | |||
1410 | err = xenbus_alloc_evtchn(dev, &info->evtchn); | ||
1411 | if (err) | ||
1412 | goto fail; | ||
1413 | |||
1414 | err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt, | ||
1415 | IRQF_SAMPLE_RANDOM, netdev->name, | ||
1416 | netdev); | ||
1417 | if (err < 0) | ||
1418 | goto fail; | ||
1419 | netdev->irq = err; | ||
1420 | return 0; | ||
1421 | |||
1422 | fail: | ||
1423 | return err; | ||
1424 | } | ||
1425 | |||
1426 | /* Common code used when first setting up, and when resuming. */ | ||
1427 | static int talk_to_backend(struct xenbus_device *dev, | ||
1428 | struct netfront_info *info) | ||
1429 | { | ||
1430 | const char *message; | ||
1431 | struct xenbus_transaction xbt; | ||
1432 | int err; | ||
1433 | |||
1434 | /* Create shared ring, alloc event channel. */ | ||
1435 | err = setup_netfront(dev, info); | ||
1436 | if (err) | ||
1437 | goto out; | ||
1438 | |||
1439 | again: | ||
1440 | err = xenbus_transaction_start(&xbt); | ||
1441 | if (err) { | ||
1442 | xenbus_dev_fatal(dev, err, "starting transaction"); | ||
1443 | goto destroy_ring; | ||
1444 | } | ||
1445 | |||
1446 | err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u", | ||
1447 | info->tx_ring_ref); | ||
1448 | if (err) { | ||
1449 | message = "writing tx ring-ref"; | ||
1450 | goto abort_transaction; | ||
1451 | } | ||
1452 | err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u", | ||
1453 | info->rx_ring_ref); | ||
1454 | if (err) { | ||
1455 | message = "writing rx ring-ref"; | ||
1456 | goto abort_transaction; | ||
1457 | } | ||
1458 | err = xenbus_printf(xbt, dev->nodename, | ||
1459 | "event-channel", "%u", info->evtchn); | ||
1460 | if (err) { | ||
1461 | message = "writing event-channel"; | ||
1462 | goto abort_transaction; | ||
1463 | } | ||
1464 | |||
1465 | err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u", | ||
1466 | 1); | ||
1467 | if (err) { | ||
1468 | message = "writing request-rx-copy"; | ||
1469 | goto abort_transaction; | ||
1470 | } | ||
1471 | |||
1472 | err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1); | ||
1473 | if (err) { | ||
1474 | message = "writing feature-rx-notify"; | ||
1475 | goto abort_transaction; | ||
1476 | } | ||
1477 | |||
1478 | err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1); | ||
1479 | if (err) { | ||
1480 | message = "writing feature-sg"; | ||
1481 | goto abort_transaction; | ||
1482 | } | ||
1483 | |||
1484 | err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1); | ||
1485 | if (err) { | ||
1486 | message = "writing feature-gso-tcpv4"; | ||
1487 | goto abort_transaction; | ||
1488 | } | ||
1489 | |||
1490 | err = xenbus_transaction_end(xbt, 0); | ||
1491 | if (err) { | ||
1492 | if (err == -EAGAIN) | ||
1493 | goto again; | ||
1494 | xenbus_dev_fatal(dev, err, "completing transaction"); | ||
1495 | goto destroy_ring; | ||
1496 | } | ||
1497 | |||
1498 | return 0; | ||
1499 | |||
1500 | abort_transaction: | ||
1501 | xenbus_transaction_end(xbt, 1); | ||
1502 | xenbus_dev_fatal(dev, err, "%s", message); | ||
1503 | destroy_ring: | ||
1504 | xennet_disconnect_backend(info); | ||
1505 | out: | ||
1506 | return err; | ||
1507 | } | ||
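| /* | ||
|  * Sketch (added) of the xenstore keys the transaction above leaves | ||
|  * under the device node (values are examples only): | ||
|  * | ||
|  *	tx-ring-ref = "8"		rx-ring-ref = "9" | ||
|  *	event-channel = "12"		request-rx-copy = "1" | ||
|  *	feature-rx-notify = "1"	feature-sg = "1" | ||
|  *	feature-gso-tcpv4 = "1" | ||
|  */ | ||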
1508 | |||
1509 | static int xennet_set_sg(struct net_device *dev, u32 data) | ||
1510 | { | ||
1511 | if (data) { | ||
1512 | struct netfront_info *np = netdev_priv(dev); | ||
1513 | int val; | ||
1514 | |||
1515 | if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg", | ||
1516 | "%d", &val) < 0) | ||
1517 | val = 0; | ||
1518 | if (!val) | ||
1519 | return -ENOSYS; | ||
1520 | } else if (dev->mtu > ETH_DATA_LEN) | ||
1521 | dev->mtu = ETH_DATA_LEN; | ||
1522 | |||
1523 | return ethtool_op_set_sg(dev, data); | ||
1524 | } | ||
1525 | |||
1526 | static int xennet_set_tso(struct net_device *dev, u32 data) | ||
1527 | { | ||
1528 | if (data) { | ||
1529 | struct netfront_info *np = netdev_priv(dev); | ||
1530 | int val; | ||
1531 | |||
1532 | if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, | ||
1533 | "feature-gso-tcpv4", "%d", &val) < 0) | ||
1534 | val = 0; | ||
1535 | if (!val) | ||
1536 | return -ENOSYS; | ||
1537 | } | ||
1538 | |||
1539 | return ethtool_op_set_tso(dev, data); | ||
1540 | } | ||
1541 | |||
1542 | static void xennet_set_features(struct net_device *dev) | ||
1543 | { | ||
1544 | /* Turn off all GSO bits except ROBUST. */ | ||
1545 | dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; | ||
1546 | dev->features |= NETIF_F_GSO_ROBUST; | ||
1547 | xennet_set_sg(dev, 0); | ||
1548 | |||
1549 | /* We need checksum offload to enable scatter/gather and TSO. */ | ||
1550 | if (!(dev->features & NETIF_F_IP_CSUM)) | ||
1551 | return; | ||
1552 | |||
1553 | if (!xennet_set_sg(dev, 1)) | ||
1554 | xennet_set_tso(dev, 1); | ||
1555 | } | ||
1556 | |||
1557 | static int xennet_connect(struct net_device *dev) | ||
1558 | { | ||
1559 | struct netfront_info *np = netdev_priv(dev); | ||
1560 | int i, requeue_idx, err; | ||
1561 | struct sk_buff *skb; | ||
1562 | grant_ref_t ref; | ||
1563 | struct xen_netif_rx_request *req; | ||
1564 | unsigned int feature_rx_copy; | ||
1565 | |||
1566 | err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, | ||
1567 | "feature-rx-copy", "%u", &feature_rx_copy); | ||
1568 | if (err != 1) | ||
1569 | feature_rx_copy = 0; | ||
1570 | |||
1571 | if (!feature_rx_copy) { | ||
1572 | dev_info(&dev->dev, | ||
1573 | "backend does not support copying recieve path"); | ||
1574 | return -ENODEV; | ||
1575 | } | ||
1576 | |||
1577 | err = talk_to_backend(np->xbdev, np); | ||
1578 | if (err) | ||
1579 | return err; | ||
1580 | |||
1581 | xennet_set_features(dev); | ||
1582 | |||
1583 | spin_lock_bh(&np->rx_lock); | ||
1584 | spin_lock_irq(&np->tx_lock); | ||
1585 | |||
1586 | /* Step 1: Discard all pending TX packet fragments. */ | ||
1587 | xennet_release_tx_bufs(np); | ||
1588 | |||
1589 | /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ | ||
1590 | for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { | ||
1591 | if (!np->rx_skbs[i]) | ||
1592 | continue; | ||
1593 | |||
1594 | skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i); | ||
1595 | ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); | ||
1596 | req = RING_GET_REQUEST(&np->rx, requeue_idx); | ||
1597 | |||
1598 | gnttab_grant_foreign_access_ref( | ||
1599 | ref, np->xbdev->otherend_id, | ||
1600 | pfn_to_mfn(page_to_pfn(skb_shinfo(skb)-> | ||
1601 | frags->page)), | ||
1602 | 0); | ||
1603 | req->gref = ref; | ||
1604 | req->id = requeue_idx; | ||
1605 | |||
1606 | requeue_idx++; | ||
1607 | } | ||
1608 | |||
1609 | np->rx.req_prod_pvt = requeue_idx; | ||
1610 | |||
1611 | /* | ||
1612 | * Step 3: All public and private state should now be sane. Get | ||
1613 | * ready to start sending and receiving packets and give the driver | ||
1614 | * domain a kick because we've probably just requeued some | ||
1615 | * packets. | ||
1616 | */ | ||
1617 | netif_carrier_on(np->netdev); | ||
1618 | notify_remote_via_irq(np->netdev->irq); | ||
1619 | xennet_tx_buf_gc(dev); | ||
1620 | xennet_alloc_rx_buffers(dev); | ||
1621 | |||
1622 | spin_unlock_irq(&np->tx_lock); | ||
1623 | spin_unlock_bh(&np->rx_lock); | ||
1624 | |||
1625 | return 0; | ||
1626 | } | ||
1627 | |||
1628 | /** | ||
1629 | * Callback received when the backend's state changes. | ||
1630 | */ | ||
1631 | static void backend_changed(struct xenbus_device *dev, | ||
1632 | enum xenbus_state backend_state) | ||
1633 | { | ||
1634 | struct netfront_info *np = dev->dev.driver_data; | ||
1635 | struct net_device *netdev = np->netdev; | ||
1636 | |||
1637 | dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state)); | ||
1638 | |||
1639 | switch (backend_state) { | ||
1640 | case XenbusStateInitialising: | ||
1641 | case XenbusStateInitialised: | ||
1642 | case XenbusStateConnected: | ||
1643 | case XenbusStateUnknown: | ||
1644 | case XenbusStateClosed: | ||
1645 | break; | ||
1646 | |||
1647 | case XenbusStateInitWait: | ||
1648 | if (dev->state != XenbusStateInitialising) | ||
1649 | break; | ||
1650 | if (xennet_connect(netdev) != 0) | ||
1651 | break; | ||
1652 | xenbus_switch_state(dev, XenbusStateConnected); | ||
1653 | break; | ||
1654 | |||
1655 | case XenbusStateClosing: | ||
1656 | xenbus_frontend_closed(dev); | ||
1657 | break; | ||
1658 | } | ||
1659 | } | ||
1660 | |||
1661 | static struct ethtool_ops xennet_ethtool_ops = | ||
1662 | { | ||
1663 | .get_tx_csum = ethtool_op_get_tx_csum, | ||
1664 | .set_tx_csum = ethtool_op_set_tx_csum, | ||
1665 | .get_sg = ethtool_op_get_sg, | ||
1666 | .set_sg = xennet_set_sg, | ||
1667 | .get_tso = ethtool_op_get_tso, | ||
1668 | .set_tso = xennet_set_tso, | ||
1669 | .get_link = ethtool_op_get_link, | ||
1670 | }; | ||
1671 | |||
1672 | #ifdef CONFIG_SYSFS | ||
1673 | static ssize_t show_rxbuf_min(struct device *dev, | ||
1674 | struct device_attribute *attr, char *buf) | ||
1675 | { | ||
1676 | struct net_device *netdev = to_net_dev(dev); | ||
1677 | struct netfront_info *info = netdev_priv(netdev); | ||
1678 | |||
1679 | return sprintf(buf, "%u\n", info->rx_min_target); | ||
1680 | } | ||
1681 | |||
1682 | static ssize_t store_rxbuf_min(struct device *dev, | ||
1683 | struct device_attribute *attr, | ||
1684 | const char *buf, size_t len) | ||
1685 | { | ||
1686 | struct net_device *netdev = to_net_dev(dev); | ||
1687 | struct netfront_info *np = netdev_priv(netdev); | ||
1688 | char *endp; | ||
1689 | unsigned long target; | ||
1690 | |||
1691 | if (!capable(CAP_NET_ADMIN)) | ||
1692 | return -EPERM; | ||
1693 | |||
1694 | target = simple_strtoul(buf, &endp, 0); | ||
1695 | if (endp == buf) | ||
1696 | return -EBADMSG; | ||
1697 | |||
1698 | if (target < RX_MIN_TARGET) | ||
1699 | target = RX_MIN_TARGET; | ||
1700 | if (target > RX_MAX_TARGET) | ||
1701 | target = RX_MAX_TARGET; | ||
1702 | |||
1703 | spin_lock_bh(&np->rx_lock); | ||
1704 | if (target > np->rx_max_target) | ||
1705 | np->rx_max_target = target; | ||
1706 | np->rx_min_target = target; | ||
1707 | if (target > np->rx_target) | ||
1708 | np->rx_target = target; | ||
1709 | |||
1710 | xennet_alloc_rx_buffers(netdev); | ||
1711 | |||
1712 | spin_unlock_bh(&np->rx_lock); | ||
1713 | return len; | ||
1714 | } | ||
1715 | |||
1716 | static ssize_t show_rxbuf_max(struct device *dev, | ||
1717 | struct device_attribute *attr, char *buf) | ||
1718 | { | ||
1719 | struct net_device *netdev = to_net_dev(dev); | ||
1720 | struct netfront_info *info = netdev_priv(netdev); | ||
1721 | |||
1722 | return sprintf(buf, "%u\n", info->rx_max_target); | ||
1723 | } | ||
1724 | |||
1725 | static ssize_t store_rxbuf_max(struct device *dev, | ||
1726 | struct device_attribute *attr, | ||
1727 | const char *buf, size_t len) | ||
1728 | { | ||
1729 | struct net_device *netdev = to_net_dev(dev); | ||
1730 | struct netfront_info *np = netdev_priv(netdev); | ||
1731 | char *endp; | ||
1732 | unsigned long target; | ||
1733 | |||
1734 | if (!capable(CAP_NET_ADMIN)) | ||
1735 | return -EPERM; | ||
1736 | |||
1737 | target = simple_strtoul(buf, &endp, 0); | ||
1738 | if (endp == buf) | ||
1739 | return -EBADMSG; | ||
1740 | |||
1741 | if (target < RX_MIN_TARGET) | ||
1742 | target = RX_MIN_TARGET; | ||
1743 | if (target > RX_MAX_TARGET) | ||
1744 | target = RX_MAX_TARGET; | ||
1745 | |||
1746 | spin_lock_bh(&np->rx_lock); | ||
1747 | if (target < np->rx_min_target) | ||
1748 | np->rx_min_target = target; | ||
1749 | np->rx_max_target = target; | ||
1750 | if (target < np->rx_target) | ||
1751 | np->rx_target = target; | ||
1752 | |||
1753 | xennet_alloc_rx_buffers(netdev); | ||
1754 | |||
1755 | spin_unlock_bh(&np->rx_lock); | ||
1756 | return len; | ||
1757 | } | ||
1758 | |||
1759 | static ssize_t show_rxbuf_cur(struct device *dev, | ||
1760 | struct device_attribute *attr, char *buf) | ||
1761 | { | ||
1762 | struct net_device *netdev = to_net_dev(dev); | ||
1763 | struct netfront_info *info = netdev_priv(netdev); | ||
1764 | |||
1765 | return sprintf(buf, "%u\n", info->rx_target); | ||
1766 | } | ||
1767 | |||
1768 | static struct device_attribute xennet_attrs[] = { | ||
1769 | __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min), | ||
1770 | __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max), | ||
1771 | __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL), | ||
1772 | }; | ||
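| /* | ||
|  * Illustrative usage (added; interface name hypothetical): | ||
|  * | ||
|  *	# cat /sys/class/net/eth0/rxbuf_cur | ||
|  *	# echo 128 > /sys/class/net/eth0/rxbuf_min | ||
|  * | ||
|  * Writes are clamped to [RX_MIN_TARGET, RX_MAX_TARGET] by the store | ||
|  * handlers above and require CAP_NET_ADMIN. | ||
|  */ | ||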
1773 | |||
1774 | static int xennet_sysfs_addif(struct net_device *netdev) | ||
1775 | { | ||
1776 | int i; | ||
1777 | int err; | ||
1778 | |||
1779 | for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { | ||
1780 | err = device_create_file(&netdev->dev, | ||
1781 | &xennet_attrs[i]); | ||
1782 | if (err) | ||
1783 | goto fail; | ||
1784 | } | ||
1785 | return 0; | ||
1786 | |||
1787 | fail: | ||
1788 | while (--i >= 0) | ||
1789 | device_remove_file(&netdev->dev, &xennet_attrs[i]); | ||
1790 | return err; | ||
1791 | } | ||
1792 | |||
1793 | static void xennet_sysfs_delif(struct net_device *netdev) | ||
1794 | { | ||
1795 | int i; | ||
1796 | |||
1797 | for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) | ||
1798 | device_remove_file(&netdev->dev, &xennet_attrs[i]); | ||
1799 | } | ||
1800 | |||
1801 | #endif /* CONFIG_SYSFS */ | ||
1802 | |||
1803 | static struct xenbus_device_id netfront_ids[] = { | ||
1804 | { "vif" }, | ||
1805 | { "" } | ||
1806 | }; | ||
1807 | |||
1808 | |||
1809 | static int __devexit xennet_remove(struct xenbus_device *dev) | ||
1810 | { | ||
1811 | struct netfront_info *info = dev->dev.driver_data; | ||
1812 | |||
1813 | dev_dbg(&dev->dev, "%s\n", dev->nodename); | ||
1814 | |||
1815 | unregister_netdev(info->netdev); | ||
1816 | |||
1817 | xennet_disconnect_backend(info); | ||
1818 | |||
1819 | del_timer_sync(&info->rx_refill_timer); | ||
1820 | |||
1821 | xennet_sysfs_delif(info->netdev); | ||
1822 | |||
1823 | free_netdev(info->netdev); | ||
1824 | |||
1825 | return 0; | ||
1826 | } | ||
1827 | |||
1828 | static struct xenbus_driver netfront = { | ||
1829 | .name = "vif", | ||
1830 | .owner = THIS_MODULE, | ||
1831 | .ids = netfront_ids, | ||
1832 | .probe = netfront_probe, | ||
1833 | .remove = __devexit_p(xennet_remove), | ||
1834 | .resume = netfront_resume, | ||
1835 | .otherend_changed = backend_changed, | ||
1836 | }; | ||
1837 | |||
1838 | static int __init netif_init(void) | ||
1839 | { | ||
1840 | if (!is_running_on_xen()) | ||
1841 | return -ENODEV; | ||
1842 | |||
1843 | if (is_initial_xendomain()) | ||
1844 | return 0; | ||
1845 | |||
1846 | printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); | ||
1847 | |||
1848 | return xenbus_register_frontend(&netfront); | ||
1849 | } | ||
1850 | module_init(netif_init); | ||
1851 | |||
1852 | |||
1853 | static void __exit netif_exit(void) | ||
1854 | { | ||
1855 | if (is_initial_xendomain()) | ||
1856 | return; | ||
1857 | |||
1858 | xenbus_unregister_driver(&netfront); | ||
1859 | } | ||
1860 | module_exit(netif_exit); | ||
1861 | |||
1862 | MODULE_DESCRIPTION("Xen virtual network device frontend"); | ||
1863 | MODULE_LICENSE("GPL"); | ||
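For a sense of how the rxbuf knobs above surface to an administrator: they are ordinary sysfs files on the netdevice, writable only with CAP_NET_ADMIN. A minimal userspace sketch, assuming the interface is named eth0 (the path is illustrative, not taken from this patch):

#include <stdio.h>

int main(void)
{
	/* Hypothetical path; substitute your interface name. */
	const char *path = "/sys/class/net/eth0/rxbuf_min";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* The driver clamps the value to [RX_MIN_TARGET, RX_MAX_TARGET]. */
	fprintf(f, "128\n");
	return fclose(f) ? 1 : 0;
}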
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 03baf1c64a2e..ed112ee16012 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c | |||
@@ -147,7 +147,7 @@ static int pnp_dock_event(int dock, struct pnp_docking_station_info *info) | |||
147 | info->location_id, info->serial, info->capabilities); | 147 | info->location_id, info->serial, info->capabilities); |
148 | envp[i] = NULL; | 148 | envp[i] = NULL; |
149 | 149 | ||
150 | value = call_usermodehelper (argv [0], argv, envp, 0); | 150 | value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC); |
151 | kfree (buf); | 151 | kfree (buf); |
152 | kfree (envp); | 152 | kfree (envp); |
153 | return 0; | 153 | return 0; |
diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c index a54e4140683a..e821a155b658 100644 --- a/drivers/sbus/char/bbc_envctrl.c +++ b/drivers/sbus/char/bbc_envctrl.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/kthread.h> | 7 | #include <linux/kthread.h> |
8 | #include <linux/delay.h> | 8 | #include <linux/delay.h> |
9 | #include <linux/kmod.h> | 9 | #include <linux/kmod.h> |
10 | #include <linux/reboot.h> | ||
10 | #include <asm/oplib.h> | 11 | #include <asm/oplib.h> |
11 | #include <asm/ebus.h> | 12 | #include <asm/ebus.h> |
12 | 13 | ||
@@ -170,8 +171,6 @@ static void get_current_temps(struct bbc_cpu_temperature *tp) | |||
170 | static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) | 171 | static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) |
171 | { | 172 | { |
172 | static int shutting_down = 0; | 173 | static int shutting_down = 0; |
173 | static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; | ||
174 | char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; | ||
175 | char *type = "???"; | 174 | char *type = "???"; |
176 | s8 val = -1; | 175 | s8 val = -1; |
177 | 176 | ||
@@ -195,7 +194,7 @@ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) | |||
195 | printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); | 194 | printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); |
196 | 195 | ||
197 | shutting_down = 1; | 196 | shutting_down = 1; |
198 | if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0) | 197 | if (orderly_poweroff(true) < 0) |
199 | printk(KERN_CRIT "envctrl: shutdown execution failed\n"); | 198 | printk(KERN_CRIT "envctrl: shutdown execution failed\n"); |
200 | } | 199 | } |
201 | 200 | ||
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c index 8328acab47fd..dadabef116b6 100644 --- a/drivers/sbus/char/envctrl.c +++ b/drivers/sbus/char/envctrl.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/ioport.h> | 26 | #include <linux/ioport.h> |
27 | #include <linux/miscdevice.h> | 27 | #include <linux/miscdevice.h> |
28 | #include <linux/kmod.h> | 28 | #include <linux/kmod.h> |
29 | #include <linux/reboot.h> | ||
29 | 30 | ||
30 | #include <asm/ebus.h> | 31 | #include <asm/ebus.h> |
31 | #include <asm/uaccess.h> | 32 | #include <asm/uaccess.h> |
@@ -966,10 +967,6 @@ static struct i2c_child_t *envctrl_get_i2c_child(unsigned char mon_type) | |||
966 | static void envctrl_do_shutdown(void) | 967 | static void envctrl_do_shutdown(void) |
967 | { | 968 | { |
968 | static int inprog = 0; | 969 | static int inprog = 0; |
969 | static char *envp[] = { | ||
970 | "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; | ||
971 | char *argv[] = { | ||
972 | "/sbin/shutdown", "-h", "now", NULL }; | ||
973 | int ret; | 970 | int ret; |
974 | 971 | ||
975 | if (inprog != 0) | 972 | if (inprog != 0) |
@@ -977,7 +974,7 @@ static void envctrl_do_shutdown(void) | |||
977 | 974 | ||
978 | inprog = 1; | 975 | inprog = 1; |
979 | printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n"); | 976 | printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n"); |
980 | ret = call_usermodehelper("/sbin/shutdown", argv, envp, 0); | 977 | ret = orderly_poweroff(true); |
981 | if (ret < 0) { | 978 | if (ret < 0) { |
982 | printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n"); | 979 | printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n"); |
983 | inprog = 0; /* unlikely to succeed, but we could try again */ | 980 | inprog = 0; /* unlikely to succeed, but we could try again */ |
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile new file mode 100644 index 000000000000..56592f0d6cef --- /dev/null +++ b/drivers/xen/Makefile | |||
@@ -0,0 +1,2 @@ | |||
1 | obj-y += grant-table.o | ||
2 | obj-y += xenbus/ | ||
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c new file mode 100644 index 000000000000..ea94dbabf9a9 --- /dev/null +++ b/drivers/xen/grant-table.c | |||
@@ -0,0 +1,582 @@ | |||
1 | /****************************************************************************** | ||
2 | * grant_table.c | ||
3 | * | ||
4 | * Granting foreign access to our memory reservation. | ||
5 | * | ||
6 | * Copyright (c) 2005-2006, Christopher Clark | ||
7 | * Copyright (c) 2004-2005, K A Fraser | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation; or, when distributed | ||
12 | * separately from the Linux kernel or incorporated into other | ||
13 | * software packages, subject to the following license: | ||
14 | * | ||
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
16 | * of this source file (the "Software"), to deal in the Software without | ||
17 | * restriction, including without limitation the rights to use, copy, modify, | ||
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
19 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
20 | * the following conditions: | ||
21 | * | ||
22 | * The above copyright notice and this permission notice shall be included in | ||
23 | * all copies or substantial portions of the Software. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
31 | * IN THE SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #include <linux/module.h> | ||
35 | #include <linux/sched.h> | ||
36 | #include <linux/mm.h> | ||
37 | #include <linux/vmalloc.h> | ||
38 | #include <linux/uaccess.h> | ||
39 | |||
40 | #include <xen/interface/xen.h> | ||
41 | #include <xen/page.h> | ||
42 | #include <xen/grant_table.h> | ||
43 | |||
44 | #include <asm/pgtable.h> | ||
45 | #include <asm/sync_bitops.h> | ||
46 | |||
47 | |||
48 | /* External tools reserve first few grant table entries. */ | ||
49 | #define NR_RESERVED_ENTRIES 8 | ||
50 | #define GNTTAB_LIST_END 0xffffffff | ||
51 | #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry)) | ||
52 | |||
53 | static grant_ref_t **gnttab_list; | ||
54 | static unsigned int nr_grant_frames; | ||
55 | static unsigned int boot_max_nr_grant_frames; | ||
56 | static int gnttab_free_count; | ||
57 | static grant_ref_t gnttab_free_head; | ||
58 | static DEFINE_SPINLOCK(gnttab_list_lock); | ||
59 | |||
60 | static struct grant_entry *shared; | ||
61 | |||
62 | static struct gnttab_free_callback *gnttab_free_callback_list; | ||
63 | |||
64 | static int gnttab_expand(unsigned int req_entries); | ||
65 | |||
66 | #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) | ||
67 | |||
68 | static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) | ||
69 | { | ||
70 | return &gnttab_list[(entry) / RPP][(entry) % RPP]; | ||
71 | } | ||
72 | /* This can be used as an l-value */ | ||
73 | #define gnttab_entry(entry) (*__gnttab_entry(entry)) | ||
74 | |||
75 | static int get_free_entries(unsigned count) | ||
76 | { | ||
77 | unsigned long flags; | ||
78 | int ref, rc; | ||
79 | grant_ref_t head; | ||
80 | |||
81 | spin_lock_irqsave(&gnttab_list_lock, flags); | ||
82 | |||
83 | if ((gnttab_free_count < count) && | ||
84 | ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { | ||
85 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | ||
86 | return rc; | ||
87 | } | ||
88 | |||
89 | ref = head = gnttab_free_head; | ||
90 | gnttab_free_count -= count; | ||
91 | while (count-- > 1) | ||
92 | head = gnttab_entry(head); | ||
93 | gnttab_free_head = gnttab_entry(head); | ||
94 | gnttab_entry(head) = GNTTAB_LIST_END; | ||
95 | |||
96 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | ||
97 | |||
98 | return ref; | ||
99 | } | ||
100 | |||
101 | static void do_free_callbacks(void) | ||
102 | { | ||
103 | struct gnttab_free_callback *callback, *next; | ||
104 | |||
105 | callback = gnttab_free_callback_list; | ||
106 | gnttab_free_callback_list = NULL; | ||
107 | |||
108 | while (callback != NULL) { | ||
109 | next = callback->next; | ||
110 | if (gnttab_free_count >= callback->count) { | ||
111 | callback->next = NULL; | ||
112 | callback->fn(callback->arg); | ||
113 | } else { | ||
114 | callback->next = gnttab_free_callback_list; | ||
115 | gnttab_free_callback_list = callback; | ||
116 | } | ||
117 | callback = next; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | static inline void check_free_callbacks(void) | ||
122 | { | ||
123 | if (unlikely(gnttab_free_callback_list)) | ||
124 | do_free_callbacks(); | ||
125 | } | ||
126 | |||
127 | static void put_free_entry(grant_ref_t ref) | ||
128 | { | ||
129 | unsigned long flags; | ||
130 | spin_lock_irqsave(&gnttab_list_lock, flags); | ||
131 | gnttab_entry(ref) = gnttab_free_head; | ||
132 | gnttab_free_head = ref; | ||
133 | gnttab_free_count++; | ||
134 | check_free_callbacks(); | ||
135 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | ||
136 | } | ||
137 | |||
138 | static void update_grant_entry(grant_ref_t ref, domid_t domid, | ||
139 | unsigned long frame, unsigned flags) | ||
140 | { | ||
141 | /* | ||
142 | * Introducing a valid entry into the grant table: | ||
143 | * 1. Write ent->domid. | ||
144 | * 2. Write ent->frame: | ||
145 | * GTF_permit_access: Frame to which access is permitted. | ||
146 | * GTF_accept_transfer: Pseudo-phys frame slot being filled by new | ||
147 | * frame, or zero if none. | ||
148 | * 3. Write memory barrier (WMB). | ||
149 | * 4. Write ent->flags, inc. valid type. | ||
150 | */ | ||
151 | shared[ref].frame = frame; | ||
152 | shared[ref].domid = domid; | ||
153 | wmb(); | ||
154 | shared[ref].flags = flags; | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * Public grant-issuing interface functions | ||
159 | */ | ||
160 | void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, | ||
161 | unsigned long frame, int readonly) | ||
162 | { | ||
163 | update_grant_entry(ref, domid, frame, | ||
164 | GTF_permit_access | (readonly ? GTF_readonly : 0)); | ||
165 | } | ||
166 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); | ||
167 | |||
168 | int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, | ||
169 | int readonly) | ||
170 | { | ||
171 | int ref; | ||
172 | |||
173 | ref = get_free_entries(1); | ||
174 | if (unlikely(ref < 0)) | ||
175 | return -ENOSPC; | ||
176 | |||
177 | gnttab_grant_foreign_access_ref(ref, domid, frame, readonly); | ||
178 | |||
179 | return ref; | ||
180 | } | ||
181 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); | ||
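A frontend normally pairs this call with gnttab_end_foreign_access() once the peer is done with the page. A hedged sketch of that lifecycle, assuming virt_to_mfn() is available on the architecture (error handling trimmed to the essentials):

#include <linux/gfp.h>
#include <xen/grant_table.h>

/* Sketch only: share one freshly allocated page read-write with a
 * backend domain, returning the grant reference through *ref_out. */
static int share_page_with_backend(domid_t backend, unsigned long *page_out,
				   grant_ref_t *ref_out)
{
	unsigned long page = __get_free_page(GFP_KERNEL);
	int ref;

	if (!page)
		return -ENOMEM;

	ref = gnttab_grant_foreign_access(backend, virt_to_mfn(page), 0);
	if (ref < 0) {
		free_page(page);
		return ref;
	}

	*page_out = page;
	*ref_out = ref;
	/* Teardown later: gnttab_end_foreign_access(ref, 0, page), which
	 * frees the page only if the peer has already dropped its
	 * mapping; otherwise it warns and leaks, as the code above shows. */
	return 0;
}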
182 | |||
183 | int gnttab_query_foreign_access(grant_ref_t ref) | ||
184 | { | ||
185 | u16 nflags; | ||
186 | |||
187 | nflags = shared[ref].flags; | ||
188 | |||
189 | return (nflags & (GTF_reading|GTF_writing)); | ||
190 | } | ||
191 | EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); | ||
192 | |||
193 | int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) | ||
194 | { | ||
195 | u16 flags, nflags; | ||
196 | |||
197 | nflags = shared[ref].flags; | ||
198 | do { | ||
199 | flags = nflags; | ||
200 | if (flags & (GTF_reading|GTF_writing)) { | ||
201 | printk(KERN_ALERT "WARNING: g.e. still in use!\n"); | ||
202 | return 0; | ||
203 | } | ||
204 | } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags); | ||
205 | |||
206 | return 1; | ||
207 | } | ||
208 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); | ||
209 | |||
210 | void gnttab_end_foreign_access(grant_ref_t ref, int readonly, | ||
211 | unsigned long page) | ||
212 | { | ||
213 | if (gnttab_end_foreign_access_ref(ref, readonly)) { | ||
214 | put_free_entry(ref); | ||
215 | if (page != 0) | ||
216 | free_page(page); | ||
217 | } else { | ||
218 | /* XXX This needs to be fixed so that the ref and page are | ||
219 | placed on a list to be freed up later. */ | ||
220 | printk(KERN_WARNING | ||
221 | "WARNING: leaking g.e. and page still in use!\n"); | ||
222 | } | ||
223 | } | ||
224 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); | ||
225 | |||
226 | int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) | ||
227 | { | ||
228 | int ref; | ||
229 | |||
230 | ref = get_free_entries(1); | ||
231 | if (unlikely(ref < 0)) | ||
232 | return -ENOSPC; | ||
233 | gnttab_grant_foreign_transfer_ref(ref, domid, pfn); | ||
234 | |||
235 | return ref; | ||
236 | } | ||
237 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); | ||
238 | |||
239 | void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, | ||
240 | unsigned long pfn) | ||
241 | { | ||
242 | update_grant_entry(ref, domid, pfn, GTF_accept_transfer); | ||
243 | } | ||
244 | EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); | ||
245 | |||
246 | unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) | ||
247 | { | ||
248 | unsigned long frame; | ||
249 | u16 flags; | ||
250 | |||
251 | /* | ||
252 | * If a transfer is not even yet started, try to reclaim the grant | ||
253 | * reference and return failure (== 0). | ||
254 | */ | ||
255 | while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { | ||
256 | if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags) | ||
257 | return 0; | ||
258 | cpu_relax(); | ||
259 | } | ||
260 | |||
261 | /* If a transfer is in progress then wait until it is completed. */ | ||
262 | while (!(flags & GTF_transfer_completed)) { | ||
263 | flags = shared[ref].flags; | ||
264 | cpu_relax(); | ||
265 | } | ||
266 | |||
267 | rmb(); /* Read the frame number /after/ reading completion status. */ | ||
268 | frame = shared[ref].frame; | ||
269 | BUG_ON(frame == 0); | ||
270 | |||
271 | return frame; | ||
272 | } | ||
273 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); | ||
274 | |||
275 | unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) | ||
276 | { | ||
277 | unsigned long frame = gnttab_end_foreign_transfer_ref(ref); | ||
278 | put_free_entry(ref); | ||
279 | return frame; | ||
280 | } | ||
281 | EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer); | ||
282 | |||
283 | void gnttab_free_grant_reference(grant_ref_t ref) | ||
284 | { | ||
285 | put_free_entry(ref); | ||
286 | } | ||
287 | EXPORT_SYMBOL_GPL(gnttab_free_grant_reference); | ||
288 | |||
289 | void gnttab_free_grant_references(grant_ref_t head) | ||
290 | { | ||
291 | grant_ref_t ref; | ||
292 | unsigned long flags; | ||
293 | int count = 1; | ||
294 | if (head == GNTTAB_LIST_END) | ||
295 | return; | ||
296 | spin_lock_irqsave(&gnttab_list_lock, flags); | ||
297 | ref = head; | ||
298 | while (gnttab_entry(ref) != GNTTAB_LIST_END) { | ||
299 | ref = gnttab_entry(ref); | ||
300 | count++; | ||
301 | } | ||
302 | gnttab_entry(ref) = gnttab_free_head; | ||
303 | gnttab_free_head = head; | ||
304 | gnttab_free_count += count; | ||
305 | check_free_callbacks(); | ||
306 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | ||
307 | } | ||
308 | EXPORT_SYMBOL_GPL(gnttab_free_grant_references); | ||
309 | |||
310 | int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) | ||
311 | { | ||
312 | int h = get_free_entries(count); | ||
313 | |||
314 | if (h < 0) | ||
315 | return -ENOSPC; | ||
316 | |||
317 | *head = h; | ||
318 | |||
319 | return 0; | ||
320 | } | ||
321 | EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); | ||
322 | |||
323 | int gnttab_empty_grant_references(const grant_ref_t *private_head) | ||
324 | { | ||
325 | return (*private_head == GNTTAB_LIST_END); | ||
326 | } | ||
327 | EXPORT_SYMBOL_GPL(gnttab_empty_grant_references); | ||
328 | |||
329 | int gnttab_claim_grant_reference(grant_ref_t *private_head) | ||
330 | { | ||
331 | grant_ref_t g = *private_head; | ||
332 | if (unlikely(g == GNTTAB_LIST_END)) | ||
333 | return -ENOSPC; | ||
334 | *private_head = gnttab_entry(g); | ||
335 | return g; | ||
336 | } | ||
337 | EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); | ||
338 | |||
339 | void gnttab_release_grant_reference(grant_ref_t *private_head, | ||
340 | grant_ref_t release) | ||
341 | { | ||
342 | gnttab_entry(release) = *private_head; | ||
343 | *private_head = release; | ||
344 | } | ||
345 | EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); | ||
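Drivers that grant pages on hot paths usually pre-allocate a private pool with gnttab_alloc_grant_references() and then claim from it without touching the global free list or risking -ENOSPC mid-request. A sketch of the pattern (the pool size of 64 is arbitrary):

#include <xen/grant_table.h>

static grant_ref_t gref_pool;	/* head of a private reference list */

static int pool_setup(void)
{
	return gnttab_alloc_grant_references(64, &gref_pool);
}

static int grant_one_frame(domid_t domid, unsigned long frame, int readonly)
{
	int ref = gnttab_claim_grant_reference(&gref_pool);

	if (ref < 0)
		return ref;	/* -ENOSPC: pool exhausted */
	gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
	return ref;
}

/* Unclaimed references go back with gnttab_release_grant_reference();
 * the whole pool is returned with gnttab_free_grant_references(gref_pool). */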
346 | |||
347 | void gnttab_request_free_callback(struct gnttab_free_callback *callback, | ||
348 | void (*fn)(void *), void *arg, u16 count) | ||
349 | { | ||
350 | unsigned long flags; | ||
351 | spin_lock_irqsave(&gnttab_list_lock, flags); | ||
352 | if (callback->next) | ||
353 | goto out; | ||
354 | callback->fn = fn; | ||
355 | callback->arg = arg; | ||
356 | callback->count = count; | ||
357 | callback->next = gnttab_free_callback_list; | ||
358 | gnttab_free_callback_list = callback; | ||
359 | check_free_callbacks(); | ||
360 | out: | ||
361 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | ||
362 | } | ||
363 | EXPORT_SYMBOL_GPL(gnttab_request_free_callback); | ||
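The callback is how a driver waits for references to be recycled without polling; a frontend such as netfront can use it to restart its transmit path. A sketch, where netif_wake_queue() stands in for whatever the caller wants to retry:

#include <linux/netdevice.h>
#include <xen/grant_table.h>

static struct gnttab_free_callback tx_gref_callback;

static void tx_grefs_available(void *arg)
{
	struct net_device *dev = arg;

	netif_wake_queue(dev);	/* illustrative consumer */
}

static void wait_for_tx_grefs(struct net_device *dev)
{
	/* Fire tx_grefs_available() once 16 references are free. */
	gnttab_request_free_callback(&tx_gref_callback,
				     tx_grefs_available, dev, 16);
}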
364 | |||
365 | void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) | ||
366 | { | ||
367 | struct gnttab_free_callback **pcb; | ||
368 | unsigned long flags; | ||
369 | |||
370 | spin_lock_irqsave(&gnttab_list_lock, flags); | ||
371 | for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { | ||
372 | if (*pcb == callback) { | ||
373 | *pcb = callback->next; | ||
374 | break; | ||
375 | } | ||
376 | } | ||
377 | spin_unlock_irqrestore(&gnttab_list_lock, flags); | ||
378 | } | ||
379 | EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback); | ||
380 | |||
381 | static int grow_gnttab_list(unsigned int more_frames) | ||
382 | { | ||
383 | unsigned int new_nr_grant_frames, extra_entries, i; | ||
384 | |||
385 | new_nr_grant_frames = nr_grant_frames + more_frames; | ||
386 | extra_entries = more_frames * GREFS_PER_GRANT_FRAME; | ||
387 | |||
388 | for (i = nr_grant_frames; i < new_nr_grant_frames; i++) { | ||
389 | gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); | ||
390 | if (!gnttab_list[i]) | ||
391 | goto grow_nomem; | ||
392 | } | ||
393 | |||
394 | |||
395 | for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; | ||
396 | i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) | ||
397 | gnttab_entry(i) = i + 1; | ||
398 | |||
399 | gnttab_entry(i) = gnttab_free_head; | ||
400 | gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; | ||
401 | gnttab_free_count += extra_entries; | ||
402 | |||
403 | nr_grant_frames = new_nr_grant_frames; | ||
404 | |||
405 | check_free_callbacks(); | ||
406 | |||
407 | return 0; | ||
408 | |||
409 | grow_nomem: | ||
410 | for ( ; i >= nr_grant_frames; i--) | ||
411 | free_page((unsigned long) gnttab_list[i]); | ||
412 | return -ENOMEM; | ||
413 | } | ||
414 | |||
415 | static unsigned int __max_nr_grant_frames(void) | ||
416 | { | ||
417 | struct gnttab_query_size query; | ||
418 | int rc; | ||
419 | |||
420 | query.dom = DOMID_SELF; | ||
421 | |||
422 | rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); | ||
423 | if ((rc < 0) || (query.status != GNTST_okay)) | ||
424 | return 4; /* Legacy max supported number of frames */ | ||
425 | |||
426 | return query.max_nr_frames; | ||
427 | } | ||
428 | |||
429 | static inline unsigned int max_nr_grant_frames(void) | ||
430 | { | ||
431 | unsigned int xen_max = __max_nr_grant_frames(); | ||
432 | |||
433 | if (xen_max > boot_max_nr_grant_frames) | ||
434 | return boot_max_nr_grant_frames; | ||
435 | return xen_max; | ||
436 | } | ||
437 | |||
438 | static int map_pte_fn(pte_t *pte, struct page *pmd_page, | ||
439 | unsigned long addr, void *data) | ||
440 | { | ||
441 | unsigned long **frames = (unsigned long **)data; | ||
442 | |||
443 | set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); | ||
444 | (*frames)++; | ||
445 | return 0; | ||
446 | } | ||
447 | |||
448 | static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, | ||
449 | unsigned long addr, void *data) | ||
450 | { | ||
451 | |||
452 | set_pte_at(&init_mm, addr, pte, __pte(0)); | ||
453 | return 0; | ||
454 | } | ||
455 | |||
456 | static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | ||
457 | { | ||
458 | struct gnttab_setup_table setup; | ||
459 | unsigned long *frames; | ||
460 | unsigned int nr_gframes = end_idx + 1; | ||
461 | int rc; | ||
462 | |||
463 | frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); | ||
464 | if (!frames) | ||
465 | return -ENOMEM; | ||
466 | |||
467 | setup.dom = DOMID_SELF; | ||
468 | setup.nr_frames = nr_gframes; | ||
469 | setup.frame_list = frames; | ||
470 | |||
471 | rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); | ||
472 | if (rc == -ENOSYS) { | ||
473 | kfree(frames); | ||
474 | return -ENOSYS; | ||
475 | } | ||
476 | |||
477 | BUG_ON(rc || setup.status); | ||
478 | |||
479 | if (shared == NULL) { | ||
480 | struct vm_struct *area; | ||
481 | area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames()); | ||
482 | BUG_ON(area == NULL); | ||
483 | shared = area->addr; | ||
484 | } | ||
485 | rc = apply_to_page_range(&init_mm, (unsigned long)shared, | ||
486 | PAGE_SIZE * nr_gframes, | ||
487 | map_pte_fn, &frames); | ||
488 | BUG_ON(rc); | ||
489 | frames -= nr_gframes; /* adjust after map_pte_fn() */ | ||
490 | |||
491 | kfree(frames); | ||
492 | |||
493 | return 0; | ||
494 | } | ||
495 | |||
496 | static int gnttab_resume(void) | ||
497 | { | ||
498 | if (max_nr_grant_frames() < nr_grant_frames) | ||
499 | return -ENOSYS; | ||
500 | return gnttab_map(0, nr_grant_frames - 1); | ||
501 | } | ||
502 | |||
503 | static int gnttab_suspend(void) | ||
504 | { | ||
505 | apply_to_page_range(&init_mm, (unsigned long)shared, | ||
506 | PAGE_SIZE * nr_grant_frames, | ||
507 | unmap_pte_fn, NULL); | ||
508 | |||
509 | return 0; | ||
510 | } | ||
511 | |||
512 | static int gnttab_expand(unsigned int req_entries) | ||
513 | { | ||
514 | int rc; | ||
515 | unsigned int cur, extra; | ||
516 | |||
517 | cur = nr_grant_frames; | ||
518 | extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / | ||
519 | GREFS_PER_GRANT_FRAME); | ||
520 | if (cur + extra > max_nr_grant_frames()) | ||
521 | return -ENOSPC; | ||
522 | |||
523 | rc = gnttab_map(cur, cur + extra - 1); | ||
524 | if (rc == 0) | ||
525 | rc = grow_gnttab_list(extra); | ||
526 | |||
527 | return rc; | ||
528 | } | ||
529 | |||
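To make the rounding concrete: with 4 KiB pages and an 8-byte struct grant_entry, GREFS_PER_GRANT_FRAME is 512, so a request for 600 more entries computes extra = (600 + 511) / 512 = 2 frames; the expansion is refused with -ENOSPC only when cur + extra would exceed max_nr_grant_frames().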
530 | static int __devinit gnttab_init(void) | ||
531 | { | ||
532 | int i; | ||
533 | unsigned int max_nr_glist_frames; | ||
534 | unsigned int nr_init_grefs; | ||
535 | |||
536 | if (!is_running_on_xen()) | ||
537 | return -ENODEV; | ||
538 | |||
539 | nr_grant_frames = 1; | ||
540 | boot_max_nr_grant_frames = __max_nr_grant_frames(); | ||
541 | |||
542 | /* Determine the maximum number of frames required for the | ||
543 | * grant reference free list on the current hypervisor. | ||
544 | */ | ||
545 | max_nr_glist_frames = (boot_max_nr_grant_frames * | ||
546 | GREFS_PER_GRANT_FRAME / | ||
547 | (PAGE_SIZE / sizeof(grant_ref_t))); | ||
548 | |||
549 | gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), | ||
550 | GFP_KERNEL); | ||
551 | if (gnttab_list == NULL) | ||
552 | return -ENOMEM; | ||
553 | |||
554 | for (i = 0; i < nr_grant_frames; i++) { | ||
555 | gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); | ||
556 | if (gnttab_list[i] == NULL) | ||
557 | goto ini_nomem; | ||
558 | } | ||
559 | |||
560 | if (gnttab_resume() < 0) | ||
561 | return -ENODEV; | ||
562 | |||
563 | nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; | ||
564 | |||
565 | for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) | ||
566 | gnttab_entry(i) = i + 1; | ||
567 | |||
568 | gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; | ||
569 | gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; | ||
570 | gnttab_free_head = NR_RESERVED_ENTRIES; | ||
571 | |||
572 | printk("Grant table initialized\n"); | ||
573 | return 0; | ||
574 | |||
575 | ini_nomem: | ||
576 | for (i--; i >= 0; i--) | ||
577 | free_page((unsigned long)gnttab_list[i]); | ||
578 | kfree(gnttab_list); | ||
579 | return -ENOMEM; | ||
580 | } | ||
581 | |||
582 | core_initcall(gnttab_init); | ||
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile new file mode 100644 index 000000000000..5571f5b84223 --- /dev/null +++ b/drivers/xen/xenbus/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | obj-y += xenbus.o | ||
2 | |||
3 | xenbus-objs = | ||
4 | xenbus-objs += xenbus_client.o | ||
5 | xenbus-objs += xenbus_comms.o | ||
6 | xenbus-objs += xenbus_xs.o | ||
7 | xenbus-objs += xenbus_probe.o | ||
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c new file mode 100644 index 000000000000..9fd2f70ab46d --- /dev/null +++ b/drivers/xen/xenbus/xenbus_client.c | |||
@@ -0,0 +1,569 @@ | |||
1 | /****************************************************************************** | ||
2 | * Client-facing interface for the Xenbus driver. In other words, the | ||
3 | * interface between the Xenbus and the device-specific code, be it the | ||
4 | * frontend or the backend of that driver. | ||
5 | * | ||
6 | * Copyright (C) 2005 XenSource Ltd | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License version 2 | ||
10 | * as published by the Free Software Foundation; or, when distributed | ||
11 | * separately from the Linux kernel or incorporated into other | ||
12 | * software packages, subject to the following license: | ||
13 | * | ||
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
15 | * of this source file (the "Software"), to deal in the Software without | ||
16 | * restriction, including without limitation the rights to use, copy, modify, | ||
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
18 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
19 | * the following conditions: | ||
20 | * | ||
21 | * The above copyright notice and this permission notice shall be included in | ||
22 | * all copies or substantial portions of the Software. | ||
23 | * | ||
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
30 | * IN THE SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/types.h> | ||
34 | #include <linux/vmalloc.h> | ||
35 | #include <asm/xen/hypervisor.h> | ||
36 | #include <xen/interface/xen.h> | ||
37 | #include <xen/interface/event_channel.h> | ||
38 | #include <xen/events.h> | ||
39 | #include <xen/grant_table.h> | ||
40 | #include <xen/xenbus.h> | ||
41 | |||
42 | const char *xenbus_strstate(enum xenbus_state state) | ||
43 | { | ||
44 | static const char *const name[] = { | ||
45 | [ XenbusStateUnknown ] = "Unknown", | ||
46 | [ XenbusStateInitialising ] = "Initialising", | ||
47 | [ XenbusStateInitWait ] = "InitWait", | ||
48 | [ XenbusStateInitialised ] = "Initialised", | ||
49 | [ XenbusStateConnected ] = "Connected", | ||
50 | [ XenbusStateClosing ] = "Closing", | ||
51 | [ XenbusStateClosed ] = "Closed", | ||
52 | }; | ||
53 | return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; | ||
54 | } | ||
55 | EXPORT_SYMBOL_GPL(xenbus_strstate); | ||
56 | |||
57 | /** | ||
58 | * xenbus_watch_path - register a watch | ||
59 | * @dev: xenbus device | ||
60 | * @path: path to watch | ||
61 | * @watch: watch to register | ||
62 | * @callback: callback to register | ||
63 | * | ||
64 | * Register a @watch on the given path, using the given xenbus_watch structure | ||
65 | * for storage, and the given @callback function as the callback. Return 0 on | ||
66 | * success, or -errno on error. On success, the given @path will be saved as | ||
67 | * @watch->node, and remains the caller's to free. On error, @watch->node will | ||
68 | * be NULL, the device will switch to %XenbusStateClosing, and the error will | ||
69 | * be saved in the store. | ||
70 | */ | ||
71 | int xenbus_watch_path(struct xenbus_device *dev, const char *path, | ||
72 | struct xenbus_watch *watch, | ||
73 | void (*callback)(struct xenbus_watch *, | ||
74 | const char **, unsigned int)) | ||
75 | { | ||
76 | int err; | ||
77 | |||
78 | watch->node = path; | ||
79 | watch->callback = callback; | ||
80 | |||
81 | err = register_xenbus_watch(watch); | ||
82 | |||
83 | if (err) { | ||
84 | watch->node = NULL; | ||
85 | watch->callback = NULL; | ||
86 | xenbus_dev_fatal(dev, err, "adding watch on %s", path); | ||
87 | } | ||
88 | |||
89 | return err; | ||
90 | } | ||
91 | EXPORT_SYMBOL_GPL(xenbus_watch_path); | ||
92 | |||
93 | |||
94 | /** | ||
95 | * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path | ||
96 | * @dev: xenbus device | ||
97 | * @watch: watch to register | ||
98 | * @callback: callback to register | ||
99 | * @pathfmt: format of path to watch | ||
100 | * | ||
101 | * Register a watch on the path built from @pathfmt, using the given | ||
102 | * xenbus_watch structure for storage, and the given @callback function as | ||
103 | * the callback. Return 0 on success, or -errno on error. On success, the | ||
104 | * watched path will be saved as @watch->node, and becomes the caller's to | ||
105 | * kfree(). On error, watch->node will be NULL, so the caller has nothing to | ||
106 | * free, the device will switch to %XenbusStateClosing, and the error will be | ||
107 | * saved in the store. | ||
108 | */ | ||
109 | int xenbus_watch_pathfmt(struct xenbus_device *dev, | ||
110 | struct xenbus_watch *watch, | ||
111 | void (*callback)(struct xenbus_watch *, | ||
112 | const char **, unsigned int), | ||
113 | const char *pathfmt, ...) | ||
114 | { | ||
115 | int err; | ||
116 | va_list ap; | ||
117 | char *path; | ||
118 | |||
119 | va_start(ap, pathfmt); | ||
120 | path = kvasprintf(GFP_KERNEL, pathfmt, ap); | ||
121 | va_end(ap); | ||
122 | |||
123 | if (!path) { | ||
124 | xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); | ||
125 | return -ENOMEM; | ||
126 | } | ||
127 | err = xenbus_watch_path(dev, path, watch, callback); | ||
128 | |||
129 | if (err) | ||
130 | kfree(path); | ||
131 | return err; | ||
132 | } | ||
133 | EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); | ||
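Putting the two watch helpers together, a frontend typically watches its peer's state node from probe; note the xenbus_watch structure must outlive the watch. A sketch (the callback body is intentionally left empty):

#include <xen/xenbus.h>

static void backend_changed_cb(struct xenbus_watch *watch,
			       const char **vec, unsigned int len)
{
	/* vec[XS_WATCH_PATH] names the node that fired; re-read it here. */
}

static int watch_backend_state(struct xenbus_device *dev,
			       struct xenbus_watch *watch)
{
	return xenbus_watch_pathfmt(dev, watch, backend_changed_cb,
				    "%s/state", dev->otherend);
}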
134 | |||
135 | |||
136 | /** | ||
137 | * xenbus_switch_state | ||
138 | * @dev: xenbus device | ||
139 | * @xbt: transaction handle | ||
140 | * @state: new state | ||
141 | * | ||
142 | * Advertise in the store a change of the given driver to the given new_state. | ||
143 | * Return 0 on success, or -errno on error. On error, the device will switch | ||
144 | * to XenbusStateClosing, and the error will be saved in the store. | ||
145 | */ | ||
146 | int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) | ||
147 | { | ||
148 | /* We check whether the state is currently set to the given value, and | ||
149 | if not, then the state is set. We don't want to unconditionally | ||
150 | write the given state, because we don't want to fire watches | ||
151 | unnecessarily. Furthermore, if the node has gone, we don't write | ||
152 | to it, as the device will be tearing down, and we don't want to | ||
153 | resurrect that directory. | ||
154 | |||
155 | Note that, because of this cached value of our state, this function | ||
156 | will not work inside a Xenstore transaction (something it | ||
157 | attempted in the past) because dev->state would not get reset if | ||
158 | the transaction was aborted. | ||
159 | |||
160 | */ | ||
161 | |||
162 | int current_state; | ||
163 | int err; | ||
164 | |||
165 | if (state == dev->state) | ||
166 | return 0; | ||
167 | |||
168 | err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d", | ||
169 | &current_state); | ||
170 | if (err != 1) | ||
171 | return 0; | ||
172 | |||
173 | err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state); | ||
174 | if (err) { | ||
175 | if (state != XenbusStateClosing) /* Avoid looping */ | ||
176 | xenbus_dev_fatal(dev, err, "writing new state"); | ||
177 | return err; | ||
178 | } | ||
179 | |||
180 | dev->state = state; | ||
181 | |||
182 | return 0; | ||
183 | } | ||
184 | EXPORT_SYMBOL_GPL(xenbus_switch_state); | ||
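In practice a driver calls this at the end of each handshake step; because the helper already reports write failures via xenbus_dev_fatal() and moves the device toward Closing, callers only need to propagate the error. A minimal sketch:

#include <xen/xenbus.h>

static int announce_connected(struct xenbus_device *dev)
{
	/* Failure is already recorded in the store by the helper. */
	return xenbus_switch_state(dev, XenbusStateConnected);
}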
185 | |||
186 | int xenbus_frontend_closed(struct xenbus_device *dev) | ||
187 | { | ||
188 | xenbus_switch_state(dev, XenbusStateClosed); | ||
189 | complete(&dev->down); | ||
190 | return 0; | ||
191 | } | ||
192 | EXPORT_SYMBOL_GPL(xenbus_frontend_closed); | ||
193 | |||
194 | /** | ||
195 | * Return the path to the error node for the given device, or NULL on failure. | ||
196 | * If the value returned is non-NULL, then it is the caller's to kfree. | ||
197 | */ | ||
198 | static char *error_path(struct xenbus_device *dev) | ||
199 | { | ||
200 | return kasprintf(GFP_KERNEL, "error/%s", dev->nodename); | ||
201 | } | ||
202 | |||
203 | |||
204 | static void xenbus_va_dev_error(struct xenbus_device *dev, int err, | ||
205 | const char *fmt, va_list ap) | ||
206 | { | ||
207 | int ret; | ||
208 | unsigned int len; | ||
209 | char *printf_buffer = NULL; | ||
210 | char *path_buffer = NULL; | ||
211 | |||
212 | #define PRINTF_BUFFER_SIZE 4096 | ||
213 | printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); | ||
214 | if (printf_buffer == NULL) | ||
215 | goto fail; | ||
216 | |||
217 | len = sprintf(printf_buffer, "%i ", -err); | ||
218 | ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); | ||
219 | |||
220 | BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1); | ||
221 | |||
222 | dev_err(&dev->dev, "%s\n", printf_buffer); | ||
223 | |||
224 | path_buffer = error_path(dev); | ||
225 | |||
226 | if (path_buffer == NULL) { | ||
227 | dev_err(&dev->dev, "failed to write error node for %s (%s)\n", | ||
228 | dev->nodename, printf_buffer); | ||
229 | goto fail; | ||
230 | } | ||
231 | |||
232 | if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { | ||
233 | dev_err(&dev->dev, "failed to write error node for %s (%s)\n", | ||
234 | dev->nodename, printf_buffer); | ||
235 | goto fail; | ||
236 | } | ||
237 | |||
238 | fail: | ||
239 | kfree(printf_buffer); | ||
240 | kfree(path_buffer); | ||
241 | } | ||
242 | |||
243 | |||
244 | /** | ||
245 | * xenbus_dev_error | ||
246 | * @dev: xenbus device | ||
247 | * @err: error to report | ||
248 | * @fmt: error message format | ||
249 | * | ||
250 | * Report the given negative errno into the store, along with the given | ||
251 | * formatted message. | ||
252 | */ | ||
253 | void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...) | ||
254 | { | ||
255 | va_list ap; | ||
256 | |||
257 | va_start(ap, fmt); | ||
258 | xenbus_va_dev_error(dev, err, fmt, ap); | ||
259 | va_end(ap); | ||
260 | } | ||
261 | EXPORT_SYMBOL_GPL(xenbus_dev_error); | ||
262 | |||
263 | /** | ||
264 | * xenbus_dev_fatal | ||
265 | * @dev: xenbus device | ||
266 | * @err: error to report | ||
267 | * @fmt: error message format | ||
268 | * | ||
269 | * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by | ||
270 | * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly | ||
271 | * closedown of this driver and its peer. | ||
272 | */ | ||
273 | |||
274 | void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...) | ||
275 | { | ||
276 | va_list ap; | ||
277 | |||
278 | va_start(ap, fmt); | ||
279 | xenbus_va_dev_error(dev, err, fmt, ap); | ||
280 | va_end(ap); | ||
281 | |||
282 | xenbus_switch_state(dev, XenbusStateClosing); | ||
283 | } | ||
284 | EXPORT_SYMBOL_GPL(xenbus_dev_fatal); | ||
285 | |||
286 | /** | ||
287 | * xenbus_grant_ring | ||
288 | * @dev: xenbus device | ||
289 | * @ring_mfn: mfn of ring to grant | ||
290 | * | ||
291 | * Grant access to the given @ring_mfn to the peer of the given device. Return | ||
292 | * 0 on success, or -errno on error. On error, the device will switch to | ||
293 | * XenbusStateClosing, and the error will be saved in the store. | ||
294 | */ | ||
295 | int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) | ||
296 | { | ||
297 | int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); | ||
298 | if (err < 0) | ||
299 | xenbus_dev_fatal(dev, err, "granting access to ring page"); | ||
300 | return err; | ||
301 | } | ||
302 | EXPORT_SYMBOL_GPL(xenbus_grant_ring); | ||
303 | |||
304 | |||
305 | /** | ||
306 | * Allocate an event channel for the given xenbus_device, assigning the newly | ||
307 | * created local port to *port. Return 0 on success, or -errno on error. On | ||
308 | * error, the device will switch to XenbusStateClosing, and the error will be | ||
309 | * saved in the store. | ||
310 | */ | ||
311 | int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port) | ||
312 | { | ||
313 | struct evtchn_alloc_unbound alloc_unbound; | ||
314 | int err; | ||
315 | |||
316 | alloc_unbound.dom = DOMID_SELF; | ||
317 | alloc_unbound.remote_dom = dev->otherend_id; | ||
318 | |||
319 | err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, | ||
320 | &alloc_unbound); | ||
321 | if (err) | ||
322 | xenbus_dev_fatal(dev, err, "allocating event channel"); | ||
323 | else | ||
324 | *port = alloc_unbound.port; | ||
325 | |||
326 | return err; | ||
327 | } | ||
328 | EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); | ||
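The allocated port is only useful once the peer can read it, so drivers follow this call with a store write. A hedged sketch using the conventional "event-channel" key (the key name is a convention, not mandated by this file):

#include <xen/xenbus.h>

static int publish_evtchn(struct xenbus_device *dev, int *port)
{
	int err = xenbus_alloc_evtchn(dev, port);

	if (err)
		return err;

	/* Real drivers usually wrap this in a xenbus transaction. */
	err = xenbus_printf(XBT_NIL, dev->nodename,
			    "event-channel", "%u", *port);
	if (err)
		xenbus_free_evtchn(dev, *port);
	return err;
}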
329 | |||
330 | |||
331 | /** | ||
332 | * Bind to an existing interdomain event channel in another domain. Returns 0 | ||
333 | * on success and stores the local port in *port. On error, returns -errno, | ||
334 | * switches the device to XenbusStateClosing, and saves the error in XenStore. | ||
335 | */ | ||
336 | int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port) | ||
337 | { | ||
338 | struct evtchn_bind_interdomain bind_interdomain; | ||
339 | int err; | ||
340 | |||
341 | bind_interdomain.remote_dom = dev->otherend_id; | ||
342 | bind_interdomain.remote_port = remote_port; | ||
343 | |||
344 | err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, | ||
345 | &bind_interdomain); | ||
346 | if (err) | ||
347 | xenbus_dev_fatal(dev, err, | ||
348 | "binding to event channel %d from domain %d", | ||
349 | remote_port, dev->otherend_id); | ||
350 | else | ||
351 | *port = bind_interdomain.local_port; | ||
352 | |||
353 | return err; | ||
354 | } | ||
355 | EXPORT_SYMBOL_GPL(xenbus_bind_evtchn); | ||
356 | |||
357 | |||
358 | /** | ||
359 | * Free an existing event channel. Returns 0 on success or -errno on error. | ||
360 | */ | ||
361 | int xenbus_free_evtchn(struct xenbus_device *dev, int port) | ||
362 | { | ||
363 | struct evtchn_close close; | ||
364 | int err; | ||
365 | |||
366 | close.port = port; | ||
367 | |||
368 | err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); | ||
369 | if (err) | ||
370 | xenbus_dev_error(dev, err, "freeing event channel %d", port); | ||
371 | |||
372 | return err; | ||
373 | } | ||
374 | EXPORT_SYMBOL_GPL(xenbus_free_evtchn); | ||
375 | |||
376 | |||
377 | /** | ||
378 | * xenbus_map_ring_valloc | ||
379 | * @dev: xenbus device | ||
380 | * @gnt_ref: grant reference | ||
381 | * @vaddr: pointer to address to be filled out by mapping | ||
382 | * | ||
383 | * Based on Rusty Russell's skeleton driver's map_page. | ||
384 | * Map a page of memory into this domain from another domain's grant table. | ||
385 | * xenbus_map_ring_valloc allocates a page of virtual address space, maps the | ||
386 | * page to that address, and sets *vaddr to that address. | ||
387 | * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) | ||
388 | * or -ENOMEM on error. If an error is returned, device will switch to | ||
389 | * XenbusStateClosing and the error message will be saved in XenStore. | ||
390 | */ | ||
391 | int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) | ||
392 | { | ||
393 | struct gnttab_map_grant_ref op = { | ||
394 | .flags = GNTMAP_host_map, | ||
395 | .ref = gnt_ref, | ||
396 | .dom = dev->otherend_id, | ||
397 | }; | ||
398 | struct vm_struct *area; | ||
399 | |||
400 | *vaddr = NULL; | ||
401 | |||
402 | area = alloc_vm_area(PAGE_SIZE); | ||
403 | if (!area) | ||
404 | return -ENOMEM; | ||
405 | |||
406 | op.host_addr = (unsigned long)area->addr; | ||
407 | |||
408 | if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | ||
409 | BUG(); | ||
410 | |||
411 | if (op.status != GNTST_okay) { | ||
412 | free_vm_area(area); | ||
413 | xenbus_dev_fatal(dev, op.status, | ||
414 | "mapping in shared page %d from domain %d", | ||
415 | gnt_ref, dev->otherend_id); | ||
416 | return op.status; | ||
417 | } | ||
418 | |||
419 | /* Stuff the handle in an unused field */ | ||
420 | area->phys_addr = (unsigned long)op.handle; | ||
421 | |||
422 | *vaddr = area->addr; | ||
423 | return 0; | ||
424 | } | ||
425 | EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); | ||
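Seen from the backend side, the usual flow is: read the grant reference the frontend published, then map it. A sketch, assuming the conventional "ring-ref" key:

#include <xen/xenbus.h>

static int map_frontend_ring(struct xenbus_device *dev, void **ring)
{
	int gref, err;

	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", "%d", &gref);
	if (err != 1)
		return err < 0 ? err : -EINVAL;

	/* On success *ring points at the mapped page; undo later with
	 * xenbus_unmap_ring_vfree(dev, *ring). */
	return xenbus_map_ring_valloc(dev, gref, ring);
}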
426 | |||
427 | |||
428 | /** | ||
429 | * xenbus_map_ring | ||
430 | * @dev: xenbus device | ||
431 | * @gnt_ref: grant reference | ||
432 | * @handle: pointer to grant handle to be filled | ||
433 | * @vaddr: address to be mapped to | ||
434 | * | ||
435 | * Map a page of memory into this domain from another domain's grant table. | ||
436 | * xenbus_map_ring does not allocate the virtual address space (you must do | ||
437 | * this yourself!). It only maps in the page to the specified address. | ||
438 | * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) | ||
439 | * or -ENOMEM on error. If an error is returned, device will switch to | ||
440 | * XenbusStateClosing and the error message will be saved in XenStore. | ||
441 | */ | ||
442 | int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, | ||
443 | grant_handle_t *handle, void *vaddr) | ||
444 | { | ||
445 | struct gnttab_map_grant_ref op = { | ||
446 | .host_addr = (unsigned long)vaddr, | ||
447 | .flags = GNTMAP_host_map, | ||
448 | .ref = gnt_ref, | ||
449 | .dom = dev->otherend_id, | ||
450 | }; | ||
451 | |||
452 | if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) | ||
453 | BUG(); | ||
454 | |||
455 | if (op.status != GNTST_okay) { | ||
456 | xenbus_dev_fatal(dev, op.status, | ||
457 | "mapping in shared page %d from domain %d", | ||
458 | gnt_ref, dev->otherend_id); | ||
459 | } else | ||
460 | *handle = op.handle; | ||
461 | |||
462 | return op.status; | ||
463 | } | ||
464 | EXPORT_SYMBOL_GPL(xenbus_map_ring); | ||
465 | |||
466 | |||
467 | /** | ||
468 | * xenbus_unmap_ring_vfree | ||
469 | * @dev: xenbus device | ||
470 | * @vaddr: addr to unmap | ||
471 | * | ||
472 | * Based on Rusty Russell's skeleton driver's unmap_page. | ||
473 | * Unmap a page of memory in this domain that was imported from another domain. | ||
474 | * Use xenbus_unmap_ring_vfree if you mapped in your memory with | ||
475 | * xenbus_map_ring_valloc (it will free the virtual address space). | ||
476 | * Returns 0 on success and returns GNTST_* on error | ||
477 | * (see xen/include/interface/grant_table.h). | ||
478 | */ | ||
479 | int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) | ||
480 | { | ||
481 | struct vm_struct *area; | ||
482 | struct gnttab_unmap_grant_ref op = { | ||
483 | .host_addr = (unsigned long)vaddr, | ||
484 | }; | ||
485 | |||
486 | /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) | ||
487 | * method so that we don't have to muck with vmalloc internals here. | ||
488 | * We could force the user to hang on to their struct vm_struct from | ||
489 | * xenbus_map_ring_valloc, but these 6 lines considerably simplify | ||
490 | * this API. | ||
491 | */ | ||
492 | read_lock(&vmlist_lock); | ||
493 | for (area = vmlist; area != NULL; area = area->next) { | ||
494 | if (area->addr == vaddr) | ||
495 | break; | ||
496 | } | ||
497 | read_unlock(&vmlist_lock); | ||
498 | |||
499 | if (!area) { | ||
500 | xenbus_dev_error(dev, -ENOENT, | ||
501 | "can't find mapped virtual address %p", vaddr); | ||
502 | return GNTST_bad_virt_addr; | ||
503 | } | ||
504 | |||
505 | op.handle = (grant_handle_t)area->phys_addr; | ||
506 | |||
507 | if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | ||
508 | BUG(); | ||
509 | |||
510 | if (op.status == GNTST_okay) | ||
511 | free_vm_area(area); | ||
512 | else | ||
513 | xenbus_dev_error(dev, op.status, | ||
514 | "unmapping page at handle %d error %d", | ||
515 | (int16_t)area->phys_addr, op.status); | ||
516 | |||
517 | return op.status; | ||
518 | } | ||
519 | EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); | ||
520 | |||
521 | |||
522 | /** | ||
523 | * xenbus_unmap_ring | ||
524 | * @dev: xenbus device | ||
525 | * @handle: grant handle | ||
526 | * @vaddr: addr to unmap | ||
527 | * | ||
528 | * Unmap a page of memory in this domain that was imported from another domain. | ||
529 | * Returns 0 on success and returns GNTST_* on error | ||
530 | * (see xen/include/interface/grant_table.h). | ||
531 | */ | ||
532 | int xenbus_unmap_ring(struct xenbus_device *dev, | ||
533 | grant_handle_t handle, void *vaddr) | ||
534 | { | ||
535 | struct gnttab_unmap_grant_ref op = { | ||
536 | .host_addr = (unsigned long)vaddr, | ||
537 | .handle = handle, | ||
538 | }; | ||
539 | |||
540 | if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) | ||
541 | BUG(); | ||
542 | |||
543 | if (op.status != GNTST_okay) | ||
544 | xenbus_dev_error(dev, op.status, | ||
545 | "unmapping page at handle %d error %d", | ||
546 | handle, op.status); | ||
547 | |||
548 | return op.status; | ||
549 | } | ||
550 | EXPORT_SYMBOL_GPL(xenbus_unmap_ring); | ||
551 | |||
552 | |||
553 | /** | ||
554 | * xenbus_read_driver_state | ||
555 | * @path: path for driver | ||
556 | * | ||
557 | * Return the state of the driver rooted at the given store path, or | ||
558 | * XenbusStateUnknown if no state can be read. | ||
559 | */ | ||
560 | enum xenbus_state xenbus_read_driver_state(const char *path) | ||
561 | { | ||
562 | enum xenbus_state result; | ||
563 | int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); | ||
564 | if (err) | ||
565 | result = XenbusStateUnknown; | ||
566 | |||
567 | return result; | ||
568 | } | ||
569 | EXPORT_SYMBOL_GPL(xenbus_read_driver_state); | ||
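A typical consumer is an otherend_changed handler that wants to know whether the peer is tearing down. A one-function sketch:

#include <xen/xenbus.h>

static int peer_is_going_away(struct xenbus_device *dev)
{
	enum xenbus_state s = xenbus_read_driver_state(dev->otherend);

	return s == XenbusStateClosing || s == XenbusStateClosed;
}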
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c new file mode 100644 index 000000000000..6efbe3f29ca5 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_comms.c | |||
@@ -0,0 +1,233 @@ | |||
1 | /****************************************************************************** | ||
2 | * xenbus_comms.c | ||
3 | * | ||
4 | * Low level code to talk to Xen Store: ringbuffer and event channel. | ||
5 | * | ||
6 | * Copyright (C) 2005 Rusty Russell, IBM Corporation | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License version 2 | ||
10 | * as published by the Free Software Foundation; or, when distributed | ||
11 | * separately from the Linux kernel or incorporated into other | ||
12 | * software packages, subject to the following license: | ||
13 | * | ||
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
15 | * of this source file (the "Software"), to deal in the Software without | ||
16 | * restriction, including without limitation the rights to use, copy, modify, | ||
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
18 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
19 | * the following conditions: | ||
20 | * | ||
21 | * The above copyright notice and this permission notice shall be included in | ||
22 | * all copies or substantial portions of the Software. | ||
23 | * | ||
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
30 | * IN THE SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/wait.h> | ||
34 | #include <linux/interrupt.h> | ||
35 | #include <linux/sched.h> | ||
36 | #include <linux/err.h> | ||
37 | #include <xen/xenbus.h> | ||
38 | #include <asm/xen/hypervisor.h> | ||
39 | #include <xen/events.h> | ||
40 | #include <xen/page.h> | ||
41 | #include "xenbus_comms.h" | ||
42 | |||
43 | static int xenbus_irq; | ||
44 | |||
45 | static DECLARE_WORK(probe_work, xenbus_probe); | ||
46 | |||
47 | static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); | ||
48 | |||
49 | static irqreturn_t wake_waiting(int irq, void *unused) | ||
50 | { | ||
51 | if (unlikely(xenstored_ready == 0)) { | ||
52 | xenstored_ready = 1; | ||
53 | schedule_work(&probe_work); | ||
54 | } | ||
55 | |||
56 | wake_up(&xb_waitq); | ||
57 | return IRQ_HANDLED; | ||
58 | } | ||
59 | |||
60 | static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) | ||
61 | { | ||
62 | return ((prod - cons) <= XENSTORE_RING_SIZE); | ||
63 | } | ||
64 | |||
65 | static void *get_output_chunk(XENSTORE_RING_IDX cons, | ||
66 | XENSTORE_RING_IDX prod, | ||
67 | char *buf, uint32_t *len) | ||
68 | { | ||
69 | *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); | ||
70 | if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) | ||
71 | *len = XENSTORE_RING_SIZE - (prod - cons); | ||
72 | return buf + MASK_XENSTORE_IDX(prod); | ||
73 | } | ||
74 | |||
75 | static const void *get_input_chunk(XENSTORE_RING_IDX cons, | ||
76 | XENSTORE_RING_IDX prod, | ||
77 | const char *buf, uint32_t *len) | ||
78 | { | ||
79 | *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); | ||
80 | if ((prod - cons) < *len) | ||
81 | *len = prod - cons; | ||
82 | return buf + MASK_XENSTORE_IDX(cons); | ||
83 | } | ||
84 | |||
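A concrete wrap-around case: with XENSTORE_RING_SIZE = 1024, cons = 1020 and prod = 1030, get_input_chunk() first yields the 4 bytes up to the end of the buffer (MASK_XENSTORE_IDX(1020) = 1020, so *len = 4), and the next call, after cons has advanced to 1024, yields the remaining 6 bytes from offset 0. The indexes themselves are never masked when stored, only when used, which is what lets check_indexes() validate prod - cons with plain unsigned arithmetic.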
85 | /** | ||
86 | * xb_write - low level write | ||
87 | * @data: buffer to send | ||
88 | * @len: length of buffer | ||
89 | * | ||
90 | * Returns 0 on success, error otherwise. | ||
91 | */ | ||
92 | int xb_write(const void *data, unsigned len) | ||
93 | { | ||
94 | struct xenstore_domain_interface *intf = xen_store_interface; | ||
95 | XENSTORE_RING_IDX cons, prod; | ||
96 | int rc; | ||
97 | |||
98 | while (len != 0) { | ||
99 | void *dst; | ||
100 | unsigned int avail; | ||
101 | |||
102 | rc = wait_event_interruptible( | ||
103 | xb_waitq, | ||
104 | (intf->req_prod - intf->req_cons) != | ||
105 | XENSTORE_RING_SIZE); | ||
106 | if (rc < 0) | ||
107 | return rc; | ||
108 | |||
109 | /* Read indexes, then verify. */ | ||
110 | cons = intf->req_cons; | ||
111 | prod = intf->req_prod; | ||
112 | if (!check_indexes(cons, prod)) { | ||
113 | intf->req_cons = intf->req_prod = 0; | ||
114 | return -EIO; | ||
115 | } | ||
116 | |||
117 | dst = get_output_chunk(cons, prod, intf->req, &avail); | ||
118 | if (avail == 0) | ||
119 | continue; | ||
120 | if (avail > len) | ||
121 | avail = len; | ||
122 | |||
123 | /* Must write data /after/ reading the consumer index. */ | ||
124 | mb(); | ||
125 | |||
126 | memcpy(dst, data, avail); | ||
127 | data += avail; | ||
128 | len -= avail; | ||
129 | |||
130 | /* Other side must not see new producer until data is there. */ | ||
131 | wmb(); | ||
132 | intf->req_prod += avail; | ||
133 | |||
134 | /* Implies mb(): other side will see the updated producer. */ | ||
135 | notify_remote_via_evtchn(xen_store_evtchn); | ||
136 | } | ||
137 | |||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | int xb_data_to_read(void) | ||
142 | { | ||
143 | struct xenstore_domain_interface *intf = xen_store_interface; | ||
144 | return (intf->rsp_cons != intf->rsp_prod); | ||
145 | } | ||
146 | |||
147 | int xb_wait_for_data_to_read(void) | ||
148 | { | ||
149 | return wait_event_interruptible(xb_waitq, xb_data_to_read()); | ||
150 | } | ||
151 | |||
152 | int xb_read(void *data, unsigned len) | ||
153 | { | ||
154 | struct xenstore_domain_interface *intf = xen_store_interface; | ||
155 | XENSTORE_RING_IDX cons, prod; | ||
156 | int rc; | ||
157 | |||
158 | while (len != 0) { | ||
159 | unsigned int avail; | ||
160 | const char *src; | ||
161 | |||
162 | rc = xb_wait_for_data_to_read(); | ||
163 | if (rc < 0) | ||
164 | return rc; | ||
165 | |||
166 | /* Read indexes, then verify. */ | ||
167 | cons = intf->rsp_cons; | ||
168 | prod = intf->rsp_prod; | ||
169 | if (!check_indexes(cons, prod)) { | ||
170 | intf->rsp_cons = intf->rsp_prod = 0; | ||
171 | return -EIO; | ||
172 | } | ||
173 | |||
174 | src = get_input_chunk(cons, prod, intf->rsp, &avail); | ||
175 | if (avail == 0) | ||
176 | continue; | ||
177 | if (avail > len) | ||
178 | avail = len; | ||
179 | |||
180 | /* Must read data /after/ reading the producer index. */ | ||
181 | rmb(); | ||
182 | |||
183 | memcpy(data, src, avail); | ||
184 | data += avail; | ||
185 | len -= avail; | ||
186 | |||
187 | /* Other side must not see free space until we've copied out */ | ||
188 | mb(); | ||
189 | intf->rsp_cons += avail; | ||
190 | |||
191 | pr_debug("Finished read of %i bytes (%i to go)\n", avail, len); | ||
192 | |||
193 | /* Implies mb(): other side will see the updated consumer. */ | ||
194 | notify_remote_via_evtchn(xen_store_evtchn); | ||
195 | } | ||
196 | |||
197 | return 0; | ||
198 | } | ||
199 | |||
200 | /** | ||
201 | * xb_init_comms - Set up interrupt handler off store event channel. | ||
202 | */ | ||
203 | int xb_init_comms(void) | ||
204 | { | ||
205 | struct xenstore_domain_interface *intf = xen_store_interface; | ||
206 | int err; | ||
207 | |||
208 | if (intf->req_prod != intf->req_cons) | ||
209 | printk(KERN_ERR "XENBUS request ring is not quiescent " | ||
210 | "(%08x:%08x)!\n", intf->req_cons, intf->req_prod); | ||
211 | |||
212 | if (intf->rsp_prod != intf->rsp_cons) { | ||
213 | printk(KERN_WARNING "XENBUS response ring is not quiescent " | ||
214 | "(%08x:%08x): fixing up\n", | ||
215 | intf->rsp_cons, intf->rsp_prod); | ||
216 | intf->rsp_cons = intf->rsp_prod; | ||
217 | } | ||
218 | |||
219 | if (xenbus_irq) | ||
220 | unbind_from_irqhandler(xenbus_irq, &xb_waitq); | ||
221 | |||
222 | err = bind_evtchn_to_irqhandler( | ||
223 | xen_store_evtchn, wake_waiting, | ||
224 | 0, "xenbus", &xb_waitq); | ||
225 | if (err <= 0) { | ||
226 | printk(KERN_ERR "XENBUS request irq failed %i\n", err); | ||
227 | return err; | ||
228 | } | ||
229 | |||
230 | xenbus_irq = err; | ||
231 | |||
232 | return 0; | ||
233 | } | ||
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h new file mode 100644 index 000000000000..c21db7513736 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_comms.h | |||
@@ -0,0 +1,46 @@ | |||
1 | /* | ||
2 | * Private include for xenbus communications. | ||
3 | * | ||
4 | * Copyright (C) 2005 Rusty Russell, IBM Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License version 2 | ||
8 | * as published by the Free Software Foundation; or, when distributed | ||
9 | * separately from the Linux kernel or incorporated into other | ||
10 | * software packages, subject to the following license: | ||
11 | * | ||
12 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
13 | * of this source file (the "Software"), to deal in the Software without | ||
14 | * restriction, including without limitation the rights to use, copy, modify, | ||
15 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
16 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
17 | * the following conditions: | ||
18 | * | ||
19 | * The above copyright notice and this permission notice shall be included in | ||
20 | * all copies or substantial portions of the Software. | ||
21 | * | ||
22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
24 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
25 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
26 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
27 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
28 | * IN THE SOFTWARE. | ||
29 | */ | ||
30 | |||
31 | #ifndef _XENBUS_COMMS_H | ||
32 | #define _XENBUS_COMMS_H | ||
33 | |||
34 | int xs_init(void); | ||
35 | int xb_init_comms(void); | ||
36 | |||
37 | /* Low level routines. */ | ||
38 | int xb_write(const void *data, unsigned len); | ||
39 | int xb_read(void *data, unsigned len); | ||
40 | int xb_data_to_read(void); | ||
41 | int xb_wait_for_data_to_read(void); | ||
42 | int xs_input_avail(void); | ||
43 | extern struct xenstore_domain_interface *xen_store_interface; | ||
44 | extern int xen_store_evtchn; | ||
45 | |||
46 | #endif /* _XENBUS_COMMS_H */ | ||
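
For context, the xs layer built on these primitives (xenbus_xs.c later in this patch) frames every request as a struct xsd_sockmsg header followed by msg.len payload bytes, all under one request mutex. A minimal hedged sketch of that framing; example_send() is hypothetical, while xsd_sockmsg and its fields come from xen/interface/io/xs_wire.h:

    #include <xen/interface/io/xs_wire.h>

    static int example_send(enum xsd_sockmsg_type type,
                            const void *payload, unsigned int len)
    {
            struct xsd_sockmsg hdr = {
                    .type   = type,
                    .req_id = 0,
                    .tx_id  = 0,    /* XBT_NIL: outside any transaction */
                    .len    = len,
            };
            int err;

            err = xb_write(&hdr, sizeof(hdr));
            return err ? err : xb_write(payload, len);
    }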
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c new file mode 100644 index 000000000000..0b769f7c4a48 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe.c | |||
@@ -0,0 +1,935 @@ | |||
1 | /****************************************************************************** | ||
2 | * Talks to Xen Store to figure out what devices we have. | ||
3 | * | ||
4 | * Copyright (C) 2005 Rusty Russell, IBM Corporation | ||
5 | * Copyright (C) 2005 Mike Wray, Hewlett-Packard | ||
6 | * Copyright (C) 2005, 2006 XenSource Ltd | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License version 2 | ||
10 | * as published by the Free Software Foundation; or, when distributed | ||
11 | * separately from the Linux kernel or incorporated into other | ||
12 | * software packages, subject to the following license: | ||
13 | * | ||
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
15 | * of this source file (the "Software"), to deal in the Software without | ||
16 | * restriction, including without limitation the rights to use, copy, modify, | ||
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
18 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
19 | * the following conditions: | ||
20 | * | ||
21 | * The above copyright notice and this permission notice shall be included in | ||
22 | * all copies or substantial portions of the Software. | ||
23 | * | ||
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
30 | * IN THE SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #define DPRINTK(fmt, args...) \ | ||
34 | pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ | ||
35 | __func__, __LINE__, ##args) | ||
36 | |||
37 | #include <linux/kernel.h> | ||
38 | #include <linux/err.h> | ||
39 | #include <linux/string.h> | ||
40 | #include <linux/ctype.h> | ||
41 | #include <linux/fcntl.h> | ||
42 | #include <linux/mm.h> | ||
43 | #include <linux/notifier.h> | ||
44 | #include <linux/kthread.h> | ||
45 | #include <linux/mutex.h> | ||
46 | #include <linux/io.h> | ||
47 | |||
48 | #include <asm/page.h> | ||
49 | #include <asm/pgtable.h> | ||
50 | #include <asm/xen/hypervisor.h> | ||
51 | #include <xen/xenbus.h> | ||
52 | #include <xen/events.h> | ||
53 | #include <xen/page.h> | ||
54 | |||
55 | #include "xenbus_comms.h" | ||
56 | #include "xenbus_probe.h" | ||
57 | |||
58 | int xen_store_evtchn; | ||
59 | struct xenstore_domain_interface *xen_store_interface; | ||
60 | static unsigned long xen_store_mfn; | ||
61 | |||
62 | static BLOCKING_NOTIFIER_HEAD(xenstore_chain); | ||
63 | |||
64 | static void wait_for_devices(struct xenbus_driver *xendrv); | ||
65 | |||
66 | static int xenbus_probe_frontend(const char *type, const char *name); | ||
67 | |||
68 | static void xenbus_dev_shutdown(struct device *_dev); | ||
69 | |||
70 | /* If something in array of ids matches this device, return it. */ | ||
71 | static const struct xenbus_device_id * | ||
72 | match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) | ||
73 | { | ||
74 | for (; *arr->devicetype != '\0'; arr++) { | ||
75 | if (!strcmp(arr->devicetype, dev->devicetype)) | ||
76 | return arr; | ||
77 | } | ||
78 | return NULL; | ||
79 | } | ||
80 | |||
81 | int xenbus_match(struct device *_dev, struct device_driver *_drv) | ||
82 | { | ||
83 | struct xenbus_driver *drv = to_xenbus_driver(_drv); | ||
84 | |||
85 | if (!drv->ids) | ||
86 | return 0; | ||
87 | |||
88 | return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; | ||
89 | } | ||
90 | |||
91 | /* device/<type>/<id> => <type>-<id> */ | ||
92 | static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) | ||
93 | { | ||
94 | nodename = strchr(nodename, '/'); | ||
95 | if (!nodename || strlen(nodename + 1) >= BUS_ID_SIZE) { | ||
96 | printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); | ||
97 | return -EINVAL; | ||
98 | } | ||
99 | |||
100 | strlcpy(bus_id, nodename + 1, BUS_ID_SIZE); | ||
101 | if (!strchr(bus_id, '/')) { | ||
102 | printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); | ||
103 | return -EINVAL; | ||
104 | } | ||
105 | *strchr(bus_id, '/') = '-'; | ||
106 | return 0; | ||
107 | } | ||
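
A worked example of the transformation (illustrative only):

    /*
     * frontend_bus_id(bus_id, "device/vbd/2049")
     *
     * leaves bus_id == "vbd-2049": the "device/" prefix is stripped and
     * the remaining '/' rewritten to '-'.  A node with fewer than two
     * levels ("device" or "device/vbd") is rejected with -EINVAL.
     */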
108 | |||
109 | |||
110 | static void free_otherend_details(struct xenbus_device *dev) | ||
111 | { | ||
112 | kfree(dev->otherend); | ||
113 | dev->otherend = NULL; | ||
114 | } | ||
115 | |||
116 | |||
117 | static void free_otherend_watch(struct xenbus_device *dev) | ||
118 | { | ||
119 | if (dev->otherend_watch.node) { | ||
120 | unregister_xenbus_watch(&dev->otherend_watch); | ||
121 | kfree(dev->otherend_watch.node); | ||
122 | dev->otherend_watch.node = NULL; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | |||
127 | int read_otherend_details(struct xenbus_device *xendev, | ||
128 | char *id_node, char *path_node) | ||
129 | { | ||
130 | int err = xenbus_gather(XBT_NIL, xendev->nodename, | ||
131 | id_node, "%i", &xendev->otherend_id, | ||
132 | path_node, NULL, &xendev->otherend, | ||
133 | NULL); | ||
134 | if (err) { | ||
135 | xenbus_dev_fatal(xendev, err, | ||
136 | "reading other end details from %s", | ||
137 | xendev->nodename); | ||
138 | return err; | ||
139 | } | ||
140 | if (strlen(xendev->otherend) == 0 || | ||
141 | !xenbus_exists(XBT_NIL, xendev->otherend, "")) { | ||
142 | xenbus_dev_fatal(xendev, -ENOENT, | ||
143 | "unable to read other end from %s. " | ||
144 | "missing or inaccessible.", | ||
145 | xendev->nodename); | ||
146 | free_otherend_details(xendev); | ||
147 | return -ENOENT; | ||
148 | } | ||
149 | |||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | |||
154 | static int read_backend_details(struct xenbus_device *xendev) | ||
155 | { | ||
156 | return read_otherend_details(xendev, "backend-id", "backend"); | ||
157 | } | ||
158 | |||
159 | |||
160 | /* Bus type for frontend drivers. */ | ||
161 | static struct xen_bus_type xenbus_frontend = { | ||
162 | .root = "device", | ||
163 | .levels = 2, /* device/type/<id> */ | ||
164 | .get_bus_id = frontend_bus_id, | ||
165 | .probe = xenbus_probe_frontend, | ||
166 | .bus = { | ||
167 | .name = "xen", | ||
168 | .match = xenbus_match, | ||
169 | .probe = xenbus_dev_probe, | ||
170 | .remove = xenbus_dev_remove, | ||
171 | .shutdown = xenbus_dev_shutdown, | ||
172 | }, | ||
173 | }; | ||
174 | |||
175 | static void otherend_changed(struct xenbus_watch *watch, | ||
176 | const char **vec, unsigned int len) | ||
177 | { | ||
178 | struct xenbus_device *dev = | ||
179 | container_of(watch, struct xenbus_device, otherend_watch); | ||
180 | struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); | ||
181 | enum xenbus_state state; | ||
182 | |||
183 | /* Protect us against watches firing on old details when the otherend | ||
184 | details change, say immediately after a resume. */ | ||
185 | if (!dev->otherend || | ||
186 | strncmp(dev->otherend, vec[XS_WATCH_PATH], | ||
187 | strlen(dev->otherend))) { | ||
188 | dev_dbg(&dev->dev, "Ignoring watch at %s", vec[XS_WATCH_PATH]); | ||
189 | return; | ||
190 | } | ||
191 | |||
192 | state = xenbus_read_driver_state(dev->otherend); | ||
193 | |||
194 | dev_dbg(&dev->dev, "state is %d, (%s), %s, %s", | ||
195 | state, xenbus_strstate(state), dev->otherend_watch.node, | ||
196 | vec[XS_WATCH_PATH]); | ||
197 | |||
198 | /* | ||
199 | * Ignore xenbus transitions during shutdown. This prevents us doing | ||
200 | * work that can fail e.g., when the rootfs is gone. | ||
201 | */ | ||
202 | if (system_state > SYSTEM_RUNNING) { | ||
203 | struct xen_bus_type *bus = | ||
204 | container_of(dev->dev.bus, struct xen_bus_type, bus); | ||
205 | /* If we're frontend, drive the state machine to Closed. */ | ||
206 | /* This should cause the backend to release our resources. */ | ||
207 | if ((bus == &xenbus_frontend) && (state == XenbusStateClosing)) | ||
208 | xenbus_frontend_closed(dev); | ||
209 | return; | ||
210 | } | ||
211 | |||
212 | if (drv->otherend_changed) | ||
213 | drv->otherend_changed(dev, state); | ||
214 | } | ||
215 | |||
216 | |||
217 | static int talk_to_otherend(struct xenbus_device *dev) | ||
218 | { | ||
219 | struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); | ||
220 | |||
221 | free_otherend_watch(dev); | ||
222 | free_otherend_details(dev); | ||
223 | |||
224 | return drv->read_otherend_details(dev); | ||
225 | } | ||
226 | |||
227 | |||
228 | static int watch_otherend(struct xenbus_device *dev) | ||
229 | { | ||
230 | return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, | ||
231 | "%s/%s", dev->otherend, "state"); | ||
232 | } | ||
233 | |||
234 | |||
235 | int xenbus_dev_probe(struct device *_dev) | ||
236 | { | ||
237 | struct xenbus_device *dev = to_xenbus_device(_dev); | ||
238 | struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); | ||
239 | const struct xenbus_device_id *id; | ||
240 | int err; | ||
241 | |||
242 | DPRINTK("%s", dev->nodename); | ||
243 | |||
244 | if (!drv->probe) { | ||
245 | err = -ENODEV; | ||
246 | goto fail; | ||
247 | } | ||
248 | |||
249 | id = match_device(drv->ids, dev); | ||
250 | if (!id) { | ||
251 | err = -ENODEV; | ||
252 | goto fail; | ||
253 | } | ||
254 | |||
255 | err = talk_to_otherend(dev); | ||
256 | if (err) { | ||
257 | dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n", | ||
258 | dev->nodename); | ||
259 | return err; | ||
260 | } | ||
261 | |||
262 | err = drv->probe(dev, id); | ||
263 | if (err) | ||
264 | goto fail; | ||
265 | |||
266 | err = watch_otherend(dev); | ||
267 | if (err) { | ||
268 | dev_warn(&dev->dev, "watch_otherend on %s failed.\n", | ||
269 | dev->nodename); | ||
270 | return err; | ||
271 | } | ||
272 | |||
273 | return 0; | ||
274 | fail: | ||
275 | xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); | ||
276 | xenbus_switch_state(dev, XenbusStateClosed); | ||
277 | return -ENODEV; | ||
278 | } | ||
279 | |||
280 | int xenbus_dev_remove(struct device *_dev) | ||
281 | { | ||
282 | struct xenbus_device *dev = to_xenbus_device(_dev); | ||
283 | struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); | ||
284 | |||
285 | DPRINTK("%s", dev->nodename); | ||
286 | |||
287 | free_otherend_watch(dev); | ||
288 | free_otherend_details(dev); | ||
289 | |||
290 | if (drv->remove) | ||
291 | drv->remove(dev); | ||
292 | |||
293 | xenbus_switch_state(dev, XenbusStateClosed); | ||
294 | return 0; | ||
295 | } | ||
296 | |||
297 | static void xenbus_dev_shutdown(struct device *_dev) | ||
298 | { | ||
299 | struct xenbus_device *dev = to_xenbus_device(_dev); | ||
300 | unsigned long timeout = 5*HZ; | ||
301 | |||
302 | DPRINTK("%s", dev->nodename); | ||
303 | |||
304 | get_device(&dev->dev); | ||
305 | if (dev->state != XenbusStateConnected) { | ||
306 | printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__, | ||
307 | dev->nodename, xenbus_strstate(dev->state)); | ||
308 | goto out; | ||
309 | } | ||
310 | xenbus_switch_state(dev, XenbusStateClosing); | ||
311 | timeout = wait_for_completion_timeout(&dev->down, timeout); | ||
312 | if (!timeout) | ||
313 | printk(KERN_INFO "%s: %s timeout closing device\n", | ||
314 | __func__, dev->nodename); | ||
315 | out: | ||
316 | put_device(&dev->dev); | ||
317 | } | ||
318 | |||
319 | int xenbus_register_driver_common(struct xenbus_driver *drv, | ||
320 | struct xen_bus_type *bus, | ||
321 | struct module *owner, | ||
322 | const char *mod_name) | ||
323 | { | ||
324 | drv->driver.name = drv->name; | ||
325 | drv->driver.bus = &bus->bus; | ||
326 | drv->driver.owner = owner; | ||
327 | drv->driver.mod_name = mod_name; | ||
328 | |||
329 | return driver_register(&drv->driver); | ||
330 | } | ||
331 | |||
332 | int __xenbus_register_frontend(struct xenbus_driver *drv, | ||
333 | struct module *owner, const char *mod_name) | ||
334 | { | ||
335 | int ret; | ||
336 | |||
337 | drv->read_otherend_details = read_backend_details; | ||
338 | |||
339 | ret = xenbus_register_driver_common(drv, &xenbus_frontend, | ||
340 | owner, mod_name); | ||
341 | if (ret) | ||
342 | return ret; | ||
343 | |||
344 | /* If this driver is loaded as a module, wait for devices to attach. */ | ||
345 | wait_for_devices(drv); | ||
346 | |||
347 | return 0; | ||
348 | } | ||
349 | EXPORT_SYMBOL_GPL(__xenbus_register_frontend); | ||
350 | |||
351 | void xenbus_unregister_driver(struct xenbus_driver *drv) | ||
352 | { | ||
353 | driver_unregister(&drv->driver); | ||
354 | } | ||
355 | EXPORT_SYMBOL_GPL(xenbus_unregister_driver); | ||
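
A hedged sketch of how a frontend driver plugs into this path; the "vexample" device type and all example* names are hypothetical. xenbus_register_frontend() is the usual wrapper that supplies THIS_MODULE and KBUILD_MODNAME to __xenbus_register_frontend():

    static int examplefront_probe(struct xenbus_device *dev,
                                  const struct xenbus_device_id *id)
    {
            /* Hypothetical: allocate rings, then drive the state machine. */
            return 0;
    }

    static const struct xenbus_device_id examplefront_ids[] = {
            { "vexample" },
            { "" }          /* empty devicetype terminates the table */
    };

    static struct xenbus_driver examplefront = {
            .name  = "vexample",
            .ids   = examplefront_ids,
            .probe = examplefront_probe,
    };

    static int __init examplefront_init(void)
    {
            /* Built as a module, this also waits up to 10 seconds for the
             * devices to connect; see wait_for_devices() below. */
            return xenbus_register_frontend(&examplefront);
    }
    module_init(examplefront_init);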
356 | |||
357 | struct xb_find_info | ||
358 | { | ||
359 | struct xenbus_device *dev; | ||
360 | const char *nodename; | ||
361 | }; | ||
362 | |||
363 | static int cmp_dev(struct device *dev, void *data) | ||
364 | { | ||
365 | struct xenbus_device *xendev = to_xenbus_device(dev); | ||
366 | struct xb_find_info *info = data; | ||
367 | |||
368 | if (!strcmp(xendev->nodename, info->nodename)) { | ||
369 | info->dev = xendev; | ||
370 | get_device(dev); | ||
371 | return 1; | ||
372 | } | ||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | struct xenbus_device *xenbus_device_find(const char *nodename, | ||
377 | struct bus_type *bus) | ||
378 | { | ||
379 | struct xb_find_info info = { .dev = NULL, .nodename = nodename }; | ||
380 | |||
381 | bus_for_each_dev(bus, NULL, &info, cmp_dev); | ||
382 | return info.dev; | ||
383 | } | ||
384 | |||
385 | static int cleanup_dev(struct device *dev, void *data) | ||
386 | { | ||
387 | struct xenbus_device *xendev = to_xenbus_device(dev); | ||
388 | struct xb_find_info *info = data; | ||
389 | int len = strlen(info->nodename); | ||
390 | |||
391 | DPRINTK("%s", info->nodename); | ||
392 | |||
393 | /* Match the info->nodename path, or any subdirectory of that path. */ | ||
394 | if (strncmp(xendev->nodename, info->nodename, len)) | ||
395 | return 0; | ||
396 | |||
397 | /* If the node name is longer, ensure it really is a subdirectory. */ | ||
398 | if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/')) | ||
399 | return 0; | ||
400 | |||
401 | info->dev = xendev; | ||
402 | get_device(dev); | ||
403 | return 1; | ||
404 | } | ||
405 | |||
406 | static void xenbus_cleanup_devices(const char *path, struct bus_type *bus) | ||
407 | { | ||
408 | struct xb_find_info info = { .nodename = path }; | ||
409 | |||
410 | do { | ||
411 | info.dev = NULL; | ||
412 | bus_for_each_dev(bus, NULL, &info, cleanup_dev); | ||
413 | if (info.dev) { | ||
414 | device_unregister(&info.dev->dev); | ||
415 | put_device(&info.dev->dev); | ||
416 | } | ||
417 | } while (info.dev); | ||
418 | } | ||
419 | |||
420 | static void xenbus_dev_release(struct device *dev) | ||
421 | { | ||
422 | if (dev) | ||
423 | kfree(to_xenbus_device(dev)); | ||
424 | } | ||
425 | |||
426 | static ssize_t xendev_show_nodename(struct device *dev, | ||
427 | struct device_attribute *attr, char *buf) | ||
428 | { | ||
429 | return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); | ||
430 | } | ||
431 | DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); | ||
432 | |||
433 | static ssize_t xendev_show_devtype(struct device *dev, | ||
434 | struct device_attribute *attr, char *buf) | ||
435 | { | ||
436 | return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); | ||
437 | } | ||
438 | DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); | ||
439 | |||
440 | |||
441 | int xenbus_probe_node(struct xen_bus_type *bus, | ||
442 | const char *type, | ||
443 | const char *nodename) | ||
444 | { | ||
445 | int err; | ||
446 | struct xenbus_device *xendev; | ||
447 | size_t stringlen; | ||
448 | char *tmpstring; | ||
449 | |||
450 | enum xenbus_state state = xenbus_read_driver_state(nodename); | ||
451 | |||
452 | if (state != XenbusStateInitialising) { | ||
453 | /* Device is not new, so ignore it. This can happen if a | ||
454 | device is going away after switching to Closed. */ | ||
455 | return 0; | ||
456 | } | ||
457 | |||
458 | stringlen = strlen(nodename) + 1 + strlen(type) + 1; | ||
459 | xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL); | ||
460 | if (!xendev) | ||
461 | return -ENOMEM; | ||
462 | |||
463 | xendev->state = XenbusStateInitialising; | ||
464 | |||
465 | /* Copy the strings into the extra space. */ | ||
466 | |||
467 | tmpstring = (char *)(xendev + 1); | ||
468 | strcpy(tmpstring, nodename); | ||
469 | xendev->nodename = tmpstring; | ||
470 | |||
471 | tmpstring += strlen(tmpstring) + 1; | ||
472 | strcpy(tmpstring, type); | ||
473 | xendev->devicetype = tmpstring; | ||
474 | init_completion(&xendev->down); | ||
475 | |||
476 | xendev->dev.bus = &bus->bus; | ||
477 | xendev->dev.release = xenbus_dev_release; | ||
478 | |||
479 | err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename); | ||
480 | if (err) | ||
481 | goto fail; | ||
482 | |||
483 | /* Register with generic device framework. */ | ||
484 | err = device_register(&xendev->dev); | ||
485 | if (err) | ||
486 | goto fail; | ||
487 | |||
488 | err = device_create_file(&xendev->dev, &dev_attr_nodename); | ||
489 | if (err) | ||
490 | goto fail_unregister; | ||
491 | |||
492 | err = device_create_file(&xendev->dev, &dev_attr_devtype); | ||
493 | if (err) | ||
494 | goto fail_remove_file; | ||
495 | |||
496 | return 0; | ||
497 | fail_remove_file: | ||
498 | device_remove_file(&xendev->dev, &dev_attr_nodename); | ||
499 | fail_unregister: | ||
500 | device_unregister(&xendev->dev); | ||
501 | fail: | ||
502 | kfree(xendev); | ||
503 | return err; | ||
504 | } | ||
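
Note the allocation trick above: the device structure and both strings share one kzalloc(), so xenbus_dev_release() frees everything with a single kfree(). Illustrative layout for type "vbd" under node "device/vbd/2049":

    /*
     *  [ struct xenbus_device ][ "device/vbd/2049\0" ][ "vbd\0" ]
     *                            ^ xendev->nodename     ^ xendev->devicetype
     */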
505 | |||
506 | /* device/<typename>/<name> */ | ||
507 | static int xenbus_probe_frontend(const char *type, const char *name) | ||
508 | { | ||
509 | char *nodename; | ||
510 | int err; | ||
511 | |||
512 | nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", | ||
513 | xenbus_frontend.root, type, name); | ||
514 | if (!nodename) | ||
515 | return -ENOMEM; | ||
516 | |||
517 | DPRINTK("%s", nodename); | ||
518 | |||
519 | err = xenbus_probe_node(&xenbus_frontend, type, nodename); | ||
520 | kfree(nodename); | ||
521 | return err; | ||
522 | } | ||
523 | |||
524 | static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) | ||
525 | { | ||
526 | int err = 0; | ||
527 | char **dir; | ||
528 | unsigned int dir_n = 0; | ||
529 | int i; | ||
530 | |||
531 | dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n); | ||
532 | if (IS_ERR(dir)) | ||
533 | return PTR_ERR(dir); | ||
534 | |||
535 | for (i = 0; i < dir_n; i++) { | ||
536 | err = bus->probe(type, dir[i]); | ||
537 | if (err) | ||
538 | break; | ||
539 | } | ||
540 | kfree(dir); | ||
541 | return err; | ||
542 | } | ||
543 | |||
544 | int xenbus_probe_devices(struct xen_bus_type *bus) | ||
545 | { | ||
546 | int err = 0; | ||
547 | char **dir; | ||
548 | unsigned int i, dir_n; | ||
549 | |||
550 | dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n); | ||
551 | if (IS_ERR(dir)) | ||
552 | return PTR_ERR(dir); | ||
553 | |||
554 | for (i = 0; i < dir_n; i++) { | ||
555 | err = xenbus_probe_device_type(bus, dir[i]); | ||
556 | if (err) | ||
557 | break; | ||
558 | } | ||
559 | kfree(dir); | ||
560 | return err; | ||
561 | } | ||
562 | |||
563 | static unsigned int char_count(const char *str, char c) | ||
564 | { | ||
565 | unsigned int i, ret = 0; | ||
566 | |||
567 | for (i = 0; str[i]; i++) | ||
568 | if (str[i] == c) | ||
569 | ret++; | ||
570 | return ret; | ||
571 | } | ||
572 | |||
573 | static int strsep_len(const char *str, char c, unsigned int len) | ||
574 | { | ||
575 | unsigned int i; | ||
576 | |||
577 | for (i = 0; str[i]; i++) | ||
578 | if (str[i] == c) { | ||
579 | if (len == 0) | ||
580 | return i; | ||
581 | len--; | ||
582 | } | ||
583 | return (len == 0) ? i : -ERANGE; | ||
584 | } | ||
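
Worked examples of strsep_len() (illustrative): it returns the offset at which a node path should be cut to recover the device root, so with bus->levels == 2:

    /*
     * strsep_len("device/vbd/2049", '/', 2)       == 15  (whole string)
     * strsep_len("device/vbd/2049/state", '/', 2) == 15  (cut at 3rd '/')
     * strsep_len("device/vbd", '/', 2)            == -ERANGE (too shallow)
     */

Hence xenbus_dev_changed() below always reduces a changed node to its "device/<type>/<id>" root.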
585 | |||
586 | void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) | ||
587 | { | ||
588 | int exists, rootlen; | ||
589 | struct xenbus_device *dev; | ||
590 | char type[BUS_ID_SIZE]; | ||
591 | const char *p, *root; | ||
592 | |||
593 | if (char_count(node, '/') < 2) | ||
594 | return; | ||
595 | |||
596 | exists = xenbus_exists(XBT_NIL, node, ""); | ||
597 | if (!exists) { | ||
598 | xenbus_cleanup_devices(node, &bus->bus); | ||
599 | return; | ||
600 | } | ||
601 | |||
602 | /* backend/<type>/... or device/<type>/... */ | ||
603 | p = strchr(node, '/') + 1; | ||
604 | snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p); | ||
605 | type[BUS_ID_SIZE-1] = '\0'; | ||
606 | |||
607 | rootlen = strsep_len(node, '/', bus->levels); | ||
608 | if (rootlen < 0) | ||
609 | return; | ||
610 | root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node); | ||
611 | if (!root) | ||
612 | return; | ||
613 | |||
614 | dev = xenbus_device_find(root, &bus->bus); | ||
615 | if (!dev) | ||
616 | xenbus_probe_node(bus, type, root); | ||
617 | else | ||
618 | put_device(&dev->dev); | ||
619 | |||
620 | kfree(root); | ||
621 | } | ||
622 | |||
623 | static void frontend_changed(struct xenbus_watch *watch, | ||
624 | const char **vec, unsigned int len) | ||
625 | { | ||
626 | DPRINTK(""); | ||
627 | |||
628 | xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); | ||
629 | } | ||
630 | |||
631 | /* We watch for devices appearing and vanishing. */ | ||
632 | static struct xenbus_watch fe_watch = { | ||
633 | .node = "device", | ||
634 | .callback = frontend_changed, | ||
635 | }; | ||
636 | |||
637 | static int suspend_dev(struct device *dev, void *data) | ||
638 | { | ||
639 | int err = 0; | ||
640 | struct xenbus_driver *drv; | ||
641 | struct xenbus_device *xdev; | ||
642 | |||
643 | DPRINTK(""); | ||
644 | |||
645 | if (dev->driver == NULL) | ||
646 | return 0; | ||
647 | drv = to_xenbus_driver(dev->driver); | ||
648 | xdev = container_of(dev, struct xenbus_device, dev); | ||
649 | if (drv->suspend) | ||
650 | err = drv->suspend(xdev); | ||
651 | if (err) | ||
652 | printk(KERN_WARNING | ||
653 | "xenbus: suspend %s failed: %i\n", dev->bus_id, err); | ||
654 | return 0; | ||
655 | } | ||
656 | |||
657 | static int suspend_cancel_dev(struct device *dev, void *data) | ||
658 | { | ||
659 | int err = 0; | ||
660 | struct xenbus_driver *drv; | ||
661 | struct xenbus_device *xdev; | ||
662 | |||
663 | DPRINTK(""); | ||
664 | |||
665 | if (dev->driver == NULL) | ||
666 | return 0; | ||
667 | drv = to_xenbus_driver(dev->driver); | ||
668 | xdev = container_of(dev, struct xenbus_device, dev); | ||
669 | if (drv->suspend_cancel) | ||
670 | err = drv->suspend_cancel(xdev); | ||
671 | if (err) | ||
672 | printk(KERN_WARNING | ||
673 | "xenbus: suspend_cancel %s failed: %i\n", | ||
674 | dev->bus_id, err); | ||
675 | return 0; | ||
676 | } | ||
677 | |||
678 | static int resume_dev(struct device *dev, void *data) | ||
679 | { | ||
680 | int err; | ||
681 | struct xenbus_driver *drv; | ||
682 | struct xenbus_device *xdev; | ||
683 | |||
684 | DPRINTK(""); | ||
685 | |||
686 | if (dev->driver == NULL) | ||
687 | return 0; | ||
688 | |||
689 | drv = to_xenbus_driver(dev->driver); | ||
690 | xdev = container_of(dev, struct xenbus_device, dev); | ||
691 | |||
692 | err = talk_to_otherend(xdev); | ||
693 | if (err) { | ||
694 | printk(KERN_WARNING | ||
695 | "xenbus: resume (talk_to_otherend) %s failed: %i\n", | ||
696 | dev->bus_id, err); | ||
697 | return err; | ||
698 | } | ||
699 | |||
700 | xdev->state = XenbusStateInitialising; | ||
701 | |||
702 | if (drv->resume) { | ||
703 | err = drv->resume(xdev); | ||
704 | if (err) { | ||
705 | printk(KERN_WARNING | ||
706 | "xenbus: resume %s failed: %i\n", | ||
707 | dev->bus_id, err); | ||
708 | return err; | ||
709 | } | ||
710 | } | ||
711 | |||
712 | err = watch_otherend(xdev); | ||
713 | if (err) { | ||
714 | printk(KERN_WARNING | ||
715 | "xenbus_probe: resume (watch_otherend) %s failed: " | ||
716 | "%d.\n", dev->bus_id, err); | ||
717 | return err; | ||
718 | } | ||
719 | |||
720 | return 0; | ||
721 | } | ||
722 | |||
723 | void xenbus_suspend(void) | ||
724 | { | ||
725 | DPRINTK(""); | ||
726 | |||
727 | bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); | ||
728 | xenbus_backend_suspend(suspend_dev); | ||
729 | xs_suspend(); | ||
730 | } | ||
731 | EXPORT_SYMBOL_GPL(xenbus_suspend); | ||
732 | |||
733 | void xenbus_resume(void) | ||
734 | { | ||
735 | xb_init_comms(); | ||
736 | xs_resume(); | ||
737 | bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); | ||
738 | xenbus_backend_resume(resume_dev); | ||
739 | } | ||
740 | EXPORT_SYMBOL_GPL(xenbus_resume); | ||
741 | |||
742 | void xenbus_suspend_cancel(void) | ||
743 | { | ||
744 | xs_suspend_cancel(); | ||
745 | bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev); | ||
746 | xenbus_backend_resume(suspend_cancel_dev); | ||
747 | } | ||
748 | EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); | ||
749 | |||
750 | /* A flag to determine if xenstored is 'ready' (i.e. has started) */ | ||
751 | int xenstored_ready = 0; | ||
752 | |||
753 | |||
754 | int register_xenstore_notifier(struct notifier_block *nb) | ||
755 | { | ||
756 | int ret = 0; | ||
757 | |||
758 | if (xenstored_ready > 0) | ||
759 | ret = nb->notifier_call(nb, 0, NULL); | ||
760 | else | ||
761 | blocking_notifier_chain_register(&xenstore_chain, nb); | ||
762 | |||
763 | return ret; | ||
764 | } | ||
765 | EXPORT_SYMBOL_GPL(register_xenstore_notifier); | ||
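
A hedged sketch of a client of this notifier interface; all example* names are hypothetical. If xenstored is already ready the callback runs synchronously, otherwise it is queued on xenstore_chain and fired from xenbus_probe():

    static int example_xs_ready(struct notifier_block *nb,
                                unsigned long event, void *data)
    {
            /* xenbus_read(), watches etc. are safe to use from here on. */
            return NOTIFY_DONE;
    }

    static struct notifier_block example_nb = {
            .notifier_call = example_xs_ready,
    };

    /* ... in init code: register_xenstore_notifier(&example_nb); */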
766 | |||
767 | void unregister_xenstore_notifier(struct notifier_block *nb) | ||
768 | { | ||
769 | blocking_notifier_chain_unregister(&xenstore_chain, nb); | ||
770 | } | ||
771 | EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); | ||
772 | |||
773 | void xenbus_probe(struct work_struct *unused) | ||
774 | { | ||
775 | BUG_ON(xenstored_ready <= 0); | ||
776 | |||
777 | /* Enumerate devices in xenstore and watch for changes. */ | ||
778 | xenbus_probe_devices(&xenbus_frontend); | ||
779 | register_xenbus_watch(&fe_watch); | ||
780 | xenbus_backend_probe_and_watch(); | ||
781 | |||
782 | /* Notify others that xenstore is up */ | ||
783 | blocking_notifier_call_chain(&xenstore_chain, 0, NULL); | ||
784 | } | ||
785 | |||
786 | static int __init xenbus_probe_init(void) | ||
787 | { | ||
788 | int err = 0; | ||
789 | |||
790 | DPRINTK(""); | ||
791 | |||
792 | err = -ENODEV; | ||
793 | if (!is_running_on_xen()) | ||
794 | goto out_error; | ||
795 | |||
796 | /* Register ourselves with the kernel bus subsystem */ | ||
797 | err = bus_register(&xenbus_frontend.bus); | ||
798 | if (err) | ||
799 | goto out_error; | ||
800 | |||
801 | err = xenbus_backend_bus_register(); | ||
802 | if (err) | ||
803 | goto out_unreg_front; | ||
804 | |||
805 | /* | ||
806 | * Domain0 doesn't have a store_evtchn or store_mfn yet. | ||
807 | */ | ||
808 | if (is_initial_xendomain()) { | ||
809 | /* dom0 not yet supported */ | ||
810 | } else { | ||
811 | xenstored_ready = 1; | ||
812 | xen_store_evtchn = xen_start_info->store_evtchn; | ||
813 | xen_store_mfn = xen_start_info->store_mfn; | ||
814 | } | ||
815 | xen_store_interface = mfn_to_virt(xen_store_mfn); | ||
816 | |||
817 | /* Initialize the interface to xenstore. */ | ||
818 | err = xs_init(); | ||
819 | if (err) { | ||
820 | printk(KERN_WARNING | ||
821 | "XENBUS: Error initializing xenstore comms: %i\n", err); | ||
822 | goto out_unreg_back; | ||
823 | } | ||
824 | |||
825 | if (!is_initial_xendomain()) | ||
826 | xenbus_probe(NULL); | ||
827 | |||
828 | return 0; | ||
829 | |||
830 | out_unreg_back: | ||
831 | xenbus_backend_bus_unregister(); | ||
832 | |||
833 | out_unreg_front: | ||
834 | bus_unregister(&xenbus_frontend.bus); | ||
835 | |||
836 | out_error: | ||
837 | return err; | ||
838 | } | ||
839 | |||
840 | postcore_initcall(xenbus_probe_init); | ||
841 | |||
842 | MODULE_LICENSE("GPL"); | ||
843 | |||
844 | static int is_disconnected_device(struct device *dev, void *data) | ||
845 | { | ||
846 | struct xenbus_device *xendev = to_xenbus_device(dev); | ||
847 | struct device_driver *drv = data; | ||
848 | |||
849 | /* | ||
850 | * A device with no driver will never connect. We care only about | ||
851 | * devices which should currently be in the process of connecting. | ||
852 | */ | ||
853 | if (!dev->driver) | ||
854 | return 0; | ||
855 | |||
856 | /* Is this search limited to a particular driver? */ | ||
857 | if (drv && (dev->driver != drv)) | ||
858 | return 0; | ||
859 | |||
860 | return (xendev->state != XenbusStateConnected); | ||
861 | } | ||
862 | |||
863 | static int exists_disconnected_device(struct device_driver *drv) | ||
864 | { | ||
865 | return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, | ||
866 | is_disconnected_device); | ||
867 | } | ||
868 | |||
869 | static int print_device_status(struct device *dev, void *data) | ||
870 | { | ||
871 | struct xenbus_device *xendev = to_xenbus_device(dev); | ||
872 | struct device_driver *drv = data; | ||
873 | |||
874 | /* Is this operation limited to a particular driver? */ | ||
875 | if (drv && (dev->driver != drv)) | ||
876 | return 0; | ||
877 | |||
878 | if (!dev->driver) { | ||
879 | /* Information only: is this too noisy? */ | ||
880 | printk(KERN_INFO "XENBUS: Device with no driver: %s\n", | ||
881 | xendev->nodename); | ||
882 | } else if (xendev->state != XenbusStateConnected) { | ||
883 | printk(KERN_WARNING "XENBUS: Timeout connecting " | ||
884 | "to device: %s (state %d)\n", | ||
885 | xendev->nodename, xendev->state); | ||
886 | } | ||
887 | |||
888 | return 0; | ||
889 | } | ||
890 | |||
891 | /* We only wait for device setup after most initcalls have run. */ | ||
892 | static int ready_to_wait_for_devices; | ||
893 | |||
894 | /* | ||
895 | * Wait up to 10 seconds for all currently configured devices. We need to do | ||
896 | * this to guarantee that the filesystems and/or network devices needed for | ||
897 | * boot are available before we can allow the boot to proceed. | ||
898 | * | ||
899 | * This needs to be on a late_initcall, to happen after the frontend device | ||
900 | * drivers have been initialised, but before the root fs is mounted. | ||
901 | * | ||
902 | * A possible improvement here would be to have the tools add a per-device | ||
903 | * flag to the store entry, indicating whether it is needed at boot time. | ||
904 | * This would allow people who knew what they were doing to accelerate their | ||
905 | * boot slightly, but of course needs tools or manual intervention to set up | ||
906 | * those flags correctly. | ||
907 | */ | ||
908 | static void wait_for_devices(struct xenbus_driver *xendrv) | ||
909 | { | ||
910 | unsigned long timeout = jiffies + 10*HZ; | ||
911 | struct device_driver *drv = xendrv ? &xendrv->driver : NULL; | ||
912 | |||
913 | if (!ready_to_wait_for_devices || !is_running_on_xen()) | ||
914 | return; | ||
915 | |||
916 | while (exists_disconnected_device(drv)) { | ||
917 | if (time_after(jiffies, timeout)) | ||
918 | break; | ||
919 | schedule_timeout_interruptible(HZ/10); | ||
920 | } | ||
921 | |||
922 | bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, | ||
923 | print_device_status); | ||
924 | } | ||
925 | |||
926 | #ifndef MODULE | ||
927 | static int __init boot_wait_for_devices(void) | ||
928 | { | ||
929 | ready_to_wait_for_devices = 1; | ||
930 | wait_for_devices(NULL); | ||
931 | return 0; | ||
932 | } | ||
933 | |||
934 | late_initcall(boot_wait_for_devices); | ||
935 | #endif | ||
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h new file mode 100644 index 000000000000..e09b19415a40 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe.h | |||
@@ -0,0 +1,74 @@ | |||
1 | /****************************************************************************** | ||
2 | * xenbus_probe.h | ||
3 | * | ||
4 | * Talks to Xen Store to figure out what devices we have. | ||
5 | * | ||
6 | * Copyright (C) 2005 Rusty Russell, IBM Corporation | ||
7 | * Copyright (C) 2005 XenSource Ltd. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation; or, when distributed | ||
12 | * separately from the Linux kernel or incorporated into other | ||
13 | * software packages, subject to the following license: | ||
14 | * | ||
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
16 | * of this source file (the "Software"), to deal in the Software without | ||
17 | * restriction, including without limitation the rights to use, copy, modify, | ||
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
19 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
20 | * the following conditions: | ||
21 | * | ||
22 | * The above copyright notice and this permission notice shall be included in | ||
23 | * all copies or substantial portions of the Software. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
31 | * IN THE SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #ifndef _XENBUS_PROBE_H | ||
35 | #define _XENBUS_PROBE_H | ||
36 | |||
37 | #ifdef CONFIG_XEN_BACKEND | ||
38 | extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); | ||
39 | extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); | ||
40 | extern void xenbus_backend_probe_and_watch(void); | ||
41 | extern int xenbus_backend_bus_register(void); | ||
42 | extern void xenbus_backend_bus_unregister(void); | ||
43 | #else | ||
44 | static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {} | ||
45 | static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {} | ||
46 | static inline void xenbus_backend_probe_and_watch(void) {} | ||
47 | static inline int xenbus_backend_bus_register(void) { return 0; } | ||
48 | static inline void xenbus_backend_bus_unregister(void) {} | ||
49 | #endif | ||
50 | |||
51 | struct xen_bus_type | ||
52 | { | ||
53 | char *root; | ||
54 | unsigned int levels; | ||
55 | int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename); | ||
56 | int (*probe)(const char *type, const char *dir); | ||
57 | struct bus_type bus; | ||
58 | }; | ||
59 | |||
60 | extern int xenbus_match(struct device *_dev, struct device_driver *_drv); | ||
61 | extern int xenbus_dev_probe(struct device *_dev); | ||
62 | extern int xenbus_dev_remove(struct device *_dev); | ||
63 | extern int xenbus_register_driver_common(struct xenbus_driver *drv, | ||
64 | struct xen_bus_type *bus, | ||
65 | struct module *owner, | ||
66 | const char *mod_name); | ||
67 | extern int xenbus_probe_node(struct xen_bus_type *bus, | ||
68 | const char *type, | ||
69 | const char *nodename); | ||
70 | extern int xenbus_probe_devices(struct xen_bus_type *bus); | ||
71 | |||
72 | extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); | ||
73 | |||
74 | #endif | ||
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c new file mode 100644 index 000000000000..9e943fbce81b --- /dev/null +++ b/drivers/xen/xenbus/xenbus_xs.c | |||
@@ -0,0 +1,861 @@ | |||
1 | /****************************************************************************** | ||
2 | * xenbus_xs.c | ||
3 | * | ||
4 | * This is the kernel equivalent of the "xs" library. We don't need everything | ||
5 | * and we use xenbus_comms for communication. | ||
6 | * | ||
7 | * Copyright (C) 2005 Rusty Russell, IBM Corporation | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation; or, when distributed | ||
12 | * separately from the Linux kernel or incorporated into other | ||
13 | * software packages, subject to the following license: | ||
14 | * | ||
15 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
16 | * of this source file (the "Software"), to deal in the Software without | ||
17 | * restriction, including without limitation the rights to use, copy, modify, | ||
18 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | ||
19 | * and to permit persons to whom the Software is furnished to do so, subject to | ||
20 | * the following conditions: | ||
21 | * | ||
22 | * The above copyright notice and this permission notice shall be included in | ||
23 | * all copies or substantial portions of the Software. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
31 | * IN THE SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #include <linux/unistd.h> | ||
35 | #include <linux/errno.h> | ||
36 | #include <linux/types.h> | ||
37 | #include <linux/uio.h> | ||
38 | #include <linux/kernel.h> | ||
39 | #include <linux/string.h> | ||
40 | #include <linux/err.h> | ||
41 | #include <linux/slab.h> | ||
42 | #include <linux/fcntl.h> | ||
43 | #include <linux/kthread.h> | ||
44 | #include <linux/rwsem.h> | ||
45 | #include <linux/module.h> | ||
46 | #include <linux/mutex.h> | ||
47 | #include <xen/xenbus.h> | ||
48 | #include "xenbus_comms.h" | ||
49 | |||
50 | struct xs_stored_msg { | ||
51 | struct list_head list; | ||
52 | |||
53 | struct xsd_sockmsg hdr; | ||
54 | |||
55 | union { | ||
56 | /* Queued replies. */ | ||
57 | struct { | ||
58 | char *body; | ||
59 | } reply; | ||
60 | |||
61 | /* Queued watch events. */ | ||
62 | struct { | ||
63 | struct xenbus_watch *handle; | ||
64 | char **vec; | ||
65 | unsigned int vec_size; | ||
66 | } watch; | ||
67 | } u; | ||
68 | }; | ||
69 | |||
70 | struct xs_handle { | ||
71 | /* A list of replies. Currently only one will ever be outstanding. */ | ||
72 | struct list_head reply_list; | ||
73 | spinlock_t reply_lock; | ||
74 | wait_queue_head_t reply_waitq; | ||
75 | |||
76 | /* | ||
77 | * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex. | ||
78 | * response_mutex is never taken simultaneously with the other three. | ||
79 | */ | ||
80 | |||
81 | /* One request at a time. */ | ||
82 | struct mutex request_mutex; | ||
83 | |||
84 | /* Protect xenbus reader thread against save/restore. */ | ||
85 | struct mutex response_mutex; | ||
86 | |||
87 | /* Protect transactions against save/restore. */ | ||
88 | struct rw_semaphore transaction_mutex; | ||
89 | |||
90 | /* Protect watch (de)register against save/restore. */ | ||
91 | struct rw_semaphore watch_mutex; | ||
92 | }; | ||
93 | |||
94 | static struct xs_handle xs_state; | ||
95 | |||
96 | /* List of registered watches, and a lock to protect it. */ | ||
97 | static LIST_HEAD(watches); | ||
98 | static DEFINE_SPINLOCK(watches_lock); | ||
99 | |||
100 | /* List of pending watch callback events, and a lock to protect it. */ | ||
101 | static LIST_HEAD(watch_events); | ||
102 | static DEFINE_SPINLOCK(watch_events_lock); | ||
103 | |||
104 | /* | ||
105 | * Details of the xenwatch callback kernel thread. The thread waits on the | ||
106 | * watch_events_waitq for work to do (queued on watch_events list). When it | ||
107 | * wakes up it acquires the xenwatch_mutex before reading the list and | ||
108 | * carrying out work. | ||
109 | */ | ||
110 | static pid_t xenwatch_pid; | ||
111 | static DEFINE_MUTEX(xenwatch_mutex); | ||
112 | static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); | ||
113 | |||
114 | static int get_error(const char *errorstring) | ||
115 | { | ||
116 | unsigned int i; | ||
117 | |||
118 | for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) { | ||
119 | if (i == ARRAY_SIZE(xsd_errors) - 1) { | ||
120 | printk(KERN_WARNING | ||
121 | "XENBUS xen store gave: unknown error %s", | ||
122 | errorstring); | ||
123 | return EINVAL; | ||
124 | } | ||
125 | } | ||
126 | return xsd_errors[i].errnum; | ||
127 | } | ||
128 | |||
129 | static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) | ||
130 | { | ||
131 | struct xs_stored_msg *msg; | ||
132 | char *body; | ||
133 | |||
134 | spin_lock(&xs_state.reply_lock); | ||
135 | |||
136 | while (list_empty(&xs_state.reply_list)) { | ||
137 | spin_unlock(&xs_state.reply_lock); | ||
138 | /* XXX FIXME: Avoid synchronous wait for response here. */ | ||
139 | wait_event(xs_state.reply_waitq, | ||
140 | !list_empty(&xs_state.reply_list)); | ||
141 | spin_lock(&xs_state.reply_lock); | ||
142 | } | ||
143 | |||
144 | msg = list_entry(xs_state.reply_list.next, | ||
145 | struct xs_stored_msg, list); | ||
146 | list_del(&msg->list); | ||
147 | |||
148 | spin_unlock(&xs_state.reply_lock); | ||
149 | |||
150 | *type = msg->hdr.type; | ||
151 | if (len) | ||
152 | *len = msg->hdr.len; | ||
153 | body = msg->u.reply.body; | ||
154 | |||
155 | kfree(msg); | ||
156 | |||
157 | return body; | ||
158 | } | ||
159 | |||
160 | void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) | ||
161 | { | ||
162 | void *ret; | ||
163 | struct xsd_sockmsg req_msg = *msg; | ||
164 | int err; | ||
165 | |||
166 | if (req_msg.type == XS_TRANSACTION_START) | ||
167 | down_read(&xs_state.transaction_mutex); | ||
168 | |||
169 | mutex_lock(&xs_state.request_mutex); | ||
170 | |||
171 | err = xb_write(msg, sizeof(*msg) + msg->len); | ||
172 | if (err) { | ||
173 | msg->type = XS_ERROR; | ||
174 | ret = ERR_PTR(err); | ||
175 | } else | ||
176 | ret = read_reply(&msg->type, &msg->len); | ||
177 | |||
178 | mutex_unlock(&xs_state.request_mutex); | ||
179 | |||
180 | if ((msg->type == XS_TRANSACTION_END) || | ||
181 | ((req_msg.type == XS_TRANSACTION_START) && | ||
182 | (msg->type == XS_ERROR))) | ||
183 | up_read(&xs_state.transaction_mutex); | ||
184 | |||
185 | return ret; | ||
186 | } | ||
187 | |||
188 | /* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ | ||
189 | static void *xs_talkv(struct xenbus_transaction t, | ||
190 | enum xsd_sockmsg_type type, | ||
191 | const struct kvec *iovec, | ||
192 | unsigned int num_vecs, | ||
193 | unsigned int *len) | ||
194 | { | ||
195 | struct xsd_sockmsg msg; | ||
196 | void *ret = NULL; | ||
197 | unsigned int i; | ||
198 | int err; | ||
199 | |||
200 | msg.tx_id = t.id; | ||
201 | msg.req_id = 0; | ||
202 | msg.type = type; | ||
203 | msg.len = 0; | ||
204 | for (i = 0; i < num_vecs; i++) | ||
205 | msg.len += iovec[i].iov_len; | ||
206 | |||
207 | mutex_lock(&xs_state.request_mutex); | ||
208 | |||
209 | err = xb_write(&msg, sizeof(msg)); | ||
210 | if (err) { | ||
211 | mutex_unlock(&xs_state.request_mutex); | ||
212 | return ERR_PTR(err); | ||
213 | } | ||
214 | |||
215 | for (i = 0; i < num_vecs; i++) { | ||
216 | err = xb_write(iovec[i].iov_base, iovec[i].iov_len); | ||
217 | if (err) { | ||
218 | mutex_unlock(&xs_state.request_mutex); | ||
219 | return ERR_PTR(err); | ||
220 | } | ||
221 | } | ||
222 | |||
223 | ret = read_reply(&msg.type, len); | ||
224 | |||
225 | mutex_unlock(&xs_state.request_mutex); | ||
226 | |||
227 | if (IS_ERR(ret)) | ||
228 | return ret; | ||
229 | |||
230 | if (msg.type == XS_ERROR) { | ||
231 | err = get_error(ret); | ||
232 | kfree(ret); | ||
233 | return ERR_PTR(-err); | ||
234 | } | ||
235 | |||
236 | if (msg.type != type) { | ||
237 | if (printk_ratelimit()) | ||
238 | printk(KERN_WARNING | ||
239 | "XENBUS unexpected type [%d], expected [%d]\n", | ||
240 | msg.type, type); | ||
241 | kfree(ret); | ||
242 | return ERR_PTR(-EINVAL); | ||
243 | } | ||
244 | return ret; | ||
245 | } | ||
246 | |||
247 | /* Simplified version of xs_talkv: single message. */ | ||
248 | static void *xs_single(struct xenbus_transaction t, | ||
249 | enum xsd_sockmsg_type type, | ||
250 | const char *string, | ||
251 | unsigned int *len) | ||
252 | { | ||
253 | struct kvec iovec; | ||
254 | |||
255 | iovec.iov_base = (void *)string; | ||
256 | iovec.iov_len = strlen(string) + 1; | ||
257 | return xs_talkv(t, type, &iovec, 1, len); | ||
258 | } | ||
259 | |||
260 | /* Many commands only need an ack, don't care what it says. */ | ||
261 | static int xs_error(char *reply) | ||
262 | { | ||
263 | if (IS_ERR(reply)) | ||
264 | return PTR_ERR(reply); | ||
265 | kfree(reply); | ||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | static unsigned int count_strings(const char *strings, unsigned int len) | ||
270 | { | ||
271 | unsigned int num; | ||
272 | const char *p; | ||
273 | |||
274 | for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) | ||
275 | num++; | ||
276 | |||
277 | return num; | ||
278 | } | ||
279 | |||
280 | /* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ | ||
281 | static char *join(const char *dir, const char *name) | ||
282 | { | ||
283 | char *buffer; | ||
284 | |||
285 | if (strlen(name) == 0) | ||
286 | buffer = kasprintf(GFP_KERNEL, "%s", dir); | ||
287 | else | ||
288 | buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name); | ||
289 | return (!buffer) ? ERR_PTR(-ENOMEM) : buffer; | ||
290 | } | ||
291 | |||
292 | static char **split(char *strings, unsigned int len, unsigned int *num) | ||
293 | { | ||
294 | char *p, **ret; | ||
295 | |||
296 | /* Count the strings. */ | ||
297 | *num = count_strings(strings, len); | ||
298 | |||
299 | /* Transfer to one big alloc for easy freeing. */ | ||
300 | ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL); | ||
301 | if (!ret) { | ||
302 | kfree(strings); | ||
303 | return ERR_PTR(-ENOMEM); | ||
304 | } | ||
305 | memcpy(&ret[*num], strings, len); | ||
306 | kfree(strings); | ||
307 | |||
308 | strings = (char *)&ret[*num]; | ||
309 | for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) | ||
310 | ret[(*num)++] = p; | ||
311 | |||
312 | return ret; | ||
313 | } | ||
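
Illustrative result of the single-allocation scheme:

    /*
     * split("one\0two\0", 8, &num) yields one allocation laid out as
     *
     *   [ ret[0] | ret[1] | "one\0two\0" ]
     *
     * with ret[0] -> "one", ret[1] -> "two" and num == 2, so a single
     * kfree(ret) releases the pointer array and the string data together.
     */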
314 | |||
315 | char **xenbus_directory(struct xenbus_transaction t, | ||
316 | const char *dir, const char *node, unsigned int *num) | ||
317 | { | ||
318 | char *strings, *path; | ||
319 | unsigned int len; | ||
320 | |||
321 | path = join(dir, node); | ||
322 | if (IS_ERR(path)) | ||
323 | return (char **)path; | ||
324 | |||
325 | strings = xs_single(t, XS_DIRECTORY, path, &len); | ||
326 | kfree(path); | ||
327 | if (IS_ERR(strings)) | ||
328 | return (char **)strings; | ||
329 | |||
330 | return split(strings, len, num); | ||
331 | } | ||
332 | EXPORT_SYMBOL_GPL(xenbus_directory); | ||
333 | |||
334 | /* Check if a path exists. Return 1 if it does. */ | ||
335 | int xenbus_exists(struct xenbus_transaction t, | ||
336 | const char *dir, const char *node) | ||
337 | { | ||
338 | char **d; | ||
339 | unsigned int dir_n; | ||
340 | |||
341 | d = xenbus_directory(t, dir, node, &dir_n); | ||
342 | if (IS_ERR(d)) | ||
343 | return 0; | ||
344 | kfree(d); | ||
345 | return 1; | ||
346 | } | ||
347 | EXPORT_SYMBOL_GPL(xenbus_exists); | ||
348 | |||
349 | /* Get the value of a single file. | ||
350 | * Returns a kmalloced value: call kfree() on it after use. | ||
351 | * len indicates length in bytes. | ||
352 | */ | ||
353 | void *xenbus_read(struct xenbus_transaction t, | ||
354 | const char *dir, const char *node, unsigned int *len) | ||
355 | { | ||
356 | char *path; | ||
357 | void *ret; | ||
358 | |||
359 | path = join(dir, node); | ||
360 | if (IS_ERR(path)) | ||
361 | return (void *)path; | ||
362 | |||
363 | ret = xs_single(t, XS_READ, path, len); | ||
364 | kfree(path); | ||
365 | return ret; | ||
366 | } | ||
367 | EXPORT_SYMBOL_GPL(xenbus_read); | ||
368 | |||
369 | /* Write the value of a single file. | ||
370 | * Returns -err on failure. | ||
371 | */ | ||
372 | int xenbus_write(struct xenbus_transaction t, | ||
373 | const char *dir, const char *node, const char *string) | ||
374 | { | ||
375 | const char *path; | ||
376 | struct kvec iovec[2]; | ||
377 | int ret; | ||
378 | |||
379 | path = join(dir, node); | ||
380 | if (IS_ERR(path)) | ||
381 | return PTR_ERR(path); | ||
382 | |||
383 | iovec[0].iov_base = (void *)path; | ||
384 | iovec[0].iov_len = strlen(path) + 1; | ||
385 | iovec[1].iov_base = (void *)string; | ||
386 | iovec[1].iov_len = strlen(string); | ||
387 | |||
388 | ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); | ||
389 | kfree(path); | ||
390 | return ret; | ||
391 | } | ||
392 | EXPORT_SYMBOL_GPL(xenbus_write); | ||
393 | |||
394 | /* Create a new directory. */ | ||
395 | int xenbus_mkdir(struct xenbus_transaction t, | ||
396 | const char *dir, const char *node) | ||
397 | { | ||
398 | char *path; | ||
399 | int ret; | ||
400 | |||
401 | path = join(dir, node); | ||
402 | if (IS_ERR(path)) | ||
403 | return PTR_ERR(path); | ||
404 | |||
405 | ret = xs_error(xs_single(t, XS_MKDIR, path, NULL)); | ||
406 | kfree(path); | ||
407 | return ret; | ||
408 | } | ||
409 | EXPORT_SYMBOL_GPL(xenbus_mkdir); | ||
410 | |||
411 | /* Destroy a file or directory (directories must be empty). */ | ||
412 | int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node) | ||
413 | { | ||
414 | char *path; | ||
415 | int ret; | ||
416 | |||
417 | path = join(dir, node); | ||
418 | if (IS_ERR(path)) | ||
419 | return PTR_ERR(path); | ||
420 | |||
421 | ret = xs_error(xs_single(t, XS_RM, path, NULL)); | ||
422 | kfree(path); | ||
423 | return ret; | ||
424 | } | ||
425 | EXPORT_SYMBOL_GPL(xenbus_rm); | ||
426 | |||
427 | /* Start a transaction: changes by others will not be seen during this | ||
428 | * transaction, and changes will not be visible to others until end. | ||
429 | */ | ||
430 | int xenbus_transaction_start(struct xenbus_transaction *t) | ||
431 | { | ||
432 | char *id_str; | ||
433 | |||
434 | down_read(&xs_state.transaction_mutex); | ||
435 | |||
436 | id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL); | ||
437 | if (IS_ERR(id_str)) { | ||
438 | up_read(&xs_state.transaction_mutex); | ||
439 | return PTR_ERR(id_str); | ||
440 | } | ||
441 | |||
442 | t->id = simple_strtoul(id_str, NULL, 0); | ||
443 | kfree(id_str); | ||
444 | return 0; | ||
445 | } | ||
446 | EXPORT_SYMBOL_GPL(xenbus_transaction_start); | ||
447 | |||
448 | /* End a transaction. | ||
449 | * If abort is true, the transaction is discarded instead of committed. | ||
450 | */ | ||
451 | int xenbus_transaction_end(struct xenbus_transaction t, int abort) | ||
452 | { | ||
453 | char abortstr[2]; | ||
454 | int err; | ||
455 | |||
456 | if (abort) | ||
457 | strcpy(abortstr, "F"); | ||
458 | else | ||
459 | strcpy(abortstr, "T"); | ||
460 | |||
461 | err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); | ||
462 | |||
463 | up_read(&xs_state.transaction_mutex); | ||
464 | |||
465 | return err; | ||
466 | } | ||
467 | EXPORT_SYMBOL_GPL(xenbus_transaction_end); | ||
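
A hedged sketch of the standard calling pattern: xenstored fails a conflicting commit with EAGAIN and the caller retries the whole transaction. The directory and node written here are hypothetical:

    static int example_update(const char *dir)
    {
            struct xenbus_transaction t;
            int err;

    again:
            err = xenbus_transaction_start(&t);
            if (err)
                    return err;

            err = xenbus_write(t, dir, "state", "1");
            if (err) {
                    xenbus_transaction_end(t, 1);   /* abort */
                    return err;
            }

            err = xenbus_transaction_end(t, 0);     /* commit */
            if (err == -EAGAIN)
                    goto again;
            return err;
    }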
468 | |||
469 | /* Single read and scanf: returns -errno or num scanned. */ | ||
470 | int xenbus_scanf(struct xenbus_transaction t, | ||
471 | const char *dir, const char *node, const char *fmt, ...) | ||
472 | { | ||
473 | va_list ap; | ||
474 | int ret; | ||
475 | char *val; | ||
476 | |||
477 | val = xenbus_read(t, dir, node, NULL); | ||
478 | if (IS_ERR(val)) | ||
479 | return PTR_ERR(val); | ||
480 | |||
481 | va_start(ap, fmt); | ||
482 | ret = vsscanf(val, fmt, ap); | ||
483 | va_end(ap); | ||
484 | kfree(val); | ||
485 | /* Distinctive errno. */ | ||
486 | if (ret == 0) | ||
487 | return -ERANGE; | ||
488 | return ret; | ||
489 | } | ||
490 | EXPORT_SYMBOL_GPL(xenbus_scanf); | ||
491 | |||
492 | /* Single printf and write: returns -errno or 0. */ | ||
493 | int xenbus_printf(struct xenbus_transaction t, | ||
494 | const char *dir, const char *node, const char *fmt, ...) | ||
495 | { | ||
496 | va_list ap; | ||
497 | int ret; | ||
498 | #define PRINTF_BUFFER_SIZE 4096 | ||
499 | char *printf_buffer; | ||
500 | |||
501 | printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); | ||
502 | if (printf_buffer == NULL) | ||
503 | return -ENOMEM; | ||
504 | |||
505 | va_start(ap, fmt); | ||
506 | ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); | ||
507 | va_end(ap); | ||
508 | |||
509 | BUG_ON(ret > PRINTF_BUFFER_SIZE-1); | ||
510 | ret = xenbus_write(t, dir, node, printf_buffer); | ||
511 | |||
512 | kfree(printf_buffer); | ||
513 | |||
514 | return ret; | ||
515 | } | ||
516 | EXPORT_SYMBOL_GPL(xenbus_printf); | ||
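
A hedged sketch of typed access built on the two helpers above; the "ring-ref" and "feature-example" node names are hypothetical:

    static int example_read_config(struct xenbus_device *dev)
    {
            unsigned int ring_ref;
            int err;

            err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
                               "%u", &ring_ref);
            if (err != 1)           /* items scanned, or -errno */
                    return err < 0 ? err : -EIO;

            return xenbus_printf(XBT_NIL, dev->nodename,
                                 "feature-example", "%d", 1);
    }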
517 | |||
518 | /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ | ||
519 | int xenbus_gather(struct xenbus_transaction t, const char *dir, ...) | ||
520 | { | ||
521 | va_list ap; | ||
522 | const char *name; | ||
523 | int ret = 0; | ||
524 | |||
525 | va_start(ap, dir); | ||
526 | while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { | ||
527 | const char *fmt = va_arg(ap, char *); | ||
528 | void *result = va_arg(ap, void *); | ||
529 | char *p; | ||
530 | |||
531 | p = xenbus_read(t, dir, name, NULL); | ||
532 | if (IS_ERR(p)) { | ||
533 | ret = PTR_ERR(p); | ||
534 | break; | ||
535 | } | ||
536 | if (fmt) { | ||
537 | if (sscanf(p, fmt, result) == 0) | ||
538 | ret = -EINVAL; | ||
539 | kfree(p); | ||
540 | } else | ||
541 | *(char **)result = p; | ||
542 | } | ||
543 | va_end(ap); | ||
544 | return ret; | ||
545 | } | ||
546 | EXPORT_SYMBOL_GPL(xenbus_gather); | ||
547 | |||
548 | static int xs_watch(const char *path, const char *token) | ||
549 | { | ||
550 | struct kvec iov[2]; | ||
551 | |||
552 | iov[0].iov_base = (void *)path; | ||
553 | iov[0].iov_len = strlen(path) + 1; | ||
554 | iov[1].iov_base = (void *)token; | ||
555 | iov[1].iov_len = strlen(token) + 1; | ||
556 | |||
557 | return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov, | ||
558 | ARRAY_SIZE(iov), NULL)); | ||
559 | } | ||
560 | |||
561 | static int xs_unwatch(const char *path, const char *token) | ||
562 | { | ||
563 | struct kvec iov[2]; | ||
564 | |||
565 | iov[0].iov_base = (char *)path; | ||
566 | iov[0].iov_len = strlen(path) + 1; | ||
567 | iov[1].iov_base = (char *)token; | ||
568 | iov[1].iov_len = strlen(token) + 1; | ||
569 | |||
570 | return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov, | ||
571 | ARRAY_SIZE(iov), NULL)); | ||
572 | } | ||
573 | |||
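| /* The token is the watch pointer printed in hex (see | ||
| * register_xenbus_watch() below).  Parse it back, then confirm the | ||
| * watch is still on the list before trusting it, so an event bearing | ||
| * a stale token is safely ignored. */ | ||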
574 | static struct xenbus_watch *find_watch(const char *token) | ||
575 | { | ||
576 | struct xenbus_watch *i, *cmp; | ||
577 | |||
578 | cmp = (void *)simple_strtoul(token, NULL, 16); | ||
579 | |||
580 | list_for_each_entry(i, &watches, list) | ||
581 | if (i == cmp) | ||
582 | return i; | ||
583 | |||
584 | return NULL; | ||
585 | } | ||
586 | |||
587 | /* Register callback to watch this node. */ | ||
588 | int register_xenbus_watch(struct xenbus_watch *watch) | ||
589 | { | ||
590 | /* The watch pointer, printed in hex, is the token. */ | ||
591 | char token[sizeof(watch) * 2 + 1]; | ||
592 | int err; | ||
593 | |||
594 | sprintf(token, "%lX", (long)watch); | ||
595 | |||
596 | down_read(&xs_state.watch_mutex); | ||
597 | |||
598 | spin_lock(&watches_lock); | ||
599 | BUG_ON(find_watch(token)); | ||
600 | list_add(&watch->list, &watches); | ||
601 | spin_unlock(&watches_lock); | ||
602 | |||
603 | err = xs_watch(watch->node, token); | ||
604 | |||
605 | /* Ignore errors due to multiple registration. */ | ||
606 | if ((err != 0) && (err != -EEXIST)) { | ||
607 | spin_lock(&watches_lock); | ||
608 | list_del(&watch->list); | ||
609 | spin_unlock(&watches_lock); | ||
610 | } | ||
611 | |||
612 | up_read(&xs_state.watch_mutex); | ||
613 | |||
614 | return err; | ||
615 | } | ||
616 | EXPORT_SYMBOL_GPL(register_xenbus_watch); | ||
617 | |||
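| /* | ||
| * Editor's sketch (hypothetical path and callback; field names assume | ||
| * the struct xenbus_watch layout from xenbus.h): the callback runs in | ||
| * xenwatch_thread() with vec[XS_WATCH_PATH] naming the node that | ||
| * changed. | ||
| */ | ||
| static void example_changed(struct xenbus_watch *watch, | ||
| const char **vec, unsigned int len) | ||
| { | ||
| printk(KERN_DEBUG "%s changed\n", vec[XS_WATCH_PATH]); | ||
| } | ||
| |||
| static struct xenbus_watch example_watch = { | ||
| .node = "device/example/0/state", | ||
| .callback = example_changed, | ||
| }; | ||
| |||
| /* ... err = register_xenbus_watch(&example_watch); ... */ | ||
| |||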
618 | void unregister_xenbus_watch(struct xenbus_watch *watch) | ||
619 | { | ||
620 | struct xs_stored_msg *msg, *tmp; | ||
621 | char token[sizeof(watch) * 2 + 1]; | ||
622 | int err; | ||
623 | |||
624 | sprintf(token, "%lX", (long)watch); | ||
625 | |||
626 | down_read(&xs_state.watch_mutex); | ||
627 | |||
628 | spin_lock(&watches_lock); | ||
629 | BUG_ON(!find_watch(token)); | ||
630 | list_del(&watch->list); | ||
631 | spin_unlock(&watches_lock); | ||
632 | |||
633 | err = xs_unwatch(watch->node, token); | ||
634 | if (err) | ||
635 | printk(KERN_WARNING | ||
636 | "XENBUS Failed to release watch %s: %i\n", | ||
637 | watch->node, err); | ||
638 | |||
639 | up_read(&xs_state.watch_mutex); | ||
640 | |||
641 | /* Make sure there are no callbacks running currently (unless | ||
642 | it's us). */ | ||
643 | if (current->pid != xenwatch_pid) | ||
644 | mutex_lock(&xenwatch_mutex); | ||
645 | |||
646 | /* Cancel pending watch events. */ | ||
647 | spin_lock(&watch_events_lock); | ||
648 | list_for_each_entry_safe(msg, tmp, &watch_events, list) { | ||
649 | if (msg->u.watch.handle != watch) | ||
650 | continue; | ||
651 | list_del(&msg->list); | ||
652 | kfree(msg->u.watch.vec); | ||
653 | kfree(msg); | ||
654 | } | ||
655 | spin_unlock(&watch_events_lock); | ||
656 | |||
657 | if (current->pid != xenwatch_pid) | ||
658 | mutex_unlock(&xenwatch_mutex); | ||
659 | } | ||
660 | EXPORT_SYMBOL_GPL(unregister_xenbus_watch); | ||
661 | |||
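| /* Editor's note: xs_suspend() quiesces all xenstore traffic around | ||
| * save/restore by taking every lock; xs_resume() then re-issues | ||
| * XS_WATCH for each registered watch, since the connection to | ||
| * xenstored is rebuilt, and holds watch_mutex until that is done. */ | ||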
662 | void xs_suspend(void) | ||
663 | { | ||
664 | down_write(&xs_state.transaction_mutex); | ||
665 | down_write(&xs_state.watch_mutex); | ||
666 | mutex_lock(&xs_state.request_mutex); | ||
667 | mutex_lock(&xs_state.response_mutex); | ||
668 | } | ||
669 | |||
670 | void xs_resume(void) | ||
671 | { | ||
672 | struct xenbus_watch *watch; | ||
673 | char token[sizeof(watch) * 2 + 1]; | ||
674 | |||
675 | mutex_unlock(&xs_state.response_mutex); | ||
676 | mutex_unlock(&xs_state.request_mutex); | ||
677 | up_write(&xs_state.transaction_mutex); | ||
678 | |||
679 | /* No need for watches_lock: the watch_mutex is sufficient. */ | ||
680 | list_for_each_entry(watch, &watches, list) { | ||
681 | sprintf(token, "%lX", (long)watch); | ||
682 | xs_watch(watch->node, token); | ||
683 | } | ||
684 | |||
685 | up_write(&xs_state.watch_mutex); | ||
686 | } | ||
687 | |||
688 | void xs_suspend_cancel(void) | ||
689 | { | ||
690 | mutex_unlock(&xs_state.response_mutex); | ||
691 | mutex_unlock(&xs_state.request_mutex); | ||
692 | up_write(&xs_state.watch_mutex); | ||
693 | up_write(&xs_state.transaction_mutex); | ||
694 | } | ||
695 | |||
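| /* All watch callbacks run serialized in this one thread, under | ||
| * xenwatch_mutex.  unregister_xenbus_watch() relies on that: taking | ||
| * xenwatch_mutex there waits out any callback already in flight. */ | ||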
696 | static int xenwatch_thread(void *unused) | ||
697 | { | ||
698 | struct list_head *ent; | ||
699 | struct xs_stored_msg *msg; | ||
700 | |||
701 | for (;;) { | ||
702 | wait_event_interruptible(watch_events_waitq, | ||
703 | !list_empty(&watch_events)); | ||
704 | |||
705 | if (kthread_should_stop()) | ||
706 | break; | ||
707 | |||
708 | mutex_lock(&xenwatch_mutex); | ||
709 | |||
710 | spin_lock(&watch_events_lock); | ||
711 | ent = watch_events.next; | ||
712 | if (ent != &watch_events) | ||
713 | list_del(ent); | ||
714 | spin_unlock(&watch_events_lock); | ||
715 | |||
716 | if (ent != &watch_events) { | ||
717 | msg = list_entry(ent, struct xs_stored_msg, list); | ||
718 | msg->u.watch.handle->callback( | ||
719 | msg->u.watch.handle, | ||
720 | (const char **)msg->u.watch.vec, | ||
721 | msg->u.watch.vec_size); | ||
722 | kfree(msg->u.watch.vec); | ||
723 | kfree(msg); | ||
724 | } | ||
725 | |||
726 | mutex_unlock(&xenwatch_mutex); | ||
727 | } | ||
728 | |||
729 | return 0; | ||
730 | } | ||
731 | |||
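| /* Pull one message off the ring: a fixed-size header, then hdr.len | ||
| * bytes of payload.  Watch events are queued for xenwatch_thread(); | ||
| * anything else is a reply, queued on reply_list for the waiting | ||
| * requester. */ | ||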
732 | static int process_msg(void) | ||
733 | { | ||
734 | struct xs_stored_msg *msg; | ||
735 | char *body; | ||
736 | int err; | ||
737 | |||
738 | /* | ||
739 | * We must disallow save/restore while reading a xenstore message. | ||
740 | * A partial read across s/r leaves us out of sync with xenstored. | ||
741 | */ | ||
742 | for (;;) { | ||
743 | err = xb_wait_for_data_to_read(); | ||
744 | if (err) | ||
745 | return err; | ||
746 | mutex_lock(&xs_state.response_mutex); | ||
747 | if (xb_data_to_read()) | ||
748 | break; | ||
749 | /* We raced with save/restore: pending data 'disappeared'. */ | ||
750 | mutex_unlock(&xs_state.response_mutex); | ||
751 | } | ||
752 | |||
754 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); | ||
755 | if (msg == NULL) { | ||
756 | err = -ENOMEM; | ||
757 | goto out; | ||
758 | } | ||
759 | |||
760 | err = xb_read(&msg->hdr, sizeof(msg->hdr)); | ||
761 | if (err) { | ||
762 | kfree(msg); | ||
763 | goto out; | ||
764 | } | ||
765 | |||
766 | body = kmalloc(msg->hdr.len + 1, GFP_KERNEL); | ||
767 | if (body == NULL) { | ||
768 | kfree(msg); | ||
769 | err = -ENOMEM; | ||
770 | goto out; | ||
771 | } | ||
772 | |||
773 | err = xb_read(body, msg->hdr.len); | ||
774 | if (err) { | ||
775 | kfree(body); | ||
776 | kfree(msg); | ||
777 | goto out; | ||
778 | } | ||
779 | body[msg->hdr.len] = '\0'; | ||
780 | |||
781 | if (msg->hdr.type == XS_WATCH_EVENT) { | ||
782 | msg->u.watch.vec = split(body, msg->hdr.len, | ||
783 | &msg->u.watch.vec_size); | ||
784 | if (IS_ERR(msg->u.watch.vec)) { | ||
785 | kfree(msg); | ||
786 | err = PTR_ERR(msg->u.watch.vec); | ||
787 | goto out; | ||
788 | } | ||
789 | |||
790 | spin_lock(&watches_lock); | ||
791 | msg->u.watch.handle = find_watch( | ||
792 | msg->u.watch.vec[XS_WATCH_TOKEN]); | ||
793 | if (msg->u.watch.handle != NULL) { | ||
794 | spin_lock(&watch_events_lock); | ||
795 | list_add_tail(&msg->list, &watch_events); | ||
796 | wake_up(&watch_events_waitq); | ||
797 | spin_unlock(&watch_events_lock); | ||
798 | } else { | ||
799 | kfree(msg->u.watch.vec); | ||
800 | kfree(msg); | ||
801 | } | ||
802 | spin_unlock(&watches_lock); | ||
803 | } else { | ||
804 | msg->u.reply.body = body; | ||
805 | spin_lock(&xs_state.reply_lock); | ||
806 | list_add_tail(&msg->list, &xs_state.reply_list); | ||
807 | spin_unlock(&xs_state.reply_lock); | ||
808 | wake_up(&xs_state.reply_waitq); | ||
809 | } | ||
810 | |||
811 | out: | ||
812 | mutex_unlock(&xs_state.response_mutex); | ||
813 | return err; | ||
814 | } | ||
815 | |||
816 | static int xenbus_thread(void *unused) | ||
817 | { | ||
818 | int err; | ||
819 | |||
820 | for (;;) { | ||
821 | err = process_msg(); | ||
822 | if (err) | ||
823 | printk(KERN_WARNING "XENBUS error %d while reading " | ||
824 | "message\n", err); | ||
825 | if (kthread_should_stop()) | ||
826 | break; | ||
827 | } | ||
828 | |||
829 | return 0; | ||
830 | } | ||
831 | |||
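| /* Editor's note: presumably called once from xenbus bring-up.  Starts | ||
| * two kthreads: "xenbus", which demultiplexes incoming messages off | ||
| * the ring, and "xenwatch", which delivers watch callbacks (see | ||
| * above). */ | ||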
832 | int xs_init(void) | ||
833 | { | ||
834 | int err; | ||
835 | struct task_struct *task; | ||
836 | |||
837 | INIT_LIST_HEAD(&xs_state.reply_list); | ||
838 | spin_lock_init(&xs_state.reply_lock); | ||
839 | init_waitqueue_head(&xs_state.reply_waitq); | ||
840 | |||
841 | mutex_init(&xs_state.request_mutex); | ||
842 | mutex_init(&xs_state.response_mutex); | ||
843 | init_rwsem(&xs_state.transaction_mutex); | ||
844 | init_rwsem(&xs_state.watch_mutex); | ||
845 | |||
846 | /* Initialize the shared memory rings to talk to xenstored */ | ||
847 | err = xb_init_comms(); | ||
848 | if (err) | ||
849 | return err; | ||
850 | |||
851 | task = kthread_run(xenwatch_thread, NULL, "xenwatch"); | ||
852 | if (IS_ERR(task)) | ||
853 | return PTR_ERR(task); | ||
854 | xenwatch_pid = task->pid; | ||
855 | |||
856 | task = kthread_run(xenbus_thread, NULL, "xenbus"); | ||
857 | if (IS_ERR(task)) | ||
858 | return PTR_ERR(task); | ||
859 | |||
860 | return 0; | ||
861 | } | ||