Diffstat (limited to 'drivers')
-rw-r--r--  drivers/Makefile                    |    2
-rw-r--r--  drivers/acpi/thermal.c              |   24
-rw-r--r--  drivers/block/Kconfig               |    9
-rw-r--r--  drivers/block/Makefile              |    1
-rw-r--r--  drivers/block/xen-blkfront.c        |  988
-rw-r--r--  drivers/char/Kconfig                |    8
-rw-r--r--  drivers/char/Makefile               |    1
-rw-r--r--  drivers/char/hvc_xen.c              |  159
-rw-r--r--  drivers/macintosh/therm_pm72.c      |    3
-rw-r--r--  drivers/macintosh/windfarm_core.c   |    3
-rw-r--r--  drivers/net/Kconfig                 |   12
-rw-r--r--  drivers/net/Makefile                |    2
-rw-r--r--  drivers/net/hamradio/baycom_epp.c   |    2
-rw-r--r--  drivers/net/xen-netfront.c          | 1863
-rw-r--r--  drivers/pnp/pnpbios/core.c          |    2
-rw-r--r--  drivers/sbus/char/bbc_envctrl.c     |    5
-rw-r--r--  drivers/sbus/char/envctrl.c         |    7
-rw-r--r--  drivers/xen/Makefile                |    2
-rw-r--r--  drivers/xen/grant-table.c           |  582
-rw-r--r--  drivers/xen/xenbus/Makefile         |    7
-rw-r--r--  drivers/xen/xenbus/xenbus_client.c  |  569
-rw-r--r--  drivers/xen/xenbus/xenbus_comms.c   |  233
-rw-r--r--  drivers/xen/xenbus/xenbus_comms.h   |   46
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c   |  935
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.h   |   74
-rw-r--r--  drivers/xen/xenbus/xenbus_xs.c      |  861
26 files changed, 6366 insertions(+), 34 deletions(-)
diff --git a/drivers/Makefile b/drivers/Makefile
index 503d82569449..6d9d7fab77f5 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -15,6 +15,8 @@ obj-$(CONFIG_ACPI) += acpi/
 obj-$(CONFIG_PNP) += pnp/
 obj-$(CONFIG_ARM_AMBA) += amba/
 
+obj-$(CONFIG_XEN) += xen/
+
 # char/ comes before serial/ etc so that the VT console is the boot-time
 # default.
 obj-y += char/
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 88a6fc7fd271..58f1338981bc 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -40,6 +40,7 @@
 #include <linux/jiffies.h>
 #include <linux/kmod.h>
 #include <linux/seq_file.h>
+#include <linux/reboot.h>
 #include <asm/uaccess.h>
 
 #include <acpi/acpi_bus.h>
@@ -59,7 +60,6 @@
 #define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0
 #define ACPI_THERMAL_NOTIFY_HOT 0xF1
 #define ACPI_THERMAL_MODE_ACTIVE 0x00
-#define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff"
 
 #define ACPI_THERMAL_MAX_ACTIVE 10
 #define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65
@@ -419,26 +419,6 @@ static int acpi_thermal_get_devices(struct acpi_thermal *tz)
         return 0;
 }
 
-static int acpi_thermal_call_usermode(char *path)
-{
-        char *argv[2] = { NULL, NULL };
-        char *envp[3] = { NULL, NULL, NULL };
-
-
-        if (!path)
-                return -EINVAL;
-
-        argv[0] = path;
-
-        /* minimal command environment */
-        envp[0] = "HOME=/";
-        envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-
-        call_usermodehelper(argv[0], argv, envp, 0);
-
-        return 0;
-}
-
 static int acpi_thermal_critical(struct acpi_thermal *tz)
 {
         if (!tz || !tz->trips.critical.flags.valid)
@@ -456,7 +436,7 @@ static int acpi_thermal_critical(struct acpi_thermal *tz)
         acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL,
                                 tz->trips.critical.flags.enabled);
 
-        acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF);
+        orderly_poweroff(true);
 
         return 0;
 }
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 8f65b88cf711..a4a311992408 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -427,4 +427,13 @@ config XILINX_SYSACE
         help
           Include support for the Xilinx SystemACE CompactFlash interface
 
+config XEN_BLKDEV_FRONTEND
+        tristate "Xen virtual block device support"
+        depends on XEN
+        default y
+        help
+          This driver implements the front-end of the Xen virtual
+          block device driver. It communicates with a back-end driver
+          in another domain which drives the actual block device.
+
 endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 9ee08ab4ffa8..3e31532df0ed 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_VIODASD) += viodasd.o
 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
 obj-$(CONFIG_BLK_DEV_UB) += ub.o
 
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
new file mode 100644
index 000000000000..6746c29181f8
--- /dev/null
+++ b/drivers/block/xen-blkfront.c
@@ -0,0 +1,988 @@
1/*
2 * blkfront.c
3 *
4 * XenLinux virtual block device driver.
5 *
6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
8 * Copyright (c) 2004, Christian Limpach
9 * Copyright (c) 2004, Andrew Warfield
10 * Copyright (c) 2005, Christopher Clark
11 * Copyright (c) 2005, XenSource Ltd
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version 2
15 * as published by the Free Software Foundation; or, when distributed
16 * separately from the Linux kernel or incorporated into other
17 * software packages, subject to the following license:
18 *
19 * Permission is hereby granted, free of charge, to any person obtaining a copy
20 * of this source file (the "Software"), to deal in the Software without
21 * restriction, including without limitation the rights to use, copy, modify,
22 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
23 * and to permit persons to whom the Software is furnished to do so, subject to
24 * the following conditions:
25 *
26 * The above copyright notice and this permission notice shall be included in
27 * all copies or substantial portions of the Software.
28 *
29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
35 * IN THE SOFTWARE.
36 */
37
38#include <linux/interrupt.h>
39#include <linux/blkdev.h>
40#include <linux/module.h>
41
42#include <xen/xenbus.h>
43#include <xen/grant_table.h>
44#include <xen/events.h>
45#include <xen/page.h>
46
47#include <xen/interface/grant_table.h>
48#include <xen/interface/io/blkif.h>
49
50#include <asm/xen/hypervisor.h>
51
52enum blkif_state {
53 BLKIF_STATE_DISCONNECTED,
54 BLKIF_STATE_CONNECTED,
55 BLKIF_STATE_SUSPENDED,
56};
57
58struct blk_shadow {
59 struct blkif_request req;
60 unsigned long request;
61 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
62};
63
64static struct block_device_operations xlvbd_block_fops;
65
66#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
67
68/*
69 * We have one of these per vbd, whether ide, scsi or 'other'. They
70 * hang in private_data off the gendisk structure. We may end up
71 * putting all kinds of interesting stuff here :-)
72 */
73struct blkfront_info
74{
75 struct xenbus_device *xbdev;
76 dev_t dev;
77 struct gendisk *gd;
78 int vdevice;
79 blkif_vdev_t handle;
80 enum blkif_state connected;
81 int ring_ref;
82 struct blkif_front_ring ring;
83 unsigned int evtchn, irq;
84 struct request_queue *rq;
85 struct work_struct work;
86 struct gnttab_free_callback callback;
87 struct blk_shadow shadow[BLK_RING_SIZE];
88 unsigned long shadow_free;
89 int feature_barrier;
90
91 /**
92 * The number of people holding this device open. We won't allow a
93 * hot-unplug unless this is 0.
94 */
95 int users;
96};
97
98static DEFINE_SPINLOCK(blkif_io_lock);
99
100#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
101 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
102#define GRANT_INVALID_REF 0
103
104#define PARTS_PER_DISK 16
105
106#define BLKIF_MAJOR(dev) ((dev)>>8)
107#define BLKIF_MINOR(dev) ((dev) & 0xff)
108
109#define DEV_NAME "xvd" /* name in /dev */
110
111/* Information about our VBDs. */
112#define MAX_VBDS 64
113static LIST_HEAD(vbds_list);
114
115static int get_id_from_freelist(struct blkfront_info *info)
116{
117 unsigned long free = info->shadow_free;
118 BUG_ON(free > BLK_RING_SIZE);
119 info->shadow_free = info->shadow[free].req.id;
120 info->shadow[free].req.id = 0x0fffffee; /* debug */
121 return free;
122}
123
124static void add_id_to_freelist(struct blkfront_info *info,
125 unsigned long id)
126{
127 info->shadow[id].req.id = info->shadow_free;
128 info->shadow[id].request = 0;
129 info->shadow_free = id;
130}
131
132static void blkif_restart_queue_callback(void *arg)
133{
134 struct blkfront_info *info = (struct blkfront_info *)arg;
135 schedule_work(&info->work);
136}
137
138/*
139 * blkif_queue_request
140 *
141 * request block io
142 *
143 * id: for guest use only.
144 * operation: BLKIF_OP_{READ,WRITE,PROBE}
145 * buffer: buffer to read/write into. this should be a
146 * virtual address in the guest os.
147 */
148static int blkif_queue_request(struct request *req)
149{
150 struct blkfront_info *info = req->rq_disk->private_data;
151 unsigned long buffer_mfn;
152 struct blkif_request *ring_req;
153 struct bio *bio;
154 struct bio_vec *bvec;
155 int idx;
156 unsigned long id;
157 unsigned int fsect, lsect;
158 int ref;
159 grant_ref_t gref_head;
160
161 if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
162 return 1;
163
164 if (gnttab_alloc_grant_references(
165 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
166 gnttab_request_free_callback(
167 &info->callback,
168 blkif_restart_queue_callback,
169 info,
170 BLKIF_MAX_SEGMENTS_PER_REQUEST);
171 return 1;
172 }
173
174 /* Fill out a communications ring structure. */
175 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
176 id = get_id_from_freelist(info);
177 info->shadow[id].request = (unsigned long)req;
178
179 ring_req->id = id;
180 ring_req->sector_number = (blkif_sector_t)req->sector;
181 ring_req->handle = info->handle;
182
183 ring_req->operation = rq_data_dir(req) ?
184 BLKIF_OP_WRITE : BLKIF_OP_READ;
185 if (blk_barrier_rq(req))
186 ring_req->operation = BLKIF_OP_WRITE_BARRIER;
187
188 ring_req->nr_segments = 0;
189 rq_for_each_bio (bio, req) {
190 bio_for_each_segment (bvec, bio, idx) {
191 BUG_ON(ring_req->nr_segments
192 == BLKIF_MAX_SEGMENTS_PER_REQUEST);
193 buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page));
194 fsect = bvec->bv_offset >> 9;
195 lsect = fsect + (bvec->bv_len >> 9) - 1;
196 /* install a grant reference. */
197 ref = gnttab_claim_grant_reference(&gref_head);
198 BUG_ON(ref == -ENOSPC);
199
200 gnttab_grant_foreign_access_ref(
201 ref,
202 info->xbdev->otherend_id,
203 buffer_mfn,
204 rq_data_dir(req) );
205
206 info->shadow[id].frame[ring_req->nr_segments] =
207 mfn_to_pfn(buffer_mfn);
208
209 ring_req->seg[ring_req->nr_segments] =
210 (struct blkif_request_segment) {
211 .gref = ref,
212 .first_sect = fsect,
213 .last_sect = lsect };
214
215 ring_req->nr_segments++;
216 }
217 }
218
219 info->ring.req_prod_pvt++;
220
221 /* Keep a private copy so we can reissue requests when recovering. */
222 info->shadow[id].req = *ring_req;
223
224 gnttab_free_grant_references(gref_head);
225
226 return 0;
227}
228
229
230static inline void flush_requests(struct blkfront_info *info)
231{
232 int notify;
233
234 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
235
236 if (notify)
237 notify_remote_via_irq(info->irq);
238}
239
240/*
241 * do_blkif_request
242 * read a block; request is in a request queue
243 */
244static void do_blkif_request(request_queue_t *rq)
245{
246 struct blkfront_info *info = NULL;
247 struct request *req;
248 int queued;
249
250 pr_debug("Entered do_blkif_request\n");
251
252 queued = 0;
253
254 while ((req = elv_next_request(rq)) != NULL) {
255 info = req->rq_disk->private_data;
256 if (!blk_fs_request(req)) {
257 end_request(req, 0);
258 continue;
259 }
260
261 if (RING_FULL(&info->ring))
262 goto wait;
263
264 pr_debug("do_blk_req %p: cmd %p, sec %lx, "
265 "(%u/%li) buffer:%p [%s]\n",
266 req, req->cmd, (unsigned long)req->sector,
267 req->current_nr_sectors,
268 req->nr_sectors, req->buffer,
269 rq_data_dir(req) ? "write" : "read");
270
271
272 blkdev_dequeue_request(req);
273 if (blkif_queue_request(req)) {
274 blk_requeue_request(rq, req);
275wait:
276 /* Avoid pointless unplugs. */
277 blk_stop_queue(rq);
278 break;
279 }
280
281 queued++;
282 }
283
284 if (queued != 0)
285 flush_requests(info);
286}
287
288static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
289{
290 request_queue_t *rq;
291
292 rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
293 if (rq == NULL)
294 return -1;
295
296 elevator_init(rq, "noop");
297
298 /* Hard sector size and max sectors impersonate the equiv. hardware. */
299 blk_queue_hardsect_size(rq, sector_size);
300 blk_queue_max_sectors(rq, 512);
301
302 /* Each segment in a request is up to an aligned page in size. */
303 blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
304 blk_queue_max_segment_size(rq, PAGE_SIZE);
305
306 /* Ensure a merged request will fit in a single I/O ring slot. */
307 blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
308 blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
309
310 /* Make sure buffer addresses are sector-aligned. */
311 blk_queue_dma_alignment(rq, 511);
312
313 gd->queue = rq;
314
315 return 0;
316}
317
318
319static int xlvbd_barrier(struct blkfront_info *info)
320{
321 int err;
322
323 err = blk_queue_ordered(info->rq,
324 info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
325 NULL);
326
327 if (err)
328 return err;
329
330 printk(KERN_INFO "blkfront: %s: barriers %s\n",
331 info->gd->disk_name,
332 info->feature_barrier ? "enabled" : "disabled");
333 return 0;
334}
335
336
337static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
338 int vdevice, u16 vdisk_info, u16 sector_size,
339 struct blkfront_info *info)
340{
341 struct gendisk *gd;
342 int nr_minors = 1;
343 int err = -ENODEV;
344
345 BUG_ON(info->gd != NULL);
346 BUG_ON(info->rq != NULL);
347
348 if ((minor % PARTS_PER_DISK) == 0)
349 nr_minors = PARTS_PER_DISK;
350
351 gd = alloc_disk(nr_minors);
352 if (gd == NULL)
353 goto out;
354
355 if (nr_minors > 1)
356 sprintf(gd->disk_name, "%s%c", DEV_NAME,
357 'a' + minor / PARTS_PER_DISK);
358 else
359 sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
360 'a' + minor / PARTS_PER_DISK,
361 minor % PARTS_PER_DISK);
362
363 gd->major = XENVBD_MAJOR;
364 gd->first_minor = minor;
365 gd->fops = &xlvbd_block_fops;
366 gd->private_data = info;
367 gd->driverfs_dev = &(info->xbdev->dev);
368 set_capacity(gd, capacity);
369
370 if (xlvbd_init_blk_queue(gd, sector_size)) {
371 del_gendisk(gd);
372 goto out;
373 }
374
375 info->rq = gd->queue;
376 info->gd = gd;
377
378 if (info->feature_barrier)
379 xlvbd_barrier(info);
380
381 if (vdisk_info & VDISK_READONLY)
382 set_disk_ro(gd, 1);
383
384 if (vdisk_info & VDISK_REMOVABLE)
385 gd->flags |= GENHD_FL_REMOVABLE;
386
387 if (vdisk_info & VDISK_CDROM)
388 gd->flags |= GENHD_FL_CD;
389
390 return 0;
391
392 out:
393 return err;
394}
395
396static void kick_pending_request_queues(struct blkfront_info *info)
397{
398 if (!RING_FULL(&info->ring)) {
399 /* Re-enable calldowns. */
400 blk_start_queue(info->rq);
401 /* Kick things off immediately. */
402 do_blkif_request(info->rq);
403 }
404}
405
406static void blkif_restart_queue(struct work_struct *work)
407{
408 struct blkfront_info *info = container_of(work, struct blkfront_info, work);
409
410 spin_lock_irq(&blkif_io_lock);
411 if (info->connected == BLKIF_STATE_CONNECTED)
412 kick_pending_request_queues(info);
413 spin_unlock_irq(&blkif_io_lock);
414}
415
416static void blkif_free(struct blkfront_info *info, int suspend)
417{
418 /* Prevent new requests being issued until we fix things up. */
419 spin_lock_irq(&blkif_io_lock);
420 info->connected = suspend ?
421 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
422 /* No more blkif_request(). */
423 if (info->rq)
424 blk_stop_queue(info->rq);
425 /* No more gnttab callback work. */
426 gnttab_cancel_free_callback(&info->callback);
427 spin_unlock_irq(&blkif_io_lock);
428
429 /* Flush gnttab callback work. Must be done with no locks held. */
430 flush_scheduled_work();
431
432 /* Free resources associated with old device channel. */
433 if (info->ring_ref != GRANT_INVALID_REF) {
434 gnttab_end_foreign_access(info->ring_ref, 0,
435 (unsigned long)info->ring.sring);
436 info->ring_ref = GRANT_INVALID_REF;
437 info->ring.sring = NULL;
438 }
439 if (info->irq)
440 unbind_from_irqhandler(info->irq, info);
441 info->evtchn = info->irq = 0;
442
443}
444
445static void blkif_completion(struct blk_shadow *s)
446{
447 int i;
448 for (i = 0; i < s->req.nr_segments; i++)
449 gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
450}
451
452static irqreturn_t blkif_interrupt(int irq, void *dev_id)
453{
454 struct request *req;
455 struct blkif_response *bret;
456 RING_IDX i, rp;
457 unsigned long flags;
458 struct blkfront_info *info = (struct blkfront_info *)dev_id;
459 int uptodate;
460
461 spin_lock_irqsave(&blkif_io_lock, flags);
462
463 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
464 spin_unlock_irqrestore(&blkif_io_lock, flags);
465 return IRQ_HANDLED;
466 }
467
468 again:
469 rp = info->ring.sring->rsp_prod;
470 rmb(); /* Ensure we see queued responses up to 'rp'. */
471
472 for (i = info->ring.rsp_cons; i != rp; i++) {
473 unsigned long id;
474 int ret;
475
476 bret = RING_GET_RESPONSE(&info->ring, i);
477 id = bret->id;
478 req = (struct request *)info->shadow[id].request;
479
480 blkif_completion(&info->shadow[id]);
481
482 add_id_to_freelist(info, id);
483
484 uptodate = (bret->status == BLKIF_RSP_OKAY);
485 switch (bret->operation) {
486 case BLKIF_OP_WRITE_BARRIER:
487 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
488 printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
489 info->gd->disk_name);
490 uptodate = -EOPNOTSUPP;
491 info->feature_barrier = 0;
492 xlvbd_barrier(info);
493 }
494 /* fall through */
495 case BLKIF_OP_READ:
496 case BLKIF_OP_WRITE:
497 if (unlikely(bret->status != BLKIF_RSP_OKAY))
498 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
499 "request: %x\n", bret->status);
500
501 ret = end_that_request_first(req, uptodate,
502 req->hard_nr_sectors);
503 BUG_ON(ret);
504 end_that_request_last(req, uptodate);
505 break;
506 default:
507 BUG();
508 }
509 }
510
511 info->ring.rsp_cons = i;
512
513 if (i != info->ring.req_prod_pvt) {
514 int more_to_do;
515 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
516 if (more_to_do)
517 goto again;
518 } else
519 info->ring.sring->rsp_event = i + 1;
520
521 kick_pending_request_queues(info);
522
523 spin_unlock_irqrestore(&blkif_io_lock, flags);
524
525 return IRQ_HANDLED;
526}
527
528
529static int setup_blkring(struct xenbus_device *dev,
530 struct blkfront_info *info)
531{
532 struct blkif_sring *sring;
533 int err;
534
535 info->ring_ref = GRANT_INVALID_REF;
536
537 sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL);
538 if (!sring) {
539 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
540 return -ENOMEM;
541 }
542 SHARED_RING_INIT(sring);
543 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
544
545 err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
546 if (err < 0) {
547 free_page((unsigned long)sring);
548 info->ring.sring = NULL;
549 goto fail;
550 }
551 info->ring_ref = err;
552
553 err = xenbus_alloc_evtchn(dev, &info->evtchn);
554 if (err)
555 goto fail;
556
557 err = bind_evtchn_to_irqhandler(info->evtchn,
558 blkif_interrupt,
559 IRQF_SAMPLE_RANDOM, "blkif", info);
560 if (err <= 0) {
561 xenbus_dev_fatal(dev, err,
562 "bind_evtchn_to_irqhandler failed");
563 goto fail;
564 }
565 info->irq = err;
566
567 return 0;
568fail:
569 blkif_free(info, 0);
570 return err;
571}
572
573
574/* Common code used when first setting up, and when resuming. */
575static int talk_to_backend(struct xenbus_device *dev,
576 struct blkfront_info *info)
577{
578 const char *message = NULL;
579 struct xenbus_transaction xbt;
580 int err;
581
582 /* Create shared ring, alloc event channel. */
583 err = setup_blkring(dev, info);
584 if (err)
585 goto out;
586
587again:
588 err = xenbus_transaction_start(&xbt);
589 if (err) {
590 xenbus_dev_fatal(dev, err, "starting transaction");
591 goto destroy_blkring;
592 }
593
594 err = xenbus_printf(xbt, dev->nodename,
595 "ring-ref", "%u", info->ring_ref);
596 if (err) {
597 message = "writing ring-ref";
598 goto abort_transaction;
599 }
600 err = xenbus_printf(xbt, dev->nodename,
601 "event-channel", "%u", info->evtchn);
602 if (err) {
603 message = "writing event-channel";
604 goto abort_transaction;
605 }
606
607 err = xenbus_transaction_end(xbt, 0);
608 if (err) {
609 if (err == -EAGAIN)
610 goto again;
611 xenbus_dev_fatal(dev, err, "completing transaction");
612 goto destroy_blkring;
613 }
614
615 xenbus_switch_state(dev, XenbusStateInitialised);
616
617 return 0;
618
619 abort_transaction:
620 xenbus_transaction_end(xbt, 1);
621 if (message)
622 xenbus_dev_fatal(dev, err, "%s", message);
623 destroy_blkring:
624 blkif_free(info, 0);
625 out:
626 return err;
627}
628
629
630/**
631 * Entry point to this code when a new device is created. Allocate the basic
632 * structures and the ring buffer for communication with the backend, and
633 * inform the backend of the appropriate details for those. Switch to
634 * Initialised state.
635 */
636static int blkfront_probe(struct xenbus_device *dev,
637 const struct xenbus_device_id *id)
638{
639 int err, vdevice, i;
640 struct blkfront_info *info;
641
642 /* FIXME: Use dynamic device id if this is not set. */
643 err = xenbus_scanf(XBT_NIL, dev->nodename,
644 "virtual-device", "%i", &vdevice);
645 if (err != 1) {
646 xenbus_dev_fatal(dev, err, "reading virtual-device");
647 return err;
648 }
649
650 info = kzalloc(sizeof(*info), GFP_KERNEL);
651 if (!info) {
652 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
653 return -ENOMEM;
654 }
655
656 info->xbdev = dev;
657 info->vdevice = vdevice;
658 info->connected = BLKIF_STATE_DISCONNECTED;
659 INIT_WORK(&info->work, blkif_restart_queue);
660
661 for (i = 0; i < BLK_RING_SIZE; i++)
662 info->shadow[i].req.id = i+1;
663 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
664
665 /* Front end dir is a number, which is used as the id. */
666 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
667 dev->dev.driver_data = info;
668
669 err = talk_to_backend(dev, info);
670 if (err) {
671 kfree(info);
672 dev->dev.driver_data = NULL;
673 return err;
674 }
675
676 return 0;
677}
678
679
680static int blkif_recover(struct blkfront_info *info)
681{
682 int i;
683 struct blkif_request *req;
684 struct blk_shadow *copy;
685 int j;
686
687 /* Stage 1: Make a safe copy of the shadow state. */
688 copy = kmalloc(sizeof(info->shadow), GFP_KERNEL);
689 if (!copy)
690 return -ENOMEM;
691 memcpy(copy, info->shadow, sizeof(info->shadow));
692
693 /* Stage 2: Set up free list. */
694 memset(&info->shadow, 0, sizeof(info->shadow));
695 for (i = 0; i < BLK_RING_SIZE; i++)
696 info->shadow[i].req.id = i+1;
697 info->shadow_free = info->ring.req_prod_pvt;
698 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
699
700 /* Stage 3: Find pending requests and requeue them. */
701 for (i = 0; i < BLK_RING_SIZE; i++) {
702 /* Not in use? */
703 if (copy[i].request == 0)
704 continue;
705
706 /* Grab a request slot and copy shadow state into it. */
707 req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
708 *req = copy[i].req;
709
710 /* We get a new request id, and must reset the shadow state. */
711 req->id = get_id_from_freelist(info);
712 memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
713
714 /* Rewrite any grant references invalidated by susp/resume. */
715 for (j = 0; j < req->nr_segments; j++)
716 gnttab_grant_foreign_access_ref(
717 req->seg[j].gref,
718 info->xbdev->otherend_id,
719 pfn_to_mfn(info->shadow[req->id].frame[j]),
720 rq_data_dir(
721 (struct request *)
722 info->shadow[req->id].request));
723 info->shadow[req->id].req = *req;
724
725 info->ring.req_prod_pvt++;
726 }
727
728 kfree(copy);
729
730 xenbus_switch_state(info->xbdev, XenbusStateConnected);
731
732 spin_lock_irq(&blkif_io_lock);
733
734 /* Now safe for us to use the shared ring */
735 info->connected = BLKIF_STATE_CONNECTED;
736
737 /* Send off requeued requests */
738 flush_requests(info);
739
740 /* Kick any other new requests queued since we resumed */
741 kick_pending_request_queues(info);
742
743 spin_unlock_irq(&blkif_io_lock);
744
745 return 0;
746}
747
748/**
749 * We are reconnecting to the backend, due to a suspend/resume, or a backend
750 * driver restart. We tear down our blkif structure and recreate it, but
751 * leave the device-layer structures intact so that this is transparent to the
752 * rest of the kernel.
753 */
754static int blkfront_resume(struct xenbus_device *dev)
755{
756 struct blkfront_info *info = dev->dev.driver_data;
757 int err;
758
759 dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
760
761 blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
762
763 err = talk_to_backend(dev, info);
764 if (info->connected == BLKIF_STATE_SUSPENDED && !err)
765 err = blkif_recover(info);
766
767 return err;
768}
769
770
771/*
772 * Invoked when the backend is finally 'ready' (and has produced
773 * the details about the physical device - #sectors, size, etc).
774 */
775static void blkfront_connect(struct blkfront_info *info)
776{
777 unsigned long long sectors;
778 unsigned long sector_size;
779 unsigned int binfo;
780 int err;
781
782 if ((info->connected == BLKIF_STATE_CONNECTED) ||
783 (info->connected == BLKIF_STATE_SUSPENDED) )
784 return;
785
786 dev_dbg(&info->xbdev->dev, "%s:%s.\n",
787 __func__, info->xbdev->otherend);
788
789 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
790 "sectors", "%llu", &sectors,
791 "info", "%u", &binfo,
792 "sector-size", "%lu", &sector_size,
793 NULL);
794 if (err) {
795 xenbus_dev_fatal(info->xbdev, err,
796 "reading backend fields at %s",
797 info->xbdev->otherend);
798 return;
799 }
800
801 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
802 "feature-barrier", "%lu", &info->feature_barrier,
803 NULL);
804 if (err)
805 info->feature_barrier = 0;
806
807 err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice),
808 sectors, info->vdevice,
809 binfo, sector_size, info);
810 if (err) {
811 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
812 info->xbdev->otherend);
813 return;
814 }
815
816 xenbus_switch_state(info->xbdev, XenbusStateConnected);
817
818 /* Kick pending requests. */
819 spin_lock_irq(&blkif_io_lock);
820 info->connected = BLKIF_STATE_CONNECTED;
821 kick_pending_request_queues(info);
822 spin_unlock_irq(&blkif_io_lock);
823
824 add_disk(info->gd);
825}
826
827/**
828 * Handle the change of state of the backend to Closing. We must delete our
829 * device-layer structures now, to ensure that writes are flushed through to
830 * the backend. Once this is done, we can switch to Closed in
831 * acknowledgement.
832 */
833static void blkfront_closing(struct xenbus_device *dev)
834{
835 struct blkfront_info *info = dev->dev.driver_data;
836 unsigned long flags;
837
838 dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
839
840 if (info->rq == NULL)
841 goto out;
842
843 spin_lock_irqsave(&blkif_io_lock, flags);
844
845 del_gendisk(info->gd);
846
847 /* No more blkif_request(). */
848 blk_stop_queue(info->rq);
849
850 /* No more gnttab callback work. */
851 gnttab_cancel_free_callback(&info->callback);
852 spin_unlock_irqrestore(&blkif_io_lock, flags);
853
854 /* Flush gnttab callback work. Must be done with no locks held. */
855 flush_scheduled_work();
856
857 blk_cleanup_queue(info->rq);
858 info->rq = NULL;
859
860 out:
861 xenbus_frontend_closed(dev);
862}
863
864/**
865 * Callback received when the backend's state changes.
866 */
867static void backend_changed(struct xenbus_device *dev,
868 enum xenbus_state backend_state)
869{
870 struct blkfront_info *info = dev->dev.driver_data;
871 struct block_device *bd;
872
873 dev_dbg(&dev->dev, "blkfront:backend_changed.\n");
874
875 switch (backend_state) {
876 case XenbusStateInitialising:
877 case XenbusStateInitWait:
878 case XenbusStateInitialised:
879 case XenbusStateUnknown:
880 case XenbusStateClosed:
881 break;
882
883 case XenbusStateConnected:
884 blkfront_connect(info);
885 break;
886
887 case XenbusStateClosing:
888 bd = bdget(info->dev);
889 if (bd == NULL)
890 xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
891
892 mutex_lock(&bd->bd_mutex);
893 if (info->users > 0)
894 xenbus_dev_error(dev, -EBUSY,
895 "Device in use; refusing to close");
896 else
897 blkfront_closing(dev);
898 mutex_unlock(&bd->bd_mutex);
899 bdput(bd);
900 break;
901 }
902}
903
904static int blkfront_remove(struct xenbus_device *dev)
905{
906 struct blkfront_info *info = dev->dev.driver_data;
907
908 dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);
909
910 blkif_free(info, 0);
911
912 kfree(info);
913
914 return 0;
915}
916
917static int blkif_open(struct inode *inode, struct file *filep)
918{
919 struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
920 info->users++;
921 return 0;
922}
923
924static int blkif_release(struct inode *inode, struct file *filep)
925{
926 struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
927 info->users--;
928 if (info->users == 0) {
929 /* Check whether we have been instructed to close. We will
930 have ignored this request initially, as the device was
931 still mounted. */
932 struct xenbus_device *dev = info->xbdev;
933 enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
934
935 if (state == XenbusStateClosing)
936 blkfront_closing(dev);
937 }
938 return 0;
939}
940
941static struct block_device_operations xlvbd_block_fops =
942{
943 .owner = THIS_MODULE,
944 .open = blkif_open,
945 .release = blkif_release,
946};
947
948
949static struct xenbus_device_id blkfront_ids[] = {
950 { "vbd" },
951 { "" }
952};
953
954static struct xenbus_driver blkfront = {
955 .name = "vbd",
956 .owner = THIS_MODULE,
957 .ids = blkfront_ids,
958 .probe = blkfront_probe,
959 .remove = blkfront_remove,
960 .resume = blkfront_resume,
961 .otherend_changed = backend_changed,
962};
963
964static int __init xlblk_init(void)
965{
966 if (!is_running_on_xen())
967 return -ENODEV;
968
969 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
970 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
971 XENVBD_MAJOR, DEV_NAME);
972 return -ENODEV;
973 }
974
975 return xenbus_register_frontend(&blkfront);
976}
977module_init(xlblk_init);
978
979
980static void xlblk_exit(void)
981{
982 return xenbus_unregister_driver(&blkfront);
983}
984module_exit(xlblk_exit);
985
986MODULE_DESCRIPTION("Xen virtual block device frontend");
987MODULE_LICENSE("GPL");
988MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 97bd71bc3aea..9e8f21410d2d 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -604,6 +604,14 @@ config HVC_BEAT
         help
           Toshiba's Cell Reference Set Beat Console device driver
 
+config HVC_XEN
+        bool "Xen Hypervisor Console support"
+        depends on XEN
+        select HVC_DRIVER
+        default y
+        help
+          Xen virtual console device driver
+
 config HVCS
         tristate "IBM Hypervisor Virtual Console Server support"
         depends on PPC_PSERIES
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index f2996a95eb07..8852b8d643cf 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o
 obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o
 obj-$(CONFIG_HVC_BEAT) += hvc_beat.o
 obj-$(CONFIG_HVC_DRIVER) += hvc_console.o
+obj-$(CONFIG_HVC_XEN) += hvc_xen.o
 obj-$(CONFIG_RAW_DRIVER) += raw.o
 obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o
 obj-$(CONFIG_MSPEC) += mspec.o
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
new file mode 100644
index 000000000000..dd68f8541c2d
--- /dev/null
+++ b/drivers/char/hvc_xen.c
@@ -0,0 +1,159 @@
1/*
2 * xen console driver interface to hvc_console.c
3 *
4 * (c) 2007 Gerd Hoffmann <kraxel@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21#include <linux/console.h>
22#include <linux/delay.h>
23#include <linux/err.h>
24#include <linux/init.h>
25#include <linux/types.h>
26
27#include <asm/xen/hypervisor.h>
28#include <xen/page.h>
29#include <xen/events.h>
30#include <xen/interface/io/console.h>
31#include <xen/hvc-console.h>
32
33#include "hvc_console.h"
34
35#define HVC_COOKIE 0x58656e /* "Xen" in hex */
36
37static struct hvc_struct *hvc;
38static int xencons_irq;
39
40/* ------------------------------------------------------------------ */
41
42static inline struct xencons_interface *xencons_interface(void)
43{
44 return mfn_to_virt(xen_start_info->console.domU.mfn);
45}
46
47static inline void notify_daemon(void)
48{
49 /* Use evtchn: this is called early, before irq is set up. */
50 notify_remote_via_evtchn(xen_start_info->console.domU.evtchn);
51}
52
53static int write_console(uint32_t vtermno, const char *data, int len)
54{
55 struct xencons_interface *intf = xencons_interface();
56 XENCONS_RING_IDX cons, prod;
57 int sent = 0;
58
59 cons = intf->out_cons;
60 prod = intf->out_prod;
61 mb(); /* update queue values before going on */
62 BUG_ON((prod - cons) > sizeof(intf->out));
63
64 while ((sent < len) && ((prod - cons) < sizeof(intf->out)))
65 intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
66
67 wmb(); /* write ring before updating pointer */
68 intf->out_prod = prod;
69
70 notify_daemon();
71 return sent;
72}
73
74static int read_console(uint32_t vtermno, char *buf, int len)
75{
76 struct xencons_interface *intf = xencons_interface();
77 XENCONS_RING_IDX cons, prod;
78 int recv = 0;
79
80 cons = intf->in_cons;
81 prod = intf->in_prod;
82 mb(); /* get pointers before reading ring */
83 BUG_ON((prod - cons) > sizeof(intf->in));
84
85 while (cons != prod && recv < len)
86 buf[recv++] = intf->in[MASK_XENCONS_IDX(cons++, intf->in)];
87
88 mb(); /* read ring before consuming */
89 intf->in_cons = cons;
90
91 notify_daemon();
92 return recv;
93}
94
95static struct hv_ops hvc_ops = {
96 .get_chars = read_console,
97 .put_chars = write_console,
98};
99
100static int __init xen_init(void)
101{
102 struct hvc_struct *hp;
103
104 if (!is_running_on_xen())
105 return 0;
106
107 xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
108 if (xencons_irq < 0)
109 xencons_irq = 0 /* NO_IRQ */;
110 hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256);
111 if (IS_ERR(hp))
112 return PTR_ERR(hp);
113
114 hvc = hp;
115 return 0;
116}
117
118static void __exit xen_fini(void)
119{
120 if (hvc)
121 hvc_remove(hvc);
122}
123
124static int xen_cons_init(void)
125{
126 if (!is_running_on_xen())
127 return 0;
128
129 hvc_instantiate(HVC_COOKIE, 0, &hvc_ops);
130 return 0;
131}
132
133module_init(xen_init);
134module_exit(xen_fini);
135console_initcall(xen_cons_init);
136
137static void xenboot_write_console(struct console *console, const char *string,
138 unsigned len)
139{
140 unsigned int linelen, off = 0;
141 const char *pos;
142
143 while (off < len && NULL != (pos = strchr(string+off, '\n'))) {
144 linelen = pos-string+off;
145 if (off + linelen > len)
146 break;
147 write_console(0, string+off, linelen);
148 write_console(0, "\r\n", 2);
149 off += linelen + 1;
150 }
151 if (off < len)
152 write_console(0, string+off, len-off);
153}
154
155struct console xenboot_console = {
156 .name = "xenboot",
157 .write = xenboot_write_console,
158 .flags = CON_PRINTBUFFER | CON_BOOT,
159};
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index dbb22403979f..3d90fc002097 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -1770,7 +1770,8 @@ static int call_critical_overtemp(void)
                 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
                 NULL };
 
-        return call_usermodehelper(critical_overtemp_path, argv, envp, 0);
+        return call_usermodehelper(critical_overtemp_path,
+                                   argv, envp, UMH_WAIT_EXEC);
 }
 
 
diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c
index e18d265d5d33..516d943227e2 100644
--- a/drivers/macintosh/windfarm_core.c
+++ b/drivers/macintosh/windfarm_core.c
@@ -80,7 +80,8 @@ int wf_critical_overtemp(void)
                 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
                 NULL };
 
-        return call_usermodehelper(critical_overtemp_path, argv, envp, 0);
+        return call_usermodehelper(critical_overtemp_path,
+                                   argv, envp, UMH_WAIT_EXEC);
 }
 EXPORT_SYMBOL_GPL(wf_critical_overtemp);
 
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 43d03178064d..5fb659f8b20e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2486,6 +2486,18 @@ source "drivers/atm/Kconfig"
 
 source "drivers/s390/net/Kconfig"
 
+config XEN_NETDEV_FRONTEND
+        tristate "Xen network device frontend driver"
+        depends on XEN
+        default y
+        help
+          The network device frontend driver allows the kernel to
+          access network devices exported by a virtual machine
+          containing a physical network device driver. The frontend
+          driver is intended for unprivileged guest domains; if you
+          are compiling a kernel for a Xen guest, you almost
+          certainly want to enable this.
+
 config ISERIES_VETH
         tristate "iSeries Virtual Ethernet driver support"
         depends on PPC_ISERIES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index eb4167622a6a..0e286ab8855a 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -127,6 +127,8 @@ obj-$(CONFIG_PPPOL2TP) += pppox.o pppol2tp.o
 obj-$(CONFIG_SLIP) += slip.o
 obj-$(CONFIG_SLHC) += slhc.o
 
+obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
+
 obj-$(CONFIG_DUMMY) += dummy.o
 obj-$(CONFIG_IFB) += ifb.o
 obj-$(CONFIG_MACVLAN) += macvlan.o
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 84aa2117c0ee..355c6cf3d112 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -320,7 +320,7 @@ static int eppconfig(struct baycom_state *bc)
         sprintf(portarg, "%ld", bc->pdev->port->base);
         printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg);
 
-        return call_usermodehelper(eppconfig_path, argv, envp, 1);
+        return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC);
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
new file mode 100644
index 000000000000..489f69c5d6ca
--- /dev/null
+++ b/drivers/net/xen-netfront.c
@@ -0,0 +1,1863 @@
1/*
2 * Virtual network driver for conversing with remote driver backends.
3 *
4 * Copyright (c) 2002-2005, K A Fraser
5 * Copyright (c) 2005, XenSource Ltd
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version 2
9 * as published by the Free Software Foundation; or, when distributed
10 * separately from the Linux kernel or incorporated into other
11 * software packages, subject to the following license:
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this source file (the "Software"), to deal in the Software without
15 * restriction, including without limitation the rights to use, copy, modify,
16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
17 * and to permit persons to whom the Software is furnished to do so, subject to
18 * the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 * IN THE SOFTWARE.
30 */
31
32#include <linux/module.h>
33#include <linux/kernel.h>
34#include <linux/netdevice.h>
35#include <linux/etherdevice.h>
36#include <linux/skbuff.h>
37#include <linux/ethtool.h>
38#include <linux/if_ether.h>
39#include <linux/tcp.h>
40#include <linux/udp.h>
41#include <linux/moduleparam.h>
42#include <linux/mm.h>
43#include <net/ip.h>
44
45#include <xen/xenbus.h>
46#include <xen/events.h>
47#include <xen/page.h>
48#include <xen/grant_table.h>
49
50#include <xen/interface/io/netif.h>
51#include <xen/interface/memory.h>
52#include <xen/interface/grant_table.h>
53
54static struct ethtool_ops xennet_ethtool_ops;
55
56struct netfront_cb {
57 struct page *page;
58 unsigned offset;
59};
60
61#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
62
63#define RX_COPY_THRESHOLD 256
64
65#define GRANT_INVALID_REF 0
66
67#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
68#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
69#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
70
71struct netfront_info {
72 struct list_head list;
73 struct net_device *netdev;
74
75 struct net_device_stats stats;
76
77 struct xen_netif_tx_front_ring tx;
78 struct xen_netif_rx_front_ring rx;
79
80 spinlock_t tx_lock;
81 spinlock_t rx_lock;
82
83 unsigned int evtchn;
84
85 /* Receive-ring batched refills. */
86#define RX_MIN_TARGET 8
87#define RX_DFL_MIN_TARGET 64
88#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
89 unsigned rx_min_target, rx_max_target, rx_target;
90 struct sk_buff_head rx_batch;
91
92 struct timer_list rx_refill_timer;
93
94 /*
95 * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
96 * are linked from tx_skb_freelist through skb_entry.link.
97 *
98 * NB. Freelist index entries are always going to be less than
99 * PAGE_OFFSET, whereas pointers to skbs will always be equal or
100 * greater than PAGE_OFFSET: we use this property to distinguish
101 * them.
102 */
103 union skb_entry {
104 struct sk_buff *skb;
105 unsigned link;
106 } tx_skbs[NET_TX_RING_SIZE];
107 grant_ref_t gref_tx_head;
108 grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
109 unsigned tx_skb_freelist;
110
111 struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
112 grant_ref_t gref_rx_head;
113 grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
114
115 struct xenbus_device *xbdev;
116 int tx_ring_ref;
117 int rx_ring_ref;
118
119 unsigned long rx_pfn_array[NET_RX_RING_SIZE];
120 struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
121 struct mmu_update rx_mmu[NET_RX_RING_SIZE];
122};
123
124struct netfront_rx_info {
125 struct xen_netif_rx_response rx;
126 struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
127};
128
129/*
130 * Access macros for acquiring and freeing slots in tx_skbs[].
131 */
132
133static void add_id_to_freelist(unsigned *head, union skb_entry *list,
134 unsigned short id)
135{
136 list[id].link = *head;
137 *head = id;
138}
139
140static unsigned short get_id_from_freelist(unsigned *head,
141 union skb_entry *list)
142{
143 unsigned int id = *head;
144 *head = list[id].link;
145 return id;
146}
147
148static int xennet_rxidx(RING_IDX idx)
149{
150 return idx & (NET_RX_RING_SIZE - 1);
151}
152
153static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
154 RING_IDX ri)
155{
156 int i = xennet_rxidx(ri);
157 struct sk_buff *skb = np->rx_skbs[i];
158 np->rx_skbs[i] = NULL;
159 return skb;
160}
161
162static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
163 RING_IDX ri)
164{
165 int i = xennet_rxidx(ri);
166 grant_ref_t ref = np->grant_rx_ref[i];
167 np->grant_rx_ref[i] = GRANT_INVALID_REF;
168 return ref;
169}
170
171#ifdef CONFIG_SYSFS
172static int xennet_sysfs_addif(struct net_device *netdev);
173static void xennet_sysfs_delif(struct net_device *netdev);
174#else /* !CONFIG_SYSFS */
175#define xennet_sysfs_addif(dev) (0)
176#define xennet_sysfs_delif(dev) do { } while (0)
177#endif
178
179static int xennet_can_sg(struct net_device *dev)
180{
181 return dev->features & NETIF_F_SG;
182}
183
184
185static void rx_refill_timeout(unsigned long data)
186{
187 struct net_device *dev = (struct net_device *)data;
188 netif_rx_schedule(dev);
189}
190
191static int netfront_tx_slot_available(struct netfront_info *np)
192{
193 return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
194 (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
195}
196
197static void xennet_maybe_wake_tx(struct net_device *dev)
198{
199 struct netfront_info *np = netdev_priv(dev);
200
201 if (unlikely(netif_queue_stopped(dev)) &&
202 netfront_tx_slot_available(np) &&
203 likely(netif_running(dev)))
204 netif_wake_queue(dev);
205}
206
207static void xennet_alloc_rx_buffers(struct net_device *dev)
208{
209 unsigned short id;
210 struct netfront_info *np = netdev_priv(dev);
211 struct sk_buff *skb;
212 struct page *page;
213 int i, batch_target, notify;
214 RING_IDX req_prod = np->rx.req_prod_pvt;
215 struct xen_memory_reservation reservation;
216 grant_ref_t ref;
217 unsigned long pfn;
218 void *vaddr;
219 int nr_flips;
220 struct xen_netif_rx_request *req;
221
222 if (unlikely(!netif_carrier_ok(dev)))
223 return;
224
225 /*
226 * Allocate skbuffs greedily, even though we batch updates to the
227 * receive ring. This creates a less bursty demand on the memory
228 * allocator, so should reduce the chance of failed allocation requests
229 * both for ourselves and for other kernel subsystems.
230 */
231 batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
232 for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
233 skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD,
234 GFP_ATOMIC | __GFP_NOWARN);
235 if (unlikely(!skb))
236 goto no_skb;
237
238 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
239 if (!page) {
240 kfree_skb(skb);
241no_skb:
242 /* Any skbuffs queued for refill? Force them out. */
243 if (i != 0)
244 goto refill;
245 /* Could not allocate any skbuffs. Try again later. */
246 mod_timer(&np->rx_refill_timer,
247 jiffies + (HZ/10));
248 break;
249 }
250
251 skb_shinfo(skb)->frags[0].page = page;
252 skb_shinfo(skb)->nr_frags = 1;
253 __skb_queue_tail(&np->rx_batch, skb);
254 }
255
256 /* Is the batch large enough to be worthwhile? */
257 if (i < (np->rx_target/2)) {
258 if (req_prod > np->rx.sring->req_prod)
259 goto push;
260 return;
261 }
262
263 /* Adjust our fill target if we risked running out of buffers. */
264 if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
265 ((np->rx_target *= 2) > np->rx_max_target))
266 np->rx_target = np->rx_max_target;
267
268 refill:
269 for (nr_flips = i = 0; ; i++) {
270 skb = __skb_dequeue(&np->rx_batch);
271 if (skb == NULL)
272 break;
273
274 skb->dev = dev;
275
276 id = xennet_rxidx(req_prod + i);
277
278 BUG_ON(np->rx_skbs[id]);
279 np->rx_skbs[id] = skb;
280
281 ref = gnttab_claim_grant_reference(&np->gref_rx_head);
282 BUG_ON((signed short)ref < 0);
283 np->grant_rx_ref[id] = ref;
284
285 pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
286 vaddr = page_address(skb_shinfo(skb)->frags[0].page);
287
288 req = RING_GET_REQUEST(&np->rx, req_prod + i);
289 gnttab_grant_foreign_access_ref(ref,
290 np->xbdev->otherend_id,
291 pfn_to_mfn(pfn),
292 0);
293
294 req->id = id;
295 req->gref = ref;
296 }
297
298 if (nr_flips != 0) {
299 reservation.extent_start = np->rx_pfn_array;
300 reservation.nr_extents = nr_flips;
301 reservation.extent_order = 0;
302 reservation.address_bits = 0;
303 reservation.domid = DOMID_SELF;
304
305 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
306 /* After all PTEs have been zapped, flush the TLB. */
307 np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
308 UVMF_TLB_FLUSH|UVMF_ALL;
309
310 /* Give away a batch of pages. */
311 np->rx_mcl[i].op = __HYPERVISOR_memory_op;
312 np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
313 np->rx_mcl[i].args[1] = (unsigned long)&reservation;
314
315 /* Zap PTEs and give away pages in one big
316 * multicall. */
317 (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
318
319 /* Check return status of HYPERVISOR_memory_op(). */
320 if (unlikely(np->rx_mcl[i].result != i))
321 panic("Unable to reduce memory reservation\n");
322 } else {
323 if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
324 &reservation) != i)
325 panic("Unable to reduce memory reservation\n");
326 }
327 } else {
328 wmb(); /* barrier so backend sees requests */
329 }
330
331 /* Above is a suitable barrier to ensure backend will see requests. */
332 np->rx.req_prod_pvt = req_prod + i;
333 push:
334 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
335 if (notify)
336 notify_remote_via_irq(np->netdev->irq);
337}
338
339static int xennet_open(struct net_device *dev)
340{
341 struct netfront_info *np = netdev_priv(dev);
342
343 memset(&np->stats, 0, sizeof(np->stats));
344
345 spin_lock_bh(&np->rx_lock);
346 if (netif_carrier_ok(dev)) {
347 xennet_alloc_rx_buffers(dev);
348 np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
349 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
350 netif_rx_schedule(dev);
351 }
352 spin_unlock_bh(&np->rx_lock);
353
354 xennet_maybe_wake_tx(dev);
355
356 return 0;
357}
358
359static void xennet_tx_buf_gc(struct net_device *dev)
360{
361 RING_IDX cons, prod;
362 unsigned short id;
363 struct netfront_info *np = netdev_priv(dev);
364 struct sk_buff *skb;
365
366 BUG_ON(!netif_carrier_ok(dev));
367
368 do {
369 prod = np->tx.sring->rsp_prod;
370 rmb(); /* Ensure we see responses up to 'rp'. */
371
372 for (cons = np->tx.rsp_cons; cons != prod; cons++) {
373 struct xen_netif_tx_response *txrsp;
374
375 txrsp = RING_GET_RESPONSE(&np->tx, cons);
376 if (txrsp->status == NETIF_RSP_NULL)
377 continue;
378
379 id = txrsp->id;
380 skb = np->tx_skbs[id].skb;
381 if (unlikely(gnttab_query_foreign_access(
382 np->grant_tx_ref[id]) != 0)) {
383 printk(KERN_ALERT "xennet_tx_buf_gc: warning "
384 "-- grant still in use by backend "
385 "domain.\n");
386 BUG();
387 }
388 gnttab_end_foreign_access_ref(
389 np->grant_tx_ref[id], GNTMAP_readonly);
390 gnttab_release_grant_reference(
391 &np->gref_tx_head, np->grant_tx_ref[id]);
392 np->grant_tx_ref[id] = GRANT_INVALID_REF;
393 add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
394 dev_kfree_skb_irq(skb);
395 }
396
397 np->tx.rsp_cons = prod;
398
399 /*
400 * Set a new event, then check for race with update of tx_cons.
401 * Note that it is essential to schedule a callback, no matter
402 * how few buffers are pending. Even if there is space in the
403 * transmit ring, higher layers may be blocked because too much
404 * data is outstanding: in such cases notification from Xen is
405 * likely to be the only kick that we'll get.
406 */
407 np->tx.sring->rsp_event =
408 prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
409 mb(); /* update shared area */
410 } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
411
412 xennet_maybe_wake_tx(dev);
413}
414
415static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
416 struct xen_netif_tx_request *tx)
417{
418 struct netfront_info *np = netdev_priv(dev);
419 char *data = skb->data;
420 unsigned long mfn;
421 RING_IDX prod = np->tx.req_prod_pvt;
422 int frags = skb_shinfo(skb)->nr_frags;
423 unsigned int offset = offset_in_page(data);
424 unsigned int len = skb_headlen(skb);
425 unsigned int id;
426 grant_ref_t ref;
427 int i;
428
429 /* While the header overlaps a page boundary (including being
430 larger than a page), split it into page-sized chunks. */
431 while (len > PAGE_SIZE - offset) {
432 tx->size = PAGE_SIZE - offset;
433 tx->flags |= NETTXF_more_data;
434 len -= tx->size;
435 data += tx->size;
436 offset = 0;
437
438 id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
439 np->tx_skbs[id].skb = skb_get(skb);
440 tx = RING_GET_REQUEST(&np->tx, prod++);
441 tx->id = id;
442 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
443 BUG_ON((signed short)ref < 0);
444
445 mfn = virt_to_mfn(data);
446 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
447 mfn, GNTMAP_readonly);
448
449 tx->gref = np->grant_tx_ref[id] = ref;
450 tx->offset = offset;
451 tx->size = len;
452 tx->flags = 0;
453 }
454
455 /* Grant backend access to each skb fragment page. */
456 for (i = 0; i < frags; i++) {
457 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
458
459 tx->flags |= NETTXF_more_data;
460
461 id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
462 np->tx_skbs[id].skb = skb_get(skb);
463 tx = RING_GET_REQUEST(&np->tx, prod++);
464 tx->id = id;
465 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
466 BUG_ON((signed short)ref < 0);
467
468 mfn = pfn_to_mfn(page_to_pfn(frag->page));
469 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
470 mfn, GNTMAP_readonly);
471
472 tx->gref = np->grant_tx_ref[id] = ref;
473 tx->offset = frag->page_offset;
474 tx->size = frag->size;
475 tx->flags = 0;
476 }
477
478 np->tx.req_prod_pvt = prod;
479}
480
481static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
482{
483 unsigned short id;
484 struct netfront_info *np = netdev_priv(dev);
485 struct xen_netif_tx_request *tx;
486 struct xen_netif_extra_info *extra;
487 char *data = skb->data;
488 RING_IDX i;
489 grant_ref_t ref;
490 unsigned long mfn;
491 int notify;
492 int frags = skb_shinfo(skb)->nr_frags;
493 unsigned int offset = offset_in_page(data);
494 unsigned int len = skb_headlen(skb);
495
496 frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
497 if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
498 printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
499 frags);
500 dump_stack();
501 goto drop;
502 }
503
504 spin_lock_irq(&np->tx_lock);
505
506 if (unlikely(!netif_carrier_ok(dev) ||
507 (frags > 1 && !xennet_can_sg(dev)) ||
508 netif_needs_gso(dev, skb))) {
509 spin_unlock_irq(&np->tx_lock);
510 goto drop;
511 }
512
513 i = np->tx.req_prod_pvt;
514
515 id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
516 np->tx_skbs[id].skb = skb;
517
518 tx = RING_GET_REQUEST(&np->tx, i);
519
520 tx->id = id;
521 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
522 BUG_ON((signed short)ref < 0);
523 mfn = virt_to_mfn(data);
524 gnttab_grant_foreign_access_ref(
525 ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
526 tx->gref = np->grant_tx_ref[id] = ref;
527 tx->offset = offset;
528 tx->size = len;
529 extra = NULL;
530
531 tx->flags = 0;
532 if (skb->ip_summed == CHECKSUM_PARTIAL)
533 /* local packet? */
534 tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
535 else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
536 /* remote but checksummed. */
537 tx->flags |= NETTXF_data_validated;
538
539 if (skb_shinfo(skb)->gso_size) {
540 struct xen_netif_extra_info *gso;
541
542 gso = (struct xen_netif_extra_info *)
543 RING_GET_REQUEST(&np->tx, ++i);
544
545 if (extra)
546 extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
547 else
548 tx->flags |= NETTXF_extra_info;
549
550 gso->u.gso.size = skb_shinfo(skb)->gso_size;
551 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
552 gso->u.gso.pad = 0;
553 gso->u.gso.features = 0;
554
555 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
556 gso->flags = 0;
557 extra = gso;
558 }
559
560 np->tx.req_prod_pvt = i + 1;
561
562 xennet_make_frags(skb, dev, tx);
563 tx->size = skb->len;
564
565 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
566 if (notify)
567 notify_remote_via_irq(np->netdev->irq);
568
569 xennet_tx_buf_gc(dev);
570
571 if (!netfront_tx_slot_available(np))
572 netif_stop_queue(dev);
573
574 spin_unlock_irq(&np->tx_lock);
575
576 np->stats.tx_bytes += skb->len;
577 np->stats.tx_packets++;
578
579 return 0;
580
581 drop:
582 np->stats.tx_dropped++;
583 dev_kfree_skb(skb);
584 return 0;
585}
586
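/*
 * [Editor's sketch] The producer half of the ring handshake used at the end
 * of xennet_start_xmit(): RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() publishes
 * req_prod and reports whether the backend had set req_event inside the
 * newly pushed range, so the event channel is only kicked when the peer is
 * actually waiting for more requests:
 */
static void example_push_and_notify(struct netfront_info *np)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
	if (notify)
		notify_remote_via_irq(np->netdev->irq);
}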
587static int xennet_close(struct net_device *dev)
588{
589 struct netfront_info *np = netdev_priv(dev);
590 netif_stop_queue(np->netdev);
591 return 0;
592}
593
594static struct net_device_stats *xennet_get_stats(struct net_device *dev)
595{
596 struct netfront_info *np = netdev_priv(dev);
597 return &np->stats;
598}
599
600static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
601 grant_ref_t ref)
602{
603 int new = xennet_rxidx(np->rx.req_prod_pvt);
604
605 BUG_ON(np->rx_skbs[new]);
606 np->rx_skbs[new] = skb;
607 np->grant_rx_ref[new] = ref;
608 RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
609 RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
610 np->rx.req_prod_pvt++;
611}
612
613static int xennet_get_extras(struct netfront_info *np,
614 struct xen_netif_extra_info *extras,
615 RING_IDX rp)
616
617{
618 struct xen_netif_extra_info *extra;
619 struct device *dev = &np->netdev->dev;
620 RING_IDX cons = np->rx.rsp_cons;
621 int err = 0;
622
623 do {
624 struct sk_buff *skb;
625 grant_ref_t ref;
626
627 if (unlikely(cons + 1 == rp)) {
628 if (net_ratelimit())
629 dev_warn(dev, "Missing extra info\n");
630 err = -EBADR;
631 break;
632 }
633
634 extra = (struct xen_netif_extra_info *)
635 RING_GET_RESPONSE(&np->rx, ++cons);
636
637 if (unlikely(!extra->type ||
638 extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
639 if (net_ratelimit())
640 dev_warn(dev, "Invalid extra type: %d\n",
641 extra->type);
642 err = -EINVAL;
643 } else {
644 memcpy(&extras[extra->type - 1], extra,
645 sizeof(*extra));
646 }
647
648 skb = xennet_get_rx_skb(np, cons);
649 ref = xennet_get_rx_ref(np, cons);
650 xennet_move_rx_slot(np, skb, ref);
651 } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
652
653 np->rx.rsp_cons = cons;
654 return err;
655}
656
657static int xennet_get_responses(struct netfront_info *np,
658 struct netfront_rx_info *rinfo, RING_IDX rp,
659 struct sk_buff_head *list)
660{
661 struct xen_netif_rx_response *rx = &rinfo->rx;
662 struct xen_netif_extra_info *extras = rinfo->extras;
663 struct device *dev = &np->netdev->dev;
664 RING_IDX cons = np->rx.rsp_cons;
665 struct sk_buff *skb = xennet_get_rx_skb(np, cons);
666 grant_ref_t ref = xennet_get_rx_ref(np, cons);
667 int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
668 int frags = 1;
669 int err = 0;
670 unsigned long ret;
671
672 if (rx->flags & NETRXF_extra_info) {
673 err = xennet_get_extras(np, extras, rp);
674 cons = np->rx.rsp_cons;
675 }
676
677 for (;;) {
678 if (unlikely(rx->status < 0 ||
679 rx->offset + rx->status > PAGE_SIZE)) {
680 if (net_ratelimit())
681 dev_warn(dev, "rx->offset: %x, size: %u\n",
682 rx->offset, rx->status);
683 xennet_move_rx_slot(np, skb, ref);
684 err = -EINVAL;
685 goto next;
686 }
687
688 /*
689 * This definitely indicates a bug, either in this driver or in
690 * the backend driver. In future this should flag the bad
691		 * situation to the system controller to reboot the backend.
692 */
693 if (ref == GRANT_INVALID_REF) {
694 if (net_ratelimit())
695 dev_warn(dev, "Bad rx response id %d.\n",
696 rx->id);
697 err = -EINVAL;
698 goto next;
699 }
700
701 ret = gnttab_end_foreign_access_ref(ref, 0);
702 BUG_ON(!ret);
703
704 gnttab_release_grant_reference(&np->gref_rx_head, ref);
705
706 __skb_queue_tail(list, skb);
707
708next:
709 if (!(rx->flags & NETRXF_more_data))
710 break;
711
712 if (cons + frags == rp) {
713 if (net_ratelimit())
714 dev_warn(dev, "Need more frags\n");
715 err = -ENOENT;
716 break;
717 }
718
719 rx = RING_GET_RESPONSE(&np->rx, cons + frags);
720 skb = xennet_get_rx_skb(np, cons + frags);
721 ref = xennet_get_rx_ref(np, cons + frags);
722 frags++;
723 }
724
725 if (unlikely(frags > max)) {
726 if (net_ratelimit())
727 dev_warn(dev, "Too many frags\n");
728 err = -E2BIG;
729 }
730
731 if (unlikely(err))
732 np->rx.rsp_cons = cons + frags;
733
734 return err;
735}
736
737static int xennet_set_skb_gso(struct sk_buff *skb,
738 struct xen_netif_extra_info *gso)
739{
740 if (!gso->u.gso.size) {
741 if (net_ratelimit())
742 printk(KERN_WARNING "GSO size must not be zero.\n");
743 return -EINVAL;
744 }
745
746 /* Currently only TCPv4 S.O. is supported. */
747 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
748 if (net_ratelimit())
749 printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type);
750 return -EINVAL;
751 }
752
753 skb_shinfo(skb)->gso_size = gso->u.gso.size;
754 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
755
756 /* Header must be checked, and gso_segs computed. */
757 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
758 skb_shinfo(skb)->gso_segs = 0;
759
760 return 0;
761}
762
763static RING_IDX xennet_fill_frags(struct netfront_info *np,
764 struct sk_buff *skb,
765 struct sk_buff_head *list)
766{
767 struct skb_shared_info *shinfo = skb_shinfo(skb);
768 int nr_frags = shinfo->nr_frags;
769 RING_IDX cons = np->rx.rsp_cons;
770 skb_frag_t *frag = shinfo->frags + nr_frags;
771 struct sk_buff *nskb;
772
773 while ((nskb = __skb_dequeue(list))) {
774 struct xen_netif_rx_response *rx =
775 RING_GET_RESPONSE(&np->rx, ++cons);
776
777 frag->page = skb_shinfo(nskb)->frags[0].page;
778 frag->page_offset = rx->offset;
779 frag->size = rx->status;
780
781 skb->data_len += rx->status;
782
783 skb_shinfo(nskb)->nr_frags = 0;
784 kfree_skb(nskb);
785
786 frag++;
787 nr_frags++;
788 }
789
790 shinfo->nr_frags = nr_frags;
791 return cons;
792}
793
794static int skb_checksum_setup(struct sk_buff *skb)
795{
796 struct iphdr *iph;
797 unsigned char *th;
798 int err = -EPROTO;
799
800 if (skb->protocol != htons(ETH_P_IP))
801 goto out;
802
803 iph = (void *)skb->data;
804 th = skb->data + 4 * iph->ihl;
805 if (th >= skb_tail_pointer(skb))
806 goto out;
807
808 skb->csum_start = th - skb->head;
809 switch (iph->protocol) {
810 case IPPROTO_TCP:
811 skb->csum_offset = offsetof(struct tcphdr, check);
812 break;
813 case IPPROTO_UDP:
814 skb->csum_offset = offsetof(struct udphdr, check);
815 break;
816 default:
817 if (net_ratelimit())
818 printk(KERN_ERR "Attempting to checksum a non-"
819 "TCP/UDP packet, dropping a protocol"
820 " %d packet", iph->protocol);
821 goto out;
822 }
823
824 if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
825 goto out;
826
827 err = 0;
828
829out:
830 return err;
831}
832
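/*
 * [Editor's sketch] For CHECKSUM_PARTIAL the kernel only records *where*
 * the checksum must eventually be written: csum_start is measured from
 * skb->head and csum_offset from the start of the transport header.  For a
 * TCP/IPv4 packet the fields set up by skb_checksum_setup() amount to:
 */
static void example_mark_tcp_csum(struct sk_buff *skb)
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	unsigned char *th = skb->data + 4 * iph->ihl;	/* transport header */

	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_start = th - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
}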
833static int handle_incoming_queue(struct net_device *dev,
834 struct sk_buff_head *rxq)
835{
836 struct netfront_info *np = netdev_priv(dev);
837 int packets_dropped = 0;
838 struct sk_buff *skb;
839
840 while ((skb = __skb_dequeue(rxq)) != NULL) {
841 struct page *page = NETFRONT_SKB_CB(skb)->page;
842 void *vaddr = page_address(page);
843 unsigned offset = NETFRONT_SKB_CB(skb)->offset;
844
845 memcpy(skb->data, vaddr + offset,
846 skb_headlen(skb));
847
848 if (page != skb_shinfo(skb)->frags[0].page)
849 __free_page(page);
850
851 /* Ethernet work: Delayed to here as it peeks the header. */
852 skb->protocol = eth_type_trans(skb, dev);
853
854 if (skb->ip_summed == CHECKSUM_PARTIAL) {
855 if (skb_checksum_setup(skb)) {
856 kfree_skb(skb);
857 packets_dropped++;
858 np->stats.rx_errors++;
859 continue;
860 }
861 }
862
863 np->stats.rx_packets++;
864 np->stats.rx_bytes += skb->len;
865
866 /* Pass it up. */
867 netif_receive_skb(skb);
868 dev->last_rx = jiffies;
869 }
870
871 return packets_dropped;
872}
873
874static int xennet_poll(struct net_device *dev, int *pbudget)
875{
876 struct netfront_info *np = netdev_priv(dev);
877 struct sk_buff *skb;
878 struct netfront_rx_info rinfo;
879 struct xen_netif_rx_response *rx = &rinfo.rx;
880 struct xen_netif_extra_info *extras = rinfo.extras;
881 RING_IDX i, rp;
882 int work_done, budget, more_to_do = 1;
883 struct sk_buff_head rxq;
884 struct sk_buff_head errq;
885 struct sk_buff_head tmpq;
886 unsigned long flags;
887 unsigned int len;
888 int err;
889
890 spin_lock(&np->rx_lock);
891
892 if (unlikely(!netif_carrier_ok(dev))) {
893 spin_unlock(&np->rx_lock);
894 return 0;
895 }
896
897 skb_queue_head_init(&rxq);
898 skb_queue_head_init(&errq);
899 skb_queue_head_init(&tmpq);
900
901 budget = *pbudget;
902 if (budget > dev->quota)
903 budget = dev->quota;
904 rp = np->rx.sring->rsp_prod;
905 rmb(); /* Ensure we see queued responses up to 'rp'. */
906
907 i = np->rx.rsp_cons;
908 work_done = 0;
909 while ((i != rp) && (work_done < budget)) {
910 memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
911 memset(extras, 0, sizeof(rinfo.extras));
912
913 err = xennet_get_responses(np, &rinfo, rp, &tmpq);
914
915 if (unlikely(err)) {
916err:
917 while ((skb = __skb_dequeue(&tmpq)))
918 __skb_queue_tail(&errq, skb);
919 np->stats.rx_errors++;
920 i = np->rx.rsp_cons;
921 continue;
922 }
923
924 skb = __skb_dequeue(&tmpq);
925
926 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
927 struct xen_netif_extra_info *gso;
928 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
929
930 if (unlikely(xennet_set_skb_gso(skb, gso))) {
931 __skb_queue_head(&tmpq, skb);
932 np->rx.rsp_cons += skb_queue_len(&tmpq);
933 goto err;
934 }
935 }
936
937 NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
938 NETFRONT_SKB_CB(skb)->offset = rx->offset;
939
940 len = rx->status;
941 if (len > RX_COPY_THRESHOLD)
942 len = RX_COPY_THRESHOLD;
943 skb_put(skb, len);
944
945 if (rx->status > len) {
946 skb_shinfo(skb)->frags[0].page_offset =
947 rx->offset + len;
948 skb_shinfo(skb)->frags[0].size = rx->status - len;
949 skb->data_len = rx->status - len;
950 } else {
951 skb_shinfo(skb)->frags[0].page = NULL;
952 skb_shinfo(skb)->nr_frags = 0;
953 }
954
955 i = xennet_fill_frags(np, skb, &tmpq);
956
957 /*
958 * Truesize approximates the size of true data plus
959 * any supervisor overheads. Adding hypervisor
960 * overheads has been shown to significantly reduce
961 * achievable bandwidth with the default receive
962 * buffer size. It is therefore not wise to account
963 * for it here.
964 *
965 * After alloc_skb(RX_COPY_THRESHOLD), truesize is set
966 * to RX_COPY_THRESHOLD + the supervisor
967 * overheads. Here, we add the size of the data pulled
968 * in xennet_fill_frags().
969 *
970 * We also adjust for any unused space in the main
971 * data area by subtracting (RX_COPY_THRESHOLD -
972 * len). This is especially important with drivers
973 * which split incoming packets into header and data,
974 * using only 66 bytes of the main data area (see the
975 * e1000 driver for example.) On such systems,
976		 * without this last adjustment, our achievable
977		 * receive throughput using the standard receive
978 * buffer size was cut by 25%(!!!).
979 */
980 skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
981 skb->len += skb->data_len;
982
983 if (rx->flags & NETRXF_csum_blank)
984 skb->ip_summed = CHECKSUM_PARTIAL;
985 else if (rx->flags & NETRXF_data_validated)
986 skb->ip_summed = CHECKSUM_UNNECESSARY;
987
988 __skb_queue_tail(&rxq, skb);
989
990 np->rx.rsp_cons = ++i;
991 work_done++;
992 }
993
994 while ((skb = __skb_dequeue(&errq)))
995 kfree_skb(skb);
996
997 work_done -= handle_incoming_queue(dev, &rxq);
998
999 /* If we get a callback with very few responses, reduce fill target. */
1000 /* NB. Note exponential increase, linear decrease. */
1001 if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
1002 ((3*np->rx_target) / 4)) &&
1003 (--np->rx_target < np->rx_min_target))
1004 np->rx_target = np->rx_min_target;
1005
1006 xennet_alloc_rx_buffers(dev);
1007
1008 *pbudget -= work_done;
1009 dev->quota -= work_done;
1010
1011 if (work_done < budget) {
1012 local_irq_save(flags);
1013
1014 RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
1015 if (!more_to_do)
1016 __netif_rx_complete(dev);
1017
1018 local_irq_restore(flags);
1019 }
1020
1021 spin_unlock(&np->rx_lock);
1022
1023 return more_to_do;
1024}
1025
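/*
 * [Editor's sketch] xennet_poll() follows the pre-2.6.24 NAPI contract:
 * the driver clamps its work to min(*pbudget, dev->quota), decrements both
 * counters itself, and returns non-zero while responses remain.  A minimal
 * poll handler of that style looks like:
 */
static int example_poll(struct net_device *dev, int *pbudget)
{
	int budget = min(*pbudget, dev->quota);
	int work_done = 0;

	/* ... receive and hand up at most 'budget' packets, counting them ... */

	*pbudget -= work_done;
	dev->quota -= work_done;

	if (work_done < budget) {
		netif_rx_complete(dev);		/* no more work: re-arm events */
		return 0;
	}
	return 1;				/* ask to be polled again */
}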
1026static int xennet_change_mtu(struct net_device *dev, int mtu)
1027{
1028 int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
1029
1030 if (mtu > max)
1031 return -EINVAL;
1032 dev->mtu = mtu;
1033 return 0;
1034}
1035
1036static void xennet_release_tx_bufs(struct netfront_info *np)
1037{
1038 struct sk_buff *skb;
1039 int i;
1040
1041 for (i = 0; i < NET_TX_RING_SIZE; i++) {
1042 /* Skip over entries which are actually freelist references */
1043 if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET)
1044 continue;
1045
1046 skb = np->tx_skbs[i].skb;
1047 gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
1048 GNTMAP_readonly);
1049 gnttab_release_grant_reference(&np->gref_tx_head,
1050 np->grant_tx_ref[i]);
1051 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1052 add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
1053 dev_kfree_skb_irq(skb);
1054 }
1055}
1056
1057static void xennet_release_rx_bufs(struct netfront_info *np)
1058{
1059 struct mmu_update *mmu = np->rx_mmu;
1060 struct multicall_entry *mcl = np->rx_mcl;
1061 struct sk_buff_head free_list;
1062 struct sk_buff *skb;
1063 unsigned long mfn;
1064 int xfer = 0, noxfer = 0, unused = 0;
1065 int id, ref;
1066
1067 dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n",
1068 __func__);
1069 return;
1070
1071 skb_queue_head_init(&free_list);
1072
1073 spin_lock_bh(&np->rx_lock);
1074
1075 for (id = 0; id < NET_RX_RING_SIZE; id++) {
1076 ref = np->grant_rx_ref[id];
1077 if (ref == GRANT_INVALID_REF) {
1078 unused++;
1079 continue;
1080 }
1081
1082 skb = np->rx_skbs[id];
1083 mfn = gnttab_end_foreign_transfer_ref(ref);
1084 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1085 np->grant_rx_ref[id] = GRANT_INVALID_REF;
1086
1087 if (0 == mfn) {
1088 skb_shinfo(skb)->nr_frags = 0;
1089 dev_kfree_skb(skb);
1090 noxfer++;
1091 continue;
1092 }
1093
1094 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1095 /* Remap the page. */
1096 struct page *page = skb_shinfo(skb)->frags[0].page;
1097 unsigned long pfn = page_to_pfn(page);
1098 void *vaddr = page_address(page);
1099
1100 MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
1101 mfn_pte(mfn, PAGE_KERNEL),
1102 0);
1103 mcl++;
1104 mmu->ptr = ((u64)mfn << PAGE_SHIFT)
1105 | MMU_MACHPHYS_UPDATE;
1106 mmu->val = pfn;
1107 mmu++;
1108
1109 set_phys_to_machine(pfn, mfn);
1110 }
1111 __skb_queue_tail(&free_list, skb);
1112 xfer++;
1113 }
1114
1115 dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n",
1116 __func__, xfer, noxfer, unused);
1117
1118 if (xfer) {
1119 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1120 /* Do all the remapping work and M2P updates. */
1121 MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu,
1122 0, DOMID_SELF);
1123 mcl++;
1124 HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
1125 }
1126 }
1127
1128 while ((skb = __skb_dequeue(&free_list)) != NULL)
1129 dev_kfree_skb(skb);
1130
1131 spin_unlock_bh(&np->rx_lock);
1132}
1133
1134static void xennet_uninit(struct net_device *dev)
1135{
1136 struct netfront_info *np = netdev_priv(dev);
1137 xennet_release_tx_bufs(np);
1138 xennet_release_rx_bufs(np);
1139 gnttab_free_grant_references(np->gref_tx_head);
1140 gnttab_free_grant_references(np->gref_rx_head);
1141}
1142
1143static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev)
1144{
1145 int i, err;
1146 struct net_device *netdev;
1147 struct netfront_info *np;
1148
1149 netdev = alloc_etherdev(sizeof(struct netfront_info));
1150 if (!netdev) {
1151 printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
1152 __func__);
1153 return ERR_PTR(-ENOMEM);
1154 }
1155
1156 np = netdev_priv(netdev);
1157 np->xbdev = dev;
1158
1159 spin_lock_init(&np->tx_lock);
1160 spin_lock_init(&np->rx_lock);
1161
1162 skb_queue_head_init(&np->rx_batch);
1163 np->rx_target = RX_DFL_MIN_TARGET;
1164 np->rx_min_target = RX_DFL_MIN_TARGET;
1165 np->rx_max_target = RX_MAX_TARGET;
1166
1167 init_timer(&np->rx_refill_timer);
1168 np->rx_refill_timer.data = (unsigned long)netdev;
1169 np->rx_refill_timer.function = rx_refill_timeout;
1170
1171 /* Initialise tx_skbs as a free chain containing every entry. */
1172 np->tx_skb_freelist = 0;
1173 for (i = 0; i < NET_TX_RING_SIZE; i++) {
1174 np->tx_skbs[i].link = i+1;
1175 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1176 }
1177
1178 /* Clear out rx_skbs */
1179 for (i = 0; i < NET_RX_RING_SIZE; i++) {
1180 np->rx_skbs[i] = NULL;
1181 np->grant_rx_ref[i] = GRANT_INVALID_REF;
1182 }
1183
1184 /* A grant for every tx ring slot */
1185 if (gnttab_alloc_grant_references(TX_MAX_TARGET,
1186 &np->gref_tx_head) < 0) {
1187 printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
1188 err = -ENOMEM;
1189 goto exit;
1190 }
1191 /* A grant for every rx ring slot */
1192 if (gnttab_alloc_grant_references(RX_MAX_TARGET,
1193 &np->gref_rx_head) < 0) {
1194 printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
1195 err = -ENOMEM;
1196 goto exit_free_tx;
1197 }
1198
1199 netdev->open = xennet_open;
1200 netdev->hard_start_xmit = xennet_start_xmit;
1201 netdev->stop = xennet_close;
1202 netdev->get_stats = xennet_get_stats;
1203 netdev->poll = xennet_poll;
1204 netdev->uninit = xennet_uninit;
1205 netdev->change_mtu = xennet_change_mtu;
1206 netdev->weight = 64;
1207 netdev->features = NETIF_F_IP_CSUM;
1208
1209 SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
1210 SET_MODULE_OWNER(netdev);
1211 SET_NETDEV_DEV(netdev, &dev->dev);
1212
1213 np->netdev = netdev;
1214
1215 netif_carrier_off(netdev);
1216
1217 return netdev;
1218
1219 exit_free_tx:
1220 gnttab_free_grant_references(np->gref_tx_head);
1221 exit:
1222 free_netdev(netdev);
1223 return ERR_PTR(err);
1224}
1225
1226/**
1227 * Entry point to this code when a new device is created. Allocate the basic
1228 * structures and the ring buffers for communication with the backend, and
1229 * inform the backend of the appropriate details for those.
1230 */
1231static int __devinit netfront_probe(struct xenbus_device *dev,
1232 const struct xenbus_device_id *id)
1233{
1234 int err;
1235 struct net_device *netdev;
1236 struct netfront_info *info;
1237
1238 netdev = xennet_create_dev(dev);
1239 if (IS_ERR(netdev)) {
1240 err = PTR_ERR(netdev);
1241 xenbus_dev_fatal(dev, err, "creating netdev");
1242 return err;
1243 }
1244
1245 info = netdev_priv(netdev);
1246 dev->dev.driver_data = info;
1247
1248 err = register_netdev(info->netdev);
1249 if (err) {
1250 printk(KERN_WARNING "%s: register_netdev err=%d\n",
1251 __func__, err);
1252 goto fail;
1253 }
1254
1255 err = xennet_sysfs_addif(info->netdev);
1256 if (err) {
1257 unregister_netdev(info->netdev);
1258 printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
1259 __func__, err);
1260 goto fail;
1261 }
1262
1263 return 0;
1264
1265 fail:
1266 free_netdev(netdev);
1267 dev->dev.driver_data = NULL;
1268 return err;
1269}
1270
1271static void xennet_end_access(int ref, void *page)
1272{
1273 /* This frees the page as a side-effect */
1274 if (ref != GRANT_INVALID_REF)
1275 gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1276}
1277
1278static void xennet_disconnect_backend(struct netfront_info *info)
1279{
1280 /* Stop old i/f to prevent errors whilst we rebuild the state. */
1281 spin_lock_bh(&info->rx_lock);
1282 spin_lock_irq(&info->tx_lock);
1283 netif_carrier_off(info->netdev);
1284 spin_unlock_irq(&info->tx_lock);
1285 spin_unlock_bh(&info->rx_lock);
1286
1287 if (info->netdev->irq)
1288 unbind_from_irqhandler(info->netdev->irq, info->netdev);
1289 info->evtchn = info->netdev->irq = 0;
1290
1291 /* End access and free the pages */
1292 xennet_end_access(info->tx_ring_ref, info->tx.sring);
1293 xennet_end_access(info->rx_ring_ref, info->rx.sring);
1294
1295 info->tx_ring_ref = GRANT_INVALID_REF;
1296 info->rx_ring_ref = GRANT_INVALID_REF;
1297 info->tx.sring = NULL;
1298 info->rx.sring = NULL;
1299}
1300
1301/**
1302 * We are reconnecting to the backend, due to a suspend/resume, or a backend
1303 * driver restart. We tear down our netif structure and recreate it, but
1304 * leave the device-layer structures intact so that this is transparent to the
1305 * rest of the kernel.
1306 */
1307static int netfront_resume(struct xenbus_device *dev)
1308{
1309 struct netfront_info *info = dev->dev.driver_data;
1310
1311 dev_dbg(&dev->dev, "%s\n", dev->nodename);
1312
1313 xennet_disconnect_backend(info);
1314 return 0;
1315}
1316
1317static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1318{
1319 char *s, *e, *macstr;
1320 int i;
1321
1322 macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1323 if (IS_ERR(macstr))
1324 return PTR_ERR(macstr);
1325
1326 for (i = 0; i < ETH_ALEN; i++) {
1327 mac[i] = simple_strtoul(s, &e, 16);
1328 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1329 kfree(macstr);
1330 return -ENOENT;
1331 }
1332 s = e+1;
1333 }
1334
1335 kfree(macstr);
1336 return 0;
1337}
1338
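/*
 * [Editor's sketch] The "mac" node read above lives under the device's
 * xenstore directory (dev->nodename, e.g. "device/vif/0") and holds the
 * address as a colon-separated string such as "00:16:3e:5a:12:34" (the
 * exact value here is made up).  Reading any other per-device string node
 * follows the same shape:
 */
static char *example_read_node(struct xenbus_device *dev, const char *node)
{
	char *val = xenbus_read(XBT_NIL, dev->nodename, node, NULL);

	if (IS_ERR(val))
		return NULL;	/* caller must kfree() a non-NULL result */
	return val;
}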
1339static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1340{
1341 struct net_device *dev = dev_id;
1342 struct netfront_info *np = netdev_priv(dev);
1343 unsigned long flags;
1344
1345 spin_lock_irqsave(&np->tx_lock, flags);
1346
1347 if (likely(netif_carrier_ok(dev))) {
1348 xennet_tx_buf_gc(dev);
1349 /* Under tx_lock: protects access to rx shared-ring indexes. */
1350 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
1351 netif_rx_schedule(dev);
1352 }
1353
1354 spin_unlock_irqrestore(&np->tx_lock, flags);
1355
1356 return IRQ_HANDLED;
1357}
1358
1359static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
1360{
1361 struct xen_netif_tx_sring *txs;
1362 struct xen_netif_rx_sring *rxs;
1363 int err;
1364 struct net_device *netdev = info->netdev;
1365
1366 info->tx_ring_ref = GRANT_INVALID_REF;
1367 info->rx_ring_ref = GRANT_INVALID_REF;
1368 info->rx.sring = NULL;
1369 info->tx.sring = NULL;
1370 netdev->irq = 0;
1371
1372 err = xen_net_read_mac(dev, netdev->dev_addr);
1373 if (err) {
1374 xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1375 goto fail;
1376 }
1377
1378 txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
1379 if (!txs) {
1380 err = -ENOMEM;
1381 xenbus_dev_fatal(dev, err, "allocating tx ring page");
1382 goto fail;
1383 }
1384 SHARED_RING_INIT(txs);
1385 FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
1386
1387 err = xenbus_grant_ring(dev, virt_to_mfn(txs));
1388 if (err < 0) {
1389 free_page((unsigned long)txs);
1390 goto fail;
1391 }
1392
1393 info->tx_ring_ref = err;
1394 rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
1395 if (!rxs) {
1396 err = -ENOMEM;
1397 xenbus_dev_fatal(dev, err, "allocating rx ring page");
1398 goto fail;
1399 }
1400 SHARED_RING_INIT(rxs);
1401 FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
1402
1403 err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
1404 if (err < 0) {
1405 free_page((unsigned long)rxs);
1406 goto fail;
1407 }
1408 info->rx_ring_ref = err;
1409
1410 err = xenbus_alloc_evtchn(dev, &info->evtchn);
1411 if (err)
1412 goto fail;
1413
1414 err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
1415 IRQF_SAMPLE_RANDOM, netdev->name,
1416 netdev);
1417 if (err < 0)
1418 goto fail;
1419 netdev->irq = err;
1420 return 0;
1421
1422 fail:
1423 return err;
1424}
1425
1426/* Common code used when first setting up, and when resuming. */
1427static int talk_to_backend(struct xenbus_device *dev,
1428 struct netfront_info *info)
1429{
1430 const char *message;
1431 struct xenbus_transaction xbt;
1432 int err;
1433
1434 /* Create shared ring, alloc event channel. */
1435 err = setup_netfront(dev, info);
1436 if (err)
1437 goto out;
1438
1439again:
1440 err = xenbus_transaction_start(&xbt);
1441 if (err) {
1442 xenbus_dev_fatal(dev, err, "starting transaction");
1443 goto destroy_ring;
1444 }
1445
1446 err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
1447 info->tx_ring_ref);
1448 if (err) {
1449 message = "writing tx ring-ref";
1450 goto abort_transaction;
1451 }
1452 err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
1453 info->rx_ring_ref);
1454 if (err) {
1455 message = "writing rx ring-ref";
1456 goto abort_transaction;
1457 }
1458 err = xenbus_printf(xbt, dev->nodename,
1459 "event-channel", "%u", info->evtchn);
1460 if (err) {
1461 message = "writing event-channel";
1462 goto abort_transaction;
1463 }
1464
1465 err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
1466 1);
1467 if (err) {
1468 message = "writing request-rx-copy";
1469 goto abort_transaction;
1470 }
1471
1472 err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
1473 if (err) {
1474 message = "writing feature-rx-notify";
1475 goto abort_transaction;
1476 }
1477
1478 err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
1479 if (err) {
1480 message = "writing feature-sg";
1481 goto abort_transaction;
1482 }
1483
1484 err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
1485 if (err) {
1486 message = "writing feature-gso-tcpv4";
1487 goto abort_transaction;
1488 }
1489
1490 err = xenbus_transaction_end(xbt, 0);
1491 if (err) {
1492 if (err == -EAGAIN)
1493 goto again;
1494 xenbus_dev_fatal(dev, err, "completing transaction");
1495 goto destroy_ring;
1496 }
1497
1498 return 0;
1499
1500 abort_transaction:
1501 xenbus_transaction_end(xbt, 1);
1502 xenbus_dev_fatal(dev, err, "%s", message);
1503 destroy_ring:
1504 xennet_disconnect_backend(info);
1505 out:
1506 return err;
1507}
1508
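/*
 * [Editor's sketch] The transaction dance in talk_to_backend() -- start,
 * write several nodes, end, retry on -EAGAIN -- is the generic xenbus idiom
 * for publishing related keys atomically.  Stripped down, with a made-up
 * key purely for illustration:
 */
static int example_publish_one(struct xenbus_device *dev)
{
	struct xenbus_transaction xbt;
	int err;

again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return err;

	err = xenbus_printf(xbt, dev->nodename, "example-key", "%d", 1);
	if (err) {
		xenbus_transaction_end(xbt, 1);		/* abort */
		return err;
	}

	err = xenbus_transaction_end(xbt, 0);		/* commit */
	if (err == -EAGAIN)
		goto again;				/* lost a race; retry */
	return err;
}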
1509static int xennet_set_sg(struct net_device *dev, u32 data)
1510{
1511 if (data) {
1512 struct netfront_info *np = netdev_priv(dev);
1513 int val;
1514
1515 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1516 "%d", &val) < 0)
1517 val = 0;
1518 if (!val)
1519 return -ENOSYS;
1520 } else if (dev->mtu > ETH_DATA_LEN)
1521 dev->mtu = ETH_DATA_LEN;
1522
1523 return ethtool_op_set_sg(dev, data);
1524}
1525
1526static int xennet_set_tso(struct net_device *dev, u32 data)
1527{
1528 if (data) {
1529 struct netfront_info *np = netdev_priv(dev);
1530 int val;
1531
1532 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1533 "feature-gso-tcpv4", "%d", &val) < 0)
1534 val = 0;
1535 if (!val)
1536 return -ENOSYS;
1537 }
1538
1539 return ethtool_op_set_tso(dev, data);
1540}
1541
1542static void xennet_set_features(struct net_device *dev)
1543{
1544 /* Turn off all GSO bits except ROBUST. */
1545 dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
1546 dev->features |= NETIF_F_GSO_ROBUST;
1547 xennet_set_sg(dev, 0);
1548
1549 /* We need checksum offload to enable scatter/gather and TSO. */
1550 if (!(dev->features & NETIF_F_IP_CSUM))
1551 return;
1552
1553 if (!xennet_set_sg(dev, 1))
1554 xennet_set_tso(dev, 1);
1555}
1556
1557static int xennet_connect(struct net_device *dev)
1558{
1559 struct netfront_info *np = netdev_priv(dev);
1560 int i, requeue_idx, err;
1561 struct sk_buff *skb;
1562 grant_ref_t ref;
1563 struct xen_netif_rx_request *req;
1564 unsigned int feature_rx_copy;
1565
1566 err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1567 "feature-rx-copy", "%u", &feature_rx_copy);
1568 if (err != 1)
1569 feature_rx_copy = 0;
1570
1571 if (!feature_rx_copy) {
1572 dev_info(&dev->dev,
1573			 "backend does not support copying receive path");
1574 return -ENODEV;
1575 }
1576
1577 err = talk_to_backend(np->xbdev, np);
1578 if (err)
1579 return err;
1580
1581 xennet_set_features(dev);
1582
1583 spin_lock_bh(&np->rx_lock);
1584 spin_lock_irq(&np->tx_lock);
1585
1586 /* Step 1: Discard all pending TX packet fragments. */
1587 xennet_release_tx_bufs(np);
1588
1589 /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1590 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1591 if (!np->rx_skbs[i])
1592 continue;
1593
1594 skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
1595 ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1596 req = RING_GET_REQUEST(&np->rx, requeue_idx);
1597
1598 gnttab_grant_foreign_access_ref(
1599 ref, np->xbdev->otherend_id,
1600 pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
1601 frags->page)),
1602 0);
1603 req->gref = ref;
1604 req->id = requeue_idx;
1605
1606 requeue_idx++;
1607 }
1608
1609 np->rx.req_prod_pvt = requeue_idx;
1610
1611 /*
1612 * Step 3: All public and private state should now be sane. Get
1613 * ready to start sending and receiving packets and give the driver
1614 * domain a kick because we've probably just requeued some
1615 * packets.
1616 */
1617 netif_carrier_on(np->netdev);
1618 notify_remote_via_irq(np->netdev->irq);
1619 xennet_tx_buf_gc(dev);
1620 xennet_alloc_rx_buffers(dev);
1621
1622 spin_unlock_irq(&np->tx_lock);
1623 spin_unlock_bh(&np->rx_lock);
1624
1625 return 0;
1626}
1627
1628/**
1629 * Callback received when the backend's state changes.
1630 */
1631static void backend_changed(struct xenbus_device *dev,
1632 enum xenbus_state backend_state)
1633{
1634 struct netfront_info *np = dev->dev.driver_data;
1635 struct net_device *netdev = np->netdev;
1636
1637 dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
1638
1639 switch (backend_state) {
1640 case XenbusStateInitialising:
1641 case XenbusStateInitialised:
1642 case XenbusStateConnected:
1643 case XenbusStateUnknown:
1644 case XenbusStateClosed:
1645 break;
1646
1647 case XenbusStateInitWait:
1648 if (dev->state != XenbusStateInitialising)
1649 break;
1650 if (xennet_connect(netdev) != 0)
1651 break;
1652 xenbus_switch_state(dev, XenbusStateConnected);
1653 break;
1654
1655 case XenbusStateClosing:
1656 xenbus_frontend_closed(dev);
1657 break;
1658 }
1659}
1660
1661static struct ethtool_ops xennet_ethtool_ops =
1662{
1663 .get_tx_csum = ethtool_op_get_tx_csum,
1664 .set_tx_csum = ethtool_op_set_tx_csum,
1665 .get_sg = ethtool_op_get_sg,
1666 .set_sg = xennet_set_sg,
1667 .get_tso = ethtool_op_get_tso,
1668 .set_tso = xennet_set_tso,
1669 .get_link = ethtool_op_get_link,
1670};
1671
1672#ifdef CONFIG_SYSFS
1673static ssize_t show_rxbuf_min(struct device *dev,
1674 struct device_attribute *attr, char *buf)
1675{
1676 struct net_device *netdev = to_net_dev(dev);
1677 struct netfront_info *info = netdev_priv(netdev);
1678
1679 return sprintf(buf, "%u\n", info->rx_min_target);
1680}
1681
1682static ssize_t store_rxbuf_min(struct device *dev,
1683 struct device_attribute *attr,
1684 const char *buf, size_t len)
1685{
1686 struct net_device *netdev = to_net_dev(dev);
1687 struct netfront_info *np = netdev_priv(netdev);
1688 char *endp;
1689 unsigned long target;
1690
1691 if (!capable(CAP_NET_ADMIN))
1692 return -EPERM;
1693
1694 target = simple_strtoul(buf, &endp, 0);
1695 if (endp == buf)
1696 return -EBADMSG;
1697
1698 if (target < RX_MIN_TARGET)
1699 target = RX_MIN_TARGET;
1700 if (target > RX_MAX_TARGET)
1701 target = RX_MAX_TARGET;
1702
1703 spin_lock_bh(&np->rx_lock);
1704 if (target > np->rx_max_target)
1705 np->rx_max_target = target;
1706 np->rx_min_target = target;
1707 if (target > np->rx_target)
1708 np->rx_target = target;
1709
1710 xennet_alloc_rx_buffers(netdev);
1711
1712 spin_unlock_bh(&np->rx_lock);
1713 return len;
1714}
1715
1716static ssize_t show_rxbuf_max(struct device *dev,
1717 struct device_attribute *attr, char *buf)
1718{
1719 struct net_device *netdev = to_net_dev(dev);
1720 struct netfront_info *info = netdev_priv(netdev);
1721
1722 return sprintf(buf, "%u\n", info->rx_max_target);
1723}
1724
1725static ssize_t store_rxbuf_max(struct device *dev,
1726 struct device_attribute *attr,
1727 const char *buf, size_t len)
1728{
1729 struct net_device *netdev = to_net_dev(dev);
1730 struct netfront_info *np = netdev_priv(netdev);
1731 char *endp;
1732 unsigned long target;
1733
1734 if (!capable(CAP_NET_ADMIN))
1735 return -EPERM;
1736
1737 target = simple_strtoul(buf, &endp, 0);
1738 if (endp == buf)
1739 return -EBADMSG;
1740
1741 if (target < RX_MIN_TARGET)
1742 target = RX_MIN_TARGET;
1743 if (target > RX_MAX_TARGET)
1744 target = RX_MAX_TARGET;
1745
1746 spin_lock_bh(&np->rx_lock);
1747 if (target < np->rx_min_target)
1748 np->rx_min_target = target;
1749 np->rx_max_target = target;
1750 if (target < np->rx_target)
1751 np->rx_target = target;
1752
1753 xennet_alloc_rx_buffers(netdev);
1754
1755 spin_unlock_bh(&np->rx_lock);
1756 return len;
1757}
1758
1759static ssize_t show_rxbuf_cur(struct device *dev,
1760 struct device_attribute *attr, char *buf)
1761{
1762 struct net_device *netdev = to_net_dev(dev);
1763 struct netfront_info *info = netdev_priv(netdev);
1764
1765 return sprintf(buf, "%u\n", info->rx_target);
1766}
1767
1768static struct device_attribute xennet_attrs[] = {
1769 __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
1770 __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
1771 __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
1772};
1773
1774static int xennet_sysfs_addif(struct net_device *netdev)
1775{
1776 int i;
1777 int err;
1778
1779 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
1780 err = device_create_file(&netdev->dev,
1781 &xennet_attrs[i]);
1782 if (err)
1783 goto fail;
1784 }
1785 return 0;
1786
1787 fail:
1788 while (--i >= 0)
1789 device_remove_file(&netdev->dev, &xennet_attrs[i]);
1790 return err;
1791}
1792
1793static void xennet_sysfs_delif(struct net_device *netdev)
1794{
1795 int i;
1796
1797 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
1798 device_remove_file(&netdev->dev, &xennet_attrs[i]);
1799}
1800
1801#endif /* CONFIG_SYSFS */
1802
1803static struct xenbus_device_id netfront_ids[] = {
1804 { "vif" },
1805 { "" }
1806};
1807
1808
1809static int __devexit xennet_remove(struct xenbus_device *dev)
1810{
1811 struct netfront_info *info = dev->dev.driver_data;
1812
1813 dev_dbg(&dev->dev, "%s\n", dev->nodename);
1814
1815 unregister_netdev(info->netdev);
1816
1817 xennet_disconnect_backend(info);
1818
1819 del_timer_sync(&info->rx_refill_timer);
1820
1821 xennet_sysfs_delif(info->netdev);
1822
1823 free_netdev(info->netdev);
1824
1825 return 0;
1826}
1827
1828static struct xenbus_driver netfront = {
1829 .name = "vif",
1830 .owner = THIS_MODULE,
1831 .ids = netfront_ids,
1832 .probe = netfront_probe,
1833 .remove = __devexit_p(xennet_remove),
1834 .resume = netfront_resume,
1835 .otherend_changed = backend_changed,
1836};
1837
1838static int __init netif_init(void)
1839{
1840 if (!is_running_on_xen())
1841 return -ENODEV;
1842
1843 if (is_initial_xendomain())
1844 return 0;
1845
1846 printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
1847
1848 return xenbus_register_frontend(&netfront);
1849}
1850module_init(netif_init);
1851
1852
1853static void __exit netif_exit(void)
1854{
1855 if (is_initial_xendomain())
1856 return;
1857
1858 return xenbus_unregister_driver(&netfront);
1859}
1860module_exit(netif_exit);
1861
1862MODULE_DESCRIPTION("Xen virtual network device frontend");
1863MODULE_LICENSE("GPL");
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index 03baf1c64a2e..ed112ee16012 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -147,7 +147,7 @@ static int pnp_dock_event(int dock, struct pnp_docking_station_info *info)
147 info->location_id, info->serial, info->capabilities); 147 info->location_id, info->serial, info->capabilities);
148 envp[i] = NULL; 148 envp[i] = NULL;
149 149
150 value = call_usermodehelper (argv [0], argv, envp, 0); 150 value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC);
151 kfree (buf); 151 kfree (buf);
152 kfree (envp); 152 kfree (envp);
153 return 0; 153 return 0;
diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c
index a54e4140683a..e821a155b658 100644
--- a/drivers/sbus/char/bbc_envctrl.c
+++ b/drivers/sbus/char/bbc_envctrl.c
@@ -7,6 +7,7 @@
7#include <linux/kthread.h> 7#include <linux/kthread.h>
8#include <linux/delay.h> 8#include <linux/delay.h>
9#include <linux/kmod.h> 9#include <linux/kmod.h>
10#include <linux/reboot.h>
10#include <asm/oplib.h> 11#include <asm/oplib.h>
11#include <asm/ebus.h> 12#include <asm/ebus.h>
12 13
@@ -170,8 +171,6 @@ static void get_current_temps(struct bbc_cpu_temperature *tp)
170static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) 171static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
171{ 172{
172 static int shutting_down = 0; 173 static int shutting_down = 0;
173 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
174 char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
175 char *type = "???"; 174 char *type = "???";
176 s8 val = -1; 175 s8 val = -1;
177 176
@@ -195,7 +194,7 @@ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
195 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); 194 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
196 195
197 shutting_down = 1; 196 shutting_down = 1;
198 if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0) 197 if (orderly_poweroff(true) < 0)
199 printk(KERN_CRIT "envctrl: shutdown execution failed\n"); 198 printk(KERN_CRIT "envctrl: shutdown execution failed\n");
200} 199}
201 200
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index 8328acab47fd..dadabef116b6 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -26,6 +26,7 @@
26#include <linux/ioport.h> 26#include <linux/ioport.h>
27#include <linux/miscdevice.h> 27#include <linux/miscdevice.h>
28#include <linux/kmod.h> 28#include <linux/kmod.h>
29#include <linux/reboot.h>
29 30
30#include <asm/ebus.h> 31#include <asm/ebus.h>
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
@@ -966,10 +967,6 @@ static struct i2c_child_t *envctrl_get_i2c_child(unsigned char mon_type)
966static void envctrl_do_shutdown(void) 967static void envctrl_do_shutdown(void)
967{ 968{
968 static int inprog = 0; 969 static int inprog = 0;
969 static char *envp[] = {
970 "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
971 char *argv[] = {
972 "/sbin/shutdown", "-h", "now", NULL };
973 int ret; 970 int ret;
974 971
975 if (inprog != 0) 972 if (inprog != 0)
@@ -977,7 +974,7 @@ static void envctrl_do_shutdown(void)
977 974
978 inprog = 1; 975 inprog = 1;
979 printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n"); 976 printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n");
980 ret = call_usermodehelper("/sbin/shutdown", argv, envp, 0); 977 ret = orderly_poweroff(true);
981 if (ret < 0) { 978 if (ret < 0) {
982 printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n"); 979 printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n");
983 inprog = 0; /* unlikely to succeed, but we could try again */ 980 inprog = 0; /* unlikely to succeed, but we could try again */
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
new file mode 100644
index 000000000000..56592f0d6cef
--- /dev/null
+++ b/drivers/xen/Makefile
@@ -0,0 +1,2 @@
1obj-y += grant-table.o
2obj-y += xenbus/
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
new file mode 100644
index 000000000000..ea94dbabf9a9
--- /dev/null
+++ b/drivers/xen/grant-table.c
@@ -0,0 +1,582 @@
1/******************************************************************************
2 * grant_table.c
3 *
4 * Granting foreign access to our memory reservation.
5 *
6 * Copyright (c) 2005-2006, Christopher Clark
7 * Copyright (c) 2004-2005, K A Fraser
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#include <linux/module.h>
35#include <linux/sched.h>
36#include <linux/mm.h>
37#include <linux/vmalloc.h>
38#include <linux/uaccess.h>
39
40#include <xen/interface/xen.h>
41#include <xen/page.h>
42#include <xen/grant_table.h>
43
44#include <asm/pgtable.h>
45#include <asm/sync_bitops.h>
46
47
48/* External tools reserve first few grant table entries. */
49#define NR_RESERVED_ENTRIES 8
50#define GNTTAB_LIST_END 0xffffffff
51#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
52
53static grant_ref_t **gnttab_list;
54static unsigned int nr_grant_frames;
55static unsigned int boot_max_nr_grant_frames;
56static int gnttab_free_count;
57static grant_ref_t gnttab_free_head;
58static DEFINE_SPINLOCK(gnttab_list_lock);
59
60static struct grant_entry *shared;
61
62static struct gnttab_free_callback *gnttab_free_callback_list;
63
64static int gnttab_expand(unsigned int req_entries);
65
66#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
67
68static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
69{
70 return &gnttab_list[(entry) / RPP][(entry) % RPP];
71}
72/* This can be used as an l-value */
73#define gnttab_entry(entry) (*__gnttab_entry(entry))
74
75static int get_free_entries(unsigned count)
76{
77 unsigned long flags;
78 int ref, rc;
79 grant_ref_t head;
80
81 spin_lock_irqsave(&gnttab_list_lock, flags);
82
83 if ((gnttab_free_count < count) &&
84 ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
85 spin_unlock_irqrestore(&gnttab_list_lock, flags);
86 return rc;
87 }
88
89 ref = head = gnttab_free_head;
90 gnttab_free_count -= count;
91 while (count-- > 1)
92 head = gnttab_entry(head);
93 gnttab_free_head = gnttab_entry(head);
94 gnttab_entry(head) = GNTTAB_LIST_END;
95
96 spin_unlock_irqrestore(&gnttab_list_lock, flags);
97
98 return ref;
99}
100
101static void do_free_callbacks(void)
102{
103 struct gnttab_free_callback *callback, *next;
104
105 callback = gnttab_free_callback_list;
106 gnttab_free_callback_list = NULL;
107
108 while (callback != NULL) {
109 next = callback->next;
110 if (gnttab_free_count >= callback->count) {
111 callback->next = NULL;
112 callback->fn(callback->arg);
113 } else {
114 callback->next = gnttab_free_callback_list;
115 gnttab_free_callback_list = callback;
116 }
117 callback = next;
118 }
119}
120
121static inline void check_free_callbacks(void)
122{
123 if (unlikely(gnttab_free_callback_list))
124 do_free_callbacks();
125}
126
127static void put_free_entry(grant_ref_t ref)
128{
129 unsigned long flags;
130 spin_lock_irqsave(&gnttab_list_lock, flags);
131 gnttab_entry(ref) = gnttab_free_head;
132 gnttab_free_head = ref;
133 gnttab_free_count++;
134 check_free_callbacks();
135 spin_unlock_irqrestore(&gnttab_list_lock, flags);
136}
137
138static void update_grant_entry(grant_ref_t ref, domid_t domid,
139 unsigned long frame, unsigned flags)
140{
141 /*
142 * Introducing a valid entry into the grant table:
143 * 1. Write ent->domid.
144 * 2. Write ent->frame:
145 * GTF_permit_access: Frame to which access is permitted.
146 * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
147 * frame, or zero if none.
148 * 3. Write memory barrier (WMB).
149 * 4. Write ent->flags, inc. valid type.
150 */
151 shared[ref].frame = frame;
152 shared[ref].domid = domid;
153 wmb();
154 shared[ref].flags = flags;
155}
156
157/*
158 * Public grant-issuing interface functions
159 */
160void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
161 unsigned long frame, int readonly)
162{
163 update_grant_entry(ref, domid, frame,
164 GTF_permit_access | (readonly ? GTF_readonly : 0));
165}
166EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
167
168int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
169 int readonly)
170{
171 int ref;
172
173 ref = get_free_entries(1);
174 if (unlikely(ref < 0))
175 return -ENOSPC;
176
177 gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
178
179 return ref;
180}
181EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
182
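/*
 * [Editor's sketch] A typical single-page use of the interface above:
 * share one freshly allocated page read/write with a known backend domain
 * (e.g. dev->otherend_id obtained from xenbus).  Error handling kept to a
 * minimum; this is not part of the patch:
 */
static int example_share_page(domid_t backend_domid,
			      void **vaddr_ret, grant_ref_t *ref_ret)
{
	void *vaddr = (void *)get_zeroed_page(GFP_KERNEL);
	int ref;

	if (!vaddr)
		return -ENOMEM;

	ref = gnttab_grant_foreign_access(backend_domid,
					  virt_to_mfn(vaddr), 0 /* writable */);
	if (ref < 0) {
		free_page((unsigned long)vaddr);
		return ref;
	}

	*vaddr_ret = vaddr;
	*ref_ret = ref;
	return 0;
}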
183int gnttab_query_foreign_access(grant_ref_t ref)
184{
185 u16 nflags;
186
187 nflags = shared[ref].flags;
188
189 return (nflags & (GTF_reading|GTF_writing));
190}
191EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
192
193int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
194{
195 u16 flags, nflags;
196
197 nflags = shared[ref].flags;
198 do {
199 flags = nflags;
200 if (flags & (GTF_reading|GTF_writing)) {
201 printk(KERN_ALERT "WARNING: g.e. still in use!\n");
202 return 0;
203 }
204 } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
205
206 return 1;
207}
208EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
209
210void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
211 unsigned long page)
212{
213 if (gnttab_end_foreign_access_ref(ref, readonly)) {
214 put_free_entry(ref);
215 if (page != 0)
216 free_page(page);
217 } else {
218 /* XXX This needs to be fixed so that the ref and page are
219 placed on a list to be freed up later. */
220 printk(KERN_WARNING
221 "WARNING: leaking g.e. and page still in use!\n");
222 }
223}
224EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
225
226int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
227{
228 int ref;
229
230 ref = get_free_entries(1);
231 if (unlikely(ref < 0))
232 return -ENOSPC;
233 gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
234
235 return ref;
236}
237EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
238
239void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
240 unsigned long pfn)
241{
242 update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
243}
244EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
245
246unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
247{
248 unsigned long frame;
249 u16 flags;
250
251 /*
252 * If a transfer is not even yet started, try to reclaim the grant
253 * reference and return failure (== 0).
254 */
255 while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
256 if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
257 return 0;
258 cpu_relax();
259 }
260
261 /* If a transfer is in progress then wait until it is completed. */
262 while (!(flags & GTF_transfer_completed)) {
263 flags = shared[ref].flags;
264 cpu_relax();
265 }
266
267 rmb(); /* Read the frame number /after/ reading completion status. */
268 frame = shared[ref].frame;
269 BUG_ON(frame == 0);
270
271 return frame;
272}
273EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
274
275unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
276{
277 unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
278 put_free_entry(ref);
279 return frame;
280}
281EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
282
283void gnttab_free_grant_reference(grant_ref_t ref)
284{
285 put_free_entry(ref);
286}
287EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
288
289void gnttab_free_grant_references(grant_ref_t head)
290{
291 grant_ref_t ref;
292 unsigned long flags;
293 int count = 1;
294 if (head == GNTTAB_LIST_END)
295 return;
296 spin_lock_irqsave(&gnttab_list_lock, flags);
297 ref = head;
298 while (gnttab_entry(ref) != GNTTAB_LIST_END) {
299 ref = gnttab_entry(ref);
300 count++;
301 }
302 gnttab_entry(ref) = gnttab_free_head;
303 gnttab_free_head = head;
304 gnttab_free_count += count;
305 check_free_callbacks();
306 spin_unlock_irqrestore(&gnttab_list_lock, flags);
307}
308EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
309
310int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
311{
312 int h = get_free_entries(count);
313
314 if (h < 0)
315 return -ENOSPC;
316
317 *head = h;
318
319 return 0;
320}
321EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
322
323int gnttab_empty_grant_references(const grant_ref_t *private_head)
324{
325 return (*private_head == GNTTAB_LIST_END);
326}
327EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
328
329int gnttab_claim_grant_reference(grant_ref_t *private_head)
330{
331 grant_ref_t g = *private_head;
332 if (unlikely(g == GNTTAB_LIST_END))
333 return -ENOSPC;
334 *private_head = gnttab_entry(g);
335 return g;
336}
337EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
338
339void gnttab_release_grant_reference(grant_ref_t *private_head,
340 grant_ref_t release)
341{
342 gnttab_entry(release) = *private_head;
343 *private_head = release;
344}
345EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
346
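/*
 * [Editor's sketch] Taken together, the reference-pool calls above give a
 * driver a private free list it can use from its datapath without touching
 * the global list: allocate a batch at probe time, claim/release per
 * request, hand the batch back on teardown.  Roughly:
 */
static int example_pool_usage(domid_t otherend, unsigned long mfn)
{
	grant_ref_t head;
	int ref;

	if (gnttab_alloc_grant_references(16, &head) < 0)
		return -ENOMEM;			/* pool of 16 references */

	ref = gnttab_claim_grant_reference(&head);
	if (ref >= 0) {
		gnttab_grant_foreign_access_ref(ref, otherend, mfn, 1);
		/* ... the peer maps and later unmaps the frame ... */
		gnttab_end_foreign_access_ref(ref, 1);
		gnttab_release_grant_reference(&head, ref);
	}

	gnttab_free_grant_references(head);	/* return the pool */
	return 0;
}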
347void gnttab_request_free_callback(struct gnttab_free_callback *callback,
348 void (*fn)(void *), void *arg, u16 count)
349{
350 unsigned long flags;
351 spin_lock_irqsave(&gnttab_list_lock, flags);
352 if (callback->next)
353 goto out;
354 callback->fn = fn;
355 callback->arg = arg;
356 callback->count = count;
357 callback->next = gnttab_free_callback_list;
358 gnttab_free_callback_list = callback;
359 check_free_callbacks();
360out:
361 spin_unlock_irqrestore(&gnttab_list_lock, flags);
362}
363EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
364
365void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
366{
367 struct gnttab_free_callback **pcb;
368 unsigned long flags;
369
370 spin_lock_irqsave(&gnttab_list_lock, flags);
371 for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
372 if (*pcb == callback) {
373 *pcb = callback->next;
374 break;
375 }
376 }
377 spin_unlock_irqrestore(&gnttab_list_lock, flags);
378}
379EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
380
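/*
 * [Editor's sketch] A driver that runs out of references can register a
 * callback to be poked once enough entries are freed, instead of polling.
 * Note that, per do_free_callbacks() above, the callback runs with the
 * grant-table lock held and interrupts off, so it should only do something
 * cheap such as waking a queue.  Hypothetical wiring:
 */
static struct gnttab_free_callback example_cb;

static void example_refs_available(void *arg)
{
	struct net_device *dev = arg;

	netif_wake_queue(dev);		/* restart a queue stalled on refs */
}

static void example_wait_for_refs(struct net_device *dev)
{
	gnttab_request_free_callback(&example_cb, example_refs_available,
				     dev, 16 /* wake us at >= 16 free refs */);
}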
381static int grow_gnttab_list(unsigned int more_frames)
382{
383 unsigned int new_nr_grant_frames, extra_entries, i;
384
385 new_nr_grant_frames = nr_grant_frames + more_frames;
386 extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
387
388 for (i = nr_grant_frames; i < new_nr_grant_frames; i++) {
389 gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
390 if (!gnttab_list[i])
391 goto grow_nomem;
392 }
393
394
395 for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
396 i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
397 gnttab_entry(i) = i + 1;
398
399 gnttab_entry(i) = gnttab_free_head;
400 gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
401 gnttab_free_count += extra_entries;
402
403 nr_grant_frames = new_nr_grant_frames;
404
405 check_free_callbacks();
406
407 return 0;
408
409grow_nomem:
410 for ( ; i >= nr_grant_frames; i--)
411 free_page((unsigned long) gnttab_list[i]);
412 return -ENOMEM;
413}
414
415static unsigned int __max_nr_grant_frames(void)
416{
417 struct gnttab_query_size query;
418 int rc;
419
420 query.dom = DOMID_SELF;
421
422 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
423 if ((rc < 0) || (query.status != GNTST_okay))
424 return 4; /* Legacy max supported number of frames */
425
426 return query.max_nr_frames;
427}
428
429static inline unsigned int max_nr_grant_frames(void)
430{
431 unsigned int xen_max = __max_nr_grant_frames();
432
433 if (xen_max > boot_max_nr_grant_frames)
434 return boot_max_nr_grant_frames;
435 return xen_max;
436}
437
438static int map_pte_fn(pte_t *pte, struct page *pmd_page,
439 unsigned long addr, void *data)
440{
441 unsigned long **frames = (unsigned long **)data;
442
443 set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
444 (*frames)++;
445 return 0;
446}
447
448static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
449 unsigned long addr, void *data)
450{
451
452 set_pte_at(&init_mm, addr, pte, __pte(0));
453 return 0;
454}
455
456static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
457{
458 struct gnttab_setup_table setup;
459 unsigned long *frames;
460 unsigned int nr_gframes = end_idx + 1;
461 int rc;
462
463 frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
464 if (!frames)
465 return -ENOMEM;
466
467 setup.dom = DOMID_SELF;
468 setup.nr_frames = nr_gframes;
469 setup.frame_list = frames;
470
471 rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
472 if (rc == -ENOSYS) {
473 kfree(frames);
474 return -ENOSYS;
475 }
476
477 BUG_ON(rc || setup.status);
478
479 if (shared == NULL) {
480 struct vm_struct *area;
481 area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
482 BUG_ON(area == NULL);
483 shared = area->addr;
484 }
485 rc = apply_to_page_range(&init_mm, (unsigned long)shared,
486 PAGE_SIZE * nr_gframes,
487 map_pte_fn, &frames);
488 BUG_ON(rc);
489 frames -= nr_gframes; /* adjust after map_pte_fn() */
490
491 kfree(frames);
492
493 return 0;
494}
495
496static int gnttab_resume(void)
497{
498 if (max_nr_grant_frames() < nr_grant_frames)
499 return -ENOSYS;
500 return gnttab_map(0, nr_grant_frames - 1);
501}
502
503static int gnttab_suspend(void)
504{
505 apply_to_page_range(&init_mm, (unsigned long)shared,
506 PAGE_SIZE * nr_grant_frames,
507 unmap_pte_fn, NULL);
508
509 return 0;
510}
511
512static int gnttab_expand(unsigned int req_entries)
513{
514 int rc;
515 unsigned int cur, extra;
516
517 cur = nr_grant_frames;
518 extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
519 GREFS_PER_GRANT_FRAME);
520 if (cur + extra > max_nr_grant_frames())
521 return -ENOSPC;
522
523 rc = gnttab_map(cur, cur + extra - 1);
524 if (rc == 0)
525 rc = grow_gnttab_list(extra);
526
527 return rc;
528}
529
530static int __devinit gnttab_init(void)
531{
532 int i;
533 unsigned int max_nr_glist_frames;
534 unsigned int nr_init_grefs;
535
536 if (!is_running_on_xen())
537 return -ENODEV;
538
539 nr_grant_frames = 1;
540 boot_max_nr_grant_frames = __max_nr_grant_frames();
541
542 /* Determine the maximum number of frames required for the
543 * grant reference free list on the current hypervisor.
544 */
545 max_nr_glist_frames = (boot_max_nr_grant_frames *
546 GREFS_PER_GRANT_FRAME /
547 (PAGE_SIZE / sizeof(grant_ref_t)));
548
549 gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
550 GFP_KERNEL);
551 if (gnttab_list == NULL)
552 return -ENOMEM;
553
554 for (i = 0; i < nr_grant_frames; i++) {
555 gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
556 if (gnttab_list[i] == NULL)
557 goto ini_nomem;
558 }
559
560 if (gnttab_resume() < 0)
561 return -ENODEV;
562
563 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
564
565 for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
566 gnttab_entry(i) = i + 1;
567
568 gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
569 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
570 gnttab_free_head = NR_RESERVED_ENTRIES;
571
572 printk("Grant table initialized\n");
573 return 0;
574
575 ini_nomem:
576 for (i--; i >= 0; i--)
577 free_page((unsigned long)gnttab_list[i]);
578 kfree(gnttab_list);
579 return -ENOMEM;
580}
581
582core_initcall(gnttab_init);
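
The grant-table API set up above is what split-driver frontends build on. A minimal sketch of the usual pattern follows; it is not part of this patch, the peer domain id and the page are assumed to come from the calling driver, the virt_to_mfn() helper is assumed from the Xen page headers, and error handling is trimmed.

/* Illustrative only: share one page with a peer domain, revoke it later. */
static int share_page_with_peer(domid_t peer, unsigned long page)
{
	int ref;

	/* Grant the peer read/write access to the page's machine frame. */
	ref = gnttab_grant_foreign_access(peer, virt_to_mfn((void *)page), 0);
	if (ref < 0)
		return ref;		/* no free grant entries */

	/* ... advertise 'ref' via xenstore and run the shared ring ... */

	/* Revoke the grant and free the page once the peer has unmapped it. */
	gnttab_end_foreign_access(ref, 0, page);
	return 0;
}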
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile
new file mode 100644
index 000000000000..5571f5b84223
--- /dev/null
+++ b/drivers/xen/xenbus/Makefile
@@ -0,0 +1,7 @@
1obj-y += xenbus.o
2
3xenbus-objs =
4xenbus-objs += xenbus_client.o
5xenbus-objs += xenbus_comms.o
6xenbus-objs += xenbus_xs.o
7xenbus-objs += xenbus_probe.o
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
new file mode 100644
index 000000000000..9fd2f70ab46d
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -0,0 +1,569 @@
1/******************************************************************************
2 * Client-facing interface for the Xenbus driver. In other words, the
3 * interface between the Xenbus and the device-specific code, be it the
4 * frontend or the backend of that driver.
5 *
6 * Copyright (C) 2005 XenSource Ltd
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#include <linux/types.h>
34#include <linux/vmalloc.h>
35#include <asm/xen/hypervisor.h>
36#include <xen/interface/xen.h>
37#include <xen/interface/event_channel.h>
38#include <xen/events.h>
39#include <xen/grant_table.h>
40#include <xen/xenbus.h>
41
42const char *xenbus_strstate(enum xenbus_state state)
43{
44 static const char *const name[] = {
45 [ XenbusStateUnknown ] = "Unknown",
46 [ XenbusStateInitialising ] = "Initialising",
47 [ XenbusStateInitWait ] = "InitWait",
48 [ XenbusStateInitialised ] = "Initialised",
49 [ XenbusStateConnected ] = "Connected",
50 [ XenbusStateClosing ] = "Closing",
51 [ XenbusStateClosed ] = "Closed",
52 };
53 return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
54}
55EXPORT_SYMBOL_GPL(xenbus_strstate);
56
57/**
58 * xenbus_watch_path - register a watch
59 * @dev: xenbus device
60 * @path: path to watch
61 * @watch: watch to register
62 * @callback: callback to register
63 *
64 * Register a @watch on the given path, using the given xenbus_watch structure
65 * for storage, and the given @callback function as the callback. Return 0 on
66 * success, or -errno on error. On success, the given @path will be saved as
67 * @watch->node, and remains the caller's to free. On error, @watch->node will
68 * be NULL, the device will switch to %XenbusStateClosing, and the error will
69 * be saved in the store.
70 */
71int xenbus_watch_path(struct xenbus_device *dev, const char *path,
72 struct xenbus_watch *watch,
73 void (*callback)(struct xenbus_watch *,
74 const char **, unsigned int))
75{
76 int err;
77
78 watch->node = path;
79 watch->callback = callback;
80
81 err = register_xenbus_watch(watch);
82
83 if (err) {
84 watch->node = NULL;
85 watch->callback = NULL;
86 xenbus_dev_fatal(dev, err, "adding watch on %s", path);
87 }
88
89 return err;
90}
91EXPORT_SYMBOL_GPL(xenbus_watch_path);
92
93
94/**
95 * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
96 * @dev: xenbus device
97 * @watch: watch to register
98 * @callback: callback to register
99 * @pathfmt: format of path to watch
100 *
 101 * Register a watch on the path built from @pathfmt, using the given
 102 * xenbus_watch structure for storage, and the given @callback function as
 103 * the callback. Return 0 on success, or -errno on error. On success, the
 104 * generated path will be saved as @watch->node, and becomes the caller's to
 105 * kfree(). On error, watch->node will be NULL, so the caller has nothing to
 106 * free, the device will switch to %XenbusStateClosing, and the error will be
 107 * saved in the store.
108 */
109int xenbus_watch_pathfmt(struct xenbus_device *dev,
110 struct xenbus_watch *watch,
111 void (*callback)(struct xenbus_watch *,
112 const char **, unsigned int),
113 const char *pathfmt, ...)
114{
115 int err;
116 va_list ap;
117 char *path;
118
119 va_start(ap, pathfmt);
120 path = kvasprintf(GFP_KERNEL, pathfmt, ap);
121 va_end(ap);
122
123 if (!path) {
124 xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
125 return -ENOMEM;
126 }
127 err = xenbus_watch_path(dev, path, watch, callback);
128
129 if (err)
130 kfree(path);
131 return err;
132}
133EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
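
As a usage sketch (driver-side code that is not part of this patch; the structure and function names are hypothetical), a frontend that wants to track a backend property would register a watch on a node under dev->otherend, typically from its probe or connect path:

/* Hypothetical driver-private state holding the watch. */
struct mydev_info {
	struct xenbus_watch feature_watch;
};

static void features_changed(struct xenbus_watch *watch,
			     const char **vec, unsigned int len)
{
	/* vec[XS_WATCH_PATH] is the path that fired; re-read it here. */
}

static int watch_backend_features(struct xenbus_device *dev,
				  struct mydev_info *info)
{
	return xenbus_watch_pathfmt(dev, &info->feature_watch,
				    features_changed,
				    "%s/%s", dev->otherend, "feature-foo");
}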
134
135
136/**
137 * xenbus_switch_state
138 * @dev: xenbus device
139 * @xbt: transaction handle
140 * @state: new state
141 *
142 * Advertise in the store a change of the given driver to the given new_state.
143 * Return 0 on success, or -errno on error. On error, the device will switch
144 * to XenbusStateClosing, and the error will be saved in the store.
145 */
146int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
147{
148 /* We check whether the state is currently set to the given value, and
149 if not, then the state is set. We don't want to unconditionally
150 write the given state, because we don't want to fire watches
151 unnecessarily. Furthermore, if the node has gone, we don't write
152 to it, as the device will be tearing down, and we don't want to
153 resurrect that directory.
154
155 Note that, because of this cached value of our state, this function
 156 will not work inside a Xenstore transaction (something it
 157 attempted in the past) because dev->state would not get reset if
158 the transaction was aborted.
159
160 */
161
162 int current_state;
163 int err;
164
165 if (state == dev->state)
166 return 0;
167
168 err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
169 &current_state);
170 if (err != 1)
171 return 0;
172
173 err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
174 if (err) {
175 if (state != XenbusStateClosing) /* Avoid looping */
176 xenbus_dev_fatal(dev, err, "writing new state");
177 return err;
178 }
179
180 dev->state = state;
181
182 return 0;
183}
184EXPORT_SYMBOL_GPL(xenbus_switch_state);
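
A hedged sketch of how a frontend usually drives this state machine (illustrative only, not part of the patch): it advertises Initialised once its ring details are in the store, and Connected after the backend reports Connected.

/* Illustrative frontend connect path; error handling omitted. */
static void my_frontend_go_live(struct xenbus_device *dev)
{
	/* Ring reference and event channel already written to xenstore. */
	xenbus_switch_state(dev, XenbusStateInitialised);

	/* Later, from the driver's otherend_changed handler, once the
	 * backend reports XenbusStateConnected: */
	xenbus_switch_state(dev, XenbusStateConnected);
}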
185
186int xenbus_frontend_closed(struct xenbus_device *dev)
187{
188 xenbus_switch_state(dev, XenbusStateClosed);
189 complete(&dev->down);
190 return 0;
191}
192EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
193
194/**
195 * Return the path to the error node for the given device, or NULL on failure.
196 * If the value returned is non-NULL, then it is the caller's to kfree.
197 */
198static char *error_path(struct xenbus_device *dev)
199{
200 return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
201}
202
203
204static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
205 const char *fmt, va_list ap)
206{
207 int ret;
208 unsigned int len;
209 char *printf_buffer = NULL;
210 char *path_buffer = NULL;
211
212#define PRINTF_BUFFER_SIZE 4096
213 printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
214 if (printf_buffer == NULL)
215 goto fail;
216
217 len = sprintf(printf_buffer, "%i ", -err);
218 ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
219
220 BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
221
222 dev_err(&dev->dev, "%s\n", printf_buffer);
223
224 path_buffer = error_path(dev);
225
226 if (path_buffer == NULL) {
227 dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
228 dev->nodename, printf_buffer);
229 goto fail;
230 }
231
232 if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
233 dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
234 dev->nodename, printf_buffer);
235 goto fail;
236 }
237
238fail:
239 kfree(printf_buffer);
240 kfree(path_buffer);
241}
242
243
244/**
245 * xenbus_dev_error
246 * @dev: xenbus device
247 * @err: error to report
248 * @fmt: error message format
249 *
250 * Report the given negative errno into the store, along with the given
251 * formatted message.
252 */
253void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
254{
255 va_list ap;
256
257 va_start(ap, fmt);
258 xenbus_va_dev_error(dev, err, fmt, ap);
259 va_end(ap);
260}
261EXPORT_SYMBOL_GPL(xenbus_dev_error);
262
263/**
264 * xenbus_dev_fatal
265 * @dev: xenbus device
266 * @err: error to report
267 * @fmt: error message format
268 *
269 * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
 270 * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
271 * closedown of this driver and its peer.
272 */
273
274void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
275{
276 va_list ap;
277
278 va_start(ap, fmt);
279 xenbus_va_dev_error(dev, err, fmt, ap);
280 va_end(ap);
281
282 xenbus_switch_state(dev, XenbusStateClosing);
283}
284EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
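
As a brief usage note (a hypothetical fragment, not from this patch): recoverable conditions go through xenbus_dev_error(), while setup failures that should tear the connection down use xenbus_dev_fatal(), for example when publishing ring details:

	/* Illustrative fragment from a hypothetical setup routine. */
	err = xenbus_printf(XBT_NIL, dev->nodename, "ring-ref", "%u", ring_ref);
	if (err) {
		xenbus_dev_fatal(dev, err, "writing ring-ref");
		return err;
	}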
285
286/**
287 * xenbus_grant_ring
288 * @dev: xenbus device
289 * @ring_mfn: mfn of ring to grant
 290 *
291 * Grant access to the given @ring_mfn to the peer of the given device. Return
292 * 0 on success, or -errno on error. On error, the device will switch to
293 * XenbusStateClosing, and the error will be saved in the store.
294 */
295int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
296{
297 int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
298 if (err < 0)
299 xenbus_dev_fatal(dev, err, "granting access to ring page");
300 return err;
301}
302EXPORT_SYMBOL_GPL(xenbus_grant_ring);
303
304
305/**
306 * Allocate an event channel for the given xenbus_device, assigning the newly
307 * created local port to *port. Return 0 on success, or -errno on error. On
308 * error, the device will switch to XenbusStateClosing, and the error will be
309 * saved in the store.
310 */
311int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
312{
313 struct evtchn_alloc_unbound alloc_unbound;
314 int err;
315
316 alloc_unbound.dom = DOMID_SELF;
317 alloc_unbound.remote_dom = dev->otherend_id;
318
319 err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
320 &alloc_unbound);
321 if (err)
322 xenbus_dev_fatal(dev, err, "allocating event channel");
323 else
324 *port = alloc_unbound.port;
325
326 return err;
327}
328EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
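
Taken together, the two helpers above cover the common frontend setup step: allocate a shared ring page, grant it to the backend, and allocate an unbound event channel for the backend to bind to. A rough sketch, assuming the usual virt_to_mfn() helper, with the xenstore writes omitted and error unwinding simplified:

/* Illustrative frontend setup; not part of this patch. */
static int setup_ring_and_evtchn(struct xenbus_device *dev,
				 void **ring, int *gref, int *evtchn)
{
	int err;

	*ring = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	if (!*ring)
		return -ENOMEM;

	err = xenbus_grant_ring(dev, virt_to_mfn(*ring));
	if (err < 0)
		goto out_free;
	*gref = err;			/* grant reference to publish */

	err = xenbus_alloc_evtchn(dev, evtchn);
	if (err)
		goto out_free;		/* a real driver also ends the grant */

	/* Next: write *gref and *evtchn under dev->nodename, switch state. */
	return 0;

out_free:
	free_page((unsigned long)*ring);
	*ring = NULL;
	return err;
}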
329
330
331/**
332 * Bind to an existing interdomain event channel in another domain. Returns 0
333 * on success and stores the local port in *port. On error, returns -errno,
334 * switches the device to XenbusStateClosing, and saves the error in XenStore.
335 */
336int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port)
337{
338 struct evtchn_bind_interdomain bind_interdomain;
339 int err;
340
341 bind_interdomain.remote_dom = dev->otherend_id;
342 bind_interdomain.remote_port = remote_port;
343
344 err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
345 &bind_interdomain);
346 if (err)
347 xenbus_dev_fatal(dev, err,
348 "binding to event channel %d from domain %d",
349 remote_port, dev->otherend_id);
350 else
351 *port = bind_interdomain.local_port;
352
353 return err;
354}
355EXPORT_SYMBOL_GPL(xenbus_bind_evtchn);
356
357
358/**
359 * Free an existing event channel. Returns 0 on success or -errno on error.
360 */
361int xenbus_free_evtchn(struct xenbus_device *dev, int port)
362{
363 struct evtchn_close close;
364 int err;
365
366 close.port = port;
367
368 err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
369 if (err)
370 xenbus_dev_error(dev, err, "freeing event channel %d", port);
371
372 return err;
373}
374EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
375
376
377/**
378 * xenbus_map_ring_valloc
379 * @dev: xenbus device
380 * @gnt_ref: grant reference
381 * @vaddr: pointer to address to be filled out by mapping
382 *
383 * Based on Rusty Russell's skeleton driver's map_page.
384 * Map a page of memory into this domain from another domain's grant table.
385 * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
386 * page to that address, and sets *vaddr to that address.
387 * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
388 * or -ENOMEM on error. If an error is returned, device will switch to
389 * XenbusStateClosing and the error message will be saved in XenStore.
390 */
391int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
392{
393 struct gnttab_map_grant_ref op = {
394 .flags = GNTMAP_host_map,
395 .ref = gnt_ref,
396 .dom = dev->otherend_id,
397 };
398 struct vm_struct *area;
399
400 *vaddr = NULL;
401
402 area = alloc_vm_area(PAGE_SIZE);
403 if (!area)
404 return -ENOMEM;
405
406 op.host_addr = (unsigned long)area->addr;
407
408 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
409 BUG();
410
411 if (op.status != GNTST_okay) {
412 free_vm_area(area);
413 xenbus_dev_fatal(dev, op.status,
414 "mapping in shared page %d from domain %d",
415 gnt_ref, dev->otherend_id);
416 return op.status;
417 }
418
419 /* Stuff the handle in an unused field */
420 area->phys_addr = (unsigned long)op.handle;
421
422 *vaddr = area->addr;
423 return 0;
424}
425EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
426
427
428/**
429 * xenbus_map_ring
430 * @dev: xenbus device
431 * @gnt_ref: grant reference
432 * @handle: pointer to grant handle to be filled
433 * @vaddr: address to be mapped to
434 *
435 * Map a page of memory into this domain from another domain's grant table.
436 * xenbus_map_ring does not allocate the virtual address space (you must do
437 * this yourself!). It only maps in the page to the specified address.
438 * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
439 * or -ENOMEM on error. If an error is returned, device will switch to
440 * XenbusStateClosing and the error message will be saved in XenStore.
441 */
442int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
443 grant_handle_t *handle, void *vaddr)
444{
445 struct gnttab_map_grant_ref op = {
446 .host_addr = (unsigned long)vaddr,
447 .flags = GNTMAP_host_map,
448 .ref = gnt_ref,
449 .dom = dev->otherend_id,
450 };
451
452 if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
453 BUG();
454
455 if (op.status != GNTST_okay) {
456 xenbus_dev_fatal(dev, op.status,
457 "mapping in shared page %d from domain %d",
458 gnt_ref, dev->otherend_id);
459 } else
460 *handle = op.handle;
461
462 return op.status;
463}
464EXPORT_SYMBOL_GPL(xenbus_map_ring);
465
466
467/**
468 * xenbus_unmap_ring_vfree
469 * @dev: xenbus device
470 * @vaddr: addr to unmap
471 *
472 * Based on Rusty Russell's skeleton driver's unmap_page.
473 * Unmap a page of memory in this domain that was imported from another domain.
474 * Use xenbus_unmap_ring_vfree if you mapped in your memory with
475 * xenbus_map_ring_valloc (it will free the virtual address space).
476 * Returns 0 on success and returns GNTST_* on error
477 * (see xen/include/interface/grant_table.h).
478 */
479int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
480{
481 struct vm_struct *area;
482 struct gnttab_unmap_grant_ref op = {
483 .host_addr = (unsigned long)vaddr,
484 };
485
486 /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
487 * method so that we don't have to muck with vmalloc internals here.
488 * We could force the user to hang on to their struct vm_struct from
489 * xenbus_map_ring_valloc, but these 6 lines considerably simplify
490 * this API.
491 */
492 read_lock(&vmlist_lock);
493 for (area = vmlist; area != NULL; area = area->next) {
494 if (area->addr == vaddr)
495 break;
496 }
497 read_unlock(&vmlist_lock);
498
499 if (!area) {
500 xenbus_dev_error(dev, -ENOENT,
501 "can't find mapped virtual address %p", vaddr);
502 return GNTST_bad_virt_addr;
503 }
504
505 op.handle = (grant_handle_t)area->phys_addr;
506
507 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
508 BUG();
509
510 if (op.status == GNTST_okay)
511 free_vm_area(area);
512 else
513 xenbus_dev_error(dev, op.status,
514 "unmapping page at handle %d error %d",
515 (int16_t)area->phys_addr, op.status);
516
517 return op.status;
518}
519EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
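
On the other side of the connection, a backend-style driver maps the frontend's ring with xenbus_map_ring_valloc() and tears it down with xenbus_unmap_ring_vfree(). A hedged sketch (the grant reference is assumed to have been read from xenstore):

/* Illustrative only: map and later unmap a peer's shared ring page. */
static int map_peer_ring(struct xenbus_device *dev, int gnt_ref, void **vaddr)
{
	int err = xenbus_map_ring_valloc(dev, gnt_ref, vaddr);
	if (err)
		return err;	/* device has already switched to Closing */

	/* ... use *vaddr as the shared ring ... */
	return 0;
}

static void unmap_peer_ring(struct xenbus_device *dev, void *vaddr)
{
	xenbus_unmap_ring_vfree(dev, vaddr);
}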
520
521
522/**
523 * xenbus_unmap_ring
524 * @dev: xenbus device
525 * @handle: grant handle
526 * @vaddr: addr to unmap
527 *
528 * Unmap a page of memory in this domain that was imported from another domain.
529 * Returns 0 on success and returns GNTST_* on error
530 * (see xen/include/interface/grant_table.h).
531 */
532int xenbus_unmap_ring(struct xenbus_device *dev,
533 grant_handle_t handle, void *vaddr)
534{
535 struct gnttab_unmap_grant_ref op = {
536 .host_addr = (unsigned long)vaddr,
537 .handle = handle,
538 };
539
540 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
541 BUG();
542
543 if (op.status != GNTST_okay)
544 xenbus_dev_error(dev, op.status,
545 "unmapping page at handle %d error %d",
546 handle, op.status);
547
548 return op.status;
549}
550EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
551
552
553/**
554 * xenbus_read_driver_state
555 * @path: path for driver
556 *
557 * Return the state of the driver rooted at the given store path, or
558 * XenbusStateUnknown if no state can be read.
559 */
560enum xenbus_state xenbus_read_driver_state(const char *path)
561{
562 enum xenbus_state result;
563 int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
564 if (err)
565 result = XenbusStateUnknown;
566
567 return result;
568}
569EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
new file mode 100644
index 000000000000..6efbe3f29ca5
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -0,0 +1,233 @@
1/******************************************************************************
2 * xenbus_comms.c
3 *
 4 * Low level code that talks to Xen Store: ringbuffer and event channel.
5 *
6 * Copyright (C) 2005 Rusty Russell, IBM Corporation
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#include <linux/wait.h>
34#include <linux/interrupt.h>
35#include <linux/sched.h>
36#include <linux/err.h>
37#include <xen/xenbus.h>
38#include <asm/xen/hypervisor.h>
39#include <xen/events.h>
40#include <xen/page.h>
41#include "xenbus_comms.h"
42
43static int xenbus_irq;
44
45static DECLARE_WORK(probe_work, xenbus_probe);
46
47static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
48
49static irqreturn_t wake_waiting(int irq, void *unused)
50{
51 if (unlikely(xenstored_ready == 0)) {
52 xenstored_ready = 1;
53 schedule_work(&probe_work);
54 }
55
56 wake_up(&xb_waitq);
57 return IRQ_HANDLED;
58}
59
60static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
61{
62 return ((prod - cons) <= XENSTORE_RING_SIZE);
63}
64
65static void *get_output_chunk(XENSTORE_RING_IDX cons,
66 XENSTORE_RING_IDX prod,
67 char *buf, uint32_t *len)
68{
69 *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
70 if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
71 *len = XENSTORE_RING_SIZE - (prod - cons);
72 return buf + MASK_XENSTORE_IDX(prod);
73}
74
75static const void *get_input_chunk(XENSTORE_RING_IDX cons,
76 XENSTORE_RING_IDX prod,
77 const char *buf, uint32_t *len)
78{
79 *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
80 if ((prod - cons) < *len)
81 *len = prod - cons;
82 return buf + MASK_XENSTORE_IDX(cons);
83}
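
Both rings use free-running 32-bit producer/consumer indexes: only MASK_XENSTORE_IDX() of an index selects a byte in the buffer, so unsigned subtraction gives the fill level even across wraparound, which is the invariant check_indexes() above enforces. A small illustration with made-up values:

	/* Illustrative only: XENSTORE_RING_SIZE is a power of two, so
	 * unsigned wraparound keeps prod - cons meaningful. */
	XENSTORE_RING_IDX cons = 0xfffffff0, prod = 0x00000008;
	uint32_t used  = prod - cons;			/* 0x18 bytes in flight */
	uint32_t space = XENSTORE_RING_SIZE - used;	/* room left for writers */
	/* check_indexes() treats used > XENSTORE_RING_SIZE as corruption. */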
84
85/**
86 * xb_write - low level write
87 * @data: buffer to send
88 * @len: length of buffer
89 *
90 * Returns 0 on success, error otherwise.
91 */
92int xb_write(const void *data, unsigned len)
93{
94 struct xenstore_domain_interface *intf = xen_store_interface;
95 XENSTORE_RING_IDX cons, prod;
96 int rc;
97
98 while (len != 0) {
99 void *dst;
100 unsigned int avail;
101
102 rc = wait_event_interruptible(
103 xb_waitq,
104 (intf->req_prod - intf->req_cons) !=
105 XENSTORE_RING_SIZE);
106 if (rc < 0)
107 return rc;
108
109 /* Read indexes, then verify. */
110 cons = intf->req_cons;
111 prod = intf->req_prod;
112 if (!check_indexes(cons, prod)) {
113 intf->req_cons = intf->req_prod = 0;
114 return -EIO;
115 }
116
117 dst = get_output_chunk(cons, prod, intf->req, &avail);
118 if (avail == 0)
119 continue;
120 if (avail > len)
121 avail = len;
122
123 /* Must write data /after/ reading the consumer index. */
124 mb();
125
126 memcpy(dst, data, avail);
127 data += avail;
128 len -= avail;
129
130 /* Other side must not see new producer until data is there. */
131 wmb();
132 intf->req_prod += avail;
133
134 /* Implies mb(): other side will see the updated producer. */
135 notify_remote_via_evtchn(xen_store_evtchn);
136 }
137
138 return 0;
139}
140
141int xb_data_to_read(void)
142{
143 struct xenstore_domain_interface *intf = xen_store_interface;
144 return (intf->rsp_cons != intf->rsp_prod);
145}
146
147int xb_wait_for_data_to_read(void)
148{
149 return wait_event_interruptible(xb_waitq, xb_data_to_read());
150}
151
152int xb_read(void *data, unsigned len)
153{
154 struct xenstore_domain_interface *intf = xen_store_interface;
155 XENSTORE_RING_IDX cons, prod;
156 int rc;
157
158 while (len != 0) {
159 unsigned int avail;
160 const char *src;
161
162 rc = xb_wait_for_data_to_read();
163 if (rc < 0)
164 return rc;
165
166 /* Read indexes, then verify. */
167 cons = intf->rsp_cons;
168 prod = intf->rsp_prod;
169 if (!check_indexes(cons, prod)) {
170 intf->rsp_cons = intf->rsp_prod = 0;
171 return -EIO;
172 }
173
174 src = get_input_chunk(cons, prod, intf->rsp, &avail);
175 if (avail == 0)
176 continue;
177 if (avail > len)
178 avail = len;
179
180 /* Must read data /after/ reading the producer index. */
181 rmb();
182
183 memcpy(data, src, avail);
184 data += avail;
185 len -= avail;
186
187 /* Other side must not see free space until we've copied out */
188 mb();
189 intf->rsp_cons += avail;
190
191 pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
192
193 /* Implies mb(): other side will see the updated consumer. */
194 notify_remote_via_evtchn(xen_store_evtchn);
195 }
196
197 return 0;
198}
199
200/**
201 * xb_init_comms - Set up interrupt handler off store event channel.
202 */
203int xb_init_comms(void)
204{
205 struct xenstore_domain_interface *intf = xen_store_interface;
206 int err;
207
208 if (intf->req_prod != intf->req_cons)
209 printk(KERN_ERR "XENBUS request ring is not quiescent "
210 "(%08x:%08x)!\n", intf->req_cons, intf->req_prod);
211
212 if (intf->rsp_prod != intf->rsp_cons) {
213 printk(KERN_WARNING "XENBUS response ring is not quiescent "
214 "(%08x:%08x): fixing up\n",
215 intf->rsp_cons, intf->rsp_prod);
216 intf->rsp_cons = intf->rsp_prod;
217 }
218
219 if (xenbus_irq)
220 unbind_from_irqhandler(xenbus_irq, &xb_waitq);
221
222 err = bind_evtchn_to_irqhandler(
223 xen_store_evtchn, wake_waiting,
224 0, "xenbus", &xb_waitq);
225 if (err <= 0) {
226 printk(KERN_ERR "XENBUS request irq failed %i\n", err);
227 return err;
228 }
229
230 xenbus_irq = err;
231
232 return 0;
233}
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h
new file mode 100644
index 000000000000..c21db7513736
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -0,0 +1,46 @@
1/*
2 * Private include for xenbus communications.
3 *
4 * Copyright (C) 2005 Rusty Russell, IBM Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version 2
8 * as published by the Free Software Foundation; or, when distributed
9 * separately from the Linux kernel or incorporated into other
10 * software packages, subject to the following license:
11 *
12 * Permission is hereby granted, free of charge, to any person obtaining a copy
13 * of this source file (the "Software"), to deal in the Software without
14 * restriction, including without limitation the rights to use, copy, modify,
15 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16 * and to permit persons to whom the Software is furnished to do so, subject to
17 * the following conditions:
18 *
19 * The above copyright notice and this permission notice shall be included in
20 * all copies or substantial portions of the Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 * IN THE SOFTWARE.
29 */
30
31#ifndef _XENBUS_COMMS_H
32#define _XENBUS_COMMS_H
33
34int xs_init(void);
35int xb_init_comms(void);
36
37/* Low level routines. */
38int xb_write(const void *data, unsigned len);
39int xb_read(void *data, unsigned len);
40int xb_data_to_read(void);
41int xb_wait_for_data_to_read(void);
42int xs_input_avail(void);
43extern struct xenstore_domain_interface *xen_store_interface;
44extern int xen_store_evtchn;
45
46#endif /* _XENBUS_COMMS_H */
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
new file mode 100644
index 000000000000..0b769f7c4a48
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -0,0 +1,935 @@
1/******************************************************************************
2 * Talks to Xen Store to figure out what devices we have.
3 *
4 * Copyright (C) 2005 Rusty Russell, IBM Corporation
5 * Copyright (C) 2005 Mike Wray, Hewlett-Packard
6 * Copyright (C) 2005, 2006 XenSource Ltd
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#define DPRINTK(fmt, args...) \
34 pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
35 __func__, __LINE__, ##args)
36
37#include <linux/kernel.h>
38#include <linux/err.h>
39#include <linux/string.h>
40#include <linux/ctype.h>
41#include <linux/fcntl.h>
42#include <linux/mm.h>
43#include <linux/notifier.h>
44#include <linux/kthread.h>
45#include <linux/mutex.h>
46#include <linux/io.h>
47
48#include <asm/page.h>
49#include <asm/pgtable.h>
50#include <asm/xen/hypervisor.h>
51#include <xen/xenbus.h>
52#include <xen/events.h>
53#include <xen/page.h>
54
55#include "xenbus_comms.h"
56#include "xenbus_probe.h"
57
58int xen_store_evtchn;
59struct xenstore_domain_interface *xen_store_interface;
60static unsigned long xen_store_mfn;
61
62static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
63
64static void wait_for_devices(struct xenbus_driver *xendrv);
65
66static int xenbus_probe_frontend(const char *type, const char *name);
67
68static void xenbus_dev_shutdown(struct device *_dev);
69
70/* If something in array of ids matches this device, return it. */
71static const struct xenbus_device_id *
72match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
73{
74 for (; *arr->devicetype != '\0'; arr++) {
75 if (!strcmp(arr->devicetype, dev->devicetype))
76 return arr;
77 }
78 return NULL;
79}
80
81int xenbus_match(struct device *_dev, struct device_driver *_drv)
82{
83 struct xenbus_driver *drv = to_xenbus_driver(_drv);
84
85 if (!drv->ids)
86 return 0;
87
88 return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
89}
90
91/* device/<type>/<id> => <type>-<id> */
92static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
93{
94 nodename = strchr(nodename, '/');
95 if (!nodename || strlen(nodename + 1) >= BUS_ID_SIZE) {
96 printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
97 return -EINVAL;
98 }
99
100 strlcpy(bus_id, nodename + 1, BUS_ID_SIZE);
101 if (!strchr(bus_id, '/')) {
102 printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
103 return -EINVAL;
104 }
105 *strchr(bus_id, '/') = '-';
106 return 0;
107}
108
109
110static void free_otherend_details(struct xenbus_device *dev)
111{
112 kfree(dev->otherend);
113 dev->otherend = NULL;
114}
115
116
117static void free_otherend_watch(struct xenbus_device *dev)
118{
119 if (dev->otherend_watch.node) {
120 unregister_xenbus_watch(&dev->otherend_watch);
121 kfree(dev->otherend_watch.node);
122 dev->otherend_watch.node = NULL;
123 }
124}
125
126
127int read_otherend_details(struct xenbus_device *xendev,
128 char *id_node, char *path_node)
129{
130 int err = xenbus_gather(XBT_NIL, xendev->nodename,
131 id_node, "%i", &xendev->otherend_id,
132 path_node, NULL, &xendev->otherend,
133 NULL);
134 if (err) {
135 xenbus_dev_fatal(xendev, err,
136 "reading other end details from %s",
137 xendev->nodename);
138 return err;
139 }
140 if (strlen(xendev->otherend) == 0 ||
141 !xenbus_exists(XBT_NIL, xendev->otherend, "")) {
142 xenbus_dev_fatal(xendev, -ENOENT,
143 "unable to read other end from %s. "
144 "missing or inaccessible.",
145 xendev->nodename);
146 free_otherend_details(xendev);
147 return -ENOENT;
148 }
149
150 return 0;
151}
152
153
154static int read_backend_details(struct xenbus_device *xendev)
155{
156 return read_otherend_details(xendev, "backend-id", "backend");
157}
158
159
160/* Bus type for frontend drivers. */
161static struct xen_bus_type xenbus_frontend = {
162 .root = "device",
163 .levels = 2, /* device/type/<id> */
164 .get_bus_id = frontend_bus_id,
165 .probe = xenbus_probe_frontend,
166 .bus = {
167 .name = "xen",
168 .match = xenbus_match,
169 .probe = xenbus_dev_probe,
170 .remove = xenbus_dev_remove,
171 .shutdown = xenbus_dev_shutdown,
172 },
173};
174
175static void otherend_changed(struct xenbus_watch *watch,
176 const char **vec, unsigned int len)
177{
178 struct xenbus_device *dev =
179 container_of(watch, struct xenbus_device, otherend_watch);
180 struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
181 enum xenbus_state state;
182
183 /* Protect us against watches firing on old details when the otherend
184 details change, say immediately after a resume. */
185 if (!dev->otherend ||
186 strncmp(dev->otherend, vec[XS_WATCH_PATH],
187 strlen(dev->otherend))) {
188 dev_dbg(&dev->dev, "Ignoring watch at %s", vec[XS_WATCH_PATH]);
189 return;
190 }
191
192 state = xenbus_read_driver_state(dev->otherend);
193
194 dev_dbg(&dev->dev, "state is %d, (%s), %s, %s",
195 state, xenbus_strstate(state), dev->otherend_watch.node,
196 vec[XS_WATCH_PATH]);
197
198 /*
199 * Ignore xenbus transitions during shutdown. This prevents us doing
200 * work that can fail e.g., when the rootfs is gone.
201 */
202 if (system_state > SYSTEM_RUNNING) {
 203		struct xen_bus_type *bus =
 204			container_of(dev->dev.bus, struct xen_bus_type, bus);
205 /* If we're frontend, drive the state machine to Closed. */
206 /* This should cause the backend to release our resources. */
207 if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
208 xenbus_frontend_closed(dev);
209 return;
210 }
211
212 if (drv->otherend_changed)
213 drv->otherend_changed(dev, state);
214}
215
216
217static int talk_to_otherend(struct xenbus_device *dev)
218{
219 struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
220
221 free_otherend_watch(dev);
222 free_otherend_details(dev);
223
224 return drv->read_otherend_details(dev);
225}
226
227
228static int watch_otherend(struct xenbus_device *dev)
229{
230 return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed,
231 "%s/%s", dev->otherend, "state");
232}
233
234
235int xenbus_dev_probe(struct device *_dev)
236{
237 struct xenbus_device *dev = to_xenbus_device(_dev);
238 struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
239 const struct xenbus_device_id *id;
240 int err;
241
242 DPRINTK("%s", dev->nodename);
243
244 if (!drv->probe) {
245 err = -ENODEV;
246 goto fail;
247 }
248
249 id = match_device(drv->ids, dev);
250 if (!id) {
251 err = -ENODEV;
252 goto fail;
253 }
254
255 err = talk_to_otherend(dev);
256 if (err) {
257 dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n",
258 dev->nodename);
259 return err;
260 }
261
262 err = drv->probe(dev, id);
263 if (err)
264 goto fail;
265
266 err = watch_otherend(dev);
267 if (err) {
268 dev_warn(&dev->dev, "watch_otherend on %s failed.\n",
269 dev->nodename);
270 return err;
271 }
272
273 return 0;
274fail:
275 xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
276 xenbus_switch_state(dev, XenbusStateClosed);
277 return -ENODEV;
278}
279
280int xenbus_dev_remove(struct device *_dev)
281{
282 struct xenbus_device *dev = to_xenbus_device(_dev);
283 struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
284
285 DPRINTK("%s", dev->nodename);
286
287 free_otherend_watch(dev);
288 free_otherend_details(dev);
289
290 if (drv->remove)
291 drv->remove(dev);
292
293 xenbus_switch_state(dev, XenbusStateClosed);
294 return 0;
295}
296
297static void xenbus_dev_shutdown(struct device *_dev)
298{
299 struct xenbus_device *dev = to_xenbus_device(_dev);
300 unsigned long timeout = 5*HZ;
301
302 DPRINTK("%s", dev->nodename);
303
304 get_device(&dev->dev);
305 if (dev->state != XenbusStateConnected) {
306 printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__,
307 dev->nodename, xenbus_strstate(dev->state));
308 goto out;
309 }
310 xenbus_switch_state(dev, XenbusStateClosing);
311 timeout = wait_for_completion_timeout(&dev->down, timeout);
312 if (!timeout)
313 printk(KERN_INFO "%s: %s timeout closing device\n",
314 __func__, dev->nodename);
315 out:
316 put_device(&dev->dev);
317}
318
319int xenbus_register_driver_common(struct xenbus_driver *drv,
320 struct xen_bus_type *bus,
321 struct module *owner,
322 const char *mod_name)
323{
324 drv->driver.name = drv->name;
325 drv->driver.bus = &bus->bus;
326 drv->driver.owner = owner;
327 drv->driver.mod_name = mod_name;
328
329 return driver_register(&drv->driver);
330}
331
332int __xenbus_register_frontend(struct xenbus_driver *drv,
333 struct module *owner, const char *mod_name)
334{
335 int ret;
336
337 drv->read_otherend_details = read_backend_details;
338
339 ret = xenbus_register_driver_common(drv, &xenbus_frontend,
340 owner, mod_name);
341 if (ret)
342 return ret;
343
344 /* If this driver is loaded as a module wait for devices to attach. */
345 wait_for_devices(drv);
346
347 return 0;
348}
349EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
350
351void xenbus_unregister_driver(struct xenbus_driver *drv)
352{
353 driver_unregister(&drv->driver);
354}
355EXPORT_SYMBOL_GPL(xenbus_unregister_driver);
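
For reference, a hedged sketch of a driver plugging into this bus (all names are hypothetical; xenbus_register_frontend() is assumed to be the <xen/xenbus.h> wrapper that passes THIS_MODULE and the module name to __xenbus_register_frontend()):

static const struct xenbus_device_id mydev_ids[] = {
	{ "mydev" },		/* matches device/mydev/<id> nodes */
	{ "" }
};

static int mydev_probe(struct xenbus_device *dev,
		       const struct xenbus_device_id *id)
{
	/* Allocate state, set up rings, then drive the state machine. */
	return 0;
}

static void mydev_otherend_changed(struct xenbus_device *dev,
				   enum xenbus_state backend_state)
{
	/* React to the backend's state transitions. */
}

static struct xenbus_driver mydev_driver = {
	.name = "mydev",
	.ids = mydev_ids,
	.probe = mydev_probe,
	.otherend_changed = mydev_otherend_changed,
};

static int __init mydev_init(void)
{
	return xenbus_register_frontend(&mydev_driver);
}
module_init(mydev_init);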
356
357struct xb_find_info
358{
359 struct xenbus_device *dev;
360 const char *nodename;
361};
362
363static int cmp_dev(struct device *dev, void *data)
364{
365 struct xenbus_device *xendev = to_xenbus_device(dev);
366 struct xb_find_info *info = data;
367
368 if (!strcmp(xendev->nodename, info->nodename)) {
369 info->dev = xendev;
370 get_device(dev);
371 return 1;
372 }
373 return 0;
374}
375
376struct xenbus_device *xenbus_device_find(const char *nodename,
377 struct bus_type *bus)
378{
379 struct xb_find_info info = { .dev = NULL, .nodename = nodename };
380
381 bus_for_each_dev(bus, NULL, &info, cmp_dev);
382 return info.dev;
383}
384
385static int cleanup_dev(struct device *dev, void *data)
386{
387 struct xenbus_device *xendev = to_xenbus_device(dev);
388 struct xb_find_info *info = data;
389 int len = strlen(info->nodename);
390
391 DPRINTK("%s", info->nodename);
392
393 /* Match the info->nodename path, or any subdirectory of that path. */
394 if (strncmp(xendev->nodename, info->nodename, len))
395 return 0;
396
397 /* If the node name is longer, ensure it really is a subdirectory. */
398 if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/'))
399 return 0;
400
401 info->dev = xendev;
402 get_device(dev);
403 return 1;
404}
405
406static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
407{
408 struct xb_find_info info = { .nodename = path };
409
410 do {
411 info.dev = NULL;
412 bus_for_each_dev(bus, NULL, &info, cleanup_dev);
413 if (info.dev) {
414 device_unregister(&info.dev->dev);
415 put_device(&info.dev->dev);
416 }
417 } while (info.dev);
418}
419
420static void xenbus_dev_release(struct device *dev)
421{
422 if (dev)
423 kfree(to_xenbus_device(dev));
424}
425
426static ssize_t xendev_show_nodename(struct device *dev,
427 struct device_attribute *attr, char *buf)
428{
429 return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
430}
431DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
432
433static ssize_t xendev_show_devtype(struct device *dev,
434 struct device_attribute *attr, char *buf)
435{
436 return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
437}
438DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
439
440
441int xenbus_probe_node(struct xen_bus_type *bus,
442 const char *type,
443 const char *nodename)
444{
445 int err;
446 struct xenbus_device *xendev;
447 size_t stringlen;
448 char *tmpstring;
449
450 enum xenbus_state state = xenbus_read_driver_state(nodename);
451
452 if (state != XenbusStateInitialising) {
453 /* Device is not new, so ignore it. This can happen if a
454 device is going away after switching to Closed. */
455 return 0;
456 }
457
458 stringlen = strlen(nodename) + 1 + strlen(type) + 1;
459 xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
460 if (!xendev)
461 return -ENOMEM;
462
463 xendev->state = XenbusStateInitialising;
464
465 /* Copy the strings into the extra space. */
466
467 tmpstring = (char *)(xendev + 1);
468 strcpy(tmpstring, nodename);
469 xendev->nodename = tmpstring;
470
471 tmpstring += strlen(tmpstring) + 1;
472 strcpy(tmpstring, type);
473 xendev->devicetype = tmpstring;
474 init_completion(&xendev->down);
475
476 xendev->dev.bus = &bus->bus;
477 xendev->dev.release = xenbus_dev_release;
478
479 err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
480 if (err)
481 goto fail;
482
483 /* Register with generic device framework. */
484 err = device_register(&xendev->dev);
485 if (err)
486 goto fail;
487
488 err = device_create_file(&xendev->dev, &dev_attr_nodename);
489 if (err)
490 goto fail_unregister;
491
492 err = device_create_file(&xendev->dev, &dev_attr_devtype);
493 if (err)
494 goto fail_remove_file;
495
496 return 0;
497fail_remove_file:
498 device_remove_file(&xendev->dev, &dev_attr_nodename);
499fail_unregister:
500 device_unregister(&xendev->dev);
501fail:
502 kfree(xendev);
503 return err;
504}
505
506/* device/<typename>/<name> */
507static int xenbus_probe_frontend(const char *type, const char *name)
508{
509 char *nodename;
510 int err;
511
512 nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
513 xenbus_frontend.root, type, name);
514 if (!nodename)
515 return -ENOMEM;
516
517 DPRINTK("%s", nodename);
518
519 err = xenbus_probe_node(&xenbus_frontend, type, nodename);
520 kfree(nodename);
521 return err;
522}
523
524static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
525{
526 int err = 0;
527 char **dir;
528 unsigned int dir_n = 0;
529 int i;
530
531 dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n);
532 if (IS_ERR(dir))
533 return PTR_ERR(dir);
534
535 for (i = 0; i < dir_n; i++) {
536 err = bus->probe(type, dir[i]);
537 if (err)
538 break;
539 }
540 kfree(dir);
541 return err;
542}
543
544int xenbus_probe_devices(struct xen_bus_type *bus)
545{
546 int err = 0;
547 char **dir;
548 unsigned int i, dir_n;
549
550 dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n);
551 if (IS_ERR(dir))
552 return PTR_ERR(dir);
553
554 for (i = 0; i < dir_n; i++) {
555 err = xenbus_probe_device_type(bus, dir[i]);
556 if (err)
557 break;
558 }
559 kfree(dir);
560 return err;
561}
562
563static unsigned int char_count(const char *str, char c)
564{
565 unsigned int i, ret = 0;
566
567 for (i = 0; str[i]; i++)
568 if (str[i] == c)
569 ret++;
570 return ret;
571}
572
573static int strsep_len(const char *str, char c, unsigned int len)
574{
575 unsigned int i;
576
577 for (i = 0; str[i]; i++)
578 if (str[i] == c) {
579 if (len == 0)
580 return i;
581 len--;
582 }
583 return (len == 0) ? i : -ERANGE;
584}
585
586void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
587{
588 int exists, rootlen;
589 struct xenbus_device *dev;
590 char type[BUS_ID_SIZE];
591 const char *p, *root;
592
593 if (char_count(node, '/') < 2)
594 return;
595
596 exists = xenbus_exists(XBT_NIL, node, "");
597 if (!exists) {
598 xenbus_cleanup_devices(node, &bus->bus);
599 return;
600 }
601
602 /* backend/<type>/... or device/<type>/... */
603 p = strchr(node, '/') + 1;
604 snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p);
605 type[BUS_ID_SIZE-1] = '\0';
606
607 rootlen = strsep_len(node, '/', bus->levels);
608 if (rootlen < 0)
609 return;
610 root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node);
611 if (!root)
612 return;
613
614 dev = xenbus_device_find(root, &bus->bus);
615 if (!dev)
616 xenbus_probe_node(bus, type, root);
617 else
618 put_device(&dev->dev);
619
620 kfree(root);
621}
622
623static void frontend_changed(struct xenbus_watch *watch,
624 const char **vec, unsigned int len)
625{
626 DPRINTK("");
627
628 xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
629}
630
631/* We watch for devices appearing and vanishing. */
632static struct xenbus_watch fe_watch = {
633 .node = "device",
634 .callback = frontend_changed,
635};
636
637static int suspend_dev(struct device *dev, void *data)
638{
639 int err = 0;
640 struct xenbus_driver *drv;
641 struct xenbus_device *xdev;
642
643 DPRINTK("");
644
645 if (dev->driver == NULL)
646 return 0;
647 drv = to_xenbus_driver(dev->driver);
648 xdev = container_of(dev, struct xenbus_device, dev);
649 if (drv->suspend)
650 err = drv->suspend(xdev);
651 if (err)
652 printk(KERN_WARNING
653 "xenbus: suspend %s failed: %i\n", dev->bus_id, err);
654 return 0;
655}
656
657static int suspend_cancel_dev(struct device *dev, void *data)
658{
659 int err = 0;
660 struct xenbus_driver *drv;
661 struct xenbus_device *xdev;
662
663 DPRINTK("");
664
665 if (dev->driver == NULL)
666 return 0;
667 drv = to_xenbus_driver(dev->driver);
668 xdev = container_of(dev, struct xenbus_device, dev);
669 if (drv->suspend_cancel)
670 err = drv->suspend_cancel(xdev);
671 if (err)
672 printk(KERN_WARNING
673 "xenbus: suspend_cancel %s failed: %i\n",
674 dev->bus_id, err);
675 return 0;
676}
677
678static int resume_dev(struct device *dev, void *data)
679{
680 int err;
681 struct xenbus_driver *drv;
682 struct xenbus_device *xdev;
683
684 DPRINTK("");
685
686 if (dev->driver == NULL)
687 return 0;
688
689 drv = to_xenbus_driver(dev->driver);
690 xdev = container_of(dev, struct xenbus_device, dev);
691
692 err = talk_to_otherend(xdev);
693 if (err) {
694 printk(KERN_WARNING
695 "xenbus: resume (talk_to_otherend) %s failed: %i\n",
696 dev->bus_id, err);
697 return err;
698 }
699
700 xdev->state = XenbusStateInitialising;
701
702 if (drv->resume) {
703 err = drv->resume(xdev);
704 if (err) {
705 printk(KERN_WARNING
706 "xenbus: resume %s failed: %i\n",
707 dev->bus_id, err);
708 return err;
709 }
710 }
711
712 err = watch_otherend(xdev);
713 if (err) {
714 printk(KERN_WARNING
715 "xenbus_probe: resume (watch_otherend) %s failed: "
716 "%d.\n", dev->bus_id, err);
717 return err;
718 }
719
720 return 0;
721}
722
723void xenbus_suspend(void)
724{
725 DPRINTK("");
726
727 bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
728 xenbus_backend_suspend(suspend_dev);
729 xs_suspend();
730}
731EXPORT_SYMBOL_GPL(xenbus_suspend);
732
733void xenbus_resume(void)
734{
735 xb_init_comms();
736 xs_resume();
737 bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
738 xenbus_backend_resume(resume_dev);
739}
740EXPORT_SYMBOL_GPL(xenbus_resume);
741
742void xenbus_suspend_cancel(void)
743{
744 xs_suspend_cancel();
745 bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
746 xenbus_backend_resume(suspend_cancel_dev);
747}
748EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
749
750/* A flag to determine if xenstored is 'ready' (i.e. has started) */
751int xenstored_ready = 0;
752
753
754int register_xenstore_notifier(struct notifier_block *nb)
755{
756 int ret = 0;
757
758 if (xenstored_ready > 0)
759 ret = nb->notifier_call(nb, 0, NULL);
760 else
761 blocking_notifier_chain_register(&xenstore_chain, nb);
762
763 return ret;
764}
765EXPORT_SYMBOL_GPL(register_xenstore_notifier);
766
767void unregister_xenstore_notifier(struct notifier_block *nb)
768{
769 blocking_notifier_chain_unregister(&xenstore_chain, nb);
770}
771EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
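
A short sketch of the notifier interface above (illustrative; the callback name is made up): code that must wait for xenstore to come up registers a notifier_block and is called back once xenstored is ready, or immediately if it already is.

static int my_xenstore_ready(struct notifier_block *nb,
			     unsigned long event, void *data)
{
	/* It is now safe to talk to xenstore from here. */
	return NOTIFY_DONE;
}

static struct notifier_block my_xenstore_nb = {
	.notifier_call = my_xenstore_ready,
};

/* In some init path: register_xenstore_notifier(&my_xenstore_nb); */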
772
773void xenbus_probe(struct work_struct *unused)
774{
 775	BUG_ON(xenstored_ready <= 0);
776
777 /* Enumerate devices in xenstore and watch for changes. */
778 xenbus_probe_devices(&xenbus_frontend);
779 register_xenbus_watch(&fe_watch);
780 xenbus_backend_probe_and_watch();
781
782 /* Notify others that xenstore is up */
783 blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
784}
785
786static int __init xenbus_probe_init(void)
787{
788 int err = 0;
789
790 DPRINTK("");
791
792 err = -ENODEV;
793 if (!is_running_on_xen())
794 goto out_error;
795
796 /* Register ourselves with the kernel bus subsystem */
797 err = bus_register(&xenbus_frontend.bus);
798 if (err)
799 goto out_error;
800
801 err = xenbus_backend_bus_register();
802 if (err)
803 goto out_unreg_front;
804
805 /*
806 * Domain0 doesn't have a store_evtchn or store_mfn yet.
807 */
808 if (is_initial_xendomain()) {
809 /* dom0 not yet supported */
810 } else {
811 xenstored_ready = 1;
812 xen_store_evtchn = xen_start_info->store_evtchn;
813 xen_store_mfn = xen_start_info->store_mfn;
814 }
815 xen_store_interface = mfn_to_virt(xen_store_mfn);
816
817 /* Initialize the interface to xenstore. */
818 err = xs_init();
819 if (err) {
820 printk(KERN_WARNING
821 "XENBUS: Error initializing xenstore comms: %i\n", err);
822 goto out_unreg_back;
823 }
824
825 if (!is_initial_xendomain())
826 xenbus_probe(NULL);
827
828 return 0;
829
830 out_unreg_back:
831 xenbus_backend_bus_unregister();
832
833 out_unreg_front:
834 bus_unregister(&xenbus_frontend.bus);
835
836 out_error:
837 return err;
838}
839
840postcore_initcall(xenbus_probe_init);
841
842MODULE_LICENSE("GPL");
843
844static int is_disconnected_device(struct device *dev, void *data)
845{
846 struct xenbus_device *xendev = to_xenbus_device(dev);
847 struct device_driver *drv = data;
848
849 /*
850 * A device with no driver will never connect. We care only about
851 * devices which should currently be in the process of connecting.
852 */
853 if (!dev->driver)
854 return 0;
855
856 /* Is this search limited to a particular driver? */
857 if (drv && (dev->driver != drv))
858 return 0;
859
860 return (xendev->state != XenbusStateConnected);
861}
862
863static int exists_disconnected_device(struct device_driver *drv)
864{
865 return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
866 is_disconnected_device);
867}
868
869static int print_device_status(struct device *dev, void *data)
870{
871 struct xenbus_device *xendev = to_xenbus_device(dev);
872 struct device_driver *drv = data;
873
874 /* Is this operation limited to a particular driver? */
875 if (drv && (dev->driver != drv))
876 return 0;
877
878 if (!dev->driver) {
879 /* Information only: is this too noisy? */
880 printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
881 xendev->nodename);
882 } else if (xendev->state != XenbusStateConnected) {
883 printk(KERN_WARNING "XENBUS: Timeout connecting "
884 "to device: %s (state %d)\n",
885 xendev->nodename, xendev->state);
886 }
887
888 return 0;
889}
890
891/* We only wait for device setup after most initcalls have run. */
892static int ready_to_wait_for_devices;
893
894/*
895 * On a 10 second timeout, wait for all devices currently configured. We need
896 * to do this to guarantee that the filesystems and / or network devices
897 * needed for boot are available, before we can allow the boot to proceed.
898 *
899 * This needs to be on a late_initcall, to happen after the frontend device
900 * drivers have been initialised, but before the root fs is mounted.
901 *
902 * A possible improvement here would be to have the tools add a per-device
903 * flag to the store entry, indicating whether it is needed at boot time.
904 * This would allow people who knew what they were doing to accelerate their
905 * boot slightly, but of course needs tools or manual intervention to set up
906 * those flags correctly.
907 */
908static void wait_for_devices(struct xenbus_driver *xendrv)
909{
910 unsigned long timeout = jiffies + 10*HZ;
911 struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
912
913 if (!ready_to_wait_for_devices || !is_running_on_xen())
914 return;
915
916 while (exists_disconnected_device(drv)) {
917 if (time_after(jiffies, timeout))
918 break;
919 schedule_timeout_interruptible(HZ/10);
920 }
921
922 bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
923 print_device_status);
924}
925
926#ifndef MODULE
927static int __init boot_wait_for_devices(void)
928{
929 ready_to_wait_for_devices = 1;
930 wait_for_devices(NULL);
931 return 0;
932}
933
934late_initcall(boot_wait_for_devices);
935#endif
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
new file mode 100644
index 000000000000..e09b19415a40
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -0,0 +1,74 @@
1/******************************************************************************
2 * xenbus_probe.h
3 *
4 * Talks to Xen Store to figure out what devices we have.
5 *
6 * Copyright (C) 2005 Rusty Russell, IBM Corporation
7 * Copyright (C) 2005 XenSource Ltd.
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#ifndef _XENBUS_PROBE_H
35#define _XENBUS_PROBE_H
36
37#ifdef CONFIG_XEN_BACKEND
38extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
39extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
40extern void xenbus_backend_probe_and_watch(void);
41extern int xenbus_backend_bus_register(void);
42extern void xenbus_backend_bus_unregister(void);
43#else
44static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
45static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
46static inline void xenbus_backend_probe_and_watch(void) {}
47static inline int xenbus_backend_bus_register(void) { return 0; }
48static inline void xenbus_backend_bus_unregister(void) {}
49#endif
50
51struct xen_bus_type
52{
53 char *root;
54 unsigned int levels;
55 int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
56 int (*probe)(const char *type, const char *dir);
57 struct bus_type bus;
58};
59
60extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
61extern int xenbus_dev_probe(struct device *_dev);
62extern int xenbus_dev_remove(struct device *_dev);
63extern int xenbus_register_driver_common(struct xenbus_driver *drv,
64 struct xen_bus_type *bus,
65 struct module *owner,
66 const char *mod_name);
67extern int xenbus_probe_node(struct xen_bus_type *bus,
68 const char *type,
69 const char *nodename);
70extern int xenbus_probe_devices(struct xen_bus_type *bus);
71
72extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
73
74#endif
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
new file mode 100644
index 000000000000..9e943fbce81b
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -0,0 +1,861 @@
1/******************************************************************************
2 * xenbus_xs.c
3 *
4 * This is the kernel equivalent of the "xs" library. We don't need everything
5 * and we use xenbus_comms for communication.
6 *
7 * Copyright (C) 2005 Rusty Russell, IBM Corporation
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#include <linux/unistd.h>
35#include <linux/errno.h>
36#include <linux/types.h>
37#include <linux/uio.h>
38#include <linux/kernel.h>
39#include <linux/string.h>
40#include <linux/err.h>
41#include <linux/slab.h>
42#include <linux/fcntl.h>
43#include <linux/kthread.h>
44#include <linux/rwsem.h>
45#include <linux/module.h>
46#include <linux/mutex.h>
47#include <xen/xenbus.h>
48#include "xenbus_comms.h"
49
50struct xs_stored_msg {
51 struct list_head list;
52
53 struct xsd_sockmsg hdr;
54
55 union {
56 /* Queued replies. */
57 struct {
58 char *body;
59 } reply;
60
61 /* Queued watch events. */
62 struct {
63 struct xenbus_watch *handle;
64 char **vec;
65 unsigned int vec_size;
66 } watch;
67 } u;
68};
69
70struct xs_handle {
71 /* A list of replies. Currently only one will ever be outstanding. */
72 struct list_head reply_list;
73 spinlock_t reply_lock;
74 wait_queue_head_t reply_waitq;
75
76 /*
77 * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex.
78 * response_mutex is never taken simultaneously with the other three.
79 */
80
81 /* One request at a time. */
82 struct mutex request_mutex;
83
84 /* Protect xenbus reader thread against save/restore. */
85 struct mutex response_mutex;
86
87 /* Protect transactions against save/restore. */
88 struct rw_semaphore transaction_mutex;
89
90 /* Protect watch (de)register against save/restore. */
91 struct rw_semaphore watch_mutex;
92};
93
94static struct xs_handle xs_state;
95
96/* List of registered watches, and a lock to protect it. */
97static LIST_HEAD(watches);
98static DEFINE_SPINLOCK(watches_lock);
99
100/* List of pending watch callback events, and a lock to protect it. */
101static LIST_HEAD(watch_events);
102static DEFINE_SPINLOCK(watch_events_lock);
103
104/*
105 * Details of the xenwatch callback kernel thread. The thread waits on the
106 * watch_events_waitq for work to do (queued on watch_events list). When it
107 * wakes up it acquires the xenwatch_mutex before reading the list and
108 * carrying out work.
109 */
110static pid_t xenwatch_pid;
111static DEFINE_MUTEX(xenwatch_mutex);
112static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
113
114static int get_error(const char *errorstring)
115{
116 unsigned int i;
117
118 for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) {
119 if (i == ARRAY_SIZE(xsd_errors) - 1) {
120 printk(KERN_WARNING
121		       "XENBUS xen store gave: unknown error %s\n",
122 errorstring);
123 return EINVAL;
124 }
125 }
126 return xsd_errors[i].errnum;
127}
128
129static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
130{
131 struct xs_stored_msg *msg;
132 char *body;
133
134 spin_lock(&xs_state.reply_lock);
135
136 while (list_empty(&xs_state.reply_list)) {
137 spin_unlock(&xs_state.reply_lock);
138 /* XXX FIXME: Avoid synchronous wait for response here. */
139 wait_event(xs_state.reply_waitq,
140 !list_empty(&xs_state.reply_list));
141 spin_lock(&xs_state.reply_lock);
142 }
143
144 msg = list_entry(xs_state.reply_list.next,
145 struct xs_stored_msg, list);
146 list_del(&msg->list);
147
148 spin_unlock(&xs_state.reply_lock);
149
150 *type = msg->hdr.type;
151 if (len)
152 *len = msg->hdr.len;
153 body = msg->u.reply.body;
154
155 kfree(msg);
156
157 return body;
158}
159
160void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
161{
162 void *ret;
163 struct xsd_sockmsg req_msg = *msg;
164 int err;
165
166 if (req_msg.type == XS_TRANSACTION_START)
167 down_read(&xs_state.transaction_mutex);
168
169 mutex_lock(&xs_state.request_mutex);
170
171 err = xb_write(msg, sizeof(*msg) + msg->len);
172 if (err) {
173 msg->type = XS_ERROR;
174 ret = ERR_PTR(err);
175 } else
176 ret = read_reply(&msg->type, &msg->len);
177
178 mutex_unlock(&xs_state.request_mutex);
179
180 if ((msg->type == XS_TRANSACTION_END) ||
181 ((req_msg.type == XS_TRANSACTION_START) &&
182 (msg->type == XS_ERROR)))
183 up_read(&xs_state.transaction_mutex);
184
185 return ret;
186}
187
188/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */
189static void *xs_talkv(struct xenbus_transaction t,
190 enum xsd_sockmsg_type type,
191 const struct kvec *iovec,
192 unsigned int num_vecs,
193 unsigned int *len)
194{
195 struct xsd_sockmsg msg;
196 void *ret = NULL;
197 unsigned int i;
198 int err;
199
200 msg.tx_id = t.id;
201 msg.req_id = 0;
202 msg.type = type;
203 msg.len = 0;
204 for (i = 0; i < num_vecs; i++)
205 msg.len += iovec[i].iov_len;
206
207 mutex_lock(&xs_state.request_mutex);
208
209 err = xb_write(&msg, sizeof(msg));
210 if (err) {
211 mutex_unlock(&xs_state.request_mutex);
212 return ERR_PTR(err);
213 }
214
215 for (i = 0; i < num_vecs; i++) {
216 err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
217 if (err) {
218 mutex_unlock(&xs_state.request_mutex);
219 return ERR_PTR(err);
220 }
221 }
222
223 ret = read_reply(&msg.type, len);
224
225 mutex_unlock(&xs_state.request_mutex);
226
227 if (IS_ERR(ret))
228 return ret;
229
230 if (msg.type == XS_ERROR) {
231 err = get_error(ret);
232 kfree(ret);
233 return ERR_PTR(-err);
234 }
235
236 if (msg.type != type) {
237 if (printk_ratelimit())
238 printk(KERN_WARNING
239 "XENBUS unexpected type [%d], expected [%d]\n",
240 msg.type, type);
241 kfree(ret);
242 return ERR_PTR(-EINVAL);
243 }
244 return ret;
245}
246
247/* Simplified version of xs_talkv: single message. */
248static void *xs_single(struct xenbus_transaction t,
249 enum xsd_sockmsg_type type,
250 const char *string,
251 unsigned int *len)
252{
253 struct kvec iovec;
254
255 iovec.iov_base = (void *)string;
256 iovec.iov_len = strlen(string) + 1;
257 return xs_talkv(t, type, &iovec, 1, len);
258}
259
260/* Many commands only need an ack; we don't care what it says. */
261static int xs_error(char *reply)
262{
263 if (IS_ERR(reply))
264 return PTR_ERR(reply);
265 kfree(reply);
266 return 0;
267}
268
269static unsigned int count_strings(const char *strings, unsigned int len)
270{
271 unsigned int num;
272 const char *p;
273
274 for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
275 num++;
276
277 return num;
278}
279
280/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */
281static char *join(const char *dir, const char *name)
282{
283 char *buffer;
284
285 if (strlen(name) == 0)
286 buffer = kasprintf(GFP_KERNEL, "%s", dir);
287 else
288 buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
289 return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
290}
291
292static char **split(char *strings, unsigned int len, unsigned int *num)
293{
294 char *p, **ret;
295
296 /* Count the strings. */
297 *num = count_strings(strings, len);
298
299 /* Transfer to one big alloc for easy freeing. */
300 ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL);
301 if (!ret) {
302 kfree(strings);
303 return ERR_PTR(-ENOMEM);
304 }
305 memcpy(&ret[*num], strings, len);
306 kfree(strings);
307
308 strings = (char *)&ret[*num];
309 for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
310 ret[(*num)++] = p;
311
312 return ret;
313}
314
315char **xenbus_directory(struct xenbus_transaction t,
316 const char *dir, const char *node, unsigned int *num)
317{
318 char *strings, *path;
319 unsigned int len;
320
321 path = join(dir, node);
322 if (IS_ERR(path))
323 return (char **)path;
324
325 strings = xs_single(t, XS_DIRECTORY, path, &len);
326 kfree(path);
327 if (IS_ERR(strings))
328 return (char **)strings;
329
330 return split(strings, len, num);
331}
332EXPORT_SYMBOL_GPL(xenbus_directory);
333
334/* Check if a path exists. Return 1 if it does. */
335int xenbus_exists(struct xenbus_transaction t,
336 const char *dir, const char *node)
337{
338 char **d;
339	unsigned int dir_n;
340
341 d = xenbus_directory(t, dir, node, &dir_n);
342 if (IS_ERR(d))
343 return 0;
344 kfree(d);
345 return 1;
346}
347EXPORT_SYMBOL_GPL(xenbus_exists);
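
/*
 * Illustrative usage only, not part of this patch: enumerate a xenstore
 * directory and test for a child node.  The path "device/vif/0" and the
 * node name "backend-id" are made-up examples.
 */
static void example_list_dir(void)
{
	char **entries;
	unsigned int i, n;

	entries = xenbus_directory(XBT_NIL, "device/vif/0", "", &n);
	if (IS_ERR(entries))
		return;
	for (i = 0; i < n; i++)
		printk(KERN_DEBUG "entry: %s\n", entries[i]);
	kfree(entries);	/* one allocation holds both the array and the strings */

	if (xenbus_exists(XBT_NIL, "device/vif/0", "backend-id"))
		printk(KERN_DEBUG "backend-id is present\n");
}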
348
349/* Get the value of a single file.
350 * Returns a kmalloced value: call kfree() on it after use.
351 * len indicates length in bytes.
352 */
353void *xenbus_read(struct xenbus_transaction t,
354 const char *dir, const char *node, unsigned int *len)
355{
356 char *path;
357 void *ret;
358
359 path = join(dir, node);
360 if (IS_ERR(path))
361 return (void *)path;
362
363 ret = xs_single(t, XS_READ, path, len);
364 kfree(path);
365 return ret;
366}
367EXPORT_SYMBOL_GPL(xenbus_read);
368
369/* Write the value of a single file.
370 * Returns -err on failure.
371 */
372int xenbus_write(struct xenbus_transaction t,
373 const char *dir, const char *node, const char *string)
374{
375 const char *path;
376 struct kvec iovec[2];
377 int ret;
378
379 path = join(dir, node);
380 if (IS_ERR(path))
381 return PTR_ERR(path);
382
383 iovec[0].iov_base = (void *)path;
384 iovec[0].iov_len = strlen(path) + 1;
385 iovec[1].iov_base = (void *)string;
386 iovec[1].iov_len = strlen(string);
387
388 ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
389 kfree(path);
390 return ret;
391}
392EXPORT_SYMBOL_GPL(xenbus_write);
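
/*
 * Illustrative usage only, not part of this patch: read one node and
 * write another.  xenbus_read() hands back a kmalloc'ed, NUL-terminated
 * buffer (process_msg() below terminates reply bodies); the paths here
 * are made up.
 */
static void example_read_write(void)
{
	unsigned int len;
	char *state;

	state = xenbus_read(XBT_NIL, "device/vif/0", "state", &len);
	if (!IS_ERR(state)) {
		printk(KERN_DEBUG "state (%u bytes): %s\n", len, state);
		kfree(state);
	}

	if (xenbus_write(XBT_NIL, "device/vif/0", "note", "hello") != 0)
		printk(KERN_DEBUG "write failed\n");
}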
393
394/* Create a new directory. */
395int xenbus_mkdir(struct xenbus_transaction t,
396 const char *dir, const char *node)
397{
398 char *path;
399 int ret;
400
401 path = join(dir, node);
402 if (IS_ERR(path))
403 return PTR_ERR(path);
404
405 ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
406 kfree(path);
407 return ret;
408}
409EXPORT_SYMBOL_GPL(xenbus_mkdir);
410
411/* Destroy a file or directory (directories must be empty). */
412int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
413{
414 char *path;
415 int ret;
416
417 path = join(dir, node);
418 if (IS_ERR(path))
419 return PTR_ERR(path);
420
421 ret = xs_error(xs_single(t, XS_RM, path, NULL));
422 kfree(path);
423 return ret;
424}
425EXPORT_SYMBOL_GPL(xenbus_rm);
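
/*
 * Illustrative usage only, not part of this patch: create a scratch
 * directory and remove it again ("data"/"example" are made-up names).
 */
static void example_mkdir_rm(void)
{
	if (xenbus_mkdir(XBT_NIL, "data", "example") == 0)
		xenbus_rm(XBT_NIL, "data", "example");
}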
426
427/* Start a transaction: changes by others will not be seen during this
428 * transaction, and changes will not be visible to others until end.
429 */
430int xenbus_transaction_start(struct xenbus_transaction *t)
431{
432 char *id_str;
433
434 down_read(&xs_state.transaction_mutex);
435
436 id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
437 if (IS_ERR(id_str)) {
438 up_read(&xs_state.transaction_mutex);
439 return PTR_ERR(id_str);
440 }
441
442 t->id = simple_strtoul(id_str, NULL, 0);
443 kfree(id_str);
444 return 0;
445}
446EXPORT_SYMBOL_GPL(xenbus_transaction_start);
447
448/* End a transaction.
449 * If abort is true, the transaction is discarded instead of committed.
450 */
451int xenbus_transaction_end(struct xenbus_transaction t, int abort)
452{
453 char abortstr[2];
454 int err;
455
456 if (abort)
457 strcpy(abortstr, "F");
458 else
459 strcpy(abortstr, "T");
460
461 err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
462
463 up_read(&xs_state.transaction_mutex);
464
465 return err;
466}
467EXPORT_SYMBOL_GPL(xenbus_transaction_end);
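
/*
 * Illustrative usage only, not part of this patch: group two writes in
 * one transaction.  Retrying a commit that fails with -EAGAIN is the
 * usual xenstore convention; that behaviour is assumed here, it is not
 * implemented in this file.
 */
static int example_transaction(void)
{
	struct xenbus_transaction t;
	int err;

again:
	err = xenbus_transaction_start(&t);
	if (err)
		return err;

	err = xenbus_write(t, "device/vif/0", "a", "1");
	if (!err)
		err = xenbus_write(t, "device/vif/0", "b", "2");
	if (err) {
		xenbus_transaction_end(t, 1);	/* abort */
		return err;
	}

	err = xenbus_transaction_end(t, 0);	/* commit */
	if (err == -EAGAIN)
		goto again;
	return err;
}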
468
469/* Single read and scanf: returns -errno or num scanned. */
470int xenbus_scanf(struct xenbus_transaction t,
471 const char *dir, const char *node, const char *fmt, ...)
472{
473 va_list ap;
474 int ret;
475 char *val;
476
477 val = xenbus_read(t, dir, node, NULL);
478 if (IS_ERR(val))
479 return PTR_ERR(val);
480
481 va_start(ap, fmt);
482 ret = vsscanf(val, fmt, ap);
483 va_end(ap);
484 kfree(val);
485 /* Distinctive errno. */
486 if (ret == 0)
487 return -ERANGE;
488 return ret;
489}
490EXPORT_SYMBOL_GPL(xenbus_scanf);
491
492/* Single printf and write: returns -errno or 0. */
493int xenbus_printf(struct xenbus_transaction t,
494 const char *dir, const char *node, const char *fmt, ...)
495{
496 va_list ap;
497 int ret;
498#define PRINTF_BUFFER_SIZE 4096
499 char *printf_buffer;
500
501 printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
502 if (printf_buffer == NULL)
503 return -ENOMEM;
504
505 va_start(ap, fmt);
506 ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
507 va_end(ap);
508
509 BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
510 ret = xenbus_write(t, dir, node, printf_buffer);
511
512 kfree(printf_buffer);
513
514 return ret;
515}
516EXPORT_SYMBOL_GPL(xenbus_printf);
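
/*
 * Illustrative usage only, not part of this patch: the formatted
 * helpers wrap xenbus_read()/xenbus_write().  Paths and node names are
 * made up.
 */
static void example_formatted(void)
{
	int state;

	if (xenbus_scanf(XBT_NIL, "device/vif/0", "state", "%d", &state) == 1)
		xenbus_printf(XBT_NIL, "device/vif/0", "seen-state", "%d", state);
}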
517
518/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
519int xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
520{
521 va_list ap;
522 const char *name;
523 int ret = 0;
524
525 va_start(ap, dir);
526 while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
527 const char *fmt = va_arg(ap, char *);
528 void *result = va_arg(ap, void *);
529 char *p;
530
531 p = xenbus_read(t, dir, name, NULL);
532 if (IS_ERR(p)) {
533 ret = PTR_ERR(p);
534 break;
535 }
536 if (fmt) {
537 if (sscanf(p, fmt, result) == 0)
538 ret = -EINVAL;
539 kfree(p);
540 } else
541 *(char **)result = p;
542 }
543 va_end(ap);
544 return ret;
545}
546EXPORT_SYMBOL_GPL(xenbus_gather);
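
/*
 * Illustrative usage only, not part of this patch: xenbus_gather()
 * reads several nodes in one call.  A NULL format stores the raw
 * kmalloc'ed string, which the caller must kfree(); the node names are
 * made up.
 */
static int example_gather(void)
{
	unsigned long handle;
	char *mac;
	int err;

	err = xenbus_gather(XBT_NIL, "device/vif/0",
			    "handle", "%lu", &handle,
			    "mac", NULL, &mac,
			    NULL);
	if (err)
		return err;

	printk(KERN_DEBUG "handle %lu, mac %s\n", handle, mac);
	kfree(mac);
	return 0;
}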
547
548static int xs_watch(const char *path, const char *token)
549{
550 struct kvec iov[2];
551
552 iov[0].iov_base = (void *)path;
553 iov[0].iov_len = strlen(path) + 1;
554 iov[1].iov_base = (void *)token;
555 iov[1].iov_len = strlen(token) + 1;
556
557 return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov,
558 ARRAY_SIZE(iov), NULL));
559}
560
561static int xs_unwatch(const char *path, const char *token)
562{
563 struct kvec iov[2];
564
565 iov[0].iov_base = (char *)path;
566 iov[0].iov_len = strlen(path) + 1;
567 iov[1].iov_base = (char *)token;
568 iov[1].iov_len = strlen(token) + 1;
569
570 return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov,
571 ARRAY_SIZE(iov), NULL));
572}
573
574static struct xenbus_watch *find_watch(const char *token)
575{
576 struct xenbus_watch *i, *cmp;
577
578 cmp = (void *)simple_strtoul(token, NULL, 16);
579
580 list_for_each_entry(i, &watches, list)
581 if (i == cmp)
582 return i;
583
584 return NULL;
585}
586
587/* Register callback to watch this node. */
588int register_xenbus_watch(struct xenbus_watch *watch)
589{
590 /* Pointer in ascii is the token. */
591 char token[sizeof(watch) * 2 + 1];
592 int err;
593
594 sprintf(token, "%lX", (long)watch);
595
596 down_read(&xs_state.watch_mutex);
597
598 spin_lock(&watches_lock);
599 BUG_ON(find_watch(token));
600 list_add(&watch->list, &watches);
601 spin_unlock(&watches_lock);
602
603 err = xs_watch(watch->node, token);
604
605 /* Ignore errors due to multiple registration. */
606 if ((err != 0) && (err != -EEXIST)) {
607 spin_lock(&watches_lock);
608 list_del(&watch->list);
609 spin_unlock(&watches_lock);
610 }
611
612 up_read(&xs_state.watch_mutex);
613
614 return err;
615}
616EXPORT_SYMBOL_GPL(register_xenbus_watch);
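
/*
 * Illustrative usage only, not part of this patch: registering a watch.
 * The callback signature is inferred from the dispatch in
 * xenwatch_thread() below; the watched path is made up.  Callbacks run
 * in the xenwatch thread, serialised by xenwatch_mutex.
 */
static void example_watch_cb(struct xenbus_watch *watch,
			     const char **vec, unsigned int len)
{
	printk(KERN_DEBUG "xenstore changed under %s\n", watch->node);
}

static struct xenbus_watch example_watch = {
	.node		= "device/vif/0/state",
	.callback	= example_watch_cb,
};

static int example_register_watch(void)
{
	return register_xenbus_watch(&example_watch);
}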
617
618void unregister_xenbus_watch(struct xenbus_watch *watch)
619{
620 struct xs_stored_msg *msg, *tmp;
621 char token[sizeof(watch) * 2 + 1];
622 int err;
623
624 sprintf(token, "%lX", (long)watch);
625
626 down_read(&xs_state.watch_mutex);
627
628 spin_lock(&watches_lock);
629 BUG_ON(!find_watch(token));
630 list_del(&watch->list);
631 spin_unlock(&watches_lock);
632
633 err = xs_unwatch(watch->node, token);
634 if (err)
635 printk(KERN_WARNING
636 "XENBUS Failed to release watch %s: %i\n",
637 watch->node, err);
638
639 up_read(&xs_state.watch_mutex);
640
641 /* Make sure there are no callbacks running currently (unless
642	   it's us). */
643 if (current->pid != xenwatch_pid)
644 mutex_lock(&xenwatch_mutex);
645
646 /* Cancel pending watch events. */
647 spin_lock(&watch_events_lock);
648 list_for_each_entry_safe(msg, tmp, &watch_events, list) {
649 if (msg->u.watch.handle != watch)
650 continue;
651 list_del(&msg->list);
652 kfree(msg->u.watch.vec);
653 kfree(msg);
654 }
655 spin_unlock(&watch_events_lock);
656
657 if (current->pid != xenwatch_pid)
658 mutex_unlock(&xenwatch_mutex);
659}
660EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
661
662void xs_suspend(void)
663{
664 down_write(&xs_state.transaction_mutex);
665 down_write(&xs_state.watch_mutex);
666 mutex_lock(&xs_state.request_mutex);
667 mutex_lock(&xs_state.response_mutex);
668}
669
670void xs_resume(void)
671{
672 struct xenbus_watch *watch;
673 char token[sizeof(watch) * 2 + 1];
674
675 mutex_unlock(&xs_state.response_mutex);
676 mutex_unlock(&xs_state.request_mutex);
677 up_write(&xs_state.transaction_mutex);
678
679 /* No need for watches_lock: the watch_mutex is sufficient. */
680 list_for_each_entry(watch, &watches, list) {
681 sprintf(token, "%lX", (long)watch);
682 xs_watch(watch->node, token);
683 }
684
685 up_write(&xs_state.watch_mutex);
686}
687
688void xs_suspend_cancel(void)
689{
690 mutex_unlock(&xs_state.response_mutex);
691 mutex_unlock(&xs_state.request_mutex);
692 up_write(&xs_state.watch_mutex);
693 up_write(&xs_state.transaction_mutex);
694}
695
696static int xenwatch_thread(void *unused)
697{
698 struct list_head *ent;
699 struct xs_stored_msg *msg;
700
701 for (;;) {
702 wait_event_interruptible(watch_events_waitq,
703 !list_empty(&watch_events));
704
705 if (kthread_should_stop())
706 break;
707
708 mutex_lock(&xenwatch_mutex);
709
710 spin_lock(&watch_events_lock);
711 ent = watch_events.next;
712 if (ent != &watch_events)
713 list_del(ent);
714 spin_unlock(&watch_events_lock);
715
716 if (ent != &watch_events) {
717 msg = list_entry(ent, struct xs_stored_msg, list);
718 msg->u.watch.handle->callback(
719 msg->u.watch.handle,
720 (const char **)msg->u.watch.vec,
721 msg->u.watch.vec_size);
722 kfree(msg->u.watch.vec);
723 kfree(msg);
724 }
725
726 mutex_unlock(&xenwatch_mutex);
727 }
728
729 return 0;
730}
731
732static int process_msg(void)
733{
734 struct xs_stored_msg *msg;
735 char *body;
736 int err;
737
738 /*
739 * We must disallow save/restore while reading a xenstore message.
740 * A partial read across s/r leaves us out of sync with xenstored.
741 */
742 for (;;) {
743 err = xb_wait_for_data_to_read();
744 if (err)
745 return err;
746 mutex_lock(&xs_state.response_mutex);
747 if (xb_data_to_read())
748 break;
749 /* We raced with save/restore: pending data 'disappeared'. */
750 mutex_unlock(&xs_state.response_mutex);
751 }
752
753
754 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
755 if (msg == NULL) {
756 err = -ENOMEM;
757 goto out;
758 }
759
760 err = xb_read(&msg->hdr, sizeof(msg->hdr));
761 if (err) {
762 kfree(msg);
763 goto out;
764 }
765
766 body = kmalloc(msg->hdr.len + 1, GFP_KERNEL);
767 if (body == NULL) {
768 kfree(msg);
769 err = -ENOMEM;
770 goto out;
771 }
772
773 err = xb_read(body, msg->hdr.len);
774 if (err) {
775 kfree(body);
776 kfree(msg);
777 goto out;
778 }
779 body[msg->hdr.len] = '\0';
780
781 if (msg->hdr.type == XS_WATCH_EVENT) {
782 msg->u.watch.vec = split(body, msg->hdr.len,
783 &msg->u.watch.vec_size);
784 if (IS_ERR(msg->u.watch.vec)) {
785 kfree(msg);
786 err = PTR_ERR(msg->u.watch.vec);
787 goto out;
788 }
789
790 spin_lock(&watches_lock);
791 msg->u.watch.handle = find_watch(
792 msg->u.watch.vec[XS_WATCH_TOKEN]);
793 if (msg->u.watch.handle != NULL) {
794 spin_lock(&watch_events_lock);
795 list_add_tail(&msg->list, &watch_events);
796 wake_up(&watch_events_waitq);
797 spin_unlock(&watch_events_lock);
798 } else {
799 kfree(msg->u.watch.vec);
800 kfree(msg);
801 }
802 spin_unlock(&watches_lock);
803 } else {
804 msg->u.reply.body = body;
805 spin_lock(&xs_state.reply_lock);
806 list_add_tail(&msg->list, &xs_state.reply_list);
807 spin_unlock(&xs_state.reply_lock);
808 wake_up(&xs_state.reply_waitq);
809 }
810
811 out:
812 mutex_unlock(&xs_state.response_mutex);
813 return err;
814}
815
816static int xenbus_thread(void *unused)
817{
818 int err;
819
820 for (;;) {
821 err = process_msg();
822 if (err)
823 printk(KERN_WARNING "XENBUS error %d while reading "
824 "message\n", err);
825 if (kthread_should_stop())
826 break;
827 }
828
829 return 0;
830}
831
832int xs_init(void)
833{
834 int err;
835 struct task_struct *task;
836
837 INIT_LIST_HEAD(&xs_state.reply_list);
838 spin_lock_init(&xs_state.reply_lock);
839 init_waitqueue_head(&xs_state.reply_waitq);
840
841 mutex_init(&xs_state.request_mutex);
842 mutex_init(&xs_state.response_mutex);
843 init_rwsem(&xs_state.transaction_mutex);
844 init_rwsem(&xs_state.watch_mutex);
845
846 /* Initialize the shared memory rings to talk to xenstored */
847 err = xb_init_comms();
848 if (err)
849 return err;
850
851 task = kthread_run(xenwatch_thread, NULL, "xenwatch");
852 if (IS_ERR(task))
853 return PTR_ERR(task);
854 xenwatch_pid = task->pid;
855
856 task = kthread_run(xenbus_thread, NULL, "xenbus");
857 if (IS_ERR(task))
858 return PTR_ERR(task);
859
860 return 0;
861}