author     Rusty Russell <rusty@rustcorp.com.au>   2007-10-21 21:20:02 -0400
committer  Rusty Russell <rusty@rustcorp.com.au>   2007-10-23 01:49:55 -0400
commit     0ca49ca946409f87a8cd0b14d5acb6dea58de6f3 (patch)
tree       5f5927f1b0bf46998f4132d3628ae4c51e5ccf5a /drivers/block
parent     0a8a69dd77ddbd4513b21363021ecde7e1025502 (diff)
Remove old lguest bus and drivers.
This gets rid of the lguest bus, drivers and DMA mechanism, to make
way for a generic virtio mechanism.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/Makefile      |   1
-rw-r--r--  drivers/block/lguest_blk.c  | 421
2 files changed, 0 insertions, 422 deletions
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index d199eba7a080..7691505a2e12 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -32,4 +32,3 @@ obj-$(CONFIG_BLK_DEV_SX8)	+= sx8.o
 obj-$(CONFIG_BLK_DEV_UB)	+= ub.o
 
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o
-obj-$(CONFIG_LGUEST_BLOCK)	+= lguest_blk.o
diff --git a/drivers/block/lguest_blk.c b/drivers/block/lguest_blk.c
deleted file mode 100644
index fa8e42341b87..000000000000
--- a/drivers/block/lguest_blk.c
+++ /dev/null
@@ -1,421 +0,0 @@
-/*D:400
- * The Guest block driver
- *
- * This is a simple block driver, which appears as /dev/lgba, lgbb, lgbc etc.
- * The mechanism is simple: we place the information about the request in the
- * device page, then use SEND_DMA (containing the data for a write, or an empty
- * "ping" DMA for a read).
-:*/
-/* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-//#define DEBUG
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/blkdev.h>
-#include <linux/interrupt.h>
-#include <linux/lguest_bus.h>
-
-static char next_block_index = 'a';
-
-/*D:420 Here is the structure which holds all the information we need about
- * each Guest block device.
- *
- * I'm sure at this stage, you're wondering "hey, where was the adventure I was
- * promised?" and thinking "Rusty sucks, I shall say nasty things about him on
- * my blog". I think Real adventures have boring bits, too, and you're in the
- * middle of one. But it gets better. Just not quite yet. */
-struct blockdev
-{
-	/* The block queue infrastructure wants a spinlock: it is held while it
-	 * calls our block request function. We grab it in our interrupt
-	 * handler so the responses don't mess with new requests. */
-	spinlock_t lock;
-
-	/* The disk structure registered with kernel. */
-	struct gendisk *disk;
-
-	/* The major device number for this disk, and the interrupt. We only
-	 * really keep them here for completeness; we'd need them if we
-	 * supported device unplugging. */
-	int major;
-	int irq;
-
-	/* The physical address of this device's memory page */
-	unsigned long phys_addr;
-	/* The mapped memory page for convenient acces. */
-	struct lguest_block_page *lb_page;
-
-	/* We only have a single request outstanding at a time: this is it. */
-	struct lguest_dma dma;
-	struct request *req;
-};
-
-/*D:495 We originally used end_request() throughout the driver, but it turns
- * out that end_request() is deprecated, and doesn't actually end the request
- * (which seems like a good reason to deprecate it!). It simply ends the first
- * bio. So if we had 3 bios in a "struct request" we would do all 3,
- * end_request(), do 2, end_request(), do 1 and end_request(): twice as much
- * work as we needed to do.
- *
- * This reinforced to me that I do not understand the block layer.
- *
- * Nonetheless, Jens Axboe gave me this nice helper to end all chunks of a
- * request. This improved disk speed by 130%. */
-static void end_entire_request(struct request *req, int uptodate)
-{
-	if (end_that_request_first(req, uptodate, req->hard_nr_sectors))
-		BUG();
-	add_disk_randomness(req->rq_disk);
-	blkdev_dequeue_request(req);
-	end_that_request_last(req, uptodate);
-}
-
-/* I'm told there are only two stories in the world worth telling: love and
- * hate. So there used to be a love scene here like this:
- *
- *  Launcher: We could make beautiful I/O together, you and I.
- *  Guest: My, that's a big disk!
- *
- * Unfortunately, it was just too raunchy for our otherwise-gentle tale. */
-
-/*D:490 This is the interrupt handler, called when a block read or write has
- * been completed for us. */
-static irqreturn_t lgb_irq(int irq, void *_bd)
-{
-	/* We handed our "struct blockdev" as the argument to request_irq(), so
-	 * it is passed through to us here. This tells us which device we're
-	 * dealing with in case we have more than one. */
-	struct blockdev *bd = _bd;
-	unsigned long flags;
-
-	/* We weren't doing anything? Strange, but could happen if we shared
-	 * interrupts (we don't!). */
-	if (!bd->req) {
-		pr_debug("No work!\n");
-		return IRQ_NONE;
-	}
-
-	/* Not done yet? That's equally strange. */
-	if (!bd->lb_page->result) {
-		pr_debug("No result!\n");
-		return IRQ_NONE;
-	}
-
-	/* We have to grab the lock before ending the request. */
-	spin_lock_irqsave(&bd->lock, flags);
-	/* "result" is 1 for success, 2 for failure: end_entire_request() wants
-	 * to know whether this succeeded or not. */
-	end_entire_request(bd->req, bd->lb_page->result == 1);
-	/* Clear out request, it's done. */
-	bd->req = NULL;
-	/* Reset incoming DMA for next time. */
-	bd->dma.used_len = 0;
-	/* Ready for more reads or writes */
-	blk_start_queue(bd->disk->queue);
-	spin_unlock_irqrestore(&bd->lock, flags);
-
-	/* The interrupt was for us, we dealt with it. */
-	return IRQ_HANDLED;
-}
-
-/*D:480 The block layer's "struct request" contains a number of "struct bio"s,
- * each of which contains "struct bio_vec"s, each of which contains a page, an
- * offset and a length.
- *
- * Fortunately there are iterators to help us walk through the "struct
- * request". Even more fortunately, there were plenty of places to steal the
- * code from. We pack the "struct request" into our "struct lguest_dma" and
- * return the total length. */
-static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma)
-{
-	unsigned int i = 0, len = 0;
-	struct req_iterator iter;
-	struct bio_vec *bvec;
-
-	rq_for_each_segment(bvec, req, iter) {
-		/* We told the block layer not to give us too many. */
-		BUG_ON(i == LGUEST_MAX_DMA_SECTIONS);
-		/* If we had a zero-length segment, it would look like
-		 * the end of the data referred to by the "struct
-		 * lguest_dma", so make sure that doesn't happen. */
-		BUG_ON(!bvec->bv_len);
-		/* Convert page & offset to a physical address */
-		dma->addr[i] = page_to_phys(bvec->bv_page)
-			+ bvec->bv_offset;
-		dma->len[i] = bvec->bv_len;
-		len += bvec->bv_len;
-		i++;
-	}
-	/* If the array isn't full, we mark the end with a 0 length */
-	if (i < LGUEST_MAX_DMA_SECTIONS)
-		dma->len[i] = 0;
-	return len;
-}
-
-/* This creates an empty DMA, useful for prodding the Host without sending data
- * (ie. when we want to do a read) */
-static void empty_dma(struct lguest_dma *dma)
-{
-	dma->len[0] = 0;
-}
-
-/*D:470 Setting up a request is fairly easy: */
-static void setup_req(struct blockdev *bd,
-		      int type, struct request *req, struct lguest_dma *dma)
-{
-	/* The type is 1 (write) or 0 (read). */
-	bd->lb_page->type = type;
-	/* The sector on disk where the read or write starts. */
-	bd->lb_page->sector = req->sector;
-	/* The result is initialized to 0 (unfinished). */
-	bd->lb_page->result = 0;
-	/* The current request (so we can end it in the interrupt handler). */
-	bd->req = req;
-	/* The number of bytes: returned as a side-effect of req_to_dma(),
-	 * which packs the block layer's "struct request" into our "struct
-	 * lguest_dma" */
-	bd->lb_page->bytes = req_to_dma(req, dma);
-}
-
-/*D:450 Write is pretty straightforward: we pack the request into a "struct
- * lguest_dma", then use SEND_DMA to send the request. */
-static void do_write(struct blockdev *bd, struct request *req)
-{
-	struct lguest_dma send;
-
-	pr_debug("lgb: WRITE sector %li\n", (long)req->sector);
-	setup_req(bd, 1, req, &send);
-
-	lguest_send_dma(bd->phys_addr, &send);
-}
-
-/* Read is similar to write, except we pack the request into our receive
- * "struct lguest_dma" and send through an empty DMA just to tell the Host that
- * there's a request pending. */
-static void do_read(struct blockdev *bd, struct request *req)
-{
-	struct lguest_dma ping;
-
-	pr_debug("lgb: READ sector %li\n", (long)req->sector);
-	setup_req(bd, 0, req, &bd->dma);
-
-	empty_dma(&ping);
-	lguest_send_dma(bd->phys_addr, &ping);
-}
-
-/*D:440 This where requests come in: we get handed the request queue and are
- * expected to pull a "struct request" off it until we've finished them or
- * we're waiting for a reply: */
-static void do_lgb_request(struct request_queue *q)
-{
-	struct blockdev *bd;
-	struct request *req;
-
-again:
-	/* This sometimes returns NULL even on the very first time around. I
-	 * wonder if it's something to do with letting elves handle the request
-	 * queue... */
-	req = elv_next_request(q);
-	if (!req)
-		return;
-
-	/* We attached the struct blockdev to the disk: get it back */
-	bd = req->rq_disk->private_data;
-	/* Sometimes we get repeated requests after blk_stop_queue(), but we
-	 * can only handle one at a time. */
-	if (bd->req)
-		return;
-
-	/* We only do reads and writes: no tricky business! */
-	if (!blk_fs_request(req)) {
-		pr_debug("Got non-command 0x%08x\n", req->cmd_type);
-		req->errors++;
-		end_entire_request(req, 0);
-		goto again;
-	}
-
-	if (rq_data_dir(req) == WRITE)
-		do_write(bd, req);
-	else
-		do_read(bd, req);
-
-	/* We've put out the request, so stop any more coming in until we get
-	 * an interrupt, which takes us to lgb_irq() to re-enable the queue. */
-	blk_stop_queue(q);
-}
-
-/*D:430 This is the "struct block_device_operations" we attach to the disk at
- * the end of lguestblk_probe(). It doesn't seem to want much. */
-static struct block_device_operations lguestblk_fops = {
-	.owner = THIS_MODULE,
-};
-
-/*D:425 Setting up a disk device seems to involve a lot of code. I'm not sure
- * quite why. I do know that the IDE code sent two or three of the maintainers
- * insane, perhaps this is the fringe of the same disease?
- *
- * As in the console code, the probe function gets handed the generic
- * lguest_device from lguest_bus.c: */
-static int lguestblk_probe(struct lguest_device *lgdev)
-{
-	struct blockdev *bd;
-	int err;
-	int irqflags = IRQF_SHARED;
-
-	/* First we allocate our own "struct blockdev" and initialize the easy
-	 * fields. */
-	bd = kmalloc(sizeof(*bd), GFP_KERNEL);
-	if (!bd)
-		return -ENOMEM;
-
-	spin_lock_init(&bd->lock);
-	bd->irq = lgdev_irq(lgdev);
-	bd->req = NULL;
-	bd->dma.used_len = 0;
-	bd->dma.len[0] = 0;
-	/* The descriptor in the lguest_devices array provided by the Host
-	 * gives the Guest the physical page number of the device's page. */
-	bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT);
-
-	/* We use lguest_map() to get a pointer to the device page */
-	bd->lb_page = lguest_map(bd->phys_addr, 1);
-	if (!bd->lb_page) {
-		err = -ENOMEM;
-		goto out_free_bd;
-	}
-
-	/* We need a major device number: 0 means "assign one dynamically". */
-	bd->major = register_blkdev(0, "lguestblk");
-	if (bd->major < 0) {
-		err = bd->major;
-		goto out_unmap;
-	}
-
-	/* This allocates a "struct gendisk" where we pack all the information
-	 * about the disk which the rest of Linux sees. The argument is the
-	 * number of minor devices desired: we need one minor for the main
-	 * disk, and one for each partition. Of course, we can't possibly know
-	 * how many partitions are on the disk (add_disk does that).
-	 */
-	bd->disk = alloc_disk(16);
-	if (!bd->disk) {
-		err = -ENOMEM;
-		goto out_unregister_blkdev;
-	}
-
-	/* Every disk needs a queue for requests to come in: we set up the
-	 * queue with a callback function (the core of our driver) and the lock
-	 * to use. */
-	bd->disk->queue = blk_init_queue(do_lgb_request, &bd->lock);
-	if (!bd->disk->queue) {
-		err = -ENOMEM;
-		goto out_put_disk;
-	}
-
-	/* We can only handle a certain number of pointers in our SEND_DMA
-	 * call, so we set that with blk_queue_max_hw_segments(). This is not
-	 * to be confused with blk_queue_max_phys_segments() of course! I
-	 * know, who could possibly confuse the two?
-	 *
-	 * Well, it's simple to tell them apart: this one seems to work and the
-	 * other one didn't. */
-	blk_queue_max_hw_segments(bd->disk->queue, LGUEST_MAX_DMA_SECTIONS);
-
-	/* Due to technical limitations of our Host (and simple coding) we
-	 * can't have a single buffer which crosses a page boundary. Tell it
-	 * here. This means that our maximum request size is 16
-	 * (LGUEST_MAX_DMA_SECTIONS) pages. */
-	blk_queue_segment_boundary(bd->disk->queue, PAGE_SIZE-1);
-
-	/* We name our disk: this becomes the device name when udev does its
-	 * magic thing and creates the device node, such as /dev/lgba.
-	 * next_block_index is a global which starts at 'a'. Unfortunately
-	 * this simple increment logic means that the 27th disk will be called
-	 * "/dev/lgb{". In that case, I recommend having at least 29 disks, so
-	 * your /dev directory will be balanced. */
-	sprintf(bd->disk->disk_name, "lgb%c", next_block_index++);
-
-	/* We look to the device descriptor again to see if this device's
-	 * interrupts are expected to be random. If they are, we tell the irq
-	 * subsystem. At the moment this bit is always set. */
-	if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS)
-		irqflags |= IRQF_SAMPLE_RANDOM;
-
-	/* Now we have the name and irqflags, we can request the interrupt; we
-	 * give it the "struct blockdev" we have set up to pass to lgb_irq()
-	 * when there is an interrupt. */
-	err = request_irq(bd->irq, lgb_irq, irqflags, bd->disk->disk_name, bd);
-	if (err)
-		goto out_cleanup_queue;
-
-	/* We bind our one-entry DMA pool to the key for this block device so
-	 * the Host can reply to our requests. The key is equal to the
-	 * physical address of the device's page, which is conveniently
-	 * unique. */
-	err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq);
-	if (err)
-		goto out_free_irq;
-
-	/* We finish our disk initialization and add the disk to the system. */
-	bd->disk->major = bd->major;
-	bd->disk->first_minor = 0;
-	bd->disk->private_data = bd;
-	bd->disk->fops = &lguestblk_fops;
-	/* This is initialized to the disk size by the Launcher. */
-	set_capacity(bd->disk, bd->lb_page->num_sectors);
-	add_disk(bd->disk);
-
-	printk(KERN_INFO "%s: device %i at major %d\n",
-	       bd->disk->disk_name, lgdev->index, bd->major);
-
-	/* We don't need to keep the "struct blockdev" around, but if we ever
-	 * implemented device removal, we'd need this. */
-	lgdev->private = bd;
-	return 0;
-
-out_free_irq:
-	free_irq(bd->irq, bd);
-out_cleanup_queue:
-	blk_cleanup_queue(bd->disk->queue);
-out_put_disk:
-	put_disk(bd->disk);
-out_unregister_blkdev:
-	unregister_blkdev(bd->major, "lguestblk");
-out_unmap:
-	lguest_unmap(bd->lb_page);
-out_free_bd:
-	kfree(bd);
-	return err;
-}
-
-/*D:410 The boilerplate code for registering the lguest block driver is just
- * like the console: */
-static struct lguest_driver lguestblk_drv = {
-	.name = "lguestblk",
-	.owner = THIS_MODULE,
-	.device_type = LGUEST_DEVICE_T_BLOCK,
-	.probe = lguestblk_probe,
-};
-
-static __init int lguestblk_init(void)
-{
-	return register_lguest_driver(&lguestblk_drv);
-}
-module_init(lguestblk_init);
-
-MODULE_DESCRIPTION("Lguest block driver");
-MODULE_LICENSE("GPL");
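
For readers wondering what this driver actually exposed before its removal, here is a minimal userspace sketch (my own illustration, not part of this patch or the kernel tree) that reads the first sector of one of these guest disks. The device path /dev/lgba and the 512-byte sector size are assumptions taken from the driver's own comments; a read like this would have entered the driver via do_lgb_request(), been packed into the receive DMA by do_read(), and been completed in lgb_irq() once the Host replied.

/* Hypothetical userspace test for an lguest block device. The node name
 * /dev/lgba is an assumption based on the naming scheme in lguestblk_probe();
 * everything here is plain POSIX, nothing lguest-specific. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	char sector[512];		/* one 512-byte sector */
	int fd = open("/dev/lgba", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/lgba");
		return EXIT_FAILURE;
	}

	/* Read sector 0; inside the Guest this becomes a single struct
	 * request that the driver turns into a "ping" SEND_DMA. */
	if (pread(fd, sector, sizeof(sector), 0) != (ssize_t)sizeof(sector)) {
		perror("pread");
		close(fd);
		return EXIT_FAILURE;
	}

	printf("first byte of /dev/lgba: 0x%02x\n", (unsigned char)sector[0]);
	close(fd);
	return EXIT_SUCCESS;
}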