diff options
Diffstat (limited to 'drivers/block/lguest_blk.c')
| -rw-r--r-- | drivers/block/lguest_blk.c | 421 |
1 files changed, 0 insertions, 421 deletions
diff --git a/drivers/block/lguest_blk.c b/drivers/block/lguest_blk.c deleted file mode 100644 index fa8e42341b87..000000000000 --- a/drivers/block/lguest_blk.c +++ /dev/null | |||
| @@ -1,421 +0,0 @@ | |||
| 1 | /*D:400 | ||
| 2 | * The Guest block driver | ||
| 3 | * | ||
| 4 | * This is a simple block driver, which appears as /dev/lgba, lgbb, lgbc etc. | ||
| 5 | * The mechanism is simple: we place the information about the request in the | ||
| 6 | * device page, then use SEND_DMA (containing the data for a write, or an empty | ||
| 7 | * "ping" DMA for a read). | ||
| 8 | :*/ | ||
| 9 | /* Copyright 2006 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or modify | ||
| 12 | * it under the terms of the GNU General Public License as published by | ||
| 13 | * the Free Software Foundation; either version 2 of the License, or | ||
| 14 | * (at your option) any later version. | ||
| 15 | * | ||
| 16 | * This program is distributed in the hope that it will be useful, | ||
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 19 | * GNU General Public License for more details. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public License | ||
| 22 | * along with this program; if not, write to the Free Software | ||
| 23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
| 24 | */ | ||
| 25 | //#define DEBUG | ||
| 26 | #include <linux/init.h> | ||
| 27 | #include <linux/types.h> | ||
| 28 | #include <linux/blkdev.h> | ||
| 29 | #include <linux/interrupt.h> | ||
| 30 | #include <linux/lguest_bus.h> | ||
| 31 | |||
/* Suffix letter for the next disk name ("lgb" + this letter); starts at 'a'
 * and is post-incremented by lguestblk_probe() for every device it names. */
static char next_block_index = 'a';
| 33 | |||
| 34 | /*D:420 Here is the structure which holds all the information we need about | ||
| 35 | * each Guest block device. | ||
| 36 | * | ||
| 37 | * I'm sure at this stage, you're wondering "hey, where was the adventure I was | ||
| 38 | * promised?" and thinking "Rusty sucks, I shall say nasty things about him on | ||
| 39 | * my blog". I think Real adventures have boring bits, too, and you're in the | ||
| 40 | * middle of one. But it gets better. Just not quite yet. */ | ||
| 41 | struct blockdev | ||
| 42 | { | ||
| 43 | /* The block queue infrastructure wants a spinlock: it is held while it | ||
| 44 | * calls our block request function. We grab it in our interrupt | ||
| 45 | * handler so the responses don't mess with new requests. */ | ||
| 46 | spinlock_t lock; | ||
| 47 | |||
| 48 | /* The disk structure registered with kernel. */ | ||
| 49 | struct gendisk *disk; | ||
| 50 | |||
| 51 | /* The major device number for this disk, and the interrupt. We only | ||
| 52 | * really keep them here for completeness; we'd need them if we | ||
| 53 | * supported device unplugging. */ | ||
| 54 | int major; | ||
| 55 | int irq; | ||
| 56 | |||
| 57 | /* The physical address of this device's memory page */ | ||
| 58 | unsigned long phys_addr; | ||
| 59 | /* The mapped memory page for convenient acces. */ | ||
| 60 | struct lguest_block_page *lb_page; | ||
| 61 | |||
| 62 | /* We only have a single request outstanding at a time: this is it. */ | ||
| 63 | struct lguest_dma dma; | ||
| 64 | struct request *req; | ||
| 65 | }; | ||
| 66 | |||
| 67 | /*D:495 We originally used end_request() throughout the driver, but it turns | ||
| 68 | * out that end_request() is deprecated, and doesn't actually end the request | ||
| 69 | * (which seems like a good reason to deprecate it!). It simply ends the first | ||
| 70 | * bio. So if we had 3 bios in a "struct request" we would do all 3, | ||
| 71 | * end_request(), do 2, end_request(), do 1 and end_request(): twice as much | ||
| 72 | * work as we needed to do. | ||
| 73 | * | ||
| 74 | * This reinforced to me that I do not understand the block layer. | ||
| 75 | * | ||
| 76 | * Nonetheless, Jens Axboe gave me this nice helper to end all chunks of a | ||
| 77 | * request. This improved disk speed by 130%. */ | ||
| 78 | static void end_entire_request(struct request *req, int uptodate) | ||
| 79 | { | ||
| 80 | if (end_that_request_first(req, uptodate, req->hard_nr_sectors)) | ||
| 81 | BUG(); | ||
| 82 | add_disk_randomness(req->rq_disk); | ||
| 83 | blkdev_dequeue_request(req); | ||
| 84 | end_that_request_last(req, uptodate); | ||
| 85 | } | ||
| 86 | |||
| 87 | /* I'm told there are only two stories in the world worth telling: love and | ||
| 88 | * hate. So there used to be a love scene here like this: | ||
| 89 | * | ||
| 90 | * Launcher: We could make beautiful I/O together, you and I. | ||
| 91 | * Guest: My, that's a big disk! | ||
| 92 | * | ||
| 93 | * Unfortunately, it was just too raunchy for our otherwise-gentle tale. */ | ||
| 94 | |||
| 95 | /*D:490 This is the interrupt handler, called when a block read or write has | ||
| 96 | * been completed for us. */ | ||
| 97 | static irqreturn_t lgb_irq(int irq, void *_bd) | ||
| 98 | { | ||
| 99 | /* We handed our "struct blockdev" as the argument to request_irq(), so | ||
| 100 | * it is passed through to us here. This tells us which device we're | ||
| 101 | * dealing with in case we have more than one. */ | ||
| 102 | struct blockdev *bd = _bd; | ||
| 103 | unsigned long flags; | ||
| 104 | |||
| 105 | /* We weren't doing anything? Strange, but could happen if we shared | ||
| 106 | * interrupts (we don't!). */ | ||
| 107 | if (!bd->req) { | ||
| 108 | pr_debug("No work!\n"); | ||
| 109 | return IRQ_NONE; | ||
| 110 | } | ||
| 111 | |||
| 112 | /* Not done yet? That's equally strange. */ | ||
| 113 | if (!bd->lb_page->result) { | ||
| 114 | pr_debug("No result!\n"); | ||
| 115 | return IRQ_NONE; | ||
| 116 | } | ||
| 117 | |||
| 118 | /* We have to grab the lock before ending the request. */ | ||
| 119 | spin_lock_irqsave(&bd->lock, flags); | ||
| 120 | /* "result" is 1 for success, 2 for failure: end_entire_request() wants | ||
| 121 | * to know whether this succeeded or not. */ | ||
| 122 | end_entire_request(bd->req, bd->lb_page->result == 1); | ||
| 123 | /* Clear out request, it's done. */ | ||
| 124 | bd->req = NULL; | ||
| 125 | /* Reset incoming DMA for next time. */ | ||
| 126 | bd->dma.used_len = 0; | ||
| 127 | /* Ready for more reads or writes */ | ||
| 128 | blk_start_queue(bd->disk->queue); | ||
| 129 | spin_unlock_irqrestore(&bd->lock, flags); | ||
| 130 | |||
| 131 | /* The interrupt was for us, we dealt with it. */ | ||
| 132 | return IRQ_HANDLED; | ||
| 133 | } | ||
| 134 | |||
| 135 | /*D:480 The block layer's "struct request" contains a number of "struct bio"s, | ||
| 136 | * each of which contains "struct bio_vec"s, each of which contains a page, an | ||
| 137 | * offset and a length. | ||
| 138 | * | ||
| 139 | * Fortunately there are iterators to help us walk through the "struct | ||
| 140 | * request". Even more fortunately, there were plenty of places to steal the | ||
| 141 | * code from. We pack the "struct request" into our "struct lguest_dma" and | ||
| 142 | * return the total length. */ | ||
| 143 | static unsigned int req_to_dma(struct request *req, struct lguest_dma *dma) | ||
| 144 | { | ||
| 145 | unsigned int i = 0, len = 0; | ||
| 146 | struct req_iterator iter; | ||
| 147 | struct bio_vec *bvec; | ||
| 148 | |||
| 149 | rq_for_each_segment(bvec, req, iter) { | ||
| 150 | /* We told the block layer not to give us too many. */ | ||
| 151 | BUG_ON(i == LGUEST_MAX_DMA_SECTIONS); | ||
| 152 | /* If we had a zero-length segment, it would look like | ||
| 153 | * the end of the data referred to by the "struct | ||
| 154 | * lguest_dma", so make sure that doesn't happen. */ | ||
| 155 | BUG_ON(!bvec->bv_len); | ||
| 156 | /* Convert page & offset to a physical address */ | ||
| 157 | dma->addr[i] = page_to_phys(bvec->bv_page) | ||
| 158 | + bvec->bv_offset; | ||
| 159 | dma->len[i] = bvec->bv_len; | ||
| 160 | len += bvec->bv_len; | ||
| 161 | i++; | ||
| 162 | } | ||
| 163 | /* If the array isn't full, we mark the end with a 0 length */ | ||
| 164 | if (i < LGUEST_MAX_DMA_SECTIONS) | ||
| 165 | dma->len[i] = 0; | ||
| 166 | return len; | ||
| 167 | } | ||
| 168 | |||
| 169 | /* This creates an empty DMA, useful for prodding the Host without sending data | ||
| 170 | * (ie. when we want to do a read) */ | ||
| 171 | static void empty_dma(struct lguest_dma *dma) | ||
| 172 | { | ||
| 173 | dma->len[0] = 0; | ||
| 174 | } | ||
| 175 | |||
| 176 | /*D:470 Setting up a request is fairly easy: */ | ||
| 177 | static void setup_req(struct blockdev *bd, | ||
| 178 | int type, struct request *req, struct lguest_dma *dma) | ||
| 179 | { | ||
| 180 | /* The type is 1 (write) or 0 (read). */ | ||
| 181 | bd->lb_page->type = type; | ||
| 182 | /* The sector on disk where the read or write starts. */ | ||
| 183 | bd->lb_page->sector = req->sector; | ||
| 184 | /* The result is initialized to 0 (unfinished). */ | ||
| 185 | bd->lb_page->result = 0; | ||
| 186 | /* The current request (so we can end it in the interrupt handler). */ | ||
| 187 | bd->req = req; | ||
| 188 | /* The number of bytes: returned as a side-effect of req_to_dma(), | ||
| 189 | * which packs the block layer's "struct request" into our "struct | ||
| 190 | * lguest_dma" */ | ||
| 191 | bd->lb_page->bytes = req_to_dma(req, dma); | ||
| 192 | } | ||
| 193 | |||
| 194 | /*D:450 Write is pretty straightforward: we pack the request into a "struct | ||
| 195 | * lguest_dma", then use SEND_DMA to send the request. */ | ||
| 196 | static void do_write(struct blockdev *bd, struct request *req) | ||
| 197 | { | ||
| 198 | struct lguest_dma send; | ||
| 199 | |||
| 200 | pr_debug("lgb: WRITE sector %li\n", (long)req->sector); | ||
| 201 | setup_req(bd, 1, req, &send); | ||
| 202 | |||
| 203 | lguest_send_dma(bd->phys_addr, &send); | ||
| 204 | } | ||
| 205 | |||
| 206 | /* Read is similar to write, except we pack the request into our receive | ||
| 207 | * "struct lguest_dma" and send through an empty DMA just to tell the Host that | ||
| 208 | * there's a request pending. */ | ||
| 209 | static void do_read(struct blockdev *bd, struct request *req) | ||
| 210 | { | ||
| 211 | struct lguest_dma ping; | ||
| 212 | |||
| 213 | pr_debug("lgb: READ sector %li\n", (long)req->sector); | ||
| 214 | setup_req(bd, 0, req, &bd->dma); | ||
| 215 | |||
| 216 | empty_dma(&ping); | ||
| 217 | lguest_send_dma(bd->phys_addr, &ping); | ||
| 218 | } | ||
| 219 | |||
| 220 | /*D:440 This is where requests come in: we get handed the request queue and are | ||
| 221 | * expected to pull a "struct request" off it until we've finished them or | ||
| 222 | * we're waiting for a reply: */ | ||
| 223 | static void do_lgb_request(struct request_queue *q) | ||
| 224 | { | ||
| 225 | struct blockdev *bd; | ||
| 226 | struct request *req; | ||
| 227 | |||
| 228 | again: | ||
| 229 | /* This sometimes returns NULL even on the very first time around. I | ||
| 230 | * wonder if it's something to do with letting elves handle the request | ||
| 231 | * queue... */ | ||
| 232 | req = elv_next_request(q); | ||
| 233 | if (!req) | ||
| 234 | return; | ||
| 235 | |||
| 236 | /* We attached the struct blockdev to the disk: get it back */ | ||
| 237 | bd = req->rq_disk->private_data; | ||
| 238 | /* Sometimes we get repeated requests after blk_stop_queue(), but we | ||
| 239 | * can only handle one at a time. */ | ||
| 240 | if (bd->req) | ||
| 241 | return; | ||
| 242 | |||
| 243 | /* We only do reads and writes: no tricky business! */ | ||
| 244 | if (!blk_fs_request(req)) { | ||
| 245 | pr_debug("Got non-command 0x%08x\n", req->cmd_type); | ||
| 246 | req->errors++; | ||
| 247 | end_entire_request(req, 0); | ||
| 248 | goto again; | ||
| 249 | } | ||
| 250 | |||
| 251 | if (rq_data_dir(req) == WRITE) | ||
| 252 | do_write(bd, req); | ||
| 253 | else | ||
| 254 | do_read(bd, req); | ||
| 255 | |||
| 256 | /* We've put out the request, so stop any more coming in until we get | ||
| 257 | * an interrupt, which takes us to lgb_irq() to re-enable the queue. */ | ||
| 258 | blk_stop_queue(q); | ||
| 259 | } | ||
| 260 | |||
| 261 | /*D:430 This is the "struct block_device_operations" we attach to the disk at | ||
| 262 | * the end of lguestblk_probe(). It doesn't seem to want much. */ | ||
| 263 | static struct block_device_operations lguestblk_fops = { | ||
| 264 | .owner = THIS_MODULE, | ||
| 265 | }; | ||
| 266 | |||
| 267 | /*D:425 Setting up a disk device seems to involve a lot of code. I'm not sure | ||
| 268 | * quite why. I do know that the IDE code sent two or three of the maintainers | ||
| 269 | * insane, perhaps this is the fringe of the same disease? | ||
| 270 | * | ||
| 271 | * As in the console code, the probe function gets handed the generic | ||
| 272 | * lguest_device from lguest_bus.c: */ | ||
| 273 | static int lguestblk_probe(struct lguest_device *lgdev) | ||
| 274 | { | ||
| 275 | struct blockdev *bd; | ||
| 276 | int err; | ||
| 277 | int irqflags = IRQF_SHARED; | ||
| 278 | |||
| 279 | /* First we allocate our own "struct blockdev" and initialize the easy | ||
| 280 | * fields. */ | ||
| 281 | bd = kmalloc(sizeof(*bd), GFP_KERNEL); | ||
| 282 | if (!bd) | ||
| 283 | return -ENOMEM; | ||
| 284 | |||
| 285 | spin_lock_init(&bd->lock); | ||
| 286 | bd->irq = lgdev_irq(lgdev); | ||
| 287 | bd->req = NULL; | ||
| 288 | bd->dma.used_len = 0; | ||
| 289 | bd->dma.len[0] = 0; | ||
| 290 | /* The descriptor in the lguest_devices array provided by the Host | ||
| 291 | * gives the Guest the physical page number of the device's page. */ | ||
| 292 | bd->phys_addr = (lguest_devices[lgdev->index].pfn << PAGE_SHIFT); | ||
| 293 | |||
| 294 | /* We use lguest_map() to get a pointer to the device page */ | ||
| 295 | bd->lb_page = lguest_map(bd->phys_addr, 1); | ||
| 296 | if (!bd->lb_page) { | ||
| 297 | err = -ENOMEM; | ||
| 298 | goto out_free_bd; | ||
| 299 | } | ||
| 300 | |||
| 301 | /* We need a major device number: 0 means "assign one dynamically". */ | ||
| 302 | bd->major = register_blkdev(0, "lguestblk"); | ||
| 303 | if (bd->major < 0) { | ||
| 304 | err = bd->major; | ||
| 305 | goto out_unmap; | ||
| 306 | } | ||
| 307 | |||
| 308 | /* This allocates a "struct gendisk" where we pack all the information | ||
| 309 | * about the disk which the rest of Linux sees. The argument is the | ||
| 310 | * number of minor devices desired: we need one minor for the main | ||
| 311 | * disk, and one for each partition. Of course, we can't possibly know | ||
| 312 | * how many partitions are on the disk (add_disk does that). | ||
| 313 | */ | ||
| 314 | bd->disk = alloc_disk(16); | ||
| 315 | if (!bd->disk) { | ||
| 316 | err = -ENOMEM; | ||
| 317 | goto out_unregister_blkdev; | ||
| 318 | } | ||
| 319 | |||
| 320 | /* Every disk needs a queue for requests to come in: we set up the | ||
| 321 | * queue with a callback function (the core of our driver) and the lock | ||
| 322 | * to use. */ | ||
| 323 | bd->disk->queue = blk_init_queue(do_lgb_request, &bd->lock); | ||
| 324 | if (!bd->disk->queue) { | ||
| 325 | err = -ENOMEM; | ||
| 326 | goto out_put_disk; | ||
| 327 | } | ||
| 328 | |||
| 329 | /* We can only handle a certain number of pointers in our SEND_DMA | ||
| 330 | * call, so we set that with blk_queue_max_hw_segments(). This is not | ||
| 331 | * to be confused with blk_queue_max_phys_segments() of course! I | ||
| 332 | * know, who could possibly confuse the two? | ||
| 333 | * | ||
| 334 | * Well, it's simple to tell them apart: this one seems to work and the | ||
| 335 | * other one didn't. */ | ||
| 336 | blk_queue_max_hw_segments(bd->disk->queue, LGUEST_MAX_DMA_SECTIONS); | ||
| 337 | |||
| 338 | /* Due to technical limitations of our Host (and simple coding) we | ||
| 339 | * can't have a single buffer which crosses a page boundary. Tell it | ||
| 340 | * here. This means that our maximum request size is 16 | ||
| 341 | * (LGUEST_MAX_DMA_SECTIONS) pages. */ | ||
| 342 | blk_queue_segment_boundary(bd->disk->queue, PAGE_SIZE-1); | ||
| 343 | |||
| 344 | /* We name our disk: this becomes the device name when udev does its | ||
| 345 | * magic thing and creates the device node, such as /dev/lgba. | ||
| 346 | * next_block_index is a global which starts at 'a'. Unfortunately | ||
| 347 | * this simple increment logic means that the 27th disk will be called | ||
| 348 | * "/dev/lgb{". In that case, I recommend having at least 29 disks, so | ||
| 349 | * your /dev directory will be balanced. */ | ||
| 350 | sprintf(bd->disk->disk_name, "lgb%c", next_block_index++); | ||
| 351 | |||
| 352 | /* We look to the device descriptor again to see if this device's | ||
| 353 | * interrupts are expected to be random. If they are, we tell the irq | ||
| 354 | * subsystem. At the moment this bit is always set. */ | ||
| 355 | if (lguest_devices[lgdev->index].features & LGUEST_DEVICE_F_RANDOMNESS) | ||
| 356 | irqflags |= IRQF_SAMPLE_RANDOM; | ||
| 357 | |||
| 358 | /* Now we have the name and irqflags, we can request the interrupt; we | ||
| 359 | * give it the "struct blockdev" we have set up to pass to lgb_irq() | ||
| 360 | * when there is an interrupt. */ | ||
| 361 | err = request_irq(bd->irq, lgb_irq, irqflags, bd->disk->disk_name, bd); | ||
| 362 | if (err) | ||
| 363 | goto out_cleanup_queue; | ||
| 364 | |||
| 365 | /* We bind our one-entry DMA pool to the key for this block device so | ||
| 366 | * the Host can reply to our requests. The key is equal to the | ||
| 367 | * physical address of the device's page, which is conveniently | ||
| 368 | * unique. */ | ||
| 369 | err = lguest_bind_dma(bd->phys_addr, &bd->dma, 1, bd->irq); | ||
| 370 | if (err) | ||
| 371 | goto out_free_irq; | ||
| 372 | |||
| 373 | /* We finish our disk initialization and add the disk to the system. */ | ||
| 374 | bd->disk->major = bd->major; | ||
| 375 | bd->disk->first_minor = 0; | ||
| 376 | bd->disk->private_data = bd; | ||
| 377 | bd->disk->fops = &lguestblk_fops; | ||
| 378 | /* This is initialized to the disk size by the Launcher. */ | ||
| 379 | set_capacity(bd->disk, bd->lb_page->num_sectors); | ||
| 380 | add_disk(bd->disk); | ||
| 381 | |||
| 382 | printk(KERN_INFO "%s: device %i at major %d\n", | ||
| 383 | bd->disk->disk_name, lgdev->index, bd->major); | ||
| 384 | |||
| 385 | /* We don't need to keep the "struct blockdev" around, but if we ever | ||
| 386 | * implemented device removal, we'd need this. */ | ||
| 387 | lgdev->private = bd; | ||
| 388 | return 0; | ||
| 389 | |||
| 390 | out_free_irq: | ||
| 391 | free_irq(bd->irq, bd); | ||
| 392 | out_cleanup_queue: | ||
| 393 | blk_cleanup_queue(bd->disk->queue); | ||
| 394 | out_put_disk: | ||
| 395 | put_disk(bd->disk); | ||
| 396 | out_unregister_blkdev: | ||
| 397 | unregister_blkdev(bd->major, "lguestblk"); | ||
| 398 | out_unmap: | ||
| 399 | lguest_unmap(bd->lb_page); | ||
| 400 | out_free_bd: | ||
| 401 | kfree(bd); | ||
| 402 | return err; | ||
| 403 | } | ||
| 404 | |||
| 405 | /*D:410 The boilerplate code for registering the lguest block driver is just | ||
| 406 | * like the console: */ | ||
| 407 | static struct lguest_driver lguestblk_drv = { | ||
| 408 | .name = "lguestblk", | ||
| 409 | .owner = THIS_MODULE, | ||
| 410 | .device_type = LGUEST_DEVICE_T_BLOCK, | ||
| 411 | .probe = lguestblk_probe, | ||
| 412 | }; | ||
| 413 | |||
| 414 | static __init int lguestblk_init(void) | ||
| 415 | { | ||
| 416 | return register_lguest_driver(&lguestblk_drv); | ||
| 417 | } | ||
| 418 | module_init(lguestblk_init); | ||
| 419 | |||
| 420 | MODULE_DESCRIPTION("Lguest block driver"); | ||
| 421 | MODULE_LICENSE("GPL"); | ||
