diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-21 21:03:38 -0400 |
---|---|---|
committer | Rusty Russell <rusty@rustcorp.com.au> | 2007-10-23 01:49:54 -0400 |
commit | e467cde238184d1b0923db2cd61ae1c5a6dc15aa (patch) | |
tree | 1383321c88ca25fcad20f56f14a8ca658bb25fb3 /drivers/block | |
parent | 296f96fcfc160e29c01819c0c7b20c2dc8320edd (diff) |
Block driver using virtio.
The block driver uses scatter-gather lists with sg[0] being the
request information (struct virtio_blk_outhdr) with the type, sector
and inbuf id. The next N sg entries are the bio itself, then the last
sg is the status byte. Whether the N entries are in or out depends on
whether it's a read or a write.
We accept the normal (SCSI) ioctls: they get handed through to the other
side, which can then handle them or reply that they're unsupported. It's
not clear that this actually works in general, since I don't know
if blk_pc_request() requests have an accurate rq_data_dir().
Although we try to reply -ENOTTY on unsupported commands, ioctl(fd,
CDROMEJECT) returns success to userspace. This needs a separate
patch.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 6 | ||||
-rw-r--r-- | drivers/block/Makefile | 1 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 308 |
3 files changed, 315 insertions, 0 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ce4b1e484e6..4d0119ea9e3 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig | |||
@@ -425,4 +425,10 @@ config XEN_BLKDEV_FRONTEND | |||
425 | block device driver. It communicates with a back-end driver | 425 | block device driver. It communicates with a back-end driver |
426 | in another domain which drives the actual block device. | 426 | in another domain which drives the actual block device. |
427 | 427 | ||
428 | config VIRTIO_BLK | ||
429 | tristate "Virtio block driver (EXPERIMENTAL)" | ||
430 | depends on EXPERIMENTAL && VIRTIO | ||
431 | ---help--- | ||
432 | This is the virtual block driver for lguest. Say Y or M. | ||
433 | |||
428 | endif # BLK_DEV | 434 | endif # BLK_DEV |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 014e72121b5..d199eba7a08 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile | |||
@@ -25,6 +25,7 @@ obj-$(CONFIG_SUNVDC) += sunvdc.o | |||
25 | obj-$(CONFIG_BLK_DEV_UMEM) += umem.o | 25 | obj-$(CONFIG_BLK_DEV_UMEM) += umem.o |
26 | obj-$(CONFIG_BLK_DEV_NBD) += nbd.o | 26 | obj-$(CONFIG_BLK_DEV_NBD) += nbd.o |
27 | obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o | 27 | obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o |
28 | obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o | ||
28 | 29 | ||
29 | obj-$(CONFIG_VIODASD) += viodasd.o | 30 | obj-$(CONFIG_VIODASD) += viodasd.o |
30 | obj-$(CONFIG_BLK_DEV_SX8) += sx8.o | 31 | obj-$(CONFIG_BLK_DEV_SX8) += sx8.o |
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c new file mode 100644 index 00000000000..a901eee64ba --- /dev/null +++ b/drivers/block/virtio_blk.c | |||
@@ -0,0 +1,308 @@ | |||
1 | //#define DEBUG | ||
2 | #include <linux/spinlock.h> | ||
3 | #include <linux/blkdev.h> | ||
4 | #include <linux/hdreg.h> | ||
5 | #include <linux/virtio.h> | ||
6 | #include <linux/virtio_blk.h> | ||
7 | #include <linux/virtio_blk.h> | ||
8 | |||
/*
 * Letter appended to "vd" to form the next disk name (see the sprintf()
 * in virtblk_probe()); post-incremented each time a name is generated.
 */
static unsigned char virtblk_index = 'a';
10 | struct virtio_blk | ||
11 | { | ||
12 | spinlock_t lock; | ||
13 | |||
14 | struct virtio_device *vdev; | ||
15 | struct virtqueue *vq; | ||
16 | |||
17 | /* The disk structure for the kernel. */ | ||
18 | struct gendisk *disk; | ||
19 | |||
20 | /* Request tracking. */ | ||
21 | struct list_head reqs; | ||
22 | |||
23 | mempool_t *pool; | ||
24 | |||
25 | /* Scatterlist: can be too big for stack. */ | ||
26 | struct scatterlist sg[3+MAX_PHYS_SEGMENTS]; | ||
27 | }; | ||
28 | |||
29 | struct virtblk_req | ||
30 | { | ||
31 | struct list_head list; | ||
32 | struct request *req; | ||
33 | struct virtio_blk_outhdr out_hdr; | ||
34 | struct virtio_blk_inhdr in_hdr; | ||
35 | }; | ||
36 | |||
37 | static bool blk_done(struct virtqueue *vq) | ||
38 | { | ||
39 | struct virtio_blk *vblk = vq->vdev->priv; | ||
40 | struct virtblk_req *vbr; | ||
41 | unsigned int len; | ||
42 | unsigned long flags; | ||
43 | |||
44 | spin_lock_irqsave(&vblk->lock, flags); | ||
45 | while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) { | ||
46 | int uptodate; | ||
47 | switch (vbr->in_hdr.status) { | ||
48 | case VIRTIO_BLK_S_OK: | ||
49 | uptodate = 1; | ||
50 | break; | ||
51 | case VIRTIO_BLK_S_UNSUPP: | ||
52 | uptodate = -ENOTTY; | ||
53 | break; | ||
54 | default: | ||
55 | uptodate = 0; | ||
56 | break; | ||
57 | } | ||
58 | |||
59 | end_dequeued_request(vbr->req, uptodate); | ||
60 | list_del(&vbr->list); | ||
61 | mempool_free(vbr, vblk->pool); | ||
62 | } | ||
63 | /* In case queue is stopped waiting for more buffers. */ | ||
64 | blk_start_queue(vblk->disk->queue); | ||
65 | spin_unlock_irqrestore(&vblk->lock, flags); | ||
66 | return true; | ||
67 | } | ||
68 | |||
69 | static bool do_req(struct request_queue *q, struct virtio_blk *vblk, | ||
70 | struct request *req) | ||
71 | { | ||
72 | unsigned long num, out, in; | ||
73 | struct virtblk_req *vbr; | ||
74 | |||
75 | vbr = mempool_alloc(vblk->pool, GFP_ATOMIC); | ||
76 | if (!vbr) | ||
77 | /* When another request finishes we'll try again. */ | ||
78 | return false; | ||
79 | |||
80 | vbr->req = req; | ||
81 | if (blk_fs_request(vbr->req)) { | ||
82 | vbr->out_hdr.type = 0; | ||
83 | vbr->out_hdr.sector = vbr->req->sector; | ||
84 | vbr->out_hdr.ioprio = vbr->req->ioprio; | ||
85 | } else if (blk_pc_request(vbr->req)) { | ||
86 | vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; | ||
87 | vbr->out_hdr.sector = 0; | ||
88 | vbr->out_hdr.ioprio = vbr->req->ioprio; | ||
89 | } else { | ||
90 | /* We don't put anything else in the queue. */ | ||
91 | BUG(); | ||
92 | } | ||
93 | |||
94 | if (blk_barrier_rq(vbr->req)) | ||
95 | vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER; | ||
96 | |||
97 | /* We have to zero this, otherwise blk_rq_map_sg gets upset. */ | ||
98 | memset(vblk->sg, 0, sizeof(vblk->sg)); | ||
99 | sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr)); | ||
100 | num = blk_rq_map_sg(q, vbr->req, vblk->sg+1); | ||
101 | sg_set_buf(&vblk->sg[num+1], &vbr->in_hdr, sizeof(vbr->in_hdr)); | ||
102 | |||
103 | if (rq_data_dir(vbr->req) == WRITE) { | ||
104 | vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; | ||
105 | out = 1 + num; | ||
106 | in = 1; | ||
107 | } else { | ||
108 | vbr->out_hdr.type |= VIRTIO_BLK_T_IN; | ||
109 | out = 1; | ||
110 | in = 1 + num; | ||
111 | } | ||
112 | |||
113 | if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) { | ||
114 | mempool_free(vbr, vblk->pool); | ||
115 | return false; | ||
116 | } | ||
117 | |||
118 | list_add_tail(&vbr->list, &vblk->reqs); | ||
119 | return true; | ||
120 | } | ||
121 | |||
122 | static void do_virtblk_request(struct request_queue *q) | ||
123 | { | ||
124 | struct virtio_blk *vblk = NULL; | ||
125 | struct request *req; | ||
126 | unsigned int issued = 0; | ||
127 | |||
128 | while ((req = elv_next_request(q)) != NULL) { | ||
129 | vblk = req->rq_disk->private_data; | ||
130 | BUG_ON(req->nr_phys_segments > ARRAY_SIZE(vblk->sg)); | ||
131 | |||
132 | /* If this request fails, stop queue and wait for something to | ||
133 | finish to restart it. */ | ||
134 | if (!do_req(q, vblk, req)) { | ||
135 | blk_stop_queue(q); | ||
136 | break; | ||
137 | } | ||
138 | blkdev_dequeue_request(req); | ||
139 | issued++; | ||
140 | } | ||
141 | |||
142 | if (issued) | ||
143 | vblk->vq->vq_ops->kick(vblk->vq); | ||
144 | } | ||
145 | |||
146 | static int virtblk_ioctl(struct inode *inode, struct file *filp, | ||
147 | unsigned cmd, unsigned long data) | ||
148 | { | ||
149 | return scsi_cmd_ioctl(filp, inode->i_bdev->bd_disk->queue, | ||
150 | inode->i_bdev->bd_disk, cmd, | ||
151 | (void __user *)data); | ||
152 | } | ||
153 | |||
154 | static struct block_device_operations virtblk_fops = { | ||
155 | .ioctl = virtblk_ioctl, | ||
156 | .owner = THIS_MODULE, | ||
157 | }; | ||
158 | |||
159 | static int virtblk_probe(struct virtio_device *vdev) | ||
160 | { | ||
161 | struct virtio_blk *vblk; | ||
162 | int err, major; | ||
163 | void *token; | ||
164 | unsigned int len; | ||
165 | u64 cap; | ||
166 | u32 v; | ||
167 | |||
168 | vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); | ||
169 | if (!vblk) { | ||
170 | err = -ENOMEM; | ||
171 | goto out; | ||
172 | } | ||
173 | |||
174 | INIT_LIST_HEAD(&vblk->reqs); | ||
175 | spin_lock_init(&vblk->lock); | ||
176 | vblk->vdev = vdev; | ||
177 | |||
178 | /* We expect one virtqueue, for output. */ | ||
179 | vblk->vq = vdev->config->find_vq(vdev, blk_done); | ||
180 | if (IS_ERR(vblk->vq)) { | ||
181 | err = PTR_ERR(vblk->vq); | ||
182 | goto out_free_vblk; | ||
183 | } | ||
184 | |||
185 | vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); | ||
186 | if (!vblk->pool) { | ||
187 | err = -ENOMEM; | ||
188 | goto out_free_vq; | ||
189 | } | ||
190 | |||
191 | major = register_blkdev(0, "virtblk"); | ||
192 | if (major < 0) { | ||
193 | err = major; | ||
194 | goto out_mempool; | ||
195 | } | ||
196 | |||
197 | /* FIXME: How many partitions? How long is a piece of string? */ | ||
198 | vblk->disk = alloc_disk(1 << 4); | ||
199 | if (!vblk->disk) { | ||
200 | err = -ENOMEM; | ||
201 | goto out_unregister_blkdev; | ||
202 | } | ||
203 | |||
204 | vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); | ||
205 | if (!vblk->disk->queue) { | ||
206 | err = -ENOMEM; | ||
207 | goto out_put_disk; | ||
208 | } | ||
209 | |||
210 | sprintf(vblk->disk->disk_name, "vd%c", virtblk_index++); | ||
211 | vblk->disk->major = major; | ||
212 | vblk->disk->first_minor = 0; | ||
213 | vblk->disk->private_data = vblk; | ||
214 | vblk->disk->fops = &virtblk_fops; | ||
215 | |||
216 | /* If barriers are supported, tell block layer that queue is ordered */ | ||
217 | token = vdev->config->find(vdev, VIRTIO_CONFIG_BLK_F, &len); | ||
218 | if (virtio_use_bit(vdev, token, len, VIRTIO_BLK_F_BARRIER)) | ||
219 | blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL); | ||
220 | |||
221 | err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_CAPACITY, &cap); | ||
222 | if (err) { | ||
223 | dev_err(&vdev->dev, "Bad/missing capacity in config\n"); | ||
224 | goto out_put_disk; | ||
225 | } | ||
226 | |||
227 | /* If capacity is too big, truncate with warning. */ | ||
228 | if ((sector_t)cap != cap) { | ||
229 | dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", | ||
230 | (unsigned long long)cap); | ||
231 | cap = (sector_t)-1; | ||
232 | } | ||
233 | set_capacity(vblk->disk, cap); | ||
234 | |||
235 | err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SIZE_MAX, &v); | ||
236 | if (!err) | ||
237 | blk_queue_max_segment_size(vblk->disk->queue, v); | ||
238 | else if (err != -ENOENT) { | ||
239 | dev_err(&vdev->dev, "Bad SIZE_MAX in config\n"); | ||
240 | goto out_put_disk; | ||
241 | } | ||
242 | |||
243 | err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SEG_MAX, &v); | ||
244 | if (!err) | ||
245 | blk_queue_max_hw_segments(vblk->disk->queue, v); | ||
246 | else if (err != -ENOENT) { | ||
247 | dev_err(&vdev->dev, "Bad SEG_MAX in config\n"); | ||
248 | goto out_put_disk; | ||
249 | } | ||
250 | |||
251 | add_disk(vblk->disk); | ||
252 | return 0; | ||
253 | |||
254 | out_put_disk: | ||
255 | put_disk(vblk->disk); | ||
256 | out_unregister_blkdev: | ||
257 | unregister_blkdev(major, "virtblk"); | ||
258 | out_mempool: | ||
259 | mempool_destroy(vblk->pool); | ||
260 | out_free_vq: | ||
261 | vdev->config->del_vq(vblk->vq); | ||
262 | out_free_vblk: | ||
263 | kfree(vblk); | ||
264 | out: | ||
265 | return err; | ||
266 | } | ||
267 | |||
268 | static void virtblk_remove(struct virtio_device *vdev) | ||
269 | { | ||
270 | struct virtio_blk *vblk = vdev->priv; | ||
271 | int major = vblk->disk->major; | ||
272 | |||
273 | BUG_ON(!list_empty(&vblk->reqs)); | ||
274 | blk_cleanup_queue(vblk->disk->queue); | ||
275 | put_disk(vblk->disk); | ||
276 | unregister_blkdev(major, "virtblk"); | ||
277 | mempool_destroy(vblk->pool); | ||
278 | kfree(vblk); | ||
279 | } | ||
280 | |||
/*
 * Device IDs this driver binds to: one entry for VIRTIO_ID_BLOCK, followed
 * by a zeroed terminator.
 * NOTE(review): VIRTIO_DEV_ANY_ID presumably wildcards the second ID
 * field -- confirm against struct virtio_device_id.
 */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
285 | |||
/*
 * Driver description registered in init() and unregistered in fini().
 * Note this variable shares its name with the struct virtio_blk tag.
 */
static struct virtio_driver virtio_blk = {
	.driver.name =	KBUILD_MODNAME,
	.driver.owner =	THIS_MODULE,
	.id_table =	id_table,
	.probe =	virtblk_probe,
	.remove =	__devexit_p(virtblk_remove),
};
293 | |||
294 | static int __init init(void) | ||
295 | { | ||
296 | return register_virtio_driver(&virtio_blk); | ||
297 | } | ||
298 | |||
/* Module unload: unregister the driver registered by init(). */
static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_blk);
}
/* Use init()/fini() as the module's load and unload entry points. */
module_init(init);
module_exit(fini);

/* Publish id_table as this module's virtio device table. */
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");