 drivers/block/virtio_blk.c | 322 ++++++++--------------------------------------
 1 file changed, 65 insertions(+), 257 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5cdf88b7ad9e..7455fe24bbbe 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -11,12 +11,11 @@
 #include <linux/string_helpers.h>
 #include <scsi/scsi_cmnd.h>
 #include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include <linux/numa.h>
 
 #define PART_BITS 4
 
-static bool use_bio;
-module_param(use_bio, bool, S_IRUGO);
-
 static int major;
 static DEFINE_IDA(vd_index_ida);
 
@@ -26,13 +25,11 @@ struct virtio_blk
 {
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
-	wait_queue_head_t queue_wait;
+	spinlock_t vq_lock;
 
 	/* The disk structure for the kernel. */
 	struct gendisk *disk;
 
-	mempool_t *pool;
-
 	/* Process context for config space updates */
 	struct work_struct config_work;
 
@@ -47,31 +44,17 @@ struct virtio_blk
 
 	/* Ida index - used to track minor number allocations. */
 	int index;
-
-	/* Scatterlist: can be too big for stack. */
-	struct scatterlist sg[/*sg_elems*/];
 };
 
 struct virtblk_req
 {
 	struct request *req;
-	struct bio *bio;
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
-	struct work_struct work;
-	struct virtio_blk *vblk;
-	int flags;
 	u8 status;
 	struct scatterlist sg[];
 };
 
-enum {
-	VBLK_IS_FLUSH = 1,
-	VBLK_REQ_FLUSH = 2,
-	VBLK_REQ_DATA = 4,
-	VBLK_REQ_FUA = 8,
-};
-
 static inline int virtblk_result(struct virtblk_req *vbr)
 {
 	switch (vbr->status) {
@@ -84,22 +67,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
 	}
 }
 
-static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
-						    gfp_t gfp_mask)
-{
-	struct virtblk_req *vbr;
-
-	vbr = mempool_alloc(vblk->pool, gfp_mask);
-	if (!vbr)
-		return NULL;
-
-	vbr->vblk = vblk;
-	if (use_bio)
-		sg_init_table(vbr->sg, vblk->sg_elems);
-
-	return vbr;
-}
-
 static int __virtblk_add_req(struct virtqueue *vq,
 			     struct virtblk_req *vbr,
 			     struct scatterlist *data_sg,
@@ -143,83 +110,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
 	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
-static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-	DEFINE_WAIT(wait);
-	int ret;
-
-	spin_lock_irq(vblk->disk->queue->queue_lock);
-	while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
-						 have_data)) < 0)) {
-		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
-					  TASK_UNINTERRUPTIBLE);
-
-		spin_unlock_irq(vblk->disk->queue->queue_lock);
-		io_schedule();
-		spin_lock_irq(vblk->disk->queue->queue_lock);
-
-		finish_wait(&vblk->queue_wait, &wait);
-	}
-
-	virtqueue_kick(vblk->vq);
-	spin_unlock_irq(vblk->disk->queue->queue_lock);
-}
-
-static void virtblk_bio_send_flush(struct virtblk_req *vbr)
-{
-	vbr->flags |= VBLK_IS_FLUSH;
-	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
-	vbr->out_hdr.sector = 0;
-	vbr->out_hdr.ioprio = 0;
-
-	virtblk_add_req(vbr, false);
-}
-
-static void virtblk_bio_send_data(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-	struct bio *bio = vbr->bio;
-	bool have_data;
-
-	vbr->flags &= ~VBLK_IS_FLUSH;
-	vbr->out_hdr.type = 0;
-	vbr->out_hdr.sector = bio->bi_sector;
-	vbr->out_hdr.ioprio = bio_prio(bio);
-
-	if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
-		have_data = true;
-		if (bio->bi_rw & REQ_WRITE)
-			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
-		else
-			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
-	} else
-		have_data = false;
-
-	virtblk_add_req(vbr, have_data);
-}
-
-static void virtblk_bio_send_data_work(struct work_struct *work)
-{
-	struct virtblk_req *vbr;
-
-	vbr = container_of(work, struct virtblk_req, work);
-
-	virtblk_bio_send_data(vbr);
-}
-
-static void virtblk_bio_send_flush_work(struct work_struct *work)
-{
-	struct virtblk_req *vbr;
-
-	vbr = container_of(work, struct virtblk_req, work);
-
-	virtblk_bio_send_flush(vbr);
-}
-
 static inline void virtblk_request_done(struct virtblk_req *vbr)
 {
-	struct virtio_blk *vblk = vbr->vblk;
 	struct request *req = vbr->req;
 	int error = virtblk_result(vbr);
 
@@ -231,90 +123,43 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
 		req->errors = (error != 0);
 	}
 
-	__blk_end_request_all(req, error);
-	mempool_free(vbr, vblk->pool);
-}
-
-static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-
-	if (vbr->flags & VBLK_REQ_DATA) {
-		/* Send out the actual write data */
-		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
-		queue_work(virtblk_wq, &vbr->work);
-	} else {
-		bio_endio(vbr->bio, virtblk_result(vbr));
-		mempool_free(vbr, vblk->pool);
-	}
-}
-
-static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
-{
-	struct virtio_blk *vblk = vbr->vblk;
-
-	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
-		/* Send out a flush before end the bio */
-		vbr->flags &= ~VBLK_REQ_DATA;
-		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
-		queue_work(virtblk_wq, &vbr->work);
-	} else {
-		bio_endio(vbr->bio, virtblk_result(vbr));
-		mempool_free(vbr, vblk->pool);
-	}
-}
-
-static inline void virtblk_bio_done(struct virtblk_req *vbr)
-{
-	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
-		virtblk_bio_flush_done(vbr);
-	else
-		virtblk_bio_data_done(vbr);
+	blk_mq_end_io(req, error);
 }
 
 static void virtblk_done(struct virtqueue *vq)
 {
 	struct virtio_blk *vblk = vq->vdev->priv;
-	bool bio_done = false, req_done = false;
+	bool req_done = false;
 	struct virtblk_req *vbr;
 	unsigned long flags;
 	unsigned int len;
 
-	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
+	spin_lock_irqsave(&vblk->vq_lock, flags);
 	do {
 		virtqueue_disable_cb(vq);
 		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
-			if (vbr->bio) {
-				virtblk_bio_done(vbr);
-				bio_done = true;
-			} else {
-				virtblk_request_done(vbr);
-				req_done = true;
-			}
+			virtblk_request_done(vbr);
+			req_done = true;
 		}
 	} while (!virtqueue_enable_cb(vq));
+	spin_unlock_irqrestore(&vblk->vq_lock, flags);
+
 	/* In case queue is stopped waiting for more buffers. */
 	if (req_done)
-		blk_start_queue(vblk->disk->queue);
-	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
-
-	if (bio_done)
-		wake_up(&vblk->queue_wait);
+		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
 }
 
-static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
-		   struct request *req)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 {
+	struct virtio_blk *vblk = hctx->queue->queuedata;
+	struct virtblk_req *vbr = req->special;
+	unsigned long flags;
 	unsigned int num;
-	struct virtblk_req *vbr;
+	const bool last = (req->cmd_flags & REQ_END) != 0;
 
-	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
-	if (!vbr)
-		/* When another request finishes we'll try again. */
-		return false;
+	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
 	vbr->req = req;
-	vbr->bio = NULL;
 	if (req->cmd_flags & REQ_FLUSH) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
 		vbr->out_hdr.sector = 0;
@@ -342,7 +187,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		}
 	}
 
-	num = blk_rq_map_sg(q, vbr->req, vblk->sg);
+	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(vbr->req) == WRITE)
 			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
@@ -350,63 +195,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
 	}
 
-	if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
-		mempool_free(vbr, vblk->pool);
-		return false;
-	}
-
-	return true;
-}
-
-static void virtblk_request(struct request_queue *q)
-{
-	struct virtio_blk *vblk = q->queuedata;
-	struct request *req;
-	unsigned int issued = 0;
-
-	while ((req = blk_peek_request(q)) != NULL) {
-		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
-
-		/* If this request fails, stop queue and wait for something to
-		   finish to restart it. */
-		if (!do_req(q, vblk, req)) {
-			blk_stop_queue(q);
-			break;
-		}
-		blk_start_request(req);
-		issued++;
-	}
-
-	if (issued)
+	spin_lock_irqsave(&vblk->vq_lock, flags);
+	if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
+		spin_unlock_irqrestore(&vblk->vq_lock, flags);
+		blk_mq_stop_hw_queue(hctx);
 		virtqueue_kick(vblk->vq);
-}
-
-static void virtblk_make_request(struct request_queue *q, struct bio *bio)
-{
-	struct virtio_blk *vblk = q->queuedata;
-	struct virtblk_req *vbr;
-
-	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
-
-	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
-	if (!vbr) {
-		bio_endio(bio, -ENOMEM);
-		return;
+		return BLK_MQ_RQ_QUEUE_BUSY;
 	}
+	spin_unlock_irqrestore(&vblk->vq_lock, flags);
 
-	vbr->bio = bio;
-	vbr->flags = 0;
-	if (bio->bi_rw & REQ_FLUSH)
-		vbr->flags |= VBLK_REQ_FLUSH;
-	if (bio->bi_rw & REQ_FUA)
-		vbr->flags |= VBLK_REQ_FUA;
-	if (bio->bi_size)
-		vbr->flags |= VBLK_REQ_DATA;
-
-	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
-		virtblk_bio_send_flush(vbr);
-	else
-		virtblk_bio_send_data(vbr);
+	if (last)
+		virtqueue_kick(vblk->vq);
+	return BLK_MQ_RQ_QUEUE_OK;
 }
 
 /* return id (s/n) string for *disk to *id_str
@@ -680,12 +480,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
 	__ATTR(cache_type, S_IRUGO|S_IWUSR,
 	       virtblk_cache_type_show, virtblk_cache_type_store);
 
+static struct blk_mq_ops virtio_mq_ops = {
+	.queue_rq	= virtio_queue_rq,
+	.map_queue	= blk_mq_map_queue,
+	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
+	.free_hctx	= blk_mq_free_single_hw_queue,
+};
+
+static struct blk_mq_reg virtio_mq_reg = {
+	.ops		= &virtio_mq_ops,
+	.nr_hw_queues	= 1,
+	.queue_depth	= 64,
+	.numa_node	= NUMA_NO_NODE,
+	.flags		= BLK_MQ_F_SHOULD_MERGE,
+};
+
+static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
+			     struct request *rq, unsigned int nr)
+{
+	struct virtio_blk *vblk = data;
+	struct virtblk_req *vbr = rq->special;
+
+	sg_init_table(vbr->sg, vblk->sg_elems);
+}
+
 static int virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
 	struct request_queue *q;
 	int err, index;
-	int pool_size;
 
 	u64 cap;
 	u32 v, blk_size, sg_elems, opt_io_size;
@@ -709,17 +532,14 @@ static int virtblk_probe(struct virtio_device *vdev)
 
 	/* We need an extra sg elements at head and tail. */
 	sg_elems += 2;
-	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
-				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
+	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
 	if (!vblk) {
 		err = -ENOMEM;
 		goto out_free_index;
 	}
 
-	init_waitqueue_head(&vblk->queue_wait);
 	vblk->vdev = vdev;
 	vblk->sg_elems = sg_elems;
-	sg_init_table(vblk->sg, vblk->sg_elems);
 	mutex_init(&vblk->config_lock);
 
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
@@ -728,31 +548,27 @@ static int virtblk_probe(struct virtio_device *vdev)
 	err = init_vq(vblk);
 	if (err)
 		goto out_free_vblk;
-
-	pool_size = sizeof(struct virtblk_req);
-	if (use_bio)
-		pool_size += sizeof(struct scatterlist) * sg_elems;
-	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
-	if (!vblk->pool) {
-		err = -ENOMEM;
-		goto out_free_vq;
-	}
+	spin_lock_init(&vblk->vq_lock);
 
 	/* FIXME: How many partitions? How long is a piece of string? */
 	vblk->disk = alloc_disk(1 << PART_BITS);
 	if (!vblk->disk) {
 		err = -ENOMEM;
-		goto out_mempool;
+		goto out_free_vq;
 	}
 
-	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
+	virtio_mq_reg.cmd_size =
+		sizeof(struct virtblk_req) +
+		sizeof(struct scatterlist) * sg_elems;
+
+	q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
 	if (!q) {
 		err = -ENOMEM;
 		goto out_put_disk;
 	}
 
-	if (use_bio)
-		blk_queue_make_request(q, virtblk_make_request);
+	blk_mq_init_commands(q, virtblk_init_vbr, vblk);
+
 	q->queuedata = vblk;
 
 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -857,8 +673,6 @@ out_del_disk:
 	blk_cleanup_queue(vblk->disk->queue);
 out_put_disk:
 	put_disk(vblk->disk);
-out_mempool:
-	mempool_destroy(vblk->pool);
 out_free_vq:
 	vdev->config->del_vqs(vdev);
 out_free_vblk:
@@ -890,7 +704,6 @@ static void virtblk_remove(struct virtio_device *vdev)
 
 	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
 	put_disk(vblk->disk);
-	mempool_destroy(vblk->pool);
 	vdev->config->del_vqs(vdev);
 	kfree(vblk);
 
@@ -914,10 +727,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
 
 	flush_work(&vblk->config_work);
 
-	spin_lock_irq(vblk->disk->queue->queue_lock);
-	blk_stop_queue(vblk->disk->queue);
-	spin_unlock_irq(vblk->disk->queue->queue_lock);
-	blk_sync_queue(vblk->disk->queue);
+	blk_mq_stop_hw_queues(vblk->disk->queue);
 
 	vdev->config->del_vqs(vdev);
 	return 0;
@@ -930,11 +740,9 @@ static int virtblk_restore(struct virtio_device *vdev)
 
 	vblk->config_enable = true;
 	ret = init_vq(vdev->priv);
-	if (!ret) {
-		spin_lock_irq(vblk->disk->queue->queue_lock);
-		blk_start_queue(vblk->disk->queue);
-		spin_unlock_irq(vblk->disk->queue->queue_lock);
-	}
+	if (!ret)
+		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
+
 	return ret;
 }
 #endif