author    Jens Axboe <axboe@kernel.dk>    2011-10-20 09:10:59 -0400
committer Jens Axboe <axboe@kernel.dk>    2011-10-20 09:10:59 -0400
commit    b8d8bdfe31a67981bbc398a4886ccc67aff521d5
tree      b9e123aaad821d251f12da495a7eb46d87985603 /drivers/block
parent    4c823cc3d568277aa6340d8df6981e34f4c4dee5
parent    6927d92091df2848fc0e6a693a017d4b2df549c2
Merge branch 'stable/for-jens-3.2' of git://oss.oracle.com/git/kwilk/xen into for-3.2/drivers
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/xen-blkback/blkback.c | 130
-rw-r--r--  drivers/block/xen-blkback/common.h  | 100
-rw-r--r--  drivers/block/xen-blkback/xenbus.c  |  80
-rw-r--r--  drivers/block/xen-blkfront.c        | 123
4 files changed, 367 insertions(+), 66 deletions(-)
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 2330a9ad5e95..79efec24569b 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,9 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/loop.h>
+#include <linux/falloc.h>
+#include <linux/fs.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 
 static void print_stats(struct xen_blkif *blkif)
 {
-	pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
+	pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
+		" | ds %4d\n",
 		current->comm, blkif->st_oo_req,
-		blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+		blkif->st_rd_req, blkif->st_wr_req,
+		blkif->st_f_req, blkif->st_ds_req);
 	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
 	blkif->st_rd_req = 0;
 	blkif->st_wr_req = 0;
 	blkif->st_oo_req = 0;
+	blkif->st_ds_req = 0;
 }
 
 int xen_blkif_schedule(void *arg)
@@ -410,6 +416,59 @@ static int xen_blkbk_map(struct blkif_request *req,
 	return ret;
 }
 
+static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
+{
+	int err = 0;
+	int status = BLKIF_RSP_OKAY;
+	struct block_device *bdev = blkif->vbd.bdev;
+
+	if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
+		/* just forward the discard request */
+		err = blkdev_issue_discard(bdev,
+				req->u.discard.sector_number,
+				req->u.discard.nr_sectors,
+				GFP_KERNEL, 0);
+	else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
+		/* punch a hole in the backing file */
+		struct loop_device *lo = bdev->bd_disk->private_data;
+		struct file *file = lo->lo_backing_file;
+
+		if (file->f_op->fallocate)
+			err = file->f_op->fallocate(file,
+				FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+				req->u.discard.sector_number << 9,
+				req->u.discard.nr_sectors << 9);
+		else
+			err = -EOPNOTSUPP;
+	} else
+		err = -EOPNOTSUPP;
+
+	if (err == -EOPNOTSUPP) {
+		pr_debug(DRV_PFX "discard op failed, not supported\n");
+		status = BLKIF_RSP_EOPNOTSUPP;
+	} else if (err)
+		status = BLKIF_RSP_ERROR;
+
+	make_response(blkif, req->id, req->operation, status);
+}
+
+static void xen_blk_drain_io(struct xen_blkif *blkif)
+{
+	atomic_set(&blkif->drain, 1);
+	do {
+		/* The initial value is one, and one refcnt taken at the
+		 * start of the xen_blkif_schedule thread. */
+		if (atomic_read(&blkif->refcnt) <= 2)
+			break;
+		wait_for_completion_interruptible_timeout(
+				&blkif->drain_complete, HZ);
+
+		if (!atomic_read(&blkif->drain))
+			break;
+	} while (!kthread_should_stop());
+	atomic_set(&blkif->drain, 0);
+}
+
 /*
  * Completion callback on the bio's. Called as bh->b_end_io()
  */
@@ -422,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 		pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
 		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+	} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+		    (error == -EOPNOTSUPP)) {
+		pr_debug(DRV_PFX "write barrier op failed, not supported\n");
+		xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
 	} else if (error) {
 		pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
 			 " error=%d\n", error);
@@ -438,6 +502,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 		make_response(pending_req->blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
 		xen_blkif_put(pending_req->blkif);
+		if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
+			if (atomic_read(&pending_req->blkif->drain))
+				complete(&pending_req->blkif->drain_complete);
+		}
 		free_req(pending_req);
 	}
 }
@@ -532,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif)
 
 	return more_to_do;
 }
-
 /*
  * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
  * and call the 'submit_bio' to pass it to the underlying storage.
@@ -549,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int i, nbio = 0;
 	int operation;
 	struct blk_plug plug;
+	bool drain = false;
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -559,11 +627,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		blkif->st_wr_req++;
 		operation = WRITE_ODIRECT;
 		break;
+	case BLKIF_OP_WRITE_BARRIER:
+		drain = true;
 	case BLKIF_OP_FLUSH_DISKCACHE:
 		blkif->st_f_req++;
 		operation = WRITE_FLUSH;
 		break;
-	case BLKIF_OP_WRITE_BARRIER:
+	case BLKIF_OP_DISCARD:
+		blkif->st_ds_req++;
+		operation = REQ_DISCARD;
+		break;
 	default:
 		operation = 0; /* make gcc happy */
 		goto fail_response;
@@ -572,7 +645,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
 	/* Check that the number of segments is sane. */
 	nseg = req->nr_segments;
-	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+	if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
+				operation != REQ_DISCARD) ||
 	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
 			 nseg);
@@ -621,16 +695,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		}
 	}
 
+	/* Wait on all outstanding I/O's and once that has been completed
+	 * issue the WRITE_FLUSH.
+	 */
+	if (drain)
+		xen_blk_drain_io(pending_req->blkif);
+
 	/*
 	 * If we have failed at this point, we need to undo the M2P override,
 	 * set gnttab_set_unmap_op on all of the grant references and perform
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg))
+	if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
 		goto fail_flush;
 
-	/* This corresponding xen_blkif_put is done in __end_block_io_op */
+	/*
+	 * This corresponding xen_blkif_put is done in __end_block_io_op, or
+	 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
+	 */
 	xen_blkif_get(blkif);
 
 	for (i = 0; i < nseg; i++) {
@@ -654,18 +737,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		preq.sector_number += seg[i].nsec;
 	}
 
-	/* This will be hit if the operation was a flush. */
+	/* This will be hit if the operation was a flush or discard. */
 	if (!bio) {
-		BUG_ON(operation != WRITE_FLUSH);
+		BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
 
-		bio = bio_alloc(GFP_KERNEL, 0);
-		if (unlikely(bio == NULL))
-			goto fail_put_bio;
+		if (operation == WRITE_FLUSH) {
+			bio = bio_alloc(GFP_KERNEL, 0);
+			if (unlikely(bio == NULL))
+				goto fail_put_bio;
 
-		biolist[nbio++] = bio;
-		bio->bi_bdev = preq.bdev;
-		bio->bi_private = pending_req;
-		bio->bi_end_io = end_block_io_op;
+			biolist[nbio++] = bio;
+			bio->bi_bdev = preq.bdev;
+			bio->bi_private = pending_req;
+			bio->bi_end_io = end_block_io_op;
+		} else if (operation == REQ_DISCARD) {
+			xen_blk_discard(blkif, req);
+			xen_blkif_put(blkif);
+			free_req(pending_req);
+			return 0;
+		}
 	}
 
 	/*
@@ -685,7 +775,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
 	if (operation == READ)
 		blkif->st_rd_sect += preq.nr_sects;
-	else if (operation == WRITE || operation == WRITE_FLUSH)
+	else if (operation & WRITE)
 		blkif->st_wr_sect += preq.nr_sects;
 
 	return 0;
@@ -765,9 +855,9 @@ static int __init xen_blkif_init(void)
 
 	mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
-	blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+	blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) *
 					xen_blkif_reqs, GFP_KERNEL);
-	blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+	blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
 					mmap_pages, GFP_KERNEL);
 	blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) *
 					mmap_pages, GFP_KERNEL);
@@ -790,8 +880,6 @@ static int __init xen_blkif_init(void)
 	if (rc)
 		goto failed_init;
 
-	memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
-
 	INIT_LIST_HEAD(&blkbk->pending_free);
 	spin_lock_init(&blkbk->pending_free_lock);
 	init_waitqueue_head(&blkbk->pending_free_wq);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 9e40b283a468..e638457d9de4 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -46,7 +46,7 @@
 
 #define DRV_PFX "xen-blkback:"
 #define DPRINTK(fmt, args...)				\
-	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",	\
+	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",		\
 		 __func__, __LINE__, ##args)
 
 
@@ -63,13 +63,26 @@ struct blkif_common_response {
 
 /* i386 protocol version */
 #pragma pack(push, 4)
+
+struct blkif_x86_32_request_rw {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_32_request_discard {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	uint64_t nr_sectors;
+};
+
 struct blkif_x86_32_request {
 	uint8_t operation; /* BLKIF_OP_??? */
 	uint8_t nr_segments; /* number of segments */
 	blkif_vdev_t handle; /* only for read/write requests */
 	uint64_t id; /* private guest value, echoed in resp */
-	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
-	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	union {
+		struct blkif_x86_32_request_rw rw;
+		struct blkif_x86_32_request_discard discard;
+	} u;
 };
 struct blkif_x86_32_response {
 	uint64_t id; /* copied from request */
@@ -79,13 +92,26 @@ struct blkif_x86_32_response {
 #pragma pack(pop)
 
 /* x86_64 protocol version */
+
+struct blkif_x86_64_request_rw {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_64_request_discard {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	uint64_t nr_sectors;
+};
+
 struct blkif_x86_64_request {
 	uint8_t operation; /* BLKIF_OP_??? */
 	uint8_t nr_segments; /* number of segments */
 	blkif_vdev_t handle; /* only for read/write requests */
 	uint64_t __attribute__((__aligned__(8))) id;
-	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
-	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	union {
+		struct blkif_x86_64_request_rw rw;
+		struct blkif_x86_64_request_discard discard;
+	} u;
 };
 struct blkif_x86_64_response {
 	uint64_t __attribute__((__aligned__(8))) id;
@@ -113,6 +139,11 @@ enum blkif_protocol {
 	BLKIF_PROTOCOL_X86_64 = 3,
 };
 
+enum blkif_backend_type {
+	BLKIF_BACKEND_PHY = 1,
+	BLKIF_BACKEND_FILE = 2,
+};
+
 struct xen_vbd {
 	/* What the domain refers to this vbd as. */
 	blkif_vdev_t handle;
@@ -138,6 +169,7 @@ struct xen_blkif {
 	unsigned int irq;
 	/* Comms information. */
 	enum blkif_protocol blk_protocol;
+	enum blkif_backend_type blk_backend_type;
 	union blkif_back_rings blk_rings;
 	struct vm_struct *blk_ring_area;
 	/* The VBD attached to this interface. */
@@ -149,6 +181,9 @@ struct xen_blkif {
 	atomic_t refcnt;
 
 	wait_queue_head_t wq;
+	/* for barrier (drain) requests */
+	struct completion drain_complete;
+	atomic_t drain;
 	/* One thread per one blkif. */
 	struct task_struct *xenblkd;
 	unsigned int waiting_reqs;
@@ -159,6 +194,7 @@ struct xen_blkif {
 	int st_wr_req;
 	int st_oo_req;
 	int st_f_req;
+	int st_ds_req;
 	int st_rd_sect;
 	int st_wr_sect;
 
@@ -182,7 +218,7 @@ struct xen_blkif {
 
 struct phys_req {
 	unsigned short dev;
-	unsigned short nr_sects;
+	blkif_sector_t nr_sects;
 	struct block_device *bdev;
 	blkif_sector_t sector_number;
 };
@@ -196,6 +232,8 @@ int xen_blkif_schedule(void *arg);
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 			      struct backend_info *be, int state);
 
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+		      struct backend_info *be, int state);
 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
 
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
@@ -206,12 +244,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 	dst->nr_segments = src->nr_segments;
 	dst->handle = src->handle;
 	dst->id = src->id;
-	dst->u.rw.sector_number = src->sector_number;
-	barrier();
-	if (n > dst->nr_segments)
-		n = dst->nr_segments;
-	for (i = 0; i < n; i++)
-		dst->u.rw.seg[i] = src->seg[i];
+	switch (src->operation) {
+	case BLKIF_OP_READ:
+	case BLKIF_OP_WRITE:
+	case BLKIF_OP_WRITE_BARRIER:
+	case BLKIF_OP_FLUSH_DISKCACHE:
+		dst->u.rw.sector_number = src->u.rw.sector_number;
+		barrier();
+		if (n > dst->nr_segments)
+			n = dst->nr_segments;
+		for (i = 0; i < n; i++)
+			dst->u.rw.seg[i] = src->u.rw.seg[i];
+		break;
+	case BLKIF_OP_DISCARD:
+		dst->u.discard.sector_number = src->u.discard.sector_number;
+		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+		break;
+	default:
+		break;
+	}
 }
 
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
@@ -222,12 +273,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 	dst->nr_segments = src->nr_segments;
 	dst->handle = src->handle;
 	dst->id = src->id;
-	dst->u.rw.sector_number = src->sector_number;
-	barrier();
-	if (n > dst->nr_segments)
-		n = dst->nr_segments;
-	for (i = 0; i < n; i++)
-		dst->u.rw.seg[i] = src->seg[i];
+	switch (src->operation) {
+	case BLKIF_OP_READ:
+	case BLKIF_OP_WRITE:
+	case BLKIF_OP_WRITE_BARRIER:
+	case BLKIF_OP_FLUSH_DISKCACHE:
+		dst->u.rw.sector_number = src->u.rw.sector_number;
+		barrier();
+		if (n > dst->nr_segments)
+			n = dst->nr_segments;
+		for (i = 0; i < n; i++)
+			dst->u.rw.seg[i] = src->u.rw.seg[i];
+		break;
+	case BLKIF_OP_DISCARD:
+		dst->u.discard.sector_number = src->u.discard.sector_number;
+		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+		break;
+	default:
+		break;
+	}
 }
 
 #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 3f129b45451a..a6d43030b107 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -114,6 +114,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	spin_lock_init(&blkif->blk_ring_lock);
 	atomic_set(&blkif->refcnt, 1);
 	init_waitqueue_head(&blkif->wq);
+	init_completion(&blkif->drain_complete);
+	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
 
@@ -272,6 +274,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
 VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
 VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
 VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
+VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req);
 VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
 VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
 
@@ -280,6 +283,7 @@ static struct attribute *xen_vbdstat_attrs[] = {
 	&dev_attr_rd_req.attr,
 	&dev_attr_wr_req.attr,
 	&dev_attr_f_req.attr,
+	&dev_attr_ds_req.attr,
 	&dev_attr_rd_sect.attr,
 	&dev_attr_wr_sect.attr,
 	NULL
@@ -419,6 +423,73 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 	return err;
 }
 
+int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	struct xen_blkif *blkif = be->blkif;
+	char *type;
+	int err;
+	int state = 0;
+
+	type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+	if (!IS_ERR(type)) {
+		if (strncmp(type, "file", 4) == 0) {
+			state = 1;
+			blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+		}
+		if (strncmp(type, "phy", 3) == 0) {
+			struct block_device *bdev = be->blkif->vbd.bdev;
+			struct request_queue *q = bdev_get_queue(bdev);
+			if (blk_queue_discard(q)) {
+				err = xenbus_printf(xbt, dev->nodename,
+					"discard-granularity", "%u",
+					q->limits.discard_granularity);
+				if (err) {
+					xenbus_dev_fatal(dev, err,
+						"writing discard-granularity");
+					goto kfree;
+				}
+				err = xenbus_printf(xbt, dev->nodename,
+					"discard-alignment", "%u",
+					q->limits.discard_alignment);
+				if (err) {
+					xenbus_dev_fatal(dev, err,
+						"writing discard-alignment");
+					goto kfree;
+				}
+				state = 1;
+				blkif->blk_backend_type = BLKIF_BACKEND_PHY;
+			}
+		}
+	} else {
+		err = PTR_ERR(type);
+		xenbus_dev_fatal(dev, err, "reading type");
+		goto out;
+	}
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
+			    "%d", state);
+	if (err)
+		xenbus_dev_fatal(dev, err, "writing feature-discard");
+kfree:
+	kfree(type);
+out:
+	return err;
+}
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+		      struct backend_info *be, int state)
+{
+	struct xenbus_device *dev = be->dev;
+	int err;
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+			    "%d", state);
+	if (err)
+		xenbus_dev_fatal(dev, err, "writing feature-barrier");
+
+	return err;
+}
+
 /*
  * Entry point to this code when a new device is created. Allocate the basic
  * structures, and watch the store waiting for the hotplug scripts to tell us
@@ -590,7 +661,7 @@ static void frontend_changed(struct xenbus_device *dev,
 
 		/*
 		 * Enforce precondition before potential leak point.
-		 * blkif_disconnect() is idempotent.
+		 * xen_blkif_disconnect() is idempotent.
 		 */
 		xen_blkif_disconnect(be->blkif);
 
@@ -611,7 +682,7 @@ static void frontend_changed(struct xenbus_device *dev,
 			break;
 		/* fall through if not online */
 	case XenbusStateUnknown:
-		/* implies blkif_disconnect() via blkback_remove() */
+		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
 		device_unregister(&dev->dev);
 		break;
 
@@ -650,6 +721,11 @@ again:
 	if (err)
 		goto abort;
 
+	err = xen_blkbk_discard(xbt, be);
+
+	/* If we can't advertise it is OK. */
+	err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
 	if (err) {
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index b536a9cef917..773da7d6491e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -98,6 +98,9 @@ struct blkfront_info
 	unsigned long shadow_free;
 	unsigned int feature_flush;
 	unsigned int flush_op;
+	unsigned int feature_discard;
+	unsigned int discard_granularity;
+	unsigned int discard_alignment;
 	int is_ready;
 };
 
@@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req)
 		ring_req->operation = info->flush_op;
 	}
 
-	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
-	BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	if (unlikely(req->cmd_flags & REQ_DISCARD)) {
+		/* id, sector_number and handle are set above. */
+		ring_req->operation = BLKIF_OP_DISCARD;
+		ring_req->nr_segments = 0;
+		ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+	} else {
+		ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
+		BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
-	for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
-		buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
-		fsect = sg->offset >> 9;
-		lsect = fsect + (sg->length >> 9) - 1;
-		/* install a grant reference. */
-		ref = gnttab_claim_grant_reference(&gref_head);
-		BUG_ON(ref == -ENOSPC);
+		for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
+			buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
+			fsect = sg->offset >> 9;
+			lsect = fsect + (sg->length >> 9) - 1;
+			/* install a grant reference. */
+			ref = gnttab_claim_grant_reference(&gref_head);
+			BUG_ON(ref == -ENOSPC);
 
-		gnttab_grant_foreign_access_ref(
-				ref,
-				info->xbdev->otherend_id,
-				buffer_mfn,
-				rq_data_dir(req) );
+			gnttab_grant_foreign_access_ref(
+					ref,
+					info->xbdev->otherend_id,
+					buffer_mfn,
+					rq_data_dir(req));
 
-		info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
-		ring_req->u.rw.seg[i] =
-				(struct blkif_request_segment) {
-					.gref = ref,
-					.first_sect = fsect,
-					.last_sect = lsect };
+			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
+			ring_req->u.rw.seg[i] =
+					(struct blkif_request_segment) {
+						.gref = ref,
+						.first_sect = fsect,
+						.last_sect = lsect };
+		}
 	}
 
 	info->ring.req_prod_pvt++;
@@ -370,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq)
 
 		blk_start_request(req);
 
-		if (req->cmd_type != REQ_TYPE_FS) {
+		if ((req->cmd_type != REQ_TYPE_FS) ||
+		    ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
+		    !info->flush_op)) {
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
@@ -399,6 +411,7 @@ wait:
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 {
 	struct request_queue *rq;
+	struct blkfront_info *info = gd->private_data;
 
 	rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
 	if (rq == NULL)
@@ -406,6 +419,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 
+	if (info->feature_discard) {
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+		blk_queue_max_discard_sectors(rq, get_capacity(gd));
+		rq->limits.discard_granularity = info->discard_granularity;
+		rq->limits.discard_alignment = info->discard_alignment;
+	}
+
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_max_hw_sectors(rq, 512);
@@ -722,6 +742,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
 		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
 		switch (bret->operation) {
+		case BLKIF_OP_DISCARD:
+			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+				struct request_queue *rq = info->rq;
+				printk(KERN_WARNING "blkfront: %s: discard op failed\n",
+					   info->gd->disk_name);
+				error = -EOPNOTSUPP;
+				info->feature_discard = 0;
+				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+			}
+			__blk_end_request_all(req, error);
+			break;
 		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
@@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info)
 	bdput(bdev);
 }
 
+static void blkfront_setup_discard(struct blkfront_info *info)
+{
+	int err;
+	char *type;
+	unsigned int discard_granularity;
+	unsigned int discard_alignment;
+
+	type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
+	if (IS_ERR(type))
+		return;
+
+	if (strncmp(type, "phy", 3) == 0) {
+		err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			"discard-granularity", "%u", &discard_granularity,
+			"discard-alignment", "%u", &discard_alignment,
+			NULL);
+		if (!err) {
+			info->feature_discard = 1;
+			info->discard_granularity = discard_granularity;
+			info->discard_alignment = discard_alignment;
+		}
+	} else if (strncmp(type, "file", 4) == 0)
+		info->feature_discard = 1;
+
+	kfree(type);
+}
+
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
@@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned long sector_size;
 	unsigned int binfo;
 	int err;
-	int barrier, flush;
+	int barrier, flush, discard;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1178,7 +1236,14 @@ static void blkfront_connect(struct blkfront_info *info)
 		info->feature_flush = REQ_FLUSH;
 		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
 	}
 
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-discard", "%d", &discard,
+			    NULL);
+
+	if (!err && discard)
+		blkfront_setup_discard(info);
+
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -1385,6 +1450,8 @@ static struct xenbus_driver blkfront = {
 
 static int __init xlblk_init(void)
 {
+	int ret;
+
 	if (!xen_domain())
 		return -ENODEV;
 
@@ -1394,7 +1461,13 @@ static int __init xlblk_init(void)
 		return -ENODEV;
 	}
 
-	return xenbus_register_frontend(&blkfront);
+	ret = xenbus_register_frontend(&blkfront);
+	if (ret) {
+		unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
+		return ret;
+	}
+
+	return 0;
 }
 module_init(xlblk_init);
 