-rw-r--r--	drivers/block/xen-blkback/blkback.c	130
-rw-r--r--	drivers/block/xen-blkback/common.h	100
-rw-r--r--	drivers/block/xen-blkback/xenbus.c	 80
-rw-r--r--	drivers/block/xen-blkfront.c		123
-rw-r--r--	include/xen/interface/io/blkif.h	 36
5 files changed, 403 insertions(+), 66 deletions(-)
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 2330a9ad5e95..79efec24569b 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -39,6 +39,9 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/loop.h>
+#include <linux/falloc.h>
+#include <linux/fs.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 
 static void print_stats(struct xen_blkif *blkif)
 {
-	pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
+	pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
+		" | ds %4d\n",
 		current->comm, blkif->st_oo_req,
-		blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+		blkif->st_rd_req, blkif->st_wr_req,
+		blkif->st_f_req, blkif->st_ds_req);
 	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
 	blkif->st_rd_req = 0;
 	blkif->st_wr_req = 0;
 	blkif->st_oo_req = 0;
+	blkif->st_ds_req = 0;
 }
 
 int xen_blkif_schedule(void *arg)
@@ -410,6 +416,59 @@ static int xen_blkbk_map(struct blkif_request *req,
 	return ret;
 }
 
+static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req)
+{
+	int err = 0;
+	int status = BLKIF_RSP_OKAY;
+	struct block_device *bdev = blkif->vbd.bdev;
+
+	if (blkif->blk_backend_type == BLKIF_BACKEND_PHY)
+		/* just forward the discard request */
+		err = blkdev_issue_discard(bdev,
+				req->u.discard.sector_number,
+				req->u.discard.nr_sectors,
+				GFP_KERNEL, 0);
+	else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) {
+		/* punch a hole in the backing file */
+		struct loop_device *lo = bdev->bd_disk->private_data;
+		struct file *file = lo->lo_backing_file;
+
+		if (file->f_op->fallocate)
+			err = file->f_op->fallocate(file,
+				FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+				req->u.discard.sector_number << 9,
+				req->u.discard.nr_sectors << 9);
+		else
+			err = -EOPNOTSUPP;
+	} else
+		err = -EOPNOTSUPP;
+
+	if (err == -EOPNOTSUPP) {
+		pr_debug(DRV_PFX "discard op failed, not supported\n");
+		status = BLKIF_RSP_EOPNOTSUPP;
+	} else if (err)
+		status = BLKIF_RSP_ERROR;
+
+	make_response(blkif, req->id, req->operation, status);
+}
+
+static void xen_blk_drain_io(struct xen_blkif *blkif)
+{
+	atomic_set(&blkif->drain, 1);
+	do {
+		/* The initial value is one, and one refcnt taken at the
+		 * start of the xen_blkif_schedule thread. */
+		if (atomic_read(&blkif->refcnt) <= 2)
+			break;
+		wait_for_completion_interruptible_timeout(
+				&blkif->drain_complete, HZ);
+
+		if (!atomic_read(&blkif->drain))
+			break;
+	} while (!kthread_should_stop());
+	atomic_set(&blkif->drain, 0);
+}
+
 /*
  * Completion callback on the bio's. Called as bh->b_end_io()
  */
@@ -422,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 		pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
 		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+	} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+		   (error == -EOPNOTSUPP)) {
+		pr_debug(DRV_PFX "write barrier op failed, not supported\n");
+		xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
 	} else if (error) {
 		pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
 			 " error=%d\n", error);
@@ -438,6 +502,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 		make_response(pending_req->blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
 		xen_blkif_put(pending_req->blkif);
+		if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
+			if (atomic_read(&pending_req->blkif->drain))
+				complete(&pending_req->blkif->drain_complete);
+		}
 		free_req(pending_req);
 	}
 }
@@ -532,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif)
 
 	return more_to_do;
 }
-
 /*
  * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
  * and call the 'submit_bio' to pass it to the underlying storage.
@@ -549,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int i, nbio = 0;
 	int operation;
 	struct blk_plug plug;
+	bool drain = false;
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -559,11 +627,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		blkif->st_wr_req++;
 		operation = WRITE_ODIRECT;
 		break;
+	case BLKIF_OP_WRITE_BARRIER:
+		drain = true;
 	case BLKIF_OP_FLUSH_DISKCACHE:
 		blkif->st_f_req++;
 		operation = WRITE_FLUSH;
 		break;
-	case BLKIF_OP_WRITE_BARRIER:
+	case BLKIF_OP_DISCARD:
+		blkif->st_ds_req++;
+		operation = REQ_DISCARD;
+		break;
 	default:
 		operation = 0; /* make gcc happy */
 		goto fail_response;
@@ -572,7 +645,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
 	/* Check that the number of segments is sane. */
 	nseg = req->nr_segments;
-	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+	if (unlikely(nseg == 0 && operation != WRITE_FLUSH &&
+				operation != REQ_DISCARD) ||
 	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
 			 nseg);
@@ -621,16 +695,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		}
 	}
 
+	/* Wait on all outstanding I/O's and once that has been completed
+	 * issue the WRITE_FLUSH.
+	 */
+	if (drain)
+		xen_blk_drain_io(pending_req->blkif);
+
 	/*
	 * If we have failed at this point, we need to undo the M2P override,
	 * set gnttab_set_unmap_op on all of the grant references and perform
	 * the hypercall to unmap the grants - that is all done in
	 * xen_blkbk_unmap.
	 */
-	if (xen_blkbk_map(req, pending_req, seg))
+	if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg))
 		goto fail_flush;
 
-	/* This corresponding xen_blkif_put is done in __end_block_io_op */
+	/*
+	 * This corresponding xen_blkif_put is done in __end_block_io_op, or
+	 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
+	 */
 	xen_blkif_get(blkif);
 
 	for (i = 0; i < nseg; i++) {
@@ -654,18 +737,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		preq.sector_number += seg[i].nsec;
 	}
 
-	/* This will be hit if the operation was a flush. */
+	/* This will be hit if the operation was a flush or discard. */
 	if (!bio) {
-		BUG_ON(operation != WRITE_FLUSH);
+		BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD);
 
-		bio = bio_alloc(GFP_KERNEL, 0);
-		if (unlikely(bio == NULL))
-			goto fail_put_bio;
+		if (operation == WRITE_FLUSH) {
+			bio = bio_alloc(GFP_KERNEL, 0);
+			if (unlikely(bio == NULL))
+				goto fail_put_bio;
 
-		biolist[nbio++] = bio;
-		bio->bi_bdev = preq.bdev;
-		bio->bi_private = pending_req;
-		bio->bi_end_io = end_block_io_op;
+			biolist[nbio++] = bio;
+			bio->bi_bdev = preq.bdev;
+			bio->bi_private = pending_req;
+			bio->bi_end_io = end_block_io_op;
+		} else if (operation == REQ_DISCARD) {
+			xen_blk_discard(blkif, req);
+			xen_blkif_put(blkif);
+			free_req(pending_req);
+			return 0;
+		}
 	}
 
 	/*
@@ -685,7 +775,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 
 	if (operation == READ)
 		blkif->st_rd_sect += preq.nr_sects;
-	else if (operation == WRITE || operation == WRITE_FLUSH)
+	else if (operation & WRITE)
 		blkif->st_wr_sect += preq.nr_sects;
 
 	return 0;
@@ -765,9 +855,9 @@ static int __init xen_blkif_init(void)
 
 	mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
-	blkbk->pending_reqs          = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+	blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
 					xen_blkif_reqs, GFP_KERNEL);
-	blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+	blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
 					mmap_pages, GFP_KERNEL);
 	blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
 					mmap_pages, GFP_KERNEL);
@@ -790,8 +880,6 @@ static int __init xen_blkif_init(void)
 	if (rc)
 		goto failed_init;
 
-	memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs));
-
 	INIT_LIST_HEAD(&blkbk->pending_free);
 	spin_lock_init(&blkbk->pending_free_lock);
 	init_waitqueue_head(&blkbk->pending_free_wq);
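Note on the two backend paths in xen_blk_discard() above: a "phy" backend simply forwards the range to the device with blkdev_issue_discard(), while a "file" (loop) backend punches a hole in the loop device's backing file through the fallocate file operation. The hole-punch half can be exercised from user space, which is a handy way to check whether a backing filesystem supports it. The sketch below is illustrative only — the file name and extents are invented — and it assumes a Linux filesystem that implements FALLOC_FL_PUNCH_HOLE (e.g. recent ext4 or XFS):

#define _GNU_SOURCE
#include <fcntl.h>		/* open(), fallocate(), FALLOC_FL_* */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("scratch.img", O_RDWR | O_CREAT, 0600);
	if (fd < 0 || ftruncate(fd, 1 << 20)) {	/* 1 MiB backing file */
		perror("setup");
		return 1;
	}
	/* mirror req->u.discard: start sector 128, 256 sectors, << 9 to bytes */
	off_t offset = (off_t)128 << 9;
	off_t length = (off_t)256 << 9;
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      offset, length)) {
		perror("fallocate");	/* EOPNOTSUPP here is what the backend
					 * maps to BLKIF_RSP_EOPNOTSUPP */
		return 1;
	}
	printf("punched %lld bytes at offset %lld\n",
	       (long long)length, (long long)offset);
	close(fd);
	return 0;
}

FALLOC_FL_PUNCH_HOLE must be paired with FALLOC_FL_KEEP_SIZE — the same flag pair the backend passes — so the file length is preserved and only the blocks are deallocated.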
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 9e40b283a468..e638457d9de4 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -46,7 +46,7 @@
 
 #define DRV_PFX "xen-blkback:"
 #define DPRINTK(fmt, args...)				\
-	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",	\
+	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",		\
 		__func__, __LINE__, ##args)
 
 
@@ -63,13 +63,26 @@ struct blkif_common_response {
 
 /* i386 protocol version */
 #pragma pack(push, 4)
+
+struct blkif_x86_32_request_rw {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_32_request_discard {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	uint64_t nr_sectors;
+};
+
 struct blkif_x86_32_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	uint8_t        nr_segments;  /* number of segments                   */
 	blkif_vdev_t   handle;       /* only for read/write requests         */
 	uint64_t       id;           /* private guest value, echoed in resp  */
-	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
-	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	union {
+		struct blkif_x86_32_request_rw rw;
+		struct blkif_x86_32_request_discard discard;
+	} u;
 };
 struct blkif_x86_32_response {
 	uint64_t        id;              /* copied from request */
@@ -79,13 +92,26 @@ struct blkif_x86_32_response {
 #pragma pack(pop)
 
 /* x86_64 protocol version */
+
+struct blkif_x86_64_request_rw {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+struct blkif_x86_64_request_discard {
+	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+	uint64_t nr_sectors;
+};
+
 struct blkif_x86_64_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	uint8_t        nr_segments;  /* number of segments                   */
 	blkif_vdev_t   handle;       /* only for read/write requests         */
 	uint64_t       __attribute__((__aligned__(8))) id;
-	blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
-	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	union {
+		struct blkif_x86_64_request_rw rw;
+		struct blkif_x86_64_request_discard discard;
+	} u;
 };
 struct blkif_x86_64_response {
 	uint64_t       __attribute__((__aligned__(8))) id;
@@ -113,6 +139,11 @@ enum blkif_protocol {
 	BLKIF_PROTOCOL_X86_64 = 3,
 };
 
+enum blkif_backend_type {
+	BLKIF_BACKEND_PHY  = 1,
+	BLKIF_BACKEND_FILE = 2,
+};
+
 struct xen_vbd {
 	/* What the domain refers to this vbd as. */
 	blkif_vdev_t		handle;
@@ -138,6 +169,7 @@ struct xen_blkif {
 	unsigned int		irq;
 	/* Comms information. */
 	enum blkif_protocol	blk_protocol;
+	enum blkif_backend_type	blk_backend_type;
 	union blkif_back_rings	blk_rings;
 	struct vm_struct	*blk_ring_area;
 	/* The VBD attached to this interface. */
@@ -149,6 +181,9 @@ struct xen_blkif {
 	atomic_t		refcnt;
 
 	wait_queue_head_t	wq;
+	/* for barrier (drain) requests */
+	struct completion	drain_complete;
+	atomic_t		drain;
 	/* One thread per one blkif. */
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
@@ -159,6 +194,7 @@ struct xen_blkif {
 	int			st_wr_req;
 	int			st_oo_req;
 	int			st_f_req;
+	int			st_ds_req;
 	int			st_rd_sect;
 	int			st_wr_sect;
 
@@ -182,7 +218,7 @@ struct xen_blkif {
 
 struct phys_req {
 	unsigned short		dev;
-	unsigned short		nr_sects;
+	blkif_sector_t		nr_sects;
 	struct block_device	*bdev;
 	blkif_sector_t		sector_number;
 };
@@ -196,6 +232,8 @@ int xen_blkif_schedule(void *arg);
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 			      struct backend_info *be, int state);
 
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+		      struct backend_info *be, int state);
 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
 
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
@@ -206,12 +244,25 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 	dst->nr_segments = src->nr_segments;
 	dst->handle = src->handle;
 	dst->id = src->id;
-	dst->u.rw.sector_number = src->sector_number;
-	barrier();
-	if (n > dst->nr_segments)
-		n = dst->nr_segments;
-	for (i = 0; i < n; i++)
-		dst->u.rw.seg[i] = src->seg[i];
+	switch (src->operation) {
+	case BLKIF_OP_READ:
+	case BLKIF_OP_WRITE:
+	case BLKIF_OP_WRITE_BARRIER:
+	case BLKIF_OP_FLUSH_DISKCACHE:
+		dst->u.rw.sector_number = src->u.rw.sector_number;
+		barrier();
+		if (n > dst->nr_segments)
+			n = dst->nr_segments;
+		for (i = 0; i < n; i++)
+			dst->u.rw.seg[i] = src->u.rw.seg[i];
+		break;
+	case BLKIF_OP_DISCARD:
+		dst->u.discard.sector_number = src->u.discard.sector_number;
+		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+		break;
+	default:
+		break;
+	}
 }
 
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
@@ -222,12 +273,25 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 	dst->nr_segments = src->nr_segments;
 	dst->handle = src->handle;
 	dst->id = src->id;
-	dst->u.rw.sector_number = src->sector_number;
-	barrier();
-	if (n > dst->nr_segments)
-		n = dst->nr_segments;
-	for (i = 0; i < n; i++)
-		dst->u.rw.seg[i] = src->seg[i];
+	switch (src->operation) {
+	case BLKIF_OP_READ:
+	case BLKIF_OP_WRITE:
+	case BLKIF_OP_WRITE_BARRIER:
+	case BLKIF_OP_FLUSH_DISKCACHE:
+		dst->u.rw.sector_number = src->u.rw.sector_number;
+		barrier();
+		if (n > dst->nr_segments)
+			n = dst->nr_segments;
+		for (i = 0; i < n; i++)
+			dst->u.rw.seg[i] = src->u.rw.seg[i];
+		break;
+	case BLKIF_OP_DISCARD:
+		dst->u.discard.sector_number = src->u.discard.sector_number;
+		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+		break;
+	default:
+		break;
+	}
 }
 
 #endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
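The reason common.h carries separate i386 and x86_64 request layouts — and why blkif_get_x86_32_req()/blkif_get_x86_64_req() now switch on the operation — is ABI, not protocol: a 32-bit guest builds ring entries with 4-byte packing, so the 64-bit id field lands at offset 4, while a 64-bit guest aligns id to 8 bytes. The payload union therefore starts at different offsets, and with rw and discard sharing that union only req->operation tells the copy helper which member holds valid data. A stand-alone illustration of the offset difference (simplified stand-ins, not the kernel structures):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

typedef uint64_t blkif_sector_t;
typedef uint16_t blkif_vdev_t;

#pragma pack(push, 4)
struct x86_32_request {			/* guest built for i386 */
	uint8_t operation;
	uint8_t nr_segments;
	blkif_vdev_t handle;
	uint64_t id;			/* only 4-byte aligned under pack(4) */
	blkif_sector_t sector_number;	/* first member of the payload */
};
#pragma pack(pop)

struct x86_64_request {			/* guest built for x86_64 */
	uint8_t operation;
	uint8_t nr_segments;
	blkif_vdev_t handle;
	uint64_t __attribute__((__aligned__(8))) id;
	blkif_sector_t sector_number;
};

int main(void)
{
	printf("i386:   id at %zu, payload at %zu\n",
	       offsetof(struct x86_32_request, id),
	       offsetof(struct x86_32_request, sector_number));
	printf("x86_64: id at %zu, payload at %zu\n",
	       offsetof(struct x86_64_request, id),
	       offsetof(struct x86_64_request, sector_number));
	return 0;
}

On a typical x86 build this prints 4/12 for the i386 view and 8/16 for the x86_64 view, which is why the backend translates foreign requests field by field into its native struct blkif_request rather than casting the ring page.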
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 3f129b45451a..a6d43030b107 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -114,6 +114,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	spin_lock_init(&blkif->blk_ring_lock);
 	atomic_set(&blkif->refcnt, 1);
 	init_waitqueue_head(&blkif->wq);
+	init_completion(&blkif->drain_complete);
+	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
 
@@ -272,6 +274,7 @@ VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
 VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
 VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
 VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
+VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req);
 VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
 VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
 
@@ -280,6 +283,7 @@ static struct attribute *xen_vbdstat_attrs[] = {
 	&dev_attr_rd_req.attr,
 	&dev_attr_wr_req.attr,
 	&dev_attr_f_req.attr,
+	&dev_attr_ds_req.attr,
 	&dev_attr_rd_sect.attr,
 	&dev_attr_wr_sect.attr,
 	NULL
@@ -419,6 +423,73 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 	return err;
 }
 
+int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	struct xen_blkif *blkif = be->blkif;
+	char *type;
+	int err;
+	int state = 0;
+
+	type = xenbus_read(XBT_NIL, dev->nodename, "type", NULL);
+	if (!IS_ERR(type)) {
+		if (strncmp(type, "file", 4) == 0) {
+			state = 1;
+			blkif->blk_backend_type = BLKIF_BACKEND_FILE;
+		}
+		if (strncmp(type, "phy", 3) == 0) {
+			struct block_device *bdev = be->blkif->vbd.bdev;
+			struct request_queue *q = bdev_get_queue(bdev);
+			if (blk_queue_discard(q)) {
+				err = xenbus_printf(xbt, dev->nodename,
+					"discard-granularity", "%u",
+					q->limits.discard_granularity);
+				if (err) {
+					xenbus_dev_fatal(dev, err,
+						"writing discard-granularity");
+					goto kfree;
+				}
+				err = xenbus_printf(xbt, dev->nodename,
+					"discard-alignment", "%u",
+					q->limits.discard_alignment);
+				if (err) {
+					xenbus_dev_fatal(dev, err,
+						"writing discard-alignment");
+					goto kfree;
+				}
+				state = 1;
+				blkif->blk_backend_type = BLKIF_BACKEND_PHY;
+			}
+		}
+	} else {
+		err = PTR_ERR(type);
+		xenbus_dev_fatal(dev, err, "reading type");
+		goto out;
+	}
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
+			    "%d", state);
+	if (err)
+		xenbus_dev_fatal(dev, err, "writing feature-discard");
+kfree:
+	kfree(type);
+out:
+	return err;
+}
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+		      struct backend_info *be, int state)
+{
+	struct xenbus_device *dev = be->dev;
+	int err;
+
+	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+			    "%d", state);
+	if (err)
+		xenbus_dev_fatal(dev, err, "writing feature-barrier");
+
+	return err;
+}
+
 /*
  * Entry point to this code when a new device is created. Allocate the basic
  * structures, and watch the store waiting for the hotplug scripts to tell us
@@ -590,7 +661,7 @@ static void frontend_changed(struct xenbus_device *dev,
 
 		/*
		 * Enforce precondition before potential leak point.
-		 * blkif_disconnect() is idempotent.
+		 * xen_blkif_disconnect() is idempotent.
		 */
 		xen_blkif_disconnect(be->blkif);
 
@@ -611,7 +682,7 @@ static void frontend_changed(struct xenbus_device *dev,
 		break;
 	/* fall through if not online */
 	case XenbusStateUnknown:
-		/* implies blkif_disconnect() via blkback_remove() */
+		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
 		device_unregister(&dev->dev);
 		break;
 
@@ -650,6 +721,11 @@ again:
 	if (err)
 		goto abort;
 
+	err = xen_blkbk_discard(xbt, be);
+
+	/* If we can't advertise it is OK. */
+	err = xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
 	if (err) {
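The policy xen_blkbk_discard() implements above boils down to: a "file" backend can always advertise feature-discard because holes can be punched in the backing file, while a "phy" backend advertises it only when the underlying queue reports discard support, additionally publishing the device's discard-granularity and discard-alignment for the frontend to copy into its queue limits. A hedged condensation of that decision — plain stand-ins for the xenbus calls, not the driver code:

#include <stdio.h>
#include <string.h>

/* returns the value to write to the "feature-discard" node */
static int advertise_discard(const char *type, int queue_discard,
			     unsigned int granularity, unsigned int alignment)
{
	if (strncmp(type, "file", 4) == 0)
		return 1;	/* loop-backed: fallocate() can punch holes */
	if (strncmp(type, "phy", 3) == 0 && queue_discard) {
		/* stand-ins for the two xenbus_printf() calls */
		printf("discard-granularity = %u\n", granularity);
		printf("discard-alignment   = %u\n", alignment);
		return 1;	/* real device supports discard */
	}
	return 0;		/* unknown type, or no device support */
}

int main(void)
{
	printf("feature-discard = %d\n", advertise_discard("phy", 1, 512, 0));
	printf("feature-discard = %d\n", advertise_discard("phy", 0, 0, 0));
	return 0;
}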
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index b536a9cef917..773da7d6491e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -98,6 +98,9 @@ struct blkfront_info
 	unsigned long shadow_free;
 	unsigned int feature_flush;
 	unsigned int flush_op;
+	unsigned int feature_discard;
+	unsigned int discard_granularity;
+	unsigned int discard_alignment;
 	int is_ready;
 };
 
@@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req)
 		ring_req->operation = info->flush_op;
 	}
 
-	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
-	BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	if (unlikely(req->cmd_flags & REQ_DISCARD)) {
+		/* id, sector_number and handle are set above. */
+		ring_req->operation = BLKIF_OP_DISCARD;
+		ring_req->nr_segments = 0;
+		ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+	} else {
+		ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
+		BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
-	for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
-		buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
-		fsect = sg->offset >> 9;
-		lsect = fsect + (sg->length >> 9) - 1;
-		/* install a grant reference. */
-		ref = gnttab_claim_grant_reference(&gref_head);
-		BUG_ON(ref == -ENOSPC);
+		for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
+			buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
+			fsect = sg->offset >> 9;
+			lsect = fsect + (sg->length >> 9) - 1;
+			/* install a grant reference. */
+			ref = gnttab_claim_grant_reference(&gref_head);
+			BUG_ON(ref == -ENOSPC);
 
-		gnttab_grant_foreign_access_ref(
-				ref,
-				info->xbdev->otherend_id,
-				buffer_mfn,
-				rq_data_dir(req) );
+			gnttab_grant_foreign_access_ref(
+					ref,
+					info->xbdev->otherend_id,
+					buffer_mfn,
+					rq_data_dir(req));
 
-		info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
-		ring_req->u.rw.seg[i] =
-				(struct blkif_request_segment) {
-					.gref       = ref,
-					.first_sect = fsect,
-					.last_sect  = lsect };
+			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
+			ring_req->u.rw.seg[i] =
+					(struct blkif_request_segment) {
+						.gref       = ref,
+						.first_sect = fsect,
+						.last_sect  = lsect };
+		}
 	}
 
 	info->ring.req_prod_pvt++;
@@ -370,7 +380,9 @@ static void do_blkif_request(struct request_queue *rq)
 
 		blk_start_request(req);
 
-		if (req->cmd_type != REQ_TYPE_FS) {
+		if ((req->cmd_type != REQ_TYPE_FS) ||
+		    ((req->cmd_flags & (REQ_FLUSH | REQ_FUA)) &&
+		    !info->flush_op)) {
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
@@ -399,6 +411,7 @@ wait:
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 {
 	struct request_queue *rq;
+	struct blkfront_info *info = gd->private_data;
 
 	rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
 	if (rq == NULL)
@@ -406,6 +419,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 
+	if (info->feature_discard) {
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+		blk_queue_max_discard_sectors(rq, get_capacity(gd));
+		rq->limits.discard_granularity = info->discard_granularity;
+		rq->limits.discard_alignment = info->discard_alignment;
+	}
+
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_max_hw_sectors(rq, 512);
@@ -722,6 +742,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
 		error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
 		switch (bret->operation) {
+		case BLKIF_OP_DISCARD:
+			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+				struct request_queue *rq = info->rq;
+				printk(KERN_WARNING "blkfront: %s: discard op failed\n",
+				       info->gd->disk_name);
+				error = -EOPNOTSUPP;
+				info->feature_discard = 0;
+				queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
+			}
+			__blk_end_request_all(req, error);
+			break;
 		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
 			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
@@ -1098,6 +1129,33 @@ blkfront_closing(struct blkfront_info *info)
 	bdput(bdev);
 }
 
+static void blkfront_setup_discard(struct blkfront_info *info)
+{
+	int err;
+	char *type;
+	unsigned int discard_granularity;
+	unsigned int discard_alignment;
+
+	type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
+	if (IS_ERR(type))
+		return;
+
+	if (strncmp(type, "phy", 3) == 0) {
+		err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			"discard-granularity", "%u", &discard_granularity,
+			"discard-alignment", "%u", &discard_alignment,
+			NULL);
+		if (!err) {
+			info->feature_discard = 1;
+			info->discard_granularity = discard_granularity;
+			info->discard_alignment = discard_alignment;
+		}
+	} else if (strncmp(type, "file", 4) == 0)
+		info->feature_discard = 1;
+
+	kfree(type);
+}
+
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
@@ -1108,7 +1166,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned long sector_size;
 	unsigned int binfo;
 	int err;
-	int barrier, flush;
+	int barrier, flush, discard;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1178,7 +1236,14 @@ static void blkfront_connect(struct blkfront_info *info)
 		info->feature_flush = REQ_FLUSH;
 		info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
 	}
 
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-discard", "%d", &discard,
+			    NULL);
+
+	if (!err && discard)
+		blkfront_setup_discard(info);
+
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -1385,6 +1450,8 @@ static struct xenbus_driver blkfront = {
 
 static int __init xlblk_init(void)
 {
+	int ret;
+
 	if (!xen_domain())
 		return -ENODEV;
 
@@ -1394,7 +1461,13 @@ static int __init xlblk_init(void)
 		return -ENODEV;
 	}
 
-	return xenbus_register_frontend(&blkfront);
+	ret = xenbus_register_frontend(&blkfront);
+	if (ret) {
+		unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
+		return ret;
+	}
+
+	return 0;
 }
 module_init(xlblk_init);
 
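On the frontend side a discard occupies one ring slot but claims no grant references: nr_segments is forced to 0 and the sector range travels inside the request itself, which is why blkif_queue_request() skips the whole segment-mapping loop for REQ_DISCARD. A simplified stand-alone sketch of the slot it builds — the struct is a stand-in for the shared-ring layout, with BLKIF_OP_DISCARD's value (5) taken from blkif.h:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t blkif_sector_t;
#define BLKIF_OP_DISCARD 5

struct request_discard {
	blkif_sector_t sector_number;	/* first sector to discard */
	uint64_t nr_sectors;		/* how many to discard */
};

struct request {
	uint8_t operation;
	uint8_t nr_segments;		/* 0: no data segments, no grants */
	uint16_t handle;
	uint64_t id;			/* echoed back in the response */
	union {
		struct request_discard discard;
	} u;
};

static struct request make_discard(uint64_t id, uint16_t handle,
				   blkif_sector_t start, uint64_t nsect)
{
	struct request req = {
		.operation   = BLKIF_OP_DISCARD,
		.nr_segments = 0,
		.handle      = handle,
		.id          = id,
	};
	req.u.discard.sector_number = start;
	req.u.discard.nr_sectors    = nsect;
	return req;
}

int main(void)
{
	struct request r = make_discard(1, 0, 2048, 256);
	printf("op %u: %llu sectors from %llu\n", (unsigned)r.operation,
	       (unsigned long long)r.u.discard.nr_sectors,
	       (unsigned long long)r.u.discard.sector_number);
	return 0;
}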
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index 3d5d6db864fe..9324488f23f0 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -57,6 +57,36 @@ typedef uint64_t blkif_sector_t;
  * "feature-flush-cache" node!
  */
 #define BLKIF_OP_FLUSH_DISKCACHE   3
+
+/*
+ * Recognised only if "feature-discard" is present in backend xenbus info.
+ * The "feature-discard" node contains a boolean indicating whether trim
+ * (ATA) or unmap (SCSI) - conveniently called discard - requests are likely
+ * to succeed or fail. Either way, a discard request
+ * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
+ * the underlying block-device hardware. The boolean simply indicates whether
+ * or not it is worthwhile for the frontend to attempt discard requests.
+ * If a backend does not recognise BLKIF_OP_DISCARD, it should *not*
+ * create the "feature-discard" node!
+ *
+ * Discard operation is a request for the underlying block device to mark
+ * extents to be erased. However, discard does not guarantee that the blocks
+ * will be erased from the device - it is just a hint to the device
+ * controller that these blocks are no longer in use. What the device
+ * controller does with that information is left to the controller.
+ * Discard operations are passed with sector_number as the
+ * sector index to begin discard operations at and nr_sectors as the number of
+ * sectors to be discarded. The specified sectors should be discarded if the
+ * underlying block device supports trim (ATA) or unmap (SCSI) operations,
+ * or a BLKIF_RSP_EOPNOTSUPP should be returned.
+ * More information about trim/unmap operations at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ *     Interface%20manuals/100293068c.pdf
+ */
+#define BLKIF_OP_DISCARD           5
+
 /*
  * Maximum scatter/gather segments per request.
  * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
@@ -74,6 +104,11 @@ struct blkif_request_rw {
 	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
+struct blkif_request_discard {
+	blkif_sector_t sector_number;
+	uint64_t nr_sectors;
+};
+
 struct blkif_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	uint8_t        nr_segments;  /* number of segments                   */
@@ -81,6 +116,7 @@ struct blkif_request {
 	uint64_t       id;           /* private guest value, echoed in resp  */
 	union {
 		struct blkif_request_rw rw;
+		struct blkif_request_discard discard;
 	} u;
 };
 
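A layout property worth noting in the hunk above: both union members begin with sector_number, and the discard view is far smaller than the rw view (whose seg[] array dominates), so adding BLKIF_OP_DISCARD changes neither the size of a ring slot nor the offset of the start sector. A quick userspace re-declaration (again a stand-in, not the shared header) that asserts those invariants:

#include <stdint.h>
#include <stddef.h>
#include <assert.h>

typedef uint64_t blkif_sector_t;

struct seg { uint32_t gref; uint8_t first_sect, last_sect; };

struct request_rw {
	blkif_sector_t sector_number;
	struct seg seg[11];		/* BLKIF_MAX_SEGMENTS_PER_REQUEST */
};

struct request_discard {
	blkif_sector_t sector_number;
	uint64_t nr_sectors;
};

struct request {
	uint8_t operation;
	uint8_t nr_segments;
	uint16_t handle;
	uint64_t id;
	union {
		struct request_rw rw;
		struct request_discard discard;
	} u;
};

int main(void)
{
	/* sector_number occupies the same bytes in both views */
	assert(offsetof(struct request, u.rw.sector_number) ==
	       offsetof(struct request, u.discard.sector_number));
	/* the union is sized by the larger rw member, so the ring
	 * slot does not grow when discard is added */
	assert(sizeof(struct request_discard) < sizeof(struct request_rw));
	return 0;
}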
