diff options
author | Christoph Hellwig <hch@infradead.org> | 2009-09-30 07:52:12 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-10-01 15:19:30 -0400 |
commit | c15227de132f1295f3db6b7df9079956b1020fd8 (patch) | |
tree | ad06f119f283cf8a6313681055e8132ba2851ddb | |
parent | 3bd0f0c763e497c8674b28e3df2732f48683dabd (diff) |
block: use normal I/O path for discard requests
prepare_discard_fn() was being called in a place where memory allocation
was effectively impossible. This makes it inappropriate for all but
the most trivial translations of Linux's DISCARD operation to the block
command set. Additionally, adding a payload there makes the ownership
of the memory backing the bio unclear, as it's now allocated by the
device driver and not by the submitter as usual.
prepare_discard_fn() is replaced with QUEUE_FLAG_DISCARD, which indicates
whether the queue supports discard operations. blkdev_issue_discard() now
allocates a zeroed page and attaches one sector of it as the payload, which
is the right thing for the common ATA and SCSI implementations.
The mtd implementation of prepare_discard_fn() is replaced with simply
checking for the request being a discard.
Largely based on a previous patch from Matthew Wilcox <matthew@wil.cx>,
which covered the prepare_discard_fn() part but not yet the different
payload allocation.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | block/blk-barrier.c | 35 | ||||
-rw-r--r-- | block/blk-core.c | 3 | ||||
-rw-r--r-- | block/blk-settings.c | 17 | ||||
-rw-r--r-- | drivers/mtd/mtd_blkdevs.c | 19 | ||||
-rw-r--r-- | drivers/staging/dst/dcore.c | 2 | ||||
-rw-r--r-- | include/linux/blkdev.h | 6 |
6 files changed, 39 insertions, 43 deletions
diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 6593ab39cfe9..21f5025c3945 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c | |||
@@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err) | |||
350 | 350 | ||
351 | if (bio->bi_private) | 351 | if (bio->bi_private) |
352 | complete(bio->bi_private); | 352 | complete(bio->bi_private); |
353 | __free_page(bio_page(bio)); | ||
353 | 354 | ||
354 | bio_put(bio); | 355 | bio_put(bio); |
355 | } | 356 | } |
@@ -372,26 +373,44 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
372 | struct request_queue *q = bdev_get_queue(bdev); | 373 | struct request_queue *q = bdev_get_queue(bdev); |
373 | int type = flags & DISCARD_FL_BARRIER ? | 374 | int type = flags & DISCARD_FL_BARRIER ? |
374 | DISCARD_BARRIER : DISCARD_NOBARRIER; | 375 | DISCARD_BARRIER : DISCARD_NOBARRIER; |
376 | struct bio *bio; | ||
377 | struct page *page; | ||
375 | int ret = 0; | 378 | int ret = 0; |
376 | 379 | ||
377 | if (!q) | 380 | if (!q) |
378 | return -ENXIO; | 381 | return -ENXIO; |
379 | 382 | ||
380 | if (!q->prepare_discard_fn) | 383 | if (!blk_queue_discard(q)) |
381 | return -EOPNOTSUPP; | 384 | return -EOPNOTSUPP; |
382 | 385 | ||
383 | while (nr_sects && !ret) { | 386 | while (nr_sects && !ret) { |
384 | struct bio *bio = bio_alloc(gfp_mask, 0); | 387 | unsigned int sector_size = q->limits.logical_block_size; |
385 | if (!bio) | ||
386 | return -ENOMEM; | ||
387 | 388 | ||
389 | bio = bio_alloc(gfp_mask, 1); | ||
390 | if (!bio) | ||
391 | goto out; | ||
392 | bio->bi_sector = sector; | ||
388 | bio->bi_end_io = blkdev_discard_end_io; | 393 | bio->bi_end_io = blkdev_discard_end_io; |
389 | bio->bi_bdev = bdev; | 394 | bio->bi_bdev = bdev; |
390 | if (flags & DISCARD_FL_WAIT) | 395 | if (flags & DISCARD_FL_WAIT) |
391 | bio->bi_private = &wait; | 396 | bio->bi_private = &wait; |
392 | 397 | ||
393 | bio->bi_sector = sector; | 398 | /* |
399 | * Add a zeroed one-sector payload as that's what | ||
400 | * our current implementations need. If we'll ever need | ||
401 | * more the interface will need revisiting. | ||
402 | */ | ||
403 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
404 | if (!page) | ||
405 | goto out_free_bio; | ||
406 | if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) | ||
407 | goto out_free_page; | ||
394 | 408 | ||
409 | /* | ||
410 | * And override the bio size - the way discard works we | ||
411 | * touch many more blocks on disk than the actual payload | ||
412 | * length. | ||
413 | */ | ||
395 | if (nr_sects > queue_max_hw_sectors(q)) { | 414 | if (nr_sects > queue_max_hw_sectors(q)) { |
396 | bio->bi_size = queue_max_hw_sectors(q) << 9; | 415 | bio->bi_size = queue_max_hw_sectors(q) << 9; |
397 | nr_sects -= queue_max_hw_sectors(q); | 416 | nr_sects -= queue_max_hw_sectors(q); |
@@ -414,5 +433,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
414 | bio_put(bio); | 433 | bio_put(bio); |
415 | } | 434 | } |
416 | return ret; | 435 | return ret; |
436 | out_free_page: | ||
437 | __free_page(page); | ||
438 | out_free_bio: | ||
439 | bio_put(bio); | ||
440 | out: | ||
441 | return -ENOMEM; | ||
417 | } | 442 | } |
418 | EXPORT_SYMBOL(blkdev_issue_discard); | 443 | EXPORT_SYMBOL(blkdev_issue_discard); |
diff --git a/block/blk-core.c b/block/blk-core.c index 8135228e4b29..80a020dd1580 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -1124,7 +1124,6 @@ void init_request_from_bio(struct request *req, struct bio *bio) | |||
1124 | req->cmd_flags |= REQ_DISCARD; | 1124 | req->cmd_flags |= REQ_DISCARD; |
1125 | if (bio_rw_flagged(bio, BIO_RW_BARRIER)) | 1125 | if (bio_rw_flagged(bio, BIO_RW_BARRIER)) |
1126 | req->cmd_flags |= REQ_SOFTBARRIER; | 1126 | req->cmd_flags |= REQ_SOFTBARRIER; |
1127 | req->q->prepare_discard_fn(req->q, req); | ||
1128 | } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) | 1127 | } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) |
1129 | req->cmd_flags |= REQ_HARDBARRIER; | 1128 | req->cmd_flags |= REQ_HARDBARRIER; |
1130 | 1129 | ||
@@ -1470,7 +1469,7 @@ static inline void __generic_make_request(struct bio *bio) | |||
1470 | goto end_io; | 1469 | goto end_io; |
1471 | 1470 | ||
1472 | if (bio_rw_flagged(bio, BIO_RW_DISCARD) && | 1471 | if (bio_rw_flagged(bio, BIO_RW_DISCARD) && |
1473 | !q->prepare_discard_fn) { | 1472 | !blk_queue_discard(q)) { |
1474 | err = -EOPNOTSUPP; | 1473 | err = -EOPNOTSUPP; |
1475 | goto end_io; | 1474 | goto end_io; |
1476 | } | 1475 | } |
diff --git a/block/blk-settings.c b/block/blk-settings.c index eaf122ff5f16..d29498ef1eb5 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c | |||
@@ -34,23 +34,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) | |||
34 | EXPORT_SYMBOL(blk_queue_prep_rq); | 34 | EXPORT_SYMBOL(blk_queue_prep_rq); |
35 | 35 | ||
36 | /** | 36 | /** |
37 | * blk_queue_set_discard - set a discard_sectors function for queue | ||
38 | * @q: queue | ||
39 | * @dfn: prepare_discard function | ||
40 | * | ||
41 | * It's possible for a queue to register a discard callback which is used | ||
42 | * to transform a discard request into the appropriate type for the | ||
43 | * hardware. If none is registered, then discard requests are failed | ||
44 | * with %EOPNOTSUPP. | ||
45 | * | ||
46 | */ | ||
47 | void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) | ||
48 | { | ||
49 | q->prepare_discard_fn = dfn; | ||
50 | } | ||
51 | EXPORT_SYMBOL(blk_queue_set_discard); | ||
52 | |||
53 | /** | ||
54 | * blk_queue_merge_bvec - set a merge_bvec function for queue | 37 | * blk_queue_merge_bvec - set a merge_bvec function for queue |
55 | * @q: queue | 38 | * @q: queue |
56 | * @mbfn: merge_bvec_fn | 39 | * @mbfn: merge_bvec_fn |
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 0acbf4f5be50..8ca17a3e96ea 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c | |||
@@ -32,14 +32,6 @@ struct mtd_blkcore_priv { | |||
32 | spinlock_t queue_lock; | 32 | spinlock_t queue_lock; |
33 | }; | 33 | }; |
34 | 34 | ||
35 | static int blktrans_discard_request(struct request_queue *q, | ||
36 | struct request *req) | ||
37 | { | ||
38 | req->cmd_type = REQ_TYPE_LINUX_BLOCK; | ||
39 | req->cmd[0] = REQ_LB_OP_DISCARD; | ||
40 | return 0; | ||
41 | } | ||
42 | |||
43 | static int do_blktrans_request(struct mtd_blktrans_ops *tr, | 35 | static int do_blktrans_request(struct mtd_blktrans_ops *tr, |
44 | struct mtd_blktrans_dev *dev, | 36 | struct mtd_blktrans_dev *dev, |
45 | struct request *req) | 37 | struct request *req) |
@@ -52,10 +44,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, | |||
52 | 44 | ||
53 | buf = req->buffer; | 45 | buf = req->buffer; |
54 | 46 | ||
55 | if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && | ||
56 | req->cmd[0] == REQ_LB_OP_DISCARD) | ||
57 | return tr->discard(dev, block, nsect); | ||
58 | |||
59 | if (!blk_fs_request(req)) | 47 | if (!blk_fs_request(req)) |
60 | return -EIO; | 48 | return -EIO; |
61 | 49 | ||
@@ -63,6 +51,9 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, | |||
63 | get_capacity(req->rq_disk)) | 51 | get_capacity(req->rq_disk)) |
64 | return -EIO; | 52 | return -EIO; |
65 | 53 | ||
54 | if (blk_discard_rq(req)) | ||
55 | return tr->discard(dev, block, nsect); | ||
56 | |||
66 | switch(rq_data_dir(req)) { | 57 | switch(rq_data_dir(req)) { |
67 | case READ: | 58 | case READ: |
68 | for (; nsect > 0; nsect--, block++, buf += tr->blksize) | 59 | for (; nsect > 0; nsect--, block++, buf += tr->blksize) |
@@ -380,8 +371,8 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) | |||
380 | tr->blkcore_priv->rq->queuedata = tr; | 371 | tr->blkcore_priv->rq->queuedata = tr; |
381 | blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize); | 372 | blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize); |
382 | if (tr->discard) | 373 | if (tr->discard) |
383 | blk_queue_set_discard(tr->blkcore_priv->rq, | 374 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, |
384 | blktrans_discard_request); | 375 | tr->blkcore_priv->rq); |
385 | 376 | ||
386 | tr->blkshift = ffs(tr->blksize) - 1; | 377 | tr->blkshift = ffs(tr->blksize) - 1; |
387 | 378 | ||
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c index ac8577358ba0..5e8db0677582 100644 --- a/drivers/staging/dst/dcore.c +++ b/drivers/staging/dst/dcore.c | |||
@@ -102,7 +102,7 @@ static int dst_request(struct request_queue *q, struct bio *bio) | |||
102 | struct dst_node *n = q->queuedata; | 102 | struct dst_node *n = q->queuedata; |
103 | int err = -EIO; | 103 | int err = -EIO; |
104 | 104 | ||
105 | if (bio_empty_barrier(bio) && !q->prepare_discard_fn) { | 105 | if (bio_empty_barrier(bio) && !blk_queue_discard(q)) { |
106 | /* | 106 | /* |
107 | * This is a dirty^Wnice hack, but if we complete this | 107 | * This is a dirty^Wnice hack, but if we complete this |
108 | * operation with -EOPNOTSUPP like intended, XFS | 108 | * operation with -EOPNOTSUPP like intended, XFS |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e23a86cae5ac..f62d45e87618 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -82,7 +82,6 @@ enum rq_cmd_type_bits { | |||
82 | enum { | 82 | enum { |
83 | REQ_LB_OP_EJECT = 0x40, /* eject request */ | 83 | REQ_LB_OP_EJECT = 0x40, /* eject request */ |
84 | REQ_LB_OP_FLUSH = 0x41, /* flush request */ | 84 | REQ_LB_OP_FLUSH = 0x41, /* flush request */ |
85 | REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ | ||
86 | }; | 85 | }; |
87 | 86 | ||
88 | /* | 87 | /* |
@@ -261,7 +260,6 @@ typedef void (request_fn_proc) (struct request_queue *q); | |||
261 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); | 260 | typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); |
262 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); | 261 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); |
263 | typedef void (unplug_fn) (struct request_queue *); | 262 | typedef void (unplug_fn) (struct request_queue *); |
264 | typedef int (prepare_discard_fn) (struct request_queue *, struct request *); | ||
265 | 263 | ||
266 | struct bio_vec; | 264 | struct bio_vec; |
267 | struct bvec_merge_data { | 265 | struct bvec_merge_data { |
@@ -340,7 +338,6 @@ struct request_queue | |||
340 | make_request_fn *make_request_fn; | 338 | make_request_fn *make_request_fn; |
341 | prep_rq_fn *prep_rq_fn; | 339 | prep_rq_fn *prep_rq_fn; |
342 | unplug_fn *unplug_fn; | 340 | unplug_fn *unplug_fn; |
343 | prepare_discard_fn *prepare_discard_fn; | ||
344 | merge_bvec_fn *merge_bvec_fn; | 341 | merge_bvec_fn *merge_bvec_fn; |
345 | prepare_flush_fn *prepare_flush_fn; | 342 | prepare_flush_fn *prepare_flush_fn; |
346 | softirq_done_fn *softirq_done_fn; | 343 | softirq_done_fn *softirq_done_fn; |
@@ -460,6 +457,7 @@ struct request_queue | |||
460 | #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ | 457 | #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ |
461 | #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ | 458 | #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ |
462 | #define QUEUE_FLAG_CQ 16 /* hardware does queuing */ | 459 | #define QUEUE_FLAG_CQ 16 /* hardware does queuing */ |
460 | #define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */ | ||
463 | 461 | ||
464 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ | 462 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ |
465 | (1 << QUEUE_FLAG_CLUSTER) | \ | 463 | (1 << QUEUE_FLAG_CLUSTER) | \ |
@@ -591,6 +589,7 @@ enum { | |||
591 | #define blk_queue_flushing(q) ((q)->ordseq) | 589 | #define blk_queue_flushing(q) ((q)->ordseq) |
592 | #define blk_queue_stackable(q) \ | 590 | #define blk_queue_stackable(q) \ |
593 | test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) | 591 | test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) |
592 | #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) | ||
594 | 593 | ||
595 | #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) | 594 | #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) |
596 | #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) | 595 | #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) |
@@ -955,7 +954,6 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); | |||
955 | extern void blk_queue_dma_alignment(struct request_queue *, int); | 954 | extern void blk_queue_dma_alignment(struct request_queue *, int); |
956 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); | 955 | extern void blk_queue_update_dma_alignment(struct request_queue *, int); |
957 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); | 956 | extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); |
958 | extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); | ||
959 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); | 957 | extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); |
960 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); | 958 | extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); |
961 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); | 959 | extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); |