author	Christoph Hellwig <hch@infradead.org>	2009-09-30 07:52:12 -0400
committer	Jens Axboe <jens.axboe@oracle.com>	2009-10-01 15:19:30 -0400
commit	c15227de132f1295f3db6b7df9079956b1020fd8 (patch)
tree	ad06f119f283cf8a6313681055e8132ba2851ddb
parent	3bd0f0c763e497c8674b28e3df2732f48683dabd (diff)
block: use normal I/O path for discard requests
prepare_discard_fn() was being called in a place where memory allocation was effectively impossible. This makes it inappropriate for all but the most trivial translations of Linux's DISCARD operation to the block command set. Additionally, adding a payload there makes the ownership of the bio backing unclear, as it is now allocated by the device driver and not the submitter as usual.

It is replaced with QUEUE_FLAG_DISCARD, which is used to indicate whether the queue supports discard operations or not. blkdev_issue_discard now allocates a one-page, sector-length payload, which is the right thing for the common ATA and SCSI implementations.

The mtd implementation of prepare_discard_fn() is replaced with a simple check for the request being a discard.

Largely based on a previous patch from Matthew Wilcox <matthew@wil.cx>, which did the prepare_discard_fn removal but not yet the different payload allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
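For orientation, here is a minimal sketch (not part of the commit) of the two sides of the new interface: a driver advertises support by setting the queue flag, and a submitter calls blkdev_issue_discard(), which now builds the zeroed payload itself. The example_* function names are hypothetical; the flag, helper, and blkdev_issue_discard() signature are taken from the patch below.

#include <linux/blkdev.h>

/*
 * Driver side (hypothetical example): replaces the old
 * blk_queue_set_discard() callback registration.
 */
static void example_enable_discard(struct request_queue *q)
{
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}

/*
 * Submitter side (hypothetical example): blkdev_issue_discard() now
 * allocates the one-sector payload internally, so the caller never
 * touches the bio backing pages.
 */
static int example_trim_range(struct block_device *bdev, sector_t sector,
			      sector_t nr_sects)
{
	return blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
				    DISCARD_FL_WAIT);
}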
-rw-r--r--  block/blk-barrier.c          | 35
-rw-r--r--  block/blk-core.c             |  3
-rw-r--r--  block/blk-settings.c         | 17
-rw-r--r--  drivers/mtd/mtd_blkdevs.c    | 19
-rw-r--r--  drivers/staging/dst/dcore.c  |  2
-rw-r--r--  include/linux/blkdev.h       |  6
6 files changed, 39 insertions(+), 43 deletions(-)
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6593ab39cfe9..21f5025c3945 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
 
 	if (bio->bi_private)
 		complete(bio->bi_private);
+	__free_page(bio_page(bio));
 
 	bio_put(bio);
 }
@@ -372,26 +373,44 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = flags & DISCARD_FL_BARRIER ?
 		DISCARD_BARRIER : DISCARD_NOBARRIER;
+	struct bio *bio;
+	struct page *page;
 	int ret = 0;
 
 	if (!q)
 		return -ENXIO;
 
-	if (!q->prepare_discard_fn)
+	if (!blk_queue_discard(q))
 		return -EOPNOTSUPP;
 
 	while (nr_sects && !ret) {
-		struct bio *bio = bio_alloc(gfp_mask, 0);
-		if (!bio)
-			return -ENOMEM;
+		unsigned int sector_size = q->limits.logical_block_size;
 
+		bio = bio_alloc(gfp_mask, 1);
+		if (!bio)
+			goto out;
+		bio->bi_sector = sector;
 		bio->bi_end_io = blkdev_discard_end_io;
 		bio->bi_bdev = bdev;
 		if (flags & DISCARD_FL_WAIT)
 			bio->bi_private = &wait;
 
-		bio->bi_sector = sector;
+		/*
+		 * Add a zeroed one-sector payload as that's what
+		 * our current implementations need.  If we'll ever need
+		 * more the interface will need revisiting.
+		 */
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!page)
+			goto out_free_bio;
+		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+			goto out_free_page;
 
+		/*
+		 * And override the bio size - the way discard works we
+		 * touch many more blocks on disk than the actual payload
+		 * length.
+		 */
 		if (nr_sects > queue_max_hw_sectors(q)) {
 			bio->bi_size = queue_max_hw_sectors(q) << 9;
 			nr_sects -= queue_max_hw_sectors(q);
@@ -414,5 +433,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		bio_put(bio);
 	}
 	return ret;
+out_free_page:
+	__free_page(page);
+out_free_bio:
+	bio_put(bio);
+out:
+	return -ENOMEM;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/block/blk-core.c b/block/blk-core.c
index 8135228e4b29..80a020dd1580 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1124,7 +1124,6 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 		req->cmd_flags |= REQ_DISCARD;
 		if (bio_rw_flagged(bio, BIO_RW_BARRIER))
 			req->cmd_flags |= REQ_SOFTBARRIER;
-		req->q->prepare_discard_fn(req->q, req);
 	} else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
 		req->cmd_flags |= REQ_HARDBARRIER;
 
@@ -1470,7 +1469,7 @@ static inline void __generic_make_request(struct bio *bio)
 			goto end_io;
 
 		if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
-		    !q->prepare_discard_fn) {
+		    !blk_queue_discard(q)) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index eaf122ff5f16..d29498ef1eb5 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -34,23 +34,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
 EXPORT_SYMBOL(blk_queue_prep_rq);
 
 /**
- * blk_queue_set_discard - set a discard_sectors function for queue
- * @q:		queue
- * @dfn:	prepare_discard function
- *
- * It's possible for a queue to register a discard callback which is used
- * to transform a discard request into the appropriate type for the
- * hardware. If none is registered, then discard requests are failed
- * with %EOPNOTSUPP.
- *
- */
-void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
-{
-	q->prepare_discard_fn = dfn;
-}
-EXPORT_SYMBOL(blk_queue_set_discard);
-
-/**
  * blk_queue_merge_bvec - set a merge_bvec function for queue
  * @q:		queue
  * @mbfn:	merge_bvec_fn
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 0acbf4f5be50..8ca17a3e96ea 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -32,14 +32,6 @@ struct mtd_blkcore_priv {
 	spinlock_t queue_lock;
 };
 
-static int blktrans_discard_request(struct request_queue *q,
-				    struct request *req)
-{
-	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
-	req->cmd[0] = REQ_LB_OP_DISCARD;
-	return 0;
-}
-
 static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
 			       struct request *req)
@@ -52,10 +44,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 
 	buf = req->buffer;
 
-	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-	    req->cmd[0] == REQ_LB_OP_DISCARD)
-		return tr->discard(dev, block, nsect);
-
 	if (!blk_fs_request(req))
 		return -EIO;
 
@@ -63,6 +51,9 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	    get_capacity(req->rq_disk))
 		return -EIO;
 
+	if (blk_discard_rq(req))
+		return tr->discard(dev, block, nsect);
+
 	switch(rq_data_dir(req)) {
 	case READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
@@ -380,8 +371,8 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
 	tr->blkcore_priv->rq->queuedata = tr;
 	blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize);
 	if (tr->discard)
-		blk_queue_set_discard(tr->blkcore_priv->rq,
-				      blktrans_discard_request);
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+					tr->blkcore_priv->rq);
 
 	tr->blkshift = ffs(tr->blksize) - 1;
 
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
index ac8577358ba0..5e8db0677582 100644
--- a/drivers/staging/dst/dcore.c
+++ b/drivers/staging/dst/dcore.c
@@ -102,7 +102,7 @@ static int dst_request(struct request_queue *q, struct bio *bio)
 	struct dst_node *n = q->queuedata;
 	int err = -EIO;
 
-	if (bio_empty_barrier(bio) && !q->prepare_discard_fn) {
+	if (bio_empty_barrier(bio) && !blk_queue_discard(q)) {
 		/*
 		 * This is a dirty^Wnice hack, but if we complete this
 		 * operation with -EOPNOTSUPP like intended, XFS
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e23a86cae5ac..f62d45e87618 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -82,7 +82,6 @@ enum rq_cmd_type_bits {
 enum {
 	REQ_LB_OP_EJECT	= 0x40,		/* eject request */
 	REQ_LB_OP_FLUSH = 0x41,		/* flush request */
-	REQ_LB_OP_DISCARD = 0x42,	/* discard sectors */
 };
 
 /*
@@ -261,7 +260,6 @@ typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
-typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -340,7 +338,6 @@ struct request_queue
 	make_request_fn		*make_request_fn;
 	prep_rq_fn		*prep_rq_fn;
 	unplug_fn		*unplug_fn;
-	prepare_discard_fn	*prepare_discard_fn;
 	merge_bvec_fn		*merge_bvec_fn;
 	prepare_flush_fn	*prepare_flush_fn;
 	softirq_done_fn		*softirq_done_fn;
@@ -460,6 +457,7 @@ struct request_queue
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
 #define QUEUE_FLAG_CQ	       16	/* hardware does queuing */
+#define QUEUE_FLAG_DISCARD     17	/* supports DISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_CLUSTER) |		\
@@ -591,6 +589,7 @@ enum {
 #define blk_queue_flushing(q)	((q)->ordseq)
 #define blk_queue_stackable(q)	\
 	test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
+#define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 
 #define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
 #define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
@@ -955,7 +954,6 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
-extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);