aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Woodhouse <David.Woodhouse@intel.com>2008-08-05 13:01:53 -0400
committerJens Axboe <jens.axboe@oracle.com>2008-10-09 02:56:01 -0400
commitfb2dce862d9f9a68e6b9374579056ec9eca02a63 (patch)
tree888e0fd7248c9329fa1aa3981043a2dc2457d488
parentd628eaef310533767ce68664873869c2d7f78f09 (diff)
Add 'discard' request handling
Some block devices benefit from a hint that they can forget the contents of certain sectors. Add basic support for this to the block core, along with a 'blkdev_issue_discard()' helper function which issues such requests. The caller doesn't get to provide an end_io function, since blkdev_issue_discard() will automatically split the request up into multiple bios if appropriate. Neither does the function wait for completion -- it's expected that callers won't care about when, or even _if_, the request completes. It's only a hint to the device anyway. By definition, the file system doesn't _care_ about these sectors any more. [With feedback from OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> and Jens Axboe <jens.axboe@oracle.com>] Signed-off-by: David Woodhouse <David.Woodhouse@intel.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r--block/blk-barrier.c69
-rw-r--r--block/blk-core.c28
-rw-r--r--block/blk-settings.c17
-rw-r--r--include/linux/bio.h8
-rw-r--r--include/linux/blkdev.h16
-rw-r--r--include/linux/fs.h3
6 files changed, 130 insertions, 11 deletions
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index a09ead19f9c5..273121c0eb80 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -315,3 +315,72 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
315 return ret; 315 return ret;
316} 316}
317EXPORT_SYMBOL(blkdev_issue_flush); 317EXPORT_SYMBOL(blkdev_issue_flush);
318
/*
 * Completion callback installed on every bio built by
 * blkdev_issue_discard().  On error it records the failure in
 * bio->bi_flags: BIO_EOPNOTSUPP is set when err is -EOPNOTSUPP, and
 * BIO_UPTODATE is cleared for any non-zero err.  It then drops one
 * reference on the bio.
 */
319static void blkdev_discard_end_io(struct bio *bio, int err)
320{
321 if (err) {
322 if (err == -EOPNOTSUPP)
323 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
324 clear_bit(BIO_UPTODATE, &bio->bi_flags);
325 }
326
327 bio_put(bio);
328}
329
330/**
331 * blkdev_issue_discard - queue a discard
332 * @bdev: blockdev to issue discard for
333 * @sector: start sector
334 * @nr_sects: number of sectors to discard
335 *
336 * Description:
337 * Issue a discard request for the sectors in question. Does not wait.
 * The range is split into bios covering at most q->max_hw_sectors
 * sectors each; submission stops after the first bio that is seen to
 * have failed.
 *
 * Return: 0 if no failure was observed at submission time; -ENXIO if
 * @bdev has no disk or no request queue; -EOPNOTSUPP if the queue has no
 * prepare_discard_fn or a bio came back flagged BIO_EOPNOTSUPP; -ENOMEM
 * if bio_alloc() returns NULL; -EIO if a bio is found without
 * BIO_UPTODATE right after submit_bio().
338 */
339int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
340 unsigned nr_sects)
341{
342 struct request_queue *q;
343 struct bio *bio;
344 int ret = 0;
345
346 if (bdev->bd_disk == NULL)
347 return -ENXIO;
348
349 q = bdev_get_queue(bdev);
350 if (!q)
351 return -ENXIO;
352
/* Without a driver-supplied prepare hook the queue cannot handle discards. */
353 if (!q->prepare_discard_fn)
354 return -EOPNOTSUPP;
355
/*
 * One bio per iteration until the whole range is covered or an error
 * has been recorded in ret.
 * NOTE(review): progress relies on q->max_hw_sectors being non-zero;
 * with a zero limit nr_sects would never shrink -- confirm the queue
 * always has a non-zero limit.
 */
356 while (nr_sects && !ret) {
/* Zero bio_vecs requested: the bio only describes a sector range. */
357 bio = bio_alloc(GFP_KERNEL, 0);
358 if (!bio)
359 return -ENOMEM;
360
361 bio->bi_end_io = blkdev_discard_end_io;
362 bio->bi_bdev = bdev;
363
364 bio->bi_sector = sector;
365
/* Cap this bio at max_hw_sectors; the rest is left for later iterations. */
366 if (nr_sects > q->max_hw_sectors) {
367 bio->bi_size = q->max_hw_sectors << 9;
368 nr_sects -= q->max_hw_sectors;
369 sector += q->max_hw_sectors;
370 } else {
371 bio->bi_size = nr_sects << 9;
372 nr_sects = 0;
373 }
/*
 * Take an extra reference: blkdev_discard_end_io() does its own
 * bio_put(), and the flags are still read below before the matching
 * bio_put() here.
 */
374 bio_get(bio);
375 submit_bio(WRITE_DISCARD, bio);
376
377 /* Check if it failed immediately */
378 if (bio_flagged(bio, BIO_EOPNOTSUPP))
379 ret = -EOPNOTSUPP;
380 else if (!bio_flagged(bio, BIO_UPTODATE))
381 ret = -EIO;
382 bio_put(bio);
383 }
384 return ret;
385}
386EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/block/blk-core.c b/block/blk-core.c
index a496727df7ef..1e143c4f9d34 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1079,6 +1079,10 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1079 */ 1079 */
1080 if (unlikely(bio_barrier(bio))) 1080 if (unlikely(bio_barrier(bio)))
1081 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 1081 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
1082 if (unlikely(bio_discard(bio))) {
1083 req->cmd_flags |= (REQ_SOFTBARRIER | REQ_DISCARD);
1084 req->q->prepare_discard_fn(req->q, req);
1085 }
1082 1086
1083 if (bio_sync(bio)) 1087 if (bio_sync(bio))
1084 req->cmd_flags |= REQ_RW_SYNC; 1088 req->cmd_flags |= REQ_RW_SYNC;
@@ -1095,7 +1099,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1095static int __make_request(struct request_queue *q, struct bio *bio) 1099static int __make_request(struct request_queue *q, struct bio *bio)
1096{ 1100{
1097 struct request *req; 1101 struct request *req;
1098 int el_ret, nr_sectors, barrier, err; 1102 int el_ret, nr_sectors, barrier, discard, err;
1099 const unsigned short prio = bio_prio(bio); 1103 const unsigned short prio = bio_prio(bio);
1100 const int sync = bio_sync(bio); 1104 const int sync = bio_sync(bio);
1101 int rw_flags; 1105 int rw_flags;
@@ -1115,6 +1119,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1115 goto end_io; 1119 goto end_io;
1116 } 1120 }
1117 1121
1122 discard = bio_discard(bio);
1123 if (unlikely(discard) && !q->prepare_discard_fn) {
1124 err = -EOPNOTSUPP;
1125 goto end_io;
1126 }
1127
1118 spin_lock_irq(q->queue_lock); 1128 spin_lock_irq(q->queue_lock);
1119 1129
1120 if (unlikely(barrier) || elv_queue_empty(q)) 1130 if (unlikely(barrier) || elv_queue_empty(q))
@@ -1405,7 +1415,8 @@ end_io:
1405 1415
1406 if (bio_check_eod(bio, nr_sectors)) 1416 if (bio_check_eod(bio, nr_sectors))
1407 goto end_io; 1417 goto end_io;
1408 if (bio_empty_barrier(bio) && !q->prepare_flush_fn) { 1418 if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
1419 (bio_discard(bio) && !q->prepare_discard_fn)) {
1409 err = -EOPNOTSUPP; 1420 err = -EOPNOTSUPP;
1410 goto end_io; 1421 goto end_io;
1411 } 1422 }
@@ -1487,7 +1498,6 @@ void submit_bio(int rw, struct bio *bio)
1487 * go through the normal accounting stuff before submission. 1498 * go through the normal accounting stuff before submission.
1488 */ 1499 */
1489 if (bio_has_data(bio)) { 1500 if (bio_has_data(bio)) {
1490
1491 if (rw & WRITE) { 1501 if (rw & WRITE) {
1492 count_vm_events(PGPGOUT, count); 1502 count_vm_events(PGPGOUT, count);
1493 } else { 1503 } else {
@@ -1881,7 +1891,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
1881 struct request_queue *q = rq->q; 1891 struct request_queue *q = rq->q;
1882 unsigned long flags = 0UL; 1892 unsigned long flags = 0UL;
1883 1893
1884 if (bio_has_data(rq->bio)) { 1894 if (bio_has_data(rq->bio) || blk_discard_rq(rq)) {
1885 if (__end_that_request_first(rq, error, nr_bytes)) 1895 if (__end_that_request_first(rq, error, nr_bytes))
1886 return 1; 1896 return 1;
1887 1897
@@ -1939,7 +1949,7 @@ EXPORT_SYMBOL_GPL(blk_end_request);
1939 **/ 1949 **/
1940int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 1950int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
1941{ 1951{
1942 if (bio_has_data(rq->bio) && 1952 if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) &&
1943 __end_that_request_first(rq, error, nr_bytes)) 1953 __end_that_request_first(rq, error, nr_bytes))
1944 return 1; 1954 return 1;
1945 1955
@@ -2012,12 +2022,14 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2012 we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ 2022 we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
2013 rq->cmd_flags |= (bio->bi_rw & 3); 2023 rq->cmd_flags |= (bio->bi_rw & 3);
2014 2024
2015 rq->nr_phys_segments = bio_phys_segments(q, bio); 2025 if (bio_has_data(bio)) {
2016 rq->nr_hw_segments = bio_hw_segments(q, bio); 2026 rq->nr_phys_segments = bio_phys_segments(q, bio);
2027 rq->nr_hw_segments = bio_hw_segments(q, bio);
2028 rq->buffer = bio_data(bio);
2029 }
2017 rq->current_nr_sectors = bio_cur_sectors(bio); 2030 rq->current_nr_sectors = bio_cur_sectors(bio);
2018 rq->hard_cur_sectors = rq->current_nr_sectors; 2031 rq->hard_cur_sectors = rq->current_nr_sectors;
2019 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); 2032 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
2020 rq->buffer = bio_data(bio);
2021 rq->data_len = bio->bi_size; 2033 rq->data_len = bio->bi_size;
2022 2034
2023 rq->bio = rq->biotail = bio; 2035 rq->bio = rq->biotail = bio;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index dfc77012843f..539d873c820d 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -33,6 +33,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
33EXPORT_SYMBOL(blk_queue_prep_rq); 33EXPORT_SYMBOL(blk_queue_prep_rq);
34 34
35/** 35/**
36 * blk_queue_set_discard - set a discard_sectors function for queue
37 * @q: queue
38 * @dfn: prepare_discard function
39 *
40 * It's possible for a queue to register a discard callback which is used
41 * to transform a discard request into the appropriate type for the
42 * hardware. If none is registered, then discard requests are failed
43 * with %EOPNOTSUPP.
44 *
45 */
46void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
47{
/* Plain store; a NULL @dfn leaves the queue rejecting discards. */
48 q->prepare_discard_fn = dfn;
49}
50EXPORT_SYMBOL(blk_queue_set_discard);
51
52/**
36 * blk_queue_merge_bvec - set a merge_bvec function for queue 53 * blk_queue_merge_bvec - set a merge_bvec function for queue
37 * @q: queue 54 * @q: queue
38 * @mbfn: merge_bvec_fn 55 * @mbfn: merge_bvec_fn
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 17f1fbdb31bf..1fdfc5621c83 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -149,6 +149,8 @@ struct bio {
149 * bit 2 -- barrier 149 * bit 2 -- barrier
150 * bit 3 -- fail fast, don't want low level driver retries 150 * bit 3 -- fail fast, don't want low level driver retries
151 * bit 4 -- synchronous I/O hint: the block layer will unplug immediately 151 * bit 4 -- synchronous I/O hint: the block layer will unplug immediately
152 * bit 5 -- metadata request
153 * bit 6 -- discard sectors
152 */ 154 */
153#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ 155#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */
154#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ 156#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */
@@ -156,6 +158,7 @@ struct bio {
156#define BIO_RW_FAILFAST 3 158#define BIO_RW_FAILFAST 3
157#define BIO_RW_SYNC 4 159#define BIO_RW_SYNC 4
158#define BIO_RW_META 5 160#define BIO_RW_META 5
161#define BIO_RW_DISCARD 6
159 162
160/* 163/*
161 * upper 16 bits of bi_rw define the io priority of this bio 164 * upper 16 bits of bi_rw define the io priority of this bio
@@ -186,13 +189,14 @@ struct bio {
186#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) 189#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
187#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) 190#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META))
188#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio)) 191#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio))
192#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD))
189 193
190static inline unsigned int bio_cur_sectors(struct bio *bio) 194static inline unsigned int bio_cur_sectors(struct bio *bio)
191{ 195{
192 if (bio->bi_vcnt) 196 if (bio->bi_vcnt)
193 return bio_iovec(bio)->bv_len >> 9; 197 return bio_iovec(bio)->bv_len >> 9;
194 198 else /* dataless requests such as discard */
195 return 0; 199 return bio->bi_size >> 9;
196} 200}
197 201
198static inline void *bio_data(struct bio *bio) 202static inline void *bio_data(struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e0ba018f5e88..26ececbbebe2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -89,6 +89,7 @@ enum {
89enum rq_flag_bits { 89enum rq_flag_bits {
90 __REQ_RW, /* not set, read. set, write */ 90 __REQ_RW, /* not set, read. set, write */
91 __REQ_FAILFAST, /* no low level driver retries */ 91 __REQ_FAILFAST, /* no low level driver retries */
92 __REQ_DISCARD, /* request to discard sectors */
92 __REQ_SORTED, /* elevator knows about this request */ 93 __REQ_SORTED, /* elevator knows about this request */
93 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 94 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
94 __REQ_HARDBARRIER, /* may not be passed by drive either */ 95 __REQ_HARDBARRIER, /* may not be passed by drive either */
@@ -111,6 +112,7 @@ enum rq_flag_bits {
111}; 112};
112 113
113#define REQ_RW (1 << __REQ_RW) 114#define REQ_RW (1 << __REQ_RW)
115#define REQ_DISCARD (1 << __REQ_DISCARD)
114#define REQ_FAILFAST (1 << __REQ_FAILFAST) 116#define REQ_FAILFAST (1 << __REQ_FAILFAST)
115#define REQ_SORTED (1 << __REQ_SORTED) 117#define REQ_SORTED (1 << __REQ_SORTED)
116#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) 118#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
@@ -252,6 +254,7 @@ typedef void (request_fn_proc) (struct request_queue *q);
252typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); 254typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
253typedef int (prep_rq_fn) (struct request_queue *, struct request *); 255typedef int (prep_rq_fn) (struct request_queue *, struct request *);
254typedef void (unplug_fn) (struct request_queue *); 256typedef void (unplug_fn) (struct request_queue *);
257typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
255 258
256struct bio_vec; 259struct bio_vec;
257struct bvec_merge_data { 260struct bvec_merge_data {
@@ -307,6 +310,7 @@ struct request_queue
307 make_request_fn *make_request_fn; 310 make_request_fn *make_request_fn;
308 prep_rq_fn *prep_rq_fn; 311 prep_rq_fn *prep_rq_fn;
309 unplug_fn *unplug_fn; 312 unplug_fn *unplug_fn;
313 prepare_discard_fn *prepare_discard_fn;
310 merge_bvec_fn *merge_bvec_fn; 314 merge_bvec_fn *merge_bvec_fn;
311 prepare_flush_fn *prepare_flush_fn; 315 prepare_flush_fn *prepare_flush_fn;
312 softirq_done_fn *softirq_done_fn; 316 softirq_done_fn *softirq_done_fn;
@@ -546,6 +550,7 @@ enum {
546#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) 550#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
547#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) 551#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
548#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) 552#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
553#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD)
549#define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 554#define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
550#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) 555#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
551/* rq->queuelist of dequeued request must be list_empty() */ 556/* rq->queuelist of dequeued request must be list_empty() */
@@ -796,6 +801,7 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
796extern void blk_queue_dma_alignment(struct request_queue *, int); 801extern void blk_queue_dma_alignment(struct request_queue *, int);
797extern void blk_queue_update_dma_alignment(struct request_queue *, int); 802extern void blk_queue_update_dma_alignment(struct request_queue *, int);
798extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); 803extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
804extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
799extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 805extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
800extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); 806extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
801extern int blk_do_ordered(struct request_queue *, struct request **); 807extern int blk_do_ordered(struct request_queue *, struct request **);
@@ -837,6 +843,16 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
837} 843}
838 844
839extern int blkdev_issue_flush(struct block_device *, sector_t *); 845extern int blkdev_issue_flush(struct block_device *, sector_t *);
846extern int blkdev_issue_discard(struct block_device *, sector_t sector,
847 unsigned nr_sects);
848
/*
 * Filesystem-level wrapper around blkdev_issue_discard().  @block and
 * @nr_blocks are given in units of the superblock's block size; both are
 * shifted left by (s_blocksize_bits - 9) to turn them into 512-byte
 * sectors before the discard is issued on sb->s_bdev.  Returns whatever
 * blkdev_issue_discard() returns.
 * NOTE(review): nr_blocks is a plain `unsigned`, so the shift can wrap
 * for very large ranges -- confirm callers stay within range.
 */
849static inline int sb_issue_discard(struct super_block *sb,
850 sector_t block, unsigned nr_blocks)
851{
852 block <<= (sb->s_blocksize_bits - 9);
853 nr_blocks <<= (sb->s_blocksize_bits - 9);
854 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks);
855}
840 856
841/* 857/*
842* command filter functions 858* command filter functions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 580b513668fe..eb0131319134 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -86,7 +86,8 @@ extern int dir_notify_enable;
86#define READ_META (READ | (1 << BIO_RW_META)) 86#define READ_META (READ | (1 << BIO_RW_META))
87#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) 87#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC))
88#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) 88#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC))
89#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) 89#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER))
90#define WRITE_DISCARD (WRITE | (1 << BIO_RW_DISCARD))
90 91
91#define SEL_IN 1 92#define SEL_IN 1
92#define SEL_OUT 2 93#define SEL_OUT 2