diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2007-09-27 07:01:25 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2007-10-16 05:03:56 -0400 |
commit | bf2de6f5a4faf0197268f18d08969b003b87b6e8 (patch) | |
tree | 16830a15a7effea352445a7aba5dbb433314d3eb | |
parent | c07e2b41291853b19fff11ceee3657df252a4e42 (diff) |
block: Initial support for data-less (or empty) barriers
This implements functionality to pass down or insert a barrier
in a queue, without having data attached to it. The ->prepare_flush_fn()
infrastructure from data barriers is reused to provide this
functionality.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | block/elevator.c | 8 | ||||
-rw-r--r-- | block/ll_rw_blk.c | 58 | ||||
-rw-r--r-- | include/linux/bio.h | 19 | ||||
-rw-r--r-- | include/linux/blkdev.h | 1 | ||||
-rw-r--r-- | mm/bounce.c | 6 |
5 files changed, 71 insertions, 21 deletions
diff --git a/block/elevator.c b/block/elevator.c index ec23ca02f2fe..952aee04a68a 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -712,6 +712,14 @@ struct request *elv_next_request(struct request_queue *q) | |||
712 | int ret; | 712 | int ret; |
713 | 713 | ||
714 | while ((rq = __elv_next_request(q)) != NULL) { | 714 | while ((rq = __elv_next_request(q)) != NULL) { |
715 | /* | ||
716 | * Kill the empty barrier place holder, the driver must | ||
717 | * not ever see it. | ||
718 | */ | ||
719 | if (blk_empty_barrier(rq)) { | ||
720 | end_queued_request(rq, 1); | ||
721 | continue; | ||
722 | } | ||
715 | if (!(rq->cmd_flags & REQ_STARTED)) { | 723 | if (!(rq->cmd_flags & REQ_STARTED)) { |
716 | /* | 724 | /* |
717 | * This is the first time the device driver | 725 | * This is the first time the device driver |
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 548f0d826679..4fde3a3c92d3 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c | |||
@@ -458,9 +458,12 @@ static inline struct request *start_ordered(struct request_queue *q, | |||
458 | * Queue ordered sequence. As we stack them at the head, we | 458 | * Queue ordered sequence. As we stack them at the head, we |
459 | * need to queue in reverse order. Note that we rely on that | 459 | * need to queue in reverse order. Note that we rely on that |
460 | * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs | 460 | * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs |
461 | * request gets inbetween ordered sequence. | 461 | * request gets inbetween ordered sequence. If this request is |
462 | * an empty barrier, we don't need to do a postflush ever since | ||
463 | * there will be no data written between the pre and post flush. | ||
464 | * Hence a single flush will suffice. | ||
462 | */ | 465 | */ |
463 | if (q->ordered & QUEUE_ORDERED_POSTFLUSH) | 466 | if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq)) |
464 | queue_flush(q, QUEUE_ORDERED_POSTFLUSH); | 467 | queue_flush(q, QUEUE_ORDERED_POSTFLUSH); |
465 | else | 468 | else |
466 | q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; | 469 | q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; |
@@ -484,7 +487,7 @@ static inline struct request *start_ordered(struct request_queue *q, | |||
484 | int blk_do_ordered(struct request_queue *q, struct request **rqp) | 487 | int blk_do_ordered(struct request_queue *q, struct request **rqp) |
485 | { | 488 | { |
486 | struct request *rq = *rqp; | 489 | struct request *rq = *rqp; |
487 | int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); | 490 | const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); |
488 | 491 | ||
489 | if (!q->ordseq) { | 492 | if (!q->ordseq) { |
490 | if (!is_barrier) | 493 | if (!is_barrier) |
@@ -3054,7 +3057,7 @@ static inline void blk_partition_remap(struct bio *bio) | |||
3054 | { | 3057 | { |
3055 | struct block_device *bdev = bio->bi_bdev; | 3058 | struct block_device *bdev = bio->bi_bdev; |
3056 | 3059 | ||
3057 | if (bdev != bdev->bd_contains) { | 3060 | if (bio_sectors(bio) && bdev != bdev->bd_contains) { |
3058 | struct hd_struct *p = bdev->bd_part; | 3061 | struct hd_struct *p = bdev->bd_part; |
3059 | const int rw = bio_data_dir(bio); | 3062 | const int rw = bio_data_dir(bio); |
3060 | 3063 | ||
@@ -3313,23 +3316,32 @@ void submit_bio(int rw, struct bio *bio) | |||
3313 | { | 3316 | { |
3314 | int count = bio_sectors(bio); | 3317 | int count = bio_sectors(bio); |
3315 | 3318 | ||
3316 | BIO_BUG_ON(!bio->bi_size); | ||
3317 | BIO_BUG_ON(!bio->bi_io_vec); | ||
3318 | bio->bi_rw |= rw; | 3319 | bio->bi_rw |= rw; |
3319 | if (rw & WRITE) { | ||
3320 | count_vm_events(PGPGOUT, count); | ||
3321 | } else { | ||
3322 | task_io_account_read(bio->bi_size); | ||
3323 | count_vm_events(PGPGIN, count); | ||
3324 | } | ||
3325 | 3320 | ||
3326 | if (unlikely(block_dump)) { | 3321 | /* |
3327 | char b[BDEVNAME_SIZE]; | 3322 | * If it's a regular read/write or a barrier with data attached, |
3328 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", | 3323 | * go through the normal accounting stuff before submission. |
3329 | current->comm, current->pid, | 3324 | */ |
3330 | (rw & WRITE) ? "WRITE" : "READ", | 3325 | if (!bio_empty_barrier(bio)) { |
3331 | (unsigned long long)bio->bi_sector, | 3326 | |
3332 | bdevname(bio->bi_bdev,b)); | 3327 | BIO_BUG_ON(!bio->bi_size); |
3328 | BIO_BUG_ON(!bio->bi_io_vec); | ||
3329 | |||
3330 | if (rw & WRITE) { | ||
3331 | count_vm_events(PGPGOUT, count); | ||
3332 | } else { | ||
3333 | task_io_account_read(bio->bi_size); | ||
3334 | count_vm_events(PGPGIN, count); | ||
3335 | } | ||
3336 | |||
3337 | if (unlikely(block_dump)) { | ||
3338 | char b[BDEVNAME_SIZE]; | ||
3339 | printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", | ||
3340 | current->comm, current->pid, | ||
3341 | (rw & WRITE) ? "WRITE" : "READ", | ||
3342 | (unsigned long long)bio->bi_sector, | ||
3343 | bdevname(bio->bi_bdev,b)); | ||
3344 | } | ||
3333 | } | 3345 | } |
3334 | 3346 | ||
3335 | generic_make_request(bio); | 3347 | generic_make_request(bio); |
@@ -3405,6 +3417,14 @@ static int __end_that_request_first(struct request *req, int uptodate, | |||
3405 | while ((bio = req->bio) != NULL) { | 3417 | while ((bio = req->bio) != NULL) { |
3406 | int nbytes; | 3418 | int nbytes; |
3407 | 3419 | ||
3420 | /* | ||
3421 | * For an empty barrier request, the low level driver must | ||
3422 | * store a potential error location in ->sector. We pass | ||
3423 | * that back up in ->bi_sector. | ||
3424 | */ | ||
3425 | if (blk_empty_barrier(req)) | ||
3426 | bio->bi_sector = req->sector; | ||
3427 | |||
3408 | if (nr_bytes >= bio->bi_size) { | 3428 | if (nr_bytes >= bio->bi_size) { |
3409 | req->bio = bio->bi_next; | 3429 | req->bio = bio->bi_next; |
3410 | nbytes = bio->bi_size; | 3430 | nbytes = bio->bi_size; |
diff --git a/include/linux/bio.h b/include/linux/bio.h index 089a8bc55dd4..4da441337d6e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
@@ -176,13 +176,28 @@ struct bio { | |||
176 | #define bio_offset(bio) bio_iovec((bio))->bv_offset | 176 | #define bio_offset(bio) bio_iovec((bio))->bv_offset |
177 | #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) | 177 | #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) |
178 | #define bio_sectors(bio) ((bio)->bi_size >> 9) | 178 | #define bio_sectors(bio) ((bio)->bi_size >> 9) |
179 | #define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9) | ||
180 | #define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) | ||
181 | #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) | 179 | #define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) |
182 | #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) | 180 | #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) |
183 | #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) | 181 | #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) |
184 | #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) | 182 | #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) |
185 | #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) | 183 | #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) |
184 | #define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size) | ||
185 | |||
186 | static inline unsigned int bio_cur_sectors(struct bio *bio) | ||
187 | { | ||
188 | if (bio->bi_vcnt) | ||
189 | return bio_iovec(bio)->bv_len >> 9; | ||
190 | |||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | static inline void *bio_data(struct bio *bio) | ||
195 | { | ||
196 | if (bio->bi_vcnt) | ||
197 | return page_address(bio_page(bio)) + bio_offset(bio); | ||
198 | |||
199 | return NULL; | ||
200 | } | ||
186 | 201 | ||
187 | /* | 202 | /* |
188 | * will die | 203 | * will die |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 610967992ddb..fb2ff749dc1f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -540,6 +540,7 @@ enum { | |||
540 | #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) | 540 | #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) |
541 | #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) | 541 | #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) |
542 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) | 542 | #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) |
543 | #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) | ||
543 | 544 | ||
544 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) | 545 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) |
545 | 546 | ||
diff --git a/mm/bounce.c b/mm/bounce.c index 3b549bf31f7d..b6d2d0f1019b 100644 --- a/mm/bounce.c +++ b/mm/bounce.c | |||
@@ -265,6 +265,12 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) | |||
265 | mempool_t *pool; | 265 | mempool_t *pool; |
266 | 266 | ||
267 | /* | 267 | /* |
268 | * Data-less bio, nothing to bounce | ||
269 | */ | ||
270 | if (bio_empty_barrier(*bio_orig)) | ||
271 | return; | ||
272 | |||
273 | /* | ||
268 | * for non-isa bounce case, just check if the bounce pfn is equal | 274 | * for non-isa bounce case, just check if the bounce pfn is equal |
269 | * to or bigger than the highest pfn in the system -- in that case, | 275 | * to or bigger than the highest pfn in the system -- in that case, |
270 | * don't waste time iterating over bio segments | 276 | * don't waste time iterating over bio segments |