Diffstat (limited to 'block/ll_rw_blk.c')
 block/ll_rw_blk.c | 270 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 files changed, 190 insertions(+), 80 deletions(-)
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index d875673e76cd..4df7d027eb06 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -304,23 +304,6 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered,
 
 EXPORT_SYMBOL(blk_queue_ordered);
 
-/**
- * blk_queue_issue_flush_fn - set function for issuing a flush
- * @q:   the request queue
- * @iff: the function to be called issuing the flush
- *
- * Description:
- *   If a driver supports issuing a flush command, the support is notified
- *   to the block layer by defining it through this call.
- *
- **/
-void blk_queue_issue_flush_fn(struct request_queue *q, issue_flush_fn *iff)
-{
-	q->issue_flush_fn = iff;
-}
-
-EXPORT_SYMBOL(blk_queue_issue_flush_fn);
-
 /*
  * Cache flushing for ordered writes handling
  */
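With this removal, drivers no longer register a flush callback at all. Purely for illustration (the driver name and functions below are hypothetical, not taken from the tree), the old and new responsibilities look roughly like this:

/* Before: a driver with a volatile write cache advertised flush support
 * by registering a callback through the interface removed above. */
static int mydrv_issue_flush(struct request_queue *q, struct gendisk *disk,
			     sector_t *error_sector)
{
	/* ... build, issue and wait for a cache flush command ... */
	return 0;
}

static void mydrv_init_queue(struct request_queue *q)
{
	blk_queue_issue_flush_fn(q, mydrv_issue_flush);	/* old interface */
}

/* After: the driver only declares its ordered/barrier capability, e.g.
 * blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, mydrv_prepare_flush);
 * blkdev_issue_flush() now reaches it as an empty barrier request
 * instead of a dedicated issue_flush_fn hook. */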
@@ -377,10 +360,12 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 	/*
 	 * Okay, sequence complete.
 	 */
-	rq = q->orig_bar_rq;
-	uptodate = q->orderr ? q->orderr : 1;
+	uptodate = 1;
+	if (q->orderr)
+		uptodate = q->orderr;
 
 	q->ordseq = 0;
+	rq = q->orig_bar_rq;
 
 	end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
 	end_that_request_last(rq, uptodate);
@@ -445,7 +430,8 @@ static inline struct request *start_ordered(struct request_queue *q,
 	rq_init(q, rq);
 	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
 		rq->cmd_flags |= REQ_RW;
-	rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+	if (q->ordered & QUEUE_ORDERED_FUA)
+		rq->cmd_flags |= REQ_FUA;
 	rq->elevator_private = NULL;
 	rq->elevator_private2 = NULL;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
@@ -455,9 +441,12 @@ static inline struct request *start_ordered(struct request_queue *q,
 	 * Queue ordered sequence.  As we stack them at the head, we
 	 * need to queue in reverse order.  Note that we rely on that
 	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-	 * request gets inbetween ordered sequence.
+	 * request gets inbetween ordered sequence. If this request is
+	 * an empty barrier, we don't need to do a postflush ever since
+	 * there will be no data written between the pre and post flush.
+	 * Hence a single flush will suffice.
 	 */
-	if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+	if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
 		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
 	else
 		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
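blk_empty_barrier(), used in the new POSTFLUSH condition above, is the request-level "barrier with no data" test introduced elsewhere in this series (in include/linux/blkdev.h). Roughly, it amounts to the following sketch, not the actual hunk from that header:

/* Approximate shape of the predicate relied upon above: an empty barrier
 * is a filesystem barrier request that transfers no sectors, so a single
 * cache flush is enough to honour it. */
#define blk_empty_barrier(rq)	\
	(blk_fs_request(rq) && blk_barrier_rq(rq) && !(rq)->hard_nr_sectors)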
@@ -481,7 +470,7 @@ static inline struct request *start_ordered(struct request_queue *q,
 int blk_do_ordered(struct request_queue *q, struct request **rqp)
 {
 	struct request *rq = *rqp;
-	int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
+	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
 
 	if (!q->ordseq) {
 		if (!is_barrier)
@@ -2660,6 +2649,14 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
 
 EXPORT_SYMBOL(blk_execute_rq);
 
+static void bio_end_empty_barrier(struct bio *bio, int err)
+{
+	if (err)
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+
+	complete(bio->bi_private);
+}
+
 /**
  * blkdev_issue_flush - queue a flush
  * @bdev:	blockdev to issue flush for
@@ -2672,7 +2669,10 @@ EXPORT_SYMBOL(blk_execute_rq);
  */
 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 {
+	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q;
+	struct bio *bio;
+	int ret;
 
 	if (bdev->bd_disk == NULL)
 		return -ENXIO;
@@ -2680,10 +2680,32 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 	q = bdev_get_queue(bdev);
 	if (!q)
 		return -ENXIO;
-	if (!q->issue_flush_fn)
-		return -EOPNOTSUPP;
 
-	return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
+	bio = bio_alloc(GFP_KERNEL, 0);
+	if (!bio)
+		return -ENOMEM;
+
+	bio->bi_end_io = bio_end_empty_barrier;
+	bio->bi_private = &wait;
+	bio->bi_bdev = bdev;
+	submit_bio(1 << BIO_RW_BARRIER, bio);
+
+	wait_for_completion(&wait);
+
+	/*
+	 * The driver must store the error location in ->bi_sector, if
+	 * it supports it. For non-stacked drivers, this should be copied
+	 * from rq->sector.
+	 */
+	if (error_sector)
+		*error_sector = bio->bi_sector;
+
+	ret = 0;
+	if (!bio_flagged(bio, BIO_UPTODATE))
+		ret = -EIO;
+
+	bio_put(bio);
+	return ret;
 }
 
 EXPORT_SYMBOL(blkdev_issue_flush);
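After this change, a caller of blkdev_issue_flush() needs no driver-specific support beyond ordinary barrier handling. A minimal caller might look like the sketch below (hypothetical helper name; callers that do not care about the failing sector would simply pass NULL):

/* Hypothetical caller: flush the volatile write cache of a device after
 * previously submitted data has completed. */
static int example_flush_cache(struct block_device *bdev)
{
	sector_t bad_sector;
	int ret;

	ret = blkdev_issue_flush(bdev, &bad_sector);
	if (ret)
		printk(KERN_ERR "cache flush failed near sector %llu\n",
		       (unsigned long long)bad_sector);
	return ret;
}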
@@ -3051,7 +3073,7 @@ static inline void blk_partition_remap(struct bio *bio)
 {
 	struct block_device *bdev = bio->bi_bdev;
 
-	if (bdev != bdev->bd_contains) {
+	if (bio_sectors(bio) && bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
 		const int rw = bio_data_dir(bio);
 
@@ -3117,6 +3139,35 @@ static inline int should_fail_request(struct bio *bio)
 
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
 
+/*
+ * Check whether this bio extends beyond the end of the device.
+ */
+static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
+{
+	sector_t maxsector;
+
+	if (!nr_sectors)
+		return 0;
+
+	/* Test device or partition size, when known. */
+	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
+	if (maxsector) {
+		sector_t sector = bio->bi_sector;
+
+		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
+			/*
+			 * This may well happen - the kernel calls bread()
+			 * without checking the size of the device, e.g., when
+			 * mounting a device.
+			 */
+			handle_bad_sector(bio);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * generic_make_request: hand a buffer to its device driver for I/O
  * @bio:  The bio describing the location in memory and on the device.
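The comparison in bio_check_eod() is deliberately written as "maxsector < nr_sectors || maxsector - nr_sectors < sector" rather than "sector + nr_sectors > maxsector", because the addition could wrap in sector_t. A small illustration of the difference (hypothetical helper, not part of the patch):

/* With a 32-bit sector_t: maxsector = 100, sector = 0xfffffff0,
 * nr_sectors = 32. "sector + nr_sectors" wraps to 16, so the naive test
 * "sector + nr_sectors > maxsector" wrongly accepts the bio, while the
 * subtraction form used above (100 - 32 = 68 < 0xfffffff0) rejects it. */
static int eod_check_sketch(sector_t maxsector, sector_t sector,
			    unsigned int nr_sectors)
{
	return maxsector < nr_sectors || maxsector - nr_sectors < sector;
}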
@@ -3144,27 +3195,14 @@ static inline int should_fail_request(struct bio *bio)
 static inline void __generic_make_request(struct bio *bio)
 {
 	struct request_queue *q;
-	sector_t maxsector;
 	sector_t old_sector;
 	int ret, nr_sectors = bio_sectors(bio);
 	dev_t old_dev;
 
 	might_sleep();
-	/* Test device or partition size, when known. */
-	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-	if (maxsector) {
-		sector_t sector = bio->bi_sector;
 
-		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
-			/*
-			 * This may well happen - the kernel calls bread()
-			 * without checking the size of the device, e.g., when
-			 * mounting a device.
-			 */
-			handle_bad_sector(bio);
-			goto end_io;
-		}
-	}
+	if (bio_check_eod(bio, nr_sectors))
+		goto end_io;
 
 	/*
 	 * Resolve the mapping until finished. (drivers are
@@ -3191,7 +3229,7 @@ end_io:
 			break;
 		}
 
-		if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {
+		if (unlikely(nr_sectors > q->max_hw_sectors)) {
 			printk("bio too big device %s (%u > %u)\n",
 				bdevname(bio->bi_bdev, b),
 				bio_sectors(bio),
@@ -3212,7 +3250,7 @@ end_io:
 		blk_partition_remap(bio);
 
 		if (old_sector != -1)
-			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 
+			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
 					    old_sector);
 
 		blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
@@ -3220,21 +3258,8 @@ end_io:
 		old_sector = bio->bi_sector;
 		old_dev = bio->bi_bdev->bd_dev;
 
-		maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-		if (maxsector) {
-			sector_t sector = bio->bi_sector;
-
-			if (maxsector < nr_sectors ||
-					maxsector - nr_sectors < sector) {
-				/*
-				 * This may well happen - partitions are not
-				 * checked to make sure they are within the size
-				 * of the whole device.
-				 */
-				handle_bad_sector(bio);
-				goto end_io;
-			}
-		}
+		if (bio_check_eod(bio, nr_sectors))
+			goto end_io;
 
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
@@ -3307,23 +3332,32 @@ void submit_bio(int rw, struct bio *bio)
 {
 	int count = bio_sectors(bio);
 
-	BIO_BUG_ON(!bio->bi_size);
-	BIO_BUG_ON(!bio->bi_io_vec);
 	bio->bi_rw |= rw;
-	if (rw & WRITE) {
-		count_vm_events(PGPGOUT, count);
-	} else {
-		task_io_account_read(bio->bi_size);
-		count_vm_events(PGPGIN, count);
-	}
 
-	if (unlikely(block_dump)) {
-		char b[BDEVNAME_SIZE];
-		printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
-			current->comm, current->pid,
-			(rw & WRITE) ? "WRITE" : "READ",
-			(unsigned long long)bio->bi_sector,
-			bdevname(bio->bi_bdev,b));
+	/*
+	 * If it's a regular read/write or a barrier with data attached,
+	 * go through the normal accounting stuff before submission.
+	 */
+	if (!bio_empty_barrier(bio)) {
+
+		BIO_BUG_ON(!bio->bi_size);
+		BIO_BUG_ON(!bio->bi_io_vec);
+
+		if (rw & WRITE) {
+			count_vm_events(PGPGOUT, count);
+		} else {
+			task_io_account_read(bio->bi_size);
+			count_vm_events(PGPGIN, count);
+		}
+
+		if (unlikely(block_dump)) {
+			char b[BDEVNAME_SIZE];
+			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
+				current->comm, current->pid,
+				(rw & WRITE) ? "WRITE" : "READ",
+				(unsigned long long)bio->bi_sector,
+				bdevname(bio->bi_bdev,b));
+		}
 	}
 
 	generic_make_request(bio);
@@ -3399,6 +3433,14 @@ static int __end_that_request_first(struct request *req, int uptodate,
 	while ((bio = req->bio) != NULL) {
 		int nbytes;
 
+		/*
+		 * For an empty barrier request, the low level driver must
+		 * store a potential error location in ->sector. We pass
+		 * that back up in ->bi_sector.
+		 */
+		if (blk_empty_barrier(req))
+			bio->bi_sector = req->sector;
+
 		if (nr_bytes >= bio->bi_size) {
 			req->bio = bio->bi_next;
 			nbytes = bio->bi_size;
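The driver-side counterpart of the comment above is small: before completing a failed empty barrier, the low level driver records the failing location in rq->sector. A hypothetical completion path (names invented for illustration, using the end_dequeued_request() helper added further down in this patch) might look like:

/* Hypothetical driver completion: "error" is 0 on success or a negative
 * errno; "bad" is where the flush failed, if the hardware reports it. */
static void mydrv_complete_flush(struct request *rq, int error, sector_t bad)
{
	if (error && blk_empty_barrier(rq))
		rq->sector = bad;	/* surfaces as bio->bi_sector above */

	end_dequeued_request(rq, error ? error : 1);
}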
@@ -3564,7 +3606,7 @@ static struct notifier_block blk_cpu_notifier __cpuinitdata = {
  * Description:
  *     Ends all I/O on a request. It does not handle partial completions,
  *     unless the driver actually implements this in its completion callback
- *     through requeueing. Theh actual completion happens out-of-order,
+ *     through requeueing. The actual completion happens out-of-order,
  *     through a softirq handler. The user must have registered a completion
  *     callback through blk_queue_softirq_done().
  **/
@@ -3627,15 +3669,83 @@ void end_that_request_last(struct request *req, int uptodate)
 
 EXPORT_SYMBOL(end_that_request_last);
 
-void end_request(struct request *req, int uptodate)
+static inline void __end_request(struct request *rq, int uptodate,
+				 unsigned int nr_bytes, int dequeue)
 {
-	if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
-		add_disk_randomness(req->rq_disk);
-		blkdev_dequeue_request(req);
-		end_that_request_last(req, uptodate);
+	if (!end_that_request_chunk(rq, uptodate, nr_bytes)) {
+		if (dequeue)
+			blkdev_dequeue_request(rq);
+		add_disk_randomness(rq->rq_disk);
+		end_that_request_last(rq, uptodate);
 	}
 }
 
+static unsigned int rq_byte_size(struct request *rq)
+{
+	if (blk_fs_request(rq))
+		return rq->hard_nr_sectors << 9;
+
+	return rq->data_len;
+}
+
+/**
+ * end_queued_request - end all I/O on a queued request
+ * @rq:		the request being processed
+ * @uptodate:	error value or 0/1 uptodate flag
+ *
+ * Description:
+ *     Ends all I/O on a request, and removes it from the block layer queues.
+ *     Not suitable for normal IO completion, unless the driver still has
+ *     the request attached to the block layer.
+ *
+ **/
+void end_queued_request(struct request *rq, int uptodate)
+{
+	__end_request(rq, uptodate, rq_byte_size(rq), 1);
+}
+EXPORT_SYMBOL(end_queued_request);
+
+/**
+ * end_dequeued_request - end all I/O on a dequeued request
+ * @rq:		the request being processed
+ * @uptodate:	error value or 0/1 uptodate flag
+ *
+ * Description:
+ *     Ends all I/O on a request. The request must already have been
+ *     dequeued using blkdev_dequeue_request(), as is normally the case
+ *     for most drivers.
+ *
+ **/
+void end_dequeued_request(struct request *rq, int uptodate)
+{
+	__end_request(rq, uptodate, rq_byte_size(rq), 0);
+}
+EXPORT_SYMBOL(end_dequeued_request);
+
+
+/**
+ * end_request - end I/O on the current segment of the request
+ * @rq:		the request being processed
+ * @uptodate:	error value or 0/1 uptodate flag
+ *
+ * Description:
+ *     Ends I/O on the current segment of a request. If that is the only
+ *     remaining segment, the request is also completed and freed.
+ *
+ *     This is a remnant of how older block drivers handled IO completions.
+ *     Modern drivers typically end IO on the full request in one go, unless
+ *     they have a residual value to account for. For that case this function
+ *     isn't really useful, unless the residual just happens to be the
+ *     full current segment. In other words, don't use this function in new
+ *     code. Either use end_request_completely(), or the
+ *     end_that_request_chunk() (along with end_that_request_last()) for
+ *     partial completions.
+ *
+ **/
+void end_request(struct request *req, int uptodate)
+{
+	__end_request(req, uptodate, req->hard_cur_sectors << 9, 1);
+}
 EXPORT_SYMBOL(end_request);
 
 static void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
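Taken together, __end_request() and its wrappers cover the common completion patterns: end_dequeued_request() for requests the driver has already taken off the queue, end_queued_request() for requests still sitting on it. A hypothetical driver (not from the tree) might use them roughly like this:

/* Hypothetical completion paths built on the helpers added above. */

/* Normal case: the request was removed from the queue with
 * blkdev_dequeue_request() before being started. */
static void mydrv_complete(struct request *rq, int error)
{
	end_dequeued_request(rq, error ? error : 1);
}

/* Tear-down case: fail everything still sitting on the queue, e.g. when
 * the device goes away (queue lock held by the caller). */
static void mydrv_fail_pending(struct request_queue *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL)
		end_queued_request(rq, -EIO);
}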