about | summary | refs | log | tree | commit | diff | stats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r-- block/elevator.c 17
-rw-r--r-- block/ll_rw_blk.c 270
2 files changed, 199 insertions, 88 deletions
diff --git a/block/elevator.c b/block/elevator.c
index b9c518afe1f8..952aee04a68a 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -712,6 +712,14 @@ struct request *elv_next_request(struct request_queue *q)
712 int ret; 712 int ret;
713 713
714 while ((rq = __elv_next_request(q)) != NULL) { 714 while ((rq = __elv_next_request(q)) != NULL) {
715 /*
716 * Kill the empty barrier placeholder; the driver must
717 * never see it.
718 */
719 if (blk_empty_barrier(rq)) {
720 end_queued_request(rq, 1);
721 continue;
722 }
715 if (!(rq->cmd_flags & REQ_STARTED)) { 723 if (!(rq->cmd_flags & REQ_STARTED)) {
716 /* 724 /*
717 * This is the first time the device driver 725 * This is the first time the device driver
@@ -751,15 +759,8 @@ struct request *elv_next_request(struct request_queue *q)
751 rq = NULL; 759 rq = NULL;
752 break; 760 break;
753 } else if (ret == BLKPREP_KILL) { 761 } else if (ret == BLKPREP_KILL) {
754 int nr_bytes = rq->hard_nr_sectors << 9;
755
756 if (!nr_bytes)
757 nr_bytes = rq->data_len;
758
759 blkdev_dequeue_request(rq);
760 rq->cmd_flags |= REQ_QUIET; 762 rq->cmd_flags |= REQ_QUIET;
761 end_that_request_chunk(rq, 0, nr_bytes); 763 end_queued_request(rq, 0);
762 end_that_request_last(rq, 0);
763 } else { 764 } else {
764 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, 765 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
765 ret); 766 ret);
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index d875673e76cd..4df7d027eb06 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -304,23 +304,6 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered,
304 304
305EXPORT_SYMBOL(blk_queue_ordered); 305EXPORT_SYMBOL(blk_queue_ordered);
306 306
307/**
308 * blk_queue_issue_flush_fn - set function for issuing a flush
309 * @q: the request queue
310 * @iff: the function to be called issuing the flush
311 *
312 * Description:
313 * If a driver supports issuing a flush command, the support is notified
314 * to the block layer by defining it through this call.
315 *
316 **/
317void blk_queue_issue_flush_fn(struct request_queue *q, issue_flush_fn *iff)
318{
319 q->issue_flush_fn = iff;
320}
321
322EXPORT_SYMBOL(blk_queue_issue_flush_fn);
323
324/* 307/*
325 * Cache flushing for ordered writes handling 308 * Cache flushing for ordered writes handling
326 */ 309 */
@@ -377,10 +360,12 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
377 /* 360 /*
378 * Okay, sequence complete. 361 * Okay, sequence complete.
379 */ 362 */
380 rq = q->orig_bar_rq; 363 uptodate = 1;
381 uptodate = q->orderr ? q->orderr : 1; 364 if (q->orderr)
365 uptodate = q->orderr;
382 366
383 q->ordseq = 0; 367 q->ordseq = 0;
368 rq = q->orig_bar_rq;
384 369
385 end_that_request_first(rq, uptodate, rq->hard_nr_sectors); 370 end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
386 end_that_request_last(rq, uptodate); 371 end_that_request_last(rq, uptodate);
@@ -445,7 +430,8 @@ static inline struct request *start_ordered(struct request_queue *q,
445 rq_init(q, rq); 430 rq_init(q, rq);
446 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) 431 if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
447 rq->cmd_flags |= REQ_RW; 432 rq->cmd_flags |= REQ_RW;
448 rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; 433 if (q->ordered & QUEUE_ORDERED_FUA)
434 rq->cmd_flags |= REQ_FUA;
449 rq->elevator_private = NULL; 435 rq->elevator_private = NULL;
450 rq->elevator_private2 = NULL; 436 rq->elevator_private2 = NULL;
451 init_request_from_bio(rq, q->orig_bar_rq->bio); 437 init_request_from_bio(rq, q->orig_bar_rq->bio);
@@ -455,9 +441,12 @@ static inline struct request *start_ordered(struct request_queue *q,
455 * Queue ordered sequence. As we stack them at the head, we 441 * Queue ordered sequence. As we stack them at the head, we
456 * need to queue in reverse order. Note that we rely on that 442 * need to queue in reverse order. Note that we rely on that
457 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs 443 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
458 * request gets inbetween ordered sequence. 444 * request gets inbetween ordered sequence. If this request is
445 * an empty barrier, we never need to do a postflush, since
446 * there will be no data written between the pre and post flush.
447 * Hence a single flush will suffice.
459 */ 448 */
460 if (q->ordered & QUEUE_ORDERED_POSTFLUSH) 449 if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
461 queue_flush(q, QUEUE_ORDERED_POSTFLUSH); 450 queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
462 else 451 else
463 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; 452 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
@@ -481,7 +470,7 @@ static inline struct request *start_ordered(struct request_queue *q,
481int blk_do_ordered(struct request_queue *q, struct request **rqp) 470int blk_do_ordered(struct request_queue *q, struct request **rqp)
482{ 471{
483 struct request *rq = *rqp; 472 struct request *rq = *rqp;
484 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); 473 const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
485 474
486 if (!q->ordseq) { 475 if (!q->ordseq) {
487 if (!is_barrier) 476 if (!is_barrier)
@@ -2660,6 +2649,14 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
2660 2649
2661EXPORT_SYMBOL(blk_execute_rq); 2650EXPORT_SYMBOL(blk_execute_rq);
2662 2651
2652static void bio_end_empty_barrier(struct bio *bio, int err)
2653{
2654 if (err)
2655 clear_bit(BIO_UPTODATE, &bio->bi_flags);
2656
2657 complete(bio->bi_private);
2658}
2659
2663/** 2660/**
2664 * blkdev_issue_flush - queue a flush 2661 * blkdev_issue_flush - queue a flush
2665 * @bdev: blockdev to issue flush for 2662 * @bdev: blockdev to issue flush for
@@ -2672,7 +2669,10 @@ EXPORT_SYMBOL(blk_execute_rq);
2672 */ 2669 */
2673int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) 2670int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
2674{ 2671{
2672 DECLARE_COMPLETION_ONSTACK(wait);
2675 struct request_queue *q; 2673 struct request_queue *q;
2674 struct bio *bio;
2675 int ret;
2676 2676
2677 if (bdev->bd_disk == NULL) 2677 if (bdev->bd_disk == NULL)
2678 return -ENXIO; 2678 return -ENXIO;
@@ -2680,10 +2680,32 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
2680 q = bdev_get_queue(bdev); 2680 q = bdev_get_queue(bdev);
2681 if (!q) 2681 if (!q)
2682 return -ENXIO; 2682 return -ENXIO;
2683 if (!q->issue_flush_fn)
2684 return -EOPNOTSUPP;
2685 2683
2686 return q->issue_flush_fn(q, bdev->bd_disk, error_sector); 2684 bio = bio_alloc(GFP_KERNEL, 0);
2685 if (!bio)
2686 return -ENOMEM;
2687
2688 bio->bi_end_io = bio_end_empty_barrier;
2689 bio->bi_private = &wait;
2690 bio->bi_bdev = bdev;
2691 submit_bio(1 << BIO_RW_BARRIER, bio);
2692
2693 wait_for_completion(&wait);
2694
2695 /*
2696 * The driver must store the error location in ->bi_sector, if
2697 * it supports it. For non-stacked drivers, this should be copied
2698 * from rq->sector.
2699 */
2700 if (error_sector)
2701 *error_sector = bio->bi_sector;
2702
2703 ret = 0;
2704 if (!bio_flagged(bio, BIO_UPTODATE))
2705 ret = -EIO;
2706
2707 bio_put(bio);
2708 return ret;
2687} 2709}
2688 2710
2689EXPORT_SYMBOL(blkdev_issue_flush); 2711EXPORT_SYMBOL(blkdev_issue_flush);
@@ -3051,7 +3073,7 @@ static inline void blk_partition_remap(struct bio *bio)
3051{ 3073{
3052 struct block_device *bdev = bio->bi_bdev; 3074 struct block_device *bdev = bio->bi_bdev;
3053 3075
3054 if (bdev != bdev->bd_contains) { 3076 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
3055 struct hd_struct *p = bdev->bd_part; 3077 struct hd_struct *p = bdev->bd_part;
3056 const int rw = bio_data_dir(bio); 3078 const int rw = bio_data_dir(bio);
3057 3079
@@ -3117,6 +3139,35 @@ static inline int should_fail_request(struct bio *bio)
3117 3139
3118#endif /* CONFIG_FAIL_MAKE_REQUEST */ 3140#endif /* CONFIG_FAIL_MAKE_REQUEST */
3119 3141
3142/*
3143 * Check whether this bio extends beyond the end of the device.
3144 */
3145static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
3146{
3147 sector_t maxsector;
3148
3149 if (!nr_sectors)
3150 return 0;
3151
3152 /* Test device or partition size, when known. */
3153 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
3154 if (maxsector) {
3155 sector_t sector = bio->bi_sector;
3156
3157 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
3158 /*
3159 * This may well happen - the kernel calls bread()
3160 * without checking the size of the device, e.g., when
3161 * mounting a device.
3162 */
3163 handle_bad_sector(bio);
3164 return 1;
3165 }
3166 }
3167
3168 return 0;
3169}
3170
3120/** 3171/**
3121 * generic_make_request: hand a buffer to its device driver for I/O 3172 * generic_make_request: hand a buffer to its device driver for I/O
3122 * @bio: The bio describing the location in memory and on the device. 3173 * @bio: The bio describing the location in memory and on the device.
@@ -3144,27 +3195,14 @@ static inline int should_fail_request(struct bio *bio)
3144static inline void __generic_make_request(struct bio *bio) 3195static inline void __generic_make_request(struct bio *bio)
3145{ 3196{
3146 struct request_queue *q; 3197 struct request_queue *q;
3147 sector_t maxsector;
3148 sector_t old_sector; 3198 sector_t old_sector;
3149 int ret, nr_sectors = bio_sectors(bio); 3199 int ret, nr_sectors = bio_sectors(bio);
3150 dev_t old_dev; 3200 dev_t old_dev;
3151 3201
3152 might_sleep(); 3202 might_sleep();
3153 /* Test device or partition size, when known. */
3154 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
3155 if (maxsector) {
3156 sector_t sector = bio->bi_sector;
3157 3203
3158 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 3204 if (bio_check_eod(bio, nr_sectors))
3159 /* 3205 goto end_io;
3160 * This may well happen - the kernel calls bread()
3161 * without checking the size of the device, e.g., when
3162 * mounting a device.
3163 */
3164 handle_bad_sector(bio);
3165 goto end_io;
3166 }
3167 }
3168 3206
3169 /* 3207 /*
3170 * Resolve the mapping until finished. (drivers are 3208 * Resolve the mapping until finished. (drivers are
@@ -3191,7 +3229,7 @@ end_io:
3191 break; 3229 break;
3192 } 3230 }
3193 3231
3194 if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) { 3232 if (unlikely(nr_sectors > q->max_hw_sectors)) {
3195 printk("bio too big device %s (%u > %u)\n", 3233 printk("bio too big device %s (%u > %u)\n",
3196 bdevname(bio->bi_bdev, b), 3234 bdevname(bio->bi_bdev, b),
3197 bio_sectors(bio), 3235 bio_sectors(bio),
@@ -3212,7 +3250,7 @@ end_io:
3212 blk_partition_remap(bio); 3250 blk_partition_remap(bio);
3213 3251
3214 if (old_sector != -1) 3252 if (old_sector != -1)
3215 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 3253 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
3216 old_sector); 3254 old_sector);
3217 3255
3218 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 3256 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
@@ -3220,21 +3258,8 @@ end_io:
3220 old_sector = bio->bi_sector; 3258 old_sector = bio->bi_sector;
3221 old_dev = bio->bi_bdev->bd_dev; 3259 old_dev = bio->bi_bdev->bd_dev;
3222 3260
3223 maxsector = bio->bi_bdev->bd_inode->i_size >> 9; 3261 if (bio_check_eod(bio, nr_sectors))
3224 if (maxsector) { 3262 goto end_io;
3225 sector_t sector = bio->bi_sector;
3226
3227 if (maxsector < nr_sectors ||
3228 maxsector - nr_sectors < sector) {
3229 /*
3230 * This may well happen - partitions are not
3231 * checked to make sure they are within the size
3232 * of the whole device.
3233 */
3234 handle_bad_sector(bio);
3235 goto end_io;
3236 }
3237 }
3238 3263
3239 ret = q->make_request_fn(q, bio); 3264 ret = q->make_request_fn(q, bio);
3240 } while (ret); 3265 } while (ret);
@@ -3307,23 +3332,32 @@ void submit_bio(int rw, struct bio *bio)
3307{ 3332{
3308 int count = bio_sectors(bio); 3333 int count = bio_sectors(bio);
3309 3334
3310 BIO_BUG_ON(!bio->bi_size);
3311 BIO_BUG_ON(!bio->bi_io_vec);
3312 bio->bi_rw |= rw; 3335 bio->bi_rw |= rw;
3313 if (rw & WRITE) {
3314 count_vm_events(PGPGOUT, count);
3315 } else {
3316 task_io_account_read(bio->bi_size);
3317 count_vm_events(PGPGIN, count);
3318 }
3319 3336
3320 if (unlikely(block_dump)) { 3337 /*
3321 char b[BDEVNAME_SIZE]; 3338 * If it's a regular read/write or a barrier with data attached,
3322 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", 3339 * go through the normal accounting stuff before submission.
3323 current->comm, current->pid, 3340 */
3324 (rw & WRITE) ? "WRITE" : "READ", 3341 if (!bio_empty_barrier(bio)) {
3325 (unsigned long long)bio->bi_sector, 3342
3326 bdevname(bio->bi_bdev,b)); 3343 BIO_BUG_ON(!bio->bi_size);
3344 BIO_BUG_ON(!bio->bi_io_vec);
3345
3346 if (rw & WRITE) {
3347 count_vm_events(PGPGOUT, count);
3348 } else {
3349 task_io_account_read(bio->bi_size);
3350 count_vm_events(PGPGIN, count);
3351 }
3352
3353 if (unlikely(block_dump)) {
3354 char b[BDEVNAME_SIZE];
3355 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
3356 current->comm, current->pid,
3357 (rw & WRITE) ? "WRITE" : "READ",
3358 (unsigned long long)bio->bi_sector,
3359 bdevname(bio->bi_bdev,b));
3360 }
3327 } 3361 }
3328 3362
3329 generic_make_request(bio); 3363 generic_make_request(bio);
@@ -3399,6 +3433,14 @@ static int __end_that_request_first(struct request *req, int uptodate,
3399 while ((bio = req->bio) != NULL) { 3433 while ((bio = req->bio) != NULL) {
3400 int nbytes; 3434 int nbytes;
3401 3435
3436 /*
3437 * For an empty barrier request, the low level driver must
3438 * store a potential error location in ->sector. We pass
3439 * that back up in ->bi_sector.
3440 */
3441 if (blk_empty_barrier(req))
3442 bio->bi_sector = req->sector;
3443
3402 if (nr_bytes >= bio->bi_size) { 3444 if (nr_bytes >= bio->bi_size) {
3403 req->bio = bio->bi_next; 3445 req->bio = bio->bi_next;
3404 nbytes = bio->bi_size; 3446 nbytes = bio->bi_size;
@@ -3564,7 +3606,7 @@ static struct notifier_block blk_cpu_notifier __cpuinitdata = {
3564 * Description: 3606 * Description:
3565 * Ends all I/O on a request. It does not handle partial completions, 3607 * Ends all I/O on a request. It does not handle partial completions,
3566 * unless the driver actually implements this in its completion callback 3608 * unless the driver actually implements this in its completion callback
3567 * through requeueing. Theh actual completion happens out-of-order, 3609 * through requeueing. The actual completion happens out-of-order,
3568 * through a softirq handler. The user must have registered a completion 3610 * through a softirq handler. The user must have registered a completion
3569 * callback through blk_queue_softirq_done(). 3611 * callback through blk_queue_softirq_done().
3570 **/ 3612 **/
@@ -3627,15 +3669,83 @@ void end_that_request_last(struct request *req, int uptodate)
3627 3669
3628EXPORT_SYMBOL(end_that_request_last); 3670EXPORT_SYMBOL(end_that_request_last);
3629 3671
3630void end_request(struct request *req, int uptodate) 3672static inline void __end_request(struct request *rq, int uptodate,
3673 unsigned int nr_bytes, int dequeue)
3631{ 3674{
3632 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { 3675 if (!end_that_request_chunk(rq, uptodate, nr_bytes)) {
3633 add_disk_randomness(req->rq_disk); 3676 if (dequeue)
3634 blkdev_dequeue_request(req); 3677 blkdev_dequeue_request(rq);
3635 end_that_request_last(req, uptodate); 3678 add_disk_randomness(rq->rq_disk);
3679 end_that_request_last(rq, uptodate);
3636 } 3680 }
3637} 3681}
3638 3682
3683static unsigned int rq_byte_size(struct request *rq)
3684{
3685 if (blk_fs_request(rq))
3686 return rq->hard_nr_sectors << 9;
3687
3688 return rq->data_len;
3689}
3690
3691/**
3692 * end_queued_request - end all I/O on a queued request
3693 * @rq: the request being processed
3694 * @uptodate: error value or 0/1 uptodate flag
3695 *
3696 * Description:
3697 * Ends all I/O on a request, and removes it from the block layer queues.
3698 * Not suitable for normal IO completion, unless the driver still has
3699 * the request attached to the block layer.
3700 *
3701 **/
3702void end_queued_request(struct request *rq, int uptodate)
3703{
3704 __end_request(rq, uptodate, rq_byte_size(rq), 1);
3705}
3706EXPORT_SYMBOL(end_queued_request);
3707
3708/**
3709 * end_dequeued_request - end all I/O on a dequeued request
3710 * @rq: the request being processed
3711 * @uptodate: error value or 0/1 uptodate flag
3712 *
3713 * Description:
3714 * Ends all I/O on a request. The request must already have been
3715 * dequeued using blkdev_dequeue_request(), as is normally the case
3716 * for most drivers.
3717 *
3718 **/
3719void end_dequeued_request(struct request *rq, int uptodate)
3720{
3721 __end_request(rq, uptodate, rq_byte_size(rq), 0);
3722}
3723EXPORT_SYMBOL(end_dequeued_request);
3724
3725
3726/**
3727 * end_request - end I/O on the current segment of the request
3728 * @rq: the request being processed
3729 * @uptodate: error value or 0/1 uptodate flag
3730 *
3731 * Description:
3732 * Ends I/O on the current segment of a request. If that is the only
3733 * remaining segment, the request is also completed and freed.
3734 *
3735 * This is a remnant of how older block drivers handled IO completions.
3736 * Modern drivers typically end IO on the full request in one go, unless
3737 * they have a residual value to account for. For that case this function
3738 * isn't really useful, unless the residual just happens to be the
3739 * full current segment. In other words, don't use this function in new
3740 * code. Either use end_queued_request()/end_dequeued_request(), or
3741 * end_that_request_chunk() (along with end_that_request_last()) for
3742 * partial completions.
3743 *
3744 **/
3745void end_request(struct request *req, int uptodate)
3746{
3747 __end_request(req, uptodate, req->hard_cur_sectors << 9, 1);
3748}
3639EXPORT_SYMBOL(end_request); 3749EXPORT_SYMBOL(end_request);
3640 3750
3641static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 3751static void blk_rq_bio_prep(struct request_queue *q, struct request *rq,