aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r-- block/Kconfig 11
-rw-r--r-- block/as-iosched.c 24
-rw-r--r-- block/blk-barrier.c 27
-rw-r--r-- block/blk-core.c 848
-rw-r--r-- block/blk-exec.c 1
-rw-r--r-- block/blk-integrity.c 2
-rw-r--r-- block/blk-ioc.c 12
-rw-r--r-- block/blk-map.c 25
-rw-r--r-- block/blk-merge.c 71
-rw-r--r-- block/blk-settings.c 269
-rw-r--r-- block/blk-sysfs.c 62
-rw-r--r-- block/blk-tag.c 17
-rw-r--r-- block/blk-timeout.c 22
-rw-r--r-- block/blk.h 51
-rw-r--r-- block/bsg.c 8
-rw-r--r-- block/cfq-iosched.c 38
-rw-r--r-- block/compat_ioctl.c 4
-rw-r--r-- block/deadline-iosched.c 2
-rw-r--r-- block/elevator.c 185
-rw-r--r-- block/genhd.c 11
-rw-r--r-- block/ioctl.c 12
-rw-r--r-- block/scsi_ioctl.c 13
22 files changed, 1061 insertions, 654 deletions
diff --git a/block/Kconfig b/block/Kconfig
index e7d12782bcfb..2c39527aa7db 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -26,6 +26,7 @@ if BLOCK
26config LBD 26config LBD
27 bool "Support for large block devices and files" 27 bool "Support for large block devices and files"
28 depends on !64BIT 28 depends on !64BIT
29 default y
29 help 30 help
30 Enable block devices or files of size 2TB and larger. 31 Enable block devices or files of size 2TB and larger.
31 32
@@ -38,11 +39,13 @@ config LBD
38 39
39 The ext4 filesystem requires that this feature be enabled in 40 The ext4 filesystem requires that this feature be enabled in
40 order to support filesystems that have the huge_file feature 41 order to support filesystems that have the huge_file feature
41 enabled. Otherwise, it will refuse to mount any filesystems 42 enabled. Otherwise, it will refuse to mount in the read-write
42 that use the huge_file feature, which is enabled by default 43 mode any filesystems that use the huge_file feature, which is
43 by mke2fs.ext4. The GFS2 filesystem also requires this feature. 44 enabled by default by mke2fs.ext4.
44 45
45 If unsure, say N. 46 The GFS2 filesystem also requires this feature.
47
48 If unsure, say Y.
46 49
47config BLK_DEV_BSG 50config BLK_DEV_BSG
48 bool "Block layer SG support v4 (EXPERIMENTAL)" 51 bool "Block layer SG support v4 (EXPERIMENTAL)"
diff --git a/block/as-iosched.c b/block/as-iosched.c
index c48fa670d221..7a12cf6ee1d3 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -306,8 +306,8 @@ as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2)
306 data_dir = rq_is_sync(rq1); 306 data_dir = rq_is_sync(rq1);
307 307
308 last = ad->last_sector[data_dir]; 308 last = ad->last_sector[data_dir];
309 s1 = rq1->sector; 309 s1 = blk_rq_pos(rq1);
310 s2 = rq2->sector; 310 s2 = blk_rq_pos(rq2);
311 311
312 BUG_ON(data_dir != rq_is_sync(rq2)); 312 BUG_ON(data_dir != rq_is_sync(rq2));
313 313
@@ -566,13 +566,15 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
566 as_update_thinktime(ad, aic, thinktime); 566 as_update_thinktime(ad, aic, thinktime);
567 567
568 /* Calculate read -> read seek distance */ 568 /* Calculate read -> read seek distance */
569 if (aic->last_request_pos < rq->sector) 569 if (aic->last_request_pos < blk_rq_pos(rq))
570 seek_dist = rq->sector - aic->last_request_pos; 570 seek_dist = blk_rq_pos(rq) -
571 aic->last_request_pos;
571 else 572 else
572 seek_dist = aic->last_request_pos - rq->sector; 573 seek_dist = aic->last_request_pos -
574 blk_rq_pos(rq);
573 as_update_seekdist(ad, aic, seek_dist); 575 as_update_seekdist(ad, aic, seek_dist);
574 } 576 }
575 aic->last_request_pos = rq->sector + rq->nr_sectors; 577 aic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
576 set_bit(AS_TASK_IOSTARTED, &aic->state); 578 set_bit(AS_TASK_IOSTARTED, &aic->state);
577 spin_unlock(&aic->lock); 579 spin_unlock(&aic->lock);
578 } 580 }
@@ -587,7 +589,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic,
587{ 589{
588 unsigned long delay; /* jiffies */ 590 unsigned long delay; /* jiffies */
589 sector_t last = ad->last_sector[ad->batch_data_dir]; 591 sector_t last = ad->last_sector[ad->batch_data_dir];
590 sector_t next = rq->sector; 592 sector_t next = blk_rq_pos(rq);
591 sector_t delta; /* acceptable close offset (in sectors) */ 593 sector_t delta; /* acceptable close offset (in sectors) */
592 sector_t s; 594 sector_t s;
593 595
@@ -981,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
981 * This has to be set in order to be correctly updated by 983 * This has to be set in order to be correctly updated by
982 * as_find_next_rq 984 * as_find_next_rq
983 */ 985 */
984 ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; 986 ad->last_sector[data_dir] = blk_rq_pos(rq) + blk_rq_sectors(rq);
985 987
986 if (data_dir == BLK_RW_SYNC) { 988 if (data_dir == BLK_RW_SYNC) {
987 struct io_context *ioc = RQ_IOC(rq); 989 struct io_context *ioc = RQ_IOC(rq);
@@ -1312,12 +1314,8 @@ static void as_merged_requests(struct request_queue *q, struct request *req,
1312static void as_work_handler(struct work_struct *work) 1314static void as_work_handler(struct work_struct *work)
1313{ 1315{
1314 struct as_data *ad = container_of(work, struct as_data, antic_work); 1316 struct as_data *ad = container_of(work, struct as_data, antic_work);
1315 struct request_queue *q = ad->q;
1316 unsigned long flags;
1317 1317
1318 spin_lock_irqsave(q->queue_lock, flags); 1318 blk_run_queue(ad->q);
1319 blk_start_queueing(q);
1320 spin_unlock_irqrestore(q->queue_lock, flags);
1321} 1319}
1322 1320
1323static int as_may_queue(struct request_queue *q, int rw) 1321static int as_may_queue(struct request_queue *q, int rw)
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 20b4111fa050..30022b4e2f63 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -106,10 +106,7 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
106 */ 106 */
107 q->ordseq = 0; 107 q->ordseq = 0;
108 rq = q->orig_bar_rq; 108 rq = q->orig_bar_rq;
109 109 __blk_end_request_all(rq, q->orderr);
110 if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
111 BUG();
112
113 return true; 110 return true;
114} 111}
115 112
@@ -166,7 +163,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
166 * For an empty barrier, there's no actual BAR request, which 163 * For an empty barrier, there's no actual BAR request, which
167 * in turn makes POSTFLUSH unnecessary. Mask them off. 164 * in turn makes POSTFLUSH unnecessary. Mask them off.
168 */ 165 */
169 if (!rq->hard_nr_sectors) { 166 if (!blk_rq_sectors(rq)) {
170 q->ordered &= ~(QUEUE_ORDERED_DO_BAR | 167 q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
171 QUEUE_ORDERED_DO_POSTFLUSH); 168 QUEUE_ORDERED_DO_POSTFLUSH);
172 /* 169 /*
@@ -183,7 +180,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
183 } 180 }
184 181
185 /* stash away the original request */ 182 /* stash away the original request */
186 elv_dequeue_request(q, rq); 183 blk_dequeue_request(rq);
187 q->orig_bar_rq = rq; 184 q->orig_bar_rq = rq;
188 rq = NULL; 185 rq = NULL;
189 186
@@ -221,7 +218,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
221 } else 218 } else
222 skip |= QUEUE_ORDSEQ_PREFLUSH; 219 skip |= QUEUE_ORDSEQ_PREFLUSH;
223 220
224 if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight) 221 if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
225 rq = NULL; 222 rq = NULL;
226 else 223 else
227 skip |= QUEUE_ORDSEQ_DRAIN; 224 skip |= QUEUE_ORDSEQ_DRAIN;
@@ -251,10 +248,8 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp)
251 * Queue ordering not supported. Terminate 248 * Queue ordering not supported. Terminate
252 * with prejudice. 249 * with prejudice.
253 */ 250 */
254 elv_dequeue_request(q, rq); 251 blk_dequeue_request(rq);
255 if (__blk_end_request(rq, -EOPNOTSUPP, 252 __blk_end_request_all(rq, -EOPNOTSUPP);
256 blk_rq_bytes(rq)))
257 BUG();
258 *rqp = NULL; 253 *rqp = NULL;
259 return false; 254 return false;
260 } 255 }
@@ -329,7 +324,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
329 /* 324 /*
330 * The driver must store the error location in ->bi_sector, if 325 * The driver must store the error location in ->bi_sector, if
331 * it supports it. For non-stacked drivers, this should be copied 326 * it supports it. For non-stacked drivers, this should be copied
332 * from rq->sector. 327 * from blk_rq_pos(rq).
333 */ 328 */
334 if (error_sector) 329 if (error_sector)
335 *error_sector = bio->bi_sector; 330 *error_sector = bio->bi_sector;
@@ -393,10 +388,10 @@ int blkdev_issue_discard(struct block_device *bdev,
393 388
394 bio->bi_sector = sector; 389 bio->bi_sector = sector;
395 390
396 if (nr_sects > q->max_hw_sectors) { 391 if (nr_sects > queue_max_hw_sectors(q)) {
397 bio->bi_size = q->max_hw_sectors << 9; 392 bio->bi_size = queue_max_hw_sectors(q) << 9;
398 nr_sects -= q->max_hw_sectors; 393 nr_sects -= queue_max_hw_sectors(q);
399 sector += q->max_hw_sectors; 394 sector += queue_max_hw_sectors(q);
400 } else { 395 } else {
401 bio->bi_size = nr_sects << 9; 396 bio->bi_size = nr_sects << 9;
402 nr_sects = 0; 397 nr_sects = 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index 648f15cb41f1..d17d71c71d4f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -60,11 +60,11 @@ static void drive_stat_acct(struct request *rq, int new_io)
60 int rw = rq_data_dir(rq); 60 int rw = rq_data_dir(rq);
61 int cpu; 61 int cpu;
62 62
63 if (!blk_fs_request(rq) || !blk_do_io_stat(rq)) 63 if (!blk_do_io_stat(rq))
64 return; 64 return;
65 65
66 cpu = part_stat_lock(); 66 cpu = part_stat_lock();
67 part = disk_map_sector_rcu(rq->rq_disk, rq->sector); 67 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
68 68
69 if (!new_io) 69 if (!new_io)
70 part_stat_inc(cpu, part, merges[rw]); 70 part_stat_inc(cpu, part, merges[rw]);
@@ -119,13 +119,14 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
119 INIT_LIST_HEAD(&rq->timeout_list); 119 INIT_LIST_HEAD(&rq->timeout_list);
120 rq->cpu = -1; 120 rq->cpu = -1;
121 rq->q = q; 121 rq->q = q;
122 rq->sector = rq->hard_sector = (sector_t) -1; 122 rq->__sector = (sector_t) -1;
123 INIT_HLIST_NODE(&rq->hash); 123 INIT_HLIST_NODE(&rq->hash);
124 RB_CLEAR_NODE(&rq->rb_node); 124 RB_CLEAR_NODE(&rq->rb_node);
125 rq->cmd = rq->__cmd; 125 rq->cmd = rq->__cmd;
126 rq->cmd_len = BLK_MAX_CDB; 126 rq->cmd_len = BLK_MAX_CDB;
127 rq->tag = -1; 127 rq->tag = -1;
128 rq->ref_count = 1; 128 rq->ref_count = 1;
129 rq->start_time = jiffies;
129} 130}
130EXPORT_SYMBOL(blk_rq_init); 131EXPORT_SYMBOL(blk_rq_init);
131 132
@@ -176,14 +177,11 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
176 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, 177 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
177 rq->cmd_flags); 178 rq->cmd_flags);
178 179
179 printk(KERN_INFO " sector %llu, nr/cnr %lu/%u\n", 180 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
180 (unsigned long long)rq->sector, 181 (unsigned long long)blk_rq_pos(rq),
181 rq->nr_sectors, 182 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
182 rq->current_nr_sectors); 183 printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",
183 printk(KERN_INFO " bio %p, biotail %p, buffer %p, data %p, len %u\n", 184 rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
184 rq->bio, rq->biotail,
185 rq->buffer, rq->data,
186 rq->data_len);
187 185
188 if (blk_pc_request(rq)) { 186 if (blk_pc_request(rq)) {
189 printk(KERN_INFO " cdb: "); 187 printk(KERN_INFO " cdb: ");
@@ -325,24 +323,6 @@ void blk_unplug(struct request_queue *q)
325} 323}
326EXPORT_SYMBOL(blk_unplug); 324EXPORT_SYMBOL(blk_unplug);
327 325
328static void blk_invoke_request_fn(struct request_queue *q)
329{
330 if (unlikely(blk_queue_stopped(q)))
331 return;
332
333 /*
334 * one level of recursion is ok and is much faster than kicking
335 * the unplug handling
336 */
337 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
338 q->request_fn(q);
339 queue_flag_clear(QUEUE_FLAG_REENTER, q);
340 } else {
341 queue_flag_set(QUEUE_FLAG_PLUGGED, q);
342 kblockd_schedule_work(q, &q->unplug_work);
343 }
344}
345
346/** 326/**
347 * blk_start_queue - restart a previously stopped queue 327 * blk_start_queue - restart a previously stopped queue
348 * @q: The &struct request_queue in question 328 * @q: The &struct request_queue in question
@@ -357,7 +337,7 @@ void blk_start_queue(struct request_queue *q)
357 WARN_ON(!irqs_disabled()); 337 WARN_ON(!irqs_disabled());
358 338
359 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 339 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
360 blk_invoke_request_fn(q); 340 __blk_run_queue(q);
361} 341}
362EXPORT_SYMBOL(blk_start_queue); 342EXPORT_SYMBOL(blk_start_queue);
363 343
@@ -417,12 +397,23 @@ void __blk_run_queue(struct request_queue *q)
417{ 397{
418 blk_remove_plug(q); 398 blk_remove_plug(q);
419 399
400 if (unlikely(blk_queue_stopped(q)))
401 return;
402
403 if (elv_queue_empty(q))
404 return;
405
420 /* 406 /*
421 * Only recurse once to avoid overrunning the stack, let the unplug 407 * Only recurse once to avoid overrunning the stack, let the unplug
422 * handling reinvoke the handler shortly if we already got there. 408 * handling reinvoke the handler shortly if we already got there.
423 */ 409 */
424 if (!elv_queue_empty(q)) 410 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
425 blk_invoke_request_fn(q); 411 q->request_fn(q);
412 queue_flag_clear(QUEUE_FLAG_REENTER, q);
413 } else {
414 queue_flag_set(QUEUE_FLAG_PLUGGED, q);
415 kblockd_schedule_work(q, &q->unplug_work);
416 }
426} 417}
427EXPORT_SYMBOL(__blk_run_queue); 418EXPORT_SYMBOL(__blk_run_queue);
428 419
@@ -432,9 +423,7 @@ EXPORT_SYMBOL(__blk_run_queue);
432 * 423 *
433 * Description: 424 * Description:
434 * Invoke request handling on this queue, if it has pending work to do. 425 * Invoke request handling on this queue, if it has pending work to do.
435 * May be used to restart queueing when a request has completed. Also 426 * May be used to restart queueing when a request has completed.
436 * See @blk_start_queueing.
437 *
438 */ 427 */
439void blk_run_queue(struct request_queue *q) 428void blk_run_queue(struct request_queue *q)
440{ 429{
@@ -894,26 +883,58 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
894EXPORT_SYMBOL(blk_get_request); 883EXPORT_SYMBOL(blk_get_request);
895 884
896/** 885/**
897 * blk_start_queueing - initiate dispatch of requests to device 886 * blk_make_request - given a bio, allocate a corresponding struct request.
898 * @q: request queue to kick into gear 887 *
888 * @bio: The bio describing the memory mappings that will be submitted for IO.
889 * It may be a chained-bio properly constructed by block/bio layer.
899 * 890 *
900 * This is basically a helper to remove the need to know whether a queue 891 * blk_make_request is the parallel of generic_make_request for BLOCK_PC
901 * is plugged or not if someone just wants to initiate dispatch of requests 892 * type commands. Where the struct request needs to be further initialized by
902 * for this queue. Should be used to start queueing on a device outside 893 * the caller. It is passed a &struct bio, which describes the memory info of
903 * of ->request_fn() context. Also see @blk_run_queue. 894 * the I/O transfer.
904 * 895 *
905 * The queue lock must be held with interrupts disabled. 896 * The caller of blk_make_request must make sure that bi_io_vec
897 * are set to describe the memory buffers. That bio_data_dir() will return
898 * the needed direction of the request. (And all bio's in the passed bio-chain
899 * are properly set accordingly)
900 *
901 * If called under non-sleepable conditions, mapped bio buffers must not
902 * need bouncing, by calling the appropriate masked or flagged allocator,
903 * suitable for the target device. Otherwise the call to blk_queue_bounce will
904 * BUG.
905 *
906 * WARNING: When allocating/cloning a bio-chain, careful consideration should be
907 * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
908 * anything but the first bio in the chain. Otherwise you risk waiting for IO
909 * completion of a bio that hasn't been submitted yet, thus resulting in a
910 * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
911 * of bio_alloc(), as that avoids the mempool deadlock.
912 * If possible a big IO should be split into smaller parts when allocation
913 * fails. Partial allocation should not be an error, or you risk a live-lock.
906 */ 914 */
907void blk_start_queueing(struct request_queue *q) 915struct request *blk_make_request(struct request_queue *q, struct bio *bio,
916 gfp_t gfp_mask)
908{ 917{
909 if (!blk_queue_plugged(q)) { 918 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
910 if (unlikely(blk_queue_stopped(q))) 919
911 return; 920 if (unlikely(!rq))
912 q->request_fn(q); 921 return ERR_PTR(-ENOMEM);
913 } else 922
914 __generic_unplug_device(q); 923 for_each_bio(bio) {
924 struct bio *bounce_bio = bio;
925 int ret;
926
927 blk_queue_bounce(q, &bounce_bio);
928 ret = blk_rq_append_bio(q, rq, bounce_bio);
929 if (unlikely(ret)) {
930 blk_put_request(rq);
931 return ERR_PTR(ret);
932 }
933 }
934
935 return rq;
915} 936}
916EXPORT_SYMBOL(blk_start_queueing); 937EXPORT_SYMBOL(blk_make_request);
917 938
918/** 939/**
919 * blk_requeue_request - put a request back on queue 940 * blk_requeue_request - put a request back on queue
@@ -934,6 +955,8 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
934 if (blk_rq_tagged(rq)) 955 if (blk_rq_tagged(rq))
935 blk_queue_end_tag(q, rq); 956 blk_queue_end_tag(q, rq);
936 957
958 BUG_ON(blk_queued_rq(rq));
959
937 elv_requeue_request(q, rq); 960 elv_requeue_request(q, rq);
938} 961}
939EXPORT_SYMBOL(blk_requeue_request); 962EXPORT_SYMBOL(blk_requeue_request);
@@ -969,7 +992,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
969 * barrier 992 * barrier
970 */ 993 */
971 rq->cmd_type = REQ_TYPE_SPECIAL; 994 rq->cmd_type = REQ_TYPE_SPECIAL;
972 rq->cmd_flags |= REQ_SOFTBARRIER;
973 995
974 rq->special = data; 996 rq->special = data;
975 997
@@ -983,7 +1005,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
983 1005
984 drive_stat_acct(rq, 1); 1006 drive_stat_acct(rq, 1);
985 __elv_add_request(q, rq, where, 0); 1007 __elv_add_request(q, rq, where, 0);
986 blk_start_queueing(q); 1008 __blk_run_queue(q);
987 spin_unlock_irqrestore(q->queue_lock, flags); 1009 spin_unlock_irqrestore(q->queue_lock, flags);
988} 1010}
989EXPORT_SYMBOL(blk_insert_request); 1011EXPORT_SYMBOL(blk_insert_request);
@@ -1105,16 +1127,13 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1105 if (bio_failfast_driver(bio)) 1127 if (bio_failfast_driver(bio))
1106 req->cmd_flags |= REQ_FAILFAST_DRIVER; 1128 req->cmd_flags |= REQ_FAILFAST_DRIVER;
1107 1129
1108 /*
1109 * REQ_BARRIER implies no merging, but lets make it explicit
1110 */
1111 if (unlikely(bio_discard(bio))) { 1130 if (unlikely(bio_discard(bio))) {
1112 req->cmd_flags |= REQ_DISCARD; 1131 req->cmd_flags |= REQ_DISCARD;
1113 if (bio_barrier(bio)) 1132 if (bio_barrier(bio))
1114 req->cmd_flags |= REQ_SOFTBARRIER; 1133 req->cmd_flags |= REQ_SOFTBARRIER;
1115 req->q->prepare_discard_fn(req->q, req); 1134 req->q->prepare_discard_fn(req->q, req);
1116 } else if (unlikely(bio_barrier(bio))) 1135 } else if (unlikely(bio_barrier(bio)))
1117 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); 1136 req->cmd_flags |= REQ_HARDBARRIER;
1118 1137
1119 if (bio_sync(bio)) 1138 if (bio_sync(bio))
1120 req->cmd_flags |= REQ_RW_SYNC; 1139 req->cmd_flags |= REQ_RW_SYNC;
@@ -1124,9 +1143,8 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1124 req->cmd_flags |= REQ_NOIDLE; 1143 req->cmd_flags |= REQ_NOIDLE;
1125 1144
1126 req->errors = 0; 1145 req->errors = 0;
1127 req->hard_sector = req->sector = bio->bi_sector; 1146 req->__sector = bio->bi_sector;
1128 req->ioprio = bio_prio(bio); 1147 req->ioprio = bio_prio(bio);
1129 req->start_time = jiffies;
1130 blk_rq_bio_prep(req->q, req, bio); 1148 blk_rq_bio_prep(req->q, req, bio);
1131} 1149}
1132 1150
@@ -1142,14 +1160,13 @@ static inline bool queue_should_plug(struct request_queue *q)
1142static int __make_request(struct request_queue *q, struct bio *bio) 1160static int __make_request(struct request_queue *q, struct bio *bio)
1143{ 1161{
1144 struct request *req; 1162 struct request *req;
1145 int el_ret, nr_sectors; 1163 int el_ret;
1164 unsigned int bytes = bio->bi_size;
1146 const unsigned short prio = bio_prio(bio); 1165 const unsigned short prio = bio_prio(bio);
1147 const int sync = bio_sync(bio); 1166 const int sync = bio_sync(bio);
1148 const int unplug = bio_unplug(bio); 1167 const int unplug = bio_unplug(bio);
1149 int rw_flags; 1168 int rw_flags;
1150 1169
1151 nr_sectors = bio_sectors(bio);
1152
1153 /* 1170 /*
1154 * low level driver can indicate that it wants pages above a 1171 * low level driver can indicate that it wants pages above a
1155 * certain limit bounced to low memory (ie for highmem, or even 1172 * certain limit bounced to low memory (ie for highmem, or even
@@ -1174,7 +1191,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1174 1191
1175 req->biotail->bi_next = bio; 1192 req->biotail->bi_next = bio;
1176 req->biotail = bio; 1193 req->biotail = bio;
1177 req->nr_sectors = req->hard_nr_sectors += nr_sectors; 1194 req->__data_len += bytes;
1178 req->ioprio = ioprio_best(req->ioprio, prio); 1195 req->ioprio = ioprio_best(req->ioprio, prio);
1179 if (!blk_rq_cpu_valid(req)) 1196 if (!blk_rq_cpu_valid(req))
1180 req->cpu = bio->bi_comp_cpu; 1197 req->cpu = bio->bi_comp_cpu;
@@ -1200,10 +1217,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1200 * not touch req->buffer either... 1217 * not touch req->buffer either...
1201 */ 1218 */
1202 req->buffer = bio_data(bio); 1219 req->buffer = bio_data(bio);
1203 req->current_nr_sectors = bio_cur_sectors(bio); 1220 req->__sector = bio->bi_sector;
1204 req->hard_cur_sectors = req->current_nr_sectors; 1221 req->__data_len += bytes;
1205 req->sector = req->hard_sector = bio->bi_sector;
1206 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
1207 req->ioprio = ioprio_best(req->ioprio, prio); 1222 req->ioprio = ioprio_best(req->ioprio, prio);
1208 if (!blk_rq_cpu_valid(req)) 1223 if (!blk_rq_cpu_valid(req))
1209 req->cpu = bio->bi_comp_cpu; 1224 req->cpu = bio->bi_comp_cpu;
@@ -1414,11 +1429,11 @@ static inline void __generic_make_request(struct bio *bio)
1414 goto end_io; 1429 goto end_io;
1415 } 1430 }
1416 1431
1417 if (unlikely(nr_sectors > q->max_hw_sectors)) { 1432 if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
1418 printk(KERN_ERR "bio too big device %s (%u > %u)\n", 1433 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1419 bdevname(bio->bi_bdev, b), 1434 bdevname(bio->bi_bdev, b),
1420 bio_sectors(bio), 1435 bio_sectors(bio),
1421 q->max_hw_sectors); 1436 queue_max_hw_sectors(q));
1422 goto end_io; 1437 goto end_io;
1423 } 1438 }
1424 1439
@@ -1584,8 +1599,8 @@ EXPORT_SYMBOL(submit_bio);
1584 */ 1599 */
1585int blk_rq_check_limits(struct request_queue *q, struct request *rq) 1600int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1586{ 1601{
1587 if (rq->nr_sectors > q->max_sectors || 1602 if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
1588 rq->data_len > q->max_hw_sectors << 9) { 1603 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1589 printk(KERN_ERR "%s: over max size limit.\n", __func__); 1604 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1590 return -EIO; 1605 return -EIO;
1591 } 1606 }
@@ -1597,8 +1612,8 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1597 * limitation. 1612 * limitation.
1598 */ 1613 */
1599 blk_recalc_rq_segments(rq); 1614 blk_recalc_rq_segments(rq);
1600 if (rq->nr_phys_segments > q->max_phys_segments || 1615 if (rq->nr_phys_segments > queue_max_phys_segments(q) ||
1601 rq->nr_phys_segments > q->max_hw_segments) { 1616 rq->nr_phys_segments > queue_max_hw_segments(q)) {
1602 printk(KERN_ERR "%s: over max segments limit.\n", __func__); 1617 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1603 return -EIO; 1618 return -EIO;
1604 } 1619 }
@@ -1642,40 +1657,15 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1642} 1657}
1643EXPORT_SYMBOL_GPL(blk_insert_cloned_request); 1658EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
1644 1659
1645/**
1646 * blkdev_dequeue_request - dequeue request and start timeout timer
1647 * @req: request to dequeue
1648 *
1649 * Dequeue @req and start timeout timer on it. This hands off the
1650 * request to the driver.
1651 *
1652 * Block internal functions which don't want to start timer should
1653 * call elv_dequeue_request().
1654 */
1655void blkdev_dequeue_request(struct request *req)
1656{
1657 elv_dequeue_request(req->q, req);
1658
1659 /*
1660 * We are now handing the request to the hardware, add the
1661 * timeout handler.
1662 */
1663 blk_add_timer(req);
1664}
1665EXPORT_SYMBOL(blkdev_dequeue_request);
1666
1667static void blk_account_io_completion(struct request *req, unsigned int bytes) 1660static void blk_account_io_completion(struct request *req, unsigned int bytes)
1668{ 1661{
1669 if (!blk_do_io_stat(req)) 1662 if (blk_do_io_stat(req)) {
1670 return;
1671
1672 if (blk_fs_request(req)) {
1673 const int rw = rq_data_dir(req); 1663 const int rw = rq_data_dir(req);
1674 struct hd_struct *part; 1664 struct hd_struct *part;
1675 int cpu; 1665 int cpu;
1676 1666
1677 cpu = part_stat_lock(); 1667 cpu = part_stat_lock();
1678 part = disk_map_sector_rcu(req->rq_disk, req->sector); 1668 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
1679 part_stat_add(cpu, part, sectors[rw], bytes >> 9); 1669 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1680 part_stat_unlock(); 1670 part_stat_unlock();
1681 } 1671 }
@@ -1683,22 +1673,19 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
1683 1673
1684static void blk_account_io_done(struct request *req) 1674static void blk_account_io_done(struct request *req)
1685{ 1675{
1686 if (!blk_do_io_stat(req))
1687 return;
1688
1689 /* 1676 /*
1690 * Account IO completion. bar_rq isn't accounted as a normal 1677 * Account IO completion. bar_rq isn't accounted as a normal
1691 * IO on queueing nor completion. Accounting the containing 1678 * IO on queueing nor completion. Accounting the containing
1692 * request is enough. 1679 * request is enough.
1693 */ 1680 */
1694 if (blk_fs_request(req) && req != &req->q->bar_rq) { 1681 if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
1695 unsigned long duration = jiffies - req->start_time; 1682 unsigned long duration = jiffies - req->start_time;
1696 const int rw = rq_data_dir(req); 1683 const int rw = rq_data_dir(req);
1697 struct hd_struct *part; 1684 struct hd_struct *part;
1698 int cpu; 1685 int cpu;
1699 1686
1700 cpu = part_stat_lock(); 1687 cpu = part_stat_lock();
1701 part = disk_map_sector_rcu(req->rq_disk, req->sector); 1688 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
1702 1689
1703 part_stat_inc(cpu, part, ios[rw]); 1690 part_stat_inc(cpu, part, ios[rw]);
1704 part_stat_add(cpu, part, ticks[rw], duration); 1691 part_stat_add(cpu, part, ticks[rw], duration);
@@ -1710,25 +1697,209 @@ static void blk_account_io_done(struct request *req)
1710} 1697}
1711 1698
1712/** 1699/**
1713 * __end_that_request_first - end I/O on a request 1700 * blk_peek_request - peek at the top of a request queue
1714 * @req: the request being processed 1701 * @q: request queue to peek at
1702 *
1703 * Description:
1704 * Return the request at the top of @q. The returned request
1705 * should be started using blk_start_request() before LLD starts
1706 * processing it.
1707 *
1708 * Return:
1709 * Pointer to the request at the top of @q if available. Null
1710 * otherwise.
1711 *
1712 * Context:
1713 * queue_lock must be held.
1714 */
1715struct request *blk_peek_request(struct request_queue *q)
1716{
1717 struct request *rq;
1718 int ret;
1719
1720 while ((rq = __elv_next_request(q)) != NULL) {
1721 if (!(rq->cmd_flags & REQ_STARTED)) {
1722 /*
1723 * This is the first time the device driver
1724 * sees this request (possibly after
1725 * requeueing). Notify IO scheduler.
1726 */
1727 if (blk_sorted_rq(rq))
1728 elv_activate_rq(q, rq);
1729
1730 /*
1731 * just mark as started even if we don't start
1732 * it, a request that has been delayed should
1733 * not be passed by new incoming requests
1734 */
1735 rq->cmd_flags |= REQ_STARTED;
1736 trace_block_rq_issue(q, rq);
1737 }
1738
1739 if (!q->boundary_rq || q->boundary_rq == rq) {
1740 q->end_sector = rq_end_sector(rq);
1741 q->boundary_rq = NULL;
1742 }
1743
1744 if (rq->cmd_flags & REQ_DONTPREP)
1745 break;
1746
1747 if (q->dma_drain_size && blk_rq_bytes(rq)) {
1748 /*
1749 * make sure space for the drain appears; we
1750 * know we can do this because max_hw_segments
1751 * has been adjusted to be one fewer than the
1752 * device can handle
1753 */
1754 rq->nr_phys_segments++;
1755 }
1756
1757 if (!q->prep_rq_fn)
1758 break;
1759
1760 ret = q->prep_rq_fn(q, rq);
1761 if (ret == BLKPREP_OK) {
1762 break;
1763 } else if (ret == BLKPREP_DEFER) {
1764 /*
1765 * the request may have been (partially) prepped.
1766 * we need to keep this request in the front to
1767 * avoid resource deadlock. REQ_STARTED will
1768 * prevent other fs requests from passing this one.
1769 */
1770 if (q->dma_drain_size && blk_rq_bytes(rq) &&
1771 !(rq->cmd_flags & REQ_DONTPREP)) {
1772 /*
1773 * remove the space for the drain we added
1774 * so that we don't add it again
1775 */
1776 --rq->nr_phys_segments;
1777 }
1778
1779 rq = NULL;
1780 break;
1781 } else if (ret == BLKPREP_KILL) {
1782 rq->cmd_flags |= REQ_QUIET;
1783 /*
1784 * Mark this request as started so we don't trigger
1785 * any debug logic in the end I/O path.
1786 */
1787 blk_start_request(rq);
1788 __blk_end_request_all(rq, -EIO);
1789 } else {
1790 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
1791 break;
1792 }
1793 }
1794
1795 return rq;
1796}
1797EXPORT_SYMBOL(blk_peek_request);
1798
1799void blk_dequeue_request(struct request *rq)
1800{
1801 struct request_queue *q = rq->q;
1802
1803 BUG_ON(list_empty(&rq->queuelist));
1804 BUG_ON(ELV_ON_HASH(rq));
1805
1806 list_del_init(&rq->queuelist);
1807
1808 /*
1809 * the time frame between a request being removed from the lists
1810 * and to it is freed is accounted as io that is in progress at
1811 * the driver side.
1812 */
1813 if (blk_account_rq(rq))
1814 q->in_flight[rq_is_sync(rq)]++;
1815}
1816
1817/**
1818 * blk_start_request - start request processing on the driver
1819 * @req: request to dequeue
1820 *
1821 * Description:
1822 * Dequeue @req and start timeout timer on it. This hands off the
1823 * request to the driver.
1824 *
1825 * Block internal functions which don't want to start timer should
1826 * call blk_dequeue_request().
1827 *
1828 * Context:
1829 * queue_lock must be held.
1830 */
1831void blk_start_request(struct request *req)
1832{
1833 blk_dequeue_request(req);
1834
1835 /*
1836 * We are now handing the request to the hardware, initialize
1837 * resid_len to full count and add the timeout handler.
1838 */
1839 req->resid_len = blk_rq_bytes(req);
1840 if (unlikely(blk_bidi_rq(req)))
1841 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
1842
1843 blk_add_timer(req);
1844}
1845EXPORT_SYMBOL(blk_start_request);
1846
1847/**
1848 * blk_fetch_request - fetch a request from a request queue
1849 * @q: request queue to fetch a request from
1850 *
1851 * Description:
1852 * Return the request at the top of @q. The request is started on
1853 * return and LLD can start processing it immediately.
1854 *
1855 * Return:
1856 * Pointer to the request at the top of @q if available. Null
1857 * otherwise.
1858 *
1859 * Context:
1860 * queue_lock must be held.
1861 */
1862struct request *blk_fetch_request(struct request_queue *q)
1863{
1864 struct request *rq;
1865
1866 rq = blk_peek_request(q);
1867 if (rq)
1868 blk_start_request(rq);
1869 return rq;
1870}
1871EXPORT_SYMBOL(blk_fetch_request);
1872
1873/**
1874 * blk_update_request - Special helper function for request stacking drivers
1875 * @rq: the request being processed
1715 * @error: %0 for success, < %0 for error 1876 * @error: %0 for success, < %0 for error
1716 * @nr_bytes: number of bytes to complete 1877 * @nr_bytes: number of bytes to complete @rq
1717 * 1878 *
1718 * Description: 1879 * Description:
1719 * Ends I/O on a number of bytes attached to @req, and sets it up 1880 * Ends I/O on a number of bytes attached to @rq, but doesn't complete
1720 * for the next range of segments (if any) in the cluster. 1881 * the request structure even if @rq doesn't have leftover.
1882 * If @rq has leftover, sets it up for the next range of segments.
1883 *
1884 * This special helper function is only for request stacking drivers
1885 * (e.g. request-based dm) so that they can handle partial completion.
1886 * Actual device drivers should use blk_end_request instead.
1887 *
1888 * Passing the result of blk_rq_bytes() as @nr_bytes guarantees
1889 * %false return from this function.
1721 * 1890 *
1722 * Return: 1891 * Return:
1723 * %0 - we are done with this request, call end_that_request_last() 1892 * %false - this request doesn't have any more data
1724 * %1 - still buffers pending for this request 1893 * %true - this request has more data
1725 **/ 1894 **/
1726static int __end_that_request_first(struct request *req, int error, 1895bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
1727 int nr_bytes)
1728{ 1896{
1729 int total_bytes, bio_nbytes, next_idx = 0; 1897 int total_bytes, bio_nbytes, next_idx = 0;
1730 struct bio *bio; 1898 struct bio *bio;
1731 1899
1900 if (!req->bio)
1901 return false;
1902
1732 trace_block_rq_complete(req->q, req); 1903 trace_block_rq_complete(req->q, req);
1733 1904
1734 /* 1905 /*
@@ -1745,7 +1916,7 @@ static int __end_that_request_first(struct request *req, int error,
1745 if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { 1916 if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
1746 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", 1917 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
1747 req->rq_disk ? req->rq_disk->disk_name : "?", 1918 req->rq_disk ? req->rq_disk->disk_name : "?",
1748 (unsigned long long)req->sector); 1919 (unsigned long long)blk_rq_pos(req));
1749 } 1920 }
1750 1921
1751 blk_account_io_completion(req, nr_bytes); 1922 blk_account_io_completion(req, nr_bytes);
@@ -1805,8 +1976,15 @@ static int __end_that_request_first(struct request *req, int error,
1805 /* 1976 /*
1806 * completely done 1977 * completely done
1807 */ 1978 */
1808 if (!req->bio) 1979 if (!req->bio) {
1809 return 0; 1980 /*
1981 * Reset counters so that the request stacking driver
1982 * can find how many bytes remain in the request
1983 * later.
1984 */
1985 req->__data_len = 0;
1986 return false;
1987 }
1810 1988
1811 /* 1989 /*
1812 * if the request wasn't completed, update state 1990 * if the request wasn't completed, update state
@@ -1818,21 +1996,55 @@ static int __end_that_request_first(struct request *req, int error,
1818 bio_iovec(bio)->bv_len -= nr_bytes; 1996 bio_iovec(bio)->bv_len -= nr_bytes;
1819 } 1997 }
1820 1998
1821 blk_recalc_rq_sectors(req, total_bytes >> 9); 1999 req->__data_len -= total_bytes;
2000 req->buffer = bio_data(req->bio);
2001
2002 /* update sector only for requests with clear definition of sector */
2003 if (blk_fs_request(req) || blk_discard_rq(req))
2004 req->__sector += total_bytes >> 9;
2005
2006 /*
2007 * If total number of sectors is less than the first segment
2008 * size, something has gone terribly wrong.
2009 */
2010 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
2011 printk(KERN_ERR "blk: request botched\n");
2012 req->__data_len = blk_rq_cur_bytes(req);
2013 }
2014
2015 /* recalculate the number of segments */
1822 blk_recalc_rq_segments(req); 2016 blk_recalc_rq_segments(req);
1823 return 1; 2017
2018 return true;
2019}
2020EXPORT_SYMBOL_GPL(blk_update_request);
2021
2022static bool blk_update_bidi_request(struct request *rq, int error,
2023 unsigned int nr_bytes,
2024 unsigned int bidi_bytes)
2025{
2026 if (blk_update_request(rq, error, nr_bytes))
2027 return true;
2028
2029 /* Bidi request must be completed as a whole */
2030 if (unlikely(blk_bidi_rq(rq)) &&
2031 blk_update_request(rq->next_rq, error, bidi_bytes))
2032 return true;
2033
2034 add_disk_randomness(rq->rq_disk);
2035
2036 return false;
1824} 2037}
1825 2038
1826/* 2039/*
1827 * queue lock must be held 2040 * queue lock must be held
1828 */ 2041 */
1829static void end_that_request_last(struct request *req, int error) 2042static void blk_finish_request(struct request *req, int error)
1830{ 2043{
1831 if (blk_rq_tagged(req)) 2044 if (blk_rq_tagged(req))
1832 blk_queue_end_tag(req->q, req); 2045 blk_queue_end_tag(req->q, req);
1833 2046
1834 if (blk_queued_rq(req)) 2047 BUG_ON(blk_queued_rq(req));
1835 elv_dequeue_request(req->q, req);
1836 2048
1837 if (unlikely(laptop_mode) && blk_fs_request(req)) 2049 if (unlikely(laptop_mode) && blk_fs_request(req))
1838 laptop_io_completion(); 2050 laptop_io_completion();
@@ -1852,117 +2064,62 @@ static void end_that_request_last(struct request *req, int error)
1852} 2064}
1853 2065
1854/** 2066/**
1855 * blk_rq_bytes - Returns bytes left to complete in the entire request 2067 * blk_end_bidi_request - Complete a bidi request
1856 * @rq: the request being processed 2068 * @rq: the request to complete
1857 **/ 2069 * @error: %0 for success, < %0 for error
1858unsigned int blk_rq_bytes(struct request *rq) 2070 * @nr_bytes: number of bytes to complete @rq
1859{ 2071 * @bidi_bytes: number of bytes to complete @rq->next_rq
1860 if (blk_fs_request(rq))
1861 return rq->hard_nr_sectors << 9;
1862
1863 return rq->data_len;
1864}
1865EXPORT_SYMBOL_GPL(blk_rq_bytes);
1866
1867/**
1868 * blk_rq_cur_bytes - Returns bytes left to complete in the current segment
1869 * @rq: the request being processed
1870 **/
1871unsigned int blk_rq_cur_bytes(struct request *rq)
1872{
1873 if (blk_fs_request(rq))
1874 return rq->current_nr_sectors << 9;
1875
1876 if (rq->bio)
1877 return rq->bio->bi_size;
1878
1879 return rq->data_len;
1880}
1881EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
1882
1883/**
1884 * end_request - end I/O on the current segment of the request
1885 * @req: the request being processed
1886 * @uptodate: error value or %0/%1 uptodate flag
1887 * 2072 *
1888 * Description: 2073 * Description:
1889 * Ends I/O on the current segment of a request. If that is the only 2074 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
1890 * remaining segment, the request is also completed and freed. 2075 * Drivers that support bidi can safely call this member for any
1891 * 2076 * type of request, bidi or uni. In the latter case @bidi_bytes is
1892 * This is a remnant of how older block drivers handled I/O completions. 2077 * just ignored.
1893 * Modern drivers typically end I/O on the full request in one go, unless 2078 *
1894 * they have a residual value to account for. For that case this function 2079 * Return:
1895 * isn't really useful, unless the residual just happens to be the 2080 * %false - we are done with this request
1896 * full current segment. In other words, don't use this function in new 2081 * %true - still buffers pending for this request
1897 * code. Use blk_end_request() or __blk_end_request() to end a request.
1898 **/ 2082 **/
1899void end_request(struct request *req, int uptodate) 2083static bool blk_end_bidi_request(struct request *rq, int error,
1900{
1901 int error = 0;
1902
1903 if (uptodate <= 0)
1904 error = uptodate ? uptodate : -EIO;
1905
1906 __blk_end_request(req, error, req->hard_cur_sectors << 9);
1907}
1908EXPORT_SYMBOL(end_request);
1909
1910static int end_that_request_data(struct request *rq, int error,
1911 unsigned int nr_bytes, unsigned int bidi_bytes) 2084 unsigned int nr_bytes, unsigned int bidi_bytes)
1912{ 2085{
1913 if (rq->bio) { 2086 struct request_queue *q = rq->q;
1914 if (__end_that_request_first(rq, error, nr_bytes)) 2087 unsigned long flags;
1915 return 1;
1916 2088
1917 /* Bidi request must be completed as a whole */ 2089 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
1918 if (blk_bidi_rq(rq) && 2090 return true;
1919 __end_that_request_first(rq->next_rq, error, bidi_bytes))
1920 return 1;
1921 }
1922 2091
1923 return 0; 2092 spin_lock_irqsave(q->queue_lock, flags);
2093 blk_finish_request(rq, error);
2094 spin_unlock_irqrestore(q->queue_lock, flags);
2095
2096 return false;
1924} 2097}
1925 2098
1926/** 2099/**
1927 * blk_end_io - Generic end_io function to complete a request. 2100 * __blk_end_bidi_request - Complete a bidi request with queue lock held
1928 * @rq: the request being processed 2101 * @rq: the request to complete
1929 * @error: %0 for success, < %0 for error 2102 * @error: %0 for success, < %0 for error
1930 * @nr_bytes: number of bytes to complete @rq 2103 * @nr_bytes: number of bytes to complete @rq
1931 * @bidi_bytes: number of bytes to complete @rq->next_rq 2104 * @bidi_bytes: number of bytes to complete @rq->next_rq
1932 * @drv_callback: function called between completion of bios in the request
1933 * and completion of the request.
1934 * If the callback returns non %0, this helper returns without
1935 * completion of the request.
1936 * 2105 *
1937 * Description: 2106 * Description:
1938 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 2107 * Identical to blk_end_bidi_request() except that queue lock is
1939 * If @rq has leftover, sets it up for the next range of segments. 2108 * assumed to be locked on entry and remains so on return.
1940 * 2109 *
1941 * Return: 2110 * Return:
1942 * %0 - we are done with this request 2111 * %false - we are done with this request
1943 * %1 - this request is not freed yet, it still has pending buffers. 2112 * %true - still buffers pending for this request
1944 **/ 2113 **/
1945static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, 2114static bool __blk_end_bidi_request(struct request *rq, int error,
1946 unsigned int bidi_bytes, 2115 unsigned int nr_bytes, unsigned int bidi_bytes)
1947 int (drv_callback)(struct request *))
1948{ 2116{
1949 struct request_queue *q = rq->q; 2117 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
1950 unsigned long flags = 0UL; 2118 return true;
1951
1952 if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
1953 return 1;
1954
1955 /* Special feature for tricky drivers */
1956 if (drv_callback && drv_callback(rq))
1957 return 1;
1958
1959 add_disk_randomness(rq->rq_disk);
1960 2119
1961 spin_lock_irqsave(q->queue_lock, flags); 2120 blk_finish_request(rq, error);
1962 end_that_request_last(rq, error);
1963 spin_unlock_irqrestore(q->queue_lock, flags);
1964 2121
1965 return 0; 2122 return false;
1966} 2123}
1967 2124
1968/** 2125/**
@@ -1976,124 +2133,112 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
1976 * If @rq has leftover, sets it up for the next range of segments. 2133 * If @rq has leftover, sets it up for the next range of segments.
1977 * 2134 *
1978 * Return: 2135 * Return:
1979 * %0 - we are done with this request 2136 * %false - we are done with this request
1980 * %1 - still buffers pending for this request 2137 * %true - still buffers pending for this request
1981 **/ 2138 **/
1982int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2139bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
1983{ 2140{
1984 return blk_end_io(rq, error, nr_bytes, 0, NULL); 2141 return blk_end_bidi_request(rq, error, nr_bytes, 0);
1985} 2142}
1986EXPORT_SYMBOL_GPL(blk_end_request); 2143EXPORT_SYMBOL_GPL(blk_end_request);
1987 2144
1988/** 2145/**
1989 * __blk_end_request - Helper function for drivers to complete the request. 2146 * blk_end_request_all - Helper function for drivers to finish the request.
1990 * @rq: the request being processed 2147 * @rq: the request to finish
1991 * @error: %0 for success, < %0 for error 2148 * @error: %0 for success, < %0 for error
1992 * @nr_bytes: number of bytes to complete
1993 * 2149 *
1994 * Description: 2150 * Description:
1995 * Must be called with queue lock held unlike blk_end_request(). 2151 * Completely finish @rq.
1996 * 2152 */
1997 * Return: 2153void blk_end_request_all(struct request *rq, int error)
1998 * %0 - we are done with this request
1999 * %1 - still buffers pending for this request
2000 **/
2001int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2002{ 2154{
2003 if (rq->bio && __end_that_request_first(rq, error, nr_bytes)) 2155 bool pending;
2004 return 1; 2156 unsigned int bidi_bytes = 0;
2005 2157
2006 add_disk_randomness(rq->rq_disk); 2158 if (unlikely(blk_bidi_rq(rq)))
2159 bidi_bytes = blk_rq_bytes(rq->next_rq);
2007 2160
2008 end_that_request_last(rq, error); 2161 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2162 BUG_ON(pending);
2163}
2164EXPORT_SYMBOL_GPL(blk_end_request_all);
2009 2165
2010 return 0; 2166/**
2167 * blk_end_request_cur - Helper function to finish the current request chunk.
2168 * @rq: the request to finish the current chunk for
2169 * @error: %0 for success, < %0 for error
2170 *
2171 * Description:
2172 * Complete the current consecutively mapped chunk from @rq.
2173 *
2174 * Return:
2175 * %false - we are done with this request
2176 * %true - still buffers pending for this request
2177 */
2178bool blk_end_request_cur(struct request *rq, int error)
2179{
2180 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2011} 2181}
2012EXPORT_SYMBOL_GPL(__blk_end_request); 2182EXPORT_SYMBOL_GPL(blk_end_request_cur);
2013 2183
2014/** 2184/**
2015 * blk_end_bidi_request - Helper function for drivers to complete bidi request. 2185 * __blk_end_request - Helper function for drivers to complete the request.
2016 * @rq: the bidi request being processed 2186 * @rq: the request being processed
2017 * @error: %0 for success, < %0 for error 2187 * @error: %0 for success, < %0 for error
2018 * @nr_bytes: number of bytes to complete @rq 2188 * @nr_bytes: number of bytes to complete
2019 * @bidi_bytes: number of bytes to complete @rq->next_rq
2020 * 2189 *
2021 * Description: 2190 * Description:
2022 * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. 2191 * Must be called with queue lock held unlike blk_end_request().
2023 * 2192 *
2024 * Return: 2193 * Return:
2025 * %0 - we are done with this request 2194 * %false - we are done with this request
2026 * %1 - still buffers pending for this request 2195 * %true - still buffers pending for this request
2027 **/ 2196 **/
2028int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, 2197bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2029 unsigned int bidi_bytes)
2030{ 2198{
2031 return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL); 2199 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2032} 2200}
2033EXPORT_SYMBOL_GPL(blk_end_bidi_request); 2201EXPORT_SYMBOL_GPL(__blk_end_request);
2034 2202
2035/** 2203/**
2036 * blk_update_request - Special helper function for request stacking drivers 2204 * __blk_end_request_all - Helper function for drivers to finish the request.
2037 * @rq: the request being processed 2205 * @rq: the request to finish
2038 * @error: %0 for success, < %0 for error 2206 * @error: %0 for success, < %0 for error
2039 * @nr_bytes: number of bytes to complete @rq
2040 * 2207 *
2041 * Description: 2208 * Description:
2042 * Ends I/O on a number of bytes attached to @rq, but doesn't complete 2209 * Completely finish @rq. Must be called with queue lock held.
2043 * the request structure even if @rq doesn't have leftover.
2044 * If @rq has leftover, sets it up for the next range of segments.
2045 *
2046 * This special helper function is only for request stacking drivers
2047 * (e.g. request-based dm) so that they can handle partial completion.
2048 * Actual device drivers should use blk_end_request instead.
2049 */ 2210 */
2050void blk_update_request(struct request *rq, int error, unsigned int nr_bytes) 2211void __blk_end_request_all(struct request *rq, int error)
2051{ 2212{
2052 if (!end_that_request_data(rq, error, nr_bytes, 0)) { 2213 bool pending;
2053 /* 2214 unsigned int bidi_bytes = 0;
2054 * These members are not updated in end_that_request_data() 2215
2055 * when all bios are completed. 2216 if (unlikely(blk_bidi_rq(rq)))
2056 * Update them so that the request stacking driver can find 2217 bidi_bytes = blk_rq_bytes(rq->next_rq);
2057 * how many bytes remain in the request later. 2218
2058 */ 2219 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2059 rq->nr_sectors = rq->hard_nr_sectors = 0; 2220 BUG_ON(pending);
2060 rq->current_nr_sectors = rq->hard_cur_sectors = 0;
2061 }
2062} 2221}
2063EXPORT_SYMBOL_GPL(blk_update_request); 2222EXPORT_SYMBOL_GPL(__blk_end_request_all);
2064 2223
2065/** 2224/**
2066 * blk_end_request_callback - Special helper function for tricky drivers 2225 * __blk_end_request_cur - Helper function to finish the current request chunk.
2067 * @rq: the request being processed 2226 * @rq: the request to finish the current chunk for
2068 * @error: %0 for success, < %0 for error 2227 * @error: %0 for success, < %0 for error
2069 * @nr_bytes: number of bytes to complete
2070 * @drv_callback: function called between completion of bios in the request
2071 * and completion of the request.
2072 * If the callback returns non %0, this helper returns without
2073 * completion of the request.
2074 * 2228 *
2075 * Description: 2229 * Description:
2076 * Ends I/O on a number of bytes attached to @rq. 2230 * Complete the current consecutively mapped chunk from @rq. Must
2077 * If @rq has leftover, sets it up for the next range of segments. 2231 * be called with queue lock held.
2078 *
2079 * This special helper function is used only for existing tricky drivers.
2080 * (e.g. cdrom_newpc_intr() of ide-cd)
2081 * This interface will be removed when such drivers are rewritten.
2082 * Don't use this interface in other places anymore.
2083 * 2232 *
2084 * Return: 2233 * Return:
2085 * %0 - we are done with this request 2234 * %false - we are done with this request
2086 * %1 - this request is not freed yet. 2235 * %true - still buffers pending for this request
2087 * this request still has pending buffers or 2236 */
2088 * the driver doesn't want to finish this request yet. 2237bool __blk_end_request_cur(struct request *rq, int error)
2089 **/
2090int blk_end_request_callback(struct request *rq, int error,
2091 unsigned int nr_bytes,
2092 int (drv_callback)(struct request *))
2093{ 2238{
2094 return blk_end_io(rq, error, nr_bytes, 0, drv_callback); 2239 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2095} 2240}
2096EXPORT_SYMBOL_GPL(blk_end_request_callback); 2241EXPORT_SYMBOL_GPL(__blk_end_request_cur);
2097 2242
2098void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2243void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2099 struct bio *bio) 2244 struct bio *bio)
@@ -2106,11 +2251,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2106 rq->nr_phys_segments = bio_phys_segments(q, bio); 2251 rq->nr_phys_segments = bio_phys_segments(q, bio);
2107 rq->buffer = bio_data(bio); 2252 rq->buffer = bio_data(bio);
2108 } 2253 }
2109 rq->current_nr_sectors = bio_cur_sectors(bio); 2254 rq->__data_len = bio->bi_size;
2110 rq->hard_cur_sectors = rq->current_nr_sectors;
2111 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
2112 rq->data_len = bio->bi_size;
2113
2114 rq->bio = rq->biotail = bio; 2255 rq->bio = rq->biotail = bio;
2115 2256
2116 if (bio->bi_bdev) 2257 if (bio->bi_bdev)
@@ -2145,6 +2286,106 @@ int blk_lld_busy(struct request_queue *q)
2145} 2286}
2146EXPORT_SYMBOL_GPL(blk_lld_busy); 2287EXPORT_SYMBOL_GPL(blk_lld_busy);
2147 2288
2289/**
2290 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
2291 * @rq: the clone request to be cleaned up
2292 *
2293 * Description:
2294 * Free all bios in @rq for a cloned request.
2295 */
2296void blk_rq_unprep_clone(struct request *rq)
2297{
2298 struct bio *bio;
2299
2300 while ((bio = rq->bio) != NULL) {
2301 rq->bio = bio->bi_next;
2302
2303 bio_put(bio);
2304 }
2305}
2306EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2307
2308/*
2309 * Copy attributes of the original request to the clone request.
2310 * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
2311 */
2312static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2313{
2314 dst->cpu = src->cpu;
2315 dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
2316 dst->cmd_type = src->cmd_type;
2317 dst->__sector = blk_rq_pos(src);
2318 dst->__data_len = blk_rq_bytes(src);
2319 dst->nr_phys_segments = src->nr_phys_segments;
2320 dst->ioprio = src->ioprio;
2321 dst->extra_len = src->extra_len;
2322}
2323
2324/**
2325 * blk_rq_prep_clone - Helper function to setup clone request
2326 * @rq: the request to be setup
2327 * @rq_src: original request to be cloned
2328 * @bs: bio_set that bios for clone are allocated from
2329 * @gfp_mask: memory allocation mask for bio
2330 * @bio_ctr: setup function to be called for each clone bio.
2331 * Returns %0 for success, non %0 for failure.
2332 * @data: private data to be passed to @bio_ctr
2333 *
2334 * Description:
2335 * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
2336 * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
2337 * are not copied, and copying such parts is the caller's responsibility.
2338 * Also, pages which the original bios are pointing to are not copied
2339 * and the cloned bios just point same pages.
2340 * So cloned bios must be completed before original bios, which means
2341 * the caller must complete @rq before @rq_src.
2342 */
2343int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2344 struct bio_set *bs, gfp_t gfp_mask,
2345 int (*bio_ctr)(struct bio *, struct bio *, void *),
2346 void *data)
2347{
2348 struct bio *bio, *bio_src;
2349
2350 if (!bs)
2351 bs = fs_bio_set;
2352
2353 blk_rq_init(NULL, rq);
2354
2355 __rq_for_each_bio(bio_src, rq_src) {
2356 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
2357 if (!bio)
2358 goto free_and_out;
2359
2360 __bio_clone(bio, bio_src);
2361
2362 if (bio_integrity(bio_src) &&
2363 bio_integrity_clone(bio, bio_src, gfp_mask))
2364 goto free_and_out;
2365
2366 if (bio_ctr && bio_ctr(bio, bio_src, data))
2367 goto free_and_out;
2368
2369 if (rq->bio) {
2370 rq->biotail->bi_next = bio;
2371 rq->biotail = bio;
2372 } else
2373 rq->bio = rq->biotail = bio;
2374 }
2375
2376 __blk_rq_prep_clone(rq, rq_src);
2377
2378 return 0;
2379
2380free_and_out:
2381 if (bio)
2382 bio_free(bio, bs);
2383 blk_rq_unprep_clone(rq);
2384
2385 return -ENOMEM;
2386}
2387EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
2388
2148int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) 2389int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2149{ 2390{
2150 return queue_work(kblockd_workqueue, work); 2391 return queue_work(kblockd_workqueue, work);
@@ -2153,6 +2394,9 @@ EXPORT_SYMBOL(kblockd_schedule_work);
2153 2394
2154int __init blk_dev_init(void) 2395int __init blk_dev_init(void)
2155{ 2396{
2397 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2398 sizeof(((struct request *)0)->cmd_flags));
2399
2156 kblockd_workqueue = create_workqueue("kblockd"); 2400 kblockd_workqueue = create_workqueue("kblockd");
2157 if (!kblockd_workqueue) 2401 if (!kblockd_workqueue)
2158 panic("Failed to create kblockd\n"); 2402 panic("Failed to create kblockd\n");
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 6af716d1e54e..49557e91f0da 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -51,7 +51,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
51 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 51 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
52 52
53 rq->rq_disk = bd_disk; 53 rq->rq_disk = bd_disk;
54 rq->cmd_flags |= REQ_NOMERGE;
55 rq->end_io = done; 54 rq->end_io = done;
56 WARN_ON(irqs_disabled()); 55 WARN_ON(irqs_disabled());
57 spin_lock_irq(q->queue_lock); 56 spin_lock_irq(q->queue_lock);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 91fa8e06b6a5..73e28d355688 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -340,7 +340,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
340 kobject_uevent(&bi->kobj, KOBJ_ADD); 340 kobject_uevent(&bi->kobj, KOBJ_ADD);
341 341
342 bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE; 342 bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
343 bi->sector_size = disk->queue->hardsect_size; 343 bi->sector_size = queue_logical_block_size(disk->queue);
344 disk->integrity = bi; 344 disk->integrity = bi;
345 } else 345 } else
346 bi = disk->integrity; 346 bi = disk->integrity;
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 012f065ac8e2..d4ed6000147d 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -35,9 +35,9 @@ int put_io_context(struct io_context *ioc)
35 if (ioc == NULL) 35 if (ioc == NULL)
36 return 1; 36 return 1;
37 37
38 BUG_ON(atomic_read(&ioc->refcount) == 0); 38 BUG_ON(atomic_long_read(&ioc->refcount) == 0);
39 39
40 if (atomic_dec_and_test(&ioc->refcount)) { 40 if (atomic_long_dec_and_test(&ioc->refcount)) {
41 rcu_read_lock(); 41 rcu_read_lock();
42 if (ioc->aic && ioc->aic->dtor) 42 if (ioc->aic && ioc->aic->dtor)
43 ioc->aic->dtor(ioc->aic); 43 ioc->aic->dtor(ioc->aic);
@@ -90,7 +90,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
90 90
91 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); 91 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
92 if (ret) { 92 if (ret) {
93 atomic_set(&ret->refcount, 1); 93 atomic_long_set(&ret->refcount, 1);
94 atomic_set(&ret->nr_tasks, 1); 94 atomic_set(&ret->nr_tasks, 1);
95 spin_lock_init(&ret->lock); 95 spin_lock_init(&ret->lock);
96 ret->ioprio_changed = 0; 96 ret->ioprio_changed = 0;
@@ -151,7 +151,7 @@ struct io_context *get_io_context(gfp_t gfp_flags, int node)
151 ret = current_io_context(gfp_flags, node); 151 ret = current_io_context(gfp_flags, node);
152 if (unlikely(!ret)) 152 if (unlikely(!ret))
153 break; 153 break;
154 } while (!atomic_inc_not_zero(&ret->refcount)); 154 } while (!atomic_long_inc_not_zero(&ret->refcount));
155 155
156 return ret; 156 return ret;
157} 157}
@@ -163,8 +163,8 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc)
163 struct io_context *dst = *pdst; 163 struct io_context *dst = *pdst;
164 164
165 if (src) { 165 if (src) {
166 BUG_ON(atomic_read(&src->refcount) == 0); 166 BUG_ON(atomic_long_read(&src->refcount) == 0);
167 atomic_inc(&src->refcount); 167 atomic_long_inc(&src->refcount);
168 put_io_context(dst); 168 put_io_context(dst);
169 *pdst = src; 169 *pdst = src;
170 } 170 }
diff --git a/block/blk-map.c b/block/blk-map.c
index f103729b462f..9083cf0180cc 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -20,11 +20,10 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
20 rq->biotail->bi_next = bio; 20 rq->biotail->bi_next = bio;
21 rq->biotail = bio; 21 rq->biotail = bio;
22 22
23 rq->data_len += bio->bi_size; 23 rq->__data_len += bio->bi_size;
24 } 24 }
25 return 0; 25 return 0;
26} 26}
27EXPORT_SYMBOL(blk_rq_append_bio);
28 27
29static int __blk_rq_unmap_user(struct bio *bio) 28static int __blk_rq_unmap_user(struct bio *bio)
30{ 29{
@@ -116,7 +115,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
116 struct bio *bio = NULL; 115 struct bio *bio = NULL;
117 int ret; 116 int ret;
118 117
119 if (len > (q->max_hw_sectors << 9)) 118 if (len > (queue_max_hw_sectors(q) << 9))
120 return -EINVAL; 119 return -EINVAL;
121 if (!len) 120 if (!len)
122 return -EINVAL; 121 return -EINVAL;
@@ -156,7 +155,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
156 if (!bio_flagged(bio, BIO_USER_MAPPED)) 155 if (!bio_flagged(bio, BIO_USER_MAPPED))
157 rq->cmd_flags |= REQ_COPY_USER; 156 rq->cmd_flags |= REQ_COPY_USER;
158 157
159 rq->buffer = rq->data = NULL; 158 rq->buffer = NULL;
160 return 0; 159 return 0;
161unmap_rq: 160unmap_rq:
162 blk_rq_unmap_user(bio); 161 blk_rq_unmap_user(bio);
@@ -235,7 +234,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
235 blk_queue_bounce(q, &bio); 234 blk_queue_bounce(q, &bio);
236 bio_get(bio); 235 bio_get(bio);
237 blk_rq_bio_prep(q, rq, bio); 236 blk_rq_bio_prep(q, rq, bio);
238 rq->buffer = rq->data = NULL; 237 rq->buffer = NULL;
239 return 0; 238 return 0;
240} 239}
241EXPORT_SYMBOL(blk_rq_map_user_iov); 240EXPORT_SYMBOL(blk_rq_map_user_iov);
@@ -282,7 +281,8 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
282 * 281 *
283 * Description: 282 * Description:
284 * Data will be mapped directly if possible. Otherwise a bounce 283 * Data will be mapped directly if possible. Otherwise a bounce
285 * buffer is used. 284 * buffer is used. Can be called multiple times to append multiple
285 * buffers.
286 */ 286 */
287int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, 287int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
288 unsigned int len, gfp_t gfp_mask) 288 unsigned int len, gfp_t gfp_mask)
@@ -290,8 +290,9 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
290 int reading = rq_data_dir(rq) == READ; 290 int reading = rq_data_dir(rq) == READ;
291 int do_copy = 0; 291 int do_copy = 0;
292 struct bio *bio; 292 struct bio *bio;
293 int ret;
293 294
294 if (len > (q->max_hw_sectors << 9)) 295 if (len > (queue_max_hw_sectors(q) << 9))
295 return -EINVAL; 296 return -EINVAL;
296 if (!len || !kbuf) 297 if (!len || !kbuf)
297 return -EINVAL; 298 return -EINVAL;
@@ -311,9 +312,15 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
311 if (do_copy) 312 if (do_copy)
312 rq->cmd_flags |= REQ_COPY_USER; 313 rq->cmd_flags |= REQ_COPY_USER;
313 314
314 blk_rq_bio_prep(q, rq, bio); 315 ret = blk_rq_append_bio(q, rq, bio);
316 if (unlikely(ret)) {
317 /* request is too big */
318 bio_put(bio);
319 return ret;
320 }
321
315 blk_queue_bounce(q, &rq->bio); 322 blk_queue_bounce(q, &rq->bio);
316 rq->buffer = rq->data = NULL; 323 rq->buffer = NULL;
317 return 0; 324 return 0;
318} 325}
319EXPORT_SYMBOL(blk_rq_map_kern); 326EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 23d2a6fe34a3..39ce64432ba6 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -9,35 +9,6 @@
9 9
10#include "blk.h" 10#include "blk.h"
11 11
12void blk_recalc_rq_sectors(struct request *rq, int nsect)
13{
14 if (blk_fs_request(rq) || blk_discard_rq(rq)) {
15 rq->hard_sector += nsect;
16 rq->hard_nr_sectors -= nsect;
17
18 /*
19 * Move the I/O submission pointers ahead if required.
20 */
21 if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
22 (rq->sector <= rq->hard_sector)) {
23 rq->sector = rq->hard_sector;
24 rq->nr_sectors = rq->hard_nr_sectors;
25 rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
26 rq->current_nr_sectors = rq->hard_cur_sectors;
27 rq->buffer = bio_data(rq->bio);
28 }
29
30 /*
31 * if total number of sectors is less than the first segment
32 * size, something has gone terribly wrong
33 */
34 if (rq->nr_sectors < rq->current_nr_sectors) {
35 printk(KERN_ERR "blk: request botched\n");
36 rq->nr_sectors = rq->current_nr_sectors;
37 }
38 }
39}
40
41static unsigned int __blk_recalc_rq_segments(struct request_queue *q, 12static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
42 struct bio *bio) 13 struct bio *bio)
43{ 14{
@@ -61,11 +32,12 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
61 * never considered part of another segment, since that 32 * never considered part of another segment, since that
62 * might change with the bounce page. 33 * might change with the bounce page.
63 */ 34 */
64 high = page_to_pfn(bv->bv_page) > q->bounce_pfn; 35 high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q);
65 if (high || highprv) 36 if (high || highprv)
66 goto new_segment; 37 goto new_segment;
67 if (cluster) { 38 if (cluster) {
68 if (seg_size + bv->bv_len > q->max_segment_size) 39 if (seg_size + bv->bv_len
40 > queue_max_segment_size(q))
69 goto new_segment; 41 goto new_segment;
70 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) 42 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
71 goto new_segment; 43 goto new_segment;
@@ -120,7 +92,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
120 return 0; 92 return 0;
121 93
122 if (bio->bi_seg_back_size + nxt->bi_seg_front_size > 94 if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
123 q->max_segment_size) 95 queue_max_segment_size(q))
124 return 0; 96 return 0;
125 97
126 if (!bio_has_data(bio)) 98 if (!bio_has_data(bio))
@@ -163,7 +135,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
163 int nbytes = bvec->bv_len; 135 int nbytes = bvec->bv_len;
164 136
165 if (bvprv && cluster) { 137 if (bvprv && cluster) {
166 if (sg->length + nbytes > q->max_segment_size) 138 if (sg->length + nbytes > queue_max_segment_size(q))
167 goto new_segment; 139 goto new_segment;
168 140
169 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) 141 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
@@ -199,8 +171,9 @@ new_segment:
199 171
200 172
201 if (unlikely(rq->cmd_flags & REQ_COPY_USER) && 173 if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
202 (rq->data_len & q->dma_pad_mask)) { 174 (blk_rq_bytes(rq) & q->dma_pad_mask)) {
203 unsigned int pad_len = (q->dma_pad_mask & ~rq->data_len) + 1; 175 unsigned int pad_len =
176 (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
204 177
205 sg->length += pad_len; 178 sg->length += pad_len;
206 rq->extra_len += pad_len; 179 rq->extra_len += pad_len;
@@ -233,8 +206,8 @@ static inline int ll_new_hw_segment(struct request_queue *q,
233{ 206{
234 int nr_phys_segs = bio_phys_segments(q, bio); 207 int nr_phys_segs = bio_phys_segments(q, bio);
235 208
236 if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments 209 if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) ||
237 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 210 req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) {
238 req->cmd_flags |= REQ_NOMERGE; 211 req->cmd_flags |= REQ_NOMERGE;
239 if (req == q->last_merge) 212 if (req == q->last_merge)
240 q->last_merge = NULL; 213 q->last_merge = NULL;
@@ -255,11 +228,11 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
255 unsigned short max_sectors; 228 unsigned short max_sectors;
256 229
257 if (unlikely(blk_pc_request(req))) 230 if (unlikely(blk_pc_request(req)))
258 max_sectors = q->max_hw_sectors; 231 max_sectors = queue_max_hw_sectors(q);
259 else 232 else
260 max_sectors = q->max_sectors; 233 max_sectors = queue_max_sectors(q);
261 234
262 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 235 if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
263 req->cmd_flags |= REQ_NOMERGE; 236 req->cmd_flags |= REQ_NOMERGE;
264 if (req == q->last_merge) 237 if (req == q->last_merge)
265 q->last_merge = NULL; 238 q->last_merge = NULL;
@@ -279,12 +252,12 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
279 unsigned short max_sectors; 252 unsigned short max_sectors;
280 253
281 if (unlikely(blk_pc_request(req))) 254 if (unlikely(blk_pc_request(req)))
282 max_sectors = q->max_hw_sectors; 255 max_sectors = queue_max_hw_sectors(q);
283 else 256 else
284 max_sectors = q->max_sectors; 257 max_sectors = queue_max_sectors(q);
285 258
286 259
287 if (req->nr_sectors + bio_sectors(bio) > max_sectors) { 260 if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
288 req->cmd_flags |= REQ_NOMERGE; 261 req->cmd_flags |= REQ_NOMERGE;
289 if (req == q->last_merge) 262 if (req == q->last_merge)
290 q->last_merge = NULL; 263 q->last_merge = NULL;
@@ -315,7 +288,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
315 /* 288 /*
316 * Will it become too large? 289 * Will it become too large?
317 */ 290 */
318 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors) 291 if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q))
319 return 0; 292 return 0;
320 293
321 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 294 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -327,10 +300,10 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
327 total_phys_segments--; 300 total_phys_segments--;
328 } 301 }
329 302
330 if (total_phys_segments > q->max_phys_segments) 303 if (total_phys_segments > queue_max_phys_segments(q))
331 return 0; 304 return 0;
332 305
333 if (total_phys_segments > q->max_hw_segments) 306 if (total_phys_segments > queue_max_hw_segments(q))
334 return 0; 307 return 0;
335 308
336 /* Merge is OK... */ 309 /* Merge is OK... */
@@ -345,7 +318,7 @@ static void blk_account_io_merge(struct request *req)
345 int cpu; 318 int cpu;
346 319
347 cpu = part_stat_lock(); 320 cpu = part_stat_lock();
348 part = disk_map_sector_rcu(req->rq_disk, req->sector); 321 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
349 322
350 part_round_stats(cpu, part); 323 part_round_stats(cpu, part);
351 part_dec_in_flight(part); 324 part_dec_in_flight(part);
@@ -366,7 +339,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
366 /* 339 /*
367 * not contiguous 340 * not contiguous
368 */ 341 */
369 if (req->sector + req->nr_sectors != next->sector) 342 if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
370 return 0; 343 return 0;
371 344
372 if (rq_data_dir(req) != rq_data_dir(next) 345 if (rq_data_dir(req) != rq_data_dir(next)
@@ -398,7 +371,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
398 req->biotail->bi_next = next->bio; 371 req->biotail->bi_next = next->bio;
399 req->biotail = next->biotail; 372 req->biotail = next->biotail;
400 373
401 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; 374 req->__data_len += blk_rq_bytes(next);
402 375
403 elv_merge_requests(q, req, next); 376 elv_merge_requests(q, req, next);
404 377
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 57af728d94bb..1c4df9bf6813 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -134,7 +134,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
134 q->backing_dev_info.state = 0; 134 q->backing_dev_info.state = 0;
135 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 135 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
136 blk_queue_max_sectors(q, SAFE_MAX_SECTORS); 136 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
137 blk_queue_hardsect_size(q, 512); 137 blk_queue_logical_block_size(q, 512);
138 blk_queue_dma_alignment(q, 511); 138 blk_queue_dma_alignment(q, 511);
139 blk_queue_congestion_threshold(q); 139 blk_queue_congestion_threshold(q);
140 q->nr_batching = BLK_BATCH_REQ; 140 q->nr_batching = BLK_BATCH_REQ;
@@ -179,16 +179,16 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
179 */ 179 */
180 if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) 180 if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
181 dma = 1; 181 dma = 1;
182 q->bounce_pfn = max_low_pfn; 182 q->limits.bounce_pfn = max_low_pfn;
183#else 183#else
184 if (b_pfn < blk_max_low_pfn) 184 if (b_pfn < blk_max_low_pfn)
185 dma = 1; 185 dma = 1;
186 q->bounce_pfn = b_pfn; 186 q->limits.bounce_pfn = b_pfn;
187#endif 187#endif
188 if (dma) { 188 if (dma) {
189 init_emergency_isa_pool(); 189 init_emergency_isa_pool();
190 q->bounce_gfp = GFP_NOIO | GFP_DMA; 190 q->bounce_gfp = GFP_NOIO | GFP_DMA;
191 q->bounce_pfn = b_pfn; 191 q->limits.bounce_pfn = b_pfn;
192 } 192 }
193} 193}
194EXPORT_SYMBOL(blk_queue_bounce_limit); 194EXPORT_SYMBOL(blk_queue_bounce_limit);
@@ -211,14 +211,23 @@ void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors)
211 } 211 }
212 212
213 if (BLK_DEF_MAX_SECTORS > max_sectors) 213 if (BLK_DEF_MAX_SECTORS > max_sectors)
214 q->max_hw_sectors = q->max_sectors = max_sectors; 214 q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors;
215 else { 215 else {
216 q->max_sectors = BLK_DEF_MAX_SECTORS; 216 q->limits.max_sectors = BLK_DEF_MAX_SECTORS;
217 q->max_hw_sectors = max_sectors; 217 q->limits.max_hw_sectors = max_sectors;
218 } 218 }
219} 219}
220EXPORT_SYMBOL(blk_queue_max_sectors); 220EXPORT_SYMBOL(blk_queue_max_sectors);
221 221
222void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
223{
224 if (BLK_DEF_MAX_SECTORS > max_sectors)
225 q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS;
226 else
227 q->limits.max_hw_sectors = max_sectors;
228}
229EXPORT_SYMBOL(blk_queue_max_hw_sectors);
230
222/** 231/**
223 * blk_queue_max_phys_segments - set max phys segments for a request for this queue 232 * blk_queue_max_phys_segments - set max phys segments for a request for this queue
224 * @q: the request queue for the device 233 * @q: the request queue for the device
@@ -238,7 +247,7 @@ void blk_queue_max_phys_segments(struct request_queue *q,
238 __func__, max_segments); 247 __func__, max_segments);
239 } 248 }
240 249
241 q->max_phys_segments = max_segments; 250 q->limits.max_phys_segments = max_segments;
242} 251}
243EXPORT_SYMBOL(blk_queue_max_phys_segments); 252EXPORT_SYMBOL(blk_queue_max_phys_segments);
244 253
@@ -262,7 +271,7 @@ void blk_queue_max_hw_segments(struct request_queue *q,
262 __func__, max_segments); 271 __func__, max_segments);
263 } 272 }
264 273
265 q->max_hw_segments = max_segments; 274 q->limits.max_hw_segments = max_segments;
266} 275}
267EXPORT_SYMBOL(blk_queue_max_hw_segments); 276EXPORT_SYMBOL(blk_queue_max_hw_segments);
268 277
@@ -283,26 +292,110 @@ void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
283 __func__, max_size); 292 __func__, max_size);
284 } 293 }
285 294
286 q->max_segment_size = max_size; 295 q->limits.max_segment_size = max_size;
287} 296}
288EXPORT_SYMBOL(blk_queue_max_segment_size); 297EXPORT_SYMBOL(blk_queue_max_segment_size);
289 298
290/** 299/**
291 * blk_queue_hardsect_size - set hardware sector size for the queue 300 * blk_queue_logical_block_size - set logical block size for the queue
292 * @q: the request queue for the device 301 * @q: the request queue for the device
293 * @size: the hardware sector size, in bytes 302 * @size: the logical block size, in bytes
294 * 303 *
295 * Description: 304 * Description:
296 * This should typically be set to the lowest possible sector size 305 * This should be set to the lowest possible block size that the
297 * that the hardware can operate on (possible without reverting to 306 * storage device can address. The default of 512 covers most
298 * even internal read-modify-write operations). Usually the default 307 * hardware.
299 * of 512 covers most hardware.
300 **/ 308 **/
301void blk_queue_hardsect_size(struct request_queue *q, unsigned short size) 309void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
310{
311 q->limits.logical_block_size = size;
312
313 if (q->limits.physical_block_size < size)
314 q->limits.physical_block_size = size;
315
316 if (q->limits.io_min < q->limits.physical_block_size)
317 q->limits.io_min = q->limits.physical_block_size;
318}
319EXPORT_SYMBOL(blk_queue_logical_block_size);
320
321/**
322 * blk_queue_physical_block_size - set physical block size for the queue
323 * @q: the request queue for the device
324 * @size: the physical block size, in bytes
325 *
326 * Description:
327 * This should be set to the lowest possible sector size that the
328 * hardware can operate on without reverting to read-modify-write
329 * operations.
330 */
331void blk_queue_physical_block_size(struct request_queue *q, unsigned short size)
332{
333 q->limits.physical_block_size = size;
334
335 if (q->limits.physical_block_size < q->limits.logical_block_size)
336 q->limits.physical_block_size = q->limits.logical_block_size;
337
338 if (q->limits.io_min < q->limits.physical_block_size)
339 q->limits.io_min = q->limits.physical_block_size;
340}
341EXPORT_SYMBOL(blk_queue_physical_block_size);
342
343/**
344 * blk_queue_alignment_offset - set physical block alignment offset
345 * @q: the request queue for the device
346 * @offset: alignment offset in bytes
347 *
348 * Description:
349 * Some devices are naturally misaligned to compensate for things like
350 * the legacy DOS partition table 63-sector offset. Low-level drivers
351 * should call this function for devices whose first sector is not
352 * naturally aligned.
353 */
354void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
302{ 355{
303 q->hardsect_size = size; 356 q->limits.alignment_offset =
357 offset & (q->limits.physical_block_size - 1);
358 q->limits.misaligned = 0;
304} 359}
305EXPORT_SYMBOL(blk_queue_hardsect_size); 360EXPORT_SYMBOL(blk_queue_alignment_offset);
361
362/**
363 * blk_queue_io_min - set minimum request size for the queue
364 * @q: the request queue for the device
365 * @min: smallest I/O size in bytes
366 *
367 * Description:
368 * Some devices have an internal block size bigger than the reported
369 * hardware sector size. This function can be used to signal the
370 * smallest I/O the device can perform without incurring a performance
371 * penalty.
372 */
373void blk_queue_io_min(struct request_queue *q, unsigned int min)
374{
375 q->limits.io_min = min;
376
377 if (q->limits.io_min < q->limits.logical_block_size)
378 q->limits.io_min = q->limits.logical_block_size;
379
380 if (q->limits.io_min < q->limits.physical_block_size)
381 q->limits.io_min = q->limits.physical_block_size;
382}
383EXPORT_SYMBOL(blk_queue_io_min);
384
385/**
386 * blk_queue_io_opt - set optimal request size for the queue
387 * @q: the request queue for the device
388 * @opt: optimal request size in bytes
389 *
390 * Description:
391 * Drivers can call this function to set the preferred I/O request
392 * size for devices that report such a value.
393 */
394void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
395{
396 q->limits.io_opt = opt;
397}
398EXPORT_SYMBOL(blk_queue_io_opt);
306 399
307/* 400/*
308 * Returns the minimum that is _not_ zero, unless both are zero. 401 * Returns the minimum that is _not_ zero, unless both are zero.
@@ -317,14 +410,27 @@ EXPORT_SYMBOL(blk_queue_hardsect_size);
317void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) 410void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
318{ 411{
319 /* zero is "infinity" */ 412 /* zero is "infinity" */
320 t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); 413 t->limits.max_sectors = min_not_zero(queue_max_sectors(t),
321 t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); 414 queue_max_sectors(b));
322 t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask); 415
416 t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t),
417 queue_max_hw_sectors(b));
418
419 t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t),
420 queue_segment_boundary(b));
421
422 t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t),
423 queue_max_phys_segments(b));
424
425 t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t),
426 queue_max_hw_segments(b));
427
428 t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t),
429 queue_max_segment_size(b));
430
431 t->limits.logical_block_size = max(queue_logical_block_size(t),
432 queue_logical_block_size(b));
323 433
324 t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments);
325 t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments);
326 t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size);
327 t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
328 if (!t->queue_lock) 434 if (!t->queue_lock)
329 WARN_ON_ONCE(1); 435 WARN_ON_ONCE(1);
330 else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { 436 else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
@@ -337,6 +443,109 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
337EXPORT_SYMBOL(blk_queue_stack_limits); 443EXPORT_SYMBOL(blk_queue_stack_limits);
338 444
339/** 445/**
446 * blk_stack_limits - adjust queue_limits for stacked devices
447 * @t: the stacking driver limits (top)
448 * @b: the underlying queue limits (bottom)
449 * @offset: offset to beginning of data within component device
450 *
451 * Description:
452 * Merges two queue_limits structs. Returns 0 if alignment didn't
453 * change. Returns -1 if adding the bottom device caused
454 * misalignment.
455 */
456int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
457 sector_t offset)
458{
459 t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
460 t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
461 t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
462
463 t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
464 b->seg_boundary_mask);
465
466 t->max_phys_segments = min_not_zero(t->max_phys_segments,
467 b->max_phys_segments);
468
469 t->max_hw_segments = min_not_zero(t->max_hw_segments,
470 b->max_hw_segments);
471
472 t->max_segment_size = min_not_zero(t->max_segment_size,
473 b->max_segment_size);
474
475 t->logical_block_size = max(t->logical_block_size,
476 b->logical_block_size);
477
478 t->physical_block_size = max(t->physical_block_size,
479 b->physical_block_size);
480
481 t->io_min = max(t->io_min, b->io_min);
482 t->no_cluster |= b->no_cluster;
483
484 /* Bottom device offset aligned? */
485 if (offset &&
486 (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
487 t->misaligned = 1;
488 return -1;
489 }
490
491 /* If top has no alignment offset, inherit from bottom */
492 if (!t->alignment_offset)
493 t->alignment_offset =
494 b->alignment_offset & (b->physical_block_size - 1);
495
496 /* Top device aligned on logical block boundary? */
497 if (t->alignment_offset & (t->logical_block_size - 1)) {
498 t->misaligned = 1;
499 return -1;
500 }
501
502 return 0;
503}
504EXPORT_SYMBOL(blk_stack_limits);
505
506/**
507 * disk_stack_limits - adjust queue limits for stacked drivers
508 * @disk: MD/DM gendisk (top)
509 * @bdev: the underlying block device (bottom)
510 * @offset: offset to beginning of data within component device
511 *
512 * Description:
513 * Merges the limits for two queues. Does not return a value;
514 * if adding the bottom device caused misalignment, a notice
515 * is logged instead.
516 */
517void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
518 sector_t offset)
519{
520 struct request_queue *t = disk->queue;
521 struct request_queue *b = bdev_get_queue(bdev);
522
523 offset += get_start_sect(bdev) << 9;
524
525 if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) {
526 char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
527
528 disk_name(disk, 0, top);
529 bdevname(bdev, bottom);
530
531 printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
532 top, bottom);
533 }
534
535 if (!t->queue_lock)
536 WARN_ON_ONCE(1);
537 else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
538 unsigned long flags;
539
540 spin_lock_irqsave(t->queue_lock, flags);
541 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
542 queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
543 spin_unlock_irqrestore(t->queue_lock, flags);
544 }
545}
546EXPORT_SYMBOL(disk_stack_limits);
547
548/**
340 * blk_queue_dma_pad - set pad mask 549 * blk_queue_dma_pad - set pad mask
341 * @q: the request queue for the device 550 * @q: the request queue for the device
342 * @mask: pad mask 551 * @mask: pad mask
@@ -396,11 +605,11 @@ int blk_queue_dma_drain(struct request_queue *q,
396 dma_drain_needed_fn *dma_drain_needed, 605 dma_drain_needed_fn *dma_drain_needed,
397 void *buf, unsigned int size) 606 void *buf, unsigned int size)
398{ 607{
399 if (q->max_hw_segments < 2 || q->max_phys_segments < 2) 608 if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2)
400 return -EINVAL; 609 return -EINVAL;
401 /* make room for appending the drain */ 610 /* make room for appending the drain */
402 --q->max_hw_segments; 611 blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1);
403 --q->max_phys_segments; 612 blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1);
404 q->dma_drain_needed = dma_drain_needed; 613 q->dma_drain_needed = dma_drain_needed;
405 q->dma_drain_buffer = buf; 614 q->dma_drain_buffer = buf;
406 q->dma_drain_size = size; 615 q->dma_drain_size = size;
@@ -422,7 +631,7 @@ void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
422 __func__, mask); 631 __func__, mask);
423 } 632 }
424 633
425 q->seg_boundary_mask = mask; 634 q->limits.seg_boundary_mask = mask;
426} 635}
427EXPORT_SYMBOL(blk_queue_segment_boundary); 636EXPORT_SYMBOL(blk_queue_segment_boundary);
428 637
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 26f9ec28f56c..b1cd04087d6a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -95,21 +95,36 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
95 95
96static ssize_t queue_max_sectors_show(struct request_queue *q, char *page) 96static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
97{ 97{
98 int max_sectors_kb = q->max_sectors >> 1; 98 int max_sectors_kb = queue_max_sectors(q) >> 1;
99 99
100 return queue_var_show(max_sectors_kb, (page)); 100 return queue_var_show(max_sectors_kb, (page));
101} 101}
102 102
103static ssize_t queue_hw_sector_size_show(struct request_queue *q, char *page) 103static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
104{ 104{
105 return queue_var_show(q->hardsect_size, page); 105 return queue_var_show(queue_logical_block_size(q), page);
106}
107
108static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
109{
110 return queue_var_show(queue_physical_block_size(q), page);
111}
112
113static ssize_t queue_io_min_show(struct request_queue *q, char *page)
114{
115 return queue_var_show(queue_io_min(q), page);
116}
117
118static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
119{
120 return queue_var_show(queue_io_opt(q), page);
106} 121}
107 122
108static ssize_t 123static ssize_t
109queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) 124queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
110{ 125{
111 unsigned long max_sectors_kb, 126 unsigned long max_sectors_kb,
112 max_hw_sectors_kb = q->max_hw_sectors >> 1, 127 max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
113 page_kb = 1 << (PAGE_CACHE_SHIFT - 10); 128 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
114 ssize_t ret = queue_var_store(&max_sectors_kb, page, count); 129 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
115 130
@@ -117,7 +132,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
117 return -EINVAL; 132 return -EINVAL;
118 133
119 spin_lock_irq(q->queue_lock); 134 spin_lock_irq(q->queue_lock);
120 q->max_sectors = max_sectors_kb << 1; 135 blk_queue_max_sectors(q, max_sectors_kb << 1);
121 spin_unlock_irq(q->queue_lock); 136 spin_unlock_irq(q->queue_lock);
122 137
123 return ret; 138 return ret;
@@ -125,7 +140,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
125 140
126static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) 141static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
127{ 142{
128 int max_hw_sectors_kb = q->max_hw_sectors >> 1; 143 int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
129 144
130 return queue_var_show(max_hw_sectors_kb, (page)); 145 return queue_var_show(max_hw_sectors_kb, (page));
131} 146}
@@ -249,7 +264,27 @@ static struct queue_sysfs_entry queue_iosched_entry = {
249 264
250static struct queue_sysfs_entry queue_hw_sector_size_entry = { 265static struct queue_sysfs_entry queue_hw_sector_size_entry = {
251 .attr = {.name = "hw_sector_size", .mode = S_IRUGO }, 266 .attr = {.name = "hw_sector_size", .mode = S_IRUGO },
252 .show = queue_hw_sector_size_show, 267 .show = queue_logical_block_size_show,
268};
269
270static struct queue_sysfs_entry queue_logical_block_size_entry = {
271 .attr = {.name = "logical_block_size", .mode = S_IRUGO },
272 .show = queue_logical_block_size_show,
273};
274
275static struct queue_sysfs_entry queue_physical_block_size_entry = {
276 .attr = {.name = "physical_block_size", .mode = S_IRUGO },
277 .show = queue_physical_block_size_show,
278};
279
280static struct queue_sysfs_entry queue_io_min_entry = {
281 .attr = {.name = "minimum_io_size", .mode = S_IRUGO },
282 .show = queue_io_min_show,
283};
284
285static struct queue_sysfs_entry queue_io_opt_entry = {
286 .attr = {.name = "optimal_io_size", .mode = S_IRUGO },
287 .show = queue_io_opt_show,
253}; 288};
254 289
255static struct queue_sysfs_entry queue_nonrot_entry = { 290static struct queue_sysfs_entry queue_nonrot_entry = {
@@ -283,6 +318,10 @@ static struct attribute *default_attrs[] = {
283 &queue_max_sectors_entry.attr, 318 &queue_max_sectors_entry.attr,
284 &queue_iosched_entry.attr, 319 &queue_iosched_entry.attr,
285 &queue_hw_sector_size_entry.attr, 320 &queue_hw_sector_size_entry.attr,
321 &queue_logical_block_size_entry.attr,
322 &queue_physical_block_size_entry.attr,
323 &queue_io_min_entry.attr,
324 &queue_io_opt_entry.attr,
286 &queue_nonrot_entry.attr, 325 &queue_nonrot_entry.attr,
287 &queue_nomerges_entry.attr, 326 &queue_nomerges_entry.attr,
288 &queue_rq_affinity_entry.attr, 327 &queue_rq_affinity_entry.attr,
@@ -394,16 +433,15 @@ int blk_register_queue(struct gendisk *disk)
394 if (ret) 433 if (ret)
395 return ret; 434 return ret;
396 435
397 if (!q->request_fn) 436 ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
398 return 0;
399
400 ret = kobject_add(&q->kobj, kobject_get(&dev->kobj),
401 "%s", "queue");
402 if (ret < 0) 437 if (ret < 0)
403 return ret; 438 return ret;
404 439
405 kobject_uevent(&q->kobj, KOBJ_ADD); 440 kobject_uevent(&q->kobj, KOBJ_ADD);
406 441
442 if (!q->request_fn)
443 return 0;
444
407 ret = elv_register_queue(q); 445 ret = elv_register_queue(q);
408 if (ret) { 446 if (ret) {
409 kobject_uevent(&q->kobj, KOBJ_REMOVE); 447 kobject_uevent(&q->kobj, KOBJ_REMOVE);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 3c518e3303ae..2e5cfeb59333 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -336,7 +336,7 @@ EXPORT_SYMBOL(blk_queue_end_tag);
336int blk_queue_start_tag(struct request_queue *q, struct request *rq) 336int blk_queue_start_tag(struct request_queue *q, struct request *rq)
337{ 337{
338 struct blk_queue_tag *bqt = q->queue_tags; 338 struct blk_queue_tag *bqt = q->queue_tags;
339 unsigned max_depth, offset; 339 unsigned max_depth;
340 int tag; 340 int tag;
341 341
342 if (unlikely((rq->cmd_flags & REQ_QUEUED))) { 342 if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
@@ -355,13 +355,16 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
355 * to starve sync IO on behalf of flooding async IO. 355 * to starve sync IO on behalf of flooding async IO.
356 */ 356 */
357 max_depth = bqt->max_depth; 357 max_depth = bqt->max_depth;
358 if (rq_is_sync(rq)) 358 if (!rq_is_sync(rq) && max_depth > 1) {
359 offset = 0; 359 max_depth -= 2;
360 else 360 if (!max_depth)
361 offset = max_depth >> 2; 361 max_depth = 1;
362 if (q->in_flight[0] > max_depth)
363 return 1;
364 }
362 365
363 do { 366 do {
364 tag = find_next_zero_bit(bqt->tag_map, max_depth, offset); 367 tag = find_first_zero_bit(bqt->tag_map, max_depth);
365 if (tag >= max_depth) 368 if (tag >= max_depth)
366 return 1; 369 return 1;
367 370
@@ -374,7 +377,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
374 rq->cmd_flags |= REQ_QUEUED; 377 rq->cmd_flags |= REQ_QUEUED;
375 rq->tag = tag; 378 rq->tag = tag;
376 bqt->tag_index[tag] = rq; 379 bqt->tag_index[tag] = rq;
377 blkdev_dequeue_request(rq); 380 blk_start_request(rq);
378 list_add(&rq->queuelist, &q->tag_busy_list); 381 list_add(&rq->queuelist, &q->tag_busy_list);
379 return 0; 382 return 0;
380} 383}
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 1ec0d503cacd..1ba7e0aca878 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -122,10 +122,8 @@ void blk_rq_timed_out_timer(unsigned long data)
122 if (blk_mark_rq_complete(rq)) 122 if (blk_mark_rq_complete(rq))
123 continue; 123 continue;
124 blk_rq_timed_out(rq); 124 blk_rq_timed_out(rq);
125 } else { 125 } else if (!next || time_after(next, rq->deadline))
126 if (!next || time_after(next, rq->deadline)) 126 next = rq->deadline;
127 next = rq->deadline;
128 }
129 } 127 }
130 128
131 /* 129 /*
@@ -176,16 +174,14 @@ void blk_add_timer(struct request *req)
176 BUG_ON(!list_empty(&req->timeout_list)); 174 BUG_ON(!list_empty(&req->timeout_list));
177 BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); 175 BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
178 176
179 if (req->timeout) 177 /*
180 req->deadline = jiffies + req->timeout; 178 * Some LLDs, like scsi, peek at the timeout to prevent a
181 else { 179 * command from being retried forever.
182 req->deadline = jiffies + q->rq_timeout; 180 */
183 /* 181 if (!req->timeout)
184 * Some LLDs, like scsi, peek at the timeout to prevent
185 * a command from being retried forever.
186 */
187 req->timeout = q->rq_timeout; 182 req->timeout = q->rq_timeout;
188 } 183
184 req->deadline = jiffies + req->timeout;
189 list_add_tail(&req->timeout_list, &q->timeout_list); 185 list_add_tail(&req->timeout_list, &q->timeout_list);
190 186
191 /* 187 /*
diff --git a/block/blk.h b/block/blk.h
index 79c85f7c9ff5..3fae6add5430 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -13,6 +13,9 @@ extern struct kobj_type blk_queue_ktype;
13void init_request_from_bio(struct request *req, struct bio *bio); 13void init_request_from_bio(struct request *req, struct bio *bio);
14void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 14void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
15 struct bio *bio); 15 struct bio *bio);
16int blk_rq_append_bio(struct request_queue *q, struct request *rq,
17 struct bio *bio);
18void blk_dequeue_request(struct request *rq);
16void __blk_queue_free_tags(struct request_queue *q); 19void __blk_queue_free_tags(struct request_queue *q);
17 20
18void blk_unplug_work(struct work_struct *work); 21void blk_unplug_work(struct work_struct *work);
@@ -43,6 +46,43 @@ static inline void blk_clear_rq_complete(struct request *rq)
43 clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); 46 clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
44} 47}
45 48
49/*
50 * Internal elevator interface
51 */
52#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
53
54static inline struct request *__elv_next_request(struct request_queue *q)
55{
56 struct request *rq;
57
58 while (1) {
59 while (!list_empty(&q->queue_head)) {
60 rq = list_entry_rq(q->queue_head.next);
61 if (blk_do_ordered(q, &rq))
62 return rq;
63 }
64
65 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
66 return NULL;
67 }
68}
69
70static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
71{
72 struct elevator_queue *e = q->elevator;
73
74 if (e->ops->elevator_activate_req_fn)
75 e->ops->elevator_activate_req_fn(q, rq);
76}
77
78static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
79{
80 struct elevator_queue *e = q->elevator;
81
82 if (e->ops->elevator_deactivate_req_fn)
83 e->ops->elevator_deactivate_req_fn(q, rq);
84}
85
46#ifdef CONFIG_FAIL_IO_TIMEOUT 86#ifdef CONFIG_FAIL_IO_TIMEOUT
47int blk_should_fake_timeout(struct request_queue *); 87int blk_should_fake_timeout(struct request_queue *);
48ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); 88ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
@@ -64,7 +104,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
64int attempt_back_merge(struct request_queue *q, struct request *rq); 104int attempt_back_merge(struct request_queue *q, struct request *rq);
65int attempt_front_merge(struct request_queue *q, struct request *rq); 105int attempt_front_merge(struct request_queue *q, struct request *rq);
66void blk_recalc_rq_segments(struct request *rq); 106void blk_recalc_rq_segments(struct request *rq);
67void blk_recalc_rq_sectors(struct request *rq, int nsect);
68 107
69void blk_queue_congestion_threshold(struct request_queue *q); 108void blk_queue_congestion_threshold(struct request_queue *q);
70 109
@@ -112,9 +151,17 @@ static inline int blk_cpu_to_group(int cpu)
112#endif 151#endif
113} 152}
114 153
154/*
155 * Contribute to IO statistics IFF:
156 *
157 * a) it's attached to a gendisk, and
158 * b) the queue had IO stats enabled when this request was started, and
159 * c) it's a file system request or a discard request
160 */
115static inline int blk_do_io_stat(struct request *rq) 161static inline int blk_do_io_stat(struct request *rq)
116{ 162{
117 return rq->rq_disk && blk_rq_io_stat(rq); 163 return rq->rq_disk && blk_rq_io_stat(rq) &&
164 (blk_fs_request(rq) || blk_discard_rq(rq));
118} 165}
119 166
120#endif 167#endif
diff --git a/block/bsg.c b/block/bsg.c
index dd81be455e00..5358f9ae13c1 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -446,15 +446,15 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
446 } 446 }
447 447
448 if (rq->next_rq) { 448 if (rq->next_rq) {
449 hdr->dout_resid = rq->data_len; 449 hdr->dout_resid = rq->resid_len;
450 hdr->din_resid = rq->next_rq->data_len; 450 hdr->din_resid = rq->next_rq->resid_len;
451 blk_rq_unmap_user(bidi_bio); 451 blk_rq_unmap_user(bidi_bio);
452 rq->next_rq->bio = NULL; 452 rq->next_rq->bio = NULL;
453 blk_put_request(rq->next_rq); 453 blk_put_request(rq->next_rq);
454 } else if (rq_data_dir(rq) == READ) 454 } else if (rq_data_dir(rq) == READ)
455 hdr->din_resid = rq->data_len; 455 hdr->din_resid = rq->resid_len;
456 else 456 else
457 hdr->dout_resid = rq->data_len; 457 hdr->dout_resid = rq->resid_len;
458 458
459 /* 459 /*
460 * If the request generated a negative error number, return it 460 * If the request generated a negative error number, return it
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a55a9bd75bd1..ef2f72d42434 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -349,8 +349,8 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2)
349 else if (rq_is_meta(rq2) && !rq_is_meta(rq1)) 349 else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
350 return rq2; 350 return rq2;
351 351
352 s1 = rq1->sector; 352 s1 = blk_rq_pos(rq1);
353 s2 = rq2->sector; 353 s2 = blk_rq_pos(rq2);
354 354
355 last = cfqd->last_position; 355 last = cfqd->last_position;
356 356
@@ -579,9 +579,9 @@ cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
579 * Sort strictly based on sector. Smallest to the left, 579 * Sort strictly based on sector. Smallest to the left,
580 * largest to the right. 580 * largest to the right.
581 */ 581 */
582 if (sector > cfqq->next_rq->sector) 582 if (sector > blk_rq_pos(cfqq->next_rq))
583 n = &(*p)->rb_right; 583 n = &(*p)->rb_right;
584 else if (sector < cfqq->next_rq->sector) 584 else if (sector < blk_rq_pos(cfqq->next_rq))
585 n = &(*p)->rb_left; 585 n = &(*p)->rb_left;
586 else 586 else
587 break; 587 break;
@@ -611,8 +611,8 @@ static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
611 return; 611 return;
612 612
613 cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio]; 613 cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
614 __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, cfqq->next_rq->sector, 614 __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
615 &parent, &p); 615 blk_rq_pos(cfqq->next_rq), &parent, &p);
616 if (!__cfqq) { 616 if (!__cfqq) {
617 rb_link_node(&cfqq->p_node, parent, p); 617 rb_link_node(&cfqq->p_node, parent, p);
618 rb_insert_color(&cfqq->p_node, cfqq->p_root); 618 rb_insert_color(&cfqq->p_node, cfqq->p_root);
@@ -760,7 +760,7 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
760 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", 760 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
761 cfqd->rq_in_driver); 761 cfqd->rq_in_driver);
762 762
763 cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; 763 cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
764} 764}
765 765
766static void cfq_deactivate_request(struct request_queue *q, struct request *rq) 766static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
@@ -949,10 +949,10 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
949static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, 949static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
950 struct request *rq) 950 struct request *rq)
951{ 951{
952 if (rq->sector >= cfqd->last_position) 952 if (blk_rq_pos(rq) >= cfqd->last_position)
953 return rq->sector - cfqd->last_position; 953 return blk_rq_pos(rq) - cfqd->last_position;
954 else 954 else
955 return cfqd->last_position - rq->sector; 955 return cfqd->last_position - blk_rq_pos(rq);
956} 956}
957 957
958#define CIC_SEEK_THR 8 * 1024 958#define CIC_SEEK_THR 8 * 1024
@@ -996,7 +996,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
996 if (cfq_rq_close(cfqd, __cfqq->next_rq)) 996 if (cfq_rq_close(cfqd, __cfqq->next_rq))
997 return __cfqq; 997 return __cfqq;
998 998
999 if (__cfqq->next_rq->sector < sector) 999 if (blk_rq_pos(__cfqq->next_rq) < sector)
1000 node = rb_next(&__cfqq->p_node); 1000 node = rb_next(&__cfqq->p_node);
1001 else 1001 else
1002 node = rb_prev(&__cfqq->p_node); 1002 node = rb_prev(&__cfqq->p_node);
@@ -1282,7 +1282,7 @@ static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1282 if (!cfqd->active_cic) { 1282 if (!cfqd->active_cic) {
1283 struct cfq_io_context *cic = RQ_CIC(rq); 1283 struct cfq_io_context *cic = RQ_CIC(rq);
1284 1284
1285 atomic_inc(&cic->ioc->refcount); 1285 atomic_long_inc(&cic->ioc->refcount);
1286 cfqd->active_cic = cic; 1286 cfqd->active_cic = cic;
1287 } 1287 }
1288} 1288}
@@ -1918,10 +1918,10 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
1918 1918
1919 if (!cic->last_request_pos) 1919 if (!cic->last_request_pos)
1920 sdist = 0; 1920 sdist = 0;
1921 else if (cic->last_request_pos < rq->sector) 1921 else if (cic->last_request_pos < blk_rq_pos(rq))
1922 sdist = rq->sector - cic->last_request_pos; 1922 sdist = blk_rq_pos(rq) - cic->last_request_pos;
1923 else 1923 else
1924 sdist = cic->last_request_pos - rq->sector; 1924 sdist = cic->last_request_pos - blk_rq_pos(rq);
1925 1925
1926 /* 1926 /*
1927 * Don't allow the seek distance to get too large from the 1927 * Don't allow the seek distance to get too large from the
@@ -2071,7 +2071,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2071 cfq_update_io_seektime(cfqd, cic, rq); 2071 cfq_update_io_seektime(cfqd, cic, rq);
2072 cfq_update_idle_window(cfqd, cfqq, cic); 2072 cfq_update_idle_window(cfqd, cfqq, cic);
2073 2073
2074 cic->last_request_pos = rq->sector + rq->nr_sectors; 2074 cic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
2075 2075
2076 if (cfqq == cfqd->active_queue) { 2076 if (cfqq == cfqd->active_queue) {
2077 /* 2077 /*
@@ -2088,7 +2088,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2088 if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || 2088 if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
2089 cfqd->busy_queues > 1) { 2089 cfqd->busy_queues > 1) {
2090 del_timer(&cfqd->idle_slice_timer); 2090 del_timer(&cfqd->idle_slice_timer);
2091 blk_start_queueing(cfqd->queue); 2091 __blk_run_queue(cfqd->queue);
2092 } 2092 }
2093 cfq_mark_cfqq_must_dispatch(cfqq); 2093 cfq_mark_cfqq_must_dispatch(cfqq);
2094 } 2094 }
@@ -2100,7 +2100,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2100 * this new queue is RT and the current one is BE 2100 * this new queue is RT and the current one is BE
2101 */ 2101 */
2102 cfq_preempt_queue(cfqd, cfqq); 2102 cfq_preempt_queue(cfqd, cfqq);
2103 blk_start_queueing(cfqd->queue); 2103 __blk_run_queue(cfqd->queue);
2104 } 2104 }
2105} 2105}
2106 2106
@@ -2345,7 +2345,7 @@ static void cfq_kick_queue(struct work_struct *work)
2345 struct request_queue *q = cfqd->queue; 2345 struct request_queue *q = cfqd->queue;
2346 2346
2347 spin_lock_irq(q->queue_lock); 2347 spin_lock_irq(q->queue_lock);
2348 blk_start_queueing(q); 2348 __blk_run_queue(cfqd->queue);
2349 spin_unlock_irq(q->queue_lock); 2349 spin_unlock_irq(q->queue_lock);
2350} 2350}
2351 2351
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f8c218cd08e1..7865a34e0faa 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -763,10 +763,10 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
763 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ 763 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
764 return compat_put_int(arg, block_size(bdev)); 764 return compat_put_int(arg, block_size(bdev));
765 case BLKSSZGET: /* get block device hardware sector size */ 765 case BLKSSZGET: /* get block device hardware sector size */
766 return compat_put_int(arg, bdev_hardsect_size(bdev)); 766 return compat_put_int(arg, bdev_logical_block_size(bdev));
767 case BLKSECTGET: 767 case BLKSECTGET:
768 return compat_put_ushort(arg, 768 return compat_put_ushort(arg,
769 bdev_get_queue(bdev)->max_sectors); 769 queue_max_sectors(bdev_get_queue(bdev)));
770 case BLKRASET: /* compatible, but no compat_ptr (!) */ 770 case BLKRASET: /* compatible, but no compat_ptr (!) */
771 case BLKFRASET: 771 case BLKFRASET:
772 if (!capable(CAP_SYS_ADMIN)) 772 if (!capable(CAP_SYS_ADMIN))
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c4d991d4adef..b547cbca7b23 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -138,7 +138,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
138 138
139 __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector); 139 __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
140 if (__rq) { 140 if (__rq) {
141 BUG_ON(sector != __rq->sector); 141 BUG_ON(sector != blk_rq_pos(__rq));
142 142
143 if (elv_rq_merge_ok(__rq, bio)) { 143 if (elv_rq_merge_ok(__rq, bio)) {
144 ret = ELEVATOR_FRONT_MERGE; 144 ret = ELEVATOR_FRONT_MERGE;
diff --git a/block/elevator.c b/block/elevator.c
index e220f0c543e3..ca861927ba41 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -51,8 +51,7 @@ static const int elv_hash_shift = 6;
51#define ELV_HASH_FN(sec) \ 51#define ELV_HASH_FN(sec) \
52 (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) 52 (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
53#define ELV_HASH_ENTRIES (1 << elv_hash_shift) 53#define ELV_HASH_ENTRIES (1 << elv_hash_shift)
54#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) 54#define rq_hash_key(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq))
55#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
56 55
57/* 56/*
58 * Query io scheduler to see if the current process issuing bio may be 57 * Query io scheduler to see if the current process issuing bio may be
@@ -116,9 +115,9 @@ static inline int elv_try_merge(struct request *__rq, struct bio *bio)
116 * we can merge and sequence is ok, check if it's possible 115 * we can merge and sequence is ok, check if it's possible
117 */ 116 */
118 if (elv_rq_merge_ok(__rq, bio)) { 117 if (elv_rq_merge_ok(__rq, bio)) {
119 if (__rq->sector + __rq->nr_sectors == bio->bi_sector) 118 if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector)
120 ret = ELEVATOR_BACK_MERGE; 119 ret = ELEVATOR_BACK_MERGE;
121 else if (__rq->sector - bio_sectors(bio) == bio->bi_sector) 120 else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector)
122 ret = ELEVATOR_FRONT_MERGE; 121 ret = ELEVATOR_FRONT_MERGE;
123 } 122 }
124 123
@@ -306,22 +305,6 @@ void elevator_exit(struct elevator_queue *e)
306} 305}
307EXPORT_SYMBOL(elevator_exit); 306EXPORT_SYMBOL(elevator_exit);
308 307
309static void elv_activate_rq(struct request_queue *q, struct request *rq)
310{
311 struct elevator_queue *e = q->elevator;
312
313 if (e->ops->elevator_activate_req_fn)
314 e->ops->elevator_activate_req_fn(q, rq);
315}
316
317static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
318{
319 struct elevator_queue *e = q->elevator;
320
321 if (e->ops->elevator_deactivate_req_fn)
322 e->ops->elevator_deactivate_req_fn(q, rq);
323}
324
325static inline void __elv_rqhash_del(struct request *rq) 308static inline void __elv_rqhash_del(struct request *rq)
326{ 309{
327 hlist_del_init(&rq->hash); 310 hlist_del_init(&rq->hash);
@@ -383,9 +366,9 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
383 parent = *p; 366 parent = *p;
384 __rq = rb_entry(parent, struct request, rb_node); 367 __rq = rb_entry(parent, struct request, rb_node);
385 368
386 if (rq->sector < __rq->sector) 369 if (blk_rq_pos(rq) < blk_rq_pos(__rq))
387 p = &(*p)->rb_left; 370 p = &(*p)->rb_left;
388 else if (rq->sector > __rq->sector) 371 else if (blk_rq_pos(rq) > blk_rq_pos(__rq))
389 p = &(*p)->rb_right; 372 p = &(*p)->rb_right;
390 else 373 else
391 return __rq; 374 return __rq;
@@ -413,9 +396,9 @@ struct request *elv_rb_find(struct rb_root *root, sector_t sector)
413 while (n) { 396 while (n) {
414 rq = rb_entry(n, struct request, rb_node); 397 rq = rb_entry(n, struct request, rb_node);
415 398
416 if (sector < rq->sector) 399 if (sector < blk_rq_pos(rq))
417 n = n->rb_left; 400 n = n->rb_left;
418 else if (sector > rq->sector) 401 else if (sector > blk_rq_pos(rq))
419 n = n->rb_right; 402 n = n->rb_right;
420 else 403 else
421 return rq; 404 return rq;
@@ -454,14 +437,14 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
454 break; 437 break;
455 if (pos->cmd_flags & stop_flags) 438 if (pos->cmd_flags & stop_flags)
456 break; 439 break;
457 if (rq->sector >= boundary) { 440 if (blk_rq_pos(rq) >= boundary) {
458 if (pos->sector < boundary) 441 if (blk_rq_pos(pos) < boundary)
459 continue; 442 continue;
460 } else { 443 } else {
461 if (pos->sector >= boundary) 444 if (blk_rq_pos(pos) >= boundary)
462 break; 445 break;
463 } 446 }
464 if (rq->sector >= pos->sector) 447 if (blk_rq_pos(rq) >= blk_rq_pos(pos))
465 break; 448 break;
466 } 449 }
467 450
@@ -559,7 +542,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
559 * in_flight count again 542 * in_flight count again
560 */ 543 */
561 if (blk_account_rq(rq)) { 544 if (blk_account_rq(rq)) {
562 q->in_flight--; 545 q->in_flight[rq_is_sync(rq)]--;
563 if (blk_sorted_rq(rq)) 546 if (blk_sorted_rq(rq))
564 elv_deactivate_rq(q, rq); 547 elv_deactivate_rq(q, rq);
565 } 548 }
@@ -588,6 +571,9 @@ void elv_drain_elevator(struct request_queue *q)
588 */ 571 */
589void elv_quiesce_start(struct request_queue *q) 572void elv_quiesce_start(struct request_queue *q)
590{ 573{
574 if (!q->elevator)
575 return;
576
591 queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); 577 queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
592 578
593 /* 579 /*
@@ -595,7 +581,7 @@ void elv_quiesce_start(struct request_queue *q)
595 */ 581 */
596 elv_drain_elevator(q); 582 elv_drain_elevator(q);
597 while (q->rq.elvpriv) { 583 while (q->rq.elvpriv) {
598 blk_start_queueing(q); 584 __blk_run_queue(q);
599 spin_unlock_irq(q->queue_lock); 585 spin_unlock_irq(q->queue_lock);
600 msleep(10); 586 msleep(10);
601 spin_lock_irq(q->queue_lock); 587 spin_lock_irq(q->queue_lock);
@@ -639,8 +625,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
639 * with anything. There's no point in delaying queue 625 * with anything. There's no point in delaying queue
640 * processing. 626 * processing.
641 */ 627 */
642 blk_remove_plug(q); 628 __blk_run_queue(q);
643 blk_start_queueing(q);
644 break; 629 break;
645 630
646 case ELEVATOR_INSERT_SORT: 631 case ELEVATOR_INSERT_SORT:
@@ -699,7 +684,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
699 684
700 if (unplug_it && blk_queue_plugged(q)) { 685 if (unplug_it && blk_queue_plugged(q)) {
701 int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC] 686 int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
702 - q->in_flight; 687 - queue_in_flight(q);
703 688
704 if (nrq >= q->unplug_thresh) 689 if (nrq >= q->unplug_thresh)
705 __generic_unplug_device(q); 690 __generic_unplug_device(q);
@@ -755,117 +740,6 @@ void elv_add_request(struct request_queue *q, struct request *rq, int where,
755} 740}
756EXPORT_SYMBOL(elv_add_request); 741EXPORT_SYMBOL(elv_add_request);
757 742
758static inline struct request *__elv_next_request(struct request_queue *q)
759{
760 struct request *rq;
761
762 while (1) {
763 while (!list_empty(&q->queue_head)) {
764 rq = list_entry_rq(q->queue_head.next);
765 if (blk_do_ordered(q, &rq))
766 return rq;
767 }
768
769 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
770 return NULL;
771 }
772}
773
774struct request *elv_next_request(struct request_queue *q)
775{
776 struct request *rq;
777 int ret;
778
779 while ((rq = __elv_next_request(q)) != NULL) {
780 if (!(rq->cmd_flags & REQ_STARTED)) {
781 /*
782 * This is the first time the device driver
783 * sees this request (possibly after
784 * requeueing). Notify IO scheduler.
785 */
786 if (blk_sorted_rq(rq))
787 elv_activate_rq(q, rq);
788
789 /*
790 * just mark as started even if we don't start
791 * it, a request that has been delayed should
792 * not be passed by new incoming requests
793 */
794 rq->cmd_flags |= REQ_STARTED;
795 trace_block_rq_issue(q, rq);
796 }
797
798 if (!q->boundary_rq || q->boundary_rq == rq) {
799 q->end_sector = rq_end_sector(rq);
800 q->boundary_rq = NULL;
801 }
802
803 if (rq->cmd_flags & REQ_DONTPREP)
804 break;
805
806 if (q->dma_drain_size && rq->data_len) {
807 /*
808 * make sure space for the drain appears we
809 * know we can do this because max_hw_segments
810 * has been adjusted to be one fewer than the
811 * device can handle
812 */
813 rq->nr_phys_segments++;
814 }
815
816 if (!q->prep_rq_fn)
817 break;
818
819 ret = q->prep_rq_fn(q, rq);
820 if (ret == BLKPREP_OK) {
821 break;
822 } else if (ret == BLKPREP_DEFER) {
823 /*
824 * the request may have been (partially) prepped.
825 * we need to keep this request in the front to
826 * avoid resource deadlock. REQ_STARTED will
827 * prevent other fs requests from passing this one.
828 */
829 if (q->dma_drain_size && rq->data_len &&
830 !(rq->cmd_flags & REQ_DONTPREP)) {
831 /*
832 * remove the space for the drain we added
833 * so that we don't add it again
834 */
835 --rq->nr_phys_segments;
836 }
837
838 rq = NULL;
839 break;
840 } else if (ret == BLKPREP_KILL) {
841 rq->cmd_flags |= REQ_QUIET;
842 __blk_end_request(rq, -EIO, blk_rq_bytes(rq));
843 } else {
844 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
845 break;
846 }
847 }
848
849 return rq;
850}
851EXPORT_SYMBOL(elv_next_request);
852
853void elv_dequeue_request(struct request_queue *q, struct request *rq)
854{
855 BUG_ON(list_empty(&rq->queuelist));
856 BUG_ON(ELV_ON_HASH(rq));
857
858 list_del_init(&rq->queuelist);
859
860 /*
861 * the time frame between a request being removed from the lists
862 * and to it is freed is accounted as io that is in progress at
863 * the driver side.
864 */
865 if (blk_account_rq(rq))
866 q->in_flight++;
867}
868
869int elv_queue_empty(struct request_queue *q) 743int elv_queue_empty(struct request_queue *q)
870{ 744{
871 struct elevator_queue *e = q->elevator; 745 struct elevator_queue *e = q->elevator;
@@ -935,7 +809,12 @@ void elv_abort_queue(struct request_queue *q)
935 rq = list_entry_rq(q->queue_head.next); 809 rq = list_entry_rq(q->queue_head.next);
936 rq->cmd_flags |= REQ_QUIET; 810 rq->cmd_flags |= REQ_QUIET;
937 trace_block_rq_abort(q, rq); 811 trace_block_rq_abort(q, rq);
938 __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); 812 /*
813 * Mark this request as started so we don't trigger
814 * any debug logic in the end I/O path.
815 */
816 blk_start_request(rq);
817 __blk_end_request_all(rq, -EIO);
939 } 818 }
940} 819}
941EXPORT_SYMBOL(elv_abort_queue); 820EXPORT_SYMBOL(elv_abort_queue);
@@ -948,7 +827,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
948 * request is released from the driver, io must be done 827 * request is released from the driver, io must be done
949 */ 828 */
950 if (blk_account_rq(rq)) { 829 if (blk_account_rq(rq)) {
951 q->in_flight--; 830 q->in_flight[rq_is_sync(rq)]--;
952 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) 831 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
953 e->ops->elevator_completed_req_fn(q, rq); 832 e->ops->elevator_completed_req_fn(q, rq);
954 } 833 }
@@ -963,11 +842,11 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
963 if (!list_empty(&q->queue_head)) 842 if (!list_empty(&q->queue_head))
964 next = list_entry_rq(q->queue_head.next); 843 next = list_entry_rq(q->queue_head.next);
965 844
966 if (!q->in_flight && 845 if (!queue_in_flight(q) &&
967 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && 846 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
968 (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { 847 (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
969 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); 848 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
970 blk_start_queueing(q); 849 __blk_run_queue(q);
971 } 850 }
972 } 851 }
973} 852}
@@ -1175,6 +1054,9 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
1175 char elevator_name[ELV_NAME_MAX]; 1054 char elevator_name[ELV_NAME_MAX];
1176 struct elevator_type *e; 1055 struct elevator_type *e;
1177 1056
1057 if (!q->elevator)
1058 return count;
1059
1178 strlcpy(elevator_name, name, sizeof(elevator_name)); 1060 strlcpy(elevator_name, name, sizeof(elevator_name));
1179 strstrip(elevator_name); 1061 strstrip(elevator_name);
1180 1062
@@ -1198,10 +1080,15 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
1198ssize_t elv_iosched_show(struct request_queue *q, char *name) 1080ssize_t elv_iosched_show(struct request_queue *q, char *name)
1199{ 1081{
1200 struct elevator_queue *e = q->elevator; 1082 struct elevator_queue *e = q->elevator;
1201 struct elevator_type *elv = e->elevator_type; 1083 struct elevator_type *elv;
1202 struct elevator_type *__e; 1084 struct elevator_type *__e;
1203 int len = 0; 1085 int len = 0;
1204 1086
1087 if (!q->elevator)
1088 return sprintf(name, "none\n");
1089
1090 elv = e->elevator_type;
1091
1205 spin_lock(&elv_list_lock); 1092 spin_lock(&elv_list_lock);
1206 list_for_each_entry(__e, &elv_list, list) { 1093 list_for_each_entry(__e, &elv_list, list) {
1207 if (!strcmp(elv->elevator_name, __e->elevator_name)) 1094 if (!strcmp(elv->elevator_name, __e->elevator_name))
diff --git a/block/genhd.c b/block/genhd.c
index 1a4916e01732..fe7ccc0a618f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -852,11 +852,21 @@ static ssize_t disk_capability_show(struct device *dev,
852 return sprintf(buf, "%x\n", disk->flags); 852 return sprintf(buf, "%x\n", disk->flags);
853} 853}
854 854
855static ssize_t disk_alignment_offset_show(struct device *dev,
856 struct device_attribute *attr,
857 char *buf)
858{
859 struct gendisk *disk = dev_to_disk(dev);
860
861 return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
862}
863
855static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 864static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
856static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); 865static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
857static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 866static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
858static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); 867static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
859static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 868static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
869static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
860static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 870static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
861static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 871static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
862#ifdef CONFIG_FAIL_MAKE_REQUEST 872#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -875,6 +885,7 @@ static struct attribute *disk_attrs[] = {
875 &dev_attr_removable.attr, 885 &dev_attr_removable.attr,
876 &dev_attr_ro.attr, 886 &dev_attr_ro.attr,
877 &dev_attr_size.attr, 887 &dev_attr_size.attr,
888 &dev_attr_alignment_offset.attr,
878 &dev_attr_capability.attr, 889 &dev_attr_capability.attr,
879 &dev_attr_stat.attr, 890 &dev_attr_stat.attr,
880#ifdef CONFIG_FAIL_MAKE_REQUEST 891#ifdef CONFIG_FAIL_MAKE_REQUEST
diff --git a/block/ioctl.c b/block/ioctl.c
index ad474d4bbcce..500e4c73cc52 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -152,10 +152,10 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
152 bio->bi_private = &wait; 152 bio->bi_private = &wait;
153 bio->bi_sector = start; 153 bio->bi_sector = start;
154 154
155 if (len > q->max_hw_sectors) { 155 if (len > queue_max_hw_sectors(q)) {
156 bio->bi_size = q->max_hw_sectors << 9; 156 bio->bi_size = queue_max_hw_sectors(q) << 9;
157 len -= q->max_hw_sectors; 157 len -= queue_max_hw_sectors(q);
158 start += q->max_hw_sectors; 158 start += queue_max_hw_sectors(q);
159 } else { 159 } else {
160 bio->bi_size = len << 9; 160 bio->bi_size = len << 9;
161 len = 0; 161 len = 0;
@@ -311,9 +311,9 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
311 case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */ 311 case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
312 return put_int(arg, block_size(bdev)); 312 return put_int(arg, block_size(bdev));
313 case BLKSSZGET: /* get block device hardware sector size */ 313 case BLKSSZGET: /* get block device hardware sector size */
314 return put_int(arg, bdev_hardsect_size(bdev)); 314 return put_int(arg, bdev_logical_block_size(bdev));
315 case BLKSECTGET: 315 case BLKSECTGET:
316 return put_ushort(arg, bdev_get_queue(bdev)->max_sectors); 316 return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
317 case BLKRASET: 317 case BLKRASET:
318 case BLKFRASET: 318 case BLKFRASET:
319 if(!capable(CAP_SYS_ADMIN)) 319 if(!capable(CAP_SYS_ADMIN))
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 82a0ca2f6729..5f8e798ede4e 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -75,7 +75,7 @@ static int sg_set_timeout(struct request_queue *q, int __user *p)
75 75
76static int sg_get_reserved_size(struct request_queue *q, int __user *p) 76static int sg_get_reserved_size(struct request_queue *q, int __user *p)
77{ 77{
78 unsigned val = min(q->sg_reserved_size, q->max_sectors << 9); 78 unsigned val = min(q->sg_reserved_size, queue_max_sectors(q) << 9);
79 79
80 return put_user(val, p); 80 return put_user(val, p);
81} 81}
@@ -89,8 +89,8 @@ static int sg_set_reserved_size(struct request_queue *q, int __user *p)
89 89
90 if (size < 0) 90 if (size < 0)
91 return -EINVAL; 91 return -EINVAL;
92 if (size > (q->max_sectors << 9)) 92 if (size > (queue_max_sectors(q) << 9))
93 size = q->max_sectors << 9; 93 size = queue_max_sectors(q) << 9;
94 94
95 q->sg_reserved_size = size; 95 q->sg_reserved_size = size;
96 return 0; 96 return 0;
@@ -230,7 +230,7 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
230 hdr->info = 0; 230 hdr->info = 0;
231 if (hdr->masked_status || hdr->host_status || hdr->driver_status) 231 if (hdr->masked_status || hdr->host_status || hdr->driver_status)
232 hdr->info |= SG_INFO_CHECK; 232 hdr->info |= SG_INFO_CHECK;
233 hdr->resid = rq->data_len; 233 hdr->resid = rq->resid_len;
234 hdr->sb_len_wr = 0; 234 hdr->sb_len_wr = 0;
235 235
236 if (rq->sense_len && hdr->sbp) { 236 if (rq->sense_len && hdr->sbp) {
@@ -264,7 +264,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
264 if (hdr->cmd_len > BLK_MAX_CDB) 264 if (hdr->cmd_len > BLK_MAX_CDB)
265 return -EINVAL; 265 return -EINVAL;
266 266
267 if (hdr->dxfer_len > (q->max_hw_sectors << 9)) 267 if (hdr->dxfer_len > (queue_max_hw_sectors(q) << 9))
268 return -EIO; 268 return -EIO;
269 269
270 if (hdr->dxfer_len) 270 if (hdr->dxfer_len)
@@ -500,9 +500,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
500 500
501 rq = blk_get_request(q, WRITE, __GFP_WAIT); 501 rq = blk_get_request(q, WRITE, __GFP_WAIT);
502 rq->cmd_type = REQ_TYPE_BLOCK_PC; 502 rq->cmd_type = REQ_TYPE_BLOCK_PC;
503 rq->data = NULL;
504 rq->data_len = 0;
505 rq->extra_len = 0;
506 rq->timeout = BLK_DEFAULT_SG_TIMEOUT; 503 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
507 rq->cmd[0] = cmd; 504 rq->cmd[0] = cmd;
508 rq->cmd[4] = data; 505 rq->cmd[4] = data;