author     Linus Torvalds <torvalds@linux-foundation.org>   2016-07-26 18:03:07 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-07-26 18:03:07 -0400
commit     d05d7f40791ccbb6e543cc5dd6a6aa08fc71d635 (patch)
tree       dc0039fe490a41a70de10d58fe8e6136db46463a /block
parent     75a442efb1ca613f8d1cc71a32c2c9b0aefae4a5 (diff)
parent     17007f3994cdb4643355c73f54f0adad006cf59e (diff)
Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:

 - the big change is the cleanup from Mike Christie, cleaning up our uses of command types and modified flags. This is what will throw some merge conflicts

 - regression fix for the above for btrfs, from Vincent

 - following up to the above, better packing of struct request from Christoph

 - a 2038 fix for blktrace from Arnd

 - a few trivial/spelling fixes from Bart Van Assche

 - a front merge check fix from Damien, which could cause issues on SMR drives

 - Atari partition fix from Gabriel

 - convert cfq to highres timers, since jiffies isn't granular enough for some devices these days. From Jan and Jeff

 - CFQ priority boost fix for idle classes, from me

 - cleanup series from Ming, improving our bio/bvec iteration

 - a direct issue fix for blk-mq from Omar

 - fix for plug merging not involving the IO scheduler, like we do for other types of merges. From Tahsin

 - expose DAX type internally and through sysfs. From Toshi and Yigal

* 'for-4.8/core' of git://git.kernel.dk/linux-block: (76 commits)
  block: Fix front merge check
  block: do not merge requests without consulting with io scheduler
  block: Fix spelling in a source code comment
  block: expose QUEUE_FLAG_DAX in sysfs
  block: add QUEUE_FLAG_DAX for devices to advertise their DAX support
  Btrfs: fix comparison in __btrfs_map_block()
  block: atari: Return early for unsupported sector size
  Doc: block: Fix a typo in queue-sysfs.txt
  cfq-iosched: Charge at least 1 jiffie instead of 1 ns
  cfq-iosched: Fix regression in bonnie++ rewrite performance
  cfq-iosched: Convert slice_resid from u64 to s64
  block: Convert fifo_time from ulong to u64
  blktrace: avoid using timespec
  block/blk-cgroup.c: Declare local symbols static
  block/bio-integrity.c: Add #include "blk.h"
  block/partition-generic.c: Remove a set-but-not-used variable
  block: bio: kill BIO_MAX_SIZE
  cfq-iosched: temporarily boost queue priority for idle classes
  block: drbd: avoid to use BIO_MAX_SIZE
  block: bio: remove BIO_MAX_SECTORS
  ...
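As a quick orientation for the Mike Christie series that dominates this pull (and its merge conflicts): callers now record the operation (REQ_OP_*) and the modifier flags in the bio itself, pass only the bio to submit_bio(), and compare bio_op() instead of testing REQ_DISCARD/REQ_WRITE_SAME bits in bi_rw. A minimal caller-side sketch using the helpers visible in the hunks below; the two wrapper functions here are hypothetical and not part of the patch:

    #include <linux/bio.h>
    #include <linux/blkdev.h>

    /* Hypothetical wrapper: pre-4.8 this was submit_bio(WRITE | REQ_SYNC, bio). */
    static void submit_sync_write(struct bio *bio)
    {
            bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC);  /* op + rq_flag_bits */
            submit_bio(bio);                                /* no rw argument anymore */
    }

    /*
     * Hypothetical predicate: operations are compared by value via bio_op()
     * rather than by testing individual bits in bio->bi_rw.
     */
    static bool bio_needs_payload_clone(struct bio *bio)
    {
            return bio_op(bio) == REQ_OP_DISCARD ||
                   bio_op(bio) == REQ_OP_WRITE_SAME;
    }

The request side mirrors this with req_op()/req_set_op_attrs(), as the blk-core.c and blk-mq.c hunks below show.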
Diffstat (limited to 'block')
-rw-r--r--  block/bio-integrity.c       |   1
-rw-r--r--  block/bio.c                 |  20
-rw-r--r--  block/blk-cgroup.c          |   4
-rw-r--r--  block/blk-core.c            |  96
-rw-r--r--  block/blk-exec.c            |   2
-rw-r--r--  block/blk-flush.c           |  23
-rw-r--r--  block/blk-lib.c             |  31
-rw-r--r--  block/blk-map.c             |   2
-rw-r--r--  block/blk-merge.c           |  36
-rw-r--r--  block/blk-mq.c              |  42
-rw-r--r--  block/blk-sysfs.c           |  11
-rw-r--r--  block/cfq-iosched.c         | 432
-rw-r--r--  block/deadline-iosched.c    |   7
-rw-r--r--  block/elevator.c            |  29
-rw-r--r--  block/partition-generic.c   |   3
-rw-r--r--  block/partitions/atari.c    |   7
16 files changed, 434 insertions, 312 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 711e4d8de6fa..15d37b1cd500 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -26,6 +26,7 @@
26#include <linux/bio.h> 26#include <linux/bio.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include "blk.h"
29 30
30#define BIP_INLINE_VECS 4 31#define BIP_INLINE_VECS 4
31 32
diff --git a/block/bio.c b/block/bio.c
index 0e4aa42bc30d..848cd351513b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -656,16 +656,15 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
656 bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); 656 bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
657 if (!bio) 657 if (!bio)
658 return NULL; 658 return NULL;
659
660 bio->bi_bdev = bio_src->bi_bdev; 659 bio->bi_bdev = bio_src->bi_bdev;
661 bio->bi_rw = bio_src->bi_rw; 660 bio->bi_rw = bio_src->bi_rw;
662 bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; 661 bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
663 bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; 662 bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
664 663
665 if (bio->bi_rw & REQ_DISCARD) 664 if (bio_op(bio) == REQ_OP_DISCARD)
666 goto integrity_clone; 665 goto integrity_clone;
667 666
668 if (bio->bi_rw & REQ_WRITE_SAME) { 667 if (bio_op(bio) == REQ_OP_WRITE_SAME) {
669 bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; 668 bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
670 goto integrity_clone; 669 goto integrity_clone;
671 } 670 }
@@ -854,21 +853,20 @@ static void submit_bio_wait_endio(struct bio *bio)
854 853
855/** 854/**
856 * submit_bio_wait - submit a bio, and wait until it completes 855 * submit_bio_wait - submit a bio, and wait until it completes
857 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
858 * @bio: The &struct bio which describes the I/O 856 * @bio: The &struct bio which describes the I/O
859 * 857 *
860 * Simple wrapper around submit_bio(). Returns 0 on success, or the error from 858 * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
861 * bio_endio() on failure. 859 * bio_endio() on failure.
862 */ 860 */
863int submit_bio_wait(int rw, struct bio *bio) 861int submit_bio_wait(struct bio *bio)
864{ 862{
865 struct submit_bio_ret ret; 863 struct submit_bio_ret ret;
866 864
867 rw |= REQ_SYNC;
868 init_completion(&ret.event); 865 init_completion(&ret.event);
869 bio->bi_private = &ret; 866 bio->bi_private = &ret;
870 bio->bi_end_io = submit_bio_wait_endio; 867 bio->bi_end_io = submit_bio_wait_endio;
871 submit_bio(rw, bio); 868 bio->bi_rw |= REQ_SYNC;
869 submit_bio(bio);
872 wait_for_completion_io(&ret.event); 870 wait_for_completion_io(&ret.event);
873 871
874 return ret.error; 872 return ret.error;
@@ -1167,7 +1165,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
1167 goto out_bmd; 1165 goto out_bmd;
1168 1166
1169 if (iter->type & WRITE) 1167 if (iter->type & WRITE)
1170 bio->bi_rw |= REQ_WRITE; 1168 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1171 1169
1172 ret = 0; 1170 ret = 0;
1173 1171
@@ -1337,7 +1335,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
1337 * set data direction, and check if mapped pages need bouncing 1335 * set data direction, and check if mapped pages need bouncing
1338 */ 1336 */
1339 if (iter->type & WRITE) 1337 if (iter->type & WRITE)
1340 bio->bi_rw |= REQ_WRITE; 1338 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1341 1339
1342 bio_set_flag(bio, BIO_USER_MAPPED); 1340 bio_set_flag(bio, BIO_USER_MAPPED);
1343 1341
@@ -1530,7 +1528,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1530 bio->bi_private = data; 1528 bio->bi_private = data;
1531 } else { 1529 } else {
1532 bio->bi_end_io = bio_copy_kern_endio; 1530 bio->bi_end_io = bio_copy_kern_endio;
1533 bio->bi_rw |= REQ_WRITE; 1531 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1534 } 1532 }
1535 1533
1536 return bio; 1534 return bio;
@@ -1785,7 +1783,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
1785 * Discards need a mutable bio_vec to accommodate the payload 1783 * Discards need a mutable bio_vec to accommodate the payload
1786 * required by the DSM TRIM and UNMAP commands. 1784 * required by the DSM TRIM and UNMAP commands.
1787 */ 1785 */
1788 if (bio->bi_rw & REQ_DISCARD) 1786 if (bio_op(bio) == REQ_OP_DISCARD)
1789 split = bio_clone_bioset(bio, gfp, bs); 1787 split = bio_clone_bioset(bio, gfp, bs);
1790 else 1788 else
1791 split = bio_clone_fast(bio, gfp, bs); 1789 split = bio_clone_fast(bio, gfp, bs);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 66e6f1aae02e..dd38e5ced4a3 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -905,7 +905,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
905 return 0; 905 return 0;
906} 906}
907 907
908struct cftype blkcg_files[] = { 908static struct cftype blkcg_files[] = {
909 { 909 {
910 .name = "stat", 910 .name = "stat",
911 .flags = CFTYPE_NOT_ON_ROOT, 911 .flags = CFTYPE_NOT_ON_ROOT,
@@ -914,7 +914,7 @@ struct cftype blkcg_files[] = {
914 { } /* terminate */ 914 { } /* terminate */
915}; 915};
916 916
917struct cftype blkcg_legacy_files[] = { 917static struct cftype blkcg_legacy_files[] = {
918 { 918 {
919 .name = "reset_stats", 919 .name = "reset_stats",
920 .write_u64 = blkcg_reset_stats, 920 .write_u64 = blkcg_reset_stats,
diff --git a/block/blk-core.c b/block/blk-core.c
index 2475b1c72773..3cfd67d006fb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -959,10 +959,10 @@ static void __freed_request(struct request_list *rl, int sync)
959 * A request has just been released. Account for it, update the full and 959 * A request has just been released. Account for it, update the full and
960 * congestion status, wake up any waiters. Called under q->queue_lock. 960 * congestion status, wake up any waiters. Called under q->queue_lock.
961 */ 961 */
962static void freed_request(struct request_list *rl, unsigned int flags) 962static void freed_request(struct request_list *rl, int op, unsigned int flags)
963{ 963{
964 struct request_queue *q = rl->q; 964 struct request_queue *q = rl->q;
965 int sync = rw_is_sync(flags); 965 int sync = rw_is_sync(op, flags);
966 966
967 q->nr_rqs[sync]--; 967 q->nr_rqs[sync]--;
968 rl->count[sync]--; 968 rl->count[sync]--;
@@ -1029,7 +1029,7 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
1029 * Flush requests do not use the elevator so skip initialization. 1029 * Flush requests do not use the elevator so skip initialization.
1030 * This allows a request to share the flush and elevator data. 1030 * This allows a request to share the flush and elevator data.
1031 */ 1031 */
1032 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) 1032 if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA))
1033 return false; 1033 return false;
1034 1034
1035 return true; 1035 return true;
@@ -1054,7 +1054,8 @@ static struct io_context *rq_ioc(struct bio *bio)
1054/** 1054/**
1055 * __get_request - get a free request 1055 * __get_request - get a free request
1056 * @rl: request list to allocate from 1056 * @rl: request list to allocate from
1057 * @rw_flags: RW and SYNC flags 1057 * @op: REQ_OP_READ/REQ_OP_WRITE
1058 * @op_flags: rq_flag_bits
1058 * @bio: bio to allocate request for (can be %NULL) 1059 * @bio: bio to allocate request for (can be %NULL)
1059 * @gfp_mask: allocation mask 1060 * @gfp_mask: allocation mask
1060 * 1061 *
@@ -1065,21 +1066,22 @@ static struct io_context *rq_ioc(struct bio *bio)
1065 * Returns ERR_PTR on failure, with @q->queue_lock held. 1066 * Returns ERR_PTR on failure, with @q->queue_lock held.
1066 * Returns request pointer on success, with @q->queue_lock *not held*. 1067 * Returns request pointer on success, with @q->queue_lock *not held*.
1067 */ 1068 */
1068static struct request *__get_request(struct request_list *rl, int rw_flags, 1069static struct request *__get_request(struct request_list *rl, int op,
1069 struct bio *bio, gfp_t gfp_mask) 1070 int op_flags, struct bio *bio,
1071 gfp_t gfp_mask)
1070{ 1072{
1071 struct request_queue *q = rl->q; 1073 struct request_queue *q = rl->q;
1072 struct request *rq; 1074 struct request *rq;
1073 struct elevator_type *et = q->elevator->type; 1075 struct elevator_type *et = q->elevator->type;
1074 struct io_context *ioc = rq_ioc(bio); 1076 struct io_context *ioc = rq_ioc(bio);
1075 struct io_cq *icq = NULL; 1077 struct io_cq *icq = NULL;
1076 const bool is_sync = rw_is_sync(rw_flags) != 0; 1078 const bool is_sync = rw_is_sync(op, op_flags) != 0;
1077 int may_queue; 1079 int may_queue;
1078 1080
1079 if (unlikely(blk_queue_dying(q))) 1081 if (unlikely(blk_queue_dying(q)))
1080 return ERR_PTR(-ENODEV); 1082 return ERR_PTR(-ENODEV);
1081 1083
1082 may_queue = elv_may_queue(q, rw_flags); 1084 may_queue = elv_may_queue(q, op, op_flags);
1083 if (may_queue == ELV_MQUEUE_NO) 1085 if (may_queue == ELV_MQUEUE_NO)
1084 goto rq_starved; 1086 goto rq_starved;
1085 1087
@@ -1123,7 +1125,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
1123 1125
1124 /* 1126 /*
1125 * Decide whether the new request will be managed by elevator. If 1127 * Decide whether the new request will be managed by elevator. If
1126 * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will 1128 * so, mark @op_flags and increment elvpriv. Non-zero elvpriv will
1127 * prevent the current elevator from being destroyed until the new 1129 * prevent the current elevator from being destroyed until the new
1128 * request is freed. This guarantees icq's won't be destroyed and 1130 * request is freed. This guarantees icq's won't be destroyed and
1129 * makes creating new ones safe. 1131 * makes creating new ones safe.
@@ -1132,14 +1134,14 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
1132 * it will be created after releasing queue_lock. 1134 * it will be created after releasing queue_lock.
1133 */ 1135 */
1134 if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { 1136 if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
1135 rw_flags |= REQ_ELVPRIV; 1137 op_flags |= REQ_ELVPRIV;
1136 q->nr_rqs_elvpriv++; 1138 q->nr_rqs_elvpriv++;
1137 if (et->icq_cache && ioc) 1139 if (et->icq_cache && ioc)
1138 icq = ioc_lookup_icq(ioc, q); 1140 icq = ioc_lookup_icq(ioc, q);
1139 } 1141 }
1140 1142
1141 if (blk_queue_io_stat(q)) 1143 if (blk_queue_io_stat(q))
1142 rw_flags |= REQ_IO_STAT; 1144 op_flags |= REQ_IO_STAT;
1143 spin_unlock_irq(q->queue_lock); 1145 spin_unlock_irq(q->queue_lock);
1144 1146
1145 /* allocate and init request */ 1147 /* allocate and init request */
@@ -1149,10 +1151,10 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
1149 1151
1150 blk_rq_init(q, rq); 1152 blk_rq_init(q, rq);
1151 blk_rq_set_rl(rq, rl); 1153 blk_rq_set_rl(rq, rl);
1152 rq->cmd_flags = rw_flags | REQ_ALLOCED; 1154 req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED);
1153 1155
1154 /* init elvpriv */ 1156 /* init elvpriv */
1155 if (rw_flags & REQ_ELVPRIV) { 1157 if (op_flags & REQ_ELVPRIV) {
1156 if (unlikely(et->icq_cache && !icq)) { 1158 if (unlikely(et->icq_cache && !icq)) {
1157 if (ioc) 1159 if (ioc)
1158 icq = ioc_create_icq(ioc, q, gfp_mask); 1160 icq = ioc_create_icq(ioc, q, gfp_mask);
@@ -1178,7 +1180,7 @@ out:
1178 if (ioc_batching(q, ioc)) 1180 if (ioc_batching(q, ioc))
1179 ioc->nr_batch_requests--; 1181 ioc->nr_batch_requests--;
1180 1182
1181 trace_block_getrq(q, bio, rw_flags & 1); 1183 trace_block_getrq(q, bio, op);
1182 return rq; 1184 return rq;
1183 1185
1184fail_elvpriv: 1186fail_elvpriv:
@@ -1208,7 +1210,7 @@ fail_alloc:
1208 * queue, but this is pretty rare. 1210 * queue, but this is pretty rare.
1209 */ 1211 */
1210 spin_lock_irq(q->queue_lock); 1212 spin_lock_irq(q->queue_lock);
1211 freed_request(rl, rw_flags); 1213 freed_request(rl, op, op_flags);
1212 1214
1213 /* 1215 /*
1214 * in the very unlikely event that allocation failed and no 1216 * in the very unlikely event that allocation failed and no
@@ -1226,7 +1228,8 @@ rq_starved:
1226/** 1228/**
1227 * get_request - get a free request 1229 * get_request - get a free request
1228 * @q: request_queue to allocate request from 1230 * @q: request_queue to allocate request from
1229 * @rw_flags: RW and SYNC flags 1231 * @op: REQ_OP_READ/REQ_OP_WRITE
1232 * @op_flags: rq_flag_bits
1230 * @bio: bio to allocate request for (can be %NULL) 1233 * @bio: bio to allocate request for (can be %NULL)
1231 * @gfp_mask: allocation mask 1234 * @gfp_mask: allocation mask
1232 * 1235 *
@@ -1237,17 +1240,18 @@ rq_starved:
1237 * Returns ERR_PTR on failure, with @q->queue_lock held. 1240 * Returns ERR_PTR on failure, with @q->queue_lock held.
1238 * Returns request pointer on success, with @q->queue_lock *not held*. 1241 * Returns request pointer on success, with @q->queue_lock *not held*.
1239 */ 1242 */
1240static struct request *get_request(struct request_queue *q, int rw_flags, 1243static struct request *get_request(struct request_queue *q, int op,
1241 struct bio *bio, gfp_t gfp_mask) 1244 int op_flags, struct bio *bio,
1245 gfp_t gfp_mask)
1242{ 1246{
1243 const bool is_sync = rw_is_sync(rw_flags) != 0; 1247 const bool is_sync = rw_is_sync(op, op_flags) != 0;
1244 DEFINE_WAIT(wait); 1248 DEFINE_WAIT(wait);
1245 struct request_list *rl; 1249 struct request_list *rl;
1246 struct request *rq; 1250 struct request *rq;
1247 1251
1248 rl = blk_get_rl(q, bio); /* transferred to @rq on success */ 1252 rl = blk_get_rl(q, bio); /* transferred to @rq on success */
1249retry: 1253retry:
1250 rq = __get_request(rl, rw_flags, bio, gfp_mask); 1254 rq = __get_request(rl, op, op_flags, bio, gfp_mask);
1251 if (!IS_ERR(rq)) 1255 if (!IS_ERR(rq))
1252 return rq; 1256 return rq;
1253 1257
@@ -1260,7 +1264,7 @@ retry:
1260 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, 1264 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
1261 TASK_UNINTERRUPTIBLE); 1265 TASK_UNINTERRUPTIBLE);
1262 1266
1263 trace_block_sleeprq(q, bio, rw_flags & 1); 1267 trace_block_sleeprq(q, bio, op);
1264 1268
1265 spin_unlock_irq(q->queue_lock); 1269 spin_unlock_irq(q->queue_lock);
1266 io_schedule(); 1270 io_schedule();
@@ -1289,7 +1293,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
1289 create_io_context(gfp_mask, q->node); 1293 create_io_context(gfp_mask, q->node);
1290 1294
1291 spin_lock_irq(q->queue_lock); 1295 spin_lock_irq(q->queue_lock);
1292 rq = get_request(q, rw, NULL, gfp_mask); 1296 rq = get_request(q, rw, 0, NULL, gfp_mask);
1293 if (IS_ERR(rq)) 1297 if (IS_ERR(rq))
1294 spin_unlock_irq(q->queue_lock); 1298 spin_unlock_irq(q->queue_lock);
1295 /* q->queue_lock is unlocked at this point */ 1299 /* q->queue_lock is unlocked at this point */
@@ -1491,13 +1495,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
1491 */ 1495 */
1492 if (req->cmd_flags & REQ_ALLOCED) { 1496 if (req->cmd_flags & REQ_ALLOCED) {
1493 unsigned int flags = req->cmd_flags; 1497 unsigned int flags = req->cmd_flags;
1498 int op = req_op(req);
1494 struct request_list *rl = blk_rq_rl(req); 1499 struct request_list *rl = blk_rq_rl(req);
1495 1500
1496 BUG_ON(!list_empty(&req->queuelist)); 1501 BUG_ON(!list_empty(&req->queuelist));
1497 BUG_ON(ELV_ON_HASH(req)); 1502 BUG_ON(ELV_ON_HASH(req));
1498 1503
1499 blk_free_request(rl, req); 1504 blk_free_request(rl, req);
1500 freed_request(rl, flags); 1505 freed_request(rl, op, flags);
1501 blk_put_rl(rl); 1506 blk_put_rl(rl);
1502 } 1507 }
1503} 1508}
@@ -1712,7 +1717,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1712{ 1717{
1713 const bool sync = !!(bio->bi_rw & REQ_SYNC); 1718 const bool sync = !!(bio->bi_rw & REQ_SYNC);
1714 struct blk_plug *plug; 1719 struct blk_plug *plug;
1715 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; 1720 int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT;
1716 struct request *req; 1721 struct request *req;
1717 unsigned int request_count = 0; 1722 unsigned int request_count = 0;
1718 1723
@@ -1731,7 +1736,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1731 return BLK_QC_T_NONE; 1736 return BLK_QC_T_NONE;
1732 } 1737 }
1733 1738
1734 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { 1739 if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) {
1735 spin_lock_irq(q->queue_lock); 1740 spin_lock_irq(q->queue_lock);
1736 where = ELEVATOR_INSERT_FLUSH; 1741 where = ELEVATOR_INSERT_FLUSH;
1737 goto get_rq; 1742 goto get_rq;
@@ -1772,15 +1777,19 @@ get_rq:
1772 * but we need to set it earlier to expose the sync flag to the 1777 * but we need to set it earlier to expose the sync flag to the
1773 * rq allocator and io schedulers. 1778 * rq allocator and io schedulers.
1774 */ 1779 */
1775 rw_flags = bio_data_dir(bio);
1776 if (sync) 1780 if (sync)
1777 rw_flags |= REQ_SYNC; 1781 rw_flags |= REQ_SYNC;
1778 1782
1779 /* 1783 /*
1784 * Add in META/PRIO flags, if set, before we get to the IO scheduler
1785 */
1786 rw_flags |= (bio->bi_rw & (REQ_META | REQ_PRIO));
1787
1788 /*
1780 * Grab a free request. This is might sleep but can not fail. 1789 * Grab a free request. This is might sleep but can not fail.
1781 * Returns with the queue unlocked. 1790 * Returns with the queue unlocked.
1782 */ 1791 */
1783 req = get_request(q, rw_flags, bio, GFP_NOIO); 1792 req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO);
1784 if (IS_ERR(req)) { 1793 if (IS_ERR(req)) {
1785 bio->bi_error = PTR_ERR(req); 1794 bio->bi_error = PTR_ERR(req);
1786 bio_endio(bio); 1795 bio_endio(bio);
@@ -1849,7 +1858,7 @@ static void handle_bad_sector(struct bio *bio)
1849 char b[BDEVNAME_SIZE]; 1858 char b[BDEVNAME_SIZE];
1850 1859
1851 printk(KERN_INFO "attempt to access beyond end of device\n"); 1860 printk(KERN_INFO "attempt to access beyond end of device\n");
1852 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 1861 printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
1853 bdevname(bio->bi_bdev, b), 1862 bdevname(bio->bi_bdev, b),
1854 bio->bi_rw, 1863 bio->bi_rw,
1855 (unsigned long long)bio_end_sector(bio), 1864 (unsigned long long)bio_end_sector(bio),
@@ -1964,23 +1973,23 @@ generic_make_request_checks(struct bio *bio)
1964 * drivers without flush support don't have to worry 1973 * drivers without flush support don't have to worry
1965 * about them. 1974 * about them.
1966 */ 1975 */
1967 if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && 1976 if ((bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) &&
1968 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { 1977 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
1969 bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); 1978 bio->bi_rw &= ~(REQ_PREFLUSH | REQ_FUA);
1970 if (!nr_sectors) { 1979 if (!nr_sectors) {
1971 err = 0; 1980 err = 0;
1972 goto end_io; 1981 goto end_io;
1973 } 1982 }
1974 } 1983 }
1975 1984
1976 if ((bio->bi_rw & REQ_DISCARD) && 1985 if ((bio_op(bio) == REQ_OP_DISCARD) &&
1977 (!blk_queue_discard(q) || 1986 (!blk_queue_discard(q) ||
1978 ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { 1987 ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
1979 err = -EOPNOTSUPP; 1988 err = -EOPNOTSUPP;
1980 goto end_io; 1989 goto end_io;
1981 } 1990 }
1982 1991
1983 if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { 1992 if (bio_op(bio) == REQ_OP_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
1984 err = -EOPNOTSUPP; 1993 err = -EOPNOTSUPP;
1985 goto end_io; 1994 goto end_io;
1986 } 1995 }
@@ -2094,7 +2103,6 @@ EXPORT_SYMBOL(generic_make_request);
2094 2103
2095/** 2104/**
2096 * submit_bio - submit a bio to the block device layer for I/O 2105 * submit_bio - submit a bio to the block device layer for I/O
2097 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
2098 * @bio: The &struct bio which describes the I/O 2106 * @bio: The &struct bio which describes the I/O
2099 * 2107 *
2100 * submit_bio() is very similar in purpose to generic_make_request(), and 2108 * submit_bio() is very similar in purpose to generic_make_request(), and
@@ -2102,10 +2110,8 @@ EXPORT_SYMBOL(generic_make_request);
2102 * interfaces; @bio must be presetup and ready for I/O. 2110 * interfaces; @bio must be presetup and ready for I/O.
2103 * 2111 *
2104 */ 2112 */
2105blk_qc_t submit_bio(int rw, struct bio *bio) 2113blk_qc_t submit_bio(struct bio *bio)
2106{ 2114{
2107 bio->bi_rw |= rw;
2108
2109 /* 2115 /*
2110 * If it's a regular read/write or a barrier with data attached, 2116 * If it's a regular read/write or a barrier with data attached,
2111 * go through the normal accounting stuff before submission. 2117 * go through the normal accounting stuff before submission.
@@ -2113,12 +2119,12 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
2113 if (bio_has_data(bio)) { 2119 if (bio_has_data(bio)) {
2114 unsigned int count; 2120 unsigned int count;
2115 2121
2116 if (unlikely(rw & REQ_WRITE_SAME)) 2122 if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
2117 count = bdev_logical_block_size(bio->bi_bdev) >> 9; 2123 count = bdev_logical_block_size(bio->bi_bdev) >> 9;
2118 else 2124 else
2119 count = bio_sectors(bio); 2125 count = bio_sectors(bio);
2120 2126
2121 if (rw & WRITE) { 2127 if (op_is_write(bio_op(bio))) {
2122 count_vm_events(PGPGOUT, count); 2128 count_vm_events(PGPGOUT, count);
2123 } else { 2129 } else {
2124 task_io_account_read(bio->bi_iter.bi_size); 2130 task_io_account_read(bio->bi_iter.bi_size);
@@ -2129,7 +2135,7 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
2129 char b[BDEVNAME_SIZE]; 2135 char b[BDEVNAME_SIZE];
2130 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", 2136 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
2131 current->comm, task_pid_nr(current), 2137 current->comm, task_pid_nr(current),
2132 (rw & WRITE) ? "WRITE" : "READ", 2138 op_is_write(bio_op(bio)) ? "WRITE" : "READ",
2133 (unsigned long long)bio->bi_iter.bi_sector, 2139 (unsigned long long)bio->bi_iter.bi_sector,
2134 bdevname(bio->bi_bdev, b), 2140 bdevname(bio->bi_bdev, b),
2135 count); 2141 count);
@@ -2160,7 +2166,7 @@ EXPORT_SYMBOL(submit_bio);
2160static int blk_cloned_rq_check_limits(struct request_queue *q, 2166static int blk_cloned_rq_check_limits(struct request_queue *q,
2161 struct request *rq) 2167 struct request *rq)
2162{ 2168{
2163 if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { 2169 if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
2164 printk(KERN_ERR "%s: over max size limit.\n", __func__); 2170 printk(KERN_ERR "%s: over max size limit.\n", __func__);
2165 return -EIO; 2171 return -EIO;
2166 } 2172 }
@@ -2216,7 +2222,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2216 */ 2222 */
2217 BUG_ON(blk_queued_rq(rq)); 2223 BUG_ON(blk_queued_rq(rq));
2218 2224
2219 if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA)) 2225 if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
2220 where = ELEVATOR_INSERT_FLUSH; 2226 where = ELEVATOR_INSERT_FLUSH;
2221 2227
2222 add_acct_request(q, rq, where); 2228 add_acct_request(q, rq, where);
@@ -2979,8 +2985,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request_err);
2979void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2985void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2980 struct bio *bio) 2986 struct bio *bio)
2981{ 2987{
2982 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ 2988 req_set_op(rq, bio_op(bio));
2983 rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
2984 2989
2985 if (bio_has_data(bio)) 2990 if (bio_has_data(bio))
2986 rq->nr_phys_segments = bio_phys_segments(q, bio); 2991 rq->nr_phys_segments = bio_phys_segments(q, bio);
@@ -3065,7 +3070,8 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
3065static void __blk_rq_prep_clone(struct request *dst, struct request *src) 3070static void __blk_rq_prep_clone(struct request *dst, struct request *src)
3066{ 3071{
3067 dst->cpu = src->cpu; 3072 dst->cpu = src->cpu;
3068 dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; 3073 req_set_op_attrs(dst, req_op(src),
3074 (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE);
3069 dst->cmd_type = src->cmd_type; 3075 dst->cmd_type = src->cmd_type;
3070 dst->__sector = blk_rq_pos(src); 3076 dst->__sector = blk_rq_pos(src);
3071 dst->__data_len = blk_rq_bytes(src); 3077 dst->__data_len = blk_rq_bytes(src);
@@ -3310,7 +3316,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3310 /* 3316 /*
3311 * rq is already accounted, so use raw insert 3317 * rq is already accounted, so use raw insert
3312 */ 3318 */
3313 if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) 3319 if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
3314 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); 3320 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
3315 else 3321 else
3316 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); 3322 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 3fec8a29d0fa..7ea04325d02f 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -62,7 +62,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
62 62
63 /* 63 /*
64 * don't check dying flag for MQ because the request won't 64 * don't check dying flag for MQ because the request won't
65 * be resued after dying flag is set 65 * be reused after dying flag is set
66 */ 66 */
67 if (q->mq_ops) { 67 if (q->mq_ops) {
68 blk_mq_insert_request(rq, at_head, true, false); 68 blk_mq_insert_request(rq, at_head, true, false);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index b1c91d229e5e..d308def812db 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -10,8 +10,8 @@
10 * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request 10 * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
11 * properties and hardware capability. 11 * properties and hardware capability.
12 * 12 *
13 * If a request doesn't have data, only REQ_FLUSH makes sense, which 13 * If a request doesn't have data, only REQ_PREFLUSH makes sense, which
14 * indicates a simple flush request. If there is data, REQ_FLUSH indicates 14 * indicates a simple flush request. If there is data, REQ_PREFLUSH indicates
15 * that the device cache should be flushed before the data is executed, and 15 * that the device cache should be flushed before the data is executed, and
16 * REQ_FUA means that the data must be on non-volatile media on request 16 * REQ_FUA means that the data must be on non-volatile media on request
17 * completion. 17 * completion.
@@ -20,16 +20,16 @@
20 * difference. The requests are either completed immediately if there's no 20 * difference. The requests are either completed immediately if there's no
21 * data or executed as normal requests otherwise. 21 * data or executed as normal requests otherwise.
22 * 22 *
23 * If the device has writeback cache and supports FUA, REQ_FLUSH is 23 * If the device has writeback cache and supports FUA, REQ_PREFLUSH is
24 * translated to PREFLUSH but REQ_FUA is passed down directly with DATA. 24 * translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
25 * 25 *
26 * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is 26 * If the device has writeback cache and doesn't support FUA, REQ_PREFLUSH
27 * translated to PREFLUSH and REQ_FUA to POSTFLUSH. 27 * is translated to PREFLUSH and REQ_FUA to POSTFLUSH.
28 * 28 *
29 * The actual execution of flush is double buffered. Whenever a request 29 * The actual execution of flush is double buffered. Whenever a request
30 * needs to execute PRE or POSTFLUSH, it queues at 30 * needs to execute PRE or POSTFLUSH, it queues at
31 * fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a 31 * fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a
32 * flush is issued and the pending_idx is toggled. When the flush 32 * REQ_OP_FLUSH is issued and the pending_idx is toggled. When the flush
33 * completes, all the requests which were pending are proceeded to the next 33 * completes, all the requests which were pending are proceeded to the next
34 * step. This allows arbitrary merging of different types of FLUSH/FUA 34 * step. This allows arbitrary merging of different types of FLUSH/FUA
35 * requests. 35 * requests.
@@ -103,7 +103,7 @@ static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
103 policy |= REQ_FSEQ_DATA; 103 policy |= REQ_FSEQ_DATA;
104 104
105 if (fflags & (1UL << QUEUE_FLAG_WC)) { 105 if (fflags & (1UL << QUEUE_FLAG_WC)) {
106 if (rq->cmd_flags & REQ_FLUSH) 106 if (rq->cmd_flags & REQ_PREFLUSH)
107 policy |= REQ_FSEQ_PREFLUSH; 107 policy |= REQ_FSEQ_PREFLUSH;
108 if (!(fflags & (1UL << QUEUE_FLAG_FUA)) && 108 if (!(fflags & (1UL << QUEUE_FLAG_FUA)) &&
109 (rq->cmd_flags & REQ_FUA)) 109 (rq->cmd_flags & REQ_FUA))
@@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
330 } 330 }
331 331
332 flush_rq->cmd_type = REQ_TYPE_FS; 332 flush_rq->cmd_type = REQ_TYPE_FS;
333 flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; 333 req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH | REQ_FLUSH_SEQ);
334 flush_rq->rq_disk = first_rq->rq_disk; 334 flush_rq->rq_disk = first_rq->rq_disk;
335 flush_rq->end_io = flush_end_io; 335 flush_rq->end_io = flush_end_io;
336 336
@@ -391,9 +391,9 @@ void blk_insert_flush(struct request *rq)
391 391
392 /* 392 /*
393 * @policy now records what operations need to be done. Adjust 393 * @policy now records what operations need to be done. Adjust
394 * REQ_FLUSH and FUA for the driver. 394 * REQ_PREFLUSH and FUA for the driver.
395 */ 395 */
396 rq->cmd_flags &= ~REQ_FLUSH; 396 rq->cmd_flags &= ~REQ_PREFLUSH;
397 if (!(fflags & (1UL << QUEUE_FLAG_FUA))) 397 if (!(fflags & (1UL << QUEUE_FLAG_FUA)))
398 rq->cmd_flags &= ~REQ_FUA; 398 rq->cmd_flags &= ~REQ_FUA;
399 399
@@ -485,8 +485,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
485 485
486 bio = bio_alloc(gfp_mask, 0); 486 bio = bio_alloc(gfp_mask, 0);
487 bio->bi_bdev = bdev; 487 bio->bi_bdev = bdev;
488 bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
488 489
489 ret = submit_bio_wait(WRITE_FLUSH, bio); 490 ret = submit_bio_wait(bio);
490 491
491 /* 492 /*
492 * The driver must store the error location in ->bi_sector, if 493 * The driver must store the error location in ->bi_sector, if
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9e29dc351695..9031d2af0b47 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -9,21 +9,22 @@
9 9
10#include "blk.h" 10#include "blk.h"
11 11
12static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages, 12static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
13 gfp_t gfp) 13 gfp_t gfp)
14{ 14{
15 struct bio *new = bio_alloc(gfp, nr_pages); 15 struct bio *new = bio_alloc(gfp, nr_pages);
16 16
17 if (bio) { 17 if (bio) {
18 bio_chain(bio, new); 18 bio_chain(bio, new);
19 submit_bio(rw, bio); 19 submit_bio(bio);
20 } 20 }
21 21
22 return new; 22 return new;
23} 23}
24 24
25int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, 25int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
26 sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop) 26 sector_t nr_sects, gfp_t gfp_mask, int op_flags,
27 struct bio **biop)
27{ 28{
28 struct request_queue *q = bdev_get_queue(bdev); 29 struct request_queue *q = bdev_get_queue(bdev);
29 struct bio *bio = *biop; 30 struct bio *bio = *biop;
@@ -34,7 +35,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
34 return -ENXIO; 35 return -ENXIO;
35 if (!blk_queue_discard(q)) 36 if (!blk_queue_discard(q))
36 return -EOPNOTSUPP; 37 return -EOPNOTSUPP;
37 if ((type & REQ_SECURE) && !blk_queue_secdiscard(q)) 38 if ((op_flags & REQ_SECURE) && !blk_queue_secdiscard(q))
38 return -EOPNOTSUPP; 39 return -EOPNOTSUPP;
39 40
40 /* Zero-sector (unknown) and one-sector granularities are the same. */ 41 /* Zero-sector (unknown) and one-sector granularities are the same. */
@@ -62,9 +63,10 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
62 req_sects = end_sect - sector; 63 req_sects = end_sect - sector;
63 } 64 }
64 65
65 bio = next_bio(bio, type, 1, gfp_mask); 66 bio = next_bio(bio, 1, gfp_mask);
66 bio->bi_iter.bi_sector = sector; 67 bio->bi_iter.bi_sector = sector;
67 bio->bi_bdev = bdev; 68 bio->bi_bdev = bdev;
69 bio_set_op_attrs(bio, REQ_OP_DISCARD, op_flags);
68 70
69 bio->bi_iter.bi_size = req_sects << 9; 71 bio->bi_iter.bi_size = req_sects << 9;
70 nr_sects -= req_sects; 72 nr_sects -= req_sects;
@@ -98,19 +100,19 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
98int blkdev_issue_discard(struct block_device *bdev, sector_t sector, 100int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
99 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) 101 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
100{ 102{
101 int type = REQ_WRITE | REQ_DISCARD; 103 int op_flags = 0;
102 struct bio *bio = NULL; 104 struct bio *bio = NULL;
103 struct blk_plug plug; 105 struct blk_plug plug;
104 int ret; 106 int ret;
105 107
106 if (flags & BLKDEV_DISCARD_SECURE) 108 if (flags & BLKDEV_DISCARD_SECURE)
107 type |= REQ_SECURE; 109 op_flags |= REQ_SECURE;
108 110
109 blk_start_plug(&plug); 111 blk_start_plug(&plug);
110 ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, type, 112 ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, op_flags,
111 &bio); 113 &bio);
112 if (!ret && bio) { 114 if (!ret && bio) {
113 ret = submit_bio_wait(type, bio); 115 ret = submit_bio_wait(bio);
114 if (ret == -EOPNOTSUPP) 116 if (ret == -EOPNOTSUPP)
115 ret = 0; 117 ret = 0;
116 bio_put(bio); 118 bio_put(bio);
@@ -148,13 +150,14 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
148 max_write_same_sectors = UINT_MAX >> 9; 150 max_write_same_sectors = UINT_MAX >> 9;
149 151
150 while (nr_sects) { 152 while (nr_sects) {
151 bio = next_bio(bio, REQ_WRITE | REQ_WRITE_SAME, 1, gfp_mask); 153 bio = next_bio(bio, 1, gfp_mask);
152 bio->bi_iter.bi_sector = sector; 154 bio->bi_iter.bi_sector = sector;
153 bio->bi_bdev = bdev; 155 bio->bi_bdev = bdev;
154 bio->bi_vcnt = 1; 156 bio->bi_vcnt = 1;
155 bio->bi_io_vec->bv_page = page; 157 bio->bi_io_vec->bv_page = page;
156 bio->bi_io_vec->bv_offset = 0; 158 bio->bi_io_vec->bv_offset = 0;
157 bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); 159 bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
160 bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
158 161
159 if (nr_sects > max_write_same_sectors) { 162 if (nr_sects > max_write_same_sectors) {
160 bio->bi_iter.bi_size = max_write_same_sectors << 9; 163 bio->bi_iter.bi_size = max_write_same_sectors << 9;
@@ -167,7 +170,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
167 } 170 }
168 171
169 if (bio) { 172 if (bio) {
170 ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio); 173 ret = submit_bio_wait(bio);
171 bio_put(bio); 174 bio_put(bio);
172 } 175 }
173 return ret != -EOPNOTSUPP ? ret : 0; 176 return ret != -EOPNOTSUPP ? ret : 0;
@@ -193,11 +196,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
193 unsigned int sz; 196 unsigned int sz;
194 197
195 while (nr_sects != 0) { 198 while (nr_sects != 0) {
196 bio = next_bio(bio, WRITE, 199 bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
197 min(nr_sects, (sector_t)BIO_MAX_PAGES),
198 gfp_mask); 200 gfp_mask);
199 bio->bi_iter.bi_sector = sector; 201 bio->bi_iter.bi_sector = sector;
200 bio->bi_bdev = bdev; 202 bio->bi_bdev = bdev;
203 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
201 204
202 while (nr_sects != 0) { 205 while (nr_sects != 0) {
203 sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); 206 sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
@@ -210,7 +213,7 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
210 } 213 }
211 214
212 if (bio) { 215 if (bio) {
213 ret = submit_bio_wait(WRITE, bio); 216 ret = submit_bio_wait(bio);
214 bio_put(bio); 217 bio_put(bio);
215 return ret; 218 return ret;
216 } 219 }
diff --git a/block/blk-map.c b/block/blk-map.c
index b9f88b7751fb..61733a660c3a 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -224,7 +224,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
224 return PTR_ERR(bio); 224 return PTR_ERR(bio);
225 225
226 if (!reading) 226 if (!reading)
227 bio->bi_rw |= REQ_WRITE; 227 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
228 228
229 if (do_copy) 229 if (do_copy)
230 rq->cmd_flags |= REQ_COPY_USER; 230 rq->cmd_flags |= REQ_COPY_USER;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 261353166dcf..5e4d93edeaf7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -172,9 +172,9 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
172 struct bio *split, *res; 172 struct bio *split, *res;
173 unsigned nsegs; 173 unsigned nsegs;
174 174
175 if ((*bio)->bi_rw & REQ_DISCARD) 175 if (bio_op(*bio) == REQ_OP_DISCARD)
176 split = blk_bio_discard_split(q, *bio, bs, &nsegs); 176 split = blk_bio_discard_split(q, *bio, bs, &nsegs);
177 else if ((*bio)->bi_rw & REQ_WRITE_SAME) 177 else if (bio_op(*bio) == REQ_OP_WRITE_SAME)
178 split = blk_bio_write_same_split(q, *bio, bs, &nsegs); 178 split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
179 else 179 else
180 split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); 180 split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
@@ -213,10 +213,10 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
213 * This should probably be returning 0, but blk_add_request_payload() 213 * This should probably be returning 0, but blk_add_request_payload()
214 * (Christoph!!!!) 214 * (Christoph!!!!)
215 */ 215 */
216 if (bio->bi_rw & REQ_DISCARD) 216 if (bio_op(bio) == REQ_OP_DISCARD)
217 return 1; 217 return 1;
218 218
219 if (bio->bi_rw & REQ_WRITE_SAME) 219 if (bio_op(bio) == REQ_OP_WRITE_SAME)
220 return 1; 220 return 1;
221 221
222 fbio = bio; 222 fbio = bio;
@@ -385,7 +385,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
385 nsegs = 0; 385 nsegs = 0;
386 cluster = blk_queue_cluster(q); 386 cluster = blk_queue_cluster(q);
387 387
388 if (bio->bi_rw & REQ_DISCARD) { 388 if (bio_op(bio) == REQ_OP_DISCARD) {
389 /* 389 /*
390 * This is a hack - drivers should be neither modifying the 390 * This is a hack - drivers should be neither modifying the
391 * biovec, nor relying on bi_vcnt - but because of 391 * biovec, nor relying on bi_vcnt - but because of
@@ -400,7 +400,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
400 return 0; 400 return 0;
401 } 401 }
402 402
403 if (bio->bi_rw & REQ_WRITE_SAME) { 403 if (bio_op(bio) == REQ_OP_WRITE_SAME) {
404single_segment: 404single_segment:
405 *sg = sglist; 405 *sg = sglist;
406 bvec = bio_iovec(bio); 406 bvec = bio_iovec(bio);
@@ -439,7 +439,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
439 } 439 }
440 440
441 if (q->dma_drain_size && q->dma_drain_needed(rq)) { 441 if (q->dma_drain_size && q->dma_drain_needed(rq)) {
442 if (rq->cmd_flags & REQ_WRITE) 442 if (op_is_write(req_op(rq)))
443 memset(q->dma_drain_buffer, 0, q->dma_drain_size); 443 memset(q->dma_drain_buffer, 0, q->dma_drain_size);
444 444
445 sg_unmark_end(sg); 445 sg_unmark_end(sg);
@@ -500,7 +500,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
500 integrity_req_gap_back_merge(req, bio)) 500 integrity_req_gap_back_merge(req, bio))
501 return 0; 501 return 0;
502 if (blk_rq_sectors(req) + bio_sectors(bio) > 502 if (blk_rq_sectors(req) + bio_sectors(bio) >
503 blk_rq_get_max_sectors(req)) { 503 blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
504 req->cmd_flags |= REQ_NOMERGE; 504 req->cmd_flags |= REQ_NOMERGE;
505 if (req == q->last_merge) 505 if (req == q->last_merge)
506 q->last_merge = NULL; 506 q->last_merge = NULL;
@@ -524,7 +524,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
524 integrity_req_gap_front_merge(req, bio)) 524 integrity_req_gap_front_merge(req, bio))
525 return 0; 525 return 0;
526 if (blk_rq_sectors(req) + bio_sectors(bio) > 526 if (blk_rq_sectors(req) + bio_sectors(bio) >
527 blk_rq_get_max_sectors(req)) { 527 blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
528 req->cmd_flags |= REQ_NOMERGE; 528 req->cmd_flags |= REQ_NOMERGE;
529 if (req == q->last_merge) 529 if (req == q->last_merge)
530 q->last_merge = NULL; 530 q->last_merge = NULL;
@@ -570,7 +570,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
570 * Will it become too large? 570 * Will it become too large?
571 */ 571 */
572 if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > 572 if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
573 blk_rq_get_max_sectors(req)) 573 blk_rq_get_max_sectors(req, blk_rq_pos(req)))
574 return 0; 574 return 0;
575 575
576 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 576 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -649,7 +649,8 @@ static int attempt_merge(struct request_queue *q, struct request *req,
649 if (!rq_mergeable(req) || !rq_mergeable(next)) 649 if (!rq_mergeable(req) || !rq_mergeable(next))
650 return 0; 650 return 0;
651 651
652 if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) 652 if (!blk_check_merge_flags(req->cmd_flags, req_op(req), next->cmd_flags,
653 req_op(next)))
653 return 0; 654 return 0;
654 655
655 /* 656 /*
@@ -663,7 +664,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
663 || req_no_special_merge(next)) 664 || req_no_special_merge(next))
664 return 0; 665 return 0;
665 666
666 if (req->cmd_flags & REQ_WRITE_SAME && 667 if (req_op(req) == REQ_OP_WRITE_SAME &&
667 !blk_write_same_mergeable(req->bio, next->bio)) 668 !blk_write_same_mergeable(req->bio, next->bio))
668 return 0; 669 return 0;
669 670
@@ -743,6 +744,12 @@ int attempt_front_merge(struct request_queue *q, struct request *rq)
743int blk_attempt_req_merge(struct request_queue *q, struct request *rq, 744int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
744 struct request *next) 745 struct request *next)
745{ 746{
747 struct elevator_queue *e = q->elevator;
748
749 if (e->type->ops.elevator_allow_rq_merge_fn)
750 if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next))
751 return 0;
752
746 return attempt_merge(q, rq, next); 753 return attempt_merge(q, rq, next);
747} 754}
748 755
@@ -751,7 +758,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
751 if (!rq_mergeable(rq) || !bio_mergeable(bio)) 758 if (!rq_mergeable(rq) || !bio_mergeable(bio))
752 return false; 759 return false;
753 760
754 if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) 761 if (!blk_check_merge_flags(rq->cmd_flags, req_op(rq), bio->bi_rw,
762 bio_op(bio)))
755 return false; 763 return false;
756 764
757 /* different data direction or already started, don't merge */ 765 /* different data direction or already started, don't merge */
@@ -767,7 +775,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
767 return false; 775 return false;
768 776
769 /* must be using the same buffer */ 777 /* must be using the same buffer */
770 if (rq->cmd_flags & REQ_WRITE_SAME && 778 if (req_op(rq) == REQ_OP_WRITE_SAME &&
771 !blk_write_same_mergeable(rq->bio, bio)) 779 !blk_write_same_mergeable(rq->bio, bio))
772 return false; 780 return false;
773 781
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f9b9049b1284..2a1920c6d6e5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -159,16 +159,17 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
159EXPORT_SYMBOL(blk_mq_can_queue); 159EXPORT_SYMBOL(blk_mq_can_queue);
160 160
161static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, 161static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
162 struct request *rq, unsigned int rw_flags) 162 struct request *rq, int op,
163 unsigned int op_flags)
163{ 164{
164 if (blk_queue_io_stat(q)) 165 if (blk_queue_io_stat(q))
165 rw_flags |= REQ_IO_STAT; 166 op_flags |= REQ_IO_STAT;
166 167
167 INIT_LIST_HEAD(&rq->queuelist); 168 INIT_LIST_HEAD(&rq->queuelist);
168 /* csd/requeue_work/fifo_time is initialized before use */ 169 /* csd/requeue_work/fifo_time is initialized before use */
169 rq->q = q; 170 rq->q = q;
170 rq->mq_ctx = ctx; 171 rq->mq_ctx = ctx;
171 rq->cmd_flags |= rw_flags; 172 req_set_op_attrs(rq, op, op_flags);
172 /* do not touch atomic flags, it needs atomic ops against the timer */ 173 /* do not touch atomic flags, it needs atomic ops against the timer */
173 rq->cpu = -1; 174 rq->cpu = -1;
174 INIT_HLIST_NODE(&rq->hash); 175 INIT_HLIST_NODE(&rq->hash);
@@ -203,11 +204,11 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
203 rq->end_io_data = NULL; 204 rq->end_io_data = NULL;
204 rq->next_rq = NULL; 205 rq->next_rq = NULL;
205 206
206 ctx->rq_dispatched[rw_is_sync(rw_flags)]++; 207 ctx->rq_dispatched[rw_is_sync(op, op_flags)]++;
207} 208}
208 209
209static struct request * 210static struct request *
210__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) 211__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags)
211{ 212{
212 struct request *rq; 213 struct request *rq;
213 unsigned int tag; 214 unsigned int tag;
@@ -222,7 +223,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
222 } 223 }
223 224
224 rq->tag = tag; 225 rq->tag = tag;
225 blk_mq_rq_ctx_init(data->q, data->ctx, rq, rw); 226 blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags);
226 return rq; 227 return rq;
227 } 228 }
228 229
@@ -246,7 +247,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
246 hctx = q->mq_ops->map_queue(q, ctx->cpu); 247 hctx = q->mq_ops->map_queue(q, ctx->cpu);
247 blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); 248 blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
248 249
249 rq = __blk_mq_alloc_request(&alloc_data, rw); 250 rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
250 if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) { 251 if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
251 __blk_mq_run_hw_queue(hctx); 252 __blk_mq_run_hw_queue(hctx);
252 blk_mq_put_ctx(ctx); 253 blk_mq_put_ctx(ctx);
@@ -254,7 +255,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
254 ctx = blk_mq_get_ctx(q); 255 ctx = blk_mq_get_ctx(q);
255 hctx = q->mq_ops->map_queue(q, ctx->cpu); 256 hctx = q->mq_ops->map_queue(q, ctx->cpu);
256 blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); 257 blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
257 rq = __blk_mq_alloc_request(&alloc_data, rw); 258 rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
258 ctx = alloc_data.ctx; 259 ctx = alloc_data.ctx;
259 } 260 }
260 blk_mq_put_ctx(ctx); 261 blk_mq_put_ctx(ctx);
@@ -784,7 +785,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
784 switch (ret) { 785 switch (ret) {
785 case BLK_MQ_RQ_QUEUE_OK: 786 case BLK_MQ_RQ_QUEUE_OK:
786 queued++; 787 queued++;
787 continue; 788 break;
788 case BLK_MQ_RQ_QUEUE_BUSY: 789 case BLK_MQ_RQ_QUEUE_BUSY:
789 list_add(&rq->queuelist, &rq_list); 790 list_add(&rq->queuelist, &rq_list);
790 __blk_mq_requeue_request(rq); 791 __blk_mq_requeue_request(rq);
@@ -1169,28 +1170,29 @@ static struct request *blk_mq_map_request(struct request_queue *q,
1169 struct blk_mq_hw_ctx *hctx; 1170 struct blk_mq_hw_ctx *hctx;
1170 struct blk_mq_ctx *ctx; 1171 struct blk_mq_ctx *ctx;
1171 struct request *rq; 1172 struct request *rq;
1172 int rw = bio_data_dir(bio); 1173 int op = bio_data_dir(bio);
1174 int op_flags = 0;
1173 struct blk_mq_alloc_data alloc_data; 1175 struct blk_mq_alloc_data alloc_data;
1174 1176
1175 blk_queue_enter_live(q); 1177 blk_queue_enter_live(q);
1176 ctx = blk_mq_get_ctx(q); 1178 ctx = blk_mq_get_ctx(q);
1177 hctx = q->mq_ops->map_queue(q, ctx->cpu); 1179 hctx = q->mq_ops->map_queue(q, ctx->cpu);
1178 1180
1179 if (rw_is_sync(bio->bi_rw)) 1181 if (rw_is_sync(bio_op(bio), bio->bi_rw))
1180 rw |= REQ_SYNC; 1182 op_flags |= REQ_SYNC;
1181 1183
1182 trace_block_getrq(q, bio, rw); 1184 trace_block_getrq(q, bio, op);
1183 blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx); 1185 blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx);
1184 rq = __blk_mq_alloc_request(&alloc_data, rw); 1186 rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
1185 if (unlikely(!rq)) { 1187 if (unlikely(!rq)) {
1186 __blk_mq_run_hw_queue(hctx); 1188 __blk_mq_run_hw_queue(hctx);
1187 blk_mq_put_ctx(ctx); 1189 blk_mq_put_ctx(ctx);
1188 trace_block_sleeprq(q, bio, rw); 1190 trace_block_sleeprq(q, bio, op);
1189 1191
1190 ctx = blk_mq_get_ctx(q); 1192 ctx = blk_mq_get_ctx(q);
1191 hctx = q->mq_ops->map_queue(q, ctx->cpu); 1193 hctx = q->mq_ops->map_queue(q, ctx->cpu);
1192 blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx); 1194 blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
1193 rq = __blk_mq_alloc_request(&alloc_data, rw); 1195 rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
1194 ctx = alloc_data.ctx; 1196 ctx = alloc_data.ctx;
1195 hctx = alloc_data.hctx; 1197 hctx = alloc_data.hctx;
1196 } 1198 }
@@ -1244,8 +1246,8 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
1244 */ 1246 */
1245static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) 1247static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1246{ 1248{
1247 const int is_sync = rw_is_sync(bio->bi_rw); 1249 const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw);
1248 const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); 1250 const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA);
1249 struct blk_map_ctx data; 1251 struct blk_map_ctx data;
1250 struct request *rq; 1252 struct request *rq;
1251 unsigned int request_count = 0; 1253 unsigned int request_count = 0;
@@ -1338,8 +1340,8 @@ done:
1338 */ 1340 */
1339static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) 1341static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1340{ 1342{
1341 const int is_sync = rw_is_sync(bio->bi_rw); 1343 const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw);
1342 const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); 1344 const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA);
1343 struct blk_plug *plug; 1345 struct blk_plug *plug;
1344 unsigned int request_count = 0; 1346 unsigned int request_count = 0;
1345 struct blk_map_ctx data; 1347 struct blk_map_ctx data;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 99205965f559..f87a7e747d36 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -379,6 +379,11 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
379 return count; 379 return count;
380} 380}
381 381
382static ssize_t queue_dax_show(struct request_queue *q, char *page)
383{
384 return queue_var_show(blk_queue_dax(q), page);
385}
386
382static struct queue_sysfs_entry queue_requests_entry = { 387static struct queue_sysfs_entry queue_requests_entry = {
383 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, 388 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
384 .show = queue_requests_show, 389 .show = queue_requests_show,
@@ -516,6 +521,11 @@ static struct queue_sysfs_entry queue_wc_entry = {
516 .store = queue_wc_store, 521 .store = queue_wc_store,
517}; 522};
518 523
524static struct queue_sysfs_entry queue_dax_entry = {
525 .attr = {.name = "dax", .mode = S_IRUGO },
526 .show = queue_dax_show,
527};
528
519static struct attribute *default_attrs[] = { 529static struct attribute *default_attrs[] = {
520 &queue_requests_entry.attr, 530 &queue_requests_entry.attr,
521 &queue_ra_entry.attr, 531 &queue_ra_entry.attr,
@@ -542,6 +552,7 @@ static struct attribute *default_attrs[] = {
542 &queue_random_entry.attr, 552 &queue_random_entry.attr,
543 &queue_poll_entry.attr, 553 &queue_poll_entry.attr,
544 &queue_wc_entry.attr, 554 &queue_wc_entry.attr,
555 &queue_dax_entry.attr,
545 NULL, 556 NULL,
546}; 557};
547 558
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4a349787bc62..acabba198de9 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -10,7 +10,7 @@
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/blkdev.h> 11#include <linux/blkdev.h>
12#include <linux/elevator.h> 12#include <linux/elevator.h>
13#include <linux/jiffies.h> 13#include <linux/ktime.h>
14#include <linux/rbtree.h> 14#include <linux/rbtree.h>
15#include <linux/ioprio.h> 15#include <linux/ioprio.h>
16#include <linux/blktrace_api.h> 16#include <linux/blktrace_api.h>
@@ -22,28 +22,28 @@
22 */ 22 */
23/* max queue in one round of service */ 23/* max queue in one round of service */
24static const int cfq_quantum = 8; 24static const int cfq_quantum = 8;
25static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 25static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
26/* maximum backwards seek, in KiB */ 26/* maximum backwards seek, in KiB */
27static const int cfq_back_max = 16 * 1024; 27static const int cfq_back_max = 16 * 1024;
28/* penalty of a backwards seek */ 28/* penalty of a backwards seek */
29static const int cfq_back_penalty = 2; 29static const int cfq_back_penalty = 2;
30static const int cfq_slice_sync = HZ / 10; 30static const u64 cfq_slice_sync = NSEC_PER_SEC / 10;
31static int cfq_slice_async = HZ / 25; 31static u64 cfq_slice_async = NSEC_PER_SEC / 25;
32static const int cfq_slice_async_rq = 2; 32static const int cfq_slice_async_rq = 2;
33static int cfq_slice_idle = HZ / 125; 33static u64 cfq_slice_idle = NSEC_PER_SEC / 125;
34static int cfq_group_idle = HZ / 125; 34static u64 cfq_group_idle = NSEC_PER_SEC / 125;
35static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ 35static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
36static const int cfq_hist_divisor = 4; 36static const int cfq_hist_divisor = 4;
37 37
38/* 38/*
39 * offset from end of service tree 39 * offset from end of service tree
40 */ 40 */
41#define CFQ_IDLE_DELAY (HZ / 5) 41#define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5)
42 42
43/* 43/*
44 * below this threshold, we consider thinktime immediate 44 * below this threshold, we consider thinktime immediate
45 */ 45 */
46#define CFQ_MIN_TT (2) 46#define CFQ_MIN_TT (2 * NSEC_PER_SEC / HZ)
47 47
48#define CFQ_SLICE_SCALE (5) 48#define CFQ_SLICE_SCALE (5)
49#define CFQ_HW_QUEUE_MIN (5) 49#define CFQ_HW_QUEUE_MIN (5)
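Note: the tunables above keep their previous wall-clock defaults; only the unit changes from jiffies (HZ-based) to nanoseconds. A small stand-alone check of the equivalences, illustrative arithmetic only, not kernel code:

/* defaults expressed in ns come out to the same millisecond values */
#include <stdio.h>
#include <inttypes.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	uint64_t slice_sync = NSEC_PER_SEC / 10;      /* was HZ / 10   -> 100 ms */
	uint64_t slice_idle = NSEC_PER_SEC / 125;     /* was HZ / 125  ->   8 ms */
	uint64_t target_lat = NSEC_PER_SEC * 3 / 10;  /* was HZ * 3/10 -> 300 ms */

	printf("slice_sync = %" PRIu64 " ms\n", slice_sync / 1000000);
	printf("slice_idle = %" PRIu64 " ms\n", slice_idle / 1000000);
	printf("target_lat = %" PRIu64 " ms\n", target_lat / 1000000);
	return 0;
}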
@@ -73,11 +73,11 @@ static struct kmem_cache *cfq_pool;
73#define CFQ_WEIGHT_LEGACY_MAX 1000 73#define CFQ_WEIGHT_LEGACY_MAX 1000
74 74
75struct cfq_ttime { 75struct cfq_ttime {
76 unsigned long last_end_request; 76 u64 last_end_request;
77 77
78 unsigned long ttime_total; 78 u64 ttime_total;
79 u64 ttime_mean;
79 unsigned long ttime_samples; 80 unsigned long ttime_samples;
80 unsigned long ttime_mean;
81}; 81};
82 82
83/* 83/*
@@ -94,7 +94,7 @@ struct cfq_rb_root {
94 struct cfq_ttime ttime; 94 struct cfq_ttime ttime;
95}; 95};
96#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ 96#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
97 .ttime = {.last_end_request = jiffies,},} 97 .ttime = {.last_end_request = ktime_get_ns(),},}
98 98
99/* 99/*
100 * Per process-grouping structure 100 * Per process-grouping structure
@@ -109,7 +109,7 @@ struct cfq_queue {
109 /* service_tree member */ 109 /* service_tree member */
110 struct rb_node rb_node; 110 struct rb_node rb_node;
111 /* service_tree key */ 111 /* service_tree key */
112 unsigned long rb_key; 112 u64 rb_key;
113 /* prio tree member */ 113 /* prio tree member */
114 struct rb_node p_node; 114 struct rb_node p_node;
115 /* prio tree root we belong to, if any */ 115 /* prio tree root we belong to, if any */
@@ -126,13 +126,13 @@ struct cfq_queue {
126 struct list_head fifo; 126 struct list_head fifo;
127 127
128 /* time when queue got scheduled in to dispatch first request. */ 128 /* time when queue got scheduled in to dispatch first request. */
129 unsigned long dispatch_start; 129 u64 dispatch_start;
130 unsigned int allocated_slice; 130 u64 allocated_slice;
131 unsigned int slice_dispatch; 131 u64 slice_dispatch;
132 /* time when first request from queue completed and slice started. */ 132 /* time when first request from queue completed and slice started. */
133 unsigned long slice_start; 133 u64 slice_start;
134 unsigned long slice_end; 134 u64 slice_end;
135 long slice_resid; 135 s64 slice_resid;
136 136
137 /* pending priority requests */ 137 /* pending priority requests */
138 int prio_pending; 138 int prio_pending;
@@ -141,7 +141,7 @@ struct cfq_queue {
141 141
142 /* io prio of this group */ 142 /* io prio of this group */
143 unsigned short ioprio, org_ioprio; 143 unsigned short ioprio, org_ioprio;
144 unsigned short ioprio_class; 144 unsigned short ioprio_class, org_ioprio_class;
145 145
146 pid_t pid; 146 pid_t pid;
147 147
@@ -290,7 +290,7 @@ struct cfq_group {
290 struct cfq_rb_root service_trees[2][3]; 290 struct cfq_rb_root service_trees[2][3];
291 struct cfq_rb_root service_tree_idle; 291 struct cfq_rb_root service_tree_idle;
292 292
293 unsigned long saved_wl_slice; 293 u64 saved_wl_slice;
294 enum wl_type_t saved_wl_type; 294 enum wl_type_t saved_wl_type;
295 enum wl_class_t saved_wl_class; 295 enum wl_class_t saved_wl_class;
296 296
@@ -329,7 +329,7 @@ struct cfq_data {
329 */ 329 */
330 enum wl_class_t serving_wl_class; 330 enum wl_class_t serving_wl_class;
331 enum wl_type_t serving_wl_type; 331 enum wl_type_t serving_wl_type;
332 unsigned long workload_expires; 332 u64 workload_expires;
333 struct cfq_group *serving_group; 333 struct cfq_group *serving_group;
334 334
335 /* 335 /*
@@ -362,7 +362,7 @@ struct cfq_data {
362 /* 362 /*
363 * idle window management 363 * idle window management
364 */ 364 */
365 struct timer_list idle_slice_timer; 365 struct hrtimer idle_slice_timer;
366 struct work_struct unplug_work; 366 struct work_struct unplug_work;
367 367
368 struct cfq_queue *active_queue; 368 struct cfq_queue *active_queue;
@@ -374,22 +374,22 @@ struct cfq_data {
374 * tunables, see top of file 374 * tunables, see top of file
375 */ 375 */
376 unsigned int cfq_quantum; 376 unsigned int cfq_quantum;
377 unsigned int cfq_fifo_expire[2];
378 unsigned int cfq_back_penalty; 377 unsigned int cfq_back_penalty;
379 unsigned int cfq_back_max; 378 unsigned int cfq_back_max;
380 unsigned int cfq_slice[2];
381 unsigned int cfq_slice_async_rq; 379 unsigned int cfq_slice_async_rq;
382 unsigned int cfq_slice_idle;
383 unsigned int cfq_group_idle;
384 unsigned int cfq_latency; 380 unsigned int cfq_latency;
385 unsigned int cfq_target_latency; 381 u64 cfq_fifo_expire[2];
382 u64 cfq_slice[2];
383 u64 cfq_slice_idle;
384 u64 cfq_group_idle;
385 u64 cfq_target_latency;
386 386
387 /* 387 /*
388 * Fallback dummy cfqq for extreme OOM conditions 388 * Fallback dummy cfqq for extreme OOM conditions
389 */ 389 */
390 struct cfq_queue oom_cfqq; 390 struct cfq_queue oom_cfqq;
391 391
392 unsigned long last_delayed_sync; 392 u64 last_delayed_sync;
393}; 393};
394 394
395static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); 395static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -667,15 +667,16 @@ static inline void cfqg_put(struct cfq_group *cfqg)
667} while (0) 667} while (0)
668 668
669static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, 669static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
670 struct cfq_group *curr_cfqg, int rw) 670 struct cfq_group *curr_cfqg, int op,
671 int op_flags)
671{ 672{
672 blkg_rwstat_add(&cfqg->stats.queued, rw, 1); 673 blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, 1);
673 cfqg_stats_end_empty_time(&cfqg->stats); 674 cfqg_stats_end_empty_time(&cfqg->stats);
674 cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg); 675 cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg);
675} 676}
676 677
677static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, 678static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
678 unsigned long time, unsigned long unaccounted_time) 679 uint64_t time, unsigned long unaccounted_time)
679{ 680{
680 blkg_stat_add(&cfqg->stats.time, time); 681 blkg_stat_add(&cfqg->stats.time, time);
681#ifdef CONFIG_DEBUG_BLK_CGROUP 682#ifdef CONFIG_DEBUG_BLK_CGROUP
@@ -683,26 +684,30 @@ static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
683#endif 684#endif
684} 685}
685 686
686static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) 687static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
688 int op_flags)
687{ 689{
688 blkg_rwstat_add(&cfqg->stats.queued, rw, -1); 690 blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, -1);
689} 691}
690 692
691static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) 693static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
694 int op_flags)
692{ 695{
693 blkg_rwstat_add(&cfqg->stats.merged, rw, 1); 696 blkg_rwstat_add(&cfqg->stats.merged, op, op_flags, 1);
694} 697}
695 698
696static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, 699static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
697 uint64_t start_time, uint64_t io_start_time, int rw) 700 uint64_t start_time, uint64_t io_start_time, int op,
701 int op_flags)
698{ 702{
699 struct cfqg_stats *stats = &cfqg->stats; 703 struct cfqg_stats *stats = &cfqg->stats;
700 unsigned long long now = sched_clock(); 704 unsigned long long now = sched_clock();
701 705
702 if (time_after64(now, io_start_time)) 706 if (time_after64(now, io_start_time))
703 blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); 707 blkg_rwstat_add(&stats->service_time, op, op_flags,
708 now - io_start_time);
704 if (time_after64(io_start_time, start_time)) 709 if (time_after64(io_start_time, start_time))
705 blkg_rwstat_add(&stats->wait_time, rw, 710 blkg_rwstat_add(&stats->wait_time, op, op_flags,
706 io_start_time - start_time); 711 io_start_time - start_time);
707} 712}
708 713
@@ -781,13 +786,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) { }
781#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) 786#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0)
782 787
783static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, 788static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
784 struct cfq_group *curr_cfqg, int rw) { } 789 struct cfq_group *curr_cfqg, int op, int op_flags) { }
785static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, 790static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
786 unsigned long time, unsigned long unaccounted_time) { } 791 uint64_t time, unsigned long unaccounted_time) { }
787static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { } 792static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
788static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { } 793 int op_flags) { }
794static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
795 int op_flags) { }
789static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, 796static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
790 uint64_t start_time, uint64_t io_start_time, int rw) { } 797 uint64_t start_time, uint64_t io_start_time, int op,
798 int op_flags) { }
791 799
792#endif /* CONFIG_CFQ_GROUP_IOSCHED */ 800#endif /* CONFIG_CFQ_GROUP_IOSCHED */
793 801
@@ -807,7 +815,7 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
807static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd, 815static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
808 struct cfq_ttime *ttime, bool group_idle) 816 struct cfq_ttime *ttime, bool group_idle)
809{ 817{
810 unsigned long slice; 818 u64 slice;
811 if (!sample_valid(ttime->ttime_samples)) 819 if (!sample_valid(ttime->ttime_samples))
812 return false; 820 return false;
813 if (group_idle) 821 if (group_idle)
@@ -930,17 +938,18 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
930 * if a queue is marked sync and has sync io queued. A sync queue with async 938 * if a queue is marked sync and has sync io queued. A sync queue with async
931 * io only, should not get full sync slice length. 939 * io only, should not get full sync slice length.
932 */ 940 */
933static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, 941static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync,
934 unsigned short prio) 942 unsigned short prio)
935{ 943{
936 const int base_slice = cfqd->cfq_slice[sync]; 944 u64 base_slice = cfqd->cfq_slice[sync];
945 u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE);
937 946
938 WARN_ON(prio >= IOPRIO_BE_NR); 947 WARN_ON(prio >= IOPRIO_BE_NR);
939 948
940 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); 949 return base_slice + (slice * (4 - prio));
941} 950}
942 951
943static inline int 952static inline u64
944cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 953cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
945{ 954{
946 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); 955 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
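Note: the priority-slice formula itself is unchanged; only the division moves to div_u64() so it works on 64-bit nanosecond values. A worked example of the arithmetic, as a user-space sketch rather than the kernel helper:

/* base + (base / CFQ_SLICE_SCALE) * (4 - prio): lower prio number => longer slice */
#include <stdio.h>
#include <inttypes.h>

#define CFQ_SLICE_SCALE 5
#define NSEC_PER_MSEC   1000000ULL

static uint64_t prio_slice(uint64_t base_slice, int prio)
{
	uint64_t step = base_slice / CFQ_SLICE_SCALE;   /* div_u64() in the kernel */
	int64_t slice = (int64_t)base_slice + (int64_t)step * (4 - prio);

	return (uint64_t)slice;
}

int main(void)
{
	uint64_t base = 100 * NSEC_PER_MSEC;            /* default sync slice, 100 ms */

	for (int prio = 0; prio < 8; prio++)
		printf("prio %d -> %" PRIu64 " ms\n",
		       prio, prio_slice(base, prio) / NSEC_PER_MSEC);
	return 0;
}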
@@ -958,15 +967,14 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
958 * 967 *
959 * The result is also in fixed point w/ CFQ_SERVICE_SHIFT. 968 * The result is also in fixed point w/ CFQ_SERVICE_SHIFT.
960 */ 969 */
961static inline u64 cfqg_scale_charge(unsigned long charge, 970static inline u64 cfqg_scale_charge(u64 charge,
962 unsigned int vfraction) 971 unsigned int vfraction)
963{ 972{
964 u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */ 973 u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */
965 974
966 /* charge / vfraction */ 975 /* charge / vfraction */
967 c <<= CFQ_SERVICE_SHIFT; 976 c <<= CFQ_SERVICE_SHIFT;
968 do_div(c, vfraction); 977 return div_u64(c, vfraction);
969 return c;
970} 978}
971 979
972static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) 980static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
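Note: the charge scaling still works in fixed point; only the open-coded do_div() becomes a div_u64() return. A stand-alone illustration follows; the 12-bit service shift is taken from the surrounding kernel source and should be read as an assumption here.

/* scale a nanosecond charge by a fixed-point group fraction */
#include <stdio.h>
#include <inttypes.h>

#define SERVICE_SHIFT 12   /* assumed to mirror CFQ_SERVICE_SHIFT */

static uint64_t scale_charge(uint64_t charge_ns, unsigned int vfraction)
{
	uint64_t c = charge_ns << SERVICE_SHIFT;   /* make it fixed point */

	c <<= SERVICE_SHIFT;                       /* charge / vfraction */
	return c / vfraction;                      /* div_u64() in the kernel */
}

int main(void)
{
	/* a 10 ms charge against a group holding half of the service share */
	uint64_t scaled = scale_charge(10000000ULL, 1U << (SERVICE_SHIFT - 1));

	printf("scaled charge = %" PRIu64 " ns\n", scaled >> SERVICE_SHIFT);
	return 0;
}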
@@ -1019,16 +1027,16 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
1019 return cfqg->busy_queues_avg[rt]; 1027 return cfqg->busy_queues_avg[rt];
1020} 1028}
1021 1029
1022static inline unsigned 1030static inline u64
1023cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) 1031cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
1024{ 1032{
1025 return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT; 1033 return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT;
1026} 1034}
1027 1035
1028static inline unsigned 1036static inline u64
1029cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1037cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1030{ 1038{
1031 unsigned slice = cfq_prio_to_slice(cfqd, cfqq); 1039 u64 slice = cfq_prio_to_slice(cfqd, cfqq);
1032 if (cfqd->cfq_latency) { 1040 if (cfqd->cfq_latency) {
1033 /* 1041 /*
1034 * interested queues (we consider only the ones with the same 1042 * interested queues (we consider only the ones with the same
@@ -1036,20 +1044,22 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1036 */ 1044 */
1037 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, 1045 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
1038 cfq_class_rt(cfqq)); 1046 cfq_class_rt(cfqq));
1039 unsigned sync_slice = cfqd->cfq_slice[1]; 1047 u64 sync_slice = cfqd->cfq_slice[1];
1040 unsigned expect_latency = sync_slice * iq; 1048 u64 expect_latency = sync_slice * iq;
1041 unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg); 1049 u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
1042 1050
1043 if (expect_latency > group_slice) { 1051 if (expect_latency > group_slice) {
1044 unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; 1052 u64 base_low_slice = 2 * cfqd->cfq_slice_idle;
1053 u64 low_slice;
1054
1045 /* scale low_slice according to IO priority 1055 /* scale low_slice according to IO priority
1046 * and sync vs async */ 1056 * and sync vs async */
1047 unsigned low_slice = 1057 low_slice = div64_u64(base_low_slice*slice, sync_slice);
1048 min(slice, base_low_slice * slice / sync_slice); 1058 low_slice = min(slice, low_slice);
1049 /* the adapted slice value is scaled to fit all iqs 1059 /* the adapted slice value is scaled to fit all iqs
1050 * into the target latency */ 1060 * into the target latency */
1051 slice = max(slice * group_slice / expect_latency, 1061 slice = div64_u64(slice*group_slice, expect_latency);
1052 low_slice); 1062 slice = max(slice, low_slice);
1053 } 1063 }
1054 } 1064 }
1055 return slice; 1065 return slice;
@@ -1058,12 +1068,13 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1058static inline void 1068static inline void
1059cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1069cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1060{ 1070{
1061 unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq); 1071 u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
1072 u64 now = ktime_get_ns();
1062 1073
1063 cfqq->slice_start = jiffies; 1074 cfqq->slice_start = now;
1064 cfqq->slice_end = jiffies + slice; 1075 cfqq->slice_end = now + slice;
1065 cfqq->allocated_slice = slice; 1076 cfqq->allocated_slice = slice;
1066 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); 1077 cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now);
1067} 1078}
1068 1079
1069/* 1080/*
@@ -1075,7 +1086,7 @@ static inline bool cfq_slice_used(struct cfq_queue *cfqq)
1075{ 1086{
1076 if (cfq_cfqq_slice_new(cfqq)) 1087 if (cfq_cfqq_slice_new(cfqq))
1077 return false; 1088 return false;
1078 if (time_before(jiffies, cfqq->slice_end)) 1089 if (ktime_get_ns() < cfqq->slice_end)
1079 return false; 1090 return false;
1080 1091
1081 return true; 1092 return true;
@@ -1241,8 +1252,8 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1241 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); 1252 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
1242} 1253}
1243 1254
1244static unsigned long cfq_slice_offset(struct cfq_data *cfqd, 1255static u64 cfq_slice_offset(struct cfq_data *cfqd,
1245 struct cfq_queue *cfqq) 1256 struct cfq_queue *cfqq)
1246{ 1257{
1247 /* 1258 /*
1248 * just an approximation, should be ok. 1259 * just an approximation, should be ok.
@@ -1435,31 +1446,32 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
1435 cfqg_stats_update_dequeue(cfqg); 1446 cfqg_stats_update_dequeue(cfqg);
1436} 1447}
1437 1448
1438static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, 1449static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
1439 unsigned int *unaccounted_time) 1450 u64 *unaccounted_time)
1440{ 1451{
1441 unsigned int slice_used; 1452 u64 slice_used;
1453 u64 now = ktime_get_ns();
1442 1454
1443 /* 1455 /*
1444 * Queue got expired before even a single request completed or 1456 * Queue got expired before even a single request completed or
1445 * got expired immediately after first request completion. 1457 * got expired immediately after first request completion.
1446 */ 1458 */
1447 if (!cfqq->slice_start || cfqq->slice_start == jiffies) { 1459 if (!cfqq->slice_start || cfqq->slice_start == now) {
1448 /* 1460 /*
1449 * Also charge the seek time incurred to the group, otherwise 1461 * Also charge the seek time incurred to the group, otherwise
1450 * if there are mutiple queues in the group, each can dispatch 1462 * if there are mutiple queues in the group, each can dispatch
1451 * a single request on seeky media and cause lots of seek time 1463 * a single request on seeky media and cause lots of seek time
1452 * and group will never know it. 1464 * and group will never know it.
1453 */ 1465 */
1454 slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start), 1466 slice_used = max_t(u64, (now - cfqq->dispatch_start),
1455 1); 1467 jiffies_to_nsecs(1));
1456 } else { 1468 } else {
1457 slice_used = jiffies - cfqq->slice_start; 1469 slice_used = now - cfqq->slice_start;
1458 if (slice_used > cfqq->allocated_slice) { 1470 if (slice_used > cfqq->allocated_slice) {
1459 *unaccounted_time = slice_used - cfqq->allocated_slice; 1471 *unaccounted_time = slice_used - cfqq->allocated_slice;
1460 slice_used = cfqq->allocated_slice; 1472 slice_used = cfqq->allocated_slice;
1461 } 1473 }
1462 if (time_after(cfqq->slice_start, cfqq->dispatch_start)) 1474 if (cfqq->slice_start > cfqq->dispatch_start)
1463 *unaccounted_time += cfqq->slice_start - 1475 *unaccounted_time += cfqq->slice_start -
1464 cfqq->dispatch_start; 1476 cfqq->dispatch_start;
1465 } 1477 }
@@ -1471,10 +1483,11 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
1471 struct cfq_queue *cfqq) 1483 struct cfq_queue *cfqq)
1472{ 1484{
1473 struct cfq_rb_root *st = &cfqd->grp_service_tree; 1485 struct cfq_rb_root *st = &cfqd->grp_service_tree;
1474 unsigned int used_sl, charge, unaccounted_sl = 0; 1486 u64 used_sl, charge, unaccounted_sl = 0;
1475 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) 1487 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
1476 - cfqg->service_tree_idle.count; 1488 - cfqg->service_tree_idle.count;
1477 unsigned int vfr; 1489 unsigned int vfr;
1490 u64 now = ktime_get_ns();
1478 1491
1479 BUG_ON(nr_sync < 0); 1492 BUG_ON(nr_sync < 0);
1480 used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); 1493 used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
@@ -1496,9 +1509,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
1496 cfq_group_service_tree_add(st, cfqg); 1509 cfq_group_service_tree_add(st, cfqg);
1497 1510
1498 /* This group is being expired. Save the context */ 1511 /* This group is being expired. Save the context */
1499 if (time_after(cfqd->workload_expires, jiffies)) { 1512 if (cfqd->workload_expires > now) {
1500 cfqg->saved_wl_slice = cfqd->workload_expires 1513 cfqg->saved_wl_slice = cfqd->workload_expires - now;
1501 - jiffies;
1502 cfqg->saved_wl_type = cfqd->serving_wl_type; 1514 cfqg->saved_wl_type = cfqd->serving_wl_type;
1503 cfqg->saved_wl_class = cfqd->serving_wl_class; 1515 cfqg->saved_wl_class = cfqd->serving_wl_class;
1504 } else 1516 } else
@@ -1507,7 +1519,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
1507 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, 1519 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
1508 st->min_vdisktime); 1520 st->min_vdisktime);
1509 cfq_log_cfqq(cfqq->cfqd, cfqq, 1521 cfq_log_cfqq(cfqq->cfqd, cfqq,
1510 "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", 1522 "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu",
1511 used_sl, cfqq->slice_dispatch, charge, 1523 used_sl, cfqq->slice_dispatch, charge,
1512 iops_mode(cfqd), cfqq->nr_sectors); 1524 iops_mode(cfqd), cfqq->nr_sectors);
1513 cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl); 1525 cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl);
@@ -1530,7 +1542,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg)
1530 *st = CFQ_RB_ROOT; 1542 *st = CFQ_RB_ROOT;
1531 RB_CLEAR_NODE(&cfqg->rb_node); 1543 RB_CLEAR_NODE(&cfqg->rb_node);
1532 1544
1533 cfqg->ttime.last_end_request = jiffies; 1545 cfqg->ttime.last_end_request = ktime_get_ns();
1534} 1546}
1535 1547
1536#ifdef CONFIG_CFQ_GROUP_IOSCHED 1548#ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -2213,10 +2225,11 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2213{ 2225{
2214 struct rb_node **p, *parent; 2226 struct rb_node **p, *parent;
2215 struct cfq_queue *__cfqq; 2227 struct cfq_queue *__cfqq;
2216 unsigned long rb_key; 2228 u64 rb_key;
2217 struct cfq_rb_root *st; 2229 struct cfq_rb_root *st;
2218 int left; 2230 int left;
2219 int new_cfqq = 1; 2231 int new_cfqq = 1;
2232 u64 now = ktime_get_ns();
2220 2233
2221 st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); 2234 st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq));
2222 if (cfq_class_idle(cfqq)) { 2235 if (cfq_class_idle(cfqq)) {
@@ -2226,7 +2239,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2226 __cfqq = rb_entry(parent, struct cfq_queue, rb_node); 2239 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
2227 rb_key += __cfqq->rb_key; 2240 rb_key += __cfqq->rb_key;
2228 } else 2241 } else
2229 rb_key += jiffies; 2242 rb_key += now;
2230 } else if (!add_front) { 2243 } else if (!add_front) {
2231 /* 2244 /*
2232 * Get our rb key offset. Subtract any residual slice 2245 * Get our rb key offset. Subtract any residual slice
@@ -2234,13 +2247,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2234 * count indicates slice overrun, and this should position 2247 * count indicates slice overrun, and this should position
2235 * the next service time further away in the tree. 2248 * the next service time further away in the tree.
2236 */ 2249 */
2237 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; 2250 rb_key = cfq_slice_offset(cfqd, cfqq) + now;
2238 rb_key -= cfqq->slice_resid; 2251 rb_key -= cfqq->slice_resid;
2239 cfqq->slice_resid = 0; 2252 cfqq->slice_resid = 0;
2240 } else { 2253 } else {
2241 rb_key = -HZ; 2254 rb_key = -NSEC_PER_SEC;
2242 __cfqq = cfq_rb_first(st); 2255 __cfqq = cfq_rb_first(st);
2243 rb_key += __cfqq ? __cfqq->rb_key : jiffies; 2256 rb_key += __cfqq ? __cfqq->rb_key : now;
2244 } 2257 }
2245 2258
2246 if (!RB_EMPTY_NODE(&cfqq->rb_node)) { 2259 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
@@ -2266,7 +2279,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2266 /* 2279 /*
2267 * sort by key, that represents service time. 2280 * sort by key, that represents service time.
2268 */ 2281 */
2269 if (time_before(rb_key, __cfqq->rb_key)) 2282 if (rb_key < __cfqq->rb_key)
2270 p = &parent->rb_left; 2283 p = &parent->rb_left;
2271 else { 2284 else {
2272 p = &parent->rb_right; 2285 p = &parent->rb_right;
@@ -2461,10 +2474,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
2461{ 2474{
2462 elv_rb_del(&cfqq->sort_list, rq); 2475 elv_rb_del(&cfqq->sort_list, rq);
2463 cfqq->queued[rq_is_sync(rq)]--; 2476 cfqq->queued[rq_is_sync(rq)]--;
2464 cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); 2477 cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags);
2465 cfq_add_rq_rb(rq); 2478 cfq_add_rq_rb(rq);
2466 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group, 2479 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group,
2467 rq->cmd_flags); 2480 req_op(rq), rq->cmd_flags);
2468} 2481}
2469 2482
2470static struct request * 2483static struct request *
@@ -2517,7 +2530,7 @@ static void cfq_remove_request(struct request *rq)
2517 cfq_del_rq_rb(rq); 2530 cfq_del_rq_rb(rq);
2518 2531
2519 cfqq->cfqd->rq_queued--; 2532 cfqq->cfqd->rq_queued--;
2520 cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); 2533 cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags);
2521 if (rq->cmd_flags & REQ_PRIO) { 2534 if (rq->cmd_flags & REQ_PRIO) {
2522 WARN_ON(!cfqq->prio_pending); 2535 WARN_ON(!cfqq->prio_pending);
2523 cfqq->prio_pending--; 2536 cfqq->prio_pending--;
@@ -2531,7 +2544,7 @@ static int cfq_merge(struct request_queue *q, struct request **req,
2531 struct request *__rq; 2544 struct request *__rq;
2532 2545
2533 __rq = cfq_find_rq_fmerge(cfqd, bio); 2546 __rq = cfq_find_rq_fmerge(cfqd, bio);
2534 if (__rq && elv_rq_merge_ok(__rq, bio)) { 2547 if (__rq && elv_bio_merge_ok(__rq, bio)) {
2535 *req = __rq; 2548 *req = __rq;
2536 return ELEVATOR_FRONT_MERGE; 2549 return ELEVATOR_FRONT_MERGE;
2537 } 2550 }
@@ -2552,7 +2565,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
2552static void cfq_bio_merged(struct request_queue *q, struct request *req, 2565static void cfq_bio_merged(struct request_queue *q, struct request *req,
2553 struct bio *bio) 2566 struct bio *bio)
2554{ 2567{
2555 cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_rw); 2568 cfqg_stats_update_io_merged(RQ_CFQG(req), bio_op(bio), bio->bi_rw);
2556} 2569}
2557 2570
2558static void 2571static void
@@ -2566,7 +2579,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
2566 * reposition in fifo if next is older than rq 2579 * reposition in fifo if next is older than rq
2567 */ 2580 */
2568 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 2581 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
2569 time_before(next->fifo_time, rq->fifo_time) && 2582 next->fifo_time < rq->fifo_time &&
2570 cfqq == RQ_CFQQ(next)) { 2583 cfqq == RQ_CFQQ(next)) {
2571 list_move(&rq->queuelist, &next->queuelist); 2584 list_move(&rq->queuelist, &next->queuelist);
2572 rq->fifo_time = next->fifo_time; 2585 rq->fifo_time = next->fifo_time;
@@ -2575,7 +2588,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
2575 if (cfqq->next_rq == next) 2588 if (cfqq->next_rq == next)
2576 cfqq->next_rq = rq; 2589 cfqq->next_rq = rq;
2577 cfq_remove_request(next); 2590 cfq_remove_request(next);
2578 cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags); 2591 cfqg_stats_update_io_merged(RQ_CFQG(rq), req_op(next), next->cmd_flags);
2579 2592
2580 cfqq = RQ_CFQQ(next); 2593 cfqq = RQ_CFQQ(next);
2581 /* 2594 /*
@@ -2588,8 +2601,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
2588 cfq_del_cfqq_rr(cfqd, cfqq); 2601 cfq_del_cfqq_rr(cfqd, cfqq);
2589} 2602}
2590 2603
2591static int cfq_allow_merge(struct request_queue *q, struct request *rq, 2604static int cfq_allow_bio_merge(struct request_queue *q, struct request *rq,
2592 struct bio *bio) 2605 struct bio *bio)
2593{ 2606{
2594 struct cfq_data *cfqd = q->elevator->elevator_data; 2607 struct cfq_data *cfqd = q->elevator->elevator_data;
2595 struct cfq_io_cq *cic; 2608 struct cfq_io_cq *cic;
@@ -2613,9 +2626,15 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
2613 return cfqq == RQ_CFQQ(rq); 2626 return cfqq == RQ_CFQQ(rq);
2614} 2627}
2615 2628
2629static int cfq_allow_rq_merge(struct request_queue *q, struct request *rq,
2630 struct request *next)
2631{
2632 return RQ_CFQQ(rq) == RQ_CFQQ(next);
2633}
2634
2616static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2635static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2617{ 2636{
2618 del_timer(&cfqd->idle_slice_timer); 2637 hrtimer_try_to_cancel(&cfqd->idle_slice_timer);
2619 cfqg_stats_update_idle_time(cfqq->cfqg); 2638 cfqg_stats_update_idle_time(cfqq->cfqg);
2620} 2639}
2621 2640
@@ -2627,7 +2646,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
2627 cfqd->serving_wl_class, cfqd->serving_wl_type); 2646 cfqd->serving_wl_class, cfqd->serving_wl_type);
2628 cfqg_stats_update_avg_queue_size(cfqq->cfqg); 2647 cfqg_stats_update_avg_queue_size(cfqq->cfqg);
2629 cfqq->slice_start = 0; 2648 cfqq->slice_start = 0;
2630 cfqq->dispatch_start = jiffies; 2649 cfqq->dispatch_start = ktime_get_ns();
2631 cfqq->allocated_slice = 0; 2650 cfqq->allocated_slice = 0;
2632 cfqq->slice_end = 0; 2651 cfqq->slice_end = 0;
2633 cfqq->slice_dispatch = 0; 2652 cfqq->slice_dispatch = 0;
@@ -2676,8 +2695,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2676 if (cfq_cfqq_slice_new(cfqq)) 2695 if (cfq_cfqq_slice_new(cfqq))
2677 cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); 2696 cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
2678 else 2697 else
2679 cfqq->slice_resid = cfqq->slice_end - jiffies; 2698 cfqq->slice_resid = cfqq->slice_end - ktime_get_ns();
2680 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); 2699 cfq_log_cfqq(cfqd, cfqq, "resid=%lld", cfqq->slice_resid);
2681 } 2700 }
2682 2701
2683 cfq_group_served(cfqd, cfqq->cfqg, cfqq); 2702 cfq_group_served(cfqd, cfqq->cfqg, cfqq);
@@ -2911,7 +2930,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2911 struct cfq_queue *cfqq = cfqd->active_queue; 2930 struct cfq_queue *cfqq = cfqd->active_queue;
2912 struct cfq_rb_root *st = cfqq->service_tree; 2931 struct cfq_rb_root *st = cfqq->service_tree;
2913 struct cfq_io_cq *cic; 2932 struct cfq_io_cq *cic;
2914 unsigned long sl, group_idle = 0; 2933 u64 sl, group_idle = 0;
2934 u64 now = ktime_get_ns();
2915 2935
2916 /* 2936 /*
2917 * SSD device without seek penalty, disable idling. But only do so 2937 * SSD device without seek penalty, disable idling. But only do so
@@ -2954,8 +2974,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2954 * time slice. 2974 * time slice.
2955 */ 2975 */
2956 if (sample_valid(cic->ttime.ttime_samples) && 2976 if (sample_valid(cic->ttime.ttime_samples) &&
2957 (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) { 2977 (cfqq->slice_end - now < cic->ttime.ttime_mean)) {
2958 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", 2978 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu",
2959 cic->ttime.ttime_mean); 2979 cic->ttime.ttime_mean);
2960 return; 2980 return;
2961 } 2981 }
@@ -2976,9 +2996,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2976 else 2996 else
2977 sl = cfqd->cfq_slice_idle; 2997 sl = cfqd->cfq_slice_idle;
2978 2998
2979 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 2999 hrtimer_start(&cfqd->idle_slice_timer, ns_to_ktime(sl),
3000 HRTIMER_MODE_REL);
2980 cfqg_stats_set_start_idle_time(cfqq->cfqg); 3001 cfqg_stats_set_start_idle_time(cfqq->cfqg);
2981 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, 3002 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl,
2982 group_idle ? 1 : 0); 3003 group_idle ? 1 : 0);
2983} 3004}
2984 3005
@@ -3018,7 +3039,7 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
3018 return NULL; 3039 return NULL;
3019 3040
3020 rq = rq_entry_fifo(cfqq->fifo.next); 3041 rq = rq_entry_fifo(cfqq->fifo.next);
3021 if (time_before(jiffies, rq->fifo_time)) 3042 if (ktime_get_ns() < rq->fifo_time)
3022 rq = NULL; 3043 rq = NULL;
3023 3044
3024 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); 3045 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
@@ -3096,14 +3117,14 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
3096 struct cfq_queue *queue; 3117 struct cfq_queue *queue;
3097 int i; 3118 int i;
3098 bool key_valid = false; 3119 bool key_valid = false;
3099 unsigned long lowest_key = 0; 3120 u64 lowest_key = 0;
3100 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; 3121 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
3101 3122
3102 for (i = 0; i <= SYNC_WORKLOAD; ++i) { 3123 for (i = 0; i <= SYNC_WORKLOAD; ++i) {
3103 /* select the one with lowest rb_key */ 3124 /* select the one with lowest rb_key */
3104 queue = cfq_rb_first(st_for(cfqg, wl_class, i)); 3125 queue = cfq_rb_first(st_for(cfqg, wl_class, i));
3105 if (queue && 3126 if (queue &&
3106 (!key_valid || time_before(queue->rb_key, lowest_key))) { 3127 (!key_valid || queue->rb_key < lowest_key)) {
3107 lowest_key = queue->rb_key; 3128 lowest_key = queue->rb_key;
3108 cur_best = i; 3129 cur_best = i;
3109 key_valid = true; 3130 key_valid = true;
@@ -3116,11 +3137,12 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
3116static void 3137static void
3117choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) 3138choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
3118{ 3139{
3119 unsigned slice; 3140 u64 slice;
3120 unsigned count; 3141 unsigned count;
3121 struct cfq_rb_root *st; 3142 struct cfq_rb_root *st;
3122 unsigned group_slice; 3143 u64 group_slice;
3123 enum wl_class_t original_class = cfqd->serving_wl_class; 3144 enum wl_class_t original_class = cfqd->serving_wl_class;
3145 u64 now = ktime_get_ns();
3124 3146
3125 /* Choose next priority. RT > BE > IDLE */ 3147 /* Choose next priority. RT > BE > IDLE */
3126 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) 3148 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -3129,7 +3151,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
3129 cfqd->serving_wl_class = BE_WORKLOAD; 3151 cfqd->serving_wl_class = BE_WORKLOAD;
3130 else { 3152 else {
3131 cfqd->serving_wl_class = IDLE_WORKLOAD; 3153 cfqd->serving_wl_class = IDLE_WORKLOAD;
3132 cfqd->workload_expires = jiffies + 1; 3154 cfqd->workload_expires = now + jiffies_to_nsecs(1);
3133 return; 3155 return;
3134 } 3156 }
3135 3157
@@ -3147,7 +3169,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
3147 /* 3169 /*
3148 * check workload expiration, and that we still have other queues ready 3170 * check workload expiration, and that we still have other queues ready
3149 */ 3171 */
3150 if (count && !time_after(jiffies, cfqd->workload_expires)) 3172 if (count && !(now > cfqd->workload_expires))
3151 return; 3173 return;
3152 3174
3153new_workload: 3175new_workload:
@@ -3164,13 +3186,13 @@ new_workload:
3164 */ 3186 */
3165 group_slice = cfq_group_slice(cfqd, cfqg); 3187 group_slice = cfq_group_slice(cfqd, cfqg);
3166 3188
3167 slice = group_slice * count / 3189 slice = div_u64(group_slice * count,
3168 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class], 3190 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class],
3169 cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd, 3191 cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd,
3170 cfqg)); 3192 cfqg)));
3171 3193
3172 if (cfqd->serving_wl_type == ASYNC_WORKLOAD) { 3194 if (cfqd->serving_wl_type == ASYNC_WORKLOAD) {
3173 unsigned int tmp; 3195 u64 tmp;
3174 3196
3175 /* 3197 /*
3176 * Async queues are currently system wide. Just taking 3198 * Async queues are currently system wide. Just taking
@@ -3181,19 +3203,19 @@ new_workload:
3181 */ 3203 */
3182 tmp = cfqd->cfq_target_latency * 3204 tmp = cfqd->cfq_target_latency *
3183 cfqg_busy_async_queues(cfqd, cfqg); 3205 cfqg_busy_async_queues(cfqd, cfqg);
3184 tmp = tmp/cfqd->busy_queues; 3206 tmp = div_u64(tmp, cfqd->busy_queues);
3185 slice = min_t(unsigned, slice, tmp); 3207 slice = min_t(u64, slice, tmp);
3186 3208
3187 /* async workload slice is scaled down according to 3209 /* async workload slice is scaled down according to
3188 * the sync/async slice ratio. */ 3210 * the sync/async slice ratio. */
3189 slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; 3211 slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]);
3190 } else 3212 } else
3191 /* sync workload slice is at least 2 * cfq_slice_idle */ 3213 /* sync workload slice is at least 2 * cfq_slice_idle */
3192 slice = max(slice, 2 * cfqd->cfq_slice_idle); 3214 slice = max(slice, 2 * cfqd->cfq_slice_idle);
3193 3215
3194 slice = max_t(unsigned, slice, CFQ_MIN_TT); 3216 slice = max_t(u64, slice, CFQ_MIN_TT);
3195 cfq_log(cfqd, "workload slice:%d", slice); 3217 cfq_log(cfqd, "workload slice:%llu", slice);
3196 cfqd->workload_expires = jiffies + slice; 3218 cfqd->workload_expires = now + slice;
3197} 3219}
3198 3220
3199static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) 3221static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
@@ -3211,16 +3233,17 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
3211static void cfq_choose_cfqg(struct cfq_data *cfqd) 3233static void cfq_choose_cfqg(struct cfq_data *cfqd)
3212{ 3234{
3213 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); 3235 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
3236 u64 now = ktime_get_ns();
3214 3237
3215 cfqd->serving_group = cfqg; 3238 cfqd->serving_group = cfqg;
3216 3239
3217 /* Restore the workload type data */ 3240 /* Restore the workload type data */
3218 if (cfqg->saved_wl_slice) { 3241 if (cfqg->saved_wl_slice) {
3219 cfqd->workload_expires = jiffies + cfqg->saved_wl_slice; 3242 cfqd->workload_expires = now + cfqg->saved_wl_slice;
3220 cfqd->serving_wl_type = cfqg->saved_wl_type; 3243 cfqd->serving_wl_type = cfqg->saved_wl_type;
3221 cfqd->serving_wl_class = cfqg->saved_wl_class; 3244 cfqd->serving_wl_class = cfqg->saved_wl_class;
3222 } else 3245 } else
3223 cfqd->workload_expires = jiffies - 1; 3246 cfqd->workload_expires = now - 1;
3224 3247
3225 choose_wl_class_and_type(cfqd, cfqg); 3248 choose_wl_class_and_type(cfqd, cfqg);
3226} 3249}
@@ -3232,6 +3255,7 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd)
3232static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 3255static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
3233{ 3256{
3234 struct cfq_queue *cfqq, *new_cfqq = NULL; 3257 struct cfq_queue *cfqq, *new_cfqq = NULL;
3258 u64 now = ktime_get_ns();
3235 3259
3236 cfqq = cfqd->active_queue; 3260 cfqq = cfqd->active_queue;
3237 if (!cfqq) 3261 if (!cfqq)
@@ -3292,7 +3316,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
3292 * flight or is idling for a new request, allow either of these 3316 * flight or is idling for a new request, allow either of these
3293 * conditions to happen (or time out) before selecting a new queue. 3317 * conditions to happen (or time out) before selecting a new queue.
3294 */ 3318 */
3295 if (timer_pending(&cfqd->idle_slice_timer)) { 3319 if (hrtimer_active(&cfqd->idle_slice_timer)) {
3296 cfqq = NULL; 3320 cfqq = NULL;
3297 goto keep_queue; 3321 goto keep_queue;
3298 } 3322 }
@@ -3303,7 +3327,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
3303 **/ 3327 **/
3304 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && 3328 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
3305 (cfq_cfqq_slice_new(cfqq) || 3329 (cfq_cfqq_slice_new(cfqq) ||
3306 (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { 3330 (cfqq->slice_end - now > now - cfqq->slice_start))) {
3307 cfq_clear_cfqq_deep(cfqq); 3331 cfq_clear_cfqq_deep(cfqq);
3308 cfq_clear_cfqq_idle_window(cfqq); 3332 cfq_clear_cfqq_idle_window(cfqq);
3309 } 3333 }
@@ -3381,11 +3405,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
3381static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, 3405static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
3382 struct cfq_queue *cfqq) 3406 struct cfq_queue *cfqq)
3383{ 3407{
3408 u64 now = ktime_get_ns();
3409
3384 /* the queue hasn't finished any request, can't estimate */ 3410 /* the queue hasn't finished any request, can't estimate */
3385 if (cfq_cfqq_slice_new(cfqq)) 3411 if (cfq_cfqq_slice_new(cfqq))
3386 return true; 3412 return true;
3387 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, 3413 if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end)
3388 cfqq->slice_end))
3389 return true; 3414 return true;
3390 3415
3391 return false; 3416 return false;
@@ -3460,10 +3485,10 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3460 * based on the last sync IO we serviced 3485 * based on the last sync IO we serviced
3461 */ 3486 */
3462 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { 3487 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
3463 unsigned long last_sync = jiffies - cfqd->last_delayed_sync; 3488 u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync;
3464 unsigned int depth; 3489 unsigned int depth;
3465 3490
3466 depth = last_sync / cfqd->cfq_slice[1]; 3491 depth = div64_u64(last_sync, cfqd->cfq_slice[1]);
3467 if (!depth && !cfqq->dispatched) 3492 if (!depth && !cfqq->dispatched)
3468 depth = 1; 3493 depth = 1;
3469 if (depth < max_dispatch) 3494 if (depth < max_dispatch)
@@ -3546,7 +3571,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
3546 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && 3571 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
3547 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || 3572 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
3548 cfq_class_idle(cfqq))) { 3573 cfq_class_idle(cfqq))) {
3549 cfqq->slice_end = jiffies + 1; 3574 cfqq->slice_end = ktime_get_ns() + 1;
3550 cfq_slice_expired(cfqd, 0); 3575 cfq_slice_expired(cfqd, 0);
3551 } 3576 }
3552 3577
@@ -3624,7 +3649,7 @@ static void cfq_init_icq(struct io_cq *icq)
3624{ 3649{
3625 struct cfq_io_cq *cic = icq_to_cic(icq); 3650 struct cfq_io_cq *cic = icq_to_cic(icq);
3626 3651
3627 cic->ttime.last_end_request = jiffies; 3652 cic->ttime.last_end_request = ktime_get_ns();
3628} 3653}
3629 3654
3630static void cfq_exit_icq(struct io_cq *icq) 3655static void cfq_exit_icq(struct io_cq *icq)
@@ -3682,6 +3707,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic)
3682 * elevate the priority of this queue 3707 * elevate the priority of this queue
3683 */ 3708 */
3684 cfqq->org_ioprio = cfqq->ioprio; 3709 cfqq->org_ioprio = cfqq->ioprio;
3710 cfqq->org_ioprio_class = cfqq->ioprio_class;
3685 cfq_clear_cfqq_prio_changed(cfqq); 3711 cfq_clear_cfqq_prio_changed(cfqq);
3686} 3712}
3687 3713
@@ -3845,14 +3871,15 @@ out:
3845} 3871}
3846 3872
3847static void 3873static void
3848__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) 3874__cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle)
3849{ 3875{
3850 unsigned long elapsed = jiffies - ttime->last_end_request; 3876 u64 elapsed = ktime_get_ns() - ttime->last_end_request;
3851 elapsed = min(elapsed, 2UL * slice_idle); 3877 elapsed = min(elapsed, 2UL * slice_idle);
3852 3878
3853 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; 3879 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
3854 ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8; 3880 ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8);
3855 ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples; 3881 ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
3882 ttime->ttime_samples);
3856} 3883}
3857 3884
3858static void 3885static void
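Note: the think-time tracking keeps the same 7/8 decay, now carried in nanoseconds with explicit 64-bit divides. A self-contained sketch of that EWMA, illustrative rather than the kernel helper:

/* exponentially weighted think-time average in nanoseconds */
#include <stdio.h>
#include <inttypes.h>

struct ttime {
	uint64_t total;
	uint64_t mean;
	unsigned long samples;
};

static void update_thinktime(struct ttime *t, uint64_t elapsed_ns,
			     uint64_t slice_idle_ns)
{
	if (elapsed_ns > 2 * slice_idle_ns)        /* clamp outliers */
		elapsed_ns = 2 * slice_idle_ns;

	t->samples = (7 * t->samples + 256) / 8;
	t->total   = (7 * t->total + 256 * elapsed_ns) / 8;
	t->mean    = (t->total + 128) / t->samples;
}

int main(void)
{
	struct ttime t = { 0, 0, 0 };

	/* three completions arriving 2 ms apart, with an 8 ms idle window */
	for (int i = 0; i < 3; i++) {
		update_thinktime(&t, 2000000ULL, 8000000ULL);
		printf("samples=%lu mean=%" PRIu64 " ns\n", t.samples, t.mean);
	}
	return 0;
}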
@@ -4105,10 +4132,10 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
4105 cfq_log_cfqq(cfqd, cfqq, "insert_request"); 4132 cfq_log_cfqq(cfqd, cfqq, "insert_request");
4106 cfq_init_prio_data(cfqq, RQ_CIC(rq)); 4133 cfq_init_prio_data(cfqq, RQ_CIC(rq));
4107 4134
4108 rq->fifo_time = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; 4135 rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
4109 list_add_tail(&rq->queuelist, &cfqq->fifo); 4136 list_add_tail(&rq->queuelist, &cfqq->fifo);
4110 cfq_add_rq_rb(rq); 4137 cfq_add_rq_rb(rq);
4111 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, 4138 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq),
4112 rq->cmd_flags); 4139 rq->cmd_flags);
4113 cfq_rq_enqueued(cfqd, cfqq, rq); 4140 cfq_rq_enqueued(cfqd, cfqq, rq);
4114} 4141}
@@ -4153,6 +4180,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
4153static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) 4180static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4154{ 4181{
4155 struct cfq_io_cq *cic = cfqd->active_cic; 4182 struct cfq_io_cq *cic = cfqd->active_cic;
4183 u64 now = ktime_get_ns();
4156 4184
4157 /* If the queue already has requests, don't wait */ 4185 /* If the queue already has requests, don't wait */
4158 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 4186 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -4171,7 +4199,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4171 4199
4172 /* if slice left is less than think time, wait busy */ 4200 /* if slice left is less than think time, wait busy */
4173 if (cic && sample_valid(cic->ttime.ttime_samples) 4201 if (cic && sample_valid(cic->ttime.ttime_samples)
4174 && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) 4202 && (cfqq->slice_end - now < cic->ttime.ttime_mean))
4175 return true; 4203 return true;
4176 4204
4177 /* 4205 /*
@@ -4181,7 +4209,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4181 * case where think time is less than a jiffy, mark the queue wait 4209 * case where think time is less than a jiffy, mark the queue wait
4182 * busy if only 1 jiffy is left in the slice. 4210 * busy if only 1 jiffy is left in the slice.
4183 */ 4211 */
4184 if (cfqq->slice_end - jiffies == 1) 4212 if (cfqq->slice_end - now <= jiffies_to_nsecs(1))
4185 return true; 4213 return true;
4186 4214
4187 return false; 4215 return false;
@@ -4192,9 +4220,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4192 struct cfq_queue *cfqq = RQ_CFQQ(rq); 4220 struct cfq_queue *cfqq = RQ_CFQQ(rq);
4193 struct cfq_data *cfqd = cfqq->cfqd; 4221 struct cfq_data *cfqd = cfqq->cfqd;
4194 const int sync = rq_is_sync(rq); 4222 const int sync = rq_is_sync(rq);
4195 unsigned long now; 4223 u64 now = ktime_get_ns();
4196 4224
4197 now = jiffies;
4198 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", 4225 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
4199 !!(rq->cmd_flags & REQ_NOIDLE)); 4226 !!(rq->cmd_flags & REQ_NOIDLE));
4200 4227
@@ -4206,7 +4233,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4206 cfqq->dispatched--; 4233 cfqq->dispatched--;
4207 (RQ_CFQG(rq))->dispatched--; 4234 (RQ_CFQG(rq))->dispatched--;
4208 cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq), 4235 cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
4209 rq_io_start_time_ns(rq), rq->cmd_flags); 4236 rq_io_start_time_ns(rq), req_op(rq),
4237 rq->cmd_flags);
4210 4238
4211 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; 4239 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
4212 4240
@@ -4222,7 +4250,16 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4222 cfqq_type(cfqq)); 4250 cfqq_type(cfqq));
4223 4251
4224 st->ttime.last_end_request = now; 4252 st->ttime.last_end_request = now;
4225 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) 4253 /*
4254 * We have to do this check in jiffies since start_time is in
4255 * jiffies and it is not trivial to convert to ns. If
4256 * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
4257 * will become problematic but so far we are fine (the default
4258 * is 128 ms).
4259 */
4260 if (!time_after(rq->start_time +
4261 nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
4262 jiffies))
4226 cfqd->last_delayed_sync = now; 4263 cfqd->last_delayed_sync = now;
4227 } 4264 }
4228 4265
@@ -4247,10 +4284,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4247 * the queue. 4284 * the queue.
4248 */ 4285 */
4249 if (cfq_should_wait_busy(cfqd, cfqq)) { 4286 if (cfq_should_wait_busy(cfqd, cfqq)) {
4250 unsigned long extend_sl = cfqd->cfq_slice_idle; 4287 u64 extend_sl = cfqd->cfq_slice_idle;
4251 if (!cfqd->cfq_slice_idle) 4288 if (!cfqd->cfq_slice_idle)
4252 extend_sl = cfqd->cfq_group_idle; 4289 extend_sl = cfqd->cfq_group_idle;
4253 cfqq->slice_end = jiffies + extend_sl; 4290 cfqq->slice_end = now + extend_sl;
4254 cfq_mark_cfqq_wait_busy(cfqq); 4291 cfq_mark_cfqq_wait_busy(cfqq);
4255 cfq_log_cfqq(cfqd, cfqq, "will busy wait"); 4292 cfq_log_cfqq(cfqd, cfqq, "will busy wait");
4256 } 4293 }
@@ -4275,6 +4312,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4275 cfq_schedule_dispatch(cfqd); 4312 cfq_schedule_dispatch(cfqd);
4276} 4313}
4277 4314
4315static void cfqq_boost_on_prio(struct cfq_queue *cfqq, int op_flags)
4316{
4317 /*
4318 * If REQ_PRIO is set, boost class and prio level, if it's below
4319 * BE/NORM. If prio is not set, restore the potentially boosted
4320 * class/prio level.
4321 */
4322 if (!(op_flags & REQ_PRIO)) {
4323 cfqq->ioprio_class = cfqq->org_ioprio_class;
4324 cfqq->ioprio = cfqq->org_ioprio;
4325 } else {
4326 if (cfq_class_idle(cfqq))
4327 cfqq->ioprio_class = IOPRIO_CLASS_BE;
4328 if (cfqq->ioprio > IOPRIO_NORM)
4329 cfqq->ioprio = IOPRIO_NORM;
4330 }
4331}
4332
4278static inline int __cfq_may_queue(struct cfq_queue *cfqq) 4333static inline int __cfq_may_queue(struct cfq_queue *cfqq)
4279{ 4334{
4280 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { 4335 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
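Note: the boost only holds while REQ_PRIO requests keep arriving; a plain request restores the saved class and priority. An illustrative decision table, with stand-in constants that mirror the kernel's ioprio values and should be treated as assumptions:

/* boost an idle-class queue to BE/NORM while REQ_PRIO is set, else restore */
#include <stdio.h>
#include <stdbool.h>

enum { IOPRIO_CLASS_RT = 1, IOPRIO_CLASS_BE = 2, IOPRIO_CLASS_IDLE = 3 };
#define IOPRIO_NORM 4

struct queue_prio {
	int ioprio_class, ioprio;
	int org_ioprio_class, org_ioprio;
};

static void boost_on_prio(struct queue_prio *q, bool req_prio)
{
	if (!req_prio) {
		/* restore any earlier boost */
		q->ioprio_class = q->org_ioprio_class;
		q->ioprio = q->org_ioprio;
	} else {
		if (q->ioprio_class == IOPRIO_CLASS_IDLE)
			q->ioprio_class = IOPRIO_CLASS_BE;
		if (q->ioprio > IOPRIO_NORM)
			q->ioprio = IOPRIO_NORM;
	}
}

int main(void)
{
	struct queue_prio q = { IOPRIO_CLASS_IDLE, 7, IOPRIO_CLASS_IDLE, 7 };

	boost_on_prio(&q, true);    /* REQ_PRIO: idle class boosted to BE/NORM */
	printf("boosted:  class=%d prio=%d\n", q.ioprio_class, q.ioprio);
	boost_on_prio(&q, false);   /* plain request: boost reverted */
	printf("restored: class=%d prio=%d\n", q.ioprio_class, q.ioprio);
	return 0;
}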
@@ -4285,7 +4340,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq)
4285 return ELV_MQUEUE_MAY; 4340 return ELV_MQUEUE_MAY;
4286} 4341}
4287 4342
4288static int cfq_may_queue(struct request_queue *q, int rw) 4343static int cfq_may_queue(struct request_queue *q, int op, int op_flags)
4289{ 4344{
4290 struct cfq_data *cfqd = q->elevator->elevator_data; 4345 struct cfq_data *cfqd = q->elevator->elevator_data;
4291 struct task_struct *tsk = current; 4346 struct task_struct *tsk = current;
@@ -4302,9 +4357,10 @@ static int cfq_may_queue(struct request_queue *q, int rw)
4302 if (!cic) 4357 if (!cic)
4303 return ELV_MQUEUE_MAY; 4358 return ELV_MQUEUE_MAY;
4304 4359
4305 cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); 4360 cfqq = cic_to_cfqq(cic, rw_is_sync(op, op_flags));
4306 if (cfqq) { 4361 if (cfqq) {
4307 cfq_init_prio_data(cfqq, cic); 4362 cfq_init_prio_data(cfqq, cic);
4363 cfqq_boost_on_prio(cfqq, op_flags);
4308 4364
4309 return __cfq_may_queue(cfqq); 4365 return __cfq_may_queue(cfqq);
4310 } 4366 }
@@ -4435,9 +4491,10 @@ static void cfq_kick_queue(struct work_struct *work)
4435/* 4491/*
4436 * Timer running if the active_queue is currently idling inside its time slice 4492 * Timer running if the active_queue is currently idling inside its time slice
4437 */ 4493 */
4438static void cfq_idle_slice_timer(unsigned long data) 4494static enum hrtimer_restart cfq_idle_slice_timer(struct hrtimer *timer)
4439{ 4495{
4440 struct cfq_data *cfqd = (struct cfq_data *) data; 4496 struct cfq_data *cfqd = container_of(timer, struct cfq_data,
4497 idle_slice_timer);
4441 struct cfq_queue *cfqq; 4498 struct cfq_queue *cfqq;
4442 unsigned long flags; 4499 unsigned long flags;
4443 int timed_out = 1; 4500 int timed_out = 1;
@@ -4486,11 +4543,12 @@ out_kick:
4486 cfq_schedule_dispatch(cfqd); 4543 cfq_schedule_dispatch(cfqd);
4487out_cont: 4544out_cont:
4488 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 4545 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
4546 return HRTIMER_NORESTART;
4489} 4547}
4490 4548
4491static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) 4549static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
4492{ 4550{
4493 del_timer_sync(&cfqd->idle_slice_timer); 4551 hrtimer_cancel(&cfqd->idle_slice_timer);
4494 cancel_work_sync(&cfqd->unplug_work); 4552 cancel_work_sync(&cfqd->unplug_work);
4495} 4553}
4496 4554
@@ -4586,9 +4644,9 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
4586 cfqg_put(cfqd->root_group); 4644 cfqg_put(cfqd->root_group);
4587 spin_unlock_irq(q->queue_lock); 4645 spin_unlock_irq(q->queue_lock);
4588 4646
4589 init_timer(&cfqd->idle_slice_timer); 4647 hrtimer_init(&cfqd->idle_slice_timer, CLOCK_MONOTONIC,
4648 HRTIMER_MODE_REL);
4590 cfqd->idle_slice_timer.function = cfq_idle_slice_timer; 4649 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
4591 cfqd->idle_slice_timer.data = (unsigned long) cfqd;
4592 4650
4593 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); 4651 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
4594 4652
@@ -4609,7 +4667,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
4609 * we optimistically start assuming sync ops weren't delayed in last 4667 * we optimistically start assuming sync ops weren't delayed in last
4610 * second, in order to have larger depth for async operations. 4668 * second, in order to have larger depth for async operations.
4611 */ 4669 */
4612 cfqd->last_delayed_sync = jiffies - HZ; 4670 cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC;
4613 return 0; 4671 return 0;
4614 4672
4615out_free: 4673out_free:
@@ -4652,9 +4710,9 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
4652static ssize_t __FUNC(struct elevator_queue *e, char *page) \ 4710static ssize_t __FUNC(struct elevator_queue *e, char *page) \
4653{ \ 4711{ \
4654 struct cfq_data *cfqd = e->elevator_data; \ 4712 struct cfq_data *cfqd = e->elevator_data; \
4655 unsigned int __data = __VAR; \ 4713 u64 __data = __VAR; \
4656 if (__CONV) \ 4714 if (__CONV) \
4657 __data = jiffies_to_msecs(__data); \ 4715 __data = div_u64(__data, NSEC_PER_MSEC); \
4658 return cfq_var_show(__data, (page)); \ 4716 return cfq_var_show(__data, (page)); \
4659} 4717}
4660SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 4718SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
@@ -4671,6 +4729,21 @@ SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
4671SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1); 4729SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
4672#undef SHOW_FUNCTION 4730#undef SHOW_FUNCTION
4673 4731
4732#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \
4733static ssize_t __FUNC(struct elevator_queue *e, char *page) \
4734{ \
4735 struct cfq_data *cfqd = e->elevator_data; \
4736 u64 __data = __VAR; \
4737 __data = div_u64(__data, NSEC_PER_USEC); \
4738 return cfq_var_show(__data, (page)); \
4739}
4740USEC_SHOW_FUNCTION(cfq_slice_idle_us_show, cfqd->cfq_slice_idle);
4741USEC_SHOW_FUNCTION(cfq_group_idle_us_show, cfqd->cfq_group_idle);
4742USEC_SHOW_FUNCTION(cfq_slice_sync_us_show, cfqd->cfq_slice[1]);
4743USEC_SHOW_FUNCTION(cfq_slice_async_us_show, cfqd->cfq_slice[0]);
4744USEC_SHOW_FUNCTION(cfq_target_latency_us_show, cfqd->cfq_target_latency);
4745#undef USEC_SHOW_FUNCTION
4746
4674#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 4747#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
4675static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ 4748static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
4676{ \ 4749{ \
@@ -4682,7 +4755,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
4682 else if (__data > (MAX)) \ 4755 else if (__data > (MAX)) \
4683 __data = (MAX); \ 4756 __data = (MAX); \
4684 if (__CONV) \ 4757 if (__CONV) \
4685 *(__PTR) = msecs_to_jiffies(__data); \ 4758 *(__PTR) = (u64)__data * NSEC_PER_MSEC; \
4686 else \ 4759 else \
4687 *(__PTR) = __data; \ 4760 *(__PTR) = __data; \
4688 return ret; \ 4761 return ret; \
@@ -4705,6 +4778,26 @@ STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
4705STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1); 4778STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1);
4706#undef STORE_FUNCTION 4779#undef STORE_FUNCTION
4707 4780
4781#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
4782static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
4783{ \
4784 struct cfq_data *cfqd = e->elevator_data; \
4785 unsigned int __data; \
4786 int ret = cfq_var_store(&__data, (page), count); \
4787 if (__data < (MIN)) \
4788 __data = (MIN); \
4789 else if (__data > (MAX)) \
4790 __data = (MAX); \
4791 *(__PTR) = (u64)__data * NSEC_PER_USEC; \
4792 return ret; \
4793}
4794USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX);
4795USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX);
4796USEC_STORE_FUNCTION(cfq_slice_sync_us_store, &cfqd->cfq_slice[1], 1, UINT_MAX);
4797USEC_STORE_FUNCTION(cfq_slice_async_us_store, &cfqd->cfq_slice[0], 1, UINT_MAX);
4798USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, UINT_MAX);
4799#undef USEC_STORE_FUNCTION
4800
4708#define CFQ_ATTR(name) \ 4801#define CFQ_ATTR(name) \
4709 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) 4802 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
4710 4803
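
The new *_us attributes clamp the user value and scale by NSEC_PER_USEC, so sub-millisecond settings survive without the rounding the millisecond files impose; both files back onto the same u64 nanosecond field. A sketch of that clamp-then-scale pattern, with illustrative names and limits:

/* microsecond-granularity store into a shared nanosecond field */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL

static uint64_t slice_idle_ns;		/* stand-in for cfqd->cfq_slice_idle */

static void slice_idle_us_store(unsigned int usecs,
				unsigned int min, unsigned int max)
{
	if (usecs < min)
		usecs = min;
	else if (usecs > max)
		usecs = max;
	slice_idle_ns = (uint64_t)usecs * NSEC_PER_USEC;
}

int main(void)
{
	slice_idle_us_store(250, 0, 4000000);	/* 250 us, finer than 1 ms */
	printf("slice_idle = %" PRIu64 " ns (%" PRIu64 " us)\n",
	       slice_idle_ns, slice_idle_ns / NSEC_PER_USEC);
	return 0;
}
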
@@ -4715,12 +4808,17 @@ static struct elv_fs_entry cfq_attrs[] = {
4715 CFQ_ATTR(back_seek_max), 4808 CFQ_ATTR(back_seek_max),
4716 CFQ_ATTR(back_seek_penalty), 4809 CFQ_ATTR(back_seek_penalty),
4717 CFQ_ATTR(slice_sync), 4810 CFQ_ATTR(slice_sync),
4811 CFQ_ATTR(slice_sync_us),
4718 CFQ_ATTR(slice_async), 4812 CFQ_ATTR(slice_async),
4813 CFQ_ATTR(slice_async_us),
4719 CFQ_ATTR(slice_async_rq), 4814 CFQ_ATTR(slice_async_rq),
4720 CFQ_ATTR(slice_idle), 4815 CFQ_ATTR(slice_idle),
4816 CFQ_ATTR(slice_idle_us),
4721 CFQ_ATTR(group_idle), 4817 CFQ_ATTR(group_idle),
4818 CFQ_ATTR(group_idle_us),
4722 CFQ_ATTR(low_latency), 4819 CFQ_ATTR(low_latency),
4723 CFQ_ATTR(target_latency), 4820 CFQ_ATTR(target_latency),
4821 CFQ_ATTR(target_latency_us),
4724 __ATTR_NULL 4822 __ATTR_NULL
4725}; 4823};
4726 4824
@@ -4729,7 +4827,8 @@ static struct elevator_type iosched_cfq = {
4729 .elevator_merge_fn = cfq_merge, 4827 .elevator_merge_fn = cfq_merge,
4730 .elevator_merged_fn = cfq_merged_request, 4828 .elevator_merged_fn = cfq_merged_request,
4731 .elevator_merge_req_fn = cfq_merged_requests, 4829 .elevator_merge_req_fn = cfq_merged_requests,
4732 .elevator_allow_merge_fn = cfq_allow_merge, 4830 .elevator_allow_bio_merge_fn = cfq_allow_bio_merge,
4831 .elevator_allow_rq_merge_fn = cfq_allow_rq_merge,
4733 .elevator_bio_merged_fn = cfq_bio_merged, 4832 .elevator_bio_merged_fn = cfq_bio_merged,
4734 .elevator_dispatch_fn = cfq_dispatch_requests, 4833 .elevator_dispatch_fn = cfq_dispatch_requests,
4735 .elevator_add_req_fn = cfq_insert_request, 4834 .elevator_add_req_fn = cfq_insert_request,
@@ -4776,18 +4875,7 @@ static int __init cfq_init(void)
4776{ 4875{
4777 int ret; 4876 int ret;
4778 4877
4779 /*
4780 * could be 0 on HZ < 1000 setups
4781 */
4782 if (!cfq_slice_async)
4783 cfq_slice_async = 1;
4784 if (!cfq_slice_idle)
4785 cfq_slice_idle = 1;
4786
4787#ifdef CONFIG_CFQ_GROUP_IOSCHED 4878#ifdef CONFIG_CFQ_GROUP_IOSCHED
4788 if (!cfq_group_idle)
4789 cfq_group_idle = 1;
4790
4791 ret = blkcg_policy_register(&blkcg_policy_cfq); 4879 ret = blkcg_policy_register(&blkcg_policy_cfq);
4792 if (ret) 4880 if (ret)
4793 return ret; 4881 return ret;
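
The block deleted from cfq_init() was a workaround for jiffies-based defaults derived from HZ by integer division (for example HZ / 125 for slice_idle), which truncates to zero when HZ is below 125. With defaults now expressed in nanoseconds, e.g. NSEC_PER_SEC / 125, that cannot happen, so the fixups are gone. A quick illustration of the truncation, assuming those were the original expressions:

/* HZ-derived defaults can round to 0; nanosecond ones cannot */
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	int hz_values[] = { 100, 250, 1000 };

	for (int i = 0; i < 3; i++)
		printf("HZ=%4d: HZ/125 = %d jiffies, NSEC_PER_SEC/125 = %llu ns\n",
		       hz_values[i], hz_values[i] / 125, NSEC_PER_SEC / 125);
	return 0;
}
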
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index d0dd7882d8c7..55e0bb6d7da7 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -137,7 +137,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
137 if (__rq) { 137 if (__rq) {
138 BUG_ON(sector != blk_rq_pos(__rq)); 138 BUG_ON(sector != blk_rq_pos(__rq));
139 139
140 if (elv_rq_merge_ok(__rq, bio)) { 140 if (elv_bio_merge_ok(__rq, bio)) {
141 ret = ELEVATOR_FRONT_MERGE; 141 ret = ELEVATOR_FRONT_MERGE;
142 goto out; 142 goto out;
143 } 143 }
@@ -173,7 +173,8 @@ deadline_merged_requests(struct request_queue *q, struct request *req,
173 * and move into next position (next will be deleted) in fifo 173 * and move into next position (next will be deleted) in fifo
174 */ 174 */
175 if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { 175 if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
176 if (time_before(next->fifo_time, req->fifo_time)) { 176 if (time_before((unsigned long)next->fifo_time,
177 (unsigned long)req->fifo_time)) {
177 list_move(&req->queuelist, &next->queuelist); 178 list_move(&req->queuelist, &next->queuelist);
178 req->fifo_time = next->fifo_time; 179 req->fifo_time = next->fifo_time;
179 } 180 }
@@ -227,7 +228,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
227 /* 228 /*
228 * rq is expired! 229 * rq is expired!
229 */ 230 */
230 if (time_after_eq(jiffies, rq->fifo_time)) 231 if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
231 return 1; 232 return 1;
232 233
233 return 0; 234 return 0;
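
fifo_time is now a u64 so that cfq can store nanosecond timestamps in it, while deadline keeps filling it with jiffies; the casts back to unsigned long preserve the wraparound-safe jiffies comparison. A userspace sketch mirroring the time_after_eq() idiom (the helper name and values are illustrative):

/* wraparound-tolerant "a is at or after b" check on an unsigned counter */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

static int time_after_eq_ul(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;
}

int main(void)
{
	uint64_t fifo_time = (uint64_t)ULONG_MAX - 5;	/* jiffies widened to u64 */
	unsigned long jiffies_now = 10;			/* counter has wrapped */

	printf("expired: %d\n",
	       time_after_eq_ul(jiffies_now, (unsigned long)fifo_time));
	return 0;
}
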
diff --git a/block/elevator.c b/block/elevator.c
index c3555c9c672f..7096c22041e7 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -53,13 +53,13 @@ static LIST_HEAD(elv_list);
53 * Query io scheduler to see if the current process issuing bio may be 53 * Query io scheduler to see if the current process issuing bio may be
54 * merged with rq. 54 * merged with rq.
55 */ 55 */
56static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) 56static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
57{ 57{
58 struct request_queue *q = rq->q; 58 struct request_queue *q = rq->q;
59 struct elevator_queue *e = q->elevator; 59 struct elevator_queue *e = q->elevator;
60 60
61 if (e->type->ops.elevator_allow_merge_fn) 61 if (e->type->ops.elevator_allow_bio_merge_fn)
62 return e->type->ops.elevator_allow_merge_fn(q, rq, bio); 62 return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio);
63 63
64 return 1; 64 return 1;
65} 65}
@@ -67,17 +67,17 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
67/* 67/*
68 * can we safely merge with this request? 68 * can we safely merge with this request?
69 */ 69 */
70bool elv_rq_merge_ok(struct request *rq, struct bio *bio) 70bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
71{ 71{
72 if (!blk_rq_merge_ok(rq, bio)) 72 if (!blk_rq_merge_ok(rq, bio))
73 return 0; 73 return false;
74 74
75 if (!elv_iosched_allow_merge(rq, bio)) 75 if (!elv_iosched_allow_bio_merge(rq, bio))
76 return 0; 76 return false;
77 77
78 return 1; 78 return true;
79} 79}
80EXPORT_SYMBOL(elv_rq_merge_ok); 80EXPORT_SYMBOL(elv_bio_merge_ok);
81 81
82static struct elevator_type *elevator_find(const char *name) 82static struct elevator_type *elevator_find(const char *name)
83{ 83{
@@ -366,8 +366,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
366 list_for_each_prev(entry, &q->queue_head) { 366 list_for_each_prev(entry, &q->queue_head) {
367 struct request *pos = list_entry_rq(entry); 367 struct request *pos = list_entry_rq(entry);
368 368
369 if ((rq->cmd_flags & REQ_DISCARD) != 369 if ((req_op(rq) == REQ_OP_DISCARD) != (req_op(pos) == REQ_OP_DISCARD))
370 (pos->cmd_flags & REQ_DISCARD))
371 break; 370 break;
372 if (rq_data_dir(rq) != rq_data_dir(pos)) 371 if (rq_data_dir(rq) != rq_data_dir(pos))
373 break; 372 break;
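
This hunk swaps a REQ_DISCARD bit test in cmd_flags for a comparison of req_op() values, part of the series that separates the request operation from its modifier flags. A simplified model of that split; the struct and enum below are illustrative, not the kernel's actual request layout:

/* one operation per request, modifier flags kept separately */
#include <stdbool.h>
#include <stdio.h>

enum sketch_op { OP_READ, OP_WRITE, OP_DISCARD, OP_FLUSH };

struct sketch_req {
	enum sketch_op op;	/* exactly one operation */
	unsigned int flags;	/* independent modifiers (sync, meta, ...) */
};

static bool same_op_class(const struct sketch_req *a, const struct sketch_req *b)
{
	/* don't sort discards in among reads/writes */
	return (a->op == OP_DISCARD) == (b->op == OP_DISCARD);
}

int main(void)
{
	struct sketch_req w = { .op = OP_WRITE, .flags = 0 };
	struct sketch_req d = { .op = OP_DISCARD, .flags = 0 };

	printf("write vs discard in same class: %d\n", same_op_class(&w, &d));
	return 0;
}
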
@@ -426,7 +425,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
426 /* 425 /*
427 * First try one-hit cache. 426 * First try one-hit cache.
428 */ 427 */
429 if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) { 428 if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
430 ret = blk_try_merge(q->last_merge, bio); 429 ret = blk_try_merge(q->last_merge, bio);
431 if (ret != ELEVATOR_NO_MERGE) { 430 if (ret != ELEVATOR_NO_MERGE) {
432 *req = q->last_merge; 431 *req = q->last_merge;
@@ -441,7 +440,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
441 * See if our hash lookup can find a potential backmerge. 440 * See if our hash lookup can find a potential backmerge.
442 */ 441 */
443 __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector); 442 __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
444 if (__rq && elv_rq_merge_ok(__rq, bio)) { 443 if (__rq && elv_bio_merge_ok(__rq, bio)) {
445 *req = __rq; 444 *req = __rq;
446 return ELEVATOR_BACK_MERGE; 445 return ELEVATOR_BACK_MERGE;
447 } 446 }
@@ -717,12 +716,12 @@ void elv_put_request(struct request_queue *q, struct request *rq)
717 e->type->ops.elevator_put_req_fn(rq); 716 e->type->ops.elevator_put_req_fn(rq);
718} 717}
719 718
720int elv_may_queue(struct request_queue *q, int rw) 719int elv_may_queue(struct request_queue *q, int op, int op_flags)
721{ 720{
722 struct elevator_queue *e = q->elevator; 721 struct elevator_queue *e = q->elevator;
723 722
724 if (e->type->ops.elevator_may_queue_fn) 723 if (e->type->ops.elevator_may_queue_fn)
725 return e->type->ops.elevator_may_queue_fn(q, rw); 724 return e->type->ops.elevator_may_queue_fn(q, op, op_flags);
726 725
727 return ELV_MQUEUE_MAY; 726 return ELV_MQUEUE_MAY;
728} 727}
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d7eb77e1e3a8..71d9ed9df8da 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -495,7 +495,6 @@ rescan:
495 /* add partitions */ 495 /* add partitions */
496 for (p = 1; p < state->limit; p++) { 496 for (p = 1; p < state->limit; p++) {
497 sector_t size, from; 497 sector_t size, from;
498 struct partition_meta_info *info = NULL;
499 498
500 size = state->parts[p].size; 499 size = state->parts[p].size;
501 if (!size) 500 if (!size)
@@ -530,8 +529,6 @@ rescan:
530 } 529 }
531 } 530 }
532 531
533 if (state->parts[p].has_info)
534 info = &state->parts[p].info;
535 part = add_partition(disk, p, from, size, 532 part = add_partition(disk, p, from, size,
536 state->parts[p].flags, 533 state->parts[p].flags,
537 &state->parts[p].info); 534 &state->parts[p].info);
diff --git a/block/partitions/atari.c b/block/partitions/atari.c
index 9875b05e80a2..ff1fb93712c1 100644
--- a/block/partitions/atari.c
+++ b/block/partitions/atari.c
@@ -42,6 +42,13 @@ int atari_partition(struct parsed_partitions *state)
42 int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ 42 int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */
43#endif 43#endif
44 44
45 /*
46 * ATARI partition scheme supports 512 lba only. If this is not
47 * the case, bail early to avoid miscalculating hd_size.
48 */
49 if (bdev_logical_block_size(state->bdev) != 512)
50 return 0;
51
45 rs = read_part_sector(state, 0, &sect); 52 rs = read_part_sector(state, 0, &sect);
46 if (!rs) 53 if (!rs)
47 return -1; 54 return -1;
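
The early return keeps the AHDI parser, whose on-disk fields are expressed in 512-byte units, from doing its hd_size sanity math against a device that reports a different logical block size. A rough, made-up-numbers illustration of the unit mismatch the check avoids:

/* interpreting 512-byte-unit counts with a 4K logical block size */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t hd_size_512 = 2097152;		/* table says 1 GiB in 512-byte units */
	unsigned int logical_block = 4096;	/* device reports 4K sectors */

	printf("intended size: %llu bytes\n",
	       (unsigned long long)hd_size_512 * 512);
	printf("misread as   : %llu bytes\n",
	       (unsigned long long)hd_size_512 * logical_block);
	return 0;
}
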