author     Linus Torvalds <torvalds@linux-foundation.org>   2016-07-26 18:03:07 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-07-26 18:03:07 -0400
commit     d05d7f40791ccbb6e543cc5dd6a6aa08fc71d635 (patch)
tree       dc0039fe490a41a70de10d58fe8e6136db46463a /block
parent     75a442efb1ca613f8d1cc71a32c2c9b0aefae4a5 (diff)
parent     17007f3994cdb4643355c73f54f0adad006cf59e (diff)
Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:

 - the big change is the cleanup from Mike Christie, cleaning up our uses of command types and modified flags. This is what will throw some merge conflicts

 - regression fix for the above for btrfs, from Vincent

 - following up to the above, better packing of struct request from Christoph

 - a 2038 fix for blktrace from Arnd

 - a few trivial/spelling fixes from Bart Van Assche

 - a front merge check fix from Damien, which could cause issues on SMR drives

 - Atari partition fix from Gabriel

 - convert cfq to highres timers, since jiffies isn't granular enough for some devices these days. From Jan and Jeff

 - CFQ priority boost fix for idle classes, from me

 - cleanup series from Ming, improving our bio/bvec iteration

 - a direct issue fix for blk-mq from Omar

 - fix for plug merging not involving the IO scheduler, like we do for other types of merges. From Tahsin

 - expose DAX type internally and through sysfs. From Toshi and Yigal

* 'for-4.8/core' of git://git.kernel.dk/linux-block: (76 commits)
  block: Fix front merge check
  block: do not merge requests without consulting with io scheduler
  block: Fix spelling in a source code comment
  block: expose QUEUE_FLAG_DAX in sysfs
  block: add QUEUE_FLAG_DAX for devices to advertise their DAX support
  Btrfs: fix comparison in __btrfs_map_block()
  block: atari: Return early for unsupported sector size
  Doc: block: Fix a typo in queue-sysfs.txt
  cfq-iosched: Charge at least 1 jiffie instead of 1 ns
  cfq-iosched: Fix regression in bonnie++ rewrite performance
  cfq-iosched: Convert slice_resid from u64 to s64
  block: Convert fifo_time from ulong to u64
  blktrace: avoid using timespec
  block/blk-cgroup.c: Declare local symbols static
  block/bio-integrity.c: Add #include "blk.h"
  block/partition-generic.c: Remove a set-but-not-used variable
  block: bio: kill BIO_MAX_SIZE
  cfq-iosched: temporarily boost queue priority for idle classes
  block: drbd: avoid to use BIO_MAX_SIZE
  block: bio: remove BIO_MAX_SECTORS
  ...
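As a quick orientation for the Mike Christie series that dominates this pull (and its merge conflicts): callers now record the operation (REQ_OP_*) and the modifier flags in the bio itself, pass only the bio to submit_bio(), and compare bio_op() instead of testing REQ_DISCARD/REQ_WRITE_SAME bits in bi_rw. A minimal caller-side sketch using the helpers visible in the hunks below; the two wrapper functions here are hypothetical and not part of the patch:

    #include <linux/bio.h>
    #include <linux/blkdev.h>

    /* Hypothetical wrapper: pre-4.8 this was submit_bio(WRITE | REQ_SYNC, bio). */
    static void submit_sync_write(struct bio *bio)
    {
            bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC);  /* op + rq_flag_bits */
            submit_bio(bio);                                /* no rw argument anymore */
    }

    /*
     * Hypothetical predicate: operations are compared by value via bio_op()
     * rather than by testing individual bits in bio->bi_rw.
     */
    static bool bio_needs_payload_clone(struct bio *bio)
    {
            return bio_op(bio) == REQ_OP_DISCARD ||
                   bio_op(bio) == REQ_OP_WRITE_SAME;
    }

The request side mirrors this with req_op()/req_set_op_attrs(), as the blk-core.c and blk-mq.c hunks below show.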
Diffstat (limited to 'block')
-rw-r--r--  block/bio-integrity.c       |   1
-rw-r--r--  block/bio.c                 |  20
-rw-r--r--  block/blk-cgroup.c          |   4
-rw-r--r--  block/blk-core.c            |  96
-rw-r--r--  block/blk-exec.c            |   2
-rw-r--r--  block/blk-flush.c           |  23
-rw-r--r--  block/blk-lib.c             |  31
-rw-r--r--  block/blk-map.c             |   2
-rw-r--r--  block/blk-merge.c           |  36
-rw-r--r--  block/blk-mq.c              |  42
-rw-r--r--  block/blk-sysfs.c           |  11
-rw-r--r--  block/cfq-iosched.c         | 432
-rw-r--r--  block/deadline-iosched.c    |   7
-rw-r--r--  block/elevator.c            |  29
-rw-r--r--  block/partition-generic.c   |   3
-rw-r--r--  block/partitions/atari.c    |   7
16 files changed, 434 insertions, 312 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 711e4d8de6fa..15d37b1cd500 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -26,6 +26,7 @@
26#include <linux/bio.h> 26#include <linux/bio.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include "blk.h"
29 30
30#define BIP_INLINE_VECS 4 31#define BIP_INLINE_VECS 4
31 32
diff --git a/block/bio.c b/block/bio.c
index 0e4aa42bc30d..848cd351513b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -656,16 +656,15 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
656 bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); 656 bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
657 if (!bio) 657 if (!bio)
658 return NULL; 658 return NULL;
659
660 bio->bi_bdev = bio_src->bi_bdev; 659 bio->bi_bdev = bio_src->bi_bdev;
661 bio->bi_rw = bio_src->bi_rw; 660 bio->bi_rw = bio_src->bi_rw;
662 bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; 661 bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
663 bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; 662 bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
664 663
665 if (bio->bi_rw & REQ_DISCARD) 664 if (bio_op(bio) == REQ_OP_DISCARD)
666 goto integrity_clone; 665 goto integrity_clone;
667 666
668 if (bio->bi_rw & REQ_WRITE_SAME) { 667 if (bio_op(bio) == REQ_OP_WRITE_SAME) {
669 bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; 668 bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
670 goto integrity_clone; 669 goto integrity_clone;
671 } 670 }
@@ -854,21 +853,20 @@ static void submit_bio_wait_endio(struct bio *bio)
854 853
855/** 854/**
856 * submit_bio_wait - submit a bio, and wait until it completes 855 * submit_bio_wait - submit a bio, and wait until it completes
857 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
858 * @bio: The &struct bio which describes the I/O 856 * @bio: The &struct bio which describes the I/O
859 * 857 *
860 * Simple wrapper around submit_bio(). Returns 0 on success, or the error from 858 * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
861 * bio_endio() on failure. 859 * bio_endio() on failure.
862 */ 860 */
863int submit_bio_wait(int rw, struct bio *bio) 861int submit_bio_wait(struct bio *bio)
864{ 862{
865 struct submit_bio_ret ret; 863 struct submit_bio_ret ret;
866 864
867 rw |= REQ_SYNC;
868 init_completion(&ret.event); 865 init_completion(&ret.event);
869 bio->bi_private = &ret; 866 bio->bi_private = &ret;
870 bio->bi_end_io = submit_bio_wait_endio; 867 bio->bi_end_io = submit_bio_wait_endio;
871 submit_bio(rw, bio); 868 bio->bi_rw |= REQ_SYNC;
869 submit_bio(bio);
872 wait_for_completion_io(&ret.event); 870 wait_for_completion_io(&ret.event);
873 871
874 return ret.error; 872 return ret.error;
@@ -1167,7 +1165,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
1167 goto out_bmd; 1165 goto out_bmd;
1168 1166
1169 if (iter->type & WRITE) 1167 if (iter->type & WRITE)
1170 bio->bi_rw |= REQ_WRITE; 1168 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1171 1169
1172 ret = 0; 1170 ret = 0;
1173 1171
@@ -1337,7 +1335,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
1337 * set data direction, and check if mapped pages need bouncing 1335 * set data direction, and check if mapped pages need bouncing
1338 */ 1336 */
1339 if (iter->type & WRITE) 1337 if (iter->type & WRITE)
1340 bio->bi_rw |= REQ_WRITE; 1338 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1341 1339
1342 bio_set_flag(bio, BIO_USER_MAPPED); 1340 bio_set_flag(bio, BIO_USER_MAPPED);
1343 1341
@@ -1530,7 +1528,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1530 bio->bi_private = data; 1528 bio->bi_private = data;
1531 } else { 1529 } else {
1532 bio->bi_end_io = bio_copy_kern_endio; 1530 bio->bi_end_io = bio_copy_kern_endio;
1533 bio->bi_rw |= REQ_WRITE; 1531 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1534 } 1532 }
1535 1533
1536 return bio; 1534 return bio;
@@ -1785,7 +1783,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
1785 * Discards need a mutable bio_vec to accommodate the payload 1783 * Discards need a mutable bio_vec to accommodate the payload
1786 * required by the DSM TRIM and UNMAP commands. 1784 * required by the DSM TRIM and UNMAP commands.
1787 */ 1785 */
1788 if (bio->bi_rw & REQ_DISCARD) 1786 if (bio_op(bio) == REQ_OP_DISCARD)
1789 split = bio_clone_bioset(bio, gfp, bs); 1787 split = bio_clone_bioset(bio, gfp, bs);
1790 else 1788 else
1791 split = bio_clone_fast(bio, gfp, bs); 1789 split = bio_clone_fast(bio, gfp, bs);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 66e6f1aae02e..dd38e5ced4a3 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -905,7 +905,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
905 return 0; 905 return 0;
906} 906}
907 907
908struct cftype blkcg_files[] = { 908static struct cftype blkcg_files[] = {
909 { 909 {
910 .name = "stat", 910 .name = "stat",
911 .flags = CFTYPE_NOT_ON_ROOT, 911 .flags = CFTYPE_NOT_ON_ROOT,
@@ -914,7 +914,7 @@ struct cftype blkcg_files[] = {
914 { } /* terminate */ 914 { } /* terminate */
915}; 915};
916 916
917struct cftype blkcg_legacy_files[] = { 917static struct cftype blkcg_legacy_files[] = {
918 { 918 {
919 .name = "reset_stats", 919 .name = "reset_stats",
920 .write_u64 = blkcg_reset_stats, 920 .write_u64 = blkcg_reset_stats,
diff --git a/block/blk-core.c b/block/blk-core.c
index 2475b1c72773..3cfd67d006fb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -959,10 +959,10 @@ static void __freed_request(struct request_list *rl, int sync)
959 * A request has just been released. Account for it, update the full and 959 * A request has just been released. Account for it, update the full and
960 * congestion status, wake up any waiters. Called under q->queue_lock. 960 * congestion status, wake up any waiters. Called under q->queue_lock.
961 */ 961 */
962static void freed_request(struct request_list *rl, unsigned int flags) 962static void freed_request(struct request_list *rl, int op, unsigned int flags)
963{ 963{
964 struct request_queue *q = rl->q; 964 struct request_queue *q = rl->q;
965 int sync = rw_is_sync(flags); 965 int sync = rw_is_sync(op, flags);
966 966
967 q->nr_rqs[sync]--; 967 q->nr_rqs[sync]--;
968 rl->count[sync]--; 968 rl->count[sync]--;
@@ -1029,7 +1029,7 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
1029 * Flush requests do not use the elevator so skip initialization. 1029 * Flush requests do not use the elevator so skip initialization.
1030 * This allows a request to share the flush and elevator data. 1030 * This allows a request to share the flush and elevator data.
1031 */ 1031 */
1032 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) 1032 if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA))
1033 return false; 1033 return false;
1034 1034
1035 return true; 1035 return true;
@@ -1054,7 +1054,8 @@ static struct io_context *rq_ioc(struct bio *bio)
1054/** 1054/**
1055 * __get_request - get a free request 1055 * __get_request - get a free request
1056 * @rl: request list to allocate from 1056 * @rl: request list to allocate from
1057 * @rw_flags: RW and SYNC flags 1057 * @op: REQ_OP_READ/REQ_OP_WRITE
1058 * @op_flags: rq_flag_bits
1058 * @bio: bio to allocate request for (can be %NULL) 1059 * @bio: bio to allocate request for (can be %NULL)
1059 * @gfp_mask: allocation mask 1060 * @gfp_mask: allocation mask
1060 * 1061 *
@@ -1065,21 +1066,22 @@ static struct io_context *rq_ioc(struct bio *bio)
1065 * Returns ERR_PTR on failure, with @q->queue_lock held. 1066 * Returns ERR_PTR on failure, with @q->queue_lock held.
1066 * Returns request pointer on success, with @q->queue_lock *not held*. 1067 * Returns request pointer on success, with @q->queue_lock *not held*.
1067 */ 1068 */
1068static struct request *__get_request(struct request_list *rl, int rw_flags, 1069static struct request *__get_request(struct request_list *rl, int op,
1069 struct bio *bio, gfp_t gfp_mask) 1070 int op_flags, struct bio *bio,
1071 gfp_t gfp_mask)
1070{ 1072{
1071 struct request_queue *q = rl->q; 1073 struct request_queue *q = rl->q;
1072 struct request *rq; 1074 struct request *rq;
1073 struct elevator_type *et = q->elevator->type; 1075 struct elevator_type *et = q->elevator->type;
1074 struct io_context *ioc = rq_ioc(bio); 1076 struct io_context *ioc = rq_ioc(bio);
1075 struct io_cq *icq = NULL; 1077 struct io_cq *icq = NULL;
1076 const bool is_sync = rw_is_sync(rw_flags) != 0; 1078 const bool is_sync = rw_is_sync(op, op_flags) != 0;
1077 int may_queue; 1079 int may_queue;
1078 1080
1079 if (unlikely(blk_queue_dying(q))) 1081 if (unlikely(blk_queue_dying(q)))
1080 return ERR_PTR(-ENODEV); 1082 return ERR_PTR(-ENODEV);
1081 1083
1082 may_queue = elv_may_queue(q, rw_flags); 1084 may_queue = elv_may_queue(q, op, op_flags);
1083 if (may_queue == ELV_MQUEUE_NO) 1085 if (may_queue == ELV_MQUEUE_NO)
1084 goto rq_starved; 1086 goto rq_starved;
1085 1087
@@ -1123,7 +1125,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
1123 1125
1124 /* 1126 /*
1125 * Decide whether the new request will be managed by elevator. If 1127 * Decide whether the new request will be managed by elevator. If
1126 * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will 1128 * so, mark @op_flags and increment elvpriv. Non-zero elvpriv will
1127 * prevent the current elevator from being destroyed until the new 1129 * prevent the current elevator from being destroyed until the new
1128 * request is freed. This guarantees icq's won't be destroyed and 1130 * request is freed. This guarantees icq's won't be destroyed and
1129 * makes creating new ones safe. 1131 * makes creating new ones safe.
@@ -1132,14 +1134,14 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
1132 * it will be created after releasing queue_lock. 1134 * it will be created after releasing queue_lock.
1133 */ 1135 */
1134 if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { 1136 if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
1135 rw_flags |= REQ_ELVPRIV; 1137 op_flags |= REQ_ELVPRIV;
1136 q->nr_rqs_elvpriv++; 1138 q->nr_rqs_elvpriv++;
1137 if (et->icq_cache && ioc) 1139 if (et->icq_cache && ioc)
1138 icq = ioc_lookup_icq(ioc, q); 1140 icq = ioc_lookup_icq(ioc, q);
1139 } 1141 }
1140 1142
1141 if (blk_queue_io_stat(q)) 1143 if (blk_queue_io_stat(q))
1142 rw_flags |= REQ_IO_STAT; 1144 op_flags |= REQ_IO_STAT;
1143 spin_unlock_irq(q->queue_lock); 1145 spin_unlock_irq(q->queue_lock);
1144 1146
1145 /* allocate and init request */ 1147 /* allocate and init request */
@@ -1149,10 +1151,10 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
1149 1151
1150 blk_rq_init(q, rq); 1152 blk_rq_init(q, rq);
1151 blk_rq_set_rl(rq, rl); 1153 blk_rq_set_rl(rq, rl);
1152 rq->cmd_flags = rw_flags | REQ_ALLOCED; 1154 req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED);
1153 1155
1154 /* init elvpriv */ 1156 /* init elvpriv */
1155 if (rw_flags & REQ_ELVPRIV) { 1157 if (op_flags & REQ_ELVPRIV) {
1156 if (unlikely(et->icq_cache && !icq)) { 1158 if (unlikely(et->icq_cache && !icq)) {
1157 if (ioc) 1159 if (ioc)
1158 icq = ioc_create_icq(ioc, q, gfp_mask); 1160 icq = ioc_create_icq(ioc, q, gfp_mask);
@@ -1178,7 +1180,7 @@ out:
1178 if (ioc_batching(q, ioc)) 1180 if (ioc_batching(q, ioc))
1179 ioc->nr_batch_requests--; 1181 ioc->nr_batch_requests--;
1180 1182
1181 trace_block_getrq(q, bio, rw_flags & 1); 1183 trace_block_getrq(q, bio, op);
1182 return rq; 1184 return rq;
1183 1185
1184fail_elvpriv: 1186fail_elvpriv:
@@ -1208,7 +1210,7 @@ fail_alloc:
1208 * queue, but this is pretty rare. 1210 * queue, but this is pretty rare.
1209 */ 1211 */
1210 spin_lock_irq(q->queue_lock); 1212 spin_lock_irq(q->queue_lock);
1211 freed_request(rl, rw_flags); 1213 freed_request(rl, op, op_flags);
1212 1214
1213 /* 1215 /*
1214 * in the very unlikely event that allocation failed and no 1216 * in the very unlikely event that allocation failed and no
@@ -1226,7 +1228,8 @@ rq_starved:
1226/** 1228/**
1227 * get_request - get a free request 1229 * get_request - get a free request
1228 * @q: request_queue to allocate request from 1230 * @q: request_queue to allocate request from
1229 * @rw_flags: RW and SYNC flags 1231 * @op: REQ_OP_READ/REQ_OP_WRITE
1232 * @op_flags: rq_flag_bits
1230 * @bio: bio to allocate request for (can be %NULL) 1233 * @bio: bio to allocate request for (can be %NULL)
1231 * @gfp_mask: allocation mask 1234 * @gfp_mask: allocation mask
1232 * 1235 *
@@ -1237,17 +1240,18 @@ rq_starved:
1237 * Returns ERR_PTR on failure, with @q->queue_lock held. 1240 * Returns ERR_PTR on failure, with @q->queue_lock held.
1238 * Returns request pointer on success, with @q->queue_lock *not held*. 1241 * Returns request pointer on success, with @q->queue_lock *not held*.
1239 */ 1242 */
1240static struct request *get_request(struct request_queue *q, int rw_flags, 1243static struct request *get_request(struct request_queue *q, int op,
1241 struct bio *bio, gfp_t gfp_mask) 1244 int op_flags, struct bio *bio,
1245 gfp_t gfp_mask)
1242{ 1246{
1243 const bool is_sync = rw_is_sync(rw_flags) != 0; 1247 const bool is_sync = rw_is_sync(op, op_flags) != 0;
1244 DEFINE_WAIT(wait); 1248 DEFINE_WAIT(wait);
1245 struct request_list *rl; 1249 struct request_list *rl;
1246 struct request *rq; 1250 struct request *rq;
1247 1251
1248 rl = blk_get_rl(q, bio); /* transferred to @rq on success */ 1252 rl = blk_get_rl(q, bio); /* transferred to @rq on success */
1249retry: 1253retry:
1250 rq = __get_request(rl, rw_flags, bio, gfp_mask); 1254 rq = __get_request(rl, op, op_flags, bio, gfp_mask);
1251 if (!IS_ERR(rq)) 1255 if (!IS_ERR(rq))
1252 return rq; 1256 return rq;
1253 1257
@@ -1260,7 +1264,7 @@ retry:
1260 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, 1264 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
1261 TASK_UNINTERRUPTIBLE); 1265 TASK_UNINTERRUPTIBLE);
1262 1266
1263 trace_block_sleeprq(q, bio, rw_flags & 1); 1267 trace_block_sleeprq(q, bio, op);
1264 1268
1265 spin_unlock_irq(q->queue_lock); 1269 spin_unlock_irq(q->queue_lock);
1266 io_schedule(); 1270 io_schedule();
@@ -1289,7 +1293,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
1289 create_io_context(gfp_mask, q->node); 1293 create_io_context(gfp_mask, q->node);
1290 1294
1291 spin_lock_irq(q->queue_lock); 1295 spin_lock_irq(q->queue_lock);
1292 rq = get_request(q, rw, NULL, gfp_mask); 1296 rq = get_request(q, rw, 0, NULL, gfp_mask);
1293 if (IS_ERR(rq)) 1297 if (IS_ERR(rq))
1294 spin_unlock_irq(q->queue_lock); 1298 spin_unlock_irq(q->queue_lock);
1295 /* q->queue_lock is unlocked at this point */ 1299 /* q->queue_lock is unlocked at this point */
@@ -1491,13 +1495,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
1491 */ 1495 */
1492 if (req->cmd_flags & REQ_ALLOCED) { 1496 if (req->cmd_flags & REQ_ALLOCED) {
1493 unsigned int flags = req->cmd_flags; 1497 unsigned int flags = req->cmd_flags;
1498 int op = req_op(req);
1494 struct request_list *rl = blk_rq_rl(req); 1499 struct request_list *rl = blk_rq_rl(req);
1495 1500
1496 BUG_ON(!list_empty(&req->queuelist)); 1501 BUG_ON(!list_empty(&req->queuelist));
1497 BUG_ON(ELV_ON_HASH(req)); 1502 BUG_ON(ELV_ON_HASH(req));
1498 1503
1499 blk_free_request(rl, req); 1504 blk_free_request(rl, req);
1500 freed_request(rl, flags); 1505 freed_request(rl, op, flags);
1501 blk_put_rl(rl); 1506 blk_put_rl(rl);
1502 } 1507 }
1503} 1508}
@@ -1712,7 +1717,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1712{ 1717{
1713 const bool sync = !!(bio->bi_rw & REQ_SYNC); 1718 const bool sync = !!(bio->bi_rw & REQ_SYNC);
1714 struct blk_plug *plug; 1719 struct blk_plug *plug;
1715 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; 1720 int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT;
1716 struct request *req; 1721 struct request *req;
1717 unsigned int request_count = 0; 1722 unsigned int request_count = 0;
1718 1723
@@ -1731,7 +1736,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1731 return BLK_QC_T_NONE; 1736 return BLK_QC_T_NONE;
1732 } 1737 }
1733 1738
1734 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { 1739 if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) {
1735 spin_lock_irq(q->queue_lock); 1740 spin_lock_irq(q->queue_lock);
1736 where = ELEVATOR_INSERT_FLUSH; 1741 where = ELEVATOR_INSERT_FLUSH;
1737 goto get_rq; 1742 goto get_rq;
@@ -1772,15 +1777,19 @@ get_rq:
1772 * but we need to set it earlier to expose the sync flag to the 1777 * but we need to set it earlier to expose the sync flag to the
1773 * rq allocator and io schedulers. 1778 * rq allocator and io schedulers.
1774 */ 1779 */
1775 rw_flags = bio_data_dir(bio);
1776 if (sync) 1780 if (sync)
1777 rw_flags |= REQ_SYNC; 1781 rw_flags |= REQ_SYNC;
1778 1782
1779 /* 1783 /*
1784 * Add in META/PRIO flags, if set, before we get to the IO scheduler
1785 */
1786 rw_flags |= (bio->bi_rw & (REQ_META | REQ_PRIO));
1787
1788 /*
1780 * Grab a free request. This is might sleep but can not fail. 1789 * Grab a free request. This is might sleep but can not fail.
1781 * Returns with the queue unlocked. 1790 * Returns with the queue unlocked.
1782 */ 1791 */
1783 req = get_request(q, rw_flags, bio, GFP_NOIO); 1792 req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO);
1784 if (IS_ERR(req)) { 1793 if (IS_ERR(req)) {
1785 bio->bi_error = PTR_ERR(req); 1794 bio->bi_error = PTR_ERR(req);
1786 bio_endio(bio); 1795 bio_endio(bio);
@@ -1849,7 +1858,7 @@ static void handle_bad_sector(struct bio *bio)
1849 char b[BDEVNAME_SIZE]; 1858 char b[BDEVNAME_SIZE];
1850 1859
1851 printk(KERN_INFO "attempt to access beyond end of device\n"); 1860 printk(KERN_INFO "attempt to access beyond end of device\n");
1852 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n", 1861 printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
1853 bdevname(bio->bi_bdev, b), 1862 bdevname(bio->bi_bdev, b),
1854 bio->bi_rw, 1863 bio->bi_rw,
1855 (unsigned long long)bio_end_sector(bio), 1864 (unsigned long long)bio_end_sector(bio),
@@ -1964,23 +1973,23 @@ generic_make_request_checks(struct bio *bio)
1964 * drivers without flush support don't have to worry 1973 * drivers without flush support don't have to worry
1965 * about them. 1974 * about them.
1966 */ 1975 */
1967 if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && 1976 if ((bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) &&
1968 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { 1977 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
1969 bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); 1978 bio->bi_rw &= ~(REQ_PREFLUSH | REQ_FUA);
1970 if (!nr_sectors) { 1979 if (!nr_sectors) {
1971 err = 0; 1980 err = 0;
1972 goto end_io; 1981 goto end_io;
1973 } 1982 }
1974 } 1983 }
1975 1984
1976 if ((bio->bi_rw & REQ_DISCARD) && 1985 if ((bio_op(bio) == REQ_OP_DISCARD) &&
1977 (!blk_queue_discard(q) || 1986 (!blk_queue_discard(q) ||
1978 ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { 1987 ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
1979 err = -EOPNOTSUPP; 1988 err = -EOPNOTSUPP;
1980 goto end_io; 1989 goto end_io;
1981 } 1990 }
1982 1991
1983 if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { 1992 if (bio_op(bio) == REQ_OP_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
1984 err = -EOPNOTSUPP; 1993 err = -EOPNOTSUPP;
1985 goto end_io; 1994 goto end_io;
1986 } 1995 }
@@ -2094,7 +2103,6 @@ EXPORT_SYMBOL(generic_make_request);
2094 2103
2095/** 2104/**
2096 * submit_bio - submit a bio to the block device layer for I/O 2105 * submit_bio - submit a bio to the block device layer for I/O
2097 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
2098 * @bio: The &struct bio which describes the I/O 2106 * @bio: The &struct bio which describes the I/O
2099 * 2107 *
2100 * submit_bio() is very similar in purpose to generic_make_request(), and 2108 * submit_bio() is very similar in purpose to generic_make_request(), and
@@ -2102,10 +2110,8 @@ EXPORT_SYMBOL(generic_make_request);
2102 * interfaces; @bio must be presetup and ready for I/O. 2110 * interfaces; @bio must be presetup and ready for I/O.
2103 * 2111 *
2104 */ 2112 */
2105blk_qc_t submit_bio(int rw, struct bio *bio) 2113blk_qc_t submit_bio(struct bio *bio)
2106{ 2114{
2107 bio->bi_rw |= rw;
2108
2109 /* 2115 /*
2110 * If it's a regular read/write or a barrier with data attached, 2116 * If it's a regular read/write or a barrier with data attached,
2111 * go through the normal accounting stuff before submission. 2117 * go through the normal accounting stuff before submission.
@@ -2113,12 +2119,12 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
2113 if (bio_has_data(bio)) { 2119 if (bio_has_data(bio)) {
2114 unsigned int count; 2120 unsigned int count;
2115 2121
2116 if (unlikely(rw & REQ_WRITE_SAME)) 2122 if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
2117 count = bdev_logical_block_size(bio->bi_bdev) >> 9; 2123 count = bdev_logical_block_size(bio->bi_bdev) >> 9;
2118 else 2124 else
2119 count = bio_sectors(bio); 2125 count = bio_sectors(bio);
2120 2126
2121 if (rw & WRITE) { 2127 if (op_is_write(bio_op(bio))) {
2122 count_vm_events(PGPGOUT, count); 2128 count_vm_events(PGPGOUT, count);
2123 } else { 2129 } else {
2124 task_io_account_read(bio->bi_iter.bi_size); 2130 task_io_account_read(bio->bi_iter.bi_size);
@@ -2129,7 +2135,7 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
2129 char b[BDEVNAME_SIZE]; 2135 char b[BDEVNAME_SIZE];
2130 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", 2136 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
2131 current->comm, task_pid_nr(current), 2137 current->comm, task_pid_nr(current),
2132 (rw & WRITE) ? "WRITE" : "READ", 2138 op_is_write(bio_op(bio)) ? "WRITE" : "READ",
2133 (unsigned long long)bio->bi_iter.bi_sector, 2139 (unsigned long long)bio->bi_iter.bi_sector,
2134 bdevname(bio->bi_bdev, b), 2140 bdevname(bio->bi_bdev, b),
2135 count); 2141 count);
@@ -2160,7 +2166,7 @@ EXPORT_SYMBOL(submit_bio);
2160static int blk_cloned_rq_check_limits(struct request_queue *q, 2166static int blk_cloned_rq_check_limits(struct request_queue *q,
2161 struct request *rq) 2167 struct request *rq)
2162{ 2168{
2163 if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { 2169 if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
2164 printk(KERN_ERR "%s: over max size limit.\n", __func__); 2170 printk(KERN_ERR "%s: over max size limit.\n", __func__);
2165 return -EIO; 2171 return -EIO;
2166 } 2172 }
@@ -2216,7 +2222,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2216 */ 2222 */
2217 BUG_ON(blk_queued_rq(rq)); 2223 BUG_ON(blk_queued_rq(rq));
2218 2224
2219 if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA)) 2225 if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
2220 where = ELEVATOR_INSERT_FLUSH; 2226 where = ELEVATOR_INSERT_FLUSH;
2221 2227
2222 add_acct_request(q, rq, where); 2228 add_acct_request(q, rq, where);
@@ -2979,8 +2985,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request_err);
2979void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2985void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2980 struct bio *bio) 2986 struct bio *bio)
2981{ 2987{
2982 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ 2988 req_set_op(rq, bio_op(bio));
2983 rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
2984 2989
2985 if (bio_has_data(bio)) 2990 if (bio_has_data(bio))
2986 rq->nr_phys_segments = bio_phys_segments(q, bio); 2991 rq->nr_phys_segments = bio_phys_segments(q, bio);
@@ -3065,7 +3070,8 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
3065static void __blk_rq_prep_clone(struct request *dst, struct request *src) 3070static void __blk_rq_prep_clone(struct request *dst, struct request *src)
3066{ 3071{
3067 dst->cpu = src->cpu; 3072 dst->cpu = src->cpu;
3068 dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; 3073 req_set_op_attrs(dst, req_op(src),
3074 (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE);
3069 dst->cmd_type = src->cmd_type; 3075 dst->cmd_type = src->cmd_type;
3070 dst->__sector = blk_rq_pos(src); 3076 dst->__sector = blk_rq_pos(src);
3071 dst->__data_len = blk_rq_bytes(src); 3077 dst->__data_len = blk_rq_bytes(src);
@@ -3310,7 +3316,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3310 /* 3316 /*
3311 * rq is already accounted, so use raw insert 3317 * rq is already accounted, so use raw insert
3312 */ 3318 */
3313 if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) 3319 if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
3314 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); 3320 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
3315 else 3321 else
3316 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); 3322 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 3fec8a29d0fa..7ea04325d02f 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -62,7 +62,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
62 62
63 /* 63 /*
64 * don't check dying flag for MQ because the request won't 64 * don't check dying flag for MQ because the request won't
65 * be resued after dying flag is set 65 * be reused after dying flag is set
66 */ 66 */
67 if (q->mq_ops) { 67 if (q->mq_ops) {
68 blk_mq_insert_request(rq, at_head, true, false); 68 blk_mq_insert_request(rq, at_head, true, false);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index b1c91d229e5e..d308def812db 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -10,8 +10,8 @@
10 * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request 10 * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
11 * properties and hardware capability. 11 * properties and hardware capability.
12 * 12 *
13 * If a request doesn't have data, only REQ_FLUSH makes sense, which 13 * If a request doesn't have data, only REQ_PREFLUSH makes sense, which
14 * indicates a simple flush request. If there is data, REQ_FLUSH indicates 14 * indicates a simple flush request. If there is data, REQ_PREFLUSH indicates
15 * that the device cache should be flushed before the data is executed, and 15 * that the device cache should be flushed before the data is executed, and
16 * REQ_FUA means that the data must be on non-volatile media on request 16 * REQ_FUA means that the data must be on non-volatile media on request
17 * completion. 17 * completion.
@@ -20,16 +20,16 @@
20 * difference. The requests are either completed immediately if there's no 20 * difference. The requests are either completed immediately if there's no
21 * data or executed as normal requests otherwise. 21 * data or executed as normal requests otherwise.
22 * 22 *
23 * If the device has writeback cache and supports FUA, REQ_FLUSH is 23 * If the device has writeback cache and supports FUA, REQ_PREFLUSH is
24 * translated to PREFLUSH but REQ_FUA is passed down directly with DATA. 24 * translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
25 * 25 *
26 * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is 26 * If the device has writeback cache and doesn't support FUA, REQ_PREFLUSH
27 * translated to PREFLUSH and REQ_FUA to POSTFLUSH. 27 * is translated to PREFLUSH and REQ_FUA to POSTFLUSH.
28 * 28 *
29 * The actual execution of flush is double buffered. Whenever a request 29 * The actual execution of flush is double buffered. Whenever a request
30 * needs to execute PRE or POSTFLUSH, it queues at 30 * needs to execute PRE or POSTFLUSH, it queues at
31 * fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a 31 * fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a
32 * flush is issued and the pending_idx is toggled. When the flush 32 * REQ_OP_FLUSH is issued and the pending_idx is toggled. When the flush
33 * completes, all the requests which were pending are proceeded to the next 33 * completes, all the requests which were pending are proceeded to the next
34 * step. This allows arbitrary merging of different types of FLUSH/FUA 34 * step. This allows arbitrary merging of different types of FLUSH/FUA
35 * requests. 35 * requests.
@@ -103,7 +103,7 @@ static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
103 policy |= REQ_FSEQ_DATA; 103 policy |= REQ_FSEQ_DATA;
104 104
105 if (fflags & (1UL << QUEUE_FLAG_WC)) { 105 if (fflags & (1UL << QUEUE_FLAG_WC)) {
106 if (rq->cmd_flags & REQ_FLUSH) 106 if (rq->cmd_flags & REQ_PREFLUSH)
107 policy |= REQ_FSEQ_PREFLUSH; 107 policy |= REQ_FSEQ_PREFLUSH;
108 if (!(fflags & (1UL << QUEUE_FLAG_FUA)) && 108 if (!(fflags & (1UL << QUEUE_FLAG_FUA)) &&
109 (rq->cmd_flags & REQ_FUA)) 109 (rq->cmd_flags & REQ_FUA))
@@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
330 } 330 }
331 331
332 flush_rq->cmd_type = REQ_TYPE_FS; 332 flush_rq->cmd_type = REQ_TYPE_FS;
333 flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; 333 req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH | REQ_FLUSH_SEQ);
334 flush_rq->rq_disk = first_rq->rq_disk; 334 flush_rq->rq_disk = first_rq->rq_disk;
335 flush_rq->end_io = flush_end_io; 335 flush_rq->end_io = flush_end_io;
336 336
@@ -391,9 +391,9 @@ void blk_insert_flush(struct request *rq)
391 391
392 /* 392 /*
393 * @policy now records what operations need to be done. Adjust 393 * @policy now records what operations need to be done. Adjust
394 * REQ_FLUSH and FUA for the driver. 394 * REQ_PREFLUSH and FUA for the driver.
395 */ 395 */
396 rq->cmd_flags &= ~REQ_FLUSH; 396 rq->cmd_flags &= ~REQ_PREFLUSH;
397 if (!(fflags & (1UL << QUEUE_FLAG_FUA))) 397 if (!(fflags & (1UL << QUEUE_FLAG_FUA)))
398 rq->cmd_flags &= ~REQ_FUA; 398 rq->cmd_flags &= ~REQ_FUA;
399 399
@@ -485,8 +485,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
485 485
486 bio = bio_alloc(gfp_mask, 0); 486 bio = bio_alloc(gfp_mask, 0);
487 bio->bi_bdev = bdev; 487 bio->bi_bdev = bdev;
488 bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
488 489
489 ret = submit_bio_wait(WRITE_FLUSH, bio); 490 ret = submit_bio_wait(bio);
490 491
491 /* 492 /*
492 * The driver must store the error location in ->bi_sector, if 493 * The driver must store the error location in ->bi_sector, if
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9e29dc351695..9031d2af0b47 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -9,21 +9,22 @@
9 9
10#include "blk.h" 10#include "blk.h"
11 11
12static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages, 12static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
13 gfp_t gfp) 13 gfp_t gfp)
14{ 14{
15 struct bio *new = bio_alloc(gfp, nr_pages); 15 struct bio *new = bio_alloc(gfp, nr_pages);
16 16
17 if (bio) { 17 if (bio) {
18 bio_chain(bio, new); 18 bio_chain(bio, new);
19 submit_bio(rw, bio); 19 submit_bio(bio);
20 } 20 }
21 21
22 return new; 22 return new;
23} 23}
24 24
25int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, 25int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
26 sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop) 26 sector_t nr_sects, gfp_t gfp_mask, int op_flags,
27 struct bio **biop)
27{ 28{
28 struct request_queue *q = bdev_get_queue(bdev); 29 struct request_queue *q = bdev_get_queue(bdev);
29 struct bio *bio = *biop; 30 struct bio *bio = *biop;
@@ -34,7 +35,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
34 return -ENXIO; 35 return -ENXIO;
35 if (!blk_queue_discard(q)) 36 if (!blk_queue_discard(q))
36 return -EOPNOTSUPP; 37 return -EOPNOTSUPP;
37 if ((type & REQ_SECURE) && !blk_queue_secdiscard(q)) 38 if ((op_flags & REQ_SECURE) && !blk_queue_secdiscard(q))
38 return -EOPNOTSUPP; 39 return -EOPNOTSUPP;
39 40
40 /* Zero-sector (unknown) and one-sector granularities are the same. */ 41 /* Zero-sector (unknown) and one-sector granularities are the same. */
@@ -62,9 +63,10 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
62 req_sects = end_sect - sector; 63 req_sects = end_sect - sector;
63 } 64 }
64 65
65 bio = next_bio(bio, type, 1, gfp_mask); 66 bio = next_bio(bio, 1, gfp_mask);
66 bio->bi_iter.bi_sector = sector; 67 bio->bi_iter.bi_sector = sector;
67 bio->bi_bdev = bdev; 68 bio->bi_bdev = bdev;
69 bio_set_op_attrs(bio, REQ_OP_DISCARD, op_flags);
68 70
69 bio->bi_iter.bi_size = req_sects << 9; 71 bio->bi_iter.bi_size = req_sects << 9;
70 nr_sects -= req_sects; 72 nr_sects -= req_sects;
@@ -98,19 +100,19 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
98int blkdev_issue_discard(struct block_device *bdev, sector_t sector, 100int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
99 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) 101 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
100{ 102{
101 int type = REQ_WRITE | REQ_DISCARD; 103 int op_flags = 0;
102 struct bio *bio = NULL; 104 struct bio *bio = NULL;
103 struct blk_plug plug; 105 struct blk_plug plug;
104 int ret; 106 int ret;
105 107
106 if (flags & BLKDEV_DISCARD_SECURE) 108 if (flags & BLKDEV_DISCARD_SECURE)
107 type |= REQ_SECURE; 109 op_flags |= REQ_SECURE;
108 110
109 blk_start_plug(&plug); 111 blk_start_plug(&plug);
110 ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, type, 112 ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, op_flags,
111 &bio); 113 &bio);
112 if (!ret && bio) { 114 if (!ret && bio) {
113 ret = submit_bio_wait(type, bio); 115 ret = submit_bio_wait(bio);
114 if (ret == -EOPNOTSUPP) 116 if (ret == -EOPNOTSUPP)
115 ret = 0; 117 ret = 0;
116 bio_put(bio); 118 bio_put(bio);
@@ -148,13 +150,14 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
148 max_write_same_sectors = UINT_MAX >> 9; 150 max_write_same_sectors = UINT_MAX >> 9;
149 151
150 while (nr_sects) { 152 while (nr_sects) {
151 bio = next_bio(bio, REQ_WRITE | REQ_WRITE_SAME, 1, gfp_mask); 153 bio = next_bio(bio, 1, gfp_mask);
152 bio->bi_iter.bi_sector = sector; 154 bio->bi_iter.bi_sector = sector;
153 bio->bi_bdev = bdev; 155 bio->bi_bdev = bdev;
154 bio->bi_vcnt = 1; 156 bio->bi_vcnt = 1;
155 bio->bi_io_vec->bv_page = page; 157 bio->bi_io_vec->bv_page = page;
156 bio->bi_io_vec->bv_offset = 0; 158 bio->bi_io_vec->bv_offset = 0;
157 bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); 159 bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
160 bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
158 161
159 if (nr_sects > max_write_same_sectors) { 162 if (nr_sects > max_write_same_sectors) {
160 bio->bi_iter.bi_size = max_write_same_sectors << 9; 163 bio->bi_iter.bi_size = max_write_same_sectors << 9;
@@ -167,7 +170,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
167 } 170 }
168 171
169 if (bio) { 172 if (bio) {
170 ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio); 173 ret = submit_bio_wait(bio);
171 bio_put(bio); 174 bio_put(bio);
172 } 175 }
173 return ret != -EOPNOTSUPP ? ret : 0; 176 return ret != -EOPNOTSUPP ? ret : 0;
@@ -193,11 +196,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
193 unsigned int sz; 196 unsigned int sz;
194 197
195 while (nr_sects != 0) { 198 while (nr_sects != 0) {
196 bio = next_bio(bio, WRITE, 199 bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
197 min(nr_sects, (sector_t)BIO_MAX_PAGES),
198 gfp_mask); 200 gfp_mask);
199 bio->bi_iter.bi_sector = sector; 201 bio->bi_iter.bi_sector = sector;
200 bio->bi_bdev = bdev; 202 bio->bi_bdev = bdev;
203 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
201 204
202 while (nr_sects != 0) { 205 while (nr_sects != 0) {
203 sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); 206 sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
@@ -210,7 +213,7 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
210 } 213 }
211 214
212 if (bio) { 215 if (bio) {
213 ret = submit_bio_wait(WRITE, bio); 216 ret = submit_bio_wait(bio);
214 bio_put(bio); 217 bio_put(bio);
215 return ret; 218 return ret;
216 } 219 }
diff --git a/block/blk-map.c b/block/blk-map.c
index b9f88b7751fb..61733a660c3a 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -224,7 +224,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
224 return PTR_ERR(bio); 224 return PTR_ERR(bio);
225 225
226 if (!reading) 226 if (!reading)
227 bio->bi_rw |= REQ_WRITE; 227 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
228 228
229 if (do_copy) 229 if (do_copy)
230 rq->cmd_flags |= REQ_COPY_USER; 230 rq->cmd_flags |= REQ_COPY_USER;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 261353166dcf..5e4d93edeaf7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -172,9 +172,9 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
172 struct bio *split, *res; 172 struct bio *split, *res;
173 unsigned nsegs; 173 unsigned nsegs;
174 174
175 if ((*bio)->bi_rw & REQ_DISCARD) 175 if (bio_op(*bio) == REQ_OP_DISCARD)
176 split = blk_bio_discard_split(q, *bio, bs, &nsegs); 176 split = blk_bio_discard_split(q, *bio, bs, &nsegs);
177 else if ((*bio)->bi_rw & REQ_WRITE_SAME) 177 else if (bio_op(*bio) == REQ_OP_WRITE_SAME)
178 split = blk_bio_write_same_split(q, *bio, bs, &nsegs); 178 split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
179 else 179 else
180 split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); 180 split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
@@ -213,10 +213,10 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
213 * This should probably be returning 0, but blk_add_request_payload() 213 * This should probably be returning 0, but blk_add_request_payload()
214 * (Christoph!!!!) 214 * (Christoph!!!!)
215 */ 215 */
216 if (bio->bi_rw & REQ_DISCARD) 216 if (bio_op(bio) == REQ_OP_DISCARD)
217 return 1; 217 return 1;
218 218
219 if (bio->bi_rw & REQ_WRITE_SAME) 219 if (bio_op(bio) == REQ_OP_WRITE_SAME)
220 return 1; 220 return 1;
221 221
222 fbio = bio; 222 fbio = bio;
@@ -385,7 +385,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
385 nsegs = 0; 385 nsegs = 0;
386 cluster = blk_queue_cluster(q); 386 cluster = blk_queue_cluster(q);
387 387
388 if (bio->bi_rw & REQ_DISCARD) { 388 if (bio_op(bio) == REQ_OP_DISCARD) {
389 /* 389 /*
390 * This is a hack - drivers should be neither modifying the 390 * This is a hack - drivers should be neither modifying the
391 * biovec, nor relying on bi_vcnt - but because of 391 * biovec, nor relying on bi_vcnt - but because of
@@ -400,7 +400,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
400 return 0; 400 return 0;
401 } 401 }
402 402
403 if (bio->bi_rw & REQ_WRITE_SAME) { 403 if (bio_op(bio) == REQ_OP_WRITE_SAME) {
404single_segment: 404single_segment:
405 *sg = sglist; 405 *sg = sglist;
406 bvec = bio_iovec(bio); 406 bvec = bio_iovec(bio);
@@ -439,7 +439,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
439 } 439 }
440 440
441 if (q->dma_drain_size && q->dma_drain_needed(rq)) { 441 if (q->dma_drain_size && q->dma_drain_needed(rq)) {
442 if (rq->cmd_flags & REQ_WRITE) 442 if (op_is_write(req_op(rq)))
443 memset(q->dma_drain_buffer, 0, q->dma_drain_size); 443 memset(q->dma_drain_buffer, 0, q->dma_drain_size);
444 444
445 sg_unmark_end(sg); 445 sg_unmark_end(sg);
@@ -500,7 +500,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
500 integrity_req_gap_back_merge(req, bio)) 500 integrity_req_gap_back_merge(req, bio))
501 return 0; 501 return 0;
502 if (blk_rq_sectors(req) + bio_sectors(bio) > 502 if (blk_rq_sectors(req) + bio_sectors(bio) >
503 blk_rq_get_max_sectors(req)) { 503 blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
504 req->cmd_flags |= REQ_NOMERGE; 504 req->cmd_flags |= REQ_NOMERGE;
505 if (req == q->last_merge) 505 if (req == q->last_merge)
506 q->last_merge = NULL; 506 q->last_merge = NULL;
@@ -524,7 +524,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
524 integrity_req_gap_front_merge(req, bio)) 524 integrity_req_gap_front_merge(req, bio))
525 return 0; 525 return 0;
526 if (blk_rq_sectors(req) + bio_sectors(bio) > 526 if (blk_rq_sectors(req) + bio_sectors(bio) >
527 blk_rq_get_max_sectors(req)) { 527 blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
528 req->cmd_flags |= REQ_NOMERGE; 528 req->cmd_flags |= REQ_NOMERGE;
529 if (req == q->last_merge) 529 if (req == q->last_merge)
530 q->last_merge = NULL; 530 q->last_merge = NULL;
@@ -570,7 +570,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
570 * Will it become too large? 570 * Will it become too large?
571 */ 571 */
572 if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > 572 if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
573 blk_rq_get_max_sectors(req)) 573 blk_rq_get_max_sectors(req, blk_rq_pos(req)))
574 return 0; 574 return 0;
575 575
576 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 576 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -649,7 +649,8 @@ static int attempt_merge(struct request_queue *q, struct request *req,
649 if (!rq_mergeable(req) || !rq_mergeable(next)) 649 if (!rq_mergeable(req) || !rq_mergeable(next))
650 return 0; 650 return 0;
651 651
652 if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) 652 if (!blk_check_merge_flags(req->cmd_flags, req_op(req), next->cmd_flags,
653 req_op(next)))
653 return 0; 654 return 0;
654 655
655 /* 656 /*
@@ -663,7 +664,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
663 || req_no_special_merge(next)) 664 || req_no_special_merge(next))
664 return 0; 665 return 0;
665 666
666 if (req->cmd_flags & REQ_WRITE_SAME && 667 if (req_op(req) == REQ_OP_WRITE_SAME &&
667 !blk_write_same_mergeable(req->bio, next->bio)) 668 !blk_write_same_mergeable(req->bio, next->bio))
668 return 0; 669 return 0;
669 670
@@ -743,6 +744,12 @@ int attempt_front_merge(struct request_queue *q, struct request *rq)
743int blk_attempt_req_merge(struct request_queue *q, struct request *rq, 744int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
744 struct request *next) 745 struct request *next)
745{ 746{
747 struct elevator_queue *e = q->elevator;
748
749 if (e->type->ops.elevator_allow_rq_merge_fn)
750 if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next))
751 return 0;
752
746 return attempt_merge(q, rq, next); 753 return attempt_merge(q, rq, next);
747} 754}
748 755
@@ -751,7 +758,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
751 if (!rq_mergeable(rq) || !bio_mergeable(bio)) 758 if (!rq_mergeable(rq) || !bio_mergeable(bio))
752 return false; 759 return false;
753 760
754 if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) 761 if (!blk_check_merge_flags(rq->cmd_flags, req_op(rq), bio->bi_rw,
762 bio_op(bio)))
755 return false; 763 return false;
756 764
757 /* different data direction or already started, don't merge */ 765 /* different data direction or already started, don't merge */
@@ -767,7 +775,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
767 return false; 775 return false;
768 776
769 /* must be using the same buffer */ 777 /* must be using the same buffer */
770 if (rq->cmd_flags & REQ_WRITE_SAME && 778 if (req_op(rq) == REQ_OP_WRITE_SAME &&
771 !blk_write_same_mergeable(rq->bio, bio)) 779 !blk_write_same_mergeable(rq->bio, bio))
772 return false; 780 return false;
773 781
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f9b9049b1284..2a1920c6d6e5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -159,16 +159,17 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
159EXPORT_SYMBOL(blk_mq_can_queue); 159EXPORT_SYMBOL(blk_mq_can_queue);
160 160
161static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, 161static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
162 struct request *rq, unsigned int rw_flags) 162 struct request *rq, int op,
163 unsigned int op_flags)
163{ 164{
164 if (blk_queue_io_stat(q)) 165 if (blk_queue_io_stat(q))
165 rw_flags |= REQ_IO_STAT; 166 op_flags |= REQ_IO_STAT;
166 167
167 INIT_LIST_HEAD(&rq->queuelist); 168 INIT_LIST_HEAD(&rq->queuelist);
168 /* csd/requeue_work/fifo_time is initialized before use */ 169 /* csd/requeue_work/fifo_time is initialized before use */
169 rq->q = q; 170 rq->q = q;
170 rq->mq_ctx = ctx; 171 rq->mq_ctx = ctx;
171 rq->cmd_flags |= rw_flags; 172 req_set_op_attrs(rq, op, op_flags);
172 /* do not touch atomic flags, it needs atomic ops against the timer */ 173 /* do not touch atomic flags, it needs atomic ops against the timer */
173 rq->cpu = -1; 174 rq->cpu = -1;
174 INIT_HLIST_NODE(&rq->hash); 175 INIT_HLIST_NODE(&rq->hash);
@@ -203,11 +204,11 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
203 rq->end_io_data = NULL; 204 rq->end_io_data = NULL;
204 rq->next_rq = NULL; 205 rq->next_rq = NULL;
205 206
206 ctx->rq_dispatched[rw_is_sync(rw_flags)]++; 207 ctx->rq_dispatched[rw_is_sync(op, op_flags)]++;
207} 208}
208 209
209static struct request * 210static struct request *
210__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) 211__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags)
211{ 212{
212 struct request *rq; 213 struct request *rq;
213 unsigned int tag; 214 unsigned int tag;
@@ -222,7 +223,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
222 } 223 }
223 224
224 rq->tag = tag; 225 rq->tag = tag;
225 blk_mq_rq_ctx_init(data->q, data->ctx, rq, rw); 226 blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags);
226 return rq; 227 return rq;
227 } 228 }
228 229
@@ -246,7 +247,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
246 hctx = q->mq_ops->map_queue(q, ctx->cpu); 247 hctx = q->mq_ops->map_queue(q, ctx->cpu);
247 blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); 248 blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
248 249
249 rq = __blk_mq_alloc_request(&alloc_data, rw); 250 rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
250 if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) { 251 if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
251 __blk_mq_run_hw_queue(hctx); 252 __blk_mq_run_hw_queue(hctx);
252 blk_mq_put_ctx(ctx); 253 blk_mq_put_ctx(ctx);
@@ -254,7 +255,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
254 ctx = blk_mq_get_ctx(q); 255 ctx = blk_mq_get_ctx(q);
255 hctx = q->mq_ops->map_queue(q, ctx->cpu); 256 hctx = q->mq_ops->map_queue(q, ctx->cpu);
256 blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); 257 blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
257 rq = __blk_mq_alloc_request(&alloc_data, rw); 258 rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
258 ctx = alloc_data.ctx; 259 ctx = alloc_data.ctx;
259 } 260 }
260 blk_mq_put_ctx(ctx); 261 blk_mq_put_ctx(ctx);
@@ -784,7 +785,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
784 switch (ret) { 785 switch (ret) {
785 case BLK_MQ_RQ_QUEUE_OK: 786 case BLK_MQ_RQ_QUEUE_OK:
786 queued++; 787 queued++;
787 continue; 788 break;
788 case BLK_MQ_RQ_QUEUE_BUSY: 789 case BLK_MQ_RQ_QUEUE_BUSY:
789 list_add(&rq->queuelist, &rq_list); 790 list_add(&rq->queuelist, &rq_list);
790 __blk_mq_requeue_request(rq); 791 __blk_mq_requeue_request(rq);
@@ -1169,28 +1170,29 @@ static struct request *blk_mq_map_request(struct request_queue *q,
1169 struct blk_mq_hw_ctx *hctx; 1170 struct blk_mq_hw_ctx *hctx;
1170 struct blk_mq_ctx *ctx; 1171 struct blk_mq_ctx *ctx;
1171 struct request *rq; 1172 struct request *rq;
1172 int rw = bio_data_dir(bio); 1173 int op = bio_data_dir(bio);
1174 int op_flags = 0;
1173 struct blk_mq_alloc_data alloc_data; 1175 struct blk_mq_alloc_data alloc_data;
1174 1176
1175 blk_queue_enter_live(q); 1177 blk_queue_enter_live(q);
1176 ctx = blk_mq_get_ctx(q); 1178 ctx = blk_mq_get_ctx(q);
1177 hctx = q->mq_ops->map_queue(q, ctx->cpu); 1179 hctx = q->mq_ops->map_queue(q, ctx->cpu);
1178 1180
1179 if (rw_is_sync(bio->bi_rw)) 1181 if (rw_is_sync(bio_op(bio), bio->bi_rw))
1180 rw |= REQ_SYNC; 1182 op_flags |= REQ_SYNC;
1181 1183
1182 trace_block_getrq(q, bio, rw); 1184 trace_block_getrq(q, bio, op);
1183 blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx); 1185 blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx);
1184 rq = __blk_mq_alloc_request(&alloc_data, rw); 1186 rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
1185 if (unlikely(!rq)) { 1187 if (unlikely(!rq)) {
1186 __blk_mq_run_hw_queue(hctx); 1188 __blk_mq_run_hw_queue(hctx);
1187 blk_mq_put_ctx(ctx); 1189 blk_mq_put_ctx(ctx);
1188 trace_block_sleeprq(q, bio, rw); 1190 trace_block_sleeprq(q, bio, op);
1189 1191
1190 ctx = blk_mq_get_ctx(q); 1192 ctx = blk_mq_get_ctx(q);
1191 hctx = q->mq_ops->map_queue(q, ctx->cpu); 1193 hctx = q->mq_ops->map_queue(q, ctx->cpu);
1192 blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx); 1194 blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
1193 rq = __blk_mq_alloc_request(&alloc_data, rw); 1195 rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
1194 ctx = alloc_data.ctx; 1196 ctx = alloc_data.ctx;
1195 hctx = alloc_data.hctx; 1197 hctx = alloc_data.hctx;
1196 } 1198 }
@@ -1244,8 +1246,8 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
1244 */ 1246 */
1245static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) 1247static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1246{ 1248{
1247 const int is_sync = rw_is_sync(bio->bi_rw); 1249 const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw);
1248 const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); 1250 const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA);
1249 struct blk_map_ctx data; 1251 struct blk_map_ctx data;
1250 struct request *rq; 1252 struct request *rq;
1251 unsigned int request_count = 0; 1253 unsigned int request_count = 0;
@@ -1338,8 +1340,8 @@ done:
1338 */ 1340 */
1339static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) 1341static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1340{ 1342{
1341 const int is_sync = rw_is_sync(bio->bi_rw); 1343 const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw);
1342 const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); 1344 const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA);
1343 struct blk_plug *plug; 1345 struct blk_plug *plug;
1344 unsigned int request_count = 0; 1346 unsigned int request_count = 0;
1345 struct blk_map_ctx data; 1347 struct blk_map_ctx data;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 99205965f559..f87a7e747d36 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -379,6 +379,11 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
379 return count; 379 return count;
380} 380}
381 381
382static ssize_t queue_dax_show(struct request_queue *q, char *page)
383{
384 return queue_var_show(blk_queue_dax(q), page);
385}
386
382static struct queue_sysfs_entry queue_requests_entry = { 387static struct queue_sysfs_entry queue_requests_entry = {
383 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, 388 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
384 .show = queue_requests_show, 389 .show = queue_requests_show,
@@ -516,6 +521,11 @@ static struct queue_sysfs_entry queue_wc_entry = {
516 .store = queue_wc_store, 521 .store = queue_wc_store,
517}; 522};
518 523
524static struct queue_sysfs_entry queue_dax_entry = {
525 .attr = {.name = "dax", .mode = S_IRUGO },
526 .show = queue_dax_show,
527};
528
519static struct attribute *default_attrs[] = { 529static struct attribute *default_attrs[] = {
520 &queue_requests_entry.attr, 530 &queue_requests_entry.attr,
521 &queue_ra_entry.attr, 531 &queue_ra_entry.attr,
@@ -542,6 +552,7 @@ static struct attribute *default_attrs[] = {
542 &queue_random_entry.attr, 552 &queue_random_entry.attr,
543 &queue_poll_entry.attr, 553 &queue_poll_entry.attr,
544 &queue_wc_entry.attr, 554 &queue_wc_entry.attr,
555 &queue_dax_entry.attr,
545 NULL, 556 NULL,
546}; 557};
547 558
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4a349787bc62..acabba198de9 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -10,7 +10,7 @@
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/blkdev.h> 11#include <linux/blkdev.h>
12#include <linux/elevator.h> 12#include <linux/elevator.h>
13#include <linux/jiffies.h> 13#include <linux/ktime.h>
14#include <linux/rbtree.h> 14#include <linux/rbtree.h>
15#include <linux/ioprio.h> 15#include <linux/ioprio.h>
16#include <linux/blktrace_api.h> 16#include <linux/blktrace_api.h>
@@ -22,28 +22,28 @@
22 */ 22 */
23/* max queue in one round of service */ 23/* max queue in one round of service */
24static const int cfq_quantum = 8; 24static const int cfq_quantum = 8;
25static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; 25static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
26/* maximum backwards seek, in KiB */ 26/* maximum backwards seek, in KiB */
27static const int cfq_back_max = 16 * 1024; 27static const int cfq_back_max = 16 * 1024;
28/* penalty of a backwards seek */ 28/* penalty of a backwards seek */
29static const int cfq_back_penalty = 2; 29static const int cfq_back_penalty = 2;
30static const int cfq_slice_sync = HZ / 10; 30static const u64 cfq_slice_sync = NSEC_PER_SEC / 10;
31static int cfq_slice_async = HZ / 25; 31static u64 cfq_slice_async = NSEC_PER_SEC / 25;
32static const int cfq_slice_async_rq = 2; 32static const int cfq_slice_async_rq = 2;
33static int cfq_slice_idle = HZ / 125; 33static u64 cfq_slice_idle = NSEC_PER_SEC / 125;
34static int cfq_group_idle = HZ / 125; 34static u64 cfq_group_idle = NSEC_PER_SEC / 125;
35static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ 35static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
36static const int cfq_hist_divisor = 4; 36static const int cfq_hist_divisor = 4;
37 37
38/* 38/*
39 * offset from end of service tree 39 * offset from end of service tree
40 */ 40 */
41#define CFQ_IDLE_DELAY (HZ / 5) 41#define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5)
42 42
43/* 43/*
44 * below this threshold, we consider thinktime immediate 44 * below this threshold, we consider thinktime immediate
45 */ 45 */
46#define CFQ_MIN_TT (2) 46#define CFQ_MIN_TT (2 * NSEC_PER_SEC / HZ)
47 47
48#define CFQ_SLICE_SCALE (5) 48#define CFQ_SLICE_SCALE (5)
49#define CFQ_HW_QUEUE_MIN (5) 49#define CFQ_HW_QUEUE_MIN (5)
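Note: the tunables above keep their previous wall-clock defaults; only the unit changes from jiffies (HZ-based) to nanoseconds. A small stand-alone check of the equivalences, illustrative arithmetic only, not kernel code:

/* defaults expressed in ns come out to the same millisecond values */
#include <stdio.h>
#include <inttypes.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	uint64_t slice_sync = NSEC_PER_SEC / 10;      /* was HZ / 10   -> 100 ms */
	uint64_t slice_idle = NSEC_PER_SEC / 125;     /* was HZ / 125  ->   8 ms */
	uint64_t target_lat = NSEC_PER_SEC * 3 / 10;  /* was HZ * 3/10 -> 300 ms */

	printf("slice_sync = %" PRIu64 " ms\n", slice_sync / 1000000);
	printf("slice_idle = %" PRIu64 " ms\n", slice_idle / 1000000);
	printf("target_lat = %" PRIu64 " ms\n", target_lat / 1000000);
	return 0;
}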
@@ -73,11 +73,11 @@ static struct kmem_cache *cfq_pool;
73#define CFQ_WEIGHT_LEGACY_MAX 1000 73#define CFQ_WEIGHT_LEGACY_MAX 1000
74 74
75struct cfq_ttime { 75struct cfq_ttime {
76 unsigned long last_end_request; 76 u64 last_end_request;
77 77
78 unsigned long ttime_total; 78 u64 ttime_total;
79 u64 ttime_mean;
79 unsigned long ttime_samples; 80 unsigned long ttime_samples;
80 unsigned long ttime_mean;
81}; 81};
82 82
83/* 83/*
@@ -94,7 +94,7 @@ struct cfq_rb_root {
94 struct cfq_ttime ttime; 94 struct cfq_ttime ttime;
95}; 95};
96#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ 96#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
97 .ttime = {.last_end_request = jiffies,},} 97 .ttime = {.last_end_request = ktime_get_ns(),},}
98 98
99/* 99/*
100 * Per process-grouping structure 100 * Per process-grouping structure
@@ -109,7 +109,7 @@ struct cfq_queue {
109 /* service_tree member */ 109 /* service_tree member */
110 struct rb_node rb_node; 110 struct rb_node rb_node;
111 /* service_tree key */ 111 /* service_tree key */
112 unsigned long rb_key; 112 u64 rb_key;
113 /* prio tree member */ 113 /* prio tree member */
114 struct rb_node p_node; 114 struct rb_node p_node;
115 /* prio tree root we belong to, if any */ 115 /* prio tree root we belong to, if any */
@@ -126,13 +126,13 @@ struct cfq_queue {
126 struct list_head fifo; 126 struct list_head fifo;
127 127
128 /* time when queue got scheduled in to dispatch first request. */ 128 /* time when queue got scheduled in to dispatch first request. */
129 unsigned long dispatch_start; 129 u64 dispatch_start;
130 unsigned int allocated_slice; 130 u64 allocated_slice;
131 unsigned int slice_dispatch; 131 u64 slice_dispatch;
132 /* time when first request from queue completed and slice started. */ 132 /* time when first request from queue completed and slice started. */
133 unsigned long slice_start; 133 u64 slice_start;
134 unsigned long slice_end; 134 u64 slice_end;
135 long slice_resid; 135 s64 slice_resid;
136 136
137 /* pending priority requests */ 137 /* pending priority requests */
138 int prio_pending; 138 int prio_pending;
@@ -141,7 +141,7 @@ struct cfq_queue {
141 141
142 /* io prio of this group */ 142 /* io prio of this group */
143 unsigned short ioprio, org_ioprio; 143 unsigned short ioprio, org_ioprio;
144 unsigned short ioprio_class; 144 unsigned short ioprio_class, org_ioprio_class;
145 145
146 pid_t pid; 146 pid_t pid;
147 147
@@ -290,7 +290,7 @@ struct cfq_group {
290 struct cfq_rb_root service_trees[2][3]; 290 struct cfq_rb_root service_trees[2][3];
291 struct cfq_rb_root service_tree_idle; 291 struct cfq_rb_root service_tree_idle;
292 292
293 unsigned long saved_wl_slice; 293 u64 saved_wl_slice;
294 enum wl_type_t saved_wl_type; 294 enum wl_type_t saved_wl_type;
295 enum wl_class_t saved_wl_class; 295 enum wl_class_t saved_wl_class;
296 296
@@ -329,7 +329,7 @@ struct cfq_data {
329 */ 329 */
330 enum wl_class_t serving_wl_class; 330 enum wl_class_t serving_wl_class;
331 enum wl_type_t serving_wl_type; 331 enum wl_type_t serving_wl_type;
332 unsigned long workload_expires; 332 u64 workload_expires;
333 struct cfq_group *serving_group; 333 struct cfq_group *serving_group;
334 334
335 /* 335 /*
@@ -362,7 +362,7 @@ struct cfq_data {
362 /* 362 /*
363 * idle window management 363 * idle window management
364 */ 364 */
365 struct timer_list idle_slice_timer; 365 struct hrtimer idle_slice_timer;
366 struct work_struct unplug_work; 366 struct work_struct unplug_work;
367 367
368 struct cfq_queue *active_queue; 368 struct cfq_queue *active_queue;
@@ -374,22 +374,22 @@ struct cfq_data {
374 * tunables, see top of file 374 * tunables, see top of file
375 */ 375 */
376 unsigned int cfq_quantum; 376 unsigned int cfq_quantum;
377 unsigned int cfq_fifo_expire[2];
378 unsigned int cfq_back_penalty; 377 unsigned int cfq_back_penalty;
379 unsigned int cfq_back_max; 378 unsigned int cfq_back_max;
380 unsigned int cfq_slice[2];
381 unsigned int cfq_slice_async_rq; 379 unsigned int cfq_slice_async_rq;
382 unsigned int cfq_slice_idle;
383 unsigned int cfq_group_idle;
384 unsigned int cfq_latency; 380 unsigned int cfq_latency;
385 unsigned int cfq_target_latency; 381 u64 cfq_fifo_expire[2];
382 u64 cfq_slice[2];
383 u64 cfq_slice_idle;
384 u64 cfq_group_idle;
385 u64 cfq_target_latency;
386 386
387 /* 387 /*
388 * Fallback dummy cfqq for extreme OOM conditions 388 * Fallback dummy cfqq for extreme OOM conditions
389 */ 389 */
390 struct cfq_queue oom_cfqq; 390 struct cfq_queue oom_cfqq;
391 391
392 unsigned long last_delayed_sync; 392 u64 last_delayed_sync;
393}; 393};
394 394
395static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); 395static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -667,15 +667,16 @@ static inline void cfqg_put(struct cfq_group *cfqg)
667} while (0) 667} while (0)
668 668
669static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, 669static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
670 struct cfq_group *curr_cfqg, int rw) 670 struct cfq_group *curr_cfqg, int op,
671 int op_flags)
671{ 672{
672 blkg_rwstat_add(&cfqg->stats.queued, rw, 1); 673 blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, 1);
673 cfqg_stats_end_empty_time(&cfqg->stats); 674 cfqg_stats_end_empty_time(&cfqg->stats);
674 cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg); 675 cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg);
675} 676}
676 677
677static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, 678static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
678 unsigned long time, unsigned long unaccounted_time) 679 uint64_t time, unsigned long unaccounted_time)
679{ 680{
680 blkg_stat_add(&cfqg->stats.time, time); 681 blkg_stat_add(&cfqg->stats.time, time);
681#ifdef CONFIG_DEBUG_BLK_CGROUP 682#ifdef CONFIG_DEBUG_BLK_CGROUP
@@ -683,26 +684,30 @@ static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
683#endif 684#endif
684} 685}
685 686
686static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) 687static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
688 int op_flags)
687{ 689{
688 blkg_rwstat_add(&cfqg->stats.queued, rw, -1); 690 blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, -1);
689} 691}
690 692
691static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) 693static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
694 int op_flags)
692{ 695{
693 blkg_rwstat_add(&cfqg->stats.merged, rw, 1); 696 blkg_rwstat_add(&cfqg->stats.merged, op, op_flags, 1);
694} 697}
695 698
696static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, 699static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
697 uint64_t start_time, uint64_t io_start_time, int rw) 700 uint64_t start_time, uint64_t io_start_time, int op,
701 int op_flags)
698{ 702{
699 struct cfqg_stats *stats = &cfqg->stats; 703 struct cfqg_stats *stats = &cfqg->stats;
700 unsigned long long now = sched_clock(); 704 unsigned long long now = sched_clock();
701 705
702 if (time_after64(now, io_start_time)) 706 if (time_after64(now, io_start_time))
703 blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); 707 blkg_rwstat_add(&stats->service_time, op, op_flags,
708 now - io_start_time);
704 if (time_after64(io_start_time, start_time)) 709 if (time_after64(io_start_time, start_time))
705 blkg_rwstat_add(&stats->wait_time, rw, 710 blkg_rwstat_add(&stats->wait_time, op, op_flags,
706 io_start_time - start_time); 711 io_start_time - start_time);
707} 712}
708 713
@@ -781,13 +786,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) { }
781#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) 786#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0)
782 787
783static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, 788static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
784 struct cfq_group *curr_cfqg, int rw) { } 789 struct cfq_group *curr_cfqg, int op, int op_flags) { }
785static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, 790static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
786 unsigned long time, unsigned long unaccounted_time) { } 791 uint64_t time, unsigned long unaccounted_time) { }
787static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { } 792static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
788static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { } 793 int op_flags) { }
794static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
795 int op_flags) { }
789static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, 796static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
790 uint64_t start_time, uint64_t io_start_time, int rw) { } 797 uint64_t start_time, uint64_t io_start_time, int op,
798 int op_flags) { }
791 799
792#endif /* CONFIG_CFQ_GROUP_IOSCHED */ 800#endif /* CONFIG_CFQ_GROUP_IOSCHED */
793 801
@@ -807,7 +815,7 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
807static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd, 815static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
808 struct cfq_ttime *ttime, bool group_idle) 816 struct cfq_ttime *ttime, bool group_idle)
809{ 817{
810 unsigned long slice; 818 u64 slice;
811 if (!sample_valid(ttime->ttime_samples)) 819 if (!sample_valid(ttime->ttime_samples))
812 return false; 820 return false;
813 if (group_idle) 821 if (group_idle)
@@ -930,17 +938,18 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
930 * if a queue is marked sync and has sync io queued. A sync queue with async 938 * if a queue is marked sync and has sync io queued. A sync queue with async
931 * io only, should not get full sync slice length. 939 * io only, should not get full sync slice length.
932 */ 940 */
933static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync, 941static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync,
934 unsigned short prio) 942 unsigned short prio)
935{ 943{
936 const int base_slice = cfqd->cfq_slice[sync]; 944 u64 base_slice = cfqd->cfq_slice[sync];
945 u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE);
937 946
938 WARN_ON(prio >= IOPRIO_BE_NR); 947 WARN_ON(prio >= IOPRIO_BE_NR);
939 948
940 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio)); 949 return base_slice + (slice * (4 - prio));
941} 950}
942 951
943static inline int 952static inline u64
944cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 953cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
945{ 954{
946 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); 955 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
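Note: the priority-slice formula itself is unchanged; only the division moves to div_u64() so it works on 64-bit nanosecond values. A worked example of the arithmetic, as a user-space sketch rather than the kernel helper:

/* base + (base / CFQ_SLICE_SCALE) * (4 - prio): lower prio number => longer slice */
#include <stdio.h>
#include <inttypes.h>

#define CFQ_SLICE_SCALE 5
#define NSEC_PER_MSEC   1000000ULL

static uint64_t prio_slice(uint64_t base_slice, int prio)
{
	uint64_t step = base_slice / CFQ_SLICE_SCALE;   /* div_u64() in the kernel */
	int64_t slice = (int64_t)base_slice + (int64_t)step * (4 - prio);

	return (uint64_t)slice;
}

int main(void)
{
	uint64_t base = 100 * NSEC_PER_MSEC;            /* default sync slice, 100 ms */

	for (int prio = 0; prio < 8; prio++)
		printf("prio %d -> %" PRIu64 " ms\n",
		       prio, prio_slice(base, prio) / NSEC_PER_MSEC);
	return 0;
}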
@@ -958,15 +967,14 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
958 * 967 *
959 * The result is also in fixed point w/ CFQ_SERVICE_SHIFT. 968 * The result is also in fixed point w/ CFQ_SERVICE_SHIFT.
960 */ 969 */
961static inline u64 cfqg_scale_charge(unsigned long charge, 970static inline u64 cfqg_scale_charge(u64 charge,
962 unsigned int vfraction) 971 unsigned int vfraction)
963{ 972{
964 u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */ 973 u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */
965 974
966 /* charge / vfraction */ 975 /* charge / vfraction */
967 c <<= CFQ_SERVICE_SHIFT; 976 c <<= CFQ_SERVICE_SHIFT;
968 do_div(c, vfraction); 977 return div_u64(c, vfraction);
969 return c;
970} 978}
971 979
972static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) 980static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
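Note: the charge scaling still works in fixed point; only the open-coded do_div() becomes a div_u64() return. A stand-alone illustration follows; the 12-bit service shift is taken from the surrounding kernel source and should be read as an assumption here.

/* scale a nanosecond charge by a fixed-point group fraction */
#include <stdio.h>
#include <inttypes.h>

#define SERVICE_SHIFT 12   /* assumed to mirror CFQ_SERVICE_SHIFT */

static uint64_t scale_charge(uint64_t charge_ns, unsigned int vfraction)
{
	uint64_t c = charge_ns << SERVICE_SHIFT;   /* make it fixed point */

	c <<= SERVICE_SHIFT;                       /* charge / vfraction */
	return c / vfraction;                      /* div_u64() in the kernel */
}

int main(void)
{
	/* a 10 ms charge against a group holding half of the service share */
	uint64_t scaled = scale_charge(10000000ULL, 1U << (SERVICE_SHIFT - 1));

	printf("scaled charge = %" PRIu64 " ns\n", scaled >> SERVICE_SHIFT);
	return 0;
}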
@@ -1019,16 +1027,16 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
1019 return cfqg->busy_queues_avg[rt]; 1027 return cfqg->busy_queues_avg[rt];
1020} 1028}
1021 1029
1022static inline unsigned 1030static inline u64
1023cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) 1031cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
1024{ 1032{
1025 return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT; 1033 return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT;
1026} 1034}
1027 1035
1028static inline unsigned 1036static inline u64
1029cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1037cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1030{ 1038{
1031 unsigned slice = cfq_prio_to_slice(cfqd, cfqq); 1039 u64 slice = cfq_prio_to_slice(cfqd, cfqq);
1032 if (cfqd->cfq_latency) { 1040 if (cfqd->cfq_latency) {
1033 /* 1041 /*
1034 * interested queues (we consider only the ones with the same 1042 * interested queues (we consider only the ones with the same
@@ -1036,20 +1044,22 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1036 */ 1044 */
1037 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg, 1045 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
1038 cfq_class_rt(cfqq)); 1046 cfq_class_rt(cfqq));
1039 unsigned sync_slice = cfqd->cfq_slice[1]; 1047 u64 sync_slice = cfqd->cfq_slice[1];
1040 unsigned expect_latency = sync_slice * iq; 1048 u64 expect_latency = sync_slice * iq;
1041 unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg); 1049 u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
1042 1050
1043 if (expect_latency > group_slice) { 1051 if (expect_latency > group_slice) {
1044 unsigned base_low_slice = 2 * cfqd->cfq_slice_idle; 1052 u64 base_low_slice = 2 * cfqd->cfq_slice_idle;
1053 u64 low_slice;
1054
1045 /* scale low_slice according to IO priority 1055 /* scale low_slice according to IO priority
1046 * and sync vs async */ 1056 * and sync vs async */
1047 unsigned low_slice = 1057 low_slice = div64_u64(base_low_slice*slice, sync_slice);
1048 min(slice, base_low_slice * slice / sync_slice); 1058 low_slice = min(slice, low_slice);
1049 /* the adapted slice value is scaled to fit all iqs 1059 /* the adapted slice value is scaled to fit all iqs
1050 * into the target latency */ 1060 * into the target latency */
1051 slice = max(slice * group_slice / expect_latency, 1061 slice = div64_u64(slice*group_slice, expect_latency);
1052 low_slice); 1062 slice = max(slice, low_slice);
1053 } 1063 }
1054 } 1064 }
1055 return slice; 1065 return slice;
@@ -1058,12 +1068,13 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1058static inline void 1068static inline void
1059cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1069cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1060{ 1070{
1061 unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq); 1071 u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
1072 u64 now = ktime_get_ns();
1062 1073
1063 cfqq->slice_start = jiffies; 1074 cfqq->slice_start = now;
1064 cfqq->slice_end = jiffies + slice; 1075 cfqq->slice_end = now + slice;
1065 cfqq->allocated_slice = slice; 1076 cfqq->allocated_slice = slice;
1066 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies); 1077 cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now);
1067} 1078}
1068 1079
1069/* 1080/*
@@ -1075,7 +1086,7 @@ static inline bool cfq_slice_used(struct cfq_queue *cfqq)
1075{ 1086{
1076 if (cfq_cfqq_slice_new(cfqq)) 1087 if (cfq_cfqq_slice_new(cfqq))
1077 return false; 1088 return false;
1078 if (time_before(jiffies, cfqq->slice_end)) 1089 if (ktime_get_ns() < cfqq->slice_end)
1079 return false; 1090 return false;
1080 1091
1081 return true; 1092 return true;
@@ -1241,8 +1252,8 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1241 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last)); 1252 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
1242} 1253}
1243 1254
1244static unsigned long cfq_slice_offset(struct cfq_data *cfqd, 1255static u64 cfq_slice_offset(struct cfq_data *cfqd,
1245 struct cfq_queue *cfqq) 1256 struct cfq_queue *cfqq)
1246{ 1257{
1247 /* 1258 /*
1248 * just an approximation, should be ok. 1259 * just an approximation, should be ok.
@@ -1435,31 +1446,32 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
1435 cfqg_stats_update_dequeue(cfqg); 1446 cfqg_stats_update_dequeue(cfqg);
1436} 1447}
1437 1448
1438static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, 1449static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
1439 unsigned int *unaccounted_time) 1450 u64 *unaccounted_time)
1440{ 1451{
1441 unsigned int slice_used; 1452 u64 slice_used;
1453 u64 now = ktime_get_ns();
1442 1454
1443 /* 1455 /*
1444 * Queue got expired before even a single request completed or 1456 * Queue got expired before even a single request completed or
1445 * got expired immediately after first request completion. 1457 * got expired immediately after first request completion.
1446 */ 1458 */
1447 if (!cfqq->slice_start || cfqq->slice_start == jiffies) { 1459 if (!cfqq->slice_start || cfqq->slice_start == now) {
1448 /* 1460 /*
1449 * Also charge the seek time incurred to the group, otherwise 1461 * Also charge the seek time incurred to the group, otherwise
1450 * if there are mutiple queues in the group, each can dispatch 1462 * if there are mutiple queues in the group, each can dispatch
1451 * a single request on seeky media and cause lots of seek time 1463 * a single request on seeky media and cause lots of seek time
1452 * and group will never know it. 1464 * and group will never know it.
1453 */ 1465 */
1454 slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start), 1466 slice_used = max_t(u64, (now - cfqq->dispatch_start),
1455 1); 1467 jiffies_to_nsecs(1));
1456 } else { 1468 } else {
1457 slice_used = jiffies - cfqq->slice_start; 1469 slice_used = now - cfqq->slice_start;
1458 if (slice_used > cfqq->allocated_slice) { 1470 if (slice_used > cfqq->allocated_slice) {
1459 *unaccounted_time = slice_used - cfqq->allocated_slice; 1471 *unaccounted_time = slice_used - cfqq->allocated_slice;
1460 slice_used = cfqq->allocated_slice; 1472 slice_used = cfqq->allocated_slice;
1461 } 1473 }
1462 if (time_after(cfqq->slice_start, cfqq->dispatch_start)) 1474 if (cfqq->slice_start > cfqq->dispatch_start)
1463 *unaccounted_time += cfqq->slice_start - 1475 *unaccounted_time += cfqq->slice_start -
1464 cfqq->dispatch_start; 1476 cfqq->dispatch_start;
1465 } 1477 }
@@ -1471,10 +1483,11 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
1471 struct cfq_queue *cfqq) 1483 struct cfq_queue *cfqq)
1472{ 1484{
1473 struct cfq_rb_root *st = &cfqd->grp_service_tree; 1485 struct cfq_rb_root *st = &cfqd->grp_service_tree;
1474 unsigned int used_sl, charge, unaccounted_sl = 0; 1486 u64 used_sl, charge, unaccounted_sl = 0;
1475 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) 1487 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
1476 - cfqg->service_tree_idle.count; 1488 - cfqg->service_tree_idle.count;
1477 unsigned int vfr; 1489 unsigned int vfr;
1490 u64 now = ktime_get_ns();
1478 1491
1479 BUG_ON(nr_sync < 0); 1492 BUG_ON(nr_sync < 0);
1480 used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); 1493 used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
@@ -1496,9 +1509,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
1496 cfq_group_service_tree_add(st, cfqg); 1509 cfq_group_service_tree_add(st, cfqg);
1497 1510
1498 /* This group is being expired. Save the context */ 1511 /* This group is being expired. Save the context */
1499 if (time_after(cfqd->workload_expires, jiffies)) { 1512 if (cfqd->workload_expires > now) {
1500 cfqg->saved_wl_slice = cfqd->workload_expires 1513 cfqg->saved_wl_slice = cfqd->workload_expires - now;
1501 - jiffies;
1502 cfqg->saved_wl_type = cfqd->serving_wl_type; 1514 cfqg->saved_wl_type = cfqd->serving_wl_type;
1503 cfqg->saved_wl_class = cfqd->serving_wl_class; 1515 cfqg->saved_wl_class = cfqd->serving_wl_class;
1504 } else 1516 } else
@@ -1507,7 +1519,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
1507 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, 1519 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
1508 st->min_vdisktime); 1520 st->min_vdisktime);
1509 cfq_log_cfqq(cfqq->cfqd, cfqq, 1521 cfq_log_cfqq(cfqq->cfqd, cfqq,
1510 "sl_used=%u disp=%u charge=%u iops=%u sect=%lu", 1522 "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu",
1511 used_sl, cfqq->slice_dispatch, charge, 1523 used_sl, cfqq->slice_dispatch, charge,
1512 iops_mode(cfqd), cfqq->nr_sectors); 1524 iops_mode(cfqd), cfqq->nr_sectors);
1513 cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl); 1525 cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl);
@@ -1530,7 +1542,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg)
1530 *st = CFQ_RB_ROOT; 1542 *st = CFQ_RB_ROOT;
1531 RB_CLEAR_NODE(&cfqg->rb_node); 1543 RB_CLEAR_NODE(&cfqg->rb_node);
1532 1544
1533 cfqg->ttime.last_end_request = jiffies; 1545 cfqg->ttime.last_end_request = ktime_get_ns();
1534} 1546}
1535 1547
1536#ifdef CONFIG_CFQ_GROUP_IOSCHED 1548#ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -2213,10 +2225,11 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2213{ 2225{
2214 struct rb_node **p, *parent; 2226 struct rb_node **p, *parent;
2215 struct cfq_queue *__cfqq; 2227 struct cfq_queue *__cfqq;
2216 unsigned long rb_key; 2228 u64 rb_key;
2217 struct cfq_rb_root *st; 2229 struct cfq_rb_root *st;
2218 int left; 2230 int left;
2219 int new_cfqq = 1; 2231 int new_cfqq = 1;
2232 u64 now = ktime_get_ns();
2220 2233
2221 st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); 2234 st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq));
2222 if (cfq_class_idle(cfqq)) { 2235 if (cfq_class_idle(cfqq)) {
@@ -2226,7 +2239,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2226 __cfqq = rb_entry(parent, struct cfq_queue, rb_node); 2239 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
2227 rb_key += __cfqq->rb_key; 2240 rb_key += __cfqq->rb_key;
2228 } else 2241 } else
2229 rb_key += jiffies; 2242 rb_key += now;
2230 } else if (!add_front) { 2243 } else if (!add_front) {
2231 /* 2244 /*
2232 * Get our rb key offset. Subtract any residual slice 2245 * Get our rb key offset. Subtract any residual slice
@@ -2234,13 +2247,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2234 * count indicates slice overrun, and this should position 2247 * count indicates slice overrun, and this should position
2235 * the next service time further away in the tree. 2248 * the next service time further away in the tree.
2236 */ 2249 */
2237 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies; 2250 rb_key = cfq_slice_offset(cfqd, cfqq) + now;
2238 rb_key -= cfqq->slice_resid; 2251 rb_key -= cfqq->slice_resid;
2239 cfqq->slice_resid = 0; 2252 cfqq->slice_resid = 0;
2240 } else { 2253 } else {
2241 rb_key = -HZ; 2254 rb_key = -NSEC_PER_SEC;
2242 __cfqq = cfq_rb_first(st); 2255 __cfqq = cfq_rb_first(st);
2243 rb_key += __cfqq ? __cfqq->rb_key : jiffies; 2256 rb_key += __cfqq ? __cfqq->rb_key : now;
2244 } 2257 }
2245 2258
2246 if (!RB_EMPTY_NODE(&cfqq->rb_node)) { 2259 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
@@ -2266,7 +2279,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2266 /* 2279 /*
2267 * sort by key, that represents service time. 2280 * sort by key, that represents service time.
2268 */ 2281 */
2269 if (time_before(rb_key, __cfqq->rb_key)) 2282 if (rb_key < __cfqq->rb_key)
2270 p = &parent->rb_left; 2283 p = &parent->rb_left;
2271 else { 2284 else {
2272 p = &parent->rb_right; 2285 p = &parent->rb_right;
@@ -2461,10 +2474,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
2461{ 2474{
2462 elv_rb_del(&cfqq->sort_list, rq); 2475 elv_rb_del(&cfqq->sort_list, rq);
2463 cfqq->queued[rq_is_sync(rq)]--; 2476 cfqq->queued[rq_is_sync(rq)]--;
2464 cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); 2477 cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags);
2465 cfq_add_rq_rb(rq); 2478 cfq_add_rq_rb(rq);
2466 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group, 2479 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group,
2467 rq->cmd_flags); 2480 req_op(rq), rq->cmd_flags);
2468} 2481}
2469 2482
2470static struct request * 2483static struct request *
@@ -2517,7 +2530,7 @@ static void cfq_remove_request(struct request *rq)
2517 cfq_del_rq_rb(rq); 2530 cfq_del_rq_rb(rq);
2518 2531
2519 cfqq->cfqd->rq_queued--; 2532 cfqq->cfqd->rq_queued--;
2520 cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); 2533 cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags);
2521 if (rq->cmd_flags & REQ_PRIO) { 2534 if (rq->cmd_flags & REQ_PRIO) {
2522 WARN_ON(!cfqq->prio_pending); 2535 WARN_ON(!cfqq->prio_pending);
2523 cfqq->prio_pending--; 2536 cfqq->prio_pending--;
@@ -2531,7 +2544,7 @@ static int cfq_merge(struct request_queue *q, struct request **req,
2531 struct request *__rq; 2544 struct request *__rq;
2532 2545
2533 __rq = cfq_find_rq_fmerge(cfqd, bio); 2546 __rq = cfq_find_rq_fmerge(cfqd, bio);
2534 if (__rq && elv_rq_merge_ok(__rq, bio)) { 2547 if (__rq && elv_bio_merge_ok(__rq, bio)) {
2535 *req = __rq; 2548 *req = __rq;
2536 return ELEVATOR_FRONT_MERGE; 2549 return ELEVATOR_FRONT_MERGE;
2537 } 2550 }
@@ -2552,7 +2565,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
2552static void cfq_bio_merged(struct request_queue *q, struct request *req, 2565static void cfq_bio_merged(struct request_queue *q, struct request *req,
2553 struct bio *bio) 2566 struct bio *bio)
2554{ 2567{
2555 cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_rw); 2568 cfqg_stats_update_io_merged(RQ_CFQG(req), bio_op(bio), bio->bi_rw);
2556} 2569}
2557 2570
2558static void 2571static void
@@ -2566,7 +2579,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
2566 * reposition in fifo if next is older than rq 2579 * reposition in fifo if next is older than rq
2567 */ 2580 */
2568 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && 2581 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
2569 time_before(next->fifo_time, rq->fifo_time) && 2582 next->fifo_time < rq->fifo_time &&
2570 cfqq == RQ_CFQQ(next)) { 2583 cfqq == RQ_CFQQ(next)) {
2571 list_move(&rq->queuelist, &next->queuelist); 2584 list_move(&rq->queuelist, &next->queuelist);
2572 rq->fifo_time = next->fifo_time; 2585 rq->fifo_time = next->fifo_time;
@@ -2575,7 +2588,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
2575 if (cfqq->next_rq == next) 2588 if (cfqq->next_rq == next)
2576 cfqq->next_rq = rq; 2589 cfqq->next_rq = rq;
2577 cfq_remove_request(next); 2590 cfq_remove_request(next);
2578 cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags); 2591 cfqg_stats_update_io_merged(RQ_CFQG(rq), req_op(next), next->cmd_flags);
2579 2592
2580 cfqq = RQ_CFQQ(next); 2593 cfqq = RQ_CFQQ(next);
2581 /* 2594 /*
@@ -2588,8 +2601,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
2588 cfq_del_cfqq_rr(cfqd, cfqq); 2601 cfq_del_cfqq_rr(cfqd, cfqq);
2589} 2602}
2590 2603
2591static int cfq_allow_merge(struct request_queue *q, struct request *rq, 2604static int cfq_allow_bio_merge(struct request_queue *q, struct request *rq,
2592 struct bio *bio) 2605 struct bio *bio)
2593{ 2606{
2594 struct cfq_data *cfqd = q->elevator->elevator_data; 2607 struct cfq_data *cfqd = q->elevator->elevator_data;
2595 struct cfq_io_cq *cic; 2608 struct cfq_io_cq *cic;
@@ -2613,9 +2626,15 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
2613 return cfqq == RQ_CFQQ(rq); 2626 return cfqq == RQ_CFQQ(rq);
2614} 2627}
2615 2628
2629static int cfq_allow_rq_merge(struct request_queue *q, struct request *rq,
2630 struct request *next)
2631{
2632 return RQ_CFQQ(rq) == RQ_CFQQ(next);
2633}
2634
2616static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) 2635static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2617{ 2636{
2618 del_timer(&cfqd->idle_slice_timer); 2637 hrtimer_try_to_cancel(&cfqd->idle_slice_timer);
2619 cfqg_stats_update_idle_time(cfqq->cfqg); 2638 cfqg_stats_update_idle_time(cfqq->cfqg);
2620} 2639}
2621 2640
@@ -2627,7 +2646,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
2627 cfqd->serving_wl_class, cfqd->serving_wl_type); 2646 cfqd->serving_wl_class, cfqd->serving_wl_type);
2628 cfqg_stats_update_avg_queue_size(cfqq->cfqg); 2647 cfqg_stats_update_avg_queue_size(cfqq->cfqg);
2629 cfqq->slice_start = 0; 2648 cfqq->slice_start = 0;
2630 cfqq->dispatch_start = jiffies; 2649 cfqq->dispatch_start = ktime_get_ns();
2631 cfqq->allocated_slice = 0; 2650 cfqq->allocated_slice = 0;
2632 cfqq->slice_end = 0; 2651 cfqq->slice_end = 0;
2633 cfqq->slice_dispatch = 0; 2652 cfqq->slice_dispatch = 0;
@@ -2676,8 +2695,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2676 if (cfq_cfqq_slice_new(cfqq)) 2695 if (cfq_cfqq_slice_new(cfqq))
2677 cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); 2696 cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
2678 else 2697 else
2679 cfqq->slice_resid = cfqq->slice_end - jiffies; 2698 cfqq->slice_resid = cfqq->slice_end - ktime_get_ns();
2680 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); 2699 cfq_log_cfqq(cfqd, cfqq, "resid=%lld", cfqq->slice_resid);
2681 } 2700 }
2682 2701
2683 cfq_group_served(cfqd, cfqq->cfqg, cfqq); 2702 cfq_group_served(cfqd, cfqq->cfqg, cfqq);
@@ -2911,7 +2930,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2911 struct cfq_queue *cfqq = cfqd->active_queue; 2930 struct cfq_queue *cfqq = cfqd->active_queue;
2912 struct cfq_rb_root *st = cfqq->service_tree; 2931 struct cfq_rb_root *st = cfqq->service_tree;
2913 struct cfq_io_cq *cic; 2932 struct cfq_io_cq *cic;
2914 unsigned long sl, group_idle = 0; 2933 u64 sl, group_idle = 0;
2934 u64 now = ktime_get_ns();
2915 2935
2916 /* 2936 /*
2917 * SSD device without seek penalty, disable idling. But only do so 2937 * SSD device without seek penalty, disable idling. But only do so
@@ -2954,8 +2974,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2954 * time slice. 2974 * time slice.
2955 */ 2975 */
2956 if (sample_valid(cic->ttime.ttime_samples) && 2976 if (sample_valid(cic->ttime.ttime_samples) &&
2957 (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) { 2977 (cfqq->slice_end - now < cic->ttime.ttime_mean)) {
2958 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", 2978 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu",
2959 cic->ttime.ttime_mean); 2979 cic->ttime.ttime_mean);
2960 return; 2980 return;
2961 } 2981 }
@@ -2976,9 +2996,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2976 else 2996 else
2977 sl = cfqd->cfq_slice_idle; 2997 sl = cfqd->cfq_slice_idle;
2978 2998
2979 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 2999 hrtimer_start(&cfqd->idle_slice_timer, ns_to_ktime(sl),
3000 HRTIMER_MODE_REL);
2980 cfqg_stats_set_start_idle_time(cfqq->cfqg); 3001 cfqg_stats_set_start_idle_time(cfqq->cfqg);
2981 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, 3002 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl,
2982 group_idle ? 1 : 0); 3003 group_idle ? 1 : 0);
2983} 3004}
2984 3005
@@ -3018,7 +3039,7 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
3018 return NULL; 3039 return NULL;
3019 3040
3020 rq = rq_entry_fifo(cfqq->fifo.next); 3041 rq = rq_entry_fifo(cfqq->fifo.next);
3021 if (time_before(jiffies, rq->fifo_time)) 3042 if (ktime_get_ns() < rq->fifo_time)
3022 rq = NULL; 3043 rq = NULL;
3023 3044
3024 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); 3045 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
@@ -3096,14 +3117,14 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
3096 struct cfq_queue *queue; 3117 struct cfq_queue *queue;
3097 int i; 3118 int i;
3098 bool key_valid = false; 3119 bool key_valid = false;
3099 unsigned long lowest_key = 0; 3120 u64 lowest_key = 0;
3100 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD; 3121 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
3101 3122
3102 for (i = 0; i <= SYNC_WORKLOAD; ++i) { 3123 for (i = 0; i <= SYNC_WORKLOAD; ++i) {
3103 /* select the one with lowest rb_key */ 3124 /* select the one with lowest rb_key */
3104 queue = cfq_rb_first(st_for(cfqg, wl_class, i)); 3125 queue = cfq_rb_first(st_for(cfqg, wl_class, i));
3105 if (queue && 3126 if (queue &&
3106 (!key_valid || time_before(queue->rb_key, lowest_key))) { 3127 (!key_valid || queue->rb_key < lowest_key)) {
3107 lowest_key = queue->rb_key; 3128 lowest_key = queue->rb_key;
3108 cur_best = i; 3129 cur_best = i;
3109 key_valid = true; 3130 key_valid = true;
@@ -3116,11 +3137,12 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
3116static void 3137static void
3117choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) 3138choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
3118{ 3139{
3119 unsigned slice; 3140 u64 slice;
3120 unsigned count; 3141 unsigned count;
3121 struct cfq_rb_root *st; 3142 struct cfq_rb_root *st;
3122 unsigned group_slice; 3143 u64 group_slice;
3123 enum wl_class_t original_class = cfqd->serving_wl_class; 3144 enum wl_class_t original_class = cfqd->serving_wl_class;
3145 u64 now = ktime_get_ns();
3124 3146
3125 /* Choose next priority. RT > BE > IDLE */ 3147 /* Choose next priority. RT > BE > IDLE */
3126 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) 3148 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -3129,7 +3151,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
3129 cfqd->serving_wl_class = BE_WORKLOAD; 3151 cfqd->serving_wl_class = BE_WORKLOAD;
3130 else { 3152 else {
3131 cfqd->serving_wl_class = IDLE_WORKLOAD; 3153 cfqd->serving_wl_class = IDLE_WORKLOAD;
3132 cfqd->workload_expires = jiffies + 1; 3154 cfqd->workload_expires = now + jiffies_to_nsecs(1);
3133 return; 3155 return;
3134 } 3156 }
3135 3157
@@ -3147,7 +3169,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
3147 /* 3169 /*
3148 * check workload expiration, and that we still have other queues ready 3170 * check workload expiration, and that we still have other queues ready
3149 */ 3171 */
3150 if (count && !time_after(jiffies, cfqd->workload_expires)) 3172 if (count && !(now > cfqd->workload_expires))
3151 return; 3173 return;
3152 3174
3153new_workload: 3175new_workload:
@@ -3164,13 +3186,13 @@ new_workload:
3164 */ 3186 */
3165 group_slice = cfq_group_slice(cfqd, cfqg); 3187 group_slice = cfq_group_slice(cfqd, cfqg);
3166 3188
3167 slice = group_slice * count / 3189 slice = div_u64(group_slice * count,
3168 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class], 3190 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class],
3169 cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd, 3191 cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd,
3170 cfqg)); 3192 cfqg)));
3171 3193
3172 if (cfqd->serving_wl_type == ASYNC_WORKLOAD) { 3194 if (cfqd->serving_wl_type == ASYNC_WORKLOAD) {
3173 unsigned int tmp; 3195 u64 tmp;
3174 3196
3175 /* 3197 /*
3176 * Async queues are currently system wide. Just taking 3198 * Async queues are currently system wide. Just taking
@@ -3181,19 +3203,19 @@ new_workload:
3181 */ 3203 */
3182 tmp = cfqd->cfq_target_latency * 3204 tmp = cfqd->cfq_target_latency *
3183 cfqg_busy_async_queues(cfqd, cfqg); 3205 cfqg_busy_async_queues(cfqd, cfqg);
3184 tmp = tmp/cfqd->busy_queues; 3206 tmp = div_u64(tmp, cfqd->busy_queues);
3185 slice = min_t(unsigned, slice, tmp); 3207 slice = min_t(u64, slice, tmp);
3186 3208
3187 /* async workload slice is scaled down according to 3209 /* async workload slice is scaled down according to
3188 * the sync/async slice ratio. */ 3210 * the sync/async slice ratio. */
3189 slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1]; 3211 slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]);
3190 } else 3212 } else
3191 /* sync workload slice is at least 2 * cfq_slice_idle */ 3213 /* sync workload slice is at least 2 * cfq_slice_idle */
3192 slice = max(slice, 2 * cfqd->cfq_slice_idle); 3214 slice = max(slice, 2 * cfqd->cfq_slice_idle);
3193 3215
3194 slice = max_t(unsigned, slice, CFQ_MIN_TT); 3216 slice = max_t(u64, slice, CFQ_MIN_TT);
3195 cfq_log(cfqd, "workload slice:%d", slice); 3217 cfq_log(cfqd, "workload slice:%llu", slice);
3196 cfqd->workload_expires = jiffies + slice; 3218 cfqd->workload_expires = now + slice;
3197} 3219}
3198 3220
3199static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) 3221static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
@@ -3211,16 +3233,17 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
3211static void cfq_choose_cfqg(struct cfq_data *cfqd) 3233static void cfq_choose_cfqg(struct cfq_data *cfqd)
3212{ 3234{
3213 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd); 3235 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
3236 u64 now = ktime_get_ns();
3214 3237
3215 cfqd->serving_group = cfqg; 3238 cfqd->serving_group = cfqg;
3216 3239
3217 /* Restore the workload type data */ 3240 /* Restore the workload type data */
3218 if (cfqg->saved_wl_slice) { 3241 if (cfqg->saved_wl_slice) {
3219 cfqd->workload_expires = jiffies + cfqg->saved_wl_slice; 3242 cfqd->workload_expires = now + cfqg->saved_wl_slice;
3220 cfqd->serving_wl_type = cfqg->saved_wl_type; 3243 cfqd->serving_wl_type = cfqg->saved_wl_type;
3221 cfqd->serving_wl_class = cfqg->saved_wl_class; 3244 cfqd->serving_wl_class = cfqg->saved_wl_class;
3222 } else 3245 } else
3223 cfqd->workload_expires = jiffies - 1; 3246 cfqd->workload_expires = now - 1;
3224 3247
3225 choose_wl_class_and_type(cfqd, cfqg); 3248 choose_wl_class_and_type(cfqd, cfqg);
3226} 3249}
@@ -3232,6 +3255,7 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd)
3232static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 3255static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
3233{ 3256{
3234 struct cfq_queue *cfqq, *new_cfqq = NULL; 3257 struct cfq_queue *cfqq, *new_cfqq = NULL;
3258 u64 now = ktime_get_ns();
3235 3259
3236 cfqq = cfqd->active_queue; 3260 cfqq = cfqd->active_queue;
3237 if (!cfqq) 3261 if (!cfqq)
@@ -3292,7 +3316,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
3292 * flight or is idling for a new request, allow either of these 3316 * flight or is idling for a new request, allow either of these
3293 * conditions to happen (or time out) before selecting a new queue. 3317 * conditions to happen (or time out) before selecting a new queue.
3294 */ 3318 */
3295 if (timer_pending(&cfqd->idle_slice_timer)) { 3319 if (hrtimer_active(&cfqd->idle_slice_timer)) {
3296 cfqq = NULL; 3320 cfqq = NULL;
3297 goto keep_queue; 3321 goto keep_queue;
3298 } 3322 }
@@ -3303,7 +3327,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
3303 **/ 3327 **/
3304 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) && 3328 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
3305 (cfq_cfqq_slice_new(cfqq) || 3329 (cfq_cfqq_slice_new(cfqq) ||
3306 (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) { 3330 (cfqq->slice_end - now > now - cfqq->slice_start))) {
3307 cfq_clear_cfqq_deep(cfqq); 3331 cfq_clear_cfqq_deep(cfqq);
3308 cfq_clear_cfqq_idle_window(cfqq); 3332 cfq_clear_cfqq_idle_window(cfqq);
3309 } 3333 }
@@ -3381,11 +3405,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
3381static inline bool cfq_slice_used_soon(struct cfq_data *cfqd, 3405static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
3382 struct cfq_queue *cfqq) 3406 struct cfq_queue *cfqq)
3383{ 3407{
3408 u64 now = ktime_get_ns();
3409
3384 /* the queue hasn't finished any request, can't estimate */ 3410 /* the queue hasn't finished any request, can't estimate */
3385 if (cfq_cfqq_slice_new(cfqq)) 3411 if (cfq_cfqq_slice_new(cfqq))
3386 return true; 3412 return true;
3387 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched, 3413 if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end)
3388 cfqq->slice_end))
3389 return true; 3414 return true;
3390 3415
3391 return false; 3416 return false;
@@ -3460,10 +3485,10 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3460 * based on the last sync IO we serviced 3485 * based on the last sync IO we serviced
3461 */ 3486 */
3462 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) { 3487 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
3463 unsigned long last_sync = jiffies - cfqd->last_delayed_sync; 3488 u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync;
3464 unsigned int depth; 3489 unsigned int depth;
3465 3490
3466 depth = last_sync / cfqd->cfq_slice[1]; 3491 depth = div64_u64(last_sync, cfqd->cfq_slice[1]);
3467 if (!depth && !cfqq->dispatched) 3492 if (!depth && !cfqq->dispatched)
3468 depth = 1; 3493 depth = 1;
3469 if (depth < max_dispatch) 3494 if (depth < max_dispatch)
@@ -3546,7 +3571,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
3546 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && 3571 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
3547 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || 3572 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
3548 cfq_class_idle(cfqq))) { 3573 cfq_class_idle(cfqq))) {
3549 cfqq->slice_end = jiffies + 1; 3574 cfqq->slice_end = ktime_get_ns() + 1;
3550 cfq_slice_expired(cfqd, 0); 3575 cfq_slice_expired(cfqd, 0);
3551 } 3576 }
3552 3577
@@ -3624,7 +3649,7 @@ static void cfq_init_icq(struct io_cq *icq)
3624{ 3649{
3625 struct cfq_io_cq *cic = icq_to_cic(icq); 3650 struct cfq_io_cq *cic = icq_to_cic(icq);
3626 3651
3627 cic->ttime.last_end_request = jiffies; 3652 cic->ttime.last_end_request = ktime_get_ns();
3628} 3653}
3629 3654
3630static void cfq_exit_icq(struct io_cq *icq) 3655static void cfq_exit_icq(struct io_cq *icq)
@@ -3682,6 +3707,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic)
3682 * elevate the priority of this queue 3707 * elevate the priority of this queue
3683 */ 3708 */
3684 cfqq->org_ioprio = cfqq->ioprio; 3709 cfqq->org_ioprio = cfqq->ioprio;
3710 cfqq->org_ioprio_class = cfqq->ioprio_class;
3685 cfq_clear_cfqq_prio_changed(cfqq); 3711 cfq_clear_cfqq_prio_changed(cfqq);
3686} 3712}
3687 3713
@@ -3845,14 +3871,15 @@ out:
3845} 3871}
3846 3872
3847static void 3873static void
3848__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle) 3874__cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle)
3849{ 3875{
3850 unsigned long elapsed = jiffies - ttime->last_end_request; 3876 u64 elapsed = ktime_get_ns() - ttime->last_end_request;
3851 elapsed = min(elapsed, 2UL * slice_idle); 3877 elapsed = min(elapsed, 2UL * slice_idle);
3852 3878
3853 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; 3879 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
3854 ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8; 3880 ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8);
3855 ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples; 3881 ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
3882 ttime->ttime_samples);
3856} 3883}
3857 3884
3858static void 3885static void
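Note: the think-time tracking keeps the same 7/8 decay, now carried in nanoseconds with explicit 64-bit divides. A self-contained sketch of that EWMA, illustrative rather than the kernel helper:

/* exponentially weighted think-time average in nanoseconds */
#include <stdio.h>
#include <inttypes.h>

struct ttime {
	uint64_t total;
	uint64_t mean;
	unsigned long samples;
};

static void update_thinktime(struct ttime *t, uint64_t elapsed_ns,
			     uint64_t slice_idle_ns)
{
	if (elapsed_ns > 2 * slice_idle_ns)        /* clamp outliers */
		elapsed_ns = 2 * slice_idle_ns;

	t->samples = (7 * t->samples + 256) / 8;
	t->total   = (7 * t->total + 256 * elapsed_ns) / 8;
	t->mean    = (t->total + 128) / t->samples;
}

int main(void)
{
	struct ttime t = { 0, 0, 0 };

	/* three completions arriving 2 ms apart, with an 8 ms idle window */
	for (int i = 0; i < 3; i++) {
		update_thinktime(&t, 2000000ULL, 8000000ULL);
		printf("samples=%lu mean=%" PRIu64 " ns\n", t.samples, t.mean);
	}
	return 0;
}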
@@ -4105,10 +4132,10 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
4105 cfq_log_cfqq(cfqd, cfqq, "insert_request"); 4132 cfq_log_cfqq(cfqd, cfqq, "insert_request");
4106 cfq_init_prio_data(cfqq, RQ_CIC(rq)); 4133 cfq_init_prio_data(cfqq, RQ_CIC(rq));
4107 4134
4108 rq->fifo_time = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; 4135 rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
4109 list_add_tail(&rq->queuelist, &cfqq->fifo); 4136 list_add_tail(&rq->queuelist, &cfqq->fifo);
4110 cfq_add_rq_rb(rq); 4137 cfq_add_rq_rb(rq);
4111 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, 4138 cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq),
4112 rq->cmd_flags); 4139 rq->cmd_flags);
4113 cfq_rq_enqueued(cfqd, cfqq, rq); 4140 cfq_rq_enqueued(cfqd, cfqq, rq);
4114} 4141}
@@ -4153,6 +4180,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
4153static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) 4180static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4154{ 4181{
4155 struct cfq_io_cq *cic = cfqd->active_cic; 4182 struct cfq_io_cq *cic = cfqd->active_cic;
4183 u64 now = ktime_get_ns();
4156 4184
4157 /* If the queue already has requests, don't wait */ 4185 /* If the queue already has requests, don't wait */
4158 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) 4186 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -4171,7 +4199,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4171 4199
4172 /* if slice left is less than think time, wait busy */ 4200 /* if slice left is less than think time, wait busy */
4173 if (cic && sample_valid(cic->ttime.ttime_samples) 4201 if (cic && sample_valid(cic->ttime.ttime_samples)
4174 && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) 4202 && (cfqq->slice_end - now < cic->ttime.ttime_mean))
4175 return true; 4203 return true;
4176 4204
4177 /* 4205 /*
@@ -4181,7 +4209,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
4181 * case where think time is less than a jiffy, mark the queue wait 4209 * case where think time is less than a jiffy, mark the queue wait
4182 * busy if only 1 jiffy is left in the slice. 4210 * busy if only 1 jiffy is left in the slice.
4183 */ 4211 */
4184 if (cfqq->slice_end - jiffies == 1) 4212 if (cfqq->slice_end - now <= jiffies_to_nsecs(1))
4185 return true; 4213 return true;
4186 4214
4187 return false; 4215 return false;
@@ -4192,9 +4220,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4192 struct cfq_queue *cfqq = RQ_CFQQ(rq); 4220 struct cfq_queue *cfqq = RQ_CFQQ(rq);
4193 struct cfq_data *cfqd = cfqq->cfqd; 4221 struct cfq_data *cfqd = cfqq->cfqd;
4194 const int sync = rq_is_sync(rq); 4222 const int sync = rq_is_sync(rq);
4195 unsigned long now; 4223 u64 now = ktime_get_ns();
4196 4224
4197 now = jiffies;
4198 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", 4225 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
4199 !!(rq->cmd_flags & REQ_NOIDLE)); 4226 !!(rq->cmd_flags & REQ_NOIDLE));
4200 4227
@@ -4206,7 +4233,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4206 cfqq->dispatched--; 4233 cfqq->dispatched--;
4207 (RQ_CFQG(rq))->dispatched--; 4234 (RQ_CFQG(rq))->dispatched--;
4208 cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq), 4235 cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
4209 rq_io_start_time_ns(rq), rq->cmd_flags); 4236 rq_io_start_time_ns(rq), req_op(rq),
4237 rq->cmd_flags);
4210 4238
4211 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; 4239 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
4212 4240
@@ -4222,7 +4250,16 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4222 cfqq_type(cfqq)); 4250 cfqq_type(cfqq));
4223 4251
4224 st->ttime.last_end_request = now; 4252 st->ttime.last_end_request = now;
4225 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) 4253 /*
4254 * We have to do this check in jiffies since start_time is in
4255 * jiffies and it is not trivial to convert to ns. If
4256 * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
4257 * will become problematic but so far we are fine (the default
4258 * is 128 ms).
4259 */
4260 if (!time_after(rq->start_time +
4261 nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
4262 jiffies))
4226 cfqd->last_delayed_sync = now; 4263 cfqd->last_delayed_sync = now;
4227 } 4264 }
4228 4265
@@ -4247,10 +4284,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4247 * the queue. 4284 * the queue.
4248 */ 4285 */
4249 if (cfq_should_wait_busy(cfqd, cfqq)) { 4286 if (cfq_should_wait_busy(cfqd, cfqq)) {
4250 unsigned long extend_sl = cfqd->cfq_slice_idle; 4287 u64 extend_sl = cfqd->cfq_slice_idle;
4251 if (!cfqd->cfq_slice_idle) 4288 if (!cfqd->cfq_slice_idle)
4252 extend_sl = cfqd->cfq_group_idle; 4289 extend_sl = cfqd->cfq_group_idle;
4253 cfqq->slice_end = jiffies + extend_sl; 4290 cfqq->slice_end = now + extend_sl;
4254 cfq_mark_cfqq_wait_busy(cfqq); 4291 cfq_mark_cfqq_wait_busy(cfqq);
4255 cfq_log_cfqq(cfqd, cfqq, "will busy wait"); 4292 cfq_log_cfqq(cfqd, cfqq, "will busy wait");
4256 } 4293 }
@@ -4275,6 +4312,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
4275 cfq_schedule_dispatch(cfqd); 4312 cfq_schedule_dispatch(cfqd);
4276} 4313}
4277 4314
4315static void cfqq_boost_on_prio(struct cfq_queue *cfqq, int op_flags)
4316{
4317 /*
4318 * If REQ_PRIO is set, boost class and prio level, if it's below
4319 * BE/NORM. If prio is not set, restore the potentially boosted
4320 * class/prio level.
4321 */
4322 if (!(op_flags & REQ_PRIO)) {
4323 cfqq->ioprio_class = cfqq->org_ioprio_class;
4324 cfqq->ioprio = cfqq->org_ioprio;
4325 } else {
4326 if (cfq_class_idle(cfqq))
4327 cfqq->ioprio_class = IOPRIO_CLASS_BE;
4328 if (cfqq->ioprio > IOPRIO_NORM)
4329 cfqq->ioprio = IOPRIO_NORM;
4330 }
4331}
4332
4278static inline int __cfq_may_queue(struct cfq_queue *cfqq) 4333static inline int __cfq_may_queue(struct cfq_queue *cfqq)
4279{ 4334{
4280 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { 4335 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
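Note: the boost only holds while REQ_PRIO requests keep arriving; a plain request restores the saved class and priority. An illustrative decision table, with stand-in constants that mirror the kernel's ioprio values and should be treated as assumptions:

/* boost an idle-class queue to BE/NORM while REQ_PRIO is set, else restore */
#include <stdio.h>
#include <stdbool.h>

enum { IOPRIO_CLASS_RT = 1, IOPRIO_CLASS_BE = 2, IOPRIO_CLASS_IDLE = 3 };
#define IOPRIO_NORM 4

struct queue_prio {
	int ioprio_class, ioprio;
	int org_ioprio_class, org_ioprio;
};

static void boost_on_prio(struct queue_prio *q, bool req_prio)
{
	if (!req_prio) {
		/* restore any earlier boost */
		q->ioprio_class = q->org_ioprio_class;
		q->ioprio = q->org_ioprio;
	} else {
		if (q->ioprio_class == IOPRIO_CLASS_IDLE)
			q->ioprio_class = IOPRIO_CLASS_BE;
		if (q->ioprio > IOPRIO_NORM)
			q->ioprio = IOPRIO_NORM;
	}
}

int main(void)
{
	struct queue_prio q = { IOPRIO_CLASS_IDLE, 7, IOPRIO_CLASS_IDLE, 7 };

	boost_on_prio(&q, true);    /* REQ_PRIO: idle class boosted to BE/NORM */
	printf("boosted:  class=%d prio=%d\n", q.ioprio_class, q.ioprio);
	boost_on_prio(&q, false);   /* plain request: boost reverted */
	printf("restored: class=%d prio=%d\n", q.ioprio_class, q.ioprio);
	return 0;
}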
@@ -4285,7 +4340,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq)
4285 return ELV_MQUEUE_MAY; 4340 return ELV_MQUEUE_MAY;
4286} 4341}
4287 4342
4288static int cfq_may_queue(struct request_queue *q, int rw) 4343static int cfq_may_queue(struct request_queue *q, int op, int op_flags)
4289{ 4344{
4290 struct cfq_data *cfqd = q->elevator->elevator_data; 4345 struct cfq_data *cfqd = q->elevator->elevator_data;
4291 struct task_struct *tsk = current; 4346 struct task_struct *tsk = current;
@@ -4302,9 +4357,10 @@ static int cfq_may_queue(struct request_queue *q, int rw)
4302 if (!cic) 4357 if (!cic)
4303 return ELV_MQUEUE_MAY; 4358 return ELV_MQUEUE_MAY;
4304 4359
4305 cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); 4360 cfqq = cic_to_cfqq(cic, rw_is_sync(op, op_flags));
4306 if (cfqq) { 4361 if (cfqq) {
4307 cfq_init_prio_data(cfqq, cic); 4362 cfq_init_prio_data(cfqq, cic);
4363 cfqq_boost_on_prio(cfqq, op_flags);
4308 4364
4309 return __cfq_may_queue(cfqq); 4365 return __cfq_may_queue(cfqq);
4310 } 4366 }
@@ -4435,9 +4491,10 @@ static void cfq_kick_queue(struct work_struct *work)
4435/* 4491/*
4436 * Timer running if the active_queue is currently idling inside its time slice 4492 * Timer running if the active_queue is currently idling inside its time slice
4437 */ 4493 */
4438static void cfq_idle_slice_timer(unsigned long data) 4494static enum hrtimer_restart cfq_idle_slice_timer(struct hrtimer *timer)
4439{ 4495{
4440 struct cfq_data *cfqd = (struct cfq_data *) data; 4496 struct cfq_data *cfqd = container_of(timer, struct cfq_data,
4497 idle_slice_timer);
4441 struct cfq_queue *cfqq; 4498 struct cfq_queue *cfqq;
4442 unsigned long flags; 4499 unsigned long flags;
4443 int timed_out = 1; 4500 int timed_out = 1;
@@ -4486,11 +4543,12 @@ out_kick:
4486 cfq_schedule_dispatch(cfqd); 4543 cfq_schedule_dispatch(cfqd);
4487out_cont: 4544out_cont:
4488 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 4545 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
4546 return HRTIMER_NORESTART;
4489} 4547}
4490 4548
4491static void cfq_shutdown_timer_wq(struct cfq_data *cfqd) 4549static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
4492{ 4550{
4493 del_timer_sync(&cfqd->idle_slice_timer); 4551 hrtimer_cancel(&cfqd->idle_slice_timer);
4494 cancel_work_sync(&cfqd->unplug_work); 4552 cancel_work_sync(&cfqd->unplug_work);
4495} 4553}
4496 4554
@@ -4586,9 +4644,9 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
4586 cfqg_put(cfqd->root_group); 4644 cfqg_put(cfqd->root_group);
4587 spin_unlock_irq(q->queue_lock); 4645 spin_unlock_irq(q->queue_lock);
4588 4646
4589 init_timer(&cfqd->idle_slice_timer); 4647 hrtimer_init(&cfqd->idle_slice_timer, CLOCK_MONOTONIC,
4648 HRTIMER_MODE_REL);
4590 cfqd->idle_slice_timer.function = cfq_idle_slice_timer; 4649 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
4591 cfqd->idle_slice_timer.data = (unsigned long) cfqd;
4592 4650
4593 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); 4651 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
4594 4652
@@ -4609,7 +4667,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
4609 * we optimistically start assuming sync ops weren't delayed in last 4667 * we optimistically start assuming sync ops weren't delayed in last
4610 * second, in order to have larger depth for async operations. 4668 * second, in order to have larger depth for async operations.
4611 */ 4669 */
4612 cfqd->last_delayed_sync = jiffies - HZ; 4670 cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC;
4613 return 0; 4671 return 0;
4614 4672
4615out_free: 4673out_free:
@@ -4652,9 +4710,9 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
4652static ssize_t __FUNC(struct elevator_queue *e, char *page) \ 4710static ssize_t __FUNC(struct elevator_queue *e, char *page) \
4653{ \ 4711{ \
4654 struct cfq_data *cfqd = e->elevator_data; \ 4712 struct cfq_data *cfqd = e->elevator_data; \
4655 unsigned int __data = __VAR; \ 4713 u64 __data = __VAR; \
4656 if (__CONV) \ 4714 if (__CONV) \
4657 __data = jiffies_to_msecs(__data); \ 4715 __data = div_u64(__data, NSEC_PER_MSEC); \
4658 return cfq_var_show(__data, (page)); \ 4716 return cfq_var_show(__data, (page)); \
4659} 4717}
4660SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); 4718SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
@@ -4671,6 +4729,21 @@ SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
4671SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1); 4729SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
4672#undef SHOW_FUNCTION 4730#undef SHOW_FUNCTION
4673 4731
4732#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \
4733static ssize_t __FUNC(struct elevator_queue *e, char *page) \
4734{ \
4735 struct cfq_data *cfqd = e->elevator_data; \
4736 u64 __data = __VAR; \
4737 __data = div_u64(__data, NSEC_PER_USEC); \
4738 return cfq_var_show(__data, (page)); \
4739}
4740USEC_SHOW_FUNCTION(cfq_slice_idle_us_show, cfqd->cfq_slice_idle);
4741USEC_SHOW_FUNCTION(cfq_group_idle_us_show, cfqd->cfq_group_idle);
4742USEC_SHOW_FUNCTION(cfq_slice_sync_us_show, cfqd->cfq_slice[1]);
4743USEC_SHOW_FUNCTION(cfq_slice_async_us_show, cfqd->cfq_slice[0]);
4744USEC_SHOW_FUNCTION(cfq_target_latency_us_show, cfqd->cfq_target_latency);
4745#undef USEC_SHOW_FUNCTION
4746
4674#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ 4747#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
4675static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ 4748static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
4676{ \ 4749{ \
@@ -4682,7 +4755,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
4682 else if (__data > (MAX)) \ 4755 else if (__data > (MAX)) \
4683 __data = (MAX); \ 4756 __data = (MAX); \
4684 if (__CONV) \ 4757 if (__CONV) \
4685 *(__PTR) = msecs_to_jiffies(__data); \ 4758 *(__PTR) = (u64)__data * NSEC_PER_MSEC; \
4686 else \ 4759 else \
4687 *(__PTR) = __data; \ 4760 *(__PTR) = __data; \
4688 return ret; \ 4761 return ret; \
@@ -4705,6 +4778,26 @@ STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
4705STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1); 4778STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1);
4706#undef STORE_FUNCTION 4779#undef STORE_FUNCTION
4707 4780
4781#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
4782static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
4783{ \
4784 struct cfq_data *cfqd = e->elevator_data; \
4785 unsigned int __data; \
4786 int ret = cfq_var_store(&__data, (page), count); \
4787 if (__data < (MIN)) \
4788 __data = (MIN); \
4789 else if (__data > (MAX)) \
4790 __data = (MAX); \
4791 *(__PTR) = (u64)__data * NSEC_PER_USEC; \
4792 return ret; \
4793}
4794USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX);
4795USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX);
4796USEC_STORE_FUNCTION(cfq_slice_sync_us_store, &cfqd->cfq_slice[1], 1, UINT_MAX);
4797USEC_STORE_FUNCTION(cfq_slice_async_us_store, &cfqd->cfq_slice[0], 1, UINT_MAX);
4798USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, UINT_MAX);
4799#undef USEC_STORE_FUNCTION
4800
4708#define CFQ_ATTR(name) \ 4801#define CFQ_ATTR(name) \
4709 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store) 4802 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
4710 4803
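
The new *_us attributes clamp the user value and scale by NSEC_PER_USEC, so sub-millisecond settings survive without the rounding the millisecond files impose; both files back onto the same u64 nanosecond field. A sketch of that clamp-then-scale pattern, with illustrative names and limits:

/* microsecond-granularity store into a shared nanosecond field */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL

static uint64_t slice_idle_ns;		/* stand-in for cfqd->cfq_slice_idle */

static void slice_idle_us_store(unsigned int usecs,
				unsigned int min, unsigned int max)
{
	if (usecs < min)
		usecs = min;
	else if (usecs > max)
		usecs = max;
	slice_idle_ns = (uint64_t)usecs * NSEC_PER_USEC;
}

int main(void)
{
	slice_idle_us_store(250, 0, 4000000);	/* 250 us, finer than 1 ms */
	printf("slice_idle = %" PRIu64 " ns (%" PRIu64 " us)\n",
	       slice_idle_ns, slice_idle_ns / NSEC_PER_USEC);
	return 0;
}
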
@@ -4715,12 +4808,17 @@ static struct elv_fs_entry cfq_attrs[] = {
4715 CFQ_ATTR(back_seek_max), 4808 CFQ_ATTR(back_seek_max),
4716 CFQ_ATTR(back_seek_penalty), 4809 CFQ_ATTR(back_seek_penalty),
4717 CFQ_ATTR(slice_sync), 4810 CFQ_ATTR(slice_sync),
4811 CFQ_ATTR(slice_sync_us),
4718 CFQ_ATTR(slice_async), 4812 CFQ_ATTR(slice_async),
4813 CFQ_ATTR(slice_async_us),
4719 CFQ_ATTR(slice_async_rq), 4814 CFQ_ATTR(slice_async_rq),
4720 CFQ_ATTR(slice_idle), 4815 CFQ_ATTR(slice_idle),
4816 CFQ_ATTR(slice_idle_us),
4721 CFQ_ATTR(group_idle), 4817 CFQ_ATTR(group_idle),
4818 CFQ_ATTR(group_idle_us),
4722 CFQ_ATTR(low_latency), 4819 CFQ_ATTR(low_latency),
4723 CFQ_ATTR(target_latency), 4820 CFQ_ATTR(target_latency),
4821 CFQ_ATTR(target_latency_us),
4724 __ATTR_NULL 4822 __ATTR_NULL
4725}; 4823};
4726 4824
@@ -4729,7 +4827,8 @@ static struct elevator_type iosched_cfq = {
4729 .elevator_merge_fn = cfq_merge, 4827 .elevator_merge_fn = cfq_merge,
4730 .elevator_merged_fn = cfq_merged_request, 4828 .elevator_merged_fn = cfq_merged_request,
4731 .elevator_merge_req_fn = cfq_merged_requests, 4829 .elevator_merge_req_fn = cfq_merged_requests,
4732 .elevator_allow_merge_fn = cfq_allow_merge, 4830 .elevator_allow_bio_merge_fn = cfq_allow_bio_merge,
4831 .elevator_allow_rq_merge_fn = cfq_allow_rq_merge,
4733 .elevator_bio_merged_fn = cfq_bio_merged, 4832 .elevator_bio_merged_fn = cfq_bio_merged,
4734 .elevator_dispatch_fn = cfq_dispatch_requests, 4833 .elevator_dispatch_fn = cfq_dispatch_requests,
4735 .elevator_add_req_fn = cfq_insert_request, 4834 .elevator_add_req_fn = cfq_insert_request,
@@ -4776,18 +4875,7 @@ static int __init cfq_init(void)
4776{ 4875{
4777 int ret; 4876 int ret;
4778 4877
4779 /*
4780 * could be 0 on HZ < 1000 setups
4781 */
4782 if (!cfq_slice_async)
4783 cfq_slice_async = 1;
4784 if (!cfq_slice_idle)
4785 cfq_slice_idle = 1;
4786
4787#ifdef CONFIG_CFQ_GROUP_IOSCHED 4878#ifdef CONFIG_CFQ_GROUP_IOSCHED
4788 if (!cfq_group_idle)
4789 cfq_group_idle = 1;
4790
4791 ret = blkcg_policy_register(&blkcg_policy_cfq); 4879 ret = blkcg_policy_register(&blkcg_policy_cfq);
4792 if (ret) 4880 if (ret)
4793 return ret; 4881 return ret;
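
The block deleted from cfq_init() was a workaround for jiffies-based defaults derived from HZ by integer division (for example HZ / 125 for slice_idle), which truncates to zero when HZ is below 125. With defaults now expressed in nanoseconds, e.g. NSEC_PER_SEC / 125, that cannot happen, so the fixups are gone. A quick illustration of the truncation, assuming those were the original expressions:

/* HZ-derived defaults can round to 0; nanosecond ones cannot */
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	int hz_values[] = { 100, 250, 1000 };

	for (int i = 0; i < 3; i++)
		printf("HZ=%4d: HZ/125 = %d jiffies, NSEC_PER_SEC/125 = %llu ns\n",
		       hz_values[i], hz_values[i] / 125, NSEC_PER_SEC / 125);
	return 0;
}
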
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index d0dd7882d8c7..55e0bb6d7da7 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -137,7 +137,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
137 if (__rq) { 137 if (__rq) {
138 BUG_ON(sector != blk_rq_pos(__rq)); 138 BUG_ON(sector != blk_rq_pos(__rq));
139 139
140 if (elv_rq_merge_ok(__rq, bio)) { 140 if (elv_bio_merge_ok(__rq, bio)) {
141 ret = ELEVATOR_FRONT_MERGE; 141 ret = ELEVATOR_FRONT_MERGE;
142 goto out; 142 goto out;
143 } 143 }
@@ -173,7 +173,8 @@ deadline_merged_requests(struct request_queue *q, struct request *req,
173 * and move into next position (next will be deleted) in fifo 173 * and move into next position (next will be deleted) in fifo
174 */ 174 */
175 if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { 175 if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
176 if (time_before(next->fifo_time, req->fifo_time)) { 176 if (time_before((unsigned long)next->fifo_time,
177 (unsigned long)req->fifo_time)) {
177 list_move(&req->queuelist, &next->queuelist); 178 list_move(&req->queuelist, &next->queuelist);
178 req->fifo_time = next->fifo_time; 179 req->fifo_time = next->fifo_time;
179 } 180 }
@@ -227,7 +228,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
227 /* 228 /*
228 * rq is expired! 229 * rq is expired!
229 */ 230 */
230 if (time_after_eq(jiffies, rq->fifo_time)) 231 if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
231 return 1; 232 return 1;
232 233
233 return 0; 234 return 0;
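
fifo_time is now a u64 so that cfq can store nanosecond timestamps in it, while deadline keeps filling it with jiffies; the casts back to unsigned long preserve the wraparound-safe jiffies comparison. A userspace sketch mirroring the time_after_eq() idiom (the helper name and values are illustrative):

/* wraparound-tolerant "a is at or after b" check on an unsigned counter */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

static int time_after_eq_ul(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;
}

int main(void)
{
	uint64_t fifo_time = (uint64_t)ULONG_MAX - 5;	/* jiffies widened to u64 */
	unsigned long jiffies_now = 10;			/* counter has wrapped */

	printf("expired: %d\n",
	       time_after_eq_ul(jiffies_now, (unsigned long)fifo_time));
	return 0;
}
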
diff --git a/block/elevator.c b/block/elevator.c
index c3555c9c672f..7096c22041e7 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -53,13 +53,13 @@ static LIST_HEAD(elv_list);
53 * Query io scheduler to see if the current process issuing bio may be 53 * Query io scheduler to see if the current process issuing bio may be
54 * merged with rq. 54 * merged with rq.
55 */ 55 */
56static int elv_iosched_allow_merge(struct request *rq, struct bio *bio) 56static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
57{ 57{
58 struct request_queue *q = rq->q; 58 struct request_queue *q = rq->q;
59 struct elevator_queue *e = q->elevator; 59 struct elevator_queue *e = q->elevator;
60 60
61 if (e->type->ops.elevator_allow_merge_fn) 61 if (e->type->ops.elevator_allow_bio_merge_fn)
62 return e->type->ops.elevator_allow_merge_fn(q, rq, bio); 62 return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio);
63 63
64 return 1; 64 return 1;
65} 65}
@@ -67,17 +67,17 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
67/* 67/*
68 * can we safely merge with this request? 68 * can we safely merge with this request?
69 */ 69 */
70bool elv_rq_merge_ok(struct request *rq, struct bio *bio) 70bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
71{ 71{
72 if (!blk_rq_merge_ok(rq, bio)) 72 if (!blk_rq_merge_ok(rq, bio))
73 return 0; 73 return false;
74 74
75 if (!elv_iosched_allow_merge(rq, bio)) 75 if (!elv_iosched_allow_bio_merge(rq, bio))
76 return 0; 76 return false;
77 77
78 return 1; 78 return true;
79} 79}
80EXPORT_SYMBOL(elv_rq_merge_ok); 80EXPORT_SYMBOL(elv_bio_merge_ok);
81 81
82static struct elevator_type *elevator_find(const char *name) 82static struct elevator_type *elevator_find(const char *name)
83{ 83{
@@ -366,8 +366,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
366 list_for_each_prev(entry, &q->queue_head) { 366 list_for_each_prev(entry, &q->queue_head) {
367 struct request *pos = list_entry_rq(entry); 367 struct request *pos = list_entry_rq(entry);
368 368
369 if ((rq->cmd_flags & REQ_DISCARD) != 369 if ((req_op(rq) == REQ_OP_DISCARD) != (req_op(pos) == REQ_OP_DISCARD))
370 (pos->cmd_flags & REQ_DISCARD))
371 break; 370 break;
372 if (rq_data_dir(rq) != rq_data_dir(pos)) 371 if (rq_data_dir(rq) != rq_data_dir(pos))
373 break; 372 break;
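
This hunk swaps a REQ_DISCARD bit test in cmd_flags for a comparison of req_op() values, part of the series that separates the request operation from its modifier flags. A simplified model of that split; the struct and enum below are illustrative, not the kernel's actual request layout:

/* one operation per request, modifier flags kept separately */
#include <stdbool.h>
#include <stdio.h>

enum sketch_op { OP_READ, OP_WRITE, OP_DISCARD, OP_FLUSH };

struct sketch_req {
	enum sketch_op op;	/* exactly one operation */
	unsigned int flags;	/* independent modifiers (sync, meta, ...) */
};

static bool same_op_class(const struct sketch_req *a, const struct sketch_req *b)
{
	/* don't sort discards in among reads/writes */
	return (a->op == OP_DISCARD) == (b->op == OP_DISCARD);
}

int main(void)
{
	struct sketch_req w = { .op = OP_WRITE, .flags = 0 };
	struct sketch_req d = { .op = OP_DISCARD, .flags = 0 };

	printf("write vs discard in same class: %d\n", same_op_class(&w, &d));
	return 0;
}
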
@@ -426,7 +425,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
426 /* 425 /*
427 * First try one-hit cache. 426 * First try one-hit cache.
428 */ 427 */
429 if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) { 428 if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
430 ret = blk_try_merge(q->last_merge, bio); 429 ret = blk_try_merge(q->last_merge, bio);
431 if (ret != ELEVATOR_NO_MERGE) { 430 if (ret != ELEVATOR_NO_MERGE) {
432 *req = q->last_merge; 431 *req = q->last_merge;
@@ -441,7 +440,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
441 * See if our hash lookup can find a potential backmerge. 440 * See if our hash lookup can find a potential backmerge.
442 */ 441 */
443 __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector); 442 __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
444 if (__rq && elv_rq_merge_ok(__rq, bio)) { 443 if (__rq && elv_bio_merge_ok(__rq, bio)) {
445 *req = __rq; 444 *req = __rq;
446 return ELEVATOR_BACK_MERGE; 445 return ELEVATOR_BACK_MERGE;
447 } 446 }
@@ -717,12 +716,12 @@ void elv_put_request(struct request_queue *q, struct request *rq)
717 e->type->ops.elevator_put_req_fn(rq); 716 e->type->ops.elevator_put_req_fn(rq);
718} 717}
719 718
720int elv_may_queue(struct request_queue *q, int rw) 719int elv_may_queue(struct request_queue *q, int op, int op_flags)
721{ 720{
722 struct elevator_queue *e = q->elevator; 721 struct elevator_queue *e = q->elevator;
723 722
724 if (e->type->ops.elevator_may_queue_fn) 723 if (e->type->ops.elevator_may_queue_fn)
725 return e->type->ops.elevator_may_queue_fn(q, rw); 724 return e->type->ops.elevator_may_queue_fn(q, op, op_flags);
726 725
727 return ELV_MQUEUE_MAY; 726 return ELV_MQUEUE_MAY;
728} 727}
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d7eb77e1e3a8..71d9ed9df8da 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -495,7 +495,6 @@ rescan:
495 /* add partitions */ 495 /* add partitions */
496 for (p = 1; p < state->limit; p++) { 496 for (p = 1; p < state->limit; p++) {
497 sector_t size, from; 497 sector_t size, from;
498 struct partition_meta_info *info = NULL;
499 498
500 size = state->parts[p].size; 499 size = state->parts[p].size;
501 if (!size) 500 if (!size)
@@ -530,8 +529,6 @@ rescan:
530 } 529 }
531 } 530 }
532 531
533 if (state->parts[p].has_info)
534 info = &state->parts[p].info;
535 part = add_partition(disk, p, from, size, 532 part = add_partition(disk, p, from, size,
536 state->parts[p].flags, 533 state->parts[p].flags,
537 &state->parts[p].info); 534 &state->parts[p].info);
diff --git a/block/partitions/atari.c b/block/partitions/atari.c
index 9875b05e80a2..ff1fb93712c1 100644
--- a/block/partitions/atari.c
+++ b/block/partitions/atari.c
@@ -42,6 +42,13 @@ int atari_partition(struct parsed_partitions *state)
42 int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ 42 int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */
43#endif 43#endif
44 44
45 /*
46 * ATARI partition scheme supports 512 lba only. If this is not
47 * the case, bail early to avoid miscalculating hd_size.
48 */
49 if (bdev_logical_block_size(state->bdev) != 512)
50 return 0;
51
45 rs = read_part_sector(state, 0, &sect); 52 rs = read_part_sector(state, 0, &sect);
46 if (!rs) 53 if (!rs)
47 return -1; 54 return -1;
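
The early return keeps the AHDI parser, whose on-disk fields are expressed in 512-byte units, from doing its hd_size sanity math against a device that reports a different logical block size. A rough, made-up-numbers illustration of the unit mismatch the check avoids:

/* interpreting 512-byte-unit counts with a 4K logical block size */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t hd_size_512 = 2097152;		/* table says 1 GiB in 512-byte units */
	unsigned int logical_block = 4096;	/* device reports 4K sectors */

	printf("intended size: %llu bytes\n",
	       (unsigned long long)hd_size_512 * 512);
	printf("misread as   : %llu bytes\n",
	       (unsigned long long)hd_size_512 * logical_block);
	return 0;
}
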