Diffstat (limited to 'block')
-rw-r--r--  block/blk-core.c      113
-rw-r--r--  block/blk-merge.c      31
-rw-r--r--  block/blk-settings.c    2
-rw-r--r--  block/blk-softirq.c     2
-rw-r--r--  block/blk-sysfs.c      44
-rw-r--r--  block/blk.h            16
-rw-r--r--  block/bsg.c            12
-rw-r--r--  block/cfq-iosched.c   206
-rw-r--r--  block/cmd-filter.c      1
-rw-r--r--  block/elevator.c       44
-rw-r--r--  block/scsi_ioctl.c     21
11 files changed, 264 insertions, 228 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 859879d0a0bf..07ab75403e1a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -64,12 +64,11 @@ static struct workqueue_struct *kblockd_workqueue;
64 64
65static void drive_stat_acct(struct request *rq, int new_io) 65static void drive_stat_acct(struct request *rq, int new_io)
66{ 66{
67 struct gendisk *disk = rq->rq_disk;
68 struct hd_struct *part; 67 struct hd_struct *part;
69 int rw = rq_data_dir(rq); 68 int rw = rq_data_dir(rq);
70 int cpu; 69 int cpu;
71 70
72 if (!blk_fs_request(rq) || !disk || !blk_do_io_stat(disk->queue)) 71 if (!blk_fs_request(rq) || !blk_do_io_stat(rq))
73 return; 72 return;
74 73
75 cpu = part_stat_lock(); 74 cpu = part_stat_lock();
@@ -485,11 +484,11 @@ static int blk_init_free_list(struct request_queue *q)
485{ 484{
486 struct request_list *rl = &q->rq; 485 struct request_list *rl = &q->rq;
487 486
488 rl->count[READ] = rl->count[WRITE] = 0; 487 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
489 rl->starved[READ] = rl->starved[WRITE] = 0; 488 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
490 rl->elvpriv = 0; 489 rl->elvpriv = 0;
491 init_waitqueue_head(&rl->wait[READ]); 490 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
492 init_waitqueue_head(&rl->wait[WRITE]); 491 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
493 492
494 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, 493 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
495 mempool_free_slab, request_cachep, q->node); 494 mempool_free_slab, request_cachep, q->node);
@@ -604,13 +603,10 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
604 q->queue_flags = QUEUE_FLAG_DEFAULT; 603 q->queue_flags = QUEUE_FLAG_DEFAULT;
605 q->queue_lock = lock; 604 q->queue_lock = lock;
606 605
607 blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK); 606 /*
608 607 * This also sets hw/phys segments, boundary and size
608 */
609 blk_queue_make_request(q, __make_request); 609 blk_queue_make_request(q, __make_request);
610 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
611
612 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
613 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
614 610
615 q->sg_reserved_size = INT_MAX; 611 q->sg_reserved_size = INT_MAX;
616 612
@@ -703,18 +699,18 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
703 ioc->last_waited = jiffies; 699 ioc->last_waited = jiffies;
704} 700}
705 701
706static void __freed_request(struct request_queue *q, int rw) 702static void __freed_request(struct request_queue *q, int sync)
707{ 703{
708 struct request_list *rl = &q->rq; 704 struct request_list *rl = &q->rq;
709 705
710 if (rl->count[rw] < queue_congestion_off_threshold(q)) 706 if (rl->count[sync] < queue_congestion_off_threshold(q))
711 blk_clear_queue_congested(q, rw); 707 blk_clear_queue_congested(q, sync);
712 708
713 if (rl->count[rw] + 1 <= q->nr_requests) { 709 if (rl->count[sync] + 1 <= q->nr_requests) {
714 if (waitqueue_active(&rl->wait[rw])) 710 if (waitqueue_active(&rl->wait[sync]))
715 wake_up(&rl->wait[rw]); 711 wake_up(&rl->wait[sync]);
716 712
717 blk_clear_queue_full(q, rw); 713 blk_clear_queue_full(q, sync);
718 } 714 }
719} 715}
720 716
@@ -722,21 +718,20 @@ static void __freed_request(struct request_queue *q, int rw)
722 * A request has just been released. Account for it, update the full and 718 * A request has just been released. Account for it, update the full and
723 * congestion status, wake up any waiters. Called under q->queue_lock. 719 * congestion status, wake up any waiters. Called under q->queue_lock.
724 */ 720 */
725static void freed_request(struct request_queue *q, int rw, int priv) 721static void freed_request(struct request_queue *q, int sync, int priv)
726{ 722{
727 struct request_list *rl = &q->rq; 723 struct request_list *rl = &q->rq;
728 724
729 rl->count[rw]--; 725 rl->count[sync]--;
730 if (priv) 726 if (priv)
731 rl->elvpriv--; 727 rl->elvpriv--;
732 728
733 __freed_request(q, rw); 729 __freed_request(q, sync);
734 730
735 if (unlikely(rl->starved[rw ^ 1])) 731 if (unlikely(rl->starved[sync ^ 1]))
736 __freed_request(q, rw ^ 1); 732 __freed_request(q, sync ^ 1);
737} 733}
738 734
739#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
740/* 735/*
741 * Get a free request, queue_lock must be held. 736 * Get a free request, queue_lock must be held.
742 * Returns NULL on failure, with queue_lock held. 737 * Returns NULL on failure, with queue_lock held.
@@ -748,15 +743,15 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
748 struct request *rq = NULL; 743 struct request *rq = NULL;
749 struct request_list *rl = &q->rq; 744 struct request_list *rl = &q->rq;
750 struct io_context *ioc = NULL; 745 struct io_context *ioc = NULL;
751 const int rw = rw_flags & 0x01; 746 const bool is_sync = rw_is_sync(rw_flags) != 0;
752 int may_queue, priv; 747 int may_queue, priv;
753 748
754 may_queue = elv_may_queue(q, rw_flags); 749 may_queue = elv_may_queue(q, rw_flags);
755 if (may_queue == ELV_MQUEUE_NO) 750 if (may_queue == ELV_MQUEUE_NO)
756 goto rq_starved; 751 goto rq_starved;
757 752
758 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { 753 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
759 if (rl->count[rw]+1 >= q->nr_requests) { 754 if (rl->count[is_sync]+1 >= q->nr_requests) {
760 ioc = current_io_context(GFP_ATOMIC, q->node); 755 ioc = current_io_context(GFP_ATOMIC, q->node);
761 /* 756 /*
762 * The queue will fill after this allocation, so set 757 * The queue will fill after this allocation, so set
@@ -764,9 +759,9 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
764 * This process will be allowed to complete a batch of 759 * This process will be allowed to complete a batch of
765 * requests, others will be blocked. 760 * requests, others will be blocked.
766 */ 761 */
767 if (!blk_queue_full(q, rw)) { 762 if (!blk_queue_full(q, is_sync)) {
768 ioc_set_batching(q, ioc); 763 ioc_set_batching(q, ioc);
769 blk_set_queue_full(q, rw); 764 blk_set_queue_full(q, is_sync);
770 } else { 765 } else {
771 if (may_queue != ELV_MQUEUE_MUST 766 if (may_queue != ELV_MQUEUE_MUST
772 && !ioc_batching(q, ioc)) { 767 && !ioc_batching(q, ioc)) {
@@ -779,7 +774,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
779 } 774 }
780 } 775 }
781 } 776 }
782 blk_set_queue_congested(q, rw); 777 blk_set_queue_congested(q, is_sync);
783 } 778 }
784 779
785 /* 780 /*
@@ -787,11 +782,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
787 * limit of requests, otherwise we could have thousands of requests 782 * limit of requests, otherwise we could have thousands of requests
788 * allocated with any setting of ->nr_requests 783 * allocated with any setting of ->nr_requests
789 */ 784 */
790 if (rl->count[rw] >= (3 * q->nr_requests / 2)) 785 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
791 goto out; 786 goto out;
792 787
793 rl->count[rw]++; 788 rl->count[is_sync]++;
794 rl->starved[rw] = 0; 789 rl->starved[is_sync] = 0;
795 790
796 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); 791 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
797 if (priv) 792 if (priv)
@@ -809,7 +804,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
809 * wait queue, but this is pretty rare. 804 * wait queue, but this is pretty rare.
810 */ 805 */
811 spin_lock_irq(q->queue_lock); 806 spin_lock_irq(q->queue_lock);
812 freed_request(q, rw, priv); 807 freed_request(q, is_sync, priv);
813 808
814 /* 809 /*
815 * in the very unlikely event that allocation failed and no 810 * in the very unlikely event that allocation failed and no
@@ -819,8 +814,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
819 * rq mempool into READ and WRITE 814 * rq mempool into READ and WRITE
820 */ 815 */
821rq_starved: 816rq_starved:
822 if (unlikely(rl->count[rw] == 0)) 817 if (unlikely(rl->count[is_sync] == 0))
823 rl->starved[rw] = 1; 818 rl->starved[is_sync] = 1;
824 819
825 goto out; 820 goto out;
826 } 821 }
@@ -834,7 +829,7 @@ rq_starved:
834 if (ioc_batching(q, ioc)) 829 if (ioc_batching(q, ioc))
835 ioc->nr_batch_requests--; 830 ioc->nr_batch_requests--;
836 831
837 trace_block_getrq(q, bio, rw); 832 trace_block_getrq(q, bio, rw_flags & 1);
838out: 833out:
839 return rq; 834 return rq;
840} 835}
@@ -848,7 +843,7 @@ out:
848static struct request *get_request_wait(struct request_queue *q, int rw_flags, 843static struct request *get_request_wait(struct request_queue *q, int rw_flags,
849 struct bio *bio) 844 struct bio *bio)
850{ 845{
851 const int rw = rw_flags & 0x01; 846 const bool is_sync = rw_is_sync(rw_flags) != 0;
852 struct request *rq; 847 struct request *rq;
853 848
854 rq = get_request(q, rw_flags, bio, GFP_NOIO); 849 rq = get_request(q, rw_flags, bio, GFP_NOIO);
@@ -857,10 +852,10 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
857 struct io_context *ioc; 852 struct io_context *ioc;
858 struct request_list *rl = &q->rq; 853 struct request_list *rl = &q->rq;
859 854
860 prepare_to_wait_exclusive(&rl->wait[rw], &wait, 855 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
861 TASK_UNINTERRUPTIBLE); 856 TASK_UNINTERRUPTIBLE);
862 857
863 trace_block_sleeprq(q, bio, rw); 858 trace_block_sleeprq(q, bio, rw_flags & 1);
864 859
865 __generic_unplug_device(q); 860 __generic_unplug_device(q);
866 spin_unlock_irq(q->queue_lock); 861 spin_unlock_irq(q->queue_lock);
@@ -876,7 +871,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
876 ioc_set_batching(q, ioc); 871 ioc_set_batching(q, ioc);
877 872
878 spin_lock_irq(q->queue_lock); 873 spin_lock_irq(q->queue_lock);
879 finish_wait(&rl->wait[rw], &wait); 874 finish_wait(&rl->wait[is_sync], &wait);
880 875
881 rq = get_request(q, rw_flags, bio, GFP_NOIO); 876 rq = get_request(q, rw_flags, bio, GFP_NOIO);
882 }; 877 };
@@ -1067,19 +1062,22 @@ void __blk_put_request(struct request_queue *q, struct request *req)
1067 1062
1068 elv_completed_request(q, req); 1063 elv_completed_request(q, req);
1069 1064
1065 /* this is a bio leak */
1066 WARN_ON(req->bio != NULL);
1067
1070 /* 1068 /*
1071 * Request may not have originated from ll_rw_blk. if not, 1069 * Request may not have originated from ll_rw_blk. if not,
1072 * it didn't come out of our reserved rq pools 1070 * it didn't come out of our reserved rq pools
1073 */ 1071 */
1074 if (req->cmd_flags & REQ_ALLOCED) { 1072 if (req->cmd_flags & REQ_ALLOCED) {
1075 int rw = rq_data_dir(req); 1073 int is_sync = rq_is_sync(req) != 0;
1076 int priv = req->cmd_flags & REQ_ELVPRIV; 1074 int priv = req->cmd_flags & REQ_ELVPRIV;
1077 1075
1078 BUG_ON(!list_empty(&req->queuelist)); 1076 BUG_ON(!list_empty(&req->queuelist));
1079 BUG_ON(!hlist_unhashed(&req->hash)); 1077 BUG_ON(!hlist_unhashed(&req->hash));
1080 1078
1081 blk_free_request(q, req); 1079 blk_free_request(q, req);
1082 freed_request(q, rw, priv); 1080 freed_request(q, is_sync, priv);
1083 } 1081 }
1084} 1082}
1085EXPORT_SYMBOL_GPL(__blk_put_request); 1083EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1126,10 +1124,10 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1126 1124
1127 if (bio_sync(bio)) 1125 if (bio_sync(bio))
1128 req->cmd_flags |= REQ_RW_SYNC; 1126 req->cmd_flags |= REQ_RW_SYNC;
1129 if (bio_unplug(bio))
1130 req->cmd_flags |= REQ_UNPLUG;
1131 if (bio_rw_meta(bio)) 1127 if (bio_rw_meta(bio))
1132 req->cmd_flags |= REQ_RW_META; 1128 req->cmd_flags |= REQ_RW_META;
1129 if (bio_noidle(bio))
1130 req->cmd_flags |= REQ_NOIDLE;
1133 1131
1134 req->errors = 0; 1132 req->errors = 0;
1135 req->hard_sector = req->sector = bio->bi_sector; 1133 req->hard_sector = req->sector = bio->bi_sector;
@@ -1138,6 +1136,15 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1138 blk_rq_bio_prep(req->q, req, bio); 1136 blk_rq_bio_prep(req->q, req, bio);
1139} 1137}
1140 1138
1139/*
1140 * Only disabling plugging for non-rotational devices if it does tagging
1141 * as well, otherwise we do need the proper merging
1142 */
1143static inline bool queue_should_plug(struct request_queue *q)
1144{
1145 return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
1146}
1147
1141static int __make_request(struct request_queue *q, struct bio *bio) 1148static int __make_request(struct request_queue *q, struct bio *bio)
1142{ 1149{
1143 struct request *req; 1150 struct request *req;
@@ -1244,11 +1251,11 @@ get_rq:
1244 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1251 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1245 bio_flagged(bio, BIO_CPU_AFFINE)) 1252 bio_flagged(bio, BIO_CPU_AFFINE))
1246 req->cpu = blk_cpu_to_group(smp_processor_id()); 1253 req->cpu = blk_cpu_to_group(smp_processor_id());
1247 if (!blk_queue_nonrot(q) && elv_queue_empty(q)) 1254 if (queue_should_plug(q) && elv_queue_empty(q))
1248 blk_plug_device(q); 1255 blk_plug_device(q);
1249 add_request(q, req); 1256 add_request(q, req);
1250out: 1257out:
1251 if (unplug || blk_queue_nonrot(q)) 1258 if (unplug || !queue_should_plug(q))
1252 __generic_unplug_device(q); 1259 __generic_unplug_device(q);
1253 spin_unlock_irq(q->queue_lock); 1260 spin_unlock_irq(q->queue_lock);
1254 return 0; 1261 return 0;
@@ -1666,9 +1673,7 @@ EXPORT_SYMBOL(blkdev_dequeue_request);
1666 1673
1667static void blk_account_io_completion(struct request *req, unsigned int bytes) 1674static void blk_account_io_completion(struct request *req, unsigned int bytes)
1668{ 1675{
1669 struct gendisk *disk = req->rq_disk; 1676 if (!blk_do_io_stat(req))
1670
1671 if (!disk || !blk_do_io_stat(disk->queue))
1672 return; 1677 return;
1673 1678
1674 if (blk_fs_request(req)) { 1679 if (blk_fs_request(req)) {
@@ -1685,9 +1690,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
1685 1690
1686static void blk_account_io_done(struct request *req) 1691static void blk_account_io_done(struct request *req)
1687{ 1692{
1688 struct gendisk *disk = req->rq_disk; 1693 if (!blk_do_io_stat(req))
1689
1690 if (!disk || !blk_do_io_stat(disk->queue))
1691 return; 1694 return;
1692 1695
1693 /* 1696 /*
@@ -1702,7 +1705,7 @@ static void blk_account_io_done(struct request *req)
1702 int cpu; 1705 int cpu;
1703 1706
1704 cpu = part_stat_lock(); 1707 cpu = part_stat_lock();
1705 part = disk_map_sector_rcu(disk, req->sector); 1708 part = disk_map_sector_rcu(req->rq_disk, req->sector);
1706 1709
1707 part_stat_inc(cpu, part, ios[rw]); 1710 part_stat_inc(cpu, part, ios[rw]);
1708 part_stat_add(cpu, part, ticks[rw], duration); 1711 part_stat_add(cpu, part, ticks[rw], duration);
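Several of the blk-core.c hunks above reindex the per-queue request lists by sync/async instead of READ/WRITE. A standalone sketch of that mapping follows (plain C, not kernel code: the real helpers are rw_is_sync()/rq_is_sync(), and it assumes BLK_RW_ASYNC == 0 and BLK_RW_SYNC == 1, so that a boolean can double as the list index the way the get_request() hunks use it):

	/*
	 * Sketch of the sync/async request-list indexing. Reads are treated
	 * as synchronous; writes only when explicitly marked sync.
	 */
	#include <stdbool.h>

	enum { SKETCH_BLK_RW_ASYNC = 0, SKETCH_BLK_RW_SYNC = 1 };

	static inline bool sketch_rw_is_sync(bool is_write, bool marked_sync)
	{
		return !is_write || marked_sync;
	}

	static inline int sketch_rw_list_index(bool is_write, bool marked_sync)
	{
		return sketch_rw_is_sync(is_write, marked_sync) ?
			SKETCH_BLK_RW_SYNC : SKETCH_BLK_RW_ASYNC;
	}

With that mapping, rl->count[BLK_RW_SYNC] tracks synchronous requests (all reads plus writes marked sync) and rl->count[BLK_RW_ASYNC] tracks ordinary writes.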
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5a244f05360f..63760ca3da0f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -338,6 +338,22 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
338 return 1; 338 return 1;
339} 339}
340 340
341static void blk_account_io_merge(struct request *req)
342{
343 if (blk_do_io_stat(req)) {
344 struct hd_struct *part;
345 int cpu;
346
347 cpu = part_stat_lock();
348 part = disk_map_sector_rcu(req->rq_disk, req->sector);
349
350 part_round_stats(cpu, part);
351 part_dec_in_flight(part);
352
353 part_stat_unlock();
354 }
355}
356
341/* 357/*
342 * Has to be called with the request spinlock acquired 358 * Has to be called with the request spinlock acquired
343 */ 359 */
@@ -386,23 +402,14 @@ static int attempt_merge(struct request_queue *q, struct request *req,
386 402
387 elv_merge_requests(q, req, next); 403 elv_merge_requests(q, req, next);
388 404
389 if (req->rq_disk) { 405 blk_account_io_merge(req);
390 struct hd_struct *part;
391 int cpu;
392
393 cpu = part_stat_lock();
394 part = disk_map_sector_rcu(req->rq_disk, req->sector);
395
396 part_round_stats(cpu, part);
397 part_dec_in_flight(part);
398
399 part_stat_unlock();
400 }
401 406
402 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 407 req->ioprio = ioprio_best(req->ioprio, next->ioprio);
403 if (blk_rq_cpu_valid(next)) 408 if (blk_rq_cpu_valid(next))
404 req->cpu = next->cpu; 409 req->cpu = next->cpu;
405 410
411 /* owner-ship of bio passed from next to req */
412 next->bio = NULL;
406 __blk_put_request(q, next); 413 __blk_put_request(q, next);
407 return 1; 414 return 1;
408} 415}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 59fd05d9f1d5..69c42adde52b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -431,7 +431,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary);
431 * 431 *
432 * description: 432 * description:
433 * set required memory and length alignment for direct dma transactions. 433 * set required memory and length alignment for direct dma transactions.
434 * this is used when buiding direct io requests for the queue. 434 * this is used when building direct io requests for the queue.
435 * 435 *
436 **/ 436 **/
437void blk_queue_dma_alignment(struct request_queue *q, int mask) 437void blk_queue_dma_alignment(struct request_queue *q, int mask)
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ce0efc6b26dc..ee9c21602228 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -64,7 +64,7 @@ static int raise_blk_irq(int cpu, struct request *rq)
64 data->info = rq; 64 data->info = rq;
65 data->flags = 0; 65 data->flags = 0;
66 66
67 __smp_call_function_single(cpu, data); 67 __smp_call_function_single(cpu, data, 0);
68 return 0; 68 return 0;
69 } 69 }
70 70
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e29ddfc73cf4..73f36beff5cd 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -48,28 +48,28 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
48 q->nr_requests = nr; 48 q->nr_requests = nr;
49 blk_queue_congestion_threshold(q); 49 blk_queue_congestion_threshold(q);
50 50
51 if (rl->count[READ] >= queue_congestion_on_threshold(q)) 51 if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
52 blk_set_queue_congested(q, READ); 52 blk_set_queue_congested(q, BLK_RW_SYNC);
53 else if (rl->count[READ] < queue_congestion_off_threshold(q)) 53 else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
54 blk_clear_queue_congested(q, READ); 54 blk_clear_queue_congested(q, BLK_RW_SYNC);
55 55
56 if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) 56 if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
57 blk_set_queue_congested(q, WRITE); 57 blk_set_queue_congested(q, BLK_RW_ASYNC);
58 else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) 58 else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
59 blk_clear_queue_congested(q, WRITE); 59 blk_clear_queue_congested(q, BLK_RW_ASYNC);
60 60
61 if (rl->count[READ] >= q->nr_requests) { 61 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
62 blk_set_queue_full(q, READ); 62 blk_set_queue_full(q, BLK_RW_SYNC);
63 } else if (rl->count[READ]+1 <= q->nr_requests) { 63 } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) {
64 blk_clear_queue_full(q, READ); 64 blk_clear_queue_full(q, BLK_RW_SYNC);
65 wake_up(&rl->wait[READ]); 65 wake_up(&rl->wait[BLK_RW_SYNC]);
66 } 66 }
67 67
68 if (rl->count[WRITE] >= q->nr_requests) { 68 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
69 blk_set_queue_full(q, WRITE); 69 blk_set_queue_full(q, BLK_RW_ASYNC);
70 } else if (rl->count[WRITE]+1 <= q->nr_requests) { 70 } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) {
71 blk_clear_queue_full(q, WRITE); 71 blk_clear_queue_full(q, BLK_RW_ASYNC);
72 wake_up(&rl->wait[WRITE]); 72 wake_up(&rl->wait[BLK_RW_ASYNC]);
73 } 73 }
74 spin_unlock_irq(q->queue_lock); 74 spin_unlock_irq(q->queue_lock);
75 return ret; 75 return ret;
@@ -209,10 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
209 ssize_t ret = queue_var_store(&stats, page, count); 209 ssize_t ret = queue_var_store(&stats, page, count);
210 210
211 spin_lock_irq(q->queue_lock); 211 spin_lock_irq(q->queue_lock);
212 elv_quisce_start(q);
213
212 if (stats) 214 if (stats)
213 queue_flag_set(QUEUE_FLAG_IO_STAT, q); 215 queue_flag_set(QUEUE_FLAG_IO_STAT, q);
214 else 216 else
215 queue_flag_clear(QUEUE_FLAG_IO_STAT, q); 217 queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
218
219 elv_quisce_end(q);
216 spin_unlock_irq(q->queue_lock); 220 spin_unlock_irq(q->queue_lock);
217 221
218 return ret; 222 return ret;
diff --git a/block/blk.h b/block/blk.h
index 0dce92c37496..24fcaeeaf620 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -70,6 +70,10 @@ void blk_queue_congestion_threshold(struct request_queue *q);
70 70
71int blk_dev_init(void); 71int blk_dev_init(void);
72 72
73void elv_quisce_start(struct request_queue *q);
74void elv_quisce_end(struct request_queue *q);
75
76
73/* 77/*
74 * Return the threshold (number of used requests) at which the queue is 78 * Return the threshold (number of used requests) at which the queue is
75 * considered to be congested. It include a little hysteresis to keep the 79 * considered to be congested. It include a little hysteresis to keep the
@@ -102,18 +106,20 @@ static inline int blk_cpu_to_group(int cpu)
102 const struct cpumask *mask = cpu_coregroup_mask(cpu); 106 const struct cpumask *mask = cpu_coregroup_mask(cpu);
103 return cpumask_first(mask); 107 return cpumask_first(mask);
104#elif defined(CONFIG_SCHED_SMT) 108#elif defined(CONFIG_SCHED_SMT)
105 return first_cpu(per_cpu(cpu_sibling_map, cpu)); 109 return cpumask_first(topology_thread_cpumask(cpu));
106#else 110#else
107 return cpu; 111 return cpu;
108#endif 112#endif
109} 113}
110 114
111static inline int blk_do_io_stat(struct request_queue *q) 115static inline int blk_do_io_stat(struct request *rq)
112{ 116{
113 if (q) 117 struct gendisk *disk = rq->rq_disk;
114 return blk_queue_io_stat(q);
115 118
116 return 0; 119 if (!disk || !disk->queue)
120 return 0;
121
122 return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
117} 123}
118 124
119#endif 125#endif
diff --git a/block/bsg.c b/block/bsg.c
index 0ce8806dd0c1..206060e795da 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -218,9 +218,6 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw)
218 218
219 if (hdr->guard != 'Q') 219 if (hdr->guard != 'Q')
220 return -EINVAL; 220 return -EINVAL;
221 if (hdr->dout_xfer_len > (q->max_sectors << 9) ||
222 hdr->din_xfer_len > (q->max_sectors << 9))
223 return -EIO;
224 221
225 switch (hdr->protocol) { 222 switch (hdr->protocol) {
226 case BSG_PROTOCOL_SCSI: 223 case BSG_PROTOCOL_SCSI:
@@ -353,6 +350,8 @@ static void bsg_rq_end_io(struct request *rq, int uptodate)
353static void bsg_add_command(struct bsg_device *bd, struct request_queue *q, 350static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
354 struct bsg_command *bc, struct request *rq) 351 struct bsg_command *bc, struct request *rq)
355{ 352{
353 int at_head = (0 == (bc->hdr.flags & BSG_FLAG_Q_AT_TAIL));
354
356 /* 355 /*
357 * add bc command to busy queue and submit rq for io 356 * add bc command to busy queue and submit rq for io
358 */ 357 */
@@ -368,7 +367,7 @@ static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
368 dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc); 367 dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc);
369 368
370 rq->end_io_data = bc; 369 rq->end_io_data = bc;
371 blk_execute_rq_nowait(q, NULL, rq, 1, bsg_rq_end_io); 370 blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io);
372} 371}
373 372
374static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd) 373static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd)
@@ -924,6 +923,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
924 struct request *rq; 923 struct request *rq;
925 struct bio *bio, *bidi_bio = NULL; 924 struct bio *bio, *bidi_bio = NULL;
926 struct sg_io_v4 hdr; 925 struct sg_io_v4 hdr;
926 int at_head;
927 u8 sense[SCSI_SENSE_BUFFERSIZE]; 927 u8 sense[SCSI_SENSE_BUFFERSIZE];
928 928
929 if (copy_from_user(&hdr, uarg, sizeof(hdr))) 929 if (copy_from_user(&hdr, uarg, sizeof(hdr)))
@@ -936,7 +936,9 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
936 bio = rq->bio; 936 bio = rq->bio;
937 if (rq->next_rq) 937 if (rq->next_rq)
938 bidi_bio = rq->next_rq->bio; 938 bidi_bio = rq->next_rq->bio;
939 blk_execute_rq(bd->queue, NULL, rq, 0); 939
940 at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL));
941 blk_execute_rq(bd->queue, NULL, rq, at_head);
940 ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio); 942 ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio);
941 943
942 if (copy_to_user(uarg, &hdr, sizeof(hdr))) 944 if (copy_to_user(uarg, &hdr, sizeof(hdr)))
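The bsg.c hunks above start honoring a queue-position flag from userspace instead of always inserting at the head. A minimal sketch of that mapping (the flag name comes from the hunks; its numeric value below is an assumption for illustration only):

	/* bsg queues at the head by default; BSG_FLAG_Q_AT_TAIL asks for tail insertion. */
	#define SKETCH_BSG_FLAG_Q_AT_TAIL 0x10	/* assumed value */

	static inline int sketch_bsg_at_head(unsigned int hdr_flags)
	{
		return (hdr_flags & SKETCH_BSG_FLAG_Q_AT_TAIL) == 0;
	}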
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 664ebfd092ec..a4809de6fea6 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -160,6 +160,7 @@ struct cfq_queue {
160 160
161 unsigned long slice_end; 161 unsigned long slice_end;
162 long slice_resid; 162 long slice_resid;
163 unsigned int slice_dispatch;
163 164
164 /* pending metadata requests */ 165 /* pending metadata requests */
165 int meta_pending; 166 int meta_pending;
@@ -176,13 +177,12 @@ struct cfq_queue {
176enum cfqq_state_flags { 177enum cfqq_state_flags {
177 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ 178 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */
178 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ 179 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */
180 CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */
179 CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ 181 CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
180 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ 182 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
181 CFQ_CFQQ_FLAG_must_dispatch, /* must dispatch, even if expired */
182 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ 183 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
183 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ 184 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */
184 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ 185 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
185 CFQ_CFQQ_FLAG_queue_new, /* queue never been serviced */
186 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ 186 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
187 CFQ_CFQQ_FLAG_sync, /* synchronous queue */ 187 CFQ_CFQQ_FLAG_sync, /* synchronous queue */
188}; 188};
@@ -203,13 +203,12 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
203 203
204CFQ_CFQQ_FNS(on_rr); 204CFQ_CFQQ_FNS(on_rr);
205CFQ_CFQQ_FNS(wait_request); 205CFQ_CFQQ_FNS(wait_request);
206CFQ_CFQQ_FNS(must_dispatch);
206CFQ_CFQQ_FNS(must_alloc); 207CFQ_CFQQ_FNS(must_alloc);
207CFQ_CFQQ_FNS(must_alloc_slice); 208CFQ_CFQQ_FNS(must_alloc_slice);
208CFQ_CFQQ_FNS(must_dispatch);
209CFQ_CFQQ_FNS(fifo_expire); 209CFQ_CFQQ_FNS(fifo_expire);
210CFQ_CFQQ_FNS(idle_window); 210CFQ_CFQQ_FNS(idle_window);
211CFQ_CFQQ_FNS(prio_changed); 211CFQ_CFQQ_FNS(prio_changed);
212CFQ_CFQQ_FNS(queue_new);
213CFQ_CFQQ_FNS(slice_new); 212CFQ_CFQQ_FNS(slice_new);
214CFQ_CFQQ_FNS(sync); 213CFQ_CFQQ_FNS(sync);
215#undef CFQ_CFQQ_FNS 214#undef CFQ_CFQQ_FNS
@@ -774,10 +773,15 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
774 if (cfqq) { 773 if (cfqq) {
775 cfq_log_cfqq(cfqd, cfqq, "set_active"); 774 cfq_log_cfqq(cfqd, cfqq, "set_active");
776 cfqq->slice_end = 0; 775 cfqq->slice_end = 0;
776 cfqq->slice_dispatch = 0;
777
778 cfq_clear_cfqq_wait_request(cfqq);
779 cfq_clear_cfqq_must_dispatch(cfqq);
777 cfq_clear_cfqq_must_alloc_slice(cfqq); 780 cfq_clear_cfqq_must_alloc_slice(cfqq);
778 cfq_clear_cfqq_fifo_expire(cfqq); 781 cfq_clear_cfqq_fifo_expire(cfqq);
779 cfq_mark_cfqq_slice_new(cfqq); 782 cfq_mark_cfqq_slice_new(cfqq);
780 cfq_clear_cfqq_queue_new(cfqq); 783
784 del_timer(&cfqd->idle_slice_timer);
781 } 785 }
782 786
783 cfqd->active_queue = cfqq; 787 cfqd->active_queue = cfqq;
@@ -795,7 +799,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
795 if (cfq_cfqq_wait_request(cfqq)) 799 if (cfq_cfqq_wait_request(cfqq))
796 del_timer(&cfqd->idle_slice_timer); 800 del_timer(&cfqd->idle_slice_timer);
797 801
798 cfq_clear_cfqq_must_dispatch(cfqq);
799 cfq_clear_cfqq_wait_request(cfqq); 802 cfq_clear_cfqq_wait_request(cfqq);
800 803
801 /* 804 /*
@@ -924,7 +927,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
924 (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2)) 927 (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
925 return; 928 return;
926 929
927 cfq_mark_cfqq_must_dispatch(cfqq);
928 cfq_mark_cfqq_wait_request(cfqq); 930 cfq_mark_cfqq_wait_request(cfqq);
929 931
930 /* 932 /*
@@ -1010,7 +1012,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1010 /* 1012 /*
1011 * The active queue has run out of time, expire it and select new. 1013 * The active queue has run out of time, expire it and select new.
1012 */ 1014 */
1013 if (cfq_slice_used(cfqq)) 1015 if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq))
1014 goto expire; 1016 goto expire;
1015 1017
1016 /* 1018 /*
@@ -1053,66 +1055,6 @@ keep_queue:
1053 return cfqq; 1055 return cfqq;
1054} 1056}
1055 1057
1056/*
1057 * Dispatch some requests from cfqq, moving them to the request queue
1058 * dispatch list.
1059 */
1060static int
1061__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1062 int max_dispatch)
1063{
1064 int dispatched = 0;
1065
1066 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
1067
1068 do {
1069 struct request *rq;
1070
1071 /*
1072 * follow expired path, else get first next available
1073 */
1074 rq = cfq_check_fifo(cfqq);
1075 if (rq == NULL)
1076 rq = cfqq->next_rq;
1077
1078 /*
1079 * finally, insert request into driver dispatch list
1080 */
1081 cfq_dispatch_insert(cfqd->queue, rq);
1082
1083 dispatched++;
1084
1085 if (!cfqd->active_cic) {
1086 atomic_inc(&RQ_CIC(rq)->ioc->refcount);
1087 cfqd->active_cic = RQ_CIC(rq);
1088 }
1089
1090 if (RB_EMPTY_ROOT(&cfqq->sort_list))
1091 break;
1092
1093 /*
1094 * If there is a non-empty RT cfqq waiting for current
1095 * cfqq's timeslice to complete, pre-empt this cfqq
1096 */
1097 if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues)
1098 break;
1099
1100 } while (dispatched < max_dispatch);
1101
1102 /*
1103 * expire an async queue immediately if it has used up its slice. idle
1104 * queue always expire after 1 dispatch round.
1105 */
1106 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
1107 dispatched >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
1108 cfq_class_idle(cfqq))) {
1109 cfqq->slice_end = jiffies + 1;
1110 cfq_slice_expired(cfqd, 0);
1111 }
1112
1113 return dispatched;
1114}
1115
1116static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) 1058static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
1117{ 1059{
1118 int dispatched = 0; 1060 int dispatched = 0;
@@ -1146,11 +1088,45 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
1146 return dispatched; 1088 return dispatched;
1147} 1089}
1148 1090
1091/*
1092 * Dispatch a request from cfqq, moving them to the request queue
1093 * dispatch list.
1094 */
1095static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1096{
1097 struct request *rq;
1098
1099 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
1100
1101 /*
1102 * follow expired path, else get first next available
1103 */
1104 rq = cfq_check_fifo(cfqq);
1105 if (!rq)
1106 rq = cfqq->next_rq;
1107
1108 /*
1109 * insert request into driver dispatch list
1110 */
1111 cfq_dispatch_insert(cfqd->queue, rq);
1112
1113 if (!cfqd->active_cic) {
1114 struct cfq_io_context *cic = RQ_CIC(rq);
1115
1116 atomic_inc(&cic->ioc->refcount);
1117 cfqd->active_cic = cic;
1118 }
1119}
1120
1121/*
1122 * Find the cfqq that we need to service and move a request from that to the
1123 * dispatch list
1124 */
1149static int cfq_dispatch_requests(struct request_queue *q, int force) 1125static int cfq_dispatch_requests(struct request_queue *q, int force)
1150{ 1126{
1151 struct cfq_data *cfqd = q->elevator->elevator_data; 1127 struct cfq_data *cfqd = q->elevator->elevator_data;
1152 struct cfq_queue *cfqq; 1128 struct cfq_queue *cfqq;
1153 int dispatched; 1129 unsigned int max_dispatch;
1154 1130
1155 if (!cfqd->busy_queues) 1131 if (!cfqd->busy_queues)
1156 return 0; 1132 return 0;
@@ -1158,29 +1134,63 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
1158 if (unlikely(force)) 1134 if (unlikely(force))
1159 return cfq_forced_dispatch(cfqd); 1135 return cfq_forced_dispatch(cfqd);
1160 1136
1161 dispatched = 0; 1137 cfqq = cfq_select_queue(cfqd);
1162 while ((cfqq = cfq_select_queue(cfqd)) != NULL) { 1138 if (!cfqq)
1163 int max_dispatch; 1139 return 0;
1140
1141 /*
1142 * If this is an async queue and we have sync IO in flight, let it wait
1143 */
1144 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
1145 return 0;
1146
1147 max_dispatch = cfqd->cfq_quantum;
1148 if (cfq_class_idle(cfqq))
1149 max_dispatch = 1;
1164 1150
1165 max_dispatch = cfqd->cfq_quantum; 1151 /*
1152 * Does this cfqq already have too much IO in flight?
1153 */
1154 if (cfqq->dispatched >= max_dispatch) {
1155 /*
1156 * idle queue must always only have a single IO in flight
1157 */
1166 if (cfq_class_idle(cfqq)) 1158 if (cfq_class_idle(cfqq))
1167 max_dispatch = 1; 1159 return 0;
1168 1160
1169 if (cfqq->dispatched >= max_dispatch && cfqd->busy_queues > 1) 1161 /*
1170 break; 1162 * We have other queues, don't allow more IO from this one
1163 */
1164 if (cfqd->busy_queues > 1)
1165 return 0;
1171 1166
1172 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) 1167 /*
1173 break; 1168 * we are the only queue, allow up to 4 times of 'quantum'
1169 */
1170 if (cfqq->dispatched >= 4 * max_dispatch)
1171 return 0;
1172 }
1174 1173
1175 cfq_clear_cfqq_must_dispatch(cfqq); 1174 /*
1176 cfq_clear_cfqq_wait_request(cfqq); 1175 * Dispatch a request from this cfqq
1177 del_timer(&cfqd->idle_slice_timer); 1176 */
1177 cfq_dispatch_request(cfqd, cfqq);
1178 cfqq->slice_dispatch++;
1179 cfq_clear_cfqq_must_dispatch(cfqq);
1178 1180
1179 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1181 /*
1182 * expire an async queue immediately if it has used up its slice. idle
1183 * queue always expire after 1 dispatch round.
1184 */
1185 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
1186 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
1187 cfq_class_idle(cfqq))) {
1188 cfqq->slice_end = jiffies + 1;
1189 cfq_slice_expired(cfqd, 0);
1180 } 1190 }
1181 1191
1182 cfq_log(cfqd, "dispatched=%d", dispatched); 1192 cfq_log(cfqd, "dispatched a request");
1183 return dispatched; 1193 return 1;
1184} 1194}
1185 1195
1186/* 1196/*
@@ -1506,7 +1516,6 @@ retry:
1506 cfqq->cfqd = cfqd; 1516 cfqq->cfqd = cfqd;
1507 1517
1508 cfq_mark_cfqq_prio_changed(cfqq); 1518 cfq_mark_cfqq_prio_changed(cfqq);
1509 cfq_mark_cfqq_queue_new(cfqq);
1510 1519
1511 cfq_init_prio_data(cfqq, ioc); 1520 cfq_init_prio_data(cfqq, ioc);
1512 1521
@@ -1893,15 +1902,13 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1893 1902
1894 if (cfqq == cfqd->active_queue) { 1903 if (cfqq == cfqd->active_queue) {
1895 /* 1904 /*
1896 * if we are waiting for a request for this queue, let it rip 1905 * Remember that we saw a request from this process, but
1897 * immediately and flag that we must not expire this queue 1906 * don't start queuing just yet. Otherwise we risk seeing lots
1898 * just now 1907 * of tiny requests, because we disrupt the normal plugging
1908 * and merging.
1899 */ 1909 */
1900 if (cfq_cfqq_wait_request(cfqq)) { 1910 if (cfq_cfqq_wait_request(cfqq))
1901 cfq_mark_cfqq_must_dispatch(cfqq); 1911 cfq_mark_cfqq_must_dispatch(cfqq);
1902 del_timer(&cfqd->idle_slice_timer);
1903 blk_start_queueing(cfqd->queue);
1904 }
1905 } else if (cfq_should_preempt(cfqd, cfqq, rq)) { 1912 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
1906 /* 1913 /*
1907 * not the active queue - expire current slice if it is 1914 * not the active queue - expire current slice if it is
@@ -1910,7 +1917,6 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1910 * this new queue is RT and the current one is BE 1917 * this new queue is RT and the current one is BE
1911 */ 1918 */
1912 cfq_preempt_queue(cfqd, cfqq); 1919 cfq_preempt_queue(cfqd, cfqq);
1913 cfq_mark_cfqq_must_dispatch(cfqq);
1914 blk_start_queueing(cfqd->queue); 1920 blk_start_queueing(cfqd->queue);
1915 } 1921 }
1916} 1922}
@@ -1992,8 +1998,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
1992 } 1998 }
1993 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) 1999 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
1994 cfq_slice_expired(cfqd, 1); 2000 cfq_slice_expired(cfqd, 1);
1995 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) 2001 else if (sync && !rq_noidle(rq) &&
2002 RB_EMPTY_ROOT(&cfqq->sort_list)) {
1996 cfq_arm_slice_timer(cfqd); 2003 cfq_arm_slice_timer(cfqd);
2004 }
1997 } 2005 }
1998 2006
1999 if (!cfqd->rq_in_driver) 2007 if (!cfqd->rq_in_driver)
@@ -2170,6 +2178,12 @@ static void cfq_idle_slice_timer(unsigned long data)
2170 timed_out = 0; 2178 timed_out = 0;
2171 2179
2172 /* 2180 /*
2181 * We saw a request before the queue expired, let it through
2182 */
2183 if (cfq_cfqq_must_dispatch(cfqq))
2184 goto out_kick;
2185
2186 /*
2173 * expired 2187 * expired
2174 */ 2188 */
2175 if (cfq_slice_used(cfqq)) 2189 if (cfq_slice_used(cfqq))
@@ -2185,10 +2199,8 @@ static void cfq_idle_slice_timer(unsigned long data)
2185 /* 2199 /*
2186 * not expired and it has a request pending, let it dispatch 2200 * not expired and it has a request pending, let it dispatch
2187 */ 2201 */
2188 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) { 2202 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
2189 cfq_mark_cfqq_must_dispatch(cfqq);
2190 goto out_kick; 2203 goto out_kick;
2191 }
2192 } 2204 }
2193expire: 2205expire:
2194 cfq_slice_expired(cfqd, timed_out); 2206 cfq_slice_expired(cfqd, timed_out);
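The cfq-iosched.c hunks above replace the old inner dispatch loop with a one-request-per-call model, tracked by the new slice_dispatch counter. A standalone model of the per-call decision (plain C, not kernel code; the struct and parameter names are invented for illustration):

	#include <stdbool.h>

	struct cfqq_model {
		int dispatched;		/* requests from this queue still in flight */
		bool is_sync;
		bool is_idle_class;
	};

	/* Returns how many requests one dispatch call moves: 0 or 1. */
	static int model_dispatch(const struct cfqq_model *q, int busy_queues,
				  bool sync_in_flight, int quantum)
	{
		int max_dispatch = q->is_idle_class ? 1 : quantum;

		if (sync_in_flight && !q->is_sync)
			return 0;		/* let sync IO drain first */

		if (q->dispatched >= max_dispatch) {
			if (q->is_idle_class)
				return 0;	/* idle class: strictly one at a time */
			if (busy_queues > 1)
				return 0;	/* other queues are waiting */
			if (q->dispatched >= 4 * max_dispatch)
				return 0;	/* sole busy queue, capped at 4x quantum */
		}
		return 1;			/* dispatch exactly one request */
	}

Async and idle-class queues that have used up their slice are still expired immediately after a dispatch, as before, but the check is now driven by slice_dispatch rather than the count accumulated inside one dispatch loop.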
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
index 504b275e1b90..572bbc2f900d 100644
--- a/block/cmd-filter.c
+++ b/block/cmd-filter.c
@@ -22,6 +22,7 @@
22#include <linux/spinlock.h> 22#include <linux/spinlock.h>
23#include <linux/capability.h> 23#include <linux/capability.h>
24#include <linux/bitops.h> 24#include <linux/bitops.h>
25#include <linux/blkdev.h>
25 26
26#include <scsi/scsi.h> 27#include <scsi/scsi.h>
27#include <linux/cdrom.h> 28#include <linux/cdrom.h>
diff --git a/block/elevator.c b/block/elevator.c
index 98259eda0ef6..fb81bcc14a8c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -573,7 +573,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
573 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); 573 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
574} 574}
575 575
576static void elv_drain_elevator(struct request_queue *q) 576void elv_drain_elevator(struct request_queue *q)
577{ 577{
578 static int printed; 578 static int printed;
579 while (q->elevator->ops->elevator_dispatch_fn(q, 1)) 579 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
@@ -587,6 +587,31 @@ static void elv_drain_elevator(struct request_queue *q)
587 } 587 }
588} 588}
589 589
590/*
591 * Call with queue lock held, interrupts disabled
592 */
593void elv_quisce_start(struct request_queue *q)
594{
595 queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
596
597 /*
598 * make sure we don't have any requests in flight
599 */
600 elv_drain_elevator(q);
601 while (q->rq.elvpriv) {
602 blk_start_queueing(q);
603 spin_unlock_irq(q->queue_lock);
604 msleep(10);
605 spin_lock_irq(q->queue_lock);
606 elv_drain_elevator(q);
607 }
608}
609
610void elv_quisce_end(struct request_queue *q)
611{
612 queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
613}
614
590void elv_insert(struct request_queue *q, struct request *rq, int where) 615void elv_insert(struct request_queue *q, struct request *rq, int where)
591{ 616{
592 struct list_head *pos; 617 struct list_head *pos;
@@ -677,7 +702,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
677 } 702 }
678 703
679 if (unplug_it && blk_queue_plugged(q)) { 704 if (unplug_it && blk_queue_plugged(q)) {
680 int nrq = q->rq.count[READ] + q->rq.count[WRITE] 705 int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
681 - q->in_flight; 706 - q->in_flight;
682 707
683 if (nrq >= q->unplug_thresh) 708 if (nrq >= q->unplug_thresh)
@@ -1101,18 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
1101 * Turn on BYPASS and drain all requests w/ elevator private data 1126 * Turn on BYPASS and drain all requests w/ elevator private data
1102 */ 1127 */
1103 spin_lock_irq(q->queue_lock); 1128 spin_lock_irq(q->queue_lock);
1104 1129 elv_quisce_start(q);
1105 queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
1106
1107 elv_drain_elevator(q);
1108
1109 while (q->rq.elvpriv) {
1110 blk_start_queueing(q);
1111 spin_unlock_irq(q->queue_lock);
1112 msleep(10);
1113 spin_lock_irq(q->queue_lock);
1114 elv_drain_elevator(q);
1115 }
1116 1130
1117 /* 1131 /*
1118 * Remember old elevator. 1132 * Remember old elevator.
@@ -1136,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
1136 */ 1150 */
1137 elevator_exit(old_elevator); 1151 elevator_exit(old_elevator);
1138 spin_lock_irq(q->queue_lock); 1152 spin_lock_irq(q->queue_lock);
1139 queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); 1153 elv_quisce_end(q);
1140 spin_unlock_irq(q->queue_lock); 1154 spin_unlock_irq(q->queue_lock);
1141 1155
1142 blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); 1156 blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ee9c67d7e1be..626ee274c5c4 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -214,21 +214,10 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
214 return 0; 214 return 0;
215} 215}
216 216
217/*
218 * unmap a request that was previously mapped to this sg_io_hdr. handles
219 * both sg and non-sg sg_io_hdr.
220 */
221static int blk_unmap_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr)
222{
223 blk_rq_unmap_user(rq->bio);
224 blk_put_request(rq);
225 return 0;
226}
227
228static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, 217static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
229 struct bio *bio) 218 struct bio *bio)
230{ 219{
231 int r, ret = 0; 220 int ret = 0;
232 221
233 /* 222 /*
234 * fill in all the output members 223 * fill in all the output members
@@ -253,12 +242,10 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
253 ret = -EFAULT; 242 ret = -EFAULT;
254 } 243 }
255 244
256 rq->bio = bio; 245 blk_rq_unmap_user(bio);
257 r = blk_unmap_sghdr_rq(rq, hdr); 246 blk_put_request(rq);
258 if (ret)
259 r = ret;
260 247
261 return r; 248 return ret;
262} 249}
263 250
264static int sg_io(struct request_queue *q, struct gendisk *bd_disk, 251static int sg_io(struct request_queue *q, struct gendisk *bd_disk,