author     Linus Torvalds <torvalds@g5.osdl.org>   2005-10-28 11:53:49 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>   2005-10-28 11:53:49 -0400
commit     28d721e24c88496ff8e9c4a0959bdc1415c0658e
tree       0652161bbbcbfddf47c7ddb25d2db8ecd4cbec89
parent     0ee40c6628434f0535da31deeacc28b61e80d810
parent     cb19833dccb32f97cacbfff834b53523915f13f6

Merge branch 'generic-dispatch' of git://brick.kernel.dk/data/git/linux-2.6-block
 -rw-r--r--  Documentation/block/biodoc.txt    | 113
 -rw-r--r--  drivers/block/as-iosched.c        | 325
 -rw-r--r--  drivers/block/cfq-iosched.c       | 364
 -rw-r--r--  drivers/block/deadline-iosched.c  | 123
 -rw-r--r--  drivers/block/elevator.c          | 266
 -rw-r--r--  drivers/block/ll_rw_blk.c         |  23
 -rw-r--r--  drivers/block/noop-iosched.c      |  48
 -rw-r--r--  include/linux/blkdev.h            |  26
 -rw-r--r--  include/linux/elevator.h          |  18

 9 files changed, 459 insertions(+), 847 deletions(-)
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6dd274d7e1cf..2d65c2182161 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -906,9 +906,20 @@ Aside:
 
 
 4. The I/O scheduler
-I/O schedulers are now per queue. They should be runtime switchable and modular
-but aren't yet. Jens has most bits to do this, but the sysfs implementation is
-missing.
+The I/O scheduler, a.k.a. elevator, is implemented in two layers: the generic
+dispatch queue and the specific I/O schedulers. Unless stated otherwise,
+"elevator" refers to both parts together and "I/O scheduler" to the specific
+schedulers.
+
+The block layer implements the generic dispatch queue in ll_rw_blk.c and
+elevator.c. The generic dispatch queue is responsible for properly ordering
+barrier requests, requeueing, handling non-fs requests and all other
+subtleties.
+
+Specific I/O schedulers are responsible for ordering normal filesystem
+requests. They can also choose to delay certain requests to improve
+throughput, or for whatever other purpose. As the plural form indicates, there
+are multiple I/O schedulers. They can be built as modules, but at least one
+must be built into the kernel. Each queue can choose a different one and can
+also switch to another one dynamically.
 
 A block layer call to the i/o scheduler follows the convention elv_xxx(). This
 calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh,
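Aside: the sketch below is editorial, not part of the patch. It illustrates the
division of labour described above using the 2.6.14-era names from this series;
it is a simplification, not the actual elevator.c code. When the generic
dispatch queue (q->queue_head) runs dry, the block layer asks the specific
scheduler to refill it through elevator_dispatch_fn.

	/* editorial sketch - simplified, not the real elevator.c */
	static struct request *sketch_next_request(request_queue_t *q)
	{
		while (list_empty(&q->queue_head)) {
			/*
			 * Dispatch queue is empty: ask the specific scheduler
			 * to refill it.  force == 0, so the scheduler may
			 * decline (e.g. AS while anticipating a close request).
			 */
			if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
				return NULL;
		}

		/* requests on q->queue_head now belong to the generic layer */
		return list_entry_rq(q->queue_head.next);
	}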
@@ -921,44 +932,36 @@ keeping work.
 The functions an elevator may implement are: (* are mandatory)
 elevator_merge_fn		called to query requests for merge with a bio
 
-elevator_merge_req_fn		" " " with another request
+elevator_merge_req_fn		called when two requests get merged. The one
+				which gets merged into the other will never
+				be seen by the I/O scheduler again. IOW, after
+				being merged, the request is gone.
 
 elevator_merged_fn		called when a request in the scheduler has been
 				involved in a merge. It is used in the deadline
 				scheduler for example, to reposition the request
 				if its sorting order has changed.
 
-*elevator_next_req_fn		returns the next scheduled request, or NULL
-				if there are none (or none are ready).
+elevator_dispatch_fn		fills the dispatch queue with ready requests.
+				I/O schedulers are free to postpone requests by
+				not filling the dispatch queue unless @force
+				is non-zero. Once dispatched, I/O schedulers
+				are not allowed to manipulate the requests -
+				they belong to the generic dispatch queue.
 
-*elevator_add_req_fn		called to add a new request into the scheduler
+elevator_add_req_fn		called to add a new request into the scheduler
 
 elevator_queue_empty_fn		returns true if the merge queue is empty.
 				Drivers shouldn't use this, but rather check
 				if elv_next_request is NULL (without losing the
 				request if one exists!)
 
-elevator_remove_req_fn		This is called when a driver claims ownership of
-				the target request - it now belongs to the
-				driver. It must not be modified or merged.
-				Drivers must not lose the request! A subsequent
-				call of elevator_next_req_fn must return the
-				_next_ request.
-
-elevator_requeue_req_fn		called to add a request to the scheduler. This
-				is used when the request has already been
-				returned by elv_next_request, but hasn't
-				completed. If this is not implemented then
-				elevator_add_req_fn is called instead.
-
 elevator_former_req_fn
 elevator_latter_req_fn		These return the request before or after the
 				one specified in disk sort order. Used by the
 				block layer to find merge possibilities.
 
-elevator_completed_req_fn	called when a request is completed. This might
-				come about due to being merged with another or
-				when the device completes the request.
+elevator_completed_req_fn	called when a request is completed.
 
 elevator_may_queue_fn		returns true if the scheduler wants to allow the
 				current context to queue a new request even if
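Aside: as a concrete (editorial) example of the new interface, a minimal FIFO
scheduler in the spirit of the reworked noop-iosched.c needs little more than
elevator_add_req_fn, elevator_dispatch_fn and elevator_queue_empty_fn;
elv_dispatch_sort() hands a request over to the generic dispatch queue. This is
a sketch modelled on the patch, not the file itself.

	#include <linux/blkdev.h>
	#include <linux/elevator.h>
	#include <linux/list.h>

	struct fifo_data {
		struct list_head queue;		/* requests not yet dispatched */
	};

	static void fifo_add_request(request_queue_t *q, struct request *rq)
	{
		struct fifo_data *fd = q->elevator->elevator_data;

		list_add_tail(&rq->queuelist, &fd->queue);
	}

	static int fifo_dispatch(request_queue_t *q, int force)
	{
		struct fifo_data *fd = q->elevator->elevator_data;
		struct request *rq;

		if (list_empty(&fd->queue))
			return 0;

		/* hand the oldest request over to the generic dispatch queue */
		rq = list_entry_rq(fd->queue.next);
		list_del_init(&rq->queuelist);
		elv_dispatch_sort(q, rq);
		return 1;
	}

	static int fifo_queue_empty(request_queue_t *q)
	{
		struct fifo_data *fd = q->elevator->elevator_data;

		return list_empty(&fd->queue);
	}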
@@ -967,13 +970,33 @@ elevator_may_queue_fn	returns true if the scheduler wants to allow the
 
 elevator_set_req_fn
 elevator_put_req_fn		Must be used to allocate and free any elevator
-				specific storate for a request.
+				specific storage for a request.
+
+elevator_activate_req_fn	Called when the device driver first sees a
+				request. I/O schedulers can use this callback
+				to determine when actual execution of a request
+				starts.
+elevator_deactivate_req_fn	Called when the device driver decides to delay
+				a request by requeueing it.
 
 elevator_init_fn
 elevator_exit_fn		Allocate and free any elevator specific storage
 				for a queue.
 
-4.2 I/O scheduler implementation
+4.2 Request flows seen by I/O schedulers
+All requests seen by I/O schedulers strictly follow one of the following three
+flows.
+
+ set_req_fn ->
+
+ i.   add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
+      (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
+ ii.  add_req_fn -> (merged_fn ->)* -> merge_req_fn
+ iii. [none]
+
+ -> put_req_fn
+
+4.3 I/O scheduler implementation
 The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
 optimal disk scan and request servicing performance (based on generic
 principles and device capabilities), optimized for:
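Aside: an editorial sketch of flow (i) as seen from a driver. The helpers
cannot_start_now() and start_hardware_io() are hypothetical placeholders, and
the block layer calls (elv_next_request, blkdev_dequeue_request,
blk_requeue_request) are recalled from the 2.6.14-era API; the point is only to
mark where the elevator hooks above fire.

	/* cannot_start_now() and start_hardware_io() are made up */
	static void sketch_request_fn(request_queue_t *q)
	{
		struct request *rq;

		/*
		 * elv_next_request() may invoke elevator_dispatch_fn to refill
		 * the dispatch queue; the first time the driver sees a given
		 * rq, elevator_activate_req_fn is called.
		 */
		while ((rq = elv_next_request(q)) != NULL) {
			blkdev_dequeue_request(rq);

			if (cannot_start_now(rq)) {
				/* delaying it -> elevator_deactivate_req_fn */
				blk_requeue_request(q, rq);
				break;
			}

			/*
			 * When the hardware finishes, the completion path ends
			 * in elevator_completed_req_fn, then put_req_fn.
			 */
			start_hardware_io(rq);
		}
	}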
@@ -993,18 +1016,7 @@ request in sort order to prevent binary tree lookups.
 This arrangement is not a generic block layer characteristic however, so
 elevators may implement queues as they please.
 
-ii. Last merge hint
-The last merge hint is part of the generic queue layer. I/O schedulers must do
-some management on it. For the most part, the most important thing is to make
-sure q->last_merge is cleared (set to NULL) when the request on it is no longer
-a candidate for merging (for example if it has been sent to the driver).
-
-The last merge performed is cached as a hint for the subsequent request. If
-sequential data is being submitted, the hint is used to perform merges without
-any scanning. This is not sufficient when there are multiple processes doing
-I/O though, so a "merge hash" is used by some schedulers.
-
-iii. Merge hash
+ii. Merge hash
 AS and deadline use a hash table indexed by the last sector of a request. This
 enables merging code to quickly look up "back merge" candidates, even when
 multiple I/O streams are being performed at once on one disk.
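Aside: a small editorial sketch of the idea. A request is hashed on the sector
just past its end, so a bio that begins exactly there is a back merge candidate
and can be found without scanning; the macro mirrors rq_hash_key() in AS and
deadline, while sketch_back_merge_ok() is a made-up helper.

	/* hash key: the first sector *after* the request, as in AS/deadline */
	#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)

	/*
	 * A queued request is a back merge candidate for a new bio when the
	 * bio starts exactly where the request ends, so appending the bio
	 * keeps the request contiguous.
	 */
	static int sketch_back_merge_ok(struct request *rq, struct bio *bio)
	{
		return rq_mergeable(rq) && rq_hash_key(rq) == bio->bi_sector;
	}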
@@ -1013,29 +1025,8 @@ multiple I/O streams are being performed at once on one disk.
 are far less common than "back merges" due to the nature of most I/O patterns.
 Front merges are handled by the binary trees in AS and deadline schedulers.
 
-iv. Handling barrier cases
-A request with flags REQ_HARDBARRIER or REQ_SOFTBARRIER must not be ordered
-around. That is, they must be processed after all older requests, and before
-any newer ones. This includes merges!
-
-In AS and deadline schedulers, barriers have the effect of flushing the reorder
-queue. The performance cost of this will vary from nothing to a lot depending
-on i/o patterns and device characteristics. Obviously they won't improve
-performance, so their use should be kept to a minimum.
-
-v. Handling insertion position directives
-A request may be inserted with a position directive. The directives are one of
-ELEVATOR_INSERT_BACK, ELEVATOR_INSERT_FRONT, ELEVATOR_INSERT_SORT.
-
-ELEVATOR_INSERT_SORT is a general directive for non-barrier requests.
-ELEVATOR_INSERT_BACK is used to insert a barrier to the back of the queue.
-ELEVATOR_INSERT_FRONT is used to insert a barrier to the front of the queue, and
-overrides the ordering requested by any previous barriers. In practice this is
-harmless and required, because it is used for SCSI requeueing. This does not
-require flushing the reorder queue, so does not impose a performance penalty.
-
-vi. Plugging the queue to batch requests in anticipation of opportunities for
-    merge/sort optimizations
+iii. Plugging the queue to batch requests in anticipation of opportunities for
+     merge/sort optimizations
 
 This is just the same as in 2.4 so far, though per-device unplugging
 support is anticipated for 2.5. Also with a priority-based i/o scheduler,
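Aside: a rough editorial sketch of the plugging idea. The signatures are
recalled from the 2.6.14-era API and should be treated as assumptions;
someone_is_waiting() stands in for any caller that needs the I/O immediately,
and the queue lock is assumed held.

	static void sketch_submit(request_queue_t *q, struct request *rq)
	{
		/* plug an empty queue so a burst of submissions can be batched */
		if (elv_queue_empty(q))
			blk_plug_device(q);

		__elv_add_request(q, rq, ELEVATOR_INSERT_SORT, 0);

		/* otherwise the unplug timer runs the queue a little later */
		if (someone_is_waiting())
			generic_unplug_device(q);	/* unplug, run q->request_fn */
	}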
@@ -1069,7 +1060,7 @@ Aside:
 	blk_kick_queue() to unplug a specific queue (right away ?)
 	or optionally, all queues, is in the plan.
 
-4.3 I/O contexts
+4.4 I/O contexts
 I/O contexts provide a dynamically allocated per process data area. They may
 be used in I/O schedulers, and in the block layer (could be used for IO stats,
 priorities for example). See *io_context in drivers/block/ll_rw_blk.c, and
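Aside: an editorial sketch of how a scheduler might hold an io_context for the
life of a request, loosely following the as_get_io_context()/as_put_io_context()
pattern visible in the AS changes below. The gfp flag and the use of
rq->elevator_private are assumptions made only for illustration.

	static int sketch_set_request(request_queue_t *q, struct request *rq,
				      struct bio *bio, int gfp_mask)
	{
		/* take a reference to the submitter's io_context */
		rq->elevator_private = get_io_context(GFP_ATOMIC);
		return 0;
	}

	static void sketch_put_request(request_queue_t *q, struct request *rq)
	{
		struct io_context *ioc = rq->elevator_private;

		/* drop the reference when the request is done with */
		if (ioc)
			put_io_context(ioc);
		rq->elevator_private = NULL;
	}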
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index 1f08e14697e9..4081c36c8c19 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -98,7 +98,6 @@ struct as_data {
98 98
99 struct as_rq *next_arq[2]; /* next in sort order */ 99 struct as_rq *next_arq[2]; /* next in sort order */
100 sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ 100 sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
101 struct list_head *dispatch; /* driver dispatch queue */
102 struct list_head *hash; /* request hash */ 101 struct list_head *hash; /* request hash */
103 102
104 unsigned long exit_prob; /* probability a task will exit while 103 unsigned long exit_prob; /* probability a task will exit while
@@ -239,6 +238,25 @@ static struct io_context *as_get_io_context(void)
239 return ioc; 238 return ioc;
240} 239}
241 240
241static void as_put_io_context(struct as_rq *arq)
242{
243 struct as_io_context *aic;
244
245 if (unlikely(!arq->io_context))
246 return;
247
248 aic = arq->io_context->aic;
249
250 if (arq->is_sync == REQ_SYNC && aic) {
251 spin_lock(&aic->lock);
252 set_bit(AS_TASK_IORUNNING, &aic->state);
253 aic->last_end_request = jiffies;
254 spin_unlock(&aic->lock);
255 }
256
257 put_io_context(arq->io_context);
258}
259
242/* 260/*
243 * the back merge hash support functions 261 * the back merge hash support functions
244 */ 262 */
@@ -261,14 +279,6 @@ static inline void as_del_arq_hash(struct as_rq *arq)
261 __as_del_arq_hash(arq); 279 __as_del_arq_hash(arq);
262} 280}
263 281
264static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq)
265{
266 as_del_arq_hash(arq);
267
268 if (q->last_merge == arq->request)
269 q->last_merge = NULL;
270}
271
272static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq) 282static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
273{ 283{
274 struct request *rq = arq->request; 284 struct request *rq = arq->request;
@@ -312,7 +322,7 @@ static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
312 BUG_ON(!arq->on_hash); 322 BUG_ON(!arq->on_hash);
313 323
314 if (!rq_mergeable(__rq)) { 324 if (!rq_mergeable(__rq)) {
315 as_remove_merge_hints(ad->q, arq); 325 as_del_arq_hash(arq);
316 continue; 326 continue;
317 } 327 }
318 328
@@ -950,23 +960,12 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
950 960
951 WARN_ON(!list_empty(&rq->queuelist)); 961 WARN_ON(!list_empty(&rq->queuelist));
952 962
953 if (arq->state == AS_RQ_PRESCHED) {
954 WARN_ON(arq->io_context);
955 goto out;
956 }
957
958 if (arq->state == AS_RQ_MERGED)
959 goto out_ioc;
960
961 if (arq->state != AS_RQ_REMOVED) { 963 if (arq->state != AS_RQ_REMOVED) {
962 printk("arq->state %d\n", arq->state); 964 printk("arq->state %d\n", arq->state);
963 WARN_ON(1); 965 WARN_ON(1);
964 goto out; 966 goto out;
965 } 967 }
966 968
967 if (!blk_fs_request(rq))
968 goto out;
969
970 if (ad->changed_batch && ad->nr_dispatched == 1) { 969 if (ad->changed_batch && ad->nr_dispatched == 1) {
971 kblockd_schedule_work(&ad->antic_work); 970 kblockd_schedule_work(&ad->antic_work);
972 ad->changed_batch = 0; 971 ad->changed_batch = 0;
@@ -1001,21 +1000,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
1001 } 1000 }
1002 } 1001 }
1003 1002
1004out_ioc: 1003 as_put_io_context(arq);
1005 if (!arq->io_context)
1006 goto out;
1007
1008 if (arq->is_sync == REQ_SYNC) {
1009 struct as_io_context *aic = arq->io_context->aic;
1010 if (aic) {
1011 spin_lock(&aic->lock);
1012 set_bit(AS_TASK_IORUNNING, &aic->state);
1013 aic->last_end_request = jiffies;
1014 spin_unlock(&aic->lock);
1015 }
1016 }
1017
1018 put_io_context(arq->io_context);
1019out: 1004out:
1020 arq->state = AS_RQ_POSTSCHED; 1005 arq->state = AS_RQ_POSTSCHED;
1021} 1006}
@@ -1047,73 +1032,11 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
1047 ad->next_arq[data_dir] = as_find_next_arq(ad, arq); 1032 ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
1048 1033
1049 list_del_init(&arq->fifo); 1034 list_del_init(&arq->fifo);
1050 as_remove_merge_hints(q, arq); 1035 as_del_arq_hash(arq);
1051 as_del_arq_rb(ad, arq); 1036 as_del_arq_rb(ad, arq);
1052} 1037}
1053 1038
1054/* 1039/*
1055 * as_remove_dispatched_request is called to remove a request which has gone
1056 * to the dispatch list.
1057 */
1058static void as_remove_dispatched_request(request_queue_t *q, struct request *rq)
1059{
1060 struct as_rq *arq = RQ_DATA(rq);
1061 struct as_io_context *aic;
1062
1063 if (!arq) {
1064 WARN_ON(1);
1065 return;
1066 }
1067
1068 WARN_ON(arq->state != AS_RQ_DISPATCHED);
1069 WARN_ON(ON_RB(&arq->rb_node));
1070 if (arq->io_context && arq->io_context->aic) {
1071 aic = arq->io_context->aic;
1072 if (aic) {
1073 WARN_ON(!atomic_read(&aic->nr_dispatched));
1074 atomic_dec(&aic->nr_dispatched);
1075 }
1076 }
1077}
1078
1079/*
1080 * as_remove_request is called when a driver has finished with a request.
1081 * This should be only called for dispatched requests, but for some reason
1082 * a POWER4 box running hwscan it does not.
1083 */
1084static void as_remove_request(request_queue_t *q, struct request *rq)
1085{
1086 struct as_rq *arq = RQ_DATA(rq);
1087
1088 if (unlikely(arq->state == AS_RQ_NEW))
1089 goto out;
1090
1091 if (ON_RB(&arq->rb_node)) {
1092 if (arq->state != AS_RQ_QUEUED) {
1093 printk("arq->state %d\n", arq->state);
1094 WARN_ON(1);
1095 goto out;
1096 }
1097 /*
1098 * We'll lose the aliased request(s) here. I don't think this
1099 * will ever happen, but if it does, hopefully someone will
1100 * report it.
1101 */
1102 WARN_ON(!list_empty(&rq->queuelist));
1103 as_remove_queued_request(q, rq);
1104 } else {
1105 if (arq->state != AS_RQ_DISPATCHED) {
1106 printk("arq->state %d\n", arq->state);
1107 WARN_ON(1);
1108 goto out;
1109 }
1110 as_remove_dispatched_request(q, rq);
1111 }
1112out:
1113 arq->state = AS_RQ_REMOVED;
1114}
1115
1116/*
1117 * as_fifo_expired returns 0 if there are no expired reads on the fifo, 1040 * as_fifo_expired returns 0 if there are no expired reads on the fifo,
1118 * 1 otherwise. It is ratelimited so that we only perform the check once per 1041 * 1 otherwise. It is ratelimited so that we only perform the check once per
1119 * `fifo_expire' interval. Otherwise a large number of expired requests 1042 * `fifo_expire' interval. Otherwise a large number of expired requests
@@ -1165,7 +1088,6 @@ static inline int as_batch_expired(struct as_data *ad)
1165static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) 1088static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1166{ 1089{
1167 struct request *rq = arq->request; 1090 struct request *rq = arq->request;
1168 struct list_head *insert;
1169 const int data_dir = arq->is_sync; 1091 const int data_dir = arq->is_sync;
1170 1092
1171 BUG_ON(!ON_RB(&arq->rb_node)); 1093 BUG_ON(!ON_RB(&arq->rb_node));
@@ -1198,13 +1120,13 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1198 /* 1120 /*
1199 * take it off the sort and fifo list, add to dispatch queue 1121 * take it off the sort and fifo list, add to dispatch queue
1200 */ 1122 */
1201 insert = ad->dispatch->prev;
1202
1203 while (!list_empty(&rq->queuelist)) { 1123 while (!list_empty(&rq->queuelist)) {
1204 struct request *__rq = list_entry_rq(rq->queuelist.next); 1124 struct request *__rq = list_entry_rq(rq->queuelist.next);
1205 struct as_rq *__arq = RQ_DATA(__rq); 1125 struct as_rq *__arq = RQ_DATA(__rq);
1206 1126
1207 list_move_tail(&__rq->queuelist, ad->dispatch); 1127 list_del(&__rq->queuelist);
1128
1129 elv_dispatch_add_tail(ad->q, __rq);
1208 1130
1209 if (__arq->io_context && __arq->io_context->aic) 1131 if (__arq->io_context && __arq->io_context->aic)
1210 atomic_inc(&__arq->io_context->aic->nr_dispatched); 1132 atomic_inc(&__arq->io_context->aic->nr_dispatched);
@@ -1218,7 +1140,8 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1218 as_remove_queued_request(ad->q, rq); 1140 as_remove_queued_request(ad->q, rq);
1219 WARN_ON(arq->state != AS_RQ_QUEUED); 1141 WARN_ON(arq->state != AS_RQ_QUEUED);
1220 1142
1221 list_add(&rq->queuelist, insert); 1143 elv_dispatch_sort(ad->q, rq);
1144
1222 arq->state = AS_RQ_DISPATCHED; 1145 arq->state = AS_RQ_DISPATCHED;
1223 if (arq->io_context && arq->io_context->aic) 1146 if (arq->io_context && arq->io_context->aic)
1224 atomic_inc(&arq->io_context->aic->nr_dispatched); 1147 atomic_inc(&arq->io_context->aic->nr_dispatched);
@@ -1230,12 +1153,42 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
1230 * read/write expire, batch expire, etc, and moves it to the dispatch 1153 * read/write expire, batch expire, etc, and moves it to the dispatch
1231 * queue. Returns 1 if a request was found, 0 otherwise. 1154 * queue. Returns 1 if a request was found, 0 otherwise.
1232 */ 1155 */
1233static int as_dispatch_request(struct as_data *ad) 1156static int as_dispatch_request(request_queue_t *q, int force)
1234{ 1157{
1158 struct as_data *ad = q->elevator->elevator_data;
1235 struct as_rq *arq; 1159 struct as_rq *arq;
1236 const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); 1160 const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
1237 const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); 1161 const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
1238 1162
1163 if (unlikely(force)) {
1164 /*
1165 * Forced dispatch, accounting is useless. Reset
1166 * accounting states and dump fifo_lists. Note that
1167 * batch_data_dir is reset to REQ_SYNC to avoid
1168 * screwing write batch accounting as write batch
1169 * accounting occurs on W->R transition.
1170 */
1171 int dispatched = 0;
1172
1173 ad->batch_data_dir = REQ_SYNC;
1174 ad->changed_batch = 0;
1175 ad->new_batch = 0;
1176
1177 while (ad->next_arq[REQ_SYNC]) {
1178 as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
1179 dispatched++;
1180 }
1181 ad->last_check_fifo[REQ_SYNC] = jiffies;
1182
1183 while (ad->next_arq[REQ_ASYNC]) {
1184 as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
1185 dispatched++;
1186 }
1187 ad->last_check_fifo[REQ_ASYNC] = jiffies;
1188
1189 return dispatched;
1190 }
1191
1239 /* Signal that the write batch was uncontended, so we can't time it */ 1192 /* Signal that the write batch was uncontended, so we can't time it */
1240 if (ad->batch_data_dir == REQ_ASYNC && !reads) { 1193 if (ad->batch_data_dir == REQ_ASYNC && !reads) {
1241 if (ad->current_write_count == 0 || !writes) 1194 if (ad->current_write_count == 0 || !writes)
@@ -1359,20 +1312,6 @@ fifo_expired:
1359 return 1; 1312 return 1;
1360} 1313}
1361 1314
1362static struct request *as_next_request(request_queue_t *q)
1363{
1364 struct as_data *ad = q->elevator->elevator_data;
1365 struct request *rq = NULL;
1366
1367 /*
1368 * if there are still requests on the dispatch queue, grab the first
1369 */
1370 if (!list_empty(ad->dispatch) || as_dispatch_request(ad))
1371 rq = list_entry_rq(ad->dispatch->next);
1372
1373 return rq;
1374}
1375
1376/* 1315/*
1377 * Add arq to a list behind alias 1316 * Add arq to a list behind alias
1378 */ 1317 */
@@ -1404,17 +1343,25 @@ as_add_aliased_request(struct as_data *ad, struct as_rq *arq, struct as_rq *alia
1404 /* 1343 /*
1405 * Don't want to have to handle merges. 1344 * Don't want to have to handle merges.
1406 */ 1345 */
1407 as_remove_merge_hints(ad->q, arq); 1346 as_del_arq_hash(arq);
1408} 1347}
1409 1348
1410/* 1349/*
1411 * add arq to rbtree and fifo 1350 * add arq to rbtree and fifo
1412 */ 1351 */
1413static void as_add_request(struct as_data *ad, struct as_rq *arq) 1352static void as_add_request(request_queue_t *q, struct request *rq)
1414{ 1353{
1354 struct as_data *ad = q->elevator->elevator_data;
1355 struct as_rq *arq = RQ_DATA(rq);
1415 struct as_rq *alias; 1356 struct as_rq *alias;
1416 int data_dir; 1357 int data_dir;
1417 1358
1359 if (arq->state != AS_RQ_PRESCHED) {
1360 printk("arq->state: %d\n", arq->state);
1361 WARN_ON(1);
1362 }
1363 arq->state = AS_RQ_NEW;
1364
1418 if (rq_data_dir(arq->request) == READ 1365 if (rq_data_dir(arq->request) == READ
1419 || current->flags&PF_SYNCWRITE) 1366 || current->flags&PF_SYNCWRITE)
1420 arq->is_sync = 1; 1367 arq->is_sync = 1;
@@ -1437,12 +1384,8 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
1437 arq->expires = jiffies + ad->fifo_expire[data_dir]; 1384 arq->expires = jiffies + ad->fifo_expire[data_dir];
1438 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); 1385 list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
1439 1386
1440 if (rq_mergeable(arq->request)) { 1387 if (rq_mergeable(arq->request))
1441 as_add_arq_hash(ad, arq); 1388 as_add_arq_hash(ad, arq);
1442
1443 if (!ad->q->last_merge)
1444 ad->q->last_merge = arq->request;
1445 }
1446 as_update_arq(ad, arq); /* keep state machine up to date */ 1389 as_update_arq(ad, arq); /* keep state machine up to date */
1447 1390
1448 } else { 1391 } else {
@@ -1463,96 +1406,24 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
1463 arq->state = AS_RQ_QUEUED; 1406 arq->state = AS_RQ_QUEUED;
1464} 1407}
1465 1408
1466static void as_deactivate_request(request_queue_t *q, struct request *rq) 1409static void as_activate_request(request_queue_t *q, struct request *rq)
1467{ 1410{
1468 struct as_data *ad = q->elevator->elevator_data;
1469 struct as_rq *arq = RQ_DATA(rq); 1411 struct as_rq *arq = RQ_DATA(rq);
1470 1412
1471 if (arq) { 1413 WARN_ON(arq->state != AS_RQ_DISPATCHED);
1472 if (arq->state == AS_RQ_REMOVED) { 1414 arq->state = AS_RQ_REMOVED;
1473 arq->state = AS_RQ_DISPATCHED; 1415 if (arq->io_context && arq->io_context->aic)
1474 if (arq->io_context && arq->io_context->aic) 1416 atomic_dec(&arq->io_context->aic->nr_dispatched);
1475 atomic_inc(&arq->io_context->aic->nr_dispatched);
1476 }
1477 } else
1478 WARN_ON(blk_fs_request(rq)
1479 && (!(rq->flags & (REQ_HARDBARRIER|REQ_SOFTBARRIER))) );
1480
1481 /* Stop anticipating - let this request get through */
1482 as_antic_stop(ad);
1483}
1484
1485/*
1486 * requeue the request. The request has not been completed, nor is it a
1487 * new request, so don't touch accounting.
1488 */
1489static void as_requeue_request(request_queue_t *q, struct request *rq)
1490{
1491 as_deactivate_request(q, rq);
1492 list_add(&rq->queuelist, &q->queue_head);
1493}
1494
1495/*
1496 * Account a request that is inserted directly onto the dispatch queue.
1497 * arq->io_context->aic->nr_dispatched should not need to be incremented
1498 * because only new requests should come through here: requeues go through
1499 * our explicit requeue handler.
1500 */
1501static void as_account_queued_request(struct as_data *ad, struct request *rq)
1502{
1503 if (blk_fs_request(rq)) {
1504 struct as_rq *arq = RQ_DATA(rq);
1505 arq->state = AS_RQ_DISPATCHED;
1506 ad->nr_dispatched++;
1507 }
1508} 1417}
1509 1418
1510static void 1419static void as_deactivate_request(request_queue_t *q, struct request *rq)
1511as_insert_request(request_queue_t *q, struct request *rq, int where)
1512{ 1420{
1513 struct as_data *ad = q->elevator->elevator_data;
1514 struct as_rq *arq = RQ_DATA(rq); 1421 struct as_rq *arq = RQ_DATA(rq);
1515 1422
1516 if (arq) { 1423 WARN_ON(arq->state != AS_RQ_REMOVED);
1517 if (arq->state != AS_RQ_PRESCHED) { 1424 arq->state = AS_RQ_DISPATCHED;
1518 printk("arq->state: %d\n", arq->state); 1425 if (arq->io_context && arq->io_context->aic)
1519 WARN_ON(1); 1426 atomic_inc(&arq->io_context->aic->nr_dispatched);
1520 }
1521 arq->state = AS_RQ_NEW;
1522 }
1523
1524 /* barriers must flush the reorder queue */
1525 if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
1526 && where == ELEVATOR_INSERT_SORT)) {
1527 WARN_ON(1);
1528 where = ELEVATOR_INSERT_BACK;
1529 }
1530
1531 switch (where) {
1532 case ELEVATOR_INSERT_BACK:
1533 while (ad->next_arq[REQ_SYNC])
1534 as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
1535
1536 while (ad->next_arq[REQ_ASYNC])
1537 as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
1538
1539 list_add_tail(&rq->queuelist, ad->dispatch);
1540 as_account_queued_request(ad, rq);
1541 as_antic_stop(ad);
1542 break;
1543 case ELEVATOR_INSERT_FRONT:
1544 list_add(&rq->queuelist, ad->dispatch);
1545 as_account_queued_request(ad, rq);
1546 as_antic_stop(ad);
1547 break;
1548 case ELEVATOR_INSERT_SORT:
1549 BUG_ON(!blk_fs_request(rq));
1550 as_add_request(ad, arq);
1551 break;
1552 default:
1553 BUG();
1554 return;
1555 }
1556} 1427}
1557 1428
1558/* 1429/*
@@ -1565,12 +1436,8 @@ static int as_queue_empty(request_queue_t *q)
1565{ 1436{
1566 struct as_data *ad = q->elevator->elevator_data; 1437 struct as_data *ad = q->elevator->elevator_data;
1567 1438
1568 if (!list_empty(&ad->fifo_list[REQ_ASYNC]) 1439 return list_empty(&ad->fifo_list[REQ_ASYNC])
1569 || !list_empty(&ad->fifo_list[REQ_SYNC]) 1440 && list_empty(&ad->fifo_list[REQ_SYNC]);
1570 || !list_empty(ad->dispatch))
1571 return 0;
1572
1573 return 1;
1574} 1441}
1575 1442
1576static struct request * 1443static struct request *
@@ -1608,15 +1475,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
1608 int ret; 1475 int ret;
1609 1476
1610 /* 1477 /*
1611 * try last_merge to avoid going to hash
1612 */
1613 ret = elv_try_last_merge(q, bio);
1614 if (ret != ELEVATOR_NO_MERGE) {
1615 __rq = q->last_merge;
1616 goto out_insert;
1617 }
1618
1619 /*
1620 * see if the merge hash can satisfy a back merge 1478 * see if the merge hash can satisfy a back merge
1621 */ 1479 */
1622 __rq = as_find_arq_hash(ad, bio->bi_sector); 1480 __rq = as_find_arq_hash(ad, bio->bi_sector);
@@ -1644,9 +1502,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
1644 1502
1645 return ELEVATOR_NO_MERGE; 1503 return ELEVATOR_NO_MERGE;
1646out: 1504out:
1647 if (rq_mergeable(__rq))
1648 q->last_merge = __rq;
1649out_insert:
1650 if (ret) { 1505 if (ret) {
1651 if (rq_mergeable(__rq)) 1506 if (rq_mergeable(__rq))
1652 as_hot_arq_hash(ad, RQ_DATA(__rq)); 1507 as_hot_arq_hash(ad, RQ_DATA(__rq));
@@ -1693,9 +1548,6 @@ static void as_merged_request(request_queue_t *q, struct request *req)
1693 * behind the disk head. We currently don't bother adjusting. 1548 * behind the disk head. We currently don't bother adjusting.
1694 */ 1549 */
1695 } 1550 }
1696
1697 if (arq->on_hash)
1698 q->last_merge = req;
1699} 1551}
1700 1552
1701static void 1553static void
@@ -1763,6 +1615,7 @@ as_merged_requests(request_queue_t *q, struct request *req,
1763 * kill knowledge of next, this one is a goner 1615 * kill knowledge of next, this one is a goner
1764 */ 1616 */
1765 as_remove_queued_request(q, next); 1617 as_remove_queued_request(q, next);
1618 as_put_io_context(anext);
1766 1619
1767 anext->state = AS_RQ_MERGED; 1620 anext->state = AS_RQ_MERGED;
1768} 1621}
@@ -1782,7 +1635,7 @@ static void as_work_handler(void *data)
1782 unsigned long flags; 1635 unsigned long flags;
1783 1636
1784 spin_lock_irqsave(q->queue_lock, flags); 1637 spin_lock_irqsave(q->queue_lock, flags);
1785 if (as_next_request(q)) 1638 if (!as_queue_empty(q))
1786 q->request_fn(q); 1639 q->request_fn(q);
1787 spin_unlock_irqrestore(q->queue_lock, flags); 1640 spin_unlock_irqrestore(q->queue_lock, flags);
1788} 1641}
@@ -1797,7 +1650,9 @@ static void as_put_request(request_queue_t *q, struct request *rq)
1797 return; 1650 return;
1798 } 1651 }
1799 1652
1800 if (arq->state != AS_RQ_POSTSCHED && arq->state != AS_RQ_PRESCHED) { 1653 if (unlikely(arq->state != AS_RQ_POSTSCHED &&
1654 arq->state != AS_RQ_PRESCHED &&
1655 arq->state != AS_RQ_MERGED)) {
1801 printk("arq->state %d\n", arq->state); 1656 printk("arq->state %d\n", arq->state);
1802 WARN_ON(1); 1657 WARN_ON(1);
1803 } 1658 }
@@ -1907,7 +1762,6 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
1907 INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); 1762 INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
1908 ad->sort_list[REQ_SYNC] = RB_ROOT; 1763 ad->sort_list[REQ_SYNC] = RB_ROOT;
1909 ad->sort_list[REQ_ASYNC] = RB_ROOT; 1764 ad->sort_list[REQ_ASYNC] = RB_ROOT;
1910 ad->dispatch = &q->queue_head;
1911 ad->fifo_expire[REQ_SYNC] = default_read_expire; 1765 ad->fifo_expire[REQ_SYNC] = default_read_expire;
1912 ad->fifo_expire[REQ_ASYNC] = default_write_expire; 1766 ad->fifo_expire[REQ_ASYNC] = default_write_expire;
1913 ad->antic_expire = default_antic_expire; 1767 ad->antic_expire = default_antic_expire;
@@ -2072,10 +1926,9 @@ static struct elevator_type iosched_as = {
 		.elevator_merge_fn =		as_merge,
 		.elevator_merged_fn =		as_merged_request,
 		.elevator_merge_req_fn =	as_merged_requests,
-		.elevator_next_req_fn =		as_next_request,
-		.elevator_add_req_fn =		as_insert_request,
-		.elevator_remove_req_fn =	as_remove_request,
-		.elevator_requeue_req_fn =	as_requeue_request,
+		.elevator_dispatch_fn =		as_dispatch_request,
+		.elevator_add_req_fn =		as_add_request,
+		.elevator_activate_req_fn =	as_activate_request,
 		.elevator_deactivate_req_fn =	as_deactivate_request,
 		.elevator_queue_empty_fn =	as_queue_empty,
 		.elevator_completed_req_fn =	as_completed_request,
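Aside: the ops table above is everything a scheduler exports; hooking it into
the elevator core is a one-liner at module init. An editorial sketch mirroring
the existing as_init()/as_exit() in this file (simplified, the real exit path
also waits for outstanding io_context references):

	static int __init sketch_as_init(void)
	{
		return elv_register(&iosched_as);
	}

	static void __exit sketch_as_exit(void)
	{
		elv_unregister(&iosched_as);
	}

	module_init(sketch_as_init);
	module_exit(sketch_as_exit);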
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index d3bfe8cfb039..94690e4d41e0 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -84,7 +84,6 @@ static int cfq_max_depth = 2;
84 (node)->rb_left = NULL; \ 84 (node)->rb_left = NULL; \
85} while (0) 85} while (0)
86#define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) 86#define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL)
87#define ON_RB(node) ((node)->rb_color != RB_NONE)
88#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) 87#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
89#define rq_rb_key(rq) (rq)->sector 88#define rq_rb_key(rq) (rq)->sector
90 89
@@ -271,10 +270,7 @@ CFQ_CFQQ_FNS(expired);
271#undef CFQ_CFQQ_FNS 270#undef CFQ_CFQQ_FNS
272 271
273enum cfq_rq_state_flags { 272enum cfq_rq_state_flags {
274 CFQ_CRQ_FLAG_in_flight = 0, 273 CFQ_CRQ_FLAG_is_sync = 0,
275 CFQ_CRQ_FLAG_in_driver,
276 CFQ_CRQ_FLAG_is_sync,
277 CFQ_CRQ_FLAG_requeued,
278}; 274};
279 275
280#define CFQ_CRQ_FNS(name) \ 276#define CFQ_CRQ_FNS(name) \
@@ -291,14 +287,11 @@ static inline int cfq_crq_##name(const struct cfq_rq *crq) \
291 return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ 287 return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \
292} 288}
293 289
294CFQ_CRQ_FNS(in_flight);
295CFQ_CRQ_FNS(in_driver);
296CFQ_CRQ_FNS(is_sync); 290CFQ_CRQ_FNS(is_sync);
297CFQ_CRQ_FNS(requeued);
298#undef CFQ_CRQ_FNS 291#undef CFQ_CRQ_FNS
299 292
300static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); 293static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
301static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *); 294static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
302static void cfq_put_cfqd(struct cfq_data *cfqd); 295static void cfq_put_cfqd(struct cfq_data *cfqd);
303 296
304#define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) 297#define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE)
@@ -311,14 +304,6 @@ static inline void cfq_del_crq_hash(struct cfq_rq *crq)
311 hlist_del_init(&crq->hash); 304 hlist_del_init(&crq->hash);
312} 305}
313 306
314static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
315{
316 cfq_del_crq_hash(crq);
317
318 if (q->last_merge == crq->request)
319 q->last_merge = NULL;
320}
321
322static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) 307static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
323{ 308{
324 const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); 309 const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
@@ -347,18 +332,13 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
347 return NULL; 332 return NULL;
348} 333}
349 334
350static inline int cfq_pending_requests(struct cfq_data *cfqd)
351{
352 return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues;
353}
354
355/* 335/*
356 * scheduler run of queue, if there are requests pending and no one in the 336 * scheduler run of queue, if there are requests pending and no one in the
357 * driver that will restart queueing 337 * driver that will restart queueing
358 */ 338 */
359static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 339static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
360{ 340{
361 if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd)) 341 if (!cfqd->rq_in_driver && cfqd->busy_queues)
362 kblockd_schedule_work(&cfqd->unplug_work); 342 kblockd_schedule_work(&cfqd->unplug_work);
363} 343}
364 344
@@ -366,7 +346,7 @@ static int cfq_queue_empty(request_queue_t *q)
366{ 346{
367 struct cfq_data *cfqd = q->elevator->elevator_data; 347 struct cfq_data *cfqd = q->elevator->elevator_data;
368 348
369 return !cfq_pending_requests(cfqd); 349 return !cfqd->busy_queues;
370} 350}
371 351
372/* 352/*
@@ -386,11 +366,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
386 if (crq2 == NULL) 366 if (crq2 == NULL)
387 return crq1; 367 return crq1;
388 368
389 if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2))
390 return crq1;
391 else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1))
392 return crq2;
393
394 if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) 369 if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
395 return crq1; 370 return crq1;
396 else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) 371 else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
@@ -461,10 +436,7 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
461 struct cfq_rq *crq_next = NULL, *crq_prev = NULL; 436 struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
462 struct rb_node *rbnext, *rbprev; 437 struct rb_node *rbnext, *rbprev;
463 438
464 rbnext = NULL; 439 if (!(rbnext = rb_next(&last->rb_node))) {
465 if (ON_RB(&last->rb_node))
466 rbnext = rb_next(&last->rb_node);
467 if (!rbnext) {
468 rbnext = rb_first(&cfqq->sort_list); 440 rbnext = rb_first(&cfqq->sort_list);
469 if (rbnext == &last->rb_node) 441 if (rbnext == &last->rb_node)
470 rbnext = NULL; 442 rbnext = NULL;
@@ -545,13 +517,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
545 * the pending list according to last request service 517 * the pending list according to last request service
546 */ 518 */
547static inline void 519static inline void
548cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue) 520cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
549{ 521{
550 BUG_ON(cfq_cfqq_on_rr(cfqq)); 522 BUG_ON(cfq_cfqq_on_rr(cfqq));
551 cfq_mark_cfqq_on_rr(cfqq); 523 cfq_mark_cfqq_on_rr(cfqq);
552 cfqd->busy_queues++; 524 cfqd->busy_queues++;
553 525
554 cfq_resort_rr_list(cfqq, requeue); 526 cfq_resort_rr_list(cfqq, 0);
555} 527}
556 528
557static inline void 529static inline void
@@ -571,22 +543,19 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
571static inline void cfq_del_crq_rb(struct cfq_rq *crq) 543static inline void cfq_del_crq_rb(struct cfq_rq *crq)
572{ 544{
573 struct cfq_queue *cfqq = crq->cfq_queue; 545 struct cfq_queue *cfqq = crq->cfq_queue;
546 struct cfq_data *cfqd = cfqq->cfqd;
547 const int sync = cfq_crq_is_sync(crq);
574 548
575 if (ON_RB(&crq->rb_node)) { 549 BUG_ON(!cfqq->queued[sync]);
576 struct cfq_data *cfqd = cfqq->cfqd; 550 cfqq->queued[sync]--;
577 const int sync = cfq_crq_is_sync(crq);
578 551
579 BUG_ON(!cfqq->queued[sync]); 552 cfq_update_next_crq(crq);
580 cfqq->queued[sync]--;
581 553
582 cfq_update_next_crq(crq); 554 rb_erase(&crq->rb_node, &cfqq->sort_list);
555 RB_CLEAR_COLOR(&crq->rb_node);
583 556
584 rb_erase(&crq->rb_node, &cfqq->sort_list); 557 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
585 RB_CLEAR_COLOR(&crq->rb_node); 558 cfq_del_cfqq_rr(cfqd, cfqq);
586
587 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
588 cfq_del_cfqq_rr(cfqd, cfqq);
589 }
590} 559}
591 560
592static struct cfq_rq * 561static struct cfq_rq *
@@ -627,12 +596,12 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
627 * if that happens, put the alias on the dispatch list 596 * if that happens, put the alias on the dispatch list
628 */ 597 */
629 while ((__alias = __cfq_add_crq_rb(crq)) != NULL) 598 while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
630 cfq_dispatch_sort(cfqd->queue, __alias); 599 cfq_dispatch_insert(cfqd->queue, __alias);
631 600
632 rb_insert_color(&crq->rb_node, &cfqq->sort_list); 601 rb_insert_color(&crq->rb_node, &cfqq->sort_list);
633 602
634 if (!cfq_cfqq_on_rr(cfqq)) 603 if (!cfq_cfqq_on_rr(cfqq))
635 cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq)); 604 cfq_add_cfqq_rr(cfqd, cfqq);
636 605
637 /* 606 /*
638 * check if this request is a better next-serve candidate 607 * check if this request is a better next-serve candidate
@@ -643,10 +612,8 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
643static inline void 612static inline void
644cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) 613cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
645{ 614{
646 if (ON_RB(&crq->rb_node)) { 615 rb_erase(&crq->rb_node, &cfqq->sort_list);
647 rb_erase(&crq->rb_node, &cfqq->sort_list); 616 cfqq->queued[cfq_crq_is_sync(crq)]--;
648 cfqq->queued[cfq_crq_is_sync(crq)]--;
649 }
650 617
651 cfq_add_crq_rb(crq); 618 cfq_add_crq_rb(crq);
652} 619}
@@ -676,49 +643,28 @@ out:
676 return NULL; 643 return NULL;
677} 644}
678 645
679static void cfq_deactivate_request(request_queue_t *q, struct request *rq) 646static void cfq_activate_request(request_queue_t *q, struct request *rq)
680{ 647{
681 struct cfq_data *cfqd = q->elevator->elevator_data; 648 struct cfq_data *cfqd = q->elevator->elevator_data;
682 struct cfq_rq *crq = RQ_DATA(rq);
683
684 if (crq) {
685 struct cfq_queue *cfqq = crq->cfq_queue;
686
687 if (cfq_crq_in_driver(crq)) {
688 cfq_clear_crq_in_driver(crq);
689 WARN_ON(!cfqd->rq_in_driver);
690 cfqd->rq_in_driver--;
691 }
692 if (cfq_crq_in_flight(crq)) {
693 const int sync = cfq_crq_is_sync(crq);
694 649
695 cfq_clear_crq_in_flight(crq); 650 cfqd->rq_in_driver++;
696 WARN_ON(!cfqq->on_dispatch[sync]);
697 cfqq->on_dispatch[sync]--;
698 }
699 cfq_mark_crq_requeued(crq);
700 }
701} 651}
702 652
703/* 653static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
704 * make sure the service time gets corrected on reissue of this request
705 */
706static void cfq_requeue_request(request_queue_t *q, struct request *rq)
707{ 654{
708 cfq_deactivate_request(q, rq); 655 struct cfq_data *cfqd = q->elevator->elevator_data;
709 list_add(&rq->queuelist, &q->queue_head); 656
657 WARN_ON(!cfqd->rq_in_driver);
658 cfqd->rq_in_driver--;
710} 659}
711 660
712static void cfq_remove_request(request_queue_t *q, struct request *rq) 661static void cfq_remove_request(struct request *rq)
713{ 662{
714 struct cfq_rq *crq = RQ_DATA(rq); 663 struct cfq_rq *crq = RQ_DATA(rq);
715 664
716 if (crq) { 665 list_del_init(&rq->queuelist);
717 list_del_init(&rq->queuelist); 666 cfq_del_crq_rb(crq);
718 cfq_del_crq_rb(crq); 667 cfq_del_crq_hash(crq);
719 cfq_remove_merge_hints(q, crq);
720
721 }
722} 668}
723 669
724static int 670static int
@@ -728,12 +674,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
728 struct request *__rq; 674 struct request *__rq;
729 int ret; 675 int ret;
730 676
731 ret = elv_try_last_merge(q, bio);
732 if (ret != ELEVATOR_NO_MERGE) {
733 __rq = q->last_merge;
734 goto out_insert;
735 }
736
737 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); 677 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
738 if (__rq && elv_rq_merge_ok(__rq, bio)) { 678 if (__rq && elv_rq_merge_ok(__rq, bio)) {
739 ret = ELEVATOR_BACK_MERGE; 679 ret = ELEVATOR_BACK_MERGE;
@@ -748,8 +688,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
748 688
749 return ELEVATOR_NO_MERGE; 689 return ELEVATOR_NO_MERGE;
750out: 690out:
751 q->last_merge = __rq;
752out_insert:
753 *req = __rq; 691 *req = __rq;
754 return ret; 692 return ret;
755} 693}
@@ -762,14 +700,12 @@ static void cfq_merged_request(request_queue_t *q, struct request *req)
762 cfq_del_crq_hash(crq); 700 cfq_del_crq_hash(crq);
763 cfq_add_crq_hash(cfqd, crq); 701 cfq_add_crq_hash(cfqd, crq);
764 702
765 if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) { 703 if (rq_rb_key(req) != crq->rb_key) {
766 struct cfq_queue *cfqq = crq->cfq_queue; 704 struct cfq_queue *cfqq = crq->cfq_queue;
767 705
768 cfq_update_next_crq(crq); 706 cfq_update_next_crq(crq);
769 cfq_reposition_crq_rb(cfqq, crq); 707 cfq_reposition_crq_rb(cfqq, crq);
770 } 708 }
771
772 q->last_merge = req;
773} 709}
774 710
775static void 711static void
@@ -785,7 +721,7 @@ cfq_merged_requests(request_queue_t *q, struct request *rq,
785 time_before(next->start_time, rq->start_time)) 721 time_before(next->start_time, rq->start_time))
786 list_move(&rq->queuelist, &next->queuelist); 722 list_move(&rq->queuelist, &next->queuelist);
787 723
788 cfq_remove_request(q, next); 724 cfq_remove_request(next);
789} 725}
790 726
791static inline void 727static inline void
@@ -992,53 +928,15 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
992 return 1; 928 return 1;
993} 929}
994 930
995/* 931static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
996 * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues,
997 * this function sector sorts the selected request to minimize seeks. we start
998 * at cfqd->last_sector, not 0.
999 */
1000static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
1001{ 932{
1002 struct cfq_data *cfqd = q->elevator->elevator_data; 933 struct cfq_data *cfqd = q->elevator->elevator_data;
1003 struct cfq_queue *cfqq = crq->cfq_queue; 934 struct cfq_queue *cfqq = crq->cfq_queue;
1004 struct list_head *head = &q->queue_head, *entry = head;
1005 struct request *__rq;
1006 sector_t last;
1007
1008 list_del(&crq->request->queuelist);
1009
1010 last = cfqd->last_sector;
1011 list_for_each_entry_reverse(__rq, head, queuelist) {
1012 struct cfq_rq *__crq = RQ_DATA(__rq);
1013
1014 if (blk_barrier_rq(__rq))
1015 break;
1016 if (!blk_fs_request(__rq))
1017 break;
1018 if (cfq_crq_requeued(__crq))
1019 break;
1020
1021 if (__rq->sector <= crq->request->sector)
1022 break;
1023 if (__rq->sector > last && crq->request->sector < last) {
1024 last = crq->request->sector + crq->request->nr_sectors;
1025 break;
1026 }
1027 entry = &__rq->queuelist;
1028 }
1029
1030 cfqd->last_sector = last;
1031 935
1032 cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); 936 cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
1033 937 cfq_remove_request(crq->request);
1034 cfq_del_crq_rb(crq);
1035 cfq_remove_merge_hints(q, crq);
1036
1037 cfq_mark_crq_in_flight(crq);
1038 cfq_clear_crq_requeued(crq);
1039
1040 cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; 938 cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
1041 list_add_tail(&crq->request->queuelist, entry); 939 elv_dispatch_sort(q, crq->request);
1042} 940}
1043 941
1044/* 942/*
@@ -1159,7 +1057,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1159 /* 1057 /*
1160 * finally, insert request into driver dispatch list 1058 * finally, insert request into driver dispatch list
1161 */ 1059 */
1162 cfq_dispatch_sort(cfqd->queue, crq); 1060 cfq_dispatch_insert(cfqd->queue, crq);
1163 1061
1164 cfqd->dispatch_slice++; 1062 cfqd->dispatch_slice++;
1165 dispatched++; 1063 dispatched++;
@@ -1194,7 +1092,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1194} 1092}
1195 1093
1196static int 1094static int
1197cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force) 1095cfq_dispatch_requests(request_queue_t *q, int force)
1198{ 1096{
1199 struct cfq_data *cfqd = q->elevator->elevator_data; 1097 struct cfq_data *cfqd = q->elevator->elevator_data;
1200 struct cfq_queue *cfqq; 1098 struct cfq_queue *cfqq;
@@ -1204,12 +1102,25 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
1204 1102
1205 cfqq = cfq_select_queue(cfqd, force); 1103 cfqq = cfq_select_queue(cfqd, force);
1206 if (cfqq) { 1104 if (cfqq) {
1105 int max_dispatch;
1106
1107 /*
1108 * if idle window is disabled, allow queue buildup
1109 */
1110 if (!cfq_cfqq_idle_window(cfqq) &&
1111 cfqd->rq_in_driver >= cfqd->cfq_max_depth)
1112 return 0;
1113
1207 cfq_clear_cfqq_must_dispatch(cfqq); 1114 cfq_clear_cfqq_must_dispatch(cfqq);
1208 cfq_clear_cfqq_wait_request(cfqq); 1115 cfq_clear_cfqq_wait_request(cfqq);
1209 del_timer(&cfqd->idle_slice_timer); 1116 del_timer(&cfqd->idle_slice_timer);
1210 1117
1211 if (cfq_class_idle(cfqq)) 1118 if (!force) {
1212 max_dispatch = 1; 1119 max_dispatch = cfqd->cfq_quantum;
1120 if (cfq_class_idle(cfqq))
1121 max_dispatch = 1;
1122 } else
1123 max_dispatch = INT_MAX;
1213 1124
1214 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1125 return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
1215 } 1126 }
@@ -1217,93 +1128,6 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
1217 return 0; 1128 return 0;
1218} 1129}
1219 1130
1220static inline void cfq_account_dispatch(struct cfq_rq *crq)
1221{
1222 struct cfq_queue *cfqq = crq->cfq_queue;
1223 struct cfq_data *cfqd = cfqq->cfqd;
1224
1225 if (unlikely(!blk_fs_request(crq->request)))
1226 return;
1227
1228 /*
1229 * accounted bit is necessary since some drivers will call
1230 * elv_next_request() many times for the same request (eg ide)
1231 */
1232 if (cfq_crq_in_driver(crq))
1233 return;
1234
1235 cfq_mark_crq_in_driver(crq);
1236 cfqd->rq_in_driver++;
1237}
1238
1239static inline void
1240cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
1241{
1242 struct cfq_data *cfqd = cfqq->cfqd;
1243 unsigned long now;
1244
1245 if (!cfq_crq_in_driver(crq))
1246 return;
1247
1248 now = jiffies;
1249
1250 WARN_ON(!cfqd->rq_in_driver);
1251 cfqd->rq_in_driver--;
1252
1253 if (!cfq_class_idle(cfqq))
1254 cfqd->last_end_request = now;
1255
1256 if (!cfq_cfqq_dispatched(cfqq)) {
1257 if (cfq_cfqq_on_rr(cfqq)) {
1258 cfqq->service_last = now;
1259 cfq_resort_rr_list(cfqq, 0);
1260 }
1261 if (cfq_cfqq_expired(cfqq)) {
1262 __cfq_slice_expired(cfqd, cfqq, 0);
1263 cfq_schedule_dispatch(cfqd);
1264 }
1265 }
1266
1267 if (cfq_crq_is_sync(crq))
1268 crq->io_context->last_end_request = now;
1269}
1270
1271static struct request *cfq_next_request(request_queue_t *q)
1272{
1273 struct cfq_data *cfqd = q->elevator->elevator_data;
1274 struct request *rq;
1275
1276 if (!list_empty(&q->queue_head)) {
1277 struct cfq_rq *crq;
1278dispatch:
1279 rq = list_entry_rq(q->queue_head.next);
1280
1281 crq = RQ_DATA(rq);
1282 if (crq) {
1283 struct cfq_queue *cfqq = crq->cfq_queue;
1284
1285 /*
1286 * if idle window is disabled, allow queue buildup
1287 */
1288 if (!cfq_crq_in_driver(crq) &&
1289 !cfq_cfqq_idle_window(cfqq) &&
1290 !blk_barrier_rq(rq) &&
1291 cfqd->rq_in_driver >= cfqd->cfq_max_depth)
1292 return NULL;
1293
1294 cfq_remove_merge_hints(q, crq);
1295 cfq_account_dispatch(crq);
1296 }
1297
1298 return rq;
1299 }
1300
1301 if (cfq_dispatch_requests(q, cfqd->cfq_quantum, 0))
1302 goto dispatch;
1303
1304 return NULL;
1305}
1306
1307/* 1131/*
1308 * task holds one reference to the queue, dropped when task exits. each crq 1132 * task holds one reference to the queue, dropped when task exits. each crq
1309 * in-flight on this queue also holds a reference, dropped when crq is freed. 1133 * in-flight on this queue also holds a reference, dropped when crq is freed.
@@ -1816,8 +1640,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1816 } 1640 }
1817} 1641}
1818 1642
1819static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq) 1643static void cfq_insert_request(request_queue_t *q, struct request *rq)
1820{ 1644{
1645 struct cfq_data *cfqd = q->elevator->elevator_data;
1821 struct cfq_rq *crq = RQ_DATA(rq); 1646 struct cfq_rq *crq = RQ_DATA(rq);
1822 struct cfq_queue *cfqq = crq->cfq_queue; 1647 struct cfq_queue *cfqq = crq->cfq_queue;
1823 1648
@@ -1827,66 +1652,43 @@ static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
1827 1652
1828 list_add_tail(&rq->queuelist, &cfqq->fifo); 1653 list_add_tail(&rq->queuelist, &cfqq->fifo);
1829 1654
1830 if (rq_mergeable(rq)) { 1655 if (rq_mergeable(rq))
1831 cfq_add_crq_hash(cfqd, crq); 1656 cfq_add_crq_hash(cfqd, crq);
1832 1657
1833 if (!cfqd->queue->last_merge)
1834 cfqd->queue->last_merge = rq;
1835 }
1836
1837 cfq_crq_enqueued(cfqd, cfqq, crq); 1658 cfq_crq_enqueued(cfqd, cfqq, crq);
1838} 1659}
1839 1660
1840static void
1841cfq_insert_request(request_queue_t *q, struct request *rq, int where)
1842{
1843 struct cfq_data *cfqd = q->elevator->elevator_data;
1844
1845 switch (where) {
1846 case ELEVATOR_INSERT_BACK:
1847 while (cfq_dispatch_requests(q, INT_MAX, 1))
1848 ;
1849 list_add_tail(&rq->queuelist, &q->queue_head);
1850 /*
1851 * If we were idling with pending requests on
1852 * inactive cfqqs, force dispatching will
1853 * remove the idle timer and the queue won't
1854 * be kicked by __make_request() afterward.
1855 * Kick it here.
1856 */
1857 cfq_schedule_dispatch(cfqd);
1858 break;
1859 case ELEVATOR_INSERT_FRONT:
1860 list_add(&rq->queuelist, &q->queue_head);
1861 break;
1862 case ELEVATOR_INSERT_SORT:
1863 BUG_ON(!blk_fs_request(rq));
1864 cfq_enqueue(cfqd, rq);
1865 break;
1866 default:
1867 printk("%s: bad insert point %d\n", __FUNCTION__,where);
1868 return;
1869 }
1870}
1871
1872static void cfq_completed_request(request_queue_t *q, struct request *rq) 1661static void cfq_completed_request(request_queue_t *q, struct request *rq)
1873{ 1662{
1874 struct cfq_rq *crq = RQ_DATA(rq); 1663 struct cfq_rq *crq = RQ_DATA(rq);
1875 struct cfq_queue *cfqq; 1664 struct cfq_queue *cfqq = crq->cfq_queue;
1665 struct cfq_data *cfqd = cfqq->cfqd;
1666 const int sync = cfq_crq_is_sync(crq);
1667 unsigned long now;
1876 1668
1877 if (unlikely(!blk_fs_request(rq))) 1669 now = jiffies;
1878 return;
1879 1670
1880 cfqq = crq->cfq_queue; 1671 WARN_ON(!cfqd->rq_in_driver);
1672 WARN_ON(!cfqq->on_dispatch[sync]);
1673 cfqd->rq_in_driver--;
1674 cfqq->on_dispatch[sync]--;
1881 1675
1882 if (cfq_crq_in_flight(crq)) { 1676 if (!cfq_class_idle(cfqq))
1883 const int sync = cfq_crq_is_sync(crq); 1677 cfqd->last_end_request = now;
1884 1678
1885 WARN_ON(!cfqq->on_dispatch[sync]); 1679 if (!cfq_cfqq_dispatched(cfqq)) {
1886 cfqq->on_dispatch[sync]--; 1680 if (cfq_cfqq_on_rr(cfqq)) {
1681 cfqq->service_last = now;
1682 cfq_resort_rr_list(cfqq, 0);
1683 }
1684 if (cfq_cfqq_expired(cfqq)) {
1685 __cfq_slice_expired(cfqd, cfqq, 0);
1686 cfq_schedule_dispatch(cfqd);
1687 }
1887 } 1688 }
1888 1689
1889 cfq_account_completion(cfqq, crq); 1690 if (cfq_crq_is_sync(crq))
1691 crq->io_context->last_end_request = now;
1890} 1692}
1891 1693
1892static struct request * 1694static struct request *
@@ -2118,9 +1920,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
2118 INIT_HLIST_NODE(&crq->hash); 1920 INIT_HLIST_NODE(&crq->hash);
2119 crq->cfq_queue = cfqq; 1921 crq->cfq_queue = cfqq;
2120 crq->io_context = cic; 1922 crq->io_context = cic;
2121 cfq_clear_crq_in_flight(crq);
2122 cfq_clear_crq_in_driver(crq);
2123 cfq_clear_crq_requeued(crq);
2124 1923
2125 if (rw == READ || process_sync(tsk)) 1924 if (rw == READ || process_sync(tsk))
2126 cfq_mark_crq_is_sync(crq); 1925 cfq_mark_crq_is_sync(crq);
@@ -2201,7 +2000,7 @@ static void cfq_idle_slice_timer(unsigned long data)
2201 * only expire and reinvoke request handler, if there are 2000 * only expire and reinvoke request handler, if there are
2202 * other queues with pending requests 2001 * other queues with pending requests
2203 */ 2002 */
2204 if (!cfq_pending_requests(cfqd)) { 2003 if (!cfqd->busy_queues) {
2205 cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end); 2004 cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end);
2206 add_timer(&cfqd->idle_slice_timer); 2005 add_timer(&cfqd->idle_slice_timer);
2207 goto out_cont; 2006 goto out_cont;
@@ -2576,10 +2375,9 @@ static struct elevator_type iosched_cfq = {
2576 .elevator_merge_fn = cfq_merge, 2375 .elevator_merge_fn = cfq_merge,
2577 .elevator_merged_fn = cfq_merged_request, 2376 .elevator_merged_fn = cfq_merged_request,
2578 .elevator_merge_req_fn = cfq_merged_requests, 2377 .elevator_merge_req_fn = cfq_merged_requests,
2579 .elevator_next_req_fn = cfq_next_request, 2378 .elevator_dispatch_fn = cfq_dispatch_requests,
2580 .elevator_add_req_fn = cfq_insert_request, 2379 .elevator_add_req_fn = cfq_insert_request,
2581 .elevator_remove_req_fn = cfq_remove_request, 2380 .elevator_activate_req_fn = cfq_activate_request,
2582 .elevator_requeue_req_fn = cfq_requeue_request,
2583 .elevator_deactivate_req_fn = cfq_deactivate_request, 2381 .elevator_deactivate_req_fn = cfq_deactivate_request,
2584 .elevator_queue_empty_fn = cfq_queue_empty, 2382 .elevator_queue_empty_fn = cfq_queue_empty,
2585 .elevator_completed_req_fn = cfq_completed_request, 2383 .elevator_completed_req_fn = cfq_completed_request,
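
Note on the hook changes just above: cfq no longer supplies next_req/remove_req/requeue hooks. Instead the generic layer notifies the scheduler through elevator_activate_req_fn when the driver first pulls a sorted request off the dispatch queue, and through elevator_deactivate_req_fn when such a request is requeued. A minimal sketch of that pairing, using an illustrative scheduler called "simple" (none of these names are in the patch; the counter only mirrors the cfqd->rq_in_driver accounting visible in cfq_completed_request() above):

struct simple_data {
        struct list_head fifo;          /* requests filed but not yet dispatched */
        int rq_in_driver;               /* requests currently with the driver */
};

static void simple_activate_request(request_queue_t *q, struct request *rq)
{
        struct simple_data *sd = q->elevator->elevator_data;

        /* called from elv_next_request() the first time the driver sees rq */
        sd->rq_in_driver++;
}

static void simple_deactivate_request(request_queue_t *q, struct request *rq)
{
        struct simple_data *sd = q->elevator->elevator_data;

        /* called from elv_requeue_request() when the driver hands rq back */
        sd->rq_in_driver--;
}
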
diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c
index 753546ba2262..7929471d7df7 100644
--- a/drivers/block/deadline-iosched.c
+++ b/drivers/block/deadline-iosched.c
@@ -50,7 +50,6 @@ struct deadline_data {
50 * next in sort order. read, write or both are NULL 50 * next in sort order. read, write or both are NULL
51 */ 51 */
52 struct deadline_rq *next_drq[2]; 52 struct deadline_rq *next_drq[2];
53 struct list_head *dispatch; /* driver dispatch queue */
54 struct list_head *hash; /* request hash */ 53 struct list_head *hash; /* request hash */
55 unsigned int batching; /* number of sequential requests made */ 54 unsigned int batching; /* number of sequential requests made */
56 sector_t last_sector; /* head position */ 55 sector_t last_sector; /* head position */
@@ -113,15 +112,6 @@ static inline void deadline_del_drq_hash(struct deadline_rq *drq)
113 __deadline_del_drq_hash(drq); 112 __deadline_del_drq_hash(drq);
114} 113}
115 114
116static void
117deadline_remove_merge_hints(request_queue_t *q, struct deadline_rq *drq)
118{
119 deadline_del_drq_hash(drq);
120
121 if (q->last_merge == drq->request)
122 q->last_merge = NULL;
123}
124
125static inline void 115static inline void
126deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) 116deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
127{ 117{
@@ -239,10 +229,9 @@ deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
239 dd->next_drq[data_dir] = rb_entry_drq(rbnext); 229 dd->next_drq[data_dir] = rb_entry_drq(rbnext);
240 } 230 }
241 231
242 if (ON_RB(&drq->rb_node)) { 232 BUG_ON(!ON_RB(&drq->rb_node));
243 rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); 233 rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
244 RB_CLEAR(&drq->rb_node); 234 RB_CLEAR(&drq->rb_node);
245 }
246} 235}
247 236
248static struct request * 237static struct request *
@@ -286,7 +275,7 @@ deadline_find_first_drq(struct deadline_data *dd, int data_dir)
286/* 275/*
287 * add drq to rbtree and fifo 276 * add drq to rbtree and fifo
288 */ 277 */
289static inline void 278static void
290deadline_add_request(struct request_queue *q, struct request *rq) 279deadline_add_request(struct request_queue *q, struct request *rq)
291{ 280{
292 struct deadline_data *dd = q->elevator->elevator_data; 281 struct deadline_data *dd = q->elevator->elevator_data;
@@ -301,12 +290,8 @@ deadline_add_request(struct request_queue *q, struct request *rq)
301 drq->expires = jiffies + dd->fifo_expire[data_dir]; 290 drq->expires = jiffies + dd->fifo_expire[data_dir];
302 list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); 291 list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
303 292
304 if (rq_mergeable(rq)) { 293 if (rq_mergeable(rq))
305 deadline_add_drq_hash(dd, drq); 294 deadline_add_drq_hash(dd, drq);
306
307 if (!q->last_merge)
308 q->last_merge = rq;
309 }
310} 295}
311 296
312/* 297/*
@@ -315,14 +300,11 @@ deadline_add_request(struct request_queue *q, struct request *rq)
315static void deadline_remove_request(request_queue_t *q, struct request *rq) 300static void deadline_remove_request(request_queue_t *q, struct request *rq)
316{ 301{
317 struct deadline_rq *drq = RQ_DATA(rq); 302 struct deadline_rq *drq = RQ_DATA(rq);
303 struct deadline_data *dd = q->elevator->elevator_data;
318 304
319 if (drq) { 305 list_del_init(&drq->fifo);
320 struct deadline_data *dd = q->elevator->elevator_data; 306 deadline_del_drq_rb(dd, drq);
321 307 deadline_del_drq_hash(drq);
322 list_del_init(&drq->fifo);
323 deadline_remove_merge_hints(q, drq);
324 deadline_del_drq_rb(dd, drq);
325 }
326} 308}
327 309
328static int 310static int
@@ -333,15 +315,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
333 int ret; 315 int ret;
334 316
335 /* 317 /*
336 * try last_merge to avoid going to hash
337 */
338 ret = elv_try_last_merge(q, bio);
339 if (ret != ELEVATOR_NO_MERGE) {
340 __rq = q->last_merge;
341 goto out_insert;
342 }
343
344 /*
345 * see if the merge hash can satisfy a back merge 318 * see if the merge hash can satisfy a back merge
346 */ 319 */
347 __rq = deadline_find_drq_hash(dd, bio->bi_sector); 320 __rq = deadline_find_drq_hash(dd, bio->bi_sector);
@@ -373,8 +346,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
373 346
374 return ELEVATOR_NO_MERGE; 347 return ELEVATOR_NO_MERGE;
375out: 348out:
376 q->last_merge = __rq;
377out_insert:
378 if (ret) 349 if (ret)
379 deadline_hot_drq_hash(dd, RQ_DATA(__rq)); 350 deadline_hot_drq_hash(dd, RQ_DATA(__rq));
380 *req = __rq; 351 *req = __rq;
@@ -399,8 +370,6 @@ static void deadline_merged_request(request_queue_t *q, struct request *req)
399 deadline_del_drq_rb(dd, drq); 370 deadline_del_drq_rb(dd, drq);
400 deadline_add_drq_rb(dd, drq); 371 deadline_add_drq_rb(dd, drq);
401 } 372 }
402
403 q->last_merge = req;
404} 373}
405 374
406static void 375static void
@@ -452,7 +421,7 @@ deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
452 request_queue_t *q = drq->request->q; 421 request_queue_t *q = drq->request->q;
453 422
454 deadline_remove_request(q, drq->request); 423 deadline_remove_request(q, drq->request);
455 list_add_tail(&drq->request->queuelist, dd->dispatch); 424 elv_dispatch_add_tail(q, drq->request);
456} 425}
457 426
458/* 427/*
@@ -502,8 +471,9 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
502 * deadline_dispatch_requests selects the best request according to 471 * deadline_dispatch_requests selects the best request according to
503 * read/write expire, fifo_batch, etc 472 * read/write expire, fifo_batch, etc
504 */ 473 */
505static int deadline_dispatch_requests(struct deadline_data *dd) 474static int deadline_dispatch_requests(request_queue_t *q, int force)
506{ 475{
476 struct deadline_data *dd = q->elevator->elevator_data;
507 const int reads = !list_empty(&dd->fifo_list[READ]); 477 const int reads = !list_empty(&dd->fifo_list[READ]);
508 const int writes = !list_empty(&dd->fifo_list[WRITE]); 478 const int writes = !list_empty(&dd->fifo_list[WRITE]);
509 struct deadline_rq *drq; 479 struct deadline_rq *drq;
@@ -597,65 +567,12 @@ dispatch_request:
597 return 1; 567 return 1;
598} 568}
599 569
600static struct request *deadline_next_request(request_queue_t *q)
601{
602 struct deadline_data *dd = q->elevator->elevator_data;
603 struct request *rq;
604
605 /*
606 * if there are still requests on the dispatch queue, grab the first one
607 */
608 if (!list_empty(dd->dispatch)) {
609dispatch:
610 rq = list_entry_rq(dd->dispatch->next);
611 return rq;
612 }
613
614 if (deadline_dispatch_requests(dd))
615 goto dispatch;
616
617 return NULL;
618}
619
620static void
621deadline_insert_request(request_queue_t *q, struct request *rq, int where)
622{
623 struct deadline_data *dd = q->elevator->elevator_data;
624
625 /* barriers must flush the reorder queue */
626 if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
627 && where == ELEVATOR_INSERT_SORT))
628 where = ELEVATOR_INSERT_BACK;
629
630 switch (where) {
631 case ELEVATOR_INSERT_BACK:
632 while (deadline_dispatch_requests(dd))
633 ;
634 list_add_tail(&rq->queuelist, dd->dispatch);
635 break;
636 case ELEVATOR_INSERT_FRONT:
637 list_add(&rq->queuelist, dd->dispatch);
638 break;
639 case ELEVATOR_INSERT_SORT:
640 BUG_ON(!blk_fs_request(rq));
641 deadline_add_request(q, rq);
642 break;
643 default:
644 printk("%s: bad insert point %d\n", __FUNCTION__,where);
645 return;
646 }
647}
648
649static int deadline_queue_empty(request_queue_t *q) 570static int deadline_queue_empty(request_queue_t *q)
650{ 571{
651 struct deadline_data *dd = q->elevator->elevator_data; 572 struct deadline_data *dd = q->elevator->elevator_data;
652 573
653 if (!list_empty(&dd->fifo_list[WRITE]) 574 return list_empty(&dd->fifo_list[WRITE])
654 || !list_empty(&dd->fifo_list[READ]) 575 && list_empty(&dd->fifo_list[READ]);
655 || !list_empty(dd->dispatch))
656 return 0;
657
658 return 1;
659} 576}
660 577
661static struct request * 578static struct request *
@@ -733,7 +650,6 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
733 INIT_LIST_HEAD(&dd->fifo_list[WRITE]); 650 INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
734 dd->sort_list[READ] = RB_ROOT; 651 dd->sort_list[READ] = RB_ROOT;
735 dd->sort_list[WRITE] = RB_ROOT; 652 dd->sort_list[WRITE] = RB_ROOT;
736 dd->dispatch = &q->queue_head;
737 dd->fifo_expire[READ] = read_expire; 653 dd->fifo_expire[READ] = read_expire;
738 dd->fifo_expire[WRITE] = write_expire; 654 dd->fifo_expire[WRITE] = write_expire;
739 dd->writes_starved = writes_starved; 655 dd->writes_starved = writes_starved;
@@ -748,10 +664,8 @@ static void deadline_put_request(request_queue_t *q, struct request *rq)
748 struct deadline_data *dd = q->elevator->elevator_data; 664 struct deadline_data *dd = q->elevator->elevator_data;
749 struct deadline_rq *drq = RQ_DATA(rq); 665 struct deadline_rq *drq = RQ_DATA(rq);
750 666
751 if (drq) { 667 mempool_free(drq, dd->drq_pool);
752 mempool_free(drq, dd->drq_pool); 668 rq->elevator_private = NULL;
753 rq->elevator_private = NULL;
754 }
755} 669}
756 670
757static int 671static int
@@ -917,9 +831,8 @@ static struct elevator_type iosched_deadline = {
917 .elevator_merge_fn = deadline_merge, 831 .elevator_merge_fn = deadline_merge,
918 .elevator_merged_fn = deadline_merged_request, 832 .elevator_merged_fn = deadline_merged_request,
919 .elevator_merge_req_fn = deadline_merged_requests, 833 .elevator_merge_req_fn = deadline_merged_requests,
920 .elevator_next_req_fn = deadline_next_request, 834 .elevator_dispatch_fn = deadline_dispatch_requests,
921 .elevator_add_req_fn = deadline_insert_request, 835 .elevator_add_req_fn = deadline_add_request,
922 .elevator_remove_req_fn = deadline_remove_request,
923 .elevator_queue_empty_fn = deadline_queue_empty, 836 .elevator_queue_empty_fn = deadline_queue_empty,
924 .elevator_former_req_fn = deadline_former_request, 837 .elevator_former_req_fn = deadline_former_request,
925 .elevator_latter_req_fn = deadline_latter_request, 838 .elevator_latter_req_fn = deadline_latter_request,
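
The deadline changes follow the same pattern as cfq: the private dispatch list is gone, elevator_add_req_fn only files the request internally, and elevator_dispatch_fn(q, force) is what moves requests onto q->queue_head (through elv_dispatch_add_tail() or elv_dispatch_sort()) and returns nonzero if it moved anything; a nonzero force asks the scheduler to drain everything it is holding back. Continuing the illustrative "simple" sketch from the cfq section (not code from this patch), the two hooks could look like:

static void simple_add_request(request_queue_t *q, struct request *rq)
{
        struct simple_data *sd = q->elevator->elevator_data;

        /* file the request internally; nothing reaches q->queue_head yet */
        list_add_tail(&rq->queuelist, &sd->fifo);
}

static int simple_dispatch(request_queue_t *q, int force)
{
        struct simple_data *sd = q->elevator->elevator_data;
        int dispatched = 0;

        while (!list_empty(&sd->fifo)) {
                struct request *rq = list_entry_rq(sd->fifo.next);

                list_del_init(&rq->queuelist);
                elv_dispatch_add_tail(q, rq);   /* hand it to the dispatch queue */
                dispatched++;

                if (!force)
                        break;                  /* one at a time unless asked to drain */
        }

        return dispatched;
}
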
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
index 4f69d222b183..3b8099ccedff 100644
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -83,15 +83,6 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio)
83} 83}
84EXPORT_SYMBOL(elv_try_merge); 84EXPORT_SYMBOL(elv_try_merge);
85 85
86inline int elv_try_last_merge(request_queue_t *q, struct bio *bio)
87{
88 if (q->last_merge)
89 return elv_try_merge(q->last_merge, bio);
90
91 return ELEVATOR_NO_MERGE;
92}
93EXPORT_SYMBOL(elv_try_last_merge);
94
95static struct elevator_type *elevator_find(const char *name) 86static struct elevator_type *elevator_find(const char *name)
96{ 87{
97 struct elevator_type *e = NULL; 88 struct elevator_type *e = NULL;
@@ -143,6 +134,8 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e,
143 INIT_LIST_HEAD(&q->queue_head); 134 INIT_LIST_HEAD(&q->queue_head);
144 q->last_merge = NULL; 135 q->last_merge = NULL;
145 q->elevator = eq; 136 q->elevator = eq;
137 q->end_sector = 0;
138 q->boundary_rq = NULL;
146 139
147 if (eq->ops->elevator_init_fn) 140 if (eq->ops->elevator_init_fn)
148 ret = eq->ops->elevator_init_fn(q, eq); 141 ret = eq->ops->elevator_init_fn(q, eq);
@@ -225,9 +218,52 @@ void elevator_exit(elevator_t *e)
225 kfree(e); 218 kfree(e);
226} 219}
227 220
221/*
222 * Insert rq into dispatch queue of q. Queue lock must be held on
223 * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be
224 * appended to the dispatch queue. To be used by specific elevators.
225 */
226void elv_dispatch_sort(request_queue_t *q, struct request *rq)
227{
228 sector_t boundary;
229 struct list_head *entry;
230
231 if (q->last_merge == rq)
232 q->last_merge = NULL;
233
234 boundary = q->end_sector;
235
236 list_for_each_prev(entry, &q->queue_head) {
237 struct request *pos = list_entry_rq(entry);
238
239 if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
240 break;
241 if (rq->sector >= boundary) {
242 if (pos->sector < boundary)
243 continue;
244 } else {
245 if (pos->sector >= boundary)
246 break;
247 }
248 if (rq->sector >= pos->sector)
249 break;
250 }
251
252 list_add(&rq->queuelist, entry);
253}
254
228int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) 255int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
229{ 256{
230 elevator_t *e = q->elevator; 257 elevator_t *e = q->elevator;
258 int ret;
259
260 if (q->last_merge) {
261 ret = elv_try_merge(q->last_merge, bio);
262 if (ret != ELEVATOR_NO_MERGE) {
263 *req = q->last_merge;
264 return ret;
265 }
266 }
231 267
232 if (e->ops->elevator_merge_fn) 268 if (e->ops->elevator_merge_fn)
233 return e->ops->elevator_merge_fn(q, req, bio); 269 return e->ops->elevator_merge_fn(q, req, bio);
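
Two things are worth noting in the hunk above. First, elv_dispatch_sort() is the helper a scheduler uses when it wants the dispatch queue kept in sector order: it walks q->queue_head backwards, refuses to pass started or barrier requests, and treats q->end_sector as the boundary, so requests behind the last dispatched position sort after those ahead of it. Second, the q->last_merge fast path that each scheduler used to open-code via elv_try_last_merge() now lives once in elv_merge(). A sorted variant of the illustrative dispatch hook from earlier (again, not code from this patch) would simply swap the helper:

static int simple_dispatch_sorted(request_queue_t *q, int force)
{
        struct simple_data *sd = q->elevator->elevator_data;
        struct request *rq;

        if (list_empty(&sd->fifo))
                return 0;

        /* pick whatever the scheduler considers best; FIFO head here */
        rq = list_entry_rq(sd->fifo.next);
        list_del_init(&rq->queuelist);

        elv_dispatch_sort(q, rq);       /* sector-sorted insert into q->queue_head */
        return 1;
}
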
@@ -241,6 +277,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq)
241 277
242 if (e->ops->elevator_merged_fn) 278 if (e->ops->elevator_merged_fn)
243 e->ops->elevator_merged_fn(q, rq); 279 e->ops->elevator_merged_fn(q, rq);
280
281 q->last_merge = rq;
244} 282}
245 283
246void elv_merge_requests(request_queue_t *q, struct request *rq, 284void elv_merge_requests(request_queue_t *q, struct request *rq,
@@ -248,20 +286,13 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
248{ 286{
249 elevator_t *e = q->elevator; 287 elevator_t *e = q->elevator;
250 288
251 if (q->last_merge == next)
252 q->last_merge = NULL;
253
254 if (e->ops->elevator_merge_req_fn) 289 if (e->ops->elevator_merge_req_fn)
255 e->ops->elevator_merge_req_fn(q, rq, next); 290 e->ops->elevator_merge_req_fn(q, rq, next);
291
292 q->last_merge = rq;
256} 293}
257 294
258/* 295void elv_requeue_request(request_queue_t *q, struct request *rq)
259 * For careful internal use by the block layer. Essentially the same as
260 * a requeue in that it tells the io scheduler that this request is not
261 * active in the driver or hardware anymore, but we don't want the request
262 * added back to the scheduler. Function is not exported.
263 */
264void elv_deactivate_request(request_queue_t *q, struct request *rq)
265{ 296{
266 elevator_t *e = q->elevator; 297 elevator_t *e = q->elevator;
267 298
@@ -269,19 +300,14 @@ void elv_deactivate_request(request_queue_t *q, struct request *rq)
269 * it already went through dequeue, we need to decrement the 300 * it already went through dequeue, we need to decrement the
270 * in_flight count again 301 * in_flight count again
271 */ 302 */
272 if (blk_account_rq(rq)) 303 if (blk_account_rq(rq)) {
273 q->in_flight--; 304 q->in_flight--;
305 if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
306 e->ops->elevator_deactivate_req_fn(q, rq);
307 }
274 308
275 rq->flags &= ~REQ_STARTED; 309 rq->flags &= ~REQ_STARTED;
276 310
277 if (e->ops->elevator_deactivate_req_fn)
278 e->ops->elevator_deactivate_req_fn(q, rq);
279}
280
281void elv_requeue_request(request_queue_t *q, struct request *rq)
282{
283 elv_deactivate_request(q, rq);
284
285 /* 311 /*
286 * if this is the flush, requeue the original instead and drop the flush 312 * if this is the flush, requeue the original instead and drop the flush
287 */ 313 */
@@ -290,55 +316,91 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
290 rq = rq->end_io_data; 316 rq = rq->end_io_data;
291 } 317 }
292 318
293 /* 319 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
294 * the request is prepped and may have some resources allocated.
295 * allowing unprepped requests to pass this one may cause resource
296 * deadlock. turn on softbarrier.
297 */
298 rq->flags |= REQ_SOFTBARRIER;
299
300 /*
301 * if iosched has an explicit requeue hook, then use that. otherwise
302 * just put the request at the front of the queue
303 */
304 if (q->elevator->ops->elevator_requeue_req_fn)
305 q->elevator->ops->elevator_requeue_req_fn(q, rq);
306 else
307 __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
308} 320}
309 321
310void __elv_add_request(request_queue_t *q, struct request *rq, int where, 322void __elv_add_request(request_queue_t *q, struct request *rq, int where,
311 int plug) 323 int plug)
312{ 324{
313 /* 325 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
314 * barriers implicitly indicate back insertion 326 /*
315 */ 327 * barriers implicitly indicate back insertion
316 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) && 328 */
317 where == ELEVATOR_INSERT_SORT) 329 if (where == ELEVATOR_INSERT_SORT)
318 where = ELEVATOR_INSERT_BACK; 330 where = ELEVATOR_INSERT_BACK;
331
332 /*
333 * this request is scheduling boundary, update end_sector
334 */
335 if (blk_fs_request(rq)) {
336 q->end_sector = rq_end_sector(rq);
337 q->boundary_rq = rq;
338 }
339 }
319 340
320 if (plug) 341 if (plug)
321 blk_plug_device(q); 342 blk_plug_device(q);
322 343
323 rq->q = q; 344 rq->q = q;
324 345
325 if (!test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) { 346 if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
326 q->elevator->ops->elevator_add_req_fn(q, rq, where);
327
328 if (blk_queue_plugged(q)) {
329 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
330 - q->in_flight;
331
332 if (nrq >= q->unplug_thresh)
333 __generic_unplug_device(q);
334 }
335 } else
336 /* 347 /*
337 * if drain is set, store the request "locally". when the drain 348 * if drain is set, store the request "locally". when the drain
338 * is finished, the requests will be handed ordered to the io 349 * is finished, the requests will be handed ordered to the io
339 * scheduler 350 * scheduler
340 */ 351 */
341 list_add_tail(&rq->queuelist, &q->drain_list); 352 list_add_tail(&rq->queuelist, &q->drain_list);
353 return;
354 }
355
356 switch (where) {
357 case ELEVATOR_INSERT_FRONT:
358 rq->flags |= REQ_SOFTBARRIER;
359
360 list_add(&rq->queuelist, &q->queue_head);
361 break;
362
363 case ELEVATOR_INSERT_BACK:
364 rq->flags |= REQ_SOFTBARRIER;
365
366 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
367 ;
368 list_add_tail(&rq->queuelist, &q->queue_head);
369 /*
370 * We kick the queue here for the following reasons.
371 * - The elevator might have returned NULL previously
372 * to delay requests and returned them now. As the
373 * queue wasn't empty before this request, ll_rw_blk
374 * won't run the queue on return, resulting in hang.
375 * - Usually, back inserted requests won't be merged
376 * with anything. There's no point in delaying queue
377 * processing.
378 */
379 blk_remove_plug(q);
380 q->request_fn(q);
381 break;
382
383 case ELEVATOR_INSERT_SORT:
384 BUG_ON(!blk_fs_request(rq));
385 rq->flags |= REQ_SORTED;
386 q->elevator->ops->elevator_add_req_fn(q, rq);
387 if (q->last_merge == NULL && rq_mergeable(rq))
388 q->last_merge = rq;
389 break;
390
391 default:
392 printk(KERN_ERR "%s: bad insertion point %d\n",
393 __FUNCTION__, where);
394 BUG();
395 }
396
397 if (blk_queue_plugged(q)) {
398 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
399 - q->in_flight;
400
401 if (nrq >= q->unplug_thresh)
402 __generic_unplug_device(q);
403 }
342} 404}
343 405
344void elv_add_request(request_queue_t *q, struct request *rq, int where, 406void elv_add_request(request_queue_t *q, struct request *rq, int where,
@@ -353,13 +415,19 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
353 415
354static inline struct request *__elv_next_request(request_queue_t *q) 416static inline struct request *__elv_next_request(request_queue_t *q)
355{ 417{
356 struct request *rq = q->elevator->ops->elevator_next_req_fn(q); 418 struct request *rq;
419
420 if (unlikely(list_empty(&q->queue_head) &&
421 !q->elevator->ops->elevator_dispatch_fn(q, 0)))
422 return NULL;
423
424 rq = list_entry_rq(q->queue_head.next);
357 425
358 /* 426 /*
359 * if this is a barrier write and the device has to issue a 427 * if this is a barrier write and the device has to issue a
360 * flush sequence to support it, check how far we are 428 * flush sequence to support it, check how far we are
361 */ 429 */
362 if (rq && blk_fs_request(rq) && blk_barrier_rq(rq)) { 430 if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
363 BUG_ON(q->ordered == QUEUE_ORDERED_NONE); 431 BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
364 432
365 if (q->ordered == QUEUE_ORDERED_FLUSH && 433 if (q->ordered == QUEUE_ORDERED_FLUSH &&
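
The effect of the rewritten __elv_next_request() is that the dispatch queue is refilled lazily: when q->queue_head runs dry, the scheduler's dispatch hook is asked for more, and drivers keep the same pull loop they had before. A sketch of that driver side, with hypothetical mydrv_* names (only elv_next_request() and blkdev_dequeue_request() are real interfaces here):

static void mydrv_issue(struct request *rq);    /* driver-specific, not shown */

static void mydrv_request_fn(request_queue_t *q)
{
        struct request *rq;

        while ((rq = elv_next_request(q)) != NULL) {
                blkdev_dequeue_request(rq);     /* now a wrapper around elv_dequeue_request() */
                mydrv_issue(rq);                /* completion later runs through __blk_put_request() */
        }
}
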
@@ -376,15 +444,30 @@ struct request *elv_next_request(request_queue_t *q)
376 int ret; 444 int ret;
377 445
378 while ((rq = __elv_next_request(q)) != NULL) { 446 while ((rq = __elv_next_request(q)) != NULL) {
379 /* 447 if (!(rq->flags & REQ_STARTED)) {
380 * just mark as started even if we don't start it, a request 448 elevator_t *e = q->elevator;
381 * that has been delayed should not be passed by new incoming 449
382 * requests 450 /*
383 */ 451 * This is the first time the device driver
384 rq->flags |= REQ_STARTED; 452 * sees this request (possibly after
453 * requeueing). Notify IO scheduler.
454 */
455 if (blk_sorted_rq(rq) &&
456 e->ops->elevator_activate_req_fn)
457 e->ops->elevator_activate_req_fn(q, rq);
385 458
386 if (rq == q->last_merge) 459 /*
387 q->last_merge = NULL; 460 * just mark as started even if we don't start
461 * it, a request that has been delayed should
462 * not be passed by new incoming requests
463 */
464 rq->flags |= REQ_STARTED;
465 }
466
467 if (!q->boundary_rq || q->boundary_rq == rq) {
468 q->end_sector = rq_end_sector(rq);
469 q->boundary_rq = NULL;
470 }
388 471
389 if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) 472 if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
390 break; 473 break;
@@ -396,9 +479,9 @@ struct request *elv_next_request(request_queue_t *q)
396 /* 479 /*
397 * the request may have been (partially) prepped. 480 * the request may have been (partially) prepped.
398 * we need to keep this request in the front to 481 * we need to keep this request in the front to
399 * avoid resource deadlock. turn on softbarrier. 482 * avoid resource deadlock. REQ_STARTED will
483 * prevent other fs requests from passing this one.
400 */ 484 */
401 rq->flags |= REQ_SOFTBARRIER;
402 rq = NULL; 485 rq = NULL;
403 break; 486 break;
404 } else if (ret == BLKPREP_KILL) { 487 } else if (ret == BLKPREP_KILL) {
@@ -421,42 +504,32 @@ struct request *elv_next_request(request_queue_t *q)
421 return rq; 504 return rq;
422} 505}
423 506
424void elv_remove_request(request_queue_t *q, struct request *rq) 507void elv_dequeue_request(request_queue_t *q, struct request *rq)
425{ 508{
426 elevator_t *e = q->elevator; 509 BUG_ON(list_empty(&rq->queuelist));
510
511 list_del_init(&rq->queuelist);
427 512
428 /* 513 /*
429 * the time frame between a request being removed from the lists 514 * the time frame between a request being removed from the lists
430 * and to it is freed is accounted as io that is in progress at 515 * and to it is freed is accounted as io that is in progress at
431 * the driver side. note that we only account requests that the 516 * the driver side.
432 * driver has seen (REQ_STARTED set), to avoid false accounting
433 * for request-request merges
434 */ 517 */
435 if (blk_account_rq(rq)) 518 if (blk_account_rq(rq))
436 q->in_flight++; 519 q->in_flight++;
437
438 /*
439 * the main clearing point for q->last_merge is on retrieval of
440 * request by driver (it calls elv_next_request()), but it _can_
441 * also happen here if a request is added to the queue but later
442 * deleted without ever being given to driver (merged with another
443 * request).
444 */
445 if (rq == q->last_merge)
446 q->last_merge = NULL;
447
448 if (e->ops->elevator_remove_req_fn)
449 e->ops->elevator_remove_req_fn(q, rq);
450} 520}
451 521
452int elv_queue_empty(request_queue_t *q) 522int elv_queue_empty(request_queue_t *q)
453{ 523{
454 elevator_t *e = q->elevator; 524 elevator_t *e = q->elevator;
455 525
526 if (!list_empty(&q->queue_head))
527 return 0;
528
456 if (e->ops->elevator_queue_empty_fn) 529 if (e->ops->elevator_queue_empty_fn)
457 return e->ops->elevator_queue_empty_fn(q); 530 return e->ops->elevator_queue_empty_fn(q);
458 531
459 return list_empty(&q->queue_head); 532 return 1;
460} 533}
461 534
462struct request *elv_latter_request(request_queue_t *q, struct request *rq) 535struct request *elv_latter_request(request_queue_t *q, struct request *rq)
@@ -528,11 +601,11 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
528 /* 601 /*
529 * request is released from the driver, io must be done 602 * request is released from the driver, io must be done
530 */ 603 */
531 if (blk_account_rq(rq)) 604 if (blk_account_rq(rq)) {
532 q->in_flight--; 605 q->in_flight--;
533 606 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
534 if (e->ops->elevator_completed_req_fn) 607 e->ops->elevator_completed_req_fn(q, rq);
535 e->ops->elevator_completed_req_fn(q, rq); 608 }
536} 609}
537 610
538int elv_register_queue(struct request_queue *q) 611int elv_register_queue(struct request_queue *q)
@@ -705,11 +778,12 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
705 return len; 778 return len;
706} 779}
707 780
781EXPORT_SYMBOL(elv_dispatch_sort);
708EXPORT_SYMBOL(elv_add_request); 782EXPORT_SYMBOL(elv_add_request);
709EXPORT_SYMBOL(__elv_add_request); 783EXPORT_SYMBOL(__elv_add_request);
710EXPORT_SYMBOL(elv_requeue_request); 784EXPORT_SYMBOL(elv_requeue_request);
711EXPORT_SYMBOL(elv_next_request); 785EXPORT_SYMBOL(elv_next_request);
712EXPORT_SYMBOL(elv_remove_request); 786EXPORT_SYMBOL(elv_dequeue_request);
713EXPORT_SYMBOL(elv_queue_empty); 787EXPORT_SYMBOL(elv_queue_empty);
714EXPORT_SYMBOL(elv_completed_request); 788EXPORT_SYMBOL(elv_completed_request);
715EXPORT_SYMBOL(elevator_exit); 789EXPORT_SYMBOL(elevator_exit);
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index ac31ea170058..c4f186223bc5 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -353,6 +353,8 @@ static void blk_pre_flush_end_io(struct request *flush_rq)
353 struct request *rq = flush_rq->end_io_data; 353 struct request *rq = flush_rq->end_io_data;
354 request_queue_t *q = rq->q; 354 request_queue_t *q = rq->q;
355 355
356 elv_completed_request(q, flush_rq);
357
356 rq->flags |= REQ_BAR_PREFLUSH; 358 rq->flags |= REQ_BAR_PREFLUSH;
357 359
358 if (!flush_rq->errors) 360 if (!flush_rq->errors)
@@ -369,6 +371,8 @@ static void blk_post_flush_end_io(struct request *flush_rq)
369 struct request *rq = flush_rq->end_io_data; 371 struct request *rq = flush_rq->end_io_data;
370 request_queue_t *q = rq->q; 372 request_queue_t *q = rq->q;
371 373
374 elv_completed_request(q, flush_rq);
375
372 rq->flags |= REQ_BAR_POSTFLUSH; 376 rq->flags |= REQ_BAR_POSTFLUSH;
373 377
374 q->end_flush_fn(q, flush_rq); 378 q->end_flush_fn(q, flush_rq);
@@ -408,8 +412,6 @@ struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
408 if (!list_empty(&rq->queuelist)) 412 if (!list_empty(&rq->queuelist))
409 blkdev_dequeue_request(rq); 413 blkdev_dequeue_request(rq);
410 414
411 elv_deactivate_request(q, rq);
412
413 flush_rq->end_io_data = rq; 415 flush_rq->end_io_data = rq;
414 flush_rq->end_io = blk_pre_flush_end_io; 416 flush_rq->end_io = blk_pre_flush_end_io;
415 417
@@ -1040,6 +1042,7 @@ EXPORT_SYMBOL(blk_queue_invalidate_tags);
1040static char *rq_flags[] = { 1042static char *rq_flags[] = {
1041 "REQ_RW", 1043 "REQ_RW",
1042 "REQ_FAILFAST", 1044 "REQ_FAILFAST",
1045 "REQ_SORTED",
1043 "REQ_SOFTBARRIER", 1046 "REQ_SOFTBARRIER",
1044 "REQ_HARDBARRIER", 1047 "REQ_HARDBARRIER",
1045 "REQ_CMD", 1048 "REQ_CMD",
@@ -2456,6 +2459,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
2456 if (unlikely(--req->ref_count)) 2459 if (unlikely(--req->ref_count))
2457 return; 2460 return;
2458 2461
2462 elv_completed_request(q, req);
2463
2459 req->rq_status = RQ_INACTIVE; 2464 req->rq_status = RQ_INACTIVE;
2460 req->rl = NULL; 2465 req->rl = NULL;
2461 2466
@@ -2466,8 +2471,6 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
2466 if (rl) { 2471 if (rl) {
2467 int rw = rq_data_dir(req); 2472 int rw = rq_data_dir(req);
2468 2473
2469 elv_completed_request(q, req);
2470
2471 BUG_ON(!list_empty(&req->queuelist)); 2474 BUG_ON(!list_empty(&req->queuelist));
2472 2475
2473 blk_free_request(q, req); 2476 blk_free_request(q, req);
@@ -2477,14 +2480,14 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
2477 2480
2478void blk_put_request(struct request *req) 2481void blk_put_request(struct request *req)
2479{ 2482{
2483 unsigned long flags;
2484 request_queue_t *q = req->q;
2485
2480 /* 2486 /*
2481 * if req->rl isn't set, this request didnt originate from the 2487 * Gee, IDE calls in w/ NULL q. Fix IDE and remove the
2482 * block layer, so it's safe to just disregard it 2488 * following if (q) test.
2483 */ 2489 */
2484 if (req->rl) { 2490 if (q) {
2485 unsigned long flags;
2486 request_queue_t *q = req->q;
2487
2488 spin_lock_irqsave(q->queue_lock, flags); 2491 spin_lock_irqsave(q->queue_lock, flags);
2489 __blk_put_request(q, req); 2492 __blk_put_request(q, req);
2490 spin_unlock_irqrestore(q->queue_lock, flags); 2493 spin_unlock_irqrestore(q->queue_lock, flags);
diff --git a/drivers/block/noop-iosched.c b/drivers/block/noop-iosched.c
index b1730b62c37e..f56b8edb06e4 100644
--- a/drivers/block/noop-iosched.c
+++ b/drivers/block/noop-iosched.c
@@ -7,57 +7,19 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/init.h> 8#include <linux/init.h>
9 9
10/* 10static void elevator_noop_add_request(request_queue_t *q, struct request *rq)
11 * See if we can find a request that this buffer can be coalesced with.
12 */
13static int elevator_noop_merge(request_queue_t *q, struct request **req,
14 struct bio *bio)
15{
16 int ret;
17
18 ret = elv_try_last_merge(q, bio);
19 if (ret != ELEVATOR_NO_MERGE)
20 *req = q->last_merge;
21
22 return ret;
23}
24
25static void elevator_noop_merge_requests(request_queue_t *q, struct request *req,
26 struct request *next)
27{
28 list_del_init(&next->queuelist);
29}
30
31static void elevator_noop_add_request(request_queue_t *q, struct request *rq,
32 int where)
33{ 11{
34 if (where == ELEVATOR_INSERT_FRONT) 12 elv_dispatch_add_tail(q, rq);
35 list_add(&rq->queuelist, &q->queue_head);
36 else
37 list_add_tail(&rq->queuelist, &q->queue_head);
38
39 /*
40 * new merges must not precede this barrier
41 */
42 if (rq->flags & REQ_HARDBARRIER)
43 q->last_merge = NULL;
44 else if (!q->last_merge)
45 q->last_merge = rq;
46} 13}
47 14
48static struct request *elevator_noop_next_request(request_queue_t *q) 15static int elevator_noop_dispatch(request_queue_t *q, int force)
49{ 16{
50 if (!list_empty(&q->queue_head)) 17 return 0;
51 return list_entry_rq(q->queue_head.next);
52
53 return NULL;
54} 18}
55 19
56static struct elevator_type elevator_noop = { 20static struct elevator_type elevator_noop = {
57 .ops = { 21 .ops = {
58 .elevator_merge_fn = elevator_noop_merge, 22 .elevator_dispatch_fn = elevator_noop_dispatch,
59 .elevator_merge_req_fn = elevator_noop_merge_requests,
60 .elevator_next_req_fn = elevator_noop_next_request,
61 .elevator_add_req_fn = elevator_noop_add_request, 23 .elevator_add_req_fn = elevator_noop_add_request,
62 }, 24 },
63 .elevator_name = "noop", 25 .elevator_name = "noop",
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1afbdb2d752c..fffe163316aa 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -203,6 +203,7 @@ struct request {
203enum rq_flag_bits { 203enum rq_flag_bits {
204 __REQ_RW, /* not set, read. set, write */ 204 __REQ_RW, /* not set, read. set, write */
205 __REQ_FAILFAST, /* no low level driver retries */ 205 __REQ_FAILFAST, /* no low level driver retries */
206 __REQ_SORTED, /* elevator knows about this request */
206 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 207 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
207 __REQ_HARDBARRIER, /* may not be passed by drive either */ 208 __REQ_HARDBARRIER, /* may not be passed by drive either */
208 __REQ_CMD, /* is a regular fs rw request */ 209 __REQ_CMD, /* is a regular fs rw request */
@@ -235,6 +236,7 @@ enum rq_flag_bits {
235 236
236#define REQ_RW (1 << __REQ_RW) 237#define REQ_RW (1 << __REQ_RW)
237#define REQ_FAILFAST (1 << __REQ_FAILFAST) 238#define REQ_FAILFAST (1 << __REQ_FAILFAST)
239#define REQ_SORTED (1 << __REQ_SORTED)
238#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) 240#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
239#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) 241#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER)
240#define REQ_CMD (1 << __REQ_CMD) 242#define REQ_CMD (1 << __REQ_CMD)
@@ -333,6 +335,12 @@ struct request_queue
333 end_flush_fn *end_flush_fn; 335 end_flush_fn *end_flush_fn;
334 336
335 /* 337 /*
338 * Dispatch queue sorting
339 */
340 sector_t end_sector;
341 struct request *boundary_rq;
342
343 /*
336 * Auto-unplugging state 344 * Auto-unplugging state
337 */ 345 */
338 struct timer_list unplug_timer; 346 struct timer_list unplug_timer;
@@ -454,6 +462,7 @@ enum {
454#define blk_pm_request(rq) \ 462#define blk_pm_request(rq) \
455 ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME)) 463 ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME))
456 464
465#define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED)
457#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER) 466#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER)
458#define blk_barrier_preflush(rq) ((rq)->flags & REQ_BAR_PREFLUSH) 467#define blk_barrier_preflush(rq) ((rq)->flags & REQ_BAR_PREFLUSH)
459#define blk_barrier_postflush(rq) ((rq)->flags & REQ_BAR_POSTFLUSH) 468#define blk_barrier_postflush(rq) ((rq)->flags & REQ_BAR_POSTFLUSH)
@@ -611,12 +620,21 @@ extern void end_request(struct request *req, int uptodate);
611 620
612static inline void blkdev_dequeue_request(struct request *req) 621static inline void blkdev_dequeue_request(struct request *req)
613{ 622{
614 BUG_ON(list_empty(&req->queuelist)); 623 elv_dequeue_request(req->q, req);
624}
615 625
616 list_del_init(&req->queuelist); 626/*
627 * This should be in elevator.h, but that requires pulling in rq and q
628 */
629static inline void elv_dispatch_add_tail(struct request_queue *q,
630 struct request *rq)
631{
632 if (q->last_merge == rq)
633 q->last_merge = NULL;
617 634
618 if (req->rl) 635 q->end_sector = rq_end_sector(rq);
619 elv_remove_request(req->q, req); 636 q->boundary_rq = rq;
637 list_add_tail(&rq->queuelist, &q->queue_head);
620} 638}
621 639
622/* 640/*
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index ed93125c1db5..a74c27e460ba 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -8,18 +8,17 @@ typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struc
8 8
9typedef void (elevator_merged_fn) (request_queue_t *, struct request *); 9typedef void (elevator_merged_fn) (request_queue_t *, struct request *);
10 10
11typedef struct request *(elevator_next_req_fn) (request_queue_t *); 11typedef int (elevator_dispatch_fn) (request_queue_t *, int);
12 12
13typedef void (elevator_add_req_fn) (request_queue_t *, struct request *, int); 13typedef void (elevator_add_req_fn) (request_queue_t *, struct request *);
14typedef int (elevator_queue_empty_fn) (request_queue_t *); 14typedef int (elevator_queue_empty_fn) (request_queue_t *);
15typedef void (elevator_remove_req_fn) (request_queue_t *, struct request *);
16typedef void (elevator_requeue_req_fn) (request_queue_t *, struct request *);
17typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); 15typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
18typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); 16typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
19typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); 17typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *);
20 18
21typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t); 19typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t);
22typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); 20typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
21typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *);
23typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); 22typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
24 23
25typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); 24typedef int (elevator_init_fn) (request_queue_t *, elevator_t *);
@@ -31,10 +30,9 @@ struct elevator_ops
31 elevator_merged_fn *elevator_merged_fn; 30 elevator_merged_fn *elevator_merged_fn;
32 elevator_merge_req_fn *elevator_merge_req_fn; 31 elevator_merge_req_fn *elevator_merge_req_fn;
33 32
34 elevator_next_req_fn *elevator_next_req_fn; 33 elevator_dispatch_fn *elevator_dispatch_fn;
35 elevator_add_req_fn *elevator_add_req_fn; 34 elevator_add_req_fn *elevator_add_req_fn;
36 elevator_remove_req_fn *elevator_remove_req_fn; 35 elevator_activate_req_fn *elevator_activate_req_fn;
37 elevator_requeue_req_fn *elevator_requeue_req_fn;
38 elevator_deactivate_req_fn *elevator_deactivate_req_fn; 36 elevator_deactivate_req_fn *elevator_deactivate_req_fn;
39 37
40 elevator_queue_empty_fn *elevator_queue_empty_fn; 38 elevator_queue_empty_fn *elevator_queue_empty_fn;
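
With the typedefs and elevator_ops laid out above, converting a scheduler is mostly a matter of filling in the new slots. A last piece of the illustrative "simple" sketch, assuming the add/dispatch/activate/deactivate hooks shown earlier (the init hook signature is the one declared earlier in this header; everything else, including the allocation details, is illustrative and not part of this patch):

static int simple_init_queue(request_queue_t *q, elevator_t *e)
{
        struct simple_data *sd;

        sd = kmalloc(sizeof(*sd), GFP_KERNEL);
        if (!sd)
                return -ENOMEM;

        INIT_LIST_HEAD(&sd->fifo);
        sd->rq_in_driver = 0;
        e->elevator_data = sd;          /* teardown/exit hook omitted for brevity */
        return 0;
}

static struct elevator_type iosched_simple = {
        .ops = {
                .elevator_dispatch_fn           = simple_dispatch,
                .elevator_add_req_fn            = simple_add_request,
                .elevator_activate_req_fn       = simple_activate_request,
                .elevator_deactivate_req_fn     = simple_deactivate_request,
                .elevator_init_fn               = simple_init_queue,
        },
        .elevator_name = "simple",
};
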
@@ -81,15 +79,15 @@ struct elevator_queue
81/* 79/*
82 * block elevator interface 80 * block elevator interface
83 */ 81 */
82extern void elv_dispatch_sort(request_queue_t *, struct request *);
84extern void elv_add_request(request_queue_t *, struct request *, int, int); 83extern void elv_add_request(request_queue_t *, struct request *, int, int);
85extern void __elv_add_request(request_queue_t *, struct request *, int, int); 84extern void __elv_add_request(request_queue_t *, struct request *, int, int);
86extern int elv_merge(request_queue_t *, struct request **, struct bio *); 85extern int elv_merge(request_queue_t *, struct request **, struct bio *);
87extern void elv_merge_requests(request_queue_t *, struct request *, 86extern void elv_merge_requests(request_queue_t *, struct request *,
88 struct request *); 87 struct request *);
89extern void elv_merged_request(request_queue_t *, struct request *); 88extern void elv_merged_request(request_queue_t *, struct request *);
90extern void elv_remove_request(request_queue_t *, struct request *); 89extern void elv_dequeue_request(request_queue_t *, struct request *);
91extern void elv_requeue_request(request_queue_t *, struct request *); 90extern void elv_requeue_request(request_queue_t *, struct request *);
92extern void elv_deactivate_request(request_queue_t *, struct request *);
93extern int elv_queue_empty(request_queue_t *); 91extern int elv_queue_empty(request_queue_t *);
94extern struct request *elv_next_request(struct request_queue *q); 92extern struct request *elv_next_request(struct request_queue *q);
95extern struct request *elv_former_request(request_queue_t *, struct request *); 93extern struct request *elv_former_request(request_queue_t *, struct request *);
@@ -142,4 +140,6 @@ enum {
142 ELV_MQUEUE_MUST, 140 ELV_MQUEUE_MUST,
143}; 141};
144 142
143#define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors)
144
145#endif 145#endif