author		Linus Torvalds <torvalds@linux-foundation.org>	2009-04-07 14:06:41 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-04-07 14:06:41 -0400
commit		6a5d263866d699ebf6843105497afc86ee53de5b (patch)
tree		439195e272631908cdc2e3e44abaf7e1c3447157 /block
parent		aeeae86859f4319de0a4946b44771d9926eeed54 (diff)
parent		ffcd7dca3ab78f9f425971756e5e90024157f6be (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  loop: mutex already unlocked in loop_clr_fd()
  cfq-iosched: don't let idling interfere with plugging
  block: remove unused REQ_UNPLUG
  cfq-iosched: kill two unused cfqq flags
  cfq-iosched: change dispatch logic to deal with single requests at the time
  mflash: initial support
  cciss: change to discover first memory BAR
  cciss: kernel scan thread for MSA2012
  cciss: fix residual count for block pc requests
  block: fix inconsistency in I/O stat accounting code
  block: elevator quiescing helpers
Diffstat (limited to 'block')
-rw-r--r--	block/blk-core.c	 15
-rw-r--r--	block/blk-merge.c	 29
-rw-r--r--	block/blk-sysfs.c	  4
-rw-r--r--	block/blk.h		 14
-rw-r--r--	block/cfq-iosched.c	202
-rw-r--r--	block/elevator.c	 42
6 files changed, 169 insertions(+), 137 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 25572802dac2..43fdedc524ee 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -64,12 +64,11 @@ static struct workqueue_struct *kblockd_workqueue;
 
 static void drive_stat_acct(struct request *rq, int new_io)
 {
-	struct gendisk *disk = rq->rq_disk;
 	struct hd_struct *part;
 	int rw = rq_data_dir(rq);
 	int cpu;
 
-	if (!blk_fs_request(rq) || !disk || !blk_do_io_stat(disk->queue))
+	if (!blk_fs_request(rq) || !blk_do_io_stat(rq))
 		return;
 
 	cpu = part_stat_lock();
@@ -1124,8 +1123,6 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 
 	if (bio_sync(bio))
 		req->cmd_flags |= REQ_RW_SYNC;
-	if (bio_unplug(bio))
-		req->cmd_flags |= REQ_UNPLUG;
 	if (bio_rw_meta(bio))
 		req->cmd_flags |= REQ_RW_META;
 	if (bio_noidle(bio))
@@ -1675,9 +1672,7 @@ EXPORT_SYMBOL(blkdev_dequeue_request);
 
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
-	struct gendisk *disk = req->rq_disk;
-
-	if (!disk || !blk_do_io_stat(disk->queue))
+	if (!blk_do_io_stat(req))
 		return;
 
 	if (blk_fs_request(req)) {
@@ -1694,9 +1689,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
 
 static void blk_account_io_done(struct request *req)
 {
-	struct gendisk *disk = req->rq_disk;
-
-	if (!disk || !blk_do_io_stat(disk->queue))
+	if (!blk_do_io_stat(req))
 		return;
 
 	/*
@@ -1711,7 +1704,7 @@ static void blk_account_io_done(struct request *req)
 		int cpu;
 
 		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(disk, req->sector);
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
 
 		part_stat_inc(cpu, part, ios[rw]);
 		part_stat_add(cpu, part, ticks[rw], duration);
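
The three blk-core.c hunks above all make the same move: the open-coded "disk && blk_do_io_stat(disk->queue)" test becomes a single request-based gate, so each accounting path reduces to one early return. Below is a rough standalone model of the resulting shape; every type and name in it is a stand-in invented for this sketch, not the kernel's:

#include <stdbool.h>
#include <stddef.h>

struct queue   { bool io_stat; };            /* stand-in for request_queue */
struct gendisk { struct queue *queue; };     /* stand-in */
struct request { struct gendisk *rq_disk; }; /* stand-in */

/* One helper owns the NULL checks; callers stop peeking at rq_disk. */
static bool do_io_stat(const struct request *rq)
{
	return rq->rq_disk && rq->rq_disk->queue &&
	       rq->rq_disk->queue->io_stat;
}

static void account_completion(struct request *rq, unsigned int bytes)
{
	if (!do_io_stat(rq))
		return;                /* single early return, as in the patch */
	(void)bytes;                   /* per-partition accounting would go here */
}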
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e39cb24b7679..63760ca3da0f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -338,6 +338,22 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	return 1;
 }
 
+static void blk_account_io_merge(struct request *req)
+{
+	if (blk_do_io_stat(req)) {
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
+	}
+}
+
 /*
  * Has to be called with the request spinlock acquired
  */
@@ -386,18 +402,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 
 	elv_merge_requests(q, req, next);
 
-	if (req->rq_disk) {
-		struct hd_struct *part;
-		int cpu;
-
-		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
-
-		part_round_stats(cpu, part);
-		part_dec_in_flight(part);
-
-		part_stat_unlock();
-	}
+	blk_account_io_merge(req);
 
 	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
 	if (blk_rq_cpu_valid(next))
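
attempt_merge() previously open-coded the partition accounting; the new blk_account_io_merge() puts it behind the same blk_do_io_stat() gate. The invariant being maintained: when 'next' is merged into 'req', two in-flight requests become one, so the partition's in-flight count must drop by exactly one. A minimal model of that invariant, with stand-in types rather than kernel code:

#include <stdbool.h>

struct part    { int in_flight; };               /* stand-in for hd_struct */
struct request { struct part *part; bool stat; };/* stand-ins */

/* Two requests collapse into one: in-flight goes down by one. */
static void account_merge(struct request *req)
{
	if (!req->stat)
		return;
	req->part->in_flight--;
}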
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ff9bba3379a..73f36beff5cd 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -209,10 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
 	ssize_t ret = queue_var_store(&stats, page, count);
 
 	spin_lock_irq(q->queue_lock);
+	elv_quisce_start(q);
+
 	if (stats)
 		queue_flag_set(QUEUE_FLAG_IO_STAT, q);
 	else
 		queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
+
+	elv_quisce_end(q);
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;
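
Flipping QUEUE_FLAG_IO_STAT while requests are in flight is what made the accounting inconsistent: a request could start with stats off and complete with them on, or vice versa, leaving the counters skewed. The store handler therefore quiesces the elevator before changing the flag, so no request is mid-accounting at that moment. The pattern, as a hedged single-threaded sketch (not the kernel API):

#include <stdbool.h>

struct queue { bool io_stat; int elvpriv; /* in-flight w/ elevator data */ };

static void quiesce_start(struct queue *q)
{
	/* the real helper drains the elevator and sleeps until
	 * q->rq.elvpriv reaches zero; modeled here as a plain loop */
	while (q->elvpriv)
		q->elvpriv--;
}

static void quiesce_end(struct queue *q) { (void)q; }

/* Toggle per-request behavior only while nothing is in flight. */
static void set_io_stat(struct queue *q, bool on)
{
	quiesce_start(q);
	q->io_stat = on;
	quiesce_end(q);
}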
diff --git a/block/blk.h b/block/blk.h
index 3ee94358b43d..24fcaeeaf620 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -70,6 +70,10 @@ void blk_queue_congestion_threshold(struct request_queue *q);
 
 int blk_dev_init(void);
 
+void elv_quisce_start(struct request_queue *q);
+void elv_quisce_end(struct request_queue *q);
+
+
 /*
  * Return the threshold (number of used requests) at which the queue is
  * considered to be congested. It include a little hysteresis to keep the
@@ -108,12 +112,14 @@ static inline int blk_cpu_to_group(int cpu)
 #endif
 }
 
-static inline int blk_do_io_stat(struct request_queue *q)
+static inline int blk_do_io_stat(struct request *rq)
 {
-	if (q)
-		return blk_queue_io_stat(q);
+	struct gendisk *disk = rq->rq_disk;
 
-	return 0;
+	if (!disk || !disk->queue)
+		return 0;
+
+	return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
 }
 
 #endif
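
The helper now takes the request instead of the queue, which lets it also refuse accounting for requests carrying no elevator private data (REQ_ELVPRIV clear), on top of the NULL-disk and queue-flag checks. The same logic as a compilable stand-alone sketch; the flag bit value here is made up:

#include <stdbool.h>
#include <stddef.h>

#define REQ_ELVPRIV (1u << 0)            /* made-up bit for this sketch */

struct request_queue { bool io_stat; };
struct gendisk       { struct request_queue *queue; };
struct request {
	struct gendisk *rq_disk;
	unsigned int	cmd_flags;
};

static inline bool do_io_stat(const struct request *rq)
{
	struct gendisk *disk = rq->rq_disk;

	if (!disk || !disk->queue)
		return false;

	return disk->queue->io_stat && (rq->cmd_flags & REQ_ELVPRIV);
}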
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9e809345f71a..a4809de6fea6 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -160,6 +160,7 @@ struct cfq_queue {
 
 	unsigned long slice_end;
 	long slice_resid;
+	unsigned int slice_dispatch;
 
 	/* pending metadata requests */
 	int meta_pending;
@@ -176,13 +177,12 @@ struct cfq_queue {
 enum cfqq_state_flags {
 	CFQ_CFQQ_FLAG_on_rr = 0,	/* on round-robin busy list */
 	CFQ_CFQQ_FLAG_wait_request,	/* waiting for a request */
+	CFQ_CFQQ_FLAG_must_dispatch,	/* must be allowed a dispatch */
 	CFQ_CFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
 	CFQ_CFQQ_FLAG_must_alloc_slice,	/* per-slice must_alloc flag */
-	CFQ_CFQQ_FLAG_must_dispatch,	/* must dispatch, even if expired */
 	CFQ_CFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
 	CFQ_CFQQ_FLAG_idle_window,	/* slice idling enabled */
 	CFQ_CFQQ_FLAG_prio_changed,	/* task priority has changed */
-	CFQ_CFQQ_FLAG_queue_new,	/* queue never been serviced */
 	CFQ_CFQQ_FLAG_slice_new,	/* no requests dispatched in slice */
 	CFQ_CFQQ_FLAG_sync,		/* synchronous queue */
 };
@@ -203,13 +203,12 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
 
 CFQ_CFQQ_FNS(on_rr);
 CFQ_CFQQ_FNS(wait_request);
+CFQ_CFQQ_FNS(must_dispatch);
 CFQ_CFQQ_FNS(must_alloc);
 CFQ_CFQQ_FNS(must_alloc_slice);
-CFQ_CFQQ_FNS(must_dispatch);
 CFQ_CFQQ_FNS(fifo_expire);
 CFQ_CFQQ_FNS(idle_window);
 CFQ_CFQQ_FNS(prio_changed);
-CFQ_CFQQ_FNS(queue_new);
 CFQ_CFQQ_FNS(slice_new);
 CFQ_CFQQ_FNS(sync);
 #undef CFQ_CFQQ_FNS
@@ -774,10 +773,15 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
 	if (cfqq) {
 		cfq_log_cfqq(cfqd, cfqq, "set_active");
 		cfqq->slice_end = 0;
+		cfqq->slice_dispatch = 0;
+
+		cfq_clear_cfqq_wait_request(cfqq);
+		cfq_clear_cfqq_must_dispatch(cfqq);
 		cfq_clear_cfqq_must_alloc_slice(cfqq);
 		cfq_clear_cfqq_fifo_expire(cfqq);
 		cfq_mark_cfqq_slice_new(cfqq);
-		cfq_clear_cfqq_queue_new(cfqq);
+
+		del_timer(&cfqd->idle_slice_timer);
 	}
 
 	cfqd->active_queue = cfqq;
@@ -795,7 +799,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	if (cfq_cfqq_wait_request(cfqq))
 		del_timer(&cfqd->idle_slice_timer);
 
-	cfq_clear_cfqq_must_dispatch(cfqq);
 	cfq_clear_cfqq_wait_request(cfqq);
 
 	/*
@@ -924,7 +927,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	    (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
 		return;
 
-	cfq_mark_cfqq_must_dispatch(cfqq);
 	cfq_mark_cfqq_wait_request(cfqq);
 
 	/*
@@ -1010,7 +1012,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 	/*
 	 * The active queue has run out of time, expire it and select new.
 	 */
-	if (cfq_slice_used(cfqq))
+	if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq))
 		goto expire;
 
 	/*
@@ -1053,66 +1055,6 @@ keep_queue:
 	return cfqq;
 }
 
-/*
- * Dispatch some requests from cfqq, moving them to the request queue
- * dispatch list.
- */
-static int
-__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-			int max_dispatch)
-{
-	int dispatched = 0;
-
-	BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
-
-	do {
-		struct request *rq;
-
-		/*
-		 * follow expired path, else get first next available
-		 */
-		rq = cfq_check_fifo(cfqq);
-		if (rq == NULL)
-			rq = cfqq->next_rq;
-
-		/*
-		 * finally, insert request into driver dispatch list
-		 */
-		cfq_dispatch_insert(cfqd->queue, rq);
-
-		dispatched++;
-
-		if (!cfqd->active_cic) {
-			atomic_inc(&RQ_CIC(rq)->ioc->refcount);
-			cfqd->active_cic = RQ_CIC(rq);
-		}
-
-		if (RB_EMPTY_ROOT(&cfqq->sort_list))
-			break;
-
-		/*
-		 * If there is a non-empty RT cfqq waiting for current
-		 * cfqq's timeslice to complete, pre-empt this cfqq
-		 */
-		if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues)
-			break;
-
-	} while (dispatched < max_dispatch);
-
-	/*
-	 * expire an async queue immediately if it has used up its slice. idle
-	 * queue always expire after 1 dispatch round.
-	 */
-	if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
-	    dispatched >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
-	    cfq_class_idle(cfqq))) {
-		cfqq->slice_end = jiffies + 1;
-		cfq_slice_expired(cfqd, 0);
-	}
-
-	return dispatched;
-}
-
 static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
 {
 	int dispatched = 0;
@@ -1146,11 +1088,45 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
 	return dispatched;
 }
 
+/*
+ * Dispatch a request from cfqq, moving them to the request queue
+ * dispatch list.
+ */
+static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	struct request *rq;
+
+	BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
+
+	/*
+	 * follow expired path, else get first next available
+	 */
+	rq = cfq_check_fifo(cfqq);
+	if (!rq)
+		rq = cfqq->next_rq;
+
+	/*
+	 * insert request into driver dispatch list
+	 */
+	cfq_dispatch_insert(cfqd->queue, rq);
+
+	if (!cfqd->active_cic) {
+		struct cfq_io_context *cic = RQ_CIC(rq);
+
+		atomic_inc(&cic->ioc->refcount);
+		cfqd->active_cic = cic;
+	}
+}
+
+/*
+ * Find the cfqq that we need to service and move a request from that to the
+ * dispatch list
+ */
 static int cfq_dispatch_requests(struct request_queue *q, int force)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq;
-	int dispatched;
+	unsigned int max_dispatch;
 
 	if (!cfqd->busy_queues)
 		return 0;
@@ -1158,29 +1134,63 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
 	if (unlikely(force))
 		return cfq_forced_dispatch(cfqd);
 
-	dispatched = 0;
-	while ((cfqq = cfq_select_queue(cfqd)) != NULL) {
-		int max_dispatch;
+	cfqq = cfq_select_queue(cfqd);
+	if (!cfqq)
+		return 0;
+
+	/*
+	 * If this is an async queue and we have sync IO in flight, let it wait
+	 */
+	if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
+		return 0;
+
+	max_dispatch = cfqd->cfq_quantum;
+	if (cfq_class_idle(cfqq))
+		max_dispatch = 1;
 
-		max_dispatch = cfqd->cfq_quantum;
+	/*
+	 * Does this cfqq already have too much IO in flight?
+	 */
+	if (cfqq->dispatched >= max_dispatch) {
+		/*
+		 * idle queue must always only have a single IO in flight
+		 */
 		if (cfq_class_idle(cfqq))
-			max_dispatch = 1;
+			return 0;
 
-		if (cfqq->dispatched >= max_dispatch && cfqd->busy_queues > 1)
-			break;
+		/*
+		 * We have other queues, don't allow more IO from this one
+		 */
+		if (cfqd->busy_queues > 1)
+			return 0;
 
-		if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
-			break;
+		/*
+		 * we are the only queue, allow up to 4 times of 'quantum'
+		 */
+		if (cfqq->dispatched >= 4 * max_dispatch)
+			return 0;
+	}
 
-		cfq_clear_cfqq_must_dispatch(cfqq);
-		cfq_clear_cfqq_wait_request(cfqq);
-		del_timer(&cfqd->idle_slice_timer);
+	/*
+	 * Dispatch a request from this cfqq
+	 */
+	cfq_dispatch_request(cfqd, cfqq);
+	cfqq->slice_dispatch++;
+	cfq_clear_cfqq_must_dispatch(cfqq);
 
-		dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
+	/*
+	 * expire an async queue immediately if it has used up its slice. idle
+	 * queue always expire after 1 dispatch round.
+	 */
+	if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
+	    cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
+	    cfq_class_idle(cfqq))) {
+		cfqq->slice_end = jiffies + 1;
+		cfq_slice_expired(cfqd, 0);
 	}
 
-	cfq_log(cfqd, "dispatched=%d", dispatched);
-	return dispatched;
+	cfq_log(cfqd, "dispatched a request");
+	return 1;
 }
 
 /*
@@ -1506,7 +1516,6 @@ retry:
 	cfqq->cfqd = cfqd;
 
 	cfq_mark_cfqq_prio_changed(cfqq);
-	cfq_mark_cfqq_queue_new(cfqq);
 
 	cfq_init_prio_data(cfqq, ioc);
 
@@ -1893,15 +1902,13 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 
 	if (cfqq == cfqd->active_queue) {
 		/*
-		 * if we are waiting for a request for this queue, let it rip
-		 * immediately and flag that we must not expire this queue
-		 * just now
+		 * Remember that we saw a request from this process, but
+		 * don't start queuing just yet. Otherwise we risk seeing lots
+		 * of tiny requests, because we disrupt the normal plugging
+		 * and merging.
 		 */
-		if (cfq_cfqq_wait_request(cfqq)) {
+		if (cfq_cfqq_wait_request(cfqq))
 			cfq_mark_cfqq_must_dispatch(cfqq);
-			del_timer(&cfqd->idle_slice_timer);
-			blk_start_queueing(cfqd->queue);
-		}
 	} else if (cfq_should_preempt(cfqd, cfqq, rq)) {
 		/*
 		 * not the active queue - expire current slice if it is
@@ -1910,7 +1917,6 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 * this new queue is RT and the current one is BE
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
-		cfq_mark_cfqq_must_dispatch(cfqq);
 		blk_start_queueing(cfqd->queue);
 	}
 }
@@ -2172,6 +2178,12 @@ static void cfq_idle_slice_timer(unsigned long data)
 		timed_out = 0;
 
 		/*
+		 * We saw a request before the queue expired, let it through
+		 */
+		if (cfq_cfqq_must_dispatch(cfqq))
+			goto out_kick;
+
+		/*
 		 * expired
 		 */
 		if (cfq_slice_used(cfqq))
@@ -2187,10 +2199,8 @@ static void cfq_idle_slice_timer(unsigned long data)
 		/*
 		 * not expired and it has a request pending, let it dispatch
 		 */
-		if (!RB_EMPTY_ROOT(&cfqq->sort_list)) {
-			cfq_mark_cfqq_must_dispatch(cfqq);
+		if (!RB_EMPTY_ROOT(&cfqq->sort_list))
 			goto out_kick;
-		}
 	}
 expire:
 	cfq_slice_expired(cfqd, timed_out);
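
The rewritten cfq_dispatch_requests() sends down one request per invocation and replaces the inner dispatch loop with a per-queue budget: the quantum normally, a single request for the idle class, and up to four times the quantum when the queue is the only busy one. That decision logic, modeled as plain C with stand-in names:

#include <stdbool.h>

struct cfqq_model {
	bool idle_class;   /* CFQ idle scheduling class */
	int  dispatched;   /* requests this queue has in flight */
};

/* May this queue dispatch one more request right now? */
static bool may_dispatch(const struct cfqq_model *q, int quantum,
			 int busy_queues)
{
	int max = q->idle_class ? 1 : quantum;

	if (q->dispatched < max)
		return true;
	if (q->idle_class)       /* idle class: strictly one in flight */
		return false;
	if (busy_queues > 1)     /* others are waiting; stop here */
		return false;
	/* sole busy queue: allow up to 4x the quantum */
	return q->dispatched < 4 * max;
}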
diff --git a/block/elevator.c b/block/elevator.c
index ca6788a0195a..fb81bcc14a8c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -573,7 +573,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
 	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
 }
 
-static void elv_drain_elevator(struct request_queue *q)
+void elv_drain_elevator(struct request_queue *q)
 {
 	static int printed;
 	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
@@ -587,6 +587,31 @@ static void elv_drain_elevator(struct request_queue *q)
 	}
 }
 
+/*
+ * Call with queue lock held, interrupts disabled
+ */
+void elv_quisce_start(struct request_queue *q)
+{
+	queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
+
+	/*
+	 * make sure we don't have any requests in flight
+	 */
+	elv_drain_elevator(q);
+	while (q->rq.elvpriv) {
+		blk_start_queueing(q);
+		spin_unlock_irq(q->queue_lock);
+		msleep(10);
+		spin_lock_irq(q->queue_lock);
+		elv_drain_elevator(q);
+	}
+}
+
+void elv_quisce_end(struct request_queue *q)
+{
+	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
+}
+
 void elv_insert(struct request_queue *q, struct request *rq, int where)
 {
 	struct list_head *pos;
@@ -1101,18 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	 * Turn on BYPASS and drain all requests w/ elevator private data
 	 */
 	spin_lock_irq(q->queue_lock);
-
-	queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
-
-	elv_drain_elevator(q);
-
-	while (q->rq.elvpriv) {
-		blk_start_queueing(q);
-		spin_unlock_irq(q->queue_lock);
-		msleep(10);
-		spin_lock_irq(q->queue_lock);
-		elv_drain_elevator(q);
-	}
+	elv_quisce_start(q);
 
 	/*
 	 * Remember old elevator.
@@ -1136,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	 */
 	elevator_exit(old_elevator);
 	spin_lock_irq(q->queue_lock);
-	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
+	elv_quisce_end(q);
 	spin_unlock_irq(q->queue_lock);
 
 	blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
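
elv_quisce_start()/elv_quisce_end() (the patch spells it without the second 'e') lift the drain loop out of elevator_switch() so blk-sysfs.c can reuse it. The shape worth noting is the sleep: msleep() cannot run under a spinlock, so the loop drops the queue lock, naps, and retakes it before rechecking. A hedged pthreads rendition of that pattern, with invented names:

#include <pthread.h>
#include <unistd.h>

struct queue_model {
	pthread_mutex_t lock;
	int elvpriv;   /* requests holding elevator private data */
	int bypass;    /* mirrors QUEUE_FLAG_ELVSWITCH */
};

/* Call with q->lock held; returns with it held and elvpriv == 0. */
static void quiesce_start(struct queue_model *q)
{
	q->bypass = 1;   /* new requests bypass the elevator from here on */

	while (q->elvpriv) {
		/* can't sleep while holding the lock: drop, nap, retake */
		pthread_mutex_unlock(&q->lock);
		usleep(10 * 1000);              /* roughly msleep(10) */
		pthread_mutex_lock(&q->lock);
	}
}

static void quiesce_end(struct queue_model *q)
{
	q->bypass = 0;
}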