author    Jens Axboe <axboe@kernel.dk>  2011-10-19 08:30:42 -0400
committer Jens Axboe <axboe@kernel.dk>  2011-10-19 08:30:42 -0400
commit    5c04b426f2e8b46cfc7969a35b2631063a3c646c
tree      2d27d9f5d2fe5d5e8fbc01a467ec58bcb50235c1 /block
parent    499337bb6511e665a236a6a947f819d98ea340c6
parent    899e3ee404961a90b828ad527573aaaac39f0ab1

Merge branch 'v3.1-rc10' into for-3.2/core

Conflicts:
        block/blk-core.c
        include/linux/blkdev.h

Signed-off-by: Jens Axboe <axboe@kernel.dk>

Diffstat (limited to 'block')
 block/Kconfig        |  10
 block/Makefile       |   1
 block/blk-cgroup.c   |  37
 block/blk-core.c     |  36
 block/blk-flush.c    |  25
 block/blk-softirq.c  |  10
 block/blk-sysfs.c    |  15
 block/blk-throttle.c |   4
 block/blk.h          |   2
 block/bsg-lib.c      | 298
 block/cfq-iosched.c  |  21
 block/genhd.c        |   8
 12 files changed, 413 insertions(+), 54 deletions(-)
diff --git a/block/Kconfig b/block/Kconfig
index 60be1e0455da..e97934eececa 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -65,6 +65,16 @@ config BLK_DEV_BSG
 
           If unsure, say Y.
 
+config BLK_DEV_BSGLIB
+        bool "Block layer SG support v4 helper lib"
+        default n
+        select BLK_DEV_BSG
+        help
+          Subsystems will normally enable this if needed. Users will not
+          normally need to manually enable this.
+
+          If unsure, say N.
+
 config BLK_DEV_INTEGRITY
         bool "Block layer data integrity support"
         ---help---
diff --git a/block/Makefile b/block/Makefile
index 0fec4b3fab51..514c6e4f427a 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
                         blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_BLK_DEV_BSG)       += bsg.o
+obj-$(CONFIG_BLK_DEV_BSGLIB)    += bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)        += blk-cgroup.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)        += blk-throttle.o
 obj-$(CONFIG_IOSCHED_NOOP)      += noop-iosched.o
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bcaf16ee6ad1..b596e54ddd71 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -785,10 +785,10 @@ static int blkio_policy_parse_and_set(char *buf,
 {
         char *s[4], *p, *major_s = NULL, *minor_s = NULL;
         int ret;
-        unsigned long major, minor, temp;
+        unsigned long major, minor;
         int i = 0;
         dev_t dev;
-        u64 bps, iops;
+        u64 temp;
 
         memset(s, 0, sizeof(s));
 
@@ -826,20 +826,23 @@ static int blkio_policy_parse_and_set(char *buf,
 
         dev = MKDEV(major, minor);
 
-        ret = blkio_check_dev_num(dev);
+        ret = strict_strtoull(s[1], 10, &temp);
         if (ret)
-                return ret;
+                return -EINVAL;
 
-        newpn->dev = dev;
+        /* For rule removal, do not check for device presence. */
+        if (temp) {
+                ret = blkio_check_dev_num(dev);
+                if (ret)
+                        return ret;
+        }
 
-        if (s[1] == NULL)
-                return -EINVAL;
+        newpn->dev = dev;
 
         switch (plid) {
         case BLKIO_POLICY_PROP:
-                ret = strict_strtoul(s[1], 10, &temp);
-                if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
-                    temp > BLKIO_WEIGHT_MAX)
+                if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
+                    temp > BLKIO_WEIGHT_MAX)
                         return -EINVAL;
 
                 newpn->plid = plid;
@@ -850,26 +853,18 @@ static int blkio_policy_parse_and_set(char *buf,
                 switch(fileid) {
                 case BLKIO_THROTL_read_bps_device:
                 case BLKIO_THROTL_write_bps_device:
-                        ret = strict_strtoull(s[1], 10, &bps);
-                        if (ret)
-                                return -EINVAL;
-
                         newpn->plid = plid;
                         newpn->fileid = fileid;
-                        newpn->val.bps = bps;
+                        newpn->val.bps = temp;
                         break;
                 case BLKIO_THROTL_read_iops_device:
                 case BLKIO_THROTL_write_iops_device:
-                        ret = strict_strtoull(s[1], 10, &iops);
-                        if (ret)
-                                return -EINVAL;
-
-                        if (iops > THROTL_IOPS_MAX)
+                        if (temp > THROTL_IOPS_MAX)
                                 return -EINVAL;
 
                         newpn->plid = plid;
                         newpn->fileid = fileid;
-                        newpn->val.iops = (unsigned int)iops;
+                        newpn->val.iops = (unsigned int)temp;
                         break;
                 }
                 break;
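
The comment added above ("For rule removal, do not check for device presence") refers to how rules are cleared from userspace: writing a value of 0 for a device's entry removes the rule, and after this change that works even when the device has already gone away. A minimal illustrative sketch of such a removal follows; the cgroup mount point and the 8:16 device numbers are assumptions for the example, not taken from this commit.

/* Illustrative only: clear a throttle rule by writing "major:minor 0".
 * Adjust the path to wherever the blkio controller is mounted. */
#include <stdio.h>

int main(void)
{
        const char *path =
                "/sys/fs/cgroup/blkio/blkio.throttle.read_bps_device";
        FILE *f = fopen(path, "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        /* 8:16 is typically sdb; writing 0 removes any existing bps limit */
        fprintf(f, "8:16 0\n");
        fclose(f);
        return 0;
}
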
diff --git a/block/blk-core.c b/block/blk-core.c
index 97e9e5405b83..79e41a76d96a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -346,9 +346,10 @@ void blk_put_queue(struct request_queue *q)
 EXPORT_SYMBOL(blk_put_queue);
 
 /*
- * Note: If a driver supplied the queue lock, it should not zap that lock
- * unexpectedly as some queue cleanup components like elevator_exit() and
- * blk_throtl_exit() need queue lock.
+ * Note: If a driver supplied the queue lock, it is disconnected
+ * by this function. The actual state of the lock doesn't matter
+ * here as the request_queue isn't accessible after this point
+ * (QUEUE_FLAG_DEAD is set) and no other requests will be queued.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
@@ -365,10 +366,8 @@ void blk_cleanup_queue(struct request_queue *q)
         queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
         mutex_unlock(&q->sysfs_lock);
 
-        if (q->elevator)
-                elevator_exit(q->elevator);
-
-        blk_throtl_exit(q);
+        if (q->queue_lock != &q->__queue_lock)
+                q->queue_lock = &q->__queue_lock;
 
         blk_put_queue(q);
 }
@@ -1165,7 +1164,7 @@ static bool bio_attempt_front_merge(struct request_queue *q,
  * true if merge was successful, otherwise false.
  */
 static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
-                               struct bio *bio)
+                               struct bio *bio, unsigned int *request_count)
 {
         struct blk_plug *plug;
         struct request *rq;
@@ -1174,10 +1173,13 @@ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
         plug = tsk->plug;
         if (!plug)
                 goto out;
+        *request_count = 0;
 
         list_for_each_entry_reverse(rq, &plug->list, queuelist) {
                 int el_ret;
 
+                (*request_count)++;
+
                 if (rq->q != q)
                         continue;
 
@@ -1217,6 +1219,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
         struct blk_plug *plug;
         int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
         struct request *req;
+        unsigned int request_count = 0;
 
         /*
          * low level driver can indicate that it wants pages above a
@@ -1235,7 +1238,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
          * Check if we can merge with the plugged list before grabbing
          * any locks.
          */
-        if (attempt_plug_merge(current, q, bio))
+        if (attempt_plug_merge(current, q, bio, &request_count))
                 return;
 
         spin_lock_irq(q->queue_lock);
@@ -1300,11 +1303,10 @@ get_rq:
                         if (__rq->q != q)
                                 plug->should_sort = 1;
                 }
+                if (request_count >= BLK_MAX_REQUEST_COUNT)
+                        blk_flush_plug_list(plug, false);
                 list_add_tail(&req->queuelist, &plug->list);
-                plug->count++;
                 drive_stat_acct(req, 1);
-                if (plug->count >= BLK_MAX_REQUEST_COUNT)
-                        blk_flush_plug_list(plug, false);
         } else {
                 spin_lock_irq(q->queue_lock);
                 add_acct_request(q, req, where);
@@ -1675,6 +1677,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
 int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
         unsigned long flags;
+        int where = ELEVATOR_INSERT_BACK;
 
         if (blk_rq_check_limits(q, rq))
                 return -EIO;
@@ -1691,7 +1694,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
          */
         BUG_ON(blk_queued_rq(rq));
 
-        add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
+        if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
+                where = ELEVATOR_INSERT_FLUSH;
+
+        add_acct_request(q, rq, where);
         spin_unlock_irqrestore(q->queue_lock, flags);
 
         return 0;
@@ -2248,7 +2254,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+bool __blk_end_bidi_request(struct request *rq, int error,
                                    unsigned int nr_bytes, unsigned int bidi_bytes)
 {
         if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
@@ -2617,7 +2623,6 @@ void blk_start_plug(struct blk_plug *plug)
         INIT_LIST_HEAD(&plug->list);
         INIT_LIST_HEAD(&plug->cb_list);
         plug->should_sort = 0;
-        plug->count = 0;
 
         /*
          * If this is a nested plug, don't actually assign it. It will be
@@ -2701,7 +2706,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                 return;
 
         list_splice_init(&plug->list, &list);
-        plug->count = 0;
 
         if (plug->should_sort) {
                 list_sort(NULL, &list, plug_rq_cmp);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index bb21e4c36f70..491eb30a242d 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
 {
         unsigned int policy = 0;
 
+        if (blk_rq_sectors(rq))
+                policy |= REQ_FSEQ_DATA;
+
         if (fflags & REQ_FLUSH) {
                 if (rq->cmd_flags & REQ_FLUSH)
                         policy |= REQ_FSEQ_PREFLUSH;
-                if (blk_rq_sectors(rq))
-                        policy |= REQ_FSEQ_DATA;
                 if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
                         policy |= REQ_FSEQ_POSTFLUSH;
         }
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
 
         /* make @rq a normal request */
         rq->cmd_flags &= ~REQ_FLUSH_SEQ;
-        rq->end_io = NULL;
+        rq->end_io = rq->flush.saved_end_io;
 }
 
 /**
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
         unsigned int fflags = q->flush_flags;   /* may change, cache */
         unsigned int policy = blk_flush_policy(fflags, rq);
 
-        BUG_ON(rq->end_io);
-        BUG_ON(!rq->bio || rq->bio != rq->biotail);
-
         /*
          * @policy now records what operations need to be done.  Adjust
          * REQ_FLUSH and FUA for the driver.
@@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq)
         rq->cmd_flags &= ~REQ_FUA;
 
         /*
+         * An empty flush handed down from a stacking driver may
+         * translate into nothing if the underlying device does not
+         * advertise a write-back cache.  In this case, simply
+         * complete the request.
+         */
+        if (!policy) {
+                __blk_end_bidi_request(rq, 0, 0, 0);
+                return;
+        }
+
+        BUG_ON(!rq->bio || rq->bio != rq->biotail);
+
+        /*
          * If there's data but flush is not necessary, the request can be
          * processed directly without going through flush machinery.  Queue
          * for normal execution.
@@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq)
         if ((policy & REQ_FSEQ_DATA) &&
             !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
                 list_add_tail(&rq->queuelist, &q->queue_head);
+                blk_run_queue_async(q);
                 return;
         }
 
@@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq)
         memset(&rq->flush, 0, sizeof(rq->flush));
         INIT_LIST_HEAD(&rq->flush.list);
         rq->cmd_flags |= REQ_FLUSH_SEQ;
+        rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
         rq->end_io = flush_data_end_io;
 
         blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
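
The reordering in blk_flush_policy() and the new !policy branch in blk_insert_flush() work together: REQ_FSEQ_DATA is now set whenever the request carries sectors, independent of the queue's flush capabilities, so a policy of 0 can only mean an empty flush on a queue that advertises no write-back cache, and such a request is completed immediately instead of entering the flush machinery. A rough standalone sketch of the resulting decision, with stand-in flag values rather than the kernel's definitions:

#include <stdbool.h>

/* Stand-in flag values for illustration only; the real REQ_* and
 * REQ_FSEQ_* definitions live in the kernel headers. */
#define REQ_FLUSH               (1u << 0)
#define REQ_FUA                 (1u << 1)
#define REQ_FSEQ_PREFLUSH       (1u << 2)
#define REQ_FSEQ_DATA           (1u << 3)
#define REQ_FSEQ_POSTFLUSH      (1u << 4)

/* Mirrors the post-merge ordering: DATA is decided first, independent of
 * the queue's flush flags (fflags). */
static unsigned int flush_policy(unsigned int fflags, unsigned int cmd_flags,
                                 bool has_data)
{
        unsigned int policy = 0;

        if (has_data)
                policy |= REQ_FSEQ_DATA;

        if (fflags & REQ_FLUSH) {
                if (cmd_flags & REQ_FLUSH)
                        policy |= REQ_FSEQ_PREFLUSH;
                if (!(fflags & REQ_FUA) && (cmd_flags & REQ_FUA))
                        policy |= REQ_FSEQ_POSTFLUSH;
        }
        return policy;
}

/* An empty REQ_FLUSH on a queue without a write-back cache yields
 * flush_policy(0, REQ_FLUSH, false) == 0, which blk_insert_flush() now
 * completes directly via __blk_end_bidi_request(). */
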
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 475fab809a80..1366a89d8e66 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -115,7 +115,7 @@ void __blk_complete_request(struct request *req)
         /*
          * Select completion CPU
          */
-        if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
+        if (req->cpu != -1) {
                 ccpu = req->cpu;
                 if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
                         ccpu = blk_cpu_to_group(ccpu);
@@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req)
         } else
                 ccpu = cpu;
 
+        /*
+         * If current CPU and requested CPU are in the same group, running
+         * softirq in current CPU. One might concern this is just like
+         * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
+         * running in interrupt handler, and currently I/O controller doesn't
+         * support multiple interrupts, so current CPU is unique actually. This
+         * avoids IPI sending from current CPU to the first CPU of a group.
+         */
         if (ccpu == cpu || ccpu == group_cpu) {
                 struct list_head *list;
 do_local:
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index adc923e9d1f8..a8eff5f8b9c5 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -258,11 +258,13 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 
         ret = queue_var_store(&val, page, count);
         spin_lock_irq(q->queue_lock);
-        if (val) {
+        if (val == 2) {
                 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
-                if (val == 2)
-                        queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
-        } else {
+                queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+        } else if (val == 1) {
+                queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+                queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+        } else if (val == 0) {
                 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
                 queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
         }
@@ -477,6 +479,11 @@ static void blk_release_queue(struct kobject *kobj)
 
         blk_sync_queue(q);
 
+        if (q->elevator)
+                elevator_exit(q->elevator);
+
+        blk_throtl_exit(q);
+
         if (rl->rq_pool)
                 mempool_destroy(rl->rq_pool);
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f6a794120505..a19f58c6fc3a 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
 static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
 {
         bool rw = bio_data_dir(bio);
-        bool sync = bio->bi_rw & REQ_SYNC;
+        bool sync = rw_is_sync(bio->bi_rw);
 
         /* Charge the bio to the group */
         tg->bytes_disp[rw] += bio->bi_size;
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 
         if (tg_no_rule_group(tg, rw)) {
                 blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
-                        rw, bio->bi_rw & REQ_SYNC);
+                        rw, rw_is_sync(bio->bi_rw));
                 rcu_read_unlock();
                 return 0;
         }
diff --git a/block/blk.h b/block/blk.h
index d6586287adc9..20b900a377c9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
                       struct bio *bio);
 void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
+bool __blk_end_bidi_request(struct request *rq, int error,
+                            unsigned int nr_bytes, unsigned int bidi_bytes);
 
 void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
new file mode 100644
index 000000000000..6690e6e41037
--- /dev/null
+++ b/block/bsg-lib.c
@@ -0,0 +1,298 @@
+/*
+ * BSG helper library
+ *
+ * Copyright (C) 2008 James Smart, Emulex Corporation
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2011 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/bsg-lib.h>
+#include <linux/module.h>
+#include <scsi/scsi_cmnd.h>
+
+/**
+ * bsg_destroy_job - routine to teardown/delete a bsg job
+ * @job: bsg_job that is to be torn down
+ */
+static void bsg_destroy_job(struct bsg_job *job)
+{
+        put_device(job->dev);   /* release reference for the request */
+
+        kfree(job->request_payload.sg_list);
+        kfree(job->reply_payload.sg_list);
+        kfree(job);
+}
+
+/**
+ * bsg_job_done - completion routine for bsg requests
+ * @job: bsg_job that is complete
+ * @result: job reply result
+ * @reply_payload_rcv_len: length of payload recvd
+ *
+ * The LLD should call this when the bsg job has completed.
+ */
+void bsg_job_done(struct bsg_job *job, int result,
+                  unsigned int reply_payload_rcv_len)
+{
+        struct request *req = job->req;
+        struct request *rsp = req->next_rq;
+        int err;
+
+        err = job->req->errors = result;
+        if (err < 0)
+                /* we're only returning the result field in the reply */
+                job->req->sense_len = sizeof(u32);
+        else
+                job->req->sense_len = job->reply_len;
+        /* we assume all request payload was transferred, residual == 0 */
+        req->resid_len = 0;
+
+        if (rsp) {
+                WARN_ON(reply_payload_rcv_len > rsp->resid_len);
+
+                /* set reply (bidi) residual */
+                rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
+        }
+        blk_complete_request(req);
+}
+EXPORT_SYMBOL_GPL(bsg_job_done);
+
+/**
+ * bsg_softirq_done - softirq done routine for destroying the bsg requests
+ * @rq: BSG request that holds the job to be destroyed
+ */
+static void bsg_softirq_done(struct request *rq)
+{
+        struct bsg_job *job = rq->special;
+
+        blk_end_request_all(rq, rq->errors);
+        bsg_destroy_job(job);
+}
+
+static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
+{
+        size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
+
+        BUG_ON(!req->nr_phys_segments);
+
+        buf->sg_list = kzalloc(sz, GFP_KERNEL);
+        if (!buf->sg_list)
+                return -ENOMEM;
+        sg_init_table(buf->sg_list, req->nr_phys_segments);
+        buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
+        buf->payload_len = blk_rq_bytes(req);
+        return 0;
+}
+
+/**
+ * bsg_create_job - create the bsg_job structure for the bsg request
+ * @dev: device that is being sent the bsg request
+ * @req: BSG request that needs a job structure
+ */
+static int bsg_create_job(struct device *dev, struct request *req)
+{
+        struct request *rsp = req->next_rq;
+        struct request_queue *q = req->q;
+        struct bsg_job *job;
+        int ret;
+
+        BUG_ON(req->special);
+
+        job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
+        if (!job)
+                return -ENOMEM;
+
+        req->special = job;
+        job->req = req;
+        if (q->bsg_job_size)
+                job->dd_data = (void *)&job[1];
+        job->request = req->cmd;
+        job->request_len = req->cmd_len;
+        job->reply = req->sense;
+        job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
+                                                 * allocated */
+        if (req->bio) {
+                ret = bsg_map_buffer(&job->request_payload, req);
+                if (ret)
+                        goto failjob_rls_job;
+        }
+        if (rsp && rsp->bio) {
+                ret = bsg_map_buffer(&job->reply_payload, rsp);
+                if (ret)
+                        goto failjob_rls_rqst_payload;
+        }
+        job->dev = dev;
+        /* take a reference for the request */
+        get_device(job->dev);
+        return 0;
+
+failjob_rls_rqst_payload:
+        kfree(job->request_payload.sg_list);
+failjob_rls_job:
+        kfree(job);
+        return -ENOMEM;
+}
+
+/*
+ * bsg_goose_queue - restart queue in case it was stopped
+ * @q: request q to be restarted
+ */
+void bsg_goose_queue(struct request_queue *q)
+{
+        if (!q)
+                return;
+
+        blk_run_queue_async(q);
+}
+EXPORT_SYMBOL_GPL(bsg_goose_queue);
+
+/**
+ * bsg_request_fn - generic handler for bsg requests
+ * @q: request queue to manage
+ *
+ * On error the create_bsg_job function should return a -Exyz error value
+ * that will be set to the req->errors.
+ *
+ * Drivers/subsys should pass this to the queue init function.
+ */
+void bsg_request_fn(struct request_queue *q)
+{
+        struct device *dev = q->queuedata;
+        struct request *req;
+        struct bsg_job *job;
+        int ret;
+
+        if (!get_device(dev))
+                return;
+
+        while (1) {
+                req = blk_fetch_request(q);
+                if (!req)
+                        break;
+                spin_unlock_irq(q->queue_lock);
+
+                ret = bsg_create_job(dev, req);
+                if (ret) {
+                        req->errors = ret;
+                        blk_end_request_all(req, ret);
+                        spin_lock_irq(q->queue_lock);
+                        continue;
+                }
+
+                job = req->special;
+                ret = q->bsg_job_fn(job);
+                spin_lock_irq(q->queue_lock);
+                if (ret)
+                        break;
+        }
+
+        spin_unlock_irq(q->queue_lock);
+        put_device(dev);
+        spin_lock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(bsg_request_fn);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @q: request queue setup by caller
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ *
+ * The caller should have setup the reuqest queue with bsg_request_fn
+ * as the request_fn.
+ */
+int bsg_setup_queue(struct device *dev, struct request_queue *q,
+                    char *name, bsg_job_fn *job_fn, int dd_job_size)
+{
+        int ret;
+
+        q->queuedata = dev;
+        q->bsg_job_size = dd_job_size;
+        q->bsg_job_fn = job_fn;
+        queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+        blk_queue_softirq_done(q, bsg_softirq_done);
+        blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
+
+        ret = bsg_register_queue(q, dev, name, NULL);
+        if (ret) {
+                printk(KERN_ERR "%s: bsg interface failed to "
+                       "initialize - register queue\n", dev->kobj.name);
+                return ret;
+        }
+
+        return 0;
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
+
+/**
+ * bsg_remove_queue - Deletes the bsg dev from the q
+ * @q: the request_queue that is to be torn down.
+ *
+ * Notes:
+ *   Before unregistering the queue empty any requests that are blocked
+ */
+void bsg_remove_queue(struct request_queue *q)
+{
+        struct request *req; /* block request */
+        int counts; /* totals for request_list count and starved */
+
+        if (!q)
+                return;
+
+        /* Stop taking in new requests */
+        spin_lock_irq(q->queue_lock);
+        blk_stop_queue(q);
+
+        /* drain all requests in the queue */
+        while (1) {
+                /* need the lock to fetch a request
+                 * this may fetch the same reqeust as the previous pass
+                 */
+                req = blk_fetch_request(q);
+                /* save requests in use and starved */
+                counts = q->rq.count[0] + q->rq.count[1] +
+                         q->rq.starved[0] + q->rq.starved[1];
+                spin_unlock_irq(q->queue_lock);
+                /* any requests still outstanding? */
+                if (counts == 0)
+                        break;
+
+                /* This may be the same req as the previous iteration,
+                 * always send the blk_end_request_all after a prefetch.
+                 * It is not okay to not end the request because the
+                 * prefetch started the request.
+                 */
+                if (req) {
+                        /* return -ENXIO to indicate that this queue is
+                         * going away
+                         */
+                        req->errors = -ENXIO;
+                        blk_end_request_all(req, -ENXIO);
+                }
+
+                msleep(200); /* allow bsg to possibly finish */
+                spin_lock_irq(q->queue_lock);
+        }
+        bsg_unregister_queue(q);
+}
+EXPORT_SYMBOL_GPL(bsg_remove_queue);
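
For context, a rough sketch of how an LLD might wire itself up to this new helper library: the driver allocates its queue with bsg_request_fn as the request_fn, registers a job handler through bsg_setup_queue(), and later completes each job with bsg_job_done(). Names such as example_bsg_handler and the queue setup below are illustrative assumptions, not part of this commit.

#include <linux/blkdev.h>
#include <linux/bsg-lib.h>

/* Hypothetical job handler: a real driver would kick off hardware I/O here
 * and call bsg_job_done() from its completion path. */
static int example_bsg_handler(struct bsg_job *job)
{
        /* ... submit job->request / job->request_payload to the hardware ... */
        bsg_job_done(job, 0, job->reply_payload.payload_len);
        return 0;
}

static int example_attach_bsg(struct device *dev)
{
        struct request_queue *q;
        int ret;

        /* bsg_request_fn is the generic dispatcher provided by bsg-lib */
        q = blk_init_queue(bsg_request_fn, NULL);
        if (!q)
                return -ENOMEM;

        ret = bsg_setup_queue(dev, q, "example_bsg", example_bsg_handler,
                              16 /* per-job LLD data, bytes */);
        if (ret)
                blk_cleanup_queue(q);
        return ret;
}
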
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1f96ad6254f1..16ace89613bc 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -130,6 +130,8 @@ struct cfq_queue {
         unsigned long slice_end;
         long slice_resid;
 
+        /* pending priority requests */
+        int prio_pending;
         /* number of requests that are on the dispatch list or inside driver */
         int dispatched;
 
@@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
         if (rq_is_sync(rq1) != rq_is_sync(rq2))
                 return rq_is_sync(rq1) ? rq1 : rq2;
 
+        if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO)
+                return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2;
+
         s1 = blk_rq_pos(rq1);
         s2 = blk_rq_pos(rq2);
 
@@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
 
         hlist_del_init(&cfqg->cfqd_node);
 
+        BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
+        cfqd->nr_blkcg_linked_grps--;
+
         /*
          * Put the reference taken at the time of creation so that when all
          * queues are gone, group can be destroyed.
@@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq)
         cfqq->cfqd->rq_queued--;
         cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
                                         rq_data_dir(rq), rq_is_sync(rq));
+        if (rq->cmd_flags & REQ_PRIO) {
+                WARN_ON(!cfqq->prio_pending);
+                cfqq->prio_pending--;
+        }
 }
 
 static int cfq_merge(struct request_queue *q, struct request **req,
@@ -3357,6 +3369,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
                 return true;
 
         /*
+         * So both queues are sync. Let the new request get disk time if
+         * it's a metadata request and the current queue is doing regular IO.
+         */
+        if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
+                return true;
+
+        /*
          * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
          */
         if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
@@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
         struct cfq_io_context *cic = RQ_CIC(rq);
 
         cfqd->rq_queued++;
+        if (rq->cmd_flags & REQ_PRIO)
+                cfqq->prio_pending++;
 
         cfq_update_io_thinktime(cfqd, cfqq, cic);
         cfq_update_io_seektime(cfqd, cfqq, rq);
diff --git a/block/genhd.c b/block/genhd.c
index 5cb51c55f6d8..e2f67902dd02 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
                 cpu = part_stat_lock();
                 part_round_stats(cpu, hd);
                 part_stat_unlock();
-                seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
-                           "%u %lu %lu %llu %u %u %u %u\n",
+                seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
+                           "%u %lu %lu %lu %u %u %u %u\n",
                            MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                            disk_name(gp, hd->partno, buf),
                            part_stat_read(hd, ios[READ]),
                            part_stat_read(hd, merges[READ]),
-                           (unsigned long long)part_stat_read(hd, sectors[READ]),
+                           part_stat_read(hd, sectors[READ]),
                            jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
                            part_stat_read(hd, ios[WRITE]),
                            part_stat_read(hd, merges[WRITE]),
-                           (unsigned long long)part_stat_read(hd, sectors[WRITE]),
+                           part_stat_read(hd, sectors[WRITE]),
                            jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
                            part_in_flight(hd),
                            jiffies_to_msecs(part_stat_read(hd, io_ticks)),