author	Linus Torvalds <torvalds@linux-foundation.org>	2011-08-19 13:47:07 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-08-19 13:47:07 -0400
commit	5ccc38740a283aba81a00e92941310d0c1aeb2ee (patch)
tree	ba7d725947975a9391e085bd1d5958b004bfdc3e /block
parent	0c3bef612881ee6216a36952ffaabfc35b83545c (diff)
parent	b53d1ed734a2b9af8da115b836b658daa7d47a48 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
* 'for-linus' of git://git.kernel.dk/linux-block: (23 commits)
  Revert "cfq: Remove special treatment for metadata rqs."
  block: fix flush machinery for stacking drivers with differring flush flags
  block: improve rq_affinity placement
  blktrace: add FLUSH/FUA support
  Move some REQ flags to the common bio/request area
  allow blk_flush_policy to return REQ_FSEQ_DATA independent of *FLUSH
  xen/blkback: Make description more obvious.
  cfq-iosched: Add documentation about idling
  block: Make rq_affinity = 1 work as expected
  block: swim3: fix unterminated of_device_id table
  block/genhd.c: remove useless cast in diskstats_show()
  drivers/cdrom/cdrom.c: relax check on dvd manufacturer value
  drivers/block/drbd/drbd_nl.c: use bitmap_parse instead of __bitmap_parse
  bsg-lib: add module.h include
  cfq-iosched: Reduce linked group count upon group destruction
  blk-throttle: correctly determine sync bio
  loop: fix deadlock when sysfs and LOOP_CLR_FD race against each other
  loop: add BLK_DEV_LOOP_MIN_COUNT=%i to allow distros 0 pre-allocated loop devices
  loop: add management interface for on-demand device allocation
  loop: replace linked list of allocated devices with an idr index
  ...
Diffstat (limited to 'block')
-rw-r--r--	block/Kconfig	10
-rw-r--r--	block/Makefile	1
-rw-r--r--	block/blk-core.c	8
-rw-r--r--	block/blk-flush.c	25
-rw-r--r--	block/blk-softirq.c	8
-rw-r--r--	block/blk-throttle.c	4
-rw-r--r--	block/blk.h	2
-rw-r--r--	block/bsg-lib.c	298
-rw-r--r--	block/cfq-iosched.c	21
-rw-r--r--	block/genhd.c	8
10 files changed, 371 insertions, 14 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 60be1e0455da..e97934eececa 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -65,6 +65,16 @@ config BLK_DEV_BSG
 
 	  If unsure, say Y.
 
+config BLK_DEV_BSGLIB
+	bool "Block layer SG support v4 helper lib"
+	default n
+	select BLK_DEV_BSG
+	help
+	  Subsystems will normally enable this if needed. Users will not
+	  normally need to manually enable this.
+
+	  If unsure, say N.
+
 config BLK_DEV_INTEGRITY
 	bool "Block layer data integrity support"
 	---help---
diff --git a/block/Makefile b/block/Makefile
index 0fec4b3fab51..514c6e4f427a 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
+obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index b627558c461f..90e1ffdeb415 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1702,6 +1702,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
 int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
 	unsigned long flags;
+	int where = ELEVATOR_INSERT_BACK;
 
 	if (blk_rq_check_limits(q, rq))
 		return -EIO;
@@ -1718,7 +1719,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	 */
 	BUG_ON(blk_queued_rq(rq));
 
-	add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
+	if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
+		where = ELEVATOR_INSERT_FLUSH;
+
+	add_acct_request(q, rq, where);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	return 0;
@@ -2275,7 +2279,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+bool __blk_end_bidi_request(struct request *rq, int error,
 				   unsigned int nr_bytes, unsigned int bidi_bytes)
 {
 	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
diff --git a/block/blk-flush.c b/block/blk-flush.c
index bb21e4c36f70..491eb30a242d 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
 {
 	unsigned int policy = 0;
 
+	if (blk_rq_sectors(rq))
+		policy |= REQ_FSEQ_DATA;
+
 	if (fflags & REQ_FLUSH) {
 		if (rq->cmd_flags & REQ_FLUSH)
 			policy |= REQ_FSEQ_PREFLUSH;
-		if (blk_rq_sectors(rq))
-			policy |= REQ_FSEQ_DATA;
 		if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
 			policy |= REQ_FSEQ_POSTFLUSH;
 	}
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
 
 	/* make @rq a normal request */
 	rq->cmd_flags &= ~REQ_FLUSH_SEQ;
-	rq->end_io = NULL;
+	rq->end_io = rq->flush.saved_end_io;
 }
 
 /**
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
 	unsigned int fflags = q->flush_flags;	/* may change, cache */
 	unsigned int policy = blk_flush_policy(fflags, rq);
 
-	BUG_ON(rq->end_io);
-	BUG_ON(!rq->bio || rq->bio != rq->biotail);
-
 	/*
 	 * @policy now records what operations need to be done.  Adjust
 	 * REQ_FLUSH and FUA for the driver.
@@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq)
 		rq->cmd_flags &= ~REQ_FUA;
 
 	/*
+	 * An empty flush handed down from a stacking driver may
+	 * translate into nothing if the underlying device does not
+	 * advertise a write-back cache.  In this case, simply
+	 * complete the request.
+	 */
+	if (!policy) {
+		__blk_end_bidi_request(rq, 0, 0, 0);
+		return;
+	}
+
+	BUG_ON(!rq->bio || rq->bio != rq->biotail);
+
+	/*
 	 * If there's data but flush is not necessary, the request can be
 	 * processed directly without going through flush machinery.  Queue
 	 * for normal execution.
@@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq)
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
 		list_add_tail(&rq->queuelist, &q->queue_head);
+		blk_run_queue_async(q);
 		return;
 	}
 
@@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq)
 	memset(&rq->flush, 0, sizeof(rq->flush));
 	INIT_LIST_HEAD(&rq->flush.list);
 	rq->cmd_flags |= REQ_FLUSH_SEQ;
+	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
 	rq->end_io = flush_data_end_io;
 
 	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 475fab809a80..58340d0cb23a 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req)
 	} else
 		ccpu = cpu;
 
+	/*
+	 * If current CPU and requested CPU are in the same group, running
+	 * softirq in current CPU. One might concern this is just like
+	 * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
+	 * running in interrupt handler, and currently I/O controller doesn't
+	 * support multiple interrupts, so current CPU is unique actually. This
+	 * avoids IPI sending from current CPU to the first CPU of a group.
+	 */
 	if (ccpu == cpu || ccpu == group_cpu) {
 		struct list_head *list;
 do_local:
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f6a794120505..a19f58c6fc3a 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
 static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
 {
 	bool rw = bio_data_dir(bio);
-	bool sync = bio->bi_rw & REQ_SYNC;
+	bool sync = rw_is_sync(bio->bi_rw);
 
 	/* Charge the bio to the group */
 	tg->bytes_disp[rw] += bio->bi_size;
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 
 	if (tg_no_rule_group(tg, rw)) {
 		blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
-				rw, bio->bi_rw & REQ_SYNC);
+				rw, rw_is_sync(bio->bi_rw));
 		rcu_read_unlock();
 		return 0;
 	}
diff --git a/block/blk.h b/block/blk.h
index d6586287adc9..20b900a377c9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 		      struct bio *bio);
 void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
+bool __blk_end_bidi_request(struct request *rq, int error,
+			    unsigned int nr_bytes, unsigned int bidi_bytes);
 
 void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
new file mode 100644
index 000000000000..6690e6e41037
--- /dev/null
+++ b/block/bsg-lib.c
@@ -0,0 +1,298 @@
+/*
+ * BSG helper library
+ *
+ * Copyright (C) 2008 James Smart, Emulex Corporation
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2011 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/bsg-lib.h>
+#include <linux/module.h>
+#include <scsi/scsi_cmnd.h>
+
+/**
+ * bsg_destroy_job - routine to teardown/delete a bsg job
+ * @job: bsg_job that is to be torn down
+ */
+static void bsg_destroy_job(struct bsg_job *job)
+{
+	put_device(job->dev);	/* release reference for the request */
+
+	kfree(job->request_payload.sg_list);
+	kfree(job->reply_payload.sg_list);
+	kfree(job);
+}
+
+/**
+ * bsg_job_done - completion routine for bsg requests
+ * @job: bsg_job that is complete
+ * @result: job reply result
+ * @reply_payload_rcv_len: length of payload recvd
+ *
+ * The LLD should call this when the bsg job has completed.
+ */
+void bsg_job_done(struct bsg_job *job, int result,
+		  unsigned int reply_payload_rcv_len)
+{
+	struct request *req = job->req;
+	struct request *rsp = req->next_rq;
+	int err;
+
+	err = job->req->errors = result;
+	if (err < 0)
+		/* we're only returning the result field in the reply */
+		job->req->sense_len = sizeof(u32);
+	else
+		job->req->sense_len = job->reply_len;
+	/* we assume all request payload was transferred, residual == 0 */
+	req->resid_len = 0;
+
+	if (rsp) {
+		WARN_ON(reply_payload_rcv_len > rsp->resid_len);
+
+		/* set reply (bidi) residual */
+		rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
+	}
+	blk_complete_request(req);
+}
+EXPORT_SYMBOL_GPL(bsg_job_done);
+
+/**
+ * bsg_softirq_done - softirq done routine for destroying the bsg requests
+ * @rq: BSG request that holds the job to be destroyed
+ */
+static void bsg_softirq_done(struct request *rq)
+{
+	struct bsg_job *job = rq->special;
+
+	blk_end_request_all(rq, rq->errors);
+	bsg_destroy_job(job);
+}
+
+static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
+{
+	size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
+
+	BUG_ON(!req->nr_phys_segments);
+
+	buf->sg_list = kzalloc(sz, GFP_KERNEL);
+	if (!buf->sg_list)
+		return -ENOMEM;
+	sg_init_table(buf->sg_list, req->nr_phys_segments);
+	buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
+	buf->payload_len = blk_rq_bytes(req);
+	return 0;
+}
+
+/**
+ * bsg_create_job - create the bsg_job structure for the bsg request
+ * @dev: device that is being sent the bsg request
+ * @req: BSG request that needs a job structure
+ */
+static int bsg_create_job(struct device *dev, struct request *req)
+{
+	struct request *rsp = req->next_rq;
+	struct request_queue *q = req->q;
+	struct bsg_job *job;
+	int ret;
+
+	BUG_ON(req->special);
+
+	job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
+	if (!job)
+		return -ENOMEM;
+
+	req->special = job;
+	job->req = req;
+	if (q->bsg_job_size)
+		job->dd_data = (void *)&job[1];
+	job->request = req->cmd;
+	job->request_len = req->cmd_len;
+	job->reply = req->sense;
+	job->reply_len = SCSI_SENSE_BUFFERSIZE;	/* Size of sense buffer
+						 * allocated */
+	if (req->bio) {
+		ret = bsg_map_buffer(&job->request_payload, req);
+		if (ret)
+			goto failjob_rls_job;
+	}
+	if (rsp && rsp->bio) {
+		ret = bsg_map_buffer(&job->reply_payload, rsp);
+		if (ret)
+			goto failjob_rls_rqst_payload;
+	}
+	job->dev = dev;
+	/* take a reference for the request */
+	get_device(job->dev);
+	return 0;
+
+failjob_rls_rqst_payload:
+	kfree(job->request_payload.sg_list);
+failjob_rls_job:
+	kfree(job);
+	return -ENOMEM;
+}
+
+/*
+ * bsg_goose_queue - restart queue in case it was stopped
+ * @q: request q to be restarted
+ */
+void bsg_goose_queue(struct request_queue *q)
+{
+	if (!q)
+		return;
+
+	blk_run_queue_async(q);
+}
+EXPORT_SYMBOL_GPL(bsg_goose_queue);
+
+/**
+ * bsg_request_fn - generic handler for bsg requests
+ * @q: request queue to manage
+ *
+ * On error the create_bsg_job function should return a -Exyz error value
+ * that will be set to the req->errors.
+ *
+ * Drivers/subsys should pass this to the queue init function.
+ */
+void bsg_request_fn(struct request_queue *q)
+{
+	struct device *dev = q->queuedata;
+	struct request *req;
+	struct bsg_job *job;
+	int ret;
+
+	if (!get_device(dev))
+		return;
+
+	while (1) {
+		req = blk_fetch_request(q);
+		if (!req)
+			break;
+		spin_unlock_irq(q->queue_lock);
+
+		ret = bsg_create_job(dev, req);
+		if (ret) {
+			req->errors = ret;
+			blk_end_request_all(req, ret);
+			spin_lock_irq(q->queue_lock);
+			continue;
+		}
+
+		job = req->special;
+		ret = q->bsg_job_fn(job);
+		spin_lock_irq(q->queue_lock);
+		if (ret)
+			break;
+	}
+
+	spin_unlock_irq(q->queue_lock);
+	put_device(dev);
+	spin_lock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(bsg_request_fn);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @q: request queue setup by caller
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ *
+ * The caller should have setup the reuqest queue with bsg_request_fn
+ * as the request_fn.
+ */
+int bsg_setup_queue(struct device *dev, struct request_queue *q,
+		    char *name, bsg_job_fn *job_fn, int dd_job_size)
+{
+	int ret;
+
+	q->queuedata = dev;
+	q->bsg_job_size = dd_job_size;
+	q->bsg_job_fn = job_fn;
+	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+	blk_queue_softirq_done(q, bsg_softirq_done);
+	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
+
+	ret = bsg_register_queue(q, dev, name, NULL);
+	if (ret) {
+		printk(KERN_ERR "%s: bsg interface failed to "
+		       "initialize - register queue\n", dev->kobj.name);
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
+
+/**
+ * bsg_remove_queue - Deletes the bsg dev from the q
+ * @q:	the request_queue that is to be torn down.
+ *
+ * Notes:
+ *   Before unregistering the queue empty any requests that are blocked
+ */
+void bsg_remove_queue(struct request_queue *q)
+{
+	struct request *req; /* block request */
+	int counts; /* totals for request_list count and starved */
+
+	if (!q)
+		return;
+
+	/* Stop taking in new requests */
+	spin_lock_irq(q->queue_lock);
+	blk_stop_queue(q);
+
+	/* drain all requests in the queue */
+	while (1) {
+		/* need the lock to fetch a request
+		 * this may fetch the same reqeust as the previous pass
+		 */
+		req = blk_fetch_request(q);
+		/* save requests in use and starved */
+		counts = q->rq.count[0] + q->rq.count[1] +
+			 q->rq.starved[0] + q->rq.starved[1];
+		spin_unlock_irq(q->queue_lock);
+		/* any requests still outstanding? */
+		if (counts == 0)
+			break;
+
+		/* This may be the same req as the previous iteration,
+		 * always send the blk_end_request_all after a prefetch.
+		 * It is not okay to not end the request because the
+		 * prefetch started the request.
+		 */
+		if (req) {
+			/* return -ENXIO to indicate that this queue is
+			 * going away
+			 */
+			req->errors = -ENXIO;
+			blk_end_request_all(req, -ENXIO);
+		}
+
+		msleep(200); /* allow bsg to possibly finish */
+		spin_lock_irq(q->queue_lock);
+	}
+	bsg_unregister_queue(q);
+}
+EXPORT_SYMBOL_GPL(bsg_remove_queue);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1f96ad6254f1..a33bd4377c61 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -130,6 +130,8 @@ struct cfq_queue {
 	unsigned long slice_end;
 	long slice_resid;
 
+	/* pending metadata requests */
+	int meta_pending;
 	/* number of requests that are on the dispatch list or inside driver */
 	int dispatched;
 
@@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
 	if (rq_is_sync(rq1) != rq_is_sync(rq2))
 		return rq_is_sync(rq1) ? rq1 : rq2;
 
+	if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
+		return rq1->cmd_flags & REQ_META ? rq1 : rq2;
+
 	s1 = blk_rq_pos(rq1);
 	s2 = blk_rq_pos(rq2);
 
@@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
 
 	hlist_del_init(&cfqg->cfqd_node);
 
+	BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
+	cfqd->nr_blkcg_linked_grps--;
+
 	/*
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
@@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq)
 	cfqq->cfqd->rq_queued--;
 	cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
 					rq_data_dir(rq), rq_is_sync(rq));
+	if (rq->cmd_flags & REQ_META) {
+		WARN_ON(!cfqq->meta_pending);
+		cfqq->meta_pending--;
+	}
 }
 
 static int cfq_merge(struct request_queue *q, struct request **req,
@@ -3357,6 +3369,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 		return true;
 
 	/*
+	 * So both queues are sync. Let the new request get disk time if
+	 * it's a metadata request and the current queue is doing regular IO.
+	 */
+	if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
+		return true;
+
+	/*
 	 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
 	 */
 	if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
@@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_io_context *cic = RQ_CIC(rq);
 
 	cfqd->rq_queued++;
+	if (rq->cmd_flags & REQ_META)
+		cfqq->meta_pending++;
 
 	cfq_update_io_thinktime(cfqd, cfqq, cic);
 	cfq_update_io_seektime(cfqd, cfqq, rq);
diff --git a/block/genhd.c b/block/genhd.c
index 5cb51c55f6d8..e2f67902dd02 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 		cpu = part_stat_lock();
 		part_round_stats(cpu, hd);
 		part_stat_unlock();
-		seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
-			   "%u %lu %lu %llu %u %u %u %u\n",
+		seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
+			   "%u %lu %lu %lu %u %u %u %u\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
 			   part_stat_read(hd, ios[READ]),
 			   part_stat_read(hd, merges[READ]),
-			   (unsigned long long)part_stat_read(hd, sectors[READ]),
+			   part_stat_read(hd, sectors[READ]),
 			   jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
 			   part_stat_read(hd, ios[WRITE]),
 			   part_stat_read(hd, merges[WRITE]),
-			   (unsigned long long)part_stat_read(hd, sectors[WRITE]),
+			   part_stat_read(hd, sectors[WRITE]),
 			   jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
 			   part_in_flight(hd),
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),