author    Jens Axboe <axboe@kernel.dk>  2011-10-19 08:30:42 -0400
committer Jens Axboe <axboe@kernel.dk>  2011-10-19 08:30:42 -0400
commit    5c04b426f2e8b46cfc7969a35b2631063a3c646c
tree      2d27d9f5d2fe5d5e8fbc01a467ec58bcb50235c1 /block
parent    499337bb6511e665a236a6a947f819d98ea340c6
parent    899e3ee404961a90b828ad527573aaaac39f0ab1

Merge branch 'v3.1-rc10' into for-3.2/core

Conflicts:
        block/blk-core.c
        include/linux/blkdev.h

Signed-off-by: Jens Axboe <axboe@kernel.dk>

Diffstat (limited to 'block')
 block/Kconfig        |  10
 block/Makefile       |   1
 block/blk-cgroup.c   |  37
 block/blk-core.c     |  36
 block/blk-flush.c    |  25
 block/blk-softirq.c  |  10
 block/blk-sysfs.c    |  15
 block/blk-throttle.c |   4
 block/blk.h          |   2
 block/bsg-lib.c      | 298
 block/cfq-iosched.c  |  21
 block/genhd.c        |   8
 12 files changed, 413 insertions(+), 54 deletions(-)
diff --git a/block/Kconfig b/block/Kconfig
index 60be1e0455da..e97934eececa 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -65,6 +65,16 @@ config BLK_DEV_BSG
 
           If unsure, say Y.
 
+config BLK_DEV_BSGLIB
+        bool "Block layer SG support v4 helper lib"
+        default n
+        select BLK_DEV_BSG
+        help
+          Subsystems will normally enable this if needed. Users will not
+          normally need to manually enable this.
+
+          If unsure, say N.
+
 config BLK_DEV_INTEGRITY
         bool "Block layer data integrity support"
         ---help---
diff --git a/block/Makefile b/block/Makefile
index 0fec4b3fab51..514c6e4f427a 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
                         blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_BLK_DEV_BSG)       += bsg.o
+obj-$(CONFIG_BLK_DEV_BSGLIB)    += bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)        += blk-cgroup.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)        += blk-throttle.o
 obj-$(CONFIG_IOSCHED_NOOP)      += noop-iosched.o
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bcaf16ee6ad1..b596e54ddd71 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -785,10 +785,10 @@ static int blkio_policy_parse_and_set(char *buf,
 {
         char *s[4], *p, *major_s = NULL, *minor_s = NULL;
         int ret;
-        unsigned long major, minor, temp;
+        unsigned long major, minor;
         int i = 0;
         dev_t dev;
-        u64 bps, iops;
+        u64 temp;
 
         memset(s, 0, sizeof(s));
 
@@ -826,20 +826,23 @@ static int blkio_policy_parse_and_set(char *buf,
 
         dev = MKDEV(major, minor);
 
-        ret = blkio_check_dev_num(dev);
+        ret = strict_strtoull(s[1], 10, &temp);
         if (ret)
-                return ret;
+                return -EINVAL;
 
-        newpn->dev = dev;
+        /* For rule removal, do not check for device presence. */
+        if (temp) {
+                ret = blkio_check_dev_num(dev);
+                if (ret)
+                        return ret;
+        }
 
-        if (s[1] == NULL)
-                return -EINVAL;
+        newpn->dev = dev;
 
         switch (plid) {
         case BLKIO_POLICY_PROP:
-                ret = strict_strtoul(s[1], 10, &temp);
-                if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
-                    temp > BLKIO_WEIGHT_MAX)
+                if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
+                    temp > BLKIO_WEIGHT_MAX)
                         return -EINVAL;
 
                 newpn->plid = plid;
@@ -850,26 +853,18 @@ static int blkio_policy_parse_and_set(char *buf,
                 switch(fileid) {
                 case BLKIO_THROTL_read_bps_device:
                 case BLKIO_THROTL_write_bps_device:
-                        ret = strict_strtoull(s[1], 10, &bps);
-                        if (ret)
-                                return -EINVAL;
-
                         newpn->plid = plid;
                         newpn->fileid = fileid;
-                        newpn->val.bps = bps;
+                        newpn->val.bps = temp;
                         break;
                 case BLKIO_THROTL_read_iops_device:
                 case BLKIO_THROTL_write_iops_device:
-                        ret = strict_strtoull(s[1], 10, &iops);
-                        if (ret)
-                                return -EINVAL;
-
-                        if (iops > THROTL_IOPS_MAX)
+                        if (temp > THROTL_IOPS_MAX)
                                 return -EINVAL;
 
                         newpn->plid = plid;
                         newpn->fileid = fileid;
-                        newpn->val.iops = (unsigned int)iops;
+                        newpn->val.iops = (unsigned int)temp;
                         break;
                 }
                 break;
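
The comment added above ("For rule removal, do not check for device presence") refers to how rules are cleared from userspace: writing a value of 0 for a device's entry removes the rule, and after this change that works even when the device has already gone away. A minimal illustrative sketch of such a removal follows; the cgroup mount point and the 8:16 device numbers are assumptions for the example, not taken from this commit.

/* Illustrative only: clear a throttle rule by writing "major:minor 0".
 * Adjust the path to wherever the blkio controller is mounted. */
#include <stdio.h>

int main(void)
{
        const char *path =
                "/sys/fs/cgroup/blkio/blkio.throttle.read_bps_device";
        FILE *f = fopen(path, "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        /* 8:16 is typically sdb; writing 0 removes any existing bps limit */
        fprintf(f, "8:16 0\n");
        fclose(f);
        return 0;
}
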
diff --git a/block/blk-core.c b/block/blk-core.c
index 97e9e5405b83..79e41a76d96a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -346,9 +346,10 @@ void blk_put_queue(struct request_queue *q)
 EXPORT_SYMBOL(blk_put_queue);
 
 /*
- * Note: If a driver supplied the queue lock, it should not zap that lock
- * unexpectedly as some queue cleanup components like elevator_exit() and
- * blk_throtl_exit() need queue lock.
+ * Note: If a driver supplied the queue lock, it is disconnected
+ * by this function. The actual state of the lock doesn't matter
+ * here as the request_queue isn't accessible after this point
+ * (QUEUE_FLAG_DEAD is set) and no other requests will be queued.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
@@ -365,10 +366,8 @@ void blk_cleanup_queue(struct request_queue *q)
         queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
         mutex_unlock(&q->sysfs_lock);
 
-        if (q->elevator)
-                elevator_exit(q->elevator);
-
-        blk_throtl_exit(q);
+        if (q->queue_lock != &q->__queue_lock)
+                q->queue_lock = &q->__queue_lock;
 
         blk_put_queue(q);
 }
@@ -1165,7 +1164,7 @@ static bool bio_attempt_front_merge(struct request_queue *q,
  * true if merge was successful, otherwise false.
  */
 static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
-                               struct bio *bio)
+                               struct bio *bio, unsigned int *request_count)
 {
         struct blk_plug *plug;
         struct request *rq;
@@ -1174,10 +1173,13 @@ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
         plug = tsk->plug;
         if (!plug)
                 goto out;
+        *request_count = 0;
 
         list_for_each_entry_reverse(rq, &plug->list, queuelist) {
                 int el_ret;
 
+                (*request_count)++;
+
                 if (rq->q != q)
                         continue;
 
@@ -1217,6 +1219,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
         struct blk_plug *plug;
         int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
         struct request *req;
+        unsigned int request_count = 0;
 
         /*
          * low level driver can indicate that it wants pages above a
@@ -1235,7 +1238,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
          * Check if we can merge with the plugged list before grabbing
          * any locks.
          */
-        if (attempt_plug_merge(current, q, bio))
+        if (attempt_plug_merge(current, q, bio, &request_count))
                 return;
 
         spin_lock_irq(q->queue_lock);
@@ -1300,11 +1303,10 @@ get_rq:
                         if (__rq->q != q)
                                 plug->should_sort = 1;
                 }
+                if (request_count >= BLK_MAX_REQUEST_COUNT)
+                        blk_flush_plug_list(plug, false);
                 list_add_tail(&req->queuelist, &plug->list);
-                plug->count++;
                 drive_stat_acct(req, 1);
-                if (plug->count >= BLK_MAX_REQUEST_COUNT)
-                        blk_flush_plug_list(plug, false);
         } else {
                 spin_lock_irq(q->queue_lock);
                 add_acct_request(q, req, where);
@@ -1675,6 +1677,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
 int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
         unsigned long flags;
+        int where = ELEVATOR_INSERT_BACK;
 
         if (blk_rq_check_limits(q, rq))
                 return -EIO;
@@ -1691,7 +1694,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
          */
         BUG_ON(blk_queued_rq(rq));
 
-        add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
+        if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
+                where = ELEVATOR_INSERT_FLUSH;
+
+        add_acct_request(q, rq, where);
         spin_unlock_irqrestore(q->queue_lock, flags);
 
         return 0;
@@ -2248,7 +2254,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+bool __blk_end_bidi_request(struct request *rq, int error,
                                    unsigned int nr_bytes, unsigned int bidi_bytes)
 {
         if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
@@ -2617,7 +2623,6 @@ void blk_start_plug(struct blk_plug *plug)
         INIT_LIST_HEAD(&plug->list);
         INIT_LIST_HEAD(&plug->cb_list);
         plug->should_sort = 0;
-        plug->count = 0;
 
         /*
          * If this is a nested plug, don't actually assign it. It will be
@@ -2701,7 +2706,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                 return;
 
         list_splice_init(&plug->list, &list);
-        plug->count = 0;
 
         if (plug->should_sort) {
                 list_sort(NULL, &list, plug_rq_cmp);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index bb21e4c36f70..491eb30a242d 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
 {
         unsigned int policy = 0;
 
+        if (blk_rq_sectors(rq))
+                policy |= REQ_FSEQ_DATA;
+
         if (fflags & REQ_FLUSH) {
                 if (rq->cmd_flags & REQ_FLUSH)
                         policy |= REQ_FSEQ_PREFLUSH;
-                if (blk_rq_sectors(rq))
-                        policy |= REQ_FSEQ_DATA;
                 if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
                         policy |= REQ_FSEQ_POSTFLUSH;
         }
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
 
         /* make @rq a normal request */
         rq->cmd_flags &= ~REQ_FLUSH_SEQ;
-        rq->end_io = NULL;
+        rq->end_io = rq->flush.saved_end_io;
 }
 
 /**
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
         unsigned int fflags = q->flush_flags;   /* may change, cache */
         unsigned int policy = blk_flush_policy(fflags, rq);
 
-        BUG_ON(rq->end_io);
-        BUG_ON(!rq->bio || rq->bio != rq->biotail);
-
         /*
          * @policy now records what operations need to be done.  Adjust
          * REQ_FLUSH and FUA for the driver.
@@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq)
         rq->cmd_flags &= ~REQ_FUA;
 
         /*
+         * An empty flush handed down from a stacking driver may
+         * translate into nothing if the underlying device does not
+         * advertise a write-back cache.  In this case, simply
+         * complete the request.
+         */
+        if (!policy) {
+                __blk_end_bidi_request(rq, 0, 0, 0);
+                return;
+        }
+
+        BUG_ON(!rq->bio || rq->bio != rq->biotail);
+
+        /*
          * If there's data but flush is not necessary, the request can be
          * processed directly without going through flush machinery.  Queue
          * for normal execution.
@@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq)
         if ((policy & REQ_FSEQ_DATA) &&
             !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
                 list_add_tail(&rq->queuelist, &q->queue_head);
+                blk_run_queue_async(q);
                 return;
         }
 
@@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq)
         memset(&rq->flush, 0, sizeof(rq->flush));
         INIT_LIST_HEAD(&rq->flush.list);
         rq->cmd_flags |= REQ_FLUSH_SEQ;
+        rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
         rq->end_io = flush_data_end_io;
 
         blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
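
The reordering in blk_flush_policy() and the new !policy branch in blk_insert_flush() work together: REQ_FSEQ_DATA is now set whenever the request carries sectors, independent of the queue's flush capabilities, so a policy of 0 can only mean an empty flush on a queue that advertises no write-back cache, and such a request is completed immediately instead of entering the flush machinery. A rough standalone sketch of the resulting decision, with stand-in flag values rather than the kernel's definitions:

#include <stdbool.h>

/* Stand-in flag values for illustration only; the real REQ_* and
 * REQ_FSEQ_* definitions live in the kernel headers. */
#define REQ_FLUSH               (1u << 0)
#define REQ_FUA                 (1u << 1)
#define REQ_FSEQ_PREFLUSH       (1u << 2)
#define REQ_FSEQ_DATA           (1u << 3)
#define REQ_FSEQ_POSTFLUSH      (1u << 4)

/* Mirrors the post-merge ordering: DATA is decided first, independent of
 * the queue's flush flags (fflags). */
static unsigned int flush_policy(unsigned int fflags, unsigned int cmd_flags,
                                 bool has_data)
{
        unsigned int policy = 0;

        if (has_data)
                policy |= REQ_FSEQ_DATA;

        if (fflags & REQ_FLUSH) {
                if (cmd_flags & REQ_FLUSH)
                        policy |= REQ_FSEQ_PREFLUSH;
                if (!(fflags & REQ_FUA) && (cmd_flags & REQ_FUA))
                        policy |= REQ_FSEQ_POSTFLUSH;
        }
        return policy;
}

/* An empty REQ_FLUSH on a queue without a write-back cache yields
 * flush_policy(0, REQ_FLUSH, false) == 0, which blk_insert_flush() now
 * completes directly via __blk_end_bidi_request(). */
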
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 475fab809a80..1366a89d8e66 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -115,7 +115,7 @@ void __blk_complete_request(struct request *req)
         /*
          * Select completion CPU
          */
-        if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
+        if (req->cpu != -1) {
                 ccpu = req->cpu;
                 if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
                         ccpu = blk_cpu_to_group(ccpu);
@@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req)
         } else
                 ccpu = cpu;
 
+        /*
+         * If current CPU and requested CPU are in the same group, running
+         * softirq in current CPU. One might concern this is just like
+         * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
+         * running in interrupt handler, and currently I/O controller doesn't
+         * support multiple interrupts, so current CPU is unique actually. This
+         * avoids IPI sending from current CPU to the first CPU of a group.
+         */
         if (ccpu == cpu || ccpu == group_cpu) {
                 struct list_head *list;
 do_local:
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index adc923e9d1f8..a8eff5f8b9c5 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -258,11 +258,13 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 
         ret = queue_var_store(&val, page, count);
         spin_lock_irq(q->queue_lock);
-        if (val) {
+        if (val == 2) {
                 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
-                if (val == 2)
-                        queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
-        } else {
+                queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+        } else if (val == 1) {
+                queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+                queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+        } else if (val == 0) {
                 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
                 queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
         }
@@ -477,6 +479,11 @@ static void blk_release_queue(struct kobject *kobj)
 
         blk_sync_queue(q);
 
+        if (q->elevator)
+                elevator_exit(q->elevator);
+
+        blk_throtl_exit(q);
+
         if (rl->rq_pool)
                 mempool_destroy(rl->rq_pool);
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f6a794120505..a19f58c6fc3a 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
 static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
 {
         bool rw = bio_data_dir(bio);
-        bool sync = bio->bi_rw & REQ_SYNC;
+        bool sync = rw_is_sync(bio->bi_rw);
 
         /* Charge the bio to the group */
         tg->bytes_disp[rw] += bio->bi_size;
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 
         if (tg_no_rule_group(tg, rw)) {
                 blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
-                        rw, bio->bi_rw & REQ_SYNC);
+                        rw, rw_is_sync(bio->bi_rw));
                 rcu_read_unlock();
                 return 0;
         }
diff --git a/block/blk.h b/block/blk.h
index d6586287adc9..20b900a377c9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
                       struct bio *bio);
 void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
+bool __blk_end_bidi_request(struct request *rq, int error,
+                            unsigned int nr_bytes, unsigned int bidi_bytes);
 
 void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
new file mode 100644
index 000000000000..6690e6e41037
--- /dev/null
+++ b/block/bsg-lib.c
@@ -0,0 +1,298 @@
+/*
+ * BSG helper library
+ *
+ * Copyright (C) 2008 James Smart, Emulex Corporation
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2011 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/bsg-lib.h>
+#include <linux/module.h>
+#include <scsi/scsi_cmnd.h>
+
+/**
+ * bsg_destroy_job - routine to teardown/delete a bsg job
+ * @job: bsg_job that is to be torn down
+ */
+static void bsg_destroy_job(struct bsg_job *job)
+{
+        put_device(job->dev);   /* release reference for the request */
+
+        kfree(job->request_payload.sg_list);
+        kfree(job->reply_payload.sg_list);
+        kfree(job);
+}
+
+/**
+ * bsg_job_done - completion routine for bsg requests
+ * @job: bsg_job that is complete
+ * @result: job reply result
+ * @reply_payload_rcv_len: length of payload recvd
+ *
+ * The LLD should call this when the bsg job has completed.
+ */
+void bsg_job_done(struct bsg_job *job, int result,
+                  unsigned int reply_payload_rcv_len)
+{
+        struct request *req = job->req;
+        struct request *rsp = req->next_rq;
+        int err;
+
+        err = job->req->errors = result;
+        if (err < 0)
+                /* we're only returning the result field in the reply */
+                job->req->sense_len = sizeof(u32);
+        else
+                job->req->sense_len = job->reply_len;
+        /* we assume all request payload was transferred, residual == 0 */
+        req->resid_len = 0;
+
+        if (rsp) {
+                WARN_ON(reply_payload_rcv_len > rsp->resid_len);
+
+                /* set reply (bidi) residual */
+                rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
+        }
+        blk_complete_request(req);
+}
+EXPORT_SYMBOL_GPL(bsg_job_done);
+
+/**
+ * bsg_softirq_done - softirq done routine for destroying the bsg requests
+ * @rq: BSG request that holds the job to be destroyed
+ */
+static void bsg_softirq_done(struct request *rq)
+{
+        struct bsg_job *job = rq->special;
+
+        blk_end_request_all(rq, rq->errors);
+        bsg_destroy_job(job);
+}
+
+static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
+{
+        size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
+
+        BUG_ON(!req->nr_phys_segments);
+
+        buf->sg_list = kzalloc(sz, GFP_KERNEL);
+        if (!buf->sg_list)
+                return -ENOMEM;
+        sg_init_table(buf->sg_list, req->nr_phys_segments);
+        buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
+        buf->payload_len = blk_rq_bytes(req);
+        return 0;
+}
+
+/**
+ * bsg_create_job - create the bsg_job structure for the bsg request
+ * @dev: device that is being sent the bsg request
+ * @req: BSG request that needs a job structure
+ */
+static int bsg_create_job(struct device *dev, struct request *req)
+{
+        struct request *rsp = req->next_rq;
+        struct request_queue *q = req->q;
+        struct bsg_job *job;
+        int ret;
+
+        BUG_ON(req->special);
+
+        job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
+        if (!job)
+                return -ENOMEM;
+
+        req->special = job;
+        job->req = req;
+        if (q->bsg_job_size)
+                job->dd_data = (void *)&job[1];
+        job->request = req->cmd;
+        job->request_len = req->cmd_len;
+        job->reply = req->sense;
+        job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
+                                                 * allocated */
+        if (req->bio) {
+                ret = bsg_map_buffer(&job->request_payload, req);
+                if (ret)
+                        goto failjob_rls_job;
+        }
+        if (rsp && rsp->bio) {
+                ret = bsg_map_buffer(&job->reply_payload, rsp);
+                if (ret)
+                        goto failjob_rls_rqst_payload;
+        }
+        job->dev = dev;
+        /* take a reference for the request */
+        get_device(job->dev);
+        return 0;
+
+failjob_rls_rqst_payload:
+        kfree(job->request_payload.sg_list);
+failjob_rls_job:
+        kfree(job);
+        return -ENOMEM;
+}
+
+/*
+ * bsg_goose_queue - restart queue in case it was stopped
+ * @q: request q to be restarted
+ */
+void bsg_goose_queue(struct request_queue *q)
+{
+        if (!q)
+                return;
+
+        blk_run_queue_async(q);
+}
+EXPORT_SYMBOL_GPL(bsg_goose_queue);
+
+/**
+ * bsg_request_fn - generic handler for bsg requests
+ * @q: request queue to manage
+ *
+ * On error the create_bsg_job function should return a -Exyz error value
+ * that will be set to the req->errors.
+ *
+ * Drivers/subsys should pass this to the queue init function.
+ */
+void bsg_request_fn(struct request_queue *q)
+{
+        struct device *dev = q->queuedata;
+        struct request *req;
+        struct bsg_job *job;
+        int ret;
+
+        if (!get_device(dev))
+                return;
+
+        while (1) {
+                req = blk_fetch_request(q);
+                if (!req)
+                        break;
+                spin_unlock_irq(q->queue_lock);
+
+                ret = bsg_create_job(dev, req);
+                if (ret) {
+                        req->errors = ret;
+                        blk_end_request_all(req, ret);
+                        spin_lock_irq(q->queue_lock);
+                        continue;
+                }
+
+                job = req->special;
+                ret = q->bsg_job_fn(job);
+                spin_lock_irq(q->queue_lock);
+                if (ret)
+                        break;
+        }
+
+        spin_unlock_irq(q->queue_lock);
+        put_device(dev);
+        spin_lock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(bsg_request_fn);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @q: request queue setup by caller
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ *
+ * The caller should have setup the reuqest queue with bsg_request_fn
+ * as the request_fn.
+ */
+int bsg_setup_queue(struct device *dev, struct request_queue *q,
+                    char *name, bsg_job_fn *job_fn, int dd_job_size)
+{
+        int ret;
+
+        q->queuedata = dev;
+        q->bsg_job_size = dd_job_size;
+        q->bsg_job_fn = job_fn;
+        queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+        blk_queue_softirq_done(q, bsg_softirq_done);
+        blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
+
+        ret = bsg_register_queue(q, dev, name, NULL);
+        if (ret) {
+                printk(KERN_ERR "%s: bsg interface failed to "
+                       "initialize - register queue\n", dev->kobj.name);
+                return ret;
+        }
+
+        return 0;
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
+
+/**
+ * bsg_remove_queue - Deletes the bsg dev from the q
+ * @q: the request_queue that is to be torn down.
+ *
+ * Notes:
+ *   Before unregistering the queue empty any requests that are blocked
+ */
+void bsg_remove_queue(struct request_queue *q)
+{
+        struct request *req; /* block request */
+        int counts; /* totals for request_list count and starved */
+
+        if (!q)
+                return;
+
+        /* Stop taking in new requests */
+        spin_lock_irq(q->queue_lock);
+        blk_stop_queue(q);
+
+        /* drain all requests in the queue */
+        while (1) {
+                /* need the lock to fetch a request
+                 * this may fetch the same reqeust as the previous pass
+                 */
+                req = blk_fetch_request(q);
+                /* save requests in use and starved */
+                counts = q->rq.count[0] + q->rq.count[1] +
+                         q->rq.starved[0] + q->rq.starved[1];
+                spin_unlock_irq(q->queue_lock);
+                /* any requests still outstanding? */
+                if (counts == 0)
+                        break;
+
+                /* This may be the same req as the previous iteration,
+                 * always send the blk_end_request_all after a prefetch.
+                 * It is not okay to not end the request because the
+                 * prefetch started the request.
+                 */
+                if (req) {
+                        /* return -ENXIO to indicate that this queue is
+                         * going away
+                         */
+                        req->errors = -ENXIO;
+                        blk_end_request_all(req, -ENXIO);
+                }
+
+                msleep(200); /* allow bsg to possibly finish */
+                spin_lock_irq(q->queue_lock);
+        }
+        bsg_unregister_queue(q);
+}
+EXPORT_SYMBOL_GPL(bsg_remove_queue);
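
For context, a rough sketch of how an LLD might wire itself up to this new helper library: the driver allocates its queue with bsg_request_fn as the request_fn, registers a job handler through bsg_setup_queue(), and later completes each job with bsg_job_done(). Names such as example_bsg_handler and the queue setup below are illustrative assumptions, not part of this commit.

#include <linux/blkdev.h>
#include <linux/bsg-lib.h>

/* Hypothetical job handler: a real driver would kick off hardware I/O here
 * and call bsg_job_done() from its completion path. */
static int example_bsg_handler(struct bsg_job *job)
{
        /* ... submit job->request / job->request_payload to the hardware ... */
        bsg_job_done(job, 0, job->reply_payload.payload_len);
        return 0;
}

static int example_attach_bsg(struct device *dev)
{
        struct request_queue *q;
        int ret;

        /* bsg_request_fn is the generic dispatcher provided by bsg-lib */
        q = blk_init_queue(bsg_request_fn, NULL);
        if (!q)
                return -ENOMEM;

        ret = bsg_setup_queue(dev, q, "example_bsg", example_bsg_handler,
                              16 /* per-job LLD data, bytes */);
        if (ret)
                blk_cleanup_queue(q);
        return ret;
}
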
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1f96ad6254f1..16ace89613bc 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -130,6 +130,8 @@ struct cfq_queue {
         unsigned long slice_end;
         long slice_resid;
 
+        /* pending priority requests */
+        int prio_pending;
         /* number of requests that are on the dispatch list or inside driver */
         int dispatched;
 
@@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
         if (rq_is_sync(rq1) != rq_is_sync(rq2))
                 return rq_is_sync(rq1) ? rq1 : rq2;
 
+        if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO)
+                return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2;
+
         s1 = blk_rq_pos(rq1);
         s2 = blk_rq_pos(rq2);
 
@@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
 
         hlist_del_init(&cfqg->cfqd_node);
 
+        BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
+        cfqd->nr_blkcg_linked_grps--;
+
         /*
          * Put the reference taken at the time of creation so that when all
          * queues are gone, group can be destroyed.
@@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq)
         cfqq->cfqd->rq_queued--;
         cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
                                         rq_data_dir(rq), rq_is_sync(rq));
+        if (rq->cmd_flags & REQ_PRIO) {
+                WARN_ON(!cfqq->prio_pending);
+                cfqq->prio_pending--;
+        }
 }
 
 static int cfq_merge(struct request_queue *q, struct request **req,
@@ -3357,6 +3369,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
                 return true;
 
         /*
+         * So both queues are sync. Let the new request get disk time if
+         * it's a metadata request and the current queue is doing regular IO.
+         */
+        if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
+                return true;
+
+        /*
          * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
          */
         if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
@@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
         struct cfq_io_context *cic = RQ_CIC(rq);
 
         cfqd->rq_queued++;
+        if (rq->cmd_flags & REQ_PRIO)
+                cfqq->prio_pending++;
 
         cfq_update_io_thinktime(cfqd, cfqq, cic);
         cfq_update_io_seektime(cfqd, cfqq, rq);
diff --git a/block/genhd.c b/block/genhd.c
index 5cb51c55f6d8..e2f67902dd02 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
                 cpu = part_stat_lock();
                 part_round_stats(cpu, hd);
                 part_stat_unlock();
-                seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
-                           "%u %lu %lu %llu %u %u %u %u\n",
+                seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
+                           "%u %lu %lu %lu %u %u %u %u\n",
                            MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                            disk_name(gp, hd->partno, buf),
                            part_stat_read(hd, ios[READ]),
                            part_stat_read(hd, merges[READ]),
-                           (unsigned long long)part_stat_read(hd, sectors[READ]),
+                           part_stat_read(hd, sectors[READ]),
                            jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
                            part_stat_read(hd, ios[WRITE]),
                            part_stat_read(hd, merges[WRITE]),
-                           (unsigned long long)part_stat_read(hd, sectors[WRITE]),
+                           part_stat_read(hd, sectors[WRITE]),
                            jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
                            part_in_flight(hd),
                            jiffies_to_msecs(part_stat_read(hd, io_ticks)),