path: root/block
Diffstat (limited to 'block')
-rw-r--r--  block/Kconfig            |  10
-rw-r--r--  block/Makefile           |   1
-rw-r--r--  block/blk-cgroup.c       |  37
-rw-r--r--  block/blk-core.c         |  88
-rw-r--r--  block/blk-exec.c         |   7
-rw-r--r--  block/blk-flush.c        |  24
-rw-r--r--  block/blk-ioc.c          |  40
-rw-r--r--  block/blk-lib.c          |   5
-rw-r--r--  block/blk-map.c          |   7
-rw-r--r--  block/blk-softirq.c      |  19
-rw-r--r--  block/blk-sysfs.c        |  20
-rw-r--r--  block/blk-throttle.c     |  12
-rw-r--r--  block/blk-timeout.c      |   5
-rw-r--r--  block/blk.h              |   2
-rw-r--r--  block/bsg-lib.c          | 298
-rw-r--r--  block/bsg.c              |  18
-rw-r--r--  block/cfq-iosched.c      | 173
-rw-r--r--  block/compat_ioctl.c     |  14
-rw-r--r--  block/deadline-iosched.c |   4
-rw-r--r--  block/elevator.c         |   7
-rw-r--r--  block/genhd.c            |  71
21 files changed, 627 insertions, 235 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 60be1e0455d..e97934eecec 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -65,6 +65,16 @@ config BLK_DEV_BSG
65 65
66 If unsure, say Y. 66 If unsure, say Y.
67 67
68config BLK_DEV_BSGLIB
69 bool "Block layer SG support v4 helper lib"
70 default n
71 select BLK_DEV_BSG
72 help
73 Subsystems will normally enable this if needed. Users will not
74 normally need to manually enable this.
75
76 If unsure, say N.
77
68config BLK_DEV_INTEGRITY 78config BLK_DEV_INTEGRITY
69 bool "Block layer data integrity support" 79 bool "Block layer data integrity support"
70 ---help--- 80 ---help---
diff --git a/block/Makefile b/block/Makefile
index 0fec4b3fab5..514c6e4f427 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
8 blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o 8 blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
9 9
10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o 10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
11obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
11obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o 12obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
12obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o 13obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
13obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 14obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bcaf16ee6ad..b596e54ddd7 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -785,10 +785,10 @@ static int blkio_policy_parse_and_set(char *buf,
785{ 785{
786 char *s[4], *p, *major_s = NULL, *minor_s = NULL; 786 char *s[4], *p, *major_s = NULL, *minor_s = NULL;
787 int ret; 787 int ret;
788 unsigned long major, minor, temp; 788 unsigned long major, minor;
789 int i = 0; 789 int i = 0;
790 dev_t dev; 790 dev_t dev;
791 u64 bps, iops; 791 u64 temp;
792 792
793 memset(s, 0, sizeof(s)); 793 memset(s, 0, sizeof(s));
794 794
@@ -826,20 +826,23 @@ static int blkio_policy_parse_and_set(char *buf,
826 826
827 dev = MKDEV(major, minor); 827 dev = MKDEV(major, minor);
828 828
829 ret = blkio_check_dev_num(dev); 829 ret = strict_strtoull(s[1], 10, &temp);
830 if (ret) 830 if (ret)
831 return ret; 831 return -EINVAL;
832 832
833 newpn->dev = dev; 833 /* For rule removal, do not check for device presence. */
834 if (temp) {
835 ret = blkio_check_dev_num(dev);
836 if (ret)
837 return ret;
838 }
834 839
835 if (s[1] == NULL) 840 newpn->dev = dev;
836 return -EINVAL;
837 841
838 switch (plid) { 842 switch (plid) {
839 case BLKIO_POLICY_PROP: 843 case BLKIO_POLICY_PROP:
840 ret = strict_strtoul(s[1], 10, &temp); 844 if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
841 if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) || 845 temp > BLKIO_WEIGHT_MAX)
842 temp > BLKIO_WEIGHT_MAX)
843 return -EINVAL; 846 return -EINVAL;
844 847
845 newpn->plid = plid; 848 newpn->plid = plid;
@@ -850,26 +853,18 @@ static int blkio_policy_parse_and_set(char *buf,
850 switch(fileid) { 853 switch(fileid) {
851 case BLKIO_THROTL_read_bps_device: 854 case BLKIO_THROTL_read_bps_device:
852 case BLKIO_THROTL_write_bps_device: 855 case BLKIO_THROTL_write_bps_device:
853 ret = strict_strtoull(s[1], 10, &bps);
854 if (ret)
855 return -EINVAL;
856
857 newpn->plid = plid; 856 newpn->plid = plid;
858 newpn->fileid = fileid; 857 newpn->fileid = fileid;
859 newpn->val.bps = bps; 858 newpn->val.bps = temp;
860 break; 859 break;
861 case BLKIO_THROTL_read_iops_device: 860 case BLKIO_THROTL_read_iops_device:
862 case BLKIO_THROTL_write_iops_device: 861 case BLKIO_THROTL_write_iops_device:
863 ret = strict_strtoull(s[1], 10, &iops); 862 if (temp > THROTL_IOPS_MAX)
864 if (ret)
865 return -EINVAL;
866
867 if (iops > THROTL_IOPS_MAX)
868 return -EINVAL; 863 return -EINVAL;
869 864
870 newpn->plid = plid; 865 newpn->plid = plid;
871 newpn->fileid = fileid; 866 newpn->fileid = fileid;
872 newpn->val.iops = (unsigned int)iops; 867 newpn->val.iops = (unsigned int)temp;
873 break; 868 break;
874 } 869 }
875 break; 870 break;
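
The net effect of the blk-cgroup.c hunks above is that the rule value is now parsed once up front, and a value of zero (rule removal) skips the device-presence check. A minimal, illustrative sketch of that parse order in plain userspace C follows; parse_throttle_rule() and the device_exists() stub are hypothetical stand-ins for the kernel's strict_strtoull()/blkio_check_dev_num() path, not part of the patch.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int device_exists(unsigned major, unsigned minor)
{
	(void)major;
	(void)minor;
	return 1;		/* stub: pretend the device node is present */
}

/* Parse "major:minor value"; a value of 0 means "remove this rule". */
static int parse_throttle_rule(const char *buf, unsigned long long *val)
{
	unsigned major, minor;
	char valstr[32];

	if (sscanf(buf, "%u:%u %31s", &major, &minor, valstr) != 3)
		return -1;

	errno = 0;
	*val = strtoull(valstr, NULL, 10);
	if (errno)
		return -1;

	/* Rule removal: skip the device-presence check, as in the hunk above. */
	if (*val && !device_exists(major, minor))
		return -1;

	return 0;
}

int main(void)
{
	unsigned long long v = 0;
	int ret;

	/* "8:16 0" clears a rule, so no device lookup is required. */
	ret = parse_throttle_rule("8:16 0", &v);
	printf("parse \"8:16 0\" -> %d (value %llu)\n", ret, v);
	return 0;
}
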
diff --git a/block/blk-core.c b/block/blk-core.c
index d2f8f4049ab..8fc4ae28a19 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -348,9 +348,10 @@ void blk_put_queue(struct request_queue *q)
348EXPORT_SYMBOL(blk_put_queue); 348EXPORT_SYMBOL(blk_put_queue);
349 349
350/* 350/*
351 * Note: If a driver supplied the queue lock, it should not zap that lock 351 * Note: If a driver supplied the queue lock, it is disconnected
352 * unexpectedly as some queue cleanup components like elevator_exit() and 352 * by this function. The actual state of the lock doesn't matter
353 * blk_throtl_exit() need queue lock. 353 * here as the request_queue isn't accessible after this point
354 * (QUEUE_FLAG_DEAD is set) and no other requests will be queued.
354 */ 355 */
355void blk_cleanup_queue(struct request_queue *q) 356void blk_cleanup_queue(struct request_queue *q)
356{ 357{
@@ -367,10 +368,8 @@ void blk_cleanup_queue(struct request_queue *q)
367 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); 368 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
368 mutex_unlock(&q->sysfs_lock); 369 mutex_unlock(&q->sysfs_lock);
369 370
370 if (q->elevator) 371 if (q->queue_lock != &q->__queue_lock)
371 elevator_exit(q->elevator); 372 q->queue_lock = &q->__queue_lock;
372
373 blk_throtl_exit(q);
374 373
375 blk_put_queue(q); 374 blk_put_queue(q);
376} 375}
@@ -419,6 +418,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
419 q->backing_dev_info.state = 0; 418 q->backing_dev_info.state = 0;
420 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 419 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
421 q->backing_dev_info.name = "block"; 420 q->backing_dev_info.name = "block";
421 q->node = node_id;
422 422
423 err = bdi_init(&q->backing_dev_info); 423 err = bdi_init(&q->backing_dev_info);
424 if (err) { 424 if (err) {
@@ -503,7 +503,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
503 if (!uninit_q) 503 if (!uninit_q)
504 return NULL; 504 return NULL;
505 505
506 q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); 506 q = blk_init_allocated_queue(uninit_q, rfn, lock);
507 if (!q) 507 if (!q)
508 blk_cleanup_queue(uninit_q); 508 blk_cleanup_queue(uninit_q);
509 509
@@ -515,18 +515,9 @@ struct request_queue *
515blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, 515blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
516 spinlock_t *lock) 516 spinlock_t *lock)
517{ 517{
518 return blk_init_allocated_queue_node(q, rfn, lock, -1);
519}
520EXPORT_SYMBOL(blk_init_allocated_queue);
521
522struct request_queue *
523blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
524 spinlock_t *lock, int node_id)
525{
526 if (!q) 518 if (!q)
527 return NULL; 519 return NULL;
528 520
529 q->node = node_id;
530 if (blk_init_free_list(q)) 521 if (blk_init_free_list(q))
531 return NULL; 522 return NULL;
532 523
@@ -556,7 +547,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
556 547
557 return NULL; 548 return NULL;
558} 549}
559EXPORT_SYMBOL(blk_init_allocated_queue_node); 550EXPORT_SYMBOL(blk_init_allocated_queue);
560 551
561int blk_get_queue(struct request_queue *q) 552int blk_get_queue(struct request_queue *q)
562{ 553{
@@ -839,6 +830,9 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
839{ 830{
840 struct request *rq; 831 struct request *rq;
841 832
833 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
834 return NULL;
835
842 BUG_ON(rw != READ && rw != WRITE); 836 BUG_ON(rw != READ && rw != WRITE);
843 837
844 spin_lock_irq(q->queue_lock); 838 spin_lock_irq(q->queue_lock);
@@ -1164,7 +1158,7 @@ static bool bio_attempt_front_merge(struct request_queue *q,
1164 * true if merge was successful, otherwise false. 1158 * true if merge was successful, otherwise false.
1165 */ 1159 */
1166static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, 1160static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
1167 struct bio *bio) 1161 struct bio *bio, unsigned int *request_count)
1168{ 1162{
1169 struct blk_plug *plug; 1163 struct blk_plug *plug;
1170 struct request *rq; 1164 struct request *rq;
@@ -1173,10 +1167,13 @@ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
1173 plug = tsk->plug; 1167 plug = tsk->plug;
1174 if (!plug) 1168 if (!plug)
1175 goto out; 1169 goto out;
1170 *request_count = 0;
1176 1171
1177 list_for_each_entry_reverse(rq, &plug->list, queuelist) { 1172 list_for_each_entry_reverse(rq, &plug->list, queuelist) {
1178 int el_ret; 1173 int el_ret;
1179 1174
1175 (*request_count)++;
1176
1180 if (rq->q != q) 1177 if (rq->q != q)
1181 continue; 1178 continue;
1182 1179
@@ -1216,6 +1213,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1216 struct blk_plug *plug; 1213 struct blk_plug *plug;
1217 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; 1214 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
1218 struct request *req; 1215 struct request *req;
1216 unsigned int request_count = 0;
1219 1217
1220 /* 1218 /*
1221 * low level driver can indicate that it wants pages above a 1219 * low level driver can indicate that it wants pages above a
@@ -1234,7 +1232,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1234 * Check if we can merge with the plugged list before grabbing 1232 * Check if we can merge with the plugged list before grabbing
1235 * any locks. 1233 * any locks.
1236 */ 1234 */
1237 if (attempt_plug_merge(current, q, bio)) 1235 if (attempt_plug_merge(current, q, bio, &request_count))
1238 goto out; 1236 goto out;
1239 1237
1240 spin_lock_irq(q->queue_lock); 1238 spin_lock_irq(q->queue_lock);
@@ -1279,10 +1277,8 @@ get_rq:
1279 init_request_from_bio(req, bio); 1277 init_request_from_bio(req, bio);
1280 1278
1281 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || 1279 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1282 bio_flagged(bio, BIO_CPU_AFFINE)) { 1280 bio_flagged(bio, BIO_CPU_AFFINE))
1283 req->cpu = blk_cpu_to_group(get_cpu()); 1281 req->cpu = raw_smp_processor_id();
1284 put_cpu();
1285 }
1286 1282
1287 plug = current->plug; 1283 plug = current->plug;
1288 if (plug) { 1284 if (plug) {
@@ -1301,6 +1297,8 @@ get_rq:
1301 if (__rq->q != q) 1297 if (__rq->q != q)
1302 plug->should_sort = 1; 1298 plug->should_sort = 1;
1303 } 1299 }
1300 if (request_count >= BLK_MAX_REQUEST_COUNT)
1301 blk_flush_plug_list(plug, false);
1304 list_add_tail(&req->queuelist, &plug->list); 1302 list_add_tail(&req->queuelist, &plug->list);
1305 drive_stat_acct(req, 1); 1303 drive_stat_acct(req, 1);
1306 } else { 1304 } else {
@@ -1357,29 +1355,27 @@ static int __init setup_fail_make_request(char *str)
1357} 1355}
1358__setup("fail_make_request=", setup_fail_make_request); 1356__setup("fail_make_request=", setup_fail_make_request);
1359 1357
1360static int should_fail_request(struct bio *bio) 1358static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
1361{ 1359{
1362 struct hd_struct *part = bio->bi_bdev->bd_part; 1360 return part->make_it_fail && should_fail(&fail_make_request, bytes);
1363
1364 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
1365 return should_fail(&fail_make_request, bio->bi_size);
1366
1367 return 0;
1368} 1361}
1369 1362
1370static int __init fail_make_request_debugfs(void) 1363static int __init fail_make_request_debugfs(void)
1371{ 1364{
1372 return init_fault_attr_dentries(&fail_make_request, 1365 struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
1373 "fail_make_request"); 1366 NULL, &fail_make_request);
1367
1368 return IS_ERR(dir) ? PTR_ERR(dir) : 0;
1374} 1369}
1375 1370
1376late_initcall(fail_make_request_debugfs); 1371late_initcall(fail_make_request_debugfs);
1377 1372
1378#else /* CONFIG_FAIL_MAKE_REQUEST */ 1373#else /* CONFIG_FAIL_MAKE_REQUEST */
1379 1374
1380static inline int should_fail_request(struct bio *bio) 1375static inline bool should_fail_request(struct hd_struct *part,
1376 unsigned int bytes)
1381{ 1377{
1382 return 0; 1378 return false;
1383} 1379}
1384 1380
1385#endif /* CONFIG_FAIL_MAKE_REQUEST */ 1381#endif /* CONFIG_FAIL_MAKE_REQUEST */
@@ -1462,6 +1458,7 @@ static inline void __generic_make_request(struct bio *bio)
1462 old_dev = 0; 1458 old_dev = 0;
1463 do { 1459 do {
1464 char b[BDEVNAME_SIZE]; 1460 char b[BDEVNAME_SIZE];
1461 struct hd_struct *part;
1465 1462
1466 q = bdev_get_queue(bio->bi_bdev); 1463 q = bdev_get_queue(bio->bi_bdev);
1467 if (unlikely(!q)) { 1464 if (unlikely(!q)) {
@@ -1485,7 +1482,10 @@ static inline void __generic_make_request(struct bio *bio)
1485 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) 1482 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
1486 goto end_io; 1483 goto end_io;
1487 1484
1488 if (should_fail_request(bio)) 1485 part = bio->bi_bdev->bd_part;
1486 if (should_fail_request(part, bio->bi_size) ||
1487 should_fail_request(&part_to_disk(part)->part0,
1488 bio->bi_size))
1489 goto end_io; 1489 goto end_io;
1490 1490
1491 /* 1491 /*
@@ -1696,15 +1696,14 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1696int blk_insert_cloned_request(struct request_queue *q, struct request *rq) 1696int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1697{ 1697{
1698 unsigned long flags; 1698 unsigned long flags;
1699 int where = ELEVATOR_INSERT_BACK;
1699 1700
1700 if (blk_rq_check_limits(q, rq)) 1701 if (blk_rq_check_limits(q, rq))
1701 return -EIO; 1702 return -EIO;
1702 1703
1703#ifdef CONFIG_FAIL_MAKE_REQUEST 1704 if (rq->rq_disk &&
1704 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && 1705 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
1705 should_fail(&fail_make_request, blk_rq_bytes(rq)))
1706 return -EIO; 1706 return -EIO;
1707#endif
1708 1707
1709 spin_lock_irqsave(q->queue_lock, flags); 1708 spin_lock_irqsave(q->queue_lock, flags);
1710 1709
@@ -1714,7 +1713,12 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1714 */ 1713 */
1715 BUG_ON(blk_queued_rq(rq)); 1714 BUG_ON(blk_queued_rq(rq));
1716 1715
1717 add_acct_request(q, rq, ELEVATOR_INSERT_BACK); 1716 if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
1717 where = ELEVATOR_INSERT_FLUSH;
1718
1719 add_acct_request(q, rq, where);
1720 if (where == ELEVATOR_INSERT_FLUSH)
1721 __blk_run_queue(q);
1718 spin_unlock_irqrestore(q->queue_lock, flags); 1722 spin_unlock_irqrestore(q->queue_lock, flags);
1719 1723
1720 return 0; 1724 return 0;
@@ -2271,7 +2275,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
2271 * %false - we are done with this request 2275 * %false - we are done with this request
2272 * %true - still buffers pending for this request 2276 * %true - still buffers pending for this request
2273 **/ 2277 **/
2274static bool __blk_end_bidi_request(struct request *rq, int error, 2278bool __blk_end_bidi_request(struct request *rq, int error,
2275 unsigned int nr_bytes, unsigned int bidi_bytes) 2279 unsigned int nr_bytes, unsigned int bidi_bytes)
2276{ 2280{
2277 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2281 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 8a0e7ec056e..a1ebceb332f 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -50,6 +50,13 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
50{ 50{
51 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 51 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
52 52
53 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
54 rq->errors = -ENXIO;
55 if (rq->end_io)
56 rq->end_io(rq, rq->errors);
57 return;
58 }
59
53 rq->rq_disk = bd_disk; 60 rq->rq_disk = bd_disk;
54 rq->end_io = done; 61 rq->end_io = done;
55 WARN_ON(irqs_disabled()); 62 WARN_ON(irqs_disabled());
diff --git a/block/blk-flush.c b/block/blk-flush.c
index bb21e4c36f7..720ad607ff9 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
95{ 95{
96 unsigned int policy = 0; 96 unsigned int policy = 0;
97 97
98 if (blk_rq_sectors(rq))
99 policy |= REQ_FSEQ_DATA;
100
98 if (fflags & REQ_FLUSH) { 101 if (fflags & REQ_FLUSH) {
99 if (rq->cmd_flags & REQ_FLUSH) 102 if (rq->cmd_flags & REQ_FLUSH)
100 policy |= REQ_FSEQ_PREFLUSH; 103 policy |= REQ_FSEQ_PREFLUSH;
101 if (blk_rq_sectors(rq))
102 policy |= REQ_FSEQ_DATA;
103 if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) 104 if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
104 policy |= REQ_FSEQ_POSTFLUSH; 105 policy |= REQ_FSEQ_POSTFLUSH;
105 } 106 }
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
122 123
123 /* make @rq a normal request */ 124 /* make @rq a normal request */
124 rq->cmd_flags &= ~REQ_FLUSH_SEQ; 125 rq->cmd_flags &= ~REQ_FLUSH_SEQ;
125 rq->end_io = NULL; 126 rq->end_io = rq->flush.saved_end_io;
126} 127}
127 128
128/** 129/**
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
300 unsigned int fflags = q->flush_flags; /* may change, cache */ 301 unsigned int fflags = q->flush_flags; /* may change, cache */
301 unsigned int policy = blk_flush_policy(fflags, rq); 302 unsigned int policy = blk_flush_policy(fflags, rq);
302 303
303 BUG_ON(rq->end_io);
304 BUG_ON(!rq->bio || rq->bio != rq->biotail);
305
306 /* 304 /*
307 * @policy now records what operations need to be done. Adjust 305 * @policy now records what operations need to be done. Adjust
308 * REQ_FLUSH and FUA for the driver. 306 * REQ_FLUSH and FUA for the driver.
@@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq)
312 rq->cmd_flags &= ~REQ_FUA; 310 rq->cmd_flags &= ~REQ_FUA;
313 311
314 /* 312 /*
313 * An empty flush handed down from a stacking driver may
314 * translate into nothing if the underlying device does not
315 * advertise a write-back cache. In this case, simply
316 * complete the request.
317 */
318 if (!policy) {
319 __blk_end_bidi_request(rq, 0, 0, 0);
320 return;
321 }
322
323 BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */
324
325 /*
315 * If there's data but flush is not necessary, the request can be 326 * If there's data but flush is not necessary, the request can be
316 * processed directly without going through flush machinery. Queue 327 * processed directly without going through flush machinery. Queue
317 * for normal execution. 328 * for normal execution.
@@ -329,6 +340,7 @@ void blk_insert_flush(struct request *rq)
329 memset(&rq->flush, 0, sizeof(rq->flush)); 340 memset(&rq->flush, 0, sizeof(rq->flush));
330 INIT_LIST_HEAD(&rq->flush.list); 341 INIT_LIST_HEAD(&rq->flush.list);
331 rq->cmd_flags |= REQ_FLUSH_SEQ; 342 rq->cmd_flags |= REQ_FLUSH_SEQ;
343 rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
332 rq->end_io = flush_data_end_io; 344 rq->end_io = flush_data_end_io;
333 345
334 blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); 346 blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
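
For reference, the policy computed by blk_flush_policy() above is what drives the new empty-flush short cut: a request with no data, no preflush and no postflush requirement ends up with policy == 0 and is completed immediately in blk_insert_flush(). The standalone sketch below restates that decision with mock flag values; the names are illustrative, not the kernel's REQ_*/REQ_FSEQ_* encoding.

#include <stdio.h>

#define RQ_HAS_DATA  (1u << 0)	/* blk_rq_sectors(rq) != 0 */
#define RQ_FLUSH     (1u << 1)	/* rq->cmd_flags & REQ_FLUSH */
#define RQ_FUA       (1u << 2)	/* rq->cmd_flags & REQ_FUA */

#define Q_FLUSH      (1u << 0)	/* queue advertises a write-back cache */
#define Q_FUA        (1u << 1)	/* queue supports FUA natively */

#define FSEQ_DATA      (1u << 0)
#define FSEQ_PREFLUSH  (1u << 1)
#define FSEQ_POSTFLUSH (1u << 2)

static unsigned flush_policy(unsigned qflags, unsigned rqflags)
{
	unsigned policy = 0;

	if (rqflags & RQ_HAS_DATA)
		policy |= FSEQ_DATA;
	if (qflags & Q_FLUSH) {
		if (rqflags & RQ_FLUSH)
			policy |= FSEQ_PREFLUSH;
		if (!(qflags & Q_FUA) && (rqflags & RQ_FUA))
			policy |= FSEQ_POSTFLUSH;
	}
	return policy;
}

int main(void)
{
	/* Empty flush against a queue with no write-back cache: policy == 0,
	 * so blk_insert_flush() now simply completes the request. */
	printf("no-cache queue, empty flush : policy = %u\n",
	       flush_policy(0, RQ_FLUSH));
	/* Same request against a write-back cache needs a preflush step. */
	printf("write-back queue, empty flush: policy = %u\n",
	       flush_policy(Q_FLUSH, RQ_FLUSH));
	return 0;
}
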
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 342eae9b0d3..6f9bbd97865 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -82,26 +82,26 @@ void exit_io_context(struct task_struct *task)
82 82
83struct io_context *alloc_io_context(gfp_t gfp_flags, int node) 83struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
84{ 84{
85 struct io_context *ret; 85 struct io_context *ioc;
86 86
87 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); 87 ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
88 if (ret) { 88 if (ioc) {
89 atomic_long_set(&ret->refcount, 1); 89 atomic_long_set(&ioc->refcount, 1);
90 atomic_set(&ret->nr_tasks, 1); 90 atomic_set(&ioc->nr_tasks, 1);
91 spin_lock_init(&ret->lock); 91 spin_lock_init(&ioc->lock);
92 ret->ioprio_changed = 0; 92 ioc->ioprio_changed = 0;
93 ret->ioprio = 0; 93 ioc->ioprio = 0;
94 ret->last_waited = 0; /* doesn't matter... */ 94 ioc->last_waited = 0; /* doesn't matter... */
95 ret->nr_batch_requests = 0; /* because this is 0 */ 95 ioc->nr_batch_requests = 0; /* because this is 0 */
96 INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); 96 INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
97 INIT_HLIST_HEAD(&ret->cic_list); 97 INIT_HLIST_HEAD(&ioc->cic_list);
98 ret->ioc_data = NULL; 98 ioc->ioc_data = NULL;
99#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) 99#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
100 ret->cgroup_changed = 0; 100 ioc->cgroup_changed = 0;
101#endif 101#endif
102 } 102 }
103 103
104 return ret; 104 return ioc;
105} 105}
106 106
107/* 107/*
@@ -139,19 +139,19 @@ struct io_context *current_io_context(gfp_t gfp_flags, int node)
139 */ 139 */
140struct io_context *get_io_context(gfp_t gfp_flags, int node) 140struct io_context *get_io_context(gfp_t gfp_flags, int node)
141{ 141{
142 struct io_context *ret = NULL; 142 struct io_context *ioc = NULL;
143 143
144 /* 144 /*
145 * Check for unlikely race with exiting task. ioc ref count is 145 * Check for unlikely race with exiting task. ioc ref count is
146 * zero when ioc is being detached. 146 * zero when ioc is being detached.
147 */ 147 */
148 do { 148 do {
149 ret = current_io_context(gfp_flags, node); 149 ioc = current_io_context(gfp_flags, node);
150 if (unlikely(!ret)) 150 if (unlikely(!ioc))
151 break; 151 break;
152 } while (!atomic_long_inc_not_zero(&ret->refcount)); 152 } while (!atomic_long_inc_not_zero(&ioc->refcount));
153 153
154 return ret; 154 return ioc;
155} 155}
156EXPORT_SYMBOL(get_io_context); 156EXPORT_SYMBOL(get_io_context);
157 157
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 78e627e2581..2b461b496a7 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -59,7 +59,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
59 * granularity 59 * granularity
60 */ 60 */
61 max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); 61 max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
62 if (q->limits.discard_granularity) { 62 if (unlikely(!max_discard_sectors)) {
63 /* Avoid infinite loop below. Being cautious never hurts. */
64 return -EOPNOTSUPP;
65 } else if (q->limits.discard_granularity) {
63 unsigned int disc_sects = q->limits.discard_granularity >> 9; 66 unsigned int disc_sects = q->limits.discard_granularity >> 9;
64 67
65 max_discard_sectors &= ~(disc_sects - 1); 68 max_discard_sectors &= ~(disc_sects - 1);
diff --git a/block/blk-map.c b/block/blk-map.c
index e663ac2d8e6..164cd005970 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -204,10 +204,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
204 if (!iov[i].iov_len) 204 if (!iov[i].iov_len)
205 return -EINVAL; 205 return -EINVAL;
206 206
207 if (uaddr & queue_dma_alignment(q)) { 207 /*
208 * Keep going so we check length of all segments
209 */
210 if (uaddr & queue_dma_alignment(q))
208 unaligned = 1; 211 unaligned = 1;
209 break;
210 }
211 } 212 }
212 213
213 if (unaligned || (q->dma_pad_mask & len) || map_data) 214 if (unaligned || (q->dma_pad_mask & len) || map_data)
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ee9c2160222..1366a89d8e6 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -103,24 +103,35 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
103 103
104void __blk_complete_request(struct request *req) 104void __blk_complete_request(struct request *req)
105{ 105{
106 int ccpu, cpu, group_cpu = NR_CPUS;
106 struct request_queue *q = req->q; 107 struct request_queue *q = req->q;
107 unsigned long flags; 108 unsigned long flags;
108 int ccpu, cpu, group_cpu;
109 109
110 BUG_ON(!q->softirq_done_fn); 110 BUG_ON(!q->softirq_done_fn);
111 111
112 local_irq_save(flags); 112 local_irq_save(flags);
113 cpu = smp_processor_id(); 113 cpu = smp_processor_id();
114 group_cpu = blk_cpu_to_group(cpu);
115 114
116 /* 115 /*
117 * Select completion CPU 116 * Select completion CPU
118 */ 117 */
119 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) 118 if (req->cpu != -1) {
120 ccpu = req->cpu; 119 ccpu = req->cpu;
121 else 120 if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
121 ccpu = blk_cpu_to_group(ccpu);
122 group_cpu = blk_cpu_to_group(cpu);
123 }
124 } else
122 ccpu = cpu; 125 ccpu = cpu;
123 126
127 /*
128 * If current CPU and requested CPU are in the same group, running
129 * softirq in current CPU. One might concern this is just like
130 * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
131 * running in interrupt handler, and currently I/O controller doesn't
132 * support multiple interrupts, so current CPU is unique actually. This
133 * avoids IPI sending from current CPU to the first CPU of a group.
134 */
124 if (ccpu == cpu || ccpu == group_cpu) { 135 if (ccpu == cpu || ccpu == group_cpu) {
125 struct list_head *list; 136 struct list_head *list;
126do_local: 137do_local:
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d935bd859c8..60fda88c57f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -244,8 +244,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
244static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page) 244static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
245{ 245{
246 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags); 246 bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
247 bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
247 248
248 return queue_var_show(set, page); 249 return queue_var_show(set << force, page);
249} 250}
250 251
251static ssize_t 252static ssize_t
@@ -257,10 +258,16 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
257 258
258 ret = queue_var_store(&val, page, count); 259 ret = queue_var_store(&val, page, count);
259 spin_lock_irq(q->queue_lock); 260 spin_lock_irq(q->queue_lock);
260 if (val) 261 if (val == 2) {
261 queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 262 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
262 else 263 queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
263 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); 264 } else if (val == 1) {
265 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
266 queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
267 } else if (val == 0) {
268 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
269 queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
270 }
264 spin_unlock_irq(q->queue_lock); 271 spin_unlock_irq(q->queue_lock);
265#endif 272#endif
266 return ret; 273 return ret;
@@ -472,6 +479,11 @@ static void blk_release_queue(struct kobject *kobj)
472 479
473 blk_sync_queue(q); 480 blk_sync_queue(q);
474 481
482 if (q->elevator)
483 elevator_exit(q->elevator);
484
485 blk_throtl_exit(q);
486
475 if (rl->rq_pool) 487 if (rl->rq_pool)
476 mempool_destroy(rl->rq_pool); 488 mempool_destroy(rl->rq_pool);
477 489
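
The blk-sysfs.c change above maps the three values userspace can write to /sys/block/<dev>/queue/rq_affinity onto the two queue flags, and the show path reports them back as set << force. A tiny standalone C sketch of that mapping follows; the fake_queue struct and its two booleans are illustrative stand-ins for QUEUE_FLAG_SAME_COMP and QUEUE_FLAG_SAME_FORCE.

#include <stdio.h>
#include <stdbool.h>

struct fake_queue {
	bool same_comp;		/* complete on the submitting CPU's group */
	bool same_force;	/* force completion on the exact submitting CPU */
};

static void rq_affinity_store(struct fake_queue *q, unsigned long val)
{
	q->same_comp  = (val == 1 || val == 2);
	q->same_force = (val == 2);
}

static unsigned long rq_affinity_show(const struct fake_queue *q)
{
	/* same "set << force" readback as queue_rq_affinity_show() */
	return (unsigned long)q->same_comp << q->same_force;
}

int main(void)
{
	struct fake_queue q = { false, false };
	unsigned long val;

	for (val = 0; val <= 2; val++) {
		rq_affinity_store(&q, val);
		printf("write %lu -> read back %lu\n", val, rq_affinity_show(&q));
	}
	return 0;
}
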
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 3689f833afd..a19f58c6fc3 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -142,9 +142,9 @@ static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg)
142 return NULL; 142 return NULL;
143} 143}
144 144
145static inline int total_nr_queued(struct throtl_data *td) 145static inline unsigned int total_nr_queued(struct throtl_data *td)
146{ 146{
147 return (td->nr_queued[0] + td->nr_queued[1]); 147 return td->nr_queued[0] + td->nr_queued[1];
148} 148}
149 149
150static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg) 150static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
746static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) 746static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
747{ 747{
748 bool rw = bio_data_dir(bio); 748 bool rw = bio_data_dir(bio);
749 bool sync = bio->bi_rw & REQ_SYNC; 749 bool sync = rw_is_sync(bio->bi_rw);
750 750
751 /* Charge the bio to the group */ 751 /* Charge the bio to the group */
752 tg->bytes_disp[rw] += bio->bi_size; 752 tg->bytes_disp[rw] += bio->bi_size;
@@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q)
927 927
928 bio_list_init(&bio_list_on_stack); 928 bio_list_init(&bio_list_on_stack);
929 929
930 throtl_log(td, "dispatch nr_queued=%d read=%u write=%u", 930 throtl_log(td, "dispatch nr_queued=%u read=%u write=%u",
931 total_nr_queued(td), td->nr_queued[READ], 931 total_nr_queued(td), td->nr_queued[READ],
932 td->nr_queued[WRITE]); 932 td->nr_queued[WRITE]);
933 933
@@ -970,7 +970,7 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
970 struct delayed_work *dwork = &td->throtl_work; 970 struct delayed_work *dwork = &td->throtl_work;
971 971
972 /* schedule work if limits changed even if no bio is queued */ 972 /* schedule work if limits changed even if no bio is queued */
973 if (total_nr_queued(td) > 0 || td->limits_changed) { 973 if (total_nr_queued(td) || td->limits_changed) {
974 /* 974 /*
975 * We might have a work scheduled to be executed in future. 975 * We might have a work scheduled to be executed in future.
976 * Cancel that and schedule a new one. 976 * Cancel that and schedule a new one.
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
1150 1150
1151 if (tg_no_rule_group(tg, rw)) { 1151 if (tg_no_rule_group(tg, rw)) {
1152 blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, 1152 blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
1153 rw, bio->bi_rw & REQ_SYNC); 1153 rw, rw_is_sync(bio->bi_rw));
1154 rcu_read_unlock(); 1154 rcu_read_unlock();
1155 return 0; 1155 return 0;
1156 } 1156 }
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 4f0c06c7a33..78035488895 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q)
28 28
29static int __init fail_io_timeout_debugfs(void) 29static int __init fail_io_timeout_debugfs(void)
30{ 30{
31 return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); 31 struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout",
32 NULL, &fail_io_timeout);
33
34 return IS_ERR(dir) ? PTR_ERR(dir) : 0;
32} 35}
33 36
34late_initcall(fail_io_timeout_debugfs); 37late_initcall(fail_io_timeout_debugfs);
diff --git a/block/blk.h b/block/blk.h
index d6586287adc..20b900a377c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
17 struct bio *bio); 17 struct bio *bio);
18void blk_dequeue_request(struct request *rq); 18void blk_dequeue_request(struct request *rq);
19void __blk_queue_free_tags(struct request_queue *q); 19void __blk_queue_free_tags(struct request_queue *q);
20bool __blk_end_bidi_request(struct request *rq, int error,
21 unsigned int nr_bytes, unsigned int bidi_bytes);
20 22
21void blk_rq_timed_out_timer(unsigned long data); 23void blk_rq_timed_out_timer(unsigned long data);
22void blk_delete_timer(struct request *); 24void blk_delete_timer(struct request *);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
new file mode 100644
index 00000000000..6690e6e4103
--- /dev/null
+++ b/block/bsg-lib.c
@@ -0,0 +1,298 @@
1/*
2 * BSG helper library
3 *
4 * Copyright (C) 2008 James Smart, Emulex Corporation
5 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
6 * Copyright (C) 2011 Mike Christie
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 */
23#include <linux/slab.h>
24#include <linux/blkdev.h>
25#include <linux/delay.h>
26#include <linux/scatterlist.h>
27#include <linux/bsg-lib.h>
28#include <linux/module.h>
29#include <scsi/scsi_cmnd.h>
30
31/**
32 * bsg_destroy_job - routine to teardown/delete a bsg job
33 * @job: bsg_job that is to be torn down
34 */
35static void bsg_destroy_job(struct bsg_job *job)
36{
37 put_device(job->dev); /* release reference for the request */
38
39 kfree(job->request_payload.sg_list);
40 kfree(job->reply_payload.sg_list);
41 kfree(job);
42}
43
44/**
45 * bsg_job_done - completion routine for bsg requests
46 * @job: bsg_job that is complete
47 * @result: job reply result
48 * @reply_payload_rcv_len: length of payload recvd
49 *
50 * The LLD should call this when the bsg job has completed.
51 */
52void bsg_job_done(struct bsg_job *job, int result,
53 unsigned int reply_payload_rcv_len)
54{
55 struct request *req = job->req;
56 struct request *rsp = req->next_rq;
57 int err;
58
59 err = job->req->errors = result;
60 if (err < 0)
61 /* we're only returning the result field in the reply */
62 job->req->sense_len = sizeof(u32);
63 else
64 job->req->sense_len = job->reply_len;
65 /* we assume all request payload was transferred, residual == 0 */
66 req->resid_len = 0;
67
68 if (rsp) {
69 WARN_ON(reply_payload_rcv_len > rsp->resid_len);
70
71 /* set reply (bidi) residual */
72 rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
73 }
74 blk_complete_request(req);
75}
76EXPORT_SYMBOL_GPL(bsg_job_done);
77
78/**
79 * bsg_softirq_done - softirq done routine for destroying the bsg requests
80 * @rq: BSG request that holds the job to be destroyed
81 */
82static void bsg_softirq_done(struct request *rq)
83{
84 struct bsg_job *job = rq->special;
85
86 blk_end_request_all(rq, rq->errors);
87 bsg_destroy_job(job);
88}
89
90static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
91{
92 size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
93
94 BUG_ON(!req->nr_phys_segments);
95
96 buf->sg_list = kzalloc(sz, GFP_KERNEL);
97 if (!buf->sg_list)
98 return -ENOMEM;
99 sg_init_table(buf->sg_list, req->nr_phys_segments);
100 buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
101 buf->payload_len = blk_rq_bytes(req);
102 return 0;
103}
104
105/**
106 * bsg_create_job - create the bsg_job structure for the bsg request
107 * @dev: device that is being sent the bsg request
108 * @req: BSG request that needs a job structure
109 */
110static int bsg_create_job(struct device *dev, struct request *req)
111{
112 struct request *rsp = req->next_rq;
113 struct request_queue *q = req->q;
114 struct bsg_job *job;
115 int ret;
116
117 BUG_ON(req->special);
118
119 job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
120 if (!job)
121 return -ENOMEM;
122
123 req->special = job;
124 job->req = req;
125 if (q->bsg_job_size)
126 job->dd_data = (void *)&job[1];
127 job->request = req->cmd;
128 job->request_len = req->cmd_len;
129 job->reply = req->sense;
130 job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
131 * allocated */
132 if (req->bio) {
133 ret = bsg_map_buffer(&job->request_payload, req);
134 if (ret)
135 goto failjob_rls_job;
136 }
137 if (rsp && rsp->bio) {
138 ret = bsg_map_buffer(&job->reply_payload, rsp);
139 if (ret)
140 goto failjob_rls_rqst_payload;
141 }
142 job->dev = dev;
143 /* take a reference for the request */
144 get_device(job->dev);
145 return 0;
146
147failjob_rls_rqst_payload:
148 kfree(job->request_payload.sg_list);
149failjob_rls_job:
150 kfree(job);
151 return -ENOMEM;
152}
153
154/*
155 * bsg_goose_queue - restart queue in case it was stopped
156 * @q: request q to be restarted
157 */
158void bsg_goose_queue(struct request_queue *q)
159{
160 if (!q)
161 return;
162
163 blk_run_queue_async(q);
164}
165EXPORT_SYMBOL_GPL(bsg_goose_queue);
166
167/**
168 * bsg_request_fn - generic handler for bsg requests
169 * @q: request queue to manage
170 *
171 * On error the create_bsg_job function should return a -Exyz error value
172 * that will be set to the req->errors.
173 *
174 * Drivers/subsys should pass this to the queue init function.
175 */
176void bsg_request_fn(struct request_queue *q)
177{
178 struct device *dev = q->queuedata;
179 struct request *req;
180 struct bsg_job *job;
181 int ret;
182
183 if (!get_device(dev))
184 return;
185
186 while (1) {
187 req = blk_fetch_request(q);
188 if (!req)
189 break;
190 spin_unlock_irq(q->queue_lock);
191
192 ret = bsg_create_job(dev, req);
193 if (ret) {
194 req->errors = ret;
195 blk_end_request_all(req, ret);
196 spin_lock_irq(q->queue_lock);
197 continue;
198 }
199
200 job = req->special;
201 ret = q->bsg_job_fn(job);
202 spin_lock_irq(q->queue_lock);
203 if (ret)
204 break;
205 }
206
207 spin_unlock_irq(q->queue_lock);
208 put_device(dev);
209 spin_lock_irq(q->queue_lock);
210}
211EXPORT_SYMBOL_GPL(bsg_request_fn);
212
213/**
214 * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
215 * @dev: device to attach bsg device to
216 * @q: request queue setup by caller
217 * @name: device to give bsg device
218 * @job_fn: bsg job handler
219 * @dd_job_size: size of LLD data needed for each job
220 *
 221 * The caller should have set up the request queue with bsg_request_fn
222 * as the request_fn.
223 */
224int bsg_setup_queue(struct device *dev, struct request_queue *q,
225 char *name, bsg_job_fn *job_fn, int dd_job_size)
226{
227 int ret;
228
229 q->queuedata = dev;
230 q->bsg_job_size = dd_job_size;
231 q->bsg_job_fn = job_fn;
232 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
233 blk_queue_softirq_done(q, bsg_softirq_done);
234 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
235
236 ret = bsg_register_queue(q, dev, name, NULL);
237 if (ret) {
238 printk(KERN_ERR "%s: bsg interface failed to "
239 "initialize - register queue\n", dev->kobj.name);
240 return ret;
241 }
242
243 return 0;
244}
245EXPORT_SYMBOL_GPL(bsg_setup_queue);
246
247/**
248 * bsg_remove_queue - Deletes the bsg dev from the q
249 * @q: the request_queue that is to be torn down.
250 *
251 * Notes:
 252 * Before unregistering the queue, empty any requests that are blocked
253 */
254void bsg_remove_queue(struct request_queue *q)
255{
256 struct request *req; /* block request */
257 int counts; /* totals for request_list count and starved */
258
259 if (!q)
260 return;
261
262 /* Stop taking in new requests */
263 spin_lock_irq(q->queue_lock);
264 blk_stop_queue(q);
265
266 /* drain all requests in the queue */
267 while (1) {
268 /* need the lock to fetch a request
 268 * this may fetch the same request as the previous pass
270 */
271 req = blk_fetch_request(q);
272 /* save requests in use and starved */
273 counts = q->rq.count[0] + q->rq.count[1] +
274 q->rq.starved[0] + q->rq.starved[1];
275 spin_unlock_irq(q->queue_lock);
276 /* any requests still outstanding? */
277 if (counts == 0)
278 break;
279
 280 /* This may be the same req as the previous iteration;
 281 * always call blk_end_request_all() after a fetch. The
 282 * request must be ended here because blk_fetch_request()
 283 * above already started it.
284 */
285 if (req) {
286 /* return -ENXIO to indicate that this queue is
287 * going away
288 */
289 req->errors = -ENXIO;
290 blk_end_request_all(req, -ENXIO);
291 }
292
293 msleep(200); /* allow bsg to possibly finish */
294 spin_lock_irq(q->queue_lock);
295 }
296 bsg_unregister_queue(q);
297}
298EXPORT_SYMBOL_GPL(bsg_remove_queue);
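
Taken together, the new helpers are meant to be wired up by an LLD at queue-init time: allocate a queue with bsg_request_fn as the request_fn, register it with bsg_setup_queue(), and complete each job from the driver's handler with bsg_job_done(). Below is a hedged sketch of that wiring for a hypothetical transport driver; my_bsg_dispatch(), my_setup_bsg(), my_bsg_lock and MY_DD_SIZE are made-up names for illustration, not part of the patch.

#include <linux/blkdev.h>
#include <linux/bsg-lib.h>
#include <linux/spinlock.h>

#define MY_DD_SIZE 64			/* per-job LLD private data size */

static DEFINE_SPINLOCK(my_bsg_lock);

static int my_bsg_dispatch(struct bsg_job *job)
{
	/* Hand job->request / job->request_payload to the hardware here;
	 * once the hardware completes, report the outcome: */
	bsg_job_done(job, 0 /* result */, job->reply_payload.payload_len);
	return 0;			/* non-zero stops bsg_request_fn() */
}

static int my_setup_bsg(struct device *dev, char *name)
{
	struct request_queue *q;
	int ret;

	/* bsg_request_fn() is the request_fn; bsg_setup_queue() stores
	 * @dev, the job handler and the per-job size in the queue. */
	q = blk_init_queue(bsg_request_fn, &my_bsg_lock);
	if (!q)
		return -ENOMEM;

	ret = bsg_setup_queue(dev, q, name, my_bsg_dispatch, MY_DD_SIZE);
	if (ret)
		blk_cleanup_queue(q);
	return ret;
}
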
diff --git a/block/bsg.c b/block/bsg.c
index 0c8b64a1648..702f1316bb8 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -182,7 +182,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
182 return -ENOMEM; 182 return -ENOMEM;
183 } 183 }
184 184
185 if (copy_from_user(rq->cmd, (void *)(unsigned long)hdr->request, 185 if (copy_from_user(rq->cmd, (void __user *)(unsigned long)hdr->request,
186 hdr->request_len)) 186 hdr->request_len))
187 return -EFAULT; 187 return -EFAULT;
188 188
@@ -249,7 +249,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
249 struct request *rq, *next_rq = NULL; 249 struct request *rq, *next_rq = NULL;
250 int ret, rw; 250 int ret, rw;
251 unsigned int dxfer_len; 251 unsigned int dxfer_len;
252 void *dxferp = NULL; 252 void __user *dxferp = NULL;
253 struct bsg_class_device *bcd = &q->bsg_dev; 253 struct bsg_class_device *bcd = &q->bsg_dev;
254 254
255 /* if the LLD has been removed then the bsg_unregister_queue will 255 /* if the LLD has been removed then the bsg_unregister_queue will
@@ -291,7 +291,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
291 rq->next_rq = next_rq; 291 rq->next_rq = next_rq;
292 next_rq->cmd_type = rq->cmd_type; 292 next_rq->cmd_type = rq->cmd_type;
293 293
294 dxferp = (void*)(unsigned long)hdr->din_xferp; 294 dxferp = (void __user *)(unsigned long)hdr->din_xferp;
295 ret = blk_rq_map_user(q, next_rq, NULL, dxferp, 295 ret = blk_rq_map_user(q, next_rq, NULL, dxferp,
296 hdr->din_xfer_len, GFP_KERNEL); 296 hdr->din_xfer_len, GFP_KERNEL);
297 if (ret) 297 if (ret)
@@ -300,10 +300,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
300 300
301 if (hdr->dout_xfer_len) { 301 if (hdr->dout_xfer_len) {
302 dxfer_len = hdr->dout_xfer_len; 302 dxfer_len = hdr->dout_xfer_len;
303 dxferp = (void*)(unsigned long)hdr->dout_xferp; 303 dxferp = (void __user *)(unsigned long)hdr->dout_xferp;
304 } else if (hdr->din_xfer_len) { 304 } else if (hdr->din_xfer_len) {
305 dxfer_len = hdr->din_xfer_len; 305 dxfer_len = hdr->din_xfer_len;
306 dxferp = (void*)(unsigned long)hdr->din_xferp; 306 dxferp = (void __user *)(unsigned long)hdr->din_xferp;
307 } else 307 } else
308 dxfer_len = 0; 308 dxfer_len = 0;
309 309
@@ -445,7 +445,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
445 int len = min_t(unsigned int, hdr->max_response_len, 445 int len = min_t(unsigned int, hdr->max_response_len,
446 rq->sense_len); 446 rq->sense_len);
447 447
448 ret = copy_to_user((void*)(unsigned long)hdr->response, 448 ret = copy_to_user((void __user *)(unsigned long)hdr->response,
449 rq->sense, len); 449 rq->sense, len);
450 if (!ret) 450 if (!ret)
451 hdr->response_len = len; 451 hdr->response_len = len;
@@ -606,7 +606,7 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
606 ret = __bsg_read(buf, count, bd, NULL, &bytes_read); 606 ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
607 *ppos = bytes_read; 607 *ppos = bytes_read;
608 608
609 if (!bytes_read || (bytes_read && err_block_err(ret))) 609 if (!bytes_read || err_block_err(ret))
610 bytes_read = ret; 610 bytes_read = ret;
611 611
612 return bytes_read; 612 return bytes_read;
@@ -686,7 +686,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
686 /* 686 /*
687 * return bytes written on non-fatal errors 687 * return bytes written on non-fatal errors
688 */ 688 */
689 if (!bytes_written || (bytes_written && err_block_err(ret))) 689 if (!bytes_written || err_block_err(ret))
690 bytes_written = ret; 690 bytes_written = ret;
691 691
692 dprintk("%s: returning %Zd\n", bd->name, bytes_written); 692 dprintk("%s: returning %Zd\n", bd->name, bytes_written);
@@ -878,7 +878,7 @@ static unsigned int bsg_poll(struct file *file, poll_table *wait)
878 spin_lock_irq(&bd->lock); 878 spin_lock_irq(&bd->lock);
879 if (!list_empty(&bd->done_list)) 879 if (!list_empty(&bd->done_list))
880 mask |= POLLIN | POLLRDNORM; 880 mask |= POLLIN | POLLRDNORM;
881 if (bd->queued_cmds >= bd->max_queue) 881 if (bd->queued_cmds < bd->max_queue)
882 mask |= POLLOUT; 882 mask |= POLLOUT;
883 spin_unlock_irq(&bd->lock); 883 spin_unlock_irq(&bd->lock);
884 884
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ae21919f15e..4c12869fcf7 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,9 +87,10 @@ struct cfq_rb_root {
87 unsigned count; 87 unsigned count;
88 unsigned total_weight; 88 unsigned total_weight;
89 u64 min_vdisktime; 89 u64 min_vdisktime;
90 struct cfq_ttime ttime;
90}; 91};
91#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \ 92#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
92 .count = 0, .min_vdisktime = 0, } 93 .ttime = {.last_end_request = jiffies,},}
93 94
94/* 95/*
95 * Per process-grouping structure 96 * Per process-grouping structure
@@ -129,14 +130,14 @@ struct cfq_queue {
129 unsigned long slice_end; 130 unsigned long slice_end;
130 long slice_resid; 131 long slice_resid;
131 132
132 /* pending metadata requests */ 133 /* pending priority requests */
133 int meta_pending; 134 int prio_pending;
134 /* number of requests that are on the dispatch list or inside driver */ 135 /* number of requests that are on the dispatch list or inside driver */
135 int dispatched; 136 int dispatched;
136 137
137 /* io prio of this group */ 138 /* io prio of this group */
138 unsigned short ioprio, org_ioprio; 139 unsigned short ioprio, org_ioprio;
139 unsigned short ioprio_class, org_ioprio_class; 140 unsigned short ioprio_class;
140 141
141 pid_t pid; 142 pid_t pid;
142 143
@@ -212,6 +213,7 @@ struct cfq_group {
212#endif 213#endif
213 /* number of requests that are on the dispatch list or inside driver */ 214 /* number of requests that are on the dispatch list or inside driver */
214 int dispatched; 215 int dispatched;
216 struct cfq_ttime ttime;
215}; 217};
216 218
217/* 219/*
@@ -393,6 +395,18 @@ CFQ_CFQQ_FNS(wait_busy);
393 j++, st = i < IDLE_WORKLOAD ? \ 395 j++, st = i < IDLE_WORKLOAD ? \
394 &cfqg->service_trees[i][j]: NULL) \ 396 &cfqg->service_trees[i][j]: NULL) \
395 397
398static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
399 struct cfq_ttime *ttime, bool group_idle)
400{
401 unsigned long slice;
402 if (!sample_valid(ttime->ttime_samples))
403 return false;
404 if (group_idle)
405 slice = cfqd->cfq_group_idle;
406 else
407 slice = cfqd->cfq_slice_idle;
408 return ttime->ttime_mean > slice;
409}
396 410
397static inline bool iops_mode(struct cfq_data *cfqd) 411static inline bool iops_mode(struct cfq_data *cfqd)
398{ 412{
@@ -670,8 +684,8 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
670 if (rq_is_sync(rq1) != rq_is_sync(rq2)) 684 if (rq_is_sync(rq1) != rq_is_sync(rq2))
671 return rq_is_sync(rq1) ? rq1 : rq2; 685 return rq_is_sync(rq1) ? rq1 : rq2;
672 686
673 if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META) 687 if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO)
674 return rq1->cmd_flags & REQ_META ? rq1 : rq2; 688 return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2;
675 689
676 s1 = blk_rq_pos(rq1); 690 s1 = blk_rq_pos(rq1);
677 s2 = blk_rq_pos(rq2); 691 s2 = blk_rq_pos(rq2);
@@ -1005,8 +1019,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
1005 return NULL; 1019 return NULL;
1006} 1020}
1007 1021
1008void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, 1022static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
1009 unsigned int weight) 1023 unsigned int weight)
1010{ 1024{
1011 struct cfq_group *cfqg = cfqg_of_blkg(blkg); 1025 struct cfq_group *cfqg = cfqg_of_blkg(blkg);
1012 cfqg->new_weight = weight; 1026 cfqg->new_weight = weight;
@@ -1059,6 +1073,8 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
1059 *st = CFQ_RB_ROOT; 1073 *st = CFQ_RB_ROOT;
1060 RB_CLEAR_NODE(&cfqg->rb_node); 1074 RB_CLEAR_NODE(&cfqg->rb_node);
1061 1075
1076 cfqg->ttime.last_end_request = jiffies;
1077
1062 /* 1078 /*
1063 * Take the initial reference that will be released on destroy 1079 * Take the initial reference that will be released on destroy
1064 * This can be thought of a joint reference by cgroup and 1080 * This can be thought of a joint reference by cgroup and
@@ -1198,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
1198 1214
1199 hlist_del_init(&cfqg->cfqd_node); 1215 hlist_del_init(&cfqg->cfqd_node);
1200 1216
1217 BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
1218 cfqd->nr_blkcg_linked_grps--;
1219
1201 /* 1220 /*
1202 * Put the reference taken at the time of creation so that when all 1221 * Put the reference taken at the time of creation so that when all
1203 * queues are gone, group can be destroyed. 1222 * queues are gone, group can be destroyed.
@@ -1235,7 +1254,7 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
1235 * it should not be NULL as even if elevator was exiting, cgroup deltion 1254 * it should not be NULL as even if elevator was exiting, cgroup deltion
1236 * path got to it first. 1255 * path got to it first.
1237 */ 1256 */
1238void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg) 1257static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
1239{ 1258{
1240 unsigned long flags; 1259 unsigned long flags;
1241 struct cfq_data *cfqd = key; 1260 struct cfq_data *cfqd = key;
@@ -1502,16 +1521,11 @@ static void cfq_add_rq_rb(struct request *rq)
1502{ 1521{
1503 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1522 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1504 struct cfq_data *cfqd = cfqq->cfqd; 1523 struct cfq_data *cfqd = cfqq->cfqd;
1505 struct request *__alias, *prev; 1524 struct request *prev;
1506 1525
1507 cfqq->queued[rq_is_sync(rq)]++; 1526 cfqq->queued[rq_is_sync(rq)]++;
1508 1527
1509 /* 1528 elv_rb_add(&cfqq->sort_list, rq);
1510 * looks a little odd, but the first insert might return an alias.
1511 * if that happens, put the alias on the dispatch list
1512 */
1513 while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL)
1514 cfq_dispatch_insert(cfqd->queue, __alias);
1515 1529
1516 if (!cfq_cfqq_on_rr(cfqq)) 1530 if (!cfq_cfqq_on_rr(cfqq))
1517 cfq_add_cfqq_rr(cfqd, cfqq); 1531 cfq_add_cfqq_rr(cfqd, cfqq);
@@ -1598,9 +1612,9 @@ static void cfq_remove_request(struct request *rq)
1598 cfqq->cfqd->rq_queued--; 1612 cfqq->cfqd->rq_queued--;
1599 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, 1613 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
1600 rq_data_dir(rq), rq_is_sync(rq)); 1614 rq_data_dir(rq), rq_is_sync(rq));
1601 if (rq->cmd_flags & REQ_META) { 1615 if (rq->cmd_flags & REQ_PRIO) {
1602 WARN_ON(!cfqq->meta_pending); 1616 WARN_ON(!cfqq->prio_pending);
1603 cfqq->meta_pending--; 1617 cfqq->prio_pending--;
1604 } 1618 }
1605} 1619}
1606 1620
@@ -1969,7 +1983,8 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1969 * Otherwise, we do only if they are the last ones 1983 * Otherwise, we do only if they are the last ones
1970 * in their service tree. 1984 * in their service tree.
1971 */ 1985 */
1972 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq)) 1986 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) &&
1987 !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false))
1973 return true; 1988 return true;
1974 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", 1989 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
1975 service_tree->count); 1990 service_tree->count);
@@ -2022,10 +2037,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2022 * slice, then don't idle. This avoids overrunning the allotted 2037 * slice, then don't idle. This avoids overrunning the allotted
2023 * time slice. 2038 * time slice.
2024 */ 2039 */
2025 if (sample_valid(cic->ttime_samples) && 2040 if (sample_valid(cic->ttime.ttime_samples) &&
2026 (cfqq->slice_end - jiffies < cic->ttime_mean)) { 2041 (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
2027 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu", 2042 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
2028 cic->ttime_mean); 2043 cic->ttime.ttime_mean);
2029 return; 2044 return;
2030 } 2045 }
2031 2046
@@ -2381,8 +2396,9 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
2381 * this group, wait for requests to complete. 2396 * this group, wait for requests to complete.
2382 */ 2397 */
2383check_group_idle: 2398check_group_idle:
2384 if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 2399 if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
2385 && cfqq->cfqg->dispatched) { 2400 cfqq->cfqg->dispatched &&
2401 !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
2386 cfqq = NULL; 2402 cfqq = NULL;
2387 goto keep_queue; 2403 goto keep_queue;
2388 } 2404 }
@@ -2833,7 +2849,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
2833 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO, 2849 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
2834 cfqd->queue->node); 2850 cfqd->queue->node);
2835 if (cic) { 2851 if (cic) {
2836 cic->last_end_request = jiffies; 2852 cic->ttime.last_end_request = jiffies;
2837 INIT_LIST_HEAD(&cic->queue_list); 2853 INIT_LIST_HEAD(&cic->queue_list);
2838 INIT_HLIST_NODE(&cic->cic_list); 2854 INIT_HLIST_NODE(&cic->cic_list);
2839 cic->dtor = cfq_free_io_context; 2855 cic->dtor = cfq_free_io_context;
@@ -2883,7 +2899,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
2883 * elevate the priority of this queue 2899 * elevate the priority of this queue
2884 */ 2900 */
2885 cfqq->org_ioprio = cfqq->ioprio; 2901 cfqq->org_ioprio = cfqq->ioprio;
2886 cfqq->org_ioprio_class = cfqq->ioprio_class;
2887 cfq_clear_cfqq_prio_changed(cfqq); 2902 cfq_clear_cfqq_prio_changed(cfqq);
2888} 2903}
2889 2904
@@ -3169,7 +3184,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
3169 } 3184 }
3170 } 3185 }
3171 3186
3172 if (ret) 3187 if (ret && ret != -EEXIST)
3173 printk(KERN_ERR "cfq: cic link failed!\n"); 3188 printk(KERN_ERR "cfq: cic link failed!\n");
3174 3189
3175 return ret; 3190 return ret;
@@ -3185,6 +3200,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
3185{ 3200{
3186 struct io_context *ioc = NULL; 3201 struct io_context *ioc = NULL;
3187 struct cfq_io_context *cic; 3202 struct cfq_io_context *cic;
3203 int ret;
3188 3204
3189 might_sleep_if(gfp_mask & __GFP_WAIT); 3205 might_sleep_if(gfp_mask & __GFP_WAIT);
3190 3206
@@ -3192,6 +3208,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
3192 if (!ioc) 3208 if (!ioc)
3193 return NULL; 3209 return NULL;
3194 3210
3211retry:
3195 cic = cfq_cic_lookup(cfqd, ioc); 3212 cic = cfq_cic_lookup(cfqd, ioc);
3196 if (cic) 3213 if (cic)
3197 goto out; 3214 goto out;
@@ -3200,7 +3217,12 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
3200 if (cic == NULL) 3217 if (cic == NULL)
3201 goto err; 3218 goto err;
3202 3219
3203 if (cfq_cic_link(cfqd, ioc, cic, gfp_mask)) 3220 ret = cfq_cic_link(cfqd, ioc, cic, gfp_mask);
3221 if (ret == -EEXIST) {
3222 /* someone has linked cic to ioc already */
3223 cfq_cic_free(cic);
3224 goto retry;
3225 } else if (ret)
3204 goto err_free; 3226 goto err_free;
3205 3227
3206out: 3228out:
@@ -3221,14 +3243,28 @@ err:
3221} 3243}
3222 3244
3223static void 3245static void
3224cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) 3246__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
3225{ 3247{
3226 unsigned long elapsed = jiffies - cic->last_end_request; 3248 unsigned long elapsed = jiffies - ttime->last_end_request;
3227 unsigned long ttime = min(elapsed, 2UL * cfqd->cfq_slice_idle); 3249 elapsed = min(elapsed, 2UL * slice_idle);
3228 3250
3229 cic->ttime_samples = (7*cic->ttime_samples + 256) / 8; 3251 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
3230 cic->ttime_total = (7*cic->ttime_total + 256*ttime) / 8; 3252 ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
3231 cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; 3253 ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
3254}
3255
3256static void
3257cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3258 struct cfq_io_context *cic)
3259{
3260 if (cfq_cfqq_sync(cfqq)) {
3261 __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
3262 __cfq_update_io_thinktime(&cfqq->service_tree->ttime,
3263 cfqd->cfq_slice_idle);
3264 }
3265#ifdef CONFIG_CFQ_GROUP_IOSCHED
3266 __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
3267#endif
3232} 3268}
3233 3269
3234static void 3270static void
@@ -3277,8 +3313,8 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3277 else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || 3313 else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
3278 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) 3314 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
3279 enable_idle = 0; 3315 enable_idle = 0;
3280 else if (sample_valid(cic->ttime_samples)) { 3316 else if (sample_valid(cic->ttime.ttime_samples)) {
3281 if (cic->ttime_mean > cfqd->cfq_slice_idle) 3317 if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
3282 enable_idle = 0; 3318 enable_idle = 0;
3283 else 3319 else
3284 enable_idle = 1; 3320 enable_idle = 1;
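
Both hunks above lean on the same statistic: __cfq_update_io_thinktime() keeps an exponentially weighted moving average of the gap between a context's last completion and its next request, clamped to twice the idle window, and cfq_update_idle_window() then compares ttime_mean against cfq_slice_idle to decide whether idling is worthwhile. Each update keeps 7/8 of the old value and folds in 1/8 of the new sample, scaled by 256 to retain precision in integer arithmetic. A standalone illustration of that fixed-point smoothing (plain numbers, no jiffies):

#include <stdio.h>

int main(void)
{
        unsigned long samples = 0, total = 0, mean = 0;
        unsigned long elapsed[] = { 8, 8, 8, 80, 8, 8 };        /* observed think times */

        for (unsigned int i = 0; i < sizeof(elapsed) / sizeof(elapsed[0]); i++) {
                /* same update rules as the hunk above */
                samples = (7 * samples + 256) / 8;
                total = (7 * total + 256 * elapsed[i]) / 8;
                mean = (total + 128) / samples;
                printf("sample %u: mean = %lu\n", i, mean);
        }
        return 0;
}

The single 80-tick outlier lifts the mean temporarily but decays over the following samples instead of dominating the estimate.
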
@@ -3343,7 +3379,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3343 * So both queues are sync. Let the new request get disk time if 3379 * So both queues are sync. Let the new request get disk time if
3344 * it's a metadata request and the current queue is doing regular IO. 3380 * it's a metadata request and the current queue is doing regular IO.
3345 */ 3381 */
3346 if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending) 3382 if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
3347 return true; 3383 return true;
3348 3384
3349 /* 3385 /*
@@ -3410,10 +3446,10 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3410 struct cfq_io_context *cic = RQ_CIC(rq); 3446 struct cfq_io_context *cic = RQ_CIC(rq);
3411 3447
3412 cfqd->rq_queued++; 3448 cfqd->rq_queued++;
3413 if (rq->cmd_flags & REQ_META) 3449 if (rq->cmd_flags & REQ_PRIO)
3414 cfqq->meta_pending++; 3450 cfqq->prio_pending++;
3415 3451
3416 cfq_update_io_thinktime(cfqd, cic); 3452 cfq_update_io_thinktime(cfqd, cfqq, cic);
3417 cfq_update_io_seektime(cfqd, cfqq, rq); 3453 cfq_update_io_seektime(cfqd, cfqq, rq);
3418 cfq_update_idle_window(cfqd, cfqq, cic); 3454 cfq_update_idle_window(cfqd, cfqq, cic);
3419 3455
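
With this hunk and the cfq_should_preempt() change above, CFQ stops keying any special treatment off REQ_META by itself; it is the new REQ_PRIO flag that now counts toward prio_pending and preemption. A filesystem that still wants its metadata reads expedited would therefore tag them with both flags. A hedged sketch of such a submitter (hypothetical helper, not from this patch; bh is assumed locked and mapped, as submit_bh() requires):

#include <linux/blk_types.h>
#include <linux/buffer_head.h>

static void submit_meta_read(struct buffer_head *bh)
{
        /* end_buffer_read_sync() drops the reference taken here */
        get_bh(bh);
        bh->b_end_io = end_buffer_read_sync;
        submit_bh(READ | REQ_META | REQ_PRIO, bh);
}
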
@@ -3520,12 +3556,16 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3520 if (cfqq->cfqg->nr_cfqq > 1) 3556 if (cfqq->cfqg->nr_cfqq > 1)
3521 return false; 3557 return false;
3522 3558
3559 /* the only queue in the group, but think time is big */
3560 if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
3561 return false;
3562
3523 if (cfq_slice_used(cfqq)) 3563 if (cfq_slice_used(cfqq))
3524 return true; 3564 return true;
3525 3565
3526 /* if slice left is less than think time, wait busy */ 3566 /* if slice left is less than think time, wait busy */
3527 if (cic && sample_valid(cic->ttime_samples) 3567 if (cic && sample_valid(cic->ttime.ttime_samples)
3528 && (cfqq->slice_end - jiffies < cic->ttime_mean)) 3568 && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
3529 return true; 3569 return true;
3530 3570
3531 /* 3571 /*
@@ -3566,11 +3606,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
3566 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; 3606 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
3567 3607
3568 if (sync) { 3608 if (sync) {
3569 RQ_CIC(rq)->last_end_request = now; 3609 struct cfq_rb_root *service_tree;
3610
3611 RQ_CIC(rq)->ttime.last_end_request = now;
3612
3613 if (cfq_cfqq_on_rr(cfqq))
3614 service_tree = cfqq->service_tree;
3615 else
3616 service_tree = service_tree_for(cfqq->cfqg,
3617 cfqq_prio(cfqq), cfqq_type(cfqq));
3618 service_tree->ttime.last_end_request = now;
3570 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) 3619 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
3571 cfqd->last_delayed_sync = now; 3620 cfqd->last_delayed_sync = now;
3572 } 3621 }
3573 3622
3623#ifdef CONFIG_CFQ_GROUP_IOSCHED
3624 cfqq->cfqg->ttime.last_end_request = now;
3625#endif
3626
3574 /* 3627 /*
3575 * If this is the active queue, check if it needs to be expired, 3628 * If this is the active queue, check if it needs to be expired,
3576 * or if we want to idle in case it has no pending requests. 3629 * or if we want to idle in case it has no pending requests.
@@ -3616,30 +3669,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
3616 cfq_schedule_dispatch(cfqd); 3669 cfq_schedule_dispatch(cfqd);
3617} 3670}
3618 3671
3619/*
3620 * we temporarily boost lower priority queues if they are holding fs exclusive
3621 * resources. they are boosted to normal prio (CLASS_BE/4)
3622 */
3623static void cfq_prio_boost(struct cfq_queue *cfqq)
3624{
3625 if (has_fs_excl()) {
3626 /*
3627 * boost idle prio on transactions that would lock out other
3628 * users of the filesystem
3629 */
3630 if (cfq_class_idle(cfqq))
3631 cfqq->ioprio_class = IOPRIO_CLASS_BE;
3632 if (cfqq->ioprio > IOPRIO_NORM)
3633 cfqq->ioprio = IOPRIO_NORM;
3634 } else {
3635 /*
3636 * unboost the queue (if needed)
3637 */
3638 cfqq->ioprio_class = cfqq->org_ioprio_class;
3639 cfqq->ioprio = cfqq->org_ioprio;
3640 }
3641}
3642
3643static inline int __cfq_may_queue(struct cfq_queue *cfqq) 3672static inline int __cfq_may_queue(struct cfq_queue *cfqq)
3644{ 3673{
3645 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { 3674 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -3670,7 +3699,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
3670 cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); 3699 cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
3671 if (cfqq) { 3700 if (cfqq) {
3672 cfq_init_prio_data(cfqq, cic->ioc); 3701 cfq_init_prio_data(cfqq, cic->ioc);
3673 cfq_prio_boost(cfqq);
3674 3702
3675 return __cfq_may_queue(cfqq); 3703 return __cfq_may_queue(cfqq);
3676 } 3704 }
@@ -4015,6 +4043,11 @@ static void *cfq_init_queue(struct request_queue *q)
4015 4043
4016 if (blkio_alloc_blkg_stats(&cfqg->blkg)) { 4044 if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
4017 kfree(cfqg); 4045 kfree(cfqg);
4046
4047 spin_lock(&cic_index_lock);
4048 ida_remove(&cic_index_ida, cfqd->cic_index);
4049 spin_unlock(&cic_index_lock);
4050
4018 kfree(cfqd); 4051 kfree(cfqd);
4019 return NULL; 4052 return NULL;
4020 } 4053 }
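
The new unwind plugs a leak on this error path: cfqd->cic_index was allocated from cic_index_ida earlier in cfq_init_queue(), so a failure in blkio_alloc_blkg_stats() must give the index back before freeing cfqd. As a reminder of how that allocator pairs up, a sketch using the ida_* API of this era (not the exact cfq helpers):

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

static int alloc_index(struct ida *ida, spinlock_t *lock)
{
        int index, error;

        do {
                if (!ida_pre_get(ida, GFP_KERNEL))
                        return -ENOMEM;

                spin_lock(lock);
                error = ida_get_new(ida, &index);
                spin_unlock(lock);
        } while (error == -EAGAIN);

        return error ? error : index;
}

/* ...and the release that every later failure path has to reach. */
static void free_index(struct ida *ida, spinlock_t *lock, int index)
{
        spin_lock(lock);
        ida_remove(ida, index);
        spin_unlock(lock);
}
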
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index cc3eb78e333..7b725020823 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -208,19 +208,6 @@ static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode,
208#define BLKBSZSET_32 _IOW(0x12, 113, int) 208#define BLKBSZSET_32 _IOW(0x12, 113, int)
209#define BLKGETSIZE64_32 _IOR(0x12, 114, int) 209#define BLKGETSIZE64_32 _IOR(0x12, 114, int)
210 210
211struct compat_floppy_struct {
212 compat_uint_t size;
213 compat_uint_t sect;
214 compat_uint_t head;
215 compat_uint_t track;
216 compat_uint_t stretch;
217 unsigned char gap;
218 unsigned char rate;
219 unsigned char spec1;
220 unsigned char fmt_gap;
221 const compat_caddr_t name;
222};
223
224struct compat_floppy_drive_params { 211struct compat_floppy_drive_params {
225 char cmos; 212 char cmos;
226 compat_ulong_t max_dtr; 213 compat_ulong_t max_dtr;
@@ -288,7 +275,6 @@ struct compat_floppy_write_errors {
288 275
289#define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct) 276#define FDSETPRM32 _IOW(2, 0x42, struct compat_floppy_struct)
290#define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct) 277#define FDDEFPRM32 _IOW(2, 0x43, struct compat_floppy_struct)
291#define FDGETPRM32 _IOR(2, 0x04, struct compat_floppy_struct)
292#define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params) 278#define FDSETDRVPRM32 _IOW(2, 0x90, struct compat_floppy_drive_params)
293#define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params) 279#define FDGETDRVPRM32 _IOR(2, 0x11, struct compat_floppy_drive_params)
294#define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct) 280#define FDGETDRVSTAT32 _IOR(2, 0x12, struct compat_floppy_drive_struct)
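
What remains in this file shows why such compat mirrors exist at all: a 32-bit userland lays the ioctl argument out with 32-bit longs and pointers, so the compat layer re-declares the structure with fixed-width compat_* types, and because _IOR()/_IOW() fold sizeof() of that argument into the command number, a layout that differs between ABIs yields a different 32-bit command value which the handler has to recognise and translate. A hedged sketch of the general pattern (hypothetical struct and command, not from this file):

struct compat_foo_params {
        compat_uint_t   flags;          /* 32 bits on both ABIs */
        compat_ulong_t  max_len;        /* 32-bit long, not the native 64-bit one */
        compat_caddr_t  buf;            /* 32-bit user pointer */
};

/* sizeof(struct compat_foo_params) is encoded into the number, so this
 * differs from a native FOOGET defined with the native struct. */
#define FOOGET32 _IOR('f', 0x01, struct compat_foo_params)
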
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 5139c0ea186..c644137d9cd 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -77,10 +77,8 @@ static void
77deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) 77deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
78{ 78{
79 struct rb_root *root = deadline_rb_root(dd, rq); 79 struct rb_root *root = deadline_rb_root(dd, rq);
80 struct request *__alias;
81 80
82 while (unlikely(__alias = elv_rb_add(root, rq))) 81 elv_rb_add(root, rq);
83 deadline_move_request(dd, __alias);
84} 82}
85 83
86static inline void 84static inline void
diff --git a/block/elevator.c b/block/elevator.c
index b0b38ce0dcb..a3b64bc71d8 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -353,7 +353,7 @@ static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
353 * RB-tree support functions for inserting/lookup/removal of requests 353 * RB-tree support functions for inserting/lookup/removal of requests
354 * in a sorted RB tree. 354 * in a sorted RB tree.
355 */ 355 */
356struct request *elv_rb_add(struct rb_root *root, struct request *rq) 356void elv_rb_add(struct rb_root *root, struct request *rq)
357{ 357{
358 struct rb_node **p = &root->rb_node; 358 struct rb_node **p = &root->rb_node;
359 struct rb_node *parent = NULL; 359 struct rb_node *parent = NULL;
@@ -365,15 +365,12 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
365 365
366 if (blk_rq_pos(rq) < blk_rq_pos(__rq)) 366 if (blk_rq_pos(rq) < blk_rq_pos(__rq))
367 p = &(*p)->rb_left; 367 p = &(*p)->rb_left;
368 else if (blk_rq_pos(rq) > blk_rq_pos(__rq)) 368 else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
369 p = &(*p)->rb_right; 369 p = &(*p)->rb_right;
370 else
371 return __rq;
372 } 370 }
373 371
374 rb_link_node(&rq->rb_node, parent, p); 372 rb_link_node(&rq->rb_node, parent, p);
375 rb_insert_color(&rq->rb_node, root); 373 rb_insert_color(&rq->rb_node, root);
376 return NULL;
377} 374}
378EXPORT_SYMBOL(elv_rb_add); 375EXPORT_SYMBOL(elv_rb_add);
379 376
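
elv_rb_add() can no longer report an alias: requests whose sector keys collide are simply chained to the right of the existing node, which is why deadline_add_rq_rb() above loses its alias-handling loop. The whole change is in the comparison. A self-contained sketch of the duplicate-tolerant insert using the same rbtree primitives (illustrative node type, not struct request):

#include <linux/types.h>
#include <linux/rbtree.h>

struct node {
        struct rb_node  rb;
        sector_t        key;
};

/* Equal keys go right, so insertion always succeeds and equal-keyed
 * nodes sit next to each other in an in-order walk. */
static void dup_rb_add(struct rb_root *root, struct node *n)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;

        while (*p) {
                struct node *cur = rb_entry(*p, struct node, rb);

                parent = *p;
                if (n->key < cur->key)
                        p = &(*p)->rb_left;
                else                    /* >= : duplicates allowed */
                        p = &(*p)->rb_right;
        }

        rb_link_node(&n->rb, parent, p);
        rb_insert_color(&n->rb, root);
}
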
diff --git a/block/genhd.c b/block/genhd.c
index 3608289c8ec..d3834710b95 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -602,7 +602,7 @@ void add_disk(struct gendisk *disk)
602 disk->major = MAJOR(devt); 602 disk->major = MAJOR(devt);
603 disk->first_minor = MINOR(devt); 603 disk->first_minor = MINOR(devt);
604 604
605 /* Register BDI before referencing it from bdev */ 605 /* Register BDI before referencing it from bdev */
606 bdi = &disk->queue->backing_dev_info; 606 bdi = &disk->queue->backing_dev_info;
607 bdi_register_dev(bdi, disk_devt(disk)); 607 bdi_register_dev(bdi, disk_devt(disk));
608 608
@@ -611,6 +611,12 @@ void add_disk(struct gendisk *disk)
611 register_disk(disk); 611 register_disk(disk);
612 blk_register_queue(disk); 612 blk_register_queue(disk);
613 613
614 /*
615 * Take an extra ref on queue which will be put on disk_release()
616 * so that it sticks around as long as @disk is there.
617 */
618 WARN_ON_ONCE(blk_get_queue(disk->queue));
619
614 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 620 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
615 "bdi"); 621 "bdi");
616 WARN_ON(retval); 622 WARN_ON(retval);
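
add_disk() now pins the request_queue for as long as the gendisk exists; the matching blk_put_queue() appears in the disk_release() hunk further down. That keeps late users of disk->queue, such as sysfs readers, from racing with the queue being freed, and blk_get_queue() is only expected to fail if the queue were already being torn down, hence the WARN_ON_ONCE rather than an error path. A sketch of the kind of access the extra reference protects (hypothetical attribute, not from the tree):

#include <linux/genhd.h>
#include <linux/blkdev.h>

static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
                        char *buf)
{
        struct gendisk *disk = dev_to_disk(dev);

        /* safe because add_disk() holds a queue reference until disk_release() */
        return sprintf(buf, "%d\n", queue_logical_block_size(disk->queue));
}
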
@@ -1018,14 +1024,6 @@ static const struct attribute_group *disk_attr_groups[] = {
1018 NULL 1024 NULL
1019}; 1025};
1020 1026
1021static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
1022{
1023 struct disk_part_tbl *ptbl =
1024 container_of(head, struct disk_part_tbl, rcu_head);
1025
1026 kfree(ptbl);
1027}
1028
1029/** 1027/**
1030 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way 1028 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
1031 * @disk: disk to replace part_tbl for 1029 * @disk: disk to replace part_tbl for
@@ -1046,7 +1044,7 @@ static void disk_replace_part_tbl(struct gendisk *disk,
1046 1044
1047 if (old_ptbl) { 1045 if (old_ptbl) {
1048 rcu_assign_pointer(old_ptbl->last_lookup, NULL); 1046 rcu_assign_pointer(old_ptbl->last_lookup, NULL);
1049 call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); 1047 kfree_rcu(old_ptbl, rcu_head);
1050 } 1048 }
1051} 1049}
1052 1050
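
Switching from call_rcu() plus a hand-written callback to kfree_rcu() is purely a simplification: the old partition table is still freed only after a grace period, the callback is just derived from the offset of the rcu_head member. The idiom only applies when the object embeds a struct rcu_head and is released with a plain kfree(). A generic sketch (not the disk_part_tbl definition):

#include <linux/slab.h>
#include <linux/rcupdate.h>

struct cache_entry {
        int             key;
        struct rcu_head rcu;    /* kfree_rcu() needs an embedded rcu_head */
};

static void retire_entry(struct cache_entry *e)
{
        /* behaves like call_rcu() with a callback that just kfree()s e */
        kfree_rcu(e, rcu);
}
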
@@ -1103,8 +1101,26 @@ static void disk_release(struct device *dev)
1103 disk_replace_part_tbl(disk, NULL); 1101 disk_replace_part_tbl(disk, NULL);
1104 free_part_stats(&disk->part0); 1102 free_part_stats(&disk->part0);
1105 free_part_info(&disk->part0); 1103 free_part_info(&disk->part0);
1104 if (disk->queue)
1105 blk_put_queue(disk->queue);
1106 kfree(disk); 1106 kfree(disk);
1107} 1107}
1108
1109static int disk_uevent(struct device *dev, struct kobj_uevent_env *env)
1110{
1111 struct gendisk *disk = dev_to_disk(dev);
1112 struct disk_part_iter piter;
1113 struct hd_struct *part;
1114 int cnt = 0;
1115
1116 disk_part_iter_init(&piter, disk, 0);
1117 while((part = disk_part_iter_next(&piter)))
1118 cnt++;
1119 disk_part_iter_exit(&piter);
1120 add_uevent_var(env, "NPARTS=%u", cnt);
1121 return 0;
1122}
1123
1108struct class block_class = { 1124struct class block_class = {
1109 .name = "block", 1125 .name = "block",
1110}; 1126};
@@ -1123,6 +1139,7 @@ static struct device_type disk_type = {
1123 .groups = disk_attr_groups, 1139 .groups = disk_attr_groups,
1124 .release = disk_release, 1140 .release = disk_release,
1125 .devnode = block_devnode, 1141 .devnode = block_devnode,
1142 .uevent = disk_uevent,
1126}; 1143};
1127 1144
1128#ifdef CONFIG_PROC_FS 1145#ifdef CONFIG_PROC_FS
@@ -1148,23 +1165,23 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1148 "wsect wuse running use aveq" 1165 "wsect wuse running use aveq"
1149 "\n\n"); 1166 "\n\n");
1150 */ 1167 */
1151 1168
1152 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); 1169 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
1153 while ((hd = disk_part_iter_next(&piter))) { 1170 while ((hd = disk_part_iter_next(&piter))) {
1154 cpu = part_stat_lock(); 1171 cpu = part_stat_lock();
1155 part_round_stats(cpu, hd); 1172 part_round_stats(cpu, hd);
1156 part_stat_unlock(); 1173 part_stat_unlock();
1157 seq_printf(seqf, "%4d %7d %s %lu %lu %llu " 1174 seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
1158 "%u %lu %lu %llu %u %u %u %u\n", 1175 "%u %lu %lu %lu %u %u %u %u\n",
1159 MAJOR(part_devt(hd)), MINOR(part_devt(hd)), 1176 MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
1160 disk_name(gp, hd->partno, buf), 1177 disk_name(gp, hd->partno, buf),
1161 part_stat_read(hd, ios[READ]), 1178 part_stat_read(hd, ios[READ]),
1162 part_stat_read(hd, merges[READ]), 1179 part_stat_read(hd, merges[READ]),
1163 (unsigned long long)part_stat_read(hd, sectors[READ]), 1180 part_stat_read(hd, sectors[READ]),
1164 jiffies_to_msecs(part_stat_read(hd, ticks[READ])), 1181 jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
1165 part_stat_read(hd, ios[WRITE]), 1182 part_stat_read(hd, ios[WRITE]),
1166 part_stat_read(hd, merges[WRITE]), 1183 part_stat_read(hd, merges[WRITE]),
1167 (unsigned long long)part_stat_read(hd, sectors[WRITE]), 1184 part_stat_read(hd, sectors[WRITE]),
1168 jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), 1185 jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
1169 part_in_flight(hd), 1186 part_in_flight(hd),
1170 jiffies_to_msecs(part_stat_read(hd, io_ticks)), 1187 jiffies_to_msecs(part_stat_read(hd, io_ticks)),
@@ -1172,7 +1189,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1172 ); 1189 );
1173 } 1190 }
1174 disk_part_iter_exit(&piter); 1191 disk_part_iter_exit(&piter);
1175 1192
1176 return 0; 1193 return 0;
1177} 1194}
1178 1195
@@ -1500,30 +1517,32 @@ void disk_unblock_events(struct gendisk *disk)
1500} 1517}
1501 1518
1502/** 1519/**
1503 * disk_check_events - schedule immediate event checking 1520 * disk_flush_events - schedule immediate event checking and flushing
1504 * @disk: disk to check events for 1521 * @disk: disk to check and flush events for
1522 * @mask: events to flush
1505 * 1523 *
1506 * Schedule immediate event checking on @disk if not blocked. 1524 * Schedule immediate event checking on @disk if not blocked. Events in
1525 * @mask are scheduled to be cleared from the driver. Note that this
1526 * doesn't clear the events from @disk->ev.
1507 * 1527 *
1508 * CONTEXT: 1528 * CONTEXT:
1509 * Don't care. Safe to call from irq context. 1529 * If @mask is non-zero must be called with bdev->bd_mutex held.
1510 */ 1530 */
1511void disk_check_events(struct gendisk *disk) 1531void disk_flush_events(struct gendisk *disk, unsigned int mask)
1512{ 1532{
1513 struct disk_events *ev = disk->ev; 1533 struct disk_events *ev = disk->ev;
1514 unsigned long flags;
1515 1534
1516 if (!ev) 1535 if (!ev)
1517 return; 1536 return;
1518 1537
1519 spin_lock_irqsave(&ev->lock, flags); 1538 spin_lock_irq(&ev->lock);
1539 ev->clearing |= mask;
1520 if (!ev->block) { 1540 if (!ev->block) {
1521 cancel_delayed_work(&ev->dwork); 1541 cancel_delayed_work(&ev->dwork);
1522 queue_delayed_work(system_nrt_wq, &ev->dwork, 0); 1542 queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
1523 } 1543 }
1524 spin_unlock_irqrestore(&ev->lock, flags); 1544 spin_unlock_irq(&ev->lock);
1525} 1545}
1526EXPORT_SYMBOL_GPL(disk_check_events);
1527 1546
1528/** 1547/**
1529 * disk_clear_events - synchronously check, clear and return pending events 1548 * disk_clear_events - synchronously check, clear and return pending events
@@ -1713,7 +1732,7 @@ static int disk_events_set_dfl_poll_msecs(const char *val,
1713 mutex_lock(&disk_events_mutex); 1732 mutex_lock(&disk_events_mutex);
1714 1733
1715 list_for_each_entry(ev, &disk_events, node) 1734 list_for_each_entry(ev, &disk_events, node)
1716 disk_check_events(ev->disk); 1735 disk_flush_events(ev->disk, 0);
1717 1736
1718 mutex_unlock(&disk_events_mutex); 1737 mutex_unlock(&disk_events_mutex);
1719 1738
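
disk_check_events() is reworked into disk_flush_events(): besides kicking an immediate check, the caller can now pass a mask of events to be cleared at the driver on that check (without clearing them from disk->ev, per the new comment), the locking tightens so that a non-zero mask requires bdev->bd_mutex, and with spin_lock_irq() replacing spin_lock_irqsave() the function is no longer safe from irq context. A hedged sketch of a caller on a block-device revalidation path (hypothetical helper; DISK_EVENT_MEDIA_CHANGE is the standard event flag):

#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/mutex.h>

/* Discard a stale media-change indication once the device has been
 * revalidated; bd_mutex is held as the new comment requires. */
static void flush_stale_media_change(struct block_device *bdev)
{
        mutex_lock(&bdev->bd_mutex);
        disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
        mutex_unlock(&bdev->bd_mutex);
}
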