Diffstat (limited to 'block')
-rw-r--r--  block/as-iosched.c    116
-rw-r--r--  block/blk-barrier.c     3
-rw-r--r--  block/blk-core.c        6
-rw-r--r--  block/blk-merge.c       5
-rw-r--r--  block/blk-settings.c   20
-rw-r--r--  block/blk-sysfs.c       4
-rw-r--r--  block/blk-timeout.c    13
-rw-r--r--  block/blk.h            11
-rw-r--r--  block/cfq-iosched.c   300
-rw-r--r--  block/elevator.c        8
-rw-r--r--  block/genhd.c          12
-rw-r--r--  block/ioctl.c           2
-rw-r--r--  block/scsi_ioctl.c     19
13 files changed, 370 insertions, 149 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 631f6f44460a..c48fa670d221 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -17,9 +17,6 @@
 #include <linux/rbtree.h>
 #include <linux/interrupt.h>
 
-#define REQ_SYNC	1
-#define REQ_ASYNC	0
-
 /*
  * See Documentation/block/as-iosched.txt
  */
@@ -93,7 +90,7 @@ struct as_data {
 	struct list_head fifo_list[2];
 
 	struct request *next_rq[2];	/* next in sort order */
-	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
+	sector_t last_sector[2];	/* last SYNC & ASYNC sectors */
 
 	unsigned long exit_prob;	/* probability a task will exit while
 					   being waited on */
@@ -109,7 +106,7 @@ struct as_data {
 	unsigned long last_check_fifo[2];
 	int changed_batch;		/* 1: waiting for old batch to end */
 	int new_batch;			/* 1: waiting on first read complete */
-	int batch_data_dir;		/* current batch REQ_SYNC / REQ_ASYNC */
+	int batch_data_dir;		/* current batch SYNC / ASYNC */
 	int write_batch_count;		/* max # of reqs in a write batch */
 	int current_write_count;	/* how many requests left this batch */
 	int write_batch_idled;		/* has the write batch gone idle? */
@@ -554,7 +551,7 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
 	if (aic == NULL)
 		return;
 
-	if (data_dir == REQ_SYNC) {
+	if (data_dir == BLK_RW_SYNC) {
 		unsigned long in_flight = atomic_read(&aic->nr_queued)
 					+ atomic_read(&aic->nr_dispatched);
 		spin_lock(&aic->lock);
@@ -811,7 +808,7 @@ static void as_update_rq(struct as_data *ad, struct request *rq)
  */
 static void update_write_batch(struct as_data *ad)
 {
-	unsigned long batch = ad->batch_expire[REQ_ASYNC];
+	unsigned long batch = ad->batch_expire[BLK_RW_ASYNC];
 	long write_time;
 
 	write_time = (jiffies - ad->current_batch_expires) + batch;
@@ -855,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
 			kblockd_schedule_work(q, &ad->antic_work);
 		ad->changed_batch = 0;
 
-		if (ad->batch_data_dir == REQ_SYNC)
+		if (ad->batch_data_dir == BLK_RW_SYNC)
 			ad->new_batch = 1;
 	}
 	WARN_ON(ad->nr_dispatched == 0);
@@ -869,7 +866,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
 	if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
 		update_write_batch(ad);
 		ad->current_batch_expires = jiffies +
-				ad->batch_expire[REQ_SYNC];
+				ad->batch_expire[BLK_RW_SYNC];
 		ad->new_batch = 0;
 	}
 
@@ -960,7 +957,7 @@ static inline int as_batch_expired(struct as_data *ad)
 	if (ad->changed_batch || ad->new_batch)
 		return 0;
 
-	if (ad->batch_data_dir == REQ_SYNC)
+	if (ad->batch_data_dir == BLK_RW_SYNC)
 		/* TODO! add a check so a complete fifo gets written? */
 		return time_after(jiffies, ad->current_batch_expires);
 
@@ -986,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
 	 */
 	ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
 
-	if (data_dir == REQ_SYNC) {
+	if (data_dir == BLK_RW_SYNC) {
 		struct io_context *ioc = RQ_IOC(rq);
 		/* In case we have to anticipate after this */
 		copy_io_context(&ad->io_context, &ioc);
@@ -1025,41 +1022,41 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
 static int as_dispatch_request(struct request_queue *q, int force)
 {
 	struct as_data *ad = q->elevator->elevator_data;
-	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
-	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
+	const int reads = !list_empty(&ad->fifo_list[BLK_RW_SYNC]);
+	const int writes = !list_empty(&ad->fifo_list[BLK_RW_ASYNC]);
 	struct request *rq;
 
 	if (unlikely(force)) {
 		/*
 		 * Forced dispatch, accounting is useless. Reset
 		 * accounting states and dump fifo_lists. Note that
-		 * batch_data_dir is reset to REQ_SYNC to avoid
+		 * batch_data_dir is reset to BLK_RW_SYNC to avoid
 		 * screwing write batch accounting as write batch
 		 * accounting occurs on W->R transition.
 		 */
 		int dispatched = 0;
 
-		ad->batch_data_dir = REQ_SYNC;
+		ad->batch_data_dir = BLK_RW_SYNC;
 		ad->changed_batch = 0;
 		ad->new_batch = 0;
 
-		while (ad->next_rq[REQ_SYNC]) {
-			as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]);
+		while (ad->next_rq[BLK_RW_SYNC]) {
+			as_move_to_dispatch(ad, ad->next_rq[BLK_RW_SYNC]);
 			dispatched++;
 		}
-		ad->last_check_fifo[REQ_SYNC] = jiffies;
+		ad->last_check_fifo[BLK_RW_SYNC] = jiffies;
 
-		while (ad->next_rq[REQ_ASYNC]) {
-			as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]);
+		while (ad->next_rq[BLK_RW_ASYNC]) {
+			as_move_to_dispatch(ad, ad->next_rq[BLK_RW_ASYNC]);
 			dispatched++;
 		}
-		ad->last_check_fifo[REQ_ASYNC] = jiffies;
+		ad->last_check_fifo[BLK_RW_ASYNC] = jiffies;
 
 		return dispatched;
 	}
 
 	/* Signal that the write batch was uncontended, so we can't time it */
-	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
+	if (ad->batch_data_dir == BLK_RW_ASYNC && !reads) {
 		if (ad->current_write_count == 0 || !writes)
 			ad->write_batch_idled = 1;
 	}
@@ -1076,8 +1073,8 @@ static int as_dispatch_request(struct request_queue *q, int force)
 	 */
 	rq = ad->next_rq[ad->batch_data_dir];
 
-	if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) {
-		if (as_fifo_expired(ad, REQ_SYNC))
+	if (ad->batch_data_dir == BLK_RW_SYNC && ad->antic_expire) {
+		if (as_fifo_expired(ad, BLK_RW_SYNC))
 			goto fifo_expired;
 
 		if (as_can_anticipate(ad, rq)) {
@@ -1090,7 +1087,7 @@ static int as_dispatch_request(struct request_queue *q, int force)
 			/* we have a "next request" */
 			if (reads && !writes)
 				ad->current_batch_expires =
-					jiffies + ad->batch_expire[REQ_SYNC];
+					jiffies + ad->batch_expire[BLK_RW_SYNC];
 			goto dispatch_request;
 		}
 	}
@@ -1101,20 +1098,20 @@ static int as_dispatch_request(struct request_queue *q, int force)
 	 */
 
 	if (reads) {
-		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_SYNC]));
+		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_SYNC]));
 
-		if (writes && ad->batch_data_dir == REQ_SYNC)
+		if (writes && ad->batch_data_dir == BLK_RW_SYNC)
 			/*
 			 * Last batch was a read, switch to writes
 			 */
 			goto dispatch_writes;
 
-		if (ad->batch_data_dir == REQ_ASYNC) {
+		if (ad->batch_data_dir == BLK_RW_ASYNC) {
 			WARN_ON(ad->new_batch);
 			ad->changed_batch = 1;
 		}
-		ad->batch_data_dir = REQ_SYNC;
-		rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next);
+		ad->batch_data_dir = BLK_RW_SYNC;
+		rq = rq_entry_fifo(ad->fifo_list[BLK_RW_SYNC].next);
 		ad->last_check_fifo[ad->batch_data_dir] = jiffies;
 		goto dispatch_request;
 	}
@@ -1125,9 +1122,9 @@ static int as_dispatch_request(struct request_queue *q, int force)
 
 	if (writes) {
 dispatch_writes:
-		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_ASYNC]));
+		BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_ASYNC]));
 
-		if (ad->batch_data_dir == REQ_SYNC) {
+		if (ad->batch_data_dir == BLK_RW_SYNC) {
 			ad->changed_batch = 1;
 
 			/*
@@ -1137,11 +1134,11 @@ dispatch_writes:
 			 */
 			ad->new_batch = 0;
 		}
-		ad->batch_data_dir = REQ_ASYNC;
+		ad->batch_data_dir = BLK_RW_ASYNC;
 		ad->current_write_count = ad->write_batch_count;
 		ad->write_batch_idled = 0;
-		rq = rq_entry_fifo(ad->fifo_list[REQ_ASYNC].next);
-		ad->last_check_fifo[REQ_ASYNC] = jiffies;
+		rq = rq_entry_fifo(ad->fifo_list[BLK_RW_ASYNC].next);
+		ad->last_check_fifo[BLK_RW_ASYNC] = jiffies;
 		goto dispatch_request;
 	}
 
@@ -1164,9 +1161,9 @@ fifo_expired:
 	if (ad->nr_dispatched)
 		return 0;
 
-	if (ad->batch_data_dir == REQ_ASYNC)
+	if (ad->batch_data_dir == BLK_RW_ASYNC)
 		ad->current_batch_expires = jiffies +
-			ad->batch_expire[REQ_ASYNC];
+			ad->batch_expire[BLK_RW_ASYNC];
 	else
 		ad->new_batch = 1;
 
@@ -1238,8 +1235,8 @@ static int as_queue_empty(struct request_queue *q)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 
-	return list_empty(&ad->fifo_list[REQ_ASYNC])
-		&& list_empty(&ad->fifo_list[REQ_SYNC]);
+	return list_empty(&ad->fifo_list[BLK_RW_ASYNC])
+		&& list_empty(&ad->fifo_list[BLK_RW_SYNC]);
 }
 
 static int
@@ -1346,8 +1343,8 @@ static void as_exit_queue(struct elevator_queue *e)
 	del_timer_sync(&ad->antic_timer);
 	cancel_work_sync(&ad->antic_work);
 
-	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
-	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
+	BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_SYNC]));
+	BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_ASYNC]));
 
 	put_io_context(ad->io_context);
 	kfree(ad);
@@ -1372,18 +1369,18 @@ static void *as_init_queue(struct request_queue *q)
 	init_timer(&ad->antic_timer);
 	INIT_WORK(&ad->antic_work, as_work_handler);
 
-	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
-	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
-	ad->sort_list[REQ_SYNC] = RB_ROOT;
-	ad->sort_list[REQ_ASYNC] = RB_ROOT;
-	ad->fifo_expire[REQ_SYNC] = default_read_expire;
-	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
+	INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_SYNC]);
+	INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_ASYNC]);
+	ad->sort_list[BLK_RW_SYNC] = RB_ROOT;
+	ad->sort_list[BLK_RW_ASYNC] = RB_ROOT;
+	ad->fifo_expire[BLK_RW_SYNC] = default_read_expire;
+	ad->fifo_expire[BLK_RW_ASYNC] = default_write_expire;
 	ad->antic_expire = default_antic_expire;
-	ad->batch_expire[REQ_SYNC] = default_read_batch_expire;
-	ad->batch_expire[REQ_ASYNC] = default_write_batch_expire;
+	ad->batch_expire[BLK_RW_SYNC] = default_read_batch_expire;
+	ad->batch_expire[BLK_RW_ASYNC] = default_write_batch_expire;
 
-	ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC];
-	ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10;
+	ad->current_batch_expires = jiffies + ad->batch_expire[BLK_RW_SYNC];
+	ad->write_batch_count = ad->batch_expire[BLK_RW_ASYNC] / 10;
 	if (ad->write_batch_count < 2)
 		ad->write_batch_count = 2;
 
@@ -1432,11 +1429,11 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \
 	struct as_data *ad = e->elevator_data;			\
 	return as_var_show(jiffies_to_msecs((__VAR)), (page));	\
 }
-SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[REQ_SYNC]);
-SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[REQ_ASYNC]);
+SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[BLK_RW_SYNC]);
+SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[BLK_RW_ASYNC]);
 SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire);
-SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[REQ_SYNC]);
-SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
+SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[BLK_RW_SYNC]);
+SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[BLK_RW_ASYNC]);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)			\
@@ -1451,13 +1448,14 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
 	*(__PTR) = msecs_to_jiffies(*(__PTR));			\
 	return ret;						\
 }
-STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX);
-STORE_FUNCTION(as_write_expire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX);
+STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[BLK_RW_SYNC], 0, INT_MAX);
+STORE_FUNCTION(as_write_expire_store,
+			&ad->fifo_expire[BLK_RW_ASYNC], 0, INT_MAX);
 STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX);
 STORE_FUNCTION(as_read_batch_expire_store,
-			&ad->batch_expire[REQ_SYNC], 0, INT_MAX);
+			&ad->batch_expire[BLK_RW_SYNC], 0, INT_MAX);
 STORE_FUNCTION(as_write_batch_expire_store,
-			&ad->batch_expire[REQ_ASYNC], 0, INT_MAX);
+			&ad->batch_expire[BLK_RW_ASYNC], 0, INT_MAX);
 #undef STORE_FUNCTION
 
 #define AS_ATTR(name)						\
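
The as-iosched.c changes above are a mechanical rename: the scheduler's private REQ_SYNC/REQ_ASYNC defines give way to block-layer-wide constants. A sketch of the shared enum this assumes (expected to live in include/linux/blkdev.h; the values must match the old private defines, since both old and new names index two-element arrays such as fifo_list[2]):

    /* Assumed shared constants, not part of this diff. The old private
     * defines were REQ_ASYNC 0 / REQ_SYNC 1, so the replacements must
     * keep the same values for the array indexing to stay correct. */
    enum {
            BLK_RW_ASYNC    = 0,
            BLK_RW_SYNC     = 1,
    };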
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index f7dae57e6cab..20b4111fa050 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -319,9 +319,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 		return -ENXIO;
 
 	bio = bio_alloc(GFP_KERNEL, 0);
-	if (!bio)
-		return -ENOMEM;
-
 	bio->bi_end_io = bio_end_empty_barrier;
 	bio->bi_private = &wait;
 	bio->bi_bdev = bdev;
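
Dropping the NULL check after bio_alloc() is not lost error handling: for a blocking allocation such as GFP_KERNEL, bio_alloc() is backed by a mempool and sleeps until it succeeds rather than returning NULL, so the branch was dead code. A hedged sketch of the guarantee being relied on (the same reasoning removes the identical check in blk_ioctl_discard() further down):

    /* With __GFP_WAIT set (as in GFP_KERNEL), bio_alloc() falls back to a
     * mempool and may sleep, but does not fail; a NULL test is dead code. */
    struct bio *bio = bio_alloc(GFP_KERNEL, 0);
    bio->bi_bdev = bdev;        /* safe to dereference immediately */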
diff --git a/block/blk-core.c b/block/blk-core.c
index a5f747a8312e..d028baf946a3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
 
 	blk_rq_init(q, rq);
 
-	rq->cmd_flags = rw | REQ_ALLOCED;
+	rq->cmd_flags = flags | REQ_ALLOCED;
 
 	if (priv) {
 		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
@@ -792,6 +792,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	if (priv)
 		rl->elvpriv++;
 
+	if (blk_queue_io_stat(q))
+		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
 	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
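
The two added lines in get_request() snapshot the queue's iostats setting into the request itself, and they do it before queue_lock is released. A sketch of why the ordering matters (names as in the hunk above):

    /* Sampled once, under queue_lock: a concurrent sysfs toggle of
     * QUEUE_FLAG_IO_STAT cannot leave this request half-accounted,
     * because accounting code afterwards tests only rq->cmd_flags. */
    if (blk_queue_io_stat(q))
            rw_flags |= REQ_IO_STAT;
    spin_unlock_irq(q->queue_lock);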
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 63760ca3da0f..23d2a6fe34a3 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -402,7 +402,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 
 	elv_merge_requests(q, req, next);
 
-	blk_account_io_merge(req);
+	/*
+	 * 'next' is going away, so update stats accordingly
+	 */
+	blk_account_io_merge(next);
 
 	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
 	if (blk_rq_cpu_valid(next))
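
The accounting fix is one word but matters: after the merge, 'next' is absorbed into 'req' and freed, so it is 'next' whose in-flight count must be dropped; decrementing against 'req' would leave the disappearing request counted forever. For context, a hedged sketch of what blk_account_io_merge() is understood to do in this era (reconstructed, not part of this diff):

    static void blk_account_io_merge(struct request *req)
    {
            if (blk_do_io_stat(req)) {
                    struct hd_struct *part;
                    int cpu;

                    cpu = part_stat_lock();
                    part = disk_map_sector_rcu(req->rq_disk, req->sector);
                    part_round_stats(cpu, part);
                    part_dec_in_flight(part);   /* one request ceases to exist */
                    part_stat_unlock();
            }
    }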
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 69c42adde52b..57af728d94bb 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -156,26 +156,28 @@ EXPORT_SYMBOL(blk_queue_make_request);
 
 /**
  * blk_queue_bounce_limit - set bounce buffer limit for queue
  * @q: the request queue for the device
- * @dma_addr: bus address limit
+ * @dma_mask: the maximum address the device can handle
  *
  * Description:
  *    Different hardware can have different requirements as to what pages
  *    it can do I/O directly to. A low level driver can call
  *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
- *    buffers for doing I/O to pages residing above @dma_addr.
+ *    buffers for doing I/O to pages residing above @dma_mask.
  **/
-void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
+void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 {
-	unsigned long b_pfn = dma_addr >> PAGE_SHIFT;
+	unsigned long b_pfn = dma_mask >> PAGE_SHIFT;
 	int dma = 0;
 
 	q->bounce_gfp = GFP_NOIO;
 #if BITS_PER_LONG == 64
-	/* Assume anything <= 4GB can be handled by IOMMU.
-	   Actually some IOMMUs can handle everything, but I don't
-	   know of a way to test this here. */
-	if (b_pfn < (min_t(u64, 0x100000000UL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
+	/*
+	 * Assume anything <= 4GB can be handled by IOMMU. Actually
+	 * some IOMMUs can handle everything, but I don't know of a
+	 * way to test this here.
+	 */
+	if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
 		dma = 1;
 	q->bounce_pfn = max_low_pfn;
 #else
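
Besides the comment reflow, this hunk changes the meaning of the argument: @dma_mask is now documented as the highest address the device can reach (a mask), and the internal 4GB check uses the matching mask constant 0xffffffff instead of the limit value 0x100000000, consistent with constants like BLK_BOUNCE_HIGH. A usage sketch under the new convention (the queue pointer here is hypothetical):

    /* A device limited to 32-bit addressing passes the mask form; with
     * 4K pages (PAGE_SHIFT == 12) this yields b_pfn = 0xfffff. */
    blk_queue_bounce_limit(q, DMA_BIT_MASK(32));    /* 0xffffffff */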
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8653d710b39e..26f9ec28f56c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
 	ssize_t ret = queue_var_store(&stats, page, count);
 
 	spin_lock_irq(q->queue_lock);
-	elv_quisce_start(q);
-
 	if (stats)
 		queue_flag_set(QUEUE_FLAG_IO_STAT, q);
 	else
 		queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
-
-	elv_quisce_end(q);
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index bbbdc4b8ccf2..1ec0d503cacd 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -211,6 +211,12 @@ void blk_abort_queue(struct request_queue *q)
 	struct request *rq, *tmp;
 	LIST_HEAD(list);
 
+	/*
+	 * Not a request based block device, nothing to abort
+	 */
+	if (!q->request_fn)
+		return;
+
 	spin_lock_irqsave(q->queue_lock, flags);
 
 	elv_abort_queue(q);
@@ -224,6 +230,13 @@ void blk_abort_queue(struct request_queue *q)
 	list_for_each_entry_safe(rq, tmp, &list, timeout_list)
 		blk_abort_request(rq);
 
+	/*
+	 * Occasionally, blk_abort_request() will return without
+	 * deleting the element from the list. Make sure we add those back
+	 * instead of leaving them on the local stack list.
+	 */
+	list_splice(&list, &q->timeout_list);
+
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 }
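
Two defensive fixes in blk_abort_queue(): bail out for bio-based queues that have no request_fn to abort, and splice back any requests that blk_abort_request() declined to take off the local list. A minimal sketch of the leak the splice closes (names as in the function above):

    LIST_HEAD(list);
    list_splice_init(&q->timeout_list, &list);
    list_for_each_entry_safe(rq, tmp, &list, timeout_list)
            blk_abort_request(rq);          /* may leave rq chained to 'list' */
    /* without this, survivors dangle from an on-stack list_head */
    list_splice(&list, &q->timeout_list);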
diff --git a/block/blk.h b/block/blk.h
index 24fcaeeaf620..79c85f7c9ff5 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -70,8 +70,8 @@ void blk_queue_congestion_threshold(struct request_queue *q);
 
 int blk_dev_init(void);
 
-void elv_quisce_start(struct request_queue *q);
-void elv_quisce_end(struct request_queue *q);
+void elv_quiesce_start(struct request_queue *q);
+void elv_quiesce_end(struct request_queue *q);
 
 
 /*
@@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu)
 
 static inline int blk_do_io_stat(struct request *rq)
 {
-	struct gendisk *disk = rq->rq_disk;
-
-	if (!disk || !disk->queue)
-		return 0;
-
-	return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
+	return rq->rq_disk && blk_rq_io_stat(rq);
}
 
 #endif
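
blk_do_io_stat() can shrink to a test of the request alone because the decision was frozen into cmd_flags at allocation time (see the blk-core.c hunk earlier); this is also what makes the elv_quiesce dance in blk-sysfs.c unnecessary. A sketch of the companion helpers this presumes, alongside the other cmd_flags tests in blkdev.h (assumed definitions, not shown in this diff):

    #define blk_queue_io_stat(q)  test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
    #define blk_rq_io_stat(rq)    ((rq)->cmd_flags & REQ_IO_STAT)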
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a4809de6fea6..a55a9bd75bd1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -56,9 +56,6 @@ static DEFINE_SPINLOCK(ioc_gone_lock);
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
 
-#define ASYNC			(0)
-#define SYNC			(1)
-
 #define sample_valid(samples)	((samples) > 80)
 
 /*
@@ -83,6 +80,14 @@ struct cfq_data {
 	 * rr list of queues with requests and the count of them
 	 */
 	struct cfq_rb_root service_tree;
+
+	/*
+	 * Each priority tree is sorted by next_request position. These
+	 * trees are used when determining if two or more queues are
+	 * interleaving requests (see cfq_close_cooperator).
+	 */
+	struct rb_root prio_trees[CFQ_PRIO_LISTS];
+
 	unsigned int busy_queues;
 	/*
 	 * Used to track any pending rt requests so we can pre-empt current
@@ -147,6 +152,10 @@ struct cfq_queue {
 	struct rb_node rb_node;
 	/* service_tree key */
 	unsigned long rb_key;
+	/* prio tree member */
+	struct rb_node p_node;
+	/* prio tree root we belong to, if any */
+	struct rb_root *p_root;
 	/* sorted list of pending requests */
 	struct rb_root sort_list;
 	/* if fifo isn't expired, next request to serve */
@@ -185,6 +194,7 @@ enum cfqq_state_flags {
 	CFQ_CFQQ_FLAG_prio_changed,	/* task priority has changed */
 	CFQ_CFQQ_FLAG_slice_new,	/* no requests dispatched in slice */
 	CFQ_CFQQ_FLAG_sync,		/* synchronous queue */
+	CFQ_CFQQ_FLAG_coop,		/* has done a coop jump of the queue */
 };
 
 #define CFQ_CFQQ_FNS(name)						\
@@ -211,6 +221,7 @@ CFQ_CFQQ_FNS(idle_window);
 CFQ_CFQQ_FNS(prio_changed);
 CFQ_CFQQ_FNS(slice_new);
 CFQ_CFQQ_FNS(sync);
+CFQ_CFQQ_FNS(coop);
 #undef CFQ_CFQQ_FNS
 
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...)	\
@@ -419,13 +430,17 @@ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
 	return NULL;
 }
 
+static void rb_erase_init(struct rb_node *n, struct rb_root *root)
+{
+	rb_erase(n, root);
+	RB_CLEAR_NODE(n);
+}
+
 static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
 {
 	if (root->left == n)
 		root->left = NULL;
-
-	rb_erase(n, &root->rb);
-	RB_CLEAR_NODE(n);
+	rb_erase_init(n, &root->rb);
 }
 
 /*
@@ -470,8 +485,8 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
  * requests waiting to be processed. It is sorted in the order that
  * we will service the queues.
  */
-static void cfq_service_tree_add(struct cfq_data *cfqd,
-				    struct cfq_queue *cfqq, int add_front)
+static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+				 int add_front)
 {
 	struct rb_node **p, *parent;
 	struct cfq_queue *__cfqq;
@@ -544,6 +559,67 @@ static void cfq_service_tree_add(struct cfq_data *cfqd,
 		rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
 }
 
+static struct cfq_queue *
+cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
+		     sector_t sector, struct rb_node **ret_parent,
+		     struct rb_node ***rb_link)
+{
+	struct rb_node **p, *parent;
+	struct cfq_queue *cfqq = NULL;
+
+	parent = NULL;
+	p = &root->rb_node;
+	while (*p) {
+		struct rb_node **n;
+
+		parent = *p;
+		cfqq = rb_entry(parent, struct cfq_queue, p_node);
+
+		/*
+		 * Sort strictly based on sector. Smallest to the left,
+		 * largest to the right.
+		 */
+		if (sector > cfqq->next_rq->sector)
+			n = &(*p)->rb_right;
+		else if (sector < cfqq->next_rq->sector)
+			n = &(*p)->rb_left;
+		else
+			break;
+		p = n;
+		cfqq = NULL;
+	}
+
+	*ret_parent = parent;
+	if (rb_link)
+		*rb_link = p;
+	return cfqq;
+}
+
+static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	struct rb_node **p, *parent;
+	struct cfq_queue *__cfqq;
+
+	if (cfqq->p_root) {
+		rb_erase(&cfqq->p_node, cfqq->p_root);
+		cfqq->p_root = NULL;
+	}
+
+	if (cfq_class_idle(cfqq))
+		return;
+	if (!cfqq->next_rq)
+		return;
+
+	cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
+	__cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, cfqq->next_rq->sector,
+				      &parent, &p);
+	if (!__cfqq) {
+		rb_link_node(&cfqq->p_node, parent, p);
+		rb_insert_color(&cfqq->p_node, cfqq->p_root);
+	} else
+		cfqq->p_root = NULL;
+}
+
 /*
  * Update cfqq's position in the service tree.
  */
@@ -552,8 +628,10 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	/*
 	 * Resorting requires the cfqq to be on the RR list already.
 	 */
-	if (cfq_cfqq_on_rr(cfqq))
+	if (cfq_cfqq_on_rr(cfqq)) {
 		cfq_service_tree_add(cfqd, cfqq, 0);
+		cfq_prio_tree_add(cfqd, cfqq);
+	}
 }
 
 /*
@@ -584,6 +662,10 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	if (!RB_EMPTY_NODE(&cfqq->rb_node))
 		cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
+	if (cfqq->p_root) {
+		rb_erase(&cfqq->p_node, cfqq->p_root);
+		cfqq->p_root = NULL;
+	}
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
@@ -613,7 +695,7 @@ static void cfq_add_rq_rb(struct request *rq)
 {
 	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 	struct cfq_data *cfqd = cfqq->cfqd;
-	struct request *__alias;
+	struct request *__alias, *prev;
 
 	cfqq->queued[rq_is_sync(rq)]++;
 
@@ -630,7 +712,15 @@ static void cfq_add_rq_rb(struct request *rq)
 	/*
 	 * check if this request is a better next-serve candidate
 	 */
+	prev = cfqq->next_rq;
 	cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
+
+	/*
+	 * adjust priority tree position, if ->next_rq changes
+	 */
+	if (prev != cfqq->next_rq)
+		cfq_prio_tree_add(cfqd, cfqq);
+
 	BUG_ON(!cfqq->next_rq);
 }
 
@@ -843,11 +933,15 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 /*
  * Get and set a new active queue for service.
  */
-static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
+static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
+					      struct cfq_queue *cfqq)
 {
-	struct cfq_queue *cfqq;
+	if (!cfqq) {
+		cfqq = cfq_get_next_queue(cfqd);
+		if (cfqq)
+			cfq_clear_cfqq_coop(cfqq);
+	}
 
-	cfqq = cfq_get_next_queue(cfqd);
 	__cfq_set_active_queue(cfqd, cfqq);
 	return cfqq;
 }
@@ -861,28 +955,100 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
 		return cfqd->last_position - rq->sector;
 }
 
+#define CIC_SEEK_THR	8 * 1024
+#define CIC_SEEKY(cic)	((cic)->seek_mean > CIC_SEEK_THR)
+
 static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
 {
 	struct cfq_io_context *cic = cfqd->active_cic;
+	sector_t sdist = cic->seek_mean;
 
 	if (!sample_valid(cic->seek_samples))
-		return 0;
+		sdist = CIC_SEEK_THR;
+
+	return cfq_dist_from_last(cfqd, rq) <= sdist;
+}
+
+static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
+				    struct cfq_queue *cur_cfqq)
+{
+	struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio];
+	struct rb_node *parent, *node;
+	struct cfq_queue *__cfqq;
+	sector_t sector = cfqd->last_position;
+
+	if (RB_EMPTY_ROOT(root))
+		return NULL;
+
+	/*
+	 * First, if we find a request starting at the end of the last
+	 * request, choose it.
+	 */
+	__cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL);
+	if (__cfqq)
+		return __cfqq;
+
+	/*
+	 * If the exact sector wasn't found, the parent of the NULL leaf
+	 * will contain the closest sector.
+	 */
+	__cfqq = rb_entry(parent, struct cfq_queue, p_node);
+	if (cfq_rq_close(cfqd, __cfqq->next_rq))
+		return __cfqq;
+
+	if (__cfqq->next_rq->sector < sector)
+		node = rb_next(&__cfqq->p_node);
+	else
+		node = rb_prev(&__cfqq->p_node);
+	if (!node)
+		return NULL;
 
-	return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
+	__cfqq = rb_entry(node, struct cfq_queue, p_node);
+	if (cfq_rq_close(cfqd, __cfqq->next_rq))
+		return __cfqq;
+
+	return NULL;
 }
 
-static int cfq_close_cooperator(struct cfq_data *cfq_data,
-				struct cfq_queue *cfqq)
+/*
+ * cfqd - obvious
+ * cur_cfqq - passed in so that we don't decide that the current queue is
+ *	      closely cooperating with itself.
+ *
+ * So, basically we're assuming that that cur_cfqq has dispatched at least
+ * one request, and that cfqd->last_position reflects a position on the disk
+ * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid
+ * assumption.
+ */
+static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
+					      struct cfq_queue *cur_cfqq,
+					      int probe)
 {
+	struct cfq_queue *cfqq;
+
+	/*
+	 * A valid cfq_io_context is necessary to compare requests against
+	 * the seek_mean of the current cfqq.
+	 */
+	if (!cfqd->active_cic)
+		return NULL;
+
 	/*
 	 * We should notice if some of the queues are cooperating, eg
 	 * working closely on the same area of the disk. In that case,
 	 * we can group them together and don't waste time idling.
 	 */
-	return 0;
-}
+	cfqq = cfqq_close(cfqd, cur_cfqq);
+	if (!cfqq)
+		return NULL;
 
-#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))
+	if (cfq_cfqq_coop(cfqq))
+		return NULL;
+
+	if (!probe)
+		cfq_mark_cfqq_coop(cfqq);
+	return cfqq;
+}
 
 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 {
@@ -920,13 +1086,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	if (!cic || !atomic_read(&cic->ioc->nr_tasks))
 		return;
 
-	/*
-	 * See if this prio level has a good candidate
-	 */
-	if (cfq_close_cooperator(cfqd, cfqq) &&
-	    (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
-		return;
-
 	cfq_mark_cfqq_wait_request(cfqq);
 
 	/*
@@ -939,7 +1098,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
 
 	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
-	cfq_log(cfqd, "arm_idle: %lu", sl);
+	cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
 }
 
 /*
@@ -1003,7 +1162,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
  */
 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 {
-	struct cfq_queue *cfqq;
+	struct cfq_queue *cfqq, *new_cfqq = NULL;
 
 	cfqq = cfqd->active_queue;
 	if (!cfqq)
@@ -1037,6 +1196,16 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 		goto keep_queue;
 
 	/*
+	 * If another queue has a request waiting within our mean seek
+	 * distance, let it run. The expire code will check for close
+	 * cooperators and put the close queue at the front of the service
+	 * tree.
+	 */
+	new_cfqq = cfq_close_cooperator(cfqd, cfqq, 0);
+	if (new_cfqq)
+		goto expire;
+
+	/*
 	 * No requests pending. If the active queue still has requests in
 	 * flight or is idling for a new request, allow either of these
 	 * conditions to happen (or time out) before selecting a new queue.
@@ -1050,7 +1219,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 expire:
 	cfq_slice_expired(cfqd, 0);
 new_queue:
-	cfqq = cfq_set_active_queue(cfqd);
+	cfqq = cfq_set_active_queue(cfqd, new_cfqq);
 keep_queue:
 	return cfqq;
 }
@@ -1333,14 +1502,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 	if (ioc->ioc_data == cic)
 		rcu_assign_pointer(ioc->ioc_data, NULL);
 
-	if (cic->cfqq[ASYNC]) {
-		cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
-		cic->cfqq[ASYNC] = NULL;
+	if (cic->cfqq[BLK_RW_ASYNC]) {
+		cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
+		cic->cfqq[BLK_RW_ASYNC] = NULL;
 	}
 
-	if (cic->cfqq[SYNC]) {
-		cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
-		cic->cfqq[SYNC] = NULL;
+	if (cic->cfqq[BLK_RW_SYNC]) {
+		cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]);
+		cic->cfqq[BLK_RW_SYNC] = NULL;
 	}
 }
@@ -1449,17 +1618,18 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
 
 	spin_lock_irqsave(cfqd->queue->queue_lock, flags);
 
-	cfqq = cic->cfqq[ASYNC];
+	cfqq = cic->cfqq[BLK_RW_ASYNC];
 	if (cfqq) {
 		struct cfq_queue *new_cfqq;
-		new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC);
+		new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
+						GFP_ATOMIC);
 		if (new_cfqq) {
-			cic->cfqq[ASYNC] = new_cfqq;
+			cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
 			cfq_put_queue(cfqq);
 		}
 	}
 
-	cfqq = cic->cfqq[SYNC];
+	cfqq = cic->cfqq[BLK_RW_SYNC];
 	if (cfqq)
 		cfq_mark_cfqq_prio_changed(cfqq);
 
@@ -1510,6 +1680,7 @@ retry:
 	}
 
 	RB_CLEAR_NODE(&cfqq->rb_node);
+	RB_CLEAR_NODE(&cfqq->p_node);
 	INIT_LIST_HEAD(&cfqq->fifo);
 
 	atomic_set(&cfqq->ref, 0);
@@ -1745,7 +1916,9 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
 	sector_t sdist;
 	u64 total;
 
-	if (cic->last_request_pos < rq->sector)
+	if (!cic->last_request_pos)
+		sdist = 0;
+	else if (cic->last_request_pos < rq->sector)
 		sdist = rq->sector - cic->last_request_pos;
 	else
 		sdist = cic->last_request_pos - rq->sector;
@@ -1905,10 +2078,20 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 * Remember that we saw a request from this process, but
 		 * don't start queuing just yet. Otherwise we risk seeing lots
 		 * of tiny requests, because we disrupt the normal plugging
-		 * and merging.
+		 * and merging. If the request is already larger than a single
+		 * page, let it rip immediately. For that case we assume that
+		 * merging is already done. Ditto for a busy system that
+		 * has other work pending, don't risk delaying until the
+		 * idle timer unplug to continue working.
 		 */
-		if (cfq_cfqq_wait_request(cfqq))
+		if (cfq_cfqq_wait_request(cfqq)) {
+			if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
+			    cfqd->busy_queues > 1) {
+				del_timer(&cfqd->idle_slice_timer);
+				blk_start_queueing(cfqd->queue);
+			}
 			cfq_mark_cfqq_must_dispatch(cfqq);
+		}
 	} else if (cfq_should_preempt(cfqd, cfqq, rq)) {
 		/*
 		 * not the active queue - expire current slice if it is
@@ -1992,16 +2175,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 	 * or if we want to idle in case it has no pending requests.
 	 */
 	if (cfqd->active_queue == cfqq) {
+		const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
+
 		if (cfq_cfqq_slice_new(cfqq)) {
 			cfq_set_prio_slice(cfqd, cfqq);
 			cfq_clear_cfqq_slice_new(cfqq);
 		}
+		/*
+		 * If there are no requests waiting in this queue, and
+		 * there are other queues ready to issue requests, AND
+		 * those other queues are issuing requests within our
+		 * mean seek distance, give them a chance to run instead
+		 * of idling.
+		 */
 		if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
 			cfq_slice_expired(cfqd, 1);
-		else if (sync && !rq_noidle(rq) &&
-			 RB_EMPTY_ROOT(&cfqq->sort_list)) {
+		else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq, 1) &&
+			 sync && !rq_noidle(rq))
 			cfq_arm_slice_timer(cfqd);
-		}
 	}
 
 	if (!cfqd->rq_in_driver)
@@ -2062,7 +2253,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
 	if (!cic)
 		return ELV_MQUEUE_MAY;
 
-	cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC);
+	cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
 	if (cfqq) {
 		cfq_init_prio_data(cfqq, cic->ioc);
 		cfq_prio_boost(cfqq);
@@ -2152,11 +2343,10 @@ static void cfq_kick_queue(struct work_struct *work)
 	struct cfq_data *cfqd =
 		container_of(work, struct cfq_data, unplug_work);
 	struct request_queue *q = cfqd->queue;
-	unsigned long flags;
 
-	spin_lock_irqsave(q->queue_lock, flags);
+	spin_lock_irq(q->queue_lock);
 	blk_start_queueing(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	spin_unlock_irq(q->queue_lock);
 }
 
 /*
@@ -2263,12 +2453,22 @@ static void cfq_exit_queue(struct elevator_queue *e)
 static void *cfq_init_queue(struct request_queue *q)
 {
 	struct cfq_data *cfqd;
+	int i;
 
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (!cfqd)
 		return NULL;
 
 	cfqd->service_tree = CFQ_RB_ROOT;
+
+	/*
+	 * Not strictly needed (since RB_ROOT just clears the node and we
+	 * zeroed cfqd on alloc), but better be safe in case someone decides
+	 * to add magic to the rb code
+	 */
+	for (i = 0; i < CFQ_PRIO_LISTS; i++)
+		cfqd->prio_trees[i] = RB_ROOT;
+
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
 	cfqd->queue = q;
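
Taken together, the cfq hunks above implement the "close cooperator" heuristic: each cfq_queue with a pending request is also keyed into a per-priority rbtree by the sector of its next_rq, and when the active queue runs dry, cfqq_close() probes that tree around cfqd->last_position. A worked example with hypothetical sectors: three queues whose next_rq sit at 1000, 5000 and 9000 share one prio tree; if the head just finished near 5000, the exact lookup returns that queue outright, while for 5100 the lookup fails and the parent of the NULL leaf (the 5000 node) plus one neighbour in the direction of 5100 (rb_next toward 9000) are each tested with cfq_rq_close(), i.e. against the issuing process's mean seek distance, falling back to the 8*1024-sector CIC_SEEK_THR when too few samples exist. A hit is marked coop and dispatched instead of idling.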
diff --git a/block/elevator.c b/block/elevator.c
index fb81bcc14a8c..7073a9072577 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -590,7 +590,7 @@ void elv_drain_elevator(struct request_queue *q)
 /*
 * Call with queue lock held, interrupts disabled
 */
-void elv_quisce_start(struct request_queue *q)
+void elv_quiesce_start(struct request_queue *q)
 {
 	queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
 
@@ -607,7 +607,7 @@ void elv_quisce_start(struct request_queue *q)
 	}
 }
 
-void elv_quisce_end(struct request_queue *q)
+void elv_quiesce_end(struct request_queue *q)
 {
 	queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
 }
@@ -1126,7 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	 * Turn on BYPASS and drain all requests w/ elevator private data
 	 */
 	spin_lock_irq(q->queue_lock);
-	elv_quisce_start(q);
+	elv_quiesce_start(q);
 
 	/*
 	 * Remember old elevator.
@@ -1150,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	 */
 	elevator_exit(old_elevator);
 	spin_lock_irq(q->queue_lock);
-	elv_quisce_end(q);
+	elv_quiesce_end(q);
 	spin_unlock_irq(q->queue_lock);
 
 	blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
diff --git a/block/genhd.c b/block/genhd.c
index a9ec910974c1..1a4916e01732 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -98,7 +98,7 @@ void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
 
 	if (flags & DISK_PITER_REVERSE)
 		piter->idx = ptbl->len - 1;
-	else if (flags & DISK_PITER_INCL_PART0)
+	else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
 		piter->idx = 0;
 	else
 		piter->idx = 1;
@@ -134,7 +134,8 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
 	/* determine iteration parameters */
 	if (piter->flags & DISK_PITER_REVERSE) {
 		inc = -1;
-		if (piter->flags & DISK_PITER_INCL_PART0)
+		if (piter->flags & (DISK_PITER_INCL_PART0 |
+				    DISK_PITER_INCL_EMPTY_PART0))
 			end = -1;
 		else
 			end = 0;
@@ -150,7 +151,10 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
 		part = rcu_dereference(ptbl->part[piter->idx]);
 		if (!part)
 			continue;
-		if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
+		if (!part->nr_sects &&
+		    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
+		    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
+		      piter->idx == 0))
 			continue;
 
 		get_device(part_to_dev(part));
@@ -1011,7 +1015,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 				"\n\n");
 	*/
 
-	disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0);
+	disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
 	while ((hd = disk_part_iter_next(&piter))) {
 		cpu = part_stat_lock();
 		part_round_stats(cpu, hd);
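
The new DISK_PITER_INCL_EMPTY_PART0 flag lets /proc/diskstats keep emitting the whole-disk row even when part0 reports zero sectors, without also dragging in every empty partition. A usage sketch mirroring diskstats_show() above (the per-partition printing is elided):

    struct disk_part_iter piter;
    struct hd_struct *part;

    disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
    while ((part = disk_part_iter_next(&piter))) {
            /* part0 appears even if empty; other empty partitions skipped */
    }
    disk_part_iter_exit(&piter);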
diff --git a/block/ioctl.c b/block/ioctl.c
index 0f22e629b13c..ad474d4bbcce 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -146,8 +146,6 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 		struct bio *bio;
 
 		bio = bio_alloc(GFP_KERNEL, 0);
-		if (!bio)
-			return -ENOMEM;
 
 		bio->bi_end_io = blk_ioc_discard_endio;
 		bio->bi_bdev = bdev;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 626ee274c5c4..82a0ca2f6729 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -217,7 +217,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 				 struct bio *bio)
 {
-	int ret = 0;
+	int r, ret = 0;
 
 	/*
 	 * fill in all the output members
@@ -242,7 +242,9 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 			ret = -EFAULT;
 	}
 
-	blk_rq_unmap_user(bio);
+	r = blk_rq_unmap_user(bio);
+	if (!ret)
+		ret = r;
 	blk_put_request(rq);
 
 	return ret;
@@ -288,6 +290,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 
 	if (hdr->iovec_count) {
 		const int size = sizeof(struct sg_iovec) * hdr->iovec_count;
+		size_t iov_data_len;
 		struct sg_iovec *iov;
 
 		iov = kmalloc(size, GFP_KERNEL);
@@ -302,8 +305,18 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 			goto out;
 		}
 
+		/* SG_IO howto says that the shorter of the two wins */
+		iov_data_len = iov_length((struct iovec *)iov,
+					  hdr->iovec_count);
+		if (hdr->dxfer_len < iov_data_len) {
+			hdr->iovec_count = iov_shorten((struct iovec *)iov,
+						       hdr->iovec_count,
+						       hdr->dxfer_len);
+			iov_data_len = hdr->dxfer_len;
+		}
+
 		ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count,
-					  hdr->dxfer_len, GFP_KERNEL);
+					  iov_data_len, GFP_KERNEL);
 		kfree(iov);
 	} else if (hdr->dxfer_len)
 		ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,
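
The sg_io() change enforces the SG_IO howto rule that the shorter of dxfer_len and the iovec's total length wins. A worked example with hypothetical sizes: three 4 KiB iovecs (12288 bytes total) against hdr->dxfer_len = 8192 means iov_length() reports 12288, iov_shorten() clips the vector to its first two entries, and the request is mapped for 8192 bytes:

    char buf0[4096], buf1[4096], buf2[4096];    /* hypothetical buffers */
    struct iovec iov[3] = {
            { .iov_base = buf0, .iov_len = 4096 },
            { .iov_base = buf1, .iov_len = 4096 },
            { .iov_base = buf2, .iov_len = 4096 },
    };
    unsigned long nsegs = 3;
    size_t want = 8192;                         /* hdr->dxfer_len */

    if (want < iov_length(iov, nsegs))          /* 8192 < 12288 */
            nsegs = iov_shorten(iov, nsegs, want);  /* nsegs becomes 2 */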