author    Ingo Molnar <mingo@elte.hu>  2009-04-24 04:11:18 -0400
committer Ingo Molnar <mingo@elte.hu>  2009-04-24 04:11:23 -0400
commit    416dfdcdb894432547ead4fcb9fa6a36b396059e (patch)
tree      8033fdda07397a59c5fa98c88927040906ce6c1a /block
parent    56449f437add737a1e5e1cb7e00f63ac8ead1938 (diff)
parent    091069740304c979f957ceacec39c461d0192158 (diff)
Merge commit 'v2.6.30-rc3' into tracing/hw-branch-tracing
Conflicts:
	arch/x86/kernel/ptrace.c

Merge reason: fix the conflict above, and also pick up the CONFIG_BROKEN
dependency change from upstream so that we can remove it here.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'block')
-rw-r--r--  block/as-iosched.c    116
-rw-r--r--  block/blk-barrier.c     3
-rw-r--r--  block/blk-core.c       16
-rw-r--r--  block/blk-merge.c      29
-rw-r--r--  block/blk-settings.c    2
-rw-r--r--  block/blk-sysfs.c       4
-rw-r--r--  block/blk.h            14
-rw-r--r--  block/cfq-iosched.c   464
-rw-r--r--  block/elevator.c       42
-rw-r--r--  block/ioctl.c           2
-rw-r--r--  block/scsi_ioctl.c      6
11 files changed, 453 insertions, 245 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 631f6f44460a..c48fa670d221 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -17,9 +17,6 @@
17#include <linux/rbtree.h> 17#include <linux/rbtree.h>
18#include <linux/interrupt.h> 18#include <linux/interrupt.h>
19 19
20#define REQ_SYNC 1
21#define REQ_ASYNC 0
22
23/* 20/*
24 * See Documentation/block/as-iosched.txt 21 * See Documentation/block/as-iosched.txt
25 */ 22 */
@@ -93,7 +90,7 @@ struct as_data {
93 struct list_head fifo_list[2]; 90 struct list_head fifo_list[2];
94 91
95 struct request *next_rq[2]; /* next in sort order */ 92 struct request *next_rq[2]; /* next in sort order */
96 sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ 93 sector_t last_sector[2]; /* last SYNC & ASYNC sectors */
97 94
98 unsigned long exit_prob; /* probability a task will exit while 95 unsigned long exit_prob; /* probability a task will exit while
99 being waited on */ 96 being waited on */
@@ -109,7 +106,7 @@ struct as_data {
109 unsigned long last_check_fifo[2]; 106 unsigned long last_check_fifo[2];
110 int changed_batch; /* 1: waiting for old batch to end */ 107 int changed_batch; /* 1: waiting for old batch to end */
111 int new_batch; /* 1: waiting on first read complete */ 108 int new_batch; /* 1: waiting on first read complete */
112 int batch_data_dir; /* current batch REQ_SYNC / REQ_ASYNC */ 109 int batch_data_dir; /* current batch SYNC / ASYNC */
113 int write_batch_count; /* max # of reqs in a write batch */ 110 int write_batch_count; /* max # of reqs in a write batch */
114 int current_write_count; /* how many requests left this batch */ 111 int current_write_count; /* how many requests left this batch */
115 int write_batch_idled; /* has the write batch gone idle? */ 112 int write_batch_idled; /* has the write batch gone idle? */
@@ -554,7 +551,7 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
554 if (aic == NULL) 551 if (aic == NULL)
555 return; 552 return;
556 553
557 if (data_dir == REQ_SYNC) { 554 if (data_dir == BLK_RW_SYNC) {
558 unsigned long in_flight = atomic_read(&aic->nr_queued) 555 unsigned long in_flight = atomic_read(&aic->nr_queued)
559 + atomic_read(&aic->nr_dispatched); 556 + atomic_read(&aic->nr_dispatched);
560 spin_lock(&aic->lock); 557 spin_lock(&aic->lock);
@@ -811,7 +808,7 @@ static void as_update_rq(struct as_data *ad, struct request *rq)
811 */ 808 */
812static void update_write_batch(struct as_data *ad) 809static void update_write_batch(struct as_data *ad)
813{ 810{
814 unsigned long batch = ad->batch_expire[REQ_ASYNC]; 811 unsigned long batch = ad->batch_expire[BLK_RW_ASYNC];
815 long write_time; 812 long write_time;
816 813
817 write_time = (jiffies - ad->current_batch_expires) + batch; 814 write_time = (jiffies - ad->current_batch_expires) + batch;
@@ -855,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
855 kblockd_schedule_work(q, &ad->antic_work); 852 kblockd_schedule_work(q, &ad->antic_work);
856 ad->changed_batch = 0; 853 ad->changed_batch = 0;
857 854
858 if (ad->batch_data_dir == REQ_SYNC) 855 if (ad->batch_data_dir == BLK_RW_SYNC)
859 ad->new_batch = 1; 856 ad->new_batch = 1;
860 } 857 }
861 WARN_ON(ad->nr_dispatched == 0); 858 WARN_ON(ad->nr_dispatched == 0);
@@ -869,7 +866,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
869 if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) { 866 if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
870 update_write_batch(ad); 867 update_write_batch(ad);
871 ad->current_batch_expires = jiffies + 868 ad->current_batch_expires = jiffies +
872 ad->batch_expire[REQ_SYNC]; 869 ad->batch_expire[BLK_RW_SYNC];
873 ad->new_batch = 0; 870 ad->new_batch = 0;
874 } 871 }
875 872
@@ -960,7 +957,7 @@ static inline int as_batch_expired(struct as_data *ad)
960 if (ad->changed_batch || ad->new_batch) 957 if (ad->changed_batch || ad->new_batch)
961 return 0; 958 return 0;
962 959
963 if (ad->batch_data_dir == REQ_SYNC) 960 if (ad->batch_data_dir == BLK_RW_SYNC)
964 /* TODO! add a check so a complete fifo gets written? */ 961 /* TODO! add a check so a complete fifo gets written? */
965 return time_after(jiffies, ad->current_batch_expires); 962 return time_after(jiffies, ad->current_batch_expires);
966 963
@@ -986,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
986 */ 983 */
987 ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; 984 ad->last_sector[data_dir] = rq->sector + rq->nr_sectors;
988 985
989 if (data_dir == REQ_SYNC) { 986 if (data_dir == BLK_RW_SYNC) {
990 struct io_context *ioc = RQ_IOC(rq); 987 struct io_context *ioc = RQ_IOC(rq);
991 /* In case we have to anticipate after this */ 988 /* In case we have to anticipate after this */
992 copy_io_context(&ad->io_context, &ioc); 989 copy_io_context(&ad->io_context, &ioc);
@@ -1025,41 +1022,41 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq)
1025static int as_dispatch_request(struct request_queue *q, int force) 1022static int as_dispatch_request(struct request_queue *q, int force)
1026{ 1023{
1027 struct as_data *ad = q->elevator->elevator_data; 1024 struct as_data *ad = q->elevator->elevator_data;
1028 const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); 1025 const int reads = !list_empty(&ad->fifo_list[BLK_RW_SYNC]);
1029 const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); 1026 const int writes = !list_empty(&ad->fifo_list[BLK_RW_ASYNC]);
1030 struct request *rq; 1027 struct request *rq;
1031 1028
1032 if (unlikely(force)) { 1029 if (unlikely(force)) {
1033 /* 1030 /*
1034 * Forced dispatch, accounting is useless. Reset 1031 * Forced dispatch, accounting is useless. Reset
1035 * accounting states and dump fifo_lists. Note that 1032 * accounting states and dump fifo_lists. Note that
1036 * batch_data_dir is reset to REQ_SYNC to avoid 1033 * batch_data_dir is reset to BLK_RW_SYNC to avoid
1037 * screwing write batch accounting as write batch 1034 * screwing write batch accounting as write batch
1038 * accounting occurs on W->R transition. 1035 * accounting occurs on W->R transition.
1039 */ 1036 */
1040 int dispatched = 0; 1037 int dispatched = 0;
1041 1038
1042 ad->batch_data_dir = REQ_SYNC; 1039 ad->batch_data_dir = BLK_RW_SYNC;
1043 ad->changed_batch = 0; 1040 ad->changed_batch = 0;
1044 ad->new_batch = 0; 1041 ad->new_batch = 0;
1045 1042
1046 while (ad->next_rq[REQ_SYNC]) { 1043 while (ad->next_rq[BLK_RW_SYNC]) {
1047 as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]); 1044 as_move_to_dispatch(ad, ad->next_rq[BLK_RW_SYNC]);
1048 dispatched++; 1045 dispatched++;
1049 } 1046 }
1050 ad->last_check_fifo[REQ_SYNC] = jiffies; 1047 ad->last_check_fifo[BLK_RW_SYNC] = jiffies;
1051 1048
1052 while (ad->next_rq[REQ_ASYNC]) { 1049 while (ad->next_rq[BLK_RW_ASYNC]) {
1053 as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]); 1050 as_move_to_dispatch(ad, ad->next_rq[BLK_RW_ASYNC]);
1054 dispatched++; 1051 dispatched++;
1055 } 1052 }
1056 ad->last_check_fifo[REQ_ASYNC] = jiffies; 1053 ad->last_check_fifo[BLK_RW_ASYNC] = jiffies;
1057 1054
1058 return dispatched; 1055 return dispatched;
1059 } 1056 }
1060 1057
1061 /* Signal that the write batch was uncontended, so we can't time it */ 1058 /* Signal that the write batch was uncontended, so we can't time it */
1062 if (ad->batch_data_dir == REQ_ASYNC && !reads) { 1059 if (ad->batch_data_dir == BLK_RW_ASYNC && !reads) {
1063 if (ad->current_write_count == 0 || !writes) 1060 if (ad->current_write_count == 0 || !writes)
1064 ad->write_batch_idled = 1; 1061 ad->write_batch_idled = 1;
1065 } 1062 }
@@ -1076,8 +1073,8 @@ static int as_dispatch_request(struct request_queue *q, int force)
1076 */ 1073 */
1077 rq = ad->next_rq[ad->batch_data_dir]; 1074 rq = ad->next_rq[ad->batch_data_dir];
1078 1075
1079 if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { 1076 if (ad->batch_data_dir == BLK_RW_SYNC && ad->antic_expire) {
1080 if (as_fifo_expired(ad, REQ_SYNC)) 1077 if (as_fifo_expired(ad, BLK_RW_SYNC))
1081 goto fifo_expired; 1078 goto fifo_expired;
1082 1079
1083 if (as_can_anticipate(ad, rq)) { 1080 if (as_can_anticipate(ad, rq)) {
@@ -1090,7 +1087,7 @@ static int as_dispatch_request(struct request_queue *q, int force)
1090 /* we have a "next request" */ 1087 /* we have a "next request" */
1091 if (reads && !writes) 1088 if (reads && !writes)
1092 ad->current_batch_expires = 1089 ad->current_batch_expires =
1093 jiffies + ad->batch_expire[REQ_SYNC]; 1090 jiffies + ad->batch_expire[BLK_RW_SYNC];
1094 goto dispatch_request; 1091 goto dispatch_request;
1095 } 1092 }
1096 } 1093 }
@@ -1101,20 +1098,20 @@ static int as_dispatch_request(struct request_queue *q, int force)
1101 */ 1098 */
1102 1099
1103 if (reads) { 1100 if (reads) {
1104 BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_SYNC])); 1101 BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_SYNC]));
1105 1102
1106 if (writes && ad->batch_data_dir == REQ_SYNC) 1103 if (writes && ad->batch_data_dir == BLK_RW_SYNC)
1107 /* 1104 /*
1108 * Last batch was a read, switch to writes 1105 * Last batch was a read, switch to writes
1109 */ 1106 */
1110 goto dispatch_writes; 1107 goto dispatch_writes;
1111 1108
1112 if (ad->batch_data_dir == REQ_ASYNC) { 1109 if (ad->batch_data_dir == BLK_RW_ASYNC) {
1113 WARN_ON(ad->new_batch); 1110 WARN_ON(ad->new_batch);
1114 ad->changed_batch = 1; 1111 ad->changed_batch = 1;
1115 } 1112 }
1116 ad->batch_data_dir = REQ_SYNC; 1113 ad->batch_data_dir = BLK_RW_SYNC;
1117 rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next); 1114 rq = rq_entry_fifo(ad->fifo_list[BLK_RW_SYNC].next);
1118 ad->last_check_fifo[ad->batch_data_dir] = jiffies; 1115 ad->last_check_fifo[ad->batch_data_dir] = jiffies;
1119 goto dispatch_request; 1116 goto dispatch_request;
1120 } 1117 }
@@ -1125,9 +1122,9 @@ static int as_dispatch_request(struct request_queue *q, int force)
1125 1122
1126 if (writes) { 1123 if (writes) {
1127dispatch_writes: 1124dispatch_writes:
1128 BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_ASYNC])); 1125 BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_ASYNC]));
1129 1126
1130 if (ad->batch_data_dir == REQ_SYNC) { 1127 if (ad->batch_data_dir == BLK_RW_SYNC) {
1131 ad->changed_batch = 1; 1128 ad->changed_batch = 1;
1132 1129
1133 /* 1130 /*
@@ -1137,11 +1134,11 @@ dispatch_writes:
1137 */ 1134 */
1138 ad->new_batch = 0; 1135 ad->new_batch = 0;
1139 } 1136 }
1140 ad->batch_data_dir = REQ_ASYNC; 1137 ad->batch_data_dir = BLK_RW_ASYNC;
1141 ad->current_write_count = ad->write_batch_count; 1138 ad->current_write_count = ad->write_batch_count;
1142 ad->write_batch_idled = 0; 1139 ad->write_batch_idled = 0;
1143 rq = rq_entry_fifo(ad->fifo_list[REQ_ASYNC].next); 1140 rq = rq_entry_fifo(ad->fifo_list[BLK_RW_ASYNC].next);
1144 ad->last_check_fifo[REQ_ASYNC] = jiffies; 1141 ad->last_check_fifo[BLK_RW_ASYNC] = jiffies;
1145 goto dispatch_request; 1142 goto dispatch_request;
1146 } 1143 }
1147 1144
@@ -1164,9 +1161,9 @@ fifo_expired:
1164 if (ad->nr_dispatched) 1161 if (ad->nr_dispatched)
1165 return 0; 1162 return 0;
1166 1163
1167 if (ad->batch_data_dir == REQ_ASYNC) 1164 if (ad->batch_data_dir == BLK_RW_ASYNC)
1168 ad->current_batch_expires = jiffies + 1165 ad->current_batch_expires = jiffies +
1169 ad->batch_expire[REQ_ASYNC]; 1166 ad->batch_expire[BLK_RW_ASYNC];
1170 else 1167 else
1171 ad->new_batch = 1; 1168 ad->new_batch = 1;
1172 1169
@@ -1238,8 +1235,8 @@ static int as_queue_empty(struct request_queue *q)
1238{ 1235{
1239 struct as_data *ad = q->elevator->elevator_data; 1236 struct as_data *ad = q->elevator->elevator_data;
1240 1237
1241 return list_empty(&ad->fifo_list[REQ_ASYNC]) 1238 return list_empty(&ad->fifo_list[BLK_RW_ASYNC])
1242 && list_empty(&ad->fifo_list[REQ_SYNC]); 1239 && list_empty(&ad->fifo_list[BLK_RW_SYNC]);
1243} 1240}
1244 1241
1245static int 1242static int
@@ -1346,8 +1343,8 @@ static void as_exit_queue(struct elevator_queue *e)
1346 del_timer_sync(&ad->antic_timer); 1343 del_timer_sync(&ad->antic_timer);
1347 cancel_work_sync(&ad->antic_work); 1344 cancel_work_sync(&ad->antic_work);
1348 1345
1349 BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); 1346 BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_SYNC]));
1350 BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); 1347 BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_ASYNC]));
1351 1348
1352 put_io_context(ad->io_context); 1349 put_io_context(ad->io_context);
1353 kfree(ad); 1350 kfree(ad);
@@ -1372,18 +1369,18 @@ static void *as_init_queue(struct request_queue *q)
1372 init_timer(&ad->antic_timer); 1369 init_timer(&ad->antic_timer);
1373 INIT_WORK(&ad->antic_work, as_work_handler); 1370 INIT_WORK(&ad->antic_work, as_work_handler);
1374 1371
1375 INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); 1372 INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_SYNC]);
1376 INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); 1373 INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_ASYNC]);
1377 ad->sort_list[REQ_SYNC] = RB_ROOT; 1374 ad->sort_list[BLK_RW_SYNC] = RB_ROOT;
1378 ad->sort_list[REQ_ASYNC] = RB_ROOT; 1375 ad->sort_list[BLK_RW_ASYNC] = RB_ROOT;
1379 ad->fifo_expire[REQ_SYNC] = default_read_expire; 1376 ad->fifo_expire[BLK_RW_SYNC] = default_read_expire;
1380 ad->fifo_expire[REQ_ASYNC] = default_write_expire; 1377 ad->fifo_expire[BLK_RW_ASYNC] = default_write_expire;
1381 ad->antic_expire = default_antic_expire; 1378 ad->antic_expire = default_antic_expire;
1382 ad->batch_expire[REQ_SYNC] = default_read_batch_expire; 1379 ad->batch_expire[BLK_RW_SYNC] = default_read_batch_expire;
1383 ad->batch_expire[REQ_ASYNC] = default_write_batch_expire; 1380 ad->batch_expire[BLK_RW_ASYNC] = default_write_batch_expire;
1384 1381
1385 ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; 1382 ad->current_batch_expires = jiffies + ad->batch_expire[BLK_RW_SYNC];
1386 ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10; 1383 ad->write_batch_count = ad->batch_expire[BLK_RW_ASYNC] / 10;
1387 if (ad->write_batch_count < 2) 1384 if (ad->write_batch_count < 2)
1388 ad->write_batch_count = 2; 1385 ad->write_batch_count = 2;
1389 1386
@@ -1432,11 +1429,11 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \
1432 struct as_data *ad = e->elevator_data; \ 1429 struct as_data *ad = e->elevator_data; \
1433 return as_var_show(jiffies_to_msecs((__VAR)), (page)); \ 1430 return as_var_show(jiffies_to_msecs((__VAR)), (page)); \
1434} 1431}
1435SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[REQ_SYNC]); 1432SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[BLK_RW_SYNC]);
1436SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[REQ_ASYNC]); 1433SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[BLK_RW_ASYNC]);
1437SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire); 1434SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire);
1438SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[REQ_SYNC]); 1435SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[BLK_RW_SYNC]);
1439SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]); 1436SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[BLK_RW_ASYNC]);
1440#undef SHOW_FUNCTION 1437#undef SHOW_FUNCTION
1441 1438
1442#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ 1439#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
@@ -1451,13 +1448,14 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
1451 *(__PTR) = msecs_to_jiffies(*(__PTR)); \ 1448 *(__PTR) = msecs_to_jiffies(*(__PTR)); \
1452 return ret; \ 1449 return ret; \
1453} 1450}
1454STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX); 1451STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[BLK_RW_SYNC], 0, INT_MAX);
1455STORE_FUNCTION(as_write_expire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX); 1452STORE_FUNCTION(as_write_expire_store,
1453 &ad->fifo_expire[BLK_RW_ASYNC], 0, INT_MAX);
1456STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX); 1454STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX);
1457STORE_FUNCTION(as_read_batch_expire_store, 1455STORE_FUNCTION(as_read_batch_expire_store,
1458 &ad->batch_expire[REQ_SYNC], 0, INT_MAX); 1456 &ad->batch_expire[BLK_RW_SYNC], 0, INT_MAX);
1459STORE_FUNCTION(as_write_batch_expire_store, 1457STORE_FUNCTION(as_write_batch_expire_store,
1460 &ad->batch_expire[REQ_ASYNC], 0, INT_MAX); 1458 &ad->batch_expire[BLK_RW_ASYNC], 0, INT_MAX);
1461#undef STORE_FUNCTION 1459#undef STORE_FUNCTION
1462 1460
1463#define AS_ATTR(name) \ 1461#define AS_ATTR(name) \
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index f7dae57e6cab..20b4111fa050 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -319,9 +319,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
319 return -ENXIO; 319 return -ENXIO;
320 320
321 bio = bio_alloc(GFP_KERNEL, 0); 321 bio = bio_alloc(GFP_KERNEL, 0);
322 if (!bio)
323 return -ENOMEM;
324
325 bio->bi_end_io = bio_end_empty_barrier; 322 bio->bi_end_io = bio_end_empty_barrier;
326 bio->bi_private = &wait; 323 bio->bi_private = &wait;
327 bio->bi_bdev = bdev; 324 bio->bi_bdev = bdev;
diff --git a/block/blk-core.c b/block/blk-core.c
index 25572802dac2..07ab75403e1a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -64,12 +64,11 @@ static struct workqueue_struct *kblockd_workqueue;
64 64
65static void drive_stat_acct(struct request *rq, int new_io) 65static void drive_stat_acct(struct request *rq, int new_io)
66{ 66{
67 struct gendisk *disk = rq->rq_disk;
68 struct hd_struct *part; 67 struct hd_struct *part;
69 int rw = rq_data_dir(rq); 68 int rw = rq_data_dir(rq);
70 int cpu; 69 int cpu;
71 70
72 if (!blk_fs_request(rq) || !disk || !blk_do_io_stat(disk->queue)) 71 if (!blk_fs_request(rq) || !blk_do_io_stat(rq))
73 return; 72 return;
74 73
75 cpu = part_stat_lock(); 74 cpu = part_stat_lock();
@@ -132,6 +131,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
132 INIT_HLIST_NODE(&rq->hash); 131 INIT_HLIST_NODE(&rq->hash);
133 RB_CLEAR_NODE(&rq->rb_node); 132 RB_CLEAR_NODE(&rq->rb_node);
134 rq->cmd = rq->__cmd; 133 rq->cmd = rq->__cmd;
134 rq->cmd_len = BLK_MAX_CDB;
135 rq->tag = -1; 135 rq->tag = -1;
136 rq->ref_count = 1; 136 rq->ref_count = 1;
137} 137}
@@ -1124,8 +1124,6 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1124 1124
1125 if (bio_sync(bio)) 1125 if (bio_sync(bio))
1126 req->cmd_flags |= REQ_RW_SYNC; 1126 req->cmd_flags |= REQ_RW_SYNC;
1127 if (bio_unplug(bio))
1128 req->cmd_flags |= REQ_UNPLUG;
1129 if (bio_rw_meta(bio)) 1127 if (bio_rw_meta(bio))
1130 req->cmd_flags |= REQ_RW_META; 1128 req->cmd_flags |= REQ_RW_META;
1131 if (bio_noidle(bio)) 1129 if (bio_noidle(bio))
@@ -1675,9 +1673,7 @@ EXPORT_SYMBOL(blkdev_dequeue_request);
1675 1673
1676static void blk_account_io_completion(struct request *req, unsigned int bytes) 1674static void blk_account_io_completion(struct request *req, unsigned int bytes)
1677{ 1675{
1678 struct gendisk *disk = req->rq_disk; 1676 if (!blk_do_io_stat(req))
1679
1680 if (!disk || !blk_do_io_stat(disk->queue))
1681 return; 1677 return;
1682 1678
1683 if (blk_fs_request(req)) { 1679 if (blk_fs_request(req)) {
@@ -1694,9 +1690,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
1694 1690
1695static void blk_account_io_done(struct request *req) 1691static void blk_account_io_done(struct request *req)
1696{ 1692{
1697 struct gendisk *disk = req->rq_disk; 1693 if (!blk_do_io_stat(req))
1698
1699 if (!disk || !blk_do_io_stat(disk->queue))
1700 return; 1694 return;
1701 1695
1702 /* 1696 /*
@@ -1711,7 +1705,7 @@ static void blk_account_io_done(struct request *req)
1711 int cpu; 1705 int cpu;
1712 1706
1713 cpu = part_stat_lock(); 1707 cpu = part_stat_lock();
1714 part = disk_map_sector_rcu(disk, req->sector); 1708 part = disk_map_sector_rcu(req->rq_disk, req->sector);
1715 1709
1716 part_stat_inc(cpu, part, ios[rw]); 1710 part_stat_inc(cpu, part, ios[rw]);
1717 part_stat_add(cpu, part, ticks[rw], duration); 1711 part_stat_add(cpu, part, ticks[rw], duration);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e39cb24b7679..63760ca3da0f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -338,6 +338,22 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
338 return 1; 338 return 1;
339} 339}
340 340
341static void blk_account_io_merge(struct request *req)
342{
343 if (blk_do_io_stat(req)) {
344 struct hd_struct *part;
345 int cpu;
346
347 cpu = part_stat_lock();
348 part = disk_map_sector_rcu(req->rq_disk, req->sector);
349
350 part_round_stats(cpu, part);
351 part_dec_in_flight(part);
352
353 part_stat_unlock();
354 }
355}
356
341/* 357/*
342 * Has to be called with the request spinlock acquired 358 * Has to be called with the request spinlock acquired
343 */ 359 */
@@ -386,18 +402,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
386 402
387 elv_merge_requests(q, req, next); 403 elv_merge_requests(q, req, next);
388 404
389 if (req->rq_disk) { 405 blk_account_io_merge(req);
390 struct hd_struct *part;
391 int cpu;
392
393 cpu = part_stat_lock();
394 part = disk_map_sector_rcu(req->rq_disk, req->sector);
395
396 part_round_stats(cpu, part);
397 part_dec_in_flight(part);
398
399 part_stat_unlock();
400 }
401 406
402 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 407 req->ioprio = ioprio_best(req->ioprio, next->ioprio);
403 if (blk_rq_cpu_valid(next)) 408 if (blk_rq_cpu_valid(next))
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 59fd05d9f1d5..69c42adde52b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -431,7 +431,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary);
431 * 431 *
432 * description: 432 * description:
433 * set required memory and length alignment for direct dma transactions. 433 * set required memory and length alignment for direct dma transactions.
434 * this is used when buiding direct io requests for the queue. 434 * this is used when building direct io requests for the queue.
435 * 435 *
436 **/ 436 **/
437void blk_queue_dma_alignment(struct request_queue *q, int mask) 437void blk_queue_dma_alignment(struct request_queue *q, int mask)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ff9bba3379a..cac4e9febe6a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -209,10 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
209 ssize_t ret = queue_var_store(&stats, page, count); 209 ssize_t ret = queue_var_store(&stats, page, count);
210 210
211 spin_lock_irq(q->queue_lock); 211 spin_lock_irq(q->queue_lock);
212 elv_quiesce_start(q);
213
212 if (stats) 214 if (stats)
213 queue_flag_set(QUEUE_FLAG_IO_STAT, q); 215 queue_flag_set(QUEUE_FLAG_IO_STAT, q);
214 else 216 else
215 queue_flag_clear(QUEUE_FLAG_IO_STAT, q); 217 queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
218
219 elv_quiesce_end(q);
216 spin_unlock_irq(q->queue_lock); 220 spin_unlock_irq(q->queue_lock);
217 221
218 return ret; 222 return ret;
diff --git a/block/blk.h b/block/blk.h
index 3ee94358b43d..5dfc41267a08 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -70,6 +70,10 @@ void blk_queue_congestion_threshold(struct request_queue *q);
70 70
71int blk_dev_init(void); 71int blk_dev_init(void);
72 72
73void elv_quiesce_start(struct request_queue *q);
74void elv_quiesce_end(struct request_queue *q);
75
76
73/* 77/*
74 * Return the threshold (number of used requests) at which the queue is 78 * Return the threshold (number of used requests) at which the queue is
75 * considered to be congested. It include a little hysteresis to keep the 79 * considered to be congested. It include a little hysteresis to keep the
@@ -108,12 +112,14 @@ static inline int blk_cpu_to_group(int cpu)
108#endif 112#endif
109} 113}
110 114
111static inline int blk_do_io_stat(struct request_queue *q) 115static inline int blk_do_io_stat(struct request *rq)
112{ 116{
113 if (q) 117 struct gendisk *disk = rq->rq_disk;
114 return blk_queue_io_stat(q);
115 118
116 return 0; 119 if (!disk || !disk->queue)
120 return 0;
121
122 return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV);
117} 123}
118 124
119#endif 125#endif
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9e809345f71a..0d3b70de3d80 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -56,9 +56,6 @@ static DEFINE_SPINLOCK(ioc_gone_lock);
56#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 56#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
57#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) 57#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
58 58
59#define ASYNC (0)
60#define SYNC (1)
61
62#define sample_valid(samples) ((samples) > 80) 59#define sample_valid(samples) ((samples) > 80)
63 60
64/* 61/*
@@ -83,6 +80,14 @@ struct cfq_data {
83 * rr list of queues with requests and the count of them 80 * rr list of queues with requests and the count of them
84 */ 81 */
85 struct cfq_rb_root service_tree; 82 struct cfq_rb_root service_tree;
83
84 /*
85 * Each priority tree is sorted by next_request position. These
86 * trees are used when determining if two or more queues are
87 * interleaving requests (see cfq_close_cooperator).
88 */
89 struct rb_root prio_trees[CFQ_PRIO_LISTS];
90
86 unsigned int busy_queues; 91 unsigned int busy_queues;
87 /* 92 /*
88 * Used to track any pending rt requests so we can pre-empt current 93 * Used to track any pending rt requests so we can pre-empt current
@@ -147,6 +152,8 @@ struct cfq_queue {
147 struct rb_node rb_node; 152 struct rb_node rb_node;
148 /* service_tree key */ 153 /* service_tree key */
149 unsigned long rb_key; 154 unsigned long rb_key;
155 /* prio tree member */
156 struct rb_node p_node;
150 /* sorted list of pending requests */ 157 /* sorted list of pending requests */
151 struct rb_root sort_list; 158 struct rb_root sort_list;
152 /* if fifo isn't expired, next request to serve */ 159 /* if fifo isn't expired, next request to serve */
@@ -160,6 +167,7 @@ struct cfq_queue {
160 167
161 unsigned long slice_end; 168 unsigned long slice_end;
162 long slice_resid; 169 long slice_resid;
170 unsigned int slice_dispatch;
163 171
164 /* pending metadata requests */ 172 /* pending metadata requests */
165 int meta_pending; 173 int meta_pending;
@@ -176,15 +184,15 @@ struct cfq_queue {
176enum cfqq_state_flags { 184enum cfqq_state_flags {
177 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ 185 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */
178 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ 186 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */
187 CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */
179 CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ 188 CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
180 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ 189 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
181 CFQ_CFQQ_FLAG_must_dispatch, /* must dispatch, even if expired */
182 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ 190 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
183 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ 191 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */
184 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ 192 CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */
185 CFQ_CFQQ_FLAG_queue_new, /* queue never been serviced */
186 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ 193 CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */
187 CFQ_CFQQ_FLAG_sync, /* synchronous queue */ 194 CFQ_CFQQ_FLAG_sync, /* synchronous queue */
195 CFQ_CFQQ_FLAG_coop, /* has done a coop jump of the queue */
188}; 196};
189 197
190#define CFQ_CFQQ_FNS(name) \ 198#define CFQ_CFQQ_FNS(name) \
@@ -203,15 +211,15 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
203 211
204CFQ_CFQQ_FNS(on_rr); 212CFQ_CFQQ_FNS(on_rr);
205CFQ_CFQQ_FNS(wait_request); 213CFQ_CFQQ_FNS(wait_request);
214CFQ_CFQQ_FNS(must_dispatch);
206CFQ_CFQQ_FNS(must_alloc); 215CFQ_CFQQ_FNS(must_alloc);
207CFQ_CFQQ_FNS(must_alloc_slice); 216CFQ_CFQQ_FNS(must_alloc_slice);
208CFQ_CFQQ_FNS(must_dispatch);
209CFQ_CFQQ_FNS(fifo_expire); 217CFQ_CFQQ_FNS(fifo_expire);
210CFQ_CFQQ_FNS(idle_window); 218CFQ_CFQQ_FNS(idle_window);
211CFQ_CFQQ_FNS(prio_changed); 219CFQ_CFQQ_FNS(prio_changed);
212CFQ_CFQQ_FNS(queue_new);
213CFQ_CFQQ_FNS(slice_new); 220CFQ_CFQQ_FNS(slice_new);
214CFQ_CFQQ_FNS(sync); 221CFQ_CFQQ_FNS(sync);
222CFQ_CFQQ_FNS(coop);
215#undef CFQ_CFQQ_FNS 223#undef CFQ_CFQQ_FNS
216 224
217#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ 225#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
@@ -420,13 +428,17 @@ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
420 return NULL; 428 return NULL;
421} 429}
422 430
431static void rb_erase_init(struct rb_node *n, struct rb_root *root)
432{
433 rb_erase(n, root);
434 RB_CLEAR_NODE(n);
435}
436
423static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) 437static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
424{ 438{
425 if (root->left == n) 439 if (root->left == n)
426 root->left = NULL; 440 root->left = NULL;
427 441 rb_erase_init(n, &root->rb);
428 rb_erase(n, &root->rb);
429 RB_CLEAR_NODE(n);
430} 442}
431 443
432/* 444/*
@@ -471,8 +483,8 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
471 * requests waiting to be processed. It is sorted in the order that 483 * requests waiting to be processed. It is sorted in the order that
472 * we will service the queues. 484 * we will service the queues.
473 */ 485 */
474static void cfq_service_tree_add(struct cfq_data *cfqd, 486static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
475 struct cfq_queue *cfqq, int add_front) 487 int add_front)
476{ 488{
477 struct rb_node **p, *parent; 489 struct rb_node **p, *parent;
478 struct cfq_queue *__cfqq; 490 struct cfq_queue *__cfqq;
@@ -545,6 +557,63 @@ static void cfq_service_tree_add(struct cfq_data *cfqd,
545 rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb); 557 rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb);
546} 558}
547 559
560static struct cfq_queue *
561cfq_prio_tree_lookup(struct cfq_data *cfqd, int ioprio, sector_t sector,
562 struct rb_node **ret_parent, struct rb_node ***rb_link)
563{
564 struct rb_root *root = &cfqd->prio_trees[ioprio];
565 struct rb_node **p, *parent;
566 struct cfq_queue *cfqq = NULL;
567
568 parent = NULL;
569 p = &root->rb_node;
570 while (*p) {
571 struct rb_node **n;
572
573 parent = *p;
574 cfqq = rb_entry(parent, struct cfq_queue, p_node);
575
576 /*
577 * Sort strictly based on sector. Smallest to the left,
578 * largest to the right.
579 */
580 if (sector > cfqq->next_rq->sector)
581 n = &(*p)->rb_right;
582 else if (sector < cfqq->next_rq->sector)
583 n = &(*p)->rb_left;
584 else
585 break;
586 p = n;
587 }
588
589 *ret_parent = parent;
590 if (rb_link)
591 *rb_link = p;
592 return NULL;
593}
594
595static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
596{
597 struct rb_root *root = &cfqd->prio_trees[cfqq->ioprio];
598 struct rb_node **p, *parent;
599 struct cfq_queue *__cfqq;
600
601 if (!RB_EMPTY_NODE(&cfqq->p_node))
602 rb_erase_init(&cfqq->p_node, root);
603
604 if (cfq_class_idle(cfqq))
605 return;
606 if (!cfqq->next_rq)
607 return;
608
609 __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->ioprio, cfqq->next_rq->sector,
610 &parent, &p);
611 BUG_ON(__cfqq);
612
613 rb_link_node(&cfqq->p_node, parent, p);
614 rb_insert_color(&cfqq->p_node, root);
615}
616
548/* 617/*
549 * Update cfqq's position in the service tree. 618 * Update cfqq's position in the service tree.
550 */ 619 */
@@ -553,8 +622,10 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
553 /* 622 /*
554 * Resorting requires the cfqq to be on the RR list already. 623 * Resorting requires the cfqq to be on the RR list already.
555 */ 624 */
556 if (cfq_cfqq_on_rr(cfqq)) 625 if (cfq_cfqq_on_rr(cfqq)) {
557 cfq_service_tree_add(cfqd, cfqq, 0); 626 cfq_service_tree_add(cfqd, cfqq, 0);
627 cfq_prio_tree_add(cfqd, cfqq);
628 }
558} 629}
559 630
560/* 631/*
@@ -585,6 +656,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
585 656
586 if (!RB_EMPTY_NODE(&cfqq->rb_node)) 657 if (!RB_EMPTY_NODE(&cfqq->rb_node))
587 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); 658 cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree);
659 if (!RB_EMPTY_NODE(&cfqq->p_node))
660 rb_erase_init(&cfqq->p_node, &cfqd->prio_trees[cfqq->ioprio]);
588 661
589 BUG_ON(!cfqd->busy_queues); 662 BUG_ON(!cfqd->busy_queues);
590 cfqd->busy_queues--; 663 cfqd->busy_queues--;
@@ -614,7 +687,7 @@ static void cfq_add_rq_rb(struct request *rq)
614{ 687{
615 struct cfq_queue *cfqq = RQ_CFQQ(rq); 688 struct cfq_queue *cfqq = RQ_CFQQ(rq);
616 struct cfq_data *cfqd = cfqq->cfqd; 689 struct cfq_data *cfqd = cfqq->cfqd;
617 struct request *__alias; 690 struct request *__alias, *prev;
618 691
619 cfqq->queued[rq_is_sync(rq)]++; 692 cfqq->queued[rq_is_sync(rq)]++;
620 693
@@ -631,7 +704,15 @@ static void cfq_add_rq_rb(struct request *rq)
631 /* 704 /*
632 * check if this request is a better next-serve candidate 705 * check if this request is a better next-serve candidate
633 */ 706 */
707 prev = cfqq->next_rq;
634 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); 708 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq);
709
710 /*
711 * adjust priority tree position, if ->next_rq changes
712 */
713 if (prev != cfqq->next_rq)
714 cfq_prio_tree_add(cfqd, cfqq);
715
635 BUG_ON(!cfqq->next_rq); 716 BUG_ON(!cfqq->next_rq);
636} 717}
637 718
@@ -774,10 +855,15 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
774 if (cfqq) { 855 if (cfqq) {
775 cfq_log_cfqq(cfqd, cfqq, "set_active"); 856 cfq_log_cfqq(cfqd, cfqq, "set_active");
776 cfqq->slice_end = 0; 857 cfqq->slice_end = 0;
858 cfqq->slice_dispatch = 0;
859
860 cfq_clear_cfqq_wait_request(cfqq);
861 cfq_clear_cfqq_must_dispatch(cfqq);
777 cfq_clear_cfqq_must_alloc_slice(cfqq); 862 cfq_clear_cfqq_must_alloc_slice(cfqq);
778 cfq_clear_cfqq_fifo_expire(cfqq); 863 cfq_clear_cfqq_fifo_expire(cfqq);
779 cfq_mark_cfqq_slice_new(cfqq); 864 cfq_mark_cfqq_slice_new(cfqq);
780 cfq_clear_cfqq_queue_new(cfqq); 865
866 del_timer(&cfqd->idle_slice_timer);
781 } 867 }
782 868
783 cfqd->active_queue = cfqq; 869 cfqd->active_queue = cfqq;
@@ -795,7 +881,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
795 if (cfq_cfqq_wait_request(cfqq)) 881 if (cfq_cfqq_wait_request(cfqq))
796 del_timer(&cfqd->idle_slice_timer); 882 del_timer(&cfqd->idle_slice_timer);
797 883
798 cfq_clear_cfqq_must_dispatch(cfqq);
799 cfq_clear_cfqq_wait_request(cfqq); 884 cfq_clear_cfqq_wait_request(cfqq);
800 885
801 /* 886 /*
@@ -840,11 +925,15 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
840/* 925/*
841 * Get and set a new active queue for service. 926 * Get and set a new active queue for service.
842 */ 927 */
843static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) 928static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
929 struct cfq_queue *cfqq)
844{ 930{
845 struct cfq_queue *cfqq; 931 if (!cfqq) {
932 cfqq = cfq_get_next_queue(cfqd);
933 if (cfqq)
934 cfq_clear_cfqq_coop(cfqq);
935 }
846 936
847 cfqq = cfq_get_next_queue(cfqd);
848 __cfq_set_active_queue(cfqd, cfqq); 937 __cfq_set_active_queue(cfqd, cfqq);
849 return cfqq; 938 return cfqq;
850} 939}
@@ -868,17 +957,89 @@ static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq)
868 return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean; 957 return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean;
869} 958}
870 959
871static int cfq_close_cooperator(struct cfq_data *cfq_data, 960static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
872 struct cfq_queue *cfqq) 961 struct cfq_queue *cur_cfqq)
873{ 962{
963 struct rb_root *root = &cfqd->prio_trees[cur_cfqq->ioprio];
964 struct rb_node *parent, *node;
965 struct cfq_queue *__cfqq;
966 sector_t sector = cfqd->last_position;
967
968 if (RB_EMPTY_ROOT(root))
969 return NULL;
970
971 /*
972 * First, if we find a request starting at the end of the last
973 * request, choose it.
974 */
975 __cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->ioprio,
976 sector, &parent, NULL);
977 if (__cfqq)
978 return __cfqq;
979
980 /*
981 * If the exact sector wasn't found, the parent of the NULL leaf
982 * will contain the closest sector.
983 */
984 __cfqq = rb_entry(parent, struct cfq_queue, p_node);
985 if (cfq_rq_close(cfqd, __cfqq->next_rq))
986 return __cfqq;
987
988 if (__cfqq->next_rq->sector < sector)
989 node = rb_next(&__cfqq->p_node);
990 else
991 node = rb_prev(&__cfqq->p_node);
992 if (!node)
993 return NULL;
994
995 __cfqq = rb_entry(node, struct cfq_queue, p_node);
996 if (cfq_rq_close(cfqd, __cfqq->next_rq))
997 return __cfqq;
998
999 return NULL;
1000}
1001
1002/*
1003 * cfqd - obvious
1004 * cur_cfqq - passed in so that we don't decide that the current queue is
1005 * closely cooperating with itself.
1006 *
1007 * So, basically we're assuming that that cur_cfqq has dispatched at least
1008 * one request, and that cfqd->last_position reflects a position on the disk
1009 * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid
1010 * assumption.
1011 */
1012static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
1013 struct cfq_queue *cur_cfqq,
1014 int probe)
1015{
1016 struct cfq_queue *cfqq;
1017
1018 /*
1019 * A valid cfq_io_context is necessary to compare requests against
1020 * the seek_mean of the current cfqq.
1021 */
1022 if (!cfqd->active_cic)
1023 return NULL;
1024
874 /* 1025 /*
875 * We should notice if some of the queues are cooperating, eg 1026 * We should notice if some of the queues are cooperating, eg
876 * working closely on the same area of the disk. In that case, 1027 * working closely on the same area of the disk. In that case,
877 * we can group them together and don't waste time idling. 1028 * we can group them together and don't waste time idling.
878 */ 1029 */
879 return 0; 1030 cfqq = cfqq_close(cfqd, cur_cfqq);
1031 if (!cfqq)
1032 return NULL;
1033
1034 if (cfq_cfqq_coop(cfqq))
1035 return NULL;
1036
1037 if (!probe)
1038 cfq_mark_cfqq_coop(cfqq);
1039 return cfqq;
880} 1040}
881 1041
1042
882#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024)) 1043#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024))
883 1044
884static void cfq_arm_slice_timer(struct cfq_data *cfqd) 1045static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -917,14 +1078,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
917 if (!cic || !atomic_read(&cic->ioc->nr_tasks)) 1078 if (!cic || !atomic_read(&cic->ioc->nr_tasks))
918 return; 1079 return;
919 1080
920 /*
921 * See if this prio level has a good candidate
922 */
923 if (cfq_close_cooperator(cfqd, cfqq) &&
924 (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2))
925 return;
926
927 cfq_mark_cfqq_must_dispatch(cfqq);
928 cfq_mark_cfqq_wait_request(cfqq); 1081 cfq_mark_cfqq_wait_request(cfqq);
929 1082
930 /* 1083 /*
@@ -937,7 +1090,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
937 sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); 1090 sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
938 1091
939 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 1092 mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
940 cfq_log(cfqd, "arm_idle: %lu", sl); 1093 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
941} 1094}
942 1095
943/* 1096/*
@@ -1001,7 +1154,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1001 */ 1154 */
1002static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) 1155static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1003{ 1156{
1004 struct cfq_queue *cfqq; 1157 struct cfq_queue *cfqq, *new_cfqq = NULL;
1005 1158
1006 cfqq = cfqd->active_queue; 1159 cfqq = cfqd->active_queue;
1007 if (!cfqq) 1160 if (!cfqq)
@@ -1010,7 +1163,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1010 /* 1163 /*
1011 * The active queue has run out of time, expire it and select new. 1164 * The active queue has run out of time, expire it and select new.
1012 */ 1165 */
1013 if (cfq_slice_used(cfqq)) 1166 if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq))
1014 goto expire; 1167 goto expire;
1015 1168
1016 /* 1169 /*
@@ -1035,6 +1188,16 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1035 goto keep_queue; 1188 goto keep_queue;
1036 1189
1037 /* 1190 /*
1191 * If another queue has a request waiting within our mean seek
1192 * distance, let it run. The expire code will check for close
1193 * cooperators and put the close queue at the front of the service
1194 * tree.
1195 */
1196 new_cfqq = cfq_close_cooperator(cfqd, cfqq, 0);
1197 if (new_cfqq)
1198 goto expire;
1199
1200 /*
1038 * No requests pending. If the active queue still has requests in 1201 * No requests pending. If the active queue still has requests in
1039 * flight or is idling for a new request, allow either of these 1202 * flight or is idling for a new request, allow either of these
1040 * conditions to happen (or time out) before selecting a new queue. 1203 * conditions to happen (or time out) before selecting a new queue.
@@ -1048,71 +1211,11 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1048expire: 1211expire:
1049 cfq_slice_expired(cfqd, 0); 1212 cfq_slice_expired(cfqd, 0);
1050new_queue: 1213new_queue:
1051 cfqq = cfq_set_active_queue(cfqd); 1214 cfqq = cfq_set_active_queue(cfqd, new_cfqq);
1052keep_queue: 1215keep_queue:
1053 return cfqq; 1216 return cfqq;
1054} 1217}
1055 1218
1056/*
1057 * Dispatch some requests from cfqq, moving them to the request queue
1058 * dispatch list.
1059 */
1060static int
1061__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1062 int max_dispatch)
1063{
1064 int dispatched = 0;
1065
1066 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
1067
1068 do {
1069 struct request *rq;
1070
1071 /*
1072 * follow expired path, else get first next available
1073 */
1074 rq = cfq_check_fifo(cfqq);
1075 if (rq == NULL)
1076 rq = cfqq->next_rq;
1077
1078 /*
1079 * finally, insert request into driver dispatch list
1080 */
1081 cfq_dispatch_insert(cfqd->queue, rq);
1082
1083 dispatched++;
1084
1085 if (!cfqd->active_cic) {
1086 atomic_inc(&RQ_CIC(rq)->ioc->refcount);
1087 cfqd->active_cic = RQ_CIC(rq);
1088 }
1089
1090 if (RB_EMPTY_ROOT(&cfqq->sort_list))
1091 break;
1092
1093 /*
1094 * If there is a non-empty RT cfqq waiting for current
1095 * cfqq's timeslice to complete, pre-empt this cfqq
1096 */
1097 if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues)
1098 break;
1099
1100 } while (dispatched < max_dispatch);
1101
1102 /*
1103 * expire an async queue immediately if it has used up its slice. idle
1104 * queue always expire after 1 dispatch round.
1105 */
1106 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
1107 dispatched >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
1108 cfq_class_idle(cfqq))) {
1109 cfqq->slice_end = jiffies + 1;
1110 cfq_slice_expired(cfqd, 0);
1111 }
1112
1113 return dispatched;
1114}
1115
1116static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) 1219static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
1117{ 1220{
1118 int dispatched = 0; 1221 int dispatched = 0;
@@ -1146,11 +1249,45 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
1146 return dispatched; 1249 return dispatched;
1147} 1250}
1148 1251
1252/*
1253 * Dispatch a request from cfqq, moving them to the request queue
1254 * dispatch list.
1255 */
1256static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1257{
1258 struct request *rq;
1259
1260 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
1261
1262 /*
1263 * follow expired path, else get first next available
1264 */
1265 rq = cfq_check_fifo(cfqq);
1266 if (!rq)
1267 rq = cfqq->next_rq;
1268
1269 /*
1270 * insert request into driver dispatch list
1271 */
1272 cfq_dispatch_insert(cfqd->queue, rq);
1273
1274 if (!cfqd->active_cic) {
1275 struct cfq_io_context *cic = RQ_CIC(rq);
1276
1277 atomic_inc(&cic->ioc->refcount);
1278 cfqd->active_cic = cic;
1279 }
1280}
1281
1282/*
1283 * Find the cfqq that we need to service and move a request from that to the
1284 * dispatch list
1285 */
1149static int cfq_dispatch_requests(struct request_queue *q, int force) 1286static int cfq_dispatch_requests(struct request_queue *q, int force)
1150{ 1287{
1151 struct cfq_data *cfqd = q->elevator->elevator_data; 1288 struct cfq_data *cfqd = q->elevator->elevator_data;
1152 struct cfq_queue *cfqq; 1289 struct cfq_queue *cfqq;
1153 int dispatched; 1290 unsigned int max_dispatch;
1154 1291
1155 if (!cfqd->busy_queues) 1292 if (!cfqd->busy_queues)
1156 return 0; 1293 return 0;
@@ -1158,29 +1295,63 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
1158 if (unlikely(force)) 1295 if (unlikely(force))
1159 return cfq_forced_dispatch(cfqd); 1296 return cfq_forced_dispatch(cfqd);
1160 1297
1161 dispatched = 0; 1298 cfqq = cfq_select_queue(cfqd);
1162 while ((cfqq = cfq_select_queue(cfqd)) != NULL) { 1299 if (!cfqq)
1163 int max_dispatch; 1300 return 0;
1301
1302 /*
1303 * If this is an async queue and we have sync IO in flight, let it wait
1304 */
1305 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
1306 return 0;
1164 1307
1165 max_dispatch = cfqd->cfq_quantum; 1308 max_dispatch = cfqd->cfq_quantum;
1309 if (cfq_class_idle(cfqq))
1310 max_dispatch = 1;
1311
1312 /*
1313 * Does this cfqq already have too much IO in flight?
1314 */
1315 if (cfqq->dispatched >= max_dispatch) {
1316 /*
1317 * idle queue must always only have a single IO in flight
1318 */
1166 if (cfq_class_idle(cfqq)) 1319 if (cfq_class_idle(cfqq))
1167 max_dispatch = 1; 1320 return 0;
1168 1321
1169 if (cfqq->dispatched >= max_dispatch && cfqd->busy_queues > 1) 1322 /*
1170 break; 1323 * We have other queues, don't allow more IO from this one
1324 */
1325 if (cfqd->busy_queues > 1)
1326 return 0;
1171 1327
1172 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) 1328 /*
1173 break; 1329 * we are the only queue, allow up to 4 times of 'quantum'
1330 */
1331 if (cfqq->dispatched >= 4 * max_dispatch)
1332 return 0;
1333 }
1174 1334
1175 cfq_clear_cfqq_must_dispatch(cfqq); 1335 /*
1176 cfq_clear_cfqq_wait_request(cfqq); 1336 * Dispatch a request from this cfqq
1177 del_timer(&cfqd->idle_slice_timer); 1337 */
1338 cfq_dispatch_request(cfqd, cfqq);
1339 cfqq->slice_dispatch++;
1340 cfq_clear_cfqq_must_dispatch(cfqq);
1178 1341
1179 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1342 /*
1343 * expire an async queue immediately if it has used up its slice. idle
1344 * queue always expire after 1 dispatch round.
1345 */
1346 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
1347 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
1348 cfq_class_idle(cfqq))) {
1349 cfqq->slice_end = jiffies + 1;
1350 cfq_slice_expired(cfqd, 0);
1180 } 1351 }
1181 1352
1182 cfq_log(cfqd, "dispatched=%d", dispatched); 1353 cfq_log(cfqd, "dispatched a request");
1183 return dispatched; 1354 return 1;
1184} 1355}
1185 1356
1186/* 1357/*
@@ -1323,14 +1494,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
1323 if (ioc->ioc_data == cic) 1494 if (ioc->ioc_data == cic)
1324 rcu_assign_pointer(ioc->ioc_data, NULL); 1495 rcu_assign_pointer(ioc->ioc_data, NULL);
1325 1496
1326 if (cic->cfqq[ASYNC]) { 1497 if (cic->cfqq[BLK_RW_ASYNC]) {
1327 cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]); 1498 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
1328 cic->cfqq[ASYNC] = NULL; 1499 cic->cfqq[BLK_RW_ASYNC] = NULL;
1329 } 1500 }
1330 1501
1331 if (cic->cfqq[SYNC]) { 1502 if (cic->cfqq[BLK_RW_SYNC]) {
1332 cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]); 1503 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]);
1333 cic->cfqq[SYNC] = NULL; 1504 cic->cfqq[BLK_RW_SYNC] = NULL;
1334 } 1505 }
1335} 1506}
1336 1507
@@ -1439,17 +1610,18 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
1439 1610
1440 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 1611 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
1441 1612
1442 cfqq = cic->cfqq[ASYNC]; 1613 cfqq = cic->cfqq[BLK_RW_ASYNC];
1443 if (cfqq) { 1614 if (cfqq) {
1444 struct cfq_queue *new_cfqq; 1615 struct cfq_queue *new_cfqq;
1445 new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC); 1616 new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
1617 GFP_ATOMIC);
1446 if (new_cfqq) { 1618 if (new_cfqq) {
1447 cic->cfqq[ASYNC] = new_cfqq; 1619 cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
1448 cfq_put_queue(cfqq); 1620 cfq_put_queue(cfqq);
1449 } 1621 }
1450 } 1622 }
1451 1623
1452 cfqq = cic->cfqq[SYNC]; 1624 cfqq = cic->cfqq[BLK_RW_SYNC];
1453 if (cfqq) 1625 if (cfqq)
1454 cfq_mark_cfqq_prio_changed(cfqq); 1626 cfq_mark_cfqq_prio_changed(cfqq);
1455 1627
@@ -1500,13 +1672,13 @@ retry:
1500 } 1672 }
1501 1673
1502 RB_CLEAR_NODE(&cfqq->rb_node); 1674 RB_CLEAR_NODE(&cfqq->rb_node);
1675 RB_CLEAR_NODE(&cfqq->p_node);
1503 INIT_LIST_HEAD(&cfqq->fifo); 1676 INIT_LIST_HEAD(&cfqq->fifo);
1504 1677
1505 atomic_set(&cfqq->ref, 0); 1678 atomic_set(&cfqq->ref, 0);
1506 cfqq->cfqd = cfqd; 1679 cfqq->cfqd = cfqd;
1507 1680
1508 cfq_mark_cfqq_prio_changed(cfqq); 1681 cfq_mark_cfqq_prio_changed(cfqq);
1509 cfq_mark_cfqq_queue_new(cfqq);
1510 1682
1511 cfq_init_prio_data(cfqq, ioc); 1683 cfq_init_prio_data(cfqq, ioc);
1512 1684
@@ -1893,14 +2065,22 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1893 2065
1894 if (cfqq == cfqd->active_queue) { 2066 if (cfqq == cfqd->active_queue) {
1895 /* 2067 /*
1896 * if we are waiting for a request for this queue, let it rip 2068 * Remember that we saw a request from this process, but
1897 * immediately and flag that we must not expire this queue 2069 * don't start queuing just yet. Otherwise we risk seeing lots
1898 * just now 2070 * of tiny requests, because we disrupt the normal plugging
2071 * and merging. If the request is already larger than a single
2072 * page, let it rip immediately. For that case we assume that
2073 * merging is already done. Ditto for a busy system that
2074 * has other work pending, don't risk delaying until the
2075 * idle timer unplug to continue working.
1899 */ 2076 */
1900 if (cfq_cfqq_wait_request(cfqq)) { 2077 if (cfq_cfqq_wait_request(cfqq)) {
2078 if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
2079 cfqd->busy_queues > 1) {
2080 del_timer(&cfqd->idle_slice_timer);
2081 blk_start_queueing(cfqd->queue);
2082 }
1901 cfq_mark_cfqq_must_dispatch(cfqq); 2083 cfq_mark_cfqq_must_dispatch(cfqq);
1902 del_timer(&cfqd->idle_slice_timer);
1903 blk_start_queueing(cfqd->queue);
1904 } 2084 }
1905 } else if (cfq_should_preempt(cfqd, cfqq, rq)) { 2085 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
1906 /* 2086 /*
@@ -1910,7 +2090,6 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1910 * this new queue is RT and the current one is BE 2090 * this new queue is RT and the current one is BE
1911 */ 2091 */
1912 cfq_preempt_queue(cfqd, cfqq); 2092 cfq_preempt_queue(cfqd, cfqq);
1913 cfq_mark_cfqq_must_dispatch(cfqq);
1914 blk_start_queueing(cfqd->queue); 2093 blk_start_queueing(cfqd->queue);
1915 } 2094 }
1916} 2095}
@@ -1986,16 +2165,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
1986 * or if we want to idle in case it has no pending requests. 2165 * or if we want to idle in case it has no pending requests.
1987 */ 2166 */
1988 if (cfqd->active_queue == cfqq) { 2167 if (cfqd->active_queue == cfqq) {
2168 const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
2169
1989 if (cfq_cfqq_slice_new(cfqq)) { 2170 if (cfq_cfqq_slice_new(cfqq)) {
1990 cfq_set_prio_slice(cfqd, cfqq); 2171 cfq_set_prio_slice(cfqd, cfqq);
1991 cfq_clear_cfqq_slice_new(cfqq); 2172 cfq_clear_cfqq_slice_new(cfqq);
1992 } 2173 }
2174 /*
2175 * If there are no requests waiting in this queue, and
2176 * there are other queues ready to issue requests, AND
2177 * those other queues are issuing requests within our
2178 * mean seek distance, give them a chance to run instead
2179 * of idling.
2180 */
1993 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) 2181 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
1994 cfq_slice_expired(cfqd, 1); 2182 cfq_slice_expired(cfqd, 1);
1995 else if (sync && !rq_noidle(rq) && 2183 else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq, 1) &&
1996 RB_EMPTY_ROOT(&cfqq->sort_list)) { 2184 sync && !rq_noidle(rq))
1997 cfq_arm_slice_timer(cfqd); 2185 cfq_arm_slice_timer(cfqd);
1998 }
1999 } 2186 }
2000 2187
2001 if (!cfqd->rq_in_driver) 2188 if (!cfqd->rq_in_driver)
@@ -2056,7 +2243,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
2056 if (!cic) 2243 if (!cic)
2057 return ELV_MQUEUE_MAY; 2244 return ELV_MQUEUE_MAY;
2058 2245
2059 cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC); 2246 cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
2060 if (cfqq) { 2247 if (cfqq) {
2061 cfq_init_prio_data(cfqq, cic->ioc); 2248 cfq_init_prio_data(cfqq, cic->ioc);
2062 cfq_prio_boost(cfqq); 2249 cfq_prio_boost(cfqq);
@@ -2146,11 +2333,10 @@ static void cfq_kick_queue(struct work_struct *work)
2146 struct cfq_data *cfqd = 2333 struct cfq_data *cfqd =
2147 container_of(work, struct cfq_data, unplug_work); 2334 container_of(work, struct cfq_data, unplug_work);
2148 struct request_queue *q = cfqd->queue; 2335 struct request_queue *q = cfqd->queue;
2149 unsigned long flags;
2150 2336
2151 spin_lock_irqsave(q->queue_lock, flags); 2337 spin_lock_irq(q->queue_lock);
2152 blk_start_queueing(q); 2338 blk_start_queueing(q);
2153 spin_unlock_irqrestore(q->queue_lock, flags); 2339 spin_unlock_irq(q->queue_lock);
2154} 2340}
2155 2341
2156/* 2342/*
@@ -2172,6 +2358,12 @@ static void cfq_idle_slice_timer(unsigned long data)
2172 timed_out = 0; 2358 timed_out = 0;
2173 2359
2174 /* 2360 /*
2361 * We saw a request before the queue expired, let it through
2362 */
2363 if (cfq_cfqq_must_dispatch(cfqq))
2364 goto out_kick;
2365
2366 /*
2175 * expired 2367 * expired
2176 */ 2368 */
2177 if (cfq_slice_used(cfqq)) 2369 if (cfq_slice_used(cfqq))
@@ -2187,10 +2379,8 @@ static void cfq_idle_slice_timer(unsigned long data)
2187 /* 2379 /*
2188 * not expired and it has a request pending, let it dispatch 2380 * not expired and it has a request pending, let it dispatch
2189 */ 2381 */
2190 if (!RB_EMPTY_ROOT(&cfqq->sort_list)) { 2382 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
2191 cfq_mark_cfqq_must_dispatch(cfqq);
2192 goto out_kick; 2383 goto out_kick;
2193 }
2194 } 2384 }
2195expire: 2385expire:
2196 cfq_slice_expired(cfqd, timed_out); 2386 cfq_slice_expired(cfqd, timed_out);
diff --git a/block/elevator.c b/block/elevator.c
index ca6788a0195a..7073a9072577 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -573,7 +573,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
573 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); 573 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
574} 574}
575 575
576static void elv_drain_elevator(struct request_queue *q) 576void elv_drain_elevator(struct request_queue *q)
577{ 577{
578 static int printed; 578 static int printed;
579 while (q->elevator->ops->elevator_dispatch_fn(q, 1)) 579 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
@@ -587,6 +587,31 @@ static void elv_drain_elevator(struct request_queue *q)
587 } 587 }
588} 588}
589 589
590/*
591 * Call with queue lock held, interrupts disabled
592 */
593void elv_quiesce_start(struct request_queue *q)
594{
595 queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
596
597 /*
598 * make sure we don't have any requests in flight
599 */
600 elv_drain_elevator(q);
601 while (q->rq.elvpriv) {
602 blk_start_queueing(q);
603 spin_unlock_irq(q->queue_lock);
604 msleep(10);
605 spin_lock_irq(q->queue_lock);
606 elv_drain_elevator(q);
607 }
608}
609
610void elv_quiesce_end(struct request_queue *q)
611{
612 queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
613}
614
590void elv_insert(struct request_queue *q, struct request *rq, int where) 615void elv_insert(struct request_queue *q, struct request *rq, int where)
591{ 616{
592 struct list_head *pos; 617 struct list_head *pos;
@@ -1101,18 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
1101 * Turn on BYPASS and drain all requests w/ elevator private data 1126 * Turn on BYPASS and drain all requests w/ elevator private data
1102 */ 1127 */
1103 spin_lock_irq(q->queue_lock); 1128 spin_lock_irq(q->queue_lock);
1104 1129 elv_quiesce_start(q);
1105 queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
1106
1107 elv_drain_elevator(q);
1108
1109 while (q->rq.elvpriv) {
1110 blk_start_queueing(q);
1111 spin_unlock_irq(q->queue_lock);
1112 msleep(10);
1113 spin_lock_irq(q->queue_lock);
1114 elv_drain_elevator(q);
1115 }
1116 1130
1117 /* 1131 /*
1118 * Remember old elevator. 1132 * Remember old elevator.
@@ -1136,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
1136 */ 1150 */
1137 elevator_exit(old_elevator); 1151 elevator_exit(old_elevator);
1138 spin_lock_irq(q->queue_lock); 1152 spin_lock_irq(q->queue_lock);
1139 queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); 1153 elv_quiesce_end(q);
1140 spin_unlock_irq(q->queue_lock); 1154 spin_unlock_irq(q->queue_lock);
1141 1155
1142 blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); 1156 blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
diff --git a/block/ioctl.c b/block/ioctl.c
index 0f22e629b13c..ad474d4bbcce 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -146,8 +146,6 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
146 struct bio *bio; 146 struct bio *bio;
147 147
148 bio = bio_alloc(GFP_KERNEL, 0); 148 bio = bio_alloc(GFP_KERNEL, 0);
149 if (!bio)
150 return -ENOMEM;
151 149
152 bio->bi_end_io = blk_ioc_discard_endio; 150 bio->bi_end_io = blk_ioc_discard_endio;
153 bio->bi_bdev = bdev; 151 bio->bi_bdev = bdev;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 626ee274c5c4..84b7f8709f41 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -217,7 +217,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
217static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, 217static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
218 struct bio *bio) 218 struct bio *bio)
219{ 219{
220 int ret = 0; 220 int r, ret = 0;
221 221
222 /* 222 /*
223 * fill in all the output members 223 * fill in all the output members
@@ -242,7 +242,9 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
242 ret = -EFAULT; 242 ret = -EFAULT;
243 } 243 }
244 244
245 blk_rq_unmap_user(bio); 245 r = blk_rq_unmap_user(bio);
246 if (!ret)
247 ret = r;
246 blk_put_request(rq); 248 blk_put_request(rq);
247 249
248 return ret; 250 return ret;