diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-28 12:25:44 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-28 12:25:44 -0500 |
commit | 7baf398f12585ae77748716fa77113c1f1831153 (patch) | |
tree | 492361d848d3dfc33563a1bdf0d0f61b454aac82 | |
parent | 78cd9e04e0acea4f622e84ca0c760c7eae0c6854 (diff) | |
parent | 206dc69b31ca05baac68c75b8ed2ba7dd857d273 (diff) |
Merge branch 'cfq-merge' of git://brick.kernel.dk/data/git/linux-2.6-block
* 'cfq-merge' of git://brick.kernel.dk/data/git/linux-2.6-block:
[BLOCK] cfq-iosched: seek and async performance fixes
[PATCH] ll_rw_blk: fix 80-col offender in put_io_context()
[PATCH] cfq-iosched: small cfq_choose_req() optimization
[PATCH] [BLOCK] cfq-iosched: change cfq io context linking from list to tree
-rw-r--r-- | block/cfq-iosched.c | 357 | ||||
-rw-r--r-- | block/ll_rw_blk.c | 21 | ||||
-rw-r--r-- | include/linux/blkdev.h | 22 |
3 files changed, 220 insertions, 180 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index bde40a6ae665..67d446de0227 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -26,18 +26,12 @@ static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ | |||
26 | static const int cfq_slice_sync = HZ / 10; | 26 | static const int cfq_slice_sync = HZ / 10; |
27 | static int cfq_slice_async = HZ / 25; | 27 | static int cfq_slice_async = HZ / 25; |
28 | static const int cfq_slice_async_rq = 2; | 28 | static const int cfq_slice_async_rq = 2; |
29 | static int cfq_slice_idle = HZ / 100; | 29 | static int cfq_slice_idle = HZ / 70; |
30 | 30 | ||
31 | #define CFQ_IDLE_GRACE (HZ / 10) | 31 | #define CFQ_IDLE_GRACE (HZ / 10) |
32 | #define CFQ_SLICE_SCALE (5) | 32 | #define CFQ_SLICE_SCALE (5) |
33 | 33 | ||
34 | #define CFQ_KEY_ASYNC (0) | 34 | #define CFQ_KEY_ASYNC (0) |
35 | #define CFQ_KEY_ANY (0xffff) | ||
36 | |||
37 | /* | ||
38 | * disable queueing at the driver/hardware level | ||
39 | */ | ||
40 | static const int cfq_max_depth = 2; | ||
41 | 35 | ||
42 | static DEFINE_RWLOCK(cfq_exit_lock); | 36 | static DEFINE_RWLOCK(cfq_exit_lock); |
43 | 37 | ||
@@ -102,6 +96,8 @@ static struct completion *ioc_gone; | |||
102 | #define cfq_cfqq_sync(cfqq) \ | 96 | #define cfq_cfqq_sync(cfqq) \ |
103 | (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) | 97 | (cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC]) |
104 | 98 | ||
99 | #define sample_valid(samples) ((samples) > 80) | ||
100 | |||
105 | /* | 101 | /* |
106 | * Per block device queue structure | 102 | * Per block device queue structure |
107 | */ | 103 | */ |
@@ -170,7 +166,6 @@ struct cfq_data { | |||
170 | unsigned int cfq_slice[2]; | 166 | unsigned int cfq_slice[2]; |
171 | unsigned int cfq_slice_async_rq; | 167 | unsigned int cfq_slice_async_rq; |
172 | unsigned int cfq_slice_idle; | 168 | unsigned int cfq_slice_idle; |
173 | unsigned int cfq_max_depth; | ||
174 | 169 | ||
175 | struct list_head cic_list; | 170 | struct list_head cic_list; |
176 | }; | 171 | }; |
@@ -343,17 +338,27 @@ static int cfq_queue_empty(request_queue_t *q) | |||
343 | return !cfqd->busy_queues; | 338 | return !cfqd->busy_queues; |
344 | } | 339 | } |
345 | 340 | ||
341 | static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) | ||
342 | { | ||
343 | if (rw == READ || process_sync(task)) | ||
344 | return task->pid; | ||
345 | |||
346 | return CFQ_KEY_ASYNC; | ||
347 | } | ||
348 | |||
346 | /* | 349 | /* |
347 | * Lifted from AS - choose which of crq1 and crq2 that is best served now. | 350 | * Lifted from AS - choose which of crq1 and crq2 that is best served now. |
348 | * We choose the request that is closest to the head right now. Distance | 351 | * We choose the request that is closest to the head right now. Distance |
349 | * behind the head are penalized and only allowed to a certain extent. | 352 | * behind the head is penalized and only allowed to a certain extent. |
350 | */ | 353 | */ |
351 | static struct cfq_rq * | 354 | static struct cfq_rq * |
352 | cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) | 355 | cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) |
353 | { | 356 | { |
354 | sector_t last, s1, s2, d1 = 0, d2 = 0; | 357 | sector_t last, s1, s2, d1 = 0, d2 = 0; |
355 | int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */ | ||
356 | unsigned long back_max; | 358 | unsigned long back_max; |
359 | #define CFQ_RQ1_WRAP 0x01 /* request 1 wraps */ | ||
360 | #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ | ||
361 | unsigned wrap = 0; /* bit mask: requests behind the disk head? */ | ||
357 | 362 | ||
358 | if (crq1 == NULL || crq1 == crq2) | 363 | if (crq1 == NULL || crq1 == crq2) |
359 | return crq2; | 364 | return crq2; |
@@ -385,35 +390,47 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) | |||
385 | else if (s1 + back_max >= last) | 390 | else if (s1 + back_max >= last) |
386 | d1 = (last - s1) * cfqd->cfq_back_penalty; | 391 | d1 = (last - s1) * cfqd->cfq_back_penalty; |
387 | else | 392 | else |
388 | r1_wrap = 1; | 393 | wrap |= CFQ_RQ1_WRAP; |
389 | 394 | ||
390 | if (s2 >= last) | 395 | if (s2 >= last) |
391 | d2 = s2 - last; | 396 | d2 = s2 - last; |
392 | else if (s2 + back_max >= last) | 397 | else if (s2 + back_max >= last) |
393 | d2 = (last - s2) * cfqd->cfq_back_penalty; | 398 | d2 = (last - s2) * cfqd->cfq_back_penalty; |
394 | else | 399 | else |
395 | r2_wrap = 1; | 400 | wrap |= CFQ_RQ2_WRAP; |
396 | 401 | ||
397 | /* Found required data */ | 402 | /* Found required data */ |
398 | if (!r1_wrap && r2_wrap) | 403 | |
399 | return crq1; | 404 | /* |
400 | else if (!r2_wrap && r1_wrap) | 405 | * By doing switch() on the bit mask "wrap" we avoid having to |
401 | return crq2; | 406 | * check two variables for all permutations: --> faster! |
402 | else if (r1_wrap && r2_wrap) { | 407 | */ |
403 | /* both behind the head */ | 408 | switch (wrap) { |
404 | if (s1 <= s2) | 409 | case 0: /* common case for CFQ: crq1 and crq2 not wrapped */ |
410 | if (d1 < d2) | ||
405 | return crq1; | 411 | return crq1; |
406 | else | 412 | else if (d2 < d1) |
407 | return crq2; | 413 | return crq2; |
408 | } | 414 | else { |
415 | if (s1 >= s2) | ||
416 | return crq1; | ||
417 | else | ||
418 | return crq2; | ||
419 | } | ||
409 | 420 | ||
410 | /* Both requests in front of the head */ | 421 | case CFQ_RQ2_WRAP: |
411 | if (d1 < d2) | ||
412 | return crq1; | 422 | return crq1; |
413 | else if (d2 < d1) | 423 | case CFQ_RQ1_WRAP: |
414 | return crq2; | 424 | return crq2; |
415 | else { | 425 | case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */ |
416 | if (s1 >= s2) | 426 | default: |
427 | /* | ||
428 | * Since both rqs are wrapped, | ||
429 | * start with the one that's further behind head | ||
430 | * (--> only *one* back seek required), | ||
431 | * since back seek takes more time than forward. | ||
432 | */ | ||
433 | if (s1 <= s2) | ||
417 | return crq1; | 434 | return crq1; |
418 | else | 435 | else |
419 | return crq2; | 436 | return crq2; |
@@ -612,15 +629,20 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) | |||
612 | cfq_add_crq_rb(crq); | 629 | cfq_add_crq_rb(crq); |
613 | } | 630 | } |
614 | 631 | ||
615 | static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) | 632 | static struct request * |
616 | 633 | cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) | |
617 | { | 634 | { |
618 | struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY); | 635 | struct task_struct *tsk = current; |
636 | pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio)); | ||
637 | struct cfq_queue *cfqq; | ||
619 | struct rb_node *n; | 638 | struct rb_node *n; |
639 | sector_t sector; | ||
620 | 640 | ||
641 | cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio); | ||
621 | if (!cfqq) | 642 | if (!cfqq) |
622 | goto out; | 643 | goto out; |
623 | 644 | ||
645 | sector = bio->bi_sector + bio_sectors(bio); | ||
624 | n = cfqq->sort_list.rb_node; | 646 | n = cfqq->sort_list.rb_node; |
625 | while (n) { | 647 | while (n) { |
626 | struct cfq_rq *crq = rb_entry_crq(n); | 648 | struct cfq_rq *crq = rb_entry_crq(n); |
@@ -674,7 +696,7 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) | |||
674 | goto out; | 696 | goto out; |
675 | } | 697 | } |
676 | 698 | ||
677 | __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio)); | 699 | __rq = cfq_find_rq_fmerge(cfqd, bio); |
678 | if (__rq && elv_rq_merge_ok(__rq, bio)) { | 700 | if (__rq && elv_rq_merge_ok(__rq, bio)) { |
679 | ret = ELEVATOR_FRONT_MERGE; | 701 | ret = ELEVATOR_FRONT_MERGE; |
680 | goto out; | 702 | goto out; |
@@ -877,6 +899,7 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) | |||
877 | static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) | 899 | static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) |
878 | 900 | ||
879 | { | 901 | { |
902 | struct cfq_io_context *cic; | ||
880 | unsigned long sl; | 903 | unsigned long sl; |
881 | 904 | ||
882 | WARN_ON(!RB_EMPTY(&cfqq->sort_list)); | 905 | WARN_ON(!RB_EMPTY(&cfqq->sort_list)); |
@@ -892,13 +915,23 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
892 | /* | 915 | /* |
893 | * task has exited, don't wait | 916 | * task has exited, don't wait |
894 | */ | 917 | */ |
895 | if (cfqd->active_cic && !cfqd->active_cic->ioc->task) | 918 | cic = cfqd->active_cic; |
919 | if (!cic || !cic->ioc->task) | ||
896 | return 0; | 920 | return 0; |
897 | 921 | ||
898 | cfq_mark_cfqq_must_dispatch(cfqq); | 922 | cfq_mark_cfqq_must_dispatch(cfqq); |
899 | cfq_mark_cfqq_wait_request(cfqq); | 923 | cfq_mark_cfqq_wait_request(cfqq); |
900 | 924 | ||
901 | sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); | 925 | sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle); |
926 | |||
927 | /* | ||
928 | * we don't want to idle for seeks, but we do want to allow | ||
929 | * fair distribution of slice time for a process doing back-to-back | ||
930 | * seeks. so allow a little bit of time for him to submit a new rq | ||
931 | */ | ||
932 | if (sample_valid(cic->seek_samples) && cic->seek_mean > 131072) | ||
933 | sl = 2; | ||
934 | |||
902 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); | 935 | mod_timer(&cfqd->idle_slice_timer, jiffies + sl); |
903 | return 1; | 936 | return 1; |
904 | } | 937 | } |
@@ -1115,13 +1148,6 @@ cfq_dispatch_requests(request_queue_t *q, int force) | |||
1115 | if (cfqq) { | 1148 | if (cfqq) { |
1116 | int max_dispatch; | 1149 | int max_dispatch; |
1117 | 1150 | ||
1118 | /* | ||
1119 | * if idle window is disabled, allow queue buildup | ||
1120 | */ | ||
1121 | if (!cfq_cfqq_idle_window(cfqq) && | ||
1122 | cfqd->rq_in_driver >= cfqd->cfq_max_depth) | ||
1123 | return 0; | ||
1124 | |||
1125 | cfq_clear_cfqq_must_dispatch(cfqq); | 1151 | cfq_clear_cfqq_must_dispatch(cfqq); |
1126 | cfq_clear_cfqq_wait_request(cfqq); | 1152 | cfq_clear_cfqq_wait_request(cfqq); |
1127 | del_timer(&cfqd->idle_slice_timer); | 1153 | del_timer(&cfqd->idle_slice_timer); |
@@ -1171,13 +1197,13 @@ __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, | |||
1171 | const int hashval) | 1197 | const int hashval) |
1172 | { | 1198 | { |
1173 | struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; | 1199 | struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; |
1174 | struct hlist_node *entry, *next; | 1200 | struct hlist_node *entry; |
1201 | struct cfq_queue *__cfqq; | ||
1175 | 1202 | ||
1176 | hlist_for_each_safe(entry, next, hash_list) { | 1203 | hlist_for_each_entry(__cfqq, entry, hash_list, cfq_hash) { |
1177 | struct cfq_queue *__cfqq = list_entry_qhash(entry); | ||
1178 | const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio); | 1204 | const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio); |
1179 | 1205 | ||
1180 | if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY)) | 1206 | if (__cfqq->key == key && (__p == prio || !prio)) |
1181 | return __cfqq; | 1207 | return __cfqq; |
1182 | } | 1208 | } |
1183 | 1209 | ||
@@ -1190,19 +1216,19 @@ cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned short prio) | |||
1190 | return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); | 1216 | return __cfq_find_cfq_hash(cfqd, key, prio, hash_long(key, CFQ_QHASH_SHIFT)); |
1191 | } | 1217 | } |
1192 | 1218 | ||
1193 | static void cfq_free_io_context(struct cfq_io_context *cic) | 1219 | static void cfq_free_io_context(struct io_context *ioc) |
1194 | { | 1220 | { |
1195 | struct cfq_io_context *__cic; | 1221 | struct cfq_io_context *__cic; |
1196 | struct list_head *entry, *next; | 1222 | struct rb_node *n; |
1197 | int freed = 1; | 1223 | int freed = 0; |
1198 | 1224 | ||
1199 | list_for_each_safe(entry, next, &cic->list) { | 1225 | while ((n = rb_first(&ioc->cic_root)) != NULL) { |
1200 | __cic = list_entry(entry, struct cfq_io_context, list); | 1226 | __cic = rb_entry(n, struct cfq_io_context, rb_node); |
1227 | rb_erase(&__cic->rb_node, &ioc->cic_root); | ||
1201 | kmem_cache_free(cfq_ioc_pool, __cic); | 1228 | kmem_cache_free(cfq_ioc_pool, __cic); |
1202 | freed++; | 1229 | freed++; |
1203 | } | 1230 | } |
1204 | 1231 | ||
1205 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1206 | if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) | 1232 | if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) |
1207 | complete(ioc_gone); | 1233 | complete(ioc_gone); |
1208 | } | 1234 | } |
@@ -1210,8 +1236,7 @@ static void cfq_free_io_context(struct cfq_io_context *cic) | |||
1210 | static void cfq_trim(struct io_context *ioc) | 1236 | static void cfq_trim(struct io_context *ioc) |
1211 | { | 1237 | { |
1212 | ioc->set_ioprio = NULL; | 1238 | ioc->set_ioprio = NULL; |
1213 | if (ioc->cic) | 1239 | cfq_free_io_context(ioc); |
1214 | cfq_free_io_context(ioc->cic); | ||
1215 | } | 1240 | } |
1216 | 1241 | ||
1217 | /* | 1242 | /* |
@@ -1250,26 +1275,26 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic) | |||
1250 | spin_unlock(q->queue_lock); | 1275 | spin_unlock(q->queue_lock); |
1251 | } | 1276 | } |
1252 | 1277 | ||
1253 | static void cfq_exit_io_context(struct cfq_io_context *cic) | 1278 | static void cfq_exit_io_context(struct io_context *ioc) |
1254 | { | 1279 | { |
1255 | struct cfq_io_context *__cic; | 1280 | struct cfq_io_context *__cic; |
1256 | struct list_head *entry; | ||
1257 | unsigned long flags; | 1281 | unsigned long flags; |
1258 | 1282 | struct rb_node *n; | |
1259 | local_irq_save(flags); | ||
1260 | 1283 | ||
1261 | /* | 1284 | /* |
1262 | * put the reference this task is holding to the various queues | 1285 | * put the reference this task is holding to the various queues |
1263 | */ | 1286 | */ |
1264 | read_lock(&cfq_exit_lock); | 1287 | read_lock_irqsave(&cfq_exit_lock, flags); |
1265 | list_for_each(entry, &cic->list) { | 1288 | |
1266 | __cic = list_entry(entry, struct cfq_io_context, list); | 1289 | n = rb_first(&ioc->cic_root); |
1290 | while (n != NULL) { | ||
1291 | __cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
1292 | |||
1267 | cfq_exit_single_io_context(__cic); | 1293 | cfq_exit_single_io_context(__cic); |
1294 | n = rb_next(n); | ||
1268 | } | 1295 | } |
1269 | 1296 | ||
1270 | cfq_exit_single_io_context(cic); | 1297 | read_unlock_irqrestore(&cfq_exit_lock, flags); |
1271 | read_unlock(&cfq_exit_lock); | ||
1272 | local_irq_restore(flags); | ||
1273 | } | 1298 | } |
1274 | 1299 | ||
1275 | static struct cfq_io_context * | 1300 | static struct cfq_io_context * |
@@ -1278,10 +1303,10 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) | |||
1278 | struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); | 1303 | struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); |
1279 | 1304 | ||
1280 | if (cic) { | 1305 | if (cic) { |
1281 | INIT_LIST_HEAD(&cic->list); | 1306 | RB_CLEAR(&cic->rb_node); |
1307 | cic->key = NULL; | ||
1282 | cic->cfqq[ASYNC] = NULL; | 1308 | cic->cfqq[ASYNC] = NULL; |
1283 | cic->cfqq[SYNC] = NULL; | 1309 | cic->cfqq[SYNC] = NULL; |
1284 | cic->key = NULL; | ||
1285 | cic->last_end_request = jiffies; | 1310 | cic->last_end_request = jiffies; |
1286 | cic->ttime_total = 0; | 1311 | cic->ttime_total = 0; |
1287 | cic->ttime_samples = 0; | 1312 | cic->ttime_samples = 0; |
@@ -1373,15 +1398,17 @@ static inline void changed_ioprio(struct cfq_io_context *cic) | |||
1373 | static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) | 1398 | static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) |
1374 | { | 1399 | { |
1375 | struct cfq_io_context *cic; | 1400 | struct cfq_io_context *cic; |
1401 | struct rb_node *n; | ||
1376 | 1402 | ||
1377 | write_lock(&cfq_exit_lock); | 1403 | write_lock(&cfq_exit_lock); |
1378 | 1404 | ||
1379 | cic = ioc->cic; | 1405 | n = rb_first(&ioc->cic_root); |
1380 | 1406 | while (n != NULL) { | |
1381 | changed_ioprio(cic); | 1407 | cic = rb_entry(n, struct cfq_io_context, rb_node); |
1382 | 1408 | ||
1383 | list_for_each_entry(cic, &cic->list, list) | ||
1384 | changed_ioprio(cic); | 1409 | changed_ioprio(cic); |
1410 | n = rb_next(n); | ||
1411 | } | ||
1385 | 1412 | ||
1386 | write_unlock(&cfq_exit_lock); | 1413 | write_unlock(&cfq_exit_lock); |
1387 | 1414 | ||
@@ -1445,14 +1472,67 @@ out: | |||
1445 | return cfqq; | 1472 | return cfqq; |
1446 | } | 1473 | } |
1447 | 1474 | ||
1475 | static struct cfq_io_context * | ||
1476 | cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) | ||
1477 | { | ||
1478 | struct rb_node *n = ioc->cic_root.rb_node; | ||
1479 | struct cfq_io_context *cic; | ||
1480 | void *key = cfqd; | ||
1481 | |||
1482 | while (n) { | ||
1483 | cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
1484 | |||
1485 | if (key < cic->key) | ||
1486 | n = n->rb_left; | ||
1487 | else if (key > cic->key) | ||
1488 | n = n->rb_right; | ||
1489 | else | ||
1490 | return cic; | ||
1491 | } | ||
1492 | |||
1493 | return NULL; | ||
1494 | } | ||
1495 | |||
1496 | static inline void | ||
1497 | cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, | ||
1498 | struct cfq_io_context *cic) | ||
1499 | { | ||
1500 | struct rb_node **p = &ioc->cic_root.rb_node; | ||
1501 | struct rb_node *parent = NULL; | ||
1502 | struct cfq_io_context *__cic; | ||
1503 | |||
1504 | read_lock(&cfq_exit_lock); | ||
1505 | |||
1506 | cic->ioc = ioc; | ||
1507 | cic->key = cfqd; | ||
1508 | |||
1509 | ioc->set_ioprio = cfq_ioc_set_ioprio; | ||
1510 | |||
1511 | while (*p) { | ||
1512 | parent = *p; | ||
1513 | __cic = rb_entry(parent, struct cfq_io_context, rb_node); | ||
1514 | |||
1515 | if (cic->key < __cic->key) | ||
1516 | p = &(*p)->rb_left; | ||
1517 | else if (cic->key > __cic->key) | ||
1518 | p = &(*p)->rb_right; | ||
1519 | else | ||
1520 | BUG(); | ||
1521 | } | ||
1522 | |||
1523 | rb_link_node(&cic->rb_node, parent, p); | ||
1524 | rb_insert_color(&cic->rb_node, &ioc->cic_root); | ||
1525 | list_add(&cic->queue_list, &cfqd->cic_list); | ||
1526 | read_unlock(&cfq_exit_lock); | ||
1527 | } | ||
1528 | |||
1448 | /* | 1529 | /* |
1449 | * Setup general io context and cfq io context. There can be several cfq | 1530 | * Setup general io context and cfq io context. There can be several cfq |
1450 | * io contexts per general io context, if this process is doing io to more | 1531 | * io contexts per general io context, if this process is doing io to more |
1451 | * than one device managed by cfq. Note that caller is holding a reference to | 1532 | * than one device managed by cfq. |
1452 | * cfqq, so we don't need to worry about it disappearing | ||
1453 | */ | 1533 | */ |
1454 | static struct cfq_io_context * | 1534 | static struct cfq_io_context * |
1455 | cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) | 1535 | cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) |
1456 | { | 1536 | { |
1457 | struct io_context *ioc = NULL; | 1537 | struct io_context *ioc = NULL; |
1458 | struct cfq_io_context *cic; | 1538 | struct cfq_io_context *cic; |
@@ -1463,88 +1543,15 @@ cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) | |||
1463 | if (!ioc) | 1543 | if (!ioc) |
1464 | return NULL; | 1544 | return NULL; |
1465 | 1545 | ||
1466 | restart: | 1546 | cic = cfq_cic_rb_lookup(cfqd, ioc); |
1467 | if ((cic = ioc->cic) == NULL) { | 1547 | if (cic) |
1468 | cic = cfq_alloc_io_context(cfqd, gfp_mask); | 1548 | goto out; |
1469 | |||
1470 | if (cic == NULL) | ||
1471 | goto err; | ||
1472 | |||
1473 | /* | ||
1474 | * manually increment generic io_context usage count, it | ||
1475 | * cannot go away since we are already holding one ref to it | ||
1476 | */ | ||
1477 | cic->ioc = ioc; | ||
1478 | cic->key = cfqd; | ||
1479 | read_lock(&cfq_exit_lock); | ||
1480 | ioc->set_ioprio = cfq_ioc_set_ioprio; | ||
1481 | ioc->cic = cic; | ||
1482 | list_add(&cic->queue_list, &cfqd->cic_list); | ||
1483 | read_unlock(&cfq_exit_lock); | ||
1484 | } else { | ||
1485 | struct cfq_io_context *__cic; | ||
1486 | |||
1487 | /* | ||
1488 | * the first cic on the list is actually the head itself | ||
1489 | */ | ||
1490 | if (cic->key == cfqd) | ||
1491 | goto out; | ||
1492 | |||
1493 | if (unlikely(!cic->key)) { | ||
1494 | read_lock(&cfq_exit_lock); | ||
1495 | if (list_empty(&cic->list)) | ||
1496 | ioc->cic = NULL; | ||
1497 | else | ||
1498 | ioc->cic = list_entry(cic->list.next, | ||
1499 | struct cfq_io_context, | ||
1500 | list); | ||
1501 | read_unlock(&cfq_exit_lock); | ||
1502 | kmem_cache_free(cfq_ioc_pool, cic); | ||
1503 | atomic_dec(&ioc_count); | ||
1504 | goto restart; | ||
1505 | } | ||
1506 | |||
1507 | /* | ||
1508 | * cic exists, check if we already are there. linear search | ||
1509 | * should be ok here, the list will usually not be more than | ||
1510 | * 1 or a few entries long | ||
1511 | */ | ||
1512 | list_for_each_entry(__cic, &cic->list, list) { | ||
1513 | /* | ||
1514 | * this process is already holding a reference to | ||
1515 | * this queue, so no need to get one more | ||
1516 | */ | ||
1517 | if (__cic->key == cfqd) { | ||
1518 | cic = __cic; | ||
1519 | goto out; | ||
1520 | } | ||
1521 | if (unlikely(!__cic->key)) { | ||
1522 | read_lock(&cfq_exit_lock); | ||
1523 | list_del(&__cic->list); | ||
1524 | read_unlock(&cfq_exit_lock); | ||
1525 | kmem_cache_free(cfq_ioc_pool, __cic); | ||
1526 | atomic_dec(&ioc_count); | ||
1527 | goto restart; | ||
1528 | } | ||
1529 | } | ||
1530 | 1549 | ||
1531 | /* | 1550 | cic = cfq_alloc_io_context(cfqd, gfp_mask); |
1532 | * nope, process doesn't have a cic assoicated with this | 1551 | if (cic == NULL) |
1533 | * cfqq yet. get a new one and add to list | 1552 | goto err; |
1534 | */ | ||
1535 | __cic = cfq_alloc_io_context(cfqd, gfp_mask); | ||
1536 | if (__cic == NULL) | ||
1537 | goto err; | ||
1538 | |||
1539 | __cic->ioc = ioc; | ||
1540 | __cic->key = cfqd; | ||
1541 | read_lock(&cfq_exit_lock); | ||
1542 | list_add(&__cic->list, &cic->list); | ||
1543 | list_add(&__cic->queue_list, &cfqd->cic_list); | ||
1544 | read_unlock(&cfq_exit_lock); | ||
1545 | cic = __cic; | ||
1546 | } | ||
1547 | 1553 | ||
1554 | cfq_cic_link(cfqd, ioc, cic); | ||
1548 | out: | 1555 | out: |
1549 | return cic; | 1556 | return cic; |
1550 | err: | 1557 | err: |
@@ -1577,7 +1584,33 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) | |||
1577 | cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; | 1584 | cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples; |
1578 | } | 1585 | } |
1579 | 1586 | ||
1580 | #define sample_valid(samples) ((samples) > 80) | 1587 | static void |
1588 | cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, | ||
1589 | struct cfq_rq *crq) | ||
1590 | { | ||
1591 | sector_t sdist; | ||
1592 | u64 total; | ||
1593 | |||
1594 | if (cic->last_request_pos < crq->request->sector) | ||
1595 | sdist = crq->request->sector - cic->last_request_pos; | ||
1596 | else | ||
1597 | sdist = cic->last_request_pos - crq->request->sector; | ||
1598 | |||
1599 | /* | ||
1600 | * Don't allow the seek distance to get too large from the | ||
1601 | * odd fragment, pagein, etc | ||
1602 | */ | ||
1603 | if (cic->seek_samples <= 60) /* second&third seek */ | ||
1604 | sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024); | ||
1605 | else | ||
1606 | sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64); | ||
1607 | |||
1608 | cic->seek_samples = (7*cic->seek_samples + 256) / 8; | ||
1609 | cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8; | ||
1610 | total = cic->seek_total + (cic->seek_samples/2); | ||
1611 | do_div(total, cic->seek_samples); | ||
1612 | cic->seek_mean = (sector_t)total; | ||
1613 | } | ||
1581 | 1614 | ||
1582 | /* | 1615 | /* |
1583 | * Disable idle window if the process thinks too long or seeks so much that | 1616 | * Disable idle window if the process thinks too long or seeks so much that |
@@ -1690,9 +1723,11 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1690 | cic = crq->io_context; | 1723 | cic = crq->io_context; |
1691 | 1724 | ||
1692 | cfq_update_io_thinktime(cfqd, cic); | 1725 | cfq_update_io_thinktime(cfqd, cic); |
1726 | cfq_update_io_seektime(cfqd, cic, crq); | ||
1693 | cfq_update_idle_window(cfqd, cfqq, cic); | 1727 | cfq_update_idle_window(cfqd, cfqq, cic); |
1694 | 1728 | ||
1695 | cic->last_queue = jiffies; | 1729 | cic->last_queue = jiffies; |
1730 | cic->last_request_pos = crq->request->sector + crq->request->nr_sectors; | ||
1696 | 1731 | ||
1697 | if (cfqq == cfqd->active_queue) { | 1732 | if (cfqq == cfqd->active_queue) { |
1698 | /* | 1733 | /* |
@@ -1825,14 +1860,6 @@ static void cfq_prio_boost(struct cfq_queue *cfqq) | |||
1825 | cfq_resort_rr_list(cfqq, 0); | 1860 | cfq_resort_rr_list(cfqq, 0); |
1826 | } | 1861 | } |
1827 | 1862 | ||
1828 | static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) | ||
1829 | { | ||
1830 | if (rw == READ || process_sync(task)) | ||
1831 | return task->pid; | ||
1832 | |||
1833 | return CFQ_KEY_ASYNC; | ||
1834 | } | ||
1835 | |||
1836 | static inline int | 1863 | static inline int |
1837 | __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, | 1864 | __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, |
1838 | struct task_struct *task, int rw) | 1865 | struct task_struct *task, int rw) |
@@ -1965,7 +1992,7 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, | |||
1965 | 1992 | ||
1966 | might_sleep_if(gfp_mask & __GFP_WAIT); | 1993 | might_sleep_if(gfp_mask & __GFP_WAIT); |
1967 | 1994 | ||
1968 | cic = cfq_get_io_context(cfqd, key, gfp_mask); | 1995 | cic = cfq_get_io_context(cfqd, gfp_mask); |
1969 | 1996 | ||
1970 | spin_lock_irqsave(q->queue_lock, flags); | 1997 | spin_lock_irqsave(q->queue_lock, flags); |
1971 | 1998 | ||
@@ -2133,11 +2160,14 @@ static void cfq_exit_queue(elevator_t *e) | |||
2133 | request_queue_t *q = cfqd->queue; | 2160 | request_queue_t *q = cfqd->queue; |
2134 | 2161 | ||
2135 | cfq_shutdown_timer_wq(cfqd); | 2162 | cfq_shutdown_timer_wq(cfqd); |
2163 | |||
2136 | write_lock(&cfq_exit_lock); | 2164 | write_lock(&cfq_exit_lock); |
2137 | spin_lock_irq(q->queue_lock); | 2165 | spin_lock_irq(q->queue_lock); |
2166 | |||
2138 | if (cfqd->active_queue) | 2167 | if (cfqd->active_queue) |
2139 | __cfq_slice_expired(cfqd, cfqd->active_queue, 0); | 2168 | __cfq_slice_expired(cfqd, cfqd->active_queue, 0); |
2140 | while(!list_empty(&cfqd->cic_list)) { | 2169 | |
2170 | while (!list_empty(&cfqd->cic_list)) { | ||
2141 | struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, | 2171 | struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, |
2142 | struct cfq_io_context, | 2172 | struct cfq_io_context, |
2143 | queue_list); | 2173 | queue_list); |
@@ -2152,6 +2182,7 @@ static void cfq_exit_queue(elevator_t *e) | |||
2152 | cic->key = NULL; | 2182 | cic->key = NULL; |
2153 | list_del_init(&cic->queue_list); | 2183 | list_del_init(&cic->queue_list); |
2154 | } | 2184 | } |
2185 | |||
2155 | spin_unlock_irq(q->queue_lock); | 2186 | spin_unlock_irq(q->queue_lock); |
2156 | write_unlock(&cfq_exit_lock); | 2187 | write_unlock(&cfq_exit_lock); |
2157 | 2188 | ||
@@ -2227,7 +2258,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) | |||
2227 | cfqd->cfq_slice[1] = cfq_slice_sync; | 2258 | cfqd->cfq_slice[1] = cfq_slice_sync; |
2228 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; | 2259 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; |
2229 | cfqd->cfq_slice_idle = cfq_slice_idle; | 2260 | cfqd->cfq_slice_idle = cfq_slice_idle; |
2230 | cfqd->cfq_max_depth = cfq_max_depth; | ||
2231 | 2261 | ||
2232 | return 0; | 2262 | return 0; |
2233 | out_crqpool: | 2263 | out_crqpool: |
@@ -2310,7 +2340,6 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); | |||
2310 | SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); | 2340 | SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); |
2311 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); | 2341 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); |
2312 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); | 2342 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); |
2313 | SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0); | ||
2314 | #undef SHOW_FUNCTION | 2343 | #undef SHOW_FUNCTION |
2315 | 2344 | ||
2316 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ | 2345 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ |
@@ -2339,7 +2368,6 @@ STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); | |||
2339 | STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); | 2368 | STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); |
2340 | STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); | 2369 | STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); |
2341 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); | 2370 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0); |
2342 | STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0); | ||
2343 | #undef STORE_FUNCTION | 2371 | #undef STORE_FUNCTION |
2344 | 2372 | ||
2345 | #define CFQ_ATTR(name) \ | 2373 | #define CFQ_ATTR(name) \ |
@@ -2356,7 +2384,6 @@ static struct elv_fs_entry cfq_attrs[] = { | |||
2356 | CFQ_ATTR(slice_async), | 2384 | CFQ_ATTR(slice_async), |
2357 | CFQ_ATTR(slice_async_rq), | 2385 | CFQ_ATTR(slice_async_rq), |
2358 | CFQ_ATTR(slice_idle), | 2386 | CFQ_ATTR(slice_idle), |
2359 | CFQ_ATTR(max_depth), | ||
2360 | __ATTR_NULL | 2387 | __ATTR_NULL |
2361 | }; | 2388 | }; |
2362 | 2389 | ||
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 5a19e2eb5711..5b26af8597f3 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c | |||
@@ -3539,11 +3539,17 @@ void put_io_context(struct io_context *ioc) | |||
3539 | BUG_ON(atomic_read(&ioc->refcount) == 0); | 3539 | BUG_ON(atomic_read(&ioc->refcount) == 0); |
3540 | 3540 | ||
3541 | if (atomic_dec_and_test(&ioc->refcount)) { | 3541 | if (atomic_dec_and_test(&ioc->refcount)) { |
3542 | struct cfq_io_context *cic; | ||
3543 | |||
3542 | rcu_read_lock(); | 3544 | rcu_read_lock(); |
3543 | if (ioc->aic && ioc->aic->dtor) | 3545 | if (ioc->aic && ioc->aic->dtor) |
3544 | ioc->aic->dtor(ioc->aic); | 3546 | ioc->aic->dtor(ioc->aic); |
3545 | if (ioc->cic && ioc->cic->dtor) | 3547 | if (ioc->cic_root.rb_node != NULL) { |
3546 | ioc->cic->dtor(ioc->cic); | 3548 | struct rb_node *n = rb_first(&ioc->cic_root); |
3549 | |||
3550 | cic = rb_entry(n, struct cfq_io_context, rb_node); | ||
3551 | cic->dtor(ioc); | ||
3552 | } | ||
3547 | rcu_read_unlock(); | 3553 | rcu_read_unlock(); |
3548 | 3554 | ||
3549 | kmem_cache_free(iocontext_cachep, ioc); | 3555 | kmem_cache_free(iocontext_cachep, ioc); |
@@ -3556,6 +3562,7 @@ void exit_io_context(void) | |||
3556 | { | 3562 | { |
3557 | unsigned long flags; | 3563 | unsigned long flags; |
3558 | struct io_context *ioc; | 3564 | struct io_context *ioc; |
3565 | struct cfq_io_context *cic; | ||
3559 | 3566 | ||
3560 | local_irq_save(flags); | 3567 | local_irq_save(flags); |
3561 | task_lock(current); | 3568 | task_lock(current); |
@@ -3567,9 +3574,11 @@ void exit_io_context(void) | |||
3567 | 3574 | ||
3568 | if (ioc->aic && ioc->aic->exit) | 3575 | if (ioc->aic && ioc->aic->exit) |
3569 | ioc->aic->exit(ioc->aic); | 3576 | ioc->aic->exit(ioc->aic); |
3570 | if (ioc->cic && ioc->cic->exit) | 3577 | if (ioc->cic_root.rb_node != NULL) { |
3571 | ioc->cic->exit(ioc->cic); | 3578 | cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); |
3572 | 3579 | cic->exit(ioc); | |
3580 | } | ||
3581 | |||
3573 | put_io_context(ioc); | 3582 | put_io_context(ioc); |
3574 | } | 3583 | } |
3575 | 3584 | ||
@@ -3598,7 +3607,7 @@ struct io_context *current_io_context(gfp_t gfp_flags) | |||
3598 | ret->last_waited = jiffies; /* doesn't matter... */ | 3607 | ret->last_waited = jiffies; /* doesn't matter... */ |
3599 | ret->nr_batch_requests = 0; /* because this is 0 */ | 3608 | ret->nr_batch_requests = 0; /* because this is 0 */ |
3600 | ret->aic = NULL; | 3609 | ret->aic = NULL; |
3601 | ret->cic = NULL; | 3610 | ret->cic_root.rb_node = NULL; |
3602 | tsk->io_context = ret; | 3611 | tsk->io_context = ret; |
3603 | } | 3612 | } |
3604 | 3613 | ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c179966f1a2f..d0cac8b58de7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -55,25 +55,29 @@ struct as_io_context { | |||
55 | 55 | ||
56 | struct cfq_queue; | 56 | struct cfq_queue; |
57 | struct cfq_io_context { | 57 | struct cfq_io_context { |
58 | /* | 58 | struct rb_node rb_node; |
59 | * circular list of cfq_io_contexts belonging to a process io context | ||
60 | */ | ||
61 | struct list_head list; | ||
62 | struct cfq_queue *cfqq[2]; | ||
63 | void *key; | 59 | void *key; |
64 | 60 | ||
61 | struct cfq_queue *cfqq[2]; | ||
62 | |||
65 | struct io_context *ioc; | 63 | struct io_context *ioc; |
66 | 64 | ||
67 | unsigned long last_end_request; | 65 | unsigned long last_end_request; |
68 | unsigned long last_queue; | 66 | sector_t last_request_pos; |
67 | unsigned long last_queue; | ||
68 | |||
69 | unsigned long ttime_total; | 69 | unsigned long ttime_total; |
70 | unsigned long ttime_samples; | 70 | unsigned long ttime_samples; |
71 | unsigned long ttime_mean; | 71 | unsigned long ttime_mean; |
72 | 72 | ||
73 | unsigned int seek_samples; | ||
74 | u64 seek_total; | ||
75 | sector_t seek_mean; | ||
76 | |||
73 | struct list_head queue_list; | 77 | struct list_head queue_list; |
74 | 78 | ||
75 | void (*dtor)(struct cfq_io_context *); | 79 | void (*dtor)(struct io_context *); /* destructor */ |
76 | void (*exit)(struct cfq_io_context *); | 80 | void (*exit)(struct io_context *); /* called on task exit */ |
77 | }; | 81 | }; |
78 | 82 | ||
79 | /* | 83 | /* |
@@ -94,7 +98,7 @@ struct io_context { | |||
94 | int nr_batch_requests; /* Number of requests left in the batch */ | 98 | int nr_batch_requests; /* Number of requests left in the batch */ |
95 | 99 | ||
96 | struct as_io_context *aic; | 100 | struct as_io_context *aic; |
97 | struct cfq_io_context *cic; | 101 | struct rb_root cic_root; |
98 | }; | 102 | }; |
99 | 103 | ||
100 | void put_io_context(struct io_context *ioc); | 104 | void put_io_context(struct io_context *ioc); |