author    Jens Axboe <axboe@suse.de>    2006-03-28 06:03:44 -0500
committer Jens Axboe <axboe@suse.de>    2006-03-28 06:03:44 -0500
commit    206dc69b31ca05baac68c75b8ed2ba7dd857d273
tree      f9ca5d996e19cb072165b1f6474c39b59b0e7451
parent    7143dd4b0127141a4f773e819d1d1f4ab82bb517
[BLOCK] cfq-iosched: seek and async performance fixes
Detect whether a given process is seeky and, if so, disable (mostly) the
idle window. We still allow just a little idle time, just enough to let
that process submit a new request; that is needed to maintain fairness
across priority groups.

In some cases we could set up several async queues. This is not optimal
from a performance point of view, since we want all async io in one queue
so it can be sorted well. It also impacted sync queues, as async io got
too much slice time.

Signed-off-by: Jens Axboe <axboe@suse.de>
-rw-r--r--  block/cfq-iosched.c     | 102
-rw-r--r--  include/linux/blkdev.h  |   8
2 files changed, 72 insertions(+), 38 deletions(-)
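For illustration, here is a minimal user-space sketch of the seek-distance
accounting this patch adds (the fixed-point math of cfq_update_io_seektime()
plus the sample_valid()/seek_mean check from cfq_arm_slice_timer()). The
struct seek_stats, update_seektime(), min_u64() and the synthetic request
stream in main() are invented for the demo; only the constants and the
arithmetic mirror the patch.

/*
 * Illustrative user-space model of the per-process seek statistics
 * introduced by this patch.  struct seek_stats, update_seektime(),
 * min_u64() and main() are invented for the demo; the constants and
 * the fixed-point math follow cfq_update_io_seektime().
 */
#include <stdio.h>
#include <stdint.h>

#define sample_valid(samples)	((samples) > 80)   /* as in the patch */

struct seek_stats {
	uint64_t last_request_pos;	/* end sector of previous request */
	unsigned int seek_samples;	/* decaying sample count, scaled by 256 */
	uint64_t seek_total;		/* decaying distance sum, scaled by 256 */
	uint64_t seek_mean;		/* seek_total / seek_samples */
};

static uint64_t min_u64(uint64_t a, uint64_t b)
{
	return a < b ? a : b;
}

static void update_seektime(struct seek_stats *s, uint64_t sector)
{
	uint64_t sdist = sector > s->last_request_pos ?
		sector - s->last_request_pos : s->last_request_pos - sector;

	/* clamp outliers (odd fragment, pagein, ...) like the patch does */
	if (s->seek_samples <= 60)
		sdist = min_u64(sdist, (s->seek_mean * 4) + 2*1024*1024);
	else
		sdist = min_u64(sdist, (s->seek_mean * 4) + 2*1024*64);

	/* 7/8 decay: each new sample contributes 1/8 of the average */
	s->seek_samples = (7 * s->seek_samples + 256) / 8;
	s->seek_total = (7 * s->seek_total + (uint64_t)256 * sdist) / 8;
	s->seek_mean = (s->seek_total + s->seek_samples / 2) / s->seek_samples;
}

int main(void)
{
	struct seek_stats s = { 0, 0, 0, 0 };
	uint64_t pos = 0;
	int i;

	/* a synthetic seeky process: every request jumps 2M sectors */
	for (i = 0; i < 100; i++) {
		pos += 2 * 1024 * 1024;
		update_seektime(&s, pos);
		s.last_request_pos = pos + 8;	/* request covered 8 sectors */
	}

	/* cfq_arm_slice_timer() would shrink the idle slice to 2 jiffies here */
	printf("samples=%u mean=%llu seeky=%s\n",
	       s.seek_samples, (unsigned long long)s.seek_mean,
	       sample_valid(s.seek_samples) && s.seek_mean > 131072 ?
	       "yes" : "no");
	return 0;
}

Since seek_samples decays by 7/8 and gains 256 per request, it converges
toward 256, so the "> 80" test roughly means "enough recent requests to
trust the mean". The 131072 cutoff is in sectors, about 64 MiB between
consecutive requests at 512-byte sectors.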
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 15152e2da0d2..67d446de0227 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -26,18 +26,12 @@ static const int cfq_back_penalty = 2; /* penalty of a backwards seek */
 static const int cfq_slice_sync = HZ / 10;
 static int cfq_slice_async = HZ / 25;
 static const int cfq_slice_async_rq = 2;
-static int cfq_slice_idle = HZ / 100;
+static int cfq_slice_idle = HZ / 70;
 
 #define CFQ_IDLE_GRACE		(HZ / 10)
 #define CFQ_SLICE_SCALE		(5)
 
 #define CFQ_KEY_ASYNC		(0)
-#define CFQ_KEY_ANY		(0xffff)
-
-/*
- * disable queueing at the driver/hardware level
- */
-static const int cfq_max_depth = 2;
 
 static DEFINE_RWLOCK(cfq_exit_lock);
 
@@ -102,6 +96,8 @@ static struct completion *ioc_gone;
 #define cfq_cfqq_sync(cfqq)	\
 	(cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])
 
+#define sample_valid(samples)	((samples) > 80)
+
 /*
  * Per block device queue structure
  */
@@ -170,7 +166,6 @@ struct cfq_data {
 	unsigned int cfq_slice[2];
 	unsigned int cfq_slice_async_rq;
 	unsigned int cfq_slice_idle;
-	unsigned int cfq_max_depth;
 
 	struct list_head cic_list;
 };
@@ -343,6 +338,14 @@ static int cfq_queue_empty(request_queue_t *q)
 	return !cfqd->busy_queues;
 }
 
+static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
+{
+	if (rw == READ || process_sync(task))
+		return task->pid;
+
+	return CFQ_KEY_ASYNC;
+}
+
 /*
  * Lifted from AS - choose which of crq1 and crq2 that is best served now.
  * We choose the request that is closest to the head right now. Distance
@@ -626,15 +629,20 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
 	cfq_add_crq_rb(crq);
 }
 
-static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
-
+static struct request *
+cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
 {
-	struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid, CFQ_KEY_ANY);
+	struct task_struct *tsk = current;
+	pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio));
+	struct cfq_queue *cfqq;
 	struct rb_node *n;
+	sector_t sector;
 
+	cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio);
 	if (!cfqq)
 		goto out;
 
+	sector = bio->bi_sector + bio_sectors(bio);
 	n = cfqq->sort_list.rb_node;
 	while (n) {
 		struct cfq_rq *crq = rb_entry_crq(n);
@@ -688,7 +696,7 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 		goto out;
 	}
 
-	__rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
+	__rq = cfq_find_rq_fmerge(cfqd, bio);
 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
 		ret = ELEVATOR_FRONT_MERGE;
 		goto out;
@@ -891,6 +899,7 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
 static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 {
+	struct cfq_io_context *cic;
 	unsigned long sl;
 
 	WARN_ON(!RB_EMPTY(&cfqq->sort_list));
@@ -906,13 +915,23 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	/*
 	 * task has exited, don't wait
 	 */
-	if (cfqd->active_cic && !cfqd->active_cic->ioc->task)
+	cic = cfqd->active_cic;
+	if (!cic || !cic->ioc->task)
 		return 0;
 
 	cfq_mark_cfqq_must_dispatch(cfqq);
 	cfq_mark_cfqq_wait_request(cfqq);
 
 	sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);
+
+	/*
+	 * we don't want to idle for seeks, but we do want to allow
+	 * fair distribution of slice time for a process doing back-to-back
+	 * seeks. so allow a little bit of time for him to submit a new rq
+	 */
+	if (sample_valid(cic->seek_samples) && cic->seek_mean > 131072)
+		sl = 2;
+
 	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
 	return 1;
 }
@@ -1129,13 +1148,6 @@ cfq_dispatch_requests(request_queue_t *q, int force)
 	if (cfqq) {
 		int max_dispatch;
 
-		/*
-		 * if idle window is disabled, allow queue buildup
-		 */
-		if (!cfq_cfqq_idle_window(cfqq) &&
-		    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
-			return 0;
-
 		cfq_clear_cfqq_must_dispatch(cfqq);
 		cfq_clear_cfqq_wait_request(cfqq);
 		del_timer(&cfqd->idle_slice_timer);
@@ -1185,13 +1197,13 @@ __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio,
 		 const int hashval)
 {
 	struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
-	struct hlist_node *entry, *next;
+	struct hlist_node *entry;
+	struct cfq_queue *__cfqq;
 
-	hlist_for_each_safe(entry, next, hash_list) {
-		struct cfq_queue *__cfqq = list_entry_qhash(entry);
+	hlist_for_each_entry(__cfqq, entry, hash_list, cfq_hash) {
 		const unsigned short __p = IOPRIO_PRIO_VALUE(__cfqq->org_ioprio_class, __cfqq->org_ioprio);
 
-		if (__cfqq->key == key && (__p == prio || prio == CFQ_KEY_ANY))
+		if (__cfqq->key == key && (__p == prio || !prio))
 			return __cfqq;
 	}
 
@@ -1572,7 +1584,33 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic)
 	cic->ttime_mean = (cic->ttime_total + 128) / cic->ttime_samples;
 }
 
-#define sample_valid(samples)	((samples) > 80)
+static void
+cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic,
+		       struct cfq_rq *crq)
+{
+	sector_t sdist;
+	u64 total;
+
+	if (cic->last_request_pos < crq->request->sector)
+		sdist = crq->request->sector - cic->last_request_pos;
+	else
+		sdist = cic->last_request_pos - crq->request->sector;
+
+	/*
+	 * Don't allow the seek distance to get too large from the
+	 * odd fragment, pagein, etc
+	 */
+	if (cic->seek_samples <= 60) /* second&third seek */
+		sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*1024);
+	else
+		sdist = min(sdist, (cic->seek_mean * 4) + 2*1024*64);
+
+	cic->seek_samples = (7*cic->seek_samples + 256) / 8;
+	cic->seek_total = (7*cic->seek_total + (u64)256*sdist) / 8;
+	total = cic->seek_total + (cic->seek_samples/2);
+	do_div(total, cic->seek_samples);
+	cic->seek_mean = (sector_t)total;
+}
 
 /*
  * Disable idle window if the process thinks too long or seeks so much that
@@ -1685,9 +1723,11 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	cic = crq->io_context;
 
 	cfq_update_io_thinktime(cfqd, cic);
+	cfq_update_io_seektime(cfqd, cic, crq);
 	cfq_update_idle_window(cfqd, cfqq, cic);
 
 	cic->last_queue = jiffies;
+	cic->last_request_pos = crq->request->sector + crq->request->nr_sectors;
 
 	if (cfqq == cfqd->active_queue) {
 		/*
@@ -1820,14 +1860,6 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 		cfq_resort_rr_list(cfqq, 0);
 }
 
-static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
-{
-	if (rw == READ || process_sync(task))
-		return task->pid;
-
-	return CFQ_KEY_ASYNC;
-}
-
 static inline int
 __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		struct task_struct *task, int rw)
@@ -2226,7 +2258,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e)
 	cfqd->cfq_slice[1] = cfq_slice_sync;
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
-	cfqd->cfq_max_depth = cfq_max_depth;
 
 	return 0;
 out_crqpool:
@@ -2309,7 +2340,6 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
-SHOW_FUNCTION(cfq_max_depth_show, cfqd->cfq_max_depth, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -2338,7 +2368,6 @@ STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, 0);
-STORE_FUNCTION(cfq_max_depth_store, &cfqd->cfq_max_depth, 1, UINT_MAX, 0);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -2355,7 +2384,6 @@ static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(slice_async),
 	CFQ_ATTR(slice_async_rq),
 	CFQ_ATTR(slice_idle),
-	CFQ_ATTR(max_depth),
 	__ATTR_NULL
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ed0ffa673568..d0cac8b58de7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -63,11 +63,17 @@ struct cfq_io_context {
 	struct io_context *ioc;
 
 	unsigned long last_end_request;
-	unsigned long last_queue;
+	sector_t last_request_pos;
+	unsigned long last_queue;
+
 	unsigned long ttime_total;
 	unsigned long ttime_samples;
 	unsigned long ttime_mean;
 
+	unsigned int seek_samples;
+	u64 seek_total;
+	sector_t seek_mean;
+
 	struct list_head queue_list;
 
 	void (*dtor)(struct io_context *);	/* destructor */