author		Corrado Zoccolo <czoccolo@gmail.com>	2009-10-26 17:45:29 -0400
committer	Jens Axboe <jens.axboe@oracle.com>	2009-10-28 04:23:26 -0400
commit		718eee0579b802aabe3bafacf09d0a9b0830f1dd
tree		3a85a6d38ed7b68ed6ca21d04158afee13980e5e /block
parent		a6d44e982d3734583b3b4e1d36921af8cfd61fc0
cfq-iosched: fairness for sync no-idle queues
Currently no-idle queues in cfq are not serviced fairly: even if they
can only dispatch a small number of requests at a time, they have to
compete with idling queues to be serviced, experiencing large latencies.

We should notice, instead, that no-idle queues are the ones that would
benefit most from having low latency; in fact they are any of:

* processes with large think times (e.g. interactive ones like file
  managers)
* seeky processes (e.g. programs faulting in their code at startup)
* processes marked as no-idle from upper levels, to improve the latency
  of those requests.

This patch improves fairness and latency for those queues by:

* separating sync idle, sync no-idle and async queues into separate
  service_trees for each priority
* servicing all no-idle queues together
* idling when the last no-idle queue has been serviced, to anticipate
  more no-idle work
* computing the timeslices allotted to the idle and no-idle
  service_trees proportionally to the number of processes in each set.

Servicing all no-idle queues together should give a performance boost
on NCQ-capable drives, without compromising fairness.

Signed-off-by: Corrado Zoccolo <czoccolo@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
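To illustrate the proportional timeslice rule described above, here is a
minimal standalone sketch of the slice computation performed by
choose_service_tree() in the patch below. The 300 ms target latency and
2 ms minimum timeslice mirror cfq's defaults, but the harness itself
(workload_slice() and the example counts) is illustrative only, not part
of the patch:

#include <stdio.h>

/* Illustrative defaults, in milliseconds (cfq keeps these in jiffies). */
#define CFQ_TARGET_LATENCY	300	/* cfq_target_latency */
#define CFQ_MIN_TT		2	/* minimum timeslice */

/*
 * Slice for one workload type: the target latency is shared out in
 * proportion to how many queues of that type are busy, relative to
 * all busy queues in the same priority class.
 */
static unsigned int workload_slice(unsigned int type_count,
				   unsigned int class_count)
{
	unsigned int slice;

	if (!class_count)
		return CFQ_MIN_TT;
	slice = CFQ_TARGET_LATENCY * type_count / class_count;
	return slice < CFQ_MIN_TT ? CFQ_MIN_TT : slice;
}

int main(void)
{
	/* 3 no-idle queues out of 10 busy BE queues get 3/10 of 300 ms. */
	printf("no-idle slice: %u ms\n", workload_slice(3, 10)); /* 90 ms */
	return 0;
}

So a large set of no-idle queues automatically receives a proportionally
larger share of the latency window, rather than each queue having to
compete with idling queues one slice at a time.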
Diffstat (limited to 'block')
-rw-r--r--	block/cfq-iosched.c	200
1 file changed, 168 insertions(+), 32 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 76afa3696894..859f534ae9ef 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -134,7 +134,7 @@ struct cfq_queue {
 };
 
 /*
- * Index in the service_trees.
+ * First index in the service_trees.
  * IDLE is handled separately, so it has negative index
  */
 enum wl_prio_t {
@@ -144,6 +144,16 @@ enum wl_prio_t {
 };
 
 /*
+ * Second index in the service_trees.
+ */
+enum wl_type_t {
+	ASYNC_WORKLOAD = 0,
+	SYNC_NOIDLE_WORKLOAD = 1,
+	SYNC_WORKLOAD = 2
+};
+
+
+/*
  * Per block device queue structure
  */
 struct cfq_data {
@@ -153,12 +163,14 @@ struct cfq_data {
 	 * rr lists of queues with requests, onle rr for each priority class.
 	 * Counts are embedded in the cfq_rb_root
 	 */
-	struct cfq_rb_root service_trees[2];
+	struct cfq_rb_root service_trees[2][3];
 	struct cfq_rb_root service_tree_idle;
 	/*
 	 * The priority currently being served
 	 */
 	enum wl_prio_t serving_prio;
+	enum wl_type_t serving_type;
+	unsigned long workload_expires;
 
 	/*
 	 * Each priority tree is sorted by next_request position. These
@@ -221,12 +233,13 @@ struct cfq_data {
 };
 
 static struct cfq_rb_root *service_tree_for(enum wl_prio_t prio,
+					    enum wl_type_t type,
 					    struct cfq_data *cfqd)
 {
 	if (prio == IDLE_WORKLOAD)
 		return &cfqd->service_tree_idle;
 
-	return &cfqd->service_trees[prio];
+	return &cfqd->service_trees[prio][type];
 }
 
 enum cfqq_state_flags {
@@ -282,12 +295,24 @@ static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
 	return BE_WORKLOAD;
 }
 
+
+static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
+{
+	if (!cfq_cfqq_sync(cfqq))
+		return ASYNC_WORKLOAD;
+	if (!cfq_cfqq_idle_window(cfqq))
+		return SYNC_NOIDLE_WORKLOAD;
+	return SYNC_WORKLOAD;
+}
+
 static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd)
 {
 	if (wl == IDLE_WORKLOAD)
 		return cfqd->service_tree_idle.count;
 
-	return cfqd->service_trees[wl].count;
+	return cfqd->service_trees[wl][ASYNC_WORKLOAD].count
+		+ cfqd->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count
+		+ cfqd->service_trees[wl][SYNC_WORKLOAD].count;
 }
 
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
@@ -597,7 +622,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rb_root *service_tree;
 	int left;
 
-	service_tree = service_tree_for(cfqq_prio(cfqq), cfqd);
+	service_tree = service_tree_for(cfqq_prio(cfqq), cfqq_type(cfqq), cfqd);
 	if (cfq_class_idle(cfqq)) {
 		rb_key = CFQ_IDLE_DELAY;
 		parent = rb_last(&service_tree->rb);
@@ -1030,7 +1055,7 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 {
 	struct cfq_rb_root *service_tree =
-		service_tree_for(cfqd->serving_prio, cfqd);
+		service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd);
 
 	if (RB_EMPTY_ROOT(&service_tree->rb))
 		return NULL;
@@ -1167,7 +1192,7 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
 static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	enum wl_prio_t prio = cfqq_prio(cfqq);
-	struct cfq_rb_root *service_tree;
+	struct cfq_rb_root *service_tree = cfqq->service_tree;
 
 	/* We never do for idle class queues. */
 	if (prio == IDLE_WORKLOAD)
@@ -1181,7 +1206,9 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 * Otherwise, we do only if they are the last ones
 	 * in their service tree.
 	 */
-	service_tree = service_tree_for(prio, cfqd);
+	if (!service_tree)
+		service_tree = service_tree_for(prio, cfqq_type(cfqq), cfqd);
+
 	if (service_tree->count == 0)
 		return true;
 
@@ -1235,14 +1262,20 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 
 	cfq_mark_cfqq_wait_request(cfqq);
 
-	/*
-	 * we don't want to idle for seeks, but we do want to allow
-	 * fair distribution of slice time for a process doing back-to-back
-	 * seeks. so allow a little bit of time for him to submit a new rq
-	 */
 	sl = cfqd->cfq_slice_idle;
-	if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))
+	/* are we servicing noidle tree, and there are more queues?
+	 * non-rotational or NCQ: no idle
+	 * non-NCQ rotational : very small idle, to allow
+	 * fair distribution of slice time for a process doing back-to-back
+	 * seeks.
+	 */
+	if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
+	    service_tree_for(cfqd->serving_prio, SYNC_NOIDLE_WORKLOAD, cfqd)
+		->count > 0) {
+		if (blk_queue_nonrot(cfqd->queue) || cfqd->hw_tag)
+			return;
 		sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
+	}
 
 	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
 	cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
@@ -1346,6 +1379,106 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
 	}
 }
 
+static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, enum wl_prio_t prio,
+				    bool prio_changed)
+{
+	struct cfq_queue *queue;
+	int i;
+	bool key_valid = false;
+	unsigned long lowest_key = 0;
+	enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
+
+	if (prio_changed) {
+		/*
+		 * When priorities switched, we prefer starting
+		 * from SYNC_NOIDLE (first choice), or just SYNC
+		 * over ASYNC
+		 */
+		if (service_tree_for(prio, cur_best, cfqd)->count)
+			return cur_best;
+		cur_best = SYNC_WORKLOAD;
+		if (service_tree_for(prio, cur_best, cfqd)->count)
+			return cur_best;
+
+		return ASYNC_WORKLOAD;
+	}
+
+	for (i = 0; i < 3; ++i) {
+		/* otherwise, select the one with lowest rb_key */
+		queue = cfq_rb_first(service_tree_for(prio, i, cfqd));
+		if (queue &&
+		    (!key_valid || time_before(queue->rb_key, lowest_key))) {
+			lowest_key = queue->rb_key;
+			cur_best = i;
+			key_valid = true;
+		}
+	}
+
+	return cur_best;
+}
+
+static void choose_service_tree(struct cfq_data *cfqd)
+{
+	enum wl_prio_t previous_prio = cfqd->serving_prio;
+	bool prio_changed;
+	unsigned slice;
+	unsigned count;
+
+	/* Choose next priority. RT > BE > IDLE */
+	if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
+		cfqd->serving_prio = RT_WORKLOAD;
+	else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))
+		cfqd->serving_prio = BE_WORKLOAD;
+	else {
+		cfqd->serving_prio = IDLE_WORKLOAD;
+		cfqd->workload_expires = jiffies + 1;
+		return;
+	}
+
+	/*
+	 * For RT and BE, we have to choose also the type
+	 * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
+	 * expiration time
+	 */
+	prio_changed = (cfqd->serving_prio != previous_prio);
+	count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd)
+		->count;
+
+	/*
+	 * If priority didn't change, check workload expiration,
+	 * and that we still have other queues ready
+	 */
+	if (!prio_changed && count &&
+	    !time_after(jiffies, cfqd->workload_expires))
+		return;
+
+	/* otherwise select new workload type */
+	cfqd->serving_type =
+		cfq_choose_wl(cfqd, cfqd->serving_prio, prio_changed);
+	count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd)
+		->count;
+
+	/*
+	 * the workload slice is computed as a fraction of target latency
+	 * proportional to the number of queues in that workload, over
+	 * all the queues in the same priority class
+	 */
+	slice = cfq_target_latency * count /
+		max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio],
+		      cfq_busy_queues_wl(cfqd->serving_prio, cfqd));
+
+	if (cfqd->serving_type == ASYNC_WORKLOAD)
+		/* async workload slice is scaled down according to
+		 * the sync/async slice ratio. */
+		slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
+	else
+		/* sync workload slice is at least 2 * cfq_slice_idle */
+		slice = max(slice, 2 * cfqd->cfq_slice_idle);
+
+	slice = max_t(unsigned, slice, CFQ_MIN_TT);
+	cfqd->workload_expires = jiffies + slice;
+}
+
 /*
  * Select a queue for service. If we have a current active queue,
  * check whether to continue servicing it, or retrieve and set a new one.
@@ -1398,14 +1531,13 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 expire:
 	cfq_slice_expired(cfqd, 0);
 new_queue:
-	if (!new_cfqq) {
-		if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
-			cfqd->serving_prio = RT_WORKLOAD;
-		else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))
-			cfqd->serving_prio = BE_WORKLOAD;
-		else
-			cfqd->serving_prio = IDLE_WORKLOAD;
-	}
+	/*
+	 * Current queue expired. Check if we have to switch to a new
+	 * service tree
+	 */
+	if (!new_cfqq)
+		choose_service_tree(cfqd);
+
 	cfqq = cfq_set_active_queue(cfqd, new_cfqq);
 keep_queue:
 	return cfqq;
@@ -1432,10 +1564,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq;
 	int dispatched = 0;
-	int i;
+	int i, j;
 	for (i = 0; i < 2; ++i)
-		while ((cfqq = cfq_rb_first(&cfqd->service_trees[i])) != NULL)
-			dispatched += __cfq_forced_dispatch_cfqq(cfqq);
+		for (j = 0; j < 3; ++j)
+			while ((cfqq = cfq_rb_first(&cfqd->service_trees[i][j]))
+				!= NULL)
+				dispatched += __cfq_forced_dispatch_cfqq(cfqq);
 
 	while ((cfqq = cfq_rb_first(&cfqd->service_tree_idle)) != NULL)
 		dispatched += __cfq_forced_dispatch_cfqq(cfqq);
@@ -2218,13 +2352,10 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
 
 	if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
-	    (!cfqd->cfq_latency && cfqd->hw_tag && CFQQ_SEEKY(cfqq)))
+	    (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq)))
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime_samples)) {
-		unsigned int slice_idle = cfqd->cfq_slice_idle;
-		if (sample_valid(cfqq->seek_samples) && CFQQ_SEEKY(cfqq))
-			slice_idle = msecs_to_jiffies(CFQ_MIN_TT);
-		if (cic->ttime_mean > slice_idle)
+		if (cic->ttime_mean > cfqd->cfq_slice_idle)
 			enable_idle = 0;
 		else
 			enable_idle = 1;
@@ -2262,6 +2393,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 	if (cfq_class_idle(cfqq))
 		return true;
 
+	if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD
+	    && new_cfqq->service_tree == cfqq->service_tree)
+		return true;
+
 	/*
 	 * if the new request is sync, but the currently running queue is
 	 * not, let the sync request have priority.
@@ -2778,14 +2913,15 @@ static void cfq_exit_queue(struct elevator_queue *e)
 static void *cfq_init_queue(struct request_queue *q)
 {
 	struct cfq_data *cfqd;
-	int i;
+	int i, j;
 
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (!cfqd)
 		return NULL;
 
 	for (i = 0; i < 2; ++i)
-		cfqd->service_trees[i] = CFQ_RB_ROOT;
+		for (j = 0; j < 3; ++j)
+			cfqd->service_trees[i][j] = CFQ_RB_ROOT;
 	cfqd->service_tree_idle = CFQ_RB_ROOT;
 
 	/*