 Documentation/cgroups/blkio-controller.txt | 28 ----------------------------
 block/cfq-iosched.c                        | 37 +------------------------------------
 2 files changed, 1 insertion(+), 64 deletions(-)
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 4ed7b5ceeed2..d915c16df42c 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -343,34 +343,6 @@ Common files among various policies
 
 CFQ sysfs tunable
 =================
-/sys/block/<disk>/queue/iosched/group_isolation
------------------------------------------------
-
-If group_isolation=1, it provides stronger isolation between groups at the
-expense of throughput. By default group_isolation is 0. In general that
-means that if group_isolation=0, expect fairness for sequential workload
-only. Set group_isolation=1 to see fairness for random IO workload also.
-
-Generally CFQ will put random seeky workload in sync-noidle category. CFQ
-will disable idling on these queues and it does a collective idling on group
-of such queues. Generally these are slow moving queues and if there is a
-sync-noidle service tree in each group, that group gets exclusive access to
-disk for certain period. That means it will bring the throughput down if
-group does not have enough IO to drive deeper queue depths and utilize disk
-capacity to the fullest in the slice allocated to it. But the flip side is
-that even a random reader should get better latencies and overall throughput
-if there are lots of sequential readers/sync-idle workload running in the
-system.
-
-If group_isolation=0, then CFQ automatically moves all the random seeky queues
-in the root group. That means there will be no service differentiation for
-that kind of workload. This leads to better throughput as we do collective
-idling on root sync-noidle tree.
-
-By default one should run with group_isolation=0. If that is not sufficient
-and one wants stronger isolation between groups, then set group_isolation=1
-but this will come at cost of reduced throughput.
-
 /sys/block/<disk>/queue/iosched/slice_idle
 ------------------------------------------
 On a faster hardware CFQ can be slow, especially with sequential workload.
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index f27ff3efe6cd..3202c7e87fb3 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -146,7 +146,6 @@ struct cfq_queue {
 	struct cfq_rb_root *service_tree;
 	struct cfq_queue *new_cfqq;
 	struct cfq_group *cfqg;
-	struct cfq_group *orig_cfqg;
 	/* Number of sectors dispatched from queue in single dispatch round */
 	unsigned long nr_sectors;
 };
@@ -285,7 +284,6 @@ struct cfq_data {
 	unsigned int cfq_slice_idle;
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
-	unsigned int cfq_group_isolation;
 
 	unsigned int cic_index;
 	struct list_head cic_list;
@@ -1187,32 +1185,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	int new_cfqq = 1;
 	int group_changed = 0;
 
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-	if (!cfqd->cfq_group_isolation
-	    && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD
-	    && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) {
-		/* Move this cfq to root group */
-		cfq_log_cfqq(cfqd, cfqq, "moving to root group");
-		if (!RB_EMPTY_NODE(&cfqq->rb_node))
-			cfq_group_service_tree_del(cfqd, cfqq->cfqg);
-		cfqq->orig_cfqg = cfqq->cfqg;
-		cfqq->cfqg = &cfqd->root_group;
-		cfqd->root_group.ref++;
-		group_changed = 1;
-	} else if (!cfqd->cfq_group_isolation
-		   && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
-		/* cfqq is sequential now needs to go to its original group */
-		BUG_ON(cfqq->cfqg != &cfqd->root_group);
-		if (!RB_EMPTY_NODE(&cfqq->rb_node))
-			cfq_group_service_tree_del(cfqd, cfqq->cfqg);
-		cfq_put_cfqg(cfqq->cfqg);
-		cfqq->cfqg = cfqq->orig_cfqg;
-		cfqq->orig_cfqg = NULL;
-		group_changed = 1;
-		cfq_log_cfqq(cfqd, cfqq, "moved to origin group");
-	}
-#endif
-
 	service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
 						cfqq_type(cfqq));
 	if (cfq_class_idle(cfqq)) {
@@ -2542,7 +2514,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
 	struct cfq_data *cfqd = cfqq->cfqd;
-	struct cfq_group *cfqg, *orig_cfqg;
+	struct cfq_group *cfqg;
 
 	BUG_ON(cfqq->ref <= 0);
 
@@ -2554,7 +2526,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	BUG_ON(rb_first(&cfqq->sort_list));
 	BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
 	cfqg = cfqq->cfqg;
-	orig_cfqg = cfqq->orig_cfqg;
 
 	if (unlikely(cfqd->active_queue == cfqq)) {
 		__cfq_slice_expired(cfqd, cfqq, 0);
@@ -2564,8 +2535,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	kmem_cache_free(cfq_pool, cfqq);
 	cfq_put_cfqg(cfqg);
-	if (orig_cfqg)
-		cfq_put_cfqg(orig_cfqg);
 }
 
 /*
@@ -3953,7 +3922,6 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 	cfqd->cfq_group_idle = cfq_group_idle;
 	cfqd->cfq_latency = 1;
-	cfqd->cfq_group_isolation = 0;
 	cfqd->hw_tag = -1;
 	/*
 	 * we optimistically start assuming sync ops weren't delayed in last
@@ -4029,7 +3997,6 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
 SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
-SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -4063,7 +4030,6 @@ STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
 		UINT_MAX, 0);
 STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
-STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -4081,7 +4047,6 @@ static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(slice_idle),
 	CFQ_ATTR(group_idle),
 	CFQ_ATTR(low_latency),
-	CFQ_ATTR(group_isolation),
 	__ATTR_NULL
 };
 