diff options
author | Vivek Goyal <vgoyal@redhat.com> | 2009-12-03 12:59:38 -0500 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-12-03 13:28:51 -0500 |
commit | cdb16e8f739985b8a5c9f4569b026583bbcd01a5 (patch) | |
tree | 860b74f8134cfbd516cc73b8b9a9edfe4e3d2db6 /block | |
parent | bf7919371025412978268efca4b09dd847acb395 (diff) |
blkio: Introduce the notion of cfq groups
o This patch introduces the notion of cfq groups. Soon we will be able to have
multiple groups of different weights in the system.
o Various service trees (prioclass and workload type trees), will become per
cfq group. So hierarchy looks as follows.
cfq_groups
|
workload type
|
cfq queue
o When a scheduling decision has to be taken, first we select the cfq group,
then the workload within the group, and then the cfq queue within the workload
type.
o This patch just makes the various workload service trees per cfq group and
introduces the function to be able to choose a group for scheduling.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/cfq-iosched.c | 108 |
1 files changed, 75 insertions, 33 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 15b53616516a..a4d17265411e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -132,6 +132,7 @@ struct cfq_queue { | |||
132 | 132 | ||
133 | struct cfq_rb_root *service_tree; | 133 | struct cfq_rb_root *service_tree; |
134 | struct cfq_queue *new_cfqq; | 134 | struct cfq_queue *new_cfqq; |
135 | struct cfq_group *cfqg; | ||
135 | }; | 136 | }; |
136 | 137 | ||
137 | /* | 138 | /* |
@@ -153,25 +154,30 @@ enum wl_type_t { | |||
153 | SYNC_WORKLOAD = 2 | 154 | SYNC_WORKLOAD = 2 |
154 | }; | 155 | }; |
155 | 156 | ||
157 | /* This is per cgroup per device grouping structure */ | ||
158 | struct cfq_group { | ||
159 | /* | ||
160 | * rr lists of queues with requests, onle rr for each priority class. | ||
161 | * Counts are embedded in the cfq_rb_root | ||
162 | */ | ||
163 | struct cfq_rb_root service_trees[2][3]; | ||
164 | struct cfq_rb_root service_tree_idle; | ||
165 | }; | ||
156 | 166 | ||
157 | /* | 167 | /* |
158 | * Per block device queue structure | 168 | * Per block device queue structure |
159 | */ | 169 | */ |
160 | struct cfq_data { | 170 | struct cfq_data { |
161 | struct request_queue *queue; | 171 | struct request_queue *queue; |
172 | struct cfq_group root_group; | ||
162 | 173 | ||
163 | /* | 174 | /* |
164 | * rr lists of queues with requests, onle rr for each priority class. | ||
165 | * Counts are embedded in the cfq_rb_root | ||
166 | */ | ||
167 | struct cfq_rb_root service_trees[2][3]; | ||
168 | struct cfq_rb_root service_tree_idle; | ||
169 | /* | ||
170 | * The priority currently being served | 175 | * The priority currently being served |
171 | */ | 176 | */ |
172 | enum wl_prio_t serving_prio; | 177 | enum wl_prio_t serving_prio; |
173 | enum wl_type_t serving_type; | 178 | enum wl_type_t serving_type; |
174 | unsigned long workload_expires; | 179 | unsigned long workload_expires; |
180 | struct cfq_group *serving_group; | ||
175 | bool noidle_tree_requires_idle; | 181 | bool noidle_tree_requires_idle; |
176 | 182 | ||
177 | /* | 183 | /* |
@@ -240,14 +246,15 @@ struct cfq_data { | |||
240 | unsigned long last_end_sync_rq; | 246 | unsigned long last_end_sync_rq; |
241 | }; | 247 | }; |
242 | 248 | ||
243 | static struct cfq_rb_root *service_tree_for(enum wl_prio_t prio, | 249 | static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg, |
250 | enum wl_prio_t prio, | ||
244 | enum wl_type_t type, | 251 | enum wl_type_t type, |
245 | struct cfq_data *cfqd) | 252 | struct cfq_data *cfqd) |
246 | { | 253 | { |
247 | if (prio == IDLE_WORKLOAD) | 254 | if (prio == IDLE_WORKLOAD) |
248 | return &cfqd->service_tree_idle; | 255 | return &cfqg->service_tree_idle; |
249 | 256 | ||
250 | return &cfqd->service_trees[prio][type]; | 257 | return &cfqg->service_trees[prio][type]; |
251 | } | 258 | } |
252 | 259 | ||
253 | enum cfqq_state_flags { | 260 | enum cfqq_state_flags { |
@@ -317,12 +324,14 @@ static enum wl_type_t cfqq_type(struct cfq_queue *cfqq) | |||
317 | 324 | ||
318 | static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd) | 325 | static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd) |
319 | { | 326 | { |
327 | struct cfq_group *cfqg = &cfqd->root_group; | ||
328 | |||
320 | if (wl == IDLE_WORKLOAD) | 329 | if (wl == IDLE_WORKLOAD) |
321 | return cfqd->service_tree_idle.count; | 330 | return cfqg->service_tree_idle.count; |
322 | 331 | ||
323 | return cfqd->service_trees[wl][ASYNC_WORKLOAD].count | 332 | return cfqg->service_trees[wl][ASYNC_WORKLOAD].count |
324 | + cfqd->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count | 333 | + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count |
325 | + cfqd->service_trees[wl][SYNC_WORKLOAD].count; | 334 | + cfqg->service_trees[wl][SYNC_WORKLOAD].count; |
326 | } | 335 | } |
327 | 336 | ||
328 | static void cfq_dispatch_insert(struct request_queue *, struct request *); | 337 | static void cfq_dispatch_insert(struct request_queue *, struct request *); |
@@ -612,7 +621,7 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, | |||
612 | /* | 621 | /* |
613 | * just an approximation, should be ok. | 622 | * just an approximation, should be ok. |
614 | */ | 623 | */ |
615 | return (cfqd->busy_queues - 1) * (cfq_prio_slice(cfqd, 1, 0) - | 624 | return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) - |
616 | cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio)); | 625 | cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio)); |
617 | } | 626 | } |
618 | 627 | ||
@@ -630,7 +639,8 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
630 | struct cfq_rb_root *service_tree; | 639 | struct cfq_rb_root *service_tree; |
631 | int left; | 640 | int left; |
632 | 641 | ||
633 | service_tree = service_tree_for(cfqq_prio(cfqq), cfqq_type(cfqq), cfqd); | 642 | service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), |
643 | cfqq_type(cfqq), cfqd); | ||
634 | if (cfq_class_idle(cfqq)) { | 644 | if (cfq_class_idle(cfqq)) { |
635 | rb_key = CFQ_IDLE_DELAY; | 645 | rb_key = CFQ_IDLE_DELAY; |
636 | parent = rb_last(&service_tree->rb); | 646 | parent = rb_last(&service_tree->rb); |
@@ -1066,7 +1076,8 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) | |||
1066 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) | 1076 | static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) |
1067 | { | 1077 | { |
1068 | struct cfq_rb_root *service_tree = | 1078 | struct cfq_rb_root *service_tree = |
1069 | service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd); | 1079 | service_tree_for(cfqd->serving_group, cfqd->serving_prio, |
1080 | cfqd->serving_type, cfqd); | ||
1070 | 1081 | ||
1071 | if (RB_EMPTY_ROOT(&service_tree->rb)) | 1082 | if (RB_EMPTY_ROOT(&service_tree->rb)) |
1072 | return NULL; | 1083 | return NULL; |
@@ -1218,7 +1229,8 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1218 | * in their service tree. | 1229 | * in their service tree. |
1219 | */ | 1230 | */ |
1220 | if (!service_tree) | 1231 | if (!service_tree) |
1221 | service_tree = service_tree_for(prio, cfqq_type(cfqq), cfqd); | 1232 | service_tree = service_tree_for(cfqq->cfqg, prio, |
1233 | cfqq_type(cfqq), cfqd); | ||
1222 | 1234 | ||
1223 | if (service_tree->count == 0) | 1235 | if (service_tree->count == 0) |
1224 | return true; | 1236 | return true; |
@@ -1377,8 +1389,9 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) | |||
1377 | } | 1389 | } |
1378 | } | 1390 | } |
1379 | 1391 | ||
1380 | static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, enum wl_prio_t prio, | 1392 | static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, |
1381 | bool prio_changed) | 1393 | struct cfq_group *cfqg, enum wl_prio_t prio, |
1394 | bool prio_changed) | ||
1382 | { | 1395 | { |
1383 | struct cfq_queue *queue; | 1396 | struct cfq_queue *queue; |
1384 | int i; | 1397 | int i; |
@@ -1392,10 +1405,10 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, enum wl_prio_t prio, | |||
1392 | * from SYNC_NOIDLE (first choice), or just SYNC | 1405 | * from SYNC_NOIDLE (first choice), or just SYNC |
1393 | * over ASYNC | 1406 | * over ASYNC |
1394 | */ | 1407 | */ |
1395 | if (service_tree_for(prio, cur_best, cfqd)->count) | 1408 | if (service_tree_for(cfqg, prio, cur_best, cfqd)->count) |
1396 | return cur_best; | 1409 | return cur_best; |
1397 | cur_best = SYNC_WORKLOAD; | 1410 | cur_best = SYNC_WORKLOAD; |
1398 | if (service_tree_for(prio, cur_best, cfqd)->count) | 1411 | if (service_tree_for(cfqg, prio, cur_best, cfqd)->count) |
1399 | return cur_best; | 1412 | return cur_best; |
1400 | 1413 | ||
1401 | return ASYNC_WORKLOAD; | 1414 | return ASYNC_WORKLOAD; |
@@ -1403,7 +1416,7 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, enum wl_prio_t prio, | |||
1403 | 1416 | ||
1404 | for (i = 0; i < 3; ++i) { | 1417 | for (i = 0; i < 3; ++i) { |
1405 | /* otherwise, select the one with lowest rb_key */ | 1418 | /* otherwise, select the one with lowest rb_key */ |
1406 | queue = cfq_rb_first(service_tree_for(prio, i, cfqd)); | 1419 | queue = cfq_rb_first(service_tree_for(cfqg, prio, i, cfqd)); |
1407 | if (queue && | 1420 | if (queue && |
1408 | (!key_valid || time_before(queue->rb_key, lowest_key))) { | 1421 | (!key_valid || time_before(queue->rb_key, lowest_key))) { |
1409 | lowest_key = queue->rb_key; | 1422 | lowest_key = queue->rb_key; |
@@ -1415,12 +1428,13 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, enum wl_prio_t prio, | |||
1415 | return cur_best; | 1428 | return cur_best; |
1416 | } | 1429 | } |
1417 | 1430 | ||
1418 | static void choose_service_tree(struct cfq_data *cfqd) | 1431 | static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) |
1419 | { | 1432 | { |
1420 | enum wl_prio_t previous_prio = cfqd->serving_prio; | 1433 | enum wl_prio_t previous_prio = cfqd->serving_prio; |
1421 | bool prio_changed; | 1434 | bool prio_changed; |
1422 | unsigned slice; | 1435 | unsigned slice; |
1423 | unsigned count; | 1436 | unsigned count; |
1437 | struct cfq_rb_root *st; | ||
1424 | 1438 | ||
1425 | /* Choose next priority. RT > BE > IDLE */ | 1439 | /* Choose next priority. RT > BE > IDLE */ |
1426 | if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd)) | 1440 | if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd)) |
@@ -1439,8 +1453,9 @@ static void choose_service_tree(struct cfq_data *cfqd) | |||
1439 | * expiration time | 1453 | * expiration time |
1440 | */ | 1454 | */ |
1441 | prio_changed = (cfqd->serving_prio != previous_prio); | 1455 | prio_changed = (cfqd->serving_prio != previous_prio); |
1442 | count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd) | 1456 | st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type, |
1443 | ->count; | 1457 | cfqd); |
1458 | count = st->count; | ||
1444 | 1459 | ||
1445 | /* | 1460 | /* |
1446 | * If priority didn't change, check workload expiration, | 1461 | * If priority didn't change, check workload expiration, |
@@ -1452,9 +1467,10 @@ static void choose_service_tree(struct cfq_data *cfqd) | |||
1452 | 1467 | ||
1453 | /* otherwise select new workload type */ | 1468 | /* otherwise select new workload type */ |
1454 | cfqd->serving_type = | 1469 | cfqd->serving_type = |
1455 | cfq_choose_wl(cfqd, cfqd->serving_prio, prio_changed); | 1470 | cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio, prio_changed); |
1456 | count = service_tree_for(cfqd->serving_prio, cfqd->serving_type, cfqd) | 1471 | st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type, |
1457 | ->count; | 1472 | cfqd); |
1473 | count = st->count; | ||
1458 | 1474 | ||
1459 | /* | 1475 | /* |
1460 | * the workload slice is computed as a fraction of target latency | 1476 | * the workload slice is computed as a fraction of target latency |
@@ -1478,6 +1494,12 @@ static void choose_service_tree(struct cfq_data *cfqd) | |||
1478 | cfqd->noidle_tree_requires_idle = false; | 1494 | cfqd->noidle_tree_requires_idle = false; |
1479 | } | 1495 | } |
1480 | 1496 | ||
1497 | static void cfq_choose_cfqg(struct cfq_data *cfqd) | ||
1498 | { | ||
1499 | cfqd->serving_group = &cfqd->root_group; | ||
1500 | choose_service_tree(cfqd, &cfqd->root_group); | ||
1501 | } | ||
1502 | |||
1481 | /* | 1503 | /* |
1482 | * Select a queue for service. If we have a current active queue, | 1504 | * Select a queue for service. If we have a current active queue, |
1483 | * check whether to continue servicing it, or retrieve and set a new one. | 1505 | * check whether to continue servicing it, or retrieve and set a new one. |
@@ -1535,7 +1557,7 @@ new_queue: | |||
1535 | * service tree | 1557 | * service tree |
1536 | */ | 1558 | */ |
1537 | if (!new_cfqq) | 1559 | if (!new_cfqq) |
1538 | choose_service_tree(cfqd); | 1560 | cfq_choose_cfqg(cfqd); |
1539 | 1561 | ||
1540 | cfqq = cfq_set_active_queue(cfqd, new_cfqq); | 1562 | cfqq = cfq_set_active_queue(cfqd, new_cfqq); |
1541 | keep_queue: | 1563 | keep_queue: |
@@ -1564,13 +1586,15 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) | |||
1564 | struct cfq_queue *cfqq; | 1586 | struct cfq_queue *cfqq; |
1565 | int dispatched = 0; | 1587 | int dispatched = 0; |
1566 | int i, j; | 1588 | int i, j; |
1589 | struct cfq_group *cfqg = &cfqd->root_group; | ||
1590 | |||
1567 | for (i = 0; i < 2; ++i) | 1591 | for (i = 0; i < 2; ++i) |
1568 | for (j = 0; j < 3; ++j) | 1592 | for (j = 0; j < 3; ++j) |
1569 | while ((cfqq = cfq_rb_first(&cfqd->service_trees[i][j])) | 1593 | while ((cfqq = cfq_rb_first(&cfqg->service_trees[i][j])) |
1570 | != NULL) | 1594 | != NULL) |
1571 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); | 1595 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); |
1572 | 1596 | ||
1573 | while ((cfqq = cfq_rb_first(&cfqd->service_tree_idle)) != NULL) | 1597 | while ((cfqq = cfq_rb_first(&cfqg->service_tree_idle)) != NULL) |
1574 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); | 1598 | dispatched += __cfq_forced_dispatch_cfqq(cfqq); |
1575 | 1599 | ||
1576 | cfq_slice_expired(cfqd, 0); | 1600 | cfq_slice_expired(cfqd, 0); |
@@ -2041,14 +2065,26 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
2041 | cfqq->pid = pid; | 2065 | cfqq->pid = pid; |
2042 | } | 2066 | } |
2043 | 2067 | ||
2068 | static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) | ||
2069 | { | ||
2070 | cfqq->cfqg = cfqg; | ||
2071 | } | ||
2072 | |||
2073 | static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) | ||
2074 | { | ||
2075 | return &cfqd->root_group; | ||
2076 | } | ||
2077 | |||
2044 | static struct cfq_queue * | 2078 | static struct cfq_queue * |
2045 | cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, | 2079 | cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, |
2046 | struct io_context *ioc, gfp_t gfp_mask) | 2080 | struct io_context *ioc, gfp_t gfp_mask) |
2047 | { | 2081 | { |
2048 | struct cfq_queue *cfqq, *new_cfqq = NULL; | 2082 | struct cfq_queue *cfqq, *new_cfqq = NULL; |
2049 | struct cfq_io_context *cic; | 2083 | struct cfq_io_context *cic; |
2084 | struct cfq_group *cfqg; | ||
2050 | 2085 | ||
2051 | retry: | 2086 | retry: |
2087 | cfqg = cfq_get_cfqg(cfqd, 1); | ||
2052 | cic = cfq_cic_lookup(cfqd, ioc); | 2088 | cic = cfq_cic_lookup(cfqd, ioc); |
2053 | /* cic always exists here */ | 2089 | /* cic always exists here */ |
2054 | cfqq = cic_to_cfqq(cic, is_sync); | 2090 | cfqq = cic_to_cfqq(cic, is_sync); |
@@ -2079,6 +2115,7 @@ retry: | |||
2079 | if (cfqq) { | 2115 | if (cfqq) { |
2080 | cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); | 2116 | cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); |
2081 | cfq_init_prio_data(cfqq, ioc); | 2117 | cfq_init_prio_data(cfqq, ioc); |
2118 | cfq_link_cfqq_cfqg(cfqq, cfqg); | ||
2082 | cfq_log_cfqq(cfqd, cfqq, "alloced"); | 2119 | cfq_log_cfqq(cfqd, cfqq, "alloced"); |
2083 | } else | 2120 | } else |
2084 | cfqq = &cfqd->oom_cfqq; | 2121 | cfqq = &cfqd->oom_cfqq; |
@@ -2931,15 +2968,19 @@ static void *cfq_init_queue(struct request_queue *q) | |||
2931 | { | 2968 | { |
2932 | struct cfq_data *cfqd; | 2969 | struct cfq_data *cfqd; |
2933 | int i, j; | 2970 | int i, j; |
2971 | struct cfq_group *cfqg; | ||
2934 | 2972 | ||
2935 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); | 2973 | cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); |
2936 | if (!cfqd) | 2974 | if (!cfqd) |
2937 | return NULL; | 2975 | return NULL; |
2938 | 2976 | ||
2977 | /* Init root group */ | ||
2978 | cfqg = &cfqd->root_group; | ||
2979 | |||
2939 | for (i = 0; i < 2; ++i) | 2980 | for (i = 0; i < 2; ++i) |
2940 | for (j = 0; j < 3; ++j) | 2981 | for (j = 0; j < 3; ++j) |
2941 | cfqd->service_trees[i][j] = CFQ_RB_ROOT; | 2982 | cfqg->service_trees[i][j] = CFQ_RB_ROOT; |
2942 | cfqd->service_tree_idle = CFQ_RB_ROOT; | 2983 | cfqg->service_tree_idle = CFQ_RB_ROOT; |
2943 | 2984 | ||
2944 | /* | 2985 | /* |
2945 | * Not strictly needed (since RB_ROOT just clears the node and we | 2986 | * Not strictly needed (since RB_ROOT just clears the node and we |
@@ -2956,6 +2997,7 @@ static void *cfq_init_queue(struct request_queue *q) | |||
2956 | */ | 2997 | */ |
2957 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); | 2998 | cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0); |
2958 | atomic_inc(&cfqd->oom_cfqq.ref); | 2999 | atomic_inc(&cfqd->oom_cfqq.ref); |
3000 | cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group); | ||
2959 | 3001 | ||
2960 | INIT_LIST_HEAD(&cfqd->cic_list); | 3002 | INIT_LIST_HEAD(&cfqd->cic_list); |
2961 | 3003 | ||