diff options
| author | Vivek Goyal <vgoyal@redhat.com> | 2009-12-03 12:59:43 -0500 |
|---|---|---|
| committer | Jens Axboe <jens.axboe@oracle.com> | 2009-12-03 13:28:52 -0500 |
| commit | 25bc6b07767fe77422312eda2af99c9477f76191 (patch) | |
| tree | 1e0f2d5a18efb63c651df8773a6d5ad2b87d7687 | |
| parent | 31e4c28d95e64f2d5d3c497a3ecf37c62de635b4 (diff) | |
blkio: Introduce per cfq group weights and vdisktime calculations
o Bring in the per cfq group weight and how vdisktime is calculated for the
group. Also bring in the functionality of updating the min_vdisktime of
the group service tree.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
| -rw-r--r-- | block/Kconfig.iosched | 9 | ||||
| -rw-r--r-- | block/cfq-iosched.c | 62 |
2 files changed, 69 insertions, 2 deletions
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index be0280deec29..fa95fa770570 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
| @@ -23,7 +23,6 @@ config IOSCHED_DEADLINE | |||
| 23 | 23 | ||
| 24 | config IOSCHED_CFQ | 24 | config IOSCHED_CFQ |
| 25 | tristate "CFQ I/O scheduler" | 25 | tristate "CFQ I/O scheduler" |
| 26 | select BLK_CGROUP | ||
| 27 | default y | 26 | default y |
| 28 | ---help--- | 27 | ---help--- |
| 29 | The CFQ I/O scheduler tries to distribute bandwidth equally | 28 | The CFQ I/O scheduler tries to distribute bandwidth equally |
| @@ -33,6 +32,14 @@ config IOSCHED_CFQ | |||
| 33 | 32 | ||
| 34 | This is the default I/O scheduler. | 33 | This is the default I/O scheduler. |
| 35 | 34 | ||
| 35 | config CFQ_GROUP_IOSCHED | ||
| 36 | bool "CFQ Group Scheduling support" | ||
| 37 | depends on IOSCHED_CFQ && CGROUPS | ||
| 38 | select BLK_CGROUP | ||
| 39 | default n | ||
| 40 | ---help--- | ||
| 41 | Enable group IO scheduling in CFQ. | ||
| 42 | |||
| 36 | choice | 43 | choice |
| 37 | prompt "Default I/O scheduler" | 44 | prompt "Default I/O scheduler" |
| 38 | default DEFAULT_CFQ | 45 | default DEFAULT_CFQ |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e1f822ac4690..019f28eea9df 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/rbtree.h> | 13 | #include <linux/rbtree.h> |
| 14 | #include <linux/ioprio.h> | 14 | #include <linux/ioprio.h> |
| 15 | #include <linux/blktrace_api.h> | 15 | #include <linux/blktrace_api.h> |
| 16 | #include "blk-cgroup.h" | ||
| 16 | 17 | ||
| 17 | /* | 18 | /* |
| 18 | * tunables | 19 | * tunables |
| @@ -49,6 +50,7 @@ static const int cfq_hist_divisor = 4; | |||
| 49 | 50 | ||
| 50 | #define CFQ_SLICE_SCALE (5) | 51 | #define CFQ_SLICE_SCALE (5) |
| 51 | #define CFQ_HW_QUEUE_MIN (5) | 52 | #define CFQ_HW_QUEUE_MIN (5) |
| 53 | #define CFQ_SERVICE_SHIFT 12 | ||
| 52 | 54 | ||
| 53 | #define RQ_CIC(rq) \ | 55 | #define RQ_CIC(rq) \ |
| 54 | ((struct cfq_io_context *) (rq)->elevator_private) | 56 | ((struct cfq_io_context *) (rq)->elevator_private) |
| @@ -79,6 +81,7 @@ struct cfq_rb_root { | |||
| 79 | struct rb_node *left; | 81 | struct rb_node *left; |
| 80 | unsigned count; | 82 | unsigned count; |
| 81 | u64 min_vdisktime; | 83 | u64 min_vdisktime; |
| 84 | struct rb_node *active; | ||
| 82 | }; | 85 | }; |
| 83 | #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, } | 86 | #define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, } |
| 84 | 87 | ||
| @@ -163,6 +166,7 @@ struct cfq_group { | |||
| 163 | 166 | ||
| 164 | /* group service_tree key */ | 167 | /* group service_tree key */ |
| 165 | u64 vdisktime; | 168 | u64 vdisktime; |
| 169 | unsigned int weight; | ||
| 166 | bool on_st; | 170 | bool on_st; |
| 167 | 171 | ||
| 168 | /* number of cfqq currently on this group */ | 172 | /* number of cfqq currently on this group */ |
| @@ -434,6 +438,51 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
| 434 | return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); | 438 | return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); |
| 435 | } | 439 | } |
| 436 | 440 | ||
| 441 | static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg) | ||
| 442 | { | ||
| 443 | u64 d = delta << CFQ_SERVICE_SHIFT; | ||
| 444 | |||
| 445 | d = d * BLKIO_WEIGHT_DEFAULT; | ||
| 446 | do_div(d, cfqg->weight); | ||
| 447 | return d; | ||
| 448 | } | ||
| 449 | |||
| 450 | static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) | ||
| 451 | { | ||
| 452 | s64 delta = (s64)(vdisktime - min_vdisktime); | ||
| 453 | if (delta > 0) | ||
| 454 | min_vdisktime = vdisktime; | ||
| 455 | |||
| 456 | return min_vdisktime; | ||
| 457 | } | ||
| 458 | |||
| 459 | static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime) | ||
| 460 | { | ||
| 461 | s64 delta = (s64)(vdisktime - min_vdisktime); | ||
| 462 | if (delta < 0) | ||
| 463 | min_vdisktime = vdisktime; | ||
| 464 | |||
| 465 | return min_vdisktime; | ||
| 466 | } | ||
| 467 | |||
| 468 | static void update_min_vdisktime(struct cfq_rb_root *st) | ||
| 469 | { | ||
| 470 | u64 vdisktime = st->min_vdisktime; | ||
| 471 | struct cfq_group *cfqg; | ||
| 472 | |||
| 473 | if (st->active) { | ||
| 474 | cfqg = rb_entry_cfqg(st->active); | ||
| 475 | vdisktime = cfqg->vdisktime; | ||
| 476 | } | ||
| 477 | |||
| 478 | if (st->left) { | ||
| 479 | cfqg = rb_entry_cfqg(st->left); | ||
| 480 | vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); | ||
| 481 | } | ||
| 482 | |||
| 483 | st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime); | ||
| 484 | } | ||
| 485 | |||
| 437 | /* | 486 | /* |
| 438 | * get averaged number of queues of RT/BE priority. | 487 | * get averaged number of queues of RT/BE priority. |
| 439 | * average is updated, with a formula that gives more weight to higher numbers, | 488 | * average is updated, with a formula that gives more weight to higher numbers, |
| @@ -734,8 +783,12 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
| 734 | { | 783 | { |
| 735 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 784 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
| 736 | 785 | ||
| 786 | if (st->active == &cfqg->rb_node) | ||
| 787 | st->active = NULL; | ||
| 788 | |||
| 737 | BUG_ON(cfqg->nr_cfqq < 1); | 789 | BUG_ON(cfqg->nr_cfqq < 1); |
| 738 | cfqg->nr_cfqq--; | 790 | cfqg->nr_cfqq--; |
| 791 | |||
| 739 | /* If there are other cfq queues under this group, don't delete it */ | 792 | /* If there are other cfq queues under this group, don't delete it */ |
| 740 | if (cfqg->nr_cfqq) | 793 | if (cfqg->nr_cfqq) |
| 741 | return; | 794 | return; |
| @@ -1654,10 +1707,14 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
| 1654 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) | 1707 | static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) |
| 1655 | { | 1708 | { |
| 1656 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 1709 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
| 1710 | struct cfq_group *cfqg; | ||
| 1657 | 1711 | ||
| 1658 | if (RB_EMPTY_ROOT(&st->rb)) | 1712 | if (RB_EMPTY_ROOT(&st->rb)) |
| 1659 | return NULL; | 1713 | return NULL; |
| 1660 | return cfq_rb_first_group(st); | 1714 | cfqg = cfq_rb_first_group(st); |
| 1715 | st->active = &cfqg->rb_node; | ||
| 1716 | update_min_vdisktime(st); | ||
| 1717 | return cfqg; | ||
| 1661 | } | 1718 | } |
| 1662 | 1719 | ||
| 1663 | static void cfq_choose_cfqg(struct cfq_data *cfqd) | 1720 | static void cfq_choose_cfqg(struct cfq_data *cfqd) |
| @@ -3150,6 +3207,9 @@ static void *cfq_init_queue(struct request_queue *q) | |||
| 3150 | *st = CFQ_RB_ROOT; | 3207 | *st = CFQ_RB_ROOT; |
| 3151 | RB_CLEAR_NODE(&cfqg->rb_node); | 3208 | RB_CLEAR_NODE(&cfqg->rb_node); |
| 3152 | 3209 | ||
| 3210 | /* Give preference to root group over other groups */ | ||
| 3211 | cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT; | ||
| 3212 | |||
| 3153 | /* | 3213 | /* |
| 3154 | * Not strictly needed (since RB_ROOT just clears the node and we | 3214 | * Not strictly needed (since RB_ROOT just clears the node and we |
| 3155 | * zeroed cfqd on alloc), but better be safe in case someone decides | 3215 | * zeroed cfqd on alloc), but better be safe in case someone decides |
