aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig.iosched9
-rw-r--r--block/cfq-iosched.c62
2 files changed, 69 insertions, 2 deletions
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index be0280deec29..fa95fa770570 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -23,7 +23,6 @@ config IOSCHED_DEADLINE
23 23
24config IOSCHED_CFQ 24config IOSCHED_CFQ
25 tristate "CFQ I/O scheduler" 25 tristate "CFQ I/O scheduler"
26 select BLK_CGROUP
27 default y 26 default y
28 ---help--- 27 ---help---
29 The CFQ I/O scheduler tries to distribute bandwidth equally 28 The CFQ I/O scheduler tries to distribute bandwidth equally
@@ -33,6 +32,14 @@ config IOSCHED_CFQ
33 32
34 This is the default I/O scheduler. 33 This is the default I/O scheduler.
35 34
35config CFQ_GROUP_IOSCHED
36 bool "CFQ Group Scheduling support"
37 depends on IOSCHED_CFQ && CGROUPS
38 select BLK_CGROUP
39 default n
40 ---help---
41 Enable group IO scheduling in CFQ.
42
36choice 43choice
37 prompt "Default I/O scheduler" 44 prompt "Default I/O scheduler"
38 default DEFAULT_CFQ 45 default DEFAULT_CFQ
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e1f822ac4690..019f28eea9df 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -13,6 +13,7 @@
13#include <linux/rbtree.h> 13#include <linux/rbtree.h>
14#include <linux/ioprio.h> 14#include <linux/ioprio.h>
15#include <linux/blktrace_api.h> 15#include <linux/blktrace_api.h>
16#include "blk-cgroup.h"
16 17
17/* 18/*
18 * tunables 19 * tunables
@@ -49,6 +50,7 @@ static const int cfq_hist_divisor = 4;
49 50
50#define CFQ_SLICE_SCALE (5) 51#define CFQ_SLICE_SCALE (5)
51#define CFQ_HW_QUEUE_MIN (5) 52#define CFQ_HW_QUEUE_MIN (5)
53#define CFQ_SERVICE_SHIFT 12
52 54
53#define RQ_CIC(rq) \ 55#define RQ_CIC(rq) \
54 ((struct cfq_io_context *) (rq)->elevator_private) 56 ((struct cfq_io_context *) (rq)->elevator_private)
@@ -79,6 +81,7 @@ struct cfq_rb_root {
79 struct rb_node *left; 81 struct rb_node *left;
80 unsigned count; 82 unsigned count;
81 u64 min_vdisktime; 83 u64 min_vdisktime;
84 struct rb_node *active;
82}; 85};
83#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, } 86#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
84 87
@@ -163,6 +166,7 @@ struct cfq_group {
163 166
164 /* group service_tree key */ 167 /* group service_tree key */
165 u64 vdisktime; 168 u64 vdisktime;
169 unsigned int weight;
166 bool on_st; 170 bool on_st;
167 171
168 /* number of cfqq currently on this group */ 172 /* number of cfqq currently on this group */
@@ -434,6 +438,51 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
434 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); 438 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
435} 439}
436 440
441static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
442{
443 u64 d = delta << CFQ_SERVICE_SHIFT;
444
445 d = d * BLKIO_WEIGHT_DEFAULT;
446 do_div(d, cfqg->weight);
447 return d;
448}
449
450static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
451{
452 s64 delta = (s64)(vdisktime - min_vdisktime);
453 if (delta > 0)
454 min_vdisktime = vdisktime;
455
456 return min_vdisktime;
457}
458
459static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
460{
461 s64 delta = (s64)(vdisktime - min_vdisktime);
462 if (delta < 0)
463 min_vdisktime = vdisktime;
464
465 return min_vdisktime;
466}
467
468static void update_min_vdisktime(struct cfq_rb_root *st)
469{
470 u64 vdisktime = st->min_vdisktime;
471 struct cfq_group *cfqg;
472
473 if (st->active) {
474 cfqg = rb_entry_cfqg(st->active);
475 vdisktime = cfqg->vdisktime;
476 }
477
478 if (st->left) {
479 cfqg = rb_entry_cfqg(st->left);
480 vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
481 }
482
483 st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
484}
485
437/* 486/*
438 * get averaged number of queues of RT/BE priority. 487 * get averaged number of queues of RT/BE priority.
439 * average is updated, with a formula that gives more weight to higher numbers, 488 * average is updated, with a formula that gives more weight to higher numbers,
@@ -734,8 +783,12 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
734{ 783{
735 struct cfq_rb_root *st = &cfqd->grp_service_tree; 784 struct cfq_rb_root *st = &cfqd->grp_service_tree;
736 785
786 if (st->active == &cfqg->rb_node)
787 st->active = NULL;
788
737 BUG_ON(cfqg->nr_cfqq < 1); 789 BUG_ON(cfqg->nr_cfqq < 1);
738 cfqg->nr_cfqq--; 790 cfqg->nr_cfqq--;
791
739 /* If there are other cfq queues under this group, don't delete it */ 792 /* If there are other cfq queues under this group, don't delete it */
740 if (cfqg->nr_cfqq) 793 if (cfqg->nr_cfqq)
741 return; 794 return;
@@ -1654,10 +1707,14 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
1654static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) 1707static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
1655{ 1708{
1656 struct cfq_rb_root *st = &cfqd->grp_service_tree; 1709 struct cfq_rb_root *st = &cfqd->grp_service_tree;
1710 struct cfq_group *cfqg;
1657 1711
1658 if (RB_EMPTY_ROOT(&st->rb)) 1712 if (RB_EMPTY_ROOT(&st->rb))
1659 return NULL; 1713 return NULL;
1660 return cfq_rb_first_group(st); 1714 cfqg = cfq_rb_first_group(st);
1715 st->active = &cfqg->rb_node;
1716 update_min_vdisktime(st);
1717 return cfqg;
1661} 1718}
1662 1719
1663static void cfq_choose_cfqg(struct cfq_data *cfqd) 1720static void cfq_choose_cfqg(struct cfq_data *cfqd)
@@ -3150,6 +3207,9 @@ static void *cfq_init_queue(struct request_queue *q)
3150 *st = CFQ_RB_ROOT; 3207 *st = CFQ_RB_ROOT;
3151 RB_CLEAR_NODE(&cfqg->rb_node); 3208 RB_CLEAR_NODE(&cfqg->rb_node);
3152 3209
3210 /* Give preference to root group over other groups */
3211 cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
3212
3153 /* 3213 /*
3154 * Not strictly needed (since RB_ROOT just clears the node and we 3214 * Not strictly needed (since RB_ROOT just clears the node and we
3155 * zeroed cfqd on alloc), but better be safe in case someone decides 3215 * zeroed cfqd on alloc), but better be safe in case someone decides