diff options
author | Vivek Goyal <vgoyal@redhat.com> | 2009-12-03 12:59:42 -0500 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-12-03 13:28:51 -0500 |
commit | 31e4c28d95e64f2d5d3c497a3ecf37c62de635b4 (patch) | |
tree | ffbb99b2565c4bdd4921fd7077164e7fc295b2bc | |
parent | 1fa8f6d68b5c8ca0a608fd8d296c5f07ac788cd6 (diff) |
blkio: Introduce blkio controller cgroup interface
o This is a basic implementation of the blkio controller cgroup interface. This is
the common interface visible to user space and should be used by different
IO control policies as we implement those.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | block/Kconfig | 13 | ||||
-rw-r--r-- | block/Kconfig.iosched | 1 | ||||
-rw-r--r-- | block/Makefile | 1 | ||||
-rw-r--r-- | block/blk-cgroup.c | 177 | ||||
-rw-r--r-- | block/blk-cgroup.h | 58 | ||||
-rw-r--r-- | include/linux/cgroup_subsys.h | 6 | ||||
-rw-r--r-- | include/linux/iocontext.h | 4 |
7 files changed, 260 insertions, 0 deletions
diff --git a/block/Kconfig b/block/Kconfig index 9be0b56eaee1..6ba1a8e3388b 100644 --- a/block/Kconfig +++ b/block/Kconfig | |||
@@ -77,6 +77,19 @@ config BLK_DEV_INTEGRITY | |||
77 | T10/SCSI Data Integrity Field or the T13/ATA External Path | 77 | T10/SCSI Data Integrity Field or the T13/ATA External Path |
78 | Protection. If in doubt, say N. | 78 | Protection. If in doubt, say N. |
79 | 79 | ||
80 | config BLK_CGROUP | ||
81 | bool | ||
82 | depends on CGROUPS | ||
83 | default n | ||
84 | ---help--- | ||
85 | Generic block IO controller cgroup interface. This is the common | ||
86 | cgroup interface which should be used by various IO controlling | ||
87 | policies. | ||
88 | |||
89 | Currently, CFQ IO scheduler uses it to recognize task groups and | ||
90 | control disk bandwidth allocation (proportional time slice allocation) | ||
91 | to such task groups. | ||
92 | |||
80 | endif # BLOCK | 93 | endif # BLOCK |
81 | 94 | ||
82 | config BLOCK_COMPAT | 95 | config BLOCK_COMPAT |
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 8bd105115a69..be0280deec29 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched | |||
@@ -23,6 +23,7 @@ config IOSCHED_DEADLINE | |||
23 | 23 | ||
24 | config IOSCHED_CFQ | 24 | config IOSCHED_CFQ |
25 | tristate "CFQ I/O scheduler" | 25 | tristate "CFQ I/O scheduler" |
26 | select BLK_CGROUP | ||
26 | default y | 27 | default y |
27 | ---help--- | 28 | ---help--- |
28 | The CFQ I/O scheduler tries to distribute bandwidth equally | 29 | The CFQ I/O scheduler tries to distribute bandwidth equally |
diff --git a/block/Makefile b/block/Makefile index 7914108952f2..cb2d515ebd6e 100644 --- a/block/Makefile +++ b/block/Makefile | |||
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | |||
8 | blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o | 8 | blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o |
9 | 9 | ||
10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | 10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o |
11 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o | ||
11 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o | 12 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o |
12 | obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o | 13 | obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o |
13 | obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o | 14 | obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o |
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c new file mode 100644 index 000000000000..4f6afd76ec59 --- /dev/null +++ b/block/blk-cgroup.c | |||
@@ -0,0 +1,177 @@ | |||
1 | /* | ||
2 | * Common Block IO controller cgroup interface | ||
3 | * | ||
4 | * Based on ideas and code from CFQ, CFS and BFQ: | ||
5 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> | ||
6 | * | ||
7 | * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> | ||
8 | * Paolo Valente <paolo.valente@unimore.it> | ||
9 | * | ||
10 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> | ||
11 | * Nauman Rafique <nauman@google.com> | ||
12 | */ | ||
13 | #include <linux/ioprio.h> | ||
14 | #include "blk-cgroup.h" | ||
15 | |||
16 | struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; | ||
17 | |||
18 | struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) | ||
19 | { | ||
20 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), | ||
21 | struct blkio_cgroup, css); | ||
22 | } | ||
23 | |||
24 | void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | ||
25 | struct blkio_group *blkg, void *key) | ||
26 | { | ||
27 | unsigned long flags; | ||
28 | |||
29 | spin_lock_irqsave(&blkcg->lock, flags); | ||
30 | rcu_assign_pointer(blkg->key, key); | ||
31 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); | ||
32 | spin_unlock_irqrestore(&blkcg->lock, flags); | ||
33 | } | ||
34 | |||
35 | int blkiocg_del_blkio_group(struct blkio_group *blkg) | ||
36 | { | ||
37 | /* Implemented later */ | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | /* called under rcu_read_lock(). */ | ||
42 | struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) | ||
43 | { | ||
44 | struct blkio_group *blkg; | ||
45 | struct hlist_node *n; | ||
46 | void *__key; | ||
47 | |||
48 | hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { | ||
49 | __key = blkg->key; | ||
50 | if (__key == key) | ||
51 | return blkg; | ||
52 | } | ||
53 | |||
54 | return NULL; | ||
55 | } | ||
56 | |||
57 | #define SHOW_FUNCTION(__VAR) \ | ||
58 | static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup, \ | ||
59 | struct cftype *cftype) \ | ||
60 | { \ | ||
61 | struct blkio_cgroup *blkcg; \ | ||
62 | \ | ||
63 | blkcg = cgroup_to_blkio_cgroup(cgroup); \ | ||
64 | return (u64)blkcg->__VAR; \ | ||
65 | } | ||
66 | |||
67 | SHOW_FUNCTION(weight); | ||
68 | #undef SHOW_FUNCTION | ||
69 | |||
70 | static int | ||
71 | blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) | ||
72 | { | ||
73 | struct blkio_cgroup *blkcg; | ||
74 | |||
75 | if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) | ||
76 | return -EINVAL; | ||
77 | |||
78 | blkcg = cgroup_to_blkio_cgroup(cgroup); | ||
79 | blkcg->weight = (unsigned int)val; | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | struct cftype blkio_files[] = { | ||
84 | { | ||
85 | .name = "weight", | ||
86 | .read_u64 = blkiocg_weight_read, | ||
87 | .write_u64 = blkiocg_weight_write, | ||
88 | }, | ||
89 | }; | ||
90 | |||
91 | static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) | ||
92 | { | ||
93 | return cgroup_add_files(cgroup, subsys, blkio_files, | ||
94 | ARRAY_SIZE(blkio_files)); | ||
95 | } | ||
96 | |||
97 | static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) | ||
98 | { | ||
99 | struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); | ||
100 | |||
101 | free_css_id(&blkio_subsys, &blkcg->css); | ||
102 | kfree(blkcg); | ||
103 | } | ||
104 | |||
105 | static struct cgroup_subsys_state * | ||
106 | blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) | ||
107 | { | ||
108 | struct blkio_cgroup *blkcg, *parent_blkcg; | ||
109 | |||
110 | if (!cgroup->parent) { | ||
111 | blkcg = &blkio_root_cgroup; | ||
112 | goto done; | ||
113 | } | ||
114 | |||
115 | /* Currently we do not support hierarchy deeper than two levels (0,1) */ | ||
116 | parent_blkcg = cgroup_to_blkio_cgroup(cgroup->parent); | ||
117 | if (css_depth(&parent_blkcg->css) > 0) | ||
118 | return ERR_PTR(-EINVAL); | ||
119 | |||
120 | blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); | ||
121 | if (!blkcg) | ||
122 | return ERR_PTR(-ENOMEM); | ||
123 | |||
124 | blkcg->weight = BLKIO_WEIGHT_DEFAULT; | ||
125 | done: | ||
126 | spin_lock_init(&blkcg->lock); | ||
127 | INIT_HLIST_HEAD(&blkcg->blkg_list); | ||
128 | |||
129 | return &blkcg->css; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * We cannot support shared io contexts, as we have no means to support | ||
134 | * two tasks with the same ioc in two different groups without major rework | ||
135 | * of the main cic data structures. For now we allow a task to change | ||
136 | * its cgroup only if it's the only owner of its ioc. | ||
137 | */ | ||
138 | static int blkiocg_can_attach(struct cgroup_subsys *subsys, | ||
139 | struct cgroup *cgroup, struct task_struct *tsk, | ||
140 | bool threadgroup) | ||
141 | { | ||
142 | struct io_context *ioc; | ||
143 | int ret = 0; | ||
144 | |||
145 | /* task_lock() is needed to avoid races with exit_io_context() */ | ||
146 | task_lock(tsk); | ||
147 | ioc = tsk->io_context; | ||
148 | if (ioc && atomic_read(&ioc->nr_tasks) > 1) | ||
149 | ret = -EINVAL; | ||
150 | task_unlock(tsk); | ||
151 | |||
152 | return ret; | ||
153 | } | ||
154 | |||
155 | static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup, | ||
156 | struct cgroup *prev, struct task_struct *tsk, | ||
157 | bool threadgroup) | ||
158 | { | ||
159 | struct io_context *ioc; | ||
160 | |||
161 | task_lock(tsk); | ||
162 | ioc = tsk->io_context; | ||
163 | if (ioc) | ||
164 | ioc->cgroup_changed = 1; | ||
165 | task_unlock(tsk); | ||
166 | } | ||
167 | |||
168 | struct cgroup_subsys blkio_subsys = { | ||
169 | .name = "blkio", | ||
170 | .create = blkiocg_create, | ||
171 | .can_attach = blkiocg_can_attach, | ||
172 | .attach = blkiocg_attach, | ||
173 | .destroy = blkiocg_destroy, | ||
174 | .populate = blkiocg_populate, | ||
175 | .subsys_id = blkio_subsys_id, | ||
176 | .use_id = 1, | ||
177 | }; | ||
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h new file mode 100644 index 000000000000..ba5703f69b42 --- /dev/null +++ b/block/blk-cgroup.h | |||
@@ -0,0 +1,58 @@ | |||
1 | #ifndef _BLK_CGROUP_H | ||
2 | #define _BLK_CGROUP_H | ||
3 | /* | ||
4 | * Common Block IO controller cgroup interface | ||
5 | * | ||
6 | * Based on ideas and code from CFQ, CFS and BFQ: | ||
7 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> | ||
8 | * | ||
9 | * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> | ||
10 | * Paolo Valente <paolo.valente@unimore.it> | ||
11 | * | ||
12 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> | ||
13 | * Nauman Rafique <nauman@google.com> | ||
14 | */ | ||
15 | |||
16 | #include <linux/cgroup.h> | ||
17 | |||
18 | struct blkio_cgroup { | ||
19 | struct cgroup_subsys_state css; | ||
20 | unsigned int weight; | ||
21 | spinlock_t lock; | ||
22 | struct hlist_head blkg_list; | ||
23 | }; | ||
24 | |||
25 | struct blkio_group { | ||
26 | /* An rcu protected unique identifier for the group */ | ||
27 | void *key; | ||
28 | struct hlist_node blkcg_node; | ||
29 | }; | ||
30 | |||
31 | #define BLKIO_WEIGHT_MIN 100 | ||
32 | #define BLKIO_WEIGHT_MAX 1000 | ||
33 | #define BLKIO_WEIGHT_DEFAULT 500 | ||
34 | |||
35 | #ifdef CONFIG_BLK_CGROUP | ||
36 | extern struct blkio_cgroup blkio_root_cgroup; | ||
37 | extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); | ||
38 | extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | ||
39 | struct blkio_group *blkg, void *key); | ||
40 | extern int blkiocg_del_blkio_group(struct blkio_group *blkg); | ||
41 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, | ||
42 | void *key); | ||
43 | #else | ||
44 | static inline struct blkio_cgroup * | ||
45 | cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } | ||
46 | |||
47 | static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, | ||
48 | struct blkio_group *blkg, void *key) | ||
49 | { | ||
50 | } | ||
51 | |||
52 | static inline int | ||
53 | blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } | ||
54 | |||
55 | static inline struct blkio_group * | ||
56 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } | ||
57 | #endif | ||
58 | #endif /* _BLK_CGROUP_H */ | ||
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 9c8d31bacf46..ccefff02b6cb 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h | |||
@@ -60,3 +60,9 @@ SUBSYS(net_cls) | |||
60 | #endif | 60 | #endif |
61 | 61 | ||
62 | /* */ | 62 | /* */ |
63 | |||
64 | #ifdef CONFIG_BLK_CGROUP | ||
65 | SUBSYS(blkio) | ||
66 | #endif | ||
67 | |||
68 | /* */ | ||
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index eb73632440f1..d61b0b8b5cd1 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h | |||
@@ -68,6 +68,10 @@ struct io_context { | |||
68 | unsigned short ioprio; | 68 | unsigned short ioprio; |
69 | unsigned short ioprio_changed; | 69 | unsigned short ioprio_changed; |
70 | 70 | ||
71 | #ifdef CONFIG_BLK_CGROUP | ||
72 | unsigned short cgroup_changed; | ||
73 | #endif | ||
74 | |||
71 | /* | 75 | /* |
72 | * For request batching | 76 | * For request batching |
73 | */ | 77 | */ |