aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVivek Goyal <vgoyal@redhat.com>2009-12-03 12:59:42 -0500
committerJens Axboe <jens.axboe@oracle.com>2009-12-03 13:28:51 -0500
commit31e4c28d95e64f2d5d3c497a3ecf37c62de635b4 (patch)
treeffbb99b2565c4bdd4921fd7077164e7fc295b2bc
parent1fa8f6d68b5c8ca0a608fd8d296c5f07ac788cd6 (diff)
blkio: Introduce blkio controller cgroup interface
o This is a basic implementation of the blkio controller cgroup interface. It is the common interface visible to user space and should be used by the different IO control policies as we implement them. Signed-off-by: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r--block/Kconfig13
-rw-r--r--block/Kconfig.iosched1
-rw-r--r--block/Makefile1
-rw-r--r--block/blk-cgroup.c177
-rw-r--r--block/blk-cgroup.h58
-rw-r--r--include/linux/cgroup_subsys.h6
-rw-r--r--include/linux/iocontext.h4
7 files changed, 260 insertions, 0 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 9be0b56eaee1..6ba1a8e3388b 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -77,6 +77,19 @@ config BLK_DEV_INTEGRITY
77 T10/SCSI Data Integrity Field or the T13/ATA External Path 77 T10/SCSI Data Integrity Field or the T13/ATA External Path
78 Protection. If in doubt, say N. 78 Protection. If in doubt, say N.
79 79
80config BLK_CGROUP
81 bool
82 depends on CGROUPS
83 default n
84 ---help---
85 Generic block IO controller cgroup interface. This is the common
86 cgroup interface which should be used by various IO controlling
87 policies.
88
89 Currently, CFQ IO scheduler uses it to recognize task groups and
90 control disk bandwidth allocation (proportional time slice allocation)
91 to such task groups.
92
80endif # BLOCK 93endif # BLOCK
81 94
82config BLOCK_COMPAT 95config BLOCK_COMPAT
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 8bd105115a69..be0280deec29 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -23,6 +23,7 @@ config IOSCHED_DEADLINE
23 23
24config IOSCHED_CFQ 24config IOSCHED_CFQ
25 tristate "CFQ I/O scheduler" 25 tristate "CFQ I/O scheduler"
26 select BLK_CGROUP
26 default y 27 default y
27 ---help--- 28 ---help---
28 The CFQ I/O scheduler tries to distribute bandwidth equally 29 The CFQ I/O scheduler tries to distribute bandwidth equally
diff --git a/block/Makefile b/block/Makefile
index 7914108952f2..cb2d515ebd6e 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
8 blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o 8 blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
9 9
10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o 10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
11obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
11obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 12obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
12obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o 13obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
13obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o 14obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
new file mode 100644
index 000000000000..4f6afd76ec59
--- /dev/null
+++ b/block/blk-cgroup.c
@@ -0,0 +1,177 @@
1/*
2 * Common Block IO controller cgroup interface
3 *
4 * Based on ideas and code from CFQ, CFS and BFQ:
5 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
6 *
7 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
8 * Paolo Valente <paolo.valente@unimore.it>
9 *
10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
11 * Nauman Rafique <nauman@google.com>
12 */
13#include <linux/ioprio.h>
14#include "blk-cgroup.h"
15
16struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
17
18struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
19{
20 return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
21 struct blkio_cgroup, css);
22}
23
24void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
25 struct blkio_group *blkg, void *key)
26{
27 unsigned long flags;
28
29 spin_lock_irqsave(&blkcg->lock, flags);
30 rcu_assign_pointer(blkg->key, key);
31 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
32 spin_unlock_irqrestore(&blkcg->lock, flags);
33}
34
/*
 * Placeholder for removing @blkg from its blkio cgroup.  The real removal
 * is implemented later; for now nothing is unlinked and 0 is always returned.
 */
int blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	return 0;
}
40
41/* called under rcu_read_lock(). */
42struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
43{
44 struct blkio_group *blkg;
45 struct hlist_node *n;
46 void *__key;
47
48 hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
49 __key = blkg->key;
50 if (__key == key)
51 return blkg;
52 }
53
54 return NULL;
55}
56
57#define SHOW_FUNCTION(__VAR) \
58static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup, \
59 struct cftype *cftype) \
60{ \
61 struct blkio_cgroup *blkcg; \
62 \
63 blkcg = cgroup_to_blkio_cgroup(cgroup); \
64 return (u64)blkcg->__VAR; \
65}
66
67SHOW_FUNCTION(weight);
68#undef SHOW_FUNCTION
69
70static int
71blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
72{
73 struct blkio_cgroup *blkcg;
74
75 if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
76 return -EINVAL;
77
78 blkcg = cgroup_to_blkio_cgroup(cgroup);
79 blkcg->weight = (unsigned int)val;
80 return 0;
81}
82
83struct cftype blkio_files[] = {
84 {
85 .name = "weight",
86 .read_u64 = blkiocg_weight_read,
87 .write_u64 = blkiocg_weight_write,
88 },
89};
90
91static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
92{
93 return cgroup_add_files(cgroup, subsys, blkio_files,
94 ARRAY_SIZE(blkio_files));
95}
96
97static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
98{
99 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
100
101 free_css_id(&blkio_subsys, &blkcg->css);
102 kfree(blkcg);
103}
104
105static struct cgroup_subsys_state *
106blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
107{
108 struct blkio_cgroup *blkcg, *parent_blkcg;
109
110 if (!cgroup->parent) {
111 blkcg = &blkio_root_cgroup;
112 goto done;
113 }
114
115 /* Currently we do not support hierarchy deeper than two level (0,1) */
116 parent_blkcg = cgroup_to_blkio_cgroup(cgroup->parent);
117 if (css_depth(&parent_blkcg->css) > 0)
118 return ERR_PTR(-EINVAL);
119
120 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
121 if (!blkcg)
122 return ERR_PTR(-ENOMEM);
123
124 blkcg->weight = BLKIO_WEIGHT_DEFAULT;
125done:
126 spin_lock_init(&blkcg->lock);
127 INIT_HLIST_HEAD(&blkcg->blkg_list);
128
129 return &blkcg->css;
130}
131
132/*
133 * We cannot support shared io contexts, as we have no mean to support
134 * two tasks with the same ioc in two different groups without major rework
135 * of the main cic data structures. For now we allow a task to change
136 * its cgroup only if it's the only owner of its ioc.
137 */
138static int blkiocg_can_attach(struct cgroup_subsys *subsys,
139 struct cgroup *cgroup, struct task_struct *tsk,
140 bool threadgroup)
141{
142 struct io_context *ioc;
143 int ret = 0;
144
145 /* task_lock() is needed to avoid races with exit_io_context() */
146 task_lock(tsk);
147 ioc = tsk->io_context;
148 if (ioc && atomic_read(&ioc->nr_tasks) > 1)
149 ret = -EINVAL;
150 task_unlock(tsk);
151
152 return ret;
153}
154
155static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
156 struct cgroup *prev, struct task_struct *tsk,
157 bool threadgroup)
158{
159 struct io_context *ioc;
160
161 task_lock(tsk);
162 ioc = tsk->io_context;
163 if (ioc)
164 ioc->cgroup_changed = 1;
165 task_unlock(tsk);
166}
167
168struct cgroup_subsys blkio_subsys = {
169 .name = "blkio",
170 .create = blkiocg_create,
171 .can_attach = blkiocg_can_attach,
172 .attach = blkiocg_attach,
173 .destroy = blkiocg_destroy,
174 .populate = blkiocg_populate,
175 .subsys_id = blkio_subsys_id,
176 .use_id = 1,
177};
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
new file mode 100644
index 000000000000..ba5703f69b42
--- /dev/null
+++ b/block/blk-cgroup.h
@@ -0,0 +1,58 @@
1#ifndef _BLK_CGROUP_H
2#define _BLK_CGROUP_H
3/*
4 * Common Block IO controller cgroup interface
5 *
6 * Based on ideas and code from CFQ, CFS and BFQ:
7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
8 *
9 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
10 * Paolo Valente <paolo.valente@unimore.it>
11 *
12 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
13 * Nauman Rafique <nauman@google.com>
14 */
15
16#include <linux/cgroup.h>
17
/*
 * Per-cgroup state of the blkio controller.
 *
 * css:       cgroup subsystem state embedded in this structure; the
 *            containing blkio_cgroup is recovered from it with container_of().
 * weight:    weight of this cgroup, exposed through the "weight" file.
 * lock:      taken while a blkio_group is added to blkg_list.
 * blkg_list: list of blkio_group entries registered with this cgroup.
 */
18struct blkio_cgroup {
19 struct cgroup_subsys_state css;
20 unsigned int weight;
21 spinlock_t lock;
22 struct hlist_head blkg_list;
23};
24
/*
 * One entry on a blkio_cgroup's blkg_list.  Entries are linked through
 * blkcg_node and looked up by comparing key against a caller-supplied
 * pointer.
 */
25struct blkio_group {
26 /* An rcu protected unique identifier for the group */
27 void *key;
28 struct hlist_node blkcg_node;
29};
30
/*
 * Weight limits: writes to the "weight" file outside [MIN, MAX] are
 * rejected with -EINVAL, and newly created non-root groups start at DEFAULT.
 */
31#define BLKIO_WEIGHT_MIN 100
32#define BLKIO_WEIGHT_MAX 1000
33#define BLKIO_WEIGHT_DEFAULT 500
34
35#ifdef CONFIG_BLK_CGROUP
36extern struct blkio_cgroup blkio_root_cgroup;
37extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
38extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
39 struct blkio_group *blkg, void *key);
40extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
41extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
42 void *key);
43#else
44static inline struct blkio_cgroup *
45cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
46
47static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
48 struct blkio_group *blkg, void *key)
49{
50}
51
52static inline int
53blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
54
55static inline struct blkio_group *
56blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
57#endif
58#endif /* _BLK_CGROUP_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 9c8d31bacf46..ccefff02b6cb 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -60,3 +60,9 @@ SUBSYS(net_cls)
60#endif 60#endif
61 61
62/* */ 62/* */
63
64#ifdef CONFIG_BLK_CGROUP
65SUBSYS(blkio)
66#endif
67
68/* */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index eb73632440f1..d61b0b8b5cd1 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -68,6 +68,10 @@ struct io_context {
68 unsigned short ioprio; 68 unsigned short ioprio;
69 unsigned short ioprio_changed; 69 unsigned short ioprio_changed;
70 70
71#ifdef CONFIG_BLK_CGROUP
72 unsigned short cgroup_changed;
73#endif
74
71 /* 75 /*
72 * For request batching 76 * For request batching
73 */ 77 */