author     Tejun Heo <tj@kernel.org>       2012-03-05 16:15:07 -0500
committer  Jens Axboe <axboe@kernel.dk>    2012-03-06 15:27:22 -0500
commit     e56da7e287967667474a58c4f60c286279e3f487 (patch)
tree       4ce64581888f287a25b4abdc21fc641210351612 /block
parent     cd1604fab4f95f7cfc227d3955fd7ae14da61f38 (diff)
blkcg: don't allow or retain configuration of missing devices
blkcg is very peculiar in that it allows setting and remembering configurations for non-existent devices by maintaining separate data structures for configuration.

This behavior is completely out of the usual norms and outright confusing; furthermore, it uses dev_t numbers to match configuration to devices, which is unpredictable to begin with and becomes completely unusable if EXT_DEVT is fully used. It is wholly unnecessary - we already have a fully functional userland mechanism for programming devices as they are hotplugged, with full access to device identification, connection topology and filesystem information.

Add a new struct blkio_group_conf, which holds all blkcg configuration, to blkio_group, and let blkio_group - which can be created iff the associated device exists and is removed when that device goes away - carry all configuration.

Note that, after this patch, all newly created blkg's will always have the default configuration (unlimited for throttling and the blkcg's weight for proportional IO).

This patch makes blkio_policy_node meaningless but doesn't remove it. The next patch will.

-v2: Updated to retry after a short sleep if blkg lookup/creation fails because the queue is temporarily bypassed, as indicated by an -EBUSY return. Pointed out by Vivek.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
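[Editor's illustration, not part of the patch] A minimal userspace sketch of how a per-device limit is programmed through the blkio controller's blkio.throttle.read_bps_device file after this change. The mount point /sys/fs/cgroup/blkio, the group name "grp" and the device number 8:0 are illustrative assumptions. With the patch, the write only succeeds while the device's request_queue exists; a rule for an absent MAJ:MIN now fails with ENODEV instead of being silently remembered, and the kernel-side -EBUSY retry (msleep() + restart_syscall() in the diff below) is invisible to the caller.

/* Hedged sketch only: paths, group name and MAJ:MIN are assumptions. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write "MAJ:MIN bytes_per_sec" to blkio.throttle.read_bps_device. */
static int set_read_bps(const char *cgroup_dir, const char *majmin,
			unsigned long long bps)
{
	char path[256], rule[64];
	int fd, len, ret = 0;

	snprintf(path, sizeof(path), "%s/blkio.throttle.read_bps_device",
		 cgroup_dir);
	len = snprintf(rule, sizeof(rule), "%s %llu", majmin, bps);

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -errno;
	if (write(fd, rule, len) < 0)
		ret = -errno;	/* -ENODEV if MAJ:MIN does not exist now */
	close(fd);
	return ret;
}

int main(void)
{
	/* 8:0 is commonly sda; adjust for the device being limited. */
	int ret = set_read_bps("/sys/fs/cgroup/blkio/grp", "8:0", 1048576);

	if (ret)
		fprintf(stderr, "throttle setup failed: %s\n", strerror(-ret));
	return ret ? 1 : 0;
}

A hotplug hook (for example a udev-triggered helper) can run something like this when the device appears, which is the userland mechanism the commit message refers to.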
Diffstat (limited to 'block')
-rw-r--r--   block/blk-cgroup.c    94
-rw-r--r--   block/blk-cgroup.h     9
-rw-r--r--   block/blk-throttle.c    8
-rw-r--r--   block/cfq-iosched.c     2
4 files changed, 87 insertions(+), 26 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bc9891496318..fe8ce148017a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -855,9 +855,12 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
 }
 
 static int blkio_policy_parse_and_set(char *buf,
-	struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
+				       struct blkio_policy_node *newpn,
+				       enum blkio_policy_id plid, int fileid,
+				       struct blkio_cgroup *blkcg)
 {
 	struct gendisk *disk = NULL;
+	struct blkio_group *blkg = NULL;
 	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
 	unsigned long major, minor;
 	int i = 0, ret = -EINVAL;
@@ -903,11 +906,25 @@ static int blkio_policy_parse_and_set(char *buf,
 		goto out;
 
 	/* For rule removal, do not check for device presence. */
-	if (temp) {
-		disk = get_gendisk(dev, &part);
-		if (!disk || part) {
-			ret = -ENODEV;
-			goto out;
+	disk = get_gendisk(dev, &part);
+
+	if ((!disk || part) && temp) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	rcu_read_lock();
+
+	if (disk && !part) {
+		spin_lock_irq(disk->queue->queue_lock);
+		blkg = blkg_lookup_create(blkcg, disk->queue, plid, false);
+		spin_unlock_irq(disk->queue->queue_lock);
+
+		if (IS_ERR(blkg)) {
+			ret = PTR_ERR(blkg);
+			if (ret == -EBUSY)
+				goto out_unlock;
+			blkg = NULL;
 		}
 	}
 
@@ -917,25 +934,46 @@ static int blkio_policy_parse_and_set(char *buf,
 	case BLKIO_POLICY_PROP:
 		if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
 		     temp > BLKIO_WEIGHT_MAX)
-			goto out;
+			goto out_unlock;
 
 		newpn->plid = plid;
 		newpn->fileid = fileid;
 		newpn->val.weight = temp;
+		if (blkg)
+			blkg->conf.weight = temp;
 		break;
 	case BLKIO_POLICY_THROTL:
 		switch(fileid) {
 		case BLKIO_THROTL_read_bps_device:
+			if (blkg)
+				blkg->conf.bps[READ] = temp;
+			newpn->plid = plid;
+			newpn->fileid = fileid;
+			newpn->val.bps = temp;
+			break;
 		case BLKIO_THROTL_write_bps_device:
+			if (blkg)
+				blkg->conf.bps[WRITE] = temp;
 			newpn->plid = plid;
 			newpn->fileid = fileid;
 			newpn->val.bps = temp;
 			break;
 		case BLKIO_THROTL_read_iops_device:
+			if (temp > THROTL_IOPS_MAX)
+				goto out_unlock;
+
+			if (blkg)
+				blkg->conf.iops[READ] = temp;
+			newpn->plid = plid;
+			newpn->fileid = fileid;
+			newpn->val.iops = (unsigned int)temp;
+			break;
 		case BLKIO_THROTL_write_iops_device:
 			if (temp > THROTL_IOPS_MAX)
-				goto out;
+				goto out_unlock;
 
+			if (blkg)
+				blkg->conf.iops[WRITE] = temp;
 			newpn->plid = plid;
 			newpn->fileid = fileid;
 			newpn->val.iops = (unsigned int)temp;
@@ -946,8 +984,21 @@ static int blkio_policy_parse_and_set(char *buf,
 		BUG();
 	}
 	ret = 0;
+out_unlock:
+	rcu_read_unlock();
 out:
 	put_disk(disk);
+
+	/*
+	 * If queue was bypassing, we should retry. Do so after a short
+	 * msleep(). It isn't strictly necessary but queue can be
+	 * bypassing for some time and it's always nice to avoid busy
+	 * looping.
+	 */
+	if (ret == -EBUSY) {
+		msleep(10);
+		return restart_syscall();
+	}
 	return ret;
 }
 
@@ -1095,26 +1146,29 @@ static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
 static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
 		struct blkio_group *blkg, struct blkio_policy_node *pn)
 {
-	unsigned int weight, iops;
-	u64 bps;
+	struct blkio_group_conf *conf = &blkg->conf;
 
 	switch(pn->plid) {
 	case BLKIO_POLICY_PROP:
-		weight = pn->val.weight ? pn->val.weight :
-				blkcg->weight;
-		blkio_update_group_weight(blkg, weight);
+		blkio_update_group_weight(blkg, conf->weight ?: blkcg->weight);
 		break;
 	case BLKIO_POLICY_THROTL:
 		switch(pn->fileid) {
 		case BLKIO_THROTL_read_bps_device:
+			blkio_update_group_bps(blkg, conf->bps[READ] ?: -1,
+					       pn->fileid);
+			break;
 		case BLKIO_THROTL_write_bps_device:
-			bps = pn->val.bps ? pn->val.bps : (-1);
-			blkio_update_group_bps(blkg, bps, pn->fileid);
+			blkio_update_group_bps(blkg, conf->bps[WRITE] ?: -1,
+					       pn->fileid);
 			break;
 		case BLKIO_THROTL_read_iops_device:
+			blkio_update_group_iops(blkg, conf->iops[READ] ?: -1,
+						pn->fileid);
+			break;
 		case BLKIO_THROTL_write_iops_device:
-			iops = pn->val.iops ? pn->val.iops : (-1);
-			blkio_update_group_iops(blkg, iops, pn->fileid);
+			blkio_update_group_iops(blkg, conf->iops[WRITE] ?: -1,
+						pn->fileid);
 			break;
 		}
 		break;
@@ -1152,7 +1206,7 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
 	int ret = 0;
 	char *buf;
 	struct blkio_policy_node *newpn, *pn;
-	struct blkio_cgroup *blkcg;
+	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
 	int keep_newpn = 0;
 	enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
 	int fileid = BLKIOFILE_ATTR(cft->private);
@@ -1167,12 +1221,10 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
 		goto free_buf;
 	}
 
-	ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
+	ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid, blkcg);
 	if (ret)
 		goto free_newpn;
 
-	blkcg = cgroup_to_blkio_cgroup(cgrp);
-
 	spin_lock_irq(&blkcg->lock);
 
 	pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 2600ae7e6f60..81efe718a1c6 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -154,6 +154,12 @@ struct blkio_group_stats_cpu {
 	struct u64_stats_sync syncp;
 };
 
+struct blkio_group_conf {
+	unsigned int weight;
+	unsigned int iops[2];
+	u64 bps[2];
+};
+
 struct blkio_group {
 	/* Pointer to the associated request_queue, RCU protected */
 	struct request_queue __rcu *q;
@@ -166,6 +172,9 @@ struct blkio_group {
 	/* policy which owns this blk group */
 	enum blkio_policy_id plid;
 
+	/* Configuration */
+	struct blkio_group_conf conf;
+
 	/* Need to serialize the stats in the case of reset/update */
 	spinlock_t stats_lock;
 	struct blkio_group_stats stats;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 2ae637b9e80c..791b10719e43 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -196,10 +196,10 @@ static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
 	bio_list_init(&tg->bio_lists[1]);
 	tg->limits_changed = false;
 
-	tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
-	tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
-	tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
-	tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
+	tg->bps[READ] = -1;
+	tg->bps[WRITE] = -1;
+	tg->iops[READ] = -1;
+	tg->iops[WRITE] = -1;
 
 	/*
 	 * Take the initial reference that will be released on destroy
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index acef564578c3..08d4fdd188fa 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1083,7 +1083,7 @@ static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q,
 		return NULL;
 
 	cfq_init_cfqg_base(cfqg);
-	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+	cfqg->weight = blkcg->weight;
 
 	/*
 	 * Take the initial reference that will be released on destroy