aboutsummaryrefslogtreecommitdiffstats
path: root/block/blk-cgroup.c
diff options
context:
space:
mode:
authorArianna Avanzini <avanzini.arianna@gmail.com>2015-06-05 17:38:42 -0400
committerJens Axboe <axboe@fb.com>2015-06-07 10:10:08 -0400
commite48453c386f39ca9ea29e9df6efef78f56746af0 (patch)
treeb8aab51e5a1bb48f872640712e1f79ef09271438 /block/blk-cgroup.c
parent41c0126b3f22ef36b97b3c38b8f29569848a5ce2 (diff)
block, cgroup: implement policy-specific per-blkcg data
The block IO (blkio) controller enables the block layer to provide service guarantees in a hierarchical fashion. Specifically, service guarantees are provided by registered request-accounting policies. As of now, a proportional-share and a throttling policy are available. They are implemented, respectively, by the CFQ I/O scheduler and the blk-throttle subsystem. Unfortunately, as for adding new policies, the current implementation of the block IO controller is only halfway ready to allow new policies to be plugged in. This commit provides a solution to make the block IO controller fully ready to handle new policies. In what follows, we first describe briefly the current state, and then list the changes made by this commit. The throttling policy does not need any per-cgroup information to perform its task. In contrast, the proportional share policy uses, for each cgroup, both the weight assigned by the user to the cgroup, and a set of dynamically- computed weights, one for each device. The first, user-defined weight is stored in the blkcg data structure: the block IO controller allocates a private blkcg data structure for each cgroup in the blkio cgroups hierarchy (regardless of which policy is active). In other words, the block IO controller internally mirrors the blkio cgroups with private blkcg data structures. On the other hand, for each cgroup and device, the corresponding dynamically- computed weight is maintained in the following, different way. For each device, the block IO controller keeps a private blkcg_gq structure for each cgroup in blkio. In other words, block IO also keeps one private mirror copy of the blkio cgroups hierarchy for each device, made of blkcg_gq structures. Each blkcg_gq structure keeps per-policy information in a generic array of dynamically-allocated 'dedicated' data structures, one for each registered policy (so currently the array contains two elements). 
To be inserted into the generic array, each dedicated data structure embeds a generic blkg_policy_data structure. Consider now the array contained in the blkcg_gq structure corresponding to a given pair of cgroup and device: one of the elements of the array contains the dedicated data structure for the proportional-share policy, and this dedicated data structure contains the dynamically-computed weight for that pair of cgroup and device. The generic strategy adopted for storing per-policy data in blkcg_gq structures is already capable of handling new policies, whereas the one adopted with blkcg structures is not, because per-policy data are hard-coded in the blkcg structures themselves (currently only data related to the proportional-share policy). This commit addresses the above issues through the following changes: . It generalizes blkcg structures so that per-policy data are stored in the same way as in blkcg_gq structures. Specifically, it lets also the blkcg structure store per-policy data in a generic array of dynamically-allocated dedicated data structures. We will refer to these data structures as blkcg dedicated data structures, to distinguish them from the dedicated data structures inserted in the generic arrays kept by blkcg_gq structures. To allow blkcg dedicated data structures to be inserted in the generic array inside a blkcg structure, this commit also introduces a new blkcg_policy_data structure, which is the equivalent of blkg_policy_data for blkcg dedicated data structures. . It adds to the blkcg_policy structure, i.e., to the descriptor of a policy, a cpd_size field and a cpd_init field, to be initialized by the policy with, respectively, the size of the blkcg dedicated data structures, and the address of a constructor function for blkcg dedicated data structures. . 
It moves the CFQ-specific fields embedded in the blkcg data structure (i.e., the fields related to the proportional-share policy), into a new blkcg dedicated data structure called cfq_group_data. Signed-off-by: Paolo Valente <paolo.valente@unimore.it> Signed-off-by: Arianna Avanzini <avanzini.arianna@gmail.com> Acked-by: Tejun Heo <tj@kernel.org> Cc: Jens Axboe <axboe@fb.com> Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--block/blk-cgroup.c92
1 files changed, 81 insertions, 11 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 0ac817b750db..6e43fa355e71 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -9,6 +9,10 @@
9 * 9 *
10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> 10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
11 * Nauman Rafique <nauman@google.com> 11 * Nauman Rafique <nauman@google.com>
12 *
13 * For policy-specific per-blkcg data:
14 * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
15 * Arianna Avanzini <avanzini.arianna@gmail.com>
12 */ 16 */
13#include <linux/ioprio.h> 17#include <linux/ioprio.h>
14#include <linux/kdev_t.h> 18#include <linux/kdev_t.h>
@@ -26,8 +30,7 @@
26 30
27static DEFINE_MUTEX(blkcg_pol_mutex); 31static DEFINE_MUTEX(blkcg_pol_mutex);
28 32
29struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, 33struct blkcg blkcg_root;
30 .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
31EXPORT_SYMBOL_GPL(blkcg_root); 34EXPORT_SYMBOL_GPL(blkcg_root);
32 35
33static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; 36static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
@@ -823,6 +826,8 @@ static struct cgroup_subsys_state *
823blkcg_css_alloc(struct cgroup_subsys_state *parent_css) 826blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
824{ 827{
825 struct blkcg *blkcg; 828 struct blkcg *blkcg;
829 struct cgroup_subsys_state *ret;
830 int i;
826 831
827 if (!parent_css) { 832 if (!parent_css) {
828 blkcg = &blkcg_root; 833 blkcg = &blkcg_root;
@@ -830,17 +835,49 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
830 } 835 }
831 836
832 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); 837 blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
833 if (!blkcg) 838 if (!blkcg) {
834 return ERR_PTR(-ENOMEM); 839 ret = ERR_PTR(-ENOMEM);
840 goto free_blkcg;
841 }
842
843 for (i = 0; i < BLKCG_MAX_POLS ; i++) {
844 struct blkcg_policy *pol = blkcg_policy[i];
845 struct blkcg_policy_data *cpd;
846
847 /*
848 * If the policy hasn't been attached yet, wait for it
849 * to be attached before doing anything else. Otherwise,
850 * check if the policy requires any specific per-cgroup
851 * data: if it does, allocate and initialize it.
852 */
853 if (!pol || !pol->cpd_size)
854 continue;
855
856 BUG_ON(blkcg->pd[i]);
857 cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
858 if (!cpd) {
859 ret = ERR_PTR(-ENOMEM);
860 goto free_pd_blkcg;
861 }
862 blkcg->pd[i] = cpd;
863 cpd->plid = i;
864 pol->cpd_init_fn(blkcg);
865 }
835 866
836 blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
837 blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
838done: 867done:
839 spin_lock_init(&blkcg->lock); 868 spin_lock_init(&blkcg->lock);
840 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); 869 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
841 INIT_HLIST_HEAD(&blkcg->blkg_list); 870 INIT_HLIST_HEAD(&blkcg->blkg_list);
842 871
843 return &blkcg->css; 872 return &blkcg->css;
873
874free_pd_blkcg:
875 for (i--; i >= 0; i--)
876 kfree(blkcg->pd[i]);
877
878free_blkcg:
879 kfree(blkcg);
880 return ret;
844} 881}
845 882
846/** 883/**
@@ -958,8 +995,10 @@ int blkcg_activate_policy(struct request_queue *q,
958 const struct blkcg_policy *pol) 995 const struct blkcg_policy *pol)
959{ 996{
960 LIST_HEAD(pds); 997 LIST_HEAD(pds);
998 LIST_HEAD(cpds);
961 struct blkcg_gq *blkg, *new_blkg; 999 struct blkcg_gq *blkg, *new_blkg;
962 struct blkg_policy_data *pd, *n; 1000 struct blkg_policy_data *pd, *nd;
1001 struct blkcg_policy_data *cpd, *cnd;
963 int cnt = 0, ret; 1002 int cnt = 0, ret;
964 bool preloaded; 1003 bool preloaded;
965 1004
@@ -1003,7 +1042,10 @@ int blkcg_activate_policy(struct request_queue *q,
1003 1042
1004 spin_unlock_irq(q->queue_lock); 1043 spin_unlock_irq(q->queue_lock);
1005 1044
1006 /* allocate policy_data for all existing blkgs */ 1045 /*
1046 * Allocate per-blkg and per-blkcg policy data
1047 * for all existing blkgs.
1048 */
1007 while (cnt--) { 1049 while (cnt--) {
1008 pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); 1050 pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
1009 if (!pd) { 1051 if (!pd) {
@@ -1011,26 +1053,50 @@ int blkcg_activate_policy(struct request_queue *q,
1011 goto out_free; 1053 goto out_free;
1012 } 1054 }
1013 list_add_tail(&pd->alloc_node, &pds); 1055 list_add_tail(&pd->alloc_node, &pds);
1056
1057 if (!pol->cpd_size)
1058 continue;
1059 cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node);
1060 if (!cpd) {
1061 ret = -ENOMEM;
1062 goto out_free;
1063 }
1064 list_add_tail(&cpd->alloc_node, &cpds);
1014 } 1065 }
1015 1066
1016 /* 1067 /*
1017 * Install the allocated pds. With @q bypassing, no new blkg 1068 * Install the allocated pds and cpds. With @q bypassing, no new blkg
1018 * should have been created while the queue lock was dropped. 1069 * should have been created while the queue lock was dropped.
1019 */ 1070 */
1020 spin_lock_irq(q->queue_lock); 1071 spin_lock_irq(q->queue_lock);
1021 1072
1022 list_for_each_entry(blkg, &q->blkg_list, q_node) { 1073 list_for_each_entry(blkg, &q->blkg_list, q_node) {
1023 if (WARN_ON(list_empty(&pds))) { 1074 if (WARN_ON(list_empty(&pds)) ||
1075 WARN_ON(pol->cpd_size && list_empty(&cpds))) {
1024 /* umm... this shouldn't happen, just abort */ 1076 /* umm... this shouldn't happen, just abort */
1025 ret = -ENOMEM; 1077 ret = -ENOMEM;
1026 goto out_unlock; 1078 goto out_unlock;
1027 } 1079 }
1080 cpd = list_first_entry(&cpds, struct blkcg_policy_data,
1081 alloc_node);
1082 list_del_init(&cpd->alloc_node);
1028 pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); 1083 pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
1029 list_del_init(&pd->alloc_node); 1084 list_del_init(&pd->alloc_node);
1030 1085
1031 /* grab blkcg lock too while installing @pd on @blkg */ 1086 /* grab blkcg lock too while installing @pd on @blkg */
1032 spin_lock(&blkg->blkcg->lock); 1087 spin_lock(&blkg->blkcg->lock);
1033 1088
1089 if (!pol->cpd_size)
1090 goto no_cpd;
1091 if (!blkg->blkcg->pd[pol->plid]) {
1092 /* Per-policy per-blkcg data */
1093 blkg->blkcg->pd[pol->plid] = cpd;
1094 cpd->plid = pol->plid;
1095 pol->cpd_init_fn(blkg->blkcg);
1096 } else { /* must free it as it has already been extracted */
1097 kfree(cpd);
1098 }
1099no_cpd:
1034 blkg->pd[pol->plid] = pd; 1100 blkg->pd[pol->plid] = pd;
1035 pd->blkg = blkg; 1101 pd->blkg = blkg;
1036 pd->plid = pol->plid; 1102 pd->plid = pol->plid;
@@ -1045,8 +1111,10 @@ out_unlock:
1045 spin_unlock_irq(q->queue_lock); 1111 spin_unlock_irq(q->queue_lock);
1046out_free: 1112out_free:
1047 blk_queue_bypass_end(q); 1113 blk_queue_bypass_end(q);
1048 list_for_each_entry_safe(pd, n, &pds, alloc_node) 1114 list_for_each_entry_safe(pd, nd, &pds, alloc_node)
1049 kfree(pd); 1115 kfree(pd);
1116 list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node)
1117 kfree(cpd);
1050 return ret; 1118 return ret;
1051} 1119}
1052EXPORT_SYMBOL_GPL(blkcg_activate_policy); 1120EXPORT_SYMBOL_GPL(blkcg_activate_policy);
@@ -1087,6 +1155,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
1087 1155
1088 kfree(blkg->pd[pol->plid]); 1156 kfree(blkg->pd[pol->plid]);
1089 blkg->pd[pol->plid] = NULL; 1157 blkg->pd[pol->plid] = NULL;
1158 kfree(blkg->blkcg->pd[pol->plid]);
1159 blkg->blkcg->pd[pol->plid] = NULL;
1090 1160
1091 spin_unlock(&blkg->blkcg->lock); 1161 spin_unlock(&blkg->blkcg->lock);
1092 } 1162 }