summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--block/blk-cgroup.c53
-rw-r--r--include/linux/blk-cgroup.h44
-rw-r--r--mm/backing-dev.c5
3 files changed, 94 insertions, 8 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 2998e4f095d1..c19f9078da1e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1042,21 +1042,59 @@ static struct cftype blkcg_legacy_files[] = {
1042 { } /* terminate */ 1042 { } /* terminate */
1043}; 1043};
1044 1044
1045/*
1046 * blkcg destruction is a three-stage process.
1047 *
1048 * 1. Destruction starts. The blkcg_css_offline() callback is invoked
1049 * which offlines writeback. Here we tie the next stage of blkg destruction
1050 * to the completion of writeback associated with the blkcg. This lets us
1051 * avoid punting potentially large amounts of outstanding writeback to root
1052 * while maintaining any ongoing policies. The next stage is triggered when
1053 * the nr_cgwbs count goes to zero.
1054 *
1055 * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
1056 * and handles the destruction of blkgs. Here the css reference held by
1057 * the blkg is put back eventually allowing blkcg_css_free() to be called.
1058 * This work may occur in cgwb_release_workfn() on the cgwb_release
1059 * workqueue. Any submitted ios that fail to get the blkg ref will be
1060 * punted to the root_blkg.
1061 *
1062 * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
1063 * This finally frees the blkcg.
1064 */
1065
1045/** 1066/**
1046 * blkcg_css_offline - cgroup css_offline callback 1067 * blkcg_css_offline - cgroup css_offline callback
1047 * @css: css of interest 1068 * @css: css of interest
1048 * 1069 *
1049 * This function is called when @css is about to go away and responsible 1070 * This function is called when @css is about to go away. Here the cgwbs are
1050 * for shooting down all blkgs associated with @css. blkgs should be 1071 * offlined first and only once writeback associated with the blkcg has
1051 * removed while holding both q and blkcg locks. As blkcg lock is nested 1072 * finished do we start step 2 (see above).
1052 * inside q lock, this function performs reverse double lock dancing.
1053 *
1054 * This is the blkcg counterpart of ioc_release_fn().
1055 */ 1073 */
1056static void blkcg_css_offline(struct cgroup_subsys_state *css) 1074static void blkcg_css_offline(struct cgroup_subsys_state *css)
1057{ 1075{
1058 struct blkcg *blkcg = css_to_blkcg(css); 1076 struct blkcg *blkcg = css_to_blkcg(css);
1059 1077
1078 /* this prevents anyone from attaching or migrating to this blkcg */
1079 wb_blkcg_offline(blkcg);
1080
1081 /* put the base cgwb reference allowing step 2 to be triggered */
1082 blkcg_cgwb_put(blkcg);
1083}
1084
1085/**
1086 * blkcg_destroy_blkgs - responsible for shooting down blkgs
1087 * @blkcg: blkcg of interest
1088 *
1089 * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
1090 * is nested inside q lock, this function performs reverse double lock dancing.
1091 * Destroying the blkgs releases the reference held on the blkcg's css allowing
1092 * blkcg_css_free to eventually be called.
1093 *
1094 * This is the blkcg counterpart of ioc_release_fn().
1095 */
1096void blkcg_destroy_blkgs(struct blkcg *blkcg)
1097{
1060 spin_lock_irq(&blkcg->lock); 1098 spin_lock_irq(&blkcg->lock);
1061 1099
1062 while (!hlist_empty(&blkcg->blkg_list)) { 1100 while (!hlist_empty(&blkcg->blkg_list)) {
@@ -1075,8 +1113,6 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
1075 } 1113 }
1076 1114
1077 spin_unlock_irq(&blkcg->lock); 1115 spin_unlock_irq(&blkcg->lock);
1078
1079 wb_blkcg_offline(blkcg);
1080} 1116}
1081 1117
1082static void blkcg_css_free(struct cgroup_subsys_state *css) 1118static void blkcg_css_free(struct cgroup_subsys_state *css)
@@ -1146,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
1146 INIT_HLIST_HEAD(&blkcg->blkg_list); 1182 INIT_HLIST_HEAD(&blkcg->blkg_list);
1147#ifdef CONFIG_CGROUP_WRITEBACK 1183#ifdef CONFIG_CGROUP_WRITEBACK
1148 INIT_LIST_HEAD(&blkcg->cgwb_list); 1184 INIT_LIST_HEAD(&blkcg->cgwb_list);
1185 refcount_set(&blkcg->cgwb_refcnt, 1);
1149#endif 1186#endif
1150 list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); 1187 list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
1151 1188
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1615cdd4c797..6d766a19f2bb 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -56,6 +56,7 @@ struct blkcg {
56 struct list_head all_blkcgs_node; 56 struct list_head all_blkcgs_node;
57#ifdef CONFIG_CGROUP_WRITEBACK 57#ifdef CONFIG_CGROUP_WRITEBACK
58 struct list_head cgwb_list; 58 struct list_head cgwb_list;
59 refcount_t cgwb_refcnt;
59#endif 60#endif
60}; 61};
61 62
@@ -386,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
386 return cpd ? cpd->blkcg : NULL; 387 return cpd ? cpd->blkcg : NULL;
387} 388}
388 389
390extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
391
392#ifdef CONFIG_CGROUP_WRITEBACK
393
394/**
395 * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
396 * @blkcg: blkcg of interest
397 *
398 * This is used to track the number of active wb's related to a blkcg.
399 */
400static inline void blkcg_cgwb_get(struct blkcg *blkcg)
401{
402 refcount_inc(&blkcg->cgwb_refcnt);
403}
404
405/**
406 * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
407 * @blkcg: blkcg of interest
408 *
409 * This is used to track the number of active wb's related to a blkcg.
410 * When this count goes to zero, all active wb has finished so the
411 * blkcg can continue destruction by calling blkcg_destroy_blkgs().
412 * This work may occur in cgwb_release_workfn() on the cgwb_release
413 * workqueue.
414 */
415static inline void blkcg_cgwb_put(struct blkcg *blkcg)
416{
417 if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
418 blkcg_destroy_blkgs(blkcg);
419}
420
421#else
422
423static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
424
425static inline void blkcg_cgwb_put(struct blkcg *blkcg)
426{
427 /* wb isn't being accounted, so trigger destruction right away */
428 blkcg_destroy_blkgs(blkcg);
429}
430
431#endif
432
389/** 433/**
390 * blkg_path - format cgroup path of blkg 434 * blkg_path - format cgroup path of blkg
391 * @blkg: blkg of interest 435 * @blkg: blkg of interest
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f5981e9d6ae2..8a8bb8796c6c 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -491,6 +491,7 @@ static void cgwb_release_workfn(struct work_struct *work)
491{ 491{
492 struct bdi_writeback *wb = container_of(work, struct bdi_writeback, 492 struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
493 release_work); 493 release_work);
494 struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
494 495
495 mutex_lock(&wb->bdi->cgwb_release_mutex); 496 mutex_lock(&wb->bdi->cgwb_release_mutex);
496 wb_shutdown(wb); 497 wb_shutdown(wb);
@@ -499,6 +500,9 @@ static void cgwb_release_workfn(struct work_struct *work)
499 css_put(wb->blkcg_css); 500 css_put(wb->blkcg_css);
500 mutex_unlock(&wb->bdi->cgwb_release_mutex); 501 mutex_unlock(&wb->bdi->cgwb_release_mutex);
501 502
503 /* triggers blkg destruction if cgwb_refcnt becomes zero */
504 blkcg_cgwb_put(blkcg);
505
502 fprop_local_destroy_percpu(&wb->memcg_completions); 506 fprop_local_destroy_percpu(&wb->memcg_completions);
503 percpu_ref_exit(&wb->refcnt); 507 percpu_ref_exit(&wb->refcnt);
504 wb_exit(wb); 508 wb_exit(wb);
@@ -597,6 +601,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
597 list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); 601 list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
598 list_add(&wb->memcg_node, memcg_cgwb_list); 602 list_add(&wb->memcg_node, memcg_cgwb_list);
599 list_add(&wb->blkcg_node, blkcg_cgwb_list); 603 list_add(&wb->blkcg_node, blkcg_cgwb_list);
604 blkcg_cgwb_get(blkcg);
600 css_get(memcg_css); 605 css_get(memcg_css);
601 css_get(blkcg_css); 606 css_get(blkcg_css);
602 } 607 }