diff options
-rw-r--r-- | block/blk-cgroup.c | 53 | ||||
-rw-r--r-- | include/linux/blk-cgroup.h | 44 | ||||
-rw-r--r-- | mm/backing-dev.c | 5 |
3 files changed, 94 insertions, 8 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2998e4f095d1..c19f9078da1e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
@@ -1042,21 +1042,59 @@ static struct cftype blkcg_legacy_files[] = { | |||
1042 | { } /* terminate */ | 1042 | { } /* terminate */ |
1043 | }; | 1043 | }; |
1044 | 1044 | ||
1045 | /* | ||
1046 | * blkcg destruction is a three-stage process. | ||
1047 | * | ||
1048 | * 1. Destruction starts. The blkcg_css_offline() callback is invoked | ||
1049 | * which offlines writeback. Here we tie the next stage of blkg destruction | ||
1050 | * to the completion of writeback associated with the blkcg. This lets us | ||
1051 | * avoid punting potentially large amounts of outstanding writeback to root | ||
1052 | * while maintaining any ongoing policies. The next stage is triggered when | ||
1053 | * the cgwb_refcnt count goes to zero. | ||
1054 | * | ||
1055 | * 2. When the cgwb_refcnt count goes to zero, blkcg_destroy_blkgs() is called | ||
1056 | * and handles the destruction of blkgs. Here the css reference held by | ||
1057 | * the blkg is put back eventually allowing blkcg_css_free() to be called. | ||
1058 | * This work may occur in cgwb_release_workfn() on the cgwb_release | ||
1059 | * workqueue. Any submitted ios that fail to get the blkg ref will be | ||
1060 | * punted to the root_blkg. | ||
1061 | * | ||
1062 | * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called. | ||
1063 | * This finally frees the blkcg. | ||
1064 | */ | ||
1065 | |||
1045 | /** | 1066 | /** |
1046 | * blkcg_css_offline - cgroup css_offline callback | 1067 | * blkcg_css_offline - cgroup css_offline callback |
1047 | * @css: css of interest | 1068 | * @css: css of interest |
1048 | * | 1069 | * |
1049 | * This function is called when @css is about to go away and responsible | 1070 | * This function is called when @css is about to go away. Here the cgwbs are |
1050 | * for shooting down all blkgs associated with @css. blkgs should be | 1071 | * offlined first and only once writeback associated with the blkcg has |
1051 | * removed while holding both q and blkcg locks. As blkcg lock is nested | 1072 | * finished do we start step 2 (see above). |
1052 | * inside q lock, this function performs reverse double lock dancing. | ||
1053 | * | ||
1054 | * This is the blkcg counterpart of ioc_release_fn(). | ||
1055 | */ | 1073 | */ |
1056 | static void blkcg_css_offline(struct cgroup_subsys_state *css) | 1074 | static void blkcg_css_offline(struct cgroup_subsys_state *css) |
1057 | { | 1075 | { |
1058 | struct blkcg *blkcg = css_to_blkcg(css); | 1076 | struct blkcg *blkcg = css_to_blkcg(css); |
1059 | 1077 | ||
1078 | /* this prevents anyone from attaching or migrating to this blkcg */ | ||
1079 | wb_blkcg_offline(blkcg); | ||
1080 | |||
1081 | /* put the base cgwb reference allowing step 2 to be triggered */ | ||
1082 | blkcg_cgwb_put(blkcg); | ||
1083 | } | ||
1084 | |||
1085 | /** | ||
1086 | * blkcg_destroy_blkgs - responsible for shooting down blkgs | ||
1087 | * @blkcg: blkcg of interest | ||
1088 | * | ||
1089 | * blkgs should be removed while holding both q and blkcg locks. As blkcg lock | ||
1090 | * is nested inside q lock, this function performs reverse double lock dancing. | ||
1091 | * Destroying the blkgs releases the reference held on the blkcg's css allowing | ||
1092 | * blkcg_css_free to eventually be called. | ||
1093 | * | ||
1094 | * This is the blkcg counterpart of ioc_release_fn(). | ||
1095 | */ | ||
1096 | void blkcg_destroy_blkgs(struct blkcg *blkcg) | ||
1097 | { | ||
1060 | spin_lock_irq(&blkcg->lock); | 1098 | spin_lock_irq(&blkcg->lock); |
1061 | 1099 | ||
1062 | while (!hlist_empty(&blkcg->blkg_list)) { | 1100 | while (!hlist_empty(&blkcg->blkg_list)) { |
@@ -1075,8 +1113,6 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) | |||
1075 | } | 1113 | } |
1076 | 1114 | ||
1077 | spin_unlock_irq(&blkcg->lock); | 1115 | spin_unlock_irq(&blkcg->lock); |
1078 | |||
1079 | wb_blkcg_offline(blkcg); | ||
1080 | } | 1116 | } |
1081 | 1117 | ||
1082 | static void blkcg_css_free(struct cgroup_subsys_state *css) | 1118 | static void blkcg_css_free(struct cgroup_subsys_state *css) |
@@ -1146,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) | |||
1146 | INIT_HLIST_HEAD(&blkcg->blkg_list); | 1182 | INIT_HLIST_HEAD(&blkcg->blkg_list); |
1147 | #ifdef CONFIG_CGROUP_WRITEBACK | 1183 | #ifdef CONFIG_CGROUP_WRITEBACK |
1148 | INIT_LIST_HEAD(&blkcg->cgwb_list); | 1184 | INIT_LIST_HEAD(&blkcg->cgwb_list); |
1185 | refcount_set(&blkcg->cgwb_refcnt, 1); | ||
1149 | #endif | 1186 | #endif |
1150 | list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); | 1187 | list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); |
1151 | 1188 | ||
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1615cdd4c797..6d766a19f2bb 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h | |||
@@ -56,6 +56,7 @@ struct blkcg { | |||
56 | struct list_head all_blkcgs_node; | 56 | struct list_head all_blkcgs_node; |
57 | #ifdef CONFIG_CGROUP_WRITEBACK | 57 | #ifdef CONFIG_CGROUP_WRITEBACK |
58 | struct list_head cgwb_list; | 58 | struct list_head cgwb_list; |
59 | refcount_t cgwb_refcnt; | ||
59 | #endif | 60 | #endif |
60 | }; | 61 | }; |
61 | 62 | ||
@@ -386,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) | |||
386 | return cpd ? cpd->blkcg : NULL; | 387 | return cpd ? cpd->blkcg : NULL; |
387 | } | 388 | } |
388 | 389 | ||
390 | extern void blkcg_destroy_blkgs(struct blkcg *blkcg); | ||
391 | |||
392 | #ifdef CONFIG_CGROUP_WRITEBACK | ||
393 | |||
394 | /** | ||
395 | * blkcg_cgwb_get - get a reference for @blkcg->cgwb_list | ||
396 | * @blkcg: blkcg of interest | ||
397 | * | ||
398 | * This is used to track the number of active wb's related to a blkcg. | ||
399 | */ | ||
400 | static inline void blkcg_cgwb_get(struct blkcg *blkcg) | ||
401 | { | ||
402 | refcount_inc(&blkcg->cgwb_refcnt); | ||
403 | } | ||
404 | |||
405 | /** | ||
406 | * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list | ||
407 | * @blkcg: blkcg of interest | ||
408 | * | ||
409 | * This is used to track the number of active wb's related to a blkcg. | ||
410 | * When this count goes to zero, all active wbs have finished so the | ||
411 | * blkcg can continue destruction by calling blkcg_destroy_blkgs(). | ||
412 | * This work may occur in cgwb_release_workfn() on the cgwb_release | ||
413 | * workqueue. | ||
414 | */ | ||
415 | static inline void blkcg_cgwb_put(struct blkcg *blkcg) | ||
416 | { | ||
417 | if (refcount_dec_and_test(&blkcg->cgwb_refcnt)) | ||
418 | blkcg_destroy_blkgs(blkcg); | ||
419 | } | ||
420 | |||
421 | #else | ||
422 | |||
423 | static inline void blkcg_cgwb_get(struct blkcg *blkcg) { } | ||
424 | |||
425 | static inline void blkcg_cgwb_put(struct blkcg *blkcg) | ||
426 | { | ||
427 | /* wb isn't being accounted, so trigger destruction right away */ | ||
428 | blkcg_destroy_blkgs(blkcg); | ||
429 | } | ||
430 | |||
431 | #endif | ||
432 | |||
389 | /** | 433 | /** |
390 | * blkg_path - format cgroup path of blkg | 434 | * blkg_path - format cgroup path of blkg |
391 | * @blkg: blkg of interest | 435 | * @blkg: blkg of interest |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f5981e9d6ae2..8a8bb8796c6c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -491,6 +491,7 @@ static void cgwb_release_workfn(struct work_struct *work) | |||
491 | { | 491 | { |
492 | struct bdi_writeback *wb = container_of(work, struct bdi_writeback, | 492 | struct bdi_writeback *wb = container_of(work, struct bdi_writeback, |
493 | release_work); | 493 | release_work); |
494 | struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css); | ||
494 | 495 | ||
495 | mutex_lock(&wb->bdi->cgwb_release_mutex); | 496 | mutex_lock(&wb->bdi->cgwb_release_mutex); |
496 | wb_shutdown(wb); | 497 | wb_shutdown(wb); |
@@ -499,6 +500,9 @@ static void cgwb_release_workfn(struct work_struct *work) | |||
499 | css_put(wb->blkcg_css); | 500 | css_put(wb->blkcg_css); |
500 | mutex_unlock(&wb->bdi->cgwb_release_mutex); | 501 | mutex_unlock(&wb->bdi->cgwb_release_mutex); |
501 | 502 | ||
503 | /* triggers blkg destruction if cgwb_refcnt becomes zero */ | ||
504 | blkcg_cgwb_put(blkcg); | ||
505 | |||
502 | fprop_local_destroy_percpu(&wb->memcg_completions); | 506 | fprop_local_destroy_percpu(&wb->memcg_completions); |
503 | percpu_ref_exit(&wb->refcnt); | 507 | percpu_ref_exit(&wb->refcnt); |
504 | wb_exit(wb); | 508 | wb_exit(wb); |
@@ -597,6 +601,7 @@ static int cgwb_create(struct backing_dev_info *bdi, | |||
597 | list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); | 601 | list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); |
598 | list_add(&wb->memcg_node, memcg_cgwb_list); | 602 | list_add(&wb->memcg_node, memcg_cgwb_list); |
599 | list_add(&wb->blkcg_node, blkcg_cgwb_list); | 603 | list_add(&wb->blkcg_node, blkcg_cgwb_list); |
604 | blkcg_cgwb_get(blkcg); | ||
600 | css_get(memcg_css); | 605 | css_get(memcg_css); |
601 | css_get(blkcg_css); | 606 | css_get(blkcg_css); |
602 | } | 607 | } |