diff options
| -rw-r--r-- | block/blk-cgroup.c | 53 | ||||
| -rw-r--r-- | include/linux/blk-cgroup.h | 44 | ||||
| -rw-r--r-- | mm/backing-dev.c | 5 |
3 files changed, 94 insertions, 8 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2998e4f095d1..c19f9078da1e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
| @@ -1042,21 +1042,59 @@ static struct cftype blkcg_legacy_files[] = { | |||
| 1042 | { } /* terminate */ | 1042 | { } /* terminate */ |
| 1043 | }; | 1043 | }; |
| 1044 | 1044 | ||
| 1045 | /* | ||
| 1046 | * blkcg destruction is a three-stage process. | ||
| 1047 | * | ||
| 1048 | * 1. Destruction starts. The blkcg_css_offline() callback is invoked | ||
| 1049 | * which offlines writeback. Here we tie the next stage of blkg destruction | ||
| 1050 | * to the completion of writeback associated with the blkcg. This lets us | ||
| 1051 | * avoid punting potentially large amounts of outstanding writeback to root | ||
| 1052 | * while maintaining any ongoing policies. The next stage is triggered when | ||
| 1053 | * the blkcg's cgwb_refcnt goes to zero. | ||
| 1054 | * | ||
| 1055 | * 2. When the cgwb_refcnt goes to zero, blkcg_destroy_blkgs() is called | ||
| 1056 | * and handles the destruction of blkgs. Here the css reference held by | ||
| 1057 | * the blkg is put back eventually allowing blkcg_css_free() to be called. | ||
| 1058 | * This work may occur in cgwb_release_workfn() on the cgwb_release | ||
| 1059 | * workqueue. Any submitted ios that fail to get the blkg ref will be | ||
| 1060 | * punted to the root_blkg. | ||
| 1061 | * | ||
| 1062 | * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called. | ||
| 1063 | * This finally frees the blkcg. | ||
| 1064 | */ | ||
| 1065 | |||
| 1045 | /** | 1066 | /** |
| 1046 | * blkcg_css_offline - cgroup css_offline callback | 1067 | * blkcg_css_offline - cgroup css_offline callback |
| 1047 | * @css: css of interest | 1068 | * @css: css of interest |
| 1048 | * | 1069 | * |
| 1049 | * This function is called when @css is about to go away and responsible | 1070 | * This function is called when @css is about to go away. Here the cgwbs are |
| 1050 | * for shooting down all blkgs associated with @css. blkgs should be | 1071 | * offlined first and only once writeback associated with the blkcg has |
| 1051 | * removed while holding both q and blkcg locks. As blkcg lock is nested | 1072 | * finished do we start step 2 (see above). |
| 1052 | * inside q lock, this function performs reverse double lock dancing. | ||
| 1053 | * | ||
| 1054 | * This is the blkcg counterpart of ioc_release_fn(). | ||
| 1055 | */ | 1073 | */ |
| 1056 | static void blkcg_css_offline(struct cgroup_subsys_state *css) | 1074 | static void blkcg_css_offline(struct cgroup_subsys_state *css) |
| 1057 | { | 1075 | { |
| 1058 | struct blkcg *blkcg = css_to_blkcg(css); | 1076 | struct blkcg *blkcg = css_to_blkcg(css); |
| 1059 | 1077 | ||
| 1078 | /* this prevents anyone from attaching or migrating to this blkcg */ | ||
| 1079 | wb_blkcg_offline(blkcg); | ||
| 1080 | |||
| 1081 | /* put the base cgwb reference allowing step 2 to be triggered */ | ||
| 1082 | blkcg_cgwb_put(blkcg); | ||
| 1083 | } | ||
| 1084 | |||
| 1085 | /** | ||
| 1086 | * blkcg_destroy_blkgs - responsible for shooting down blkgs | ||
| 1087 | * @blkcg: blkcg of interest | ||
| 1088 | * | ||
| 1089 | * blkgs should be removed while holding both q and blkcg locks. As blkcg lock | ||
| 1090 | * is nested inside q lock, this function performs reverse double lock dancing. | ||
| 1091 | * Destroying the blkgs releases the reference held on the blkcg's css allowing | ||
| 1092 | * blkcg_css_free to eventually be called. | ||
| 1093 | * | ||
| 1094 | * This is the blkcg counterpart of ioc_release_fn(). | ||
| 1095 | */ | ||
| 1096 | void blkcg_destroy_blkgs(struct blkcg *blkcg) | ||
| 1097 | { | ||
| 1060 | spin_lock_irq(&blkcg->lock); | 1098 | spin_lock_irq(&blkcg->lock); |
| 1061 | 1099 | ||
| 1062 | while (!hlist_empty(&blkcg->blkg_list)) { | 1100 | while (!hlist_empty(&blkcg->blkg_list)) { |
| @@ -1075,8 +1113,6 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) | |||
| 1075 | } | 1113 | } |
| 1076 | 1114 | ||
| 1077 | spin_unlock_irq(&blkcg->lock); | 1115 | spin_unlock_irq(&blkcg->lock); |
| 1078 | |||
| 1079 | wb_blkcg_offline(blkcg); | ||
| 1080 | } | 1116 | } |
| 1081 | 1117 | ||
| 1082 | static void blkcg_css_free(struct cgroup_subsys_state *css) | 1118 | static void blkcg_css_free(struct cgroup_subsys_state *css) |
| @@ -1146,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) | |||
| 1146 | INIT_HLIST_HEAD(&blkcg->blkg_list); | 1182 | INIT_HLIST_HEAD(&blkcg->blkg_list); |
| 1147 | #ifdef CONFIG_CGROUP_WRITEBACK | 1183 | #ifdef CONFIG_CGROUP_WRITEBACK |
| 1148 | INIT_LIST_HEAD(&blkcg->cgwb_list); | 1184 | INIT_LIST_HEAD(&blkcg->cgwb_list); |
| 1185 | refcount_set(&blkcg->cgwb_refcnt, 1); | ||
| 1149 | #endif | 1186 | #endif |
| 1150 | list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); | 1187 | list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); |
| 1151 | 1188 | ||
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1615cdd4c797..6d766a19f2bb 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h | |||
| @@ -56,6 +56,7 @@ struct blkcg { | |||
| 56 | struct list_head all_blkcgs_node; | 56 | struct list_head all_blkcgs_node; |
| 57 | #ifdef CONFIG_CGROUP_WRITEBACK | 57 | #ifdef CONFIG_CGROUP_WRITEBACK |
| 58 | struct list_head cgwb_list; | 58 | struct list_head cgwb_list; |
| 59 | refcount_t cgwb_refcnt; | ||
| 59 | #endif | 60 | #endif |
| 60 | }; | 61 | }; |
| 61 | 62 | ||
| @@ -386,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) | |||
| 386 | return cpd ? cpd->blkcg : NULL; | 387 | return cpd ? cpd->blkcg : NULL; |
| 387 | } | 388 | } |
| 388 | 389 | ||
| 390 | extern void blkcg_destroy_blkgs(struct blkcg *blkcg); | ||
| 391 | |||
| 392 | #ifdef CONFIG_CGROUP_WRITEBACK | ||
| 393 | |||
| 394 | /** | ||
| 395 | * blkcg_cgwb_get - get a reference for @blkcg->cgwb_list | ||
| 396 | * @blkcg: blkcg of interest | ||
| 397 | * | ||
| 398 | * This is used to track the number of active wb's related to a blkcg. | ||
| 399 | */ | ||
| 400 | static inline void blkcg_cgwb_get(struct blkcg *blkcg) | ||
| 401 | { | ||
| 402 | refcount_inc(&blkcg->cgwb_refcnt); | ||
| 403 | } | ||
| 404 | |||
| 405 | /** | ||
| 406 | * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list | ||
| 407 | * @blkcg: blkcg of interest | ||
| 408 | * | ||
| 409 | * This is used to track the number of active wb's related to a blkcg. | ||
| 410 | * When this count goes to zero, all active wbs have finished so the | ||
| 411 | * blkcg can continue destruction by calling blkcg_destroy_blkgs(). | ||
| 412 | * This work may occur in cgwb_release_workfn() on the cgwb_release | ||
| 413 | * workqueue. | ||
| 414 | */ | ||
| 415 | static inline void blkcg_cgwb_put(struct blkcg *blkcg) | ||
| 416 | { | ||
| 417 | if (refcount_dec_and_test(&blkcg->cgwb_refcnt)) | ||
| 418 | blkcg_destroy_blkgs(blkcg); | ||
| 419 | } | ||
| 420 | |||
| 421 | #else | ||
| 422 | |||
| 423 | static inline void blkcg_cgwb_get(struct blkcg *blkcg) { } | ||
| 424 | |||
| 425 | static inline void blkcg_cgwb_put(struct blkcg *blkcg) | ||
| 426 | { | ||
| 427 | /* wb isn't being accounted, so trigger destruction right away */ | ||
| 428 | blkcg_destroy_blkgs(blkcg); | ||
| 429 | } | ||
| 430 | |||
| 431 | #endif | ||
| 432 | |||
| 389 | /** | 433 | /** |
| 390 | * blkg_path - format cgroup path of blkg | 434 | * blkg_path - format cgroup path of blkg |
| 391 | * @blkg: blkg of interest | 435 | * @blkg: blkg of interest |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f5981e9d6ae2..8a8bb8796c6c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
| @@ -491,6 +491,7 @@ static void cgwb_release_workfn(struct work_struct *work) | |||
| 491 | { | 491 | { |
| 492 | struct bdi_writeback *wb = container_of(work, struct bdi_writeback, | 492 | struct bdi_writeback *wb = container_of(work, struct bdi_writeback, |
| 493 | release_work); | 493 | release_work); |
| 494 | struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css); | ||
| 494 | 495 | ||
| 495 | mutex_lock(&wb->bdi->cgwb_release_mutex); | 496 | mutex_lock(&wb->bdi->cgwb_release_mutex); |
| 496 | wb_shutdown(wb); | 497 | wb_shutdown(wb); |
| @@ -499,6 +500,9 @@ static void cgwb_release_workfn(struct work_struct *work) | |||
| 499 | css_put(wb->blkcg_css); | 500 | css_put(wb->blkcg_css); |
| 500 | mutex_unlock(&wb->bdi->cgwb_release_mutex); | 501 | mutex_unlock(&wb->bdi->cgwb_release_mutex); |
| 501 | 502 | ||
| 503 | /* triggers blkg destruction if cgwb_refcnt becomes zero */ | ||
| 504 | blkcg_cgwb_put(blkcg); | ||
| 505 | |||
| 502 | fprop_local_destroy_percpu(&wb->memcg_completions); | 506 | fprop_local_destroy_percpu(&wb->memcg_completions); |
| 503 | percpu_ref_exit(&wb->refcnt); | 507 | percpu_ref_exit(&wb->refcnt); |
| 504 | wb_exit(wb); | 508 | wb_exit(wb); |
| @@ -597,6 +601,7 @@ static int cgwb_create(struct backing_dev_info *bdi, | |||
| 597 | list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); | 601 | list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); |
| 598 | list_add(&wb->memcg_node, memcg_cgwb_list); | 602 | list_add(&wb->memcg_node, memcg_cgwb_list); |
| 599 | list_add(&wb->blkcg_node, blkcg_cgwb_list); | 603 | list_add(&wb->blkcg_node, blkcg_cgwb_list); |
| 604 | blkcg_cgwb_get(blkcg); | ||
| 600 | css_get(memcg_css); | 605 | css_get(memcg_css); |
| 601 | css_get(blkcg_css); | 606 | css_get(blkcg_css); |
| 602 | } | 607 | } |
