about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- kernel/cgroup.c 175
1 file changed, 121 insertions, 54 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 136eceadeed1..bb263d0caab3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -277,6 +277,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
277 if (!(cgrp->root->subsys_mask & (1 << ss->id))) 277 if (!(cgrp->root->subsys_mask & (1 << ss->id)))
278 return NULL; 278 return NULL;
279 279
280 /*
281 * This function is used while updating css associations and thus
282 * can't test the csses directly. Use ->child_subsys_mask.
283 */
280 while (cgroup_parent(cgrp) && 284 while (cgroup_parent(cgrp) &&
281 !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id))) 285 !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id)))
282 cgrp = cgroup_parent(cgrp); 286 cgrp = cgroup_parent(cgrp);
@@ -284,6 +288,39 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
284 return cgroup_css(cgrp, ss); 288 return cgroup_css(cgrp, ss);
285} 289}
286 290
291/**
292 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
293 * @cgrp: the cgroup of interest
294 * @ss: the subsystem of interest
295 *
296 * Find and get the effective css of @cgrp for @ss. The effective css is
297 * defined as the matching css of the nearest ancestor including self which
298 * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
299 * the root css is returned, so this function always returns a valid css.
300 * The returned css must be put using css_put().
301 */
302struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
303 struct cgroup_subsys *ss)
304{
305 struct cgroup_subsys_state *css;
306
307 rcu_read_lock();
308
309 do {
310 css = cgroup_css(cgrp, ss);
311
312 if (css && css_tryget_online(css))
313 goto out_unlock;
314 cgrp = cgroup_parent(cgrp);
315 } while (cgrp);
316
317 css = init_css_set.subsys[ss->id];
318 css_get(css);
319out_unlock:
320 rcu_read_unlock();
321 return css;
322}
323
287/* convenient tests for these bits */ 324/* convenient tests for these bits */
288static inline bool cgroup_is_dead(const struct cgroup *cgrp) 325static inline bool cgroup_is_dead(const struct cgroup *cgrp)
289{ 326{
@@ -1019,31 +1056,30 @@ static void cgroup_put(struct cgroup *cgrp)
1019} 1056}
1020 1057
1021/** 1058/**
1022 * cgroup_refresh_child_subsys_mask - update child_subsys_mask 1059 * cgroup_calc_child_subsys_mask - calculate child_subsys_mask
1023 * @cgrp: the target cgroup 1060 * @cgrp: the target cgroup
1061 * @subtree_control: the new subtree_control mask to consider
1024 * 1062 *
1025 * On the default hierarchy, a subsystem may request other subsystems to be 1063 * On the default hierarchy, a subsystem may request other subsystems to be
1026 * enabled together through its ->depends_on mask. In such cases, more 1064 * enabled together through its ->depends_on mask. In such cases, more
1027 * subsystems than specified in "cgroup.subtree_control" may be enabled. 1065 * subsystems than specified in "cgroup.subtree_control" may be enabled.
1028 * 1066 *
1029 * This function determines which subsystems need to be enabled given the 1067 * This function calculates which subsystems need to be enabled if
1030 * current @cgrp->subtree_control and records it in 1068 * @subtree_control is to be applied to @cgrp. The returned mask is always
1031 * @cgrp->child_subsys_mask. The resulting mask is always a superset of 1069 * a superset of @subtree_control and follows the usual hierarchy rules.
1032 * @cgrp->subtree_control and follows the usual hierarchy rules.
1033 */ 1070 */
1034static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) 1071static unsigned int cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
1072 unsigned int subtree_control)
1035{ 1073{
1036 struct cgroup *parent = cgroup_parent(cgrp); 1074 struct cgroup *parent = cgroup_parent(cgrp);
1037 unsigned int cur_ss_mask = cgrp->subtree_control; 1075 unsigned int cur_ss_mask = subtree_control;
1038 struct cgroup_subsys *ss; 1076 struct cgroup_subsys *ss;
1039 int ssid; 1077 int ssid;
1040 1078
1041 lockdep_assert_held(&cgroup_mutex); 1079 lockdep_assert_held(&cgroup_mutex);
1042 1080
1043 if (!cgroup_on_dfl(cgrp)) { 1081 if (!cgroup_on_dfl(cgrp))
1044 cgrp->child_subsys_mask = cur_ss_mask; 1082 return cur_ss_mask;
1045 return;
1046 }
1047 1083
1048 while (true) { 1084 while (true) {
1049 unsigned int new_ss_mask = cur_ss_mask; 1085 unsigned int new_ss_mask = cur_ss_mask;
@@ -1067,7 +1103,20 @@ static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
1067 cur_ss_mask = new_ss_mask; 1103 cur_ss_mask = new_ss_mask;
1068 } 1104 }
1069 1105
1070 cgrp->child_subsys_mask = cur_ss_mask; 1106 return cur_ss_mask;
1107}
1108
1109/**
1110 * cgroup_refresh_child_subsys_mask - update child_subsys_mask
1111 * @cgrp: the target cgroup
1112 *
1113 * Update @cgrp->child_subsys_mask according to the current
1114 * @cgrp->subtree_control using cgroup_calc_child_subsys_mask().
1115 */
1116static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
1117{
1118 cgrp->child_subsys_mask =
1119 cgroup_calc_child_subsys_mask(cgrp, cgrp->subtree_control);
1071} 1120}
1072 1121
1073/** 1122/**
@@ -2641,7 +2690,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
2641 loff_t off) 2690 loff_t off)
2642{ 2691{
2643 unsigned int enable = 0, disable = 0; 2692 unsigned int enable = 0, disable = 0;
2644 unsigned int css_enable, css_disable, old_ctrl, new_ctrl; 2693 unsigned int css_enable, css_disable, old_sc, new_sc, old_ss, new_ss;
2645 struct cgroup *cgrp, *child; 2694 struct cgroup *cgrp, *child;
2646 struct cgroup_subsys *ss; 2695 struct cgroup_subsys *ss;
2647 char *tok; 2696 char *tok;
@@ -2693,36 +2742,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
2693 ret = -ENOENT; 2742 ret = -ENOENT;
2694 goto out_unlock; 2743 goto out_unlock;
2695 } 2744 }
2696
2697 /*
2698 * @ss is already enabled through dependency and
2699 * we'll just make it visible. Skip draining.
2700 */
2701 if (cgrp->child_subsys_mask & (1 << ssid))
2702 continue;
2703
2704 /*
2705 * Because css offlining is asynchronous, userland
2706 * might try to re-enable the same controller while
2707 * the previous instance is still around. In such
2708 * cases, wait till it's gone using offline_waitq.
2709 */
2710 cgroup_for_each_live_child(child, cgrp) {
2711 DEFINE_WAIT(wait);
2712
2713 if (!cgroup_css(child, ss))
2714 continue;
2715
2716 cgroup_get(child);
2717 prepare_to_wait(&child->offline_waitq, &wait,
2718 TASK_UNINTERRUPTIBLE);
2719 cgroup_kn_unlock(of->kn);
2720 schedule();
2721 finish_wait(&child->offline_waitq, &wait);
2722 cgroup_put(child);
2723
2724 return restart_syscall();
2725 }
2726 } else if (disable & (1 << ssid)) { 2745 } else if (disable & (1 << ssid)) {
2727 if (!(cgrp->subtree_control & (1 << ssid))) { 2746 if (!(cgrp->subtree_control & (1 << ssid))) {
2728 disable &= ~(1 << ssid); 2747 disable &= ~(1 << ssid);
@@ -2758,19 +2777,48 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
2758 * subsystems than specified may need to be enabled or disabled 2777 * subsystems than specified may need to be enabled or disabled
2759 * depending on subsystem dependencies. 2778 * depending on subsystem dependencies.
2760 */ 2779 */
2761 cgrp->subtree_control |= enable; 2780 old_sc = cgrp->subtree_control;
2762 cgrp->subtree_control &= ~disable; 2781 old_ss = cgrp->child_subsys_mask;
2782 new_sc = (old_sc | enable) & ~disable;
2783 new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc);
2763 2784
2764 old_ctrl = cgrp->child_subsys_mask; 2785 css_enable = ~old_ss & new_ss;
2765 cgroup_refresh_child_subsys_mask(cgrp); 2786 css_disable = old_ss & ~new_ss;
2766 new_ctrl = cgrp->child_subsys_mask;
2767
2768 css_enable = ~old_ctrl & new_ctrl;
2769 css_disable = old_ctrl & ~new_ctrl;
2770 enable |= css_enable; 2787 enable |= css_enable;
2771 disable |= css_disable; 2788 disable |= css_disable;
2772 2789
2773 /* 2790 /*
2791 * Because css offlining is asynchronous, userland might try to
2792 * re-enable the same controller while the previous instance is
2793 * still around. In such cases, wait till it's gone using
2794 * offline_waitq.
2795 */
2796 for_each_subsys(ss, ssid) {
2797 if (!(css_enable & (1 << ssid)))
2798 continue;
2799
2800 cgroup_for_each_live_child(child, cgrp) {
2801 DEFINE_WAIT(wait);
2802
2803 if (!cgroup_css(child, ss))
2804 continue;
2805
2806 cgroup_get(child);
2807 prepare_to_wait(&child->offline_waitq, &wait,
2808 TASK_UNINTERRUPTIBLE);
2809 cgroup_kn_unlock(of->kn);
2810 schedule();
2811 finish_wait(&child->offline_waitq, &wait);
2812 cgroup_put(child);
2813
2814 return restart_syscall();
2815 }
2816 }
2817
2818 cgrp->subtree_control = new_sc;
2819 cgrp->child_subsys_mask = new_ss;
2820
2821 /*
2774 * Create new csses or make the existing ones visible. A css is 2822 * Create new csses or make the existing ones visible. A css is
2775 * created invisible if it's being implicitly enabled through 2823 * created invisible if it's being implicitly enabled through
2776 * dependency. An invisible css is made visible when the userland 2824 * dependency. An invisible css is made visible when the userland
@@ -2825,6 +2873,24 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
2825 } 2873 }
2826 } 2874 }
2827 2875
2876 /*
2877 * The effective csses of all the descendants (excluding @cgrp) may
2878 * have changed. Subsystems can optionally subscribe to this event
2879 * by implementing ->css_e_css_changed() which is invoked if any of
2880 * the effective csses seen from the css's cgroup may have changed.
2881 */
2882 for_each_subsys(ss, ssid) {
2883 struct cgroup_subsys_state *this_css = cgroup_css(cgrp, ss);
2884 struct cgroup_subsys_state *css;
2885
2886 if (!ss->css_e_css_changed || !this_css)
2887 continue;
2888
2889 css_for_each_descendant_pre(css, this_css)
2890 if (css != this_css)
2891 ss->css_e_css_changed(css);
2892 }
2893
2828 kernfs_activate(cgrp->kn); 2894 kernfs_activate(cgrp->kn);
2829 ret = 0; 2895 ret = 0;
2830out_unlock: 2896out_unlock:
@@ -2832,9 +2898,8 @@ out_unlock:
2832 return ret ?: nbytes; 2898 return ret ?: nbytes;
2833 2899
2834err_undo_css: 2900err_undo_css:
2835 cgrp->subtree_control &= ~enable; 2901 cgrp->subtree_control = old_sc;
2836 cgrp->subtree_control |= disable; 2902 cgrp->child_subsys_mask = old_ss;
2837 cgroup_refresh_child_subsys_mask(cgrp);
2838 2903
2839 for_each_subsys(ss, ssid) { 2904 for_each_subsys(ss, ssid) {
2840 if (!(enable & (1 << ssid))) 2905 if (!(enable & (1 << ssid)))
@@ -4370,6 +4435,8 @@ static void css_release_work_fn(struct work_struct *work)
4370 if (ss) { 4435 if (ss) {
4371 /* css release path */ 4436 /* css release path */
4372 cgroup_idr_remove(&ss->css_idr, css->id); 4437 cgroup_idr_remove(&ss->css_idr, css->id);
4438 if (ss->css_released)
4439 ss->css_released(css);
4373 } else { 4440 } else {
4374 /* cgroup release path */ 4441 /* cgroup release path */
4375 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); 4442 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);