diff options
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 175 |
1 files changed, 121 insertions, 54 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 136eceadeed1..bb263d0caab3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -277,6 +277,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, | |||
277 | if (!(cgrp->root->subsys_mask & (1 << ss->id))) | 277 | if (!(cgrp->root->subsys_mask & (1 << ss->id))) |
278 | return NULL; | 278 | return NULL; |
279 | 279 | ||
280 | /* | ||
281 | * This function is used while updating css associations and thus | ||
282 | * can't test the csses directly. Use ->child_subsys_mask. | ||
283 | */ | ||
280 | while (cgroup_parent(cgrp) && | 284 | while (cgroup_parent(cgrp) && |
281 | !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id))) | 285 | !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id))) |
282 | cgrp = cgroup_parent(cgrp); | 286 | cgrp = cgroup_parent(cgrp); |
@@ -284,6 +288,39 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, | |||
284 | return cgroup_css(cgrp, ss); | 288 | return cgroup_css(cgrp, ss); |
285 | } | 289 | } |
286 | 290 | ||
291 | /** | ||
292 | * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem | ||
293 | * @cgrp: the cgroup of interest | ||
294 | * @ss: the subsystem of interest | ||
295 | * | ||
296 | * Find and get the effective css of @cgrp for @ss. The effective css is | ||
297 | * defined as the matching css of the nearest ancestor including self which | ||
298 | * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on, | ||
299 | * the root css is returned, so this function always returns a valid css. | ||
300 | * The returned css must be put using css_put(). | ||
301 | */ | ||
302 | struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp, | ||
303 | struct cgroup_subsys *ss) | ||
304 | { | ||
305 | struct cgroup_subsys_state *css; | ||
306 | |||
307 | rcu_read_lock(); | ||
308 | |||
309 | do { | ||
310 | css = cgroup_css(cgrp, ss); | ||
311 | |||
312 | if (css && css_tryget_online(css)) | ||
313 | goto out_unlock; | ||
314 | cgrp = cgroup_parent(cgrp); | ||
315 | } while (cgrp); | ||
316 | |||
317 | css = init_css_set.subsys[ss->id]; | ||
318 | css_get(css); | ||
319 | out_unlock: | ||
320 | rcu_read_unlock(); | ||
321 | return css; | ||
322 | } | ||
323 | |||
287 | /* convenient tests for these bits */ | 324 | /* convenient tests for these bits */ |
288 | static inline bool cgroup_is_dead(const struct cgroup *cgrp) | 325 | static inline bool cgroup_is_dead(const struct cgroup *cgrp) |
289 | { | 326 | { |
@@ -1019,31 +1056,30 @@ static void cgroup_put(struct cgroup *cgrp) | |||
1019 | } | 1056 | } |
1020 | 1057 | ||
1021 | /** | 1058 | /** |
1022 | * cgroup_refresh_child_subsys_mask - update child_subsys_mask | 1059 | * cgroup_calc_child_subsys_mask - calculate child_subsys_mask |
1023 | * @cgrp: the target cgroup | 1060 | * @cgrp: the target cgroup |
1061 | * @subtree_control: the new subtree_control mask to consider | ||
1024 | * | 1062 | * |
1025 | * On the default hierarchy, a subsystem may request other subsystems to be | 1063 | * On the default hierarchy, a subsystem may request other subsystems to be |
1026 | * enabled together through its ->depends_on mask. In such cases, more | 1064 | * enabled together through its ->depends_on mask. In such cases, more |
1027 | * subsystems than specified in "cgroup.subtree_control" may be enabled. | 1065 | * subsystems than specified in "cgroup.subtree_control" may be enabled. |
1028 | * | 1066 | * |
1029 | * This function determines which subsystems need to be enabled given the | 1067 | * This function calculates which subsystems need to be enabled if |
1030 | * current @cgrp->subtree_control and records it in | 1068 | * @subtree_control is to be applied to @cgrp. The returned mask is always |
1031 | * @cgrp->child_subsys_mask. The resulting mask is always a superset of | 1069 | * a superset of @subtree_control and follows the usual hierarchy rules. |
1032 | * @cgrp->subtree_control and follows the usual hierarchy rules. | ||
1033 | */ | 1070 | */ |
1034 | static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) | 1071 | static unsigned int cgroup_calc_child_subsys_mask(struct cgroup *cgrp, |
1072 | unsigned int subtree_control) | ||
1035 | { | 1073 | { |
1036 | struct cgroup *parent = cgroup_parent(cgrp); | 1074 | struct cgroup *parent = cgroup_parent(cgrp); |
1037 | unsigned int cur_ss_mask = cgrp->subtree_control; | 1075 | unsigned int cur_ss_mask = subtree_control; |
1038 | struct cgroup_subsys *ss; | 1076 | struct cgroup_subsys *ss; |
1039 | int ssid; | 1077 | int ssid; |
1040 | 1078 | ||
1041 | lockdep_assert_held(&cgroup_mutex); | 1079 | lockdep_assert_held(&cgroup_mutex); |
1042 | 1080 | ||
1043 | if (!cgroup_on_dfl(cgrp)) { | 1081 | if (!cgroup_on_dfl(cgrp)) |
1044 | cgrp->child_subsys_mask = cur_ss_mask; | 1082 | return cur_ss_mask; |
1045 | return; | ||
1046 | } | ||
1047 | 1083 | ||
1048 | while (true) { | 1084 | while (true) { |
1049 | unsigned int new_ss_mask = cur_ss_mask; | 1085 | unsigned int new_ss_mask = cur_ss_mask; |
@@ -1067,7 +1103,20 @@ static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) | |||
1067 | cur_ss_mask = new_ss_mask; | 1103 | cur_ss_mask = new_ss_mask; |
1068 | } | 1104 | } |
1069 | 1105 | ||
1070 | cgrp->child_subsys_mask = cur_ss_mask; | 1106 | return cur_ss_mask; |
1107 | } | ||
1108 | |||
1109 | /** | ||
1110 | * cgroup_refresh_child_subsys_mask - update child_subsys_mask | ||
1111 | * @cgrp: the target cgroup | ||
1112 | * | ||
1113 | * Update @cgrp->child_subsys_mask according to the current | ||
1114 | * @cgrp->subtree_control using cgroup_calc_child_subsys_mask(). | ||
1115 | */ | ||
1116 | static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) | ||
1117 | { | ||
1118 | cgrp->child_subsys_mask = | ||
1119 | cgroup_calc_child_subsys_mask(cgrp, cgrp->subtree_control); | ||
1071 | } | 1120 | } |
1072 | 1121 | ||
1073 | /** | 1122 | /** |
@@ -2641,7 +2690,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, | |||
2641 | loff_t off) | 2690 | loff_t off) |
2642 | { | 2691 | { |
2643 | unsigned int enable = 0, disable = 0; | 2692 | unsigned int enable = 0, disable = 0; |
2644 | unsigned int css_enable, css_disable, old_ctrl, new_ctrl; | 2693 | unsigned int css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; |
2645 | struct cgroup *cgrp, *child; | 2694 | struct cgroup *cgrp, *child; |
2646 | struct cgroup_subsys *ss; | 2695 | struct cgroup_subsys *ss; |
2647 | char *tok; | 2696 | char *tok; |
@@ -2693,36 +2742,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, | |||
2693 | ret = -ENOENT; | 2742 | ret = -ENOENT; |
2694 | goto out_unlock; | 2743 | goto out_unlock; |
2695 | } | 2744 | } |
2696 | |||
2697 | /* | ||
2698 | * @ss is already enabled through dependency and | ||
2699 | * we'll just make it visible. Skip draining. | ||
2700 | */ | ||
2701 | if (cgrp->child_subsys_mask & (1 << ssid)) | ||
2702 | continue; | ||
2703 | |||
2704 | /* | ||
2705 | * Because css offlining is asynchronous, userland | ||
2706 | * might try to re-enable the same controller while | ||
2707 | * the previous instance is still around. In such | ||
2708 | * cases, wait till it's gone using offline_waitq. | ||
2709 | */ | ||
2710 | cgroup_for_each_live_child(child, cgrp) { | ||
2711 | DEFINE_WAIT(wait); | ||
2712 | |||
2713 | if (!cgroup_css(child, ss)) | ||
2714 | continue; | ||
2715 | |||
2716 | cgroup_get(child); | ||
2717 | prepare_to_wait(&child->offline_waitq, &wait, | ||
2718 | TASK_UNINTERRUPTIBLE); | ||
2719 | cgroup_kn_unlock(of->kn); | ||
2720 | schedule(); | ||
2721 | finish_wait(&child->offline_waitq, &wait); | ||
2722 | cgroup_put(child); | ||
2723 | |||
2724 | return restart_syscall(); | ||
2725 | } | ||
2726 | } else if (disable & (1 << ssid)) { | 2745 | } else if (disable & (1 << ssid)) { |
2727 | if (!(cgrp->subtree_control & (1 << ssid))) { | 2746 | if (!(cgrp->subtree_control & (1 << ssid))) { |
2728 | disable &= ~(1 << ssid); | 2747 | disable &= ~(1 << ssid); |
@@ -2758,19 +2777,48 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, | |||
2758 | * subsystems than specified may need to be enabled or disabled | 2777 | * subsystems than specified may need to be enabled or disabled |
2759 | * depending on subsystem dependencies. | 2778 | * depending on subsystem dependencies. |
2760 | */ | 2779 | */ |
2761 | cgrp->subtree_control |= enable; | 2780 | old_sc = cgrp->subtree_control; |
2762 | cgrp->subtree_control &= ~disable; | 2781 | old_ss = cgrp->child_subsys_mask; |
2782 | new_sc = (old_sc | enable) & ~disable; | ||
2783 | new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc); | ||
2763 | 2784 | ||
2764 | old_ctrl = cgrp->child_subsys_mask; | 2785 | css_enable = ~old_ss & new_ss; |
2765 | cgroup_refresh_child_subsys_mask(cgrp); | 2786 | css_disable = old_ss & ~new_ss; |
2766 | new_ctrl = cgrp->child_subsys_mask; | ||
2767 | |||
2768 | css_enable = ~old_ctrl & new_ctrl; | ||
2769 | css_disable = old_ctrl & ~new_ctrl; | ||
2770 | enable |= css_enable; | 2787 | enable |= css_enable; |
2771 | disable |= css_disable; | 2788 | disable |= css_disable; |
2772 | 2789 | ||
2773 | /* | 2790 | /* |
2791 | * Because css offlining is asynchronous, userland might try to | ||
2792 | * re-enable the same controller while the previous instance is | ||
2793 | * still around. In such cases, wait till it's gone using | ||
2794 | * offline_waitq. | ||
2795 | */ | ||
2796 | for_each_subsys(ss, ssid) { | ||
2797 | if (!(css_enable & (1 << ssid))) | ||
2798 | continue; | ||
2799 | |||
2800 | cgroup_for_each_live_child(child, cgrp) { | ||
2801 | DEFINE_WAIT(wait); | ||
2802 | |||
2803 | if (!cgroup_css(child, ss)) | ||
2804 | continue; | ||
2805 | |||
2806 | cgroup_get(child); | ||
2807 | prepare_to_wait(&child->offline_waitq, &wait, | ||
2808 | TASK_UNINTERRUPTIBLE); | ||
2809 | cgroup_kn_unlock(of->kn); | ||
2810 | schedule(); | ||
2811 | finish_wait(&child->offline_waitq, &wait); | ||
2812 | cgroup_put(child); | ||
2813 | |||
2814 | return restart_syscall(); | ||
2815 | } | ||
2816 | } | ||
2817 | |||
2818 | cgrp->subtree_control = new_sc; | ||
2819 | cgrp->child_subsys_mask = new_ss; | ||
2820 | |||
2821 | /* | ||
2774 | * Create new csses or make the existing ones visible. A css is | 2822 | * Create new csses or make the existing ones visible. A css is |
2775 | * created invisible if it's being implicitly enabled through | 2823 | * created invisible if it's being implicitly enabled through |
2776 | * dependency. An invisible css is made visible when the userland | 2824 | * dependency. An invisible css is made visible when the userland |
@@ -2825,6 +2873,24 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, | |||
2825 | } | 2873 | } |
2826 | } | 2874 | } |
2827 | 2875 | ||
2876 | /* | ||
2877 | * The effective csses of all the descendants (excluding @cgrp) may | ||
2878 | * have changed. Subsystems can optionally subscribe to this event | ||
2879 | * by implementing ->css_e_css_changed() which is invoked if any of | ||
2880 | * the effective csses seen from the css's cgroup may have changed. | ||
2881 | */ | ||
2882 | for_each_subsys(ss, ssid) { | ||
2883 | struct cgroup_subsys_state *this_css = cgroup_css(cgrp, ss); | ||
2884 | struct cgroup_subsys_state *css; | ||
2885 | |||
2886 | if (!ss->css_e_css_changed || !this_css) | ||
2887 | continue; | ||
2888 | |||
2889 | css_for_each_descendant_pre(css, this_css) | ||
2890 | if (css != this_css) | ||
2891 | ss->css_e_css_changed(css); | ||
2892 | } | ||
2893 | |||
2828 | kernfs_activate(cgrp->kn); | 2894 | kernfs_activate(cgrp->kn); |
2829 | ret = 0; | 2895 | ret = 0; |
2830 | out_unlock: | 2896 | out_unlock: |
@@ -2832,9 +2898,8 @@ out_unlock: | |||
2832 | return ret ?: nbytes; | 2898 | return ret ?: nbytes; |
2833 | 2899 | ||
2834 | err_undo_css: | 2900 | err_undo_css: |
2835 | cgrp->subtree_control &= ~enable; | 2901 | cgrp->subtree_control = old_sc; |
2836 | cgrp->subtree_control |= disable; | 2902 | cgrp->child_subsys_mask = old_ss; |
2837 | cgroup_refresh_child_subsys_mask(cgrp); | ||
2838 | 2903 | ||
2839 | for_each_subsys(ss, ssid) { | 2904 | for_each_subsys(ss, ssid) { |
2840 | if (!(enable & (1 << ssid))) | 2905 | if (!(enable & (1 << ssid))) |
@@ -4370,6 +4435,8 @@ static void css_release_work_fn(struct work_struct *work) | |||
4370 | if (ss) { | 4435 | if (ss) { |
4371 | /* css release path */ | 4436 | /* css release path */ |
4372 | cgroup_idr_remove(&ss->css_idr, css->id); | 4437 | cgroup_idr_remove(&ss->css_idr, css->id); |
4438 | if (ss->css_released) | ||
4439 | ss->css_released(css); | ||
4373 | } else { | 4440 | } else { |
4374 | /* cgroup release path */ | 4441 | /* cgroup release path */ |
4375 | cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); | 4442 | cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); |