Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	257
1 file changed, 84 insertions(+), 173 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6937f16f5ecb..f6bc78f4ed13 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -250,13 +250,6 @@ enum res_type {
 /* Used for OOM nofiier */
 #define OOM_CONTROL		(0)
 
-/*
- * The memcg_create_mutex will be held whenever a new cgroup is created.
- * As a consequence, any change that needs to protect against new child cgroups
- * appearing has to hold it as well.
- */
-static DEFINE_MUTEX(memcg_create_mutex);
-
 /* Some nice accessors for the vmpressure. */
 struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
 {
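The mutex removed above serialized cgroup creation against writes to hierarchically inherited attributes; the rest of this patch drops every user, moving the remaining "no children yet?" checks under memcg_limit_mutex (see the memcg_update_kmem_limit() hunk below). As a reference for what the lock used to guarantee, a minimal userspace pthread sketch with hypothetical names, not kernel code:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

/* One lock covers both "create a child" and "change an inherited
 * attribute", so a child can never be created halfway through an
 * attribute change and inherit a torn value. */
static pthread_mutex_t create_lock = PTHREAD_MUTEX_INITIALIZER;
static bool has_children;
static int inherited_attr;

void create_child(void)
{
	pthread_mutex_lock(&create_lock);
	has_children = true;	/* the child snapshots inherited_attr here */
	pthread_mutex_unlock(&create_lock);
}

int set_inherited_attr(int val)
{
	int err = 0;

	pthread_mutex_lock(&create_lock);
	if (has_children)
		err = -EBUSY;	/* children already inherited the old value */
	else
		inherited_attr = val;
	pthread_mutex_unlock(&create_lock);
	return err;
}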
@@ -899,17 +892,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 		if (css == &root->css)
 			break;
 
-		if (css_tryget(css)) {
-			/*
-			 * Make sure the memcg is initialized:
-			 * mem_cgroup_css_online() orders the the
-			 * initialization against setting the flag.
-			 */
-			if (smp_load_acquire(&memcg->initialized))
-				break;
-
-			css_put(css);
-		}
+		if (css_tryget(css))
+			break;
 
 		memcg = NULL;
 	}
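The deleted branch was the reader half of a release/acquire handshake: it paired with the smp_store_release(&memcg->initialized, 1) at the end of mem_cgroup_css_online(), which this patch also removes (see the last hunks). Once css_alloc() fully initializes the memcg before the css is ever published, the flag is redundant. For reference, a minimal C11 userspace sketch of the retired publication pattern, hypothetical names:

#include <stdatomic.h>
#include <stdbool.h>

struct obj {
	int payload;
	atomic_bool initialized;
};

/* Writer: fill the object in, then publish with release semantics. */
void publish(struct obj *o, int value)
{
	o->payload = value;
	atomic_store_explicit(&o->initialized, true, memory_order_release);
}

/* Reader: the acquire load pairs with the release store, so once
 * initialized reads true, payload is guaranteed to be visible too. */
bool try_consume(struct obj *o, int *out)
{
	if (!atomic_load_explicit(&o->initialized, memory_order_acquire))
		return false;
	*out = o->payload;
	return true;
}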
@@ -2690,14 +2674,6 @@ static inline bool memcg_has_children(struct mem_cgroup *memcg)
 {
 	bool ret;
 
-	/*
-	 * The lock does not prevent addition or deletion of children, but
-	 * it prevents a new child from being initialized based on this
-	 * parent in css_online(), so it's enough to decide whether
-	 * hierarchically inherited attributes can still be changed or not.
-	 */
-	lockdep_assert_held(&memcg_create_mutex);
-
 	rcu_read_lock();
 	ret = css_next_child(NULL, &memcg->css);
 	rcu_read_unlock();
@@ -2760,10 +2736,8 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent_memcg = mem_cgroup_from_css(memcg->css.parent);
 
-	mutex_lock(&memcg_create_mutex);
-
 	if (memcg->use_hierarchy == val)
-		goto out;
+		return 0;
 
 	/*
 	 * If parent's use_hierarchy is set, we can't make any modifications
@@ -2782,9 +2756,6 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
 	} else
 		retval = -EINVAL;
 
-out:
-	mutex_unlock(&memcg_create_mutex);
-
 	return retval;
 }
 
@@ -2872,37 +2843,14 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 #ifndef CONFIG_SLOB
 static int memcg_online_kmem(struct mem_cgroup *memcg)
 {
-	int err = 0;
 	int memcg_id;
 
 	BUG_ON(memcg->kmemcg_id >= 0);
 	BUG_ON(memcg->kmem_state);
 
-	/*
-	 * For simplicity, we won't allow this to be disabled. It also can't
-	 * be changed if the cgroup has children already, or if tasks had
-	 * already joined.
-	 *
-	 * If tasks join before we set the limit, a person looking at
-	 * kmem.usage_in_bytes will have no way to determine when it took
-	 * place, which makes the value quite meaningless.
-	 *
-	 * After it first became limited, changes in the value of the limit are
-	 * of course permitted.
-	 */
-	mutex_lock(&memcg_create_mutex);
-	if (cgroup_is_populated(memcg->css.cgroup) ||
-	    (memcg->use_hierarchy && memcg_has_children(memcg)))
-		err = -EBUSY;
-	mutex_unlock(&memcg_create_mutex);
-	if (err)
-		goto out;
-
 	memcg_id = memcg_alloc_cache_id();
-	if (memcg_id < 0) {
-		err = memcg_id;
-		goto out;
-	}
+	if (memcg_id < 0)
+		return memcg_id;
 
 	static_branch_inc(&memcg_kmem_enabled_key);
 	/*
@@ -2913,17 +2861,14 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
 	 */
 	memcg->kmemcg_id = memcg_id;
 	memcg->kmem_state = KMEM_ONLINE;
-out:
-	return err;
+
+	return 0;
 }
 
-static int memcg_propagate_kmem(struct mem_cgroup *memcg)
+static int memcg_propagate_kmem(struct mem_cgroup *parent,
+				struct mem_cgroup *memcg)
 {
 	int ret = 0;
-	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
-
-	if (!parent)
-		return 0;
 
 	mutex_lock(&memcg_limit_mutex);
 	/*
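With the -EBUSY precondition gone from memcg_online_kmem() (it resurfaces in memcg_update_kmem_limit() below), the function no longer has shared cleanup, so the out: label and err bookkeeping collapse into direct returns. A standalone C sketch of when each style earns its keep, using hypothetical helpers:

/* Hypothetical setup steps standing in for the memcg code. */
static int acquire_a(void) { return 0; }
static int acquire_b(void) { return 0; }
static void release_a(void) { }

/* goto-based unwinding pays off only once later steps must undo
 * earlier ones; with nothing to undo, early returns read cleaner. */
int setup(void)
{
	int err;

	err = acquire_a();
	if (err)
		return err;		/* nothing to unwind yet */

	err = acquire_b();
	if (err)
		release_a();		/* undo the one successful step */
	return err;
}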
@@ -2985,6 +2930,10 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 
 static void memcg_free_kmem(struct mem_cgroup *memcg)
 {
+	/* css_alloc() failed, offlining didn't happen */
+	if (unlikely(memcg->kmem_state == KMEM_ONLINE))
+		memcg_offline_kmem(memcg);
+
 	if (memcg->kmem_state == KMEM_ALLOCATED) {
 		memcg_destroy_kmem_caches(memcg);
 		static_branch_dec(&memcg_kmem_enabled_key);
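The new guard covers a window this patch opens: memcg_propagate_kmem() now onlines kmem from css_alloc(), so if a later css_alloc() step fails, css_offline() never runs and the free path inherits a still-online kmem state. Completing the missed transition first keeps the teardown below valid. A rough standalone sketch of the pattern, hypothetical names:

/* A teardown that tolerates being entered from any lifecycle stage. */
enum kstate { K_NONE, K_ONLINE, K_ALLOCATED };

struct box {
	enum kstate state;
};

static void box_offline(struct box *b)
{
	/* ...revert whatever onlining made visible... */
	b->state = K_ALLOCATED;
}

void box_free(struct box *b)
{
	/* Constructor failed after onlining: offline never ran, so do
	 * it now and let the rest of the teardown see a normal state. */
	if (b->state == K_ONLINE)
		box_offline(b);
	/* ...release K_ALLOCATED resources... */
}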
@@ -2992,7 +2941,11 @@ static void memcg_free_kmem(struct mem_cgroup *memcg)
 	}
 }
 #else
-static int memcg_propagate_kmem(struct mem_cgroup *memcg)
+static int memcg_propagate_kmem(struct mem_cgroup *parent, struct mem_cgroup *memcg)
+{
+	return 0;
+}
+static int memcg_online_kmem(struct mem_cgroup *memcg)
 {
 	return 0;
 }
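When CONFIG_SLOB is set, the kmem accounting above is compiled out, so the #else branch must mirror every entry point the real code grew: the two-argument memcg_propagate_kmem() and now a memcg_online_kmem() stub. The shape of the idiom as a small standalone sketch (HAVE_KMEM is a stand-in for the kernel's config test):

#ifdef HAVE_KMEM
int kmem_online(void)
{
	/* real initialization would go here */
	return 0;
}
#else
/* Same signature, no body: call sites stay free of #ifdefs and the
 * compiler discards the call entirely. */
static inline int kmem_online(void)
{
	return 0;
}
#endif

int main(void)
{
	return kmem_online();
}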
@@ -3007,11 +2960,16 @@ static void memcg_free_kmem(struct mem_cgroup *memcg)
 static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
 				   unsigned long limit)
 {
-	int ret;
+	int ret = 0;
 
 	mutex_lock(&memcg_limit_mutex);
 	/* Top-level cgroup doesn't propagate from root */
 	if (!memcg_kmem_online(memcg)) {
+		if (cgroup_is_populated(memcg->css.cgroup) ||
+		    (memcg->use_hierarchy && memcg_has_children(memcg)))
+			ret = -EBUSY;
+		if (ret)
+			goto out;
 		ret = memcg_online_kmem(memcg);
 		if (ret)
 			goto out;
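The populated/children test that used to live in memcg_online_kmem() (hunk at -2872) now runs here, under memcg_limit_mutex instead of the deleted memcg_create_mutex, and only on the path that first enables kmem accounting. A compact standalone sketch of gating a one-way switch on preconditions while holding the lock that serializes it, hypothetical names:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

struct ctl {
	pthread_mutex_t lock;
	bool enabled;		/* one-way switch */
	bool has_users;
};

int ctl_enable(struct ctl *c)
{
	int err = 0;

	pthread_mutex_lock(&c->lock);
	if (!c->enabled) {
		if (c->has_users)
			err = -EBUSY;	/* too late to change semantics */
		else
			c->enabled = true;
	}
	pthread_mutex_unlock(&c->lock);
	return err;
}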
@@ -4167,90 +4125,44 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 	kfree(memcg->nodeinfo[node]);
 }
 
-static struct mem_cgroup *mem_cgroup_alloc(void)
-{
-	struct mem_cgroup *memcg;
-	size_t size;
-
-	size = sizeof(struct mem_cgroup);
-	size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
-
-	memcg = kzalloc(size, GFP_KERNEL);
-	if (!memcg)
-		return NULL;
-
-	memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
-	if (!memcg->stat)
-		goto out_free;
-
-	if (memcg_wb_domain_init(memcg, GFP_KERNEL))
-		goto out_free_stat;
-
-	return memcg;
-
-out_free_stat:
-	free_percpu(memcg->stat);
-out_free:
-	kfree(memcg);
-	return NULL;
-}
-
-/*
- * At destroying mem_cgroup, references from swap_cgroup can remain.
- * (scanning all at force_empty is too costly...)
- *
- * Instead of clearing all references at force_empty, we remember
- * the number of reference from swap_cgroup and free mem_cgroup when
- * it goes down to 0.
- *
- * Removal of cgroup itself succeeds regardless of refs from swap.
- */
-
-static void __mem_cgroup_free(struct mem_cgroup *memcg)
+static void mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
 
-	cancel_work_sync(&memcg->high_work);
-
-	mem_cgroup_remove_from_trees(memcg);
-
+	memcg_wb_domain_exit(memcg);
 	for_each_node(node)
 		free_mem_cgroup_per_zone_info(memcg, node);
-
 	free_percpu(memcg->stat);
-	memcg_wb_domain_exit(memcg);
 	kfree(memcg);
 }
 
-static struct cgroup_subsys_state * __ref
-mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+static struct mem_cgroup *mem_cgroup_alloc(void)
 {
 	struct mem_cgroup *memcg;
-	long error = -ENOMEM;
+	size_t size;
 	int node;
 
-	memcg = mem_cgroup_alloc();
+	size = sizeof(struct mem_cgroup);
+	size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
+
+	memcg = kzalloc(size, GFP_KERNEL);
 	if (!memcg)
-		return ERR_PTR(error);
+		return NULL;
+
+	memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
+	if (!memcg->stat)
+		goto fail;
 
 	for_each_node(node)
 		if (alloc_mem_cgroup_per_zone_info(memcg, node))
-			goto free_out;
+			goto fail;
 
-	/* root ? */
-	if (parent_css == NULL) {
-		root_mem_cgroup = memcg;
-		page_counter_init(&memcg->memory, NULL);
-		memcg->high = PAGE_COUNTER_MAX;
-		memcg->soft_limit = PAGE_COUNTER_MAX;
-		page_counter_init(&memcg->memsw, NULL);
-		page_counter_init(&memcg->kmem, NULL);
-	}
+	if (memcg_wb_domain_init(memcg, GFP_KERNEL))
+		goto fail;
 
 	INIT_WORK(&memcg->high_work, high_work_func);
 	memcg->last_scanned_node = MAX_NUMNODES;
 	INIT_LIST_HEAD(&memcg->oom_notify);
-	memcg->move_charge_at_immigrate = 0;
 	mutex_init(&memcg->thresholds_lock);
 	spin_lock_init(&memcg->move_lock);
 	vmpressure_init(&memcg->vmpressure);
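mem_cgroup_free() is now an exact mirror of mem_cgroup_alloc(), and because kzalloc() zero-fills the struct, the constructor can funnel every failure through a single fail label that simply calls the full destructor. The same shape in standalone C, where calloc()'s zeroing and free(NULL) being a no-op make partial teardown safe; names are hypothetical:

#include <stdlib.h>

struct thing {
	int *stats;
	int *per_node;
};

/* Mirrors thing_alloc() step for step and tolerates a half-built
 * object: unset pointers are NULL and free(NULL) does nothing. */
void thing_free(struct thing *t)
{
	free(t->per_node);
	free(t->stats);
	free(t);
}

struct thing *thing_alloc(void)
{
	struct thing *t = calloc(1, sizeof(*t));

	if (!t)
		return NULL;

	t->stats = calloc(16, sizeof(*t->stats));
	if (!t->stats)
		goto fail;

	t->per_node = calloc(16, sizeof(*t->per_node));
	if (!t->per_node)
		goto fail;

	return t;
fail:
	thing_free(t);
	return NULL;
}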
@@ -4263,48 +4175,37 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&memcg->cgwb_list);
 #endif
-	return &memcg->css;
-
-free_out:
-	__mem_cgroup_free(memcg);
-	return ERR_PTR(error);
+	return memcg;
+fail:
+	mem_cgroup_free(memcg);
+	return NULL;
 }
 
-static int
-mem_cgroup_css_online(struct cgroup_subsys_state *css)
+static struct cgroup_subsys_state * __ref
+mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
-	int ret;
-
-	if (css->id > MEM_CGROUP_ID_MAX)
-		return -ENOSPC;
-
-	if (!parent)
-		return 0;
-
-	mutex_lock(&memcg_create_mutex);
+	struct mem_cgroup *parent = mem_cgroup_from_css(parent_css);
+	struct mem_cgroup *memcg;
+	long error = -ENOMEM;
 
-	memcg->use_hierarchy = parent->use_hierarchy;
-	memcg->oom_kill_disable = parent->oom_kill_disable;
-	memcg->swappiness = mem_cgroup_swappiness(parent);
+	memcg = mem_cgroup_alloc();
+	if (!memcg)
+		return ERR_PTR(error);
 
-	if (parent->use_hierarchy) {
+	memcg->high = PAGE_COUNTER_MAX;
+	memcg->soft_limit = PAGE_COUNTER_MAX;
+	if (parent) {
+		memcg->swappiness = mem_cgroup_swappiness(parent);
+		memcg->oom_kill_disable = parent->oom_kill_disable;
+	}
+	if (parent && parent->use_hierarchy) {
+		memcg->use_hierarchy = true;
 		page_counter_init(&memcg->memory, &parent->memory);
-		memcg->high = PAGE_COUNTER_MAX;
-		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, &parent->memsw);
 		page_counter_init(&memcg->kmem, &parent->kmem);
 		page_counter_init(&memcg->tcpmem, &parent->tcpmem);
-
-		/*
-		 * No need to take a reference to the parent because cgroup
-		 * core guarantees its existence.
-		 */
 	} else {
 		page_counter_init(&memcg->memory, NULL);
-		memcg->high = PAGE_COUNTER_MAX;
-		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, NULL);
 		page_counter_init(&memcg->kmem, NULL);
 		page_counter_init(&memcg->tcpmem, NULL);
@@ -4316,21 +4217,31 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 		if (parent != root_mem_cgroup)
 			memory_cgrp_subsys.broken_hierarchy = true;
 	}
-	mutex_unlock(&memcg_create_mutex);
 
-	ret = memcg_propagate_kmem(memcg);
-	if (ret)
-		return ret;
+	/* The following stuff does not apply to the root */
+	if (!parent) {
+		root_mem_cgroup = memcg;
+		return &memcg->css;
+	}
+
+	error = memcg_propagate_kmem(parent, memcg);
+	if (error)
+		goto fail;
 
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
 		static_branch_inc(&memcg_sockets_enabled_key);
 
-	/*
-	 * Make sure the memcg is initialized: mem_cgroup_iter()
-	 * orders reading memcg->initialized against its callers
-	 * reading the memcg members.
-	 */
-	smp_store_release(&memcg->initialized, 1);
+	return &memcg->css;
+fail:
+	mem_cgroup_free(memcg);
+	return NULL;
+}
+
+static int
+mem_cgroup_css_online(struct cgroup_subsys_state *css)
+{
+	if (css->id > MEM_CGROUP_ID_MAX)
+		return -ENOSPC;
 
 	return 0;
 }
@@ -4352,10 +4263,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	}
 	spin_unlock(&memcg->event_list_lock);
 
-	vmpressure_cleanup(&memcg->vmpressure);
-
 	memcg_offline_kmem(memcg);
-
 	wb_memcg_offline(memcg);
 }
 
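vmpressure_cleanup() leaves the offline path here and reappears in css_free below, together with cancel_work_sync() and the soft-limit tree removal that used to sit in __mem_cgroup_free(); deferring them to css_free means they run only once no references remain, while offline keeps just the steps tied to the group becoming invisible. As read from the hunks in this patch (a summary, not authoritative lifecycle documentation), the split now looks like:

/*
 * css_alloc():   allocate and fully initialize the memcg, wire up
 *                hierarchy counters, propagate kmem; the root also
 *                sets root_mem_cgroup here
 * css_online():  only the MEM_CGROUP_ID_MAX check remains
 * css_offline(): flush pending events, memcg_offline_kmem(),
 *                wb_memcg_offline()
 * css_free():    vmpressure_cleanup(), cancel_work_sync(),
 *                mem_cgroup_remove_from_trees(), memcg_free_kmem(),
 *                mem_cgroup_free()
 */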
@@ -4376,8 +4284,11 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_active)
 		static_branch_dec(&memcg_sockets_enabled_key);
 
+	vmpressure_cleanup(&memcg->vmpressure);
+	cancel_work_sync(&memcg->high_work);
+	mem_cgroup_remove_from_trees(memcg);
 	memcg_free_kmem(memcg);
-	__mem_cgroup_free(memcg);
+	mem_cgroup_free(memcg);
 }
 
 /**