path: root/mm/memcontrol.c
author	Johannes Weiner <hannes@cmpxchg.org>	2016-01-20 18:02:35 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-20 20:09:18 -0500
commit	52c29b04823cb1bab2805336b80866325fe2bc3f (patch)
tree	fb0d3b6323105cfac622b5695c7f63e2902308aa	/mm/memcontrol.c
parent	127424c86bb6cb87f0b563d9fdcfbbaf3c86ecec (diff)
mm: memcontrol: account "kmem" consumers in cgroup2 memory controller
The original cgroup memory controller has an extension to account slab
memory (and other "kernel memory" consumers) in a separate "kmem"
counter, once the user set an explicit limit on that "kmem" pool.

However, this includes various consumers whose sizes are directly linked
to userspace activity.  Accounting them as an optional "kmem" extension
is problematic for several reasons:

1. It leaves the main memory interface with incomplete semantics.  A
   user who puts their workload into a cgroup and configures a memory
   limit does not expect us to leave holes in the containment as big as
   the dentry and inode cache, or the kernel stack pages.

2. If the limit set on this random historical subgroup of consumers is
   reached, subsequent allocations will fail even when the main memory
   pool available to the cgroup is not yet exhausted and/or has
   reclaimable memory in it.

3. Calling it 'kernel memory' is misleading.  The dentry and inode
   caches are no more 'kernel' (or no less 'user') memory than the page
   cache itself.  Treating these consumers as different classes is a
   historical implementation detail that should not leak to users.

So, in addition to page cache, anonymous memory, and network socket
memory, account the following memory consumers per default in the
cgroup2 memory controller:

     - threadinfo
     - task_struct
     - task_delay_info
     - pid
     - cred
     - mm_struct
     - vm_area_struct and vm_region (nommu)
     - anon_vma and anon_vma_chain
     - signal_struct
     - sighand_struct
     - fs_struct
     - files_struct
     - fdtable and fdtable->full_fds_bits
     - dentry and external_name
     - inode for all filesystems

This should give us reasonable memory isolation for most common
workloads out of the box.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tejun Heo <tj@kernel.org>
Acked-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
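[Editor's note: the consumers listed above are charged per object through slab
accounting added elsewhere in the same series, not by this patch itself. A
minimal sketch of how a cache opts in, assuming the SLAB_ACCOUNT slab flag
from those related kmemcg patches; the cache variable name, alignment, and
the other flags shown here are illustrative, not taken from this page:

	/* Illustrative only: a slab cache whose objects should be charged to
	 * the allocating task's memory cgroup opts in with SLAB_ACCOUNT.
	 * Allocations from such a cache then go through the memcg kmem
	 * charge path modified by the diff below.
	 */
	task_struct_cachep = kmem_cache_create("task_struct",
				sizeof(struct task_struct), ARCH_MIN_TASKALIGN,
				SLAB_PANIC | SLAB_ACCOUNT, NULL);
]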
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	18
1 file changed, 11 insertions, 7 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fe51d5e61389..9e7a4e521917 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2381,13 +2381,14 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 	if (!memcg_kmem_online(memcg))
 		return 0;
 
-	if (!page_counter_try_charge(&memcg->kmem, nr_pages, &counter))
-		return -ENOMEM;
-
 	ret = try_charge(memcg, gfp, nr_pages);
-	if (ret) {
-		page_counter_uncharge(&memcg->kmem, nr_pages);
+	if (ret)
 		return ret;
+
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
+	    !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
+		cancel_charge(memcg, nr_pages);
+		return -ENOMEM;
 	}
 
 	page->mem_cgroup = memcg;
@@ -2416,7 +2417,9 @@ void __memcg_kmem_uncharge(struct page *page, int order)
 
 	VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
 
-	page_counter_uncharge(&memcg->kmem, nr_pages);
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		page_counter_uncharge(&memcg->kmem, nr_pages);
+
 	page_counter_uncharge(&memcg->memory, nr_pages);
 	if (do_memsw_account())
 		page_counter_uncharge(&memcg->memsw, nr_pages);
@@ -2922,7 +2925,8 @@ static int memcg_propagate_kmem(struct mem_cgroup *memcg)
 	 * onlined after this point, because it has at least one child
 	 * already.
 	 */
-	if (memcg_kmem_online(parent))
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) ||
+	    memcg_kmem_online(parent))
 		ret = memcg_online_kmem(memcg);
 	mutex_unlock(&memcg_limit_mutex);
 	return ret;
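[Editor's note: for readability, the charging path after this patch,
reconstructed from the first hunk above; the local declarations and the
trailing return are assumed from context rather than shown in the diff:

	int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
				      struct mem_cgroup *memcg)
	{
		unsigned int nr_pages = 1 << order;	/* assumed declaration */
		struct page_counter *counter;		/* assumed declaration */
		int ret;

		if (!memcg_kmem_online(memcg))
			return 0;

		/* cgroup2 and legacy alike: charge the unified memory counter first */
		ret = try_charge(memcg, gfp, nr_pages);
		if (ret)
			return ret;

		/* legacy hierarchy only: also charge the separate "kmem" counter */
		if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
		    !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
			cancel_charge(memcg, nr_pages);
			return -ENOMEM;
		}

		page->mem_cgroup = memcg;
		return 0;
	}

On the default (cgroup2) hierarchy the kmem counter is thus bypassed entirely,
and kernel memory is contained by the same memory.max limit as page cache and
anonymous memory.]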