about summary refs log tree commit diff stats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorJohannes Weiner <hannes@cmpxchg.org>2016-01-14 18:21:29 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-14 19:00:49 -0500
commitf7e1cb6ec51b041335b5ad4dd7aefb37a56d79a6 (patch)
treee99135a85ed74987871cf8608287321735249305 /mm/memcontrol.c
parent1109208766d9fa7059a9b66ad488e66d99ce49af (diff)
mm: memcontrol: account socket memory in unified hierarchy memory controller
Socket memory can be a significant share of overall memory consumed by common workloads. In order to provide reasonable resource isolation in the unified hierarchy, this type of memory needs to be included in the tracking/accounting of a cgroup under active memory resource control. Overhead is only incurred when a non-root control group is created AND the memory controller is instructed to track and account the memory footprint of that group. cgroup.memory=nosocket can be specified on the boot commandline to override any runtime configuration and forcibly exclude socket memory from active memory resource control. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Acked-by: David S. Miller <davem@davemloft.net> Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c | 122
1 files changed, 98 insertions, 24 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6aac8d2e31d7..60ebc486c2aa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -80,6 +80,9 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
80 80
81#define MEM_CGROUP_RECLAIM_RETRIES 5 81#define MEM_CGROUP_RECLAIM_RETRIES 5
82 82
83/* Socket memory accounting disabled? */
84static bool cgroup_memory_nosocket;
85
83/* Whether the swap controller is active */ 86/* Whether the swap controller is active */
84#ifdef CONFIG_MEMCG_SWAP 87#ifdef CONFIG_MEMCG_SWAP
85int do_swap_account __read_mostly; 88int do_swap_account __read_mostly;
@@ -1945,6 +1948,26 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
1945 return NOTIFY_OK; 1948 return NOTIFY_OK;
1946} 1949}
1947 1950
1951static void reclaim_high(struct mem_cgroup *memcg,
1952 unsigned int nr_pages,
1953 gfp_t gfp_mask)
1954{
1955 do {
1956 if (page_counter_read(&memcg->memory) <= memcg->high)
1957 continue;
1958 mem_cgroup_events(memcg, MEMCG_HIGH, 1);
1959 try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
1960 } while ((memcg = parent_mem_cgroup(memcg)));
1961}
1962
1963static void high_work_func(struct work_struct *work)
1964{
1965 struct mem_cgroup *memcg;
1966
1967 memcg = container_of(work, struct mem_cgroup, high_work);
1968 reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
1969}
1970
1948/* 1971/*
1949 * Scheduled by try_charge() to be executed from the userland return path 1972 * Scheduled by try_charge() to be executed from the userland return path
1950 * and reclaims memory over the high limit. 1973 * and reclaims memory over the high limit.
@@ -1952,20 +1975,13 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
1952void mem_cgroup_handle_over_high(void) 1975void mem_cgroup_handle_over_high(void)
1953{ 1976{
1954 unsigned int nr_pages = current->memcg_nr_pages_over_high; 1977 unsigned int nr_pages = current->memcg_nr_pages_over_high;
1955 struct mem_cgroup *memcg, *pos; 1978 struct mem_cgroup *memcg;
1956 1979
1957 if (likely(!nr_pages)) 1980 if (likely(!nr_pages))
1958 return; 1981 return;
1959 1982
1960 pos = memcg = get_mem_cgroup_from_mm(current->mm); 1983 memcg = get_mem_cgroup_from_mm(current->mm);
1961 1984 reclaim_high(memcg, nr_pages, GFP_KERNEL);
1962 do {
1963 if (page_counter_read(&pos->memory) <= pos->high)
1964 continue;
1965 mem_cgroup_events(pos, MEMCG_HIGH, 1);
1966 try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true);
1967 } while ((pos = parent_mem_cgroup(pos)));
1968
1969 css_put(&memcg->css); 1985 css_put(&memcg->css);
1970 current->memcg_nr_pages_over_high = 0; 1986 current->memcg_nr_pages_over_high = 0;
1971} 1987}
@@ -2100,6 +2116,11 @@ done_restock:
2100 */ 2116 */
2101 do { 2117 do {
2102 if (page_counter_read(&memcg->memory) > memcg->high) { 2118 if (page_counter_read(&memcg->memory) > memcg->high) {
2119 /* Don't bother a random interrupted task */
2120 if (in_interrupt()) {
2121 schedule_work(&memcg->high_work);
2122 break;
2123 }
2103 current->memcg_nr_pages_over_high += batch; 2124 current->memcg_nr_pages_over_high += batch;
2104 set_notify_resume(current); 2125 set_notify_resume(current);
2105 break; 2126 break;
@@ -4150,6 +4171,8 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
4150{ 4171{
4151 int node; 4172 int node;
4152 4173
4174 cancel_work_sync(&memcg->high_work);
4175
4153 mem_cgroup_remove_from_trees(memcg); 4176 mem_cgroup_remove_from_trees(memcg);
4154 4177
4155 for_each_node(node) 4178 for_each_node(node)
@@ -4196,6 +4219,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
4196 page_counter_init(&memcg->kmem, NULL); 4219 page_counter_init(&memcg->kmem, NULL);
4197 } 4220 }
4198 4221
4222 INIT_WORK(&memcg->high_work, high_work_func);
4199 memcg->last_scanned_node = MAX_NUMNODES; 4223 memcg->last_scanned_node = MAX_NUMNODES;
4200 INIT_LIST_HEAD(&memcg->oom_notify); 4224 INIT_LIST_HEAD(&memcg->oom_notify);
4201 memcg->move_charge_at_immigrate = 0; 4225 memcg->move_charge_at_immigrate = 0;
@@ -4267,6 +4291,11 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
4267 if (ret) 4291 if (ret)
4268 return ret; 4292 return ret;
4269 4293
4294#ifdef CONFIG_INET
4295 if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
4296 static_key_slow_inc(&memcg_sockets_enabled_key);
4297#endif
4298
4270 /* 4299 /*
4271 * Make sure the memcg is initialized: mem_cgroup_iter() 4300 * Make sure the memcg is initialized: mem_cgroup_iter()
4272 * orders reading memcg->initialized against its callers 4301 * orders reading memcg->initialized against its callers
@@ -4313,6 +4342,10 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
4313 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 4342 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
4314 4343
4315 memcg_destroy_kmem(memcg); 4344 memcg_destroy_kmem(memcg);
4345#ifdef CONFIG_INET
4346 if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
4347 static_key_slow_dec(&memcg_sockets_enabled_key);
4348#endif
4316 __mem_cgroup_free(memcg); 4349 __mem_cgroup_free(memcg);
4317} 4350}
4318 4351
@@ -5533,8 +5566,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
5533 commit_charge(newpage, memcg, true); 5566 commit_charge(newpage, memcg, true);
5534} 5567}
5535 5568
5536/* Writing them here to avoid exposing memcg's inner layout */ 5569#ifdef CONFIG_INET
5537#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
5538 5570
5539struct static_key memcg_sockets_enabled_key; 5571struct static_key memcg_sockets_enabled_key;
5540EXPORT_SYMBOL(memcg_sockets_enabled_key); 5572EXPORT_SYMBOL(memcg_sockets_enabled_key);
@@ -5559,10 +5591,15 @@ void sock_update_memcg(struct sock *sk)
5559 5591
5560 rcu_read_lock(); 5592 rcu_read_lock();
5561 memcg = mem_cgroup_from_task(current); 5593 memcg = mem_cgroup_from_task(current);
5562 if (memcg != root_mem_cgroup && 5594 if (memcg == root_mem_cgroup)
5563 memcg->tcp_mem.active && 5595 goto out;
5564 css_tryget_online(&memcg->css)) 5596#ifdef CONFIG_MEMCG_KMEM
5597 if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcp_mem.active)
5598 goto out;
5599#endif
5600 if (css_tryget_online(&memcg->css))
5565 sk->sk_memcg = memcg; 5601 sk->sk_memcg = memcg;
5602out:
5566 rcu_read_unlock(); 5603 rcu_read_unlock();
5567} 5604}
5568EXPORT_SYMBOL(sock_update_memcg); 5605EXPORT_SYMBOL(sock_update_memcg);
@@ -5583,15 +5620,30 @@ void sock_release_memcg(struct sock *sk)
5583 */ 5620 */
5584bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) 5621bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
5585{ 5622{
5586 struct page_counter *counter; 5623 gfp_t gfp_mask = GFP_KERNEL;
5587 5624
5588 if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated, 5625#ifdef CONFIG_MEMCG_KMEM
5589 nr_pages, &counter)) { 5626 if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
5590 memcg->tcp_mem.memory_pressure = 0; 5627 struct page_counter *counter;
5591 return true; 5628
5629 if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
5630 nr_pages, &counter)) {
5631 memcg->tcp_mem.memory_pressure = 0;
5632 return true;
5633 }
5634 page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
5635 memcg->tcp_mem.memory_pressure = 1;
5636 return false;
5592 } 5637 }
5593 page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages); 5638#endif
5594 memcg->tcp_mem.memory_pressure = 1; 5639 /* Don't block in the packet receive path */
5640 if (in_softirq())
5641 gfp_mask = GFP_NOWAIT;
5642
5643 if (try_charge(memcg, gfp_mask, nr_pages) == 0)
5644 return true;
5645
5646 try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
5595 return false; 5647 return false;
5596} 5648}
5597 5649
@@ -5602,10 +5654,32 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
5602 */ 5654 */
5603void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) 5655void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
5604{ 5656{
5605 page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages); 5657#ifdef CONFIG_MEMCG_KMEM
5658 if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
5659 page_counter_uncharge(&memcg->tcp_mem.memory_allocated,
5660 nr_pages);
5661 return;
5662 }
5663#endif
5664 page_counter_uncharge(&memcg->memory, nr_pages);
5665 css_put_many(&memcg->css, nr_pages);
5606} 5666}
5607 5667
5608#endif 5668#endif /* CONFIG_INET */
5669
5670static int __init cgroup_memory(char *s)
5671{
5672 char *token;
5673
5674 while ((token = strsep(&s, ",")) != NULL) {
5675 if (!*token)
5676 continue;
5677 if (!strcmp(token, "nosocket"))
5678 cgroup_memory_nosocket = true;
5679 }
5680 return 0;
5681}
5682__setup("cgroup.memory=", cgroup_memory);
5609 5683
5610/* 5684/*
5611 * subsys_initcall() for memory controller. 5685 * subsys_initcall() for memory controller.