aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
author: Glauber Costa <glommer@parallels.com> 2012-12-18 17:22:07 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-18 18:02:13 -0500
commit: 7de37682bec35bbe0cd69b8112ef257bc5fb1c3e (patch)
tree: 09d0ff77741b1f154005fb139adb4bd8520f9870 /mm/memcontrol.c
parent: 50bdd430c20566b13d8bc59946184b08f5875de6 (diff)
memcg: kmem accounting lifecycle management
Because kmem charges can outlive the cgroup, we need to make sure that we won't free the memcg structure while charges are still in flight. For reviewing simplicity, the charge functions will issue mem_cgroup_get() at every charge, and mem_cgroup_put() at every uncharge. This can get expensive, however, and we can do better. mem_cgroup_get() only really needs to be issued once: when the first limit is set. In the same spirit, we only need to issue mem_cgroup_put() when the last charge is gone. We'll need an extra bit in kmem_account_flags for that: KMEM_ACCOUNTED_DEAD. It will be set when the cgroup dies, if there are charges in the group. If there aren't, we can proceed right away. Our uncharge function will have to test that bit every time the charges drop to 0. Because that is not the likely outcome of res_counter_uncharge, this should not impose a big hit on us: it is certainly much better than a reference count decrease at every operation. Signed-off-by: Glauber Costa <glommer@parallels.com> Acked-by: Michal Hocko <mhocko@suse.cz> Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Suleiman Souhlal <suleiman@google.com> Cc: Tejun Heo <tj@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Frederic Weisbecker <fweisbec@redhat.com> Cc: Greg Thelen <gthelen@google.com> Cc: JoonSoo Kim <js1304@gmail.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c 57
1 files changed, 50 insertions, 7 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b9afa060b8d6..9a62ac3ea881 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -346,6 +346,7 @@ struct mem_cgroup {
346/* internal only representation about the status of kmem accounting. */ 346/* internal only representation about the status of kmem accounting. */
347enum { 347enum {
348 KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */ 348 KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
349 KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */
349}; 350};
350 351
351#define KMEM_ACCOUNTED_MASK (1 << KMEM_ACCOUNTED_ACTIVE) 352#define KMEM_ACCOUNTED_MASK (1 << KMEM_ACCOUNTED_ACTIVE)
@@ -355,6 +356,23 @@ static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
355{ 356{
356 set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags); 357 set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
357} 358}
359
360static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
361{
362 return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
363}
364
365static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
366{
367 if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags))
368 set_bit(KMEM_ACCOUNTED_DEAD, &memcg->kmem_account_flags);
369}
370
371static bool memcg_kmem_test_and_clear_dead(struct mem_cgroup *memcg)
372{
373 return test_and_clear_bit(KMEM_ACCOUNTED_DEAD,
374 &memcg->kmem_account_flags);
375}
358#endif 376#endif
359 377
360/* Stuffs for move charges at task migration. */ 378/* Stuffs for move charges at task migration. */
@@ -2722,10 +2740,16 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
2722 2740
2723static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size) 2741static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
2724{ 2742{
2725 res_counter_uncharge(&memcg->kmem, size);
2726 res_counter_uncharge(&memcg->res, size); 2743 res_counter_uncharge(&memcg->res, size);
2727 if (do_swap_account) 2744 if (do_swap_account)
2728 res_counter_uncharge(&memcg->memsw, size); 2745 res_counter_uncharge(&memcg->memsw, size);
2746
2747 /* Not down to 0 */
2748 if (res_counter_uncharge(&memcg->kmem, size))
2749 return;
2750
2751 if (memcg_kmem_test_and_clear_dead(memcg))
2752 mem_cgroup_put(memcg);
2729} 2753}
2730 2754
2731/* 2755/*
@@ -2764,13 +2788,9 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
2764 return true; 2788 return true;
2765 } 2789 }
2766 2790
2767 mem_cgroup_get(memcg);
2768
2769 ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order); 2791 ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order);
2770 if (!ret) 2792 if (!ret)
2771 *_memcg = memcg; 2793 *_memcg = memcg;
2772 else
2773 mem_cgroup_put(memcg);
2774 2794
2775 css_put(&memcg->css); 2795 css_put(&memcg->css);
2776 return (ret == 0); 2796 return (ret == 0);
@@ -2786,7 +2806,6 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
2786 /* The page allocation failed. Revert */ 2806 /* The page allocation failed. Revert */
2787 if (!page) { 2807 if (!page) {
2788 memcg_uncharge_kmem(memcg, PAGE_SIZE << order); 2808 memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
2789 mem_cgroup_put(memcg);
2790 return; 2809 return;
2791 } 2810 }
2792 2811
@@ -2827,7 +2846,6 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
2827 2846
2828 VM_BUG_ON(mem_cgroup_is_root(memcg)); 2847 VM_BUG_ON(mem_cgroup_is_root(memcg));
2829 memcg_uncharge_kmem(memcg, PAGE_SIZE << order); 2848 memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
2830 mem_cgroup_put(memcg);
2831} 2849}
2832#endif /* CONFIG_MEMCG_KMEM */ 2850#endif /* CONFIG_MEMCG_KMEM */
2833 2851
@@ -4217,6 +4235,13 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
4217 VM_BUG_ON(ret); 4235 VM_BUG_ON(ret);
4218 4236
4219 memcg_kmem_set_active(memcg); 4237 memcg_kmem_set_active(memcg);
4238 /*
4239 * kmem charges can outlive the cgroup. In the case of slab
4240 * pages, for instance, a page contains objects from various
4241 * processes, so it is infeasible to migrate them away. We
4242 * need to reference count the memcg because of that.
4243 */
4244 mem_cgroup_get(memcg);
4220 } else 4245 } else
4221 ret = res_counter_set_limit(&memcg->kmem, val); 4246 ret = res_counter_set_limit(&memcg->kmem, val);
4222out: 4247out:
@@ -4232,6 +4257,10 @@ static void memcg_propagate_kmem(struct mem_cgroup *memcg)
4232 if (!parent) 4257 if (!parent)
4233 return; 4258 return;
4234 memcg->kmem_account_flags = parent->kmem_account_flags; 4259 memcg->kmem_account_flags = parent->kmem_account_flags;
4260#ifdef CONFIG_MEMCG_KMEM
4261 if (memcg_kmem_is_active(memcg))
4262 mem_cgroup_get(memcg);
4263#endif
4235} 4264}
4236 4265
4237/* 4266/*
@@ -4920,6 +4949,20 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
4920static void kmem_cgroup_destroy(struct mem_cgroup *memcg) 4949static void kmem_cgroup_destroy(struct mem_cgroup *memcg)
4921{ 4950{
4922 mem_cgroup_sockets_destroy(memcg); 4951 mem_cgroup_sockets_destroy(memcg);
4952
4953 memcg_kmem_mark_dead(memcg);
4954
4955 if (res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0)
4956 return;
4957
4958 /*
4959 * Charges already down to 0: undo the mem_cgroup_get() done when kmem
4960 * accounting was activated, being careful not to race with
4961 * memcg_uncharge_kmem. If the charges dropped to 0 between mark_dead and
4962 * the res_counter read, the uncharge path already did the put.
4963 */
4964 if (memcg_kmem_test_and_clear_dead(memcg))
4965 mem_cgroup_put(memcg);
4923} 4966}
4924#else 4967#else
4925static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 4968static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)