author     Glauber Costa <glommer@parallels.com>            2012-12-18 17:21:56 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-12-18 18:02:12 -0500
commit     7ae1e1d0f8ac2927ed7e3ca6d15e42d485903459 (patch)
tree       6b95f008400510bee9a7742ee21bf5316a59f851 /mm
parent     7a64bf05b2a6fe3703062d13d389e3eb904741c6 (diff)
memcg: kmem controller infrastructure
Introduce infrastructure for tracking kernel memory pages to a given
memcg. This will happen whenever the caller includes the __GFP_KMEMCG
flag and the task belongs to a memcg other than the root.

In memcontrol.h those functions are wrapped in inline accessors. The
idea is to later patch those with static branches, so we don't incur
any overhead when no mem cgroups with limited kmem are being used.

Users of this functionality shall interact with the memcg core code
through the following functions:

memcg_kmem_newpage_charge: will return true if the group can handle the
  allocation. At this point, struct page is not yet allocated.
memcg_kmem_commit_charge: will either revert the charge, if struct page
  allocation failed, or embed memcg information into page_cgroup.
memcg_kmem_uncharge_page: called at free time, will revert the charge.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c  170
1 file changed, 170 insertions, 0 deletions
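As a reading aid (not part of the commit), the sketch below shows how an
allocation path would drive the three functions named in the message.
The caller alloc_pages_account() and the non-underscored wrapper names
are illustrative assumptions; only the __memcg_kmem_*() implementations
appear in this mm/ diff.

/*
 * Reading aid only -- not part of this patch. Shows the intended
 * charge/commit/uncharge protocol. alloc_pages_account() is a
 * hypothetical caller; the memcg_kmem_*() wrappers stand in for the
 * memcontrol.h accessors the message refers to.
 */
static struct page *alloc_pages_account(gfp_t gfp, unsigned int order)
{
        struct mem_cgroup *memcg = NULL;
        struct page *page;

        /* 1) Before allocating: can this memcg take the charge? */
        if (!memcg_kmem_newpage_charge(gfp | __GFP_KMEMCG, &memcg, order))
                return NULL;            /* charge refused */

        page = alloc_pages(gfp, order);

        /*
         * 2) Commit: reverts the charge if the allocation failed,
         * otherwise records memcg in the page's page_cgroup.
         */
        memcg_kmem_commit_charge(page, memcg, order);
        return page;
}

static void free_pages_account(struct page *page, unsigned int order)
{
        /* 3) At free time: revert the charge. */
        memcg_kmem_uncharge_pages(page, order);
        __free_pages(page, order);
}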
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bba1cb4bbb82..b9afa060b8d6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -10,6 +10,10 @@
  * Copyright (C) 2009 Nokia Corporation
  * Author: Kirill A. Shutemov
  *
+ * Kernel Memory Controller
+ * Copyright (C) 2012 Parallels Inc. and Google Inc.
+ * Authors: Glauber Costa and Suleiman Souhlal
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -2661,6 +2665,172 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
+{
+	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
+		(memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
+}
+
+static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+{
+	struct res_counter *fail_res;
+	struct mem_cgroup *_memcg;
+	int ret = 0;
+	bool may_oom;
+
+	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
+	if (ret)
+		return ret;
+
+	/*
+	 * Conditions under which we can wait for the oom_killer. Those are
+	 * the same conditions tested by the core page allocator.
+	 */
+	may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
+
+	_memcg = memcg;
+	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
+				      &_memcg, may_oom);
+
+	if (ret == -EINTR) {
+		/*
+		 * __mem_cgroup_try_charge() chose to bypass to root due to
+		 * OOM kill or fatal signal. Since our only options are to
+		 * either fail the allocation or charge it to this cgroup, do
+		 * it as a temporary condition. But we can't fail. From a
+		 * kmem/slab perspective, the cache has already been selected
+		 * by mem_cgroup_kmem_get_cache(), so it is too late to change
+		 * our minds.
+		 *
+		 * This condition will only trigger if the task entered
+		 * memcg_charge_kmem in a sane state, but was OOM-killed during
+		 * __mem_cgroup_try_charge() above. Tasks that were already
+		 * dying when the allocation triggered should already have
+		 * been directed to the root cgroup in memcontrol.h.
+		 */
+		res_counter_charge_nofail(&memcg->res, size, &fail_res);
+		if (do_swap_account)
+			res_counter_charge_nofail(&memcg->memsw, size,
+						  &fail_res);
+		ret = 0;
+	} else if (ret)
+		res_counter_uncharge(&memcg->kmem, size);
+
+	return ret;
+}
+
+static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+{
+	res_counter_uncharge(&memcg->kmem, size);
+	res_counter_uncharge(&memcg->res, size);
+	if (do_swap_account)
+		res_counter_uncharge(&memcg->memsw, size);
+}
+
+/*
+ * We need to verify if the allocation against current->mm->owner's memcg is
+ * possible for the given order. But the page is not allocated yet, so we'll
+ * need a further commit step to do the final arrangements.
+ *
+ * It is possible for the task to switch cgroups in the meantime, so at
+ * commit time, we can't rely on task conversion any longer. We'll then use
+ * the handle argument to return to the caller which cgroup we should commit
+ * against. We could also return the memcg directly and avoid the pointer
+ * passing, but a boolean return value gives better semantics considering
+ * the compiled-out case as well.
+ *
+ * Returning true means the allocation is possible.
+ */
+bool
+__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	*_memcg = NULL;
+	memcg = try_get_mem_cgroup_from_mm(current->mm);
+
+	/*
+	 * Very rare case described in mem_cgroup_from_task. Unfortunately
+	 * there isn't much we can do without complicating this too much, and
+	 * it would be gfp-dependent anyway. Just let it go.
+	 */
+	if (unlikely(!memcg))
+		return true;
+
+	if (!memcg_can_account_kmem(memcg)) {
+		css_put(&memcg->css);
+		return true;
+	}
+
+	mem_cgroup_get(memcg);
+
+	ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order);
+	if (!ret)
+		*_memcg = memcg;
+	else
+		mem_cgroup_put(memcg);
+
+	css_put(&memcg->css);
+	return (ret == 0);
+}
+
+void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
+				int order)
+{
+	struct page_cgroup *pc;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+
+	/* The page allocation failed. Revert */
+	if (!page) {
+		memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+		mem_cgroup_put(memcg);
+		return;
+	}
+
+	pc = lookup_page_cgroup(page);
+	lock_page_cgroup(pc);
+	pc->mem_cgroup = memcg;
+	SetPageCgroupUsed(pc);
+	unlock_page_cgroup(pc);
+}
+
+void __memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	struct mem_cgroup *memcg = NULL;
+	struct page_cgroup *pc;
+
+	pc = lookup_page_cgroup(page);
+	/*
+	 * Fast unlocked return. Theoretically the state might have changed;
+	 * we check again after locking.
+	 */
+	if (!PageCgroupUsed(pc))
+		return;
+
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc)) {
+		memcg = pc->mem_cgroup;
+		ClearPageCgroupUsed(pc);
+	}
+	unlock_page_cgroup(pc);
+
+	/*
+	 * We trust that only if there is a memcg associated with the page
+	 * is it a valid allocation.
+	 */
+	if (!memcg)
+		return;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+	mem_cgroup_put(memcg);
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
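The commit message notes that the memcontrol.h accessors are meant to be
patched with static branches, so kernels with no kmem-limited memcgs pay
no overhead. A minimal sketch of that wrapper pattern follows; it is an
assumption for illustration, since the header change is outside this
mm/ diffstat, and memcg_kmem_enabled() is an assumed name standing in
for the static-branch test.

/*
 * Sketch of the inline-accessor pattern described in the commit
 * message; assumed, not taken from this diff. memcg_kmem_enabled()
 * is a placeholder for the (eventually static-branch-based) test.
 */
static inline bool
memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
{
        if (!memcg_kmem_enabled())
                return true;    /* no kmem-limited memcg exists: allow */
        if (!(gfp & __GFP_KMEMCG))
                return true;    /* caller did not request accounting */
        return __memcg_kmem_newpage_charge(gfp, memcg, order);
}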