diff options
author | Glauber Costa <glommer@parallels.com> | 2012-12-18 17:22:00 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-18 18:02:12 -0500 |
commit | 6a1a0d3b625a4091e7a0eb249aefc6a644385149 (patch) | |
tree | dcffaa5af65847680d1acdf7b4956d75a66e5885 /mm | |
parent | 7ae1e1d0f8ac2927ed7e3ca6d15e42d485903459 (diff) |
mm: allocate kernel pages to the right memcg
When a process tries to allocate a page with the __GFP_KMEMCG flag, the
page allocator will call the corresponding memcg functions to validate
the allocation. Tasks in the root memcg can always proceed.
To avoid adding markers to the page - and a kmem flag that would
necessarily follow - as well as doing page_cgroup lookups for no reason,
whoever is marking its allocations with the __GFP_KMEMCG flag is
responsible for telling the page allocator that this is such an
allocation at free_pages() time. This is done by invoking
__free_memcg_kmem_pages() and free_memcg_kmem_pages().
Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/page_alloc.c | 35 |
1 file changed, 35 insertions, 0 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 62496edbd8dd..2ad2ad168efe 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -2612,6 +2612,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2612 | int migratetype = allocflags_to_migratetype(gfp_mask); | 2612 | int migratetype = allocflags_to_migratetype(gfp_mask); |
2613 | unsigned int cpuset_mems_cookie; | 2613 | unsigned int cpuset_mems_cookie; |
2614 | int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET; | 2614 | int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET; |
2615 | struct mem_cgroup *memcg = NULL; | ||
2615 | 2616 | ||
2616 | gfp_mask &= gfp_allowed_mask; | 2617 | gfp_mask &= gfp_allowed_mask; |
2617 | 2618 | ||
@@ -2630,6 +2631,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2630 | if (unlikely(!zonelist->_zonerefs->zone)) | 2631 | if (unlikely(!zonelist->_zonerefs->zone)) |
2631 | return NULL; | 2632 | return NULL; |
2632 | 2633 | ||
2634 | /* | ||
2635 | * Will only have any effect when __GFP_KMEMCG is set. This is | ||
2636 | * verified in the (always inline) callee | ||
2637 | */ | ||
2638 | if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order)) | ||
2639 | return NULL; | ||
2640 | |||
2633 | retry_cpuset: | 2641 | retry_cpuset: |
2634 | cpuset_mems_cookie = get_mems_allowed(); | 2642 | cpuset_mems_cookie = get_mems_allowed(); |
2635 | 2643 | ||
@@ -2665,6 +2673,8 @@ out: | |||
2665 | if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) | 2673 | if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) |
2666 | goto retry_cpuset; | 2674 | goto retry_cpuset; |
2667 | 2675 | ||
2676 | memcg_kmem_commit_charge(page, memcg, order); | ||
2677 | |||
2668 | return page; | 2678 | return page; |
2669 | } | 2679 | } |
2670 | EXPORT_SYMBOL(__alloc_pages_nodemask); | 2680 | EXPORT_SYMBOL(__alloc_pages_nodemask); |
@@ -2717,6 +2727,31 @@ void free_pages(unsigned long addr, unsigned int order) | |||
2717 | 2727 | ||
2718 | EXPORT_SYMBOL(free_pages); | 2728 | EXPORT_SYMBOL(free_pages); |
2719 | 2729 | ||
2730 | /* | ||
2731 | * __free_memcg_kmem_pages and free_memcg_kmem_pages will free | ||
2732 | * pages allocated with __GFP_KMEMCG. | ||
2733 | * | ||
2734 | * Those pages are accounted to a particular memcg, embedded in the | ||
2735 | * corresponding page_cgroup. To avoid adding a hit in the allocator to search | ||
2736 | * for that information only to find out that it is NULL for users who have no | ||
2737 | * interest in that whatsoever, we provide these functions. | ||
2738 | * | ||
2739 | * The caller knows better which flags it relies on. | ||
2740 | */ | ||
/*
 * Free a page that was allocated with __GFP_KMEMCG, uncharging the
 * owning memcg first.
 *
 * @page:  the page to free (caller guarantees it was a kmemcg allocation)
 * @order: allocation order originally passed to the allocator
 *
 * The uncharge must happen before the page is handed back to the buddy
 * allocator, while the page_cgroup still records the owning memcg.
 */
void __free_memcg_kmem_pages(struct page *page, unsigned int order)
{
	memcg_kmem_uncharge_pages(page, order);
	__free_pages(page, order);
}
2746 | |||
/*
 * Address-based counterpart of __free_memcg_kmem_pages(): frees pages
 * allocated with __GFP_KMEMCG given their kernel virtual address.
 *
 * @addr:  kernel virtual address of the allocation; 0 is a no-op
 * @order: allocation order originally passed to the allocator
 */
void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
{
	if (addr == 0)
		return;

	VM_BUG_ON(!virt_addr_valid((void *)addr));
	__free_memcg_kmem_pages(virt_to_page((void *)addr), order);
}
2754 | |||
2720 | static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) | 2755 | static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) |
2721 | { | 2756 | { |
2722 | if (addr) { | 2757 | if (addr) { |