author     Vladimir Davydov <vdavydov@virtuozzo.com>        2016-07-26 18:24:24 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-07-26 19:19:19 -0400
commit     4949148ad433f6f11cf837978b2907092ec99f3a
tree       9ae57d8b9d040aaa66c51ce3e62debced020094a
parent     452647784b2fccfdeeb976f6f842c6719fb2daac
mm: charge/uncharge kmemcg from generic page allocator paths
Currently, to charge a non-slab allocation to kmemcg one has to use the alloc_kmem_pages helper with the __GFP_ACCOUNT flag. A page allocated with this helper must eventually be freed with free_kmem_pages, otherwise it won't be uncharged.

This API suits its current users fine, but it turns out to be impossible to use together with page reference counting, i.e. when an allocation is supposed to be freed with put_page, as is the case with pipe or unix socket buffers.

To overcome this limitation, this patch moves charging/uncharging into the generic page allocator paths, i.e. into __alloc_pages_nodemask and free_pages_prepare, and removes the alloc_kmem_pages/free_kmem_pages helpers. This way, any of the available page allocation functions can be used to get a page charged to kmemcg - it's enough to pass __GFP_ACCOUNT, just like in the case of kmalloc and friends. A charged page is automatically uncharged on free.

To make this possible, pages charged to kmemcg have to be marked somehow. To avoid introducing a new page flag, we reuse page->_mapcount for marking such pages. Since pages charged to kmemcg are never mapped to userspace, this works fine. There are other (ab)users of page->_mapcount - buddy and balloon pages - but we don't conflict with them.

If kmemcg is compiled out or not used at runtime, this patch adds no overhead to the generic page allocator paths. If kmemcg is used, the cost is one extra gfp-flags check on alloc and one page->_mapcount check on free, which shouldn't hurt performance because the data accessed is hot.

Link: http://lkml.kernel.org/r/a9736d856f895bcb465d9f257b54efe32eda6f99.1464079538.git.vdavydov@virtuozzo.com
Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
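For illustration, a minimal sketch of what a caller looks like after this change (hypothetical example code, not part of the patch; the function names are made up):

/*
 * Hypothetical helpers showing the post-patch API: any page allocation
 * can be charged to kmemcg simply by passing __GFP_ACCOUNT (here via
 * GFP_KERNEL_ACCOUNT, i.e. GFP_KERNEL | __GFP_ACCOUNT).  The allocator
 * sets PageKmemcg() on the charged page, and an ordinary put_page() /
 * __free_pages() uncharges it in free_pages_prepare() - no dedicated
 * free helper is needed any more.
 */
static void *example_alloc_accounted_buf(void)
{
	struct page *page;

	page = alloc_page(GFP_KERNEL_ACCOUNT);	/* charged + PageKmemcg set */
	if (!page)
		return NULL;
	return page_address(page);
}

static void example_free_accounted_buf(void *buf)
{
	put_page(virt_to_page(buf));		/* uncharged automatically */
}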
-rw-r--r--  include/linux/gfp.h         | 10
-rw-r--r--  include/linux/page-flags.h  |  7
-rw-r--r--  kernel/fork.c               |  6
-rw-r--r--  mm/page_alloc.c             | 66
-rw-r--r--  mm/slab_common.c            |  2
-rw-r--r--  mm/slub.c                   |  6
-rw-r--r--  mm/vmalloc.c                |  6
7 files changed, 31 insertions(+), 72 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 570383a41853..c29e9d347bc6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -78,8 +78,7 @@ struct vm_area_struct;
  * __GFP_THISNODE forces the allocation to be satisified from the requested
  * node with no fallbacks or placement policy enforcements.
  *
- * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg (only relevant
- * to kmem allocations).
+ * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
  */
 #define __GFP_RECLAIMABLE	((__force gfp_t)___GFP_RECLAIMABLE)
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)
@@ -486,10 +485,6 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
 	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 
-extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
-extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
-					  unsigned int order);
-
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
@@ -513,9 +508,6 @@ extern void *__alloc_page_frag(struct page_frag_cache *nc,
 			    unsigned int fragsz, gfp_t gfp_mask);
 extern void __free_page_frag(void *addr);
 
-extern void __free_kmem_pages(struct page *page, unsigned int order);
-extern void free_kmem_pages(unsigned long addr, unsigned int order);
-
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 96084ee74ee8..7c8e82ac2eb7 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -641,6 +641,13 @@ PAGE_MAPCOUNT_OPS(Buddy, BUDDY)
 #define PAGE_BALLOON_MAPCOUNT_VALUE (-256)
 PAGE_MAPCOUNT_OPS(Balloon, BALLOON)
 
+/*
+ * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
+ * pages allocated with __GFP_ACCOUNT. It gets cleared on page free.
+ */
+#define PAGE_KMEMCG_MAPCOUNT_VALUE (-512)
+PAGE_MAPCOUNT_OPS(Kmemcg, KMEMCG)
+
 extern bool is_free_buddy_page(struct page *page);
 
 __PAGEFLAG(Isolated, isolated, PF_ANY);
diff --git a/kernel/fork.c b/kernel/fork.c
index 4a7ec0c6c88c..de21f25e0d2c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -162,8 +162,8 @@ void __weak arch_release_thread_stack(unsigned long *stack)
 static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 						  int node)
 {
-	struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
-						  THREAD_SIZE_ORDER);
+	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+					     THREAD_SIZE_ORDER);
 
 	if (page)
 		memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
@@ -178,7 +178,7 @@ static inline void free_thread_stack(unsigned long *stack)
 
 	memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
 				    -(1 << THREAD_SIZE_ORDER));
-	__free_kmem_pages(page, THREAD_SIZE_ORDER);
+	__free_pages(page, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_stack_cache;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index de2491c42d4f..7023a31edc5c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -63,6 +63,7 @@
 #include <linux/sched/rt.h>
 #include <linux/page_owner.h>
 #include <linux/kthread.h>
+#include <linux/memcontrol.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -1018,6 +1019,10 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	}
 	if (PageMappingFlags(page))
 		page->mapping = NULL;
+	if (memcg_kmem_enabled() && PageKmemcg(page)) {
+		memcg_kmem_uncharge(page, order);
+		__ClearPageKmemcg(page);
+	}
 	if (check_free)
 		bad += free_pages_check(page);
 	if (bad)
@@ -3841,6 +3846,14 @@ no_zone:
 	}
 
 out:
+	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page) {
+		if (unlikely(memcg_kmem_charge(page, gfp_mask, order))) {
+			__free_pages(page, order);
+			page = NULL;
+		} else
+			__SetPageKmemcg(page);
+	}
+
 	if (kmemcheck_enabled && page)
 		kmemcheck_pagealloc_alloc(page, order, gfp_mask);
 
@@ -3996,59 +4009,6 @@ void __free_page_frag(void *addr)
 }
 EXPORT_SYMBOL(__free_page_frag);
 
-/*
- * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
- * of the current memory cgroup if __GFP_ACCOUNT is set, other than that it is
- * equivalent to alloc_pages.
- *
- * It should be used when the caller would like to use kmalloc, but since the
- * allocation is large, it has to fall back to the page allocator.
- */
-struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
-{
-	struct page *page;
-
-	page = alloc_pages(gfp_mask, order);
-	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) &&
-	    page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
-		__free_pages(page, order);
-		page = NULL;
-	}
-	return page;
-}
-
-struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
-{
-	struct page *page;
-
-	page = alloc_pages_node(nid, gfp_mask, order);
-	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) &&
-	    page && memcg_kmem_charge(page, gfp_mask, order) != 0) {
-		__free_pages(page, order);
-		page = NULL;
-	}
-	return page;
-}
-
-/*
- * __free_kmem_pages and free_kmem_pages will free pages allocated with
- * alloc_kmem_pages.
- */
-void __free_kmem_pages(struct page *page, unsigned int order)
-{
-	if (memcg_kmem_enabled())
-		memcg_kmem_uncharge(page, order);
-	__free_pages(page, order);
-}
-
-void free_kmem_pages(unsigned long addr, unsigned int order)
-{
-	if (addr != 0) {
-		VM_BUG_ON(!virt_addr_valid((void *)addr));
-		__free_kmem_pages(virt_to_page((void *)addr), order);
-	}
-}
-
 static void *make_alloc_exact(unsigned long addr, unsigned int order,
 			      size_t size)
 {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index da88c1588752..71f0b28a1bec 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1012,7 +1012,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 	struct page *page;
 
 	flags |= __GFP_COMP;
-	page = alloc_kmem_pages(flags, order);
+	page = alloc_pages(flags, order);
 	ret = page ? page_address(page) : NULL;
 	kmemleak_alloc(ret, size, 1, flags);
 	kasan_kmalloc_large(ret, size, flags);
diff --git a/mm/slub.c b/mm/slub.c
index c0cfa2722539..f9da8716b8b3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2977,7 +2977,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
 		kfree_hook(object);
-		__free_kmem_pages(page, compound_order(page));
+		__free_pages(page, compound_order(page));
 		p[size] = NULL; /* mark object processed */
 		return size;
 	}
@@ -3693,7 +3693,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 	void *ptr = NULL;
 
 	flags |= __GFP_COMP | __GFP_NOTRACK;
-	page = alloc_kmem_pages_node(node, flags, get_order(size));
+	page = alloc_pages_node(node, flags, get_order(size));
 	if (page)
 		ptr = page_address(page);
 
@@ -3774,7 +3774,7 @@ void kfree(const void *x)
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
 		kfree_hook(x);
-		__free_kmem_pages(page, compound_order(page));
+		__free_pages(page, compound_order(page));
 		return;
 	}
 	slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e11475cdeb7a..91f44e78c516 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1501,7 +1501,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 		struct page *page = area->pages[i];
 
 		BUG_ON(!page);
-		__free_kmem_pages(page, 0);
+		__free_pages(page, 0);
 	}
 
 	kvfree(area->pages);
@@ -1629,9 +1629,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		struct page *page;
 
 		if (node == NUMA_NO_NODE)
-			page = alloc_kmem_pages(alloc_mask, order);
+			page = alloc_pages(alloc_mask, order);
 		else
-			page = alloc_kmem_pages_node(node, alloc_mask, order);
+			page = alloc_pages_node(node, alloc_mask, order);
 
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vunmap() */