author	Vladimir Davydov <vdavydov@parallels.com>	2014-06-04 19:06:39 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-04 19:53:56 -0400
commit	52383431b37cdbec63944e953ffc2698a7ad9722 (patch)
tree	5c7002b9f8723899099a6a8fb2d0039641b9ca09
parent	5dfb417509921eb90ee123a4d1525e8916b4ace4 (diff)
mm: get rid of __GFP_KMEMCG
Currently, to allocate a page that should be charged to kmemcg (e.g.
threadinfo), we pass the __GFP_KMEMCG flag to the page allocator. The page
allocated is then to be freed by free_memcg_kmem_pages. Apart from looking
asymmetrical, this also requires intrusion into the general allocation path.
So let's introduce separate functions that alloc/free pages charged to
kmemcg.

The new functions are called alloc_kmem_pages and free_kmem_pages. They
should be used when the caller actually would like to use kmalloc, but has
to fall back to the page allocator because the allocation is large. They
only differ from alloc_pages and free_pages in that, besides allocating or
freeing pages, they also charge them to the kmem resource counter of the
current memory cgroup.

[sfr@canb.auug.org.au: export kmalloc_order() to modules]
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
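A minimal usage sketch of the new interface (not part of the patch itself):
a caller that wants a large, kmemcg-accounted buffer pairs alloc_kmem_pages
with free_kmem_pages. The wrapper names below are hypothetical, chosen only
for illustration.

	#include <linux/gfp.h>
	#include <linux/mm.h>

	static void *my_alloc_accounted_buf(unsigned int order)
	{
		/* Also charges the pages to the kmem counter of the current memcg. */
		struct page *page = alloc_kmem_pages(GFP_KERNEL, order);

		return page ? page_address(page) : NULL;
	}

	static void my_free_accounted_buf(void *buf, unsigned int order)
	{
		/* Uncharges the pages before returning them to the page allocator. */
		free_kmem_pages((unsigned long)buf, order);
	}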
-rw-r--r--	include/linux/gfp.h	10
-rw-r--r--	include/linux/memcontrol.h	2
-rw-r--r--	include/linux/slab.h	11
-rw-r--r--	include/linux/thread_info.h	2
-rw-r--r--	include/trace/events/gfpflags.h	1
-rw-r--r--	kernel/fork.c	6
-rw-r--r--	mm/memcontrol.c	11
-rw-r--r--	mm/page_alloc.c	56
-rw-r--r--	mm/slab_common.c	13
-rw-r--r--	mm/slub.c	6
10 files changed, 68 insertions, 50 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 39b81dc7d01a..d382db71e300 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -31,7 +31,6 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
-#define ___GFP_KMEMCG		0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
@@ -91,7 +90,6 @@ struct vm_area_struct;
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE	((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
-#define __GFP_KMEMCG	((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
 /*
@@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
 	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
+extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
+extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
+					  unsigned int order);
+
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
@@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, int cold);
 extern void free_hot_cold_page_list(struct list_head *list, int cold);
 
-extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
-extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+extern void __free_kmem_pages(struct page *page, unsigned int order);
+extern void free_kmem_pages(unsigned long addr, unsigned int order);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 96e5d2573eb0..5155d09e749d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -537,7 +537,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 	 * res_counter_charge_nofail, but we hope those allocations are rare,
 	 * and won't be worth the trouble.
 	 */
-	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+	if (gfp & __GFP_NOFAIL)
 		return true;
 	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
 		return true;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 307bfbe62387..a6aab2c0dfc5 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -369,16 +369,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
 #include <linux/slub_def.h>
 #endif
 
-static __always_inline void *
-kmalloc_order(size_t size, gfp_t flags, unsigned int order)
-{
-	void *ret;
-
-	flags |= (__GFP_COMP | __GFP_KMEMCG);
-	ret = (void *) __get_free_pages(flags, order);
-	kmemleak_alloc(ret, size, 1, flags);
-	return ret;
-}
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
 
 #ifdef CONFIG_TRACING
 extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index cb0cec94fda3..ff307b548ed3 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
 # define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
 #endif
 
-#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
-
 /*
  * flag set/clear/test wrappers
  * - pass TIF_xxxx constants to these functions
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 1eddbf1557f2..d6fd8e5b14b7 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,7 +34,6 @@
 	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
 	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
-	{(unsigned long)__GFP_KMEMCG,		"GFP_KMEMCG"},		\
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
 	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
 	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
diff --git a/kernel/fork.c b/kernel/fork.c
index 54a8d26f612f..59e3dcc5b8f2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 						  int node)
 {
-	struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
-					     THREAD_SIZE_ORDER);
+	struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
+						  THREAD_SIZE_ORDER);
 
 	return page ? page_address(page) : NULL;
 }
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-	free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+	free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_info_cache;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 56a768b3d5a8..7bab1de50f48 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3540,11 +3540,12 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
 	/*
 	 * Disabling accounting is only relevant for some specific memcg
 	 * internal allocations. Therefore we would initially not have such
-	 * check here, since direct calls to the page allocator that are marked
-	 * with GFP_KMEMCG only happen outside memcg core. We are mostly
-	 * concerned with cache allocations, and by having this test at
-	 * memcg_kmem_get_cache, we are already able to relay the allocation to
-	 * the root cache and bypass the memcg cache altogether.
+	 * check here, since direct calls to the page allocator that are
+	 * accounted to kmemcg (alloc_kmem_pages and friends) only happen
+	 * outside memcg core. We are mostly concerned with cache allocations,
+	 * and by having this test at memcg_kmem_get_cache, we are already able
+	 * to relay the allocation to the root cache and bypass the memcg cache
+	 * altogether.
 	 *
 	 * There is one exception, though: the SLUB allocator does not create
 	 * large order caches, but rather service large kmallocs directly from
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5dba2933c9c0..7cfdcd808f52 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2697,7 +2697,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
 	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
-	struct mem_cgroup *memcg = NULL;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2716,13 +2715,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
-	/*
-	 * Will only have any effect when __GFP_KMEMCG is set.  This is
-	 * verified in the (always inline) callee
-	 */
-	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
-		return NULL;
-
 retry_cpuset:
 	cpuset_mems_cookie = read_mems_allowed_begin();
 
@@ -2782,8 +2774,6 @@ out:
 	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 
-	memcg_kmem_commit_charge(page, memcg, order);
-
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2837,27 +2827,51 @@ void free_pages(unsigned long addr, unsigned int order)
 EXPORT_SYMBOL(free_pages);
 
 /*
- * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
- * pages allocated with __GFP_KMEMCG.
+ * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
+ * of the current memory cgroup.
  *
- * Those pages are accounted to a particular memcg, embedded in the
- * corresponding page_cgroup. To avoid adding a hit in the allocator to search
- * for that information only to find out that it is NULL for users who have no
- * interest in that whatsoever, we provide these functions.
- *
- * The caller knows better which flags it relies on.
+ * It should be used when the caller would like to use kmalloc, but since the
+ * allocation is large, it has to fall back to the page allocator.
+ */
+struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
+{
+	struct page *page;
+	struct mem_cgroup *memcg = NULL;
+
+	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+		return NULL;
+	page = alloc_pages(gfp_mask, order);
+	memcg_kmem_commit_charge(page, memcg, order);
+	return page;
+}
+
+struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
+{
+	struct page *page;
+	struct mem_cgroup *memcg = NULL;
+
+	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+		return NULL;
+	page = alloc_pages_node(nid, gfp_mask, order);
+	memcg_kmem_commit_charge(page, memcg, order);
+	return page;
+}
+
+/*
+ * __free_kmem_pages and free_kmem_pages will free pages allocated with
+ * alloc_kmem_pages.
  */
-void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+void __free_kmem_pages(struct page *page, unsigned int order)
 {
 	memcg_kmem_uncharge_pages(page, order);
 	__free_pages(page, order);
 }
 
-void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+void free_kmem_pages(unsigned long addr, unsigned int order)
 {
 	if (addr != 0) {
 		VM_BUG_ON(!virt_addr_valid((void *)addr));
-		__free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+		__free_kmem_pages(virt_to_page((void *)addr), order);
 	}
 }
 
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 06f0c6125632..1950c8f4d1a6 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -582,6 +582,19 @@ void __init create_kmalloc_caches(unsigned long flags)
 }
 #endif /* !CONFIG_SLOB */
 
+void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
+{
+	void *ret;
+	struct page *page;
+
+	flags |= __GFP_COMP;
+	page = alloc_kmem_pages(flags, order);
+	ret = page ? page_address(page) : NULL;
+	kmemleak_alloc(ret, size, 1, flags);
+	return ret;
+}
+EXPORT_SYMBOL(kmalloc_order);
+
 #ifdef CONFIG_TRACING
 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 {
diff --git a/mm/slub.c b/mm/slub.c
index fc9831851be6..ddb60795f373 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3311,8 +3311,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 	struct page *page;
 	void *ptr = NULL;
 
-	flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
-	page = alloc_pages_node(node, flags, get_order(size));
+	flags |= __GFP_COMP | __GFP_NOTRACK;
+	page = alloc_kmem_pages_node(node, flags, get_order(size));
 	if (page)
 		ptr = page_address(page);
 
@@ -3381,7 +3381,7 @@ void kfree(const void *x)
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
 		kfree_hook(x);
-		__free_memcg_kmem_pages(page, compound_order(page));
+		__free_kmem_pages(page, compound_order(page));
 		return;
 	}
 	slab_free(page->slab_cache, page, object, _RET_IP_);