Diffstat (limited to 'include')
-rw-r--r--  include/linux/gfp.h              5
-rw-r--r--  include/linux/hugetlb_cgroup.h   5
-rw-r--r--  include/linux/memcontrol.h     209
-rw-r--r--  include/linux/res_counter.h     12
-rw-r--r--  include/linux/sched.h            1
-rw-r--r--  include/linux/slab.h            48
-rw-r--r--  include/linux/slab_def.h         3
-rw-r--r--  include/linux/slub_def.h         9
-rw-r--r--  include/linux/thread_info.h      2
-rw-r--r--  include/trace/events/gfpflags.h  1
10 files changed, 286 insertions(+), 9 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f74856e17e48..0f615eb23d05 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,6 +30,7 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
+#define ___GFP_KMEMCG		0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
@@ -89,6 +90,7 @@ struct vm_area_struct;
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE	((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
+#define __GFP_KMEMCG	((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
 /*
@@ -365,6 +367,9 @@ extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, int cold);
 extern void free_hot_cold_page_list(struct list_head *list, int cold);
 
+extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
+extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
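The new flag and helpers above pair up: an allocation that should be charged to the current task's memcg passes __GFP_KMEMCG down to the page allocator, and the matching free goes through free_memcg_kmem_pages()/__free_memcg_kmem_pages() so the charge is dropped with the pages. A minimal sketch of that pairing, assuming a caller that wants one accounted page (the wrapper names below are illustrative, not part of this patch):

/* Sketch: allocate one page charged to the current task's memcg, then free it. */
static unsigned long alloc_accounted_page(void)
{
	/* __GFP_KMEMCG asks the page allocator to account the page to the memcg. */
	return __get_free_pages(GFP_KERNEL | __GFP_KMEMCG, 0);
}

static void free_accounted_page(unsigned long addr)
{
	/* Free through the memcg-aware helper so the charge is dropped as well. */
	free_memcg_kmem_pages(addr, 0);
}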
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index d73878c694b3..ce8217f7b5c2 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -62,7 +62,7 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 					 struct page *page);
 extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 					   struct hugetlb_cgroup *h_cg);
-extern int hugetlb_cgroup_file_init(int idx) __init;
+extern void hugetlb_cgroup_file_init(void) __init;
 extern void hugetlb_cgroup_migrate(struct page *oldhpage,
 				   struct page *newhpage);
 
@@ -111,9 +111,8 @@ hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 	return;
 }
 
-static inline int __init hugetlb_cgroup_file_init(int idx)
+static inline void hugetlb_cgroup_file_init(void)
 {
-	return 0;
 }
 
 static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e98a74c0c9c0..0108a56f814e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -21,11 +21,14 @@
 #define _LINUX_MEMCONTROL_H
 #include <linux/cgroup.h>
 #include <linux/vm_event_item.h>
+#include <linux/hardirq.h>
+#include <linux/jump_label.h>
 
 struct mem_cgroup;
 struct page_cgroup;
 struct page;
 struct mm_struct;
+struct kmem_cache;
 
 /* Stats that can be updated by kernel. */
 enum mem_cgroup_page_stat_item {
@@ -414,5 +417,211 @@ static inline void sock_release_memcg(struct sock *sk)
 {
 }
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
+
+#ifdef CONFIG_MEMCG_KMEM
+extern struct static_key memcg_kmem_enabled_key;
+
+extern int memcg_limited_groups_array_size;
+
+/*
+ * Helper macro to loop through all memcg-specific caches. Callers must still
+ * check if the cache is valid (it is either valid or NULL).
+ * The slab_mutex must be held when looping through these caches.
+ */
+#define for_each_memcg_cache_index(_idx)	\
+	for ((_idx) = 0; (_idx) < memcg_limited_groups_array_size; (_idx)++)
+
+static inline bool memcg_kmem_enabled(void)
+{
+	return static_key_false(&memcg_kmem_enabled_key);
+}
+
+/*
+ * In general, we'll do everything in our power not to incur any overhead
+ * for non-memcg users of the kmem functions: not even a function call, if
+ * we can avoid it.
+ *
+ * Therefore, we'll inline all those functions so that in the best case, we'll
+ * see that kmemcg is off for everybody and proceed quickly. If it is on,
+ * we'll still do most of the flag checking inline. We check a lot of
+ * conditions, but because they are pretty simple, they are expected to be
+ * fast.
+ */
+bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
+				 int order);
+void __memcg_kmem_commit_charge(struct page *page,
+				struct mem_cgroup *memcg, int order);
+void __memcg_kmem_uncharge_pages(struct page *page, int order);
+
+int memcg_cache_id(struct mem_cgroup *memcg);
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+			 struct kmem_cache *root_cache);
+void memcg_release_cache(struct kmem_cache *cachep);
+void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
+
+int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
+void memcg_update_array_size(int num_groups);
+
+struct kmem_cache *
+__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
+
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
+void kmem_cache_destroy_memcg_children(struct kmem_cache *s);
+
+/**
+ * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
+ * @gfp: the gfp allocation flags.
+ * @memcg: a pointer to the memcg this was charged against.
+ * @order: allocation order.
+ *
+ * Returns true if the memcg to which the current task belongs can hold this
+ * allocation.
+ *
+ * We return true automatically if this allocation is not to be accounted to
+ * any memcg.
+ */
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	if (!memcg_kmem_enabled())
+		return true;
+
+	/*
+	 * __GFP_NOFAIL allocations will move on even if charging is not
+	 * possible. Therefore we don't even try, and have this allocation
+	 * unaccounted. We could in theory charge it with
+	 * res_counter_charge_nofail, but we hope those allocations are rare,
+	 * and won't be worth the trouble.
+	 */
+	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+		return true;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return true;
+
+	/* If the task is dying, just let it go. */
+	if (unlikely(fatal_signal_pending(current)))
+		return true;
+
+	return __memcg_kmem_newpage_charge(gfp, memcg, order);
+}
+
+/**
+ * memcg_kmem_uncharge_pages: uncharge pages from memcg
+ * @page: pointer to struct page being freed
+ * @order: allocation order.
+ *
+ * There is no need to specify the memcg here, since it is embedded in page_cgroup.
+ */
+static inline void
+memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	if (memcg_kmem_enabled())
+		__memcg_kmem_uncharge_pages(page, order);
+}
+
+/**
+ * memcg_kmem_commit_charge: embeds correct memcg in a page
+ * @page: pointer to struct page recently allocated
+ * @memcg: the memcg structure we charged against
+ * @order: allocation order.
+ *
+ * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
+ * failure of the allocation. If @page is NULL, this function will revert the
+ * charges. Otherwise, it will commit the memcg given by @memcg to the
+ * corresponding page_cgroup.
+ */
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+	if (memcg_kmem_enabled() && memcg)
+		__memcg_kmem_commit_charge(page, memcg, order);
+}
+
+/**
+ * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
+ * @cachep: the original global kmem cache
+ * @gfp: allocation flags.
+ *
+ * This function assumes that the task allocating, which determines the memcg
+ * in the page allocator, belongs to the same cgroup throughout the whole
+ * process. Misaccounting can happen if the task calls memcg_kmem_get_cache()
+ * while belonging to a cgroup, and later changes cgroup. This is considered
+ * acceptable, and should only happen upon task migration.
+ *
+ * Before the cache is created by the memcg core, there is also a possible
+ * imbalance: the task belongs to a memcg, but the cache being allocated from
+ * is the global cache, since the child cache is not yet guaranteed to be
+ * ready. This case is also fine, since in this case __GFP_KMEMCG will not be
+ * passed and the page allocator will not attempt any cgroup accounting.
+ */
+static __always_inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	if (!memcg_kmem_enabled())
+		return cachep;
+	if (gfp & __GFP_NOFAIL)
+		return cachep;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return cachep;
+	if (unlikely(fatal_signal_pending(current)))
+		return cachep;
+
+	return __memcg_kmem_get_cache(cachep, gfp);
+}
+#else
+#define for_each_memcg_cache_index(_idx) \
+	for (; NULL; )
+
+static inline bool memcg_kmem_enabled(void)
+{
+	return false;
+}
+
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	return true;
+}
+
+static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+}
+
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+}
+
+static inline int memcg_cache_id(struct mem_cgroup *memcg)
+{
+	return -1;
+}
+
+static inline int
+memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+		     struct kmem_cache *root_cache)
+{
+	return 0;
+}
+
+static inline void memcg_release_cache(struct kmem_cache *cachep)
+{
+}
+
+static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
+					struct kmem_cache *s)
+{
+}
+
+static inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	return cachep;
+}
+
+static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
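The inline wrappers above describe a three-step protocol: charge before the allocation, commit exactly once after it (whether or not a page came back), and uncharge when the pages are freed. A sketch of that sequence, assuming an allocator-side caller (the function below is illustrative; the real hook sits in the page allocator, which is not part of this header):

/* Sketch of the charge/commit/uncharge sequence around one page allocation. */
static struct page *accounted_alloc_sketch(gfp_t gfp, unsigned int order)
{
	struct mem_cgroup *memcg = NULL;
	struct page *page;

	/* Reserve against the memcg kmem limit; a no-op when kmemcg is disabled. */
	if (!memcg_kmem_newpage_charge(gfp, &memcg, order))
		return NULL;

	page = NULL; /* ... the raw page allocation would happen here ... */

	/* Always commit: records @memcg in the page_cgroup, or reverts the
	 * charge if the allocation failed and @page is NULL. */
	memcg_kmem_commit_charge(page, memcg, order);
	return page;
}

At free time the memcg is recovered from the page_cgroup, so memcg_kmem_uncharge_pages(page, order) is all a caller needs.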
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 6f54e40fa218..5ae8456d9670 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -125,14 +125,16 @@ int res_counter_charge_nofail(struct res_counter *counter,
  *
  * these calls check for usage underflow and show a warning on the console
  * _locked call expects the counter->lock to be taken
+ *
+ * returns the total charges still present in @counter.
  */
 
-void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
-void res_counter_uncharge_until(struct res_counter *counter,
+u64 res_counter_uncharge_until(struct res_counter *counter,
 				struct res_counter *top,
 				unsigned long val);
 /**
  * res_counter_margin - calculate chargeable space of a counter
  * @cnt: the counter
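Since the uncharge functions now return the charges still present in the counter, a caller can detect that the last charge is gone without a separate read of the counter. A hedged sketch of that pattern (the cleanup callback is hypothetical, not part of this patch):

/* Sketch: trigger cleanup once an uncharge drains the counter to zero. */
static void uncharge_and_maybe_release(struct res_counter *counter,
				       unsigned long val,
				       void (*release)(struct res_counter *))
{
	if (res_counter_uncharge(counter, val) == 0)
		release(counter);	/* hypothetical cleanup hook */
}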
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9914c662ed7b..f712465b05c5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1597,6 +1597,7 @@ struct task_struct {
 		unsigned long nr_pages;	/* uncharged usage */
 		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
 	} memcg_batch;
+	unsigned int memcg_kmem_skip_account;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 743a10415122..5d168d7e0a28 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -11,6 +11,8 @@
 
 #include <linux/gfp.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
+
 
 /*
  * Flags to pass to kmem_cache_create().
@@ -116,6 +118,7 @@ struct kmem_cache {
 };
 #endif
 
+struct mem_cgroup;
 /*
  * struct kmem_cache related prototypes
  */
@@ -125,6 +128,9 @@ int slab_is_available(void);
 struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			unsigned long,
 			void (*)(void *));
+struct kmem_cache *
+kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
+			unsigned long, void (*)(void *), struct kmem_cache *);
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
@@ -175,6 +181,48 @@ void kmem_cache_free(struct kmem_cache *, void *);
 #ifndef ARCH_SLAB_MINALIGN
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 #endif
+/*
+ * This is the main placeholder for memcg-related information in kmem caches.
+ * struct kmem_cache will hold a pointer to it, so the memory cost while
+ * disabled is one pointer. The runtime cost while enabled is bigger than it
+ * would be if this were bundled into kmem_cache: we'll need an extra
+ * pointer chase. But the trade-off clearly lies in favor of not
+ * penalizing non-users.
+ *
+ * Both the root cache and the child caches will have it. For the root cache,
+ * this will hold a dynamically allocated array large enough to hold
+ * information about the currently limited memcgs in the system.
+ *
+ * Child caches will hold extra metadata needed for their operation. Fields are:
+ *
+ * @memcg: pointer to the memcg this cache belongs to
+ * @list: list_head for the list of all caches in this memcg
+ * @root_cache: pointer to the global, root cache this cache was derived from
+ * @dead: set to true after the memcg dies; the cache may still be around.
+ * @nr_pages: number of pages that belong to this cache.
+ * @destroy: worker to be called whenever we are ready, or believe we may be
+ *           ready, to destroy this cache.
+ */
+struct memcg_cache_params {
+	bool is_root_cache;
+	union {
+		struct kmem_cache *memcg_caches[0];
+		struct {
+			struct mem_cgroup *memcg;
+			struct list_head list;
+			struct kmem_cache *root_cache;
+			bool dead;
+			atomic_t nr_pages;
+			struct work_struct destroy;
+		};
+	};
+};
+
+int memcg_update_all_caches(int num_memcgs);
+
+struct seq_file;
+int cache_show(struct kmem_cache *s, struct seq_file *m);
+void print_slabinfo_header(struct seq_file *m);
 
 /*
  * Common kmalloc functions provided by all allocators
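The union in memcg_cache_params encodes the two roles: a root (global) cache uses memcg_caches[] as a per-memcg index, while a child cache uses the named fields for its back-pointers and destruction state. A hedged lookup sketch built on that layout (the helper is illustrative; the real accessors live in the slab allocators, and memcg_limited_groups_array_size is declared in memcontrol.h earlier in this series):

/* Sketch: find the per-memcg child of a root cache by memcg index.
 * Assumes struct kmem_cache carries the memcg_params pointer added below. */
static struct kmem_cache *memcg_child_cache_sketch(struct kmem_cache *root, int idx)
{
	if (!root->memcg_params || !root->memcg_params->is_root_cache)
		return NULL;
	if (idx < 0 || idx >= memcg_limited_groups_array_size)
		return NULL;
	/* NULL until the memcg core has actually created the child cache. */
	return root->memcg_params->memcg_caches[idx];
}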
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 45c0356fdc8c..8bb6e0eaf3c6 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -81,6 +81,9 @@ struct kmem_cache {
 	 */
 	int obj_offset;
 #endif /* CONFIG_DEBUG_SLAB */
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_cache_params *memcg_params;
+#endif
 
 /* 6) per-cpu/per-node data, touched during every alloc/free */
 	/*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index df448adb7283..9db4825cd393 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -101,6 +101,10 @@ struct kmem_cache {
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_cache_params *memcg_params;
+	int max_attr_size; /* for propagation, maximum size of a stored attr */
+#endif
 
 #ifdef CONFIG_NUMA
 	/*
@@ -222,7 +226,10 @@ void *__kmalloc(size_t size, gfp_t flags);
 static __always_inline void *
 kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 {
-	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+	void *ret;
+
+	flags |= (__GFP_COMP | __GFP_KMEMCG);
+	ret = (void *) __get_free_pages(flags, order);
 	kmemleak_alloc(ret, size, 1, flags);
 	return ret;
 }
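With __GFP_KMEMCG folded into kmalloc_order(), large kmalloc() requests that SLUB forwards straight to the page allocator become accountable without any change in the caller. A small sketch (the 64 KiB size is only an example; the cutoff above which SLUB hands a request to kmalloc_order() depends on the page size):

/* Sketch: a large allocation that SLUB satisfies via kmalloc_order(); with this
 * patch it is implicitly tagged __GFP_KMEMCG and charged to the caller's memcg
 * whenever kmem accounting is enabled for that memcg. */
static void *alloc_big_buffer(void)
{
	return kmalloc(64 * 1024, GFP_KERNEL);
}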
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ccc1899bd62e..e7e04736802f 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,6 +61,8 @@ extern long do_no_restart_syscall(struct restart_block *parm);
 # define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
 #endif
 
+#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
+
 /*
  * flag set/clear/test wrappers
  * - pass TIF_xxxx constants to these functions
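THREADINFO_GFP_ACCOUNTED exists so that kernel-stack/thread_info allocations can be charged to the allocating task's memcg. A hedged sketch of how an allocator might use it (the function names are illustrative; the real allocation site is outside this header, and THREAD_SIZE_ORDER is architecture-defined):

/* Sketch: allocate and free a thread_info block with memcg kmem accounting. */
static struct thread_info *alloc_thread_info_sketch(int node)
{
	struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
					     THREAD_SIZE_ORDER);

	return page ? page_address(page) : NULL;
}

static void free_thread_info_sketch(struct thread_info *ti)
{
	/* Pages allocated with __GFP_KMEMCG must go back via the memcg-aware helper. */
	free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}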
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index d6fd8e5b14b7..1eddbf1557f2 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,6 +34,7 @@
 	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
 	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
+	{(unsigned long)__GFP_KMEMCG,		"GFP_KMEMCG"},		\
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
 	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
 	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\