author    Glauber Costa <glommer@parallels.com>    2012-12-18 17:22:40 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2012-12-18 18:02:14 -0500
commit    d7f25f8a2f81252d1ac134470ba1d0a287cf8fcd (patch)
tree      ecde8b5d98762e15a6fa1984d098ddf86646942b /include
parent    55007d849759252ddd573aeb36143b947202d509 (diff)
memcg: infrastructure to match an allocation to the right cache
The page allocator is able to bind a page to a memcg when it is
allocated.  But for the caches, we'd like to have as many objects as
possible in a page belonging to the same cache.

This is done in this patch by calling memcg_kmem_get_cache in the
beginning of every allocation function.  This function is patched out
by static branches when the kernel memory controller is not being used.

It assumes that the task allocating, which determines the memcg in the
page allocator, belongs to the same cgroup throughout the whole
process.  Misaccounting can happen if the task calls
memcg_kmem_get_cache() while belonging to a cgroup, and later on
changes.  This is considered acceptable, and should only happen upon
task migration.

Before the cache is created by the memcg core, there is also a possible
imbalance: the task belongs to a memcg, but the cache being allocated
from is the global cache, since the child cache is not yet guaranteed
to be ready.  This case is also fine, since in this case GFP_KMEMCG
will not be passed and the page allocator will not attempt any cgroup
accounting.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
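For illustration, here is a minimal sketch of how an allocation
function would consume the new hook.  This is not part of this patch:
slab_alloc_sketch() and alloc_object_from() are hypothetical stand-ins,
since the real call sites are wired into the slab allocators by
follow-up patches in this series.

/*
 * Minimal sketch, assuming a simplified allocator entry point.
 * alloc_object_from() is a hypothetical placeholder for the
 * allocator's real fast path.
 */
static void *slab_alloc_sketch(struct kmem_cache *cachep, gfp_t gfp)
{
	/*
	 * Substitute the allocating task's per-memcg child cache for
	 * the global one, so the object lands in a page accounted to
	 * its memcg.  A static branch turns this into a no-op when the
	 * kernel memory controller is unused.
	 */
	cachep = memcg_kmem_get_cache(cachep, gfp);

	return alloc_object_from(cachep, gfp);	/* hypothetical */
}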
Diffstat (limited to 'include')
-rw-r--r--	include/linux/memcontrol.h	41
1 file changed, 41 insertions(+), 0 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 45085e14e023..bd9b5d73bc2b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -449,6 +449,10 @@ void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
+
+struct kmem_cache *
+__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
@@ -518,6 +522,37 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
 	__memcg_kmem_commit_charge(page, memcg, order);
 }
 
+/**
+ * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
+ * @cachep: the original global kmem cache
+ * @gfp: allocation flags.
+ *
+ * This function assumes that the task allocating, which determines the memcg
+ * in the page allocator, belongs to the same cgroup throughout the whole
+ * process. Misaccounting can happen if the task calls memcg_kmem_get_cache()
+ * while belonging to a cgroup, and later on changes. This is considered
+ * acceptable, and should only happen upon task migration.
+ *
+ * Before the cache is created by the memcg core, there is also a possible
+ * imbalance: the task belongs to a memcg, but the cache being allocated from
+ * is the global cache, since the child cache is not yet guaranteed to be
+ * ready. This case is also fine, since in this case GFP_KMEMCG will not be
+ * passed and the page allocator will not attempt any cgroup accounting.
+ */
+static __always_inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	if (!memcg_kmem_enabled())
+		return cachep;
+	if (gfp & __GFP_NOFAIL)
+		return cachep;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return cachep;
+	if (unlikely(fatal_signal_pending(current)))
+		return cachep;
+
+	return __memcg_kmem_get_cache(cachep, gfp);
+}
 #else
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
@@ -553,6 +588,12 @@ static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
 				     struct kmem_cache *s)
 {
 }
+
+static inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	return cachep;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
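The memcg_kmem_enabled() check at the top of memcg_kmem_get_cache() is
what makes the hook free when the controller is off.  As a rough
sketch, assuming the static_key API of this kernel era, the guard looks
something like the following; the key itself is defined by an earlier
patch in this series, so treat the exact definition here as
illustrative rather than part of this commit.

/*
 * Illustrative sketch, assuming the 3.8-era static_key API.
 * memcg_kmem_enabled_key is introduced by an earlier patch.
 */
extern struct static_key memcg_kmem_enabled_key;

static inline bool memcg_kmem_enabled(void)
{
	/*
	 * Compiles down to a single patchable jump: a no-op while the
	 * key is off, flipped to a branch only once kmem accounting is
	 * enabled for some memcg.
	 */
	return static_key_false(&memcg_kmem_enabled_key);
}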