author    Vladimir Davydov <vdavydov@parallels.com>  2014-06-04 19:06:38 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-06-04 19:53:56 -0400
commit    5dfb417509921eb90ee123a4d1525e8916b4ace4
tree      3ca55eb4452bf4d5f2a5ebbf835488ac4343fd30
parent    8eae1492675d0ffc12189f8db573624413232e15
sl[au]b: charge slabs to kmemcg explicitly
We have only a few places where we actually want to charge kmem, so instead of intruding into the general page allocation path with __GFP_KMEMCG it's better to explicitly charge kmem there. All kmem charges will be easier to follow that way.

This is a step towards removing __GFP_KMEMCG. It removes __GFP_KMEMCG from memcg caches' allocflags. Instead, it makes the slab allocation path call memcg_charge_kmem directly, getting the memcg to charge from the cache's memcg params.

This also eliminates any possibility of misaccounting an allocation going from one memcg's cache to another memcg, because now we always charge slabs against the memcg the cache belongs to. That's why this patch removes the big comment above memcg_kmem_get_cache.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
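For readers skimming the hunks below, here is a minimal userspace sketch of the pattern this patch introduces. Everything prefixed toy_ (toy_memcg, toy_cache, toy_charge_slab, ...) is an illustrative stand-in, not a kernel API; TOY_PAGE_SIZE stands in for PAGE_SIZE and malloc() stands in for the page allocator. The point is only the ordering: charge the cache's owner memcg for PAGE_SIZE << order before allocating slab pages, drop the charge if the allocation fails, and drop it again when the slab is freed.

/*
 * Standalone sketch of "charge slabs to kmemcg explicitly".
 * Not kernel code: all toy_* names are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

#define TOY_PAGE_SIZE 4096UL

struct toy_memcg {
	unsigned long usage;	/* bytes currently charged */
	unsigned long limit;	/* hard limit in bytes */
};

struct toy_cache {
	struct toy_memcg *memcg;	/* NULL models a root (unaccounted) cache */
	int gfporder;			/* slab page order */
};

/* Charge @bytes to @memcg; fail if that would exceed the limit. */
static int toy_charge(struct toy_memcg *memcg, unsigned long bytes)
{
	if (memcg->usage + bytes > memcg->limit)
		return -1;
	memcg->usage += bytes;
	return 0;
}

static void toy_uncharge(struct toy_memcg *memcg, unsigned long bytes)
{
	memcg->usage -= bytes;
}

/* Mirrors memcg_charge_slab(): no-op for root caches, else charge the owner memcg. */
static int toy_charge_slab(struct toy_cache *c)
{
	if (!c->memcg)
		return 0;
	return toy_charge(c->memcg, TOY_PAGE_SIZE << c->gfporder);
}

static void toy_uncharge_slab(struct toy_cache *c)
{
	if (c->memcg)
		toy_uncharge(c->memcg, TOY_PAGE_SIZE << c->gfporder);
}

/* Mirrors kmem_getpages()/alloc_slab_page(): charge first, back out on failure. */
static void *toy_alloc_slab(struct toy_cache *c)
{
	void *pages;

	if (toy_charge_slab(c))
		return NULL;		/* memcg limit hit: no page allocation at all */

	pages = malloc(TOY_PAGE_SIZE << c->gfporder);
	if (!pages)
		toy_uncharge_slab(c);	/* allocation failed: drop the charge */
	return pages;
}

/* Mirrors kmem_freepages()/__free_slab(): free the pages, then uncharge. */
static void toy_free_slab(struct toy_cache *c, void *pages)
{
	free(pages);
	toy_uncharge_slab(c);
}

int main(void)
{
	struct toy_memcg memcg = { .usage = 0, .limit = 64 * TOY_PAGE_SIZE };
	struct toy_cache cache = { .memcg = &memcg, .gfporder = 1 };
	void *slab = toy_alloc_slab(&cache);

	printf("charged after alloc: %lu bytes\n", memcg.usage);	/* 8192 */
	if (slab)
		toy_free_slab(&cache, slab);
	printf("charged after free:  %lu bytes\n", memcg.usage);	/* 0 */
	return 0;
}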
 include/linux/memcontrol.h | 15
 mm/memcontrol.c            |  4
 mm/slab.c                  |  7
 mm/slab.h                  | 29
 mm/slab_common.c           |  6
 mm/slub.c                  | 24
 6 files changed, 59 insertions(+), 26 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b569b8be5c5a..96e5d2573eb0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -506,6 +506,9 @@ void memcg_update_array_size(int num_groups);
 struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size);
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
+
 void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
@@ -583,17 +586,7 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
  * @cachep: the original global kmem cache
  * @gfp: allocation flags.
  *
- * This function assumes that the task allocating, which determines the memcg
- * in the page allocator, belongs to the same cgroup throughout the whole
- * process. Misacounting can happen if the task calls memcg_kmem_get_cache()
- * while belonging to a cgroup, and later on changes. This is considered
- * acceptable, and should only happen upon task migration.
- *
- * Before the cache is created by the memcg core, there is also a possible
- * imbalance: the task belongs to a memcg, but the cache being allocated from
- * is the global cache, since the child cache is not yet guaranteed to be
- * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
- * passed and the page allocator will not attempt any cgroup accounting.
+ * All memory allocated from a per-memcg cache is charged to the owner memcg.
  */
 static __always_inline struct kmem_cache *
 memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5177c6d4a2dd..56a768b3d5a8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2953,7 +2953,7 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
 }
 #endif
 
-static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
 	struct res_counter *fail_res;
 	int ret = 0;
@@ -2991,7 +2991,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	return ret;
 }
 
-static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
 {
 	res_counter_uncharge(&memcg->res, size);
 	if (do_swap_account)
diff --git a/mm/slab.c b/mm/slab.c
index 5c846d25c17d..944ac58cfcf8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1688,8 +1688,12 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		flags |= __GFP_RECLAIMABLE;
 
+	if (memcg_charge_slab(cachep, flags, cachep->gfporder))
+		return NULL;
+
 	page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
 	if (!page) {
+		memcg_uncharge_slab(cachep, cachep->gfporder);
 		slab_out_of_memory(cachep, flags, nodeid);
 		return NULL;
 	}
@@ -1747,7 +1751,8 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
 	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
-	__free_memcg_kmem_pages(page, cachep->gfporder);
+	__free_pages(page, cachep->gfporder);
+	memcg_uncharge_slab(cachep, cachep->gfporder);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
diff --git a/mm/slab.h b/mm/slab.h
index 6bd4c353704f..863e67b8c8c9 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -192,6 +192,26 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 		return s;
 	return s->memcg_params->root_cache;
 }
+
+static __always_inline int memcg_charge_slab(struct kmem_cache *s,
+					     gfp_t gfp, int order)
+{
+	if (!memcg_kmem_enabled())
+		return 0;
+	if (is_root_cache(s))
+		return 0;
+	return memcg_charge_kmem(s->memcg_params->memcg, gfp,
+				 PAGE_SIZE << order);
+}
+
+static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
+{
+	if (!memcg_kmem_enabled())
+		return;
+	if (is_root_cache(s))
+		return;
+	memcg_uncharge_kmem(s->memcg_params->memcg, PAGE_SIZE << order);
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -227,6 +247,15 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 {
 	return s;
 }
+
+static inline int memcg_charge_slab(struct kmem_cache *s, gfp_t gfp, int order)
+{
+	return 0;
+}
+
+static inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
+{
+}
 #endif
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
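As an aside on the mm/slab.h hunks above: the second block adds empty stubs so that the charge/uncharge calls can stay unconditional in the slab allocators. The snippet below is a standalone illustration of that stub pattern, not kernel code; EXAMPLE_MEMCG_KMEM and example_charge_slab() are made-up names standing in for the real config option and helper.

/*
 * Illustration only: provide a no-op stub with the same signature when the
 * feature is compiled out, so call sites need no #ifdef and the compiler
 * discards the call.
 */
#include <stdio.h>

#ifdef EXAMPLE_MEMCG_KMEM
static inline int example_charge_slab(int order)
{
	printf("charging %d page(s)\n", 1 << order);
	return 0;
}
#else
/* Compiled-out variant: same signature, does nothing, costs nothing. */
static inline int example_charge_slab(int order)
{
	(void)order;
	return 0;
}
#endif

int main(void)
{
	/* No #ifdef needed at the call site in either configuration. */
	if (example_charge_slab(1))
		return 1;
	puts("slab allocated");
	return 0;
}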
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 102cc6fca3d3..06f0c6125632 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -290,12 +290,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
 					 root_cache->size, root_cache->align,
 					 root_cache->flags, root_cache->ctor,
 					 memcg, root_cache);
-	if (IS_ERR(s)) {
+	if (IS_ERR(s))
 		kfree(cache_name);
-		goto out_unlock;
-	}
-
-	s->allocflags |= __GFP_KMEMCG;
 
 out_unlock:
 	mutex_unlock(&slab_mutex);
diff --git a/mm/slub.c b/mm/slub.c
index d05a5483106d..fc9831851be6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1312,17 +1312,26 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
 /*
  * Slab allocation and freeing
  */
-static inline struct page *alloc_slab_page(gfp_t flags, int node,
-					struct kmem_cache_order_objects oo)
+static inline struct page *alloc_slab_page(struct kmem_cache *s,
+		gfp_t flags, int node, struct kmem_cache_order_objects oo)
 {
+	struct page *page;
 	int order = oo_order(oo);
 
 	flags |= __GFP_NOTRACK;
 
+	if (memcg_charge_slab(s, flags, order))
+		return NULL;
+
 	if (node == NUMA_NO_NODE)
-		return alloc_pages(flags, order);
+		page = alloc_pages(flags, order);
 	else
-		return alloc_pages_exact_node(node, flags, order);
+		page = alloc_pages_exact_node(node, flags, order);
+
+	if (!page)
+		memcg_uncharge_slab(s, order);
+
+	return page;
 }
 
 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -1344,7 +1353,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	 */
 	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
 
-	page = alloc_slab_page(alloc_gfp, node, oo);
+	page = alloc_slab_page(s, alloc_gfp, node, oo);
 	if (unlikely(!page)) {
 		oo = s->min;
 		alloc_gfp = flags;
@@ -1352,7 +1361,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 		 * Allocation may have failed due to fragmentation.
 		 * Try a lower order alloc if possible
 		 */
-		page = alloc_slab_page(alloc_gfp, node, oo);
+		page = alloc_slab_page(s, alloc_gfp, node, oo);
 
 		if (page)
 			stat(s, ORDER_FALLBACK);
@@ -1468,7 +1477,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	page_mapcount_reset(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-	__free_memcg_kmem_pages(page, order);
+	__free_pages(page, order);
+	memcg_uncharge_slab(s, order);
 }
 
 #define need_reserve_slab_rcu						\