author		Roman Gushchin <guro@fb.com>	2019-07-11 23:56:31 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-12 14:05:44 -0400
commit		4d96ba3530750fae3f3f01150adfecde96157815
tree		07ecb2d8f21bff7471ed9fd5c46b4a45fa6dcf5b
parent		f0a3a24b532d9a7e56a33c5112b2a212ed6ec580
mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages
Every slab page charged to a non-root memory cgroup has a pointer to the
memory cgroup and holds a reference to it, which protects a non-empty
memory cgroup from being released. At the same time the page has a
pointer to the corresponding kmem_cache and also holds a reference to that
kmem_cache. And the kmem_cache itself holds a reference to the cgroup.
So there is clearly some redundancy, which allows us to stop setting the
page->mem_cgroup pointer and instead obtain the memcg pointer indirectly via
the kmem_cache. This will also make it easier to change the pointer later,
without having to walk over all charged pages.
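The lookup this relies on can be sketched as follows; it is a minimal,
simplified restatement of the memcg_from_slab_page() helper that this patch
adds to mm/slab.h (the name slab_page_memcg() below is illustrative only, and
the usual kernel definitions of struct page, struct kmem_cache and
is_root_cache() are assumed):

	/*
	 * Sketch: reach the memcg through the page's slab_cache pointer
	 * instead of page->mem_cgroup. Only meaningful for head pages that
	 * pass PageSlab(); pages of root caches are not charged and
	 * resolve to NULL.
	 */
	static inline struct mem_cgroup *slab_page_memcg(struct page *page)
	{
		struct kmem_cache *s = READ_ONCE(page->slab_cache);

		if (s && !is_root_cache(s))
			return s->memcg_params.memcg;

		return NULL;
	}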
So let's stop setting the page->mem_cgroup pointer for slab pages, and stop
using the css refcounter directly for protecting the memory cgroup from
going away. Instead, rely on the kmem_cache as an intermediate object.
Make sure that vmstats, shrinker lists and the /proc/kpagecgroup interface
keep working as before.
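The reference-counting side can be sketched in the same spirit; this is a
simplified view of the memcg_charge_slab()/memcg_uncharge_slab() pair in the
mm/slab.h hunk below, with the vmstat updates and the root-cache fast path
trimmed (the *_sketch names are illustrative only):

	/*
	 * Sketch: the slab page no longer holds css references itself.
	 * The page references taken by try_charge() are converted into
	 * references on the kmem_cache's percpu refcounter, and the
	 * kmem_cache in turn keeps the memcg alive.
	 */
	static int charge_slab_page_sketch(struct page *page, gfp_t gfp,
					   int order, struct kmem_cache *s)
	{
		struct mem_cgroup *memcg = s->memcg_params.memcg;
		int ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);

		if (ret)
			return ret;

		/* transfer try_charge() page references to the kmem_cache */
		percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
		css_put_many(&memcg->css, 1 << order);
		return 0;
	}

	static void uncharge_slab_page_sketch(struct page *page, int order,
					      struct kmem_cache *s)
	{
		memcg_kmem_uncharge_memcg(page, order, s->memcg_params.memcg);
		percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
	}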
Link: http://lkml.kernel.org/r/20190611231813.3148843-10-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Waiman Long <longman@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
 mm/list_lru.c   |  3
 mm/memcontrol.c | 12
 mm/slab.h       | 74
 3 files changed, 70 insertions, 19 deletions
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 927d85be32f6..0f1f6b06b7f3 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/memcontrol.h>
+#include "slab.h"
 
 #ifdef CONFIG_MEMCG_KMEM
 static LIST_HEAD(list_lrus);
@@ -63,7 +64,7 @@ static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
 	if (!memcg_kmem_enabled())
 		return NULL;
 	page = virt_to_head_page(ptr);
-	return page->mem_cgroup;
+	return memcg_from_slab_page(page);
 }
 
 static inline struct list_lru_one *
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ce4ce5e7937b..fa39e51b3d94 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -486,7 +486,10 @@ ino_t page_cgroup_ino(struct page *page)
 	unsigned long ino = 0;
 
 	rcu_read_lock();
-	memcg = READ_ONCE(page->mem_cgroup);
+	if (PageHead(page) && PageSlab(page))
+		memcg = memcg_from_slab_page(page);
+	else
+		memcg = READ_ONCE(page->mem_cgroup);
 	while (memcg && !(memcg->css.flags & CSS_ONLINE))
 		memcg = parent_mem_cgroup(memcg);
 	if (memcg)
@@ -2802,9 +2805,6 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 		cancel_charge(memcg, nr_pages);
 		return -ENOMEM;
 	}
-
-	page->mem_cgroup = memcg;
-
 	return 0;
 }
 
@@ -2827,8 +2827,10 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
 	memcg = get_mem_cgroup_from_current();
 	if (!mem_cgroup_is_root(memcg)) {
 		ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg);
-		if (!ret)
+		if (!ret) {
+			page->mem_cgroup = memcg;
 			__SetPageKmemcg(page);
+		}
 	}
 	css_put(&memcg->css);
 	return ret;
diff --git a/mm/slab.h b/mm/slab.h
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -255,30 +255,67 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 	return s->memcg_params.root_cache;
 }
 
+/*
+ * Expects a pointer to a slab page. Please note, that PageSlab() check
+ * isn't sufficient, as it returns true also for tail compound slab pages,
+ * which do not have slab_cache pointer set.
+ * So this function assumes that the page can pass PageHead() and PageSlab()
+ * checks.
+ */
+static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
+{
+	struct kmem_cache *s;
+
+	s = READ_ONCE(page->slab_cache);
+	if (s && !is_root_cache(s))
+		return s->memcg_params.memcg;
+
+	return NULL;
+}
+
+/*
+ * Charge the slab page belonging to the non-root kmem_cache.
+ * Can be called for non-root kmem_caches only.
+ */
 static __always_inline int memcg_charge_slab(struct page *page,
 					     gfp_t gfp, int order,
 					     struct kmem_cache *s)
 {
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
 	int ret;
 
-	if (is_root_cache(s))
-		return 0;
-
-	ret = memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg);
+	memcg = s->memcg_params.memcg;
+	ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
 	if (ret)
 		return ret;
 
+	lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+	mod_lruvec_state(lruvec, cache_vmstat_idx(s), 1 << order);
+
+	/* transer try_charge() page references to kmem_cache */
 	percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
+	css_put_many(&memcg->css, 1 << order);
 
 	return 0;
 }
 
+/*
+ * Uncharge a slab page belonging to a non-root kmem_cache.
+ * Can be called for non-root kmem_caches only.
+ */
 static __always_inline void memcg_uncharge_slab(struct page *page, int order,
 						struct kmem_cache *s)
 {
-	if (!is_root_cache(s))
-		percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
-	memcg_kmem_uncharge(page, order);
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
+
+	memcg = s->memcg_params.memcg;
+	lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+	mod_lruvec_state(lruvec, cache_vmstat_idx(s), -(1 << order));
+	memcg_kmem_uncharge_memcg(page, order, memcg);
+
+	percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
 }
 
 extern void slab_init_memcg_params(struct kmem_cache *);
@@ -314,6 +351,11 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 	return s;
 }
 
+static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
+{
+	return NULL;
+}
+
 static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order,
 				    struct kmem_cache *s)
 {
@@ -351,18 +393,24 @@ static __always_inline int charge_slab_page(struct page *page,
 					    gfp_t gfp, int order,
 					    struct kmem_cache *s)
 {
-	int ret = memcg_charge_slab(page, gfp, order, s);
-
-	if (!ret)
-		mod_lruvec_page_state(page, cache_vmstat_idx(s), 1 << order);
+	if (is_root_cache(s)) {
+		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+				    1 << order);
+		return 0;
+	}
 
-	return ret;
+	return memcg_charge_slab(page, gfp, order, s);
 }
 
 static __always_inline void uncharge_slab_page(struct page *page, int order,
 					       struct kmem_cache *s)
 {
-	mod_lruvec_page_state(page, cache_vmstat_idx(s), -(1 << order));
+	if (is_root_cache(s)) {
+		mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+				    -(1 << order));
+		return;
+	}
+
 	memcg_uncharge_slab(page, order, s);
 }
 