author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2011-03-23 19:42:42 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-03-23 22:46:33 -0400
commit	5a6475a4e162200f43855e2d42bbf55bcca1a9f2 (patch)
tree	361ef686ef833cd1560e884ca1420d512e4d06bb /mm/memcontrol.c
parent	6cfddb261555dd0c0529a5fb7cf8bc5b85ad95a5 (diff)
memcg: fix leak on wrong LRU with FUSE
fs/fuse/dev.c::fuse_try_move_page() does the following (sketched below):
(1) remove a page by ->steal()
(2) re-add the page to page cache
(3) link the page to LRU if it was not on LRU at (1)
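A minimal C sketch of that sequence, using a hypothetical helper name (fuse_move_page_sketch) and simplified locking and error handling; the real fuse_try_move_page() in fs/fuse/dev.c does considerably more than this:

	/*
	 * Illustrative only -- not the actual FUSE code.  The important part
	 * is the ordering: the stolen page is re-added to the page cache
	 * (which charges it to a memcg) while it may still be on an LRU list
	 * or sitting in a pagevec.
	 */
	static int fuse_move_page_sketch(struct pipe_buffer *buf,
					 struct address_space *mapping,
					 pgoff_t index, gfp_t gfp)
	{
		struct page *page = buf->page;
		int was_on_lru = PageLRU(page);	/* (1) page stolen via ->steal() */
		int err;

		/* (2) re-add to the page cache; this is where the memcg charge happens */
		err = add_to_page_cache_locked(page, mapping, index, gfp);
		if (err)
			return err;

		/* (3) link to the LRU only if the page was not on the LRU at (1) */
		if (!was_on_lru)
			lru_cache_add_file(page);
		return 0;
	}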
This implies the page is _on_ the LRU when it is added to the radix-tree. So
the page is added to the memory cgroup while it is on the LRU, because the LRU
is lazy and no one flushes it.
This is the same behavior as SwapCache and needs the same special care:
- remove the page from the LRU before overwriting pc->mem_cgroup.
- add the page back to the LRU after overwriting pc->mem_cgroup.
And we need to take care of the pagevec.
If PageLRU(page) is set before we set the PCG_USED bit, the page will not be
added to the memcg's LRU (for a short period). So, regardless of the
PageLRU(page) value before commit_charge(), we need to check PageLRU(page)
again after commit_charge().
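In code, that is the pattern the patch introduces (condensed from __mem_cgroup_commit_charge_lrucare() in the diff below); each LRU helper re-checks PageLRU(page) itself:

	/* Condensed from the patch below; pc is the page's struct page_cgroup. */
	mem_cgroup_lru_del_before_commit(page);		/* bails out early if !PageLRU(page) */
	__mem_cgroup_commit_charge(mem, page, 1, pc, ctype);	/* sets the USED bit */
	mem_cgroup_lru_add_after_commit(page);		/* re-checks PageLRU(page) and, under
							 * zone->lru_lock, relinks the page to
							 * the memcg's LRU if it is on the LRU
							 * but not yet accounted */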
Addresses https://bugzilla.kernel.org/show_bug.cgi?id=30432
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Balbir Singh <balbir@in.ibm.com>
Reported-by: Daniel Poelzleithner <poelzi@poelzi.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
 mm/memcontrol.c | 70
 1 file changed, 52 insertions, 18 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 61ffe712afe0..1f0b460fe58c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -926,18 +926,28 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 }
 
 /*
- * At handling SwapCache, pc->mem_cgroup may be changed while it's linked to
- * lru because the page may be reused after it's fully uncharged (because of
- * SwapCache behavior).To handle that, unlink page_cgroup from LRU when charge
- * it again. This function is only used to charge SwapCache. It's done under
- * lock_page and expected that zone->lru_lock is never held.
+ * At handling SwapCache and other FUSE stuff, pc->mem_cgroup may be changed
+ * while it's linked to lru because the page may be reused after it's fully
+ * uncharged. To handle that, unlink page_cgroup from LRU when charge it again.
+ * It's done under lock_page and expected that zone->lru_lock isnever held.
  */
-static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page)
+static void mem_cgroup_lru_del_before_commit(struct page *page)
 {
 	unsigned long flags;
 	struct zone *zone = page_zone(page);
 	struct page_cgroup *pc = lookup_page_cgroup(page);
 
+	/*
+	 * Doing this check without taking ->lru_lock seems wrong but this
+	 * is safe. Because if page_cgroup's USED bit is unset, the page
+	 * will not be added to any memcg's LRU. If page_cgroup's USED bit is
+	 * set, the commit after this will fail, anyway.
+	 * This all charge/uncharge is done under some mutual execustion.
+	 * So, we don't need to taking care of changes in USED bit.
+	 */
+	if (likely(!PageLRU(page)))
+		return;
+
 	spin_lock_irqsave(&zone->lru_lock, flags);
 	/*
 	 * Forget old LRU when this page_cgroup is *not* used. This Used bit
@@ -948,12 +958,15 @@ static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page)
 	spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
 
-static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
+static void mem_cgroup_lru_add_after_commit(struct page *page)
 {
 	unsigned long flags;
 	struct zone *zone = page_zone(page);
 	struct page_cgroup *pc = lookup_page_cgroup(page);
 
+	/* taking care of that the page is added to LRU while we commit it */
+	if (likely(!PageLRU(page)))
+		return;
 	spin_lock_irqsave(&zone->lru_lock, flags);
 	/* link when the page is linked to LRU but page_cgroup isn't */
 	if (PageLRU(page) && !PageCgroupAcctLRU(pc))
@@ -2431,9 +2444,26 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 					enum charge_type ctype);
 
+static void
+__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem,
+					enum charge_type ctype)
+{
+	struct page_cgroup *pc = lookup_page_cgroup(page);
+	/*
+	 * In some case, SwapCache, FUSE(splice_buf->radixtree), the page
+	 * is already on LRU. It means the page may on some other page_cgroup's
+	 * LRU. Take care of it.
+	 */
+	mem_cgroup_lru_del_before_commit(page);
+	__mem_cgroup_commit_charge(mem, page, 1, pc, ctype);
+	mem_cgroup_lru_add_after_commit(page);
+	return;
+}
+
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
+	struct mem_cgroup *mem = NULL;
 	int ret;
 
 	if (mem_cgroup_disabled())
@@ -2468,14 +2498,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	if (unlikely(!mm))
 		mm = &init_mm;
 
-	if (page_is_file_cache(page))
-		return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_CACHE);
+	if (page_is_file_cache(page)) {
+		ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &mem, true);
+		if (ret || !mem)
+			return ret;
 
+		/*
+		 * FUSE reuses pages without going through the final
+		 * put that would remove them from the LRU list, make
+		 * sure that they get relinked properly.
+		 */
+		__mem_cgroup_commit_charge_lrucare(page, mem,
+					MEM_CGROUP_CHARGE_TYPE_CACHE);
+		return ret;
+	}
 	/* shmem */
 	if (PageSwapCache(page)) {
-		struct mem_cgroup *mem;
-
 		ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
 		if (!ret)
 			__mem_cgroup_commit_charge_swapin(page, mem,
@@ -2532,17 +2570,13 @@ static void
 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 					enum charge_type ctype)
 {
-	struct page_cgroup *pc;
-
 	if (mem_cgroup_disabled())
 		return;
 	if (!ptr)
 		return;
 	cgroup_exclude_rmdir(&ptr->css);
-	pc = lookup_page_cgroup(page);
-	mem_cgroup_lru_del_before_commit_swapcache(page);
-	__mem_cgroup_commit_charge(ptr, page, 1, pc, ctype);
-	mem_cgroup_lru_add_after_commit_swapcache(page);
+
+	__mem_cgroup_commit_charge_lrucare(page, ptr, ctype);
 	/*
 	 * Now swap is on-memory. This means this page may be
 	 * counted both as mem and swap....double count.