author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2012-01-12 20:17:44 -0500
committer	Greg Kroah-Hartman <gregkh@suse.de>	2012-01-25 20:24:43 -0500
commit	ea1c62778121f6ece5e0120250716b45e204cb13 (patch)
tree	65501073ab7e5a33a351f375bb0fa24216852ef9 /mm/filemap.c
parent	b9e11747e1227d7ad67c5b80be4b206e4059687e (diff)
memcg: add mem_cgroup_replace_page_cache() to fix LRU issue
commit ab936cbcd02072a34b60d268f94440fd5cf1970b upstream.
Commit ef6a3c6311 ("mm: add replace_page_cache_page() function") added a
function, replace_page_cache_page(), which replaces a page in the
radix-tree with a new page. When doing this, the memory cgroup needs to
fix up its accounting information: memcg must check the PCG_USED bit etc.
In some (many?) cases, 'newpage' is already on an LRU before
replace_page_cache_page() is called, so memcg's LRU accounting
information must be fixed as well.
This patch adds mem_cgroup_replace_page_cache() and removes the old hooks.
In that function, the old page is unaccounted without touching the
res_counter, and the new page is accounted to the memcg of the old page.
When overwriting pc->mem_cgroup of the new page, zone->lru_lock is taken
to avoid races with LRU handling.
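The diff below covers only the mm/filemap.c side; the new helper itself
lands in mm/memcontrol.c, which this page's diffstat does not include.
A rough sketch of what the changelog describes, written against the
3.0-era memcg internals (lookup_page_cgroup(), __mem_cgroup_commit_charge(),
zone->lru_lock), so details may differ from the actual patch:

void mem_cgroup_replace_page_cache(struct page *oldpage,
				   struct page *newpage)
{
	struct mem_cgroup *memcg;
	struct page_cgroup *pc;
	struct zone *zone;
	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
	unsigned long flags;

	if (mem_cgroup_disabled())
		return;

	/*
	 * Unaccount the old page: clear PCG_USED and drop the per-memcg
	 * statistics, but leave the res_counter charge alone so it can
	 * simply be inherited by the new page.
	 */
	pc = lookup_page_cgroup(oldpage);
	lock_page_cgroup(pc);
	memcg = pc->mem_cgroup;
	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -1);
	ClearPageCgroupUsed(pc);
	unlock_page_cgroup(pc);

	if (PageSwapBacked(oldpage))
		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;

	/*
	 * Account the new page to the old page's memcg.  The new page
	 * may already be on an LRU (or in a pagevec headed for one), so
	 * hold zone->lru_lock while pc->mem_cgroup is overwritten: pull
	 * the page off its current list, commit the charge, and put it
	 * back on the list matching its new owner.
	 */
	zone = page_zone(newpage);
	pc = lookup_page_cgroup(newpage);
	spin_lock_irqsave(&zone->lru_lock, flags);
	if (PageLRU(newpage))
		del_page_from_lru_list(zone, newpage, page_lru(newpage));
	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, type);
	if (PageLRU(newpage))
		add_page_to_lru_list(zone, newpage, page_lru(newpage));
	spin_unlock_irqrestore(&zone->lru_lock, flags);
}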
Background:
replace_page_cache_page() is called by the FUSE code in its splice()
handling. Here, 'newpage' replaces 'oldpage', but the new page is not a
freshly allocated page and may already be on an LRU. LRU mis-accounting
is critical for the memory cgroup because rmdir() checks that every LRU
is empty and that there is no account leak. If a page sits on a
different LRU than the one it should be on, rmdir() will fail.
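For context, here is a condensed, illustrative sketch of that call site,
modeled on fuse_try_move_page() in fs/fuse/dev.c with most of the real
function elided; treat the shape rather than the details as accurate:

/*
 * 'newpage' is stolen from a splice pipe buffer rather than freshly
 * allocated, so it may already sit on an LRU list when it is swapped
 * into the page cache in place of 'oldpage'.
 */
static int fuse_move_page_sketch(struct pipe_buffer *buf,
				 struct page *oldpage)
{
	struct page *newpage = buf->page;	/* recycled page, possibly on LRU */
	int err;

	/* swap newpage in for oldpage in the radix-tree */
	err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
	if (err)
		return err;	/* caller falls back to copying the data */

	/* ... page state fixup and oldpage release elided ... */
	return 0;
}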
This bug was introduced in March 2011, but there has been no bug report
yet. I guess there are not many people who use memcg and FUSE at the
same time with upstream kernels.
The result of this bug is that the admin cannot destroy a memcg because
of the account leak. So, no panic, no deadlock. And even if an active
cgroup exists, umount can still succeed, so there is no problem at
shutdown.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'mm/filemap.c')
 mm/filemap.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index dd828ea59dc..3c981baadb7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -396,24 +396,11 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 {
 	int error;
-	struct mem_cgroup *memcg = NULL;
 
 	VM_BUG_ON(!PageLocked(old));
 	VM_BUG_ON(!PageLocked(new));
 	VM_BUG_ON(new->mapping);
 
-	/*
-	 * This is not page migration, but prepare_migration and
-	 * end_migration does enough work for charge replacement.
-	 *
-	 * In the longer term we probably want a specialized function
-	 * for moving the charge from old to new in a more efficient
-	 * manner.
-	 */
-	error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
-	if (error)
-		return error;
-
 	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (!error) {
 		struct address_space *mapping = old->mapping;
@@ -435,13 +422,12 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		if (PageSwapBacked(new))
 			__inc_zone_page_state(new, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
+		/* mem_cgroup codes must not be called under tree_lock */
+		mem_cgroup_replace_page_cache(old, new);
 		radix_tree_preload_end();
 		if (freepage)
 			freepage(old);
 		page_cache_release(old);
-		mem_cgroup_end_migration(memcg, old, new, true);
-	} else {
-		mem_cgroup_end_migration(memcg, old, new, false);
 	}
 
 	return error;