author:    Johannes Weiner <hannes@cmpxchg.org>    2014-08-08 17:19:20 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>    2014-08-08 18:57:17 -0400
commit:    00501b531c4723972aa11d6d4ebcf8d6552007c8
tree:      b3ad4850d58f137cf87b8424412d962fb251839f /mm/filemap.c
parent:    4449a51a7c281602d3a385044ab928322a122a02
mm: memcontrol: rewrite charge API
These patches rework memcg charge lifetime to integrate more naturally with the lifetime of user pages.  This drastically simplifies the code and reduces charging and uncharging overhead.  The most expensive part of charging and uncharging is the page_cgroup bit spinlock, which is removed entirely after this series.

Here are the top-10 profile entries of a stress test that reads a 128G sparse file on a freshly booted box, without even a dedicated cgroup (i.e. executing in the root memcg).

Before:

    15.36%              cat  [kernel.kallsyms]   [k] copy_user_generic_string
    13.31%              cat  [kernel.kallsyms]   [k] memset
    11.48%              cat  [kernel.kallsyms]   [k] do_mpage_readpage
     4.23%              cat  [kernel.kallsyms]   [k] get_page_from_freelist
     2.38%              cat  [kernel.kallsyms]   [k] put_page
     2.32%              cat  [kernel.kallsyms]   [k] __mem_cgroup_commit_charge
     2.18%          kswapd0  [kernel.kallsyms]   [k] __mem_cgroup_uncharge_common
     1.92%          kswapd0  [kernel.kallsyms]   [k] shrink_page_list
     1.86%              cat  [kernel.kallsyms]   [k] __radix_tree_lookup
     1.62%              cat  [kernel.kallsyms]   [k] __pagevec_lru_add_fn

After:

    15.67%              cat  [kernel.kallsyms]   [k] copy_user_generic_string
    13.48%              cat  [kernel.kallsyms]   [k] memset
    11.42%              cat  [kernel.kallsyms]   [k] do_mpage_readpage
     3.98%              cat  [kernel.kallsyms]   [k] get_page_from_freelist
     2.46%              cat  [kernel.kallsyms]   [k] put_page
     2.13%          kswapd0  [kernel.kallsyms]   [k] shrink_page_list
     1.88%              cat  [kernel.kallsyms]   [k] __radix_tree_lookup
     1.67%              cat  [kernel.kallsyms]   [k] __pagevec_lru_add_fn
     1.39%          kswapd0  [kernel.kallsyms]   [k] free_pcppages_bulk
     1.30%              cat  [kernel.kallsyms]   [k] kfree

As you can see, the memcg footprint has shrunk quite a bit.

       text    data     bss     dec     hex filename
      37970    9892     400   48262    bc86 mm/memcontrol.o.old
      35239    9892     400   45531    b1db mm/memcontrol.o

This patch (of 4):

The memcg charge API charges pages before they are rmapped - i.e. have an actual "type" - and so every callsite needs its own set of charge and uncharge functions to know what type is being operated on.  Worse, uncharge has to happen from a context that is still type-specific, rather than at the end of the page's lifetime with exclusive access, and so requires a lot of synchronization.

Rewrite the charge API to provide a generic set of try_charge(), commit_charge() and cancel_charge() transaction operations, much like what's currently done for swap-in:

  mem_cgroup_try_charge() attempts to reserve a charge, reclaiming
  pages from the memcg if necessary.

  mem_cgroup_commit_charge() commits the page to the charge once it
  has a valid page->mapping and PageAnon() reliably tells the type.

  mem_cgroup_cancel_charge() aborts the transaction.

This reduces the charge API and enables subsequent patches to drastically simplify uncharging.

As pages need to be committed after rmap is established but before they are added to the LRU, page_add_new_anon_rmap() must stop doing LRU additions again.  Revive lru_cache_add_active_or_unevictable().

[hughd@google.com: fix shmem_unuse]
[hughd@google.com: Add comments on the private use of -EAGAIN]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
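For reference, the transaction pattern reads roughly as follows.  This is a minimal sketch, not code from the patch: the callsite and the insert_step() placeholder are hypothetical, while the mem_cgroup_try_charge()/mem_cgroup_commit_charge()/mem_cgroup_cancel_charge() signatures are the ones this series introduces.

    /*
     * Hypothetical callsite sketching the charge transaction introduced
     * by this series: reserve, do the type-defining work, then commit
     * or cancel.
     */
    static int example_charge_new_page(struct page *page, struct mm_struct *mm,
    				   gfp_t gfp_mask)
    {
    	struct mem_cgroup *memcg;
    	int error;

    	/* Reserve a charge against mm's memcg; may reclaim if at the limit. */
    	error = mem_cgroup_try_charge(page, mm, gfp_mask, &memcg);
    	if (error)
    		return error;

    	/* Give the page its "type": map it or insert it into the page cache. */
    	error = insert_step(page);	/* placeholder for the real work */
    	if (error) {
    		/* Abort: return the reserved charge to the memcg. */
    		mem_cgroup_cancel_charge(page, memcg);
    		return error;
    	}

    	/* Bind the charge to the page; lrucare is false because the page
    	 * is not on the LRU yet. */
    	mem_cgroup_commit_charge(page, memcg, false);
    	return 0;
    }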
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--	mm/filemap.c	21
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index af19a6b079f5..349a40e35545 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -31,6 +31,7 @@
 #include <linux/security.h>
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
+#include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
 #include <linux/rmap.h>
@@ -548,19 +549,24 @@ static int __add_to_page_cache_locked(struct page *page,
 				pgoff_t offset, gfp_t gfp_mask,
 				void **shadowp)
 {
+	int huge = PageHuge(page);
+	struct mem_cgroup *memcg;
 	int error;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
 
-	error = mem_cgroup_charge_file(page, current->mm,
-					gfp_mask & GFP_RECLAIM_MASK);
-	if (error)
-		return error;
+	if (!huge) {
+		error = mem_cgroup_try_charge(page, current->mm,
+					      gfp_mask, &memcg);
+		if (error)
+			return error;
+	}
 
 	error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (error) {
-		mem_cgroup_uncharge_cache_page(page);
+		if (!huge)
+			mem_cgroup_cancel_charge(page, memcg);
 		return error;
 	}
 
@@ -575,13 +581,16 @@ static int __add_to_page_cache_locked(struct page *page,
 		goto err_insert;
 	__inc_zone_page_state(page, NR_FILE_PAGES);
 	spin_unlock_irq(&mapping->tree_lock);
+	if (!huge)
+		mem_cgroup_commit_charge(page, memcg, false);
 	trace_mm_filemap_add_to_page_cache(page);
 	return 0;
 err_insert:
 	page->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
 	spin_unlock_irq(&mapping->tree_lock);
-	mem_cgroup_uncharge_cache_page(page);
+	if (!huge)
+		mem_cgroup_cancel_charge(page, memcg);
 	page_cache_release(page);
 	return error;
 }
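Condensed, the memcg handling added above can be read as the following outline of __add_to_page_cache_locked() after the patch.  This is a sketch only: the radix-tree insertion under mapping->tree_lock is collapsed into the insert_into_cache() placeholder, and hugetlb pages bypass memcg accounting entirely, hence the !huge guards.

    	int huge = PageHuge(page);
    	struct mem_cgroup *memcg;
    	int error;

    	if (!huge) {
    		/* Reserve a charge before touching the page cache. */
    		error = mem_cgroup_try_charge(page, current->mm, gfp_mask, &memcg);
    		if (error)
    			return error;		/* nothing reserved, nothing to undo */
    	}

    	error = insert_into_cache(page, mapping, offset);	/* placeholder */
    	if (error) {
    		if (!huge)
    			mem_cgroup_cancel_charge(page, memcg);	/* undo reservation */
    		return error;
    	}

    	if (!huge)
    		mem_cgroup_commit_charge(page, memcg, false);
    	return 0;

Note that the commit runs only after mapping->tree_lock has been dropped, so binding the charge to the page never happens under the tree lock.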