author		Johannes Weiner <hannes@cmpxchg.org>	2014-08-08 17:19:20 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-08-08 18:57:17 -0400
commit		00501b531c4723972aa11d6d4ebcf8d6552007c8
tree		b3ad4850d58f137cf87b8424412d962fb251839f /include/linux
parent		4449a51a7c281602d3a385044ab928322a122a02
mm: memcontrol: rewrite charge API
These patches rework memcg charge lifetime to integrate more naturally
with the lifetime of user pages.  This drastically simplifies the code
and reduces charging and uncharging overhead.  The most expensive part
of charging and uncharging is the page_cgroup bit spinlock, which is
removed entirely after this series.

Here are the top-10 profile entries of a stress test that reads a 128G
sparse file on a freshly booted box, without even a dedicated cgroup
(i.e. executing in the root memcg).

Before:

    15.36%     cat      [kernel.kallsyms]  [k] copy_user_generic_string
    13.31%     cat      [kernel.kallsyms]  [k] memset
    11.48%     cat      [kernel.kallsyms]  [k] do_mpage_readpage
     4.23%     cat      [kernel.kallsyms]  [k] get_page_from_freelist
     2.38%     cat      [kernel.kallsyms]  [k] put_page
     2.32%     cat      [kernel.kallsyms]  [k] __mem_cgroup_commit_charge
     2.18%     kswapd0  [kernel.kallsyms]  [k] __mem_cgroup_uncharge_common
     1.92%     kswapd0  [kernel.kallsyms]  [k] shrink_page_list
     1.86%     cat      [kernel.kallsyms]  [k] __radix_tree_lookup
     1.62%     cat      [kernel.kallsyms]  [k] __pagevec_lru_add_fn

After:

    15.67%     cat      [kernel.kallsyms]  [k] copy_user_generic_string
    13.48%     cat      [kernel.kallsyms]  [k] memset
    11.42%     cat      [kernel.kallsyms]  [k] do_mpage_readpage
     3.98%     cat      [kernel.kallsyms]  [k] get_page_from_freelist
     2.46%     cat      [kernel.kallsyms]  [k] put_page
     2.13%     kswapd0  [kernel.kallsyms]  [k] shrink_page_list
     1.88%     cat      [kernel.kallsyms]  [k] __radix_tree_lookup
     1.67%     cat      [kernel.kallsyms]  [k] __pagevec_lru_add_fn
     1.39%     kswapd0  [kernel.kallsyms]  [k] free_pcppages_bulk
     1.30%     cat      [kernel.kallsyms]  [k] kfree

As you can see, the memcg footprint has shrunk quite a bit.

      text    data     bss     dec     hex  filename
     37970    9892     400   48262    bc86  mm/memcontrol.o.old
     35239    9892     400   45531    b1db  mm/memcontrol.o

This patch (of 4):

The memcg charge API charges pages before they are rmapped - i.e. have
an actual "type" - and so every callsite needs its own set of charge
and uncharge functions to know what type is being operated on.  Worse,
uncharge has to happen from a context that is still type-specific,
rather than at the end of the page's lifetime with exclusive access,
and so requires a lot of synchronization.

Rewrite the charge API to provide a generic set of try_charge(),
commit_charge() and cancel_charge() transaction operations, much like
what's currently done for swap-in:

  mem_cgroup_try_charge() attempts to reserve a charge, reclaiming
  pages from the memcg if necessary.

  mem_cgroup_commit_charge() commits the page to the charge once it
  has a valid page->mapping and PageAnon() reliably tells the type.

  mem_cgroup_cancel_charge() aborts the transaction.

This reduces the charge API and enables subsequent patches to
drastically simplify uncharging.

As pages need to be committed after rmap is established but before
they are added to the LRU, page_add_new_anon_rmap() must stop doing
LRU additions again.  Revive lru_cache_add_active_or_unevictable().

[hughd@google.com: fix shmem_unuse]
[hughd@google.com: Add comments on the private use of -EAGAIN]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
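To make the transaction ordering above concrete, here is a minimal sketch
of how a caller might charge a newly faulted anonymous page with this API:
reserve the charge first, commit only after rmap has given the page its
type, and add it to the LRU last.  The function example_charge_new_anon_page()
and the helper example_lost_fault_race() are illustrative assumptions, not
part of this patch; only the mem_cgroup_*(), page_add_new_anon_rmap() and
lru_cache_add_active_or_unevictable() calls come from the interfaces touched
here.

    /*
     * Illustrative caller only -- not part of this patch; the
     * example_*() helper and the error codes are assumptions.
     */
    static int example_charge_new_anon_page(struct vm_area_struct *vma,
                                            struct page *page,
                                            unsigned long address)
    {
            struct mem_cgroup *memcg;

            /* 1. Reserve the charge; this may reclaim from the memcg. */
            if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg))
                    return -ENOMEM;

            /*
             * 2. If the page turns out not to be needed after all (e.g.
             *    a racing fault already installed one), abort the
             *    transaction -- nothing has been committed yet.
             */
            if (example_lost_fault_race(vma, address)) {
                    mem_cgroup_cancel_charge(page, memcg);
                    return -EBUSY;
            }

            /* 3. Rmap first, so page->mapping and PageAnon() are valid. */
            page_add_new_anon_rmap(page, vma, address);

            /* 4. Commit the charge; lrucare == false, not on the LRU yet. */
            mem_cgroup_commit_charge(page, memcg, false);

            /* 5. Only now add the page to the LRU. */
            lru_cache_add_active_or_unevictable(page, vma);

            return 0;
    }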
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/memcontrol.h	53
-rw-r--r--	include/linux/swap.h		3
2 files changed, 17 insertions(+), 39 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eb65d29516ca..1a9a096858e0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,28 +54,11 @@ struct mem_cgroup_reclaim_cookie {
 };
 
 #ifdef CONFIG_MEMCG
-/*
- * All "charge" functions with gfp_mask should use GFP_KERNEL or
- * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
- * alloc memory but reclaims memory from all available zones. So, "where I want
- * memory from" bits of gfp_mask has no meaning. So any bits of that field is
- * available but adding a rule is better. charge functions' gfp_mask should
- * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
- * codes.
- * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
- */
-
-extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask);
-/* for swap handling */
-extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
-extern void mem_cgroup_commit_charge_swapin(struct page *page,
-					struct mem_cgroup *memcg);
-extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
-
-extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask);
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp);
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+			      bool lrucare);
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
 
 struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
 struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
@@ -233,30 +216,22 @@ void mem_cgroup_print_bad_page(struct page *page);
 #else /* CONFIG_MEMCG */
 struct mem_cgroup;
 
-static inline int mem_cgroup_charge_anon(struct page *page,
-					struct mm_struct *mm, gfp_t gfp_mask)
-{
-	return 0;
-}
-
-static inline int mem_cgroup_charge_file(struct page *page,
-					struct mm_struct *mm, gfp_t gfp_mask)
-{
-	return 0;
-}
-
-static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
+static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+					gfp_t gfp_mask,
+					struct mem_cgroup **memcgp)
 {
+	*memcgp = NULL;
 	return 0;
 }
 
-static inline void mem_cgroup_commit_charge_swapin(struct page *page,
-					struct mem_cgroup *memcg)
+static inline void mem_cgroup_commit_charge(struct page *page,
+					    struct mem_cgroup *memcg,
+					    bool lrucare)
 {
 }
 
-static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+static inline void mem_cgroup_cancel_charge(struct page *page,
+					    struct mem_cgroup *memcg)
 {
 }
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1eb64043c076..46a649e4e8cd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -320,6 +320,9 @@ extern void swap_setup(void);
 
 extern void add_page_to_unevictable_list(struct page *page);
 
+extern void lru_cache_add_active_or_unevictable(struct page *page,
+						struct vm_area_struct *vma);
+
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);