aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
author Johannes Weiner <hannes@cmpxchg.org> 2014-08-08 17:19:22 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-08-08 18:57:17 -0400
commit 0a31bc97c80c3fa87b32c091d9a930ac19cd0c40 (patch)
tree 06dafd237309f9b8ded980eb420a5377989e2c0b /include/linux
parent 00501b531c4723972aa11d6d4ebcf8d6552007c8 (diff)
mm: memcontrol: rewrite uncharge API
The memcg uncharging code that is involved towards the end of a page's lifetime - truncation, reclaim, swapout, migration - is impressively complicated and fragile.

Because anonymous and file pages were always charged before they had their page->mapping established, uncharges had to happen when the page type could still be known from the context; as in unmap for anonymous, page cache removal for file and shmem pages, and swap cache truncation for swap pages. However, these operations happen well before the page is actually freed, and so a lot of synchronization is necessary:

- Charging, uncharging, page migration, and charge migration all need to take a per-page bit spinlock as they could race with uncharging.

- Swap cache truncation happens during both swap-in and swap-out, and possibly repeatedly before the page is actually freed. This means that the memcg swapout code is called from many contexts that make no sense and it has to figure out the direction from page state to make sure memory and memory+swap are always correctly charged.

- On page migration, the old page might be unmapped but then reused, so memcg code has to prevent untimely uncharging in that case. Because this code - which should be a simple charge transfer - is so special-cased, it is not reusable for replace_page_cache().

But now that charged pages always have a page->mapping, introduce mem_cgroup_uncharge(), which is called after the final put_page(), when we know for sure that nobody is looking at the page anymore.

For page migration, introduce mem_cgroup_migrate(), which is called after the migration is successful and the new page is fully rmapped. Because the old page is no longer uncharged after migration, prevent double charges by decoupling the page's memcg association (PCG_USED and pc->mem_cgroup) from the page holding an actual charge. The new bits PCG_MEM and PCG_MEMSW represent the respective charges and are transferred to the new page during migration.

mem_cgroup_migrate() is suitable for replace_page_cache() as well, which gets rid of mem_cgroup_replace_page_cache(). However, care needs to be taken because both the source and the target page can already be charged and on the LRU when fuse is splicing: grab the page lock on the charge moving side to prevent changing pc->mem_cgroup of a page under migration. Also, the lruvecs of both pages change as we uncharge the old and charge the new during migration, and putback may race with us, so grab the lru lock and isolate the pages iff on LRU to prevent races and ensure the pages are on the right lruvec afterward.

Swap accounting is massively simplified: because the page is no longer uncharged as early as swap cache deletion, a new mem_cgroup_swapout() can transfer the page's memory+swap charge (PCG_MEMSW) to the swap entry before the final put_page() in page reclaim.

Finally, page_cgroup changes are now protected by whatever protection the page itself offers: anonymous pages are charged under the page table lock, whereas page cache insertions, swapin, and migration hold the page lock. Uncharging happens under full exclusion with no outstanding references. Charging and uncharging also ensure that the page is off-LRU, which serializes against charge migration. Remove the very costly page_cgroup lock and set pc->flags non-atomically.

[mhocko@suse.cz: mem_cgroup_charge_statistics needs preempt_disable]
[vdavydov@parallels.com: fix flags definition]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Tested-by: Jet Chen <jet.chen@intel.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Tested-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/memcontrol.h 49
-rw-r--r-- include/linux/page_cgroup.h 43
-rw-r--r-- include/linux/swap.h 12
3 files changed, 28 insertions, 76 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1a9a096858e0..806b8fa15c5f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -60,15 +60,17 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
60 bool lrucare); 60 bool lrucare);
61void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg); 61void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
62 62
63struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); 63void mem_cgroup_uncharge(struct page *page);
64struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); 64
65/* Batched uncharging */
66void mem_cgroup_uncharge_start(void);
67void mem_cgroup_uncharge_end(void);
65 68
66/* For coalescing uncharge for reducing memcg' overhead*/ 69void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
67extern void mem_cgroup_uncharge_start(void); 70 bool lrucare);
68extern void mem_cgroup_uncharge_end(void);
69 71
70extern void mem_cgroup_uncharge_page(struct page *page); 72struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
71extern void mem_cgroup_uncharge_cache_page(struct page *page); 73struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
72 74
73bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, 75bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
74 struct mem_cgroup *memcg); 76 struct mem_cgroup *memcg);
@@ -96,12 +98,6 @@ bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
96 98
97extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); 99extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
98 100
99extern void
100mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
101 struct mem_cgroup **memcgp);
102extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
103 struct page *oldpage, struct page *newpage, bool migration_ok);
104
105struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, 101struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
106 struct mem_cgroup *, 102 struct mem_cgroup *,
107 struct mem_cgroup_reclaim_cookie *); 103 struct mem_cgroup_reclaim_cookie *);
@@ -116,8 +112,6 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
116void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); 112void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
117extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 113extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
118 struct task_struct *p); 114 struct task_struct *p);
119extern void mem_cgroup_replace_page_cache(struct page *oldpage,
120 struct page *newpage);
121 115
122static inline void mem_cgroup_oom_enable(void) 116static inline void mem_cgroup_oom_enable(void)
123{ 117{
@@ -235,19 +229,21 @@ static inline void mem_cgroup_cancel_charge(struct page *page,
235{ 229{
236} 230}
237 231
238static inline void mem_cgroup_uncharge_start(void) 232static inline void mem_cgroup_uncharge(struct page *page)
239{ 233{
240} 234}
241 235
242static inline void mem_cgroup_uncharge_end(void) 236static inline void mem_cgroup_uncharge_start(void)
243{ 237{
244} 238}
245 239
246static inline void mem_cgroup_uncharge_page(struct page *page) 240static inline void mem_cgroup_uncharge_end(void)
247{ 241{
248} 242}
249 243
250static inline void mem_cgroup_uncharge_cache_page(struct page *page) 244static inline void mem_cgroup_migrate(struct page *oldpage,
245 struct page *newpage,
246 bool lrucare)
251{ 247{
252} 248}
253 249
@@ -286,17 +282,6 @@ static inline struct cgroup_subsys_state
286 return NULL; 282 return NULL;
287} 283}
288 284
289static inline void
290mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
291 struct mem_cgroup **memcgp)
292{
293}
294
295static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
296 struct page *oldpage, struct page *newpage, bool migration_ok)
297{
298}
299
300static inline struct mem_cgroup * 285static inline struct mem_cgroup *
301mem_cgroup_iter(struct mem_cgroup *root, 286mem_cgroup_iter(struct mem_cgroup *root,
302 struct mem_cgroup *prev, 287 struct mem_cgroup *prev,
@@ -392,10 +377,6 @@ static inline
392void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) 377void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
393{ 378{
394} 379}
395static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
396 struct page *newpage)
397{
398}
399#endif /* CONFIG_MEMCG */ 380#endif /* CONFIG_MEMCG */
400 381
401#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM) 382#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 777a524716db..9bfb8e68a595 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -3,9 +3,9 @@
3 3
4enum { 4enum {
5 /* flags for mem_cgroup */ 5 /* flags for mem_cgroup */
6 PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */ 6 PCG_USED = 0x01, /* This page is charged to a memcg */
7 PCG_USED, /* this object is in use. */ 7 PCG_MEM = 0x02, /* This page holds a memory charge */
8 PCG_MIGRATION, /* under page migration */ 8 PCG_MEMSW = 0x04, /* This page holds a memory+swap charge */
9 __NR_PCG_FLAGS, 9 __NR_PCG_FLAGS,
10}; 10};
11 11
@@ -44,42 +44,9 @@ static inline void __init page_cgroup_init(void)
44struct page_cgroup *lookup_page_cgroup(struct page *page); 44struct page_cgroup *lookup_page_cgroup(struct page *page);
45struct page *lookup_cgroup_page(struct page_cgroup *pc); 45struct page *lookup_cgroup_page(struct page_cgroup *pc);
46 46
47#define TESTPCGFLAG(uname, lname) \ 47static inline int PageCgroupUsed(struct page_cgroup *pc)
48static inline int PageCgroup##uname(struct page_cgroup *pc) \
49 { return test_bit(PCG_##lname, &pc->flags); }
50
51#define SETPCGFLAG(uname, lname) \
52static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
53 { set_bit(PCG_##lname, &pc->flags); }
54
55#define CLEARPCGFLAG(uname, lname) \
56static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
57 { clear_bit(PCG_##lname, &pc->flags); }
58
59#define TESTCLEARPCGFLAG(uname, lname) \
60static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \
61 { return test_and_clear_bit(PCG_##lname, &pc->flags); }
62
63TESTPCGFLAG(Used, USED)
64CLEARPCGFLAG(Used, USED)
65SETPCGFLAG(Used, USED)
66
67SETPCGFLAG(Migration, MIGRATION)
68CLEARPCGFLAG(Migration, MIGRATION)
69TESTPCGFLAG(Migration, MIGRATION)
70
71static inline void lock_page_cgroup(struct page_cgroup *pc)
72{
73 /*
74 * Don't take this lock in IRQ context.
75 * This lock is for pc->mem_cgroup, USED, MIGRATION
76 */
77 bit_spin_lock(PCG_LOCK, &pc->flags);
78}
79
80static inline void unlock_page_cgroup(struct page_cgroup *pc)
81{ 48{
82 bit_spin_unlock(PCG_LOCK, &pc->flags); 49 return !!(pc->flags & PCG_USED);
83} 50}
84 51
85#else /* CONFIG_MEMCG */ 52#else /* CONFIG_MEMCG */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 46a649e4e8cd..1b72060f093a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -381,9 +381,13 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
381} 381}
382#endif 382#endif
383#ifdef CONFIG_MEMCG_SWAP 383#ifdef CONFIG_MEMCG_SWAP
384extern void mem_cgroup_uncharge_swap(swp_entry_t ent); 384extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
385extern void mem_cgroup_uncharge_swap(swp_entry_t entry);
385#else 386#else
386static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) 387static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
388{
389}
390static inline void mem_cgroup_uncharge_swap(swp_entry_t entry)
387{ 391{
388} 392}
389#endif 393#endif
@@ -443,7 +447,7 @@ extern void swap_shmem_alloc(swp_entry_t);
443extern int swap_duplicate(swp_entry_t); 447extern int swap_duplicate(swp_entry_t);
444extern int swapcache_prepare(swp_entry_t); 448extern int swapcache_prepare(swp_entry_t);
445extern void swap_free(swp_entry_t); 449extern void swap_free(swp_entry_t);
446extern void swapcache_free(swp_entry_t, struct page *page); 450extern void swapcache_free(swp_entry_t);
447extern int free_swap_and_cache(swp_entry_t); 451extern int free_swap_and_cache(swp_entry_t);
448extern int swap_type_of(dev_t, sector_t, struct block_device **); 452extern int swap_type_of(dev_t, sector_t, struct block_device **);
449extern unsigned int count_swap_pages(int, int); 453extern unsigned int count_swap_pages(int, int);
@@ -507,7 +511,7 @@ static inline void swap_free(swp_entry_t swp)
507{ 511{
508} 512}
509 513
510static inline void swapcache_free(swp_entry_t swp, struct page *page) 514static inline void swapcache_free(swp_entry_t swp)
511{ 515{
512} 516}
513 517