aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2009-01-07 21:08:01 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-01-08 11:31:05 -0500
commit: 08e552c69c6930d64722de3ec18c51844d06ee28 (patch)
tree: a744d57ed4b23401115f1033dcaac9e85d550e09 /include/linux
parent: 8c7c6e34a1256a5082d38c8e9bd1474476912715 (diff)
memcg: synchronized LRU
A big patch for changing memcg's LRU semantics. Now, - page_cgroup is linked to its mem_cgroup's own LRU (per zone). - LRU of page_cgroup is not synchronous with global LRU. - page and page_cgroup are one-to-one and statically allocated. - To find which LRU a page_cgroup is on, you have to check pc->mem_cgroup as - lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc); - SwapCache is handled. And, when we handle the LRU list of page_cgroup, we do the following. pc = lookup_page_cgroup(page); lock_page_cgroup(pc); .....................(1) mz = page_cgroup_zoneinfo(pc); spin_lock(&mz->lru_lock); .....add to LRU spin_unlock(&mz->lru_lock); unlock_page_cgroup(pc); But (1) is a spin_lock and we have to be afraid of deadlock with zone->lru_lock. So, trylock() is used at (1), now. Without (1), we can't trust that "mz" is correct. This is a trial to remove this dirty nesting of locks. This patch changes mz->lru_lock to be zone->lru_lock. Then, the above sequence will be written as spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU mem_cgroup_add/remove/etc_lru() { pc = lookup_page_cgroup(page); mz = page_cgroup_zoneinfo(pc); if (PageCgroupUsed(pc)) { ....add to LRU } } spin_unlock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU This is much simpler. (*) We're safe even if we don't take lock_page_cgroup(pc). Because.. 1. When pc->mem_cgroup can be modified: - at charge. - at account_move(). 2. at charge, the PCG_USED bit is not set before pc->mem_cgroup is fixed. 3. at account_move(), the page is isolated and not on LRU. Pros. - easy for maintenance. - memcg can make use of laziness of pagevec. - we don't have to duplicate the LRU/Active/Unevictable bits in page_cgroup. - LRU status of memcg will be synchronized with global LRU's one. - # of locks is reduced. - account_move() is simplified very much. Cons. - may increase cost of LRU rotation. (no impact if memcg is not configured.) 
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Pavel Emelyanov <xemul@openvz.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/memcontrol.h 29
-rw-r--r-- include/linux/mm_inline.h 3
-rw-r--r-- include/linux/page_cgroup.h 17
3 files changed, 30 insertions, 19 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ca51ac72d6c0..32c07b1852d6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -40,7 +40,12 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
40 40
41extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 41extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
42 gfp_t gfp_mask); 42 gfp_t gfp_mask);
43extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); 43extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
44extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
45extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
46extern void mem_cgroup_del_lru(struct page *page);
47extern void mem_cgroup_move_lists(struct page *page,
48 enum lru_list from, enum lru_list to);
44extern void mem_cgroup_uncharge_page(struct page *page); 49extern void mem_cgroup_uncharge_page(struct page *page);
45extern void mem_cgroup_uncharge_cache_page(struct page *page); 50extern void mem_cgroup_uncharge_cache_page(struct page *page);
46extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); 51extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
@@ -131,7 +136,27 @@ static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
131 return 0; 136 return 0;
132} 137}
133 138
134static inline void mem_cgroup_move_lists(struct page *page, bool active) 139static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
140{
141}
142
143static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
144{
145 return ;
146}
147
148static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
149{
150 return ;
151}
152
153static inline void mem_cgroup_del_lru(struct page *page)
154{
155 return ;
156}
157
158static inline void
159mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
135{ 160{
136} 161}
137 162
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index c948350c378e..37ef13d0f01e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -28,6 +28,7 @@ add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
28{ 28{
29 list_add(&page->lru, &zone->lru[l].list); 29 list_add(&page->lru, &zone->lru[l].list);
30 __inc_zone_state(zone, NR_LRU_BASE + l); 30 __inc_zone_state(zone, NR_LRU_BASE + l);
31 mem_cgroup_add_lru_list(page, l);
31} 32}
32 33
33static inline void 34static inline void
@@ -35,6 +36,7 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
35{ 36{
36 list_del(&page->lru); 37 list_del(&page->lru);
37 __dec_zone_state(zone, NR_LRU_BASE + l); 38 __dec_zone_state(zone, NR_LRU_BASE + l);
39 mem_cgroup_del_lru_list(page, l);
38} 40}
39 41
40static inline void 42static inline void
@@ -54,6 +56,7 @@ del_page_from_lru(struct zone *zone, struct page *page)
54 l += page_is_file_cache(page); 56 l += page_is_file_cache(page);
55 } 57 }
56 __dec_zone_state(zone, NR_LRU_BASE + l); 58 __dec_zone_state(zone, NR_LRU_BASE + l);
59 mem_cgroup_del_lru_list(page, l);
57} 60}
58 61
59/** 62/**
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index d754b2dfbf2d..602cc1fdee90 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -26,10 +26,6 @@ enum {
26 PCG_LOCK, /* page cgroup is locked */ 26 PCG_LOCK, /* page cgroup is locked */
27 PCG_CACHE, /* charged as cache */ 27 PCG_CACHE, /* charged as cache */
28 PCG_USED, /* this object is in use. */ 28 PCG_USED, /* this object is in use. */
29 /* flags for LRU placement */
30 PCG_ACTIVE, /* page is active in this cgroup */
31 PCG_FILE, /* page is file system backed */
32 PCG_UNEVICTABLE, /* page is unevictableable */
33}; 29};
34 30
35#define TESTPCGFLAG(uname, lname) \ 31#define TESTPCGFLAG(uname, lname) \
@@ -50,19 +46,6 @@ TESTPCGFLAG(Cache, CACHE)
50TESTPCGFLAG(Used, USED) 46TESTPCGFLAG(Used, USED)
51CLEARPCGFLAG(Used, USED) 47CLEARPCGFLAG(Used, USED)
52 48
53/* LRU management flags (from global-lru definition) */
54TESTPCGFLAG(File, FILE)
55SETPCGFLAG(File, FILE)
56CLEARPCGFLAG(File, FILE)
57
58TESTPCGFLAG(Active, ACTIVE)
59SETPCGFLAG(Active, ACTIVE)
60CLEARPCGFLAG(Active, ACTIVE)
61
62TESTPCGFLAG(Unevictable, UNEVICTABLE)
63SETPCGFLAG(Unevictable, UNEVICTABLE)
64CLEARPCGFLAG(Unevictable, UNEVICTABLE)
65
66static inline int page_cgroup_nid(struct page_cgroup *pc) 49static inline int page_cgroup_nid(struct page_cgroup *pc)
67{ 50{
68 return page_to_nid(pc->page); 51 return page_to_nid(pc->page);