diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2009-01-07 21:08:01 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-08 11:31:05 -0500 |
commit | 08e552c69c6930d64722de3ec18c51844d06ee28 (patch) | |
tree | a744d57ed4b23401115f1033dcaac9e85d550e09 /include/linux | |
parent | 8c7c6e34a1256a5082d38c8e9bd1474476912715 (diff) |
memcg: synchronized LRU
A big patch for changing memcg's LRU semantics.
Now,
- page_cgroup is linked to mem_cgroup's its own LRU (per zone).
- LRU of page_cgroup is not synchronous with global LRU.
- page and page_cgroup is one-to-one and statically allocated.
- To find page_cgroup is on what LRU, you have to check pc->mem_cgroup as
- lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);
- SwapCache is handled.
And, when we handle LRU list of page_cgroup, we do following.
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc); .....................(1)
mz = page_cgroup_zoneinfo(pc);
spin_lock(&mz->lru_lock);
.....add to LRU
spin_unlock(&mz->lru_lock);
unlock_page_cgroup(pc);
But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock.
So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct.
This is a trial to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, above sequence will be written as
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
mem_cgroup_add/remove/etc_lru() {
pc = lookup_page_cgroup(page);
mz = page_cgroup_zoneinfo(pc);
if (PageCgroupUsed(pc)) {
....add to LRU
}
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
This is much simpler.
(*) We're safe even if we don't take lock_page_cgroup(pc). Because..
1. When pc->mem_cgroup can be modified.
- at charge.
- at account_move().
2. at charge
the PCG_USED bit is not set before pc->mem_cgroup is fixed.
3. at account_move()
the page is isolated and not on LRU.
Pros.
- easy for maintenance.
- memcg can make use of laziness of pagevec.
- we don't have to duplicated LRU/Active/Unevictable bit in page_cgroup.
- LRU status of memcg will be synchronized with global LRU's one.
- # of locks are reduced.
- account_move() is simplified very much.
Cons.
- may increase cost of LRU rotation.
(no impact if memcg is not configured.)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/memcontrol.h | 29 | ||||
-rw-r--r-- | include/linux/mm_inline.h | 3 | ||||
-rw-r--r-- | include/linux/page_cgroup.h | 17 |
3 files changed, 30 insertions, 19 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ca51ac72d6c0..32c07b1852d6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -40,7 +40,12 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr); | |||
40 | 40 | ||
41 | extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | 41 | extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
42 | gfp_t gfp_mask); | 42 | gfp_t gfp_mask); |
43 | extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); | 43 | extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru); |
44 | extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru); | ||
45 | extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru); | ||
46 | extern void mem_cgroup_del_lru(struct page *page); | ||
47 | extern void mem_cgroup_move_lists(struct page *page, | ||
48 | enum lru_list from, enum lru_list to); | ||
44 | extern void mem_cgroup_uncharge_page(struct page *page); | 49 | extern void mem_cgroup_uncharge_page(struct page *page); |
45 | extern void mem_cgroup_uncharge_cache_page(struct page *page); | 50 | extern void mem_cgroup_uncharge_cache_page(struct page *page); |
46 | extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); | 51 | extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); |
@@ -131,7 +136,27 @@ static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) | |||
131 | return 0; | 136 | return 0; |
132 | } | 137 | } |
133 | 138 | ||
134 | static inline void mem_cgroup_move_lists(struct page *page, bool active) | 139 | static inline void mem_cgroup_add_lru_list(struct page *page, int lru) |
140 | { | ||
141 | } | ||
142 | |||
143 | static inline void mem_cgroup_del_lru_list(struct page *page, int lru) | ||
144 | { | ||
145 | return ; | ||
146 | } | ||
147 | |||
148 | static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru) | ||
149 | { | ||
150 | return ; | ||
151 | } | ||
152 | |||
153 | static inline void mem_cgroup_del_lru(struct page *page) | ||
154 | { | ||
155 | return ; | ||
156 | } | ||
157 | |||
158 | static inline void | ||
159 | mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to) | ||
135 | { | 160 | { |
136 | } | 161 | } |
137 | 162 | ||
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index c948350c378e..37ef13d0f01e 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h | |||
@@ -28,6 +28,7 @@ add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) | |||
28 | { | 28 | { |
29 | list_add(&page->lru, &zone->lru[l].list); | 29 | list_add(&page->lru, &zone->lru[l].list); |
30 | __inc_zone_state(zone, NR_LRU_BASE + l); | 30 | __inc_zone_state(zone, NR_LRU_BASE + l); |
31 | mem_cgroup_add_lru_list(page, l); | ||
31 | } | 32 | } |
32 | 33 | ||
33 | static inline void | 34 | static inline void |
@@ -35,6 +36,7 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) | |||
35 | { | 36 | { |
36 | list_del(&page->lru); | 37 | list_del(&page->lru); |
37 | __dec_zone_state(zone, NR_LRU_BASE + l); | 38 | __dec_zone_state(zone, NR_LRU_BASE + l); |
39 | mem_cgroup_del_lru_list(page, l); | ||
38 | } | 40 | } |
39 | 41 | ||
40 | static inline void | 42 | static inline void |
@@ -54,6 +56,7 @@ del_page_from_lru(struct zone *zone, struct page *page) | |||
54 | l += page_is_file_cache(page); | 56 | l += page_is_file_cache(page); |
55 | } | 57 | } |
56 | __dec_zone_state(zone, NR_LRU_BASE + l); | 58 | __dec_zone_state(zone, NR_LRU_BASE + l); |
59 | mem_cgroup_del_lru_list(page, l); | ||
57 | } | 60 | } |
58 | 61 | ||
59 | /** | 62 | /** |
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index d754b2dfbf2d..602cc1fdee90 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h | |||
@@ -26,10 +26,6 @@ enum { | |||
26 | PCG_LOCK, /* page cgroup is locked */ | 26 | PCG_LOCK, /* page cgroup is locked */ |
27 | PCG_CACHE, /* charged as cache */ | 27 | PCG_CACHE, /* charged as cache */ |
28 | PCG_USED, /* this object is in use. */ | 28 | PCG_USED, /* this object is in use. */ |
29 | /* flags for LRU placement */ | ||
30 | PCG_ACTIVE, /* page is active in this cgroup */ | ||
31 | PCG_FILE, /* page is file system backed */ | ||
32 | PCG_UNEVICTABLE, /* page is unevictableable */ | ||
33 | }; | 29 | }; |
34 | 30 | ||
35 | #define TESTPCGFLAG(uname, lname) \ | 31 | #define TESTPCGFLAG(uname, lname) \ |
@@ -50,19 +46,6 @@ TESTPCGFLAG(Cache, CACHE) | |||
50 | TESTPCGFLAG(Used, USED) | 46 | TESTPCGFLAG(Used, USED) |
51 | CLEARPCGFLAG(Used, USED) | 47 | CLEARPCGFLAG(Used, USED) |
52 | 48 | ||
53 | /* LRU management flags (from global-lru definition) */ | ||
54 | TESTPCGFLAG(File, FILE) | ||
55 | SETPCGFLAG(File, FILE) | ||
56 | CLEARPCGFLAG(File, FILE) | ||
57 | |||
58 | TESTPCGFLAG(Active, ACTIVE) | ||
59 | SETPCGFLAG(Active, ACTIVE) | ||
60 | CLEARPCGFLAG(Active, ACTIVE) | ||
61 | |||
62 | TESTPCGFLAG(Unevictable, UNEVICTABLE) | ||
63 | SETPCGFLAG(Unevictable, UNEVICTABLE) | ||
64 | CLEARPCGFLAG(Unevictable, UNEVICTABLE) | ||
65 | |||
66 | static inline int page_cgroup_nid(struct page_cgroup *pc) | 49 | static inline int page_cgroup_nid(struct page_cgroup *pc) |
67 | { | 50 | { |
68 | return page_to_nid(pc->page); | 51 | return page_to_nid(pc->page); |