author		KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2009-01-07 21:08:01 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>		2009-01-08 11:31:05 -0500
commit		08e552c69c6930d64722de3ec18c51844d06ee28 (patch)
tree		a744d57ed4b23401115f1033dcaac9e85d550e09 /mm/vmscan.c
parent		8c7c6e34a1256a5082d38c8e9bd1474476912715 (diff)
memcg: synchronized LRU
A big patch for changing memcg's LRU semantics.
Now,
- each page_cgroup is linked to its own mem_cgroup's LRU (per zone).
- the page_cgroup LRU is not kept in sync with the global LRU.
- page and page_cgroup are one-to-one and statically allocated.
- to find which LRU a page_cgroup is on, you have to check pc->mem_cgroup, as in
  - lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);
    (see the sketch just below this list)
- SwapCache is handled.
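For illustration, a minimal sketch of the lookup described in the fourth bullet above, assuming the era's helpers lookup_page_cgroup(), page_cgroup_nid()/page_cgroup_zid() and mem_cgroup_zoneinfo(); the wrapper name pc_to_memcg_zone() is hypothetical and not part of this patch.

	/* Sketch only: find the per-memcg, per-zone LRU a page belongs to. */
	static struct mem_cgroup_per_zone *pc_to_memcg_zone(struct page *page)
	{
		struct page_cgroup *pc = lookup_page_cgroup(page);	/* 1:1, statically allocated */
		struct mem_cgroup *mem = pc->mem_cgroup;		/* owning memcg */
		int nid = page_cgroup_nid(pc);				/* node of the page */
		int zid = page_cgroup_zid(pc);				/* zone index of the page */

		/* the per-memcg, per-zone structure that carries the LRU lists */
		return mem_cgroup_zoneinfo(mem, nid, zid);
	}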
And when we handle a page_cgroup's LRU list, we do the following.
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc); .....................(1)
mz = page_cgroup_zoneinfo(pc);
spin_lock(&mz->lru_lock);
.....add to LRU
spin_unlock(&mz->lru_lock);
unlock_page_cgroup(pc);
But (1) is a spin lock, and we have to worry about deadlock against zone->lru_lock.
So, for now, trylock() is used at (1). Without (1), we cannot trust that "mz" is correct.
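A rough sketch of that pre-patch pattern with the trylock at (1) spelled out; trylock_page_cgroup()/unlock_page_cgroup() are the bit-spinlock helpers of this era, while the function name and body here are purely illustrative, not the code this patch removes.

	static void memcg_lru_add_old_style(struct page *page)	/* hypothetical name */
	{
		struct page_cgroup *pc = lookup_page_cgroup(page);
		struct mem_cgroup_per_zone *mz;

		if (!trylock_page_cgroup(pc))	/* (1): a plain spin_lock here could deadlock */
			return;			/*      against zone->lru_lock                */
		mz = page_cgroup_zoneinfo(pc);	/* "mz" is only trustworthy while (1) is held */
		spin_lock(&mz->lru_lock);
		/* ..... add pc to mz's LRU ..... */
		spin_unlock(&mz->lru_lock);
		unlock_page_cgroup(pc);
	}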
This patch is an attempt to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, the above sequence can be written as
	spin_lock(&zone->lru_lock);	# in vmscan.c or swap.c via global LRU
	mem_cgroup_add/remove/etc_lru() {
		pc = lookup_page_cgroup(page);
		mz = page_cgroup_zoneinfo(pc);
		if (PageCgroupUsed(pc)) {
			....add to LRU
		}
	}
	spin_unlock(&zone->lru_lock);	# in vmscan.c or swap.c via global LRU
This is much simpler.
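As a concrete illustration of the add side, a minimal sketch of such a helper under the new rule: the caller (vmscan.c or swap.c) already holds zone->lru_lock and no page_cgroup lock is taken. The name memcg_lru_add_sketch() is hypothetical and only mirrors the shape of mem_cgroup_add_lru_list(); the explicit barrier is an assumption, not a quote of the committed code.

	void memcg_lru_add_sketch(struct page *page, enum lru_list lru)
	{
		struct page_cgroup *pc = lookup_page_cgroup(page);
		struct mem_cgroup_per_zone *mz;

		/* the caller already holds zone->lru_lock */
		smp_rmb();			/* pair with charge-side publication (assumed) */
		if (!PageCgroupUsed(pc))	/* not charged: nothing to account */
			return;

		mz = page_cgroup_zoneinfo(pc);	/* safe without lock_page_cgroup(): see (*) below */
		list_add(&pc->lru, &mz->lists[lru]);
	}

With this shape, the only lock the memcg LRU helpers depend on is the zone->lru_lock their callers already hold.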
(*) We're safe even if we don't take lock_page_cgroup(pc). Because..
1. When pc->mem_cgroup can be modified.
- at charge.
- at account_move().
2. at charge
the PCG_USED bit is not set before pc->mem_cgroup is fixed (see the ordering sketch after this list).
3. at account_move()
the page is isolated and not on LRU.
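To make point 2 concrete, a sketch of the publication order the charge path has to provide so that an unlocked reader which observes PCG_USED also observes a stable pc->mem_cgroup. The helper name, SetPageCgroupUsed() and the explicit smp_wmb() are placeholders for however the charge path actually enforces this ordering, not quotes of the committed code.

	static void commit_charge_ordering(struct page_cgroup *pc, struct mem_cgroup *mem)
	{
		pc->mem_cgroup = mem;	/* fix the owner first ...                      */
		smp_wmb();		/* ... make it visible (assumed barrier) ...    */
		SetPageCgroupUsed(pc);	/* ... before PCG_USED can be observed unlocked */
	}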
Pros.
- easier to maintain.
- memcg can make use of the laziness of pagevec.
- we don't have to duplicate the LRU/Active/Unevictable bits in page_cgroup.
- memcg's LRU status is kept synchronized with the global LRU's.
- the number of locks is reduced.
- account_move() is greatly simplified.
Cons.
- may increase the cost of LRU rotation.
  (no impact if memcg is not configured.)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	9
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f63b20dd7714..45983af1de3d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -512,7 +512,6 @@ redo:
                 lru = LRU_UNEVICTABLE;
                 add_page_to_unevictable_list(page);
         }
-        mem_cgroup_move_lists(page, lru);
 
         /*
          * page's status can change while we move it among lru. If an evictable
@@ -547,7 +546,6 @@ void putback_lru_page(struct page *page)
 
         lru = !!TestClearPageActive(page) + page_is_file_cache(page);
         lru_cache_add_lru(page, lru);
-        mem_cgroup_move_lists(page, lru);
         put_page(page);
 }
 #endif /* CONFIG_UNEVICTABLE_LRU */
@@ -813,6 +811,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
                 return ret;
 
         ret = -EBUSY;
+
         if (likely(get_page_unless_zero(page))) {
                 /*
                  * Be careful not to clear PageLRU until after we're
@@ -821,6 +820,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
                  */
                 ClearPageLRU(page);
                 ret = 0;
+                mem_cgroup_del_lru(page);
         }
 
         return ret;
@@ -1134,7 +1134,6 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                         SetPageLRU(page);
                         lru = page_lru(page);
                         add_page_to_lru_list(zone, page, lru);
-                        mem_cgroup_move_lists(page, lru);
                         if (PageActive(page) && scan_global_lru(sc)) {
                                 int file = !!page_is_file_cache(page);
                                 zone->recent_rotated[file]++;
@@ -1263,7 +1262,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
                 ClearPageActive(page);
 
                 list_move(&page->lru, &zone->lru[lru].list);
-                mem_cgroup_move_lists(page, lru);
+                mem_cgroup_add_lru_list(page, lru);
                 pgmoved++;
                 if (!pagevec_add(&pvec, page)) {
                         __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -2408,6 +2407,7 @@ retry:
 
                 __dec_zone_state(zone, NR_UNEVICTABLE);
                 list_move(&page->lru, &zone->lru[l].list);
+                mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
                 __inc_zone_state(zone, NR_INACTIVE_ANON + l);
                 __count_vm_event(UNEVICTABLE_PGRESCUED);
         } else {
@@ -2416,6 +2416,7 @@ retry:
                  */
                 SetPageUnevictable(page);
                 list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+                mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
                 if (page_evictable(page, NULL))
                         goto retry;
         }