diff options
author | Hugh Dickins <hughd@google.com> | 2012-01-12 20:19:52 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-12 23:13:09 -0500 |
commit | 12d27107867fc7216e8faaff0b894b0f162dcf75 (patch) | |
tree | 8f35d39f7e5d0b0f0ba10a3475e9fa4a2581e509 | |
parent | 0cee34fd72c582b4f8ad8ce00645b75fb4168199 (diff) |
memcg: fix split_huge_page_refcounts()
This patch started off as a cleanup: __split_huge_page_refcount() has to
cope with two scenarios, when the hugepage being split is already on LRU,
and when it is not; but why does it have to split that accounting across
three different sites? Consolidate it in lru_add_page_tail(), handling
evictable and unevictable alike, and use standard add_page_to_lru_list()
when accounting is needed (when the head is not yet on LRU).
But a recent regression in -next (I guess caused by the removal of the
PageCgroupAcctLRU test from mem_cgroup_split_huge_fixup()) now makes this a necessary fix:
under load, the MEM_CGROUP_ZSTAT count was wrapping to a huge number,
messing up reclaim calculations and causing a freeze at rmdir of cgroup.
Add a VM_BUG_ON to mem_cgroup_lru_del_list() when we're about to wrap that
count - this has not been the only such incident. Document that
lru_add_page_tail() is for Transparent HugePages by putting #ifdef CONFIG_TRANSPARENT_HUGEPAGE around it.
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/huge_memory.c | 10 | ||||
-rw-r--r-- | mm/memcontrol.c | 12 | ||||
-rw-r--r-- | mm/swap.c | 29 |
3 files changed, 21 insertions, 30 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 76cc3f7dd4f0..b3ffc21ce801 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1229,7 +1229,6 @@ static void __split_huge_page_refcount(struct page *page) | |||
1229 | { | 1229 | { |
1230 | int i; | 1230 | int i; |
1231 | struct zone *zone = page_zone(page); | 1231 | struct zone *zone = page_zone(page); |
1232 | int zonestat; | ||
1233 | int tail_count = 0; | 1232 | int tail_count = 0; |
1234 | 1233 | ||
1235 | /* prevent PageLRU to go away from under us, and freeze lru stats */ | 1234 | /* prevent PageLRU to go away from under us, and freeze lru stats */ |
@@ -1317,15 +1316,6 @@ static void __split_huge_page_refcount(struct page *page) | |||
1317 | __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); | 1316 | __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); |
1318 | __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); | 1317 | __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); |
1319 | 1318 | ||
1320 | /* | ||
1321 | * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics, | ||
1322 | * so adjust those appropriately if this page is on the LRU. | ||
1323 | */ | ||
1324 | if (PageLRU(page)) { | ||
1325 | zonestat = NR_LRU_BASE + page_lru(page); | ||
1326 | __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1)); | ||
1327 | } | ||
1328 | |||
1329 | ClearPageCompound(page); | 1319 | ClearPageCompound(page); |
1330 | compound_unlock(page); | 1320 | compound_unlock(page); |
1331 | spin_unlock_irq(&zone->lru_lock); | 1321 | spin_unlock_irq(&zone->lru_lock); |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 609c49f492e6..9f2f64697409 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1071,6 +1071,7 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) | |||
1071 | VM_BUG_ON(!memcg); | 1071 | VM_BUG_ON(!memcg); |
1072 | mz = page_cgroup_zoneinfo(memcg, page); | 1072 | mz = page_cgroup_zoneinfo(memcg, page); |
1073 | /* huge page split is done under lru_lock. so, we have no races. */ | 1073 | /* huge page split is done under lru_lock. so, we have no races. */ |
1074 | VM_BUG_ON(MEM_CGROUP_ZSTAT(mz, lru) < (1 << compound_order(page))); | ||
1074 | MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page); | 1075 | MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page); |
1075 | } | 1076 | } |
1076 | 1077 | ||
@@ -2465,9 +2466,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2465 | void mem_cgroup_split_huge_fixup(struct page *head) | 2466 | void mem_cgroup_split_huge_fixup(struct page *head) |
2466 | { | 2467 | { |
2467 | struct page_cgroup *head_pc = lookup_page_cgroup(head); | 2468 | struct page_cgroup *head_pc = lookup_page_cgroup(head); |
2468 | struct mem_cgroup_per_zone *mz; | ||
2469 | struct page_cgroup *pc; | 2469 | struct page_cgroup *pc; |
2470 | enum lru_list lru; | ||
2471 | int i; | 2470 | int i; |
2472 | 2471 | ||
2473 | if (mem_cgroup_disabled()) | 2472 | if (mem_cgroup_disabled()) |
@@ -2478,15 +2477,8 @@ void mem_cgroup_split_huge_fixup(struct page *head) | |||
2478 | smp_wmb();/* see __commit_charge() */ | 2477 | smp_wmb();/* see __commit_charge() */ |
2479 | pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; | 2478 | pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; |
2480 | } | 2479 | } |
2481 | /* | ||
2482 | * Tail pages will be added to LRU. | ||
2483 | * We hold lru_lock,then,reduce counter directly. | ||
2484 | */ | ||
2485 | lru = page_lru(head); | ||
2486 | mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head); | ||
2487 | MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1; | ||
2488 | } | 2480 | } |
2489 | #endif | 2481 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
2490 | 2482 | ||
2491 | /** | 2483 | /** |
2492 | * mem_cgroup_move_account - move account of the page | 2484 | * mem_cgroup_move_account - move account of the page |
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -650,6 +650,7 @@ void __pagevec_release(struct pagevec *pvec) | |||
650 | 650 | ||
651 | EXPORT_SYMBOL(__pagevec_release); | 651 | EXPORT_SYMBOL(__pagevec_release); |
652 | 652 | ||
653 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
653 | /* used by __split_huge_page_refcount() */ | 654 | /* used by __split_huge_page_refcount() */ |
654 | void lru_add_page_tail(struct zone* zone, | 655 | void lru_add_page_tail(struct zone* zone, |
655 | struct page *page, struct page *page_tail) | 656 | struct page *page, struct page *page_tail) |
@@ -666,8 +667,6 @@ void lru_add_page_tail(struct zone* zone, | |||
666 | SetPageLRU(page_tail); | 667 | SetPageLRU(page_tail); |
667 | 668 | ||
668 | if (page_evictable(page_tail, NULL)) { | 669 | if (page_evictable(page_tail, NULL)) { |
669 | struct lruvec *lruvec; | ||
670 | |||
671 | if (PageActive(page)) { | 670 | if (PageActive(page)) { |
672 | SetPageActive(page_tail); | 671 | SetPageActive(page_tail); |
673 | active = 1; | 672 | active = 1; |
@@ -677,18 +676,28 @@ void lru_add_page_tail(struct zone* zone, | |||
677 | lru = LRU_INACTIVE_ANON; | 676 | lru = LRU_INACTIVE_ANON; |
678 | } | 677 | } |
679 | update_page_reclaim_stat(zone, page_tail, file, active); | 678 | update_page_reclaim_stat(zone, page_tail, file, active); |
680 | lruvec = mem_cgroup_lru_add_list(zone, page_tail, lru); | ||
681 | if (likely(PageLRU(page))) | ||
682 | list_add(&page_tail->lru, page->lru.prev); | ||
683 | else | ||
684 | list_add(&page_tail->lru, lruvec->lists[lru].prev); | ||
685 | __mod_zone_page_state(zone, NR_LRU_BASE + lru, | ||
686 | hpage_nr_pages(page_tail)); | ||
687 | } else { | 679 | } else { |
688 | SetPageUnevictable(page_tail); | 680 | SetPageUnevictable(page_tail); |
689 | add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE); | 681 | lru = LRU_UNEVICTABLE; |
682 | } | ||
683 | |||
684 | if (likely(PageLRU(page))) | ||
685 | list_add_tail(&page_tail->lru, &page->lru); | ||
686 | else { | ||
687 | struct list_head *list_head; | ||
688 | /* | ||
689 | * Head page has not yet been counted, as an hpage, | ||
690 | * so we must account for each subpage individually. | ||
691 | * | ||
692 | * Use the standard add function to put page_tail on the list, | ||
693 | * but then correct its position so they all end up in order. | ||
694 | */ | ||
695 | add_page_to_lru_list(zone, page_tail, lru); | ||
696 | list_head = page_tail->lru.prev; | ||
697 | list_move_tail(&page_tail->lru, list_head); | ||
690 | } | 698 | } |
691 | } | 699 | } |
700 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
692 | 701 | ||
693 | static void ____pagevec_lru_add_fn(struct page *page, void *arg) | 702 | static void ____pagevec_lru_add_fn(struct page *page, void *arg) |
694 | { | 703 | { |