aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Hugh Dickins <hughd@google.com> 2012-01-12 20:19:52 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-01-12 23:13:09 -0500
commit: 12d27107867fc7216e8faaff0b894b0f162dcf75 (patch)
tree: 8f35d39f7e5d0b0f0ba10a3475e9fa4a2581e509
parent: 0cee34fd72c582b4f8ad8ce00645b75fb4168199 (diff)
memcg: fix __split_huge_page_refcount()
This patch started off as a cleanup: __split_huge_page_refcount() has to cope with two scenarios, when the hugepage being split is already on the LRU, and when it is not; but why does it have to split that accounting across three different sites? Consolidate it in lru_add_page_tail(), handling evictable and unevictable alike, and use the standard add_page_to_lru_list() when accounting is needed (when the head is not yet on the LRU). But a recent regression in -next, I guess the removal of the PageCgroupAcctLRU test from mem_cgroup_split_huge_fixup(), makes this now a necessary fix: under load, the MEM_CGROUP_ZSTAT count was wrapping to a huge number, messing up reclaim calculations and causing a freeze at rmdir of cgroup. Add a VM_BUG_ON to mem_cgroup_lru_del_list() when we're about to wrap that count - this has not been the only such incident. Document that lru_add_page_tail() is for Transparent HugePages by #ifdef around it. Signed-off-by: Hugh Dickins <hughd@google.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/huge_memory.c 10
-rw-r--r-- mm/memcontrol.c 12
-rw-r--r-- mm/swap.c 29
3 files changed, 21 insertions, 30 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 76cc3f7dd4f0..b3ffc21ce801 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1229,7 +1229,6 @@ static void __split_huge_page_refcount(struct page *page)
1229{ 1229{
1230 int i; 1230 int i;
1231 struct zone *zone = page_zone(page); 1231 struct zone *zone = page_zone(page);
1232 int zonestat;
1233 int tail_count = 0; 1232 int tail_count = 0;
1234 1233
1235 /* prevent PageLRU to go away from under us, and freeze lru stats */ 1234 /* prevent PageLRU to go away from under us, and freeze lru stats */
@@ -1317,15 +1316,6 @@ static void __split_huge_page_refcount(struct page *page)
1317 __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); 1316 __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
1318 __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); 1317 __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
1319 1318
1320 /*
1321 * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics,
1322 * so adjust those appropriately if this page is on the LRU.
1323 */
1324 if (PageLRU(page)) {
1325 zonestat = NR_LRU_BASE + page_lru(page);
1326 __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1));
1327 }
1328
1329 ClearPageCompound(page); 1319 ClearPageCompound(page);
1330 compound_unlock(page); 1320 compound_unlock(page);
1331 spin_unlock_irq(&zone->lru_lock); 1321 spin_unlock_irq(&zone->lru_lock);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 609c49f492e6..9f2f64697409 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1071,6 +1071,7 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
1071 VM_BUG_ON(!memcg); 1071 VM_BUG_ON(!memcg);
1072 mz = page_cgroup_zoneinfo(memcg, page); 1072 mz = page_cgroup_zoneinfo(memcg, page);
1073 /* huge page split is done under lru_lock. so, we have no races. */ 1073 /* huge page split is done under lru_lock. so, we have no races. */
1074 VM_BUG_ON(MEM_CGROUP_ZSTAT(mz, lru) < (1 << compound_order(page)));
1074 MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page); 1075 MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
1075} 1076}
1076 1077
@@ -2465,9 +2466,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2465void mem_cgroup_split_huge_fixup(struct page *head) 2466void mem_cgroup_split_huge_fixup(struct page *head)
2466{ 2467{
2467 struct page_cgroup *head_pc = lookup_page_cgroup(head); 2468 struct page_cgroup *head_pc = lookup_page_cgroup(head);
2468 struct mem_cgroup_per_zone *mz;
2469 struct page_cgroup *pc; 2469 struct page_cgroup *pc;
2470 enum lru_list lru;
2471 int i; 2470 int i;
2472 2471
2473 if (mem_cgroup_disabled()) 2472 if (mem_cgroup_disabled())
@@ -2478,15 +2477,8 @@ void mem_cgroup_split_huge_fixup(struct page *head)
2478 smp_wmb();/* see __commit_charge() */ 2477 smp_wmb();/* see __commit_charge() */
2479 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; 2478 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
2480 } 2479 }
2481 /*
2482 * Tail pages will be added to LRU.
2483 * We hold lru_lock,then,reduce counter directly.
2484 */
2485 lru = page_lru(head);
2486 mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
2487 MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
2488} 2480}
2489#endif 2481#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
2490 2482
2491/** 2483/**
2492 * mem_cgroup_move_account - move account of the page 2484 * mem_cgroup_move_account - move account of the page
diff --git a/mm/swap.c b/mm/swap.c
index ddccf8e0b4ae..db6defaf2e55 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -650,6 +650,7 @@ void __pagevec_release(struct pagevec *pvec)
650 650
651EXPORT_SYMBOL(__pagevec_release); 651EXPORT_SYMBOL(__pagevec_release);
652 652
653#ifdef CONFIG_TRANSPARENT_HUGEPAGE
653/* used by __split_huge_page_refcount() */ 654/* used by __split_huge_page_refcount() */
654void lru_add_page_tail(struct zone* zone, 655void lru_add_page_tail(struct zone* zone,
655 struct page *page, struct page *page_tail) 656 struct page *page, struct page *page_tail)
@@ -666,8 +667,6 @@ void lru_add_page_tail(struct zone* zone,
666 SetPageLRU(page_tail); 667 SetPageLRU(page_tail);
667 668
668 if (page_evictable(page_tail, NULL)) { 669 if (page_evictable(page_tail, NULL)) {
669 struct lruvec *lruvec;
670
671 if (PageActive(page)) { 670 if (PageActive(page)) {
672 SetPageActive(page_tail); 671 SetPageActive(page_tail);
673 active = 1; 672 active = 1;
@@ -677,18 +676,28 @@ void lru_add_page_tail(struct zone* zone,
677 lru = LRU_INACTIVE_ANON; 676 lru = LRU_INACTIVE_ANON;
678 } 677 }
679 update_page_reclaim_stat(zone, page_tail, file, active); 678 update_page_reclaim_stat(zone, page_tail, file, active);
680 lruvec = mem_cgroup_lru_add_list(zone, page_tail, lru);
681 if (likely(PageLRU(page)))
682 list_add(&page_tail->lru, page->lru.prev);
683 else
684 list_add(&page_tail->lru, lruvec->lists[lru].prev);
685 __mod_zone_page_state(zone, NR_LRU_BASE + lru,
686 hpage_nr_pages(page_tail));
687 } else { 679 } else {
688 SetPageUnevictable(page_tail); 680 SetPageUnevictable(page_tail);
689 add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE); 681 lru = LRU_UNEVICTABLE;
682 }
683
684 if (likely(PageLRU(page)))
685 list_add_tail(&page_tail->lru, &page->lru);
686 else {
687 struct list_head *list_head;
688 /*
689 * Head page has not yet been counted, as an hpage,
690 * so we must account for each subpage individually.
691 *
692 * Use the standard add function to put page_tail on the list,
693 * but then correct its position so they all end up in order.
694 */
695 add_page_to_lru_list(zone, page_tail, lru);
696 list_head = page_tail->lru.prev;
697 list_move_tail(&page_tail->lru, list_head);
690 } 698 }
691} 699}
700#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
692 701
693static void ____pagevec_lru_add_fn(struct page *page, void *arg) 702static void ____pagevec_lru_add_fn(struct page *page, void *arg)
694{ 703{