aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vlastimil Babka <vbabka@suse.cz> 2016-03-25 17:21:50 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-03-25 19:37:42 -0400
commit d9dddbf556674bf125ecd925b24e43a5cf2a568a (patch)
tree c56f3631764b4629abca28ac90874281c960e3c2
parent f419a08fb329e235df0cb8e329cff770e02d171a (diff)
mm/page_alloc: prevent merging between isolated and other pageblocks
Hanjun Guo has reported that a CMA stress test causes broken accounting of CMA and free pages: > Before the test, I got: > -bash-4.3# cat /proc/meminfo | grep Cma > CmaTotal: 204800 kB > CmaFree: 195044 kB > > > After running the test: > -bash-4.3# cat /proc/meminfo | grep Cma > CmaTotal: 204800 kB > CmaFree: 6602584 kB > > So the freed CMA memory is more than total.. > > Also the MemFree is more than mem total: > > -bash-4.3# cat /proc/meminfo > MemTotal: 16342016 kB > MemFree: 22367268 kB > MemAvailable: 22370528 kB Laura Abbott has confirmed the issue and suspected the freepage accounting rewrite around 3.18/4.0 by Joonsoo Kim. Joonsoo had a theory that this is caused by unexpected merging between MIGRATE_ISOLATE and MIGRATE_CMA pageblocks: > CMA isolates MAX_ORDER aligned blocks, but, during the process, > partially isolated block exists. If MAX_ORDER is 11 and > pageblock_order is 9, two pageblocks make up MAX_ORDER > aligned block and I can think of the following scenario because pageblock > (un)isolation would be done one by one. > > (each character means one pageblock. 'C', 'I' means MIGRATE_CMA, > MIGRATE_ISOLATE, respectively.) > > CC -> IC -> II (Isolation) > II -> CI -> CC (Un-isolation) > > If some pages are freed at this intermediate state such as IC or CI, > that page could be merged to the other page that is resident on > different type of pageblock and it will cause wrong freepage count. This was supposed to be prevented by CMA operating on MAX_ORDER blocks, but since it doesn't hold the zone->lock between pageblocks, a race window does exist. It's also likely that unexpected merging can occur between MIGRATE_ISOLATE and non-CMA pageblocks. This should be prevented in __free_one_page() since commit 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock"). 
However, we only check the migratetype of the pageblock where buddy merging has been initiated, not the migratetype of the buddy pageblock (or group of pageblocks) which can be MIGRATE_ISOLATE. Joonsoo has suggested checking for buddy migratetype as part of page_is_buddy(), but that would add extra checks in allocator hotpath and bloat-o-meter has shown significant code bloat (the function is inline). This patch reduces the bloat at some expense of more complicated code. The buddy-merging while-loop in __free_one_page() is initially bounded to pageblock_order and without any migratetype checks. The checks are placed outside, bumping the max_order if merging is allowed, and returning to the while-loop with a statement which can't possibly be considered harmful. This fixes the accounting bug and also removes the arguably weird state in the original commit 3c605096d315 where buddies could be left unmerged. Fixes: 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock") Link: https://lkml.org/lkml/2016/3/2/280 Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Reported-by: Hanjun Guo <guohanjun@huawei.com> Tested-by: Hanjun Guo <guohanjun@huawei.com> Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com> Debugged-by: Laura Abbott <labbott@redhat.com> Debugged-by: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: "Kirill A. Shutemov" <kirill@shutemov.name> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Minchan Kim <minchan@kernel.org> Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com> Cc: Michal Nazarewicz <mina86@mina86.com> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Cc: <stable@vger.kernel.org> [3.18+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/page_alloc.c 46
1 files changed, 33 insertions, 13 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a762be57e46e..59de90d5d3a3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -692,34 +692,28 @@ static inline void __free_one_page(struct page *page,
692 unsigned long combined_idx; 692 unsigned long combined_idx;
693 unsigned long uninitialized_var(buddy_idx); 693 unsigned long uninitialized_var(buddy_idx);
694 struct page *buddy; 694 struct page *buddy;
695 unsigned int max_order = MAX_ORDER; 695 unsigned int max_order;
696
697 max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
696 698
697 VM_BUG_ON(!zone_is_initialized(zone)); 699 VM_BUG_ON(!zone_is_initialized(zone));
698 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); 700 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
699 701
700 VM_BUG_ON(migratetype == -1); 702 VM_BUG_ON(migratetype == -1);
701 if (is_migrate_isolate(migratetype)) { 703 if (likely(!is_migrate_isolate(migratetype)))
702 /*
703 * We restrict max order of merging to prevent merge
704 * between freepages on isolate pageblock and normal
705 * pageblock. Without this, pageblock isolation
706 * could cause incorrect freepage accounting.
707 */
708 max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
709 } else {
710 __mod_zone_freepage_state(zone, 1 << order, migratetype); 704 __mod_zone_freepage_state(zone, 1 << order, migratetype);
711 }
712 705
713 page_idx = pfn & ((1 << max_order) - 1); 706 page_idx = pfn & ((1 << MAX_ORDER) - 1);
714 707
715 VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); 708 VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
716 VM_BUG_ON_PAGE(bad_range(zone, page), page); 709 VM_BUG_ON_PAGE(bad_range(zone, page), page);
717 710
711continue_merging:
718 while (order < max_order - 1) { 712 while (order < max_order - 1) {
719 buddy_idx = __find_buddy_index(page_idx, order); 713 buddy_idx = __find_buddy_index(page_idx, order);
720 buddy = page + (buddy_idx - page_idx); 714 buddy = page + (buddy_idx - page_idx);
721 if (!page_is_buddy(page, buddy, order)) 715 if (!page_is_buddy(page, buddy, order))
722 break; 716 goto done_merging;
723 /* 717 /*
724 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page, 718 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
725 * merge with it and move up one order. 719 * merge with it and move up one order.
@@ -736,6 +730,32 @@ static inline void __free_one_page(struct page *page,
736 page_idx = combined_idx; 730 page_idx = combined_idx;
737 order++; 731 order++;
738 } 732 }
733 if (max_order < MAX_ORDER) {
734 /* If we are here, it means order is >= pageblock_order.
735 * We want to prevent merge between freepages on isolate
736 * pageblock and normal pageblock. Without this, pageblock
737 * isolation could cause incorrect freepage or CMA accounting.
738 *
739 * We don't want to hit this code for the more frequent
740 * low-order merging.
741 */
742 if (unlikely(has_isolate_pageblock(zone))) {
743 int buddy_mt;
744
745 buddy_idx = __find_buddy_index(page_idx, order);
746 buddy = page + (buddy_idx - page_idx);
747 buddy_mt = get_pageblock_migratetype(buddy);
748
749 if (migratetype != buddy_mt
750 && (is_migrate_isolate(migratetype) ||
751 is_migrate_isolate(buddy_mt)))
752 goto done_merging;
753 }
754 max_order++;
755 goto continue_merging;
756 }
757
758done_merging:
739 set_page_order(page, order); 759 set_page_order(page, order);
740 760
741 /* 761 /*