about summary refs log tree commit diff stats
path: root/mm/compaction.c
diff options
context:
space:
mode:
authorVlastimil Babka <vbabka@suse.cz>2014-10-09 18:27:23 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-09 22:25:54 -0400
commit99c0fd5e51c447917264154cb01a967804ace745 (patch)
treeb733abc6c90b4689a68e189095bb6217d0ff8933 /mm/compaction.c
parente14c720efdd73c6d69cd8d07fa894bcd11fe1973 (diff)
mm, compaction: skip buddy pages by their order in the migrate scanner
The migration scanner skips PageBuddy pages, but does not consider their order as checking page_order() is generally unsafe without holding the zone->lock, and acquiring the lock just for the check wouldn't be a good tradeoff. Still, this could avoid some iterations over the rest of the buddy page, and if we are careful, the race window between PageBuddy() check and page_order() is small, and the worst thing that can happen is that we skip too much and miss some isolation candidates. This is not that bad, as compaction can already fail for many other reasons like parallel allocations, and those have much larger race window. This patch therefore makes the migration scanner obtain the buddy page order and use it to skip the whole buddy page, if the order appears to be in the valid range. It's important that the page_order() is read only once, so that the value used in the checks and in the pfn calculation is the same. But in theory the compiler can replace the local variable by multiple inlines of page_order(). Therefore, the patch introduces page_order_unsafe() that uses ACCESS_ONCE to prevent this. Testing with stress-highalloc from mmtests shows a 15% reduction in number of pages scanned by migration scanner. The reduction is >60% with __GFP_NO_KSWAPD allocations, along with success rates better by few percent. Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com> Acked-by: Minchan Kim <minchan@kernel.org> Acked-by: Mel Gorman <mgorman@suse.de> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Michal Nazarewicz <mina86@mina86.com> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Christoph Lameter <cl@linux.com> Cc: Rik van Riel <riel@redhat.com> Acked-by: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/compaction.c')
-rw-r--r--mm/compaction.c36
1 file changed, 31 insertions, 5 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index b69b7dac0361..b9cf751cc00e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -313,8 +313,15 @@ static inline bool compact_should_abort(struct compact_control *cc)
313static bool suitable_migration_target(struct page *page) 313static bool suitable_migration_target(struct page *page)
314{ 314{
315 /* If the page is a large free page, then disallow migration */ 315 /* If the page is a large free page, then disallow migration */
316 if (PageBuddy(page) && page_order(page) >= pageblock_order) 316 if (PageBuddy(page)) {
317 return false; 317 /*
318 * We are checking page_order without zone->lock taken. But
319 * the only small danger is that we skip a potentially suitable
320 * pageblock, so it's not worth to check order for valid range.
321 */
322 if (page_order_unsafe(page) >= pageblock_order)
323 return false;
324 }
318 325
319 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ 326 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
320 if (migrate_async_suitable(get_pageblock_migratetype(page))) 327 if (migrate_async_suitable(get_pageblock_migratetype(page)))
@@ -608,11 +615,23 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
608 valid_page = page; 615 valid_page = page;
609 616
610 /* 617 /*
611 * Skip if free. page_order cannot be used without zone->lock 618 * Skip if free. We read page order here without zone lock
612 * as nothing prevents parallel allocations or buddy merging. 619 * which is generally unsafe, but the race window is small and
620 * the worst thing that can happen is that we skip some
621 * potential isolation targets.
613 */ 622 */
614 if (PageBuddy(page)) 623 if (PageBuddy(page)) {
624 unsigned long freepage_order = page_order_unsafe(page);
625
626 /*
627 * Without lock, we cannot be sure that what we got is
628 * a valid page order. Consider only values in the
629 * valid order range to prevent low_pfn overflow.
630 */
631 if (freepage_order > 0 && freepage_order < MAX_ORDER)
632 low_pfn += (1UL << freepage_order) - 1;
615 continue; 633 continue;
634 }
616 635
617 /* 636 /*
618 * Check may be lockless but that's ok as we recheck later. 637 * Check may be lockless but that's ok as we recheck later.
@@ -698,6 +717,13 @@ isolate_success:
698 } 717 }
699 } 718 }
700 719
720 /*
721 * The PageBuddy() check could have potentially brought us outside
722 * the range to be scanned.
723 */
724 if (unlikely(low_pfn > end_pfn))
725 low_pfn = end_pfn;
726
701 if (locked) 727 if (locked)
702 spin_unlock_irqrestore(&zone->lru_lock, flags); 728 spin_unlock_irqrestore(&zone->lru_lock, flags);
703 729