author		Vlastimil Babka <vbabka@suse.cz>	2014-10-09 18:27:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-09 22:25:54 -0400
commit		99c0fd5e51c447917264154cb01a967804ace745 (patch)
tree		b733abc6c90b4689a68e189095bb6217d0ff8933
parent		e14c720efdd73c6d69cd8d07fa894bcd11fe1973 (diff)
mm, compaction: skip buddy pages by their order in the migrate scanner
The migration scanner skips PageBuddy pages, but does not consider their
order, as checking page_order() is generally unsafe without holding the
zone->lock, and acquiring the lock just for the check wouldn't be a good
tradeoff.

Still, this could avoid some iterations over the rest of the buddy page, and
if we are careful, the race window between the PageBuddy() check and the
page_order() read is small, and the worst that can happen is that we skip
too much and miss some isolation candidates.  This is not that bad, as
compaction can already fail for many other reasons like parallel
allocations, and those have a much larger race window.

This patch therefore makes the migration scanner obtain the buddy page
order and use it to skip the whole buddy page, if the order appears to be
in the valid range.

It's important that page_order() is read only once, so that the value used
in the checks and in the pfn calculation is the same.  But in theory the
compiler can replace the local variable with multiple inlined calls to
page_order().  Therefore, the patch introduces page_order_unsafe(), which
uses ACCESS_ONCE to prevent this.

Testing with stress-highalloc from mmtests shows a 15% reduction in the
number of pages scanned by the migration scanner.  The reduction is >60%
with __GFP_NO_KSWAPD allocations, along with success rates better by a few
percent.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	mm/compaction.c	36
-rw-r--r--	mm/internal.h	16
2 files changed, 46 insertions(+), 6 deletions(-)
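
The hunks below boil down to one pattern: read the buddy page's order exactly once without zone->lock, sanity-check the value, and use it to skip the rest of the free buddy block. The following is a minimal standalone sketch of that pattern in plain C; it is not kernel code, and struct fake_page, fake_page_order_unsafe(), scan_range() and SKETCH_MAX_ORDER are illustrative stand-ins for struct page, page_order_unsafe(), isolate_migratepages_block() and MAX_ORDER.

/* Hypothetical build: cc -Wall -o skip_sketch skip_sketch.c */
#include <stdbool.h>
#include <stdio.h>

#define SKETCH_MAX_ORDER 11	/* stand-in for the kernel's MAX_ORDER */

/* Stand-in for struct page with only the fields the sketch needs. */
struct fake_page {
	bool buddy;		/* plays the role of PageBuddy() */
	unsigned long order;	/* plays the role of page_private() */
};

/*
 * Single volatile read, in the spirit of page_order_unsafe(): the value that
 * passes the range check below is guaranteed to be the value used in the
 * pfn arithmetic.
 */
#define fake_page_order_unsafe(p)	(*(volatile unsigned long *)&(p)->order)

/* Hypothetical scan loop mirroring the shape of isolate_migratepages_block(). */
static unsigned long scan_range(struct fake_page *pages,
				unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long low_pfn;

	for (low_pfn = start_pfn; low_pfn < end_pfn; low_pfn++) {
		struct fake_page *page = &pages[low_pfn];

		if (page->buddy) {
			unsigned long freepage_order = fake_page_order_unsafe(page);

			/* Only trust orders in the valid range. */
			if (freepage_order > 0 && freepage_order < SKETCH_MAX_ORDER)
				low_pfn += (1UL << freepage_order) - 1; /* -1: loop adds 1 */
			continue;
		}

		printf("pfn %lu is a migration candidate\n", low_pfn);
	}

	/* The skip may overshoot the range; clamp, as the patch does. */
	if (low_pfn > end_pfn)
		low_pfn = end_pfn;

	return low_pfn;
}

int main(void)
{
	struct fake_page pages[16] = { { 0 } };

	/* Pretend pfn 4 starts an order-2 (4-page) free buddy block. */
	pages[4].buddy = true;
	pages[4].order = 2;

	printf("scan finished at pfn %lu\n", scan_range(pages, 0, 16));
	return 0;
}

With an order-2 buddy block at pfn 4, the += advances past pfns 5-7 in one step and the loop increment lands on pfn 8, which is how the scanner avoids iterating over the rest of a free buddy page.
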
diff --git a/mm/compaction.c b/mm/compaction.c
index b69b7dac0361..b9cf751cc00e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -313,8 +313,15 @@ static inline bool compact_should_abort(struct compact_control *cc)
 static bool suitable_migration_target(struct page *page)
 {
 	/* If the page is a large free page, then disallow migration */
-	if (PageBuddy(page) && page_order(page) >= pageblock_order)
-		return false;
+	if (PageBuddy(page)) {
+		/*
+		 * We are checking page_order without zone->lock taken. But
+		 * the only small danger is that we skip a potentially suitable
+		 * pageblock, so it's not worth to check order for valid range.
+		 */
+		if (page_order_unsafe(page) >= pageblock_order)
+			return false;
+	}
 
 	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
 	if (migrate_async_suitable(get_pageblock_migratetype(page)))
@@ -608,11 +615,23 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			valid_page = page;
 
 		/*
-		 * Skip if free. page_order cannot be used without zone->lock
-		 * as nothing prevents parallel allocations or buddy merging.
+		 * Skip if free. We read page order here without zone lock
+		 * which is generally unsafe, but the race window is small and
+		 * the worst thing that can happen is that we skip some
+		 * potential isolation targets.
 		 */
-		if (PageBuddy(page))
+		if (PageBuddy(page)) {
+			unsigned long freepage_order = page_order_unsafe(page);
+
+			/*
+			 * Without lock, we cannot be sure that what we got is
+			 * a valid page order. Consider only values in the
+			 * valid order range to prevent low_pfn overflow.
+			 */
+			if (freepage_order > 0 && freepage_order < MAX_ORDER)
+				low_pfn += (1UL << freepage_order) - 1;
 			continue;
+		}
 
 		/*
 		 * Check may be lockless but that's ok as we recheck later.
@@ -698,6 +717,13 @@ isolate_success:
 		}
 	}
 
+	/*
+	 * The PageBuddy() check could have potentially brought us outside
+	 * the range to be scanned.
+	 */
+	if (unlikely(low_pfn > end_pfn))
+		low_pfn = end_pfn;
+
 	if (locked)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
diff --git a/mm/internal.h b/mm/internal.h
index 4c1d604c396c..86ae964a25b0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -164,7 +164,8 @@ isolate_migratepages_range(struct compact_control *cc,
  * general, page_zone(page)->lock must be held by the caller to prevent the
  * page from being allocated in parallel and returning garbage as the order.
  * If a caller does not hold page_zone(page)->lock, it must guarantee that the
- * page cannot be allocated or merged in parallel.
+ * page cannot be allocated or merged in parallel. Alternatively, it must
+ * handle invalid values gracefully, and use page_order_unsafe() below.
  */
 static inline unsigned long page_order(struct page *page)
 {
@@ -172,6 +173,19 @@ static inline unsigned long page_order(struct page *page)
 	return page_private(page);
 }
 
+/*
+ * Like page_order(), but for callers who cannot afford to hold the zone lock.
+ * PageBuddy() should be checked first by the caller to minimize race window,
+ * and invalid values must be handled gracefully.
+ *
+ * ACCESS_ONCE is used so that if the caller assigns the result into a local
+ * variable and e.g. tests it for valid range before using, the compiler cannot
+ * decide to remove the variable and inline the page_private(page) multiple
+ * times, potentially observing different values in the tests and the actual
+ * use of the result.
+ */
+#define page_order_unsafe(page)		ACCESS_ONCE(page_private(page))
+
 static inline bool is_cow_mapping(vm_flags_t flags)
 {
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
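
The ACCESS_ONCE comment added above is the subtle part of the change: a plain read of page_private(page) may legally be re-loaded by the compiler at every use, so the value that passes the range check might not be the value used in the shift. A small illustrative comparison, in plain C with a hypothetical order_field rather than struct page; the volatile cast is what the kernel's ACCESS_ONCE macro of that era expanded to.

#define SKETCH_MAX_ORDER 11	/* stand-in for MAX_ORDER */

/*
 * Racy: 'order' is a plain load, so the compiler may drop the local and
 * re-read *order_field both in the comparison and in the shift, potentially
 * observing two different values if the field changes concurrently.
 */
unsigned long skip_racy(unsigned long *order_field, unsigned long low_pfn)
{
	unsigned long order = *order_field;

	if (order > 0 && order < SKETCH_MAX_ORDER)
		low_pfn += (1UL << order) - 1;
	return low_pfn;
}

/*
 * Single read: the volatile-qualified access forces exactly one load, so the
 * checked value and the used value are the same, which is what
 * page_order_unsafe() guarantees via ACCESS_ONCE.
 */
unsigned long skip_once(unsigned long *order_field, unsigned long low_pfn)
{
	unsigned long order = *(volatile unsigned long *)order_field;

	if (order > 0 && order < SKETCH_MAX_ORDER)
		low_pfn += (1UL << order) - 1;
	return low_pfn;
}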