 mm/compaction.c | 36 +++++++++++++++++++++++++++++++-----
 mm/internal.h   | 16 +++++++++++++++-
 2 files changed, 46 insertions(+), 6 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index b69b7dac0361..b9cf751cc00e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -313,8 +313,15 @@ static inline bool compact_should_abort(struct compact_control *cc)
 static bool suitable_migration_target(struct page *page)
 {
 	/* If the page is a large free page, then disallow migration */
-	if (PageBuddy(page) && page_order(page) >= pageblock_order)
-		return false;
+	if (PageBuddy(page)) {
+		/*
+		 * We are checking page_order without zone->lock taken. But
+		 * the only small danger is that we skip a potentially suitable
+		 * pageblock, so it's not worth to check order for valid range.
+		 */
+		if (page_order_unsafe(page) >= pageblock_order)
+			return false;
+	}
 
 	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
 	if (migrate_async_suitable(get_pageblock_migratetype(page)))
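
Note on the hunk above: unlike the migrate scanner changed below, this path deliberately omits a valid-range check on the possibly-stale order. The value read without zone->lock is only used in a comparison, never in PFN arithmetic, so the worst a garbage read can do (say, a leftover value that compares >= pageblock_order, typically 9 with x86 defaults) is produce a spurious "return false", i.e. skip one potentially suitable pageblock.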
@@ -608,11 +615,23 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			valid_page = page;
 
 		/*
-		 * Skip if free. page_order cannot be used without zone->lock
-		 * as nothing prevents parallel allocations or buddy merging.
+		 * Skip if free. We read page order here without zone lock
+		 * which is generally unsafe, but the race window is small and
+		 * the worst thing that can happen is that we skip some
+		 * potential isolation targets.
 		 */
-		if (PageBuddy(page))
+		if (PageBuddy(page)) {
+			unsigned long freepage_order = page_order_unsafe(page);
+
+			/*
+			 * Without lock, we cannot be sure that what we got is
+			 * a valid page order. Consider only values in the
+			 * valid order range to prevent low_pfn overflow.
+			 */
+			if (freepage_order > 0 && freepage_order < MAX_ORDER)
+				low_pfn += (1UL << freepage_order) - 1;
 			continue;
+		}
 
 		/*
 		 * Check may be lockless but that's ok as we recheck later.
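
The range check is needed on this path, but not in suitable_migration_target(), because here the order feeds PFN arithmetic. A minimal userspace sketch of what the check rejects (not kernel code; MAX_ORDER assumed at its common default of 11, and the stale value is made up):

#include <stdio.h>

#define MAX_ORDER 11	/* assumed default: valid buddy orders are 0..MAX_ORDER-1 */

int main(void)
{
	unsigned long low_pfn = 0x100000;
	unsigned long stale_order = 47;	/* hypothetical garbage from a racing allocation */

	/*
	 * Unchecked, 1UL << 47 would jump low_pfn by 2^47 pages, and a
	 * shift by >= 64 would be undefined behaviour outright. The range
	 * check caps the skip at (1UL << (MAX_ORDER - 1)) - 1 pages.
	 */
	if (stale_order > 0 && stale_order < MAX_ORDER)
		low_pfn += (1UL << stale_order) - 1;
	else
		printf("rejected bogus order %lu; low_pfn stays %#lx\n",
		       stale_order, low_pfn);
	return 0;
}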
@@ -698,6 +717,13 @@ isolate_success:
 		}
 	}
 
+	/*
+	 * The PageBuddy() check could have potentially brought us outside
+	 * the range to be scanned.
+	 */
+	if (unlikely(low_pfn > end_pfn))
+		low_pfn = end_pfn;
+
 	if (locked)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
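
The clamp matters because even a legitimate, in-range order can overshoot: with typical x86 defaults (pageblock_order 9, MAX_ORDER 11), scanning a single 512-page pageblock whose first page is the head of an order-10, 1024-page buddy advances low_pfn by 1023 and leaves it roughly 512 pfns past end_pfn. Normalizing to end_pfn keeps the pfn handed back to callers inside the range they asked to scan.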
diff --git a/mm/internal.h b/mm/internal.h
index 4c1d604c396c..86ae964a25b0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -164,7 +164,8 @@ isolate_migratepages_range(struct compact_control *cc,
  * general, page_zone(page)->lock must be held by the caller to prevent the
  * page from being allocated in parallel and returning garbage as the order.
  * If a caller does not hold page_zone(page)->lock, it must guarantee that the
- * page cannot be allocated or merged in parallel.
+ * page cannot be allocated or merged in parallel. Alternatively, it must
+ * handle invalid values gracefully, and use page_order_unsafe() below.
  */
 static inline unsigned long page_order(struct page *page)
 {
@@ -172,6 +173,19 @@ static inline unsigned long page_order(struct page *page)
 	return page_private(page);
 }
 
+/*
+ * Like page_order(), but for callers who cannot afford to hold the zone lock.
+ * PageBuddy() should be checked first by the caller to minimize race window,
+ * and invalid values must be handled gracefully.
+ *
+ * ACCESS_ONCE is used so that if the caller assigns the result into a local
+ * variable and e.g. tests it for valid range before using, the compiler cannot
+ * decide to remove the variable and inline the page_private(page) multiple
+ * times, potentially observing different values in the tests and the actual
+ * use of the result.
+ */
+#define page_order_unsafe(page)		ACCESS_ONCE(page_private(page))
+
 static inline bool is_cow_mapping(vm_flags_t flags)
 {
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
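
To make the ACCESS_ONCE rationale concrete, a sketch of the compiler transformation it forbids (illustrative of a legal optimization, not actual generated code):

	/*
	 * With a plain read, the compiler may eliminate the local
	 * variable and re-read page_private(page) at each use:
	 */
	unsigned long order = page_private(page);

	if (order > 0 && order < MAX_ORDER)	/* possible reload #1 */
		low_pfn += (1UL << order) - 1;	/* possible reload #2 */

	/*
	 * A concurrent allocation updating page_private between the two
	 * reloads lets the shift consume a value that was never
	 * range-checked. The volatile cast in ACCESS_ONCE forces exactly
	 * one load, so the tested value and the used value are the same:
	 */
	unsigned long order_once = ACCESS_ONCE(page_private(page));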