path: root/mm/compaction.c
Diffstat (limited to 'mm/compaction.c')
-rw-r--r--  mm/compaction.c  562
1 file changed, 386 insertions, 176 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 7fcd3a52e68d..2c4ce17651d8 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -50,6 +50,111 @@ static inline bool migrate_async_suitable(int migratetype)
50 return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; 50 return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
51} 51}
52 52
53#ifdef CONFIG_COMPACTION
54/* Returns true if the pageblock should be scanned for pages to isolate. */
55static inline bool isolation_suitable(struct compact_control *cc,
56 struct page *page)
57{
58 if (cc->ignore_skip_hint)
59 return true;
60
61 return !get_pageblock_skip(page);
62}
63
64/*
65 * This function is called to clear all cached information on pageblocks that
66 * should be skipped for page isolation when the migrate and free page scanner
67 * meet.
68 */
69static void __reset_isolation_suitable(struct zone *zone)
70{
71 unsigned long start_pfn = zone->zone_start_pfn;
72 unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
73 unsigned long pfn;
74
75 zone->compact_cached_migrate_pfn = start_pfn;
76 zone->compact_cached_free_pfn = end_pfn;
77 zone->compact_blockskip_flush = false;
78
79 /* Walk the zone and mark every pageblock as suitable for isolation */
80 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
81 struct page *page;
82
83 cond_resched();
84
85 if (!pfn_valid(pfn))
86 continue;
87
88 page = pfn_to_page(pfn);
89 if (zone != page_zone(page))
90 continue;
91
92 clear_pageblock_skip(page);
93 }
94}
95
96void reset_isolation_suitable(pg_data_t *pgdat)
97{
98 int zoneid;
99
100 for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
101 struct zone *zone = &pgdat->node_zones[zoneid];
102 if (!populated_zone(zone))
103 continue;
104
105 /* Only flush if a full compaction finished recently */
106 if (zone->compact_blockskip_flush)
107 __reset_isolation_suitable(zone);
108 }
109}
110
111/*
112 * If no pages were isolated then mark this pageblock to be skipped in the
113 * future. The information is later cleared by __reset_isolation_suitable().
114 */
115static void update_pageblock_skip(struct compact_control *cc,
116 struct page *page, unsigned long nr_isolated,
117 bool migrate_scanner)
118{
119 struct zone *zone = cc->zone;
120 if (!page)
121 return;
122
123 if (!nr_isolated) {
124 unsigned long pfn = page_to_pfn(page);
125 set_pageblock_skip(page);
126
127 /* Update where compaction should restart */
128 if (migrate_scanner) {
129 if (!cc->finished_update_migrate &&
130 pfn > zone->compact_cached_migrate_pfn)
131 zone->compact_cached_migrate_pfn = pfn;
132 } else {
133 if (!cc->finished_update_free &&
134 pfn < zone->compact_cached_free_pfn)
135 zone->compact_cached_free_pfn = pfn;
136 }
137 }
138}
139#else
140static inline bool isolation_suitable(struct compact_control *cc,
141 struct page *page)
142{
143 return true;
144}
145
146static void update_pageblock_skip(struct compact_control *cc,
147 struct page *page, unsigned long nr_isolated,
148 bool migrate_scanner)
149{
150}
151#endif /* CONFIG_COMPACTION */
152
153static inline bool should_release_lock(spinlock_t *lock)
154{
155 return need_resched() || spin_is_contended(lock);
156}
157
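
As an orientation note: the skip-hint helpers added above are intended to be consulted by both the migrate and free scanners before they touch a pageblock, and updated after a scan of that block isolates nothing. A minimal caller sketch, assuming the compact_control layout from this patch; scan_one_pageblock() is a hypothetical stand-in for the real isolation loop:

/*
 * Illustrative only: the expected usage pattern for the skip hints.
 * scan_one_pageblock() is a placeholder for the real per-block scan.
 */
static void scan_pageblock(struct compact_control *cc, struct page *page,
			   bool migrate_scanner)
{
	unsigned long nr_isolated;

	/* Honour a previously recorded failure unless hints are ignored */
	if (!isolation_suitable(cc, page))
		return;

	nr_isolated = scan_one_pageblock(cc, page);

	/* Record a fruitless scan so later passes skip this block */
	update_pageblock_skip(cc, page, nr_isolated, migrate_scanner);
}
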
53/* 158/*
54 * Compaction requires the taking of some coarse locks that are potentially 159 * Compaction requires the taking of some coarse locks that are potentially
55 * very heavily contended. Check if the process needs to be scheduled or 160 * very heavily contended. Check if the process needs to be scheduled or
@@ -62,7 +167,7 @@ static inline bool migrate_async_suitable(int migratetype)
62static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags, 167static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
63 bool locked, struct compact_control *cc) 168 bool locked, struct compact_control *cc)
64{ 169{
65 if (need_resched() || spin_is_contended(lock)) { 170 if (should_release_lock(lock)) {
66 if (locked) { 171 if (locked) {
67 spin_unlock_irqrestore(lock, *flags); 172 spin_unlock_irqrestore(lock, *flags);
68 locked = false; 173 locked = false;
@@ -70,14 +175,11 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
70 175
71 /* async aborts if taking too long or contended */ 176 /* async aborts if taking too long or contended */
72 if (!cc->sync) { 177 if (!cc->sync) {
73 if (cc->contended) 178 cc->contended = true;
74 *cc->contended = true;
75 return false; 179 return false;
76 } 180 }
77 181
78 cond_resched(); 182 cond_resched();
79 if (fatal_signal_pending(current))
80 return false;
81 } 183 }
82 184
83 if (!locked) 185 if (!locked)
@@ -91,44 +193,139 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock,
91 return compact_checklock_irqsave(lock, flags, false, cc); 193 return compact_checklock_irqsave(lock, flags, false, cc);
92} 194}
93 195
196/* Returns true if the page is within a block suitable for migration to */
197static bool suitable_migration_target(struct page *page)
198{
199 int migratetype = get_pageblock_migratetype(page);
200
201 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
202 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
203 return false;
204
205 /* If the page is a large free page, then allow migration */
206 if (PageBuddy(page) && page_order(page) >= pageblock_order)
207 return true;
208
209 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
210 if (migrate_async_suitable(migratetype))
211 return true;
212
213 /* Otherwise skip the block */
214 return false;
215}
216
217static void compact_capture_page(struct compact_control *cc)
218{
219 unsigned long flags;
220 int mtype, mtype_low, mtype_high;
221
222 if (!cc->page || *cc->page)
223 return;
224
225 /*
226 * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
 227 * regardless of the migratetype of the freelist it is captured from.
228 * This is fine because the order for a high-order MIGRATE_MOVABLE
229 * allocation is typically at least a pageblock size and overall
230 * fragmentation is not impaired. Other allocation types must
231 * capture pages from their own migratelist because otherwise they
232 * could pollute other pageblocks like MIGRATE_MOVABLE with
 233 * difficult-to-move pages, making fragmentation worse overall.
234 */
235 if (cc->migratetype == MIGRATE_MOVABLE) {
236 mtype_low = 0;
237 mtype_high = MIGRATE_PCPTYPES;
238 } else {
239 mtype_low = cc->migratetype;
240 mtype_high = cc->migratetype + 1;
241 }
242
243 /* Speculatively examine the free lists without zone lock */
244 for (mtype = mtype_low; mtype < mtype_high; mtype++) {
245 int order;
246 for (order = cc->order; order < MAX_ORDER; order++) {
247 struct page *page;
248 struct free_area *area;
249 area = &(cc->zone->free_area[order]);
250 if (list_empty(&area->free_list[mtype]))
251 continue;
252
253 /* Take the lock and attempt capture of the page */
254 if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
255 return;
256 if (!list_empty(&area->free_list[mtype])) {
257 page = list_entry(area->free_list[mtype].next,
258 struct page, lru);
259 if (capture_free_page(page, cc->order, mtype)) {
260 spin_unlock_irqrestore(&cc->zone->lock,
261 flags);
262 *cc->page = page;
263 return;
264 }
265 }
266 spin_unlock_irqrestore(&cc->zone->lock, flags);
267 }
268 }
269}
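
compact_capture_page() above follows a check-then-lock-then-recheck pattern: the free list is peeked without zone->lock, and only if it looks non-empty is the lock taken and the test repeated before the page is actually captured. A stripped-down sketch of that pattern, with the compaction specifics removed (everything apart from zone->lock, free_area and list_entry() is illustrative):

/*
 * Illustrative double-checked capture; the real code calls
 * capture_free_page() while still holding the lock.
 */
static struct page *peek_then_capture(struct zone *zone,
				      struct free_area *area, int mtype)
{
	struct page *page = NULL;
	unsigned long flags;

	/* Cheap, lockless peek: it may race, so it is only a hint */
	if (list_empty(&area->free_list[mtype]))
		return NULL;

	spin_lock_irqsave(&zone->lock, flags);
	/* Recheck under the lock before trusting the hint */
	if (!list_empty(&area->free_list[mtype])) {
		page = list_entry(area->free_list[mtype].next,
				  struct page, lru);
		/* capture/removal from the free list would happen here */
	}
	spin_unlock_irqrestore(&zone->lock, flags);

	return page;
}
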
270
94/* 271/*
95 * Isolate free pages onto a private freelist. Caller must hold zone->lock. 272 * Isolate free pages onto a private freelist. Caller must hold zone->lock.
96 * If @strict is true, will abort returning 0 on any invalid PFNs or non-free 273 * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
97 * pages inside of the pageblock (even though it may still end up isolating 274 * pages inside of the pageblock (even though it may still end up isolating
98 * some pages). 275 * some pages).
99 */ 276 */
100static unsigned long isolate_freepages_block(unsigned long blockpfn, 277static unsigned long isolate_freepages_block(struct compact_control *cc,
278 unsigned long blockpfn,
101 unsigned long end_pfn, 279 unsigned long end_pfn,
102 struct list_head *freelist, 280 struct list_head *freelist,
103 bool strict) 281 bool strict)
104{ 282{
105 int nr_scanned = 0, total_isolated = 0; 283 int nr_scanned = 0, total_isolated = 0;
106 struct page *cursor; 284 struct page *cursor, *valid_page = NULL;
285 unsigned long nr_strict_required = end_pfn - blockpfn;
286 unsigned long flags;
287 bool locked = false;
107 288
108 cursor = pfn_to_page(blockpfn); 289 cursor = pfn_to_page(blockpfn);
109 290
110 /* Isolate free pages. This assumes the block is valid */ 291 /* Isolate free pages. */
111 for (; blockpfn < end_pfn; blockpfn++, cursor++) { 292 for (; blockpfn < end_pfn; blockpfn++, cursor++) {
112 int isolated, i; 293 int isolated, i;
113 struct page *page = cursor; 294 struct page *page = cursor;
114 295
115 if (!pfn_valid_within(blockpfn)) {
116 if (strict)
117 return 0;
118 continue;
119 }
120 nr_scanned++; 296 nr_scanned++;
297 if (!pfn_valid_within(blockpfn))
298 continue;
299 if (!valid_page)
300 valid_page = page;
301 if (!PageBuddy(page))
302 continue;
121 303
122 if (!PageBuddy(page)) { 304 /*
123 if (strict) 305 * The zone lock must be held to isolate freepages.
124 return 0; 306 * Unfortunately this is a very coarse lock and can be
307 * heavily contended if there are parallel allocations
 308 * or parallel compactions. For async compaction we do not
 309 * spin on the lock; instead we acquire it as late as
 310 * possible.
311 */
312 locked = compact_checklock_irqsave(&cc->zone->lock, &flags,
313 locked, cc);
314 if (!locked)
315 break;
316
317 /* Recheck this is a suitable migration target under lock */
318 if (!strict && !suitable_migration_target(page))
319 break;
320
321 /* Recheck this is a buddy page under lock */
322 if (!PageBuddy(page))
125 continue; 323 continue;
126 }
127 324
128 /* Found a free page, break it into order-0 pages */ 325 /* Found a free page, break it into order-0 pages */
129 isolated = split_free_page(page); 326 isolated = split_free_page(page);
130 if (!isolated && strict) 327 if (!isolated && strict)
131 return 0; 328 break;
132 total_isolated += isolated; 329 total_isolated += isolated;
133 for (i = 0; i < isolated; i++) { 330 for (i = 0; i < isolated; i++) {
134 list_add(&page->lru, freelist); 331 list_add(&page->lru, freelist);
@@ -143,6 +340,22 @@ static unsigned long isolate_freepages_block(unsigned long blockpfn,
143 } 340 }
144 341
145 trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated); 342 trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
343
344 /*
345 * If strict isolation is requested by CMA then check that all the
346 * pages requested were isolated. If there were any failures, 0 is
347 * returned and CMA will fail.
348 */
349 if (strict && nr_strict_required != total_isolated)
350 total_isolated = 0;
351
352 if (locked)
353 spin_unlock_irqrestore(&cc->zone->lock, flags);
354
355 /* Update the pageblock-skip if the whole pageblock was scanned */
356 if (blockpfn == end_pfn)
357 update_pageblock_skip(cc, valid_page, total_isolated, false);
358
146 return total_isolated; 359 return total_isolated;
147} 360}
148 361
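
A small worked example of the strict accounting above, assuming pageblock_nr_pages == 512 (the usual value with 2 MiB pageblocks and 4 KiB pages): if the requested range covers one full pageblock, nr_strict_required is 512; should the scan break after isolating only 300 pages, total_isolated (300) differs from 512, so the function returns 0 and the CMA caller treats the whole range as failed rather than proceeding with a partial isolation.
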
@@ -160,17 +373,14 @@ static unsigned long isolate_freepages_block(unsigned long blockpfn,
160 * a free page). 373 * a free page).
161 */ 374 */
162unsigned long 375unsigned long
163isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) 376isolate_freepages_range(struct compact_control *cc,
377 unsigned long start_pfn, unsigned long end_pfn)
164{ 378{
165 unsigned long isolated, pfn, block_end_pfn, flags; 379 unsigned long isolated, pfn, block_end_pfn;
166 struct zone *zone = NULL;
167 LIST_HEAD(freelist); 380 LIST_HEAD(freelist);
168 381
169 if (pfn_valid(start_pfn))
170 zone = page_zone(pfn_to_page(start_pfn));
171
172 for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) { 382 for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) {
173 if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn))) 383 if (!pfn_valid(pfn) || cc->zone != page_zone(pfn_to_page(pfn)))
174 break; 384 break;
175 385
176 /* 386 /*
@@ -180,10 +390,8 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
180 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); 390 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
181 block_end_pfn = min(block_end_pfn, end_pfn); 391 block_end_pfn = min(block_end_pfn, end_pfn);
182 392
183 spin_lock_irqsave(&zone->lock, flags); 393 isolated = isolate_freepages_block(cc, pfn, block_end_pfn,
184 isolated = isolate_freepages_block(pfn, block_end_pfn,
185 &freelist, true); 394 &freelist, true);
186 spin_unlock_irqrestore(&zone->lock, flags);
187 395
188 /* 396 /*
189 * In strict mode, isolate_freepages_block() returns 0 if 397 * In strict mode, isolate_freepages_block() returns 0 if
@@ -253,6 +461,7 @@ static bool too_many_isolated(struct zone *zone)
253 * @cc: Compaction control structure. 461 * @cc: Compaction control structure.
254 * @low_pfn: The first PFN of the range. 462 * @low_pfn: The first PFN of the range.
255 * @end_pfn: The one-past-the-last PFN of the range. 463 * @end_pfn: The one-past-the-last PFN of the range.
 464 * @unevictable: true if unevictable pages may be isolated
256 * 465 *
257 * Isolate all pages that can be migrated from the range specified by 466 * Isolate all pages that can be migrated from the range specified by
258 * [low_pfn, end_pfn). Returns zero if there is a fatal signal 467 * [low_pfn, end_pfn). Returns zero if there is a fatal signal
@@ -268,7 +477,7 @@ static bool too_many_isolated(struct zone *zone)
268 */ 477 */
269unsigned long 478unsigned long
270isolate_migratepages_range(struct zone *zone, struct compact_control *cc, 479isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
271 unsigned long low_pfn, unsigned long end_pfn) 480 unsigned long low_pfn, unsigned long end_pfn, bool unevictable)
272{ 481{
273 unsigned long last_pageblock_nr = 0, pageblock_nr; 482 unsigned long last_pageblock_nr = 0, pageblock_nr;
274 unsigned long nr_scanned = 0, nr_isolated = 0; 483 unsigned long nr_scanned = 0, nr_isolated = 0;
@@ -276,7 +485,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
276 isolate_mode_t mode = 0; 485 isolate_mode_t mode = 0;
277 struct lruvec *lruvec; 486 struct lruvec *lruvec;
278 unsigned long flags; 487 unsigned long flags;
279 bool locked; 488 bool locked = false;
489 struct page *page = NULL, *valid_page = NULL;
280 490
281 /* 491 /*
282 * Ensure that there are not too many pages isolated from the LRU 492 * Ensure that there are not too many pages isolated from the LRU
@@ -296,23 +506,15 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
296 506
297 /* Time to isolate some pages for migration */ 507 /* Time to isolate some pages for migration */
298 cond_resched(); 508 cond_resched();
299 spin_lock_irqsave(&zone->lru_lock, flags);
300 locked = true;
301 for (; low_pfn < end_pfn; low_pfn++) { 509 for (; low_pfn < end_pfn; low_pfn++) {
302 struct page *page;
303
304 /* give a chance to irqs before checking need_resched() */ 510 /* give a chance to irqs before checking need_resched() */
305 if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) { 511 if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
306 spin_unlock_irqrestore(&zone->lru_lock, flags); 512 if (should_release_lock(&zone->lru_lock)) {
307 locked = false; 513 spin_unlock_irqrestore(&zone->lru_lock, flags);
514 locked = false;
515 }
308 } 516 }
309 517
310 /* Check if it is ok to still hold the lock */
311 locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
312 locked, cc);
313 if (!locked)
314 break;
315
316 /* 518 /*
317 * migrate_pfn does not necessarily start aligned to a 519 * migrate_pfn does not necessarily start aligned to a
318 * pageblock. Ensure that pfn_valid is called when moving 520 * pageblock. Ensure that pfn_valid is called when moving
@@ -340,6 +542,14 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
340 if (page_zone(page) != zone) 542 if (page_zone(page) != zone)
341 continue; 543 continue;
342 544
545 if (!valid_page)
546 valid_page = page;
547
548 /* If isolation recently failed, do not retry */
549 pageblock_nr = low_pfn >> pageblock_order;
550 if (!isolation_suitable(cc, page))
551 goto next_pageblock;
552
343 /* Skip if free */ 553 /* Skip if free */
344 if (PageBuddy(page)) 554 if (PageBuddy(page))
345 continue; 555 continue;
@@ -349,24 +559,43 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
349 * migration is optimistic to see if the minimum amount of work 559 * migration is optimistic to see if the minimum amount of work
350 * satisfies the allocation 560 * satisfies the allocation
351 */ 561 */
352 pageblock_nr = low_pfn >> pageblock_order;
353 if (!cc->sync && last_pageblock_nr != pageblock_nr && 562 if (!cc->sync && last_pageblock_nr != pageblock_nr &&
354 !migrate_async_suitable(get_pageblock_migratetype(page))) { 563 !migrate_async_suitable(get_pageblock_migratetype(page))) {
355 low_pfn += pageblock_nr_pages; 564 cc->finished_update_migrate = true;
356 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; 565 goto next_pageblock;
357 last_pageblock_nr = pageblock_nr;
358 continue;
359 } 566 }
360 567
568 /* Check may be lockless but that's ok as we recheck later */
361 if (!PageLRU(page)) 569 if (!PageLRU(page))
362 continue; 570 continue;
363 571
364 /* 572 /*
365 * PageLRU is set, and lru_lock excludes isolation, 573 * PageLRU is set. lru_lock normally excludes isolation
366 * splitting and collapsing (collapsing has already 574 * splitting and collapsing (collapsing has already happened
367 * happened if PageLRU is set). 575 * if PageLRU is set) but the lock is not necessarily taken
576 * here and it is wasteful to take it just to check transhuge.
577 * Check TransHuge without lock and skip the whole pageblock if
578 * it's either a transhuge or hugetlbfs page, as calling
579 * compound_order() without preventing THP from splitting the
580 * page underneath us may return surprising results.
368 */ 581 */
369 if (PageTransHuge(page)) { 582 if (PageTransHuge(page)) {
583 if (!locked)
584 goto next_pageblock;
585 low_pfn += (1 << compound_order(page)) - 1;
586 continue;
587 }
588
589 /* Check if it is ok to still hold the lock */
590 locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
591 locked, cc);
592 if (!locked || fatal_signal_pending(current))
593 break;
594
595 /* Recheck PageLRU and PageTransHuge under lock */
596 if (!PageLRU(page))
597 continue;
598 if (PageTransHuge(page)) {
370 low_pfn += (1 << compound_order(page)) - 1; 599 low_pfn += (1 << compound_order(page)) - 1;
371 continue; 600 continue;
372 } 601 }
@@ -374,6 +603,9 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
374 if (!cc->sync) 603 if (!cc->sync)
375 mode |= ISOLATE_ASYNC_MIGRATE; 604 mode |= ISOLATE_ASYNC_MIGRATE;
376 605
606 if (unevictable)
607 mode |= ISOLATE_UNEVICTABLE;
608
377 lruvec = mem_cgroup_page_lruvec(page, zone); 609 lruvec = mem_cgroup_page_lruvec(page, zone);
378 610
379 /* Try isolate the page */ 611 /* Try isolate the page */
@@ -383,6 +615,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
383 VM_BUG_ON(PageTransCompound(page)); 615 VM_BUG_ON(PageTransCompound(page));
384 616
385 /* Successfully isolated */ 617 /* Successfully isolated */
618 cc->finished_update_migrate = true;
386 del_page_from_lru_list(page, lruvec, page_lru(page)); 619 del_page_from_lru_list(page, lruvec, page_lru(page));
387 list_add(&page->lru, migratelist); 620 list_add(&page->lru, migratelist);
388 cc->nr_migratepages++; 621 cc->nr_migratepages++;
@@ -393,6 +626,13 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
393 ++low_pfn; 626 ++low_pfn;
394 break; 627 break;
395 } 628 }
629
630 continue;
631
632next_pageblock:
633 low_pfn += pageblock_nr_pages;
634 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
635 last_pageblock_nr = pageblock_nr;
396 } 636 }
397 637
398 acct_isolated(zone, locked, cc); 638 acct_isolated(zone, locked, cc);
@@ -400,6 +640,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
400 if (locked) 640 if (locked)
401 spin_unlock_irqrestore(&zone->lru_lock, flags); 641 spin_unlock_irqrestore(&zone->lru_lock, flags);
402 642
643 /* Update the pageblock-skip if the whole pageblock was scanned */
644 if (low_pfn == end_pfn)
645 update_pageblock_skip(cc, valid_page, nr_isolated, true);
646
403 trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); 647 trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
404 648
405 return low_pfn; 649 return low_pfn;
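
The next_pageblock arithmetic is worth unpacking: the block is skipped by stepping past it and then rounding up to a pageblock boundary minus one, so that the for loop's own low_pfn++ lands on the first pfn of the block to scan next. For example, with pageblock_nr_pages == 512 and low_pfn sitting at the block-aligned pfn 1024: 1024 + 512 = 1536, ALIGN(1536, 512) = 1536, minus 1 gives 1535, and the increment resumes the scan at 1536, the start of the following pageblock.
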
@@ -407,43 +651,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
407 651
408#endif /* CONFIG_COMPACTION || CONFIG_CMA */ 652#endif /* CONFIG_COMPACTION || CONFIG_CMA */
409#ifdef CONFIG_COMPACTION 653#ifdef CONFIG_COMPACTION
410
411/* Returns true if the page is within a block suitable for migration to */
412static bool suitable_migration_target(struct page *page)
413{
414
415 int migratetype = get_pageblock_migratetype(page);
416
417 /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
418 if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
419 return false;
420
421 /* If the page is a large free page, then allow migration */
422 if (PageBuddy(page) && page_order(page) >= pageblock_order)
423 return true;
424
425 /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
426 if (migrate_async_suitable(migratetype))
427 return true;
428
429 /* Otherwise skip the block */
430 return false;
431}
432
433/*
434 * Returns the start pfn of the last page block in a zone. This is the starting
435 * point for full compaction of a zone. Compaction searches for free pages from
436 * the end of each zone, while isolate_freepages_block scans forward inside each
437 * page block.
438 */
439static unsigned long start_free_pfn(struct zone *zone)
440{
441 unsigned long free_pfn;
442 free_pfn = zone->zone_start_pfn + zone->spanned_pages;
443 free_pfn &= ~(pageblock_nr_pages-1);
444 return free_pfn;
445}
446
447/* 654/*
448 * Based on information in the current compact_control, find blocks 655 * Based on information in the current compact_control, find blocks
449 * suitable for isolating free pages from and then isolate them. 656 * suitable for isolating free pages from and then isolate them.
@@ -453,7 +660,6 @@ static void isolate_freepages(struct zone *zone,
453{ 660{
454 struct page *page; 661 struct page *page;
455 unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; 662 unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn;
456 unsigned long flags;
457 int nr_freepages = cc->nr_freepages; 663 int nr_freepages = cc->nr_freepages;
458 struct list_head *freelist = &cc->freepages; 664 struct list_head *freelist = &cc->freepages;
459 665
@@ -501,30 +707,16 @@ static void isolate_freepages(struct zone *zone,
501 if (!suitable_migration_target(page)) 707 if (!suitable_migration_target(page))
502 continue; 708 continue;
503 709
504 /* 710 /* If isolation recently failed, do not retry */
505 * Found a block suitable for isolating free pages from. Now 711 if (!isolation_suitable(cc, page))
506 * we disabled interrupts, double check things are ok and 712 continue;
507 * isolate the pages. This is to minimise the time IRQs
508 * are disabled
509 */
510 isolated = 0;
511 713
512 /* 714 /* Found a block suitable for isolating free pages from */
513 * The zone lock must be held to isolate freepages. This 715 isolated = 0;
514 * unfortunately this is a very coarse lock and can be 716 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
515 * heavily contended if there are parallel allocations 717 isolated = isolate_freepages_block(cc, pfn, end_pfn,
516 * or parallel compactions. For async compaction do not 718 freelist, false);
517 * spin on the lock 719 nr_freepages += isolated;
518 */
519 if (!compact_trylock_irqsave(&zone->lock, &flags, cc))
520 break;
521 if (suitable_migration_target(page)) {
522 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
523 isolated = isolate_freepages_block(pfn, end_pfn,
524 freelist, false);
525 nr_freepages += isolated;
526 }
527 spin_unlock_irqrestore(&zone->lock, flags);
528 720
529 /* 721 /*
530 * Record the highest PFN we isolated pages from. When next 722 * Record the highest PFN we isolated pages from. When next
@@ -532,17 +724,8 @@ static void isolate_freepages(struct zone *zone,
532 * page migration may have returned some pages to the allocator 724 * page migration may have returned some pages to the allocator
533 */ 725 */
534 if (isolated) { 726 if (isolated) {
727 cc->finished_update_free = true;
535 high_pfn = max(high_pfn, pfn); 728 high_pfn = max(high_pfn, pfn);
536
537 /*
538 * If the free scanner has wrapped, update
539 * compact_cached_free_pfn to point to the highest
540 * pageblock with free pages. This reduces excessive
541 * scanning of full pageblocks near the end of the
542 * zone
543 */
544 if (cc->order > 0 && cc->wrapped)
545 zone->compact_cached_free_pfn = high_pfn;
546 } 729 }
547 } 730 }
548 731
@@ -551,11 +734,6 @@ static void isolate_freepages(struct zone *zone,
551 734
552 cc->free_pfn = high_pfn; 735 cc->free_pfn = high_pfn;
553 cc->nr_freepages = nr_freepages; 736 cc->nr_freepages = nr_freepages;
554
555 /* If compact_cached_free_pfn is reset then set it now */
556 if (cc->order > 0 && !cc->wrapped &&
557 zone->compact_cached_free_pfn == start_free_pfn(zone))
558 zone->compact_cached_free_pfn = high_pfn;
559} 737}
560 738
561/* 739/*
@@ -633,8 +811,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
633 } 811 }
634 812
635 /* Perform the isolation */ 813 /* Perform the isolation */
636 low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn); 814 low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn, false);
637 if (!low_pfn) 815 if (!low_pfn || cc->contended)
638 return ISOLATE_ABORT; 816 return ISOLATE_ABORT;
639 817
640 cc->migrate_pfn = low_pfn; 818 cc->migrate_pfn = low_pfn;
@@ -645,33 +823,24 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
645static int compact_finished(struct zone *zone, 823static int compact_finished(struct zone *zone,
646 struct compact_control *cc) 824 struct compact_control *cc)
647{ 825{
648 unsigned int order;
649 unsigned long watermark; 826 unsigned long watermark;
650 827
651 if (fatal_signal_pending(current)) 828 if (fatal_signal_pending(current))
652 return COMPACT_PARTIAL; 829 return COMPACT_PARTIAL;
653 830
654 /* 831 /* Compaction run completes if the migrate and free scanner meet */
655 * A full (order == -1) compaction run starts at the beginning and
656 * end of a zone; it completes when the migrate and free scanner meet.
657 * A partial (order > 0) compaction can start with the free scanner
658 * at a random point in the zone, and may have to restart.
659 */
660 if (cc->free_pfn <= cc->migrate_pfn) { 832 if (cc->free_pfn <= cc->migrate_pfn) {
661 if (cc->order > 0 && !cc->wrapped) { 833 /*
662 /* We started partway through; restart at the end. */ 834 * Mark that the PG_migrate_skip information should be cleared
663 unsigned long free_pfn = start_free_pfn(zone); 835 * by kswapd when it goes to sleep. kswapd does not set the
664 zone->compact_cached_free_pfn = free_pfn; 836 * flag itself as the decision to be clear should be directly
665 cc->free_pfn = free_pfn; 837 * based on an allocation request.
666 cc->wrapped = 1; 838 */
667 return COMPACT_CONTINUE; 839 if (!current_is_kswapd())
668 } 840 zone->compact_blockskip_flush = true;
669 return COMPACT_COMPLETE;
670 }
671 841
672 /* We wrapped around and ended up where we started. */
673 if (cc->wrapped && cc->free_pfn <= cc->start_free_pfn)
674 return COMPACT_COMPLETE; 842 return COMPACT_COMPLETE;
843 }
675 844
676 /* 845 /*
677 * order == -1 is expected when compacting via 846 * order == -1 is expected when compacting via
@@ -688,14 +857,22 @@ static int compact_finished(struct zone *zone,
688 return COMPACT_CONTINUE; 857 return COMPACT_CONTINUE;
689 858
690 /* Direct compactor: Is a suitable page free? */ 859 /* Direct compactor: Is a suitable page free? */
691 for (order = cc->order; order < MAX_ORDER; order++) { 860 if (cc->page) {
692 /* Job done if page is free of the right migratetype */ 861 /* Was a suitable page captured? */
693 if (!list_empty(&zone->free_area[order].free_list[cc->migratetype])) 862 if (*cc->page)
694 return COMPACT_PARTIAL;
695
696 /* Job done if allocation would set block type */
697 if (order >= pageblock_order && zone->free_area[order].nr_free)
698 return COMPACT_PARTIAL; 863 return COMPACT_PARTIAL;
864 } else {
865 unsigned int order;
866 for (order = cc->order; order < MAX_ORDER; order++) {
867 struct free_area *area = &zone->free_area[cc->order];
868 /* Job done if page is free of the right migratetype */
869 if (!list_empty(&area->free_list[cc->migratetype]))
870 return COMPACT_PARTIAL;
871
872 /* Job done if allocation would set block type */
 873 if (order >= pageblock_order && area->nr_free)
874 return COMPACT_PARTIAL;
875 }
699 } 876 }
700 877
701 return COMPACT_CONTINUE; 878 return COMPACT_CONTINUE;
@@ -754,6 +931,8 @@ unsigned long compaction_suitable(struct zone *zone, int order)
754static int compact_zone(struct zone *zone, struct compact_control *cc) 931static int compact_zone(struct zone *zone, struct compact_control *cc)
755{ 932{
756 int ret; 933 int ret;
934 unsigned long start_pfn = zone->zone_start_pfn;
935 unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
757 936
758 ret = compaction_suitable(zone, cc->order); 937 ret = compaction_suitable(zone, cc->order);
759 switch (ret) { 938 switch (ret) {
@@ -766,18 +945,30 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
766 ; 945 ;
767 } 946 }
768 947
769 /* Setup to move all movable pages to the end of the zone */ 948 /*
770 cc->migrate_pfn = zone->zone_start_pfn; 949 * Setup to move all movable pages to the end of the zone. Used cached
771 950 * information on where the scanners should start but check that it
772 if (cc->order > 0) { 951 * is initialised by ensuring the values are within zone boundaries.
773 /* Incremental compaction. Start where the last one stopped. */ 952 */
774 cc->free_pfn = zone->compact_cached_free_pfn; 953 cc->migrate_pfn = zone->compact_cached_migrate_pfn;
775 cc->start_free_pfn = cc->free_pfn; 954 cc->free_pfn = zone->compact_cached_free_pfn;
776 } else { 955 if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
777 /* Order == -1 starts at the end of the zone. */ 956 cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1);
778 cc->free_pfn = start_free_pfn(zone); 957 zone->compact_cached_free_pfn = cc->free_pfn;
958 }
959 if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) {
960 cc->migrate_pfn = start_pfn;
961 zone->compact_cached_migrate_pfn = cc->migrate_pfn;
779 } 962 }
780 963
964 /*
965 * Clear pageblock skip if there were failures recently and compaction
966 * is about to be retried after being deferred. kswapd does not do
967 * this reset as it'll reset the cached information when going to sleep.
968 */
969 if (compaction_restarting(zone, cc->order) && !current_is_kswapd())
970 __reset_isolation_suitable(zone);
971
781 migrate_prep_local(); 972 migrate_prep_local();
782 973
783 while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) { 974 while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
@@ -787,6 +978,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
787 switch (isolate_migratepages(zone, cc)) { 978 switch (isolate_migratepages(zone, cc)) {
788 case ISOLATE_ABORT: 979 case ISOLATE_ABORT:
789 ret = COMPACT_PARTIAL; 980 ret = COMPACT_PARTIAL;
981 putback_lru_pages(&cc->migratepages);
982 cc->nr_migratepages = 0;
790 goto out; 983 goto out;
791 case ISOLATE_NONE: 984 case ISOLATE_NONE:
792 continue; 985 continue;
@@ -817,6 +1010,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
817 goto out; 1010 goto out;
818 } 1011 }
819 } 1012 }
1013
1014 /* Capture a page now if it is a suitable size */
1015 compact_capture_page(cc);
820 } 1016 }
821 1017
822out: 1018out:
@@ -829,8 +1025,10 @@ out:
829 1025
830static unsigned long compact_zone_order(struct zone *zone, 1026static unsigned long compact_zone_order(struct zone *zone,
831 int order, gfp_t gfp_mask, 1027 int order, gfp_t gfp_mask,
832 bool sync, bool *contended) 1028 bool sync, bool *contended,
1029 struct page **page)
833{ 1030{
1031 unsigned long ret;
834 struct compact_control cc = { 1032 struct compact_control cc = {
835 .nr_freepages = 0, 1033 .nr_freepages = 0,
836 .nr_migratepages = 0, 1034 .nr_migratepages = 0,
@@ -838,12 +1036,18 @@ static unsigned long compact_zone_order(struct zone *zone,
838 .migratetype = allocflags_to_migratetype(gfp_mask), 1036 .migratetype = allocflags_to_migratetype(gfp_mask),
839 .zone = zone, 1037 .zone = zone,
840 .sync = sync, 1038 .sync = sync,
841 .contended = contended, 1039 .page = page,
842 }; 1040 };
843 INIT_LIST_HEAD(&cc.freepages); 1041 INIT_LIST_HEAD(&cc.freepages);
844 INIT_LIST_HEAD(&cc.migratepages); 1042 INIT_LIST_HEAD(&cc.migratepages);
845 1043
846 return compact_zone(zone, &cc); 1044 ret = compact_zone(zone, &cc);
1045
1046 VM_BUG_ON(!list_empty(&cc.freepages));
1047 VM_BUG_ON(!list_empty(&cc.migratepages));
1048
1049 *contended = cc.contended;
1050 return ret;
847} 1051}
848 1052
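
For context, the reworked compact_zone_order() now reports lock contention through cc.contended after the run and hands back any captured page through the page argument. A hedged sketch of the calling convention as a direct compactor might use it; the surrounding variables are assumed to come from the caller, as in try_to_compact_pages(), and use_captured_page() is a placeholder for the real consumer:

	struct page *captured = NULL;
	bool contended = false;
	unsigned long status;

	/* zone, order, gfp_mask and sync come from the caller's context */
	status = compact_zone_order(zone, order, gfp_mask, sync,
				    &contended, &captured);
	if (captured) {
		/* compact_capture_page() pulled a page of the right order */
		use_captured_page(captured);
	} else if (contended) {
		/* async compaction backed off a contended lock; the caller
		 * may defer compaction instead of retrying immediately */
	}
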
849int sysctl_extfrag_threshold = 500; 1053int sysctl_extfrag_threshold = 500;
@@ -855,12 +1059,14 @@ int sysctl_extfrag_threshold = 500;
855 * @gfp_mask: The GFP mask of the current allocation 1059 * @gfp_mask: The GFP mask of the current allocation
856 * @nodemask: The allowed nodes to allocate from 1060 * @nodemask: The allowed nodes to allocate from
857 * @sync: Whether migration is synchronous or not 1061 * @sync: Whether migration is synchronous or not
1062 * @contended: Return value that is true if compaction was aborted due to lock contention
1063 * @page: Optionally capture a free page of the requested order during compaction
858 * 1064 *
859 * This is the main entry point for direct page compaction. 1065 * This is the main entry point for direct page compaction.
860 */ 1066 */
861unsigned long try_to_compact_pages(struct zonelist *zonelist, 1067unsigned long try_to_compact_pages(struct zonelist *zonelist,
862 int order, gfp_t gfp_mask, nodemask_t *nodemask, 1068 int order, gfp_t gfp_mask, nodemask_t *nodemask,
863 bool sync, bool *contended) 1069 bool sync, bool *contended, struct page **page)
864{ 1070{
865 enum zone_type high_zoneidx = gfp_zone(gfp_mask); 1071 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
866 int may_enter_fs = gfp_mask & __GFP_FS; 1072 int may_enter_fs = gfp_mask & __GFP_FS;
@@ -868,28 +1074,30 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
868 struct zoneref *z; 1074 struct zoneref *z;
869 struct zone *zone; 1075 struct zone *zone;
870 int rc = COMPACT_SKIPPED; 1076 int rc = COMPACT_SKIPPED;
1077 int alloc_flags = 0;
871 1078
872 /* 1079 /* Check if the GFP flags allow compaction */
873 * Check whether it is worth even starting compaction. The order check is
874 * made because an assumption is made that the page allocator can satisfy
875 * the "cheaper" orders without taking special steps
876 */
877 if (!order || !may_enter_fs || !may_perform_io) 1080 if (!order || !may_enter_fs || !may_perform_io)
878 return rc; 1081 return rc;
879 1082
880 count_vm_event(COMPACTSTALL); 1083 count_vm_event(COMPACTSTALL);
881 1084
1085#ifdef CONFIG_CMA
1086 if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
1087 alloc_flags |= ALLOC_CMA;
1088#endif
882 /* Compact each zone in the list */ 1089 /* Compact each zone in the list */
883 for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, 1090 for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
884 nodemask) { 1091 nodemask) {
885 int status; 1092 int status;
886 1093
887 status = compact_zone_order(zone, order, gfp_mask, sync, 1094 status = compact_zone_order(zone, order, gfp_mask, sync,
888 contended); 1095 contended, page);
889 rc = max(status, rc); 1096 rc = max(status, rc);
890 1097
891 /* If a normal allocation would succeed, stop compacting */ 1098 /* If a normal allocation would succeed, stop compacting */
892 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) 1099 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
1100 alloc_flags))
893 break; 1101 break;
894 } 1102 }
895 1103
@@ -940,6 +1148,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
940 struct compact_control cc = { 1148 struct compact_control cc = {
941 .order = order, 1149 .order = order,
942 .sync = false, 1150 .sync = false,
1151 .page = NULL,
943 }; 1152 };
944 1153
945 return __compact_pgdat(pgdat, &cc); 1154 return __compact_pgdat(pgdat, &cc);
@@ -950,6 +1159,7 @@ static int compact_node(int nid)
950 struct compact_control cc = { 1159 struct compact_control cc = {
951 .order = -1, 1160 .order = -1,
952 .sync = true, 1161 .sync = true,
1162 .page = NULL,
953 }; 1163 };
954 1164
955 return __compact_pgdat(NODE_DATA(nid), &cc); 1165 return __compact_pgdat(NODE_DATA(nid), &cc);