path: root/mm/compaction.c
Diffstat (limited to 'mm/compaction.c')
-rw-r--r--  mm/compaction.c  174
1 file changed, 132 insertions(+), 42 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 1a8894eadf72..6d592a021072 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -16,6 +16,9 @@
 #include <linux/sysfs.h>
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/compaction.h>
+
 /*
  * compact_control is used to track pages being migrated and the free pages
  * they are being migrated to during memory compaction. The free_pfn starts
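Defining CREATE_TRACE_POINTS before including a trace header is the standard mechanism for instantiating the tracepoints that this file then calls (trace_mm_compaction_isolate_freepages() and friends). As a rough, hedged sketch only — the real include/trace/events/compaction.h added alongside this patch is not shown here and may differ in its fields and format strings — such an event is typically declared along these lines:

/* Hypothetical sketch of a tracepoint declaration; not the actual
 * contents of include/trace/events/compaction.h. */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM compaction

#if !defined(_TRACE_COMPACTION_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_COMPACTION_H

#include <linux/tracepoint.h>

TRACE_EVENT(mm_compaction_isolate_freepages,

        TP_PROTO(unsigned long nr_scanned, unsigned long nr_isolated),

        TP_ARGS(nr_scanned, nr_isolated),

        TP_STRUCT__entry(
                __field(unsigned long, nr_scanned)
                __field(unsigned long, nr_isolated)
        ),

        TP_fast_assign(
                __entry->nr_scanned = nr_scanned;
                __entry->nr_isolated = nr_isolated;
        ),

        TP_printk("nr_scanned=%lu nr_isolated=%lu",
                __entry->nr_scanned, __entry->nr_isolated)
);

#endif /* _TRACE_COMPACTION_H */

/* This part must be outside the include guard */
#include <trace/define_trace.h>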
@@ -30,6 +33,7 @@ struct compact_control {
         unsigned long nr_migratepages;  /* Number of pages to migrate */
         unsigned long free_pfn;         /* isolate_freepages search base */
         unsigned long migrate_pfn;      /* isolate_migratepages search base */
+        bool sync;                      /* Synchronous migration */
 
         /* Account for isolated anon and file pages */
         unsigned long nr_anon;
@@ -38,6 +42,8 @@ struct compact_control {
         unsigned int order;             /* order a direct compactor needs */
         int migratetype;                /* MOVABLE, RECLAIMABLE etc */
         struct zone *zone;
+
+        int compact_mode;
 };
 
 static unsigned long release_freepages(struct list_head *freelist)
@@ -60,7 +66,7 @@ static unsigned long isolate_freepages_block(struct zone *zone,
                                 struct list_head *freelist)
 {
         unsigned long zone_end_pfn, end_pfn;
-        int total_isolated = 0;
+        int nr_scanned = 0, total_isolated = 0;
         struct page *cursor;
 
         /* Get the last PFN we should scan for free pages at */
@@ -81,6 +87,7 @@ static unsigned long isolate_freepages_block(struct zone *zone,
 
                 if (!pfn_valid_within(blockpfn))
                         continue;
+                nr_scanned++;
 
                 if (!PageBuddy(page))
                         continue;
@@ -100,6 +107,7 @@ static unsigned long isolate_freepages_block(struct zone *zone,
                 }
         }
 
+        trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
         return total_isolated;
 }
 
@@ -234,6 +242,8 @@ static unsigned long isolate_migratepages(struct zone *zone,
                                         struct compact_control *cc)
 {
         unsigned long low_pfn, end_pfn;
+        unsigned long last_pageblock_nr = 0, pageblock_nr;
+        unsigned long nr_scanned = 0, nr_isolated = 0;
         struct list_head *migratelist = &cc->migratepages;
 
         /* Do not scan outside zone boundaries */
@@ -266,20 +276,51 @@ static unsigned long isolate_migratepages(struct zone *zone,
                 struct page *page;
                 if (!pfn_valid_within(low_pfn))
                         continue;
+                nr_scanned++;
 
                 /* Get the page and skip if free */
                 page = pfn_to_page(low_pfn);
                 if (PageBuddy(page))
                         continue;
 
+                /*
+                 * For async migration, also only scan in MOVABLE blocks. Async
+                 * migration is optimistic to see if the minimum amount of work
+                 * satisfies the allocation
+                 */
+                pageblock_nr = low_pfn >> pageblock_order;
+                if (!cc->sync && last_pageblock_nr != pageblock_nr &&
+                                get_pageblock_migratetype(page) != MIGRATE_MOVABLE) {
+                        low_pfn += pageblock_nr_pages;
+                        low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
+                        last_pageblock_nr = pageblock_nr;
+                        continue;
+                }
+
+                if (!PageLRU(page))
+                        continue;
+
+                /*
+                 * PageLRU is set, and lru_lock excludes isolation,
+                 * splitting and collapsing (collapsing has already
+                 * happened if PageLRU is set).
+                 */
+                if (PageTransHuge(page)) {
+                        low_pfn += (1 << compound_order(page)) - 1;
+                        continue;
+                }
+
                 /* Try isolate the page */
                 if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
                         continue;
 
+                VM_BUG_ON(PageTransCompound(page));
+
                 /* Successfully isolated */
                 del_page_from_lru_list(zone, page, page_lru(page));
                 list_add(&page->lru, migratelist);
                 cc->nr_migratepages++;
+                nr_isolated++;
 
                 /* Avoid isolating too much */
                 if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
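The pageblock skip in the hunk above is easy to misread: low_pfn first jumps a whole pageblock forward, then is rounded up to a pageblock boundary minus one, so that the enclosing for-loop's own increment leaves the scan on a pageblock-aligned pfn. A standalone sketch of the arithmetic follows; the pageblock order and starting pfn are illustrative assumptions, and ALIGN is a stand-in for the kernel macro of the same name.

/* Illustration only: the async-mode pageblock skip from
 * isolate_migratepages(), with made-up numbers. */
#include <stdio.h>

#define ALIGN(x, a)     (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
        unsigned long pageblock_nr_pages = 1UL << 9;    /* assumes pageblock_order == 9 */
        unsigned long low_pfn = 1024;                   /* first pfn of an unmovable pageblock */

        low_pfn += pageblock_nr_pages;                          /* 1536 */
        low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;       /* 1535 */

        /* the enclosing for (...; low_pfn++) advances once more */
        printf("scan resumes at pfn %lu\n", low_pfn + 1);       /* 1536, the next pageblock */
        return 0;
}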
@@ -291,6 +332,8 @@ static unsigned long isolate_migratepages(struct zone *zone,
         spin_unlock_irq(&zone->lru_lock);
         cc->migrate_pfn = low_pfn;
 
+        trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
+
         return cc->nr_migratepages;
 }
 
@@ -341,10 +384,10 @@ static void update_nr_listpages(struct compact_control *cc)
 }
 
 static int compact_finished(struct zone *zone,
                             struct compact_control *cc)
 {
         unsigned int order;
-        unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+        unsigned long watermark;
 
         if (fatal_signal_pending(current))
                 return COMPACT_PARTIAL;
@@ -354,12 +397,27 @@ static int compact_finished(struct zone *zone,
                 return COMPACT_COMPLETE;
 
         /* Compaction run is not finished if the watermark is not met */
+        if (cc->compact_mode != COMPACT_MODE_KSWAPD)
+                watermark = low_wmark_pages(zone);
+        else
+                watermark = high_wmark_pages(zone);
+        watermark += (1 << cc->order);
+
         if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
                 return COMPACT_CONTINUE;
 
         if (cc->order == -1)
                 return COMPACT_CONTINUE;
 
+        /*
+         * Generating only one page of the right order is not enough
+         * for kswapd, we must continue until we're above the high
+         * watermark as a pool for high order GFP_ATOMIC allocations
+         * too.
+         */
+        if (cc->compact_mode == COMPACT_MODE_KSWAPD)
+                return COMPACT_CONTINUE;
+
         /* Direct compactor: Is a suitable page free? */
         for (order = cc->order; order < MAX_ORDER; order++) {
                 /* Job done if page is free of the right migratetype */
@@ -374,10 +432,62 @@ static int compact_finished(struct zone *zone,
         return COMPACT_CONTINUE;
 }
 
+/*
+ * compaction_suitable: Is this suitable to run compaction on this zone now?
+ * Returns
+ * COMPACT_SKIPPED - If there are too few free pages for compaction
+ * COMPACT_PARTIAL - If the allocation would succeed without compaction
+ * COMPACT_CONTINUE - If compaction should run now
+ */
+unsigned long compaction_suitable(struct zone *zone, int order)
+{
+        int fragindex;
+        unsigned long watermark;
+
+        /*
+         * Watermarks for order-0 must be met for compaction. Note the 2UL.
+         * This is because during migration, copies of pages need to be
+         * allocated and for a short time, the footprint is higher
+         */
+        watermark = low_wmark_pages(zone) + (2UL << order);
+        if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+                return COMPACT_SKIPPED;
+
+        /*
+         * fragmentation index determines if allocation failures are due to
+         * low memory or external fragmentation
+         *
+         * index of -1 implies allocations might succeed depending on watermarks
+         * index towards 0 implies failure is due to lack of memory
+         * index towards 1000 implies failure is due to fragmentation
+         *
+         * Only compact if a failure would be due to fragmentation.
+         */
+        fragindex = fragmentation_index(zone, order);
+        if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
+                return COMPACT_SKIPPED;
+
+        if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0))
+                return COMPACT_PARTIAL;
+
+        return COMPACT_CONTINUE;
+}
+
 static int compact_zone(struct zone *zone, struct compact_control *cc)
 {
         int ret;
 
+        ret = compaction_suitable(zone, cc->order);
+        switch (ret) {
+        case COMPACT_PARTIAL:
+        case COMPACT_SKIPPED:
+                /* Compaction is likely to fail */
+                return ret;
+        case COMPACT_CONTINUE:
+                /* Fall through to compaction */
+                ;
+        }
+
         /* Setup to move all movable pages to the end of the zone */
         cc->migrate_pfn = zone->zone_start_pfn;
         cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
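The 2UL shift in compaction_suitable() doubles the usual order-sized headroom because, while pages are being migrated, both the source pages and their freshly allocated destinations exist at once. A standalone sketch of that arithmetic follows; the low-watermark value is a made-up number for illustration, not something taken from this patch.

/* Illustration only: the order-0 headroom compaction_suitable()
 * demands before it will let compaction run. */
#include <stdio.h>

int main(void)
{
        unsigned long low_wmark = 1000; /* hypothetical zone low watermark, in pages */
        int order = 9;                  /* e.g. a huge-page-sized request */
        unsigned long watermark = low_wmark + (2UL << order);

        /* 1000 + 1024 = 2024 free order-0 pages required, else COMPACT_SKIPPED */
        printf("free pages required: %lu\n", watermark);
        return 0;
}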
@@ -393,7 +503,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
                 nr_migrate = cc->nr_migratepages;
                 migrate_pages(&cc->migratepages, compaction_alloc,
-                                (unsigned long)cc, 0);
+                                (unsigned long)cc, false,
+                                cc->sync);
                 update_nr_listpages(cc);
                 nr_remaining = cc->nr_migratepages;
 
@@ -401,6 +512,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
                 count_vm_events(COMPACTPAGES, nr_migrate - nr_remaining);
                 if (nr_remaining)
                         count_vm_events(COMPACTPAGEFAILED, nr_remaining);
+                trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
+                                                nr_remaining);
 
                 /* Release LRU pages not migrated */
                 if (!list_empty(&cc->migratepages)) {
@@ -417,8 +530,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
         return ret;
 }
 
-static unsigned long compact_zone_order(struct zone *zone,
-                                                int order, gfp_t gfp_mask)
+unsigned long compact_zone_order(struct zone *zone,
+                                                int order, gfp_t gfp_mask,
+                                                bool sync,
+                                                int compact_mode)
 {
         struct compact_control cc = {
                 .nr_freepages = 0,
@@ -426,6 +541,8 @@ static unsigned long compact_zone_order(struct zone *zone,
                 .order = order,
                 .migratetype = allocflags_to_migratetype(gfp_mask),
                 .zone = zone,
+                .sync = sync,
+                .compact_mode = compact_mode,
         };
         INIT_LIST_HEAD(&cc.freepages);
         INIT_LIST_HEAD(&cc.migratepages);
@@ -441,16 +558,17 @@ int sysctl_extfrag_threshold = 500;
  * @order: The order of the current allocation
  * @gfp_mask: The GFP mask of the current allocation
  * @nodemask: The allowed nodes to allocate from
+ * @sync: Whether migration is synchronous or not
  *
  * This is the main entry point for direct page compaction.
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
-                        int order, gfp_t gfp_mask, nodemask_t *nodemask)
+                        int order, gfp_t gfp_mask, nodemask_t *nodemask,
+                        bool sync)
 {
         enum zone_type high_zoneidx = gfp_zone(gfp_mask);
         int may_enter_fs = gfp_mask & __GFP_FS;
         int may_perform_io = gfp_mask & __GFP_IO;
-        unsigned long watermark;
         struct zoneref *z;
         struct zone *zone;
         int rc = COMPACT_SKIPPED;
@@ -460,7 +578,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
          * made because an assumption is made that the page allocator can satisfy
          * the "cheaper" orders without taking special steps
          */
-        if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
+        if (!order || !may_enter_fs || !may_perform_io)
                 return rc;
 
         count_vm_event(COMPACTSTALL);
@@ -468,43 +586,14 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
         /* Compact each zone in the list */
         for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
                                                                 nodemask) {
-                int fragindex;
                 int status;
 
-                /*
-                 * Watermarks for order-0 must be met for compaction. Note
-                 * the 2UL. This is because during migration, copies of
-                 * pages need to be allocated and for a short time, the
-                 * footprint is higher
-                 */
-                watermark = low_wmark_pages(zone) + (2UL << order);
-                if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
-                        continue;
-
-                /*
-                 * fragmentation index determines if allocation failures are
-                 * due to low memory or external fragmentation
-                 *
-                 * index of -1 implies allocations might succeed depending
-                 * on watermarks
-                 * index towards 0 implies failure is due to lack of memory
-                 * index towards 1000 implies failure is due to fragmentation
-                 *
-                 * Only compact if a failure would be due to fragmentation.
-                 */
-                fragindex = fragmentation_index(zone, order);
-                if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
-                        continue;
-
-                if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
-                        rc = COMPACT_PARTIAL;
-                        break;
-                }
-
-                status = compact_zone_order(zone, order, gfp_mask);
+                status = compact_zone_order(zone, order, gfp_mask, sync,
+                                                COMPACT_MODE_DIRECT_RECLAIM);
                 rc = max(status, rc);
 
-                if (zone_watermark_ok(zone, order, watermark, 0, 0))
+                /* If a normal allocation would succeed, stop compacting */
+                if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
                         break;
         }
 
@@ -531,6 +620,7 @@ static int compact_node(int nid)
                 .nr_freepages = 0,
                 .nr_migratepages = 0,
                 .order = -1,
+                .compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
         };
 
         zone = &pgdat->node_zones[zoneid];