Diffstat (limited to 'mm/compaction.c')
 -rw-r--r--  mm/compaction.c | 174
 1 file changed, 132 insertions(+), 42 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 1a8894eadf72..6d592a021072 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -16,6 +16,9 @@
 #include <linux/sysfs.h>
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/compaction.h>
+
 /*
  * compact_control is used to track pages being migrated and the free pages
  * they are being migrated to during memory compaction. The free_pfn starts
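
[Note: CREATE_TRACE_POINTS is the standard one-time tracepoint instantiation idiom: exactly one translation unit defines it immediately before including the trace header, which makes the TRACE_EVENT() macros in that header expand to full tracepoint definitions rather than the declarations every other includer sees. A minimal sketch of the idiom follows; the foo_bar event and the file names are hypothetical, not the actual events added by this patch.]

    /* include/trace/events/foo.h -- hypothetical trace header */
    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM foo

    #if !defined(_TRACE_FOO_H) || defined(TRACE_HEADER_MULTI_READ)
    #define _TRACE_FOO_H

    #include <linux/tracepoint.h>

    TRACE_EVENT(foo_bar,
            TP_PROTO(unsigned long nr_scanned, unsigned long nr_taken),
            TP_ARGS(nr_scanned, nr_taken),
            TP_STRUCT__entry(
                    __field(unsigned long, nr_scanned)
                    __field(unsigned long, nr_taken)
            ),
            TP_fast_assign(
                    __entry->nr_scanned = nr_scanned;
                    __entry->nr_taken = nr_taken;
            ),
            TP_printk("nr_scanned=%lu nr_taken=%lu",
                    __entry->nr_scanned, __entry->nr_taken)
    );

    #endif /* _TRACE_FOO_H */

    /* This part must be outside the include guard */
    #include <trace/define_trace.h>

    /* foo.c -- exactly one .c file in the tree does this */
    #define CREATE_TRACE_POINTS
    #include <trace/events/foo.h>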
@@ -30,6 +33,7 @@ struct compact_control {
 	unsigned long nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
+	bool sync;			/* Synchronous migration */
 
 	/* Account for isolated anon and file pages */
 	unsigned long nr_anon;
@@ -38,6 +42,8 @@ struct compact_control {
 	unsigned int order;		/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
+
+	int compact_mode;
 };
 
 static unsigned long release_freepages(struct list_head *freelist)
@@ -60,7 +66,7 @@ static unsigned long isolate_freepages_block(struct zone *zone,
 					struct list_head *freelist)
 {
 	unsigned long zone_end_pfn, end_pfn;
-	int total_isolated = 0;
+	int nr_scanned = 0, total_isolated = 0;
 	struct page *cursor;
 
 	/* Get the last PFN we should scan for free pages at */
@@ -81,6 +87,7 @@ static unsigned long isolate_freepages_block(struct zone *zone,
 
 		if (!pfn_valid_within(blockpfn))
 			continue;
+		nr_scanned++;
 
 		if (!PageBuddy(page))
 			continue;
@@ -100,6 +107,7 @@ static unsigned long isolate_freepages_block(struct zone *zone,
 		}
 	}
 
+	trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
 	return total_isolated;
 }
 
@@ -234,6 +242,8 @@ static unsigned long isolate_migratepages(struct zone *zone,
 					struct compact_control *cc)
 {
 	unsigned long low_pfn, end_pfn;
+	unsigned long last_pageblock_nr = 0, pageblock_nr;
+	unsigned long nr_scanned = 0, nr_isolated = 0;
 	struct list_head *migratelist = &cc->migratepages;
 
 	/* Do not scan outside zone boundaries */
@@ -266,20 +276,51 @@ static unsigned long isolate_migratepages(struct zone *zone,
 		struct page *page;
 		if (!pfn_valid_within(low_pfn))
 			continue;
+		nr_scanned++;
 
 		/* Get the page and skip if free */
 		page = pfn_to_page(low_pfn);
 		if (PageBuddy(page))
 			continue;
 
+		/*
+		 * For async migration, also only scan in MOVABLE blocks. Async
+		 * migration is optimistic to see if the minimum amount of work
+		 * satisfies the allocation
+		 */
+		pageblock_nr = low_pfn >> pageblock_order;
+		if (!cc->sync && last_pageblock_nr != pageblock_nr &&
+				get_pageblock_migratetype(page) != MIGRATE_MOVABLE) {
+			low_pfn += pageblock_nr_pages;
+			low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
+			last_pageblock_nr = pageblock_nr;
+			continue;
+		}
+
+		if (!PageLRU(page))
+			continue;
+
+		/*
+		 * PageLRU is set, and lru_lock excludes isolation,
+		 * splitting and collapsing (collapsing has already
+		 * happened if PageLRU is set).
+		 */
+		if (PageTransHuge(page)) {
+			low_pfn += (1 << compound_order(page)) - 1;
+			continue;
+		}
+
 		/* Try isolate the page */
 		if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
 			continue;
 
+		VM_BUG_ON(PageTransCompound(page));
+
 		/* Successfully isolated */
 		del_page_from_lru_list(zone, page, page_lru(page));
 		list_add(&page->lru, migratelist);
 		cc->nr_migratepages++;
+		nr_isolated++;
 
 		/* Avoid isolating too much */
 		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
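
[Note: the pageblock skip above is easy to misread. low_pfn is first pushed one pageblock forward, rounded up to a pageblock boundary with the kernel's ALIGN() round-up macro, then backed off by one so that the enclosing for loop's low_pfn++ lands exactly on the boundary. A user-space sketch of the same arithmetic; the 512-pages-per-block figure assumes 4KiB pages and 2MiB pageblocks, as on a typical x86-64 configuration.]

    #include <stdio.h>

    /* Same definition as the kernel's ALIGN(): round x up to a
     * multiple of a, where a is a power of two. */
    #define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
            const unsigned long pageblock_nr_pages = 512;
            unsigned long low_pfn = 1024;   /* first pfn of pageblock 2 */

            /* The skip from isolate_migratepages() */
            low_pfn += pageblock_nr_pages;
            low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;

            /* The enclosing for loop then executes low_pfn++ */
            low_pfn++;

            printf("resumed at pfn %lu (pageblock %lu)\n",
                   low_pfn, low_pfn / pageblock_nr_pages);
            /* prints: resumed at pfn 1536 (pageblock 3) */
            return 0;
    }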
@@ -291,6 +332,8 @@ static unsigned long isolate_migratepages(struct zone *zone,
 	spin_unlock_irq(&zone->lru_lock);
 	cc->migrate_pfn = low_pfn;
 
+	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
+
 	return cc->nr_migratepages;
 }
 
@@ -341,10 +384,10 @@ static void update_nr_listpages(struct compact_control *cc)
 }
 
 static int compact_finished(struct zone *zone,
 						struct compact_control *cc)
 {
 	unsigned int order;
-	unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
 		return COMPACT_PARTIAL;
@@ -354,12 +397,27 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_COMPLETE;
 
 	/* Compaction run is not finished if the watermark is not met */
+	if (cc->compact_mode != COMPACT_MODE_KSWAPD)
+		watermark = low_wmark_pages(zone);
+	else
+		watermark = high_wmark_pages(zone);
+	watermark += (1 << cc->order);
+
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
 		return COMPACT_CONTINUE;
 
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;
 
+	/*
+	 * Generating only one page of the right order is not enough
+	 * for kswapd, we must continue until we're above the high
+	 * watermark as a pool for high order GFP_ATOMIC allocations
+	 * too.
+	 */
+	if (cc->compact_mode == COMPACT_MODE_KSWAPD)
+		return COMPACT_CONTINUE;
+
 	/* Direct compactor: Is a suitable page free? */
 	for (order = cc->order; order < MAX_ORDER; order++) {
 		/* Job done if page is free of the right migratetype */
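
[Note: the mode split above matters because kswapd compacts on behalf of future, possibly atomic, allocations rather than one caller, so it aims at the high watermark to keep a reserve of high-order pages. For a THP-sized request (order 9 on x86-64) the (1 << cc->order) term adds 512 pages, i.e. 2MiB of headroom. A minimal user-space restatement of the target computation; the watermark numbers are made up, and the COMPACT_MODE_* values merely mirror the modes this patch introduces.]

    #include <stdio.h>

    enum compact_mode {
            COMPACT_MODE_DIRECT_RECLAIM,
            COMPACT_MODE_KSWAPD,
    };

    /* Placeholder watermarks in pages; real values come from the zone. */
    static unsigned long low_wmark = 1000, high_wmark = 1500;

    static unsigned long compact_target(int mode, unsigned int order)
    {
            unsigned long watermark;

            /* kswapd compacts toward the high watermark so a pool of
             * high-order pages survives for GFP_ATOMIC allocations too. */
            if (mode != COMPACT_MODE_KSWAPD)
                    watermark = low_wmark;
            else
                    watermark = high_wmark;
            return watermark + (1UL << order);
    }

    int main(void)
    {
            printf("direct, order 9: %lu pages\n",
                   compact_target(COMPACT_MODE_DIRECT_RECLAIM, 9)); /* 1512 */
            printf("kswapd, order 9: %lu pages\n",
                   compact_target(COMPACT_MODE_KSWAPD, 9));         /* 2012 */
            return 0;
    }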
@@ -374,10 +432,62 @@ static int compact_finished(struct zone *zone,
 	return COMPACT_CONTINUE;
 }
 
+/*
+ * compaction_suitable: Is this suitable to run compaction on this zone now?
+ * Returns
+ *   COMPACT_SKIPPED  - If there are too few free pages for compaction
+ *   COMPACT_PARTIAL  - If the allocation would succeed without compaction
+ *   COMPACT_CONTINUE - If compaction should run now
+ */
+unsigned long compaction_suitable(struct zone *zone, int order)
+{
+	int fragindex;
+	unsigned long watermark;
+
+	/*
+	 * Watermarks for order-0 must be met for compaction. Note the 2UL.
+	 * This is because during migration, copies of pages need to be
+	 * allocated and for a short time, the footprint is higher
+	 */
+	watermark = low_wmark_pages(zone) + (2UL << order);
+	if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+		return COMPACT_SKIPPED;
+
+	/*
+	 * fragmentation index determines if allocation failures are due to
+	 * low memory or external fragmentation
+	 *
+	 * index of -1 implies allocations might succeed depending on watermarks
+	 * index towards 0 implies failure is due to lack of memory
+	 * index towards 1000 implies failure is due to fragmentation
+	 *
+	 * Only compact if a failure would be due to fragmentation.
+	 */
+	fragindex = fragmentation_index(zone, order);
+	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
+		return COMPACT_SKIPPED;
+
+	if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0))
+		return COMPACT_PARTIAL;
+
+	return COMPACT_CONTINUE;
+}
+
 static int compact_zone(struct zone *zone, struct compact_control *cc)
 {
 	int ret;
 
+	ret = compaction_suitable(zone, cc->order);
+	switch (ret) {
+	case COMPACT_PARTIAL:
+	case COMPACT_SKIPPED:
+		/* Compaction is likely to fail */
+		return ret;
+	case COMPACT_CONTINUE:
+		/* Fall through to compaction */
+		;
+	}
+
 	/* Setup to move all movable pages to the end of the zone */
 	cc->migrate_pfn = zone->zone_start_pfn;
 	cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
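
[Note: compaction_suitable() folds the per-zone go/no-go heuristics into one place so that every compaction entry point can share them. A user-space sketch of the decision flow follows; fragmentation_index() and the watermark test are stubbed with illustrative values, the real helpers live in mm/vmstat.c and mm/page_alloc.c.]

    #include <stdio.h>

    enum { COMPACT_SKIPPED, COMPACT_PARTIAL, COMPACT_CONTINUE };

    static int sysctl_extfrag_threshold = 500;      /* kernel default */

    /* Stubs standing in for the kernel helpers. */
    static int fragmentation_index(int order) { return 700; }
    static int zone_watermark_ok(int order, unsigned long mark) { return 1; }

    static unsigned long free_pages = 4096;
    static unsigned long low_wmark  = 1000;

    static int suitable(int order)
    {
            /* 2UL << order: migration briefly needs source and destination
             * pages at once, so demand double the allocation's footprint. */
            unsigned long watermark = low_wmark + (2UL << order);
            int fragindex;

            if (free_pages < watermark)
                    return COMPACT_SKIPPED; /* too few pages to shuffle */

            fragindex = fragmentation_index(order);
            if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
                    return COMPACT_SKIPPED; /* failure would be lack of memory */

            if (fragindex == -1 && zone_watermark_ok(order, watermark))
                    return COMPACT_PARTIAL; /* allocation should already succeed */

            return COMPACT_CONTINUE;
    }

    int main(void)
    {
            printf("order-9 verdict: %d\n", suitable(9)); /* COMPACT_CONTINUE */
            return 0;
    }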
@@ -393,7 +503,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
 		nr_migrate = cc->nr_migratepages;
 		migrate_pages(&cc->migratepages, compaction_alloc,
-				(unsigned long)cc, 0);
+				(unsigned long)cc, false,
+				cc->sync);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
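
[Note: the two new arguments correspond to migrate_pages()'s offlining and sync parameters. In this era of the tree its prototype, declared in include/linux/migrate.h, has roughly the shape below; this is stated from memory, so treat it as indicative rather than authoritative.]

    int migrate_pages(struct list_head *from, new_page_t get_new_page,
                      unsigned long private, bool offlining, bool sync);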
@@ -401,6 +512,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		count_vm_events(COMPACTPAGES, nr_migrate - nr_remaining);
 		if (nr_remaining)
 			count_vm_events(COMPACTPAGEFAILED, nr_remaining);
+		trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
+						nr_remaining);
 
 		/* Release LRU pages not migrated */
 		if (!list_empty(&cc->migratepages)) {
@@ -417,8 +530,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	return ret;
 }
 
-static unsigned long compact_zone_order(struct zone *zone,
-						int order, gfp_t gfp_mask)
+unsigned long compact_zone_order(struct zone *zone,
+				 int order, gfp_t gfp_mask,
+				 bool sync,
+				 int compact_mode)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -426,6 +541,8 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.order = order,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
+		.sync = sync,
+		.compact_mode = compact_mode,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -441,16 +558,17 @@ int sysctl_extfrag_threshold = 500;
  * @order: The order of the current allocation
  * @gfp_mask: The GFP mask of the current allocation
  * @nodemask: The allowed nodes to allocate from
+ * @sync: Whether migration is synchronous or not
  *
  * This is the main entry point for direct page compaction.
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
-			int order, gfp_t gfp_mask, nodemask_t *nodemask)
+			int order, gfp_t gfp_mask, nodemask_t *nodemask,
+			bool sync)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
 	int may_perform_io = gfp_mask & __GFP_IO;
-	unsigned long watermark;
 	struct zoneref *z;
 	struct zone *zone;
 	int rc = COMPACT_SKIPPED;
@@ -460,7 +578,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	 * made because an assumption is made that the page allocator can satisfy
 	 * the "cheaper" orders without taking special steps
 	 */
-	if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
+	if (!order || !may_enter_fs || !may_perform_io)
 		return rc;
 
 	count_vm_event(COMPACTSTALL);
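
[Note: two things change in this gate. Compaction is now tried for any order above zero rather than only orders above PAGE_ALLOC_COSTLY_ORDER, and, as before, callers that cannot enter the filesystem or perform IO are turned away, since page migration may need both. A user-space sketch of the predicate; the flag bit values are illustrative stand-ins, the real definitions live in include/linux/gfp.h.]

    #include <stdio.h>

    /* Illustrative flag bits only; see include/linux/gfp.h for the
     * real values (GFP_ATOMIC in particular carries other bits). */
    #define __GFP_IO        0x40u
    #define __GFP_FS        0x80u
    #define GFP_KERNEL      (__GFP_IO | __GFP_FS)   /* may do IO and FS */
    #define GFP_ATOMIC      0x20u                   /* no IO, no FS */

    static int may_compact(unsigned int order, unsigned int gfp_mask)
    {
            int may_enter_fs = gfp_mask & __GFP_FS;
            int may_perform_io = gfp_mask & __GFP_IO;

            /* Same gate as try_to_compact_pages(): order-0 allocations
             * cannot be helped by compaction, and migration needs FS/IO. */
            return order && may_enter_fs && may_perform_io;
    }

    int main(void)
    {
            printf("GFP_KERNEL order 9: %d\n", may_compact(9, GFP_KERNEL)); /* 1 */
            printf("GFP_ATOMIC order 9: %d\n", may_compact(9, GFP_ATOMIC)); /* 0 */
            printf("GFP_KERNEL order 0: %d\n", may_compact(0, GFP_KERNEL)); /* 0 */
            return 0;
    }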
@@ -468,43 +586,14 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	/* Compact each zone in the list */
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
 								nodemask) {
-		int fragindex;
 		int status;
 
-		/*
-		 * Watermarks for order-0 must be met for compaction. Note
-		 * the 2UL. This is because during migration, copies of
-		 * pages need to be allocated and for a short time, the
-		 * footprint is higher
-		 */
-		watermark = low_wmark_pages(zone) + (2UL << order);
-		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
-			continue;
-
-		/*
-		 * fragmentation index determines if allocation failures are
-		 * due to low memory or external fragmentation
-		 *
-		 * index of -1 implies allocations might succeed depending
-		 * on watermarks
-		 * index towards 0 implies failure is due to lack of memory
-		 * index towards 1000 implies failure is due to fragmentation
-		 *
-		 * Only compact if a failure would be due to fragmentation.
-		 */
-		fragindex = fragmentation_index(zone, order);
-		if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
-			continue;
-
-		if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
-			rc = COMPACT_PARTIAL;
-			break;
-		}
-
-		status = compact_zone_order(zone, order, gfp_mask);
+		status = compact_zone_order(zone, order, gfp_mask, sync,
+					    COMPACT_MODE_DIRECT_RECLAIM);
 		rc = max(status, rc);
 
-		if (zone_watermark_ok(zone, order, watermark, 0, 0))
+		/* If a normal allocation would succeed, stop compacting */
+		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
 			break;
 	}
 
@@ -531,6 +620,7 @@ static int compact_node(int nid)
 			.nr_freepages = 0,
 			.nr_migratepages = 0,
 			.order = -1,
+			.compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
 		};
 
 		zone = &pgdat->node_zones[zoneid];