diff options
author | Mel Gorman <mel@csn.ul.ie> | 2010-05-24 17:32:30 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-25 11:06:59 -0400 |
commit | 56de7263fcf3eb10c8dcdf8d59a9cec831795f3f (patch) | |
tree | 164637c0b678e20adfdcec4129563d9234faf405 /mm/compaction.c | |
parent | ed4a6d7f0676db50b5023cc01f6cda82a2f2a307 (diff) |
mm: compaction: direct compact when a high-order allocation fails
Ordinarily when a high-order allocation fails, direct reclaim is entered
to free pages to satisfy the allocation. With this patch, it is
determined if an allocation failed due to external fragmentation instead
of low memory and if so, the calling process will compact until a suitable
page is freed. Compaction by moving pages in memory is considerably
cheaper than paging out to disk and works where there are locked pages or
no swap. If compaction fails to free a page of a suitable size, then
reclaim will still occur.
Direct compaction returns as soon as possible. After each block is
compacted, it checks whether a suitable page has been freed and, if so,
returns immediately.
[akpm@linux-foundation.org: Fix build errors]
[aarcange@redhat.com: fix count_vm_event preempt in memory compaction direct reclaim]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/compaction.c')
-rw-r--r-- | mm/compaction.c | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/mm/compaction.c b/mm/compaction.c index f61f77983ff4..9583e193dc47 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -35,6 +35,8 @@ struct compact_control { | |||
35 | unsigned long nr_anon; | 35 | unsigned long nr_anon; |
36 | unsigned long nr_file; | 36 | unsigned long nr_file; |
37 | 37 | ||
38 | unsigned int order; /* order a direct compactor needs */ | ||
39 | int migratetype; /* MOVABLE, RECLAIMABLE etc */ | ||
38 | struct zone *zone; | 40 | struct zone *zone; |
39 | }; | 41 | }; |
40 | 42 | ||
@@ -341,6 +343,9 @@ static void update_nr_listpages(struct compact_control *cc) | |||
341 | static int compact_finished(struct zone *zone, | 343 | static int compact_finished(struct zone *zone, |
342 | struct compact_control *cc) | 344 | struct compact_control *cc) |
343 | { | 345 | { |
346 | unsigned int order; | ||
347 | unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order); | ||
348 | |||
344 | if (fatal_signal_pending(current)) | 349 | if (fatal_signal_pending(current)) |
345 | return COMPACT_PARTIAL; | 350 | return COMPACT_PARTIAL; |
346 | 351 | ||
@@ -348,6 +353,24 @@ static int compact_finished(struct zone *zone, | |||
348 | if (cc->free_pfn <= cc->migrate_pfn) | 353 | if (cc->free_pfn <= cc->migrate_pfn) |
349 | return COMPACT_COMPLETE; | 354 | return COMPACT_COMPLETE; |
350 | 355 | ||
356 | /* Compaction run is not finished if the watermark is not met */ | ||
357 | if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) | ||
358 | return COMPACT_CONTINUE; | ||
359 | |||
360 | if (cc->order == -1) | ||
361 | return COMPACT_CONTINUE; | ||
362 | |||
363 | /* Direct compactor: Is a suitable page free? */ | ||
364 | for (order = cc->order; order < MAX_ORDER; order++) { | ||
365 | /* Job done if page is free of the right migratetype */ | ||
366 | if (!list_empty(&zone->free_area[order].free_list[cc->migratetype])) | ||
367 | return COMPACT_PARTIAL; | ||
368 | |||
369 | /* Job done if allocation would set block type */ | ||
370 | if (order >= pageblock_order && zone->free_area[order].nr_free) | ||
371 | return COMPACT_PARTIAL; | ||
372 | } | ||
373 | |||
351 | return COMPACT_CONTINUE; | 374 | return COMPACT_CONTINUE; |
352 | } | 375 | } |
353 | 376 | ||
@@ -394,6 +417,99 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) | |||
394 | return ret; | 417 | return ret; |
395 | } | 418 | } |
396 | 419 | ||
420 | static unsigned long compact_zone_order(struct zone *zone, | ||
421 | int order, gfp_t gfp_mask) | ||
422 | { | ||
423 | struct compact_control cc = { | ||
424 | .nr_freepages = 0, | ||
425 | .nr_migratepages = 0, | ||
426 | .order = order, | ||
427 | .migratetype = allocflags_to_migratetype(gfp_mask), | ||
428 | .zone = zone, | ||
429 | }; | ||
430 | INIT_LIST_HEAD(&cc.freepages); | ||
431 | INIT_LIST_HEAD(&cc.migratepages); | ||
432 | |||
433 | return compact_zone(zone, &cc); | ||
434 | } | ||
435 | |||
436 | /** | ||
437 | * try_to_compact_pages - Direct compact to satisfy a high-order allocation | ||
438 | * @zonelist: The zonelist used for the current allocation | ||
439 | * @order: The order of the current allocation | ||
440 | * @gfp_mask: The GFP mask of the current allocation | ||
441 | * @nodemask: The allowed nodes to allocate from | ||
442 | * | ||
443 | * This is the main entry point for direct page compaction. | ||
444 | */ | ||
445 | unsigned long try_to_compact_pages(struct zonelist *zonelist, | ||
446 | int order, gfp_t gfp_mask, nodemask_t *nodemask) | ||
447 | { | ||
448 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | ||
449 | int may_enter_fs = gfp_mask & __GFP_FS; | ||
450 | int may_perform_io = gfp_mask & __GFP_IO; | ||
451 | unsigned long watermark; | ||
452 | struct zoneref *z; | ||
453 | struct zone *zone; | ||
454 | int rc = COMPACT_SKIPPED; | ||
455 | |||
456 | /* | ||
457 | * Check whether it is worth even starting compaction. The order check is | ||
458 | * made because an assumption is made that the page allocator can satisfy | ||
459 | * the "cheaper" orders without taking special steps | ||
460 | */ | ||
461 | if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io) | ||
462 | return rc; | ||
463 | |||
464 | count_vm_event(COMPACTSTALL); | ||
465 | |||
466 | /* Compact each zone in the list */ | ||
467 | for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, | ||
468 | nodemask) { | ||
469 | int fragindex; | ||
470 | int status; | ||
471 | |||
472 | /* | ||
473 | * Watermarks for order-0 must be met for compaction. Note | ||
474 | * the 2UL. This is because during migration, copies of | ||
475 | * pages need to be allocated and for a short time, the | ||
476 | * footprint is higher | ||
477 | */ | ||
478 | watermark = low_wmark_pages(zone) + (2UL << order); | ||
479 | if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) | ||
480 | continue; | ||
481 | |||
482 | /* | ||
483 | * fragmentation index determines if allocation failures are | ||
484 | * due to low memory or external fragmentation | ||
485 | * | ||
486 | * index of -1 implies allocations might succeed depending | ||
487 | * on watermarks | ||
488 | * index towards 0 implies failure is due to lack of memory | ||
489 | * index towards 1000 implies failure is due to fragmentation | ||
490 | * | ||
491 | * Only compact if a failure would be due to fragmentation. | ||
492 | */ | ||
493 | fragindex = fragmentation_index(zone, order); | ||
494 | if (fragindex >= 0 && fragindex <= 500) | ||
495 | continue; | ||
496 | |||
497 | if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) { | ||
498 | rc = COMPACT_PARTIAL; | ||
499 | break; | ||
500 | } | ||
501 | |||
502 | status = compact_zone_order(zone, order, gfp_mask); | ||
503 | rc = max(status, rc); | ||
504 | |||
505 | if (zone_watermark_ok(zone, order, watermark, 0, 0)) | ||
506 | break; | ||
507 | } | ||
508 | |||
509 | return rc; | ||
510 | } | ||
511 | |||
512 | |||
397 | /* Compact all zones within a node */ | 513 | /* Compact all zones within a node */ |
398 | static int compact_node(int nid) | 514 | static int compact_node(int nid) |
399 | { | 515 | { |
@@ -412,6 +528,7 @@ static int compact_node(int nid) | |||
412 | struct compact_control cc = { | 528 | struct compact_control cc = { |
413 | .nr_freepages = 0, | 529 | .nr_freepages = 0, |
414 | .nr_migratepages = 0, | 530 | .nr_migratepages = 0, |
531 | .order = -1, | ||
415 | }; | 532 | }; |
416 | 533 | ||
417 | zone = &pgdat->node_zones[zoneid]; | 534 | zone = &pgdat->node_zones[zoneid]; |