author		Mel Gorman <mel@csn.ul.ie>	2010-05-24 17:32:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-05-25 11:06:59 -0400
commit		56de7263fcf3eb10c8dcdf8d59a9cec831795f3f (patch)
tree		164637c0b678e20adfdcec4129563d9234faf405 /mm/compaction.c
parent		ed4a6d7f0676db50b5023cc01f6cda82a2f2a307 (diff)
mm: compaction: direct compact when a high-order allocation fails
Ordinarily when a high-order allocation fails, direct reclaim is entered to
free pages to satisfy the allocation.  With this patch, it is determined if
an allocation failed due to external fragmentation instead of low memory
and if so, the calling process will compact until a suitable page is
freed.  Compaction by moving pages in memory is considerably cheaper than
paging out to disk and works where there are locked pages or no swap.  If
compaction fails to free a page of a suitable size, then reclaim will
still occur.

Direct compaction returns as soon as possible.  As each block is
compacted, it is checked if a suitable page has been freed and if so, it
returns.

[akpm@linux-foundation.org: Fix build errors]
[aarcange@redhat.com: fix count_vm_event preempt in memory compaction direct reclaim]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
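For orientation, a simplified sketch of the compaction-before-reclaim ordering the
commit message describes. The helpers retry_freelists() and direct_reclaim() are
illustrative placeholders, not kernel APIs; the real wiring is done in
mm/page_alloc.c by this same commit.

#include <linux/compaction.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>

/* Sketch only: try compaction first for a failed high-order allocation */
static struct page *alloc_slowpath_sketch(struct zonelist *zonelist, int order,
					  gfp_t gfp_mask, nodemask_t *nodemask)
{
	struct page *page = NULL;

	/* Moving pages in memory is much cheaper than paging out to disk */
	if (try_to_compact_pages(zonelist, order, gfp_mask, nodemask) !=
	    COMPACT_SKIPPED)
		page = retry_freelists(zonelist, order, gfp_mask, nodemask);

	/* If compaction could not free a suitable page, reclaim still runs */
	if (!page)
		page = direct_reclaim(zonelist, order, gfp_mask, nodemask);

	return page;
}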
Diffstat (limited to 'mm/compaction.c')
-rw-r--r--	mm/compaction.c	117
1 file changed, 117 insertions(+), 0 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index f61f77983ff4..9583e193dc47 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -35,6 +35,8 @@ struct compact_control {
 	unsigned long nr_anon;
 	unsigned long nr_file;
 
+	unsigned int order;		/* order a direct compactor needs */
+	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
 };
 
@@ -341,6 +343,9 @@ static void update_nr_listpages(struct compact_control *cc)
 static int compact_finished(struct zone *zone,
 						struct compact_control *cc)
 {
+	unsigned int order;
+	unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+
 	if (fatal_signal_pending(current))
 		return COMPACT_PARTIAL;
 
@@ -348,6 +353,24 @@ static int compact_finished(struct zone *zone,
 	if (cc->free_pfn <= cc->migrate_pfn)
 		return COMPACT_COMPLETE;
 
+	/* Compaction run is not finished if the watermark is not met */
+	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
+		return COMPACT_CONTINUE;
+
+	if (cc->order == -1)
+		return COMPACT_CONTINUE;
+
+	/* Direct compactor: Is a suitable page free? */
+	for (order = cc->order; order < MAX_ORDER; order++) {
+		/* Job done if page is free of the right migratetype */
+		if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
+			return COMPACT_PARTIAL;
+
+		/* Job done if allocation would set block type */
+		if (order >= pageblock_order && zone->free_area[order].nr_free)
+			return COMPACT_PARTIAL;
+	}
+
 	return COMPACT_CONTINUE;
 }
 
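compact_finished() reports progress through a small set of status codes; their
numeric ordering is what lets try_to_compact_pages() below keep the best result
across zones with rc = max(status, rc). A sketch of the definitions added
elsewhere in this series (include/linux/compaction.h; comments paraphrased):

/* Return values for compact_zone() and try_to_compact_pages() */

/* compaction did not start: not possible, or direct reclaim was more suitable */
#define COMPACT_SKIPPED		0
/* compaction should continue to another pageblock */
#define COMPACT_CONTINUE	1
/* direct compaction partially compacted a zone; a suitable page may now be free */
#define COMPACT_PARTIAL		2
/* the full zone was compacted */
#define COMPACT_COMPLETE	3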
@@ -394,6 +417,99 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	return ret;
 }
 
+static unsigned long compact_zone_order(struct zone *zone,
+						int order, gfp_t gfp_mask)
+{
+	struct compact_control cc = {
+		.nr_freepages = 0,
+		.nr_migratepages = 0,
+		.order = order,
+		.migratetype = allocflags_to_migratetype(gfp_mask),
+		.zone = zone,
+	};
+	INIT_LIST_HEAD(&cc.freepages);
+	INIT_LIST_HEAD(&cc.migratepages);
+
+	return compact_zone(zone, &cc);
+}
+
+/**
+ * try_to_compact_pages - Direct compact to satisfy a high-order allocation
+ * @zonelist: The zonelist used for the current allocation
+ * @order: The order of the current allocation
+ * @gfp_mask: The GFP mask of the current allocation
+ * @nodemask: The allowed nodes to allocate from
+ *
+ * This is the main entry point for direct page compaction.
+ */
+unsigned long try_to_compact_pages(struct zonelist *zonelist,
+			int order, gfp_t gfp_mask, nodemask_t *nodemask)
+{
+	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+	int may_enter_fs = gfp_mask & __GFP_FS;
+	int may_perform_io = gfp_mask & __GFP_IO;
+	unsigned long watermark;
+	struct zoneref *z;
+	struct zone *zone;
+	int rc = COMPACT_SKIPPED;
+
+	/*
+	 * Check whether it is worth even starting compaction. The order check is
+	 * made because an assumption is made that the page allocator can satisfy
+	 * the "cheaper" orders without taking special steps
+	 */
+	if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
+		return rc;
+
+	count_vm_event(COMPACTSTALL);
+
+	/* Compact each zone in the list */
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
+								nodemask) {
+		int fragindex;
+		int status;
+
+		/*
+		 * Watermarks for order-0 must be met for compaction. Note
+		 * the 2UL. This is because during migration, copies of
+		 * pages need to be allocated and for a short time, the
+		 * footprint is higher
+		 */
+		watermark = low_wmark_pages(zone) + (2UL << order);
+		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+			continue;
+
+		/*
+		 * fragmentation index determines if allocation failures are
+		 * due to low memory or external fragmentation
+		 *
+		 * index of -1 implies allocations might succeed depending
+		 * on watermarks
+		 * index towards 0 implies failure is due to lack of memory
+		 * index towards 1000 implies failure is due to fragmentation
+		 *
+		 * Only compact if a failure would be due to fragmentation.
+		 */
+		fragindex = fragmentation_index(zone, order);
+		if (fragindex >= 0 && fragindex <= 500)
+			continue;
+
+		if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
+			rc = COMPACT_PARTIAL;
+			break;
+		}
+
+		status = compact_zone_order(zone, order, gfp_mask);
+		rc = max(status, rc);
+
+		if (zone_watermark_ok(zone, order, watermark, 0, 0))
+			break;
+	}
+
+	return rc;
+}
+
+
 /* Compact all zones within a node */
 static int compact_node(int nid)
 {
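The fragmentation index that gates the decision above is computed in
mm/vmstat.c. A self-contained sketch of the idea (the in-kernel version works
from a struct contig_page_info and uses div_u64(); treat the details here as
approximate):

/*
 * Approximation of mm/vmstat.c:__fragmentation_index().
 *
 * Returns -1 when a free block of the requested order already exists
 * (success then depends only on watermarks). Otherwise returns 0-1000:
 * values near 0 mean failure is due to lack of memory, values near
 * 1000 mean failure is due to external fragmentation.
 */
static int fragmentation_index_sketch(unsigned long free_pages,
				      unsigned long free_blocks_total,
				      unsigned long free_blocks_suitable,
				      unsigned int order)
{
	unsigned long requested = 1UL << order;

	if (!free_blocks_total)
		return 0;

	/* A suitably sized block is already free */
	if (free_blocks_suitable)
		return -1;

	/*
	 * Many too-small free blocks relative to the request size push
	 * the index towards 1000; few free pages overall push it
	 * towards 0.
	 */
	return 1000 - (1000 + (free_pages * 1000 / requested)) / free_blocks_total;
}

try_to_compact_pages() only compacts when the index exceeds 500, i.e. when the
failure looks more like external fragmentation than a genuine memory shortage.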
@@ -412,6 +528,7 @@ static int compact_node(int nid)
 	struct compact_control cc = {
 		.nr_freepages = 0,
 		.nr_migratepages = 0,
+		.order = -1,
 	};
 
 	zone = &pgdat->node_zones[zoneid];
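Note the .order = -1 initializer: an explicitly triggered whole-node compaction
has no allocation to satisfy, so compact_finished() above always returns
COMPACT_CONTINUE for it and each zone is compacted until the migrate and free
scanners meet. The parent commit ed4a6d7f0676 adds the trigger for this path;
assuming that interface, it is driven with e.g.
echo 1 > /proc/sys/vm/compact_memory.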