author    Mel Gorman <mel@csn.ul.ie>    2010-05-24 17:32:30 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2010-05-25 11:06:59 -0400
commit    56de7263fcf3eb10c8dcdf8d59a9cec831795f3f
tree      164637c0b678e20adfdcec4129563d9234faf405
parent    ed4a6d7f0676db50b5023cc01f6cda82a2f2a307
mm: compaction: direct compact when a high-order allocation fails
Ordinarily when a high-order allocation fails, direct reclaim is entered to
free pages to satisfy the allocation.  With this patch, it is determined if
an allocation failed due to external fragmentation instead of low memory
and if so, the calling process will compact until a suitable page is
freed.  Compaction by moving pages in memory is considerably cheaper than
paging out to disk and works where there are locked pages or no swap.  If
compaction fails to free a page of a suitable size, then reclaim will
still occur.

Direct compaction returns as soon as possible.  As each block is
compacted, it is checked if a suitable page has been freed and if so, it
returns.

[akpm@linux-foundation.org: fix build errors]
[aarcange@redhat.com: fix count_vm_event preempt in memory compaction direct reclaim]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
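As a concrete illustration of the ordering described above, the sketch below is a minimal userspace model, not kernel code: every identifier in it is invented, and only the control flow mirrors the patch (try compaction first for a high-order request, fall back to direct reclaim if compaction was skipped or did not help).

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins; only the ordering below reflects the patch. */
enum compact_result { COMPACT_SKIPPED, COMPACT_CONTINUE,
                      COMPACT_PARTIAL, COMPACT_COMPLETE };

static enum compact_result try_compaction(unsigned int order)
{
        /* Pretend moving pages freed a block for any high-order request */
        return order ? COMPACT_PARTIAL : COMPACT_SKIPPED;
}

static bool retry_alloc(const char *after)
{
        printf("retrying allocation after %s\n", after);
        return true;    /* assume the retry succeeds for the demo */
}

static bool slowpath(unsigned int order)
{
        /* New step: compact before paging anything out to disk */
        if (try_compaction(order) != COMPACT_SKIPPED && retry_alloc("compaction"))
                return true;

        /* Unchanged fallback: direct reclaim still runs if compaction fails */
        return retry_alloc("direct reclaim");
}

int main(void)
{
        return slowpath(9) ? 0 : 1;     /* e.g. an order-9 (2MiB/4KiB) request */
}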
 include/linux/compaction.h |  24 +++++++++++++++++++-----
 include/linux/vmstat.h     |   1 +
 mm/compaction.c            | 117 ++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c            |  63 +++++++++++++++++++++
 mm/vmstat.c                |  16 ++++++++++++++-
 5 files changed, 215 insertions(+), 6 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index ba98cfe0ae15..eed40ec4280b 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,15 +1,31 @@
 #ifndef _LINUX_COMPACTION_H
 #define _LINUX_COMPACTION_H
 
-/* Return values for compact_zone() */
-#define COMPACT_CONTINUE        0
-#define COMPACT_PARTIAL         1
-#define COMPACT_COMPLETE        2
+/* Return values for compact_zone() and try_to_compact_pages() */
+/* compaction didn't start as it was not possible or direct reclaim was more suitable */
+#define COMPACT_SKIPPED         0
+/* compaction should continue to another pageblock */
+#define COMPACT_CONTINUE        1
+/* direct compaction partially compacted a zone and there are suitable pages */
+#define COMPACT_PARTIAL         2
+/* The full zone was compacted */
+#define COMPACT_COMPLETE        3
 
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
                        void __user *buffer, size_t *length, loff_t *ppos);
+
+extern int fragmentation_index(struct zone *zone, unsigned int order);
+extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
+                        int order, gfp_t gfp_mask, nodemask_t *mask);
+#else
+static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
+                        int order, gfp_t gfp_mask, nodemask_t *nodemask)
+{
+        return COMPACT_CONTINUE;
+}
+
 #endif /* CONFIG_COMPACTION */
 
 #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
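One property of the renumbered return codes is worth calling out: they are ordered by how much progress was made, which is what lets try_to_compact_pages() further down aggregate per-zone results with a bare max(). A small standalone illustration (the per-zone outcomes in the array are invented):

#include <stdio.h>

#define COMPACT_SKIPPED         0
#define COMPACT_CONTINUE        1
#define COMPACT_PARTIAL         2
#define COMPACT_COMPLETE        3

int main(void)
{
        /* Invented per-zone outcomes; rc keeps the best one, as in
         * try_to_compact_pages(): rc = max(status, rc). */
        int status[] = { COMPACT_CONTINUE, COMPACT_PARTIAL, COMPACT_SKIPPED };
        int rc = COMPACT_SKIPPED;

        for (unsigned int i = 0; i < sizeof(status) / sizeof(status[0]); i++)
                rc = status[i] > rc ? status[i] : rc;

        printf("aggregate rc = %d (COMPACT_PARTIAL)\n", rc);
        return 0;
}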
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index b421d1b22b62..7f43ccdc1d38 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -45,6 +45,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                 PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_COMPACTION
                 COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
+                COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
                 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/mm/compaction.c b/mm/compaction.c
index f61f77983ff4..9583e193dc47 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -35,6 +35,8 @@ struct compact_control {
         unsigned long nr_anon;
         unsigned long nr_file;
 
+        unsigned int order;             /* order a direct compactor needs */
+        int migratetype;                /* MOVABLE, RECLAIMABLE etc */
         struct zone *zone;
 };
 
@@ -341,6 +343,9 @@ static void update_nr_listpages(struct compact_control *cc)
 static int compact_finished(struct zone *zone,
                                                 struct compact_control *cc)
 {
+        unsigned int order;
+        unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+
         if (fatal_signal_pending(current))
                 return COMPACT_PARTIAL;
 
@@ -348,6 +353,24 @@ static int compact_finished(struct zone *zone,
         if (cc->free_pfn <= cc->migrate_pfn)
                 return COMPACT_COMPLETE;
 
+        /* Compaction run is not finished if the watermark is not met */
+        if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
+                return COMPACT_CONTINUE;
+
+        if (cc->order == -1)
+                return COMPACT_CONTINUE;
+
+        /* Direct compactor: Is a suitable page free? */
+        for (order = cc->order; order < MAX_ORDER; order++) {
+                /* Job done if page is free of the right migratetype */
+                if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
+                        return COMPACT_PARTIAL;
+
+                /* Job done if allocation would set block type */
+                if (order >= pageblock_order && zone->free_area[order].nr_free)
+                        return COMPACT_PARTIAL;
+        }
+
         return COMPACT_CONTINUE;
 }
 
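The early-exit watermark in compact_finished() is simply the zone low watermark plus the request size, so a direct compactor stops as soon as the request could plausibly be met rather than compacting the whole zone. A worked example with assumed numbers:

#include <stdio.h>

int main(void)
{
        unsigned long low_wmark = 1024; /* assumed zone low watermark, in pages */
        unsigned int order = 9;         /* 2MiB request with 4KiB base pages */

        /* Mirrors: watermark = low_wmark_pages(zone) + (1 << cc->order) */
        unsigned long watermark = low_wmark + (1UL << order);

        printf("compact until %lu pages are free (%lu + %u)\n",
               watermark, low_wmark, 1U << order);
        return 0;
}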
@@ -394,6 +417,99 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
         return ret;
 }
 
+static unsigned long compact_zone_order(struct zone *zone,
+                                                int order, gfp_t gfp_mask)
+{
+        struct compact_control cc = {
+                .nr_freepages = 0,
+                .nr_migratepages = 0,
+                .order = order,
+                .migratetype = allocflags_to_migratetype(gfp_mask),
+                .zone = zone,
+        };
+        INIT_LIST_HEAD(&cc.freepages);
+        INIT_LIST_HEAD(&cc.migratepages);
+
+        return compact_zone(zone, &cc);
+}
+
+/**
+ * try_to_compact_pages - Direct compact to satisfy a high-order allocation
+ * @zonelist: The zonelist used for the current allocation
+ * @order: The order of the current allocation
+ * @gfp_mask: The GFP mask of the current allocation
+ * @nodemask: The allowed nodes to allocate from
+ *
+ * This is the main entry point for direct page compaction.
+ */
+unsigned long try_to_compact_pages(struct zonelist *zonelist,
+                        int order, gfp_t gfp_mask, nodemask_t *nodemask)
+{
+        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+        int may_enter_fs = gfp_mask & __GFP_FS;
+        int may_perform_io = gfp_mask & __GFP_IO;
+        unsigned long watermark;
+        struct zoneref *z;
+        struct zone *zone;
+        int rc = COMPACT_SKIPPED;
+
+        /*
+         * Check whether it is worth even starting compaction. The order check is
+         * made because an assumption is made that the page allocator can satisfy
+         * the "cheaper" orders without taking special steps
+         */
+        if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
+                return rc;
+
+        count_vm_event(COMPACTSTALL);
+
+        /* Compact each zone in the list */
+        for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
+                                                                nodemask) {
+                int fragindex;
+                int status;
+
+                /*
+                 * Watermarks for order-0 must be met for compaction. Note
+                 * the 2UL. This is because during migration, copies of
+                 * pages need to be allocated and for a short time, the
+                 * footprint is higher
+                 */
+                watermark = low_wmark_pages(zone) + (2UL << order);
+                if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+                        continue;
+
+                /*
+                 * fragmentation index determines if allocation failures are
+                 * due to low memory or external fragmentation
+                 *
+                 * index of -1 implies allocations might succeed depending
+                 * on watermarks
+                 * index towards 0 implies failure is due to lack of memory
+                 * index towards 1000 implies failure is due to fragmentation
+                 *
+                 * Only compact if a failure would be due to fragmentation.
+                 */
+                fragindex = fragmentation_index(zone, order);
+                if (fragindex >= 0 && fragindex <= 500)
+                        continue;
+
+                if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
+                        rc = COMPACT_PARTIAL;
+                        break;
+                }
+
+                status = compact_zone_order(zone, order, gfp_mask);
+                rc = max(status, rc);
+
+                if (zone_watermark_ok(zone, order, watermark, 0, 0))
+                        break;
+        }
+
+        return rc;
+}
+
+
 /* Compact all zones within a node */
 static int compact_node(int nid)
 {
@@ -412,6 +528,7 @@ static int compact_node(int nid)
                 struct compact_control cc = {
                         .nr_freepages = 0,
                         .nr_migratepages = 0,
+                        .order = -1,
                 };
 
                 zone = &pgdat->node_zones[zoneid];
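Note the different headroom used as the entry gate in try_to_compact_pages(): 2UL << order rather than 1 << order, because migration briefly needs destination pages alongside the originals. With assumed numbers, a short sketch of what that costs for an order-9 request:

#include <stdio.h>

int main(void)
{
        unsigned long low_wmark = 1024; /* assumed zone low watermark, in pages */
        unsigned int order = 9;

        /* Mirrors: watermark = low_wmark_pages(zone) + (2UL << order) */
        unsigned long gate = low_wmark + (2UL << order);

        /* 2UL << 9 == 1024 pages (4MiB with 4KiB pages): room for both
         * copies of the pages being migrated */
        printf("order-0 pages needed before compacting: %lu\n", gate);
        return 0;
}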
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c54376a09f30..cd88a860f088 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -49,6 +49,7 @@
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
 #include <linux/memory.h>
+#include <linux/compaction.h>
 #include <trace/events/kmem.h>
 #include <linux/ftrace_event.h>
 
@@ -1758,6 +1759,59 @@ out:
         return page;
 }
 
+#ifdef CONFIG_COMPACTION
+/* Try memory compaction for high-order allocations before reclaim */
+static struct page *
+__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
+        struct zonelist *zonelist, enum zone_type high_zoneidx,
+        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
+        int migratetype, unsigned long *did_some_progress)
+{
+        struct page *page;
+
+        if (!order)
+                return NULL;
+
+        *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
+                                                                nodemask);
+        if (*did_some_progress != COMPACT_SKIPPED) {
+
+                /* Page migration frees to the PCP lists but we want merging */
+                drain_pages(get_cpu());
+                put_cpu();
+
+                page = get_page_from_freelist(gfp_mask, nodemask,
+                                order, zonelist, high_zoneidx,
+                                alloc_flags, preferred_zone,
+                                migratetype);
+                if (page) {
+                        count_vm_event(COMPACTSUCCESS);
+                        return page;
+                }
+
+                /*
+                 * It's bad if compaction run occurs and fails.
+                 * The most likely reason is that pages exist,
+                 * but not enough to satisfy watermarks.
+                 */
+                count_vm_event(COMPACTFAIL);
+
+                cond_resched();
+        }
+
+        return NULL;
+}
+#else
+static inline struct page *
+__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
+        struct zonelist *zonelist, enum zone_type high_zoneidx,
+        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
+        int migratetype, unsigned long *did_some_progress)
+{
+        return NULL;
+}
+#endif /* CONFIG_COMPACTION */
+
 /* The really slow allocator path where we enter direct reclaim */
 static inline struct page *
 __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -1944,6 +1998,15 @@ rebalance:
         if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
                 goto nopage;
 
+        /* Try direct compaction */
+        page = __alloc_pages_direct_compact(gfp_mask, order,
+                                        zonelist, high_zoneidx,
+                                        nodemask,
+                                        alloc_flags, preferred_zone,
+                                        migratetype, &did_some_progress);
+        if (page)
+                goto got_pg;
+
         /* Try direct reclaim and then allocating */
         page = __alloc_pages_direct_reclaim(gfp_mask, order,
                                         zonelist, high_zoneidx,
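The drain_pages() call in __alloc_pages_direct_compact() is easy to gloss over: migration frees pages to the per-CPU (PCP) lists, where they cannot buddy-merge, so without a drain a freshly compacted region may never surface as a high-order page. A toy model with invented numbers, purely to show the effect:

#include <stdio.h>

int main(void)
{
        int pcp_cached = 4;     /* order-0 pages parked on this CPU's PCP list */
        int contig = 508;       /* contiguous free pages already in the zone */

        printf("contiguous before drain: %d (order-9 needs 512)\n", contig);

        /* drain_pages(): PCP pages return to the buddy lists and can merge */
        contig += pcp_cached;
        pcp_cached = 0;

        printf("contiguous after drain:  %d -> order-9 block available\n", contig);
        return 0;
}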
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c6aacf51b554..7759941d4e77 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -429,7 +429,7 @@ static void fill_contig_page_info(struct zone *zone,
  * The value can be used to determine if page reclaim or compaction
  * should be used
  */
-int fragmentation_index(unsigned int order, struct contig_page_info *info)
+static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
 {
         unsigned long requested = 1UL << order;
 
@@ -448,6 +448,15 @@ int fragmentation_index(unsigned int order, struct contig_page_info *info)
          */
         return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
 }
+
+/* Same as __fragmentation_index but allocs contig_page_info on stack */
+int fragmentation_index(struct zone *zone, unsigned int order)
+{
+        struct contig_page_info info;
+
+        fill_contig_page_info(zone, order, &info);
+        return __fragmentation_index(order, &info);
+}
 #endif
 
 #if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
@@ -771,6 +780,9 @@ static const char * const vmstat_text[] = {
         "compact_blocks_moved",
         "compact_pages_moved",
         "compact_pagemigrate_failed",
+        "compact_stall",
+        "compact_fail",
+        "compact_success",
 #endif
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -1136,7 +1148,7 @@ static void extfrag_show_print(struct seq_file *m,
                                         zone->name);
         for (order = 0; order < MAX_ORDER; ++order) {
                 fill_contig_page_info(zone, order, &info);
-                index = fragmentation_index(order, &info);
+                index = __fragmentation_index(order, &info);
                 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
         }
 
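Finally, to see what the heuristic in try_to_compact_pages() actually computes, here is a userspace re-run of the __fragmentation_index() arithmetic above, with div_u64 replaced by plain 64-bit division and a divide-by-zero guard added for the demo. An index of 0..500 means the failure is mostly a lack of memory, so the caller falls back to reclaim; an index towards 1000 means memory is plentiful but fragmented, so compaction is used. The two zone states below are invented:

#include <stdio.h>
#include <stdint.h>

static int frag_index(unsigned int order, uint64_t free_pages,
                      uint64_t free_blocks_total)
{
        uint64_t requested = 1ULL << order;

        if (!free_blocks_total) /* demo guard against an empty zone */
                return 0;

        /* Same expression as __fragmentation_index() in the hunk above */
        return 1000 - (1000 + (free_pages * 1000 / requested)) / free_blocks_total;
}

int main(void)
{
        /* An order-4 request needs 16 contiguous pages */

        /* 1024 free pages, all isolated order-0 pages: index 937 -> compact */
        printf("fragmented zone: %d\n", frag_index(4, 1024, 1024));

        /* 112 free pages in 8 blocks of ~14 pages: index 0 -> reclaim */
        printf("low-memory zone: %d\n", frag_index(4, 112, 8));
        return 0;
}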