 include/linux/compaction.h |  24
 include/linux/vmstat.h     |   1
 mm/compaction.c            | 117
 mm/page_alloc.c            |  63
 mm/vmstat.c                |  16
 5 files changed, 215 insertions(+), 6 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index ba98cfe0ae15..eed40ec4280b 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,15 +1,31 @@
 #ifndef _LINUX_COMPACTION_H
 #define _LINUX_COMPACTION_H
 
-/* Return values for compact_zone() */
-#define COMPACT_CONTINUE        0
-#define COMPACT_PARTIAL         1
-#define COMPACT_COMPLETE        2
+/* Return values for compact_zone() and try_to_compact_pages() */
+/* compaction didn't start as it was not possible or direct reclaim was more suitable */
+#define COMPACT_SKIPPED         0
+/* compaction should continue to another pageblock */
+#define COMPACT_CONTINUE        1
+/* direct compaction partially compacted a zone and there are suitable pages */
+#define COMPACT_PARTIAL         2
+/* The full zone was compacted */
+#define COMPACT_COMPLETE        3
 
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
                        void __user *buffer, size_t *length, loff_t *ppos);
+
+extern int fragmentation_index(struct zone *zone, unsigned int order);
+extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
+                        int order, gfp_t gfp_mask, nodemask_t *mask);
+#else
+static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
+                        int order, gfp_t gfp_mask, nodemask_t *nodemask)
+{
+        return COMPACT_CONTINUE;
+}
+
 #endif /* CONFIG_COMPACTION */
 
 #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
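
The four COMPACT_* values above form an ordered scale from "nothing was attempted" to "the whole zone was scanned", which is what lets try_to_compact_pages() further down combine per-zone results with max(). As a minimal illustration, here is ordinary userspace C rather than kernel code; the constants are copied from the new header, while the result-to-action mapping is invented for the example:

/*
 * Standalone sketch: branch on the result of a direct-compaction attempt.
 * The constants mirror the new include/linux/compaction.h above.
 */
#include <stdio.h>

#define COMPACT_SKIPPED         0       /* compaction was not attempted */
#define COMPACT_CONTINUE        1       /* more pageblocks left to scan */
#define COMPACT_PARTIAL         2       /* a suitable high-order page should now be free */
#define COMPACT_COMPLETE        3       /* the whole zone was scanned */

static const char *next_step(unsigned long rc)
{
        switch (rc) {
        case COMPACT_SKIPPED:   return "fall through to direct reclaim";
        case COMPACT_PARTIAL:   return "retry the allocation";
        case COMPACT_COMPLETE:  return "zone fully scanned, retry then reclaim";
        default:                return "keep compacting";
        }
}

int main(void)
{
        unsigned long rc;

        for (rc = COMPACT_SKIPPED; rc <= COMPACT_COMPLETE; rc++)
                printf("%lu -> %s\n", rc, next_step(rc));
        return 0;
}
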
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index b421d1b22b62..7f43ccdc1d38 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -45,6 +45,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                 PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_COMPACTION
                 COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
+                COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
                 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/mm/compaction.c b/mm/compaction.c
index f61f77983ff4..9583e193dc47 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -35,6 +35,8 @@ struct compact_control {
         unsigned long nr_anon;
         unsigned long nr_file;
 
+        unsigned int order;             /* order a direct compactor needs */
+        int migratetype;                /* MOVABLE, RECLAIMABLE etc */
         struct zone *zone;
 };
 
@@ -341,6 +343,9 @@ static void update_nr_listpages(struct compact_control *cc)
 static int compact_finished(struct zone *zone,
                                                 struct compact_control *cc)
 {
+        unsigned int order;
+        unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+
         if (fatal_signal_pending(current))
                 return COMPACT_PARTIAL;
 
@@ -348,6 +353,24 @@ static int compact_finished(struct zone *zone,
         if (cc->free_pfn <= cc->migrate_pfn)
                 return COMPACT_COMPLETE;
 
+        /* Compaction run is not finished if the watermark is not met */
+        if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
+                return COMPACT_CONTINUE;
+
+        if (cc->order == -1)
+                return COMPACT_CONTINUE;
+
+        /* Direct compactor: Is a suitable page free? */
+        for (order = cc->order; order < MAX_ORDER; order++) {
+                /* Job done if page is free of the right migratetype */
+                if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
+                        return COMPACT_PARTIAL;
+
+                /* Job done if allocation would set block type */
+                if (order >= pageblock_order && zone->free_area[order].nr_free)
+                        return COMPACT_PARTIAL;
+        }
+
         return COMPACT_CONTINUE;
 }
 
@@ -394,6 +417,99 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
         return ret;
 }
 
+static unsigned long compact_zone_order(struct zone *zone,
+                                                int order, gfp_t gfp_mask)
+{
+        struct compact_control cc = {
+                .nr_freepages = 0,
+                .nr_migratepages = 0,
+                .order = order,
+                .migratetype = allocflags_to_migratetype(gfp_mask),
+                .zone = zone,
+        };
+        INIT_LIST_HEAD(&cc.freepages);
+        INIT_LIST_HEAD(&cc.migratepages);
+
+        return compact_zone(zone, &cc);
+}
+
+/**
+ * try_to_compact_pages - Direct compact to satisfy a high-order allocation
+ * @zonelist: The zonelist used for the current allocation
+ * @order: The order of the current allocation
+ * @gfp_mask: The GFP mask of the current allocation
+ * @nodemask: The allowed nodes to allocate from
+ *
+ * This is the main entry point for direct page compaction.
+ */
+unsigned long try_to_compact_pages(struct zonelist *zonelist,
+                        int order, gfp_t gfp_mask, nodemask_t *nodemask)
+{
+        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+        int may_enter_fs = gfp_mask & __GFP_FS;
+        int may_perform_io = gfp_mask & __GFP_IO;
+        unsigned long watermark;
+        struct zoneref *z;
+        struct zone *zone;
+        int rc = COMPACT_SKIPPED;
+
+        /*
+         * Check whether it is worth even starting compaction. The order check is
+         * made because an assumption is made that the page allocator can satisfy
+         * the "cheaper" orders without taking special steps
+         */
+        if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
+                return rc;
+
+        count_vm_event(COMPACTSTALL);
+
+        /* Compact each zone in the list */
+        for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
+                                                                nodemask) {
+                int fragindex;
+                int status;
+
+                /*
+                 * Watermarks for order-0 must be met for compaction. Note
+                 * the 2UL. This is because during migration, copies of
+                 * pages need to be allocated and for a short time, the
+                 * footprint is higher
+                 */
+                watermark = low_wmark_pages(zone) + (2UL << order);
+                if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+                        continue;
+
+                /*
+                 * fragmentation index determines if allocation failures are
+                 * due to low memory or external fragmentation
+                 *
+                 * index of -1 implies allocations might succeed depending
+                 * on watermarks
+                 * index towards 0 implies failure is due to lack of memory
+                 * index towards 1000 implies failure is due to fragmentation
+                 *
+                 * Only compact if a failure would be due to fragmentation.
+                 */
+                fragindex = fragmentation_index(zone, order);
+                if (fragindex >= 0 && fragindex <= 500)
+                        continue;
+
+                if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
+                        rc = COMPACT_PARTIAL;
+                        break;
+                }
+
+                status = compact_zone_order(zone, order, gfp_mask);
+                rc = max(status, rc);
+
+                if (zone_watermark_ok(zone, order, watermark, 0, 0))
+                        break;
+        }
+
+        return rc;
+}
+
+
 /* Compact all zones within a node */
 static int compact_node(int nid)
 {
@@ -412,6 +528,7 @@ static int compact_node(int nid)
                 struct compact_control cc = {
                         .nr_freepages = 0,
                         .nr_migratepages = 0,
+                        .order = -1,
                 };
 
                 zone = &pgdat->node_zones[zoneid];
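
A note on the watermark check in try_to_compact_pages() above: compaction only starts if the zone already holds its low watermark plus (2 << order) free order-0 pages, because page migration briefly needs a destination page for every source page it moves. A standalone sketch of that headroom arithmetic, ordinary C rather than kernel code; the low-watermark figure below is a made-up example:

/*
 * Standalone sketch: free-page headroom demanded before direct compaction
 * starts, per the watermark = low_wmark_pages(zone) + (2UL << order) check.
 */
#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER 3       /* orders at or below this skip compaction */

int main(void)
{
        unsigned long low_wmark = 1024; /* example zone low watermark, in pages */
        unsigned int order;

        /* only costly orders reach the watermark check in the patch */
        for (order = PAGE_ALLOC_COSTLY_ORDER + 1; order <= 10; order++)
                printf("order %2u: need %4lu free pages (low %lu + (2 << %u) = %lu extra)\n",
                       order, low_wmark + (2UL << order),
                       low_wmark, order, 2UL << order);
        return 0;
}
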
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c54376a09f30..cd88a860f088 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -49,6 +49,7 @@
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
 #include <linux/memory.h>
+#include <linux/compaction.h>
 #include <trace/events/kmem.h>
 #include <linux/ftrace_event.h>
 
@@ -1758,6 +1759,59 @@ out:
         return page;
 }
 
+#ifdef CONFIG_COMPACTION
+/* Try memory compaction for high-order allocations before reclaim */
+static struct page *
+__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
+        struct zonelist *zonelist, enum zone_type high_zoneidx,
+        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
+        int migratetype, unsigned long *did_some_progress)
+{
+        struct page *page;
+
+        if (!order)
+                return NULL;
+
+        *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
+                                                                nodemask);
+        if (*did_some_progress != COMPACT_SKIPPED) {
+
+                /* Page migration frees to the PCP lists but we want merging */
+                drain_pages(get_cpu());
+                put_cpu();
+
+                page = get_page_from_freelist(gfp_mask, nodemask,
+                                order, zonelist, high_zoneidx,
+                                alloc_flags, preferred_zone,
+                                migratetype);
+                if (page) {
+                        count_vm_event(COMPACTSUCCESS);
+                        return page;
+                }
+
+                /*
+                 * It's bad if compaction run occurs and fails.
+                 * The most likely reason is that pages exist,
+                 * but not enough to satisfy watermarks.
+                 */
+                count_vm_event(COMPACTFAIL);
+
+                cond_resched();
+        }
+
+        return NULL;
+}
+#else
+static inline struct page *
+__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
+        struct zonelist *zonelist, enum zone_type high_zoneidx,
+        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
+        int migratetype, unsigned long *did_some_progress)
+{
+        return NULL;
+}
+#endif /* CONFIG_COMPACTION */
+
 /* The really slow allocator path where we enter direct reclaim */
 static inline struct page *
 __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -1944,6 +1998,15 @@ rebalance:
         if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
                 goto nopage;
 
+        /* Try direct compaction */
+        page = __alloc_pages_direct_compact(gfp_mask, order,
+                                        zonelist, high_zoneidx,
+                                        nodemask,
+                                        alloc_flags, preferred_zone,
+                                        migratetype, &did_some_progress);
+        if (page)
+                goto got_pg;
+
         /* Try direct reclaim and then allocating */
         page = __alloc_pages_direct_reclaim(gfp_mask, order,
                                         zonelist, high_zoneidx,
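
With the hunk above, the slow path now tries direct compaction before direct reclaim, and compaction quietly does nothing for order-0 requests (the !order check in __alloc_pages_direct_compact()) or for non-costly orders (the PAGE_ALLOC_COSTLY_ORDER check inside try_to_compact_pages()). A minimal model of that ordering, ordinary C with invented stand-in helpers rather than the real allocator functions:

/*
 * Standalone sketch: the compaction-before-reclaim ordering of the slow path.
 * try_direct_compact() collapses the !order check and the costly-order check
 * into one test; both helpers only report what they would have done.
 */
#include <stdio.h>
#include <stdbool.h>

#define PAGE_ALLOC_COSTLY_ORDER 3

static bool try_direct_compact(unsigned int order)
{
        if (!order || order <= PAGE_ALLOC_COSTLY_ORDER)
                return false;                   /* compaction skipped entirely */
        printf("  order %u: direct compaction attempted\n", order);
        return true;                            /* pretend it freed a suitable page */
}

static bool try_direct_reclaim(unsigned int order)
{
        printf("  order %u: direct reclaim attempted\n", order);
        return true;                            /* pretend reclaim made progress */
}

static void slowpath(unsigned int order)
{
        printf("allocation of order %u:\n", order);
        if (try_direct_compact(order))
                return;                         /* got_pg */
        try_direct_reclaim(order);
}

int main(void)
{
        slowpath(0);    /* reclaim only */
        slowpath(2);    /* non-costly order: still reclaim only */
        slowpath(9);    /* costly order: compaction runs first */
        return 0;
}
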
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c6aacf51b554..7759941d4e77 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -429,7 +429,7 @@ static void fill_contig_page_info(struct zone *zone,
  * The value can be used to determine if page reclaim or compaction
  * should be used
  */
-int fragmentation_index(unsigned int order, struct contig_page_info *info)
+static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
 {
         unsigned long requested = 1UL << order;
 
@@ -448,6 +448,15 @@ int fragmentation_index(unsigned int order, struct contig_page_info *info)
  */
         return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
 }
+
+/* Same as __fragmentation index but allocs contig_page_info on stack */
+int fragmentation_index(struct zone *zone, unsigned int order)
+{
+        struct contig_page_info info;
+
+        fill_contig_page_info(zone, order, &info);
+        return __fragmentation_index(order, &info);
+}
 #endif
 
 #if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
@@ -771,6 +780,9 @@ static const char * const vmstat_text[] = {
771 "compact_blocks_moved", 780 "compact_blocks_moved",
772 "compact_pages_moved", 781 "compact_pages_moved",
773 "compact_pagemigrate_failed", 782 "compact_pagemigrate_failed",
783 "compact_stall",
784 "compact_fail",
785 "compact_success",
774#endif 786#endif
775 787
776#ifdef CONFIG_HUGETLB_PAGE 788#ifdef CONFIG_HUGETLB_PAGE
@@ -1136,7 +1148,7 @@ static void extfrag_show_print(struct seq_file *m,
                                 zone->name);
         for (order = 0; order < MAX_ORDER; ++order) {
                 fill_contig_page_info(zone, order, &info);
-                index = fragmentation_index(order, &info);
+                index = __fragmentation_index(order, &info);
                 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
         }
 
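
The renamed __fragmentation_index() keeps the existing formula, index = 1000 - (1000 + free_pages * 1000 / requested) / free_blocks_total with requested = 1 << order, so values near 0 point at a genuine shortage of memory and values near 1000 point at external fragmentation. A standalone worked example in plain C rather than kernel code; the struct is a cut-down stand-in for contig_page_info and the kernel's early-exit cases are omitted:

/*
 * Standalone sketch: the fragmentation index formula applied to two made-up
 * zones holding the same 64 free pages, once clumped and once scattered.
 */
#include <stdio.h>

struct contig_info {
        unsigned long free_pages;               /* total free pages */
        unsigned long free_blocks_total;        /* free blocks of any order */
};

/* towards 0: failure is lack of memory; towards 1000: external fragmentation */
static long frag_index(unsigned int order, const struct contig_info *info)
{
        unsigned long requested = 1UL << order;
        unsigned long per_request = info->free_pages * 1000UL / requested;

        return 1000L - (long)((1000UL + per_request) / info->free_blocks_total);
}

int main(void)
{
        /* 64 free pages as eight order-3 blocks vs sixty-four order-0 pages */
        struct contig_info clumped   = { 64,  8 };
        struct contig_info scattered = { 64, 64 };

        printf("order-4, clumped:   %ld\n", frag_index(4, &clumped));   /* 375 */
        printf("order-4, scattered: %ld\n", frag_index(4, &scattered)); /* 922 */
        return 0;
}
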