diff options
-rw-r--r-- | include/linux/compaction.h | 24 | ||||
-rw-r--r-- | include/linux/vmstat.h | 1 | ||||
-rw-r--r-- | mm/compaction.c | 117 | ||||
-rw-r--r-- | mm/page_alloc.c | 63 | ||||
-rw-r--r-- | mm/vmstat.c | 16 |
5 files changed, 215 insertions, 6 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h index ba98cfe0ae15..eed40ec4280b 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h | |||
@@ -1,15 +1,31 @@ | |||
1 | #ifndef _LINUX_COMPACTION_H | 1 | #ifndef _LINUX_COMPACTION_H |
2 | #define _LINUX_COMPACTION_H | 2 | #define _LINUX_COMPACTION_H |
3 | 3 | ||
4 | /* Return values for compact_zone() */ | 4 | /* Return values for compact_zone() and try_to_compact_pages() */ |
5 | #define COMPACT_CONTINUE 0 | 5 | /* compaction didn't start as it was not possible or direct reclaim was more suitable */ |
6 | #define COMPACT_PARTIAL 1 | 6 | #define COMPACT_SKIPPED 0 |
7 | #define COMPACT_COMPLETE 2 | 7 | /* compaction should continue to another pageblock */ |
8 | #define COMPACT_CONTINUE 1 | ||
9 | /* direct compaction partially compacted a zone and there are suitable pages */ | ||
10 | #define COMPACT_PARTIAL 2 | ||
11 | /* The full zone was compacted */ | ||
12 | #define COMPACT_COMPLETE 3 | ||
8 | 13 | ||
9 | #ifdef CONFIG_COMPACTION | 14 | #ifdef CONFIG_COMPACTION |
10 | extern int sysctl_compact_memory; | 15 | extern int sysctl_compact_memory; |
11 | extern int sysctl_compaction_handler(struct ctl_table *table, int write, | 16 | extern int sysctl_compaction_handler(struct ctl_table *table, int write, |
12 | void __user *buffer, size_t *length, loff_t *ppos); | 17 | void __user *buffer, size_t *length, loff_t *ppos); |
18 | |||
19 | extern int fragmentation_index(struct zone *zone, unsigned int order); | ||
20 | extern unsigned long try_to_compact_pages(struct zonelist *zonelist, | ||
21 | int order, gfp_t gfp_mask, nodemask_t *mask); | ||
22 | #else | ||
23 | static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, | ||
24 | int order, gfp_t gfp_mask, nodemask_t *nodemask) | ||
25 | { | ||
26 | return COMPACT_CONTINUE; | ||
27 | } | ||
28 | |||
13 | #endif /* CONFIG_COMPACTION */ | 29 | #endif /* CONFIG_COMPACTION */ |
14 | 30 | ||
15 | #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) | 31 | #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) |
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index b421d1b22b62..7f43ccdc1d38 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h | |||
@@ -45,6 +45,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, | |||
45 | PAGEOUTRUN, ALLOCSTALL, PGROTATED, | 45 | PAGEOUTRUN, ALLOCSTALL, PGROTATED, |
46 | #ifdef CONFIG_COMPACTION | 46 | #ifdef CONFIG_COMPACTION |
47 | COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED, | 47 | COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED, |
48 | COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, | ||
48 | #endif | 49 | #endif |
49 | #ifdef CONFIG_HUGETLB_PAGE | 50 | #ifdef CONFIG_HUGETLB_PAGE |
50 | HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, | 51 | HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, |
diff --git a/mm/compaction.c b/mm/compaction.c index f61f77983ff4..9583e193dc47 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -35,6 +35,8 @@ struct compact_control { | |||
35 | unsigned long nr_anon; | 35 | unsigned long nr_anon; |
36 | unsigned long nr_file; | 36 | unsigned long nr_file; |
37 | 37 | ||
38 | unsigned int order; /* order a direct compactor needs */ | ||
39 | int migratetype; /* MOVABLE, RECLAIMABLE etc */ | ||
38 | struct zone *zone; | 40 | struct zone *zone; |
39 | }; | 41 | }; |
40 | 42 | ||
@@ -341,6 +343,9 @@ static void update_nr_listpages(struct compact_control *cc) | |||
341 | static int compact_finished(struct zone *zone, | 343 | static int compact_finished(struct zone *zone, |
342 | struct compact_control *cc) | 344 | struct compact_control *cc) |
343 | { | 345 | { |
346 | unsigned int order; | ||
347 | unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order); | ||
348 | |||
344 | if (fatal_signal_pending(current)) | 349 | if (fatal_signal_pending(current)) |
345 | return COMPACT_PARTIAL; | 350 | return COMPACT_PARTIAL; |
346 | 351 | ||
@@ -348,6 +353,24 @@ static int compact_finished(struct zone *zone, | |||
348 | if (cc->free_pfn <= cc->migrate_pfn) | 353 | if (cc->free_pfn <= cc->migrate_pfn) |
349 | return COMPACT_COMPLETE; | 354 | return COMPACT_COMPLETE; |
350 | 355 | ||
356 | /* Compaction run is not finished if the watermark is not met */ | ||
357 | if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) | ||
358 | return COMPACT_CONTINUE; | ||
359 | |||
360 | if (cc->order == -1) | ||
361 | return COMPACT_CONTINUE; | ||
362 | |||
363 | /* Direct compactor: Is a suitable page free? */ | ||
364 | for (order = cc->order; order < MAX_ORDER; order++) { | ||
365 | /* Job done if page is free of the right migratetype */ | ||
366 | if (!list_empty(&zone->free_area[order].free_list[cc->migratetype])) | ||
367 | return COMPACT_PARTIAL; | ||
368 | |||
369 | /* Job done if allocation would set block type */ | ||
370 | if (order >= pageblock_order && zone->free_area[order].nr_free) | ||
371 | return COMPACT_PARTIAL; | ||
372 | } | ||
373 | |||
351 | return COMPACT_CONTINUE; | 374 | return COMPACT_CONTINUE; |
352 | } | 375 | } |
353 | 376 | ||
@@ -394,6 +417,99 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) | |||
394 | return ret; | 417 | return ret; |
395 | } | 418 | } |
396 | 419 | ||
420 | static unsigned long compact_zone_order(struct zone *zone, | ||
421 | int order, gfp_t gfp_mask) | ||
422 | { | ||
423 | struct compact_control cc = { | ||
424 | .nr_freepages = 0, | ||
425 | .nr_migratepages = 0, | ||
426 | .order = order, | ||
427 | .migratetype = allocflags_to_migratetype(gfp_mask), | ||
428 | .zone = zone, | ||
429 | }; | ||
430 | INIT_LIST_HEAD(&cc.freepages); | ||
431 | INIT_LIST_HEAD(&cc.migratepages); | ||
432 | |||
433 | return compact_zone(zone, &cc); | ||
434 | } | ||
435 | |||
436 | /** | ||
437 | * try_to_compact_pages - Direct compact to satisfy a high-order allocation | ||
438 | * @zonelist: The zonelist used for the current allocation | ||
439 | * @order: The order of the current allocation | ||
440 | * @gfp_mask: The GFP mask of the current allocation | ||
441 | * @nodemask: The allowed nodes to allocate from | ||
442 | * | ||
443 | * This is the main entry point for direct page compaction. | ||
444 | */ | ||
445 | unsigned long try_to_compact_pages(struct zonelist *zonelist, | ||
446 | int order, gfp_t gfp_mask, nodemask_t *nodemask) | ||
447 | { | ||
448 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | ||
449 | int may_enter_fs = gfp_mask & __GFP_FS; | ||
450 | int may_perform_io = gfp_mask & __GFP_IO; | ||
451 | unsigned long watermark; | ||
452 | struct zoneref *z; | ||
453 | struct zone *zone; | ||
454 | int rc = COMPACT_SKIPPED; | ||
455 | |||
456 | /* | ||
457 | * Check whether it is worth even starting compaction. The order check | ||
458 | * exists because the page allocator is assumed to be able to satisfy | ||
459 | * the "cheaper" orders without taking special steps. | ||
460 | */ | ||
461 | if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io) | ||
462 | return rc; | ||
463 | |||
464 | count_vm_event(COMPACTSTALL); | ||
465 | |||
466 | /* Compact each zone in the list */ | ||
467 | for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, | ||
468 | nodemask) { | ||
469 | int fragindex; | ||
470 | int status; | ||
471 | |||
472 | /* | ||
473 | * Watermarks for order-0 must be met for compaction. Note | ||
474 | * the 2UL. This is because during migration, copies of | ||
475 | * pages need to be allocated and for a short time, the | ||
476 | * footprint is higher | ||
477 | */ | ||
478 | watermark = low_wmark_pages(zone) + (2UL << order); | ||
479 | if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) | ||
480 | continue; | ||
481 | |||
482 | /* | ||
483 | * fragmentation index determines if allocation failures are | ||
484 | * due to low memory or external fragmentation | ||
485 | * | ||
486 | * index of -1 implies allocations might succeed depending | ||
487 | * on watermarks | ||
488 | * index towards 0 implies failure is due to lack of memory | ||
489 | * index towards 1000 implies failure is due to fragmentation | ||
490 | * | ||
491 | * Only compact if a failure would be due to fragmentation. | ||
492 | */ | ||
493 | fragindex = fragmentation_index(zone, order); | ||
494 | if (fragindex >= 0 && fragindex <= 500) | ||
495 | continue; | ||
496 | |||
497 | if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) { | ||
498 | rc = COMPACT_PARTIAL; | ||
499 | break; | ||
500 | } | ||
501 | |||
502 | status = compact_zone_order(zone, order, gfp_mask); | ||
503 | rc = max(status, rc); | ||
504 | |||
505 | if (zone_watermark_ok(zone, order, watermark, 0, 0)) | ||
506 | break; | ||
507 | } | ||
508 | |||
509 | return rc; | ||
510 | } | ||
511 | |||
512 | |||
397 | /* Compact all zones within a node */ | 513 | /* Compact all zones within a node */ |
398 | static int compact_node(int nid) | 514 | static int compact_node(int nid) |
399 | { | 515 | { |
@@ -412,6 +528,7 @@ static int compact_node(int nid) | |||
412 | struct compact_control cc = { | 528 | struct compact_control cc = { |
413 | .nr_freepages = 0, | 529 | .nr_freepages = 0, |
414 | .nr_migratepages = 0, | 530 | .nr_migratepages = 0, |
531 | .order = -1, | ||
415 | }; | 532 | }; |
416 | 533 | ||
417 | zone = &pgdat->node_zones[zoneid]; | 534 | zone = &pgdat->node_zones[zoneid]; |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c54376a09f30..cd88a860f088 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/debugobjects.h> | 49 | #include <linux/debugobjects.h> |
50 | #include <linux/kmemleak.h> | 50 | #include <linux/kmemleak.h> |
51 | #include <linux/memory.h> | 51 | #include <linux/memory.h> |
52 | #include <linux/compaction.h> | ||
52 | #include <trace/events/kmem.h> | 53 | #include <trace/events/kmem.h> |
53 | #include <linux/ftrace_event.h> | 54 | #include <linux/ftrace_event.h> |
54 | 55 | ||
@@ -1758,6 +1759,59 @@ out: | |||
1758 | return page; | 1759 | return page; |
1759 | } | 1760 | } |
1760 | 1761 | ||
1762 | #ifdef CONFIG_COMPACTION | ||
1763 | /* Try memory compaction for high-order allocations before reclaim */ | ||
1764 | static struct page * | ||
1765 | __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | ||
1766 | struct zonelist *zonelist, enum zone_type high_zoneidx, | ||
1767 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | ||
1768 | int migratetype, unsigned long *did_some_progress) | ||
1769 | { | ||
1770 | struct page *page; | ||
1771 | |||
1772 | if (!order) | ||
1773 | return NULL; | ||
1774 | |||
1775 | *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, | ||
1776 | nodemask); | ||
1777 | if (*did_some_progress != COMPACT_SKIPPED) { | ||
1778 | |||
1779 | /* Page migration frees to the PCP lists but we want merging */ | ||
1780 | drain_pages(get_cpu()); | ||
1781 | put_cpu(); | ||
1782 | |||
1783 | page = get_page_from_freelist(gfp_mask, nodemask, | ||
1784 | order, zonelist, high_zoneidx, | ||
1785 | alloc_flags, preferred_zone, | ||
1786 | migratetype); | ||
1787 | if (page) { | ||
1788 | count_vm_event(COMPACTSUCCESS); | ||
1789 | return page; | ||
1790 | } | ||
1791 | |||
1792 | /* | ||
1793 | * It's bad if a compaction run occurs and fails. | ||
1794 | * The most likely reason is that pages exist, | ||
1795 | * but not enough to satisfy watermarks. | ||
1796 | */ | ||
1797 | count_vm_event(COMPACTFAIL); | ||
1798 | |||
1799 | cond_resched(); | ||
1800 | } | ||
1801 | |||
1802 | return NULL; | ||
1803 | } | ||
1804 | #else | ||
1805 | static inline struct page * | ||
1806 | __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | ||
1807 | struct zonelist *zonelist, enum zone_type high_zoneidx, | ||
1808 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | ||
1809 | int migratetype, unsigned long *did_some_progress) | ||
1810 | { | ||
1811 | return NULL; | ||
1812 | } | ||
1813 | #endif /* CONFIG_COMPACTION */ | ||
1814 | |||
1761 | /* The really slow allocator path where we enter direct reclaim */ | 1815 | /* The really slow allocator path where we enter direct reclaim */ |
1762 | static inline struct page * | 1816 | static inline struct page * |
1763 | __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | 1817 | __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, |
@@ -1944,6 +1998,15 @@ rebalance: | |||
1944 | if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) | 1998 | if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) |
1945 | goto nopage; | 1999 | goto nopage; |
1946 | 2000 | ||
2001 | /* Try direct compaction */ | ||
2002 | page = __alloc_pages_direct_compact(gfp_mask, order, | ||
2003 | zonelist, high_zoneidx, | ||
2004 | nodemask, | ||
2005 | alloc_flags, preferred_zone, | ||
2006 | migratetype, &did_some_progress); | ||
2007 | if (page) | ||
2008 | goto got_pg; | ||
2009 | |||
1947 | /* Try direct reclaim and then allocating */ | 2010 | /* Try direct reclaim and then allocating */ |
1948 | page = __alloc_pages_direct_reclaim(gfp_mask, order, | 2011 | page = __alloc_pages_direct_reclaim(gfp_mask, order, |
1949 | zonelist, high_zoneidx, | 2012 | zonelist, high_zoneidx, |
diff --git a/mm/vmstat.c b/mm/vmstat.c index c6aacf51b554..7759941d4e77 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -429,7 +429,7 @@ static void fill_contig_page_info(struct zone *zone, | |||
429 | * The value can be used to determine if page reclaim or compaction | 429 | * The value can be used to determine if page reclaim or compaction |
430 | * should be used | 430 | * should be used |
431 | */ | 431 | */ |
432 | int fragmentation_index(unsigned int order, struct contig_page_info *info) | 432 | static int __fragmentation_index(unsigned int order, struct contig_page_info *info) |
433 | { | 433 | { |
434 | unsigned long requested = 1UL << order; | 434 | unsigned long requested = 1UL << order; |
435 | 435 | ||
@@ -448,6 +448,15 @@ int fragmentation_index(unsigned int order, struct contig_page_info *info) | |||
448 | */ | 448 | */ |
449 | return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total); | 449 | return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total); |
450 | } | 450 | } |
451 | |||
452 | /* Same as __fragmentation_index but allocates contig_page_info on the stack */ | ||
453 | int fragmentation_index(struct zone *zone, unsigned int order) | ||
454 | { | ||
455 | struct contig_page_info info; | ||
456 | |||
457 | fill_contig_page_info(zone, order, &info); | ||
458 | return __fragmentation_index(order, &info); | ||
459 | } | ||
451 | #endif | 460 | #endif |
452 | 461 | ||
453 | #if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION) | 462 | #if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION) |
@@ -771,6 +780,9 @@ static const char * const vmstat_text[] = { | |||
771 | "compact_blocks_moved", | 780 | "compact_blocks_moved", |
772 | "compact_pages_moved", | 781 | "compact_pages_moved", |
773 | "compact_pagemigrate_failed", | 782 | "compact_pagemigrate_failed", |
783 | "compact_stall", | ||
784 | "compact_fail", | ||
785 | "compact_success", | ||
774 | #endif | 786 | #endif |
775 | 787 | ||
776 | #ifdef CONFIG_HUGETLB_PAGE | 788 | #ifdef CONFIG_HUGETLB_PAGE |
@@ -1136,7 +1148,7 @@ static void extfrag_show_print(struct seq_file *m, | |||
1136 | zone->name); | 1148 | zone->name); |
1137 | for (order = 0; order < MAX_ORDER; ++order) { | 1149 | for (order = 0; order < MAX_ORDER; ++order) { |
1138 | fill_contig_page_info(zone, order, &info); | 1150 | fill_contig_page_info(zone, order, &info); |
1139 | index = fragmentation_index(order, &info); | 1151 | index = __fragmentation_index(order, &info); |
1140 | seq_printf(m, "%d.%03d ", index / 1000, index % 1000); | 1152 | seq_printf(m, "%d.%03d ", index / 1000, index % 1000); |
1141 | } | 1153 | } |
1142 | 1154 | ||