diff options
| -rw-r--r-- | include/linux/compaction.h | 24 | ||||
| -rw-r--r-- | include/linux/vmstat.h | 1 | ||||
| -rw-r--r-- | mm/compaction.c | 117 | ||||
| -rw-r--r-- | mm/page_alloc.c | 63 | ||||
| -rw-r--r-- | mm/vmstat.c | 16 |
5 files changed, 215 insertions, 6 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h index ba98cfe0ae15..eed40ec4280b 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h | |||
| @@ -1,15 +1,31 @@ | |||
| 1 | #ifndef _LINUX_COMPACTION_H | 1 | #ifndef _LINUX_COMPACTION_H |
| 2 | #define _LINUX_COMPACTION_H | 2 | #define _LINUX_COMPACTION_H |
| 3 | 3 | ||
| 4 | /* Return values for compact_zone() */ | 4 | /* Return values for compact_zone() and try_to_compact_pages() */ |
| 5 | #define COMPACT_CONTINUE 0 | 5 | /* compaction didn't start as it was not possible or direct reclaim was more suitable */ |
| 6 | #define COMPACT_PARTIAL 1 | 6 | #define COMPACT_SKIPPED 0 |
| 7 | #define COMPACT_COMPLETE 2 | 7 | /* compaction should continue to another pageblock */ |
| 8 | #define COMPACT_CONTINUE 1 | ||
| 9 | /* direct compaction partially compacted a zone and there are suitable pages */ | ||
| 10 | #define COMPACT_PARTIAL 2 | ||
| 11 | /* The full zone was compacted */ | ||
| 12 | #define COMPACT_COMPLETE 3 | ||
| 8 | 13 | ||
| 9 | #ifdef CONFIG_COMPACTION | 14 | #ifdef CONFIG_COMPACTION |
| 10 | extern int sysctl_compact_memory; | 15 | extern int sysctl_compact_memory; |
| 11 | extern int sysctl_compaction_handler(struct ctl_table *table, int write, | 16 | extern int sysctl_compaction_handler(struct ctl_table *table, int write, |
| 12 | void __user *buffer, size_t *length, loff_t *ppos); | 17 | void __user *buffer, size_t *length, loff_t *ppos); |
| 18 | |||
| 19 | extern int fragmentation_index(struct zone *zone, unsigned int order); | ||
| 20 | extern unsigned long try_to_compact_pages(struct zonelist *zonelist, | ||
| 21 | int order, gfp_t gfp_mask, nodemask_t *mask); | ||
| 22 | #else | ||
| 23 | static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, | ||
| 24 | int order, gfp_t gfp_mask, nodemask_t *nodemask) | ||
| 25 | { | ||
| 26 | return COMPACT_CONTINUE; | ||
| 27 | } | ||
| 28 | |||
| 13 | #endif /* CONFIG_COMPACTION */ | 29 | #endif /* CONFIG_COMPACTION */ |
| 14 | 30 | ||
| 15 | #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) | 31 | #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) |
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index b421d1b22b62..7f43ccdc1d38 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h | |||
| @@ -45,6 +45,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, | |||
| 45 | PAGEOUTRUN, ALLOCSTALL, PGROTATED, | 45 | PAGEOUTRUN, ALLOCSTALL, PGROTATED, |
| 46 | #ifdef CONFIG_COMPACTION | 46 | #ifdef CONFIG_COMPACTION |
| 47 | COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED, | 47 | COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED, |
| 48 | COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, | ||
| 48 | #endif | 49 | #endif |
| 49 | #ifdef CONFIG_HUGETLB_PAGE | 50 | #ifdef CONFIG_HUGETLB_PAGE |
| 50 | HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, | 51 | HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, |
diff --git a/mm/compaction.c b/mm/compaction.c index f61f77983ff4..9583e193dc47 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
| @@ -35,6 +35,8 @@ struct compact_control { | |||
| 35 | unsigned long nr_anon; | 35 | unsigned long nr_anon; |
| 36 | unsigned long nr_file; | 36 | unsigned long nr_file; |
| 37 | 37 | ||
| 38 | unsigned int order; /* order a direct compactor needs */ | ||
| 39 | int migratetype; /* MOVABLE, RECLAIMABLE etc */ | ||
| 38 | struct zone *zone; | 40 | struct zone *zone; |
| 39 | }; | 41 | }; |
| 40 | 42 | ||
| @@ -341,6 +343,9 @@ static void update_nr_listpages(struct compact_control *cc) | |||
| 341 | static int compact_finished(struct zone *zone, | 343 | static int compact_finished(struct zone *zone, |
| 342 | struct compact_control *cc) | 344 | struct compact_control *cc) |
| 343 | { | 345 | { |
| 346 | unsigned int order; | ||
| 347 | unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order); | ||
| 348 | |||
| 344 | if (fatal_signal_pending(current)) | 349 | if (fatal_signal_pending(current)) |
| 345 | return COMPACT_PARTIAL; | 350 | return COMPACT_PARTIAL; |
| 346 | 351 | ||
| @@ -348,6 +353,24 @@ static int compact_finished(struct zone *zone, | |||
| 348 | if (cc->free_pfn <= cc->migrate_pfn) | 353 | if (cc->free_pfn <= cc->migrate_pfn) |
| 349 | return COMPACT_COMPLETE; | 354 | return COMPACT_COMPLETE; |
| 350 | 355 | ||
| 356 | /* Compaction run is not finished if the watermark is not met */ | ||
| 357 | if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) | ||
| 358 | return COMPACT_CONTINUE; | ||
| 359 | |||
| 360 | if (cc->order == -1) | ||
| 361 | return COMPACT_CONTINUE; | ||
| 362 | |||
| 363 | /* Direct compactor: Is a suitable page free? */ | ||
| 364 | for (order = cc->order; order < MAX_ORDER; order++) { | ||
| 365 | /* Job done if page is free of the right migratetype */ | ||
| 366 | if (!list_empty(&zone->free_area[order].free_list[cc->migratetype])) | ||
| 367 | return COMPACT_PARTIAL; | ||
| 368 | |||
| 369 | /* Job done if allocation would set block type */ | ||
| 370 | if (order >= pageblock_order && zone->free_area[order].nr_free) | ||
| 371 | return COMPACT_PARTIAL; | ||
| 372 | } | ||
| 373 | |||
| 351 | return COMPACT_CONTINUE; | 374 | return COMPACT_CONTINUE; |
| 352 | } | 375 | } |
| 353 | 376 | ||
| @@ -394,6 +417,99 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) | |||
| 394 | return ret; | 417 | return ret; |
| 395 | } | 418 | } |
| 396 | 419 | ||
| 420 | static unsigned long compact_zone_order(struct zone *zone, | ||
| 421 | int order, gfp_t gfp_mask) | ||
| 422 | { | ||
| 423 | struct compact_control cc = { | ||
| 424 | .nr_freepages = 0, | ||
| 425 | .nr_migratepages = 0, | ||
| 426 | .order = order, | ||
| 427 | .migratetype = allocflags_to_migratetype(gfp_mask), | ||
| 428 | .zone = zone, | ||
| 429 | }; | ||
| 430 | INIT_LIST_HEAD(&cc.freepages); | ||
| 431 | INIT_LIST_HEAD(&cc.migratepages); | ||
| 432 | |||
| 433 | return compact_zone(zone, &cc); | ||
| 434 | } | ||
| 435 | |||
| 436 | /** | ||
| 437 | * try_to_compact_pages - Direct compact to satisfy a high-order allocation | ||
| 438 | * @zonelist: The zonelist used for the current allocation | ||
| 439 | * @order: The order of the current allocation | ||
| 440 | * @gfp_mask: The GFP mask of the current allocation | ||
| 441 | * @nodemask: The allowed nodes to allocate from | ||
| 442 | * | ||
| 443 | * This is the main entry point for direct page compaction. | ||
| 444 | */ | ||
| 445 | unsigned long try_to_compact_pages(struct zonelist *zonelist, | ||
| 446 | int order, gfp_t gfp_mask, nodemask_t *nodemask) | ||
| 447 | { | ||
| 448 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | ||
| 449 | int may_enter_fs = gfp_mask & __GFP_FS; | ||
| 450 | int may_perform_io = gfp_mask & __GFP_IO; | ||
| 451 | unsigned long watermark; | ||
| 452 | struct zoneref *z; | ||
| 453 | struct zone *zone; | ||
| 454 | int rc = COMPACT_SKIPPED; | ||
| 455 | |||
| 456 | /* | ||
| 457 | * Check whether it is worth even starting compaction. The order check is | ||
| 458 | * made because an assumption is made that the page allocator can satisfy | ||
| 459 | * the "cheaper" orders without taking special steps | ||
| 460 | */ | ||
| 461 | if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io) | ||
| 462 | return rc; | ||
| 463 | |||
| 464 | count_vm_event(COMPACTSTALL); | ||
| 465 | |||
| 466 | /* Compact each zone in the list */ | ||
| 467 | for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, | ||
| 468 | nodemask) { | ||
| 469 | int fragindex; | ||
| 470 | int status; | ||
| 471 | |||
| 472 | /* | ||
| 473 | * Watermarks for order-0 must be met for compaction. Note | ||
| 474 | * the 2UL. This is because during migration, copies of | ||
| 475 | * pages need to be allocated and for a short time, the | ||
| 476 | * footprint is higher | ||
| 477 | */ | ||
| 478 | watermark = low_wmark_pages(zone) + (2UL << order); | ||
| 479 | if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) | ||
| 480 | continue; | ||
| 481 | |||
| 482 | /* | ||
| 483 | * fragmentation index determines if allocation failures are | ||
| 484 | * due to low memory or external fragmentation | ||
| 485 | * | ||
| 486 | * index of -1 implies allocations might succeed depending | ||
| 487 | * on watermarks | ||
| 488 | * index towards 0 implies failure is due to lack of memory | ||
| 489 | * index towards 1000 implies failure is due to fragmentation | ||
| 490 | * | ||
| 491 | * Only compact if a failure would be due to fragmentation. | ||
| 492 | */ | ||
| 493 | fragindex = fragmentation_index(zone, order); | ||
| 494 | if (fragindex >= 0 && fragindex <= 500) | ||
| 495 | continue; | ||
| 496 | |||
| 497 | if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) { | ||
| 498 | rc = COMPACT_PARTIAL; | ||
| 499 | break; | ||
| 500 | } | ||
| 501 | |||
| 502 | status = compact_zone_order(zone, order, gfp_mask); | ||
| 503 | rc = max(status, rc); | ||
| 504 | |||
| 505 | if (zone_watermark_ok(zone, order, watermark, 0, 0)) | ||
| 506 | break; | ||
| 507 | } | ||
| 508 | |||
| 509 | return rc; | ||
| 510 | } | ||
| 511 | |||
| 512 | |||
| 397 | /* Compact all zones within a node */ | 513 | /* Compact all zones within a node */ |
| 398 | static int compact_node(int nid) | 514 | static int compact_node(int nid) |
| 399 | { | 515 | { |
| @@ -412,6 +528,7 @@ static int compact_node(int nid) | |||
| 412 | struct compact_control cc = { | 528 | struct compact_control cc = { |
| 413 | .nr_freepages = 0, | 529 | .nr_freepages = 0, |
| 414 | .nr_migratepages = 0, | 530 | .nr_migratepages = 0, |
| 531 | .order = -1, | ||
| 415 | }; | 532 | }; |
| 416 | 533 | ||
| 417 | zone = &pgdat->node_zones[zoneid]; | 534 | zone = &pgdat->node_zones[zoneid]; |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c54376a09f30..cd88a860f088 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | #include <linux/debugobjects.h> | 49 | #include <linux/debugobjects.h> |
| 50 | #include <linux/kmemleak.h> | 50 | #include <linux/kmemleak.h> |
| 51 | #include <linux/memory.h> | 51 | #include <linux/memory.h> |
| 52 | #include <linux/compaction.h> | ||
| 52 | #include <trace/events/kmem.h> | 53 | #include <trace/events/kmem.h> |
| 53 | #include <linux/ftrace_event.h> | 54 | #include <linux/ftrace_event.h> |
| 54 | 55 | ||
| @@ -1758,6 +1759,59 @@ out: | |||
| 1758 | return page; | 1759 | return page; |
| 1759 | } | 1760 | } |
| 1760 | 1761 | ||
| 1762 | #ifdef CONFIG_COMPACTION | ||
| 1763 | /* Try memory compaction for high-order allocations before reclaim */ | ||
| 1764 | static struct page * | ||
| 1765 | __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | ||
| 1766 | struct zonelist *zonelist, enum zone_type high_zoneidx, | ||
| 1767 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | ||
| 1768 | int migratetype, unsigned long *did_some_progress) | ||
| 1769 | { | ||
| 1770 | struct page *page; | ||
| 1771 | |||
| 1772 | if (!order) | ||
| 1773 | return NULL; | ||
| 1774 | |||
| 1775 | *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, | ||
| 1776 | nodemask); | ||
| 1777 | if (*did_some_progress != COMPACT_SKIPPED) { | ||
| 1778 | |||
| 1779 | /* Page migration frees to the PCP lists but we want merging */ | ||
| 1780 | drain_pages(get_cpu()); | ||
| 1781 | put_cpu(); | ||
| 1782 | |||
| 1783 | page = get_page_from_freelist(gfp_mask, nodemask, | ||
| 1784 | order, zonelist, high_zoneidx, | ||
| 1785 | alloc_flags, preferred_zone, | ||
| 1786 | migratetype); | ||
| 1787 | if (page) { | ||
| 1788 | count_vm_event(COMPACTSUCCESS); | ||
| 1789 | return page; | ||
| 1790 | } | ||
| 1791 | |||
| 1792 | /* | ||
| 1793 | * It's bad if a compaction run occurs and fails. | ||
| 1794 | * The most likely reason is that pages exist, | ||
| 1795 | * but not enough to satisfy watermarks. | ||
| 1796 | */ | ||
| 1797 | count_vm_event(COMPACTFAIL); | ||
| 1798 | |||
| 1799 | cond_resched(); | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | return NULL; | ||
| 1803 | } | ||
| 1804 | #else | ||
| 1805 | static inline struct page * | ||
| 1806 | __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | ||
| 1807 | struct zonelist *zonelist, enum zone_type high_zoneidx, | ||
| 1808 | nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, | ||
| 1809 | int migratetype, unsigned long *did_some_progress) | ||
| 1810 | { | ||
| 1811 | return NULL; | ||
| 1812 | } | ||
| 1813 | #endif /* CONFIG_COMPACTION */ | ||
| 1814 | |||
| 1761 | /* The really slow allocator path where we enter direct reclaim */ | 1815 | /* The really slow allocator path where we enter direct reclaim */ |
| 1762 | static inline struct page * | 1816 | static inline struct page * |
| 1763 | __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | 1817 | __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, |
| @@ -1944,6 +1998,15 @@ rebalance: | |||
| 1944 | if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) | 1998 | if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) |
| 1945 | goto nopage; | 1999 | goto nopage; |
| 1946 | 2000 | ||
| 2001 | /* Try direct compaction */ | ||
| 2002 | page = __alloc_pages_direct_compact(gfp_mask, order, | ||
| 2003 | zonelist, high_zoneidx, | ||
| 2004 | nodemask, | ||
| 2005 | alloc_flags, preferred_zone, | ||
| 2006 | migratetype, &did_some_progress); | ||
| 2007 | if (page) | ||
| 2008 | goto got_pg; | ||
| 2009 | |||
| 1947 | /* Try direct reclaim and then allocating */ | 2010 | /* Try direct reclaim and then allocating */ |
| 1948 | page = __alloc_pages_direct_reclaim(gfp_mask, order, | 2011 | page = __alloc_pages_direct_reclaim(gfp_mask, order, |
| 1949 | zonelist, high_zoneidx, | 2012 | zonelist, high_zoneidx, |
diff --git a/mm/vmstat.c b/mm/vmstat.c index c6aacf51b554..7759941d4e77 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
| @@ -429,7 +429,7 @@ static void fill_contig_page_info(struct zone *zone, | |||
| 429 | * The value can be used to determine if page reclaim or compaction | 429 | * The value can be used to determine if page reclaim or compaction |
| 430 | * should be used | 430 | * should be used |
| 431 | */ | 431 | */ |
| 432 | int fragmentation_index(unsigned int order, struct contig_page_info *info) | 432 | static int __fragmentation_index(unsigned int order, struct contig_page_info *info) |
| 433 | { | 433 | { |
| 434 | unsigned long requested = 1UL << order; | 434 | unsigned long requested = 1UL << order; |
| 435 | 435 | ||
| @@ -448,6 +448,15 @@ int fragmentation_index(unsigned int order, struct contig_page_info *info) | |||
| 448 | */ | 448 | */ |
| 449 | return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total); | 449 | return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total); |
| 450 | } | 450 | } |
| 451 | |||
| 452 | /* Same as __fragmentation_index but allocates contig_page_info on the stack */ | ||
| 453 | int fragmentation_index(struct zone *zone, unsigned int order) | ||
| 454 | { | ||
| 455 | struct contig_page_info info; | ||
| 456 | |||
| 457 | fill_contig_page_info(zone, order, &info); | ||
| 458 | return __fragmentation_index(order, &info); | ||
| 459 | } | ||
| 451 | #endif | 460 | #endif |
| 452 | 461 | ||
| 453 | #if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION) | 462 | #if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION) |
| @@ -771,6 +780,9 @@ static const char * const vmstat_text[] = { | |||
| 771 | "compact_blocks_moved", | 780 | "compact_blocks_moved", |
| 772 | "compact_pages_moved", | 781 | "compact_pages_moved", |
| 773 | "compact_pagemigrate_failed", | 782 | "compact_pagemigrate_failed", |
| 783 | "compact_stall", | ||
| 784 | "compact_fail", | ||
| 785 | "compact_success", | ||
| 774 | #endif | 786 | #endif |
| 775 | 787 | ||
| 776 | #ifdef CONFIG_HUGETLB_PAGE | 788 | #ifdef CONFIG_HUGETLB_PAGE |
| @@ -1136,7 +1148,7 @@ static void extfrag_show_print(struct seq_file *m, | |||
| 1136 | zone->name); | 1148 | zone->name); |
| 1137 | for (order = 0; order < MAX_ORDER; ++order) { | 1149 | for (order = 0; order < MAX_ORDER; ++order) { |
| 1138 | fill_contig_page_info(zone, order, &info); | 1150 | fill_contig_page_info(zone, order, &info); |
| 1139 | index = fragmentation_index(order, &info); | 1151 | index = __fragmentation_index(order, &info); |
| 1140 | seq_printf(m, "%d.%03d ", index / 1000, index % 1000); | 1152 | seq_printf(m, "%d.%03d ", index / 1000, index % 1000); |
| 1141 | } | 1153 | } |
| 1142 | 1154 | ||
