aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mel Gorman <mgorman@suse.de> 2012-10-08 19:32:47 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-10-09 03:22:51 -0400
commit 62997027ca5b3d4618198ed8b1aba40b61b1137b (patch)
tree cf26352e091ae10f7201d98ca774a8c0e5f8cdfd
parent c89511ab2f8fe2b47585e60da8af7fd213ec877e (diff)
mm: compaction: clear PG_migrate_skip based on compaction and reclaim activity
Compaction caches if a pageblock was scanned and no pages were isolated so that the pageblocks can be skipped in the future to reduce scanning. This information is not cleared by the page allocator based on activity due to the impact it would have on the page allocator fast paths. Hence there is a requirement that something clear the cache or pageblocks will be skipped forever. Currently the cache is cleared if there were a number of recent allocation failures and it has not been cleared within the last 5 seconds. Time-based decisions like this are terrible as they have no relationship to VM activity and are basically a big hammer. Unfortunately, accurate heuristics would add cost to some hot paths so this patch implements a rough heuristic. There are two cases where the cache is cleared. 1. If a !kswapd process completes a compaction cycle (migrate and free scanner meet), the zone is marked compact_blockskip_flush. When kswapd goes to sleep, it will clear the cache. This is expected to be the common case where the cache is cleared. It does not really matter if kswapd happens to be asleep or going to sleep when the flag is set as it will be woken on the next allocation request. 2. If there have been multiple failures recently and compaction just finished being deferred then a process will clear the cache and start a full scan. This situation happens if there are multiple high-order allocation requests under heavy memory pressure. The clearing of the PG_migrate_skip bits and other scans is inherently racy but the race is harmless. For allocations that can fail such as THP, they will simply fail. For requests that cannot fail, they will retry the allocation. Tests indicated that scanning rates were roughly similar to when the time-based heuristic was used and the allocation success rates were similar.
Signed-off-by: Mel Gorman <mgorman@suse.de> Cc: Rik van Riel <riel@redhat.com> Cc: Richard Davies <richard@arachsys.com> Cc: Shaohua Li <shli@kernel.org> Cc: Avi Kivity <avi@redhat.com> Cc: Rafael Aquini <aquini@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/compaction.h 15
-rw-r--r-- include/linux/mmzone.h 3
-rw-r--r-- mm/compaction.c 50
-rw-r--r-- mm/page_alloc.c 1
-rw-r--r-- mm/vmscan.c 8
5 files changed, 60 insertions, 17 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 0e38a1deeb23..6ecb6dc2f303 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -24,6 +24,7 @@ extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
24 int order, gfp_t gfp_mask, nodemask_t *mask, 24 int order, gfp_t gfp_mask, nodemask_t *mask,
25 bool sync, bool *contended, struct page **page); 25 bool sync, bool *contended, struct page **page);
26extern int compact_pgdat(pg_data_t *pgdat, int order); 26extern int compact_pgdat(pg_data_t *pgdat, int order);
27extern void reset_isolation_suitable(pg_data_t *pgdat);
27extern unsigned long compaction_suitable(struct zone *zone, int order); 28extern unsigned long compaction_suitable(struct zone *zone, int order);
28 29
29/* Do not skip compaction more than 64 times */ 30/* Do not skip compaction more than 64 times */
@@ -61,6 +62,16 @@ static inline bool compaction_deferred(struct zone *zone, int order)
61 return zone->compact_considered < defer_limit; 62 return zone->compact_considered < defer_limit;
62} 63}
63 64
65/* Returns true if restarting compaction after many failures */
66static inline bool compaction_restarting(struct zone *zone, int order)
67{
68 if (order < zone->compact_order_failed)
69 return false;
70
71 return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
72 zone->compact_considered >= 1UL << zone->compact_defer_shift;
73}
74
64#else 75#else
65static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, 76static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
66 int order, gfp_t gfp_mask, nodemask_t *nodemask, 77 int order, gfp_t gfp_mask, nodemask_t *nodemask,
@@ -74,6 +85,10 @@ static inline int compact_pgdat(pg_data_t *pgdat, int order)
74 return COMPACT_CONTINUE; 85 return COMPACT_CONTINUE;
75} 86}
76 87
88static inline void reset_isolation_suitable(pg_data_t *pgdat)
89{
90}
91
77static inline unsigned long compaction_suitable(struct zone *zone, int order) 92static inline unsigned long compaction_suitable(struct zone *zone, int order)
78{ 93{
79 return COMPACT_SKIPPED; 94 return COMPACT_SKIPPED;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c8b3abc97a1e..d240efa8f846 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -370,7 +370,8 @@ struct zone {
370 spinlock_t lock; 370 spinlock_t lock;
371 int all_unreclaimable; /* All pages pinned */ 371 int all_unreclaimable; /* All pages pinned */
372#if defined CONFIG_COMPACTION || defined CONFIG_CMA 372#if defined CONFIG_COMPACTION || defined CONFIG_CMA
373 unsigned long compact_blockskip_expire; 373 /* Set to true when the PG_migrate_skip bits should be cleared */
374 bool compact_blockskip_flush;
374 375
375 /* pfns where compaction scanners should start */ 376 /* pfns where compaction scanners should start */
376 unsigned long compact_cached_free_pfn; 377 unsigned long compact_cached_free_pfn;
diff --git a/mm/compaction.c b/mm/compaction.c
index f94cbc0b99a5..d8187f9cabbf 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -66,24 +66,15 @@ static inline bool isolation_suitable(struct compact_control *cc,
66 * should be skipped for page isolation when the migrate and free page scanner 66 * should be skipped for page isolation when the migrate and free page scanner
67 * meet. 67 * meet.
68 */ 68 */
69static void reset_isolation_suitable(struct zone *zone) 69static void __reset_isolation_suitable(struct zone *zone)
70{ 70{
71 unsigned long start_pfn = zone->zone_start_pfn; 71 unsigned long start_pfn = zone->zone_start_pfn;
72 unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages; 72 unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
73 unsigned long pfn; 73 unsigned long pfn;
74 74
75 /*
76 * Do not reset more than once every five seconds. If allocations are
77 * failing sufficiently quickly to allow this to happen then continually
78 * scanning for compaction is not going to help. The choice of five
79 * seconds is arbitrary but will mitigate excessive scanning.
80 */
81 if (time_before(jiffies, zone->compact_blockskip_expire))
82 return;
83
84 zone->compact_cached_migrate_pfn = start_pfn; 75 zone->compact_cached_migrate_pfn = start_pfn;
85 zone->compact_cached_free_pfn = end_pfn; 76 zone->compact_cached_free_pfn = end_pfn;
86 zone->compact_blockskip_expire = jiffies + (HZ * 5); 77 zone->compact_blockskip_flush = false;
87 78
88 /* Walk the zone and mark every pageblock as suitable for isolation */ 79 /* Walk the zone and mark every pageblock as suitable for isolation */
89 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { 80 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
@@ -102,9 +93,24 @@ static void reset_isolation_suitable(struct zone *zone)
102 } 93 }
103} 94}
104 95
96void reset_isolation_suitable(pg_data_t *pgdat)
97{
98 int zoneid;
99
100 for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
101 struct zone *zone = &pgdat->node_zones[zoneid];
102 if (!populated_zone(zone))
103 continue;
104
105 /* Only flush if a full compaction finished recently */
106 if (zone->compact_blockskip_flush)
107 __reset_isolation_suitable(zone);
108 }
109}
110
105/* 111/*
106 * If no pages were isolated then mark this pageblock to be skipped in the 112 * If no pages were isolated then mark this pageblock to be skipped in the
107 * future. The information is later cleared by reset_isolation_suitable(). 113 * future. The information is later cleared by __reset_isolation_suitable().
108 */ 114 */
109static void update_pageblock_skip(struct compact_control *cc, 115static void update_pageblock_skip(struct compact_control *cc,
110 struct page *page, unsigned long nr_isolated, 116 struct page *page, unsigned long nr_isolated,
@@ -820,7 +826,15 @@ static int compact_finished(struct zone *zone,
820 826
821 /* Compaction run completes if the migrate and free scanner meet */ 827 /* Compaction run completes if the migrate and free scanner meet */
822 if (cc->free_pfn <= cc->migrate_pfn) { 828 if (cc->free_pfn <= cc->migrate_pfn) {
823 reset_isolation_suitable(cc->zone); 829 /*
830 * Mark that the PG_migrate_skip information should be cleared
831 * by kswapd when it goes to sleep. kswapd does not set the
832 * flag itself as the decision to clear should be directly
833 * based on an allocation request.
834 */
835 if (!current_is_kswapd())
836 zone->compact_blockskip_flush = true;
837
824 return COMPACT_COMPLETE; 838 return COMPACT_COMPLETE;
825 } 839 }
826 840
@@ -943,9 +957,13 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
943 zone->compact_cached_migrate_pfn = cc->migrate_pfn; 957 zone->compact_cached_migrate_pfn = cc->migrate_pfn;
944 } 958 }
945 959
946 /* Clear pageblock skip if there are numerous alloc failures */ 960 /*
947 if (zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT) 961 * Clear pageblock skip if there were failures recently and compaction
948 reset_isolation_suitable(zone); 962 * is about to be retried after being deferred. kswapd does not do
963 * this reset as it'll reset the cached information when going to sleep.
964 */
965 if (compaction_restarting(zone, cc->order) && !current_is_kswapd())
966 __reset_isolation_suitable(zone);
949 967
950 migrate_prep_local(); 968 migrate_prep_local();
951 969
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 44c56049edf9..b97cf12f07a9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2172,6 +2172,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
2172 preferred_zone, migratetype); 2172 preferred_zone, migratetype);
2173 if (page) { 2173 if (page) {
2174got_page: 2174got_page:
2175 preferred_zone->compact_blockskip_flush = false;
2175 preferred_zone->compact_considered = 0; 2176 preferred_zone->compact_considered = 0;
2176 preferred_zone->compact_defer_shift = 0; 2177 preferred_zone->compact_defer_shift = 0;
2177 if (order >= preferred_zone->compact_order_failed) 2178 if (order >= preferred_zone->compact_order_failed)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1ee4b69a28a5..b010efc43891 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2895,6 +2895,14 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
2895 */ 2895 */
2896 set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); 2896 set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
2897 2897
2898 /*
2899 * Compaction records what page blocks it recently failed to
2900 * isolate pages from and skips them in the future scanning.
2901 * When kswapd is going to sleep, it is reasonable to assume
2902 * that pages and compaction may succeed so reset the cache.
2903 */
2904 reset_isolation_suitable(pgdat);
2905
2898 if (!kthread_should_stop()) 2906 if (!kthread_should_stop())
2899 schedule(); 2907 schedule();
2900 2908