-rw-r--r--   include/linux/compaction.h   15
-rw-r--r--   include/linux/mmzone.h        3
-rw-r--r--   mm/compaction.c              50
-rw-r--r--   mm/page_alloc.c               1
-rw-r--r--   mm/vmscan.c                   8
5 files changed, 60 insertions, 17 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 0e38a1deeb23..6ecb6dc2f303 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -24,6 +24,7 @@ extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
 			bool sync, bool *contended, struct page **page);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
+extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 
 /* Do not skip compaction more than 64 times */
@@ -61,6 +62,16 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 	return zone->compact_considered < defer_limit;
 }
 
+/* Returns true if restarting compaction after many failures */
+static inline bool compaction_restarting(struct zone *zone, int order)
+{
+	if (order < zone->compact_order_failed)
+		return false;
+
+	return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
+		zone->compact_considered >= 1UL << zone->compact_defer_shift;
+}
+
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
@@ -74,6 +85,10 @@ static inline int compact_pgdat(pg_data_t *pgdat, int order)
 	return COMPACT_CONTINUE;
 }
 
+static inline void reset_isolation_suitable(pg_data_t *pgdat)
+{
+}
+
 static inline unsigned long compaction_suitable(struct zone *zone, int order)
 {
 	return COMPACT_SKIPPED;
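
A quick illustration of when the new compaction_restarting() predicate fires: it only returns true once deferral has saturated at COMPACT_MAX_DEFER_SHIFT and the current deferral window has been fully consumed. The sketch below is a minimal userspace model of that check, not kernel code; the mock zone struct, its field values and main() are invented for illustration and assume the deferred-compaction counters behave as in compaction_deferred() above.

	/* restart_sketch.c - userspace model of compaction_restarting() */
	#include <stdbool.h>
	#include <stdio.h>

	#define COMPACT_MAX_DEFER_SHIFT 6	/* do not skip more than 64 times */

	struct mock_zone {
		unsigned long compact_considered;	/* attempts skipped so far */
		unsigned int compact_defer_shift;	/* log2 of the skip limit */
		int compact_order_failed;		/* lowest order that failed */
	};

	/* Mirrors the predicate added to compaction.h in this patch */
	static bool compaction_restarting(struct mock_zone *zone, int order)
	{
		if (order < zone->compact_order_failed)
			return false;

		return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT &&
			zone->compact_considered >= 1UL << zone->compact_defer_shift;
	}

	int main(void)
	{
		/* Backoff still growing: 16 of 32 skips consumed, no restart */
		struct mock_zone z = { .compact_considered = 16,
				       .compact_defer_shift = 5,
				       .compact_order_failed = 2 };
		printf("shift=5   -> restart=%d\n", compaction_restarting(&z, 9));

		/* Backoff saturated and window consumed: restart and flush skips */
		z.compact_defer_shift = COMPACT_MAX_DEFER_SHIFT;
		z.compact_considered = 64;
		printf("shift=max -> restart=%d\n", compaction_restarting(&z, 9));
		return 0;
	}
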
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c8b3abc97a1e..d240efa8f846 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -370,7 +370,8 @@ struct zone {
 	spinlock_t		lock;
 	int			all_unreclaimable; /* All pages pinned */
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
-	unsigned long		compact_blockskip_expire;
+	/* Set to true when the PG_migrate_skip bits should be cleared */
+	bool			compact_blockskip_flush;
 
 	/* pfns where compaction scanners should start */
 	unsigned long		compact_cached_free_pfn;
diff --git a/mm/compaction.c b/mm/compaction.c
index f94cbc0b99a5..d8187f9cabbf 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -66,24 +66,15 @@ static inline bool isolation_suitable(struct compact_control *cc,
  * should be skipped for page isolation when the migrate and free page scanner
  * meet.
  */
-static void reset_isolation_suitable(struct zone *zone)
+static void __reset_isolation_suitable(struct zone *zone)
 {
 	unsigned long start_pfn = zone->zone_start_pfn;
 	unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
 	unsigned long pfn;
 
-	/*
-	 * Do not reset more than once every five seconds. If allocations are
-	 * failing sufficiently quickly to allow this to happen then continually
-	 * scanning for compaction is not going to help. The choice of five
-	 * seconds is arbitrary but will mitigate excessive scanning.
-	 */
-	if (time_before(jiffies, zone->compact_blockskip_expire))
-		return;
-
 	zone->compact_cached_migrate_pfn = start_pfn;
 	zone->compact_cached_free_pfn = end_pfn;
-	zone->compact_blockskip_expire = jiffies + (HZ * 5);
+	zone->compact_blockskip_flush = false;
 
 	/* Walk the zone and mark every pageblock as suitable for isolation */
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
@@ -102,9 +93,24 @@ static void reset_isolation_suitable(struct zone *zone)
 	}
 }
 
+void reset_isolation_suitable(pg_data_t *pgdat)
+{
+	int zoneid;
+
+	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+		struct zone *zone = &pgdat->node_zones[zoneid];
+		if (!populated_zone(zone))
+			continue;
+
+		/* Only flush if a full compaction finished recently */
+		if (zone->compact_blockskip_flush)
+			__reset_isolation_suitable(zone);
+	}
+}
+
 /*
  * If no pages were isolated then mark this pageblock to be skipped in the
- * future. The information is later cleared by reset_isolation_suitable().
+ * future. The information is later cleared by __reset_isolation_suitable().
  */
 static void update_pageblock_skip(struct compact_control *cc,
 			struct page *page, unsigned long nr_isolated,
@@ -820,7 +826,15 @@ static int compact_finished(struct zone *zone,
 
 	/* Compaction run completes if the migrate and free scanner meet */
 	if (cc->free_pfn <= cc->migrate_pfn) {
-		reset_isolation_suitable(cc->zone);
+		/*
+		 * Mark that the PG_migrate_skip information should be cleared
+		 * by kswapd when it goes to sleep. kswapd does not set the
+		 * flag itself as the decision to be clear should be directly
+		 * based on an allocation request.
+		 */
+		if (!current_is_kswapd())
+			zone->compact_blockskip_flush = true;
+
 		return COMPACT_COMPLETE;
 	}
 
@@ -943,9 +957,13 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		zone->compact_cached_migrate_pfn = cc->migrate_pfn;
 	}
 
-	/* Clear pageblock skip if there are numerous alloc failures */
-	if (zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT)
-		reset_isolation_suitable(zone);
+	/*
+	 * Clear pageblock skip if there were failures recently and compaction
+	 * is about to be retried after being deferred. kswapd does not do
+	 * this reset as it'll reset the cached information when going to sleep.
+	 */
+	if (compaction_restarting(zone, cc->order) && !current_is_kswapd())
+		__reset_isolation_suitable(zone);
 
 	migrate_prep_local();
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 44c56049edf9..b97cf12f07a9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2172,6 +2172,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 					preferred_zone, migratetype);
 		if (page) {
 got_page:
+			preferred_zone->compact_blockskip_flush = false;
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
 			if (order >= preferred_zone->compact_order_failed)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1ee4b69a28a5..b010efc43891 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2895,6 +2895,14 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 		 */
 		set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
 
+		/*
+		 * Compaction records what page blocks it recently failed to
+		 * isolate pages from and skips them in the future scanning.
+		 * When kswapd is going to sleep, it is reasonable to assume
+		 * that pages and compaction may succeed so reset the cache.
+		 */
+		reset_isolation_suitable(pgdat);
+
 		if (!kthread_should_stop())
 			schedule();
 
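
Taken together, the hunks above replace the old five-second expiry timestamp with a small hand-off around compact_blockskip_flush: direct compaction requests a flush when its scanners meet, a successful allocation withdraws that request, and kswapd clears the PG_migrate_skip hints only when it is about to sleep with a flush pending. The sketch below is a hedged userspace model of that hand-off, not kernel code; the struct and function names are invented for illustration and only trace who sets, who clears, and who acts on the flag.

	/* blockskip_sketch.c - model of the compact_blockskip_flush hand-off */
	#include <stdbool.h>
	#include <stdio.h>

	struct mock_zone {
		bool compact_blockskip_flush;	/* request to clear PG_migrate_skip */
		bool pageblocks_marked_skip;	/* stand-in for the per-pageblock skip bits */
	};

	/* Direct compaction finishing a full pass (compact_finished hunk) */
	static void direct_compaction_complete(struct mock_zone *z)
	{
		z->pageblocks_marked_skip = true;	/* scanners left skip hints behind */
		z->compact_blockskip_flush = true;	/* ask kswapd to flush them later */
	}

	/* Allocation succeeded after compaction (page_alloc.c hunk) */
	static void direct_alloc_succeeded(struct mock_zone *z)
	{
		z->compact_blockskip_flush = false;	/* hints are still useful, keep them */
	}

	/* kswapd about to sleep (vmscan.c hunk -> reset_isolation_suitable) */
	static void kswapd_going_to_sleep(struct mock_zone *z)
	{
		if (z->compact_blockskip_flush) {
			z->pageblocks_marked_skip = false;	/* clear the skip hints */
			z->compact_blockskip_flush = false;
		}
	}

	int main(void)
	{
		struct mock_zone z = { 0 };

		direct_compaction_complete(&z);
		kswapd_going_to_sleep(&z);	/* flush pending: hints dropped */
		printf("skip hints after flush: %d\n", z.pageblocks_marked_skip);

		direct_compaction_complete(&z);
		direct_alloc_succeeded(&z);	/* allocation worked: no flush requested */
		kswapd_going_to_sleep(&z);
		printf("skip hints kept:        %d\n", z.pageblocks_marked_skip);
		return 0;
	}
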