diff options
author | Mel Gorman <mgorman@suse.de> | 2013-07-03 18:01:45 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-03 19:07:28 -0400 |
commit | b8e83b942a16eb73e63406592d3178207a4f07a1 (patch) | |
tree | 2fc7f3c0989b924b6e2a7cd9b81b0d11690c5294 /mm | |
parent | e82e0561dae9f3ae5a21fc2d3d3ccbe69d90be46 (diff) |
mm: vmscan: flatten kswapd priority loop
kswapd stops raising the scanning priority when at least
SWAP_CLUSTER_MAX pages have been reclaimed or the pgdat is considered
balanced. It then rechecks if it needs to restart at DEF_PRIORITY and
whether high-order reclaim needs to be reset. This is not wrong per se
but it is confusing to follow and forcing kswapd to stay at DEF_PRIORITY
may require several restarts before it has scanned enough pages to meet
the high watermark even at 100% efficiency. This patch irons out the
logic a bit by controlling when priority is raised and removing the
"goto loop_again".
This patch has kswapd raise the scanning priority until it is scanning
enough pages that it could meet the high watermark in one shrink of the
LRU lists if it is able to reclaim at 100% efficiency. It will not
raise the scanning priority higher unless it is failing to reclaim any
pages.
To avoid infinite looping for high-order allocation requests kswapd will
not reclaim for high-order allocations when it has reclaimed at least
twice the number of pages as the allocation request.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Tested-by: Zlatko Calusic <zcalusic@bitsync.net>
Cc: dormando <dormando@rydia.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/vmscan.c | 86 |
1 files changed, 41 insertions, 45 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 26ad67f1962c..1c10ee512215 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -2654,8 +2654,12 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, | |||
2654 | /* | 2654 | /* |
2655 | * kswapd shrinks the zone by the number of pages required to reach | 2655 | * kswapd shrinks the zone by the number of pages required to reach |
2656 | * the high watermark. | 2656 | * the high watermark. |
2657 | * | ||
2658 | * Returns true if kswapd scanned at least the requested number of pages to | ||
2659 | * reclaim. This is used to determine if the scanning priority needs to be | ||
2660 | * raised. | ||
2657 | */ | 2661 | */ |
2658 | static void kswapd_shrink_zone(struct zone *zone, | 2662 | static bool kswapd_shrink_zone(struct zone *zone, |
2659 | struct scan_control *sc, | 2663 | struct scan_control *sc, |
2660 | unsigned long lru_pages) | 2664 | unsigned long lru_pages) |
2661 | { | 2665 | { |
@@ -2675,6 +2679,8 @@ static void kswapd_shrink_zone(struct zone *zone, | |||
2675 | 2679 | ||
2676 | if (nr_slab == 0 && !zone_reclaimable(zone)) | 2680 | if (nr_slab == 0 && !zone_reclaimable(zone)) |
2677 | zone->all_unreclaimable = 1; | 2681 | zone->all_unreclaimable = 1; |
2682 | |||
2683 | return sc->nr_scanned >= sc->nr_to_reclaim; | ||
2678 | } | 2684 | } |
2679 | 2685 | ||
2680 | /* | 2686 | /* |
@@ -2701,26 +2707,26 @@ static void kswapd_shrink_zone(struct zone *zone, | |||
2701 | static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | 2707 | static unsigned long balance_pgdat(pg_data_t *pgdat, int order, |
2702 | int *classzone_idx) | 2708 | int *classzone_idx) |
2703 | { | 2709 | { |
2704 | bool pgdat_is_balanced = false; | ||
2705 | int i; | 2710 | int i; |
2706 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 2711 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
2707 | unsigned long nr_soft_reclaimed; | 2712 | unsigned long nr_soft_reclaimed; |
2708 | unsigned long nr_soft_scanned; | 2713 | unsigned long nr_soft_scanned; |
2709 | struct scan_control sc = { | 2714 | struct scan_control sc = { |
2710 | .gfp_mask = GFP_KERNEL, | 2715 | .gfp_mask = GFP_KERNEL, |
2716 | .priority = DEF_PRIORITY, | ||
2711 | .may_unmap = 1, | 2717 | .may_unmap = 1, |
2712 | .may_swap = 1, | 2718 | .may_swap = 1, |
2719 | .may_writepage = !laptop_mode, | ||
2713 | .order = order, | 2720 | .order = order, |
2714 | .target_mem_cgroup = NULL, | 2721 | .target_mem_cgroup = NULL, |
2715 | }; | 2722 | }; |
2716 | loop_again: | ||
2717 | sc.priority = DEF_PRIORITY; | ||
2718 | sc.nr_reclaimed = 0; | ||
2719 | sc.may_writepage = !laptop_mode; | ||
2720 | count_vm_event(PAGEOUTRUN); | 2723 | count_vm_event(PAGEOUTRUN); |
2721 | 2724 | ||
2722 | do { | 2725 | do { |
2723 | unsigned long lru_pages = 0; | 2726 | unsigned long lru_pages = 0; |
2727 | bool raise_priority = true; | ||
2728 | |||
2729 | sc.nr_reclaimed = 0; | ||
2724 | 2730 | ||
2725 | /* | 2731 | /* |
2726 | * Scan in the highmem->dma direction for the highest | 2732 | * Scan in the highmem->dma direction for the highest |
@@ -2762,10 +2768,8 @@ loop_again: | |||
2762 | } | 2768 | } |
2763 | } | 2769 | } |
2764 | 2770 | ||
2765 | if (i < 0) { | 2771 | if (i < 0) |
2766 | pgdat_is_balanced = true; | ||
2767 | goto out; | 2772 | goto out; |
2768 | } | ||
2769 | 2773 | ||
2770 | for (i = 0; i <= end_zone; i++) { | 2774 | for (i = 0; i <= end_zone; i++) { |
2771 | struct zone *zone = pgdat->node_zones + i; | 2775 | struct zone *zone = pgdat->node_zones + i; |
@@ -2832,8 +2836,16 @@ loop_again: | |||
2832 | 2836 | ||
2833 | if ((buffer_heads_over_limit && is_highmem_idx(i)) || | 2837 | if ((buffer_heads_over_limit && is_highmem_idx(i)) || |
2834 | !zone_balanced(zone, testorder, | 2838 | !zone_balanced(zone, testorder, |
2835 | balance_gap, end_zone)) | 2839 | balance_gap, end_zone)) { |
2836 | kswapd_shrink_zone(zone, &sc, lru_pages); | 2840 | /* |
2841 | * There should be no need to raise the | ||
2842 | * scanning priority if enough pages are | ||
2843 | * already being scanned that high | ||
2844 | * watermark would be met at 100% efficiency. | ||
2845 | */ | ||
2846 | if (kswapd_shrink_zone(zone, &sc, lru_pages)) | ||
2847 | raise_priority = false; | ||
2848 | } | ||
2837 | 2849 | ||
2838 | /* | 2850 | /* |
2839 | * If we're getting trouble reclaiming, start doing | 2851 | * If we're getting trouble reclaiming, start doing |
@@ -2868,46 +2880,29 @@ loop_again: | |||
2868 | pfmemalloc_watermark_ok(pgdat)) | 2880 | pfmemalloc_watermark_ok(pgdat)) |
2869 | wake_up(&pgdat->pfmemalloc_wait); | 2881 | wake_up(&pgdat->pfmemalloc_wait); |
2870 | 2882 | ||
2871 | if (pgdat_balanced(pgdat, order, *classzone_idx)) { | ||
2872 | pgdat_is_balanced = true; | ||
2873 | break; /* kswapd: all done */ | ||
2874 | } | ||
2875 | |||
2876 | /* | 2883 | /* |
2877 | * We do this so kswapd doesn't build up large priorities for | 2884 | * Fragmentation may mean that the system cannot be rebalanced |
2878 | * example when it is freeing in parallel with allocators. It | 2885 | * for high-order allocations in all zones. If twice the |
2879 | * matches the direct reclaim path behaviour in terms of impact | 2886 | * allocation size has been reclaimed and the zones are still |
2880 | * on zone->*_priority. | 2887 | * not balanced then recheck the watermarks at order-0 to |
2888 | * prevent kswapd reclaiming excessively. Assume that a | ||
2889 | * process requested a high-order can direct reclaim/compact. | ||
2881 | */ | 2890 | */ |
2882 | if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) | 2891 | if (order && sc.nr_reclaimed >= 2UL << order) |
2883 | break; | 2892 | order = sc.order = 0; |
2884 | } while (--sc.priority >= 0); | ||
2885 | |||
2886 | out: | ||
2887 | if (!pgdat_is_balanced) { | ||
2888 | cond_resched(); | ||
2889 | 2893 | ||
2890 | try_to_freeze(); | 2894 | /* Check if kswapd should be suspending */ |
2895 | if (try_to_freeze() || kthread_should_stop()) | ||
2896 | break; | ||
2891 | 2897 | ||
2892 | /* | 2898 | /* |
2893 | * Fragmentation may mean that the system cannot be | 2899 | * Raise priority if scanning rate is too low or there was no |
2894 | * rebalanced for high-order allocations in all zones. | 2900 | * progress in reclaiming pages |
2895 | * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX, | ||
2896 | * it means the zones have been fully scanned and are still | ||
2897 | * not balanced. For high-order allocations, there is | ||
2898 | * little point trying all over again as kswapd may | ||
2899 | * infinite loop. | ||
2900 | * | ||
2901 | * Instead, recheck all watermarks at order-0 as they | ||
2902 | * are the most important. If watermarks are ok, kswapd will go | ||
2903 | * back to sleep. High-order users can still perform direct | ||
2904 | * reclaim if they wish. | ||
2905 | */ | 2901 | */ |
2906 | if (sc.nr_reclaimed < SWAP_CLUSTER_MAX) | 2902 | if (raise_priority || !sc.nr_reclaimed) |
2907 | order = sc.order = 0; | 2903 | sc.priority--; |
2908 | 2904 | } while (sc.priority >= 0 && | |
2909 | goto loop_again; | 2905 | !pgdat_balanced(pgdat, order, *classzone_idx)); |
2910 | } | ||
2911 | 2906 | ||
2912 | /* | 2907 | /* |
2913 | * If kswapd was reclaiming at a higher order, it has the option of | 2908 | * If kswapd was reclaiming at a higher order, it has the option of |
@@ -2936,6 +2931,7 @@ out: | |||
2936 | compact_pgdat(pgdat, order); | 2931 | compact_pgdat(pgdat, order); |
2937 | } | 2932 | } |
2938 | 2933 | ||
2934 | out: | ||
2939 | /* | 2935 | /* |
2940 | * Return the order we were reclaiming at so prepare_kswapd_sleep() | 2936 | * Return the order we were reclaiming at so prepare_kswapd_sleep() |
2941 | * makes a decision on the order we were last reclaiming at. However, | 2937 | * makes a decision on the order we were last reclaiming at. However, |