vmscan: kill hibernation specific reclaim logic and unify it

shrink_all_zones() was introduced by commit d6277db4ab (swsusp: rework memory shrinker) to improve hibernation performance, and sc.swap_cluster_max was introduced by commit a06fe4d307 (Speed freeing memory for suspend).

Commit a06fe4d307 reported:

  Without the patch:
    Freed 14600 pages in 1749 jiffies = 32.61 MB/s (Anomolous!)
    Freed 88563 pages in 14719 jiffies = 23.50 MB/s
    Freed 205734 pages in 32389 jiffies = 24.81 MB/s

  With the patch:
    Freed 68252 pages in 496 jiffies = 537.52 MB/s
    Freed 116464 pages in 569 jiffies = 798.54 MB/s
    Freed 209699 pages in 705 jiffies = 1161.89 MB/s

At that time, the patch was well worth having. However, modern hardware trends and recent VM improvements have eroded its value. For several reasons, I think we should remove shrink_all_zones() entirely.

Details:

1) In the old days, shrink_zone()'s slowness was mainly caused by a misguided I/O throttle that kicked in even when there was no I/O congestion. The current shrink_zone() is sane and not slow.

2) shrink_all_zones() tries to shrink all pages at once, but that does not work well on NUMA systems.
   Example: the system has 4GB of memory, each node has 2GB, and hibernation needs 1GB.
     optimal)          steal 500MB from each node.
     shrink_all_zones) steal 1GB from node-0.
   Oh, the cache balancing logic is broken. ;) Unfortunately, desktop systems have moved to NUMA nowadays. (Side note: if hibernation requires 2GB, shrink_all_zones() never succeeds on the above machine.)

3) If a node has several pages with I/O in flight, shrink_all_zones() produces a pretty bad result.
   Scenario: hibernation needs 1GB.
     1) shrink_all_zones() tries to reclaim 1GB from Node-0
     2) but it only reclaims 990MB
     3) stupidly, shrink_all_zones() then tries to reclaim 1GB from Node-1
     4) it reclaims 990MB
   Oh, well. It reclaimed about twice as much as required. On the other hand, the current shrink_zone() has sane bail-out logic, so it does not over-reclaim, and that risk goes away.

4) The split-LRU VM always maintains the active/inactive ratio very carefully. Shrinking only the inactive list breaks that assumption.
It creates an unnecessary OOM risk and is obviously suboptimal.

Now shrink_all_memory() is just a wrapper around do_try_to_free_pages(). This improves reviewability and debuggability, and solves the problems above.

Side note: unifying the reclaim logic has two good side effects.
 - It fixes a recursive reclaim bug in shrink_all_memory(): it forgot to set PF_MEMALLOC, which means the system could get stuck in a deadlock.
 - shrink_all_memory() is now lockdep-aware, which improves debuggability.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> 2009-12-14 20:59:12 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 11:53:18 -0500
commit: 7b51755c3b38483b574d363d5ee587283c3f7999 (patch)
tree: 29ac6d3b73f0a28488dabff03a7df948c0333906 /mm/vmscan.c
parent: 22fba33545b731408deab6e96b6e231ee05fd10b (diff)
1 files changed, 26 insertions, 127 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7b0d5c784c7e..63bd521bb229 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -58,6 +58,8 @@ struct scan_control {
        /* How many pages shrink_list() should reclaim */
        unsigned long nr_to_reclaim;
+        unsigned long hibernation_mode;
        /* This context's GFP mask */
        gfp_t gfp_mask;
@@ -1796,7 +1798,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                }
                /* Take a nap, wait for some writeback to complete */
-                if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
+                if (!sc->hibernation_mode && sc->nr_scanned &&
+                    priority < DEF_PRIORITY - 2)
                        congestion_wait(BLK_RW_ASYNC, HZ/10);
        }
        /* top priority shrink_zones still had more to do? don't OOM, then */
@@ -2336,148 +2339,44 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
 #ifdef CONFIG_HIBERNATION
 /*
- * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
+ * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
- * from LRU lists system-wide, for given pass and priority.
- *
- * For pass > 3 we also try to shrink the LRU lists that contain a few pages
- */
-static void shrink_all_zones(unsigned long nr_pages, int prio,
-                                      int pass, struct scan_control *sc)
-{
-        struct zone *zone;
-        unsigned long nr_reclaimed = 0;
-        struct zone_reclaim_stat *reclaim_stat;
-        for_each_populated_zone(zone) {
-                enum lru_list l;
-                if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
-                        continue;
-                for_each_evictable_lru(l) {
-                        enum zone_stat_item ls = NR_LRU_BASE + l;
-                        unsigned long lru_pages = zone_page_state(zone, ls);
-                        /* For pass = 0, we don't shrink the active list */
-                        if (pass == 0 && (l == LRU_ACTIVE_ANON ||
-                                                l == LRU_ACTIVE_FILE))
-                                continue;
-                        reclaim_stat = get_reclaim_stat(zone, sc);
-                        reclaim_stat->nr_saved_scan[l] +=
-                                                (lru_pages >> prio) + 1;
-                        if (reclaim_stat->nr_saved_scan[l]
-                                                >= nr_pages || pass > 3) {
-                                unsigned long nr_to_scan;
-                                reclaim_stat->nr_saved_scan[l] = 0;
-                                nr_to_scan = min(nr_pages, lru_pages);
-                                nr_reclaimed += shrink_list(l, nr_to_scan, zone,
-                                                                sc, prio);
-                                if (nr_reclaimed >= nr_pages) {
-                                        sc->nr_reclaimed += nr_reclaimed;
-                                        return;
-                                }
-                        }
-                }
-        }
-        sc->nr_reclaimed += nr_reclaimed;
-}
-/*
- * Try to free `nr_pages' of memory, system-wide, and return the number of
 * freed pages.
 *
 * Rather than trying to age LRUs the aim is to preserve the overall
 * LRU order by reclaiming preferentially
 * inactive > active > active referenced > active mapped
 */
-unsigned long shrink_all_memory(unsigned long nr_pages)
+unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 {
-        unsigned long lru_pages, nr_slab;
-        int pass;
        struct reclaim_state reclaim_state;
        struct scan_control sc = {
-                .gfp_mask = GFP_KERNEL,
+                .gfp_mask = GFP_HIGHUSER_MOVABLE,
-                .may_unmap = 0,
+                .may_swap = 1,
+                .may_unmap = 1,
                .may_writepage = 1,
+                .swap_cluster_max = SWAP_CLUSTER_MAX,
+                .nr_to_reclaim = nr_to_reclaim,
+                .hibernation_mode = 1,
+                .swappiness = vm_swappiness,
+                .order = 0,
                .isolate_pages = isolate_pages_global,
-                .nr_reclaimed = 0,
        };
+        struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
+        struct task_struct *p = current;
+        unsigned long nr_reclaimed;
-        current->reclaim_state = &reclaim_state;
+        p->flags |= PF_MEMALLOC;
+        lockdep_set_current_reclaim_state(sc.gfp_mask);
-        lru_pages = global_reclaimable_pages();
+        reclaim_state.reclaimed_slab = 0;
-        nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
+        p->reclaim_state = &reclaim_state;
-        /* If slab caches are huge, it's better to hit them first */
-        while (nr_slab >= lru_pages) {
-                reclaim_state.reclaimed_slab = 0;
-                shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
-                if (!reclaim_state.reclaimed_slab)
-                        break;
-                sc.nr_reclaimed += reclaim_state.reclaimed_slab;
-                if (sc.nr_reclaimed >= nr_pages)
-                        goto out;
-                nr_slab -= reclaim_state.reclaimed_slab;
-        }
-        /*
-         * We try to shrink LRUs in 5 passes:
-         * 0 = Reclaim from inactive_list only
-         * 1 = Reclaim from active list but don't reclaim mapped
-         * 2 = 2nd pass of type 1
-         * 3 = Reclaim mapped (normal reclaim)
-         * 4 = 2nd pass of type 3
-         */
-        for (pass = 0; pass < 5; pass++) {
-                int prio;
-                /* Force reclaiming mapped pages in the passes #3 and #4 */
-                if (pass > 2)
-                        sc.may_unmap = 1;
-                for (prio = DEF_PRIORITY; prio >= 0; prio--) {
-                        unsigned long nr_to_scan = nr_pages - sc.nr_reclaimed;
-                        sc.nr_scanned = 0;
-                        sc.swap_cluster_max = nr_to_scan;
-                        shrink_all_zones(nr_to_scan, prio, pass, &sc);
-                        if (sc.nr_reclaimed >= nr_pages)
-                                goto out;
-                        reclaim_state.reclaimed_slab = 0;
-                        shrink_slab(sc.nr_scanned, sc.gfp_mask,
-                                    global_reclaimable_pages());
-                        sc.nr_reclaimed += reclaim_state.reclaimed_slab;
-                        if (sc.nr_reclaimed >= nr_pages)
-                                goto out;
-                        if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
-                                congestion_wait(BLK_RW_ASYNC, HZ / 10);
-                }
-        }
-        /*
-         * If sc.nr_reclaimed = 0, we could not shrink LRUs, but there may be
-         * something in slab caches
-         */
-        if (!sc.nr_reclaimed) {
-                do {
-                        reclaim_state.reclaimed_slab = 0;
-                        shrink_slab(nr_pages, sc.gfp_mask,
-                                    global_reclaimable_pages());
-                        sc.nr_reclaimed += reclaim_state.reclaimed_slab;
-                } while (sc.nr_reclaimed < nr_pages &&
-                                reclaim_state.reclaimed_slab > 0);
-        }
+        nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
-out:
+        p->reclaim_state = NULL;
-        current->reclaim_state = NULL;
+        lockdep_clear_current_reclaim_state();
+        p->flags &= ~PF_MEMALLOC;
-        return sc.nr_reclaimed;
+        return nr_reclaimed;
 }
 #endif /* CONFIG_HIBERNATION */
author	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>	2009-12-14 20:59:12 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-15 11:53:18 -0500
commit	7b51755c3b38483b574d363d5ee587283c3f7999 (patch)
tree	29ac6d3b73f0a28488dabff03a7df948c0333906 /mm/vmscan.c
parent	22fba33545b731408deab6e96b6e231ee05fd10b (diff)

diff --git a/mm/vmscan.c b/mm/vmscan.c index 7b0d5c784c7e..63bd521bb229 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c
@@ -58,6 +58,8 @@ struct scan_control {
58	/* How many pages shrink_list() should reclaim */	58	/* How many pages shrink_list() should reclaim */
59	unsigned long nr_to_reclaim;	59	unsigned long nr_to_reclaim;
60		60
		61	unsigned long hibernation_mode;
		62
61	/* This context's GFP mask */	63	/* This context's GFP mask */
62	gfp_t gfp_mask;	64	gfp_t gfp_mask;
63		65
@@ -1796,7 +1798,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1796	}	1798	}
1797		1799
1798	/* Take a nap, wait for some writeback to complete */	1800	/* Take a nap, wait for some writeback to complete */
1799	if (sc->nr_scanned && priority < DEF_PRIORITY - 2)	1801	if (!sc->hibernation_mode && sc->nr_scanned &&
		1802	priority < DEF_PRIORITY - 2)
1800	congestion_wait(BLK_RW_ASYNC, HZ/10);	1803	congestion_wait(BLK_RW_ASYNC, HZ/10);
1801	}	1804	}
1802	/* top priority shrink_zones still had more to do? don't OOM, then */	1805	/* top priority shrink_zones still had more to do? don't OOM, then */
@@ -2336,148 +2339,44 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
2336		2339
2337	#ifdef CONFIG_HIBERNATION	2340	#ifdef CONFIG_HIBERNATION
2338	/*	2341	/*
2339	* Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages	2342	* Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
2340	* from LRU lists system-wide, for given pass and priority.
2341	*
2342	* For pass > 3 we also try to shrink the LRU lists that contain a few pages
2343	*/
2344	static void shrink_all_zones(unsigned long nr_pages, int prio,
2345	int pass, struct scan_control *sc)
2346	{
2347	struct zone *zone;
2348	unsigned long nr_reclaimed = 0;
2349	struct zone_reclaim_stat *reclaim_stat;
2350
2351	for_each_populated_zone(zone) {
2352	enum lru_list l;
2353
2354	if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
2355	continue;
2356
2357	for_each_evictable_lru(l) {
2358	enum zone_stat_item ls = NR_LRU_BASE + l;
2359	unsigned long lru_pages = zone_page_state(zone, ls);
2360
2361	/* For pass = 0, we don't shrink the active list */
2362	if (pass == 0 && (l == LRU_ACTIVE_ANON \|\|
2363	l == LRU_ACTIVE_FILE))
2364	continue;
2365
2366	reclaim_stat = get_reclaim_stat(zone, sc);
2367	reclaim_stat->nr_saved_scan[l] +=
2368	(lru_pages >> prio) + 1;
2369	if (reclaim_stat->nr_saved_scan[l]
2370	>= nr_pages \|\| pass > 3) {
2371	unsigned long nr_to_scan;
2372
2373	reclaim_stat->nr_saved_scan[l] = 0;
2374	nr_to_scan = min(nr_pages, lru_pages);
2375	nr_reclaimed += shrink_list(l, nr_to_scan, zone,
2376	sc, prio);
2377	if (nr_reclaimed >= nr_pages) {
2378	sc->nr_reclaimed += nr_reclaimed;
2379	return;
2380	}
2381	}
2382	}
2383	}
2384	sc->nr_reclaimed += nr_reclaimed;
2385	}
2386
2387	/*
2388	* Try to free `nr_pages' of memory, system-wide, and return the number of
2389	* freed pages.	2343	* freed pages.
2390	*	2344	*
2391	* Rather than trying to age LRUs the aim is to preserve the overall	2345	* Rather than trying to age LRUs the aim is to preserve the overall
2392	* LRU order by reclaiming preferentially	2346	* LRU order by reclaiming preferentially
2393	* inactive > active > active referenced > active mapped	2347	* inactive > active > active referenced > active mapped
2394	*/	2348	*/
2395	unsigned long shrink_all_memory(unsigned long nr_pages)	2349	unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
2396	{	2350	{
2397	unsigned long lru_pages, nr_slab;
2398	int pass;
2399	struct reclaim_state reclaim_state;	2351	struct reclaim_state reclaim_state;
2400	struct scan_control sc = {	2352	struct scan_control sc = {
2401	.gfp_mask = GFP_KERNEL,	2353	.gfp_mask = GFP_HIGHUSER_MOVABLE,
2402	.may_unmap = 0,	2354	.may_swap = 1,
		2355	.may_unmap = 1,
2403	.may_writepage = 1,	2356	.may_writepage = 1,
		2357	.swap_cluster_max = SWAP_CLUSTER_MAX,
		2358	.nr_to_reclaim = nr_to_reclaim,
		2359	.hibernation_mode = 1,
		2360	.swappiness = vm_swappiness,
		2361	.order = 0,
2404	.isolate_pages = isolate_pages_global,	2362	.isolate_pages = isolate_pages_global,
2405	.nr_reclaimed = 0,
2406	};	2363	};
		2364	struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
		2365	struct task_struct *p = current;
		2366	unsigned long nr_reclaimed;
2407		2367
2408	current->reclaim_state = &reclaim_state;	2368	p->flags \|= PF_MEMALLOC;
2409		2369	lockdep_set_current_reclaim_state(sc.gfp_mask);
2410	lru_pages = global_reclaimable_pages();	2370	reclaim_state.reclaimed_slab = 0;
2411	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);	2371	p->reclaim_state = &reclaim_state;
2412	/* If slab caches are huge, it's better to hit them first */
2413	while (nr_slab >= lru_pages) {
2414	reclaim_state.reclaimed_slab = 0;
2415	shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
2416	if (!reclaim_state.reclaimed_slab)
2417	break;
2418
2419	sc.nr_reclaimed += reclaim_state.reclaimed_slab;
2420	if (sc.nr_reclaimed >= nr_pages)
2421	goto out;
2422
2423	nr_slab -= reclaim_state.reclaimed_slab;
2424	}
2425
2426	/*
2427	* We try to shrink LRUs in 5 passes:
2428	* 0 = Reclaim from inactive_list only
2429	* 1 = Reclaim from active list but don't reclaim mapped
2430	* 2 = 2nd pass of type 1
2431	* 3 = Reclaim mapped (normal reclaim)
2432	* 4 = 2nd pass of type 3
2433	*/
2434	for (pass = 0; pass < 5; pass++) {
2435	int prio;
2436
2437	/* Force reclaiming mapped pages in the passes #3 and #4 */
2438	if (pass > 2)
2439	sc.may_unmap = 1;
2440
2441	for (prio = DEF_PRIORITY; prio >= 0; prio--) {
2442	unsigned long nr_to_scan = nr_pages - sc.nr_reclaimed;
2443
2444	sc.nr_scanned = 0;
2445	sc.swap_cluster_max = nr_to_scan;
2446	shrink_all_zones(nr_to_scan, prio, pass, &sc);
2447	if (sc.nr_reclaimed >= nr_pages)
2448	goto out;
2449
2450	reclaim_state.reclaimed_slab = 0;
2451	shrink_slab(sc.nr_scanned, sc.gfp_mask,
2452	global_reclaimable_pages());
2453	sc.nr_reclaimed += reclaim_state.reclaimed_slab;
2454	if (sc.nr_reclaimed >= nr_pages)
2455	goto out;
2456
2457	if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
2458	congestion_wait(BLK_RW_ASYNC, HZ / 10);
2459	}
2460	}
2461
2462	/*
2463	* If sc.nr_reclaimed = 0, we could not shrink LRUs, but there may be
2464	* something in slab caches
2465	*/
2466	if (!sc.nr_reclaimed) {
2467	do {
2468	reclaim_state.reclaimed_slab = 0;
2469	shrink_slab(nr_pages, sc.gfp_mask,
2470	global_reclaimable_pages());
2471	sc.nr_reclaimed += reclaim_state.reclaimed_slab;
2472	} while (sc.nr_reclaimed < nr_pages &&
2473	reclaim_state.reclaimed_slab > 0);
2474	}
2475		2372
		2373	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
2476		2374
2477	out:	2375	p->reclaim_state = NULL;
2478	current->reclaim_state = NULL;	2376	lockdep_clear_current_reclaim_state();
		2377	p->flags &= ~PF_MEMALLOC;
2479		2378
2480	return sc.nr_reclaimed;	2379	return nr_reclaimed;
2481	}	2380	}
2482	#endif /* CONFIG_HIBERNATION */	2381	#endif /* CONFIG_HIBERNATION */
2483		2382