2 files changed, 172 insertions, 57 deletions
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index c4016cbbd3e0..f9238faf76e4 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -175,6 +175,12 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
 */
 #define SHRINK_BITE     10000
+/*
+ * Hand a reclaim request to shrink_all_memory(), never asking for more than
+ * SHRINK_BITE pages in a single call.  Returns shrink_all_memory()'s result.
+ */
+static inline unsigned long __shrink_memory(long tmp)
+{
+        long bite = (tmp > SHRINK_BITE) ? SHRINK_BITE : tmp;
+        return shrink_all_memory(bite);
+}
 int swsusp_shrink_memory(void)
 {
@@ -195,12 +201,12 @@ int swsusp_shrink_memory(void)
                        if (!is_highmem(zone))
                                tmp -= zone->free_pages;
                if (tmp > 0) {
-                        tmp = shrink_all_memory(SHRINK_BITE);
+                        tmp = __shrink_memory(tmp);
                        if (!tmp)
                                return -ENOMEM;
                        pages += tmp;
                } else if (size > image_size / PAGE_SIZE) {
-                        tmp = shrink_all_memory(SHRINK_BITE);
+                        tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
                        pages += tmp;
                }
                printk("\b%c", p[i++%4]);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 440a733fe2e9..46be8a02280e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -61,6 +61,8 @@ struct scan_control {
         * In this context, it doesn't matter that we scan the
         * whole list at once. */
        int swap_cluster_max;
+        int swappiness;
 };
 /*
@@ -741,7 +743,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
                 * A 100% value of vm_swappiness overrides this algorithm
                 * altogether.
                 */
-                swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+                swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
                /*
                 * Now use this metric to decide whether to start moving mapped
@@ -957,6 +959,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
                .may_writepage = !laptop_mode,
                .swap_cluster_max = SWAP_CLUSTER_MAX,
                .may_swap = 1,
+                .swappiness = vm_swappiness,
        };
        inc_page_state(allocstall);
@@ -1021,10 +1024,6 @@ out:
 * For kswapd, balance_pgdat() will work across all this node's zones until
 * they are all at pages_high.
 *
- * If `nr_pages' is non-zero then it is the number of pages which are to be
- * reclaimed, regardless of the zone occupancies.  This is a software suspend
- * special.
- *
 * Returns the number of pages which were actually freed.
 *
 * There is special handling here for zones which are full of pinned pages.
@@ -1042,10 +1041,8 @@ out:
 * the page allocator fallback scheme to ensure that aging of pages is balanced
 * across the zones.
 */
-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
-                                int order)
 {
-        unsigned long to_free = nr_pages;
        int all_zones_ok;
        int priority;
        int i;
@@ -1055,7 +1052,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
        struct scan_control sc = {
                .gfp_mask = GFP_KERNEL,
                .may_swap = 1,
-                .swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+                .swap_cluster_max = SWAP_CLUSTER_MAX,
+                .swappiness = vm_swappiness,
        };
 loop_again:
@@ -1082,31 +1080,26 @@ loop_again:
                all_zones_ok = 1;
-                if (nr_pages == 0) {
+                /*
-                        /*
+                 * Scan in the highmem->dma direction for the highest
-                         * Scan in the highmem->dma direction for the highest
+                 * zone which needs scanning
-                         * zone which needs scanning
+                 */
-                         */
+                for (i = pgdat->nr_zones - 1; i >= 0; i--) {
-                        for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+                        struct zone *zone = pgdat->node_zones + i;
-                                struct zone *zone = pgdat->node_zones + i;
-                                if (!populated_zone(zone))
+                        if (!populated_zone(zone))
-                                        continue;
+                                continue;
-                                if (zone->all_unreclaimable &&
+                        if (zone->all_unreclaimable && priority != DEF_PRIORITY)
-                                                priority != DEF_PRIORITY)
+                                continue;
-                                        continue;
-                                if (!zone_watermark_ok(zone, order,
+                        if (!zone_watermark_ok(zone, order, zone->pages_high,
-                                                zone->pages_high, 0, 0)) {
+                                               0, 0)) {
-                                        end_zone = i;
+                                end_zone = i;
-                                        goto scan;
+                                goto scan;
-                                }
                        }
-                        goto out;
-                } else {
-                        end_zone = pgdat->nr_zones - 1;
                }
+                goto out;
 scan:
                for (i = 0; i <= end_zone; i++) {
                        struct zone *zone = pgdat->node_zones + i;
@@ -1133,11 +1126,9 @@ scan:
                        if (zone->all_unreclaimable && priority != DEF_PRIORITY)
                                continue;
-                        if (nr_pages == 0) {    /* Not software suspend */
+                        if (!zone_watermark_ok(zone, order, zone->pages_high,
-                                if (!zone_watermark_ok(zone, order,
+                                               end_zone, 0))
-                                                zone->pages_high, end_zone, 0))
+                                all_zones_ok = 0;
-                                        all_zones_ok = 0;
-                        }
                        zone->temp_priority = priority;
                        if (zone->prev_priority > priority)
                                zone->prev_priority = priority;
@@ -1162,8 +1153,6 @@ scan:
                            total_scanned > nr_reclaimed + nr_reclaimed / 2)
                                sc.may_writepage = 1;
                }
-                if (nr_pages && to_free > nr_reclaimed)
-                        continue;       /* swsusp: need to do more work */
                if (all_zones_ok)
                        break;          /* kswapd: all done */
                /*
@@ -1179,7 +1168,7 @@ scan:
                 * matches the direct reclaim path behaviour in terms of impact
                 * on zone->*_priority.
                 */
-                if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
+                if (nr_reclaimed >= SWAP_CLUSTER_MAX)
                        break;
        }
 out:
@@ -1261,7 +1250,7 @@ static int kswapd(void *p)
                }
                finish_wait(&pgdat->kswapd_wait, &wait);
-                balance_pgdat(pgdat, 0, order);
+                balance_pgdat(pgdat, order);
        }
        return 0;
 }
@@ -1290,35 +1279,154 @@ void wakeup_kswapd(struct zone *zone, int order)
 #ifdef CONFIG_PM
 /*
- * Try to free `nr_pages' of memory, system-wide.  Returns the number of freed
+ * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
- * pages.
+ * from LRU lists system-wide, for given pass and priority, and returns the
+ * number of reclaimed pages
+ *
+ * For pass > 3 we also try to shrink the LRU lists that contain a few pages
+ */
+static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
+                                      int pass, struct scan_control *sc)
+{
+        /*
+         * nr_pages: reclaim target; also caps how much of a list is scanned
+         * prio:     scan priority; a zone's scan counter grows by
+         *           (list size >> prio) + 1 on every call
+         * pass:     0 leaves the active list alone; pass > 3 scans a list
+         *           even if its counter has not reached nr_pages yet
+         * sc:       scan control handed through to the list shrinkers
+         *
+         * The priority comes before the pass number because that is the
+         * order in which shrink_all_memory() passes them.
+         *
+         * Returns the sum of what shrink_inactive_list() reported.
+         */
+        struct zone *zone;
+        unsigned long nr_to_scan, ret = 0;
+        for_each_zone(zone) {
+                if (!populated_zone(zone))
+                        continue;
+                if (zone->all_unreclaimable && prio != DEF_PRIORITY)
+                        continue;
+                /* For pass = 0 we don't shrink the active list */
+                if (pass > 0) {
+                        zone->nr_scan_active += (zone->nr_active >> prio) + 1;
+                        if (zone->nr_scan_active >= nr_pages || pass > 3) {
+                                zone->nr_scan_active = 0;
+                                nr_to_scan = min(nr_pages, zone->nr_active);
+                                shrink_active_list(nr_to_scan, zone, sc);
+                        }
+                }
+                zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1;
+                if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
+                        zone->nr_scan_inactive = 0;
+                        nr_to_scan = min(nr_pages, zone->nr_inactive);
+                        ret += shrink_inactive_list(nr_to_scan, zone, sc);
+                        /* Target met: no need to touch the remaining zones */
+                        if (ret >= nr_pages)
+                                return ret;
+                }
+        }
+        return ret;
+}
+/*
+ * Try to free `nr_pages' of memory, system-wide, and return the number of
+ * freed pages.
+ *
+ * Rather than trying to age LRUs the aim is to preserve the overall
+ * LRU order by reclaiming preferentially
+ * inactive > active > active referenced > active mapped
 */
 unsigned long shrink_all_memory(unsigned long nr_pages)
 {
-        pg_data_t *pgdat;
+        unsigned long lru_pages, nr_slab;
-        unsigned long nr_to_free = nr_pages;
        unsigned long ret = 0;
-        unsigned retry = 2;
+        int pass;
-        struct reclaim_state reclaim_state = {
+        struct reclaim_state reclaim_state;
-                .reclaimed_slab = 0,
+        struct zone *zone;
+        /* may_swap starts off; it is switched on for passes 3 and 4 below */
+        struct scan_control sc = {
+                .gfp_mask = GFP_KERNEL,
+                .may_swap = 0,
+                .swap_cluster_max = nr_pages,
+                .may_writepage = 1,
+                .swappiness = vm_swappiness,
        };
        current->reclaim_state = &reclaim_state;
-repeat:
-        for_each_online_pgdat(pgdat) {
-                unsigned long freed;
-                freed = balance_pgdat(pgdat, nr_to_free, 0);
+        lru_pages = 0;
-                ret += freed;
+        for_each_zone(zone)
-                nr_to_free -= freed;
+                lru_pages += zone->nr_active + zone->nr_inactive;
-                if ((long)nr_to_free <= 0)
+        nr_slab = read_page_state(nr_slab);
+        /* If slab caches are huge, it's better to hit them first */
+        while (nr_slab >= lru_pages) {
+                reclaim_state.reclaimed_slab = 0;
+                shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+                if (!reclaim_state.reclaimed_slab)
                        break;
+                ret += reclaim_state.reclaimed_slab;
+                if (ret >= nr_pages)
+                        goto out;
+                nr_slab -= reclaim_state.reclaimed_slab;
        }
-        if (retry-- && ret < nr_pages) {
-                blk_congestion_wait(WRITE, HZ/5);
+        /*
-                goto repeat;
+         * We try to shrink LRUs in 5 passes:
+         * 0 = Reclaim from inactive_list only
+         * 1 = Reclaim from active list but don't reclaim mapped
+         * 2 = 2nd pass of type 1
+         * 3 = Reclaim mapped (normal reclaim)
+         * 4 = 2nd pass of type 3
+         */
+        for (pass = 0; pass < 5; pass++) {
+                int prio;
+                /* Needed for shrinking slab caches later on */
+                if (!lru_pages)
+                        for_each_zone(zone) {
+                                lru_pages += zone->nr_active;
+                                lru_pages += zone->nr_inactive;
+                        }
+                /* Force reclaiming mapped pages in the passes #3 and #4 */
+                if (pass > 2) {
+                        sc.may_swap = 1;
+                        sc.swappiness = 100;
+                }
+                for (prio = DEF_PRIORITY; prio >= 0; prio--) {
+                        unsigned long nr_to_scan = nr_pages - ret;
+                        sc.nr_mapped = read_page_state(nr_mapped);
+                        sc.nr_scanned = 0;
+                        /*
+                         * NOTE(review): arguments are passed as
+                         * (nr, prio, pass, sc); keep this in sync with the
+                         * shrink_all_zones() prototype.
+                         */
+                        ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
+                        if (ret >= nr_pages)
+                                goto out;
+                        reclaim_state.reclaimed_slab = 0;
+                        shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages);
+                        ret += reclaim_state.reclaimed_slab;
+                        if (ret >= nr_pages)
+                                goto out;
+                        if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
+                                blk_congestion_wait(WRITE, HZ / 10);
+                }
+                /* Forces a fresh LRU count at the start of the next pass */
+                lru_pages = 0;
        }
+        /*
+         * If ret = 0, we could not shrink LRUs, but there may be something
+         * in slab caches
+         */
+        if (!ret) {
+                /*
+                 * lru_pages was zeroed at the end of the last pass, so
+                 * recount it; otherwise shrink_slab() would be handed an
+                 * LRU size of 0 here, unlike every other call above.
+                 */
+                lru_pages = 0;
+                for_each_zone(zone)
+                        lru_pages += zone->nr_active + zone->nr_inactive;
+                do {
+                        reclaim_state.reclaimed_slab = 0;
+                        shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+                        ret += reclaim_state.reclaimed_slab;
+                } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
+        }
+out:
        current->reclaim_state = NULL;
        return ret;
 }
 #endif
@@ -1416,6 +1524,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                .swap_cluster_max = max_t(unsigned long, nr_pages,
                                        SWAP_CLUSTER_MAX),
                .gfp_mask = gfp_mask,
+                .swappiness = vm_swappiness,
        };
        disable_swap_token();

diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index c4016cbbd3e0..f9238faf76e4 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c
@@ -175,6 +175,12 @@ void free_all_swap_pages(int swap, struct bitmap_page *bitmap)
175	*/	175	*/
176		176
177	#define SHRINK_BITE 10000	177	#define SHRINK_BITE 10000
		178	static inline unsigned long __shrink_memory(long tmp)
		179	{
		180	if (tmp > SHRINK_BITE)
		181	tmp = SHRINK_BITE;
		182	return shrink_all_memory(tmp);
		183	}
178		184
179	int swsusp_shrink_memory(void)	185	int swsusp_shrink_memory(void)
180	{	186	{
@@ -195,12 +201,12 @@ int swsusp_shrink_memory(void)
195	if (!is_highmem(zone))	201	if (!is_highmem(zone))
196	tmp -= zone->free_pages;	202	tmp -= zone->free_pages;
197	if (tmp > 0) {	203	if (tmp > 0) {
198	tmp = shrink_all_memory(SHRINK_BITE);	204	tmp = __shrink_memory(tmp);
199	if (!tmp)	205	if (!tmp)
200	return -ENOMEM;	206	return -ENOMEM;
201	pages += tmp;	207	pages += tmp;
202	} else if (size > image_size / PAGE_SIZE) {	208	} else if (size > image_size / PAGE_SIZE) {
203	tmp = shrink_all_memory(SHRINK_BITE);	209	tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
204	pages += tmp;	210	pages += tmp;
205	}	211	}
206	printk("\b%c", p[i++%4]);	212	printk("\b%c", p[i++%4]);


diff --git a/mm/vmscan.c b/mm/vmscan.c index 440a733fe2e9..46be8a02280e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c
@@ -61,6 +61,8 @@ struct scan_control {
61	* In this context, it doesn't matter that we scan the	61	* In this context, it doesn't matter that we scan the
62	* whole list at once. */	62	* whole list at once. */
63	int swap_cluster_max;	63	int swap_cluster_max;
		64
		65	int swappiness;
64	};	66	};
65		67
66	/*	68	/*
@@ -741,7 +743,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
741	* A 100% value of vm_swappiness overrides this algorithm	743	* A 100% value of vm_swappiness overrides this algorithm
742	* altogether.	744	* altogether.
743	*/	745	*/
744	swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;	746	swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
745		747
746	/*	748	/*
747	* Now use this metric to decide whether to start moving mapped	749	* Now use this metric to decide whether to start moving mapped
@@ -957,6 +959,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
957	.may_writepage = !laptop_mode,	959	.may_writepage = !laptop_mode,
958	.swap_cluster_max = SWAP_CLUSTER_MAX,	960	.swap_cluster_max = SWAP_CLUSTER_MAX,
959	.may_swap = 1,	961	.may_swap = 1,
		962	.swappiness = vm_swappiness,
960	};	963	};
961		964
962	inc_page_state(allocstall);	965	inc_page_state(allocstall);
@@ -1021,10 +1024,6 @@ out:
1021	* For kswapd, balance_pgdat() will work across all this node's zones until	1024	* For kswapd, balance_pgdat() will work across all this node's zones until
1022	* they are all at pages_high.	1025	* they are all at pages_high.
1023	*	1026	*
1024	* If `nr_pages' is non-zero then it is the number of pages which are to be
1025	* reclaimed, regardless of the zone occupancies. This is a software suspend
1026	* special.
1027	*
1028	* Returns the number of pages which were actually freed.	1027	* Returns the number of pages which were actually freed.
1029	*	1028	*
1030	* There is special handling here for zones which are full of pinned pages.	1029	* There is special handling here for zones which are full of pinned pages.
@@ -1042,10 +1041,8 @@ out:
1042	* the page allocator fallback scheme to ensure that aging of pages is balanced	1041	* the page allocator fallback scheme to ensure that aging of pages is balanced
1043	* across the zones.	1042	* across the zones.
1044	*/	1043	*/
1045	static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,	1044	static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
1046	int order)
1047	{	1045	{
1048	unsigned long to_free = nr_pages;
1049	int all_zones_ok;	1046	int all_zones_ok;
1050	int priority;	1047	int priority;
1051	int i;	1048	int i;
@@ -1055,7 +1052,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
1055	struct scan_control sc = {	1052	struct scan_control sc = {
1056	.gfp_mask = GFP_KERNEL,	1053	.gfp_mask = GFP_KERNEL,
1057	.may_swap = 1,	1054	.may_swap = 1,
1058	.swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,	1055	.swap_cluster_max = SWAP_CLUSTER_MAX,
		1056	.swappiness = vm_swappiness,
1059	};	1057	};
1060		1058
1061	loop_again:	1059	loop_again:
@@ -1082,31 +1080,26 @@ loop_again:
1082		1080
1083	all_zones_ok = 1;	1081	all_zones_ok = 1;
1084		1082
1085	if (nr_pages == 0) {	1083	/*
1086	/*	1084	* Scan in the highmem->dma direction for the highest
1087	* Scan in the highmem->dma direction for the highest	1085	* zone which needs scanning
1088	* zone which needs scanning	1086	*/
1089	*/	1087	for (i = pgdat->nr_zones - 1; i >= 0; i--) {
1090	for (i = pgdat->nr_zones - 1; i >= 0; i--) {	1088	struct zone *zone = pgdat->node_zones + i;
1091	struct zone *zone = pgdat->node_zones + i;
1092		1089
1093	if (!populated_zone(zone))	1090	if (!populated_zone(zone))
1094	continue;	1091	continue;
1095		1092
1096	if (zone->all_unreclaimable &&	1093	if (zone->all_unreclaimable && priority != DEF_PRIORITY)
1097	priority != DEF_PRIORITY)	1094	continue;
1098	continue;
1099		1095
1100	if (!zone_watermark_ok(zone, order,	1096	if (!zone_watermark_ok(zone, order, zone->pages_high,
1101	zone->pages_high, 0, 0)) {	1097	0, 0)) {
1102	end_zone = i;	1098	end_zone = i;
1103	goto scan;	1099	goto scan;
1104	}
1105	}	1100	}
1106	goto out;
1107	} else {
1108	end_zone = pgdat->nr_zones - 1;
1109	}	1101	}
		1102	goto out;
1110	scan:	1103	scan:
1111	for (i = 0; i <= end_zone; i++) {	1104	for (i = 0; i <= end_zone; i++) {
1112	struct zone *zone = pgdat->node_zones + i;	1105	struct zone *zone = pgdat->node_zones + i;
@@ -1133,11 +1126,9 @@ scan:
1133	if (zone->all_unreclaimable && priority != DEF_PRIORITY)	1126	if (zone->all_unreclaimable && priority != DEF_PRIORITY)
1134	continue;	1127	continue;
1135		1128
1136	if (nr_pages == 0) { /* Not software suspend */	1129	if (!zone_watermark_ok(zone, order, zone->pages_high,
1137	if (!zone_watermark_ok(zone, order,	1130	end_zone, 0))
1138	zone->pages_high, end_zone, 0))	1131	all_zones_ok = 0;
1139	all_zones_ok = 0;
1140	}
1141	zone->temp_priority = priority;	1132	zone->temp_priority = priority;
1142	if (zone->prev_priority > priority)	1133	if (zone->prev_priority > priority)
1143	zone->prev_priority = priority;	1134	zone->prev_priority = priority;
@@ -1162,8 +1153,6 @@ scan:
1162	total_scanned > nr_reclaimed + nr_reclaimed / 2)	1153	total_scanned > nr_reclaimed + nr_reclaimed / 2)
1163	sc.may_writepage = 1;	1154	sc.may_writepage = 1;
1164	}	1155	}
1165	if (nr_pages && to_free > nr_reclaimed)
1166	continue; /* swsusp: need to do more work */
1167	if (all_zones_ok)	1156	if (all_zones_ok)
1168	break; /* kswapd: all done */	1157	break; /* kswapd: all done */
1169	/*	1158	/*
@@ -1179,7 +1168,7 @@ scan:
1179	* matches the direct reclaim path behaviour in terms of impact	1168	* matches the direct reclaim path behaviour in terms of impact
1180	* on zone->*_priority.	1169	* on zone->*_priority.
1181	*/	1170	*/
1182	if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)	1171	if (nr_reclaimed >= SWAP_CLUSTER_MAX)
1183	break;	1172	break;
1184	}	1173	}
1185	out:	1174	out:
@@ -1261,7 +1250,7 @@ static int kswapd(void *p)
1261	}	1250	}
1262	finish_wait(&pgdat->kswapd_wait, &wait);	1251	finish_wait(&pgdat->kswapd_wait, &wait);
1263		1252
1264	balance_pgdat(pgdat, 0, order);	1253	balance_pgdat(pgdat, order);
1265	}	1254	}
1266	return 0;	1255	return 0;
1267	}	1256	}
@@ -1290,35 +1279,154 @@ void wakeup_kswapd(struct zone *zone, int order)
1290		1279
1291	#ifdef CONFIG_PM	1280	#ifdef CONFIG_PM
1292	/*	1281	/*
1293	* Try to free `nr_pages' of memory, system-wide. Returns the number of freed	1282	* Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
1294	* pages.	1283	* from LRU lists system-wide, for given pass and priority, and returns the
		1284	* number of reclaimed pages
		1285	*
		1286	* For pass > 3 we also try to shrink the LRU lists that contain a few pages
		1287	*/
		1288	static unsigned long shrink_all_zones(unsigned long nr_pages, int pass,
		1289	int prio, struct scan_control *sc)
		1290	{
		1291	struct zone *zone;
		1292	unsigned long nr_to_scan, ret = 0;
		1293
		1294	for_each_zone(zone) {
		1295
		1296	if (!populated_zone(zone))
		1297	continue;
		1298
		1299	if (zone->all_unreclaimable && prio != DEF_PRIORITY)
		1300	continue;
		1301
		1302	/* For pass = 0 we don't shrink the active list */
		1303	if (pass > 0) {
		1304	zone->nr_scan_active += (zone->nr_active >> prio) + 1;
		1305	if (zone->nr_scan_active >= nr_pages \|\| pass > 3) {
		1306	zone->nr_scan_active = 0;
		1307	nr_to_scan = min(nr_pages, zone->nr_active);
		1308	shrink_active_list(nr_to_scan, zone, sc);
		1309	}
		1310	}
		1311
		1312	zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1;
		1313	if (zone->nr_scan_inactive >= nr_pages \|\| pass > 3) {
		1314	zone->nr_scan_inactive = 0;
		1315	nr_to_scan = min(nr_pages, zone->nr_inactive);
		1316	ret += shrink_inactive_list(nr_to_scan, zone, sc);
		1317	if (ret >= nr_pages)
		1318	return ret;
		1319	}
		1320	}
		1321
		1322	return ret;
		1323	}
		1324
		1325	/*
		1326	* Try to free `nr_pages' of memory, system-wide, and return the number of
		1327	* freed pages.
		1328	*
		1329	* Rather than trying to age LRUs the aim is to preserve the overall
		1330	* LRU order by reclaiming preferentially
		1331	* inactive > active > active referenced > active mapped
1295	*/	1332	*/
1296	unsigned long shrink_all_memory(unsigned long nr_pages)	1333	unsigned long shrink_all_memory(unsigned long nr_pages)
1297	{	1334	{
1298	pg_data_t *pgdat;	1335	unsigned long lru_pages, nr_slab;
1299	unsigned long nr_to_free = nr_pages;
1300	unsigned long ret = 0;	1336	unsigned long ret = 0;
1301	unsigned retry = 2;	1337	int pass;
1302	struct reclaim_state reclaim_state = {	1338	struct reclaim_state reclaim_state;
1303	.reclaimed_slab = 0,	1339	struct zone *zone;
		1340	struct scan_control sc = {
		1341	.gfp_mask = GFP_KERNEL,
		1342	.may_swap = 0,
		1343	.swap_cluster_max = nr_pages,
		1344	.may_writepage = 1,
		1345	.swappiness = vm_swappiness,
1304	};	1346	};
1305		1347
1306	current->reclaim_state = &reclaim_state;	1348	current->reclaim_state = &reclaim_state;
1307	repeat:
1308	for_each_online_pgdat(pgdat) {
1309	unsigned long freed;
1310		1349
1311	freed = balance_pgdat(pgdat, nr_to_free, 0);	1350	lru_pages = 0;
1312	ret += freed;	1351	for_each_zone(zone)
1313	nr_to_free -= freed;	1352	lru_pages += zone->nr_active + zone->nr_inactive;
1314	if ((long)nr_to_free <= 0)	1353
		1354	nr_slab = read_page_state(nr_slab);
		1355	/* If slab caches are huge, it's better to hit them first */
		1356	while (nr_slab >= lru_pages) {
		1357	reclaim_state.reclaimed_slab = 0;
		1358	shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
		1359	if (!reclaim_state.reclaimed_slab)
1315	break;	1360	break;
		1361
		1362	ret += reclaim_state.reclaimed_slab;
		1363	if (ret >= nr_pages)
		1364	goto out;
		1365
		1366	nr_slab -= reclaim_state.reclaimed_slab;
1316	}	1367	}
1317	if (retry-- && ret < nr_pages) {	1368
1318	blk_congestion_wait(WRITE, HZ/5);	1369	/*
1319	goto repeat;	1370	* We try to shrink LRUs in 5 passes:
		1371	* 0 = Reclaim from inactive_list only
		1372	* 1 = Reclaim from active list but don't reclaim mapped
		1373	* 2 = 2nd pass of type 1
		1374	* 3 = Reclaim mapped (normal reclaim)
		1375	* 4 = 2nd pass of type 3
		1376	*/
		1377	for (pass = 0; pass < 5; pass++) {
		1378	int prio;
		1379
		1380	/* Needed for shrinking slab caches later on */
		1381	if (!lru_pages)
		1382	for_each_zone(zone) {
		1383	lru_pages += zone->nr_active;
		1384	lru_pages += zone->nr_inactive;
		1385	}
		1386
		1387	/* Force reclaiming mapped pages in the passes #3 and #4 */
		1388	if (pass > 2) {
		1389	sc.may_swap = 1;
		1390	sc.swappiness = 100;
		1391	}
		1392
		1393	for (prio = DEF_PRIORITY; prio >= 0; prio--) {
		1394	unsigned long nr_to_scan = nr_pages - ret;
		1395
		1396	sc.nr_mapped = read_page_state(nr_mapped);
		1397	sc.nr_scanned = 0;
		1398
		1399	ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
		1400	if (ret >= nr_pages)
		1401	goto out;
		1402
		1403	reclaim_state.reclaimed_slab = 0;
		1404	shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages);
		1405	ret += reclaim_state.reclaimed_slab;
		1406	if (ret >= nr_pages)
		1407	goto out;
		1408
		1409	if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
		1410	blk_congestion_wait(WRITE, HZ / 10);
		1411	}
		1412
		1413	lru_pages = 0;
1320	}	1414	}
		1415
		1416	/*
		1417	* If ret = 0, we could not shrink LRUs, but there may be something
		1418	* in slab caches
		1419	*/
		1420	if (!ret)
		1421	do {
		1422	reclaim_state.reclaimed_slab = 0;
		1423	shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
		1424	ret += reclaim_state.reclaimed_slab;
		1425	} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
		1426
		1427	out:
1321	current->reclaim_state = NULL;	1428	current->reclaim_state = NULL;
		1429
1322	return ret;	1430	return ret;
1323	}	1431	}
1324	#endif	1432	#endif
@@ -1416,6 +1524,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1416	.swap_cluster_max = max_t(unsigned long, nr_pages,	1524	.swap_cluster_max = max_t(unsigned long, nr_pages,
1417	SWAP_CLUSTER_MAX),	1525	SWAP_CLUSTER_MAX),
1418	.gfp_mask = gfp_mask,	1526	.gfp_mask = gfp_mask,
		1527	.swappiness = vm_swappiness,
1419	};	1528	};
1420		1529
1421	disable_swap_token();	1530	disable_swap_token();