author		KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>	2010-10-26 17:21:42 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-26 19:52:07 -0400
commit		7d3579e8e61937cbba268ea9b218d006b6d64221
tree		4fa1863641343eee551681d60a823a84a2611289 /mm/vmscan.c
parent		bc57e00f5e0b2480ef222c775c49552d3a930db7
vmscan: narrow the scenarios in which lumpy reclaim uses synchronous reclaim
shrink_page_list() can decide to give up reclaiming a page under a
number of conditions, such as:
1. trylock_page() failure
2. the page is unevictable
3. zone reclaim is active and the page is mapped
4. PageWriteback() is true
5. the page is swap-backed and swap is full
6. add_to_swap() failure
7. the page is dirty and the gfp mask allows neither GFP_IO nor GFP_FS
8. the page is pinned
9. the IO queue is congested
10. pageout() started IO, but it has not finished
With lumpy reclaim, any of these failures currently results in entering
synchronous lumpy reclaim, but this can be unnecessary. In cases (2), (3),
(5), (6), (7) and (8) there is no point retrying: the synchronous pass
will fail for the same reason. This patch makes lumpy reclaim abort when
it is known that it will fail.
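
Condensed from the hunks below, the mechanism is a small state machine:
the boolean lumpy_reclaim_mode in struct scan_control becomes a tri-state
enum, and each hopeless failure path in shrink_page_list() now calls
disable_lumpy_reclaim_mode(), so the synchronous retry is skipped. As a
minimal standalone sketch of the mode-setting logic (struct scan_control
is reduced here to the two fields this logic touches; the constants match
the kernel's values):

#include <stdbool.h>

#define PAGE_ALLOC_COSTLY_ORDER	3	/* as in the kernel */
#define DEF_PRIORITY		12

enum lumpy_mode {
	LUMPY_MODE_NONE,	/* order-0 reclaim, or lumpy mode aborted */
	LUMPY_MODE_ASYNC,	/* first pass: do not wait on writeback */
	LUMPY_MODE_SYNC,	/* retry pass: may wait on writeback */
};

struct scan_control {		/* reduced for this sketch */
	int order;
	enum lumpy_mode lumpy_reclaim_mode;
};

static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc,
				   bool sync)
{
	enum lumpy_mode mode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;

	/*
	 * A page already hit a hopeless failure and disabled lumpy mode;
	 * the synchronous retry would fail the same way, so keep it off.
	 */
	if (sync && sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
		return;

	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
		sc->lumpy_reclaim_mode = mode;
	else if (sc->order && priority < DEF_PRIORITY - 2)
		sc->lumpy_reclaim_mode = mode;
	else
		sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
}

static void disable_lumpy_reclaim_mode(struct scan_control *sc)
{
	sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
}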
Case (9) is more interesting. The current behaviour is:
1. start shrink_page_list(async)
2. find the queue congested
3. skip the pageout write
4. still start shrink_page_list(sync)
5. wait on a lot of pages
6. again, find the queue congested
7. give up the pageout write again
So the synchronous pass is a pure waste of time. However, simply skipping
the write is not good either, because a high-order allocation can involve
more dirty pages than the queue congestion threshold (~128): on x86, for
example, a huge page is order-9, i.e. 2MB / 4KB = 512 contiguous pages.
After this patch, pageout() behaves as follows:
- If order > PAGE_ALLOC_COSTLY_ORDER:
  always ignore queue congestion.
- If order <= PAGE_ALLOC_COSTLY_ORDER:
  skip the write and disable lumpy reclaim.
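
A standalone sketch of that decision, reduced from the may_write_to_queue()
hunk below (the PF_SWAPWRITE, kswapd and current->backing_dev_info checks
that also permit the write are left out, and bdi_congested is a hypothetical
flag standing in for the real congestion query on the backing device):

#include <stdbool.h>

#define PAGE_ALLOC_COSTLY_ORDER	3	/* as in the kernel */

static bool may_write_to_queue(int order, bool bdi_congested)
{
	if (!bdi_congested)
		return true;
	/* lumpy reclaim for a hugepage often needs a lot of writes */
	return order > PAGE_ALLOC_COSTLY_ORDER;
}

When this returns false, pageout() now also calls
disable_lumpy_reclaim_mode() before returning PAGE_KEEP, so steps 4-7 of
the sequence above no longer happen for small orders.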
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	120
1 file changed, 75 insertions(+), 45 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 399d54e8a82c..d9fc2dce93af 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -51,6 +51,12 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/vmscan.h>
 
+enum lumpy_mode {
+	LUMPY_MODE_NONE,
+	LUMPY_MODE_ASYNC,
+	LUMPY_MODE_SYNC,
+};
+
 struct scan_control {
 	/* Incremented by the number of inactive pages that were scanned */
 	unsigned long nr_scanned;
@@ -82,7 +88,7 @@ struct scan_control {
 	 * Intend to reclaim enough continuous memory rather than reclaim
 	 * enough amount of memory. i.e, mode for high order allocation.
 	 */
-	bool lumpy_reclaim_mode;
+	enum lumpy_mode lumpy_reclaim_mode;
 
 	/* Which cgroup do we reclaim from */
 	struct mem_cgroup *mem_cgroup;
@@ -265,6 +271,36 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 	return ret;
 }
 
+static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc,
+				   bool sync)
+{
+	enum lumpy_mode mode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;
+
+	/*
+	 * Some of the reclaim attempts have already failed; it is not
+	 * worth trying synchronous lumpy reclaim.
+	 */
+	if (sync && sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
+		return;
+
+	/*
+	 * If we need a large contiguous chunk of memory, or have
+	 * trouble getting a small set of contiguous pages, we
+	 * will reclaim both active and inactive pages.
+	 */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		sc->lumpy_reclaim_mode = mode;
+	else if (sc->order && priority < DEF_PRIORITY - 2)
+		sc->lumpy_reclaim_mode = mode;
+	else
+		sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
+}
+
+static void disable_lumpy_reclaim_mode(struct scan_control *sc)
+{
+	sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
 	/*
@@ -275,7 +311,8 @@ static inline int is_page_cache_freeable(struct page *page)
 	return page_count(page) - page_has_private(page) == 2;
 }
 
-static int may_write_to_queue(struct backing_dev_info *bdi)
+static int may_write_to_queue(struct backing_dev_info *bdi,
+			      struct scan_control *sc)
 {
 	if (current->flags & PF_SWAPWRITE)
 		return 1;
@@ -283,6 +320,10 @@ static int may_write_to_queue(struct backing_dev_info *bdi)
 		return 1;
 	if (bdi == current->backing_dev_info)
 		return 1;
+
+	/* lumpy reclaim for hugepage often needs a lot of writes */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		return 1;
 	return 0;
 }
 
@@ -307,12 +348,6 @@ static void handle_write_error(struct address_space *mapping,
 	unlock_page(page);
 }
 
-/* Request for sync pageout. */
-enum pageout_io {
-	PAGEOUT_IO_ASYNC,
-	PAGEOUT_IO_SYNC,
-};
-
 /* possible outcome of pageout() */
 typedef enum {
 	/* failed to write page out, page is locked */
@@ -330,7 +365,7 @@ typedef enum {
  * Calls ->writepage().
  */
 static pageout_t pageout(struct page *page, struct address_space *mapping,
-			 enum pageout_io sync_writeback)
+			 struct scan_control *sc)
 {
 	/*
 	 * If the page is dirty, only perform writeback if that write
@@ -366,8 +401,10 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	}
 	if (mapping->a_ops->writepage == NULL)
 		return PAGE_ACTIVATE;
-	if (!may_write_to_queue(mapping->backing_dev_info))
+	if (!may_write_to_queue(mapping->backing_dev_info, sc)) {
+		disable_lumpy_reclaim_mode(sc);
 		return PAGE_KEEP;
+	}
 
 	if (clear_page_dirty_for_io(page)) {
 		int res;
@@ -393,7 +430,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 		 * direct reclaiming a large contiguous area and the
 		 * first attempt to free a range of pages fails.
 		 */
-		if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
+		if (PageWriteback(page) &&
+		    sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC)
 			wait_on_page_writeback(page);
 
 		if (!PageWriteback(page)) {
@@ -401,7 +439,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 			ClearPageReclaim(page);
 		}
 		trace_mm_vmscan_writepage(page,
-			trace_reclaim_flags(page, sync_writeback));
+			trace_reclaim_flags(page, sc->lumpy_reclaim_mode));
 		inc_zone_page_state(page, NR_VMSCAN_WRITE);
 		return PAGE_SUCCESS;
 	}
@@ -579,7 +617,7 @@ static enum page_references page_check_references(struct page *page,
 	referenced_page = TestClearPageReferenced(page);
 
 	/* Lumpy reclaim - ignore references */
-	if (sc->lumpy_reclaim_mode)
+	if (sc->lumpy_reclaim_mode != LUMPY_MODE_NONE)
 		return PAGEREF_RECLAIM;
 
 	/*
@@ -643,8 +681,7 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-					struct scan_control *sc,
-					enum pageout_io sync_writeback)
+				      struct scan_control *sc)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
@@ -693,10 +730,13 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			 * for any page for which writeback has already
 			 * started.
 			 */
-			if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
+			if (sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC &&
+			    may_enter_fs)
 				wait_on_page_writeback(page);
-			else
-				goto keep_locked;
+			else {
+				unlock_page(page);
+				goto keep_lumpy;
+			}
 		}
 
 		references = page_check_references(page, sc);
@@ -750,14 +790,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto keep_locked;
 
 			/* Page is dirty, try to write it out here */
-			switch (pageout(page, mapping, sync_writeback)) {
+			switch (pageout(page, mapping, sc)) {
 			case PAGE_KEEP:
 				goto keep_locked;
 			case PAGE_ACTIVATE:
 				goto activate_locked;
 			case PAGE_SUCCESS:
-				if (PageWriteback(page) || PageDirty(page))
+				if (PageWriteback(page))
+					goto keep_lumpy;
+				if (PageDirty(page))
 					goto keep;
+
 				/*
 				 * A synchronous write - probably a ramdisk.  Go
 				 * ahead and try to reclaim the page.
@@ -840,6 +883,7 @@ cull_mlocked:
 		try_to_free_swap(page);
 		unlock_page(page);
 		putback_lru_page(page);
+		disable_lumpy_reclaim_mode(sc);
 		continue;
 
 activate_locked:
@@ -852,6 +896,8 @@ activate_locked:
 keep_locked:
 		unlock_page(page);
 keep:
+		disable_lumpy_reclaim_mode(sc);
+keep_lumpy:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
@@ -1252,7 +1298,7 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
 		return false;
 
 	/* Only stall on lumpy reclaim */
-	if (!sc->lumpy_reclaim_mode)
+	if (sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
 		return false;
 
 	/* If we have relaimed everything on the isolated list, no stall */
@@ -1297,15 +1343,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		return SWAP_CLUSTER_MAX;
 	}
 
-
+	set_lumpy_reclaim_mode(priority, sc, false);
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 
 	if (scanning_global_lru(sc)) {
 		nr_taken = isolate_pages_global(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode ?
-				ISOLATE_BOTH : ISOLATE_INACTIVE,
+			sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
+					ISOLATE_INACTIVE : ISOLATE_BOTH,
 			zone, 0, file);
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
@@ -1317,8 +1363,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode ?
-				ISOLATE_BOTH : ISOLATE_INACTIVE,
+			sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
+					ISOLATE_INACTIVE : ISOLATE_BOTH,
 			zone, sc->mem_cgroup,
 			0, file);
 		/*
@@ -1336,7 +1382,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+	nr_reclaimed = shrink_page_list(&page_list, sc);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
@@ -1347,7 +1393,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		nr_active = clear_active_flags(&page_list, NULL);
 		count_vm_events(PGDEACTIVATE, nr_active);
 
-		nr_reclaimed += shrink_page_list(&page_list, sc, PAGEOUT_IO_SYNC);
+		set_lumpy_reclaim_mode(priority, sc, true);
+		nr_reclaimed += shrink_page_list(&page_list, sc);
 	}
 
 	local_irq_disable();
@@ -1739,21 +1786,6 @@ out:
 	}
 }
 
-static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
-{
-	/*
-	 * If we need a large contiguous chunk of memory, or have
-	 * trouble getting a small set of contiguous pages, we
-	 * will reclaim both active and inactive pages.
-	 */
-	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		sc->lumpy_reclaim_mode = 1;
-	else if (sc->order && priority < DEF_PRIORITY - 2)
-		sc->lumpy_reclaim_mode = 1;
-	else
-		sc->lumpy_reclaim_mode = 0;
-}
-
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
@@ -1768,8 +1800,6 @@ static void shrink_zone(int priority, struct zone *zone,
 
 	get_scan_count(zone, sc, nr, priority);
 
-	set_lumpy_reclaim_mode(priority, sc);
-
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 	       nr[LRU_INACTIVE_FILE]) {
 		for_each_evictable_lru(l) {