Lumpy Reclaim V4

When we are out of memory of a suitable size we enter reclaim. The current reclaim algorithm targets pages in LRU order, which is great for fairness at order-0 but highly unsuitable if you desire pages at higher orders. To get pages of higher order we must shoot down a very high proportion of memory; >95% in a lot of cases. This patch set adds a lumpy reclaim algorithm to the allocator. It targets groups of pages at the specified order anchored at the end of the active and inactive lists. This encourages groups of pages at the requested orders to move from active to inactive, and active to free lists. This behaviour is only triggered out of direct reclaim when higher order pages have been requested. This patch set is particularly effective when utilised with an anti-fragmentation scheme which groups pages of similar reclaimability together. This patch set is based on Peter Zijlstra's lumpy reclaim V2 patch which forms the foundation. Credit to Mel Gorman for sanity checking. Mel said: The patches have an application with hugepage pool resizing. When lumpy-reclaim is used with ZONE_MOVABLE, the hugepages pool can be resized with greater reliability. Testing on a desktop machine with 2GB of RAM showed that growing the hugepage pool with ZONE_MOVABLE on its own was very slow as the success rate was quite low. Without lumpy-reclaim, each attempt to grow the pool by 100 pages would yield 1 or 2 hugepages. With lumpy-reclaim, getting 40 to 70 hugepages on each attempt was typical. 
[akpm@osdl.org: ia64 pfn_to_nid fixes and loop cleanup] [bunk@stusta.de: static declarations for internal functions] [a.p.zijlstra@chello.nl: initial lumpy V2 implementation] Signed-off-by: Andy Whitcroft <apw@shadowen.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Mel Gorman <mel@csn.ul.ie> Cc: Bob Picco <bob.picco@hp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Andy Whitcroft <apw@shadowen.org> 2007-07-17 07:03:16 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-07-17 13:22:59 -0400
commit: 5ad333eb66ff1e52a87639822ae088577669dcf9 (patch)
tree: addae6bbd19585f19328f309924d06d647e8f2b7 /mm
parent: 7e63efef857575320fb413fbc3d0ee704b72845f (diff)
2 files changed, 152 insertions, 24 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ac4f8c6b5c10..1a889c3fec59 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1333,7 +1333,7 @@ nofail_alloc:
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
-        did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
+        did_some_progress = try_to_free_pages(zonelist->zones, order, gfp_mask);
        p->reclaim_state = NULL;
        p->flags &= ~PF_MEMALLOC;
@@ -1370,7 +1370,8 @@ nofail_alloc:
         */
        do_retry = 0;
        if (!(gfp_mask & __GFP_NORETRY)) {
-                if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
+                if ((order <= PAGE_ALLOC_COSTLY_ORDER) ||
+                                                (gfp_mask & __GFP_REPEAT))
                        do_retry = 1;
                if (gfp_mask & __GFP_NOFAIL)
                        do_retry = 1;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1be5a6376ef0..1d9971d8924b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -66,6 +66,8 @@ struct scan_control {
        int swappiness;
        int all_unreclaimable;
+        int order;
 };
 /*
@@ -481,7 +483,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                referenced = page_referenced(page, 1);
                /* In active use or really unfreeable?  Activate it. */
-                if (referenced && page_mapping_inuse(page))
+                if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
+                                        referenced && page_mapping_inuse(page))
                        goto activate_locked;
 #ifdef CONFIG_SWAP
@@ -514,7 +517,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                }
                if (PageDirty(page)) {
-                        if (referenced)
+                        if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
                                goto keep_locked;
                        if (!may_enter_fs)
                                goto keep_locked;
@@ -598,6 +601,51 @@ keep:
        return nr_reclaimed;
 }
+/* LRU Isolation modes. */
+#define ISOLATE_INACTIVE 0      /* Isolate inactive pages. */
+#define ISOLATE_ACTIVE 1        /* Isolate active pages. */
+#define ISOLATE_BOTH 2          /* Isolate both active and inactive pages. */
+/*
+ * Attempt to remove the specified page from its LRU.  Only take this page
+ * if it is of the appropriate PageActive status.  Pages which are being
+ * freed elsewhere are also ignored.
+ *
+ * page:        page to consider
+ * mode:        one of the LRU isolation modes defined above
+ *
+ * returns 0 on success, -ve errno on failure.
+ */
+static int __isolate_lru_page(struct page *page, int mode)
+{
+        int ret = -EINVAL;
+        /* Only take pages on the LRU. */
+        if (!PageLRU(page))
+                return ret;
+        /*
+         * When checking the active state, we need to be sure we are
+         * dealing with comparable boolean values.  Take the logical not
+         * of each.
+         */
+        if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
+                return ret;
+        ret = -EBUSY;
+        if (likely(get_page_unless_zero(page))) {
+                /*
+                 * Be careful not to clear PageLRU until after we're
+                 * sure the page is not being freed elsewhere -- the
+                 * page release code relies on it.
+                 */
+                ClearPageLRU(page);
+                ret = 0;
+        }
+        return ret;
+}
 /*
 * zone->lru_lock is heavily contended.  Some of the functions that
 * shrink the lists perform better by taking out a batch of pages
@@ -612,38 +660,90 @@ keep:
 * @src:        The LRU list to pull pages off.
 * @dst:        The temp list to put pages on to.
 * @scanned:    The number of pages that were scanned.
+ * @order:      The caller's attempted allocation order
+ * @mode:       One of the LRU isolation modes
 *
 * returns how many pages were moved onto *@dst.
 */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                struct list_head *src, struct list_head *dst,
-                unsigned long *scanned)
+                unsigned long *scanned, int order, int mode)
 {
        unsigned long nr_taken = 0;
-        struct page *page;
        unsigned long scan;
        for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
-                struct list_head *target;
+                struct page *page;
+                unsigned long pfn;
+                unsigned long end_pfn;
+                unsigned long page_pfn;
+                int zone_id;
                page = lru_to_page(src);
                prefetchw_prev_lru_page(page, src, flags);
                VM_BUG_ON(!PageLRU(page));
-                list_del(&page->lru);
+                switch (__isolate_lru_page(page, mode)) {
-                target = src;
+                case 0:
-                if (likely(get_page_unless_zero(page))) {
+                        list_move(&page->lru, dst);
-                        /*
-                         * Be careful not to clear PageLRU until after we're
-                         * sure the page is not being freed elsewhere -- the
-                         * page release code relies on it.
-                         */
-                        ClearPageLRU(page);
-                        target = dst;
                        nr_taken++;
-                } /* else it is being freed elsewhere */
+                        break;
-                list_add(&page->lru, target);
+                case -EBUSY:
+                        /* else it is being freed elsewhere */
+                        list_move(&page->lru, src);
+                        continue;
+                default:
+                        BUG();
+                }
+                if (!order)
+                        continue;
+                /*
+                 * Attempt to take all pages in the order aligned region
+                 * surrounding the tag page.  Only take those pages of
+                 * the same active state as that tag page.  We may safely
+                 * round the target page pfn down to the requested order
+                 * as the mem_map is guaranteed valid out to MAX_ORDER,
+                 * where that page is in a different zone we will detect
+                 * it from its zone id and abort this block scan.
+                 */
+                zone_id = page_zone_id(page);
+                page_pfn = page_to_pfn(page);
+                pfn = page_pfn & ~((1 << order) - 1);
+                end_pfn = pfn + (1 << order);
+                for (; pfn < end_pfn; pfn++) {
+                        struct page *cursor_page;
+                        /* The target page is in the block, ignore it. */
+                        if (unlikely(pfn == page_pfn))
+                                continue;
+                        /* Avoid holes within the zone. */
+                        if (unlikely(!pfn_valid_within(pfn)))
+                                break;
+                        cursor_page = pfn_to_page(pfn);
+                        /* Check that we have not crossed a zone boundary. */
+                        if (unlikely(page_zone_id(cursor_page) != zone_id))
+                                continue;
+                        switch (__isolate_lru_page(cursor_page, mode)) {
+                        case 0:
+                                list_move(&cursor_page->lru, dst);
+                                nr_taken++;
+                                scan++;
+                                break;
+                        case -EBUSY:
+                                /* else it is being freed elsewhere */
+                                list_move(&cursor_page->lru, src);
+                        default:
+                                break;
+                        }
+                }
        }
        *scanned = scan;
@@ -651,6 +751,24 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 }
 /*
+ * clear_active_flags() is a helper for shrink_inactive_list(), clearing
+ * any active bits from the pages in the list.
+ */
+static unsigned long clear_active_flags(struct list_head *page_list)
+{
+        int nr_active = 0;
+        struct page *page;
+        list_for_each_entry(page, page_list, lru)
+                if (PageActive(page)) {
+                        ClearPageActive(page);
+                        nr_active++;
+                }
+        return nr_active;
+}
+/*
 * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
 * of reclaimed pages
 */
@@ -671,11 +789,18 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                unsigned long nr_taken;
                unsigned long nr_scan;
                unsigned long nr_freed;
+                unsigned long nr_active;
                nr_taken = isolate_lru_pages(sc->swap_cluster_max,
-                                             &zone->inactive_list,
+                             &zone->inactive_list,
-                                             &page_list, &nr_scan);
+                             &page_list, &nr_scan, sc->order,
-                __mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
+                             (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
+                                             ISOLATE_BOTH : ISOLATE_INACTIVE);
+                nr_active = clear_active_flags(&page_list);
+                __mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
+                __mod_zone_page_state(zone, NR_INACTIVE,
+                                                -(nr_taken - nr_active));
                zone->pages_scanned += nr_scan;
                spin_unlock_irq(&zone->lru_lock);
@@ -820,7 +945,7 @@ force_reclaim_mapped:
        lru_add_drain();
        spin_lock_irq(&zone->lru_lock);
        pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
-                                    &l_hold, &pgscanned);
+                            &l_hold, &pgscanned, sc->order, ISOLATE_ACTIVE);
        zone->pages_scanned += pgscanned;
        __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
        spin_unlock_irq(&zone->lru_lock);
@@ -1011,7 +1136,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 * holds filesystem locks which prevent writeout this might not work, and the
 * allocation attempt will fail.
 */
-unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 {
        int priority;
        int ret = 0;
@@ -1026,6 +1151,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
                .swap_cluster_max = SWAP_CLUSTER_MAX,
                .may_swap = 1,
                .swappiness = vm_swappiness,
+                .order = order,
        };
        count_vm_event(ALLOCSTALL);
@@ -1131,6 +1257,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
                .may_swap = 1,
                .swap_cluster_max = SWAP_CLUSTER_MAX,
                .swappiness = vm_swappiness,
+                .order = order,
        };
        /*
         * temp_priority is used to remember the scanning priority at which
author	Andy Whitcroft <apw@shadowen.org>	2007-07-17 07:03:16 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-07-17 13:22:59 -0400
commit	5ad333eb66ff1e52a87639822ae088577669dcf9 (patch)
tree	addae6bbd19585f19328f309924d06d647e8f2b7 /mm
parent	7e63efef857575320fb413fbc3d0ee704b72845f (diff)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ac4f8c6b5c10..1a889c3fec59 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c
@@ -1333,7 +1333,7 @@ nofail_alloc:
1333	reclaim_state.reclaimed_slab = 0;	1333	reclaim_state.reclaimed_slab = 0;
1334	p->reclaim_state = &reclaim_state;	1334	p->reclaim_state = &reclaim_state;
1335		1335
1336	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);	1336	did_some_progress = try_to_free_pages(zonelist->zones, order, gfp_mask);
1337		1337
1338	p->reclaim_state = NULL;	1338	p->reclaim_state = NULL;
1339	p->flags &= ~PF_MEMALLOC;	1339	p->flags &= ~PF_MEMALLOC;
@@ -1370,7 +1370,8 @@ nofail_alloc:
1370	*/	1370	*/
1371	do_retry = 0;	1371	do_retry = 0;
1372	if (!(gfp_mask & __GFP_NORETRY)) {	1372	if (!(gfp_mask & __GFP_NORETRY)) {
1373	if ((order <= 3) \|\| (gfp_mask & __GFP_REPEAT))	1373	if ((order <= PAGE_ALLOC_COSTLY_ORDER) \|\|
		1374	(gfp_mask & __GFP_REPEAT))
1374	do_retry = 1;	1375	do_retry = 1;
1375	if (gfp_mask & __GFP_NOFAIL)	1376	if (gfp_mask & __GFP_NOFAIL)
1376	do_retry = 1;	1377	do_retry = 1;


diff --git a/mm/vmscan.c b/mm/vmscan.c index 1be5a6376ef0..1d9971d8924b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c
@@ -66,6 +66,8 @@ struct scan_control {
66	int swappiness;	66	int swappiness;
67		67
68	int all_unreclaimable;	68	int all_unreclaimable;
		69
		70	int order;
69	};	71	};
70		72
71	/*	73	/*
@@ -481,7 +483,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
481		483
482	referenced = page_referenced(page, 1);	484	referenced = page_referenced(page, 1);
483	/* In active use or really unfreeable? Activate it. */	485	/* In active use or really unfreeable? Activate it. */
484	if (referenced && page_mapping_inuse(page))	486	if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
		487	referenced && page_mapping_inuse(page))
485	goto activate_locked;	488	goto activate_locked;
486		489
487	#ifdef CONFIG_SWAP	490	#ifdef CONFIG_SWAP
@@ -514,7 +517,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
514	}	517	}
515		518
516	if (PageDirty(page)) {	519	if (PageDirty(page)) {
517	if (referenced)	520	if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
518	goto keep_locked;	521	goto keep_locked;
519	if (!may_enter_fs)	522	if (!may_enter_fs)
520	goto keep_locked;	523	goto keep_locked;
@@ -598,6 +601,51 @@ keep:
598	return nr_reclaimed;	601	return nr_reclaimed;
599	}	602	}
600		603
		604	/* LRU Isolation modes. */
		605	#define ISOLATE_INACTIVE 0 /* Isolate inactive pages. */
		606	#define ISOLATE_ACTIVE 1 /* Isolate active pages. */
		607	#define ISOLATE_BOTH 2 /* Isolate both active and inactive pages. */
		608
		609	/*
		610	* Attempt to remove the specified page from its LRU. Only take this page
		611	* if it is of the appropriate PageActive status. Pages which are being
		612	* freed elsewhere are also ignored.
		613	*
		614	* page: page to consider
		615	* mode: one of the LRU isolation modes defined above
		616	*
		617	* returns 0 on success, -ve errno on failure.
		618	*/
		619	static int __isolate_lru_page(struct page *page, int mode)
		620	{
		621	int ret = -EINVAL;
		622
		623	/* Only take pages on the LRU. */
		624	if (!PageLRU(page))
		625	return ret;
		626
		627	/*
		628	* dealing with comparable boolean values. Take the logical not
		629	* dealing with comparible boolean values. Take the logical not
		630	* of each.
		631	*/
		632	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
		633	return ret;
		634
		635	ret = -EBUSY;
		636	if (likely(get_page_unless_zero(page))) {
		637	/*
		638	* Be careful not to clear PageLRU until after we're
		639	* sure the page is not being freed elsewhere -- the
		640	* page release code relies on it.
		641	*/
		642	ClearPageLRU(page);
		643	ret = 0;
		644	}
		645
		646	return ret;
		647	}
		648
601	/*	649	/*
602	* zone->lru_lock is heavily contended. Some of the functions that	650	* zone->lru_lock is heavily contended. Some of the functions that
603	* shrink the lists perform better by taking out a batch of pages	651	* shrink the lists perform better by taking out a batch of pages
@@ -612,38 +660,90 @@ keep:
612	* @src: The LRU list to pull pages off.	660	* @src: The LRU list to pull pages off.
613	* @dst: The temp list to put pages on to.	661	* @dst: The temp list to put pages on to.
614	* @scanned: The number of pages that were scanned.	662	* @scanned: The number of pages that were scanned.
		663	* @order: The caller's attempted allocation order
		664	* @mode: One of the LRU isolation modes
615	*	665	*
616	* returns how many pages were moved onto *@dst.	666	* returns how many pages were moved onto *@dst.
617	*/	667	*/
618	static unsigned long isolate_lru_pages(unsigned long nr_to_scan,	668	static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
619	struct list_head *src, struct list_head *dst,	669	struct list_head *src, struct list_head *dst,
620	unsigned long *scanned)	670	unsigned long *scanned, int order, int mode)
621	{	671	{
622	unsigned long nr_taken = 0;	672	unsigned long nr_taken = 0;
623	struct page *page;
624	unsigned long scan;	673	unsigned long scan;
625		674
626	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {	675	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
627	struct list_head *target;	676	struct page *page;
		677	unsigned long pfn;
		678	unsigned long end_pfn;
		679	unsigned long page_pfn;
		680	int zone_id;
		681
628	page = lru_to_page(src);	682	page = lru_to_page(src);
629	prefetchw_prev_lru_page(page, src, flags);	683	prefetchw_prev_lru_page(page, src, flags);
630		684
631	VM_BUG_ON(!PageLRU(page));	685	VM_BUG_ON(!PageLRU(page));
632		686
633	list_del(&page->lru);	687	switch (__isolate_lru_page(page, mode)) {
634	target = src;	688	case 0:
635	if (likely(get_page_unless_zero(page))) {	689	list_move(&page->lru, dst);
636	/*
637	* Be careful not to clear PageLRU until after we're
638	* sure the page is not being freed elsewhere -- the
639	* page release code relies on it.
640	*/
641	ClearPageLRU(page);
642	target = dst;
643	nr_taken++;	690	nr_taken++;
644	} /* else it is being freed elsewhere */	691	break;
645		692
646	list_add(&page->lru, target);	693	case -EBUSY:
		694	/* else it is being freed elsewhere */
		695	list_move(&page->lru, src);
		696	continue;
		697
		698	default:
		699	BUG();
		700	}
		701
		702	if (!order)
		703	continue;
		704
		705	/*
		706	* Attempt to take all pages in the order aligned region
		707	* surrounding the tag page. Only take those pages of
		708	* the same active state as that tag page. We may safely
		709	* round the target page pfn down to the requested order
		710	* as the mem_map is guaranteed valid out to MAX_ORDER,
		711	* where that page is in a different zone we will detect
		712	* it from its zone id and abort this block scan.
		713	*/
		714	zone_id = page_zone_id(page);
		715	page_pfn = page_to_pfn(page);
		716	pfn = page_pfn & ~((1 << order) - 1);
		717	end_pfn = pfn + (1 << order);
		718	for (; pfn < end_pfn; pfn++) {
		719	struct page *cursor_page;
		720
		721	/* The target page is in the block, ignore it. */
		722	if (unlikely(pfn == page_pfn))
		723	continue;
		724
		725	/* Avoid holes within the zone. */
		726	if (unlikely(!pfn_valid_within(pfn)))
		727	break;
		728
		729	cursor_page = pfn_to_page(pfn);
		730	/* Check that we have not crossed a zone boundary. */
		731	if (unlikely(page_zone_id(cursor_page) != zone_id))
		732	continue;
		733	switch (__isolate_lru_page(cursor_page, mode)) {
		734	case 0:
		735	list_move(&cursor_page->lru, dst);
		736	nr_taken++;
		737	scan++;
		738	break;
		739
		740	case -EBUSY:
		741	/* else it is being freed elsewhere */
		742	list_move(&cursor_page->lru, src);
		743	default:
		744	break;
		745	}
		746	}
647	}	747	}
648		748
649	*scanned = scan;	749	*scanned = scan;
@@ -651,6 +751,24 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
651	}	751	}
652		752
653	/*	753	/*
		754	* clear_active_flags() is a helper for shrink_inactive_list(), clearing
		755	* any active bits from the pages in the list.
		756	*/
		757	static unsigned long clear_active_flags(struct list_head *page_list)
		758	{
		759	int nr_active = 0;
		760	struct page *page;
		761
		762	list_for_each_entry(page, page_list, lru)
		763	if (PageActive(page)) {
		764	ClearPageActive(page);
		765	nr_active++;
		766	}
		767
		768	return nr_active;
		769	}
		770
		771	/*
654	* shrink_inactive_list() is a helper for shrink_zone(). It returns the number	772	* shrink_inactive_list() is a helper for shrink_zone(). It returns the number
655	* of reclaimed pages	773	* of reclaimed pages
656	*/	774	*/
@@ -671,11 +789,18 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
671	unsigned long nr_taken;	789	unsigned long nr_taken;
672	unsigned long nr_scan;	790	unsigned long nr_scan;
673	unsigned long nr_freed;	791	unsigned long nr_freed;
		792	unsigned long nr_active;
674		793
675	nr_taken = isolate_lru_pages(sc->swap_cluster_max,	794	nr_taken = isolate_lru_pages(sc->swap_cluster_max,
676	&zone->inactive_list,	795	&zone->inactive_list,
677	&page_list, &nr_scan);	796	&page_list, &nr_scan, sc->order,
678	__mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);	797	(sc->order > PAGE_ALLOC_COSTLY_ORDER)?
		798	ISOLATE_BOTH : ISOLATE_INACTIVE);
		799	nr_active = clear_active_flags(&page_list);
		800
		801	__mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
		802	__mod_zone_page_state(zone, NR_INACTIVE,
		803	-(nr_taken - nr_active));
679	zone->pages_scanned += nr_scan;	804	zone->pages_scanned += nr_scan;
680	spin_unlock_irq(&zone->lru_lock);	805	spin_unlock_irq(&zone->lru_lock);
681		806
@@ -820,7 +945,7 @@ force_reclaim_mapped:
820	lru_add_drain();	945	lru_add_drain();
821	spin_lock_irq(&zone->lru_lock);	946	spin_lock_irq(&zone->lru_lock);
822	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,	947	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
823	&l_hold, &pgscanned);	948	&l_hold, &pgscanned, sc->order, ISOLATE_ACTIVE);
824	zone->pages_scanned += pgscanned;	949	zone->pages_scanned += pgscanned;
825	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);	950	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
826	spin_unlock_irq(&zone->lru_lock);	951	spin_unlock_irq(&zone->lru_lock);
@@ -1011,7 +1136,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
1011	* holds filesystem locks which prevent writeout this might not work, and the	1136	* holds filesystem locks which prevent writeout this might not work, and the
1012	* allocation attempt will fail.	1137	* allocation attempt will fail.
1013	*/	1138	*/
1014	unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)	1139	unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
1015	{	1140	{
1016	int priority;	1141	int priority;
1017	int ret = 0;	1142	int ret = 0;
@@ -1026,6 +1151,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
1026	.swap_cluster_max = SWAP_CLUSTER_MAX,	1151	.swap_cluster_max = SWAP_CLUSTER_MAX,
1027	.may_swap = 1,	1152	.may_swap = 1,
1028	.swappiness = vm_swappiness,	1153	.swappiness = vm_swappiness,
		1154	.order = order,
1029	};	1155	};
1030		1156
1031	count_vm_event(ALLOCSTALL);	1157	count_vm_event(ALLOCSTALL);
@@ -1131,6 +1257,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
1131	.may_swap = 1,	1257	.may_swap = 1,
1132	.swap_cluster_max = SWAP_CLUSTER_MAX,	1258	.swap_cluster_max = SWAP_CLUSTER_MAX,
1133	.swappiness = vm_swappiness,	1259	.swappiness = vm_swappiness,
		1260	.order = order,
1134	};	1261	};
1135	/*	1262	/*
1136	* temp_priority is used to remember the scanning priority at which	1263	* temp_priority is used to remember the scanning priority at which