-rw-r--r--   fs/buffer.c                2
-rw-r--r--   include/linux/mmzone.h     8
-rw-r--r--   include/linux/swap.h       3
-rw-r--r--   mm/page_alloc.c            5
-rw-r--r--   mm/vmscan.c              171
5 files changed, 163 insertions(+), 26 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 94344b2e0b46..d654a3b6209e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -356,7 +356,7 @@ static void free_more_memory(void)
 	for_each_online_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
-			try_to_free_pages(zones, GFP_NOFS);
+			try_to_free_pages(zones, 0, GFP_NOFS);
 	}
 }
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d71ff763c9df..da8eb8ad9e9b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -24,6 +24,14 @@
 #endif
 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
 
+/*
+ * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
+ * costly to service.  That is between allocation orders which should
+ * coalesce naturally under reasonable reclaim pressure and those which
+ * will not.
+ */
+#define PAGE_ALLOC_COSTLY_ORDER 3
+
 struct free_area {
 	struct list_head	free_list;
 	unsigned long		nr_free;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 006868881346..665f85f2a3af 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -188,7 +188,8 @@ extern int rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
-extern unsigned long try_to_free_pages(struct zone **, gfp_t);
+extern unsigned long try_to_free_pages(struct zone **zones, int order,
+					gfp_t gfp_mask);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ac4f8c6b5c10..1a889c3fec59 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1333,7 +1333,7 @@ nofail_alloc:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist->zones, order, gfp_mask);
 
 	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
@@ -1370,7 +1370,8 @@ nofail_alloc:
 	 */
 	do_retry = 0;
 	if (!(gfp_mask & __GFP_NORETRY)) {
-		if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
+		if ((order <= PAGE_ALLOC_COSTLY_ORDER) ||
+						(gfp_mask & __GFP_REPEAT))
 			do_retry = 1;
 		if (gfp_mask & __GFP_NOFAIL)
 			do_retry = 1;
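
The retry policy this hunk changes is small enough to model as a standalone predicate. The following is only a sketch: the gfp flag values are hypothetical stand-ins rather than the kernel's real bit layout, and should_retry() is an illustrative helper, not a kernel function; only the shape of the test mirrors the patched code.

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the retry decision after this change: cheap orders (at or
 * below PAGE_ALLOC_COSTLY_ORDER) retry implicitly, costlier orders only
 * retry when the caller asked for it.  Flag values are made up. */
#define PAGE_ALLOC_COSTLY_ORDER 3
#define __GFP_NORETRY 0x1u
#define __GFP_REPEAT  0x2u
#define __GFP_NOFAIL  0x4u

static bool should_retry(unsigned int gfp_mask, int order)
{
	bool do_retry = false;

	if (!(gfp_mask & __GFP_NORETRY)) {
		if (order <= PAGE_ALLOC_COSTLY_ORDER ||
				(gfp_mask & __GFP_REPEAT))
			do_retry = true;
		if (gfp_mask & __GFP_NOFAIL)
			do_retry = true;
	}
	return do_retry;
}

int main(void)
{
	printf("%d\n", should_retry(0, 2));            /* 1: order 2 is cheap */
	printf("%d\n", should_retry(0, 4));            /* 0: costly, no __GFP_REPEAT */
	printf("%d\n", should_retry(__GFP_REPEAT, 4)); /* 1: caller asked to repeat */
	return 0;
}
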
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1be5a6376ef0..1d9971d8924b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -66,6 +66,8 @@ struct scan_control {
 	int swappiness;
 
 	int all_unreclaimable;
+
+	int order;
 };
 
 /*
@@ -481,7 +483,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		referenced = page_referenced(page, 1);
 		/* In active use or really unfreeable? Activate it. */
-		if (referenced && page_mapping_inuse(page))
+		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
+					referenced && page_mapping_inuse(page))
 			goto activate_locked;
 
 #ifdef CONFIG_SWAP
@@ -514,7 +517,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
-			if (referenced)
+			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
 				goto keep_locked;
 			if (!may_enter_fs)
 				goto keep_locked;
@@ -598,6 +601,51 @@ keep:
 	return nr_reclaimed;
 }
 
+/* LRU Isolation modes. */
+#define ISOLATE_INACTIVE 0	/* Isolate inactive pages. */
+#define ISOLATE_ACTIVE 1	/* Isolate active pages. */
+#define ISOLATE_BOTH 2		/* Isolate both active and inactive pages. */
+
+/*
+ * Attempt to remove the specified page from its LRU.  Only take this page
+ * if it is of the appropriate PageActive status.  Pages which are being
+ * freed elsewhere are also ignored.
+ *
+ * page:	page to consider
+ * mode:	one of the LRU isolation modes defined above
+ *
+ * returns 0 on success, -ve errno on failure.
+ */
+static int __isolate_lru_page(struct page *page, int mode)
+{
+	int ret = -EINVAL;
+
+	/* Only take pages on the LRU. */
+	if (!PageLRU(page))
+		return ret;
+
+	/*
+	 * When checking the active state, we need to be sure we are
+	 * dealing with comparable boolean values.  Take the logical not
+	 * of each.
+	 */
+	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
+		return ret;
+
+	ret = -EBUSY;
+	if (likely(get_page_unless_zero(page))) {
+		/*
+		 * Be careful not to clear PageLRU until after we're
+		 * sure the page is not being freed elsewhere -- the
+		 * page release code relies on it.
+		 */
+		ClearPageLRU(page);
+		ret = 0;
+	}
+
+	return ret;
+}
+
 /*
  * zone->lru_lock is heavily contended.  Some of the functions that
  * shrink the lists perform better by taking out a batch of pages
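
A note on the (!PageActive(page) != !mode) test above: PageActive() can return any non-zero value, while the isolation mode is exactly 0 or 1, so both sides are normalised with a logical not before being compared. A minimal sketch of that check in isolation, with page_active standing in for the result of PageActive(page) (illustrative values, not kernel code):

#include <stdio.h>

#define ISOLATE_INACTIVE 0	/* Isolate inactive pages. */
#define ISOLATE_ACTIVE   1	/* Isolate active pages. */
#define ISOLATE_BOTH     2	/* Isolate both active and inactive pages. */

/* Non-zero when a page's active state does not match the requested
 * mode; mirrors the early -EINVAL return in __isolate_lru_page(). */
static int wrong_active_state(int page_active, int mode)
{
	return mode != ISOLATE_BOTH && (!page_active != !mode);
}

int main(void)
{
	/* A flag word such as 0x40 still means "active" once normalised. */
	printf("%d\n", wrong_active_state(0x40, ISOLATE_ACTIVE));   /* 0: take it */
	printf("%d\n", wrong_active_state(0x40, ISOLATE_INACTIVE)); /* 1: skip it */
	printf("%d\n", wrong_active_state(0, ISOLATE_BOTH));        /* 0: mode accepts both */
	return 0;
}
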
@@ -612,38 +660,90 @@ keep:
  * @src:	The LRU list to pull pages off.
  * @dst:	The temp list to put pages on to.
  * @scanned:	The number of pages that were scanned.
+ * @order:	The caller's attempted allocation order
+ * @mode:	One of the LRU isolation modes
  *
  * returns how many pages were moved onto *@dst.
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		struct list_head *src, struct list_head *dst,
-		unsigned long *scanned)
+		unsigned long *scanned, int order, int mode)
 {
 	unsigned long nr_taken = 0;
-	struct page *page;
 	unsigned long scan;
 
 	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
-		struct list_head *target;
+		struct page *page;
+		unsigned long pfn;
+		unsigned long end_pfn;
+		unsigned long page_pfn;
+		int zone_id;
+
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
 
 		VM_BUG_ON(!PageLRU(page));
 
-		list_del(&page->lru);
-		target = src;
-		if (likely(get_page_unless_zero(page))) {
-			/*
-			 * Be careful not to clear PageLRU until after we're
-			 * sure the page is not being freed elsewhere -- the
-			 * page release code relies on it.
-			 */
-			ClearPageLRU(page);
-			target = dst;
+		switch (__isolate_lru_page(page, mode)) {
+		case 0:
+			list_move(&page->lru, dst);
 			nr_taken++;
-		} /* else it is being freed elsewhere */
+			break;
 
-		list_add(&page->lru, target);
+		case -EBUSY:
+			/* else it is being freed elsewhere */
+			list_move(&page->lru, src);
+			continue;
+
+		default:
+			BUG();
+		}
+
+		if (!order)
+			continue;
+
+		/*
+		 * Attempt to take all pages in the order aligned region
+		 * surrounding the tag page.  Only take those pages of
+		 * the same active state as that tag page.  We may safely
+		 * round the target page pfn down to the requested order
+		 * as the mem_map is guaranteed valid out to MAX_ORDER;
+		 * where that page is in a different zone we will detect
+		 * it from its zone id and abort this block scan.
+		 */
+		zone_id = page_zone_id(page);
+		page_pfn = page_to_pfn(page);
+		pfn = page_pfn & ~((1 << order) - 1);
+		end_pfn = pfn + (1 << order);
+		for (; pfn < end_pfn; pfn++) {
+			struct page *cursor_page;
+
+			/* The target page is in the block, ignore it. */
+			if (unlikely(pfn == page_pfn))
+				continue;
+
+			/* Avoid holes within the zone. */
+			if (unlikely(!pfn_valid_within(pfn)))
+				break;
+
+			cursor_page = pfn_to_page(pfn);
+			/* Check that we have not crossed a zone boundary. */
+			if (unlikely(page_zone_id(cursor_page) != zone_id))
+				continue;
+			switch (__isolate_lru_page(cursor_page, mode)) {
+			case 0:
+				list_move(&cursor_page->lru, dst);
+				nr_taken++;
+				scan++;
+				break;
+
+			case -EBUSY:
+				/* else it is being freed elsewhere */
+				list_move(&cursor_page->lru, src);
+			default:
+				break;
+			}
+		}
 	}
 
 	*scanned = scan;
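
The pfn arithmetic in the block scan above rounds the tag page down to the start of its order-aligned block and walks to the end of that block, trying to isolate the neighbouring pages as well. A standalone sketch of just that arithmetic, with a made-up pfn and order (the real code inspects the struct page behind each pfn rather than printing it):

#include <stdio.h>

int main(void)
{
	unsigned long page_pfn = 262147;	/* hypothetical tag page */
	int order = 3;				/* caller wants 2^3 contiguous pages */

	/* Same rounding as isolate_lru_pages(): mask off the low bits to
	 * find the block start, then scan one full 2^order block. */
	unsigned long pfn = page_pfn & ~((1UL << order) - 1);
	unsigned long end_pfn = pfn + (1UL << order);

	printf("tag pfn %lu lies in block [%lu, %lu)\n",
	       page_pfn, pfn, end_pfn);		/* -> [262144, 262152) */
	return 0;
}
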
@@ -651,6 +751,24 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 }
 
 /*
+ * clear_active_flags() is a helper for shrink_inactive_list(), clearing
+ * any active bits from the pages in the list.
+ */
+static unsigned long clear_active_flags(struct list_head *page_list)
+{
+	int nr_active = 0;
+	struct page *page;
+
+	list_for_each_entry(page, page_list, lru)
+		if (PageActive(page)) {
+			ClearPageActive(page);
+			nr_active++;
+		}
+
+	return nr_active;
+}
+
+/*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
  */
@@ -671,11 +789,18 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_taken;
 		unsigned long nr_scan;
 		unsigned long nr_freed;
+		unsigned long nr_active;
 
 		nr_taken = isolate_lru_pages(sc->swap_cluster_max,
 			     &zone->inactive_list,
-			     &page_list, &nr_scan);
-		__mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
+			     &page_list, &nr_scan, sc->order,
+			     (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
+					     ISOLATE_BOTH : ISOLATE_INACTIVE);
+		nr_active = clear_active_flags(&page_list);
+
+		__mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
+		__mod_zone_page_state(zone, NR_INACTIVE,
+						-(nr_taken - nr_active));
 		zone->pages_scanned += nr_scan;
 		spin_unlock_irq(&zone->lru_lock);
 
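
Two things happen in the bookkeeping above: the isolation mode flips to ISOLATE_BOTH once the reclaim order exceeds PAGE_ALLOC_COSTLY_ORDER, and any active pages swept up by the block scan are subtracted from NR_ACTIVE instead of NR_INACTIVE. A small sketch of that split with made-up counts (not kernel code):

#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER 3
#define ISOLATE_INACTIVE 0
#define ISOLATE_BOTH     2

int main(void)
{
	int order = 4;			/* hypothetical costly allocation */
	int mode = (order > PAGE_ALLOC_COSTLY_ORDER) ?
				ISOLATE_BOTH : ISOLATE_INACTIVE;
	unsigned long nr_taken = 32;	/* pages pulled off the LRU */
	unsigned long nr_active = 5;	/* of those, how many were PageActive */

	printf("mode=%d NR_ACTIVE-=%lu NR_INACTIVE-=%lu\n",
	       mode, nr_active, nr_taken - nr_active);
	return 0;
}
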
@@ -820,7 +945,7 @@ force_reclaim_mapped:
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
-			    &l_hold, &pgscanned);
+			    &l_hold, &pgscanned, sc->order, ISOLATE_ACTIVE);
 	zone->pages_scanned += pgscanned;
 	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
@@ -1011,7 +1136,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
  * holds filesystem locks which prevent writeout this might not work, and the
  * allocation attempt will fail.
  */
-unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 {
 	int priority;
 	int ret = 0;
@@ -1026,6 +1151,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.may_swap = 1,
 		.swappiness = vm_swappiness,
+		.order = order,
 	};
 
 	count_vm_event(ALLOCSTALL);
@@ -1131,6 +1257,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = vm_swappiness,
+		.order = order,
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which