[PATCH] mm: __alloc_pages cleanup

Clean up of __alloc_pages.

Restoration of previous behaviour, plus further cleanups by introducing an
'alloc_flags', removing the last of should_reclaim_zone.

Signed-off-by: Rohit Seth <rohit.seth@intel.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Rohit Seth <rohit.seth@intel.com> 2005-11-13 19:06:43 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-11-13 21:14:12 -0500
commit: 7fb1d9fca5c6e3b06773b69165a73f3fb786b8ee (patch)
tree: 21f8b7deeb8a7e1197edfccc610365a12d60322e
parent: 51c6f666fceb3184eeff045dad4432b602cd648e (diff)
3 files changed, 89 insertions, 114 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f5fa3082fd6a..6cfb114a0c34 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -329,7 +329,7 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive,
 void build_all_zonelists(void);
 void wakeup_kswapd(struct zone *zone, int order);
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-                int alloc_type, int can_try_harder, gfp_t gfp_high);
+                int classzone_idx, int alloc_flags);
 #ifdef CONFIG_HAVE_MEMORY_PRESENT
 void memory_present(int nid, unsigned long start, unsigned long end);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b37dc0f78d07..845b91749a42 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -732,9 +732,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
                }
                local_irq_restore(flags);
                put_cpu();
-        }
+        } else {
-        if (page == NULL) {
                spin_lock_irqsave(&zone->lock, flags);
                page = __rmqueue(zone, order);
                spin_unlock_irqrestore(&zone->lock, flags);
@@ -754,20 +752,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
        return page;
 }
+#define ALLOC_NO_WATERMARKS     0x01 /* don't check watermarks at all */
+#define ALLOC_HARDER            0x02 /* try to alloc harder */
+#define ALLOC_HIGH              0x04 /* __GFP_HIGH set */
+#define ALLOC_CPUSET            0x08 /* check for correct cpuset */
 /*
 * Return 1 if free pages are above 'mark'. This takes into account the order
 * of the allocation.
 */
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-                      int classzone_idx, int can_try_harder, gfp_t gfp_high)
+                      int classzone_idx, int alloc_flags)
 {
        /* free_pages my go negative - that's OK */
        long min = mark, free_pages = z->free_pages - (1 << order) + 1;
        int o;
-        if (gfp_high)
+        if (alloc_flags & ALLOC_HIGH)
                min -= min / 2;
-        if (can_try_harder)
+        if (alloc_flags & ALLOC_HARDER)
                min -= min / 4;
        if (free_pages <= min + z->lowmem_reserve[classzone_idx])
@@ -785,14 +788,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
        return 1;
 }
-static inline int
+/*
-should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
+ * get_page_from_freeliest goes through the zonelist trying to allocate
+ * a page.
+ */
+static struct page *
+get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+                struct zonelist *zonelist, int alloc_flags)
 {
-        if (!z->reclaim_pages)
+        struct zone **z = zonelist->zones;
-                return 0;
+        struct page *page = NULL;
-        if (gfp_mask & __GFP_NORECLAIM)
+        int classzone_idx = zone_idx(*z);
-                return 0;
-        return 1;
+        /*
+         * Go through the zonelist once, looking for a zone with enough free.
+         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+         */
+        do {
+                if ((alloc_flags & ALLOC_CPUSET) &&
+                                !cpuset_zone_allowed(*z, gfp_mask))
+                        continue;
+                if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
+                        if (!zone_watermark_ok(*z, order, (*z)->pages_low,
+                                    classzone_idx, alloc_flags))
+                                continue;
+                }
+                page = buffered_rmqueue(*z, order, gfp_mask);
+                if (page) {
+                        zone_statistics(zonelist, *z);
+                        break;
+                }
+        } while (*(++z) != NULL);
+        return page;
 }
 /*
@@ -803,92 +832,60 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
                struct zonelist *zonelist)
 {
        const gfp_t wait = gfp_mask & __GFP_WAIT;
-        struct zone **zones, *z;
+        struct zone **z;
        struct page *page;
        struct reclaim_state reclaim_state;
        struct task_struct *p = current;
-        int i;
-        int classzone_idx;
        int do_retry;
-        int can_try_harder;
+        int alloc_flags;
        int did_some_progress;
        might_sleep_if(wait);
-        /*
+        z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
-         * The caller may dip into page reserves a bit more if the caller
-         * cannot run direct reclaim, or is the caller has realtime scheduling
-         * policy
-         */
-        can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
-        zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
-        if (unlikely(zones[0] == NULL)) {
+        if (unlikely(*z == NULL)) {
                /* Should this ever happen?? */
                return NULL;
        }
+restart:
+        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+                                zonelist, ALLOC_CPUSET);
+        if (page)
+                goto got_pg;
-        classzone_idx = zone_idx(zones[0]);
+        do
+                wakeup_kswapd(*z, order);
+        while (*(++z));
-restart:
        /*
-         * Go through the zonelist once, looking for a zone with enough free.
+         * OK, we're below the kswapd watermark and have kicked background
-         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+         * reclaim. Now things get more complex, so set up alloc_flags according
+         * to how we want to proceed.
+         *
+         * The caller may dip into page reserves a bit more if the caller
+         * cannot run direct reclaim, or if the caller has realtime scheduling
+         * policy.
         */
-        for (i = 0; (z = zones[i]) != NULL; i++) {
+        alloc_flags = 0;
-                int do_reclaim = should_reclaim_zone(z, gfp_mask);
+        if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
+                alloc_flags |= ALLOC_HARDER;
-                if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
+        if (gfp_mask & __GFP_HIGH)
-                        continue;
+                alloc_flags |= ALLOC_HIGH;
+        if (wait)
-                /*
+                alloc_flags |= ALLOC_CPUSET;
-                 * If the zone is to attempt early page reclaim then this loop
-                 * will try to reclaim pages and check the watermark a second
-                 * time before giving up and falling back to the next zone.
-                 */
-zone_reclaim_retry:
-                if (!zone_watermark_ok(z, order, z->pages_low,
-                                       classzone_idx, 0, 0)) {
-                        if (!do_reclaim)
-                                continue;
-                        else {
-                                zone_reclaim(z, gfp_mask, order);
-                                /* Only try reclaim once */
-                                do_reclaim = 0;
-                                goto zone_reclaim_retry;
-                        }
-                }
-                page = buffered_rmqueue(z, order, gfp_mask);
-                if (page)
-                        goto got_pg;
-        }
-        for (i = 0; (z = zones[i]) != NULL; i++)
-                wakeup_kswapd(z, order);
        /*
         * Go through the zonelist again. Let __GFP_HIGH and allocations
-         * coming from realtime tasks to go deeper into reserves
+         * coming from realtime tasks go deeper into reserves.
         *
         * This is the last chance, in general, before the goto nopage.
         * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
         */
-        for (i = 0; (z = zones[i]) != NULL; i++) {
+        page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
-                if (!zone_watermark_ok(z, order, z->pages_min,
+        if (page)
-                                       classzone_idx, can_try_harder,
+                goto got_pg;
-                                       gfp_mask & __GFP_HIGH))
-                        continue;
-                if (wait && !cpuset_zone_allowed(z, gfp_mask))
-                        continue;
-                page = buffered_rmqueue(z, order, gfp_mask);
-                if (page)
-                        goto got_pg;
-        }
        /* This allocation should allow future memory freeing. */
@@ -897,13 +894,10 @@ zone_reclaim_retry:
                if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 nofail_alloc:
                        /* go through the zonelist yet again, ignoring mins */
-                        for (i = 0; (z = zones[i]) != NULL; i++) {
+                        page = get_page_from_freelist(gfp_mask, order,
-                                if (!cpuset_zone_allowed(z, gfp_mask))
+                                zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
-                                        continue;
+                        if (page)
-                                page = buffered_rmqueue(z, order, gfp_mask);
+                                goto got_pg;
-                                if (page)
-                                        goto got_pg;
-                        }
                        if (gfp_mask & __GFP_NOFAIL) {
                                blk_congestion_wait(WRITE, HZ/50);
                                goto nofail_alloc;
@@ -924,7 +918,7 @@ rebalance:
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
-        did_some_progress = try_to_free_pages(zones, gfp_mask);
+        did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
        p->reclaim_state = NULL;
        p->flags &= ~PF_MEMALLOC;
@@ -932,19 +926,10 @@ rebalance:
        cond_resched();
        if (likely(did_some_progress)) {
-                for (i = 0; (z = zones[i]) != NULL; i++) {
+                page = get_page_from_freelist(gfp_mask, order,
-                        if (!zone_watermark_ok(z, order, z->pages_min,
+                                                zonelist, alloc_flags);
-                                               classzone_idx, can_try_harder,
+                if (page)
-                                               gfp_mask & __GFP_HIGH))
+                        goto got_pg;
-                                continue;
-                        if (!cpuset_zone_allowed(z, gfp_mask))
-                                continue;
-                        page = buffered_rmqueue(z, order, gfp_mask);
-                        if (page)
-                                goto got_pg;
-                }
        } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
                /*
                 * Go through the zonelist yet one more time, keep
@@ -952,18 +937,10 @@ rebalance:
                 * a parallel oom killing, we must fail if we're still
                 * under heavy pressure.
                 */
-                for (i = 0; (z = zones[i]) != NULL; i++) {
+                page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-                        if (!zone_watermark_ok(z, order, z->pages_high,
+                                                zonelist, ALLOC_CPUSET);
-                                               classzone_idx, 0, 0))
+                if (page)
-                                continue;
+                        goto got_pg;
-                        if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-                                continue;
-                        page = buffered_rmqueue(z, order, gfp_mask);
-                        if (page)
-                                goto got_pg;
-                }
                out_of_memory(gfp_mask, order);
                goto restart;
@@ -996,9 +973,7 @@ nopage:
                dump_stack();
                show_mem();
        }
-        return NULL;
 got_pg:
-        zone_statistics(zonelist, z);
        return page;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 135bf8ca96ee..28130541270f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1074,7 +1074,7 @@ loop_again:
                                        continue;
                                if (!zone_watermark_ok(zone, order,
-                                                zone->pages_high, 0, 0, 0)) {
+                                                zone->pages_high, 0, 0)) {
                                        end_zone = i;
                                        goto scan;
                                }
@@ -1111,7 +1111,7 @@ scan:
                        if (nr_pages == 0) {    /* Not software suspend */
                                if (!zone_watermark_ok(zone, order,
-                                                zone->pages_high, end_zone, 0, 0))
+                                                zone->pages_high, end_zone, 0))
                                        all_zones_ok = 0;
                        }
                        zone->temp_priority = priority;
@@ -1259,7 +1259,7 @@ void wakeup_kswapd(struct zone *zone, int order)
                return;
        pgdat = zone->zone_pgdat;
-        if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
+        if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
                return;
        if (pgdat->kswapd_max_order < order)
                pgdat->kswapd_max_order = order;
author	Rohit Seth <rohit.seth@intel.com>	2005-11-13 19:06:43 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-11-13 21:14:12 -0500
commit	7fb1d9fca5c6e3b06773b69165a73f3fb786b8ee (patch)
tree	21f8b7deeb8a7e1197edfccc610365a12d60322e
parent	51c6f666fceb3184eeff045dad4432b602cd648e (diff)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f5fa3082fd6a..6cfb114a0c34 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -329,7 +329,7 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive,
329	void build_all_zonelists(void);	329	void build_all_zonelists(void);
330	void wakeup_kswapd(struct zone *zone, int order);	330	void wakeup_kswapd(struct zone *zone, int order);
331	int zone_watermark_ok(struct zone *z, int order, unsigned long mark,	331	int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
332	int alloc_type, int can_try_harder, gfp_t gfp_high);	332	int classzone_idx, int alloc_flags);
333		333
334	#ifdef CONFIG_HAVE_MEMORY_PRESENT	334	#ifdef CONFIG_HAVE_MEMORY_PRESENT
335	void memory_present(int nid, unsigned long start, unsigned long end);	335	void memory_present(int nid, unsigned long start, unsigned long end);


diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b37dc0f78d07..845b91749a42 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -732,9 +732,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
732	}	732	}
733	local_irq_restore(flags);	733	local_irq_restore(flags);
734	put_cpu();	734	put_cpu();
735	}	735	} else {
736
737	if (page == NULL) {
738	spin_lock_irqsave(&zone->lock, flags);	736	spin_lock_irqsave(&zone->lock, flags);
739	page = __rmqueue(zone, order);	737	page = __rmqueue(zone, order);
740	spin_unlock_irqrestore(&zone->lock, flags);	738	spin_unlock_irqrestore(&zone->lock, flags);
@@ -754,20 +752,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
754	return page;	752	return page;
755	}	753	}
756		754
		755	#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */
		756	#define ALLOC_HARDER 0x02 /* try to alloc harder */
		757	#define ALLOC_HIGH 0x04 /* __GFP_HIGH set */
		758	#define ALLOC_CPUSET 0x08 /* check for correct cpuset */
		759
757	/*	760	/*
758	* Return 1 if free pages are above 'mark'. This takes into account the order	761	* Return 1 if free pages are above 'mark'. This takes into account the order
759	* of the allocation.	762	* of the allocation.
760	*/	763	*/
761	int zone_watermark_ok(struct zone *z, int order, unsigned long mark,	764	int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
762	int classzone_idx, int can_try_harder, gfp_t gfp_high)	765	int classzone_idx, int alloc_flags)
763	{	766	{
764	/* free_pages my go negative - that's OK */	767	/* free_pages my go negative - that's OK */
765	long min = mark, free_pages = z->free_pages - (1 << order) + 1;	768	long min = mark, free_pages = z->free_pages - (1 << order) + 1;
766	int o;	769	int o;
767		770
768	if (gfp_high)	771	if (alloc_flags & ALLOC_HIGH)
769	min -= min / 2;	772	min -= min / 2;
770	if (can_try_harder)	773	if (alloc_flags & ALLOC_HARDER)
771	min -= min / 4;	774	min -= min / 4;
772		775
773	if (free_pages <= min + z->lowmem_reserve[classzone_idx])	776	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
@@ -785,14 +788,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
785	return 1;	788	return 1;
786	}	789	}
787		790
788	static inline int	791	/*
789	should_reclaim_zone(struct zone *z, gfp_t gfp_mask)	792	* get_page_from_freeliest goes through the zonelist trying to allocate
		793	* a page.
		794	*/
		795	static struct page *
		796	get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
		797	struct zonelist *zonelist, int alloc_flags)
790	{	798	{
791	if (!z->reclaim_pages)	799	struct zone **z = zonelist->zones;
792	return 0;	800	struct page *page = NULL;
793	if (gfp_mask & __GFP_NORECLAIM)	801	int classzone_idx = zone_idx(*z);
794	return 0;	802
795	return 1;	803	/*
		804	* Go through the zonelist once, looking for a zone with enough free.
		805	* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
		806	*/
		807	do {
		808	if ((alloc_flags & ALLOC_CPUSET) &&
		809	!cpuset_zone_allowed(*z, gfp_mask))
		810	continue;
		811
		812	if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
		813	if (!zone_watermark_ok(*z, order, (*z)->pages_low,
		814	classzone_idx, alloc_flags))
		815	continue;
		816	}
		817
		818	page = buffered_rmqueue(*z, order, gfp_mask);
		819	if (page) {
		820	zone_statistics(zonelist, *z);
		821	break;
		822	}
		823	} while (*(++z) != NULL);
		824	return page;
796	}	825	}
797		826
798	/*	827	/*
@@ -803,92 +832,60 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
803	struct zonelist *zonelist)	832	struct zonelist *zonelist)
804	{	833	{
805	const gfp_t wait = gfp_mask & __GFP_WAIT;	834	const gfp_t wait = gfp_mask & __GFP_WAIT;
806	struct zone **zones, *z;	835	struct zone **z;
807	struct page *page;	836	struct page *page;
808	struct reclaim_state reclaim_state;	837	struct reclaim_state reclaim_state;
809	struct task_struct *p = current;	838	struct task_struct *p = current;
810	int i;
811	int classzone_idx;
812	int do_retry;	839	int do_retry;
813	int can_try_harder;	840	int alloc_flags;
814	int did_some_progress;	841	int did_some_progress;
815		842
816	might_sleep_if(wait);	843	might_sleep_if(wait);
817		844
818	/*	845	z = zonelist->zones; /* the list of zones suitable for gfp_mask */
819	* The caller may dip into page reserves a bit more if the caller
820	* cannot run direct reclaim, or is the caller has realtime scheduling
821	* policy
822	*/
823	can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
824
825	zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
826		846
827	if (unlikely(zones[0] == NULL)) {	847	if (unlikely(*z == NULL)) {
828	/* Should this ever happen?? */	848	/* Should this ever happen?? */
829	return NULL;	849	return NULL;
830	}	850	}
		851	restart:
		852	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
		853	zonelist, ALLOC_CPUSET);
		854	if (page)
		855	goto got_pg;
831		856
832	classzone_idx = zone_idx(zones[0]);	857	do
		858	wakeup_kswapd(*z, order);
		859	while (*(++z));
833		860
834	restart:
835	/*	861	/*
836	* Go through the zonelist once, looking for a zone with enough free.	862	* OK, we're below the kswapd watermark and have kicked background
837	* See also cpuset_zone_allowed() comment in kernel/cpuset.c.	863	* reclaim. Now things get more complex, so set up alloc_flags according
		864	* to how we want to proceed.
		865	*
		866	* The caller may dip into page reserves a bit more if the caller
		867	* cannot run direct reclaim, or if the caller has realtime scheduling
		868	* policy.
838	*/	869	*/
839	for (i = 0; (z = zones[i]) != NULL; i++) {	870	alloc_flags = 0;
840	int do_reclaim = should_reclaim_zone(z, gfp_mask);	871	if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
841		872	alloc_flags |= ALLOC_HARDER;
842	if (!cpuset_zone_allowed(z, __GFP_HARDWALL))	873	if (gfp_mask & __GFP_HIGH)
843	continue;	874	alloc_flags |= ALLOC_HIGH;
844		875	if (wait)
845	/*	876	alloc_flags |= ALLOC_CPUSET;
846	* If the zone is to attempt early page reclaim then this loop
847	* will try to reclaim pages and check the watermark a second
848	* time before giving up and falling back to the next zone.
849	*/
850	zone_reclaim_retry:
851	if (!zone_watermark_ok(z, order, z->pages_low,
852	classzone_idx, 0, 0)) {
853	if (!do_reclaim)
854	continue;
855	else {
856	zone_reclaim(z, gfp_mask, order);
857	/* Only try reclaim once */
858	do_reclaim = 0;
859	goto zone_reclaim_retry;
860	}
861	}
862
863	page = buffered_rmqueue(z, order, gfp_mask);
864	if (page)
865	goto got_pg;
866	}
867
868	for (i = 0; (z = zones[i]) != NULL; i++)
869	wakeup_kswapd(z, order);
870		877
871	/*	878	/*
872	* Go through the zonelist again. Let __GFP_HIGH and allocations	879	* Go through the zonelist again. Let __GFP_HIGH and allocations
873	* coming from realtime tasks to go deeper into reserves	880	* coming from realtime tasks go deeper into reserves.
874	*	881	*
875	* This is the last chance, in general, before the goto nopage.	882	* This is the last chance, in general, before the goto nopage.
876	* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.	883	* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
877	* See also cpuset_zone_allowed() comment in kernel/cpuset.c.	884	* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
878	*/	885	*/
879	for (i = 0; (z = zones[i]) != NULL; i++) {	886	page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
880	if (!zone_watermark_ok(z, order, z->pages_min,	887	if (page)
881	classzone_idx, can_try_harder,	888	goto got_pg;
882	gfp_mask & __GFP_HIGH))
883	continue;
884
885	if (wait && !cpuset_zone_allowed(z, gfp_mask))
886	continue;
887
888	page = buffered_rmqueue(z, order, gfp_mask);
889	if (page)
890	goto got_pg;
891	}
892		889
893	/* This allocation should allow future memory freeing. */	890	/* This allocation should allow future memory freeing. */
894		891
@@ -897,13 +894,10 @@ zone_reclaim_retry:
897	if (!(gfp_mask & __GFP_NOMEMALLOC)) {	894	if (!(gfp_mask & __GFP_NOMEMALLOC)) {
898	nofail_alloc:	895	nofail_alloc:
899	/* go through the zonelist yet again, ignoring mins */	896	/* go through the zonelist yet again, ignoring mins */
900	for (i = 0; (z = zones[i]) != NULL; i++) {	897	page = get_page_from_freelist(gfp_mask, order,
901	if (!cpuset_zone_allowed(z, gfp_mask))	898	zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
902	continue;	899	if (page)
903	page = buffered_rmqueue(z, order, gfp_mask);	900	goto got_pg;
904	if (page)
905	goto got_pg;
906	}
907	if (gfp_mask & __GFP_NOFAIL) {	901	if (gfp_mask & __GFP_NOFAIL) {
908	blk_congestion_wait(WRITE, HZ/50);	902	blk_congestion_wait(WRITE, HZ/50);
909	goto nofail_alloc;	903	goto nofail_alloc;
@@ -924,7 +918,7 @@ rebalance:
924	reclaim_state.reclaimed_slab = 0;	918	reclaim_state.reclaimed_slab = 0;
925	p->reclaim_state = &reclaim_state;	919	p->reclaim_state = &reclaim_state;
926		920
927	did_some_progress = try_to_free_pages(zones, gfp_mask);	921	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
928		922
929	p->reclaim_state = NULL;	923	p->reclaim_state = NULL;
930	p->flags &= ~PF_MEMALLOC;	924	p->flags &= ~PF_MEMALLOC;
@@ -932,19 +926,10 @@ rebalance:
932	cond_resched();	926	cond_resched();
933		927
934	if (likely(did_some_progress)) {	928	if (likely(did_some_progress)) {
935	for (i = 0; (z = zones[i]) != NULL; i++) {	929	page = get_page_from_freelist(gfp_mask, order,
936	if (!zone_watermark_ok(z, order, z->pages_min,	930	zonelist, alloc_flags);
937	classzone_idx, can_try_harder,	931	if (page)
938	gfp_mask & __GFP_HIGH))	932	goto got_pg;
939	continue;
940
941	if (!cpuset_zone_allowed(z, gfp_mask))
942	continue;
943
944	page = buffered_rmqueue(z, order, gfp_mask);
945	if (page)
946	goto got_pg;
947	}
948	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {	933	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
949	/*	934	/*
950	* Go through the zonelist yet one more time, keep	935	* Go through the zonelist yet one more time, keep
@@ -952,18 +937,10 @@ rebalance:
952	* a parallel oom killing, we must fail if we're still	937	* a parallel oom killing, we must fail if we're still
953	* under heavy pressure.	938	* under heavy pressure.
954	*/	939	*/
955	for (i = 0; (z = zones[i]) != NULL; i++) {	940	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
956	if (!zone_watermark_ok(z, order, z->pages_high,	941	zonelist, ALLOC_CPUSET);
957	classzone_idx, 0, 0))	942	if (page)
958	continue;	943	goto got_pg;
959
960	if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
961	continue;
962
963	page = buffered_rmqueue(z, order, gfp_mask);
964	if (page)
965	goto got_pg;
966	}
967		944
968	out_of_memory(gfp_mask, order);	945	out_of_memory(gfp_mask, order);
969	goto restart;	946	goto restart;
@@ -996,9 +973,7 @@ nopage:
996	dump_stack();	973	dump_stack();
997	show_mem();	974	show_mem();
998	}	975	}
999	return NULL;
1000	got_pg:	976	got_pg:
1001	zone_statistics(zonelist, z);
1002	return page;	977	return page;
1003	}	978	}
1004		979


diff --git a/mm/vmscan.c b/mm/vmscan.c
index 135bf8ca96ee..28130541270f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1074,7 +1074,7 @@ loop_again:
1074	continue;	1074	continue;
1075		1075
1076	if (!zone_watermark_ok(zone, order,	1076	if (!zone_watermark_ok(zone, order,
1077	zone->pages_high, 0, 0, 0)) {	1077	zone->pages_high, 0, 0)) {
1078	end_zone = i;	1078	end_zone = i;
1079	goto scan;	1079	goto scan;
1080	}	1080	}
@@ -1111,7 +1111,7 @@ scan:
1111		1111
1112	if (nr_pages == 0) { /* Not software suspend */	1112	if (nr_pages == 0) { /* Not software suspend */
1113	if (!zone_watermark_ok(zone, order,	1113	if (!zone_watermark_ok(zone, order,
1114	zone->pages_high, end_zone, 0, 0))	1114	zone->pages_high, end_zone, 0))
1115	all_zones_ok = 0;	1115	all_zones_ok = 0;
1116	}	1116	}
1117	zone->temp_priority = priority;	1117	zone->temp_priority = priority;
@@ -1259,7 +1259,7 @@ void wakeup_kswapd(struct zone *zone, int order)
1259	return;	1259	return;
1260		1260
1261	pgdat = zone->zone_pgdat;	1261	pgdat = zone->zone_pgdat;
1262	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))	1262	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
1263	return;	1263	return;
1264	if (pgdat->kswapd_max_order < order)	1264	if (pgdat->kswapd_max_order < order)
1265	pgdat->kswapd_max_order = order;	1265	pgdat->kswapd_max_order = order;