author     Jiri Kosina <jkosina@suse.cz>    2010-12-10 09:19:18 -0500
committer  Jiri Kosina <jkosina@suse.cz>    2010-12-10 09:19:18 -0500
commit     2ade0c1d9d93b7642212657ef76f4a1e30233711 (patch)
tree       63bc720c0ffe5f4760cac4ed617b9870b050175e /mm/page_alloc.c
parent     504499f22c08a03e2e19dc88d31aa0ecd2ac815e (diff)
parent     6313e3c21743cc88bb5bd8aa72948ee1e83937b6 (diff)

Merge branch 'master' into upstream
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  132
1 file changed, 89 insertions(+), 43 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2a362c52fdf4..ff7e15872398 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -104,19 +104,24 @@ gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
  * only be modified with pm_mutex held, unless the suspend/hibernate code is
  * guaranteed not to run in parallel with that modification).
  */
-void set_gfp_allowed_mask(gfp_t mask)
+
+static gfp_t saved_gfp_mask;
+
+void pm_restore_gfp_mask(void)
 {
         WARN_ON(!mutex_is_locked(&pm_mutex));
-        gfp_allowed_mask = mask;
+        if (saved_gfp_mask) {
+                gfp_allowed_mask = saved_gfp_mask;
+                saved_gfp_mask = 0;
+        }
 }
 
-gfp_t clear_gfp_allowed_mask(gfp_t mask)
+void pm_restrict_gfp_mask(void)
 {
-        gfp_t ret = gfp_allowed_mask;
-
         WARN_ON(!mutex_is_locked(&pm_mutex));
-        gfp_allowed_mask &= ~mask;
-        return ret;
+        WARN_ON(saved_gfp_mask);
+        saved_gfp_mask = gfp_allowed_mask;
+        gfp_allowed_mask &= ~GFP_IOFS;
 }
 #endif /* CONFIG_PM_SLEEP */
 
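The hunk above replaces the generic set_gfp_allowed_mask()/clear_gfp_allowed_mask() pair with an explicit save-and-restore of gfp_allowed_mask around suspend/hibernate, masking out only the I/O and filesystem bits. A minimal user-space sketch of the same save/restore idiom follows; the GFP_IOFS and GFP_BOOT_MASK values here are stand-ins, not the kernel's definitions, and assert() stands in for WARN_ON().

#include <assert.h>
#include <stdio.h>

typedef unsigned int gfp_t;

#define GFP_IOFS      0x50u     /* stand-in bits; the real mask is __GFP_IO | __GFP_FS */
#define GFP_BOOT_MASK 0xffu     /* stand-in for the boot-time allowed mask */

static gfp_t gfp_allowed_mask = GFP_BOOT_MASK;
static gfp_t saved_gfp_mask;

/* Mirror of pm_restrict_gfp_mask(): remember the current mask, then
 * forbid I/O and FS allocations while the system is suspending. */
static void pm_restrict_gfp_mask(void)
{
        assert(!saved_gfp_mask);        /* a nested restrict would lose state */
        saved_gfp_mask = gfp_allowed_mask;
        gfp_allowed_mask &= ~GFP_IOFS;
}

/* Mirror of pm_restore_gfp_mask(): put the saved mask back, but only if a
 * restrict actually happened (saved_gfp_mask == 0 means "nothing saved"). */
static void pm_restore_gfp_mask(void)
{
        if (saved_gfp_mask) {
                gfp_allowed_mask = saved_gfp_mask;
                saved_gfp_mask = 0;
        }
}

int main(void)
{
        pm_restrict_gfp_mask();
        printf("during suspend: %#x\n", gfp_allowed_mask);  /* IO/FS bits cleared */
        pm_restore_gfp_mask();
        printf("after resume:   %#x\n", gfp_allowed_mask);  /* back to the boot mask */
        return 0;
}

Making the restore a no-op when nothing was saved lets resume paths call it unconditionally, which is what the "if (saved_gfp_mask)" guard in the hunk provides.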
@@ -531,7 +536,7 @@ static inline void __free_one_page(struct page *page,
          * so it's less likely to be used soon and more likely to be merged
          * as a higher order page
          */
-        if ((order < MAX_ORDER-1) && pfn_valid_within(page_to_pfn(buddy))) {
+        if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
                 struct page *higher_page, *higher_buddy;
                 combined_idx = __find_combined_index(page_idx, order);
                 higher_page = page + combined_idx - page_idx;
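The only change in this hunk tightens the bound from MAX_ORDER-1 to MAX_ORDER-2: the guarded block speculatively looks at the buddy of the next-higher-order (already merged) block, so the probe only names a valid block while order stays at least two below the limit. A self-contained sketch of the buddy index arithmetic involved, with an illustrative MAX_ORDER value:

#include <stdio.h>

#define MAX_ORDER 11    /* illustrative value; the common kernel default */

/* Buddy arithmetic: a block's buddy at a given order is found by flipping
 * bit 'order' of its index, and the merged block starts at the lower index. */
static unsigned long buddy_index(unsigned long idx, unsigned int order)
{
        return idx ^ (1UL << order);
}

static unsigned long combined_index(unsigned long idx, unsigned int order)
{
        return idx & ~(1UL << order);
}

int main(void)
{
        unsigned long page_idx = 6;
        unsigned int order = 1;

        /* __free_one_page() peeks one level further up: it wants the buddy
         * of the just-merged (order + 1) block, so order itself must be at
         * least two below MAX_ORDER for that probe to name a valid block. */
        if (order < MAX_ORDER - 2) {
                unsigned long combined = combined_index(page_idx, order);
                unsigned long higher_buddy = buddy_index(combined, order + 1);

                printf("buddy of %lu at order %u: %lu\n",
                       page_idx, order, buddy_index(page_idx, order));
                printf("merged block %lu, its order-%u buddy: %lu\n",
                       combined, order + 1, higher_buddy);
        }
        return 0;
}

The and-not expression is the same lower-index computation that __find_combined_index() performs in this file.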
@@ -1907,7 +1912,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
                         preferred_zone, migratetype);
 
                 if (!page && gfp_mask & __GFP_NOFAIL)
-                        congestion_wait(BLK_RW_ASYNC, HZ/50);
+                        wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
         } while (!page && (gfp_mask & __GFP_NOFAIL));
 
         return page;
@@ -1932,7 +1937,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
         const gfp_t wait = gfp_mask & __GFP_WAIT;
 
         /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
-        BUILD_BUG_ON(__GFP_HIGH != ALLOC_HIGH);
+        BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
 
         /*
          * The caller may dip into page reserves a bit more if the caller
@@ -1940,7 +1945,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
          * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
          * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
          */
-        alloc_flags |= (gfp_mask & __GFP_HIGH);
+        alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
 
         if (!wait) {
                 alloc_flags |= ALLOC_HARDER;
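The two __force casts in this and the previous hunk exist for the sparse checker: gfp_t is declared __bitwise, so comparing it against, or folding it into, the plain-integer ALLOC_* flags needs an explicit conversion. A self-contained illustration of the annotation pattern; the macros collapse to nothing under a regular compiler, so this builds with gcc, and the warning only appears under sparse with -D__CHECKER__ (the flag value mirrors the kernel's, restated here for the example):

#include <stdio.h>

/* Minimal stand-ins for the kernel's sparse annotations: under a normal
 * compiler they vanish; under sparse, __bitwise marks a type whose values
 * must not silently mix with plain integers, and __force is the explicit
 * "yes, I really mean this conversion" escape hatch. */
#ifdef __CHECKER__
#define __bitwise __attribute__((bitwise))
#define __force   __attribute__((force))
#else
#define __bitwise
#define __force
#endif

typedef unsigned int __bitwise gfp_t;

#define __GFP_HIGH ((__force gfp_t)0x20u)    /* value mirrors the kernel's */
#define ALLOC_HIGH 0x20                      /* plain-int allocator flag   */

int main(void)
{
        gfp_t gfp_mask = __GFP_HIGH;
        int alloc_flags = 0;

        /* Without the __force casts, sparse would warn about mixing a
         * __bitwise type with a plain integer in both expressions below. */
        if (__GFP_HIGH != (__force gfp_t)ALLOC_HIGH)
                printf("flag values diverged\n");

        alloc_flags |= (__force int)(gfp_mask & __GFP_HIGH);
        printf("alloc_flags = %#x\n", alloc_flags);
        return 0;
}

BUILD_BUG_ON() performs the same comparison at compile time; the cast keeps that check sparse-clean without changing the generated code.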
@@ -2095,7 +2100,7 @@ rebalance:
         pages_reclaimed += did_some_progress;
         if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
                 /* Wait for some write requests to complete then retry */
-                congestion_wait(BLK_RW_ASYNC, HZ/50);
+                wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
                 goto rebalance;
         }
 
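This hunk and the earlier __GFP_NOFAIL retry loop both swap an unconditional congestion_wait() for wait_iff_congested(), which also takes the preferred zone so the allocator can skip the stall when there is no real congestion to wait out. A rough user-space model of that retry shape; the congestion test and the allocator are stubs, not the kernel's per-BDI or per-zone accounting:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Stub standing in for the kernel's congestion state. */
static bool writeback_congested(void) { return false; }

/* Stub allocator that always fails, just to drive the loop. */
static void *try_alloc(void) { return NULL; }

int main(void)
{
        void *page = NULL;
        int attempts = 0;

        /* Rough model of the slow-path retry: sleep only if writeback is
         * actually congested, otherwise retry immediately instead of
         * stalling for HZ/50 on every pass. */
        while (!page && attempts < 5) {
                page = try_alloc();
                if (!page) {
                        if (writeback_congested())
                                usleep(20 * 1000);   /* ~HZ/50 with HZ=1000 */
                        attempts++;
                }
        }
        printf("gave up after %d attempts\n", attempts);
        return 0;
}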
@@ -3008,14 +3013,6 @@ static __init_refok int __build_all_zonelists(void *data)
                 build_zonelist_cache(pgdat);
         }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-        /* Setup real pagesets for the new zone */
-        if (data) {
-                struct zone *zone = data;
-                setup_zone_pageset(zone);
-        }
-#endif
-
         /*
          * Initialize the boot_pagesets that are going to be used
          * for bootstrapping processors. The real pagesets for
@@ -3064,7 +3061,11 @@ void build_all_zonelists(void *data)
         } else {
                 /* we have to stop all cpus to guarantee there is no user
                    of zonelist */
-                stop_machine(__build_all_zonelists, data, NULL);
+#ifdef CONFIG_MEMORY_HOTPLUG
+                if (data)
+                        setup_zone_pageset((struct zone *)data);
+#endif
+                stop_machine(__build_all_zonelists, NULL, NULL);
                 /* cpuset refresh routine should be here */
         }
         vm_total_pages = nr_free_pagecache_pages();
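The previous two hunks move the memory-hotplug pageset setup out of __build_all_zonelists() and into its caller: the callback runs under stop_machine(), where blocking is off-limits, so setting up the new zone's pagesets presumably has to happen in normal process context first, and the callback is now invoked with a NULL argument. A skeleton of that ordering, with every kernel piece stubbed out and the stop_machine() model reduced to a plain call:

#include <stdio.h>

struct zone { const char *name; };

/* Stub: in the kernel this allocates per-CPU pagesets and may sleep,
 * which is why it must not run inside the stop_machine() callback. */
static void setup_zone_pageset(struct zone *zone)
{
        printf("allocating pagesets for %s (process context, may sleep)\n", zone->name);
}

static int __build_all_zonelists(void *data)
{
        (void)data;
        printf("rebuilding zonelists (all CPUs stopped, atomic context)\n");
        return 0;
}

/* Rough model of stop_machine(): runs fn while nothing else executes. */
static int stop_machine(int (*fn)(void *), void *data, void *cpus)
{
        (void)cpus;
        return fn(data);
}

int main(void)
{
        struct zone new_zone = { "hot-added zone" };

        /* Ordering after the patch: sleepable setup first, then the
         * atomic zonelist rebuild with a NULL argument. */
        setup_zone_pageset(&new_zone);
        stop_machine(__build_all_zonelists, NULL, NULL);
        return 0;
}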
@@ -5297,12 +5298,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
  * page allocater never alloc memory from ISOLATE block.
  */
 
+static int
+__count_immobile_pages(struct zone *zone, struct page *page, int count)
+{
+        unsigned long pfn, iter, found;
+        /*
+         * For avoiding noise data, lru_add_drain_all() should be called
+         * If ZONE_MOVABLE, the zone never contains immobile pages
+         */
+        if (zone_idx(zone) == ZONE_MOVABLE)
+                return true;
+
+        if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE)
+                return true;
+
+        pfn = page_to_pfn(page);
+        for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
+                unsigned long check = pfn + iter;
+
+                if (!pfn_valid_within(check)) {
+                        iter++;
+                        continue;
+                }
+                page = pfn_to_page(check);
+                if (!page_count(page)) {
+                        if (PageBuddy(page))
+                                iter += (1 << page_order(page)) - 1;
+                        continue;
+                }
+                if (!PageLRU(page))
+                        found++;
+                /*
+                 * If there are RECLAIMABLE pages, we need to check it.
+                 * But now, memory offline itself doesn't call shrink_slab()
+                 * and it still to be fixed.
+                 */
+                /*
+                 * If the page is not RAM, page_count()should be 0.
+                 * we don't need more check. This is an _used_ not-movable page.
+                 *
+                 * The problematic thing here is PG_reserved pages. PG_reserved
+                 * is set to both of a memory hole page and a _used_ kernel
+                 * page at boot.
+                 */
+                if (found > count)
+                        return false;
+        }
+        return true;
+}
+
+bool is_pageblock_removable_nolock(struct page *page)
+{
+        struct zone *zone = page_zone(page);
+        return __count_immobile_pages(zone, page, 0);
+}
+
 int set_migratetype_isolate(struct page *page)
 {
         struct zone *zone;
-        struct page *curr_page;
-        unsigned long flags, pfn, iter;
-        unsigned long immobile = 0;
+        unsigned long flags, pfn;
         struct memory_isolate_notify arg;
         int notifier_ret;
         int ret = -EBUSY;
@@ -5312,11 +5366,6 @@ int set_migratetype_isolate(struct page *page)
         zone_idx = zone_idx(zone);
 
         spin_lock_irqsave(&zone->lock, flags);
-        if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
-            zone_idx == ZONE_MOVABLE) {
-                ret = 0;
-                goto out;
-        }
 
         pfn = page_to_pfn(page);
         arg.start_pfn = pfn;
@@ -5336,23 +5385,20 @@ int set_migratetype_isolate(struct page *page)
          */
         notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
         notifier_ret = notifier_to_errno(notifier_ret);
-        if (notifier_ret || !arg.pages_found)
+        if (notifier_ret)
                 goto out;
-
-        for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
-                if (!pfn_valid_within(pfn))
-                        continue;
-
-                curr_page = pfn_to_page(iter);
-                if (!page_count(curr_page) || PageLRU(curr_page))
-                        continue;
-
-                immobile++;
-        }
-
-        if (arg.pages_found == immobile)
+        /*
+         * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
+         * We just check MOVABLE pages.
+         */
+        if (__count_immobile_pages(zone, page, arg.pages_found))
                 ret = 0;
 
+        /*
+         * immobile means "not-on-lru" paes. If immobile is larger than
+         * removable-by-driver pages reported by notifier, we'll fail.
+         */
+
 out:
         if (!ret) {
                 set_pageblock_migratetype(page, MIGRATE_ISOLATE);
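Taken together, the last three hunks centralize the isolation decision in the new __count_immobile_pages(): set_migratetype_isolate() now succeeds when the pages that are neither free nor on the LRU stay within the budget a driver reported through arg.pages_found. A toy model of just that decision, with the scanning and locking elided:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the new decision in set_migratetype_isolate(): a pageblock
 * may be isolated when the pages that are neither free nor on the LRU do
 * not outnumber the pages a driver promised to release (arg.pages_found). */
static bool can_isolate(int immobile_pages, int pages_found_by_driver)
{
        return immobile_pages <= pages_found_by_driver;
}

int main(void)
{
        printf("%d\n", can_isolate(0, 0));   /* fully movable block: ok    */
        printf("%d\n", can_isolate(3, 4));   /* driver covers the pins: ok */
        printf("%d\n", can_isolate(5, 2));   /* too many immobile: fail    */
        return 0;
}

This mirrors the "if (found > count) return false;" test inside __count_immobile_pages(), with count supplied from arg.pages_found at the call site.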