mm: compaction: memory compaction core

This patch is the core of a mechanism which compacts memory in a zone by relocating movable pages towards the end of the zone. A single compaction run involves a migration scanner and a free scanner. Both scanners operate on pageblock-sized areas in the zone. The migration scanner starts at the bottom of the zone and searches for all movable pages within each area, isolating them onto a private list called migratelist. The free scanner starts at the top of the zone and searches for suitable areas and consumes the free pages within making them available for the migration scanner. The pages isolated for migration are then migrated to the newly isolated free pages. [aarcange@redhat.com: Fix unsafe optimisation] [mel@csn.ul.ie: do not schedule work on other CPUs for compaction] Signed-off-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Rik van Riel <riel@redhat.com> Reviewed-by: Minchan Kim <minchan.kim@gmail.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Christoph Lameter <cl@linux-foundation.org> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Mel Gorman <mel@csn.ul.ie> 2010-05-24 17:32:27 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-05-25 11:06:59 -0400
commit: 748446bb6b5a9390b546af38ec899c868a9dbcf0 (patch)
tree: 4c27d0805a5e094b39ff938ad60dd270b953a79f /mm/compaction.c
parent: c175a0ce7584e5b498fff8cbdb9aa7912aa9fbba (diff)
1 files changed, 393 insertions, 0 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
new file mode 100644
index 000000000000..be1ff3f7552b
--- /dev/null
+++ b/mm/compaction.c
@@ -0,0 +1,393 @@
+/*
+ * linux/mm/compaction.c
+ *
+ * Memory compaction for the reduction of external fragmentation. Note that
+ * this heavily depends upon page migration to do all the real heavy
+ * lifting
+ *
+ * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
+ */
+#include <linux/swap.h>
+#include <linux/migrate.h>
+#include <linux/compaction.h>
+#include <linux/mm_inline.h>
+#include <linux/backing-dev.h>
+#include "internal.h"
+/*
+ * compact_control is used to track pages being migrated and the free pages
+ * they are being migrated to during memory compaction. The free_pfn starts
+ * at the end of a zone and migrate_pfn begins at the start. Movable pages
+ * are moved to the end of a zone during a compaction run and the run
+ * completes when free_pfn <= migrate_pfn
+ *
+ * nr_freepages and nr_migratepages shadow the lengths of the freepages and
+ * migratepages lists. They are recounted from the lists by
+ * update_nr_listpages() after each migrate_pages() call.
+ */
+struct compact_control {
+        struct list_head freepages;     /* List of free pages to migrate to */
+        struct list_head migratepages;  /* List of pages being migrated */
+        unsigned long nr_freepages;     /* Number of isolated free pages */
+        unsigned long nr_migratepages;  /* Number of pages to migrate */
+        unsigned long free_pfn;         /* isolate_freepages search base */
+        unsigned long migrate_pfn;      /* isolate_migratepages search base */
+        /* Account for isolated anon and file pages; both set by acct_isolated() */
+        unsigned long nr_anon;          /* Anon pages on migratepages */
+        unsigned long nr_file;          /* File pages on migratepages */
+        struct zone *zone;              /* Zone compaction_alloc() isolates free pages from */
+};
+/*
+ * Hand every page queued on @freelist back to the page allocator, emptying
+ * the list. Returns the number of pages released.
+ */
+static unsigned long release_freepages(struct list_head *freelist)
+{
+        unsigned long nr_released = 0;
+        /* Pop from the head until nothing is left */
+        while (!list_empty(freelist)) {
+                struct page *freepage;
+                freepage = list_entry(freelist->next, struct page, lru);
+                list_del(&freepage->lru);
+                __free_page(freepage);
+                nr_released++;
+        }
+        return nr_released;
+}
+/*
+ * Scan up to one pageblock worth of PFNs starting at @blockpfn and move
+ * every free (buddy) page found onto @freelist as order-0 pages. The scan
+ * never runs past the end of @zone. Must hold zone->lock.
+ *
+ * Returns the number of order-0 pages added to @freelist.
+ */
+static unsigned long isolate_freepages_block(struct zone *zone,
+                                unsigned long blockpfn,
+                                struct list_head *freelist)
+{
+        unsigned long pfn = blockpfn;
+        unsigned long scan_end;
+        int nr_taken = 0;
+        struct page *pos;
+        /* Stop at the end of the block or of the zone, whichever comes first */
+        scan_end = min(pfn + pageblock_nr_pages,
+                        zone->zone_start_pfn + zone->spanned_pages);
+        /* Step over any leading hole so the page cursor starts on a usable PFN */
+        while (pfn < scan_end && !pfn_valid_within(pfn))
+                pfn++;
+        pos = pfn_to_page(pfn);
+        /* From here on pfn and pos advance in lockstep */
+        for (; pfn < scan_end; pfn++, pos++) {
+                int nr_split, i;
+                /* Only valid PFNs holding a free (buddy) page are of interest */
+                if (!pfn_valid_within(pfn) || !PageBuddy(pos))
+                        continue;
+                /* Break the free page into order-0 pages and queue each of them */
+                nr_split = split_free_page(pos);
+                nr_taken += nr_split;
+                for (i = 0; i < nr_split; i++)
+                        list_add(&pos[i].lru, freelist);
+                /* Skip what was just taken; the loop step moves past the last page */
+                if (nr_split) {
+                        pfn += nr_split - 1;
+                        pos += nr_split - 1;
+                }
+        }
+        return nr_taken;
+}
+/*
+ * Decide whether the pageblock containing @page may receive migrated pages.
+ * Returns true for MIGRATE_MOVABLE blocks and for blocks whose page is a
+ * free page of at least pageblock order; MIGRATE_ISOLATE and
+ * MIGRATE_RESERVE blocks are always refused.
+ */
+static bool suitable_migration_target(struct page *page)
+{
+        const int block_type = get_pageblock_migratetype(page);
+        /* Leave memory hot-remove and the min_free_kbytes reserve alone */
+        switch (block_type) {
+        case MIGRATE_ISOLATE:
+        case MIGRATE_RESERVE:
+                return false;
+        default:
+                break;
+        }
+        /* A large free page is acceptable whatever the block type */
+        if (PageBuddy(page) && page_order(page) >= pageblock_order)
+                return true;
+        /* Otherwise only movable blocks qualify */
+        return block_type == MIGRATE_MOVABLE;
+}
+/*
+ * Top up cc->freepages. Pageblocks are examined from cc->free_pfn downwards
+ * and free pages are pulled out of every suitable block until there is at
+ * least one free page per page on cc->migratepages, or until the scan comes
+ * within a pageblock of the migration scanner. cc->free_pfn and
+ * cc->nr_freepages are updated on the way out.
+ */
+static void isolate_freepages(struct zone *zone,
+                                struct compact_control *cc)
+{
+        struct list_head *freelist = &cc->freepages;
+        int nr_freepages = cc->nr_freepages;
+        unsigned long scan_floor = cc->migrate_pfn + pageblock_nr_pages;
+        unsigned long restart_pfn = scan_floor;
+        unsigned long pfn = cc->free_pfn;
+        unsigned long flags;
+        struct page *page;
+        spin_lock_irqsave(&zone->lock, flags);
+        while (pfn > scan_floor && cc->nr_migratepages > nr_freepages) {
+                /*
+                 * pfn_valid() guards the memmap lookup. The zone comparison
+                 * copes with overlapping nodes/zones (a layout such as
+                 * node0 node1 node0), where not every page inside a zone's
+                 * PFN range belongs to that zone.
+                 */
+                if (pfn_valid(pfn)) {
+                        page = pfn_to_page(pfn);
+                        if (page_zone(page) == zone &&
+                            suitable_migration_target(page)) {
+                                unsigned long isolated;
+                                isolated = isolate_freepages_block(zone, pfn,
+                                                                freelist);
+                                nr_freepages += isolated;
+                                /*
+                                 * Track the highest block that yielded pages.
+                                 * The next search restarts there because
+                                 * migration may have returned pages to the
+                                 * allocator in the meantime.
+                                 */
+                                if (isolated)
+                                        restart_pfn = max(restart_pfn, pfn);
+                        }
+                }
+                pfn -= pageblock_nr_pages;
+        }
+        spin_unlock_irqrestore(&zone->lock, flags);
+        /* split_free_page does not map the pages, so do it here */
+        list_for_each_entry(page, freelist, lru) {
+                arch_alloc_page(page, 0);
+                kernel_map_pages(page, 1, 1);
+        }
+        cc->free_pfn = restart_pfn;
+        cc->nr_freepages = nr_freepages;
+}
+/*
+ * Count the anon and file pages queued on cc->migratepages, store the
+ * totals in cc->nr_anon and cc->nr_file, and add them to the zone's
+ * NR_ISOLATED_ANON and NR_ISOLATED_FILE counters.
+ */
+static void acct_isolated(struct zone *zone, struct compact_control *cc)
+{
+        unsigned int per_lru[NR_LRU_LISTS] = { 0, };
+        struct page *page;
+        /* Bucket every queued page by its base LRU type */
+        list_for_each_entry(page, &cc->migratepages, lru)
+                per_lru[page_lru_base_type(page)]++;
+        cc->nr_anon = per_lru[LRU_ACTIVE_ANON] + per_lru[LRU_INACTIVE_ANON];
+        cc->nr_file = per_lru[LRU_ACTIVE_FILE] + per_lru[LRU_INACTIVE_FILE];
+        __mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon);
+        __mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file);
+}
+/*
+ * Report whether @zone has more pages isolated from its LRU lists than it
+ * has pages on its inactive lists. Similar to the check reclaim makes, but
+ * different enough that the logic is not shared.
+ */
+static bool too_many_isolated(struct zone *zone)
+{
+        unsigned long nr_inactive, nr_isolated;
+        nr_inactive = zone_page_state(zone, NR_INACTIVE_FILE);
+        nr_inactive += zone_page_state(zone, NR_INACTIVE_ANON);
+        nr_isolated = zone_page_state(zone, NR_ISOLATED_FILE);
+        nr_isolated += zone_page_state(zone, NR_ISOLATED_ANON);
+        return nr_isolated > nr_inactive;
+}
+/*
+ * Isolate all pages that can be migrated from the block pointed to by
+ * the migrate scanner within compact_control.
+ *
+ * Isolated pages are taken off the zone LRU and queued on cc->migratepages.
+ * The scan stops early once cc->nr_migratepages reaches COMPACT_CLUSTER_MAX,
+ * and cc->migrate_pfn is advanced to wherever the scan stopped.
+ *
+ * Returns cc->nr_migratepages after the scan, or 0 without scanning when
+ * the block would cross the free scanner, starts in a memory hole, or a
+ * fatal signal arrives while waiting for the isolated page count to drop.
+ */
+static unsigned long isolate_migratepages(struct zone *zone,
+                                        struct compact_control *cc)
+{
+        unsigned long low_pfn, end_pfn;
+        struct list_head *migratelist = &cc->migratepages;
+        /* Do not scan outside zone boundaries */
+        low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
+        /*
+         * Only scan within a pageblock boundary. low_pfn is not always
+         * pageblock aligned: the scan below can stop mid-block once
+         * COMPACT_CLUSTER_MAX pages are isolated and the next call resumes
+         * from there. Rounding up low_pfn + 1 keeps the scan inside the
+         * block that contains low_pfn, whereas rounding up
+         * low_pfn + pageblock_nr_pages would run on to the end of the
+         * following block, which the checks below have not vetted.
+         */
+        end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);
+        /* Do not cross the free scanner or scan within a memory hole */
+        if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
+                cc->migrate_pfn = end_pfn;
+                return 0;
+        }
+        /*
+         * Ensure that there are not too many pages isolated from the LRU
+         * list by either parallel reclaimers or compaction. If there are,
+         * delay for some time until fewer pages are isolated
+         */
+        while (unlikely(too_many_isolated(zone))) {
+                congestion_wait(BLK_RW_ASYNC, HZ/10);
+                if (fatal_signal_pending(current))
+                        return 0;
+        }
+        /* Time to isolate some pages for migration */
+        spin_lock_irq(&zone->lru_lock);
+        for (; low_pfn < end_pfn; low_pfn++) {
+                struct page *page;
+                if (!pfn_valid_within(low_pfn))
+                        continue;
+                /* Get the page and skip if free */
+                page = pfn_to_page(low_pfn);
+                if (PageBuddy(page))
+                        continue;
+                /* Try isolate the page */
+                if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
+                        continue;
+                /* Successfully isolated: move it from the LRU to migratelist */
+                del_page_from_lru_list(zone, page, page_lru(page));
+                list_add(&page->lru, migratelist);
+                mem_cgroup_del_lru(page);
+                cc->nr_migratepages++;
+                /* Avoid isolating too much */
+                if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
+                        break;
+        }
+        /* Account the isolated pages while still holding lru_lock */
+        acct_isolated(zone, cc);
+        spin_unlock_irq(&zone->lru_lock);
+        /* Remember where to resume the migration scan */
+        cc->migrate_pfn = low_pfn;
+        return cc->nr_migratepages;
+}
+/*
+ * Allocation callback handed to migrate_pages(). Instead of asking the
+ * page allocator it hands out pages queued on cc->freepages, refilling
+ * that list through isolate_freepages() when it has run dry. @data carries
+ * the compact_control pointer. Returns NULL when no free page is available.
+ */
+static struct page *compaction_alloc(struct page *migratepage,
+                                        unsigned long data,
+                                        int **result)
+{
+        struct compact_control *cc = (struct compact_control *)data;
+        struct list_head *pool = &cc->freepages;
+        struct page *target;
+        /* Refill the pool on demand */
+        if (list_empty(pool))
+                isolate_freepages(cc->zone, cc);
+        /* Still empty: there is nothing to hand out */
+        if (list_empty(pool))
+                return NULL;
+        /* Take the page at the head of the pool */
+        target = list_entry(pool->next, struct page, lru);
+        list_del(&target->lru);
+        cc->nr_freepages--;
+        return target;
+}
+/*
+ * migrate_pages() knows nothing about compact_control, so nr_migratepages
+ * and nr_freepages cannot be kept exact while migration runs. Once it has
+ * finished, recount both lists by walking them.
+ */
+static void update_nr_listpages(struct compact_control *cc)
+{
+        struct page *page;
+        /* Pages still waiting to be migrated */
+        cc->nr_migratepages = 0;
+        list_for_each_entry(page, &cc->migratepages, lru)
+                cc->nr_migratepages++;
+        /* Free pages still held for migration targets */
+        cc->nr_freepages = 0;
+        list_for_each_entry(page, &cc->freepages, lru)
+                cc->nr_freepages++;
+}
+/*
+ * Decide whether the compaction run should keep going. Returns
+ * COMPACT_PARTIAL when a fatal signal is pending, COMPACT_COMPLETE once
+ * the migrate and free scanners have met, and COMPACT_CONTINUE otherwise.
+ */
+static int compact_finished(struct zone *zone,
+                                                struct compact_control *cc)
+{
+        int status = COMPACT_CONTINUE;
+        if (fatal_signal_pending(current))
+                status = COMPACT_PARTIAL;
+        else if (cc->free_pfn <= cc->migrate_pfn)
+                status = COMPACT_COMPLETE;
+        return status;
+}
+/*
+ * Run compaction over @zone: repeatedly isolate a batch of movable pages
+ * from the bottom of the zone and migrate them into free pages isolated
+ * from the top, until compact_finished() says to stop. Returns the final
+ * compact_finished() status.
+ */
+static int compact_zone(struct zone *zone, struct compact_control *cc)
+{
+        unsigned long zone_end = zone->zone_start_pfn + zone->spanned_pages;
+        int ret;
+        /*
+         * Setup to move all movable pages to the end of the zone: the
+         * migration scanner starts at the first PFN, the free scanner at
+         * the zone end rounded down to a pageblock boundary.
+         */
+        cc->migrate_pfn = zone->zone_start_pfn;
+        cc->free_pfn = zone_end & ~(pageblock_nr_pages-1);
+        migrate_prep_local();
+        for (;;) {
+                unsigned long nr_isolated, nr_failed;
+                ret = compact_finished(zone, cc);
+                if (ret != COMPACT_CONTINUE)
+                        break;
+                /* Nothing isolated from this block, try the next one */
+                if (!isolate_migratepages(zone, cc))
+                        continue;
+                nr_isolated = cc->nr_migratepages;
+                migrate_pages(&cc->migratepages, compaction_alloc,
+                                                (unsigned long)cc, 0);
+                /* Recount the lists now that migration has run */
+                update_nr_listpages(cc);
+                nr_failed = cc->nr_migratepages;
+                count_vm_event(COMPACTBLOCKS);
+                count_vm_events(COMPACTPAGES, nr_isolated - nr_failed);
+                if (nr_failed)
+                        count_vm_events(COMPACTPAGEFAILED, nr_failed);
+                /* Pages still queued were not migrated; put them back on the LRU */
+                if (!list_empty(&cc->migratepages)) {
+                        putback_lru_pages(&cc->migratepages);
+                        cc->nr_migratepages = 0;
+                }
+        }
+        /* Release free pages and check accounting */
+        cc->nr_freepages -= release_freepages(&cc->freepages);
+        VM_BUG_ON(cc->nr_freepages != 0);
+        return ret;
+}
author	Mel Gorman <mel@csn.ul.ie>	2010-05-24 17:32:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-05-25 11:06:59 -0400
commit	748446bb6b5a9390b546af38ec899c868a9dbcf0 (patch)
tree	4c27d0805a5e094b39ff938ad60dd270b953a79f /mm/compaction.c
parent	c175a0ce7584e5b498fff8cbdb9aa7912aa9fbba (diff)

diff --git a/mm/compaction.c b/mm/compaction.c new file mode 100644 index 000000000000..be1ff3f7552b --- /dev/null +++ b/mm/compaction.c
@@ -0,0 +1,393 @@
	1	/*
	2	* linux/mm/compaction.c
	3	*
	4	* Memory compaction for the reduction of external fragmentation. Note that
	5	* this heavily depends upon page migration to do all the real heavy
	6	* lifting
	7	*
	8	* Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
	9	*/
	10	#include <linux/swap.h>
	11	#include <linux/migrate.h>
	12	#include <linux/compaction.h>
	13	#include <linux/mm_inline.h>
	14	#include <linux/backing-dev.h>
	15	#include "internal.h"
	16
	17	/*
	18	* compact_control is used to track pages being migrated and the free pages
	19	* they are being migrated to during memory compaction. The free_pfn starts
	20	* at the end of a zone and migrate_pfn begins at the start. Movable pages
	21	* are moved to the end of a zone during a compaction run and the run
	22	* completes when free_pfn <= migrate_pfn
	23	*/
	24	struct compact_control {
	25	struct list_head freepages; /* List of free pages to migrate to */
	26	struct list_head migratepages; /* List of pages being migrated */
	27	unsigned long nr_freepages; /* Number of isolated free pages */
	28	unsigned long nr_migratepages; /* Number of pages to migrate */
	29	unsigned long free_pfn; /* isolate_freepages search base */
	30	unsigned long migrate_pfn; /* isolate_migratepages search base */
	31
	32	/* Account for isolated anon and file pages */
	33	unsigned long nr_anon;
	34	unsigned long nr_file;
	35
	36	struct zone *zone;
	37	};
	38
	39	static unsigned long release_freepages(struct list_head *freelist)
	40	{
	41	struct page *page, *next;
	42	unsigned long count = 0;
	43
	44	list_for_each_entry_safe(page, next, freelist, lru) {
	45	list_del(&page->lru);
	46	__free_page(page);
	47	count++;
	48	}
	49
	50	return count;
	51	}
	52
	53	/* Isolate free pages onto a private freelist. Must hold zone->lock */
	54	static unsigned long isolate_freepages_block(struct zone *zone,
	55	unsigned long blockpfn,
	56	struct list_head *freelist)
	57	{
	58	unsigned long zone_end_pfn, end_pfn;
	59	int total_isolated = 0;
	60	struct page *cursor;
	61
	62	/* Get the last PFN we should scan for free pages at */
	63	zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
	64	end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn);
	65
	66	/* Find the first usable PFN in the block to initialise page cursor */
	67	for (; blockpfn < end_pfn; blockpfn++) {
	68	if (pfn_valid_within(blockpfn))
	69	break;
	70	}
	71	cursor = pfn_to_page(blockpfn);
	72
	73	/* Isolate free pages. This assumes the block is valid */
	74	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
	75	int isolated, i;
	76	struct page *page = cursor;
	77
	78	if (!pfn_valid_within(blockpfn))
	79	continue;
	80
	81	if (!PageBuddy(page))
	82	continue;
	83
	84	/* Found a free page, break it into order-0 pages */
	85	isolated = split_free_page(page);
	86	total_isolated += isolated;
	87	for (i = 0; i < isolated; i++) {
	88	list_add(&page->lru, freelist);
	89	page++;
	90	}
	91
	92	/* If a page was split, advance to the end of it */
	93	if (isolated) {
	94	blockpfn += isolated - 1;
	95	cursor += isolated - 1;
	96	}
	97	}
	98
	99	return total_isolated;
	100	}
	101
	102	/* Returns true if the page is within a block suitable for migration to */
	103	static bool suitable_migration_target(struct page *page)
	104	{
	105
	106	int migratetype = get_pageblock_migratetype(page);
	107
	108	/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
	109	if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
	110	return false;
	111
	112	/* If the page is a large free page, then allow migration */
	113	if (PageBuddy(page) && page_order(page) >= pageblock_order)
	114	return true;
	115
	116	/* If the block is MIGRATE_MOVABLE, allow migration */
	117	if (migratetype == MIGRATE_MOVABLE)
	118	return true;
	119
	120	/* Otherwise skip the block */
	121	return false;
	122	}
	123
	124	/*
	125	* Based on information in the current compact_control, find blocks
	126	* suitable for isolating free pages from and then isolate them.
	127	*/
	128	static void isolate_freepages(struct zone *zone,
	129	struct compact_control *cc)
	130	{
	131	struct page *page;
	132	unsigned long high_pfn, low_pfn, pfn;
	133	unsigned long flags;
	134	int nr_freepages = cc->nr_freepages;
	135	struct list_head *freelist = &cc->freepages;
	136
	137	pfn = cc->free_pfn;
	138	low_pfn = cc->migrate_pfn + pageblock_nr_pages;
	139	high_pfn = low_pfn;
	140
	141	/*
	142	* Isolate free pages until enough are available to migrate the
	143	* pages on cc->migratepages. We stop searching if the migrate
	144	* and free page scanners meet or enough free pages are isolated.
	145	*/
	146	spin_lock_irqsave(&zone->lock, flags);
	147	for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
	148	pfn -= pageblock_nr_pages) {
	149	unsigned long isolated;
	150
	151	if (!pfn_valid(pfn))
	152	continue;
	153
	154	/*
	155	* Check for overlapping nodes/zones. It's possible on some
	156	* configurations to have a setup like
	157	* node0 node1 node0
	158	* i.e. it's possible that all pages within a zones range of
	159	* pages do not belong to a single zone.
	160	*/
	161	page = pfn_to_page(pfn);
	162	if (page_zone(page) != zone)
	163	continue;
	164
	165	/* Check the block is suitable for migration */
	166	if (!suitable_migration_target(page))
	167	continue;
	168
	169	/* Found a block suitable for isolating free pages from */
	170	isolated = isolate_freepages_block(zone, pfn, freelist);
	171	nr_freepages += isolated;
	172
	173	/*
	174	* Record the highest PFN we isolated pages from. When next
	175	* looking for free pages, the search will restart here as
	176	* page migration may have returned some pages to the allocator
	177	*/
	178	if (isolated)
	179	high_pfn = max(high_pfn, pfn);
	180	}
	181	spin_unlock_irqrestore(&zone->lock, flags);
	182
	183	/* split_free_page does not map the pages */
	184	list_for_each_entry(page, freelist, lru) {
	185	arch_alloc_page(page, 0);
	186	kernel_map_pages(page, 1, 1);
	187	}
	188
	189	cc->free_pfn = high_pfn;
	190	cc->nr_freepages = nr_freepages;
	191	}
	192
	193	/* Update the number of anon and file isolated pages in the zone */
	194	static void acct_isolated(struct zone *zone, struct compact_control *cc)
	195	{
	196	struct page *page;
	197	unsigned int count[NR_LRU_LISTS] = { 0, };
	198
	199	list_for_each_entry(page, &cc->migratepages, lru) {
	200	int lru = page_lru_base_type(page);
	201	count[lru]++;
	202	}
	203
	204	cc->nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
	205	cc->nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
	206	__mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon);
	207	__mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file);
	208	}
	209
	210	/* Similar to reclaim, but different enough that they don't share logic */
	211	static bool too_many_isolated(struct zone *zone)
	212	{
	213
	214	unsigned long inactive, isolated;
	215
	216	inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
	217	zone_page_state(zone, NR_INACTIVE_ANON);
	218	isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
	219	zone_page_state(zone, NR_ISOLATED_ANON);
	220
	221	return isolated > inactive;
	222	}
	223
	224	/*
	225	* Isolate all pages that can be migrated from the block pointed to by
	226	* the migrate scanner within compact_control.
	227	*/
	228	static unsigned long isolate_migratepages(struct zone *zone,
	229	struct compact_control *cc)
	230	{
	231	unsigned long low_pfn, end_pfn;
	232	struct list_head *migratelist = &cc->migratepages;
	233
	234	/* Do not scan outside zone boundaries */
	235	low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
	236
	237	/* Only scan within a pageblock boundary */
	238	end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages);
	239
	240	/* Do not cross the free scanner or scan within a memory hole */
	241	if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
	242	cc->migrate_pfn = end_pfn;
	243	return 0;
	244	}
	245
	246	/*
	247	* Ensure that there are not too many pages isolated from the LRU
	248	* list by either parallel reclaimers or compaction. If there are,
	249	* delay for some time until fewer pages are isolated
	250	*/
	251	while (unlikely(too_many_isolated(zone))) {
	252	congestion_wait(BLK_RW_ASYNC, HZ/10);
	253
	254	if (fatal_signal_pending(current))
	255	return 0;
	256	}
	257
	258	/* Time to isolate some pages for migration */
	259	spin_lock_irq(&zone->lru_lock);
	260	for (; low_pfn < end_pfn; low_pfn++) {
	261	struct page *page;
	262	if (!pfn_valid_within(low_pfn))
	263	continue;
	264
	265	/* Get the page and skip if free */
	266	page = pfn_to_page(low_pfn);
	267	if (PageBuddy(page))
	268	continue;
	269
	270	/* Try isolate the page */
	271	if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
	272	continue;
	273
	274	/* Successfully isolated */
	275	del_page_from_lru_list(zone, page, page_lru(page));
	276	list_add(&page->lru, migratelist);
	277	mem_cgroup_del_lru(page);
	278	cc->nr_migratepages++;
	279
	280	/* Avoid isolating too much */
	281	if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
	282	break;
	283	}
	284
	285	acct_isolated(zone, cc);
	286
	287	spin_unlock_irq(&zone->lru_lock);
	288	cc->migrate_pfn = low_pfn;
	289
	290	return cc->nr_migratepages;
	291	}
	292
	293	/*
	294	* This is a migrate-callback that "allocates" freepages by taking pages
	295	* from the isolated freelists in the block we are migrating to.
	296	*/
	297	static struct page *compaction_alloc(struct page *migratepage,
	298	unsigned long data,
	299	int **result)
	300	{
	301	struct compact_control *cc = (struct compact_control *)data;
	302	struct page *freepage;
	303
	304	/* Isolate free pages if necessary */
	305	if (list_empty(&cc->freepages)) {
	306	isolate_freepages(cc->zone, cc);
	307
	308	if (list_empty(&cc->freepages))
	309	return NULL;
	310	}
	311
	312	freepage = list_entry(cc->freepages.next, struct page, lru);
	313	list_del(&freepage->lru);
	314	cc->nr_freepages--;
	315
	316	return freepage;
	317	}
	318
	319	/*
	320	* We cannot control nr_migratepages and nr_freepages fully when migration is
	321	* running as migrate_pages() has no knowledge of compact_control. When
	322	* migration is complete, we count the number of pages on the lists by hand.
	323	*/
	324	static void update_nr_listpages(struct compact_control *cc)
	325	{
	326	int nr_migratepages = 0;
	327	int nr_freepages = 0;
	328	struct page *page;
	329
	330	list_for_each_entry(page, &cc->migratepages, lru)
	331	nr_migratepages++;
	332	list_for_each_entry(page, &cc->freepages, lru)
	333	nr_freepages++;
	334
	335	cc->nr_migratepages = nr_migratepages;
	336	cc->nr_freepages = nr_freepages;
	337	}
	338
	339	static int compact_finished(struct zone *zone,
	340	struct compact_control *cc)
	341	{
	342	if (fatal_signal_pending(current))
	343	return COMPACT_PARTIAL;
	344
	345	/* Compaction run completes if the migrate and free scanner meet */
	346	if (cc->free_pfn <= cc->migrate_pfn)
	347	return COMPACT_COMPLETE;
	348
	349	return COMPACT_CONTINUE;
	350	}
	351
	352	static int compact_zone(struct zone *zone, struct compact_control *cc)
	353	{
	354	int ret;
	355
	356	/* Setup to move all movable pages to the end of the zone */
	357	cc->migrate_pfn = zone->zone_start_pfn;
	358	cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
	359	cc->free_pfn &= ~(pageblock_nr_pages-1);
	360
	361	migrate_prep_local();
	362
	363	while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
	364	unsigned long nr_migrate, nr_remaining;
	365
	366	if (!isolate_migratepages(zone, cc))
	367	continue;
	368
	369	nr_migrate = cc->nr_migratepages;
	370	migrate_pages(&cc->migratepages, compaction_alloc,
	371	(unsigned long)cc, 0);
	372	update_nr_listpages(cc);
	373	nr_remaining = cc->nr_migratepages;
	374
	375	count_vm_event(COMPACTBLOCKS);
	376	count_vm_events(COMPACTPAGES, nr_migrate - nr_remaining);
	377	if (nr_remaining)
	378	count_vm_events(COMPACTPAGEFAILED, nr_remaining);
	379
	380	/* Release LRU pages not migrated */
	381	if (!list_empty(&cc->migratepages)) {
	382	putback_lru_pages(&cc->migratepages);
	383	cc->nr_migratepages = 0;
	384	}
	385
	386	}
	387
	388	/* Release free pages and check accounting */
	389	cc->nr_freepages -= release_freepages(&cc->freepages);
	390	VM_BUG_ON(cc->nr_freepages != 0);
	391
	392	return ret;
	393	}