author    Mel Gorman <mgorman@suse.de>	2012-10-08 19:29:12 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>	2012-10-09 03:22:21 -0400
commit    1fb3f8ca0e9222535a39b884cb67a34628411b9f (patch)
tree      5e72498f44d4fcae18f29eb87dd05a849a7a23c9 /mm/compaction.c
parent    83fde0f22872aa8c1d46f775cc7bdfa864499e65 (diff)
mm: compaction: capture a suitable high-order page immediately when it is made available
While compaction is migrating pages to free up large contiguous blocks for allocation, it races with other allocation requests that may steal these blocks or break them up. This patch alters direct compaction to capture a suitable free page as soon as it becomes available, reducing the window for that race. It uses logic similar to split_free_page() to ensure that watermarks are still obeyed.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
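The contract this patch introduces is easiest to see from the caller's side: direct compaction is handed a struct page ** slot, compaction stores any page it captures into that slot, and the allocator consumes the captured page instead of rescanning the free lists. Below is a minimal, self-contained userspace C sketch of that out-parameter contract only; the struct page stand-in and the fake_compact() helper are illustrative, not kernel code.

	#include <stdio.h>

	/* Illustrative stand-in for the kernel's struct page. */
	struct page { unsigned long pfn; };

	enum { COMPACT_CONTINUE, COMPACT_PARTIAL };

	/*
	 * Model of the capture contract: the compactor writes a captured
	 * page into the caller-supplied slot and reports partial success.
	 */
	static int fake_compact(struct page **capture_slot)
	{
		static struct page freed = { .pfn = 42 };

		/* ... migration has just assembled a suitable free page ... */
		*capture_slot = &freed;	/* claim it before a racing allocator can */
		return COMPACT_PARTIAL;
	}

	int main(void)
	{
		struct page *captured = NULL;

		if (fake_compact(&captured) == COMPACT_PARTIAL && captured)
			printf("using captured pfn %lu\n", captured->pfn);
		else
			printf("no capture; fall back to the normal allocation path\n");
		return 0;
	}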
Diffstat (limited to 'mm/compaction.c')
-rw-r--r--	mm/compaction.c	| 90
1 file changed, 79 insertions(+), 11 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 7168edc7592c..0fbc6b73a522 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -91,6 +91,60 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock,
 	return compact_checklock_irqsave(lock, flags, false, cc);
 }
 
+static void compact_capture_page(struct compact_control *cc)
+{
+	unsigned long flags;
+	int mtype, mtype_low, mtype_high;
+
+	if (!cc->page || *cc->page)
+		return;
+
+	/*
+	 * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
+	 * regardless of the migratetype of the free list it is captured from.
+	 * This is fine because the order for a high-order MIGRATE_MOVABLE
+	 * allocation is typically at least a pageblock size and overall
+	 * fragmentation is not impaired. Other allocation types must
+	 * capture pages from their own migratetype list because otherwise
+	 * they could pollute other pageblocks like MIGRATE_MOVABLE with
+	 * difficult-to-move pages, making fragmentation worse overall.
+	 */
+	if (cc->migratetype == MIGRATE_MOVABLE) {
+		mtype_low = 0;
+		mtype_high = MIGRATE_PCPTYPES;
+	} else {
+		mtype_low = cc->migratetype;
+		mtype_high = cc->migratetype + 1;
+	}
+
+	/* Speculatively examine the free lists without zone lock */
+	for (mtype = mtype_low; mtype < mtype_high; mtype++) {
+		int order;
+		for (order = cc->order; order < MAX_ORDER; order++) {
+			struct page *page;
+			struct free_area *area;
+			area = &(cc->zone->free_area[order]);
+			if (list_empty(&area->free_list[mtype]))
+				continue;
+
+			/* Take the lock and attempt capture of the page */
+			if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
+				return;
+			if (!list_empty(&area->free_list[mtype])) {
+				page = list_entry(area->free_list[mtype].next,
+							struct page, lru);
+				if (capture_free_page(page, cc->order, mtype)) {
+					spin_unlock_irqrestore(&cc->zone->lock,
+								flags);
+					*cc->page = page;
+					return;
+				}
+			}
+			spin_unlock_irqrestore(&cc->zone->lock, flags);
+		}
+	}
+}
+
 /*
  * Isolate free pages onto a private freelist. Caller must hold zone->lock.
  * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
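compact_capture_page() above is built on a double-check pattern: peek at a free list without taking zone->lock, and only when a candidate is visible, trylock and re-check, because a racing allocation can empty the list between the two looks. A minimal runnable userspace C model of that pattern, with a plain counter and a pthread mutex standing in for the real free lists and zone lock (try_capture and zone_lock are illustrative names):

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_mutex_t zone_lock = PTHREAD_MUTEX_INITIALIZER;
	static int free_pages = 1;	/* nonzero: a suitable page looks free */

	static bool try_capture(int *captured)
	{
		/* Speculative check: no lock taken while the list looks empty. */
		if (free_pages == 0)
			return false;

		/* A candidate is visible: take the lock, but never spin on it. */
		if (pthread_mutex_trylock(&zone_lock) != 0)
			return false;

		bool ok = false;
		/* Re-check under the lock: a racing allocator may have won. */
		if (free_pages > 0) {
			free_pages--;	/* the capture itself */
			*captured = 1;
			ok = true;
		}
		pthread_mutex_unlock(&zone_lock);
		return ok;
	}

	int main(void)
	{
		int page = 0;
		printf("captured: %s\n", try_capture(&page) ? "yes" : "no");
		return 0;
	}

Giving up when the trylock fails mirrors compact_trylock_irqsave(): under contention the compactor backs off rather than stalling allocator paths that also need zone->lock.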
@@ -645,7 +699,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 static int compact_finished(struct zone *zone,
 			    struct compact_control *cc)
 {
-	unsigned int order;
 	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
@@ -688,14 +741,22 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_CONTINUE;
 
 	/* Direct compactor: Is a suitable page free? */
-	for (order = cc->order; order < MAX_ORDER; order++) {
-		/* Job done if page is free of the right migratetype */
-		if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
-			return COMPACT_PARTIAL;
-
-		/* Job done if allocation would set block type */
-		if (order >= pageblock_order && zone->free_area[order].nr_free)
-			return COMPACT_PARTIAL;
+	if (cc->page) {
+		/* Was a suitable page captured? */
+		if (*cc->page)
+			return COMPACT_PARTIAL;
+	} else {
+		unsigned int order;
+		for (order = cc->order; order < MAX_ORDER; order++) {
+			struct free_area *area = &zone->free_area[cc->order];
+			/* Job done if page is free of the right migratetype */
+			if (!list_empty(&area->free_list[cc->migratetype]))
+				return COMPACT_PARTIAL;
+
+			/* Job done if allocation would set block type */
+			if (cc->order >= pageblock_order && area->nr_free)
+				return COMPACT_PARTIAL;
+		}
 	}
 
 	return COMPACT_CONTINUE;
@@ -817,6 +878,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 				goto out;
 			}
 		}
+
+		/* Capture a page now if it is a suitable size */
+		compact_capture_page(cc);
 	}
 
 out:
@@ -829,7 +893,8 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
 					int order, gfp_t gfp_mask,
-					bool sync, bool *contended)
+					bool sync, bool *contended,
+					struct page **page)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -839,6 +904,7 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.zone = zone,
 		.sync = sync,
 		.contended = contended,
+		.page = page,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -860,7 +926,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended)
+			bool sync, bool *contended, struct page **page)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -881,7 +947,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		int status;
 
 		status = compact_zone_order(zone, order, gfp_mask, sync,
-						contended);
+						contended, page);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -936,6 +1002,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
 	struct compact_control cc = {
 		.order = order,
 		.sync = false,
+		.page = NULL,
 	};
 
 	return __compact_pgdat(pgdat, &cc);
@@ -946,6 +1013,7 @@ static int compact_node(int nid)
 	struct compact_control cc = {
 		.order = -1,
 		.sync = true,
+		.page = NULL,
 	};
 
 	return __compact_pgdat(NODE_DATA(nid), &cc);
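A design point visible in the last two hunks: the background paths, compact_pgdat() (driven by kswapd) and compact_node() (the explicit compaction triggers), pass .page = NULL, and compact_capture_page() returns immediately when cc->page is NULL. Only direct compaction, which has a specific allocation request waiting, supplies a capture slot.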