mm: compaction: direct compact when a high-order allocation fails

Ordinarily when a high-order allocation fails, direct reclaim is entered to free pages to satisfy the allocation. With this patch, it is determined if an allocation failed due to external fragmentation instead of low memory and if so, the calling process will compact until a suitable page is freed. Compaction by moving pages in memory is considerably cheaper than paging out to disk and works where there are locked pages or no swap. If compaction fails to free a page of a suitable size, then reclaim will still occur.

Direct compaction returns as soon as possible. As each block is compacted, it is checked if a suitable page has been freed and if so, it returns.

[akpm@linux-foundation.org: Fix build errors]
[aarcange@redhat.com: fix count_vm_event preempt in memory compaction direct reclaim]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Mel Gorman <mel@csn.ul.ie> 2010-05-24 17:32:30 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-05-25 11:06:59 -0400
commit: 56de7263fcf3eb10c8dcdf8d59a9cec831795f3f (patch)
tree: 164637c0b678e20adfdcec4129563d9234faf405 /mm/compaction.c
parent: ed4a6d7f0676db50b5023cc01f6cda82a2f2a307 (diff)
1 files changed, 117 insertions, 0 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index f61f77983ff4..9583e193dc47 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -35,6 +35,8 @@ struct compact_control {
        unsigned long nr_anon;
        unsigned long nr_file;
+        unsigned int order;             /* order a direct compactor needs */
+        int migratetype;                /* MOVABLE, RECLAIMABLE etc */
        struct zone *zone;
 };
@@ -341,6 +343,9 @@ static void update_nr_listpages(struct compact_control *cc)
 static int compact_finished(struct zone *zone,
                                                struct compact_control *cc)
 {
+        unsigned int order;
+        unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
        if (fatal_signal_pending(current))
                return COMPACT_PARTIAL;
@@ -348,6 +353,24 @@ static int compact_finished(struct zone *zone,
        if (cc->free_pfn <= cc->migrate_pfn)
                return COMPACT_COMPLETE;
+        /* Compaction run is not finished if the watermark is not met */
+        if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
+                return COMPACT_CONTINUE;
+        if (cc->order == -1)
+                return COMPACT_CONTINUE;
+        /* Direct compactor: Is a suitable page free? */
+        for (order = cc->order; order < MAX_ORDER; order++) {
+                /* Job done if page is free of the right migratetype */
+                if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
+                        return COMPACT_PARTIAL;
+                /* Job done if allocation would set block type */
+                if (order >= pageblock_order && zone->free_area[order].nr_free)
+                        return COMPACT_PARTIAL;
+        }
        return COMPACT_CONTINUE;
 }
@@ -394,6 +417,99 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
        return ret;
 }
+static unsigned long compact_zone_order(struct zone *zone,
+                                                int order, gfp_t gfp_mask)
+{       /*
+         * Run one compact_zone() call over @zone on behalf of a direct
+         * compactor. A compact_control is set up with the requested
+         * @order and the migratetype derived from @gfp_mask, its
+         * freepages/migratepages list heads are initialised, and the
+         * result of compact_zone() is returned.
+         *
+         * NOTE(review): compact_zone() is declared as returning int while
+         * this wrapper returns unsigned long - confirm the widening is
+         * intended.
+         */
+        struct compact_control cc = {
+                .nr_freepages = 0,
+                .nr_migratepages = 0,
+                .order = order,         /* order the direct compactor needs */
+                .migratetype = allocflags_to_migratetype(gfp_mask),     /* migratetype implied by the GFP mask */
+                .zone = zone,
+        };
+        INIT_LIST_HEAD(&cc.freepages);
+        INIT_LIST_HEAD(&cc.migratepages);
+        return compact_zone(zone, &cc);
+}
+/**
+ * try_to_compact_pages - Direct compact to satisfy a high-order allocation
+ * @zonelist: The zonelist used for the current allocation
+ * @order: The order of the current allocation
+ * @gfp_mask: The GFP mask of the current allocation
+ * @nodemask: The allowed nodes to allocate from
+ *
+ * This is the main entry point for direct page compaction.
+ *
+ * Each zone in @zonelist (limited by gfp_zone(@gfp_mask) and @nodemask) is
+ * visited in turn. A zone is compacted only if it passes an order-0
+ * watermark check with extra headroom and its fragmentation index does not
+ * fall in the 0..500 range. The walk stops at the first zone that passes
+ * the watermark check for @order.
+ *
+ * Returns COMPACT_SKIPPED if @order is not above PAGE_ALLOC_COSTLY_ORDER,
+ * if @gfp_mask lacks __GFP_FS or __GFP_IO, or if every zone was skipped.
+ * Returns COMPACT_PARTIAL if a zone with fragmentation index -1 already
+ * passes the watermark check for @order. Otherwise returns the highest
+ * status reported by compact_zone_order() for the zones that were
+ * compacted.
+ */
+unsigned long try_to_compact_pages(struct zonelist *zonelist,
+                        int order, gfp_t gfp_mask, nodemask_t *nodemask)
+{
+        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+        int may_enter_fs = gfp_mask & __GFP_FS;
+        int may_perform_io = gfp_mask & __GFP_IO;
+        unsigned long watermark;
+        struct zoneref *z;
+        struct zone *zone;
+        int rc = COMPACT_SKIPPED;       /* result if no zone ends up being compacted */
+        /*
+         * Check whether it is worth even starting compaction. The order check is
+         * made because an assumption is made that the page allocator can satisfy
+         * the "cheaper" orders without taking special steps.
+         * Compaction is likewise skipped unless the GFP mask carries both
+         * __GFP_FS and __GFP_IO.
+         */
+        if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
+                return rc;
+        count_vm_event(COMPACTSTALL);   /* counted once per call that gets past the checks above */
+        /* Compact each zone in the list */
+        for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
+                                                                nodemask) {
+                int fragindex;
+                int status;
+                /*
+                 * Watermarks for order-0 must be met for compaction. Note
+                 * the 2UL. This is because during migration, copies of
+                 * pages need to be allocated and for a short time, the
+                 * footprint is higher
+                 */
+                watermark = low_wmark_pages(zone) + (2UL << order);
+                if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+                        continue;       /* not enough order-0 headroom in this zone; try the next */
+                /*
+                 * fragmentation index determines if allocation failures are
+                 * due to low memory or external fragmentation
+                 *
+                 * index of -1 implies allocations might succeed depending
+                 *      on watermarks
+                 * index towards 0 implies failure is due to lack of memory
+                 * index towards 1000 implies failure is due to fragmentation
+                 *
+                 * Only compact if a failure would be due to fragmentation.
+                 */
+                fragindex = fragmentation_index(zone, order);
+                if (fragindex >= 0 && fragindex <= 500)
+                        continue;       /* 0..500: treated as lack of memory, so skip this zone */
+                if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
+                        rc = COMPACT_PARTIAL;   /* watermark for this order already met; no compaction run */
+                        break;
+                }
+                status = compact_zone_order(zone, order, gfp_mask);
+                rc = max(status, rc);   /* keep the highest status seen across zones */
+                if (zone_watermark_ok(zone, order, watermark, 0, 0))
+                        break;  /* this zone now passes the check for the order; stop walking */
+        }
+        return rc;
+}
 /* Compact all zones within a node */
 static int compact_node(int nid)
 {
@@ -412,6 +528,7 @@ static int compact_node(int nid)
                struct compact_control cc = {
                        .nr_freepages = 0,
                        .nr_migratepages = 0,
+                        .order = -1,
                };
                zone = &pgdat->node_zones[zoneid];
author	Mel Gorman <mel@csn.ul.ie>	2010-05-24 17:32:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-05-25 11:06:59 -0400
commit	56de7263fcf3eb10c8dcdf8d59a9cec831795f3f (patch)
tree	164637c0b678e20adfdcec4129563d9234faf405 /mm/compaction.c
parent	ed4a6d7f0676db50b5023cc01f6cda82a2f2a307 (diff)

diff --git a/mm/compaction.c b/mm/compaction.c
index f61f77983ff4..9583e193dc47 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -35,6 +35,8 @@ struct compact_control {
35	unsigned long nr_anon;	35	unsigned long nr_anon;
36	unsigned long nr_file;	36	unsigned long nr_file;
37		37
		38	unsigned int order; /* order a direct compactor needs */
		39	int migratetype; /* MOVABLE, RECLAIMABLE etc */
38	struct zone *zone;	40	struct zone *zone;
39	};	41	};
40		42
@@ -341,6 +343,9 @@ static void update_nr_listpages(struct compact_control *cc)
341	static int compact_finished(struct zone *zone,	343	static int compact_finished(struct zone *zone,
342	struct compact_control *cc)	344	struct compact_control *cc)
343	{	345	{
		346	unsigned int order;
		347	unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
		348
344	if (fatal_signal_pending(current))	349	if (fatal_signal_pending(current))
345	return COMPACT_PARTIAL;	350	return COMPACT_PARTIAL;
346		351
@@ -348,6 +353,24 @@ static int compact_finished(struct zone *zone,
348	if (cc->free_pfn <= cc->migrate_pfn)	353	if (cc->free_pfn <= cc->migrate_pfn)
349	return COMPACT_COMPLETE;	354	return COMPACT_COMPLETE;
350		355
		356	/* Compaction run is not finished if the watermark is not met */
		357	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
		358	return COMPACT_CONTINUE;
		359
		360	if (cc->order == -1)
		361	return COMPACT_CONTINUE;
		362
		363	/* Direct compactor: Is a suitable page free? */
		364	for (order = cc->order; order < MAX_ORDER; order++) {
		365	/* Job done if page is free of the right migratetype */
		366	if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
		367	return COMPACT_PARTIAL;
		368
		369	/* Job done if allocation would set block type */
		370	if (order >= pageblock_order && zone->free_area[order].nr_free)
		371	return COMPACT_PARTIAL;
		372	}
		373
351	return COMPACT_CONTINUE;	374	return COMPACT_CONTINUE;
352	}	375	}
353		376
@@ -394,6 +417,99 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
394	return ret;	417	return ret;
395	}	418	}
396		419
		420	static unsigned long compact_zone_order(struct zone *zone,
		421	int order, gfp_t gfp_mask)
		422	{
		423	struct compact_control cc = {
		424	.nr_freepages = 0,
		425	.nr_migratepages = 0,
		426	.order = order,
		427	.migratetype = allocflags_to_migratetype(gfp_mask),
		428	.zone = zone,
		429	};
		430	INIT_LIST_HEAD(&cc.freepages);
		431	INIT_LIST_HEAD(&cc.migratepages);
		432
		433	return compact_zone(zone, &cc);
		434	}
		435
		436	/**
		437	* try_to_compact_pages - Direct compact to satisfy a high-order allocation
		438	* @zonelist: The zonelist used for the current allocation
		439	* @order: The order of the current allocation
		440	* @gfp_mask: The GFP mask of the current allocation
		441	* @nodemask: The allowed nodes to allocate from
		442	*
		443	* This is the main entry point for direct page compaction.
		444	*/
		445	unsigned long try_to_compact_pages(struct zonelist *zonelist,
		446	int order, gfp_t gfp_mask, nodemask_t *nodemask)
		447	{
		448	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
		449	int may_enter_fs = gfp_mask & __GFP_FS;
		450	int may_perform_io = gfp_mask & __GFP_IO;
		451	unsigned long watermark;
		452	struct zoneref *z;
		453	struct zone *zone;
		454	int rc = COMPACT_SKIPPED;
		455
		456	/*
		457	* Check whether it is worth even starting compaction. The order check is
		458	* made because an assumption is made that the page allocator can satisfy
		459	* the "cheaper" orders without taking special steps
		460	*/
		461	if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
		462	return rc;
		463
		464	count_vm_event(COMPACTSTALL);
		465
		466	/* Compact each zone in the list */
		467	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
		468	nodemask) {
		469	int fragindex;
		470	int status;
		471
		472	/*
		473	* Watermarks for order-0 must be met for compaction. Note
		474	* the 2UL. This is because during migration, copies of
		475	* pages need to be allocated and for a short time, the
		476	* footprint is higher
		477	*/
		478	watermark = low_wmark_pages(zone) + (2UL << order);
		479	if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
		480	continue;
		481
		482	/*
		483	* fragmentation index determines if allocation failures are
		484	* due to low memory or external fragmentation
		485	*
		486	* index of -1 implies allocations might succeed depending
		487	* on watermarks
		488	* index towards 0 implies failure is due to lack of memory
		489	* index towards 1000 implies failure is due to fragmentation
		490	*
		491	* Only compact if a failure would be due to fragmentation.
		492	*/
		493	fragindex = fragmentation_index(zone, order);
		494	if (fragindex >= 0 && fragindex <= 500)
		495	continue;
		496
		497	if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
		498	rc = COMPACT_PARTIAL;
		499	break;
		500	}
		501
		502	status = compact_zone_order(zone, order, gfp_mask);
		503	rc = max(status, rc);
		504
		505	if (zone_watermark_ok(zone, order, watermark, 0, 0))
		506	break;
		507	}
		508
		509	return rc;
		510	}
		511
		512
397	/* Compact all zones within a node */	513	/* Compact all zones within a node */
398	static int compact_node(int nid)	514	static int compact_node(int nid)
399	{	515	{
@@ -412,6 +528,7 @@ static int compact_node(int nid)
412	struct compact_control cc = {	528	struct compact_control cc = {
413	.nr_freepages = 0,	529	.nr_freepages = 0,
414	.nr_migratepages = 0,	530	.nr_migratepages = 0,
		531	.order = -1,
415	};	532	};
416		533
417	zone = &pgdat->node_zones[zoneid];	534	zone = &pgdat->node_zones[zoneid];