author     Mel Gorman <mgorman@suse.de>                      2012-10-08 19:29:12 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2012-10-09 03:22:21 -0400
commit     1fb3f8ca0e9222535a39b884cb67a34628411b9f (patch)
tree       5e72498f44d4fcae18f29eb87dd05a849a7a23c9 /mm
parent     83fde0f22872aa8c1d46f775cc7bdfa864499e65 (diff)
mm: compaction: capture a suitable high-order page immediately when it is made available
While compaction is migrating pages to free up large contiguous blocks for allocation, it races with other allocation requests that may steal these blocks or break them up. This patch alters direct compaction to capture a suitable free page as soon as it becomes available, reducing this race. It uses logic similar to split_free_page() to ensure that watermarks are still obeyed.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
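In outline, the capture works by threading a struct page ** slot from the allocator down into compaction; compaction fills the slot the moment a suitable free block appears, rather than letting the allocator race for it afterwards. Below is a minimal userspace sketch of that capture-slot pattern, not kernel code: compact_zone_sketch() and freed_block are invented stand-ins for compact_zone() and the zone free lists, kept only to illustrate how the slot is passed down and filled.

    #include <stdio.h>

    struct page { int order; };

    static struct page freed_block = { .order = 4 };   /* pretend compaction just freed this block */

    /* Stand-in for compact_zone(): fill *capture as soon as a big-enough block exists. */
    static void compact_zone_sketch(int order, struct page **capture)
    {
            if (capture && *capture == NULL && freed_block.order >= order)
                    *capture = &freed_block;    /* claimed before a parallel allocator can steal it */
    }

    int main(void)
    {
            struct page *page = NULL;           /* capture slot, passed down like cc->page */

            compact_zone_sketch(3, &page);
            if (page)                           /* use the captured block directly; no racy re-allocation */
                    printf("captured an order-%d block\n", page->order);
            return 0;
    }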
Diffstat (limited to 'mm')
-rw-r--r--   mm/compaction.c   90
-rw-r--r--   mm/internal.h      1
-rw-r--r--   mm/page_alloc.c   63
3 files changed, 127 insertions(+), 27 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 7168edc7592..0fbc6b73a52 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -91,6 +91,60 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock,
 	return compact_checklock_irqsave(lock, flags, false, cc);
 }
 
+static void compact_capture_page(struct compact_control *cc)
+{
+	unsigned long flags;
+	int mtype, mtype_low, mtype_high;
+
+	if (!cc->page || *cc->page)
+		return;
+
+	/*
+	 * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
+	 * regardless of the migratetype of the freelist it is captured from.
+	 * This is fine because the order for a high-order MIGRATE_MOVABLE
+	 * allocation is typically at least a pageblock size and overall
+	 * fragmentation is not impaired. Other allocation types must
+	 * capture pages from their own migratelist because otherwise they
+	 * could pollute other pageblocks like MIGRATE_MOVABLE with
+	 * difficult to move pages and making fragmentation worse overall.
+	 */
+	if (cc->migratetype == MIGRATE_MOVABLE) {
+		mtype_low = 0;
+		mtype_high = MIGRATE_PCPTYPES;
+	} else {
+		mtype_low = cc->migratetype;
+		mtype_high = cc->migratetype + 1;
+	}
+
+	/* Speculatively examine the free lists without zone lock */
+	for (mtype = mtype_low; mtype < mtype_high; mtype++) {
+		int order;
+		for (order = cc->order; order < MAX_ORDER; order++) {
+			struct page *page;
+			struct free_area *area;
+			area = &(cc->zone->free_area[order]);
+			if (list_empty(&area->free_list[mtype]))
+				continue;
+
+			/* Take the lock and attempt capture of the page */
+			if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
+				return;
+			if (!list_empty(&area->free_list[mtype])) {
+				page = list_entry(area->free_list[mtype].next,
+							struct page, lru);
+				if (capture_free_page(page, cc->order, mtype)) {
+					spin_unlock_irqrestore(&cc->zone->lock,
+								flags);
+					*cc->page = page;
+					return;
+				}
+			}
+			spin_unlock_irqrestore(&cc->zone->lock, flags);
+		}
+	}
+}
+
 /*
  * Isolate free pages onto a private freelist. Caller must hold zone->lock.
  * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
@@ -645,7 +699,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 static int compact_finished(struct zone *zone,
 			    struct compact_control *cc)
 {
-	unsigned int order;
 	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
@@ -688,14 +741,22 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_CONTINUE;
 
 	/* Direct compactor: Is a suitable page free? */
-	for (order = cc->order; order < MAX_ORDER; order++) {
-		/* Job done if page is free of the right migratetype */
-		if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
-			return COMPACT_PARTIAL;
-
-		/* Job done if allocation would set block type */
-		if (order >= pageblock_order && zone->free_area[order].nr_free)
-			return COMPACT_PARTIAL;
+	if (cc->page) {
+		/* Was a suitable page captured? */
+		if (*cc->page)
+			return COMPACT_PARTIAL;
+	} else {
+		unsigned int order;
+		for (order = cc->order; order < MAX_ORDER; order++) {
+			struct free_area *area = &zone->free_area[cc->order];
+			/* Job done if page is free of the right migratetype */
+			if (!list_empty(&area->free_list[cc->migratetype]))
+				return COMPACT_PARTIAL;
+
+			/* Job done if allocation would set block type */
+			if (cc->order >= pageblock_order && area->nr_free)
+				return COMPACT_PARTIAL;
+		}
 	}
 
 	return COMPACT_CONTINUE;
@@ -817,6 +878,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 				goto out;
 			}
 		}
+
+		/* Capture a page now if it is a suitable size */
+		compact_capture_page(cc);
 	}
 
 out:
@@ -829,7 +893,8 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync, bool *contended)
+				 bool sync, bool *contended,
+				 struct page **page)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -839,6 +904,7 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.zone = zone,
 		.sync = sync,
 		.contended = contended,
+		.page = page,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -860,7 +926,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended)
+			bool sync, bool *contended, struct page **page)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -881,7 +947,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		int status;
 
 		status = compact_zone_order(zone, order, gfp_mask, sync,
-						contended);
+						contended, page);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -936,6 +1002,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
 	struct compact_control cc = {
 		.order = order,
 		.sync = false,
+		.page = NULL,
 	};
 
 	return __compact_pgdat(pgdat, &cc);
@@ -946,6 +1013,7 @@ static int compact_node(int nid)
 	struct compact_control cc = {
 		.order = -1,
 		.sync = true,
+		.page = NULL,
 	};
 
 	return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/internal.h b/mm/internal.h
index b8c91b342e2..e549a7fbc29 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -131,6 +131,7 @@ struct compact_control {
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
 	bool *contended;		/* True if a lock was contended */
+	struct page **page;		/* Page captured of requested size */
 };
 
 unsigned long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5e92698e539..cfd565dbe12 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1380,16 +1380,11 @@ void split_page(struct page *page, unsigned int order)
 }
 
 /*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
+ * Similar to the split_page family of functions except that the page
+ * required at the given order and being isolated now to prevent races
+ * with parallel allocators
  */
-int split_free_page(struct page *page)
+int capture_free_page(struct page *page, int alloc_order, int migratetype)
 {
 	unsigned int order;
 	unsigned long watermark;
@@ -1411,10 +1406,11 @@ int split_free_page(struct page *page)
 	rmv_page_order(page);
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
 
-	/* Split into individual pages */
-	set_page_refcounted(page);
-	split_page(page, order);
+	if (alloc_order != order)
+		expand(zone, page, alloc_order, order,
+			&zone->free_area[order], migratetype);
 
+	/* Set the pageblock if the captured page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1425,7 +1421,35 @@ int split_free_page(struct page *page)
 		}
 	}
 
-	return 1 << order;
+	return 1UL << order;
+}
+
+/*
+ * Similar to split_page except the page is already free. As this is only
+ * being used for migration, the migratetype of the block also changes.
+ * As this is called with interrupts disabled, the caller is responsible
+ * for calling arch_alloc_page() and kernel_map_page() after interrupts
+ * are enabled.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+int split_free_page(struct page *page)
+{
+	unsigned int order;
+	int nr_pages;
+
+	BUG_ON(!PageBuddy(page));
+	order = page_order(page);
+
+	nr_pages = capture_free_page(page, order, 0);
+	if (!nr_pages)
+		return 0;
+
+	/* Split into individual pages */
+	set_page_refcounted(page);
+	split_page(page, order);
+	return nr_pages;
 }
 
 /*
@@ -2105,7 +2129,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page;
+	struct page *page = NULL;
 
 	if (!order)
 		return NULL;
@@ -2118,10 +2142,16 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction);
+						contended_compaction, &page);
 	current->flags &= ~PF_MEMALLOC;
-	if (*did_some_progress != COMPACT_SKIPPED) {
 
+	/* If compaction captured a page, prep and use it */
+	if (page) {
+		prep_new_page(page, order, gfp_mask);
+		goto got_page;
+	}
+
+	if (*did_some_progress != COMPACT_SKIPPED) {
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2131,6 +2161,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 					alloc_flags & ~ALLOC_NO_WATERMARKS,
 					preferred_zone, migratetype);
 		if (page) {
+got_page:
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
 			if (order >= preferred_zone->compact_order_failed)