path: root/mm/page_alloc.c
author     Mel Gorman <mgorman@suse.de>    2013-01-11 17:32:16 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2013-01-11 17:54:56 -0500
commit     8fb74b9fb2b182d54beee592350d9ea1f325917a
tree       ccdcf36cfedb0cf54268226ebde75330e4882539  /mm/page_alloc.c
parent     c0a3a20b6c4b5229ef5d26fd9b1c4b1957632aa7
mm: compaction: partially revert capture of suitable high-order page
Eric Wong reported on 3.7 and 3.8-rc2 that ppoll() got stuck when waiting for POLLIN on a local TCP socket. It was easier to trigger if there was disk IO and dirty pages at the same time and he bisected it to commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available").

The intention of that patch was to improve high-order allocations under memory pressure after changes made to reclaim in 3.6 drastically hurt THP allocations, but the approach was flawed. For Eric, the problem was that page->pfmemalloc was not being cleared for captured pages, leading to a poor interaction with swap-over-NFS support that caused packets to be dropped. However, I identified a few more problems with the patch, including the fact that it can increase contention on zone->lock in some cases, which could result in async direct compaction being aborted early.

In retrospect the capture patch took the wrong approach. What it should have done is mark the pageblock being migrated as MIGRATE_ISOLATE if it was allocating for THP and avoided races that way. While the patch was shown to improve allocation success rates at the time, the benefit is marginal given the relative complexity, and it should be revisited from scratch in the context of the other reclaim-related changes that have taken place since the patch was first written and tested.

This patch partially reverts commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available").

Reported-and-tested-by: Eric Wong <normalperson@yhbt.net>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
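After the revert, the renamed helper __isolate_free_page() is only reached through split_free_page(), which isolates a free buddy page at whatever order it currently has and then splits it into order-0 pages. A condensed sketch of that path follows; it is assembled from the hunks in this patch plus the surrounding, unmodified code of that era, so treat it as illustrative rather than an exact copy of the file:

int split_free_page(struct page *page)
{
        unsigned int order;
        int nr_pages;

        order = page_order(page);

        /* Take the page off the buddy free lists at its current order */
        nr_pages = __isolate_free_page(page, order);
        if (!nr_pages)
                return 0;

        /* Split the isolated high-order page into individual order-0 pages */
        set_page_refcounted(page);
        split_page(page, order);

        return nr_pages;
}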
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  35
1 file changed, 8 insertions, 27 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c957805a7f0e..df2022ff0c8a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1384,14 +1384,8 @@ void split_page(struct page *page, unsigned int order)
                 set_page_refcounted(page + i);
 }
 
-/*
- * Similar to the split_page family of functions except that the page
- * required at the given order and being isolated now to prevent races
- * with parallel allocators
- */
-int capture_free_page(struct page *page, int alloc_order, int migratetype)
+static int __isolate_free_page(struct page *page, unsigned int order)
 {
-        unsigned int order;
         unsigned long watermark;
         struct zone *zone;
         int mt;
@@ -1399,7 +1393,6 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
         BUG_ON(!PageBuddy(page));
 
         zone = page_zone(page);
-        order = page_order(page);
         mt = get_pageblock_migratetype(page);
 
         if (mt != MIGRATE_ISOLATE) {
@@ -1408,7 +1401,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
                 if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
                         return 0;
 
-                __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
+                __mod_zone_freepage_state(zone, -(1UL << order), mt);
         }
 
         /* Remove page from free list */
@@ -1416,11 +1409,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
         zone->free_area[order].nr_free--;
         rmv_page_order(page);
 
-        if (alloc_order != order)
-                expand(zone, page, alloc_order, order,
-                        &zone->free_area[order], migratetype);
-
-        /* Set the pageblock if the captured page is at least a pageblock */
+        /* Set the pageblock if the isolated page is at least a pageblock */
         if (order >= pageblock_order - 1) {
                 struct page *endpage = page + (1 << order) - 1;
                 for (; page < endpage; page += pageblock_nr_pages) {
@@ -1431,7 +1420,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
                 }
         }
 
-        return 1UL << alloc_order;
+        return 1UL << order;
 }
 
 /*
@@ -1449,10 +1438,9 @@ int split_free_page(struct page *page)
         unsigned int order;
         int nr_pages;
 
-        BUG_ON(!PageBuddy(page));
         order = page_order(page);
 
-        nr_pages = capture_free_page(page, order, 0);
+        nr_pages = __isolate_free_page(page, order);
         if (!nr_pages)
                 return 0;
 
@@ -2136,8 +2124,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         bool *contended_compaction, bool *deferred_compaction,
         unsigned long *did_some_progress)
 {
-        struct page *page = NULL;
-
         if (!order)
                 return NULL;
 
@@ -2149,16 +2135,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         current->flags |= PF_MEMALLOC;
         *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
                                                 nodemask, sync_migration,
-                                                contended_compaction, &page);
+                                                contended_compaction);
         current->flags &= ~PF_MEMALLOC;
 
-        /* If compaction captured a page, prep and use it */
-        if (page) {
-                prep_new_page(page, order, gfp_mask);
-                goto got_page;
-        }
-
         if (*did_some_progress != COMPACT_SKIPPED) {
+                struct page *page;
+
                 /* Page migration frees to the PCP lists but we want merging */
                 drain_pages(get_cpu());
                 put_cpu();
@@ -2168,7 +2150,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                                 alloc_flags & ~ALLOC_NO_WATERMARKS,
                                 preferred_zone, migratetype);
                 if (page) {
-got_page:
                         preferred_zone->compact_blockskip_flush = false;
                         preferred_zone->compact_considered = 0;
                         preferred_zone->compact_defer_shift = 0;