aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Mel Gorman <mgorman@suse.de> 2012-10-08 19:32:41 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-10-09 03:22:50 -0400
commit bb13ffeb9f6bfeb301443994dfbf29f91117dfb3 (patch)
tree 45e0e6574c0165da9cdc993b3401fe3263e4761c /mm
parent 753341a4b85ff337487b9959c71c529f522004f4 (diff)
mm: compaction: cache if a pageblock was scanned and no pages were isolated
When compaction was implemented it was known that scanning could potentially be excessive. The ideal was that a counter be maintained for each pageblock but maintaining this information would incur a severe penalty due to a shared writable cache line. It has reached the point where the scanning costs are a serious problem, particularly on long-lived systems where a large process starts and allocates a large number of THPs at the same time.

Instead of using a shared counter, this patch adds another bit to the pageblock flags called PG_migrate_skip. If a pageblock is scanned by either the migrate or free scanner and 0 pages were isolated, the pageblock is marked to be skipped in the future. When scanning, this bit is checked before any scanning takes place and the block is skipped if set.

The main difficulty with a patch like this is "when to ignore the cached information?" If it's ignored too often, the scanning rates will still be excessive. If the information is too stale then allocations will fail that might have otherwise succeeded. In this patch

o CMA always ignores the information
o If the migrate and free scanner meet then the cached information will be discarded if it's at least 5 seconds since the last time the cache was discarded
o If there are a large number of allocation failures, discard the cache.

The time-based heuristic is very clumsy but there are few choices for a better event. Depending solely on multiple allocation failures still allows excessive scanning when THP allocations are failing in quick succession due to memory pressure. Waiting until memory pressure is relieved would cause compaction to continually fail instead of using reclaim/compaction to try to allocate the page. The time-based mechanism is clumsy but a better option is not obvious.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Richard Davies <richard@arachsys.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Avi Kivity <avi@redhat.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/compaction.c 125
-rw-r--r-- mm/internal.h 4
-rw-r--r-- mm/page_alloc.c 38
3 files changed, 131 insertions, 36 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index db76361a3117..d9dbb97e607b 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -50,6 +50,79 @@ static inline bool migrate_async_suitable(int migratetype)
50 return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; 50 return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
51} 51}
52 52
53#ifdef CONFIG_COMPACTION
54/* Returns true if the pageblock should be scanned for pages to isolate. */
55static inline bool isolation_suitable(struct compact_control *cc,
56 struct page *page)
57{
58 if (cc->ignore_skip_hint)
59 return true;
60
61 return !get_pageblock_skip(page);
62}
63
64/*
65 * This function is called to clear all cached information on pageblocks that
66 * should be skipped for page isolation when the migrate and free page scanner
67 * meet.
68 */
69static void reset_isolation_suitable(struct zone *zone)
70{
71 unsigned long start_pfn = zone->zone_start_pfn;
72 unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
73 unsigned long pfn;
74
75 /*
76 * Do not reset more than once every five seconds. If allocations are
77 * failing sufficiently quickly to allow this to happen then continually
78 * scanning for compaction is not going to help. The choice of five
79 * seconds is arbitrary but will mitigate excessive scanning.
80 */
81 if (time_before(jiffies, zone->compact_blockskip_expire))
82 return;
83 zone->compact_blockskip_expire = jiffies + (HZ * 5);
84
85 /* Walk the zone and mark every pageblock as suitable for isolation */
86 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
87 struct page *page;
88
89 cond_resched();
90
91 if (!pfn_valid(pfn))
92 continue;
93
94 page = pfn_to_page(pfn);
95 if (zone != page_zone(page))
96 continue;
97
98 clear_pageblock_skip(page);
99 }
100}
101
102/*
103 * If no pages were isolated then mark this pageblock to be skipped in the
104 * future. The information is later cleared by reset_isolation_suitable().
105 */
106static void update_pageblock_skip(struct page *page, unsigned long nr_isolated)
107{
108 if (!page)
109 return;
110
111 if (!nr_isolated)
112 set_pageblock_skip(page);
113}
114#else
115static inline bool isolation_suitable(struct compact_control *cc,
116 struct page *page)
117{
118 return true;
119}
120
121static void update_pageblock_skip(struct page *page, unsigned long nr_isolated)
122{
123}
124#endif /* CONFIG_COMPACTION */
125
53static inline bool should_release_lock(spinlock_t *lock) 126static inline bool should_release_lock(spinlock_t *lock)
54{ 127{
55 return need_resched() || spin_is_contended(lock); 128 return need_resched() || spin_is_contended(lock);
@@ -181,7 +254,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
181 bool strict) 254 bool strict)
182{ 255{
183 int nr_scanned = 0, total_isolated = 0; 256 int nr_scanned = 0, total_isolated = 0;
184 struct page *cursor; 257 struct page *cursor, *valid_page = NULL;
185 unsigned long nr_strict_required = end_pfn - blockpfn; 258 unsigned long nr_strict_required = end_pfn - blockpfn;
186 unsigned long flags; 259 unsigned long flags;
187 bool locked = false; 260 bool locked = false;
@@ -196,6 +269,8 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
196 nr_scanned++; 269 nr_scanned++;
197 if (!pfn_valid_within(blockpfn)) 270 if (!pfn_valid_within(blockpfn))
198 continue; 271 continue;
272 if (!valid_page)
273 valid_page = page;
199 if (!PageBuddy(page)) 274 if (!PageBuddy(page))
200 continue; 275 continue;
201 276
@@ -250,6 +325,10 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
250 if (locked) 325 if (locked)
251 spin_unlock_irqrestore(&cc->zone->lock, flags); 326 spin_unlock_irqrestore(&cc->zone->lock, flags);
252 327
328 /* Update the pageblock-skip if the whole pageblock was scanned */
329 if (blockpfn == end_pfn)
330 update_pageblock_skip(valid_page, total_isolated);
331
253 return total_isolated; 332 return total_isolated;
254} 333}
255 334
@@ -267,22 +346,14 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
267 * a free page). 346 * a free page).
268 */ 347 */
269unsigned long 348unsigned long
270isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) 349isolate_freepages_range(struct compact_control *cc,
350 unsigned long start_pfn, unsigned long end_pfn)
271{ 351{
272 unsigned long isolated, pfn, block_end_pfn; 352 unsigned long isolated, pfn, block_end_pfn;
273 struct zone *zone = NULL;
274 LIST_HEAD(freelist); 353 LIST_HEAD(freelist);
275 354
276 /* cc needed for isolate_freepages_block to acquire zone->lock */
277 struct compact_control cc = {
278 .sync = true,
279 };
280
281 if (pfn_valid(start_pfn))
282 cc.zone = zone = page_zone(pfn_to_page(start_pfn));
283
284 for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) { 355 for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) {
285 if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn))) 356 if (!pfn_valid(pfn) || cc->zone != page_zone(pfn_to_page(pfn)))
286 break; 357 break;
287 358
288 /* 359 /*
@@ -292,7 +363,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
292 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); 363 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
293 block_end_pfn = min(block_end_pfn, end_pfn); 364 block_end_pfn = min(block_end_pfn, end_pfn);
294 365
295 isolated = isolate_freepages_block(&cc, pfn, block_end_pfn, 366 isolated = isolate_freepages_block(cc, pfn, block_end_pfn,
296 &freelist, true); 367 &freelist, true);
297 368
298 /* 369 /*
@@ -387,6 +458,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
387 struct lruvec *lruvec; 458 struct lruvec *lruvec;
388 unsigned long flags; 459 unsigned long flags;
389 bool locked = false; 460 bool locked = false;
461 struct page *page = NULL, *valid_page = NULL;
390 462
391 /* 463 /*
392 * Ensure that there are not too many pages isolated from the LRU 464 * Ensure that there are not too many pages isolated from the LRU
@@ -407,8 +479,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
407 /* Time to isolate some pages for migration */ 479 /* Time to isolate some pages for migration */
408 cond_resched(); 480 cond_resched();
409 for (; low_pfn < end_pfn; low_pfn++) { 481 for (; low_pfn < end_pfn; low_pfn++) {
410 struct page *page;
411
412 /* give a chance to irqs before checking need_resched() */ 482 /* give a chance to irqs before checking need_resched() */
413 if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) { 483 if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
414 if (should_release_lock(&zone->lru_lock)) { 484 if (should_release_lock(&zone->lru_lock)) {
@@ -444,6 +514,14 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
444 if (page_zone(page) != zone) 514 if (page_zone(page) != zone)
445 continue; 515 continue;
446 516
517 if (!valid_page)
518 valid_page = page;
519
520 /* If isolation recently failed, do not retry */
521 pageblock_nr = low_pfn >> pageblock_order;
522 if (!isolation_suitable(cc, page))
523 goto next_pageblock;
524
447 /* Skip if free */ 525 /* Skip if free */
448 if (PageBuddy(page)) 526 if (PageBuddy(page))
449 continue; 527 continue;
@@ -453,7 +531,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
453 * migration is optimistic to see if the minimum amount of work 531 * migration is optimistic to see if the minimum amount of work
454 * satisfies the allocation 532 * satisfies the allocation
455 */ 533 */
456 pageblock_nr = low_pfn >> pageblock_order;
457 if (!cc->sync && last_pageblock_nr != pageblock_nr && 534 if (!cc->sync && last_pageblock_nr != pageblock_nr &&
458 !migrate_async_suitable(get_pageblock_migratetype(page))) { 535 !migrate_async_suitable(get_pageblock_migratetype(page))) {
459 goto next_pageblock; 536 goto next_pageblock;
@@ -530,6 +607,10 @@ next_pageblock:
530 if (locked) 607 if (locked)
531 spin_unlock_irqrestore(&zone->lru_lock, flags); 608 spin_unlock_irqrestore(&zone->lru_lock, flags);
532 609
610 /* Update the pageblock-skip if the whole pageblock was scanned */
611 if (low_pfn == end_pfn)
612 update_pageblock_skip(valid_page, nr_isolated);
613
533 trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); 614 trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
534 615
535 return low_pfn; 616 return low_pfn;
@@ -593,6 +674,10 @@ static void isolate_freepages(struct zone *zone,
593 if (!suitable_migration_target(page)) 674 if (!suitable_migration_target(page))
594 continue; 675 continue;
595 676
677 /* If isolation recently failed, do not retry */
678 if (!isolation_suitable(cc, page))
679 continue;
680
596 /* Found a block suitable for isolating free pages from */ 681 /* Found a block suitable for isolating free pages from */
597 isolated = 0; 682 isolated = 0;
598 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); 683 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
@@ -709,8 +794,10 @@ static int compact_finished(struct zone *zone,
709 return COMPACT_PARTIAL; 794 return COMPACT_PARTIAL;
710 795
711 /* Compaction run completes if the migrate and free scanner meet */ 796 /* Compaction run completes if the migrate and free scanner meet */
712 if (cc->free_pfn <= cc->migrate_pfn) 797 if (cc->free_pfn <= cc->migrate_pfn) {
798 reset_isolation_suitable(cc->zone);
713 return COMPACT_COMPLETE; 799 return COMPACT_COMPLETE;
800 }
714 801
715 /* 802 /*
716 * order == -1 is expected when compacting via 803 * order == -1 is expected when compacting via
@@ -818,6 +905,10 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
818 cc->free_pfn = cc->migrate_pfn + zone->spanned_pages; 905 cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
819 cc->free_pfn &= ~(pageblock_nr_pages-1); 906 cc->free_pfn &= ~(pageblock_nr_pages-1);
820 907
908 /* Clear pageblock skip if there are numerous alloc failures */
909 if (zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT)
910 reset_isolation_suitable(zone);
911
821 migrate_prep_local(); 912 migrate_prep_local();
822 913
823 while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) { 914 while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
diff --git a/mm/internal.h b/mm/internal.h
index 6f6bb9ab9386..7ba56ac360b2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -120,6 +120,7 @@ struct compact_control {
120 unsigned long free_pfn; /* isolate_freepages search base */ 120 unsigned long free_pfn; /* isolate_freepages search base */
121 unsigned long migrate_pfn; /* isolate_migratepages search base */ 121 unsigned long migrate_pfn; /* isolate_migratepages search base */
122 bool sync; /* Synchronous migration */ 122 bool sync; /* Synchronous migration */
123 bool ignore_skip_hint; /* Scan blocks even if marked skip */
123 124
124 int order; /* order a direct compactor needs */ 125 int order; /* order a direct compactor needs */
125 int migratetype; /* MOVABLE, RECLAIMABLE etc */ 126 int migratetype; /* MOVABLE, RECLAIMABLE etc */
@@ -129,7 +130,8 @@ struct compact_control {
129}; 130};
130 131
131unsigned long 132unsigned long
132isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn); 133isolate_freepages_range(struct compact_control *cc,
134 unsigned long start_pfn, unsigned long end_pfn);
133unsigned long 135unsigned long
134isolate_migratepages_range(struct zone *zone, struct compact_control *cc, 136isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
135 unsigned long low_pfn, unsigned long end_pfn); 137 unsigned long low_pfn, unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 628968c1ccf4..44c56049edf9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5679,7 +5679,8 @@ __alloc_contig_migrate_alloc(struct page *page, unsigned long private,
5679} 5679}
5680 5680
5681/* [start, end) must belong to a single zone. */ 5681/* [start, end) must belong to a single zone. */
5682static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) 5682static int __alloc_contig_migrate_range(struct compact_control *cc,
5683 unsigned long start, unsigned long end)
5683{ 5684{
5684 /* This function is based on compact_zone() from compaction.c. */ 5685 /* This function is based on compact_zone() from compaction.c. */
5685 5686
@@ -5687,25 +5688,17 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
5687 unsigned int tries = 0; 5688 unsigned int tries = 0;
5688 int ret = 0; 5689 int ret = 0;
5689 5690
5690 struct compact_control cc = {
5691 .nr_migratepages = 0,
5692 .order = -1,
5693 .zone = page_zone(pfn_to_page(start)),
5694 .sync = true,
5695 };
5696 INIT_LIST_HEAD(&cc.migratepages);
5697
5698 migrate_prep_local(); 5691 migrate_prep_local();
5699 5692
5700 while (pfn < end || !list_empty(&cc.migratepages)) { 5693 while (pfn < end || !list_empty(&cc->migratepages)) {
5701 if (fatal_signal_pending(current)) { 5694 if (fatal_signal_pending(current)) {
5702 ret = -EINTR; 5695 ret = -EINTR;
5703 break; 5696 break;
5704 } 5697 }
5705 5698
5706 if (list_empty(&cc.migratepages)) { 5699 if (list_empty(&cc->migratepages)) {
5707 cc.nr_migratepages = 0; 5700 cc->nr_migratepages = 0;
5708 pfn = isolate_migratepages_range(cc.zone, &cc, 5701 pfn = isolate_migratepages_range(cc->zone, cc,
5709 pfn, end); 5702 pfn, end);
5710 if (!pfn) { 5703 if (!pfn) {
5711 ret = -EINTR; 5704 ret = -EINTR;
@@ -5717,14 +5710,14 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
5717 break; 5710 break;
5718 } 5711 }
5719 5712
5720 reclaim_clean_pages_from_list(cc.zone, &cc.migratepages); 5713 reclaim_clean_pages_from_list(cc->zone, &cc->migratepages);
5721 5714
5722 ret = migrate_pages(&cc.migratepages, 5715 ret = migrate_pages(&cc->migratepages,
5723 __alloc_contig_migrate_alloc, 5716 __alloc_contig_migrate_alloc,
5724 0, false, MIGRATE_SYNC); 5717 0, false, MIGRATE_SYNC);
5725 } 5718 }
5726 5719
5727 putback_lru_pages(&cc.migratepages); 5720 putback_lru_pages(&cc->migratepages);
5728 return ret > 0 ? 0 : ret; 5721 return ret > 0 ? 0 : ret;
5729} 5722}
5730 5723
@@ -5803,6 +5796,15 @@ int alloc_contig_range(unsigned long start, unsigned long end,
5803 unsigned long outer_start, outer_end; 5796 unsigned long outer_start, outer_end;
5804 int ret = 0, order; 5797 int ret = 0, order;
5805 5798
5799 struct compact_control cc = {
5800 .nr_migratepages = 0,
5801 .order = -1,
5802 .zone = page_zone(pfn_to_page(start)),
5803 .sync = true,
5804 .ignore_skip_hint = true,
5805 };
5806 INIT_LIST_HEAD(&cc.migratepages);
5807
5806 /* 5808 /*
5807 * What we do here is we mark all pageblocks in range as 5809 * What we do here is we mark all pageblocks in range as
5808 * MIGRATE_ISOLATE. Because pageblock and max order pages may 5810 * MIGRATE_ISOLATE. Because pageblock and max order pages may
@@ -5832,7 +5834,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
5832 if (ret) 5834 if (ret)
5833 goto done; 5835 goto done;
5834 5836
5835 ret = __alloc_contig_migrate_range(start, end); 5837 ret = __alloc_contig_migrate_range(&cc, start, end);
5836 if (ret) 5838 if (ret)
5837 goto done; 5839 goto done;
5838 5840
@@ -5881,7 +5883,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
5881 __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start); 5883 __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
5882 5884
5883 /* Grab isolated pages from freelists. */ 5885 /* Grab isolated pages from freelists. */
5884 outer_end = isolate_freepages_range(outer_start, end); 5886 outer_end = isolate_freepages_range(&cc, outer_start, end);
5885 if (!outer_end) { 5887 if (!outer_end) {
5886 ret = -EBUSY; 5888 ret = -EBUSY;
5887 goto done; 5889 goto done;