author	Vlastimil Babka <vbabka@suse.cz>	2016-07-28 18:49:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-07-28 19:07:41 -0400
commit	c3486f5376696034d0fcbef8ba70c70cfcb26f51 (patch)
tree	5faec99d3537ddabaaf79e90d3335f0812e69766
parent	a5508cd83f10f663e05d212cb81f600a3af46e40 (diff)
mm, compaction: simplify contended compaction handling
Async compaction detects contention either due to failing trylock on
zone->lock or lru_lock, or by need_resched(). Since commit 1f9efdef4f3f
("mm, compaction: khugepaged should not give up due to need_resched()"),
the code to distinguish these two cases up to the
__alloc_pages_slowpath() level has grown quite complicated, so that
different decisions could be taken for khugepaged allocations.

After the recent changes, khugepaged allocations no longer check for
contended compaction, so we again don't need to distinguish lock and
sched contention, and can simplify the current convoluted code a lot.

However, I believe it's also possible to simplify even further and
completely remove the check for contended compaction after the initial
async compaction for costly orders, which was originally aimed at THP
page fault allocations. There are several reasons why this can be done
now:

- With the new defaults, THP page faults no longer do reclaim/compaction
  at all, unless the system admin has overridden the default or the
  application has indicated via madvise that it can benefit from THPs.
  In both cases, the potential extra latency is expected and worth the
  benefits.

- Even if reclaim/compaction proceeds after this patch where it
  previously wouldn't, the second compaction attempt is still async and
  will detect the contention and back off if the contention persists.

- There are still heuristics such as deferred compaction and pageblock
  skip bits in place that prevent excessive THP page fault latencies.

Link: http://lkml.kernel.org/r/20160721073614.24395-9-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
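[Editor's illustration, not part of the patch: a minimal, self-contained
userspace C sketch of what the simplification boils down to. The names
cc_model, should_abort, lock_contended and resched_needed are hypothetical
stand-ins for cc->mode == MIGRATE_ASYNC, the zone->lock/lru_lock trylocks
and need_resched(); the point is that both contention sources now collapse
into the single bool that replaces COMPACT_CONTENDED_{NONE,SCHED,LOCK}.]

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's two contention sources. */
static bool lock_contended;	/* a trylock on zone->lock or lru_lock failed */
static bool resched_needed;	/* need_resched() would return true */

struct cc_model {
	bool async;		/* models cc->mode == MIGRATE_ASYNC */
	bool contended;		/* the single flag left after this patch */
};

/*
 * Before the patch, callers had to learn *which* kind of contention
 * aborted async compaction (COMPACT_CONTENDED_SCHED vs. _LOCK) so that
 * __alloc_pages_slowpath() could decide differently for khugepaged.
 * After it, any contention sets one bool and async compaction backs off.
 */
static bool should_abort(struct cc_model *cc)
{
	if (!cc->async)
		return false;	/* sync compaction presses on */

	if (lock_contended || resched_needed) {
		cc->contended = true;
		return true;
	}
	return false;
}

int main(void)
{
	struct cc_model cc = { .async = true, .contended = false };

	lock_contended = true;	/* simulate a contended zone->lock */
	printf("abort=%d contended=%d\n", should_abort(&cc), cc.contended);
	/* prints: abort=1 contended=1 */
	return 0;
}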
-rw-r--r--	include/linux/compaction.h	13
-rw-r--r--	mm/compaction.c	72
-rw-r--r--	mm/internal.h	5
-rw-r--r--	mm/page_alloc.c	28
4 files changed, 17 insertions, 101 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 0980a6ce4436..d4e106b5dc27 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -55,14 +55,6 @@ enum compact_result {
 	COMPACT_PARTIAL,
 };
 
-/* Used to signal whether compaction detected need_sched() or lock contention */
-/* No contention detected */
-#define COMPACT_CONTENDED_NONE	0
-/* Either need_sched() was true or fatal signal pending */
-#define COMPACT_CONTENDED_SCHED	1
-/* Zone lock or lru_lock was contended in async compaction */
-#define COMPACT_CONTENDED_LOCK	2
-
 struct alloc_context; /* in mm/internal.h */
 
 #ifdef CONFIG_COMPACTION
@@ -76,9 +68,8 @@ extern int sysctl_compact_unevictable_allowed;
 
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
-		unsigned int order,
-		unsigned int alloc_flags, const struct alloc_context *ac,
-		enum compact_priority prio, int *contended);
+		unsigned int order, unsigned int alloc_flags,
+		const struct alloc_context *ac, enum compact_priority prio);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern enum compact_result compaction_suitable(struct zone *zone, int order,
diff --git a/mm/compaction.c b/mm/compaction.c
index 4719a391242f..9affb2908304 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -331,7 +331,7 @@ static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags,
 {
 	if (cc->mode == MIGRATE_ASYNC) {
 		if (!spin_trylock_irqsave(lock, *flags)) {
-			cc->contended = COMPACT_CONTENDED_LOCK;
+			cc->contended = true;
 			return false;
 		}
 	} else {
@@ -365,13 +365,13 @@ static bool compact_unlock_should_abort(spinlock_t *lock,
 	}
 
 	if (fatal_signal_pending(current)) {
-		cc->contended = COMPACT_CONTENDED_SCHED;
+		cc->contended = true;
 		return true;
 	}
 
 	if (need_resched()) {
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = COMPACT_CONTENDED_SCHED;
+			cc->contended = true;
 			return true;
 		}
 		cond_resched();
@@ -394,7 +394,7 @@ static inline bool compact_should_abort(struct compact_control *cc)
 	/* async compaction aborts if contended */
 	if (need_resched()) {
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = COMPACT_CONTENDED_SCHED;
+			cc->contended = true;
 			return true;
 		}
 
@@ -1619,14 +1619,11 @@ out:
 	trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
 				cc->free_pfn, end_pfn, sync, ret);
 
-	if (ret == COMPACT_CONTENDED)
-		ret = COMPACT_PARTIAL;
-
 	return ret;
 }
 
 static enum compact_result compact_zone_order(struct zone *zone, int order,
-		gfp_t gfp_mask, enum compact_priority prio, int *contended,
+		gfp_t gfp_mask, enum compact_priority prio,
 		unsigned int alloc_flags, int classzone_idx)
 {
 	enum compact_result ret;
@@ -1650,7 +1647,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 	VM_BUG_ON(!list_empty(&cc.freepages));
 	VM_BUG_ON(!list_empty(&cc.migratepages));
 
-	*contended = cc.contended;
 	return ret;
 }
 
@@ -1663,23 +1659,18 @@ int sysctl_extfrag_threshold = 500;
  * @alloc_flags: The allocation flags of the current allocation
  * @ac: The context of current allocation
  * @mode: The migration mode for async, sync light, or sync migration
- * @contended: Return value that determines if compaction was aborted due to
- *	       need_resched() or lock contention
  *
  * This is the main entry point for direct page compaction.
  */
 enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
-		enum compact_priority prio, int *contended)
+		enum compact_priority prio)
 {
 	int may_enter_fs = gfp_mask & __GFP_FS;
 	int may_perform_io = gfp_mask & __GFP_IO;
 	struct zoneref *z;
 	struct zone *zone;
 	enum compact_result rc = COMPACT_SKIPPED;
-	int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
-
-	*contended = COMPACT_CONTENDED_NONE;
 
 	/* Check if the GFP flags allow compaction */
 	if (!may_enter_fs || !may_perform_io)
@@ -1691,7 +1682,6 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
 								ac->nodemask) {
 		enum compact_result status;
-		int zone_contended;
 
 		if (compaction_deferred(zone, order)) {
 			rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
@@ -1699,14 +1689,8 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 		}
 
 		status = compact_zone_order(zone, order, gfp_mask, prio,
-				&zone_contended, alloc_flags,
-				ac_classzone_idx(ac));
+					alloc_flags, ac_classzone_idx(ac));
 		rc = max(status, rc);
-		/*
-		 * It takes at least one zone that wasn't lock contended
-		 * to clear all_zones_contended.
-		 */
-		all_zones_contended &= zone_contended;
 
 		/* If a normal allocation would succeed, stop compacting */
 		if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
@@ -1718,59 +1702,29 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 			 * succeeds in this zone.
 			 */
 			compaction_defer_reset(zone, order, false);
-			/*
-			 * It is possible that async compaction aborted due to
-			 * need_resched() and the watermarks were ok thanks to
-			 * somebody else freeing memory. The allocation can
-			 * however still fail so we better signal the
-			 * need_resched() contention anyway (this will not
-			 * prevent the allocation attempt).
-			 */
-			if (zone_contended == COMPACT_CONTENDED_SCHED)
-				*contended = COMPACT_CONTENDED_SCHED;
 
-			goto break_loop;
+			break;
 		}
 
 		if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
-					status == COMPACT_PARTIAL_SKIPPED)) {
+					status == COMPACT_PARTIAL_SKIPPED))
 			/*
 			 * We think that allocation won't succeed in this zone
 			 * so we defer compaction there. If it ends up
 			 * succeeding after all, it will be reset.
 			 */
 			defer_compaction(zone, order);
-		}
 
 		/*
 		 * We might have stopped compacting due to need_resched() in
 		 * async compaction, or due to a fatal signal detected. In that
-		 * case do not try further zones and signal need_resched()
-		 * contention.
-		 */
-		if ((zone_contended == COMPACT_CONTENDED_SCHED)
-					|| fatal_signal_pending(current)) {
-			*contended = COMPACT_CONTENDED_SCHED;
-			goto break_loop;
-		}
-
-		continue;
-break_loop:
-		/*
-		 * We might not have tried all the zones, so be conservative
-		 * and assume they are not all lock contended.
+		 * case do not try further zones
 		 */
-		all_zones_contended = 0;
-		break;
+		if ((prio == COMPACT_PRIO_ASYNC && need_resched())
+					|| fatal_signal_pending(current))
+			break;
 	}
 
-	/*
-	 * If at least one zone wasn't deferred or skipped, we report if all
-	 * zones that were tried were lock contended.
-	 */
-	if (rc > COMPACT_INACTIVE && all_zones_contended)
-		*contended = COMPACT_CONTENDED_LOCK;
-
 	return rc;
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 28932cd6a195..1501304f87a4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -185,10 +185,7 @@ struct compact_control {
 	const unsigned int alloc_flags;	/* alloc flags of a direct compactor */
 	const int classzone_idx;	/* zone index of a direct compactor */
 	struct zone *zone;
-	int contended;			/* Signal need_sched() or lock
-					 * contention detected during
-					 * compaction
-					 */
+	bool contended;			/* Signal lock or sched contention */
 };
 
 unsigned long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 26c6fe74f5c5..ea759b935360 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3099,14 +3099,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		enum compact_priority prio, enum compact_result *compact_result)
 {
 	struct page *page;
-	int contended_compaction;
 
 	if (!order)
 		return NULL;
 
 	current->flags |= PF_MEMALLOC;
 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
-						prio, &contended_compaction);
+									prio);
 	current->flags &= ~PF_MEMALLOC;
 
 	if (*compact_result <= COMPACT_INACTIVE)
@@ -3135,24 +3134,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	 */
 	count_vm_event(COMPACTFAIL);
 
-	/*
-	 * In all zones where compaction was attempted (and not
-	 * deferred or skipped), lock contention has been detected.
-	 * For THP allocation we do not want to disrupt the others
-	 * so we fallback to base pages instead.
-	 */
-	if (contended_compaction == COMPACT_CONTENDED_LOCK)
-		*compact_result = COMPACT_CONTENDED;
-
-	/*
-	 * If compaction was aborted due to need_resched(), we do not
-	 * want to further increase allocation latency, unless it is
-	 * khugepaged trying to collapse.
-	 */
-	if (contended_compaction == COMPACT_CONTENDED_SCHED
-		&& !(current->flags & PF_KTHREAD))
-		*compact_result = COMPACT_CONTENDED;
-
 	cond_resched();
 
 	return NULL;
@@ -3548,13 +3529,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 		/*
-		 * Compaction is contended so rather back off than cause
-		 * excessive stalls.
-		 */
-		if (compact_result == COMPACT_CONTENDED)
-			goto nopage;
-
-		/*
 		 * Looks like reclaim/compaction is worth trying, but
 		 * sync compaction could be very expensive, so keep
 		 * using async compaction.