path: root/mm
author	Vlastimil Babka <vbabka@suse.cz>	2014-10-09 18:27:14 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-09 22:25:54 -0400
commit	1f9efdef4f3f1d2a073e524113fd0038af636f2b (patch)
tree	0de1b4fcc02a95df56e7c6847d1b1592d1b09a13 /mm
parent	7d49d8868336bbf4f68714d8282ca5fd65e387ed (diff)
mm, compaction: khugepaged should not give up due to need_resched()
Async compaction aborts when it detects zone lock contention or when need_resched() is true. David Rientjes has reported that in practice, most direct async compactions for THP allocation abort due to need_resched(). This means that a second direct compaction is never attempted, which might be OK for a page fault, but khugepaged is intended to attempt a sync compaction in such a case, and with this behavior it won't.

This patch replaces "bool contended" in compact_control with an int that distinguishes between aborting due to need_resched() and aborting due to lock contention. This allows propagating the abort through all compaction functions as before, but passing the abort reason up to __alloc_pages_slowpath(), which decides when to continue with direct reclaim and another compaction attempt.

Another problem is that try_to_compact_pages() did not act upon the reported contention (either need_resched() or lock contention) immediately and would proceed with another zone from the zonelist. When need_resched() is true, that means initializing another zone compaction, only to check need_resched() again in isolate_migratepages() and abort. For zone lock contention, the unintended consequence is that the lock-contended status reported back to the allocator is determined from the last zone where compaction was attempted, which is rather arbitrary.

This patch fixes the problem in the following way:

- async compaction of a zone aborting due to need_resched() or a pending fatal signal means that further zones should not be tried. We report COMPACT_CONTENDED_SCHED to the allocator.

- aborting zone compaction due to lock contention means we can still try another zone, since it has a different set of locks. We report back COMPACT_CONTENDED_LOCK only if compaction was aborted due to lock contention in *all* zones where it was attempted.

As a result of these fixes, khugepaged will proceed with a second sync compaction as intended, when the preceding async compaction aborted due to need_resched(). Page fault compactions aborting due to need_resched() will spare some cycles previously wasted by initializing another zone compaction only to abort again. Lock contention will be reported only when compaction in all zones aborted due to lock contention, and therefore it's not a good idea to try again after reclaim.

In stress-highalloc from mmtests configured to use __GFP_NO_KSWAPD, this has improved the number of THP collapse allocations by 10%, which shows a positive effect on khugepaged. The benchmark's success rates are unchanged as it is not recognized as khugepaged. The numbers of compact_stall and compact_fail events have however decreased by 20%, with compact_success still a bit improved, which is good. With the benchmark configured not to use __GFP_NO_KSWAPD, there is a 6% improvement in THP collapse allocations, and only a slight improvement in stalls and failures.

[akpm@linux-foundation.org: fix warnings]
Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
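For illustration only, the following minimal standalone C sketch condenses the per-zone aggregation described above; it is not the kernel code. The aggregate_contention() helper, the zone_reports array, and the numeric enum values are invented for this sketch (the real constants live in mm/internal.h and are not shown in this diff); the sketch only assumes COMPACT_CONTENDED_NONE is 0 so the &= aggregation clears the lock-contended state as soon as one tried zone was not lock contended, while a need_resched() report ends the walk immediately, mirroring the break_loop path.

/* Standalone sketch; names and values are illustrative, not from mm/internal.h. */
enum compact_contended {
	COMPACT_CONTENDED_NONE = 0,	/* no contention detected */
	COMPACT_CONTENDED_SCHED = 1,	/* need_resched() or fatal signal pending */
	COMPACT_CONTENDED_LOCK = 2,	/* zone lock contended in async compaction */
};

/*
 * Mirrors the aggregation in try_to_compact_pages(): SCHED aborts the zonelist
 * walk, LOCK is reported only when every zone that was tried was lock contended.
 */
static enum compact_contended
aggregate_contention(const enum compact_contended *zone_reports, int nr_zones)
{
	int all_zones_contended = COMPACT_CONTENDED_LOCK;	/* init for &= op */
	int i;

	for (i = 0; i < nr_zones; i++) {
		/* one zone that wasn't lock contended clears all_zones_contended */
		all_zones_contended &= zone_reports[i];

		/* need_resched(): do not try further zones, report SCHED */
		if (zone_reports[i] == COMPACT_CONTENDED_SCHED)
			return COMPACT_CONTENDED_SCHED;
	}

	if (nr_zones && all_zones_contended)
		return COMPACT_CONTENDED_LOCK;

	return COMPACT_CONTENDED_NONE;
}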
Diffstat (limited to 'mm')
-rw-r--r--	mm/compaction.c	87
-rw-r--r--	mm/internal.h	4
-rw-r--r--	mm/page_alloc.c	45
3 files changed, 111 insertions, 25 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 1067c07cb33d..26bb20ef853d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -223,9 +223,21 @@ static void update_pageblock_skip(struct compact_control *cc,
 }
 #endif /* CONFIG_COMPACTION */
 
-static inline bool should_release_lock(spinlock_t *lock)
+static int should_release_lock(spinlock_t *lock)
 {
-	return need_resched() || spin_is_contended(lock);
+	/*
+	 * Sched contention has higher priority here as we may potentially
+	 * have to abort whole compaction ASAP. Returning with lock contention
+	 * means we will try another zone, and further decisions are
+	 * influenced only when all zones are lock contended. That means
+	 * potentially missing a lock contention is less critical.
+	 */
+	if (need_resched())
+		return COMPACT_CONTENDED_SCHED;
+	else if (spin_is_contended(lock))
+		return COMPACT_CONTENDED_LOCK;
+
+	return COMPACT_CONTENDED_NONE;
 }
 
 /*
@@ -240,7 +252,9 @@ static inline bool should_release_lock(spinlock_t *lock)
 static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 				      bool locked, struct compact_control *cc)
 {
-	if (should_release_lock(lock)) {
+	int contended = should_release_lock(lock);
+
+	if (contended) {
 		if (locked) {
 			spin_unlock_irqrestore(lock, *flags);
 			locked = false;
@@ -248,7 +262,7 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 
 		/* async aborts if taking too long or contended */
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = true;
+			cc->contended = contended;
 			return false;
 		}
 
@@ -274,7 +288,7 @@ static inline bool compact_should_abort(struct compact_control *cc)
 	/* async compaction aborts if contended */
 	if (need_resched()) {
 		if (cc->mode == MIGRATE_ASYNC) {
-			cc->contended = true;
+			cc->contended = COMPACT_CONTENDED_SCHED;
 			return true;
 		}
 
@@ -1140,7 +1154,7 @@ out:
 }
 
 static unsigned long compact_zone_order(struct zone *zone, int order,
-		gfp_t gfp_mask, enum migrate_mode mode, bool *contended)
+		gfp_t gfp_mask, enum migrate_mode mode, int *contended)
 {
 	unsigned long ret;
 	struct compact_control cc = {
@@ -1172,14 +1186,15 @@ int sysctl_extfrag_threshold = 500;
  * @gfp_mask: The GFP mask of the current allocation
  * @nodemask: The allowed nodes to allocate from
  * @mode: The migration mode for async, sync light, or sync migration
- * @contended: Return value that is true if compaction was aborted due to lock contention
+ * @contended: Return value that determines if compaction was aborted due to
+ *	       need_resched() or lock contention
  * @candidate_zone: Return the zone where we think allocation should succeed
  *
  * This is the main entry point for direct page compaction.
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			enum migrate_mode mode, bool *contended,
+			enum migrate_mode mode, int *contended,
 			struct zone **candidate_zone)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
@@ -1189,6 +1204,9 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	struct zone *zone;
 	int rc = COMPACT_DEFERRED;
 	int alloc_flags = 0;
+	int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
+
+	*contended = COMPACT_CONTENDED_NONE;
 
 	/* Check if the GFP flags allow compaction */
 	if (!order || !may_enter_fs || !may_perform_io)
@@ -1202,13 +1220,19 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
 								nodemask) {
 		int status;
+		int zone_contended;
 
 		if (compaction_deferred(zone, order))
 			continue;
 
 		status = compact_zone_order(zone, order, gfp_mask, mode,
-						contended);
+							&zone_contended);
 		rc = max(status, rc);
+		/*
+		 * It takes at least one zone that wasn't lock contended
+		 * to clear all_zones_contended.
+		 */
+		all_zones_contended &= zone_contended;
 
 		/* If a normal allocation would succeed, stop compacting */
 		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
@@ -1221,8 +1245,21 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			 * succeeds in this zone.
 			 */
 			compaction_defer_reset(zone, order, false);
-			break;
-		} else if (mode != MIGRATE_ASYNC) {
+			/*
+			 * It is possible that async compaction aborted due to
+			 * need_resched() and the watermarks were ok thanks to
+			 * somebody else freeing memory. The allocation can
+			 * however still fail so we better signal the
+			 * need_resched() contention anyway (this will not
+			 * prevent the allocation attempt).
+			 */
+			if (zone_contended == COMPACT_CONTENDED_SCHED)
+				*contended = COMPACT_CONTENDED_SCHED;
+
+			goto break_loop;
+		}
+
+		if (mode != MIGRATE_ASYNC) {
 			/*
 			 * We think that allocation won't succeed in this zone
 			 * so we defer compaction there. If it ends up
@@ -1230,8 +1267,36 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			 */
 			defer_compaction(zone, order);
 		}
+
+		/*
+		 * We might have stopped compacting due to need_resched() in
+		 * async compaction, or due to a fatal signal detected. In that
+		 * case do not try further zones and signal need_resched()
+		 * contention.
+		 */
+		if ((zone_contended == COMPACT_CONTENDED_SCHED)
+					|| fatal_signal_pending(current)) {
+			*contended = COMPACT_CONTENDED_SCHED;
+			goto break_loop;
+		}
+
+		continue;
+break_loop:
+		/*
+		 * We might not have tried all the zones, so be conservative
+		 * and assume they are not all lock contended.
+		 */
+		all_zones_contended = 0;
+		break;
 	}
 
+	/*
+	 * If at least one zone wasn't deferred or skipped, we report if all
+	 * zones that were tried were lock contended.
+	 */
+	if (rc > COMPACT_SKIPPED && all_zones_contended)
+		*contended = COMPACT_CONTENDED_LOCK;
+
 	return rc;
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 5a0738fa649c..4c1d604c396c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -144,8 +144,8 @@ struct compact_control {
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
-	bool contended;			/* True if a lock was contended, or
-					 * need_resched() true during async
+	int contended;			/* Signal need_sched() or lock
+					 * contention detected during
 					 * compaction
 					 */
 };
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dfbf54b51649..313338d74095 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2297,7 +2297,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int classzone_idx, int migratetype, enum migrate_mode mode,
-	bool *contended_compaction, bool *deferred_compaction)
+	int *contended_compaction, bool *deferred_compaction)
 {
 	struct zone *last_compact_zone = NULL;
 	unsigned long compact_result;
@@ -2371,7 +2371,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int classzone_idx, int migratetype, enum migrate_mode mode,
-	bool *contended_compaction, bool *deferred_compaction)
+	int *contended_compaction, bool *deferred_compaction)
 {
 	return NULL;
 }
@@ -2547,7 +2547,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned long did_some_progress;
 	enum migrate_mode migration_mode = MIGRATE_ASYNC;
 	bool deferred_compaction = false;
-	bool contended_compaction = false;
+	int contended_compaction = COMPACT_CONTENDED_NONE;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -2651,15 +2651,36 @@ rebalance:
 	if (page)
 		goto got_pg;
 
-	/*
-	 * If compaction is deferred for high-order allocations, it is because
-	 * sync compaction recently failed. In this is the case and the caller
-	 * requested a movable allocation that does not heavily disrupt the
-	 * system then fail the allocation instead of entering direct reclaim.
-	 */
-	if ((deferred_compaction || contended_compaction) &&
-						(gfp_mask & __GFP_NO_KSWAPD))
-		goto nopage;
+	/* Checks for THP-specific high-order allocations */
+	if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) {
+		/*
+		 * If compaction is deferred for high-order allocations, it is
+		 * because sync compaction recently failed. If this is the case
+		 * and the caller requested a THP allocation, we do not want
+		 * to heavily disrupt the system, so we fail the allocation
+		 * instead of entering direct reclaim.
+		 */
+		if (deferred_compaction)
+			goto nopage;
+
+		/*
+		 * In all zones where compaction was attempted (and not
+		 * deferred or skipped), lock contention has been detected.
+		 * For THP allocation we do not want to disrupt the others
+		 * so we fallback to base pages instead.
+		 */
+		if (contended_compaction == COMPACT_CONTENDED_LOCK)
+			goto nopage;
+
+		/*
+		 * If compaction was aborted due to need_resched(), we do not
+		 * want to further increase allocation latency, unless it is
+		 * khugepaged trying to collapse.
+		 */
+		if (contended_compaction == COMPACT_CONTENDED_SCHED
+			&& !(current->flags & PF_KTHREAD))
+			goto nopage;
+	}
 
 	/*
 	 * It can become very expensive to allocate transparent hugepages at
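To make the allocator-side decision in the hunk above easier to follow, here is a hedged condensation of the new GFP_TRANSHUGE checks as a standalone predicate; should_fail_thp_alloc() and its parameters are names invented for this sketch and do not exist in mm/page_alloc.c.

/*
 * Hypothetical condensation of the new checks in __alloc_pages_slowpath();
 * not an actual kernel function.
 */
static bool should_fail_thp_alloc(bool deferred_compaction,
				  int contended_compaction,
				  bool caller_is_kthread)
{
	/* Sync compaction recently failed for this order: do not disrupt further. */
	if (deferred_compaction)
		return true;

	/* Every zone that was tried was lock contended: fall back to base pages. */
	if (contended_compaction == COMPACT_CONTENDED_LOCK)
		return true;

	/*
	 * Aborted due to need_resched(): fail for page faults to keep latency
	 * low, but let kernel threads such as khugepaged (PF_KTHREAD) continue
	 * to direct reclaim and a second, sync compaction attempt.
	 */
	if (contended_compaction == COMPACT_CONTENDED_SCHED && !caller_is_kthread)
		return true;

	return false;
}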