author	Vlastimil Babka <vbabka@suse.cz>	2014-12-10 18:43:22 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-12-10 20:41:06 -0500
commit	ebff398017c69a3810bcbc5200ba224d5ccaa207 (patch)
tree	6c63381aa6317eaf8dd4d929195588cf83a9a2c8
parent	1da58ee2a0279a1b0afd3248396de5659b8cf95b (diff)
mm, compaction: pass classzone_idx and alloc_flags to watermark checking
Compaction relies on zone watermark checks for decisions such as whether it's worth starting compaction in compaction_suitable() or whether compaction should stop in compact_finished(). The watermark checks take classzone_idx and alloc_flags parameters, which are related to the memory allocation request, but from the context of compaction they are currently passed as 0, including in direct compaction, which is invoked to satisfy the allocation request and could therefore know the proper values.

The lack of proper values can lead to a mismatch between decisions taken during compaction and decisions related to the allocation request. A missing classzone_idx value means that lowmem_reserve is not taken into account. This has manifested (during recent changes to deferred compaction) when the DMA zone was used as fallback for the preferred Normal zone: compaction_suitable() without a proper classzone_idx would think that the watermarks are already satisfied, but the watermark check in get_page_from_freelist() would fail. Because of this problem, deferring compaction has extra complexity that can be removed in the following patch.

The issue (not confirmed in practice) with missing alloc_flags is opposite in nature. For allocations that include ALLOC_HIGH, ALLOC_HARDER or ALLOC_CMA in alloc_flags (the last includes all MOVABLE allocations on CMA-enabled systems), the watermark checking in compaction with 0 passed will be stricter than in get_page_from_freelist(). In these cases compaction might run for longer than is really needed.

Another issue with compaction_suitable() is that the check "does the zone need compaction at all?" comes only after the check "does the zone have enough free pages for compaction to succeed?". The latter considers extra pages for migration and can therefore in some situations fail and return COMPACT_SKIPPED, even though the high-order allocation would succeed and we should return COMPACT_PARTIAL.

This patch fixes these problems by adding alloc_flags and classzone_idx to struct compact_control and to the related functions involved in direct compaction and watermark checking. Where possible, all other callers of compaction_suitable() pass proper values where those are known. This is currently limited to classzone_idx, which is sometimes known in the kswapd context. However, the direct reclaim callers should_continue_reclaim() and compaction_ready() do not currently know the proper values, so the coordination between reclaim and compaction may still not be as accurate as it could be. This can be fixed later, if it's shown to be an issue.

Additionally, the checks in compaction_suitable() are reordered to address the second issue described above.

The effect of this patch should be slightly better high-order allocation success rates and/or less compaction overhead, depending on the type of allocations and the presence of CMA. It allows simplifying the deferred compaction code in a followup patch. When testing with stress-highalloc, there was a slight improvement (which might be just due to variance) in success rates of non-THP-like allocations.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Christoph Lameter <cl@linux.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
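For illustration, here is a condensed sketch of the order-0 logic behind zone_watermark_ok(), the check discussed above. The helper name watermark_ok_sketch is invented for this sketch and the body is heavily trimmed (the real __zone_watermark_ok() in mm/page_alloc.c also walks the per-order free lists), but the handling of the two parameters follows the kernel of this era and shows what passing 0 for them gives up:

/* Condensed sketch only, not part of this patch. */
static bool watermark_ok_sketch(struct zone *z, int order, unsigned long mark,
				int classzone_idx, int alloc_flags)
{
	long min = mark;
	long free_pages = zone_page_state(z, NR_FREE_PAGES);
	long free_cma = 0;

	/* A high-order page counts as one free page beyond order 0 */
	free_pages -= (1 << order) - 1;

	/* ALLOC_HIGH and ALLOC_HARDER relax the watermark... */
	if (alloc_flags & ALLOC_HIGH)
		min -= min / 2;
	if (alloc_flags & ALLOC_HARDER)
		min -= min / 4;

#ifdef CONFIG_CMA
	/* ...and without ALLOC_CMA, free CMA pages may not be counted */
	if (!(alloc_flags & ALLOC_CMA))
		free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
#endif

	/*
	 * classzone_idx selects the lowmem_reserve protecting this zone
	 * from allocations that would prefer a higher zone; passing 0
	 * makes the reserve term zero and the check too permissive.
	 */
	return free_pages - free_cma > min + z->lowmem_reserve[classzone_idx];
}

With classzone_idx and alloc_flags both passed as 0, compaction's checks can thus be simultaneously too permissive (no lowmem_reserve) and too strict (no flag relaxations) relative to get_page_from_freelist().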
-rw-r--r--	include/linux/compaction.h	| 8
-rw-r--r--	mm/compaction.c	| 48
-rw-r--r--	mm/internal.h	| 2
-rw-r--r--	mm/page_alloc.c	| 1
-rw-r--r--	mm/vmscan.c	| 12
5 files changed, 42 insertions(+), 29 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 60bdf8dc02a3..d896765a15b0 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -33,10 +33,12 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
 			enum migrate_mode mode, int *contended,
+			int alloc_flags, int classzone_idx,
 			struct zone **candidate_zone);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
-extern unsigned long compaction_suitable(struct zone *zone, int order);
+extern unsigned long compaction_suitable(struct zone *zone, int order,
+					int alloc_flags, int classzone_idx);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
@@ -103,6 +105,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
 			enum migrate_mode mode, int *contended,
+			int alloc_flags, int classzone_idx,
 			struct zone **candidate_zone)
 {
 	return COMPACT_CONTINUE;
@@ -116,7 +119,8 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat)
 {
 }
 
-static inline unsigned long compaction_suitable(struct zone *zone, int order)
+static inline unsigned long compaction_suitable(struct zone *zone, int order,
+					int alloc_flags, int classzone_idx)
 {
 	return COMPACT_SKIPPED;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index f9792ba3537c..1fc6736815e0 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1086,9 +1086,9 @@ static int compact_finished(struct zone *zone, struct compact_control *cc,
 
 	/* Compaction run is not finished if the watermark is not met */
 	watermark = low_wmark_pages(zone);
-	watermark += (1 << cc->order);
 
-	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
+	if (!zone_watermark_ok(zone, cc->order, watermark, cc->classzone_idx,
+							cc->alloc_flags))
 		return COMPACT_CONTINUE;
 
 	/* Direct compactor: Is a suitable page free? */
@@ -1114,7 +1114,8 @@ static int compact_finished(struct zone *zone, struct compact_control *cc,
  *   COMPACT_PARTIAL  - If the allocation would succeed without compaction
  *   COMPACT_CONTINUE - If compaction should run now
  */
-unsigned long compaction_suitable(struct zone *zone, int order)
+unsigned long compaction_suitable(struct zone *zone, int order,
+					int alloc_flags, int classzone_idx)
 {
 	int fragindex;
 	unsigned long watermark;
@@ -1126,21 +1127,30 @@ unsigned long compaction_suitable(struct zone *zone, int order)
 	if (order == -1)
 		return COMPACT_CONTINUE;
 
+	watermark = low_wmark_pages(zone);
+	/*
+	 * If watermarks for high-order allocation are already met, there
+	 * should be no need for compaction at all.
+	 */
+	if (zone_watermark_ok(zone, order, watermark, classzone_idx,
+								alloc_flags))
+		return COMPACT_PARTIAL;
+
 	/*
 	 * Watermarks for order-0 must be met for compaction. Note the 2UL.
 	 * This is because during migration, copies of pages need to be
 	 * allocated and for a short time, the footprint is higher
 	 */
-	watermark = low_wmark_pages(zone) + (2UL << order);
-	if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+	watermark += (2UL << order);
+	if (!zone_watermark_ok(zone, 0, watermark, classzone_idx, alloc_flags))
 		return COMPACT_SKIPPED;
 
 	/*
 	 * fragmentation index determines if allocation failures are due to
 	 * low memory or external fragmentation
 	 *
-	 * index of -1000 implies allocations might succeed depending on
-	 * watermarks
+	 * index of -1000 would imply allocations might succeed depending on
+	 * watermarks, but we already failed the high-order watermark check
 	 * index towards 0 implies failure is due to lack of memory
 	 * index towards 1000 implies failure is due to fragmentation
 	 *
@@ -1150,10 +1160,6 @@ unsigned long compaction_suitable(struct zone *zone, int order)
 	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
 		return COMPACT_SKIPPED;
 
-	if (fragindex == -1000 && zone_watermark_ok(zone, order, watermark,
-	    0, 0))
-		return COMPACT_PARTIAL;
-
 	return COMPACT_CONTINUE;
 }
 
@@ -1165,7 +1171,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	const int migratetype = gfpflags_to_migratetype(cc->gfp_mask);
 	const bool sync = cc->mode != MIGRATE_ASYNC;
 
-	ret = compaction_suitable(zone, cc->order);
+	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
+							cc->classzone_idx);
 	switch (ret) {
 	case COMPACT_PARTIAL:
 	case COMPACT_SKIPPED:
@@ -1254,7 +1261,8 @@ out:
 }
 
 static unsigned long compact_zone_order(struct zone *zone, int order,
-		gfp_t gfp_mask, enum migrate_mode mode, int *contended)
+		gfp_t gfp_mask, enum migrate_mode mode, int *contended,
+		int alloc_flags, int classzone_idx)
 {
 	unsigned long ret;
 	struct compact_control cc = {
@@ -1264,6 +1272,8 @@ static unsigned long compact_zone_order(struct zone *zone, int order,
 		.gfp_mask = gfp_mask,
 		.zone = zone,
 		.mode = mode,
+		.alloc_flags = alloc_flags,
+		.classzone_idx = classzone_idx,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -1295,6 +1305,7 @@ int sysctl_extfrag_threshold = 500;
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
 			enum migrate_mode mode, int *contended,
+			int alloc_flags, int classzone_idx,
 			struct zone **candidate_zone)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
@@ -1303,7 +1314,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	struct zoneref *z;
 	struct zone *zone;
 	int rc = COMPACT_DEFERRED;
-	int alloc_flags = 0;
 	int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
 
 	*contended = COMPACT_CONTENDED_NONE;
@@ -1312,10 +1322,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	if (!order || !may_enter_fs || !may_perform_io)
 		return COMPACT_SKIPPED;
 
-#ifdef CONFIG_CMA
-	if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
-		alloc_flags |= ALLOC_CMA;
-#endif
 	/* Compact each zone in the list */
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
 								nodemask) {
@@ -1326,7 +1332,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			continue;
 
 		status = compact_zone_order(zone, order, gfp_mask, mode,
-				&zone_contended);
+				&zone_contended, alloc_flags, classzone_idx);
 		rc = max(status, rc);
 		/*
 		 * It takes at least one zone that wasn't lock contended
@@ -1335,8 +1341,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		all_zones_contended &= zone_contended;
 
 		/* If a normal allocation would succeed, stop compacting */
-		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
-				      alloc_flags)) {
+		if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
+					classzone_idx, alloc_flags)) {
 			*candidate_zone = zone;
 			/*
 			 * We think the allocation will succeed in this zone,
diff --git a/mm/internal.h b/mm/internal.h
index a4f90ba7068e..b643938fcf12 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -168,6 +168,8 @@ struct compact_control {
 
 	int order;			/* order a direct compactor needs */
 	const gfp_t gfp_mask;		/* gfp mask of a direct compactor */
+	const int alloc_flags;		/* alloc flags of a direct compactor */
+	const int classzone_idx;	/* zone index of a direct compactor */
 	struct zone *zone;
 	int contended;			/* Signal need_sched() or lock
 					 * contention detected during
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b7c18f094697..e32121fa2ba9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2341,6 +2341,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	compact_result = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, mode,
 						contended_compaction,
+						alloc_flags, classzone_idx,
 						&last_compact_zone);
 	current->flags &= ~PF_MEMALLOC;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 53157e157061..4636d9e822c1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2249,7 +2249,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 		return true;
 
 	/* If compaction would go ahead or the allocation would succeed, stop */
-	switch (compaction_suitable(zone, sc->order)) {
+	switch (compaction_suitable(zone, sc->order, 0, 0)) {
 	case COMPACT_PARTIAL:
 	case COMPACT_CONTINUE:
 		return false;
@@ -2346,7 +2346,7 @@ static inline bool compaction_ready(struct zone *zone, int order)
 	 * If compaction is not ready to start and allocation is not likely
 	 * to succeed without it, then keep reclaiming.
 	 */
-	if (compaction_suitable(zone, order) == COMPACT_SKIPPED)
+	if (compaction_suitable(zone, order, 0, 0) == COMPACT_SKIPPED)
 		return false;
 
 	return watermark_ok;
@@ -2824,8 +2824,8 @@ static bool zone_balanced(struct zone *zone, int order,
 			balance_gap, classzone_idx, 0))
 		return false;
 
-	if (IS_ENABLED(CONFIG_COMPACTION) && order &&
-	    compaction_suitable(zone, order) == COMPACT_SKIPPED)
+	if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone,
+			order, 0, classzone_idx) == COMPACT_SKIPPED)
 		return false;
 
 	return true;
@@ -2952,8 +2952,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
 	 * from memory. Do not reclaim more than needed for compaction.
 	 */
 	if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
-			compaction_suitable(zone, sc->order) !=
-						COMPACT_SKIPPED)
+			compaction_suitable(zone, sc->order, 0, classzone_idx)
+						!= COMPACT_SKIPPED)
 		testorder = 0;
 
 	/*