author		Mel Gorman <mgorman@techsingularity.net>	2019-03-05 18:45:01 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-03-06 00:07:16 -0500
commit		5a811889de10f1ebb8e03a2744be006e909c405c
tree		77a03cb8068270555d00899fca9e09ca1b1c8ddc	/mm/compaction.c
parent		e380bebe4771548df9bece8b7ad9dab07d9158a6
mm, compaction: use free lists to quickly locate a migration target
Similar to the migration scanner, this patch uses the free lists to quickly
locate a migration target. The search is different in that lower orders will
be searched for a suitable high PFN if necessary, but the search is still
bounded. This is justified on the grounds that the free scanner typically
scans linearly much more than the migration scanner. If a free page is found,
it is isolated and compaction continues if enough pages were isolated. For
SYNC* scanning, the full pageblock is scanned for any remaining free pages so
that it can be marked for skipping in the near future.

1-socket thpfioscale
                                    5.0.0-rc1              5.0.0-rc1
                                isolmig-v3r15         findfree-v3r16
Amean     fault-both-3      3024.41 (   0.00%)     3200.68 (  -5.83%)
Amean     fault-both-5      4749.30 (   0.00%)     4847.75 (  -2.07%)
Amean     fault-both-7      6454.95 (   0.00%)     6658.92 (  -3.16%)
Amean     fault-both-12    10324.83 (   0.00%)    11077.62 (  -7.29%)
Amean     fault-both-18    12896.82 (   0.00%)    12403.97 (   3.82%)
Amean     fault-both-24    13470.60 (   0.00%)    15607.10 * -15.86%*
Amean     fault-both-30    17143.99 (   0.00%)    18752.27 (  -9.38%)
Amean     fault-both-32    17743.91 (   0.00%)    21207.54 * -19.52%*

The impact on latency is variable, but the search is optimistic and sensitive
to the exact system state. Success rates are similar; the major impact is on
the rate of scanning:

                                    5.0.0-rc1      5.0.0-rc1
                                isolmig-v3r15 findfree-v3r16
Compaction migrate scanned       25646769       29507205
Compaction free scanned         201558184      100359571

The free scan rates are reduced by 50%. The 2-socket reductions for the free
scanner are more dramatic, which likely reflects that the machine has more
memory.

[dan.carpenter@oracle.com: fix static checker warning]
[vbabka@suse.cz: correct number of pages scanned for lower orders]
Link: http://lkml.kernel.org/r/20190118175136.31341-12-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
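For orientation, the window arithmetic the patch uses to pick candidates
(prefer the top quarter of the space between the two scanners, fall back to
the top half) can be reproduced in isolation. The following is a minimal
userspace C sketch, not kernel code: the pageblock order and the scanner
positions are invented for illustration.

#include <stdio.h>

/* Assumption for the sketch: order-9 pageblocks (2MB with 4KiB pages) */
#define PAGEBLOCK_ORDER		9UL
#define PAGEBLOCK_NR_PAGES	(1UL << PAGEBLOCK_ORDER)

/* Round a pfn down to the start of its pageblock */
static unsigned long pageblock_start_pfn(unsigned long pfn)
{
	return pfn & ~(PAGEBLOCK_NR_PAGES - 1);
}

int main(void)
{
	/* Hypothetical scanner positions within one zone */
	unsigned long migrate_pfn = 0x10000;	/* migration scanner, low end */
	unsigned long free_pfn    = 0x80000;	/* free scanner, high end */

	/*
	 * Mirrors the calculation in fast_isolate_freepages(): the preferred
	 * point is in the top quarter of the scan space, with the top half
	 * as a fallback.
	 */
	unsigned long distance = free_pfn - migrate_pfn;
	unsigned long low_pfn = pageblock_start_pfn(free_pfn - (distance >> 2));
	unsigned long min_pfn = pageblock_start_pfn(free_pfn - (distance >> 1));

	printf("preferred window: [%#lx, %#lx)\n", low_pfn, free_pfn);
	printf("fallback window:  [%#lx, %#lx)\n", min_pfn, low_pfn);
	return 0;
}

In the patch itself, a free page whose PFN lands in the preferred window is
taken immediately and resets fast_search_fail; one that only lands in the
fallback window is merely remembered as high_pfn and halves the remaining
scan limit.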
Diffstat (limited to 'mm/compaction.c')
-rw-r--r--	mm/compaction.c	218
1 file changed, 213 insertions(+), 5 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 097572e2ec5d..6d42ea126242 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1124,7 +1124,29 @@ static inline bool compact_scanners_met(struct compact_control *cc)
 		<= (cc->migrate_pfn >> pageblock_order);
 }
 
-/* Reorder the free list to reduce repeated future searches */
+/*
+ * Used when scanning for a suitable migration target which scans freelists
+ * in reverse. Reorders the list such that the unscanned pages are scanned
+ * first on the next iteration of the free scanner.
+ */
+static void
+move_freelist_head(struct list_head *freelist, struct page *freepage)
+{
+	LIST_HEAD(sublist);
+
+	if (!list_is_last(freelist, &freepage->lru)) {
+		list_cut_before(&sublist, freelist, &freepage->lru);
+		if (!list_empty(&sublist))
+			list_splice_tail(&sublist, freelist);
+	}
+}
+
+/*
+ * Similar to move_freelist_head except used by the migration scanner
+ * when scanning forward. It's possible for these list operations to
+ * move against each other if they search the free list exactly in
+ * lockstep.
+ */
 static void
 move_freelist_tail(struct list_head *freelist, struct page *freepage)
 {
@@ -1137,6 +1159,186 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage)
 	}
 }
 
+static void
+fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
+{
+	unsigned long start_pfn, end_pfn;
+	struct page *page = pfn_to_page(pfn);
+
+	/* Do not search around if there are enough pages already */
+	if (cc->nr_freepages >= cc->nr_migratepages)
+		return;
+
+	/* Minimise scanning during async compaction */
+	if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC)
+		return;
+
+	/* Pageblock boundaries */
+	start_pfn = pageblock_start_pfn(pfn);
+	end_pfn = min(start_pfn + pageblock_nr_pages, zone_end_pfn(cc->zone));
+
+	/* Scan before */
+	if (start_pfn != pfn) {
+		isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, false);
+		if (cc->nr_freepages >= cc->nr_migratepages)
+			return;
+	}
+
+	/* Scan after */
+	start_pfn = pfn + nr_isolated;
+	if (start_pfn != end_pfn)
+		isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, false);
+
+	/* Skip this pageblock in the future as it's full or nearly full */
+	if (cc->nr_freepages < cc->nr_migratepages)
+		set_pageblock_skip(page);
+}
+
+static unsigned long
+fast_isolate_freepages(struct compact_control *cc)
+{
+	unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
+	unsigned int nr_scanned = 0;
+	unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
+	unsigned long nr_isolated = 0;
+	unsigned long distance;
+	struct page *page = NULL;
+	bool scan_start = false;
+	int order;
+
+	/* Full compaction passes in a negative order */
+	if (cc->order <= 0)
+		return cc->free_pfn;
+
+	/*
+	 * If starting the scan, use a deeper search and use the highest
+	 * PFN found if a suitable one is not found.
+	 */
+	if (cc->free_pfn == pageblock_start_pfn(zone_end_pfn(cc->zone) - 1)) {
+		limit = pageblock_nr_pages >> 1;
+		scan_start = true;
+	}
+
+	/*
+	 * Preferred point is in the top quarter of the scan space but take
+	 * a pfn from the top half if the search is problematic.
+	 */
+	distance = (cc->free_pfn - cc->migrate_pfn);
+	low_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 2));
+	min_pfn = pageblock_start_pfn(cc->free_pfn - (distance >> 1));
+
+	if (WARN_ON_ONCE(min_pfn > low_pfn))
+		low_pfn = min_pfn;
+
+	for (order = cc->order - 1;
+	     order >= 0 && !page;
+	     order--) {
+		struct free_area *area = &cc->zone->free_area[order];
+		struct list_head *freelist;
+		struct page *freepage;
+		unsigned long flags;
+		unsigned int order_scanned = 0;
+
+		if (!area->nr_free)
+			continue;
+
+		spin_lock_irqsave(&cc->zone->lock, flags);
+		freelist = &area->free_list[MIGRATE_MOVABLE];
+		list_for_each_entry_reverse(freepage, freelist, lru) {
+			unsigned long pfn;
+
+			order_scanned++;
+			nr_scanned++;
+			pfn = page_to_pfn(freepage);
+
+			if (pfn >= highest)
+				highest = pageblock_start_pfn(pfn);
+
+			if (pfn >= low_pfn) {
+				cc->fast_search_fail = 0;
+				page = freepage;
+				break;
+			}
+
+			if (pfn >= min_pfn && pfn > high_pfn) {
+				high_pfn = pfn;
+
+				/* Shorten the scan if a candidate is found */
+				limit >>= 1;
+			}
+
+			if (order_scanned >= limit)
+				break;
+		}
+
+		/* Use a minimum pfn if a preferred one was not found */
+		if (!page && high_pfn) {
+			page = pfn_to_page(high_pfn);
+
+			/* Update freepage for the list reorder below */
+			freepage = page;
+		}
+
+		/* Reorder so a future search skips recent pages */
+		move_freelist_head(freelist, freepage);
+
+		/* Isolate the page if available */
+		if (page) {
+			if (__isolate_free_page(page, order)) {
+				set_page_private(page, order);
+				nr_isolated = 1 << order;
+				cc->nr_freepages += nr_isolated;
+				list_add_tail(&page->lru, &cc->freepages);
+				count_compact_events(COMPACTISOLATED, nr_isolated);
+			} else {
+				/* If isolation fails, abort the search */
+				order = -1;
+				page = NULL;
+			}
+		}
+
+		spin_unlock_irqrestore(&cc->zone->lock, flags);
+
+		/*
+		 * Smaller scan on next order so the total scan is related
+		 * to freelist_scan_limit.
+		 */
+		if (order_scanned >= limit)
+			limit = min(1U, limit >> 1);
+	}
+
+	if (!page) {
+		cc->fast_search_fail++;
+		if (scan_start) {
+			/*
+			 * Use the highest PFN found above min. If one was
+			 * not found, be pessimistic for direct compaction
+			 * and use the min mark.
+			 */
+			if (highest) {
+				page = pfn_to_page(highest);
+				cc->free_pfn = highest;
+			} else {
+				if (cc->direct_compaction) {
+					page = pfn_to_page(min_pfn);
+					cc->free_pfn = min_pfn;
+				}
+			}
+		}
+	}
+
+	if (highest && highest > cc->zone->compact_cached_free_pfn)
+		cc->zone->compact_cached_free_pfn = highest;
+
+	cc->total_free_scanned += nr_scanned;
+	if (!page)
+		return cc->free_pfn;
+
+	low_pfn = page_to_pfn(page);
+	fast_isolate_around(cc, low_pfn, nr_isolated);
+	return low_pfn;
+}
+
 /*
  * Based on information in the current compact_control, find blocks
  * suitable for isolating free pages from and then isolate them.
@@ -1151,6 +1353,11 @@ static void isolate_freepages(struct compact_control *cc)
 	unsigned long low_pfn;	     /* lowest pfn scanner is able to scan */
 	struct list_head *freelist = &cc->freepages;
 
+	/* Try a small search of the free lists for a candidate */
+	isolate_start_pfn = fast_isolate_freepages(cc);
+	if (cc->nr_freepages)
+		goto splitmap;
+
 	/*
 	 * Initialise the free scanner. The starting point is where we last
 	 * successfully isolated from, zone-cached value, or the end of the
@@ -1163,7 +1370,7 @@ static void isolate_freepages(struct compact_control *cc)
 	 * is using.
 	 */
 	isolate_start_pfn = cc->free_pfn;
-	block_start_pfn = pageblock_start_pfn(cc->free_pfn);
+	block_start_pfn = pageblock_start_pfn(isolate_start_pfn);
 	block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
 						zone_end_pfn(zone));
 	low_pfn = pageblock_end_pfn(cc->migrate_pfn);
@@ -1227,9 +1434,6 @@ static void isolate_freepages(struct compact_control *cc)
 		}
 	}
 
-	/* __isolate_free_page() does not map the pages */
-	split_map_pages(freelist);
-
 	/*
 	 * Record where the free scanner will restart next time. Either we
 	 * broke from the loop and set isolate_start_pfn based on the last
@@ -1237,6 +1441,10 @@ static void isolate_freepages(struct compact_control *cc)
 	 * and the loop terminated due to isolate_start_pfn < low_pfn
 	 */
 	cc->free_pfn = isolate_start_pfn;
+
+splitmap:
+	/* __isolate_free_page() does not map the pages */
+	split_map_pages(freelist);
 }
 
 /*