author     Mel Gorman <mgorman@techsingularity.net>        2019-03-05 18:44:54 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-03-06 00:07:16 -0500
commit     70b44595eafe9c7c235f076d653a268ca1ab9fdb (patch)
tree       133eb55e36f488e7d337536ff9e9dca121116883
parent     fd1444b2729289ea3ef6b6096be604f8983e9f9f (diff)
mm, compaction: use free lists to quickly locate a migration source
The migration scanner is a linear scan of a zone with a potentially large
search space.  Furthermore, many pageblocks are unusable, such as those
filled with reserved pages or partially filled with pages that cannot
migrate.  These still get scanned in the common case of allocating a THP
and the cost accumulates.

The patch uses a partial search of the free lists to locate a migration
source candidate that is marked as MOVABLE when allocating a THP.  It
prefers picking a block with a larger number of free pages already, on the
basis that fewer pages need to migrate to free the entire block.  The
lowest PFN found during searches is tracked as the basis of the start for
the linear search after the first search of the free list fails.  After
the search, the free list is shuffled so that the next search will not
encounter the same page.  If the search fails then the subsequent searches
will be shorter and the linear scanner is used.

If this search fails, or if the request is for a small or
unmovable/reclaimable allocation, then the linear scanner is still used.
It is somewhat pointless to use the list search in those cases: small free
pages must be used for the search and there is no guarantee that
contiguous movable pages are located within that block.

                                     5.0.0-rc1              5.0.0-rc1
                                 noboost-v3r10          findmig-v3r15
Amean     fault-both-3      3771.41 (   0.00%)     3390.40 (  10.10%)
Amean     fault-both-5      5409.05 (   0.00%)     5082.28 (   6.04%)
Amean     fault-both-7      7040.74 (   0.00%)     7012.51 (   0.40%)
Amean     fault-both-12    11887.35 (   0.00%)    11346.63 (   4.55%)
Amean     fault-both-18    16718.19 (   0.00%)    15324.19 (   8.34%)
Amean     fault-both-24    21157.19 (   0.00%)    16088.50 *  23.96%*
Amean     fault-both-30    21175.92 (   0.00%)    18723.42 *  11.58%*
Amean     fault-both-32    21339.03 (   0.00%)    18612.01 *  12.78%*

                                  5.0.0-rc1           5.0.0-rc1
                              noboost-v3r10       findmig-v3r15
Percentage huge-3        86.50 (   0.00%)    89.83 (   3.85%)
Percentage huge-5        92.52 (   0.00%)    91.96 (  -0.61%)
Percentage huge-7        92.44 (   0.00%)    92.85 (   0.44%)
Percentage huge-12       92.98 (   0.00%)    92.74 (  -0.25%)
Percentage huge-18       91.70 (   0.00%)    91.71 (   0.02%)
Percentage huge-24       91.59 (   0.00%)    92.13 (   0.60%)
Percentage huge-30       90.14 (   0.00%)    93.79 (   4.04%)
Percentage huge-32       90.03 (   0.00%)    91.27 (   1.37%)

This shows an improvement in allocation latencies with similar allocation
success rates.  While not presented, there was a 31% reduction in
migration scanning and an 8% reduction in system CPU usage.  A 2-socket
machine showed similar benefits.

[mgorman@techsingularity.net: several fixes]
  Link: http://lkml.kernel.org/r/20190204120111.GL9565@techsingularity.net
[vbabka@suse.cz: migrate block that was found-fast, some optimisations]
Link: http://lkml.kernel.org/r/20190118175136.31341-10-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
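A note on the "shuffled" free list mentioned above: the patch does not randomise
the list, it rotates every entry it has already inspected (up to and including
the chosen page) to the tail, so the next short search starts on entries it has
not seen yet.  The following standalone userspace sketch is illustration only
and is not part of the patch; the names rotate_past(), push_tail() and the PFN
values are invented stand-ins for what move_freelist_tail() achieves in the
kernel with list_cut_position() followed by list_splice_tail().

/* freelist_rotate_demo.c - illustration only, not kernel code.
 * Build: cc -Wall -o freelist_rotate_demo freelist_rotate_demo.c
 */
#include <stdio.h>

#define NR_PAGES 6

struct node {
        unsigned long pfn;              /* stand-in for a free page's PFN */
        struct node *prev, *next;
};

static struct node head;                /* list head, like free_area->free_list[] */

static void push_tail(struct node *n)
{
        n->prev = head.prev;
        n->next = &head;
        head.prev->next = n;
        head.prev = n;
}

/*
 * Move the entries from the front of the list up to and including @found to
 * the tail, so the next walk starts with entries that have not been looked at
 * yet.  This models the rotation done by move_freelist_tail().
 */
static void rotate_past(struct node *found)
{
        struct node *first = head.next;
        struct node *last = head.prev;

        if (found == last)              /* already the last entry: nothing to do */
                return;

        /* detach the segment [first .. found] from the front */
        head.next = found->next;
        found->next->prev = &head;

        /* splice the segment back in at the tail */
        last->next = first;
        first->prev = last;
        found->next = &head;
        head.prev = found;
}

int main(void)
{
        static struct node pages[NR_PAGES];
        struct node *n;
        int i;

        head.prev = head.next = &head;
        for (i = 0; i < NR_PAGES; i++) {
                pages[i].pfn = 0x1000 + i * 0x200;      /* made-up PFNs */
                push_tail(&pages[i]);
        }

        /* pretend the fast search picked the third entry as a migration source */
        rotate_past(&pages[2]);

        printf("scan order after the search:");
        for (n = head.next; n != &head; n = n->next)
                printf(" %#lx", n->pfn);
        printf("\n");
        return 0;
}

The printed order starts at 0x1600, i.e. the entries already inspected now sit
at the back of the list.  The "subsequent searches will be shorter" part comes
from freelist_scan_limit() below: with COMPACT_CLUSTER_MAX at its usual value
of 32, the per-search entry budget shrinks as 33, 17, 9, 5, 3, 2, 1, ... for
each consecutive fast-search failure.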
-rw-r--r--   drivers/gpu/drm/i915/i915_utils.h     6
-rw-r--r--   include/linux/list.h                 11
-rw-r--r--   mm/compaction.c                     178
-rw-r--r--   mm/internal.h                         2
4 files changed, 188 insertions, 9 deletions
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 9726df37c4c4..540e20eb032c 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -123,12 +123,6 @@ static inline u64 ptr_to_u64(const void *ptr)
 
 #include <linux/list.h>
 
-static inline int list_is_first(const struct list_head *list,
-                                const struct list_head *head)
-{
-        return head->next == list;
-}
-
 static inline void __list_del_many(struct list_head *head,
                                    struct list_head *first)
 {
diff --git a/include/linux/list.h b/include/linux/list.h
index edb7628e46ed..79626b5ab36c 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -207,6 +207,17 @@ static inline void list_bulk_move_tail(struct list_head *head,
 }
 
 /**
+ * list_is_first -- tests whether @list is the first entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_first(const struct list_head *list,
+                                        const struct list_head *head)
+{
+        return list->prev == head;
+}
+
+/**
  * list_is_last - tests whether @list is the last entry in list @head
  * @list: the entry to test
  * @head: the head of the list
diff --git a/mm/compaction.c b/mm/compaction.c
index 3d11c209614a..55f7ab142af2 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1040,6 +1040,12 @@ static bool suitable_migration_target(struct compact_control *cc,
         return false;
 }
 
+static inline unsigned int
+freelist_scan_limit(struct compact_control *cc)
+{
+        return (COMPACT_CLUSTER_MAX >> cc->fast_search_fail) + 1;
+}
+
 /*
  * Test whether the free scanner has reached the same or lower pageblock than
  * the migration scanner, and compaction should thus terminate.
@@ -1050,6 +1056,19 @@ static inline bool compact_scanners_met(struct compact_control *cc)
                 <= (cc->migrate_pfn >> pageblock_order);
 }
 
+/* Reorder the free list to reduce repeated future searches */
+static void
+move_freelist_tail(struct list_head *freelist, struct page *freepage)
+{
+        LIST_HEAD(sublist);
+
+        if (!list_is_first(freelist, &freepage->lru)) {
+                list_cut_position(&sublist, freelist, &freepage->lru);
+                if (!list_empty(&sublist))
+                        list_splice_tail(&sublist, freelist);
+        }
+}
+
 /*
  * Based on information in the current compact_control, find blocks
  * suitable for isolating free pages from and then isolate them.
@@ -1207,6 +1226,148 @@ typedef enum {
  */
 int sysctl_compact_unevictable_allowed __read_mostly = 1;
 
+static inline void
+update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
+{
+        if (cc->fast_start_pfn == ULONG_MAX)
+                return;
+
+        if (!cc->fast_start_pfn)
+                cc->fast_start_pfn = pfn;
+
+        cc->fast_start_pfn = min(cc->fast_start_pfn, pfn);
+}
+
+static inline unsigned long
+reinit_migrate_pfn(struct compact_control *cc)
+{
+        if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX)
+                return cc->migrate_pfn;
+
+        cc->migrate_pfn = cc->fast_start_pfn;
+        cc->fast_start_pfn = ULONG_MAX;
+
+        return cc->migrate_pfn;
+}
+
+/*
+ * Briefly search the free lists for a migration source that already has
+ * some free pages to reduce the number of pages that need migration
+ * before a pageblock is free.
+ */
+static unsigned long fast_find_migrateblock(struct compact_control *cc)
+{
+        unsigned int limit = freelist_scan_limit(cc);
+        unsigned int nr_scanned = 0;
+        unsigned long distance;
+        unsigned long pfn = cc->migrate_pfn;
+        unsigned long high_pfn;
+        int order;
+
+        /* Skip hints are relied on to avoid repeats on the fast search */
+        if (cc->ignore_skip_hint)
+                return pfn;
+
+        /*
+         * If the migrate_pfn is not at the start of a zone or the start
+         * of a pageblock then assume this is a continuation of a previous
+         * scan restarted due to COMPACT_CLUSTER_MAX.
+         */
+        if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn))
+                return pfn;
+
+        /*
+         * For smaller orders, just linearly scan as the number of pages
+         * to migrate should be relatively small and does not necessarily
+         * justify freeing up a large block for a small allocation.
+         */
+        if (cc->order <= PAGE_ALLOC_COSTLY_ORDER)
+                return pfn;
+
+        /*
+         * Only allow kcompactd and direct requests for movable pages to
+         * quickly clear out a MOVABLE pageblock for allocation. This
+         * reduces the risk that a large movable pageblock is freed for
+         * an unmovable/reclaimable small allocation.
+         */
+        if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE)
+                return pfn;
+
+        /*
+         * When starting the migration scanner, pick any pageblock within the
+         * first half of the search space. Otherwise try and pick a pageblock
+         * within the first eighth to reduce the chances that a migration
+         * target later becomes a source.
+         */
+        distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
+        if (cc->migrate_pfn != cc->zone->zone_start_pfn)
+                distance >>= 2;
+        high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
+
+        for (order = cc->order - 1;
+             order >= PAGE_ALLOC_COSTLY_ORDER && pfn == cc->migrate_pfn && nr_scanned < limit;
+             order--) {
+                struct free_area *area = &cc->zone->free_area[order];
+                struct list_head *freelist;
+                unsigned long flags;
+                struct page *freepage;
+
+                if (!area->nr_free)
+                        continue;
+
+                spin_lock_irqsave(&cc->zone->lock, flags);
+                freelist = &area->free_list[MIGRATE_MOVABLE];
+                list_for_each_entry(freepage, freelist, lru) {
+                        unsigned long free_pfn;
+
+                        nr_scanned++;
+                        free_pfn = page_to_pfn(freepage);
+                        if (free_pfn < high_pfn) {
+                                update_fast_start_pfn(cc, free_pfn);
+
+                                /*
+                                 * Avoid if skipped recently. Ideally it would
+                                 * move to the tail but even safe iteration of
+                                 * the list assumes an entry is deleted, not
+                                 * reordered.
+                                 */
+                                if (get_pageblock_skip(freepage)) {
+                                        if (list_is_last(freelist, &freepage->lru))
+                                                break;
+
+                                        continue;
+                                }
+
+                                /* Reorder so that a future search skips recent pages */
+                                move_freelist_tail(freelist, freepage);
+
+                                pfn = pageblock_start_pfn(free_pfn);
+                                cc->fast_search_fail = 0;
+                                set_pageblock_skip(freepage);
+                                break;
+                        }
+
+                        if (nr_scanned >= limit) {
+                                cc->fast_search_fail++;
+                                move_freelist_tail(freelist, freepage);
+                                break;
+                        }
+                }
+                spin_unlock_irqrestore(&cc->zone->lock, flags);
+        }
+
+        cc->total_migrate_scanned += nr_scanned;
+
+        /*
+         * If fast scanning failed then use a cached entry for a page block
+         * that had free pages as the basis for starting a linear scan.
+         */
+        if (pfn == cc->migrate_pfn)
+                pfn = reinit_migrate_pfn(cc);
+
+        return pfn;
+}
+
 /*
  * Isolate all pages that can be migrated from the first suitable block,
  * starting at the block pointed to by the migrate scanner pfn within
@@ -1222,16 +1383,25 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
         const isolate_mode_t isolate_mode =
                 (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
                 (cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
+        bool fast_find_block;
 
         /*
          * Start at where we last stopped, or beginning of the zone as
-         * initialized by compact_zone()
+         * initialized by compact_zone(). The first failure will use
+         * the lowest PFN as the starting point for linear scanning.
          */
-        low_pfn = cc->migrate_pfn;
+        low_pfn = fast_find_migrateblock(cc);
         block_start_pfn = pageblock_start_pfn(low_pfn);
         if (block_start_pfn < zone->zone_start_pfn)
                 block_start_pfn = zone->zone_start_pfn;
 
+        /*
+         * fast_find_migrateblock marks a pageblock skipped, so to avoid
+         * the isolation_suitable check below, check whether the fast
+         * search was successful.
+         */
+        fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
+
         /* Only scan within a pageblock boundary */
         block_end_pfn = pageblock_end_pfn(low_pfn);
 
@@ -1240,6 +1410,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
          * Do not cross the free scanner.
          */
         for (; block_end_pfn <= cc->free_pfn;
+                        fast_find_block = false,
                         low_pfn = block_end_pfn,
                         block_start_pfn = block_end_pfn,
                         block_end_pfn += pageblock_nr_pages) {
@@ -1259,7 +1430,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
                         continue;
 
                 /* If isolation recently failed, do not retry */
-                if (!isolation_suitable(cc, page))
+                if (!isolation_suitable(cc, page) && !fast_find_block)
                         continue;
 
                 /*
@@ -1550,6 +1721,7 @@ static enum compact_result compact_zone(struct compact_control *cc)
          * want to compact the whole zone), but check that it is initialised
          * by ensuring the values are within zone boundaries.
          */
+        cc->fast_start_pfn = 0;
         if (cc->whole_zone) {
                 cc->migrate_pfn = start_pfn;
                 cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
diff --git a/mm/internal.h b/mm/internal.h
index 9b32f4cab0ae..983cb975545f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -188,9 +188,11 @@ struct compact_control {
         unsigned int nr_migratepages;   /* Number of pages to migrate */
         unsigned long free_pfn;         /* isolate_freepages search base */
         unsigned long migrate_pfn;      /* isolate_migratepages search base */
+        unsigned long fast_start_pfn;   /* a pfn to start linear scan from */
         struct zone *zone;
         unsigned long total_migrate_scanned;
         unsigned long total_free_scanned;
+        unsigned int fast_search_fail;  /* failures to use free list searches */
         const gfp_t gfp_mask;           /* gfp mask of a direct compactor */
         int order;                      /* order a direct compactor needs */
         int migratetype;                /* migratetype of direct compactor */