-rw-r--r--  include/trace/events/vmscan.h |  26
-rw-r--r--  mm/vmscan.c                   | 144
2 files changed, 19 insertions, 151 deletions
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 572195459d58..bdaf32f8a874 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -263,22 +263,16 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
 		unsigned long nr_requested,
 		unsigned long nr_scanned,
 		unsigned long nr_taken,
-		unsigned long nr_lumpy_taken,
-		unsigned long nr_lumpy_dirty,
-		unsigned long nr_lumpy_failed,
 		isolate_mode_t isolate_mode,
 		int file),
 
-	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file),
+	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file),
 
 	TP_STRUCT__entry(
 		__field(int, order)
 		__field(unsigned long, nr_requested)
 		__field(unsigned long, nr_scanned)
 		__field(unsigned long, nr_taken)
-		__field(unsigned long, nr_lumpy_taken)
-		__field(unsigned long, nr_lumpy_dirty)
-		__field(unsigned long, nr_lumpy_failed)
 		__field(isolate_mode_t, isolate_mode)
 		__field(int, file)
 	),
@@ -288,22 +282,16 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
 		__entry->nr_requested = nr_requested;
 		__entry->nr_scanned = nr_scanned;
 		__entry->nr_taken = nr_taken;
-		__entry->nr_lumpy_taken = nr_lumpy_taken;
-		__entry->nr_lumpy_dirty = nr_lumpy_dirty;
-		__entry->nr_lumpy_failed = nr_lumpy_failed;
 		__entry->isolate_mode = isolate_mode;
 		__entry->file = file;
 	),
 
-	TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu file=%d",
+	TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu file=%d",
 		__entry->isolate_mode,
 		__entry->order,
 		__entry->nr_requested,
 		__entry->nr_scanned,
 		__entry->nr_taken,
-		__entry->nr_lumpy_taken,
-		__entry->nr_lumpy_dirty,
-		__entry->nr_lumpy_failed,
 		__entry->file)
 );
 
@@ -313,13 +301,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
 		unsigned long nr_requested,
 		unsigned long nr_scanned,
 		unsigned long nr_taken,
-		unsigned long nr_lumpy_taken,
-		unsigned long nr_lumpy_dirty,
-		unsigned long nr_lumpy_failed,
 		isolate_mode_t isolate_mode,
 		int file),
 
-	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file)
+	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
 
 );
 
@@ -329,13 +314,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
 		unsigned long nr_requested,
 		unsigned long nr_scanned,
 		unsigned long nr_taken,
-		unsigned long nr_lumpy_taken,
-		unsigned long nr_lumpy_dirty,
-		unsigned long nr_lumpy_failed,
 		isolate_mode_t isolate_mode,
 		int file),
 
-	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file)
+	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file)
 
 );
 
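Note for anyone post-processing these events: the contig_taken=, contig_dirty= and contig_failed= fields disappear from the formatted output, so parsers keyed on them need updating. A minimal user-space sketch of the new line format follows; the values are made up purely for illustration, and the program only mimics the reduced TP_printk() string, it is not kernel code.

#include <stdio.h>

/* Emit one line in the same shape as the reduced TP_printk() above. */
int main(void)
{
	int isolate_mode = 1;		/* illustrative value, not a real mode bit */
	int order = 3, file = 1;
	unsigned long nr_requested = 32, nr_scanned = 45, nr_taken = 32;

	printf("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu "
	       "nr_taken=%lu file=%d\n",
	       isolate_mode, order, nr_requested, nr_scanned, nr_taken, file);
	return 0;
}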
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ca46080bb074..546d02ce90ee 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -58,9 +58,6 @@
  * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
  * RECLAIM_MODE_ASYNC:  Do not block
  * RECLAIM_MODE_SYNC:   Allow blocking e.g. call wait_on_page_writeback
- * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference
- *			page from the LRU and reclaim all pages within a
- *			naturally aligned range
  * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of
  *			order-0 pages and then compact the zone
  */
@@ -68,7 +65,6 @@ typedef unsigned __bitwise__ reclaim_mode_t;
 #define RECLAIM_MODE_SINGLE		((__force reclaim_mode_t)0x01u)
 #define RECLAIM_MODE_ASYNC		((__force reclaim_mode_t)0x02u)
 #define RECLAIM_MODE_SYNC		((__force reclaim_mode_t)0x04u)
-#define RECLAIM_MODE_LUMPYRECLAIM	((__force reclaim_mode_t)0x08u)
 #define RECLAIM_MODE_COMPACTION		((__force reclaim_mode_t)0x10u)
 
 struct scan_control {
@@ -367,27 +363,17 @@ out:
 static void set_reclaim_mode(int priority, struct scan_control *sc,
 				   bool sync)
 {
+	/* Sync reclaim used only for compaction */
 	reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC;
 
 	/*
-	 * Initially assume we are entering either lumpy reclaim or
-	 * reclaim/compaction.Depending on the order, we will either set the
-	 * sync mode or just reclaim order-0 pages later.
-	 */
-	if (COMPACTION_BUILD)
-		sc->reclaim_mode = RECLAIM_MODE_COMPACTION;
-	else
-		sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM;
-
-	/*
-	 * Avoid using lumpy reclaim or reclaim/compaction if possible by
-	 * restricting when its set to either costly allocations or when
+	 * Restrict reclaim/compaction to costly allocations or when
 	 * under memory pressure
 	 */
-	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		sc->reclaim_mode |= syncmode;
-	else if (sc->order && priority < DEF_PRIORITY - 2)
-		sc->reclaim_mode |= syncmode;
+	if (COMPACTION_BUILD && sc->order &&
+			(sc->order > PAGE_ALLOC_COSTLY_ORDER ||
+			 priority < DEF_PRIORITY - 2))
+		sc->reclaim_mode = RECLAIM_MODE_COMPACTION | syncmode;
 	else
 		sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
 }
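The rewritten set_reclaim_mode() collapses the old two-step selection into one test: reclaim/compaction is chosen only when compaction is built in, the allocation is high-order, and either the order is costly or priority has dropped below DEF_PRIORITY - 2; everything else gets plain order-0 asynchronous reclaim. The standalone sketch below models that decision on plain integers, assuming the usual values PAGE_ALLOC_COSTLY_ORDER = 3 and DEF_PRIORITY = 12 and treating compaction as built in; it illustrates the policy and is not the kernel function itself.

#include <stdio.h>

#define COMPACTION_BUILD        1   /* assume CONFIG_COMPACTION=y */
#define PAGE_ALLOC_COSTLY_ORDER 3   /* assumed usual value */
#define DEF_PRIORITY            12  /* assumed usual value */

#define RECLAIM_MODE_SINGLE     0x01u
#define RECLAIM_MODE_ASYNC      0x02u
#define RECLAIM_MODE_SYNC       0x04u
#define RECLAIM_MODE_COMPACTION 0x10u

/* Mirrors the patched set_reclaim_mode() decision on plain integers. */
static unsigned int reclaim_mode(int order, int priority, int sync)
{
	unsigned int syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC;

	if (COMPACTION_BUILD && order &&
	    (order > PAGE_ALLOC_COSTLY_ORDER || priority < DEF_PRIORITY - 2))
		return RECLAIM_MODE_COMPACTION | syncmode;

	return RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
}

int main(void)
{
	/* order-9 (hugepage) allocation under light pressure -> compaction */
	printf("order=9 prio=12 -> 0x%x\n", reclaim_mode(9, 12, 0));
	/* order-2 allocation, little pressure -> plain order-0 reclaim */
	printf("order=2 prio=12 -> 0x%x\n", reclaim_mode(2, 12, 0));
	/* order-2 allocation, heavy pressure (priority < 10) -> compaction */
	printf("order=2 prio=5  -> 0x%x\n", reclaim_mode(2, 5, 0));
	return 0;
}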
@@ -416,10 +402,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
 		return 1;
 	if (bdi == current->backing_dev_info)
 		return 1;
-
-	/* lumpy reclaim for hugepage often need a lot of write */
-	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		return 1;
 	return 0;
 }
 
@@ -710,10 +692,6 @@ static enum page_references page_check_references(struct page *page,
 	referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
 	referenced_page = TestClearPageReferenced(page);
 
-	/* Lumpy reclaim - ignore references */
-	if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-		return PAGEREF_RECLAIM;
-
 	/*
 	 * Mlock lost the isolation race with us. Let try_to_unmap()
 	 * move the page to the unevictable list.
@@ -824,7 +802,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				wait_on_page_writeback(page);
 			else {
 				unlock_page(page);
-				goto keep_lumpy;
+				goto keep_reclaim_mode;
 			}
 		}
 
@@ -908,7 +886,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto activate_locked;
 		case PAGE_SUCCESS:
 			if (PageWriteback(page))
-				goto keep_lumpy;
+				goto keep_reclaim_mode;
 			if (PageDirty(page))
 				goto keep;
 
@@ -1008,7 +986,7 @@ keep_locked:
 		unlock_page(page);
 keep:
 		reset_reclaim_mode(sc);
-keep_lumpy:
+keep_reclaim_mode:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
@@ -1064,11 +1042,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
 	if (!all_lru_mode && !!page_is_file_cache(page) != file)
 		return ret;
 
-	/*
-	 * When this function is being called for lumpy reclaim, we
-	 * initially look into all LRU pages, active, inactive and
-	 * unevictable; only give shrink_page_list evictable pages.
-	 */
+	/* Do not give back unevictable pages for compaction */
 	if (PageUnevictable(page))
 		return ret;
 
@@ -1153,9 +1127,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	struct lruvec *lruvec;
 	struct list_head *src;
 	unsigned long nr_taken = 0;
-	unsigned long nr_lumpy_taken = 0;
-	unsigned long nr_lumpy_dirty = 0;
-	unsigned long nr_lumpy_failed = 0;
 	unsigned long scan;
 	int lru = LRU_BASE;
 
@@ -1168,10 +1139,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
 		struct page *page;
-		unsigned long pfn;
-		unsigned long end_pfn;
-		unsigned long page_pfn;
-		int zone_id;
 
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
@@ -1193,84 +1160,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		default:
 			BUG();
 		}
-
-		if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM))
-			continue;
-
-		/*
-		 * Attempt to take all pages in the order aligned region
-		 * surrounding the tag page. Only take those pages of
-		 * the same active state as that tag page. We may safely
-		 * round the target page pfn down to the requested order
-		 * as the mem_map is guaranteed valid out to MAX_ORDER,
-		 * where that page is in a different zone we will detect
-		 * it from its zone id and abort this block scan.
-		 */
-		zone_id = page_zone_id(page);
-		page_pfn = page_to_pfn(page);
-		pfn = page_pfn & ~((1 << sc->order) - 1);
-		end_pfn = pfn + (1 << sc->order);
-		for (; pfn < end_pfn; pfn++) {
-			struct page *cursor_page;
-
-			/* The target page is in the block, ignore it. */
-			if (unlikely(pfn == page_pfn))
-				continue;
-
-			/* Avoid holes within the zone. */
-			if (unlikely(!pfn_valid_within(pfn)))
-				break;
-
-			cursor_page = pfn_to_page(pfn);
-
-			/* Check that we have not crossed a zone boundary. */
-			if (unlikely(page_zone_id(cursor_page) != zone_id))
-				break;
-
-			/*
-			 * If we don't have enough swap space, reclaiming of
-			 * anon page which don't already have a swap slot is
-			 * pointless.
-			 */
-			if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
-			    !PageSwapCache(cursor_page))
-				break;
-
-			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
-				unsigned int isolated_pages;
-
-				mem_cgroup_lru_del(cursor_page);
-				list_move(&cursor_page->lru, dst);
-				isolated_pages = hpage_nr_pages(cursor_page);
-				nr_taken += isolated_pages;
-				nr_lumpy_taken += isolated_pages;
-				if (PageDirty(cursor_page))
-					nr_lumpy_dirty += isolated_pages;
-				scan++;
-				pfn += isolated_pages - 1;
-			} else {
-				/*
-				 * Check if the page is freed already.
-				 *
-				 * We can't use page_count() as that
-				 * requires compound_head and we don't
-				 * have a pin on the page here. If a
-				 * page is tail, we may or may not
-				 * have isolated the head, so assume
-				 * it's not free, it'd be tricky to
-				 * track the head status without a
-				 * page pin.
-				 */
-				if (!PageTail(cursor_page) &&
-				    !atomic_read(&cursor_page->_count))
-					continue;
-				break;
-			}
-		}
-
-		/* If we break out of the loop above, lumpy reclaim failed */
-		if (pfn < end_pfn)
-			nr_lumpy_failed++;
 	}
 
 	*nr_scanned = scan;
@@ -1278,7 +1167,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	trace_mm_vmscan_lru_isolate(sc->order,
 			nr_to_scan, scan,
 			nr_taken,
-			nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
 			mode, file);
 	return nr_taken;
 }
@@ -1466,13 +1354,13 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
 					int priority,
 					struct scan_control *sc)
 {
-	int lumpy_stall_priority;
+	int stall_priority;
 
 	/* kswapd should not stall on sync IO */
 	if (current_is_kswapd())
 		return false;
 
-	/* Only stall on lumpy reclaim */
+	/* Only stall for memory compaction */
 	if (sc->reclaim_mode & RECLAIM_MODE_SINGLE)
 		return false;
 
@@ -1487,11 +1375,11 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
 	 * priority to be much higher before stalling.
 	 */
 	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		lumpy_stall_priority = DEF_PRIORITY;
+		stall_priority = DEF_PRIORITY;
 	else
-		lumpy_stall_priority = DEF_PRIORITY / 3;
+		stall_priority = DEF_PRIORITY / 3;
 
-	return priority <= lumpy_stall_priority;
+	return priority <= stall_priority;
 }
 
 /*
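Apart from the rename of lumpy_stall_priority to stall_priority, the stall heuristic above is unchanged: for costly orders the caller may stall on writeback at any priority, otherwise only once priority has fallen to DEF_PRIORITY / 3 or below. The small standalone model below covers just that threshold, again assuming PAGE_ALLOC_COSTLY_ORDER = 3 and DEF_PRIORITY = 12; the kswapd and RECLAIM_MODE_SINGLE early exits shown in the previous hunk are deliberately left out, and stall_allowed() is a name invented for the sketch.

#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER 3   /* assumed usual value */
#define DEF_PRIORITY            12  /* assumed usual value */

/* Models only the priority threshold of should_reclaim_stall(). */
static int stall_allowed(int order, int priority)
{
	int stall_priority;

	if (order > PAGE_ALLOC_COSTLY_ORDER)
		stall_priority = DEF_PRIORITY;
	else
		stall_priority = DEF_PRIORITY / 3;

	return priority <= stall_priority;
}

int main(void)
{
	printf("order=9 priority=12 -> %d\n", stall_allowed(9, 12)); /* 1: costly orders may always stall */
	printf("order=2 priority=12 -> %d\n", stall_allowed(2, 12)); /* 0: not enough pressure yet */
	printf("order=2 priority=4  -> %d\n", stall_allowed(2, 4));  /* 1: heavy pressure */
	return 0;
}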
@@ -1523,8 +1411,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 	}
 
 	set_reclaim_mode(priority, sc, false);
-	if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-		isolate_mode |= ISOLATE_ACTIVE;
 
 	lru_add_drain();
 