author		Mel Gorman <mgorman@suse.de>	2012-05-29 18:06:19 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-05-29 19:22:19 -0400
commit		41ac1999c3e3563e1810b14878a869c79c9368bb (patch)
tree		3ee7d3778dd638ae1f429444af4ea0360cca191f
parent		c53919adc045bf803252e912f23028a68525753d (diff)
mm: vmscan: do not stall on writeback during memory compaction
This patch stops reclaim/compaction entering sync reclaim, which was only ever
intended for lumpy reclaim and was left in by oversight.  Page migration has
its own logic for stalling on writeback pages if necessary, and memory
compaction already uses it.  Waiting on page writeback is bad for a number of
reasons, but the primary one is that waiting on writeback to a slow device
such as USB can take a considerable length of time.  Page reclaim instead
uses wait_iff_congested() to throttle if too many dirty pages are being
scanned.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ying Han <yinghan@google.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
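In practical terms the change is concentrated in the PageWriteback() branch of
shrink_page_list().  The fragment below is a condensed, illustrative sketch of
the before/after behaviour drawn from the hunks in this patch; it is not a
verbatim copy of the surrounding kernel code, and the wait_iff_congested()
throttling it mentions lives in shrink_inactive_list() rather than in this
function.

	/* Before this patch: sync reclaim could block on each page */
	if (PageWriteback(page)) {
		nr_writeback++;
		if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) && may_enter_fs)
			wait_on_page_writeback(page);	/* may stall on slow devices */
		else {
			unlock_page(page);
			goto keep_reclaim_mode;
		}
	}

	/* After this patch: never wait here; skip the page and keep scanning */
	if (PageWriteback(page)) {
		nr_writeback++;
		unlock_page(page);
		goto keep;
	}

	/*
	 * Throttling still happens, but in shrink_inactive_list() via
	 * wait_iff_congested() when too many dirty/writeback pages are
	 * encountered, rather than by sleeping on individual pages here.
	 */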
-rw-r--r--	include/trace/events/vmscan.h	12
-rw-r--r--	mm/vmscan.c	85
2 files changed, 14 insertions, 83 deletions
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index bdaf32f8a874..82f693395ac5 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -13,7 +13,7 @@
 #define RECLAIM_WB_ANON		0x0001u
 #define RECLAIM_WB_FILE		0x0002u
 #define RECLAIM_WB_MIXED	0x0010u
-#define RECLAIM_WB_SYNC		0x0004u
+#define RECLAIM_WB_SYNC		0x0004u /* Unused, all reclaim async */
 #define RECLAIM_WB_ASYNC	0x0008u
 
 #define show_reclaim_flags(flags) \
@@ -27,13 +27,13 @@
 
 #define trace_reclaim_flags(page, sync) ( \
 	(page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
-	(sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
+	(RECLAIM_WB_ASYNC) \
 	)
 
-#define trace_shrink_flags(file, sync) ( \
-	(sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_MIXED : \
-			(file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \
-	(sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
+#define trace_shrink_flags(file, sync) \
+	( \
+	(file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
+	(RECLAIM_WB_ASYNC) \
 	)
 
 TRACE_EVENT(mm_vmscan_kswapd_sleep,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 546d02ce90ee..e27f27d4cc19 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -56,15 +56,11 @@
 /*
  * reclaim_mode determines how the inactive list is shrunk
  * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
- * RECLAIM_MODE_ASYNC: Do not block
- * RECLAIM_MODE_SYNC: Allow blocking e.g. call wait_on_page_writeback
  * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of
  *			order-0 pages and then compact the zone
  */
 typedef unsigned __bitwise__ reclaim_mode_t;
 #define RECLAIM_MODE_SINGLE		((__force reclaim_mode_t)0x01u)
-#define RECLAIM_MODE_ASYNC		((__force reclaim_mode_t)0x02u)
-#define RECLAIM_MODE_SYNC		((__force reclaim_mode_t)0x04u)
 #define RECLAIM_MODE_COMPACTION	((__force reclaim_mode_t)0x10u)
 
 struct scan_control {
@@ -360,12 +356,8 @@ out:
 	return ret;
 }
 
-static void set_reclaim_mode(int priority, struct scan_control *sc,
-				bool sync)
+static void set_reclaim_mode(int priority, struct scan_control *sc)
 {
-	/* Sync reclaim used only for compaction */
-	reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC;
-
 	/*
 	 * Restrict reclaim/compaction to costly allocations or when
 	 * under memory pressure
@@ -373,14 +365,14 @@ static void set_reclaim_mode(int priority, struct scan_control *sc,
 	if (COMPACTION_BUILD && sc->order &&
 			(sc->order > PAGE_ALLOC_COSTLY_ORDER ||
 			 priority < DEF_PRIORITY - 2))
-		sc->reclaim_mode = RECLAIM_MODE_COMPACTION | syncmode;
+		sc->reclaim_mode = RECLAIM_MODE_COMPACTION;
 	else
-		sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
+		sc->reclaim_mode = RECLAIM_MODE_SINGLE;
 }
 
 static void reset_reclaim_mode(struct scan_control *sc)
 {
-	sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
+	sc->reclaim_mode = RECLAIM_MODE_SINGLE;
 }
 
 static inline int is_page_cache_freeable(struct page *page)
@@ -791,19 +783,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		if (PageWriteback(page)) {
 			nr_writeback++;
-			/*
-			 * Synchronous reclaim cannot queue pages for
-			 * writeback due to the possibility of stack overflow
-			 * but if it encounters a page under writeback, wait
-			 * for the IO to complete.
-			 */
-			if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) &&
-			    may_enter_fs)
-				wait_on_page_writeback(page);
-			else {
-				unlock_page(page);
-				goto keep_reclaim_mode;
-			}
+			unlock_page(page);
+			goto keep;
 		}
 
 		references = page_check_references(page, mz, sc);
@@ -886,7 +867,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto activate_locked;
 			case PAGE_SUCCESS:
 				if (PageWriteback(page))
-					goto keep_reclaim_mode;
+					goto keep;
 				if (PageDirty(page))
 					goto keep;
 
@@ -985,8 +966,6 @@ activate_locked:
 keep_locked:
 		unlock_page(page);
 keep:
-		reset_reclaim_mode(sc);
-keep_reclaim_mode:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
@@ -1342,47 +1321,6 @@ update_isolated_counts(struct mem_cgroup_zone *mz,
 }
 
 /*
- * Returns true if a direct reclaim should wait on pages under writeback.
- *
- * If we are direct reclaiming for contiguous pages and we do not reclaim
- * everything in the list, try again and wait for writeback IO to complete.
- * This will stall high-order allocations noticeably. Only do that when really
- * need to free the pages under high memory pressure.
- */
-static inline bool should_reclaim_stall(unsigned long nr_taken,
-					unsigned long nr_freed,
-					int priority,
-					struct scan_control *sc)
-{
-	int stall_priority;
-
-	/* kswapd should not stall on sync IO */
-	if (current_is_kswapd())
-		return false;
-
-	/* Only stall for memory compaction */
-	if (sc->reclaim_mode & RECLAIM_MODE_SINGLE)
-		return false;
-
-	/* If we have reclaimed everything on the isolated list, no stall */
-	if (nr_freed == nr_taken)
-		return false;
-
-	/*
-	 * For high-order allocations, there are two stall thresholds.
-	 * High-cost allocations stall immediately where as lower
-	 * order allocations such as stacks require the scanning
-	 * priority to be much higher before stalling.
-	 */
-	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		stall_priority = DEF_PRIORITY;
-	else
-		stall_priority = DEF_PRIORITY / 3;
-
-	return priority <= stall_priority;
-}
-
-/*
  * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
  * of reclaimed pages
  */
@@ -1410,7 +1348,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 		return SWAP_CLUSTER_MAX;
 	}
 
-	set_reclaim_mode(priority, sc, false);
+	set_reclaim_mode(priority, sc);
 
 	lru_add_drain();
 
@@ -1442,13 +1380,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 	nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
 						&nr_dirty, &nr_writeback);
 
-	/* Check if we should syncronously wait for writeback */
-	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
-		set_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, mz, sc,
-					priority, &nr_dirty, &nr_writeback);
-	}
-
 	spin_lock_irq(&zone->lru_lock);
 
 	reclaim_stat->recent_scanned[0] += nr_anon;