author		Mel Gorman <mel@csn.ul.ie>	2011-01-13 18:45:56 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-01-13 20:32:33 -0500
commit		3e7d344970673c5334cf7b5bb27c8c0942b06126 (patch)
tree		832ecb4da5fd27efa5a503df5b96bfdee2a52ffd /mm/vmscan.c
parent		ee64fc9354e515a79c7232cfde65c88ec627308b (diff)
mm: vmscan: reclaim order-0 and use compaction instead of lumpy reclaim
Lumpy reclaim is disruptive. It reclaims a large number of pages and
ignores the age of the pages it reclaims. This can incur significant
stalls and potentially increase the number of major faults.
Compaction has reached the point where it is considered reasonably stable
(meaning it has passed a lot of testing) and is a potential candidate for
displacing lumpy reclaim. This patch introduces reclaim/compaction, an
alternative to lumpy reclaim that is used when compaction is available.
The basic operation is very simple: instead of selecting a contiguous
range of pages to reclaim, a number of order-0 pages are reclaimed and
compaction is run later, either by kswapd (compact_zone_order()) or by
direct compaction (__alloc_pages_direct_compact()).
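
As an illustration of the flow described above, here is a minimal userspace
sketch in plain C, not kernel code: the zone_model structure, the batch size
and the starting numbers are invented for the example; only the 2 << order
threshold mirrors the should_continue_reclaim() check added by the patch
below.

/*
 * Userspace sketch of the reclaim/compaction idea: reclaim small batches
 * of order-0 pages and keep going only until roughly 2 << order pages have
 * been freed, then hand the zone over to compaction.  All names and numbers
 * here are illustrative assumptions, not actual kernel state.
 */
#include <stdbool.h>
#include <stdio.h>

struct zone_model {
	unsigned long inactive_lru_pages;	/* reclaimable pages left */
	unsigned long nr_reclaimed;		/* freed so far this pass */
};

/* Mirrors the spirit of should_continue_reclaim() in the patch below */
static bool should_continue_reclaim(const struct zone_model *z, int order)
{
	unsigned long pages_for_compaction = 2UL << order;

	if (z->nr_reclaimed >= pages_for_compaction)
		return false;		/* enough slack freed: compact now */
	if (z->inactive_lru_pages <= pages_for_compaction)
		return false;		/* nothing left worth reclaiming */
	return true;
}

int main(void)
{
	struct zone_model zone = { .inactive_lru_pages = 1000 };
	const int order = 3;		/* e.g. an order-3 (8 page) request */
	const unsigned long batch = 4;	/* order-0 pages freed per pass */

	while (should_continue_reclaim(&zone, order)) {
		zone.nr_reclaimed += batch;
		zone.inactive_lru_pages -= batch;
		printf("reclaimed %lu order-0 pages\n", zone.nr_reclaimed);
	}
	printf("handing zone to compaction after %lu pages\n",
	       zone.nr_reclaimed);
	return 0;
}

For an order-3 request, the sketch reclaims in batches of four order-0 pages
and stops after 16 pages, at which point the real kernel would let compaction
attempt to assemble the contiguous block.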
[akpm@linux-foundation.org: fix build]
[akpm@linux-foundation.org: use conventional task_struct naming]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	102
1 file changed, 87 insertions(+), 15 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3464312bde07..10ebd74a423c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -32,6 +32,7 @@
 #include <linux/topology.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/compaction.h>
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
 #include <linux/delay.h>
@@ -59,12 +60,15 @@
  * LUMPY_MODE_CONTIGRECLAIM: For high-order allocations, take a reference
  *			page from the LRU and reclaim all pages within a
  *			naturally aligned range
+ * LUMPY_MODE_COMPACTION: For high-order allocations, reclaim a number of
+ *			order-0 pages and then compact the zone
  */
 typedef unsigned __bitwise__ lumpy_mode;
 #define LUMPY_MODE_SINGLE		((__force lumpy_mode)0x01u)
 #define LUMPY_MODE_ASYNC		((__force lumpy_mode)0x02u)
 #define LUMPY_MODE_SYNC			((__force lumpy_mode)0x04u)
 #define LUMPY_MODE_CONTIGRECLAIM	((__force lumpy_mode)0x08u)
+#define LUMPY_MODE_COMPACTION		((__force lumpy_mode)0x10u)
 
 struct scan_control {
 	/* Incremented by the number of inactive pages that were scanned */
@@ -286,18 +290,20 @@ static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc,
 	lumpy_mode syncmode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;
 
 	/*
-	 * Some reclaim have alredy been failed. No worth to try synchronous
-	 * lumpy reclaim.
+	 * Initially assume we are entering either lumpy reclaim or
+	 * reclaim/compaction. Depending on the order, we will either set the
+	 * sync mode or just reclaim order-0 pages later.
 	 */
-	if (sync && sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE)
-		return;
+	if (COMPACTION_BUILD)
+		sc->lumpy_reclaim_mode = LUMPY_MODE_COMPACTION;
+	else
+		sc->lumpy_reclaim_mode = LUMPY_MODE_CONTIGRECLAIM;
 
 	/*
-	 * If we need a large contiguous chunk of memory, or have
-	 * trouble getting a small set of contiguous pages, we
-	 * will reclaim both active and inactive pages.
+	 * Avoid using lumpy reclaim or reclaim/compaction if possible by
+	 * restricting when its set to either costly allocations or when
+	 * under memory pressure
 	 */
-	sc->lumpy_reclaim_mode = LUMPY_MODE_CONTIGRECLAIM;
 	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
 		sc->lumpy_reclaim_mode |= syncmode;
 	else if (sc->order && priority < DEF_PRIORITY - 2)
@@ -1385,8 +1391,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	if (scanning_global_lru(sc)) {
 		nr_taken = isolate_pages_global(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE ?
-					ISOLATE_INACTIVE : ISOLATE_BOTH,
+			sc->lumpy_reclaim_mode & LUMPY_MODE_CONTIGRECLAIM ?
+					ISOLATE_BOTH : ISOLATE_INACTIVE,
 			zone, 0, file);
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
@@ -1398,8 +1404,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE ?
-					ISOLATE_INACTIVE : ISOLATE_BOTH,
+			sc->lumpy_reclaim_mode & LUMPY_MODE_CONTIGRECLAIM ?
+					ISOLATE_BOTH : ISOLATE_INACTIVE,
 			zone, sc->mem_cgroup,
 			0, file);
 		/*
@@ -1815,6 +1821,57 @@ out:
 }
 
 /*
+ * Reclaim/compaction depends on a number of pages being freed. To avoid
+ * disruption to the system, a small number of order-0 pages continue to be
+ * rotated and reclaimed in the normal fashion. However, by the time we get
+ * back to the allocator and call try_to_compact_zone(), we ensure that
+ * there are enough free pages for it to be likely successful
+ */
+static inline bool should_continue_reclaim(struct zone *zone,
+					unsigned long nr_reclaimed,
+					unsigned long nr_scanned,
+					struct scan_control *sc)
+{
+	unsigned long pages_for_compaction;
+	unsigned long inactive_lru_pages;
+
+	/* If not in reclaim/compaction mode, stop */
+	if (!(sc->lumpy_reclaim_mode & LUMPY_MODE_COMPACTION))
+		return false;
+
+	/*
+	 * If we failed to reclaim and have scanned the full list, stop.
+	 * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far
+	 *       faster but obviously would be less likely to succeed
+	 *       allocation. If this is desirable, use GFP_REPEAT to decide
+	 *       if both reclaimed and scanned should be checked or just
+	 *       reclaimed
+	 */
+	if (!nr_reclaimed && !nr_scanned)
+		return false;
+
+	/*
+	 * If we have not reclaimed enough pages for compaction and the
+	 * inactive lists are large enough, continue reclaiming
+	 */
+	pages_for_compaction = (2UL << sc->order);
+	inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) +
+				zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	if (sc->nr_reclaimed < pages_for_compaction &&
+			inactive_lru_pages > pages_for_compaction)
+		return true;
+
+	/* If compaction would go ahead or the allocation would succeed, stop */
+	switch (compaction_suitable(zone, sc->order)) {
+	case COMPACT_PARTIAL:
+	case COMPACT_CONTINUE:
+		return false;
+	default:
+		return true;
+	}
+}
+
+/*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
 static void shrink_zone(int priority, struct zone *zone,
@@ -1823,9 +1880,12 @@ static void shrink_zone(int priority, struct zone *zone,
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
 	enum lru_list l;
-	unsigned long nr_reclaimed = sc->nr_reclaimed;
+	unsigned long nr_reclaimed;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+	unsigned long nr_scanned = sc->nr_scanned;
 
+restart:
+	nr_reclaimed = 0;
 	get_scan_count(zone, sc, nr, priority);
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -1851,8 +1911,7 @@ static void shrink_zone(int priority, struct zone *zone,
 		if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
 			break;
 	}
-
-	sc->nr_reclaimed = nr_reclaimed;
+	sc->nr_reclaimed += nr_reclaimed;
 
 	/*
 	 * Even if we did not try to evict anon pages at all, we want to
@@ -1861,6 +1920,11 @@ static void shrink_zone(int priority, struct zone *zone,
 	if (inactive_anon_is_low(zone, sc))
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
+	/* reclaim/compaction might need reclaim to continue */
+	if (should_continue_reclaim(zone, nr_reclaimed,
+					sc->nr_scanned - nr_scanned, sc))
+		goto restart;
+
 	throttle_vm_writeout(sc->gfp_mask);
 }
 
@@ -2307,6 +2371,14 @@ loop_again:
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
 
+			/*
+			 * Compact the zone for higher orders to reduce
+			 * latencies for higher-order allocations that
+			 * would ordinarily call try_to_compact_pages()
+			 */
+			if (sc.order > PAGE_ALLOC_COSTLY_ORDER)
+				compact_zone_order(zone, sc.order, sc.gfp_mask);
+
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone), end_zone, 0)) {
 				all_zones_ok = 0;
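
For reference, below is a similarly hedged userspace model of the mode
selection performed by set_lumpy_reclaim_mode() after this patch.
DEF_PRIORITY (12) and PAGE_ALLOC_COSTLY_ORDER (3) use their mainline values
from this era, and the final fallback to single/async reclaim comes from the
surrounding function rather than the hunk above; the helper name and the
printed table are purely illustrative.

/*
 * Userspace model of the reclaim-mode selection: assume reclaim/compaction
 * when compaction is built in (lumpy reclaim otherwise), then only allow
 * the sync mode for costly orders or when reclaim priority shows pressure.
 */
#include <stdio.h>

#define DEF_PRIORITY		12
#define PAGE_ALLOC_COSTLY_ORDER	3

#define MODE_SINGLE		0x01u
#define MODE_ASYNC		0x02u
#define MODE_SYNC		0x04u
#define MODE_CONTIGRECLAIM	0x08u
#define MODE_COMPACTION		0x10u

static unsigned set_reclaim_mode(int priority, int order, int sync,
				 int compaction_build)
{
	unsigned syncmode = sync ? MODE_SYNC : MODE_ASYNC;
	/* Assume reclaim/compaction when compaction is available */
	unsigned mode = compaction_build ? MODE_COMPACTION : MODE_CONTIGRECLAIM;

	if (order > PAGE_ALLOC_COSTLY_ORDER)
		mode |= syncmode;	/* costly allocation: allow sync */
	else if (order && priority < DEF_PRIORITY - 2)
		mode |= syncmode;	/* under memory pressure: allow sync */
	else
		mode = MODE_SINGLE | MODE_ASYNC; /* plain order-0 reclaim */
	return mode;
}

int main(void)
{
	printf("order 2, no pressure : 0x%02x\n",
	       set_reclaim_mode(DEF_PRIORITY, 2, 1, 1));
	printf("order 2, pressure    : 0x%02x\n",
	       set_reclaim_mode(DEF_PRIORITY - 3, 2, 1, 1));
	printf("order 5, no pressure : 0x%02x\n",
	       set_reclaim_mode(DEF_PRIORITY, 5, 1, 1));
	return 0;
}

The effect is that synchronous reclaim/compaction (or lumpy reclaim without
compaction) is only engaged for costly allocations above order 3, or for
smaller high-order allocations once the reclaim priority indicates real
memory pressure; everything else stays on plain asynchronous order-0 reclaim.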