author     Mel Gorman <mel@csn.ul.ie>                      2011-01-13 18:45:56 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-01-13 20:32:33 -0500
commit     3e7d344970673c5334cf7b5bb27c8c0942b06126
tree       832ecb4da5fd27efa5a503df5b96bfdee2a52ffd  /mm/vmscan.c
parent     ee64fc9354e515a79c7232cfde65c88ec627308b
mm: vmscan: reclaim order-0 and use compaction instead of lumpy reclaim
Lumpy reclaim is disruptive. It reclaims a large number of pages and ignores the age of the pages it reclaims. This can incur significant stalls and potentially increase the number of major faults.

Compaction has reached the point where it is considered reasonably stable (meaning it has passed a lot of testing) and is a potential candidate for displacing lumpy reclaim. This patch introduces an alternative to lumpy reclaim, called reclaim/compaction, that is used when compaction is available. The basic operation is very simple: instead of selecting a contiguous range of pages to reclaim, a number of order-0 pages are reclaimed and compaction is then performed later, either by kswapd (compact_zone_order()) or by direct compaction (__alloc_pages_direct_compact()).

[akpm@linux-foundation.org: fix build]
[akpm@linux-foundation.org: use conventional task_struct naming]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c   102
1 file changed, 87 insertions, 15 deletions
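As an illustration of the mechanism the commit message describes (this is not part of the patch: struct model_zone, reclaim_order0_batch() and reclaim_then_compact() are hypothetical, userspace-only names), the sketch below models the new path: reclaim batches of order-0 pages until roughly enough free memory exists, then attempt compaction instead of hunting for a naturally aligned contiguous block.

#include <stdio.h>

/* Hypothetical zone model; only the fields this sketch needs. */
struct model_zone {
	unsigned long nr_inactive;	/* pages on the inactive LRU lists */
	unsigned long nr_free;		/* free order-0 pages */
};

/* Pretend to reclaim one batch of order-0 pages from the inactive lists. */
static unsigned long reclaim_order0_batch(struct model_zone *zone,
					  unsigned long batch)
{
	unsigned long taken = zone->nr_inactive < batch ?
					zone->nr_inactive : batch;

	zone->nr_inactive -= taken;
	zone->nr_free += taken;
	return taken;
}

/* Reclaim order-0 pages until compaction has enough room, then "compact". */
static void reclaim_then_compact(struct model_zone *zone, int order)
{
	const unsigned long pages_for_compaction = 2UL << order;

	while (zone->nr_free < pages_for_compaction &&
	       zone->nr_inactive > pages_for_compaction) {
		if (!reclaim_order0_batch(zone, 32))
			break;		/* nothing left to reclaim */
	}

	/* In the kernel this is where compact_zone_order() or direct
	 * compaction would run; here we only report what would happen. */
	printf("order-%d request: %lu free pages, compaction %s\n",
	       order, zone->nr_free,
	       zone->nr_free >= pages_for_compaction ? "attempted" : "skipped");
}

int main(void)
{
	struct model_zone zone = { .nr_inactive = 4096, .nr_free = 100 };

	reclaim_then_compact(&zone, 9);	/* order 9 = 2MB with 4KB pages */
	return 0;
}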
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3464312bde07..10ebd74a423c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -32,6 +32,7 @@
 #include <linux/topology.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/compaction.h>
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
 #include <linux/delay.h>
@@ -59,12 +60,15 @@
  * LUMPY_MODE_CONTIGRECLAIM: For high-order allocations, take a reference
  *			page from the LRU and reclaim all pages within a
  *			naturally aligned range
+ * LUMPY_MODE_COMPACTION: For high-order allocations, reclaim a number of
+ *			order-0 pages and then compact the zone
  */
 typedef unsigned __bitwise__ lumpy_mode;
 #define LUMPY_MODE_SINGLE		((__force lumpy_mode)0x01u)
 #define LUMPY_MODE_ASYNC		((__force lumpy_mode)0x02u)
 #define LUMPY_MODE_SYNC			((__force lumpy_mode)0x04u)
 #define LUMPY_MODE_CONTIGRECLAIM	((__force lumpy_mode)0x08u)
+#define LUMPY_MODE_COMPACTION		((__force lumpy_mode)0x10u)
 
 struct scan_control {
 	/* Incremented by the number of inactive pages that were scanned */
@@ -286,18 +290,20 @@ static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc,
 	lumpy_mode syncmode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;
 
 	/*
-	 * Some reclaim have alredy been failed. No worth to try synchronous
-	 * lumpy reclaim.
+	 * Initially assume we are entering either lumpy reclaim or
+	 * reclaim/compaction. Depending on the order, we will either set the
+	 * sync mode or just reclaim order-0 pages later.
 	 */
-	if (sync && sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE)
-		return;
+	if (COMPACTION_BUILD)
+		sc->lumpy_reclaim_mode = LUMPY_MODE_COMPACTION;
+	else
+		sc->lumpy_reclaim_mode = LUMPY_MODE_CONTIGRECLAIM;
 
 	/*
-	 * If we need a large contiguous chunk of memory, or have
-	 * trouble getting a small set of contiguous pages, we
-	 * will reclaim both active and inactive pages.
+	 * Avoid using lumpy reclaim or reclaim/compaction if possible by
+	 * restricting when its set to either costly allocations or when
+	 * under memory pressure
 	 */
-	sc->lumpy_reclaim_mode = LUMPY_MODE_CONTIGRECLAIM;
 	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
 		sc->lumpy_reclaim_mode |= syncmode;
 	else if (sc->order && priority < DEF_PRIORITY - 2)
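For context on the hunk above: the reclaim mode is a small bitmask, so the sync/async bit is OR-ed on top of whichever base mode the COMPACTION_BUILD check selected. Below is a minimal userspace sketch of that selection, assuming the kernel values PAGE_ALLOC_COSTLY_ORDER = 3 and DEF_PRIORITY = 12; the final fallback to single-page reclaim mirrors the tail of the function, which lies outside this hunk's context, and pick_reclaim_mode() is an invented name, not kernel code.

#include <stdio.h>

/* The lumpy_mode flag values from the patch, as plain unsigned constants. */
#define LUMPY_MODE_SINGLE		0x01u
#define LUMPY_MODE_ASYNC		0x02u
#define LUMPY_MODE_SYNC			0x04u
#define LUMPY_MODE_CONTIGRECLAIM	0x08u
#define LUMPY_MODE_COMPACTION		0x10u

#define PAGE_ALLOC_COSTLY_ORDER	3	/* kernel value */
#define DEF_PRIORITY		12	/* kernel value */
#define COMPACTION_BUILD	1	/* stand-in for the compile-time compaction check */

static unsigned int pick_reclaim_mode(int order, int priority, int sync)
{
	unsigned int syncmode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;
	unsigned int mode;

	/* Base mode: reclaim/compaction when compaction is built in. */
	mode = COMPACTION_BUILD ? LUMPY_MODE_COMPACTION : LUMPY_MODE_CONTIGRECLAIM;

	/* Keep it only for costly orders or under real memory pressure. */
	if (order > PAGE_ALLOC_COSTLY_ORDER)
		mode |= syncmode;
	else if (order && priority < DEF_PRIORITY - 2)
		mode |= syncmode;
	else
		mode = LUMPY_MODE_SINGLE | LUMPY_MODE_ASYNC;

	return mode;
}

int main(void)
{
	printf("order 9, priority 5:  mode 0x%02x\n", pick_reclaim_mode(9, 5, 1));
	printf("order 1, priority 12: mode 0x%02x\n", pick_reclaim_mode(1, 12, 0));
	return 0;
}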
@@ -1385,8 +1391,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	if (scanning_global_lru(sc)) {
 		nr_taken = isolate_pages_global(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE ?
-					ISOLATE_INACTIVE : ISOLATE_BOTH,
+			sc->lumpy_reclaim_mode & LUMPY_MODE_CONTIGRECLAIM ?
+					ISOLATE_BOTH : ISOLATE_INACTIVE,
 			zone, 0, file);
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
@@ -1398,8 +1404,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE ?
-					ISOLATE_INACTIVE : ISOLATE_BOTH,
+			sc->lumpy_reclaim_mode & LUMPY_MODE_CONTIGRECLAIM ?
+					ISOLATE_BOTH : ISOLATE_INACTIVE,
 			zone, sc->mem_cgroup,
 			0, file);
 		/*
@@ -1815,6 +1821,57 @@ out:
 }
 
 /*
+ * Reclaim/compaction depends on a number of pages being freed. To avoid
+ * disruption to the system, a small number of order-0 pages continue to be
+ * rotated and reclaimed in the normal fashion. However, by the time we get
+ * back to the allocator and call try_to_compact_zone(), we ensure that
+ * there are enough free pages for it to be likely successful
+ */
+static inline bool should_continue_reclaim(struct zone *zone,
+					unsigned long nr_reclaimed,
+					unsigned long nr_scanned,
+					struct scan_control *sc)
+{
+	unsigned long pages_for_compaction;
+	unsigned long inactive_lru_pages;
+
+	/* If not in reclaim/compaction mode, stop */
+	if (!(sc->lumpy_reclaim_mode & LUMPY_MODE_COMPACTION))
+		return false;
+
+	/*
+	 * If we failed to reclaim and have scanned the full list, stop.
+	 * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far
+	 *       faster but obviously would be less likely to succeed
+	 *       allocation. If this is desirable, use GFP_REPEAT to decide
+	 *       if both reclaimed and scanned should be checked or just
+	 *       reclaimed
+	 */
+	if (!nr_reclaimed && !nr_scanned)
+		return false;
+
+	/*
+	 * If we have not reclaimed enough pages for compaction and the
+	 * inactive lists are large enough, continue reclaiming
+	 */
+	pages_for_compaction = (2UL << sc->order);
+	inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) +
+				zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	if (sc->nr_reclaimed < pages_for_compaction &&
+			inactive_lru_pages > pages_for_compaction)
+		return true;
+
+	/* If compaction would go ahead or the allocation would succeed, stop */
+	switch (compaction_suitable(zone, sc->order)) {
+	case COMPACT_PARTIAL:
+	case COMPACT_CONTINUE:
+		return false;
+	default:
+		return true;
+	}
+}
+
+/*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
 static void shrink_zone(int priority, struct zone *zone,
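A worked example of the threshold used above: pages_for_compaction is 2UL << order, twice the size of the requested block, so an order-9 request (a 2 MB huge page with 4 KB pages, i.e. 512 pages) keeps reclaim going until about 1024 order-0 pages have been freed, provided the inactive lists are large enough and compaction is not already viable. The throwaway snippet below (not kernel code) just prints that table.

#include <stdio.h>

int main(void)
{
	/*
	 * pages_for_compaction is twice the size of the requested block:
	 * 2UL << order pages. The kB column assumes 4 KB pages.
	 */
	for (int order = 0; order <= 9; order++) {
		unsigned long pages_for_compaction = 2UL << order;

		printf("order %d: pages_for_compaction = %4lu (%5lu kB)\n",
		       order, pages_for_compaction, pages_for_compaction * 4);
	}
	return 0;
}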
@@ -1823,9 +1880,12 @@ static void shrink_zone(int priority, struct zone *zone,
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
 	enum lru_list l;
-	unsigned long nr_reclaimed = sc->nr_reclaimed;
+	unsigned long nr_reclaimed;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+	unsigned long nr_scanned = sc->nr_scanned;
 
+restart:
+	nr_reclaimed = 0;
 	get_scan_count(zone, sc, nr, priority);
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -1851,8 +1911,7 @@ static void shrink_zone(int priority, struct zone *zone,
 		if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
 			break;
 	}
-
-	sc->nr_reclaimed = nr_reclaimed;
+	sc->nr_reclaimed += nr_reclaimed;
 
 	/*
 	 * Even if we did not try to evict anon pages at all, we want to
@@ -1861,6 +1920,11 @@ static void shrink_zone(int priority, struct zone *zone,
 	if (inactive_anon_is_low(zone, sc))
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
+	/* reclaim/compaction might need reclaim to continue */
+	if (should_continue_reclaim(zone, nr_reclaimed,
+					sc->nr_scanned - nr_scanned, sc))
+		goto restart;
+
 	throttle_vm_writeout(sc->gfp_mask);
 }
 
@@ -2307,6 +2371,14 @@ loop_again:
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
 
+			/*
+			 * Compact the zone for higher orders to reduce
+			 * latencies for higher-order allocations that
+			 * would ordinarily call try_to_compact_pages()
+			 */
+			if (sc.order > PAGE_ALLOC_COSTLY_ORDER)
+				compact_zone_order(zone, sc.order, sc.gfp_mask);
+
 			if (!zone_watermark_ok_safe(zone, order,
 			    high_wmark_pages(zone), end_zone, 0)) {
 				all_zones_ok = 0;
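As a closing note on the kswapd hunk above: PAGE_ALLOC_COSTLY_ORDER is 3, so only reclaim for order 4 and above gets this proactive compact_zone_order() call from kswapd; lower orders still rely on order-0 reclaim and the watermark check alone. A trivial sketch of that gate follows (compact_for_order() is a made-up stand-in, not a kernel function).

#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER	3	/* kernel value */

/* Made-up stand-in for compact_zone_order(); only reports the decision. */
static void compact_for_order(int order)
{
	printf("order %d: kswapd would call compact_zone_order()\n", order);
}

int main(void)
{
	for (int order = 0; order <= 5; order++) {
		if (order > PAGE_ALLOC_COSTLY_ORDER)
			compact_for_order(order);
		else
			printf("order %d: no proactive compaction from kswapd\n",
			       order);
	}
	return 0;
}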