aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/sysctl/vm.txt38
-rw-r--r--mm/vmscan.c9
2 files changed, 37 insertions, 10 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 44518c023949..4bca2a3d9174 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -127,17 +127,39 @@ the high water marks for each per cpu page list.
127 127
128zone_reclaim_mode: 128zone_reclaim_mode:
129 129
130This is set during bootup to 1 if it is determined that pages from 130Zone_reclaim_mode allows someone to set more or less aggressive approaches to
131remote zones will cause a significant performance reduction. The 131reclaim memory when a zone runs out of memory. If it is set to zero then no
132zone reclaim occurs. Allocations will be satisfied from other zones / nodes
133in the system.
134
135This is a value ORed together from
136
1371 = Zone reclaim on
1382 = Zone reclaim writes dirty pages out
1394 = Zone reclaim swaps pages
140
141zone_reclaim_mode is set during bootup to 1 if it is determined that pages
142from remote zones will cause a measurable performance reduction. The
132page allocator will then reclaim easily reusable pages (those page 143page allocator will then reclaim easily reusable pages (those page
133cache pages that are currently not used) before going off node. 144cache pages that are currently not used) before allocating off node pages.
145
146It may be beneficial to switch off zone reclaim if the system is
147used for a file server and all of memory should be used for caching files
148from disk. In that case the caching effect is more important than
149data locality.
150
151Allowing zone reclaim to write out pages stops processes that are
152writing large amounts of data from dirtying pages on other nodes. Zone
153reclaim will write out dirty pages if a zone fills up and so effectively
154throttle the process. This may decrease the performance of a single process
155since it cannot use all of system memory to buffer the outgoing writes
156anymore but it preserves the memory on other nodes so that the performance
157of other processes running on other nodes will not be affected.
134 158
135The user can override this setting. It may be beneficial to switch 159Allowing regular swap effectively restricts allocations to the local
136off zone reclaim if the system is used for a file server and all 160node unless explicitly overridden by memory policies or cpuset
137of memory should be used for caching files from disk. 161configurations.
138 162
139It may be beneficial to switch this on if one wants to do zone
140reclaim regardless of the numa distances in the system.
141================================================================ 163================================================================
142 164
143zone_reclaim_interval: 165zone_reclaim_interval:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8760a4abfa1f..9e2ef3624d77 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1592,6 +1592,11 @@ module_init(kswapd_init)
1592 */ 1592 */
1593int zone_reclaim_mode __read_mostly; 1593int zone_reclaim_mode __read_mostly;
1594 1594
1595#define RECLAIM_OFF 0
1596#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */
1597#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
1598#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */
1599
1595/* 1600/*
1596 * Minimum time between zone reclaim scans 1601 * Minimum time between zone reclaim scans
1597 */ 1602 */
@@ -1630,8 +1635,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1630 if (!cpus_empty(mask) && node_id != numa_node_id()) 1635 if (!cpus_empty(mask) && node_id != numa_node_id())
1631 return 0; 1636 return 0;
1632 1637
1633 sc.may_writepage = 0; 1638 sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE);
1634 sc.may_swap = 0; 1639 sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP);
1635 sc.nr_scanned = 0; 1640 sc.nr_scanned = 0;
1636 sc.nr_reclaimed = 0; 1641 sc.nr_reclaimed = 0;
1637 sc.priority = ZONE_RECLAIM_PRIORITY + 1; 1642 sc.priority = ZONE_RECLAIM_PRIORITY + 1;