diff options
-rw-r--r-- | Documentation/sysctl/vm.txt | 38 | ||||
-rw-r--r-- | mm/vmscan.c | 9 |
2 files changed, 37 insertions, 10 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 44518c023949..4bca2a3d9174 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -127,17 +127,39 @@ the high water marks for each per cpu page list. | |||
127 | 127 | ||
128 | zone_reclaim_mode: | 128 | zone_reclaim_mode: |
129 | 129 | ||
130 | This is set during bootup to 1 if it is determined that pages from | 130 | Zone_reclaim_mode allows to set more or less aggressive approaches to |
131 | remote zones will cause a significant performance reduction. The | 131 | reclaim memory when a zone runs out of memory. If it is set to zero then no |
132 | zone reclaim occurs. Allocations will be satisfied from other zones / nodes | ||
133 | in the system. | ||
134 | |||
135 | This is a value ORed together from | ||
136 | |||
137 | 1 = Zone reclaim on | ||
138 | 2 = Zone reclaim writes dirty pages out | ||
139 | 4 = Zone reclaim swaps pages | ||
140 | |||
141 | zone_reclaim_mode is set during bootup to 1 if it is determined that pages | ||
142 | from remote zones will cause a measurable performance reduction. The | ||
132 | page allocator will then reclaim easily reusable pages (those page | 143 | page allocator will then reclaim easily reusable pages (those page |
133 | cache pages that are currently not used) before going off node. | 144 | cache pages that are currently not used) before allocating off node pages. |
145 | |||
146 | It may be beneficial to switch off zone reclaim if the system is | ||
147 | used for a file server and all of memory should be used for caching files | ||
148 | from disk. In that case the caching effect is more important than | ||
149 | data locality. | ||
150 | |||
151 | Allowing zone reclaim to write out pages stops processes that are | ||
152 | writing large amounts of data from dirtying pages on other nodes. Zone | ||
153 | reclaim will write out dirty pages if a zone fills up and so effectively | ||
154 | throttle the process. This may decrease the performance of a single process | ||
155 | since it cannot use all of system memory to buffer the outgoing writes | ||
156 | anymore but it preserves the memory on other nodes so that the performance | ||
157 | of other processes running on other nodes will not be affected. | ||
134 | 158 | ||
135 | The user can override this setting. It may be beneficial to switch | 159 | Allowing regular swap effectively restricts allocations to the local |
136 | off zone reclaim if the system is used for a file server and all | 160 | node unless explicitly overridden by memory policies or cpuset |
137 | of memory should be used for caching files from disk. | 161 | configurations. |
138 | 162 | ||
139 | It may be beneficial to switch this on if one wants to do zone | ||
140 | reclaim regardless of the numa distances in the system. | ||
141 | ================================================================ | 163 | ================================================================ |
142 | 164 | ||
143 | zone_reclaim_interval: | 165 | zone_reclaim_interval: |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 8760a4abfa1f..9e2ef3624d77 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1592,6 +1592,11 @@ module_init(kswapd_init) | |||
1592 | */ | 1592 | */ |
1593 | int zone_reclaim_mode __read_mostly; | 1593 | int zone_reclaim_mode __read_mostly; |
1594 | 1594 | ||
1595 | #define RECLAIM_OFF 0 | ||
1596 | #define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */ | ||
1597 | #define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ | ||
1598 | #define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */ | ||
1599 | |||
1595 | /* | 1600 | /* |
1596 | * Minimum time between zone reclaim scans | 1601 | * Minimum time between zone reclaim scans |
1597 | */ | 1602 | */ |
@@ -1630,8 +1635,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
1630 | if (!cpus_empty(mask) && node_id != numa_node_id()) | 1635 | if (!cpus_empty(mask) && node_id != numa_node_id()) |
1631 | return 0; | 1636 | return 0; |
1632 | 1637 | ||
1633 | sc.may_writepage = 0; | 1638 | sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE); |
1634 | sc.may_swap = 0; | 1639 | sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP); |
1635 | sc.nr_scanned = 0; | 1640 | sc.nr_scanned = 0; |
1636 | sc.nr_reclaimed = 0; | 1641 | sc.nr_reclaimed = 0; |
1637 | sc.priority = ZONE_RECLAIM_PRIORITY + 1; | 1642 | sc.priority = ZONE_RECLAIM_PRIORITY + 1; |