diff options
-rw-r--r-- | Documentation/sysctl/vm.txt | 14 | ||||
-rw-r--r-- | include/linux/mmzone.h | 6 | ||||
-rw-r--r-- | include/linux/swap.h | 1 | ||||
-rw-r--r-- | include/linux/sysctl.h | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 11 | ||||
-rw-r--r-- | mm/page_alloc.c | 22 | ||||
-rw-r--r-- | mm/vmscan.c | 27 |
7 files changed, 69 insertions, 14 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 86754eb390da..7cee90223d3a 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/vm: | |||
28 | - block_dump | 28 | - block_dump |
29 | - drop-caches | 29 | - drop-caches |
30 | - zone_reclaim_mode | 30 | - zone_reclaim_mode |
31 | - min_unmapped_ratio | ||
31 | - panic_on_oom | 32 | - panic_on_oom |
32 | 33 | ||
33 | ============================================================== | 34 | ============================================================== |
@@ -168,6 +169,19 @@ in all nodes of the system. | |||
168 | 169 | ||
169 | ============================================================= | 170 | ============================================================= |
170 | 171 | ||
172 | min_unmapped_ratio: | ||
173 | |||
174 | This is available only on NUMA kernels. | ||
175 | |||
176 | A percentage of the file backed pages in each zone. Zone reclaim will only | ||
177 | occur if more than this percentage of pages are file backed and unmapped. | ||
178 | This is to ensure that a minimal amount of local pages is still available for | ||
179 | file I/O even if the node is overallocated. | ||
180 | |||
181 | The default is 1 percent. | ||
182 | |||
183 | ============================================================= | ||
184 | |||
171 | panic_on_oom | 185 | panic_on_oom |
172 | 186 | ||
173 | This enables or disables panic on out-of-memory feature. If this is set to 1, | 187 | This enables or disables panic on out-of-memory feature. If this is set to 1, |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 27e748eb72b0..656b588a9f96 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -150,6 +150,10 @@ struct zone { | |||
150 | unsigned long lowmem_reserve[MAX_NR_ZONES]; | 150 | unsigned long lowmem_reserve[MAX_NR_ZONES]; |
151 | 151 | ||
152 | #ifdef CONFIG_NUMA | 152 | #ifdef CONFIG_NUMA |
153 | /* | ||
154 | * zone reclaim becomes active if more unmapped pages exist. | ||
155 | */ | ||
156 | unsigned long min_unmapped_ratio; | ||
153 | struct per_cpu_pageset *pageset[NR_CPUS]; | 157 | struct per_cpu_pageset *pageset[NR_CPUS]; |
154 | #else | 158 | #else |
155 | struct per_cpu_pageset pageset[NR_CPUS]; | 159 | struct per_cpu_pageset pageset[NR_CPUS]; |
@@ -414,6 +418,8 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, | |||
414 | void __user *, size_t *, loff_t *); | 418 | void __user *, size_t *, loff_t *); |
415 | int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, | 419 | int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, |
416 | void __user *, size_t *, loff_t *); | 420 | void __user *, size_t *, loff_t *); |
421 | int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, | ||
422 | struct file *, void __user *, size_t *, loff_t *); | ||
417 | 423 | ||
418 | #include <linux/topology.h> | 424 | #include <linux/topology.h> |
419 | /* Returns the number of the current Node. */ | 425 | /* Returns the number of the current Node. */ |
diff --git a/include/linux/swap.h b/include/linux/swap.h index cf6ca6e377bd..5e59184c9096 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -189,6 +189,7 @@ extern long vm_total_pages; | |||
189 | 189 | ||
190 | #ifdef CONFIG_NUMA | 190 | #ifdef CONFIG_NUMA |
191 | extern int zone_reclaim_mode; | 191 | extern int zone_reclaim_mode; |
192 | extern int sysctl_min_unmapped_ratio; | ||
192 | extern int zone_reclaim(struct zone *, gfp_t, unsigned int); | 193 | extern int zone_reclaim(struct zone *, gfp_t, unsigned int); |
193 | #else | 194 | #else |
194 | #define zone_reclaim_mode 0 | 195 | #define zone_reclaim_mode 0 |
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 46e4d8f2771f..e4b1a4d4dcf3 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -188,7 +188,7 @@ enum | |||
188 | VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ | 188 | VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ |
189 | VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ | 189 | VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ |
190 | VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ | 190 | VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ |
191 | VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ | 191 | VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ |
192 | VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ | 192 | VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ |
193 | VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ | 193 | VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ |
194 | }; | 194 | }; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 99a58f279077..362a0cc37138 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -932,6 +932,17 @@ static ctl_table vm_table[] = { | |||
932 | .strategy = &sysctl_intvec, | 932 | .strategy = &sysctl_intvec, |
933 | .extra1 = &zero, | 933 | .extra1 = &zero, |
934 | }, | 934 | }, |
935 | { | ||
936 | .ctl_name = VM_MIN_UNMAPPED, | ||
937 | .procname = "min_unmapped_ratio", | ||
938 | .data = &sysctl_min_unmapped_ratio, | ||
939 | .maxlen = sizeof(sysctl_min_unmapped_ratio), | ||
940 | .mode = 0644, | ||
941 | .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler, | ||
942 | .strategy = &sysctl_intvec, | ||
943 | .extra1 = &zero, | ||
944 | .extra2 = &one_hundred, | ||
945 | }, | ||
935 | #endif | 946 | #endif |
936 | #ifdef CONFIG_X86_32 | 947 | #ifdef CONFIG_X86_32 |
937 | { | 948 | { |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e792a583f3b..54a4f5375bba 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -2005,6 +2005,10 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, | |||
2005 | 2005 | ||
2006 | zone->spanned_pages = size; | 2006 | zone->spanned_pages = size; |
2007 | zone->present_pages = realsize; | 2007 | zone->present_pages = realsize; |
2008 | #ifdef CONFIG_NUMA | ||
2009 | zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) | ||
2010 | / 100; | ||
2011 | #endif | ||
2008 | zone->name = zone_names[j]; | 2012 | zone->name = zone_names[j]; |
2009 | spin_lock_init(&zone->lock); | 2013 | spin_lock_init(&zone->lock); |
2010 | spin_lock_init(&zone->lru_lock); | 2014 | spin_lock_init(&zone->lru_lock); |
@@ -2298,6 +2302,24 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, | |||
2298 | return 0; | 2302 | return 0; |
2299 | } | 2303 | } |
2300 | 2304 | ||
2305 | #ifdef CONFIG_NUMA | ||
2306 | int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, | ||
2307 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | ||
2308 | { | ||
2309 | struct zone *zone; | ||
2310 | int rc; | ||
2311 | |||
2312 | rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | ||
2313 | if (rc) | ||
2314 | return rc; | ||
2315 | |||
2316 | for_each_zone(zone) | ||
2317 | zone->min_unmapped_ratio = (zone->present_pages * | ||
2318 | sysctl_min_unmapped_ratio) / 100; | ||
2319 | return 0; | ||
2320 | } | ||
2321 | #endif | ||
2322 | |||
2301 | /* | 2323 | /* |
2302 | * lowmem_reserve_ratio_sysctl_handler - just a wrapper around | 2324 | * lowmem_reserve_ratio_sysctl_handler - just a wrapper around |
2303 | * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() | 2325 | * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() |
diff --git a/mm/vmscan.c b/mm/vmscan.c index ff2ebe9458a3..5d4c4d02254d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1503,10 +1503,6 @@ module_init(kswapd_init) | |||
1503 | * | 1503 | * |
1504 | * If non-zero call zone_reclaim when the number of free pages falls below | 1504 | * If non-zero call zone_reclaim when the number of free pages falls below |
1505 | * the watermarks. | 1505 | * the watermarks. |
1506 | * | ||
1507 | * In the future we may add flags to the mode. However, the page allocator | ||
1508 | * should only have to check that zone_reclaim_mode != 0 before calling | ||
1509 | * zone_reclaim(). | ||
1510 | */ | 1506 | */ |
1511 | int zone_reclaim_mode __read_mostly; | 1507 | int zone_reclaim_mode __read_mostly; |
1512 | 1508 | ||
@@ -1524,6 +1520,12 @@ int zone_reclaim_mode __read_mostly; | |||
1524 | #define ZONE_RECLAIM_PRIORITY 4 | 1520 | #define ZONE_RECLAIM_PRIORITY 4 |
1525 | 1521 | ||
1526 | /* | 1522 | /* |
1523 | * Percentage of pages in a zone that must be unmapped for zone_reclaim to | ||
1524 | * occur. | ||
1525 | */ | ||
1526 | int sysctl_min_unmapped_ratio = 1; | ||
1527 | |||
1528 | /* | ||
1527 | * Try to free up some pages from this zone through reclaim. | 1529 | * Try to free up some pages from this zone through reclaim. |
1528 | */ | 1530 | */ |
1529 | static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | 1531 | static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) |
@@ -1590,18 +1592,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
1590 | int node_id; | 1592 | int node_id; |
1591 | 1593 | ||
1592 | /* | 1594 | /* |
1593 | * Do not reclaim if there are not enough reclaimable pages in this | 1595 | * Zone reclaim reclaims unmapped file backed pages. |
1594 | * zone that would satify this allocations. | ||
1595 | * | 1596 | * |
1596 | * All unmapped pagecache pages are reclaimable. | 1597 | * A small portion of unmapped file backed pages is needed for |
1597 | * | 1598 | * file I/O otherwise pages read by file I/O will be immediately |
1598 | * Both counters may be temporarily off a bit so we use | 1599 | * thrown out if the zone is overallocated. So we do not reclaim |
1599 | * SWAP_CLUSTER_MAX as the boundary. It may also be good to | 1600 | * if less than a specified percentage of the zone is used by |
1600 | * leave a few frequently used unmapped pagecache pages around. | 1601 | * unmapped file backed pages. |
1601 | */ | 1602 | */ |
1602 | if (zone_page_state(zone, NR_FILE_PAGES) - | 1603 | if (zone_page_state(zone, NR_FILE_PAGES) - |
1603 | zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX) | 1604 | zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio) |
1604 | return 0; | 1605 | return 0; |
1605 | 1606 | ||
1606 | /* | 1607 | /* |
1607 | * Avoid concurrent zone reclaims, do not reclaim in a zone that does | 1608 | * Avoid concurrent zone reclaims, do not reclaim in a zone that does |