aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/sysctl/vm.txt14
-rw-r--r--include/linux/mmzone.h6
-rw-r--r--include/linux/swap.h1
-rw-r--r--include/linux/sysctl.h2
-rw-r--r--kernel/sysctl.c11
-rw-r--r--mm/page_alloc.c22
-rw-r--r--mm/vmscan.c27
7 files changed, 69 insertions, 14 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 86754eb390da..7cee90223d3a 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/vm:
28- block_dump 28- block_dump
29- drop-caches 29- drop-caches
30- zone_reclaim_mode 30- zone_reclaim_mode
31- min_unmapped_ratio
31- panic_on_oom 32- panic_on_oom
32 33
33============================================================== 34==============================================================
@@ -168,6 +169,19 @@ in all nodes of the system.
168 169
169============================================================= 170=============================================================
170 171
172min_unmapped_ratio:
173
174This is available only on NUMA kernels.
175
176A percentage of the file backed pages in each zone. Zone reclaim will only
177occur if more than this percentage of pages are file backed and unmapped.
178This is to ensure that a minimal amount of local pages is still available for
179file I/O even if the node is overallocated.
180
181The default is 1 percent.
182
183=============================================================
184
171panic_on_oom 185panic_on_oom
172 186
173This enables or disables panic on out-of-memory feature. If this is set to 1, 187This enables or disables panic on out-of-memory feature. If this is set to 1,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 27e748eb72b0..656b588a9f96 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -150,6 +150,10 @@ struct zone {
150 unsigned long lowmem_reserve[MAX_NR_ZONES]; 150 unsigned long lowmem_reserve[MAX_NR_ZONES];
151 151
152#ifdef CONFIG_NUMA 152#ifdef CONFIG_NUMA
153 /*
154 * zone reclaim becomes active if more unmapped pages exist.
155 */
156 unsigned long min_unmapped_ratio;
153 struct per_cpu_pageset *pageset[NR_CPUS]; 157 struct per_cpu_pageset *pageset[NR_CPUS];
154#else 158#else
155 struct per_cpu_pageset pageset[NR_CPUS]; 159 struct per_cpu_pageset pageset[NR_CPUS];
@@ -414,6 +418,8 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
414 void __user *, size_t *, loff_t *); 418 void __user *, size_t *, loff_t *);
415int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, 419int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
416 void __user *, size_t *, loff_t *); 420 void __user *, size_t *, loff_t *);
421int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
422 struct file *, void __user *, size_t *, loff_t *);
417 423
418#include <linux/topology.h> 424#include <linux/topology.h>
419/* Returns the number of the current Node. */ 425/* Returns the number of the current Node. */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index cf6ca6e377bd..5e59184c9096 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -189,6 +189,7 @@ extern long vm_total_pages;
189 189
190#ifdef CONFIG_NUMA 190#ifdef CONFIG_NUMA
191extern int zone_reclaim_mode; 191extern int zone_reclaim_mode;
192extern int sysctl_min_unmapped_ratio;
192extern int zone_reclaim(struct zone *, gfp_t, unsigned int); 193extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
193#else 194#else
194#define zone_reclaim_mode 0 195#define zone_reclaim_mode 0
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 46e4d8f2771f..e4b1a4d4dcf3 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -188,7 +188,7 @@ enum
188 VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ 188 VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */
189 VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ 189 VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
190 VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ 190 VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
191 VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ 191 VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */
192 VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ 192 VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
193 VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ 193 VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
194}; 194};
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 99a58f279077..362a0cc37138 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -932,6 +932,17 @@ static ctl_table vm_table[] = {
932 .strategy = &sysctl_intvec, 932 .strategy = &sysctl_intvec,
933 .extra1 = &zero, 933 .extra1 = &zero,
934 }, 934 },
935 {
936 .ctl_name = VM_MIN_UNMAPPED,
937 .procname = "min_unmapped_ratio",
938 .data = &sysctl_min_unmapped_ratio,
939 .maxlen = sizeof(sysctl_min_unmapped_ratio),
940 .mode = 0644,
941 .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler,
942 .strategy = &sysctl_intvec,
943 .extra1 = &zero,
944 .extra2 = &one_hundred,
945 },
935#endif 946#endif
936#ifdef CONFIG_X86_32 947#ifdef CONFIG_X86_32
937 { 948 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3e792a583f3b..54a4f5375bba 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2005,6 +2005,10 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2005 2005
2006 zone->spanned_pages = size; 2006 zone->spanned_pages = size;
2007 zone->present_pages = realsize; 2007 zone->present_pages = realsize;
2008#ifdef CONFIG_NUMA
2009 zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
2010 / 100;
2011#endif
2008 zone->name = zone_names[j]; 2012 zone->name = zone_names[j];
2009 spin_lock_init(&zone->lock); 2013 spin_lock_init(&zone->lock);
2010 spin_lock_init(&zone->lru_lock); 2014 spin_lock_init(&zone->lru_lock);
@@ -2298,6 +2302,24 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
2298 return 0; 2302 return 0;
2299} 2303}
2300 2304
2305#ifdef CONFIG_NUMA
2306int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
2307 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
2308{
2309 struct zone *zone;
2310 int rc;
2311
2312 rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
2313 if (rc)
2314 return rc;
2315
2316 for_each_zone(zone)
2317 zone->min_unmapped_ratio = (zone->present_pages *
2318 sysctl_min_unmapped_ratio) / 100;
2319 return 0;
2320}
2321#endif
2322
2301/* 2323/*
2302 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around 2324 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
2303 * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() 2325 * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ff2ebe9458a3..5d4c4d02254d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1503,10 +1503,6 @@ module_init(kswapd_init)
1503 * 1503 *
1504 * If non-zero call zone_reclaim when the number of free pages falls below 1504 * If non-zero call zone_reclaim when the number of free pages falls below
1505 * the watermarks. 1505 * the watermarks.
1506 *
1507 * In the future we may add flags to the mode. However, the page allocator
1508 * should only have to check that zone_reclaim_mode != 0 before calling
1509 * zone_reclaim().
1510 */ 1506 */
1511int zone_reclaim_mode __read_mostly; 1507int zone_reclaim_mode __read_mostly;
1512 1508
@@ -1524,6 +1520,12 @@ int zone_reclaim_mode __read_mostly;
1524#define ZONE_RECLAIM_PRIORITY 4 1520#define ZONE_RECLAIM_PRIORITY 4
1525 1521
1526/* 1522/*
1523 * Percentage of pages in a zone that must be unmapped for zone_reclaim to
1524 * occur.
1525 */
1526int sysctl_min_unmapped_ratio = 1;
1527
1528/*
1527 * Try to free up some pages from this zone through reclaim. 1529 * Try to free up some pages from this zone through reclaim.
1528 */ 1530 */
1529static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) 1531static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -1590,18 +1592,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1590 int node_id; 1592 int node_id;
1591 1593
1592 /* 1594 /*
1593 * Do not reclaim if there are not enough reclaimable pages in this 1595 * Zone reclaim reclaims unmapped file backed pages.
1594 * zone that would satify this allocations.
1595 * 1596 *
1596 * All unmapped pagecache pages are reclaimable. 1597 * A small portion of unmapped file backed pages is needed for
1597 * 1598 * file I/O otherwise pages read by file I/O will be immediately
1598 * Both counters may be temporarily off a bit so we use 1599 * thrown out if the zone is overallocated. So we do not reclaim
1599 * SWAP_CLUSTER_MAX as the boundary. It may also be good to 1600 * if less than a specified percentage of the zone is used by
1600 * leave a few frequently used unmapped pagecache pages around. 1601 * unmapped file backed pages.
1601 */ 1602 */
1602 if (zone_page_state(zone, NR_FILE_PAGES) - 1603 if (zone_page_state(zone, NR_FILE_PAGES) -
1603 zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX) 1604 zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio)
1604 return 0; 1605 return 0;
1605 1606
1606 /* 1607 /*
1607 * Avoid concurrent zone reclaims, do not reclaim in a zone that does 1608 * Avoid concurrent zone reclaims, do not reclaim in a zone that does