about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  Documentation/sysctl/vm.txt | 3
-rw-r--r--  kernel/sysctl.c | 3
-rw-r--r--  mm/page_alloc.c | 40
3 files changed, 31 insertions, 15 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index bd4b34c03738..4415aa915681 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -702,7 +702,8 @@ The batch value of each per cpu pagelist is also updated as a result. It is
 set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8)
 
 The initial value is zero. Kernel does not use this value at boot time to set
-the high water marks for each per cpu page list.
+the high water marks for each per cpu page list. If the user writes '0' to this
+sysctl, it will revert to this default behavior.
 
 ==============================================================
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7de6555cfea0..075d1903138f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -136,7 +136,6 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
 static int minolduid;
-static int min_percpu_pagelist_fract = 8;
 
 static int ngroups_max = NGROUPS_MAX;
 static const int cap_last_cap = CAP_LAST_CAP;
@@ -1317,7 +1316,7 @@ static struct ctl_table vm_table[] = {
 		.maxlen = sizeof(percpu_pagelist_fraction),
 		.mode = 0644,
 		.proc_handler = percpu_pagelist_fraction_sysctl_handler,
-		.extra1 = &min_percpu_pagelist_fract,
+		.extra1 = &zero,
 	},
 #ifdef CONFIG_MMU
 	{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4f59fa29eda8..20d17f8266fe 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,6 +69,7 @@
 
 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
 static DEFINE_MUTEX(pcp_batch_high_lock);
+#define MIN_PERCPU_PAGELIST_FRACTION (8)
 
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
@@ -4145,7 +4146,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 	memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
 #endif
 
-static int __meminit zone_batchsize(struct zone *zone)
+static int zone_batchsize(struct zone *zone)
 {
 #ifdef CONFIG_MMU
 	int batch;
@@ -4261,8 +4262,8 @@ static void pageset_set_high(struct per_cpu_pageset *p,
 	pageset_update(&p->pcp, high, batch);
 }
 
-static void __meminit pageset_set_high_and_batch(struct zone *zone,
-		struct per_cpu_pageset *pcp)
+static void pageset_set_high_and_batch(struct zone *zone,
+		struct per_cpu_pageset *pcp)
 {
 	if (percpu_pagelist_fraction)
 		pageset_set_high(pcp,
@@ -5881,23 +5882,38 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
-	unsigned int cpu;
+	int old_percpu_pagelist_fraction;
 	int ret;
 
+	mutex_lock(&pcp_batch_high_lock);
+	old_percpu_pagelist_fraction = percpu_pagelist_fraction;
+
 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
-	if (!write || (ret < 0))
-		return ret;
+	if (!write || ret < 0)
+		goto out;
+
+	/* Sanity checking to avoid pcp imbalance */
+	if (percpu_pagelist_fraction &&
+	    percpu_pagelist_fraction < MIN_PERCPU_PAGELIST_FRACTION) {
+		percpu_pagelist_fraction = old_percpu_pagelist_fraction;
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* No change? */
+	if (percpu_pagelist_fraction == old_percpu_pagelist_fraction)
+		goto out;
 
-	mutex_lock(&pcp_batch_high_lock);
 	for_each_populated_zone(zone) {
-		unsigned long high;
-		high = zone->managed_pages / percpu_pagelist_fraction;
+		unsigned int cpu;
+
 		for_each_possible_cpu(cpu)
-			pageset_set_high(per_cpu_ptr(zone->pageset, cpu),
-				high);
+			pageset_set_high_and_batch(zone,
+				per_cpu_ptr(zone->pageset, cpu));
 	}
+out:
 	mutex_unlock(&pcp_batch_high_lock);
-	return 0;
+	return ret;
 }
 
 int hashdist = HASHDIST_DEFAULT;