aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Tim Chen <tim.c.chen@linux.intel.com> 2013-07-03 18:02:44 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-07-03 19:07:32 -0400
commit 917d9290af749fac9c4d90bacf18699c9d8ba28d (patch)
tree 506f3a6f90d318a612161da2adf8aae8f81aaab3 /mm
parent 2415cf12e04d415b16d9c2f2a705bcd6cd9a0474 (diff)
mm: tune vm_committed_as percpu_counter batching size
Currently the per cpu counter's batch size for memory accounting is configured as twice the number of cpus in the system. However, for systems with very large memory, it is more appropriate to make it proportional to the memory size per cpu in the system. For example, for an x86_64 system with 64 cpus and 128 GB of memory, the batch size is only 2*64 pages (0.5 MB). So any memory accounting changes of more than 0.5MB will overflow the per cpu counter into the global counter. Instead, for the new scheme, the batch size is configured to be 0.4% of the memory/cpu = 8MB (128 GB/64 /256), which is more in line with the memory size. I've done a repeated brk test of 800KB (from the will-it-scale test suite) with 80 concurrent processes on a 4 socket Westmere machine with a total of 40 cores. Without the patch, about 80% of cpu is spent on spin-lock contention within the vm_committed_as counter. With the patch, there's a 73x speedup on the benchmark and the lock contention drops off almost entirely. [akpm@linux-foundation.org: fix section mismatch] Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Cc: Tejun Heo <tj@kernel.org> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/mm_init.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/mm/mm_init.c b/mm/mm_init.c
index c280a02ea11e..633c08863fd8 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -9,6 +9,8 @@
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/kobject.h> 10#include <linux/kobject.h>
11#include <linux/export.h> 11#include <linux/export.h>
12#include <linux/memory.h>
13#include <linux/notifier.h>
12#include "internal.h" 14#include "internal.h"
13 15
14#ifdef CONFIG_DEBUG_MEMORY_INIT 16#ifdef CONFIG_DEBUG_MEMORY_INIT
@@ -147,6 +149,51 @@ early_param("mminit_loglevel", set_mminit_loglevel);
147struct kobject *mm_kobj; 149struct kobject *mm_kobj;
148EXPORT_SYMBOL_GPL(mm_kobj); 150EXPORT_SYMBOL_GPL(mm_kobj);
149 151
152#ifdef CONFIG_SMP
153s32 vm_committed_as_batch = 32;
154
155static void __meminit mm_compute_batch(void)
156{
157 u64 memsized_batch;
158 s32 nr = num_present_cpus();
159 s32 batch = max_t(s32, nr*2, 32);
160
161 /* batch size set to 0.4% of (total memory/#cpus), or max int32 */
162 memsized_batch = min_t(u64, (totalram_pages/nr)/256, 0x7fffffff);
163
164 vm_committed_as_batch = max_t(s32, memsized_batch, batch);
165}
166
167static int __meminit mm_compute_batch_notifier(struct notifier_block *self,
168 unsigned long action, void *arg)
169{
170 switch (action) {
171 case MEM_ONLINE:
172 case MEM_OFFLINE:
173 mm_compute_batch();
174 default:
175 break;
176 }
177 return NOTIFY_OK;
178}
179
180static struct notifier_block compute_batch_nb __meminitdata = {
181 .notifier_call = mm_compute_batch_notifier,
182 .priority = IPC_CALLBACK_PRI, /* use lowest priority */
183};
184
185static int __init mm_compute_batch_init(void)
186{
187 mm_compute_batch();
188 register_hotmemory_notifier(&compute_batch_nb);
189
190 return 0;
191}
192
193__initcall(mm_compute_batch_init);
194
195#endif
196
150static int __init mm_sysfs_init(void) 197static int __init mm_sysfs_init(void)
151{ 198{
152 mm_kobj = kobject_create_and_add("mm", kernel_kobj); 199 mm_kobj = kobject_create_and_add("mm", kernel_kobj);