author     KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>	2009-04-30 18:08:51 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>	2009-05-02 18:36:10 -0400
commit     00a62ce91e554198ef28234c91c36f850f5a3bc9
tree       367ef134219deef91903c3fa0eb108c13658f2c7 /mm
parent     0763ed2355198cdef2f6a2098e9d52eb1fe4365d
mm: fix Committed_AS underflow on large NR_CPUS environment
The Committed_AS field can underflow in certain situations:
> # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c
> 1 Committed_AS: 18446744073709323392 kB
> 11 Committed_AS: 18446744073709455488 kB
> 6 Committed_AS: 35136 kB
> 5 Committed_AS: 18446744073709454400 kB
> 7 Committed_AS: 35904 kB
> 3 Committed_AS: 18446744073709453248 kB
> 2 Committed_AS: 34752 kB
> 9 Committed_AS: 18446744073709453248 kB
> 8 Committed_AS: 34752 kB
> 3 Committed_AS: 18446744073709320960 kB
> 7 Committed_AS: 18446744073709454080 kB
> 3 Committed_AS: 18446744073709320960 kB
> 5 Committed_AS: 18446744073709454080 kB
> 6 Committed_AS: 18446744073709320960 kB
This happens because the old per-CPU batching threshold scales with NR_CPUS,
which can be greater than 1000, and meminfo_proc_show() does not check for
underflow.
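To illustrate: the huge figures above are just small negative page counts
pushed through an unsigned conversion when /proc/meminfo is formatted. A
minimal userspace sketch (assuming 4 KiB pages, a 64-bit unsigned long, and a
drift of -57056 pages chosen purely to reproduce the first sample line):

#include <stdio.h>

int main(void)
{
	/* Hypothetical negative drift of the committed-space counter, in pages. */
	long committed = -57056;

	/*
	 * meminfo converts pages to kB with a left shift by PAGE_SHIFT - 10,
	 * i.e. by 2 for 4 KiB pages.  Performed on an unsigned value, a small
	 * negative count wraps around to a number close to 2^64.
	 */
	unsigned long kb = (unsigned long)committed << 2;

	printf("Committed_AS: %lu kB\n", kb);	/* prints 18446744073709323392 */
	return 0;
}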
Making the batching threshold proportional to NR_CPUS is the wrong calculation
anyway. In general, the likelihood of lock contention is proportional to the
number of online CPUs, not to the theoretical maximum (NR_CPUS).
The kernel already has a generic percpu_counter implementation, and using it
is the right fix: it simplifies the code, and percpu_counter_read_positive()
cannot return an underflowed value.
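For reference, the property relied on here is that percpu_counter_read_positive()
clamps a transiently negative central count (possible while deltas are still
batched per CPU) instead of letting it escape as a huge unsigned number. A
simplified sketch of that behaviour, not the exact code from
include/linux/percpu_counter.h:

/* Clamp a possibly-negative approximate count to a non-negative value. */
static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
{
	s64 ret = fbc->count;	/* cheap read; per-CPU deltas not folded in */

	if (ret >= 0)
		return ret;
	return 0;		/* never report an underflowed value */
}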
Reported-by: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Eric B Munson <ebmunson@us.ibm.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: <stable@kernel.org> [All kernel versions]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/mmap.c	| 12
-rw-r--r--	mm/nommu.c	| 13
-rw-r--r--	mm/swap.c	| 46
3 files changed, 13 insertions(+), 58 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -179,11 +179,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	if (mm)
 		allowed -= mm->total_vm / 32;
 
-	/*
-	 * cast `allowed' as a signed long because vm_committed_space
-	 * sometimes has a negative value
-	 */
-	if (atomic_long_read(&vm_committed_space) < (long)allowed)
+	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
 		return 0;
 error:
 	vm_unacct_memory(pages);
@@ -2481,4 +2477,8 @@ void mm_drop_all_locks(struct mm_struct *mm)
  */
 void __init mmap_init(void)
 {
+	int ret;
+
+	ret = percpu_counter_init(&vm_committed_as, 0);
+	VM_BUG_ON(ret);
 }
diff --git a/mm/nommu.c b/mm/nommu.c
index 72eda4aee2cb..809998aa7b50 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -62,7 +62,7 @@ void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
 unsigned long num_physpages;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
@@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
  */
 void __init mmap_init(void)
 {
+	int ret;
+
+	ret = percpu_counter_init(&vm_committed_as, 0);
+	VM_BUG_ON(ret);
 	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC);
 }
 
@@ -1847,12 +1851,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	if (mm)
 		allowed -= mm->total_vm / 32;
 
-	/*
-	 * cast `allowed' as a signed long because vm_committed_space
-	 * sometimes has a negative value
-	 */
-	if (atomic_long_read(&vm_committed_space) < (long)allowed)
+	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
 		return 0;
+
 error:
 	vm_unacct_memory(pages);
 
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -491,49 +491,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
 
 EXPORT_SYMBOL(pagevec_lookup_tag);
 
-#ifdef CONFIG_SMP
-/*
- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
- * CPUs
- */
-#define ACCT_THRESHOLD	max(16, NR_CPUS * 2)
-
-static DEFINE_PER_CPU(long, committed_space);
-
-void vm_acct_memory(long pages)
-{
-	long *local;
-
-	preempt_disable();
-	local = &__get_cpu_var(committed_space);
-	*local += pages;
-	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
-		atomic_long_add(*local, &vm_committed_space);
-		*local = 0;
-	}
-	preempt_enable();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Drop the CPU's cached committed space back into the central pool. */
-static int cpu_swap_callback(struct notifier_block *nfb,
-			     unsigned long action,
-			     void *hcpu)
-{
-	long *committed;
-
-	committed = &per_cpu(committed_space, (long)hcpu);
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		atomic_long_add(*committed, &vm_committed_space);
-		*committed = 0;
-		drain_cpu_pagevecs((long)hcpu);
-	}
-	return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-#endif /* CONFIG_SMP */
-
 /*
  * Perform any setup for the swap system
  */
@@ -554,7 +511,4 @@ void __init swap_setup(void)
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
-#ifdef CONFIG_HOTPLUG_CPU
-	hotcpu_notifier(cpu_swap_callback, 0);
-#endif
 }
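
With the open-coded per-CPU cache removed from mm/swap.c, the accounting
helpers reduce to the generic percpu_counter API. That counterpart change
lives outside mm/ and is therefore not part of this diffstat, but it amounts
to roughly the following sketch (assumed shape of the include/linux/mman.h
side of the patch):

extern struct percpu_counter vm_committed_as;

static inline void vm_acct_memory(long pages)
{
	percpu_counter_add(&vm_committed_as, pages);
}

static inline void vm_unacct_memory(long pages)
{
	vm_acct_memory(-pages);
}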