author     Ingo Molnar <mingo@elte.hu>    2009-05-18 01:37:44 -0400
committer  Ingo Molnar <mingo@elte.hu>    2009-05-18 01:37:49 -0400
commit     dc3f81b129b5439ba7bac265bbc6a51a39275dae (patch)
tree       216030731d911249496d2e97206cd61431e31c89 /mm
parent     d2517a49d55536b38c7a87e5289550cfedaa4dcc (diff)
parent     1406de8e11eb043681297adf86d6892ff8efc27a (diff)
Merge commit 'v2.6.30-rc6' into perfcounters/core
Merge reason: this branch was on an -rc4 base, merge it up to -rc6
to get the latest upstream fixes.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig       |  28
-rw-r--r--  mm/memcontrol.c  |  38
-rw-r--r--  mm/memory.c      | 112
-rw-r--r--  mm/mmap.c        |  12
-rw-r--r--  mm/nommu.c       |  17
-rw-r--r--  mm/oom_kill.c    |  44
-rw-r--r--  mm/page_alloc.c  |  20
-rw-r--r--  mm/pdflush.c     |  31
-rw-r--r--  mm/shmem.c       |   8
-rw-r--r--  mm/swap.c        |  46
-rw-r--r--  mm/vmalloc.c     |   1
-rw-r--r--  mm/vmscan.c      |   2
12 files changed, 194 insertions, 165 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 57971d2ab848..c2b57d81e153 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -225,3 +225,31 @@ config HAVE_MLOCKED_PAGE_BIT
 
 config MMU_NOTIFIER
         bool
+
+config NOMMU_INITIAL_TRIM_EXCESS
+        int "Turn on mmap() excess space trimming before booting"
+        depends on !MMU
+        default 1
+        help
+          The NOMMU mmap() frequently needs to allocate large contiguous chunks
+          of memory on which to store mappings, but it can only ask the system
+          allocator for chunks in 2^N*PAGE_SIZE amounts - which is frequently
+          more than it requires. To deal with this, mmap() is able to trim off
+          the excess and return it to the allocator.
+
+          If trimming is enabled, the excess is trimmed off and returned to the
+          system allocator, which can cause extra fragmentation, particularly
+          if there are a lot of transient processes.
+
+          If trimming is disabled, the excess is kept, but not used, which for
+          long-term mappings means that the space is wasted.
+
+          Trimming can be dynamically controlled through a sysctl option
+          (/proc/sys/vm/nr_trim_pages) which specifies the minimum number of
+          excess pages there must be before trimming should occur, or zero if
+          no trimming is to occur.
+
+          This option specifies the initial value of this option. The default
+          of 1 says that all excess pages should be trimmed.
+
+          See Documentation/nommu-mmap.txt for more information.
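
Note on the help text above: the Kconfig value only seeds the boot-time default; the live knob is /proc/sys/vm/nr_trim_pages. A minimal userspace sketch of overriding it at runtime (illustration only, assuming a NOMMU kernel built with this option; not part of the commit):

#include <stdio.h>

int main(void)
{
        /* 0 disables trimming; the default of 1 trims all excess pages */
        FILE *f = fopen("/proc/sys/vm/nr_trim_pages", "w");

        if (!f) {
                perror("nr_trim_pages");
                return 1;
        }
        fputs("0\n", f);
        fclose(f);
        return 0;
}
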
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e44fb0fbb80e..01c2d8f14685 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1024,9 +1024,7 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
                 return NULL;
 
         pc = lookup_page_cgroup(page);
-        /*
-         * Used bit of swapcache is solid under page lock.
-         */
+        lock_page_cgroup(pc);
         if (PageCgroupUsed(pc)) {
                 mem = pc->mem_cgroup;
                 if (mem && !css_tryget(&mem->css))
@@ -1040,6 +1038,7 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
                 mem = NULL;
                 rcu_read_unlock();
         }
+        unlock_page_cgroup(pc);
         return mem;
 }
 
@@ -1618,37 +1617,28 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
 }
 
 /*
- * A call to try to shrink memory usage under specified resource controller.
- * This is typically used for page reclaiming for shmem for reducing side
- * effect of page allocation from shmem, which is used by some mem_cgroup.
+ * A call to try to shrink memory usage on charge failure at shmem's swapin.
+ * Calling hierarchical_reclaim is not enough because we should update
+ * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
+ * Moreover considering hierarchy, we should reclaim from the mem_over_limit,
+ * not from the memcg which this page would be charged to.
+ * try_charge_swapin does all of these works properly.
  */
-int mem_cgroup_shrink_usage(struct page *page,
+int mem_cgroup_shmem_charge_fallback(struct page *page,
                             struct mm_struct *mm,
                             gfp_t gfp_mask)
 {
         struct mem_cgroup *mem = NULL;
-        int progress = 0;
-        int retry = MEM_CGROUP_RECLAIM_RETRIES;
+        int ret;
 
         if (mem_cgroup_disabled())
                 return 0;
-        if (page)
-                mem = try_get_mem_cgroup_from_swapcache(page);
-        if (!mem && mm)
-                mem = try_get_mem_cgroup_from_mm(mm);
-        if (unlikely(!mem))
-                return 0;
 
-        do {
-                progress = mem_cgroup_hierarchical_reclaim(mem,
-                                        gfp_mask, true, false);
-                progress += mem_cgroup_check_under_limit(mem);
-        } while (!progress && --retry);
+        ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
+        if (!ret)
+                mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
 
-        css_put(&mem->css);
-        if (!retry)
-                return -ENOMEM;
-        return 0;
+        return ret;
 }
 
 static DEFINE_MUTEX(set_limit_mutex);
diff --git a/mm/memory.c b/mm/memory.c
index cf6873e91c6a..4126dd16778c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1971,6 +1971,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                 ret = tmp;
                                 goto unwritable_page;
                         }
+                        if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
+                                lock_page(old_page);
+                                if (!old_page->mapping) {
+                                        ret = 0; /* retry the fault */
+                                        unlock_page(old_page);
+                                        goto unwritable_page;
+                                }
+                        } else
+                                VM_BUG_ON(!PageLocked(old_page));
 
                         /*
                          * Since we dropped the lock we need to revalidate
@@ -1980,9 +1989,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                          */
                         page_table = pte_offset_map_lock(mm, pmd, address,
                                                          &ptl);
-                        page_cache_release(old_page);
-                        if (!pte_same(*page_table, orig_pte))
+                        if (!pte_same(*page_table, orig_pte)) {
+                                unlock_page(old_page);
+                                page_cache_release(old_page);
                                 goto unlock;
+                        }
 
                         page_mkwrite = 1;
                 }
@@ -2094,9 +2105,6 @@ gotten:
 unlock:
         pte_unmap_unlock(page_table, ptl);
         if (dirty_page) {
-                if (vma->vm_file)
-                        file_update_time(vma->vm_file);
-
                 /*
                  * Yes, Virginia, this is actually required to prevent a race
                  * with clear_page_dirty_for_io() from clearing the page dirty
@@ -2105,16 +2113,41 @@ unlock:
                  *
                  * do_no_page is protected similarly.
                  */
-                wait_on_page_locked(dirty_page);
-                set_page_dirty_balance(dirty_page, page_mkwrite);
+                if (!page_mkwrite) {
+                        wait_on_page_locked(dirty_page);
+                        set_page_dirty_balance(dirty_page, page_mkwrite);
+                }
                 put_page(dirty_page);
+                if (page_mkwrite) {
+                        struct address_space *mapping = dirty_page->mapping;
+
+                        set_page_dirty(dirty_page);
+                        unlock_page(dirty_page);
+                        page_cache_release(dirty_page);
+                        if (mapping) {
+                                /*
+                                 * Some device drivers do not set page.mapping
+                                 * but still dirty their pages
+                                 */
+                                balance_dirty_pages_ratelimited(mapping);
+                        }
+                }
+
+                /* file_update_time outside page_lock */
+                if (vma->vm_file)
+                        file_update_time(vma->vm_file);
         }
         return ret;
 oom_free_new:
         page_cache_release(new_page);
 oom:
-        if (old_page)
+        if (old_page) {
+                if (page_mkwrite) {
+                        unlock_page(old_page);
+                        page_cache_release(old_page);
+                }
                 page_cache_release(old_page);
+        }
         return VM_FAULT_OOM;
 
 unwritable_page:
@@ -2458,8 +2491,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
         if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
                 ret = VM_FAULT_OOM;
-                unlock_page(page);
-                goto out;
+                goto out_page;
         }
 
         /*
@@ -2521,6 +2553,7 @@ out:
 out_nomap:
         mem_cgroup_cancel_charge_swapin(ptr);
         pte_unmap_unlock(page_table, ptl);
+out_page:
         unlock_page(page);
         page_cache_release(page);
         return ret;
@@ -2664,27 +2697,22 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                         int tmp;
 
                         unlock_page(page);
-                        vmf.flags |= FAULT_FLAG_MKWRITE;
+                        vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
                         tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
                         if (unlikely(tmp &
                                      (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
                                 ret = tmp;
-                                anon = 1; /* no anon but release vmf.page */
-                                goto out_unlocked;
-                        }
-                        lock_page(page);
-                        /*
-                         * XXX: this is not quite right (racy vs
-                         * invalidate) to unlock and relock the page
-                         * like this, however a better fix requires
-                         * reworking page_mkwrite locking API, which
-                         * is better done later.
-                         */
-                        if (!page->mapping) {
-                                ret = 0;
-                                anon = 1; /* no anon but release vmf.page */
-                                goto out;
+                                goto unwritable_page;
                         }
+                        if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
+                                lock_page(page);
+                                if (!page->mapping) {
+                                        ret = 0; /* retry the fault */
+                                        unlock_page(page);
+                                        goto unwritable_page;
+                                }
+                        } else
+                                VM_BUG_ON(!PageLocked(page));
                         page_mkwrite = 1;
                 }
         }
@@ -2736,19 +2764,35 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         pte_unmap_unlock(page_table, ptl);
 
 out:
-        unlock_page(vmf.page);
-out_unlocked:
-        if (anon)
-                page_cache_release(vmf.page);
-        else if (dirty_page) {
-                if (vma->vm_file)
-                        file_update_time(vma->vm_file);
+        if (dirty_page) {
+                struct address_space *mapping = page->mapping;
 
-                set_page_dirty_balance(dirty_page, page_mkwrite);
+                if (set_page_dirty(dirty_page))
+                        page_mkwrite = 1;
+                unlock_page(dirty_page);
                 put_page(dirty_page);
+                if (page_mkwrite && mapping) {
+                        /*
+                         * Some device drivers do not set page.mapping but still
+                         * dirty their pages
+                         */
+                        balance_dirty_pages_ratelimited(mapping);
+                }
+
+                /* file_update_time outside page_lock */
+                if (vma->vm_file)
+                        file_update_time(vma->vm_file);
+        } else {
+                unlock_page(vmf.page);
+                if (anon)
+                        page_cache_release(vmf.page);
         }
 
         return ret;
+
+unwritable_page:
+        page_cache_release(page);
+        return ret;
 }
 
 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
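
The do_wp_page() and __do_fault() hunks above switch to the VM_FAULT_LOCKED protocol: a ->page_mkwrite() handler may return with the page already locked, and the fault path falls back to lock_page() plus a page->mapping recheck only when that bit is clear. A rough sketch of a handler under this protocol (illustration only, not code from this merge; demo_page_mkwrite and its internals are made-up placeholders):

static int demo_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct page *page = vmf->page;

        lock_page(page);
        /* Truncated while we slept? Let the caller retry the fault. */
        if (!page->mapping) {
                unlock_page(page);
                return 0;
        }
        /* ... reserve blocks / mark the page writable in the filesystem ... */
        return VM_FAULT_LOCKED; /* the page is handed back locked */
}
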
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -86,7 +86,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;       /* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -180,11 +180,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
         if (mm)
                 allowed -= mm->total_vm / 32;
 
-        /*
-         * cast `allowed' as a signed long because vm_committed_space
-         * sometimes has a negative value
-         */
-        if (atomic_long_read(&vm_committed_space) < (long)allowed)
+        if (percpu_counter_read_positive(&vm_committed_as) < allowed)
                 return 0;
 error:
         vm_unacct_memory(pages);
@@ -2491,4 +2487,8 @@ void mm_drop_all_locks(struct mm_struct *mm)
  */
 void __init mmap_init(void)
 {
+        int ret;
+
+        ret = percpu_counter_init(&vm_committed_as, 0);
+        VM_BUG_ON(ret);
 }
diff --git a/mm/nommu.c b/mm/nommu.c
index 72eda4aee2cb..b571ef707428 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -62,11 +62,11 @@ void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
 unsigned long num_physpages;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
-int sysctl_nr_trim_pages = 1; /* page trimming behaviour */
+int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
 int heap_stack_gap = 0;
 
 atomic_long_t mmap_pages_allocated;
@@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
  */
 void __init mmap_init(void)
 {
+        int ret;
+
+        ret = percpu_counter_init(&vm_committed_as, 0);
+        VM_BUG_ON(ret);
         vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC);
 }
 
@@ -511,8 +515,6 @@ static void add_nommu_region(struct vm_region *region)
 
         validate_nommu_regions();
 
-        BUG_ON(region->vm_start & ~PAGE_MASK);
-
         parent = NULL;
         p = &nommu_region_tree.rb_node;
         while (*p) {
@@ -1847,12 +1849,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
         if (mm)
                 allowed -= mm->total_vm / 32;
 
-        /*
-         * cast `allowed' as a signed long because vm_committed_space
-         * sometimes has a negative value
-         */
-        if (atomic_long_read(&vm_committed_space) < (long)allowed)
+        if (percpu_counter_read_positive(&vm_committed_as) < allowed)
                 return 0;
+
 error:
         vm_unacct_memory(pages);
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 2f3166e308d9..92bcf1db16b2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -514,34 +514,32 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
  */
 static void __out_of_memory(gfp_t gfp_mask, int order)
 {
-        if (sysctl_oom_kill_allocating_task) {
-                oom_kill_process(current, gfp_mask, order, 0, NULL,
-                                "Out of memory (oom_kill_allocating_task)");
-
-        } else {
-                unsigned long points;
-                struct task_struct *p;
-
-retry:
-                /*
-                 * Rambo mode: Shoot down a process and hope it solves whatever
-                 * issues we may have.
-                 */
-                p = select_bad_process(&points, NULL);
+        struct task_struct *p;
+        unsigned long points;
 
-                if (PTR_ERR(p) == -1UL)
+        if (sysctl_oom_kill_allocating_task)
+                if (!oom_kill_process(current, gfp_mask, order, 0, NULL,
+                                "Out of memory (oom_kill_allocating_task)"))
                         return;
+retry:
+        /*
+         * Rambo mode: Shoot down a process and hope it solves whatever
+         * issues we may have.
+         */
+        p = select_bad_process(&points, NULL);
 
-                /* Found nothing?!?! Either we hang forever, or we panic. */
-                if (!p) {
-                        read_unlock(&tasklist_lock);
-                        panic("Out of memory and no killable processes...\n");
-                }
+        if (PTR_ERR(p) == -1UL)
+                return;
 
-                if (oom_kill_process(p, gfp_mask, order, points, NULL,
-                                "Out of memory"))
-                        goto retry;
+        /* Found nothing?!?! Either we hang forever, or we panic. */
+        if (!p) {
+                read_unlock(&tasklist_lock);
+                panic("Out of memory and no killable processes...\n");
         }
+
+        if (oom_kill_process(p, gfp_mask, order, points, NULL,
+                        "Out of memory"))
+                goto retry;
 }
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e2f26991fff1..fe753ecf2aa5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2681,6 +2681,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 
 static int zone_batchsize(struct zone *zone)
 {
+#ifdef CONFIG_MMU
         int batch;
 
         /*
@@ -2706,9 +2707,26 @@ static int zone_batchsize(struct zone *zone)
          * of pages of one half of the possible page colors
          * and the other with pages of the other colors.
          */
-        batch = (1 << (fls(batch + batch/2)-1)) - 1;
+        batch = rounddown_pow_of_two(batch + batch/2) - 1;
 
         return batch;
+
+#else
+        /* The deferral and batching of frees should be suppressed under NOMMU
+         * conditions.
+         *
+         * The problem is that NOMMU needs to be able to allocate large chunks
+         * of contiguous memory as there's no hardware page translation to
+         * assemble apparent contiguous memory from discontiguous pages.
+         *
+         * Queueing large contiguous runs of pages for batching, however,
+         * causes the pages to actually be freed in smaller chunks.  As there
+         * can be a significant delay between the individual batches being
+         * recycled, this leads to the once large chunks of space being
+         * fragmented and becoming unavailable for high-order allocations.
+         */
+        return 0;
+#endif
 }
 
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
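
The MMU-side change to zone_batchsize() is a readability cleanup: for any batch >= 1, rounddown_pow_of_two(batch + batch/2) - 1 equals the old (1 << (fls(batch + batch/2) - 1)) - 1. A small userspace check of that equivalence (fls_ and rounddown_pow_of_two_ are stand-ins for the kernel helpers, written here only for illustration):

#include <stdio.h>

static int fls_(unsigned int x)
{
        int r = 0;

        while (x) {
                x >>= 1;
                r++;
        }
        return r;
}

static unsigned int rounddown_pow_of_two_(unsigned int x)
{
        return 1u << (fls_(x) - 1);
}

int main(void)
{
        unsigned int batch;

        for (batch = 1; batch <= 4096; batch++) {
                unsigned int v = batch + batch / 2;

                if ((rounddown_pow_of_two_(v) - 1) != ((1u << (fls_(v) - 1)) - 1)) {
                        printf("mismatch at batch=%u\n", batch);
                        return 1;
                }
        }
        printf("old and new expressions agree for batch 1..4096\n");
        return 0;
}
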
diff --git a/mm/pdflush.c b/mm/pdflush.c
index f2caf96993f8..235ac440c44e 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -58,14 +58,6 @@ static DEFINE_SPINLOCK(pdflush_lock);
 int nr_pdflush_threads = 0;
 
 /*
- * The max/min number of pdflush threads. R/W by sysctl at
- * /proc/sys/vm/nr_pdflush_threads_max/min
- */
-int nr_pdflush_threads_max __read_mostly = MAX_PDFLUSH_THREADS;
-int nr_pdflush_threads_min __read_mostly = MIN_PDFLUSH_THREADS;
-
-
-/*
  * The time at which the pdflush thread pool last went empty
  */
 static unsigned long last_empty_jifs;
@@ -76,7 +68,7 @@ static unsigned long last_empty_jifs;
  * Thread pool management algorithm:
  *
  * - The minimum and maximum number of pdflush instances are bound
- *   by nr_pdflush_threads_min and nr_pdflush_threads_max.
+ *   by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS.
  *
  * - If there have been no idle pdflush instances for 1 second, create
  *   a new one.
@@ -142,13 +134,14 @@ static int __pdflush(struct pdflush_work *my_work)
                  * To throttle creation, we reset last_empty_jifs.
                  */
                 if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
-                        if (list_empty(&pdflush_list) &&
-                                        nr_pdflush_threads < nr_pdflush_threads_max) {
+                        if (list_empty(&pdflush_list)) {
+                                if (nr_pdflush_threads < MAX_PDFLUSH_THREADS) {
                                         last_empty_jifs = jiffies;
                                         nr_pdflush_threads++;
                                         spin_unlock_irq(&pdflush_lock);
                                         start_one_pdflush_thread();
                                         spin_lock_irq(&pdflush_lock);
+                                }
                         }
                 }
 
@@ -160,7 +153,7 @@ static int __pdflush(struct pdflush_work *my_work)
                  */
                 if (list_empty(&pdflush_list))
                         continue;
-                if (nr_pdflush_threads <= nr_pdflush_threads_min)
+                if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
                         continue;
                 pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
                 if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
@@ -266,9 +259,9 @@ static int __init pdflush_init(void)
          * Pre-set nr_pdflush_threads... If we fail to create,
          * the count will be decremented.
          */
-        nr_pdflush_threads = nr_pdflush_threads_min;
+        nr_pdflush_threads = MIN_PDFLUSH_THREADS;
 
-        for (i = 0; i < nr_pdflush_threads_min; i++)
+        for (i = 0; i < MIN_PDFLUSH_THREADS; i++)
                 start_one_pdflush_thread();
         return 0;
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index f9cb20ebb990..b25f95ce3db7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1340,8 +1340,12 @@ repeat:
                                 shmem_swp_unmap(entry);
                                 spin_unlock(&info->lock);
                                 if (error == -ENOMEM) {
-                                        /* allow reclaim from this memory cgroup */
-                                        error = mem_cgroup_shrink_usage(swappage,
+                                        /*
+                                         * reclaim from proper memory cgroup and
+                                         * call memcg's OOM if needed.
+                                         */
+                                        error = mem_cgroup_shmem_charge_fallback(
+                                                                swappage,
                                                                 current->mm,
                                                                 gfp);
                                         if (error) {
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -491,49 +491,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
 
 EXPORT_SYMBOL(pagevec_lookup_tag);
 
-#ifdef CONFIG_SMP
-/*
- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
- * CPUs
- */
-#define ACCT_THRESHOLD  max(16, NR_CPUS * 2)
-
-static DEFINE_PER_CPU(long, committed_space);
-
-void vm_acct_memory(long pages)
-{
-        long *local;
-
-        preempt_disable();
-        local = &__get_cpu_var(committed_space);
-        *local += pages;
-        if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
-                atomic_long_add(*local, &vm_committed_space);
-                *local = 0;
-        }
-        preempt_enable();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Drop the CPU's cached committed space back into the central pool. */
-static int cpu_swap_callback(struct notifier_block *nfb,
-                             unsigned long action,
-                             void *hcpu)
-{
-        long *committed;
-
-        committed = &per_cpu(committed_space, (long)hcpu);
-        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-                atomic_long_add(*committed, &vm_committed_space);
-                *committed = 0;
-                drain_cpu_pagevecs((long)hcpu);
-        }
-        return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-#endif /* CONFIG_SMP */
-
 /*
  * Perform any setup for the swap system
  */
@@ -554,7 +511,4 @@ void __init swap_setup(void)
          * Right now other parts of the system means that we
          * _really_ don't want to cluster much more
          */
-#ifdef CONFIG_HOTPLUG_CPU
-        hotcpu_notifier(cpu_swap_callback, 0);
-#endif
 }
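
The vm_acct_memory()/cpu_swap_callback() machinery removed above is what the vm_committed_as percpu_counter introduced in the mmap.c and nommu.c hunks replaces: per-CPU deltas folded into a shared count once they cross a batch threshold, with CPU-hotplug folding handled inside the percpu_counter library. A rough sketch of the pattern (illustration only, not code from this merge; the demo_* names are made up):

#include <linux/init.h>
#include <linux/percpu_counter.h>

static struct percpu_counter demo_committed;

static int __init demo_init(void)
{
        /* Here percpu_counter_init() takes the counter and an initial value. */
        return percpu_counter_init(&demo_committed, 0);
}

static void demo_acct(long pages)
{
        /* Cheap per-CPU add; folding into the shared count happens automatically. */
        percpu_counter_add(&demo_committed, pages);
}

static s64 demo_read(void)
{
        /* Never returns a transiently negative sum, like the overcommit check above. */
        return percpu_counter_read_positive(&demo_committed);
}
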
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index fab19876b4d1..083716ea38c9 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -402,6 +402,7 @@ overflow:
                 printk(KERN_WARNING
                         "vmap allocation for size %lu failed: "
                         "use vmalloc=<size> to increase size.\n", size);
+        kfree(va);
         return ERR_PTR(-EBUSY);
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eac9577941f9..5fa3eda1f03f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1471,7 +1471,7 @@ static void shrink_zone(int priority, struct zone *zone,
 
         for_each_evictable_lru(l) {
                 int file = is_file_lru(l);
-                int scan;
+                unsigned long scan;
 
                 scan = zone_nr_pages(zone, sc, l);
                 if (priority) {