Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig       |  30
-rw-r--r--  mm/filemap.c     |   5
-rw-r--r--  mm/madvise.c     |   8
-rw-r--r--  mm/memcontrol.c  |  40
-rw-r--r--  mm/memory.c      | 112
-rw-r--r--  mm/mmap.c        |  14
-rw-r--r--  mm/nommu.c       |  17
-rw-r--r--  mm/oom_kill.c    |  44
-rw-r--r--  mm/page_alloc.c  |  20
-rw-r--r--  mm/shmem.c       |  35
-rw-r--r--  mm/swap.c        |  46
-rw-r--r--  mm/util.c        |  16
-rw-r--r--  mm/vmalloc.c     |   1
-rw-r--r--  mm/vmscan.c      |  19
14 files changed, 245 insertions(+), 162 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index b53427ad30a3..c2b57d81e153 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -213,6 +213,8 @@ config UNEVICTABLE_LRU
 	  will use one page flag and increase the code size a little,
 	  say Y unless you know what you are doing.
 
+	  See Documentation/vm/unevictable-lru.txt for more information.
+
 config HAVE_MLOCK
 	bool
 	default y if MMU=y
@@ -223,3 +225,31 @@ config HAVE_MLOCKED_PAGE_BIT
 
 config MMU_NOTIFIER
 	bool
+
+config NOMMU_INITIAL_TRIM_EXCESS
+	int "Turn on mmap() excess space trimming before booting"
+	depends on !MMU
+	default 1
+	help
+	  The NOMMU mmap() frequently needs to allocate large contiguous chunks
+	  of memory on which to store mappings, but it can only ask the system
+	  allocator for chunks in 2^N*PAGE_SIZE amounts - which is frequently
+	  more than it requires.  To deal with this, mmap() is able to trim off
+	  the excess and return it to the allocator.
+
+	  If trimming is enabled, the excess is trimmed off and returned to the
+	  system allocator, which can cause extra fragmentation, particularly
+	  if there are a lot of transient processes.
+
+	  If trimming is disabled, the excess is kept, but not used, which for
+	  long-term mappings means that the space is wasted.
+
+	  Trimming can be dynamically controlled through a sysctl option
+	  (/proc/sys/vm/nr_trim_pages) which specifies the minimum number of
+	  excess pages there must be before trimming should occur, or zero if
+	  no trimming is to occur.
+
+	  This option specifies the initial value of this option.  The default
+	  of 1 says that all excess pages should be trimmed.
+
+	  See Documentation/nommu-mmap.txt for more information.
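(Aside, not part of the patch: the help text above describes the /proc/sys/vm/nr_trim_pages sysctl that this Kconfig option seeds. A minimal userspace sketch, assuming a NOMMU kernel that exposes that proc file, to read and optionally change the threshold:)

/* Hypothetical helper, not from the kernel tree: show or set nr_trim_pages. */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	const char *path = "/proc/sys/vm/nr_trim_pages";
	FILE *f;
	long val;

	if (argc > 1) {
		/* write the minimum number of excess pages before trimming */
		f = fopen(path, "w");
		if (!f || fprintf(f, "%ld\n", strtol(argv[1], NULL, 0)) < 0) {
			perror(path);
			return 1;
		}
		fclose(f);
	}

	f = fopen(path, "r");
	if (!f || fscanf(f, "%ld", &val) != 1) {
		perror(path);
		return 1;
	}
	fclose(f);
	printf("nr_trim_pages = %ld (0 disables trimming)\n", val);
	return 0;
}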
diff --git a/mm/filemap.c b/mm/filemap.c
index 2e2d38ebda4b..379ff0bcbf6e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -441,6 +441,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 	}
 	return err;
 }
+EXPORT_SYMBOL(filemap_write_and_wait_range);
 
 /**
  * add_to_page_cache_locked - add a locked page to the pagecache
@@ -567,8 +568,8 @@ EXPORT_SYMBOL(wait_on_page_bit);
 
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
- * @page - Page defining the wait queue of interest
- * @waiter - Waiter to add to the queue
+ * @page: Page defining the wait queue of interest
+ * @waiter: Waiter to add to the queue
  *
  * Add an arbitrary @waiter to the wait queue for the nominated @page.
  */
diff --git a/mm/madvise.c b/mm/madvise.c
index b9ce574827c8..36d6ea2b6340 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -112,6 +112,14 @@ static long madvise_willneed(struct vm_area_struct * vma,
 	if (!file)
 		return -EBADF;
 
+	/*
+	 * Page cache readahead assumes page cache pages are order-0 which
+	 * is not the case for hugetlbfs. Do not give a bad return value
+	 * but ignore the advice.
+	 */
+	if (vma->vm_flags & VM_HUGETLB)
+		return 0;
+
 	if (file->f_mapping->a_ops->get_xip_mem) {
 		/* no bad return value, but ignore advice */
 		return 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2fc6d6c48238..01c2d8f14685 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -932,7 +932,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	if (unlikely(!mem))
 		return 0;
 
-	VM_BUG_ON(mem_cgroup_is_obsolete(mem));
+	VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem));
 
 	while (1) {
 		int ret;
@@ -1024,9 +1024,7 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
 		return NULL;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit of swapcache is solid under page lock.
-	 */
+	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		if (mem && !css_tryget(&mem->css))
@@ -1040,6 +1038,7 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
 		mem = NULL;
 		rcu_read_unlock();
 	}
+	unlock_page_cgroup(pc);
 	return mem;
 }
 
@@ -1618,37 +1617,28 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
 }
 
 /*
- * A call to try to shrink memory usage under specified resource controller.
- * This is typically used for page reclaiming for shmem for reducing side
- * effect of page allocation from shmem, which is used by some mem_cgroup.
+ * A call to try to shrink memory usage on charge failure at shmem's swapin.
+ * Calling hierarchical_reclaim is not enough because we should update
+ * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
+ * Moreover considering hierarchy, we should reclaim from the mem_over_limit,
+ * not from the memcg which this page would be charged to.
+ * try_charge_swapin does all of these works properly.
  */
-int mem_cgroup_shrink_usage(struct page *page,
+int mem_cgroup_shmem_charge_fallback(struct page *page,
 			struct mm_struct *mm,
 			gfp_t gfp_mask)
 {
 	struct mem_cgroup *mem = NULL;
-	int progress = 0;
-	int retry = MEM_CGROUP_RECLAIM_RETRIES;
+	int ret;
 
 	if (mem_cgroup_disabled())
 		return 0;
-	if (page)
-		mem = try_get_mem_cgroup_from_swapcache(page);
-	if (!mem && mm)
-		mem = try_get_mem_cgroup_from_mm(mm);
-	if (unlikely(!mem))
-		return 0;
 
-	do {
-		progress = mem_cgroup_hierarchical_reclaim(mem,
-					gfp_mask, true, false);
-		progress += mem_cgroup_check_under_limit(mem);
-	} while (!progress && --retry);
+	ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
+	if (!ret)
+		mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
 
-	css_put(&mem->css);
-	if (!retry)
-		return -ENOMEM;
-	return 0;
+	return ret;
 }
 
 static DEFINE_MUTEX(set_limit_mutex);
diff --git a/mm/memory.c b/mm/memory.c
index cf6873e91c6a..4126dd16778c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1971,6 +1971,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				ret = tmp;
 				goto unwritable_page;
 			}
+			if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
+				lock_page(old_page);
+				if (!old_page->mapping) {
+					ret = 0; /* retry the fault */
+					unlock_page(old_page);
+					goto unwritable_page;
+				}
+			} else
+				VM_BUG_ON(!PageLocked(old_page));
 
 			/*
 			 * Since we dropped the lock we need to revalidate
@@ -1980,9 +1989,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 */
 			page_table = pte_offset_map_lock(mm, pmd, address,
 							 &ptl);
-			page_cache_release(old_page);
-			if (!pte_same(*page_table, orig_pte))
+			if (!pte_same(*page_table, orig_pte)) {
+				unlock_page(old_page);
+				page_cache_release(old_page);
 				goto unlock;
+			}
 
 			page_mkwrite = 1;
 		}
@@ -2094,9 +2105,6 @@ gotten:
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	if (dirty_page) {
-		if (vma->vm_file)
-			file_update_time(vma->vm_file);
-
 		/*
 		 * Yes, Virginia, this is actually required to prevent a race
 		 * with clear_page_dirty_for_io() from clearing the page dirty
@@ -2105,16 +2113,41 @@ unlock:
 		 *
 		 * do_no_page is protected similarly.
 		 */
-		wait_on_page_locked(dirty_page);
-		set_page_dirty_balance(dirty_page, page_mkwrite);
+		if (!page_mkwrite) {
+			wait_on_page_locked(dirty_page);
+			set_page_dirty_balance(dirty_page, page_mkwrite);
+		}
 		put_page(dirty_page);
+		if (page_mkwrite) {
+			struct address_space *mapping = dirty_page->mapping;
+
+			set_page_dirty(dirty_page);
+			unlock_page(dirty_page);
+			page_cache_release(dirty_page);
+			if (mapping) {
+				/*
+				 * Some device drivers do not set page.mapping
+				 * but still dirty their pages
+				 */
+				balance_dirty_pages_ratelimited(mapping);
+			}
+		}
+
+		/* file_update_time outside page_lock */
+		if (vma->vm_file)
+			file_update_time(vma->vm_file);
 	}
 	return ret;
 oom_free_new:
 	page_cache_release(new_page);
 oom:
-	if (old_page)
+	if (old_page) {
+		if (page_mkwrite) {
+			unlock_page(old_page);
+			page_cache_release(old_page);
+		}
 		page_cache_release(old_page);
+	}
 	return VM_FAULT_OOM;
 
 unwritable_page:
@@ -2458,8 +2491,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
 		ret = VM_FAULT_OOM;
-		unlock_page(page);
-		goto out;
+		goto out_page;
 	}
 
 	/*
@@ -2521,6 +2553,7 @@ out:
 out_nomap:
 	mem_cgroup_cancel_charge_swapin(ptr);
 	pte_unmap_unlock(page_table, ptl);
+out_page:
 	unlock_page(page);
 	page_cache_release(page);
 	return ret;
@@ -2664,27 +2697,22 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			int tmp;
 
 			unlock_page(page);
-			vmf.flags |= FAULT_FLAG_MKWRITE;
+			vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
 			tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
 			if (unlikely(tmp &
 				  (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
 				ret = tmp;
-				anon = 1; /* no anon but release vmf.page */
-				goto out_unlocked;
-			}
-			lock_page(page);
-			/*
-			 * XXX: this is not quite right (racy vs
-			 * invalidate) to unlock and relock the page
-			 * like this, however a better fix requires
-			 * reworking page_mkwrite locking API, which
-			 * is better done later.
-			 */
-			if (!page->mapping) {
-				ret = 0;
-				anon = 1; /* no anon but release vmf.page */
-				goto out;
+				goto unwritable_page;
 			}
+			if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
+				lock_page(page);
+				if (!page->mapping) {
+					ret = 0; /* retry the fault */
+					unlock_page(page);
+					goto unwritable_page;
+				}
+			} else
+				VM_BUG_ON(!PageLocked(page));
 			page_mkwrite = 1;
 		}
 	}
@@ -2736,19 +2764,35 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_unmap_unlock(page_table, ptl);
 
 out:
-	unlock_page(vmf.page);
-out_unlocked:
-	if (anon)
-		page_cache_release(vmf.page);
-	else if (dirty_page) {
-		if (vma->vm_file)
-			file_update_time(vma->vm_file);
+	if (dirty_page) {
+		struct address_space *mapping = page->mapping;
 
-		set_page_dirty_balance(dirty_page, page_mkwrite);
+		if (set_page_dirty(dirty_page))
+			page_mkwrite = 1;
+		unlock_page(dirty_page);
 		put_page(dirty_page);
+		if (page_mkwrite && mapping) {
+			/*
+			 * Some device drivers do not set page.mapping but still
+			 * dirty their pages
+			 */
+			balance_dirty_pages_ratelimited(mapping);
+		}
+
+		/* file_update_time outside page_lock */
+		if (vma->vm_file)
+			file_update_time(vma->vm_file);
+	} else {
+		unlock_page(vmf.page);
+		if (anon)
+			page_cache_release(vmf.page);
 	}
 
 	return ret;
+
+unwritable_page:
+	page_cache_release(page);
+	return ret;
 }
 
 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -179,11 +179,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	if (mm)
 		allowed -= mm->total_vm / 32;
 
-	/*
-	 * cast `allowed' as a signed long because vm_committed_space
-	 * sometimes has a negative value
-	 */
-	if (atomic_long_read(&vm_committed_space) < (long)allowed)
+	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
 		return 0;
 error:
 	vm_unacct_memory(pages);
@@ -1575,7 +1571,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 	 * Overcommit..  This must be the final test, as it will
 	 * update security statistics.
 	 */
-	if (security_vm_enough_memory(grow))
+	if (security_vm_enough_memory_mm(mm, grow))
 		return -ENOMEM;
 
 	/* Ok, everything looks good - let it rip */
@@ -2481,4 +2477,8 @@ void mm_drop_all_locks(struct mm_struct *mm)
  */
 void __init mmap_init(void)
 {
+	int ret;
+
+	ret = percpu_counter_init(&vm_committed_as, 0);
+	VM_BUG_ON(ret);
 }
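(Aside, not part of the patch: the change above replaces a hand-rolled per-CPU committed-space counter with a percpu_counter, so reads of the total are approximate but each CPU only rarely touches the shared cacheline. A rough userspace analogue of that batching idea, using pthreads and a per-thread delta; names and the BATCH value are illustrative only:)

/* Illustrative analogue of a batched per-CPU counter; not kernel code. */
#include <pthread.h>
#include <stdio.h>

#define BATCH 32

static long shared_total;                  /* analogous to the counter's global total */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static __thread long local_delta;          /* analogous to the per-CPU slot */

static void counter_add(long amount)
{
	local_delta += amount;
	if (local_delta >= BATCH || local_delta <= -BATCH) {
		pthread_mutex_lock(&lock);
		shared_total += local_delta;   /* fold the batch into the total */
		pthread_mutex_unlock(&lock);
		local_delta = 0;
	}
}

static long counter_read_positive(void)
{
	/* like percpu_counter_read_positive(): cheap, approximate, never negative */
	long v = shared_total;
	return v < 0 ? 0 : v;
}

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000; i++)
		counter_add(1);
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("approximate total: %ld\n", counter_read_positive());
	return 0;
}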
diff --git a/mm/nommu.c b/mm/nommu.c
index 72eda4aee2cb..b571ef707428 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -62,11 +62,11 @@ void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
 unsigned long num_physpages;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
-int sysctl_nr_trim_pages = 1; /* page trimming behaviour */
+int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
 int heap_stack_gap = 0;
 
 atomic_long_t mmap_pages_allocated;
@@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
  */
 void __init mmap_init(void)
 {
+	int ret;
+
+	ret = percpu_counter_init(&vm_committed_as, 0);
+	VM_BUG_ON(ret);
 	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC);
 }
 
@@ -511,8 +515,6 @@ static void add_nommu_region(struct vm_region *region)
 
 	validate_nommu_regions();
 
-	BUG_ON(region->vm_start & ~PAGE_MASK);
-
 	parent = NULL;
 	p = &nommu_region_tree.rb_node;
 	while (*p) {
@@ -1847,12 +1849,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	if (mm)
 		allowed -= mm->total_vm / 32;
 
-	/*
-	 * cast `allowed' as a signed long because vm_committed_space
-	 * sometimes has a negative value
-	 */
-	if (atomic_long_read(&vm_committed_space) < (long)allowed)
+	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
 		return 0;
+
 error:
 	vm_unacct_memory(pages);
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 2f3166e308d9..92bcf1db16b2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -514,34 +514,32 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
  */
 static void __out_of_memory(gfp_t gfp_mask, int order)
 {
-	if (sysctl_oom_kill_allocating_task) {
-		oom_kill_process(current, gfp_mask, order, 0, NULL,
-				"Out of memory (oom_kill_allocating_task)");
-
-	} else {
-		unsigned long points;
-		struct task_struct *p;
-
-retry:
-		/*
-		 * Rambo mode: Shoot down a process and hope it solves whatever
-		 * issues we may have.
-		 */
-		p = select_bad_process(&points, NULL);
+	struct task_struct *p;
+	unsigned long points;
 
-		if (PTR_ERR(p) == -1UL)
+	if (sysctl_oom_kill_allocating_task)
+		if (!oom_kill_process(current, gfp_mask, order, 0, NULL,
+				"Out of memory (oom_kill_allocating_task)"))
 			return;
+retry:
+	/*
+	 * Rambo mode: Shoot down a process and hope it solves whatever
+	 * issues we may have.
+	 */
+	p = select_bad_process(&points, NULL);
 
-		/* Found nothing?!?! Either we hang forever, or we panic. */
-		if (!p) {
-			read_unlock(&tasklist_lock);
-			panic("Out of memory and no killable processes...\n");
-		}
-
-		if (oom_kill_process(p, gfp_mask, order, points, NULL,
-				"Out of memory"))
-			goto retry;
+	if (PTR_ERR(p) == -1UL)
+		return;
+
+	/* Found nothing?!?! Either we hang forever, or we panic. */
+	if (!p) {
+		read_unlock(&tasklist_lock);
+		panic("Out of memory and no killable processes...\n");
 	}
+
+	if (oom_kill_process(p, gfp_mask, order, points, NULL,
+				"Out of memory"))
+		goto retry;
 }
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e2f26991fff1..fe753ecf2aa5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2681,6 +2681,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 
 static int zone_batchsize(struct zone *zone)
 {
+#ifdef CONFIG_MMU
 	int batch;
 
 	/*
@@ -2706,9 +2707,26 @@ static int zone_batchsize(struct zone *zone)
 	 * of pages of one half of the possible page colors
 	 * and the other with pages of the other colors.
 	 */
-	batch = (1 << (fls(batch + batch/2)-1)) - 1;
+	batch = rounddown_pow_of_two(batch + batch/2) - 1;
 
 	return batch;
+
+#else
+	/* The deferral and batching of frees should be suppressed under NOMMU
+	 * conditions.
+	 *
+	 * The problem is that NOMMU needs to be able to allocate large chunks
+	 * of contiguous memory as there's no hardware page translation to
+	 * assemble apparent contiguous memory from discontiguous pages.
+	 *
+	 * Queueing large contiguous runs of pages for batching, however,
+	 * causes the pages to actually be freed in smaller chunks.  As there
+	 * can be a significant delay between the individual batches being
+	 * recycled, this leads to the once large chunks of space being
+	 * fragmented and becoming unavailable for high-order allocations.
+	 */
+	return 0;
+#endif
 }
 
 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
diff --git a/mm/shmem.c b/mm/shmem.c
index d94d2e9146bc..b25f95ce3db7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/vfs.h>
 #include <linux/mount.h>
+#include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -43,7 +44,6 @@ static struct vfsmount *shm_mnt;
 #include <linux/exportfs.h>
 #include <linux/generic_acl.h>
 #include <linux/mman.h>
-#include <linux/pagemap.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
@@ -65,13 +65,28 @@ static struct vfsmount *shm_mnt;
 #include <asm/div64.h>
 #include <asm/pgtable.h>
 
+/*
+ * The maximum size of a shmem/tmpfs file is limited by the maximum size of
+ * its triple-indirect swap vector - see illustration at shmem_swp_entry().
+ *
+ * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
+ * but one eighth of that on a 64-bit kernel.  With 8kB page size, maximum
+ * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
+ * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
+ *
+ * We use / and * instead of shifts in the definitions below, so that the swap
+ * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
+ */
 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
-#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
-#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
+#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
-#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
-#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
+#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
+#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
 
+#define SHMEM_MAX_BYTES  min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
+#define SHMEM_MAX_INDEX  ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
+
+#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
 #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
 
 /* info->flags needs VM_flags to handle pagein/truncate races efficiently */
@@ -1325,8 +1340,12 @@ repeat:
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			if (error == -ENOMEM) {
-				/* allow reclaim from this memory cgroup */
-				error = mem_cgroup_shrink_usage(swappage,
+				/*
+				 * reclaim from proper memory cgroup and
+				 * call memcg's OOM if needed.
+				 */
+				error = mem_cgroup_shmem_charge_fallback(
+								swappage,
 								current->mm,
 								gfp);
 				if (error) {
@@ -2581,7 +2600,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 #define shmem_get_inode(sb, mode, dev, flags)	ramfs_get_inode(sb, mode, dev)
 #define shmem_acct_size(flags, size)		0
 #define shmem_unacct_size(flags, size)		do {} while (0)
-#define SHMEM_MAX_BYTES				LLONG_MAX
+#define SHMEM_MAX_BYTES				MAX_LFS_FILESIZE
 
 #endif /* CONFIG_SHMEM */
 
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -491,49 +491,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
 
 EXPORT_SYMBOL(pagevec_lookup_tag);
 
-#ifdef CONFIG_SMP
-/*
- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
- * CPUs
- */
-#define ACCT_THRESHOLD	max(16, NR_CPUS * 2)
-
-static DEFINE_PER_CPU(long, committed_space);
-
-void vm_acct_memory(long pages)
-{
-	long *local;
-
-	preempt_disable();
-	local = &__get_cpu_var(committed_space);
-	*local += pages;
-	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
-		atomic_long_add(*local, &vm_committed_space);
-		*local = 0;
-	}
-	preempt_enable();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Drop the CPU's cached committed space back into the central pool. */
-static int cpu_swap_callback(struct notifier_block *nfb,
-			     unsigned long action,
-			     void *hcpu)
-{
-	long *committed;
-
-	committed = &per_cpu(committed_space, (long)hcpu);
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		atomic_long_add(*committed, &vm_committed_space);
-		*committed = 0;
-		drain_cpu_pagevecs((long)hcpu);
-	}
-	return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-#endif /* CONFIG_SMP */
-
 /*
  * Perform any setup for the swap system
  */
@@ -554,7 +511,4 @@ void __init swap_setup(void)
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
-#ifdef CONFIG_HOTPLUG_CPU
-	hotcpu_notifier(cpu_swap_callback, 0);
-#endif
 }
diff --git a/mm/util.c b/mm/util.c
--- a/mm/util.c
+++ b/mm/util.c
@@ -223,6 +223,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 }
 #endif
 
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start:	starting user address
+ * @nr_pages:	number of pages from start to pin
+ * @write:	whether pages will be written to
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
 int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 				int nr_pages, int write, struct page **pages)
 {
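(Aside, not part of the patch: the kernel-doc added above documents the calling convention for get_user_pages_fast(). A hedged in-kernel sketch of a caller, with do_something_with_page() as a placeholder rather than a real function; real callers also need to handle the partial-pin case shown in the final comment:)

/* Hypothetical caller sketch, not from the kernel tree. */
#include <linux/mm.h>
#include <linux/slab.h>

static int pin_and_touch_user_buffer(unsigned long uaddr, int nr_pages)
{
	struct page **pages;
	int i, pinned;

	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* pin for write access; may fall back to get_user_pages() internally */
	pinned = get_user_pages_fast(uaddr, nr_pages, 1, pages);
	if (pinned < 0) {
		kfree(pages);
		return pinned;		/* no pages were pinned: -errno */
	}

	for (i = 0; i < pinned; i++) {
		/* do_something_with_page(pages[i]);  placeholder */
		put_page(pages[i]);	/* drop the reference taken by the pin */
	}

	kfree(pages);
	return pinned;			/* may be fewer than nr_pages */
}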
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index fab19876b4d1..083716ea38c9 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -402,6 +402,7 @@ overflow:
 		printk(KERN_WARNING
 			"vmap allocation for size %lu failed: "
 			"use vmalloc=<size> to increase size.\n", size);
+	kfree(va);
 	return ERR_PTR(-EBUSY);
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 39fdfb14eeaa..5fa3eda1f03f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -63,6 +63,9 @@ struct scan_control {
 	/* Can mapped pages be reclaimed? */
 	int may_unmap;
 
+	/* Can pages be swapped as part of reclaim? */
+	int may_swap;
+
 	/* This context's SWAP_CLUSTER_MAX. If freeing memory for
 	 * suspend, we effectively ignore SWAP_CLUSTER_MAX.
 	 * In this context, it doesn't matter that we scan the
@@ -1380,7 +1383,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
 	/* If we have no swap space, do not bother scanning anon pages. */
-	if (nr_swap_pages <= 0) {
+	if (!sc->may_swap || (nr_swap_pages <= 0)) {
 		percent[0] = 0;
 		percent[1] = 100;
 		return;
@@ -1468,7 +1471,7 @@ static void shrink_zone(int priority, struct zone *zone,
 
 	for_each_evictable_lru(l) {
 		int file = is_file_lru(l);
-		int scan;
+		unsigned long scan;
 
 		scan = zone_nr_pages(zone, sc, l);
 		if (priority) {
@@ -1697,6 +1700,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.may_writepage = !laptop_mode,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.may_unmap = 1,
+		.may_swap = 1,
 		.swappiness = vm_swappiness,
 		.order = order,
 		.mem_cgroup = NULL,
@@ -1717,6 +1721,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 	struct scan_control sc = {
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
+		.may_swap = !noswap,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = swappiness,
 		.order = 0,
@@ -1726,9 +1731,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 	};
 	struct zonelist *zonelist;
 
-	if (noswap)
-		sc.may_unmap = 0;
-
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
@@ -1767,6 +1769,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_unmap = 1,
+		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = vm_swappiness,
 		.order = order,
@@ -2088,13 +2091,13 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
 				if (nr_reclaimed >= nr_pages) {
-					sc->nr_reclaimed = nr_reclaimed;
+					sc->nr_reclaimed += nr_reclaimed;
 					return;
 				}
 			}
 		}
 	}
-	sc->nr_reclaimed = nr_reclaimed;
+	sc->nr_reclaimed += nr_reclaimed;
 }
 
 /*
@@ -2115,6 +2118,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 		.may_unmap = 0,
 		.may_writepage = 1,
 		.isolate_pages = isolate_pages_global,
+		.nr_reclaimed = 0,
 	};
 
 	current->reclaim_state = &reclaim_state;
@@ -2297,6 +2301,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	struct scan_control sc = {
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
 		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+		.may_swap = 1,
 		.swap_cluster_max = max_t(unsigned long, nr_pages,
 					SWAP_CLUSTER_MAX),
 		.gfp_mask = gfp_mask,