author     Ingo Molnar <mingo@elte.hu>   2009-05-07 05:17:13 -0400
committer  Ingo Molnar <mingo@elte.hu>   2009-05-07 05:17:34 -0400
commit     44347d947f628060b92449702071bfe1d31dfb75 (patch)
tree       c6ed74610d5b3295df4296659f80f5feb94b28cc /mm
parent     d94fc523f3c35bd8013f04827e94756cbc0212f4 (diff)
parent     413f81eba35d6ede9289b0c8a920c013a84fac71 (diff)
Merge branch 'linus' into tracing/core
Merge reason: tracing/core was on a .30-rc1 base and was missing out on
a handful of tracing fixes present in .30-rc5-almost.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig      |   2
-rw-r--r--  mm/filemap.c    |   5
-rw-r--r--  mm/madvise.c    |   8
-rw-r--r--  mm/memcontrol.c |  40
-rw-r--r--  mm/memory.c     | 112
-rw-r--r--  mm/mmap.c       |  14
-rw-r--r--  mm/nommu.c      |  13
-rw-r--r--  mm/shmem.c      |  35
-rw-r--r--  mm/swap.c       |  46
-rw-r--r--  mm/util.c       |  16
-rw-r--r--  mm/vmscan.c     |  19
11 files changed, 175 insertions, 135 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index b53427ad30a3..57971d2ab848 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -213,6 +213,8 @@ config UNEVICTABLE_LRU
 	  will use one page flag and increase the code size a little,
 	  say Y unless you know what you are doing.
 
+	  See Documentation/vm/unevictable-lru.txt for more information.
+
 config HAVE_MLOCK
 	bool
 	default y if MMU=y
diff --git a/mm/filemap.c b/mm/filemap.c
index 2e2d38ebda4b..379ff0bcbf6e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -441,6 +441,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 	}
 	return err;
 }
+EXPORT_SYMBOL(filemap_write_and_wait_range);
 
 /**
  * add_to_page_cache_locked - add a locked page to the pagecache
@@ -567,8 +568,8 @@ EXPORT_SYMBOL(wait_on_page_bit);
 
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
- * @page - Page defining the wait queue of interest
- * @waiter - Waiter to add to the queue
+ * @page: Page defining the wait queue of interest
+ * @waiter: Waiter to add to the queue
  *
  * Add an arbitrary @waiter to the wait queue for the nominated @page.
  */
diff --git a/mm/madvise.c b/mm/madvise.c
index b9ce574827c8..36d6ea2b6340 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -112,6 +112,14 @@ static long madvise_willneed(struct vm_area_struct * vma,
 	if (!file)
 		return -EBADF;
 
+	/*
+	 * Page cache readahead assumes page cache pages are order-0 which
+	 * is not the case for hugetlbfs. Do not give a bad return value
+	 * but ignore the advice.
+	 */
+	if (vma->vm_flags & VM_HUGETLB)
+		return 0;
+
 	if (file->f_mapping->a_ops->get_xip_mem) {
 		/* no bad return value, but ignore advice */
 		return 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2fc6d6c48238..01c2d8f14685 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -932,7 +932,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	if (unlikely(!mem))
 		return 0;
 
-	VM_BUG_ON(mem_cgroup_is_obsolete(mem));
+	VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem));
 
 	while (1) {
 		int ret;
@@ -1024,9 +1024,7 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
 		return NULL;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit of swapcache is solid under page lock.
-	 */
+	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		if (mem && !css_tryget(&mem->css))
@@ -1040,6 +1038,7 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
 		mem = NULL;
 		rcu_read_unlock();
 	}
+	unlock_page_cgroup(pc);
 	return mem;
 }
 
@@ -1618,37 +1617,28 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
 }
 
 /*
- * A call to try to shrink memory usage under specified resource controller.
- * This is typically used for page reclaiming for shmem for reducing side
- * effect of page allocation from shmem, which is used by some mem_cgroup.
+ * A call to try to shrink memory usage on charge failure at shmem's swapin.
+ * Calling hierarchical_reclaim is not enough because we should update
+ * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
+ * Moreover considering hierarchy, we should reclaim from the mem_over_limit,
+ * not from the memcg which this page would be charged to.
+ * try_charge_swapin does all of these works properly.
  */
-int mem_cgroup_shrink_usage(struct page *page,
+int mem_cgroup_shmem_charge_fallback(struct page *page,
 			struct mm_struct *mm,
 			gfp_t gfp_mask)
 {
 	struct mem_cgroup *mem = NULL;
-	int progress = 0;
-	int retry = MEM_CGROUP_RECLAIM_RETRIES;
+	int ret;
 
 	if (mem_cgroup_disabled())
 		return 0;
-	if (page)
-		mem = try_get_mem_cgroup_from_swapcache(page);
-	if (!mem && mm)
-		mem = try_get_mem_cgroup_from_mm(mm);
-	if (unlikely(!mem))
-		return 0;
 
-	do {
-		progress = mem_cgroup_hierarchical_reclaim(mem,
-					gfp_mask, true, false);
-		progress += mem_cgroup_check_under_limit(mem);
-	} while (!progress && --retry);
+	ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
+	if (!ret)
+		mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
 
-	css_put(&mem->css);
-	if (!retry)
-		return -ENOMEM;
-	return 0;
+	return ret;
 }
 
 static DEFINE_MUTEX(set_limit_mutex);
diff --git a/mm/memory.c b/mm/memory.c
index cf6873e91c6a..4126dd16778c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1971,6 +1971,15 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				ret = tmp;
 				goto unwritable_page;
 			}
+			if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
+				lock_page(old_page);
+				if (!old_page->mapping) {
+					ret = 0; /* retry the fault */
+					unlock_page(old_page);
+					goto unwritable_page;
+				}
+			} else
+				VM_BUG_ON(!PageLocked(old_page));
 
 			/*
 			 * Since we dropped the lock we need to revalidate
@@ -1980,9 +1989,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 */
 			page_table = pte_offset_map_lock(mm, pmd, address,
 							 &ptl);
-			page_cache_release(old_page);
-			if (!pte_same(*page_table, orig_pte))
+			if (!pte_same(*page_table, orig_pte)) {
+				unlock_page(old_page);
+				page_cache_release(old_page);
 				goto unlock;
+			}
 
 			page_mkwrite = 1;
 		}
@@ -2094,9 +2105,6 @@ gotten:
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	if (dirty_page) {
-		if (vma->vm_file)
-			file_update_time(vma->vm_file);
-
 		/*
 		 * Yes, Virginia, this is actually required to prevent a race
 		 * with clear_page_dirty_for_io() from clearing the page dirty
@@ -2105,16 +2113,41 @@ unlock:
 		 *
 		 * do_no_page is protected similarly.
 		 */
-		wait_on_page_locked(dirty_page);
-		set_page_dirty_balance(dirty_page, page_mkwrite);
+		if (!page_mkwrite) {
+			wait_on_page_locked(dirty_page);
+			set_page_dirty_balance(dirty_page, page_mkwrite);
+		}
 		put_page(dirty_page);
+		if (page_mkwrite) {
+			struct address_space *mapping = dirty_page->mapping;
+
+			set_page_dirty(dirty_page);
+			unlock_page(dirty_page);
+			page_cache_release(dirty_page);
+			if (mapping) {
+				/*
+				 * Some device drivers do not set page.mapping
+				 * but still dirty their pages
+				 */
+				balance_dirty_pages_ratelimited(mapping);
+			}
+		}
+
+		/* file_update_time outside page_lock */
+		if (vma->vm_file)
+			file_update_time(vma->vm_file);
 	}
 	return ret;
 oom_free_new:
 	page_cache_release(new_page);
 oom:
-	if (old_page)
+	if (old_page) {
+		if (page_mkwrite) {
+			unlock_page(old_page);
+			page_cache_release(old_page);
+		}
 		page_cache_release(old_page);
+	}
 	return VM_FAULT_OOM;
 
 unwritable_page:
@@ -2458,8 +2491,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
 		ret = VM_FAULT_OOM;
-		unlock_page(page);
-		goto out;
+		goto out_page;
 	}
 
 	/*
@@ -2521,6 +2553,7 @@ out:
 out_nomap:
 	mem_cgroup_cancel_charge_swapin(ptr);
 	pte_unmap_unlock(page_table, ptl);
+out_page:
 	unlock_page(page);
 	page_cache_release(page);
 	return ret;
@@ -2664,27 +2697,22 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			int tmp;
 
 			unlock_page(page);
-			vmf.flags |= FAULT_FLAG_MKWRITE;
+			vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
 			tmp = vma->vm_ops->page_mkwrite(vma, &vmf);
 			if (unlikely(tmp &
 				  (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {
 				ret = tmp;
-				anon = 1; /* no anon but release vmf.page */
-				goto out_unlocked;
-			}
-			lock_page(page);
-			/*
-			 * XXX: this is not quite right (racy vs
-			 * invalidate) to unlock and relock the page
-			 * like this, however a better fix requires
-			 * reworking page_mkwrite locking API, which
-			 * is better done later.
-			 */
-			if (!page->mapping) {
-				ret = 0;
-				anon = 1; /* no anon but release vmf.page */
-				goto out;
+				goto unwritable_page;
 			}
+			if (unlikely(!(tmp & VM_FAULT_LOCKED))) {
+				lock_page(page);
+				if (!page->mapping) {
+					ret = 0; /* retry the fault */
+					unlock_page(page);
+					goto unwritable_page;
+				}
+			} else
+				VM_BUG_ON(!PageLocked(page));
 			page_mkwrite = 1;
 		}
 	}
@@ -2736,19 +2764,35 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_unmap_unlock(page_table, ptl);
 
 out:
-	unlock_page(vmf.page);
-out_unlocked:
-	if (anon)
-		page_cache_release(vmf.page);
-	else if (dirty_page) {
-		if (vma->vm_file)
-			file_update_time(vma->vm_file);
+	if (dirty_page) {
+		struct address_space *mapping = page->mapping;
 
-		set_page_dirty_balance(dirty_page, page_mkwrite);
+		if (set_page_dirty(dirty_page))
+			page_mkwrite = 1;
+		unlock_page(dirty_page);
 		put_page(dirty_page);
+		if (page_mkwrite && mapping) {
+			/*
+			 * Some device drivers do not set page.mapping but still
+			 * dirty their pages
+			 */
+			balance_dirty_pages_ratelimited(mapping);
+		}
+
+		/* file_update_time outside page_lock */
+		if (vma->vm_file)
+			file_update_time(vma->vm_file);
+	} else {
+		unlock_page(vmf.page);
+		if (anon)
+			page_cache_release(vmf.page);
 	}
 
 	return ret;
+
+unwritable_page:
+	page_cache_release(page);
+	return ret;
 }
 
 static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
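As an illustration of the contract the do_wp_page()/__do_fault() hunks above now rely on (not code from this merge): a ->page_mkwrite() handler may return with the page locked and report VM_FAULT_LOCKED, or leave it unlocked and let the caller lock and revalidate it. A hypothetical handler using the 2.6.30-era prototype could look like this; the truncate-race return value is an assumption, not a rule from the patch.

```c
#include <linux/mm.h>
#include <linux/pagemap.h>

/*
 * Hypothetical ->page_mkwrite() sketch: lock the page, bail out if it was
 * truncated while we did not hold the lock, otherwise return with the page
 * still locked so the fault path sees VM_FAULT_LOCKED and skips its own
 * lock_page().
 */
static int example_page_mkwrite(struct vm_area_struct *vma,
				struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	lock_page(page);
	if (!page->mapping) {
		/* Raced with truncate: drop the lock and let the fault retry. */
		unlock_page(page);
		return VM_FAULT_NOPAGE;
	}

	/* ...block allocation / space reservation would go here... */

	return VM_FAULT_LOCKED;	/* page stays locked for the caller */
}
```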
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -179,11 +179,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	if (mm)
 		allowed -= mm->total_vm / 32;
 
-	/*
-	 * cast `allowed' as a signed long because vm_committed_space
-	 * sometimes has a negative value
-	 */
-	if (atomic_long_read(&vm_committed_space) < (long)allowed)
+	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
 		return 0;
 error:
 	vm_unacct_memory(pages);
@@ -1575,7 +1571,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 	 * Overcommit..  This must be the final test, as it will
 	 * update security statistics.
 	 */
-	if (security_vm_enough_memory(grow))
+	if (security_vm_enough_memory_mm(mm, grow))
 		return -ENOMEM;
 
 	/* Ok, everything looks good - let it rip */
@@ -2481,4 +2477,8 @@ void mm_drop_all_locks(struct mm_struct *mm)
  */
 void __init mmap_init(void)
 {
+	int ret;
+
+	ret = percpu_counter_init(&vm_committed_as, 0);
+	VM_BUG_ON(ret);
 }
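For context on the vm_committed_space -> vm_committed_as conversion above (a sketch, not part of the patch): the hand-rolled per-CPU batching that mm/swap.c carried is what the percpu_counter API provides generically, so the accounting helpers reduce to roughly the following. The helper names mirror the old code; the exact post-merge definitions live in headers not shown in this diff, and committed_within() is a made-up name for illustration.

```c
#include <linux/percpu_counter.h>

struct percpu_counter vm_committed_as;

/* Batched per-CPU add; percpu_counter folds deltas into the global count. */
static inline void vm_acct_memory(long pages)
{
	percpu_counter_add(&vm_committed_as, pages);
}

static inline void vm_unacct_memory(long pages)
{
	vm_acct_memory(-pages);
}

/* Overcommit check: read_positive is a cheap, never-negative estimate. */
static inline int committed_within(unsigned long allowed)
{
	return percpu_counter_read_positive(&vm_committed_as) < allowed;
}
```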
diff --git a/mm/nommu.c b/mm/nommu.c
index 72eda4aee2cb..809998aa7b50 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -62,7 +62,7 @@ void *high_memory;
 struct page *mem_map;
 unsigned long max_mapnr;
 unsigned long num_physpages;
-atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
+struct percpu_counter vm_committed_as;
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
@@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
  */
 void __init mmap_init(void)
 {
+	int ret;
+
+	ret = percpu_counter_init(&vm_committed_as, 0);
+	VM_BUG_ON(ret);
 	vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC);
 }
 
@@ -1847,12 +1851,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	if (mm)
 		allowed -= mm->total_vm / 32;
 
-	/*
-	 * cast `allowed' as a signed long because vm_committed_space
-	 * sometimes has a negative value
-	 */
-	if (atomic_long_read(&vm_committed_space) < (long)allowed)
+	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
 		return 0;
+
 error:
 	vm_unacct_memory(pages);
 
diff --git a/mm/shmem.c b/mm/shmem.c
index d94d2e9146bc..b25f95ce3db7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/vfs.h>
 #include <linux/mount.h>
+#include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -43,7 +44,6 @@ static struct vfsmount *shm_mnt;
 #include <linux/exportfs.h>
 #include <linux/generic_acl.h>
 #include <linux/mman.h>
-#include <linux/pagemap.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
@@ -65,13 +65,28 @@ static struct vfsmount *shm_mnt;
 #include <asm/div64.h>
 #include <asm/pgtable.h>
 
+/*
+ * The maximum size of a shmem/tmpfs file is limited by the maximum size of
+ * its triple-indirect swap vector - see illustration at shmem_swp_entry().
+ *
+ * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
+ * but one eighth of that on a 64-bit kernel.  With 8kB page size, maximum
+ * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
+ * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
+ *
+ * We use / and * instead of shifts in the definitions below, so that the swap
+ * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
+ */
 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
-#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
-#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
+#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 
-#define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
-#define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
+#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
+#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
 
+#define SHMEM_MAX_BYTES  min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
+#define SHMEM_MAX_INDEX  ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
+
+#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
 #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
 
 /* info->flags needs VM_flags to handle pagein/truncate races efficiently */
@@ -1325,8 +1340,12 @@ repeat:
 			shmem_swp_unmap(entry);
 			spin_unlock(&info->lock);
 			if (error == -ENOMEM) {
-				/* allow reclaim from this memory cgroup */
-				error = mem_cgroup_shrink_usage(swappage,
+				/*
+				 * reclaim from proper memory cgroup and
+				 * call memcg's OOM if needed.
+				 */
+				error = mem_cgroup_shmem_charge_fallback(
+								swappage,
 								current->mm,
 								gfp);
 				if (error) {
@@ -2581,7 +2600,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 #define shmem_get_inode(sb, mode, dev, flags)	ramfs_get_inode(sb, mode, dev)
 #define shmem_acct_size(flags, size)		0
 #define shmem_unacct_size(flags, size)		do {} while (0)
-#define SHMEM_MAX_BYTES				LLONG_MAX
+#define SHMEM_MAX_BYTES				MAX_LFS_FILESIZE
 
 #endif /* CONFIG_SHMEM */
 
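A worked version of the size arithmetic in the new mm/shmem.c comment above, outside the kernel (illustrative only; it assumes 4kB pages and SHMEM_NR_DIRECT == 16, and ignores the MAX_LFS_FILESIZE clamp that SHMEM_MAX_BYTES applies):

```c
#include <stdio.h>

/* Mirror of the SHMSWP_MAX_INDEX / SHMSWP_MAX_BYTES macros, parameterised. */
static unsigned long long max_bytes(unsigned long long page_size,
				    unsigned long long ulong_size)
{
	unsigned long long entries_per_page = page_size / ulong_size;
	unsigned long long entries_per_pagepage =
		entries_per_page * entries_per_page;
	unsigned long long shmem_nr_direct = 16;	/* assumed value */
	unsigned long long max_index = shmem_nr_direct +
		(entries_per_pagepage / 2) * (entries_per_page + 1);

	return max_index * page_size;	/* before the MAX_LFS_FILESIZE clamp */
}

int main(void)
{
	/* 32-bit: sizeof(unsigned long) == 4 -> ~2.15 TB, "just over 2TB" */
	printf("32-bit, 4kB pages: %.2f TB\n", max_bytes(4096, 4) / 1e12);
	/* 64-bit: sizeof(unsigned long) == 8 -> ~0.27 TB, one eighth of that */
	printf("64-bit, 4kB pages: %.2f TB\n", max_bytes(4096, 8) / 1e12);
	return 0;
}
```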
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -491,49 +491,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
 
 EXPORT_SYMBOL(pagevec_lookup_tag);
 
-#ifdef CONFIG_SMP
-/*
- * We tolerate a little inaccuracy to avoid ping-ponging the counter between
- * CPUs
- */
-#define ACCT_THRESHOLD	max(16, NR_CPUS * 2)
-
-static DEFINE_PER_CPU(long, committed_space);
-
-void vm_acct_memory(long pages)
-{
-	long *local;
-
-	preempt_disable();
-	local = &__get_cpu_var(committed_space);
-	*local += pages;
-	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
-		atomic_long_add(*local, &vm_committed_space);
-		*local = 0;
-	}
-	preempt_enable();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Drop the CPU's cached committed space back into the central pool. */
-static int cpu_swap_callback(struct notifier_block *nfb,
-			     unsigned long action,
-			     void *hcpu)
-{
-	long *committed;
-
-	committed = &per_cpu(committed_space, (long)hcpu);
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		atomic_long_add(*committed, &vm_committed_space);
-		*committed = 0;
-		drain_cpu_pagevecs((long)hcpu);
-	}
-	return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-#endif /* CONFIG_SMP */
-
 /*
  * Perform any setup for the swap system
  */
@@ -554,7 +511,4 @@ void __init swap_setup(void)
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
-#ifdef CONFIG_HOTPLUG_CPU
-	hotcpu_notifier(cpu_swap_callback, 0);
-#endif
 }
diff --git a/mm/util.c b/mm/util.c
--- a/mm/util.c
+++ b/mm/util.c
@@ -225,6 +225,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 }
 #endif
 
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start:	starting user address
+ * @nr_pages:	number of pages from start to pin
+ * @write:	whether pages will be written to
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
 int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 				int nr_pages, int write, struct page **pages)
 {
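To make the contract documented above concrete, here is a hypothetical in-kernel caller (not from this merge; the function name and buffer handling are made up for illustration). It follows the stated return convention: a negative value means nothing was pinned, a short count means only the first pages are pinned and must still be released.

```c
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

/* Hypothetical helper: pin a user buffer, use it, then drop the pins. */
static int example_pin_user_buffer(unsigned long uaddr, int nr_pages,
				   struct page **pages)
{
	int got, i;

	got = get_user_pages_fast(uaddr, nr_pages, 1 /* write */, pages);
	if (got < 0)
		return got;		/* no pages were pinned */

	/* ... hand pages[0..got-1] to DMA or copy data out of them ... */

	for (i = 0; i < got; i++)
		page_cache_release(pages[i]);	/* drop each pin */

	return got == nr_pages ? 0 : -EFAULT;	/* short pin: caller decides */
}
```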
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 39fdfb14eeaa..5fa3eda1f03f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -63,6 +63,9 @@ struct scan_control {
 	/* Can mapped pages be reclaimed? */
 	int may_unmap;
 
+	/* Can pages be swapped as part of reclaim? */
+	int may_swap;
+
 	/* This context's SWAP_CLUSTER_MAX. If freeing memory for
 	 * suspend, we effectively ignore SWAP_CLUSTER_MAX.
 	 * In this context, it doesn't matter that we scan the
@@ -1380,7 +1383,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
 	/* If we have no swap space, do not bother scanning anon pages. */
-	if (nr_swap_pages <= 0) {
+	if (!sc->may_swap || (nr_swap_pages <= 0)) {
 		percent[0] = 0;
 		percent[1] = 100;
 		return;
@@ -1468,7 +1471,7 @@ static void shrink_zone(int priority, struct zone *zone,
 
 	for_each_evictable_lru(l) {
 		int file = is_file_lru(l);
-		int scan;
+		unsigned long scan;
 
 		scan = zone_nr_pages(zone, sc, l);
 		if (priority) {
@@ -1697,6 +1700,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.may_writepage = !laptop_mode,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.may_unmap = 1,
+		.may_swap = 1,
 		.swappiness = vm_swappiness,
 		.order = order,
 		.mem_cgroup = NULL,
@@ -1717,6 +1721,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 	struct scan_control sc = {
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
+		.may_swap = !noswap,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = swappiness,
 		.order = 0,
@@ -1726,9 +1731,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 	};
 	struct zonelist *zonelist;
 
-	if (noswap)
-		sc.may_unmap = 0;
-
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
@@ -1767,6 +1769,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_unmap = 1,
+		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = vm_swappiness,
 		.order = order,
@@ -2088,13 +2091,13 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
 				if (nr_reclaimed >= nr_pages) {
-					sc->nr_reclaimed = nr_reclaimed;
+					sc->nr_reclaimed += nr_reclaimed;
 					return;
 				}
 			}
 		}
 	}
-	sc->nr_reclaimed = nr_reclaimed;
+	sc->nr_reclaimed += nr_reclaimed;
 }
 
 /*
@@ -2115,6 +2118,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 		.may_unmap = 0,
 		.may_writepage = 1,
 		.isolate_pages = isolate_pages_global,
+		.nr_reclaimed = 0,
 	};
 
 	current->reclaim_state = &reclaim_state;
@@ -2297,6 +2301,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	struct scan_control sc = {
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
 		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+		.may_swap = 1,
 		.swap_cluster_max = max_t(unsigned long, nr_pages,
 					SWAP_CLUSTER_MAX),
 		.gfp_mask = gfp_mask,