Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig.debug     |  9
-rw-r--r--  mm/memcontrol.c      | 17
-rw-r--r--  mm/memory.c          | 39
-rw-r--r--  mm/mmap.c            | 13
-rw-r--r--  mm/page-writeback.c  | 43
-rw-r--r--  mm/rmap.c            | 42
-rw-r--r--  mm/vmscan.c          | 24
7 files changed, 102 insertions(+), 85 deletions(-)
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 56badfc4810a..957d3da53ddd 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -14,7 +14,6 @@ config DEBUG_PAGEALLOC
 	depends on !KMEMCHECK
 	select PAGE_EXTENSION
 	select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
-	select PAGE_GUARD if ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	---help---
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types
@@ -27,13 +26,5 @@ config DEBUG_PAGEALLOC
 	  that would result in incorrect warnings of memory corruption after
 	  a resume because free pages are not saved to the suspend image.

-config WANT_PAGE_DEBUG_FLAGS
-	bool
-
 config PAGE_POISONING
 	bool
-	select WANT_PAGE_DEBUG_FLAGS
-
-config PAGE_GUARD
-	bool
-	select WANT_PAGE_DEBUG_FLAGS
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ef91e856c7e4..851924fa5170 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3043,18 +3043,6 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry, struct mem_cgroup *from, struct mem_cgroup *to)
 	if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
 		mem_cgroup_swap_statistics(from, false);
 		mem_cgroup_swap_statistics(to, true);
-		/*
-		 * This function is only called from task migration context now.
-		 * It postpones page_counter and refcount handling till the end
-		 * of task migration(mem_cgroup_clear_mc()) for performance
-		 * improvement. But we cannot postpone css_get(to) because if
-		 * the process that has been moved to @to does swap-in, the
-		 * refcount of @to might be decreased to 0.
-		 *
-		 * We are in attach() phase, so the cgroup is guaranteed to be
-		 * alive, so we can just call css_get().
-		 */
-		css_get(&to->css);
 		return 0;
 	}
 	return -EINVAL;
@@ -4679,6 +4667,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (parent_css == NULL) {
 		root_mem_cgroup = memcg;
 		page_counter_init(&memcg->memory, NULL);
+		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, NULL);
 		page_counter_init(&memcg->kmem, NULL);
 	}
@@ -4724,6 +4713,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)

 	if (parent->use_hierarchy) {
 		page_counter_init(&memcg->memory, &parent->memory);
+		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, &parent->memsw);
 		page_counter_init(&memcg->kmem, &parent->kmem);

@@ -4733,6 +4723,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	 */
 	} else {
 		page_counter_init(&memcg->memory, NULL);
+		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, NULL);
 		page_counter_init(&memcg->kmem, NULL);
 		/*
@@ -4807,7 +4798,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX);
 	mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX);
 	memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX);
-	memcg->soft_limit = 0;
+	memcg->soft_limit = PAGE_COUNTER_MAX;
 }

 #ifdef CONFIG_MMU
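
The memcontrol.c hunks all share one theme: a memcg's soft limit now defaults to PAGE_COUNTER_MAX ("no soft limit") at every initialization and reset site, instead of 0. With a default of 0, every group's usage counts as soft-limit excess from its first charged page, making every group a soft-reclaim target. A minimal sketch of that arithmetic; memcg_stub and the PAGE_COUNTER_MAX value here are simplified stand-ins, not the kernel's definitions:

#include <stdio.h>

#define PAGE_COUNTER_MAX (~0UL)	/* stand-in for the kernel constant */

struct memcg_stub {
	unsigned long usage;		/* pages currently charged */
	unsigned long soft_limit;	/* soft limit in pages */
};

/* Simplified form of the excess calculation soft-limit reclaim uses */
static unsigned long soft_limit_excess(const struct memcg_stub *memcg)
{
	return memcg->usage > memcg->soft_limit ?
	       memcg->usage - memcg->soft_limit : 0;
}

int main(void)
{
	/* Old default: soft_limit = 0 makes any usage count as excess,
	 * so every group is a soft-reclaim target from its first page. */
	struct memcg_stub old_default = { .usage = 128, .soft_limit = 0 };

	/* New default: "no soft limit configured"; no excess until the
	 * administrator actually sets one. */
	struct memcg_stub new_default = { .usage = 128,
					  .soft_limit = PAGE_COUNTER_MAX };

	printf("excess with old default: %lu\n",
	       soft_limit_excess(&old_default));	/* prints 128 */
	printf("excess with new default: %lu\n",
	       soft_limit_excess(&new_default));	/* prints 0 */
	return 0;
}
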
diff --git a/mm/memory.c b/mm/memory.c
index ca920d1fd314..54f3a9b00956 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -235,6 +235,9 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)

 static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
+	if (!tlb->end)
+		return;
+
 	tlb_flush(tlb);
 	mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
@@ -247,7 +250,7 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb)
 {
 	struct mmu_gather_batch *batch;

-	for (batch = &tlb->local; batch; batch = batch->next) {
+	for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
 		free_pages_and_swap_cache(batch->pages, batch->nr);
 		batch->nr = 0;
 	}
@@ -256,9 +259,6 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb)

 void tlb_flush_mmu(struct mmu_gather *tlb)
 {
-	if (!tlb->end)
-		return;
-
 	tlb_flush_mmu_tlbonly(tlb);
 	tlb_flush_mmu_free(tlb);
 }
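
The three hunks above restructure the mmu_gather flush path: the tlb->end check moves from tlb_flush_mmu() into tlb_flush_mmu_tlbonly(), and tlb_flush_mmu_free() now skips empty batches on its own. Previously, tlb_flush_mmu() returned early whenever tlb->end was 0, which also skipped freeing any batched pages. A rough sketch of the resulting structure; every type and helper below is a simplified stand-in, not the kernel's:

struct batch_stub {
	struct batch_stub *next;
	unsigned int nr;
	void *pages[8];
};

struct gather_stub {
	unsigned long start, end;	/* pending invalidation range */
	struct batch_stub local;	/* batched pages to free */
};

static void arch_tlb_flush(struct gather_stub *tlb) { /* arch stub */ }
static void free_page_batch(void **pages, unsigned int nr) { /* stub */ }

static void flush_tlbonly(struct gather_stub *tlb)
{
	/* The "nothing to invalidate" check now guards only this step. */
	if (!tlb->end)
		return;
	arch_tlb_flush(tlb);
	tlb->start = tlb->end = 0;
}

static void flush_free(struct gather_stub *tlb)
{
	struct batch_stub *b;

	/* Freeing skips empty batches itself, independent of tlb->end. */
	for (b = &tlb->local; b && b->nr; b = b->next) {
		free_page_batch(b->pages, b->nr);
		b->nr = 0;
	}
}

static void flush_mmu(struct gather_stub *tlb)
{
	/*
	 * No early return any more: with the old code, tlb->end == 0
	 * skipped both steps, leaving already-batched pages unfreed.
	 */
	flush_tlbonly(tlb);
	flush_free(tlb);
}
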
@@ -2137,17 +2137,24 @@ reuse:
 	if (!dirty_page)
 		return ret;

-	/*
-	 * Yes, Virginia, this is actually required to prevent a race
-	 * with clear_page_dirty_for_io() from clearing the page dirty
-	 * bit after it clear all dirty ptes, but before a racing
-	 * do_wp_page installs a dirty pte.
-	 *
-	 * do_shared_fault is protected similarly.
-	 */
 	if (!page_mkwrite) {
-		wait_on_page_locked(dirty_page);
-		set_page_dirty_balance(dirty_page);
+		struct address_space *mapping;
+		int dirtied;
+
+		lock_page(dirty_page);
+		dirtied = set_page_dirty(dirty_page);
+		VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page);
+		mapping = dirty_page->mapping;
+		unlock_page(dirty_page);
+
+		if (dirtied && mapping) {
+			/*
+			 * Some device drivers do not set page.mapping
+			 * but still dirty their pages
+			 */
+			balance_dirty_pages_ratelimited(mapping);
+		}
+
 		/* file_update_time outside page_lock */
 		if (vma->vm_file)
 			file_update_time(vma->vm_file);
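
In the replaced do_wp_page() code above, the old path only waited for the page lock (wait_on_page_locked()) and then dirtied the page unlocked, leaving set_page_dirty() exposed to a racing truncation or clear_page_dirty_for_io(). The new code holds the page lock across the dirtying and throttles afterwards, outside the lock. A condensed sketch of that sequence; fault_dirty_page() is a hypothetical wrapper name, the calls inside it are the ones the hunk adds:

static void fault_dirty_page(struct page *dirty_page)
{
	struct address_space *mapping;
	int dirtied;

	lock_page(dirty_page);			/* holds off truncation and */
	dirtied = set_page_dirty(dirty_page);	/* clear_page_dirty_for_io() */
	mapping = dirty_page->mapping;		/* stable under the lock */
	unlock_page(dirty_page);

	/*
	 * Throttle outside the page lock, and only when the page was
	 * newly dirtied and has a mapping: some device drivers dirty
	 * pages without ever setting page->mapping.
	 */
	if (dirtied && mapping)
		balance_dirty_pages_ratelimited(mapping);
}
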
@@ -2593,7 +2600,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
 		if (prev && prev->vm_end == address)
 			return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;

-		expand_downwards(vma, address - PAGE_SIZE);
+		return expand_downwards(vma, address - PAGE_SIZE);
 	}
 	if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
 		struct vm_area_struct *next = vma->vm_next;
@@ -2602,7 +2609,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
 		if (next && next->vm_start == address + PAGE_SIZE)
 			return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;

-		expand_upwards(vma, address + PAGE_SIZE);
+		return expand_upwards(vma, address + PAGE_SIZE);
 	}
 	return 0;
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 7b36aa7cc89a..7f684d5a8087 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -778,10 +778,12 @@ again: remove_next = 1 + (end > next->vm_end);
 		if (exporter && exporter->anon_vma && !importer->anon_vma) {
 			int error;

+			importer->anon_vma = exporter->anon_vma;
 			error = anon_vma_clone(importer, exporter);
-			if (error)
+			if (error) {
+				importer->anon_vma = NULL;
 				return error;
-			importer->anon_vma = exporter->anon_vma;
+			}
 		}
 	}

@@ -2099,14 +2101,17 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct rlimit *rlim = current->signal->rlim;
-	unsigned long new_start;
+	unsigned long new_start, actual_size;

 	/* address space limit tests */
 	if (!may_expand_vm(mm, grow))
 		return -ENOMEM;

 	/* Stack limit test */
-	if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+	actual_size = size;
+	if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
+		actual_size -= PAGE_SIZE;
+	if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;

 	/* mlock limit tests */
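
The acct_stack_growth() hunk stops counting the stack guard page against RLIMIT_STACK: for a growable stack VMA, one page of the mapping is the guard page, so the limit check now compares the size minus one page. Together with the check_stack_guard_page() fix in mm/memory.c above (which now propagates the expand_downwards()/expand_upwards() error instead of dropping it), this keeps a stack from failing exactly one page short of its configured limit. A small worked example under assumed values; the page size, flag constants, and rlimit below are illustrative, not the kernel's definitions:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define VM_GROWSDOWN	0x0100UL	/* illustrative flag values */
#define VM_GROWSUP	0x0200UL

static int stack_limit_ok(unsigned long size, unsigned long vm_flags,
			  unsigned long rlim_cur)
{
	unsigned long actual_size = size;

	/* One page of a growable stack VMA is the guard page; don't
	 * charge it to the user-visible stack limit. */
	if (size && (vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
		actual_size -= PAGE_SIZE;

	return actual_size <= rlim_cur;
}

int main(void)
{
	unsigned long rlim = 8UL << 20;		   /* assume 8 MiB RLIMIT_STACK */
	unsigned long vma_size = rlim + PAGE_SIZE; /* stack + guard page */

	/* The old check compared the full VMA size against the rlimit,
	 * failing one page before the stack actually reached the limit. */
	printf("old check passes: %d\n", vma_size <= rlim);	  /* 0 */
	printf("new check passes: %d\n",
	       stack_limit_ok(vma_size, VM_GROWSDOWN, rlim));	  /* 1 */
	return 0;
}
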
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d5d81f5384d1..6f4335238e33 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1541,16 +1541,6 @@ pause:
 		bdi_start_background_writeback(bdi);
 }

-void set_page_dirty_balance(struct page *page)
-{
-	if (set_page_dirty(page)) {
-		struct address_space *mapping = page_mapping(page);
-
-		if (mapping)
-			balance_dirty_pages_ratelimited(mapping);
-	}
-}
-
 static DEFINE_PER_CPU(int, bdp_ratelimits);

 /*
@@ -2123,32 +2113,25 @@ EXPORT_SYMBOL(account_page_dirtied);
  * page dirty in that case, but not all the buffers. This is a "bottom-up"
  * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
  *
- * Most callers have locked the page, which pins the address_space in memory.
- * But zap_pte_range() does not lock the page, however in that case the
- * mapping is pinned by the vma's ->vm_file reference.
- *
- * We take care to handle the case where the page was truncated from the
- * mapping by re-checking page_mapping() inside tree_lock.
+ * The caller must ensure this doesn't race with truncation. Most will simply
+ * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
+ * the pte lock held, which also locks out truncation.
  */
 int __set_page_dirty_nobuffers(struct page *page)
 {
 	if (!TestSetPageDirty(page)) {
 		struct address_space *mapping = page_mapping(page);
-		struct address_space *mapping2;
 		unsigned long flags;

 		if (!mapping)
 			return 1;

 		spin_lock_irqsave(&mapping->tree_lock, flags);
-		mapping2 = page_mapping(page);
-		if (mapping2) { /* Race with truncate? */
-			BUG_ON(mapping2 != mapping);
-			WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
-			account_page_dirtied(page, mapping);
-			radix_tree_tag_set(&mapping->page_tree,
-				page_index(page), PAGECACHE_TAG_DIRTY);
-		}
+		BUG_ON(page_mapping(page) != mapping);
+		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
+		account_page_dirtied(page, mapping);
+		radix_tree_tag_set(&mapping->page_tree, page_index(page),
+				   PAGECACHE_TAG_DIRTY);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 		if (mapping->host) {
 			/* !PageAnon && !swapper_space */
@@ -2305,12 +2288,10 @@ int clear_page_dirty_for_io(struct page *page)
 	/*
 	 * We carefully synchronise fault handlers against
 	 * installing a dirty pte and marking the page dirty
 	 * at this point. We do this by having them hold the
-	 * page lock at some point after installing their
-	 * pte, but before marking the page dirty.
-	 * Pages are always locked coming in here, so we get
-	 * the desired exclusion. See mm/memory.c:do_wp_page()
-	 * for more comments.
+	 * page lock while dirtying the page, and pages are
+	 * always locked coming in here, so we get the desired
+	 * exclusion.
 	 */
 	if (TestClearPageDirty(page)) {
 		dec_zone_page_state(page, NR_FILE_DIRTY);
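
The rewritten comment moves the truncation guarantee to the callers of __set_page_dirty_nobuffers(): either the page lock or (for zap_pte_range()) the pte lock on a mapped page holds truncation off, so the old re-check of page_mapping() under tree_lock collapses into an assertion. A hedged condensation of the resulting body, using only the calls visible in the hunk, with the caller contract spelled out in comments:

int __set_page_dirty_nobuffers_sketch(struct page *page)
{
	if (TestSetPageDirty(page))
		return 0;			/* was already dirty */

	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	if (!mapping)
		return 1;

	spin_lock_irqsave(&mapping->tree_lock, flags);
	/*
	 * The caller excludes truncation (page lock, or pte lock on a
	 * mapped page), so the mapping cannot have changed under us:
	 * the old "race with truncate?" re-check is now an assertion.
	 */
	BUG_ON(page_mapping(page) != mapping);
	account_page_dirtied(page, mapping);
	radix_tree_tag_set(&mapping->page_tree, page_index(page),
			   PAGECACHE_TAG_DIRTY);
	spin_unlock_irqrestore(&mapping->tree_lock, flags);
	return 1;
}
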
diff --git a/mm/rmap.c b/mm/rmap.c
index c5bc241127b2..71cd5bd0c17d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -72,6 +72,8 @@ static inline struct anon_vma *anon_vma_alloc(void)
 	anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
 	if (anon_vma) {
 		atomic_set(&anon_vma->refcount, 1);
+		anon_vma->degree = 1;	/* Reference for first vma */
+		anon_vma->parent = anon_vma;
 		/*
 		 * Initialise the anon_vma root to point to itself. If called
 		 * from fork, the root will be reset to the parents anon_vma.
@@ -188,6 +190,8 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 	if (likely(!vma->anon_vma)) {
 		vma->anon_vma = anon_vma;
 		anon_vma_chain_link(vma, avc, anon_vma);
+		/* vma reference or self-parent link for new root */
+		anon_vma->degree++;
 		allocated = NULL;
 		avc = NULL;
 	}
@@ -236,6 +240,14 @@ static inline void unlock_anon_vma_root(struct anon_vma *root)
 /*
  * Attach the anon_vmas from src to dst.
  * Returns 0 on success, -ENOMEM on failure.
+ *
+ * If dst->anon_vma is NULL this function tries to find and reuse an existing
+ * anon_vma which has no vmas and only one child anon_vma. This prevents
+ * degradation of the anon_vma hierarchy to an endless linear chain in the
+ * case of a constantly forking task. On the other hand, an anon_vma with
+ * more than one child isn't reused even if there was no alive vma, so the
+ * rmap walker has a good chance of avoiding scanning the whole hierarchy
+ * when it searches where the page is mapped.
  */
 int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
@@ -256,7 +268,21 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 		anon_vma = pavc->anon_vma;
 		root = lock_anon_vma_root(root, anon_vma);
 		anon_vma_chain_link(dst, avc, anon_vma);
+
+		/*
+		 * Reuse an existing anon_vma if its degree is lower than
+		 * two, which means it has no vma and only one anon_vma
+		 * child.
+		 *
+		 * Do not choose the parent anon_vma, otherwise the first
+		 * child will always reuse it. The root anon_vma is never
+		 * reused: it has a self-parent reference and at least one
+		 * child.
+		 */
+		if (!dst->anon_vma && anon_vma != src->anon_vma &&
+				anon_vma->degree < 2)
+			dst->anon_vma = anon_vma;
 	}
+	if (dst->anon_vma)
+		dst->anon_vma->degree++;
 	unlock_anon_vma_root(root);
 	return 0;

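
These rmap.c hunks introduce the degree counter that the comments above describe: degree counts the attached vma (if any) plus child anon_vmas, so degree < 2 identifies an anon_vma with no live vma and at most one child, which a forked child can reuse instead of deepening the chain. This side effect is also why the mm/mmap.c hunk earlier pre-sets importer->anon_vma before calling anon_vma_clone(): an already-set dst->anon_vma disables the reuse path. A small sketch of the bookkeeping with a simplified stand-in structure; the real struct anon_vma carries much more state:

struct av_stub {
	struct av_stub *parent;	/* parent in the fork hierarchy */
	unsigned int degree;	/* attached vma (0 or 1) + child count */
};

/* New root anon_vma: self-parented, degree 1 for its first vma. */
static void av_init_root(struct av_stub *a)
{
	a->parent = a;
	a->degree = 1;
}

/* A forked child anon_vma adds one to its parent's degree. */
static void av_fork(struct av_stub *child, struct av_stub *parent)
{
	child->parent = parent;
	child->degree = 1;
	parent->degree++;
}

/*
 * Reuse test from anon_vma_clone(): degree < 2 means no attached vma
 * and at most one child, so a new vma can live here instead of adding
 * another level to the chain.
 */
static int av_reusable(const struct av_stub *a)
{
	return a->degree < 2;
}
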
@@ -280,6 +306,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	if (!pvma->anon_vma)
 		return 0;

+	/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
+	vma->anon_vma = NULL;
+
 	/*
 	 * First, attach the new VMA to the parent VMA's anon_vmas,
 	 * so rmap can find non-COWed pages in child processes.
@@ -288,6 +317,10 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	if (error)
 		return error;

+	/* An existing anon_vma has been reused, all done then. */
+	if (vma->anon_vma)
+		return 0;
+
 	/* Then add our own anon_vma. */
 	anon_vma = anon_vma_alloc();
 	if (!anon_vma)
@@ -301,6 +334,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	 * lock any of the anon_vmas in this anon_vma tree.
 	 */
 	anon_vma->root = pvma->anon_vma->root;
+	anon_vma->parent = pvma->anon_vma;
 	/*
 	 * With refcounts, an anon_vma can stay around longer than the
 	 * process it belongs to. The root anon_vma needs to be pinned until
@@ -311,6 +345,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	vma->anon_vma = anon_vma;
 	anon_vma_lock_write(anon_vma);
 	anon_vma_chain_link(vma, avc, anon_vma);
+	anon_vma->parent->degree++;
 	anon_vma_unlock_write(anon_vma);

 	return 0;
@@ -341,12 +376,16 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
 		 * Leave empty anon_vmas on the list - we'll need
 		 * to free them outside the lock.
 		 */
-		if (RB_EMPTY_ROOT(&anon_vma->rb_root))
+		if (RB_EMPTY_ROOT(&anon_vma->rb_root)) {
+			anon_vma->parent->degree--;
 			continue;
+		}

 		list_del(&avc->same_vma);
 		anon_vma_chain_free(avc);
 	}
+	if (vma->anon_vma)
+		vma->anon_vma->degree--;
 	unlock_anon_vma_root(root);

 	/*
@@ -357,6 +396,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
 	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma = avc->anon_vma;

+		BUG_ON(anon_vma->degree);
 		put_anon_vma(anon_vma);

 		list_del(&avc->same_vma);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd9a72bc4a1b..ab2505c3ef54 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2921,18 +2921,20 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, int classzone_idx)
 		return false;

 	/*
-	 * There is a potential race between when kswapd checks its watermarks
-	 * and a process gets throttled. There is also a potential race if
-	 * processes get throttled, kswapd wakes, a large process exits therby
-	 * balancing the zones that causes kswapd to miss a wakeup. If kswapd
-	 * is going to sleep, no process should be sleeping on pfmemalloc_wait
-	 * so wake them now if necessary. If necessary, processes will wake
-	 * kswapd and get throttled again
+	 * The throttled processes are normally woken up in balance_pgdat() as
+	 * soon as pfmemalloc_watermark_ok() is true. But there is a potential
+	 * race between when kswapd checks the watermarks and a process gets
+	 * throttled. There is also a potential race if processes get
+	 * throttled, kswapd wakes, a large process exits thereby balancing the
+	 * zones, which causes kswapd to exit balance_pgdat() before reaching
+	 * the wake up checks. If kswapd is going to sleep, no process should
+	 * be sleeping on pfmemalloc_wait, so wake them now if necessary. If
+	 * the wake up is premature, processes will wake kswapd and get
+	 * throttled again. The difference from wake ups in balance_pgdat() is
+	 * that here we are under prepare_to_wait().
 	 */
-	if (waitqueue_active(&pgdat->pfmemalloc_wait)) {
-		wake_up(&pgdat->pfmemalloc_wait);
-		return false;
-	}
+	if (waitqueue_active(&pgdat->pfmemalloc_wait))
+		wake_up_all(&pgdat->pfmemalloc_wait);

 	return pgdat_balanced(pgdat, order, classzone_idx);
 }
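
The prepare_kswapd_sleep() change removes the early "return false" taken whenever throttled processes were still queued: kswapd now wakes all pfmemalloc waiters (wake_up_all() instead of wake_up()) and decides whether to sleep purely from pgdat_balanced(). Under the old code, a waiter that never left the queue could make kswapd refuse to sleep indefinitely. A condensed sketch of the new flow; pgdat_stub and the helpers stand in for the kernel's pg_data_t and wait-queue machinery:

static bool kswapd_should_sleep(struct pgdat_stub *pgdat, int order,
				int classzone_idx)
{
	/*
	 * Old behaviour: if the queue was active, wake some waiters and
	 * return false, refusing to sleep. A waiter that stayed queued
	 * could keep kswapd spinning through this check, so the early
	 * return is gone and the wake-up now covers all waiters.
	 */
	if (waitqueue_active(&pgdat->pfmemalloc_wait))
		wake_up_all(&pgdat->pfmemalloc_wait);	/* was wake_up() */

	/* The sleep decision now rests on the balance check alone. */
	return pgdat_balanced(pgdat, order, classzone_idx);
}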