path: root/mm/rmap.c
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--  mm/rmap.c  568
1 file changed, 372 insertions(+), 196 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index dd43373a483f..0feeef860a8f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -49,6 +49,7 @@
49#include <linux/swapops.h> 49#include <linux/swapops.h>
50#include <linux/slab.h> 50#include <linux/slab.h>
51#include <linux/init.h> 51#include <linux/init.h>
52#include <linux/ksm.h>
52#include <linux/rmap.h> 53#include <linux/rmap.h>
53#include <linux/rcupdate.h> 54#include <linux/rcupdate.h>
54#include <linux/module.h> 55#include <linux/module.h>
@@ -61,17 +62,28 @@
61#include "internal.h" 62#include "internal.h"
62 63
63static struct kmem_cache *anon_vma_cachep; 64static struct kmem_cache *anon_vma_cachep;
65static struct kmem_cache *anon_vma_chain_cachep;
64 66
65static inline struct anon_vma *anon_vma_alloc(void) 67static inline struct anon_vma *anon_vma_alloc(void)
66{ 68{
67 return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); 69 return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
68} 70}
69 71
70static inline void anon_vma_free(struct anon_vma *anon_vma) 72void anon_vma_free(struct anon_vma *anon_vma)
71{ 73{
72 kmem_cache_free(anon_vma_cachep, anon_vma); 74 kmem_cache_free(anon_vma_cachep, anon_vma);
73} 75}
74 76
77static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
78{
79 return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
80}
81
82void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
83{
84 kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
85}
86
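For reference, the chain object handled by anon_vma_chain_alloc()/anon_vma_chain_free() above is defined in include/linux/rmap.h by the same patch; a minimal sketch of that structure (field comments added here) is:

struct anon_vma_chain {
	struct vm_area_struct *vma;	/* the VMA this chain entry hangs off */
	struct anon_vma *anon_vma;	/* the anon_vma this entry points at */
	struct list_head same_vma;	/* linked into vma->anon_vma_chain */
	struct list_head same_anon_vma;	/* linked into anon_vma->head */
};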
75/** 87/**
76 * anon_vma_prepare - attach an anon_vma to a memory region 88 * anon_vma_prepare - attach an anon_vma to a memory region
77 * @vma: the memory region in question 89 * @vma: the memory region in question
@@ -102,87 +114,167 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
102int anon_vma_prepare(struct vm_area_struct *vma) 114int anon_vma_prepare(struct vm_area_struct *vma)
103{ 115{
104 struct anon_vma *anon_vma = vma->anon_vma; 116 struct anon_vma *anon_vma = vma->anon_vma;
117 struct anon_vma_chain *avc;
105 118
106 might_sleep(); 119 might_sleep();
107 if (unlikely(!anon_vma)) { 120 if (unlikely(!anon_vma)) {
108 struct mm_struct *mm = vma->vm_mm; 121 struct mm_struct *mm = vma->vm_mm;
109 struct anon_vma *allocated; 122 struct anon_vma *allocated;
110 123
124 avc = anon_vma_chain_alloc();
125 if (!avc)
126 goto out_enomem;
127
111 anon_vma = find_mergeable_anon_vma(vma); 128 anon_vma = find_mergeable_anon_vma(vma);
112 allocated = NULL; 129 allocated = NULL;
113 if (!anon_vma) { 130 if (!anon_vma) {
114 anon_vma = anon_vma_alloc(); 131 anon_vma = anon_vma_alloc();
115 if (unlikely(!anon_vma)) 132 if (unlikely(!anon_vma))
116 return -ENOMEM; 133 goto out_enomem_free_avc;
117 allocated = anon_vma; 134 allocated = anon_vma;
118 } 135 }
119 spin_lock(&anon_vma->lock);
120 136
137 spin_lock(&anon_vma->lock);
121 /* page_table_lock to protect against threads */ 138 /* page_table_lock to protect against threads */
122 spin_lock(&mm->page_table_lock); 139 spin_lock(&mm->page_table_lock);
123 if (likely(!vma->anon_vma)) { 140 if (likely(!vma->anon_vma)) {
124 vma->anon_vma = anon_vma; 141 vma->anon_vma = anon_vma;
125 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 142 avc->anon_vma = anon_vma;
143 avc->vma = vma;
144 list_add(&avc->same_vma, &vma->anon_vma_chain);
145 list_add(&avc->same_anon_vma, &anon_vma->head);
126 allocated = NULL; 146 allocated = NULL;
147 avc = NULL;
127 } 148 }
128 spin_unlock(&mm->page_table_lock); 149 spin_unlock(&mm->page_table_lock);
129
130 spin_unlock(&anon_vma->lock); 150 spin_unlock(&anon_vma->lock);
151
131 if (unlikely(allocated)) 152 if (unlikely(allocated))
132 anon_vma_free(allocated); 153 anon_vma_free(allocated);
154 if (unlikely(avc))
155 anon_vma_chain_free(avc);
133 } 156 }
134 return 0; 157 return 0;
158
159 out_enomem_free_avc:
160 anon_vma_chain_free(avc);
161 out_enomem:
162 return -ENOMEM;
135} 163}
136 164
137void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next) 165static void anon_vma_chain_link(struct vm_area_struct *vma,
166 struct anon_vma_chain *avc,
167 struct anon_vma *anon_vma)
138{ 168{
139 BUG_ON(vma->anon_vma != next->anon_vma); 169 avc->vma = vma;
140 list_del(&next->anon_vma_node); 170 avc->anon_vma = anon_vma;
171 list_add(&avc->same_vma, &vma->anon_vma_chain);
172
173 spin_lock(&anon_vma->lock);
174 list_add_tail(&avc->same_anon_vma, &anon_vma->head);
175 spin_unlock(&anon_vma->lock);
141} 176}
142 177
143void __anon_vma_link(struct vm_area_struct *vma) 178/*
179 * Attach the anon_vmas from src to dst.
180 * Returns 0 on success, -ENOMEM on failure.
181 */
182int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
144{ 183{
145 struct anon_vma *anon_vma = vma->anon_vma; 184 struct anon_vma_chain *avc, *pavc;
146 185
147 if (anon_vma) 186 list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
148 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 187 avc = anon_vma_chain_alloc();
188 if (!avc)
189 goto enomem_failure;
190 anon_vma_chain_link(dst, avc, pavc->anon_vma);
191 }
192 return 0;
193
194 enomem_failure:
195 unlink_anon_vmas(dst);
196 return -ENOMEM;
149} 197}
150 198
151void anon_vma_link(struct vm_area_struct *vma) 199/*
200 * Attach vma to its own anon_vma, as well as to the anon_vmas that
201 * the corresponding VMA in the parent process is attached to.
202 * Returns 0 on success, non-zero on failure.
203 */
204int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
152{ 205{
153 struct anon_vma *anon_vma = vma->anon_vma; 206 struct anon_vma_chain *avc;
207 struct anon_vma *anon_vma;
154 208
155 if (anon_vma) { 209 /* Don't bother if the parent process has no anon_vma here. */
156 spin_lock(&anon_vma->lock); 210 if (!pvma->anon_vma)
157 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 211 return 0;
158 spin_unlock(&anon_vma->lock); 212
159 } 213 /*
214 * First, attach the new VMA to the parent VMA's anon_vmas,
215 * so rmap can find non-COWed pages in child processes.
216 */
217 if (anon_vma_clone(vma, pvma))
218 return -ENOMEM;
219
220 /* Then add our own anon_vma. */
221 anon_vma = anon_vma_alloc();
222 if (!anon_vma)
223 goto out_error;
224 avc = anon_vma_chain_alloc();
225 if (!avc)
226 goto out_error_free_anon_vma;
227 anon_vma_chain_link(vma, avc, anon_vma);
228 /* Mark this anon_vma as the one where our new (COWed) pages go. */
229 vma->anon_vma = anon_vma;
230
231 return 0;
232
233 out_error_free_anon_vma:
234 anon_vma_free(anon_vma);
235 out_error:
236 unlink_anon_vmas(vma);
237 return -ENOMEM;
160} 238}
161 239
162void anon_vma_unlink(struct vm_area_struct *vma) 240static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
163{ 241{
164 struct anon_vma *anon_vma = vma->anon_vma; 242 struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
165 int empty; 243 int empty;
166 244
245 /* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
167 if (!anon_vma) 246 if (!anon_vma)
168 return; 247 return;
169 248
170 spin_lock(&anon_vma->lock); 249 spin_lock(&anon_vma->lock);
171 list_del(&vma->anon_vma_node); 250 list_del(&anon_vma_chain->same_anon_vma);
172 251
173 /* We must garbage collect the anon_vma if it's empty */ 252 /* We must garbage collect the anon_vma if it's empty */
174 empty = list_empty(&anon_vma->head); 253 empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
175 spin_unlock(&anon_vma->lock); 254 spin_unlock(&anon_vma->lock);
176 255
177 if (empty) 256 if (empty)
178 anon_vma_free(anon_vma); 257 anon_vma_free(anon_vma);
179} 258}
180 259
260void unlink_anon_vmas(struct vm_area_struct *vma)
261{
262 struct anon_vma_chain *avc, *next;
263
264 /* Unlink each anon_vma chained to the VMA. */
265 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
266 anon_vma_unlink(avc);
267 list_del(&avc->same_vma);
268 anon_vma_chain_free(avc);
269 }
270}
271
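The callers of the new anon_vma_clone()/anon_vma_fork()/unlink_anon_vmas() API live outside mm/rmap.c. Roughly, the fork, split/copy and teardown paths are expected to use it along these lines (a sketch assuming the kernel/fork.c, mm/mmap.c and mm/memory.c halves of the series, with tmp/mpnt as the child/parent VMAs):

	/* dup_mmap(): give the child VMA its own anon_vma plus links to the parent's */
	tmp->anon_vma = NULL;
	if (anon_vma_fork(tmp, mpnt))
		goto fail_nomem_anon_vma_fork;

	/* VMA split/copy paths: only attach to the already existing anon_vmas */
	if (anon_vma_clone(new_vma, old_vma))
		return -ENOMEM;

	/* per-VMA teardown (e.g. from free_pgtables()): drop every chain entry */
	unlink_anon_vmas(vma);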
181static void anon_vma_ctor(void *data) 272static void anon_vma_ctor(void *data)
182{ 273{
183 struct anon_vma *anon_vma = data; 274 struct anon_vma *anon_vma = data;
184 275
185 spin_lock_init(&anon_vma->lock); 276 spin_lock_init(&anon_vma->lock);
277 ksm_refcount_init(anon_vma);
186 INIT_LIST_HEAD(&anon_vma->head); 278 INIT_LIST_HEAD(&anon_vma->head);
187} 279}
188 280
@@ -190,6 +282,7 @@ void __init anon_vma_init(void)
190{ 282{
191 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), 283 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
192 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor); 284 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
285 anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
193} 286}
194 287
195/* 288/*
@@ -202,8 +295,8 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
202 unsigned long anon_mapping; 295 unsigned long anon_mapping;
203 296
204 rcu_read_lock(); 297 rcu_read_lock();
205 anon_mapping = (unsigned long) page->mapping; 298 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
206 if (!(anon_mapping & PAGE_MAPPING_ANON)) 299 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
207 goto out; 300 goto out;
208 if (!page_mapped(page)) 301 if (!page_mapped(page))
209 goto out; 302 goto out;
@@ -243,15 +336,13 @@ vma_address(struct page *page, struct vm_area_struct *vma)
243 336
244/* 337/*
245 * At what user virtual address is page expected in vma? 338 * At what user virtual address is page expected in vma?
246 * checking that the page matches the vma. 339 * Caller should check the page is actually part of the vma.
247 */ 340 */
248unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) 341unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
249{ 342{
250 if (PageAnon(page)) { 343 if (PageAnon(page))
251 if ((void *)vma->anon_vma != 344 ;
252 (void *)page->mapping - PAGE_MAPPING_ANON) 345 else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
253 return -EFAULT;
254 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
255 if (!vma->vm_file || 346 if (!vma->vm_file ||
256 vma->vm_file->f_mapping != page->mapping) 347 vma->vm_file->f_mapping != page->mapping)
257 return -EFAULT; 348 return -EFAULT;
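The callers changed below now compute the user address once with vma_address() and skip the VMA when it returns -EFAULT. That helper already exists earlier in this file and is untouched by the patch; it is, roughly:

static inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
{
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	unsigned long address;

	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
		/* page should be within @vma mapping range */
		return -EFAULT;
	}
	return address;
}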
@@ -337,21 +428,15 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
337 * Subfunctions of page_referenced: page_referenced_one called 428 * Subfunctions of page_referenced: page_referenced_one called
338 * repeatedly from either page_referenced_anon or page_referenced_file. 429 * repeatedly from either page_referenced_anon or page_referenced_file.
339 */ 430 */
340static int page_referenced_one(struct page *page, 431int page_referenced_one(struct page *page, struct vm_area_struct *vma,
341 struct vm_area_struct *vma, 432 unsigned long address, unsigned int *mapcount,
342 unsigned int *mapcount, 433 unsigned long *vm_flags)
343 unsigned long *vm_flags)
344{ 434{
345 struct mm_struct *mm = vma->vm_mm; 435 struct mm_struct *mm = vma->vm_mm;
346 unsigned long address;
347 pte_t *pte; 436 pte_t *pte;
348 spinlock_t *ptl; 437 spinlock_t *ptl;
349 int referenced = 0; 438 int referenced = 0;
350 439
351 address = vma_address(page, vma);
352 if (address == -EFAULT)
353 goto out;
354
355 pte = page_check_address(page, mm, address, &ptl, 0); 440 pte = page_check_address(page, mm, address, &ptl, 0);
356 if (!pte) 441 if (!pte)
357 goto out; 442 goto out;
@@ -388,9 +473,10 @@ static int page_referenced_one(struct page *page,
388out_unmap: 473out_unmap:
389 (*mapcount)--; 474 (*mapcount)--;
390 pte_unmap_unlock(pte, ptl); 475 pte_unmap_unlock(pte, ptl);
391out: 476
392 if (referenced) 477 if (referenced)
393 *vm_flags |= vma->vm_flags; 478 *vm_flags |= vma->vm_flags;
479out:
394 return referenced; 480 return referenced;
395} 481}
396 482
@@ -400,7 +486,7 @@ static int page_referenced_anon(struct page *page,
400{ 486{
401 unsigned int mapcount; 487 unsigned int mapcount;
402 struct anon_vma *anon_vma; 488 struct anon_vma *anon_vma;
403 struct vm_area_struct *vma; 489 struct anon_vma_chain *avc;
404 int referenced = 0; 490 int referenced = 0;
405 491
406 anon_vma = page_lock_anon_vma(page); 492 anon_vma = page_lock_anon_vma(page);
@@ -408,7 +494,11 @@ static int page_referenced_anon(struct page *page,
408 return referenced; 494 return referenced;
409 495
410 mapcount = page_mapcount(page); 496 mapcount = page_mapcount(page);
411 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 497 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
498 struct vm_area_struct *vma = avc->vma;
499 unsigned long address = vma_address(page, vma);
500 if (address == -EFAULT)
501 continue;
412 /* 502 /*
413 * If we are reclaiming on behalf of a cgroup, skip 503 * If we are reclaiming on behalf of a cgroup, skip
414 * counting on behalf of references from different 504 * counting on behalf of references from different
@@ -416,7 +506,7 @@ static int page_referenced_anon(struct page *page,
416 */ 506 */
417 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) 507 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
418 continue; 508 continue;
419 referenced += page_referenced_one(page, vma, 509 referenced += page_referenced_one(page, vma, address,
420 &mapcount, vm_flags); 510 &mapcount, vm_flags);
421 if (!mapcount) 511 if (!mapcount)
422 break; 512 break;
@@ -474,6 +564,9 @@ static int page_referenced_file(struct page *page,
474 mapcount = page_mapcount(page); 564 mapcount = page_mapcount(page);
475 565
476 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 566 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
567 unsigned long address = vma_address(page, vma);
568 if (address == -EFAULT)
569 continue;
477 /* 570 /*
478 * If we are reclaiming on behalf of a cgroup, skip 571 * If we are reclaiming on behalf of a cgroup, skip
479 * counting on behalf of references from different 572 * counting on behalf of references from different
@@ -481,7 +574,7 @@ static int page_referenced_file(struct page *page,
481 */ 574 */
482 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) 575 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
483 continue; 576 continue;
484 referenced += page_referenced_one(page, vma, 577 referenced += page_referenced_one(page, vma, address,
485 &mapcount, vm_flags); 578 &mapcount, vm_flags);
486 if (!mapcount) 579 if (!mapcount)
487 break; 580 break;
@@ -507,46 +600,44 @@ int page_referenced(struct page *page,
507 unsigned long *vm_flags) 600 unsigned long *vm_flags)
508{ 601{
509 int referenced = 0; 602 int referenced = 0;
510 603 int we_locked = 0;
511 if (TestClearPageReferenced(page))
512 referenced++;
513 604
514 *vm_flags = 0; 605 *vm_flags = 0;
515 if (page_mapped(page) && page->mapping) { 606 if (page_mapped(page) && page_rmapping(page)) {
516 if (PageAnon(page)) 607 if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
608 we_locked = trylock_page(page);
609 if (!we_locked) {
610 referenced++;
611 goto out;
612 }
613 }
614 if (unlikely(PageKsm(page)))
615 referenced += page_referenced_ksm(page, mem_cont,
616 vm_flags);
617 else if (PageAnon(page))
517 referenced += page_referenced_anon(page, mem_cont, 618 referenced += page_referenced_anon(page, mem_cont,
518 vm_flags); 619 vm_flags);
519 else if (is_locked) 620 else if (page->mapping)
520 referenced += page_referenced_file(page, mem_cont, 621 referenced += page_referenced_file(page, mem_cont,
521 vm_flags); 622 vm_flags);
522 else if (!trylock_page(page)) 623 if (we_locked)
523 referenced++;
524 else {
525 if (page->mapping)
526 referenced += page_referenced_file(page,
527 mem_cont, vm_flags);
528 unlock_page(page); 624 unlock_page(page);
529 }
530 } 625 }
531 626out:
532 if (page_test_and_clear_young(page)) 627 if (page_test_and_clear_young(page))
533 referenced++; 628 referenced++;
534 629
535 return referenced; 630 return referenced;
536} 631}
537 632
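With this rework, page_referenced() takes the page lock itself (via trylock) for file-backed and KSM pages when the caller has not locked the page, and simply reports the page as referenced if the trylock fails. A hypothetical reclaim-side caller would use it along these lines (the names here are illustrative, not taken from the patch):

	unsigned long vm_flags;
	int referenced;

	/* page may be unlocked; page_referenced() trylocks when it must */
	referenced = page_referenced(page, page_is_locked, NULL, &vm_flags);
	if (vm_flags & VM_LOCKED)
		/* treat the page as mlocked, keep it off the inactive list */ ;
	else if (referenced)
		/* young page: keep it on (or rotate it to) the active list */ ;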
538static int page_mkclean_one(struct page *page, struct vm_area_struct *vma) 633static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
634 unsigned long address)
539{ 635{
540 struct mm_struct *mm = vma->vm_mm; 636 struct mm_struct *mm = vma->vm_mm;
541 unsigned long address;
542 pte_t *pte; 637 pte_t *pte;
543 spinlock_t *ptl; 638 spinlock_t *ptl;
544 int ret = 0; 639 int ret = 0;
545 640
546 address = vma_address(page, vma);
547 if (address == -EFAULT)
548 goto out;
549
550 pte = page_check_address(page, mm, address, &ptl, 1); 641 pte = page_check_address(page, mm, address, &ptl, 1);
551 if (!pte) 642 if (!pte)
552 goto out; 643 goto out;
@@ -578,8 +669,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
578 669
579 spin_lock(&mapping->i_mmap_lock); 670 spin_lock(&mapping->i_mmap_lock);
580 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 671 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
581 if (vma->vm_flags & VM_SHARED) 672 if (vma->vm_flags & VM_SHARED) {
582 ret += page_mkclean_one(page, vma); 673 unsigned long address = vma_address(page, vma);
674 if (address == -EFAULT)
675 continue;
676 ret += page_mkclean_one(page, vma, address);
677 }
583 } 678 }
584 spin_unlock(&mapping->i_mmap_lock); 679 spin_unlock(&mapping->i_mmap_lock);
585 return ret; 680 return ret;
@@ -607,27 +702,60 @@ int page_mkclean(struct page *page)
607EXPORT_SYMBOL_GPL(page_mkclean); 702EXPORT_SYMBOL_GPL(page_mkclean);
608 703
609/** 704/**
705 * page_move_anon_rmap - move a page to our anon_vma
706 * @page: the page to move to our anon_vma
707 * @vma: the vma the page belongs to
708 * @address: the user virtual address mapped
709 *
710 * When a page belongs exclusively to one process after a COW event,
711 * that page can be moved into the anon_vma that belongs to just that
712 * process, so the rmap code will not search the parent or sibling
713 * processes.
714 */
715void page_move_anon_rmap(struct page *page,
716 struct vm_area_struct *vma, unsigned long address)
717{
718 struct anon_vma *anon_vma = vma->anon_vma;
719
720 VM_BUG_ON(!PageLocked(page));
721 VM_BUG_ON(!anon_vma);
722 VM_BUG_ON(page->index != linear_page_index(vma, address));
723
724 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
725 page->mapping = (struct address_space *) anon_vma;
726}
727
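page_move_anon_rmap() has no caller within this file; the intended user is the COW write-fault path, which, after deciding to reuse an exclusively owned page, would move it roughly as follows (a sketch assuming the mm/memory.c half of the series):

	/* do_wp_page(): the faulting anonymous page turned out to be ours alone */
	if (PageAnon(old_page) && trylock_page(old_page)) {
		if (reuse_swap_page(old_page))
			/* rmap no longer needs to search parent/sibling anon_vmas */
			page_move_anon_rmap(old_page, vma, address);
		unlock_page(old_page);
	}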
728/**
610 * __page_set_anon_rmap - setup new anonymous rmap 729 * __page_set_anon_rmap - setup new anonymous rmap
611 * @page: the page to add the mapping to 730 * @page: the page to add the mapping to
612 * @vma: the vm area in which the mapping is added 731 * @vma: the vm area in which the mapping is added
613 * @address: the user virtual address mapped 732 * @address: the user virtual address mapped
733 * @exclusive: the page is exclusively owned by the current process
614 */ 734 */
615static void __page_set_anon_rmap(struct page *page, 735static void __page_set_anon_rmap(struct page *page,
616 struct vm_area_struct *vma, unsigned long address) 736 struct vm_area_struct *vma, unsigned long address, int exclusive)
617{ 737{
618 struct anon_vma *anon_vma = vma->anon_vma; 738 struct anon_vma *anon_vma = vma->anon_vma;
619 739
620 BUG_ON(!anon_vma); 740 BUG_ON(!anon_vma);
621 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
622 page->mapping = (struct address_space *) anon_vma;
623
624 page->index = linear_page_index(vma, address);
625 741
626 /* 742 /*
627 * nr_mapped state can be updated without turning off 743 * If the page isn't exclusively mapped into this vma,
628 * interrupts because it is not modified via interrupt. 744 * we must use the _oldest_ possible anon_vma for the
745 * page mapping!
746 *
747 * So take the last AVC chain entry in the vma, which is
748 * the deepest ancestor, and use the anon_vma from that.
629 */ 749 */
630 __inc_zone_page_state(page, NR_ANON_PAGES); 750 if (!exclusive) {
751 struct anon_vma_chain *avc;
752 avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
753 anon_vma = avc->anon_vma;
754 }
755
756 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
757 page->mapping = (struct address_space *) anon_vma;
758 page->index = linear_page_index(vma, address);
631} 759}
632 760
633/** 761/**
@@ -652,9 +780,6 @@ static void __page_check_anon_rmap(struct page *page,
652 * are initially only visible via the pagetables, and the pte is locked 780 * are initially only visible via the pagetables, and the pte is locked
653 * over the call to page_add_new_anon_rmap. 781 * over the call to page_add_new_anon_rmap.
654 */ 782 */
655 struct anon_vma *anon_vma = vma->anon_vma;
656 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
657 BUG_ON(page->mapping != (struct address_space *)anon_vma);
658 BUG_ON(page->index != linear_page_index(vma, address)); 783 BUG_ON(page->index != linear_page_index(vma, address));
659#endif 784#endif
660} 785}
@@ -665,15 +790,24 @@ static void __page_check_anon_rmap(struct page *page,
665 * @vma: the vm area in which the mapping is added 790 * @vma: the vm area in which the mapping is added
666 * @address: the user virtual address mapped 791 * @address: the user virtual address mapped
667 * 792 *
668 * The caller needs to hold the pte lock and the page must be locked. 793 * The caller needs to hold the pte lock, and the page must be locked in
794 * the anon_vma case: to serialize mapping,index checking after setting,
795 * and to ensure that PageAnon is not being upgraded racily to PageKsm
796 * (but PageKsm is never downgraded to PageAnon).
669 */ 797 */
670void page_add_anon_rmap(struct page *page, 798void page_add_anon_rmap(struct page *page,
671 struct vm_area_struct *vma, unsigned long address) 799 struct vm_area_struct *vma, unsigned long address)
672{ 800{
801 int first = atomic_inc_and_test(&page->_mapcount);
802 if (first)
803 __inc_zone_page_state(page, NR_ANON_PAGES);
804 if (unlikely(PageKsm(page)))
805 return;
806
673 VM_BUG_ON(!PageLocked(page)); 807 VM_BUG_ON(!PageLocked(page));
674 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); 808 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
675 if (atomic_inc_and_test(&page->_mapcount)) 809 if (first)
676 __page_set_anon_rmap(page, vma, address); 810 __page_set_anon_rmap(page, vma, address, 0);
677 else 811 else
678 __page_check_anon_rmap(page, vma, address); 812 __page_check_anon_rmap(page, vma, address);
679} 813}
@@ -694,7 +828,8 @@ void page_add_new_anon_rmap(struct page *page,
694 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); 828 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
695 SetPageSwapBacked(page); 829 SetPageSwapBacked(page);
696 atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */ 830 atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
697 __page_set_anon_rmap(page, vma, address); 831 __inc_zone_page_state(page, NR_ANON_PAGES);
832 __page_set_anon_rmap(page, vma, address, 1);
698 if (page_evictable(page, vma)) 833 if (page_evictable(page, vma))
699 lru_cache_add_lru(page, LRU_ACTIVE_ANON); 834 lru_cache_add_lru(page, LRU_ACTIVE_ANON);
700 else 835 else
@@ -711,7 +846,7 @@ void page_add_file_rmap(struct page *page)
711{ 846{
712 if (atomic_inc_and_test(&page->_mapcount)) { 847 if (atomic_inc_and_test(&page->_mapcount)) {
713 __inc_zone_page_state(page, NR_FILE_MAPPED); 848 __inc_zone_page_state(page, NR_FILE_MAPPED);
714 mem_cgroup_update_mapped_file_stat(page, 1); 849 mem_cgroup_update_file_mapped(page, 1);
715 } 850 }
716} 851}
717 852
@@ -743,8 +878,8 @@ void page_remove_rmap(struct page *page)
743 __dec_zone_page_state(page, NR_ANON_PAGES); 878 __dec_zone_page_state(page, NR_ANON_PAGES);
744 } else { 879 } else {
745 __dec_zone_page_state(page, NR_FILE_MAPPED); 880 __dec_zone_page_state(page, NR_FILE_MAPPED);
881 mem_cgroup_update_file_mapped(page, -1);
746 } 882 }
747 mem_cgroup_update_mapped_file_stat(page, -1);
748 /* 883 /*
749 * It would be tidy to reset the PageAnon mapping here, 884 * It would be tidy to reset the PageAnon mapping here,
750 * but that might overwrite a racing page_add_anon_rmap 885 * but that might overwrite a racing page_add_anon_rmap
@@ -760,20 +895,15 @@ void page_remove_rmap(struct page *page)
760 * Subfunctions of try_to_unmap: try_to_unmap_one called 895 * Subfunctions of try_to_unmap: try_to_unmap_one called
761 * repeatedly from either try_to_unmap_anon or try_to_unmap_file. 896 * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
762 */ 897 */
763static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, 898int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
764 enum ttu_flags flags) 899 unsigned long address, enum ttu_flags flags)
765{ 900{
766 struct mm_struct *mm = vma->vm_mm; 901 struct mm_struct *mm = vma->vm_mm;
767 unsigned long address;
768 pte_t *pte; 902 pte_t *pte;
769 pte_t pteval; 903 pte_t pteval;
770 spinlock_t *ptl; 904 spinlock_t *ptl;
771 int ret = SWAP_AGAIN; 905 int ret = SWAP_AGAIN;
772 906
773 address = vma_address(page, vma);
774 if (address == -EFAULT)
775 goto out;
776
777 pte = page_check_address(page, mm, address, &ptl, 0); 907 pte = page_check_address(page, mm, address, &ptl, 0);
778 if (!pte) 908 if (!pte)
779 goto out; 909 goto out;
@@ -784,10 +914,11 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
784 * skipped over this mm) then we should reactivate it. 914 * skipped over this mm) then we should reactivate it.
785 */ 915 */
786 if (!(flags & TTU_IGNORE_MLOCK)) { 916 if (!(flags & TTU_IGNORE_MLOCK)) {
787 if (vma->vm_flags & VM_LOCKED) { 917 if (vma->vm_flags & VM_LOCKED)
788 ret = SWAP_MLOCK; 918 goto out_mlock;
919
920 if (TTU_ACTION(flags) == TTU_MUNLOCK)
789 goto out_unmap; 921 goto out_unmap;
790 }
791 } 922 }
792 if (!(flags & TTU_IGNORE_ACCESS)) { 923 if (!(flags & TTU_IGNORE_ACCESS)) {
793 if (ptep_clear_flush_young_notify(vma, address, pte)) { 924 if (ptep_clear_flush_young_notify(vma, address, pte)) {
@@ -809,9 +940,9 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
809 940
810 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) { 941 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
811 if (PageAnon(page)) 942 if (PageAnon(page))
812 dec_mm_counter(mm, anon_rss); 943 dec_mm_counter(mm, MM_ANONPAGES);
813 else 944 else
814 dec_mm_counter(mm, file_rss); 945 dec_mm_counter(mm, MM_FILEPAGES);
815 set_pte_at(mm, address, pte, 946 set_pte_at(mm, address, pte,
816 swp_entry_to_pte(make_hwpoison_entry(page))); 947 swp_entry_to_pte(make_hwpoison_entry(page)));
817 } else if (PageAnon(page)) { 948 } else if (PageAnon(page)) {
@@ -822,14 +953,19 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
822 * Store the swap location in the pte. 953 * Store the swap location in the pte.
823 * See handle_pte_fault() ... 954 * See handle_pte_fault() ...
824 */ 955 */
825 swap_duplicate(entry); 956 if (swap_duplicate(entry) < 0) {
957 set_pte_at(mm, address, pte, pteval);
958 ret = SWAP_FAIL;
959 goto out_unmap;
960 }
826 if (list_empty(&mm->mmlist)) { 961 if (list_empty(&mm->mmlist)) {
827 spin_lock(&mmlist_lock); 962 spin_lock(&mmlist_lock);
828 if (list_empty(&mm->mmlist)) 963 if (list_empty(&mm->mmlist))
829 list_add(&mm->mmlist, &init_mm.mmlist); 964 list_add(&mm->mmlist, &init_mm.mmlist);
830 spin_unlock(&mmlist_lock); 965 spin_unlock(&mmlist_lock);
831 } 966 }
832 dec_mm_counter(mm, anon_rss); 967 dec_mm_counter(mm, MM_ANONPAGES);
968 inc_mm_counter(mm, MM_SWAPENTS);
833 } else if (PAGE_MIGRATION) { 969 } else if (PAGE_MIGRATION) {
834 /* 970 /*
835 * Store the pfn of the page in a special migration 971 * Store the pfn of the page in a special migration
@@ -847,8 +983,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
847 entry = make_migration_entry(page, pte_write(pteval)); 983 entry = make_migration_entry(page, pte_write(pteval));
848 set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); 984 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
849 } else 985 } else
850 dec_mm_counter(mm, file_rss); 986 dec_mm_counter(mm, MM_FILEPAGES);
851
852 987
853 page_remove_rmap(page); 988 page_remove_rmap(page);
854 page_cache_release(page); 989 page_cache_release(page);
@@ -857,6 +992,27 @@ out_unmap:
857 pte_unmap_unlock(pte, ptl); 992 pte_unmap_unlock(pte, ptl);
858out: 993out:
859 return ret; 994 return ret;
995
996out_mlock:
997 pte_unmap_unlock(pte, ptl);
998
999
1000 /*
 1001 	 * We need mmap_sem locking here, otherwise the VM_LOCKED check is
 1002 	 * racy and gives an unstable result. We also cannot wait, because
 1003 	 * we now hold anon_vma->lock or mapping->i_mmap_lock.
 1004 	 * If the trylock fails, the page stays on the evictable LRU and
 1005 	 * vmscan may later retry moving it to the unevictable LRU once it
 1006 	 * finds the page is actually mlocked.
1007 */
1008 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1009 if (vma->vm_flags & VM_LOCKED) {
1010 mlock_vma_page(page);
1011 ret = SWAP_MLOCK;
1012 }
1013 up_read(&vma->vm_mm->mmap_sem);
1014 }
1015 return ret;
860} 1016}
861 1017
862/* 1018/*
@@ -922,11 +1078,10 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
922 return ret; 1078 return ret;
923 1079
924 /* 1080 /*
925 * MLOCK_PAGES => feature is configured. 1081 * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
926 * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
927 * keep the sem while scanning the cluster for mlocking pages. 1082 * keep the sem while scanning the cluster for mlocking pages.
928 */ 1083 */
929 if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) { 1084 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
930 locked_vma = (vma->vm_flags & VM_LOCKED); 1085 locked_vma = (vma->vm_flags & VM_LOCKED);
931 if (!locked_vma) 1086 if (!locked_vma)
932 up_read(&vma->vm_mm->mmap_sem); /* don't need it */ 1087 up_read(&vma->vm_mm->mmap_sem); /* don't need it */
@@ -967,7 +1122,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
967 1122
968 page_remove_rmap(page); 1123 page_remove_rmap(page);
969 page_cache_release(page); 1124 page_cache_release(page);
970 dec_mm_counter(mm, file_rss); 1125 dec_mm_counter(mm, MM_FILEPAGES);
971 (*mapcount)--; 1126 (*mapcount)--;
972 } 1127 }
973 pte_unmap_unlock(pte - 1, ptl); 1128 pte_unmap_unlock(pte - 1, ptl);
@@ -976,29 +1131,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
976 return ret; 1131 return ret;
977} 1132}
978 1133
979/*
980 * common handling for pages mapped in VM_LOCKED vmas
981 */
982static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
983{
984 int mlocked = 0;
985
986 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
987 if (vma->vm_flags & VM_LOCKED) {
988 mlock_vma_page(page);
989 mlocked++; /* really mlocked the page */
990 }
991 up_read(&vma->vm_mm->mmap_sem);
992 }
993 return mlocked;
994}
995
996/** 1134/**
997 * try_to_unmap_anon - unmap or unlock anonymous page using the object-based 1135 * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
998 * rmap method 1136 * rmap method
999 * @page: the page to unmap/unlock 1137 * @page: the page to unmap/unlock
1000 * @unlock: request for unlock rather than unmap [unlikely] 1138 * @flags: action and flags
1001 * @migration: unmapping for migration - ignored if @unlock
1002 * 1139 *
1003 * Find all the mappings of a page using the mapping pointer and the vma chains 1140 * Find all the mappings of a page using the mapping pointer and the vma chains
1004 * contained in the anon_vma struct it points to. 1141 * contained in the anon_vma struct it points to.
@@ -1013,43 +1150,24 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
1013static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) 1150static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1014{ 1151{
1015 struct anon_vma *anon_vma; 1152 struct anon_vma *anon_vma;
1016 struct vm_area_struct *vma; 1153 struct anon_vma_chain *avc;
1017 unsigned int mlocked = 0;
1018 int ret = SWAP_AGAIN; 1154 int ret = SWAP_AGAIN;
1019 int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
1020
1021 if (MLOCK_PAGES && unlikely(unlock))
1022 ret = SWAP_SUCCESS; /* default for try_to_munlock() */
1023 1155
1024 anon_vma = page_lock_anon_vma(page); 1156 anon_vma = page_lock_anon_vma(page);
1025 if (!anon_vma) 1157 if (!anon_vma)
1026 return ret; 1158 return ret;
1027 1159
1028 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 1160 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1029 if (MLOCK_PAGES && unlikely(unlock)) { 1161 struct vm_area_struct *vma = avc->vma;
1030 if (!((vma->vm_flags & VM_LOCKED) && 1162 unsigned long address = vma_address(page, vma);
1031 page_mapped_in_vma(page, vma))) 1163 if (address == -EFAULT)
1032 continue; /* must visit all unlocked vmas */ 1164 continue;
1033 ret = SWAP_MLOCK; /* saw at least one mlocked vma */ 1165 ret = try_to_unmap_one(page, vma, address, flags);
1034 } else { 1166 if (ret != SWAP_AGAIN || !page_mapped(page))
1035 ret = try_to_unmap_one(page, vma, flags); 1167 break;
1036 if (ret == SWAP_FAIL || !page_mapped(page))
1037 break;
1038 }
1039 if (ret == SWAP_MLOCK) {
1040 mlocked = try_to_mlock_page(page, vma);
1041 if (mlocked)
1042 break; /* stop if actually mlocked page */
1043 }
1044 } 1168 }
1045 1169
1046 page_unlock_anon_vma(anon_vma); 1170 page_unlock_anon_vma(anon_vma);
1047
1048 if (mlocked)
1049 ret = SWAP_MLOCK; /* actually mlocked the page */
1050 else if (ret == SWAP_MLOCK)
1051 ret = SWAP_AGAIN; /* saw VM_LOCKED vma */
1052
1053 return ret; 1171 return ret;
1054} 1172}
1055 1173
@@ -1079,48 +1197,30 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1079 unsigned long max_nl_cursor = 0; 1197 unsigned long max_nl_cursor = 0;
1080 unsigned long max_nl_size = 0; 1198 unsigned long max_nl_size = 0;
1081 unsigned int mapcount; 1199 unsigned int mapcount;
1082 unsigned int mlocked = 0;
1083 int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
1084
1085 if (MLOCK_PAGES && unlikely(unlock))
1086 ret = SWAP_SUCCESS; /* default for try_to_munlock() */
1087 1200
1088 spin_lock(&mapping->i_mmap_lock); 1201 spin_lock(&mapping->i_mmap_lock);
1089 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 1202 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1090 if (MLOCK_PAGES && unlikely(unlock)) { 1203 unsigned long address = vma_address(page, vma);
1091 if (!((vma->vm_flags & VM_LOCKED) && 1204 if (address == -EFAULT)
1092 page_mapped_in_vma(page, vma))) 1205 continue;
1093 continue; /* must visit all vmas */ 1206 ret = try_to_unmap_one(page, vma, address, flags);
1094 ret = SWAP_MLOCK; 1207 if (ret != SWAP_AGAIN || !page_mapped(page))
1095 } else { 1208 goto out;
1096 ret = try_to_unmap_one(page, vma, flags);
1097 if (ret == SWAP_FAIL || !page_mapped(page))
1098 goto out;
1099 }
1100 if (ret == SWAP_MLOCK) {
1101 mlocked = try_to_mlock_page(page, vma);
1102 if (mlocked)
1103 break; /* stop if actually mlocked page */
1104 }
1105 } 1209 }
1106 1210
1107 if (mlocked) 1211 if (list_empty(&mapping->i_mmap_nonlinear))
1108 goto out; 1212 goto out;
1109 1213
1110 if (list_empty(&mapping->i_mmap_nonlinear)) 1214 /*
1215 * We don't bother to try to find the munlocked page in nonlinears.
1216 * It's costly. Instead, later, page reclaim logic may call
1217 * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
1218 */
1219 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1111 goto out; 1220 goto out;
1112 1221
1113 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 1222 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1114 shared.vm_set.list) { 1223 shared.vm_set.list) {
1115 if (MLOCK_PAGES && unlikely(unlock)) {
1116 if (!(vma->vm_flags & VM_LOCKED))
1117 continue; /* must visit all vmas */
1118 ret = SWAP_MLOCK; /* leave mlocked == 0 */
1119 goto out; /* no need to look further */
1120 }
1121 if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
1122 (vma->vm_flags & VM_LOCKED))
1123 continue;
1124 cursor = (unsigned long) vma->vm_private_data; 1224 cursor = (unsigned long) vma->vm_private_data;
1125 if (cursor > max_nl_cursor) 1225 if (cursor > max_nl_cursor)
1126 max_nl_cursor = cursor; 1226 max_nl_cursor = cursor;
@@ -1153,16 +1253,12 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1153 do { 1253 do {
1154 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 1254 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1155 shared.vm_set.list) { 1255 shared.vm_set.list) {
1156 if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
1157 (vma->vm_flags & VM_LOCKED))
1158 continue;
1159 cursor = (unsigned long) vma->vm_private_data; 1256 cursor = (unsigned long) vma->vm_private_data;
1160 while ( cursor < max_nl_cursor && 1257 while ( cursor < max_nl_cursor &&
1161 cursor < vma->vm_end - vma->vm_start) { 1258 cursor < vma->vm_end - vma->vm_start) {
1162 ret = try_to_unmap_cluster(cursor, &mapcount, 1259 if (try_to_unmap_cluster(cursor, &mapcount,
1163 vma, page); 1260 vma, page) == SWAP_MLOCK)
1164 if (ret == SWAP_MLOCK) 1261 ret = SWAP_MLOCK;
1165 mlocked = 2; /* to return below */
1166 cursor += CLUSTER_SIZE; 1262 cursor += CLUSTER_SIZE;
1167 vma->vm_private_data = (void *) cursor; 1263 vma->vm_private_data = (void *) cursor;
1168 if ((int)mapcount <= 0) 1264 if ((int)mapcount <= 0)
@@ -1183,10 +1279,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1183 vma->vm_private_data = NULL; 1279 vma->vm_private_data = NULL;
1184out: 1280out:
1185 spin_unlock(&mapping->i_mmap_lock); 1281 spin_unlock(&mapping->i_mmap_lock);
1186 if (mlocked)
1187 ret = SWAP_MLOCK; /* actually mlocked the page */
1188 else if (ret == SWAP_MLOCK)
1189 ret = SWAP_AGAIN; /* saw VM_LOCKED vma */
1190 return ret; 1282 return ret;
1191} 1283}
1192 1284
@@ -1210,7 +1302,9 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
1210 1302
1211 BUG_ON(!PageLocked(page)); 1303 BUG_ON(!PageLocked(page));
1212 1304
1213 if (PageAnon(page)) 1305 if (unlikely(PageKsm(page)))
1306 ret = try_to_unmap_ksm(page, flags);
1307 else if (PageAnon(page))
1214 ret = try_to_unmap_anon(page, flags); 1308 ret = try_to_unmap_anon(page, flags);
1215 else 1309 else
1216 ret = try_to_unmap_file(page, flags); 1310 ret = try_to_unmap_file(page, flags);
@@ -1229,17 +1323,99 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
1229 * 1323 *
1230 * Return values are: 1324 * Return values are:
1231 * 1325 *
1232 * SWAP_SUCCESS - no vma's holding page mlocked. 1326 * SWAP_AGAIN - no vma is holding page mlocked, or,
1233 * SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem 1327 * SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem
1328 * SWAP_FAIL - page cannot be located at present
1234 * SWAP_MLOCK - page is now mlocked. 1329 * SWAP_MLOCK - page is now mlocked.
1235 */ 1330 */
1236int try_to_munlock(struct page *page) 1331int try_to_munlock(struct page *page)
1237{ 1332{
1238 VM_BUG_ON(!PageLocked(page) || PageLRU(page)); 1333 VM_BUG_ON(!PageLocked(page) || PageLRU(page));
1239 1334
1240 if (PageAnon(page)) 1335 if (unlikely(PageKsm(page)))
1336 return try_to_unmap_ksm(page, TTU_MUNLOCK);
1337 else if (PageAnon(page))
1241 return try_to_unmap_anon(page, TTU_MUNLOCK); 1338 return try_to_unmap_anon(page, TTU_MUNLOCK);
1242 else 1339 else
1243 return try_to_unmap_file(page, TTU_MUNLOCK); 1340 return try_to_unmap_file(page, TTU_MUNLOCK);
1244} 1341}
1245 1342
1343#ifdef CONFIG_MIGRATION
1344/*
1345 * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
1346 * Called by migrate.c to remove migration ptes, but might be used more later.
1347 */
1348static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1349 struct vm_area_struct *, unsigned long, void *), void *arg)
1350{
1351 struct anon_vma *anon_vma;
1352 struct anon_vma_chain *avc;
1353 int ret = SWAP_AGAIN;
1354
1355 /*
1356 * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
1357 * because that depends on page_mapped(); but not all its usages
1358 * are holding mmap_sem, which also gave the necessary guarantee
1359 * (that this anon_vma's slab has not already been destroyed).
1360 * This needs to be reviewed later: avoiding page_lock_anon_vma()
1361 * is risky, and currently limits the usefulness of rmap_walk().
1362 */
1363 anon_vma = page_anon_vma(page);
1364 if (!anon_vma)
1365 return ret;
1366 spin_lock(&anon_vma->lock);
1367 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1368 struct vm_area_struct *vma = avc->vma;
1369 unsigned long address = vma_address(page, vma);
1370 if (address == -EFAULT)
1371 continue;
1372 ret = rmap_one(page, vma, address, arg);
1373 if (ret != SWAP_AGAIN)
1374 break;
1375 }
1376 spin_unlock(&anon_vma->lock);
1377 return ret;
1378}
1379
1380static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
1381 struct vm_area_struct *, unsigned long, void *), void *arg)
1382{
1383 struct address_space *mapping = page->mapping;
1384 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1385 struct vm_area_struct *vma;
1386 struct prio_tree_iter iter;
1387 int ret = SWAP_AGAIN;
1388
1389 if (!mapping)
1390 return ret;
1391 spin_lock(&mapping->i_mmap_lock);
1392 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1393 unsigned long address = vma_address(page, vma);
1394 if (address == -EFAULT)
1395 continue;
1396 ret = rmap_one(page, vma, address, arg);
1397 if (ret != SWAP_AGAIN)
1398 break;
1399 }
1400 /*
1401 * No nonlinear handling: being always shared, nonlinear vmas
1402 * never contain migration ptes. Decide what to do about this
1403 * limitation to linear when we need rmap_walk() on nonlinear.
1404 */
1405 spin_unlock(&mapping->i_mmap_lock);
1406 return ret;
1407}
1408
1409int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
1410 struct vm_area_struct *, unsigned long, void *), void *arg)
1411{
1412 VM_BUG_ON(!PageLocked(page));
1413
1414 if (unlikely(PageKsm(page)))
1415 return rmap_walk_ksm(page, rmap_one, arg);
1416 else if (PageAnon(page))
1417 return rmap_walk_anon(page, rmap_one, arg);
1418 else
1419 return rmap_walk_file(page, rmap_one, arg);
1420}
1421#endif /* CONFIG_MIGRATION */
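
The #ifdef above hints at the one expected user: page migration removes its migration ptes by walking the rmap with a callback, roughly like this (a sketch of the mm/migrate.c side, not part of this diff):

/* migrate.c: undo the migration ptes once the new page is in place;
 * remove_migration_pte() matches the rmap_one callback signature
 * (page, vma, address, arg). */
static void remove_migration_ptes(struct page *old, struct page *new)
{
	rmap_walk(new, remove_migration_pte, old);
}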