Diffstat (limited to 'mm/rmap.c')
-rw-r--r--  mm/rmap.c   536
1 file changed, 348 insertions(+), 188 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index dd43373a483f..eaa7a09eb72e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -49,6 +49,7 @@
49#include <linux/swapops.h> 49#include <linux/swapops.h>
50#include <linux/slab.h> 50#include <linux/slab.h>
51#include <linux/init.h> 51#include <linux/init.h>
52#include <linux/ksm.h>
52#include <linux/rmap.h> 53#include <linux/rmap.h>
53#include <linux/rcupdate.h> 54#include <linux/rcupdate.h>
54#include <linux/module.h> 55#include <linux/module.h>
@@ -61,17 +62,28 @@
61#include "internal.h" 62#include "internal.h"
62 63
63static struct kmem_cache *anon_vma_cachep; 64static struct kmem_cache *anon_vma_cachep;
65static struct kmem_cache *anon_vma_chain_cachep;
64 66
65static inline struct anon_vma *anon_vma_alloc(void) 67static inline struct anon_vma *anon_vma_alloc(void)
66{ 68{
67 return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); 69 return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
68} 70}
69 71
70static inline void anon_vma_free(struct anon_vma *anon_vma) 72void anon_vma_free(struct anon_vma *anon_vma)
71{ 73{
72 kmem_cache_free(anon_vma_cachep, anon_vma); 74 kmem_cache_free(anon_vma_cachep, anon_vma);
73} 75}
74 76
77static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
78{
79 return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
80}
81
82void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
83{
84 kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
85}
86
75/** 87/**
76 * anon_vma_prepare - attach an anon_vma to a memory region 88 * anon_vma_prepare - attach an anon_vma to a memory region
77 * @vma: the memory region in question 89 * @vma: the memory region in question
@@ -102,18 +114,23 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
102int anon_vma_prepare(struct vm_area_struct *vma) 114int anon_vma_prepare(struct vm_area_struct *vma)
103{ 115{
104 struct anon_vma *anon_vma = vma->anon_vma; 116 struct anon_vma *anon_vma = vma->anon_vma;
117 struct anon_vma_chain *avc;
105 118
106 might_sleep(); 119 might_sleep();
107 if (unlikely(!anon_vma)) { 120 if (unlikely(!anon_vma)) {
108 struct mm_struct *mm = vma->vm_mm; 121 struct mm_struct *mm = vma->vm_mm;
109 struct anon_vma *allocated; 122 struct anon_vma *allocated;
110 123
124 avc = anon_vma_chain_alloc();
125 if (!avc)
126 goto out_enomem;
127
111 anon_vma = find_mergeable_anon_vma(vma); 128 anon_vma = find_mergeable_anon_vma(vma);
112 allocated = NULL; 129 allocated = NULL;
113 if (!anon_vma) { 130 if (!anon_vma) {
114 anon_vma = anon_vma_alloc(); 131 anon_vma = anon_vma_alloc();
115 if (unlikely(!anon_vma)) 132 if (unlikely(!anon_vma))
116 return -ENOMEM; 133 goto out_enomem_free_avc;
117 allocated = anon_vma; 134 allocated = anon_vma;
118 } 135 }
119 spin_lock(&anon_vma->lock); 136 spin_lock(&anon_vma->lock);
@@ -122,67 +139,141 @@ int anon_vma_prepare(struct vm_area_struct *vma)
122 spin_lock(&mm->page_table_lock); 139 spin_lock(&mm->page_table_lock);
123 if (likely(!vma->anon_vma)) { 140 if (likely(!vma->anon_vma)) {
124 vma->anon_vma = anon_vma; 141 vma->anon_vma = anon_vma;
125 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 142 avc->anon_vma = anon_vma;
143 avc->vma = vma;
144 list_add(&avc->same_vma, &vma->anon_vma_chain);
145 list_add(&avc->same_anon_vma, &anon_vma->head);
126 allocated = NULL; 146 allocated = NULL;
127 } 147 }
128 spin_unlock(&mm->page_table_lock); 148 spin_unlock(&mm->page_table_lock);
129 149
130 spin_unlock(&anon_vma->lock); 150 spin_unlock(&anon_vma->lock);
131 if (unlikely(allocated)) 151 if (unlikely(allocated)) {
132 anon_vma_free(allocated); 152 anon_vma_free(allocated);
153 anon_vma_chain_free(avc);
154 }
133 } 155 }
134 return 0; 156 return 0;
157
158 out_enomem_free_avc:
159 anon_vma_chain_free(avc);
160 out_enomem:
161 return -ENOMEM;
135} 162}
136 163
137void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next) 164static void anon_vma_chain_link(struct vm_area_struct *vma,
165 struct anon_vma_chain *avc,
166 struct anon_vma *anon_vma)
138{ 167{
139 BUG_ON(vma->anon_vma != next->anon_vma); 168 avc->vma = vma;
140 list_del(&next->anon_vma_node); 169 avc->anon_vma = anon_vma;
170 list_add(&avc->same_vma, &vma->anon_vma_chain);
171
172 spin_lock(&anon_vma->lock);
173 list_add_tail(&avc->same_anon_vma, &anon_vma->head);
174 spin_unlock(&anon_vma->lock);
141} 175}
142 176
143void __anon_vma_link(struct vm_area_struct *vma) 177/*
178 * Attach the anon_vmas from src to dst.
179 * Returns 0 on success, -ENOMEM on failure.
180 */
181int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
144{ 182{
145 struct anon_vma *anon_vma = vma->anon_vma; 183 struct anon_vma_chain *avc, *pavc;
184
185 list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
186 avc = anon_vma_chain_alloc();
187 if (!avc)
188 goto enomem_failure;
189 anon_vma_chain_link(dst, avc, pavc->anon_vma);
190 }
191 return 0;
146 192
147 if (anon_vma) 193 enomem_failure:
148 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 194 unlink_anon_vmas(dst);
195 return -ENOMEM;
149} 196}
150 197
151void anon_vma_link(struct vm_area_struct *vma) 198/*
199 * Attach vma to its own anon_vma, as well as to the anon_vmas that
200 * the corresponding VMA in the parent process is attached to.
201 * Returns 0 on success, non-zero on failure.
202 */
203int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
152{ 204{
153 struct anon_vma *anon_vma = vma->anon_vma; 205 struct anon_vma_chain *avc;
206 struct anon_vma *anon_vma;
154 207
155 if (anon_vma) { 208 /* Don't bother if the parent process has no anon_vma here. */
156 spin_lock(&anon_vma->lock); 209 if (!pvma->anon_vma)
157 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 210 return 0;
158 spin_unlock(&anon_vma->lock); 211
159 } 212 /*
213 * First, attach the new VMA to the parent VMA's anon_vmas,
214 * so rmap can find non-COWed pages in child processes.
215 */
216 if (anon_vma_clone(vma, pvma))
217 return -ENOMEM;
218
219 /* Then add our own anon_vma. */
220 anon_vma = anon_vma_alloc();
221 if (!anon_vma)
222 goto out_error;
223 avc = anon_vma_chain_alloc();
224 if (!avc)
225 goto out_error_free_anon_vma;
226 anon_vma_chain_link(vma, avc, anon_vma);
227 /* Mark this anon_vma as the one where our new (COWed) pages go. */
228 vma->anon_vma = anon_vma;
229
230 return 0;
231
232 out_error_free_anon_vma:
233 anon_vma_free(anon_vma);
234 out_error:
235 unlink_anon_vmas(vma);
236 return -ENOMEM;
160} 237}
161 238
162void anon_vma_unlink(struct vm_area_struct *vma) 239static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
163{ 240{
164 struct anon_vma *anon_vma = vma->anon_vma; 241 struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
165 int empty; 242 int empty;
166 243
244 /* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
167 if (!anon_vma) 245 if (!anon_vma)
168 return; 246 return;
169 247
170 spin_lock(&anon_vma->lock); 248 spin_lock(&anon_vma->lock);
171 list_del(&vma->anon_vma_node); 249 list_del(&anon_vma_chain->same_anon_vma);
172 250
173 /* We must garbage collect the anon_vma if it's empty */ 251 /* We must garbage collect the anon_vma if it's empty */
174 empty = list_empty(&anon_vma->head); 252 empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
175 spin_unlock(&anon_vma->lock); 253 spin_unlock(&anon_vma->lock);
176 254
177 if (empty) 255 if (empty)
178 anon_vma_free(anon_vma); 256 anon_vma_free(anon_vma);
179} 257}
180 258
259void unlink_anon_vmas(struct vm_area_struct *vma)
260{
261 struct anon_vma_chain *avc, *next;
262
263 /* Unlink each anon_vma chained to the VMA. */
264 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
265 anon_vma_unlink(avc);
266 list_del(&avc->same_vma);
267 anon_vma_chain_free(avc);
268 }
269}
270
181static void anon_vma_ctor(void *data) 271static void anon_vma_ctor(void *data)
182{ 272{
183 struct anon_vma *anon_vma = data; 273 struct anon_vma *anon_vma = data;
184 274
185 spin_lock_init(&anon_vma->lock); 275 spin_lock_init(&anon_vma->lock);
276 ksm_refcount_init(anon_vma);
186 INIT_LIST_HEAD(&anon_vma->head); 277 INIT_LIST_HEAD(&anon_vma->head);
187} 278}
188 279
@@ -190,6 +281,7 @@ void __init anon_vma_init(void)
190{ 281{
191 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), 282 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
192 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor); 283 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
284 anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
193} 285}
194 286
195/* 287/*
@@ -202,8 +294,8 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
202 unsigned long anon_mapping; 294 unsigned long anon_mapping;
203 295
204 rcu_read_lock(); 296 rcu_read_lock();
205 anon_mapping = (unsigned long) page->mapping; 297 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
206 if (!(anon_mapping & PAGE_MAPPING_ANON)) 298 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
207 goto out; 299 goto out;
208 if (!page_mapped(page)) 300 if (!page_mapped(page))
209 goto out; 301 goto out;
@@ -248,8 +340,7 @@ vma_address(struct page *page, struct vm_area_struct *vma)
248unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) 340unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
249{ 341{
250 if (PageAnon(page)) { 342 if (PageAnon(page)) {
251 if ((void *)vma->anon_vma != 343 if (vma->anon_vma != page_anon_vma(page))
252 (void *)page->mapping - PAGE_MAPPING_ANON)
253 return -EFAULT; 344 return -EFAULT;
254 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { 345 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
255 if (!vma->vm_file || 346 if (!vma->vm_file ||
@@ -337,21 +428,15 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
337 * Subfunctions of page_referenced: page_referenced_one called 428 * Subfunctions of page_referenced: page_referenced_one called
338 * repeatedly from either page_referenced_anon or page_referenced_file. 429 * repeatedly from either page_referenced_anon or page_referenced_file.
339 */ 430 */
340static int page_referenced_one(struct page *page, 431int page_referenced_one(struct page *page, struct vm_area_struct *vma,
341 struct vm_area_struct *vma, 432 unsigned long address, unsigned int *mapcount,
342 unsigned int *mapcount, 433 unsigned long *vm_flags)
343 unsigned long *vm_flags)
344{ 434{
345 struct mm_struct *mm = vma->vm_mm; 435 struct mm_struct *mm = vma->vm_mm;
346 unsigned long address;
347 pte_t *pte; 436 pte_t *pte;
348 spinlock_t *ptl; 437 spinlock_t *ptl;
349 int referenced = 0; 438 int referenced = 0;
350 439
351 address = vma_address(page, vma);
352 if (address == -EFAULT)
353 goto out;
354
355 pte = page_check_address(page, mm, address, &ptl, 0); 440 pte = page_check_address(page, mm, address, &ptl, 0);
356 if (!pte) 441 if (!pte)
357 goto out; 442 goto out;
@@ -388,9 +473,10 @@ static int page_referenced_one(struct page *page,
388out_unmap: 473out_unmap:
389 (*mapcount)--; 474 (*mapcount)--;
390 pte_unmap_unlock(pte, ptl); 475 pte_unmap_unlock(pte, ptl);
391out: 476
392 if (referenced) 477 if (referenced)
393 *vm_flags |= vma->vm_flags; 478 *vm_flags |= vma->vm_flags;
479out:
394 return referenced; 480 return referenced;
395} 481}
396 482
@@ -400,7 +486,7 @@ static int page_referenced_anon(struct page *page,
400{ 486{
401 unsigned int mapcount; 487 unsigned int mapcount;
402 struct anon_vma *anon_vma; 488 struct anon_vma *anon_vma;
403 struct vm_area_struct *vma; 489 struct anon_vma_chain *avc;
404 int referenced = 0; 490 int referenced = 0;
405 491
406 anon_vma = page_lock_anon_vma(page); 492 anon_vma = page_lock_anon_vma(page);
@@ -408,7 +494,11 @@ static int page_referenced_anon(struct page *page,
408 return referenced; 494 return referenced;
409 495
410 mapcount = page_mapcount(page); 496 mapcount = page_mapcount(page);
411 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 497 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
498 struct vm_area_struct *vma = avc->vma;
499 unsigned long address = vma_address(page, vma);
500 if (address == -EFAULT)
501 continue;
412 /* 502 /*
413 * If we are reclaiming on behalf of a cgroup, skip 503 * If we are reclaiming on behalf of a cgroup, skip
414 * counting on behalf of references from different 504 * counting on behalf of references from different
@@ -416,7 +506,7 @@ static int page_referenced_anon(struct page *page,
416 */ 506 */
417 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) 507 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
418 continue; 508 continue;
419 referenced += page_referenced_one(page, vma, 509 referenced += page_referenced_one(page, vma, address,
420 &mapcount, vm_flags); 510 &mapcount, vm_flags);
421 if (!mapcount) 511 if (!mapcount)
422 break; 512 break;
@@ -474,6 +564,9 @@ static int page_referenced_file(struct page *page,
474 mapcount = page_mapcount(page); 564 mapcount = page_mapcount(page);
475 565
476 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 566 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
567 unsigned long address = vma_address(page, vma);
568 if (address == -EFAULT)
569 continue;
477 /* 570 /*
478 * If we are reclaiming on behalf of a cgroup, skip 571 * If we are reclaiming on behalf of a cgroup, skip
479 * counting on behalf of references from different 572 * counting on behalf of references from different
@@ -481,7 +574,7 @@ static int page_referenced_file(struct page *page,
481 */ 574 */
482 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) 575 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
483 continue; 576 continue;
484 referenced += page_referenced_one(page, vma, 577 referenced += page_referenced_one(page, vma, address,
485 &mapcount, vm_flags); 578 &mapcount, vm_flags);
486 if (!mapcount) 579 if (!mapcount)
487 break; 580 break;
@@ -507,46 +600,44 @@ int page_referenced(struct page *page,
507 unsigned long *vm_flags) 600 unsigned long *vm_flags)
508{ 601{
509 int referenced = 0; 602 int referenced = 0;
510 603 int we_locked = 0;
511 if (TestClearPageReferenced(page))
512 referenced++;
513 604
514 *vm_flags = 0; 605 *vm_flags = 0;
515 if (page_mapped(page) && page->mapping) { 606 if (page_mapped(page) && page_rmapping(page)) {
516 if (PageAnon(page)) 607 if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
608 we_locked = trylock_page(page);
609 if (!we_locked) {
610 referenced++;
611 goto out;
612 }
613 }
614 if (unlikely(PageKsm(page)))
615 referenced += page_referenced_ksm(page, mem_cont,
616 vm_flags);
617 else if (PageAnon(page))
517 referenced += page_referenced_anon(page, mem_cont, 618 referenced += page_referenced_anon(page, mem_cont,
518 vm_flags); 619 vm_flags);
519 else if (is_locked) 620 else if (page->mapping)
520 referenced += page_referenced_file(page, mem_cont, 621 referenced += page_referenced_file(page, mem_cont,
521 vm_flags); 622 vm_flags);
522 else if (!trylock_page(page)) 623 if (we_locked)
523 referenced++;
524 else {
525 if (page->mapping)
526 referenced += page_referenced_file(page,
527 mem_cont, vm_flags);
528 unlock_page(page); 624 unlock_page(page);
529 }
530 } 625 }
531 626out:
532 if (page_test_and_clear_young(page)) 627 if (page_test_and_clear_young(page))
533 referenced++; 628 referenced++;
534 629
535 return referenced; 630 return referenced;
536} 631}
537 632
538static int page_mkclean_one(struct page *page, struct vm_area_struct *vma) 633static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
634 unsigned long address)
539{ 635{
540 struct mm_struct *mm = vma->vm_mm; 636 struct mm_struct *mm = vma->vm_mm;
541 unsigned long address;
542 pte_t *pte; 637 pte_t *pte;
543 spinlock_t *ptl; 638 spinlock_t *ptl;
544 int ret = 0; 639 int ret = 0;
545 640
546 address = vma_address(page, vma);
547 if (address == -EFAULT)
548 goto out;
549
550 pte = page_check_address(page, mm, address, &ptl, 1); 641 pte = page_check_address(page, mm, address, &ptl, 1);
551 if (!pte) 642 if (!pte)
552 goto out; 643 goto out;
@@ -578,8 +669,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
578 669
579 spin_lock(&mapping->i_mmap_lock); 670 spin_lock(&mapping->i_mmap_lock);
580 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 671 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
581 if (vma->vm_flags & VM_SHARED) 672 if (vma->vm_flags & VM_SHARED) {
582 ret += page_mkclean_one(page, vma); 673 unsigned long address = vma_address(page, vma);
674 if (address == -EFAULT)
675 continue;
676 ret += page_mkclean_one(page, vma, address);
677 }
583 } 678 }
584 spin_unlock(&mapping->i_mmap_lock); 679 spin_unlock(&mapping->i_mmap_lock);
585 return ret; 680 return ret;
@@ -607,6 +702,30 @@ int page_mkclean(struct page *page)
607EXPORT_SYMBOL_GPL(page_mkclean); 702EXPORT_SYMBOL_GPL(page_mkclean);
608 703
609/** 704/**
705 * page_move_anon_rmap - move a page to our anon_vma
706 * @page: the page to move to our anon_vma
707 * @vma: the vma the page belongs to
708 * @address: the user virtual address mapped
709 *
710 * When a page belongs exclusively to one process after a COW event,
711 * that page can be moved into the anon_vma that belongs to just that
712 * process, so the rmap code will not search the parent or sibling
713 * processes.
714 */
715void page_move_anon_rmap(struct page *page,
716 struct vm_area_struct *vma, unsigned long address)
717{
718 struct anon_vma *anon_vma = vma->anon_vma;
719
720 VM_BUG_ON(!PageLocked(page));
721 VM_BUG_ON(!anon_vma);
722 VM_BUG_ON(page->index != linear_page_index(vma, address));
723
724 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
725 page->mapping = (struct address_space *) anon_vma;
726}
727
728/**
610 * __page_set_anon_rmap - setup new anonymous rmap 729 * __page_set_anon_rmap - setup new anonymous rmap
611 * @page: the page to add the mapping to 730 * @page: the page to add the mapping to
612 * @vma: the vm area in which the mapping is added 731 * @vma: the vm area in which the mapping is added
@@ -620,14 +739,7 @@ static void __page_set_anon_rmap(struct page *page,
620 BUG_ON(!anon_vma); 739 BUG_ON(!anon_vma);
621 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; 740 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
622 page->mapping = (struct address_space *) anon_vma; 741 page->mapping = (struct address_space *) anon_vma;
623
624 page->index = linear_page_index(vma, address); 742 page->index = linear_page_index(vma, address);
625
626 /*
627 * nr_mapped state can be updated without turning off
628 * interrupts because it is not modified via interrupt.
629 */
630 __inc_zone_page_state(page, NR_ANON_PAGES);
631} 743}
632 744
633/** 745/**
@@ -652,9 +764,6 @@ static void __page_check_anon_rmap(struct page *page,
652 * are initially only visible via the pagetables, and the pte is locked 764 * are initially only visible via the pagetables, and the pte is locked
653 * over the call to page_add_new_anon_rmap. 765 * over the call to page_add_new_anon_rmap.
654 */ 766 */
655 struct anon_vma *anon_vma = vma->anon_vma;
656 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
657 BUG_ON(page->mapping != (struct address_space *)anon_vma);
658 BUG_ON(page->index != linear_page_index(vma, address)); 767 BUG_ON(page->index != linear_page_index(vma, address));
659#endif 768#endif
660} 769}
@@ -665,14 +774,23 @@ static void __page_check_anon_rmap(struct page *page,
665 * @vma: the vm area in which the mapping is added 774 * @vma: the vm area in which the mapping is added
666 * @address: the user virtual address mapped 775 * @address: the user virtual address mapped
667 * 776 *
668 * The caller needs to hold the pte lock and the page must be locked. 777 * The caller needs to hold the pte lock, and the page must be locked in
778 * the anon_vma case: to serialize mapping,index checking after setting,
779 * and to ensure that PageAnon is not being upgraded racily to PageKsm
780 * (but PageKsm is never downgraded to PageAnon).
669 */ 781 */
670void page_add_anon_rmap(struct page *page, 782void page_add_anon_rmap(struct page *page,
671 struct vm_area_struct *vma, unsigned long address) 783 struct vm_area_struct *vma, unsigned long address)
672{ 784{
785 int first = atomic_inc_and_test(&page->_mapcount);
786 if (first)
787 __inc_zone_page_state(page, NR_ANON_PAGES);
788 if (unlikely(PageKsm(page)))
789 return;
790
673 VM_BUG_ON(!PageLocked(page)); 791 VM_BUG_ON(!PageLocked(page));
674 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); 792 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
675 if (atomic_inc_and_test(&page->_mapcount)) 793 if (first)
676 __page_set_anon_rmap(page, vma, address); 794 __page_set_anon_rmap(page, vma, address);
677 else 795 else
678 __page_check_anon_rmap(page, vma, address); 796 __page_check_anon_rmap(page, vma, address);
@@ -694,6 +812,7 @@ void page_add_new_anon_rmap(struct page *page,
694 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); 812 VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
695 SetPageSwapBacked(page); 813 SetPageSwapBacked(page);
696 atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */ 814 atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
815 __inc_zone_page_state(page, NR_ANON_PAGES);
697 __page_set_anon_rmap(page, vma, address); 816 __page_set_anon_rmap(page, vma, address);
698 if (page_evictable(page, vma)) 817 if (page_evictable(page, vma))
699 lru_cache_add_lru(page, LRU_ACTIVE_ANON); 818 lru_cache_add_lru(page, LRU_ACTIVE_ANON);
@@ -711,7 +830,7 @@ void page_add_file_rmap(struct page *page)
711{ 830{
712 if (atomic_inc_and_test(&page->_mapcount)) { 831 if (atomic_inc_and_test(&page->_mapcount)) {
713 __inc_zone_page_state(page, NR_FILE_MAPPED); 832 __inc_zone_page_state(page, NR_FILE_MAPPED);
714 mem_cgroup_update_mapped_file_stat(page, 1); 833 mem_cgroup_update_file_mapped(page, 1);
715 } 834 }
716} 835}
717 836
@@ -743,8 +862,8 @@ void page_remove_rmap(struct page *page)
743 __dec_zone_page_state(page, NR_ANON_PAGES); 862 __dec_zone_page_state(page, NR_ANON_PAGES);
744 } else { 863 } else {
745 __dec_zone_page_state(page, NR_FILE_MAPPED); 864 __dec_zone_page_state(page, NR_FILE_MAPPED);
865 mem_cgroup_update_file_mapped(page, -1);
746 } 866 }
747 mem_cgroup_update_mapped_file_stat(page, -1);
748 /* 867 /*
749 * It would be tidy to reset the PageAnon mapping here, 868 * It would be tidy to reset the PageAnon mapping here,
750 * but that might overwrite a racing page_add_anon_rmap 869 * but that might overwrite a racing page_add_anon_rmap
@@ -760,20 +879,15 @@ void page_remove_rmap(struct page *page)
760 * Subfunctions of try_to_unmap: try_to_unmap_one called 879 * Subfunctions of try_to_unmap: try_to_unmap_one called
761 * repeatedly from either try_to_unmap_anon or try_to_unmap_file. 880 * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
762 */ 881 */
763static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, 882int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
764 enum ttu_flags flags) 883 unsigned long address, enum ttu_flags flags)
765{ 884{
766 struct mm_struct *mm = vma->vm_mm; 885 struct mm_struct *mm = vma->vm_mm;
767 unsigned long address;
768 pte_t *pte; 886 pte_t *pte;
769 pte_t pteval; 887 pte_t pteval;
770 spinlock_t *ptl; 888 spinlock_t *ptl;
771 int ret = SWAP_AGAIN; 889 int ret = SWAP_AGAIN;
772 890
773 address = vma_address(page, vma);
774 if (address == -EFAULT)
775 goto out;
776
777 pte = page_check_address(page, mm, address, &ptl, 0); 891 pte = page_check_address(page, mm, address, &ptl, 0);
778 if (!pte) 892 if (!pte)
779 goto out; 893 goto out;
@@ -784,10 +898,11 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
784 * skipped over this mm) then we should reactivate it. 898 * skipped over this mm) then we should reactivate it.
785 */ 899 */
786 if (!(flags & TTU_IGNORE_MLOCK)) { 900 if (!(flags & TTU_IGNORE_MLOCK)) {
787 if (vma->vm_flags & VM_LOCKED) { 901 if (vma->vm_flags & VM_LOCKED)
788 ret = SWAP_MLOCK; 902 goto out_mlock;
903
904 if (TTU_ACTION(flags) == TTU_MUNLOCK)
789 goto out_unmap; 905 goto out_unmap;
790 }
791 } 906 }
792 if (!(flags & TTU_IGNORE_ACCESS)) { 907 if (!(flags & TTU_IGNORE_ACCESS)) {
793 if (ptep_clear_flush_young_notify(vma, address, pte)) { 908 if (ptep_clear_flush_young_notify(vma, address, pte)) {
@@ -809,9 +924,9 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
809 924
810 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) { 925 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
811 if (PageAnon(page)) 926 if (PageAnon(page))
812 dec_mm_counter(mm, anon_rss); 927 dec_mm_counter(mm, MM_ANONPAGES);
813 else 928 else
814 dec_mm_counter(mm, file_rss); 929 dec_mm_counter(mm, MM_FILEPAGES);
815 set_pte_at(mm, address, pte, 930 set_pte_at(mm, address, pte,
816 swp_entry_to_pte(make_hwpoison_entry(page))); 931 swp_entry_to_pte(make_hwpoison_entry(page)));
817 } else if (PageAnon(page)) { 932 } else if (PageAnon(page)) {
@@ -822,14 +937,19 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
822 * Store the swap location in the pte. 937 * Store the swap location in the pte.
823 * See handle_pte_fault() ... 938 * See handle_pte_fault() ...
824 */ 939 */
825 swap_duplicate(entry); 940 if (swap_duplicate(entry) < 0) {
941 set_pte_at(mm, address, pte, pteval);
942 ret = SWAP_FAIL;
943 goto out_unmap;
944 }
826 if (list_empty(&mm->mmlist)) { 945 if (list_empty(&mm->mmlist)) {
827 spin_lock(&mmlist_lock); 946 spin_lock(&mmlist_lock);
828 if (list_empty(&mm->mmlist)) 947 if (list_empty(&mm->mmlist))
829 list_add(&mm->mmlist, &init_mm.mmlist); 948 list_add(&mm->mmlist, &init_mm.mmlist);
830 spin_unlock(&mmlist_lock); 949 spin_unlock(&mmlist_lock);
831 } 950 }
832 dec_mm_counter(mm, anon_rss); 951 dec_mm_counter(mm, MM_ANONPAGES);
952 inc_mm_counter(mm, MM_SWAPENTS);
833 } else if (PAGE_MIGRATION) { 953 } else if (PAGE_MIGRATION) {
834 /* 954 /*
835 * Store the pfn of the page in a special migration 955 * Store the pfn of the page in a special migration
@@ -847,8 +967,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
847 entry = make_migration_entry(page, pte_write(pteval)); 967 entry = make_migration_entry(page, pte_write(pteval));
848 set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); 968 set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
849 } else 969 } else
850 dec_mm_counter(mm, file_rss); 970 dec_mm_counter(mm, MM_FILEPAGES);
851
852 971
853 page_remove_rmap(page); 972 page_remove_rmap(page);
854 page_cache_release(page); 973 page_cache_release(page);
@@ -857,6 +976,27 @@ out_unmap:
857 pte_unmap_unlock(pte, ptl); 976 pte_unmap_unlock(pte, ptl);
858out: 977out:
859 return ret; 978 return ret;
979
980out_mlock:
981 pte_unmap_unlock(pte, ptl);
982
983
984 /*
985 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
986 * unstable result and race. Plus, We can't wait here because
987 * we now hold anon_vma->lock or mapping->i_mmap_lock.
988 * if trylock failed, the page remain in evictable lru and later
989 * vmscan could retry to move the page to unevictable lru if the
990 * page is actually mlocked.
991 */
992 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
993 if (vma->vm_flags & VM_LOCKED) {
994 mlock_vma_page(page);
995 ret = SWAP_MLOCK;
996 }
997 up_read(&vma->vm_mm->mmap_sem);
998 }
999 return ret;
860} 1000}
861 1001
862/* 1002/*
@@ -922,11 +1062,10 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
922 return ret; 1062 return ret;
923 1063
924 /* 1064 /*
925 * MLOCK_PAGES => feature is configured. 1065 * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
926 * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
927 * keep the sem while scanning the cluster for mlocking pages. 1066 * keep the sem while scanning the cluster for mlocking pages.
928 */ 1067 */
929 if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) { 1068 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
930 locked_vma = (vma->vm_flags & VM_LOCKED); 1069 locked_vma = (vma->vm_flags & VM_LOCKED);
931 if (!locked_vma) 1070 if (!locked_vma)
932 up_read(&vma->vm_mm->mmap_sem); /* don't need it */ 1071 up_read(&vma->vm_mm->mmap_sem); /* don't need it */
@@ -967,7 +1106,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
967 1106
968 page_remove_rmap(page); 1107 page_remove_rmap(page);
969 page_cache_release(page); 1108 page_cache_release(page);
970 dec_mm_counter(mm, file_rss); 1109 dec_mm_counter(mm, MM_FILEPAGES);
971 (*mapcount)--; 1110 (*mapcount)--;
972 } 1111 }
973 pte_unmap_unlock(pte - 1, ptl); 1112 pte_unmap_unlock(pte - 1, ptl);
@@ -976,29 +1115,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
976 return ret; 1115 return ret;
977} 1116}
978 1117
979/*
980 * common handling for pages mapped in VM_LOCKED vmas
981 */
982static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
983{
984 int mlocked = 0;
985
986 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
987 if (vma->vm_flags & VM_LOCKED) {
988 mlock_vma_page(page);
989 mlocked++; /* really mlocked the page */
990 }
991 up_read(&vma->vm_mm->mmap_sem);
992 }
993 return mlocked;
994}
995
996/** 1118/**
997 * try_to_unmap_anon - unmap or unlock anonymous page using the object-based 1119 * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
998 * rmap method 1120 * rmap method
999 * @page: the page to unmap/unlock 1121 * @page: the page to unmap/unlock
1000 * @unlock: request for unlock rather than unmap [unlikely] 1122 * @flags: action and flags
1001 * @migration: unmapping for migration - ignored if @unlock
1002 * 1123 *
1003 * Find all the mappings of a page using the mapping pointer and the vma chains 1124 * Find all the mappings of a page using the mapping pointer and the vma chains
1004 * contained in the anon_vma struct it points to. 1125 * contained in the anon_vma struct it points to.
@@ -1013,43 +1134,24 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
1013static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) 1134static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1014{ 1135{
1015 struct anon_vma *anon_vma; 1136 struct anon_vma *anon_vma;
1016 struct vm_area_struct *vma; 1137 struct anon_vma_chain *avc;
1017 unsigned int mlocked = 0;
1018 int ret = SWAP_AGAIN; 1138 int ret = SWAP_AGAIN;
1019 int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
1020
1021 if (MLOCK_PAGES && unlikely(unlock))
1022 ret = SWAP_SUCCESS; /* default for try_to_munlock() */
1023 1139
1024 anon_vma = page_lock_anon_vma(page); 1140 anon_vma = page_lock_anon_vma(page);
1025 if (!anon_vma) 1141 if (!anon_vma)
1026 return ret; 1142 return ret;
1027 1143
1028 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 1144 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1029 if (MLOCK_PAGES && unlikely(unlock)) { 1145 struct vm_area_struct *vma = avc->vma;
1030 if (!((vma->vm_flags & VM_LOCKED) && 1146 unsigned long address = vma_address(page, vma);
1031 page_mapped_in_vma(page, vma))) 1147 if (address == -EFAULT)
1032 continue; /* must visit all unlocked vmas */ 1148 continue;
1033 ret = SWAP_MLOCK; /* saw at least one mlocked vma */ 1149 ret = try_to_unmap_one(page, vma, address, flags);
1034 } else { 1150 if (ret != SWAP_AGAIN || !page_mapped(page))
1035 ret = try_to_unmap_one(page, vma, flags); 1151 break;
1036 if (ret == SWAP_FAIL || !page_mapped(page))
1037 break;
1038 }
1039 if (ret == SWAP_MLOCK) {
1040 mlocked = try_to_mlock_page(page, vma);
1041 if (mlocked)
1042 break; /* stop if actually mlocked page */
1043 }
1044 } 1152 }
1045 1153
1046 page_unlock_anon_vma(anon_vma); 1154 page_unlock_anon_vma(anon_vma);
1047
1048 if (mlocked)
1049 ret = SWAP_MLOCK; /* actually mlocked the page */
1050 else if (ret == SWAP_MLOCK)
1051 ret = SWAP_AGAIN; /* saw VM_LOCKED vma */
1052
1053 return ret; 1155 return ret;
1054} 1156}
1055 1157
@@ -1079,48 +1181,30 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1079 unsigned long max_nl_cursor = 0; 1181 unsigned long max_nl_cursor = 0;
1080 unsigned long max_nl_size = 0; 1182 unsigned long max_nl_size = 0;
1081 unsigned int mapcount; 1183 unsigned int mapcount;
1082 unsigned int mlocked = 0;
1083 int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
1084
1085 if (MLOCK_PAGES && unlikely(unlock))
1086 ret = SWAP_SUCCESS; /* default for try_to_munlock() */
1087 1184
1088 spin_lock(&mapping->i_mmap_lock); 1185 spin_lock(&mapping->i_mmap_lock);
1089 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 1186 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1090 if (MLOCK_PAGES && unlikely(unlock)) { 1187 unsigned long address = vma_address(page, vma);
1091 if (!((vma->vm_flags & VM_LOCKED) && 1188 if (address == -EFAULT)
1092 page_mapped_in_vma(page, vma))) 1189 continue;
1093 continue; /* must visit all vmas */ 1190 ret = try_to_unmap_one(page, vma, address, flags);
1094 ret = SWAP_MLOCK; 1191 if (ret != SWAP_AGAIN || !page_mapped(page))
1095 } else { 1192 goto out;
1096 ret = try_to_unmap_one(page, vma, flags);
1097 if (ret == SWAP_FAIL || !page_mapped(page))
1098 goto out;
1099 }
1100 if (ret == SWAP_MLOCK) {
1101 mlocked = try_to_mlock_page(page, vma);
1102 if (mlocked)
1103 break; /* stop if actually mlocked page */
1104 }
1105 } 1193 }
1106 1194
1107 if (mlocked) 1195 if (list_empty(&mapping->i_mmap_nonlinear))
1108 goto out; 1196 goto out;
1109 1197
1110 if (list_empty(&mapping->i_mmap_nonlinear)) 1198 /*
1199 * We don't bother to try to find the munlocked page in nonlinears.
1200 * It's costly. Instead, later, page reclaim logic may call
1201 * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
1202 */
1203 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1111 goto out; 1204 goto out;
1112 1205
1113 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 1206 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1114 shared.vm_set.list) { 1207 shared.vm_set.list) {
1115 if (MLOCK_PAGES && unlikely(unlock)) {
1116 if (!(vma->vm_flags & VM_LOCKED))
1117 continue; /* must visit all vmas */
1118 ret = SWAP_MLOCK; /* leave mlocked == 0 */
1119 goto out; /* no need to look further */
1120 }
1121 if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
1122 (vma->vm_flags & VM_LOCKED))
1123 continue;
1124 cursor = (unsigned long) vma->vm_private_data; 1208 cursor = (unsigned long) vma->vm_private_data;
1125 if (cursor > max_nl_cursor) 1209 if (cursor > max_nl_cursor)
1126 max_nl_cursor = cursor; 1210 max_nl_cursor = cursor;
@@ -1153,16 +1237,12 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1153 do { 1237 do {
1154 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 1238 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1155 shared.vm_set.list) { 1239 shared.vm_set.list) {
1156 if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
1157 (vma->vm_flags & VM_LOCKED))
1158 continue;
1159 cursor = (unsigned long) vma->vm_private_data; 1240 cursor = (unsigned long) vma->vm_private_data;
1160 while ( cursor < max_nl_cursor && 1241 while ( cursor < max_nl_cursor &&
1161 cursor < vma->vm_end - vma->vm_start) { 1242 cursor < vma->vm_end - vma->vm_start) {
1162 ret = try_to_unmap_cluster(cursor, &mapcount, 1243 if (try_to_unmap_cluster(cursor, &mapcount,
1163 vma, page); 1244 vma, page) == SWAP_MLOCK)
1164 if (ret == SWAP_MLOCK) 1245 ret = SWAP_MLOCK;
1165 mlocked = 2; /* to return below */
1166 cursor += CLUSTER_SIZE; 1246 cursor += CLUSTER_SIZE;
1167 vma->vm_private_data = (void *) cursor; 1247 vma->vm_private_data = (void *) cursor;
1168 if ((int)mapcount <= 0) 1248 if ((int)mapcount <= 0)
@@ -1183,10 +1263,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1183 vma->vm_private_data = NULL; 1263 vma->vm_private_data = NULL;
1184out: 1264out:
1185 spin_unlock(&mapping->i_mmap_lock); 1265 spin_unlock(&mapping->i_mmap_lock);
1186 if (mlocked)
1187 ret = SWAP_MLOCK; /* actually mlocked the page */
1188 else if (ret == SWAP_MLOCK)
1189 ret = SWAP_AGAIN; /* saw VM_LOCKED vma */
1190 return ret; 1266 return ret;
1191} 1267}
1192 1268
@@ -1210,7 +1286,9 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
1210 1286
1211 BUG_ON(!PageLocked(page)); 1287 BUG_ON(!PageLocked(page));
1212 1288
1213 if (PageAnon(page)) 1289 if (unlikely(PageKsm(page)))
1290 ret = try_to_unmap_ksm(page, flags);
1291 else if (PageAnon(page))
1214 ret = try_to_unmap_anon(page, flags); 1292 ret = try_to_unmap_anon(page, flags);
1215 else 1293 else
1216 ret = try_to_unmap_file(page, flags); 1294 ret = try_to_unmap_file(page, flags);
@@ -1229,17 +1307,99 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
1229 * 1307 *
1230 * Return values are: 1308 * Return values are:
1231 * 1309 *
1232 * SWAP_SUCCESS - no vma's holding page mlocked. 1310 * SWAP_AGAIN - no vma is holding page mlocked, or,
1233 * SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem 1311 * SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem
1312 * SWAP_FAIL - page cannot be located at present
1234 * SWAP_MLOCK - page is now mlocked. 1313 * SWAP_MLOCK - page is now mlocked.
1235 */ 1314 */
1236int try_to_munlock(struct page *page) 1315int try_to_munlock(struct page *page)
1237{ 1316{
1238 VM_BUG_ON(!PageLocked(page) || PageLRU(page)); 1317 VM_BUG_ON(!PageLocked(page) || PageLRU(page));
1239 1318
1240 if (PageAnon(page)) 1319 if (unlikely(PageKsm(page)))
1320 return try_to_unmap_ksm(page, TTU_MUNLOCK);
1321 else if (PageAnon(page))
1241 return try_to_unmap_anon(page, TTU_MUNLOCK); 1322 return try_to_unmap_anon(page, TTU_MUNLOCK);
1242 else 1323 else
1243 return try_to_unmap_file(page, TTU_MUNLOCK); 1324 return try_to_unmap_file(page, TTU_MUNLOCK);
1244} 1325}
1245 1326
1327#ifdef CONFIG_MIGRATION
1328/*
1329 * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
1330 * Called by migrate.c to remove migration ptes, but might be used more later.
1331 */
1332static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1333 struct vm_area_struct *, unsigned long, void *), void *arg)
1334{
1335 struct anon_vma *anon_vma;
1336 struct anon_vma_chain *avc;
1337 int ret = SWAP_AGAIN;
1338
1339 /*
1340 * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
1341 * because that depends on page_mapped(); but not all its usages
1342 * are holding mmap_sem, which also gave the necessary guarantee
1343 * (that this anon_vma's slab has not already been destroyed).
1344 * This needs to be reviewed later: avoiding page_lock_anon_vma()
1345 * is risky, and currently limits the usefulness of rmap_walk().
1346 */
1347 anon_vma = page_anon_vma(page);
1348 if (!anon_vma)
1349 return ret;
1350 spin_lock(&anon_vma->lock);
1351 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1352 struct vm_area_struct *vma = avc->vma;
1353 unsigned long address = vma_address(page, vma);
1354 if (address == -EFAULT)
1355 continue;
1356 ret = rmap_one(page, vma, address, arg);
1357 if (ret != SWAP_AGAIN)
1358 break;
1359 }
1360 spin_unlock(&anon_vma->lock);
1361 return ret;
1362}
1363
1364static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
1365 struct vm_area_struct *, unsigned long, void *), void *arg)
1366{
1367 struct address_space *mapping = page->mapping;
1368 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1369 struct vm_area_struct *vma;
1370 struct prio_tree_iter iter;
1371 int ret = SWAP_AGAIN;
1372
1373 if (!mapping)
1374 return ret;
1375 spin_lock(&mapping->i_mmap_lock);
1376 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
1377 unsigned long address = vma_address(page, vma);
1378 if (address == -EFAULT)
1379 continue;
1380 ret = rmap_one(page, vma, address, arg);
1381 if (ret != SWAP_AGAIN)
1382 break;
1383 }
1384 /*
1385 * No nonlinear handling: being always shared, nonlinear vmas
1386 * never contain migration ptes. Decide what to do about this
1387 * limitation to linear when we need rmap_walk() on nonlinear.
1388 */
1389 spin_unlock(&mapping->i_mmap_lock);
1390 return ret;
1391}
1392
1393int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
1394 struct vm_area_struct *, unsigned long, void *), void *arg)
1395{
1396 VM_BUG_ON(!PageLocked(page));
1397
1398 if (unlikely(PageKsm(page)))
1399 return rmap_walk_ksm(page, rmap_one, arg);
1400 else if (PageAnon(page))
1401 return rmap_walk_anon(page, rmap_one, arg);
1402 else
1403 return rmap_walk_file(page, rmap_one, arg);
1404}
1405#endif /* CONFIG_MIGRATION */
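
Illustrative note (not part of the commit): the sketch below shows how a caller might use the rmap_walk() interface added above under CONFIG_MIGRATION. The callback signature, the SWAP_AGAIN continue-the-walk convention, and the page-lock requirement are taken from the diff; the count_one_mapping() and count_candidate_mappings() helpers are hypothetical and exist only for illustration.

/*
 * Hypothetical example only -- not from the commit.  rmap_walk()
 * invokes the callback once per (vma, address) that may map the page:
 * rmap_walk_anon() iterates the anon_vma_chain entries on
 * anon_vma->head, rmap_walk_file() walks the i_mmap prio tree.
 * Returning SWAP_AGAIN tells the walker to continue.
 */
#include <linux/mm.h>
#include <linux/rmap.h>

static int count_one_mapping(struct page *page, struct vm_area_struct *vma,
			     unsigned long address, void *arg)
{
	int *count = arg;

	(*count)++;		/* saw one candidate mapping */
	return SWAP_AGAIN;	/* keep walking */
}

static int count_candidate_mappings(struct page *page)
{
	int count = 0;

	/* Caller must hold the page lock; rmap_walk() asserts PageLocked. */
	rmap_walk(page, count_one_mapping, &count);
	return count;
}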