commit    b73077eb03f510a84b102fb97640e595a958403c (patch)
tree      8b639000418e2756bf6baece4e00e07d2534bccc /mm/rmap.c
parent    28350e330cfab46b60a1dbf763b678d859f9f3d9 (diff)
parent    9d2e173644bb5c42ff1b280fbdda3f195a7cf1f7 (diff)
author    Dmitry Torokhov <dmitry.torokhov@gmail.com>  2011-05-24 03:06:26 -0400
committer Dmitry Torokhov <dmitry.torokhov@gmail.com>  2011-05-24 03:06:26 -0400
Merge branch 'next' into for-linus
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--    mm/rmap.c    139
1 file changed, 67 insertions, 72 deletions
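The hunks below (part of the merged anon_vma rework) replace the KSM/migration-only external_refcount with an unconditional refcount on every anon_vma: anon_vma_alloc() now starts the object with one reference, anon_vma_free() insists the count is zero, and put_anon_vma()/__put_anon_vma() free the object (and drop the root's reference) once the last reference goes away. A minimal userspace sketch of that get/put pattern, using C11 atomics and malloc/free in place of the kernel's atomic_t and slab cache, and omitting the root chaining and locking entirely:

/* Simplified, userspace-only sketch -- not the kernel code.  The kernel
 * uses kmem_cache_alloc()/atomic_t and also manages anon_vma->root here. */
#include <assert.h>
#include <stdatomic.h>
#include <stdlib.h>

struct anon_vma {
        atomic_int refcount;
        struct anon_vma *root;
};

static struct anon_vma *anon_vma_alloc(void)
{
        struct anon_vma *anon_vma = malloc(sizeof(*anon_vma));

        if (anon_vma) {
                /* New objects start life with one reference. */
                atomic_init(&anon_vma->refcount, 1);
                /* Root points to itself until fork re-parents it. */
                anon_vma->root = anon_vma;
        }
        return anon_vma;
}

static void anon_vma_free(struct anon_vma *anon_vma)
{
        /* Mirrors the new VM_BUG_ON(): freeing with live references is a bug. */
        assert(atomic_load(&anon_vma->refcount) == 0);
        free(anon_vma);
}

static void put_anon_vma(struct anon_vma *anon_vma)
{
        /* The last put frees the object; the kernel additionally drops the
         * root anon_vma's reference in __put_anon_vma(). */
        if (atomic_fetch_sub(&anon_vma->refcount, 1) == 1)
                anon_vma_free(anon_vma);
}

int main(void)
{
        struct anon_vma *av = anon_vma_alloc();

        if (av)
                put_anon_vma(av);       /* drops the initial reference */
        return 0;
}

The point of the pattern, as the hunks show, is a single lifetime rule: whoever drops the count to zero frees the structure, so the unmap paths and the KSM/migration users no longer need separate "empty list" bookkeeping.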
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -31,11 +31,12 @@
  *           swap_lock (in swap_duplicate, swap_info_get)
  *             mmlist_lock (in mmput, drain_mmlist and others)
  *             mapping->private_lock (in __set_page_dirty_buffers)
- *             inode_lock (in set_page_dirty's __mark_inode_dirty)
+ *             inode->i_lock (in set_page_dirty's __mark_inode_dirty)
+ *             inode_wb_list_lock (in set_page_dirty's __mark_inode_dirty)
  *               sb_lock (within inode_lock in fs/fs-writeback.c)
  *               mapping->tree_lock (widely used, in set_page_dirty,
  *                         in arch-dependent flush_dcache_mmap_lock,
- *                         within inode_lock in __sync_single_inode)
+ *                         within inode_wb_list_lock in __sync_single_inode)
  *
  * (code doesn't rely on that order so it could be switched around)
  * ->tasklist_lock
@@ -67,11 +68,24 @@ static struct kmem_cache *anon_vma_chain_cachep;
 
 static inline struct anon_vma *anon_vma_alloc(void)
 {
-        return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
+        struct anon_vma *anon_vma;
+
+        anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
+        if (anon_vma) {
+                atomic_set(&anon_vma->refcount, 1);
+                /*
+                 * Initialise the anon_vma root to point to itself. If called
+                 * from fork, the root will be reset to the parents anon_vma.
+                 */
+                anon_vma->root = anon_vma;
+        }
+
+        return anon_vma;
 }
 
-void anon_vma_free(struct anon_vma *anon_vma)
+static inline void anon_vma_free(struct anon_vma *anon_vma)
 {
+        VM_BUG_ON(atomic_read(&anon_vma->refcount));
         kmem_cache_free(anon_vma_cachep, anon_vma);
 }
 
@@ -133,11 +147,6 @@ int anon_vma_prepare(struct vm_area_struct *vma)
                         if (unlikely(!anon_vma))
                                 goto out_enomem_free_avc;
                         allocated = anon_vma;
-                        /*
-                         * This VMA had no anon_vma yet. This anon_vma is
-                         * the root of any anon_vma tree that might form.
-                         */
-                        anon_vma->root = anon_vma;
                 }
 
                 anon_vma_lock(anon_vma);
@@ -156,7 +165,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
                 anon_vma_unlock(anon_vma);
 
                 if (unlikely(allocated))
-                        anon_vma_free(allocated);
+                        put_anon_vma(allocated);
                 if (unlikely(avc))
                         anon_vma_chain_free(avc);
         }
@@ -241,9 +250,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
          */
         anon_vma->root = pvma->anon_vma->root;
         /*
-         * With KSM refcounts, an anon_vma can stay around longer than the
-         * process it belongs to. The root anon_vma needs to be pinned
-         * until this anon_vma is freed, because the lock lives in the root.
+         * With refcounts, an anon_vma can stay around longer than the
+         * process it belongs to. The root anon_vma needs to be pinned until
+         * this anon_vma is freed, because the lock lives in the root.
          */
         get_anon_vma(anon_vma->root);
         /* Mark this anon_vma as the one where our new (COWed) pages go. */
@@ -253,7 +262,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
         return 0;
 
  out_error_free_anon_vma:
-        anon_vma_free(anon_vma);
+        put_anon_vma(anon_vma);
  out_error:
         unlink_anon_vmas(vma);
         return -ENOMEM;
@@ -272,15 +281,11 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
         list_del(&anon_vma_chain->same_anon_vma);
 
         /* We must garbage collect the anon_vma if it's empty */
-        empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
+        empty = list_empty(&anon_vma->head);
         anon_vma_unlock(anon_vma);
 
-        if (empty) {
-                /* We no longer need the root anon_vma */
-                if (anon_vma->root != anon_vma)
-                        drop_anon_vma(anon_vma->root);
-                anon_vma_free(anon_vma);
-        }
+        if (empty)
+                put_anon_vma(anon_vma);
 }
 
 void unlink_anon_vmas(struct vm_area_struct *vma)
@@ -303,7 +308,7 @@ static void anon_vma_ctor(void *data)
         struct anon_vma *anon_vma = data;
 
         spin_lock_init(&anon_vma->lock);
-        anonvma_external_refcount_init(anon_vma);
+        atomic_set(&anon_vma->refcount, 0);
         INIT_LIST_HEAD(&anon_vma->head);
 }
 
@@ -497,41 +502,51 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
         struct mm_struct *mm = vma->vm_mm;
         int referenced = 0;
 
-        /*
-         * Don't want to elevate referenced for mlocked page that gets this far,
-         * in order that it progresses to try_to_unmap and is moved to the
-         * unevictable list.
-         */
-        if (vma->vm_flags & VM_LOCKED) {
-                *mapcount = 0;  /* break early from loop */
-                *vm_flags |= VM_LOCKED;
-                goto out;
-        }
-
-        /* Pretend the page is referenced if the task has the
-           swap token and is in the middle of a page fault. */
-        if (mm != current->mm && has_swap_token(mm) &&
-            rwsem_is_locked(&mm->mmap_sem))
-                referenced++;
-
         if (unlikely(PageTransHuge(page))) {
                 pmd_t *pmd;
 
                 spin_lock(&mm->page_table_lock);
+                /*
+                 * rmap might return false positives; we must filter
+                 * these out using page_check_address_pmd().
+                 */
                 pmd = page_check_address_pmd(page, mm, address,
                                              PAGE_CHECK_ADDRESS_PMD_FLAG);
-                if (pmd && !pmd_trans_splitting(*pmd) &&
-                    pmdp_clear_flush_young_notify(vma, address, pmd))
+                if (!pmd) {
+                        spin_unlock(&mm->page_table_lock);
+                        goto out;
+                }
+
+                if (vma->vm_flags & VM_LOCKED) {
+                        spin_unlock(&mm->page_table_lock);
+                        *mapcount = 0;  /* break early from loop */
+                        *vm_flags |= VM_LOCKED;
+                        goto out;
+                }
+
+                /* go ahead even if the pmd is pmd_trans_splitting() */
+                if (pmdp_clear_flush_young_notify(vma, address, pmd))
                         referenced++;
                 spin_unlock(&mm->page_table_lock);
         } else {
                 pte_t *pte;
                 spinlock_t *ptl;
 
+                /*
+                 * rmap might return false positives; we must filter
+                 * these out using page_check_address().
+                 */
                 pte = page_check_address(page, mm, address, &ptl, 0);
                 if (!pte)
                         goto out;
 
+                if (vma->vm_flags & VM_LOCKED) {
+                        pte_unmap_unlock(pte, ptl);
+                        *mapcount = 0;  /* break early from loop */
+                        *vm_flags |= VM_LOCKED;
+                        goto out;
+                }
+
                 if (ptep_clear_flush_young_notify(vma, address, pte)) {
                         /*
                          * Don't treat a reference through a sequentially read
@@ -546,6 +561,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                 pte_unmap_unlock(pte, ptl);
         }
 
+        /* Pretend the page is referenced if the task has the
+           swap token and is in the middle of a page fault. */
+        if (mm != current->mm && has_swap_token(mm) &&
+            rwsem_is_locked(&mm->mmap_sem))
+                referenced++;
+
         (*mapcount)--;
 
         if (referenced)
@@ -1470,41 +1491,15 @@ int try_to_munlock(struct page *page)
                 return try_to_unmap_file(page, TTU_MUNLOCK);
 }
 
-#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
-/*
- * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
- * if necessary. Be careful to do all the tests under the lock. Once
- * we know we are the last user, nobody else can get a reference and we
- * can do the freeing without the lock.
- */
-void drop_anon_vma(struct anon_vma *anon_vma)
+void __put_anon_vma(struct anon_vma *anon_vma)
 {
-        BUG_ON(atomic_read(&anon_vma->external_refcount) <= 0);
-        if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
-                struct anon_vma *root = anon_vma->root;
-                int empty = list_empty(&anon_vma->head);
-                int last_root_user = 0;
-                int root_empty = 0;
+        struct anon_vma *root = anon_vma->root;
 
-                /*
-                 * The refcount on a non-root anon_vma got dropped. Drop
-                 * the refcount on the root and check if we need to free it.
-                 */
-                if (empty && anon_vma != root) {
-                        BUG_ON(atomic_read(&root->external_refcount) <= 0);
-                        last_root_user = atomic_dec_and_test(&root->external_refcount);
-                        root_empty = list_empty(&root->head);
-                }
-                anon_vma_unlock(anon_vma);
+        if (root != anon_vma && atomic_dec_and_test(&root->refcount))
+                anon_vma_free(root);
 
-                if (empty) {
-                        anon_vma_free(anon_vma);
-                        if (root_empty && last_root_user)
-                                anon_vma_free(root);
-                }
-        }
+        anon_vma_free(anon_vma);
 }
-#endif
 
 #ifdef CONFIG_MIGRATION
 /*