Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 127 |
1 file changed, 103 insertions, 24 deletions
@@ -133,9 +133,14 @@ int anon_vma_prepare(struct vm_area_struct *vma) | |||
133 | if (unlikely(!anon_vma)) | 133 | if (unlikely(!anon_vma)) |
134 | goto out_enomem_free_avc; | 134 | goto out_enomem_free_avc; |
135 | allocated = anon_vma; | 135 | allocated = anon_vma; |
136 | /* | ||
137 | * This VMA had no anon_vma yet. This anon_vma is | ||
138 | * the root of any anon_vma tree that might form. | ||
139 | */ | ||
140 | anon_vma->root = anon_vma; | ||
136 | } | 141 | } |
137 | 142 | ||
138 | spin_lock(&anon_vma->lock); | 143 | anon_vma_lock(anon_vma); |
139 | /* page_table_lock to protect against threads */ | 144 | /* page_table_lock to protect against threads */ |
140 | spin_lock(&mm->page_table_lock); | 145 | spin_lock(&mm->page_table_lock); |
141 | if (likely(!vma->anon_vma)) { | 146 | if (likely(!vma->anon_vma)) { |
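The hunk above makes a freshly allocated anon_vma the root of its own tree; anon_vma_fork() later points a child's root at its parent's root. A hedged sketch of the struct layout this series assumes (the authoritative definition lives in include/linux/rmap.h and is not part of this diff):

	struct anon_vma {
		struct anon_vma *root;		/* root of the anon_vma tree */
		spinlock_t lock;		/* only the root's lock is ever taken */
		struct list_head head;		/* list of anon_vma_chain entries */
		/* plus an external refcount under CONFIG_KSM / CONFIG_MIGRATION */
	};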
@@ -143,12 +148,12 @@ int anon_vma_prepare(struct vm_area_struct *vma) | |||
143 | avc->anon_vma = anon_vma; | 148 | avc->anon_vma = anon_vma; |
144 | avc->vma = vma; | 149 | avc->vma = vma; |
145 | list_add(&avc->same_vma, &vma->anon_vma_chain); | 150 | list_add(&avc->same_vma, &vma->anon_vma_chain); |
146 | list_add(&avc->same_anon_vma, &anon_vma->head); | 151 | list_add_tail(&avc->same_anon_vma, &anon_vma->head); |
147 | allocated = NULL; | 152 | allocated = NULL; |
148 | avc = NULL; | 153 | avc = NULL; |
149 | } | 154 | } |
150 | spin_unlock(&mm->page_table_lock); | 155 | spin_unlock(&mm->page_table_lock); |
151 | spin_unlock(&anon_vma->lock); | 156 | anon_vma_unlock(anon_vma); |
152 | 157 | ||
153 | if (unlikely(allocated)) | 158 | if (unlikely(allocated)) |
154 | anon_vma_free(allocated); | 159 | anon_vma_free(allocated); |
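Throughout this patch, direct spin_lock(&anon_vma->lock) / spin_unlock(&anon_vma->lock) calls become anon_vma_lock() / anon_vma_unlock(). Their definitions are not part of this diff; presumably they are small inline helpers in include/linux/rmap.h that always take the root's spinlock, roughly:

	/* Sketch only; not shown in this diff. */
	static inline void anon_vma_lock(struct anon_vma *anon_vma)
	{
		spin_lock(&anon_vma->root->lock);
	}

	static inline void anon_vma_unlock(struct anon_vma *anon_vma)
	{
		spin_unlock(&anon_vma->root->lock);
	}

Funnelling every lock operation through the root is what lets an entire anon_vma tree share a single spinlock.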
@@ -171,9 +176,9 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, | |||
171 | avc->anon_vma = anon_vma; | 176 | avc->anon_vma = anon_vma; |
172 | list_add(&avc->same_vma, &vma->anon_vma_chain); | 177 | list_add(&avc->same_vma, &vma->anon_vma_chain); |
173 | 178 | ||
174 | spin_lock(&anon_vma->lock); | 179 | anon_vma_lock(anon_vma); |
175 | list_add_tail(&avc->same_anon_vma, &anon_vma->head); | 180 | list_add_tail(&avc->same_anon_vma, &anon_vma->head); |
176 | spin_unlock(&anon_vma->lock); | 181 | anon_vma_unlock(anon_vma); |
177 | } | 182 | } |
178 | 183 | ||
179 | /* | 184 | /* |
@@ -225,9 +230,21 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) | |||
225 | avc = anon_vma_chain_alloc(); | 230 | avc = anon_vma_chain_alloc(); |
226 | if (!avc) | 231 | if (!avc) |
227 | goto out_error_free_anon_vma; | 232 | goto out_error_free_anon_vma; |
228 | anon_vma_chain_link(vma, avc, anon_vma); | 233 | |
234 | /* | ||
235 | * The root anon_vma's spinlock is the lock actually used when we | ||
236 | * lock any of the anon_vmas in this anon_vma tree. | ||
237 | */ | ||
238 | anon_vma->root = pvma->anon_vma->root; | ||
239 | /* | ||
240 | * With KSM refcounts, an anon_vma can stay around longer than the | ||
241 | * process it belongs to. The root anon_vma needs to be pinned | ||
242 | * until this anon_vma is freed, because the lock lives in the root. | ||
243 | */ | ||
244 | get_anon_vma(anon_vma->root); | ||
229 | /* Mark this anon_vma as the one where our new (COWed) pages go. */ | 245 | /* Mark this anon_vma as the one where our new (COWed) pages go. */ |
230 | vma->anon_vma = anon_vma; | 246 | vma->anon_vma = anon_vma; |
247 | anon_vma_chain_link(vma, avc, anon_vma); | ||
231 | 248 | ||
232 | return 0; | 249 | return 0; |
233 | 250 | ||
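Two details in this hunk are worth noting. First, get_anon_vma(anon_vma->root) pins the root so the spinlock embedded in it stays valid for as long as this child anon_vma exists. Second, anon_vma_chain_link() is deliberately moved to after anon_vma->root is set, since linking now locks through the root pointer. A hedged sketch of the refcount helpers assumed here and by the anonvma_external_refcount() test in the next hunk (presumably in include/linux/rmap.h, guarded by CONFIG_KSM / CONFIG_MIGRATION):

	/* Sketch; names taken from the calls visible in this diff. */
	static inline void get_anon_vma(struct anon_vma *anon_vma)
	{
		atomic_inc(&anon_vma->external_refcount);
	}

	static inline int anonvma_external_refcount(struct anon_vma *anon_vma)
	{
		return atomic_read(&anon_vma->external_refcount);
	}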
@@ -247,22 +264,29 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain) | |||
247 | if (!anon_vma) | 264 | if (!anon_vma) |
248 | return; | 265 | return; |
249 | 266 | ||
250 | spin_lock(&anon_vma->lock); | 267 | anon_vma_lock(anon_vma); |
251 | list_del(&anon_vma_chain->same_anon_vma); | 268 | list_del(&anon_vma_chain->same_anon_vma); |
252 | 269 | ||
253 | /* We must garbage collect the anon_vma if it's empty */ | 270 | /* We must garbage collect the anon_vma if it's empty */ |
254 | empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma); | 271 | empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma); |
255 | spin_unlock(&anon_vma->lock); | 272 | anon_vma_unlock(anon_vma); |
256 | 273 | ||
257 | if (empty) | 274 | if (empty) { |
275 | /* We no longer need the root anon_vma */ | ||
276 | if (anon_vma->root != anon_vma) | ||
277 | drop_anon_vma(anon_vma->root); | ||
258 | anon_vma_free(anon_vma); | 278 | anon_vma_free(anon_vma); |
279 | } | ||
259 | } | 280 | } |
260 | 281 | ||
261 | void unlink_anon_vmas(struct vm_area_struct *vma) | 282 | void unlink_anon_vmas(struct vm_area_struct *vma) |
262 | { | 283 | { |
263 | struct anon_vma_chain *avc, *next; | 284 | struct anon_vma_chain *avc, *next; |
264 | 285 | ||
265 | /* Unlink each anon_vma chained to the VMA. */ | 286 | /* |
287 | * Unlink each anon_vma chained to the VMA. This list is ordered | ||
288 | * from newest to oldest, ensuring the root anon_vma gets freed last. | ||
289 | */ | ||
266 | list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { | 290 | list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { |
267 | anon_vma_unlink(avc); | 291 | anon_vma_unlink(avc); |
268 | list_del(&avc->same_vma); | 292 | list_del(&avc->same_vma); |
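The newest-to-oldest ordering that the new comment in unlink_anon_vmas() relies on falls out of head insertion on the same_vma list, as seen earlier in this diff in anon_vma_prepare() and anon_vma_chain_link():

	/* Head insertion: the most recently linked anon_vma sits first in
	 * vma->anon_vma_chain, so list_for_each_entry_safe() above reaches
	 * the chain entry for the (oldest) root anon_vma last. */
	list_add(&avc->same_vma, &vma->anon_vma_chain);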
@@ -303,7 +327,7 @@ struct anon_vma *page_lock_anon_vma(struct page *page) | |||
303 | goto out; | 327 | goto out; |
304 | 328 | ||
305 | anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); | 329 | anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); |
306 | spin_lock(&anon_vma->lock); | 330 | anon_vma_lock(anon_vma); |
307 | return anon_vma; | 331 | return anon_vma; |
308 | out: | 332 | out: |
309 | rcu_read_unlock(); | 333 | rcu_read_unlock(); |
@@ -312,7 +336,7 @@ out: | |||
312 | 336 | ||
313 | void page_unlock_anon_vma(struct anon_vma *anon_vma) | 337 | void page_unlock_anon_vma(struct anon_vma *anon_vma) |
314 | { | 338 | { |
315 | spin_unlock(&anon_vma->lock); | 339 | anon_vma_unlock(anon_vma); |
316 | rcu_read_unlock(); | 340 | rcu_read_unlock(); |
317 | } | 341 | } |
318 | 342 | ||
@@ -343,9 +367,10 @@ vma_address(struct page *page, struct vm_area_struct *vma) | |||
343 | */ | 367 | */ |
344 | unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) | 368 | unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) |
345 | { | 369 | { |
346 | if (PageAnon(page)) | 370 | if (PageAnon(page)) { |
347 | ; | 371 | if (vma->anon_vma->root != page_anon_vma(page)->root) |
348 | else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { | 372 | return -EFAULT; |
373 | } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { | ||
349 | if (!vma->vm_file || | 374 | if (!vma->vm_file || |
350 | vma->vm_file->f_mapping != page->mapping) | 375 | vma->vm_file->f_mapping != page->mapping) |
351 | return -EFAULT; | 376 | return -EFAULT; |
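page_address_in_vma() now rejects anonymous pages whose anon_vma tree differs from the VMA's; comparing root pointers accepts any member of the same tree, for example a page still pointing at a parent's anon_vma after fork. page_anon_vma() is not shown in this diff; presumably it decodes page->mapping the same way the open-coded arithmetic in page_lock_anon_vma() above does, roughly:

	/* Sketch (assumption), mirroring the PAGE_MAPPING_ANON arithmetic above. */
	static struct anon_vma *page_anon_vma(struct page *page)
	{
		unsigned long mapping = (unsigned long)page->mapping;

		if (!(mapping & PAGE_MAPPING_ANON))
			return NULL;
		return (struct anon_vma *)(mapping - PAGE_MAPPING_ANON);
	}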
@@ -753,14 +778,20 @@ static void __page_set_anon_rmap(struct page *page, | |||
753 | * If the page isn't exclusively mapped into this vma, | 778 | * If the page isn't exclusively mapped into this vma, |
754 | * we must use the _oldest_ possible anon_vma for the | 779 | * we must use the _oldest_ possible anon_vma for the |
755 | * page mapping! | 780 | * page mapping! |
756 | * | ||
757 | * So take the last AVC chain entry in the vma, which is | ||
758 | * the deepest ancestor, and use the anon_vma from that. | ||
759 | */ | 781 | */ |
760 | if (!exclusive) { | 782 | if (!exclusive) { |
761 | struct anon_vma_chain *avc; | 783 | if (PageAnon(page)) |
762 | avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma); | 784 | return; |
763 | anon_vma = avc->anon_vma; | 785 | anon_vma = anon_vma->root; |
786 | } else { | ||
787 | /* | ||
788 | * In this case, a swapped-out-but-not-discarded swap-cache | ||
789 | * page is being remapped, so there is no need to update | ||
790 | * page->mapping here. The anon_vma pointed to by page->mapping | ||
791 | * cannot be obsolete, because vma->anon_vma must be in its family. | ||
792 | */ | ||
793 | if (PageAnon(page)) | ||
794 | return; | ||
764 | } | 795 | } |
765 | 796 | ||
766 | anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; | 797 | anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; |
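For context, the unchanged tail of __page_set_anon_rmap() (assumed, not shown in this hunk) is what the new early returns skip: a page that is already PageAnon already has a valid page->mapping, so only first-time mappings fall through to:

	/* Assumed unchanged context following the line above: */
	page->mapping = (struct address_space *) anon_vma;
	page->index = linear_page_index(vma, address);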
@@ -790,6 +821,7 @@ static void __page_check_anon_rmap(struct page *page, | |||
790 | * are initially only visible via the pagetables, and the pte is locked | 821 | * are initially only visible via the pagetables, and the pte is locked |
791 | * over the call to page_add_new_anon_rmap. | 822 | * over the call to page_add_new_anon_rmap. |
792 | */ | 823 | */ |
824 | BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root); | ||
793 | BUG_ON(page->index != linear_page_index(vma, address)); | 825 | BUG_ON(page->index != linear_page_index(vma, address)); |
794 | #endif | 826 | #endif |
795 | } | 827 | } |
@@ -808,6 +840,17 @@ static void __page_check_anon_rmap(struct page *page, | |||
808 | void page_add_anon_rmap(struct page *page, | 840 | void page_add_anon_rmap(struct page *page, |
809 | struct vm_area_struct *vma, unsigned long address) | 841 | struct vm_area_struct *vma, unsigned long address) |
810 | { | 842 | { |
843 | do_page_add_anon_rmap(page, vma, address, 0); | ||
844 | } | ||
845 | |||
846 | /* | ||
847 | * Special version of the above for do_swap_page, which often runs | ||
848 | * into pages that are exclusively owned by the current process. | ||
849 | * Everybody else should continue to use page_add_anon_rmap above. | ||
850 | */ | ||
851 | void do_page_add_anon_rmap(struct page *page, | ||
852 | struct vm_area_struct *vma, unsigned long address, int exclusive) | ||
853 | { | ||
811 | int first = atomic_inc_and_test(&page->_mapcount); | 854 | int first = atomic_inc_and_test(&page->_mapcount); |
812 | if (first) | 855 | if (first) |
813 | __inc_zone_page_state(page, NR_ANON_PAGES); | 856 | __inc_zone_page_state(page, NR_ANON_PAGES); |
@@ -817,7 +860,7 @@ void page_add_anon_rmap(struct page *page, | |||
817 | VM_BUG_ON(!PageLocked(page)); | 860 | VM_BUG_ON(!PageLocked(page)); |
818 | VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); | 861 | VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); |
819 | if (first) | 862 | if (first) |
820 | __page_set_anon_rmap(page, vma, address, 0); | 863 | __page_set_anon_rmap(page, vma, address, exclusive); |
821 | else | 864 | else |
822 | __page_check_anon_rmap(page, vma, address); | 865 | __page_check_anon_rmap(page, vma, address); |
823 | } | 866 | } |
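A hedged usage sketch: the intended caller of the new exclusive path is do_swap_page(), which can pass a non-zero exclusive argument when it knows the page it is swapping in is private to the faulting process. The real call site is in mm/memory.c and not part of this diff; exclusive below is an illustrative local flag.

	/* Illustrative only (mm/memory.c is not shown in this diff): */
	do_page_add_anon_rmap(page, vma, address, exclusive);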
@@ -1384,6 +1427,42 @@ int try_to_munlock(struct page *page) | |||
1384 | return try_to_unmap_file(page, TTU_MUNLOCK); | 1427 | return try_to_unmap_file(page, TTU_MUNLOCK); |
1385 | } | 1428 | } |
1386 | 1429 | ||
1430 | #if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION) | ||
1431 | /* | ||
1432 | * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root | ||
1433 | * if necessary. Be careful to do all the tests under the lock. Once | ||
1434 | * we know we are the last user, nobody else can get a reference and we | ||
1435 | * can do the freeing without the lock. | ||
1436 | */ | ||
1437 | void drop_anon_vma(struct anon_vma *anon_vma) | ||
1438 | { | ||
1439 | BUG_ON(atomic_read(&anon_vma->external_refcount) <= 0); | ||
1440 | if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) { | ||
1441 | struct anon_vma *root = anon_vma->root; | ||
1442 | int empty = list_empty(&anon_vma->head); | ||
1443 | int last_root_user = 0; | ||
1444 | int root_empty = 0; | ||
1445 | |||
1446 | /* | ||
1447 | * The refcount on a non-root anon_vma got dropped. Drop | ||
1448 | * the refcount on the root and check if we need to free it. | ||
1449 | */ | ||
1450 | if (empty && anon_vma != root) { | ||
1451 | BUG_ON(atomic_read(&root->external_refcount) <= 0); | ||
1452 | last_root_user = atomic_dec_and_test(&root->external_refcount); | ||
1453 | root_empty = list_empty(&root->head); | ||
1454 | } | ||
1455 | anon_vma_unlock(anon_vma); | ||
1456 | |||
1457 | if (empty) { | ||
1458 | anon_vma_free(anon_vma); | ||
1459 | if (root_empty && last_root_user) | ||
1460 | anon_vma_free(root); | ||
1461 | } | ||
1462 | } | ||
1463 | } | ||
1464 | #endif | ||
1465 | |||
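drop_anon_vma() leans on atomic_dec_and_lock(): the external refcount is decremented, and only when it reaches zero does the call return true with the given spinlock (here the root's) held, so all the emptiness tests run under the tree lock. For reference, an open-coded sketch of that contract (the kernel's real helper lives in lib/dec_and_lock.c and is more optimized):

	static int dec_and_lock_sketch(atomic_t *cnt, spinlock_t *lock)
	{
		/* Fast path: the count was above 1, just decrement it. */
		if (atomic_add_unless(cnt, -1, 1))
			return 0;
		/* Possibly the last reference: decide while holding the lock. */
		spin_lock(lock);
		if (atomic_dec_and_test(cnt))
			return 1;	/* reached zero; return with lock held */
		spin_unlock(lock);
		return 0;
	}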
1387 | #ifdef CONFIG_MIGRATION | 1466 | #ifdef CONFIG_MIGRATION |
1388 | /* | 1467 | /* |
1389 | * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file(): | 1468 | * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file(): |
@@ -1405,7 +1484,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, | |||
1405 | anon_vma = page_anon_vma(page); | 1484 | anon_vma = page_anon_vma(page); |
1406 | if (!anon_vma) | 1485 | if (!anon_vma) |
1407 | return ret; | 1486 | return ret; |
1408 | spin_lock(&anon_vma->lock); | 1487 | anon_vma_lock(anon_vma); |
1409 | list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { | 1488 | list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { |
1410 | struct vm_area_struct *vma = avc->vma; | 1489 | struct vm_area_struct *vma = avc->vma; |
1411 | unsigned long address = vma_address(page, vma); | 1490 | unsigned long address = vma_address(page, vma); |
@@ -1415,7 +1494,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, | |||
1415 | if (ret != SWAP_AGAIN) | 1494 | if (ret != SWAP_AGAIN) |
1416 | break; | 1495 | break; |
1417 | } | 1496 | } |
1418 | spin_unlock(&anon_vma->lock); | 1497 | anon_vma_unlock(anon_vma); |
1419 | return ret; | 1498 | return ret; |
1420 | } | 1499 | } |
1421 | 1500 | ||