Diffstat (limited to 'mm/rmap.c')
-rw-r--r--  mm/rmap.c  127
1 files changed, 103 insertions, 24 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 71bd30a147cf..87b9e8ad4509 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -133,9 +133,14 @@ int anon_vma_prepare(struct vm_area_struct *vma)
                         if (unlikely(!anon_vma))
                                 goto out_enomem_free_avc;
                         allocated = anon_vma;
+                        /*
+                         * This VMA had no anon_vma yet. This anon_vma is
+                         * the root of any anon_vma tree that might form.
+                         */
+                        anon_vma->root = anon_vma;
                 }
 
-                spin_lock(&anon_vma->lock);
+                anon_vma_lock(anon_vma);
                 /* page_table_lock to protect against threads */
                 spin_lock(&mm->page_table_lock);
                 if (likely(!vma->anon_vma)) {
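
The anon_vma_lock()/anon_vma_unlock() calls introduced above are helpers rather than open-coded locking. They are not defined in this file; presumably they live in include/linux/rmap.h and simply take the spinlock of the anon_vma's root, so that every anon_vma in a tree is serialized by the single root lock. A minimal sketch, assuming the root pointer added by this series:

/* Sketch only: the real helpers are expected in include/linux/rmap.h. */
static inline void anon_vma_lock(struct anon_vma *anon_vma)
{
        /* Every anon_vma in a tree shares its root's spinlock. */
        spin_lock(&anon_vma->root->lock);
}

static inline void anon_vma_unlock(struct anon_vma *anon_vma)
{
        spin_unlock(&anon_vma->root->lock);
}
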
@@ -143,12 +148,12 @@ int anon_vma_prepare(struct vm_area_struct *vma)
                         avc->anon_vma = anon_vma;
                         avc->vma = vma;
                         list_add(&avc->same_vma, &vma->anon_vma_chain);
-                        list_add(&avc->same_anon_vma, &anon_vma->head);
+                        list_add_tail(&avc->same_anon_vma, &anon_vma->head);
                         allocated = NULL;
                         avc = NULL;
                 }
                 spin_unlock(&mm->page_table_lock);
-                spin_unlock(&anon_vma->lock);
+                anon_vma_unlock(anon_vma);
 
                 if (unlikely(allocated))
                         anon_vma_free(allocated);
@@ -171,9 +176,9 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
         avc->anon_vma = anon_vma;
         list_add(&avc->same_vma, &vma->anon_vma_chain);
 
-        spin_lock(&anon_vma->lock);
+        anon_vma_lock(anon_vma);
         list_add_tail(&avc->same_anon_vma, &anon_vma->head);
-        spin_unlock(&anon_vma->lock);
+        anon_vma_unlock(anon_vma);
 }
 
 /*
@@ -225,9 +230,21 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
         avc = anon_vma_chain_alloc();
         if (!avc)
                 goto out_error_free_anon_vma;
-        anon_vma_chain_link(vma, avc, anon_vma);
+
+        /*
+         * The root anon_vma's spinlock is the lock actually used when we
+         * lock any of the anon_vmas in this anon_vma tree.
+         */
+        anon_vma->root = pvma->anon_vma->root;
+        /*
+         * With KSM refcounts, an anon_vma can stay around longer than the
+         * process it belongs to. The root anon_vma needs to be pinned
+         * until this anon_vma is freed, because the lock lives in the root.
+         */
+        get_anon_vma(anon_vma->root);
         /* Mark this anon_vma as the one where our new (COWed) pages go. */
         vma->anon_vma = anon_vma;
+        anon_vma_chain_link(vma, avc, anon_vma);
 
         return 0;
 
@@ -247,22 +264,29 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
         if (!anon_vma)
                 return;
 
-        spin_lock(&anon_vma->lock);
+        anon_vma_lock(anon_vma);
         list_del(&anon_vma_chain->same_anon_vma);
 
         /* We must garbage collect the anon_vma if it's empty */
         empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
-        spin_unlock(&anon_vma->lock);
+        anon_vma_unlock(anon_vma);
 
-        if (empty)
+        if (empty) {
+                /* We no longer need the root anon_vma */
+                if (anon_vma->root != anon_vma)
+                        drop_anon_vma(anon_vma->root);
                 anon_vma_free(anon_vma);
+        }
 }
 
 void unlink_anon_vmas(struct vm_area_struct *vma)
 {
         struct anon_vma_chain *avc, *next;
 
-        /* Unlink each anon_vma chained to the VMA. */
+        /*
+         * Unlink each anon_vma chained to the VMA. This list is ordered
+         * from newest to oldest, ensuring the root anon_vma gets freed last.
+         */
         list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
                 anon_vma_unlink(avc);
                 list_del(&avc->same_vma);
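
anonvma_external_refcount() above, together with get_anon_vma() earlier and drop_anon_vma() added later in this diff, manages the external reference count that KSM and page migration hold on an anon_vma. The two small helpers are not part of this file; a plausible sketch, assuming struct anon_vma carries an atomic_t external_refcount when CONFIG_KSM or CONFIG_MIGRATION is enabled:

/* Sketch only: assumes an atomic_t external_refcount field in struct anon_vma. */
static inline int anonvma_external_refcount(struct anon_vma *anon_vma)
{
        return atomic_read(&anon_vma->external_refcount);
}

static inline void get_anon_vma(struct anon_vma *anon_vma)
{
        /* Pin the anon_vma (and thus its root lock) beyond process lifetime. */
        atomic_inc(&anon_vma->external_refcount);
}
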
@@ -303,7 +327,7 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
                 goto out;
 
         anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
-        spin_lock(&anon_vma->lock);
+        anon_vma_lock(anon_vma);
         return anon_vma;
 out:
         rcu_read_unlock();
@@ -312,7 +336,7 @@ out:
 
 void page_unlock_anon_vma(struct anon_vma *anon_vma)
 {
-        spin_unlock(&anon_vma->lock);
+        anon_vma_unlock(anon_vma);
         rcu_read_unlock();
 }
 
@@ -343,9 +367,10 @@ vma_address(struct page *page, struct vm_area_struct *vma)
  */
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
-        if (PageAnon(page))
-                ;
-        else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
+        if (PageAnon(page)) {
+                if (vma->anon_vma->root != page_anon_vma(page)->root)
+                        return -EFAULT;
+        } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
                 if (!vma->vm_file ||
                     vma->vm_file->f_mapping != page->mapping)
                         return -EFAULT;
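
page_anon_vma(), used by the new root comparison above, recovers the anon_vma pointer that __page_set_anon_rmap() stores in page->mapping with the PAGE_MAPPING_ANON bit set. It is defined outside this file (include/linux/mm.h in kernels of this vintage); roughly:

/* Sketch of the existing helper, for reference only. */
static inline struct anon_vma *page_anon_vma(struct page *page)
{
        unsigned long mapping = (unsigned long)page->mapping;

        /* Only anonymous pages encode an anon_vma in page->mapping. */
        if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
                return NULL;
        /* Strip the flag bits to get the anon_vma pointer back. */
        return (struct anon_vma *)(mapping & ~PAGE_MAPPING_FLAGS);
}
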
@@ -753,14 +778,20 @@ static void __page_set_anon_rmap(struct page *page,
          * If the page isn't exclusively mapped into this vma,
          * we must use the _oldest_ possible anon_vma for the
          * page mapping!
-         *
-         * So take the last AVC chain entry in the vma, which is
-         * the deepest ancestor, and use the anon_vma from that.
          */
         if (!exclusive) {
-                struct anon_vma_chain *avc;
-                avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
-                anon_vma = avc->anon_vma;
+                if (PageAnon(page))
+                        return;
+                anon_vma = anon_vma->root;
+        } else {
+                /*
+                 * In this case, swapped-out-but-not-discarded swap-cache
+                 * is remapped. So, no need to update page->mapping here.
+                 * We know the anon_vma pointed to by page->mapping is not
+                 * obsolete, because vma->anon_vma must be in its family.
+                 */
+                if (PageAnon(page))
+                        return;
         }
 
         anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
@@ -790,6 +821,7 @@ static void __page_check_anon_rmap(struct page *page,
          * are initially only visible via the pagetables, and the pte is locked
          * over the call to page_add_new_anon_rmap.
          */
+        BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
         BUG_ON(page->index != linear_page_index(vma, address));
 #endif
 }
@@ -808,6 +840,17 @@ static void __page_check_anon_rmap(struct page *page,
 void page_add_anon_rmap(struct page *page,
         struct vm_area_struct *vma, unsigned long address)
 {
+        do_page_add_anon_rmap(page, vma, address, 0);
+}
+
+/*
+ * Special version of the above for do_swap_page, which often runs
+ * into pages that are exclusively owned by the current process.
+ * Everybody else should continue to use page_add_anon_rmap above.
+ */
+void do_page_add_anon_rmap(struct page *page,
+        struct vm_area_struct *vma, unsigned long address, int exclusive)
+{
         int first = atomic_inc_and_test(&page->_mapcount);
         if (first)
                 __inc_zone_page_state(page, NR_ANON_PAGES);
@@ -817,7 +860,7 @@ void page_add_anon_rmap(struct page *page,
         VM_BUG_ON(!PageLocked(page));
         VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
         if (first)
-                __page_set_anon_rmap(page, vma, address, 0);
+                __page_set_anon_rmap(page, vma, address, exclusive);
         else
                 __page_check_anon_rmap(page, vma, address);
 }
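
The new exclusive argument lets the swap-in path tell __page_set_anon_rmap() that the page belongs only to the current process, so the page can stay attached to the youngest anon_vma instead of falling back to the root. The corresponding caller change is not part of this file; a hypothetical swap-in snippet, assuming do_swap_page()'s usual fault context (flags, reuse_swap_page()):

        /* Illustrative only: how a swap-in path might pass 'exclusive'. */
        int exclusive = 0;

        if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
                /* The swapped-in page is now owned by this process alone. */
                exclusive = 1;
        }
        do_page_add_anon_rmap(page, vma, address, exclusive);
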
@@ -1384,6 +1427,42 @@ int try_to_munlock(struct page *page)
         return try_to_unmap_file(page, TTU_MUNLOCK);
 }
 
+#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
+/*
+ * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
+ * if necessary. Be careful to do all the tests under the lock. Once
+ * we know we are the last user, nobody else can get a reference and we
+ * can do the freeing without the lock.
+ */
+void drop_anon_vma(struct anon_vma *anon_vma)
+{
+        BUG_ON(atomic_read(&anon_vma->external_refcount) <= 0);
+        if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
+                struct anon_vma *root = anon_vma->root;
+                int empty = list_empty(&anon_vma->head);
+                int last_root_user = 0;
+                int root_empty = 0;
+
+                /*
+                 * The refcount on a non-root anon_vma got dropped. Drop
+                 * the refcount on the root and check if we need to free it.
+                 */
+                if (empty && anon_vma != root) {
+                        BUG_ON(atomic_read(&root->external_refcount) <= 0);
+                        last_root_user = atomic_dec_and_test(&root->external_refcount);
+                        root_empty = list_empty(&root->head);
+                }
+                anon_vma_unlock(anon_vma);
+
+                if (empty) {
+                        anon_vma_free(anon_vma);
+                        if (root_empty && last_root_user)
+                                anon_vma_free(root);
+                }
+        }
+}
+#endif
+
 #ifdef CONFIG_MIGRATION
 /*
  * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
@@ -1405,7 +1484,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
         anon_vma = page_anon_vma(page);
         if (!anon_vma)
                 return ret;
-        spin_lock(&anon_vma->lock);
+        anon_vma_lock(anon_vma);
         list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
                 struct vm_area_struct *vma = avc->vma;
                 unsigned long address = vma_address(page, vma);
@@ -1415,7 +1494,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
                 if (ret != SWAP_AGAIN)
                         break;
         }
-        spin_unlock(&anon_vma->lock);
+        anon_vma_unlock(anon_vma);
         return ret;
 }
 