author     Dave Airlie <airlied@redhat.com>  2010-03-31 00:55:14 -0400
committer  Dave Airlie <airlied@redhat.com>  2010-03-31 00:55:14 -0400
commit     3595be778d8cb887f0e0575ef0a0c1a094d120bb (patch)
tree       15671ed8bd3597d2efe13aa57b755c66014acb57 /mm/rmap.c
parent     c414a117c6094c3f86b533f97beaf45ef9075f03 (diff)
parent     220bf991b0366cc50a94feede3d7341fa5710ee4 (diff)

Merge branch 'v2.6.34-rc2' into drm-linus
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--  mm/rmap.c  185
1 file changed, 148 insertions, 37 deletions
@@ -62,6 +62,7 @@
 #include "internal.h"
 
 static struct kmem_cache *anon_vma_cachep;
+static struct kmem_cache *anon_vma_chain_cachep;
 
 static inline struct anon_vma *anon_vma_alloc(void)
 {
@@ -73,6 +74,16 @@ void anon_vma_free(struct anon_vma *anon_vma)
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
 
+static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
+{
+	return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
+}
+
+void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
+{
+	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
+}
+
 /**
  * anon_vma_prepare - attach an anon_vma to a memory region
  * @vma: the memory region in question
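These two helpers manage the new anon_vma_chain objects, each of which ties one VMA to one anon_vma and threads onto two lists. For reference, the structure this series adds to include/linux/rmap.h looks like this:

struct anon_vma_chain {
	struct vm_area_struct *vma;
	struct anon_vma *anon_vma;
	struct list_head same_vma;	/* walked with mmap_sem/page_table_lock held */
	struct list_head same_anon_vma;	/* protected by anon_vma->lock */
};

The same_vma list enumerates every anon_vma a VMA is linked to; same_anon_vma enumerates every VMA an anon_vma may map pages into, which is what the rmap walkers further down traverse.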
@@ -103,18 +114,23 @@ void anon_vma_free(struct anon_vma *anon_vma)
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;
 
 	might_sleep();
 	if (unlikely(!anon_vma)) {
 		struct mm_struct *mm = vma->vm_mm;
 		struct anon_vma *allocated;
 
+		avc = anon_vma_chain_alloc();
+		if (!avc)
+			goto out_enomem;
+
 		anon_vma = find_mergeable_anon_vma(vma);
 		allocated = NULL;
 		if (!anon_vma) {
 			anon_vma = anon_vma_alloc();
 			if (unlikely(!anon_vma))
-				return -ENOMEM;
+				goto out_enomem_free_avc;
 			allocated = anon_vma;
 		}
 		spin_lock(&anon_vma->lock);
@@ -123,53 +139,113 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
 			vma->anon_vma = anon_vma;
-			list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+			avc->anon_vma = anon_vma;
+			avc->vma = vma;
+			list_add(&avc->same_vma, &vma->anon_vma_chain);
+			list_add(&avc->same_anon_vma, &anon_vma->head);
 			allocated = NULL;
 		}
 		spin_unlock(&mm->page_table_lock);
 
 		spin_unlock(&anon_vma->lock);
-		if (unlikely(allocated))
+		if (unlikely(allocated)) {
 			anon_vma_free(allocated);
+			anon_vma_chain_free(avc);
+		}
 	}
 	return 0;
+
+ out_enomem_free_avc:
+	anon_vma_chain_free(avc);
+ out_enomem:
+	return -ENOMEM;
 }
 
-void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
+static void anon_vma_chain_link(struct vm_area_struct *vma,
+				struct anon_vma_chain *avc,
+				struct anon_vma *anon_vma)
 {
-	BUG_ON(vma->anon_vma != next->anon_vma);
-	list_del(&next->anon_vma_node);
+	avc->vma = vma;
+	avc->anon_vma = anon_vma;
+	list_add(&avc->same_vma, &vma->anon_vma_chain);
+
+	spin_lock(&anon_vma->lock);
+	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
+	spin_unlock(&anon_vma->lock);
 }
 
-void __anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach the anon_vmas from src to dst.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc, *pavc;
 
-	if (anon_vma)
-		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+		avc = anon_vma_chain_alloc();
+		if (!avc)
+			goto enomem_failure;
+		anon_vma_chain_link(dst, avc, pavc->anon_vma);
+	}
+	return 0;
+
+ enomem_failure:
+	unlink_anon_vmas(dst);
+	return -ENOMEM;
 }
 
-void anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach vma to its own anon_vma, as well as to the anon_vmas that
+ * the corresponding VMA in the parent process is attached to.
+ * Returns 0 on success, non-zero on failure.
+ */
+int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;
+	struct anon_vma *anon_vma;
 
-	if (anon_vma) {
-		spin_lock(&anon_vma->lock);
-		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
-		spin_unlock(&anon_vma->lock);
-	}
+	/* Don't bother if the parent process has no anon_vma here. */
+	if (!pvma->anon_vma)
+		return 0;
+
+	/*
+	 * First, attach the new VMA to the parent VMA's anon_vmas,
+	 * so rmap can find non-COWed pages in child processes.
+	 */
+	if (anon_vma_clone(vma, pvma))
+		return -ENOMEM;
+
+	/* Then add our own anon_vma. */
+	anon_vma = anon_vma_alloc();
+	if (!anon_vma)
+		goto out_error;
+	avc = anon_vma_chain_alloc();
+	if (!avc)
+		goto out_error_free_anon_vma;
+	anon_vma_chain_link(vma, avc, anon_vma);
+	/* Mark this anon_vma as the one where our new (COWed) pages go. */
+	vma->anon_vma = anon_vma;
+
+	return 0;
+
+ out_error_free_anon_vma:
+	anon_vma_free(anon_vma);
+ out_error:
+	return -ENOMEM;
 }
 
-void anon_vma_unlink(struct vm_area_struct *vma)
+static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
 	int empty;
 
+	/* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
 	if (!anon_vma)
 		return;
 
 	spin_lock(&anon_vma->lock);
-	list_del(&vma->anon_vma_node);
+	list_del(&anon_vma_chain->same_anon_vma);
 
 	/* We must garbage collect the anon_vma if it's empty */
 	empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
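anon_vma_clone() covers VMA splits and moves, while anon_vma_fork() is the fork-time entry point; the old __anon_vma_merge()/__anon_vma_link()/anon_vma_link() calls disappear from the callers elsewhere in the series. A rough sketch of the fork-time call site, modeled on dup_mmap() in kernel/fork.c (copy_one_vma() here is a hypothetical stand-in for the surrounding VMA-duplication code, not a real kernel function):

/* Sketch only: copy_one_vma() is hypothetical shorthand. */
struct vm_area_struct *tmp = copy_one_vma(mm, mpnt);
if (!tmp)
	goto fail_nomem;
INIT_LIST_HEAD(&tmp->anon_vma_chain);	/* every VMA now carries a chain head */
if (anon_vma_fork(tmp, mpnt))		/* parent links first, then own anon_vma */
	goto fail_nomem_anon_vma_fork;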
@@ -179,6 +255,18 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 	anon_vma_free(anon_vma);
 }
 
+void unlink_anon_vmas(struct vm_area_struct *vma)
+{
+	struct anon_vma_chain *avc, *next;
+
+	/* Unlink each anon_vma chained to the VMA. */
+	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
+		anon_vma_unlink(avc);
+		list_del(&avc->same_vma);
+		anon_vma_chain_free(avc);
+	}
+}
+
 static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
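unlink_anon_vmas() is the teardown counterpart and doubles as the unwind path for a failed anon_vma_clone() above. It is expected to run once per VMA when an address space is torn down; a sketch of that shape, loosely modeled on the free_pgtables() loop in mm/memory.c:

/* Sketch: hide each VMA from rmap before freeing its page tables. */
while (vma) {
	struct vm_area_struct *next = vma->vm_next;

	unlink_anon_vmas(vma);		/* drops every chain link */
	unlink_file_vma(vma);
	/* page-table freeing elided */
	vma = next;
}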
@@ -192,6 +280,7 @@ void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
 			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
+	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
 }
 
 /*
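Unlike the anon_vma cache above, the chain cache needs no constructor, so the plain KMEM_CACHE() convenience macro suffices; per its definition in include/linux/slab.h, the added line expands to roughly:

anon_vma_chain_cachep = kmem_cache_create("anon_vma_chain",
		sizeof(struct anon_vma_chain),
		__alignof__(struct anon_vma_chain),
		SLAB_PANIC, NULL);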
@@ -396,7 +485,7 @@ static int page_referenced_anon(struct page *page,
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int referenced = 0;
 
 	anon_vma = page_lock_anon_vma(page);
@@ -404,7 +493,8 @@ static int page_referenced_anon(struct page *page,
 		return referenced;
 
 	mapcount = page_mapcount(page);
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
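This is the traversal change in its canonical form, and it repeats below in try_to_unmap_anon() and rmap_walk_anon(): anon_vma->head now links chain entries rather than VMAs, so every walker pays one extra dereference to reach the VMA. The idiom, before and after:

/* Before: VMAs were threaded onto the anon_vma directly. */
list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
	/* ... use vma ... */;

/* After: the list holds anon_vma_chain entries naming their VMA. */
list_for_each_entry(avc, &anon_vma->head, same_anon_vma)
	/* ... use avc->vma ... */;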
@@ -511,9 +601,6 @@ int page_referenced(struct page *page,
 	int referenced = 0;
 	int we_locked = 0;
 
-	if (TestClearPageReferenced(page))
-		referenced++;
-
 	*vm_flags = 0;
 	if (page_mapped(page) && page_rmapping(page)) {
 		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
@@ -614,6 +701,30 @@ int page_mkclean(struct page *page)
 EXPORT_SYMBOL_GPL(page_mkclean);
 
 /**
+ * page_move_anon_rmap - move a page to our anon_vma
+ * @page: the page to move to our anon_vma
+ * @vma: the vma the page belongs to
+ * @address: the user virtual address mapped
+ *
+ * When a page belongs exclusively to one process after a COW event,
+ * that page can be moved into the anon_vma that belongs to just that
+ * process, so the rmap code will not search the parent or sibling
+ * processes.
+ */
+void page_move_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(!anon_vma);
+	VM_BUG_ON(page->index != linear_page_index(vma, address));
+
+	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+	page->mapping = (struct address_space *) anon_vma;
+}
+
+/**
  * __page_set_anon_rmap - setup new anonymous rmap
  * @page: the page to add the mapping to
  * @vma: the vm area in which the mapping is added
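The final two statements rely on the long-standing anon rmap encoding: an anonymous page keeps its anon_vma pointer in page->mapping with the low PAGE_MAPPING_ANON bit set as a type tag, which is why the cast goes through void *. A minimal sketch of the convention (these helper names are illustrative, not the kernel's; the real accessors are PageAnon() and friends):

#define PAGE_MAPPING_ANON	1	/* low bit tags an anon mapping */

static inline int page_mapping_is_anon(struct page *page)
{
	return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
}

static inline struct anon_vma *page_mapping_anon_vma(struct page *page)
{
	return (struct anon_vma *)((unsigned long)page->mapping -
				   PAGE_MAPPING_ANON);
}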
@@ -652,9 +763,6 @@ static void __page_check_anon_rmap(struct page *page,
 	 * are initially only visible via the pagetables, and the pte is locked
 	 * over the call to page_add_new_anon_rmap.
 	 */
-	struct anon_vma *anon_vma = vma->anon_vma;
-	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
-	BUG_ON(page->mapping != (struct address_space *)anon_vma);
 	BUG_ON(page->index != linear_page_index(vma, address));
 #endif
 }
@@ -815,9 +923,9 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 
 	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
 		if (PageAnon(page))
-			dec_mm_counter(mm, anon_rss);
+			dec_mm_counter(mm, MM_ANONPAGES);
 		else
-			dec_mm_counter(mm, file_rss);
+			dec_mm_counter(mm, MM_FILEPAGES);
 		set_pte_at(mm, address, pte,
 			   swp_entry_to_pte(make_hwpoison_entry(page)));
 	} else if (PageAnon(page)) {
@@ -839,7 +947,8 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			list_add(&mm->mmlist, &init_mm.mmlist);
 			spin_unlock(&mmlist_lock);
 		}
-		dec_mm_counter(mm, anon_rss);
+		dec_mm_counter(mm, MM_ANONPAGES);
+		inc_mm_counter(mm, MM_SWAPENTS);
 	} else if (PAGE_MIGRATION) {
 		/*
 		 * Store the pfn of the page in a special migration
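The anon_rss/file_rss renames here, and the new MM_SWAPENTS bookkeeping, come in from the v2.6.34-rc2 side of this merge rather than from the anon_vma work: the two ad-hoc mm_struct fields were replaced by an indexed counter array. Roughly, per include/linux/mm_types.h in that series:

/* Per-mm RSS counters, indexed rather than individually named. */
enum {
	MM_FILEPAGES,	/* resident file-backed pages */
	MM_ANONPAGES,	/* resident anonymous pages */
	MM_SWAPENTS,	/* anon pages currently swapped out */
	NR_MM_COUNTERS
};

dec_mm_counter()/inc_mm_counter() adjust the slot named by their second argument, so unmapping an anonymous page to swap decrements MM_ANONPAGES and increments MM_SWAPENTS in one place.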
@@ -857,7 +966,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		entry = make_migration_entry(page, pte_write(pteval));
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 	} else
-		dec_mm_counter(mm, file_rss);
+		dec_mm_counter(mm, MM_FILEPAGES);
 
 	page_remove_rmap(page);
 	page_cache_release(page);
@@ -996,7 +1105,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 
 		page_remove_rmap(page);
 		page_cache_release(page);
-		dec_mm_counter(mm, file_rss);
+		dec_mm_counter(mm, MM_FILEPAGES);
 		(*mapcount)--;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
@@ -1024,14 +1133,15 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;
 
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1222,7 +1332,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 		struct vm_area_struct *, unsigned long, void *), void *arg)
 {
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
 	/*
@@ -1237,7 +1347,8 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 	if (!anon_vma)
 		return ret;
 	spin_lock(&anon_vma->lock);
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;