Diffstat (limited to 'mm/rmap.c')
-rw-r--r--  mm/rmap.c | 156
1 file changed, 130 insertions(+), 26 deletions(-)
@@ -62,6 +62,7 @@
 #include "internal.h"
 
 static struct kmem_cache *anon_vma_cachep;
+static struct kmem_cache *anon_vma_chain_cachep;
 
 static inline struct anon_vma *anon_vma_alloc(void)
 {
@@ -73,6 +74,16 @@ void anon_vma_free(struct anon_vma *anon_vma)
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
 
+static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
+{
+	return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
+}
+
+void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
+{
+	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
+}
+
 /**
  * anon_vma_prepare - attach an anon_vma to a memory region
  * @vma: the memory region in question
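These alloc/free helpers manage the new connector objects that tie VMAs to anon_vmas. The structure itself is defined in include/linux/rmap.h rather than in this file, so it does not appear in this diff; the sketch below is inferred from the field names the hunks use (vma, anon_vma, same_vma, same_anon_vma) and should be read as an illustration, not the authoritative definition:

struct anon_vma_chain {
	struct vm_area_struct *vma;	/* the VMA this entry belongs to */
	struct anon_vma *anon_vma;	/* the anon_vma that VMA is linked to */
	struct list_head same_vma;	/* entry on vma->anon_vma_chain */
	struct list_head same_anon_vma;	/* entry on anon_vma->head */
};

One connector exists per (VMA, anon_vma) pair: an anon_vma can reach every VMA that might map its pages via same_anon_vma, and a VMA can reach every anon_vma it is attached to via same_vma.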
@@ -103,18 +114,23 @@ void anon_vma_free(struct anon_vma *anon_vma)
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;
 
 	might_sleep();
 	if (unlikely(!anon_vma)) {
 		struct mm_struct *mm = vma->vm_mm;
 		struct anon_vma *allocated;
 
+		avc = anon_vma_chain_alloc();
+		if (!avc)
+			goto out_enomem;
+
 		anon_vma = find_mergeable_anon_vma(vma);
 		allocated = NULL;
 		if (!anon_vma) {
 			anon_vma = anon_vma_alloc();
 			if (unlikely(!anon_vma))
-				return -ENOMEM;
+				goto out_enomem_free_avc;
 			allocated = anon_vma;
 		}
 		spin_lock(&anon_vma->lock);
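Worth noting in this hunk: anon_vma_chain_alloc() allocates with GFP_KERNEL and may sleep (hence the might_sleep() above), so it must run before the spinlocks taken below. The code follows the usual allocate-first, publish-under-lock, free-if-raced pattern; a generic sketch of that shape, where the lock, the published flag and publish() are hypothetical stand-ins:

avc = anon_vma_chain_alloc();	/* may sleep: must precede spin_lock() */
spin_lock(&lock);
if (!published) {
	publish(avc);		/* won the race: the object goes live */
	avc = NULL;
}
spin_unlock(&lock);
if (avc)
	anon_vma_chain_free(avc);	/* lost the race: return it unused */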
@@ -123,53 +139,113 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
 			vma->anon_vma = anon_vma;
-			list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+			avc->anon_vma = anon_vma;
+			avc->vma = vma;
+			list_add(&avc->same_vma, &vma->anon_vma_chain);
+			list_add(&avc->same_anon_vma, &anon_vma->head);
 			allocated = NULL;
 		}
 		spin_unlock(&mm->page_table_lock);
 
 		spin_unlock(&anon_vma->lock);
-		if (unlikely(allocated))
+		if (unlikely(allocated)) {
 			anon_vma_free(allocated);
+			anon_vma_chain_free(avc);
+		}
 	}
 	return 0;
+
+ out_enomem_free_avc:
+	anon_vma_chain_free(avc);
+ out_enomem:
+	return -ENOMEM;
 }
 
-void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
+static void anon_vma_chain_link(struct vm_area_struct *vma,
+				struct anon_vma_chain *avc,
+				struct anon_vma *anon_vma)
 {
-	BUG_ON(vma->anon_vma != next->anon_vma);
-	list_del(&next->anon_vma_node);
+	avc->vma = vma;
+	avc->anon_vma = anon_vma;
+	list_add(&avc->same_vma, &vma->anon_vma_chain);
+
+	spin_lock(&anon_vma->lock);
+	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
+	spin_unlock(&anon_vma->lock);
 }
 
-void __anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach the anon_vmas from src to dst.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc, *pavc;
+
+	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+		avc = anon_vma_chain_alloc();
+		if (!avc)
+			goto enomem_failure;
+		anon_vma_chain_link(dst, avc, pavc->anon_vma);
+	}
+	return 0;
 
-	if (anon_vma)
-		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+ enomem_failure:
+	unlink_anon_vmas(dst);
+	return -ENOMEM;
 }
 
-void anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach vma to its own anon_vma, as well as to the anon_vmas that
+ * the corresponding VMA in the parent process is attached to.
+ * Returns 0 on success, non-zero on failure.
+ */
+int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;
+	struct anon_vma *anon_vma;
 
-	if (anon_vma) {
-		spin_lock(&anon_vma->lock);
-		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
-		spin_unlock(&anon_vma->lock);
-	}
+	/* Don't bother if the parent process has no anon_vma here. */
+	if (!pvma->anon_vma)
+		return 0;
+
+	/*
+	 * First, attach the new VMA to the parent VMA's anon_vmas,
+	 * so rmap can find non-COWed pages in child processes.
+	 */
+	if (anon_vma_clone(vma, pvma))
+		return -ENOMEM;
+
+	/* Then add our own anon_vma. */
+	anon_vma = anon_vma_alloc();
+	if (!anon_vma)
+		goto out_error;
+	avc = anon_vma_chain_alloc();
+	if (!avc)
+		goto out_error_free_anon_vma;
+	anon_vma_chain_link(vma, avc, anon_vma);
+	/* Mark this anon_vma as the one where our new (COWed) pages go. */
+	vma->anon_vma = anon_vma;
+
+	return 0;
+
+ out_error_free_anon_vma:
+	anon_vma_free(anon_vma);
+ out_error:
+	return -ENOMEM;
 }
 
-void anon_vma_unlink(struct vm_area_struct *vma)
+static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
 	int empty;
 
+	/* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
 	if (!anon_vma)
 		return;
 
 	spin_lock(&anon_vma->lock);
-	list_del(&vma->anon_vma_node);
+	list_del(&anon_vma_chain->same_anon_vma);
 
 	/* We must garbage collect the anon_vma if it's empty */
 	empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
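anon_vma_fork() is the entry point the fork path uses; its real call site is in the kernel/fork.c part of this patch, outside this file's diff. A sketch of what such a caller looks like, with copy_one_vma() and its parameter names as hypothetical stand-ins for the dup_mmap() code:

static int copy_one_vma(struct vm_area_struct *tmp,	/* child's copy */
			struct vm_area_struct *mpnt)	/* parent's VMA */
{
	INIT_LIST_HEAD(&tmp->anon_vma_chain);	/* start with an empty chain */
	/*
	 * Link the child to all of the parent's anon_vmas (so rmap can
	 * still find pages shared with the parent) and give it a fresh
	 * anon_vma of its own for pages COWed after the fork.
	 */
	if (anon_vma_fork(tmp, mpnt))
		return -ENOMEM;
	return 0;
}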
@@ -179,6 +255,18 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 		anon_vma_free(anon_vma);
 }
 
+void unlink_anon_vmas(struct vm_area_struct *vma)
+{
+	struct anon_vma_chain *avc, *next;
+
+	/* Unlink each anon_vma chained to the VMA. */
+	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
+		anon_vma_unlink(avc);
+		list_del(&avc->same_vma);
+		anon_vma_chain_free(avc);
+	}
+}
+
 static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
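unlink_anon_vmas() frees each connector from inside the loop, which is why it uses list_for_each_entry_safe(): the _safe variant caches the next pointer before the loop body runs, so deleting and freeing the current entry is harmless. The same idiom in miniature, with struct item and some_list as hypothetical names:

struct item { struct list_head node; };
struct list_head *pos, *n;

list_for_each_safe(pos, n, &some_list) {	/* 'n' caches pos->next */
	struct item *it = list_entry(pos, struct item, node);
	list_del(pos);
	kfree(it);	/* safe: the iterator advances through 'n' */
}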
@@ -192,6 +280,7 @@ void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
 			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
+	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
 }
 
 /*
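KMEM_CACHE() is the slab.h convenience macro for a cache named after a struct; the added line is equivalent to an open-coded call in the style used for anon_vma_cachep above:

anon_vma_chain_cachep = kmem_cache_create("anon_vma_chain",
			sizeof(struct anon_vma_chain),
			__alignof__(struct anon_vma_chain),
			SLAB_PANIC, NULL);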
@@ -240,6 +329,18 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 		/* page should be within @vma mapping range */
 		return -EFAULT;
 	}
+	if (unlikely(vma->vm_flags & VM_LOCK_RMAP)) {
+		/*
+		 * This VMA is being unlinked or is not yet linked into the
+		 * VMA tree.  Do not try to follow this rmap.  This race
+		 * condition can result in page_referenced() ignoring a
+		 * reference or in try_to_unmap() failing to unmap a page.
+		 * The VMA cannot be freed under us because we hold the
+		 * anon_vma->lock, which the munmap code takes while
+		 * unlinking the anon_vmas from the VMA.
+		 */
+		return -EFAULT;
+	}
 	return address;
 }
 
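The side that sets VM_LOCK_RMAP lives in the mm/mmap.c hunks of this patch, not in this file. The sketch below only illustrates the ordering the comment relies on, and is an assumption about that code rather than a quote of it:

/* unmap side (sketch): mark the VMA before unlinking its anon_vmas */
vma->vm_flags |= VM_LOCK_RMAP;
unlink_anon_vmas(vma);		/* takes each anon_vma->lock to list_del() */
/*
 * An rmap walker holds anon_vma->lock while it calls vma_address(), so
 * it either runs before the list_del() and sees VM_LOCK_RMAP set
 * (returning -EFAULT above), or runs after it and never sees this VMA.
 */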
@@ -396,7 +497,7 @@ static int page_referenced_anon(struct page *page,
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int referenced = 0;
 
 	anon_vma = page_lock_anon_vma(page);
@@ -404,7 +505,8 @@ static int page_referenced_anon(struct page *page,
 		return referenced;
 
 	mapcount = page_mapcount(page);
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1025,14 +1127,15 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;
 
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1223,7 +1326,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 		struct vm_area_struct *, unsigned long, void *), void *arg)
 {
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
 	/*
@@ -1238,7 +1341,8 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 	if (!anon_vma)
 		return ret;
 	spin_lock(&anon_vma->lock);
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
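The same mechanical change repeats in page_referenced_anon(), try_to_unmap_anon() and rmap_walk_anon(): anon_vma->head now chains anon_vma_chain entries rather than VMAs, so each walker takes one extra hop through avc->vma. Condensed into a single hypothetical helper (not part of the patch), assuming anon_vma->lock is already held:

static void for_each_vma_of_anon_vma(struct anon_vma *anon_vma,
				     struct page *page)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
		struct vm_area_struct *vma = avc->vma;	/* hop through the connector */
		unsigned long address = vma_address(page, vma);

		if (address == -EFAULT)	/* not mapped here, or VM_LOCK_RMAP */
			continue;
		/* per-VMA work (reference test, unmap attempt, ...) goes here */
	}
}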