author    Linus Torvalds <torvalds@linux-foundation.org>  2011-06-16 23:44:51 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2011-06-17 22:20:49 -0400
commit    bb4aa39676f73b4657b3edd893ae83881c430c0c (patch)
tree      6b8db9ed4a9e3fb6c232dd8447b0d24e76f5885a
parent    eb96c925152fc289311e5d7e956b919e9b60ab53 (diff)
mm: avoid repeated anon_vma lock/unlock sequences in anon_vma_clone()
In anon_vma_clone() we traverse the vma->anon_vma_chain of the source
vma, locking the anon_vma for each entry.

But they are all going to have the same root entry, which means that
we're locking and unlocking the same lock over and over again.  Which is
expensive in locked operations, but can get _really_ expensive when that
root entry sees any kind of lock contention.

In fact, Tim Chen reports a big performance regression due to this: when
we switched to use a mutex instead of a spinlock, the contention case
gets much worse.

So to alleviate this all, this commit creates a small helper function
(lock_anon_vma_root()) that can be used to take the lock just once
rather than taking and releasing it over and over again.

We still have the same "take the lock and release it" behavior in the
exit path (in unlink_anon_vmas()), but that one is a bit harder to fix
since we're actually freeing the anon_vma entries as we go, and that
will touch the lock too.

Reported-and-tested-by: Tim Chen <tim.c.chen@linux.intel.com>
Tested-by: Hugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
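The pattern in this commit is easy to see in isolation. Below is a minimal
user-space sketch of the same "cache the last root locked" idea, using POSIX
mutexes; struct root, struct entry, lock_root() and walk_chain() are
hypothetical stand-ins for illustration, not the kernel code in the diff
below:

    #include <pthread.h>
    #include <stddef.h>

    /* Hypothetical stand-ins for anon_vma and anon_vma_chain. */
    struct root {
            pthread_mutex_t mutex;
    };

    struct entry {
            struct root *root;
            struct entry *next;
    };

    /* Take e's root lock only if it differs from the one already held. */
    static struct root *lock_root(struct root *held, const struct entry *e)
    {
            struct root *new_root = e->root;

            if (new_root != held) {
                    if (held)  /* a different root mid-walk: drop the old lock */
                            pthread_mutex_unlock(&held->mutex);
                    held = new_root;
                    pthread_mutex_lock(&held->mutex);
            }
            return held;
    }

    static void walk_chain(struct entry *head)
    {
            struct root *held = NULL;
            struct entry *e;

            for (e = head; e; e = e->next) {
                    held = lock_root(held, e);
                    /* ... per-entry work, done under held->mutex ... */
            }
            if (held)  /* one unlock for the whole walk */
                    pthread_mutex_unlock(&held->mutex);
    }

Since every entry on the chain normally shares one root, the walk takes and
drops the mutex exactly once instead of once per entry; the kernel helper
additionally uses WARN_ON_ONCE() to flag the unexpected case of two different
roots showing up mid-walk.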
-rw-r--r--  mm/rmap.c | 39
1 file changed, 36 insertions(+), 3 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index 0eb463ea88dd..f286697c61dc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -200,6 +200,32 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 	return -ENOMEM;
 }
 
+/*
+ * This is a useful helper function for locking the anon_vma root as
+ * we traverse the vma->anon_vma_chain, looping over anon_vma's that
+ * have the same vma.
+ *
+ * Such anon_vma's should have the same root, so you'd expect to see
+ * just a single mutex_lock for the whole traversal.
+ */
+static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
+{
+	struct anon_vma *new_root = anon_vma->root;
+	if (new_root != root) {
+		if (WARN_ON_ONCE(root))
+			mutex_unlock(&root->mutex);
+		root = new_root;
+		mutex_lock(&root->mutex);
+	}
+	return root;
+}
+
+static inline void unlock_anon_vma_root(struct anon_vma *root)
+{
+	if (root)
+		mutex_unlock(&root->mutex);
+}
+
 static void anon_vma_chain_link(struct vm_area_struct *vma,
 				struct anon_vma_chain *avc,
 				struct anon_vma *anon_vma)
@@ -208,13 +234,11 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
 	avc->anon_vma = anon_vma;
 	list_add(&avc->same_vma, &vma->anon_vma_chain);
 
-	anon_vma_lock(anon_vma);
 	/*
 	 * It's critical to add new vmas to the tail of the anon_vma,
 	 * see comment in huge_memory.c:__split_huge_page().
 	 */
 	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
-	anon_vma_unlock(anon_vma);
 }
 
 /*
@@ -224,16 +248,23 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
 int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
 	struct anon_vma_chain *avc, *pavc;
+	struct anon_vma *root = NULL;
 
 	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
+		struct anon_vma *anon_vma;
+
 		avc = anon_vma_chain_alloc();
 		if (!avc)
 			goto enomem_failure;
-		anon_vma_chain_link(dst, avc, pavc->anon_vma);
+		anon_vma = pavc->anon_vma;
+		root = lock_anon_vma_root(root, anon_vma);
+		anon_vma_chain_link(dst, avc, anon_vma);
 	}
+	unlock_anon_vma_root(root);
 	return 0;
 
  enomem_failure:
+	unlock_anon_vma_root(root);
 	unlink_anon_vmas(dst);
 	return -ENOMEM;
 }
@@ -280,7 +311,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	get_anon_vma(anon_vma->root);
 	/* Mark this anon_vma as the one where our new (COWed) pages go. */
 	vma->anon_vma = anon_vma;
+	anon_vma_lock(anon_vma);
 	anon_vma_chain_link(vma, avc, anon_vma);
+	anon_vma_unlock(anon_vma);
 
 	return 0;
 