diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-06-16 23:44:51 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-06-17 22:20:49 -0400 |
commit | bb4aa39676f73b4657b3edd893ae83881c430c0c (patch) | |
tree | 6b8db9ed4a9e3fb6c232dd8447b0d24e76f5885a /mm/rmap.c | |
parent | eb96c925152fc289311e5d7e956b919e9b60ab53 (diff) |
mm: avoid repeated anon_vma lock/unlock sequences in anon_vma_clone()
In anon_vma_clone() we traverse the vma->anon_vma_chain of the source
vma, locking the anon_vma for each entry.
But they are all going to have the same root entry, which means that
we're locking and unlocking the same lock over and over again. Which is
expensive in locked operations, but can get _really_ expensive when that
root entry sees any kind of lock contention.
In fact, Tim Chen reports a big performance regression due to this: when
we switched to use a mutex instead of a spinlock, the contention case
gets much worse.
So to alleviate this all, this commit creates a small helper function
(lock_anon_vma_root()) that can be used to take the lock just once
rather than taking and releasing it over and over again.
We still have the same "take the lock and release it" behavior in the
exit path (in unlink_anon_vmas()), but that one is a bit harder to fix
since we're actually freeing the anon_vma entries as we go, and that
will touch the lock too.
Reported-and-tested-by: Tim Chen <tim.c.chen@linux.intel.com>
Tested-by: Hugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 39 |
1 files changed, 36 insertions, 3 deletions
@@ -200,6 +200,32 @@ int anon_vma_prepare(struct vm_area_struct *vma) | |||
200 | return -ENOMEM; | 200 | return -ENOMEM; |
201 | } | 201 | } |
202 | 202 | ||
203 | /* | ||
204 | * This is a useful helper function for locking the anon_vma root as | ||
205 | * we traverse the vma->anon_vma_chain, looping over anon_vma's that | ||
206 | * have the same vma. | ||
207 | * | ||
208 | * Such anon_vma's should have the same root, so you'd expect to see | ||
209 | * just a single mutex_lock for the whole traversal. | ||
210 | */ | ||
211 | static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma) | ||
212 | { | ||
213 | struct anon_vma *new_root = anon_vma->root; | ||
214 | if (new_root != root) { | ||
215 | if (WARN_ON_ONCE(root)) | ||
216 | mutex_unlock(&root->mutex); | ||
217 | root = new_root; | ||
218 | mutex_lock(&root->mutex); | ||
219 | } | ||
220 | return root; | ||
221 | } | ||
222 | |||
223 | static inline void unlock_anon_vma_root(struct anon_vma *root) | ||
224 | { | ||
225 | if (root) | ||
226 | mutex_unlock(&root->mutex); | ||
227 | } | ||
228 | |||
203 | static void anon_vma_chain_link(struct vm_area_struct *vma, | 229 | static void anon_vma_chain_link(struct vm_area_struct *vma, |
204 | struct anon_vma_chain *avc, | 230 | struct anon_vma_chain *avc, |
205 | struct anon_vma *anon_vma) | 231 | struct anon_vma *anon_vma) |
@@ -208,13 +234,11 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, | |||
208 | avc->anon_vma = anon_vma; | 234 | avc->anon_vma = anon_vma; |
209 | list_add(&avc->same_vma, &vma->anon_vma_chain); | 235 | list_add(&avc->same_vma, &vma->anon_vma_chain); |
210 | 236 | ||
211 | anon_vma_lock(anon_vma); | ||
212 | /* | 237 | /* |
213 | * It's critical to add new vmas to the tail of the anon_vma, | 238 | * It's critical to add new vmas to the tail of the anon_vma, |
214 | * see comment in huge_memory.c:__split_huge_page(). | 239 | * see comment in huge_memory.c:__split_huge_page(). |
215 | */ | 240 | */ |
216 | list_add_tail(&avc->same_anon_vma, &anon_vma->head); | 241 | list_add_tail(&avc->same_anon_vma, &anon_vma->head); |
217 | anon_vma_unlock(anon_vma); | ||
218 | } | 242 | } |
219 | 243 | ||
220 | /* | 244 | /* |
@@ -224,16 +248,23 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, | |||
224 | int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) | 248 | int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) |
225 | { | 249 | { |
226 | struct anon_vma_chain *avc, *pavc; | 250 | struct anon_vma_chain *avc, *pavc; |
251 | struct anon_vma *root = NULL; | ||
227 | 252 | ||
228 | list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) { | 253 | list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) { |
254 | struct anon_vma *anon_vma; | ||
255 | |||
229 | avc = anon_vma_chain_alloc(); | 256 | avc = anon_vma_chain_alloc(); |
230 | if (!avc) | 257 | if (!avc) |
231 | goto enomem_failure; | 258 | goto enomem_failure; |
232 | anon_vma_chain_link(dst, avc, pavc->anon_vma); | 259 | anon_vma = pavc->anon_vma; |
260 | root = lock_anon_vma_root(root, anon_vma); | ||
261 | anon_vma_chain_link(dst, avc, anon_vma); | ||
233 | } | 262 | } |
263 | unlock_anon_vma_root(root); | ||
234 | return 0; | 264 | return 0; |
235 | 265 | ||
236 | enomem_failure: | 266 | enomem_failure: |
267 | unlock_anon_vma_root(root); | ||
237 | unlink_anon_vmas(dst); | 268 | unlink_anon_vmas(dst); |
238 | return -ENOMEM; | 269 | return -ENOMEM; |
239 | } | 270 | } |
@@ -280,7 +311,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) | |||
280 | get_anon_vma(anon_vma->root); | 311 | get_anon_vma(anon_vma->root); |
281 | /* Mark this anon_vma as the one where our new (COWed) pages go. */ | 312 | /* Mark this anon_vma as the one where our new (COWed) pages go. */ |
282 | vma->anon_vma = anon_vma; | 313 | vma->anon_vma = anon_vma; |
314 | anon_vma_lock(anon_vma); | ||
283 | anon_vma_chain_link(vma, avc, anon_vma); | 315 | anon_vma_chain_link(vma, avc, anon_vma); |
316 | anon_vma_unlock(anon_vma); | ||
284 | 317 | ||
285 | return 0; | 318 | return 0; |
286 | 319 | ||