author      Ingo Molnar <mingo@kernel.org>              2012-12-02 14:56:50 -0500
committer   Mel Gorman <mgorman@suse.de>                2012-12-11 09:43:00 -0500
commit      4fc3f1d66b1ef0d7b8dc11f4ff1cc510f78b37d6 (patch)
tree        90baaa56f6b5244525c7637a14550f830486112a /include
parent      5a505085f043e8380f83610f79642853c051e2f1 (diff)
mm/rmap, migration: Make rmap_walk_anon() and try_to_unmap_anon() more scalable
rmap_walk_anon() and try_to_unmap_anon() appear to be too
careful about locking the anon vma: while they need protection
against anon vma list modifications, they do not need exclusive
access to the list itself.

Transforming this exclusive lock into a read-locked rwsem removes
a global lock from the hot path of page-migration-intensive
threaded workloads, which can otherwise show pathological
performance like this:
    96.43%  process 0  [kernel.kallsyms]  [k] perf_trace_sched_switch
            |
            --- perf_trace_sched_switch
                __schedule
                schedule
                schedule_preempt_disabled
                __mutex_lock_common.isra.6
                __mutex_lock_slowpath
                mutex_lock
                |
                |--50.61%-- rmap_walk
                |           move_to_new_page
                |           migrate_pages
                |           migrate_misplaced_page
                |           __do_numa_page.isra.69
                |           handle_pte_fault
                |           handle_mm_fault
                |           __do_page_fault
                |           do_page_fault
                |           page_fault
                |           __memset_sse2
                |           |
                |           --100.00%-- worker_thread
                |                       |
                |                       --100.00%-- start_thread
                |
                --49.39%-- page_lock_anon_vma
                           try_to_unmap_anon
                           try_to_unmap
                           migrate_pages
                           migrate_misplaced_page
                           __do_numa_page.isra.69
                           handle_pte_fault
                           handle_mm_fault
                           __do_page_fault
                           do_page_fault
                           page_fault
                           __memset_sse2
                           |
                           --100.00%-- worker_thread
                                       start_thread
With this change applied, the profile is nicely flat and there
is no anon-vma-related scheduling/blocking.

Also rename anon_vma_lock() => anon_vma_lock_write(), to make it
clearer that it takes an exclusive write-lock in that case
(suggested by Rik van Riel).
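To illustrate the locking idea outside the kernel, here is a minimal
userspace sketch using a POSIX rwlock as a stand-in for the kernel
rwsem. All names below are illustrative analogues of the kernel
helpers, not the kernel API itself:

/*
 * Userspace sketch: writers that modify the anon vma list still take
 * the lock exclusively, but concurrent walkers now hold it in shared
 * mode instead of serializing on a mutex.  Build with: cc -lpthread
 */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t anon_vma_rwsem = PTHREAD_RWLOCK_INITIALIZER;

static void *rmap_walker(void *arg)
{
	pthread_rwlock_rdlock(&anon_vma_rwsem);   /* ~ anon_vma_lock_read() */
	printf("walker %ld holds the lock in shared mode\n", (long)arg);
	pthread_rwlock_unlock(&anon_vma_rwsem);   /* ~ anon_vma_unlock_read() */
	return NULL;
}

static void list_modifier(void)
{
	pthread_rwlock_wrlock(&anon_vma_rwsem);   /* ~ anon_vma_lock_write() */
	/* ... insert/remove a list entry here ... */
	pthread_rwlock_unlock(&anon_vma_rwsem);
}

int main(void)
{
	pthread_t t[2];

	for (long i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, rmap_walker, (void *)i);
	for (int i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	list_modifier();
	return 0;
}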
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Diffstat (limited to 'include')
-rw-r--r--   include/linux/huge_mm.h |  2 +-
-rw-r--r--   include/linux/rmap.h    | 17 ++++++++++++++---
2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 027ad04ef3a8..0d1208c0bdc4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -102,7 +102,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 #define wait_split_huge_page(__anon_vma, __pmd)			\
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
-		anon_vma_lock(__anon_vma);				\
+		anon_vma_lock_write(__anon_vma);			\
 		anon_vma_unlock(__anon_vma);				\
 		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
 		       pmd_trans_huge(*____pmd));			\
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index f3f41d242e25..c20635c527a9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -118,7 +118,7 @@ static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 	up_write(&anon_vma->root->rwsem);
 }
 
-static inline void anon_vma_lock(struct anon_vma *anon_vma)
+static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
 	down_write(&anon_vma->root->rwsem);
 }
@@ -128,6 +128,17 @@ static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 	up_write(&anon_vma->root->rwsem);
 }
 
+static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
+{
+	down_read(&anon_vma->root->rwsem);
+}
+
+static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
+{
+	up_read(&anon_vma->root->rwsem);
+}
+
+
 /*
  * anon_vma helper functions.
  */
@@ -220,8 +231,8 @@ int try_to_munlock(struct page *);
 /*
  * Called by memory-failure.c to kill processes.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page);
-void page_unlock_anon_vma(struct anon_vma *anon_vma);
+struct anon_vma *page_lock_anon_vma_read(struct page *page);
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*