commit    7225522bb429a2f7dae6667e533e2d735b4882d0
tree      bea75219dc36159322ddcd6de8dc4a4d5e1f6c79  /mm/mlock.c
parent    586a32ac1d33ce7a7548a27e4087e98842c3a06f
author    Vlastimil Babka <vbabka@suse.cz>                2013-09-11 17:22:29 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-09-11 18:58:00 -0400
mm: munlock: batch non-THP page isolation and munlock+putback using pagevec
Currently, munlock_vma_pages_range() calls munlock_vma_page() on each page in a loop, which results in repeated taking and releasing of the lru_lock spinlock to isolate pages one by one. This patch batches the munlock operations using an on-stack pagevec, so that isolation is done under a single lru_lock. For THP pages, the old behavior is preserved, as they might be split while being put into the pagevec. After this patch, a 9% speedup was measured for munlocking a 56GB large memory area with THP disabled.

A new function __munlock_pagevec() is introduced that takes a pagevec and:

1) clears PageMlocked and isolates all pages under lru_lock. Zone page stats can also be updated using the variant that assumes disabled interrupts.

2) finishes the munlock and lru putback on all pages while holding their page lock. Note that previously, lock_page also covered the PageMlocked clearing and page isolation, but it is not needed for those operations.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Jörn Engel <joern@logfs.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
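In outline, the new __munlock_pagevec() implements the two phases described above. The following is a condensed sketch of the function this patch adds (zone page-state and isolation-failure accounting are simplified, and the THP handling stays in the caller; see the full diff below for the actual code):

/* Condensed sketch of __munlock_pagevec(); simplified, see the diff below. */
static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
{
        int i, nr = pagevec_count(pvec);

        /* Phase 1: clear PageMlocked and isolate pages under one lru_lock */
        spin_lock_irq(&zone->lru_lock);
        for (i = 0; i < nr; i++) {
                struct page *page = pvec->pages[i];

                if (TestClearPageMlocked(page) && PageLRU(page)) {
                        /* interrupts are disabled, so the __ stat variant is safe */
                        __mod_zone_page_state(zone, NR_MLOCK, -1);
                        get_page(page);
                        ClearPageLRU(page);
                        del_page_from_lru_list(page,
                                        mem_cgroup_page_lruvec(page, zone),
                                        page_lru(page));
                } else {
                        /* not munlocked in phase 2; just drop the follow_page_mask() pin */
                        pvec->pages[i] = NULL;
                        put_page(page);
                }
        }
        spin_unlock_irq(&zone->lru_lock);

        /* Phase 2: finish munlock and LRU putback, holding only lock_page */
        for (i = 0; i < nr; i++) {
                struct page *page = pvec->pages[i];

                if (page) {
                        lock_page(page);
                        __munlock_isolated_page(page); /* try_to_munlock + putback */
                        unlock_page(page);
                        put_page(page); /* follow_page_mask() pin */
                }
        }
        pagevec_reinit(pvec);
}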
Diffstat (limited to 'mm/mlock.c')
 -rw-r--r--  mm/mlock.c | 196
 1 file changed, 156 insertions(+), 40 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c
index b85f1e827610..b3b4a78b7802 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -11,6 +11,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/pagemap.h>
+#include <linux/pagevec.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
@@ -18,6 +19,8 @@
 #include <linux/rmap.h>
 #include <linux/mmzone.h>
 #include <linux/hugetlb.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
 
 #include "internal.h"
 
@@ -87,6 +90,47 @@ void mlock_vma_page(struct page *page)
         }
 }
 
+/*
+ * Finish munlock after successful page isolation
+ *
+ * Page must be locked. This is a wrapper for try_to_munlock()
+ * and putback_lru_page() with munlock accounting.
+ */
+static void __munlock_isolated_page(struct page *page)
+{
+        int ret = SWAP_AGAIN;
+
+        /*
+         * Optimization: if the page was mapped just once, that's our mapping
+         * and we don't need to check all the other vmas.
+         */
+        if (page_mapcount(page) > 1)
+                ret = try_to_munlock(page);
+
+        /* Did try_to_unlock() succeed or punt? */
+        if (ret != SWAP_MLOCK)
+                count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+
+        putback_lru_page(page);
+}
+
+/*
+ * Accounting for page isolation fail during munlock
+ *
+ * Performs accounting when page isolation fails in munlock. There is nothing
+ * else to do because it means some other task has already removed the page
+ * from the LRU. putback_lru_page() will take care of removing the page from
+ * the unevictable list, if necessary. vmscan [page_referenced()] will move
+ * the page back to the unevictable list if some other vma has it mlocked.
+ */
+static void __munlock_isolation_failed(struct page *page)
+{
+        if (PageUnevictable(page))
+                count_vm_event(UNEVICTABLE_PGSTRANDED);
+        else
+                count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+}
+
 /**
  * munlock_vma_page - munlock a vma page
  * @page - page to be unlocked
@@ -112,37 +156,10 @@ unsigned int munlock_vma_page(struct page *page)
                 unsigned int nr_pages = hpage_nr_pages(page);
                 mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
                 page_mask = nr_pages - 1;
-                if (!isolate_lru_page(page)) {
-                        int ret = SWAP_AGAIN;
-
-                        /*
-                         * Optimization: if the page was mapped just once,
-                         * that's our mapping and we don't need to check all the
-                         * other vmas.
-                         */
-                        if (page_mapcount(page) > 1)
-                                ret = try_to_munlock(page);
-                        /*
-                         * did try_to_unlock() succeed or punt?
-                         */
-                        if (ret != SWAP_MLOCK)
-                                count_vm_event(UNEVICTABLE_PGMUNLOCKED);
-
-                        putback_lru_page(page);
-                } else {
-                        /*
-                         * Some other task has removed the page from the LRU.
-                         * putback_lru_page() will take care of removing the
-                         * page from the unevictable list, if necessary.
-                         * vmscan [page_referenced()] will move the page back
-                         * to the unevictable list if some other vma has it
-                         * mlocked.
-                         */
-                        if (PageUnevictable(page))
-                                count_vm_event(UNEVICTABLE_PGSTRANDED);
-                        else
-                                count_vm_event(UNEVICTABLE_PGMUNLOCKED);
-                }
+                if (!isolate_lru_page(page))
+                        __munlock_isolated_page(page);
+                else
+                        __munlock_isolation_failed(page);
         }
 
         return page_mask;
@@ -210,6 +227,73 @@ static int __mlock_posix_error_return(long retval)
 }
 
 /*
+ * Munlock a batch of pages from the same zone
+ *
+ * The work is split to two main phases. First phase clears the Mlocked flag
+ * and attempts to isolate the pages, all under a single zone lru lock.
+ * The second phase finishes the munlock only for pages where isolation
+ * succeeded.
+ *
+ * Note that pvec is modified during the process. Before returning
+ * pagevec_reinit() is called on it.
+ */
+static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
+{
+        int i;
+        int nr = pagevec_count(pvec);
+
+        /* Phase 1: page isolation */
+        spin_lock_irq(&zone->lru_lock);
+        for (i = 0; i < nr; i++) {
+                struct page *page = pvec->pages[i];
+
+                if (TestClearPageMlocked(page)) {
+                        struct lruvec *lruvec;
+                        int lru;
+
+                        /* we have disabled interrupts */
+                        __mod_zone_page_state(zone, NR_MLOCK, -1);
+
+                        if (PageLRU(page)) {
+                                lruvec = mem_cgroup_page_lruvec(page, zone);
+                                lru = page_lru(page);
+
+                                get_page(page);
+                                ClearPageLRU(page);
+                                del_page_from_lru_list(page, lruvec, lru);
+                        } else {
+                                __munlock_isolation_failed(page);
+                                goto skip_munlock;
+                        }
+
+                } else {
+skip_munlock:
+                        /*
+                         * We won't be munlocking this page in the next phase
+                         * but we still need to release the follow_page_mask()
+                         * pin.
+                         */
+                        pvec->pages[i] = NULL;
+                        put_page(page);
+                }
+        }
+        spin_unlock_irq(&zone->lru_lock);
+
+        /* Phase 2: page munlock and putback */
+        for (i = 0; i < nr; i++) {
+                struct page *page = pvec->pages[i];
+
+                if (page) {
+                        lock_page(page);
+                        __munlock_isolated_page(page);
+                        unlock_page(page);
+                        put_page(page); /* pin from follow_page_mask() */
+                }
+        }
+        pagevec_reinit(pvec);
+}
+
+/*
  * munlock_vma_pages_range() - munlock all pages in the vma range.'
  * @vma - vma containing range to be munlock()ed.
  * @start - start address in @vma of the range
@@ -230,11 +314,16 @@ static int __mlock_posix_error_return(long retval)
 void munlock_vma_pages_range(struct vm_area_struct *vma,
                              unsigned long start, unsigned long end)
 {
+        struct pagevec pvec;
+        struct zone *zone = NULL;
+
+        pagevec_init(&pvec, 0);
         vma->vm_flags &= ~VM_LOCKED;
 
         while (start < end) {
                 struct page *page;
                 unsigned int page_mask, page_increm;
+                struct zone *pagezone;
 
                 /*
                  * Although FOLL_DUMP is intended for get_dump_page(),
@@ -246,20 +335,47 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
                 page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
                                         &page_mask);
                 if (page && !IS_ERR(page)) {
-                        lock_page(page);
-                        /*
-                         * Any THP page found by follow_page_mask() may have
-                         * gotten split before reaching munlock_vma_page(),
-                         * so we need to recompute the page_mask here.
-                         */
-                        page_mask = munlock_vma_page(page);
-                        unlock_page(page);
-                        put_page(page);
+                        pagezone = page_zone(page);
+                        /* The whole pagevec must be in the same zone */
+                        if (pagezone != zone) {
+                                if (pagevec_count(&pvec))
+                                        __munlock_pagevec(&pvec, zone);
+                                zone = pagezone;
+                        }
+                        if (PageTransHuge(page)) {
+                                /*
+                                 * THP pages are not handled by pagevec due
+                                 * to their possible split (see below).
+                                 */
+                                if (pagevec_count(&pvec))
+                                        __munlock_pagevec(&pvec, zone);
+                                lock_page(page);
+                                /*
+                                 * Any THP page found by follow_page_mask() may
+                                 * have gotten split before reaching
+                                 * munlock_vma_page(), so we need to recompute
+                                 * the page_mask here.
+                                 */
+                                page_mask = munlock_vma_page(page);
+                                unlock_page(page);
+                                put_page(page); /* follow_page_mask() */
+                        } else {
+                                /*
+                                 * Non-huge pages are handled in batches
+                                 * via pagevec. The pin from
+                                 * follow_page_mask() prevents them from
+                                 * collapsing by THP.
+                                 */
+                                if (pagevec_add(&pvec, page) == 0)
+                                        __munlock_pagevec(&pvec, zone);
+                        }
                 }
                 page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
                 start += page_increm * PAGE_SIZE;
                 cond_resched();
         }
+        if (pagevec_count(&pvec))
+                __munlock_pagevec(&pvec, zone);
 }
 
 /*