author      Vlastimil Babka <vbabka@suse.cz>           2013-09-11 17:22:32 -0400
committer   Linus Torvalds <torvalds@linux-foundation.org>   2013-09-11 18:58:00 -0400
commit      56afe477df3cbbcd656682d0355ef7d9eb8bdd81 (patch)
tree        8955c59c624d0790da97f44fad3ce5a45dbad76d /mm
parent      1ebb7cc6a58321a4b22c4c9097b4651b0ab859d0 (diff)
mm: munlock: bypass per-cpu pvec for putback_lru_page
After introducing batching by pagevecs into munlock_vma_range(), we can further improve performance by bypassing the copying into the per-cpu pagevec and the get_page/put_page pair associated with it. Instead we perform LRU putback directly from our pagevec. However, this is possible only for single-mapped pages that are evictable after munlock. Unevictable pages require rechecking after being put on the unevictable list, so for those we fall back to putback_lru_page(), which handles that.

After this patch, a 13% speedup was measured for munlocking a 56GB large memory area with THP disabled.

[akpm@linux-foundation.org: clarify comment]
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Jörn Engel <joern@logfs.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
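The helper below is an illustrative sketch only and is not part of the patch; the name munlock_fast_path_allowed() is invented for this note. It isolates the criterion described above for choosing between the batched fast path and the putback_lru_page() fallback:

    /*
     * Illustrative sketch, not from the patch: a munlocked page may skip the
     * per-cpu pagevec only when it is mapped at most once and still evictable;
     * anything else needs the full putback_lru_page() redo logic.
     */
    #include <linux/mm.h>       /* page_mapcount() */
    #include <linux/swap.h>     /* page_evictable() */

    static bool munlock_fast_path_allowed(struct page *page)
    {
            /* mapcount > 1 would need try_to_munlock(), which can fail */
            if (page_mapcount(page) > 1)
                    return false;

            /* unevictable pages must be rechecked via putback_lru_page() */
            if (!page_evictable(page))
                    return false;

            return true;
    }

The actual patch folds this test into __putback_lru_fast_prepare() below, which additionally adds the page to the putback pagevec, bumps the pgrescued counter and unlocks the page.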
Diffstat (limited to 'mm')
-rw-r--r--   mm/mlock.c   73
1 file changed, 69 insertions(+), 4 deletions(-)
diff --git a/mm/mlock.c b/mm/mlock.c
index b1a7c8007c89..abdc612b042d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -227,6 +227,52 @@ static int __mlock_posix_error_return(long retval)
 }
 
 /*
+ * Prepare page for fast batched LRU putback via putback_lru_evictable_pagevec()
+ *
+ * The fast path is available only for evictable pages with single mapping.
+ * Then we can bypass the per-cpu pvec and get better performance.
+ * when mapcount > 1 we need try_to_munlock() which can fail.
+ * when !page_evictable(), we need the full redo logic of putback_lru_page to
+ * avoid leaving evictable page in unevictable list.
+ *
+ * In case of success, @page is added to @pvec and @pgrescued is incremented
+ * in case that the page was previously unevictable. @page is also unlocked.
+ */
+static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
+                int *pgrescued)
+{
+        VM_BUG_ON(PageLRU(page));
+        VM_BUG_ON(!PageLocked(page));
+
+        if (page_mapcount(page) <= 1 && page_evictable(page)) {
+                pagevec_add(pvec, page);
+                if (TestClearPageUnevictable(page))
+                        (*pgrescued)++;
+                unlock_page(page);
+                return true;
+        }
+
+        return false;
+}
+
+/*
+ * Putback multiple evictable pages to the LRU
+ *
+ * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
+ * the pages might have meanwhile become unevictable but that is OK.
+ */
+static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
+{
+        count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
+        /*
+         *__pagevec_lru_add() calls release_pages() so we don't call
+         * put_page() explicitly
+         */
+        __pagevec_lru_add(pvec);
+        count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+}
+
+/*
  * Munlock a batch of pages from the same zone
  *
  * The work is split to two main phases. First phase clears the Mlocked flag
@@ -242,6 +288,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
         int i;
         int nr = pagevec_count(pvec);
         int delta_munlocked = -nr;
+        struct pagevec pvec_putback;
+        int pgrescued = 0;
 
         /* Phase 1: page isolation */
         spin_lock_irq(&zone->lru_lock);
@@ -279,17 +327,34 @@ skip_munlock:
         __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
         spin_unlock_irq(&zone->lru_lock);
 
-        /* Phase 2: page munlock and putback */
+        /* Phase 2: page munlock */
+        pagevec_init(&pvec_putback, 0);
         for (i = 0; i < nr; i++) {
                 struct page *page = pvec->pages[i];
 
                 if (page) {
                         lock_page(page);
-                        __munlock_isolated_page(page);
-                        unlock_page(page);
-                        put_page(page); /* pin from follow_page_mask() */
+                        if (!__putback_lru_fast_prepare(page, &pvec_putback,
+                                        &pgrescued)) {
+                                /* Slow path */
+                                __munlock_isolated_page(page);
+                                unlock_page(page);
+                        }
                 }
         }
+
+        /* Phase 3: page putback for pages that qualified for the fast path */
+        if (pagevec_count(&pvec_putback))
+                __putback_lru_fast(&pvec_putback, pgrescued);
+
+        /* Phase 4: put_page to return pin from follow_page_mask() */
+        for (i = 0; i < nr; i++) {
+                struct page *page = pvec->pages[i];
+
+                if (page)
+                        put_page(page);
+        }
+
         pagevec_reinit(pvec);
 }
 