diff options
author | Vlastimil Babka <vbabka@suse.cz> | 2014-01-02 15:58:44 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-02 17:40:30 -0500 |
commit | 3b25df93c6e37e323b86a2a8c1e00c0a2821c6c9 (patch) | |
tree | 69737469efd0313ffa2b3fcbb93daf04c72a5dc6 /mm/mlock.c | |
parent | c424be1cbbf852e46acc84d73162af3066cd2c86 (diff) |
mm: munlock: fix deadlock in __munlock_pagevec()
Commit 7225522bb429 ("mm: munlock: batch non-THP page isolation and
munlock+putback using pagevec") introduced __munlock_pagevec() to speed
up munlock by holding lru_lock over multiple isolated pages. Pages that
fail to be isolated are put_page()d immediately, also within the lock.
This can lead to deadlock when __munlock_pagevec() becomes the holder of
the last page pin and put_page() leads to __page_cache_release() which
also locks lru_lock. The deadlock has been observed by Sasha Levin
using trinity.
This patch avoids the deadlock by deferring put_page() operations until
lru_lock is released. Another pagevec (which is also used by later
phases of the function) is reused to gather the pages for the put_page()
operation.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mlock.c')
-rw-r--r-- | mm/mlock.c | 15 |
1 files changed, 10 insertions, 5 deletions
diff --git a/mm/mlock.c b/mm/mlock.c index c59c420fd6e1..192e6eebe4f2 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -298,10 +298,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) | |||
298 | { | 298 | { |
299 | int i; | 299 | int i; |
300 | int nr = pagevec_count(pvec); | 300 | int nr = pagevec_count(pvec); |
301 | int delta_munlocked = -nr; | 301 | int delta_munlocked; |
302 | struct pagevec pvec_putback; | 302 | struct pagevec pvec_putback; |
303 | int pgrescued = 0; | 303 | int pgrescued = 0; |
304 | 304 | ||
305 | pagevec_init(&pvec_putback, 0); | ||
306 | |||
305 | /* Phase 1: page isolation */ | 307 | /* Phase 1: page isolation */ |
306 | spin_lock_irq(&zone->lru_lock); | 308 | spin_lock_irq(&zone->lru_lock); |
307 | for (i = 0; i < nr; i++) { | 309 | for (i = 0; i < nr; i++) { |
@@ -330,18 +332,21 @@ skip_munlock: | |||
330 | /* | 332 | /* |
331 | * We won't be munlocking this page in the next phase | 333 | * We won't be munlocking this page in the next phase |
332 | * but we still need to release the follow_page_mask() | 334 | * but we still need to release the follow_page_mask() |
333 | * pin. | 335 | * pin. We cannot do it under lru_lock however. If it's |
336 | * the last pin, __page_cache_release would deadlock. | ||
334 | */ | 337 | */ |
338 | pagevec_add(&pvec_putback, pvec->pages[i]); | ||
335 | pvec->pages[i] = NULL; | 339 | pvec->pages[i] = NULL; |
336 | put_page(page); | ||
337 | delta_munlocked++; | ||
338 | } | 340 | } |
339 | } | 341 | } |
342 | delta_munlocked = -nr + pagevec_count(&pvec_putback); | ||
340 | __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); | 343 | __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); |
341 | spin_unlock_irq(&zone->lru_lock); | 344 | spin_unlock_irq(&zone->lru_lock); |
342 | 345 | ||
346 | /* Now we can release pins of pages that we are not munlocking */ | ||
347 | pagevec_release(&pvec_putback); | ||
348 | |||
343 | /* Phase 2: page munlock */ | 349 | /* Phase 2: page munlock */ |
344 | pagevec_init(&pvec_putback, 0); | ||
345 | for (i = 0; i < nr; i++) { | 350 | for (i = 0; i < nr; i++) { |
346 | struct page *page = pvec->pages[i]; | 351 | struct page *page = pvec->pages[i]; |
347 | 352 | ||