diff options
author | Hugh Dickins <hughd@google.com> | 2012-01-20 17:34:21 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-23 11:38:48 -0500 |
commit | 245132643e1cfcd145bbc86a716c1818371fcb93 (patch) | |
tree | e5bf3cb56efedb059b1a68fd8efd37482131783b /mm/shmem.c | |
parent | 85046579bde15e532983438f86b36856e358f417 (diff) |
SHM_UNLOCK: fix Unevictable pages stranded after swap
Commit cc39c6a9bbde ("mm: account skipped entries to avoid looping in
find_get_pages") correctly fixed an infinite loop; but left a problem
that find_get_pages() on shmem would return 0 (appearing to callers to
mean end of tree) when it meets a run of nr_pages swap entries.
The only uses of find_get_pages() on shmem are via pagevec_lookup(),
called from invalidate_mapping_pages(), and from shmctl SHM_UNLOCK's
scan_mapping_unevictable_pages(). The first is already commented, and
not worth worrying about; but the second can leave pages on the
Unevictable list after an unusual sequence of swapping and locking.
Fix that by using shmem_find_get_pages_and_swap() (then ignoring the
swap) instead of pagevec_lookup().
But I don't want to contaminate vmscan.c with shmem internals, nor
shmem.c with LRU locking. So move scan_mapping_unevictable_pages() into
shmem.c, renaming it shmem_unlock_mapping(); and rename
check_move_unevictable_page() to check_move_unevictable_pages(), looping
down an array of pages, oftentimes under the same lock.
Leave out the "rotate unevictable list" block: that's a leftover from
when this was used for /proc/sys/vm/scan_unevictable_pages, whose flawed
handling involved looking at pages at tail of LRU.
Was there significance to the sequence first ClearPageUnevictable, then
test page_evictable, then SetPageUnevictable here? I think not, we're
under LRU lock, and have no barriers between those.
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michel Lespinasse <walken@google.com>
Cc: <stable@vger.kernel.org> [back to 3.1 but will need respins]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- | mm/shmem.c | 46 |
1 files changed, 41 insertions, 5 deletions
diff --git a/mm/shmem.c b/mm/shmem.c index 4aaa53abe302..269d049294ab 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -379,7 +379,7 @@ static int shmem_free_swap(struct address_space *mapping, | |||
379 | /* | 379 | /* |
380 | * Pagevec may contain swap entries, so shuffle up pages before releasing. | 380 | * Pagevec may contain swap entries, so shuffle up pages before releasing. |
381 | */ | 381 | */ |
382 | static void shmem_pagevec_release(struct pagevec *pvec) | 382 | static void shmem_deswap_pagevec(struct pagevec *pvec) |
383 | { | 383 | { |
384 | int i, j; | 384 | int i, j; |
385 | 385 | ||
@@ -389,7 +389,36 @@ static void shmem_pagevec_release(struct pagevec *pvec) | |||
389 | pvec->pages[j++] = page; | 389 | pvec->pages[j++] = page; |
390 | } | 390 | } |
391 | pvec->nr = j; | 391 | pvec->nr = j; |
392 | pagevec_release(pvec); | 392 | } |
393 | |||
394 | /* | ||
395 | * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists. | ||
396 | */ | ||
397 | void shmem_unlock_mapping(struct address_space *mapping) | ||
398 | { | ||
399 | struct pagevec pvec; | ||
400 | pgoff_t indices[PAGEVEC_SIZE]; | ||
401 | pgoff_t index = 0; | ||
402 | |||
403 | pagevec_init(&pvec, 0); | ||
404 | /* | ||
405 | * Minor point, but we might as well stop if someone else SHM_LOCKs it. | ||
406 | */ | ||
407 | while (!mapping_unevictable(mapping)) { | ||
408 | /* | ||
409 | * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it | ||
410 | * has finished, if it hits a row of PAGEVEC_SIZE swap entries. | ||
411 | */ | ||
412 | pvec.nr = shmem_find_get_pages_and_swap(mapping, index, | ||
413 | PAGEVEC_SIZE, pvec.pages, indices); | ||
414 | if (!pvec.nr) | ||
415 | break; | ||
416 | index = indices[pvec.nr - 1] + 1; | ||
417 | shmem_deswap_pagevec(&pvec); | ||
418 | check_move_unevictable_pages(pvec.pages, pvec.nr); | ||
419 | pagevec_release(&pvec); | ||
420 | cond_resched(); | ||
421 | } | ||
393 | } | 422 | } |
394 | 423 | ||
395 | /* | 424 | /* |
@@ -440,7 +469,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | |||
440 | } | 469 | } |
441 | unlock_page(page); | 470 | unlock_page(page); |
442 | } | 471 | } |
443 | shmem_pagevec_release(&pvec); | 472 | shmem_deswap_pagevec(&pvec); |
473 | pagevec_release(&pvec); | ||
444 | mem_cgroup_uncharge_end(); | 474 | mem_cgroup_uncharge_end(); |
445 | cond_resched(); | 475 | cond_resched(); |
446 | index++; | 476 | index++; |
@@ -470,7 +500,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | |||
470 | continue; | 500 | continue; |
471 | } | 501 | } |
472 | if (index == start && indices[0] > end) { | 502 | if (index == start && indices[0] > end) { |
473 | shmem_pagevec_release(&pvec); | 503 | shmem_deswap_pagevec(&pvec); |
504 | pagevec_release(&pvec); | ||
474 | break; | 505 | break; |
475 | } | 506 | } |
476 | mem_cgroup_uncharge_start(); | 507 | mem_cgroup_uncharge_start(); |
@@ -494,7 +525,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | |||
494 | } | 525 | } |
495 | unlock_page(page); | 526 | unlock_page(page); |
496 | } | 527 | } |
497 | shmem_pagevec_release(&pvec); | 528 | shmem_deswap_pagevec(&pvec); |
529 | pagevec_release(&pvec); | ||
498 | mem_cgroup_uncharge_end(); | 530 | mem_cgroup_uncharge_end(); |
499 | index++; | 531 | index++; |
500 | } | 532 | } |
@@ -2438,6 +2470,10 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) | |||
2438 | return 0; | 2470 | return 0; |
2439 | } | 2471 | } |
2440 | 2472 | ||
2473 | void shmem_unlock_mapping(struct address_space *mapping) | ||
2474 | { | ||
2475 | } | ||
2476 | |||
2441 | void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) | 2477 | void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) |
2442 | { | 2478 | { |
2443 | truncate_inode_pages_range(inode->i_mapping, lstart, lend); | 2479 | truncate_inode_pages_range(inode->i_mapping, lstart, lend); |