author		Hugh Dickins <hughd@google.com>			2012-01-20 17:34:21 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-23 11:38:48 -0500
commit		245132643e1cfcd145bbc86a716c1818371fcb93
tree		e5bf3cb56efedb059b1a68fd8efd37482131783b /mm
parent		85046579bde15e532983438f86b36856e358f417
SHM_UNLOCK: fix Unevictable pages stranded after swap
Commit cc39c6a9bbde ("mm: account skipped entries to avoid looping in
find_get_pages") correctly fixed an infinite loop; but left a problem
that find_get_pages() on shmem would return 0 (appearing to callers to
mean end of tree) when it meets a run of nr_pages swap entries.
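To make the failure mode concrete, here is an illustrative sketch (not code
from this patch) of the caller pattern described above: a pagevec_lookup()
loop that takes a zero return to mean end of tree, and so stops early on
shmem when a whole pagevec's worth of entries turn out to be swap entries.

	struct pagevec pvec;
	pgoff_t next = 0;

	pagevec_init(&pvec, 0);
	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		/* work on pvec.pages[0 .. pvec.nr - 1] ... */
		next = pvec.pages[pvec.nr - 1]->index + 1;
		pagevec_release(&pvec);
	}
	/*
	 * The loop exits as soon as pagevec_lookup() returns 0 -- which, on
	 * shmem, can happen over a run of PAGEVEC_SIZE swap entries rather
	 * than only at the true end of the tree, leaving later pages unseen.
	 */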
The only uses of find_get_pages() on shmem are via pagevec_lookup(),
called from invalidate_mapping_pages(), and from shmctl SHM_UNLOCK's
scan_mapping_unevictable_pages(). The first is already commented, and
not worth worrying about; but the second can leave pages on the
Unevictable list after an unusual sequence of swapping and locking.
Fix that by using shmem_find_get_pages_and_swap() (then ignoring the
swap) instead of pagevec_lookup().
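Sketched, that replacement lookup looks like this (the shape of the new
shmem_unlock_mapping() loop in the mm/shmem.c hunk below; the names are the
patch's own):

	pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
				PAGEVEC_SIZE, pvec.pages, indices);
	if (!pvec.nr)			/* 0 now really does mean end of tree */
		break;
	index = indices[pvec.nr - 1] + 1;
	shmem_deswap_pagevec(&pvec);	/* squeeze out the swap entries, keep the pages */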
But I don't want to contaminate vmscan.c with shmem internals, nor
shmem.c with LRU locking. So move scan_mapping_unevictable_pages() into
shmem.c, renaming it shmem_unlock_mapping(); and rename
check_move_unevictable_page() to check_move_unevictable_pages(), looping
down an array of pages, oftentimes under the same lock.
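For orientation, the resulting split of responsibilities looks roughly like
this; the declarations are paraphrased, since the header changes fall outside
this 'mm'-limited diffstat:

	/* shmem side (paraphrased): replaces scan_mapping_unevictable_pages() */
	extern void shmem_unlock_mapping(struct address_space *mapping);

	/* vmscan side (paraphrased): batched form of check_move_unevictable_page() */
	extern void check_move_unevictable_pages(struct page **pages, int nr_pages);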
Leave out the "rotate unevictable list" block: that's a leftover from
when this was used for /proc/sys/vm/scan_unevictable_pages, whose flawed
handling involved looking at pages at tail of LRU.
Was there significance to the sequence of first ClearPageUnevictable, then
testing page_evictable, then SetPageUnevictable here? I think not: we're
under the LRU lock, and there are no barriers between those operations.
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michel Lespinasse <walken@google.com>
Cc: <stable@vger.kernel.org> [back to 3.1 but will need respins]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/shmem.c	 46
-rw-r--r--	mm/vmscan.c	128
2 files changed, 80 insertions(+), 94 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 4aaa53abe302..269d049294ab 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -379,7 +379,7 @@ static int shmem_free_swap(struct address_space *mapping,
 /*
  * Pagevec may contain swap entries, so shuffle up pages before releasing.
  */
-static void shmem_pagevec_release(struct pagevec *pvec)
+static void shmem_deswap_pagevec(struct pagevec *pvec)
 {
 	int i, j;
 
@@ -389,7 +389,36 @@ static void shmem_pagevec_release(struct pagevec *pvec)
 		pvec->pages[j++] = page;
 	}
 	pvec->nr = j;
-	pagevec_release(pvec);
+}
+
+/*
+ * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
+ */
+void shmem_unlock_mapping(struct address_space *mapping)
+{
+	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
+	pgoff_t index = 0;
+
+	pagevec_init(&pvec, 0);
+	/*
+	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
+	 */
+	while (!mapping_unevictable(mapping)) {
+		/*
+		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
+		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
+		 */
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+					PAGEVEC_SIZE, pvec.pages, indices);
+		if (!pvec.nr)
+			break;
+		index = indices[pvec.nr - 1] + 1;
+		shmem_deswap_pagevec(&pvec);
+		check_move_unevictable_pages(pvec.pages, pvec.nr);
+		pagevec_release(&pvec);
+		cond_resched();
+	}
 }
 
 /*
@@ -440,7 +469,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			}
 			unlock_page(page);
 		}
-		shmem_pagevec_release(&pvec);
+		shmem_deswap_pagevec(&pvec);
+		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
@@ -470,7 +500,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			continue;
 		}
 		if (index == start && indices[0] > end) {
-			shmem_pagevec_release(&pvec);
+			shmem_deswap_pagevec(&pvec);
+			pagevec_release(&pvec);
 			break;
 		}
 		mem_cgroup_uncharge_start();
@@ -494,7 +525,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			}
 			unlock_page(page);
 		}
-		shmem_pagevec_release(&pvec);
+		shmem_deswap_pagevec(&pvec);
+		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
 	}
@@ -2438,6 +2470,10 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 	return 0;
 }
 
+void shmem_unlock_mapping(struct address_space *mapping)
+{
+}
+
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e097c1026b58..c52b23552659 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -26,7 +26,6 @@
 #include <linux/buffer_head.h>	/* for try_to_release_page(),
					buffer_heads_over_limit */
 #include <linux/mm_inline.h>
-#include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
@@ -661,7 +660,7 @@ redo:
		 * When racing with an mlock or AS_UNEVICTABLE clearing
		 * (page is unlocked) make sure that if the other thread
		 * does not observe our setting of PG_lru and fails
-		 * isolation/check_move_unevictable_page,
+		 * isolation/check_move_unevictable_pages,
		 * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
		 * the page back to the evictable list.
		 *
@@ -3501,107 +3500,58 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 
 #ifdef CONFIG_SHMEM
 /**
- * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
- * @page: page to check evictability and move to appropriate lru list
- * @zone: zone page is in
+ * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
+ * @pages: array of pages to check
+ * @nr_pages: number of pages to check
  *
- * Checks a page for evictability and moves the page to the appropriate
- * zone lru list.
- *
- * Restrictions: zone->lru_lock must be held, page must be on LRU and must
- * have PageUnevictable set.
+ * Checks pages for evictability and moves them to the appropriate lru list.
  *
  * This function is only used for SysV IPC SHM_UNLOCK.
  */
-static void check_move_unevictable_page(struct page *page, struct zone *zone)
+void check_move_unevictable_pages(struct page **pages, int nr_pages)
 {
 	struct lruvec *lruvec;
+	struct zone *zone = NULL;
+	int pgscanned = 0;
+	int pgrescued = 0;
+	int i;
 
-	VM_BUG_ON(PageActive(page));
-retry:
-	ClearPageUnevictable(page);
-	if (page_evictable(page, NULL)) {
-		enum lru_list l = page_lru_base_type(page);
-
-		__dec_zone_state(zone, NR_UNEVICTABLE);
-		lruvec = mem_cgroup_lru_move_lists(zone, page,
-						   LRU_UNEVICTABLE, l);
-		list_move(&page->lru, &lruvec->lists[l]);
-		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
-		__count_vm_event(UNEVICTABLE_PGRESCUED);
-	} else {
-		/*
-		 * rotate unevictable list
-		 */
-		SetPageUnevictable(page);
-		lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE,
-						   LRU_UNEVICTABLE);
-		list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]);
-		if (page_evictable(page, NULL))
-			goto retry;
-	}
-}
-
-/**
- * scan_mapping_unevictable_pages - scan an address space for evictable pages
- * @mapping: struct address_space to scan for evictable pages
- *
- * Scan all pages in mapping.  Check unevictable pages for
- * evictability and move them to the appropriate zone lru list.
- *
- * This function is only used for SysV IPC SHM_UNLOCK.
- */
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-	pgoff_t next = 0;
-	pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
-			 PAGE_CACHE_SHIFT;
-	struct zone *zone;
-	struct pagevec pvec;
-
-	if (mapping->nrpages == 0)
-		return;
-
-	pagevec_init(&pvec, 0);
-	while (next < end &&
-		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-		int i;
-		int pg_scanned = 0;
-
-		zone = NULL;
-
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
-			struct zone *pagezone = page_zone(page);
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = pages[i];
+		struct zone *pagezone;
 
-			pg_scanned++;
-			if (page_index > next)
-				next = page_index;
-			next++;
+		pgscanned++;
+		pagezone = page_zone(page);
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
 
-			if (pagezone != zone) {
-				if (zone)
-					spin_unlock_irq(&zone->lru_lock);
-				zone = pagezone;
-				spin_lock_irq(&zone->lru_lock);
-			}
+		if (!PageLRU(page) || !PageUnevictable(page))
+			continue;
 
-			if (PageLRU(page) && PageUnevictable(page))
-				check_move_unevictable_page(page, zone);
+		if (page_evictable(page, NULL)) {
+			enum lru_list lru = page_lru_base_type(page);
+
+			VM_BUG_ON(PageActive(page));
+			ClearPageUnevictable(page);
+			__dec_zone_state(zone, NR_UNEVICTABLE);
+			lruvec = mem_cgroup_lru_move_lists(zone, page,
+						LRU_UNEVICTABLE, lru);
+			list_move(&page->lru, &lruvec->lists[lru]);
+			__inc_zone_state(zone, NR_INACTIVE_ANON + lru);
+			pgrescued++;
 		}
-		if (zone)
-			spin_unlock_irq(&zone->lru_lock);
-		pagevec_release(&pvec);
-
-		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
-		cond_resched();
+	}
+
+	if (zone) {
+		__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+		__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+		spin_unlock_irq(&zone->lru_lock);
 	}
 }
-#else
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-}
 #endif /* CONFIG_SHMEM */
 
 static void warn_scan_unevictable_pages(void)