author     Hugh Dickins <hugh@veritas.com>                        2008-02-07 03:14:04 -0500
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2008-02-07 11:42:19 -0500
commit     044d66c1d2b1c5aa50b4d6d68c21c6c93dd678da (patch)
tree       35442e01a5ef7e2d45abc20d45122c3c4c809d5d /mm
parent     3062fc67dad01b1d2a15d58c709eff946389eca4 (diff)
memcgroup: reinstate swapoff mod
This patch reinstates the "swapoff: scan ptes preemptibly" mod we started
with: in due course it should be rendered down into the earlier patches,
leaving us with a more straightforward mem_cgroup_charge mod to unuse_pte,
allocating with GFP_KERNEL while holding no spinlock and no atomic kmap.
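For orientation, once the hunks below are applied the reshaped unuse_pte() reads roughly as follows. This is a condensed sketch assembled from the diff, not a verbatim excerpt: the pte-install and rss/rmap steps between the recheck and the unlock are elided as a comment.

static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, swp_entry_t entry, struct page *page)
{
	spinlock_t *ptl;
	pte_t *pte;
	int ret = 1;

	/*
	 * Charge first: GFP_KERNEL may sleep, which is only safe while
	 * no spinlock is held and no atomic kmap is mapped.
	 */
	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
		ret = -ENOMEM;

	/*
	 * Only now map and lock the pte, and recheck that the swap entry
	 * is still there: it may have been zapped while we slept, or we
	 * may have glimpsed a torn pte on e.g. x86_32 with PAE.
	 */
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
		if (ret > 0)
			mem_cgroup_uncharge_page(page);
		ret = 0;
		goto out;
	}

	/* ... install the pte, adjust rss and rmap, under pte lock ... */
out:
	pte_unmap_unlock(pte, ptl);
	return ret;
}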
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
 mm/swapfile.c | 42 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 8 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 35e00c3d0286..02ccab5ad9d9 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -507,11 +507,23 @@ unsigned int count_swap_pages(int type, int free)
  * just let do_wp_page work it out if a write is requested later - to
  * force COW, vm_page_prot omits write permission from any private vma.
  */
-static int unuse_pte(struct vm_area_struct *vma, pte_t *pte,
+static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
+	spinlock_t *ptl;
+	pte_t *pte;
+	int ret = 1;
+
 	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
-		return -ENOMEM;
+		ret = -ENOMEM;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
+		if (ret > 0)
+			mem_cgroup_uncharge_page(page);
+		ret = 0;
+		goto out;
+	}
 
 	inc_mm_counter(vma->vm_mm, anon_rss);
 	get_page(page);
@@ -524,7 +536,9 @@ static int unuse_pte(struct vm_area_struct *vma, pte_t *pte,
 	 * immediately swapped out again after swapon.
 	 */
 	activate_page(page);
-	return 1;
+out:
+	pte_unmap_unlock(pte, ptl);
+	return ret;
 }
 
 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -533,21 +547,33 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 {
 	pte_t swp_pte = swp_entry_to_pte(entry);
 	pte_t *pte;
-	spinlock_t *ptl;
 	int ret = 0;
 
-	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	/*
+	 * We don't actually need pte lock while scanning for swp_pte: since
+	 * we hold page lock and mmap_sem, swp_pte cannot be inserted into the
+	 * page table while we're scanning; though it could get zapped, and on
+	 * some architectures (e.g. x86_32 with PAE) we might catch a glimpse
+	 * of unmatched parts which look like swp_pte, so unuse_pte must
+	 * recheck under pte lock.  Scanning without pte lock lets it be
+	 * preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
+	 */
+	pte = pte_offset_map(pmd, addr);
 	do {
 		/*
 		 * swapoff spends a _lot_ of time in this loop!
 		 * Test inline before going to call unuse_pte.
 		 */
 		if (unlikely(pte_same(*pte, swp_pte))) {
-			ret = unuse_pte(vma, pte++, addr, entry, page);
-			break;
+			pte_unmap(pte);
+			ret = unuse_pte(vma, pmd, addr, entry, page);
+			if (ret)
+				goto out;
+			pte = pte_offset_map(pmd, addr);
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
-	pte_unmap_unlock(pte - 1, ptl);
+	pte_unmap(pte - 1);
+out:
 	return ret;
 }
 
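The scanning side of the same discipline, assembled from the unuse_pte_range() hunk above into one condensed sketch (the in-loop "swapoff spends a _lot_ of time" comment is dropped here, and the parameter lines outside the hunk context are filled in from the surrounding file):

static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end,
		swp_entry_t entry, struct page *page)
{
	pte_t swp_pte = swp_entry_to_pte(entry);
	pte_t *pte;
	int ret = 0;

	/*
	 * Scan without pte lock: page lock and mmap_sem prevent swp_pte
	 * from being inserted meanwhile; any stale or torn glimpse is
	 * caught by unuse_pte's pte_same() recheck under the lock.
	 */
	pte = pte_offset_map(pmd, addr);
	do {
		if (unlikely(pte_same(*pte, swp_pte))) {
			/*
			 * Drop the atomic kmap before unuse_pte sleeps in
			 * mem_cgroup_charge; remap afterwards so the scan
			 * can continue from the same address.
			 */
			pte_unmap(pte);
			ret = unuse_pte(vma, pmd, addr, entry, page);
			if (ret)
				goto out;
			pte = pte_offset_map(pmd, addr);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(pte - 1);
out:
	return ret;
}

The net effect: the one allocation on this path, mem_cgroup_charge() with GFP_KERNEL, always runs with no spinlock held and no atomic kmap mapped, while correctness is preserved by rechecking pte_same() under pte_offset_map_lock() before committing; the scan itself becomes preemptible under CONFIG_PREEMPT, though not with CONFIG_HIGHPTE, where pte_offset_map() itself takes an atomic kmap.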