diff options
author | Haggai Eran <haggaie@mellanox.com> | 2012-10-08 19:33:35 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-09 03:22:58 -0400 |
commit | 6bdb913f0a70a4dfb7f066fb15e2d6f960701d00 (patch) | |
tree | 9a61960b27bf801794104b8bb8fccee1813f1b4b /mm | |
parent | 2ec74c3ef2d8c58d71e0e00336fb6b891192155a (diff) |
mm: wrap calls to set_pte_at_notify with invalidate_range_start and invalidate_range_end
In order to allow sleeping during invalidate_page mmu notifier calls, we
need to avoid making those calls while holding the page table (PT) lock.
In addition to its direct calls, invalidate_page can also be called as a
substitute for a change_pte call, in case the notifier client hasn't
implemented change_pte.
This patch drops the invalidate_page call from change_pte, and instead
wraps all calls to change_pte with invalidate_range_start and
invalidate_range_end calls.
Note that change_pte still cannot sleep after this patch, and that clients
implementing change_pte should not take action on it if the number of
outstanding invalidate_range_start calls is larger than one; otherwise
they might miss a later invalidation.
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Cc: Andrea Arcangeli <andrea@qumranet.com>
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Cc: Haggai Eran <haggaie@mellanox.com>
Cc: Shachar Raindel <raindel@mellanox.com>
Cc: Liran Liss <liranl@mellanox.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/ksm.c | 21 | ||||
-rw-r--r-- | mm/memory.c | 18 | ||||
-rw-r--r-- | mm/mmu_notifier.c | 6 |
3 files changed, 31 insertions, 14 deletions
@@ -709,15 +709,22 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, | |||
709 | spinlock_t *ptl; | 709 | spinlock_t *ptl; |
710 | int swapped; | 710 | int swapped; |
711 | int err = -EFAULT; | 711 | int err = -EFAULT; |
712 | unsigned long mmun_start; /* For mmu_notifiers */ | ||
713 | unsigned long mmun_end; /* For mmu_notifiers */ | ||
712 | 714 | ||
713 | addr = page_address_in_vma(page, vma); | 715 | addr = page_address_in_vma(page, vma); |
714 | if (addr == -EFAULT) | 716 | if (addr == -EFAULT) |
715 | goto out; | 717 | goto out; |
716 | 718 | ||
717 | BUG_ON(PageTransCompound(page)); | 719 | BUG_ON(PageTransCompound(page)); |
720 | |||
721 | mmun_start = addr; | ||
722 | mmun_end = addr + PAGE_SIZE; | ||
723 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | ||
724 | |||
718 | ptep = page_check_address(page, mm, addr, &ptl, 0); | 725 | ptep = page_check_address(page, mm, addr, &ptl, 0); |
719 | if (!ptep) | 726 | if (!ptep) |
720 | goto out; | 727 | goto out_mn; |
721 | 728 | ||
722 | if (pte_write(*ptep) || pte_dirty(*ptep)) { | 729 | if (pte_write(*ptep) || pte_dirty(*ptep)) { |
723 | pte_t entry; | 730 | pte_t entry; |
@@ -752,6 +759,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, | |||
752 | 759 | ||
753 | out_unlock: | 760 | out_unlock: |
754 | pte_unmap_unlock(ptep, ptl); | 761 | pte_unmap_unlock(ptep, ptl); |
762 | out_mn: | ||
763 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
755 | out: | 764 | out: |
756 | return err; | 765 | return err; |
757 | } | 766 | } |
@@ -776,6 +785,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, | |||
776 | spinlock_t *ptl; | 785 | spinlock_t *ptl; |
777 | unsigned long addr; | 786 | unsigned long addr; |
778 | int err = -EFAULT; | 787 | int err = -EFAULT; |
788 | unsigned long mmun_start; /* For mmu_notifiers */ | ||
789 | unsigned long mmun_end; /* For mmu_notifiers */ | ||
779 | 790 | ||
780 | addr = page_address_in_vma(page, vma); | 791 | addr = page_address_in_vma(page, vma); |
781 | if (addr == -EFAULT) | 792 | if (addr == -EFAULT) |
@@ -794,10 +805,14 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, | |||
794 | if (!pmd_present(*pmd)) | 805 | if (!pmd_present(*pmd)) |
795 | goto out; | 806 | goto out; |
796 | 807 | ||
808 | mmun_start = addr; | ||
809 | mmun_end = addr + PAGE_SIZE; | ||
810 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | ||
811 | |||
797 | ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); | 812 | ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); |
798 | if (!pte_same(*ptep, orig_pte)) { | 813 | if (!pte_same(*ptep, orig_pte)) { |
799 | pte_unmap_unlock(ptep, ptl); | 814 | pte_unmap_unlock(ptep, ptl); |
800 | goto out; | 815 | goto out_mn; |
801 | } | 816 | } |
802 | 817 | ||
803 | get_page(kpage); | 818 | get_page(kpage); |
@@ -814,6 +829,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, | |||
814 | 829 | ||
815 | pte_unmap_unlock(ptep, ptl); | 830 | pte_unmap_unlock(ptep, ptl); |
816 | err = 0; | 831 | err = 0; |
832 | out_mn: | ||
833 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
817 | out: | 834 | out: |
818 | return err; | 835 | return err; |
819 | } | 836 | } |
diff --git a/mm/memory.c b/mm/memory.c index b03a4a21c1d0..01ec048ece8b 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -2527,6 +2527,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2527 | int ret = 0; | 2527 | int ret = 0; |
2528 | int page_mkwrite = 0; | 2528 | int page_mkwrite = 0; |
2529 | struct page *dirty_page = NULL; | 2529 | struct page *dirty_page = NULL; |
2530 | unsigned long mmun_start; /* For mmu_notifiers */ | ||
2531 | unsigned long mmun_end; /* For mmu_notifiers */ | ||
2532 | bool mmun_called = false; /* For mmu_notifiers */ | ||
2530 | 2533 | ||
2531 | old_page = vm_normal_page(vma, address, orig_pte); | 2534 | old_page = vm_normal_page(vma, address, orig_pte); |
2532 | if (!old_page) { | 2535 | if (!old_page) { |
@@ -2704,6 +2707,11 @@ gotten: | |||
2704 | if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) | 2707 | if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) |
2705 | goto oom_free_new; | 2708 | goto oom_free_new; |
2706 | 2709 | ||
2710 | mmun_start = address & PAGE_MASK; | ||
2711 | mmun_end = (address & PAGE_MASK) + PAGE_SIZE; | ||
2712 | mmun_called = true; | ||
2713 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | ||
2714 | |||
2707 | /* | 2715 | /* |
2708 | * Re-check the pte - we dropped the lock | 2716 | * Re-check the pte - we dropped the lock |
2709 | */ | 2717 | */ |
@@ -2766,14 +2774,12 @@ gotten: | |||
2766 | } else | 2774 | } else |
2767 | mem_cgroup_uncharge_page(new_page); | 2775 | mem_cgroup_uncharge_page(new_page); |
2768 | 2776 | ||
2777 | if (new_page) | ||
2778 | page_cache_release(new_page); | ||
2769 | unlock: | 2779 | unlock: |
2770 | pte_unmap_unlock(page_table, ptl); | 2780 | pte_unmap_unlock(page_table, ptl); |
2771 | if (new_page) { | 2781 | if (mmun_called) |
2772 | if (new_page == old_page) | 2782 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
2773 | /* cow happened, notify before releasing old_page */ | ||
2774 | mmu_notifier_invalidate_page(mm, address); | ||
2775 | page_cache_release(new_page); | ||
2776 | } | ||
2777 | if (old_page) { | 2783 | if (old_page) { |
2778 | /* | 2784 | /* |
2779 | * Don't let another task, with possibly unlocked vma, | 2785 | * Don't let another task, with possibly unlocked vma, |
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index c297142f0fe6..479a1e751a73 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c | |||
@@ -137,12 +137,6 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, | |||
137 | hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { | 137 | hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { |
138 | if (mn->ops->change_pte) | 138 | if (mn->ops->change_pte) |
139 | mn->ops->change_pte(mn, mm, address, pte); | 139 | mn->ops->change_pte(mn, mm, address, pte); |
140 | /* | ||
141 | * Some drivers don't have change_pte, | ||
142 | * so we must call invalidate_page in that case. | ||
143 | */ | ||
144 | else if (mn->ops->invalidate_page) | ||
145 | mn->ops->invalidate_page(mn, mm, address); | ||
146 | } | 140 | } |
147 | srcu_read_unlock(&srcu, id); | 141 | srcu_read_unlock(&srcu, id); |
148 | } | 142 | } |