author     Haggai Eran <haggaie@mellanox.com>              2012-10-08 19:33:35 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-10-09 03:22:58 -0400
commit     6bdb913f0a70a4dfb7f066fb15e2d6f960701d00 (patch)
tree       9a61960b27bf801794104b8bb8fccee1813f1b4b /mm
parent     2ec74c3ef2d8c58d71e0e00336fb6b891192155a (diff)
mm: wrap calls to set_pte_at_notify with invalidate_range_start and invalidate_range_end
In order to allow sleeping during invalidate_page mmu notifier calls, we need
to avoid calling them while holding the PT lock.  In addition to its direct
calls, invalidate_page can also be called as a substitute for a change_pte
call, in case the notifier client hasn't implemented change_pte.

This patch drops the invalidate_page call from change_pte, and instead wraps
all calls to change_pte with invalidate_range_start and invalidate_range_end
calls.

Note that change_pte still cannot sleep after this patch, and that clients
implementing change_pte should not take action on it in case the number of
outstanding invalidate_range_start calls is larger than one, otherwise they
might miss a later invalidation.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Cc: Andrea Arcangeli <andrea@qumranet.com>
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Cc: Haggai Eran <haggaie@mellanox.com>
Cc: Shachar Raindel <raindel@mellanox.com>
Cc: Liran Liss <liranl@mellanox.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
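The "larger than one" rule deserves a concrete illustration. Because change_pte
now always fires inside an invalidate_range_start/invalidate_range_end pair, a
client that counts outstanding range invalidations sees a count of at least one
when change_pte arrives; it should apply the update only when no *other*
invalidation is in flight. Below is a minimal sketch of such a client. The
struct and handler names (my_client, my_*) are illustrative, not from this
patch; KVM uses a comparable counter scheme:

    #include <linux/mmu_notifier.h>
    #include <linux/spinlock.h>

    struct my_client {
            struct mmu_notifier mn;
            spinlock_t lock;
            int invalidate_count;   /* outstanding range_start calls */
    };

    static void my_invalidate_range_start(struct mmu_notifier *mn,
                                          struct mm_struct *mm,
                                          unsigned long start, unsigned long end)
    {
            struct my_client *c = container_of(mn, struct my_client, mn);

            spin_lock(&c->lock);
            c->invalidate_count++;
            /* ... tear down secondary mappings in [start, end) ... */
            spin_unlock(&c->lock);
    }

    static void my_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
                              unsigned long address, pte_t pte)
    {
            struct my_client *c = container_of(mn, struct my_client, mn);

            spin_lock(&c->lock);
            /*
             * change_pte arrives inside its own range_start/range_end
             * pair, so invalidate_count is at least 1 here.  Act only
             * when no *other* invalidation is outstanding, otherwise a
             * later invalidation could be missed.
             */
            if (c->invalidate_count == 1)
                    /* ... update the secondary mapping to the new pte ... */;
            spin_unlock(&c->lock);
    }

    static void my_invalidate_range_end(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long start, unsigned long end)
    {
            struct my_client *c = container_of(mn, struct my_client, mn);

            spin_lock(&c->lock);
            c->invalidate_count--;
            spin_unlock(&c->lock);
    }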
Diffstat (limited to 'mm')
-rw-r--r--   mm/ksm.c            21
-rw-r--r--   mm/memory.c         18
-rw-r--r--   mm/mmu_notifier.c    6
3 files changed, 31 insertions(+), 14 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index ecbc090cdaad..ae539f0b8aa1 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -709,15 +709,22 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 	spinlock_t *ptl;
 	int swapped;
 	int err = -EFAULT;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
 
 	addr = page_address_in_vma(page, vma);
 	if (addr == -EFAULT)
 		goto out;
 
 	BUG_ON(PageTransCompound(page));
+
+	mmun_start = addr;
+	mmun_end   = addr + PAGE_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
 	ptep = page_check_address(page, mm, addr, &ptl, 0);
 	if (!ptep)
-		goto out;
+		goto out_mn;
 
 	if (pte_write(*ptep) || pte_dirty(*ptep)) {
 		pte_t entry;
@@ -752,6 +759,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 
 out_unlock:
 	pte_unmap_unlock(ptep, ptl);
+out_mn:
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out:
 	return err;
 }
@@ -776,6 +785,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	spinlock_t *ptl;
 	unsigned long addr;
 	int err = -EFAULT;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
 
 	addr = page_address_in_vma(page, vma);
 	if (addr == -EFAULT)
@@ -794,10 +805,14 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	if (!pmd_present(*pmd))
 		goto out;
 
+	mmun_start = addr;
+	mmun_end   = addr + PAGE_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
 	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	if (!pte_same(*ptep, orig_pte)) {
 		pte_unmap_unlock(ptep, ptl);
-		goto out;
+		goto out_mn;
 	}
 
 	get_page(kpage);
@@ -814,6 +829,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 
 	pte_unmap_unlock(ptep, ptl);
 	err = 0;
+out_mn:
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out:
 	return err;
 }
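Both ksm.c call sites above now follow the same shape: compute the one-page
range, announce it with invalidate_range_start before taking the PTE lock, and
run invalidate_range_end on every exit path taken after that point, once the
lock is dropped. Distilled as a sketch, with "pte_should_change()" and
"newpte" as placeholders rather than code from this patch:

    /*
     * Sketch of the locking/notifier ordering that write_protect_page()
     * and replace_page() adopt above; error handling elided.
     */
    static void wrap_pte_update_sketch(struct mm_struct *mm, pmd_t *pmd,
                                       unsigned long addr, pte_t newpte)
    {
            unsigned long mmun_start = addr;              /* For mmu_notifiers */
            unsigned long mmun_end = addr + PAGE_SIZE;
            spinlock_t *ptl;
            pte_t *ptep;

            mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

            ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); /* PT lock taken */
            if (pte_should_change(ptep))
                    set_pte_at_notify(mm, addr, ptep, newpte); /* fires change_pte */
            pte_unmap_unlock(ptep, ptl);                     /* PT lock dropped */

            mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
    }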
diff --git a/mm/memory.c b/mm/memory.c
index b03a4a21c1d0..01ec048ece8b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2527,6 +2527,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int ret = 0;
 	int page_mkwrite = 0;
 	struct page *dirty_page = NULL;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
+	bool mmun_called = false;	/* For mmu_notifiers */
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page) {
@@ -2704,6 +2707,11 @@ gotten:
 	if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
 		goto oom_free_new;
 
+	mmun_start  = address & PAGE_MASK;
+	mmun_end    = (address & PAGE_MASK) + PAGE_SIZE;
+	mmun_called = true;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
@@ -2766,14 +2774,12 @@ gotten:
 	} else
 		mem_cgroup_uncharge_page(new_page);
 
+	if (new_page)
+		page_cache_release(new_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
-	if (new_page) {
-		if (new_page == old_page)
-			/* cow happened, notify before releasing old_page */
-			mmu_notifier_invalidate_page(mm, address);
-		page_cache_release(new_page);
-	}
+	if (mmun_called)
+		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	if (old_page) {
 		/*
 		 * Don't let another task, with possibly unlocked vma,
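Unlike the ksm.c sites, do_wp_page() has several exit paths that reach the
unlock: label without ever passing through gotten:, so invalidate_range_start
may never have run. The mmun_called flag introduced above makes the closing
call conditional. A reduced sketch of that guard idiom, with the surrounding
control flow elided:

    bool mmun_called = false;	/* For mmu_notifiers */

    /* ... only the COW path starts a range invalidation ... */
    mmun_start  = address & PAGE_MASK;
    mmun_end    = mmun_start + PAGE_SIZE;
    mmun_called = true;
    mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

    /* ... later, on the common exit path ... */
    if (mmun_called)	/* end only what was actually started */
            mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);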
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index c297142f0fe6..479a1e751a73 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -137,12 +137,6 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
 	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->change_pte)
 			mn->ops->change_pte(mn, mm, address, pte);
-		/*
-		 * Some drivers don't have change_pte,
-		 * so we must call invalidate_page in that case.
-		 */
-		else if (mn->ops->invalidate_page)
-			mn->ops->invalidate_page(mn, mm, address);
 	}
 	srcu_read_unlock(&srcu, id);
 }
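Why the fallback can go: every set_pte_at_notify() caller is now bracketed by
invalidate_range_start/invalidate_range_end, so a notifier client that does
not implement change_pte still learns about the PTE update from the range
callbacks. For reference, a sketch of what set_pte_at_notify() boils down to;
the real definition lives in include/linux/mmu_notifier.h and this
reconstruction is approximate:

    /* Approximate equivalent of set_pte_at_notify(); illustrative only. */
    static inline void set_pte_at_notify_sketch(struct mm_struct *mm,
                                                unsigned long address,
                                                pte_t *ptep, pte_t pte)
    {
            set_pte_at(mm, address, ptep, pte);        /* update the primary PTE  */
            mmu_notifier_change_pte(mm, address, pte); /* then tell the notifiers */
    }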