author     Linus Torvalds <torvalds@linux-foundation.org>   2017-08-31 20:30:01 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2017-08-31 20:30:01 -0400
commit     ea25c43179462e342d4a0e66c3f6a5f53514da05 (patch)
tree       ba4ab1aef8f166de65da4ad9e9162ff277f318a4
parent     c227390c91a355300f47f9bef0aefbdfaaca1500 (diff)
parent     5f32b265400de723ab0db23101a75ac073bdd980 (diff)
Merge branch 'mmu_notifier_fixes'
Merge mmu_notifier fixes from Jérôme Glisse:
 "The invalidate_page callback suffered from two pitfalls. First, it used
  to happen after the page table lock was released, so a new page might
  have been set up for the virtual address before the call to
  invalidate_page(). This is fixed, in a somewhat roundabout way, by
  commit c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use
  page_vma_mapped_walk()"), which moved the callback under the page table
  lock; that in turn broke several existing users of the mmu_notifier API
  that assumed they could sleep inside this callback.

  The second pitfall was that invalidate_page() was the only callback
  that did not take an address range; it was handed a single address and
  a page. Many of the callback implementers assumed the page could never
  be a THP and thus failed to invalidate the appropriate range for THP
  pages.

  By killing this callback we unify the mmu_notifier callback API to
  always take a virtual address range as input.

  There are now two clear APIs (leaving aside the seldom-used page
  aging/young callbacks):

   - invalidate_range_start()/end(), which allow you to sleep

   - invalidate_range(), in which you cannot sleep, but which happens
     right after the page table update, under the page table lock

  Note that many existing users look broken with respect to
  range_start()/range_end(): they only implement a range_start()
  callback, yet nothing prevents what range_start() invalidated from
  being re-established after it returns but before the CPU page table
  update actually takes place. The code pattern used in KVM and in umem
  ODP is an example of how to properly avoid such a race: in a nutshell,
  use some kind of sequence number plus an active-range-invalidation
  counter to block anything that might undo what the range_start()
  callback did.

  If you do not care about staying fully in sync with the CPU page table
  (ie you can live with the CPU page table pointing to a new, different
  page for a given virtual address), then you can instead take a
  reference on the pages inside the range_start() callback and drop it
  in range_end(), or whenever your driver is done with those pages.

  The last alternative is to use invalidate_range(), if you can do the
  invalidation without sleeping, as the invalidate_range() callback
  happens under the CPU page table spinlock right after the page table
  is updated.

  The first two patches convert the existing
  mmu_notifier_invalidate_page() calls to
  mmu_notifier_invalidate_range() and bracket those calls with calls to
  mmu_notifier_invalidate_range_start()/end(). The next ten patches
  remove the existing invalidate_page() callbacks, which can no longer
  be invoked. Finally, the last patch removes the invalidate_page()
  callback completely so it can RIP.

  Changes since v1:
   - removed more dead code in kvm (no testing impact)
   - more accurate end address computation (patch 2) in
     page_mkclean_one() and try_to_unmap_one()
   - added the Tested-by/Reviewed-by tags gotten so far"

* emailed patches from Jérôme Glisse <jglisse@redhat.com>:
  mm/mmu_notifier: kill invalidate_page
  KVM: update to new mmu_notifier semantic v2
  xen/gntdev: update to new mmu_notifier semantic
  sgi-gru: update to new mmu_notifier semantic
  misc/mic/scif: update to new mmu_notifier semantic
  iommu/intel: update to new mmu_notifier semantic
  iommu/amd: update to new mmu_notifier semantic
  IB/hfi1: update to new mmu_notifier semantic
  IB/umem: update to new mmu_notifier semantic
  drm/amdgpu: update to new mmu_notifier semantic
  powerpc/powernv: update to new mmu_notifier semantic
  mm/rmap: update to new mmu_notifier semantic v2
  dax: update to new mmu_notifier semantic
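For orientation, this is roughly what a notifier user looks like once invalidate_page() is gone. It is a minimal sketch with hypothetical driver names (my_ctx and the my_*() helpers are made up for illustration), not code taken from any of the patches below:

/*
 * Minimal sketch of a converted notifier user: only the range callbacks
 * remain, there is no per-page hook anymore.
 */
#include <linux/mmu_notifier.h>

struct my_ctx {
	struct mmu_notifier mn;
	/* driver state, secondary page tables, locks, ... */
};

/* May sleep: called before the CPU page table is changed. */
static void my_invalidate_range_start(struct mmu_notifier *mn,
				      struct mm_struct *mm,
				      unsigned long start, unsigned long end)
{
	struct my_ctx *ctx = container_of(mn, struct my_ctx, mn);

	my_stop_dma_and_unmap(ctx, start, end);		/* hypothetical helper */
}

/* Must not sleep: runs under the page table lock, right after the update. */
static void my_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	struct my_ctx *ctx = container_of(mn, struct my_ctx, mn);

	my_flush_secondary_tlb(ctx, start, end);	/* hypothetical helper */
}

static const struct mmu_notifier_ops my_mn_ops = {
	.invalidate_range_start	= my_invalidate_range_start,
	.invalidate_range	= my_invalidate_range,
};

Drivers that can flush their secondary TLB atomically only need invalidate_range(); drivers that must sleep keep their work in invalidate_range_start()/end().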
-rw-r--r--   arch/arm/include/asm/kvm_host.h             6
-rw-r--r--   arch/arm64/include/asm/kvm_host.h           6
-rw-r--r--   arch/mips/include/asm/kvm_host.h            5
-rw-r--r--   arch/powerpc/include/asm/kvm_host.h         5
-rw-r--r--   arch/powerpc/platforms/powernv/npu-dma.c   10
-rw-r--r--   arch/x86/include/asm/kvm_host.h             2
-rw-r--r--   arch/x86/kvm/x86.c                         11
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c     31
-rw-r--r--   drivers/infiniband/core/umem_odp.c         19
-rw-r--r--   drivers/infiniband/hw/hfi1/mmu_rb.c         9
-rw-r--r--   drivers/iommu/amd_iommu_v2.c                8
-rw-r--r--   drivers/iommu/intel-svm.c                   9
-rw-r--r--   drivers/misc/mic/scif/scif_dma.c           11
-rw-r--r--   drivers/misc/sgi-gru/grutlbpurge.c         12
-rw-r--r--   drivers/xen/gntdev.c                        8
-rw-r--r--   fs/dax.c                                   19
-rw-r--r--   include/linux/mm.h                          1
-rw-r--r--   include/linux/mmu_notifier.h               25
-rw-r--r--   mm/memory.c                                26
-rw-r--r--   mm/mmu_notifier.c                          14
-rw-r--r--   mm/rmap.c                                  35
-rw-r--r--   virt/kvm/kvm_main.c                        42
22 files changed, 65 insertions, 249 deletions
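The race the merge message warns about, where a secondary-MMU fault re-establishes a mapping after range_start() returns but before the CPU page table is actually updated, is typically closed with a sequence number plus an in-flight counter, as KVM and umem ODP do. The sketch below only illustrates that pattern with made-up names (my_ctx, my_lookup_cpu_page(), my_install_secondary_mapping() are hypothetical); it is not the KVM code:

/* Notifier side: mark the invalidation window. */
static void my_range_start(struct mmu_notifier *mn, struct mm_struct *mm,
			   unsigned long start, unsigned long end)
{
	struct my_ctx *ctx = container_of(mn, struct my_ctx, mn);

	spin_lock(&ctx->lock);
	ctx->invalidate_count++;		/* an invalidation is in flight */
	ctx->invalidate_seq++;			/* stale anything sampled earlier */
	my_unmap_secondary(ctx, start, end);	/* hypothetical helper */
	spin_unlock(&ctx->lock);
}

static void my_range_end(struct mmu_notifier *mn, struct mm_struct *mm,
			 unsigned long start, unsigned long end)
{
	struct my_ctx *ctx = container_of(mn, struct my_ctx, mn);

	spin_lock(&ctx->lock);
	ctx->invalidate_count--;
	spin_unlock(&ctx->lock);
}

/* Fault side: only install a mapping if no invalidation raced with us. */
static int my_handle_fault(struct my_ctx *ctx, unsigned long addr)
{
	unsigned long seq;
	struct page *page;

	seq = READ_ONCE(ctx->invalidate_seq);
	page = my_lookup_cpu_page(ctx->mm, addr);	/* hypothetical, may sleep */

	spin_lock(&ctx->lock);
	if (ctx->invalidate_count || seq != ctx->invalidate_seq) {
		spin_unlock(&ctx->lock);
		return -EAGAIN;		/* retry: an invalidation ran meanwhile */
	}
	my_install_secondary_mapping(ctx, addr, page);	/* hypothetical */
	spin_unlock(&ctx->lock);
	return 0;
}

The fault side samples the sequence number before it does any sleeping work and only installs the mapping if no invalidation started or completed in between.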
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 127e2dd2e21c..4a879f6ff13b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,12 +225,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
-/* We do not have shadow page tables, hence the empty hooks */
-static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
-							  unsigned long address)
-{
-}
-
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 void kvm_arm_halt_guest(struct kvm *kvm);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d68630007b14..e923b58606e2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -326,12 +326,6 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
-/* We do not have shadow page tables, hence the empty hooks */
-static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
-							  unsigned long address)
-{
-}
-
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 void kvm_arm_halt_guest(struct kvm *kvm);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 2998479fd4e8..a9af1d2dcd69 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -938,11 +938,6 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
-static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
-						unsigned long address)
-{
-}
-
 /* Emulation */
 int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
 enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 8b3f1238d07f..e372ed871c51 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -67,11 +67,6 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
-static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
-					unsigned long address)
-{
-}
-
 #define HPTEG_CACHE_NUM (1 << 15)
 #define HPTEG_HASH_BITS_PTE 13
 #define HPTEG_HASH_BITS_PTE_LONG 12
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index b5d960d6db3d..4c7b8591f737 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -614,15 +614,6 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 	mmio_invalidate(npu_context, 1, address, true);
 }
 
-static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
-					struct mm_struct *mm,
-					unsigned long address)
-{
-	struct npu_context *npu_context = mn_to_npu_context(mn);
-
-	mmio_invalidate(npu_context, 1, address, true);
-}
-
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 					struct mm_struct *mm,
 					unsigned long start, unsigned long end)
@@ -640,7 +631,6 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
 	.release = pnv_npu2_mn_release,
 	.change_pte = pnv_npu2_mn_change_pte,
-	.invalidate_page = pnv_npu2_mn_invalidate_page,
 	.invalidate_range = pnv_npu2_mn_invalidate_range,
 };
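For drivers like the one above, the conversion is pure deletion: a single-page notification is just a one-page range, so the surviving range callback already covers it. Illustrative equivalence only, not code from the patch:

/*
 * Illustrative only: what the removed ->invalidate_page(mn, mm, address)
 * amounts to once only the range callbacks are left.
 */
static void one_page_invalidate(struct mmu_notifier *mn, struct mm_struct *mm,
				unsigned long address)
{
	if (mn->ops->invalidate_range)
		mn->ops->invalidate_range(mn, mm, address, address + PAGE_SIZE);
}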
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f4d120a3e22e..92c9032502d8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1375,8 +1375,6 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
-void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
-					   unsigned long address);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 05a5e57c6f39..272320eb328c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6734,17 +6734,6 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
 
-void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
-					   unsigned long address)
-{
-	/*
-	 * The physical address of apic access page is stored in the VMCS.
-	 * Update it when it becomes invalid.
-	 */
-	if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
-		kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
-}
-
 /*
  * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to the userspace. Otherwise, the value will be returned to the
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 6558a3ed57a7..e1cde6b80027 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -147,36 +147,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 }
 
 /**
- * amdgpu_mn_invalidate_page - callback to notify about mm change
- *
- * @mn: our notifier
- * @mn: the mm this callback is about
- * @address: address of invalidate page
- *
- * Invalidation of a single page. Blocks for all BOs mapping it
- * and unmap them by move them into system domain again.
- */
-static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn,
-				      struct mm_struct *mm,
-				      unsigned long address)
-{
-	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
-	struct interval_tree_node *it;
-
-	mutex_lock(&rmn->lock);
-
-	it = interval_tree_iter_first(&rmn->objects, address, address);
-	if (it) {
-		struct amdgpu_mn_node *node;
-
-		node = container_of(it, struct amdgpu_mn_node, it);
-		amdgpu_mn_invalidate_node(node, address, address);
-	}
-
-	mutex_unlock(&rmn->lock);
-}
-
-/**
  * amdgpu_mn_invalidate_range_start - callback to notify about mm change
  *
  * @mn: our notifier
@@ -215,7 +185,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 
 static const struct mmu_notifier_ops amdgpu_mn_ops = {
 	.release = amdgpu_mn_release,
-	.invalidate_page = amdgpu_mn_invalidate_page,
 	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
 };
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 8c4ec564e495..55e8f5ed8b3c 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -166,24 +166,6 @@ static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
 	return 0;
 }
 
-static void ib_umem_notifier_invalidate_page(struct mmu_notifier *mn,
-					     struct mm_struct *mm,
-					     unsigned long address)
-{
-	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
-
-	if (!context->invalidate_range)
-		return;
-
-	ib_ucontext_notifier_start_account(context);
-	down_read(&context->umem_rwsem);
-	rbt_ib_umem_for_each_in_range(&context->umem_tree, address,
-				      address + PAGE_SIZE,
-				      invalidate_page_trampoline, NULL);
-	up_read(&context->umem_rwsem);
-	ib_ucontext_notifier_end_account(context);
-}
-
 static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
 					     u64 end, void *cookie)
 {
@@ -237,7 +219,6 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
 static const struct mmu_notifier_ops ib_umem_notifiers = {
 	.release = ib_umem_notifier_release,
-	.invalidate_page = ib_umem_notifier_invalidate_page,
 	.invalidate_range_start = ib_umem_notifier_invalidate_range_start,
 	.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
 };
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index ccbf52c8ff6f..e4b56a0dd6d0 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -67,8 +67,6 @@ struct mmu_rb_handler {
 
 static unsigned long mmu_node_start(struct mmu_rb_node *);
 static unsigned long mmu_node_last(struct mmu_rb_node *);
-static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
-				     unsigned long);
 static inline void mmu_notifier_range_start(struct mmu_notifier *,
 					    struct mm_struct *,
 					    unsigned long, unsigned long);
@@ -82,7 +80,6 @@ static void do_remove(struct mmu_rb_handler *handler,
 static void handle_remove(struct work_struct *work);
 
 static const struct mmu_notifier_ops mn_opts = {
-	.invalidate_page = mmu_notifier_page,
 	.invalidate_range_start = mmu_notifier_range_start,
 };
@@ -285,12 +282,6 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
 	handler->ops->remove(handler->ops_arg, node);
 }
 
-static inline void mmu_notifier_page(struct mmu_notifier *mn,
-				     struct mm_struct *mm, unsigned long addr)
-{
-	mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE);
-}
-
 static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
 					    struct mm_struct *mm,
 					    unsigned long start,
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 6629c472eafd..dccf5b76eff2 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -391,13 +391,6 @@ static int mn_clear_flush_young(struct mmu_notifier *mn,
 	return 0;
 }
 
-static void mn_invalidate_page(struct mmu_notifier *mn,
-			       struct mm_struct *mm,
-			       unsigned long address)
-{
-	__mn_flush_page(mn, address);
-}
-
 static void mn_invalidate_range(struct mmu_notifier *mn,
 				struct mm_struct *mm,
 				unsigned long start, unsigned long end)
@@ -436,7 +429,6 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
 static const struct mmu_notifier_ops iommu_mn = {
 	.release = mn_release,
 	.clear_flush_young = mn_clear_flush_young,
-	.invalidate_page = mn_invalidate_page,
 	.invalidate_range = mn_invalidate_range,
 };
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index f167c0d84ebf..f620dccec8ee 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -223,14 +223,6 @@ static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
 	intel_flush_svm_range(svm, address, 1, 1, 0);
 }
 
-static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
-				  unsigned long address)
-{
-	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
-
-	intel_flush_svm_range(svm, address, 1, 1, 0);
-}
-
 /* Pages have been freed at this point */
 static void intel_invalidate_range(struct mmu_notifier *mn,
 				   struct mm_struct *mm,
@@ -285,7 +277,6 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 static const struct mmu_notifier_ops intel_mmuops = {
 	.release = intel_mm_release,
 	.change_pte = intel_change_pte,
-	.invalidate_page = intel_invalidate_page,
 	.invalidate_range = intel_invalidate_range,
 };
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
index 64d5760d069a..63d6246d6dff 100644
--- a/drivers/misc/mic/scif/scif_dma.c
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -200,16 +200,6 @@ static void scif_mmu_notifier_release(struct mmu_notifier *mn,
 	schedule_work(&scif_info.misc_work);
 }
 
-static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
-					      struct mm_struct *mm,
-					      unsigned long address)
-{
-	struct scif_mmu_notif *mmn;
-
-	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
-	scif_rma_destroy_tcw(mmn, address, PAGE_SIZE);
-}
-
 static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						     struct mm_struct *mm,
 						     unsigned long start,
@@ -235,7 +225,6 @@ static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
 	.release = scif_mmu_notifier_release,
 	.clear_flush_young = NULL,
-	.invalidate_page = scif_mmu_notifier_invalidate_page,
 	.invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
 	.invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index e936d43895d2..9918eda0e05f 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -247,17 +247,6 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn,
 	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
 }
 
-static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
-				unsigned long address)
-{
-	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
-						 ms_notifier);
-
-	STAT(mmu_invalidate_page);
-	gru_flush_tlb_range(gms, address, PAGE_SIZE);
-	gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
-}
-
 static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
 {
 	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
@@ -269,7 +258,6 @@ static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
 static const struct mmu_notifier_ops gru_mmuops = {
-	.invalidate_page = gru_invalidate_page,
 	.invalidate_range_start = gru_invalidate_range_start,
 	.invalidate_range_end = gru_invalidate_range_end,
 	.release = gru_release,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index f3bf8f4e2d6c..82360594fa8e 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -484,13 +484,6 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
 	mutex_unlock(&priv->lock);
 }
 
-static void mn_invl_page(struct mmu_notifier *mn,
-			 struct mm_struct *mm,
-			 unsigned long address)
-{
-	mn_invl_range_start(mn, mm, address, address + PAGE_SIZE);
-}
-
 static void mn_release(struct mmu_notifier *mn,
 		       struct mm_struct *mm)
 {
@@ -522,7 +515,6 @@ static void mn_release(struct mmu_notifier *mn,
 static const struct mmu_notifier_ops gntdev_mmu_ops = {
 	.release = mn_release,
-	.invalidate_page = mn_invl_page,
 	.invalidate_range_start = mn_invl_range_start,
 };
diff --git a/fs/dax.c b/fs/dax.c
index 865d42c63e23..ab925dc6647a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -646,11 +646,10 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 	pte_t pte, *ptep = NULL;
 	pmd_t *pmdp = NULL;
 	spinlock_t *ptl;
-	bool changed;
 
 	i_mmap_lock_read(mapping);
 	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
-		unsigned long address;
+		unsigned long address, start, end;
 
 		cond_resched();
@@ -658,8 +657,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 			continue;
 
 		address = pgoff_address(index, vma);
-		changed = false;
-		if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
+
+		/*
+		 * Note because we provide start/end to follow_pte_pmd it will
+		 * call mmu_notifier_invalidate_range_start() on our behalf
+		 * before taking any lock.
+		 */
+		if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl))
 			continue;
 
 		if (pmdp) {
@@ -676,7 +680,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
 			pmd = pmd_wrprotect(pmd);
 			pmd = pmd_mkclean(pmd);
 			set_pmd_at(vma->vm_mm, address, pmdp, pmd);
-			changed = true;
+			mmu_notifier_invalidate_range(vma->vm_mm, start, end);
 unlock_pmd:
 			spin_unlock(ptl);
 #endif
@@ -691,13 +695,12 @@ unlock_pmd:
 			pte = pte_wrprotect(pte);
 			pte = pte_mkclean(pte);
 			set_pte_at(vma->vm_mm, address, ptep, pte);
-			changed = true;
+			mmu_notifier_invalidate_range(vma->vm_mm, start, end);
 unlock_pte:
 			pte_unmap_unlock(ptep, ptl);
 		}
 
-		if (changed)
-			mmu_notifier_invalidate_page(vma->vm_mm, address);
+		mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
 	}
 	i_mmap_unlock_read(mapping);
 }
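The new contract used by the dax hunk above: on success, follow_pte_pmd() has already issued mmu_notifier_invalidate_range_start() for the range it reports through start/end, and the caller owes the matching range_end() once the page table lock is dropped. A condensed caller skeleton, with the dax-specific cleaning elided and the helper name made up:

/* Illustrative caller skeleton only; not code from the patch. */
static void wrprotect_one_mapping(struct vm_area_struct *vma, unsigned long address)
{
	unsigned long start, end;
	pte_t *ptep = NULL;
	pmd_t *pmdp = NULL;
	spinlock_t *ptl;

	if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl))
		return;			/* nothing mapped, nothing to undo */

	/* ... clean/write-protect the entry under ptl, then notify ... */
	mmu_notifier_invalidate_range(vma->vm_mm, start, end);

	if (pmdp)
		spin_unlock(ptl);
	else
		pte_unmap_unlock(ptep, ptl);

	/* follow_pte_pmd() called range_start() on our behalf; pair it here. */
	mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
}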
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 46b9ac5e8569..c1f6c95f3496 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1260,6 +1260,7 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
 int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+		   unsigned long *start, unsigned long *end,
 		   pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
 int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 	unsigned long *pfn);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index c91b3bcd158f..7b2e31b1745a 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -95,17 +95,6 @@ struct mmu_notifier_ops {
 			   pte_t pte);
 
 	/*
-	 * Before this is invoked any secondary MMU is still ok to
-	 * read/write to the page previously pointed to by the Linux
-	 * pte because the page hasn't been freed yet and it won't be
-	 * freed until this returns. If required set_page_dirty has to
-	 * be called internally to this method.
-	 */
-	void (*invalidate_page)(struct mmu_notifier *mn,
-				struct mm_struct *mm,
-				unsigned long address);
-
-	/*
 	 * invalidate_range_start() and invalidate_range_end() must be
 	 * paired and are called only when the mmap_sem and/or the
 	 * locks protecting the reverse maps are held. If the subsystem
@@ -220,8 +209,6 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm,
 				     unsigned long address);
 extern void __mmu_notifier_change_pte(struct mm_struct *mm,
 				      unsigned long address, pte_t pte);
-extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
-					   unsigned long address);
 extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 				  unsigned long start, unsigned long end);
 extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -268,13 +255,6 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 		__mmu_notifier_change_pte(mm, address, pte);
 }
 
-static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
-						unsigned long address)
-{
-	if (mm_has_notifiers(mm))
-		__mmu_notifier_invalidate_page(mm, address);
-}
-
 static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
@@ -442,11 +422,6 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 {
 }
 
-static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
-						unsigned long address)
-{
-}
-
 static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
diff --git a/mm/memory.c b/mm/memory.c
index fe2fba27ded2..56e48e4593cb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4008,7 +4008,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 #endif /* __PAGETABLE_PMD_FOLDED */
 
 static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
-			    pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
+			    unsigned long *start, unsigned long *end,
+			    pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -4035,17 +4036,29 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 		if (!pmdpp)
 			goto out;
 
+		if (start && end) {
+			*start = address & PMD_MASK;
+			*end = *start + PMD_SIZE;
+			mmu_notifier_invalidate_range_start(mm, *start, *end);
+		}
 		*ptlp = pmd_lock(mm, pmd);
 		if (pmd_huge(*pmd)) {
 			*pmdpp = pmd;
 			return 0;
 		}
 		spin_unlock(*ptlp);
+		if (start && end)
+			mmu_notifier_invalidate_range_end(mm, *start, *end);
 	}
 
 	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
 		goto out;
 
+	if (start && end) {
+		*start = address & PAGE_MASK;
+		*end = *start + PAGE_SIZE;
+		mmu_notifier_invalidate_range_start(mm, *start, *end);
+	}
 	ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
 	if (!pte_present(*ptep))
 		goto unlock;
@@ -4053,6 +4066,8 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 		return 0;
 unlock:
 	pte_unmap_unlock(ptep, *ptlp);
+	if (start && end)
+		mmu_notifier_invalidate_range_end(mm, *start, *end);
 out:
 	return -EINVAL;
 }
@@ -4064,20 +4079,21 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address,
 	/* (void) is needed to make gcc happy */
 	(void) __cond_lock(*ptlp,
-			   !(res = __follow_pte_pmd(mm, address, ptepp, NULL,
-			   ptlp)));
+			   !(res = __follow_pte_pmd(mm, address, NULL, NULL,
+						    ptepp, NULL, ptlp)));
 	return res;
 }
 
 int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
+		   unsigned long *start, unsigned long *end,
 		   pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
 	int res;
 
 	/* (void) is needed to make gcc happy */
 	(void) __cond_lock(*ptlp,
-			   !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
-			   ptlp)));
+			   !(res = __follow_pte_pmd(mm, address, start, end,
+						    ptepp, pmdpp, ptlp)));
 	return res;
 }
 EXPORT_SYMBOL(follow_pte_pmd);
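The start/end values __follow_pte_pmd() reports are simply the aligned span covering whichever entry was found. Illustrative arithmetic only, assuming x86-64's 4 KiB pages and 2 MiB PMDs:

	unsigned long address = 0x7f1234567abcUL;

	/* PTE case: one base page around the address */
	unsigned long pte_start = address & PAGE_MASK;		/* 0x7f1234567000 */
	unsigned long pte_end = pte_start + PAGE_SIZE;		/* 0x7f1234568000 */

	/* huge-PMD case: the whole 2 MiB region around the address */
	unsigned long pmd_start = address & PMD_MASK;		/* 0x7f1234400000 */
	unsigned long pmd_end = pmd_start + PMD_SIZE;		/* 0x7f1234600000 */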
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 54ca54562928..314285284e6e 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -174,20 +174,6 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
 	srcu_read_unlock(&srcu, id);
 }
 
-void __mmu_notifier_invalidate_page(struct mm_struct *mm,
-				    unsigned long address)
-{
-	struct mmu_notifier *mn;
-	int id;
-
-	id = srcu_read_lock(&srcu);
-	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
-		if (mn->ops->invalidate_page)
-			mn->ops->invalidate_page(mn, mm, address);
-	}
-	srcu_read_unlock(&srcu, id);
-}
-
 void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
diff --git a/mm/rmap.c b/mm/rmap.c
index c8993c63eb25..c570f82e6827 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -887,11 +887,21 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		.address = address,
 		.flags = PVMW_SYNC,
 	};
+	unsigned long start = address, end;
 	int *cleaned = arg;
 
+	/*
+	 * We have to assume the worse case ie pmd for invalidation. Note that
+	 * the page can not be free from this function.
+	 */
+	end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+	mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+
 	while (page_vma_mapped_walk(&pvmw)) {
+		unsigned long cstart, cend;
 		int ret = 0;
-		address = pvmw.address;
+
+		cstart = address = pvmw.address;
 		if (pvmw.pte) {
 			pte_t entry;
 			pte_t *pte = pvmw.pte;
@@ -904,6 +914,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			entry = pte_wrprotect(entry);
 			entry = pte_mkclean(entry);
 			set_pte_at(vma->vm_mm, address, pte, entry);
+			cend = cstart + PAGE_SIZE;
 			ret = 1;
 		} else {
 #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
@@ -918,6 +929,8 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			entry = pmd_wrprotect(entry);
 			entry = pmd_mkclean(entry);
 			set_pmd_at(vma->vm_mm, address, pmd, entry);
+			cstart &= PMD_MASK;
+			cend = cstart + PMD_SIZE;
 			ret = 1;
 #else
 			/* unexpected pmd-mapped page? */
@@ -926,11 +939,13 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		if (ret) {
-			mmu_notifier_invalidate_page(vma->vm_mm, address);
+			mmu_notifier_invalidate_range(vma->vm_mm, cstart, cend);
 			(*cleaned)++;
 		}
 	}
 
+	mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+
 	return true;
 }
@@ -1324,6 +1339,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	pte_t pteval;
 	struct page *subpage;
 	bool ret = true;
+	unsigned long start = address, end;
 	enum ttu_flags flags = (enum ttu_flags)arg;
 
 	/* munlock has nothing to gain from examining un-locked vmas */
@@ -1335,6 +1351,14 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 						flags & TTU_MIGRATION, page);
 	}
 
+	/*
+	 * We have to assume the worse case ie pmd for invalidation. Note that
+	 * the page can not be free in this function as call of try_to_unmap()
+	 * must hold a reference on the page.
+	 */
+	end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+	mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+
 	while (page_vma_mapped_walk(&pvmw)) {
 		/*
 		 * If the page is mlock()d, we cannot swap it out.
@@ -1445,6 +1469,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
 				WARN_ON_ONCE(1);
 				ret = false;
+				/* We have to invalidate as we cleared the pte */
 				page_vma_mapped_walk_done(&pvmw);
 				break;
 			}
@@ -1490,8 +1515,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 discard:
 		page_remove_rmap(subpage, PageHuge(page));
 		put_page(page);
-		mmu_notifier_invalidate_page(mm, address);
+		mmu_notifier_invalidate_range(mm, address,
+					      address + PAGE_SIZE);
 	}
 
+	mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+
 	return ret;
 }
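Both rmap functions above size the invalidation for the worst case, the whole compound page, clamped to the VMA. A small illustrative helper (not part of the patch) showing the same computation:

/*
 * Illustrative only: how page_mkclean_one() and try_to_unmap_one() derive
 * the end of the range they pass to mmu_notifier_invalidate_range_start(),
 * assuming 4 KiB base pages.
 */
static unsigned long worst_case_end(struct page *page,
				    struct vm_area_struct *vma,
				    unsigned long start)
{
	/* order 0 -> 4 KiB; order 9 (a PMD-mapped THP on x86-64) -> 2 MiB */
	unsigned long span = PAGE_SIZE << compound_order(page);

	/* never invalidate past the VMA the walk is confined to */
	return min(vma->vm_end, start + span);
}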
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 15252d723b54..4d81f6ded88e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -322,47 +322,6 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
 	return container_of(mn, struct kvm, mmu_notifier);
 }
 
-static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
-					     struct mm_struct *mm,
-					     unsigned long address)
-{
-	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush, idx;
-
-	/*
-	 * When ->invalidate_page runs, the linux pte has been zapped
-	 * already but the page is still allocated until
-	 * ->invalidate_page returns. So if we increase the sequence
-	 * here the kvm page fault will notice if the spte can't be
-	 * established because the page is going to be freed. If
-	 * instead the kvm page fault establishes the spte before
-	 * ->invalidate_page runs, kvm_unmap_hva will release it
-	 * before returning.
-	 *
-	 * The sequence increase only need to be seen at spin_unlock
-	 * time, and not at spin_lock time.
-	 *
-	 * Increasing the sequence after the spin_unlock would be
-	 * unsafe because the kvm page fault could then establish the
-	 * pte after kvm_unmap_hva returned, without noticing the page
-	 * is going to be freed.
-	 */
-	idx = srcu_read_lock(&kvm->srcu);
-	spin_lock(&kvm->mmu_lock);
-
-	kvm->mmu_notifier_seq++;
-	need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
-	/* we've to flush the tlb before the pages can be freed */
-	if (need_tlb_flush)
-		kvm_flush_remote_tlbs(kvm);
-
-	spin_unlock(&kvm->mmu_lock);
-
-	kvm_arch_mmu_notifier_invalidate_page(kvm, address);
-
-	srcu_read_unlock(&kvm->srcu, idx);
-}
-
 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 					struct mm_struct *mm,
 					unsigned long address,
@@ -510,7 +469,6 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 }
 
 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
-	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
 	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
 	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
 	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,