aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/kvm/mmu.c34
-rw-r--r--arch/x86/mm/gup.c3
-rw-r--r--include/linux/mmu_notifier.h26
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/mmu_notifier.c20
-rw-r--r--virt/kvm/kvm_main.c17
7 files changed, 105 insertions, 2 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aa75f21a9fba..ffd7f8d29187 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -822,6 +822,7 @@ extern bool kvm_rebooting;
822#define KVM_ARCH_WANT_MMU_NOTIFIER 822#define KVM_ARCH_WANT_MMU_NOTIFIER
823int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 823int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
824int kvm_age_hva(struct kvm *kvm, unsigned long hva); 824int kvm_age_hva(struct kvm *kvm, unsigned long hva);
825int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
825void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); 826void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
826int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); 827int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
827int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 828int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 47b2c3288b6b..f02b8edc3d44 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -945,6 +945,35 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
945 return young; 945 return young;
946} 946}
947 947
948static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
949 unsigned long data)
950{
951 u64 *spte;
952 int young = 0;
953
954 /*
955 * If there's no access bit in the secondary pte set by the
956 * hardware it's up to gup-fast/gup to set the access bit in
957 * the primary pte or in the page structure.
958 */
959 if (!shadow_accessed_mask)
960 goto out;
961
962 spte = rmap_next(kvm, rmapp, NULL);
963 while (spte) {
964 u64 _spte = *spte;
965 BUG_ON(!(_spte & PT_PRESENT_MASK));
966 young = _spte & PT_ACCESSED_MASK;
967 if (young) {
968 young = 1;
969 break;
970 }
971 spte = rmap_next(kvm, rmapp, spte);
972 }
973out:
974 return young;
975}
976
948#define RMAP_RECYCLE_THRESHOLD 1000 977#define RMAP_RECYCLE_THRESHOLD 1000
949 978
950static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) 979static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
@@ -965,6 +994,11 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva)
965 return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); 994 return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
966} 995}
967 996
997int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
998{
999 return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp);
1000}
1001
968#ifdef MMU_DEBUG 1002#ifdef MMU_DEBUG
969static int is_empty_shadow_page(u64 *spt) 1003static int is_empty_shadow_page(u64 *spt)
970{ 1004{
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 269aa53932e0..dbe34b931374 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -8,6 +8,7 @@
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/vmstat.h> 9#include <linux/vmstat.h>
10#include <linux/highmem.h> 10#include <linux/highmem.h>
11#include <linux/swap.h>
11 12
12#include <asm/pgtable.h> 13#include <asm/pgtable.h>
13 14
@@ -89,6 +90,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
89 VM_BUG_ON(!pfn_valid(pte_pfn(pte))); 90 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
90 page = pte_page(pte); 91 page = pte_page(pte);
91 get_page(page); 92 get_page(page);
93 SetPageReferenced(page);
92 pages[*nr] = page; 94 pages[*nr] = page;
93 (*nr)++; 95 (*nr)++;
94 96
@@ -103,6 +105,7 @@ static inline void get_head_page_multiple(struct page *page, int nr)
103 VM_BUG_ON(page != compound_head(page)); 105 VM_BUG_ON(page != compound_head(page));
104 VM_BUG_ON(page_count(page) == 0); 106 VM_BUG_ON(page_count(page) == 0);
105 atomic_add(nr, &page->_count); 107 atomic_add(nr, &page->_count);
108 SetPageReferenced(page);
106} 109}
107 110
108static inline void get_huge_page_tail(struct page *page) 111static inline void get_huge_page_tail(struct page *page)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index cbfab1e9957d..cc2e7dfea9d7 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -62,6 +62,16 @@ struct mmu_notifier_ops {
62 unsigned long address); 62 unsigned long address);
63 63
64 /* 64 /*
65 * test_young is called to check the young/accessed bitflag in
66 * the secondary pte. This is used to know if the page is
67 * frequently used without actually clearing the flag or tearing
68 * down the secondary mapping on the page.
69 */
70 int (*test_young)(struct mmu_notifier *mn,
71 struct mm_struct *mm,
72 unsigned long address);
73
74 /*
65 * change_pte is called in cases that pte mapping to page is changed: 75 * change_pte is called in cases that pte mapping to page is changed:
66 * for example, when ksm remaps pte to point to a new shared page. 76 * for example, when ksm remaps pte to point to a new shared page.
67 */ 77 */
@@ -163,6 +173,8 @@ extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
163extern void __mmu_notifier_release(struct mm_struct *mm); 173extern void __mmu_notifier_release(struct mm_struct *mm);
164extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, 174extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
165 unsigned long address); 175 unsigned long address);
176extern int __mmu_notifier_test_young(struct mm_struct *mm,
177 unsigned long address);
166extern void __mmu_notifier_change_pte(struct mm_struct *mm, 178extern void __mmu_notifier_change_pte(struct mm_struct *mm,
167 unsigned long address, pte_t pte); 179 unsigned long address, pte_t pte);
168extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, 180extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
@@ -186,6 +198,14 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
186 return 0; 198 return 0;
187} 199}
188 200
/*
 * Ask the notifiers registered on @mm whether @address is young.
 * Returns 0 without calling into the notifier core when @mm has no
 * notifiers; otherwise returns the result of __mmu_notifier_test_young().
 */
static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	if (!mm_has_notifiers(mm))
		return 0;

	return __mmu_notifier_test_young(mm, address);
}
208
189static inline void mmu_notifier_change_pte(struct mm_struct *mm, 209static inline void mmu_notifier_change_pte(struct mm_struct *mm,
190 unsigned long address, pte_t pte) 210 unsigned long address, pte_t pte)
191{ 211{
@@ -313,6 +333,12 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
313 return 0; 333 return 0;
314} 334}
315 335
/*
 * Stub variant of mmu_notifier_test_young(): ignores both arguments and
 * always reports "not young" (0).
 */
static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	(void)mm;
	(void)address;

	return 0;
}
341
316static inline void mmu_notifier_change_pte(struct mm_struct *mm, 342static inline void mmu_notifier_change_pte(struct mm_struct *mm,
317 unsigned long address, pte_t pte) 343 unsigned long address, pte_t pte)
318{ 344{
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 915809b16edf..39d7df40c067 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1632,7 +1632,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
1632 VM_BUG_ON(PageLRU(page)); 1632 VM_BUG_ON(PageLRU(page));
1633 1633
1634 /* If there is no mapped pte young don't collapse the page */ 1634 /* If there is no mapped pte young don't collapse the page */
1635 if (pte_young(pteval)) 1635 if (pte_young(pteval) || PageReferenced(page) ||
1636 mmu_notifier_test_young(vma->vm_mm, address))
1636 referenced = 1; 1637 referenced = 1;
1637 } 1638 }
1638 if (unlikely(!referenced)) 1639 if (unlikely(!referenced))
@@ -1892,7 +1893,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
1892 /* cannot use mapcount: can't collapse if there's a gup pin */ 1893 /* cannot use mapcount: can't collapse if there's a gup pin */
1893 if (page_count(page) != 1) 1894 if (page_count(page) != 1)
1894 goto out_unmap; 1895 goto out_unmap;
1895 if (pte_young(pteval)) 1896 if (pte_young(pteval) || PageReferenced(page) ||
1897 mmu_notifier_test_young(vma->vm_mm, address))
1896 referenced = 1; 1898 referenced = 1;
1897 } 1899 }
1898 if (referenced) 1900 if (referenced)
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 438951d366f2..8d032de4088e 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -100,6 +100,26 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
100 return young; 100 return young;
101} 101}
102 102
103int __mmu_notifier_test_young(struct mm_struct *mm,
104 unsigned long address)
105{
106 struct mmu_notifier *mn;
107 struct hlist_node *n;
108 int young = 0;
109
110 rcu_read_lock();
111 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
112 if (mn->ops->test_young) {
113 young = mn->ops->test_young(mn, mm, address);
114 if (young)
115 break;
116 }
117 }
118 rcu_read_unlock();
119
120 return young;
121}
122
103void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, 123void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
104 pte_t pte) 124 pte_t pte)
105{ 125{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 85ab7db0d366..4286d4766510 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -380,6 +380,22 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
380 return young; 380 return young;
381} 381}
382 382
383static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
384 struct mm_struct *mm,
385 unsigned long address)
386{
387 struct kvm *kvm = mmu_notifier_to_kvm(mn);
388 int young, idx;
389
390 idx = srcu_read_lock(&kvm->srcu);
391 spin_lock(&kvm->mmu_lock);
392 young = kvm_test_age_hva(kvm, address);
393 spin_unlock(&kvm->mmu_lock);
394 srcu_read_unlock(&kvm->srcu, idx);
395
396 return young;
397}
398
383static void kvm_mmu_notifier_release(struct mmu_notifier *mn, 399static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
384 struct mm_struct *mm) 400 struct mm_struct *mm)
385{ 401{
@@ -396,6 +412,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
396 .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, 412 .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
397 .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, 413 .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
398 .clear_flush_young = kvm_mmu_notifier_clear_flush_young, 414 .clear_flush_young = kvm_mmu_notifier_clear_flush_young,
415 .test_young = kvm_mmu_notifier_test_young,
399 .change_pte = kvm_mmu_notifier_change_pte, 416 .change_pte = kvm_mmu_notifier_change_pte,
400 .release = kvm_mmu_notifier_release, 417 .release = kvm_mmu_notifier_release,
401}; 418};