-rw-r--r--  arch/x86/include/asm/kvm_host.h |  1
-rw-r--r--  arch/x86/kvm/mmu.c              | 34
-rw-r--r--  arch/x86/mm/gup.c               |  3
-rw-r--r--  include/linux/mmu_notifier.h    | 26
-rw-r--r--  mm/huge_memory.c                |  6
-rw-r--r--  mm/mmu_notifier.c               | 20
-rw-r--r--  virt/kvm/kvm_main.c             | 17
7 files changed, 105 insertions, 2 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aa75f21a9fba..ffd7f8d29187 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -822,6 +822,7 @@ extern bool kvm_rebooting;
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 47b2c3288b6b..f02b8edc3d44 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -945,6 +945,35 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	return young;
 }
 
+static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+			      unsigned long data)
+{
+	u64 *spte;
+	int young = 0;
+
+	/*
+	 * If there's no access bit in the secondary pte set by the
+	 * hardware it's up to gup-fast/gup to set the access bit in
+	 * the primary pte or in the page structure.
+	 */
+	if (!shadow_accessed_mask)
+		goto out;
+
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		u64 _spte = *spte;
+		BUG_ON(!(_spte & PT_PRESENT_MASK));
+		young = _spte & PT_ACCESSED_MASK;
+		if (young) {
+			young = 1;
+			break;
+		}
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+out:
+	return young;
+}
+
 #define RMAP_RECYCLE_THRESHOLD 1000
 
 static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
@@ -965,6 +994,11 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 	return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
 }
 
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp);
+}
+
 #ifdef MMU_DEBUG
 static int is_empty_shadow_page(u64 *spt)
 {
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 269aa53932e0..dbe34b931374 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -8,6 +8,7 @@
 #include <linux/mm.h>
 #include <linux/vmstat.h>
 #include <linux/highmem.h>
+#include <linux/swap.h>
 
 #include <asm/pgtable.h>
 
@@ -89,6 +90,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
 		get_page(page);
+		SetPageReferenced(page);
 		pages[*nr] = page;
 		(*nr)++;
 
@@ -103,6 +105,7 @@ static inline void get_head_page_multiple(struct page *page, int nr)
 	VM_BUG_ON(page != compound_head(page));
 	VM_BUG_ON(page_count(page) == 0);
 	atomic_add(nr, &page->_count);
+	SetPageReferenced(page);
 }
 
 static inline void get_huge_page_tail(struct page *page)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index cbfab1e9957d..cc2e7dfea9d7 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -62,6 +62,16 @@ struct mmu_notifier_ops {
 			     unsigned long address);
 
 	/*
+	 * test_young is called to check the young/accessed bitflag in
+	 * the secondary pte. This is used to know if the page is
+	 * frequently used without actually clearing the flag or tearing
+	 * down the secondary mapping on the page.
+	 */
+	int (*test_young)(struct mmu_notifier *mn,
+			  struct mm_struct *mm,
+			  unsigned long address);
+
+	/*
 	 * change_pte is called in cases that pte mapping to page is changed:
 	 * for example, when ksm remaps pte to point to a new shared page.
 	 */
@@ -163,6 +173,8 @@ extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 					    unsigned long address);
+extern int __mmu_notifier_test_young(struct mm_struct *mm,
+				     unsigned long address);
 extern void __mmu_notifier_change_pte(struct mm_struct *mm,
 				      unsigned long address, pte_t pte);
 extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
@@ -186,6 +198,14 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	return 0;
 }
 
+static inline int mmu_notifier_test_young(struct mm_struct *mm,
+					  unsigned long address)
+{
+	if (mm_has_notifiers(mm))
+		return __mmu_notifier_test_young(mm, address);
+	return 0;
+}
+
 static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 					   unsigned long address, pte_t pte)
 {
@@ -313,6 +333,12 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	return 0;
 }
 
+static inline int mmu_notifier_test_young(struct mm_struct *mm,
+					  unsigned long address)
+{
+	return 0;
+}
+
 static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 					   unsigned long address, pte_t pte)
 {
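
Editor's sketch (not part of the patch): a minimal example of how a secondary-MMU driver other than KVM could implement the new test_young callback, assuming it keeps its own accessed-bit tracking. my_dev, my_dev_pte_accessed and my_dev_notifier_ops are invented names for illustration only.

/*
 * Illustrative sketch only, not part of this patch. A hypothetical
 * secondary-MMU driver reports its accessed bit through test_young
 * without clearing it or tearing down the mapping (contrast with
 * clear_flush_young).
 */
#include <linux/mmu_notifier.h>
#include <linux/mm.h>

struct my_dev {
	struct mmu_notifier mn;		/* embedded notifier */
	/* ... device page-table state ... */
};

/* Hypothetical helper: consult the device's own accessed tracking. */
static bool my_dev_pte_accessed(struct my_dev *dev, unsigned long address)
{
	return false;
}

static int my_dev_test_young(struct mmu_notifier *mn, struct mm_struct *mm,
			     unsigned long address)
{
	struct my_dev *dev = container_of(mn, struct my_dev, mn);

	/* Read-only check: no flush, no invalidate, no bit clearing. */
	return my_dev_pte_accessed(dev, address);
}

static const struct mmu_notifier_ops my_dev_notifier_ops = {
	.test_young = my_dev_test_young,
	/* .clear_flush_young, .invalidate_*, .release wired up as usual */
};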
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 915809b16edf..39d7df40c067 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1632,7 +1632,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		VM_BUG_ON(PageLRU(page));
 
 		/* If there is no mapped pte young don't collapse the page */
-		if (pte_young(pteval))
+		if (pte_young(pteval) || PageReferenced(page) ||
+		    mmu_notifier_test_young(vma->vm_mm, address))
 			referenced = 1;
 	}
 	if (unlikely(!referenced))
@@ -1892,7 +1893,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		/* cannot use mapcount: can't collapse if there's a gup pin */
 		if (page_count(page) != 1)
 			goto out_unmap;
-		if (pte_young(pteval))
+		if (pte_young(pteval) || PageReferenced(page) ||
+		    mmu_notifier_test_young(vma->vm_mm, address))
 			referenced = 1;
 	}
 	if (referenced)
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 438951d366f2..8d032de4088e 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -100,6 +100,26 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	return young;
 }
 
+int __mmu_notifier_test_young(struct mm_struct *mm,
+			      unsigned long address)
+{
+	struct mmu_notifier *mn;
+	struct hlist_node *n;
+	int young = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+		if (mn->ops->test_young) {
+			young = mn->ops->test_young(mn, mm, address);
+			if (young)
+				break;
+		}
+	}
+	rcu_read_unlock();
+
+	return young;
+}
+
 void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
 			       pte_t pte)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 85ab7db0d366..4286d4766510 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -380,6 +380,22 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 	return young;
 }
 
+static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
+				       struct mm_struct *mm,
+				       unsigned long address)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int young, idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+	young = kvm_test_age_hva(kvm, address);
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	return young;
+}
+
 static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 				     struct mm_struct *mm)
 {
@@ -396,6 +412,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
 	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
 	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
+	.test_young		= kvm_mmu_notifier_test_young,
 	.change_pte		= kvm_mmu_notifier_change_pte,
 	.release		= kvm_mmu_notifier_release,
 };
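
Editor's sketch (not part of the patch): the caller-side view of the three "young" sources this series combines, restating the khugepaged check from the mm/huge_memory.c hunks above as a single helper. page_is_hot is an invented name for illustration only.

/*
 * Illustrative sketch only, not part of this patch. A page is treated
 * as recently used if any of the following saw an access: the primary
 * pte, the struct page referenced flag (set by gup/gup-fast above), or
 * a secondary MMU queried through the new mmu notifier callback.
 */
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/page-flags.h>

static bool page_is_hot(struct vm_area_struct *vma, unsigned long address,
			pte_t pteval, struct page *page)
{
	return pte_young(pteval) ||		/* primary pte accessed bit */
	       PageReferenced(page) ||		/* set by gup/gup-fast */
	       mmu_notifier_test_young(vma->vm_mm, address);	/* e.g. KVM sptes */
}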