aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorAndrea Arcangeli <aarcange@redhat.com>2011-01-13 18:47:10 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 20:32:46 -0500
commit8ee53820edfd1f3b6554c593f337148dd3d7fc91 (patch)
treeca69957e928cd3efa1b47f92dcfb00591702684c /arch
parent4b7167b9ff9b7f3f528cbc4c7d02ebd275b9b10c (diff)
thp: mmu_notifier_test_young
For GRU and EPT, we need gup-fast to set the referenced bit too (this is why it's correct to return 0 when shadow_accessed_mask is zero: it requires gup-fast to set the referenced bit). qemu-kvm access already sets the young bit in the pte if it isn't zero-copy; if it's zero-copy or a shadow paging EPT minor fault we rely on gup-fast to signal the page is in use... We also need to check the young bits on the secondary pagetables for NPT and not nested shadow mmu as the data may never get accessed again by the primary pte. Without this closer accuracy, we'd have to remove the heuristic that avoids collapsing hugepages in hugepage virtual regions that have not even a single subpage in use. ->test_young is fully backward compatible with GRU and other usages that don't have young bits in pagetables set by the hardware and that should nuke the secondary mmu mappings when ->clear_flush_young runs just like EPT does. Removing the heuristic that checks the young bit in khugepaged/collapse_huge_page completely probably isn't so bad either, but I thought it was worth it and this makes it reliable. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/kvm/mmu.c34
-rw-r--r--arch/x86/mm/gup.c3
3 files changed, 38 insertions, 0 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aa75f21a9fba..ffd7f8d29187 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -822,6 +822,7 @@ extern bool kvm_rebooting;
822#define KVM_ARCH_WANT_MMU_NOTIFIER 822#define KVM_ARCH_WANT_MMU_NOTIFIER
823int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 823int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
824int kvm_age_hva(struct kvm *kvm, unsigned long hva); 824int kvm_age_hva(struct kvm *kvm, unsigned long hva);
825int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
825void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); 826void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
826int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); 827int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
827int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 828int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 47b2c3288b6b..f02b8edc3d44 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -945,6 +945,35 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
945 return young; 945 return young;
946} 946}
947 947
948static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
949 unsigned long data)
950{
951 u64 *spte;
952 int young = 0;
953
954 /*
955 * If there's no access bit in the secondary pte set by the
956 * hardware it's up to gup-fast/gup to set the access bit in
957 * the primary pte or in the page structure.
958 */
959 if (!shadow_accessed_mask)
960 goto out;
961
962 spte = rmap_next(kvm, rmapp, NULL);
963 while (spte) {
964 u64 _spte = *spte;
965 BUG_ON(!(_spte & PT_PRESENT_MASK));
966 young = _spte & PT_ACCESSED_MASK;
967 if (young) {
968 young = 1;
969 break;
970 }
971 spte = rmap_next(kvm, rmapp, spte);
972 }
973out:
974 return young;
975}
976
948#define RMAP_RECYCLE_THRESHOLD 1000 977#define RMAP_RECYCLE_THRESHOLD 1000
949 978
950static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) 979static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
@@ -965,6 +994,11 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva)
965 return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); 994 return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
966} 995}
967 996
997int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
998{
999 return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp);
1000}
1001
968#ifdef MMU_DEBUG 1002#ifdef MMU_DEBUG
969static int is_empty_shadow_page(u64 *spt) 1003static int is_empty_shadow_page(u64 *spt)
970{ 1004{
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 269aa53932e0..dbe34b931374 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -8,6 +8,7 @@
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/vmstat.h> 9#include <linux/vmstat.h>
10#include <linux/highmem.h> 10#include <linux/highmem.h>
11#include <linux/swap.h>
11 12
12#include <asm/pgtable.h> 13#include <asm/pgtable.h>
13 14
@@ -89,6 +90,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
89 VM_BUG_ON(!pfn_valid(pte_pfn(pte))); 90 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
90 page = pte_page(pte); 91 page = pte_page(pte);
91 get_page(page); 92 get_page(page);
93 SetPageReferenced(page);
92 pages[*nr] = page; 94 pages[*nr] = page;
93 (*nr)++; 95 (*nr)++;
94 96
@@ -103,6 +105,7 @@ static inline void get_head_page_multiple(struct page *page, int nr)
103 VM_BUG_ON(page != compound_head(page)); 105 VM_BUG_ON(page != compound_head(page));
104 VM_BUG_ON(page_count(page) == 0); 106 VM_BUG_ON(page_count(page) == 0);
105 atomic_add(nr, &page->_count); 107 atomic_add(nr, &page->_count);
108 SetPageReferenced(page);
106} 109}
107 110
108static inline void get_huge_page_tail(struct page *page) 111static inline void get_huge_page_tail(struct page *page)