aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Andrea Arcangeli <aarcange@redhat.com> 2011-01-13 18:47:10 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-01-13 20:32:46 -0500
commit 8ee53820edfd1f3b6554c593f337148dd3d7fc91 (patch)
tree ca69957e928cd3efa1b47f92dcfb00591702684c /mm
parent 4b7167b9ff9b7f3f528cbc4c7d02ebd275b9b10c (diff)
thp: mmu_notifier_test_young
For GRU and EPT, we need gup-fast to set the referenced bit too (this is why it's correct to return 0 when shadow_access_mask is zero: it requires gup-fast to set the referenced bit). qemu-kvm access already sets the young bit in the pte if it isn't zero-copy; if it's zero-copy or a shadow paging EPT minor fault, we rely on gup-fast to signal the page is in use... We also need to check the young bits on the secondary pagetables for NPT and not nested shadow mmu, as the data may never get accessed again by the primary pte. Without this closer accuracy, we'd have to remove the heuristic that avoids collapsing hugepages in hugepage virtual regions that have not even a single subpage in use. ->test_young is fully backwards compatible with GRU and other usages that don't have young bits in pagetables set by the hardware and that should nuke the secondary mmu mappings when ->clear_flush_young runs, just like EPT does. Removing the heuristic that checks the young bit in khugepaged/collapse_huge_page completely probably isn't so bad either, but I thought it was worth it and this makes it reliable. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/huge_memory.c 6
-rw-r--r-- mm/mmu_notifier.c 20
2 files changed, 24 insertions, 2 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 915809b16edf..39d7df40c067 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1632,7 +1632,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
1632 VM_BUG_ON(PageLRU(page)); 1632 VM_BUG_ON(PageLRU(page));
1633 1633
1634 /* If there is no mapped pte young don't collapse the page */ 1634 /* If there is no mapped pte young don't collapse the page */
1635 if (pte_young(pteval)) 1635 if (pte_young(pteval) || PageReferenced(page) ||
1636 mmu_notifier_test_young(vma->vm_mm, address))
1636 referenced = 1; 1637 referenced = 1;
1637 } 1638 }
1638 if (unlikely(!referenced)) 1639 if (unlikely(!referenced))
@@ -1892,7 +1893,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
1892 /* cannot use mapcount: can't collapse if there's a gup pin */ 1893 /* cannot use mapcount: can't collapse if there's a gup pin */
1893 if (page_count(page) != 1) 1894 if (page_count(page) != 1)
1894 goto out_unmap; 1895 goto out_unmap;
1895 if (pte_young(pteval)) 1896 if (pte_young(pteval) || PageReferenced(page) ||
1897 mmu_notifier_test_young(vma->vm_mm, address))
1896 referenced = 1; 1898 referenced = 1;
1897 } 1899 }
1898 if (referenced) 1900 if (referenced)
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 438951d366f2..8d032de4088e 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -100,6 +100,26 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
100 return young; 100 return young;
101} 101}
102 102
103int __mmu_notifier_test_young(struct mm_struct *mm,
104 unsigned long address)
105{
106 struct mmu_notifier *mn;
107 struct hlist_node *n;
108 int young = 0;
109
110 rcu_read_lock();
111 hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
112 if (mn->ops->test_young) {
113 young = mn->ops->test_young(mn, mm, address);
114 if (young)
115 break;
116 }
117 }
118 rcu_read_unlock();
119
120 return young;
121}
122
103void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, 123void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
104 pte_t pte) 124 pte_t pte)
105{ 125{