summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorKirill A. Shutemov <kirill.shutemov@linux.intel.com>2017-02-24 17:57:45 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-24 20:46:55 -0500
commitace71a19cec5eb430207c3269d8a2683f0574306 (patch)
treea4008d66fc253ba7a0b15c80f9df6306aa409ec3 /mm
parentc8394812e56fbc334d815226268cea69b447d461 (diff)
mm: introduce page_vma_mapped_walk()
Introduce a new interface to check if a page is mapped into a vma. It aims to address shortcomings of page_check_address{,_transhuge}. The existing interface is not able to handle PTE-mapped THPs: it only finds the first PTE. The rest are left unnoticed. page_vma_mapped_walk() iterates over all possible mappings of the page in the vma. Link: http://lkml.kernel.org/r/20170129173858.45174-3-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Hillf Danton <hillf.zj@alibaba-inc.com> Cc: Hugh Dickins <hughd@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rik van Riel <riel@redhat.com> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile6
-rw-r--r--mm/huge_memory.c9
-rw-r--r--mm/page_vma_mapped.c188
3 files changed, 198 insertions, 5 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 433eaf9a876e..aa0aa17cb413 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -23,8 +23,10 @@ KCOV_INSTRUMENT_vmstat.o := n
23 23
24mmu-y := nommu.o 24mmu-y := nommu.o
25mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ 25mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
26 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ 26 mlock.o mmap.o mprotect.o mremap.o msync.o \
27 vmalloc.o pagewalk.o pgtable-generic.o 27 page_vma_mapped.o pagewalk.o pgtable-generic.o \
28 rmap.o vmalloc.o
29
28 30
29ifdef CONFIG_CROSS_MEMORY_ATTACH 31ifdef CONFIG_CROSS_MEMORY_ATTACH
30mmu-$(CONFIG_MMU) += process_vm_access.o 32mmu-$(CONFIG_MMU) += process_vm_access.o
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 85742ac5b32e..a7bac4f2b78a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2129,9 +2129,12 @@ static void freeze_page(struct page *page)
2129static void unfreeze_page(struct page *page) 2129static void unfreeze_page(struct page *page)
2130{ 2130{
2131 int i; 2131 int i;
2132 2132 if (PageTransHuge(page)) {
2133 for (i = 0; i < HPAGE_PMD_NR; i++) 2133 remove_migration_ptes(page, page, true);
2134 remove_migration_ptes(page + i, page + i, true); 2134 } else {
2135 for (i = 0; i < HPAGE_PMD_NR; i++)
2136 remove_migration_ptes(page + i, page + i, true);
2137 }
2135} 2138}
2136 2139
2137static void __split_huge_page_tail(struct page *head, int tail, 2140static void __split_huge_page_tail(struct page *head, int tail,
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
new file mode 100644
index 000000000000..dc1a54826cf2
--- /dev/null
+++ b/mm/page_vma_mapped.c
@@ -0,0 +1,188 @@
1#include <linux/mm.h>
2#include <linux/rmap.h>
3#include <linux/hugetlb.h>
4#include <linux/swap.h>
5#include <linux/swapops.h>
6
7#include "internal.h"
8
9static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
10{
11 pmd_t pmde;
12 /*
13 * Make sure we don't re-load pmd between present and !trans_huge check.
14 * We need a consistent view.
15 */
16 pmde = READ_ONCE(*pvmw->pmd);
17 return pmd_present(pmde) && !pmd_trans_huge(pmde);
18}
19
20static inline bool not_found(struct page_vma_mapped_walk *pvmw)
21{
22 page_vma_mapped_walk_done(pvmw);
23 return false;
24}
25
26static bool map_pte(struct page_vma_mapped_walk *pvmw)
27{
28 pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address);
29 if (!(pvmw->flags & PVMW_SYNC)) {
30 if (pvmw->flags & PVMW_MIGRATION) {
31 if (!is_swap_pte(*pvmw->pte))
32 return false;
33 } else {
34 if (!pte_present(*pvmw->pte))
35 return false;
36 }
37 }
38 pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd);
39 spin_lock(pvmw->ptl);
40 return true;
41}
42
43static bool check_pte(struct page_vma_mapped_walk *pvmw)
44{
45 if (pvmw->flags & PVMW_MIGRATION) {
46#ifdef CONFIG_MIGRATION
47 swp_entry_t entry;
48 if (!is_swap_pte(*pvmw->pte))
49 return false;
50 entry = pte_to_swp_entry(*pvmw->pte);
51 if (!is_migration_entry(entry))
52 return false;
53 if (migration_entry_to_page(entry) - pvmw->page >=
54 hpage_nr_pages(pvmw->page)) {
55 return false;
56 }
57 if (migration_entry_to_page(entry) < pvmw->page)
58 return false;
59#else
60 WARN_ON_ONCE(1);
61#endif
62 } else {
63 if (!pte_present(*pvmw->pte))
64 return false;
65
66 /* THP can be referenced by any subpage */
67 if (pte_page(*pvmw->pte) - pvmw->page >=
68 hpage_nr_pages(pvmw->page)) {
69 return false;
70 }
71 if (pte_page(*pvmw->pte) < pvmw->page)
72 return false;
73 }
74
75 return true;
76}
77
78/**
79 * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
80 * @pvmw->address
81 * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
82 * must be set. pmd, pte and ptl must be NULL.
83 *
84 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
85 * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
86 * adjusted if needed (for PTE-mapped THPs).
87 *
88 * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
89 * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
90 * a loop to find all PTEs that map the THP.
91 *
92 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
93 * regardless of which page table level the page is mapped at. @pvmw->pmd is
94 * NULL.
95 *
96 * Retruns false if there are no more page table entries for the page in
97 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
98 *
99 * If you need to stop the walk before page_vma_mapped_walk() returned false,
100 * use page_vma_mapped_walk_done(). It will do the housekeeping.
101 */
102bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
103{
104 struct mm_struct *mm = pvmw->vma->vm_mm;
105 struct page *page = pvmw->page;
106 pgd_t *pgd;
107 pud_t *pud;
108
109 /* The only possible pmd mapping has been handled on last iteration */
110 if (pvmw->pmd && !pvmw->pte)
111 return not_found(pvmw);
112
113 /* Only for THP, seek to next pte entry makes sense */
114 if (pvmw->pte) {
115 if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
116 return not_found(pvmw);
117 goto next_pte;
118 }
119
120 if (unlikely(PageHuge(pvmw->page))) {
121 /* when pud is not present, pte will be NULL */
122 pvmw->pte = huge_pte_offset(mm, pvmw->address);
123 if (!pvmw->pte)
124 return false;
125
126 pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte);
127 spin_lock(pvmw->ptl);
128 if (!check_pte(pvmw))
129 return not_found(pvmw);
130 return true;
131 }
132restart:
133 pgd = pgd_offset(mm, pvmw->address);
134 if (!pgd_present(*pgd))
135 return false;
136 pud = pud_offset(pgd, pvmw->address);
137 if (!pud_present(*pud))
138 return false;
139 pvmw->pmd = pmd_offset(pud, pvmw->address);
140 if (pmd_trans_huge(*pvmw->pmd)) {
141 pvmw->ptl = pmd_lock(mm, pvmw->pmd);
142 if (!pmd_present(*pvmw->pmd))
143 return not_found(pvmw);
144 if (likely(pmd_trans_huge(*pvmw->pmd))) {
145 if (pvmw->flags & PVMW_MIGRATION)
146 return not_found(pvmw);
147 if (pmd_page(*pvmw->pmd) != page)
148 return not_found(pvmw);
149 return true;
150 } else {
151 /* THP pmd was split under us: handle on pte level */
152 spin_unlock(pvmw->ptl);
153 pvmw->ptl = NULL;
154 }
155 } else {
156 if (!check_pmd(pvmw))
157 return false;
158 }
159 if (!map_pte(pvmw))
160 goto next_pte;
161 while (1) {
162 if (check_pte(pvmw))
163 return true;
164next_pte: do {
165 pvmw->address += PAGE_SIZE;
166 if (pvmw->address >=
167 __vma_address(pvmw->page, pvmw->vma) +
168 hpage_nr_pages(pvmw->page) * PAGE_SIZE)
169 return not_found(pvmw);
170 /* Did we cross page table boundary? */
171 if (pvmw->address % PMD_SIZE == 0) {
172 pte_unmap(pvmw->pte);
173 if (pvmw->ptl) {
174 spin_unlock(pvmw->ptl);
175 pvmw->ptl = NULL;
176 }
177 goto restart;
178 } else {
179 pvmw->pte++;
180 }
181 } while (pte_none(*pvmw->pte));
182
183 if (!pvmw->ptl) {
184 pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
185 spin_lock(pvmw->ptl);
186 }
187 }
188}