author     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>   2017-02-24 17:57:45 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>         2017-02-24 20:46:55 -0500
commit     ace71a19cec5eb430207c3269d8a2683f0574306
tree       a4008d66fc253ba7a0b15c80f9df6306aa409ec3 /mm
parent     c8394812e56fbc334d815226268cea69b447d461
mm: introduce page_vma_mapped_walk()
Introduce a new interface to check whether a page is mapped into a vma.
It aims to address the shortcomings of page_check_address{,_transhuge}.

The existing interface cannot handle PTE-mapped THPs: it only finds the
first PTE, and the rest are left unnoticed.

page_vma_mapped_walk() iterates over all possible mappings of the page
in the vma.
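
As an illustrative sketch only (not part of this patch): a caller that wants
to visit every mapping of a page in a vma is expected to drive the walk in a
loop roughly as below. handle_pte_mapping() and handle_pmd_mapping() are
hypothetical placeholders, the start address is assumed to be computed by the
caller, and struct page_vma_mapped_walk itself is declared outside mm/, so it
does not appear in the 'mm'-limited diffstat shown here.

        struct page_vma_mapped_walk pvmw = {
                .page = page,
                .vma = vma,
                .address = address,     /* first address of the page within the vma */
                /* .pmd, .pte and .ptl are left NULL, as the interface requires */
        };

        while (page_vma_mapped_walk(&pvmw)) {
                if (pvmw.pte)
                        /* one PTE-level mapping (e.g. one subpage of a THP) */
                        handle_pte_mapping(pvmw.pte, pvmw.address);
                else
                        /* PMD-mapped THP: pvmw.pmd is set, pvmw.pte is NULL */
                        handle_pmd_mapping(pvmw.pmd, pvmw.address);
        }

For each hit the walk returns with pvmw.ptl held; the lock is dropped when the
walk reports no more entries, or by page_vma_mapped_walk_done() if the caller
stops early.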
Link: http://lkml.kernel.org/r/20170129173858.45174-3-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/Makefile          |   6
-rw-r--r--  mm/huge_memory.c     |   9
-rw-r--r--  mm/page_vma_mapped.c | 188
3 files changed, 198 insertions, 5 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 433eaf9a876e..aa0aa17cb413 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -23,8 +23,10 @@ KCOV_INSTRUMENT_vmstat.o := n
 
 mmu-y                   := nommu.o
 mmu-$(CONFIG_MMU)       := gup.o highmem.o memory.o mincore.o \
-                           mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
-                           vmalloc.o pagewalk.o pgtable-generic.o
+                           mlock.o mmap.o mprotect.o mremap.o msync.o \
+                           page_vma_mapped.o pagewalk.o pgtable-generic.o \
+                           rmap.o vmalloc.o
+
 
 ifdef CONFIG_CROSS_MEMORY_ATTACH
 mmu-$(CONFIG_MMU)       += process_vm_access.o
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 85742ac5b32e..a7bac4f2b78a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2129,9 +2129,12 @@ static void freeze_page(struct page *page)
 static void unfreeze_page(struct page *page)
 {
         int i;
-
-        for (i = 0; i < HPAGE_PMD_NR; i++)
-                remove_migration_ptes(page + i, page + i, true);
+        if (PageTransHuge(page)) {
+                remove_migration_ptes(page, page, true);
+        } else {
+                for (i = 0; i < HPAGE_PMD_NR; i++)
+                        remove_migration_ptes(page + i, page + i, true);
+        }
 }
 
 static void __split_huge_page_tail(struct page *head, int tail,
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
new file mode 100644
index 000000000000..dc1a54826cf2
--- /dev/null
+++ b/mm/page_vma_mapped.c
@@ -0,0 +1,188 @@
+#include <linux/mm.h>
+#include <linux/rmap.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+
+#include "internal.h"
+
+static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
+{
+        pmd_t pmde;
+        /*
+         * Make sure we don't re-load pmd between present and !trans_huge check.
+         * We need a consistent view.
+         */
+        pmde = READ_ONCE(*pvmw->pmd);
+        return pmd_present(pmde) && !pmd_trans_huge(pmde);
+}
+
+static inline bool not_found(struct page_vma_mapped_walk *pvmw)
+{
+        page_vma_mapped_walk_done(pvmw);
+        return false;
+}
+
+static bool map_pte(struct page_vma_mapped_walk *pvmw)
+{
+        pvmw->pte = pte_offset_map(pvmw->pmd, pvmw->address);
+        if (!(pvmw->flags & PVMW_SYNC)) {
+                if (pvmw->flags & PVMW_MIGRATION) {
+                        if (!is_swap_pte(*pvmw->pte))
+                                return false;
+                } else {
+                        if (!pte_present(*pvmw->pte))
+                                return false;
+                }
+        }
+        pvmw->ptl = pte_lockptr(pvmw->vma->vm_mm, pvmw->pmd);
+        spin_lock(pvmw->ptl);
+        return true;
+}
+
+static bool check_pte(struct page_vma_mapped_walk *pvmw)
+{
+        if (pvmw->flags & PVMW_MIGRATION) {
+#ifdef CONFIG_MIGRATION
+                swp_entry_t entry;
+                if (!is_swap_pte(*pvmw->pte))
+                        return false;
+                entry = pte_to_swp_entry(*pvmw->pte);
+                if (!is_migration_entry(entry))
+                        return false;
+                if (migration_entry_to_page(entry) - pvmw->page >=
+                                hpage_nr_pages(pvmw->page)) {
+                        return false;
+                }
+                if (migration_entry_to_page(entry) < pvmw->page)
+                        return false;
+#else
+                WARN_ON_ONCE(1);
+#endif
+        } else {
+                if (!pte_present(*pvmw->pte))
+                        return false;
+
+                /* THP can be referenced by any subpage */
+                if (pte_page(*pvmw->pte) - pvmw->page >=
+                                hpage_nr_pages(pvmw->page)) {
+                        return false;
+                }
+                if (pte_page(*pvmw->pte) < pvmw->page)
+                        return false;
+        }
+
+        return true;
+}
+
+/**
+ * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
+ * @pvmw->address
+ * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
+ * must be set. pmd, pte and ptl must be NULL.
+ *
+ * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
+ * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
+ * adjusted if needed (for PTE-mapped THPs).
+ *
+ * If @pvmw->pmd is set but @pvmw->pte is not, you have found a PMD-mapped page
+ * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
+ * a loop to find all PTEs that map the THP.
+ *
+ * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
+ * regardless of which page table level the page is mapped at. @pvmw->pmd is
+ * NULL.
+ *
+ * Returns false if there are no more page table entries for the page in
+ * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
+ *
+ * If you need to stop the walk before page_vma_mapped_walk() returned false,
+ * use page_vma_mapped_walk_done(). It will do the housekeeping.
+ */
+bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+{
+        struct mm_struct *mm = pvmw->vma->vm_mm;
+        struct page *page = pvmw->page;
+        pgd_t *pgd;
+        pud_t *pud;
+
+        /* The only possible pmd mapping has been handled on last iteration */
+        if (pvmw->pmd && !pvmw->pte)
+                return not_found(pvmw);
+
+        /* Only for THP, seek to next pte entry makes sense */
+        if (pvmw->pte) {
+                if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+                        return not_found(pvmw);
+                goto next_pte;
+        }
+
+        if (unlikely(PageHuge(pvmw->page))) {
+                /* when pud is not present, pte will be NULL */
+                pvmw->pte = huge_pte_offset(mm, pvmw->address);
+                if (!pvmw->pte)
+                        return false;
+
+                pvmw->ptl = huge_pte_lockptr(page_hstate(page), mm, pvmw->pte);
+                spin_lock(pvmw->ptl);
+                if (!check_pte(pvmw))
+                        return not_found(pvmw);
+                return true;
+        }
+restart:
+        pgd = pgd_offset(mm, pvmw->address);
+        if (!pgd_present(*pgd))
+                return false;
+        pud = pud_offset(pgd, pvmw->address);
+        if (!pud_present(*pud))
+                return false;
+        pvmw->pmd = pmd_offset(pud, pvmw->address);
+        if (pmd_trans_huge(*pvmw->pmd)) {
+                pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+                if (!pmd_present(*pvmw->pmd))
+                        return not_found(pvmw);
+                if (likely(pmd_trans_huge(*pvmw->pmd))) {
+                        if (pvmw->flags & PVMW_MIGRATION)
+                                return not_found(pvmw);
+                        if (pmd_page(*pvmw->pmd) != page)
+                                return not_found(pvmw);
+                        return true;
+                } else {
+                        /* THP pmd was split under us: handle on pte level */
+                        spin_unlock(pvmw->ptl);
+                        pvmw->ptl = NULL;
+                }
+        } else {
+                if (!check_pmd(pvmw))
+                        return false;
+        }
+        if (!map_pte(pvmw))
+                goto next_pte;
+        while (1) {
+                if (check_pte(pvmw))
+                        return true;
+next_pte:       do {
+                        pvmw->address += PAGE_SIZE;
+                        if (pvmw->address >=
+                                        __vma_address(pvmw->page, pvmw->vma) +
+                                        hpage_nr_pages(pvmw->page) * PAGE_SIZE)
+                                return not_found(pvmw);
+                        /* Did we cross page table boundary? */
+                        if (pvmw->address % PMD_SIZE == 0) {
+                                pte_unmap(pvmw->pte);
+                                if (pvmw->ptl) {
+                                        spin_unlock(pvmw->ptl);
+                                        pvmw->ptl = NULL;
+                                }
+                                goto restart;
+                        } else {
+                                pvmw->pte++;
+                        }
+                } while (pte_none(*pvmw->pte));
+
+                if (!pvmw->ptl) {
+                        pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+                        spin_lock(pvmw->ptl);
+                }
+        }
+}