diff options
author | Andrea Arcangeli <aarcange@redhat.com> | 2011-01-13 18:46:41 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 20:32:40 -0500 |
commit | db3eb96f4e6281b84dd33c8980dacc27f2efe177 (patch) | |
tree | 0746ed213778d8cb9ec18c1c7c16490c233d178b | |
parent | e2cda322648122dc400c85ada80eaddbc612ef6a (diff) |
thp: add pmd mangling functions to x86
Add the needed pmd mangling functions, symmetric with their pte
counterparts. pmdp_splitting_flush() is the only new addition among the
pmd_ methods; it is needed to serialize the VM against split_huge_page.
It simply sets the splitting bit atomically, in much the same way that
pmdp_clear_flush_young atomically clears the accessed bit.
pmdp_splitting_flush() also flushes the TLB to make the splitting bit
effective against gup_fast, even though a TLB flush would not otherwise
be required here: the TLB flush is simply the simplest operation we can
invoke to serialize pmdp_splitting_flush() against gup_fast.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | arch/x86/include/asm/pgtable.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_64.h | 119 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 66 |
3 files changed, 179 insertions, 8 deletions
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e576cbd7a343..3278038e9706 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -353,7 +353,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) | |||
353 | * Currently stuck as a macro due to indirect forward reference to | 353 | * Currently stuck as a macro due to indirect forward reference to |
354 | * linux/mmzone.h's __section_mem_map_addr() definition: | 354 | * linux/mmzone.h's __section_mem_map_addr() definition: |
355 | */ | 355 | */ |
356 | #define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) | 356 | #define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT) |
357 | 357 | ||
358 | /* | 358 | /* |
359 | * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] | 359 | * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 6dffd4c551cc..1fb61a74b2e1 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -59,6 +59,16 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) | |||
59 | native_set_pte(ptep, pte); | 59 | native_set_pte(ptep, pte); |
60 | } | 60 | } |
61 | 61 | ||
62 | static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
63 | { | ||
64 | *pmdp = pmd; | ||
65 | } | ||
66 | |||
67 | static inline void native_pmd_clear(pmd_t *pmd) | ||
68 | { | ||
69 | native_set_pmd(pmd, native_make_pmd(0)); | ||
70 | } | ||
71 | |||
62 | static inline pte_t native_ptep_get_and_clear(pte_t *xp) | 72 | static inline pte_t native_ptep_get_and_clear(pte_t *xp) |
63 | { | 73 | { |
64 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
@@ -72,14 +82,17 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) | |||
72 | #endif | 82 | #endif |
73 | } | 83 | } |
74 | 84 | ||
75 | static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) | 85 | static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) |
76 | { | ||
77 | *pmdp = pmd; | ||
78 | } | ||
79 | |||
80 | static inline void native_pmd_clear(pmd_t *pmd) | ||
81 | { | 86 | { |
82 | native_set_pmd(pmd, native_make_pmd(0)); | 87 | #ifdef CONFIG_SMP |
88 | return native_make_pmd(xchg(&xp->pmd, 0)); | ||
89 | #else | ||
90 | /* native_local_pmdp_get_and_clear, | ||
91 | but duplicated because of cyclic dependency */ | ||
92 | pmd_t ret = *xp; | ||
93 | native_pmd_clear(xp); | ||
94 | return ret; | ||
95 | #endif | ||
83 | } | 96 | } |
84 | 97 | ||
85 | static inline void native_set_pud(pud_t *pudp, pud_t pud) | 98 | static inline void native_set_pud(pud_t *pudp, pud_t pud) |
@@ -181,6 +194,98 @@ static inline int pmd_trans_huge(pmd_t pmd) | |||
181 | } | 194 | } |
182 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 195 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
183 | 196 | ||
197 | #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) | ||
198 | |||
199 | #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS | ||
200 | extern int pmdp_set_access_flags(struct vm_area_struct *vma, | ||
201 | unsigned long address, pmd_t *pmdp, | ||
202 | pmd_t entry, int dirty); | ||
203 | |||
204 | #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG | ||
205 | extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
206 | unsigned long addr, pmd_t *pmdp); | ||
207 | |||
208 | #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH | ||
209 | extern int pmdp_clear_flush_young(struct vm_area_struct *vma, | ||
210 | unsigned long address, pmd_t *pmdp); | ||
211 | |||
212 | |||
213 | #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH | ||
214 | extern void pmdp_splitting_flush(struct vm_area_struct *vma, | ||
215 | unsigned long addr, pmd_t *pmdp); | ||
216 | |||
217 | #define __HAVE_ARCH_PMD_WRITE | ||
218 | static inline int pmd_write(pmd_t pmd) | ||
219 | { | ||
220 | return pmd_flags(pmd) & _PAGE_RW; | ||
221 | } | ||
222 | |||
223 | #define __HAVE_ARCH_PMDP_GET_AND_CLEAR | ||
224 | static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, unsigned long addr, | ||
225 | pmd_t *pmdp) | ||
226 | { | ||
227 | pmd_t pmd = native_pmdp_get_and_clear(pmdp); | ||
228 | pmd_update(mm, addr, pmdp); | ||
229 | return pmd; | ||
230 | } | ||
231 | |||
232 | #define __HAVE_ARCH_PMDP_SET_WRPROTECT | ||
233 | static inline void pmdp_set_wrprotect(struct mm_struct *mm, | ||
234 | unsigned long addr, pmd_t *pmdp) | ||
235 | { | ||
236 | clear_bit(_PAGE_BIT_RW, (unsigned long *)&pmdp->pmd); | ||
237 | pmd_update(mm, addr, pmdp); | ||
238 | } | ||
239 | |||
240 | static inline int pmd_young(pmd_t pmd) | ||
241 | { | ||
242 | return pmd_flags(pmd) & _PAGE_ACCESSED; | ||
243 | } | ||
244 | |||
245 | static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) | ||
246 | { | ||
247 | pmdval_t v = native_pmd_val(pmd); | ||
248 | |||
249 | return native_make_pmd(v | set); | ||
250 | } | ||
251 | |||
252 | static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) | ||
253 | { | ||
254 | pmdval_t v = native_pmd_val(pmd); | ||
255 | |||
256 | return native_make_pmd(v & ~clear); | ||
257 | } | ||
258 | |||
259 | static inline pmd_t pmd_mkold(pmd_t pmd) | ||
260 | { | ||
261 | return pmd_clear_flags(pmd, _PAGE_ACCESSED); | ||
262 | } | ||
263 | |||
264 | static inline pmd_t pmd_wrprotect(pmd_t pmd) | ||
265 | { | ||
266 | return pmd_clear_flags(pmd, _PAGE_RW); | ||
267 | } | ||
268 | |||
269 | static inline pmd_t pmd_mkdirty(pmd_t pmd) | ||
270 | { | ||
271 | return pmd_set_flags(pmd, _PAGE_DIRTY); | ||
272 | } | ||
273 | |||
274 | static inline pmd_t pmd_mkhuge(pmd_t pmd) | ||
275 | { | ||
276 | return pmd_set_flags(pmd, _PAGE_PSE); | ||
277 | } | ||
278 | |||
279 | static inline pmd_t pmd_mkyoung(pmd_t pmd) | ||
280 | { | ||
281 | return pmd_set_flags(pmd, _PAGE_ACCESSED); | ||
282 | } | ||
283 | |||
284 | static inline pmd_t pmd_mkwrite(pmd_t pmd) | ||
285 | { | ||
286 | return pmd_set_flags(pmd, _PAGE_RW); | ||
287 | } | ||
288 | |||
184 | #endif /* !__ASSEMBLY__ */ | 289 | #endif /* !__ASSEMBLY__ */ |
185 | 290 | ||
186 | #endif /* _ASM_X86_PGTABLE_64_H */ | 291 | #endif /* _ASM_X86_PGTABLE_64_H */ |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8be8c7d7bc89..65e92d58f942 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -320,6 +320,25 @@ int ptep_set_access_flags(struct vm_area_struct *vma, | |||
320 | return changed; | 320 | return changed; |
321 | } | 321 | } |
322 | 322 | ||
323 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
324 | int pmdp_set_access_flags(struct vm_area_struct *vma, | ||
325 | unsigned long address, pmd_t *pmdp, | ||
326 | pmd_t entry, int dirty) | ||
327 | { | ||
328 | int changed = !pmd_same(*pmdp, entry); | ||
329 | |||
330 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
331 | |||
332 | if (changed && dirty) { | ||
333 | *pmdp = entry; | ||
334 | pmd_update_defer(vma->vm_mm, address, pmdp); | ||
335 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
336 | } | ||
337 | |||
338 | return changed; | ||
339 | } | ||
340 | #endif | ||
341 | |||
323 | int ptep_test_and_clear_young(struct vm_area_struct *vma, | 342 | int ptep_test_and_clear_young(struct vm_area_struct *vma, |
324 | unsigned long addr, pte_t *ptep) | 343 | unsigned long addr, pte_t *ptep) |
325 | { | 344 | { |
@@ -335,6 +354,23 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, | |||
335 | return ret; | 354 | return ret; |
336 | } | 355 | } |
337 | 356 | ||
357 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
358 | int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
359 | unsigned long addr, pmd_t *pmdp) | ||
360 | { | ||
361 | int ret = 0; | ||
362 | |||
363 | if (pmd_young(*pmdp)) | ||
364 | ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, | ||
365 | (unsigned long *) &pmdp->pmd); | ||
366 | |||
367 | if (ret) | ||
368 | pmd_update(vma->vm_mm, addr, pmdp); | ||
369 | |||
370 | return ret; | ||
371 | } | ||
372 | #endif | ||
373 | |||
338 | int ptep_clear_flush_young(struct vm_area_struct *vma, | 374 | int ptep_clear_flush_young(struct vm_area_struct *vma, |
339 | unsigned long address, pte_t *ptep) | 375 | unsigned long address, pte_t *ptep) |
340 | { | 376 | { |
@@ -347,6 +383,36 @@ int ptep_clear_flush_young(struct vm_area_struct *vma, | |||
347 | return young; | 383 | return young; |
348 | } | 384 | } |
349 | 385 | ||
386 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
387 | int pmdp_clear_flush_young(struct vm_area_struct *vma, | ||
388 | unsigned long address, pmd_t *pmdp) | ||
389 | { | ||
390 | int young; | ||
391 | |||
392 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
393 | |||
394 | young = pmdp_test_and_clear_young(vma, address, pmdp); | ||
395 | if (young) | ||
396 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
397 | |||
398 | return young; | ||
399 | } | ||
400 | |||
401 | void pmdp_splitting_flush(struct vm_area_struct *vma, | ||
402 | unsigned long address, pmd_t *pmdp) | ||
403 | { | ||
404 | int set; | ||
405 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
406 | set = !test_and_set_bit(_PAGE_BIT_SPLITTING, | ||
407 | (unsigned long *)&pmdp->pmd); | ||
408 | if (set) { | ||
409 | pmd_update(vma->vm_mm, address, pmdp); | ||
410 | /* need tlb flush only to serialize against gup-fast */ | ||
411 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
412 | } | ||
413 | } | ||
414 | #endif | ||
415 | |||
350 | /** | 416 | /** |
351 | * reserve_top_address - reserves a hole in the top of kernel address space | 417 | * reserve_top_address - reserves a hole in the top of kernel address space |
352 | * @reserve - size of hole to reserve | 418 | * @reserve - size of hole to reserve |