 arch/Kconfig                          |   3
 arch/x86/Kconfig                      |   1
 arch/x86/include/asm/paravirt.h       |  11
 arch/x86/include/asm/paravirt_types.h |   2
 arch/x86/include/asm/pgtable-2level.h |  17
 arch/x86/include/asm/pgtable-3level.h |  30
 arch/x86/include/asm/pgtable.h        | 140
 arch/x86/include/asm/pgtable_64.h     |  15
 arch/x86/kernel/paravirt.c            |   1
 arch/x86/mm/pgtable.c                 |  31
 include/asm-generic/pgtable.h         |  80
 include/asm-generic/tlb.h             |  14
 include/linux/huge_mm.h               |  83
 include/linux/mm.h                    |  30
 include/linux/mmu_notifier.h          |  14
 include/linux/pfn_t.h                 |  12
 mm/gup.c                              |   7
 mm/huge_memory.c                      | 249
 mm/memory.c                           |  88
 mm/pagewalk.c                         |  20
 mm/pgtable-generic.c                  |  14
 21 files changed, 844 insertions(+), 18 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index f761142976e5..d0012add6b19 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -571,6 +571,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	bool
 
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+	bool
+
 config HAVE_ARCH_HUGE_VMAP
 	bool
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 874c1238dffd..33007aa74111 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -109,6 +109,7 @@ config X86
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
 	select HAVE_ARCH_VMAP_STACK if X86_64
 	select HAVE_ARCH_WITHIN_STACK_FRAMES
 	select HAVE_CC_STACKPROTECTOR
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index f75fbfe550f2..0489884fdc44 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -475,6 +475,17 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			    native_pmd_val(pmd));
 }
 
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+			      pud_t *pudp, pud_t pud)
+{
+	if (sizeof(pudval_t) > sizeof(long))
+		/* 5 arg words */
+		pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
+	else
+		PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
+			    native_pud_val(pud));
+}
+
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 	pmdval_t val = native_pmd_val(pmd);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index bb2de45a60f2..b060f962d581 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -249,6 +249,8 @@ struct pv_mmu_ops {
 	void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
 	void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
 			   pmd_t *pmdp, pmd_t pmdval);
+	void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
+			   pud_t *pudp, pud_t pudval);
 	void (*pte_update)(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep);
 
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index fd74a11959de..a8b96e708c2b 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -21,6 +21,10 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 	*pmdp = pmd;
 }
 
+static inline void native_set_pud(pud_t *pudp, pud_t pud)
+{
+}
+
 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
 	native_set_pte(ptep, pte);
@@ -31,6 +35,10 @@ static inline void native_pmd_clear(pmd_t *pmdp)
 	native_set_pmd(pmdp, __pmd(0));
 }
 
+static inline void native_pud_clear(pud_t *pudp)
+{
+}
+
 static inline void native_pte_clear(struct mm_struct *mm,
 				    unsigned long addr, pte_t *xp)
 {
@@ -55,6 +63,15 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
 #endif
 
+#ifdef CONFIG_SMP
+static inline pud_t native_pudp_get_and_clear(pud_t *xp)
+{
+	return __pud(xchg((pudval_t *)xp, 0));
+}
+#else
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
+#endif
+
 /* Bit manipulation helper on pte/pgoff entry */
 static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift,
 				      unsigned long mask, unsigned int leftshift)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index cdaa58c9b39e..8f50fb3f04e1 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -121,6 +121,12 @@ static inline void native_pmd_clear(pmd_t *pmd)
 	*(tmp + 1) = 0;
 }
 
+#ifndef CONFIG_SMP
+static inline void native_pud_clear(pud_t *pudp)
+{
+}
+#endif
+
 static inline void pud_clear(pud_t *pudp)
 {
 	set_pud(pudp, __pud(0));
@@ -176,6 +182,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
 #endif
 
+#ifdef CONFIG_SMP
+union split_pud {
+	struct {
+		u32 pud_low;
+		u32 pud_high;
+	};
+	pud_t pud;
+};
+
+static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
+{
+	union split_pud res, *orig = (union split_pud *)pudp;
+
+	/* xchg acts as a barrier before setting of the high bits */
+	res.pud_low = xchg(&orig->pud_low, 0);
+	res.pud_high = orig->pud_high;
+	orig->pud_high = 0;
+
+	return res.pud;
+}
+#else
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
+#endif
+
 /* Encode and de-code a swap entry */
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
 #define __swp_type(x)			(((x).val) & 0x1f)
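
The SMP native_pudp_get_and_clear() above mirrors the existing split_pmd helper: clearing the low word with a single atomic xchg removes the present bit, so concurrent hardware walks can no longer consume the entry, and the high word can then be read and cleared with plain stores. A standalone C11 sketch of the same split-word get-and-clear (illustration only, not part of the patch):

#include <stdatomic.h>
#include <stdint.h>

struct split_entry {
	_Atomic uint32_t low;	/* carries the present bit */
	uint32_t high;
};

static uint64_t split_get_and_clear(struct split_entry *e)
{
	uint32_t lo, hi;

	lo = atomic_exchange(&e->low, 0); /* entry now reads as not-present */
	hi = e->high;                     /* safe: no new walk consumes it */
	e->high = 0;
	return ((uint64_t)hi << 32) | lo;
}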
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 437feb436efa..1cfb36b8c024 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -46,6 +46,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 #define set_pte(ptep, pte)		native_set_pte(ptep, pte)
 #define set_pte_at(mm, addr, ptep, pte)	native_set_pte_at(mm, addr, ptep, pte)
 #define set_pmd_at(mm, addr, pmdp, pmd)	native_set_pmd_at(mm, addr, pmdp, pmd)
+#define set_pud_at(mm, addr, pudp, pud)	native_set_pud_at(mm, addr, pudp, pud)
 
 #define set_pte_atomic(ptep, pte)					\
 	native_set_pte_atomic(ptep, pte)
@@ -128,6 +129,16 @@ static inline int pmd_young(pmd_t pmd)
 	return pmd_flags(pmd) & _PAGE_ACCESSED;
 }
 
+static inline int pud_dirty(pud_t pud)
+{
+	return pud_flags(pud) & _PAGE_DIRTY;
+}
+
+static inline int pud_young(pud_t pud)
+{
+	return pud_flags(pud) & _PAGE_ACCESSED;
+}
+
 static inline int pte_write(pte_t pte)
 {
 	return pte_flags(pte) & _PAGE_RW;
@@ -181,6 +192,13 @@ static inline int pmd_trans_huge(pmd_t pmd)
 	return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
 }
 
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline int pud_trans_huge(pud_t pud)
+{
+	return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
+}
+#endif
+
 #define has_transparent_hugepage has_transparent_hugepage
 static inline int has_transparent_hugepage(void)
 {
@@ -192,6 +210,18 @@ static inline int pmd_devmap(pmd_t pmd)
 {
 	return !!(pmd_val(pmd) & _PAGE_DEVMAP);
 }
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline int pud_devmap(pud_t pud)
+{
+	return !!(pud_val(pud) & _PAGE_DEVMAP);
+}
+#else
+static inline int pud_devmap(pud_t pud)
+{
+	return 0;
+}
+#endif
 #endif
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -333,6 +363,65 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
 	return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+	pudval_t v = native_pud_val(pud);
+
+	return __pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+	pudval_t v = native_pud_val(pud);
+
+	return __pud(v & ~clear);
+}
+
+static inline pud_t pud_mkold(pud_t pud)
+{
+	return pud_clear_flags(pud, _PAGE_ACCESSED);
+}
+
+static inline pud_t pud_mkclean(pud_t pud)
+{
+	return pud_clear_flags(pud, _PAGE_DIRTY);
+}
+
+static inline pud_t pud_wrprotect(pud_t pud)
+{
+	return pud_clear_flags(pud, _PAGE_RW);
+}
+
+static inline pud_t pud_mkdirty(pud_t pud)
+{
+	return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+}
+
+static inline pud_t pud_mkdevmap(pud_t pud)
+{
+	return pud_set_flags(pud, _PAGE_DEVMAP);
+}
+
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+	return pud_set_flags(pud, _PAGE_PSE);
+}
+
+static inline pud_t pud_mkyoung(pud_t pud)
+{
+	return pud_set_flags(pud, _PAGE_ACCESSED);
+}
+
+static inline pud_t pud_mkwrite(pud_t pud)
+{
+	return pud_set_flags(pud, _PAGE_RW);
+}
+
+static inline pud_t pud_mknotpresent(pud_t pud)
+{
+	return pud_clear_flags(pud, _PAGE_PRESENT | _PAGE_PROTNONE);
+}
+
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline int pte_soft_dirty(pte_t pte)
 {
@@ -344,6 +433,11 @@ static inline int pmd_soft_dirty(pmd_t pmd)
 	return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
 }
 
+static inline int pud_soft_dirty(pud_t pud)
+{
+	return pud_flags(pud) & _PAGE_SOFT_DIRTY;
+}
+
 static inline pte_t pte_mksoft_dirty(pte_t pte)
 {
 	return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
@@ -354,6 +448,11 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
 	return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
+static inline pud_t pud_mksoft_dirty(pud_t pud)
+{
+	return pud_set_flags(pud, _PAGE_SOFT_DIRTY);
+}
+
 static inline pte_t pte_clear_soft_dirty(pte_t pte)
 {
 	return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
@@ -364,6 +463,11 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
 	return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
+static inline pud_t pud_clear_soft_dirty(pud_t pud)
+{
+	return pud_clear_flags(pud, _PAGE_SOFT_DIRTY);
+}
+
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 /*
@@ -392,6 +496,12 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 		     massage_pgprot(pgprot));
 }
 
+static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
+{
+	return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
+		     massage_pgprot(pgprot));
+}
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	pteval_t val = pte_val(pte);
@@ -771,6 +881,14 @@ static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp)
 	return res;
 }
 
+static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
+{
+	pud_t res = *pudp;
+
+	native_pud_clear(pudp);
+	return res;
+}
+
 static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
 				     pte_t *ptep , pte_t pte)
 {
@@ -783,6 +901,12 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	native_set_pmd(pmdp, pmd);
 }
 
+static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
+				     pud_t *pudp, pud_t pud)
+{
+	native_set_pud(pudp, pud);
+}
+
 #ifndef CONFIG_PARAVIRT
 /*
  * Rules for using pte_update - it must be called after any PTE update which
@@ -861,10 +985,15 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
 extern int pmdp_set_access_flags(struct vm_area_struct *vma,
 				 unsigned long address, pmd_t *pmdp,
 				 pmd_t entry, int dirty);
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pud_t *pudp,
+				 pud_t entry, int dirty);
 
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 				     unsigned long addr, pmd_t *pmdp);
+extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
+				     unsigned long addr, pud_t *pudp);
 
 #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
 extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
@@ -884,6 +1013,13 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long
 	return native_pmdp_get_and_clear(pmdp);
 }
 
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+					unsigned long addr, pud_t *pudp)
+{
+	return native_pudp_get_and_clear(pudp);
+}
+
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 				      unsigned long addr, pmd_t *pmdp)
@@ -932,6 +1068,10 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmd)
 {
 }
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+		unsigned long addr, pud_t *pud)
+{
+}
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
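
These single-flag pud_mk*() helpers are meant to compose. For instance, insert_pfn_pud() in the mm/huge_memory.c part of this same patch builds a complete huge devmap entry roughly like this (condensed from that function):

	pud_t entry = pud_mkhuge(pfn_t_pud(pfn, prot));

	if (pfn_t_devmap(pfn))
		entry = pud_mkdevmap(entry);
	if (write)
		entry = maybe_pud_mkwrite(pud_mkyoung(pud_mkdirty(entry)), vma);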
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 62b775926045..73c7ccc38912 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -106,6 +106,21 @@ static inline void native_pud_clear(pud_t *pud)
 	native_set_pud(pud, native_make_pud(0));
 }
 
+static inline pud_t native_pudp_get_and_clear(pud_t *xp)
+{
+#ifdef CONFIG_SMP
+	return native_make_pud(xchg(&xp->pud, 0));
+#else
+	/* native_local_pudp_get_and_clear,
+	 * but duplicated because of cyclic dependency
+	 */
+	pud_t ret = *xp;
+
+	native_pud_clear(xp);
+	return ret;
+#endif
+}
+
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
 	*pgdp = pgd;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a1bfba0f7234..4797e87b0fb6 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -425,6 +425,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 	.pmd_clear = native_pmd_clear,
 #endif
 	.set_pud = native_set_pud,
+	.set_pud_at = native_set_pud_at,
 
 	.pmd_val = PTE_IDENT,
 	.make_pmd = PTE_IDENT,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3feec5af4e67..6cbdff26bb96 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -445,6 +445,26 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
 
 	return changed;
 }
+
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pud_t *pudp, pud_t entry, int dirty)
+{
+	int changed = !pud_same(*pudp, entry);
+
+	VM_BUG_ON(address & ~HPAGE_PUD_MASK);
+
+	if (changed && dirty) {
+		*pudp = entry;
+		/*
+		 * We had a write-protection fault here and changed the pud
+		 * to be more permissive.  No need to flush the TLB for that,
+		 * #PF is architecturally guaranteed to do that and in the
+		 * worst-case we'll generate a spurious fault.
+		 */
+	}
+
+	return changed;
+}
 #endif
 
 int ptep_test_and_clear_young(struct vm_area_struct *vma,
@@ -474,6 +494,17 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 
 	return ret;
 }
+int pudp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pud_t *pudp)
+{
+	int ret = 0;
+
+	if (pud_young(*pudp))
+		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+					 (unsigned long *)pudp);
+
+	return ret;
+}
 #endif
 
 int ptep_clear_flush_young(struct vm_area_struct *vma,
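
pudp_test_and_clear_young() follows the usual cheap-check-then-atomic-RMW shape: a plain read skips the expensive locked operation when the bit is already clear. A self-contained C11 rendition of the same idea (illustration only; the kernel uses test_and_clear_bit() on the accessed bit):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define ACCESSED_BIT	(1ull << 5)	/* x86 _PAGE_BIT_ACCESSED is bit 5 */

static bool test_and_clear_young(_Atomic uint64_t *entry)
{
	/* plain read first: skip the costly atomic RMW if already clear */
	if (!(atomic_load(entry) & ACCESSED_BIT))
		return false;
	/* atomically clear the bit; report whether it was set before */
	return atomic_fetch_and(entry, ~ACCESSED_BIT) & ACCESSED_BIT;
}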
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 18af2bcefe6a..a0aba0f9c57b 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -36,6 +36,9 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma,
 extern int pmdp_set_access_flags(struct vm_area_struct *vma,
 				 unsigned long address, pmd_t *pmdp,
 				 pmd_t entry, int dirty);
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pud_t *pudp,
+				 pud_t entry, int dirty);
 #else
 static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
 					unsigned long address, pmd_t *pmdp,
@@ -44,6 +47,13 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
 	BUILD_BUG();
 	return 0;
 }
+static inline int pudp_set_access_flags(struct vm_area_struct *vma,
+					unsigned long address, pud_t *pudp,
+					pud_t entry, int dirty)
+{
+	BUILD_BUG();
+	return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
@@ -121,8 +131,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 }
 #endif
 
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 					    unsigned long address,
 					    pmd_t *pmdp)
@@ -131,20 +141,40 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 	pmd_clear(pmdp);
 	return pmd;
 }
+#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+					    unsigned long address,
+					    pud_t *pudp)
+{
+	pud_t pud = *pudp;
+
+	pud_clear(pudp);
+	return pud;
+}
+#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-#endif
 
-#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
 					    unsigned long address, pmd_t *pmdp,
 					    int full)
 {
 	return pmdp_huge_get_and_clear(mm, address, pmdp);
 }
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
+#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
+static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long address, pud_t *pudp,
+					    int full)
+{
+	return pudp_huge_get_and_clear(mm, address, pudp);
+}
+#endif
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 					    unsigned long address, pte_t *ptep,
@@ -181,6 +211,9 @@ extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
 extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
 				   unsigned long address,
 				   pmd_t *pmdp);
+extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
+				   unsigned long address,
+				   pud_t *pudp);
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -208,6 +241,23 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
+#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+				      unsigned long address, pud_t *pudp)
+{
+	pud_t old_pud = *pudp;
+
+	set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
+}
+#else
+static inline void pudp_set_wrprotect(struct mm_struct *mm,
+				      unsigned long address, pud_t *pudp)
+{
+	BUILD_BUG();
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+#endif
 
 #ifndef pmdp_collapse_flush
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -273,12 +323,23 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 {
 	return pmd_val(pmd_a) == pmd_val(pmd_b);
 }
+
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+	return pud_val(pud_a) == pud_val(pud_b);
+}
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 {
 	BUILD_BUG();
 	return 0;
 }
+
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+	BUILD_BUG();
+	return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
@@ -640,6 +701,15 @@ static inline int pmd_write(pmd_t pmd)
 #endif /* __HAVE_ARCH_PMD_WRITE */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
+	(defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+	 !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
+static inline int pud_trans_huge(pud_t pud)
+{
+	return 0;
+}
+#endif
+
 #ifndef pmd_read_atomic
 static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
 {
@@ -785,8 +855,10 @@ static inline int pmd_clear_huge(pmd_t *pmd)
  * e.g. see arch/arc: flush_pmd_tlb_range
  */
 #define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
+#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
 #else
 #define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
+#define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
 #endif
 #endif
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 7eed8cf3130a..4329bc6ef04b 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -232,6 +232,20 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);		\
 	} while (0)
 
+/**
+ * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
+ * invalidation. This is a nop so far, because only x86 needs it.
+ */
+#ifndef __tlb_remove_pud_tlb_entry
+#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
+#endif
+
+#define tlb_remove_pud_tlb_entry(tlb, pudp, address)			\
+	do {								\
+		__tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);	\
+		__tlb_remove_pud_tlb_entry(tlb, pudp, address);		\
+	} while (0)
+
 /*
  * For things like page tables caches (ie caching addresses "inside" the
  * page tables, like x86 does), for legacy reasons, flushing an
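
tlb_remove_pud_tlb_entry() is meant to be invoked right after the entry is cleared in a zap path, so the mmu_gather range grows to cover the 1GB mapping before the flush. A hedged caller sketch, modelled on the PMD zap path and on the opening of zap_huge_pud() further down (the rest of that function is truncated in this diff, so treat the tail as an assumption rather than the patch's code):

static int example_zap_huge_pud(struct mmu_gather *tlb,
				struct vm_area_struct *vma,
				pud_t *pud, unsigned long addr)
{
	spinlock_t *ptl = __pud_trans_huge_lock(pud, vma);
	pud_t orig_pud;

	if (!ptl)
		return 0;	/* not (or no longer) a huge pud */

	orig_pud = pudp_huge_get_and_clear_full(tlb->mm, addr, pud,
						tlb->fullmm);
	tlb_remove_pud_tlb_entry(tlb, pud, addr);	/* record for flush */
	spin_unlock(ptl);
	return 1;
}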
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f0029e786205..a3762d49ba39 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,6 +6,18 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 			 struct vm_area_struct *vma);
 extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+			 pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
+			 struct vm_area_struct *vma);
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
+#else
+static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
+{
+}
+#endif
+
 extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
 extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 					  unsigned long addr,
@@ -17,6 +29,9 @@ extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
 extern int zap_huge_pmd(struct mmu_gather *tlb,
 			struct vm_area_struct *vma,
 			pmd_t *pmd, unsigned long addr);
+extern int zap_huge_pud(struct mmu_gather *tlb,
+			struct vm_area_struct *vma,
+			pud_t *pud, unsigned long addr);
 extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, unsigned long end,
 			unsigned char *vec);
@@ -26,8 +41,10 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
 			int prot_numa);
-int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *,
-			pfn_t pfn, bool write);
+int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+			pmd_t *pmd, pfn_t pfn, bool write);
+int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+			pud_t *pud, pfn_t pfn, bool write);
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
 	TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
@@ -58,13 +75,14 @@ extern struct kobj_attribute shmem_enabled_attr;
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, int flags);
-
 #define HPAGE_PMD_SHIFT PMD_SHIFT
 #define HPAGE_PMD_SIZE	((1UL) << HPAGE_PMD_SHIFT)
 #define HPAGE_PMD_MASK	(~(HPAGE_PMD_SIZE - 1))
 
+#define HPAGE_PUD_SHIFT PUD_SHIFT
+#define HPAGE_PUD_SIZE	((1UL) << HPAGE_PUD_SHIFT)
+#define HPAGE_PUD_MASK	(~(HPAGE_PUD_SIZE - 1))
+
 extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 
 #define transparent_hugepage_enabled(__vma)				\
@@ -118,6 +136,17 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 		bool freeze, struct page *page);
 
+void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+		unsigned long address);
+
+#define split_huge_pud(__vma, __pud, __address)				\
+	do {								\
+		pud_t *____pud = (__pud);				\
+		if (pud_trans_huge(*____pud)				\
+					|| pud_devmap(*____pud))	\
+			__split_huge_pud(__vma, __pud, __address);	\
+	} while (0)
+
 extern int hugepage_madvise(struct vm_area_struct *vma,
 			    unsigned long *vm_flags, int advice);
 extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
@@ -126,6 +155,8 @@ extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
 			       long adjust_next);
 extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
 		struct vm_area_struct *vma);
+extern spinlock_t *__pud_trans_huge_lock(pud_t *pud,
+		struct vm_area_struct *vma);
 /* mmap_sem must be held on entry */
 static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 		struct vm_area_struct *vma)
@@ -136,6 +167,15 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 	else
 		return NULL;
 }
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+		struct vm_area_struct *vma)
+{
+	VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
+	if (pud_trans_huge(*pud) || pud_devmap(*pud))
+		return __pud_trans_huge_lock(pud, vma);
+	else
+		return NULL;
+}
 static inline int hpage_nr_pages(struct page *page)
 {
 	if (unlikely(PageTransHuge(page)))
@@ -143,6 +183,11 @@ static inline int hpage_nr_pages(struct page *page)
 	return 1;
 }
 
+struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd, int flags);
+struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
+		pud_t *pud, int flags);
+
 extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
@@ -157,6 +202,11 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
 	return is_huge_zero_page(pmd_page(pmd));
 }
 
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+	return false;
+}
+
 struct page *mm_get_huge_zero_page(struct mm_struct *mm);
 void mm_put_huge_zero_page(struct mm_struct *mm);
 
@@ -167,6 +217,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm);
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
 
+#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
+
 #define hpage_nr_pages(x) 1
 
 #define transparent_hugepage_enabled(__vma) 0
@@ -195,6 +249,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
 		unsigned long address, bool freeze, struct page *page) {}
 
+#define split_huge_pud(__vma, __pud, __address)	\
+	do { } while (0)
+
 static inline int hugepage_madvise(struct vm_area_struct *vma,
 				   unsigned long *vm_flags, int advice)
 {
@@ -212,6 +269,11 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 {
 	return NULL;
 }
+static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
+		struct vm_area_struct *vma)
+{
+	return NULL;
+}
 
 static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
@@ -223,6 +285,11 @@ static inline bool is_huge_zero_page(struct page *page)
 	return false;
 }
 
+static inline bool is_huge_zero_pud(pud_t pud)
+{
+	return false;
+}
+
 static inline void mm_put_huge_zero_page(struct mm_struct *mm)
 {
 	return;
@@ -233,6 +300,12 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
 {
 	return NULL;
 }
+
+static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
+	unsigned long addr, pud_t *pud, int flags)
+{
+	return NULL;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
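
A minimal usage sketch for the new lock helper (assumed caller, mirroring how pmd_trans_huge_lock() is used; mmap_sem must already be held):

static void example_walk_pud(pud_t *pud, struct vm_area_struct *vma)
{
	spinlock_t *ptl = pud_trans_huge_lock(pud, vma);

	if (ptl) {
		/* *pud is a huge (or devmap) entry, stable under ptl */
		spin_unlock(ptl);
		return;
	}
	/* otherwise fall through and walk the page table below the pud */
}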
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 035a688e5472..d8b75d7d6a9e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -424,6 +424,10 @@ static inline int pmd_devmap(pmd_t pmd)
 {
 	return 0;
 }
+static inline int pud_devmap(pud_t pud)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -1199,6 +1203,10 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 
 /**
  * mm_walk - callbacks for walk_page_range
+ * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+ *	       this handler should only handle pud_trans_huge() puds.
+ *	       the pmd_entry or pte_entry callbacks will be used for
+ *	       regular PUDs.
  * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
  *	       this handler is required to be able to handle
  *	       pmd_trans_huge() pmds.  They may simply choose to
@@ -1218,6 +1226,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
  * (see the comment on walk_page_range() for more details)
  */
 struct mm_walk {
+	int (*pud_entry)(pud_t *pud, unsigned long addr,
+			 unsigned long next, struct mm_walk *walk);
 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pte_entry)(pte_t *pte, unsigned long addr,
@@ -1801,8 +1811,26 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 	return ptl;
 }
 
-extern void __init pagecache_init(void);
+/*
+ * No scalability reason to split PUD locks yet, but follow the same pattern
+ * as the PMD locks to make it easier if we decide to.  The VM should not be
+ * considered ready to switch to split PUD locks yet; there may be places
+ * which need to be converted from page_table_lock.
+ */
+static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud)
+{
+	return &mm->page_table_lock;
+}
+
+static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
+{
+	spinlock_t *ptl = pud_lockptr(mm, pud);
+
+	spin_lock(ptl);
+	return ptl;
+}
 
+extern void __init pagecache_init(void);
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
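
A hedged sketch of a pagewalk user wiring up the new callback (hypothetical walker; per the kernel-doc above, pud_entry handles only pud_trans_huge() entries and everything else falls through to pmd_entry/pte_entry):

static int my_pud_entry(pud_t *pud, unsigned long addr,
			unsigned long next, struct mm_walk *walk)
{
	if (!pud_trans_huge(*pud))
		return 0;	/* let pmd_entry/pte_entry handle it */
	/* ... inspect or account the huge mapping at [addr, next) ... */
	return 0;
}

static void my_scan(struct mm_struct *mm, unsigned long start,
		    unsigned long end)
{
	struct mm_walk walk = {
		.pud_entry = my_pud_entry,
		.mm = mm,
	};

	walk_page_range(start, end, &walk);
}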
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index a1a210d59961..51891fb0d3ce 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -381,6 +381,19 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	___pmd;								\
 })
 
+#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud)		\
+({									\
+	unsigned long ___haddr = __haddr & HPAGE_PUD_MASK;		\
+	struct mm_struct *___mm = (__vma)->vm_mm;			\
+	pud_t ___pud;							\
+									\
+	___pud = pudp_huge_clear_flush(__vma, ___haddr, __pud);		\
+	mmu_notifier_invalidate_range(___mm, ___haddr,			\
+				      ___haddr + HPAGE_PUD_SIZE);	\
+									\
+	___pud;								\
+})
+
 #define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)		\
 ({									\
 	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
@@ -475,6 +488,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define pmdp_clear_young_notify pmdp_test_and_clear_young
 #define ptep_clear_flush_notify ptep_clear_flush
 #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
+#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
 #define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
 #define set_pte_at_notify set_pte_at
 
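
Usage follows the pmd variant: a path that tears down a huge pud uses the _notify form so secondary MMUs (KVM, IOMMU drivers, etc.) observe the invalidation. A hedged caller sketch (assumed, modelled on the pmd clear-flush-notify callers):

static void example_clear_huge_pud(struct vm_area_struct *vma,
				   unsigned long addr, pud_t *pudp)
{
	unsigned long haddr = addr & HPAGE_PUD_MASK;
	pud_t orig;

	/* clears the entry, flushes the TLB, then notifies secondary MMUs */
	orig = pudp_huge_clear_flush_notify(vma, haddr, pudp);
	(void)orig;	/* e.g. examined by a split or unmap path */
}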
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 033fc7bbcefa..a49b3259cad7 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -90,6 +90,13 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot)
 {
 	return pfn_pmd(pfn_t_to_pfn(pfn), pgprot);
 }
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot)
+{
+	return pfn_pud(pfn_t_to_pfn(pfn), pgprot);
+}
+#endif
 #endif
 
 #ifdef __HAVE_ARCH_PTE_DEVMAP
@@ -106,5 +113,10 @@ static inline bool pfn_t_devmap(pfn_t pfn)
 }
 pte_t pte_mkdevmap(pte_t pte);
 pmd_t pmd_mkdevmap(pmd_t pmd);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+pud_t pud_mkdevmap(pud_t pud);
 #endif
+#endif /* __HAVE_ARCH_PTE_DEVMAP */
+
 #endif /* _LINUX_PFN_T_H_ */
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -253,6 +253,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 			return page;
 		return no_page_table(vma, flags);
 	}
+	if (pud_devmap(*pud)) {
+		ptl = pud_lock(mm, pud);
+		page = follow_devmap_pud(vma, address, pud, flags);
+		spin_unlock(ptl);
+		if (page)
+			return page;
+	}
 	if (unlikely(pud_bad(*pud)))
 		return no_page_table(vma, flags);
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f9ecc2aeadfc..85742ac5b32e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -757,6 +757,60 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_flags & VM_WRITE))
+		pud = pud_mkwrite(pud);
+	return pud;
+}
+
+static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+		pud_t *pud, pfn_t pfn, pgprot_t prot, bool write)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pud_t entry;
+	spinlock_t *ptl;
+
+	ptl = pud_lock(mm, pud);
+	entry = pud_mkhuge(pfn_t_pud(pfn, prot));
+	if (pfn_t_devmap(pfn))
+		entry = pud_mkdevmap(entry);
+	if (write) {
+		entry = pud_mkyoung(pud_mkdirty(entry));
+		entry = maybe_pud_mkwrite(entry, vma);
+	}
+	set_pud_at(mm, addr, pud, entry);
+	update_mmu_cache_pud(vma, addr, pud);
+	spin_unlock(ptl);
+}
+
+int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+			pud_t *pud, pfn_t pfn, bool write)
+{
+	pgprot_t pgprot = vma->vm_page_prot;
+	/*
+	 * If we had pud_special, we could avoid all these restrictions,
+	 * but we need to be consistent with PTEs and architectures that
+	 * can't support a 'special' bit.
+	 */
+	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
+	BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
+						(VM_PFNMAP|VM_MIXEDMAP));
+	BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
+	BUG_ON(!pfn_t_devmap(pfn));
+
+	if (addr < vma->vm_start || addr >= vma->vm_end)
+		return VM_FAULT_SIGBUS;
+
+	track_pfn_insert(vma, &pgprot, pfn);
+
+	insert_pfn_pud(vma, addr, pud, pfn, pgprot, write);
+	return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+
 static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
 		pmd_t *pmd)
 {
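
The consumer this enables is a DAX-style driver fault handler that maps device memory with a single 1GB entry. A hedged sketch follows (the handler shape and my_lookup_phys() are assumptions, not part of this patch; phys_to_pfn_t() and the PFN_DEV|PFN_MAP flags are the existing pfn_t helpers, and vmf->pud is the field this patch adds to struct vm_fault):

static int my_huge_pud_fault(struct vm_fault *vmf)
{
	unsigned long haddr = vmf->address & HPAGE_PUD_MASK;
	/* hypothetical: resolve the 1GB-aligned device physical address */
	phys_addr_t phys = my_lookup_phys(vmf->vma, haddr);
	pfn_t pfn = phys_to_pfn_t(phys, PFN_DEV | PFN_MAP);

	return vmf_insert_pfn_pud(vmf->vma, haddr, vmf->pud, pfn,
				  vmf->flags & FAULT_FLAG_WRITE);
}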
@@ -887,6 +941,123 @@ out: | |||
887 | return ret; | 941 | return ret; |
888 | } | 942 | } |
889 | 943 | ||
944 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD | ||
945 | static void touch_pud(struct vm_area_struct *vma, unsigned long addr, | ||
946 | pud_t *pud) | ||
947 | { | ||
948 | pud_t _pud; | ||
949 | |||
950 | /* | ||
951 | * We should set the dirty bit only for FOLL_WRITE but for now | ||
952 | * the dirty bit in the pud is meaningless. And if the dirty | ||
953 | * bit will become meaningful and we'll only set it with | ||
954 | * FOLL_WRITE, an atomic set_bit will be required on the pud to | ||
955 | * set the young bit, instead of the current set_pud_at. | ||
956 | */ | ||
957 | _pud = pud_mkyoung(pud_mkdirty(*pud)); | ||
958 | if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK, | ||
959 | pud, _pud, 1)) | ||
960 | update_mmu_cache_pud(vma, addr, pud); | ||
961 | } | ||
962 | |||
963 | struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, | ||
964 | pud_t *pud, int flags) | ||
965 | { | ||
966 | unsigned long pfn = pud_pfn(*pud); | ||
967 | struct mm_struct *mm = vma->vm_mm; | ||
968 | struct dev_pagemap *pgmap; | ||
969 | struct page *page; | ||
970 | |||
971 | assert_spin_locked(pud_lockptr(mm, pud)); | ||
972 | |||
973 | if (flags & FOLL_WRITE && !pud_write(*pud)) | ||
974 | return NULL; | ||
975 | |||
976 | if (pud_present(*pud) && pud_devmap(*pud)) | ||
977 | /* pass */; | ||
978 | else | ||
979 | return NULL; | ||
980 | |||
981 | if (flags & FOLL_TOUCH) | ||
982 | touch_pud(vma, addr, pud); | ||
983 | |||
984 | /* | ||
985 | * device mapped pages can only be returned if the | ||
986 | * caller will manage the page reference count. | ||
987 | */ | ||
988 | if (!(flags & FOLL_GET)) | ||
989 | return ERR_PTR(-EEXIST); | ||
990 | |||
991 | pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT; | ||
992 | pgmap = get_dev_pagemap(pfn, NULL); | ||
993 | if (!pgmap) | ||
994 | return ERR_PTR(-EFAULT); | ||
995 | page = pfn_to_page(pfn); | ||
996 | get_page(page); | ||
997 | put_dev_pagemap(pgmap); | ||
998 | |||
999 | return page; | ||
1000 | } | ||
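
A hedged sketch of the caller contract spelled out in the comment above: a page is only handed back when the caller passes FOLL_GET and thus owns a reference, and the pud lock must be held across the call. demo_peek_devmap_pud() is a hypothetical name; the locking mirrors the pmd-level gup path:

static struct page *demo_peek_devmap_pud(struct vm_area_struct *vma,
					 unsigned long addr, pud_t *pud)
{
	spinlock_t *ptl = pud_lock(vma->vm_mm, pud);
	struct page *page;

	page = follow_devmap_pud(vma, addr, pud, FOLL_GET);
	spin_unlock(ptl);
	if (IS_ERR_OR_NULL(page))
		return NULL;
	return page;	/* caller must put_page() when done */
}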
1001 | |||
1002 | int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, | ||
1003 | pud_t *dst_pud, pud_t *src_pud, unsigned long addr, | ||
1004 | struct vm_area_struct *vma) | ||
1005 | { | ||
1006 | spinlock_t *dst_ptl, *src_ptl; | ||
1007 | pud_t pud; | ||
1008 | int ret; | ||
1009 | |||
1010 | dst_ptl = pud_lock(dst_mm, dst_pud); | ||
1011 | src_ptl = pud_lockptr(src_mm, src_pud); | ||
1012 | spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); | ||
1013 | |||
1014 | ret = -EAGAIN; | ||
1015 | pud = *src_pud; | ||
1016 | if (unlikely(!pud_trans_huge(pud) && !pud_devmap(pud))) | ||
1017 | goto out_unlock; | ||
1018 | |||
1019 | /* | ||
1020 | * When page table lock is held, the huge zero pud should not be | ||
1021 | * under splitting since we don't split the page itself, only the | ||
1022 | * pud into a page table. | ||
1023 | */ | ||
1024 | if (is_huge_zero_pud(pud)) { | ||
1025 | /* No huge zero pud yet */ | ||
1026 | } | ||
1027 | |||
1028 | pudp_set_wrprotect(src_mm, addr, src_pud); | ||
1029 | pud = pud_mkold(pud_wrprotect(pud)); | ||
1030 | set_pud_at(dst_mm, addr, dst_pud, pud); | ||
1031 | |||
1032 | ret = 0; | ||
1033 | out_unlock: | ||
1034 | spin_unlock(src_ptl); | ||
1035 | spin_unlock(dst_ptl); | ||
1036 | return ret; | ||
1037 | } | ||
1038 | |||
1039 | void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) | ||
1040 | { | ||
1041 | pud_t entry; | ||
1042 | unsigned long haddr; | ||
1043 | bool write = vmf->flags & FAULT_FLAG_WRITE; | ||
1044 | |||
1045 | vmf->ptl = pud_lock(vmf->vma->vm_mm, vmf->pud); | ||
1046 | if (unlikely(!pud_same(*vmf->pud, orig_pud))) | ||
1047 | goto unlock; | ||
1048 | |||
1049 | entry = pud_mkyoung(orig_pud); | ||
1050 | if (write) | ||
1051 | entry = pud_mkdirty(entry); | ||
1052 | haddr = vmf->address & HPAGE_PUD_MASK; | ||
1053 | if (pudp_set_access_flags(vmf->vma, haddr, vmf->pud, entry, write)) | ||
1054 | update_mmu_cache_pud(vmf->vma, vmf->address, vmf->pud); | ||
1055 | |||
1056 | unlock: | ||
1057 | spin_unlock(vmf->ptl); | ||
1058 | } | ||
1059 | #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ | ||
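
The relock-and-recheck step at the top of huge_pud_set_accessed() is the key idiom here: orig_pud was read without the lock, so the entry must be verified unchanged after relocking. A hypothetical condensation (demo_relock_pud() is an invented name):

static bool demo_relock_pud(struct vm_fault *vmf, pud_t orig_pud)
{
	vmf->ptl = pud_lock(vmf->vma->vm_mm, vmf->pud);
	if (pud_same(*vmf->pud, orig_pud))
		return true;	/* entry unchanged; caller must unlock */
	spin_unlock(vmf->ptl);
	return false;		/* raced with another modifier */
}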
1060 | |||
890 | void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd) | 1061 | void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd) |
891 | { | 1062 | { |
892 | pmd_t entry; | 1063 | pmd_t entry; |
@@ -1601,6 +1772,84 @@ spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) | |||
1601 | return NULL; | 1772 | return NULL; |
1602 | } | 1773 | } |
1603 | 1774 | ||
1775 | /* | ||
1776 | * Returns the page table lock pointer if a given pud maps a thp, | ||
1777 | * NULL otherwise. | ||
1778 | * Note that on success this routine returns without unlocking the page | ||
1779 | * table lock, so callers must unlock it. | ||
1780 | */ | ||
1781 | spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) | ||
1782 | { | ||
1783 | spinlock_t *ptl; | ||
1784 | |||
1785 | ptl = pud_lock(vma->vm_mm, pud); | ||
1786 | if (likely(pud_trans_huge(*pud) || pud_devmap(*pud))) | ||
1787 | return ptl; | ||
1788 | spin_unlock(ptl); | ||
1789 | return NULL; | ||
1790 | } | ||
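
A sketch of the resulting caller pattern, using the pud_trans_huge_lock() wrapper this patch adds in huge_mm.h (demo_pud_is_huge() is a hypothetical name):

static bool demo_pud_is_huge(pud_t *pud, struct vm_area_struct *vma)
{
	spinlock_t *ptl = pud_trans_huge_lock(pud, vma);

	if (!ptl)
		return false;	/* not huge/devmap, nothing locked */
	/* ... inspect *pud safely while the lock is held ... */
	spin_unlock(ptl);
	return true;
}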
1791 | |||
1792 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD | ||
1793 | int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, | ||
1794 | pud_t *pud, unsigned long addr) | ||
1795 | { | ||
1796 | pud_t orig_pud; | ||
1797 | spinlock_t *ptl; | ||
1798 | |||
1799 | ptl = __pud_trans_huge_lock(pud, vma); | ||
1800 | if (!ptl) | ||
1801 | return 0; | ||
1802 | /* | ||
1803 | * For architectures like ppc64 we look at deposited pgtable | ||
1804 | * when calling pudp_huge_get_and_clear. So do the | ||
1805 | * pgtable_trans_huge_withdraw after finishing pudp-related | ||
1806 | * operations. | ||
1807 | */ | ||
1808 | orig_pud = pudp_huge_get_and_clear_full(tlb->mm, addr, pud, | ||
1809 | tlb->fullmm); | ||
1810 | tlb_remove_pud_tlb_entry(tlb, pud, addr); | ||
1811 | if (vma_is_dax(vma)) { | ||
1812 | spin_unlock(ptl); | ||
1813 | /* No zero page support yet */ | ||
1814 | } else { | ||
1815 | /* No support for anonymous PUD pages yet */ | ||
1816 | BUG(); | ||
1817 | } | ||
1818 | return 1; | ||
1819 | } | ||
1820 | |||
1821 | static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud, | ||
1822 | unsigned long haddr) | ||
1823 | { | ||
1824 | VM_BUG_ON(haddr & ~HPAGE_PUD_MASK); | ||
1825 | VM_BUG_ON_VMA(vma->vm_start > haddr, vma); | ||
1826 | VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma); | ||
1827 | VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud)); | ||
1828 | |||
1829 | count_vm_event(THP_SPLIT_PMD); /* no PUD-specific counter yet */ | ||
1830 | |||
1831 | pudp_huge_clear_flush_notify(vma, haddr, pud); | ||
1832 | } | ||
1833 | |||
1834 | void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, | ||
1835 | unsigned long address) | ||
1836 | { | ||
1837 | spinlock_t *ptl; | ||
1838 | struct mm_struct *mm = vma->vm_mm; | ||
1839 | unsigned long haddr = address & HPAGE_PUD_MASK; | ||
1840 | |||
1841 | mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PUD_SIZE); | ||
1842 | ptl = pud_lock(mm, pud); | ||
1843 | if (unlikely(!pud_trans_huge(*pud) && !pud_devmap(*pud))) | ||
1844 | goto out; | ||
1845 | __split_huge_pud_locked(vma, pud, haddr); | ||
1846 | |||
1847 | out: | ||
1848 | spin_unlock(ptl); | ||
1849 | mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PUD_SIZE); | ||
1850 | } | ||
1851 | #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ | ||
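
Unlike the pmd case there is no page to remap at a smaller size, so "splitting" a devmap pud amounts to clearing the entry and letting the next access refault. A hypothetical illustration (demo name only):

static void demo_split_devmap_pud(struct vm_area_struct *vma, pud_t *pud,
				  unsigned long addr)
{
	split_huge_pud(vma, pud, addr);
	/* The entry is now cleared; a later access goes back through
	 * __handle_mm_fault() and can be refilled at pud, pmd or pte
	 * granularity by the driver's huge_fault handler. */
}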
1852 | |||
1604 | static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, | 1853 | static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, |
1605 | unsigned long haddr, pmd_t *pmd) | 1854 | unsigned long haddr, pmd_t *pmd) |
1606 | { | 1855 | { |
diff --git a/mm/memory.c b/mm/memory.c index e721e8eba570..41e2a2d4b2a6 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1001,7 +1001,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src | |||
1001 | next = pmd_addr_end(addr, end); | 1001 | next = pmd_addr_end(addr, end); |
1002 | if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) { | 1002 | if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) { |
1003 | int err; | 1003 | int err; |
1004 | VM_BUG_ON(next-addr != HPAGE_PMD_SIZE); | 1004 | VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, vma); |
1005 | err = copy_huge_pmd(dst_mm, src_mm, | 1005 | err = copy_huge_pmd(dst_mm, src_mm, |
1006 | dst_pmd, src_pmd, addr, vma); | 1006 | dst_pmd, src_pmd, addr, vma); |
1007 | if (err == -ENOMEM) | 1007 | if (err == -ENOMEM) |
@@ -1032,6 +1032,18 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src | |||
1032 | src_pud = pud_offset(src_pgd, addr); | 1032 | src_pud = pud_offset(src_pgd, addr); |
1033 | do { | 1033 | do { |
1034 | next = pud_addr_end(addr, end); | 1034 | next = pud_addr_end(addr, end); |
1035 | if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { | ||
1036 | int err; | ||
1037 | |||
1038 | VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, vma); | ||
1039 | err = copy_huge_pud(dst_mm, src_mm, | ||
1040 | dst_pud, src_pud, addr, vma); | ||
1041 | if (err == -ENOMEM) | ||
1042 | return -ENOMEM; | ||
1043 | if (!err) | ||
1044 | continue; | ||
1045 | /* fall through */ | ||
1046 | } | ||
1035 | if (pud_none_or_clear_bad(src_pud)) | 1047 | if (pud_none_or_clear_bad(src_pud)) |
1036 | continue; | 1048 | continue; |
1037 | if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud, | 1049 | if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud, |
@@ -1263,9 +1275,19 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, | |||
1263 | pud = pud_offset(pgd, addr); | 1275 | pud = pud_offset(pgd, addr); |
1264 | do { | 1276 | do { |
1265 | next = pud_addr_end(addr, end); | 1277 | next = pud_addr_end(addr, end); |
1278 | if (pud_trans_huge(*pud) || pud_devmap(*pud)) { | ||
1279 | if (next - addr != HPAGE_PUD_SIZE) { | ||
1280 | VM_BUG_ON_VMA(!rwsem_is_locked(&tlb->mm->mmap_sem), vma); | ||
1281 | split_huge_pud(vma, pud, addr); | ||
1282 | } else if (zap_huge_pud(tlb, vma, pud, addr)) | ||
1283 | goto next; | ||
1284 | /* fall through */ | ||
1285 | } | ||
1266 | if (pud_none_or_clear_bad(pud)) | 1286 | if (pud_none_or_clear_bad(pud)) |
1267 | continue; | 1287 | continue; |
1268 | next = zap_pmd_range(tlb, vma, pud, addr, next, details); | 1288 | next = zap_pmd_range(tlb, vma, pud, addr, next, details); |
1289 | next: | ||
1290 | cond_resched(); | ||
1269 | } while (pud++, addr = next, addr != end); | 1291 | } while (pud++, addr = next, addr != end); |
1270 | 1292 | ||
1271 | return addr; | 1293 | return addr; |
@@ -3490,6 +3512,30 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) | |||
3490 | return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE); | 3512 | return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE); |
3491 | } | 3513 | } |
3492 | 3514 | ||
3515 | static int create_huge_pud(struct vm_fault *vmf) | ||
3516 | { | ||
3517 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
3518 | /* No support for anonymous transparent PUD pages yet */ | ||
3519 | if (vma_is_anonymous(vmf->vma)) | ||
3520 | return VM_FAULT_FALLBACK; | ||
3521 | if (vmf->vma->vm_ops->huge_fault) | ||
3522 | return vmf->vma->vm_ops->huge_fault(vmf); | ||
3523 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
3524 | return VM_FAULT_FALLBACK; | ||
3525 | } | ||
3526 | |||
3527 | static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) | ||
3528 | { | ||
3529 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
3530 | /* No support for anonymous transparent PUD pages yet */ | ||
3531 | if (vma_is_anonymous(vmf->vma)) | ||
3532 | return VM_FAULT_FALLBACK; | ||
3533 | if (vmf->vma->vm_ops->huge_fault) | ||
3534 | return vmf->vma->vm_ops->huge_fault(vmf); | ||
3535 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
3536 | return VM_FAULT_FALLBACK; | ||
3537 | } | ||
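
Both helpers funnel into the same vm_ops callback; a hypothetical wiring (demo names) showing how one huge_fault handler can serve both sizes by testing FAULT_FLAG_SIZE_PUD, which __handle_mm_fault() sets below:

static int demo_fault(struct vm_fault *vmf)
{
	if (vmf->flags & FAULT_FLAG_SIZE_PUD)
		return demo_huge_fault(vmf);	/* pud sketch earlier */
	return VM_FAULT_FALLBACK;	/* let the pmd/pte paths run */
}

static const struct vm_operations_struct demo_vm_ops = {
	.huge_fault = demo_fault,
};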
3538 | |||
3493 | /* | 3539 | /* |
3494 | * These routines also need to handle stuff like marking pages dirty | 3540 | * These routines also need to handle stuff like marking pages dirty |
3495 | * and/or accessed for architectures that don't do it in hardware (most | 3541 | * and/or accessed for architectures that don't do it in hardware (most |
@@ -3605,14 +3651,41 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, | |||
3605 | }; | 3651 | }; |
3606 | struct mm_struct *mm = vma->vm_mm; | 3652 | struct mm_struct *mm = vma->vm_mm; |
3607 | pgd_t *pgd; | 3653 | pgd_t *pgd; |
3608 | pud_t *pud; | ||
3609 | int ret; | 3654 | int ret; |
3610 | 3655 | ||
3611 | pgd = pgd_offset(mm, address); | 3656 | pgd = pgd_offset(mm, address); |
3612 | pud = pud_alloc(mm, pgd, address); | 3657 | |
3613 | if (!pud) | 3658 | vmf.pud = pud_alloc(mm, pgd, address); |
3659 | if (!vmf.pud) | ||
3614 | return VM_FAULT_OOM; | 3660 | return VM_FAULT_OOM; |
3615 | vmf.pmd = pmd_alloc(mm, pud, address); | 3661 | if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) { |
3662 | vmf.flags |= FAULT_FLAG_SIZE_PUD; | ||
3663 | ret = create_huge_pud(&vmf); | ||
3664 | if (!(ret & VM_FAULT_FALLBACK)) | ||
3665 | return ret; | ||
3666 | } else { | ||
3667 | pud_t orig_pud = *vmf.pud; | ||
3668 | |||
3669 | barrier(); | ||
3670 | if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) { | ||
3671 | unsigned int dirty = flags & FAULT_FLAG_WRITE; | ||
3672 | |||
3673 | vmf.flags |= FAULT_FLAG_SIZE_PUD; | ||
3674 | |||
3675 | /* NUMA case for anonymous PUDs would go here */ | ||
3676 | |||
3677 | if (dirty && !pud_write(orig_pud)) { | ||
3678 | ret = wp_huge_pud(&vmf, orig_pud); | ||
3679 | if (!(ret & VM_FAULT_FALLBACK)) | ||
3680 | return ret; | ||
3681 | } else { | ||
3682 | huge_pud_set_accessed(&vmf, orig_pud); | ||
3683 | return 0; | ||
3684 | } | ||
3685 | } | ||
3686 | } | ||
3687 | |||
3688 | vmf.pmd = pmd_alloc(mm, vmf.pud, address); | ||
3616 | if (!vmf.pmd) | 3689 | if (!vmf.pmd) |
3617 | return VM_FAULT_OOM; | 3690 | return VM_FAULT_OOM; |
3618 | if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) { | 3691 | if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) { |
@@ -3743,13 +3816,14 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) | |||
3743 | */ | 3816 | */ |
3744 | int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) | 3817 | int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) |
3745 | { | 3818 | { |
3819 | spinlock_t *ptl; | ||
3746 | pmd_t *new = pmd_alloc_one(mm, address); | 3820 | pmd_t *new = pmd_alloc_one(mm, address); |
3747 | if (!new) | 3821 | if (!new) |
3748 | return -ENOMEM; | 3822 | return -ENOMEM; |
3749 | 3823 | ||
3750 | smp_wmb(); /* See comment in __pte_alloc */ | 3824 | smp_wmb(); /* See comment in __pte_alloc */ |
3751 | 3825 | ||
3752 | spin_lock(&mm->page_table_lock); | 3826 | ptl = pud_lock(mm, pud); |
3753 | #ifndef __ARCH_HAS_4LEVEL_HACK | 3827 | #ifndef __ARCH_HAS_4LEVEL_HACK |
3754 | if (!pud_present(*pud)) { | 3828 | if (!pud_present(*pud)) { |
3755 | mm_inc_nr_pmds(mm); | 3829 | mm_inc_nr_pmds(mm); |
@@ -3763,7 +3837,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) | |||
3763 | } else /* Another has populated it */ | 3837 | } else /* Another has populated it */ |
3764 | pmd_free(mm, new); | 3838 | pmd_free(mm, new); |
3765 | #endif /* __ARCH_HAS_4LEVEL_HACK */ | 3839 | #endif /* __ARCH_HAS_4LEVEL_HACK */ |
3766 | spin_unlock(&mm->page_table_lock); | 3840 | spin_unlock(ptl); |
3767 | return 0; | 3841 | return 0; |
3768 | } | 3842 | } |
3769 | #endif /* __PAGETABLE_PMD_FOLDED */ | 3843 | #endif /* __PAGETABLE_PMD_FOLDED */ |
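
The switch from mm->page_table_lock to pud_lock() above is a no-op today; per the include/linux/mm.h hunk of this patch (not shown in this excerpt) the pud lock still resolves to the global page_table_lock, roughly:

static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud)
{
	return &mm->page_table_lock;	/* no split pud locks yet */
}

static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
{
	spinlock_t *ptl = pud_lockptr(mm, pud);

	spin_lock(ptl);
	return ptl;
}

Routing __pmd_alloc() through the wrapper is cosmetic for now, but it leaves a single place to switch to split PUD locks later.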
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 207244489a68..03761577ae86 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
@@ -78,14 +78,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
78 | 78 | ||
79 | pud = pud_offset(pgd, addr); | 79 | pud = pud_offset(pgd, addr); |
80 | do { | 80 | do { |
81 | again: | ||
81 | next = pud_addr_end(addr, end); | 82 | next = pud_addr_end(addr, end); |
82 | if (pud_none_or_clear_bad(pud)) { | 83 | if (pud_none(*pud) || !walk->vma) { |
83 | if (walk->pte_hole) | 84 | if (walk->pte_hole) |
84 | err = walk->pte_hole(addr, next, walk); | 85 | err = walk->pte_hole(addr, next, walk); |
85 | if (err) | 86 | if (err) |
86 | break; | 87 | break; |
87 | continue; | 88 | continue; |
88 | } | 89 | } |
90 | |||
91 | if (walk->pud_entry) { | ||
92 | spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma); | ||
93 | |||
94 | if (ptl) { | ||
95 | err = walk->pud_entry(pud, addr, next, walk); | ||
96 | spin_unlock(ptl); | ||
97 | if (err) | ||
98 | break; | ||
99 | continue; | ||
100 | } | ||
101 | } | ||
102 | |||
103 | split_huge_pud(walk->vma, pud, addr); | ||
104 | if (pud_none(*pud)) | ||
105 | goto again; | ||
106 | |||
89 | if (walk->pmd_entry || walk->pte_entry) | 107 | if (walk->pmd_entry || walk->pte_entry) |
90 | err = walk_pmd_range(pud, addr, next, walk); | 108 | err = walk_pmd_range(pud, addr, next, walk); |
91 | if (err) | 109 | if (err) |
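
With the hunk above, a pagewalk user can observe huge puds directly. A hypothetical example (demo names) that counts them; note pud_entry runs under the pud lock taken by pud_trans_huge_lock(), so the callback must not sleep:

static int demo_pud_entry(pud_t *pud, unsigned long addr,
			  unsigned long next, struct mm_walk *walk)
{
	unsigned long *huge_puds = walk->private;

	(*huge_puds)++;	/* only reached for huge/devmap puds */
	return 0;
}

static unsigned long demo_count_huge_puds(struct mm_struct *mm)
{
	unsigned long count = 0;
	struct mm_walk walk = {
		.pud_entry	= demo_pud_entry,
		.mm		= mm,
		.private	= &count,
	};

	down_read(&mm->mmap_sem);	/* walk_page_range() requires it */
	walk_page_range(0, TASK_SIZE, &walk);
	up_read(&mm->mmap_sem);
	return count;
}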
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 71c5f9109f2a..4ed5908c65b0 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c | |||
@@ -123,6 +123,20 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, | |||
123 | flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | 123 | flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); |
124 | return pmd; | 124 | return pmd; |
125 | } | 125 | } |
126 | |||
127 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD | ||
128 | pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, | ||
129 | pud_t *pudp) | ||
130 | { | ||
131 | pud_t pud; | ||
132 | |||
133 | VM_BUG_ON(address & ~HPAGE_PUD_MASK); | ||
134 | VM_BUG_ON(!pud_trans_huge(*pudp) && !pud_devmap(*pudp)); | ||
135 | pud = pudp_huge_get_and_clear(vma->vm_mm, address, pudp); | ||
136 | flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); | ||
137 | return pud; | ||
138 | } | ||
139 | #endif | ||
126 | #endif | 140 | #endif |
127 | 141 | ||
128 | #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT | 142 | #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT |