author	Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2017-03-09 09:24:07 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-03-09 14:48:47 -0500
commit	c2febafc67734a62196c1b9dfba926412d4077ba (patch)
tree	a61b7cd503e4c6d2fbb58f0cb53662ffd60b4c14
parent	048456dcf2c56ad6f6248e2899dda92fb6a613f6 (diff)
mm: convert generic code to 5-level paging
Convert all non-architecture-specific code to 5-level paging.

It's mostly mechanical: add handling of one more page table level in the
places where we deal with pud_t.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
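The conversion follows one mechanical pattern everywhere: a page table walk that used to step pgd -> pud -> pmd -> pte gains a p4d step between pgd and pud. A minimal sketch of a converted walk, for illustration only (walk_example() is a hypothetical helper, not a function from this patch; the pgd/p4d/pud/pmd/pte accessors are the generic page table API this patch extends):

/*
 * Illustrative sketch, not a hunk from this patch: the walk pattern
 * after conversion. Converted sites differ mainly in error handling.
 */
static pte_t *walk_example(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none_or_clear_bad(pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);	/* new level introduced here */
	if (p4d_none_or_clear_bad(p4d))
		return NULL;
	pud = pud_offset(p4d, addr);	/* was pud_offset(pgd, addr) */
	if (pud_none_or_clear_bad(pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (pmd_none_or_clear_bad(pmd))
		return NULL;
	return pte_offset_map(pmd, addr);	/* leaf level unchanged */
}

On architectures with fewer than five paging levels the p4d level is folded into pgd (pgtable-nop4d.h or, transitionally, 5level-fixup.h), so the extra step compiles down to nothing.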
-rw-r--r--	drivers/misc/sgi-gru/grufault.c	9
-rw-r--r--	fs/userfaultfd.c	6
-rw-r--r--	include/asm-generic/pgtable.h	48
-rw-r--r--	include/linux/hugetlb.h	5
-rw-r--r--	include/linux/kasan.h	1
-rw-r--r--	include/linux/mm.h	31
-rw-r--r--	lib/ioremap.c	39
-rw-r--r--	mm/gup.c	46
-rw-r--r--	mm/huge_memory.c	7
-rw-r--r--	mm/hugetlb.c	29
-rw-r--r--	mm/kasan/kasan_init.c	44
-rw-r--r--	mm/memory.c	207
-rw-r--r--	mm/mlock.c	1
-rw-r--r--	mm/mprotect.c	26
-rw-r--r--	mm/mremap.c	13
-rw-r--r--	mm/page_vma_mapped.c	6
-rw-r--r--	mm/pagewalk.c	32
-rw-r--r--	mm/pgtable-generic.c	6
-rw-r--r--	mm/rmap.c	7
-rw-r--r--	mm/sparse-vmemmap.c	22
-rw-r--r--	mm/swapfile.c	26
-rw-r--r--	mm/userfaultfd.c	23
-rw-r--r--	mm/vmalloc.c	81
23 files changed, 595 insertions(+), 120 deletions(-)
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 6fb773dbcd0c..93be82fc338a 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -219,15 +219,20 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
 		    int write, unsigned long *paddr, int *pageshift)
 {
 	pgd_t *pgdp;
-	pmd_t *pmdp;
+	p4d_t *p4dp;
 	pud_t *pudp;
+	pmd_t *pmdp;
 	pte_t pte;
 
 	pgdp = pgd_offset(vma->vm_mm, vaddr);
 	if (unlikely(pgd_none(*pgdp)))
 		goto err;
 
-	pudp = pud_offset(pgdp, vaddr);
+	p4dp = p4d_offset(pgdp, vaddr);
+	if (unlikely(p4d_none(*p4dp)))
+		goto err;
+
+	pudp = pud_offset(p4dp, vaddr);
 	if (unlikely(pud_none(*pudp)))
 		goto err;
 
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 973607df579d..02ce3944d0f5 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -267,6 +267,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
 {
 	struct mm_struct *mm = ctx->mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd, _pmd;
 	pte_t *pte;
@@ -277,7 +278,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
 		goto out;
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		goto out;
+	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		goto out;
 	pmd = pmd_offset(pud, address);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f4ca23b158b3..1fad160f35de 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -10,9 +10,9 @@
 #include <linux/bug.h>
 #include <linux/errno.h>
 
-#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \
-	CONFIG_PGTABLE_LEVELS
-#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED
+#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
+	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
+#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
 #endif
 
 /*
@@ -424,6 +424,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
 })
 
+#ifndef p4d_addr_end
+#define p4d_addr_end(addr, end)						\
+({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;	\
+	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+})
+#endif
+
 #ifndef pud_addr_end
 #define pud_addr_end(addr, end)						\
 ({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
@@ -444,6 +451,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
  * Do the tests inline, but report and clear the bad entry in mm/memory.c.
  */
 void pgd_clear_bad(pgd_t *);
+void p4d_clear_bad(p4d_t *);
 void pud_clear_bad(pud_t *);
 void pmd_clear_bad(pmd_t *);
 
@@ -458,6 +466,17 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd)
 	return 0;
 }
 
+static inline int p4d_none_or_clear_bad(p4d_t *p4d)
+{
+	if (p4d_none(*p4d))
+		return 1;
+	if (unlikely(p4d_bad(*p4d))) {
+		p4d_clear_bad(p4d);
+		return 1;
+	}
+	return 0;
+}
+
 static inline int pud_none_or_clear_bad(pud_t *pud)
 {
 	if (pud_none(*pud))
@@ -844,11 +863,30 @@ static inline int pmd_protnone(pmd_t pmd)
 #endif /* CONFIG_MMU */
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+
+#ifndef __PAGETABLE_P4D_FOLDED
+int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
+int p4d_clear_huge(p4d_t *p4d);
+#else
+static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
+{
+	return 0;
+}
+static inline int p4d_clear_huge(p4d_t *p4d)
+{
+	return 0;
+}
+#endif /* !__PAGETABLE_P4D_FOLDED */
+
 int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
 int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pud_clear_huge(pud_t *pud);
 int pmd_clear_huge(pmd_t *pmd);
 #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
+static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
+{
+	return 0;
+}
 static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 {
 	return 0;
@@ -857,6 +895,10 @@ static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
 {
 	return 0;
 }
+static inline int p4d_clear_huge(p4d_t *p4d)
+{
+	return 0;
+}
 static inline int pud_clear_huge(pud_t *pud)
 {
 	return 0;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 503099d8aada..b857fc8cc2ec 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
 				pud_t *pud, int flags);
 int pmd_huge(pmd_t pmd);
-int pud_huge(pud_t pmd);
+int pud_huge(pud_t pud);
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
@@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
 #ifndef pgd_huge
 #define pgd_huge(x)	0
 #endif
+#ifndef p4d_huge
+#define p4d_huge(x)	0
+#endif
 
 #ifndef pgd_write
 static inline int pgd_write(pgd_t pgd)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index ceb3fe78a0d3..1c823bef4c15 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -18,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
 extern pte_t kasan_zero_pte[PTRS_PER_PTE];
 extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
 extern pud_t kasan_zero_pud[PTRS_PER_PUD];
+extern p4d_t kasan_zero_p4d[PTRS_PER_P4D];
 
 void kasan_populate_zero_shadow(const void *shadow_start,
 				const void *shadow_end);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index be1fe264eb37..5f01c88f0800 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
 	return ptep;
 }
 
+#ifdef __PAGETABLE_P4D_FOLDED
+static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
+						unsigned long address)
+{
+	return 0;
+}
+#else
+int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+#endif
+
 #ifdef __PAGETABLE_PUD_FOLDED
-static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd,
+static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d,
 						unsigned long address)
 {
 	return 0;
 }
 #else
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
 #endif
 
 #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
@@ -1621,10 +1631,18 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
 #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
 
 #ifndef __ARCH_HAS_5LEVEL_HACK
-static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
+		unsigned long address)
+{
+	return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ?
+		NULL : p4d_offset(pgd, address);
+}
+
+static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
+		unsigned long address)
 {
-	return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
-		NULL: pud_offset(pgd, address);
+	return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
+		NULL : pud_offset(p4d, address);
 }
 #endif /* !__ARCH_HAS_5LEVEL_HACK */
 
@@ -2388,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
 
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
-pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
+p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
+pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
 pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
 void *vmemmap_alloc_block(unsigned long size, int node);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index a3e14ce92a56..4bb30206b942 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -14,6 +14,7 @@
 #include <asm/pgtable.h>
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+static int __read_mostly ioremap_p4d_capable;
 static int __read_mostly ioremap_pud_capable;
 static int __read_mostly ioremap_pmd_capable;
 static int __read_mostly ioremap_huge_disabled;
@@ -35,6 +36,11 @@ void __init ioremap_huge_init(void)
 	}
 }
 
+static inline int ioremap_p4d_enabled(void)
+{
+	return ioremap_p4d_capable;
+}
+
 static inline int ioremap_pud_enabled(void)
 {
 	return ioremap_pud_capable;
@@ -46,6 +52,7 @@ static inline int ioremap_pmd_enabled(void)
 }
 
 #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
+static inline int ioremap_p4d_enabled(void) { return 0; }
 static inline int ioremap_pud_enabled(void) { return 0; }
 static inline int ioremap_pmd_enabled(void) { return 0; }
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
@@ -94,14 +101,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 	return 0;
 }
 
-static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
+static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
 		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pud_t *pud;
 	unsigned long next;
 
 	phys_addr -= addr;
-	pud = pud_alloc(&init_mm, pgd, addr);
+	pud = pud_alloc(&init_mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
@@ -120,6 +127,32 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
 	return 0;
 }
 
+static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr,
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	phys_addr -= addr;
+	p4d = p4d_alloc(&init_mm, pgd, addr);
+	if (!p4d)
+		return -ENOMEM;
+	do {
+		next = p4d_addr_end(addr, end);
+
+		if (ioremap_p4d_enabled() &&
+		    ((next - addr) == P4D_SIZE) &&
+		    IS_ALIGNED(phys_addr + addr, P4D_SIZE)) {
+			if (p4d_set_huge(p4d, phys_addr + addr, prot))
+				continue;
+		}
+
+		if (ioremap_pud_range(p4d, addr, next, phys_addr + addr, prot))
+			return -ENOMEM;
+	} while (p4d++, addr = next, addr != end);
+	return 0;
+}
+
 int ioremap_page_range(unsigned long addr,
 		       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
@@ -135,7 +168,7 @@ int ioremap_page_range(unsigned long addr,
 	pgd = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
+		err = ioremap_p4d_range(pgd, addr, next, phys_addr+addr, prot);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
diff --git a/mm/gup.c b/mm/gup.c
index 9c047e951aa3..c74bad1bf6e8 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -226,6 +226,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 			      unsigned int *page_mask)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	spinlock_t *ptl;
@@ -243,8 +244,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 	pgd = pgd_offset(mm, address);
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		return no_page_table(vma, flags);
-
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (p4d_none(*p4d))
+		return no_page_table(vma, flags);
+	BUILD_BUG_ON(p4d_huge(*p4d));
+	if (unlikely(p4d_bad(*p4d)))
+		return no_page_table(vma, flags);
+	pud = pud_offset(p4d, address);
 	if (pud_none(*pud))
 		return no_page_table(vma, flags);
 	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
@@ -325,6 +331,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
 		struct page **page)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -338,7 +345,9 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
 	else
 		pgd = pgd_offset_gate(mm, address);
 	BUG_ON(pgd_none(*pgd));
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	BUG_ON(p4d_none(*p4d));
+	pud = pud_offset(p4d, address);
 	BUG_ON(pud_none(*pud));
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
@@ -1400,13 +1409,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 	return 1;
 }
 
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
 			 int write, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pud_t *pudp;
 
-	pudp = pud_offset(&pgd, addr);
+	pudp = pud_offset(&p4d, addr);
 	do {
 		pud_t pud = READ_ONCE(*pudp);
 
@@ -1428,6 +1437,31 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
+			 int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	p4d_t *p4dp;
+
+	p4dp = p4d_offset(&pgd, addr);
+	do {
+		p4d_t p4d = READ_ONCE(*p4dp);
+
+		next = p4d_addr_end(addr, end);
+		if (p4d_none(p4d))
+			return 0;
+		BUILD_BUG_ON(p4d_huge(p4d));
+		if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
+			if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
+					 P4D_SHIFT, next, write, pages, nr))
+				return 0;
+		} else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
+			return 0;
+	} while (p4dp++, addr = next, addr != end);
+
+	return 1;
+}
+
 /*
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
  * the regular GUP. It will only return non-negative values.
@@ -1478,7 +1512,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
 					 PGDIR_SHIFT, next, write, pages, &nr))
 				break;
-		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+		} else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
 			break;
 	} while (pgdp++, addr = next, addr != end);
 	local_irq_restore(flags);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d36b2af4d1bf..e4766de25709 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2048,6 +2048,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 		bool freeze, struct page *page)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -2055,7 +2056,11 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 	if (!pgd_present(*pgd))
 		return;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		return;
+
+	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		return;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a7aa811b7d14..3d0aab9ee80d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4555,7 +4555,8 @@ out:
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	pgd_t *pgd = pgd_offset(mm, *addr);
-	pud_t *pud = pud_offset(pgd, *addr);
+	p4d_t *p4d = p4d_offset(pgd, *addr);
+	pud_t *pud = pud_offset(p4d, *addr);
 
 	BUG_ON(page_count(virt_to_page(ptep)) == 0);
 	if (page_count(virt_to_page(ptep)) == 1)
@@ -4586,11 +4587,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pte_t *pte = NULL;
 
 	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (pud) {
 		if (sz == PUD_SIZE) {
 			pte = (pte_t *)pud;
@@ -4610,18 +4613,22 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
-	pmd_t *pmd = NULL;
+	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (pud_present(*pud)) {
-			if (pud_huge(*pud))
-				return (pte_t *)pud;
-			pmd = pmd_offset(pud, addr);
-		}
-	}
+	if (!pgd_present(*pgd))
+		return NULL;
+	p4d = p4d_offset(pgd, addr);
+	if (!p4d_present(*p4d))
+		return NULL;
+	pud = pud_offset(p4d, addr);
+	if (!pud_present(*pud))
+		return NULL;
+	if (pud_huge(*pud))
+		return (pte_t *)pud;
+	pmd = pmd_offset(pud, addr);
 	return (pte_t *) pmd;
 }
 
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 31238dad85fb..b96a5f773d88 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -30,6 +30,9 @@
  */
 unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
 
+#if CONFIG_PGTABLE_LEVELS > 4
+p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss;
+#endif
 #if CONFIG_PGTABLE_LEVELS > 3
 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
 #endif
@@ -82,10 +85,10 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
 	} while (pmd++, addr = next, addr != end);
 }
 
-static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
+static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
 				unsigned long end)
 {
-	pud_t *pud = pud_offset(pgd, addr);
+	pud_t *pud = pud_offset(p4d, addr);
 	unsigned long next;
 
 	do {
@@ -107,6 +110,23 @@ static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
 	} while (pud++, addr = next, addr != end);
 }
 
+static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
+				unsigned long end)
+{
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	unsigned long next;
+
+	do {
+		next = p4d_addr_end(addr, end);
+
+		if (p4d_none(*p4d)) {
+			p4d_populate(&init_mm, p4d,
+				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+		}
+		zero_pud_populate(p4d, addr, next);
+	} while (p4d++, addr = next, addr != end);
+}
+
 /**
  * kasan_populate_zero_shadow - populate shadow memory region with
  *                              kasan_zero_page
@@ -125,6 +145,7 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
 		next = pgd_addr_end(addr, end);
 
 		if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
+			p4d_t *p4d;
 			pud_t *pud;
 			pmd_t *pmd;
 
@@ -135,9 +156,22 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
 			 * 3,2 - level page tables where we don't have
 			 * puds,pmds, so pgd_populate(), pud_populate()
 			 * is noops.
+			 *
+			 * The ifndef is required to avoid build breakage.
+			 *
+			 * With 5level-fixup.h, pgd_populate() is not nop and
+			 * we reference kasan_zero_p4d. It's not defined
+			 * unless 5-level paging enabled.
+			 *
+			 * The ifndef can be dropped once all KASAN-enabled
+			 * architectures will switch to pgtable-nop4d.h.
 			 */
-			pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud));
-			pud = pud_offset(pgd, addr);
+#ifndef __ARCH_HAS_5LEVEL_HACK
+			pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_p4d));
+#endif
+			p4d = p4d_offset(pgd, addr);
+			p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud));
+			pud = pud_offset(p4d, addr);
 			pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd));
 			pmd = pmd_offset(pud, addr);
 			pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte));
@@ -148,6 +182,6 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
 			pgd_populate(&init_mm, pgd,
 				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
 		}
-		zero_pud_populate(pgd, addr, next);
+		zero_p4d_populate(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
diff --git a/mm/memory.c b/mm/memory.c
index a97a4cec2e1f..7f1c2163b3ce 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -445,7 +445,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	mm_dec_nr_pmds(tlb->mm);
 }
 
-static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
 				unsigned long addr, unsigned long end,
 				unsigned long floor, unsigned long ceiling)
 {
@@ -454,7 +454,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	unsigned long start;
 
 	start = addr;
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
@@ -462,6 +462,39 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 		free_pmd_range(tlb, pud, addr, next, floor, ceiling);
 	} while (pud++, addr = next, addr != end);
 
+	start &= P4D_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= P4D_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pud = pud_offset(p4d, start);
+	p4d_clear(p4d);
+	pud_free_tlb(tlb, pud, start);
+}
+
+static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				unsigned long floor, unsigned long ceiling)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		free_pud_range(tlb, p4d, addr, next, floor, ceiling);
+	} while (p4d++, addr = next, addr != end);
+
 	start &= PGDIR_MASK;
 	if (start < floor)
 		return;
@@ -473,9 +506,9 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	if (end - 1 > ceiling - 1)
 		return;
 
-	pud = pud_offset(pgd, start);
+	p4d = p4d_offset(pgd, start);
 	pgd_clear(pgd);
-	pud_free_tlb(tlb, pud, start);
+	p4d_free_tlb(tlb, p4d, start);
 }
 
 /*
@@ -539,7 +572,7 @@ void free_pgd_range(struct mmu_gather *tlb,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+		free_p4d_range(tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -658,7 +691,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 			  pte_t pte, struct page *page)
 {
 	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
-	pud_t *pud = pud_offset(pgd, addr);
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	pud_t *pud = pud_offset(p4d, addr);
 	pmd_t *pmd = pmd_offset(pud, addr);
 	struct address_space *mapping;
 	pgoff_t index;
@@ -1023,16 +1057,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
 }
 
 static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+		p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end)
 {
 	pud_t *src_pud, *dst_pud;
 	unsigned long next;
 
-	dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
+	dst_pud = pud_alloc(dst_mm, dst_p4d, addr);
 	if (!dst_pud)
 		return -ENOMEM;
-	src_pud = pud_offset(src_pgd, addr);
+	src_pud = pud_offset(src_p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) {
@@ -1056,6 +1090,28 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src
 	return 0;
 }
 
+static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end)
+{
+	p4d_t *src_p4d, *dst_p4d;
+	unsigned long next;
+
+	dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr);
+	if (!dst_p4d)
+		return -ENOMEM;
+	src_p4d = p4d_offset(src_pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(src_p4d))
+			continue;
+		if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d,
+						vma, addr, next))
+			return -ENOMEM;
+	} while (dst_p4d++, src_p4d++, addr = next, addr != end);
+	return 0;
+}
+
 int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		struct vm_area_struct *vma)
 {
@@ -1111,7 +1167,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
-		if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+		if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd,
 					    vma, addr, next))) {
 			ret = -ENOMEM;
 			break;
@@ -1267,14 +1323,14 @@ next:
 }
 
 static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
-				struct vm_area_struct *vma, pgd_t *pgd,
+				struct vm_area_struct *vma, p4d_t *p4d,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
@@ -1295,6 +1351,25 @@ next:
 	return addr;
 }
 
+static inline unsigned long zap_p4d_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				struct zap_details *details)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		next = zap_pud_range(tlb, vma, p4d, addr, next, details);
+	} while (p4d++, addr = next, addr != end);
+
+	return addr;
+}
+
 void unmap_page_range(struct mmu_gather *tlb,
 			     struct vm_area_struct *vma,
 			     unsigned long addr, unsigned long end,
@@ -1310,7 +1385,7 @@ void unmap_page_range(struct mmu_gather *tlb,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		next = zap_pud_range(tlb, vma, pgd, addr, next, details);
+		next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 }
@@ -1465,16 +1540,24 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes);
 pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
 			spinlock_t **ptl)
 {
-	pgd_t *pgd = pgd_offset(mm, addr);
-	pud_t *pud = pud_alloc(mm, pgd, addr);
-	if (pud) {
-		pmd_t *pmd = pmd_alloc(mm, pud, addr);
-		if (pmd) {
-			VM_BUG_ON(pmd_trans_huge(*pmd));
-			return pte_alloc_map_lock(mm, pmd, addr, ptl);
-		}
-	}
-	return NULL;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return NULL;
+	pud = pud_alloc(mm, p4d, addr);
+	if (!pud)
+		return NULL;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return NULL;
+
+	VM_BUG_ON(pmd_trans_huge(*pmd));
+	return pte_alloc_map_lock(mm, pmd, addr, ptl);
 }
 
 /*
@@ -1740,7 +1823,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 	return 0;
 }
 
-static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
 			unsigned long addr, unsigned long end,
 			unsigned long pfn, pgprot_t prot)
 {
@@ -1748,7 +1831,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	unsigned long next;
 
 	pfn -= addr >> PAGE_SHIFT;
-	pud = pud_alloc(mm, pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
@@ -1760,6 +1843,26 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	return 0;
 }
 
+static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
+			unsigned long addr, unsigned long end,
+			unsigned long pfn, pgprot_t prot)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	pfn -= addr >> PAGE_SHIFT;
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -ENOMEM;
+	do {
+		next = p4d_addr_end(addr, end);
+		if (remap_pud_range(mm, p4d, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot))
+			return -ENOMEM;
+	} while (p4d++, addr = next, addr != end);
+	return 0;
+}
+
 /**
  * remap_pfn_range - remap kernel memory to userspace
  * @vma: user vma to map to
@@ -1816,7 +1919,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	flush_cache_range(vma, addr, end);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = remap_pud_range(mm, pgd, addr, next,
+		err = remap_p4d_range(mm, pgd, addr, next,
 				pfn + (addr >> PAGE_SHIFT), prot);
 		if (err)
 			break;
@@ -1932,7 +2035,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
 	return err;
 }
 
-static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
 				     unsigned long addr, unsigned long end,
 				     pte_fn_t fn, void *data)
 {
@@ -1940,7 +2043,7 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	unsigned long next;
 	int err;
 
-	pud = pud_alloc(mm, pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
@@ -1952,6 +2055,26 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	return err;
 }
 
+static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
+				     unsigned long addr, unsigned long end,
+				     pte_fn_t fn, void *data)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	int err;
+
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -ENOMEM;
+	do {
+		next = p4d_addr_end(addr, end);
+		err = apply_to_pud_range(mm, p4d, addr, next, fn, data);
+		if (err)
+			break;
+	} while (p4d++, addr = next, addr != end);
+	return err;
+}
+
 /*
  * Scan a region of virtual memory, filling in page tables as necessary
  * and calling a provided function on each leaf page table.
@@ -1970,7 +2093,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 	pgd = pgd_offset(mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = apply_to_pud_range(mm, pgd, addr, next, fn, data);
+		err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
@@ -3653,11 +3776,15 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	};
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	int ret;
 
 	pgd = pgd_offset(mm, address);
+	p4d = p4d_alloc(mm, pgd, address);
+	if (!p4d)
+		return VM_FAULT_OOM;
 
-	vmf.pud = pud_alloc(mm, pgd, address);
+	vmf.pud = pud_alloc(mm, p4d, address);
 	if (!vmf.pud)
 		return VM_FAULT_OOM;
 	if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) {
@@ -3784,7 +3911,7 @@ EXPORT_SYMBOL_GPL(handle_mm_fault);
  * Allocate page upper directory.
  * We've already handled the fast-path in-line.
  */
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
 {
 	pud_t *new = pud_alloc_one(mm, address);
 	if (!new)
@@ -3793,10 +3920,17 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 	smp_wmb(); /* See comment in __pte_alloc */
 
 	spin_lock(&mm->page_table_lock);
-	if (pgd_present(*pgd))		/* Another has populated it */
+#ifndef __ARCH_HAS_5LEVEL_HACK
+	if (p4d_present(*p4d))		/* Another has populated it */
+		pud_free(mm, new);
+	else
+		p4d_populate(mm, p4d, new);
+#else
+	if (pgd_present(*p4d))		/* Another has populated it */
 		pud_free(mm, new);
 	else
-		pgd_populate(mm, pgd, new);
+		pgd_populate(mm, p4d, new);
+#endif /* __ARCH_HAS_5LEVEL_HACK */
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
@@ -3839,6 +3973,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 			    pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *ptep;
@@ -3847,7 +3982,11 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		goto out;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d)))
+		goto out;
+
+	pud = pud_offset(p4d, address);
 	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
 		goto out;
 
diff --git a/mm/mlock.c b/mm/mlock.c
index 1050511f8b2b..945edac46810 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -380,6 +380,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
 	pte = get_locked_pte(vma->vm_mm, start, &ptl);
 	/* Make sure we do not cross the page table boundary */
 	end = pgd_addr_end(start, end);
+	end = p4d_addr_end(start, end);
 	end = pud_addr_end(start, end);
 	end = pmd_addr_end(start, end);
 
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 848e946b08e5..8edd0d576254 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -193,14 +193,14 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 }
 
 static inline unsigned long change_pud_range(struct vm_area_struct *vma,
-		pgd_t *pgd, unsigned long addr, unsigned long end,
+		p4d_t *p4d, unsigned long addr, unsigned long end,
 		pgprot_t newprot, int dirty_accountable, int prot_numa)
 {
 	pud_t *pud;
 	unsigned long next;
 	unsigned long pages = 0;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
@@ -212,6 +212,26 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma,
 	return pages;
 }
 
+static inline unsigned long change_p4d_range(struct vm_area_struct *vma,
+		pgd_t *pgd, unsigned long addr, unsigned long end,
+		pgprot_t newprot, int dirty_accountable, int prot_numa)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	unsigned long pages = 0;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		pages += change_pud_range(vma, p4d, addr, next, newprot,
+					  dirty_accountable, prot_numa);
+	} while (p4d++, addr = next, addr != end);
+
+	return pages;
+}
+
 static unsigned long change_protection_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable, int prot_numa)
@@ -230,7 +250,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		pages += change_pud_range(vma, pgd, addr, next, newprot,
+		pages += change_p4d_range(vma, pgd, addr, next, newprot,
 				 dirty_accountable, prot_numa);
 	} while (pgd++, addr = next, addr != end);
 
diff --git a/mm/mremap.c b/mm/mremap.c
index 8233b0105c82..cd8a1b199ef9 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -32,6 +32,7 @@
 static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
@@ -39,7 +40,11 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 	if (pgd_none_or_clear_bad(pgd))
 		return NULL;
 
-	pud = pud_offset(pgd, addr);
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none_or_clear_bad(p4d))
+		return NULL;
+
+	pud = pud_offset(p4d, addr);
 	if (pud_none_or_clear_bad(pud))
 		return NULL;
 
@@ -54,11 +59,15 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 			    unsigned long addr)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return NULL;
+	pud = pud_alloc(mm, p4d, addr);
 	if (!pud)
 		return NULL;
 
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index a23001a22c15..c4c9def8ffea 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -104,6 +104,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	struct mm_struct *mm = pvmw->vma->vm_mm;
 	struct page *page = pvmw->page;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 
 	/* The only possible pmd mapping has been handled on last iteration */
@@ -133,7 +134,10 @@ restart:
 	pgd = pgd_offset(mm, pvmw->address);
 	if (!pgd_present(*pgd))
 		return false;
-	pud = pud_offset(pgd, pvmw->address);
+	p4d = p4d_offset(pgd, pvmw->address);
+	if (!p4d_present(*p4d))
+		return false;
+	pud = pud_offset(p4d, pvmw->address);
 	if (!pud_present(*pud))
 		return false;
 	pvmw->pmd = pmd_offset(pud, pvmw->address);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 03761577ae86..60f7856e508f 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -69,14 +69,14 @@ again:
 	return err;
 }
 
-static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 			  struct mm_walk *walk)
 {
 	pud_t *pud;
 	unsigned long next;
 	int err = 0;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
  again:
 		next = pud_addr_end(addr, end);
@@ -113,6 +113,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	return err;
 }
 
+static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+			  struct mm_walk *walk)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	int err = 0;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d)) {
+			if (walk->pte_hole)
+				err = walk->pte_hole(addr, next, walk);
+			if (err)
+				break;
+			continue;
+		}
+		if (walk->pmd_entry || walk->pte_entry)
+			err = walk_pud_range(p4d, addr, next, walk);
+		if (err)
+			break;
+	} while (p4d++, addr = next, addr != end);
+
+	return err;
+}
+
 static int walk_pgd_range(unsigned long addr, unsigned long end,
 			  struct mm_walk *walk)
 {
@@ -131,7 +157,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
 			continue;
 		}
 		if (walk->pmd_entry || walk->pte_entry)
-			err = walk_pud_range(pgd, addr, next, walk);
+			err = walk_p4d_range(pgd, addr, next, walk);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 4ed5908c65b0..c99d9512a45b 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -22,6 +22,12 @@ void pgd_clear_bad(pgd_t *pgd)
 	pgd_clear(pgd);
 }
 
+void p4d_clear_bad(p4d_t *p4d)
+{
+	p4d_ERROR(*p4d);
+	p4d_clear(p4d);
+}
+
 void pud_clear_bad(pud_t *pud)
 {
 	pud_ERROR(*pud);
diff --git a/mm/rmap.c b/mm/rmap.c
index 2da487d6cea8..2984403a2424 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -684,6 +684,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd = NULL;
 	pmd_t pmde;
@@ -692,7 +693,11 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
 	if (!pgd_present(*pgd))
 		goto out;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		goto out;
+
+	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		goto out;
 
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 574c67b663fe..a56c3989f773 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -196,9 +196,9 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
 	return pmd;
 }
 
-pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
+pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
 {
-	pud_t *pud = pud_offset(pgd, addr);
+	pud_t *pud = pud_offset(p4d, addr);
 	if (pud_none(*pud)) {
 		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
 		if (!p)
@@ -208,6 +208,18 @@ pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
 	return pud;
 }
 
+p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
+{
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+		if (!p)
+			return NULL;
+		p4d_populate(&init_mm, p4d, p);
+	}
+	return p4d;
+}
+
 pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
 {
 	pgd_t *pgd = pgd_offset_k(addr);
@@ -225,6 +237,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
 {
 	unsigned long addr = start;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
@@ -233,7 +246,10 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
 		pgd = vmemmap_pgd_populate(addr, node);
 		if (!pgd)
 			return -ENOMEM;
-		pud = vmemmap_pud_populate(pgd, addr, node);
+		p4d = vmemmap_p4d_populate(pgd, addr, node);
+		if (!p4d)
+			return -ENOMEM;
+		pud = vmemmap_pud_populate(p4d, addr, node);
 		if (!pud)
 			return -ENOMEM;
 		pmd = vmemmap_pmd_populate(pud, addr, node);
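The vmemmap population chain now takes the extra p4d hop, but on architectures that keep four levels the hop is free: the p4d level is folded into the pgd, and p4d_offset() merely reinterprets the pgd entry. A trimmed sketch of the folded helpers, after include/asm-generic/pgtable-nop4d.h from the same series (abbreviated; details may differ):

typedef struct { pgd_t pgd; } p4d_t;	/* a p4d is just the pgd entry */

#define PTRS_PER_P4D	1
#define p4d_addr_end(addr, end)	(end)	/* one entry spans the whole range */

static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
	return (p4d_t *)pgd;		/* no extra table, no extra memory */
}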
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 521ef9b6064f..178130880b90 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1517,7 +1517,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 	return 0;
 }
 
-static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
 				unsigned long addr, unsigned long end,
 				swp_entry_t entry, struct page *page)
 {
@@ -1525,7 +1525,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 	unsigned long next;
 	int ret;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
@@ -1537,6 +1537,26 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 	return 0;
 }
 
+static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				swp_entry_t entry, struct page *page)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	int ret;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		ret = unuse_pud_range(vma, p4d, addr, next, entry, page);
+		if (ret)
+			return ret;
+	} while (p4d++, addr = next, addr != end);
+	return 0;
+}
+
 static int unuse_vma(struct vm_area_struct *vma,
 		     swp_entry_t entry, struct page *page)
 {
@@ -1560,7 +1580,7 @@ static int unuse_vma(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
+		ret = unuse_p4d_range(vma, pgd, addr, next, entry, page);
 		if (ret)
 			return ret;
 	} while (pgd++, addr = next, addr != end);
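unuse_p4d_range() is the patch's recurring recipe in its purest form: a copy of the pud-level loop shifted up one level, with the caller repointed. The loop's stride comes from p4d_addr_end(), which clamps each step to the reach of one p4d slot; the generic definition follows the same pattern as pgd_addr_end() in include/asm-generic/pgtable.h:

/* Step to the next p4d boundary, but never past 'end'. The unsigned
 * "- 1" comparison keeps the boundary == 0 wraparound case correct.
 */
#define p4d_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
})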
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 479e631d43c2..8bcb501bce60 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -128,19 +128,22 @@ out_unlock:
 static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
-	pmd_t *pmd = NULL;
 
 	pgd = pgd_offset(mm, address);
-	pud = pud_alloc(mm, pgd, address);
-	if (pud)
-		/*
-		 * Note that we didn't run this because the pmd was
-		 * missing, the *pmd may be already established and in
-		 * turn it may also be a trans_huge_pmd.
-		 */
-		pmd = pmd_alloc(mm, pud, address);
-	return pmd;
+	p4d = p4d_alloc(mm, pgd, address);
+	if (!p4d)
+		return NULL;
+	pud = pud_alloc(mm, p4d, address);
+	if (!pud)
+		return NULL;
+	/*
+	 * Note that we didn't run this because the pmd was
+	 * missing, the *pmd may be already established and in
+	 * turn it may also be a trans_huge_pmd.
+	 */
+	return pmd_alloc(mm, pud, address);
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
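Besides inserting the p4d level, the rewrite trades the old "if (pud)" nesting for early returns. The p4d_alloc() it relies on is the allocate-on-demand helper this patch adds to include/linux/mm.h, following the pud_alloc()/pmd_alloc() pattern:

static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
		unsigned long address)
{
	/* allocate the p4d table only if the pgd entry is still empty */
	return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ?
		NULL : p4d_offset(pgd, address);
}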
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b4024d688f38..0dd80222b20b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -86,12 +86,12 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
 	} while (pmd++, addr = next, addr != end);
 }
 
-static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end)
 {
 	pud_t *pud;
 	unsigned long next;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_clear_huge(pud))
@@ -102,6 +102,22 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
 	} while (pud++, addr = next, addr != end);
 }
 
+static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_clear_huge(p4d))
+			continue;
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		vunmap_pud_range(p4d, addr, next);
+	} while (p4d++, addr = next, addr != end);
+}
+
 static void vunmap_page_range(unsigned long addr, unsigned long end)
 {
 	pgd_t *pgd;
@@ -113,7 +129,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		vunmap_pud_range(pgd, addr, next);
+		vunmap_p4d_range(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
 
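vunmap_p4d_range() checks p4d_clear_huge() before descending, mirroring the pud and pmd levels. On architectures without huge-p4d vmap support the check costs nothing; the asm-generic fallback is a trivial stub (shape inferred from the pud/pmd variants, so treat the exact guard as an assumption):

#ifndef CONFIG_HAVE_ARCH_HUGE_VMAP
static inline int p4d_clear_huge(p4d_t *p4d)
{
	return 0;	/* no huge p4d mapping to tear down */
}
#endif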
@@ -160,13 +176,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr,
 	return 0;
 }
 
-static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
+static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
 		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
 	pud_t *pud;
 	unsigned long next;
 
-	pud = pud_alloc(&init_mm, pgd, addr);
+	pud = pud_alloc(&init_mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
@@ -177,6 +193,23 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
 	return 0;
 }
 
+static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	p4d = p4d_alloc(&init_mm, pgd, addr);
+	if (!p4d)
+		return -ENOMEM;
+	do {
+		next = p4d_addr_end(addr, end);
+		if (vmap_pud_range(p4d, addr, next, prot, pages, nr))
+			return -ENOMEM;
+	} while (p4d++, addr = next, addr != end);
+	return 0;
+}
+
 /*
  * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
  * will have pfns corresponding to the "pages" array.
@@ -196,7 +229,7 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
 	pgd = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
+		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
 		if (err)
 			return err;
 	} while (pgd++, addr = next, addr != end);
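With both directions converted, every vmap()/vunmap() pair now builds and tears down its mapping through the full five-level chain. An illustrative caller (error handling elided; pages and nr_pages are placeholders):

/* Map nr_pages discontiguous pages at a contiguous kernel address;
 * this reaches vmap_p4d_range() via vmap_page_range_noflush().
 */
void *buf = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
if (buf) {
	/* ... use the linear mapping ... */
	vunmap(buf);	/* unwinds through vunmap_p4d_range() */
}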
@@ -237,6 +270,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	unsigned long addr = (unsigned long) vmalloc_addr;
 	struct page *page = NULL;
 	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
 
 	/*
 	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
@@ -244,21 +281,23 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
 	 */
 	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
 
-	if (!pgd_none(*pgd)) {
-		pud_t *pud = pud_offset(pgd, addr);
-		if (!pud_none(*pud)) {
-			pmd_t *pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd)) {
-				pte_t *ptep, pte;
-
-				ptep = pte_offset_map(pmd, addr);
-				pte = *ptep;
-				if (pte_present(pte))
-					page = pte_page(pte);
-				pte_unmap(ptep);
-			}
-		}
-	}
+	if (pgd_none(*pgd))
+		return NULL;
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d))
+		return NULL;
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud))
+		return NULL;
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return NULL;
+
+	ptep = pte_offset_map(pmd, addr);
+	pte = *ptep;
+	if (pte_present(pte))
+		page = pte_page(pte);
+	pte_unmap(ptep);
 	return page;
 }
 EXPORT_SYMBOL(vmalloc_to_page);
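The flattened early-return style keeps the behavior identical: page stays NULL whenever any level is absent, so callers are unaffected. Typical use (illustrative; buf and sg are placeholders):

/* Translate a vmalloc address back to its backing struct page,
 * e.g. when building a scatterlist over a vmalloc'ed buffer.
 */
struct page *pg = vmalloc_to_page(buf);
if (pg)
	sg_set_page(sg, pg, PAGE_SIZE, 0);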