Diffstat (limited to 'mm/memory.c'):

 mm/memory.c | 230 +++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 196 insertions(+), 34 deletions(-)
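This is the mm/memory.c part of the generic 5-level paging conversion: every walk, copy, zap, remap and allocation path gains an intermediate p4d level between pgd and pud. On architectures that keep four levels of page tables the p4d level is folded away at compile time, so the reworked walks reduce back to the old code. A minimal sketch of the folding stubs, loosely following include/asm-generic/pgtable-nop4d.h from the same series (contents assumed and abridged, not part of this diff):

/* A folded p4d is just a re-typed pgd entry: one "table" of one entry. */
typedef struct { pgd_t pgd; } p4d_t;

#define P4D_SHIFT	PGDIR_SHIFT
#define PTRS_PER_P4D	1
#define P4D_SIZE	(1UL << P4D_SHIFT)
#define P4D_MASK	(~(P4D_SIZE - 1))

/* A pgd entry can never be none/bad on its own when the level is folded. */
static inline int pgd_none(pgd_t pgd)		{ return 0; }
static inline int pgd_bad(pgd_t pgd)		{ return 0; }
static inline int pgd_present(pgd_t pgd)	{ return 1; }
static inline void pgd_clear(pgd_t *pgd)	{ }

/* p4d_offset() is only a cast, so the new walks compile to the old ones. */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
	return (p4d_t *)pgd;
}

#define p4d_addr_end(addr, end)	(end)

With these stubs the new p4d loops in the diff below run exactly once per pgd entry and their added branches are compile-time constants, so folded configurations pay nothing for the fifth level.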
diff --git a/mm/memory.c b/mm/memory.c
index a97a4cec2e1f..235ba51b2fbf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -445,7 +445,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	mm_dec_nr_pmds(tlb->mm);
 }
 
-static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
 				unsigned long addr, unsigned long end,
 				unsigned long floor, unsigned long ceiling)
 {
@@ -454,7 +454,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	unsigned long start;
 
 	start = addr;
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
@@ -462,6 +462,39 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 		free_pmd_range(tlb, pud, addr, next, floor, ceiling);
 	} while (pud++, addr = next, addr != end);
 
+	start &= P4D_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= P4D_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pud = pud_offset(p4d, start);
+	p4d_clear(p4d);
+	pud_free_tlb(tlb, pud, start);
+}
+
+static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				unsigned long floor, unsigned long ceiling)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		free_pud_range(tlb, p4d, addr, next, floor, ceiling);
+	} while (p4d++, addr = next, addr != end);
+
 	start &= PGDIR_MASK;
 	if (start < floor)
 		return;
@@ -473,9 +506,9 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	if (end - 1 > ceiling - 1)
 		return;
 
-	pud = pud_offset(pgd, start);
+	p4d = p4d_offset(pgd, start);
 	pgd_clear(pgd);
-	pud_free_tlb(tlb, pud, start);
+	p4d_free_tlb(tlb, p4d, start);
 }
 
 /*
@@ -539,7 +572,7 @@ void free_pgd_range(struct mmu_gather *tlb,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+		free_p4d_range(tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 }
 
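The free_p4d_range() loop added above skips empty or corrupt entries via p4d_none_or_clear_bad(), which is defined outside this file. A sketch mirroring the existing pgd/pud/pmd variants in include/asm-generic/pgtable.h (exact form assumed):

static inline int p4d_none_or_clear_bad(p4d_t *p4d)
{
	if (p4d_none(*p4d))
		return 1;
	if (unlikely(p4d_bad(*p4d))) {
		p4d_clear_bad(p4d);	/* warn and wipe the bogus entry */
		return 1;
	}
	return 0;
}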
@@ -658,7 +691,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 			  pte_t pte, struct page *page)
 {
 	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
-	pud_t *pud = pud_offset(pgd, addr);
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	pud_t *pud = pud_offset(p4d, addr);
 	pmd_t *pmd = pmd_offset(pud, addr);
 	struct address_space *mapping;
 	pgoff_t index;
@@ -1023,16 +1057,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
 }
 
 static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+		p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end)
 {
 	pud_t *src_pud, *dst_pud;
 	unsigned long next;
 
-	dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
+	dst_pud = pud_alloc(dst_mm, dst_p4d, addr);
 	if (!dst_pud)
 		return -ENOMEM;
-	src_pud = pud_offset(src_pgd, addr);
+	src_pud = pud_offset(src_p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) {
@@ -1056,6 +1090,28 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src
 	return 0;
 }
 
+static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end)
+{
+	p4d_t *src_p4d, *dst_p4d;
+	unsigned long next;
+
+	dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr);
+	if (!dst_p4d)
+		return -ENOMEM;
+	src_p4d = p4d_offset(src_pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(src_p4d))
+			continue;
+		if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d,
+						vma, addr, next))
+			return -ENOMEM;
+	} while (dst_p4d++, src_p4d++, addr = next, addr != end);
+	return 0;
+}
+
 int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		struct vm_area_struct *vma)
 {
@@ -1111,7 +1167,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
-		if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+		if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd,
 					    vma, addr, next))) {
 			ret = -ENOMEM;
 			break;
@@ -1267,14 +1323,14 @@ next:
 }
 
 static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
-				struct vm_area_struct *vma, pgd_t *pgd,
+				struct vm_area_struct *vma, p4d_t *p4d,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
 	pud_t *pud;
 	unsigned long next;
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
@@ -1295,6 +1351,25 @@ next:
 	return addr;
 }
 
+static inline unsigned long zap_p4d_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				struct zap_details *details)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	p4d = p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none_or_clear_bad(p4d))
+			continue;
+		next = zap_pud_range(tlb, vma, p4d, addr, next, details);
+	} while (p4d++, addr = next, addr != end);
+
+	return addr;
+}
+
 void unmap_page_range(struct mmu_gather *tlb,
 			     struct vm_area_struct *vma,
 			     unsigned long addr, unsigned long end,
@@ -1310,7 +1385,7 @@ void unmap_page_range(struct mmu_gather *tlb,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		next = zap_pud_range(tlb, vma, pgd, addr, next, details);
+		next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 }
@@ -1465,16 +1540,24 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes);
 pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
 			spinlock_t **ptl)
 {
-	pgd_t *pgd = pgd_offset(mm, addr);
-	pud_t *pud = pud_alloc(mm, pgd, addr);
-	if (pud) {
-		pmd_t *pmd = pmd_alloc(mm, pud, addr);
-		if (pmd) {
-			VM_BUG_ON(pmd_trans_huge(*pmd));
-			return pte_alloc_map_lock(mm, pmd, addr, ptl);
-		}
-	}
-	return NULL;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return NULL;
+	pud = pud_alloc(mm, p4d, addr);
+	if (!pud)
+		return NULL;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return NULL;
+
+	VM_BUG_ON(pmd_trans_huge(*pmd));
+	return pte_alloc_map_lock(mm, pmd, addr, ptl);
 }
 
 /*
@@ -1740,7 +1823,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 	return 0;
 }
 
-static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
 			unsigned long addr, unsigned long end,
 			unsigned long pfn, pgprot_t prot)
 {
@@ -1748,7 +1831,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	unsigned long next;
 
 	pfn -= addr >> PAGE_SHIFT;
-	pud = pud_alloc(mm, pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
@@ -1760,6 +1843,26 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	return 0;
 }
 
+static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
+			unsigned long addr, unsigned long end,
+			unsigned long pfn, pgprot_t prot)
+{
+	p4d_t *p4d;
+	unsigned long next;
+
+	pfn -= addr >> PAGE_SHIFT;
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -ENOMEM;
+	do {
+		next = p4d_addr_end(addr, end);
+		if (remap_pud_range(mm, p4d, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot))
+			return -ENOMEM;
+	} while (p4d++, addr = next, addr != end);
+	return 0;
+}
+
 /**
  * remap_pfn_range - remap kernel memory to userspace
  * @vma: user vma to map to
@@ -1816,7 +1919,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	flush_cache_range(vma, addr, end);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = remap_pud_range(mm, pgd, addr, next,
+		err = remap_p4d_range(mm, pgd, addr, next,
 				pfn + (addr >> PAGE_SHIFT), prot);
 		if (err)
 			break;
@@ -1932,7 +2035,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
 	return err;
 }
 
-static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
 				     unsigned long addr, unsigned long end,
 				     pte_fn_t fn, void *data)
 {
@@ -1940,7 +2043,7 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	unsigned long next;
 	int err;
 
-	pud = pud_alloc(mm, pgd, addr);
+	pud = pud_alloc(mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
 	do {
@@ -1952,6 +2055,26 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
 	return err;
 }
 
+static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
+				     unsigned long addr, unsigned long end,
+				     pte_fn_t fn, void *data)
+{
+	p4d_t *p4d;
+	unsigned long next;
+	int err;
+
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -ENOMEM;
+	do {
+		next = p4d_addr_end(addr, end);
+		err = apply_to_pud_range(mm, p4d, addr, next, fn, data);
+		if (err)
+			break;
+	} while (p4d++, addr = next, addr != end);
+	return err;
+}
+
 /*
  * Scan a region of virtual memory, filling in page tables as necessary
  * and calling a provided function on each leaf page table.
@@ -1970,7 +2093,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 	pgd = pgd_offset(mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = apply_to_pud_range(mm, pgd, addr, next, fn, data);
+		err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
@@ -3653,11 +3776,15 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	};
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
+	p4d_t *p4d;
 	int ret;
 
 	pgd = pgd_offset(mm, address);
+	p4d = p4d_alloc(mm, pgd, address);
+	if (!p4d)
+		return VM_FAULT_OOM;
 
-	vmf.pud = pud_alloc(mm, pgd, address);
+	vmf.pud = pud_alloc(mm, p4d, address);
 	if (!vmf.pud)
 		return VM_FAULT_OOM;
 	if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) {
@@ -3779,12 +3906,35 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 }
 EXPORT_SYMBOL_GPL(handle_mm_fault);
 
+#ifndef __PAGETABLE_P4D_FOLDED
+/*
+ * Allocate p4d page table.
+ * We've already handled the fast-path in-line.
+ */
+int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+	p4d_t *new = p4d_alloc_one(mm, address);
+	if (!new)
+		return -ENOMEM;
+
+	smp_wmb(); /* See comment in __pte_alloc */
+
+	spin_lock(&mm->page_table_lock);
+	if (pgd_present(*pgd))		/* Another has populated it */
+		p4d_free(mm, new);
+	else
+		pgd_populate(mm, pgd, new);
+	spin_unlock(&mm->page_table_lock);
+	return 0;
+}
+#endif /* __PAGETABLE_P4D_FOLDED */
+
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.
  * We've already handled the fast-path in-line.
  */
-int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
 {
 	pud_t *new = pud_alloc_one(mm, address);
 	if (!new)
@@ -3793,10 +3943,17 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 	smp_wmb(); /* See comment in __pte_alloc */
 
 	spin_lock(&mm->page_table_lock);
-	if (pgd_present(*pgd))		/* Another has populated it */
+#ifndef __ARCH_HAS_5LEVEL_HACK
+	if (p4d_present(*p4d))		/* Another has populated it */
 		pud_free(mm, new);
 	else
-		pgd_populate(mm, pgd, new);
+		p4d_populate(mm, p4d, new);
+#else
+	if (pgd_present(*p4d))		/* Another has populated it */
+		pud_free(mm, new);
+	else
+		pgd_populate(mm, p4d, new);
+#endif /* __ARCH_HAS_5LEVEL_HACK */
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
@@ -3839,6 +3996,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 		  pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *ptep;
@@ -3847,7 +4005,11 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		goto out;
 
-	pud = pud_offset(pgd, address);
+	p4d = p4d_offset(pgd, address);
+	if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d)))
+		goto out;
+
+	pud = pud_offset(p4d, address);
 	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
 		goto out;
 
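Note that the __p4d_alloc() added above is only the slow path; its "fast-path in-line" comment refers to a p4d_alloc() wrapper in include/linux/mm.h. A sketch of that wrapper, mirroring the long-standing pud_alloc()/pmd_alloc() pattern (exact form assumed, not part of this diff):

static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
		unsigned long address)
{
	/* Take the lock-protected slow path only if the pgd entry is empty. */
	return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ?
		NULL : p4d_offset(pgd, address);
}

On folded configurations pgd_none() is constant zero, so this collapses to p4d_offset(pgd, address), a plain pointer cast, and __p4d_alloc() is never called.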